DGGS Binning

Binning module for vgrid.

This module provides functions to bin and aggregate data using various discrete global grid systems (DGGS), including statistical analysis and data categorization.

`a5bin_cli()` ¶

Command-line interface for a5bin conversion.

This function provides a command-line interface for binning point data to A5 grid cells. It parses command-line arguments and calls the main a5bin function.

Usage

python a5bin.py -i input.shp -r 10 -stats count -f geojson

Parameters:

Name	Description	Default
`-i, --input`	Input file path, URL, or other vector file formats	required
`-r, --resolution`	A5 resolution [0..29]	`13`
`-stats, --statistics`	Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)	`count`
`-category, --category`	Optional category field for grouping	`None`
`-numeric_col, --numeric_col`	Numeric field to compute statistics (required if stats != 'count')	`None`
`-split, --split_antimeridian`	Flag: apply antimeridian fixing to the resulting polygons	`False`
`-o, --output`	No longer accepted by the parser: the option was removed, and output is saved in the current working directory with a predefined name	n/a
`-f, --output_format`	Output format (geojson, gpkg, parquet, csv, shapefile)	`gpd`

Example

Bin shapefile to A5 cells at resolution 10 with count statistics:

python a5bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/a5bin.py

def a5bin_cli():
    """
    Command-line interface for binning point data to A5 grid cells.

    Parses the command-line arguments, decodes the optional ``--options`` JSON
    string and forwards everything to ``a5bin``. The result is printed when the
    selected output format is listed in ``STRUCTURED_FORMATS``. Failures
    (invalid ``--options`` or an exception raised by the binning call) are
    reported as an ``Error: ...`` line via ``print`` and the function returns
    without raising.

    Usage:
        python a5bin.py -i input.shp -r 10 -stats count -f geojson

    Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: A5 resolution [0..29] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -options, --options: JSON object of options passed to a52geo, e.g. '{"segments": 1000}'
        -split, --split_antimeridian: Flag; apply antimeridian fixing to the resulting polygons

    Note:
        There is no -o/--output option; it was removed and output is saved in
        the current working directory with a predefined name.

    Example:
        >>> # Bin shapefile to A5 cells at resolution 10 with count statistics
        >>> # python a5bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    parser = argparse.ArgumentParser(description="Binning point data to A5 DGGS")
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help="Input data: GeoJSON file path, URL, or other vector file formats",
    )
    parser.add_argument(
        "-r",
        "--resolution",
        type=int,
        default=13,
        help="Resolution of the grid [0..29]",
    )
    parser.add_argument(
        "-stats",
        "--statistics",
        choices=STATS_OPTIONS,
        default="count",
        help="Statistic option",
    )

    parser.add_argument(
        "-category",
        "--category",
        dest="category_col",
        required=False,
        help="Optional category field for grouping",
    )
    parser.add_argument(
        "-numeric_col",
        "--numeric_col",
        dest="numeric_col",
        required=False,
        help="Numeric field to compute statistics (required if stats != 'count')",
    )
    # Removed -o/--output; output is saved in CWD with predefined name
    parser.add_argument(
        "-f",
        "--output_format",
        required=False,
        default="gpd",
        choices=OUTPUT_FORMATS,
    )
    parser.add_argument(
        "-options",
        "--options",
        type=str,
        default=None,
        help="JSON string of options to pass to a52geo. "
             "Example: '{\"segments\": 1000}'",
    )
    parser.add_argument(
        "-split",
        "--split_antimeridian",
        action="store_true",
        default=False,
        help="Apply antimeridian fixing to the resulting polygons",
    )

    args = parser.parse_args()

    # Decode the options JSON if one was supplied.
    options = None
    if args.options:
        try:
            options = json.loads(args.options)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in options: {e}")
            return
        # Options are documented as a dict; reject valid JSON of another shape
        # (list, number, string) here instead of passing it on to the binning call.
        if options is not None and not isinstance(options, dict):
            print(
                "Error: options must be a JSON object, "
                "e.g. '{\"segments\": 1000}'"
            )
            return

    try:
        result = a5bin(
            data=args.input,
            resolution=args.resolution,
            stats=args.statistics,
            category_col=args.category_col,
            numeric_col=args.numeric_col,
            output_format=args.output_format,
            options=options,
            split_antimeridian=args.split_antimeridian,
        )
        # Only structured (in-memory) results are echoed to the console.
        if args.output_format in STRUCTURED_FORMATS:
            print(result)
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        print(f"Error: {e}")
        return

`dggalbin_cli()` ¶

Command-line interface for DGGAL binning.

Source code in vgrid/binning/dggalbin.py

def dggalbin_cli():
    """
    Command-line entry point for DGGAL binning.

    Reads the options below from the command line, runs ``dggalbin`` with them
    and prints the result when the chosen output format is listed in
    ``STRUCTURED_FORMATS``. Exceptions raised by the binning call or while
    printing are reported as an ``Error: ...`` line instead of propagating.

    Options:
        -i, --input: Input data (required)
        -t, --dggs_type: DGGAL type, one of the keys of DGGAL_TYPES (required)
        -r, --resolution: Integer resolution (required)
        -stats, --statistics: One of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics
        -f, --output_format: One of OUTPUT_FORMATS (default: gpd)
        -split, --split_antimeridian: Flag; apply antimeridian fixing
    """
    cli = argparse.ArgumentParser(description="Binning point data to DGGAL DGGS")

    # (option flags, add_argument keyword arguments), registered in this order.
    argument_table = (
        (
            ("-i", "--input"),
            {
                "type": str,
                "required": True,
                "help": "Input data: GeoJSON file path, URL, or other vector file formats",
            },
        ),
        (
            ("-t", "--dggs_type"),
            {
                "type": str,
                "required": True,
                "choices": DGGAL_TYPES.keys(),
                "help": "DGGAL type",
            },
        ),
        (
            ("-r", "--resolution"),
            {"type": int, "required": True, "help": "Resolution (integer)"},
        ),
        (
            ("-stats", "--statistics"),
            {"choices": STATS_OPTIONS, "default": "count", "help": "Statistic option"},
        ),
        (
            ("-category", "--category"),
            {
                "dest": "category_col",
                "required": False,
                "help": "Optional category field for grouping",
            },
        ),
        (
            ("-numeric_col", "--numeric_col"),
            {
                "dest": "numeric_col",
                "required": False,
                "help": "Numeric field to compute statistics (required if stats != 'count')",
            },
        ),
        (
            ("-f", "--output_format"),
            {"required": False, "default": "gpd", "choices": OUTPUT_FORMATS},
        ),
        (
            ("-split", "--split_antimeridian"),
            {
                "action": "store_true",
                "default": False,
                "help": "Apply antimeridian fixing to the resulting polygons",
            },
        ),
    )
    for flags, spec in argument_table:
        cli.add_argument(*flags, **spec)

    ns = cli.parse_args()

    try:
        binned = dggalbin(
            dggs_type=ns.dggs_type,
            data=ns.input,
            resolution=ns.resolution,
            stats=ns.statistics,
            category_col=ns.category_col,
            numeric_col=ns.numeric_col,
            output_format=ns.output_format,
            split_antimeridian=ns.split_antimeridian,
        )
        # Only structured (in-memory) results are echoed to the console.
        if ns.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as err:
        print(f"Error: {str(err)}")

`dggridbin_cli()` ¶

Command-line interface for DGGRID binning.

Source code in vgrid/binning/dggridbin.py

def dggridbin_cli():
    """
    Command-line entry point for DGGRID binning.

    Reads the options below from the command line, creates a DGGRID instance
    with ``create_dggrid_instance()`` and runs ``dggridbin``. The result is
    printed when the chosen output format is listed in ``STRUCTURED_FORMATS``.
    Exceptions raised while creating the instance, binning or printing are
    reported as an ``Error: ...`` line instead of propagating.

    Options:
        -i, --input: Input data (required)
        -t, --dggs_type: DGGRID type, one of the keys of DGGRID_TYPES (required)
        -r, --resolution: Integer resolution (required)
        -stats, --statistics: One of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics
        -f, --output_format: One of OUTPUT_FORMATS (default: gpd)
        -split, --split_antimeridian: Flag; apply antimeridian fixing
        -aggregate, --aggregate: Flag; aggregate the resulting polygons
    """
    cli = argparse.ArgumentParser(description="Binning point data to DGGRID DGGS")

    # (option flags, add_argument keyword arguments), registered in this order.
    argument_table = (
        (
            ("-i", "--input"),
            {
                "type": str,
                "required": True,
                "help": "Input data: GeoJSON file path, URL, or other vector file formats",
            },
        ),
        (
            ("-t", "--dggs_type"),
            {
                "type": str,
                "required": True,
                "choices": DGGRID_TYPES.keys(),
                "help": "DGGRID type",
            },
        ),
        (
            ("-r", "--resolution"),
            {"type": int, "required": True, "help": "Resolution (integer)"},
        ),
        (
            ("-stats", "--statistics"),
            {"choices": STATS_OPTIONS, "default": "count", "help": "Statistic option"},
        ),
        (
            ("-category", "--category"),
            {
                "dest": "category_col",
                "required": False,
                "help": "Optional category field for grouping",
            },
        ),
        (
            ("-numeric_col", "--numeric_col"),
            {
                "dest": "numeric_col",
                "required": False,
                "help": "Numeric field to compute statistics (required if stats != 'count')",
            },
        ),
        (
            ("-f", "--output_format"),
            {"required": False, "default": "gpd", "choices": OUTPUT_FORMATS},
        ),
        (
            ("-split", "--split_antimeridian"),
            {
                "action": "store_true",
                "default": False,
                "help": "Apply antimeridian fixing to the resulting polygons",
            },
        ),
        (
            ("-aggregate", "--aggregate"),
            {
                "action": "store_true",
                "help": "Aggregate the resulting polygons (dissolve by global_id when split_antimeridian is set)",
            },
        ),
    )
    for flags, spec in argument_table:
        cli.add_argument(*flags, **spec)

    ns = cli.parse_args()

    try:
        # Instance creation stays inside the try so its failures are reported too.
        instance = create_dggrid_instance()
        binned = dggridbin(
            dggrid_instance=instance,
            dggs_type=ns.dggs_type,
            data=ns.input,
            resolution=ns.resolution,
            stats=ns.statistics,
            category_col=ns.category_col,
            numeric_col=ns.numeric_col,
            output_format=ns.output_format,
            split_antimeridian=ns.split_antimeridian,
            aggregate=ns.aggregate,
        )
        # Only structured (in-memory) results are echoed to the console.
        if ns.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as err:
        print(f"Error: {str(err)}")

`s2bin_cli()` ¶

Command-line interface for s2bin conversion.

This function provides a command-line interface for binning point data to S2 grid cells. It parses command-line arguments and calls the main s2bin function.

Usage

python s2bin.py -i input.shp -r 10 -stats count -f geojson

CLI Arguments

- -i, --input: Input file path, URL, or other vector file formats
- -r, --resolution: S2 resolution [0..30]
- -stats, --statistics: Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)
- -category, --category: Optional category field for grouping
- -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
- -f, --output_format: Output format (geojson, gpkg, parquet, csv, shapefile)
- -fix, --fix_antimeridian: Antimeridian fixing method (shift, shift_balanced, shift_west, shift_east, split, none)

Example

Bin shapefile to S2 cells at resolution 10 with count statistics:

python s2bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/s2bin.py

def s2bin_cli():
    """
    Command-line entry point for binning point data to S2 grid cells.

    Reads the options below from the command line and runs ``s2bin`` with
    them. The result is printed when the chosen output format is listed in
    ``STRUCTURED_FORMATS``. Exceptions raised by the binning call or while
    printing are reported as an ``Error: ...`` line instead of propagating.

    Usage:
        python s2bin.py -i input.shp -r 10 -stats count -f geojson

    CLI Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: S2 resolution [0..30] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -fix, --fix_antimeridian: Antimeridian fixing method, one of FIX_ANTIMERIDIAN_CHOICES

    Example:
        >>> # Bin shapefile to S2 cells at resolution 10 with count statistics
        >>> # python s2bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    cli = argparse.ArgumentParser(description="Binning point data to S2 DGGS")

    # (option flags, add_argument keyword arguments), registered in this order.
    # There is no -o/--output option: output is saved in CWD with a predefined name.
    argument_table = (
        (
            ("-i", "--input"),
            {
                "type": str,
                "required": True,
                "help": "Input data: GeoJSON file path, URL, or other vector file formats",
            },
        ),
        (
            ("-r", "--resolution"),
            {"type": int, "default": 13, "help": "Resolution of the grid [0..30]"},
        ),
        (
            ("-stats", "--statistics"),
            {"choices": STATS_OPTIONS, "default": "count", "help": "Statistic option"},
        ),
        (
            ("-category", "--category"),
            {
                "dest": "category_col",
                "required": False,
                "help": "Optional category field for grouping",
            },
        ),
        (
            ("-numeric_col", "--numeric_col"),
            {
                "dest": "numeric_col",
                "required": False,
                "help": "Numeric field to compute statistics (required if stats != 'count')",
            },
        ),
        (
            ("-f", "--output_format"),
            {"required": False, "default": "gpd", "choices": OUTPUT_FORMATS},
        ),
        (
            ("-fix", "--fix_antimeridian"),
            {
                "type": str,
                "choices": FIX_ANTIMERIDIAN_CHOICES,
                "default": None,
                "help": "Antimeridian fixing method: shift, shift_balanced, shift_west, shift_east, split, none",
            },
        ),
    )
    for flags, spec in argument_table:
        cli.add_argument(*flags, **spec)

    ns = cli.parse_args()

    try:
        binned = s2bin(
            data=ns.input,
            resolution=ns.resolution,
            stats=ns.statistics,
            category_col=ns.category_col,
            numeric_col=ns.numeric_col,
            output_format=ns.output_format,
            fix_antimeridian=ns.fix_antimeridian,
        )
        # Only structured (in-memory) results are echoed to the console.
        if ns.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as err:
        print(f"Error: {str(err)}")

H3 Grid Binning Module

Bins point data into H3 hexagonal grid cells and computes various statistics using Uber's hierarchical grid system.

Key Functions:

- h3_bin(): Core binning function with spatial joins and aggregation
- h3bin(): Main user-facing function with multiple input/output formats
- h3bin_cli(): Command-line interface for binning functionality

`h3_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)` ¶

Binning via H3 grid generation within points' bbox + spatial join, then pandas groupby. Supports custom stats (range, variety, minority, majority). Non-point geometries are ignored.

When category_col is set, output columns are named {category_value}_{numeric_col}_{stats} (e.g. Forest_population_mean).

Source code in vgrid/binning/h3bin.py

def h3_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    fix_antimeridian=None,
    **kwargs,
):
    """
    Bin points into H3 cells: build the grid over the points' bounding box,
    spatially join the points to the cells, then aggregate per cell.

    Only ``Point`` and ``MultiPoint`` geometries take part; multipoints are
    exploded into single points and every other geometry type is dropped.

    When ``category_col`` is set, output columns are named
    ``{category_value}_{numeric_col}_{stats}`` (e.g. ``Forest_population_mean``).

    Args:
        data: Input accepted by ``process_input_data_bin``.
        resolution: H3 resolution, checked by ``validate_h3_resolution``.
        stats: Statistic name handed to ``aggregate_joined``.
        category_col: Optional category column; joined only when it exists in the input.
        numeric_col: Column used for statistics other than ``'count'``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        fix_antimeridian: Forwarded to ``h3_grid_within_bbox``.
        **kwargs: Extra arguments forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame of the grid cells that received statistics, with a
        ``resolution`` column; CRS is the grid's CRS or ``EPSG:4326``.

    Raises:
        ValueError: If ``stats`` is not ``'count'`` and ``numeric_col`` is given
            but missing from the input data.
    """
    resolution = validate_h3_resolution(resolution)
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )

    # Restrict to point-like geometries and flatten multipoints.
    if not points_gdf.empty:
        point_like = points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        points_gdf = points_gdf[point_like].copy()
        if (points_gdf.geometry.geom_type == "MultiPoint").any():
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    # Grid covering the bounding box of the remaining points.
    cell_id = "h3"
    west, south, east, north = points_gdf.total_bounds
    grid_gdf = h3_grid_within_bbox(
        resolution=resolution,
        bbox=(west, south, east, north),
        fix_antimeridian=fix_antimeridian,
    )

    # Carry only the columns the aggregation needs through the join.
    keep = ["geometry"]
    if category_col and category_col in points_gdf.columns:
        keep.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    cells = grid_gdf[[cell_id, "geometry"]]
    joined = gpd.sjoin(points_gdf[keep], cells, how="inner", predicate="within")

    per_cell = aggregate_joined(
        joined, cell_id, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # Attach the statistics to the cell geometries.
    out = grid_gdf.merge(per_cell, on=cell_id, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    out_crs = grid_gdf.crs or "EPSG:4326"
    return gpd.GeoDataFrame(out, geometry="geometry", crs=out_crs)

S2 Grid Binning Module

Bins point data into S2 spherical grid cells and computes various statistics using Google's hierarchical grid system.

Key Functions:

- s2_bin(): Core binning function with spatial joins and aggregation
- s2bin(): Main user-facing function with multiple input/output formats
- s2bin_cli(): Command-line interface for binning functionality

`s2_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)` ¶

Grid + spatial join + groupby approach for S2 binning (like a5bin).

Source code in vgrid/binning/s2bin.py

def s2_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    fix_antimeridian=None,
    **kwargs,
):
    """
    Bin points into S2 cells: build the grid over the points' bounding box,
    spatially join the points to the cells, then aggregate per cell.

    Only ``Point`` and ``MultiPoint`` geometries take part; multipoints are
    exploded into single points and every other geometry type is dropped.

    Args:
        data: Input accepted by ``process_input_data_bin``.
        resolution: S2 resolution, checked by ``validate_s2_resolution``.
        stats: Statistic name handed to ``aggregate_joined``.
        category_col: Optional category column; joined only when it exists in the input.
        numeric_col: Column used for statistics other than ``'count'``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        fix_antimeridian: Forwarded to ``s2_grid``.
        **kwargs: Extra arguments forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame of the grid cells that received statistics, with a
        ``resolution`` column; CRS is the grid's CRS or ``EPSG:4326``.

    Raises:
        ValueError: If ``stats`` is not ``'count'`` and ``numeric_col`` is given
            but missing from the input data.
    """
    resolution = validate_s2_resolution(resolution)
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )

    # Restrict to point-like geometries and flatten multipoints.
    if not points_gdf.empty:
        point_like = points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        points_gdf = points_gdf[point_like].copy()
        if (points_gdf.geometry.geom_type == "MultiPoint").any():
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    # Grid covering the bounding box of the remaining points.
    cell_id = "s2"
    west, south, east, north = points_gdf.total_bounds
    # Imported at call time, as in the original implementation.
    from vgrid.generator.s2grid import s2_grid

    grid_gdf = s2_grid(
        resolution=resolution,
        bbox=(west, south, east, north),
        fix_antimeridian=fix_antimeridian,
    )

    # Carry only the columns the aggregation needs through the join.
    keep = ["geometry"]
    if category_col and category_col in points_gdf.columns:
        keep.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    cells = grid_gdf[[cell_id, "geometry"]]
    joined = gpd.sjoin(points_gdf[keep], cells, how="inner", predicate="within")

    per_cell = aggregate_joined(
        joined, cell_id, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # Attach the statistics to the cell geometries.
    out = grid_gdf.merge(per_cell, on=cell_id, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    out_crs = grid_gdf.crs or "EPSG:4326"
    return gpd.GeoDataFrame(out, geometry="geometry", crs=out_crs)

`s2bin_cli()` ¶

Command-line interface for s2bin conversion.

This function provides a command-line interface for binning point data to S2 grid cells. It parses command-line arguments and calls the main s2bin function.

Usage

python s2bin.py -i input.shp -r 10 -stats count -f geojson

CLI Arguments

- -i, --input: Input file path, URL, or other vector file formats
- -r, --resolution: S2 resolution [0..30]
- -stats, --statistics: Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)
- -category, --category: Optional category field for grouping
- -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
- -f, --output_format: Output format (geojson, gpkg, parquet, csv, shapefile)
- -fix, --fix_antimeridian: Antimeridian fixing method (shift, shift_balanced, shift_west, shift_east, split, none)

Example

Bin shapefile to S2 cells at resolution 10 with count statistics:

python s2bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/s2bin.py

def s2bin_cli():
    """
    Command-line entry point for binning point data to S2 grid cells.

    Reads the options below from the command line and runs ``s2bin`` with
    them. The result is printed when the chosen output format is listed in
    ``STRUCTURED_FORMATS``. Exceptions raised by the binning call or while
    printing are reported as an ``Error: ...`` line instead of propagating.

    Usage:
        python s2bin.py -i input.shp -r 10 -stats count -f geojson

    CLI Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: S2 resolution [0..30] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -fix, --fix_antimeridian: Antimeridian fixing method, one of FIX_ANTIMERIDIAN_CHOICES

    Example:
        >>> # Bin shapefile to S2 cells at resolution 10 with count statistics
        >>> # python s2bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    cli = argparse.ArgumentParser(description="Binning point data to S2 DGGS")

    # (option flags, add_argument keyword arguments), registered in this order.
    # There is no -o/--output option: output is saved in CWD with a predefined name.
    argument_table = (
        (
            ("-i", "--input"),
            {
                "type": str,
                "required": True,
                "help": "Input data: GeoJSON file path, URL, or other vector file formats",
            },
        ),
        (
            ("-r", "--resolution"),
            {"type": int, "default": 13, "help": "Resolution of the grid [0..30]"},
        ),
        (
            ("-stats", "--statistics"),
            {"choices": STATS_OPTIONS, "default": "count", "help": "Statistic option"},
        ),
        (
            ("-category", "--category"),
            {
                "dest": "category_col",
                "required": False,
                "help": "Optional category field for grouping",
            },
        ),
        (
            ("-numeric_col", "--numeric_col"),
            {
                "dest": "numeric_col",
                "required": False,
                "help": "Numeric field to compute statistics (required if stats != 'count')",
            },
        ),
        (
            ("-f", "--output_format"),
            {"required": False, "default": "gpd", "choices": OUTPUT_FORMATS},
        ),
        (
            ("-fix", "--fix_antimeridian"),
            {
                "type": str,
                "choices": FIX_ANTIMERIDIAN_CHOICES,
                "default": None,
                "help": "Antimeridian fixing method: shift, shift_balanced, shift_west, shift_east, split, none",
            },
        ),
    )
    for flags, spec in argument_table:
        cli.add_argument(*flags, **spec)

    ns = cli.parse_args()

    try:
        binned = s2bin(
            data=ns.input,
            resolution=ns.resolution,
            stats=ns.statistics,
            category_col=ns.category_col,
            numeric_col=ns.numeric_col,
            output_format=ns.output_format,
            fix_antimeridian=ns.fix_antimeridian,
        )
        # Only structured (in-memory) results are echoed to the console.
        if ns.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as err:
        print(f"Error: {str(err)}")

A5 Grid Binning Module

Bins point data into A5 (Adaptive 5) grid cells and computes various statistics using hierarchical geospatial indexing.

Key Functions:

- a5_bin(): Core binning function with spatial joins and aggregation
- a5bin(): Main user-facing function with multiple input/output formats
- a5bin_cli(): Command-line interface for binning functionality

`a5_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', options=None, split_antimeridian=False, **kwargs)` ¶

Bin point data into A5 grid cells and compute statistics using a single grid generation + spatial join, followed by pandas groupby aggregation.

Returns a GeoDataFrame with A5 cell stats and geometry.

Parameters:

- `options` (dict, optional): Options for a52geo.
- `split_antimeridian` (bool, optional): When True, apply antimeridian fixing to the resulting polygons.

Source code in vgrid/binning/a5bin.py

def a5_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    options=None,
    split_antimeridian=False,
    **kwargs,
):
    """
    Bin points into A5 cells: build the grid once over the points' bounding
    box, spatially join the points to the cells, then aggregate per cell.

    Only ``Point`` and ``MultiPoint`` geometries take part; multipoints are
    exploded into single points and every other geometry type is dropped.

    Args:
        data: Input accepted by ``process_input_data_bin``.
        resolution: A5 resolution; cast to ``int`` and checked by
            ``validate_a5_resolution``.
        stats: Statistic name handed to ``aggregate_joined``.
        category_col: Optional category column; joined only when it exists in the input.
        numeric_col: Column used for statistics other than ``'count'``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        options (dict, optional): Options for a52geo, forwarded to ``a5_grid``.
        split_antimeridian (bool, optional): When True, apply antimeridian
            fixing to the resulting polygons (forwarded to ``a5_grid``).
        **kwargs: Extra arguments forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame with A5 cell stats and geometry, including a
        ``resolution`` column; CRS is the grid's CRS or ``EPSG:4326``.

    Raises:
        ValueError: If ``stats`` is not ``'count'`` and ``numeric_col`` is
            missing, or is not a column of the input data.
    """
    resolution = validate_a5_resolution(int(resolution))

    needs_numeric = stats != "count"
    if needs_numeric and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    # 1) Normalise the input to a GeoDataFrame of points.
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Restrict to point-like geometries and flatten multipoints.
    if not points_gdf.empty:
        point_like = points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        points_gdf = points_gdf[point_like].copy()
        if (points_gdf.geometry.geom_type == "MultiPoint").any():
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    # 2) Grid covering the bounding box of the remaining points (lon/lat order).
    cell_id = "a5"
    west, south, east, north = points_gdf.total_bounds
    grid_gdf = a5_grid(
        resolution=resolution,
        bbox=(west, south, east, north),
        options=options,
        split_antimeridian=split_antimeridian,
    )

    # 3) Carry only the columns the aggregation needs through the join.
    keep = ["geometry"]
    if category_col and category_col in points_gdf.columns:
        keep.append(category_col)
    if needs_numeric and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    cells = grid_gdf[[cell_id, "geometry"]]
    joined = gpd.sjoin(points_gdf[keep], cells, how="inner", predicate="within")

    # 4) Aggregate per cell.
    per_cell = aggregate_joined(
        joined, cell_id, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # 5) Attach the statistics to the grid, keeping its per-cell columns.
    out = grid_gdf.merge(per_cell, on=cell_id, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    out_crs = grid_gdf.crs or "EPSG:4326"
    return gpd.GeoDataFrame(out, geometry="geometry", crs=out_crs)

`a5bin(data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', options=None, split_antimeridian=False, **kwargs)` ¶

Bin point data into A5 grid cells and compute statistics from various input formats.

This is the main function that handles binning of point data to A5 grid cells. It supports multiple input formats including file paths, URLs, DataFrames, GeoDataFrames, GeoJSON dictionaries, and lists of features.

Parameters:

Name	Type	Description	Default
`data`		Input data in one of the following formats: - File path (str): Path to vector file (shapefile, GeoJSON, etc.) - URL (str): URL to vector data - pandas.DataFrame: DataFrame with lat/lon columns - geopandas.GeoDataFrame: GeoDataFrame with point geometries - dict: GeoJSON dictionary - list: List of GeoJSON feature dictionaries	required
`resolution`	`int`	A5 resolution level [0..29] (0=coarsest, 29=finest)	required
`stats`	`str`	Statistic to compute: - 'count': Count of points in each cell - 'sum': Sum of field values - 'min': Minimum field value - 'max': Maximum field value - 'mean': Mean field value - 'median': Median field value - 'std': Standard deviation of field values - 'var': Variance of field values - 'range': Range of field values - 'minority': Least frequent value - 'majority': Most frequent value - 'variety': Number of unique values	`'count'`
`category_col`	`str`	Category column for grouping statistics. When provided, statistics are computed separately for each category value.	`None`
`numeric_col`	`str`	Numeric field to compute statistics (required if stats != 'count')	`None`
`output_format`	`str`	Output format. Options include: - 'gpd', 'geopandas', 'gdf', 'geodataframe': Return GeoDataFrame - 'geojson_dict', 'json_dict': Return GeoJSON dictionary - 'geojson', 'json': Save as GeoJSON file or return string - 'csv': Save as CSV file or return string - 'shp', 'shapefile': Save as shapefile - 'gpkg', 'geopackage': Save as GeoPackage - 'parquet', 'geoparquet': Save as Parquet file - None: Return list of dictionaries	`'gpd'`
`options`	`dict`	Optional. Options for a52geo.	`None`
`split_antimeridian`	`bool`	Optional. When True, apply antimeridian fixing to the resulting polygons.	`False`
`**kwargs`		Additional arguments passed to geopandas read functions (e.g., lat_col, lon_col)	`{}`

Returns:

Type	Description
	Various types depending on output_format:
	GeoDataFrame: When output_format is 'gpd', 'geopandas', 'gdf', 'geodataframe'
	dict: When output_format is 'geojson_dict', 'json_dict', or None
	str: When output_format is 'geojson', 'json', or 'csv' (returns data as string)
	str: File path when output_format is a file-based format (geojson, csv, shp, gpkg, parquet)

Raises:

Type	Description
`ValueError`	If input data type is not supported, conversion fails, or required parameters are missing
`TypeError`	If resolution is not an integer

Example

Bin from file with count statistics:

result = a5bin("cities.shp", 10, "count")

Bin from GeoDataFrame with mean statistics:

import geopandas as gpd
gdf = gpd.read_file("cities.shp")
result = a5bin(gdf, 10, "mean", numeric_col="population")

Bin from GeoJSON dict with category grouping:

geojson = {"type": "FeatureCollection", "features": [...]}
result = a5bin(geojson, 10, "sum", numeric_col="value", category_col="type")

Save output as GeoJSON file:

result = a5bin("points.csv", 8, "count", output_format="geojson")
print(f"Output saved to: {result}")

Source code in vgrid/binning/a5bin.py

def a5bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    output_format="gpd",
    options=None,
    split_antimeridian=False,
    **kwargs,
):
    """
    Bin point data into A5 cells and hand the result to the output converter.

    The heavy lifting is delegated to ``a5_bin``; this wrapper validates the
    statistic/column combination, derives a default output name and passes
    the binned GeoDataFrame to ``convert_to_output_format``.

    Args:
        data: Point data to bin. Documented inputs are:
            - str: path or URL of a vector file (shapefile, GeoJSON, ...)
            - pandas.DataFrame with lat/lon columns
            - geopandas.GeoDataFrame with point geometries
            - dict: GeoJSON dictionary
            - list: list of GeoJSON feature dictionaries
        resolution (int): A5 resolution level [0..29] (0=coarsest, 29=finest).
        stats (str): Statistic to compute per cell. One of 'count', 'sum',
            'min', 'max', 'mean', 'median', 'std', 'var', 'range',
            'minority', 'majority', 'variety'.
        category_col (str, optional): Column used to compute the statistic
            separately for each category value.
        numeric_col (str, optional): Column the statistic is computed on.
            Mandatory for every statistic except 'count'.
        output_format (str, optional): Requested output format:
            - 'gpd', 'geopandas', 'gdf', 'geodataframe': GeoDataFrame
            - 'geojson_dict', 'json_dict': GeoJSON dictionary
            - 'geojson', 'json': GeoJSON file or string
            - 'csv': CSV file or string
            - 'shp', 'shapefile': shapefile
            - 'gpkg', 'geopackage': GeoPackage
            - 'parquet', 'geoparquet': Parquet file
            - None: handled by convert_to_output_format
              (NOTE(review): earlier docs disagree on whether this yields a
              dict or a list of dicts -- confirm against the converter).
        options (dict, optional): Options forwarded to ``a5_bin`` (documented
            as options for a52geo).
        split_antimeridian (bool, optional): When True, apply antimeridian
            fixing to the resulting polygons.
        **kwargs: Extra keyword arguments forwarded to ``a5_bin``
            (e.g. lat_col, lon_col).

    Returns:
        Whatever ``convert_to_output_format`` returns for ``output_format``:
        a GeoDataFrame, a GeoJSON dictionary, a string, or the path of the
        file that was written.

    Raises:
        ValueError: If ``stats`` is not 'count' and ``numeric_col`` is empty.
            Further errors (unsupported input, invalid resolution) may be
            raised by ``a5_bin``.

    Example:
        >>> # Bin from file with count statistics
        >>> result = a5bin("cities.shp", 10, "count")

        >>> # Bin from GeoDataFrame with mean statistics
        >>> import geopandas as gpd
        >>> gdf = gpd.read_file("cities.shp")
        >>> result = a5bin(gdf, 10, "mean", numeric_col="population")

        >>> # Save output as GeoJSON file
        >>> result = a5bin("points.csv", 8, "count", output_format="geojson")
        >>> print(f"Output saved to: {result}")
    """
    # Every statistic other than a plain count aggregates a value column.
    needs_value_column = stats != "count"
    if needs_value_column and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    binned = a5_bin(
        data,
        resolution,
        stats,
        category_col,
        numeric_col,
        options=options,
        split_antimeridian=split_antimeridian,
        **kwargs,
    )

    # A default name is only derived for formats listed in OUTPUT_FORMATS;
    # for anything else the converter receives None.
    if output_format in OUTPUT_FORMATS:
        suffix = f"a5bin_{resolution}"
        if isinstance(data, str):
            # Reuse the input file's stem so the output is recognisable.
            stem, _ext = os.path.splitext(os.path.basename(data))
            output_name = f"{stem}_{suffix}"
        else:
            output_name = suffix
    else:
        output_name = None

    return convert_to_output_format(binned, output_format, output_name)

`a5bin_cli()` ¶

Command-line interface for a5bin conversion.

This function provides a command-line interface for binning point data to A5 grid cells. It parses command-line arguments and calls the main a5bin function.

Usage

python a5bin.py -i input.shp -r 10 -stats count -f geojson

Parameters:

Name	Description	Default
`-i, --input`	Input file path, URL, or other vector file formats	required
`-r, --resolution`	A5 resolution [0..29]	`13`
`-stats, --statistics`	Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)	`count`
`-category, --category`	Optional category field for grouping	`None`
`-numeric_col, --numeric_col`	Numeric field to compute statistics (required if stats != 'count')	`None`
`-split, --split_antimeridian`	Apply antimeridian fixing to the resulting polygons	`False`
`-o, --output`	Removed: the CLI no longer accepts an output path; output is saved in the current working directory with a predefined name	n/a
`-f, --output_format`	Output format (geojson, gpkg, parquet, csv, shapefile)	`gpd`
`-options, --options`	JSON string of options to pass to a52geo, e.g. '{"segments": 1000}'	`None`

Example

Bin shapefile to A5 cells at resolution 10 with count statistics¶

python a5bin.py -i cities.shp -r 10 -stats count -f geojson¶

Source code in vgrid/binning/a5bin.py

def a5bin_cli():
    """
    Command-line interface for a5bin conversion.

    Parses the command-line arguments, calls :func:`a5bin` with them and
    prints the result when the selected output format is one of
    ``STRUCTURED_FORMATS``. Failures are not raised: they are reported as an
    ``Error: ...`` line on standard output and the function returns.

    Usage:
        python a5bin.py -i input.shp -r 10 -stats count -f geojson

    Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: A5 resolution [0..29] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -options, --options: JSON string of options to pass to a52geo,
            e.g. '{"segments": 1000}'
        -split, --split_antimeridian: Apply antimeridian fixing to the resulting polygons

    Note:
        There is no -o/--output option; output files are saved in the
        current working directory under a predefined name.

    Example:
        >>> # Bin shapefile to A5 cells at resolution 10 with count statistics
        >>> # python a5bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    parser = argparse.ArgumentParser(description="Binning point data to A5 DGGS")
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help="Input data: GeoJSON file path, URL, or other vector file formats",
    )
    parser.add_argument(
        "-r",
        "--resolution",
        type=int,
        default=13,
        help="Resolution of the grid [0..29]",
    )
    parser.add_argument(
        "-stats",
        "--statistics",
        choices=STATS_OPTIONS,
        default="count",
        help="Statistic option",
    )
    parser.add_argument(
        "-category",
        "--category",
        dest="category_col",
        required=False,
        help="Optional category field for grouping",
    )
    parser.add_argument(
        "-numeric_col",
        "--numeric_col",
        dest="numeric_col",
        required=False,
        help="Numeric field to compute statistics (required if stats != 'count')",
    )
    # No -o/--output: output is saved in the CWD with a predefined name.
    parser.add_argument(
        "-f",
        "--output_format",
        required=False,
        default="gpd",
        choices=OUTPUT_FORMATS,
    )
    parser.add_argument(
        "-options",
        "--options",
        type=str,
        default=None,
        help="JSON string of options to pass to a52geo. "
             "Example: '{\"segments\": 1000}'",
    )
    parser.add_argument(
        "-split",
        "--split_antimeridian",
        action="store_true",
        default=False,
        help="Apply antimeridian fixing to the resulting polygons",
    )

    args = parser.parse_args()

    # Decode the optional JSON options up front so a malformed string is
    # reported before any binning work starts.
    options = None
    if args.options:
        try:
            options = json.loads(args.options)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in options: {e}")
            return

    try:
        result = a5bin(
            data=args.input,
            resolution=args.resolution,
            stats=args.statistics,
            category_col=args.category_col,
            numeric_col=args.numeric_col,
            output_format=args.output_format,
            options=options,
            split_antimeridian=args.split_antimeridian,
        )
        # Only in-memory ("structured") results are echoed; the notification
        # for file outputs is handled in convert_to_output_format.
        if args.output_format in STRUCTURED_FORMATS:
            print(result)
    except Exception as e:
        # Top-level CLI boundary: report the problem instead of a traceback.
        print(f"Error: {e}")
        return

rHEALPix Grid Binning Module

Bins point data into rHEALPix grid cells and computes various statistics using hierarchical equal-area grid system for consistent spatial analysis.

Key Functions: - rhealpix_bin(): Core binning function with spatial joins and aggregation - rhealpixbin(): Main user-facing function with multiple input/output formats - rhealpixbin_cli(): Command-line interface for binning functionality

DGGAL Grid Binning Module

Bins point data into DGGAL (Discrete Global Grid Abstraction Library) cells and computes various statistics for multiple grid types including ISEA3H, ISEA9R, IVEA3H, IVEA9R, RTEA3H, RTEA9R, and rHEALPix.

Key Functions: - dggal_bin(): Core binning function with spatial joins and aggregation - dggalbin(): Main user-facing function with multiple input/output formats - dggalbin_cli(): Command-line interface for binning functionality

`dggal_bin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, **kwargs)` ¶

Bin point data into DGGAL grid cells and compute statistics using a single grid generation + spatial join, followed by pandas groupby aggregation.

This avoids per-point subprocess calls and is significantly faster.

Returns a GeoDataFrame with DGGAL cell stats and geometry.

Source code in vgrid/binning/dggalbin.py

def dggal_bin(
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
    split_antimeridian: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGAL grid cells and compute a statistic per cell.

    The input is normalized with ``process_input_data_bin``, reduced to
    Point/MultiPoint geometries (MultiPoints are exploded into single
    points) and spatially joined (predicate ``"within"``) against a grid
    generated once by ``dggalgen`` over the points' bounding box. The joined
    rows are aggregated by ``aggregate_joined`` and merged back onto the
    grid, so only cells that received at least one point are returned.

    Args:
        dggs_type: DGGAL grid type; also used to build the id column name
            ``dggal_<dggs_type>``.
        data: Input accepted by ``process_input_data_bin``.
        resolution: Grid resolution; coerced with ``int()`` and validated by
            ``validate_dggal_resolution``.
        stats: Statistic to compute; anything other than ``"count"`` needs
            ``numeric_col``.
        category_col: Optional column used for grouping. It is only carried
            into the join when it exists in the input.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name forwarded to the input processor.
        lon_col: Longitude column name forwarded to the input processor.
        split_antimeridian: Forwarded to ``dggalgen``.
        **kwargs: Extra keyword arguments for ``process_input_data_bin``.

    Returns:
        GeoDataFrame with the cell id column, the aggregated columns, a
        ``resolution`` column and the cell geometry. When the input contains
        no point geometries an empty GeoDataFrame (id and geometry columns,
        EPSG:4326) is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            empty, or if ``numeric_col`` is not a column of the input.
    """
    resolution = validate_dggal_resolution(dggs_type, int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    # 1) Normalize input to a GeoDataFrame of points
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = f"dggal_{dggs_type}"

    # Nothing to bin: total_bounds of an empty frame is all-NaN and must not
    # be handed to the grid generator as a bounding box.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # 2) Generate the grid once over the points' extent (lon/lat order)
    minx, miny, maxx, maxy = points_gdf.total_bounds
    grid_gdf = dggalgen(
        dggs_type=dggs_type,
        resolution=resolution,
        output_format="gpd",
        bbox=(minx, miny, maxx, maxy),
        split_antimeridian=split_antimeridian,
    )

    # 3) Spatial join points -> cells, carrying only the columns needed
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # 4) Aggregate per cell
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # 5) Attach the statistics to the cell geometries (inner merge drops
    #    cells without points)
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    return gpd.GeoDataFrame(
        out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326"
    )

`dggalbin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, **kwargs)` ¶

Bin point data into DGGAL grid cells and compute statistics from various input formats.

Source code in vgrid/binning/dggalbin.py

def dggalbin(
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    output_format: str = "gpd",
    split_antimeridian: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGAL cells and convert the result to ``output_format``.

    Binning is delegated to ``dggal_bin``; the resulting GeoDataFrame is then
    passed to ``convert_to_output_format`` together with a default output
    name. The name is ``<dggs_type>bin_<resolution>``, prefixed with the
    input file's stem when ``data`` is a string, and is only built when
    ``output_format`` is listed in ``OUTPUT_FORMATS`` (otherwise ``None``).
    """
    binned = dggal_bin(
        dggs_type=dggs_type,
        data=data,
        resolution=resolution,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
        split_antimeridian=split_antimeridian,
        **kwargs,
    )

    if output_format in OUTPUT_FORMATS:
        suffix = f"{dggs_type}bin_{resolution}"
        if isinstance(data, str):
            # Reuse the input file's stem so the output is recognisable.
            stem, _ext = os.path.splitext(os.path.basename(data))
            output_name = f"{stem}_{suffix}"
        else:
            output_name = suffix
    else:
        output_name = None

    return convert_to_output_format(binned, output_format, output_name)

`dggalbin_cli()` ¶

Command-line interface for DGGAL binning.

Source code in vgrid/binning/dggalbin.py

def dggalbin_cli():
    """
    Command-line interface for DGGAL binning.

    Parses the arguments below, calls ``dggalbin`` and prints the result when
    the chosen output format is one of ``STRUCTURED_FORMATS``. Any exception
    is reported as an ``Error: ...`` line instead of being raised.

    Arguments:
        -i, --input: Input data path or URL (required)
        -t, --dggs_type: DGGAL type, one of the DGGAL_TYPES keys (required)
        -r, --resolution: Integer resolution (required)
        -stats, --statistics: One of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field (required if stats != 'count')
        -f, --output_format: One of OUTPUT_FORMATS (default: gpd)
        -split, --split_antimeridian: Apply antimeridian fixing
    """
    cli = argparse.ArgumentParser(description="Binning point data to DGGAL DGGS")
    add = cli.add_argument

    # Required inputs
    add(
        "-i", "--input", type=str, required=True,
        help="Input data: GeoJSON file path, URL, or other vector file formats",
    )
    add(
        "-t", "--dggs_type", type=str, required=True,
        choices=DGGAL_TYPES.keys(), help="DGGAL type",
    )
    add("-r", "--resolution", type=int, required=True, help="Resolution (integer)")

    # Statistic selection
    add(
        "-stats", "--statistics", choices=STATS_OPTIONS, default="count",
        help="Statistic option",
    )
    add(
        "-category", "--category", dest="category_col", required=False,
        help="Optional category field for grouping",
    )
    add(
        "-numeric_col", "--numeric_col", dest="numeric_col", required=False,
        help="Numeric field to compute statistics (required if stats != 'count')",
    )

    # Output handling
    add(
        "-f", "--output_format", required=False, default="gpd",
        choices=OUTPUT_FORMATS,
    )
    add(
        "-split", "--split_antimeridian", action="store_true", default=False,
        help="Apply antimeridian fixing to the resulting polygons",
    )

    ns = cli.parse_args()

    try:
        binned = dggalbin(
            dggs_type=ns.dggs_type,
            data=ns.input,
            resolution=ns.resolution,
            stats=ns.statistics,
            category_col=ns.category_col,
            numeric_col=ns.numeric_col,
            output_format=ns.output_format,
            split_antimeridian=ns.split_antimeridian,
        )
        # Only in-memory ("structured") results are echoed to the console.
        if ns.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as e:
        # Top-level CLI boundary: report the problem instead of a traceback.
        print(f"Error: {str(e)}")
        return

DGGRID Grid Binning Module

Bins point data into DGGRID cells and computes various statistics for DGGRID types (e.g., ISEA7H, ISEA4T, FULLER4D, IGEO7).

Key Functions: - dggrid_bin(): Core binning function with spatial joins and aggregation - dggridbin(): Main user-facing function with multiple input/output formats - dggridbin_cli(): Command-line interface for binning functionality

`dggrid_bin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, aggregate=False, **kwargs)` ¶

Bin point data into DGGRID cells and compute statistics.

Parameters¶

- `split_antimeridian` (bool, optional): When True, apply antimeridian fixing to the resulting polygons.
- `aggregate` (bool, optional): When True (with `split_antimeridian`), dissolve split cell parts by `global_id`. Passed to dggridgen / generate_grid.

Source code in vgrid/binning/dggridbin.py

def dggrid_bin(
    dggrid_instance,
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
    split_antimeridian: bool = False,
    aggregate: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGRID cells and compute statistics.

    The input is normalized with ``process_input_data_bin``, reduced to
    Point/MultiPoint geometries (MultiPoints are exploded), and spatially
    joined (predicate ``"within"``) against a grid produced by ``dggridgen``
    over the points' bounding box. Statistics come from ``aggregate_joined``
    and are merged back onto the grid, so only cells containing at least one
    point are returned. The ``global_id`` column is renamed to
    ``dggrid_<dggs_type lower-cased>`` in the result.

    Parameters
    ----------
    dggrid_instance :
        DGGRID instance forwarded to ``dggridgen``.
    dggs_type : str
        DGGRID type; normalized by ``validate_dggrid_type``.
    data :
        Input accepted by ``process_input_data_bin``.
    resolution : int
        Grid resolution; coerced with ``int()`` and validated by
        ``validate_dggrid_resolution``.
    stats : str, optional
        Statistic to compute; anything other than ``"count"`` needs
        ``numeric_col``.
    category_col : str, optional
        Column used for grouping; only carried into the join when present
        in the input.
    numeric_col : str, optional
        Column the statistic is computed on.
    lat_col, lon_col : str, optional
        Coordinate column names forwarded to the input processor.
    split_antimeridian : bool, optional
        When True, apply antimeridian fixing to the resulting polygons.
    aggregate : bool, optional
        When True (with split_antimeridian), dissolve split cell parts by
        global_id. Passed to dggridgen / generate_grid.

    Returns
    -------
    GeoDataFrame
        Cell id, aggregated columns, ``resolution`` and geometry. An empty
        GeoDataFrame (id and geometry columns, EPSG:4326) is returned when
        the input contains no point geometries.

    Raises
    ------
    ValueError
        If ``stats`` is not ``"count"`` and ``numeric_col`` is empty, or if
        ``numeric_col`` is not a column of the input.
    """
    dggs_type = validate_dggrid_type(dggs_type)
    resolution = validate_dggrid_resolution(dggs_type, int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "global_id"
    out_id_col = f"dggrid_{dggs_type.lower()}"

    # Nothing to bin: total_bounds of an empty frame is all-NaN and must not
    # be handed to the grid generator as a bounding box.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[out_id_col, "geometry"], crs="EPSG:4326")

    minx, miny, maxx, maxy = points_gdf.total_bounds
    grid_gdf = dggridgen(
        dggrid_instance=dggrid_instance,
        dggs_type=dggs_type,
        resolution=resolution,
        output_format="gpd",
        bbox=(minx, miny, maxx, maxy),
        split_antimeridian=split_antimeridian,
        aggregate=aggregate,
    )
    # Bring the grid into the points' CRS so the spatial join compares like
    # with like.
    if grid_gdf.crs is None:
        grid_gdf = grid_gdf.set_crs(points_gdf.crs)
    elif points_gdf.crs is not None and grid_gdf.crs != points_gdf.crs:
        grid_gdf = grid_gdf.to_crs(points_gdf.crs)

    # Spatial join points -> cells, carrying only the columns needed
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Inner merge drops cells that received no points.
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    out = out.rename(columns={id_col: out_id_col})
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    return gpd.GeoDataFrame(
        out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326"
    )

`dggridbin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, aggregate=False, **kwargs)` ¶

Bin point data into DGGRID cells and compute statistics from various input formats.

Source code in vgrid/binning/dggridbin.py

def dggridbin(
    dggrid_instance,
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    output_format: str = "gpd",
    split_antimeridian: bool = False,
    aggregate: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGRID cells and convert the result to ``output_format``.

    Binning is delegated to ``dggrid_bin``; the resulting GeoDataFrame is
    then passed to ``convert_to_output_format`` together with a default
    output name. The name is ``<dggs_type lower-cased>bin_<resolution>``,
    prefixed with the input file's stem when ``data`` is a string, and is
    only built when ``output_format`` is listed in ``OUTPUT_FORMATS``
    (otherwise ``None``).
    """
    binned = dggrid_bin(
        dggrid_instance=dggrid_instance,
        dggs_type=dggs_type,
        data=data,
        resolution=resolution,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
        split_antimeridian=split_antimeridian,
        aggregate=aggregate,
        **kwargs,
    )

    if output_format in OUTPUT_FORMATS:
        suffix = f"{dggs_type.lower()}bin_{resolution}"
        if isinstance(data, str):
            # Reuse the input file's stem so the output is recognisable.
            stem, _ext = os.path.splitext(os.path.basename(data))
            output_name = f"{stem}_{suffix}"
        else:
            output_name = suffix
    else:
        output_name = None

    return convert_to_output_format(binned, output_format, output_name)

`dggridbin_cli()` ¶

Command-line interface for DGGRID binning.

Source code in vgrid/binning/dggridbin.py

def dggridbin_cli():
    """
    Command-line interface for DGGRID binning.

    Parses the arguments below, creates a DGGRID instance with
    ``create_dggrid_instance``, calls ``dggridbin`` and prints the result
    when the chosen output format is one of ``STRUCTURED_FORMATS``. Any
    exception is reported as an ``Error: ...`` line instead of being raised.

    Arguments:
        -i, --input: Input data path or URL (required)
        -t, --dggs_type: DGGRID type, one of the DGGRID_TYPES keys (required)
        -r, --resolution: Integer resolution (required)
        -stats, --statistics: One of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field (required if stats != 'count')
        -f, --output_format: One of OUTPUT_FORMATS (default: gpd)
        -split, --split_antimeridian: Apply antimeridian fixing
        -aggregate, --aggregate: Dissolve split cell parts by global_id
    """
    cli = argparse.ArgumentParser(description="Binning point data to DGGRID DGGS")
    add = cli.add_argument

    # Required inputs
    add(
        "-i", "--input", type=str, required=True,
        help="Input data: GeoJSON file path, URL, or other vector file formats",
    )
    add(
        "-t", "--dggs_type", type=str, required=True,
        choices=DGGRID_TYPES.keys(), help="DGGRID type",
    )
    add("-r", "--resolution", type=int, required=True, help="Resolution (integer)")

    # Statistic selection
    add(
        "-stats", "--statistics", choices=STATS_OPTIONS, default="count",
        help="Statistic option",
    )
    add(
        "-category", "--category", dest="category_col", required=False,
        help="Optional category field for grouping",
    )
    add(
        "-numeric_col", "--numeric_col", dest="numeric_col", required=False,
        help="Numeric field to compute statistics (required if stats != 'count')",
    )

    # Output handling
    add(
        "-f", "--output_format", required=False, default="gpd",
        choices=OUTPUT_FORMATS,
    )
    add(
        "-split", "--split_antimeridian", action="store_true", default=False,
        help="Apply antimeridian fixing to the resulting polygons",
    )
    add(
        "-aggregate", "--aggregate", action="store_true",
        help="Aggregate the resulting polygons (dissolve by global_id when split_antimeridian is set)",
    )

    ns = cli.parse_args()

    try:
        # The DGGRID instance is created inside the try so that a failure to
        # set it up is reported like any other error.
        binned = dggridbin(
            dggrid_instance=create_dggrid_instance(),
            dggs_type=ns.dggs_type,
            data=ns.input,
            resolution=ns.resolution,
            stats=ns.statistics,
            category_col=ns.category_col,
            numeric_col=ns.numeric_col,
            output_format=ns.output_format,
            split_antimeridian=ns.split_antimeridian,
            aggregate=ns.aggregate,
        )
        # Only in-memory ("structured") results are echoed to the console.
        if ns.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as e:
        # Top-level CLI boundary: report the problem instead of a traceback.
        print(f"Error: {str(e)}")
        return

ISEA4T Grid Binning Module

Bins point data into ISEA4T triangular grid cells and computes various statistics using hierarchical triangular grid system.

Key Functions: - isea4t_bin(): Core binning function with spatial joins and aggregation - isea4tbin(): Main user-facing function with multiple input/output formats - isea4tbin_cli(): Command-line interface for binning functionality

`isea4t_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)` ¶

Bin point data into ISEA4T grid cells using grid generation + spatial join and aggregate with pandas groupby. Supports custom stats (range, variety, minority, majority). Only Point/MultiPoint geometries are considered.

Source code in vgrid/binning/isea4tbin.py

def isea4t_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    fix_antimeridian=None,
    **kwargs,
):
    """
    Bin point data into ISEA4T grid cells using grid generation + spatial join
    and aggregate with pandas groupby. Supports custom stats (range, variety,
    minority, majority). Only Point/MultiPoint geometries are considered.

    Args:
        data: Input accepted by ``process_input_data_bin``.
        resolution: ISEA4T resolution, validated by
            ``validate_isea4t_resolution``.
        stats (str): Statistic to compute; anything other than ``"count"``
            needs ``numeric_col``.
        category_col (str, optional): Column used for grouping; only carried
            into the join when it exists in the input.
        numeric_col (str, optional): Column the statistic is computed on.
        lat_col (str): Latitude column name forwarded to the input processor.
        lon_col (str): Longitude column name forwarded to the input processor.
        fix_antimeridian: Forwarded unchanged to ``isea4t_grid_within_bbox``.
        **kwargs: Extra keyword arguments for ``process_input_data_bin``.

    Returns:
        GeoDataFrame with the ``isea4t`` id column, the aggregated columns,
        a ``resolution`` column and the cell geometry. Only cells containing
        at least one point are included. When the input holds no point
        geometries an empty GeoDataFrame (id and geometry columns,
        EPSG:4326) is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            empty, or if ``numeric_col`` is not a column of the input.
    """
    # Fail fast on an unusable stats/column combination before any data is
    # loaded.
    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    resolution = validate_isea4t_resolution(resolution)
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "isea4t"

    # Nothing to bin: total_bounds of an empty frame is all-NaN and must not
    # be handed to the grid generator as a bounding box.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # Generate ISEA4T grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds
    from vgrid.generator.isea4tgrid import isea4t_grid_within_bbox

    grid_gdf = isea4t_grid_within_bbox(
        resolution=resolution,
        bbox=(minx, miny, maxx, maxy),
        fix_antimeridian=fix_antimeridian,
    )

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # Aggregate
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Join back to grid (inner merge drops cells without points)
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    return gpd.GeoDataFrame(
        out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326"
    )

EASE Grid Binning Module

Bins point data into EASE (Equal-Area Scalable Earth) grid cells and computes various statistics using an equal-area projection grid system.

Key Functions: - ease_bin(): Core binning function with spatial joins and aggregation - easebin(): Main user-facing function with multiple input/output formats - easebin_cli(): Command-line interface for binning functionality

`ease_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin point data into EASE grid cells and aggregate with pandas groupby. Supports custom stats (range, variety, minority, majority). Only Point/MultiPoint geometries are considered.

Points are assigned to cells via latlon2ease (same as point2ease), then aggregated and joined to cell geometries from ease2geo.

Source code in vgrid/binning/easebin.py

def ease_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into EASE grid cells and aggregate with pandas groupby.
    Supports custom stats (range, variety, minority, majority). Only
    Point/MultiPoint geometries are considered.

    Points are assigned to cells via latlon2ease (same as point2ease), then
    aggregated and joined to cell geometries from ease2geo. No grid is
    generated and no spatial join is performed.

    Args:
        data: Input accepted by ``process_input_data_bin``.
        resolution: EASE resolution, validated by ``validate_ease_resolution``.
        stats (str): Statistic to compute; anything other than ``"count"``
            needs ``numeric_col``.
        category_col (str, optional): Column used for grouping; only carried
            along when it exists in the input.
        numeric_col (str, optional): Column the statistic is computed on.
        lat_col (str): Latitude column name forwarded to the input processor.
        lon_col (str): Longitude column name forwarded to the input processor.
        **kwargs: Extra keyword arguments for ``process_input_data_bin``.

    Returns:
        GeoDataFrame with one row per occupied cell: the ``ease`` id, the
        aggregated columns, a ``resolution`` column and the cell geometry.
        When the input holds no point geometries an empty GeoDataFrame (id
        and geometry columns, EPSG:4326) is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            empty, or if ``numeric_col`` is not a column of the input.
    """
    # Fail fast on an unusable stats/column combination before any data is
    # loaded.
    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    resolution = validate_ease_resolution(resolution)
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "ease"
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # points_gdf is already a private copy at this point (filtered above),
    # so adding the id column does not touch the caller's data.
    points_gdf[id_col] = [
        latlon2ease(geom.y, geom.x, resolution) for geom in points_gdf.geometry
    ]

    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    joined = points_gdf[[id_col, *join_cols]]

    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Build geometries only for the cells that actually received points.
    ease_rows = [
        geodesic_dggs_to_geoseries(
            "ease", ease_id, resolution, ease2geo(ease_id), num_edges=4
        )
        for ease_id in grouped[id_col]
    ]
    grid_gdf = gpd.GeoDataFrame(ease_rows, geometry="geometry", crs="EPSG:4326")

    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    return gpd.GeoDataFrame(
        out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326"
    )

QTM Grid Binning Module

Bins point data into QTM (Quaternary Triangular Mesh) grid cells and computes various statistics using hierarchical triangular grid system.

Key Functions: - qtm_bin(): Core binning function with spatial joins and aggregation - qtmbin(): Main user-facing function with multiple input/output formats - qtmbin_cli(): Command-line interface for binning functionality

OLC Grid Binning Module

Bins point data into OLC (Open Location Code) grid cells and computes various statistics using human-readable location codes for global coverage.

Key Functions: - olc_bin(): Core binning function with spatial joins and aggregation - olcbin(): Main user-facing function with multiple input/output formats - olcbin_cli(): Command-line interface for binning functionality

`olc_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin point data into OLC grid cells using grid generation + spatial join and aggregate with pandas groupby. Supports custom stats (range, variety, minority, majority). Only Point/MultiPoint geometries are considered.

Source code in vgrid/binning/olcbin.py

def olc_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into OLC grid cells and aggregate the points per cell.

    The input is normalised with ``process_input_data_bin``, an OLC grid is
    generated over the points' bounding box, points are spatially joined to the
    cells and the joined rows are aggregated with ``aggregate_joined``
    (supports custom stats such as range, variety, minority, majority).
    Only Point/MultiPoint geometries are considered; MultiPoints are exploded
    into individual points.

    Args:
        data: Point input, passed as-is to ``process_input_data_bin``.
        resolution: OLC resolution, checked by ``validate_olc_resolution``.
        stats: Statistic to compute. Anything other than ``"count"`` requires
            ``numeric_col``.
        category_col: Optional column used for grouping; it is carried through
            the join only when present in the input columns.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra keyword arguments forwarded to
            ``process_input_data_bin``.

    Returns:
        GeoDataFrame with one row per OLC cell that matched at least one point
        (inner join / inner merge), holding the cell geometry, the aggregated
        columns and a ``resolution`` column.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            missing, or if ``numeric_col`` is not a column of the input data.
    """
    resolution = validate_olc_resolution(resolution)

    # Fail fast, exactly like the other grid binners (geohash, georef,
    # maidenhead, gars): without this check a missing numeric_col was only
    # noticed after the grid had been generated and the join had run.
    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            # One row per member point so each point is binned on its own.
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    # Generate OLC grid covering the points' bounding box (lon/lat order)
    minx, miny, maxx, maxy = points_gdf.total_bounds
    id_col = "olc"
    # NOTE(review): function-scope import kept from the original; presumably it
    # avoids an import cycle with the generator package - confirm.
    from vgrid.generator.olcgrid import olc_grid_within_bbox

    grid_gdf = olc_grid_within_bbox(
        resolution=resolution, bbox=(minx, miny, maxx, maxy)
    )

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # Aggregate per cell (and optional category)
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Join back to grid; the inner merge drops cells that received no points
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    return gpd.GeoDataFrame(
        out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326"
    )

Geohash Grid Binning Module

Bins point data into Geohash grid cells and computes various statistics using a hierarchical geocoding system with alphanumeric identifiers.

Key Functions: - geohash_bin(): Core binning function with spatial joins and aggregation - geohashbin(): Main user-facing function with multiple input/output formats - geohashbin_cli(): Command-line interface for binning functionality

`geohash_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin point data into Geohash grid cells and compute statistics using a single grid generation + spatial join, followed by pandas groupby aggregation.

Returns a GeoDataFrame with Geohash cell stats and geometry.

Source code in vgrid/binning/geohashbin.py

def geohash_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Aggregate point observations per Geohash cell.

    Pipeline: normalise the input with ``process_input_data_bin``, build the
    Geohash grid over the points' bounding box, spatially join points to
    cells, then aggregate the joined rows with ``aggregate_joined``.

    Args:
        data: Point input, passed as-is to ``process_input_data_bin``.
        resolution: Geohash resolution; cast to ``int`` and checked by
            ``validate_geohash_resolution``.
        stats: Statistic to compute. Anything other than ``"count"`` requires
            ``numeric_col``.
        category_col: Optional grouping column; carried through the join only
            when it exists in the input columns.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra keyword arguments forwarded to
            ``process_input_data_bin``.

    Returns:
        GeoDataFrame with Geohash cell stats and geometry, one row per cell
        that matched at least one point.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            missing, or if ``numeric_col`` is not a column of the input data.
    """
    resolution = validate_geohash_resolution(int(resolution))

    wants_numeric = stats != "count"
    if wants_numeric and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    # Normalise whatever was passed in to a GeoDataFrame.
    pts = process_input_data_bin(data, lat_col=lat_col, lon_col=lon_col, **kwargs)

    # Restrict to point-like geometries and split MultiPoints into single points.
    if not pts.empty:
        point_like = pts.geometry.geom_type.isin(["Point", "MultiPoint"])
        pts = pts[point_like].copy()
        if (pts.geometry.geom_type == "MultiPoint").any():
            pts = pts.explode(index_parts=False, ignore_index=True)

    # Grid over the bounding box of the points (lon/lat order).
    id_col = "geohash"
    west, south, east, north = pts.total_bounds
    cells = geohash_grid_within_bbox(
        resolution=resolution, bbox=(west, south, east, north)
    )

    # Only the columns the aggregation needs travel through the join.
    keep = ["geometry"]
    if category_col and category_col in pts.columns:
        keep.append(category_col)
    if wants_numeric:
        # numeric_col is guaranteed truthy here by the check above.
        if numeric_col not in pts.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    matched = gpd.sjoin(
        pts[keep], cells[[id_col, "geometry"]], how="inner", predicate="within"
    )

    summary = aggregate_joined(
        matched,
        id_col,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
    ).reset_index()

    # Attach the aggregates to the cell geometries; empty cells drop out.
    merged = cells.merge(summary, on=id_col, how="inner")
    if "resolution" not in merged.columns:
        merged["resolution"] = resolution
    return gpd.GeoDataFrame(
        merged, geometry="geometry", crs=cells.crs or "EPSG:4326"
    )

GEOREF Grid Binning Module

Bins point data into GEOREF grid cells and computes statistics, using `vgrid.generator.georefgrid.georef_grid` over the points' bounding box and the same aggregation pattern as `geohashbin`.

Key Functions: - georef_bin(): Core binning with spatial join and aggregation - georefbin(): User-facing function with multiple output formats - georefbin_cli(): Command-line interface

`georef_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin point data into GEOREF cells and compute statistics (grid + spatial join + groupby).

Returns a GeoDataFrame with GEOREF cell stats and geometry (EPSG:4326).

Source code in vgrid/binning/georefbin.py

def georef_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Aggregate point observations per GEOREF cell.

    The input is normalised with ``process_input_data_bin``, a GEOREF grid is
    built with ``georef_grid`` over the points' bounding box, points are
    spatially joined to the cells and the result is aggregated with
    ``aggregate_joined``.

    Args:
        data: Point input, passed as-is to ``process_input_data_bin``.
        resolution: GEOREF resolution; cast to ``int`` and checked by
            ``validate_georef_resolution``.
        stats: Statistic to compute. Anything other than ``"count"`` requires
            ``numeric_col``.
        category_col: Optional grouping column; carried through the join only
            when it exists in the input columns.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra keyword arguments forwarded to
            ``process_input_data_bin``.

    Returns:
        GeoDataFrame with GEOREF cell stats and geometry, one row per cell
        that matched at least one point. The CRS is the grid's CRS, or
        EPSG:4326 when the grid has none.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            missing, or if ``numeric_col`` is not a column of the input data.
    """
    resolution = validate_georef_resolution(int(resolution))

    stat_needs_column = stats != "count"
    if stat_needs_column and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    observations = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )

    # Drop non-point geometries, then flatten MultiPoints to single points.
    if not observations.empty:
        geom_kind = observations.geometry.geom_type
        observations = observations[geom_kind.isin(["Point", "MultiPoint"])].copy()
        if (observations.geometry.geom_type == "MultiPoint").any():
            observations = observations.explode(
                index_parts=False, ignore_index=True
            )

    id_col = "georef"
    xmin, ymin, xmax, ymax = observations.total_bounds
    grid = georef_grid(resolution=resolution, bbox=(xmin, ymin, xmax, ymax))

    # Build the minimal column selection for the join.
    selected = ["geometry"]
    if category_col and category_col in observations.columns:
        selected.append(category_col)
    if stat_needs_column:
        # numeric_col is guaranteed truthy here by the check above.
        if numeric_col not in observations.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        selected.append(numeric_col)

    points_in_cells = gpd.sjoin(
        observations[selected],
        grid[[id_col, "geometry"]],
        how="inner",
        predicate="within",
    )

    per_cell = aggregate_joined(
        points_in_cells,
        id_col,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
    )
    per_cell = per_cell.reset_index()

    # The inner merge keeps only the cells that received at least one point.
    binned = grid.merge(per_cell, on=id_col, how="inner")
    if "resolution" not in binned.columns:
        binned["resolution"] = resolution
    return gpd.GeoDataFrame(binned, geometry="geometry", crs=grid.crs or "EPSG:4326")

Tilecode Grid Binning Module

Bins point data into Tilecode grid cells and computes various statistics using a hierarchical geospatial indexing system for efficient spatial queries.

Key Functions: - tilecode_bin(): Core binning function with spatial joins and aggregation - tilecodebin(): Main user-facing function with multiple input/output formats - tilecodebin_cli(): Command-line interface for binning functionality

Quadkey Grid Binning Module

Bins point data into Quadkey grid cells and computes various statistics using a hierarchical geospatial indexing system used by mapping services.

Key Functions: - quadkey_bin(): Core binning function with spatial joins and aggregation - quadkeybin(): Main user-facing function with multiple input/output formats - quadkeybin_cli(): Command-line interface for binning functionality

Maidenhead Grid Binning Module

Bins point data into Maidenhead locator cells using `vgrid.generator.maidenheadgrid.maidenhead_grid_within_bbox` over the points' bounding box and the same aggregation pattern as `geohashbin`.

Key Functions: - maidenhead_bin(): Core binning with spatial join and aggregation - maidenheadbin(): User-facing function with multiple output formats - maidenheadbin_cli(): Command-line interface

`maidenhead_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin point data into Maidenhead cells and compute statistics (grid + spatial join + groupby).

Returns a GeoDataFrame with Maidenhead cell stats and geometry (EPSG:4326).

Source code in vgrid/binning/maidenheadbin.py

def maidenhead_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Aggregate point observations per Maidenhead locator cell.

    The input is normalised with ``process_input_data_bin``, a Maidenhead grid
    is built with ``maidenhead_grid_within_bbox`` over the points' bounding
    box, points are spatially joined to the cells and the result is aggregated
    with ``aggregate_joined``.

    Args:
        data: Point input, passed as-is to ``process_input_data_bin``.
        resolution: Maidenhead resolution; cast to ``int`` and checked by
            ``validate_maidenhead_resolution``.
        stats: Statistic to compute. Anything other than ``"count"`` requires
            ``numeric_col``.
        category_col: Optional grouping column; carried through the join only
            when it exists in the input columns.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra keyword arguments forwarded to
            ``process_input_data_bin``.

    Returns:
        GeoDataFrame with Maidenhead cell stats and geometry, one row per cell
        that matched at least one point. The CRS is the grid's CRS, or
        EPSG:4326 when the grid has none.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            missing, or if ``numeric_col`` is not a column of the input data.
    """
    resolution = validate_maidenhead_resolution(int(resolution))

    count_only = stats == "count"
    if not count_only and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    gdf = process_input_data_bin(data, lat_col=lat_col, lon_col=lon_col, **kwargs)

    # Only Point/MultiPoint rows take part; MultiPoints become one row per point.
    if not gdf.empty:
        gdf = gdf[gdf.geometry.geom_type.isin(["Point", "MultiPoint"])].copy()
        has_multipoint = (gdf.geometry.geom_type == "MultiPoint").any()
        if has_multipoint:
            gdf = gdf.explode(index_parts=False, ignore_index=True)

    id_col = "maidenhead"
    lon_min, lat_min, lon_max, lat_max = gdf.total_bounds
    locator_grid = maidenhead_grid_within_bbox(
        resolution, bbox=(lon_min, lat_min, lon_max, lat_max)
    )

    # Columns that must survive the spatial join for the aggregation step.
    extra_cols = []
    if category_col and category_col in gdf.columns:
        extra_cols.append(category_col)
    if not count_only:
        # numeric_col is guaranteed truthy here by the check above.
        if numeric_col not in gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        extra_cols.append(numeric_col)

    point_side = gdf[["geometry"] + extra_cols]
    cell_side = locator_grid[[id_col, "geometry"]]
    hits = gpd.sjoin(point_side, cell_side, how="inner", predicate="within")

    stats_table = aggregate_joined(
        hits, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # Inner merge: cells with no points are not part of the output.
    result = locator_grid.merge(stats_table, on=id_col, how="inner")
    if "resolution" not in result.columns:
        result["resolution"] = resolution
    return gpd.GeoDataFrame(
        result, geometry="geometry", crs=locator_grid.crs or "EPSG:4326"
    )

GARS Grid Binning Module

Bins point data into GARS cells using `vgrid.generator.garsgrid.gars_grid` over the points' bounding box and the same aggregation pattern as `geohashbin`.

Key Functions: - gars_bin(): Core binning with spatial join and aggregation - garsbin(): User-facing function with multiple output formats - garsbin_cli(): Command-line interface

`gars_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin point data into GARS cells and compute statistics (grid + spatial join + groupby).

Returns a GeoDataFrame with GARS cell stats and geometry (EPSG:4326).

Source code in vgrid/binning/garsbin.py

def gars_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Aggregate point observations per GARS cell.

    The input is normalised with ``process_input_data_bin``, a GARS grid is
    built with ``gars_grid`` over the points' bounding box, points are
    spatially joined to the cells and the result is aggregated with
    ``aggregate_joined``.

    Args:
        data: Point input, passed as-is to ``process_input_data_bin``.
        resolution: GARS resolution; cast to ``int`` and checked by
            ``validate_gars_resolution``.
        stats: Statistic to compute. Anything other than ``"count"`` requires
            ``numeric_col``.
        category_col: Optional grouping column; carried through the join only
            when it exists in the input columns.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra keyword arguments forwarded to
            ``process_input_data_bin``.

    Returns:
        GeoDataFrame with GARS cell stats and geometry, one row per cell that
        matched at least one point. The CRS is the grid's CRS, or EPSG:4326
        when the grid has none.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            missing, or if ``numeric_col`` is not a column of the input data.
    """
    resolution = validate_gars_resolution(int(resolution))

    requires_value_column = stats != "count"
    if requires_value_column and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    features = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )

    # Filter to point geometries; split MultiPoints so every point is binned.
    if not features.empty:
        wanted = features.geometry.geom_type.isin(["Point", "MultiPoint"])
        features = features[wanted].copy()
        if (features.geometry.geom_type == "MultiPoint").any():
            features = features.explode(index_parts=False, ignore_index=True)

    id_col = "gars"
    x0, y0, x1, y1 = features.total_bounds
    gars_cells = gars_grid(resolution=resolution, bbox=(x0, y0, x1, y1))

    # Geometry plus whichever attribute columns the aggregation will read.
    columns_for_join = ["geometry"]
    if category_col and category_col in features.columns:
        columns_for_join.append(category_col)
    if requires_value_column:
        # numeric_col is guaranteed truthy here by the check above.
        if numeric_col not in features.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        columns_for_join.append(numeric_col)

    located = gpd.sjoin(
        features[columns_for_join],
        gars_cells[[id_col, "geometry"]],
        how="inner",
        predicate="within",
    )

    aggregated = aggregate_joined(
        located,
        id_col,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
    )
    aggregated = aggregated.reset_index()

    # Bring geometries back in; cells without points are dropped by the inner merge.
    table = gars_cells.merge(aggregated, on=id_col, how="inner")
    if "resolution" not in table.columns:
        table["resolution"] = resolution
    return gpd.GeoDataFrame(
        table, geometry="geometry", crs=gars_cells.crs or "EPSG:4326"
    )

DIGIPIN Grid Binning Module

Bins point data into DIGIPIN grid cells and computes various statistics using a hierarchical geospatial indexing system for efficient spatial queries.

Key Functions: - digipin_bin(): Core binning function with spatial joins and aggregation - digipinbin(): Main user-facing function with multiple input/output formats - digipinbin_cli(): Command-line interface for binning functionality

Polygon Binning Module

Bins point data into polygon geometries and computes various statistics using pre-defined polygon features like administrative boundaries.

Key Functions: - polygon_bin(): Core binning function with spatial joins and aggregation - polygonbin(): Main user-facing function with multiple input/output formats - polygonbin_cli(): Command-line interface for binning functionality

`polygon_bin(polygon_data, point_data, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

Bin points into provided polygons using spatial join + pandas groupby aggregation. No grid generation is performed; the input polygons are used directly.

Source code in vgrid/binning/polygonbin.py

def polygon_bin(
    polygon_data,
    point_data,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin points into provided polygons using spatial join + pandas groupby aggregation.
    No grid generation is performed; the input polygons are used directly.

    Args:
        polygon_data: Polygon input, passed as-is to ``process_input_data_bin``.
        point_data: Point input, passed as-is to ``process_input_data_bin``.
            Only Point/MultiPoint geometries are used; MultiPoints are
            exploded into individual points.
        stats: Statistic to compute, forwarded to ``aggregate_joined``.
        category_col: Optional grouping column; carried through the join only
            when it exists in the point columns.
        numeric_col: Column the statistic is computed on (used when ``stats``
            is not ``"count"``).
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra keyword arguments forwarded to
            ``process_input_data_bin`` for both inputs.

    Returns:
        GeoDataFrame with every non-null, valid input polygon and its original
        attributes plus the aggregated columns. Polygons without any matching
        point are kept (left merge) with missing aggregate values. The CRS is
        the polygons' CRS, or EPSG:4326 when they have none.

    Raises:
        ValueError: If ``stats`` is not ``"count"``, ``numeric_col`` is given
            and it is not a column of the point data.
    """
    # Read inputs
    polygon_gdf = process_input_data_bin(
        polygon_data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    point_gdf = process_input_data_bin(
        point_data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )

    # Ensure valid polygons only
    polygon_gdf = polygon_gdf[polygon_gdf.geometry.notnull()]
    polygon_gdf = polygon_gdf[polygon_gdf.geometry.is_valid]

    # Keep Points/MultiPoints for points and explode MultiPoints
    if not point_gdf.empty:
        point_gdf = point_gdf[
            point_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(point_gdf.geometry.geom_type.unique()):
            point_gdf = point_gdf.explode(index_parts=False, ignore_index=True)

    # Create a stable polygon id for join/merge. The name must not collide
    # with an existing column: the helper column is dropped at the end, so a
    # user attribute called "poly_id" would otherwise be overwritten and lost.
    polygon_gdf = polygon_gdf.reset_index(drop=True).copy()
    id_col = "poly_id"
    taken = set(polygon_gdf.columns) | set(point_gdf.columns)
    while id_col in taken:
        id_col = f"_{id_col}"
    polygon_gdf[id_col] = polygon_gdf.index

    # Select required columns from points for join and aggregation
    join_cols = []
    if category_col and category_col in point_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in point_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in point data")
        join_cols.append(numeric_col)
    left = point_gdf[["geometry", *join_cols]]

    # Spatial join: assign each point to a polygon
    joined = gpd.sjoin(
        left, polygon_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # Aggregate per polygon (and optional category)
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Merge aggregates back to polygons; keep original polygon attributes
    out = polygon_gdf.merge(grouped, on=id_col, how="left")
    out = out.drop(columns=[id_col])

    # Keep the polygons' own CRS (same "crs or EPSG:4326" rule as the grid
    # binners) instead of unconditionally labelling the result EPSG:4326.
    return gpd.GeoDataFrame(
        out, geometry="geometry", crs=polygon_gdf.crs or "EPSG:4326"
    )

DGGS Binning

a5bin_cli() ¶

Bin shapefile to A5 cells at resolution 10 with count statistics¶

python a5bin.py -i cities.shp -r 10 -stats count -f geojson¶

dggalbin_cli() ¶

dggridbin_cli() ¶

s2bin_cli() ¶

Bin shapefile to S2 cells at resolution 10 with count statistics¶

python s2bin.py -i cities.shp -r 10 -stats count -f geojson¶

h3_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs) ¶

s2_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs) ¶

s2bin_cli() ¶

Bin shapefile to S2 cells at resolution 10 with count statistics¶

python s2bin.py -i cities.shp -r 10 -stats count -f geojson¶

a5_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', options=None, split_antimeridian=False, **kwargs) ¶

a5bin(data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', options=None, split_antimeridian=False, **kwargs) ¶

Bin from file with count statistics¶

Bin from GeoDataFrame with mean statistics¶

Bin from GeoJSON dict with category grouping¶

Save output as GeoJSON file¶

a5bin_cli() ¶

Bin shapefile to A5 cells at resolution 10 with count statistics¶

python a5bin.py -i cities.shp -r 10 -stats count -f geojson¶

dggal_bin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, **kwargs) ¶

dggalbin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, **kwargs) ¶

dggalbin_cli() ¶

dggrid_bin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, aggregate=False, **kwargs) ¶

Parameters¶

dggridbin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, aggregate=False, **kwargs) ¶

dggridbin_cli() ¶

isea4t_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs) ¶

ease_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

olc_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

geohash_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

georef_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

maidenhead_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

gars_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

polygon_bin(polygon_data, point_data, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs) ¶

`a5bin_cli()` ¶

`dggalbin_cli()` ¶

`dggridbin_cli()` ¶

`s2bin_cli()` ¶

`h3_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)` ¶

`s2_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)` ¶

`s2bin_cli()` ¶

`a5_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', options=None, split_antimeridian=False, **kwargs)` ¶

`a5bin(data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', options=None, split_antimeridian=False, **kwargs)` ¶

`a5bin_cli()` ¶

`dggal_bin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, **kwargs)` ¶

`dggalbin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, **kwargs)` ¶

`dggalbin_cli()` ¶

`dggrid_bin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, aggregate=False, **kwargs)` ¶

`dggridbin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, aggregate=False, **kwargs)` ¶

`dggridbin_cli()` ¶

`isea4t_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)` ¶

`ease_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

`olc_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

`geohash_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

`georef_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

`maidenhead_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

`gars_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶

`polygon_bin(polygon_data, point_data, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)` ¶