Skip to content

DGGS Binning

Binning module for vgrid.

This module provides functions to bin and aggregate data using various discrete global grid systems (DGGS), including statistical analysis and data categorization.

a5bin_cli()

Command-line interface for a5bin conversion.

This function provides a command-line interface for binning point data to A5 grid cells. It parses command-line arguments and calls the main a5bin function.

Usage

python a5bin.py -i input.shp -r 10 -stats count -f geojson

Parameters:

Name Type Description Default
-i, --input

Input file path, URL, or other vector file formats

required
-r, --resolution

A5 resolution [0..29]

13
-stats, --statistics

Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)

'count'
-category, --category

Optional category field for grouping

None
-numeric_col, --numeric_col

Numeric field to compute statistics (required if stats != 'count')

None
-split, --split_antimeridian

Apply antimeridian fixing to the resulting polygons

False
-o, --output

Output file path. Note: this option is no longer accepted by the parser; the output is saved in the current working directory with a predefined name.

required
-f, --output_format

Output format (geojson, gpkg, parquet, csv, shapefile)

'gpd'
Example

Bin shapefile to A5 cells at resolution 10 with count statistics

python a5bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/a5bin.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
def a5bin_cli():
    """
    Command-line interface for a5bin conversion.

    Parses the command-line arguments, calls ``a5bin`` with them and prints the
    result when the requested output format is listed in ``STRUCTURED_FORMATS``.
    Failures (invalid ``--options`` JSON, or any exception raised while binning)
    are reported as an ``Error: ...`` line and the function returns without
    raising.

    Usage:
        python a5bin.py -i input.shp -r 10 -stats count -f geojson

    Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: A5 resolution [0..29] (default: 13)
        -stats, --statistics: Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety); default: count
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -options, --options: JSON object of options to pass to a52geo, e.g. '{"segments": 1000}'
        -split, --split_antimeridian: Apply antimeridian fixing to the resulting polygons

    Note:
        There is no -o/--output option: it was removed, and the output is
        saved in the current working directory with a predefined name.

    Example:
        >>> # Bin shapefile to A5 cells at resolution 10 with count statistics
        >>> # python a5bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    parser = argparse.ArgumentParser(description="Binning point data to A5 DGGS")
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help="Input data: GeoJSON file path, URL, or other vector file formats",
    )
    parser.add_argument(
        "-r",
        "--resolution",
        type=int,
        default=13,
        help="Resolution of the grid [0..29]",
    )
    parser.add_argument(
        "-stats",
        "--statistics",
        choices=STATS_OPTIONS,
        default="count",
        help="Statistic option",
    )
    parser.add_argument(
        "-category",
        "--category",
        dest="category_col",
        help="Optional category field for grouping",
    )
    parser.add_argument(
        "-numeric_col",
        "--numeric_col",
        dest="numeric_col",
        help="Numeric field to compute statistics (required if stats != 'count')",
    )
    # -o/--output was removed; output is saved in CWD with a predefined name.
    parser.add_argument(
        "-f",
        "--output_format",
        default="gpd",
        choices=OUTPUT_FORMATS,
        help="Output format (default: gpd)",
    )
    parser.add_argument(
        "-options",
        "--options",
        type=str,
        default=None,
        help="JSON string of options to pass to a52geo. "
        "Example: '{\"segments\": 1000}'",
    )
    parser.add_argument(
        "-split",
        "--split_antimeridian",
        action="store_true",
        help="Apply antimeridian fixing to the resulting polygons",
    )

    args = parser.parse_args()

    # Decode the optional JSON options up front so a malformed value is
    # reported before any binning work starts.
    options = None
    if args.options:
        try:
            options = json.loads(args.options)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in options: {e}")
            return
        # a5bin documents options as a dict; reject arrays/scalars here
        # instead of forwarding them to a52geo.
        if options is not None and not isinstance(options, dict):
            print("Error: options must be a JSON object")
            return

    try:
        result = a5bin(
            data=args.input,
            resolution=args.resolution,
            stats=args.statistics,
            category_col=args.category_col,
            numeric_col=args.numeric_col,
            output_format=args.output_format,
            options=options,
            split_antimeridian=args.split_antimeridian,
        )
        # Only structured (in-memory) results are echoed here.
        if args.output_format in STRUCTURED_FORMATS:
            print(result)
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        print(f"Error: {e}")
        return

dggalbin_cli()

Command-line interface for DGGAL binning.

Source code in vgrid/binning/dggalbin.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def dggalbin_cli():
    """Run DGGAL binning from the command line.

    Builds the argument parser, hands the parsed values to ``dggalbin`` and
    prints the result when the chosen output format is listed in
    ``STRUCTURED_FORMATS``. An exception raised by the binning call (or while
    printing) is reported as an ``Error: ...`` line instead of propagating.
    """
    cli = argparse.ArgumentParser(description="Binning point data to DGGAL DGGS")

    # (flags, add_argument settings) pairs, registered in this order.
    argument_table = (
        (
            ("-i", "--input"),
            dict(
                type=str,
                required=True,
                help="Input data: GeoJSON file path, URL, or other vector file formats",
            ),
        ),
        (
            ("-t", "--dggs_type"),
            dict(
                type=str,
                required=True,
                choices=DGGAL_TYPES.keys(),
                help="DGGAL type",
            ),
        ),
        (
            ("-r", "--resolution"),
            dict(type=int, required=True, help="Resolution (integer)"),
        ),
        (
            ("-stats", "--statistics"),
            dict(choices=STATS_OPTIONS, default="count", help="Statistic option"),
        ),
        (
            ("-category", "--category"),
            dict(
                dest="category_col",
                required=False,
                help="Optional category field for grouping",
            ),
        ),
        (
            ("-numeric_col", "--numeric_col"),
            dict(
                dest="numeric_col",
                required=False,
                help="Numeric field to compute statistics (required if stats != 'count')",
            ),
        ),
        (
            ("-f", "--output_format"),
            dict(required=False, default="gpd", choices=OUTPUT_FORMATS),
        ),
        (
            ("-split", "--split_antimeridian"),
            dict(
                action="store_true",
                default=False,
                help="Apply antimeridian fixing to the resulting polygons",
            ),
        ),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    try:
        binned = dggalbin(
            dggs_type=opts.dggs_type,
            data=opts.input,
            resolution=opts.resolution,
            stats=opts.statistics,
            category_col=opts.category_col,
            numeric_col=opts.numeric_col,
            output_format=opts.output_format,
            split_antimeridian=opts.split_antimeridian,
        )
        # Only structured results are echoed to the console.
        if opts.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as exc:
        print(f"Error: {exc!s}")

dggridbin_cli()

Command-line interface for DGGRID binning.

Source code in vgrid/binning/dggridbin.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def dggridbin_cli():
    """Run DGGRID binning from the command line.

    Builds the argument parser, creates a DGGRID instance via
    ``create_dggrid_instance()`` and forwards the parsed values to
    ``dggridbin``. The result is printed when the chosen output format is
    listed in ``STRUCTURED_FORMATS``. An exception raised while creating the
    instance, binning or printing is reported as an ``Error: ...`` line
    instead of propagating.
    """
    cli = argparse.ArgumentParser(description="Binning point data to DGGRID DGGS")

    # (flags, add_argument settings) pairs, registered in this order.
    argument_table = (
        (
            ("-i", "--input"),
            dict(
                type=str,
                required=True,
                help="Input data: GeoJSON file path, URL, or other vector file formats",
            ),
        ),
        (
            ("-t", "--dggs_type"),
            dict(
                type=str,
                required=True,
                choices=DGGRID_TYPES.keys(),
                help="DGGRID type",
            ),
        ),
        (
            ("-r", "--resolution"),
            dict(type=int, required=True, help="Resolution (integer)"),
        ),
        (
            ("-stats", "--statistics"),
            dict(choices=STATS_OPTIONS, default="count", help="Statistic option"),
        ),
        (
            ("-category", "--category"),
            dict(
                dest="category_col",
                required=False,
                help="Optional category field for grouping",
            ),
        ),
        (
            ("-numeric_col", "--numeric_col"),
            dict(
                dest="numeric_col",
                required=False,
                help="Numeric field to compute statistics (required if stats != 'count')",
            ),
        ),
        (
            ("-f", "--output_format"),
            dict(required=False, default="gpd", choices=OUTPUT_FORMATS),
        ),
        (
            ("-split", "--split_antimeridian"),
            dict(
                action="store_true",
                default=False,
                help="Apply antimeridian fixing to the resulting polygons",
            ),
        ),
        (
            ("-aggregate", "--aggregate"),
            dict(
                action="store_true",
                help="Aggregate the resulting polygons (dissolve by global_id when split_antimeridian is set)",
            ),
        ),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    try:
        # The DGGRID instance is created inside the try so that a failure
        # there is reported like any other binning error.
        binned = dggridbin(
            dggrid_instance=create_dggrid_instance(),
            dggs_type=opts.dggs_type,
            data=opts.input,
            resolution=opts.resolution,
            stats=opts.statistics,
            category_col=opts.category_col,
            numeric_col=opts.numeric_col,
            output_format=opts.output_format,
            split_antimeridian=opts.split_antimeridian,
            aggregate=opts.aggregate,
        )
        # Only structured results are echoed to the console.
        if opts.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as exc:
        print(f"Error: {exc!s}")

s2bin_cli()

Command-line interface for s2bin conversion.

This function provides a command-line interface for binning point data to S2 grid cells. It parses command-line arguments and calls the main s2bin function.

Usage

python s2bin.py -i input.shp -r 10 -stats count -f geojson

CLI Arguments

- -i, --input: Input file path, URL, or other vector file formats
- -r, --resolution: S2 resolution [0..30]
- -stats, --statistics: Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)
- -category, --category: Optional category field for grouping
- -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
- -f, --output_format: Output format (geojson, gpkg, parquet, csv, shapefile)

Example

Bin shapefile to S2 cells at resolution 10 with count statistics

python s2bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/s2bin.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def s2bin_cli():
    """
    Run S2 binning from the command line.

    Parses the command-line arguments, forwards them to ``s2bin`` and prints
    the result when the chosen output format is listed in
    ``STRUCTURED_FORMATS``. An exception raised by the binning call (or while
    printing) is reported as an ``Error: ...`` line instead of propagating.

    Usage:
        python s2bin.py -i input.shp -r 10 -stats count -f geojson

    CLI Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: S2 resolution [0..30] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -fix, --fix_antimeridian: Antimeridian fixing method, one of FIX_ANTIMERIDIAN_CHOICES (default: None)

    Example:
        >>> # Bin shapefile to S2 cells at resolution 10 with count statistics
        >>> # python s2bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    cli = argparse.ArgumentParser(description="Binning point data to S2 DGGS")

    # (flags, add_argument settings) pairs, registered in this order.
    # There is no -o/--output entry: output is saved in CWD with a predefined name.
    argument_table = (
        (
            ("-i", "--input"),
            dict(
                type=str,
                required=True,
                help="Input data: GeoJSON file path, URL, or other vector file formats",
            ),
        ),
        (
            ("-r", "--resolution"),
            dict(type=int, default=13, help="Resolution of the grid [0..30]"),
        ),
        (
            ("-stats", "--statistics"),
            dict(choices=STATS_OPTIONS, default="count", help="Statistic option"),
        ),
        (
            ("-category", "--category"),
            dict(
                dest="category_col",
                required=False,
                help="Optional category field for grouping",
            ),
        ),
        (
            ("-numeric_col", "--numeric_col"),
            dict(
                dest="numeric_col",
                required=False,
                help="Numeric field to compute statistics (required if stats != 'count')",
            ),
        ),
        (
            ("-f", "--output_format"),
            dict(required=False, default="gpd", choices=OUTPUT_FORMATS),
        ),
        (
            ("-fix", "--fix_antimeridian"),
            dict(
                type=str,
                choices=FIX_ANTIMERIDIAN_CHOICES,
                default=None,
                help="Antimeridian fixing method: shift, shift_balanced, shift_west, shift_east, split, none",
            ),
        ),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    try:
        binned = s2bin(
            data=opts.input,
            resolution=opts.resolution,
            stats=opts.statistics,
            category_col=opts.category_col,
            numeric_col=opts.numeric_col,
            output_format=opts.output_format,
            fix_antimeridian=opts.fix_antimeridian,
        )
        # Only structured results are echoed to the console.
        if opts.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as exc:
        print(f"Error: {exc!s}")

H3 Grid Binning Module

Bins point data into H3 hexagonal grid cells and computes various statistics using Uber's hierarchical grid system.

Key Functions:

- h3_bin(): Core binning function with spatial joins and aggregation
- h3bin(): Main user-facing function with multiple input/output formats
- h3bin_cli(): Command-line interface for binning functionality

h3_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)

Binning via H3 grid generation within points' bbox + spatial join, then pandas groupby. Supports custom stats (range, variety, minority, majority). Non-point geometries are ignored.

When category_col is set, output columns are named {category_value}_{numeric_col}_{stats} (e.g. Forest_population_mean).

Source code in vgrid/binning/h3bin.py
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
def h3_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    fix_antimeridian=None,
    **kwargs,
):
    """
    Bin point data into H3 cells and attach per-cell statistics.

    The input is normalised with ``process_input_data_bin``, an H3 grid is
    generated over the bounding box of the points, the points are spatially
    joined to the cells, and the joined rows are aggregated by
    ``aggregate_joined``. Geometries other than Point/MultiPoint are dropped;
    MultiPoints are exploded into single points.

    When ``category_col`` is set, output columns are named
    ``{category_value}_{numeric_col}_{stats}`` (e.g. ``Forest_population_mean``).

    Returns:
        GeoDataFrame of the H3 cells that received statistics, with a
        ``resolution`` column added when the grid does not already have one.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            given but missing from the input columns.
    """
    resolution = validate_h3_resolution(resolution)
    pts = process_input_data_bin(data, lat_col=lat_col, lon_col=lon_col, **kwargs)

    # Restrict to point-like geometries and flatten MultiPoints.
    if not pts.empty:
        point_like = pts.geometry.geom_type.isin(["Point", "MultiPoint"])
        pts = pts[point_like].copy()
        if "MultiPoint" in set(pts.geometry.geom_type.unique()):
            pts = pts.explode(index_parts=False, ignore_index=True)

    # Build the H3 grid over the extent of the points.
    cell_id = "h3"
    x_min, y_min, x_max, y_max = pts.total_bounds
    cells = h3_grid_within_bbox(
        resolution=resolution,
        bbox=(x_min, y_min, x_max, y_max),
        fix_antimeridian=fix_antimeridian,
    )

    # Carry only the columns the aggregation needs through the join.
    keep = ["geometry"]
    if category_col and category_col in pts.columns:
        keep.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in pts.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    joined = gpd.sjoin(
        pts[keep],
        cells[[cell_id, "geometry"]],
        how="inner",
        predicate="within",
    )

    per_cell = aggregate_joined(
        joined, cell_id, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # Re-attach the cell geometries (and any other grid columns).
    merged = cells.merge(per_cell, on=cell_id, how="inner")
    if "resolution" not in merged.columns:
        merged["resolution"] = resolution
    return gpd.GeoDataFrame(merged, geometry="geometry", crs=cells.crs or "EPSG:4326")

S2 Grid Binning Module

Bins point data into S2 spherical grid cells and computes various statistics using Google's hierarchical grid system.

Key Functions:

- s2_bin(): Core binning function with spatial joins and aggregation
- s2bin(): Main user-facing function with multiple input/output formats
- s2bin_cli(): Command-line interface for binning functionality

s2_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)

Grid + spatial join + groupby approach for S2 binning (like a5bin).

Source code in vgrid/binning/s2bin.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def s2_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    fix_antimeridian=None,
    **kwargs,
):
    """
    Bin point data into S2 cells and attach per-cell statistics.

    Same grid + spatial join + aggregation approach as a5bin: the input is
    normalised with ``process_input_data_bin``, an S2 grid is generated over
    the bounding box of the points, the points are joined to the cells and the
    joined rows are aggregated by ``aggregate_joined``. Geometries other than
    Point/MultiPoint are dropped; MultiPoints are exploded into single points.

    Returns:
        GeoDataFrame of the S2 cells that received statistics, with a
        ``resolution`` column added when the grid does not already have one.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is
            given but missing from the input columns.
    """
    resolution = validate_s2_resolution(resolution)
    pts = process_input_data_bin(data, lat_col=lat_col, lon_col=lon_col, **kwargs)

    # Restrict to point-like geometries and flatten MultiPoints.
    if not pts.empty:
        point_like = pts.geometry.geom_type.isin(["Point", "MultiPoint"])
        pts = pts[point_like].copy()
        if "MultiPoint" in set(pts.geometry.geom_type.unique()):
            pts = pts.explode(index_parts=False, ignore_index=True)

    # Build the S2 grid over the extent of the points.
    cell_id = "s2"
    x_min, y_min, x_max, y_max = pts.total_bounds
    from vgrid.generator.s2grid import s2_grid

    cells = s2_grid(
        resolution=resolution,
        bbox=(x_min, y_min, x_max, y_max),
        fix_antimeridian=fix_antimeridian,
    )

    # Carry only the columns the aggregation needs through the join.
    keep = ["geometry"]
    if category_col and category_col in pts.columns:
        keep.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in pts.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    joined = gpd.sjoin(
        pts[keep],
        cells[[cell_id, "geometry"]],
        how="inner",
        predicate="within",
    )

    per_cell = aggregate_joined(
        joined, cell_id, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # Re-attach the cell geometries (and any other grid columns).
    merged = cells.merge(per_cell, on=cell_id, how="inner")
    if "resolution" not in merged.columns:
        merged["resolution"] = resolution
    return gpd.GeoDataFrame(merged, geometry="geometry", crs=cells.crs or "EPSG:4326")

s2bin_cli()

Command-line interface for s2bin conversion.

This function provides a command-line interface for binning point data to S2 grid cells. It parses command-line arguments and calls the main s2bin function.

Usage

python s2bin.py -i input.shp -r 10 -stats count -f geojson

CLI Arguments

- -i, --input: Input file path, URL, or other vector file formats
- -r, --resolution: S2 resolution [0..30]
- -stats, --statistics: Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)
- -category, --category: Optional category field for grouping
- -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
- -f, --output_format: Output format (geojson, gpkg, parquet, csv, shapefile)

Example

Bin shapefile to S2 cells at resolution 10 with count statistics

python s2bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/s2bin.py
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
def s2bin_cli():
    """
    Run S2 binning from the command line.

    Parses the command-line arguments, forwards them to ``s2bin`` and prints
    the result when the chosen output format is listed in
    ``STRUCTURED_FORMATS``. An exception raised by the binning call (or while
    printing) is reported as an ``Error: ...`` line instead of propagating.

    Usage:
        python s2bin.py -i input.shp -r 10 -stats count -f geojson

    CLI Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: S2 resolution [0..30] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -fix, --fix_antimeridian: Antimeridian fixing method, one of FIX_ANTIMERIDIAN_CHOICES (default: None)

    Example:
        >>> # Bin shapefile to S2 cells at resolution 10 with count statistics
        >>> # python s2bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    cli = argparse.ArgumentParser(description="Binning point data to S2 DGGS")

    # (flags, add_argument settings) pairs, registered in this order.
    # There is no -o/--output entry: output is saved in CWD with a predefined name.
    argument_table = (
        (
            ("-i", "--input"),
            dict(
                type=str,
                required=True,
                help="Input data: GeoJSON file path, URL, or other vector file formats",
            ),
        ),
        (
            ("-r", "--resolution"),
            dict(type=int, default=13, help="Resolution of the grid [0..30]"),
        ),
        (
            ("-stats", "--statistics"),
            dict(choices=STATS_OPTIONS, default="count", help="Statistic option"),
        ),
        (
            ("-category", "--category"),
            dict(
                dest="category_col",
                required=False,
                help="Optional category field for grouping",
            ),
        ),
        (
            ("-numeric_col", "--numeric_col"),
            dict(
                dest="numeric_col",
                required=False,
                help="Numeric field to compute statistics (required if stats != 'count')",
            ),
        ),
        (
            ("-f", "--output_format"),
            dict(required=False, default="gpd", choices=OUTPUT_FORMATS),
        ),
        (
            ("-fix", "--fix_antimeridian"),
            dict(
                type=str,
                choices=FIX_ANTIMERIDIAN_CHOICES,
                default=None,
                help="Antimeridian fixing method: shift, shift_balanced, shift_west, shift_east, split, none",
            ),
        ),
    )
    for flags, settings in argument_table:
        cli.add_argument(*flags, **settings)

    opts = cli.parse_args()

    try:
        binned = s2bin(
            data=opts.input,
            resolution=opts.resolution,
            stats=opts.statistics,
            category_col=opts.category_col,
            numeric_col=opts.numeric_col,
            output_format=opts.output_format,
            fix_antimeridian=opts.fix_antimeridian,
        )
        # Only structured results are echoed to the console.
        if opts.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as exc:
        print(f"Error: {exc!s}")

A5 Grid Binning Module

Bins point data into A5 (Adaptive 5) grid cells and computes various statistics using hierarchical geospatial indexing.

Key Functions:

- a5_bin(): Core binning function with spatial joins and aggregation
- a5bin(): Main user-facing function with multiple input/output formats
- a5bin_cli(): Command-line interface for binning functionality

a5_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', options=None, split_antimeridian=False, **kwargs)

Bin point data into A5 grid cells and compute statistics using a single grid generation + spatial join, followed by pandas groupby aggregation.

Returns a GeoDataFrame with A5 cell stats and geometry.

- options (dict, optional): Options for a52geo.
- split_antimeridian (bool, optional): When True, apply antimeridian fixing to the resulting polygons.

Source code in vgrid/binning/a5bin.py
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def a5_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    options=None,
    split_antimeridian=False,
    **kwargs,
):
    """
    Bin point data into A5 cells and attach per-cell statistics.

    One grid is generated over the bounding box of the points, the points are
    spatially joined to its cells, and the joined rows are aggregated by
    ``aggregate_joined``. Geometries other than Point/MultiPoint are dropped;
    MultiPoints are exploded into single points.

    options : dict, optional
        Options for a52geo (forwarded to ``a5_grid``).
    split_antimeridian : bool, optional
        When True, apply antimeridian fixing to the resulting polygons.

    Returns a GeoDataFrame of the A5 cells that received statistics, with a
    ``resolution`` column added when the grid does not already have one.

    Raises ValueError when ``stats`` is not ``"count"`` and ``numeric_col`` is
    missing, either as an argument or as a column of the input data.
    """
    resolution = validate_a5_resolution(int(resolution))

    # Every statistic except a plain count needs a numeric field.
    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    # Normalise the input to a GeoDataFrame of single points.
    pts = process_input_data_bin(data, lat_col=lat_col, lon_col=lon_col, **kwargs)
    if not pts.empty:
        point_like = pts.geometry.geom_type.isin(["Point", "MultiPoint"])
        pts = pts[point_like].copy()
        if "MultiPoint" in set(pts.geometry.geom_type.unique()):
            pts = pts.explode(index_parts=False, ignore_index=True)

    # Build the A5 grid over the extent of the points.
    cell_id = "a5"
    x_min, y_min, x_max, y_max = pts.total_bounds
    cells = a5_grid(
        resolution=resolution,
        bbox=(x_min, y_min, x_max, y_max),
        options=options,
        split_antimeridian=split_antimeridian,
    )

    # Carry only the columns the aggregation needs through the join.
    keep = ["geometry"]
    if category_col and category_col in pts.columns:
        keep.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in pts.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    joined = gpd.sjoin(
        pts[keep],
        cells[[cell_id, "geometry"]],
        how="inner",
        predicate="within",
    )

    per_cell = aggregate_joined(
        joined, cell_id, stats=stats, category_col=category_col, numeric_col=numeric_col
    ).reset_index()

    # Merge onto the grid so the columns produced by a5_grid are kept.
    merged = cells.merge(per_cell, on=cell_id, how="inner")
    if "resolution" not in merged.columns:
        merged["resolution"] = resolution
    return gpd.GeoDataFrame(merged, geometry="geometry", crs=cells.crs or "EPSG:4326")

a5bin(data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', options=None, split_antimeridian=False, **kwargs)

Bin point data into A5 grid cells and compute statistics from various input formats.

This is the main function that handles binning of point data to A5 grid cells. It supports multiple input formats including file paths, URLs, DataFrames, GeoDataFrames, GeoJSON dictionaries, and lists of features.

Parameters:

Name Type Description Default
data

Input data in one of the following formats:

- File path (str): Path to vector file (shapefile, GeoJSON, etc.)
- URL (str): URL to vector data
- pandas.DataFrame: DataFrame with lat/lon columns
- geopandas.GeoDataFrame: GeoDataFrame with point geometries
- dict: GeoJSON dictionary
- list: List of GeoJSON feature dictionaries

required
resolution int

A5 resolution level [0..29] (0=coarsest, 29=finest)

required
stats str

Statistic to compute:

- 'count': Count of points in each cell
- 'sum': Sum of field values
- 'min': Minimum field value
- 'max': Maximum field value
- 'mean': Mean field value
- 'median': Median field value
- 'std': Standard deviation of field values
- 'var': Variance of field values
- 'range': Range of field values
- 'minority': Least frequent value
- 'majority': Most frequent value
- 'variety': Number of unique values

'count'
category_col str

Category column for grouping statistics. When provided, statistics are computed separately for each category value.

None
numeric_col str

Numeric field to compute statistics (required if stats != 'count')

None
output_format str

Output format. Options include:

- 'gpd', 'geopandas', 'gdf', 'geodataframe': Return GeoDataFrame
- 'geojson_dict', 'json_dict': Return GeoJSON dictionary
- 'geojson', 'json': Save as GeoJSON file or return string
- 'csv': Save as CSV file or return string
- 'shp', 'shapefile': Save as shapefile
- 'gpkg', 'geopackage': Save as GeoPackage
- 'parquet', 'geoparquet': Save as Parquet file
- None: Return list of dictionaries

'gpd'
options

dict, optional Options for a52geo.

None
split_antimeridian

bool, optional When True, apply antimeridian fixing to the resulting polygons.

False
**kwargs

Additional arguments passed to geopandas read functions (e.g., lat_col, lon_col)

{}

Returns:

Type Description

Various types depending on output_format:

  • GeoDataFrame: When output_format is 'gpd', 'geopandas', 'gdf', 'geodataframe'
  • dict: When output_format is 'geojson_dict', 'json_dict', or None
  • str: When output_format is 'geojson', 'json', or 'csv' (returns data as string)
  • str: File path when output_format is a file-based format (geojson, csv, shp, gpkg, parquet)

Raises:

Type Description
ValueError

If input data type is not supported, conversion fails, or required parameters are missing

TypeError

If resolution is not an integer

Example

Bin from file with count statistics

result = a5bin("cities.shp", 10, "count")

Bin from GeoDataFrame with mean statistics

import geopandas as gpd
gdf = gpd.read_file("cities.shp")
result = a5bin(gdf, 10, "mean", numeric_col="population")

Bin from GeoJSON dict with category grouping

geojson = {"type": "FeatureCollection", "features": [...]}
result = a5bin(geojson, 10, "sum", numeric_col="value", category_col="type")

Save output as GeoJSON file

result = a5bin("points.csv", 8, "count", output_format="geojson")
print(f"Output saved to: {result}")

Source code in vgrid/binning/a5bin.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
def a5bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    output_format="gpd",
    options=None,
    split_antimeridian=False,
    **kwargs,
):
    """
    Aggregate point data over A5 grid cells and return it in the requested format.

    User-facing wrapper around two helpers: ``a5_bin`` performs the binning and
    ``convert_to_output_format`` turns the resulting GeoDataFrame into the
    requested output. This function itself only validates the
    ``stats``/``numeric_col`` combination and derives a default output name.

    Args:
        data: Input points, forwarded unchanged to ``a5_bin``. Documented inputs:
            - File path (str): Path to vector file (shapefile, GeoJSON, etc.)
            - URL (str): URL to vector data
            - pandas.DataFrame: DataFrame with lat/lon columns
            - geopandas.GeoDataFrame: GeoDataFrame with point geometries
            - dict: GeoJSON dictionary
            - list: List of GeoJSON feature dictionaries
        resolution (int): A5 resolution level [0..29] (0=coarsest, 29=finest).
        stats (str): Statistic to compute. Documented values: 'count', 'sum',
            'min', 'max', 'mean', 'median', 'std', 'var', 'range', 'minority',
            'majority', 'variety'.
        category_col (str, optional): Category column; when given, statistics are
            computed separately for each category value.
        numeric_col (str, optional): Numeric field the statistic is computed on.
            Mandatory for every ``stats`` value other than 'count'.
        output_format (str, optional): Output format handed to
            ``convert_to_output_format``. Documented values:
            - 'gpd', 'geopandas', 'gdf', 'geodataframe': Return GeoDataFrame
            - 'geojson_dict', 'json_dict': Return GeoJSON dictionary
            - 'geojson', 'json': Save as GeoJSON file or return string
            - 'csv': Save as CSV file or return string
            - 'shp', 'shapefile': Save as shapefile
            - 'gpkg', 'geopackage': Save as GeoPackage
            - 'parquet', 'geoparquet': Save as Parquet file
            - None: NOTE(review): earlier docs disagree on whether this yields a
              dict or a list of dictionaries; confirm against
              ``convert_to_output_format``.
        options (dict, optional): Options for a52geo, forwarded to ``a5_bin``.
        split_antimeridian (bool, optional): When True, apply antimeridian
            fixing to the resulting polygons (forwarded to ``a5_bin``).
        **kwargs: Extra keyword arguments forwarded to ``a5_bin``
            (e.g., lat_col, lon_col).

    Returns:
        Whatever ``convert_to_output_format`` returns for ``output_format``
        (GeoDataFrame, dict, string, or an output file path).

    Raises:
        ValueError: If ``stats`` is not 'count' and no ``numeric_col`` is given.
            The helpers may raise further errors, e.g. for unsupported input;
            a TypeError for a non-integer resolution was documented previously
            but is not raised in this function (TODO confirm in ``a5_bin``).

    Example:
        >>> # Bin from file with count statistics
        >>> result = a5bin("cities.shp", 10, "count")

        >>> # Bin from GeoDataFrame with mean statistics
        >>> import geopandas as gpd
        >>> gdf = gpd.read_file("cities.shp")
        >>> result = a5bin(gdf, 10, "mean", numeric_col="population")

        >>> # Save output as GeoJSON file
        >>> result = a5bin("points.csv", 8, "count", output_format="geojson")
        >>> print(f"Output saved to: {result}")
    """
    needs_numeric_field = stats != "count"
    if needs_numeric_field and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    binned_gdf = a5_bin(
        data,
        resolution,
        stats,
        category_col,
        numeric_col,
        options=options,
        split_antimeridian=split_antimeridian,
        **kwargs,
    )

    # Formats outside OUTPUT_FORMATS get no generated name.
    if output_format not in OUTPUT_FORMATS:
        return convert_to_output_format(binned_gdf, output_format, None)

    # Default name: "<input stem>_a5bin_<resolution>" for path/URL strings,
    # plain "a5bin_<resolution>" for in-memory inputs.
    default_name = f"a5bin_{resolution}"
    if isinstance(data, str):
        stem, _extension = os.path.splitext(os.path.basename(data))
        default_name = f"{stem}_{default_name}"
    return convert_to_output_format(binned_gdf, output_format, default_name)

a5bin_cli()

Command-line interface for a5bin conversion.

This function provides a command-line interface for binning point data to A5 grid cells. It parses command-line arguments and calls the main a5bin function.

Usage

python a5bin.py -i input.shp -r 10 -stats count -f geojson

Parameters:

Name Type Description Default
-i, --input

Input file path, URL, or other vector file formats

required
-r, --resolution

A5 resolution [0..29]

13
-stats, --statistics

Statistic to compute (count, min, max, sum, mean, median, std, var, range, minority, majority, variety)

'count'
-category, --category

Optional category field for grouping

None
-numeric_col, --numeric_col

Numeric field to compute statistics (required if stats != 'count')

None
-split, --split_antimeridian

Apply antimeridian fixing to the resulting polygons

False
-o, --output

Output file path. This argument is no longer defined by the parser: it was removed, and output is saved in the current working directory under a predefined name.

not available
-f, --output_format

Output format (e.g. geojson, gpkg, parquet, csv, shapefile)

'gpd'
Example

Bin shapefile to A5 cells at resolution 10 with count statistics

python a5bin.py -i cities.shp -r 10 -stats count -f geojson

Source code in vgrid/binning/a5bin.py
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
def a5bin_cli():
    """
    Command-line interface for a5bin conversion.

    Parses the command-line arguments, decodes the optional JSON ``-options``
    string and forwards everything to the main a5bin function. Failures
    (invalid JSON in ``-options`` or any exception raised by ``a5bin``) are
    reported by printing ``Error: ...`` and returning; nothing is re-raised.

    Usage:
        python a5bin.py -i input.shp -r 10 -stats count -f geojson

    Arguments:
        -i, --input: Input file path, URL, or other vector file formats (required)
        -r, --resolution: A5 resolution [0..29] (default: 13)
        -stats, --statistics: Statistic to compute, one of STATS_OPTIONS (default: count)
        -category, --category: Optional category field for grouping
        -numeric_col, --numeric_col: Numeric field to compute statistics (required if stats != 'count')
        -f, --output_format: Output format, one of OUTPUT_FORMATS (default: gpd)
        -options, --options: JSON string of options passed to a52geo, e.g. '{"segments": 1000}'
        -split, --split_antimeridian: Apply antimeridian fixing to the resulting polygons

    Note:
        There is no -o/--output argument; it was removed from the parser.
        According to the in-code note, output is saved in the current working
        directory under a predefined name.

    Example:
        >>> # Bin shapefile to A5 cells at resolution 10 with count statistics
        >>> # python a5bin.py -i cities.shp -r 10 -stats count -f geojson
    """
    parser = argparse.ArgumentParser(description="Binning point data to A5 DGGS")
    parser.add_argument(
        "-i",
        "--input",
        type=str,
        required=True,
        help="Input data: GeoJSON file path, URL, or other vector file formats",
    )
    parser.add_argument(
        "-r",
        "--resolution",
        type=int,
        default=13,
        help="Resolution of the grid [0..29]",
    )
    parser.add_argument(
        "-stats",
        "--statistics",
        choices=STATS_OPTIONS,
        default="count",
        help="Statistic option",
    )

    parser.add_argument(
        "-category",
        "--category",
        dest="category_col",
        required=False,
        help="Optional category field for grouping",
    )
    parser.add_argument(
        "-numeric_col",
        "--numeric_col",
        dest="numeric_col",
        required=False,
        help="Numeric field to compute statistics (required if stats != 'count')",
    )
    # Removed -o/--output; output is saved in CWD with predefined name
    parser.add_argument(
        "-f",
        "--output_format",
        required=False,
        default="gpd",
        choices=OUTPUT_FORMATS,
        help="Output format (default: gpd)",
    )
    parser.add_argument(
        "-options",
        "--options",
        type=str,
        default=None,
        help="JSON string of options to pass to a52geo. "
             "Example: '{\"segments\": 1000}'",
    )
    parser.add_argument(
        "-split",
        "--split_antimeridian",
        action="store_true",
        default=False,
        help="Apply antimeridian fixing to the resulting polygons",
    )

    args = parser.parse_args()

    # Parse options JSON if provided; a malformed string aborts before binning.
    options = None
    if args.options:
        try:
            options = json.loads(args.options)
        except json.JSONDecodeError as e:
            print(f"Error: Invalid JSON in options: {str(e)}")
            return

    try:
        # Use the a5bin function
        result = a5bin(
            data=args.input,
            resolution=args.resolution,
            stats=args.statistics,
            category_col=args.category_col,
            numeric_col=args.numeric_col,
            output_format=args.output_format,
            options=options,
            split_antimeridian=args.split_antimeridian,
        )
        # Only in-memory ("structured") results are echoed to stdout.
        if args.output_format in STRUCTURED_FORMATS:
            print(result)
        # Print notification is now handled in convert_to_output_format
    except Exception as e:
        # Top-level CLI boundary: report the failure instead of a traceback.
        print(f"Error: {str(e)}")
        return

rHEALPix Grid Binning Module

Bins point data into rHEALPix grid cells and computes various statistics using hierarchical equal-area grid system for consistent spatial analysis.

Key Functions: - rhealpix_bin(): Core binning function with spatial joins and aggregation - rhealpixbin(): Main user-facing function with multiple input/output formats - rhealpixbin_cli(): Command-line interface for binning functionality

DGGAL Grid Binning Module

Bins point data into DGGAL (Discrete Global Grid Abstraction Library) cells and computes various statistics for multiple grid types including ISEA3H, ISEA9R, IVEA3H, IVEA9R, RTEA3H, RTEA9R, and rHEALPix.

Key Functions: - dggal_bin(): Core binning function with spatial joins and aggregation - dggalbin(): Main user-facing function with multiple input/output formats - dggalbin_cli(): Command-line interface for binning functionality

dggal_bin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, **kwargs)

Bin point data into DGGAL grid cells and compute statistics using a single grid generation + spatial join, followed by pandas groupby aggregation.

This avoids per-point subprocess calls and is significantly faster.

Returns a GeoDataFrame with DGGAL cell stats and geometry.

Source code in vgrid/binning/dggalbin.py
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
def dggal_bin(
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
    split_antimeridian: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGAL grid cells and compute statistics using a single
    grid generation + spatial join, followed by an aggregation step.

    Args:
        dggs_type: DGGAL grid type; also used to build the cell id column name
            ``dggal_<dggs_type>``.
        data: Input accepted by ``process_input_data_bin``.
        resolution: Grid resolution; cast to ``int`` and checked by
            ``validate_dggal_resolution``.
        stats: Statistic to compute; anything other than 'count' needs
            ``numeric_col``.
        category_col: Optional grouping column. It is carried into the join only
            when present in the input columns.
        numeric_col: Column the statistic is computed on.
        lat_col: Latitude column name, forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name, forwarded to ``process_input_data_bin``.
        split_antimeridian: Forwarded to ``dggalgen``.
        **kwargs: Forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame of the grid cells that contain at least one input point
        (inner join), with the aggregated statistics, a ``resolution`` column
        and the cell geometry. CRS is the grid's CRS, or EPSG:4326 if the grid
        has none.

    Raises:
        ValueError: If ``stats`` is not 'count' and ``numeric_col`` is missing
            or not a column of the input data.
    """

    resolution = validate_dggal_resolution(dggs_type, int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    # 1) Normalize input to GeoDataFrame of points
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    # 2) Work out which attribute columns the join needs. This runs before
    #    the grid is generated so a bad numeric_col fails fast.
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        # Avoid selecting the same column twice when both names coincide.
        if numeric_col not in join_cols:
            join_cols.append(numeric_col)

    # 3) Generate the grid over the bounding box of the points.
    # NOTE(review): if no points remain, the bounds are presumably NaN;
    # confirm how dggalgen handles that case.
    minx, miny, maxx, maxy = points_gdf.total_bounds  # lon/lat order
    bbox = (minx, miny, maxx, maxy)
    id_col = f"dggal_{dggs_type}"
    grid_gdf = dggalgen(
        dggs_type=dggs_type,
        resolution=resolution,
        output_format="gpd",
        bbox=bbox,
        split_antimeridian=split_antimeridian,
    )

    # 4) Spatial join points -> cells, carrying only the needed columns.
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # 5) Aggregate per cell and attach the statistics to the cell geometries.
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

dggalbin(dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, **kwargs)

Bin point data into DGGAL grid cells and compute statistics from various input formats.

Source code in vgrid/binning/dggalbin.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
def dggalbin(
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    output_format: str = "gpd",
    split_antimeridian: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGAL grid cells and return the result in the requested format.

    Delegates the binning to ``dggal_bin`` (all arguments, including
    ``**kwargs``, are forwarded) and the formatting to
    ``convert_to_output_format``.

    For formats listed in ``OUTPUT_FORMATS`` a default output name is derived:
    ``<input stem>_<dggs_type>bin_<resolution>`` when ``data`` is a string
    (path or URL), otherwise ``<dggs_type>bin_<resolution>``. For any other
    format the name passed on is ``None``.
    """
    binned_gdf = dggal_bin(
        dggs_type=dggs_type,
        data=data,
        resolution=resolution,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
        split_antimeridian=split_antimeridian,
        **kwargs,
    )

    if output_format not in OUTPUT_FORMATS:
        return convert_to_output_format(binned_gdf, output_format, None)

    name_parts = [f"{dggs_type}bin_{resolution}"]
    if isinstance(data, str):
        # Prefix with the input file name, stripped of directory and extension.
        stem, _extension = os.path.splitext(os.path.basename(data))
        name_parts.insert(0, stem)
    return convert_to_output_format(binned_gdf, output_format, "_".join(name_parts))

dggalbin_cli()

Command-line interface for DGGAL binning.

Source code in vgrid/binning/dggalbin.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
def dggalbin_cli():
    """
    Command-line interface for DGGAL binning.

    Builds the argument parser from a table of argument specifications, parses
    the command line and calls ``dggalbin``. The result is printed only when
    the chosen output format is in ``STRUCTURED_FORMATS``. Any exception raised
    by the binning call is caught and reported as ``Error: ...``.
    """
    parser = argparse.ArgumentParser(description="Binning point data to DGGAL DGGS")

    # (flags, add_argument keyword arguments), registered in this order.
    argument_table = (
        (
            ("-i", "--input"),
            {
                "type": str,
                "required": True,
                "help": "Input data: GeoJSON file path, URL, or other vector file formats",
            },
        ),
        (
            ("-t", "--dggs_type"),
            {
                "type": str,
                "required": True,
                "choices": DGGAL_TYPES.keys(),
                "help": "DGGAL type",
            },
        ),
        (
            ("-r", "--resolution"),
            {"type": int, "required": True, "help": "Resolution (integer)"},
        ),
        (
            ("-stats", "--statistics"),
            {"choices": STATS_OPTIONS, "default": "count", "help": "Statistic option"},
        ),
        (
            ("-category", "--category"),
            {
                "dest": "category_col",
                "required": False,
                "help": "Optional category field for grouping",
            },
        ),
        (
            ("-numeric_col", "--numeric_col"),
            {
                "dest": "numeric_col",
                "required": False,
                "help": "Numeric field to compute statistics (required if stats != 'count')",
            },
        ),
        (
            ("-f", "--output_format"),
            {"required": False, "default": "gpd", "choices": OUTPUT_FORMATS},
        ),
        (
            ("-split", "--split_antimeridian"),
            {
                "action": "store_true",
                "default": False,
                "help": "Apply antimeridian fixing to the resulting polygons",
            },
        ),
    )
    for flags, settings in argument_table:
        parser.add_argument(*flags, **settings)

    parsed = parser.parse_args()

    try:
        binned = dggalbin(
            dggs_type=parsed.dggs_type,
            data=parsed.input,
            resolution=parsed.resolution,
            stats=parsed.statistics,
            category_col=parsed.category_col,
            numeric_col=parsed.numeric_col,
            output_format=parsed.output_format,
            split_antimeridian=parsed.split_antimeridian,
        )
        if parsed.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as exc:
        # CLI boundary: print the message rather than a traceback.
        print(f"Error: {str(exc)}")

DGGRID Grid Binning Module

Bins point data into DGGRID cells and computes various statistics for DGGRID types (e.g., ISEA7H, ISEA4T, FULLER4D, IGEO7).

Key Functions: - dggrid_bin(): Core binning function with spatial joins and aggregation - dggridbin(): Main user-facing function with multiple input/output formats - dggridbin_cli(): Command-line interface for binning functionality

dggrid_bin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', split_antimeridian=False, aggregate=False, **kwargs)

Bin point data into DGGRID cells and compute statistics.

Parameters

split_antimeridian : bool, optional. When True, apply antimeridian fixing to the resulting polygons.

aggregate : bool, optional. When True (with split_antimeridian), dissolve split cell parts by global_id. Passed to dggridgen / generate_grid.

Source code in vgrid/binning/dggridbin.py
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
def dggrid_bin(
    dggrid_instance,
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    lat_col: str = "lat",
    lon_col: str = "lon",
    split_antimeridian: bool = False,
    aggregate: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGRID cells and compute statistics.

    Parameters
    ----------
    dggrid_instance
        DGGRID instance forwarded to ``dggridgen``.
    dggs_type : str
        DGGRID grid type; normalized by ``validate_dggrid_type``. Its
        lower-cased form names the output id column ``dggrid_<type>``.
    data
        Input accepted by ``process_input_data_bin``.
    resolution : int
        Grid resolution; cast to ``int`` and checked by
        ``validate_dggrid_resolution``.
    stats : str, optional
        Statistic to compute; anything other than 'count' needs ``numeric_col``.
    category_col : str, optional
        Grouping column. It is carried into the join only when present in the
        input columns.
    numeric_col : str, optional
        Column the statistic is computed on.
    lat_col, lon_col : str, optional
        Coordinate column names forwarded to ``process_input_data_bin``.
    split_antimeridian : bool, optional
        When True, apply antimeridian fixing to the resulting polygons.
    aggregate : bool, optional
        When True (with split_antimeridian), dissolve split cell parts by
        global_id. Passed to dggridgen / generate_grid.
    **kwargs
        Forwarded to ``process_input_data_bin``.

    Returns
    -------
    geopandas.GeoDataFrame
        Cells that contain at least one input point (inner join), with the
        aggregated statistics, a ``resolution`` column and the cell geometry.

    Raises
    ------
    ValueError
        If ``stats`` is not 'count' and ``numeric_col`` is missing or not a
        column of the input data.
    """
    dggs_type = validate_dggrid_type(dggs_type)
    resolution = validate_dggrid_resolution(dggs_type, int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; multipoints are exploded into points.
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    # Work out which attribute columns the join needs. This runs before the
    # grid is generated so a bad numeric_col fails fast.
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        # Avoid selecting the same column twice when both names coincide.
        if numeric_col not in join_cols:
            join_cols.append(numeric_col)

    # NOTE(review): if no points remain, the bounds are presumably NaN;
    # confirm how dggridgen handles that case.
    minx, miny, maxx, maxy = points_gdf.total_bounds
    bbox = (minx, miny, maxx, maxy)
    id_col = "global_id"
    grid_gdf = dggridgen(
        dggrid_instance=dggrid_instance,
        dggs_type=dggs_type,
        resolution=resolution,
        output_format="gpd",
        bbox=bbox,
        split_antimeridian=split_antimeridian,
        aggregate=aggregate,
    )

    # Align the grid CRS with the points. When neither side has a CRS there is
    # nothing to copy; set_crs(None) is rejected by some geopandas releases,
    # so that case is skipped and the EPSG:4326 fallback below applies.
    if grid_gdf.crs is None:
        if points_gdf.crs is not None:
            grid_gdf = grid_gdf.set_crs(points_gdf.crs)
    elif points_gdf.crs is not None and grid_gdf.crs != points_gdf.crs:
        grid_gdf = grid_gdf.to_crs(points_gdf.crs)

    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    out = out.rename(columns={id_col: f"dggrid_{dggs_type.lower()}"})
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

dggridbin(dggrid_instance, dggs_type, data, resolution, stats='count', category_col=None, numeric_col=None, output_format='gpd', split_antimeridian=False, aggregate=False, **kwargs)

Bin point data into DGGRID cells and compute statistics from various input formats.

Source code in vgrid/binning/dggridbin.py
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
def dggridbin(
    dggrid_instance,
    dggs_type: str,
    data,
    resolution: int,
    stats: str = "count",
    category_col: str | None = None,
    numeric_col: str | None = None,
    output_format: str = "gpd",
    split_antimeridian: bool = False,
    aggregate: bool = False,
    **kwargs,
):
    """
    Bin point data into DGGRID cells and return the result in the requested format.

    Delegates the binning to ``dggrid_bin`` (all arguments, including
    ``**kwargs``, are forwarded) and the formatting to
    ``convert_to_output_format``.

    For formats listed in ``OUTPUT_FORMATS`` a default output name is derived
    from the lower-cased grid type: ``<input stem>_<type>bin_<resolution>`` when
    ``data`` is a string (path or URL), otherwise ``<type>bin_<resolution>``.
    For any other format the name passed on is ``None``.
    """
    binned_gdf = dggrid_bin(
        dggrid_instance=dggrid_instance,
        dggs_type=dggs_type,
        data=data,
        resolution=resolution,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
        split_antimeridian=split_antimeridian,
        aggregate=aggregate,
        **kwargs,
    )

    if output_format not in OUTPUT_FORMATS:
        return convert_to_output_format(binned_gdf, output_format, None)

    name_parts = [f"{dggs_type.lower()}bin_{resolution}"]
    if isinstance(data, str):
        # Prefix with the input file name, stripped of directory and extension.
        stem, _extension = os.path.splitext(os.path.basename(data))
        name_parts.insert(0, stem)
    return convert_to_output_format(binned_gdf, output_format, "_".join(name_parts))

dggridbin_cli()

Command-line interface for DGGRID binning.

Source code in vgrid/binning/dggridbin.py
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
def dggridbin_cli():
    """
    Command-line interface for DGGRID binning.

    Builds the argument parser from a table of argument specifications, parses
    the command line, creates a DGGRID instance with
    ``create_dggrid_instance()`` and calls ``dggridbin``. The result is printed
    only when the chosen output format is in ``STRUCTURED_FORMATS``. Any
    exception raised while creating the instance or binning is caught and
    reported as ``Error: ...``.
    """
    parser = argparse.ArgumentParser(description="Binning point data to DGGRID DGGS")

    # (flags, add_argument keyword arguments), registered in this order.
    argument_table = (
        (
            ("-i", "--input"),
            {
                "type": str,
                "required": True,
                "help": "Input data: GeoJSON file path, URL, or other vector file formats",
            },
        ),
        (
            ("-t", "--dggs_type"),
            {
                "type": str,
                "required": True,
                "choices": DGGRID_TYPES.keys(),
                "help": "DGGRID type",
            },
        ),
        (
            ("-r", "--resolution"),
            {"type": int, "required": True, "help": "Resolution (integer)"},
        ),
        (
            ("-stats", "--statistics"),
            {"choices": STATS_OPTIONS, "default": "count", "help": "Statistic option"},
        ),
        (
            ("-category", "--category"),
            {
                "dest": "category_col",
                "required": False,
                "help": "Optional category field for grouping",
            },
        ),
        (
            ("-numeric_col", "--numeric_col"),
            {
                "dest": "numeric_col",
                "required": False,
                "help": "Numeric field to compute statistics (required if stats != 'count')",
            },
        ),
        (
            ("-f", "--output_format"),
            {"required": False, "default": "gpd", "choices": OUTPUT_FORMATS},
        ),
        (
            ("-split", "--split_antimeridian"),
            {
                "action": "store_true",
                "default": False,
                "help": "Apply antimeridian fixing to the resulting polygons",
            },
        ),
        (
            ("-aggregate", "--aggregate"),
            {
                "action": "store_true",
                "help": "Aggregate the resulting polygons (dissolve by global_id when split_antimeridian is set)",
            },
        ),
    )
    for flags, settings in argument_table:
        parser.add_argument(*flags, **settings)

    parsed = parser.parse_args()

    try:
        # The instance is created inside the try so its failures are reported too.
        binned = dggridbin(
            dggrid_instance=create_dggrid_instance(),
            dggs_type=parsed.dggs_type,
            data=parsed.input,
            resolution=parsed.resolution,
            stats=parsed.statistics,
            category_col=parsed.category_col,
            numeric_col=parsed.numeric_col,
            output_format=parsed.output_format,
            split_antimeridian=parsed.split_antimeridian,
            aggregate=parsed.aggregate,
        )
        if parsed.output_format in STRUCTURED_FORMATS:
            print(binned)
    except Exception as exc:
        # CLI boundary: print the message rather than a traceback.
        print(f"Error: {str(exc)}")

ISEA4T Grid Binning Module

Bins point data into ISEA4T triangular grid cells and computes various statistics using hierarchical triangular grid system.

Key Functions: - isea4t_bin(): Core binning function with spatial joins and aggregation - isea4tbin(): Main user-facing function with multiple input/output formats - isea4tbin_cli(): Command-line interface for binning functionality

isea4t_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', fix_antimeridian=None, **kwargs)

Bin point data into ISEA4T grid cells using grid generation + spatial join and aggregate with pandas groupby. Supports custom stats (range, variety, minority, majority). Only Point/MultiPoint geometries are considered.

Source code in vgrid/binning/isea4tbin.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def isea4t_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    fix_antimeridian=None,
    **kwargs,
):
    """
    Bin point data into ISEA4T grid cells using grid generation + spatial join
    and aggregate with pandas groupby. Supports custom stats (range, variety,
    minority, majority). Only Point/MultiPoint geometries are considered;
    MultiPoints are exploded into individual points before binning.

    Args:
        data: Input forwarded to ``process_input_data_bin``.
        resolution: ISEA4T resolution, checked by ``validate_isea4t_resolution``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate when ``stats`` is not ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        fix_antimeridian: Forwarded to ``isea4t_grid_within_bbox``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame of the cells that contain at least one point, merged with
        the aggregated values and a ``resolution`` column. When the input has
        no Point/MultiPoint features, an empty GeoDataFrame with ``isea4t``
        and ``geometry`` columns is returned.

    Raises:
        ValueError: If ``numeric_col`` is needed but missing from the input.
    """
    resolution = validate_isea4t_resolution(resolution)
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "isea4t"
    # Nothing to bin: total_bounds of an empty frame is all-NaN, which is not a
    # usable bbox for grid generation. Mirror ease_bin and return an empty result.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # Generate ISEA4T grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds
    from vgrid.generator.isea4tgrid import isea4t_grid_within_bbox

    grid_gdf = isea4t_grid_within_bbox(
        resolution=resolution,
        bbox=(minx, miny, maxx, maxy),
        fix_antimeridian=fix_antimeridian,
    )

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # Aggregate
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Join back to grid and return GeoDataFrame (inner merge keeps only cells
    # that received at least one point)
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

EASE Grid Binning Module

Bins point data into EASE (Equal-Area Scalable Earth) grid cells and computes various statistics using an equal-area projection grid system.

Key Functions: - ease_bin(): Core binning function with spatial joins and aggregation - easebin(): Main user-facing function with multiple input/output formats - easebin_cli(): Command-line interface for binning functionality

ease_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin point data into EASE grid cells and aggregate with pandas groupby. Supports custom stats (range, variety, minority, majority). Only Point/MultiPoint geometries are considered.

Points are assigned to cells via latlon2ease (same as point2ease), then aggregated and joined to cell geometries from ease2geo.

Source code in vgrid/binning/easebin.py
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
def ease_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Aggregate point data per EASE grid cell.

    Each point is tagged with its cell id via ``latlon2ease``; the tagged rows
    are summarised by ``aggregate_joined`` and the summary is merged onto cell
    geometries built with ``ease2geo``. Only Point/MultiPoint geometries take
    part; MultiPoints are exploded into single points first.

    Returns an empty GeoDataFrame (``ease`` and ``geometry`` columns) when no
    point features remain. Raises ``ValueError`` when ``numeric_col`` is needed
    but absent from the input.
    """
    resolution = validate_ease_resolution(resolution)
    cell_field = "ease"

    pts = process_input_data_bin(data, lat_col=lat_col, lon_col=lon_col, **kwargs)
    if not pts.empty:
        # Discard everything that is not point-like, then flatten MultiPoints.
        point_like = pts.geometry.geom_type.isin(["Point", "MultiPoint"])
        pts = pts[point_like].copy()
        if (pts.geometry.geom_type == "MultiPoint").any():
            pts = pts.explode(index_parts=False, ignore_index=True)

    if pts.empty:
        return gpd.GeoDataFrame(columns=[cell_field, "geometry"], crs="EPSG:4326")

    # Tag every point with the id of the cell it falls in.
    pts = pts.copy()
    pts[cell_field] = [latlon2ease(pt.y, pt.x, resolution) for pt in pts.geometry]

    # Carry only the columns the aggregation needs.
    keep = [cell_field]
    if category_col and category_col in pts.columns:
        keep.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in pts.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        keep.append(numeric_col)

    summary = aggregate_joined(
        pts[keep],
        cell_field,
        stats=stats,
        category_col=category_col,
        numeric_col=numeric_col,
    ).reset_index()

    # Build one geometry record per cell id present in the summary.
    cell_records = [
        geodesic_dggs_to_geoseries(
            "ease", cell_id, resolution, ease2geo(cell_id), num_edges=4
        )
        for cell_id in summary[cell_field]
    ]
    cells = gpd.GeoDataFrame(cell_records, geometry="geometry", crs="EPSG:4326")

    merged = cells.merge(summary, on=cell_field, how="inner")
    if "resolution" not in merged.columns:
        merged["resolution"] = resolution
    return gpd.GeoDataFrame(
        merged, geometry="geometry", crs=cells.crs or "EPSG:4326"
    )

QTM Grid Binning Module

Bins point data into QTM (Quaternary Triangular Mesh) grid cells and computes various statistics using a hierarchical triangular grid system.

Key Functions: - qtm_bin(): Core binning function with spatial joins and aggregation - qtmbin(): Main user-facing function with multiple input/output formats - qtmbin_cli(): Command-line interface for binning functionality

OLC Grid Binning Module

Bins point data into OLC (Open Location Code) grid cells and computes various statistics using human-readable location codes for global coverage.

Key Functions: - olc_bin(): Core binning function with spatial joins and aggregation - olcbin(): Main user-facing function with multiple input/output formats - olcbin_cli(): Command-line interface for binning functionality

olc_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin point data into OLC grid cells using grid generation + spatial join and aggregate with pandas groupby. Supports custom stats (range, variety, minority, majority). Only Point/MultiPoint geometries are considered.

Source code in vgrid/binning/olcbin.py
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
def olc_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into OLC grid cells using grid generation + spatial join and
    aggregate with pandas groupby. Supports custom stats (range, variety, minority,
    majority). Only Point/MultiPoint geometries are considered; MultiPoints are
    exploded into individual points before binning.

    Args:
        data: Input forwarded to ``process_input_data_bin``.
        resolution: OLC resolution, checked by ``validate_olc_resolution``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate when ``stats`` is not ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame of the cells that contain at least one point, merged with
        the aggregated values and a ``resolution`` column. When the input has
        no Point/MultiPoint features, an empty GeoDataFrame with ``olc`` and
        ``geometry`` columns is returned.

    Raises:
        ValueError: If ``numeric_col`` is needed but missing from the input.
    """
    resolution = validate_olc_resolution(resolution)
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "olc"
    # Nothing to bin: total_bounds of an empty frame is all-NaN, which is not a
    # usable bbox for grid generation. Mirror ease_bin and return an empty result.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # Generate OLC grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds
    from vgrid.generator.olcgrid import olc_grid_within_bbox

    grid_gdf = olc_grid_within_bbox(
        resolution=resolution, bbox=(minx, miny, maxx, maxy)
    )

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # Aggregate
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Join back to grid and return GeoDataFrame (inner merge keeps only cells
    # that received at least one point)
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

Geohash Grid Binning Module

Bins point data into Geohash grid cells and computes various statistics using a hierarchical geocoding system with alphanumeric identifiers.

Key Functions: - geohash_bin(): Core binning function with spatial joins and aggregation - geohashbin(): Main user-facing function with multiple input/output formats - geohashbin_cli(): Command-line interface for binning functionality

geohash_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin point data into Geohash grid cells and compute statistics using a single grid generation + spatial join, followed by pandas groupby aggregation.

Returns a GeoDataFrame with Geohash cell stats and geometry.

Source code in vgrid/binning/geohashbin.py
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
def geohash_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into Geohash grid cells and compute statistics using a single
    grid generation + spatial join, followed by pandas groupby aggregation.

    Only Point/MultiPoint geometries are considered; MultiPoints are exploded
    into individual points before binning.

    Args:
        data: Input forwarded to ``process_input_data_bin``.
        resolution: Geohash resolution; cast to ``int`` and checked by
            ``validate_geohash_resolution``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate; required when ``stats`` is not
            ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame with Geohash cell stats and geometry for the cells that
        contain at least one point. When the input has no Point/MultiPoint
        features, an empty GeoDataFrame with ``geohash`` and ``geometry``
        columns is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is not
            given, or if ``numeric_col`` is missing from the input data.
    """
    resolution = validate_geohash_resolution(int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    # 1) Normalize input to GeoDataFrame of points
    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "geohash"
    # Nothing to bin: total_bounds of an empty frame is all-NaN, which is not a
    # usable bbox for grid generation. Mirror ease_bin and return an empty result.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # 2) Generate Geohash grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds  # lon/lat order
    grid_gdf = geohash_grid_within_bbox(
        resolution=resolution, bbox=(minx, miny, maxx, maxy)
    )

    # 3) Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # 4) Aggregate
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # 5) Join back to grid and return GeoDataFrame (inner merge keeps only
    #    cells that received at least one point)
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

GEOREF Grid Binning Module

Bins point data into GEOREF grid cells and computes statistics, using `vgrid.generator.georefgrid.georef_grid` over the points' bounding box and the same aggregation pattern as the `geohashbin` module.

Key Functions: - georef_bin(): Core binning with spatial join and aggregation - georefbin(): User-facing function with multiple output formats - georefbin_cli(): Command-line interface

georef_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin point data into GEOREF cells and compute statistics (grid + spatial join + groupby).

Returns a GeoDataFrame with GEOREF cell stats and geometry (EPSG:4326).

Source code in vgrid/binning/georefbin.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def georef_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into GEOREF cells and compute statistics (grid + spatial join + groupby).

    Only Point/MultiPoint geometries are considered; MultiPoints are exploded
    into individual points before binning.

    Args:
        data: Input forwarded to ``process_input_data_bin``.
        resolution: GEOREF resolution; cast to ``int`` and checked by
            ``validate_georef_resolution``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate; required when ``stats`` is not
            ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame with GEOREF cell stats and geometry for the cells that
        contain at least one point. When the input has no Point/MultiPoint
        features, an empty GeoDataFrame with ``georef`` and ``geometry``
        columns (EPSG:4326) is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is not
            given, or if ``numeric_col`` is missing from the input data.
    """
    resolution = validate_georef_resolution(int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "georef"
    # Nothing to bin: total_bounds of an empty frame is all-NaN, which is not a
    # usable bbox for grid generation. Mirror ease_bin and return an empty result.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # Grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds
    grid_gdf = georef_grid(resolution=resolution, bbox=(minx, miny, maxx, maxy))

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Inner merge keeps only cells that received at least one point
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

Tilecode Grid Binning Module

Bins point data into Tilecode grid cells and computes various statistics using a hierarchical geospatial indexing system for efficient spatial queries.

Key Functions: - tilecode_bin(): Core binning function with spatial joins and aggregation - tilecodebin(): Main user-facing function with multiple input/output formats - tilecodebin_cli(): Command-line interface for binning functionality

Quadkey Grid Binning Module

Bins point data into Quadkey grid cells and computes various statistics using a hierarchical geospatial indexing system used by mapping services.

Key Functions: - quadkey_bin(): Core binning function with spatial joins and aggregation - quadkeybin(): Main user-facing function with multiple input/output formats - quadkeybin_cli(): Command-line interface for binning functionality

Maidenhead Grid Binning Module

Bins point data into Maidenhead locator cells using `vgrid.generator.maidenheadgrid.maidenhead_grid_within_bbox` over the points' bounding box and the same aggregation pattern as the `geohashbin` module.

Key Functions: - maidenhead_bin(): Core binning with spatial join and aggregation - maidenheadbin(): User-facing function with multiple output formats - maidenheadbin_cli(): Command-line interface

maidenhead_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin point data into Maidenhead cells and compute statistics (grid + spatial join + groupby).

Returns a GeoDataFrame with Maidenhead cell stats and geometry (EPSG:4326).

Source code in vgrid/binning/maidenheadbin.py
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
def maidenhead_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into Maidenhead cells and compute statistics (grid + spatial join + groupby).

    Only Point/MultiPoint geometries are considered; MultiPoints are exploded
    into individual points before binning.

    Args:
        data: Input forwarded to ``process_input_data_bin``.
        resolution: Maidenhead resolution; cast to ``int`` and checked by
            ``validate_maidenhead_resolution``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate; required when ``stats`` is not
            ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame with Maidenhead cell stats and geometry for the cells that
        contain at least one point. When the input has no Point/MultiPoint
        features, an empty GeoDataFrame with ``maidenhead`` and ``geometry``
        columns (EPSG:4326) is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is not
            given, or if ``numeric_col`` is missing from the input data.
    """
    resolution = validate_maidenhead_resolution(int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "maidenhead"
    # Nothing to bin: total_bounds of an empty frame is all-NaN, which is not a
    # usable bbox for grid generation. Mirror ease_bin and return an empty result.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # Grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds
    grid_gdf = maidenhead_grid_within_bbox(
        resolution, bbox=(minx, miny, maxx, maxy)
    )

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Inner merge keeps only cells that received at least one point
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

GARS Grid Binning Module

Bins point data into GARS cells using `vgrid.generator.garsgrid.gars_grid` over the points' bounding box and the same aggregation pattern as the `geohashbin` module.

Key Functions: - gars_bin(): Core binning with spatial join and aggregation - garsbin(): User-facing function with multiple output formats - garsbin_cli(): Command-line interface

gars_bin(data, resolution, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin point data into GARS cells and compute statistics (grid + spatial join + groupby).

Returns a GeoDataFrame with GARS cell stats and geometry (EPSG:4326).

Source code in vgrid/binning/garsbin.py
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
def gars_bin(
    data,
    resolution,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin point data into GARS cells and compute statistics (grid + spatial join + groupby).

    Only Point/MultiPoint geometries are considered; MultiPoints are exploded
    into individual points before binning.

    Args:
        data: Input forwarded to ``process_input_data_bin``.
        resolution: GARS resolution; cast to ``int`` and checked by
            ``validate_gars_resolution``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate; required when ``stats`` is not
            ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame with GARS cell stats and geometry for the cells that
        contain at least one point. When the input has no Point/MultiPoint
        features, an empty GeoDataFrame with ``gars`` and ``geometry`` columns
        (EPSG:4326) is returned.

    Raises:
        ValueError: If ``stats`` is not ``"count"`` and ``numeric_col`` is not
            given, or if ``numeric_col`` is missing from the input data.
    """
    resolution = validate_gars_resolution(int(resolution))

    if stats != "count" and not numeric_col:
        raise ValueError(
            "A numeric_col is required for statistics other than 'count'"
        )

    points_gdf = process_input_data_bin(
        data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    # Keep only points and multipoints; ignore others
    if not points_gdf.empty:
        points_gdf = points_gdf[
            points_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(points_gdf.geometry.geom_type.unique()):
            points_gdf = points_gdf.explode(index_parts=False, ignore_index=True)

    id_col = "gars"
    # Nothing to bin: total_bounds of an empty frame is all-NaN, which is not a
    # usable bbox for grid generation. Mirror ease_bin and return an empty result.
    if points_gdf.empty:
        return gpd.GeoDataFrame(columns=[id_col, "geometry"], crs="EPSG:4326")

    # Grid covering the points' bounding box
    minx, miny, maxx, maxy = points_gdf.total_bounds
    grid_gdf = gars_grid(resolution=resolution, bbox=(minx, miny, maxx, maxy))

    # Spatial join points -> cells with only needed columns
    join_cols = []
    if category_col and category_col in points_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in points_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in input data")
        join_cols.append(numeric_col)
    left = points_gdf[["geometry", *join_cols]]
    joined = gpd.sjoin(
        left, grid_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Inner merge keeps only cells that received at least one point
    out = grid_gdf.merge(grouped, on=id_col, how="inner")
    if "resolution" not in out.columns:
        out["resolution"] = resolution
    result_gdf = gpd.GeoDataFrame(out, geometry="geometry", crs=grid_gdf.crs or "EPSG:4326")
    return result_gdf

DIGIPIN Grid Binning Module

Bins point data into DIGIPIN grid cells and computes various statistics using a hierarchical geospatial indexing system for efficient spatial queries.

Key Functions: - digipin_bin(): Core binning function with spatial joins and aggregation - digipinbin(): Main user-facing function with multiple input/output formats - digipinbin_cli(): Command-line interface for binning functionality

Polygon Binning Module

Bins point data into polygon geometries and computes various statistics using pre-defined polygon features like administrative boundaries.

Key Functions: - polygon_bin(): Core binning function with spatial joins and aggregation - polygonbin(): Main user-facing function with multiple input/output formats - polygonbin_cli(): Command-line interface for binning functionality

polygon_bin(polygon_data, point_data, stats='count', category_col=None, numeric_col=None, lat_col='lat', lon_col='lon', **kwargs)

Bin points into provided polygons using spatial join + pandas groupby aggregation. No grid generation is performed; the input polygons are used directly.

Source code in vgrid/binning/polygonbin.py
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def polygon_bin(
    polygon_data,
    point_data,
    stats="count",
    category_col=None,
    numeric_col=None,
    lat_col="lat",
    lon_col="lon",
    **kwargs,
):
    """
    Bin points into provided polygons using spatial join + pandas groupby aggregation.
    No grid generation is performed; the input polygons are used directly.

    Args:
        polygon_data: Polygon input forwarded to ``process_input_data_bin``.
        point_data: Point input forwarded to ``process_input_data_bin``.
        stats: Statistic name forwarded to ``aggregate_joined``.
        category_col: Optional grouping column forwarded to ``aggregate_joined``.
        numeric_col: Column to aggregate when ``stats`` is not ``"count"``.
        lat_col: Latitude column name forwarded to ``process_input_data_bin``.
        lon_col: Longitude column name forwarded to ``process_input_data_bin``.
        **kwargs: Extra options forwarded to ``process_input_data_bin``.

    Returns:
        GeoDataFrame holding every non-null, valid input polygon with its
        original attributes plus the aggregated columns (left merge, so
        polygons without points are kept). The polygons' CRS is preserved,
        falling back to EPSG:4326 when none is set.

    Raises:
        ValueError: If ``numeric_col`` is needed but missing from the point data.
    """
    # Read inputs
    polygon_gdf = process_input_data_bin(
        polygon_data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )
    point_gdf = process_input_data_bin(
        point_data, lat_col=lat_col, lon_col=lon_col, **kwargs
    )

    # Ensure valid polygons only
    polygon_gdf = polygon_gdf[polygon_gdf.geometry.notnull()]
    polygon_gdf = polygon_gdf[polygon_gdf.geometry.is_valid]

    # Keep Points/MultiPoints for points and explode MultiPoints
    if not point_gdf.empty:
        point_gdf = point_gdf[
            point_gdf.geometry.geom_type.isin(["Point", "MultiPoint"])
        ].copy()
        if "MultiPoint" in set(point_gdf.geometry.geom_type.unique()):
            point_gdf = point_gdf.explode(index_parts=False, ignore_index=True)

    # Create a stable polygon id for join/merge. The column is temporary and
    # dropped at the end, so pick a name that does not collide with an existing
    # attribute; otherwise a user column called "poly_id" would be overwritten
    # and then removed from the result.
    polygon_gdf = polygon_gdf.reset_index(drop=True).copy()
    id_col = "poly_id"
    taken_names = set(polygon_gdf.columns) | set(point_gdf.columns)
    while id_col in taken_names:
        id_col = f"_{id_col}"
    polygon_gdf[id_col] = polygon_gdf.index

    # Select required columns from points for join and aggregation
    join_cols = []
    if category_col and category_col in point_gdf.columns:
        join_cols.append(category_col)
    if stats != "count" and numeric_col:
        if numeric_col not in point_gdf.columns:
            raise ValueError(f"numeric_col '{numeric_col}' not found in point data")
        join_cols.append(numeric_col)
    left = point_gdf[["geometry", *join_cols]]

    # Spatial join: assign each point to a polygon
    joined = gpd.sjoin(
        left, polygon_gdf[[id_col, "geometry"]], how="inner", predicate="within"
    )

    # Aggregate per polygon (and optional category)
    grouped = aggregate_joined(
        joined, id_col, stats=stats, category_col=category_col, numeric_col=numeric_col
    )
    grouped = grouped.reset_index()

    # Merge aggregates back to polygons; keep original polygon attributes
    out = polygon_gdf.merge(grouped, on=id_col, how="left")
    out = out.drop(columns=[id_col])
    # Preserve the polygons' own CRS rather than relabelling as EPSG:4326,
    # matching how the grid-based bin functions use grid_gdf.crs.
    result_gdf = gpd.GeoDataFrame(
        out, geometry="geometry", crs=polygon_gdf.crs or "EPSG:4326"
    )
    return result_gdf