diff --git a/dask_geopandas/core.py b/dask_geopandas/core.py index 25e2c6d..b8b9ab7 100644 --- a/dask_geopandas/core.py +++ b/dask_geopandas/core.py @@ -389,7 +389,7 @@ def hilbert_distance(self, total_bounds=None, level=16): _hilbert_distance, total_bounds=total_bounds, level=level, - meta=pd.Series([], name="hilbert_distance", dtype="uint32"), + meta=pd.Series([], name="hilbert_distance", dtype=np.int64), ) return distances diff --git a/dask_geopandas/hilbert_distance.py b/dask_geopandas/hilbert_distance.py index b8d5a75..8bb597e 100644 --- a/dask_geopandas/hilbert_distance.py +++ b/dask_geopandas/hilbert_distance.py @@ -43,7 +43,9 @@ def _hilbert_distance(gdf, total_bounds=None, level=16): # Compute distance along hilbert curve distances = _encode(level, x, y) - return pd.Series(distances, index=gdf.index, name="hilbert_distance") + return pd.Series( + distances, index=gdf.index, name="hilbert_distance", dtype=np.int64 + ) def _continuous_to_discrete_coords(bounds, level, total_bounds): diff --git a/dask_geopandas/tests/test_spatial_partitioning.py b/dask_geopandas/tests/test_spatial_partitioning.py index 7c7d8a3..f33ec81 100644 --- a/dask_geopandas/tests/test_spatial_partitioning.py +++ b/dask_geopandas/tests/test_spatial_partitioning.py @@ -1,9 +1,12 @@ +import numpy as np import pytest import geopandas from geopandas.testing import assert_geodataframe_equal, assert_geoseries_equal +from shapely.geometry import Point import dask_geopandas +from dask_geopandas.hilbert_distance import _hilbert_distance def test_propagate_on_geometry_access(): @@ -60,3 +63,16 @@ def test_cx(): assert len(subset) == 0 expected = df.cx[-200:-190, 300:400] assert_geodataframe_equal(subset.compute(), expected) + + +def test_geopandas_handles_large_hilbert_distances(): + df = geopandas.GeoDataFrame( + {"geometry": [Point(-103152.516, -8942.156), Point(118914.500, 1010032.562)]} + ) + + # make sure we have values greater than 32bits + dist = _hilbert_distance(df) + assert ((dist > np.iinfo(np.int32).max) | (dist < np.iinfo(np.int32).min)).any() + + ddf = dask_geopandas.from_geopandas(df, npartitions=1) + ddf.spatial_shuffle()