Skip to content

Commit f176565

Browse files
BUG: correctly return empty result for out-of-bounds skip_features for all file formats (#550)
Co-authored-by: Pieter Roggemans <pieter.roggemans@gmail.com>
1 parent 9df17a4 commit f176565

File tree

3 files changed

+57
-9
lines changed

3 files changed

+57
-9
lines changed

CHANGES.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,9 @@
66

77
- Compatibility with Shapely >= 2.1 to avoid triggering a deprecation warning at
88
import (#542).
9-
9+
- Fix reading with a `skip_features` larger than the available number of
10+
features to ensure this consistently returns an empty result for all file
11+
formats (#550).
1012

1113
## 0.11.0 (2025-05-08)
1214

pyogrio/_io.pyx

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -771,6 +771,26 @@ cdef apply_geometry_filter(OGRLayerH ogr_layer, wkb):
771771
OGR_G_DestroyGeometry(ogr_geometry)
772772

773773

774+
cdef apply_skip_features(OGRLayerH ogr_layer, int skip_features):
775+
"""Applies skip_features to layer.
776+
777+
Parameters
778+
----------
779+
ogr_layer : pointer to open OGR layer
780+
wskip_features : int
781+
"""
782+
err = OGR_L_SetNextByIndex(ogr_layer, skip_features)
783+
# GDAL can raise an error (depending on the format) for out-of-bound index,
784+
# but `validate_feature_range()` should ensure we only pass a valid number
785+
if err != OGRERR_NONE:
786+
try:
787+
check_last_error()
788+
except CPLE_BaseError as exc:
789+
raise ValueError(str(exc))
790+
791+
raise ValueError(f"Applying {skip_features=} raised an error")
792+
793+
774794
cdef validate_feature_range(
775795
OGRLayerH ogr_layer, int skip_features=0, int max_features=0
776796
):
@@ -793,9 +813,9 @@ cdef validate_feature_range(
793813
return 0, 0
794814

795815
if skip_features >= feature_count:
796-
skip_features = feature_count
816+
return 0, 0
797817

798-
elif max_features == 0:
818+
if max_features == 0:
799819
num_features = feature_count - skip_features
800820

801821
elif max_features > feature_count:
@@ -973,7 +993,7 @@ cdef get_features(
973993
OGR_L_ResetReading(ogr_layer)
974994

975995
if skip_features > 0:
976-
OGR_L_SetNextByIndex(ogr_layer, skip_features)
996+
apply_skip_features(ogr_layer, skip_features)
977997

978998
if return_fids:
979999
fid_data = np.empty(shape=(num_features), dtype=np.int64)
@@ -1148,7 +1168,7 @@ cdef get_bounds(OGRLayerH ogr_layer, int skip_features, int num_features):
11481168
OGR_L_ResetReading(ogr_layer)
11491169

11501170
if skip_features > 0:
1151-
OGR_L_SetNextByIndex(ogr_layer, skip_features)
1171+
apply_skip_features(ogr_layer, skip_features)
11521172

11531173
fid_data = np.empty(shape=(num_features), dtype=np.int64)
11541174
fid_view = fid_data[:]
@@ -1668,6 +1688,13 @@ def ogr_open_arrow(
16681688
elif mask is not None:
16691689
apply_geometry_filter(ogr_layer, mask)
16701690

1691+
# Limit feature range to available range (cannot use logic of
1692+
# `validate_feature_range` because max_features is not supported)
1693+
if skip_features > 0:
1694+
feature_count = get_feature_count(ogr_layer, 1)
1695+
if skip_features >= feature_count:
1696+
skip_features = feature_count
1697+
16711698
# Limit to specified columns
16721699
if ignored_fields:
16731700
for field in ignored_fields:
@@ -1704,9 +1731,11 @@ def ogr_open_arrow(
17041731
if not OGR_L_GetArrowStream(ogr_layer, stream, options):
17051732
raise RuntimeError("Failed to open ArrowArrayStream from Layer")
17061733

1707-
if skip_features:
1734+
if skip_features > 0:
17081735
# only supported for GDAL >= 3.8.0; have to do this after getting
17091736
# the Arrow stream
1737+
# use `OGR_L_SetNextByIndex` directly and not `apply_skip_features`
1738+
# to ignore errors in case skip_features == feature_count
17101739
OGR_L_SetNextByIndex(ogr_layer, skip_features)
17111740

17121741
if use_pyarrow:

pyogrio/tests/test_geopandas_io.py

Lines changed: 20 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -759,12 +759,22 @@ def test_read_negative_skip_features(naturalearth_lowres, use_arrow):
759759
read_dataframe(naturalearth_lowres, skip_features=-1, use_arrow=use_arrow)
760760

761761

762+
@pytest.mark.parametrize("skip_features", [0, 10, 200])
762763
@pytest.mark.parametrize("max_features", [10, 100])
763-
def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features):
764+
def test_read_max_features(
765+
naturalearth_lowres_all_ext, use_arrow, max_features, skip_features
766+
):
764767
ext = naturalearth_lowres_all_ext.suffix
765-
expected = read_dataframe(naturalearth_lowres_all_ext).iloc[:max_features]
768+
expected = (
769+
read_dataframe(naturalearth_lowres_all_ext)
770+
.iloc[skip_features : skip_features + max_features]
771+
.reset_index(drop=True)
772+
)
766773
df = read_dataframe(
767-
naturalearth_lowres_all_ext, max_features=max_features, use_arrow=use_arrow
774+
naturalearth_lowres_all_ext,
775+
skip_features=skip_features,
776+
max_features=max_features,
777+
use_arrow=use_arrow,
768778
)
769779

770780
assert len(df) == len(expected)
@@ -775,6 +785,13 @@ def test_read_max_features(naturalearth_lowres_all_ext, use_arrow, max_features)
775785
# In .geojsonl the vertices are reordered, so normalize
776786
is_jsons = ext == ".geojsonl"
777787

788+
if len(expected) == 0 and not use_arrow:
789+
# for pandas >= 3, the column has string dtype but when reading it as
790+
# empty result, it gets inferred as object dtype
791+
expected["continent"] = expected["continent"].astype("object")
792+
expected["name"] = expected["name"].astype("object")
793+
expected["iso_a3"] = expected["iso_a3"].astype("object")
794+
778795
assert_geodataframe_equal(
779796
df,
780797
expected,

0 commit comments

Comments
 (0)