-
-
Notifications
You must be signed in to change notification settings - Fork 29
ENH: support reading from in-memory buffers #25
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 13 commits
0a10bc4
60b8d28
d81a70c
dadea0c
083a32a
9b3bdf3
fa9808b
6d1aeff
719399d
1502ab1
b8f7985
bc1b4f1
792210b
3e055f3
74f12a9
41f70d4
bd68a50
dadcc40
985a1ab
332680e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,13 @@ | ||
# Merge of HEAD and upstream/main: both sides are required. uuid4 is used to
# name in-memory virtual files; os/sys/warnings and the error helpers come
# from upstream.
import os
import sys
import warnings
from uuid import uuid4

from pyogrio._err cimport exc_wrap_int, exc_wrap_ogrerr
from pyogrio._err import CPLE_BaseError
|
||
|
||
cdef get_string(const char *c_str, str encoding="UTF-8"): | ||
|
@@ -122,6 +126,30 @@ def ogr_list_drivers(): | |
return drivers | ||
|
||
|
||
def buffer_to_virtual_file(bytesbuf, ext=''):
    """Map a bytes buffer to a GDAL in-memory (``/vsimem/``) virtual file.

    This (and remove_virtual_file) is originally copied from the Fiona project
    (https://github.com/Toblerity/Fiona/blob/c388e9adcf9d33e3bb04bf92b2ff210bbce452d9/fiona/ogrext.pyx#L1863-L1879)

    Parameters
    ----------
    bytesbuf : bytes
        Raw contents of the dataset. The buffer is passed to GDAL with the
        final argument set to 0 (GDAL's take-ownership flag), so the caller
        keeps ownership and should keep the object alive until the virtual
        file has been removed with ``remove_virtual_file``.
    ext : str, optional
        Empty, or begins with a period and contains at most one period.

    Returns
    -------
    str
        Path of the newly created virtual file (``/vsimem/<uuid hex><ext>``).

    Raises
    ------
    OSError
        If the buffer cannot be mapped, or the temporary handle used to
        register it cannot be closed.
    """
    # A random name avoids collisions between concurrently mapped buffers.
    vsi_filename = '/vsimem/{}'.format(uuid4().hex + ext)

    # Encode once and keep a named bytes object: it is reused on the error
    # path below and makes the lifetime of the C string explicit.
    vsi_filename_b = vsi_filename.encode("UTF-8")

    vsi_handle = VSIFileFromMemBuffer(
        vsi_filename_b, <unsigned char *>bytesbuf, len(bytesbuf), 0
    )

    if vsi_handle == NULL:
        raise OSError('failed to map buffer to file')

    if VSIFCloseL(vsi_handle) != 0:
        # The entry was already registered; remove it so a failed close does
        # not leave an orphaned /vsimem/ file behind.
        VSIUnlink(vsi_filename_b)
        raise OSError('failed to close mapped file handle')

    return vsi_filename
|
||
|
||
def remove_virtual_file(vsi_filename):
    """Remove a virtual file previously created by buffer_to_virtual_file.

    Parameters
    ----------
    vsi_filename : str
        Path of the virtual file to delete.

    Returns
    -------
    The status value reported by ``VSIUnlink`` for the encoded path.
    """
    encoded_name = vsi_filename.encode("utf8")
    return VSIUnlink(encoded_name)
|
||
|
||
cdef void set_proj_search_path(str path): | ||
"""Set PROJ library data file search path for use in GDAL.""" | ||
cdef char **paths = NULL | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,6 +6,7 @@ | |
|
||
with GDALEnv(): | ||
from pyogrio._io import ogr_read, ogr_read_info, ogr_list_layers, ogr_write | ||
from pyogrio._ogr import buffer_to_virtual_file, remove_virtual_file | ||
|
||
|
||
DRIVERS = { | ||
|
@@ -17,7 +18,8 @@ | |
|
||
|
||
def read( | ||
path, | ||
path_or_buffer, | ||
/, | ||
brendan-ward marked this conversation as resolved.
Show resolved
Hide resolved
|
||
layer=None, | ||
encoding=None, | ||
columns=None, | ||
|
@@ -37,8 +39,8 @@ def read( | |
|
||
Parameters | ||
---------- | ||
path : pathlib.Path or str | ||
A dataset path or URI. | ||
path_or_buffer : pathlib.Path or str, or bytes buffer | ||
A dataset path or URI, or raw buffer. | ||
layer : int or str, optional (default: first layer) | ||
If an integer is provided, it corresponds to the index of the layer | ||
with the data source. If a string is provided, it must match the name | ||
|
@@ -98,26 +100,46 @@ def read( | |
"geometry": "<geometry type>" | ||
} | ||
""" | ||
path = vsi_path(str(path)) | ||
if hasattr(path_or_buffer, "read"): | ||
path_or_buffer = path_or_buffer.read() | ||
|
||
from_buffer = False | ||
if isinstance(path_or_buffer, bytes): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here I am checking for bytes vs. strings to determine whether the input is a path or in-memory bytes. I don't know whether that is robust enough. Or do we want a separate […]? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I think file-like objects that have a […] |
||
from_buffer = True | ||
ext = "" | ||
is_zipped = path_or_buffer[:4].startswith(b'PK\x03\x04') | ||
if is_zipped: | ||
ext = ".zip" | ||
brendan-ward marked this conversation as resolved.
Show resolved
Hide resolved
|
||
path = buffer_to_virtual_file(path_or_buffer, ext=ext) | ||
if is_zipped: | ||
path = "/vsizip/" + path | ||
brendan-ward marked this conversation as resolved.
Show resolved
Hide resolved
|
||
else: | ||
path = vsi_path(str(path_or_buffer)) | ||
|
||
if not "://" in path: | ||
if not "/vsi" in path.lower() and not os.path.exists(path): | ||
raise ValueError(f"'{path}' does not exist") | ||
|
||
return ogr_read( | ||
path, | ||
layer=layer, | ||
encoding=encoding, | ||
columns=columns, | ||
read_geometry=read_geometry, | ||
force_2d=force_2d, | ||
skip_features=skip_features, | ||
max_features=max_features or 0, | ||
where=where, | ||
bbox=bbox, | ||
fids=fids, | ||
return_fids=return_fids, | ||
) | ||
try: | ||
result = ogr_read( | ||
path, | ||
layer=layer, | ||
encoding=encoding, | ||
columns=columns, | ||
read_geometry=read_geometry, | ||
force_2d=force_2d, | ||
skip_features=skip_features, | ||
max_features=max_features or 0, | ||
where=where, | ||
bbox=bbox, | ||
fids=fids, | ||
return_fids=return_fids, | ||
) | ||
finally: | ||
if from_buffer: | ||
remove_virtual_file(path) | ||
|
||
return result | ||
|
||
|
||
def write( | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -299,3 +299,65 @@ def test_write_unsupported(tmpdir, naturalearth_lowres): | |
|
||
with pytest.raises(DataSourceError, match="does not support write functionality"): | ||
write(filename, geometry, field_data, driver="OpenFileGDB", **meta) | ||
|
||
|
||
def assert_equal_result(result1, result2):
    """Assert that two ``read()`` results describe the same data.

    Parameters
    ----------
    result1, result2 : tuple
        ``(meta, index, geometry, field_data)`` tuples as returned by
        ``read``.

    The check covers the field names in ``meta["fields"]``, the index, the
    geometries and every field data array. The test is skipped when pygeos
    is not installed.
    """
    meta1, index1, geometry1, field_data1 = result1
    meta2, index2, geometry2, field_data2 = result2

    assert np.array_equal(meta1["fields"], meta2["fields"])
    assert np.array_equal(index1, index2)

    # Geometries are decoded with pygeos and compared with a small tolerance
    # instead of comparing the raw WKB arrays with np.array_equal; presumably
    # a write/read round trip does not reproduce the WKB byte-for-byte.
    pygeos = pytest.importorskip("pygeos")
    assert pygeos.equals_exact(
        pygeos.from_wkb(geometry1), pygeos.from_wkb(geometry2), tolerance=0.00001
    ).all()

    # zip() stops at the shorter input, so check the number of fields first.
    assert len(field_data1) == len(field_data2)
    assert all(np.array_equal(f1, f2) for f1, f2 in zip(field_data1, field_data2))
|
||
|
||
@pytest.mark.parametrize(
    "driver,ext",
    [
        ("GeoJSON", "geojson"),
        ("GPKG", "gpkg"),
    ],
)
def test_read_from_bytes(tmpdir, naturalearth_lowres, driver, ext):
    """Reading a dataset from raw bytes matches the originally read data.

    The reference data is written to a temporary file with the given driver,
    loaded back as a bytes object, and passed directly to ``read``.
    """
    meta, index, geometry, field_data = read(naturalearth_lowres)
    expected = (meta, index, geometry, field_data)

    target = os.path.join(str(tmpdir), f"test.{ext}")
    write(target, geometry, field_data, driver=driver, **meta)

    with open(target, "rb") as src:
        raw = src.read()

    from_bytes = read(raw)
    assert_equal_result(expected, from_bytes)
|
||
|
||
def test_read_from_bytes_zipped(tmpdir, naturalearth_lowres_vsi):
    """Reading the fixture file's raw bytes matches reading it via its VSI path.

    The ``naturalearth_lowres_vsi`` fixture supplies a filesystem path and the
    corresponding VSI path for the same dataset.
    """
    archive_path, archive_vsi_path = naturalearth_lowres_vsi
    meta, index, geometry, field_data = read(archive_vsi_path)
    expected = (meta, index, geometry, field_data)

    with open(archive_path, "rb") as src:
        raw = src.read()

    from_bytes = read(raw)
    assert_equal_result(expected, from_bytes)
|
||
|
||
@pytest.mark.parametrize(
    "driver,ext",
    [
        ("GeoJSON", "geojson"),
        ("GPKG", "gpkg"),
    ],
)
def test_read_from_file_like(tmpdir, naturalearth_lowres, driver, ext):
    """Reading from an open binary file object matches the original data.

    The reference data is written to a temporary file with the given driver,
    and the open file handle itself (not its contents) is passed to ``read``.
    """
    meta, index, geometry, field_data = read(naturalearth_lowres)
    expected = (meta, index, geometry, field_data)

    target = os.path.join(str(tmpdir), f"test.{ext}")
    write(target, geometry, field_data, driver=driver, **meta)

    with open(target, "rb") as handle:
        from_file_like = read(handle)

    assert_equal_result(expected, from_file_like)
Uh oh!
There was an error while loading. Please reload this page.