-
Notifications
You must be signed in to change notification settings - Fork 12
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Feature/249 support geoparquet #254
base: main
Are you sure you want to change the base?
Changes from all commits
a6c0ca8
cd68b2a
807deed
f477398
52c0212
da71966
c59806e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -22,6 +22,16 @@ def _assert_is_pandas_df(x, file_type: str) -> None: | |
) | ||
|
||
|
||
def _assert_is_geopandas_df(x):
    """Check that ``x`` is a ``geopandas.GeoDataFrame``.

    Returns ``None`` when the check passes.

    Raises
    ------
    ModuleNotFoundError
        If the optional ``geopandas`` package cannot be imported.
    NotImplementedError
        If ``x`` is not a ``geopandas.GeoDataFrame``.
    """
    # geopandas is an optional dependency, so it is imported lazily here.
    # The import is guarded in this helper instead of trusting every caller
    # to have done so, so that a missing package gives the same kind of
    # friendly message as the "geoparquet" read path.
    try:
        import geopandas as gpd
    except ModuleNotFoundError:
        raise ModuleNotFoundError(
            'The "geopandas" package is required to save "geoparquet" type '
            "files."
        ) from None

    if not isinstance(x, gpd.GeoDataFrame):
        raise NotImplementedError(
            "Currently only geopandas.GeoDataFrame can be saved to a GeoParquet."
        )
|
||
|
||
def load_path(meta, path_to_version): | ||
# Check that only a single file name was given | ||
fnames = [meta.file] if isinstance(meta.file, str) else meta.file | ||
|
@@ -104,6 +114,17 @@ def load_data( | |
|
||
return pd.read_csv(f) | ||
|
||
elif meta.type == "geoparquet": | ||
try: | ||
import geopandas as gpd | ||
except ModuleNotFoundError: | ||
raise ModuleNotFoundError( | ||
'The "geopandas" package is required to read "geoparquet" type ' | ||
"files." | ||
) from None | ||
|
||
return gpd.read_parquet(f) | ||
|
||
elif meta.type == "joblib": | ||
import joblib | ||
|
||
|
@@ -144,6 +165,8 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen | |
if apply_suffix: | ||
if type == "file": | ||
suffix = "".join(Path(obj).suffixes) | ||
elif type == "geoparquet": | ||
suffix = ".parquet" | ||
else: | ||
suffix = f".{type}" | ||
else: | ||
|
@@ -175,6 +198,11 @@ def save_data(obj, fname, type=None, apply_suffix: bool = True) -> "str | Sequen | |
|
||
obj.to_parquet(final_name) | ||
|
||
elif type == "geoparquet": | ||
_assert_is_geopandas_df(obj) | ||
|
||
obj.to_parquet(final_name) | ||
|
||
elif type == "joblib": | ||
import joblib | ||
|
||
|
@@ -203,10 +231,20 @@ def default_title(obj, name): | |
import pandas as pd | ||
|
||
if isinstance(obj, pd.DataFrame): | ||
try: | ||
import geopandas as gpd | ||
except ModuleNotFoundError: | ||
obj_name = "DataFrame" | ||
else: | ||
if isinstance(obj, gpd.GeoDataFrame): | ||
obj_name = "GeoDataFrame" | ||
else: | ||
obj_name = "DataFrame" | ||
|
||
# TODO(compat): title says CSV rather than data.frame | ||
# see https://github.com/machow/pins-python/issues/5 | ||
shape_str = " x ".join(map(str, obj.shape)) | ||
return f"{name}: a pinned {shape_str} DataFrame" | ||
return f"{name}: a pinned {shape_str} {obj_name}" | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
also, as someone who is not a […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point - I think […]
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
GeoDataFrames contain extra metadata such as a coordinate system, an assigned geometry column, and in general "feel" quite different to a standard DataFrame. The naming convention reflects this: using […] |
||
else: | ||
obj_name = type(obj).__qualname__ | ||
return f"{name}: a pinned {obj_name} object" |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
this might play really nicely with the changes you've made in #263 👀 What if we merge that PR first and then refactor `geopandas` dataframes to be part of `_get_df_family`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Totally agree.