Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

492 escapes for polyfiles with multiple polylines #896

Merged
36 changes: 29 additions & 7 deletions dfm_tools/hydrolib_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,19 +227,31 @@ def DataFrame_to_PolyObject(poly_pd,name,content=None):
return polyobject


def geodataframe_to_PolyFile(poly_gdf):
def geodataframe_to_PolyFile(poly_gdf, name="L"):
"""
convert a geopandas geodataframe with x/y columns (and optional others like z/data/comment) to a hydrolib PolyFile
"""

# catch some invalid occurrences of name
if not isinstance(name, str):
raise TypeError("name should be a string")
if name == "":
raise ValueError("name is not allowed to be an empty string")
if not name[0].isalpha():
raise ValueError("name should start with a letter")

# add name column if not present
if 'name' not in poly_gdf.columns:
# make a copy to avoid altering the geodataframe
poly_gdf = poly_gdf.copy()
name_nums = poly_gdf.reset_index().index+1
poly_gdf['name'] = name + name_nums.astype(str)

polyfile_obj = hcdfm.PolyFile()
#TODO: now only name+geometry, still add other data columns
# TODO: now only name+geometry, still add other data columns
for irow, gdf_row in poly_gdf.iterrows():
poly_geom = gdf_row.geometry
if 'name' in poly_gdf.columns:
name = poly_gdf['name'].iloc[irow] #TODO: not allowed to use identical polyline names in 1 file, but this is not catched by hydrolib-core
else:
name = f'L{irow+1}' #TODO: when providing name='' it will result in an invalid plifile, but this is not catched by hydrolib-core
name_str = gdf_row['name']
if isinstance(poly_geom, LineString):
poly_geom_np = np.array(poly_geom.xy).T
else: # isinstance(poly_geom, shapely.Polygon):
Expand All @@ -251,11 +263,21 @@ def geodataframe_to_PolyFile(poly_gdf):
pointsobj_list = poly_geom_df.T.apply(dict).tolist()
for pnt in pointsobj_list:
pnt['data'] = []
polyobject = hcdfm.PolyObject(metadata={'name':name,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list)
polyobject = hcdfm.PolyObject(metadata={'name':name_str,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list)
#if content is not None: # TODO: add support for content
# polyobject.description = {'content':content}
polyfile_obj.objects.append(polyobject)

# TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not
# caught by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483
# therefore, we check it here
names = [x.metadata.name for x in polyfile_obj.objects]
if len(set(names)) != len(names):
raise ValueError(f'duplicate polyline names found in polyfile: {names}')
first_alpha = [x[0].isalpha() for x in names]
if not all(first_alpha):
raise ValueError(f'names in polyfile do not all start with a letter: {names}')

return polyfile_obj


Expand Down
5 changes: 2 additions & 3 deletions docs/notebooks/modelbuilder_example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -146,10 +146,9 @@
"\n",
"# generate plifile from grid extent and coastlines\n",
"bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)\n",
"bnd_gdf['name'] = f'{model_name}_bnd'\n",
"bnd_gdf_interp = dfmt.interpolate_bndpli(bnd_gdf, res=0.03)\n",
"pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp, name=f'{model_name}_bnd')\n",
"poly_file = os.path.join(dir_output, f'{model_name}.pli')\n",
"pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp)\n",
"pli_polyfile.save(poly_file)\n",
"\n",
"# plot basegrid and polyline\n",
Expand Down Expand Up @@ -1173,7 +1172,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
"version": "3.11.9"
}
},
"nbformat": 4,
Expand Down
1 change: 1 addition & 0 deletions docs/whats-new.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
- added workaround for grids that are not orthogonal after cutting the land with `dfmt.meshkernel_get_illegalcells()` in [#866](https://github.com/Deltares/dfm_tools/pull/866)
- updated CMEMS bcg multiyear dataset name in [#880](https://github.com/Deltares/dfm_tools/pull/880)
- added CMEMS reanalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883)
- avoid duplicate and empty polyline names in `dfmt.geodataframe_to_PolyFile()` in [#896](https://github.com/Deltares/dfm_tools/pull/896)

### Fix
- cleanups for datasets retrieved with `dfmt.ssh_retrieve_data()` in [#867](https://github.com/Deltares/dfm_tools/pull/867)
Expand Down
3 changes: 1 addition & 2 deletions tests/examples/preprocess_modelbuilder.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,9 @@

# generate plifile from grid extent and coastlines
bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)
bnd_gdf['name'] = f'{model_name}_bnd'
bnd_gdf_interp = dfmt.interpolate_bndpli(bnd_gdf,res=0.06)
pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp, name=f'{model_name}_bnd')
poly_file = os.path.join(dir_output, f'{model_name}.pli')
pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp)
pli_polyfile.save(poly_file)

#refine
Expand Down
89 changes: 89 additions & 0 deletions tests/test_hydrolib_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,3 +46,92 @@ def test_geodataframe_with_LineString_to_PolyFile(tmp_path):

polyfile = dfmt.geodataframe_to_PolyFile(gdf_polyfile)
assert isinstance(polyfile, hcdfm.PolyFile)


@pytest.fixture(scope='session')
def _bnd_gdf_session():
    """
    Build the boundary polyline geodataframe once per test session.

    The grid is created with dfmt.make_basegrid() for a small lon/lat extent and
    the boundary polylines are derived with dfmt.generate_bndpli_cutland().
    The result is kept session-scoped so these two calls are not repeated for
    every test.
    """
    dxy = 0.02
    crs = 4326
    lon_min, lon_max, lat_min, lat_max = -68.31, -68.27, 12.10, 12.21
    mk_object = dfmt.make_basegrid(lon_min, lon_max, lat_min, lat_max, dx=dxy, dy=dxy, crs=crs)
    return dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)


@pytest.fixture
def bnd_gdf(_bnd_gdf_session):
    """
    Provide each test with its own copy of the boundary geodataframe.

    Several tests add or overwrite a 'name' column on the geodataframe they
    receive. Handing out a fresh copy per test prevents such modifications from
    leaking into other tests, which would make the outcome depend on the order
    in which the tests are executed.
    """
    return _bnd_gdf_session.copy()


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_name_default(bnd_gdf):
    """Without a name argument the polylines get the default 'L' prefix plus a 1-based number."""
    polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf)
    polyline_names = []
    for polyobject in polyfile.objects:
        polyline_names.append(polyobject.metadata.name)
    assert polyline_names == ['L1', 'L2']


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_name_some(bnd_gdf):
    """A custom name argument is used as prefix, followed by a 1-based number."""
    polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf, name="test_model")
    polyline_names = []
    for polyobject in polyfile.objects:
        polyline_names.append(polyobject.metadata.name)
    assert polyline_names == ['test_model1', 'test_model2']


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_name_invalidtype(bnd_gdf):
    """A name argument that is not a string is rejected with a TypeError."""
    with pytest.raises(TypeError) as excinfo:
        dfmt.geodataframe_to_PolyFile(bnd_gdf, name=None)
    message = str(excinfo.value)
    assert 'name should be a string' in message


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_name_incorrect(bnd_gdf):
    """Names that are empty or do not start with a letter are rejected with a ValueError."""
    # (invalid name argument, expected fragment of the error message)
    invalid_cases = [
        ('1', 'name should start with a letter'),
        ('-', 'name should start with a letter'),
        ('', 'name is not allowed to be an empty string'),
    ]
    for invalid_name, expected_message in invalid_cases:
        with pytest.raises(ValueError) as excinfo:
            dfmt.geodataframe_to_PolyFile(bnd_gdf, name=invalid_name)
        assert expected_message in str(excinfo.value)


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_namecolumn_some(bnd_gdf):
    """Names supplied via a 'name' column are used as-is for the polylines."""
    # copy before adding the column so the incoming fixture object is left untouched
    gdf = bnd_gdf.copy()
    gdf['name'] = ['test_model1', 'test_model2']
    polyfile_obj = dfmt.geodataframe_to_PolyFile(gdf)
    names = [x.metadata.name for x in polyfile_obj.objects]
    assert names == ['test_model1', 'test_model2']


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_namecolumn_name_both(bnd_gdf):
    """The 'name' column takes precedence when a name argument is passed as well."""
    # name argument is ignored if name column is provided
    # not per se desired, but also not completely wrong
    # copy before adding the column so the incoming fixture object is left untouched
    gdf = bnd_gdf.copy()
    gdf['name'] = ['test_model1', 'test_model2']
    polyfile_obj = dfmt.geodataframe_to_PolyFile(gdf, name='dummy')
    names = [x.metadata.name for x in polyfile_obj.objects]
    assert names == ['test_model1', 'test_model2']


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_namecolumn_duplicated_names(bnd_gdf):
    """Identical names in the 'name' column (also when empty) raise a ValueError."""
    # copy before writing the column so the incoming fixture object is left untouched
    gdf = bnd_gdf.copy()

    # deliberately giving all polylines the same name
    gdf['name'] = 'duplicate_bnd'
    with pytest.raises(ValueError) as e:
        dfmt.geodataframe_to_PolyFile(gdf)
    assert 'duplicate polyline names found in polyfile' in str(e.value)

    # deliberately giving all polylines the same empty name
    gdf['name'] = ''
    with pytest.raises(ValueError) as e:
        dfmt.geodataframe_to_PolyFile(gdf)
    assert "duplicate polyline names found in polyfile" in str(e.value)


@pytest.mark.unittest
def test_geodataframe_to_PolyFile_namecolumn_numeric_start(bnd_gdf):
    """Names in the 'name' column that start with a digit raise a ValueError."""
    # copy before adding the column so the incoming fixture object is left untouched
    gdf = bnd_gdf.copy()
    gdf['name'] = ['1', '2']
    with pytest.raises(ValueError) as e:
        dfmt.geodataframe_to_PolyFile(gdf)
    assert 'names in polyfile do not all start with a letter' in str(e.value)


Loading