diff --git a/dfm_tools/hydrolib_helpers.py b/dfm_tools/hydrolib_helpers.py index 1aebbae03..a3c906c71 100644 --- a/dfm_tools/hydrolib_helpers.py +++ b/dfm_tools/hydrolib_helpers.py @@ -227,19 +227,31 @@ def DataFrame_to_PolyObject(poly_pd,name,content=None): return polyobject -def geodataframe_to_PolyFile(poly_gdf): +def geodataframe_to_PolyFile(poly_gdf, name="L"): """ convert a geopandas geodataframe with x/y columns (and optional others like z/data/comment) to a hydrolib PolyFile """ + # catch some invalid occurences of name + if not isinstance(name, str): + raise TypeError("name should be a string") + if name == "": + raise ValueError("name is not allowed to be an empty string") + if not name[0].isalpha(): + raise ValueError("name should start with a letter") + + # add name column if not present + if 'name' not in poly_gdf.columns: + # make a copy to avoid alternating the geodataframe + poly_gdf = poly_gdf.copy() + name_nums = poly_gdf.reset_index().index+1 + poly_gdf['name'] = name + name_nums.astype(str) + polyfile_obj = hcdfm.PolyFile() - #TODO: now only name+geometry, still add other data columns + # TODO: now only name+geometry, still add other data columns for irow, gdf_row in poly_gdf.iterrows(): poly_geom = gdf_row.geometry - if 'name' in poly_gdf.columns: - name = poly_gdf['name'].iloc[irow] #TODO: not allowed to use identical polyline names in 1 file, but this is not catched by hydrolib-core - else: - name = f'L{irow+1}' #TODO: when providing name='' it will result in an invalid plifile, but this is not catched by hydrolib-core + name_str = gdf_row['name'] if isinstance(poly_geom, LineString): poly_geom_np = np.array(poly_geom.xy).T else: # isinstance(poly_geom, shapely.Polygon): @@ -251,11 +263,21 @@ def geodataframe_to_PolyFile(poly_gdf): pointsobj_list = poly_geom_df.T.apply(dict).tolist() for pnt in pointsobj_list: pnt['data'] = [] - polyobject = hcdfm.PolyObject(metadata={'name':name,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list) + polyobject = hcdfm.PolyObject(metadata={'name':name_str,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list) #if content is not None: # TODO: add support for content # polyobject.description = {'content':content} polyfile_obj.objects.append(polyobject) + # TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not + # catched by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483 + # therefore, we check it here + names = [x.metadata.name for x in polyfile_obj.objects] + if len(set(names)) != len(names): + raise ValueError(f'duplicate polyline names found in polyfile: {names}') + first_alpha = [x[0].isalpha() for x in names] + if not all(first_alpha): + raise ValueError(f'names in polyfile do not all start with a letter: {names}') + return polyfile_obj diff --git a/docs/notebooks/modelbuilder_example.ipynb b/docs/notebooks/modelbuilder_example.ipynb index 3c53cb077..f31c9e17b 100644 --- a/docs/notebooks/modelbuilder_example.ipynb +++ b/docs/notebooks/modelbuilder_example.ipynb @@ -146,10 +146,9 @@ "\n", "# generate plifile from grid extent and coastlines\n", "bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)\n", - "bnd_gdf['name'] = f'{model_name}_bnd'\n", "bnd_gdf_interp = dfmt.interpolate_bndpli(bnd_gdf, res=0.03)\n", + "pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp, name=f'{model_name}_bnd')\n", "poly_file = os.path.join(dir_output, f'{model_name}.pli')\n", - "pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp)\n", "pli_polyfile.save(poly_file)\n", "\n", "# plot basegrid and polyline\n", @@ -1173,7 +1172,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.11.9" } }, "nbformat": 4, diff --git a/docs/whats-new.md b/docs/whats-new.md index f5a9b0306..9ada33a74 100644 --- a/docs/whats-new.md +++ b/docs/whats-new.md @@ -7,6 +7,7 @@ - added workaround for grids that are not orthogonal after cutting the land with `dfmt.meshkernel_get_illegalcells()` in [#866](https://github.com/Deltares/dfm_tools/pull/866) - updated CMEMS bcg multiyear dataset name in [#880](https://github.com/Deltares/dfm_tools/pull/880) - added CMEMS reananalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883) +- avoid duplicate and empty polyline names in `dfmt.geodataframe_to_PolyFile()` in [#896](https://github.com/Deltares/dfm_tools/pull/896) ### Fix - cleanups for datasets retrieved with `dfmt.ssh_retrieve_data()` in [#867](https://github.com/Deltares/dfm_tools/pull/867) diff --git a/tests/examples/preprocess_modelbuilder.py b/tests/examples/preprocess_modelbuilder.py index c3e148a41..812b15d6b 100644 --- a/tests/examples/preprocess_modelbuilder.py +++ b/tests/examples/preprocess_modelbuilder.py @@ -68,10 +68,9 @@ # generate plifile from grid extent and coastlines bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01) -bnd_gdf['name'] = f'{model_name}_bnd' bnd_gdf_interp = dfmt.interpolate_bndpli(bnd_gdf,res=0.06) +pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp, name=f'{model_name}_bnd') poly_file = os.path.join(dir_output, f'{model_name}.pli') -pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp) pli_polyfile.save(poly_file) #refine diff --git a/tests/test_hydrolib_helpers.py b/tests/test_hydrolib_helpers.py index 972b83adf..c6b2c958e 100644 --- a/tests/test_hydrolib_helpers.py +++ b/tests/test_hydrolib_helpers.py @@ -46,3 +46,92 @@ def test_geodataframe_with_LineString_to_PolyFile(tmp_path): polyfile = dfmt.geodataframe_to_PolyFile(gdf_polyfile) assert isinstance(polyfile, hcdfm.PolyFile) + + +@pytest.fixture(scope='session') +def bnd_gdf(): + dxy = 0.02 + crs = 4326 + lon_min, lon_max, lat_min, lat_max = -68.31, -68.27, 12.10, 12.21 + mk_object = dfmt.make_basegrid(lon_min, lon_max, lat_min, lat_max, dx=dxy, dy=dxy, crs=crs) + bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01) + return bnd_gdf + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_name_default(bnd_gdf): + polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf) + names = [x.metadata.name for x in polyfile_obj.objects] + assert names == ['L1', 'L2'] + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_name_some(bnd_gdf): + polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf, name="test_model") + names = [x.metadata.name for x in polyfile_obj.objects] + assert names == ['test_model1', 'test_model2'] + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_name_invalidtype(bnd_gdf): + with pytest.raises(TypeError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf, name=None) + assert 'name should be a string' in str(e.value) + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_name_incorrect(bnd_gdf): + with pytest.raises(ValueError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf, name='1') + assert 'name should start with a letter' in str(e.value) + + with pytest.raises(ValueError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf, name='-') + assert 'name should start with a letter' in str(e.value) + + with pytest.raises(ValueError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf, name='') + assert 'name is not allowed to be an empty string' in str(e.value) + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_namecolumn_some(bnd_gdf): + bnd_gdf['name'] = ['test_model1','test_model2'] + polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf) + names = [x.metadata.name for x in polyfile_obj.objects] + assert names == ['test_model1', 'test_model2'] + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_namecolumn_name_both(bnd_gdf): + # name argument is ignored if name column is provided + # not per se desired, but also not completely wrong + bnd_gdf['name'] = ['test_model1','test_model2'] + polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf, name='dummy') + names = [x.metadata.name for x in polyfile_obj.objects] + assert names == ['test_model1', 'test_model2'] + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_namecolumn_duplicated_names(bnd_gdf): + # deliberately giving all polylines the same name + bnd_gdf['name'] = 'duplicate_bnd' + with pytest.raises(ValueError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf) + assert 'duplicate polyline names found in polyfile' in str(e.value) + + # deliberately giving all polylines the same empty name + bnd_gdf['name'] = '' + with pytest.raises(ValueError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf) + assert "duplicate polyline names found in polyfile" in str(e.value) + + +@pytest.mark.unittest +def test_geodataframe_to_PolyFile_namecolumn_numeric_start(bnd_gdf): + bnd_gdf['name'] = ['1','2'] + with pytest.raises(ValueError) as e: + dfmt.geodataframe_to_PolyFile(bnd_gdf) + assert 'names in polyfile do not all start with a letter' in str(e.value) + +