Deltares · veenstrajelmer · Jul 9, 2024 · Jul 9, 2024 · Jul 9, 2024 · Jul 9, 2024
diff --git a/dfm_tools/hydrolib_helpers.py b/dfm_tools/hydrolib_helpers.py
@@ -227,19 +227,31 @@ def DataFrame_to_PolyObject(poly_pd,name,content=None):
     return polyobject
 
 
-def geodataframe_to_PolyFile(poly_gdf):
+def geodataframe_to_PolyFile(poly_gdf, name="L"):
     """
     convert a geopandas geodataframe with x/y columns (and optional others like z/data/comment) to a hydrolib PolyFile
     """
 
+    # catch some invalid occurences of name
+    if not isinstance(name, str):
+        raise TypeError("name should be a string")
+    if name == "":
+        raise ValueError("name is not allowed to be an empty string")
+    if not name[0].isalpha():
+        raise ValueError("name should start with a letter")
+
+    # add name column if not present
+    if 'name' not in poly_gdf.columns:
+        # make a copy to avoid alternating the geodataframe
+        poly_gdf = poly_gdf.copy()
+        name_nums = poly_gdf.reset_index().index+1
+        poly_gdf['name'] = name + name_nums.astype(str)
+
     polyfile_obj = hcdfm.PolyFile()
-    #TODO: now only name+geometry, still add other data columns
+    # TODO: now only name+geometry, still add other data columns
     for irow, gdf_row in poly_gdf.iterrows():
         poly_geom = gdf_row.geometry
-        if 'name' in poly_gdf.columns:
-            name = poly_gdf['name'].iloc[irow] #TODO: not allowed to use identical polyline names in 1 file, but this is not catched by hydrolib-core
-        else:
-            name = f'L{irow+1}' #TODO: when providing name='' it will result in an invalid plifile, but this is not catched by hydrolib-core
+        name_str = gdf_row['name']
         if isinstance(poly_geom, LineString):
             poly_geom_np = np.array(poly_geom.xy).T
         else: # isinstance(poly_geom, shapely.Polygon):
@@ -251,11 +263,21 @@ def geodataframe_to_PolyFile(poly_gdf):
         pointsobj_list = poly_geom_df.T.apply(dict).tolist()
         for pnt in pointsobj_list:
             pnt['data'] = []
-        polyobject = hcdfm.PolyObject(metadata={'name':name,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list)
+        polyobject = hcdfm.PolyObject(metadata={'name':name_str,'n_rows':poly_geom_np.shape[0],'n_columns':poly_geom_np.shape[1]}, points=pointsobj_list)
         #if content is not None: # TODO: add support for content
         #    polyobject.description = {'content':content}
         polyfile_obj.objects.append(polyobject)
 
+    # TODO: not allowed to have empty or duplicated polyline names in a polyfile, this is not 
+    # catched by hydrolib-core: https://github.com/Deltares/HYDROLIB-core/issues/483
+    # therefore, we check it here
+    names = [x.metadata.name for x in polyfile_obj.objects]
+    if len(set(names)) != len(names):
+        raise ValueError(f'duplicate polyline names found in polyfile: {names}')
+    first_alpha = [x[0].isalpha() for x in names]
+    if not all(first_alpha):
+        raise ValueError(f'names in polyfile do not all start with a letter: {names}')
+
     return polyfile_obj
 
 

diff --git a/docs/notebooks/modelbuilder_example.ipynb b/docs/notebooks/modelbuilder_example.ipynb
@@ -146,10 +146,9 @@
     "\n",
     "# generate plifile from grid extent and coastlines\n",
     "bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)\n",
-    "bnd_gdf['name'] = f'{model_name}_bnd'\n",
     "bnd_gdf_interp = dfmt.interpolate_bndpli(bnd_gdf, res=0.03)\n",
+    "pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp, name=f'{model_name}_bnd')\n",
     "poly_file = os.path.join(dir_output, f'{model_name}.pli')\n",
-    "pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp)\n",
     "pli_polyfile.save(poly_file)\n",
     "\n",
     "# plot basegrid and polyline\n",
@@ -1173,7 +1172,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.11.9"
   }
  },
  "nbformat": 4,

diff --git a/docs/whats-new.md b/docs/whats-new.md
@@ -7,6 +7,7 @@
 - added workaround for grids that are not orthogonal after cutting the land with `dfmt.meshkernel_get_illegalcells()` in [#866](https://github.com/Deltares/dfm_tools/pull/866)
 - updated CMEMS bcg multiyear dataset name in [#880](https://github.com/Deltares/dfm_tools/pull/880)
 - added CMEMS reananalysis-interim (myint) datasets to `dfmt.download_CMEMS()` in [#883](https://github.com/Deltares/dfm_tools/pull/883)
+- avoid duplicate and empty polyline names in `dfmt.geodataframe_to_PolyFile()` in [#896](https://github.com/Deltares/dfm_tools/pull/896)
 
 ### Fix
 - cleanups for datasets retrieved with `dfmt.ssh_retrieve_data()` in [#867](https://github.com/Deltares/dfm_tools/pull/867)

diff --git a/tests/examples/preprocess_modelbuilder.py b/tests/examples/preprocess_modelbuilder.py
@@ -68,10 +68,9 @@
 
 # generate plifile from grid extent and coastlines
 bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)
-bnd_gdf['name'] = f'{model_name}_bnd'
 bnd_gdf_interp = dfmt.interpolate_bndpli(bnd_gdf,res=0.06)
+pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp, name=f'{model_name}_bnd')
 poly_file = os.path.join(dir_output, f'{model_name}.pli')
-pli_polyfile = dfmt.geodataframe_to_PolyFile(bnd_gdf_interp)
 pli_polyfile.save(poly_file)
 
 #refine

diff --git a/tests/test_hydrolib_helpers.py b/tests/test_hydrolib_helpers.py
@@ -46,3 +46,92 @@ def test_geodataframe_with_LineString_to_PolyFile(tmp_path):
 
     polyfile = dfmt.geodataframe_to_PolyFile(gdf_polyfile)
     assert isinstance(polyfile, hcdfm.PolyFile)
+
+
+@pytest.fixture(scope='session')
+def bnd_gdf():
+    dxy = 0.02
+    crs = 4326
+    lon_min, lon_max, lat_min, lat_max = -68.31, -68.27, 12.10, 12.21
+    mk_object = dfmt.make_basegrid(lon_min, lon_max, lat_min, lat_max, dx=dxy, dy=dxy, crs=crs)
+    bnd_gdf = dfmt.generate_bndpli_cutland(mk=mk_object, res='h', buffer=0.01)
+    return bnd_gdf
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_name_default(bnd_gdf):
+    polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf)
+    names = [x.metadata.name for x in polyfile_obj.objects]
+    assert names == ['L1', 'L2']
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_name_some(bnd_gdf):
+    polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf, name="test_model")
+    names = [x.metadata.name for x in polyfile_obj.objects]
+    assert names == ['test_model1', 'test_model2']
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_name_invalidtype(bnd_gdf):
+    with pytest.raises(TypeError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf, name=None)
+    assert 'name should be a string' in str(e.value)
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_name_incorrect(bnd_gdf):
+    with pytest.raises(ValueError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf, name='1')
+    assert 'name should start with a letter' in str(e.value)
+
+    with pytest.raises(ValueError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf, name='-')
+    assert 'name should start with a letter' in str(e.value)
+
+    with pytest.raises(ValueError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf, name='')
+    assert 'name is not allowed to be an empty string' in str(e.value)
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_namecolumn_some(bnd_gdf):
+    bnd_gdf['name'] = ['test_model1','test_model2']
+    polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf)
+    names = [x.metadata.name for x in polyfile_obj.objects]
+    assert names == ['test_model1', 'test_model2']
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_namecolumn_name_both(bnd_gdf):
+    # name argument is ignored if name column is provided
+    # not per se desired, but also not completely wrong
+    bnd_gdf['name'] = ['test_model1','test_model2']
+    polyfile_obj = dfmt.geodataframe_to_PolyFile(bnd_gdf, name='dummy')
+    names = [x.metadata.name for x in polyfile_obj.objects]
+    assert names == ['test_model1', 'test_model2']
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_namecolumn_duplicated_names(bnd_gdf):
+    # deliberately giving all polylines the same name
+    bnd_gdf['name'] = 'duplicate_bnd'
+    with pytest.raises(ValueError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf)
+    assert 'duplicate polyline names found in polyfile' in str(e.value)
+
+    # deliberately giving all polylines the same empty name
+    bnd_gdf['name'] = ''
+    with pytest.raises(ValueError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf)
+    assert "duplicate polyline names found in polyfile" in str(e.value)
+
+
+@pytest.mark.unittest
+def test_geodataframe_to_PolyFile_namecolumn_numeric_start(bnd_gdf):
+    bnd_gdf['name'] = ['1','2']
+    with pytest.raises(ValueError) as e:
+        dfmt.geodataframe_to_PolyFile(bnd_gdf)
+    assert 'names in polyfile do not all start with a letter' in str(e.value)
+
+