Skip to content

Commit

Permalink
Relax varnames (#48)
Browse files Browse the repository at this point in the history
  • Loading branch information
pp-mo authored Mar 18, 2022
1 parent 66a9a95 commit 14467d3
Show file tree
Hide file tree
Showing 3 changed files with 49 additions and 16 deletions.
17 changes: 12 additions & 5 deletions lib/ugrid_checks/check.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,15 @@
"trajectory_id",
]

# Valid cf varname regex : copied from iris.common.metadata code.
_VALID_NAME_REGEX = re.compile(r"""^[a-zA-Z][a-zA-Z0-9]*[\w.+\-@]*$""")
# Valid varname regex.
# It is not possible to use the common 'variable name' patterns,
# as we *can* legally have variables which start with a digit, etc.
# In principle, in netcdf anything but forward-slash is allowed
# - c.f. https://github.com/cf-convention/cf-conventions/issues/307
# However, clearly, we can't really handle spaces in names.
# FOR NOW: allow a Python "normal word character", followed by anything except
# the forward-slash and space.
_VALID_NAME_REGEX = re.compile(r"^\w[^ /]*$")


class Checker:
Expand Down Expand Up @@ -152,7 +159,7 @@ def check_mesh_attr_is_varlist(
"Mesh",
meshvar.name,
f'has {attrname}="{value}", '
"which is not a valid list of netcdf variable names.",
"which is not a valid list of variable names.",
)
success = False
if success:
Expand All @@ -165,7 +172,7 @@ def check_mesh_attr_is_varlist(
"Mesh",
meshvar.name,
f'has {attrname}="{varname}", '
"which is not a valid netcdf variable name.",
"which is not a valid variable name.",
)
success = False
elif varname not in self._all_vars:
Expand Down Expand Up @@ -202,7 +209,7 @@ def var_ref_problem(self, attr_value: np.ndarray) -> str:
if succeed:
var_name = property_as_single_name(attr_value)
if not _VALID_NAME_REGEX.match(var_name):
result = "is not a valid netcdf variable name"
result = "is not a valid variable name"
succeed = False
if succeed:
bounds_var = self._all_vars.get(var_name)
Expand Down
22 changes: 11 additions & 11 deletions lib/ugrid_checks/tests/check/test_check_dataset__checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -432,7 +432,7 @@ def test_r105_r108_mesh_badcoordattr_empty(self, scan_2d_and_meshvar):
meshvar.attributes["node_coordinates"] = ""
msg = (
'"topology" has node_coordinates="".*'
"is not a valid list of netcdf variable"
"is not a valid list of variable names"
)
self.check(
scan,
Expand All @@ -449,7 +449,7 @@ def test_r105_r108_mesh_badcoordattr_invalidname(
meshvar.attributes["node_coordinates"] = "$123"
msg = (
r'"topology" has node_coordinates="\$123"'
".*not a valid netcdf variable name"
".*not a valid variable name"
)
self.check(
scan,
Expand Down Expand Up @@ -505,7 +505,7 @@ def test_r109_mesh_badconn_empty(self, scan_2d_and_meshvar):
"R105",
(
'"topology" has face_node_connectivity="".*'
"not a valid list of netcdf variable"
"not a valid list of variable names"
),
),
(
Expand Down Expand Up @@ -921,8 +921,8 @@ def test_r502_datavar_empty_mesh(self, scan_2d_and_datavar):

def test_r502_datavar_invalid_mesh_name(self, scan_2d_and_datavar):
scan, datavar = scan_2d_and_datavar
datavar.attributes["mesh"] = "$123"
msg = r'mesh="\$123", which is not a valid netcdf variable name\.'
datavar.attributes["mesh"] = "/bad/"
msg = r'mesh="/bad/", which is not a valid variable name\.'
self.check(scan, "R502", msg)

def test_r502_datavar_bad_mesh_dtype(self, scan_2d_and_datavar):
Expand Down Expand Up @@ -999,10 +999,10 @@ def test_r507_datavar_lis_with_location(self, scan_0d_with_lis_datavar):
def test_r508_datavar_lis_invalid(self, scan_0d_with_lis_datavar):
# The lis attribute should be a valid variable reference.
scan, data_var = scan_0d_with_lis_datavar
data_var.attributes["location_index_set"] = "$123"
data_var.attributes["location_index_set"] = "/bad/"
msg = (
r'location_index_set="\$123", '
r"which is not a valid netcdf variable name\."
r'location_index_set="/bad/", '
r"which is not a valid variable name\."
)
self.check(scan, "R508", msg)

Expand Down Expand Up @@ -1095,10 +1095,10 @@ def test_r203_coord_bounds_bad_attrdtype(self, scan_2d_and_coordvar):
def test_r203_coord_bounds_bad_name(self, scan_2d_and_coordvar):
scan, coord = scan_2d_and_coordvar
# Add an invalid bounds attribute to the node_lon coord.
coord.attributes["bounds"] = "$123"
coord.attributes["bounds"] = "/bad/"
msg = (
r'"node_lon" within topology:node_coordinates has bounds="\$123", '
"which is not a valid netcdf variable name"
r'"node_lon" within topology:node_coordinates has bounds="/bad/", '
r"which is not a valid variable name\."
)
self.check(scan, "R203", msg)

Expand Down
26 changes: 26 additions & 0 deletions lib/ugrid_checks/tests/check/test_name_regex.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from ...check import _VALID_NAME_REGEX as var_re


def test_valid_varnames():
# Just check a few things that should be accepted, and not
assert var_re.match("abc")
assert var_re.match("x") # single char

# various unusual chars, cannot appear at start
nonstart_chars = r"#$£+-*^%?!.:;,\()[]{}" # almost anything !!
for nonstart_char in nonstart_chars:
assert not var_re.match(nonstart_char)
# But these are all OK *after* the start position
assert var_re.match("x" + nonstart_chars)

# Examples of characters which *are* allowed at start
start_chars = "_10" # NB includes digits.
for start_char in start_chars:
assert var_re.match(start_char)

# not empty
assert not var_re.match("")
# no spaces
assert not var_re.match("space in name")
# no forward-slash
assert not var_re.match("forward/slash")

0 comments on commit 14467d3

Please sign in to comment.