Skip to content

Commit

Permalink
warn and return bytes undecoded in case of UnicodeDecodeError in h5netcdf-backend
Browse files Browse the repository at this point in the history
  • Loading branch information
kmuehlbauer committed Mar 25, 2024
1 parent 6af547c commit 1e020d3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 3 deletions.
12 changes: 9 additions & 3 deletions xarray/backends/h5netcdf_.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from xarray.core import indexing
from xarray.core.utils import (
FrozenDict,
emit_user_level_warning,
is_remote_uri,
read_magic_number_from_file,
try_read_magic_number_from_file_or_path,
Expand Down Expand Up @@ -60,9 +61,14 @@ def _getitem(self, key):

def maybe_decode_bytes(txt):
    """Decode ``txt`` as UTF-8 when it is ``bytes``; return anything else as-is.

    If the bytes are not valid UTF-8, a ``UnicodeWarning`` is emitted through
    ``emit_user_level_warning`` and the original bytes are returned undecoded
    instead of raising ``UnicodeDecodeError``.
    """
    if not isinstance(txt, bytes):
        return txt
    try:
        return txt.decode("utf-8")
    except UnicodeDecodeError:
        # Warn instead of raising, and hand the raw bytes back to the caller.
        emit_user_level_warning(
            "'utf-8' codec can't decode bytes, returning bytes undecoded.",
            UnicodeWarning,
        )
        return txt


def _read_attributes(h5netcdf_var):
Expand Down
9 changes: 9 additions & 0 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -3560,6 +3560,15 @@ def test_dump_encodings_h5py(self) -> None:
assert actual.x.encoding["compression"] == "lzf"
assert actual.x.encoding["compression_opts"] is None

def test_decode_utf8_warning(self) -> None:
    """Undecodable attribute bytes raise a UnicodeWarning and come back as-is."""
    # A lone 0xC3 lead byte with no continuation byte is not valid UTF-8.
    title = b"\xc3"
    with create_tmp_file() as tmp_file:
        # Write the attribute with netCDF4, then read it back via h5netcdf.
        with nc4.Dataset(tmp_file, "w") as f:
            f.title = title
        with pytest.warns(UnicodeWarning, match="returning bytes undecoded"):
            ds = xr.load_dataset(tmp_file, engine="h5netcdf")
            assert ds.title == title


@requires_h5netcdf
@requires_netCDF4
Expand Down

0 comments on commit 1e020d3

Please sign in to comment.