Skip to content

Commit

Permalink
finish debugging ATL08 read-in issue
Browse files Browse the repository at this point in the history
  • Loading branch information
JessicaS11 committed Feb 25, 2022
1 parent 6d9cec6 commit 763305b
Show file tree
Hide file tree
Showing 2 changed files with 3 additions and 80 deletions.
2 changes: 1 addition & 1 deletion doc/source/example_notebooks/IS2_data_read-in.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@
"\n",
"***ATTENTION: icepyx loads your data by creating an Xarray DataSet for each input granule and then merging them. In some cases, the automatic merge fails and needs to be handled manually. In these cases, icepyx will return a warning with the error message from the failed Xarray merge and a list of per-granule DataSets***\n",
"\n",
"This can happen if you unintentionally provide the same granule multiple times with different filenames."
"This can happen if you unintentionally provide the same granule multiple times with different filenames or in segmented products where the automatically generated `gran_idx` values (derived from rgt and cycle) match. In this latter case, you can simply provide unique `gran_idx` values for each DataSet in `ds` and run `import xarray as xr` and `ds_merged = xr.merge(ds)` to create one merged DataSet."
]
},
{
Expand Down
81 changes: 2 additions & 79 deletions icepyx/core/read.py
Original file line number Diff line number Diff line change
Expand Up @@ -385,56 +385,25 @@ def _add_vars_to_ds(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict):
spot = is2ref.gt2spot(gt_str, is2ds.sc_orient.values[0])
# add a test for the new function (called here)!

# print(wanted_dict)

grp_spec_vars = [
k
for k, v in wanted_dict.items()
if any(f"{grp_path}/{k}" in x for x in v)
]

# print(ds)

# DevNOTE: the issue seems to be that the incoming ds has mismatching delta time lengths, and they're not brought in as coordinates for the canopy/canopy_hy
ds = (
ds.reset_coords(drop=False)
.expand_dims(dim=["spot", "gran_idx"])
.assign_coords(spot=("spot", [spot]))
.assign(gt=(("gran_idx", "spot"), [[gt_str]]))
)
# print(ds[grp_spec_vars])
grp_spec_vars.append("gt")

# # Use this to handle issues specific to group paths that are more nested
# tiers = len(wanted_groups_tiered)
# if tiers > 3 and grp_path.count("/") == tiers - 2:
# # Handle attribute conflicts that arose from data descriptions during merging
# for var in grp_spec_vars:
# ds[var].attrs = ds.attrs
# for k in ds[var].attrs.keys():
# ds.attrs.pop(k)
# # warnings.warn(
# # "Due to the number of layers of variable group paths, some attributes have been dropped from your DataSet during merging",
# # UserWarning,
# # )

# # assign delta-time coordinates for the deeper layer variable
# up_grp_path = grp_path.rsplit("/")[0]

# print(is2ds.sel(spot=spot).delta_time)

# # ds.assign_coords(delta_time=is2ds.sel(spot=spot).delta_time)
# print(is2ds)

# ds=ds.sel(spot=spot).assign_coords({'delta_time':is2ds.sel(spot=spot).delta_time.data})
# # print(ds)
grp_spec_vars.append("gt")

is2ds = is2ds.merge(
ds[grp_spec_vars], join="outer", combine_attrs="no_conflicts"
)

# print(is2ds)

# re-cast some dtypes to make array smaller
is2ds["gt"] = is2ds.gt.astype(str)
is2ds["spot"] = is2ds.spot.astype(np.uint8)
Expand Down Expand Up @@ -467,30 +436,10 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict)
Xarray Dataset with variables from the ds variable group added.
"""

# wanted_vars = list(wanted_dict.keys())

# print(grp_path)
# print(wanted_groups_tiered)
# print(wanted_dict)

# print(wanted_dict)

grp_spec_vars = [
k for k, v in wanted_dict.items() if any(f"{grp_path}/{k}" in x for x in v)
]

# print(ds)

# DevNOTE: the issue seems to be that the incoming ds has mismatching delta time lengths, and they're not brought in as coordinates for the canopy/canopy_hy
# ds = (
# ds.reset_coords(drop=False)
# .expand_dims(dim=["spot", "gran_idx"])
# .assign_coords(spot=("spot", [spot]))
# .assign(gt=(("gran_idx", "spot"), [[gt_str]]))
# )
# # print(ds[grp_spec_vars])
# grp_spec_vars.append("gt")

# # Use this to handle issues specific to group paths that are more nested
# tiers = len(wanted_groups_tiered)
# if tiers > 3 and grp_path.count("/") == tiers - 2:
Expand All @@ -504,27 +453,8 @@ def _combine_nested_vars(is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict)
# # UserWarning,
# # )

# # assign delta-time coordinates for the deeper layer variable
# up_grp_path = grp_path.rsplit("/")[0]

# print(is2ds.sel(spot=spot).delta_time)

# # ds.assign_coords(delta_time=is2ds.sel(spot=spot).delta_time)
# print(is2ds)

# ds=ds.sel(spot=spot).assign_coords({'delta_time':is2ds.sel(spot=spot).delta_time.data})
# # print(ds)

print(grp_spec_vars)

is2ds = is2ds.assign(ds[grp_spec_vars])

# print(is2ds)

# re-cast some dtypes to make array smaller
# is2ds["gt"] = is2ds.gt.astype(str)
# is2ds["spot"] = is2ds.spot.astype(np.uint8)

return is2ds

def load(self):
Expand Down Expand Up @@ -699,29 +629,22 @@ def _build_single_file_dataset(self, file, groups_list):
while wanted_groups_list:
grp_path = wanted_groups_list[0]
wanted_groups_list = wanted_groups_list[1:]
# Note this will fail with an index error on the last run
ds = self._read_single_grp(file, grp_path)
print(grp_path)
is2ds, ds = Read._add_vars_to_ds(
is2ds, ds, grp_path, wanted_groups_tiered, wanted_dict
)

# if there are any deeper nested variables, get those so they have actual coordinates and add them
if any(grp_path in grp_path2 for grp_path2 in wanted_groups_list):
print("deep nested paths")
for grp_path2 in wanted_groups_list:
if grp_path in grp_path2:
sub_ds = self._read_single_grp(file, grp_path2)
# print(ds)
# print(sub_ds)
ds = Read._combine_nested_vars(
ds, sub_ds, grp_path2, wanted_groups_tiered, wanted_dict
)
wanted_groups_list.remove(grp_path2)
is2ds = is2ds.merge(ds, join="outer", combine_attrs="no_conflicts")

print(is2ds)

            # Notes (next steps): test on ATL06; reset kernel and try again; figure out gran_idx generation to be unique for ATL08 files
# Notes (next steps): open an issue; maybe add a fn to generate unique gran ids

return is2ds

0 comments on commit 763305b

Please sign in to comment.