-
-
Notifications
You must be signed in to change notification settings - Fork 1.1k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Experimental] Refactor Dataset to store variables in a manifest #5961
Closed
Closed
Changes from all commits
Commits
Show all changes
5 commits
Select commit
Hold shift + click to select a range
94c8335
refactor Dataset to store variables in a manifest
TomNicholas 9f7d8bc
method to construct from a manifest
TomNicholas b5b7ce1
check that variables and children don't collide immediately on manife…
TomNicholas f29ce75
try to fix doctests error
TomNicholas 183b92b
improve manifest creation error
TomNicholas File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -37,6 +37,7 @@ | |
|
||
from ..coding.cftimeindex import _parse_array_of_cftime_strings | ||
from ..plot.dataset_plot import _Dataset_PlotMethods | ||
from ..tree.manifest import DataManifest | ||
from . import ( | ||
alignment, | ||
dtypes, | ||
|
@@ -705,6 +706,7 @@ class Dataset(DataWithCoords, DatasetArithmetic, Mapping): | |
_close: Optional[Callable[[], None]] | ||
_indexes: Optional[Dict[Hashable, Index]] | ||
_variables: Dict[Hashable, Variable] | ||
_manifest: DataManifest | ||
|
||
__slots__ = ( | ||
"_attrs", | ||
|
@@ -715,6 +717,7 @@ class Dataset(DataWithCoords, DatasetArithmetic, Mapping): | |
"_close", | ||
"_indexes", | ||
"_variables", | ||
"_manifest", | ||
"__weakref__", | ||
) | ||
|
||
|
@@ -752,10 +755,13 @@ def __init__( | |
data_vars, coords, compat="broadcast_equals" | ||
) | ||
|
||
self._manifest = DataManifest(variables=variables) | ||
|
||
# The private attributes that effectively define the data model | ||
self._attrs = dict(attrs) if attrs is not None else None | ||
self._close = None | ||
self._encoding = None | ||
self._variables = variables | ||
self._variables = self._manifest.variables | ||
self._coord_names = coord_names | ||
self._dims = dims | ||
self._indexes = indexes | ||
|
@@ -781,7 +787,7 @@ def variables(self) -> Mapping[Hashable, Variable]: | |
constituting the Dataset, including both data variables and | ||
coordinates. | ||
""" | ||
return Frozen(self._variables) | ||
return Frozen(self._manifest.variables) | ||
|
||
@property | ||
def attrs(self) -> Dict[Hashable, Any]: | ||
|
@@ -1082,7 +1088,34 @@ def _construct_direct( | |
if dims is None: | ||
dims = calculate_dimensions(variables) | ||
obj = object.__new__(cls) | ||
obj._variables = variables | ||
obj._manifest = DataManifest(variables=variables) | ||
obj._variables = obj._manifest.variables | ||
obj._coord_names = coord_names | ||
obj._dims = dims | ||
obj._indexes = indexes | ||
obj._attrs = attrs | ||
obj._close = close | ||
obj._encoding = encoding | ||
return obj | ||
|
||
@classmethod | ||
def _construct_from_manifest( | ||
cls, | ||
manifest, | ||
coord_names, | ||
dims=None, | ||
attrs=None, | ||
indexes=None, | ||
encoding=None, | ||
close=None, | ||
): | ||
"""Creates a Dataset that is forced to be consistent with a DataTree node that shares its manifest.""" | ||
variables = manifest.variables | ||
if dims is None: | ||
dims = calculate_dimensions(variables) | ||
obj = object.__new__(cls) | ||
obj._manifest = manifest | ||
obj._variables = obj._manifest.variables | ||
obj._coord_names = coord_names | ||
obj._dims = dims | ||
obj._indexes = indexes | ||
|
@@ -1111,7 +1144,9 @@ def _replace( | |
""" | ||
if inplace: | ||
if variables is not None: | ||
self._variables = variables | ||
self._manifest.variables = variables | ||
# TODO if ds._variables properly pointed to ds._manifest.variables we wouldn't need this line | ||
self._variables = self._manifest.variables | ||
Comment on lines
+1147
to
+1149
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What I wanted was for |
||
if coord_names is not None: | ||
self._coord_names = coord_names | ||
if dims is not None: | ||
|
@@ -1629,7 +1664,7 @@ def _setitem_check(self, key, value): | |
|
||
def __delitem__(self, key: Hashable) -> None: | ||
"""Remove a variable from this dataset.""" | ||
del self._variables[key] | ||
del self._manifest[key] | ||
self._coord_names.discard(key) | ||
if key in self.xindexes: | ||
assert self._indexes is not None | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
class DataTree: | ||
... |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
from collections.abc import MutableMapping | ||
from typing import Dict, Hashable, Mapping, Union | ||
|
||
from xarray.core.variable import Variable | ||
from xarray.tree.datatree import DataTree | ||
|
||
|
||
class DataManifest(MutableMapping): | ||
""" | ||
Stores variables and/or child tree nodes. | ||
|
||
When stored inside a DataTree node it prevents name collisions by acting as a common | ||
record of stored items for both the DataTree instance and its wrapped Dataset instance. | ||
|
||
When stored inside a lone Dataset it acts merely like a dictionary of Variables. | ||
""" | ||
|
||
def __init__( | ||
self, | ||
variables: Dict[Hashable, Variable] = {}, | ||
children: Dict[Hashable, DataTree] = {}, | ||
): | ||
if variables and children: | ||
keys_in_both = set(variables.keys()) & set(children.keys()) | ||
if keys_in_both: | ||
raise KeyError( | ||
f"The keys {keys_in_both} exist in both the variables and child nodes" | ||
) | ||
|
||
self._variables = variables | ||
self._children = children | ||
|
||
@property | ||
def variables(self) -> Mapping[Hashable, Variable]: | ||
return self._variables | ||
|
||
@variables.setter | ||
def variables(self, variables): | ||
for key in variables: | ||
if key in self.children: | ||
raise KeyError( | ||
f"Cannot set variable under name {key} because a child node " | ||
"with that name already exists" | ||
) | ||
self._variables = variables | ||
|
||
@property | ||
def children(self) -> Mapping[Hashable, DataTree]: | ||
return self._children | ||
|
||
def __getitem__(self, key: Hashable) -> Union[Variable, DataTree]: | ||
if key in self._variables: | ||
return self._variables[key] | ||
elif key in self._children: | ||
return self._children[key] | ||
else: | ||
raise KeyError(f"{key} is not present") | ||
|
||
def __setitem__(self, key, value): | ||
raise NotImplementedError | ||
|
||
def __delitem__(self, key: Hashable): | ||
if key in self.variables: | ||
del self._variables[key] | ||
elif key in self.children: | ||
del self._children[key] | ||
else: | ||
raise KeyError(f"Cannot remove item because nothing is stored under {key}") | ||
|
||
def __iter__(self): | ||
raise NotImplementedError | ||
|
||
def __len__(self): | ||
raise NotImplementedError |
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The idea is to use this when someone calls
dt.ds
- so that theDataTree
returns an actualDataset
, but one whose contents are linked to the wrapping tree node.