diff --git a/docs_nnx/guides/filters_guide.ipynb b/docs_nnx/guides/filters_guide.ipynb
index ed37ad8731..a4dfabea97 100644
--- a/docs_nnx/guides/filters_guide.ipynb
+++ b/docs_nnx/guides/filters_guide.ipynb
@@ -248,7 +248,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "068208fc",
    "metadata": {},
    "outputs": [
@@ -280,7 +280,7 @@
     "  predicates = [nnx.filterlib.to_predicate(f) for f in filters]\n",
     "  flat_states: list[dict[KeyPath, Any]] = [{} for p in predicates]\n",
     "\n",
-    "  for path, value in state.flat_state().items():\n",
+    "  for path, value in state.flat_state():\n",
     "    for i, predicate in enumerate(predicates):\n",
     "      if predicate(path, value):\n",
     "        flat_states[i][path] = value\n",
diff --git a/docs_nnx/guides/filters_guide.md b/docs_nnx/guides/filters_guide.md
index 97ff439ce2..dcd414d76a 100644
--- a/docs_nnx/guides/filters_guide.md
+++ b/docs_nnx/guides/filters_guide.md
@@ -145,7 +145,7 @@ def split(node, *filters):
   predicates = [nnx.filterlib.to_predicate(f) for f in filters]
   flat_states: list[dict[KeyPath, Any]] = [{} for p in predicates]
 
-  for path, value in state.flat_state().items():
+  for path, value in state.flat_state():
     for i, predicate in enumerate(predicates):
       if predicate(path, value):
         flat_states[i][path] = value
diff --git a/examples/gemma/helpers.py b/examples/gemma/helpers.py
index 1797f01271..b9c4195f4a 100644
--- a/examples/gemma/helpers.py
+++ b/examples/gemma/helpers.py
@@ -62,7 +62,7 @@ def map_key_fn(path: tuple[str, ...]) -> tuple[str | int, ...]:
 
   mdl: M = nnx.eval_shape(module_factory)
   graph_def, state = nnx.split(mdl)
-  state = state.flat_state()
+  state = dict(state.flat_state())
   for path, val in flax.traverse_util.flatten_dict(variables).items():
     mapped_path = map_key_fn(path)
     if mapped_path not in state:
diff --git a/examples/lm1b_nnx/models_test.py b/examples/lm1b_nnx/models_test.py
index dd1727c480..d2d0ce03d4 100644
--- a/examples/lm1b_nnx/models_test.py
+++ b/examples/lm1b_nnx/models_test.py
@@ -79,7 +79,7 @@ def transfer_params(
   params_linen: dict[str, Any],
 ):
   rules = dataclasses.asdict(config.axis_rules)
-  flat_params_nnx = params_nnx.flat_state()
+  flat_params_nnx = dict(params_nnx.flat_state())
   flat_params_linen = nnx.traversals.flatten_mapping(params_linen, sep='/')
 
   def apply_rules(names: tuple[str, ...]):
@@ -163,7 +163,7 @@ def transfer_cache(
   cache_nnx: nnx.State,
   cache_linen: dict[str, Any],
 ):
-  flat_cache_nnx = cache_nnx.flat_state()
+  flat_cache_nnx = dict(cache_nnx.flat_state())
   flat_cache_linen = nnx.traversals.flatten_mapping(cache_linen, sep='/')
 
   def copy_var(nnx_name: str, linen_name: str):
diff --git a/flax/nnx/__init__.py b/flax/nnx/__init__.py
index 6a27b090f5..fcb15f0608 100644
--- a/flax/nnx/__init__.py
+++ b/flax/nnx/__init__.py
@@ -167,3 +167,4 @@
 from .extract import to_tree as to_tree
 from .extract import from_tree as from_tree
 from .extract import NodeStates as NodeStates
+from . import traversals as traversals
\ No newline at end of file
diff --git a/flax/nnx/graph.py b/flax/nnx/graph.py
index 2339f5c168..be04b279c8 100644
--- a/flax/nnx/graph.py
+++ b/flax/nnx/graph.py
@@ -30,7 +30,7 @@
   CallableProxy,
   DelayedAccessor,
 )
-from flax.nnx.statelib import FlatState, State
+from flax.nnx.statelib import State
 from flax.nnx import variablelib
 from flax.nnx.variablelib import Variable, VariableState
 from flax.typing import Key, PathParts, is_key_like
@@ -110,15 +110,16 @@ class GraphNodeImpl(NodeImplBase[Node, Leaf, AuxData]):
   pop_key: tp.Callable[[Node, Key], Leaf]
   create_empty: tp.Callable[[AuxData], Node]
   clear: tp.Callable[[Node], None]
+  init: tp.Callable[[Node, tp.Iterable[tuple[Key, Leaf]]], None]
 
-  def init(self, node: Node, items: tuple[tuple[Key, Leaf], ...]):
-    for key, value in items:
-      self.set_key(node, key, value)
+  # def init(self, node: Node, items: tp.Iterable[tuple[Key, Leaf]]):
+  #   for key, value in items:
+  #     self.set_key(node, key, value)
 
 
 @dataclasses.dataclass(frozen=True, slots=True)
 class PytreeNodeImpl(NodeImplBase[Node, Leaf, AuxData]):
-  unflatten: tp.Callable[[tuple[tuple[Key, Leaf], ...], AuxData], Node]
+  unflatten: tp.Callable[[tp.Sequence[tuple[Key, Leaf]], AuxData], Node]
 
 
 NodeImpl = tp.Union[
@@ -137,6 +138,7 @@ def register_graph_node_type(
   pop_key: tp.Callable[[Node, Key], Leaf],
   create_empty: tp.Callable[[AuxData], Node],
   clear: tp.Callable[[Node], None],
+  init: tp.Callable[[Node, tp.Iterable[tuple[Key, Leaf]]], None],
 ):
   if type in GRAPH_REGISTRY:
     raise ValueError(f'Node type {type} is already registered.')
@@ -148,12 +150,13 @@ def register_graph_node_type(
     pop_key=pop_key,
     create_empty=create_empty,
     clear=clear,
+    init=init,
   )
 
 def register_pytree_node_type(
   type: type,
   flatten: tp.Callable[[Node], tuple[tp.Sequence[tuple[Key, Leaf]], AuxData]],
-  unflatten: tp.Callable[[tuple[tuple[Key, Leaf], ...], AuxData], Node],
+  unflatten: tp.Callable[[tp.Sequence[tuple[Key, Leaf]], AuxData], Node],
 ):
   if type in PYTREE_REGISTRY:
     raise ValueError(f'Node type {type} is already registered.')
@@ -202,8 +205,8 @@ def get_node_impl_for_type(x: type[Node]) -> NodeImpl[Node, tp.Any, tp.Any]:
 
 
 class HashableMapping(tp.Mapping[HA, HB], tp.Hashable):
-  def __init__(self, mapping: tp.Mapping[HA, HB] | tp.Iterable[tuple[HA, HB]]):
-    self._mapping = dict(mapping)
+  def __init__(self, mapping: tp.Mapping[HA, HB], no_copy: bool = False):
+    self._mapping = mapping if no_copy else dict(mapping)
 
   def __contains__(self, key: object) -> bool:
     return key in self._mapping
@@ -401,7 +404,7 @@ def flatten(
   """
   if ref_index is None:
     ref_index = RefMap()
-  flat_state: dict[PathParts, StateLeaf] = {}
+  flat_state: list[tuple[PathParts, StateLeaf]] = []
   graphdef = _graph_flatten((), ref_index, flat_state, node)
   return graphdef, GraphState.from_flat_path(flat_state)
 
@@ -409,7 +412,7 @@ def flatten(
 def _graph_flatten(
   path: PathParts,
   ref_index: RefMap[tp.Any, Index],
-  flat_state: dict[PathParts, StateLeaf],
+  flat_state: list[tuple[PathParts, StateLeaf]],
   node: Node,
 ) -> NodeDef[Node] | NodeRef:
   if not is_node(node):
@@ -441,10 +444,10 @@ def _graph_flatten(
           LeafAttribute(key, NodeRef(type(value), ref_index[value]))
         )
       else:
-        flat_state[(*path, key)] = value.to_state()
+        flat_state.append(((*path, key), value.to_state()))
         variable_index = ref_index[value] = len(ref_index)
         variabledef = VariableDef(
-          type(value), variable_index, HashableMapping(value.get_metadata())
+          type(value), variable_index, HashableMapping(value._var_metadata)
         )
         attributes.append(LeafAttribute(key, variabledef))
     else:
@@ -528,7 +531,7 @@ def _graph_unflatten(
   node_impl = get_node_impl_for_type(nodedef.type)
 
   def _get_children():
-    children: dict[Key, NodeLeaf | Node] = {}
+    children: list[tuple[Key, NodeLeaf | Node]] = []
     state_keys: set = set(state.keys())
 
     # for every key in attributes there are 6 possible cases:
@@ -539,28 +542,29 @@ def _get_children():
       if key not in state:
        # if key is not present create an empty types
        if type(attribute) is StaticAttribute:
-          children[key] = attribute.value
+          children.append((key, attribute.value))
        elif type(attribute) is SubGraphAttribute:
          # if the key is a subgraph we create an empty node
          subgraphdef = attribute.value
          assert not isinstance(subgraphdef, VariableDef)
          if isinstance(subgraphdef, NodeRef):
            # subgraph exists, take it from the cache
-            children[key] = index_ref[subgraphdef.index]
+            children.append((key, index_ref[subgraphdef.index]))
          else:
            # create a node from an empty state, reasoning:
            # * its a node with no state
            # * its a node with state but only through references of already
            #   created nodes
            substate = {}
-            children[key] = _graph_unflatten(
+            subnode = _graph_unflatten(
              subgraphdef, substate, index_ref, index_ref_cache
            )
+            children.append((key, subnode))
        elif type(attribute) is LeafAttribute:
          variabledef = attribute.value
          if variabledef.index in index_ref:
            # variable exists, take it from the cache
-            children[key] = index_ref[variabledef.index]
+            children.append((key, index_ref[variabledef.index]))
          else:
            # key for a variable is missing, raise an error
            raise ValueError(
@@ -587,11 +591,12 @@ def _get_children():
           subgraphdef = attribute.value
 
           if isinstance(subgraphdef, NodeRef):
-            children[key] = index_ref[subgraphdef.index]
+            children.append((key, index_ref[subgraphdef.index]))
           else:
-            children[key] = _graph_unflatten(
+            subnode = _graph_unflatten(
               subgraphdef, value, index_ref, index_ref_cache
             )
+            children.append((key, subnode))
 
         elif type(attribute) is LeafAttribute:
           variabledef = attribute.value
@@ -599,7 +604,7 @@ def _get_children():
           if variabledef.index in index_ref:
             # add an existing variable
             assert isinstance(variabledef, NodeRef)
-            children[key] = index_ref[variabledef.index]
+            children.append((key, index_ref[variabledef.index]))
           else:
             # its a unseen variable, create a new one
             assert isinstance(variabledef, VariableDef)
@@ -626,7 +631,7 @@ def _get_children():
             variable = variabledef.type.from_metadata(
               value, variabledef.metadata
             )
-            children[key] = variable
+            children.append((key, variable))
             index_ref[variabledef.index] = variable
         else:
           raise RuntimeError(f'Unknown key: {key!r}, this is a bug.')
@@ -651,13 +656,11 @@ def _get_children():
     else:
       node = node_impl.create_empty(nodedef.metadata)
     index_ref[nodedef.index] = node
-    children = _get_children()
-    node_impl.init(node, tuple(children.items()))
+    node_impl.init(node, _get_children())
   else:
     # if the node type does not support the creation of an empty object it means
     # that it cannot reference itself, so we can create its children first
-    children = _get_children()
-    node = node_impl.unflatten(tuple(children.items()), nodedef.metadata)
+    node = node_impl.unflatten(_get_children(), nodedef.metadata)
 
   return node
 
@@ -669,7 +672,9 @@ def graph_pop(
   id_to_index: dict[int, Index] = {}
   path_parts: PathParts = ()
   predicates = tuple(filterlib.to_predicate(filter) for filter in filters)
-  flat_states: tuple[FlatState[StateLeaf], ...] = tuple({} for _ in predicates)
+  flat_states: tuple[dict[PathParts, StateLeaf], ...] = tuple(
+    {} for _ in predicates
+  )
   _graph_pop(node, id_to_index, path_parts, flat_states, predicates)
   return tuple(
     GraphState.from_flat_path(flat_state) for flat_state in flat_states
   )
@@ -680,7 +685,7 @@ def _graph_pop(
   node: tp.Any,
   id_to_index: dict[int, Index],
   path_parts: PathParts,
-  flat_states: tuple[FlatState[StateLeaf], ...],
+  flat_states: tuple[dict[PathParts, StateLeaf], ...],
   predicates: tuple[filterlib.Predicate, ...],
 ) -> None:
   if not is_node(node):
@@ -816,7 +821,7 @@ def split(
     if ctx.index_ref is not None and isinstance(graphdef, NodeDef):
       index_to_index = compose_mapping(ctx.index_ref, self.ref_index)
       graphdef = dataclasses.replace(
-        graphdef, index_mapping=HashableMapping(index_to_index)
+        graphdef, index_mapping=HashableMapping(index_to_index, no_copy=True)
       )
 
     return graphdef, *states
@@ -1006,7 +1011,7 @@ def split(
     if self.index_ref is not None and isinstance(graphdef, NodeDef):
       index_to_index = compose_mapping(self.index_ref, ref_index)
       graphdef = dataclasses.replace(
-        graphdef, index_mapping=HashableMapping(index_to_index)
+        graphdef, index_mapping=HashableMapping(index_to_index, no_copy=True)
      )
 
     self.flatten_end(ref_index)
@@ -1570,7 +1575,9 @@ def pop(
   id_to_index: dict[int, Index] = {}
   path_parts: PathParts = ()
   predicates = tuple(filterlib.to_predicate(filter) for filter in filters)
-  flat_states: tuple[FlatState[StateLeaf], ...] = tuple({} for _ in predicates)
+  flat_states: tuple[dict[PathParts, StateLeaf], ...] = tuple(
+    {} for _ in predicates
+  )
   _graph_pop(
     node=node,
     id_to_index=id_to_index,
@@ -1787,7 +1794,7 @@ def is_pytree_node(x: tp.Any) -> bool:
   elif isinstance(x, Variable):
     return False
   # knon pytree types
-  elif isinstance(x, (VariableState, State)):
+  elif type(x) is VariableState or type(x) is State:
     return True
   else:
     return not jax.tree_util.all_leaves((x,))
@@ -1829,7 +1836,7 @@ def _unflatten_pytree(
 PYTREE_NODE_IMPL = PytreeNodeImpl(
   type=GenericPytree,
   flatten=_flatten_pytree,
-  unflatten=_unflatten_pytree,
+  unflatten=_unflatten_pytree,  # type: ignore
 )
 
 # common pytrees
diff --git a/flax/nnx/object.py b/flax/nnx/object.py
index c63506fc48..afa41cdb7b 100644
--- a/flax/nnx/object.py
+++ b/flax/nnx/object.py
@@ -30,7 +30,6 @@
 )
 from flax.nnx import graph
 from flax.nnx.variablelib import Variable, VariableState
-from flax.typing import Key
 from flax import errors
 
 G = tp.TypeVar('G', bound='Object')
@@ -109,10 +108,11 @@ def __init_subclass__(cls) -> None:
     graph.register_graph_node_type(
       type=cls,
       flatten=cls._graph_node_flatten,
-      set_key=cls._graph_node_set_key,
-      pop_key=cls._graph_node_pop_key,
+      set_key=cls._graph_node_set_key,  # type: ignore
+      pop_key=cls._graph_node_pop_key,  # type: ignore
       create_empty=cls._graph_node_create_empty,
       clear=cls._graph_node_clear,
+      init=cls._graph_node_init,  # type: ignore
     )
 
     if not tp.TYPE_CHECKING:
@@ -189,14 +189,12 @@ def __treescope_repr__(self, path, subtree_renderer):
 
   # Graph Definition
   def _graph_node_flatten(self):
-    nodes = sorted(
-      (key, value)
-      for key, value in vars(self).items()
-      if key != '_object__state'
-    )
+    nodes = vars(self).copy()
+    del nodes['_object__state']
+    nodes = sorted(nodes.items())
     return nodes, (type(self), self._object__state._initializing)
 
-  def _graph_node_set_key(self, key: Key, value: tp.Any):
+  def _graph_node_set_key(self, key: str, value: tp.Any):
     if not isinstance(key, str):
       raise KeyError(f'Invalid key: {key!r}')
     elif (
@@ -208,7 +206,7 @@ def _graph_node_set_key(self, key: Key, value: tp.Any):
     else:
       setattr(self, key, value)
 
-  def _graph_node_pop_key(self, key: Key):
+  def _graph_node_pop_key(self, key: str):
     if not isinstance(key, str):
       raise KeyError(f'Invalid key: {key!r}')
     return vars(self).pop(key)
@@ -225,3 +223,6 @@ def _graph_node_clear(self):
     module_vars = vars(self)
     module_vars.clear()
     module_vars['_object__state'] = module_state
+
+  def _graph_node_init(self, attributes: tp.Iterable[tuple[str, tp.Any]]):
+    vars(self).update(attributes)
\ No newline at end of file
diff --git a/flax/nnx/statelib.py b/flax/nnx/statelib.py
index df299ea54d..a9cee18b42 100644
--- a/flax/nnx/statelib.py
+++ b/flax/nnx/statelib.py
@@ -28,7 +28,6 @@
 K = tp.TypeVar('K', bound=tp.Hashable)
 V = tp.TypeVar('V')
 
-FlatState = dict[PathParts, V]
 ExtractValueFn = tp.Callable[[tp.Any], tp.Any]
 SetValueFn = tp.Callable[[V, tp.Any], V]
 
@@ -54,6 +53,55 @@ def __treescope_repr__(self, path, subtree_renderer):
     # Render as the dictionary itself at the same path.
     return subtree_renderer(children, path=path)
 
+
+class FlatState(tp.Sequence[tuple[PathParts, V]], reprlib.PrettySequence):
+  _keys: tuple[PathParts, ...]
+  _values: list[V]
+
+  def __init__(self, items: tp.Iterable[tuple[PathParts, V]]):
+    keys, values = [], []
+    for key, value in items:
+      keys.append(key)
+      values.append(value)
+    self._keys = tuple(keys)
+    self._values = values
+
+  @tp.overload
+  def __getitem__(self, index: int) -> tuple[PathParts, V]: ...
+  @tp.overload
+  def __getitem__(self, index: slice) -> FlatState[V]: ...
+  def __getitem__(
+    self, index: int | slice
+  ) -> tuple[PathParts, V] | FlatState[V]:
+    if isinstance(index, int):
+      return self._keys[index], self._values[index]
+    return FlatState(zip(self._keys[index], self._values[index]))
+
+  def __len__(self) -> int:
+    return len(self._keys)
+
+  def __iter__(self) -> tp.Iterator[tuple[PathParts, V]]:
+    return iter(zip(self._keys, self._values))
+
+
+def _flat_state_pytree_flatten(x: FlatState[V]):
+  return x._values, x._keys
+
+
+def _flat_state_pytree_unflatten(
+  keys: tuple[PathParts, ...], values: list[V]
+) -> FlatState[V]:
+  flat_state = object.__new__(FlatState)
+  flat_state._keys = keys
+  flat_state._values = values
+  return flat_state
+
+
+jax.tree_util.register_pytree_node(
+  FlatState,
+  _flat_state_pytree_flatten,
+  _flat_state_pytree_unflatten,
+)
 
 
 class State(MutableMapping[K, V], reprlib.Representable):
   """A pytree-like structure that contains a ``Mapping`` from hashable and
@@ -148,12 +196,14 @@ def __treescope_repr__(self, path, subtree_renderer):
 
   def map(self, f: tp.Callable[[tuple, V], V]) -> State[K, V]:
     flat_state = self.flat_state()
-    for path, variable_state in flat_state.items():
-      flat_state[path] = f(path, variable_state)
-    return State.from_flat_path(flat_state)
+    result = []
+    for path, variable_state in flat_state:
+      variable_state = f(path, variable_state)
+      result.append((path, variable_state))
+    return State.from_flat_path(result)
 
   def flat_state(self) -> FlatState[V]:
-    return traversals.flatten_mapping(self._mapping)
+    return FlatState(traversals.flatten_to_sequence(self._mapping))
 
   @classmethod
   def from_flat_path(
@@ -172,7 +222,7 @@ def to_pure_dict(self,
     # Works for nnx.Variable and nnx.VariableState
     if extract_fn is None:
       extract_fn = lambda x: x.value if hasattr(x, 'value') else x
-    flat_values = {k: extract_fn(x) for k, x in self.flat_state().items()}
+    flat_values = {k: extract_fn(x) for k, x in self.flat_state()}
     return traversals.unflatten_mapping(flat_values)
 
   def replace_by_pure_dict(self,
@@ -186,7 +236,7 @@ def try_convert_int(x):
     # Works for nnx.Variable and nnx.VariableState
     if replace_fn is None:
       replace_fn = lambda x, v: x.replace(v) if hasattr(x, 'replace') else v
-    current_flat = self.flat_state()
+    current_flat = dict(self.flat_state())
     for kp, v in traversals.flatten_mapping(pure_dict).items():
       kp = tuple(map(try_convert_int, kp))
       if kp not in current_flat:
@@ -241,7 +291,7 @@ def split(  # type: ignore[misc]
       One or more ``States`` equal to the number of filters passed.
     """
     filters = (first, *filters)
-    *states_, rest = _split_state(self, *filters)
+    *states_, rest = _split_state(self.flat_state(), *filters)
 
     if rest:
       raise ValueError(
@@ -254,7 +304,7 @@ def split(  # type: ignore[misc]
       states = states_[0]
     else:
       states = tuple(states_)
-    return states  # type: ignore[bad-return-type]
+    return states  # type: ignore
 
   @tp.overload
   def filter(
@@ -306,7 +356,7 @@ def filter(
     Returns:
       One or more ``States`` equal to the number of filters passed.
     """
-    *states_, _rest = _split_state(self, first, *filters)
+    *states_, _rest = _split_state(self.flat_state(), first, *filters)
 
     assert len(states_) == len(filters) + 1
 
@@ -316,7 +366,7 @@ def filter(
     else:
       states = tuple(states_)
 
-    return states  # type: ignore[bad-return-type]
+    return states  # type: ignore
 
   @staticmethod
   def merge(
@@ -360,7 +410,7 @@ def merge(
 
     states = (state, *states)
 
-    new_state: FlatState[V] = {}
+    new_state: dict[PathParts, V] = {}
 
     for state in states:
       new_state.update(traversals.flatten_mapping(state))  # type: ignore[attribute-error] # pytype is wrong here
@@ -376,8 +426,8 @@ def __sub__(self, other: State[K, V]) -> State[K, V]:
     if not other:
       return self
 
-    self_flat = self.flat_state()
-    other_flat = other.flat_state()
+    self_flat = dict(self.flat_state())
+    other_flat = dict(other.flat_state())
     diff = {k: v for k, v in self_flat.items() if k not in other_flat}
     return State.from_flat_path(diff)
 
@@ -404,9 +454,9 @@ def _state_unflatten(
 
 
 def _split_state(
-  state: State[K, V],
+  flat_state: FlatState[V],
   *filters: filterlib.Filter,
-) -> tuple[State[K, V], ...]:
+) -> tuple[State[PathParts, V], ...]:
   for i, filter_ in enumerate(filters):
     if filter_ in (..., True) and i != len(filters) - 1:
       remaining_filters = filters[i + 1 :]
@@ -417,22 +467,20 @@ def _split_state(
         )
 
   predicates = tuple(map(filterlib.to_predicate, filters))
-  flat_state = state.flat_state()
-
   # we have n + 1 states, where n is the number of predicates
   # the last state is for values that don't match any predicate
-  flat_states: tuple[FlatState[V], ...] = tuple(
-    {} for _ in range(len(predicates) + 1)
+  flat_states: tuple[list[tuple[PathParts, V]], ...] = tuple(
+    [] for _ in range(len(predicates) + 1)
   )
 
-  for path, value in flat_state.items():
+  for path, value in flat_state:
    for i, predicate in enumerate(predicates):
      if predicate(path, value):
-        flat_states[i][path] = value  # type: ignore[index] # mypy is wrong here?
+        flat_states[i].append((path, value))  # type: ignore[index] # mypy is wrong here?
        break
    else:
      # if we didn't break, set leaf to last state
-      flat_states[-1][path] = value  # type: ignore[index] # mypy is wrong here?
+      flat_states[-1].append((path, value))  # type: ignore[index] # mypy is wrong here?
 
   return tuple(State.from_flat_path(flat_state) for flat_state in flat_states)
 
@@ -440,7 +488,7 @@ def _split_state(
 def create_path_filters(state: State):
   flat_state = state.flat_state()
   value_paths: dict[tp.Any, set[PathParts]] = {}
-  for path, value in flat_state.items():
+  for path, value in flat_state:
     if isinstance(value, (variablelib.Variable, variablelib.VariableState)):
       value = value.value
     value_paths.setdefault(value, set()).add(path)
diff --git a/flax/nnx/traversals.py b/flax/nnx/traversals.py
index 4d9c80603c..8c8996df9d 100644
--- a/flax/nnx/traversals.py
+++ b/flax/nnx/traversals.py
@@ -18,6 +18,7 @@
 
 from collections.abc import Callable, Mapping
 from typing import Any, overload
+from collections.abc import Iterable
 
 from flax import struct
 
@@ -118,6 +119,55 @@ def _flatten(xs: Any, prefix: tuple[Any, ...]) -> dict[Any, Any]:
   return _flatten(xs, ())
 
 
+def flatten_to_sequence(
+    xs: Mapping[Any, Any],
+    /,
+    *,
+    is_leaf: None | IsLeafCallable = None,
+) -> list[tuple[Any, Any]]:
+  """Flatten a nested mapping into a sequence of ``(path, value)`` pairs.
+
+  The nested keys are flattened to a tuple and paired with their leaf value.
+  See ``unflatten_mapping`` on how to restore the nested mapping from the
+  flattened sequence.
+
+  Example::
+
+    >>> from flax import nnx
+    >>> xs = {'foo': 1, 'bar': {'a': 2, 'b': {}}}
+    >>> flat_xs = nnx.traversals.flatten_to_sequence(xs)
+    >>> flat_xs
+    [(('foo',), 1), (('bar', 'a'), 2)]
+
+  Note that empty mappings are ignored and will not be restored by
+  ``unflatten_mapping``.
+
+  Args:
+    xs: a nested mapping to flatten.
+    is_leaf: an optional function that takes the next nested mapping and nested
+      keys and returns True if the nested mapping is a leaf (i.e., should not be
+      flattened further).
+
+  Returns:
+    A list of ``(path, value)`` tuples, where each ``path`` is a tuple of the
+    nested keys leading to the corresponding leaf ``value``. The result can be
+    passed to ``unflatten_mapping`` to rebuild the nested mapping.
+  """
+  assert isinstance(
+    xs, Mapping
+  ), f'expected Mapping; got {type(xs).__qualname__}'
+  result = []
+
+  def _flatten(xs: Any, prefix: tuple[Any, ...]):
+    if not isinstance(xs, Mapping) or (is_leaf and is_leaf(prefix, xs)):
+      result.append((prefix, xs))
+    else:
+      for key, value in xs.items():
+        _flatten(value, (*prefix, key))
+
+  _flatten(xs, ())
+  return result
+
 
 @overload
 def unflatten_mapping(xs: Mapping[tuple[Any, ...], Any],
@@ -163,9 +213,15 @@ def unflatten_mapping(xs: Any,
   Returns:
     The nested mapping.
""" - assert isinstance(xs, Mapping), f'expected Mapping; got {type(xs).__qualname__}' + if isinstance(xs, Mapping): + xs = xs.items() + + if not isinstance(xs, Iterable): + raise TypeError( + f'expected Mapping or Iterable; got {type(xs).__qualname__}' + ) result: dict[Any, Any] = {} - for path, value in xs.items(): + for path, value in xs: if sep is not None: path = path.split(sep) if value is empty_node: diff --git a/flax/nnx/variablelib.py b/flax/nnx/variablelib.py index 7af20cdb73..4752a9b7bd 100644 --- a/flax/nnx/variablelib.py +++ b/flax/nnx/variablelib.py @@ -216,7 +216,7 @@ def copy_from(self, other: Variable[A]) -> None: def update_from_state(self, variable_state: VariableState[A]): vars_self = vars(self) vars_self['raw_value'] = variable_state.value - vars_self['_var_metadata'] = variable_state.get_metadata().copy() + vars_self['_var_metadata'] = variable_state._var_metadata.copy() @property def value(self) -> A: @@ -308,8 +308,7 @@ def copy(self: Variable[A]) -> Variable[A]: return obj def to_state(self: Variable[A]) -> VariableState[A]: - metadata = self.get_metadata() - return VariableState(type(self), self.raw_value, **metadata) + return VariableState(type(self), self.raw_value, **self._var_metadata) def __nnx_repr__(self): yield reprlib.Object(type=type(self))