Skip to content

Commit

Permalink
Fix dict representation not being JSON serializable (#1632)
Browse files Browse the repository at this point in the history
  • Loading branch information
DarkLight1337 authored Jun 25, 2024
1 parent 4d6a83a commit bdd5d8c
Show file tree
Hide file tree
Showing 3 changed files with 141 additions and 8 deletions.
40 changes: 39 additions & 1 deletion fsspec/json.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
import json
from contextlib import suppress
from pathlib import PurePath
from typing import Any, Callable, ClassVar, Dict, List, Optional, Tuple
from typing import (
Any,
Callable,
ClassVar,
Dict,
List,
Mapping,
Optional,
Sequence,
Tuple,
)

from .registry import _import_class, get_filesystem_class
from .spec import AbstractFileSystem
Expand All @@ -19,6 +29,21 @@ def default(self, o: Any) -> Any:

return super().default(o)

def make_serializable(self, obj: Any) -> Any:
    """
    Recursively convert ``obj`` into a structure made only of JSON-native
    values, without actually calling :func:`json.dumps` or :func:`json.dump`.

    ``None``, strings, numbers and booleans are returned unchanged.
    Mappings become plain dicts (keys are kept as they are, values are
    converted recursively) and sequences become lists. Any other object is
    handed to :meth:`default`.
    """
    # ``None`` maps directly to JSON ``null``; it must not reach
    # ``self.default``, which is only meant for non-native objects.
    if obj is None or isinstance(obj, (str, int, float, bool)):
        return obj
    if isinstance(obj, Mapping):
        return {k: self.make_serializable(v) for k, v in obj.items()}
    # NOTE(review): bytes/bytearray are also Sequences, so they are emitted
    # as lists of ints here -- confirm that this is the desired encoding.
    if isinstance(obj, Sequence):
        return [self.make_serializable(v) for v in obj]

    return self.default(obj)


class FilesystemJSONDecoder(json.JSONDecoder):
def __init__(
Expand Down Expand Up @@ -81,3 +106,16 @@ def custom_object_hook(self, dct: Dict[str, Any]):
return self.original_object_hook(dct)

return dct

def unmake_serializable(self, obj: Any) -> Any:
    """
    Inverse function of :meth:`FilesystemJSONEncoder.make_serializable`.

    A dict is first passed through :meth:`custom_object_hook`. If the result
    is still a dict its values are converted recursively; lists and tuples
    are converted element by element into a new list. Everything else is
    returned unchanged.
    """
    decoded = self.custom_object_hook(obj) if isinstance(obj, dict) else obj

    if isinstance(decoded, dict):
        return {
            key: self.unmake_serializable(value)
            for key, value in decoded.items()
        }
    if isinstance(decoded, (list, tuple)):
        return [self.unmake_serializable(item) for item in decoded]
    return decoded
15 changes: 12 additions & 3 deletions fsspec/spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1466,6 +1466,10 @@ def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
passed to the constructor, such as passwords and tokens. Make sure you
store and send them in a secure environment!
"""
from .json import FilesystemJSONEncoder

json_encoder = FilesystemJSONEncoder()

cls = type(self)
proto = self.protocol

Expand All @@ -1476,8 +1480,8 @@ def to_dict(self, *, include_password: bool = True) -> Dict[str, Any]:
return dict(
cls=f"{cls.__module__}:{cls.__name__}",
protocol=proto[0] if isinstance(proto, (tuple, list)) else proto,
args=self.storage_args,
**storage_options,
args=json_encoder.make_serializable(self.storage_args),
**json_encoder.make_serializable(storage_options),
)

@staticmethod
Expand All @@ -1503,6 +1507,8 @@ def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
"""
from .json import FilesystemJSONDecoder

json_decoder = FilesystemJSONDecoder()

dct = dict(dct) # Defensive copy

cls = FilesystemJSONDecoder.try_resolve_fs_cls(dct)
Expand All @@ -1512,7 +1518,10 @@ def from_dict(dct: Dict[str, Any]) -> AbstractFileSystem:
dct.pop("cls", None)
dct.pop("protocol", None)

return cls(*dct.pop("args", ()), **dct)
return cls(
*json_decoder.unmake_serializable(dct.pop("args", ())),
**json_decoder.unmake_serializable(dct),
)

def _get_pyarrow_filesystem(self):
"""
Expand Down
94 changes: 90 additions & 4 deletions fsspec/tests/test_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,17 +875,38 @@ def test_json_path_attr():

def test_json_fs_attr():
# Checks that a filesystem passed as a keyword option of another filesystem
# survives ``to_json`` / ``from_json`` and decodes to the identical instance.
# NOTE(review): this span is taken from a rendered diff, so the removed and
# added lines of the hunk appear one after another. The two assignments to
# ``b`` and the two groups of assertions (on ``outb`` and on ``outc``) are
# presumably the before/after sides of the change -- confirm against the
# real file.
a = DummyTestFS(1)
b = DummyTestFS(2, bar=a)
b = DummyTestFS(2, bar=Path("baz"))
c = DummyTestFS(3, baz=b)

outa = a.to_json()
outb = b.to_json()
outc = c.to_json()

assert json.loads(outb) # is valid JSON
assert a != b
assert "bar" in outb
assert json.loads(outc) # is valid JSON
assert b != c
assert "baz" in outc

# Identity (``is``), not just equality, is required after decoding.
assert DummyTestFS.from_json(outa) is a
assert DummyTestFS.from_json(outb) is b
assert DummyTestFS.from_json(outc) is c


def test_json_dict_attr():
    """A filesystem nested inside a dict option round-trips through JSON."""
    plain_fs = DummyTestFS(1)
    path_fs = DummyTestFS(2, bar=Path("baz"))
    nested_fs = DummyTestFS(3, baz={"key": path_fs})

    plain_json = plain_fs.to_json()
    path_json = path_fs.to_json()
    nested_json = nested_fs.to_json()

    assert json.loads(nested_json)  # is valid JSON
    assert path_fs != nested_fs
    assert "baz" in nested_json

    # Decoding must return the very same instances, not just equal ones.
    round_trips = (
        (plain_json, plain_fs),
        (path_json, path_fs),
        (nested_json, nested_fs),
    )
    for encoded, expected in round_trips:
        assert DummyTestFS.from_json(encoded) is expected


def test_dict():
Expand All @@ -903,6 +924,57 @@ def test_dict():
assert DummyTestFS.from_dict(outb) is b


def test_dict_path_attr():
    """A ``Path`` option is encoded as a nested dict and round-trips."""
    plain_fs = DummyTestFS(1)
    path_fs = DummyTestFS(2, bar=Path("baz"))

    plain_dict = plain_fs.to_dict()
    path_dict = path_fs.to_dict()

    assert isinstance(plain_dict, dict)
    assert plain_fs != path_fs
    # The path is stored under the "str" key of its encoded form.
    assert path_dict["bar"]["str"] == "baz"

    # Decoding must return the very same instances, not just equal ones.
    for encoded, expected in ((plain_dict, plain_fs), (path_dict, path_fs)):
        assert DummyTestFS.from_dict(encoded) is expected


def test_dict_fs_attr():
    """A filesystem option is encoded as that filesystem's own dict form."""
    plain_fs = DummyTestFS(1)
    path_fs = DummyTestFS(2, bar=Path("baz"))
    outer_fs = DummyTestFS(3, baz=path_fs)

    plain_dict = plain_fs.to_dict()
    path_dict = path_fs.to_dict()
    outer_dict = outer_fs.to_dict()

    assert isinstance(outer_dict, dict)
    assert path_fs != outer_fs
    # The nested filesystem appears as its own dict representation.
    assert outer_dict["baz"] == path_dict

    # Decoding must return the very same instances, not just equal ones.
    round_trips = (
        (plain_dict, plain_fs),
        (path_dict, path_fs),
        (outer_dict, outer_fs),
    )
    for encoded, expected in round_trips:
        assert DummyTestFS.from_dict(encoded) is expected


def test_dict_dict_attr():
    """A filesystem held inside a dict option is encoded in place."""
    plain_fs = DummyTestFS(1)
    path_fs = DummyTestFS(2, bar=Path("baz"))
    outer_fs = DummyTestFS(3, baz={"key": path_fs})

    plain_dict = plain_fs.to_dict()
    path_dict = path_fs.to_dict()
    outer_dict = outer_fs.to_dict()

    assert isinstance(outer_dict, dict)
    assert path_fs != outer_fs
    # The value under "key" is the nested filesystem's dict representation.
    assert outer_dict["baz"]["key"] == path_dict

    # Decoding must return the very same instances, not just equal ones.
    round_trips = (
        (plain_dict, plain_fs),
        (path_dict, path_fs),
        (outer_dict, outer_fs),
    )
    for encoded, expected in round_trips:
        assert DummyTestFS.from_dict(encoded) is expected


def test_dict_idempotent():
a = DummyTestFS(1)

Expand All @@ -912,6 +984,20 @@ def test_dict_idempotent():
assert DummyTestFS.from_dict(outa) is a


def test_dict_json_serializable():
    """``to_dict`` output can be passed to ``json.dumps`` as is."""
    plain_fs = DummyTestFS(1)
    path_fs = DummyTestFS(2, bar=Path("baz"))
    outer_fs = DummyTestFS(3, baz=path_fs)

    encoded = [fs.to_dict() for fs in (plain_fs, path_fs, outer_fs)]

    # json.dumps raises TypeError for values it cannot serialize, so simply
    # completing each call is the assertion.
    for dct in encoded:
        json.dumps(dct)


def test_serialize_no_password():
fs = DummyTestFS(1, password="admin")

Expand Down

0 comments on commit bdd5d8c

Please sign in to comment.