Skip to content

Commit

Permalink
Make Result.serialize work more like Graph.serialize
Browse files Browse the repository at this point in the history
This patch makes the following changes to `Result.serialize`.

* Return str by default instead of bytes.
* Use "txt" as the default tabular serialization format.
* Use "turtle" as the default graph serialization format.
* Support both typing.IO[bytes] and typing.TextIO destinations.

Corresponding changes are made to the specific serializers also.

This patch also changes how text is written to typing.IO[bytes] in
serializers to ensure that the buffer is flushed and
detached from the TextIOWrapper once the serialization function
completes so it can be used normally afterwards.

This patch further includes a bunch of additional type hints.
  • Loading branch information
aucampia committed Oct 16, 2021
1 parent 1729243 commit abb01be
Show file tree
Hide file tree
Showing 28 changed files with 1,642 additions and 340 deletions.
238 changes: 155 additions & 83 deletions rdflib/graph.py

Large diffs are not rendered by default.

10 changes: 8 additions & 2 deletions rdflib/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import sys

from io import BytesIO, TextIOBase, TextIOWrapper, StringIO, BufferedIOBase
from typing import Optional, Union

from urllib.request import Request
from urllib.request import url2pathname
Expand Down Expand Up @@ -44,7 +45,7 @@ class Parser(object):
def __init__(self):
pass

def parse(self, source, sink):
def parse(self, source, sink, **args):
pass


Expand Down Expand Up @@ -214,7 +215,12 @@ def __repr__(self):


def create_input_source(
source=None, publicID=None, location=None, file=None, data=None, format=None
source=None,
publicID=None,
location=None,
file=None,
data: Optional[Union[str, bytes, bytearray]] = None,
format=None,
):
"""
Return an appropriate InputSource instance for the given
Expand Down
62 changes: 46 additions & 16 deletions rdflib/plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,21 @@
UpdateProcessor,
)
from rdflib.exceptions import Error
from typing import Type, TypeVar
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generic,
Iterator,
Optional,
Tuple,
Type,
TypeVar,
overload,
)

if TYPE_CHECKING:
from pkg_resources import EntryPoint

__all__ = ["register", "get", "plugins", "PluginException", "Plugin", "PKGPlugin"]

Expand All @@ -51,42 +65,47 @@
"rdf.plugins.updateprocessor": UpdateProcessor,
}

_plugins = {}
_plugins: Dict[Tuple[str, Type[Any]], "Plugin"] = {}


class PluginException(Error):
pass


class Plugin(object):
def __init__(self, name, kind, module_path, class_name):
PluginT = TypeVar("PluginT")


class Plugin(Generic[PluginT]):
def __init__(
self, name: str, kind: Type[PluginT], module_path: str, class_name: str
):
self.name = name
self.kind = kind
self.module_path = module_path
self.class_name = class_name
self._class = None
self._class: Optional[Type[PluginT]] = None

def getClass(self):
def getClass(self) -> Type[PluginT]:
if self._class is None:
module = __import__(self.module_path, globals(), locals(), [""])
self._class = getattr(module, self.class_name)
return self._class


class PKGPlugin(Plugin):
def __init__(self, name, kind, ep):
class PKGPlugin(Plugin[PluginT]):
def __init__(self, name: str, kind: Type[PluginT], ep: "EntryPoint"):
self.name = name
self.kind = kind
self.ep = ep
self._class = None
self._class: Optional[Type[PluginT]] = None

def getClass(self):
def getClass(self) -> Type[PluginT]:
if self._class is None:
self._class = self.ep.load()
return self._class


def register(name: str, kind, module_path, class_name):
def register(name: str, kind: Type[Any], module_path, class_name):
"""
Register the plugin for (name, kind). The module_path and
class_name should be the path to a plugin class.
Expand All @@ -95,16 +114,13 @@ def register(name: str, kind, module_path, class_name):
_plugins[(name, kind)] = p


PluginT = TypeVar("PluginT")


def get(name: str, kind: Type[PluginT]) -> Type[PluginT]:
"""
Return the class for the specified (name, kind). Raises a
PluginException if unable to do so.
"""
try:
p = _plugins[(name, kind)]
p: Plugin[PluginT] = _plugins[(name, kind)]
except KeyError:
raise PluginException("No plugin registered for (%s, %s)" % (name, kind))
return p.getClass()
Expand All @@ -121,7 +137,21 @@ def get(name: str, kind: Type[PluginT]) -> Type[PluginT]:
_plugins[(ep.name, kind)] = PKGPlugin(ep.name, kind, ep)


def plugins(name=None, kind=None):
@overload
def plugins(
name: Optional[str] = ..., kind: Type[PluginT] = ...
) -> Iterator[Plugin[PluginT]]:
...


@overload
def plugins(name: Optional[str] = ..., kind: None = ...) -> Iterator[Plugin]:
...


def plugins(
name: Optional[str] = None, kind: Optional[Type[PluginT]] = None
) -> Iterator[Plugin]:
"""
A generator of the plugins.
Expand Down
11 changes: 9 additions & 2 deletions rdflib/plugins/serializers/jsonld.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@
from rdflib.graph import Graph
from rdflib.term import URIRef, Literal, BNode
from rdflib.namespace import RDF, XSD
from typing import IO, Optional

from ..shared.jsonld.context import Context, UNDEF
from ..shared.jsonld.util import json
Expand All @@ -53,10 +54,16 @@


class JsonLDSerializer(Serializer):
def __init__(self, store):
def __init__(self, store: Graph):
super(JsonLDSerializer, self).__init__(store)

def serialize(self, stream, base=None, encoding=None, **kwargs):
def serialize(
self,
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
**kwargs
):
# TODO: docstring w. args and return value
encoding = encoding or "utf-8"
if encoding not in ("utf-8", "utf-16"):
Expand Down
4 changes: 2 additions & 2 deletions rdflib/plugins/serializers/n3.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class N3Serializer(TurtleSerializer):

short_name = "n3"

def __init__(self, store, parent=None):
def __init__(self, store: Graph, parent=None):
super(N3Serializer, self).__init__(store)
self.keywords.update({OWL.sameAs: "=", SWAP_LOG.implies: "=>"})
self.parent = parent
Expand Down Expand Up @@ -109,7 +109,7 @@ def p_clause(self, node, position):
self.write("{")
self.depth += 1
serializer = N3Serializer(node, parent=self)
serializer.serialize(self.stream)
serializer.serialize(self.stream.buffer)
self.depth -= 1
self.write(self.indent() + "}")
return True
Expand Down
15 changes: 12 additions & 3 deletions rdflib/plugins/serializers/nquads.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from typing import IO, Optional
import warnings

from rdflib.graph import ConjunctiveGraph, Graph
from rdflib.term import Literal
from rdflib.serializer import Serializer

Expand All @@ -9,15 +11,22 @@


class NQuadsSerializer(Serializer):
def __init__(self, store):
def __init__(self, store: Graph):
if not store.context_aware:
raise Exception(
"NQuads serialization only makes " "sense for context-aware stores!"
)

super(NQuadsSerializer, self).__init__(store)

def serialize(self, stream, base=None, encoding=None, **args):
self.store: ConjunctiveGraph

def serialize(
self,
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
**args
):
if base is not None:
warnings.warn("NQuadsSerializer does not support base.")
if encoding is not None and encoding.lower() != self.encoding.lower():
Expand Down
30 changes: 24 additions & 6 deletions rdflib/plugins/serializers/nt.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,17 @@
See <http://www.w3.org/TR/rdf-testcases/#ntriples> for details about the
format.
"""
from typing import IO, Optional

from rdflib.graph import Graph
from rdflib.term import Literal
from rdflib.serializer import Serializer

import warnings
import codecs

from rdflib.util import as_textio

__all__ = ["NTSerializer"]


Expand All @@ -17,19 +22,32 @@ class NTSerializer(Serializer):
Serializes RDF graphs to NTriples format.
"""

def __init__(self, store):
def __init__(self, store: Graph):
Serializer.__init__(self, store)
self.encoding = "ascii" # n-triples are ascii encoded

def serialize(self, stream, base=None, encoding=None, **args):
def serialize(
self,
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
**args
):
if base is not None:
warnings.warn("NTSerializer does not support base.")
if encoding is not None and encoding.lower() != self.encoding.lower():
warnings.warn("NTSerializer does not use custom encoding.")
encoding = self.encoding
for triple in self.store:
stream.write(_nt_row(triple).encode(self.encoding, "_rdflib_nt_escape"))
stream.write("\n".encode("latin-1"))

with as_textio(
stream,
encoding=self.encoding,
errors="_rdflib_nt_escape",
write_through=True,
) as text_stream:
for triple in self.store:
text_stream.write(_nt_row(triple))
text_stream.write("\n")


class NT11Serializer(NTSerializer):
Expand All @@ -39,7 +57,7 @@ class NT11Serializer(NTSerializer):
Exactly like nt - only utf8 encoded.
"""

def __init__(self, store):
def __init__(self, store: Graph):
Serializer.__init__(self, store) # default to utf-8


Expand Down
Loading

0 comments on commit abb01be

Please sign in to comment.