Skip to content

Commit

Permalink
Make Result.serialize work more like Graph.serialize
Browse files Browse the repository at this point in the history
This patch makes the following changes to `Result.serialize`.

* Return str by default instead of bytes.
* Use "txt" as the default tabular serialization format.
* Use "turtle" as the default graph serialization format.
* Support both typing.IO[bytes] and typing.TextIO destinations.

Corresponding changes are made to the specific serializers also.

This patch also changes how text is written to typing.IO[bytes] in
serializers to ensure that the buffer is flushed and
detached from the TextIOWrapper once the serialization function
completes, so it can be used normally afterwards.

This patch further includes a bunch of additional type hints.
  • Loading branch information
aucampia committed Apr 11, 2022
1 parent 7ed86ff commit 2de6f76
Show file tree
Hide file tree
Showing 20 changed files with 1,291 additions and 192 deletions.
56 changes: 38 additions & 18 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -351,15 +351,15 @@ def __init__(
self.default_union = False

@property
def store(self):
def store(self) -> Store: # read-only attr
return self.__store

@property
def identifier(self):
def identifier(self) -> Node: # read-only attr
return self.__identifier

@property
def namespace_manager(self):
def namespace_manager(self) -> NamespaceManager:
"""
this graph's namespace-manager
"""
Expand All @@ -368,8 +368,9 @@ def namespace_manager(self):
return self.__namespace_manager

@namespace_manager.setter
def namespace_manager(self, nm):
self.__namespace_manager = nm
def namespace_manager(self, value: NamespaceManager):
"""this graph's namespace-manager"""
self.__namespace_manager = value

def __repr__(self):
return "<Graph identifier=%s (%s)>" % (self.identifier, type(self))
Expand Down Expand Up @@ -1096,18 +1097,37 @@ def serialize(
encoding: Optional[str] = None,
**args: Any,
) -> Union[bytes, str, "Graph"]:
"""Serialize the Graph to destination
If destination is None serialize method returns the serialization as
bytes or string.
If encoding is None and destination is None, returns a string
If encoding is set, and Destination is None, returns bytes
Format defaults to turtle.
Format support can be extended with plugins,
but "xml", "n3", "turtle", "nt", "pretty-xml", "trix", "trig" and "nquads" are built in.
"""
Serialize the graph.
:param destination:
The destination to serialize the graph to. This can be a path as a
:class:`str` or :class:`~pathlib.PurePath` object, or it can be a
:class:`~typing.IO[bytes]` like object. If this parameter is not
supplied the serialized graph will be returned.
:type destination: Optional[Union[str, typing.IO[bytes], pathlib.PurePath]]
:param format:
The format that the output should be written in. This value
references a :class:`~rdflib.serializer.Serializer` plugin. Format
support can be extended with plugins, but `"xml"`, `"n3"`,
`"turtle"`, `"nt"`, `"pretty-xml"`, `"trix"`, `"trig"`, `"nquads"`
and `"json-ld"` are built in. Defaults to `"turtle"`.
:type format: str
:param base:
The base IRI for formats that support it. For the turtle format this
will be used as the `@base` directive.
:type base: Optional[str]
:param encoding: Encoding of output.
:type encoding: Optional[str]
:param **args:
Additional arguments to pass to the
:class:`~rdflib.serializer.Serializer` that will be used.
:type **args: Any
:return: The serialized graph if `destination` is `None`.
:rtype: :class:`bytes` if `destination` is `None` and `encoding` is not `None`.
:rtype: :class:`str` if `destination` is `None` and `encoding` is `None`.
:return: `self` (i.e. the :class:`~rdflib.graph.Graph` instance) if `destination` is not None.
:rtype: :class:`~rdflib.graph.Graph` if `destination` is not None.
"""

# if base is not given as attribute use the base set for the graph
Expand Down Expand Up @@ -1298,7 +1318,7 @@ def query(
if none are given, the namespaces from the graph's namespace manager
are used.
:returntype: rdflib.query.Result
:returntype: :class:`~rdflib.query.Result`
"""

Expand Down
2 changes: 1 addition & 1 deletion rdflib/plugins/serializers/n3.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ def p_clause(self, node, position):
self.write("{")
self.depth += 1
serializer = N3Serializer(node, parent=self)
serializer.serialize(self.stream)
serializer.serialize(self.stream.buffer)
self.depth -= 1
self.write(self.indent() + "}")
return True
Expand Down
14 changes: 11 additions & 3 deletions rdflib/plugins/serializers/nt.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import warnings
import codecs

from rdflib.util import as_textio

__all__ = ["NTSerializer"]


Expand All @@ -38,9 +40,15 @@ def serialize(
f"Given encoding was: {encoding}"
)

for triple in self.store:
stream.write(_nt_row(triple).encode())
stream.write("\n".encode())
with as_textio(
stream,
encoding=encoding, # TODO: CHECK: self.encoding set removed, why?
errors="_rdflib_nt_escape",
write_through=True,
) as text_stream:
for triple in self.store:
text_stream.write(_nt_row(triple))
text_stream.write("\n")


class NT11Serializer(NTSerializer):
Expand Down
6 changes: 5 additions & 1 deletion rdflib/plugins/serializers/rdfxml.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import IO, Dict, Optional, Set
from typing import IO, Dict, Optional, Set, cast
from rdflib.plugins.serializers.xmlwriter import XMLWriter

from rdflib.namespace import Namespace, RDF, RDFS # , split_uri
Expand Down Expand Up @@ -173,6 +173,8 @@ def serialize(
encoding: Optional[str] = None,
**args,
):
# TODO FIXME: this should be Optional, but it's not because nothing
# treats it as such.
self.__serialized: Dict[Identifier, int] = {}
store = self.store
# if base is given here, use that, if not and a base is set for the graph use that
Expand Down Expand Up @@ -241,6 +243,7 @@ def subject(self, subject: IdentifiedNode, depth: int = 1):
writer = self.writer

if subject in self.forceRDFAbout:
subject = cast(URIRef, subject)
writer.push(RDFVOC.Description)
writer.attribute(RDFVOC.about, self.relativize(subject))
writer.pop(RDFVOC.Description)
Expand Down Expand Up @@ -282,6 +285,7 @@ def subj_as_obj_more_than(ceil):

elif subject in self.forceRDFAbout:
# TODO FIXME?: this looks like a duplicate of first condition
subject = cast(URIRef, subject)
writer.push(RDFVOC.Description)
writer.attribute(RDFVOC.about, self.relativize(subject))
writer.pop(RDFVOC.Description)
Expand Down
88 changes: 40 additions & 48 deletions rdflib/plugins/serializers/trig.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,53 +62,45 @@ def serialize(
spacious: Optional[bool] = None,
**args,
):
self.reset()
self.stream = stream
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
elif self.store.base is not None:
self.base = self.store.base

if spacious is not None:
self._spacious = spacious

self.preprocess()

self.startDocument()

firstTime = True
for store, (ordered_subjects, subjects, ref) in self._contexts.items():
if not ordered_subjects:
continue

self._references = ref
self._serialized = {}
self.store = store
self._subjects = subjects

if self.default_context and store.identifier == self.default_context:
self.write(self.indent() + "\n{")
else:
iri: Optional[str]
if isinstance(store.identifier, BNode):
iri = store.identifier.n3()
else:
iri = self.getQName(store.identifier)
if iri is None:
iri = store.identifier.n3()
self.write(self.indent() + "\n%s {" % iri)
self._serialize_init(stream, base, encoding, spacious)
try:
self.preprocess()

self.depth += 1
for subject in ordered_subjects:
if self.isDone(subject):
self.startDocument()

firstTime = True
for store, (ordered_subjects, subjects, ref) in self._contexts.items():
if not ordered_subjects:
continue
if firstTime:
firstTime = False
if self.statement(subject) and not firstTime:
self.write("\n")
self.depth -= 1
self.write("}\n")

self.endDocument()
stream.write("\n".encode("latin-1"))

self._references = ref
self._serialized = {}
self.store = store
self._subjects = subjects

if self.default_context and store.identifier == self.default_context:
self.write(self.indent() + "\n{")
else:
if isinstance(store.identifier, BNode):
iri = store.identifier.n3()
else:
iri = self.getQName(store.identifier)
if iri is None:
iri = store.identifier.n3()
self.write(self.indent() + "\n%s {" % iri)

self.depth += 1
for subject in ordered_subjects:
if self.isDone(subject):
continue
if firstTime:
firstTime = False
if self.statement(subject) and not firstTime:
self.write("\n")
self.depth -= 1
self.write("}\n")

self.endDocument()
self.write("\n")
finally:
self._serialize_end()
90 changes: 60 additions & 30 deletions rdflib/plugins/serializers/turtle.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,13 @@
from collections import defaultdict
from functools import cmp_to_key

from rdflib.graph import Graph
from rdflib.term import BNode, Literal, URIRef
from rdflib.exceptions import Error
from rdflib.serializer import Serializer
from rdflib.namespace import RDF, RDFS
from io import TextIOWrapper
from typing import IO, Dict, Optional

__all__ = ["RecursiveSerializer", "TurtleSerializer"]

Expand Down Expand Up @@ -44,10 +47,13 @@ class RecursiveSerializer(Serializer):
indentString = " "
roundtrip_prefixes = ()

def __init__(self, store):
def __init__(self, store: Graph):

super(RecursiveSerializer, self).__init__(store)
self.stream = None
# TODO FIXME: Ideally stream should be optional, but nothing treats it
# as such, so the least weird solution is to just type it as not optional
# even though it can sometimes be None.
self.stream: IO[str] = None # type: ignore[assignment]
self.reset()

def addNamespace(self, prefix, uri):
Expand Down Expand Up @@ -166,9 +172,9 @@ def indent(self, modifier=0):
"""Returns indent string multiplied by the depth"""
return (self.depth + modifier) * self.indentString

def write(self, text):
"""Write text in given encoding."""
self.stream.write(text.encode(self.encoding, "replace"))
def write(self, text: str):
"""Write text"""
self.stream.write(text)


SUBJECT = 0
Expand All @@ -184,15 +190,15 @@ class TurtleSerializer(RecursiveSerializer):
short_name = "turtle"
indentString = " "

def __init__(self, store):
self._ns_rewrite = {}
def __init__(self, store: Graph):
self._ns_rewrite: Dict[str, str] = {}
super(TurtleSerializer, self).__init__(store)
self.keywords = {RDF.type: "a"}
self.reset()
self.stream = None
self.stream: TextIOWrapper = None # type: ignore[assignment]
self._spacious = _SPACIOUS_OUTPUT

def addNamespace(self, prefix, namespace):
def addNamespace(self, prefix: str, namespace: str):
# Turtle does not support prefix that start with _
# if they occur in the graph, rewrite to p_blah
# this is more complicated since we need to make sure p_blah
Expand Down Expand Up @@ -223,36 +229,60 @@ def reset(self):
self._started = False
self._ns_rewrite = {}

def serialize(self, stream, base=None, encoding=None, spacious=None, **args):
def _serialize_init(
self,
stream: IO[bytes],
base: Optional[str],
encoding: Optional[str],
spacious: Optional[bool],
) -> None:
self.reset()
self.stream = stream
if encoding is not None:
self.encoding = encoding
self.stream = TextIOWrapper(
stream, self.encoding, errors="replace", write_through=True
)
# if base is given here, use that, if not and a base is set for the graph use that
if base is not None:
self.base = base
elif self.store.base is not None:
self.base = self.store.base

if spacious is not None:
self._spacious = spacious

self.preprocess()
subjects_list = self.orderSubjects()

self.startDocument()

firstTime = True
for subject in subjects_list:
if self.isDone(subject):
continue
if firstTime:
firstTime = False
if self.statement(subject) and not firstTime:
self.write("\n")

self.endDocument()
stream.write("\n".encode("latin-1"))

self.base = None
def _serialize_end(self) -> None:
self.stream.flush()
self.stream.detach()
self.stream = None # type: ignore[assignment]

def serialize(
self,
stream: IO[bytes],
base: Optional[str] = None,
encoding: Optional[str] = None,
spacious: Optional[bool] = None,
**args,
):
self._serialize_init(stream, base, encoding, spacious)
try:
self.preprocess()
subjects_list = self.orderSubjects()

self.startDocument()

firstTime = True
for subject in subjects_list:
if self.isDone(subject):
continue
if firstTime:
firstTime = False
if self.statement(subject) and not firstTime:
self.write("\n")

self.endDocument()
self.stream.write("\n")
finally:
self._serialize_end()

def preprocessTriple(self, triple):
super(TurtleSerializer, self).preprocessTriple(triple)
Expand Down
Loading

0 comments on commit 2de6f76

Please sign in to comment.