Skip to content

Commit

Permalink
fix: issues with string destination handling in `{Graph,Result}.seria…
Browse files Browse the repository at this point in the history
…lize`

Change `{Graph,Result}.serialize` to only handle string destinations as URIs if
the scheme is `file` and to treat them as operating system paths in all
other cases.

This is for two reasons:
1. Many valid Unix and Windows paths parse fine as URIs using `urlparse`, e.g. `C:\some\path`
   and `some:/path`, and have schemes and no netloc; however, they should
   be treated as operating system paths.
2. So that `Graph` and `Result` behave consistently.
  • Loading branch information
aucampia committed Jul 30, 2022
1 parent 283bef2 commit 5cd02f4
Show file tree
Hide file tree
Showing 10 changed files with 563 additions and 259 deletions.
24 changes: 24 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,30 @@ and will be removed for release.
<!-- -->
<!-- -->


<!-- -->
<!-- -->
<!-- CHANGE BARRIER: START #2068 -->
<!-- -->
<!-- -->

- Improve file-URI and path handling in `Graph.serialize` and `Result.serialize` to
address problems with Windows path handling in `Result.serialize` and to make
the behavior between `Graph.serialize` and `Result.serialize` more consistent.
Closed [issue #2067](https://github.com/RDFLib/rdflib/issues/2067).
[PR #2068](https://github.com/RDFLib/rdflib/pull/2068).
- If the destination is a string then it will only be handled as a URI if
it has a `file` scheme; in all other cases string values will be
treated as operating system paths. This change is done primarily because
Windows paths (e.g. `C:\some\path`) and some Unix paths (e.g. `some:/path`) look like valid URIs with valid schemes but should be treated as operating system paths.
- Simplified file writing to avoid a temporary file.

<!-- -->
<!-- -->
<!-- CHANGE BARRIER: END #2068 -->
<!-- -->
<!-- -->

<!-- -->
<!-- -->
<!-- CHANGE BARRIER: START -->
Expand Down
29 changes: 13 additions & 16 deletions rdflib/graph.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,6 @@
import logging
import os
import pathlib
import random
import shutil
import tempfile
from io import BytesIO
from typing import (
IO,
Expand Down Expand Up @@ -1201,20 +1198,20 @@ def serialize(
serializer.serialize(stream, base=base, encoding=encoding, **args)
else:
if isinstance(destination, pathlib.PurePath):
location = str(destination)
os_path = str(destination)
else:
location = cast(str, destination)
scheme, netloc, path, params, _query, fragment = urlparse(location)
if netloc != "":
raise ValueError(
f"destination {destination} is not a local file reference"
)
fd, name = tempfile.mkstemp()
stream = os.fdopen(fd, "wb")
serializer.serialize(stream, base=base, encoding=encoding, **args)
stream.close()
dest = url2pathname(path) if scheme == "file" else location
shutil.move(name, dest)
scheme, netloc, path, params, query, fragment = urlparse(location)
if scheme == "file":
if netloc != "":
raise ValueError(
f"the file URI {location!r} has an authority component which is not supported"
)
os_path = url2pathname(path)
else:
os_path = location
with open(os_path, "wb") as stream:
serializer.serialize(stream, encoding=encoding, **args)
return self

def print(self, format="turtle", encoding="utf-8", out=None):
Expand Down Expand Up @@ -1276,7 +1273,7 @@ def parse(
... </rdf:Description>
... </rdf:RDF>
... '''
>>> import tempfile
>>> import os, tempfile
>>> fd, file_name = tempfile.mkstemp()
>>> f = os.fdopen(fd, "w")
>>> dummy = f.write(my_data) # Returns num bytes written
Expand Down
24 changes: 11 additions & 13 deletions rdflib/query.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
import itertools
import os
import shutil
import tempfile
import types
import warnings
from io import BytesIO
from typing import IO, TYPE_CHECKING, List, Optional, Union, cast
from urllib.parse import urlparse
from urllib.request import url2pathname

__all__ = [
"Processor",
Expand Down Expand Up @@ -267,16 +265,16 @@ def serialize(
else:
location = cast(str, destination)
scheme, netloc, path, params, query, fragment = urlparse(location)
if netloc != "":
print(
"WARNING: not saving as location" + "is not a local file reference"
)
return None
fd, name = tempfile.mkstemp()
stream = os.fdopen(fd, "wb")
serializer.serialize(stream, encoding=encoding, **args)
stream.close()
shutil.move(name, path)
if scheme == "file":
if netloc != "":
raise ValueError(
f"the file URI {location!r} has an authority component which is not supported"
)
os_path = url2pathname(path)
else:
os_path = location
with open(os_path, "wb") as stream:
serializer.serialize(stream, encoding=encoding, **args)
return None

def __len__(self):
Expand Down
2 changes: 1 addition & 1 deletion requirements.flake8.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
flake8
flakeheaven; python_version >= '3.8.0'
flakeheaven >= 2.1.3; python_version >= '3.8.0'
pep8-naming
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
"dev": [
"black==22.6.0",
"flake8",
"flakeheaven; python_version >= '3.8.0'",
"flakeheaven >= 2.1.3; python_version >= '3.8.0'",
"isort",
"mypy",
"pep8-naming",
Expand Down
Loading

0 comments on commit 5cd02f4

Please sign in to comment.