From c8c38640439ea4380650fa2d100f86d42a6fc4f5 Mon Sep 17 00:00:00 2001 From: Ivan Herman Date: Wed, 22 Jan 2020 17:16:12 +0100 Subject: [PATCH 1/6] Got the command line version running with Py3 --- pyRdfa/__init__.py | 17 ++++++++++++++--- pyRdfa/utils.py | 2 +- scripts/localRDFa.py | 10 ++++++---- 3 files changed, 21 insertions(+), 8 deletions(-) diff --git a/pyRdfa/__init__.py b/pyRdfa/__init__.py index 4833afc..635753f 100644 --- a/pyRdfa/__init__.py +++ b/pyRdfa/__init__.py @@ -312,7 +312,7 @@ class pyRdfaError(Exception) : # This comes from wikipedia registered_iana_schemes = [ - "aaa","aaas","acap","cap","cid","crid","data","dav","dict","dns","fax","file", "ftp","geo","go", + "aaa","aaas","acap","cap","cid","crid","data","dav","dict","did","dns","fax","file", "ftp","geo","go", "gopher","h323","http","https","iax","icap","im","imap","info","ipp","iris","ldap", "lsid", "mailto","mid","modem","msrp","msrps", "mtqp", "mupdate","news","nfs","nntp","opaquelocktoken", "pop","pres", "prospero","rstp","rsync", "service","shttp","sieve","sip","sips", "sms", "snmp", "soap", "tag", @@ -614,7 +614,12 @@ def copyErrors(tog, options) : if self.charset : # This means the HTTP header has provided a charset, or the # file is a local file when we suppose it to be a utf-8 - dom = parser.parse(input, override_encoding=self.charset) + # + # 2020-01-20, Ivan Herman + # for some reasons the python3 version ran into a problem with this html5lib call + # the override_encoding argument was not accepted. + # dom = parser.parse(input, override_encoding=self.charset) + dom = parser.parse(input) else : # No charset set. The HTMLLib parser tries to sniff into the # the file to find a meta header for the charset; if that @@ -700,8 +705,14 @@ def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) : # the value of rdfOutput determines the reaction on exceptions... for name in names : self.graph_from_source(name, graph, rdfOutput) + retval = graph.serialize(format=outputFormat) - return retval + # Stupid difference between python2 and python3... + if PY3 : + return str(graph.serialize(format=outputFormat), encoding='utf-8') + else : + return graph.serialize(format=outputFormat) + def rdf_from_source(self, name, outputFormat = "turtle", rdfOutput = False) : """ diff --git a/pyRdfa/utils.py b/pyRdfa/utils.py index d9f118f..55c667b 100644 --- a/pyRdfa/utils.py +++ b/pyRdfa/utils.py @@ -82,7 +82,7 @@ def __init__(self, name, additional_headers = {}) : import requests # Switching off the verification is not cool. But, at least for now, too many - # sites still go wrong because the cerficates are not o.k. with request... + # sites still go wrong because the certificates are not o.k. with request... r = requests.get(url, headers=additional_headers, verify=False) self.data = r.content self.headers = r.headers diff --git a/scripts/localRDFa.py b/scripts/localRDFa.py index 3be7d4a..f461f45 100755 --- a/scripts/localRDFa.py +++ b/scripts/localRDFa.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python +#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Run the pyRdfa package locally, ie, on a local file @@ -49,7 +49,7 @@ """ def usage() : - print usageText % sys.argv[0] + print(usageText % sys.argv[0]) format = "turtle" extras = [] @@ -133,6 +133,8 @@ def usage() : processor = pyRdfa(options, base) if len(value) >= 1 : - print processor.rdf_from_sources(value, outputFormat = format, rdfOutput = rdfOutput) + print(processor.rdf_from_sources(value, outputFormat = format, rdfOutput = rdfOutput)) else : - print processor.rdf_from_source(sys.stdin, outputFormat = format, rdfOutput = rdfOutput) + print(processor.rdf_from_source(sys.stdin, outputFormat = format, rdfOutput = rdfOutput)) + + From 03080caae96d74341ae87fb2bb49a4b232321728 Mon Sep 17 00:00:00 2001 From: Ivan Herman Date: Wed, 22 Jan 2020 17:29:29 +0100 Subject: [PATCH 2/6] Syntactically upgraded the CGI script; version bumped --- pyRdfa/__init__.py | 4 +- scripts/CGI_RDFa.py | 162 ++++++++++++++++++++++++++++++-------------- 2 files changed, 114 insertions(+), 52 deletions(-) diff --git a/pyRdfa/__init__.py b/pyRdfa/__init__.py index 635753f..905df43 100644 --- a/pyRdfa/__init__.py +++ b/pyRdfa/__init__.py @@ -141,7 +141,7 @@ @summary: RDFa parser (distiller) -@requires: Python version 2.5 or up; 2.7 is preferred +@requires: Python version 2.8 or python 3.8 or up @requires: U{RDFLib}; version 3.X is preferred. @requires: U{html5lib} for the HTML5 parsing (note that version 1.0b1 and 1.0b2 should be avoided, it may lead to unicode encoding problems) @requires: U{httpheader}; however, a small modification had to make on the original file, so for this reason and to make distribution easier this module (single file) is added to the package. @@ -156,7 +156,7 @@ @var uri_schemes: List of registered (or widely used) URI schemes; used for warnings... """ -__version__ = "3.5.3" +__version__ = "4.0.0" __author__ = 'Ivan Herman' __contact__ = 'Ivan Herman, ivan@w3.org' __license__ = 'W3C® SOFTWARE NOTICE AND LICENSE, http://www.w3.org/Consortium/Legal/2002/copyright-software-20021231' diff --git a/scripts/CGI_RDFa.py b/scripts/CGI_RDFa.py index 0fbf0bb..10658fb 100755 --- a/scripts/CGI_RDFa.py +++ b/scripts/CGI_RDFa.py @@ -1,4 +1,4 @@ -#!/usr/bin/python2.5 +#!/usr/local/bin/python3 # -*- coding: utf-8 -*- # Maintainer: Ivan Herman @@ -15,30 +15,78 @@ """ """ -$Id: RDFa.py,v 1.9 2012/03/12 11:06:47 ivan Exp $ +$Id: RDFa.py,v 1.27 2018/05/23 08:57:19 carcone Exp $ """ -__version__ = "3.0" +__version__ = "4.0.0" import cgi -import cgitb; cgitb.enable() +import cgitb import sys, os -import StringIO +#import StringIO #cgi.print_environ() if sys.platform == "darwin" : # this is my local machine - sys.path.insert(0,"/Users/ivan/W3C/dev/2004/PythonLib-IH") - sys.path.insert(0,"/Users/ivan/Library/Python") - sys.path.insert(0,"/Users/ivan/W3C/dev/2004/PythonLib-IH/rdfa-1.1") + sys.path.insert(0,'/Users/ivan/Library/Python') + sys.path.insert(0,'/Users/ivan/Library/Python/RDFa') os.environ['PyRdfaCacheDir'] = '/Users/ivan/.pyrdfa-cache' + cgitb.enable() + else : - # this is the server on W3C - sys.path.insert(0,"/usr/local/lib/python2.4/site-packages/PythonLib-IH") - sys.path.insert(0,"/usr/local/lib/python2.4/site-packages/PythonLib-IH/rdfa-1.1") + # This will have to be updated for the Python3 installation!!! + # webencodings pip3 should also be done!!! + sys.path.insert(0,"/usr/lib/python2.7/dist-packages") + sys.path.insert(0,'/home/ivan/lib/python') os.environ['PyRdfaCacheDir'] = '/usr/local/apache/cgi/cgi-bin-other/RDFa/data-local' + cgitb.enable(display=0, logdir="/home/nobody/tracebacks/") from pyRdfa import processURI, RDFaError +# Register the RDFa JSON-LD serializer; for some reasons installing via pip did not work +from rdflib.plugin import register, Serializer +register('json', Serializer, 'rdflib_jsonld.serializer', 'JsonLDSerializer') + +def err_message(msg) : + from cleanhtml import clean_print + print('Content-type: text/html; charset=utf-8') + print('Status: 400 Invalid Input') + print() + print("") + print("") + print("Error in RDFa processing") + print("") + print("

Error in distilling RDFa

") + print("

") + clean_print("pyRdfa cannot process this URI: %s", uri) + print("

") + if len(msg) != 0 : + print("

") + clean_print(msg) + print("

") + print("") + print("") + sys.exit(1) + + +def brett_test(uri) : + + if not sys.platform == "darwin" : + from checkremote import check_url_safety, UnsupportedResourceError + from urllib2 import HTTPError, URLError + try: + check_url_safety(uri) + except HTTPError as e: + err_message('HTTP Error with the error code: %s and the error message: "%s"' (e.code, e.reason)) + except URLError as e: + err_message('URL Error with the error message: "%s"' % e.reason) + except UnsupportedResourceError as e: + msg = e.args[0] + ": " + e.args[1] + err_message('Unsupported Resource Error with the error message "%s"' % msg) + except Exception as e: + l = len(e.args) + msg = "" if l == 0 else (e.args[0] if l == 1 else e.args) + err_message('Exception raised: "%s"' % msg) + # # to make this thing exist... uri = "" @@ -50,53 +98,67 @@ uri = "text:" else : if not "uri" in form : - print 'Content-type: text/html; charset=utf-8' - print 'Status: 400 Invalid Input' - print - print "" - print "" - print "Error in RDFa processing" - print "" - print "

Error in distilling RDFa

" - print "No URI has been specified" - print "" - print "" + print('Content-type: text/html; charset=utf-8') + print('Status: 400 Invalid Input') + print() + print("") + print("") + print("Error in RDFa processing") + print("") + print("

Error in distilling RDFa

") + print("

No URI has been specified

") + print("") + print("") sys.exit(1) try : - #uri = form["uri"].value uri = form.getfirst("uri") except : - print 'Content-type: text/html; charset=utf-8' - print 'Status: 400 Invalid Input' - print - print "" - print "" - print "Error in RDFa processing" - print "" - print "

Error in distilling RDFa

" - print "No URI has been specified" - print "" - print "" + print('Content-type: text/html; charset=utf-8') + print('Status: 400 Invalid Input') + print() + print("") + print("") + print("Error in RDFa processing") + print("") + print("

Error in distilling RDFa

") + print("

No URI has been specified

") + print("") + print("") sys.exit(1) if "validate" in form : from rdfavalidator import validateURI - print 'Content-Type: text/html; charset=utf-8' - print - print validateURI(uri, form) + if not (uri == 'text:' or uri == 'uploaded:') : + brett_test(uri) + print('Content-Type: text/html; charset=utf-8') + print() + print(validateURI(uri, form)) else : - # Thanks to Sergio and Diego for the idea and code for the referer branch - if uri == "referer" : - uri = os.getenv('HTTP_REFERER') - newuri = "http://www.w3.org/2012/pyRdfa/extract?uri=" + uri - print "Status: 302 Moved" - print "Location: " + newuri - print - else : - if "format" in form.keys() : - format = form.getfirst("format") + try : + # Thanks to Sergio and Diego for the idea and code for the referer branch + if uri == "referer" : + uri = os.getenv('HTTP_REFERER') + if uri is None: + newuri = "http://www.w3.org/2012/pyRdfa/no_referer.html" + else: + brett_test(uri) + newuri = "http://www.w3.org/2012/pyRdfa/extract?uri=" + uri + print("Status: 307 Moved Temporarily") + print("Location: " + newuri) + print() else : - format = "turtle" - retval = processURI(uri, format, form) - print retval + # last point of check: use Brett's script to check the validity of the URI + if not (uri == 'text:' or uri == 'uploaded:') : + brett_test(uri) + + if "format" in form.keys() : + format = form.getfirst("format") + else : + format = "turtle" + retval = processURI(uri, format, form) + print(retval) + except Exception as e : + l = len(e.args) + msg = "" if l == 0 else (e.args[0] if l == 1 else e.args) + err_message('Exception raised: "%s"' % msg) From f2b11f91dfc34d231f89e15ba6c685b361cbeaa8 Mon Sep 17 00:00:00 2001 From: Ivan Herman Date: Thu, 23 Jan 2020 18:16:07 +0100 Subject: [PATCH 3/6] Forgot to remove a serialization (was done twice, unnecessarily) --- pyRdfa/__init__.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pyRdfa/__init__.py b/pyRdfa/__init__.py index 905df43..cd0bb71 100644 --- a/pyRdfa/__init__.py +++ b/pyRdfa/__init__.py @@ -706,7 +706,6 @@ def rdf_from_sources(self, names, outputFormat = "turtle", rdfOutput = False) : for name in names : self.graph_from_source(name, graph, rdfOutput) - retval = graph.serialize(format=outputFormat) # Stupid difference between python2 and python3... if PY3 : return str(graph.serialize(format=outputFormat), encoding='utf-8') From 980545454d32a6578047e749fcc66bacd0742918 Mon Sep 17 00:00:00 2001 From: Ivan Herman Date: Fri, 24 Jan 2020 15:52:10 +0100 Subject: [PATCH 4/6] Regenerated the documentation --- Doc-pyRdfa/class-tree.html | 2 +- Doc-pyRdfa/help.html | 2 +- Doc-pyRdfa/identifier-index.html | 2 +- Doc-pyRdfa/module-tree.html | 2 +- Doc-pyRdfa/pyRdfa-module.html | 6 +- Doc-pyRdfa/pyRdfa-pysrc.html | 679 +++++++++--------- Doc-pyRdfa/pyRdfa.FailedSource-class.html | 2 +- Doc-pyRdfa/pyRdfa.HTTPError-class.html | 2 +- Doc-pyRdfa/pyRdfa.ProcessingError-class.html | 2 +- Doc-pyRdfa/pyRdfa.RDFaError-class.html | 2 +- Doc-pyRdfa/pyRdfa.embeddedRDF-module.html | 2 +- Doc-pyRdfa/pyRdfa.embeddedRDF-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.host-module.html | 15 +- Doc-pyRdfa/pyRdfa.host-pysrc.html | 397 +++++----- .../pyRdfa.host.HostLanguage-class.html | 2 +- Doc-pyRdfa/pyRdfa.host.MediaTypes-class.html | 2 +- Doc-pyRdfa/pyRdfa.host.atom-module.html | 2 +- Doc-pyRdfa/pyRdfa.host.atom-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.host.html5-module.html | 2 +- Doc-pyRdfa/pyRdfa.host.html5-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.initialcontext-module.html | 2 +- Doc-pyRdfa/pyRdfa.initialcontext-pysrc.html | 93 +-- .../pyRdfa.initialcontext.Wrapper-class.html | 2 +- Doc-pyRdfa/pyRdfa.options-module.html | 2 +- Doc-pyRdfa/pyRdfa.options-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.options.Options-class.html | 2 +- .../pyRdfa.options.ProcessorGraph-class.html | 2 +- Doc-pyRdfa/pyRdfa.parse-module.html | 2 +- Doc-pyRdfa/pyRdfa.parse-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.property-module.html | 2 +- Doc-pyRdfa/pyRdfa.property-pysrc.html | 2 +- ...pyRdfa.property.ProcessProperty-class.html | 2 +- Doc-pyRdfa/pyRdfa.pyRdfa-class.html | 2 +- Doc-pyRdfa/pyRdfa.pyRdfaError-class.html | 2 +- Doc-pyRdfa/pyRdfa.rdflibparsers-module.html | 2 +- Doc-pyRdfa/pyRdfa.rdflibparsers-pysrc.html | 2 +- .../pyRdfa.rdflibparsers.HTurtle-class.html | 2 +- ...dfa.rdflibparsers.HTurtleParser-class.html | 2 +- ...Rdfa.rdflibparsers.RDFa10Parser-class.html | 2 +- ...pyRdfa.rdflibparsers.RDFaParser-class.html | 2 +- ...libparsers.StructuredDataParser-class.html | 2 +- Doc-pyRdfa/pyRdfa.rdfs-module.html | 16 +- Doc-pyRdfa/pyRdfa.rdfs-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.rdfs.cache-module.html | 2 +- Doc-pyRdfa/pyRdfa.rdfs.cache-pysrc.html | 2 +- .../pyRdfa.rdfs.cache.CachedVocab-class.html | 2 +- ...dfa.rdfs.cache.CachedVocabIndex-class.html | 2 +- Doc-pyRdfa/pyRdfa.rdfs.process-module.html | 2 +- Doc-pyRdfa/pyRdfa.rdfs.process-pysrc.html | 2 +- .../pyRdfa.rdfs.process.MiniOWL-class.html | 2 +- Doc-pyRdfa/pyRdfa.state-module.html | 4 +- Doc-pyRdfa/pyRdfa.state-pysrc.html | 2 +- .../pyRdfa.state.ExecutionContext-class.html | 2 +- .../pyRdfa.state.ListStructure-class.html | 2 +- Doc-pyRdfa/pyRdfa.termorcurie-module.html | 6 +- Doc-pyRdfa/pyRdfa.termorcurie-pysrc.html | 2 +- ...Rdfa.termorcurie.InitialContext-class.html | 2 +- .../pyRdfa.termorcurie.TermOrCurie-class.html | 2 +- Doc-pyRdfa/pyRdfa.transform-module.html | 2 +- Doc-pyRdfa/pyRdfa.transform-pysrc.html | 2 +- .../pyRdfa.transform.DublinCore-module.html | 2 +- .../pyRdfa.transform.DublinCore-pysrc.html | 2 +- .../pyRdfa.transform.OpenID-module.html | 2 +- Doc-pyRdfa/pyRdfa.transform.OpenID-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.transform.lite-module.html | 2 +- Doc-pyRdfa/pyRdfa.transform.lite-pysrc.html | 2 +- .../pyRdfa.transform.metaname-module.html | 2 +- .../pyRdfa.transform.metaname-pysrc.html | 2 +- .../pyRdfa.transform.prototype-module.html | 2 +- .../pyRdfa.transform.prototype-pysrc.html | 2 +- Doc-pyRdfa/pyRdfa.utils-module.html | 3 +- Doc-pyRdfa/pyRdfa.utils-pysrc.html | 6 +- Doc-pyRdfa/pyRdfa.utils.URIOpener-class.html | 2 +- Doc-pyRdfa/pyRdfaExtras-module.html | 2 +- Doc-pyRdfa/pyRdfaExtras-pysrc.html | 2 +- Doc-pyRdfa/pyRdfaExtras.MyGraph-class.html | 3 +- pyRdfa/host/__init__.py | 3 +- 77 files changed, 691 insertions(+), 670 deletions(-) diff --git a/Doc-pyRdfa/class-tree.html b/Doc-pyRdfa/class-tree.html index 18e006f..94a676d 100644 --- a/Doc-pyRdfa/class-tree.html +++ b/Doc-pyRdfa/class-tree.html @@ -201,7 +201,7 @@

Class Hierarchy