Skip to content

Commit

Permalink
Merge pull request #4 from xgaia/dev
Browse files Browse the repository at this point in the history
Abstractor 4.0.0
  • Loading branch information
xgaia authored Jun 25, 2020
2 parents 12f0ad5 + 1d9377f commit 3c3c9ea
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 39 deletions.
33 changes: 30 additions & 3 deletions abstractor
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#! /usr/bin/python3

import logging
import argparse
from libabstractor.SparqlQuery import SparqlQuery
from libabstractor.QueryLibrary import QueryLibrary
Expand All @@ -19,35 +20,61 @@ class Abstractor(object):
parser.add_argument("-s", "--source", type=str, help="RDF data source (SPARQL endpoint url or path to RDF file)", required=True)
parser.add_argument("-t", "--source-type", choices=['sparql', 'xml', 'turtle', 'nt'], help="Source format", default="sparql")

parser.add_argument("--askomics-prefix", type=str, help="AskOmics prefix", default="http://www.semanticweb.org/user/ontologies/2018/1#")
parser.add_argument("--askomics-internal-namespace", type=str, help="AskOmics internal namespace", default="http://askomics.org/internal/")

parser.add_argument("-o", "--output", type=str, help="Output file", default="abstraction.rdf")
parser.add_argument("-f", "--output-format", choices=['xml', 'turtle', 'nt'], help="RDF format", default="turtle")
parser.add_argument("--owl", default=False, action='store_true', help="Use OWL ontology")

parser.add_argument("-v", "--verbosity", action="count", help="increase output verbosity")

self.args = parser.parse_args()

logging_level = logging.CRITICAL
if self.args.verbosity == 1:
logging_level = logging.ERROR
if self.args.verbosity == 2:
logging_level = logging.WARNING
if self.args.verbosity == 3:
logging_level = logging.INFO
if self.args.verbosity > 3:
logging_level = logging.DEBUG

logging.basicConfig(level=logging_level)

def main(self):
"""main"""
sparql = SparqlQuery(self.args.source, self.args.source_type, self.args.askomics_prefix)
sparql = SparqlQuery(self.args.source, self.args.source_type)
library = QueryLibrary()

rdf = RdfGraph(self.args.askomics_prefix)
rdf = RdfGraph(self.args.askomics_internal_namespace)

if self.args.source_type == "sparql":
rdf.add_location(self.args.source)

# Use owl ontology
if self.args.owl:
logging.debug("Use OWL Ontology")
result = sparql.process_query(library.ontologies)
for res in result:
logging.debug(res["ontology"])
logging.debug("Get entities and relation")
rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations_with_ontology(res["ontology"])))
logging.debug("Get decimal attributes")
rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes_with_ontology(res["ontology"])))
logging.debug("Get text attributes")
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes_with_ontology(res["ontology"])))

# All relations
else:
logging.debug("Get entities and relation")
rdf.add_entities_and_relations(sparql.process_query(library.entities_and_relations))
logging.debug("Get decimal attributes")
rdf.add_decimal_attributes(sparql.process_query(library.entities_and_numeric_attributes))
logging.debug("Get text attributes")
rdf.add_text_attributes(sparql.process_query(library.entities_and_text_attributes))

logging.debug("Write RDF ({}) into {}".format(self.args.output_format, self.args.output))
rdf.graph.serialize(destination=self.args.output, format=self.args.output_format, encoding="utf-8" if self.args.output_format == "turtle" else None)


Expand Down
63 changes: 36 additions & 27 deletions libabstractor/RdfGraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,19 @@ class RdfGraph(object):
The RDF graph
"""

def __init__(self, askomics_prefix):
def __init__(self, namespace_internal):
"""init
Parameters
----------
askomics_prefix : str
AskOmics prefix
namespace_internal : str
AskOmics internal namespace
"""
self.gprefix = rdflib.namespace.Namespace(askomics_prefix)
self.namespace_internal = rdflib.namespace.Namespace(namespace_internal)
self.graph = rdflib.Graph()

self.graph.bind('askomics', askomics_prefix)
self.graph.bind('askomics', namespace_internal)
self.prov = rdflib.Namespace('http://www.w3.org/ns/prov#')

def check_entity(self, entity):
"""Check if entity is correct (not rdf rdfs owl or virtuoso thing)
Expand All @@ -39,18 +40,25 @@ def check_entity(self, entity):
bool
True if entity is a true one
"""
excluded_prefixes = (
"http://www.w3.org/2002/07/owl#",
"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
"http://www.w3.org/2000/01/rdf-schema#",
"http://www.openlinksw.com/schemas/virtrdf#",
"http://www.w3.org/ns/sparql-service-description#"
excluded_namespaces = (
"http://www.w3.org",
"http://www.openlinksw.com"
)

if entity.lower().startswith(excluded_prefixes):
if entity.lower().startswith(excluded_namespaces):
return False
return True

def add_location(self, location):
    """Record the location of the data in the RDF graph

    A single triple is added to ``self.graph``: the blank node
    identified as "graph" is linked, through the ``atLocation`` term of
    ``self.prov``, to a literal holding the given location.

    Parameters
    ----------
    location : str
        URL of distant endpoint
    """
    subject = rdflib.BNode("graph")
    predicate = self.prov.atLocation
    value = rdflib.Literal(location)
    self.graph.add((subject, predicate, value))

def add_entities_and_relations(self, sparql_result):
"""Add entities and relation in the rdf graph
Expand All @@ -70,25 +78,25 @@ def add_entities_and_relations(self, sparql_result):
# Source entity
if self.check_entity(source_entity) and source_entity not in entities:
entities.append(source_entity)
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.gprefix["entity"]))
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.gprefix["startPoint"]))
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.namespace_internal["entity"]))
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, self.namespace_internal["startPoint"]))
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDF.type, rdflib.OWL.Class))
self.graph.add((rdflib.URIRef(source_entity), self.gprefix["instancesHaveNoLabels"], rdflib.Literal(True)))
self.graph.add((rdflib.URIRef(source_entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True)))
self.graph.add((rdflib.URIRef(source_entity), rdflib.RDFS.label, rdflib.Literal(self.get_label(source_entity))))

# Target entity
if self.check_entity(target_entity) and target_entity not in entities:
entities.append(target_entity)
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.gprefix["entity"]))
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.gprefix["startPoint"]))
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.namespace_internal["entity"]))
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, self.namespace_internal["startPoint"]))
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDF.type, rdflib.OWL.Class))
self.graph.add((rdflib.URIRef(target_entity), self.gprefix["instancesHaveNoLabels"], rdflib.Literal(True)))
self.graph.add((rdflib.URIRef(target_entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True)))
self.graph.add((rdflib.URIRef(target_entity), rdflib.RDFS.label, rdflib.Literal(self.get_label(target_entity))))

# Relation
if self.check_entity(relation):
self.graph.add((rdflib.URIRef(relation), rdflib.RDF.type, rdflib.OWL.ObjectProperty))
self.graph.add((rdflib.URIRef(relation), rdflib.RDF.type, self.gprefix["AskomicsRelation"]))
self.graph.add((rdflib.URIRef(relation), rdflib.RDF.type, self.namespace_internal["AskomicsRelation"]))
self.graph.add((rdflib.URIRef(relation), rdflib.RDFS.label, rdflib.Literal(self.get_label(relation))))
self.graph.add((rdflib.URIRef(relation), rdflib.RDFS.domain, rdflib.URIRef(source_entity)))
self.graph.add((rdflib.URIRef(relation), rdflib.RDFS.range, rdflib.URIRef(target_entity)))
Expand All @@ -105,8 +113,7 @@ def add_decimal_attributes(self, sparql_result):
entity = result["entity"]
attribute = result["attribute"]

if self.check_entity(entity) and self.check_entity(attribute):

if self.check_entity(entity):
self.graph.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(self.get_label(attribute))))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
Expand All @@ -124,12 +131,14 @@ def add_text_attributes(self, sparql_result):
entity = result["entity"]
attribute = result["attribute"]

if self.check_entity(entity) and self.check_entity(attribute):

self.graph.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(self.get_label(attribute))))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.string))
if self.check_entity(entity):
if attribute == "http://www.w3.org/2000/01/rdf-schema#label":
self.graph.remove((rdflib.URIRef(entity), self.namespace_internal["instancesHaveNoLabels"], rdflib.Literal(True)))
else:
self.graph.add((rdflib.URIRef(attribute), rdflib.RDF.type, rdflib.OWL.DatatypeProperty))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.label, rdflib.Literal(self.get_label(attribute))))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.domain, rdflib.URIRef(entity)))
self.graph.add((rdflib.URIRef(attribute), rdflib.RDFS.range, rdflib.XSD.string))

def get_label(self, uri):
"""Get a label from an URI
Expand Down
11 changes: 4 additions & 7 deletions libabstractor/SparqlQuery.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import rdflib
from SPARQLWrapper import SPARQLWrapper, JSON

Expand All @@ -19,7 +20,7 @@ class SparqlQuery(object):
Description
"""

def __init__(self, source, source_type, prefix):
def __init__(self, source, source_type):
"""Init
Parameters
Expand All @@ -28,12 +29,9 @@ def __init__(self, source, source_type, prefix):
Description
source_type : TYPE
Description
prefix : string
Prefix URI
"""
self.source = source
self.source_type = source_type
self.prefix = prefix
self.prefixes = {
"owl:": "http://www.w3.org/2002/07/owl#",
"rdf:": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
Expand All @@ -43,8 +41,7 @@ def __init__(self, source, source_type, prefix):
"xsd:": "http://www.w3.org/2001/XMLSchema#",
"skos:": "http://www.w3.org/2004/02/skos/core#",
"chebi:": "http://purl.obolibrary.org/obo/",
"drugbankdrugs:": "http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/",
"askomics:": self.prefix
"drugbankdrugs:": "http://wifo5-04.informatik.uni-mannheim.de/drugbank/resource/drugs/"
}

# if source is a file, load it in a rdflib graph
Expand Down Expand Up @@ -178,7 +175,7 @@ def process_query(self, query):
Parsed results
"""
# prefixed_query = self.get_sparl_prefix() + query
# print(query)
logging.debug(query)
if self.source_type == "sparql":
return self.parse_sparql_results(self.execute_sparql_query(query))
else:
Expand Down
4 changes: 2 additions & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@

setup(
name='abstractor',
version='2.0.0',
version='4.0.0',
description='Abstraction generator for AskOmics, from a distant SPARQL endpoint',
author='Xavier Garnier',
author_email='[email protected]',
url='https://github.com/askomics/abstractor',
download_url='https://github.com/askomics/abstractor/archive/2.0.0.tar.gz',
download_url='https://github.com/askomics/abstractor/archive/4.0.0.tar.gz',
install_requires=['SPARQLWrapper'],
packages=find_packages(),
license='AGPL',
Expand Down

0 comments on commit 3c3c9ea

Please sign in to comment.