From 26367ebcec9e059f84cff9d4d91b1d17b0ada8fe Mon Sep 17 00:00:00 2001
From: Josef Hardi
Date: Fri, 23 Feb 2024 18:42:37 -0800
Subject: [PATCH] Enrich antibody with external data from SciCrunch database

---
 src/enrichment/enrich-omap.js   | 39 ++++++++++++++++++-
 src/utils/get-rrid-terms.sparql | 16 ++++++++
 src/utils/rdf-builder.js        | 67 +++++++++++++++++++++++++++++++++
 src/utils/scicrunch-client.js   | 60 +++++++++++++++++++++++++++++
 4 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 src/utils/get-rrid-terms.sparql
 create mode 100644 src/utils/rdf-builder.js
 create mode 100644 src/utils/scicrunch-client.js

diff --git a/src/enrichment/enrich-omap.js b/src/enrichment/enrich-omap.js
index 84c3eb8..e4e3ef5 100644
--- a/src/enrichment/enrich-omap.js
+++ b/src/enrichment/enrich-omap.js
@@ -1,7 +1,10 @@
+import { readFileSync } from 'fs';
 import { error } from 'console';
 import { resolve } from 'path';
 import { getRawData } from '../normalization/normalize-omap.js';
 import { info, more } from '../utils/logging.js';
+import { RdfBuilder, iri, literal } from '../utils/rdf-builder.js';
+import { retrieveAntibody } from '../utils/scicrunch-client.js';
 import { convert, merge } from '../utils/robot.js';
 import { enrichBasicData } from './enrich-basic.js';
 import {
@@ -29,7 +32,6 @@ export async function enrichOmapData(context) {
   } else {
     try {
       const { selectedDigitalObject: obj } = context;
-
       const normalizedPath = resolve(obj.path, 'normalized/normalized.yaml');
       const baseInputPath = resolve(obj.path, 'enriched/base-input.ttl');
       convertNormalizedDataToOwl(context, normalizedPath, baseInputPath);
@@ -39,6 +41,41 @@ export async function enrichOmapData(context) {
       const ontologyExtractionPaths = [];
       ontologyExtractionPaths.push(baseInputPath); // Set the base input path as the initial
 
+      const rridEntitiesPath = collectEntities(context, 'rrid', baseInputPath);
+      if (!isFileEmpty(rridEntitiesPath)) {
+        const rridExtractPath = resolve(obj.path, `enriched/rrid-extract.ttl`);
+        // Keep the lower-cased text that follows "RRID:" on each line and
+        // skip lines (including empty ones) that do not contain that marker.
+        const rrids = readFileSync(rridEntitiesPath).toString()
+          .split(/[\n\r]/)
+          .map((str) => /.*RRID:(?<rrid>.*)/.exec(str))
+          .filter((match) => match !== null)
+          .map((match) => match.groups.rrid.toLowerCase());
+        // Declare one owl:Class per antibody returned by SciCrunch.
+        const antibodies = await retrieveAntibody(rrids);
+        const builder = antibodies.reduce((accum, item) => {
+          const antibody = item["_source"]["item"];
+          const antibodyIri = `http://identifiers.org/rrid/RRID:${antibody.identifier}`;
+          // add() ignores falsy terms, so an absent name or description adds no triple.
+          return accum
+            .add(
+              iri(antibodyIri),
+              iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+              iri("http://www.w3.org/2002/07/owl#Class"))
+            .add(
+              iri(antibodyIri),
+              iri("http://www.w3.org/2000/01/rdf-schema#label"),
+              antibody.name && literal(antibody.name))
+            .add(
+              iri(antibodyIri),
+              iri("http://www.w3.org/2000/01/rdf-schema#comment"),
+              antibody.description && literal(antibody.description));
+        }, new RdfBuilder(`${context.purlIri}/rrid/antibody`));
+        builder.save(rridExtractPath);
+        logOutput(rridExtractPath);
+        ontologyExtractionPaths.push(rridExtractPath);
+      }
+
       const uberonEntitiesPath = collectEntities(context, 'uberon', baseInputPath);
       if (!isFileEmpty(uberonEntitiesPath)) {
         info('Extracting UBERON.');
diff --git a/src/utils/get-rrid-terms.sparql b/src/utils/get-rrid-terms.sparql
new file mode 100644
index 0000000..96a9fe5
--- /dev/null
+++ b/src/utils/get-rrid-terms.sparql
@@ -0,0 +1,16 @@
+SELECT DISTINCT ?entity
+WHERE {
+  {
+    SELECT (?s AS ?entity) WHERE {
+      ?s ?p ?o .
+      FILTER(STRSTARTS(STR(?s),"http://identifiers.org/rrid/RRID:"))
+    }
+  }
+  UNION
+  {
+    SELECT (?o AS ?entity) WHERE {
+      ?s ?p ?o .
+      FILTER(STRSTARTS(STR(?o),"http://identifiers.org/rrid/RRID:"))
+    }
+  }
+}
\ No newline at end of file
diff --git a/src/utils/rdf-builder.js b/src/utils/rdf-builder.js
new file mode 100644
index 0000000..74d342d
--- /dev/null
+++ b/src/utils/rdf-builder.js
@@ -0,0 +1,67 @@
+import { writeFileSync } from 'fs';
+import { graph, sym, lit, serialize } from 'rdflib';
+
+/**
+ * Chainable builder that collects RDF statements in an rdflib graph.
+ * The graph starts with one statement typing `iri` as an owl:Ontology.
+ * The function assigns its methods to `this`, so call it with `new`.
+ *
+ * @param {string} iri - IRI of the ontology being built.
+ */
+export function RdfBuilder(iri) {
+  const store = graph();
+  store.add(
+    sym(iri),
+    sym("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
+    sym("http://www.w3.org/2002/07/owl#Ontology"));
+
+  // Called without a callback so rdflib returns the document as a string
+  // and lets a serialization failure throw instead of being ignored.
+  const toText = () => serialize(null, store, null, null);
+
+  /**
+   * Adds a statement; the call is ignored when any term is falsy.
+   *
+   * @returns {RdfBuilder} This builder, to allow chaining.
+   */
+  this.add = function (subject, predicate, object) {
+    if (subject && predicate && object) {
+      store.add(subject, predicate, object);
+    }
+    return this;
+  };
+
+  /**
+   * @returns {string} The collected statements, serialized by rdflib.
+   */
+  this.build = function () {
+    return toText();
+  };
+
+  /**
+   * Serializes the collected statements and writes them to a file.
+   *
+   * @param {string} filePath - Path of the file to write.
+   */
+  this.save = function (filePath) {
+    writeFileSync(filePath, toText());
+  };
+}
+
+/**
+ * Creates an rdflib named node.
+ *
+ * @param {string} text - The IRI of the node.
+ */
+export function iri(text) {
+  return sym(text);
+}
+
+/**
+ * Creates an rdflib literal.
+ *
+ * @param {string} text - The value of the literal.
+ */
+export function literal(text) {
+  return lit(text);
+}
diff --git a/src/utils/scicrunch-client.js b/src/utils/scicrunch-client.js
new file mode 100644
index 0000000..a2a0832
--- /dev/null
+++ b/src/utils/scicrunch-client.js
@@ -0,0 +1,60 @@
+import fetch from 'node-fetch';
+
+const API_BASE = "https://api.scicrunch.io/elastic/v1";
+const ANTIBODY_INDEX = "RIN_Antibody_pr";
+const TOOL_INDEX = "RIN_Tool_pr";
+
+/**
+ * Searches the SciCrunch antibody index for the given RRIDs.
+ * The API key is read from the SCICRUNCH_API_KEY environment variable.
+ *
+ * @param {string[]} rrids - Values matched against the `rrid.curie` field.
+ * @returns {Promise<object[]>} The `hits.hits` array of the search response.
+ * @throws {Error} When the request fails or the response has no hits array.
+ */
+export async function retrieveAntibody(rrids) {
+  const url = `${API_BASE}/${ANTIBODY_INDEX}/_search`;
+  const options = {
+    method: "POST",
+    headers: {
+      'Content-Type': "application/json",
+      apikey: process.env.SCICRUNCH_API_KEY
+    },
+    body: JSON.stringify(createQuery(rrids))
+  };
+  const response = await fetch(url, options);
+  if (!response.ok) {
+    throw new Error(`SciCrunch request failed with HTTP status ${response.status}`);
+  }
+  const data = await response.json();
+  const hits = data && data.hits && data.hits.hits;
+  if (!Array.isArray(hits)) {
+    throw new Error('SciCrunch response does not contain a hits array');
+  }
+  return hits;
+}
+
+/**
+ * Builds the search request body for a list of RRIDs.
+ *
+ * @param {string[]} rrids - Values matched against the `rrid.curie` field.
+ * @returns {object} Query limited to 1000 hits and to the identifier, name
+ *   and description of each item.
+ */
+function createQuery(rrids) {
+  return {
+    size: 1000,
+    query: {
+      terms: {
+        "rrid.curie": rrids
+      }
+    },
+    "_source": {
+      includes: [
+        "item.identifier",
+        "item.name",
+        "item.description"
+      ]
+    }
+  };
+}
\ No newline at end of file