Skip to content

Commit

Permalink
Enrich antibody with external data from SciCrunch database
Browse files Browse the repository at this point in the history
  • Loading branch information
johardi committed Feb 24, 2024
1 parent 72d1920 commit 26367eb
Show file tree
Hide file tree
Showing 4 changed files with 129 additions and 1 deletion.
36 changes: 35 additions & 1 deletion src/enrichment/enrich-omap.js
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
import { readFileSync } from 'fs';
import { error } from 'console';
import { resolve } from 'path';
import { getRawData } from '../normalization/normalize-omap.js';
import { info, more } from '../utils/logging.js';
import { RdfBuilder, iri, literal } from '../utils/rdf-builder.js';
import { retrieveAntibody } from '../utils/scicrunch-client.js';
import { convert, merge } from '../utils/robot.js';
import { enrichBasicData } from './enrich-basic.js';
import {
Expand Down Expand Up @@ -29,7 +32,6 @@ export async function enrichOmapData(context) {
} else {
try {
const { selectedDigitalObject: obj } = context;

const normalizedPath = resolve(obj.path, 'normalized/normalized.yaml');
const baseInputPath = resolve(obj.path, 'enriched/base-input.ttl');
convertNormalizedDataToOwl(context, normalizedPath, baseInputPath);
Expand All @@ -39,6 +41,38 @@ export async function enrichOmapData(context) {
const ontologyExtractionPaths = [];
ontologyExtractionPaths.push(baseInputPath); // Set the base input path as the initial

const rridEntitiesPath = collectEntities(context, 'rrid', baseInputPath);
if (!isFileEmpty(rridEntitiesPath)) {
const rridExtractPath = resolve(obj.path, `enriched/rrid-extract.ttl`);
const rrids = readFileSync(rridEntitiesPath).toString()
.split(/[\n\r]/)
.filter((str) => str)
.map((str) => /.*RRID:(?<rrid>.*)/.exec(str).groups.rrid)
.map((str) => str.toLowerCase());
const builder = await retrieveAntibody(rrids).then((response) => {
return response.reduce((accum, item) => {
const antibody = item["_source"]["item"];
const antibodyIri = `http://identifiers.org/rrid/RRID:${antibody.identifier}`;
return accum
.add(
iri(antibodyIri),
iri("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
iri("http://www.w3.org/2002/07/owl#Class"))
.add(
iri(antibodyIri),
iri("http://www.w3.org/2000/01/rdf-schema#label"),
literal(antibody.name))
.add(
iri(antibodyIri),
iri("http://www.w3.org/2000/01/rdf-schema#comment"),
literal(antibody.description));
}, new RdfBuilder(`${context.purlIri}/rrid/antibody`));
});
builder.save(rridExtractPath);
logOutput(rridExtractPath);
ontologyExtractionPaths.push(rridExtractPath);
}

const uberonEntitiesPath = collectEntities(context, 'uberon', baseInputPath);
if (!isFileEmpty(uberonEntitiesPath)) {
info('Extracting UBERON.');
Expand Down
16 changes: 16 additions & 0 deletions src/utils/get-rrid-terms.sparql
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Lists every distinct term whose string form starts with
# "http://identifiers.org/rrid/RRID:", whether it occurs as the subject or as
# the object of a triple. The result has a single column named ?entity.
SELECT DISTINCT ?entity
WHERE {
{
# Branch 1: matching terms found in the subject position.
SELECT (?s AS ?entity) WHERE {
?s ?p ?o .
FILTER(STRSTARTS(STR(?s),"http://identifiers.org/rrid/RRID:"))
}
}
UNION
{
# Branch 2: matching terms found in the object position.
SELECT (?o AS ?entity) WHERE {
?s ?p ?o .
FILTER(STRSTARTS(STR(?o),"http://identifiers.org/rrid/RRID:"))
}
}
}
38 changes: 38 additions & 0 deletions src/utils/rdf-builder.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
import { writeFileSync } from 'fs';
import { graph, sym, lit, serialize } from 'rdflib';


/**
 * Collects RDF triples in an in-memory rdflib store and serializes them.
 *
 * Must be called with `new`. On construction the store is seeded with a single
 * triple declaring `iri` to be an `owl:Ontology`.
 *
 * Instance methods:
 *  - `add(subject, predicate, object)`: adds one triple and returns the builder
 *    so calls can be chained. The triple is silently skipped when any of the
 *    three terms is falsy.
 *  - `build()`: returns the serialized document as a string.
 *  - `save(filePath)`: writes the serialized document to `filePath`.
 *
 * `build()` and `save()` throw when the serializer reports an error.
 *
 * @param {string} iri - IRI of the ontology the collected triples belong to.
 */
export function RdfBuilder(iri) {
  const store = graph();
  store.add(
    sym(iri),
    sym("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"),
    sym("http://www.w3.org/2002/07/owl#Ontology"));

  // Serializes the store with rdflib's default content type and returns the
  // text. The result is captured from the callback rather than from the return
  // value of serialize(), which is not the document when a callback is given.
  // NOTE(review): assumes rdflib invokes the callback synchronously for the
  // default (Turtle) content type - confirm against the rdflib version in use.
  const serializeStore = () => {
    let failure = null;
    let output;
    serialize(null, store, null, null, (err, result) => {
      failure = err;
      output = result;
    });
    if (failure) {
      throw failure;
    }
    return output;
  };

  this.add = function (subject, predicate, object) {
    if (subject && predicate && object) {
      store.add(subject, predicate, object);
    }
    return this;
  };

  this.build = function () {
    return serializeStore();
  };

  this.save = function (filePath) {
    // Serialize first, then write: a serializer failure never reaches the file
    // system, and a file-system error is raised outside the serializer callback.
    const document = serializeStore();
    writeFileSync(filePath, document);
  };
}

/**
 * Turns an IRI string into an RDF term by delegating to rdflib's `sym`.
 *
 * @param {string} text - The IRI to wrap.
 * @returns {*} Whatever `sym(text)` returns (presumably an rdflib named node -
 *   confirm against the rdflib documentation).
 */
export function iri(text) {
  const namedNode = sym(text);
  return namedNode;
}

/**
 * Turns a value into an RDF literal term by delegating to rdflib's `lit`.
 * Only the text is forwarded; no language tag or datatype is supplied.
 *
 * NOTE(review): the value is passed through unchecked - confirm how `lit`
 * treats `null`/`undefined` if callers may pass missing fields.
 *
 * @param {string} text - The lexical value of the literal.
 * @returns {*} Whatever `lit(text)` returns.
 */
export function literal(text) {
  const literalTerm = lit(text);
  return literalTerm;
}
40 changes: 40 additions & 0 deletions src/utils/scicrunch-client.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import fetch from 'node-fetch';

// Root URL of the SciCrunch API; retrieveAntibody appends "/<index>/_search" to it.
const API_BASE = "https://api.scicrunch.io/elastic/v1";
// Index name that retrieveAntibody places in the request path.
const ANTIBODY_INDEX = "RIN_Antibody_pr";
// NOTE(review): not referenced anywhere in the visible code - presumably
// reserved for a future tool lookup; confirm or remove.
const TOOL_INDEX = "RIN_Tool_pr";

/**
 * Looks up antibody records in the SciCrunch antibody index by RRID.
 *
 * Sends a POST to `${API_BASE}/${ANTIBODY_INDEX}/_search` with the body built
 * by `createQuery(rrids)` and the API key taken from the `SCICRUNCH_API_KEY`
 * environment variable.
 *
 * Failures are reported by rejecting the returned promise. The previous
 * behavior of logging and resolving `undefined` made callers fail later with
 * an unrelated TypeError.
 *
 * @param {string[]} rrids - Identifiers to match against the `rrid.curie` field.
 * @returns {Promise<object[]>} The `hits.hits` array of the search response;
 *   an empty array when `rrids` is missing or empty (no request is sent).
 * @throws {Error} When the request cannot be sent, the server answers with a
 *   non-2xx status, or the response body has no `hits.hits` array.
 */
export async function retrieveAntibody(rrids) {
  // Nothing to look up: skip the network round trip entirely.
  if (!rrids || rrids.length === 0) {
    return [];
  }

  const url = `${API_BASE}/${ANTIBODY_INDEX}/_search`;
  const options = {
    method: "POST",
    headers: {
      'Content-Type': "application/json",
      apikey: process.env.SCICRUNCH_API_KEY
    },
    body: JSON.stringify(createQuery(rrids))
  };

  let response;
  try {
    response = await fetch(url, options);
  } catch (err) {
    throw new Error(`SciCrunch antibody lookup failed: ${err.message}`, { cause: err });
  }

  // fetch only rejects on network-level failures, so HTTP errors such as a
  // rejected API key must be detected explicitly.
  if (!response.ok) {
    throw new Error(
      `SciCrunch antibody lookup failed: HTTP ${response.status} ${response.statusText}`);
  }

  const data = await response.json();
  const hits = data && data.hits && data.hits.hits;
  if (!Array.isArray(hits)) {
    throw new Error("SciCrunch antibody lookup failed: response did not contain hits.hits");
  }
  return hits;
}

/**
 * Builds the JSON body sent to the `_search` endpoint.
 *
 * The body matches documents whose `rrid.curie` field is one of `rrids`,
 * sets `size` to 1000, and restricts the returned `_source` to the item's
 * identifier, name and description.
 *
 * @param {string[]} rrids - Values to match; the array is embedded by
 *   reference, not copied.
 * @returns {object} A fresh query object on every call.
 */
function createQuery(rrids) {
  const sourceFields = [
    "item.identifier",
    "item.name",
    "item.description",
  ];
  const termsFilter = { "rrid.curie": rrids };

  // Key order is kept as size, query, _source so the serialized body is unchanged.
  const requestBody = {
    size: 1000,
    query: { terms: termsFilter },
    "_source": { includes: sourceFields },
  };
  return requestBody;
}

0 comments on commit 26367eb

Please sign in to comment.