Skip to content

Commit

Permalink
Needs more cleanup, but made sure trims don't error out on undefined
Browse files Browse the repository at this point in the history
  • Loading branch information
bherr2 committed Jun 4, 2024
1 parent cbca2cb commit e3e5534
Showing 1 changed file with 93 additions and 46 deletions.
139 changes: 93 additions & 46 deletions src/normalization/normalize-omap.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,50 +55,52 @@ function normalizeData(context, data) {
}

/**
 * Converts raw antibody rows into antibody records and folds them together
 * with mergeDuplicateAntibodyData.
 *
 * A row that has an `HGNC_ID` is tagged as a primary antibody and lists the
 * protein ids it detects. A row without one is tagged as a secondary antibody
 * that binds to the antibody of the row directly before it.
 *
 * @param {object} context - Not read by this function.
 * @param {object[]} data - Antibody rows to normalize.
 * @returns {object[]} The rows after mapping and reduction with mergeDuplicateAntibodyData.
 */
function normalizeAntibodyData(context, data) {
  return data
    .map((row, idx, arr) => {
      // NOTE(review): the template literals below turn a missing clone_id or
      // catalog_number into the string "undefined" — confirm that is intended.
      const obj = new ObjectBuilder()
        .append('id', getAntibodyIri(row.rrid))
        .append('parent_class', 'ccf:Antibody')
        .append('host', row.host)
        .append('isotype', row.isotype)
        .append('clonality', row.clonality)
        .append('clone_id', `${row.clone_id}`)
        .append('conjugate', row.conjugate)
        .append('fluorescent', row.fluorescent_reporter)
        .append('recombinant', row.recombinant)
        .append('producer', row.vendor)
        .append('catalog_number', `${row.catalog_number}`)
        .append('rationale', row.rationale)
        .build();
      if (row.HGNC_ID) {
        obj['antibody_type'] = 'Primary';
        obj['detects'] = split(row.HGNC_ID).map((text) => normalizeProteinId(text));
      } else {
        obj['antibody_type'] = 'Secondary';
        // The first row has no predecessor; skip the link instead of throwing
        // on `undefined.rrid`.
        const prevRow = arr[idx - 1];
        if (prevRow) {
          obj['binds_to'] = getAntibodyIri(prevRow.rrid);
        }
      }
      return obj;
    })
    .reduce(mergeDuplicateAntibodyData, []);
}

function mergeDuplicateAntibodyData(acc, item) {
// Find if the item with the same id already exists in the output array
let existingItem = acc.find(el => el.id === item.id);
let existingItem = acc.find((el) => el.id === item.id);
if (!existingItem) {
// Create new structure
const { detects, binds_to, rationale, ...rest } = item;
existingItem = rest;
acc.push(existingItem);
}
// Add detects or binds_to to the existing structure
if (item.antibody_type === "Primary") {
if (item.antibody_type === 'Primary') {
if (!('detects' in existingItem)) {
existingItem['detects'] = [];
}
existingItem.detects.push({ protein_id: item.detects, rationale: item.rationale });
}
if (item.antibody_type === "Secondary") {
if (item.antibody_type === 'Secondary') {
if (!('binds_to' in existingItem)) {
existingItem['binds_to'] = [];
}
Expand Down Expand Up @@ -142,15 +144,34 @@ function normalizeExperimentCycleData(context, data) {
'uses_antibody',
data
.filter((row) => row.cycle_number === cycleNumber)
.filter((row, idx, arr) => arr.findIndex(
obj => obj.rrid === row.rrid
&& obj.lot_number === row.lot_number
&& obj.dilution === row.dilution
&& obj.concentration_value === row.concentration_value) === idx) // remove duplicates
.filter(
(row, idx, arr) =>
arr.findIndex(
(obj) =>
obj.rrid === row.rrid &&
obj.lot_number === row.lot_number &&
obj.dilution === row.dilution &&
obj.concentration_value === row.concentration_value
) === idx
) // remove duplicates
.map((row) => {
return new ObjectBuilder()
.append('id', getDilutedAntibodyInstanceIri(context, omapId, cycleNumber, row.rrid, row.lot_number, row.dilution, row.concentration_value))
.append('label', getDilutedAntibodyInstanceLabel(row.rrid, row.lot_number, row.dilution, row.concentration_value))
.append(
'id',
getDilutedAntibodyInstanceIri(
context,
omapId,
cycleNumber,
row.rrid,
row.lot_number,
row.dilution,
row.concentration_value
)
)
.append(
'label',
getDilutedAntibodyInstanceLabel(row.rrid, row.lot_number, row.dilution, row.concentration_value)
)
.append('type_of', ['ExperimentUsedAntibody'])
.append('concentration', row.concentration_value)
.append('dilution', row.dilution)
Expand Down Expand Up @@ -179,7 +200,17 @@ function normalizeCoreAntibodyPanelData(context, data) {
const omapId = data[0].omap_id;
const antibodyComponents = data
.filter((row) => row.core_panel === 'Y')
.map((row) => getDilutedAntibodyInstanceIri(context, omapId, data.cycle_number, row.rrid, row.lot_number, row.dilution, row.concentration_value));
.map((row) =>
getDilutedAntibodyInstanceIri(
context,
omapId,
data.cycle_number,
row.rrid,
row.lot_number,
row.dilution,
row.concentration_value
)
);
return new ObjectBuilder()
.append('id', getCoreAntibodyPanelIri(context, omapId))
.append('label', getCoreAntibodyPanelLabel(omapId))
Expand All @@ -194,41 +225,54 @@ function getPurl(context) {
}

/**
 * Builds an experiment IRI: the value of getPurl(context), a `#`, then the
 * trimmed OMAP id.
 *
 * @param {object} context - Passed through to getPurl.
 * @param {string} [omapId] - OMAP id; null/undefined yields an empty fragment instead of throwing.
 * @returns {string} The experiment IRI.
 */
function getExperimentIri(context, omapId) {
  return `${getPurl(context)}#${omapId?.trim() ?? ''}`;
}

/**
 * Builds an experiment-cycle IRI: the value of getPurl(context), a `#`, the
 * trimmed OMAP id, then `-cycle-<cycleNumber>`.
 *
 * @param {object} context - Passed through to getPurl.
 * @param {string} [omapId] - OMAP id; null/undefined contributes an empty string instead of throwing.
 * @param {*} cycleNumber - Interpolated as-is after `-cycle-`.
 * @returns {string} The experiment-cycle IRI.
 */
function getExperimentCycleIri(context, omapId, cycleNumber) {
  return `${getPurl(context)}#${omapId?.trim() ?? ''}-cycle-${cycleNumber}`;
}

/**
 * Builds the IRI for one diluted antibody instance: the value of
 * getPurl(context), a `#`, the trimmed antibody id, a `-`, and a hash of the
 * instance key.
 *
 * The instance key concatenates, in order: omapId, cycleNumber, the trimmed
 * antibody id, the lot number (or `NotAvailable`), and the dilution, else the
 * concentration, else `NotAvailable`.
 *
 * @param {object} context - Passed through to getPurl.
 * @param {*} omapId - Interpolated as-is into the instance key.
 * @param {*} cycleNumber - Interpolated as-is into the instance key.
 * @param {string} [antibodyId] - Antibody id; null/undefined contributes an empty string instead of throwing.
 * @param {*} [lotNumber] - Stringified and trimmed when truthy.
 * @param {*} [dilution] - Used when truthy.
 * @param {*} [concentration] - Used when dilution is falsy and this is truthy.
 * @returns {string} The diluted antibody instance IRI.
 */
function getDilutedAntibodyInstanceIri(context, omapId, cycleNumber, antibodyId, lotNumber, dilution, concentration) {
  const antibodyKey = antibodyId?.trim() ?? '';
  const lotKey = lotNumber ? `${lotNumber}`.trim() : 'NotAvailable';
  // Truthiness (not a nullish check) is kept on purpose so the hashed key,
  // and therefore the IRI, stays identical for existing data.
  const amountKey = dilution || concentration || 'NotAvailable';
  const instanceKey = `${omapId}${cycleNumber}${antibodyKey}${lotKey}${amountKey}`;
  return `${getPurl(context)}#${antibodyKey}-${hash(instanceKey)}`;
}

/**
 * Builds a human-readable label for a diluted antibody instance.
 *
 * @param {*} antibodyId - Interpolated as-is at the start of the label.
 * @param {*} [lotNumber] - Shown as `lot number <n>` when truthy, otherwise `no lot number`.
 * @param {*} [dilution] - Shown as `with dilution <d>` when truthy.
 * @param {*} [concentration] - Shown as `with concentration <c>` when dilution is falsy and this is truthy.
 * @returns {string} The label text.
 */
function getDilutedAntibodyInstanceLabel(antibodyId, lotNumber, dilution, concentration) {
  const lotText = lotNumber ? `lot number ${lotNumber}` : 'no lot number';

  let amountText = 'unknown dilution factor or concentration value';
  if (dilution) {
    amountText = `with dilution ${dilution}`;
  } else if (concentration) {
    amountText = `with concentration ${concentration}`;
  }

  return `${antibodyId} antibody, ${lotText}, ${amountText}`;
}

/**
 * Builds the IRI for an antibody instance: the value of getPurl(context), a
 * `#`, the trimmed antibody id, a `-`, and a hash of the instance key.
 *
 * The instance key is the trimmed antibody id followed by the lot number, or
 * `NotAvailable` when the lot number is falsy.
 *
 * @param {object} context - Passed through to getPurl.
 * @param {string} [antibodyId] - Antibody id; null/undefined contributes an empty string instead of throwing.
 * @param {*} [lotNumber] - Stringified and trimmed when truthy.
 * @returns {string} The antibody instance IRI.
 */
function getAntibodyInstanceIri(context, antibodyId, lotNumber) {
  const antibodyKey = antibodyId?.trim() ?? '';
  const lotKey = lotNumber ? `${lotNumber}`.trim() : 'NotAvailable';
  const instanceKey = `${antibodyKey}${lotKey}`;
  return `${getPurl(context)}#${antibodyKey}-${hash(instanceKey)}`;
}

/**
 * Builds a human-readable label for an antibody instance.
 *
 * @param {*} antibodyId - Interpolated as-is at the start of the label.
 * @param {*} [lotNumber] - Shown as `lot number <n>` when truthy, otherwise `no lot number`.
 * @returns {string} The label text.
 */
function getAntibodyInstanceLabel(antibodyId, lotNumber) {
  let lotText = 'no lot number';
  if (lotNumber) {
    lotText = `lot number ${lotNumber}`;
  }
  return `${antibodyId} antibody, ${lotText}`;
}

/**
 * Builds a core antibody panel IRI: the value of getPurl(context), a `#`, the
 * trimmed OMAP id, then `-core-antibody-panel`.
 *
 * @param {object} context - Passed through to getPurl.
 * @param {string} [omapId] - OMAP id; null/undefined contributes an empty string instead of throwing.
 * @returns {string} The core antibody panel IRI.
 */
function getCoreAntibodyPanelIri(context, omapId) {
  return `${getPurl(context)}#${omapId?.trim() ?? ''}-core-antibody-panel`;
}

/**
 * Builds the display label for a core antibody panel.
 *
 * @param {*} omapId - Interpolated as-is at the start of the label.
 * @returns {string} `<omapId> core antibody panel`.
 */
function getCoreAntibodyPanelLabel(omapId) {
  const panelLabel = `${omapId} core antibody panel`;
  return panelLabel;
}

/**
 * Builds the identifiers.org IRI for an antibody RRID.
 *
 * @param {string} [antibodyId] - RRID; surrounding whitespace is trimmed.
 * @returns {string|undefined} The IRI, or undefined when the id is
 *   null/undefined or blank after trimming.
 */
function getAntibodyIri(antibodyId) {
  // Work on a local copy rather than reassigning the parameter.
  const rrid = antibodyId?.trim() ?? '';
  return rrid ? `https://identifiers.org/RRID:${rrid}` : undefined;
}

function normalizeProteinId(text) {
Expand All @@ -244,7 +288,10 @@ function removeWhitespaces(s) {

/**
 * Splits a comma- or semicolon-delimited value into trimmed, non-empty parts.
 *
 * @param {*} text - Value to split. Truthy non-string values (e.g. numbers)
 *   are stringified first so they no longer throw on `.split`.
 * @returns {string[]|null} The parts, or null when `text` is falsy.
 */
function split(text) {
  if (!text) {
    return null;
  }
  return String(text)
    .split(/[,;]/)
    .map((s) => s.trim())
    .filter(Boolean);
}

0 comments on commit e3e5534

Please sign in to comment.