Skip to content

Commit

Permalink
Handle text values in the reference column beyond DOIs
Browse files Browse the repository at this point in the history
  • Loading branch information
johardi committed Aug 30, 2024
1 parent 079af45 commit a4dfd69
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 24 deletions.
13 changes: 9 additions & 4 deletions schemas/src/digital-objects/asct-b.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ classes:
{% for s in has_characterizing_marker_set %}
SubClassOf(
{% for r in s.references %}
Annotation(dct:reference {{r}})
Annotation(dct:reference "{{r}}")
{% endfor %}
{{id}}
ObjectSomeValuesFrom(
Expand Down Expand Up @@ -105,6 +105,9 @@ classes:
slots:
- members
- references
slot_usage:
references:
range: string

AsctbRecord:
mixins:
Expand All @@ -122,6 +125,7 @@ classes:
- references
slot_usage:
references:
range: string
annotations:
owl: AnnotationAssertion, AnnotationProperty

Expand Down Expand Up @@ -168,6 +172,7 @@ classes:
- derived_from
slot_usage:
references:
range: string
annotations:
owl: AnnotationAssertion, AnnotationProperty
derived_from:
Expand Down Expand Up @@ -310,23 +315,23 @@ slots:
annotations:
owl: AnnotationAssertion, AnnotationProperty
anatomical_structure_list:
required: true
required: false
multivalued: true
inlined_as_list: true
range: AnatomicalStructureInstance
slot_uri: ccf:anatomical_structure
annotations:
owl: AnnotationAssertion, AnnotationProperty
cell_type_list:
required: true
required: false
multivalued: true
inlined_as_list: true
range: CellTypeInstance
slot_uri: ccf:cell_type
annotations:
owl: AnnotationAssertion, AnnotationProperty
gene_marker_list:
required: true
required: false
multivalued: true
inlined_as_list: true
range: BiomarkerInstance
Expand Down
46 changes: 26 additions & 20 deletions src/normalization/normalize-asct-b.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import {
isCtIdValid,
isIdValid,
normalizeDoi,
normalizeString
} from './patches.js';
import { normalizeMetadata, readMetadata, writeNormalizedData, writeNormalizedMetadata } from './utils.js';

Expand Down Expand Up @@ -155,8 +156,8 @@ function normalizeCtData(context, data) {
// Get the references
const references = row.references
.map((ref) => {
const refId = checkNotEmpty(ref.id) ? ref.id : "N/A";
return checkIsDoi(refId) ? normalizeDoi(refId) : refId;
const refString = checkNotEmpty(ref.id) ? ref.id : "N/A";
return checkIsDoi(refString) ? normalizeDoi(refString) : normalizeString(refString);
});

// Get the last cell type as the primary cell
Expand Down Expand Up @@ -283,8 +284,8 @@ function normalizeAsctbRecord(context, data) {
// Populate all valid references
const references = row.references
.map((ref) => {
const refId = checkNotEmpty(ref.id) ? ref.id : "N/A";
return checkIsDoi(refId) ? normalizeDoi(refId) : refId;
const refString = checkNotEmpty(ref.id) ? ref.id : "N/A";
return checkIsDoi(refString) ? normalizeDoi(refString) : normalizeString(refString);
});

// Collect all the items
Expand Down Expand Up @@ -337,28 +338,31 @@ function normalizeCellMarkerDescriptor(context, data) {
// Populate all valid references
const references = row.references
.map((ref) => {
const refId = checkNotEmpty(ref.id) ? ref.id : "N/A";
return checkIsDoi(refId) ? normalizeDoi(refId) : refId;
const refString = checkNotEmpty(ref.id) ? ref.id : "N/A";
return checkIsDoi(refString) ? normalizeDoi(refString) : normalizeString(refString);
});

// Collect all the items
collector.push({
id: generateCellMarkerDescriptorId(context, recordNumber),
label: `Cell marker descriptor for ${primaryCt.name}`,
type_of: [`CellMarkerDescriptor`],
primary_cell_type: primaryCt.id,
primary_anatomical_structure: primaryAs,
biomarker_set: biomarkers,
references: references,
derived_from: generateAsctbRecordId(context, recordNumber)
});
// Collect all the items if the components are complete.
if (primaryAs && primaryCt && biomarkers) {
collector.push({
id: generateCellMarkerDescriptorId(context, recordNumber),
label: `Cell marker descriptor for ${primaryCt.name}`,
type_of: [`CellMarkerDescriptor`],
primary_cell_type: primaryCt.id,
primary_anatomical_structure: primaryAs,
biomarker_set: biomarkers,
references: references,
derived_from: generateAsctbRecordId(context, recordNumber)
});
}
return collector;
}, []);
}

function generateAsInstance(context, recordNumber, data, index) {
const { name: doName } = context.selectedDigitalObject
const { id, name: asName } = data;
const { id, name } = data;
const asName = normalizeString(name);
const orderNumber = index + 1
return {
id: generateAsInstanceId(context, recordNumber, orderNumber),
Expand All @@ -373,7 +377,8 @@ function generateAsInstance(context, recordNumber, data, index) {

function generateCtInstance(context, recordNumber, data, index) {
const { name: doName } = context.selectedDigitalObject
const { id, name: ctName } = data;
const { id, name } = data;
const ctName = normalizeString(name);
const orderNumber = index + 1
return {
id: generateCtInstanceId(context, recordNumber, orderNumber),
Expand All @@ -388,7 +393,8 @@ function generateCtInstance(context, recordNumber, data, index) {

function generateBmInstance(context, recordNumber, data, index) {
const { name: doName } = context.selectedDigitalObject
const { id, name: bmName, b_type } = data;
const { id, name, b_type } = data;
const bmName = normalizeString(name);
const orderNumber = index + 1
return {
id: generateBmInstanceId(context, recordNumber, orderNumber),
Expand Down
7 changes: 7 additions & 0 deletions src/normalization/patches.js
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,13 @@ export function normalizeDoi(doi) {
return normDoi;
}

/**
 * Normalizes free-text values (names, non-DOI references) into a single-line
 * string that contains no double quotes.
 *
 * Steps, in order:
 *   1. every double quote is replaced by a single quote,
 *   2. carriage returns are dropped (not turned into spaces, so "a\rb" -> "ab"),
 *   3. each run of whitespace (spaces, tabs, newlines) collapses to one space,
 *   4. leading and trailing whitespace is trimmed.
 *
 * @param {*} str - The value to normalize. `null` and `undefined` yield an
 *   empty string instead of throwing; other non-string values are converted
 *   with `String()` first.
 * @returns {string} The normalized text.
 */
export function normalizeString(str) {
  // Missing cells (e.g. a row without a name) must not abort normalization.
  if (str == null) {
    return "";
  }
  return String(str)
    .replace(/"/g, "'") // Replace all double quotes with single quotes
    .replace(/\r/g, "") // Remove carriage return characters
    .replace(/\s+/g, " ") // Collapse each whitespace run into a single space
    .trim(); // Strip the leading/trailing space left by the collapse
}

/**
 * Tests whether `id` contains a recognized ontology identifier.
 *
 * Two shapes are accepted:
 *   - a CURIE with one of the prefixes UBERON, FMA, CL, PCL, LMHA or HGNC,
 *     followed by a colon and at least one digit;
 *   - a temporary ASCT+B IRI under https://purl.org/ccf/ASCTB-TEMP_ followed
 *     by at least one letter, digit or hyphen.
 *
 * The pattern is not anchored, so a match anywhere inside `id` is enough, and
 * the dot in "purl.org" matches any single character.
 *
 * @param {string} id - The identifier text to check.
 * @returns {boolean} `true` when the pattern matches somewhere in `id`.
 */
export function isIdValid(id) {
  const knownIdPattern = /(UBERON|FMA|CL|PCL|LMHA|HGNC):\d+|https\:\/\/purl.org\/ccf\/ASCTB\-TEMP\_[a-zA-Z0-9\-]+/;
  const hasKnownId = knownIdPattern.test(id);
  return hasKnownId;
}
Expand Down

0 comments on commit a4dfd69

Please sign in to comment.