From 972bf8b88edb228382e00be062a7360b587feca7 Mon Sep 17 00:00:00 2001 From: Josef Hardi Date: Thu, 14 Nov 2024 15:55:43 -0800 Subject: [PATCH] Update the AsctbRecord model with proper naming --- schemas/src/digital-objects/asct-b.yaml | 143 +++++++++++++----------- src/normalization/normalize-asct-b.js | 126 +++++++++++---------- 2 files changed, 146 insertions(+), 123 deletions(-) diff --git a/schemas/src/digital-objects/asct-b.yaml b/schemas/src/digital-objects/asct-b.yaml index f9c1d1d..11d29ee 100644 --- a/schemas/src/digital-objects/asct-b.yaml +++ b/schemas/src/digital-objects/asct-b.yaml @@ -116,48 +116,49 @@ classes: - Instance slots: - record_number - - anatomical_structure_list - - cell_type_list - - gene_marker_list - - protein_marker_list - - lipid_marker_list - - metabolites_marker_list - - proteoforms_marker_list - - references - slot_usage: - references: - range: string - annotations: - owl: AnnotationAssertion, AnnotationProperty + - as_records + - ct_records + - bg_records + - bp_records + - bl_records + - bm_records + - bf_records + - ref_records - AnatomicalStructureInstance: + AnatomicalStructureRecord: mixins: - Named - Instance slots: - - ccf_pref_label - - source_concept + - concept_value - record_number - order_number - CellTypeInstance: + CellTypeRecord: mixins: - Named - Instance slots: - - ccf_pref_label - - source_concept + - concept_value - record_number - order_number - BiomarkerInstance: + BiomarkerRecord: mixins: - Named - Instance slots: - - ccf_pref_label + - concept_value - ccf_biomarker_type - - source_concept + - record_number + - order_number + + ReferenceRecord: + mixins: + - Named + - Instance + slots: + - string_value - record_number - order_number @@ -338,14 +339,18 @@ slots: slot_uri: ccf:ccf_biomarker_type annotations: owl: AnnotationAssertion, AnnotationProperty - source_concept: - title: source concept - description: >- - Source concept providing the origin of this record within the - ASCT-B model. 
- required: true - slot_uri: ccf:source_concept + concept_value: + title: concept value + description: Reference to a concept by its standard URI. + slot_uri: ccf:concept_value range: AsctbConcept + required: true + annotations: + owl: AnnotationAssertion, AnnotationProperty + string_value: + title: string value + description: A string-typed value. + slot_uri: ccf:string_value annotations: owl: AnnotationAssertion, AnnotationProperty record_number: @@ -364,74 +369,84 @@ slots: range: integer annotations: owl: AnnotationAssertion, AnnotationProperty - anatomical_structure_list: - title: anatomical structure - description: List of anatomical structure instances related to ASCT+B records. + as_records: + title: anatomical structure record + description: List of anatomical structures in an ASCT+B record instance. + slot_uri: ccf:as_record + range: AnatomicalStructureRecord + required: false + multivalued: true + inlined_as_list: true + annotations: + owl: AnnotationAssertion, AnnotationProperty + ct_records: + title: cell type record + description: List of cell types in an ASCT+B record instance. + slot_uri: ccf:ct_record + range: CellTypeRecord required: false multivalued: true inlined_as_list: true - range: AnatomicalStructureInstance - slot_uri: ccf:anatomical_structure annotations: owl: AnnotationAssertion, AnnotationProperty - cell_type_list: - title: cell type - description: List of cell type instances associated with ASCT+B records. + bg_records: + title: gene marker record + description: List of gene markers in an ASCT+B record instance. + slot_uri: ccf:bg_record + range: BiomarkerRecord required: false multivalued: true inlined_as_list: true - range: CellTypeInstance - slot_uri: ccf:cell_type annotations: owl: AnnotationAssertion, AnnotationProperty - gene_marker_list: - title: gene marker - description: List of gene markers referenced within ASCT+B records. 
+ bp_records: + title: protein marker record + description: List of protein markers in an ASCT+B record instance. + slot_uri: ccf:bp_record + range: BiomarkerRecord required: false multivalued: true inlined_as_list: true - range: BiomarkerInstance - slot_uri: ccf:protein_marker annotations: owl: AnnotationAssertion, AnnotationProperty - protein_marker_list: - title: protein marker - description: List of protein markers referenced within ASCT+B records. + bl_records: + title: lipid marker record + description: List of lipid markers in an ASCT+B record instance. + slot_uri: ccf:bl_record + range: BiomarkerRecord required: false multivalued: true inlined_as_list: true - range: BiomarkerInstance - slot_uri: ccf:lipid_marker annotations: owl: AnnotationAssertion, AnnotationProperty - lipid_marker_list: - title: lipid marker - description: List of lipid markers referenced within ASCT+B records. + bm_records: + title: metabolite marker record + description: List of metabolite markers in an ASCT+B record instance. + slot_uri: ccf:bm_record + range: BiomarkerRecord required: false multivalued: true inlined_as_list: true - range: BiomarkerInstance - slot_uri: ccf:lipid_marker annotations: owl: AnnotationAssertion, AnnotationProperty - metabolites_marker_list: - title: metabolite marker - description: List of metabolites markers referenced within ASCT+B records. + bf_records: + title: proteoform marker record + description: List of proteoform markers in an ASCT+B record instance. + slot_uri: ccf:bf_record + range: BiomarkerRecord required: false multivalued: true inlined_as_list: true - range: BiomarkerInstance - slot_uri: ccf:metabolite_marker annotations: owl: AnnotationAssertion, AnnotationProperty - proteoforms_marker_list: - title: proteoform marker - description: List of proteoforms markers referenced within ASCT-B records. + ref_records: + title: reference record + description: List of references in an ASCT+B record instance. 
+ slot_uri: ccf:ref_record + range: ReferenceRecord required: false multivalued: true inlined_as_list: true - range: BiomarkerInstance - slot_uri: ccf:proteoform_marker annotations: owl: AnnotationAssertion, AnnotationProperty targeted_cell_type: diff --git a/src/normalization/normalize-asct-b.js b/src/normalization/normalize-asct-b.js index b234631..3138c92 100644 --- a/src/normalization/normalize-asct-b.js +++ b/src/normalization/normalize-asct-b.js @@ -265,46 +265,45 @@ function normalizeBm(collector, { id: bm_id, name: bm_name, b_type, is_provision } function normalizeAsctbRecord(context, data) { + const { name } = context.selectedDigitalObject + const tableName = capitalize(name); return data.reduce((collector, row, index) => { // Determine record number const recordNumber = index + 1; // Generate anatomical structure instances - const asInstances = row.anatomical_structures - .map((item, order) => generateAsInstance(context, recordNumber, item, order)) - .filter(({ source_concept }) => passAsIdFilterCriteria(context, source_concept)); + const asRecords = row.anatomical_structures + .map((item, order) => generateAsRecord(context, tableName, recordNumber, item, order)) + .filter(({ concept_value }) => passAsIdFilterCriteria(context, concept_value)); // Generate cell type instances - const ctInstances = row.cell_types - .map((item, order) => generateCtInstance(context, recordNumber, item, order)) - .filter(({ source_concept }) => passCtIdFilterCriteria(context, source_concept)); + const ctRecords = row.cell_types + .map((item, order) => generateCtRecord(context, tableName, recordNumber, item, order)) + .filter(({ concept_value }) => passCtIdFilterCriteria(context, concept_value)); // Generate biomarker instances - const bmInstances = row.biomarkers - .map((item, order) => generateBmInstance(context, recordNumber, item, order)) - .filter(({ source_concept }) => passIdFilterCriteria(context, source_concept)); + const bmRecords = row.biomarkers + .map((item, order) 
=> generateBmRecord(context, tableName, recordNumber, item, order)) + .filter(({ concept_value }) => passIdFilterCriteria(context, concept_value)); // Populate all valid references - const references = row.references - .map((ref) => { - const refString = checkNotEmpty(ref.id) ? ref.id : "N/A"; - return checkIsDoi(refString) ? normalizeDoi(refString) : normalizeString(refString); - }); + const refRecords = row.references + .map((item, order) => generateRefRecord(context, tableName, recordNumber, item, order)); // Collect all the items collector.push({ id: generateAsctbRecordId(context, recordNumber), - label: `Record ${recordNumber}`, + label: `${tableName} table, Record ${recordNumber}`, type_of: [`AsctbRecord`], record_number: recordNumber, - anatomical_structure_list: asInstances, - cell_type_list: ctInstances, - gene_marker_list: bmInstances.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.G), - protein_marker_list: bmInstances.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.P), - lipid_marker_list: bmInstances.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.BL), - metabolites_marker_list: bmInstances.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.BM), - proteoforms_marker_list: bmInstances.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.BF), - references: references + as_records: asRecords, + ct_records: ctRecords, + bg_records: bmRecords.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.G), + bp_records: bmRecords.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.P), + bl_records: bmRecords.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.BL), + bm_records: bmRecords.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.BM), + bf_records: bmRecords.filter(({ ccf_biomarker_type }) => ccf_biomarker_type === BM_TYPE.BF), + ref_records: refRecords }); return collector; }, []); @@ -362,73 +361,78 @@ function 
normalizeCellMarkerDescriptor(context, data) { }, []); } -function generateAsInstance(context, recordNumber, data, index) { - const { name: doName } = context.selectedDigitalObject +function generateAsRecord(context, tableName, recordNumber, data, index) { const { id, name } = data; const asName = normalizeString(name); - const orderNumber = index + 1 + const orderNumber = index + 1; + const columnName = "AS"; return { - id: generateAsInstanceId(context, recordNumber, orderNumber), - label: `${asName} (${doName}-R${recordNumber}-AS${orderNumber})`, - type_of: [generateIdWhenEmpty(id, asName)], - ccf_pref_label: asName, - source_concept: generateIdWhenEmpty(id, asName), + id: generateAsctbColumnRecordId(context, recordNumber, orderNumber, columnName), + label: `${tableName} table, Record ${recordNumber}, Column ${columnName}/${orderNumber}`, + type_of: ['AnatomicalStructureRecord'], + concept_value: generateIdWhenEmpty(id, asName), record_number: recordNumber, order_number: orderNumber } } -function generateCtInstance(context, recordNumber, data, index) { - const { name: doName } = context.selectedDigitalObject +function generateCtRecord(context, tableName, recordNumber, data, index) { const { id, name } = data; const ctName = normalizeString(name); - const orderNumber = index + 1 + const orderNumber = index + 1; + const columnName = "CT"; return { - id: generateCtInstanceId(context, recordNumber, orderNumber), - label: `${ctName} (${doName}-R${recordNumber}-CT${orderNumber})`, - type_of: [generateIdWhenEmpty(id, ctName)], - ccf_pref_label: ctName, - source_concept: generateIdWhenEmpty(id, ctName), + id: generateAsctbColumnRecordId(context, recordNumber, orderNumber, columnName), + label: `${tableName} table, Record ${recordNumber}, Column ${columnName}/${orderNumber}`, + type_of: ['CellTypeRecord'], + concept_value: generateIdWhenEmpty(id, ctName), record_number: recordNumber, order_number: orderNumber } } -function generateBmInstance(context, recordNumber, data, 
index) { - const { name: doName } = context.selectedDigitalObject +function generateBmRecord(context, tableName, recordNumber, data, index) { const { id, name, b_type } = data; const bmName = normalizeString(name); - const orderNumber = index + 1 + const orderNumber = index + 1; + const columnName = "BM"; return { - id: generateBmInstanceId(context, recordNumber, orderNumber), - label: `${bmName} (${doName}-R${recordNumber}-BM${orderNumber})`, - type_of: [generateIdWhenEmpty(id, bmName)], - ccf_pref_label: bmName, + id: generateAsctbColumnRecordId(context, recordNumber, orderNumber, columnName), + label: `${tableName} table, Record ${recordNumber}, Column ${columnName}/${orderNumber}`, + type_of: ['BiomarkerRecord'], ccf_biomarker_type: b_type, - source_concept: generateIdWhenEmpty(id, bmName), + concept_value: generateIdWhenEmpty(id, bmName), record_number: recordNumber, order_number: orderNumber } } -function generateAsctbRecordId(context, recordNumber) { - const { type: doType, name: doName, version: doVersion } = context.selectedDigitalObject; - return `${context.purlIri}${doType}/${doName}/${doVersion}#R${recordNumber}`; -} - -function generateAsInstanceId(context, recordNumber, orderNumber) { - const { type: doType, name: doName, version: doVersion } = context.selectedDigitalObject; - return `${context.purlIri}${doType}/${doName}/${doVersion}#R${recordNumber}-AS${orderNumber}`; +function generateRefRecord(context, tableName, recordNumber, data, index) { + const { id } = data; + const refString = checkNotEmpty(id) ? id : "N/A"; + const normalizeRefString = checkIsDoi(refString) + ? 
normalizeDoi(refString) + : normalizeString(refString); + const orderNumber = index + 1 + const columnName = "REF"; + return { + id: generateAsctbColumnRecordId(context, recordNumber, orderNumber, columnName), + label: `${tableName} table, Record ${recordNumber}, Column ${columnName}/${orderNumber}`, + type_of: ['ReferenceRecord'], + string_value: normalizeRefString, + record_number: recordNumber, + order_number: orderNumber + } } -function generateCtInstanceId(context, recordNumber, orderNumber) { +function generateAsctbRecordId(context, recordNumber) { const { type: doType, name: doName, version: doVersion } = context.selectedDigitalObject; - return `${context.purlIri}${doType}/${doName}/${doVersion}#R${recordNumber}-CT${orderNumber}`; + return `${context.purlIri}${doType}/${doName}/${doVersion}#R${recordNumber}`; } -function generateBmInstanceId(context, recordNumber, orderNumber) { +function generateAsctbColumnRecordId(context, recordNumber, orderNumber, columnName) { const { type: doType, name: doName, version: doVersion } = context.selectedDigitalObject; - return `${context.purlIri}${doType}/${doName}/${doVersion}#R${recordNumber}-BM${orderNumber}`; + return `${context.purlIri}${doType}/${doName}/${doVersion}#R${recordNumber}-${columnName}${orderNumber}`; } function generateCellMarkerDescriptorId(context, recordNumber) { @@ -444,6 +448,10 @@ function generateId(name) { return `https://purl.org/ccf/ASCTB-TEMP_${normalizeName(name)}`; } +function capitalize(word) { + return word.replace(/^\w/, (c) => c.toUpperCase()); +} + function normalizeName(name) { return name .toLowerCase()