diff --git a/doc/source/appendix/ontologies.rst b/doc/source/appendix/ontologies.rst index 6951e708..cc57d88f 100644 --- a/doc/source/appendix/ontologies.rst +++ b/doc/source/appendix/ontologies.rst @@ -38,15 +38,11 @@ What is the minimum attribute requirement for OntologyTerm in GA4GH? Conceptually (and consistent with the metadata branch) -:ontologyId: - required and implemented as URI - we assume this resolves to a meaningful document, e.g. http://purl.obolibrary.org/obo/SO_0000147 +:termId: + required and implemented as CURIE + we assume this resolves to a meaningful document, e.g. http://purl.obolibrary.org/obo/SO_0000147, using a prefix mapper, e.g. SO: <=> http://purl.obolibrary.org/obo/SO_ :term: preferred but not required (e.g. ‘exon’); corresponds to class label -:sourceName: - not required since should be resolved from prefix etc., but supporting/fall-back in case of non-standard/deprecated/entropic annotations; possible use for CURIEs in compact sequence ontology implementations (e.g. 
SO:0000147) -:sourceVersion: - not required but good practice; if no explicit versioning, ISO8601 formatted data of retrieval should be used Ontology Selection and Overlap @@ -84,122 +80,82 @@ Examples Genotypic sex ============= -:ontologyId: - "http://purl.obolibrary.org/obo/PATO_0020001", +:termId: + "PATO:0020001", :term: "male genotypic sex" , -:sourceName: - "PATO Phenotypic quality", + Sequence Ontology ================= -:ontologyId: - "http://purl.obolibrary.org/obo/SO_0001583", +:termId: + "SO:0001583", :term: "missense_variant", -:sourceName: - "Sequence Ontology", -:sourceVersion: - "release_2.5.3" + Human Phenotype ontology ======================== -:ontologyId: - "http://purl.obolibrary.org/obo/HP_0000819", +:termId: + "HP:0000819", :term: "Diabetes mellitus", -:sourceName: - "human_phenotype Ontology", -:sourceVersion: - "release_Jan2016*" + ---- -:ontologyId: - "http://www.ebi.ac.uk/efo/HP_0012059", +:termId: + "HP:0012059", :term: "Lentigo maligna melanoma", -:sourceName: - "human_phenotype_ontology", -:sourceVersion: - "2016-01-14” + Body part (Uberon) ================== -:ontologyId: - "http://www.ebi.ac.uk/efo/UBERON_0003403", +:termId: + "UBERON:0003403", :term: "skin of forearm", -:sourceName: - "uberon", -:sourceVersion: - "2015-11-23” Human disease ontology ====================== -:ontologyId: - "http://purl.obolibrary.org/obo/DOID_9351", +:termId: + "DOID:9351", :term: "diabetes mellitus", -:sourceName: - "disease_ontology", -:sourceVersion: - "2016-01-25" Experimental factor ontology ============================ -:ontologyId: - "http://purl.obolibrary.org/obo/EFO_0000400", +:termId: + "EFO:0000400", :term: "diabetes mellitus", -:sourceName: - "experimental_factor_ontology", -:sourceVersion: - "V2.68” + ---- -:ontologyId: - "http://www.ebi.ac.uk/efo/EFO_0004422", +:termId: + "EFO:0004422", :term: "exome", -:sourceName: - "Experimental Factor Ontology", -:sourceVersion: - "release_2.68" -SNOMEDCT representation of ICD-O 3 Cancer Histology 
-=================================================== - -:ontologyId: - "http://purl.bioontology.org/ontology/SNMI/M-94703“ -:term: - "Medulloblastoma, NOS” -:sourceName: - "SNOMED CT model component” -:sourceVersion: - "2016-01-28" - Unit Ontology ============= -:ontologyId: - "http://purl.obolibrary.org/obo/UO_0000016", +:termId: + "UO:0000016", :term: "millimetre", -:sourceName: - "Unit Ontology", -:sourceVersion: - "2015-12-17" + diff --git a/python/ga4gh/schemas/ga4gh/metadata_pb2.py b/python/ga4gh/schemas/ga4gh/metadata_pb2.py index 906fd6f2..cc68a1a5 100644 --- a/python/ga4gh/schemas/ga4gh/metadata_pb2.py +++ b/python/ga4gh/schemas/ga4gh/metadata_pb2.py @@ -20,7 +20,7 @@ name='ga4gh/schemas/ga4gh/metadata.proto', package='ga4gh.schemas.ga4gh', syntax='proto3', - serialized_pb=_b('\n\"ga4gh/schemas/ga4gh/metadata.proto\x12\x13ga4gh.schemas.ga4gh\x1a\x1cgoogle/protobuf/struct.proto\"U\n\x0cOntologyTerm\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04term\x18\x02 \x01(\t\x12\x13\n\x0bsource_name\x18\x03 \x01(\t\x12\x16\n\x0esource_version\x18\x04 \x01(\t\"\xb7\x01\n\x07\x44\x61taset\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 \x01(\t\x12\x34\n\x04info\x18\x04 \x03(\x0b\x32&.ga4gh.schemas.ga4gh.Dataset.InfoEntry\x1aG\n\tInfoEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12)\n\x05value\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.ListValue:\x02\x38\x01\"c\n\x07Program\x12\x14\n\x0c\x63ommand_line\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x17\n\x0fprev_program_id\x18\x04 \x01(\t\x12\x0f\n\x07version\x18\x05 \x01(\tb\x06proto3') + serialized_pb=_b('\n\"ga4gh/schemas/ga4gh/metadata.proto\x12\x13ga4gh.schemas.ga4gh\x1a\x1cgoogle/protobuf/struct.proto\"-\n\x0cOntologyTerm\x12\x0f\n\x07term_id\x18\x01 \x01(\t\x12\x0c\n\x04term\x18\x02 \x01(\t\"\xb7\x01\n\x07\x44\x61taset\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x13\n\x0b\x64\x65scription\x18\x03 
\x01(\t\x12\x34\n\x04info\x18\x04 \x03(\x0b\x32&.ga4gh.schemas.ga4gh.Dataset.InfoEntry\x1aG\n\tInfoEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12)\n\x05value\x18\x02 \x01(\x0b\x32\x1a.google.protobuf.ListValue:\x02\x38\x01\"c\n\x07Program\x12\x14\n\x0c\x63ommand_line\x18\x01 \x01(\t\x12\n\n\x02id\x18\x02 \x01(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x17\n\x0fprev_program_id\x18\x04 \x01(\t\x12\x0f\n\x07version\x18\x05 \x01(\tb\x06proto3') , dependencies=[google_dot_protobuf_dot_struct__pb2.DESCRIPTOR,]) _sym_db.RegisterFileDescriptor(DESCRIPTOR) @@ -36,7 +36,7 @@ containing_type=None, fields=[ _descriptor.FieldDescriptor( - name='id', full_name='ga4gh.schemas.ga4gh.OntologyTerm.id', index=0, + name='term_id', full_name='ga4gh.schemas.ga4gh.OntologyTerm.term_id', index=0, number=1, type=9, cpp_type=9, label=1, has_default_value=False, default_value=_b("").decode('utf-8'), message_type=None, enum_type=None, containing_type=None, @@ -49,20 +49,6 @@ message_type=None, enum_type=None, containing_type=None, is_extension=False, extension_scope=None, options=None), - _descriptor.FieldDescriptor( - name='source_name', full_name='ga4gh.schemas.ga4gh.OntologyTerm.source_name', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='source_version', full_name='ga4gh.schemas.ga4gh.OntologyTerm.source_version', index=3, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), ], extensions=[ ], @@ -76,7 +62,7 @@ oneofs=[ ], serialized_start=89, - serialized_end=174, + serialized_end=134, ) @@ -113,8 +99,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=289, - serialized_end=360, + 
serialized_start=249, + serialized_end=320, ) _DATASET = _descriptor.Descriptor( @@ -164,8 +150,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=177, - serialized_end=360, + serialized_start=137, + serialized_end=320, ) @@ -223,8 +209,8 @@ extension_ranges=[], oneofs=[ ], - serialized_start=362, - serialized_end=461, + serialized_start=322, + serialized_end=421, ) _DATASET_INFOENTRY.fields_by_name['value'].message_type = google_dot_protobuf_dot_struct__pb2._LISTVALUE diff --git a/src/main/proto/ga4gh/metadata.proto b/src/main/proto/ga4gh/metadata.proto index cdf65795..73800369 100644 --- a/src/main/proto/ga4gh/metadata.proto +++ b/src/main/proto/ga4gh/metadata.proto @@ -7,25 +7,15 @@ import "google/protobuf/struct.proto"; // An ontology term describing an attribute. (e.g. the phenotype attribute // 'polydactyly' from HPO) message OntologyTerm { - // Ontology source identifier - the identifier, a CURIE (preferred) or PURL - // for an ontology source e.g. http://purl.obolibrary.org/obo/hp.obo It + // Ontology term identifier - the CURIE for an ontology term. It // differs from the standard GA4GH schema's :ref:`id <apidesign_object_ids>` - // in that it is a URI pointing to an information resource outside of the + // in that it is a CURIE pointing to an information resource outside of the // scope of the schema or its resource implementation. - string id = 1; + string term_id = 1; - // Ontology term - the representation the id is pointing to. + // Ontology term - the label of the ontology term the termId is pointing to. string term = 2; - // Ontology source name - the name of ontology from which the term is obtained - // e.g. 'Human Phenotype Ontology' - string source_name = 3; - - // Ontology source version - the version of the ontology from which the - // OntologyTerm is obtained; e.g. 2.6.1. There is no standard for ontology - // versioning and some frequently released ontologies may use a datestamp, or - // build number.
- string source_version = 4; } // A Dataset is a collection of related data of multiple types.