From a01f60cbf93de20db660f97f4683ee26fdb04aaa Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sat, 24 Dec 2022 04:00:46 +0100 Subject: [PATCH 1/8] add option to run extract_metadata alone without process_rdf --- lib/ontologies_linked_data/models/ontology_submission.rb | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 25eab8e5..73a9c013 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -1311,6 +1311,7 @@ def process_submission(logger, options = {}) # Wrap the whole process so we can email results begin process_rdf = false + extract_metadata = false index_search = false index_properties = false index_commit = false @@ -1321,6 +1322,7 @@ def process_submission(logger, options = {}) if options.empty? process_rdf = true + extract_metadata = true index_search = true index_properties = true index_commit = true @@ -1330,6 +1332,7 @@ def process_submission(logger, options = {}) archive = false else process_rdf = options[:process_rdf] == true ? true : false + extract_metadata = options[:extract_metadata] == true ? true : false index_search = options[:index_search] == true ? true : false index_properties = options[:index_properties] == true ? true : false index_commit = options[:index_commit] == true ? 
true : false @@ -1397,7 +1400,7 @@ def process_submission(logger, options = {}) remove_submission_status(status) #remove RDF status before starting generate_rdf(logger, reasoning: reasoning) - extract_metadata(logger, options[:params]) + add_submission_status(status) self.save rescue Exception => e @@ -1409,7 +1412,11 @@ def process_submission(logger, options = {}) # If RDF generation fails, no point of continuing raise e end + end + extract_metadata(logger, options[:params]) if extract_metadata || process_rdf + + if process_rdf file_path = self.uploadFilePath callbacks = { missing_labels: { From 539c5b81cd869ad5d5143a696079dc1203dfdc9d Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sat, 24 Dec 2022 04:01:29 +0100 Subject: [PATCH 2/8] update extract metadata test to handle owl:imports --- test/data/ontology_files/agrooeMappings-05-05-2016.owl | 2 +- test/models/test_ontology_submission.rb | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/test/data/ontology_files/agrooeMappings-05-05-2016.owl b/test/data/ontology_files/agrooeMappings-05-05-2016.owl index 6e9a0ad7..20f13a2c 100644 --- a/test/data/ontology_files/agrooeMappings-05-05-2016.owl +++ b/test/data/ontology_files/agrooeMappings-05-05-2016.owl @@ -255,7 +255,7 @@ <prov:wasGeneratedBy rdf:resource="http://lirmm.fr/2015/wasGeneratedBy.owl"/> <prov:wasInvalidatedBy rdf:resource="http://lirmm.fr/2015/wasInvalidatedBy.owl"/> - + <owl:imports rdf:resource="http://www.w3.org/2004/02/skos/core"/> </rdf:Description> <!-- DESCRIPTION OF RESOURCES INVOLVED IN METADATA DESCRIPTION --> diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index 823c603c..33ef58fc 100644 --- a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -1041,7 +1041,7 @@ def test_submission_extract_metadata assert_equal "Vincent Emonet, Anne Toulet, Benjamine Dessay, Léontine Dessaiterm, Augustine Doap", sub.hasContributor 
assert_equal [RDF::URI.new("http://lirmm.fr/2015/ontology/door-relation.owl"), RDF::URI.new("http://lirmm.fr/2015/ontology/dc-relation.owl"), RDF::URI.new("http://lirmm.fr/2015/ontology/dcterms-relation.owl"), RDF::URI.new("http://lirmm.fr/2015/ontology/voaf-relation.owl")].sort, sub.ontologyRelatedTo.sort - assert_equal 13, sub.numberOfClasses + assert_equal 18, sub.numberOfClasses end def test_submission_delete_remove_files From 571a6bc50353ab5068cef464def1d8e15eb330aa Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sun, 1 Jan 2023 18:52:33 +0100 Subject: [PATCH 3/8] auto lint submission metadata extractor --- .../submission_metadata_extractor.rb | 93 +++++++------------ 1 file changed, 35 insertions(+), 58 deletions(-) diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index a7087d98..d5d5b01f 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -35,7 +35,7 @@ def extract_metadata(logger, user_params) def extract_version query = Goo.sparql_query_client.select(:versionInfo).distinct - .from(self.id) + .from(id) .where([RDF::URI.new('http://bioportal.bioontology.org/ontologies/versionSubject'), RDF::URI.new('http://www.w3.org/2002/07/owl#versionInfo'), :versionInfo]) @@ -46,7 +46,7 @@ def extract_version def extract_ontology_iri query = Goo.sparql_query_client.select(:uri).distinct - .from(self.id) + .from(id) .where([:uri, RDF::URI.new('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'), RDF::URI.new('http://www.w3.org/2002/07/owl#Ontology')]) @@ -54,17 +54,15 @@ def extract_ontology_iri sol[:uri]&.to_s end - private # Extract additional metadata about the ontology # First it extracts the main metadata, then the mapped metadata def 
extract_ontology_metadata(logger, user_params) user_params = {} if user_params.nil? - ontology_uri = self.uri + ontology_uri = uri logger.info("Extraction metadata from ontology #{ontology_uri}") - # go through all OntologySubmission attributes. Returns symbols LinkedData::Models::OntologySubmission.attributes(:all).each do |attr| # for attribute with the :extractedMetadata setting on, and that have not been defined by the user @@ -152,41 +150,28 @@ def extract_ontology_metadata(logger, user_params) end # Set some metadata to default values if nothing extracted - def set_default_metadata(logger) - if self.identifier.nil? - self.identifier = self.uri.to_s - end + def set_default_metadata - if self.deprecated.nil? - if self.status.eql?('retired') - self.deprecated = true - else - self.deprecated = false - end - end + self.identifier = uri.to_s if identifier.nil? + + self.deprecated = status.eql?('retired') if deprecated.nil? # Add the ontology hasDomain to the submission hasDomain metadata value ontology_domain_list = [] - self.ontology.bring(:hasDomain).hasDomain.each do |domain| + ontology.bring(:hasDomain).hasDomain.each do |domain| ontology_domain_list << domain.id end - if (ontology_domain_list.length > 0 && self.hasDomain.nil?) - self.hasDomain = '' - end - if !self.hasDomain.nil? - self.hasDomain << ontology_domain_list.join(', ') - end + + self.hasDomain = '' if !ontology_domain_list.empty? && hasDomain.nil? + + self.hasDomain << ontology_domain_list.join(', ') unless hasDomain.nil? # Only get the first view because the attribute is not a list - ontology_view = self.ontology.bring(:views).views.first - if (self.hasPart.nil? && !ontology_view.nil?) - self.hasPart = ontology_view.id - end + ontology_view = ontology.bring(:views).views.first + self.hasPart = ontology_view.id if hasPart.nil? && !ontology_view.nil? # If no example identifier extracted: take the first class - if self.exampleIdentifier.nil? 
- self.exampleIdentifier = LinkedData::Models::Class.in(self).first.id - end + self.exampleIdentifier = LinkedData::Models::Class.in(self).first.id if exampleIdentifier.nil? # Metadata specific to BioPortal that have been removed: #if self.hostedBy.nil? @@ -194,26 +179,26 @@ def set_default_metadata(logger) #end # Add the search endpoint URL - if self.openSearchDescription.nil? - self.openSearchDescription = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}search?ontologies=#{self.ontology.acronym}&q=") + if openSearchDescription.nil? + self.openSearchDescription = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}search?ontologies=#{ontology.acronym}&q=") end # Search allow to search by URI too - if self.uriLookupEndpoint.nil? - self.uriLookupEndpoint = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}search?ontologies=#{self.ontology.acronym}&require_exact_match=true&q=") + if uriLookupEndpoint.nil? + self.uriLookupEndpoint = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}search?ontologies=#{ontology.acronym}&require_exact_match=true&q=") end # Add the dataDump URL - if self.dataDump.nil? - self.dataDump = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{self.ontology.acronym}/download?download_format=rdf") + if dataDump.nil? + self.dataDump = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{ontology.acronym}/download?download_format=rdf") end - if self.csvDump.nil? - self.csvDump = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{self.ontology.acronym}/download?download_format=csv") + if csvDump.nil? + self.csvDump = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{ontology.acronym}/download?download_format=csv") end # Add the previous submission as a prior version - if self.submissionId > 1 + if submissionId > 1 =begin if prior_versions.nil? 
prior_versions = [] @@ -223,34 +208,26 @@ def set_default_metadata(logger) prior_versions.push(RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{self.ontology.acronym}/submissions/#{self.submissionId - 1}")) self.hasPriorVersion = prior_versions =end - self.hasPriorVersion = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{self.ontology.acronym}/submissions/#{self.submissionId - 1}") + self.hasPriorVersion = RDF::URI.new("#{LinkedData.settings.rest_url_prefix}ontologies/#{ontology.acronym}/submissions/#{submissionId - 1}") end - if self.hasOntologyLanguage.umls? + if hasOntologyLanguage.umls? self.hasOntologySyntax = 'http://www.w3.org/ns/formats/Turtle' - elsif self.hasOntologyLanguage.obo? + elsif hasOntologyLanguage.obo? self.hasOntologySyntax = 'http://purl.obolibrary.org/obo/oboformat/spec.html' end # Define default properties for prefLabel, synonyms, definition, author: - if self.hasOntologyLanguage.owl? - if self.prefLabelProperty.nil? - self.prefLabelProperty = Goo.vocabulary(:skos)[:prefLabel] - end - if self.synonymProperty.nil? - self.synonymProperty = Goo.vocabulary(:skos)[:altLabel] - end - if self.definitionProperty.nil? - self.definitionProperty = Goo.vocabulary(:rdfs)[:comment] - end - if self.authorProperty.nil? - self.authorProperty = Goo.vocabulary(:dc)[:creator] - end + if hasOntologyLanguage.owl? + self.prefLabelProperty = Goo.vocabulary(:skos)[:prefLabel] if prefLabelProperty.nil? + self.synonymProperty = Goo.vocabulary(:skos)[:altLabel] if synonymProperty.nil? + self.definitionProperty = Goo.vocabulary(:rdfs)[:comment] if definitionProperty.nil? + self.authorProperty = Goo.vocabulary(:dc)[:creator] if authorProperty.nil? # Add also hierarchyProperty? Could not find any use of it end # Add the sparql endpoint URL - if self.endpoint.nil? && LinkedData.settings.sparql_endpoint_url + if endpoint.nil? 
&& LinkedData.settings.sparql_endpoint_url self.endpoint = RDF::URI.new(LinkedData.settings.sparql_endpoint_url) end @@ -304,7 +281,7 @@ def extract_each_metadata(ontology_uri, attr, prop_to_extract, logger) query_metadata = <<eos SELECT DISTINCT ?extractedObject ?omvname ?omvfirstname ?omvlastname ?rdfslabel -FROM #{self.id.to_ntriples} +FROM #{id.to_ntriples} WHERE { <#{ontology_uri}> #{prop_to_extract} ?extractedObject . OPTIONAL { ?extractedObject omv:name ?omvname } . @@ -313,7 +290,7 @@ def extract_each_metadata(ontology_uri, attr, prop_to_extract, logger) OPTIONAL { ?extractedObject rdfs:label ?rdfslabel } . } eos - Goo.namespaces.each do |prefix,uri| + Goo.namespaces.each do |prefix, uri| query_metadata = "PREFIX #{prefix}: <#{uri}>\n" + query_metadata end From 24fd9122018f587483661f285c08d8b8f9c14a52 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sun, 1 Jan 2023 18:53:03 +0100 Subject: [PATCH 4/8] remove unused logger in set_default_metadata --- .../ontology_submissions/submission_metadata_extractor.rb | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index d5d5b01f..3ea2bb77 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -22,14 +22,12 @@ def extract_metadata(logger, user_params) begin # Set default metadata - set_default_metadata(logger) + set_default_metadata logger.info('Default metadata set.') rescue StandardError => e logger.error("Error while setting default metadata: #{e}") end - - end def extract_version From b918df4a8c769a9a08faaf6fdbcbb774a18f2d53 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sun, 1 Jan 2023 18:54:48 +0100 Subject: 
[PATCH 5/8] simplify extract_ontology_metadata by extracting send_value method --- .../submission_metadata_extractor.rb | 118 +++++++----------- 1 file changed, 43 insertions(+), 75 deletions(-) diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index 3ea2bb77..75a6a2e9 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -64,87 +64,32 @@ def extract_ontology_metadata(logger, user_params) # go through all OntologySubmission attributes. Returns symbols LinkedData::Models::OntologySubmission.attributes(:all).each do |attr| # for attribute with the :extractedMetadata setting on, and that have not been defined by the user - if (LinkedData::Models::OntologySubmission.attribute_settings(attr)[:extractedMetadata]) && !(user_params.has_key?(attr) && !user_params[attr].nil? && !user_params[attr].empty?) - # a boolean to check if a value that should be single have already been extracted - single_extracted = false - - if !LinkedData::Models::OntologySubmission.attribute_settings(attr)[:namespace].nil? - property_to_extract = LinkedData::Models::OntologySubmission.attribute_settings(attr)[:namespace].to_s + ':' + attr.to_s - hash_results = extract_each_metadata(ontology_uri, attr, property_to_extract, logger) - - if (LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:list)) - # Add the retrieved value(s) to the attribute if the attribute take a list of objects - if self.send(attr.to_s).nil? 
- metadata_values = [] - else - metadata_values = self.send(attr.to_s).dup - end - hash_results.each do |k,v| - metadata_values.push(v) - end - self.send("#{attr.to_s}=", metadata_values) - elsif (LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:concatenate)) - # don't keep value from previous submissions for concats - metadata_concat = [] - # if multiple value for this attribute, then we concatenate it. And it's send to the attr after getting all metadataMappings - hash_results.each do |k,v| - metadata_concat << v.to_s - end - else - # If multiple value for a metadata that should have a single value: taking one value randomly (the first in the hash) - hash_results.each do |k,v| - single_extracted = true - self.send("#{attr.to_s}=", v) - break - end - end - end + attr_settings = LinkedData::Models::OntologySubmission.attribute_settings(attr) - # extracts attribute value from metadata mappings - if !LinkedData::Models::OntologySubmission.attribute_settings(attr)[:metadataMappings].nil? + attr_not_excluded = !(user_params.key?(attr) && !user_params[attr].nil? && !user_params[attr].empty?) - LinkedData::Models::OntologySubmission.attribute_settings(attr)[:metadataMappings].each do |mapping| - if single_extracted == true - # if an attribute with only one possible object as already been extracted - break - end - hash_mapping_results = extract_each_metadata(ontology_uri, attr, mapping.to_s, logger) + next unless attr_settings[:extractedMetadata] && attr_not_excluded - if (LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:list)) - # Add the retrieved value(s) to the attribute if the attribute take a list of objects - if self.send(attr.to_s).nil? 
- metadata_values = [] - else - metadata_values = self.send(attr.to_s).dup - end - hash_mapping_results.each do |k,v| - metadata_values.push(v) - end - self.send("#{attr.to_s}=", metadata_values) - elsif (LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:concatenate)) - # if multiple value for this attribute, then we concatenate it - hash_mapping_results.each do |k,v| - metadata_concat << v.to_s - end - else - # If multiple value for a metadata that should have a single value: taking one value randomly (the first in the hash) - hash_mapping_results.each do |k,v| - self.send("#{attr.to_s}=", v) - break - end - end - end - end + # a boolean to check if a value that should be single have already been extracted + single_extracted = false - # Add the concat at the very end, to easily join the content of the array - if (LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:concatenate)) - if !metadata_concat.empty? - self.send("#{attr.to_s}=", metadata_concat.join(', ')) - end - end + unless attr_settings[:namespace].nil? + property_to_extract = "#{attr_settings[:namespace].to_s}:#{attr.to_s}" + hash_results = extract_each_metadata(ontology_uri, attr, property_to_extract, logger) + single_extracted = send_value(attr, hash_results) unless hash_results.empty? end - end + # extracts attribute value from metadata mappings + attr_settings[:metadataMappings] ||= [] + + attr_settings[:metadataMappings].each do |mapping| + break if single_extracted + + hash_mapping_results = extract_each_metadata(ontology_uri, attr, mapping.to_s, logger) + send_value(attr, hash_mapping_results) unless hash_mapping_results.empty? 
+ end + + end end # Set some metadata to default values if nothing extracted @@ -231,6 +176,29 @@ def set_default_metadata end + def send_value(attr, value) + if enforce?(attr, :list) + # Add the retrieved value(s) to the attribute if the attribute take a list of objects + metadata_values = send(attr.to_s) || [] + metadata_values = metadata_values.dup + + metadata_values.push(*value.values) + + send("#{attr}=", metadata_values) + elsif enforce?(attr, :concatenate) + # if multiple value for this attribute, then we concatenate it + # Add the concat at the very end, to easily join the content of the array + metadata_values = send(attr.to_s) || '' + metadata_values = metadata_values.split(', ') + send("#{attr}=", (metadata_values + value.values.map(&:to_s)).join(', ')) + else + # If multiple value for a metadata that should have a single value: taking one value randomly (the first in the hash) + send("#{attr}=", value.values.first) + return true + end + false + end + # Return a hash with the best literal value for an URI # it selects the literal according to their language: no language > english > french > other languages def select_metadata_literal(metadata_uri, metadata_literal, hash) From 6d912a511f5c6759894a91257037852a3af59929 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sun, 1 Jan 2023 18:55:48 +0100 Subject: [PATCH 6/8] auto lint select_metadata_literal method --- .../submission_metadata_extractor.rb | 42 +++++++++---------- 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index 75a6a2e9..c04d8a5f 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -202,41 +202,41 @@ def send_value(attr, 
value) # Return a hash with the best literal value for an URI # it selects the literal according to their language: no language > english > french > other languages def select_metadata_literal(metadata_uri, metadata_literal, hash) - if metadata_literal.is_a?(RDF::Literal) - if hash.has_key?(metadata_uri) - if metadata_literal.has_language? - if !hash[metadata_uri].has_language? + return hash unless metadata_literal.is_a?(RDF::Literal) + + if hash.key?(metadata_uri) + if metadata_literal.has_language? + if !hash[metadata_uri].has_language? + return hash + else + case metadata_literal.language + when :en, :eng + # Take the value with english language over other languages + hash[metadata_uri] = metadata_literal return hash - else - if metadata_literal.language == :en || metadata_literal.language == :eng - # Take the value with english language over other languages - hash[metadata_uri] = metadata_literal + when :fr, :fre + # If no english, take french + if hash[metadata_uri].language == :en || hash[metadata_uri].language == :eng return hash - elsif metadata_literal.language == :fr || metadata_literal.language == :fre - # If no english, take french - if hash[metadata_uri].language == :en || hash[metadata_uri].language == :eng - return hash - else - hash[metadata_uri] = metadata_literal - return hash - end else + hash[metadata_uri] = metadata_literal return hash end + else + return hash end - else - # Take the value with no language in priority (considered as a default) - hash[metadata_uri] = metadata_literal - return hash end else + # Take the value with no language in priority (considered as a default) hash[metadata_uri] = metadata_literal return hash end + else + hash[metadata_uri] = metadata_literal + hash end end - # A function to extract additional metadata # Take the literal data if the property is pointing to a literal # If pointing to an URI: first it takes the "omv:name" of the object pointed by the property, if nil it takes the "rdfs:label". 
From 6467680fd1d2bd9b1f9f5b5c1c8a36e8aa5acd69 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sun, 1 Jan 2023 18:58:18 +0100 Subject: [PATCH 7/8] refactor extract_each_metadata by extracting enforce? & find_object_label --- .../submission_metadata_extractor.rb | 91 +++++++++---------- 1 file changed, 43 insertions(+), 48 deletions(-) diff --git a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb index c04d8a5f..bb60fa49 100644 --- a/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb +++ b/lib/ontologies_linked_data/concerns/ontology_submissions/submission_metadata_extractor.rb @@ -264,69 +264,64 @@ def extract_each_metadata(ontology_uri, attr, prop_to_extract, logger) # This hash will contain the "literal" metadata for each object (uri or literal) pointed by the metadata predicate hash_results = {} Goo.sparql_query_client.query(query_metadata).each_solution do |sol| - - if LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:uri) + value = sol[:extractedObject] + if enforce?(attr, :uri) # If the attr is enforced as URI then it directly takes the URI - if sol[:extractedObject].is_a?(RDF::URI) - hash_results[sol[:extractedObject]] = sol[:extractedObject] - end - - elsif LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:date_time) + hash_results[value] = value if value.is_a?(RDF::URI) + elsif enforce?(attr, :date_time) begin - hash_results[sol[:extractedObject]] = DateTime.iso8601(sol[:extractedObject].to_s) + hash_results[value] = DateTime.iso8601(value.to_s) rescue StandardError => e - logger.error("Impossible to extract DateTime metadata for #{attr.to_s}: #{sol[:extractedObject].to_s}. It should follow iso8601 standards. 
Error message: #{e}") + logger.error("Impossible to extract DateTime metadata for #{attr}: #{value}. It should follow iso8601 standards. Error message: #{e}") end - - elsif LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:integer) + elsif enforce?(attr, :integer) begin - hash_results[sol[:extractedObject]] = sol[:extractedObject].to_s.to_i + hash_results[value] = value.to_s.to_i rescue StandardError => e - logger.error("Impossible to extract integer metadata for #{attr.to_s}: #{sol[:extractedObject].to_s}. Error message: #{e}") + logger.error("Impossible to extract integer metadata for #{attr}: #{value}. Error message: #{e}") end - - elsif LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(:boolean) - begin - if (sol[:extractedObject].to_s.downcase.eql?('true')) - hash_results[sol[:extractedObject]] = true - elsif (sol[:extractedObject].to_s.downcase.eql?('false')) - hash_results[sol[:extractedObject]] = false - end - rescue StandardError => e - logger.error("Impossible to extract boolean metadata for #{attr.to_s}: #{sol[:extractedObject].to_s}. Error message: #{e}") - end - - else - if sol[:extractedObject].is_a?(RDF::URI) - # if the object is an URI but we are requesting a String - # TODO: ATTENTION on veut pas forcément TOUT le temps recump omvname, etc... Voir si on change ce comportement - if !sol[:omvname].nil? - hash_results = select_metadata_literal(sol[:extractedObject],sol[:omvname], hash_results) - elsif !sol[:rdfslabel].nil? - hash_results = select_metadata_literal(sol[:extractedObject],sol[:rdfslabel], hash_results) - elsif !sol[:omvfirstname].nil? - hash_results = select_metadata_literal(sol[:extractedObject],sol[:omvfirstname], hash_results) - # if first and last name are defined (for omv:Person) - if !sol[:omvlastname].nil? - hash_results[sol[:extractedObject]] = hash_results[sol[:extractedObject]].to_s + ' ' + sol[:omvlastname].to_s - end - elsif !sol[:omvlastname].nil? 
- # if only last name is defined - hash_results = select_metadata_literal(sol[:extractedObject],sol[:omvlastname], hash_results) - else - hash_results[sol[:extractedObject]] = sol[:extractedObject].to_s - end - + elsif enforce?(attr, :boolean) + case value.to_s.downcase + when 'true' + hash_results[value] = true + when 'false' + hash_results[value] = false else - # If this is directly a literal - hash_results = select_metadata_literal(sol[:extractedObject],sol[:extractedObject], hash_results) + logger.error("Impossible to extract boolean metadata for #{attr}: #{value}. It should be 'true' or 'false'.") end + elsif value.is_a?(RDF::URI) + hash_results = find_object_label(hash_results, sol, value) + else + # If this is directly a literal + hash_results = select_metadata_literal(value, value, hash_results) end end + hash_results + end + def find_object_label(hash_results, sol, value) + if !sol[:omvname].nil? + hash_results = select_metadata_literal(value, sol[:omvname], hash_results) + elsif !sol[:rdfslabel].nil? + hash_results = select_metadata_literal(value, sol[:rdfslabel], hash_results) + elsif !sol[:omvfirstname].nil? + hash_results = select_metadata_literal(value, sol[:omvfirstname], hash_results) + # if first and last name are defined (for omv:Person) + hash_results[value] = "#{hash_results[value]} #{sol[:omvlastname]}" unless sol[:omvlastname].nil? + elsif !sol[:omvlastname].nil? 
+ # if only last name is defined + hash_results = select_metadata_literal(value, sol[:omvlastname], hash_results) + else + # if the object is an URI but we are requesting a String + hash_results[value] = value.to_s + end hash_results end + def enforce?(attr, type) + LinkedData::Models::OntologySubmission.attribute_settings(attr)[:enforce].include?(type) + end + end end end From 86f55f31695341f333efe90cceb799fb68922df3 Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni <gs_bouazzouni@esi.dz> Date: Sun, 1 Jan 2023 19:07:47 +0100 Subject: [PATCH 8/8] add description metadata extraction test --- test/models/test_ontology_submission.rb | 1 + 1 file changed, 1 insertion(+) diff --git a/test/models/test_ontology_submission.rb b/test/models/test_ontology_submission.rb index 33ef58fc..1381dbd9 100644 --- a/test/models/test_ontology_submission.rb +++ b/test/models/test_ontology_submission.rb @@ -1035,6 +1035,7 @@ def test_submission_extract_metadata sub = LinkedData::Models::Ontology.find("AGROOE").first.latest_submission() sub.bring_remaining assert_equal false, sub.deprecated + assert_equal " AGROOE is an ontology used to test the metadata extraction, AGROOE is an ontology to illustrate how to describe their ontologies", sub.description assert_equal " LIRMM (default name) ", sub.publisher assert_equal " URI DC terms identifiers ", sub.identifier assert_equal ["http://lexvo.org/id/iso639-3/fra", "http://lexvo.org/id/iso639-3/eng"].sort, sub.naturalLanguage.sort