From 70f314ed8a6928d780e87d6673d79fc479b857ae Mon Sep 17 00:00:00 2001 From: Syphax Bouazzouni Date: Sat, 20 Apr 2024 21:14:44 +0200 Subject: [PATCH] extract the submission metrics generation step to a file --- lib/ontologies_linked_data/metrics/metrics.rb | 64 ++-------- .../models/ontology_submission.rb | 29 ----- .../submission_mertrics_calculator.rb | 110 ++++++++++++++++++ 3 files changed, 120 insertions(+), 83 deletions(-) create mode 100644 lib/ontologies_linked_data/services/submission_process/operations/submission_mertrics_calculator.rb diff --git a/lib/ontologies_linked_data/metrics/metrics.rb b/lib/ontologies_linked_data/metrics/metrics.rb index 3d47e324..95cd5e87 100644 --- a/lib/ontologies_linked_data/metrics/metrics.rb +++ b/lib/ontologies_linked_data/metrics/metrics.rb @@ -2,50 +2,6 @@ module LinkedData module Metrics - def self.metrics_for_submission(submission, logger) - metrics = nil - logger.info("metrics_for_submission start") - logger.flush - begin - submission.bring(:submissionStatus) if submission.bring?(:submissionStatus) - cls_metrics = class_metrics(submission, logger) - logger.info("class_metrics finished") - logger.flush - metrics = LinkedData::Models::Metric.new - - cls_metrics.each do |k,v| - unless v.instance_of?(Integer) - begin - v = Integer(v) - rescue ArgumentError - v = 0 - rescue TypeError - v = 0 - end - end - metrics.send("#{k}=",v) - end - indiv_count = number_individuals(logger, submission) - metrics.individuals = indiv_count - logger.info("individuals finished") - logger.flush - prop_count = number_properties(logger, submission) - metrics.properties = prop_count - logger.info("properties finished") - logger.flush - # re-generate metrics file - submission.generate_metrics_file(cls_metrics[:classes], indiv_count, prop_count) - logger.info("generation of metrics file finished") - logger.flush - rescue Exception => e - logger.error(e.message) - logger.error(e) - logger.flush - metrics = nil - end - metrics - end - def 
self.class_metrics(submission, logger) t00 = Time.now submission.ontology.bring(:flat) if submission.ontology.bring?(:flat) @@ -97,7 +53,7 @@ def self.class_metrics(submission, logger) logger.flush children_counts = [] groupby_children.each do |cls,count| - unless cls.start_with?("http") + unless cls.start_with?('http') next end unless is_flat @@ -178,7 +134,7 @@ def self.number_individuals(logger, submission) else logger.info("Unable to find metrics in file for submission #{submission.id.to_s}. Performing a COUNT of type query to get the total individual count...") logger.flush - indiv_count = count_owl_type(submission.id, "NamedIndividual") + indiv_count = count_owl_type(submission.id, 'NamedIndividual') end indiv_count end @@ -192,8 +148,8 @@ def self.number_properties(logger, submission) else logger.info("Unable to find metrics in file for submission #{submission.id.to_s}. Performing a COUNT of type query to get the total property count...") logger.flush - prop_count = count_owl_type(submission.id, "DatatypeProperty") - prop_count += count_owl_type(submission.id, "ObjectProperty") + prop_count = count_owl_type(submission.id, 'DatatypeProperty') + prop_count += count_owl_type(submission.id, 'ObjectProperty') end prop_count end @@ -203,17 +159,17 @@ def self.hierarchy_depth?(graph,root,n,treeProp) hops = [] vars = [] n.times do |i| - hop = sTemplate.sub("children","?x#{i}") + hop = sTemplate.sub('children',"?x#{i}") if i == 0 - hop = hop.sub("parent", "<#{root.to_s}>") + hop = hop.sub('parent', "<#{root.to_s}>") else - hop = hop.sub("parent", "?x#{i-1}") + hop = hop.sub('parent', "?x#{i-1}") end hops << hop vars << "?x#{i}" end joins = hops.join(".\n") - vars = vars.join(" ") + vars = vars.join(' ') query = < { @@ -238,7 +194,7 @@ def self.hierarchy_depth?(graph,root,n,treeProp) def self.query_count_definitions(subId,defProps) propFilter = defProps.map { |x| "?p = <#{x.to_s}>" } - propFilter = propFilter.join " || " + propFilter = propFilter.join ' || ' query = 
<<-eos SELECT (count(DISTINCT ?s) as ?c) WHERE { GRAPH <#{subId.to_s}> { @@ -249,7 +205,7 @@ def self.query_count_definitions(subId,defProps) FILTER (?s != <#{Goo.namespaces[:owl][:Thing]}>) }} eos - query = query.sub("properties", propFilter) + query = query.sub('properties', propFilter) rs = Goo.sparql_query_client.query(query) rs.each do |sol| return sol[:c].object diff --git a/lib/ontologies_linked_data/models/ontology_submission.rb b/lib/ontologies_linked_data/models/ontology_submission.rb index 7695b517..770b7af0 100644 --- a/lib/ontologies_linked_data/models/ontology_submission.rb +++ b/lib/ontologies_linked_data/models/ontology_submission.rb @@ -532,35 +532,6 @@ def metrics_from_file(logger = nil) metrics end - def generate_metrics_file(class_count, indiv_count, prop_count) - CSV.open(self.metrics_path, "wb") do |csv| - csv << ["Class Count", "Individual Count", "Property Count"] - csv << [class_count, indiv_count, prop_count] - end - end - - def generate_metrics_file2(class_count, indiv_count, prop_count, max_depth) - CSV.open(self.metrics_path, "wb") do |csv| - csv << ["Class Count", "Individual Count", "Property Count", "Max Depth"] - csv << [class_count, indiv_count, prop_count, max_depth] - end - end - - def generate_umls_metrics_file(tr_file_path = nil) - tr_file_path ||= self.triples_file_path - class_count = 0 - indiv_count = 0 - prop_count = 0 - - File.foreach(tr_file_path) do |line| - class_count += 1 if line =~ /owl:Class/ - indiv_count += 1 if line =~ /owl:NamedIndividual/ - prop_count += 1 if line =~ /owl:ObjectProperty/ - prop_count += 1 if line =~ /owl:DatatypeProperty/ - end - self.generate_metrics_file(class_count, indiv_count, prop_count) - end - def add_submission_status(status) valid = status.is_a?(LinkedData::Models::SubmissionStatus) diff --git a/lib/ontologies_linked_data/services/submission_process/operations/submission_mertrics_calculator.rb 
b/lib/ontologies_linked_data/services/submission_process/operations/submission_mertrics_calculator.rb
new file mode 100644
index 00000000..b41c06f9
--- /dev/null
+++ b/lib/ontologies_linked_data/services/submission_process/operations/submission_mertrics_calculator.rb
@@ -0,0 +1,142 @@
+require 'csv'
+
+module LinkedData
+  module Services
+    # Submission processing step that computes the metrics of a submission
+    # (class metrics, individual count, property count), saves them as a
+    # LinkedData::Models::Metric and regenerates the metrics CSV file.
+    class SubmissionMetricsCalculator < OntologySubmissionProcess
+      # Runs the metrics step.
+      #
+      # @param logger logger receiving progress and error messages
+      # @param options not used by this step
+      def process(logger, options = nil)
+        process_metrics(logger)
+      end
+
+      # Counts the lines of a triples file that mention owl:Class,
+      # owl:NamedIndividual, owl:ObjectProperty or owl:DatatypeProperty and
+      # writes the totals to the submission metrics file.
+      #
+      # @param tr_file_path [String, nil] file to scan, defaults to the
+      #   triples file of the submission
+      def generate_umls_metrics_file(tr_file_path = nil)
+        tr_file_path ||= @submission.triples_file_path
+        class_count = 0
+        indiv_count = 0
+        prop_count = 0
+
+        File.foreach(tr_file_path) do |line|
+          class_count += 1 if line =~ /owl:Class/
+          indiv_count += 1 if line =~ /owl:NamedIndividual/
+          prop_count += 1 if line =~ /owl:ObjectProperty/
+          prop_count += 1 if line =~ /owl:DatatypeProperty/
+        end
+        generate_metrics_file(class_count, indiv_count, prop_count)
+      end
+
+      private
+
+      # Computes and stores the metrics, then adds the METRICS status to the
+      # submission. On failure the error is logged, the submission metrics are
+      # cleared and the error status is added instead. The submission is saved
+      # in both cases.
+      def process_metrics(logger)
+        status = LinkedData::Models::SubmissionStatus.find('METRICS').first
+        begin
+          compute_metrics(logger)
+          @submission.add_submission_status(status)
+        rescue StandardError => e
+          logger.error("#{e.class}: #{e.message}\n#{e.backtrace.join("\n\t")}")
+          logger.flush
+          @submission.metrics = nil
+          @submission.add_submission_status(status.get_error_status)
+        ensure
+          @submission.save
+        end
+      end
+
+      # Builds the metrics of the submission, replaces any stored metrics that
+      # have the same id and attaches the new ones to the submission.
+      #
+      # @return the submission
+      # @raise [RuntimeError] when the metrics could not be computed
+      def compute_metrics(logger)
+        metrics = metrics_for_submission(logger)
+        # metrics_for_submission returns nil after logging its own failure;
+        # fail with a clear message instead of a NoMethodError on nil
+        raise "unable to compute metrics for submission #{@submission.id}" if metrics.nil?
+
+        metrics.id = RDF::URI.new(@submission.id.to_s + '/metrics')
+        exist_metrics = LinkedData::Models::Metric.find(metrics.id).first
+        exist_metrics.delete if exist_metrics
+        metrics.save
+        @submission.metrics = metrics
+        @submission
+      end
+
+      # Collects class metrics, individual count and property count into a new
+      # Metric and rewrites the metrics file.
+      #
+      # @return [LinkedData::Models::Metric, nil] nil when any step raised
+      def metrics_for_submission(logger)
+        logger.info('metrics_for_submission start')
+        logger.flush
+        begin
+          @submission.bring(:submissionStatus) if @submission.bring?(:submissionStatus)
+          cls_metrics = LinkedData::Metrics.class_metrics(@submission, logger)
+          logger.info('class_metrics finished')
+          logger.flush
+          metrics = LinkedData::Models::Metric.new
+
+          cls_metrics.each do |k, v|
+            # values that cannot be converted to an Integer are stored as 0
+            unless v.instance_of?(Integer)
+              begin
+                v = Integer(v)
+              rescue ArgumentError, TypeError
+                v = 0
+              end
+            end
+            metrics.send("#{k}=", v)
+          end
+          indiv_count = LinkedData::Metrics.number_individuals(logger, @submission)
+          metrics.individuals = indiv_count
+          logger.info('individuals finished')
+          logger.flush
+          prop_count = LinkedData::Metrics.number_properties(logger, @submission)
+          metrics.properties = prop_count
+          logger.info('properties finished')
+          logger.flush
+          # re-generate metrics file
+          generate_metrics_file(cls_metrics[:classes], indiv_count, prop_count)
+          logger.info('generation of metrics file finished')
+          logger.flush
+        rescue StandardError => e
+          logger.error(e.message)
+          logger.error(e)
+          logger.flush
+          metrics = nil
+        end
+        metrics
+      end
+
+      # Writes a two-row CSV (header and counts) to the submission metrics path.
+      def generate_metrics_file(class_count, indiv_count, prop_count)
+        CSV.open(@submission.metrics_path, 'wb') do |csv|
+          csv << ['Class Count', 'Individual Count', 'Property Count']
+          csv << [class_count, indiv_count, prop_count]
+        end
+      end
+
+      # Same as generate_metrics_file with an additional "Max Depth" column.
+      # TODO: find usages in NCBO code
+      def generate_metrics_file2(class_count, indiv_count, prop_count, max_depth)
+        CSV.open(@submission.metrics_path, 'wb') do |csv|
+          csv << ['Class Count', 'Individual Count', 'Property Count', 'Max Depth']
+          csv << [class_count, indiv_count, prop_count, max_depth]
+        end
+      end
+    end
+  end
+end