Skip to content

Commit

Permalink
extract the submission metrics generation step to a file
Browse files Browse the repository at this point in the history
  • Loading branch information
syphax-bouazzouni committed Apr 20, 2024
1 parent a2eb2c0 commit 70f314e
Show file tree
Hide file tree
Showing 3 changed files with 120 additions and 83 deletions.
64 changes: 10 additions & 54 deletions lib/ontologies_linked_data/metrics/metrics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,50 +2,6 @@

module LinkedData
module Metrics
# Builds a LinkedData::Models::Metric for +submission+ and rewrites its
# metrics file.
#
# Class-level values come from class_metrics; individual and property counts
# come from number_individuals and number_properties. Every class metric that
# is not already an Integer is coerced with Integer(), falling back to 0 when
# the value cannot be converted.
#
# @param submission the submission whose metrics are computed
# @param logger     logger receiving progress and error messages
# @return the populated metric object, or nil when a StandardError was raised
#   (the error is logged, not re-raised)
def self.metrics_for_submission(submission, logger)
  metrics = nil
  logger.info("metrics_for_submission start")
  logger.flush
  begin
    submission.bring(:submissionStatus) if submission.bring?(:submissionStatus)
    cls_metrics = class_metrics(submission, logger)
    logger.info("class_metrics finished")
    logger.flush
    metrics = LinkedData::Models::Metric.new

    cls_metrics.each do |k, v|
      unless v.instance_of?(Integer)
        begin
          v = Integer(v)
        rescue ArgumentError, TypeError
          # Non-numeric or nil values are stored as 0.
          v = 0
        end
      end
      metrics.send("#{k}=", v)
    end
    indiv_count = number_individuals(logger, submission)
    metrics.individuals = indiv_count
    logger.info("individuals finished")
    logger.flush
    prop_count = number_properties(logger, submission)
    metrics.properties = prop_count
    logger.info("properties finished")
    logger.flush
    # re-generate metrics file (the raw, uncoerced class count is written)
    submission.generate_metrics_file(cls_metrics[:classes], indiv_count, prop_count)
    logger.info("generation of metrics file finished")
    logger.flush
  rescue StandardError => e
    # Only StandardError is handled here: rescuing Exception would also
    # swallow Interrupt, SystemExit and NoMemoryError.
    logger.error(e.message)
    logger.error(e)
    logger.flush
    metrics = nil
  end
  metrics
end

def self.class_metrics(submission, logger)
t00 = Time.now
submission.ontology.bring(:flat) if submission.ontology.bring?(:flat)
Expand Down Expand Up @@ -97,7 +53,7 @@ def self.class_metrics(submission, logger)
logger.flush
children_counts = []
groupby_children.each do |cls,count|
unless cls.start_with?("http")
unless cls.start_with?('http')
next
end
unless is_flat
Expand Down Expand Up @@ -178,7 +134,7 @@ def self.number_individuals(logger, submission)
else
logger.info("Unable to find metrics in file for submission #{submission.id.to_s}. Performing a COUNT of type query to get the total individual count...")
logger.flush
indiv_count = count_owl_type(submission.id, "NamedIndividual")
indiv_count = count_owl_type(submission.id, 'NamedIndividual')
end
indiv_count
end
Expand All @@ -192,8 +148,8 @@ def self.number_properties(logger, submission)
else
logger.info("Unable to find metrics in file for submission #{submission.id.to_s}. Performing a COUNT of type query to get the total property count...")
logger.flush
prop_count = count_owl_type(submission.id, "DatatypeProperty")
prop_count += count_owl_type(submission.id, "ObjectProperty")
prop_count = count_owl_type(submission.id, 'DatatypeProperty')
prop_count += count_owl_type(submission.id, 'ObjectProperty')
end
prop_count
end
Expand All @@ -203,17 +159,17 @@ def self.hierarchy_depth?(graph,root,n,treeProp)
hops = []
vars = []
n.times do |i|
hop = sTemplate.sub("children","?x#{i}")
hop = sTemplate.sub('children',"?x#{i}")
if i == 0
hop = hop.sub("parent", "<#{root.to_s}>")
hop = hop.sub('parent', "<#{root.to_s}>")
else
hop = hop.sub("parent", "?x#{i-1}")
hop = hop.sub('parent', "?x#{i-1}")
end
hops << hop
vars << "?x#{i}"
end
joins = hops.join(".\n")
vars = vars.join(" ")
vars = vars.join(' ')
query = <<eof
SELECT #{vars} WHERE {
GRAPH <#{graph.to_s}> {
Expand All @@ -238,7 +194,7 @@ def self.hierarchy_depth?(graph,root,n,treeProp)

def self.query_count_definitions(subId,defProps)
propFilter = defProps.map { |x| "?p = <#{x.to_s}>" }
propFilter = propFilter.join " || "
propFilter = propFilter.join ' || '
query = <<-eos
SELECT (count(DISTINCT ?s) as ?c) WHERE {
GRAPH <#{subId.to_s}> {
Expand All @@ -249,7 +205,7 @@ def self.query_count_definitions(subId,defProps)
FILTER (?s != <#{Goo.namespaces[:owl][:Thing]}>)
}}
eos
query = query.sub("properties", propFilter)
query = query.sub('properties', propFilter)
rs = Goo.sparql_query_client.query(query)
rs.each do |sol|
return sol[:c].object
Expand Down
29 changes: 0 additions & 29 deletions lib/ontologies_linked_data/models/ontology_submission.rb
Original file line number Diff line number Diff line change
Expand Up @@ -532,35 +532,6 @@ def metrics_from_file(logger = nil)
metrics
end

# Writes the metrics CSV at metrics_path: one header row followed by one row
# holding the class, individual and property counts.
def generate_metrics_file(class_count, indiv_count, prop_count)
  header = ["Class Count", "Individual Count", "Property Count"]
  values = [class_count, indiv_count, prop_count]
  CSV.open(metrics_path, "wb") { |out| out << header << values }
end

# Same as generate_metrics_file, with an extra "Max Depth" column.
def generate_metrics_file2(class_count, indiv_count, prop_count, max_depth)
  header = ["Class Count", "Individual Count", "Property Count", "Max Depth"]
  values = [class_count, indiv_count, prop_count, max_depth]
  CSV.open(metrics_path, "wb") { |out| out << header << values }
end

# Scans a triples file line by line, counting lines that mention owl:Class,
# owl:NamedIndividual, owl:ObjectProperty or owl:DatatypeProperty, then writes
# the totals through generate_metrics_file.
#
# tr_file_path defaults to this object's triples_file_path. A line mentioning
# both property types adds two to the property count.
def generate_umls_metrics_file(tr_file_path = nil)
  tr_file_path ||= triples_file_path
  totals = { classes: 0, individuals: 0, properties: 0 }

  File.foreach(tr_file_path) do |line|
    totals[:classes] += 1 if line.match?(/owl:Class/)
    totals[:individuals] += 1 if line.match?(/owl:NamedIndividual/)
    totals[:properties] += 1 if line.match?(/owl:ObjectProperty/)
    totals[:properties] += 1 if line.match?(/owl:DatatypeProperty/)
  end
  generate_metrics_file(totals[:classes], totals[:individuals], totals[:properties])
end


def add_submission_status(status)
valid = status.is_a?(LinkedData::Models::SubmissionStatus)
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
module LinkedData
module Services
class SubmissionMetricsCalculator < OntologySubmissionProcess
# Entry point of this processing step: delegates to process_metrics.
#
# @param logger  logger handed on to process_metrics
# @param options accepted but not used by this method
def process(logger, options = nil)
process_metrics(logger)
end

# Derives the class / individual / property counts from a triples file and
# writes them through generate_metrics_file.
#
# Every line mentioning owl:Class or owl:NamedIndividual adds one to the
# matching count; owl:ObjectProperty and owl:DatatypeProperty each add one to
# the property count, so a line mentioning both adds two.
#
# @param tr_file_path path of the triples file; defaults to the submission's
#   triples_file_path
def generate_umls_metrics_file(tr_file_path=nil)
  triples_path = tr_file_path || @submission.triples_file_path
  classes = individuals = properties = 0

  File.foreach(triples_path) do |triple|
    classes += 1 if triple.match?(/owl:Class/)
    individuals += 1 if triple.match?(/owl:NamedIndividual/)
    properties += 1 if triple.match?(/owl:ObjectProperty/)
    properties += 1 if triple.match?(/owl:DatatypeProperty/)
  end
  generate_metrics_file(classes, individuals, properties)
end

private

# Runs compute_metrics and records the outcome on the submission.
#
# On success the 'METRICS' status is added. On a StandardError the error is
# logged with its backtrace, the submission's metrics are cleared and the
# error variant of the status is added instead. The submission is saved in
# both cases.
def process_metrics(logger)
  metrics_status = LinkedData::Models::SubmissionStatus.find('METRICS').first
  begin
    compute_metrics(logger)
    @submission.add_submission_status(metrics_status)
  rescue StandardError => error
    trace = error.backtrace.join("\n\t")
    logger.error("#{error.class}: #{error.message}\n#{trace}")
    logger.flush
    @submission.metrics = nil
    @submission.add_submission_status(metrics_status.get_error_status)
  ensure
    @submission.save
  end
end

# Computes the submission's metrics, persists them under
# "<submission id>/metrics" (replacing any metric already stored under that
# id) and attaches them to the submission.
#
# @param logger logger passed to metrics_for_submission
# @return the submission
# @raise [RuntimeError] when metrics_for_submission yields no metrics
def compute_metrics(logger)
  metrics = metrics_for_submission(logger)
  # metrics_for_submission logs its own error and returns nil on failure;
  # fail with an explicit message instead of a NoMethodError on nil.
  raise "Unable to compute metrics for submission #{@submission.id}" if metrics.nil?

  metrics.id = RDF::URI.new("#{@submission.id}/metrics")
  existing = LinkedData::Models::Metric.find(metrics.id).first
  existing.delete if existing
  metrics.save
  @submission.metrics = metrics
  @submission
end

# Builds a LinkedData::Models::Metric for the current submission and rewrites
# its metrics file.
#
# Class-level values come from LinkedData::Metrics.class_metrics; values that
# are not Integers are coerced with Integer(), falling back to 0 when that
# fails. Individual and property counts come from
# LinkedData::Metrics.number_individuals / number_properties.
#
# @param logger logger receiving progress and error messages
# @return the populated metric, or nil when a StandardError occurred (the
#   error is logged, not re-raised)
def metrics_for_submission(logger)
  # Logs a progress message and flushes the logger right away.
  checkpoint = lambda do |message|
    logger.info(message)
    logger.flush
  end

  checkpoint.call('metrics_for_submission start')
  begin
    @submission.bring(:submissionStatus) if @submission.bring?(:submissionStatus)
    class_stats = LinkedData::Metrics.class_metrics(@submission, logger)
    checkpoint.call('class_metrics finished')
    metrics = LinkedData::Models::Metric.new

    class_stats.each do |attribute, raw|
      value =
        if raw.instance_of?(Integer)
          raw
        else
          begin
            Integer(raw)
          rescue ArgumentError, TypeError
            0
          end
        end
      metrics.send("#{attribute}=", value)
    end

    individuals = LinkedData::Metrics.number_individuals(logger, @submission)
    metrics.individuals = individuals
    checkpoint.call('individuals finished')

    properties = LinkedData::Metrics.number_properties(logger, @submission)
    metrics.properties = properties
    checkpoint.call('properties finished')

    # re-generate metrics file (the raw, uncoerced class count is written)
    generate_metrics_file(class_stats[:classes], individuals, properties)
    checkpoint.call('generation of metrics file finished')
  rescue StandardError => error
    logger.error(error.message)
    logger.error(error)
    logger.flush
    metrics = nil
  end
  metrics
end

# Writes the submission's metrics CSV (at @submission.metrics_path): a header
# row followed by a single row with the three counts.
def generate_metrics_file(class_count, indiv_count, prop_count)
  header = ['Class Count', 'Individual Count', 'Property Count']
  counts = [class_count, indiv_count, prop_count]
  CSV.open(@submission.metrics_path, 'wb') { |out| out << header << counts }
end

# TODO: find usages in NCBO code.
#
# Variant of generate_metrics_file that also writes a "Max Depth" column.
# The file location comes from the submission (@submission.metrics_path);
# this service object has no metrics_path of its own.
def generate_metrics_file2(class_count, indiv_count, prop_count, max_depth)
  CSV.open(@submission.metrics_path, 'wb') do |csv|
    csv << ['Class Count', 'Individual Count', 'Property Count', 'Max Depth']
    csv << [class_count, indiv_count, prop_count, max_depth]
  end
end

end
end
end

0 comments on commit 70f314e

Please sign in to comment.