Skip to content

Commit

Permalink
Feature: Add canonical portal for duplicate ontologies (#770)
Browse files Browse the repository at this point in the history
* method to get the canonical portal for an ontology using pullLocation

* display federated ontologies in browse page based on the canonical portal

* clean canonical portal function in federation helper

* extract canonical portal ontology choice in a function for clarity

* display search page results based on the canonical portal of the ontology

* move federation canonical to federation helper

* clean swap portal attributes function in federation helper

* fix and clean apply canonical portal function

* extract count portals into a separate function from external_canonical_ontology_portal function + apply_canonical_portal function

* put the search canonical logic back in the search_aggregator file and refactor the code

* do the search canonical logic only if enabled

* fix rest_hostname raising an exception that REST_URI does not exist

---------

Co-authored-by: Syphax bouazzouni <[email protected]>
  • Loading branch information
Bilelkihal and syphax-bouazzouni authored Oct 24, 2024
1 parent dc448c9 commit c52b5c8
Show file tree
Hide file tree
Showing 5 changed files with 75 additions and 10 deletions.
2 changes: 1 addition & 1 deletion Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ gem "flag-icons-rails", "~> 3.4"

# Custom API client
gem 'ontologies_api_client', git: 'https://github.com/ontoportal-lirmm/ontologies_api_ruby_client.git', branch: 'development'

# Ruby 2.7.8 pinned gems (to remove when migrating to Ruby >= 3.0)

gem 'ffi', '~> 1.16.3'
gem 'net-ftp', '~> 0.2.0', require: false
gem 'net-http', '~> 0.3.2'
Expand Down
44 changes: 42 additions & 2 deletions app/controllers/concerns/search_aggregator.rb
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,15 @@ def aggregate_results(query, results)

all_ontologies = LinkedData::Client::Models::Ontology.all(include: 'acronym,name', include_views: true, display_links: false, display_context: false)


search_results = grouped_results.map do |group|
format_search_result(group, all_ontologies)
end

search_results = merge_sort_federated_results(query, search_results) if federation_enabled?
if federation_enabled?
search_results = merge_sort_federated_results(query, search_results) if federation_enabled?
search_results = swap_canonical_portal_results_first(search_results)
end

search_results
end
Expand Down Expand Up @@ -251,7 +255,13 @@ def merge_federated_results(search_results)
if (element[:root][:ontology_acronym] == reuse[:root][:ontology_acronym]) && (element[:root][:uri] == reuse[:root][:uri])
portal_name = reuse[:root][:portal_name]
link = reuse[:root][:link]
element[:root][:other_portals] << {name: portal_name, color: federated_portal_color(portal_name), light_color: federated_portal_light_color(portal_name), link: link}
element[:root][:other_portals] << {
name: portal_name,
color: federated_portal_color(portal_name),
light_color: federated_portal_light_color(portal_name),
link: link,
ontology_id: reuse[:root][:ontology_id]
}
true
else
false
Expand All @@ -260,6 +270,36 @@ def merge_federated_results(search_results)
end
end

# Re-roots federated search results on their canonical portal.
#
# For every result that belongs to a portal and is also listed on other portals,
# the submissions of all involved ontologies are collected, the portal most often
# referenced by them (see most_referred_portal) is taken as canonical, and the
# root's portal attributes are swapped with the matching :other_portals entry.
#
# @param search_results [Array<Hash>] results carrying a :root hash; mutated in place
# @return [Array<Hash>] the same array that was passed in
def swap_canonical_portal_results_first(search_results)
  # Only results that come from a portal and are duplicated elsewhere can be re-rooted.
  swappable = search_results.reject do |result|
    result[:root][:portal_name].nil? || result[:root][:other_portals].blank?
  end
  # Skip the submissions request entirely when there is nothing to re-root.
  return search_results if swappable.empty?

  all_submissions = LinkedData::Client::Models::OntologySubmission.all(include: 'pullLocation', include_views: true, display_links: false, display_context: false)

  swappable.each do |result|
    root = result[:root]
    # An :ontology_id may be absent; String#include?(nil) raises TypeError, so drop nils.
    ontology_ids = ([root[:ontology_id]] + root[:other_portals].map { |portal| portal[:ontology_id] }).compact

    # NOTE(review): this is a substring match, so an ontology id that is a prefix
    # of another ontology's id would also match its submissions - confirm the id format.
    result_submissions = all_submissions.select do |submission|
      ontology_ids.any? { |ontology_id| submission.id.include?(ontology_id) }
    end

    canonical_portal = most_referred_portal(result_submissions)
    # Nothing to do when no portal is referenced or the root already is the canonical one.
    next if canonical_portal.nil? || root[:portal_name].eql?(canonical_portal.to_s)

    canonical_entry = root[:other_portals].find { |portal| portal[:name] == canonical_portal.to_s }
    swap_portal_attributes(root, canonical_entry) if canonical_entry
  end

  search_results
end


# Exchanges the portal attributes of a search result root with those of one of
# its :other_portals entries.
#
# The two hashes do not use the same keys: the root is read with :portal_name,
# :portal_color and :portal_light_color, whereas :other_portals entries are built
# with :name, :color and :light_color. Each root key is therefore paired with its
# counterpart; swapping the same key on both sides would overwrite the root's
# attributes with nil.
#
# @param root_portal [Hash] the :root hash of a search result; mutated in place
# @param new_portal [Hash] an entry of root_portal[:other_portals]; mutated in place
# @return [Hash] root_portal
def swap_portal_attributes(root_portal, new_portal)
  key_pairs = {
    link: :link,
    portal_name: :name,
    portal_color: :color,
    portal_light_color: :light_color
  }
  key_pairs.each do |root_key, portal_key|
    root_portal[root_key], new_portal[portal_key] = new_portal[portal_key], root_portal[root_key]
  end
  root_portal
end

def sort_results_by_string_similarity(query, search_results)
search_results = search_results.sort_by do |entry|
root_similarity = String::Similarity.cosine(query.downcase, entry[:root][:title].split('-').first.gsub(" ", "").downcase)
Expand Down
8 changes: 2 additions & 6 deletions app/controllers/concerns/submission_filter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -70,18 +70,14 @@ def ontologies_with_filters_url(filters, page: 1, count: false)
# Collapses submissions that share an ontology acronym into a single entry.
#
# Submissions are grouped by x[:ontology]&.acronym; for each group the entry
# chosen by canonical_ontology is kept and receives a :sources list holding the
# :id of every submission in the group.
#
# @param submissions [Array<Hash>] entries exposing :ontology and :id
# @return [Array<Hash>] one (mutated) entry per acronym, in first-seen order
def merge_by_acronym(submissions)
  submissions.group_by { |x| x[:ontology]&.acronym }.map do |_acronym, ontologies|
    # The selection is delegated to canonical_ontology; fall back to the first
    # entry so a nil choice cannot break the :sources assignment below.
    ontology = canonical_ontology(ontologies) || ontologies.first
    ontology[:sources] = ontologies.map { |x| x[:id] }
    ontology
  end
end


def filter_submissions(ontologies, query:, status:, show_views:, private_only:, languages:, page_size:, formality_level:, is_of_type:, groups:, categories:, formats:)
submissions = LinkedData::Client::Models::OntologySubmission.all(include: BROWSE_ATTRIBUTES.join(','), also_include_views: true, display_links: false, display_context: false)

Expand Down
29 changes: 29 additions & 0 deletions app/helpers/federation_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,18 @@ def federation_external_class?(class_object)
!class_object.links['self'].include?($REST_URL)
end

# Chooses which copy of an ontology represents the whole group.
# A lone entry is returned as is; otherwise the first entry recognised by
# helpers.internal_ontology? wins, and when there is none the choice is left to
# external_canonical_ontology_portal.
def canonical_ontology(ontologies)
  return ontologies.first if ontologies.size == 1

  local_copy = ontologies.find { |candidate| helpers.internal_ontology?(candidate[:id]) }
  local_copy || external_canonical_ontology_portal(ontologies)
end

def federation_portal_status(portal_name: nil)
Rails.cache.fetch("federation_portal_up_#{portal_name}", expires_in: 2.hours) do
Expand Down Expand Up @@ -191,6 +203,7 @@ def init_federation_portals_status
federation_input_chips
end
end

def federated_search_counts(search_results)
ids = search_results.map do |result|
result.dig(:root, :ontology_id) || rest_url
Expand Down Expand Up @@ -219,4 +232,20 @@ def counts_ontology_ids_by_portal_name(portals_ids)

counts
end

# Picks, among several external copies of an ontology, the one hosted on the
# portal returned by most_referred_portal.
#
# Falls back to the first entry when no portal is referenced at all, or when the
# most referred portal has no copy in the list, so that callers always get an
# entry back for a non-empty list.
#
# @param ontologies [Array<Hash>] entries exposing a String :id
# @return [Hash, nil] the chosen entry; nil only when ontologies is empty
def external_canonical_ontology_portal(ontologies)
  canonical_portal = most_referred_portal(ontologies)
  return ontologies.first if canonical_portal.nil?

  # A copy is attributed to a portal when its id contains the portal name.
  ontologies.find { |ontology| ontology[:id].include?(canonical_portal.to_s) } || ontologies.first
end

# Returns the key of the federated portal whose name appears in the
# :pullLocation of the largest number of the given submissions, or nil when no
# submission references any portal. On a tie the portal encountered first wins.
def most_referred_portal(ontology_submissions)
  referenced = ontology_submissions.flat_map do |entry|
    federated_portals.keys.select { |name| entry[:pullLocation]&.include?(name.to_s) }
  end

  winner = referenced.tally.max_by { |_, hits| hits }
  winner&.first
end

end
2 changes: 1 addition & 1 deletion app/helpers/urls_helper.rb
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def url_to_endpoint(url)
endpoint
end
# Hostname of the REST API, obtained by passing the $REST_URL global to
# extract_hostname.
def rest_hostname
  # The REST_URI constant does not exist and raised NameError here; the REST
  # endpoint is held in the $REST_URL global instead.
  extract_hostname($REST_URL)
end

def extract_hostname(url)
Expand Down

0 comments on commit c52b5c8

Please sign in to comment.