-
-
Notifications
You must be signed in to change notification settings - Fork 729
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #11893 from rioug/10809-patch-dfc-connector-parser
[DFC] Patch dfc connector parser to improve SKOS concept parsing
- Loading branch information
Showing
5 changed files
with
209 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
12 changes: 12 additions & 0 deletions
12
engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
# frozen_string_literal: true | ||
|
||
# Patch: Improve parsing of SKOS Concept. Will be fixed upstream | ||
require_relative 'skos_helper' | ||
|
||
module DataFoodConsortium
  module Connector
    # Reopens the connector's SKOSConcept so that concepts gain the dynamic
    # attribute helpers (addAttribute / hasAttribute) from SKOSHelper.
    class SKOSConcept
      # Resolved lexically to DataFoodConsortium::Connector::SKOSHelper.
      include SKOSHelper
    end
  end
end
19 changes: 19 additions & 0 deletions
19
engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,19 @@ | ||
# frozen_string_literal: true | ||
|
||
# Patch: Improve parsing of SKOS Concept. Will be fixed upstream | ||
module DataFoodConsortium
  module Connector
    # Mixin giving an object dynamically named attributes: each attribute is
    # stored in an instance variable and exposed through a singleton reader.
    module SKOSHelper
      # Stores +value+ in the instance variable "@<name>" and defines a
      # singleton reader called +name+ that returns the variable's current
      # content.
      #
      # @param name [String, Symbol] attribute name
      # @param value [Object] value to store
      # @return [Symbol] the name of the reader that was defined
      def addAttribute(name, value) # rubocop:disable Naming/MethodName
        ivar = "@#{name}"
        instance_variable_set(ivar, value)
        define_singleton_method(name) { instance_variable_get(ivar) }
      end

      # Tells whether the receiver has a method called +name+. The lookup goes
      # through +methods+, so it is also true for methods that were not
      # created by addAttribute.
      #
      # @param name [String, Symbol] attribute name
      # @return [Boolean]
      def hasAttribute(name) # rubocop:disable Naming/MethodName
        wanted = name.to_s.to_sym
        methods.include?(wanted)
      end
    end
  end
end
135 changes: 128 additions & 7 deletions
135
engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,23 +1,144 @@ | ||
# frozen_string_literal: true | ||
|
||
# Overriding the current implementation to store all parsed concepts for | ||
# lookup later. Otherwise the importer can't associate these. | ||
# This is just a workaround and needs to be solved upstream. | ||
# patches : | ||
# - Maikel: Overriding the current implementation to store all parsed concepts for | ||
# lookup later. Otherwise the importer can't associate these. | ||
# This is just a workaround and needs to be solved upstream. | ||
|
||
# - Gaetan: Improve parsing of SKOS Concept. Will be fixed upstream | ||
|
||
require_relative 'skos_helper' | ||
|
||
module DataFoodConsortium
  module Connector
    # Node of the parsed SKOS tree; its children are attached as dynamic
    # attributes through SKOSHelper.
    class SKOSInstance
      include DataFoodConsortium::Connector::SKOSHelper

      # Returns the sorted list of singleton methods of this instance, i.e.
      # the names of the concepts attached to it.
      #
      # @return [Array<Symbol>]
      def topConcepts # rubocop:disable Naming/MethodName
        own_methods = methods(false)
        own_methods.sort
      end
    end
  end
end
|
||
# rubocop:disable Naming/VariableName | ||
module DataFoodConsortium
  module Connector
    # Builds a tree of SKOS objects out of a flat list of SKOS elements.
    # Concepts and collections without a broader element become attributes of
    # the returned SKOSInstance; every other one is attached below each of its
    # broader elements.
    #
    # Patches applied on top of the connector gem:
    # - Maikel: every concept built is also stored in SKOSParser.concepts so
    #   that it can be looked up later.
    # - Gaetan: improved parsing of SKOS concepts.
    class SKOSParser
      CONCEPT_SCHEMES = ["Facet", "productTypes"].freeze
      # Matches any id containing one of CONCEPT_SCHEMES. Built once instead
      # of on every call to matchingConceptSchemes.
      CONCEPT_SCHEME_PATTERN = Regexp.union(CONCEPT_SCHEMES).freeze

      def initialize
        init
      end

      # Parses +data+ and returns the resulting tree. All parser state is
      # reset first, so a parser can be reused for several documents.
      #
      # @param data [Enumerable] raw elements, each one wrapped in a
      #   SKOSParserElement
      # @return [DataFoodConsortium::Connector::SKOSInstance] root of the tree
      def parse(data) # rubocop:disable Metrics/CyclomaticComplexity
        init

        data.each do |element|
          current = DataFoodConsortium::Connector::SKOSParserElement.new(element)

          setSkosConceptFlag(current)

          next unless current.isConcept? || current.isCollection?

          # Only the first occurrence of an id creates a concept.
          @skosConcepts[current.id] ||= createSKOSConcept(current)

          if current.hasBroader
            current.broader.each do |broader_id|
              (@broaders[broader_id] ||= []).push(current.id)
            end
          else
            # No broader, save the concept to the root
            @rootElements.push(current.id)
          end
        end

        @rootElements.each do |root_element_id|
          setResults(@results, root_element_id)
        end

        @results
      end

      # Maikel's patch
      # Registry of every concept created by any parser, keyed by concept id.
      #
      # @return [Hash]
      def self.concepts
        @concepts ||= {}
      end

      protected

      # Builds the SKOSConcept for +element+ and records it in
      # SKOSParser.concepts. Exactly one concept is instantiated per call.
      #
      # @param element [DataFoodConsortium::Connector::SKOSParserElement]
      # @return [DataFoodConsortium::Connector::SKOSConcept]
      def createSKOSConcept(element) # rubocop:disable Naming/MethodName
        skosConcept = DataFoodConsortium::Connector::SKOSConcept.new(
          element.id,
          broaders: element.broader,
          narrowers: element.narrower,
          prefLabels: element.label
        )
        skosConcept.semanticType = element.type
        # Maikel's patch
        self.class.concepts[element.id] = skosConcept
        skosConcept
      end

      private

      # Resets the whole parser state.
      def init
        @results = DataFoodConsortium::Connector::SKOSInstance.new
        @skosConcepts = {}
        @rootElements = []
        @broaders = {}
        # Flag used to tell the parser to use SKOSConcept objects when parsing
        # data from a Concept Scheme listed in CONCEPT_SCHEMES. Once raised it
        # stays raised until the next call to init.
        @useSkosConcept = false
      end

      # Attaches the element identified by +id+ to +parent+, then recurses
      # into the elements that declared +id+ as their broader.
      def setResults(parent, id) # rubocop:disable Naming/MethodName
        name = getValueWithoutPrefix(id)
        concept = @skosConcepts[id]

        unless parent.hasAttribute(name)
          node = if @useSkosConcept && concept
                   concept
                 else
                   DataFoodConsortium::Connector::SKOSInstance.new
                 end
          parent.addAttribute(name, node)
        end

        narrower_ids = @broaders[id]

        # Leaf concepts, stop the process
        if narrower_ids.nil?
          parent.instance_variable_set("@#{name}", concept)
          return
        end

        container = parent.instance_variable_get("@#{name}")
        narrower_ids.each do |narrower_id|
          setResults(container, narrower_id) # recursive call
        end
      end

      # Raises the SKOSConcept flag when +current+ is a concept scheme whose
      # id matches one of CONCEPT_SCHEMES.
      def setSkosConceptFlag(current) # rubocop:disable Naming/MethodName
        @useSkosConcept = true if current.isConceptScheme? && matchingConceptSchemes(current)
      end

      # @return [Boolean] whether the id of +current+ contains one of
      #   CONCEPT_SCHEMES (false for a nil id)
      def matchingConceptSchemes(current) # rubocop:disable Naming/MethodName
        CONCEPT_SCHEME_PATTERN.match?(current.id)
      end
    end
  end
end
# rubocop:enable Naming/VariableName |
47 changes: 47 additions & 0 deletions
47
engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,47 @@ | ||
# frozen_string_literal: true | ||
|
||
# Patch: Improve parsing of SKOS Concept. Will be fixed upstream | ||
module DataFoodConsortium
  module Connector
    # Wraps one raw SKOS element and extracts its id, type, broader ids,
    # narrower ids and preferred labels.
    class SKOSParserElement
      attr_reader :narrower, :label

      # @param element [Hash, nil] raw element; when nil (or false) the id and
      #   the type are set to empty strings and all collections stay empty
      def initialize(element) # rubocop:disable Metrics/CyclomaticComplexity
        @broader = []
        @narrower = []
        @label = {}

        unless element
          @id = ""
          @type = ""
          return
        end

        @id = element["@id"]

        # The RDF type property takes precedence over the "@type" keyword.
        declared_type = element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"] || element["@type"]
        @type = declared_type ? extractId(declared_type) : "undefined"

        element["http://www.w3.org/2004/02/skos/core#broader"]&.each do |reference|
          @broader << reference["@id"]
        end

        element["http://www.w3.org/2004/02/skos/core#narrower"]&.each do |reference|
          @narrower << reference["@id"]
        end

        # Labels are indexed by language, given as a symbol.
        element["http://www.w3.org/2004/02/skos/core#prefLabel"]&.each do |literal|
          language = literal["@language"].to_sym
          @label[language] = literal["@value"]
        end
      end

      # @return [Boolean] whether the element's type is skos:ConceptScheme
      def isConceptScheme? # rubocop:disable Naming/MethodName
        scheme_type = "http://www.w3.org/2004/02/skos/core#ConceptScheme"
        @type == scheme_type
      end
    end
  end
end