From e846761e54ebf71c70b561586a94adf7b80b8652 Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Fri, 1 Dec 2023 14:26:09 +1100 Subject: [PATCH 1/8] WIP Patch DFC connector to fix SKOS Concept parsing This is to let us use intermediary SKOS Concept. Upstream PR for more details: https://github.com/datafoodconsortium/connector-codegen/pull/10/files --- .../connector/connector.rb | 3 + .../connector/skos_concept.rb | 11 ++ .../connector/skos_helper.rb | 14 ++ .../connector/skos_parser.rb | 121 +++++++++++++++++- .../connector/skos_parser_element.rb | 45 +++++++ 5 files changed, 190 insertions(+), 4 deletions(-) create mode 100644 engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb create mode 100644 engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb create mode 100644 engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/connector.rb b/engines/dfc_provider/lib/data_food_consortium/connector/connector.rb index 2d657a33bc6..7c1d22e6e36 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/connector.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/connector.rb @@ -6,6 +6,9 @@ # Then our tools for monky-patching: require_relative "importer" require_relative "context" +require_relative "skos_parser_element" +require_relative "skos_concept" +require_relative "skos_parser" module DataFoodConsortium module Connector diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb new file mode 100644 index 00000000000..5672e12e6b7 --- /dev/null +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +require_relative 'skos_helper' + +module DataFoodConsortium + module Connector + class SKOSConcept + include DataFoodConsortium::Connector::SKOSHelper + end + end +end diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb new file mode 100644 index 00000000000..1d4a7e456ce --- /dev/null +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb @@ -0,0 +1,14 @@ +# frozen_string_literal: true + +module DataFoodConsortium::Connector::SKOSHelper + def addAttribute(name, value) + self.instance_variable_set("@#{name}", value) + self.define_singleton_method(name) do + instance_variable_get("@#{name}") + end + end + + def hasAttribute(name) + self.methods.include?(:"#{name}") + end +end diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb index 2cda5b95534..bdb6b23911d 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb @@ -1,11 +1,75 @@ # frozen_string_literal: true +require_relative 'skos_helper' + +module DataFoodConsortium + module Connector + class SKOSInstance + include DataFoodConsortium::Connector::SKOSHelper + + # Return a list of singelton methods, ie the list of Concept available + def topConcepts + self.methods(false).sort + end + end + end +end + # Overriding the current implementation to store all parsed concepts for # lookup later. Otherwise the importer can't associate these. # This is just a workaround and needs to be solved upstream. module DataFoodConsortium module Connector class SKOSParser + CONCEPT_SCHEMES = ["Facet", "productTypes"].freeze + + def initialize + @results = DataFoodConsortium::Connector::SKOSInstance.new + @skosConcepts = {} + @rootElements = [] + @broaders = {} + # Flag used to tell the parser to use SkosConcept object when parsing data from Concept Scheme + # defined in CONCEPT_SCHEMES + @useSkosConcept = false + end + + def parse(data) + init + + data.each do |element| + current = DataFoodConsortium::Connector::SKOSParserElement.new(element) + + setSkosConceptFlag(current) + + if current.isConcept? || current.isCollection? + if !@skosConcepts.has_key?(current.id) + concept = createSKOSConcept(current) + @skosConcepts[current.id] = concept + end + + if current.hasBroader + current.broader.each do |broaderId| + if !@broaders.has_key?(broaderId) + @broaders[broaderId] = [] + end + + @broaders[broaderId].push(current.id) + end + # No broader, save the concept to the root + else + @rootElements.push(current.id) + end + end + end + + @rootElements.each do |rootElementId| + setResults(@results, rootElementId) + end + + @results + end + + # TODO check if this is still needed def self.concepts @concepts ||= {} end @@ -13,10 +77,59 @@ def self.concepts protected def createSKOSConcept(element) # rubocop:disable Naming/MethodName - concept = DataFoodConsortium::Connector::SKOSConcept.new(element.id) - concept.semanticType = element.type - self.class.concepts[element.id] = concept - concept + skosConcept = DataFoodConsortium::Connector::SKOSConcept.new( + element.id, broaders: element.broader, narrowers: element.narrower + ) + skosConcept.semanticType = element.type + # TODO check if this is still needed + # original patch by Maikel + self.class.concepts[element.id] = skosConcept + + skosConcept + end + + private + + def init + @results = DataFoodConsortium::Connector::SKOSInstance.new + @skosConcepts = {} + @rootElements = [] + @broaders = {} + @useSkosConcept = false + end + + def setResults(parent, id) + name = getValueWithoutPrefix(id) + + if !parent.hasAttribute(name) + if @useSkosConcept && !@skosConcepts[id].nil? + parent.addAttribute(name, @skosConcepts[id]) + else + parent.addAttribute(name, DataFoodConsortium::Connector::SKOSInstance.new) + end + end + + # Leaf concepts, stop the process + if !@broaders.has_key?(id) + parent.instance_variable_set("@#{name}", @skosConcepts[id]) + return + end + + @broaders[id].each do |narrower| + parentSkosInstance = parent.instance_variable_get("@#{name}") + + setResults(parentSkosInstance, narrower) # recursive call + end + end + + def setSkosConceptFlag(current) + @useSkosConcept = true if current.isConceptScheme? && matchingConceptSchemes(current) + end + + def matchingConceptSchemes(current) + regex = /#{CONCEPT_SCHEMES.join("|")}/ + + current.id =~ regex end end end diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb new file mode 100644 index 00000000000..9328833c0dd --- /dev/null +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module DataFoodConsortium + module Connector + class SKOSParserElement + attr_reader :narrower + + def initialize(element) + @broader = [] + @narrower = [] + + if element + @id = element["@id"] + + if element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"] + @type = extractId(element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"]) + elsif element["@type"] + @type = extractId(element["@type"]) + else + @type = "undefined" + end + + if element["http://www.w3.org/2004/02/skos/core#broader"] + element["http://www.w3.org/2004/02/skos/core#broader"].each do |broader| + @broader.push(broader["@id"]) + end + end + + if element["http://www.w3.org/2004/02/skos/core#narrower"] + element["http://www.w3.org/2004/02/skos/core#narrower"].each do |narrower| + @narrower.push(narrower["@id"]) + end + end + else + @id = "" + @type = "" + end + end + + def isConceptScheme? + @type == "http://www.w3.org/2004/02/skos/core#ConceptScheme" + end + end + end +end From c61e982a76181a278f4b82f8b3f81f914a87303f Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 12:00:18 +1100 Subject: [PATCH 2/8] Fix rubocop warning --- .../connector/skos_helper.rb | 22 +++++++++++-------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb index 1d4a7e456ce..4426fbf4f8d 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb @@ -1,14 +1,18 @@ # frozen_string_literal: true -module DataFoodConsortium::Connector::SKOSHelper - def addAttribute(name, value) - self.instance_variable_set("@#{name}", value) - self.define_singleton_method(name) do - instance_variable_get("@#{name}") - end - end +module DataFoodConsortium + module Connector + module SKOSHelper + def addAttribute(name, value) # rubocop:disable Naming/MethodName + instance_variable_set("@#{name}", value) + define_singleton_method(name) do + instance_variable_get("@#{name}") + end + end - def hasAttribute(name) - self.methods.include?(:"#{name}") + def hasAttribute(name) # rubocop:disable Naming/MethodName + methods.include?(:"#{name}") + end + end end end From 633d0eadfe91da10f09c0d3cdafd4d90f12c45ef Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 13:53:26 +1100 Subject: [PATCH 3/8] Parser element, add label --- .../data_food_consortium/connector/skos_parser_element.rb | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb index 9328833c0dd..5b2b4955606 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb @@ -3,11 +3,12 @@ module DataFoodConsortium module Connector class SKOSParserElement - attr_reader :narrower + attr_reader :narrower, :label def initialize(element) @broader = [] @narrower = [] + @label = {} if element @id = element["@id"] @@ -31,6 +32,9 @@ def initialize(element) @narrower.push(narrower["@id"]) end end + element["http://www.w3.org/2004/02/skos/core#prefLabel"]&.each do |label| + @label[label["@language"].to_sym] = label["@value"] + end else @id = "" @type = "" From 9f209cac14f2bade363ee0f5752fb264199a6a7a Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 13:53:51 +1100 Subject: [PATCH 4/8] Fix rubocop warning --- .../connector/skos_parser_element.rb | 31 +++++++++---------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb index 5b2b4955606..a44cec51e57 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb @@ -5,7 +5,7 @@ module Connector class SKOSParserElement attr_reader :narrower, :label - def initialize(element) + def initialize(element) # rubocop:disable Metrics/CyclomaticComplexity @broader = [] @narrower = [] @label = {} @@ -13,25 +13,22 @@ def initialize(element) if element @id = element["@id"] - if element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"] - @type = extractId(element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"]) - elsif element["@type"] - @type = extractId(element["@type"]) - else - @type = "undefined" - end + @type = if element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"] + extractId(element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"]) + elsif element["@type"] + extractId(element["@type"]) + else + "undefined" + end - if element["http://www.w3.org/2004/02/skos/core#broader"] - element["http://www.w3.org/2004/02/skos/core#broader"].each do |broader| - @broader.push(broader["@id"]) - end + element["http://www.w3.org/2004/02/skos/core#broader"]&.each do |broader| + @broader.push(broader["@id"]) end - if element["http://www.w3.org/2004/02/skos/core#narrower"] - element["http://www.w3.org/2004/02/skos/core#narrower"].each do |narrower| - @narrower.push(narrower["@id"]) - end + element["http://www.w3.org/2004/02/skos/core#narrower"]&.each do |narrower| + @narrower.push(narrower["@id"]) end + element["http://www.w3.org/2004/02/skos/core#prefLabel"]&.each do |label| @label[label["@language"].to_sym] = label["@value"] end @@ -41,7 +38,7 @@ def initialize(element) end end - def isConceptScheme? + def isConceptScheme? # rubocop:disable Naming/MethodName @type == "http://www.w3.org/2004/02/skos/core#ConceptScheme" end end From 0392806018117c1e3a0f76f9fc6a963b9c6740ff Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 13:37:48 +1100 Subject: [PATCH 5/8] Parser, populate SKOSConcept label --- .../lib/data_food_consortium/connector/skos_parser.rb | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb index bdb6b23911d..79a152b7449 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb @@ -78,7 +78,10 @@ def self.concepts def createSKOSConcept(element) # rubocop:disable Naming/MethodName skosConcept = DataFoodConsortium::Connector::SKOSConcept.new( - element.id, broaders: element.broader, narrowers: element.narrower + element.id, + broaders: element.broader, + narrowers: element.narrower, + prefLabels: element.label ) skosConcept.semanticType = element.type # TODO check if this is still needed From a06e7c1d9e38b587e4bc1e236ad0ba0a2c0f218c Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 13:38:22 +1100 Subject: [PATCH 6/8] Fix rubocop issue --- .../connector/skos_parser.rb | 71 ++++++++++--------- 1 file changed, 38 insertions(+), 33 deletions(-) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb index 79a152b7449..dda1422ac2f 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb @@ -1,5 +1,12 @@ # frozen_string_literal: true +# patches : +# - Maikel: Overriding the current implementation to store all parsed concepts for +# lookup later. Otherwise the importer can't associate these. +# This is just a workaround and needs to be solved upstream. + +# - Gaetan: Improve parsing of SKOS Concept. Will be fixed upstream + require_relative 'skos_helper' module DataFoodConsortium @@ -8,16 +15,14 @@ class SKOSInstance include DataFoodConsortium::Connector::SKOSHelper # Return a list of singelton methods, ie the list of Concept available - def topConcepts - self.methods(false).sort + def topConcepts # rubocop:disable Naming/MethodName + methods(false).sort end end end end -# Overriding the current implementation to store all parsed concepts for -# lookup later. Otherwise the importer can't associate these. -# This is just a workaround and needs to be solved upstream. +# rubocop:disable Naming/VariableName module DataFoodConsortium module Connector class SKOSParser @@ -28,12 +33,13 @@ def initialize @skosConcepts = {} @rootElements = [] @broaders = {} - # Flag used to tell the parser to use SkosConcept object when parsing data from Concept Scheme + # Flag used to tell the parser to use SkosConcept object when parsing data from + # Concept Scheme. # defined in CONCEPT_SCHEMES @useSkosConcept = false end - def parse(data) + def parse(data) # rubocop:disable Metrics/CyclomaticComplexity init data.each do |element| @@ -41,35 +47,35 @@ def parse(data) setSkosConceptFlag(current) - if current.isConcept? || current.isCollection? - if !@skosConcepts.has_key?(current.id) - concept = createSKOSConcept(current) - @skosConcepts[current.id] = concept - end + next unless current.isConcept? || current.isCollection? - if current.hasBroader - current.broader.each do |broaderId| - if !@broaders.has_key?(broaderId) - @broaders[broaderId] = [] - end + if !@skosConcepts.key?(current.id) + concept = createSKOSConcept(current) + @skosConcepts[current.id] = concept + end - @broaders[broaderId].push(current.id) + if current.hasBroader + current.broader.each do |broader_id| + if !@broaders.key?(broader_id) + @broaders[broader_id] = [] end - # No broader, save the concept to the root - else - @rootElements.push(current.id) + + @broaders[broader_id].push(current.id) end + # No broader, save the concept to the root + else + @rootElements.push(current.id) end end - @rootElements.each do |rootElementId| - setResults(@results, rootElementId) + @rootElements.each do |root_element_id| + setResults(@results, root_element_id) end @results end - # TODO check if this is still needed + # Maikel's patch def self.concepts @concepts ||= {} end @@ -84,10 +90,8 @@ def createSKOSConcept(element) # rubocop:disable Naming/MethodName prefLabels: element.label ) skosConcept.semanticType = element.type - # TODO check if this is still needed - # original patch by Maikel + # Maikel's patch self.class.concepts[element.id] = skosConcept - skosConcept end @@ -101,11 +105,11 @@ def init @useSkosConcept = false end - def setResults(parent, id) + def setResults(parent, id) # rubocop:disable Naming/MethodName name = getValueWithoutPrefix(id) if !parent.hasAttribute(name) - if @useSkosConcept && !@skosConcepts[id].nil? + if @useSkosConcept && @skosConcepts[id] parent.addAttribute(name, @skosConcepts[id]) else parent.addAttribute(name, DataFoodConsortium::Connector::SKOSInstance.new) @@ -113,7 +117,7 @@ def setResults(parent, id) end # Leaf concepts, stop the process - if !@broaders.has_key?(id) + if !@broaders.key?(id) parent.instance_variable_set("@#{name}", @skosConcepts[id]) return end @@ -125,15 +129,16 @@ def setResults(parent, id) end end - def setSkosConceptFlag(current) + def setSkosConceptFlag(current) # rubocop:disable Naming/MethodName @useSkosConcept = true if current.isConceptScheme? && matchingConceptSchemes(current) end - def matchingConceptSchemes(current) - regex = /#{CONCEPT_SCHEMES.join("|")}/ + def matchingConceptSchemes(current) # rubocop:disable Naming/MethodName + regex = /#{CONCEPT_SCHEMES.join('|')}/ current.id =~ regex end end end end +# rubocop:enable Naming/VariableName From 9628045e1c3d249a35962300f53d0dd5dd514881 Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 12:15:23 +1100 Subject: [PATCH 7/8] Fix indentation --- .../lib/data_food_consortium/connector/skos_concept.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb index 5672e12e6b7..f6880214c28 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb @@ -5,7 +5,7 @@ module DataFoodConsortium module Connector class SKOSConcept - include DataFoodConsortium::Connector::SKOSHelper + include DataFoodConsortium::Connector::SKOSHelper end end end From 021e292190e1d58c878614d8aeb7363a33ed8e33 Mon Sep 17 00:00:00 2001 From: Gaetan Craig-Riou Date: Mon, 4 Dec 2023 12:19:30 +1100 Subject: [PATCH 8/8] Add a quick patch description --- .../lib/data_food_consortium/connector/skos_concept.rb | 1 + .../lib/data_food_consortium/connector/skos_helper.rb | 1 + .../lib/data_food_consortium/connector/skos_parser_element.rb | 1 + 3 files changed, 3 insertions(+) diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb index f6880214c28..ad8bbe266d9 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_concept.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +# Patch: Improve parsing of SKOS Concept. Will be fixed upstream require_relative 'skos_helper' module DataFoodConsortium diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb index 4426fbf4f8d..e8b814678f9 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_helper.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +# Patch: Improve parsing of SKOS Concept. Will be fixed upstream module DataFoodConsortium module Connector module SKOSHelper diff --git a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb index a44cec51e57..c8c684b8e48 100644 --- a/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb +++ b/engines/dfc_provider/lib/data_food_consortium/connector/skos_parser_element.rb @@ -1,5 +1,6 @@ # frozen_string_literal: true +# Patch: Improve parsing of SKOS Concept. Will be fixed upstream module DataFoodConsortium module Connector class SKOSParserElement