Skip to content

Commit

Permalink
Merge pull request #11893 from rioug/10809-patch-dfc-connector-parser
Browse files Browse the repository at this point in the history
[DFC] Patch dfc connector parser to improve SKOS concept parsing
  • Loading branch information
mkllnk authored Dec 4, 2023
2 parents 6b22227 + 021e292 commit 89b6b18
Show file tree
Hide file tree
Showing 5 changed files with 209 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@
# Then our tools for monkey-patching:
require_relative "importer"
require_relative "context"
require_relative "skos_parser_element"
require_relative "skos_concept"
require_relative "skos_parser"

module DataFoodConsortium
module Connector
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# frozen_string_literal: true

# Patch: Improve parsing of SKOS Concept. Will be fixed upstream
require_relative 'skos_helper'

module DataFoodConsortium
  module Connector
    # Reopens SKOSConcept to mix in the dynamic-attribute helpers
    # (addAttribute / hasAttribute) provided by SKOSHelper.
    class SKOSConcept
      include SKOSHelper
    end
  end
end
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# frozen_string_literal: true

# Patch: Improve parsing of SKOS Concept. Will be fixed upstream
module DataFoodConsortium
  module Connector
    # Mixin that lets an object carry attributes whose names are only known at
    # runtime. Each attribute is stored in an instance variable and exposed
    # through a reader defined on that single object.
    module SKOSHelper
      # Stores +value+ under "@name" and defines a singleton reader called
      # +name+ on the receiver.
      #
      # @param name [String, Symbol] attribute name
      # @param value [Object] value returned by the generated reader
      def addAttribute(name, value) # rubocop:disable Naming/MethodName
        instance_variable_set("@#{name}", value)
        define_singleton_method(name) do
          instance_variable_get("@#{name}")
        end
      end

      # Tells whether a reader for +name+ has been attached to this object.
      #
      # Only singleton methods are inspected: methods inherited from the class
      # or from Object (object_id, display, hash, ...) are not attributes, and
      # reporting them as present would stop addAttribute from ever being
      # called for a name that happens to collide with one of them.
      #
      # @param name [String, Symbol] attribute name
      # @return [Boolean]
      def hasAttribute(name) # rubocop:disable Naming/MethodName
        singleton_methods(false).include?(:"#{name}")
      end
    end
  end
end
135 changes: 128 additions & 7 deletions engines/dfc_provider/lib/data_food_consortium/connector/skos_parser.rb
Original file line number Diff line number Diff line change
@@ -1,23 +1,144 @@
# frozen_string_literal: true

# Patches:
# - Maikel: Overriding the current implementation to store all parsed concepts
#   for lookup later. Otherwise the importer can't associate these.
#   This is just a workaround and needs to be solved upstream.
# - Gaetan: Improve parsing of SKOS Concept. Will be fixed upstream.

require_relative 'skos_helper'

module DataFoodConsortium
  module Connector
    # Container object whose attributes are attached at runtime through the
    # SKOSHelper mixin.
    class SKOSInstance
      include SKOSHelper

      # Returns the sorted names of the singleton methods defined on this
      # object, i.e. the list of concepts available on it.
      def topConcepts # rubocop:disable Naming/MethodName
        singleton_methods(false).sort
      end
    end
  end
end

# rubocop:disable Naming/VariableName
module DataFoodConsortium
  module Connector
    # Converts a flat list of SKOS nodes into a tree of objects.
    #
    # Each node is wrapped in a SKOSParserElement. Concepts and collections are
    # turned into SKOSConcept objects, indexed by id, and linked to their
    # parents through the ids listed in their "broader" relation. Elements
    # without a broader become roots of the resulting SKOSInstance tree.
    class SKOSParser
      # A ConceptScheme whose id contains one of these strings makes the parser
      # attach SKOSConcept objects to the tree instead of empty SKOSInstance
      # containers.
      CONCEPT_SCHEMES = ["Facet", "productTypes"].freeze

      def initialize
        init
      end

      # Parses +data+ and returns the root of the resulting tree.
      #
      # @param data [Enumerable] nodes accepted by SKOSParserElement.new
      # @return [SKOSInstance] object with one attribute per root concept
      def parse(data) # rubocop:disable Metrics/CyclomaticComplexity
        # Start from a clean state so a parser instance can be reused.
        init

        data.each do |element|
          current = DataFoodConsortium::Connector::SKOSParserElement.new(element)

          setSkosConceptFlag(current)

          next unless current.isConcept? || current.isCollection?

          # Only the first occurrence of an id creates a concept.
          unless @skosConcepts.key?(current.id)
            @skosConcepts[current.id] = createSKOSConcept(current)
          end

          if current.hasBroader
            # Index the element under each of its parents.
            current.broader.each do |broader_id|
              (@broaders[broader_id] ||= []).push(current.id)
            end
          else
            # No broader, save the concept to the root
            @rootElements.push(current.id)
          end
        end

        @rootElements.each { |root_element_id| setResults(@results, root_element_id) }

        @results
      end

      # Maikel's patch
      # Class-level registry of every concept created by any parser instance,
      # keyed by concept id, so that they can be looked up later.
      def self.concepts
        @concepts ||= {}
      end

      protected

      # Builds the SKOSConcept for +element+ and records it in the class-level
      # registry.
      #
      # The concept is built exactly once, with its relations and labels;
      # nothing is registered before the fully populated object exists.
      #
      # @param element [SKOSParserElement]
      # @return [SKOSConcept]
      def createSKOSConcept(element) # rubocop:disable Naming/MethodName
        concept = DataFoodConsortium::Connector::SKOSConcept.new(
          element.id,
          broaders: element.broader,
          narrowers: element.narrower,
          prefLabels: element.label
        )
        concept.semanticType = element.type
        # Maikel's patch
        self.class.concepts[element.id] = concept
        concept
      end

      private

      # Resets every piece of per-parse state.
      def init
        @results = DataFoodConsortium::Connector::SKOSInstance.new
        @skosConcepts = {}
        @rootElements = []
        @broaders = {}
        # Flag used to tell the parser to use SkosConcept object when parsing
        # data from a Concept Scheme defined in CONCEPT_SCHEMES.
        @useSkosConcept = false
      end

      # Attaches the concept identified by +id+ to +parent+, then recurses into
      # the elements that declared +id+ as their broader.
      #
      # The attribute name comes from getValueWithoutPrefix, which is defined
      # outside this section.
      def setResults(parent, id) # rubocop:disable Naming/MethodName
        name = getValueWithoutPrefix(id)

        unless parent.hasAttribute(name)
          node = if @useSkosConcept && @skosConcepts[id]
                   @skosConcepts[id]
                 else
                   DataFoodConsortium::Connector::SKOSInstance.new
                 end
          parent.addAttribute(name, node)
        end

        # Leaf concepts, stop the process: the attribute now holds the concept
        # itself.
        unless @broaders.key?(id)
          parent.instance_variable_set("@#{name}", @skosConcepts[id])
          return
        end

        # The container is the same for every child, so fetch it once.
        container = parent.instance_variable_get("@#{name}")
        @broaders[id].each do |narrower|
          setResults(container, narrower) # recursive call
        end
      end

      # Turns the flag on as soon as a matching ConceptScheme is seen; it stays
      # on until the next call to init.
      def setSkosConceptFlag(current) # rubocop:disable Naming/MethodName
        @useSkosConcept = true if current.isConceptScheme? && matchingConceptSchemes(current)
      end

      # Returns the match position (truthy) when the id of +current+ contains
      # one of CONCEPT_SCHEMES, nil otherwise.
      def matchingConceptSchemes(current) # rubocop:disable Naming/MethodName
        regex = /#{CONCEPT_SCHEMES.join('|')}/

        current.id =~ regex
      end
    end
  end
end
# rubocop:enable Naming/VariableName
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
# frozen_string_literal: true

# Patch: Improve parsing of SKOS Concept. Will be fixed upstream
module DataFoodConsortium
  module Connector
    # Read-only view over one node of the parsed SKOS data, exposing its id,
    # type, broader/narrower ids and preferred labels.
    class SKOSParserElement
      attr_reader :narrower, :label

      # @param element [Hash, nil] node keyed by "@id", "@type" and full SKOS /
      #   RDF property IRIs; nil yields an element with empty id and type
      def initialize(element) # rubocop:disable Metrics/CyclomaticComplexity
        @broader = []
        @narrower = []
        @label = {}

        unless element
          @id = ""
          @type = ""
          return
        end

        @id = element["@id"]

        # The rdf:type property wins over the "@type" keyword when both exist.
        declared_type = element["http://www.w3.org/1999/02/22-rdf-syntax-ns#type"] || element["@type"]
        @type = declared_type ? extractId(declared_type) : "undefined"

        element["http://www.w3.org/2004/02/skos/core#broader"]&.each do |node|
          @broader << node["@id"]
        end

        element["http://www.w3.org/2004/02/skos/core#narrower"]&.each do |node|
          @narrower << node["@id"]
        end

        # Labels are indexed by language, as symbols.
        element["http://www.w3.org/2004/02/skos/core#prefLabel"]&.each do |entry|
          @label[entry["@language"].to_sym] = entry["@value"]
        end
      end

      # True when the element's type is skos:ConceptScheme.
      def isConceptScheme? # rubocop:disable Naming/MethodName
        @type == "http://www.w3.org/2004/02/skos/core#ConceptScheme"
      end
    end
  end
end

0 comments on commit 89b6b18

Please sign in to comment.