Skip to content

Commit

Permalink
Merge pull request #1527 from libris/feature/refactor-MatchForm
Browse files Browse the repository at this point in the history
Refactor MatchForm (reduce complexity)
  • Loading branch information
kwahlin authored Nov 22, 2024
2 parents 3c3f17f + c0bff4f commit f001a0d
Show file tree
Hide file tree
Showing 2 changed files with 94 additions and 76 deletions.
168 changes: 93 additions & 75 deletions whelktool/src/main/groovy/whelk/datatool/form/MatchForm.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class MatchForm {
private static final String VALUE = 'value'
private static final String VALUE_FROM = 'bulk:valueFrom'
private static final String ANY_OF = 'bulk:AnyOf'
private static final String HAS_BASE_TYPE_TMP = '_hasBaseTypeTmp'

Map form

Expand All @@ -52,16 +51,12 @@ class MatchForm {

// For testing only
@PackageScope
MatchForm(Map matchForm) {
this(matchForm, null)
MatchForm(Map form) {
this(form, null)
}

// For testing only
@PackageScope
MatchForm() {}

boolean matches(Object node) {
return matches(form, node)
boolean matches(Map thing) {
return matches(form, thing)
}

boolean matches(Object matchForm, Object node) {
Expand All @@ -85,68 +80,95 @@ class MatchForm {
return path.findAll { it instanceof String } as List<String>
}

private boolean mapMatches(Map matchForm, Map bNode) {
if (matchForm == null || bNode == null) {
private getSubtypes() {
return getSubtypes(form)
}

private Set<String> getSubtypes(Map formBNode) {
return baseTypeToSubtypes[formBNode[TYPE_KEY]]
}

private boolean shouldMatchSubtypes() {
return shouldMatchSubtypes(form)
}

private static boolean shouldMatchSubtypes(Map formBNode) {
return asList(formBNode[MATCHING_MODE]).contains(SUBTYPES)
}

private static boolean shouldMatchExact(Map formBNode) {
return asList(formBNode[MATCHING_MODE]).contains(EXACT)
}

private boolean mapMatches(Map formBNode, Map bNode) {
if (formBNode == null || bNode == null) {
return false
}
matchForm = new LinkedHashMap(matchForm)
def match = asList(matchForm[MATCHING_MODE])
if (match.contains(EXACT)) {
return exactMatches(matchForm, bNode)
}
if (match.contains(SUBTYPES)) {
String aType = matchForm[TYPE_KEY]
String bType = bNode[TYPE_KEY]
if (!(baseTypeToSubtypes[aType] + aType).contains(bType)) {
return false
} else {
matchForm.remove(TYPE_KEY)
}

formBNode = new LinkedHashMap(formBNode)

if (shouldMatchExact(formBNode)) {
return exactMatches(formBNode, bNode)
}
matchForm.remove(MATCHING_MODE)
if (matchForm[TYPE_KEY] == ANY_TYPE) {
matchForm.remove(TYPE_KEY)
if (formBNode[TYPE_KEY] && !typeMatches(formBNode, bNode)) {
return false
}
def ids = formBNodeIdToResourceIds[matchForm.remove(BNODE_ID)]
if (ids && !ids.contains(bNode[ID_KEY])) {
formBNode.remove(TYPE_KEY)
formBNode.remove(MATCHING_MODE)

if (!idMatches(formBNode, bNode)) {
return false
}
matchForm.remove(HAS_ID)
if (matchForm.size() > bNode.size()) {
formBNode.remove(HAS_ID)
formBNode.remove(BNODE_ID)

if (formBNode.size() > bNode.size()) {
return false
}
return comparator.isSubset(matchForm, bNode, this::mapMatches)

return comparator.isSubset(formBNode, bNode, this::mapMatches)
}

private boolean exactMatches(Map matchForm, Map bNode) {
if (matchForm == null || bNode == null) {
private boolean exactMatches(Map formBNode, Map bNode) {
if (formBNode == null || bNode == null) {
return false
}
matchForm = new HashMap(matchForm)

formBNode = new HashMap(formBNode)
bNode = new HashMap(bNode)
if (asList(matchForm.remove(MATCHING_MODE)).contains(SUBTYPES)) {
String aType = matchForm[TYPE_KEY]
String bType = bNode[TYPE_KEY]
if ((baseTypeToSubtypes[aType] + aType).contains(bType)) {
matchForm.remove(TYPE_KEY)
bNode.remove(TYPE_KEY)
} else {
return false
}
}
if (matchForm[TYPE_KEY] == ANY_TYPE) {
matchForm.remove(TYPE_KEY)
bNode.remove(TYPE_KEY)

if (!typeMatches(formBNode, bNode)) {
return false
}
def ids = formBNodeIdToResourceIds[matchForm.remove(BNODE_ID)]
if (ids && !ids.contains(bNode[ID_KEY])) {
formBNode.remove(TYPE_KEY)
bNode.remove(TYPE_KEY)
formBNode.remove(MATCHING_MODE)

if (!idMatches(formBNode, bNode)) {
return false
}
matchForm.remove(HAS_ID)
if (matchForm.size() != bNode.size()) {
formBNode.remove(HAS_ID)
formBNode.remove(BNODE_ID)

if (formBNode.size() != bNode.size()) {
return false
}
return comparator.isEqual(matchForm, bNode, this::exactMatches)

return comparator.isEqual(formBNode, bNode, this::exactMatches)
}

private boolean idMatches(Map formBNode, Map bNode) {
def ids = formBNodeIdToResourceIds[formBNode[BNODE_ID]]
return ids ? ids.contains(bNode[ID_KEY]) : true
}

private boolean typeMatches(Map formBNode, Map bNode) {
return (formBNode[TYPE_KEY] == ANY_TYPE || formBNode[TYPE_KEY] == bNode[TYPE_KEY])
|| (shouldMatchSubtypes(formBNode) && hasSameBaseType(formBNode, bNode))
}

private boolean hasSameBaseType(Map formBNode, Map bNode) {
([formBNode[TYPE_KEY]] + getSubtypes(formBNode)).contains(bNode[TYPE_KEY])
}

private Map getSparqlPreparedForm() {
Expand All @@ -160,13 +182,10 @@ class MatchForm {
if (node[TYPE_KEY] == ANY_TYPE) {
node.remove(TYPE_KEY)
}
if (asList(node.remove(MATCHING_MODE)).contains(SUBTYPES) && baseTypeToSubtypes.containsKey(node[TYPE_KEY])) {
def baseType = node.remove(TYPE_KEY)
node[HAS_BASE_TYPE_TMP] = baseType
}
if (formBNodeIdToResourceIds.containsKey(bNodeId)) {
node[ID_KEY] = bNodeId
}
node.remove(MATCHING_MODE)
return new DocumentUtil.Nop()
}
if (asList(node).isEmpty()) {
Expand All @@ -183,23 +202,29 @@ class MatchForm {
("<" + getRecordTmpId() + ">"): getVar(getRecordTmpId())
]

baseTypeToSubtypes.keySet().each { baseType ->
substitutions.put(":$HAS_BASE_TYPE_TMP \"$baseType\"".toString(), "a ?" + baseType)
}

formBNodeIdToResourceIds.keySet().each { _id ->
substitutions.put("<" + _id + ">", getVar(_id))
}

return ttl.replace(substitutions)
def sparqlPattern = ttl.replace(substitutions)

if (shouldMatchSubtypes() && getSubtypes()) {
def baseType = form[TYPE_KEY]
def thingVar = getVar(getThingTmpId())
return sparqlPattern.replace("$thingVar a :$baseType", "$thingVar a ?$baseType")
}

return sparqlPattern
}


private String insertTypeMappings(String sparqlPattern) {
def valuesClause = baseTypeToSubtypes.collect { baseType, subTypes ->
"VALUES ?$baseType { ${([baseType] + subTypes).collect { ":$it" }.join(" ")} }\n"
}.join()
return valuesClause + sparqlPattern
if (shouldMatchSubtypes() && getSubtypes()) {
def baseType = form[TYPE_KEY]
String valuesClause = "VALUES ?$baseType { ${([baseType] + getSubtypes()).collect { ":$it" }.join(" ")} }\n"
return valuesClause + sparqlPattern
}
return sparqlPattern
}

private String insertIdMappings(String sparqlPattern) {
Expand Down Expand Up @@ -293,20 +318,13 @@ class MatchForm {
}

DocumentUtil.traverse(form) { node, path ->
if (node instanceof Map && node.containsKey(MATCHING_MODE) && ((List) node[MATCHING_MODE]).contains(SUBTYPES)) {
if (node instanceof Map && shouldMatchSubtypes(node)) {
def baseType = (String) node[TYPE_KEY]
Set<String> subTypes = getSubtypes(baseType, jsonLd) as Set
if (!subTypes.isEmpty()) {
mappings[baseType] = subTypes
}
mappings[baseType] = jsonLd.getSubClasses(baseType)
return new DocumentUtil.Nop()
}
}

return mappings
}

private static Set<String> getSubtypes(String type, JsonLd jsonLd) {
return jsonLd.getSubClasses(type)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class MatchFormSpec extends Specification {

def "match data against form"() {
given:
def matchForm = new MatchForm()
def matchForm = new MatchForm([:])
matchForm.formBNodeIdToResourceIds = ["#1": ["https://libris.kb.se/x#it", "https://libris.kb.se/y#it"] as Set]
matchForm.baseTypeToSubtypes = ["T": ["Tx", "Ty"] as Set]

Expand Down

0 comments on commit f001a0d

Please sign in to comment.