Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor MatchForm (reduce complexity) #1527

Merged
merged 2 commits into from
Nov 22, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
168 changes: 93 additions & 75 deletions whelktool/src/main/groovy/whelk/datatool/form/MatchForm.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@ class MatchForm {
private static final String VALUE = 'value'
private static final String VALUE_FROM = 'bulk:valueFrom'
private static final String ANY_OF = 'bulk:AnyOf'
private static final String HAS_BASE_TYPE_TMP = '_hasBaseTypeTmp'

Map form

Expand All @@ -52,16 +51,12 @@ class MatchForm {

// For testing only
@PackageScope
MatchForm(Map matchForm) {
this(matchForm, null)
MatchForm(Map form) {
this(form, null)
}

// For testing only
@PackageScope
MatchForm() {}

boolean matches(Object node) {
return matches(form, node)
boolean matches(Map thing) {
return matches(form, thing)
}

boolean matches(Object matchForm, Object node) {
Expand All @@ -85,68 +80,95 @@ class MatchForm {
return path.findAll { it instanceof String } as List<String>
}

private boolean mapMatches(Map matchForm, Map bNode) {
if (matchForm == null || bNode == null) {
private getSubtypes() {
return getSubtypes(form)
}

private Set<String> getSubtypes(Map formBNode) {
return baseTypeToSubtypes[formBNode[TYPE_KEY]]
}

private boolean shouldMatchSubtypes() {
return shouldMatchSubtypes(form)
}

private static boolean shouldMatchSubtypes(Map formBNode) {
return asList(formBNode[MATCHING_MODE]).contains(SUBTYPES)
}

private static boolean shouldMatchExact(Map formBNode) {
return asList(formBNode[MATCHING_MODE]).contains(EXACT)
}

private boolean mapMatches(Map formBNode, Map bNode) {
if (formBNode == null || bNode == null) {
return false
}
matchForm = new LinkedHashMap(matchForm)
def match = asList(matchForm[MATCHING_MODE])
if (match.contains(EXACT)) {
return exactMatches(matchForm, bNode)
}
if (match.contains(SUBTYPES)) {
String aType = matchForm[TYPE_KEY]
String bType = bNode[TYPE_KEY]
if (!(baseTypeToSubtypes[aType] + aType).contains(bType)) {
return false
} else {
matchForm.remove(TYPE_KEY)
}

formBNode = new LinkedHashMap(formBNode)

if (shouldMatchExact(formBNode)) {
return exactMatches(formBNode, bNode)
}
matchForm.remove(MATCHING_MODE)
if (matchForm[TYPE_KEY] == ANY_TYPE) {
matchForm.remove(TYPE_KEY)
if (formBNode[TYPE_KEY] && !typeMatches(formBNode, bNode)) {
return false
}
def ids = formBNodeIdToResourceIds[matchForm.remove(BNODE_ID)]
if (ids && !ids.contains(bNode[ID_KEY])) {
formBNode.remove(TYPE_KEY)
formBNode.remove(MATCHING_MODE)

if (!idMatches(formBNode, bNode)) {
return false
}
matchForm.remove(HAS_ID)
if (matchForm.size() > bNode.size()) {
formBNode.remove(HAS_ID)
formBNode.remove(BNODE_ID)

if (formBNode.size() > bNode.size()) {
return false
}
return comparator.isSubset(matchForm, bNode, this::mapMatches)

return comparator.isSubset(formBNode, bNode, this::mapMatches)
}

private boolean exactMatches(Map matchForm, Map bNode) {
if (matchForm == null || bNode == null) {
private boolean exactMatches(Map formBNode, Map bNode) {
if (formBNode == null || bNode == null) {
return false
}
matchForm = new HashMap(matchForm)

formBNode = new HashMap(formBNode)
bNode = new HashMap(bNode)
if (asList(matchForm.remove(MATCHING_MODE)).contains(SUBTYPES)) {
String aType = matchForm[TYPE_KEY]
String bType = bNode[TYPE_KEY]
if ((baseTypeToSubtypes[aType] + aType).contains(bType)) {
matchForm.remove(TYPE_KEY)
bNode.remove(TYPE_KEY)
} else {
return false
}
}
if (matchForm[TYPE_KEY] == ANY_TYPE) {
matchForm.remove(TYPE_KEY)
bNode.remove(TYPE_KEY)

if (!typeMatches(formBNode, bNode)) {
return false
}
def ids = formBNodeIdToResourceIds[matchForm.remove(BNODE_ID)]
if (ids && !ids.contains(bNode[ID_KEY])) {
formBNode.remove(TYPE_KEY)
bNode.remove(TYPE_KEY)
formBNode.remove(MATCHING_MODE)

if (!idMatches(formBNode, bNode)) {
return false
}
matchForm.remove(HAS_ID)
if (matchForm.size() != bNode.size()) {
formBNode.remove(HAS_ID)
formBNode.remove(BNODE_ID)

if (formBNode.size() != bNode.size()) {
return false
}
return comparator.isEqual(matchForm, bNode, this::exactMatches)

return comparator.isEqual(formBNode, bNode, this::exactMatches)
}

private boolean idMatches(Map formBNode, Map bNode) {
def ids = formBNodeIdToResourceIds[formBNode[BNODE_ID]]
return ids ? ids.contains(bNode[ID_KEY]) : true
}

private boolean typeMatches(Map formBNode, Map bNode) {
return (formBNode[TYPE_KEY] == ANY_TYPE || formBNode[TYPE_KEY] == bNode[TYPE_KEY])
|| (shouldMatchSubtypes(formBNode) && hasSameBaseType(formBNode, bNode))
}

private boolean hasSameBaseType(Map formBNode, Map bNode) {
([formBNode[TYPE_KEY]] + getSubtypes(formBNode)).contains(bNode[TYPE_KEY])
}

private Map getSparqlPreparedForm() {
Expand All @@ -160,13 +182,10 @@ class MatchForm {
if (node[TYPE_KEY] == ANY_TYPE) {
node.remove(TYPE_KEY)
}
if (asList(node.remove(MATCHING_MODE)).contains(SUBTYPES) && baseTypeToSubtypes.containsKey(node[TYPE_KEY])) {
def baseType = node.remove(TYPE_KEY)
node[HAS_BASE_TYPE_TMP] = baseType
}
if (formBNodeIdToResourceIds.containsKey(bNodeId)) {
node[ID_KEY] = bNodeId
}
node.remove(MATCHING_MODE)
return new DocumentUtil.Nop()
}
if (asList(node).isEmpty()) {
Expand All @@ -183,23 +202,29 @@ class MatchForm {
("<" + getRecordTmpId() + ">"): getVar(getRecordTmpId())
]

baseTypeToSubtypes.keySet().each { baseType ->
substitutions.put(":$HAS_BASE_TYPE_TMP \"$baseType\"".toString(), "a ?" + baseType)
}

formBNodeIdToResourceIds.keySet().each { _id ->
substitutions.put("<" + _id + ">", getVar(_id))
}

return ttl.replace(substitutions)
def sparqlPattern = ttl.replace(substitutions)

if (shouldMatchSubtypes() && getSubtypes()) {
def baseType = form[TYPE_KEY]
def thingVar = getVar(getThingTmpId())
return sparqlPattern.replace("$thingVar a :$baseType", "$thingVar a ?$baseType")
}

return sparqlPattern
}


private String insertTypeMappings(String sparqlPattern) {
def valuesClause = baseTypeToSubtypes.collect { baseType, subTypes ->
"VALUES ?$baseType { ${([baseType] + subTypes).collect { ":$it" }.join(" ")} }\n"
}.join()
return valuesClause + sparqlPattern
if (shouldMatchSubtypes() && getSubtypes()) {
def baseType = form[TYPE_KEY]
String valuesClause = "VALUES ?$baseType { ${([baseType] + getSubtypes()).collect { ":$it" }.join(" ")} }\n"
return valuesClause + sparqlPattern
}
return sparqlPattern
}

private String insertIdMappings(String sparqlPattern) {
Expand Down Expand Up @@ -293,20 +318,13 @@ class MatchForm {
}

DocumentUtil.traverse(form) { node, path ->
if (node instanceof Map && node.containsKey(MATCHING_MODE) && ((List) node[MATCHING_MODE]).contains(SUBTYPES)) {
if (node instanceof Map && shouldMatchSubtypes(node)) {
def baseType = (String) node[TYPE_KEY]
Set<String> subTypes = getSubtypes(baseType, jsonLd) as Set
if (!subTypes.isEmpty()) {
mappings[baseType] = subTypes
}
mappings[baseType] = jsonLd.getSubClasses(baseType)
return new DocumentUtil.Nop()
}
}

return mappings
}

private static Set<String> getSubtypes(String type, JsonLd jsonLd) {
return jsonLd.getSubClasses(type)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ class MatchFormSpec extends Specification {

def "match data against form"() {
given:
def matchForm = new MatchForm()
def matchForm = new MatchForm([:])
matchForm.formBNodeIdToResourceIds = ["#1": ["https://libris.kb.se/x#it", "https://libris.kb.se/y#it"] as Set]
matchForm.baseTypeToSubtypes = ["T": ["Tx", "Ty"] as Set]

Expand Down
Loading