Skip to content

Commit

Permalink
Support replacing Subdivision with GenreForm
Browse files Browse the repository at this point in the history
  • Loading branch information
kwahlin committed Nov 22, 2024
1 parent abeaba7 commit 86d697b
Show file tree
Hide file tree
Showing 3 changed files with 39 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,11 +66,10 @@ public String key() {
public static final String TARGET_FORM_KEY = "bulk:targetForm";
public static final String COMMENT_KEY = "comment";
public static final String LABEL_KEY = "label";
public static final String ADD_KEY = "bulk:add";
public static final String KEEP_KEY = "bulk:keep";
public static final String DEPRECATE_KEY = "bulk:deprecate";
public static final String REMOVE_SUBDIVISION_KEY = "bulk:removeSubdivision";
public static final String ADD_SUBJECT_KEY = "bulk:addSubject";
public static final String ADD_TERM_KEY = "bulk:addTerm";
public static final String SCRIPT_KEY = "bulk:script";
public static final String EXECUTION_KEY = "bulk:execution";
public static final String EXECUTION_TYPE = "bulk:Execution";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@

import com.google.common.collect.Maps;
import org.apache.commons.io.IOUtils;
import whelk.Document;
import whelk.Whelk;
import whelk.datatool.Script;
import whelk.datatool.form.ModifiedThing;
import whelk.datatool.form.Transform;

import java.io.IOException;
Expand All @@ -15,9 +13,7 @@
import java.util.List;
import java.util.Map;

import static whelk.JsonLd.GRAPH_KEY;
import static whelk.JsonLd.RECORD_KEY;
import static whelk.datatool.bulkchange.BulkJobDocument.ADD_SUBJECT_KEY;
import static whelk.datatool.bulkchange.BulkJobDocument.ADD_TERM_KEY;
import static whelk.datatool.bulkchange.BulkJobDocument.KEEP_KEY;
import static whelk.datatool.bulkchange.BulkJobDocument.MATCH_FORM_KEY;
import static whelk.datatool.bulkchange.BulkJobDocument.DEPRECATE_KEY;
Expand Down Expand Up @@ -80,7 +76,7 @@ public Script getScript(String bulkJobId) {

record Other(String name, Map<String, ?> parameters) implements Specification {
private static final Map<String, List<String>> ALLOWED_SCRIPTS_PARAMS = Map.of(
"removeSubdivision", List.of(REMOVE_SUBDIVISION_KEY, ADD_SUBJECT_KEY)
"removeSubdivision", List.of(REMOVE_SUBDIVISION_KEY, ADD_TERM_KEY)
);

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,43 +8,55 @@
*/


import whelk.JsonLd
import whelk.Whelk
import whelk.datatool.DocumentItem
import whelk.util.DocumentUtil

import static whelk.JsonLd.ID_KEY
import static whelk.JsonLd.TYPE_KEY
import static whelk.JsonLd.asList
import static whelk.converter.JsonLDTurtleConverter.toTurtleNoPrelude
import static whelk.datatool.bulkchange.BulkJobDocument.ADD_SUBJECT_KEY
import static whelk.datatool.bulkchange.BulkJobDocument.ADD_TERM_KEY
import static whelk.datatool.bulkchange.BulkJobDocument.REMOVE_SUBDIVISION_KEY

Whelk whelk = getWhelk()

Map inScheme
List<Map> removeSubdivision = asList(parameters.get(REMOVE_SUBDIVISION_KEY)).collect {
Map copy = new HashMap((Map) it)
inScheme = copy.remove('inScheme')
inScheme = (Map) copy.remove('inScheme')
return copy
}
Map addSubject = parameters.get(ADD_SUBJECT_KEY)
Map addTerm = parameters.get(ADD_TERM_KEY)
String addTermType = addTerm ? getType(addTerm) : null

def process = { doc ->
def process = { DocumentItem doc ->
Map thing = doc.graph[1] as Map

if (thing[JsonLd.TYPE_KEY] == 'ComplexSubject') {
if (thing[TYPE_KEY] == 'ComplexSubject') {
return
}

Set<List> modifiedListPaths = [] as Set
def modified = DocumentUtil.traverse(thing) { value, path ->
if (value instanceof Map && value[JsonLd.TYPE_KEY] == 'ComplexSubject') {
if (value instanceof Map && value[TYPE_KEY] == 'ComplexSubject') {
var t = asList(value.get('termComponentList'))
if ((!inScheme || inScheme == value['inScheme']) && t.containsAll(removeSubdivision)) {
var parentPath = path.size() > 1 ? path.dropRight(1) : null
if (parentPath) {
var parent = DocumentUtil.getAtPath(thing, parentPath)
if (parent instanceof List) {
modifiedListPaths.add(parentPath)
if (addSubject) {
parent.add(addSubject)
if (whelk.jsonld.isSubClassOf(addTermType, 'Subject')) {
parent.add(addTerm)
} else if (whelk.jsonld.isSubClassOf(addTermType, 'GenreForm')) {
var grandParent = DocumentUtil.getAtPath(thing, parentPath.dropRight(1))
if (grandParent instanceof Map) {
def genreForm = asList(grandParent['genreForm'])
if (!genreForm.contains(addTerm)) {
genreForm.add(addTerm)
}
grandParent['genreForm'] = genreForm
}
}
}
}
Expand Down Expand Up @@ -81,7 +93,6 @@ linked.each { l ->
}
}
if (!blank.isEmpty()) {
Whelk whelk = getWhelk()
/*
Querying records containing the given combination of blank subdivisions is very slow so we have to run a separate
query for each subdivision. However the maximum number of results from a Sparql query is 100k so if we just take the
Expand Down Expand Up @@ -109,7 +120,7 @@ static DocumentUtil.Operation mapSubject(Map complexSubject, termComponentList,
}
if (t2.size() == 1) {
def remaining = t2.first()
if (complexSubject['inScheme'] && !remaining[ID_KEY]) {
if (complexSubject['inScheme'] && !remaining['inScheme'] && !remaining[ID_KEY]) {
remaining['inScheme'] = complexSubject['inScheme']
}
return new DocumentUtil.Replace(remaining)
Expand All @@ -118,4 +129,16 @@ static DocumentUtil.Operation mapSubject(Map complexSubject, termComponentList,
Map result = new HashMap(complexSubject)
result.termComponentList = t2
return new DocumentUtil.Replace(result)
}
}

/*
 Resolves the type of the given term.

 When the term carries a value under ID_KEY, the type is taken from the document(s)
 handed to the selectByIds callback (via doc.getThingType()); if the callback is never
 invoked the result is null, and if it is invoked more than once the last value wins.
 When the term has no ID_KEY value, whatever is stored under TYPE_KEY is returned.

 NOTE(review): presumably selectByIds looks up the linked record by its id - confirm
 against the script runtime.
 */
String getType(Map term) {
    def termId = term[ID_KEY]
    if (!termId) {
        // No link to follow: fall back to the type stated on the term itself
        return term[TYPE_KEY]
    }

    String resolvedType
    selectByIds([termId]) { item ->
        resolvedType = item.doc.getThingType()
    }
    return resolvedType
}

0 comments on commit 86d697b

Please sign in to comment.