From d3e27ee3657ed054a4d68e5570ceb6b7ecb9d97f Mon Sep 17 00:00:00 2001 From: Jannis Tsiroyannis Date: Thu, 11 May 2023 16:17:31 +0200 Subject: [PATCH 1/4] Experimentally add a technical note. The notes are added when a significant change (according to some set of rules) is made to a record. --- .../libris/SignificantChangeCalculator.groovy | 44 +++++++++++++++++++ .../component/PostgreSQLComponent.groovy | 14 ++++-- 2 files changed, 54 insertions(+), 4 deletions(-) create mode 100644 whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy diff --git a/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy b/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy new file mode 100644 index 0000000000..35c86758ee --- /dev/null +++ b/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy @@ -0,0 +1,44 @@ +package se.kb.libris + +import whelk.Document +import whelk.JsonLd + +class SignificantChangeCalculator { + + /** + * Compares two versions of a document, and mutates postupdateDoc with added + * ChangeNotes where applicable. + */ + public static boolean markSignificantChanges(Document preUpdateDoc, Document postUpdateDoc, Date modTime, JsonLd jsonld) { + List markersToAdd = [] + + if (significallyChangedAgent(preUpdateDoc, postUpdateDoc, jsonld)) + markersToAdd.add("https://libris.kb.se/change/agent") + + // Add additional rules.. + + if (!postUpdateDoc.data["technicalNote"] || ! postUpdateDoc.data["technicalNote"] instanceof List) + postUpdateDoc.data["technicalNote"] = [] + for (String marker : markersToAdd) { + List techNotes = postUpdateDoc.data["technicalNote"] + techNotes.add( + [ + "@type": "ChangeNote", + "category": ["@id": marker], + "date": modTime.toInstant().toString() + ] + ) + } + } + + private static boolean significallyChangedAgent(Document preUpdateDoc, Document postUpdateDoc, JsonLd jsonld) { + if ( ! jsonld.isSubClassOf( preUpdateDoc.getThingType(), "Agent") || + ! jsonld.isSubClassOf( postUpdateDoc.getThingType(), "Agent")) + return false + + return preUpdateDoc.data["@graph"][1]["name"] != postUpdateDoc.data["@graph"][1]["name"] || + preUpdateDoc.data["@graph"][1]["givenName"] != postUpdateDoc.data["@graph"][1]["givenName"] || + preUpdateDoc.data["@graph"][1]["familyName"] != postUpdateDoc.data["@graph"][1]["familyName"] || + preUpdateDoc.data["@graph"][1]["lifeSpan"] != postUpdateDoc.data["@graph"][1]["lifeSpan"] + } +} diff --git a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy index cba3868c74..8bff459609 100644 --- a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy +++ b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy @@ -13,6 +13,7 @@ import org.postgresql.PGNotification import org.postgresql.PGStatement import org.postgresql.util.PGobject import org.postgresql.util.PSQLException +import se.kb.libris.SignificantChangeCalculator import whelk.Document import whelk.IdType import whelk.JsonLd @@ -898,6 +899,14 @@ class PostgreSQLComponent { normalizeDocumentForStorage(doc, connection) + Date modTime = minorUpdate + ? new Date(resultSet.getTimestamp("modified").getTime()) + : new Date() + + // EXPERIMENTALLY: Create "interesting changes"-markers if certain significant parts of the record were changed. + SignificantChangeCalculator.markSignificantChanges(preUpdateDoc, doc, modTime, getJsonld()) + // + if (!writeIdenticalVersions && preUpdateDoc.getChecksum(jsonld).equals(doc.getChecksum(jsonld))) { throw new CancelUpdateException() } @@ -938,11 +947,8 @@ class PostgreSQLComponent { if (doVerifyDocumentIdRetention) { verifyDocumentIdRetention(preUpdateDoc, doc, connection) } - + Date createdTime = new Date(resultSet.getTimestamp("created").getTime()) - Date modTime = minorUpdate - ? new Date(resultSet.getTimestamp("modified").getTime()) - : new Date() doc.setModified(modTime) if (!minorUpdate) { From 2da2e71892c2c72c5dd593bfa5aba89666a3ed21 Mon Sep 17 00:00:00 2001 From: Jannis Tsiroyannis Date: Fri, 19 May 2023 14:26:24 +0200 Subject: [PATCH 2/4] Sort of working/limping implementation of a primary contribution marker implied by a change marker. --- .../libris/SignificantChangeCalculator.groovy | 22 +++-- .../src/main/groovy/whelk/JsonLd.groovy | 2 + .../main/groovy/whelk/JsonLdValidator.groovy | 5 + .../whelk/component/ElasticSearch.groovy | 99 ++++++++++++++++++- 4 files changed, 120 insertions(+), 8 deletions(-) diff --git a/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy b/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy index 35c86758ee..98ad777c25 100644 --- a/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy +++ b/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy @@ -9,19 +9,17 @@ class SignificantChangeCalculator { * Compares two versions of a document, and mutates postupdateDoc with added * ChangeNotes where applicable. */ - public static boolean markSignificantChanges(Document preUpdateDoc, Document postUpdateDoc, Date modTime, JsonLd jsonld) { + public static void markSignificantChanges(Document preUpdateDoc, Document postUpdateDoc, Date modTime, JsonLd jsonld) { List markersToAdd = [] - if (significallyChangedAgent(preUpdateDoc, postUpdateDoc, jsonld)) + if (significantlyChangedAgent(preUpdateDoc, postUpdateDoc, jsonld)) markersToAdd.add("https://libris.kb.se/change/agent") // Add additional rules.. - if (!postUpdateDoc.data["technicalNote"] || ! postUpdateDoc.data["technicalNote"] instanceof List) - postUpdateDoc.data["technicalNote"] = [] + List newTechNotes = [] for (String marker : markersToAdd) { - List techNotes = postUpdateDoc.data["technicalNote"] - techNotes.add( + newTechNotes.add( [ "@type": "ChangeNote", "category": ["@id": marker], @@ -29,9 +27,19 @@ class SignificantChangeCalculator { ] ) } + + if (newTechNotes) { + if (postUpdateDoc.data["@graph"][1]["technicalNote"] && postUpdateDoc.data["@graph"][1]["technicalNote"] instanceof List) { + Set notes = postUpdateDoc.data["@graph"][1]["technicalNote"] as Set + notes.addAll(newTechNotes) + postUpdateDoc.data["@graph"][1]["technicalNote"] = notes.toList() + } else { + postUpdateDoc.data["@graph"][1]["technicalNote"] = newTechNotes + } + } } - private static boolean significallyChangedAgent(Document preUpdateDoc, Document postUpdateDoc, JsonLd jsonld) { + private static boolean significantlyChangedAgent(Document preUpdateDoc, Document postUpdateDoc, JsonLd jsonld) { if ( ! jsonld.isSubClassOf( preUpdateDoc.getThingType(), "Agent") || ! jsonld.isSubClassOf( postUpdateDoc.getThingType(), "Agent")) return false diff --git a/whelk-core/src/main/groovy/whelk/JsonLd.groovy b/whelk-core/src/main/groovy/whelk/JsonLd.groovy index 90d5e550c1..3d25a9bc5c 100644 --- a/whelk-core/src/main/groovy/whelk/JsonLd.groovy +++ b/whelk-core/src/main/groovy/whelk/JsonLd.groovy @@ -1156,6 +1156,8 @@ class JsonLd { private static boolean shouldAlwaysKeep(String key) { return key == RECORD_KEY || key == THING_KEY || key == JSONLD_ALT_ID_KEY || key.startsWith("@") + // Temporary hack to make technical notes actually inherit/be embellishable + || key == "technicalNote" || key == "category" || key == "date" || key == "givenName" || key == "familyName" } diff --git a/whelk-core/src/main/groovy/whelk/JsonLdValidator.groovy b/whelk-core/src/main/groovy/whelk/JsonLdValidator.groovy index c2ca383c39..b46db08c44 100644 --- a/whelk-core/src/main/groovy/whelk/JsonLdValidator.groovy +++ b/whelk-core/src/main/groovy/whelk/JsonLdValidator.groovy @@ -200,6 +200,11 @@ class JsonLdValidator { } private void verifyVocabTerm(String key, value, validation) { + + // Temporary, until "ChangeNote" can be added to vocab + if (key == "@type" && value == "ChangeNote") + return + if ((key == jsonLd.TYPE_KEY || isVocabTerm(key)) && !jsonLd.vocabIndex.containsKey(value?.toString())) { handleError(new Error(Error.Type.UNKNOWN_VOCAB_VALUE, key, value), validation) diff --git a/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy b/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy index 3a796e0030..d8bacfd9ad 100644 --- a/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy +++ b/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy @@ -307,9 +307,104 @@ class ElasticSearch { } } + /** + * Types in 'full' are allowed to inherit types in 'end' and still be considered equal-ending. + * + * For example, full = ["instanceOf", "@type=NotatedMusic"] and end = ["instanceOf", "@type=Work"] is considered + * equal-ending, but if 'full' and 'end' switch places, they are not. + */ + boolean typedPathEndsWith(List full, List end, JsonLd jsonLd) { + if (end.size() > full.size()) + return false + List fullEnd = full.subList(full.size() - end.size(), full.size()) + + for (int i = 0; i < end.size(); ++i) { + if (fullEnd[i].startsWith("@type=") && end[i].startsWith("@type=")) { + String t1 = fullEnd[i].substring("@type=".length()) + String t2 = end[i].substring("@type=".length()) + if (! jsonLd.isSubClassOf(t1, t2) ) { + return false + } + } + else if (fullEnd[i] != end[i]) { + return false + } + } + return true + } + + Map getImpliedTechnicalNote(Map note, List typedPath, JsonLd jsonld) { + + //System.err.println("\ttesting "+ typedPath) + + // Are conditions met for an implied primary-contribution marker? + if ( typedPathEndsWith(typedPath, ["contribution", "@type=PrimaryContribution", "agent", "@type=Agent", "technicalNote", "@type=ChangeNote"], jsonld) ) { + System.err.println("\tFound an inherited note: " + note) + if (note["category"] && note["category"] instanceof Map && note["category"]["@id"] && note["category"]["@id"] instanceof String) { + if (note["category"]["@id"] == "https://libris.kb.se/change/agent") { + System.err.println("\t\tpc implied!") + return [ + "@type" : "ChangeNote", + "category": ["@id": "https://libris.kb.se/change/primarycontribution"], + "date" : note["date"] + ] + } + } + } + + // Default: Return the original note + return note + } + + Set collectTechnicalNotes(Object data, List typedPath, JsonLd jsonld) { + Set results = [] + if (data instanceof Map) { + + if ("ChangeNote" == data["@type"]) { + results.add( getImpliedTechnicalNote(data, typedPath, jsonld) ) + results.add( data ) + } + + data.keySet().each { + List nextPath = new ArrayList<>(typedPath) + nextPath.add(it) + if (data[it] instanceof Map && data[it]["@type"]) + nextPath.add("@type=" + data[it]["@type"]) + results.addAll(collectTechnicalNotes(data[it], nextPath, jsonld)) + } + } else if (data instanceof List) { + data.each { it -> + List nextPath = new ArrayList<>(typedPath) + if (it instanceof Map && it["@type"]) + nextPath.add("@type=" + it["@type"]) + results.addAll(collectTechnicalNotes(it, nextPath, jsonld)) + } + } + return results + } + + void compileTechnicalNotes(Map framed, JsonLd jsonld) { + + //System.err.println("\n\nWill now get tech notes on: " + mapper.writeValueAsString(framed) + "\n\n") + + Set compiledNotes = collectTechnicalNotes(framed, [], jsonld) + + System.err.println(" Final new implied notes for " + framed["@id"] + " : " + compiledNotes) + + if ( framed["technicalNote"] ) { + if ( framed["technicalNote"] instanceof List ) + compiledNotes.addAll(framed["technicalNote"]) + else + compiledNotes.add(framed["technicalNote"]) + } + framed["technicalNote"] = compiledNotes.toList() + } + String getShapeForIndex(Document document, Whelk whelk) { Document copy = document.clone() - + + System.err.println("********** Reindexing " + copy.getShortId()) + whelk.embellish(copy, ['search-chips']) if (log.isDebugEnabled()) { @@ -339,6 +434,8 @@ class ElasticSearch { REMOVABLE_BASE_URIS, document.getThingInScheme() ? ['tokens', 'chips'] : ['chips']) + compileTechnicalNotes(framed, whelk.getJsonld()) + DocumentUtil.traverse(framed) { value, path -> if (path && JsonLd.SEARCH_KEY == path.last() && !Unicode.isNormalizedForSearch(value)) { // TODO: replace with elastic ICU Analysis plugin? From da683a0ad2fa6ba6403a556c9a7e981f1c2bc8ce Mon Sep 17 00:00:00 2001 From: Jannis Tsiroyannis Date: Fri, 19 May 2023 14:42:32 +0200 Subject: [PATCH 3/4] Cleaning up. --- .../libris/SignificantChangeCalculator.groovy | 48 +++++++++++++++ .../whelk/component/ElasticSearch.groovy | 58 +------------------ 2 files changed, 50 insertions(+), 56 deletions(-) diff --git a/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy b/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy index 98ad777c25..3ae1578edd 100644 --- a/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy +++ b/whelk-core/src/main/groovy/se/kb/libris/SignificantChangeCalculator.groovy @@ -5,6 +5,28 @@ import whelk.JsonLd class SignificantChangeCalculator { + public static Map getImpliedTechnicalNote(Map note, List typedPath, JsonLd jsonld) { + + // Are conditions met for an implied primary-contribution marker? + // In other words: Is this (change-) 'note' a '/change/agent' placed on a primaryContribution (or derivative) path? + if ( typedPathEndsWith(typedPath, ["contribution", "@type=PrimaryContribution", "agent", "@type=Agent", "technicalNote", "@type=ChangeNote"], jsonld) ) { + if (note["category"] && note["category"] instanceof Map && note["category"]["@id"] && note["category"]["@id"] instanceof String) { + if (note["category"]["@id"] == "https://libris.kb.se/change/agent") { + return [ + "@type" : "ChangeNote", + "category": ["@id": "https://libris.kb.se/change/primarycontribution"], + "date" : note["date"] + ] + } + } + } + + // Check for additional implied markers.. + + // Default: Return the original note + return note + } + /** * Compares two versions of a document, and mutates postupdateDoc with added * ChangeNotes where applicable. @@ -39,6 +61,32 @@ class SignificantChangeCalculator { } } + /** + * Types in 'full' are allowed to inherit types in 'end' and still be considered equal-ending. + * + * For example, full = ["instanceOf", "@type=NotatedMusic"] and end = ["instanceOf", "@type=Work"] is considered + * equal-ending, but if 'full' and 'end' switch places, they are not. + */ + private static boolean typedPathEndsWith(List full, List end, JsonLd jsonLd) { + if (end.size() > full.size()) + return false + List fullEnd = full.subList(full.size() - end.size(), full.size()) + + for (int i = 0; i < end.size(); ++i) { + if (fullEnd[i].startsWith("@type=") && end[i].startsWith("@type=")) { + String t1 = fullEnd[i].substring("@type=".length()) + String t2 = end[i].substring("@type=".length()) + if (! jsonLd.isSubClassOf(t1, t2) ) { + return false + } + } + else if (fullEnd[i] != end[i]) { + return false + } + } + return true + } + private static boolean significantlyChangedAgent(Document preUpdateDoc, Document postUpdateDoc, JsonLd jsonld) { if ( ! jsonld.isSubClassOf( preUpdateDoc.getThingType(), "Agent") || ! jsonld.isSubClassOf( postUpdateDoc.getThingType(), "Agent")) diff --git a/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy b/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy index d8bacfd9ad..8e60ea3b37 100644 --- a/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy +++ b/whelk-core/src/main/groovy/whelk/component/ElasticSearch.groovy @@ -307,61 +307,12 @@ class ElasticSearch { } } - /** - * Types in 'full' are allowed to inherit types in 'end' and still be considered equal-ending. - * - * For example, full = ["instanceOf", "@type=NotatedMusic"] and end = ["instanceOf", "@type=Work"] is considered - * equal-ending, but if 'full' and 'end' switch places, they are not. - */ - boolean typedPathEndsWith(List full, List end, JsonLd jsonLd) { - if (end.size() > full.size()) - return false - List fullEnd = full.subList(full.size() - end.size(), full.size()) - - for (int i = 0; i < end.size(); ++i) { - if (fullEnd[i].startsWith("@type=") && end[i].startsWith("@type=")) { - String t1 = fullEnd[i].substring("@type=".length()) - String t2 = end[i].substring("@type=".length()) - if (! jsonLd.isSubClassOf(t1, t2) ) { - return false - } - } - else if (fullEnd[i] != end[i]) { - return false - } - } - return true - } - - Map getImpliedTechnicalNote(Map note, List typedPath, JsonLd jsonld) { - - //System.err.println("\ttesting "+ typedPath) - - // Are conditions met for an implied primary-contribution marker? - if ( typedPathEndsWith(typedPath, ["contribution", "@type=PrimaryContribution", "agent", "@type=Agent", "technicalNote", "@type=ChangeNote"], jsonld) ) { - System.err.println("\tFound an inherited note: " + note) - if (note["category"] && note["category"] instanceof Map && note["category"]["@id"] && note["category"]["@id"] instanceof String) { - if (note["category"]["@id"] == "https://libris.kb.se/change/agent") { - System.err.println("\t\tpc implied!") - return [ - "@type" : "ChangeNote", - "category": ["@id": "https://libris.kb.se/change/primarycontribution"], - "date" : note["date"] - ] - } - } - } - - // Default: Return the original note - return note - } - Set collectTechnicalNotes(Object data, List typedPath, JsonLd jsonld) { Set results = [] if (data instanceof Map) { if ("ChangeNote" == data["@type"]) { - results.add( getImpliedTechnicalNote(data, typedPath, jsonld) ) + results.add( se.kb.libris.SignificantChangeCalculator.getImpliedTechnicalNote(data, typedPath, jsonld) ) results.add( data ) } @@ -384,12 +335,9 @@ class ElasticSearch { } void compileTechnicalNotes(Map framed, JsonLd jsonld) { - - //System.err.println("\n\nWill now get tech notes on: " + mapper.writeValueAsString(framed) + "\n\n") - Set compiledNotes = collectTechnicalNotes(framed, [], jsonld) - System.err.println(" Final new implied notes for " + framed["@id"] + " : " + compiledNotes) + System.err.println("Final technical notes for " + framed["@id"] + " : " + compiledNotes) if ( framed["technicalNote"] ) { if ( framed["technicalNote"] instanceof List ) @@ -403,8 +351,6 @@ class ElasticSearch { String getShapeForIndex(Document document, Whelk whelk) { Document copy = document.clone() - System.err.println("********** Reindexing " + copy.getShortId()) - whelk.embellish(copy, ['search-chips']) if (log.isDebugEnabled()) { From e9b28d99dca7ef9f70b296989dda5d91eb3060f6 Mon Sep 17 00:00:00 2001 From: Jannis Tsiroyannis Date: Fri, 19 May 2023 14:48:17 +0200 Subject: [PATCH 4/4] Cleaning up. --- .../src/main/groovy/whelk/component/PostgreSQLComponent.groovy | 2 -- 1 file changed, 2 deletions(-) diff --git a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy index 8bff459609..1ab5d2f2e3 100644 --- a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy +++ b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy @@ -903,9 +903,7 @@ class PostgreSQLComponent { ? new Date(resultSet.getTimestamp("modified").getTime()) : new Date() - // EXPERIMENTALLY: Create "interesting changes"-markers if certain significant parts of the record were changed. SignificantChangeCalculator.markSignificantChanges(preUpdateDoc, doc, modTime, getJsonld()) - // if (!writeIdenticalVersions && preUpdateDoc.getChecksum(jsonld).equals(doc.getChecksum(jsonld))) { throw new CancelUpdateException()