diff --git a/whelk-core/src/main/groovy/whelk/JsonLd.groovy b/whelk-core/src/main/groovy/whelk/JsonLd.groovy
index 43149a8350..9b81a2c6f3 100644
--- a/whelk-core/src/main/groovy/whelk/JsonLd.groovy
+++ b/whelk-core/src/main/groovy/whelk/JsonLd.groovy
@@ -511,6 +511,16 @@ class JsonLd {
         return (o instanceof List) ? (List) o : o != null ? [o] : []
     }
 
+    /**
+     * Cheap syntactic test for an absolute web IRI.
+     *
+     * @param s candidate string, may be null or empty
+     * @return true only if s is non-empty and starts with 'https://' or 'http://'
+     */
+    static boolean looksLikeIri(String s) {
+        s && (s.startsWith('https://') || s.startsWith('http://'))
+    }
+
     static List> findPaths(Map obj, String key, String value) {
         return findPaths(obj, key, [value].toSet())
     }
diff --git a/whelk-core/src/main/groovy/whelk/Whelk.groovy b/whelk-core/src/main/groovy/whelk/Whelk.groovy
index 9f5793b66f..0d59f543a2 100644
--- a/whelk-core/src/main/groovy/whelk/Whelk.groovy
+++ b/whelk-core/src/main/groovy/whelk/Whelk.groovy
@@ -164,23 +164,50 @@ class Whelk {
     }
 
+    /**
+     * Loads many documents at once.
+     *
+     * Each id may be a URI under Document.BASE_URI, any other http(s) IRI
+     * (resolved through storage.getSystemIdsByIris), or a bare system id.
+     *
+     * @param ids ids to look up
+     * @return the non-deleted documents that were found, keyed by the id form
+     *         the caller passed in; ids that resolve to nothing are omitted
+     */
     Map bulkLoad(Collection ids) {
-        Map result = [:]
+        // system id -> every requested id that resolves to it. Several requested
+        // ids (e.g. two IRIs of one record) may share a system id, and each of
+        // them must come back as a key.
+        Map<String, Set<String>> requestedAs = [:]
+        List<String> otherIris = []
         ids.each { id ->
-            Document doc
-
-            // Fetch from DB
             if (id.startsWith(Document.BASE_URI.toString())) {
-                id = Document.BASE_URI.resolve(id).getPath().substring(1)
+                String systemId = Document.BASE_URI.resolve(id).getPath().substring(1)
+                requestedAs.computeIfAbsent(systemId, { k -> new LinkedHashSet<String>() }).add(id)
             }
-            doc = storage.load(id)
-            if (doc == null)
-                doc = storage.getDocumentByIri(id)
-
-            if (doc && !doc.deleted) {
-                result[id] = doc
+            else if (JsonLd.looksLikeIri(id)) {
+                otherIris << id
+            }
+            else {
+                requestedAs.computeIfAbsent(id, { k -> new LinkedHashSet<String>() }).add(id)
             }
         }
-        return result
+        if (otherIris) {
+            storage.getSystemIdsByIris(otherIris).each { iri, systemId ->
+                requestedAs.computeIfAbsent(systemId, { k -> new LinkedHashSet<String>() }).add(iri)
+            }
+        }
+        if (requestedAs.isEmpty()) {
+            // Nothing resolvable was requested; skip the database round trip.
+            return [:]
+        }
+
+        Map result = [:]
+        storage.bulkLoad(requestedAs.keySet()).each { systemId, doc ->
+            if (!doc.deleted) {
+                requestedAs[systemId].each { requested -> result[requested] = doc }
+            }
+        }
+        return result
     }
 
     private void reindex(Document updated, Document preUpdateDoc) {
diff --git a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy
index 21ed3b1e16..9bb165c9ad 100644
--- a/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy
+++ b/whelk-core/src/main/groovy/whelk/component/PostgreSQLComponent.groovy
@@ -7,6 +7,7 @@ import com.zaxxer.hikari.metrics.prometheus.PrometheusHistogramMetricsTrackerFac
 import groovy.json.StringEscapeUtils
 import groovy.transform.CompileStatic
 import groovy.util.logging.Log4j2 as Log
+import org.apache.jena.atlas.iterator.Iter
 import org.postgresql.PGStatement
 import org.postgresql.util.PGobject
 import org.postgresql.util.PSQLException
@@ -118,6 +119,13 @@ class PostgreSQLComponent {
     private static final String GET_DOCUMENT_VERSION =
             "SELECT id, data FROM lddb__versions WHERE id = ? AND checksum = ?"
 
+    // Takes one text[] parameter and returns the lddb rows whose id is in that array.
+    private static final String BULK_LOAD_DOCUMENTS = """
+            SELECT id, data, created, modified, deleted
+            FROM unnest(?) AS in_id, lddb l
+            WHERE in_id = l.id
+            """.stripIndent()
+
     private static final String GET_EMBELLISHED_DOCUMENT =
             "SELECT data from lddb__embellished where id = ?"
 
@@ -1827,6 +1835,35 @@ class PostgreSQLComponent {
         }
         return doc
     }
+
+    /**
+     * Loads all documents with the given system ids in a single query.
+     * Rows flagged as deleted are not filtered out here.
+     *
+     * @param systemIds ids to look up in lddb
+     * @return the documents found, keyed by system id in sorted key order;
+     *         ids without a matching row are absent
+     */
+    Map bulkLoad(Iterable systemIds) {
+        return withDbConnection {
+            Connection connection = getMyConnection()
+            PreparedStatement preparedStatement = null
+            ResultSet rs = null
+            try {
+                preparedStatement = connection.prepareStatement(BULK_LOAD_DOCUMENTS)
+                preparedStatement.setArray(1, connection.createArrayOf("TEXT", systemIds as String[]))
+
+                rs = preparedStatement.executeQuery()
+                SortedMap result = new TreeMap<>()
+                while(rs.next()) {
+                    result[rs.getString("id")] = assembleDocument(rs)
+                }
+                return result
+            } finally {
+                close(rs, preparedStatement)
+            }
+        }
+    }
 
     String getSystemIdByIri(String iri) {
         return withDbConnection {