Skip to content

Commit

Permalink
Load all documents in one (1) query in whelk.bulkLoad
Browse files Browse the repository at this point in the history
  • Loading branch information
olovy committed Jun 8, 2022
1 parent e32f0ba commit b5e8477
Show file tree
Hide file tree
Showing 3 changed files with 54 additions and 12 deletions.
4 changes: 4 additions & 0 deletions whelk-core/src/main/groovy/whelk/JsonLd.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -511,6 +511,10 @@ class JsonLd {
return (o instanceof List) ? (List) o : o != null ? [o] : []
}

/**
 * Cheap syntactic check for whether a string is an HTTP(S) IRI.
 *
 * @param s the candidate string; may be null or empty
 * @return true only when s is non-empty and begins with 'http://' or 'https://'
 */
static boolean looksLikeIri(String s) {
    // Groovy truth: null and the empty string are both falsy.
    if (!s) {
        return false
    }
    return s.startsWith('http://') || s.startsWith('https://')
}

/**
 * Single-value convenience overload: wraps the given value in a set and
 * delegates to the findPaths overload that accepts a set of values.
 *
 * @param obj   the map to search
 * @param key   the key to look for
 * @param value the single value to match
 * @return whatever the set-accepting overload returns for a one-element set
 */
static List<List<String>> findPaths(Map obj, String key, String value) {
    Set<String> singleValue = [value].toSet()
    return findPaths(obj, key, singleValue)
}
Expand Down
34 changes: 22 additions & 12 deletions whelk-core/src/main/groovy/whelk/Whelk.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -164,23 +164,33 @@ class Whelk {
}

/**
 * Loads several documents with a single call to storage.bulkLoad.
 *
 * Each requested id is classified as one of:
 *  - an IRI under Document.BASE_URI, whose path (minus the leading '/') is used as the system id,
 *  - some other http(s) IRI, which is resolved to a system id via storage.getSystemIdsByIris,
 *  - anything else, which is used as a system id as-is.
 *
 * @param ids identifiers (system ids and/or IRIs) of the documents to load
 * @return the loaded, non-deleted documents, keyed by the identifier form that was
 *         passed in (falling back to the system id when no mapping was recorded)
 */
Map<String, Document> bulkLoad(Collection<String> ids) {
    // system id -> identifier as given by the caller, so results can be keyed by the caller's form
    def idMap = [:]
    def otherIris = []
    List<String> systemIds = []
    ids.each { id ->
        if (id.startsWith(Document.BASE_URI.toString())) {
            // Drop the leading '/' of the path to get the system id.
            def systemId = Document.BASE_URI.resolve(id).getPath().substring(1)
            idMap[systemId] = id
            systemIds << systemId
        }
        else if (JsonLd.looksLikeIri(id)) {
            otherIris << id
        }
        else {
            systemIds << id
        }
    }
    if (otherIris) {
        // NOTE(review): presumably getSystemIdsByIris returns IRI -> system id; it is
        // inverted here to system id -> IRI. Confirm against the storage component.
        Map<String, String> idToIri = storage.getSystemIdsByIris(otherIris)
                .collectEntries { k, v -> [(v): k] }

        systemIds.addAll(idToIri.keySet())
        idMap.putAll(idToIri)
    }

    return storage.bulkLoad(systemIds)
            .findAll { id, doc -> !doc.deleted }
            .collectEntries { id, doc -> [(idMap.getOrDefault(id, id)) : doc]}
}

private void reindex(Document updated, Document preUpdateDoc) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import com.zaxxer.hikari.metrics.prometheus.PrometheusHistogramMetricsTrackerFac
import groovy.json.StringEscapeUtils
import groovy.transform.CompileStatic
import groovy.util.logging.Log4j2 as Log
import org.apache.jena.atlas.iterator.Iter
import org.postgresql.PGStatement
import org.postgresql.util.PGobject
import org.postgresql.util.PSQLException
Expand Down Expand Up @@ -118,6 +119,12 @@ class PostgreSQLComponent {
// Selects id and data of the lddb__versions row matching a given id (1st parameter)
// and checksum (2nd parameter).
private static final String GET_DOCUMENT_VERSION =
"SELECT id, data FROM lddb__versions WHERE id = ? AND checksum = ?"

// Selects id, data, created, modified and deleted from lddb for every id contained in
// the array bound to the single parameter: unnest(?) expands the array into rows
// (in_id), which are matched against lddb.id.
private static final String BULK_LOAD_DOCUMENTS = """
SELECT id, data, created, modified, deleted
FROM unnest(?) AS in_id, lddb l
WHERE in_id = l.id
""".stripIndent()

// Selects the data column of the lddb__embellished row with the given id.
private static final String GET_EMBELLISHED_DOCUMENT =
"SELECT data from lddb__embellished where id = ?"

Expand Down Expand Up @@ -1827,6 +1834,27 @@ class PostgreSQLComponent {
}
return doc
}

/**
 * Loads the documents with the given system ids using one BULK_LOAD_DOCUMENTS query.
 *
 * @param systemIds system ids of the documents to fetch; bound to the query as a TEXT array
 * @return a sorted map from the "id" column of each returned row to the Document
 *         assembled from that row; ids that produce no row are simply absent
 */
Map<String, Document> bulkLoad(Iterable<String> systemIds) {
    return withDbConnection {
        Connection conn = getMyConnection()
        PreparedStatement statement = null
        ResultSet rows = null
        try {
            statement = conn.prepareStatement(BULK_LOAD_DOCUMENTS)
            java.sql.Array idArray = conn.createArrayOf("TEXT", systemIds as String[])
            statement.setArray(1, idArray)
            rows = statement.executeQuery()

            SortedMap<String, Document> docsById = new TreeMap<>()
            while (rows.next()) {
                docsById.put(rows.getString("id"), assembleDocument(rows))
            }
            return docsById
        } finally {
            // Release the result set and statement whether or not the query succeeded.
            close(rows, statement)
        }
    }
}

String getSystemIdByIri(String iri) {
return withDbConnection {
Expand Down

0 comments on commit b5e8477

Please sign in to comment.