Skip to content

Commit

Permalink
Add solr field with number of times a field is used in a record #342: add subfield count
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Dec 19, 2024
1 parent 9e228f0 commit 24ba0e6
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 2 deletions.
18 changes: 17 additions & 1 deletion src/main/java/de/gwdg/metadataqa/marc/cli/MarcToSolr.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.io.Serializable;
import java.nio.file.Path;
import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -207,6 +208,7 @@ private void indexFieldCounts(BibliographicRecord bibliographicRecord,
SolrInputDocument document) {
Counter<String> counter = new Counter<>();
boolean isPica = bibliographicRecord.getSchemaType().equals(SchemaType.PICA);
Map<String, List<Integer>> subfields = new HashMap<>();
for (DataField field : bibliographicRecord.getDatafields()) {
String tag;
if (field.getDefinition() != null) {
Expand All @@ -221,9 +223,15 @@ private void indexFieldCounts(BibliographicRecord bibliographicRecord,
counter.count(safeTag);

if (parameters.isIndexSubfieldCounts()) {
Counter<String> subfieldCounter = new Counter<>();
for (MarcSubfield subfield : field.getSubfields()) {
String safeSubfieldCode = DataFieldKeyGenerator.escape(subfield.getCode());
counter.count(safeTag + safeSubfieldCode);
subfieldCounter.count(safeTag + safeSubfieldCode);
}
for (Map.Entry<String, Integer> entry : subfieldCounter.entrySet()) {
if (!subfields.containsKey(entry.getKey()))
subfields.put(entry.getKey(), new ArrayList<>());
subfields.get(entry.getKey()).add(entry.getValue());
}
}
}
Expand All @@ -232,6 +240,14 @@ private void indexFieldCounts(BibliographicRecord bibliographicRecord,
"%s%s_count_i",
parameters.getFieldPrefix(), entry.getKey()), entry.getValue());
}

if (parameters.isIndexSubfieldCounts()) {
for (Map.Entry<String, List<Integer>> entry : subfields.entrySet()) {
document.addField(String.format(
"%s%s_count_is",
parameters.getFieldPrefix(), entry.getKey()), entry.getValue());
}
}
}

private String escape(String tag) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public boolean isIndexSubfieldCounts() {
}

public String getFieldPrefix() {
return fieldPrefix;
return fieldPrefix != null ? fieldPrefix : "";
}

public void setFieldPrefix(String fieldPrefix) {
Expand Down

0 comments on commit 24ba0e6

Please sign in to comment.