Skip to content

Commit

Permalink
Add solr field with number of times a field is used in a record #342
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Nov 2, 2023
1 parent 3add92c commit 0dc4c92
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 8 deletions.
2 changes: 1 addition & 1 deletion catalogues/k10plus_pica_grouped.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ TYPE_PARAMS="$TYPE_PARAMS --ignorableFields 001@,001E,001L,001U,001U,001X,001X,0
TYPE_PARAMS="$TYPE_PARAMS --allowableRecords base64:"$(echo '002@.0 !~ "^L" && 002@.0 !~ "^..[iktN]" && (002@.0 !~ "^.v" || 021A.a?)' | base64 -w 0)
TYPE_PARAMS="$TYPE_PARAMS --solrForScoresUrl http://localhost:8983/solr/k10plus_pica_grouped_validation"
TYPE_PARAMS="$TYPE_PARAMS --indexWithTokenizedField"
TYPE_PARAMS="$TYPE_PARAMS --countFields"
TYPE_PARAMS="$TYPE_PARAMS --indexFieldCounts"
# MASK=sample.pica
# =kxp-title_2022-09-30-groupped.dat.gz
MASK=${MASK:=pica-with-holdings-info-1K.dat} # if not set in setdir.sh
Expand Down
22 changes: 16 additions & 6 deletions src/main/java/de/gwdg/metadataqa/marc/cli/MarcToSolr.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
import de.gwdg.metadataqa.marc.definition.general.indexer.FieldIndexer;
import de.gwdg.metadataqa.marc.model.validation.ValidationError;
import de.gwdg.metadataqa.marc.utils.Counter;
import de.gwdg.metadataqa.marc.utils.keygenerator.DataFieldKeyGenerator;
import de.gwdg.metadataqa.marc.utils.pica.PicaFieldDefinition;
import de.gwdg.metadataqa.marc.utils.pica.PicaGroupIndexer;
import de.gwdg.metadataqa.marc.utils.pica.path.PicaPath;
Expand All @@ -28,6 +29,7 @@
import java.nio.file.Path;
import java.text.DecimalFormat;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
Expand All @@ -51,6 +53,7 @@ public class MarcToSolr extends QACli<MarcToSolrParameters> implements Bibliogra
private boolean readyToProcess;
private DecimalFormat decimalFormat = new DecimalFormat();
private FieldIndexer groupIndexer;
private Map<String, String> escapedTagCache = new HashMap<>();

public MarcToSolr(String[] args) throws ParseException {
parameters = new MarcToSolrParameters(args);
Expand Down Expand Up @@ -158,22 +161,29 @@ private void indexValidationResults(BibliographicRecord bibliographicRecord, Sol
document.addField(field, validationValues.getFieldValues(field));
}

private static void indexFieldCounts(BibliographicRecord bibliographicRecord, SolrInputDocument document) {
Counter<String> fieldCOunter = new Counter<>();
private void indexFieldCounts(BibliographicRecord bibliographicRecord, SolrInputDocument document) {
Counter<String> fieldCounter = new Counter<>();
boolean isPica = bibliographicRecord.getSchemaType().equals(SchemaType.PICA);
for (DataField field : bibliographicRecord.getDatafields()) {
String tag = null;
if (field.getDefinition() != null) {
tag = bibliographicRecord.getSchemaType().equals(SchemaType.PICA)
tag = isPica
? ((PicaFieldDefinition)field.getDefinition()).getId()
: field.getDefinition().getTag();
} else {
tag = field.getTag();
}
fieldCOunter.count(tag);
fieldCounter.count(escape(tag));
}
for (Map.Entry<String, Integer> entry : fieldCOunter.entrySet()) {
for (Map.Entry<String, Integer> entry : fieldCounter.entrySet())
document.addField(String.format("%s_count_i", entry.getKey()), entry.getValue());
}
}

/**
 * Returns the escaped form of a field tag, caching results in {@code escapedTagCache}.
 *
 * <p>The escaping itself is delegated to {@link DataFieldKeyGenerator#escape(String)};
 * this wrapper only avoids repeating that call for tags that were already seen.
 *
 * @param tag the tag (or field id) to escape
 * @return the value produced by {@code DataFieldKeyGenerator.escape(tag)}
 */
private String escape(String tag) {
  // computeIfAbsent does the lookup, the computation on a miss and the insert in one call.
  return escapedTagCache.computeIfAbsent(tag, DataFieldKeyGenerator::escape);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,7 @@ else if (!tag.equals(indexTag) && codeForIndex.equals("_" + code))
return key;
}

private String escape(String tag) {
public static String escape(String tag) {
List<String> safe = new ArrayList<>();
for (int i = 0; i < tag.length(); i++) {
String code = tag.substring(i, i+1);
Expand Down

0 comments on commit 0dc4c92

Please sign in to comment.