Filter parameters in common-script #548
pkiraly committed Nov 21, 2024
1 parent 30ca597 commit 06725ec
Showing 6 changed files with 58 additions and 21 deletions.
65 changes: 49 additions & 16 deletions common-script
@@ -18,8 +18,15 @@ log() {
}

# define 'untrace' command to disable trace mode
# remove all alias definitions
unalias -a
# shopt sets and unsets shell options.
# -s OPTNAME: enable (set) each OPTNAME
# expand_aliases: aliases are expanded.
# Aliases are not expanded when the shell is not interactive,
# unless the expand_aliases shell option is set with shopt.
shopt -s expand_aliases
# 'set +x' disables trace mode ('set -x' prints commands and their
# arguments as they are executed); the redirect hides the 'set +x'
# line itself from the trace output.
alias untrace='{ set +x; } 2> /dev/null'

# start a named processing step and enable trace mode
@@ -30,26 +37,35 @@ run() {
set -x
}
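
For illustration, a self-contained sketch of the untrace/run pattern (not part of the commit; the body of run() here is an assumption based on the fragment above):

#!/usr/bin/env bash
# Standalone sketch of the trace helpers above (run() body assumed).
shopt -s expand_aliases
alias untrace='{ set +x; } 2> /dev/null'   # redirect hides the 'set +x' line

run() {
  echo "step: $1"
  set -x                                   # trace every command from here on
}

run demo
echo "this command is echoed by the shell before it runs"
untrace
echo "tracing is off again"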

filter_params() {
php scripts/utils/parameter-filter.php $1 ${TYPE_PARAMS}
}
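
filter_params delegates the option filtering to a PHP helper driven by parameter-definition.json (added below): only options listed under "common" or under the named analysis survive. A hypothetical jq-based shell equivalent, for illustration only — the real logic lives in scripts/utils/parameter-filter.php, which is not shown in this diff:

# Hypothetical sketch; assumes '--option value' style tokens in TYPE_PARAMS.
filter_params_sketch() {
  local analysis=$1 allowed out="" skip_value=""
  allowed=$(jq -r --arg a "$analysis" \
    '(.common + (.[$a] // [])) | .[].long' parameter-definition.json)
  for token in ${TYPE_PARAMS}; do
    if [[ "$token" == --* ]]; then
      skip_value=""
      if grep -qx "${token#--}" <<< "$allowed"; then
        out+=" $token"                # option is valid for this analysis
      else
        skip_value="yes"              # drop the option and its value
      fi
    elif [[ -z "$skip_value" ]]; then
      out+=" $token"                  # value of a kept option
    else
      skip_value=""                   # value of a dropped option
    fi
  done
  echo "${out# }"
}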

# ---- processing steps ----

# run validation
do_validate() {
GENERAL_PARAMS="--details --trimId --summary --format csv --defaultRecordType BOOKS"
OUTPUT_PARAMS="--outputDir ${OUTPUT_DIR} --detailsFileName issue-details.csv --summaryFileName issue-summary.csv"
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "validate")
log "log file: ${LOG_DIR}/validate.log"
run validate
./validate ${GENERAL_PARAMS} ${OUTPUT_PARAMS} ${PARAMS} ${MARC_DIR}/$MASK 2> ${LOG_DIR}/validate.log
}
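
A hypothetical before/after of the filter_params call above (values invented; the expected behaviour follows parameter-definition.json):

# TYPE_PARAMS="--marcVersion DNB --emptyLargeCollectors --solrUrl http://localhost:8983/solr/loc"
# filter_params "validate"  ->  "--marcVersion DNB --emptyLargeCollectors"
#                               (solrUrl belongs to 'index', so it is dropped)
# filter_params "index"     ->  "--marcVersion DNB --solrUrl http://localhost:8983/solr/loc"
#                               (emptyLargeCollectors is not an 'index' option)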

do_prepare_solr() {
log "log file: ${LOG_DIR}/solr.log"
run prepare-solr
./prepare-solr $NAME 2> ${LOG_DIR}/solr.log
}

do_index() {
log "log file: ${LOG_DIR}/solr.log"
run index
untrace

PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+//g')
PARAMS=$(filter_params "index")

# HAS_GROUP_PARAM=$(echo ${TYPE_PARAMS} | grep -c -P -e '--groupBy [^-]' || true)
# if [[ "${HAS_GROUP_PARAM}" == "1" ]]; then
# PARAMS="${PARAMS} --solrForScoresUrl ${NAME}_validation"
@@ -67,12 +83,14 @@ do_index() {
}

do_postprocess_solr() {
log "log file: ${LOG_DIR}/solr.log"
run postprocess-solr
./postprocess-solr $NAME 2>> ${LOG_DIR}/solr.log
}

do_completeness() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "completeness")
log "log file: ${LOG_DIR}/completeness.log"
run completeness
./completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/completeness.log
}
@@ -86,20 +104,23 @@ do_completeness_sqlite() {
}

do_classifications() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "classifications")
log "log file: ${LOG_DIR}/classifications.log"
run classifications
./classifications --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/classifications.log
Rscript scripts/classifications/classifications-type.R ${OUTPUT_DIR}
}

do_authorities() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "authorities")
log "log file: ${LOG_DIR}/authorities.log"
run authorities
./authorities --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/authorities.log
}

do_tt_completeness() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "tt-completeness")
log "log file: ${LOG_DIR}/tt-completeness.log"
run tt-completeness
./tt-completeness --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ --trimId ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/tt-completeness.log
Rscript scripts/tt-histogram/tt-histogram.R ${OUTPUT_DIR} &>> ${LOG_DIR}/tt-completeness.log
@@ -109,7 +130,8 @@ do_tt_completeness() {
}

do_shelf_ready_completeness() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "shelf-ready-completeness")
log "log file: ${LOG_DIR}/shelf-ready-completeness.log"
run shelf-ready-completeness
./shelf-ready-completeness \
--defaultRecordType BOOKS \
@@ -124,7 +146,8 @@ do_shelf_ready_completeness() {
}

do_bl_classification() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "bk-classification")
log "log file: ${LOG_DIR}/bl-classification.log"
run bk-classification
./bl-classification \
--defaultRecordType BOOKS \
@@ -134,7 +157,8 @@ do_bl_classification() {
}

do_serial_score() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "bk-classification")
log "log file: ${LOG_DIR}/serial-score.log"
run serial-score
./serial-score --defaultRecordType BOOKS \
${PARAMS} \
@@ -150,15 +174,17 @@ do_format() {
}

do_functional_analysis() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "functional-analysis")
log "log file: ${LOG_DIR}/functional-analysis.log"
run functional-analysis
./functional-analysis --defaultRecordType BOOKS \
${PARAMS} \
--outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/functional-analysis.log
}

do_network_analysis() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "network-analysis")
log "log file: ${LOG_DIR}/network-analysis.log"
run network-analysis
./network-analysis --defaultRecordType BOOKS \
${PARAMS} \
@@ -197,6 +223,7 @@ do_network_analysis() {
}

do_pareto() {
log "log file: ${LOG_DIR}/pareto.log"
run pareto
Rscript scripts/pareto/frequency-range.R ${OUTPUT_DIR} &> ${LOG_DIR}/pareto.log
untrace
@@ -214,7 +241,9 @@ do_marc_history() {
else
SELECTOR="008~7-10;008~0-5"
fi
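
An assumed reading of the default selector, based on the MARC 21 bibliographic 008 layout:

# '008~7-10' extracts character positions 07-10 of field 008 (Date 1,
# usually the publication year) and '008~0-5' positions 00-05 (the date
# the record entered the catalogue), which is what marc-history presumably
# charts over time.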
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')

PARAMS=$(filter_params "marc-history")
log "log file: ${LOG_DIR}/marc-history.log"

run marc-history
./formatter --selector "$SELECTOR" --defaultRecordType BOOKS ${PARAMS} --separator "," \
@@ -233,7 +262,8 @@ do_marc_history() {
}

do_record_patterns() {
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "record-patterns")
log "log file: ${LOG_DIR}/record-patterns.log"

run record-patterns
Rscript scripts/record-patterns/top-fields.R ${OUTPUT_DIR} &>> ${LOG_DIR}/top-fields.log
@@ -320,7 +350,7 @@ EOF
ONLY_INDEX=$(echo ${PARAMS} | grep -c -P -e '--onlyIndex' || true)

if [[ "${HAS_GROUP_PARAM}" == "0" ]]; then
log "index"
log "index sqlite3"
sqlite3 ${OUTPUT_DIR}/qa_catalogue.sqlite < scripts/sqlite/modify-tables.sql &>> ${LOG_DIR}/sqlite.log
if [[ "${SOLR_FOR_SCORES_URL}" != "" ]]; then
echo "index at ${SOLR_FOR_SCORES_URL}"
@@ -441,9 +471,10 @@ do_export_cli_parameters() {

do_shacl4bib() {
# note: the SHACL-specific parameters (--shaclConfigurationFile,
# --shaclOutputType, --shaclOutputFile) are still missing here
PARAMS=$(echo ${TYPE_PARAMS} | sed -r 's/\s*--emptyLargeCollectors|\s*--ignorableIssueTypes [^ ]+|\s*--(indexWithTokenizedField|indexFieldCounts|solrUrl)//g')
PARAMS=$(filter_params "shacl4bib")
log "log file: ${LOG_DIR}/shacl4bib.log"

run shacl4bib
echo " ./shacl4bib --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/shacl4bib.log"
./shacl4bib --defaultRecordType BOOKS ${PARAMS} --outputDir ${OUTPUT_DIR}/ ${MARC_DIR}/${MASK} 2> ${LOG_DIR}/shacl4bib.log
Rscript scripts/shacl4bib/shacl4bib.R ${OUTPUT_DIR}

@@ -571,6 +602,7 @@ config() {
}

fatal() {
echo "fatal()"
colored "1;31" "$1"
exit 1
}
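
A note on the helper used here (colored is defined elsewhere in common-script; the line below is a guess at its effect):

# colored "1;31" "message" presumably prints the message in bold red,
# roughly: printf '\e[1;31m%s\e[0m\n' "message"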
@@ -631,6 +663,7 @@ if [[ "$datatask" = true ]]; then
fi
fi

echo "tasks: ${tasks}"
for task in ${tasks//,/ }; do
case $task in
validate) do_validate ; do_validate_sqlite ;;
1 change: 1 addition & 0 deletions parameter-definition.json
@@ -0,0 +1 @@
{"common":[{"short":"m","long":"marcVersion","hasArg":true,"description":"MARC version ('OCLC' or 'DNB')"},{"short":"h","long":"help","hasArg":false,"description":"display help"},{"short":"n","long":"nolog","hasArg":false,"description":"do not display log messages"},{"short":"l","long":"limit","hasArg":true,"description":"limit the number of records to process"},{"short":"o","long":"offset","hasArg":true,"description":"the first record to process"},{"short":"i","long":"id","hasArg":true,"description":"the MARC identifier (content of 001)"},{"short":"d","long":"defaultRecordType","hasArg":true,"description":"the default record type if the record's type is undetectable"},{"short":"q","long":"fixAlephseq","hasArg":false,"description":"fix the known issues of Alephseq format"},{"short":"a","long":"fixAlma","hasArg":false,"description":"fix the known issues of Alma format"},{"short":"b","long":"fixKbr","hasArg":false,"description":"fix the known issues of Alma format"},{"short":"p","long":"alephseq","hasArg":false,"description":"the source is in Alephseq format"},{"short":"x","long":"marcxml","hasArg":false,"description":"the source is in MARCXML format"},{"short":"y","long":"lineSeparated","hasArg":false,"description":"the source is in line separated MARC format"},{"short":"t","long":"outputDir","hasArg":true,"description":"output directory"},{"short":"r","long":"trimId","hasArg":false,"description":"remove spaces from the end of record IDs"},{"short":"z","long":"ignorableFields","hasArg":true,"description":"ignore fields from the analysis"},{"short":"v","long":"ignorableRecords","hasArg":true,"description":"ignore records from the analysis"},{"short":"f","long":"marcFormat","hasArg":true,"description":"MARC format (like 'ISO' or 'MARCXML')"},{"short":"s","long":"dataSource","hasArg":true,"description":"data source (file of stream)"},{"short":"g","long":"defaultEncoding","hasArg":true,"description":"default character encoding"},{"short":"1","long":"alephseqLineType","hasArg":true,"description":"Alephseq line type"},{"short":"2","long":"picaIdField","hasArg":true,"description":"PICA id field"},{"short":"u","long":"picaSubfieldSeparator","hasArg":true,"description":"PICA subfield separator"},{"short":"j","long":"picaSchemaFile","hasArg":true,"description":"Avram PICA schema file"},{"short":"w","long":"schemaType","hasArg":true,"description":"metadata schema type ('MARC21', 'UNIMARC', or 'PICA')"},{"short":"k","long":"picaRecordType","hasArg":true,"description":"picaRecordType"},{"short":"c","long":"allowableRecords","hasArg":true,"description":"allow records for the analysis"},{"short":"e","long":"groupBy","hasArg":true,"description":"group the results by the value of this data element (e.g. 
the ILN of library)"},{"short":"3","long":"groupListFile","hasArg":true,"description":"the file which contains a list of ILN codes"},{"short":"4","long":"solrForScoresUrl","hasArg":true,"description":"the URL of the Solr server used to store scores"}],"completeness":[{"short":"R","long":"format","hasArg":true,"description":"specify a format"},{"short":"V","long":"advanced","hasArg":false,"description":"advanced mode (not yet implemented)"},{"short":"P","long":"onlyPackages","hasArg":false,"description":"only packages (not yet implemented)"}],"validate":[{"short":"G","long":"summaryFileName","hasArg":true,"description":"the summary file name (provides a summary of issues, such as the number of instance and number of records having the particular issue)"},{"short":"S","long":"summary","hasArg":false,"description":"show summary instead of record level display"},{"short":"H","long":"details","hasArg":false,"description":"show record level display"},{"short":"F","long":"detailsFileName","hasArg":true,"description":"the report file name (default is 'issue-details.csv')"},{"short":"R","long":"format","hasArg":true,"description":"specify a format"},{"short":"W","long":"emptyLargeCollectors","hasArg":false,"description":"empty large collectors"},{"short":"T","long":"collectAllErrors","hasArg":false,"description":"collect all errors (useful only for validating small number of records)"},{"short":"I","long":"ignorableIssueTypes","hasArg":true,"description":"comma separated list of issue types not to collect"}],"index":[{"short":"S","long":"solrUrl","hasArg":true,"description":"the URL of Solr server including the core (e.g. http://localhost:8983/solr/loc)"},{"short":"A","long":"doCommit","hasArg":false,"description":"commits Solr index regularly"},{"short":"T","long":"solrFieldType","hasArg":true,"description":"type of Solr fields, could be one of 'marc-tags', 'human-readable', or 'mixed'"},{"short":"B","long":"useEmbedded","hasArg":false,"description":"use embedded Solr server (used in tests only)"},{"short":"C","long":"indexWithTokenizedField","hasArg":false,"description":"index data elements as tokenized field as well"},{"short":"D","long":"commitAt","hasArg":true,"description":"commit index after this number of records"},{"short":"E","long":"indexFieldCounts","hasArg":false,"description":"index the count of field instances"},{"short":"F","long":"fieldPrefix","hasArg":true,"description":"field prefix"}],"classifications":[{"short":"A","long":"collectCollocations","hasArg":false,"description":"collect collocation of schemas"}],"authorities":[{"short":"G","long":"summaryFileName","hasArg":true,"description":"the summary file name (provides a summary of issues, such as the number of instance and number of records having the particular issue)"},{"short":"S","long":"summary","hasArg":false,"description":"show summary instead of record level display"},{"short":"H","long":"details","hasArg":false,"description":"show record level display"},{"short":"F","long":"detailsFileName","hasArg":true,"description":"the report file name (default is 'issue-details.csv')"},{"short":"R","long":"format","hasArg":true,"description":"specify a format"},{"short":"W","long":"emptyLargeCollectors","hasArg":false,"description":"empty large collectors"},{"short":"T","long":"collectAllErrors","hasArg":false,"description":"collect all errors (useful only for validating small number of records)"},{"short":"I","long":"ignorableIssueTypes","hasArg":true,"description":"comma separated list of issue types not to 
collect"}],"tt-completeness":[{"short":"F","long":"fileName","hasArg":true,"description":"the report file name (default is tt-completeness.csv)"}],"shelf-ready-completeness":[{"short":"F","long":"fileName","hasArg":true,"description":"the report file name (default is shelf-ready-completeness.csv)"}],"bl-classification":[],"serial-score":[{"short":"F","long":"fileName","hasArg":true,"description":"the report file name (default is serial-score.csv)"}],"formatter":[{"short":"l","long":"selector","hasArg":true,"description":"selectors"},{"short":"p","long":"separator","hasArg":true,"description":"separator between the parts (default: TAB)"},{"short":"f","long":"format","hasArg":true,"description":"specify a format"},{"short":"s","long":"search","hasArg":true,"description":"search string ([path]=[value])"},{"short":"w","long":"withId","hasArg":false,"description":"the generated CSV should contain record ID as first field"},{"short":"c","long":"countNr","hasArg":true,"description":"count number of the record (e.g. 1 means the first record)"},{"short":"e","long":"fileName","hasArg":true,"description":"output file (default: extracted.csv)"}],"functional-analysis":[{"short":"R","long":"format","hasArg":true,"description":"specify a format"},{"short":"V","long":"advanced","hasArg":false,"description":"advanced mode (not yet implemented)"},{"short":"P","long":"onlyPackages","hasArg":false,"description":"only packages (not yet implemented)"}],"network-analysis":[{"short":"l","long":"group-limit","hasArg":true,"description":"pair creation limit"},{"short":"a","long":"action","hasArg":true,"description":"action: 'primary' (default), 'pairing'"}],"record-patterns":[{"short":"R","long":"format","hasArg":true,"description":"specify a format"},{"short":"V","long":"advanced","hasArg":false,"description":"advanced mode (not yet implemented)"},{"short":"P","long":"onlyPackages","hasArg":false,"description":"only packages (not yet implemented)"}],"shacl4bib":[{"short":"C","long":"shaclConfigurationFile","hasArg":true,"description":"specify the configuration file"},{"short":"O","long":"shaclOutputFile","hasArg":true,"description":"output file"},{"short":"P","long":"shaclOutputType","hasArg":true,"description":"output type (STATUS: status only, SCORE: score only, BOTH: status and score"}]}
2 changes: 1 addition & 1 deletion postprocess-solr
@@ -14,4 +14,4 @@ CORE_DEV=${CORE}_dev
echo "Swap ${CORE_DEV} to ${CORE}"
swap_cores ${CORE_DEV} ${CORE}

echo "Solr preparation DONE"
echo "Solr index swapping DONE"
2 changes: 1 addition & 1 deletion scripts/sqlite/modify-tables.sql
@@ -1,6 +1,6 @@
--- issue_details indices
CREATE INDEX IF NOT EXISTS "recordId" ON "issue_details" ("id");
CREATE INDEX IF NOT EXISTS "errorId" ON "issue_details" ("errorId");
CREATE INDEX IF NOT EXISTS "recordId" ON "issue_details" ("recordId");

--- issue_summary indices
CREATE INDEX IF NOT EXISTS "id" ON "issue_summary" ("id");
@@ -50,6 +50,7 @@ public String exportAll() {
options.put("formatter", export(new FormatterParameters())); // TODO at common-script
options.put("functional-analysis", export(new CompletenessParameters())); // TODO
options.put("network-analysis", export(new NetworkParameters()));
options.put("marc-history", export(new CommonParameters()));
options.put("record-patterns", export(new CompletenessParameters())); // TODO
// options.put("export-schema", read(new MappingParameters()));
options.put("shacl4bib", export(new Shacl4bibParameters()));
@@ -21,21 +21,20 @@ public void test() {
CliParameterDefinitionsExporter extractor = new CliParameterDefinitionsExporter();
String json = extractor.exportAll();
assertNotNull(json);
System.err.println(json);
assertTrue(json.contains("\"common\""));
ObjectMapper mapper = new ObjectMapper();
Map firstItem = null;
LinkedHashMap parameters = null;
try {
parameters = (LinkedHashMap) mapper.readValue(json, Object.class);

assertEquals(15, parameters.size());
assertEquals(16, parameters.size());
assertEquals(
Set.of(
"common", "completeness", "validate", "index", "classifications",
"authorities", "tt-completeness", "shelf-ready-completeness",
"bl-classification", "serial-score", "formatter", "functional-analysis",
"network-analysis", "record-patterns", "shacl4bib"
"network-analysis", "marc-history", "record-patterns", "shacl4bib"
),
parameters.keySet());

@@ -152,6 +151,9 @@ public void test() {
assertEquals(true, firstItem.get("hasArg"));
assertEquals("pair creation limit", firstItem.get("description"));

assertTrue(parameters.containsKey("marc-history"));
assertEquals(0, ((List) parameters.get("marc-history")).size());

assertTrue(parameters.containsKey("record-patterns"));
assertEquals(3, ((List) parameters.get("record-patterns")).size());
firstItem = (Map) ((List) parameters.get("record-patterns")).get(0);
