-
Notifications
You must be signed in to change notification settings - Fork 17
/
validate
executable file
·127 lines (113 loc) · 7.37 KB
/
validate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# Calling Validator
#
# Wrapper script: translates the command line options into the long-form
# options of the Java ValidatorCli class and runs it.

# Load the shared settings from the current working directory.
# NOTE(review): JAR is read further below but never set in this file, so it is
# presumably defined in common-variables - confirm.
. ./common-variables

# Name of this script without its directory part; used in the usage text and
# as the program name in getopt's error messages. "$0" is quoted so that a
# script path containing whitespace or glob characters is handled correctly.
ME=$(basename "$0")
#######################################
# Print the help message and terminate the script.
# Globals:   ME (read) - script name shown in the usage line
# Arguments: none
# Outputs:   writes the help text to stdout
# Returns:   never returns; exits the shell with status 1
#######################################
show_usage() { # display help message
  # The heredoc delimiter is unquoted on purpose so that ${ME} is expanded.
  cat <<EOF
QA catalogue validation
usage:
${ME} [options] <files>
options:
-m, --marcVersion <arg> MARC version ('OCLC' or 'DNB')
-h, --help display help
-n, --nolog do not display log messages
-l, --limit <arg> limit the number of records to process
-o, --offset <arg> the first record to process
-i, --id <arg> the MARC identifier (content of 001)
-d, --defaultRecordType <arg> the default record type if the record's type is undetectable
-q, --fixAlephseq fix the known issues of Alephseq format
-a, --fixAlma fix the known issues of Alma format
-b, --fixKbr fix the known issues of KBR format
-p, --alephseq the source is in Alephseq format
-x, --marcxml the source is in MARCXML format
-y, --lineSeparated the source is in line separated MARC format
-t, --outputDir <arg> output directory
-r, --trimId remove spaces from the end of record IDs
-z, --ignorableFields <arg> ignore fields from the analysis
-v, --ignorableRecords <arg> ignore records from the analysis
-f, --marcFormat <arg> MARC format (like 'ISO' or 'MARCXML')
-s, --dataSource <arg> data source (file or stream)
-g, --defaultEncoding <arg> default character encoding
-1, --alephseqLineType <arg> Alephseq line type
-2, --picaIdField <arg> PICA id field
-u, --picaSubfieldSeparator <arg> PICA subfield separator
-j, --picaSchemaFile <arg> Avram PICA schema file
-w, --schemaType <arg> metadata schema type ('MARC21', 'UNIMARC', or 'PICA')
-k, --picaRecordType <arg> picaRecordType
-c, --allowableRecords <arg> allow records for the analysis
-e, --groupBy <arg> group the results by the value of this data element (e.g. the ILN of library)
-3, --groupListFile <arg> the file which contains a list of ILN codes
-4, --solrForScoresUrl <arg> the URL of the Solr server used to store scores
-G, --summaryFileName <arg> the summary file name (provides a summary of issues, such as the number of instance and number of records having the particular issue)
-S, --summary show summary instead of record level display
-H, --details show record level display
-F, --detailsFileName <arg> the report file name (default is 'issue-details.csv')
-R, --format <arg> specify a format
-W, --emptyLargeCollectors empty large collectors
-T, --collectAllErrors collect all errors (useful only for validating small number of records)
-I, --ignorableIssueTypes <arg> comma separated list of issue types not to collect
more info: https://github.com/pkiraly/qa-catalogue#validating-marc-records
EOF
  exit 1
}
# Called without any argument: print the help text. show_usage ends the
# script itself, so nothing below runs in that case.
if (( $# == 0 )); then
  show_usage
fi
# Option specification handed to getopt(1); a trailing ':' marks an option
# that requires an argument.
SHORT_OPTIONS="m:hnl:o:i:d:qabpxyt:rz:v:f:s:g:1:2:u:j:w:k:c:e:3:4:G:SHF:R:WTI:"
LONG_OPTIONS="marcVersion:,help,nolog,limit:,offset:,id:,defaultRecordType:,fixAlephseq,fixAlma,fixKbr,alephseq,marcxml,lineSeparated,outputDir:,trimId,ignorableFields:,ignorableRecords:,marcFormat:,dataSource:,defaultEncoding:,alephseqLineType:,picaIdField:,picaSubfieldSeparator:,picaSchemaFile:,schemaType:,picaRecordType:,allowableRecords:,groupBy:,groupListFile:,solrForScoresUrl:,summaryFileName:,summary,details,detailsFileName:,format:,emptyLargeCollectors,collectAllErrors,ignorableIssueTypes:"

#######################################
# Parse a command line and translate every recognised short or long option
# into its long form.
# Globals:
#   SHORT_OPTIONS, LONG_OPTIONS, ME (read)
#   PARAMS (written) - array with the long-form options and their values
#   FILES  (written) - array with the remaining non-option arguments
#   HELP   (written) - 1 if -h/--help was given, 0 otherwise
# Arguments: the command line to parse
# Outputs:   getopt's diagnostics on stderr for an invalid command line
# Returns:   0 on success, 1 if getopt rejects the command line
#######################################
parse_options() {
  local normalized
  # Assigned separately from 'local' so getopt's exit status is not masked.
  normalized=$(getopt \
    -o "${SHORT_OPTIONS}" \
    --long "${LONG_OPTIONS}" \
    -n "${ME}" -- "$@") || return 1
  # getopt emits a shell-quoted string; eval restores the original word
  # boundaries as the positional parameters of this function.
  eval set -- "${normalized}"

  # Arrays keep each option value as a single word, so values containing
  # whitespace or glob characters are forwarded unchanged.
  PARAMS=()
  FILES=()
  HELP=0
  while true; do
    case "$1" in
      -m|--marcVersion) PARAMS+=(--marcVersion "$2"); shift 2 ;;
      -h|--help) PARAMS+=(--help); HELP=1; shift ;;
      -n|--nolog) PARAMS+=(--nolog); shift ;;
      -l|--limit) PARAMS+=(--limit "$2"); shift 2 ;;
      -o|--offset) PARAMS+=(--offset "$2"); shift 2 ;;
      -i|--id) PARAMS+=(--id "$2"); shift 2 ;;
      -d|--defaultRecordType) PARAMS+=(--defaultRecordType "$2"); shift 2 ;;
      -q|--fixAlephseq) PARAMS+=(--fixAlephseq); shift ;;
      -a|--fixAlma) PARAMS+=(--fixAlma); shift ;;
      -b|--fixKbr) PARAMS+=(--fixKbr); shift ;;
      -p|--alephseq) PARAMS+=(--alephseq); shift ;;
      -x|--marcxml) PARAMS+=(--marcxml); shift ;;
      -y|--lineSeparated) PARAMS+=(--lineSeparated); shift ;;
      -t|--outputDir) PARAMS+=(--outputDir "$2"); shift 2 ;;
      -r|--trimId) PARAMS+=(--trimId); shift ;;
      -z|--ignorableFields) PARAMS+=(--ignorableFields "$2"); shift 2 ;;
      -v|--ignorableRecords) PARAMS+=(--ignorableRecords "$2"); shift 2 ;;
      -f|--marcFormat) PARAMS+=(--marcFormat "$2"); shift 2 ;;
      -s|--dataSource) PARAMS+=(--dataSource "$2"); shift 2 ;;
      -g|--defaultEncoding) PARAMS+=(--defaultEncoding "$2"); shift 2 ;;
      -1|--alephseqLineType) PARAMS+=(--alephseqLineType "$2"); shift 2 ;;
      -2|--picaIdField) PARAMS+=(--picaIdField "$2"); shift 2 ;;
      -u|--picaSubfieldSeparator) PARAMS+=(--picaSubfieldSeparator "$2"); shift 2 ;;
      -j|--picaSchemaFile) PARAMS+=(--picaSchemaFile "$2"); shift 2 ;;
      -w|--schemaType) PARAMS+=(--schemaType "$2"); shift 2 ;;
      -k|--picaRecordType) PARAMS+=(--picaRecordType "$2"); shift 2 ;;
      -c|--allowableRecords) PARAMS+=(--allowableRecords "$2"); shift 2 ;;
      -e|--groupBy) PARAMS+=(--groupBy "$2"); shift 2 ;;
      -3|--groupListFile) PARAMS+=(--groupListFile "$2"); shift 2 ;;
      -4|--solrForScoresUrl) PARAMS+=(--solrForScoresUrl "$2"); shift 2 ;;
      -G|--summaryFileName) PARAMS+=(--summaryFileName "$2"); shift 2 ;;
      -S|--summary) PARAMS+=(--summary); shift ;;
      -H|--details) PARAMS+=(--details); shift ;;
      -F|--detailsFileName) PARAMS+=(--detailsFileName "$2"); shift 2 ;;
      -R|--format) PARAMS+=(--format "$2"); shift 2 ;;
      -W|--emptyLargeCollectors) PARAMS+=(--emptyLargeCollectors); shift ;;
      -T|--collectAllErrors) PARAMS+=(--collectAllErrors); shift ;;
      -I|--ignorableIssueTypes) PARAMS+=(--ignorableIssueTypes "$2"); shift 2 ;;
      --) shift; break ;;
      # Reached only if getopt accepts an option that has no arm above.
      *) echo "Internal error!: $1" >&2; exit 1 ;;
    esac
  done
  # Everything after '--' is a non-option argument (the input files).
  FILES=("$@")
}

# Stop on an invalid command line instead of starting the JVM with a
# partially parsed option list; getopt has already reported the problem.
parse_options "$@" || exit 1

if [[ $HELP -eq 1 ]]; then
  show_usage
fi

# The command is kept as an array so that each element reaches java as
# exactly one argument.
CMD=(/usr/bin/java -Xmx8g -cp "$JAR" de.gwdg.metadataqa.marc.cli.ValidatorCli)
# Show the command line before running it.
echo "${CMD[@]}" "${PARAMS[@]}" "${FILES[@]}"
"${CMD[@]}" "${PARAMS[@]}" "${FILES[@]}"