Skip to content

Commit

Permalink
Referencing code lists in JSON export #341
Browse files Browse the repository at this point in the history
  • Loading branch information
pkiraly committed Oct 27, 2023
1 parent 3965c86 commit f59ed59
Show file tree
Hide file tree
Showing 264 changed files with 183,880 additions and 315,089 deletions.
168,662 changes: 59,905 additions & 108,757 deletions marc-schema/marc-schema-with-solr-and-extensions.json

Large diffs are not rendered by default.

162,676 changes: 60,742 additions & 101,934 deletions marc-schema/marc-schema-with-solr.json

Large diffs are not rendered by default.

166,954 changes: 62,881 additions & 104,073 deletions marc-schema/marc-schema.json

Large diffs are not rendered by default.

37 changes: 32 additions & 5 deletions src/main/java/de/gwdg/metadataqa/marc/cli/utils/MappingToJson.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import de.gwdg.metadataqa.marc.utils.MarcTagLister;
import de.gwdg.metadataqa.marc.utils.keygenerator.DataFieldKeyGenerator;
import de.gwdg.metadataqa.marc.utils.keygenerator.PositionalControlFieldKeyGenerator;
import net.minidev.json.JSONStyle;
import net.minidev.json.JSONValue;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
Expand All @@ -53,6 +52,7 @@ public class MappingToJson {
private Map<String, Object> mapping;
private final Options options;
private MappingParameters parameters;
private Map<String, List<EncodedValue>> referencesCodeLists;

public MappingToJson(String[] args) throws ParseException {
parameters = new MappingParameters(args);
Expand All @@ -79,6 +79,8 @@ public String toJson() {
public void build() {
Map<String, Object> fields = new LinkedHashMap<>();

referencesCodeLists = new LinkedHashMap<>();

fields.put("LDR", buildLeader());
fields.putAll(buildSimpleControlFields());

Expand All @@ -100,6 +102,21 @@ public void build() {
}
}
mapping.put("fields", fields);

// Emit every referenced code list exactly once, keyed by the same URL that the
// subfield/position definitions store under their "codes" entry.
Map<String, Object> codelists = new LinkedHashMap<>();
for (Map.Entry<String, List<EncodedValue>> entry : referencesCodeLists.entrySet()) {
  Map<String, Map<String, Object>> codes = new LinkedHashMap<>();
  for (EncodedValue code : entry.getValue()) {
    // A mutable map is required here: Map.of() returns an unmodifiable map, so a
    // later put("range", ...) would throw UnsupportedOperationException, and
    // Map.of() also rejects null values (a code without a label).
    Map<String, Object> properties = new LinkedHashMap<>();
    properties.put("label", code.getLabel());
    if (code.getRange() != null) {
      properties.put("range", code.getRange());
    }
    codes.put(code.getCode(), properties);
  }
  codelists.put(entry.getKey(), codes);
}
mapping.put("codelists", codelists);

// System.err.println(referencesCodeLists.keySet());
}

private Map<String, Object> buildControlField(ControlFieldDefinition field, ControlfieldPositionList positionDefinition) {
Expand Down Expand Up @@ -312,6 +329,10 @@ private Map<String, Object> subfieldToJson(SubfieldDefinition subfield, DataFiel
if (subfield.getCodeList() != null
&& !subfield.getCodeList().getCodes().isEmpty()) {
CodeList codeList = subfield.getCodeList();
referencesCodeLists.put(codeList.getUrl(), codeList.getCodes());
codeMap.put("codes", codeList.getUrl());

/*
Map<String, Object> meta = new LinkedHashMap<>();
meta.put("name", codeList.getName());
meta.put("url", codeList.getUrl());
Expand All @@ -328,6 +349,7 @@ private Map<String, Object> subfieldToJson(SubfieldDefinition subfield, DataFiel
meta.put("codes", codes);
}
codeMap.put("codelist", meta);
*/
}

if (subfield.hasPositions())
Expand All @@ -342,7 +364,7 @@ private Map<String, Object> subfieldToJson(SubfieldDefinition subfield, DataFiel
return codeMap;
}

private static Map<String, Object> getSubfieldPositions(SubfieldDefinition subfield) {
private Map<String, Object> getSubfieldPositions(SubfieldDefinition subfield) {
Map<String, Object> positionListMap = new LinkedHashMap<>();
for (ControlfieldPositionDefinition position : subfield.getPositions()) {
Map<String, Object> positionMap = new LinkedHashMap<>();
Expand All @@ -357,16 +379,21 @@ private static Map<String, Object> getSubfieldPositions(SubfieldDefinition subfi
if (position.getCodes() != null && !position.getCodes().isEmpty()) {
positionMap.put("codes", extractCodes(position.getCodes()));
} else if (position.getCodeList() != null) {
positionMap.put("codes", extractCodes(position.getCodeList().getCodes()));
referencesCodeLists.put(position.getCodeList().getUrl(), position.getCodeList().getCodes());
positionMap.put("codes", position.getCodeList().getUrl());
// positionMap.put("codes", extractCodes(position.getCodeList().getCodes()));
} else if (position.getCodeListReference() != null) {
positionMap.put("codes", extractCodes(position.getCodeListReference().getCodes()));
String url = String.format("%s#%s", position.getCodeListReference().getDescriptionUrl(), position.getCodeListReference().getPositionStart());
referencesCodeLists.put(url, position.getCodeListReference().getCodes());
positionMap.put("codes", url);
// positionMap.put("codes", extractCodes(position.getCodeListReference().getCodes()));
} else {
logger.log(Level.WARNING, "{0}${1}/{2}: missing code list!", new Object[]{
subfield.getParent().getTag(), subfield.getCode(), position.getPositionStart()});
}
} else if (position.getValidator() != null) {
if (position.getValidator() instanceof RegexValidator)
positionMap.put("regex", ((RegexValidator)position.getValidator()).getPattern());
positionMap.put("pattern", ((RegexValidator)position.getValidator()).getPattern());
} else {
logger.log(Level.WARNING, "{0}${1}/{2}: missing code list and validation!", new Object[]{
subfield.getParent().getTag(), subfield.getCode(), position.getPositionStart()});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

/**
* Abbreviated Title Source Codes
* http://www.loc.gov/standards/sourcelist/abbreviated-title.html
* https://www.loc.gov/standards/sourcelist/abbreviated-title.html
* used in Bibliographic records 210 $2 (Abbreviated Title / Source)
*/
public class AbbreviatedTitleSourceCodes extends CodeList {

private void initialize() {
name = "Abbreviated Title Source Codes";
url = "http://www.loc.gov/standards/sourcelist/abbreviated-title.html";
url = "https://www.loc.gov/standards/sourcelist/abbreviated-title.html";
codes = Utils.generateCodes(
"din1430", "Key Title nach DIN 1430 (Berlin: Beuth)",
"din1502", "Regeln für das Kürzen von Wörtern in Titeln und für das Kürzen der Titel von Veröffentlichungen: DIN 1502 (Berlin; Köln: Beuth)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* MARC Authentication Action Code List
* http://www.loc.gov/standards/valuelist/marcauthen.html
* https://www.loc.gov/standards/valuelist/marcauthen.html
*/
public class AuthenticationActionCodes extends CodeList {

private void initialize() {
name = "MARC Authentication Action Code List";
url = "http://www.loc.gov/standards/valuelist/marcauthen.html";
url = "https://www.loc.gov/standards/valuelist/marcauthen.html";
codes = Utils.generateCodes(
"anuc", "Australian National Union Catalog",
"croatica", "Croatian National Bibliography",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Price Type Code Source Codes
* http://www.loc.gov/standards/sourcelist/price-type.html
* https://www.loc.gov/standards/sourcelist/price-type.html
*/
public class AvailabilityStatusCodeSourceCodes extends CodeList {

private void initialize() {
name = "Price Type Code Source Codes";
url = "http://www.loc.gov/standards/sourcelist/price-type.html";
url = "https://www.loc.gov/standards/sourcelist/price-type.html";
codes = Utils.generateCodes(
"onixas", "ONIX Product Availability Codes List number 54"
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ public class B3KatIdentifiers extends CodeList {

private void initialize() {
name = "MARC Organization Codes";
url = "http://www.loc.gov/marc/organizations/orgshome.html";
url = "https://www.loc.gov/marc/organizations/orgshome.html";
codes = EncodedValueFileReader.fileToCodeList("marc/b3kat.isil.csv");
indexCodes();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Cartographic Data Source Codes
* https://loc.gov/standards/sourcelist/cartographic-data.html
* https://www.loc.gov/standards/sourcelist/cartographic-data.html
*/
public class CartographicDataSourceCodes extends CodeList {

private void initialize() {
name = "Cartographic Data Source Codes";
url = "https://loc.gov/standards/sourcelist/cartographic-data.html";
url = "https://www.loc.gov/standards/sourcelist/cartographic-data.html";
codes = Utils.generateCodes(
"aadcg", "Australian Antarctic Data Centre Antarctic Gazetteer (Australian Antarctic Data Centre)",
"acgms", "Atlas of Canada Gazetteer Map Service (Natural Resources Canada)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/**
* Classification Scheme Source Codes
* http://www.loc.gov/standards/sourcelist/classification.html
* https://www.loc.gov/standards/sourcelist/classification.html
* used in Bibliographic records
* 052 $2 (Geographic Classification / Source)
* 055 $2 (Classification Numbers Assigned in Canada / Source of call/class number)
Expand All @@ -16,7 +16,7 @@ public class ClassificationSchemeSourceCodes extends CodeList {

private void initialize() {
name = "Classification Scheme Source Codes";
url = "http://www.loc.gov/standards/sourcelist/classification.html";
url = "https://www.loc.gov/standards/sourcelist/classification.html";
codes = Utils.generateCodes(
"accs", "Annehurst curriculum classification system (West Lafayette, IN: Kappa Delta Pi)",
"acmccs", "ACM Computing Classification System [2008 Version]",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

/**
* Copyright and Legal Deposit Number Source Codes
* http://www.loc.gov/standards/sourcelist/copyright-legal-deposit.html
* https://www.loc.gov/standards/sourcelist/copyright-legal-deposit.html
* used in Bibliographic records 017 $2 (Copyright or Legal Deposit Number / Source)
*/
public class CopyrightAndLegalDepositNumberSourceCodes extends CodeList {

private void initialize() {
name = "Copyright and Legal Deposit Number Source Codes";
url = "http://www.loc.gov/standards/sourcelist/copyright-legal-deposit.html";
url = "https://www.loc.gov/standards/sourcelist/copyright-legal-deposit.html";
codes = Utils.generateCodes(
"rocgpt", "R.O.C. Government Publications Catalogue (Taipei: Research, Development and Evaluation Commission, Executive Yuan)"
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* MARC Code List for Countries
* http://www.loc.gov/marc/countries/countries_code.html
* https://www.loc.gov/marc/countries/countries_code.html
*/
public class CountryCodes extends CodeList {

private void initialize() {
name = "MARC Code List for Countries";
url = "http://www.loc.gov/marc/countries/countries_code.html";
url = "https://www.loc.gov/marc/countries/countries_code.html";
codes = Utils.generateCodes(
"aa", "Albania",
"abc", "Alberta",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Country Code and Term Source Codes
* http://www.loc.gov/standards/sourcelist/country.html
* https://www.loc.gov/standards/sourcelist/country.html
*/
public class CountrySourceCodes extends CodeList {

private void initialize() {
name = "Country Code and Term Source Codes";
url = "http://www.loc.gov/standards/sourcelist/country.html";
url = "https://www.loc.gov/standards/sourcelist/country.html";
codes = Utils.generateCodes(
"marccountry", "MARC Code List for Countries (Washington, DC: Library of Congress)",
"iso3166", "Codes for the representation of names of countries and their subdivisions-Part 1, Country codes (Geneva: International Organization for Standardization)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

/**
* Curriculum Objective Term and Code Source Codes
* http://www.loc.gov/standards/sourcelist/curriculum-objective.html
* https://www.loc.gov/standards/sourcelist/curriculum-objective.html
* used in
* Bibliographic records 658$2 (Index Term - Curriculum Objective / Source of term or code)
* Community Information records 658$2 (Index Term - Curriculum Objective / Source of term or code)
Expand All @@ -13,7 +13,7 @@ public class CurriculumObjectiveTermAndCodeSourceCodes extends CodeList {

private void initialize() {
name = "Curriculum Objective Term and Code Source Codes";
url = "http://www.loc.gov/standards/sourcelist/curriculum-objective.html";
url = "https://www.loc.gov/standards/sourcelist/curriculum-objective.html";
codes = Utils.generateCodes(
"abledata", "ABLEDATA thesaurus (Silver Spring, MD: National Rehabilitation Information Center)",
"acccp", "Australian Cross-curriculum Priorities (Australian Curriculum, Assessment and Reporting Authority (ACARA))",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Date and Time Scheme Source Codes
* http://www.loc.gov/standards/sourcelist/date-time.html
* https://www.loc.gov/standards/sourcelist/date-time.html
*/
public class DateSourceCodes extends CodeList {

private void initialize() {
name = "Date and Time Scheme Source Codes";
url = "http://www.loc.gov/standards/sourcelist/date-time.html";
url = "https://www.loc.gov/standards/sourcelist/date-time.html";
codes = Utils.generateCodes(
"edtf", "Extended Date/Time Format",
"iso8601", "Code identifies formatted dates allowed in ISO 8601 which use the alternative described as \"basic\" (i.e., with minimum number of separators) rather than \"extended\" (i.e., with separators). This alternative specified in the standard uses the following date patterns: YYYY; YYYY-MM if only year and month given; YYYYMMDD if year, month, and day are included (hours, minutes, seconds may also be added: Thhmmss.s). It is also used for other encodings specified in ISO 8601, e.g., date ranges, which are in the form of <date/time>/<date/time>.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Electronic Access Methods Code List
* http://www.loc.gov/standards/valuelist/electronaccess.html
* https://www.loc.gov/standards/valuelist/electronaccess.html
*/
public class ElectronicAccessMethodsCodeList extends CodeList {

private void initialize() {
name = "Electronic Access Methods Code List";
url = "http://www.loc.gov/standards/valuelist/electronaccess.html";
url = "https://www.loc.gov/standards/valuelist/electronaccess.html";
codes = Utils.generateCodes(
"acap", "Application configuration access protocol",
"afs", "Andrew File System global file names",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,15 @@

/**
* Fingerprint Scheme Source Codes
* http://www.loc.gov/standards/sourcelist/fingerprint.html
* https://www.loc.gov/standards/sourcelist/fingerprint.html
* used in
* Bibliographic records 026 $2 (Fingerprint Identifier / Source)
*/
public class FingerprintSchemeSourceCodes extends CodeList {

private void initialize() {
name = "Fingerprint Scheme Source Codes";
url = "http://www.loc.gov/standards/sourcelist/fingerprint.html";
url = "https://www.loc.gov/standards/sourcelist/fingerprint.html";
codes = Utils.generateCodes(
"fei", "Fingerprints = Empreintes = Impronte (Paris: Institut de recherche et d'histoire des textes)",
"stcnf", "Vriesma, P.C.A. The STCN [Short title catalogue Netherlands] fingerprint (in Studies in bibliography, v. 39, 1986, p. 93-100) (s'-Gravenhage: Koninklijke Bibliotheek)"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Format Source Codes
* http://www.loc.gov/standards/sourcelist/format.html
* https://www.loc.gov/standards/sourcelist/format.html
*/
public class FormatSourceCodes extends CodeList {

private void initialize() {
name = "Format Source Codes";
url = "http://www.loc.gov/standards/sourcelist/format.html";
url = "https://www.loc.gov/standards/sourcelist/format.html";
codes = Utils.generateCodes(
"annamarc", "ANNAMARC: specifiche relative ai nastri magnetici contententi i record della Biblioteca Nazionale Italiana nel formato ANNAMARC ([Roma]: Istituto Centrale per il Catalogo Unico delle Biblioteche Italiane e per le Informazioni Bibliografiche)",
"ausmarc", "AUSMARC bibliographic format (Canberra: National Library of Australia",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Function Term Source Codes
* http://www.loc.gov/standards/sourcelist/function.html
* https://www.loc.gov/standards/sourcelist/function.html
*/
public class FunctionTermSourceCodes extends CodeList {

private void initialize() {
name = "Function Term Source Codes";
url = "http://www.loc.gov/standards/sourcelist/function.html";
url = "https://www.loc.gov/standards/sourcelist/function.html";
codes = Utils.generateCodes(
"dot", "Dictionary of occupational titles (Washington: United States Dept. of Labor, Employment and Training Administration, United States Employment Service)"
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Genre/Form Code and Term Source Codes.
* http://www.loc.gov/standards/sourcelist/genre-form.html
* https://www.loc.gov/standards/sourcelist/genre-form.html
*/
public class GenreFormCodeAndTermSourceCodes extends CodeList {

private void initialize() {
name = "Genre/Form Code and Term Source Codes";
url = "http://www.loc.gov/standards/sourcelist/genre-form.html";
url = "https://www.loc.gov/standards/sourcelist/genre-form.html";
codes = Utils.generateCodes(
"alett", "An alphabetical list of English text types (Berlin; New York: Mouton de Gruyter)",
"amg", "Audiovisual material glossary (Dublin, OH: Online Computer Library Center, Inc.)",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,14 @@

/**
* MARC Code List for Geographic Areas
* http://www.loc.gov/marc/geoareas/
* http://www.loc.gov/marc/geoareas/gacs_code.html
* https://www.loc.gov/marc/geoareas/
* https://www.loc.gov/marc/geoareas/gacs_code.html
*/
public class GeographicAreaCodes extends CodeList {

private void initialize() {
name = "MARC Code List for Geographic Areas";
url = "http://www.loc.gov/marc/geoareas/";
url = "https://www.loc.gov/marc/geoareas/";
codes = Utils.generateCodes(
"a", "Asia",
"a-af", "Afghanistan",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,13 @@

/**
* Geographic Area Code and Term Source Codes
* http://www.loc.gov/standards/sourcelist/geographic-area.html
* https://www.loc.gov/standards/sourcelist/geographic-area.html
*/
public class GeographicAreaSourceCodes extends CodeList {

private void initialize() {
name = "Geographic Area Code and Term Source Codes";
url = "http://www.loc.gov/standards/sourcelist/geographic-area.html";
url = "https://www.loc.gov/standards/sourcelist/geographic-area.html";
codes = Utils.generateCodes(
"ccga", "Cadre de classement geographique actuel (Paris: Bibliothèque Nationale)",
"marcgac", "MARC Code List for Geographic Areas"
Expand Down
Loading

0 comments on commit f59ed59

Please sign in to comment.