-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PICA: Extend classification/subject headings schemes from config file #…
- Loading branch information
Showing
11 changed files
with
211 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
src/main/java/de/gwdg/metadataqa/marc/utils/pica/PicaSubjectManager.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
package de.gwdg.metadataqa.marc.utils.pica; | ||
|
||
import de.gwdg.metadataqa.marc.analysis.FieldWithScheme; | ||
|
||
import java.io.BufferedReader; | ||
import java.io.FileReader; | ||
import java.io.IOException; | ||
import java.nio.file.Paths; | ||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class PicaSubjectManager { | ||
private static List<FieldWithScheme> fields; | ||
private static List<String> tags; | ||
private static final String schemaFile = Paths.get("src/main/resources/pica/k10plus-subjects.tsv").toAbsolutePath().toString(); | ||
|
||
public static List<FieldWithScheme> readFieldsWithScheme() { | ||
if (fields == null) | ||
read(); | ||
return fields; | ||
} | ||
|
||
public static List<String> getTags() { | ||
if (tags == null) | ||
read(); | ||
return tags; | ||
} | ||
|
||
public static String getSchemaFile() { | ||
return schemaFile; | ||
} | ||
|
||
private static void read() { | ||
fields = new ArrayList<>(); | ||
tags = new ArrayList<>(); | ||
try (BufferedReader br = new BufferedReader(new FileReader(schemaFile))) { | ||
String line; | ||
while ((line = br.readLine()) != null) { | ||
String[] parts = line.split("\\t"); | ||
String tag = parts[0]; | ||
if (!parts[1].equals("")) | ||
tag += "/" + parts[1]; | ||
tags.add(tag); | ||
fields.add(new FieldWithScheme(tag, parts[2])); | ||
} | ||
} catch (IOException e) { | ||
e.getLocalizedMessage(); | ||
} | ||
} | ||
|
||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
041A 00-99 Schlagwortfolgen (DNB und Verbünde) | ||
044A LoC Subject Headings | ||
044C Medical Subject Headings (MeSH) | ||
044H Erschließung von Musikalien nach Besetzung und Form/Gattung | ||
044K 00-09 Schlagwortfolgen (GBV, SWB, K10plus) | ||
044L 00-09 Einzelschlagwörter (Projekte) | ||
044N Schlagwörter aus einem Thesaurus und freie Schlagwörter | ||
044S Gattungsbegriffe bei Alten Drucken | ||
044Z 00-99 Lokale Schlagwörter auf bibliografischer Ebene | ||
045A LCC-Notation | ||
045B 00 Allgemeine Systematik für Bibliotheken (ASB) | ||
045B 01 Systematik der Stadtbibliothek Duisburg (SSD) | ||
045B 02 Systematik für Bibliotheken (SfB) | ||
045B 03 Klassifikation für Allgemeinbibliotheken (KAB) | ||
045B 04 Systematiken der ekz | ||
045B 05 Gattungsbegriffe (DNB) | ||
045C Klassifikation der National Library of Medicine (NLM) | ||
045D 00-29 STW-Schlagwörter | ||
045D 30-39 STW-Schlagwörter - automatisierte verbale Sacherschließung | ||
045D 40-48 STW-Schlagwörter - Platzhalter | ||
045D 49 ZBW-Schlagwörter - Veröffentlichungsart | ||
045D 50 Vorläufige Schlagwörter (STW) | ||
045D 60 FIV-Schlagwörter (Themen) | ||
045D 70 FIV-Schlagwörter (Aspekte) | ||
045E Sachgruppen der Deutschen Nationalbibliografie bis 2003 | ||
045F DDC-Notation | ||
045G Sachgruppen der Deutschen Nationalbibliografie ab 2004 | ||
045H 00-99 DDC-Notation: Vollständige Notation | ||
045M 00-99 Lokale Notationen auf bibliografischer Ebene | ||
045N FIV-Regionalklassifikation | ||
045N 01 FIV-Sachklassifikation | ||
045N 02 Sonstige Notation des FIV | ||
045Q 01 Basisklassifikation | ||
045R Regensburger Verbundklassifikation (RVK) | ||
045S Deutsche Bibliotheksstatistik (DBS) | ||
045T Nicht mehr gültige Notationen der Regensburger Verbundklassifikation (RVK) | ||
045V SSG-Nummer/FID-Kennzeichen | ||
045W SSG-Angabe für thematische OLC-Ausschnitte | ||
045X Notation eines Klassifikationssystems | ||
045Y SSG-Angabe für Fachkataloge |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
#!/usr/bin/env bash
#
#--------------------------------
# retrieve subjects from K10plus
#--------------------------------
#
# Downloads the K10plus title profile, keeps the fields whose tag starts with
# 044, 045 or 041A, and writes tag, occurrence and label as tab-separated
# columns to k10plus-subjects.tsv in the current directory.

# Abort on any failing command, on unset variables, and on a failure anywhere in the pipeline.
set -euo pipefail

URL='https://format.k10plus.de/avram.pl?profile=k10plus-title'
OUTPUT='k10plus-subjects.tsv'
TMP_OUTPUT="${OUTPUT}.tmp"

# Remove the temporary file if the script stops before the final move.
trap 'rm -f "$TMP_OUTPUT"' EXIT

# --fail makes curl return a non-zero status on HTTP errors instead of passing
# the error page on to jq. The result goes to a temporary file first so that an
# existing k10plus-subjects.tsv is not truncated by a failed download.
curl --fail --silent --show-error "$URL" \
  | jq -r '.fields[] | select(.tag | test("^(04[45]|041A)")) | [.tag, .occurrence, .label] | @tsv' \
  > "$TMP_OUTPUT"

mv "$TMP_OUTPUT" "$OUTPUT"
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
20 changes: 20 additions & 0 deletions
20
src/test/java/de/gwdg/metadataqa/marc/utils/pica/PicaSubjectManagerTest.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package de.gwdg.metadataqa.marc.utils.pica; | ||
|
||
import de.gwdg.metadataqa.marc.analysis.FieldWithScheme; | ||
import org.junit.Test; | ||
|
||
import java.util.List; | ||
|
||
import static org.junit.Assert.*; | ||
|
||
public class PicaSubjectManagerTest { | ||
|
||
@Test | ||
public void readFieldsWithScheme() { | ||
List<FieldWithScheme> fields = PicaSubjectManager.readFieldsWithScheme(); | ||
assertEquals(40, fields.size()); | ||
assertEquals(FieldWithScheme.class, fields.get(0).getClass()); | ||
assertEquals("041A/00-99", fields.get(0).getTag()); | ||
assertEquals("Schlagwortfolgen (DNB und Verbünde)", fields.get(0).getSchemaName()); | ||
} | ||
} |