Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/master' into 527-output-valid-le…
Browse files Browse the repository at this point in the history
…ader
  • Loading branch information
TobiasNx committed Nov 19, 2024
2 parents a0f2736 + 29b9759 commit b39ac98
Show file tree
Hide file tree
Showing 23 changed files with 342 additions and 79 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
![Metafacture](https://raw.github.com/wiki/metafacture/metafacture-core/img/metafacture.png)

[![Build](https://github.com/metafacture/metafacture-core/workflows/Build/badge.svg?branch=master)](https://github.com/metafacture/metafacture-core/actions?query=workflow%3ABuild)
[![Build](https://github.com/metafacture/metafacture-core/actions/workflows/build.yml/badge.svg?branch=master)](https://github.com/metafacture/metafacture-core/actions?query=workflow%3ABuild)

Metafacture is a toolkit for processing semi-structured data with a focus on library metadata. It provides a versatile set of tools for reading, writing and transforming data. Metafacture can be used as a stand-alone application or as a Java library in other applications. The name Metafacture is a portmanteau of the words *meta*data and manu*facture*.

Expand Down
2 changes: 1 addition & 1 deletion build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ subprojects {
versions = [
'assertj_core': '3.11.1',
'commons_compress': '1.21',
'guava': '29.0-jre',
'guava': '32.0.1-jre',
'jackson_databind': '2.15.1',
'jdk': '11',
'junit': '4.12',
Expand Down
Binary file modified gradle/wrapper/gradle-wrapper.jar
Binary file not shown.
4 changes: 2 additions & 2 deletions gradle/wrapper/gradle-wrapper.properties
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionSha256Sum=a4b4158601f8636cdeeab09bd76afb640030bb5b144aafe261a5e8af027dc612
distributionUrl=https\://services.gradle.org/distributions/gradle-8.8-bin.zip
distributionSha256Sum=57dafb5c2622c6cc08b993c85b7c06956a2f53536432a30ead46166dbca0f1e9
distributionUrl=https\://services.gradle.org/distributions/gradle-8.11-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
Expand Down
7 changes: 5 additions & 2 deletions gradlew
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#

##############################################################################
#
Expand Down Expand Up @@ -55,7 +57,7 @@
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
Expand Down Expand Up @@ -84,7 +86,8 @@ done
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
' "$PWD" ) || exit

# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
Expand Down
2 changes: 2 additions & 0 deletions gradlew.bat
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem

@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ public final class Marc21Encoder extends
private State state = State.IN_STREAM;

private boolean generateIdField;
private boolean validateLeader = true;

/**
* Initializes the encoder with MARC 21 constants and charset.
Expand Down Expand Up @@ -108,6 +109,18 @@ public void setGenerateIdField(final boolean generateIdField) {
this.generateIdField = generateIdField;
}

/**
* Controls whether the leader should be validated.
* <p>
* While validation is enabled, a code that is not among the allowed codes
* is rejected with a {@code FormatException}; while it is disabled, such
* codes are accepted without being checked.
* <p>
* The default value of {@code validateLeader} is true.
*
* @param validateLeader if false the leader is not validated
*/
public void setValidateLeader(final boolean validateLeader) {
this.validateLeader = validateLeader;
}

/**
* Gets the flag to decide whether the ID field is generated.
*
Expand Down Expand Up @@ -259,12 +272,14 @@ private void processLeaderAsSubfields(final String name, final char code) {
}

private void requireValidCode(final char code, final char[] validCodes) {
for (final char validCode: validCodes) {
if (validCode == code) {
return;
if (validateLeader) {
for (final char validCode : validCodes) {
if (validCode == code) {
return;
}
}
throw new FormatException("invalid code '" + code + "'; allowed codes are: " + Arrays.toString(validCodes));
}
throw new FormatException("invalid code '" + code + "'; allowed codes are: " + Arrays.toString(validCodes));
}

private void processTopLevelLiteral(final String name, final String value) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ public final class MarcXmlEncoder extends DefaultStreamPipe<ObjectReceiver<Strin
public static final boolean OMIT_XML_DECLARATION = false;
public static final boolean ENSURE_CORRECT_MARC21_XML = false;

private static final String ROOT_OPEN = "<marc:collection xmlns:marc=\"http://www.loc.gov/MARC21/slim\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"http://www.loc.gov/MARC21/slim http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd\">";
private static final String ROOT_CLOSE = "</marc:collection>";

private enum Tag {

collection(" xmlns%s=\"" + NAMESPACE + "\"%s"),
Expand Down Expand Up @@ -106,7 +103,6 @@ public String close(final Object[] args) {
private static final int TAG_END = 3;

private final Encoder encoder = new Encoder();
private final Marc21Decoder decoder = new Marc21Decoder();
private final Marc21Encoder wrapper = new Marc21Encoder();

private DefaultStreamPipe<ObjectReceiver<String>> pipe;
Expand All @@ -115,6 +111,7 @@ public String close(final Object[] args) {
* Creates an instance of {@link MarcXmlEncoder}.
*/
public MarcXmlEncoder() {
final Marc21Decoder decoder = new Marc21Decoder();
decoder.setEmitLeaderAsWhole(true);

wrapper
Expand All @@ -136,7 +133,6 @@ public void setEmitNamespace(final boolean emitNamespace) {

/**
* Sets the flag to decide whether to omit the XML declaration.
*
* <strong>Default value: {@value #OMIT_XML_DECLARATION}</strong>
*
* @param currentOmitXmlDeclaration true if the XML declaration is omitted, otherwise
Expand All @@ -148,7 +144,6 @@ public void omitXmlDeclaration(final boolean currentOmitXmlDeclaration) {

/**
* Sets the XML version.
*
* <strong>Default value: {@value #XML_VERSION}</strong>
*
* @param xmlVersion the XML version
Expand All @@ -159,7 +154,6 @@ public void setXmlVersion(final String xmlVersion) {

/**
* Sets the XML encoding.
*
* <strong>Default value: {@value #XML_ENCODING}</strong>
*
* @param xmlEncoding the XML encoding
Expand All @@ -173,7 +167,6 @@ public void setXmlEncoding(final String xmlEncoding) {
* If true, the input data is validated to ensure correct MARC21. Also the leader may be generated.
* It acts as a wrapper: the input is piped to {@link org.metafacture.biblio.marc21.Marc21Encoder}, whose output is piped to {@link org.metafacture.biblio.marc21.Marc21Decoder}, whose output is piped to {@link org.metafacture.biblio.marc21.MarcXmlEncoder}.
* This validation and treatment of the leader is safer but comes with a performance impact.
*
* <strong>Default value: {@value #ENSURE_CORRECT_MARC21_XML}</strong>
*
* @param ensureCorrectMarc21Xml if true the input data is validated to ensure correct MARC21. Also the leader may be generated.
Expand All @@ -184,7 +177,6 @@ public void setEnsureCorrectMarc21Xml(final boolean ensureCorrectMarc21Xml) {

/**
* Formats the resulting XML by indentation, also known as "pretty printing".
*
* <strong>Default value: {@value #PRETTY_PRINTED}</strong>
*
* @param formatted true if formatting is activated, otherwise false
Expand Down Expand Up @@ -220,7 +212,7 @@ public void literal(final String name, final String value) {

@Override
protected void onResetStream() {
pipe.resetStream();
encoder.onResetStream();
}

@Override
Expand All @@ -247,11 +239,12 @@ private static class Encoder extends DefaultStreamPipe<ObjectReceiver<String>> {
private String currentEntity = "";

private boolean emitNamespace = true;
private Object[] namespacePrefix = new Object[]{emitNamespace ? NAMESPACE_PREFIX : EMPTY};
private Object[] namespacePrefix = new Object[]{NAMESPACE_PREFIX};

private int indentationLevel;
private boolean formatted = PRETTY_PRINTED;
private int recordAttributeOffset;
private int recordLeaderOffset;

private Encoder() {
}
Expand Down Expand Up @@ -294,7 +287,7 @@ public void startRecord(final String identifier) {
writeTag(Tag.record::open);
recordAttributeOffset = builder.length() - 1;
prettyPrintNewLine();

recordLeaderOffset = builder.length();
incrementIndentationLevel();
}

Expand Down Expand Up @@ -345,6 +338,7 @@ public void literal(final String name, final String value) {
if (name.equals(Marc21EventNames.MARCXML_TYPE_LITERAL)) {
if (value != null) {
builder.insert(recordAttributeOffset, String.format(ATTRIBUTE_TEMPLATE, name, value));
recordLeaderOffset = builder.length();
}
}
else if (!appendLeader(name, value)) {
Expand All @@ -353,7 +347,7 @@ else if (!appendLeader(name, value)) {
if (value != null) {
writeEscaped(value.trim());
}
writeTag(Tag.controlfield::close);
writeTag(Tag.controlfield::close, false);
prettyPrintNewLine();
}
}
Expand All @@ -378,7 +372,9 @@ protected void onResetStream() {

@Override
protected void onCloseStream() {
writeFooter();
if (!atStreamStart) {
writeFooter();
}
sendAndClearData();
}

Expand Down Expand Up @@ -408,9 +404,20 @@ private void writeFooter() {
* @param str the unescaped sequence to be written
*/
private void writeRaw(final String str) {

builder.append(str);
}

/**
 * Inserts the unescaped sequence at the current leader position and then
 * moves that position past the inserted text, so that consecutive calls
 * place their text one after another.
 *
 * @param str the unescaped sequence to be inserted at the leader position
 */
private void writeRawLeader(final String str) {
    final int insertAt = recordLeaderOffset;
    builder.insert(insertAt, str);
    recordLeaderOffset = insertAt + str.length();
}

private boolean appendLeader(final String name, final String value) {
if (name.equals(Marc21EventNames.LEADER_ENTITY)) {
leaderBuilder.append(value);
Expand All @@ -432,12 +439,18 @@ private void writeEscaped(final String str) {

private void writeLeader() {
final String leader = leaderBuilder.toString();
if (!leader.isEmpty()) {
prettyPrintIndentation();
writeTag(Tag.leader::open);
writeRaw("0000" + leader.substring(0, 4) + "2200000" + leader.substring(5, 7) + "4500"); // creates a valid leader without counted elements
writeTag(Tag.leader::close);
prettyPrintNewLine();
if (leaderBuilder.length() > 0) {
if (formatted) {
writeRawLeader(getIndentationPrefix());
}

writeTagLeader(Tag.leader::open);
writeRawLeader("0000" + leader.substring(0, 4) + "2200000" + leader.substring(5, 7) + "4500"); // creates a valid leader without counted elements
writeTagLeader(Tag.leader::close);

if (formatted) {
writeRawLeader(NEW_LINE);
}
}
}

Expand All @@ -447,10 +460,17 @@ private void writeTag(final Function<Object[], String> function, final Object...
writeRaw(function.apply(allArgs));
}

/**
 * Renders a tag with the namespace prefix as its only argument and inserts
 * the result at the leader position.
 *
 * @param function the function that turns the arguments into the tag text
 */
private void writeTagLeader(final Function<Object[], String> function) {
    final String tag = function.apply(namespacePrefix);
    writeRawLeader(tag);
}

/**
 * Builds the indentation for the current indentation level.
 *
 * @return {@code INDENT} repeated {@code indentationLevel} times
 */
private String getIndentationPrefix() {
    final Iterable<? extends CharSequence> indents = Collections.nCopies(indentationLevel, INDENT);
    return String.join("", indents);
}

private void prettyPrintIndentation() {
if (formatted) {
final String prefix = String.join("", Collections.nCopies(indentationLevel, INDENT));
builder.append(prefix);
builder.append(getIndentationPrefix());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
* @author Markus Michael Geipel
*
*/
@Description("A marc xml reader")
@Description("A MARC XML reader. To read marc data without namespace specification set option `namespace=\"\"`")
@In(XmlReceiver.class)
@Out(StreamReceiver.class)
@FluxCommand("handle-marcxml")
Expand Down Expand Up @@ -63,7 +63,8 @@ public MarcXmlHandler() {
*
* <strong>Default value: {@value #NAMESPACE}</strong>
*
* @param namespace the namespace
* @param namespace the namespace. Set to null if namespace shouldn't be checked. Set to empty string
* if the namespace is missing in the data.
*/
public void setNamespace(final String namespace) {
this.namespace = namespace;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
*/
public final class Marc21EncoderTest {

private static final String BAD_LEADER = "00600ny a22002053n 4500";

private Marc21Encoder marc21Encoder;

@Mock
Expand Down Expand Up @@ -147,4 +149,21 @@ public void issue524ShouldComputeValidLeader() {
verify(receiver).process(matches("00055pam a2200037 c 4500021001700000\u001e.*\u001d"));
}

@Test(expected = FormatException.class)
public void issue567ShouldFailValidateLeaderAsDefault() {
    // Leader validation is left at its default, so the bad leader is expected
    // to make the encoder throw a FormatException.
    final String recordId = "";

    marc21Encoder.startRecord(recordId);
    marc21Encoder.literal(LEADER_ENTITY, BAD_LEADER);
    marc21Encoder.endRecord();
}

@Test
public void issue567ShouldNotValidateLeader() {
    // With leader validation switched off, the bad leader must be accepted.
    marc21Encoder.setValidateLeader(false);

    final String recordId = "";
    marc21Encoder.startRecord(recordId);
    marc21Encoder.literal(LEADER_ENTITY, BAD_LEADER);
    marc21Encoder.endRecord();

    final String expectedRecord = "00026ny a22000253n 4500\u001e\u001d";
    verify(receiver).process(matches(expectedRecord));
}

}
Loading

0 comments on commit b39ac98

Please sign in to comment.