Skip to content

Commit

Permalink
Fix #504
Browse files Browse the repository at this point in the history
  • Loading branch information
hmiguim committed Apr 28, 2022
1 parent 6605b4b commit cd4a217
Show file tree
Hide file tree
Showing 7 changed files with 86 additions and 38 deletions.
2 changes: 1 addition & 1 deletion dbptk-bindings/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<parent>
<groupId>com.databasepreservation</groupId>
<artifactId>dbptk</artifactId>
<version>2.9.11-SNAPSHOT</version>
<version>2.10.0-SNAPSHOT</version>
<relativePath>..</relativePath>
</parent>
<packaging>pom</packaging>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,7 @@ public void setup() throws IOException, InterruptedException, URISyntaxException
new String[] {"migrate", "--import=mysql", "--import-hostname=127.0.0.1", "--import-database", db_source,
"--import-username", db_tmp_username, "--import-password", db_tmp_password, "--export=siard-2",
"--export-compress", "--export-file", Roundtrip.TMP_FILE_SIARD_VAR, "--export-pretty-xml",
"--export-external-lobs"},
"--export-external-lobs", "--export-external-lobs-blob-threshold-limit=0", "--export-external-lobs-clob-threshold-limit=0"},

new String[] {"migrate", "--import=siard-2", "--import-file", Roundtrip.TMP_FILE_SIARD_VAR, "--export=mysql",
"--export-hostname=127.0.0.1", "--export-database", db_target, "--export-username", db_tmp_username,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ public void setup() throws IOException, InterruptedException, URISyntaxException
new String[] {"migrate", "--import=postgresql", "--import-hostname=127.0.0.1", "--import-database", db_source,
"--import-username", db_tmp_username, "--import-password", db_tmp_password, "--import-disable-encryption",
"--export=siard-2", "--export-file", Roundtrip.TMP_FILE_SIARD_VAR, "--export-pretty-xml",
"--export-external-lobs"},
"--export-external-lobs","--export-external-lobs-blob-threshold-limit=0", "--export-external-lobs-clob-threshold-limit=0"},

new String[] {"migrate", "--import=siard-2", "--import-file", Roundtrip.TMP_FILE_SIARD_VAR, "--export=postgresql",
"--export-hostname=127.0.0.1", "--export-database", db_target, "--export-username", db_tmp_username,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,8 @@ public class SIARD2ModuleFactory implements DatabaseModuleFactory {
public static final String PARAMETER_EXTERNAL_LOBS = "external-lobs";
public static final String PARAMETER_EXTERNAL_LOBS_PER_FOLDER = "external-lobs-per-folder";
public static final String PARAMETER_EXTERNAL_LOBS_FOLDER_SIZE = "external-lobs-folder-size";
public static final String PARAMETER_EXTERNAL_LOBS_BLOB_THRESHOLD_LIMIT = "external-lobs-blob-threshold-limit";
public static final String PARAMETER_EXTERNAL_LOBS_CLOB_THRESHOLD_LIMIT = "external-lobs-clob-threshold-limit";
public static final String PARAMETER_META_DESCRIPTION = "meta-description";
public static final String PARAMETER_META_ARCHIVER = "meta-archiver";
public static final String PARAMETER_META_ARCHIVER_CONTACT = "meta-archiver-contact";
Expand Down Expand Up @@ -81,7 +83,7 @@ public class SIARD2ModuleFactory implements DatabaseModuleFactory {
.valueIfSet("true");

private static final Parameter externalLobs = new Parameter().shortName("el").longName(PARAMETER_EXTERNAL_LOBS)
.description("Saves any LOBs outside the siard file.").required(false).hasArgument(false).valueIfSet("true")
.description("Saves any LOBs outside the SIARD file.").required(false).hasArgument(false).valueIfSet("true")
.valueIfNotSet("false");

private static final Parameter externalLobsPerFolder = new Parameter().shortName("elpf")
Expand All @@ -95,6 +97,18 @@ public class SIARD2ModuleFactory implements DatabaseModuleFactory {
"Divide LOBs across multiple external folders with (approximately) the specified maximum size (in Megabytes). Default: do not divide.")
.required(false).hasArgument(true).setOptionalArgument(false).valueIfNotSet("0");

/**
 * Optional export parameter {@code external-lobs-blob-threshold-limit} (short name {@code elblobtl}).
 * It takes an argument giving a size in bytes; per its description, BLOBs that do not exceed this
 * size stay inside the SIARD file. The value used when the parameter is not set is {@code "2000"}.
 */
private static final Parameter externalLobsBLOBThresholdLimit = new Parameter().shortName("elblobtl")
.longName(PARAMETER_EXTERNAL_LOBS_BLOB_THRESHOLD_LIMIT)
.description(
"Keep BLOBs stored inside the SIARD file if the threshold is not exceeded (in bytes). Default: 2000 bytes.")
.required(false).hasArgument(true).setOptionalArgument(false).valueIfNotSet("2000");

/**
 * Optional export parameter {@code external-lobs-clob-threshold-limit} (short name {@code elclobtl}).
 * It takes an argument giving a size in bytes; per its description, CLOBs that do not exceed this
 * size stay inside the SIARD file. The value used when the parameter is not set is {@code "4000"}.
 */
private static final Parameter externalLobsCLOBThresholdLimit = new Parameter().shortName("elclobtl")
.longName(PARAMETER_EXTERNAL_LOBS_CLOB_THRESHOLD_LIMIT)
.description(
"Keep CLOBs stored inside the SIARD file if the threshold is not exceeded (in bytes). Default: 4000 bytes.")
.required(false).hasArgument(true).setOptionalArgument(false).valueIfNotSet("4000");

private static final Parameter metaDescription = new Parameter().shortName("md").longName(PARAMETER_META_DESCRIPTION)
.description("SIARD descriptive metadata field: Description of database meaning and content as a whole.")
.required(false).hasArgument(true).setOptionalArgument(true).valueIfNotSet(UNSPECIFIED_METADATA_VALUE);
Expand Down Expand Up @@ -176,6 +190,8 @@ public Map<String, Parameter> getAllParameters() {
parameterHashMap.put(externalLobs.longName(), externalLobs);
parameterHashMap.put(externalLobsPerFolder.longName(), externalLobsPerFolder);
parameterHashMap.put(externalLobsFolderSize.longName(), externalLobsFolderSize);
parameterHashMap.put(externalLobsBLOBThresholdLimit.longName(), externalLobsBLOBThresholdLimit);
parameterHashMap.put(externalLobsCLOBThresholdLimit.longName(), externalLobsCLOBThresholdLimit);
parameterHashMap.put(metaDescription.longName(), metaDescription);
parameterHashMap.put(metaArchiver.longName(), metaArchiver);
parameterHashMap.put(metaArchiverContact.longName(), metaArchiverContact);
Expand Down Expand Up @@ -210,15 +226,17 @@ public Parameters getExportModuleParameters() {
externalLobs.inputType(INPUT_TYPE.CHECKBOX).exportOptions(CATEGORY_TYPE.EXTERNAL_LOBS),
externalLobsPerFolder.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.EXTERNAL_LOBS),
externalLobsFolderSize.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.EXTERNAL_LOBS),
externalLobsBLOBThresholdLimit.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.EXTERNAL_LOBS),
externalLobsCLOBThresholdLimit.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.EXTERNAL_LOBS),
metaDescription.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.METADATA_EXPORT_OPTIONS),
metaArchiver.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.METADATA_EXPORT_OPTIONS),
metaArchiverContact.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.METADATA_EXPORT_OPTIONS),
metaDataOwner.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.METADATA_EXPORT_OPTIONS),
metaDataOriginTimespan.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.METADATA_EXPORT_OPTIONS),
metaClientMachine.inputType(INPUT_TYPE.TEXT).exportOptions(CATEGORY_TYPE.METADATA_EXPORT_OPTIONS),
gmlDirectory.inputType(INPUT_TYPE.DEFAULT),
messageDigestAlgorithm.inputType(INPUT_TYPE.COMBOBOX).possibleValues("MD5", "SHA-1", "SHA-256").defaultSelectedIndex(2)
.exportOptions(CATEGORY_TYPE.SIARD_EXPORT_OPTIONS),
messageDigestAlgorithm.inputType(INPUT_TYPE.COMBOBOX).possibleValues("MD5", "SHA-1", "SHA-256")
.defaultSelectedIndex(2).exportOptions(CATEGORY_TYPE.SIARD_EXPORT_OPTIONS),
fontCase.inputType(INPUT_TYPE.COMBOBOX).possibleValues("uppercase", "lowercase").defaultSelectedIndex(1)
.exportOptions(CATEGORY_TYPE.SIARD_EXPORT_OPTIONS)),
Collections.emptyList());
Expand Down Expand Up @@ -292,6 +310,24 @@ public DatabaseFilterModule buildExportModule(Map<Parameter, String> parameters,
}
}

// optional: BLOB threshold in bytes. Starts from the parameter's not-set value and is only
// overridden by a non-blank user value; a negative user value falls back to the not-set value.
// NOTE(review): a non-numeric value makes Long.parseLong throw NumberFormatException, and no
// handler is visible in these lines - confirm the caller deals with it.
long pExternalLobsBLOBThresholdLimit = Long.parseLong(externalLobsBLOBThresholdLimit.valueIfNotSet());
if (StringUtils.isNotBlank(parameters.get(externalLobsBLOBThresholdLimit))) {
pExternalLobsBLOBThresholdLimit = Long.parseLong(parameters.get(externalLobsBLOBThresholdLimit));
if (pExternalLobsBLOBThresholdLimit < 0) {
pExternalLobsBLOBThresholdLimit = Long.parseLong(externalLobsBLOBThresholdLimit.valueIfNotSet());
}
}

// optional: CLOB threshold in bytes. Starts from the parameter's not-set value and is only
// overridden by a non-blank user value; a negative user value falls back to the not-set value.
// NOTE(review): a non-numeric value makes Long.parseLong throw NumberFormatException, and no
// handler is visible in these lines - confirm the caller deals with it.
long pExternalLobsCLOBThresholdLimit = Long.parseLong(externalLobsCLOBThresholdLimit.valueIfNotSet());
if (StringUtils.isNotBlank(parameters.get(externalLobsCLOBThresholdLimit))) {
pExternalLobsCLOBThresholdLimit = Long.parseLong(parameters.get(externalLobsCLOBThresholdLimit));
if (pExternalLobsCLOBThresholdLimit < 0) {
pExternalLobsCLOBThresholdLimit = Long.parseLong(externalLobsCLOBThresholdLimit.valueIfNotSet());
}
}

// optional
Path pGMLDirectory = null;
if (StringUtils.isNotBlank(parameters.get(gmlDirectory))) {
Expand Down Expand Up @@ -336,14 +372,16 @@ public DatabaseFilterModule buildExportModule(Map<Parameter, String> parameters,

report(reporter, getModuleName(), String.valueOf(pVersion), pFile, String.valueOf(pCompress),
String.valueOf(pPrettyPrintXML), String.valueOf(pExternalLobs), String.valueOf(pExternalLobsPerFolder),
String.valueOf(pExternalLobsFolderSize), pDigestAlgorithm, pFontCase);
String.valueOf(pExternalLobsFolderSize), String.valueOf(pExternalLobsBLOBThresholdLimit),
String.valueOf(pExternalLobsCLOBThresholdLimit), pDigestAlgorithm, pFontCase);

SIARD2ExportModule exportModule;
DatabaseFilterModule handler;

if (pExternalLobs) {
exportModule = new SIARD2ExportModule(pVersion, pFile, pCompress, pPrettyPrintXML, pExternalLobsPerFolder,
pExternalLobsFolderSize, descriptiveMetadataParameterValues, pDigestAlgorithm, pFontCase);
pExternalLobsFolderSize, pExternalLobsBLOBThresholdLimit, pExternalLobsCLOBThresholdLimit,
descriptiveMetadataParameterValues, pDigestAlgorithm, pFontCase);
} else {
exportModule = new SIARD2ExportModule(pVersion, pFile, pCompress, pPrettyPrintXML,
descriptiveMetadataParameterValues, pDigestAlgorithm, pFontCase);
Expand Down Expand Up @@ -373,6 +411,7 @@ private void addDescriptiveMetadataParameterValue(Map<Parameter, String> paramet
private void report(Reporter reporter, String moduleName, String parameterVersionValue, Path parameterFileValue,
String parameterCompressValue, String parameterPrettyXmlValue, String parameterExternalLobsValue,
String parameterExternalLobsPerFolderValue, String parameterExternalLobsFolderSizeValue,
String parameterExternalLobsBLOBThresholdLimit, String parameterExternalLobsCLOBThresholdLimit,
String parameterMessageDigestAlgorithmValue, String parameterFontCaseValue) {

String parameterFileValueString = null;
Expand All @@ -384,6 +423,8 @@ private void report(Reporter reporter, String moduleName, String parameterVersio
parameterFileValueString, PARAMETER_COMPRESS, parameterCompressValue, PARAMETER_PRETTY_XML,
parameterPrettyXmlValue, PARAMETER_EXTERNAL_LOBS, parameterExternalLobsValue, PARAMETER_EXTERNAL_LOBS_PER_FOLDER,
parameterExternalLobsPerFolderValue, PARAMETER_EXTERNAL_LOBS_FOLDER_SIZE, parameterExternalLobsFolderSizeValue,
// Arguments are passed as (parameter name, value) pairs; keep the name first for the threshold
// limits too, so they are not reported with name and value swapped.
PARAMETER_EXTERNAL_LOBS_BLOB_THRESHOLD_LIMIT, parameterExternalLobsBLOBThresholdLimit,
PARAMETER_EXTERNAL_LOBS_CLOB_THRESHOLD_LIMIT, parameterExternalLobsCLOBThresholdLimit,
PARAMETER_MESSAGE_DIGEST_ALGORITHM, parameterMessageDigestAlgorithmValue, PARAMETER_FONT_CASE,
parameterFontCaseValue);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,33 +52,36 @@ public class SIARD2ContentWithExternalLobsExportStrategy extends SIARD2ContentEx
private static final long MB_TO_BYTE_RATIO = 1024L * 1024L;

private static final Logger LOGGER = LoggerFactory.getLogger(SIARD2ContentWithExternalLobsExportStrategy.class);

private SIARDArchiveContainer currentExternalContainer;

// measured in Bytes
private final long maximumLobsFolderSize;
private long currentLobsFolderSize = 0;

private final int maximumLobsPerFolder;
private SIARDArchiveContainer currentExternalContainer;
private long currentLobsFolderSize = 0;
private int currentLobsInFolder = 0;
private final long blobThresholdLimit;
private final long clobThresholdLimit;

private byte[] lobDigestChecksum = null;

public SIARD2ContentWithExternalLobsExportStrategy(SIARD2ContentPathExportStrategy contentPathStrategy,
WriteStrategy writeStrategy, SIARDArchiveContainer baseContainer, boolean prettyXMLOutput,
int externalLobsPerFolder, long maximumLobsFolderSize, String messageDigestAlgorithm, String fontCase) {
int externalLobsPerFolder, long maximumLobsFolderSize, long blobThresholdLimit, long clobThresholdLimit,
String messageDigestAlgorithm, String fontCase) {
super(contentPathStrategy, writeStrategy, baseContainer, prettyXMLOutput, messageDigestAlgorithm, fontCase);
this.maximumLobsFolderSize = maximumLobsFolderSize * MB_TO_BYTE_RATIO;
this.maximumLobsPerFolder = externalLobsPerFolder;

this.blobThresholdLimit = blobThresholdLimit;
this.clobThresholdLimit = clobThresholdLimit;
this.currentExternalContainer = null;
}

@Override
protected void writeSimpleCell(String cellPrefix, Cell cell, ColumnStructure column, int columnIndex)
throws ModuleException, IOException {
if (Sql2008toXSDType.isLargeType(column.getType(), reporter)) {
writeLargeObjectData(cellPrefix, cell, columnIndex);
SimpleCell simpleCell = (SimpleCell) cell;
long length = simpleCell.getBytesSize();
if (Sql2008toXSDType.isLargeType(column.getType(), reporter) && length > clobThresholdLimit) {
writeLargeObjectDataOutside(cellPrefix, cell, columnIndex);
} else {
writeSimpleCellData(cellPrefix, (SimpleCell) cell, columnIndex);
}
Expand All @@ -88,9 +91,14 @@ protected void writeSimpleCell(String cellPrefix, Cell cell, ColumnStructure col
protected void writeBinaryCell(String cellPrefix, Cell cell, ColumnStructure column, int columnIndex)
throws ModuleException, IOException {
BinaryCell binaryCell = (BinaryCell) cell;
long length = binaryCell.getSize();

if (Sql2008toXSDType.isLargeType(column.getType(), reporter)) {
writeLargeObjectData(cellPrefix, cell, columnIndex);
if (length > blobThresholdLimit) {
writeLargeObjectDataOutside(cellPrefix, cell, columnIndex);
} else {
writeLargeObjectData(cellPrefix, cell, columnIndex);
}
} else {
// inline non-BLOB binary data
try (InputStream inputStream = binaryCell.createInputStream()) {
Expand All @@ -101,8 +109,7 @@ protected void writeBinaryCell(String cellPrefix, Cell cell, ColumnStructure col
}
}

@Override
protected void writeLargeObjectData(String cellPrefix, Cell cell, int columnIndex)
private void writeLargeObjectDataOutside(String cellPrefix, Cell cell, int columnIndex)
throws IOException, ModuleException {
String lobFileParameter = null;
long lobSizeParameter = 0;
Expand Down Expand Up @@ -164,17 +171,16 @@ protected void writeLargeObjectData(String cellPrefix, Cell cell, int columnInde

// decide to whether write the LOB right away or later
if (writeStrategy.isSimultaneousWritingSupported()) {
writeLOB(lob);
writeLOBOutside(lob);
} else {
throw new NotImplementedException(SIARD2ContentWithExternalLobsExportStrategy.class.getName()
+ " is not ready to be used with write strategies that don't support simultaneous writing.");
}

// something like "../filename.siard2/"
String lobURI = FilenameUtils.separatorsToUnix(Paths
.get(".." + File.separator + currentExternalContainer.getPath().getFileName().toString() + File.separator,
lobFileParameter)
.toString());
String lobURI = FilenameUtils.separatorsToUnix(
Paths.get(".." + File.separator + currentExternalContainer.getPath().getFileName().toString() + File.separator,
lobFileParameter).toString());

// write the LOB XML element
currentWriter.beginOpenTag("c" + columnIndex, 2).appendAttribute("file", lobURI).appendAttribute("length",
Expand All @@ -195,8 +201,7 @@ protected void writeLargeObjectData(String cellPrefix, Cell cell, int columnInde
currentLobsInFolder++;
}

@Override
protected void writeLOB(LargeObject lob) throws ModuleException {
private void writeLOBOutside(LargeObject lob) throws ModuleException {
String lobRelativePath = lob.getOutputPath();
// copy lob to output and save digest checksum if possible
try (OutputStream out = writeStrategy.createOutputStream(currentExternalContainer, lobRelativePath);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,6 @@
import com.databasepreservation.modules.siard.out.write.ParallelZipWriteStrategy;
import com.databasepreservation.modules.siard.out.write.WriteStrategy;
import com.databasepreservation.modules.siard.out.write.ZipWithExternalLobsWriteStrategy;
import com.databasepreservation.modules.siard.out.write.ZipWriteStrategy;

/**
* @author Bruno Ferreira <[email protected]>
Expand Down Expand Up @@ -66,22 +65,24 @@ public SIARD2ExportModule(SIARDConstants.SiardVersion version, Path siardPackage
break;
}

contentStrategy = new SIARD2ContentExportStrategy(contentPathStrategy, writeStrategy, mainContainer, prettyXML, digestAlgorithm, fontCase);
contentStrategy = new SIARD2ContentExportStrategy(contentPathStrategy, writeStrategy, mainContainer, prettyXML,
digestAlgorithm, fontCase);
}

public SIARD2ExportModule(SIARDConstants.SiardVersion version, Path siardPackage, boolean compressZip,
boolean prettyXML, int externalLobsPerFolder, long externalLobsFolderSize,
HashMap<String, String> descriptiveMetadata, String digestAlgorithm, String fontCase) {
boolean prettyXML, int externalLobsPerFolder, long externalLobsFolderSize, long externalLobsBLOBThresholdLimit,
long externalLobsCLOBThresholdLimit, HashMap<String, String> descriptiveMetadata, String digestAlgorithm,
String fontCase) {
this.descriptiveMetadata = descriptiveMetadata;
contentPathStrategy = new SIARD2ContentWithExternalLobsPathExportStrategy();
metadataPathStrategy = new SIARD2MetadataPathStrategy();

FolderWriteStrategy folderWriteStrategy = new FolderWriteStrategy();
ZipWriteStrategy zipWriteStrategy;
ParallelZipWriteStrategy zipWriteStrategy;
if (compressZip) {
zipWriteStrategy = new ZipWriteStrategy(CompressionMethod.DEFLATE);
zipWriteStrategy = new ParallelZipWriteStrategy(CompressionMethod.DEFLATE);
} else {
zipWriteStrategy = new ZipWriteStrategy(CompressionMethod.STORE);
zipWriteStrategy = new ParallelZipWriteStrategy(CompressionMethod.STORE);
}
writeStrategy = new ZipWithExternalLobsWriteStrategy(zipWriteStrategy, folderWriteStrategy, digestAlgorithm);

Expand All @@ -97,7 +98,8 @@ public SIARD2ExportModule(SIARDConstants.SiardVersion version, Path siardPackage
}

contentStrategy = new SIARD2ContentWithExternalLobsExportStrategy(contentPathStrategy, writeStrategy, mainContainer,
prettyXML, externalLobsPerFolder, externalLobsFolderSize, digestAlgorithm, fontCase);
prettyXML, externalLobsPerFolder, externalLobsFolderSize, externalLobsBLOBThresholdLimit,
externalLobsCLOBThresholdLimit, digestAlgorithm, fontCase);
}

public DatabaseFilterModule getDatabaseHandler() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@
public class ZipWithExternalLobsWriteStrategy implements WriteStrategy {
private final MessageDigest digest;

private final ZipWriteStrategy zipWriter;
private final ParallelZipWriteStrategy zipWriter;
private final FolderWriteStrategy folderWriter;

public ZipWithExternalLobsWriteStrategy(ZipWriteStrategy zipWriteStrategy, FolderWriteStrategy folderWriteStrategy, String messageDigestAlgorithm) {
public ZipWithExternalLobsWriteStrategy(ParallelZipWriteStrategy zipWriteStrategy, FolderWriteStrategy folderWriteStrategy, String messageDigestAlgorithm) {
zipWriter = zipWriteStrategy;
folderWriter = folderWriteStrategy;

Expand Down Expand Up @@ -66,7 +66,7 @@ public OutputStream createOutputStream(SIARDArchiveContainer container, String p

@Override
public void writeTo(InputStreamProvider provider, String path) {
// Hand the provider and target path straight to the wrapped zip write strategy.

zipWriter.writeTo(provider, path);
}

/**
Expand Down

0 comments on commit cd4a217

Please sign in to comment.