Skip to content

Commit

Permalink
Add test datasets using Zarr files (#188)
Browse files Browse the repository at this point in the history
* Add example table datasets using zarr.

Improve file path handling to make it easier to run dasdds and generatedatasetsxml during development.

Also fix some atomicInteger interactions that were not properly migrated.

Plan to add tests using these datasets before merge.

* Make zarr work with EDDGridFromNcFiles. Also add tests that use the new zarr datasets.

* Add support for compressed (.zip tested) zarr files

* Include information about the new zarr support in the documentation.
  • Loading branch information
ChrisJohnNOAA authored Aug 14, 2024
1 parent 32fddd9 commit 6651ccc
Show file tree
Hide file tree
Showing 18 changed files with 85,179 additions and 42,853 deletions.
2 changes: 1 addition & 1 deletion WEB-INF/DasDds.bat
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ rem This is the Windows batch file to run DasDds.
rem See http://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#Tools

rem You'll need to change java's path to make this work:
C:\programs\jdk-17.0.3+7\bin\java.exe -cp classes;../../../lib/servlet-api.jar;lib/* -Xms1000M -Xmx1000M gov.noaa.pfel.erddap.DasDds %*
java.exe -cp classes;../../../lib/servlet-api.jar;lib/* -Xms1000M -Xmx1000M gov.noaa.pfel.erddap.DasDds %*
2 changes: 1 addition & 1 deletion WEB-INF/GenerateDatasetsXml.bat
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@ rem This is the Windows batch file to run GenerateDatasetsXml.
rem See http://coastwatch.pfeg.noaa.gov/erddap/download/setupDatasetsXml.html#Tools

rem You'll need to change java's path to make this work:
C:\programs\jdk-17.0.3+7\bin\java.exe -cp classes;../../../lib/servlet-api.jar;lib/* -Xms1000M -Xmx1000M gov.noaa.pfel.erddap.GenerateDatasetsXml %*
java.exe -cp classes;../../../lib/servlet-api.jar;lib/* -Xms1000M -Xmx1000M gov.noaa.pfel.erddap.GenerateDatasetsXml %*
206 changes: 204 additions & 2 deletions WEB-INF/classes/com/cohort/util/File2.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,15 @@
import java.net.URLDecoder;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.concurrent.ConcurrentHashMap;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;
Expand Down Expand Up @@ -359,6 +364,10 @@ public static void setWebInfParentDirectory() {
webInfParentDirectory = System.getProperty("user.dir") + "/";
}

/**
 * Sets webInfParentDirectory to an explicitly supplied directory.
 *
 * @param dir the value to store; it is stored exactly as given (no validation or normalization).
 *     NOTE(review): the no-arg overload stores a value ending in "/", so callers presumably
 *     should pass a directory with a trailing slash too -- confirm.
 */
public static void setWebInfParentDirectory(String dir) {
webInfParentDirectory = dir;
}

/**
* This indicates if the named file is indeed an existing local file. AWS S3 files don't count as
* local here. If dir="", it just says it isn't a file.
Expand Down Expand Up @@ -932,7 +941,7 @@ public static long getLastModified(String fullName) {
// The problem might be that something needs to be gc'd.
Math2.gcAndWait(
"File2.getLastModified (before retry)"); // if trouble getting lastModified: gc
// encourages success
// encourages success
File file = new File(fullName);
return file.lastModified();
} catch (Exception e2) {
Expand Down Expand Up @@ -1283,6 +1292,199 @@ public static InputStream getDecompressedBufferedInputStream(String fullFileName
return is;
}

/**
 * Decompresses the contents of a compressed file or archive, writing every contained file.
 *
 * <p>The compression type is chosen from the extension of sourceFullName:
 *
 * <ul>
 *   <li>.Z and .bz2: the single decompressed stream is written to the FILE named by destDir.
 *   <li>.gz and .gzip: the decompressed stream is written to a file inside the DIRECTORY destDir,
 *       named after the source file minus its last extension.
 *   <li>.tgz, .tar.gz, .tar.gzip and .zip: every entry is extracted below the DIRECTORY destDir
 *       (entry names are resolved and checked by newFile).
 * </ul>
 *
 * <p>Any other extension is silently ignored.
 *
 * <p>NOTE(review): destDir is a file path for .Z/.bz2 but a directory for the other types; that
 * asymmetry is kept from the previous implementation -- confirm it is intended.
 *
 * @param sourceFullName the full name of the compressed source file
 * @param destDir the destination (see above for whether it is a file or a directory)
 * @throws IOException if the source can't be read, a destination can't be created or written, or
 *     an archive entry resolves outside of destDir
 */
public static void decompressAllFiles(String sourceFullName, String destDir) throws IOException {
  String ext = getExtension(sourceFullName); // if e.g., .tar.gz, this returns .gz
  // !!!!! IF CHANGE SUPPORTED COMPRESSION TYPES, CHANGE isDecompressible ABOVE
  // !!!

  final int bufferSize = 1024;

  // handle .Z (capital Z) specially first
  // This assumes Z files contain only 1 file.
  if (ext.equals(".Z")) {
    // try-with-resources closes both streams even if one close() throws
    try (FileInputStream fileIn = new FileInputStream(sourceFullName);
        ZCompressorInputStream zIn = new ZCompressorInputStream(new BufferedInputStream(fileIn));
        FileOutputStream out = new FileOutputStream(destDir)) {
      zIn.transferTo(out);
    }
    return;
  }

  // everything caught below has a z in ext
  if (ext.indexOf('z') < 0) {
    return;
  }

  if (ext.equals(".tgz")
      || sourceFullName.endsWith(".tar.gz")
      || sourceFullName.endsWith(".tar.gzip")) {
    // This can actually have multiple files.
    try (FileInputStream fileIn = new FileInputStream(sourceFullName);
        GzipCompressorInputStream gzipIn =
            new GzipCompressorInputStream(new BufferedInputStream(fileIn));
        TarArchiveInputStream tarIn = new TarArchiveInputStream(gzipIn)) {
      ArchiveEntry entry;
      while ((entry = tarIn.getNextEntry()) != null) {
        File target = newFile(destDir, entry.getName());
        if (entry.isDirectory()) {
          // mkdirs (not mkdir) so nested directories work; an existing directory is not an error
          if (!target.isDirectory() && !target.mkdirs()) {
            String2.log(
                "Unable to create directory '"
                    + target.getAbsolutePath()
                    + "', during extraction of archive contents.");
          }
        } else {
          // archives don't always contain explicit directory entries
          File parent = target.getParentFile();
          if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Failed to create directory " + parent);
          }
          try (BufferedOutputStream dest =
              new BufferedOutputStream(new FileOutputStream(target, false), bufferSize)) {
            // reads up to the end of the current tar entry
            tarIn.transferTo(dest);
          }
        }
      }
    }

  } else if (ext.equals(".gz") || ext.equals(".gzip")) {
    // Use only the file's name (not its directory) when naming the output in destDir.
    File outputFile =
        new File(destDir, getFileNameWithoutExtension(new File(sourceFullName).getName()));
    File parent = outputFile.getParentFile();
    if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
      throw new IOException("Failed to create directory " + parent);
    }
    try (FileInputStream fileIn = new FileInputStream(sourceFullName);
        GZIPInputStream gzipInputStream = new GZIPInputStream(fileIn);
        FileOutputStream fileOutputStream = new FileOutputStream(outputFile)) {
      gzipInputStream.transferTo(fileOutputStream);
    }

  } else if (ext.equals(".zip")) {
    // This can actually have multiple files.
    try (ZipInputStream zis = new ZipInputStream(new FileInputStream(sourceFullName))) {
      ZipEntry zipEntry;
      while ((zipEntry = zis.getNextEntry()) != null) {
        File target = newFile(destDir, zipEntry.getName());
        if (zipEntry.isDirectory()) {
          if (!target.isDirectory() && !target.mkdirs()) {
            throw new IOException("Failed to create directory " + target);
          }
        } else {
          // fix for Windows-created archives (which may lack directory entries)
          File parent = target.getParentFile();
          if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
            throw new IOException("Failed to create directory " + parent);
          }

          // write file content; reads up to the end of the current zip entry
          try (FileOutputStream fos = new FileOutputStream(target)) {
            zis.transferTo(fos);
          }
        }
      }
    }

  } else if (ext.equals(".bz2")) {
    try (BZip2CompressorInputStream bzIn =
            new BZip2CompressorInputStream(
                new BufferedInputStream(Files.newInputStream(Paths.get(sourceFullName))));
        OutputStream out = Files.newOutputStream(Paths.get(destDir))) {
      bzIn.transferTo(out);
    }
  }
  // .7z is possible but different and harder

  // !!!!! IF CHANGE SUPPORTED COMPRESSION TYPES, CHANGE isDecompressible ABOVE
  // !!!

}

/**
 * Removes the last extension (the final '.' and everything after it) from a file name or path.
 *
 * <p>Only a '.' inside the last path component counts, so a dot in a directory name (e.g.
 * "/dir.v1/file") is never mistaken for the extension separator. A '.' that is the first
 * character of the last component (e.g. ".hidden") is not treated as an extension either.
 *
 * @param filePath a file name, optionally preceded by a directory (with / or \ separators)
 * @return filePath without its last extension, or filePath unchanged if it has none
 */
private static String getFileNameWithoutExtension(String filePath) {
  int lastSeparator = Math.max(filePath.lastIndexOf('/'), filePath.lastIndexOf('\\'));
  int lastIndexOfDot = filePath.lastIndexOf('.');
  // the dot must be inside the last component and must not be its first character
  if (lastIndexOfDot > lastSeparator + 1) {
    return filePath.substring(0, lastIndexOfDot);
  }
  return filePath;
}

/**
 * Resolves an archive entry name to a file below the destination directory, rejecting entries
 * that would end up outside of it (the "zip slip" path traversal problem).
 *
 * <p>If the first component of the entry name equals the last component of destinationDir (e.g.
 * entry "store.zarr/a/b" extracted into ".../store.zarr"), that leading component is dropped so
 * the archive's root directory isn't duplicated. An entry that is only that root directory (e.g.
 * "store.zarr/") resolves to destinationDir itself.
 *
 * @param destinationDir the directory the archive is being extracted into
 * @param name the entry name from the archive
 * @return the file (or directory) the entry should be written to
 * @throws IOException if the entry resolves to a location outside of destinationDir
 */
private static File newFile(String destinationDir, String name) throws IOException {
  Path destBase = Paths.get(destinationDir);
  Path namePath = Paths.get(name);

  // Guard the counts: getName() throws for root-only paths, which have 0 name elements.
  int destCount = destBase.getNameCount();
  int nameCount = namePath.getNameCount();
  if (destCount > 0
      && nameCount > 0
      && namePath.getName(0).equals(destBase.getName(destCount - 1))) {
    if (nameCount > 1) {
      namePath = namePath.subpath(1, nameCount);
    } else if (name.endsWith("/") || name.endsWith("\\")) {
      // The entry is just the archive's root directory: it maps to destinationDir itself.
      // (subpath(1, 1) would throw IllegalArgumentException.)
      namePath = Paths.get("");
    }
  }
  File destFile = destBase.resolve(namePath).toFile();

  // Compare canonical with canonical, so symlinks or ".." in destinationDir can't cause
  // false rejections.
  String destDirPath = new File(destinationDir).getCanonicalPath();
  String destFilePath = destFile.getCanonicalPath();
  String destDirPrefix =
      destDirPath.endsWith(File.separator) ? destDirPath : destDirPath + File.separator;

  if (!destFilePath.equals(destDirPath) && !destFilePath.startsWith(destDirPrefix)) {
    throw new IOException("Entry is outside of the target dir: " + name);
  }

  return destFile;
}

public static BufferedReader getDecompressedBufferedFileReader88591(String fullFileName)
throws Exception {
return getDecompressedBufferedFileReader(fullFileName, ISO_8859_1);
Expand Down Expand Up @@ -1445,7 +1647,7 @@ public static String[] readFromFile(String fileName, String charset, int maxAtte
if (attempt == 1)
Math2.gcAndWait(
"File2.readFromFile (before retry)"); // trouble! Give OS/Java a time and gc to
// deal with trouble
// deal with trouble
else Math2.sleep(1000);
}
}
Expand Down
60 changes: 36 additions & 24 deletions WEB-INF/classes/gov/noaa/pfel/coastwatch/util/FileVisitorDNLS.java
Original file line number Diff line number Diff line change
Expand Up @@ -828,10 +828,13 @@ public static String decompressIfNeeded(
+ " .");
String cacheFullName = cacheDir + sourceFullName.substring(sourceBaseDir.length());
int cfnl = cacheFullName.length();
if (cacheFullName.endsWith(".tar.gz")) cacheFullName = cacheFullName.substring(0, cfnl - 7);
else if (cacheFullName.endsWith(".tar.gzip"))
if (cacheFullName.endsWith(".tar.gz")) {
cacheFullName = cacheFullName.substring(0, cfnl - 7);
} else if (cacheFullName.endsWith(".tar.gzip")) {
cacheFullName = cacheFullName.substring(0, cfnl - 9);
else cacheFullName = File2.removeExtension(cacheFullName); // remove simple extension
} else {
cacheFullName = File2.removeExtension(cacheFullName); // remove simple extension
}

// decompressed file already exists?
if (!reuseExisting) File2.delete(cacheFullName);
Expand All @@ -857,31 +860,36 @@ else if (File2.isFile(cacheFullName)) {
return cacheFullName;
}

// make dir and decompressed file
long time = System.currentTimeMillis();
File2.makeDirectory(File2.getDirectory(cacheFullName));
InputStream is = File2.getDecompressedBufferedInputStream(sourceFullName);
OutputStream os = null;
try {
os = new BufferedOutputStream(new FileOutputStream(cacheFullName));
if (!File2.copy(is, os)) throw new IOException("Unable to decompress " + sourceFullName);
if (verbose)
String2.log(
" decompressed "
+ sourceFullName
+ " time="
+ (System.currentTimeMillis() - time)
+ "ms");
} finally {
try {
if (os != null) os.close();
} catch (Exception e2) {
}
// make dir and decompressed file
if (sourceFullName.contains("zarr")) {
File2.decompressAllFiles(sourceFullName, cacheFullName);
} else {
InputStream is = File2.getDecompressedBufferedInputStream(sourceFullName);
OutputStream os = null;
try {
if (is != null) is.close();
} catch (Exception e2) {
os = new BufferedOutputStream(new FileOutputStream(cacheFullName));
if (!File2.copy(is, os)) throw new IOException("Unable to decompress " + sourceFullName);
if (verbose)
String2.log(
" decompressed "
+ sourceFullName
+ " time="
+ (System.currentTimeMillis() - time)
+ "ms");
} finally {
try {
if (os != null) os.close();
} catch (Exception e2) {
}
try {
if (is != null) is.close();
} catch (Exception e2) {
}
}
}

long cs = incrementPruneCacheDirSize(cacheDir, Math.max(0, File2.length(cacheFullName)));
if (reallyVerbose)
String2.log(
Expand Down Expand Up @@ -1226,8 +1234,12 @@ public static Table oneStepDoubleWithUrlsNotDirs(Table tTable, String tDir, Stri
public static String getSampleFileName(
String tFileDir, String tFileNameRegex, boolean tRecursive, String tPathRegex)
throws Exception {
boolean includeDirectories =
(tFileNameRegex != null && tFileNameRegex.contains("zarr"))
|| (tPathRegex != null && tPathRegex.contains("zarr"));
Table fileTable =
oneStep(tFileDir, tFileNameRegex, tRecursive, tPathRegex, false); // dirNamesToo
oneStep(
tFileDir, tFileNameRegex, tRecursive, tPathRegex, includeDirectories); // dirNamesToo
int nRows = fileTable.nRows();
if (nRows == 0)
throw new RuntimeException(
Expand Down
31 changes: 29 additions & 2 deletions WEB-INF/classes/gov/noaa/pfel/erddap/DasDds.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import gov.noaa.pfel.erddap.util.EDStatic;
import java.io.IOException;
import java.io.Writer;
import java.nio.file.Path;

/**
* This is a command line program to run EDD.testDasDds.
Expand All @@ -21,10 +22,15 @@
*/
public class DasDds {

static String logFileName = EDStatic.fullLogsDirectory + "DasDds.log";
static String outFileName = EDStatic.fullLogsDirectory + "DasDds.out";
static String logFileName = null;
static String outFileName = null;
Writer outFile = null;

/**
 * Creates a DasDds instance and sets the static logFileName and outFileName to "DasDds.log" and
 * "DasDds.out" within EDStatic.fullLogsDirectory.
 */
public DasDds() {
// NOTE(review): these static fields are (re)assigned on every construction rather than at
// declaration, presumably so EDStatic isn't consulted until after main() has had a chance to
// configure the content directory -- confirm.
logFileName = EDStatic.fullLogsDirectory + "DasDds.log";
outFileName = EDStatic.fullLogsDirectory + "DasDds.out";
}

private void printToBoth(String s) throws IOException {
String2.log(s);
String2.flushLog();
Expand Down Expand Up @@ -159,6 +165,27 @@ public String doIt(String args[], boolean loop) throws Throwable {
* @param args if args has values, they are used to answer the question.
*/
public static void main(String args[]) throws Throwable {
  // If erddapContentDirectory isn't specified and the classes aren't under a /webapps/
  // directory (e.g., when run from a development checkout), point ERDDAP at the parent of the
  // current working directory and at its development/test/ content directory.

  String ecd = "erddapContentDirectory";
  String contentDirectory = System.getProperty(ecd);
  if (contentDirectory == null) {
    // Or, it must be sibling of webapps
    // e.g., c:/programs/_tomcat/webapps/erddap/WEB-INF/classes/[these classes]
    // On windows, contentDirectory may have spaces as %20(!)
    contentDirectory =
        String2.replaceAll(
            File2.getClassPath(), // with / separator and / at the end
            "%20",
            " ");
    int po = contentDirectory.indexOf("/webapps/");
    if (po == -1) {
      Path userDir = Path.of(System.getProperty("user.dir"));
      Path parentDir = userDir.getParent();
      if (parentDir == null) {
        // e.g., user.dir is a file system root
        throw new IllegalStateException(
            "Unable to determine the WEB-INF parent directory: user.dir=" + userDir
                + " has no parent directory.");
      }
      String webInfParentDir = parentDir.toString();
      if (!webInfParentDir.endsWith("/")) {
        webInfParentDir += "/";
      }
      File2.setWebInfParentDirectory(webInfParentDir);
      // webInfParentDir already ends with "/", so don't add another one
      System.setProperty(ecd, webInfParentDir + "development/test/");
    }
  }

  new DasDds().doIt(args, true);
  System.exit(0);
}
Expand Down
Loading

0 comments on commit 6651ccc

Please sign in to comment.