Skip to content

Commit

Permalink
#136 UniversalDecompressor hard-codes configuration file location
Browse files Browse the repository at this point in the history
- removes the hardcoded string PROPERTIES_PATH from `UniversalDecompressor`
- adds a new constructor to `UniversalDecompressor` taking a Path reference for custom XML file configs
- adds new test class `UniversalDecompressorTest`, previously totally untested
- demonstrates the handling of custom `ar` archive approach via decompressor-ar.xml, as 7z is not present on many platforms by default
- adjusts GZipDecompressor / BZip2Decompressor to check both ways to read an archive: classpath or external (file) location
- resolves #136
  • Loading branch information
mawiesne committed Nov 14, 2023
1 parent f289b14 commit 675feeb
Show file tree
Hide file tree
Showing 12 changed files with 329 additions and 76 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -55,8 +55,8 @@ public void setParameters(List<String> parameters)
}

/**
* Returns the Position Span of this Template refering to the ContentElement in which the
* Template occures. This is mainly the same like Link.getPos(), but a Template does�n know its
* Returns the Position Span of this Template referring to the ContentElement in which the
* Template occurs. This is mainly the same as {@link Link#getPos()}, but a Template doesn't know its
* HomeElement.
*/
public Span getPos()
Expand All @@ -72,14 +72,15 @@ public void setPos(Span pos)
this.pos = pos;
}

@Override
public String toString()
{
StringBuilder result = new StringBuilder();
result.append("TE_NAME: \"" + name + "\"");
result.append("\nTE_PARAMETERS: " + parameters.size());
result.append("TE_NAME: \"").append(name).append("\"");
result.append("\nTE_PARAMETERS: ").append(parameters.size());
for (String parameter : parameters)
result.append("\nTE_PARAMETER: \"" + parameter + "\"");
result.append("\nTE_POS: " + pos);
result.append("\nTE_PARAMETER: \"").append(parameter).append("\"");
result.append("\nTE_POS: ").append(pos);
return result.toString();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
/*
* Licensed to the Technische Universität Darmstadt under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Technische Universität Darmstadt
* licenses this file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dkpro.jwpl.wikimachine.decompression;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Path;
import java.nio.file.Paths;

public abstract class AbstractDecompressor implements IDecompressor {

    /**
     * Attempts to open an {@link InputStream} to an external or internal resource.
     * In this context, external resources are referenced via a relative or absolute path,
     * including the actual file name of that resource.
     * If no file exists at the given location, or if {@code resource} is not a valid file
     * system path on the current platform, an attempt is made to load the resource from the
     * classpath instead.
     *
     * @param resource References a resource via a path or by its file name only.
     *                 If {@code null}, this will result in an {@link IOException}.
     * @return An open {@link InputStream} or {@code null} if {@code resource} could not be found.
     *         Callers must check for {@code null} before wrapping or reading the stream.
     *
     * @throws IOException Thrown if IO errors occurred.
     */
    protected InputStream openStream(String resource) throws IOException
    {
        if (resource == null) {
            throw new IOException("Can't load a 'null' resource!");
        }
        final Path file = toExistingPath(resource);
        if (file != null) {
            return Files.newInputStream(file);
        }
        // Not present on the file system: fall back to a classpath lookup, which yields
        // null when the resource is unknown there as well.
        return getContextClassLoader().getResourceAsStream(resource);
    }

    /**
     * Resolves {@code resource} to an absolute file system path, if something exists there.
     *
     * @param resource A non-null path or file name.
     * @return The absolute {@link Path}, or {@code null} if nothing exists at that location
     *         or {@code resource} cannot be expressed as a path on this platform.
     */
    private static Path toExistingPath(String resource)
    {
        try {
            final Path candidate = Paths.get(resource).toAbsolutePath();
            return Files.exists(candidate) ? candidate : null;
        } catch (InvalidPathException ignored) {
            // Paths.get rejects strings that are illegal for the platform's file system.
            // Such a name may still identify a classpath resource, so this is not an error.
            return null;
        }
    }

    /**
     * Selects the class loader used for classpath lookups.
     * The thread's context class loader is preferred; as it may be unset ({@code null}),
     * the loader of this class and finally the system class loader serve as fallbacks.
     *
     * @return A class loader for resolving classpath resources.
     */
    private ClassLoader getContextClassLoader()
    {
        ClassLoader loader = Thread.currentThread().getContextClassLoader();
        if (loader == null) {
            loader = AbstractDecompressor.class.getClassLoader();
        }
        if (loader == null) {
            loader = ClassLoader.getSystemClassLoader();
        }
        return loader;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,36 +18,25 @@
package org.dkpro.jwpl.wikimachine.decompression;

import java.io.BufferedInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;

import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream;

/**
* BZip2 Decompressor (based on Singleton Design Pattern). Uses getInputStream to set up the archive
* path and returns the InputStream to read from
* BZip2 Decompressor (based on Singleton Design Pattern).
* Uses {@link IDecompressor#getInputStream(String)} to set up the archive
* path and returns the {@link InputStream} to read from.
*
* @see IDecompressor
*/
public class BZip2Decompressor
implements IDecompressor
public final class BZip2Decompressor
extends AbstractDecompressor implements IDecompressor
{

@Override
public InputStream getInputStream(String fileName) throws IOException
{
InputStream outputStream;

BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(fileName));
/*
* skip 2 first bytes (see the documentation of CBZip2InputStream) e.g. here
* http://lucene.apache.org/tika/xref/org/apache/tika/parser /pkg/bzip2
* /CBZip2InputStream.html
*/
inputStream.skip(2);
outputStream = new BZip2CompressorInputStream(inputStream);

return outputStream;

return new BZip2CompressorInputStream(new BufferedInputStream(openStream(fileName)));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -17,23 +17,26 @@
*/
package org.dkpro.jwpl.wikimachine.decompression;

import java.io.FileInputStream;
import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.GZIPInputStream;

/**
* GZip Decompressor (based on Singleton Design Pattern). Uses getInputStream to set up the archive
* path and returns the InputStream to read from
* GZip Decompressor (based on Singleton Design Pattern).
* Uses {@link IDecompressor#getInputStream(String)} to set up the archive
* path and returns the {@link InputStream} to read from.
*
* @see IDecompressor
*/
public class GZipDecompressor
implements IDecompressor
public final class GZipDecompressor
extends AbstractDecompressor implements IDecompressor
{

@Override
public InputStream getInputStream(String fileName) throws IOException
{
return new GZIPInputStream(new FileInputStream(fileName));
return new GZIPInputStream(new BufferedInputStream(openStream(fileName)));
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -21,11 +21,23 @@
import java.io.InputStream;

/**
* The main interface for all Decompressors, which uses a archive file path and returns a
* InputStream
* Uses an archive file path and returns an {@link InputStream}.
*/
public interface IDecompressor
{

/**
* Attempts to open an {@link InputStream} to a compressed archive.
* In this context, external archives are referenced via a relative or absolute path, including
* the actual file name of that archive.
* In case only a plain file name is given and no directory or path elements are contained
* in {@code fileName}, an attempt is made to detect and load the archive from the classpath.
*
* @param fileName References an archive via a path or by its file name only.
* If {@code null}, this will result in an {@link IOException}.
* @return An open {@link InputStream} or {@code null} if the archive could not be found.
*
* @throws IOException Thrown if IO errors occurred.
*/
InputStream getInputStream(String fileName) throws IOException;
}
Loading

0 comments on commit 675feeb

Please sign in to comment.