diff --git a/de.dentrassi.pm.aspect.common/META-INF/MANIFEST.MF b/de.dentrassi.pm.aspect.common/META-INF/MANIFEST.MF index c463b0d9..0550b597 100644 --- a/de.dentrassi.pm.aspect.common/META-INF/MANIFEST.MF +++ b/de.dentrassi.pm.aspect.common/META-INF/MANIFEST.MF @@ -38,8 +38,8 @@ Service-Component: OSGI-INF/hash.xml, OSGI-INF/groupOsgi.xml Bundle-ActivationPolicy: lazy Export-Package: de.dentrassi.pm.aspect.common.osgi;version="1.0.0"; - uses:="org.w3c.dom, - de.dentrassi.pm.aspect, - de.dentrassi.pm.osgi, + uses:="de.dentrassi.pm.aspect, + de.dentrassi.pm.osgi.bundle, de.dentrassi.pm.aspect.extract, - de.dentrassi.pm.aspect.virtual" + de.dentrassi.pm.osgi.feature", + de.dentrassi.pm.aspect.common.spool;version="1.0.0";uses:="de.dentrassi.pm.common.utils,de.dentrassi.pm.aspect.aggregate" diff --git a/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/AbstractSpooler.java b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/AbstractSpooler.java new file mode 100644 index 00000000..1969994b --- /dev/null +++ b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/AbstractSpooler.java @@ -0,0 +1,15 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.aspect.common.spool; + +public class AbstractSpooler +{ +} diff --git a/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/ChannelCacheTarget.java b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/ChannelCacheTarget.java new file mode 100644 index 00000000..fc074fc7 --- /dev/null +++ b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/ChannelCacheTarget.java @@ -0,0 +1,33 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.aspect.common.spool; + +import java.io.IOException; +import java.io.OutputStream; + +import de.dentrassi.pm.aspect.aggregate.AggregationContext; +import de.dentrassi.pm.common.utils.IOConsumer; + +public class ChannelCacheTarget implements SpoolOutTarget +{ + private final AggregationContext context; + + public ChannelCacheTarget ( final AggregationContext context ) + { + this.context = context; + } + + @Override + public void spoolOut ( final String fileName, final String mimeType, final IOConsumer stream ) throws IOException + { + this.context.createCacheEntry ( fileName, fileName, 
mimeType, stream ); + } +} diff --git a/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/OutputSpooler.java b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/OutputSpooler.java new file mode 100644 index 00000000..eda43ead --- /dev/null +++ b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/OutputSpooler.java @@ -0,0 +1,342 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.aspect.common.spool; + +import java.io.FilterOutputStream; +import java.io.IOException; +import java.io.OutputStream; +import java.security.DigestOutputStream; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import com.google.common.io.BaseEncoding; + +import de.dentrassi.pm.common.utils.IOConsumer; +import de.dentrassi.pm.common.utils.IOFunction; + +public class OutputSpooler +{ + + public class RecordingDigestOutputStream extends DigestOutputStream + { + private final String key; + + public RecordingDigestOutputStream ( final OutputStream stream, final MessageDigest digest, final String key ) + { + super ( stream, digest ); + this.key = key; + } + + @Override + public void close () throws IOException + { + super.close (); + + final MessageDigest digest = 
getMessageDigest (); + final byte[] result = digest.digest (); + + setResult ( this.key, result ); + } + } + + public class CountingOutputStream extends FilterOutputStream + { + private final String key; + + private long count; + + public CountingOutputStream ( final String key, final OutputStream out ) + { + super ( out ); + this.key = key; + } + + @Override + public void write ( final byte[] b, final int off, final int len ) throws IOException + { + this.out.write ( b, off, len ); + this.count += len; + } + + @Override + public void write ( final int b ) throws IOException + { + this.out.write ( b ); + this.count++; + } + + @Override + public void close () throws IOException + { + super.close (); + setResultSize ( this.key, this.count ); + } + + } + + private class MultiplexStream extends OutputStream + { + private final OutputStream[] streams; + + public MultiplexStream ( final List streams ) + { + this.streams = streams.toArray ( new OutputStream[streams.size ()] ); + } + + @Override + public void write ( final int b ) throws IOException + { + write ( new byte[] { (byte) ( b & 0xFF ) } ); + } + + @Override + public void write ( final byte[] b, final int off, final int len ) throws IOException + { + forEach ( stream -> stream.write ( b, off, len ) ); + } + + @Override + public void flush () throws IOException + { + forEach ( OutputStream::flush ); + } + + @Override + public void close () throws IOException + { + final java.util.stream.Stream s = Arrays.stream ( this.streams ); + OutputSpooler.closeAll ( s ); + } + + protected void forEach ( final IOConsumer consumer ) throws IOException + { + for ( final OutputStream stream : this.streams ) + { + consumer.accept ( stream ); + } + } + } + + private static class OutputEntry + { + private final String mimeType; + + private final IOFunction transformer; + + public OutputEntry ( final String mimeType, final IOFunction transformer ) + { + this.mimeType = mimeType; + this.transformer = transformer; + } + + public 
String getMimeType () + { + return this.mimeType; + } + + public IOFunction getTransformer () + { + return this.transformer; + } + } + + private final Set digests = new HashSet<> (); + + private final SpoolOutTarget target; + + private final Map outputs = new HashMap<> (); + + private final Map checksums = new HashMap<> (); + + private final Map sizes = new HashMap<> (); + + public OutputSpooler ( final SpoolOutTarget target ) + { + this.target = target; + } + + public void addDigest ( final String algorithm ) + { + this.digests.add ( algorithm ); + } + + public void addOutput ( final String fileName, final String mimeType ) + { + addOutput ( fileName, mimeType, null ); + } + + public void addOutput ( final String fileName, final String mimeType, final IOFunction transformer ) + { + if ( transformer == null ) + { + this.outputs.put ( fileName, new OutputEntry ( mimeType, output -> output ) ); + } + else + { + this.outputs.put ( fileName, new OutputEntry ( mimeType, transformer ) ); + } + } + + public void open ( final IOConsumer consumer ) throws IOException + { + final List streams = new LinkedList<> (); + + final Iterator> entries = this.outputs.entrySet ().iterator (); + + openNext ( streams, entries, stream -> { + try ( final MultiplexStream multiplexStream = new MultiplexStream ( streams ) ) + { + consumer.accept ( multiplexStream ); + } + } ); + } + + protected void openNext ( final List streams, final Iterator> entries, final IOConsumer> streamsConsumer ) throws IOException + { + if ( !entries.hasNext () ) + { + streamsConsumer.accept ( streams ); + } + else + { + final Entry entry = entries.next (); + this.target.spoolOut ( entry.getKey (), entry.getValue ().getMimeType (), stream -> { + + // add digesters + + for ( final String algo : this.digests ) + { + final String key = entry.getKey () + ":" + algo; + try + { + stream = new RecordingDigestOutputStream ( stream, MessageDigest.getInstance ( algo ), key ); + } + catch ( final NoSuchAlgorithmException e ) 
+ { + throw new IOException ( e ); + } + } + + // add counter + + stream = new CountingOutputStream ( entry.getKey (), stream ); + + // apply transformer + + stream = entry.getValue ().getTransformer ().apply ( stream ); + + // add stream + + streams.add ( stream ); + + // next + + openNext ( streams, entries, streamsConsumer ); + } ); + + } + } + + private void setResult ( final String key, final byte[] result ) + { + this.checksums.put ( key, BaseEncoding.base16 ().lowerCase ().encode ( result ) ); + } + + private void setResultSize ( final String key, final long length ) + { + this.sizes.put ( key, length ); + } + + static void closeAll ( final java.util.stream.Stream stream ) throws IOException + { + final List ex = new LinkedList<> (); + + stream.forEach ( s -> { + try + { + s.close (); + } + catch ( final IOException e ) + { + ex.add ( e ); + } + } ); + + if ( !ex.isEmpty () ) + { + final IOException base = new IOException (); + for ( final Exception e : ex ) + { + base.addSuppressed ( e ); + } + throw base; + } + } + + /** + * Get the digest of a closed file + * + * @param fileName + * the file name to get the digest for + * @param algorithm + * the digest algorithm + * @return the digest or null if the digest was not requested. + * The digest will be lower case hex encoded. 
+ * @throws IllegalStateException + * If the file is still open or was never opened + */ + public String getChecksum ( final String fileName, final String algorithm ) + { + if ( !this.digests.contains ( algorithm ) ) + { + return null; + } + + final String result = this.checksums.get ( fileName + ":" + algorithm ); + + if ( result == null ) + { + throw new IllegalStateException ( String.format ( "Stream '%s' not closed.", fileName ) ); + } + + return result; + } + + /** + * Get the size of a closed file + * + * @param fileName + * the file name to get the size for + * @return the size + * @throws IllegalStateException + * If the file is still open or was never opened + */ + public long getSize ( final String fileName ) + { + final Long result = this.sizes.get ( fileName ); + + if ( result == null ) + { + throw new IllegalStateException ( String.format ( "Stream '%s' not closed or was not added", fileName ) ); + } + + return result; + } +} diff --git a/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/SpoolOutTarget.java b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/SpoolOutTarget.java new file mode 100644 index 00000000..9cb531f4 --- /dev/null +++ b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/SpoolOutTarget.java @@ -0,0 +1,12 @@ +package de.dentrassi.pm.aspect.common.spool; + +import java.io.IOException; +import java.io.OutputStream; + +import de.dentrassi.pm.common.utils.IOConsumer; + +@FunctionalInterface +public interface SpoolOutTarget +{ + public void spoolOut ( final String fileName, final String mimeType, final IOConsumer stream ) throws IOException; +} diff --git a/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/package-info.java b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/package-info.java new file mode 100644 index 00000000..7ac4210a --- /dev/null +++ 
b/de.dentrassi.pm.aspect.common/src/de/dentrassi/pm/aspect/common/spool/package-info.java @@ -0,0 +1,13 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +/** + * Spool out helper functionality + */ +package de.dentrassi.pm.aspect.common.spool; diff --git a/de.dentrassi.pm.aspect/src/de/dentrassi/pm/aspect/aggregate/ChannelAggregator.java b/de.dentrassi.pm.aspect/src/de/dentrassi/pm/aspect/aggregate/ChannelAggregator.java index a9904c62..5a01976c 100644 --- a/de.dentrassi.pm.aspect/src/de/dentrassi/pm/aspect/aggregate/ChannelAggregator.java +++ b/de.dentrassi.pm.aspect/src/de/dentrassi/pm/aspect/aggregate/ChannelAggregator.java @@ -29,7 +29,5 @@ */ public interface ChannelAggregator { - public String getId (); - public Map aggregateMetaData ( AggregationContext context ) throws Exception; } diff --git a/de.dentrassi.pm.common/src/de/dentrassi/pm/common/XmlHelper.java b/de.dentrassi.pm.common/src/de/dentrassi/pm/common/XmlHelper.java index 0c1c828d..30b05695 100644 --- a/de.dentrassi.pm.common/src/de/dentrassi/pm/common/XmlHelper.java +++ b/de.dentrassi.pm.common/src/de/dentrassi/pm/common/XmlHelper.java @@ -138,6 +138,7 @@ public XmlHelper () final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance (); try { + dbf.setNamespaceAware ( true ); this.db = dbf.newDocumentBuilder (); } catch ( final ParserConfigurationException e ) @@ -224,6 +225,15 @@ public NodeList path ( final Node node, final String path ) throws XPathExpressi return (NodeList)expression.evaluate ( node, 
XPathConstants.NODESET ); } + /** + * Create a new element and add it as the last child + * + * @param parent + * the parent of the new element + * @param name + * the name of the element + * @return the new element + */ public static Element addElement ( final Element parent, final String name ) { final Element ele = parent.getOwnerDocument ().createElement ( name ); @@ -241,6 +251,27 @@ public static Element addElement ( final Element parent, final String name, fina return ele; } + public static Element addOptionalElement ( final Element parent, final String name, final Object value ) + { + if ( value == null ) + { + return null; + } + + final Element ele = addElement ( parent, name ); + ele.setTextContent ( value.toString () ); + return ele; + } + + /** + * Create a new element and add it as the first child + * + * @param parent + * the parent to which to add the element + * @param name + * the name of the element + * @return the new element + */ public static Element addElementFirst ( final Element parent, final String name ) { final Element ele = parent.getOwnerDocument ().createElement ( name ); diff --git a/de.dentrassi.pm.common/src/de/dentrassi/pm/common/utils/IOFunction.java b/de.dentrassi.pm.common/src/de/dentrassi/pm/common/utils/IOFunction.java new file mode 100644 index 00000000..e45d4510 --- /dev/null +++ b/de.dentrassi.pm.common/src/de/dentrassi/pm/common/utils/IOFunction.java @@ -0,0 +1,19 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.common.utils; + +import java.io.IOException; + +@FunctionalInterface +public interface IOFunction +{ + public R apply ( T data ) throws IOException; +} diff --git a/de.dentrassi.pm.deb/src/de/dentrassi/pm/deb/aspect/internal/AptAggregator.java b/de.dentrassi.pm.deb/src/de/dentrassi/pm/deb/aspect/internal/AptAggregator.java index 4391d32e..463a52a3 100644 --- a/de.dentrassi.pm.deb/src/de/dentrassi/pm/deb/aspect/internal/AptAggregator.java +++ b/de.dentrassi.pm.deb/src/de/dentrassi/pm/deb/aspect/internal/AptAggregator.java @@ -30,7 +30,6 @@ import de.dentrassi.pm.common.MetaKey; import de.dentrassi.pm.common.MetaKeys; import de.dentrassi.pm.deb.ChannelConfiguration; -import de.dentrassi.pm.deb.aspect.AptChannelAspectFactory; import de.dentrassi.pm.deb.aspect.DistributionInformation; import de.dentrassi.pm.deb.aspect.internal.RepoBuilder.PackageInformation; import de.dentrassi.pm.signing.SigningService; @@ -44,12 +43,6 @@ public AptAggregator () this.context = FrameworkUtil.getBundle ( AptAggregator.class ).getBundleContext (); } - @Override - public String getId () - { - return AptChannelAspectFactory.ID; - } - @Override public Map aggregateMetaData ( final AggregationContext context ) throws Exception { diff --git a/de.dentrassi.pm.maven/src/de/dentrassi/pm/maven/internal/MavenRepositoryChannelAggregator.java b/de.dentrassi.pm.maven/src/de/dentrassi/pm/maven/internal/MavenRepositoryChannelAggregator.java index bf7824be..c4e12134 100644 --- a/de.dentrassi.pm.maven/src/de/dentrassi/pm/maven/internal/MavenRepositoryChannelAggregator.java +++ 
b/de.dentrassi.pm.maven/src/de/dentrassi/pm/maven/internal/MavenRepositoryChannelAggregator.java @@ -67,12 +67,6 @@ protected String getSitePrefix () return "http://localhost:8080"; } - @Override - public String getId () - { - return MavenRepositoryAspectFactory.ID; - } - @Override public Map aggregateMetaData ( final AggregationContext context ) throws Exception { diff --git a/de.dentrassi.pm.p2/build.properties b/de.dentrassi.pm.p2/build.properties index a96e4288..eea97228 100644 --- a/de.dentrassi.pm.p2/build.properties +++ b/de.dentrassi.pm.p2/build.properties @@ -2,6 +2,5 @@ output.. = bin/ bin.includes = META-INF/,\ .,\ WEB-INF/,\ - OSGI-INF/,\ - OSGI-INF/aspect.xml + OSGI-INF/ source.. = src/ diff --git a/de.dentrassi.pm.p2/src/de/dentrassi/pm/p2/internal/aspect/P2RepoChannelAggregator.java b/de.dentrassi.pm.p2/src/de/dentrassi/pm/p2/internal/aspect/P2RepoChannelAggregator.java index 763144d5..9ffbe66b 100644 --- a/de.dentrassi.pm.p2/src/de/dentrassi/pm/p2/internal/aspect/P2RepoChannelAggregator.java +++ b/de.dentrassi.pm.p2/src/de/dentrassi/pm/p2/internal/aspect/P2RepoChannelAggregator.java @@ -20,18 +20,11 @@ import de.dentrassi.pm.aspect.aggregate.AggregationContext; import de.dentrassi.pm.aspect.aggregate.ChannelAggregator; import de.dentrassi.pm.common.ArtifactInformation; -import de.dentrassi.pm.p2.aspect.P2RepositoryAspect; public class P2RepoChannelAggregator implements ChannelAggregator { private static final DateFormat DATE_FORMAT = new SimpleDateFormat ( "yyyy-MM-dd HH:mm:ss.SSS" ); - @Override - public String getId () - { - return P2RepositoryAspect.ID; - } - @Override public Map aggregateMetaData ( final AggregationContext context ) throws Exception { diff --git a/de.dentrassi.pm.rpm/.classpath b/de.dentrassi.pm.rpm/.classpath new file mode 100644 index 00000000..7ded8346 --- /dev/null +++ b/de.dentrassi.pm.rpm/.classpath @@ -0,0 +1,13 @@ + + + + + + + + + + + + + diff --git a/de.dentrassi.pm.rpm/.gitignore b/de.dentrassi.pm.rpm/.gitignore new 
file mode 100644 index 00000000..ae3c1726 --- /dev/null +++ b/de.dentrassi.pm.rpm/.gitignore @@ -0,0 +1 @@ +/bin/ diff --git a/de.dentrassi.pm.rpm/.project b/de.dentrassi.pm.rpm/.project new file mode 100644 index 00000000..b9ec1570 --- /dev/null +++ b/de.dentrassi.pm.rpm/.project @@ -0,0 +1,52 @@ + + + de.dentrassi.pm.rpm + + + + + + org.eclipse.wst.jsdt.core.javascriptValidator + + + + + org.eclipse.wst.common.project.facet.core.builder + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.pde.ManifestBuilder + + + + + org.eclipse.pde.SchemaBuilder + + + + + org.eclipse.pde.ds.core.builder + + + + + org.eclipse.wst.validation.validationbuilder + + + + + + org.eclipse.jem.workbench.JavaEMFNature + org.eclipse.wst.common.modulecore.ModuleCoreNature + org.eclipse.pde.PluginNature + org.eclipse.jdt.core.javanature + org.eclipse.wst.common.project.facet.core.nature + org.eclipse.wst.jsdt.core.jsNature + + diff --git a/de.dentrassi.pm.rpm/.settings/.jsdtscope b/de.dentrassi.pm.rpm/.settings/.jsdtscope new file mode 100644 index 00000000..92e666d7 --- /dev/null +++ b/de.dentrassi.pm.rpm/.settings/.jsdtscope @@ -0,0 +1,12 @@ + + + + + + + + + + + + diff --git a/de.dentrassi.pm.rpm/.settings/org.eclipse.jdt.core.prefs b/de.dentrassi.pm.rpm/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000..0c68a61d --- /dev/null +++ b/de.dentrassi.pm.rpm/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,7 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/de.dentrassi.pm.rpm/.settings/org.eclipse.pde.core.prefs b/de.dentrassi.pm.rpm/.settings/org.eclipse.pde.core.prefs new file mode 100644 index 00000000..e8ff8be0 --- /dev/null +++ 
b/de.dentrassi.pm.rpm/.settings/org.eclipse.pde.core.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +pluginProject.equinox=false +pluginProject.extensions=false +resolve.requirebundle=false diff --git a/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.common.component b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.common.component new file mode 100644 index 00000000..dadf2e4a --- /dev/null +++ b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.common.component @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.common.project.facet.core.xml b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.common.project.facet.core.xml new file mode 100644 index 00000000..427e35e9 --- /dev/null +++ b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.common.project.facet.core.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.jsdt.ui.superType.container b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.jsdt.ui.superType.container new file mode 100644 index 00000000..3bd5d0a4 --- /dev/null +++ b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.jsdt.ui.superType.container @@ -0,0 +1 @@ +org.eclipse.wst.jsdt.launching.baseBrowserLibrary \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.jsdt.ui.superType.name b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.jsdt.ui.superType.name new file mode 100644 index 00000000..05bd71b6 --- /dev/null +++ b/de.dentrassi.pm.rpm/.settings/org.eclipse.wst.jsdt.ui.superType.name @@ -0,0 +1 @@ +Window \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/META-INF/MANIFEST.MF b/de.dentrassi.pm.rpm/META-INF/MANIFEST.MF new file mode 100644 index 00000000..199861a7 --- /dev/null +++ b/de.dentrassi.pm.rpm/META-INF/MANIFEST.MF @@ -0,0 +1,37 @@ +Manifest-Version: 1.0 +Bundle-ManifestVersion: 2 +Bundle-Name: RPM Aspect +Bundle-SymbolicName: de.dentrassi.pm.rpm +Bundle-Version: 1.0.0.qualifier +Bundle-Vendor: IBH SYSTEMS GmbH 
+Bundle-RequiredExecutionEnvironment: JavaSE-1.8 +Web-ContextPath: /yum +Require-Bundle: org.apache.taglibs.standard-impl;bundle-version="1.2.1" +Import-Package: com.google.common.escape;version="18.0.0", + com.google.common.net;version="18.0.0", + com.google.gson;version="2.3.1", + de.dentrassi.osgi.web;version="1.0.0", + de.dentrassi.pm;version="1.0.0", + de.dentrassi.pm.aspect;version="1.0.0", + de.dentrassi.pm.aspect.aggregate;version="1.0.0", + de.dentrassi.pm.aspect.common.spool;version="1.0.0", + de.dentrassi.pm.aspect.extract;version="1.0.0", + de.dentrassi.pm.aspect.group;version="1.0.0", + de.dentrassi.pm.common;version="1.0.0", + de.dentrassi.pm.common.utils;version="1.0.0", + de.dentrassi.pm.common.web;version="1.0.0", + de.dentrassi.pm.common.web.menu;version="1.0.0", + de.dentrassi.pm.storage;version="1.0.0", + de.dentrassi.pm.storage.service;version="1.0.0", + de.dentrassi.pm.storage.service.servlet;version="1.0.0", + de.dentrassi.pm.storage.service.util;version="1.0.0", + de.dentrassi.pm.storage.web.utils;version="1.0.0", + de.dentrassi.rpm;version="1.0.0", + javax.servlet;version="3.1.0", + javax.servlet.http;version="3.1.0", + org.apache.commons.compress.archivers.cpio;version="1.9.0", + org.slf4j;version="1.7.2" +Service-Component: OSGI-INF/rpm.xml, + OSGI-INF/yum.xml, + OSGI-INF/groupRpm.xml, + OSGI-INF/yumInterface.xml diff --git a/de.dentrassi.pm.rpm/OSGI-INF/groupRpm.xml b/de.dentrassi.pm.rpm/OSGI-INF/groupRpm.xml new file mode 100644 index 00000000..8b83c6c2 --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/groupRpm.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/de.dentrassi.pm.rpm/OSGI-INF/rpm.html b/de.dentrassi.pm.rpm/OSGI-INF/rpm.html new file mode 100644 index 00000000..9558b481 --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/rpm.html @@ -0,0 +1,3 @@ +

+Extract meta data from RPM files +

\ No newline at end of file diff --git a/de.dentrassi.pm.rpm/OSGI-INF/rpm.properties b/de.dentrassi.pm.rpm/OSGI-INF/rpm.properties new file mode 100644 index 00000000..3644b269 --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/rpm.properties @@ -0,0 +1,6 @@ +drone.aspect.id=rpm +drone.aspect.name=RPM channel aspect +drone.aspect.description.file=/OSGI-INF/rpm.html +drone.aspect.version=1.0.0 +drone.aspect.requires= +drone.aspect.group.id=rpm \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/OSGI-INF/rpm.xml b/de.dentrassi.pm.rpm/OSGI-INF/rpm.xml new file mode 100644 index 00000000..a4896988 --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/rpm.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/de.dentrassi.pm.rpm/OSGI-INF/yum.html b/de.dentrassi.pm.rpm/OSGI-INF/yum.html new file mode 100644 index 00000000..25d2d9e6 --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/yum.html @@ -0,0 +1,3 @@ +

+Create a YUM repository out of RPM meta data. +

\ No newline at end of file diff --git a/de.dentrassi.pm.rpm/OSGI-INF/yum.properties b/de.dentrassi.pm.rpm/OSGI-INF/yum.properties new file mode 100644 index 00000000..5b62000f --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/yum.properties @@ -0,0 +1,6 @@ +drone.aspect.id=yum +drone.aspect.name=YUM repository aspect +drone.aspect.description.file=/OSGI-INF/yum.html +drone.aspect.version=1.0.0 +drone.aspect.requires=rpm, hasher +drone.aspect.group.id=rpm \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/OSGI-INF/yum.xml b/de.dentrassi.pm.rpm/OSGI-INF/yum.xml new file mode 100644 index 00000000..fcd94471 --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/yum.xml @@ -0,0 +1,8 @@ + + + + + + + + diff --git a/de.dentrassi.pm.rpm/OSGI-INF/yumInterface.xml b/de.dentrassi.pm.rpm/OSGI-INF/yumInterface.xml new file mode 100644 index 00000000..f0f2dc2b --- /dev/null +++ b/de.dentrassi.pm.rpm/OSGI-INF/yumInterface.xml @@ -0,0 +1,7 @@ + + + + + + + diff --git a/de.dentrassi.pm.rpm/WEB-INF/views/channel.jsp b/de.dentrassi.pm.rpm/WEB-INF/views/channel.jsp new file mode 100644 index 00000000..7c5b38ae --- /dev/null +++ b/de.dentrassi.pm.rpm/WEB-INF/views/channel.jsp @@ -0,0 +1,56 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %> + + + + + + YUM repository | ${fn:escapeXml(channel.getNameOrId()) } + + + + + + +
+

YUM repository – ${fn:escapeXml(channel.getNameOrId()) }

+

Channel: ${channel.id }

+ ${fn:escapeXml(channel.description) } +
+ +
+ + + +
+ + + + + + + \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/WEB-INF/views/index.jsp b/de.dentrassi.pm.rpm/WEB-INF/views/index.jsp new file mode 100644 index 00000000..d0b5ee72 --- /dev/null +++ b/de.dentrassi.pm.rpm/WEB-INF/views/index.jsp @@ -0,0 +1,56 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %> + + + + + + YUM repository adapter + + + + + + + +
+

YUM repository adapter

+ ${fn:escapeXml(channel.description) } +
+ +
+ +

+ This is the YUM repository adapter of Package Drone. +

+ +
+ + + + + + + \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/WEB-INF/views/repodata.jsp b/de.dentrassi.pm.rpm/WEB-INF/views/repodata.jsp new file mode 100644 index 00000000..77d036af --- /dev/null +++ b/de.dentrassi.pm.rpm/WEB-INF/views/repodata.jsp @@ -0,0 +1,59 @@ +<%@ page language="java" contentType="text/html; charset=UTF-8" + pageEncoding="UTF-8"%> + +<%@ taglib uri="http://java.sun.com/jsp/jstl/functions" prefix="fn" %> +<%@ taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c" %> + + + + + + YUM repository | ${fn:escapeXml(channel.getNameOrId()) } / repodata + + + + + + +
+

YUM repository – ${fn:escapeXml(channel.getNameOrId()) } / repodata

+

Channel: ${channel.id }

+ ${fn:escapeXml(channel.description) } +
+ +
+ + + +
+ + + + + + + \ No newline at end of file diff --git a/de.dentrassi.pm.rpm/WEB-INF/web.xml b/de.dentrassi.pm.rpm/WEB-INF/web.xml new file mode 100644 index 00000000..0928c414 --- /dev/null +++ b/de.dentrassi.pm.rpm/WEB-INF/web.xml @@ -0,0 +1,18 @@ + + + + + main + de.dentrassi.pm.rpm.yum.internal.YumServlet + 1 + + + + main + / + + + diff --git a/de.dentrassi.pm.rpm/build.properties b/de.dentrassi.pm.rpm/build.properties new file mode 100644 index 00000000..35af4580 --- /dev/null +++ b/de.dentrassi.pm.rpm/build.properties @@ -0,0 +1,6 @@ +output.. = bin/ +bin.includes = META-INF/,\ + .,\ + OSGI-INF/,\ + OSGI-INF/rpm.xml +source.. = src/ diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/Constants.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/Constants.java new file mode 100644 index 00000000..ab43ef8c --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/Constants.java @@ -0,0 +1,28 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm; + +import de.dentrassi.pm.common.MetaKey; + +public final class Constants +{ + public static final String GROUP_ID = "rpm"; + + public static final String RPM_ASPECT_ID = "rpm"; + + public static final String YUM_ASPECT_ID = "yum"; + + public static final MetaKey KEY_INFO = new MetaKey ( RPM_ASPECT_ID, "info" ); + + private Constants () + { + } +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/RpmInformation.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/RpmInformation.java new file mode 100644 index 00000000..6ba2e0e7 --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/RpmInformation.java @@ -0,0 +1,482 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm; + +import java.util.HashSet; +import java.util.LinkedList; +import java.util.List; +import java.util.Set; + +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; + +public class RpmInformation +{ + public static class Version + { + private String version; + + private String release; + + private String epoch; + + public Version () + { + } + + public Version ( final String version, final String release, final String epoch ) + { + this.version = version; + this.release = release; + this.epoch = epoch; + } + + public String getVersion () + { + return this.version; + } + + public void setVersion ( final String version ) + { + this.version = version; + } + + public String getRelease () + { + return this.release; + } + + public void setRelease ( final String release ) + { + this.release = release; + } + + public String getEpoch () + { + return this.epoch; + } + + public void setEpoch ( final String epoch ) + { + this.epoch = epoch; + } + } + + public static class Changelog + { + private long timestamp; + + private String author; + + private String text; + + public Changelog () + { + } + + public Changelog ( final long timestamp, final String author, final String text ) + { + this.timestamp = timestamp; + this.author = author; + this.text = text; + } + + public long getTimestamp () + { + return this.timestamp; + } + + public void setTimestamp ( final long timestamp ) + { + this.timestamp = timestamp; + } + + public String getAuthor () + { + return this.author; + } + + public void setAuthor ( final String author ) + { + this.author = author; + } + + 
public String getText () + { + return this.text; + } + + public void setText ( final String text ) + { + this.text = text; + } + + } + + public static class Dependency + { + private String name; + + private String version; + + private long flags; + + public Dependency () + { + } + + public Dependency ( final String name, final String version, final long flags ) + { + this.name = name; + this.version = version; + this.flags = flags; + } + + public String getName () + { + return this.name; + } + + public void setName ( final String name ) + { + this.name = name; + } + + public String getVersion () + { + return this.version; + } + + public void setVersion ( final String version ) + { + this.version = version; + } + + public long getFlags () + { + return this.flags; + } + + public void setFlags ( final long flags ) + { + this.flags = flags; + } + } + + private String name; + + private Version version; + + private String architecture; + + private String license; + + private List changelog = new LinkedList<> (); + + private Set files = new HashSet<> (); + + private Set directories = new HashSet<> (); + + private List provides = new LinkedList<> (); + + private List requires = new LinkedList<> (); + + private List obsoletes = new LinkedList<> (); + + private List conflicts = new LinkedList<> (); + + private String summary; + + private String description; + + private String packager; + + private String vendor; + + private String url; + + private String buildHost; + + private String group; + + private Long installedSize; + + private Long archiveSize; + + private Long buildTimestamp; + + private long headerStart; + + private long headerEnd; + + private String sourcePackage; + + public void setSourcePackage ( final String sourcePackage ) + { + this.sourcePackage = sourcePackage; + } + + public String getSourcePackage () + { + return this.sourcePackage; + } + + public void setGroup ( final String group ) + { + this.group = group; + } + + public String getGroup () + { + return 
this.group; + } + + public void setLicense ( final String license ) + { + this.license = license; + } + + public String getLicense () + { + return this.license; + } + + public String getName () + { + return this.name; + } + + public void setName ( final String name ) + { + this.name = name; + } + + public Version getVersion () + { + return this.version; + } + + public void setVersion ( final Version version ) + { + this.version = version; + } + + public String getArchitecture () + { + return this.architecture; + } + + public void setArchitecture ( final String architecture ) + { + this.architecture = architecture; + } + + public List getChangelog () + { + return this.changelog; + } + + public void setChangelog ( final List changelog ) + { + this.changelog = changelog; + } + + public Set getFiles () + { + return this.files; + } + + public void setFiles ( final Set files ) + { + this.files = files; + } + + public Set getDirectories () + { + return this.directories; + } + + public void setDirectories ( final Set directories ) + { + this.directories = directories; + } + + public static Gson makeGson () + { + final GsonBuilder gb = new GsonBuilder (); + return gb.create (); + } + + public static RpmInformation fromJson ( final String json ) + { + if ( json == null ) + { + return null; + } + + return makeGson ().fromJson ( json, RpmInformation.class ); + } + + public String toJson () + { + return makeGson ().toJson ( this ); + } + + public String getSummary () + { + return this.summary; + } + + public void setSummary ( final String summary ) + { + this.summary = summary; + } + + public String getDescription () + { + return this.description; + } + + public void setDescription ( final String description ) + { + this.description = description; + } + + public String getPackager () + { + return this.packager; + } + + public void setPackager ( final String packager ) + { + this.packager = packager; + } + + public String getUrl () + { + return this.url; + } + + public void 
setUrl ( final String url ) + { + this.url = url; + } + + public String getVendor () + { + return this.vendor; + } + + public void setVendor ( final String vendor ) + { + this.vendor = vendor; + } + + public String getBuildHost () + { + return this.buildHost; + } + + public void setBuildHost ( final String buildHost ) + { + this.buildHost = buildHost; + } + + public Long getInstalledSize () + { + return this.installedSize; + } + + public void setInstalledSize ( final Long installedSize ) + { + this.installedSize = installedSize; + } + + public Long getArchiveSize () + { + return this.archiveSize; + } + + public void setArchiveSize ( final Long archiveSize ) + { + this.archiveSize = archiveSize; + } + + public Long getBuildTimestamp () + { + return this.buildTimestamp; + } + + public void setBuildTimestamp ( final Long buildTimestamp ) + { + this.buildTimestamp = buildTimestamp; + } + + public long getHeaderStart () + { + return this.headerStart; + } + + public void setHeaderStart ( final long headerStart ) + { + this.headerStart = headerStart; + } + + public long getHeaderEnd () + { + return this.headerEnd; + } + + public void setHeaderEnd ( final long headerEnd ) + { + this.headerEnd = headerEnd; + } + + public List getProvides () + { + return this.provides; + } + + public void setProvides ( final List provides ) + { + this.provides = provides; + } + + public List getRequires () + { + return this.requires; + } + + public void setRequires ( final List requires ) + { + this.requires = requires; + } + + public List getObsoletes () + { + return this.obsoletes; + } + + public void setObsoletes ( final List obsoletes ) + { + this.obsoletes = obsoletes; + } + + public List getConflicts () + { + return this.conflicts; + } + + public void setConflicts ( final List conflicts ) + { + this.conflicts = conflicts; + } +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/AspectFactoryImpl.java 
b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/AspectFactoryImpl.java new file mode 100644 index 00000000..04154a0c --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/AspectFactoryImpl.java @@ -0,0 +1,39 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.internal; + +import de.dentrassi.pm.aspect.ChannelAspect; +import de.dentrassi.pm.aspect.ChannelAspectFactory; +import de.dentrassi.pm.aspect.extract.Extractor; +import de.dentrassi.pm.rpm.Constants; + +public class AspectFactoryImpl implements ChannelAspectFactory +{ + @Override + public ChannelAspect createAspect () + { + return new ChannelAspect () { + + @Override + public String getId () + { + return Constants.RPM_ASPECT_ID; + } + + @Override + public Extractor getExtractor () + { + return new RpmExtractor (); + } + }; + } + +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/RpmExtractor.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/RpmExtractor.java new file mode 100644 index 00000000..da8e2d72 --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/RpmExtractor.java @@ -0,0 +1,289 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.internal; + +import java.io.BufferedInputStream; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import de.dentrassi.pm.aspect.extract.Extractor; +import de.dentrassi.pm.rpm.Constants; +import de.dentrassi.pm.rpm.RpmInformation; +import de.dentrassi.pm.rpm.RpmInformation.Dependency; +import de.dentrassi.rpm.RpmHeader; +import de.dentrassi.rpm.RpmInputStream; +import de.dentrassi.rpm.RpmSignatureTag; +import de.dentrassi.rpm.RpmTag; + +public class RpmExtractor implements Extractor +{ + + private final static Logger logger = LoggerFactory.getLogger ( RpmExtractor.class ); + + @Override + public void extractMetaData ( final Context context, final Map metadata ) throws Exception + { + final Path path = context.getPath (); + + try ( RpmInputStream in = new RpmInputStream ( new BufferedInputStream ( Files.newInputStream ( path, StandardOpenOption.READ ) ) ) ) + { + final RpmInformation info = makeInformation ( in ); + if ( info == null ) + { + return; + } + + metadata.put ( "artifactLabel", "RPM Package" ); + + metadata.put ( "name", asString ( in.getPayloadHeader ().getTag ( RpmTag.NAME ) ) ); + metadata.put ( "version", 
asString ( in.getPayloadHeader ().getTag ( RpmTag.VERSION ) ) ); + metadata.put ( "os", asString ( in.getPayloadHeader ().getTag ( RpmTag.OS ) ) ); + metadata.put ( "arch", asString ( in.getPayloadHeader ().getTag ( RpmTag.ARCH ) ) ); + + metadata.put ( Constants.KEY_INFO.getKey (), info.toJson () ); + } + } + + private RpmInformation makeInformation ( final RpmInputStream in ) throws IOException + { + final RpmHeader header = in.getPayloadHeader (); + final RpmHeader signature = in.getSignatureHeader (); + + try + { + final RpmInformation result = new RpmInformation (); + + result.setHeaderStart ( header.getStart () ); + result.setHeaderEnd ( header.getStart () + header.getLength () ); + + result.setName ( asString ( header.getTag ( RpmTag.NAME ) ) ); + result.setArchitecture ( asString ( header.getTag ( RpmTag.ARCH ) ) ); + result.setSummary ( asString ( header.getTag ( RpmTag.SUMMARY ) ) ); + result.setDescription ( asString ( header.getTag ( RpmTag.DESCRIPTION ) ) ); + result.setPackager ( asString ( header.getTag ( RpmTag.PACKAGER ) ) ); + result.setUrl ( asString ( header.getTag ( RpmTag.URL ) ) ); + result.setLicense ( asString ( header.getTag ( RpmTag.LICENSE ) ) ); + result.setVendor ( asString ( header.getTag ( RpmTag.VENDOR ) ) ); + result.setGroup ( asString ( header.getTag ( RpmTag.GROUP ) ) ); + + result.setBuildHost ( asString ( header.getTag ( RpmTag.BUILDHOST ) ) ); + result.setBuildTimestamp ( asLong ( header.getTag ( RpmTag.BUILDTIME ) ) ); + result.setSourcePackage ( asString ( header.getTag ( RpmTag.SOURCE_PACKAGE ) ) ); + + result.setInstalledSize ( asLong ( header.getTag ( RpmTag.INSTALLED_SIZE ) ) ); + result.setArchiveSize ( asLong ( header.getTag ( RpmTag.ARCHIVE_SIZE ) ) ); + if ( result.getArchiveSize () == null ) + { + result.setArchiveSize ( asLong ( signature.getTag ( RpmSignatureTag.PAYLOAD_SIZE ) ) ); + } + + // version + + final RpmInformation.Version ver = new RpmInformation.Version ( asString ( header.getTag ( RpmTag.VERSION ) ), 
asString ( header.getTag ( RpmTag.RELEASE ) ), asString ( header.getTag ( RpmTag.EPOCH ) ) ); + result.setVersion ( ver ); + + // changelog + + final Object val = header.getTag ( RpmTag.CHANGELOG_TIMESTAMP ); + if ( val instanceof Long[] ) + { + final Long[] ts = (Long[])val; + final String[] authors = (String[])header.getTag ( RpmTag.CHANGELOG_AUTHOR ); + final String[] texts = (String[])header.getTag ( RpmTag.CHANGELOG_TEXT ); + + final List changes = new ArrayList<> ( ts.length ); + + for ( int i = 0; i < ts.length; i++ ) + { + changes.add ( new RpmInformation.Changelog ( ts[i], authors[i], texts[i] ) ); + } + + Collections.sort ( changes, ( o1, o2 ) -> Long.compare ( o1.getTimestamp (), o2.getTimestamp () ) ); + + result.setChangelog ( changes ); + } + + // dependencies + + result.setProvides ( makeDependencies ( header, RpmTag.PROVIDE_NAME, RpmTag.PROVIDE_VERSION, RpmTag.PROVIDE_FLAGS ) ); + result.setRequires ( makeDependencies ( header, RpmTag.REQUIRE_NAME, RpmTag.REQUIRE_VERSION, RpmTag.REQUIRE_FLAGS ) ); + result.setConflicts ( makeDependencies ( header, RpmTag.CONFLICT_NAME, RpmTag.CONFLICT_VERSION, RpmTag.CONFLICT_FLAGS ) ); + result.setObsoletes ( makeDependencies ( header, RpmTag.OBSOLETE_NAME, RpmTag.OBSOLETE_VERSION, RpmTag.OBSOLETE_FLAGS ) ); + + // files + + final CpioArchiveInputStream cpio = in.getCpioStream (); + CpioArchiveEntry cpioEntry; + while ( ( cpioEntry = cpio.getNextCPIOEntry () ) != null ) + { + final String name = normalize ( cpioEntry.getName () ); + + if ( cpioEntry.isRegularFile () ) + { + result.getFiles ().add ( name ); + } + else if ( cpioEntry.isDirectory () ) + { + result.getDirectories ().add ( name ); + } + } + + return result; + } + catch ( final Exception e ) + { + logger.info ( "Failed to create RPM information", e ); + return null; + } + } + + private List makeDependencies ( final RpmHeader header, final RpmTag namesTag, final RpmTag versionsTag, final RpmTag flagsTag ) + { + Object namesVal = header.getTag ( namesTag 
); + Object versionsVal = header.getTag ( versionsTag ); + Object flagsVal = header.getTag ( flagsTag ); + + if ( namesVal == null || ! ( namesVal instanceof String[] ) ) + { + if ( namesVal instanceof String ) + { + namesVal = new String[] { (String)namesVal }; + } + else + { + return Collections.emptyList (); + } + } + + if ( versionsVal != null && ! ( versionsVal instanceof String[] ) ) + { + if ( versionsVal instanceof String ) + { + versionsVal = new String[] { (String)versionsVal }; + } + else + { + throw new IllegalStateException ( String.format ( "Invalid dependencies version format [%s]: %s", versionsTag, versionsVal ) ); + } + } + + if ( flagsVal != null && ! ( flagsVal instanceof Long[] ) ) + { + if ( flagsVal instanceof Long ) + { + flagsVal = new Long[] { (Long)flagsVal }; + } + else + { + throw new IllegalStateException ( String.format ( "Invalid dependencies flags format [%s]: %s", flagsTag, flagsVal ) ); + } + } + + final String[] names = (String[])namesVal; + final String[] versions = (String[])versionsVal; + final Long[] flags = (Long[])flagsVal; + + if ( versions != null && names.length != versions.length ) + { + throw new IllegalStateException ( String.format ( "Invalid size of dependency versions array [%s] - expected: %s, actual: %s", versionsTag, names.length, versions.length ) ); + } + + if ( flags != null && names.length != flags.length ) + { + throw new IllegalStateException ( String.format ( "Invalid size of dependency flags array [%s] - expected: %s, actual: %s", flagsTag, names.length, flags.length ) ); + } + + final List result = new ArrayList<> ( names.length ); + + final Set known = new HashSet<> (); + + for ( int i = 0; i < names.length; i++ ) + { + final String name = names[i]; + String version = versions[i]; + if ( version != null && version.isEmpty () ) + { + version = null; + } + final Long flag = flags[i]; + + final String key = name; // for now the key is the name + + if ( known.add ( key ) ) + { + result.add ( new Dependency 
( name, version, flag != null ? flag : 0L ) ); + } + } + + return result; + } + + private static String normalize ( final String name ) + { + if ( name.startsWith ( "./" ) ) + { + return name.substring ( 1 ); + } + + return name; + } + + private static String asString ( final Object value ) + { + if ( value == null ) + { + return null; + } + + if ( value instanceof String ) + { + return (String)value; + } + + return value.toString (); + } + + private static Long asLong ( final Object value ) + { + if ( value == null ) + { + return null; + } + + if ( value instanceof Number ) + { + return ( (Number)value ).longValue (); + } + + try + { + return Long.parseLong ( value.toString () ); + } + catch ( final NumberFormatException e ) + { + return null; + } + } + +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/RpmGroup.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/RpmGroup.java new file mode 100644 index 00000000..4d979bdf --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/internal/RpmGroup.java @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.internal; + +import de.dentrassi.pm.aspect.group.Group; +import de.dentrassi.pm.aspect.group.GroupInformation; +import de.dentrassi.pm.rpm.Constants; + +public class RpmGroup implements Group +{ + private static final GroupInformation INFO = new GroupInformation () { + + @Override + public String getName () + { + return "RPM"; + } + + @Override + public String getId () + { + return Constants.GROUP_ID; + } + }; + + @Override + public GroupInformation getInformation () + { + return INFO; + } + +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/RepositoryCreator.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/RepositoryCreator.java new file mode 100644 index 00000000..22e53f54 --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/RepositoryCreator.java @@ -0,0 +1,461 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.yum; + +import static de.dentrassi.pm.common.XmlHelper.addElement; +import static de.dentrassi.pm.common.XmlHelper.addOptionalElement; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.Arrays; +import java.util.EnumSet; +import java.util.List; +import java.util.TreeSet; +import java.util.function.Predicate; +import java.util.regex.Pattern; +import java.util.stream.Collectors; +import java.util.zip.GZIPOutputStream; + +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import de.dentrassi.pm.aspect.common.spool.OutputSpooler; +import de.dentrassi.pm.aspect.common.spool.SpoolOutTarget; +import de.dentrassi.pm.common.ArtifactInformation; +import de.dentrassi.pm.common.XmlHelper; +import de.dentrassi.pm.common.utils.IOConsumer; +import de.dentrassi.pm.rpm.RpmInformation; +import de.dentrassi.pm.rpm.RpmInformation.Changelog; +import de.dentrassi.pm.rpm.RpmInformation.Dependency; +import de.dentrassi.rpm.RpmDependencyFlags; +import de.dentrassi.rpm.RpmVersion; + +public class RepositoryCreator +{ + private final XmlHelper xml = new XmlHelper (); + + private final OutputSpooler primaryStreamBuilder; + + private final OutputSpooler filelistsStreamBuilder; + + private final OutputSpooler otherStreamBuilder; + + private final OutputSpooler mdStreamBuilder; + + private final List primaryFiles; + + private final List primaryDirs; + + public interface Context + { + public void addPackage ( final String sha1, final ArtifactInformation artifact, final RpmInformation info ); + } + + public class ContextImpl implements Context + { + 
private final OutputStream primaryStream; + + private final OutputStream filelistsStream; + + private final OutputStream otherStream; + + private final Document primary; + + private final Document filelists; + + private final Document other; + + private final XmlHelper xml; + + private final Element primaryRoot; + + private final Element filelistsRoot; + + private final Element otherRoot; + + private long count; + + public ContextImpl ( final OutputStream primaryStream, final OutputStream filelistsStream, final OutputStream otherStream, final XmlHelper xml ) + { + this.primaryStream = primaryStream; + this.filelistsStream = filelistsStream; + this.otherStream = otherStream; + + this.primary = xml.create (); + this.primaryRoot = this.primary.createElementNS ( "http://linux.duke.edu/metadata/common", "metadata" ); + this.primaryRoot.setAttribute ( "xmlns:rpm", "http://linux.duke.edu/metadata/rpm" ); + this.primary.appendChild ( this.primaryRoot ); + + this.filelists = xml.create (); + this.filelistsRoot = this.filelists.createElementNS ( "http://linux.duke.edu/metadata/filelists", "filelists" ); + this.filelists.appendChild ( this.filelistsRoot ); + + this.other = xml.create (); + this.otherRoot = this.other.createElementNS ( "http://linux.duke.edu/metadata/other", "otherdata" ); + this.other.appendChild ( this.otherRoot ); + + this.xml = xml; + } + + @Override + public void addPackage ( final String sha1, final ArtifactInformation artifact, final RpmInformation info ) + { + if ( info == null ) + { + return; + } + + this.count++; + + // insert to primary + + insertToPrimary ( sha1, artifact, info ); + + // insert to "filelists" + + { + final Element pkg = createPackage ( this.filelistsRoot, sha1, info ); + appendFiles ( info, pkg, null, null ); + } + + // insert to "other" + + { + final Element pkg = createPackage ( this.otherRoot, sha1, info ); + for ( final Changelog log : info.getChangelog () ) + { + final Element cl = addElement ( pkg, "changelog", log.getText () 
); + cl.setAttribute ( "author", log.getAuthor () ); + cl.setAttribute ( "date", "" + log.getTimestamp () ); + } + } + } + + private void appendFiles ( final RpmInformation info, final Element pkg, final Predicate fileFilter, final Predicate dirFilter ) + { + for ( final String file : new TreeSet<> ( info.getFiles () ) ) + { + if ( fileFilter == null || fileFilter.test ( file ) ) + { + addElement ( pkg, "file", file ); + } + } + for ( final String dir : new TreeSet<> ( info.getDirectories () ) ) + { + if ( dirFilter == null || dirFilter.test ( dir ) ) + { + final Element ele = addElement ( pkg, "file", dir ); + ele.setAttribute ( "type", "dir" ); + } + } + } + + private void insertToPrimary ( final String sha1, final ArtifactInformation artifact, final RpmInformation info ) + { + final Element pkg = addElement ( this.primaryRoot, "package" ); + pkg.setAttribute ( "type", "rpm" ); + + addElement ( pkg, "name", info.getName () ); + addElement ( pkg, "arch", info.getArchitecture () ); + + addVersion ( pkg, info.getVersion () ); + + final Element checksum = addElement ( pkg, "checksum", sha1 ); + checksum.setAttribute ( "type", "sha" ); + checksum.setAttribute ( "pkgid", "YES" ); + + addElement ( pkg, "summary", info.getSummary () ); + addElement ( pkg, "description", info.getDescription () ); + addElement ( pkg, "packager", info.getPackager () ); + addElement ( pkg, "url", info.getUrl () ); + + // time + + final Element time = addElement ( pkg, "time" ); + time.setAttribute ( "file", "" + artifact.getCreationTimestamp ().getTime () / 1000 ); + if ( info.getBuildTimestamp () != null ) + { + time.setAttribute ( "build", "" + info.getBuildTimestamp () ); + } + + // size + + final Element size = addElement ( pkg, "size" ); + size.setAttribute ( "package", "" + artifact.getSize () ); + if ( info.getInstalledSize () != null ) + { + size.setAttribute ( "installed", "" + info.getInstalledSize () ); + } + if ( info.getArchiveSize () != null ) + { + size.setAttribute ( 
"archive", "" + info.getArchiveSize () ); + } + + // location + + final Element location = addElement ( pkg, "location" ); + location.setAttribute ( "href", String.format ( "pool/%s/%s", artifact.getId (), artifact.getName () ) ); + + // add format section + + final Element fmt = addElement ( pkg, "format" ); + addOptionalElement ( fmt, "rpm:license", info.getLicense () ); + addOptionalElement ( fmt, "rpm:vendor", info.getVendor () ); + addOptionalElement ( fmt, "rpm:group", info.getGroup () ); + addOptionalElement ( fmt, "rpm:buildhost", info.getBuildHost () ); + addOptionalElement ( fmt, "rpm:sourcerpm", info.getSourcePackage () ); + + // add header range + + final Element rng = addElement ( fmt, "rpm:header-range" ); + rng.setAttribute ( "start", "" + info.getHeaderStart () ); + rng.setAttribute ( "end", "" + info.getHeaderEnd () ); + + addDependencies ( fmt, "rpm:provides", info.getProvides () ); + addDependencies ( fmt, "rpm:requires", info.getRequires () ); + addDependencies ( fmt, "rpm:conflicts", info.getConflicts () ); + addDependencies ( fmt, "rpm:obsoletes", info.getObsoletes () ); + + // add primary files + + appendFiles ( info, pkg, file -> matches ( file, RepositoryCreator.this.primaryFiles ), dir -> matches ( dir, RepositoryCreator.this.primaryDirs ) ); + } + + private void addDependencies ( final Element fmt, final String elementName, final List deps ) + { + final Element ele = addElement ( fmt, elementName ); + + for ( final Dependency dep : deps ) + { + final EnumSet flags = RpmDependencyFlags.parse ( dep.getFlags () ); + if ( flags.contains ( RpmDependencyFlags.RPMLIB ) ) + { + continue; + } + + final Element entry = addElement ( ele, "rpm:entry" ); + entry.setAttribute ( "name", dep.getName () ); + if ( dep.getVersion () != null ) + { + final RpmVersion version = RpmVersion.valueOf ( dep.getVersion () ); + entry.setAttribute ( "epoch", "" + version.getEpoch ().orElse ( 0 ) ); + entry.setAttribute ( "ver", version.getVersion () ); + if ( 
version.getRelease ().isPresent () ) + { + entry.setAttribute ( "rel", version.getRelease ().get () ); + } + } + + final boolean eq = flags.contains ( RpmDependencyFlags.EQUAL ); + + if ( flags.contains ( RpmDependencyFlags.GREATER ) ) + { + entry.setAttribute ( "flags", eq ? "GE" : "GT" ); + } + else if ( flags.contains ( RpmDependencyFlags.LESS ) ) + { + entry.setAttribute ( "flags", eq ? "LE" : "LT" ); + } + else if ( eq ) + { + entry.setAttribute ( "flags", "EQ" ); + } + + final boolean pre = flags.contains ( RpmDependencyFlags.PREREQ ) || flags.contains ( RpmDependencyFlags.SCRIPT_PRE ) || flags.contains ( RpmDependencyFlags.SCRIPT_POST ); + if ( pre ) + { + entry.setAttribute ( "pre", "1" ); + } + } + } + + private Element createPackage ( final Element root, final String id, final RpmInformation info ) + { + final Element pkg = addElement ( root, "package" ); + pkg.setAttribute ( "pkgid", id ); + pkg.setAttribute ( "name", info.getName () ); + pkg.setAttribute ( "arch", info.getArchitecture () ); + + addVersion ( pkg, info.getVersion () ); + + return pkg; + } + + private Element addVersion ( final Element pkg, final RpmInformation.Version version ) + { + if ( version == null ) + { + return null; + } + + final Element ver = addElement ( pkg, "version" ); + + if ( version.getEpoch () == null || version.getEpoch ().isEmpty () ) + { + ver.setAttribute ( "epoch", "0" ); + } + else + { + ver.setAttribute ( "epoch", version.getEpoch () ); + } + ver.setAttribute ( "ver", version.getVersion () ); + ver.setAttribute ( "rel", version.getRelease () ); + + return ver; + } + + public void close () throws IOException + { + this.primaryRoot.setAttribute ( "packages", "" + this.count ); + this.filelistsRoot.setAttribute ( "packages", "" + this.count ); + this.otherRoot.setAttribute ( "packages", "" + this.count ); + + try + { + this.xml.write ( this.primary, this.primaryStream ); + this.xml.write ( this.filelists, this.filelistsStream ); + this.xml.write ( this.other, 
this.otherStream ); + } + catch ( final IOException e ) + { + throw e; + } + catch ( final Exception e ) + { + throw new IOException ( e ); + } + } + } + + public RepositoryCreator ( final SpoolOutTarget target ) + { + // filters + + final String dirFilter = System.getProperty ( "drone.rpm.yum.primaryDirs", "bin/,^/etc/" ); + final String fileFilter = System.getProperty ( "drone.rpm.yum.primaryFiles", dirFilter ); + + this.primaryFiles = Arrays.stream ( fileFilter.split ( "," ) ).map ( re -> Pattern.compile ( re ) ).collect ( Collectors.toList () ); + this.primaryDirs = Arrays.stream ( dirFilter.split ( "," ) ).map ( re -> Pattern.compile ( re ) ).collect ( Collectors.toList () ); + + // primary + + this.primaryStreamBuilder = new OutputSpooler ( target ); + + this.primaryStreamBuilder.addDigest ( "SHA1" ); + + this.primaryStreamBuilder.addOutput ( "repodata/primary.xml", "application/xml" ); + this.primaryStreamBuilder.addOutput ( "repodata/primary.xml.gz", "application/x-gzip", output -> new GZIPOutputStream ( output ) ); + + // filelists + + this.filelistsStreamBuilder = new OutputSpooler ( target ); + + this.filelistsStreamBuilder.addDigest ( "SHA1" ); + + this.filelistsStreamBuilder.addOutput ( "repodata/filelists.xml", "application/xml" ); + this.filelistsStreamBuilder.addOutput ( "repodata/filelists.xml.gz", "application/x-gzip", output -> new GZIPOutputStream ( output ) ); + + // other + + this.otherStreamBuilder = new OutputSpooler ( target ); + + this.otherStreamBuilder.addDigest ( "SHA1" ); + + this.otherStreamBuilder.addOutput ( "repodata/other.xml", "application/xml" ); + this.otherStreamBuilder.addOutput ( "repodata/other.xml.gz", "application/x-gzip", output -> new GZIPOutputStream ( output ) ); + + // md + + this.mdStreamBuilder = new OutputSpooler ( target ); + + this.mdStreamBuilder.addOutput ( "repodata/repomd.xml", "application/xml" ); + } + + public boolean matches ( final String pathName, final List filterList ) + { + for ( final Pattern p : 
filterList ) + { + if ( p.matcher ( pathName ).find () ) + { + return true; + } + } + return false; + } + + public void process ( final IOConsumer consumer ) throws IOException + { + final long now = System.currentTimeMillis (); + + this.primaryStreamBuilder.open ( primaryStream -> { + this.filelistsStreamBuilder.open ( filelistsStream -> { + this.otherStreamBuilder.open ( otherStream -> { + final ContextImpl ctx = makeContext ( primaryStream, filelistsStream, otherStream ); + consumer.accept ( ctx ); + ctx.close (); + } ); + } ); + } ); + + this.mdStreamBuilder.open ( stream -> { + writeRepoMd ( stream, now ); + } ); + + } + + private ContextImpl makeContext ( final OutputStream primaryStream, final OutputStream filelistsStream, final OutputStream otherStream ) + { + return new ContextImpl ( primaryStream, filelistsStream, otherStream, this.xml ); + } + + private void writeRepoMd ( final OutputStream stream, final long now ) throws IOException + { + final Document doc = this.xml.create (); + + final Element root = doc.createElementNS ( "http://linux.duke.edu/metadata/repo", "repomd" ); + doc.appendChild ( root ); + + root.setAttribute ( "revision", "" + now / 1000 ); + + addDataFile ( root, this.primaryStreamBuilder, "primary", now ); + addDataFile ( root, this.filelistsStreamBuilder, "filelists", now ); + addDataFile ( root, this.otherStreamBuilder, "other", now ); + + try + { + this.xml.write ( doc, stream ); + } + catch ( final Exception e ) + { + throw new IOException ( e ); + } + } + + private void addDataFile ( final Element root, final OutputSpooler spooler, final String baseName, final long now ) + { + final Element data = addElement ( root, "data" ); + + data.setAttribute ( "type", baseName ); + + final Element checksum = addElement ( data, "checksum", spooler.getChecksum ( "repodata/" + baseName + ".xml.gz", "SHA1" ) ); + checksum.setAttribute ( "type", "sha" ); + + final Element openChecksum = addElement ( data, "open-checksum", spooler.getChecksum ( 
"repodata/" + baseName + ".xml", "SHA1" ) ); + openChecksum.setAttribute ( "type", "sha" ); + + final Element location = addElement ( data, "location" ); + location.setAttribute ( "href", "repodata/" + baseName + ".xml.gz" ); + addElement ( data, "timestamp", now / 1000 ); + + addElement ( data, "size", "" + spooler.getSize ( "repodata/" + baseName + ".xml.gz" ) ); + addElement ( data, "open-size", "" + spooler.getSize ( "repodata/" + baseName + ".xml" ) ); + } +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumChannelAggregator.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumChannelAggregator.java new file mode 100644 index 00000000..3f8c481e --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumChannelAggregator.java @@ -0,0 +1,53 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.yum.internal; + +import java.util.HashMap; +import java.util.Map; + +import de.dentrassi.pm.aspect.aggregate.AggregationContext; +import de.dentrassi.pm.aspect.aggregate.ChannelAggregator; +import de.dentrassi.pm.aspect.common.spool.ChannelCacheTarget; +import de.dentrassi.pm.common.ArtifactInformation; +import de.dentrassi.pm.common.MetaKey; +import de.dentrassi.pm.rpm.Constants; +import de.dentrassi.pm.rpm.RpmInformation; +import de.dentrassi.pm.rpm.yum.RepositoryCreator; + +public class YumChannelAggregator implements ChannelAggregator +{ + + @Override + public Map aggregateMetaData ( final AggregationContext 
context ) throws Exception + { + final RepositoryCreator creator = new RepositoryCreator ( new ChannelCacheTarget ( context ) ); + + final Map result = new HashMap<> (); + + creator.process ( repoContext -> { + for ( final ArtifactInformation art : context.getArtifacts () ) + { + final RpmInformation info = RpmInformation.fromJson ( art.getMetaData ().get ( Constants.KEY_INFO ) ); + + if ( info == null ) + { + continue; + } + + final String sha1 = art.getMetaData ().get ( new MetaKey ( "hasher", "sha1" ) ); + + repoContext.addPackage ( sha1, art, info ); + } + } ); + + return result; + } +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumChannelAspectFactory.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumChannelAspectFactory.java new file mode 100644 index 00000000..cfbe703a --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumChannelAspectFactory.java @@ -0,0 +1,40 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.yum.internal; + +import de.dentrassi.pm.aspect.ChannelAspect; +import de.dentrassi.pm.aspect.ChannelAspectFactory; +import de.dentrassi.pm.aspect.aggregate.ChannelAggregator; +import de.dentrassi.pm.rpm.Constants; + +public class YumChannelAspectFactory implements ChannelAspectFactory +{ + + @Override + public ChannelAspect createAspect () + { + return new ChannelAspect () { + + @Override + public String getId () + { + return Constants.YUM_ASPECT_ID; + } + + @Override + public ChannelAggregator getChannelAggregator () + { + return new YumChannelAggregator (); + } + }; + } + +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumInterfaceExtender.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumInterfaceExtender.java new file mode 100644 index 00000000..17a8c7ed --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumInterfaceExtender.java @@ -0,0 +1,55 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.yum.internal; + +import java.util.LinkedList; +import java.util.List; + +import javax.servlet.http.HttpServletRequest; + +import com.google.common.escape.Escaper; +import com.google.common.net.UrlEscapers; + +import de.dentrassi.osgi.web.LinkTarget; +import de.dentrassi.pm.common.web.InterfaceExtender; +import de.dentrassi.pm.common.web.Modifier; +import de.dentrassi.pm.common.web.menu.MenuEntry; +import de.dentrassi.pm.rpm.Constants; +import de.dentrassi.pm.storage.Channel; + +public class YumInterfaceExtender implements InterfaceExtender +{ + private static final Escaper PATH_ESC = UrlEscapers.urlPathSegmentEscaper (); + + @Override + public List getActions ( final HttpServletRequest request, final Object object ) + { + if ( ! 
( object instanceof Channel ) ) + { + return null; + } + + final Channel channel = (Channel)object; + + if ( !channel.hasAspect ( Constants.YUM_ASPECT_ID ) ) + { + return null; + } + + final List result = new LinkedList<> (); + result.add ( new MenuEntry ( "YUM (by ID)", 6_000, new LinkTarget ( String.format ( "/yum/%s", channel.getId () ) ), Modifier.LINK, null ) ); + if ( channel.getName () != null ) + { + result.add ( new MenuEntry ( "YUM (by name)", 6_000, new LinkTarget ( String.format ( "/yum/%s", PATH_ESC.escape ( channel.getName () ) ) ), Modifier.LINK, null ) ); + } + return result; + } +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumServlet.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumServlet.java new file mode 100644 index 00000000..139ff5e0 --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/internal/YumServlet.java @@ -0,0 +1,174 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.pm.rpm.yum.internal; + +import java.io.IOException; +import java.util.List; +import java.util.Optional; +import java.util.stream.Collectors; + +import javax.servlet.ServletException; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; + +import de.dentrassi.pm.VersionInformation; +import de.dentrassi.pm.common.MetaKey; +import de.dentrassi.pm.rpm.Constants; +import de.dentrassi.pm.storage.Artifact; +import de.dentrassi.pm.storage.CacheEntryInformation; +import de.dentrassi.pm.storage.Channel; +import de.dentrassi.pm.storage.service.servlet.AbstractStorageServiceServlet; +import de.dentrassi.pm.storage.service.util.DownloadHelper; +import de.dentrassi.pm.storage.web.utils.ChannelCacheHandler; + +public class YumServlet extends AbstractStorageServiceServlet +{ + private static final long serialVersionUID = 1L; + + @Override + protected void doGet ( final HttpServletRequest request, final HttpServletResponse response ) throws ServletException, IOException + { + String path = request.getServletPath (); + + if ( path != null && path.startsWith ( "/" ) ) + { + path = path.substring ( 1 ); + } + + if ( path == null || path.isEmpty () ) + { + handleWelcome ( request, response ); + return; + } + + final String[] segs = path.split ( "/", 2 ); + if ( segs.length <= 0 ) + { + handleWelcome ( request, response ); + return; + } + + final String channelId = segs[0]; + final String remPath = segs.length > 1 ? 
segs[1] : null; + + final Channel channel = getService ( request ).getChannelWithAlias ( channelId ); + if ( channel == null ) + { + handleMessage ( response, HttpServletResponse.SC_NOT_FOUND, String.format ( "Channel '%s' could not be found", channelId ) ); + return; + } + + if ( handleChannel ( channel, remPath, request, response ) ) + { + return; + } + + handleNotFound ( request, response, request.getRequestURI () ); + } + + private boolean handleChannel ( final Channel channel, final String remPath, final HttpServletRequest request, final HttpServletResponse response ) throws IOException, ServletException + { + if ( remPath == null || remPath.isEmpty () ) + { + if ( !request.getServletPath ().endsWith ( "/" ) ) + { + response.setStatus ( HttpServletResponse.SC_MOVED_PERMANENTLY ); + response.setHeader ( "Location", request.getRequestURI () + "/" ); + return true; + } + + request.setAttribute ( "channel", channel ); + viewJsp ( request, response, "channel.jsp" ); + return true; + } + + // handle pool + + // FIXME: handle pool + if ( remPath.startsWith ( "pool/" ) ) + { + handlePool ( channel, remPath, request, response ); + return true; + } + + // handle repo data + + if ( "repodata".equals ( remPath ) || "repodata/".equals ( remPath ) ) + { + if ( !request.getServletPath ().endsWith ( "/" ) ) + { + response.setStatus ( HttpServletResponse.SC_MOVED_PERMANENTLY ); + response.setHeader ( "Location", request.getRequestURI () + "/" ); + return true; + } + + request.setAttribute ( "channel", channel ); + + final List files = channel.getAllCacheEntries ().stream ().filter ( ce -> ce.getKey ().getNamespace ().equals ( Constants.YUM_ASPECT_ID ) && ce.getName ().startsWith ( "repodata/" ) ).collect ( Collectors.toList () ); + request.setAttribute ( "entries", files ); + + viewJsp ( request, response, "repodata.jsp" ); + return true; + } + + if ( remPath.startsWith ( "repodata/" ) ) + { + new ChannelCacheHandler ( new MetaKey ( Constants.YUM_ASPECT_ID, remPath ) ).process ( channel, 
request, response ); + return true; + } + + return false; + } + + private void handlePool ( final Channel channel, final String remPath, final HttpServletRequest request, final HttpServletResponse response ) throws IOException + { + final String[] segs = remPath.split ( "/" ); + if ( segs.length < 3 ) + { + handleNotFound ( request, response, request.getRequestURI () ); + return; + } + + final Artifact artifact = channel.getArtifact ( segs[1] ); + if ( artifact == null ) + { + handleNotFound ( request, response, request.getRequestURI () ); + return; + } + + final Optional name = segs.length > 2 ? Optional.of ( segs[segs.length - 1] ) : Optional.empty (); + + DownloadHelper.streamArtifact ( response, artifact, null, true, art -> name.orElse ( art.getName () ) ); + } + + private void viewJsp ( final HttpServletRequest request, final HttpServletResponse response, final String viewName ) throws ServletException, IOException + { + request.setAttribute ( "version", VersionInformation.VERSION ); + request.getRequestDispatcher ( "/WEB-INF/views/" + viewName ).forward ( request, response ); + } + + private void handleNotFound ( final HttpServletRequest request, final HttpServletResponse response, final String resource ) throws IOException + { + handleMessage ( response, HttpServletResponse.SC_NOT_FOUND, String.format ( "Resource '%s' could not be found", resource ) ); + } + + private void handleMessage ( final HttpServletResponse response, final int status, final String message ) throws IOException + { + response.setContentType ( "text/plain" ); + response.setStatus ( status ); + response.getWriter ().write ( message ); + } + + private void handleWelcome ( final HttpServletRequest request, final HttpServletResponse response ) throws IOException, ServletException + { + viewJsp ( request, response, "index.jsp" ); + } +} diff --git a/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/package-info.java b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/package-info.java new file 
mode 100644 index 00000000..331b7944 --- /dev/null +++ b/de.dentrassi.pm.rpm/src/de/dentrassi/pm/rpm/yum/package-info.java @@ -0,0 +1,13 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +/** + * YUM repository functionality + */ +package de.dentrassi.pm.rpm.yum; diff --git a/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/ChannelImpl.java b/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/ChannelImpl.java index e2fc4593..89e3ea0c 100644 --- a/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/ChannelImpl.java +++ b/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/ChannelImpl.java @@ -130,6 +130,23 @@ public List findByName ( final String artifactName ) return this.service.findByName ( this.id, artifactName ); } + @Override + public Artifact getArtifact ( final String artifactId ) + { + final Artifact art = this.service.getArtifact ( artifactId ); + if ( art == null ) + { + return null; + } + + if ( !art.getChannel ().getId ().equals ( this.id ) ) + { + return null; + } + + return art; + } + @Override public Collection getAllDeployKeys () { diff --git a/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/StorageHandlerImpl.java b/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/StorageHandlerImpl.java index 6122f322..98df88fc 100644 --- a/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/StorageHandlerImpl.java 
+++ b/de.dentrassi.pm.storage.service.jpa/src/de/dentrassi/pm/storage/service/jpa/StorageHandlerImpl.java @@ -805,21 +805,21 @@ public void runChannelAggregators ( final ChannelEntity channel ) final Map metadata = new HashMap<> (); - this.channelAspectProcessor.process ( channel.getAspects ().keySet (), ChannelAspect::getChannelAggregator, aggregator -> { + this.channelAspectProcessor.processWithAspect ( channel.getAspects ().keySet (), ChannelAspect::getChannelAggregator, ( aspect, aggregator ) -> { try { // create new context for this channel aspect - final AggregationContextImpl context = new AggregationContextImpl ( artifacts, metaData, channel, aggregator.getId () ); + final AggregationContextImpl context = new AggregationContextImpl ( artifacts, metaData, channel, aspect.getId () ); // process final Map md = aggregator.aggregateMetaData ( context ); - convertMetaDataFromExtractor ( metadata, aggregator.getId (), md ); + convertMetaDataFromExtractor ( metadata, aspect.getId (), md ); context.flush ( aggrValidationHandler ); } catch ( final Exception e ) { - throw new RuntimeException ( String.format ( "Failed to run channel aggregator: %s", aggregator.getId () ), e ); + throw new RuntimeException ( String.format ( "Failed to run channel aggregator: %s", aspect.getId () ), e ); } } ); diff --git a/de.dentrassi.pm.storage.web/src/de/dentrassi/pm/storage/web/channel/AspectInformation.java b/de.dentrassi.pm.storage.web/src/de/dentrassi/pm/storage/web/channel/AspectInformation.java index 8186d7a3..b3184285 100644 --- a/de.dentrassi.pm.storage.web/src/de/dentrassi/pm/storage/web/channel/AspectInformation.java +++ b/de.dentrassi.pm.storage.web/src/de/dentrassi/pm/storage/web/channel/AspectInformation.java @@ -195,18 +195,19 @@ public static List filterIds ( final List public String[] getMissingIds ( final List assignedAspects ) { final Set required = new HashSet<> (); - addRequired ( required, this ); + + addRequired ( required, this, assignedAspects ); return 
required.stream ().map ( AspectInformation::getFactoryId ).toArray ( size -> new String[size] ); } - private static void addRequired ( final Set result, final AspectInformation aspect ) + private static void addRequired ( final Set result, final AspectInformation aspect, final List assignedAspects ) { for ( final AspectInformation req : aspect.getRequires () ) { - if ( result.add ( req ) ) + if ( !assignedAspects.contains ( req ) && result.add ( req ) ) { - addRequired ( result, req ); + addRequired ( result, req, assignedAspects ); } } } diff --git a/de.dentrassi.pm.storage/src/de/dentrassi/pm/storage/Channel.java b/de.dentrassi.pm.storage/src/de/dentrassi/pm/storage/Channel.java index e844496a..2d067f65 100644 --- a/de.dentrassi.pm.storage/src/de/dentrassi/pm/storage/Channel.java +++ b/de.dentrassi.pm.storage/src/de/dentrassi/pm/storage/Channel.java @@ -192,6 +192,20 @@ public default Set getAspectIds () public Artifact createArtifact ( String name, InputStream stream, Map providedMetaData ); + /** + * Get an artifact from a channel by ID + *

+ * If an artifact with the ID exists, but does not belong to this channel, + * then this method must return null. + *

+ * + * @param artifactId + * the artifact to get + * @return the artifact or null if the artifact could not be + * found + */ + public Artifact getArtifact ( String artifactId ); + public List findByName ( String artifactName ); public Set getSimpleArtifacts (); diff --git a/de.dentrassi.rpm.tests/.classpath b/de.dentrassi.rpm.tests/.classpath new file mode 100644 index 00000000..eca7bdba --- /dev/null +++ b/de.dentrassi.rpm.tests/.classpath @@ -0,0 +1,7 @@ + + + + + + + diff --git a/de.dentrassi.rpm.tests/.gitignore b/de.dentrassi.rpm.tests/.gitignore new file mode 100644 index 00000000..ae3c1726 --- /dev/null +++ b/de.dentrassi.rpm.tests/.gitignore @@ -0,0 +1 @@ +/bin/ diff --git a/de.dentrassi.rpm.tests/.project b/de.dentrassi.rpm.tests/.project new file mode 100644 index 00000000..f66c5d25 --- /dev/null +++ b/de.dentrassi.rpm.tests/.project @@ -0,0 +1,28 @@ + + + de.dentrassi.rpm.tests + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.pde.ManifestBuilder + + + + + org.eclipse.pde.SchemaBuilder + + + + + + org.eclipse.pde.PluginNature + org.eclipse.jdt.core.javanature + + diff --git a/de.dentrassi.rpm.tests/.settings/org.eclipse.jdt.core.prefs b/de.dentrassi.rpm.tests/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000..0c68a61d --- /dev/null +++ b/de.dentrassi.rpm.tests/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,7 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/de.dentrassi.rpm.tests/.settings/org.eclipse.pde.core.prefs b/de.dentrassi.rpm.tests/.settings/org.eclipse.pde.core.prefs new file mode 100644 index 00000000..e8ff8be0 --- /dev/null +++ 
b/de.dentrassi.rpm.tests/.settings/org.eclipse.pde.core.prefs @@ -0,0 +1,4 @@ +eclipse.preferences.version=1 +pluginProject.equinox=false +pluginProject.extensions=false +resolve.requirebundle=false diff --git a/de.dentrassi.rpm.tests/META-INF/MANIFEST.MF b/de.dentrassi.rpm.tests/META-INF/MANIFEST.MF new file mode 100644 index 00000000..3e14ba7a --- /dev/null +++ b/de.dentrassi.rpm.tests/META-INF/MANIFEST.MF @@ -0,0 +1,11 @@ +Manifest-Version: 1.0 +Bundle-ManifestVersion: 2 +Bundle-Name: Unit tests for 'de.dentrassi.rpm' +Bundle-SymbolicName: de.dentrassi.rpm.tests +Bundle-Version: 1.0.0.qualifier +Bundle-Vendor: IBH SYSTEMS GmbH +Bundle-RequiredExecutionEnvironment: JavaSE-1.8 +Require-Bundle: org.junit +Import-Package: de.dentrassi.rpm, + org.apache.commons.compress.archivers.cpio;version="1.9.0", + org.eclipse.scada.utils.str;version="0.2.0" diff --git a/de.dentrassi.rpm.tests/build.properties b/de.dentrassi.rpm.tests/build.properties new file mode 100644 index 00000000..34d2e4d2 --- /dev/null +++ b/de.dentrassi.rpm.tests/build.properties @@ -0,0 +1,4 @@ +source.. = src/ +output.. = bin/ +bin.includes = META-INF/,\ + . 
diff --git a/de.dentrassi.rpm.tests/data/org.eclipse.scada-0.2.1-1.noarch.rpm b/de.dentrassi.rpm.tests/data/org.eclipse.scada-0.2.1-1.noarch.rpm new file mode 100644 index 00000000..db133dea Binary files /dev/null and b/de.dentrassi.rpm.tests/data/org.eclipse.scada-0.2.1-1.noarch.rpm differ diff --git a/de.dentrassi.rpm.tests/data/org.eclipse.scada-centos6-0.2.1-1.noarch.rpm b/de.dentrassi.rpm.tests/data/org.eclipse.scada-centos6-0.2.1-1.noarch.rpm new file mode 100644 index 00000000..46be585b Binary files /dev/null and b/de.dentrassi.rpm.tests/data/org.eclipse.scada-centos6-0.2.1-1.noarch.rpm differ diff --git a/de.dentrassi.rpm.tests/src/de/dentrassi/rpm/tests/InputStreamTest.java b/de.dentrassi.rpm.tests/src/de/dentrassi/rpm/tests/InputStreamTest.java new file mode 100644 index 00000000..5f9dc99a --- /dev/null +++ b/de.dentrassi.rpm.tests/src/de/dentrassi/rpm/tests/InputStreamTest.java @@ -0,0 +1,163 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm.tests; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.Map; +import java.util.TreeMap; +import java.util.function.Function; + +import org.apache.commons.compress.archivers.cpio.CpioArchiveEntry; +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.junit.Assert; +import org.junit.Test; + +import de.dentrassi.rpm.RpmBaseTag; +import de.dentrassi.rpm.RpmDependencyFlags; +import de.dentrassi.rpm.RpmHeader; +import de.dentrassi.rpm.RpmInputStream; +import de.dentrassi.rpm.RpmLead; +import de.dentrassi.rpm.RpmSignatureTag; +import de.dentrassi.rpm.RpmTag; +import de.dentrassi.rpm.RpmTagValue; +import de.dentrassi.rpm.Rpms; + +public class InputStreamTest +{ + private void dumpAll ( final RpmInputStream in ) throws IOException + { + final RpmLead lead = in.getLead (); + System.out.format ( "Version: %s.%s%n", lead.getMajor (), lead.getMinor () ); + System.out.format ( "Name: %s%n", lead.getName () ); + System.out.format ( "Signature Version: %s%n", lead.getSignatureVersion () ); + + dumpHeader ( "Signature", in.getSignatureHeader (), tag -> RpmSignatureTag.find ( tag ) ); + dumpHeader ( "Payload", in.getPayloadHeader (), tag -> RpmTag.find ( tag ) ); + + final CpioArchiveInputStream cpio = in.getCpioStream (); + + CpioArchiveEntry entry; + while ( ( entry = cpio.getNextCPIOEntry () ) != null ) + { + dumpEntry ( entry ); + } + + { + final String[] names = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.REQUIRE_NAME ) ).asStringArray 
().orElse ( null ); + final String[] versions = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.REQUIRE_VERSION ) ).asStringArray ().orElse ( null ); + final Long[] flags = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.REQUIRE_FLAGS ) ).asLongArray ().orElse ( null ); + dumpDeps ( "Require", names, versions, flags ); + } + { + final String[] names = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.PROVIDE_NAME ) ).asStringArray ().orElse ( null ); + final String[] versions = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.PROVIDE_VERSION ) ).asStringArray ().orElse ( null ); + final Long[] flags = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.PROVIDE_FLAGS ) ).asLongArray ().orElse ( null ); + dumpDeps ( "Provide", names, versions, flags ); + } + { + final String[] names = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.CONFLICT_NAME ) ).asStringArray ().orElse ( null ); + final String[] versions = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.CONFLICT_VERSION ) ).asStringArray ().orElse ( null ); + final Long[] flags = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.CONFLICT_FLAGS ) ).asLongArray ().orElse ( null ); + dumpDeps ( "Conflict", names, versions, flags ); + } + { + final String[] names = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.OBSOLETE_NAME ) ).asStringArray ().orElse ( null ); + final String[] versions = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.OBSOLETE_VERSION ) ).asStringArray ().orElse ( null ); + final Long[] flags = new RpmTagValue ( in.getPayloadHeader ().getTag ( RpmTag.OBSOLETE_FLAGS ) ).asLongArray ().orElse ( null ); + dumpDeps ( "Obsolete", names, versions, flags ); + } + } + + @Test + public void test1 () throws IOException + { + try ( final RpmInputStream in = new RpmInputStream ( new BufferedInputStream ( new FileInputStream ( new File ( "data/org.eclipse.scada-0.2.1-1.noarch.rpm" ) ) ) ) ) + { + dumpAll ( in ); + + 
Assert.assertEquals ( 280, in.getPayloadHeader ().getStart () ); + Assert.assertEquals ( 3501, in.getPayloadHeader ().getLength () ); + + Assert.assertEquals ( "cpio", in.getPayloadHeader ().getTag ( RpmTag.PAYLOAD_FORMAT ) ); + Assert.assertEquals ( "lzma", in.getPayloadHeader ().getTag ( RpmTag.PAYLOAD_CODING ) ); + + Assert.assertEquals ( "org.eclipse.scada", in.getPayloadHeader ().getTag ( RpmTag.NAME ) ); + Assert.assertEquals ( "0.2.1", in.getPayloadHeader ().getTag ( RpmTag.VERSION ) ); + Assert.assertEquals ( "1", in.getPayloadHeader ().getTag ( RpmTag.RELEASE ) ); + + Assert.assertEquals ( "noarch", in.getPayloadHeader ().getTag ( RpmTag.ARCH ) ); + Assert.assertEquals ( "linux", in.getPayloadHeader ().getTag ( RpmTag.OS ) ); + Assert.assertEquals ( "EPL", in.getPayloadHeader ().getTag ( RpmTag.LICENSE ) ); + + Assert.assertArrayEquals ( new String[] { // + "/etc/", // + "/etc/eclipsescada/", // + "/etc/profile.d/", // + "/usr/bin/", // + "/usr/", // + "/usr/share/", // + "/usr/share/eclipsescada/", // + "/usr/share/eclipsescada/sql/", // + "/var/log/", // + "/var/run/", // + }, (String[])in.getPayloadHeader ().getTag ( RpmTag.DIRNAMES ) ); + } + } + + @Test + public void test2 () throws IOException + { + try ( final RpmInputStream in = new RpmInputStream ( new BufferedInputStream ( new FileInputStream ( new File ( "data/org.eclipse.scada-centos6-0.2.1-1.noarch.rpm" ) ) ) ) ) + { + dumpAll ( in ); + + } + } + + private void dumpDeps ( final String string, final String[] names, final String[] versions, final Long[] flags ) + { + if ( names == null ) + { + return; + } + + for ( int i = 0; i < names.length; i++ ) + { + System.out.format ( "%s: %s - %s - %s %s%n", string, names[i], versions[i], flags[i], RpmDependencyFlags.parse ( flags[i] ) ); + } + } + + private void dumpHeader ( final String string, final RpmHeader header, final Function func ) + { + System.out.println ( string ); + System.out.println ( "=================================" ); + + for ( final 
Map.Entry entry : new TreeMap<> ( header.getRawTags () ).entrySet () ) + { + Object tag = func.apply ( entry.getKey () ); + if ( tag == null ) + { + tag = entry.getKey (); + } + System.out.format ( "%20s - %s%n", tag, Rpms.dumpValue ( entry.getValue () ) ); + } + } + + private void dumpEntry ( final CpioArchiveEntry entry ) + { + System.out.format ( "-----------------------------------%n" ); + System.out.format ( " %s%n", entry.getName () ); + System.out.format ( " Size: %s%n", entry.getSize () ); + } +} diff --git a/de.dentrassi.rpm.tests/src/de/dentrassi/rpm/tests/VersionTest.java b/de.dentrassi.rpm.tests/src/de/dentrassi/rpm/tests/VersionTest.java new file mode 100644 index 00000000..092484ed --- /dev/null +++ b/de.dentrassi.rpm.tests/src/de/dentrassi/rpm/tests/VersionTest.java @@ -0,0 +1,57 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm.tests; + +import org.junit.Assert; +import org.junit.Test; + +import de.dentrassi.rpm.RpmVersion; + +public class VersionTest +{ + @Test + public void test1 () + { + testVersion ( "1.2.3", null, "1.2.3", null ); + } + + @Test + public void test2 () + { + testVersion ( "0:1.2.3", 0, "1.2.3", null ); + } + + @Test + public void test3 () + { + testVersion ( "0:1.2.3-1", 0, "1.2.3", "1" ); + } + + @Test + public void test4 () + { + testVersion ( "1.2.3-1", null, "1.2.3", "1" ); + } + + @Test + public void test5 () + { + testVersion ( "1.2.3-123-456", null, "1.2.3", "123-456" ); + } + + private void testVersion ( final String 
version, final Integer expectedEpoch, final String expectedVersion, final String expectedRelease ) + { + final RpmVersion v = RpmVersion.valueOf ( version ); + Assert.assertEquals ( "Epoch", expectedEpoch, v.getEpoch () ); + Assert.assertEquals ( "Version", expectedVersion, v.getVersion () ); + Assert.assertEquals ( "Release", expectedRelease, v.getRelease () ); + } +} diff --git a/de.dentrassi.rpm/.classpath b/de.dentrassi.rpm/.classpath new file mode 100644 index 00000000..eca7bdba --- /dev/null +++ b/de.dentrassi.rpm/.classpath @@ -0,0 +1,7 @@ + + + + + + + diff --git a/de.dentrassi.rpm/.gitignore b/de.dentrassi.rpm/.gitignore new file mode 100644 index 00000000..ae3c1726 --- /dev/null +++ b/de.dentrassi.rpm/.gitignore @@ -0,0 +1 @@ +/bin/ diff --git a/de.dentrassi.rpm/.project b/de.dentrassi.rpm/.project new file mode 100644 index 00000000..a910eb94 --- /dev/null +++ b/de.dentrassi.rpm/.project @@ -0,0 +1,28 @@ + + + de.dentrassi.rpm + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.pde.ManifestBuilder + + + + + org.eclipse.pde.SchemaBuilder + + + + + + org.eclipse.pde.PluginNature + org.eclipse.jdt.core.javanature + + diff --git a/de.dentrassi.rpm/.settings/org.eclipse.jdt.core.prefs b/de.dentrassi.rpm/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000..0c68a61d --- /dev/null +++ b/de.dentrassi.rpm/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,7 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.8 diff --git a/de.dentrassi.rpm/.settings/org.eclipse.pde.core.prefs b/de.dentrassi.rpm/.settings/org.eclipse.pde.core.prefs new file mode 100644 index 00000000..f29e940a --- /dev/null +++ 
b/de.dentrassi.rpm/.settings/org.eclipse.pde.core.prefs @@ -0,0 +1,3 @@ +eclipse.preferences.version=1 +pluginProject.extensions=false +resolve.requirebundle=false diff --git a/de.dentrassi.rpm/META-INF/MANIFEST.MF b/de.dentrassi.rpm/META-INF/MANIFEST.MF new file mode 100644 index 00000000..1b888e0b --- /dev/null +++ b/de.dentrassi.rpm/META-INF/MANIFEST.MF @@ -0,0 +1,16 @@ +Manifest-Version: 1.0 +Bundle-ManifestVersion: 2 +Bundle-Name: Java RPM Library +Bundle-SymbolicName: de.dentrassi.rpm +Bundle-Version: 1.0.0.qualifier +Bundle-Vendor: IBH SYSTEMS GmbH +Bundle-RequiredExecutionEnvironment: JavaSE-1.8 +Export-Package: de.dentrassi.rpm;version="1.0.0";uses:="org.apache.commons.compress.archivers.cpio" +Import-Package: com.google.common.io;version="18.0.0", + org.apache.commons.compress.archivers.cpio;version="1.9.0", + org.apache.commons.compress.compressors.bzip2;version="1.9.0", + org.apache.commons.compress.compressors.gzip;version="1.9.0", + org.apache.commons.compress.compressors.lzma;version="1.9.0", + org.slf4j;version="1.7.2", + org.tukaani.xz;version="1.3.0", + org.tukaani.xz.lzma;version="1.5.0" diff --git a/de.dentrassi.rpm/build.properties b/de.dentrassi.rpm/build.properties new file mode 100644 index 00000000..34d2e4d2 --- /dev/null +++ b/de.dentrassi.rpm/build.properties @@ -0,0 +1,4 @@ +source.. = src/ +output.. = bin/ +bin.includes = META-INF/,\ + . diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmBaseTag.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmBaseTag.java new file mode 100644 index 00000000..6eefdbf3 --- /dev/null +++ b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmBaseTag.java @@ -0,0 +1,16 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
/**
 * A tag which is identified by a numeric value.
 */
public interface RpmBaseTag
{
    /**
     * Get the numeric value of the tag
     *
     * @return the numeric tag value
     */
    Integer getValue ();
}
} + } + + return result; + } +} diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmEntry.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmEntry.java new file mode 100644 index 00000000..b3869f09 --- /dev/null +++ b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmEntry.java @@ -0,0 +1,178 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm; + +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.function.Function; + +public class RpmEntry +{ + private static final class Unknown + { + @Override + public String toString () + { + return "UNKNOWN"; + } + } + + public static final Unknown UNKNOWN = new Unknown (); + + private final int tag; + + private Object value; + + private final int type; + + private final int index; + + private final int count; + + public RpmEntry ( final int tag, final int type, final int index, final int count ) + { + this.tag = tag; + this.type = type; + this.index = index; + this.count = count; + } + + public int getTag () + { + return this.tag; + } + + public Object getValue () + { + return this.value; + } + + void fillFromStore ( final ByteBuffer storeData ) throws IOException + { + switch ( this.type ) + { + case 0: // null value + break; + case 1: // character + this.value = getFromStore ( storeData, buf -> (char)storeData.get (), size -> new Character[size] ); + break; + case 2: // byte + this.value = getFromStore ( storeData, buf -> buf.get (), size -> new Byte[size] ); + break; + 
case 3: // unsigned 16bit integer + this.value = getFromStore ( storeData, buf -> buf.getShort () & 0xFFFF, size -> new Integer[size] ); + break; + case 4: // unsigned 32bit integer + this.value = getFromStore ( storeData, buf -> (long) ( buf.getInt () & 0xFFFFFFFFL ), size -> new Long[size] ); + break; + case 5: // unsigned 64bit integer + this.value = getFromStore ( storeData, buf -> buf.getLong (), size -> new Long[size] ); + break; + case 6: // one string + { + // only one allowed + storeData.position ( this.index ); + this.value = makeString ( storeData ); + } + break; + case 7: // blob + { + final byte[] data = new byte[this.count]; + storeData.get ( data ); + this.value = data; + } + break; + case 8: // string array + this.value = getFromStore ( storeData, buf -> makeString ( buf ), size -> new String[size] ); + break; + case 9: // i18n string array + this.value = getFromStore ( storeData, buf -> makeString ( buf ), size -> new String[size] ); + break; + default: + this.value = UNKNOWN; + break; + } + } + + @FunctionalInterface + public static interface IOFunction + { + public R apply ( T t ) throws IOException; + } + + private Object getFromStore ( final ByteBuffer data, final IOFunction func, final Function creator ) throws IOException + { + data.position ( this.index ); + if ( this.count == 1 ) + { + return func.apply ( data ); + } + + final R[] result = creator.apply ( this.count ); + for ( int i = 0; i < this.count; i++ ) + { + result[i] = func.apply ( data ); + } + return result; + } + + private static String makeString ( final ByteBuffer buf ) throws IOException + { + final byte[] data = buf.array (); + final int start = buf.position (); + + for ( int i = 0; i < buf.remaining (); i++ ) // check if there is at least one more byte, null byte + { + if ( data[start + i] == 0 ) + { + buf.position ( start + i + 1 ); // skip content plus null byte + return new String ( data, start, i, StandardCharsets.UTF_8 ); + } + } + throw new IOException ( "Corrupt tag 
entry. Null byte missing!" ); + } + + @Override + public String toString () + { + final StringBuilder sb = new StringBuilder (); + + sb.append ( '[' ); + sb.append ( this.tag ); + sb.append ( " = " ); + + Rpms.dumpValue ( sb, this.value ); + + if ( this.value != null ) + { + if ( this.value != UNKNOWN ) + { + sb.append ( " - " ).append ( this.value.getClass ().getName () ); + } + else + { + sb.append ( " - " ).append ( this.type ); + } + } + else + { + sb.append ( "NULL" ); + } + + sb.append ( " | " ); + sb.append ( this.count ); + sb.append ( ']' ); + + return sb.toString (); + } + +} diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmHeader.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmHeader.java new file mode 100644 index 00000000..b013e4c8 --- /dev/null +++ b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmHeader.java @@ -0,0 +1,74 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm; + +import java.util.Collections; +import java.util.HashMap; +import java.util.Map; + +public class RpmHeader +{ + private final Map tags; + + private final long start; + + private final long length; + + public RpmHeader ( final RpmEntry[] entries, final long start, final long length ) + { + final Map tags = new HashMap<> ( entries.length ); + for ( final RpmEntry entry : entries ) + { + tags.put ( entry.getTag (), entry.getValue () ); + } + + this.tags = Collections.unmodifiableMap ( tags ); + + this.start = start; + this.length = length; + } + + /** + * Get the start position of the 
header section in the stream + * + * @return the start position + */ + public long getStart () + { + return this.start; + } + + /** + * Get the length of header section in the stream + * + * @return the length of the header in bytes + */ + public long getLength () + { + return this.length; + } + + public Object getTag ( final T tag ) + { + return this.tags.get ( tag.getValue () ); + } + + public Object getTagOrDefault ( final T tag, final Object defaultValue ) + { + return this.tags.getOrDefault ( tag, defaultValue ); + } + + public Map getRawTags () + { + return this.tags; + } + +} diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmInputStream.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmInputStream.java new file mode 100644 index 00000000..a855a158 --- /dev/null +++ b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmInputStream.java @@ -0,0 +1,316 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm; + +import java.io.DataInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; + +import org.apache.commons.compress.archivers.cpio.CpioArchiveInputStream; +import org.apache.commons.compress.compressors.bzip2.BZip2CompressorInputStream; +import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.tukaani.xz.LZMAInputStream; +import org.tukaani.xz.XZInputStream; + +import com.google.common.io.CountingInputStream; + +public class RpmInputStream extends InputStream +{ + private final static Logger logger = LoggerFactory.getLogger ( RpmInputStream.class ); + + private static final byte[] LEAD_MAGIC = new byte[] { (byte)0xED, (byte)0xAB, (byte)0xEE, (byte)0xDB }; + + private static final byte[] HEADER_MAGIC = new byte[] { (byte)0x8E, (byte)0xAD, (byte)0xE8 }; + + private static final byte[] DUMMY = new byte[128]; + + private final DataInputStream in; + + private boolean closed; + + private RpmLead lead; + + private RpmHeader signatureHeader; + + private RpmHeader payloadHeader; + + private InputStream playloadStream; + + private CpioArchiveInputStream cpioStream; + + private final CountingInputStream count; + + public RpmInputStream ( final InputStream in ) + { + this.count = new CountingInputStream ( in ); + this.in = new DataInputStream ( this.count ); + } + + @Override + public void close () throws IOException + { + if ( !this.closed ) + { + this.in.close (); + 
this.closed = true; + } + } + + protected void ensureInit () throws IOException + { + if ( this.lead == null ) + { + this.lead = readLead (); + } + + if ( this.signatureHeader == null ) + { + this.signatureHeader = readHeader ( true ); + } + + if ( this.payloadHeader == null ) + { + this.payloadHeader = readHeader ( false ); + } + + // set up content stream + + if ( this.playloadStream == null ) + { + this.playloadStream = setupPayloadStream (); + this.cpioStream = new CpioArchiveInputStream ( this.playloadStream ); // we did ensure that we only support CPIO before + } + } + + private InputStream setupPayloadStream () throws IOException + { + final Object payloadFormatValue = this.payloadHeader.getRawTags ().get ( RpmTag.PAYLOAD_FORMAT.getValue () ); + final Object payloadCodingValue = this.payloadHeader.getRawTags ().get ( RpmTag.PAYLOAD_CODING.getValue () ); + + if ( payloadFormatValue != null && ! ( payloadFormatValue instanceof String ) ) + { + throw new IOException ( "Payload format must be a single string" ); + } + + if ( payloadFormatValue != null && ! 
( payloadCodingValue instanceof String ) ) + { + throw new IOException ( "Payload coding must be a single string" ); + } + + String payloadFormat = (String)payloadFormatValue; + String payloadCoding = (String)payloadCodingValue; + + if ( payloadFormat == null || payloadFormat.isEmpty () ) + { + payloadFormat = "cpio"; + } + + if ( payloadCoding == null || payloadCoding.isEmpty () ) + { + payloadCoding = "gzip"; + } + + if ( !"cpio".equals ( payloadFormat ) ) + { + throw new IOException ( String.format ( "Unknown payload format: %s", payloadFormat ) ); + } + + switch ( payloadCoding ) + { + case "none": + return this.in; + case "gzip": + return new GzipCompressorInputStream ( this.in ); + case "bzip2": + return new BZip2CompressorInputStream ( this.in ); + case "lzma": + return new LZMAInputStream ( this.in ); + case "xz": + return new XZInputStream ( this.in ); + default: + throw new IOException ( String.format ( "Unknown coding: %s", payloadCoding ) ); + } + } + + public CpioArchiveInputStream getCpioStream () + { + return this.cpioStream; + } + + public RpmLead getLead () throws IOException + { + ensureInit (); + return this.lead; + } + + public RpmHeader getSignatureHeader () throws IOException + { + ensureInit (); + return this.signatureHeader; + } + + public RpmHeader getPayloadHeader () throws IOException + { + ensureInit (); + return this.payloadHeader; + } + + protected RpmLead readLead () throws IOException + { + final byte[] magic = readComplete ( 4 ); + + if ( !Arrays.equals ( magic, LEAD_MAGIC ) ) + { + throw new IOException ( String.format ( "File corrupt: Expected magic %s, read: %s", Arrays.toString ( LEAD_MAGIC ), Arrays.toString ( magic ) ) ); + } + + final byte[] version = readComplete ( 2 ); + + skipFully ( 4 ); // TYPE + ARCH + + final byte[] nameData = readComplete ( 66 ); // NAME + + final String name = StandardCharsets.UTF_8.decode ( ByteBuffer.wrap ( nameData ) ).toString (); + + skipFully ( 2 ); // OS + + final int sigType = 
this.in.readUnsignedShort (); + + skipFully ( 16 ); // RESERVED + + return new RpmLead ( version[0], version[1], name, sigType ); + } + + protected RpmHeader readHeader ( final boolean withPadding ) throws IOException + { + final long start = this.count.getCount (); + + final byte[] magic = readComplete ( 3 ); + + if ( !Arrays.equals ( magic, HEADER_MAGIC ) ) + { + throw new IOException ( String.format ( "File corrupt: Expected entry magic %s, read: %s", Arrays.toString ( HEADER_MAGIC ), Arrays.toString ( magic ) ) ); + } + + final byte version = this.in.readByte (); + if ( version != 1 ) + { + throw new IOException ( String.format ( "File corrupt: Invalid header entry version: %s (valid: 1)", version ) ); + } + + skipFully ( 4 ); // RESERVED + + final int indexCount = this.in.readInt (); + final long storeSize = this.in.readInt () & 0xFFFFFFFF; + + final RpmEntry[] entries = new RpmEntry[indexCount]; + + for ( int i = 0; i < indexCount; i++ ) + { + entries[i] = readEntry (); + } + + final ByteBuffer store = ByteBuffer.wrap ( readComplete ( (int)storeSize ) ); // FIXME: bad casting ... 
+ + for ( int i = 0; i < indexCount; i++ ) + { + entries[i].fillFromStore ( store ); + } + + if ( withPadding ) + { + // pad remaining bytes - to 8 + + final long rem = storeSize % 8; + if ( rem > 0 ) + { + final int skip = (int) ( 8 - rem ); + logger.debug ( "Skipping {} pad bytes", skip ); + skipFully ( skip ); + } + } + + final long end = this.count.getCount (); + + return new RpmHeader ( entries, start, end - start ); + } + + private RpmEntry readEntry () throws IOException + { + final int tag = this.in.readInt (); + final int type = this.in.readInt (); + final int offset = this.in.readInt (); + final int count = this.in.readInt (); + + return new RpmEntry ( tag, type, offset, count ); + } + + private byte[] readComplete ( final int size ) throws IOException + { + final byte[] result = new byte[size]; + this.in.readFully ( result ); + return result; + } + + private void skipFully ( final int count ) throws IOException + { + this.in.readFully ( DUMMY, 0, count ); + } + + // forward methods + + @Override + public void reset () throws IOException + { + ensureInit (); + this.playloadStream.reset (); + } + + @Override + public int read () throws IOException + { + ensureInit (); + return this.playloadStream.read (); + } + + @Override + public long skip ( final long n ) throws IOException + { + ensureInit (); + return this.playloadStream.skip ( n ); + } + + @Override + public int available () throws IOException + { + ensureInit (); + return this.playloadStream.available (); + } + + @Override + public int read ( final byte[] b ) throws IOException + { + ensureInit (); + return this.playloadStream.read ( b ); + } + + @Override + public int read ( final byte[] b, final int off, final int len ) throws IOException + { + return this.playloadStream.read ( b, off, len ); + } + +} diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmLead.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmLead.java new file mode 100644 index 00000000..9712d795 --- /dev/null +++ 
/**
 * The information read from the lead section of an RPM file.
 */
public class RpmLead
{
    private final byte major;

    private final byte minor;

    private final String name;

    private final int signatureVersion;

    /**
     * Create a new lead instance
     *
     * @param major
     *            the major version
     * @param minor
     *            the minor version
     * @param name
     *            the name
     * @param signatureVersion
     *            the signature version
     */
    public RpmLead ( final byte major, final byte minor, final String name, final int signatureVersion )
    {
        this.signatureVersion = signatureVersion;
        this.name = name;
        this.minor = minor;
        this.major = major;
    }

    /**
     * @return the major version
     */
    public byte getMajor ()
    {
        return major;
    }

    /**
     * @return the minor version
     */
    public byte getMinor ()
    {
        return minor;
    }

    /**
     * @return the name
     */
    public String getName ()
    {
        return name;
    }

    /**
     * @return the signature version
     */
    public int getSignatureVersion ()
    {
        return signatureVersion;
    }
}
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm; + +import java.util.HashMap; +import java.util.Map; + +public enum RpmSignatureTag implements RpmBaseTag +{ + SHA1 ( 269 ), + + SIZE ( 1000 ), + MD5 ( 1004 ), + PAYLOAD_SIZE ( 1007 ); + + private Integer value; + + private RpmSignatureTag ( final Integer value ) + { + this.value = value; + } + + @Override + public Integer getValue () + { + return this.value; + } + + private final static Map all = new HashMap<> ( RpmSignatureTag.values ().length ); + + static + { + for ( final RpmSignatureTag tag : values () ) + { + all.put ( tag.getValue (), tag ); + } + } + + public static RpmSignatureTag find ( final Integer value ) + { + return all.get ( value ); + } +} diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmTag.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmTag.java new file mode 100644 index 00000000..10555427 --- /dev/null +++ b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmTag.java @@ -0,0 +1,84 @@ +/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. 
This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +package de.dentrassi.rpm; + +import java.util.HashMap; +import java.util.Map; + +public enum RpmTag implements RpmBaseTag +{ + NAME ( 1000 ), + VERSION ( 1001 ), + RELEASE ( 1002 ), + EPOCH ( 1003 ), + SUMMARY ( 1004 ), + DESCRIPTION ( 1005 ), + BUILDTIME ( 1006 ), + BUILDHOST ( 1007 ), + INSTALLED_SIZE ( 1009 ), + DISTRIBUTION ( 1010 ), + VENDOR ( 1011 ), + LICENSE ( 1014 ), + PACKAGER ( 1015 ), + GROUP ( 1016 ), + URL ( 1020 ), + OS ( 1021 ), + ARCH ( 1022 ), + SOURCE_PACKAGE ( 1044 ), + ARCHIVE_SIZE ( 1046 ), + PROVIDE_NAME ( 1047 ), + REQUIRE_FLAGS ( 1048 ), + REQUIRE_NAME ( 1049 ), + REQUIRE_VERSION ( 1050 ), + CONFLICT_FLAGS ( 1053 ), + CONFLICT_NAME ( 1054 ), + CONFLICT_VERSION ( 1055 ), + CHANGELOG_TIMESTAMP ( 1080 ), + CHANGELOG_AUTHOR ( 1081 ), + CHANGELOG_TEXT ( 1082 ), + OBSOLETE_NAME ( 1090 ), + PROVIDE_FLAGS ( 1112 ), + PROVIDE_VERSION ( 1113 ), + OBSOLETE_FLAGS ( 1114 ), + OBSOLETE_VERSION ( 1115 ), + BASENAMES ( 1117 ), + DIRNAMES ( 1118 ), + PAYLOAD_FORMAT ( 1124 ), + PAYLOAD_CODING ( 1125 ); + + private Integer value; + + private RpmTag ( final Integer value ) + { + this.value = value; + } + + @Override + public Integer getValue () + { + return this.value; + } + + private final static Map all = new HashMap<> ( RpmTag.values ().length ); + + static + { + for ( final RpmTag tag : values () ) + { + all.put ( tag.getValue (), tag ); + } + } + + public static RpmTag find ( final Integer value ) + { + return all.get ( value ); + } +} diff --git a/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmTagValue.java b/de.dentrassi.rpm/src/de/dentrassi/rpm/RpmTagValue.java new file mode 
/*******************************************************************************
 * Copyright (c) 2015 IBH SYSTEMS GmbH.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBH SYSTEMS GmbH - initial API and implementation
 *******************************************************************************/

import java.util.Optional;

/**
 * Wrapper around a raw tag value with typed, non-throwing accessors.
 * <p>
 * The wrapped value is stored as-is and may be {@code null}. Each
 * {@code asXxx} accessor returns an empty {@link Optional} when the value is
 * {@code null} or has a type the accessor cannot convert.
 * </p>
 */
public class RpmTagValue
{
    private final Object value;

    /**
     * @param value
     *            the raw value, may be {@code null}
     */
    public RpmTagValue ( final Object value )
    {
        this.value = value;
    }

    /**
     * @return the raw value exactly as passed to the constructor
     */
    public Object getValue ()
    {
        return this.value;
    }

    /**
     * Get the value as string array.
     *
     * @return a single element array for a {@link String}, the array itself
     *         (not a copy) for a {@code String[]}, otherwise empty
     */
    public Optional<String[]> asStringArray ()
    {
        // "instanceof" is false for null, so no separate null check is needed
        if ( this.value instanceof String )
        {
            return Optional.of ( new String[] { (String)this.value } );
        }
        if ( this.value instanceof String[] )
        {
            return Optional.of ( (String[])this.value );
        }

        return Optional.empty ();
    }

    /**
     * Get the value as a single string.
     *
     * @return the string itself for a {@link String}, the first element for
     *         a {@code String[]}, otherwise empty. An empty array or a
     *         {@code null} first element yields an empty result.
     */
    public Optional<String> asString ()
    {
        if ( this.value instanceof String )
        {
            return Optional.of ( (String)this.value );
        }

        if ( this.value instanceof String[] )
        {
            final String[] arr = (String[])this.value;
            if ( arr.length > 0 )
            {
                // ofNullable: a null element must not blow up with a NullPointerException
                return Optional.ofNullable ( arr[0] );
            }
        }

        return Optional.empty ();
    }

    /**
     * Get the value as array of longs.
     *
     * @return a single element array for a {@link Long}, the array itself
     *         (not a copy) for a {@code Long[]}, otherwise empty
     */
    public Optional<Long[]> asLongArray ()
    {
        if ( this.value instanceof Long )
        {
            return Optional.of ( new Long[] { (Long)this.value } );
        }
        if ( this.value instanceof Long[] )
        {
            return Optional.of ( (Long[])this.value );
        }

        return Optional.empty ();
    }

    /**
     * Get the value as a single long.
     *
     * @return {@link Number#longValue()} for any {@link Number}, the first
     *         element for a {@code Long[]}, otherwise empty. An empty array
     *         or a {@code null} first element yields an empty result.
     */
    public Optional<Long> asLong ()
    {
        if ( this.value instanceof Number )
        {
            return Optional.of ( ( (Number)this.value ).longValue () );
        }

        if ( this.value instanceof Long[] )
        {
            final Long[] arr = (Long[])this.value;
            if ( arr.length > 0 )
            {
                // ofNullable: a null element must not blow up with a NullPointerException
                return Optional.ofNullable ( arr[0] );
            }
        }

        return Optional.empty ();
    }
}
/*******************************************************************************
 * Copyright (c) 2015 IBH SYSTEMS GmbH.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBH SYSTEMS GmbH - initial API and implementation
 *******************************************************************************/

import java.util.Optional;

/**
 * A version made up of an optional epoch, a version string and an optional
 * release, with the textual form {@code [epoch:]version[-release]}.
 */
public class RpmVersion
{
    /** The epoch, {@code null} if there is none. */
    private final Integer epoch;

    private final String version;

    /** The release, {@code null} if there is none. */
    private final String release;

    /**
     * @param epoch
     *            the epoch, may be {@code null}
     * @param version
     *            the version string, stored as-is
     * @param release
     *            the release, may be {@code null}
     */
    public RpmVersion ( final Integer epoch, final String version, final String release )
    {
        this.epoch = epoch;
        this.version = version;
        this.release = release;
    }

    /**
     * @return the epoch, empty if none was set
     */
    public Optional<Integer> getEpoch ()
    {
        return Optional.ofNullable ( this.epoch );
    }

    /**
     * @return the version string as passed to the constructor
     */
    public String getVersion ()
    {
        return this.version;
    }

    /**
     * @return the release, empty if none was set
     */
    public Optional<String> getRelease ()
    {
        return Optional.ofNullable ( this.release );
    }

    /**
     * Render as {@code [epoch:]version[-release]}.
     * <p>
     * The epoch part is omitted when there is no epoch, the release part is
     * omitted when the release is missing or an empty string.
     * </p>
     */
    @Override
    public String toString ()
    {
        final StringBuilder sb = new StringBuilder ();

        if ( this.epoch != null )
        {
            sb.append ( this.epoch ).append ( ':' );
        }

        sb.append ( this.version );

        if ( this.release != null && !this.release.isEmpty () )
        {
            // append the release string itself, never the Optional wrapper
            sb.append ( '-' ).append ( this.release );
        }

        return sb.toString ();
    }

    /**
     * Parse a string of the form {@code [epoch:]version[-release]}.
     * <p>
     * The text before the first {@code ':'} is the epoch, the remainder is
     * split at its first {@code '-'} into version and release.
     * </p>
     *
     * @param version
     *            the string to parse
     * @return the parsed version, or {@code null} if the input is
     *         {@code null} or empty
     * @throws NumberFormatException
     *             if an epoch part is present but not a valid integer
     */
    public static RpmVersion valueOf ( final String version )
    {
        if ( version == null || version.isEmpty () )
        {
            return null;
        }

        final String[] epochSplit = version.split ( ":", 2 );

        final Integer epoch;
        final String remainder;
        if ( epochSplit.length > 1 )
        {
            epoch = Integer.parseInt ( epochSplit[0] );
            remainder = epochSplit[1];
        }
        else
        {
            epoch = null;
            remainder = epochSplit[0];
        }

        final String[] releaseSplit = remainder.split ( "-", 2 );

        final String ver = releaseSplit[0];
        final String rel = releaseSplit.length > 1 ? releaseSplit[1] : null;

        return new RpmVersion ( epoch, ver, rel );
    }
}
/*******************************************************************************
 * Copyright (c) 2015 IBH SYSTEMS GmbH.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBH SYSTEMS GmbH - initial API and implementation
 *******************************************************************************/

import java.lang.reflect.Array;
import java.util.Arrays;

/**
 * Static helpers for rendering binary data and arbitrary values as text.
 */
public class Rpms
{
    /** Upper case hex digits, indexed by nibble value. */
    private static final char[] HEX = "0123456789ABCDEF".toCharArray ();

    /**
     * Convert a whole byte array to upper case hex.
     *
     * @param data
     *            the bytes to convert, must not be {@code null}
     * @param maxWidth
     *            the maximum number of bytes (two hex digits each) per
     *            line, a value of zero or less disables line breaks
     * @return the hex string
     */
    public static String toHex ( final byte[] data, final int maxWidth )
    {
        return toHex ( data, 0, data.length, maxWidth );
    }

    /**
     * Convert a section of a byte array to upper case hex.
     *
     * @param data
     *            the source bytes, must not be {@code null}
     * @param offset
     *            index of the first byte to convert
     * @param length
     *            number of bytes to convert
     * @param maxWidth
     *            the maximum number of bytes (two hex digits each) per
     *            line, a value of zero or less disables line breaks
     * @return the hex string, lines are separated by
     *         {@link System#lineSeparator()}
     */
    public static String toHex ( final byte[] data, final int offset, final int length, final int maxWidth )
    {
        final StringBuilder sb = new StringBuilder ( length * 2 ); // not considering line breaks

        int bytesInLine = 0;
        for ( int i = 0; i < length; i++ )
        {
            // break before the next byte, so that the output never ends with a line separator
            if ( maxWidth > 0 && bytesInLine >= maxWidth )
            {
                sb.append ( System.lineSeparator () );
                bytesInLine = 0;
            }

            final int b = data[offset + i] & 0xFF;
            sb.append ( HEX[b >>> 4] );
            sb.append ( HEX[b & 0x0F] );
            bytesInLine++;
        }

        return sb.toString ();
    }

    /**
     * Render a value as string, see
     * {@link #dumpValue(StringBuilder, Object)}.
     *
     * @param value
     *            the value to render, may be {@code null}
     * @return the rendered value
     */
    public static String dumpValue ( final Object value )
    {
        final StringBuilder sb = new StringBuilder ();
        dumpValue ( sb, value );
        return sb.toString ();
    }

    /**
     * Append a textual form of a value to a string builder.
     * <ul>
     * <li>{@code null} is rendered as {@code "null"}</li>
     * <li>{@code byte[]} is rendered as hex without line breaks</li>
     * <li>any other array is rendered like {@code Arrays.toString},
     * e.g. {@code [1, 2, 3]}</li>
     * <li>everything else is appended as-is</li>
     * </ul>
     *
     * @param sb
     *            the builder to append to
     * @param value
     *            the value to render, may be {@code null}
     */
    public static void dumpValue ( final StringBuilder sb, final Object value )
    {
        if ( value == null )
        {
            sb.append ( "null" );
        }
        else if ( value instanceof byte[] )
        {
            sb.append ( toHex ( (byte[])value, -1 ) );
        }
        else if ( value instanceof Object[] )
        {
            sb.append ( Arrays.toString ( (Object[])value ) );
        }
        else if ( value.getClass ().isArray () )
        {
            // primitive arrays (int[], long[], ...) cannot be cast to Object[],
            // so walk them reflectively using the same format as Arrays.toString
            final int length = Array.getLength ( value );
            sb.append ( '[' );
            for ( int i = 0; i < length; i++ )
            {
                if ( i > 0 )
                {
                    sb.append ( ", " );
                }
                sb.append ( Array.get ( value, i ) );
            }
            sb.append ( ']' );
        }
        else
        {
            sb.append ( value );
        }
    }

}
+/******************************************************************************* + * Copyright (c) 2015 IBH SYSTEMS GmbH. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * Contributors: + * IBH SYSTEMS GmbH - initial API and implementation + *******************************************************************************/ +/** + * Core RPM functionalities + */ +package de.dentrassi.rpm; diff --git a/org.tukani.xz/.classpath b/org.tukani.xz/.classpath new file mode 100644 index 00000000..eca7bdba --- /dev/null +++ b/org.tukani.xz/.classpath @@ -0,0 +1,7 @@ + + + + + + + diff --git a/org.tukani.xz/.gitignore b/org.tukani.xz/.gitignore new file mode 100644 index 00000000..ae3c1726 --- /dev/null +++ b/org.tukani.xz/.gitignore @@ -0,0 +1 @@ +/bin/ diff --git a/org.tukani.xz/.project b/org.tukani.xz/.project new file mode 100644 index 00000000..8de38502 --- /dev/null +++ b/org.tukani.xz/.project @@ -0,0 +1,28 @@ + + + org.tukani.xz + + + + + + org.eclipse.jdt.core.javabuilder + + + + + org.eclipse.pde.ManifestBuilder + + + + + org.eclipse.pde.SchemaBuilder + + + + + + org.eclipse.pde.PluginNature + org.eclipse.jdt.core.javanature + + diff --git a/org.tukani.xz/.settings/org.eclipse.jdt.core.prefs b/org.tukani.xz/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 00000000..0c68a61d --- /dev/null +++ b/org.tukani.xz/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,7 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.8 +org.eclipse.jdt.core.compiler.compliance=1.8 +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.8 diff --git 
a/org.tukani.xz/.settings/org.eclipse.pde.core.prefs b/org.tukani.xz/.settings/org.eclipse.pde.core.prefs new file mode 100644 index 00000000..f29e940a --- /dev/null +++ b/org.tukani.xz/.settings/org.eclipse.pde.core.prefs @@ -0,0 +1,3 @@ +eclipse.preferences.version=1 +pluginProject.extensions=false +resolve.requirebundle=false diff --git a/org.tukani.xz/META-INF/MANIFEST.MF b/org.tukani.xz/META-INF/MANIFEST.MF new file mode 100644 index 00000000..893e31a0 --- /dev/null +++ b/org.tukani.xz/META-INF/MANIFEST.MF @@ -0,0 +1,16 @@ +Manifest-Version: 1.0 +Bundle-ManifestVersion: 2 +Bundle-Name: XZ for Java +Bundle-SymbolicName: org.tukani.xz +Bundle-Version: 1.5.0.qualifier +Bundle-Vendor: IBH SYSTEMS GmbH +Bundle-RequiredExecutionEnvironment: JavaSE-1.8 +Export-Package: org.tukaani.xz;version="1.5.0";uses:="org.tukaani.xz.simple,org.tukaani.xz.check", + org.tukaani.xz.check;version="1.5.0", + org.tukaani.xz.common;version="1.5.0", + org.tukaani.xz.delta;version="1.5.0", + org.tukaani.xz.index;version="1.5.0";uses:="org.tukaani.xz.common,org.tukaani.xz", + org.tukaani.xz.lz;version="1.5.0", + org.tukaani.xz.lzma;version="1.5.0";uses:="org.tukaani.xz.lz,org.tukaani.xz.rangecoder", + org.tukaani.xz.rangecoder;version="1.5.0", + org.tukaani.xz.simple;version="1.5.0" diff --git a/org.tukani.xz/build.properties b/org.tukani.xz/build.properties new file mode 100644 index 00000000..34d2e4d2 --- /dev/null +++ b/org.tukani.xz/build.properties @@ -0,0 +1,4 @@ +source.. = src/ +output.. = bin/ +bin.includes = META-INF/,\ + . diff --git a/org.tukani.xz/src/org/tukaani/xz/ARMOptions.java b/org.tukani.xz/src/org/tukaani/xz/ARMOptions.java new file mode 100644 index 00000000..9577101b --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/ARMOptions.java @@ -0,0 +1,36 @@ +/* + * ARMOptions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.ARM; + +/** + * BCJ filter for little endian ARM instructions. + */ +public class ARMOptions extends BCJOptions { + private static final int ALIGNMENT = 4; + + public ARMOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new ARM(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new ARM(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.ARM_FILTER_ID); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/ARMThumbOptions.java b/org.tukani.xz/src/org/tukaani/xz/ARMThumbOptions.java new file mode 100644 index 00000000..60eb6ec0 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/ARMThumbOptions.java @@ -0,0 +1,36 @@ +/* + * ARMThumbOptions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.ARMThumb; + +/** + * BCJ filter for little endian ARM-Thumb instructions. 
+ */ +public class ARMThumbOptions extends BCJOptions { + private static final int ALIGNMENT = 2; + + public ARMThumbOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new ARMThumb(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new ARMThumb(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.ARMTHUMB_FILTER_ID); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/BCJCoder.java b/org.tukani.xz/src/org/tukaani/xz/BCJCoder.java new file mode 100644 index 00000000..81862f7d --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/BCJCoder.java @@ -0,0 +1,35 @@ +/* + * BCJCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +abstract class BCJCoder implements FilterCoder { + public static final long X86_FILTER_ID = 0x04; + public static final long POWERPC_FILTER_ID = 0x05; + public static final long IA64_FILTER_ID = 0x06; + public static final long ARM_FILTER_ID = 0x07; + public static final long ARMTHUMB_FILTER_ID = 0x08; + public static final long SPARC_FILTER_ID = 0x09; + + public static boolean isBCJFilterID(long filterID) { + return filterID >= 0x04 && filterID <= 0x09; + } + + public boolean changesSize() { + return false; + } + + public boolean nonLastOK() { + return true; + } + + public boolean lastOK() { + return false; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/BCJDecoder.java b/org.tukani.xz/src/org/tukaani/xz/BCJDecoder.java new file mode 100644 index 00000000..f8a6ae22 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/BCJDecoder.java @@ -0,0 +1,62 @@ +/* + * BCJDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.*; + +class BCJDecoder extends BCJCoder implements FilterDecoder { + private final long filterID; + private final int startOffset; + + BCJDecoder(long filterID, byte[] props) + throws UnsupportedOptionsException { + assert isBCJFilterID(filterID); + this.filterID = filterID; + + if (props.length == 0) { + startOffset = 0; + } else if (props.length == 4) { + int n = 0; + for (int i = 0; i < 4; ++i) + n |= (props[i] & 0xFF) << (i * 8); + + startOffset = n; + } else { + throw new UnsupportedOptionsException( + "Unsupported BCJ filter properties"); + } + } + + public int getMemoryUsage() { + return SimpleInputStream.getMemoryUsage(); + } + + public InputStream getInputStream(InputStream in) { + SimpleFilter simpleFilter = null; + + if (filterID == X86_FILTER_ID) + simpleFilter = new X86(false, startOffset); + else if (filterID == POWERPC_FILTER_ID) + simpleFilter = new PowerPC(false, startOffset); + else if (filterID == IA64_FILTER_ID) + simpleFilter = new IA64(false, startOffset); + else if (filterID == ARM_FILTER_ID) + simpleFilter = new ARM(false, startOffset); + else if (filterID == ARMTHUMB_FILTER_ID) + simpleFilter = new ARMThumb(false, startOffset); + else if (filterID == SPARC_FILTER_ID) + simpleFilter = new SPARC(false, startOffset); + else + assert false; + + return new SimpleInputStream(in, simpleFilter); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/BCJEncoder.java b/org.tukani.xz/src/org/tukaani/xz/BCJEncoder.java new file mode 100644 index 00000000..136bbb70 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/BCJEncoder.java @@ -0,0 +1,48 @@ +/* + * BCJEncoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +class BCJEncoder extends BCJCoder implements FilterEncoder { + private final BCJOptions options; + private final long filterID; + private final byte[] props; + + BCJEncoder(BCJOptions options, long filterID) { + assert isBCJFilterID(filterID); + int startOffset = options.getStartOffset(); + + if (startOffset == 0) { + props = new byte[0]; + } else { + props = new byte[4]; + for (int i = 0; i < 4; ++i) + props[i] = (byte)(startOffset >>> (i * 8)); + } + + this.filterID = filterID; + this.options = (BCJOptions)options.clone(); + } + + public long getFilterID() { + return filterID; + } + + public byte[] getFilterProps() { + return props; + } + + public boolean supportsFlushing() { + return false; + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/BCJOptions.java b/org.tukani.xz/src/org/tukaani/xz/BCJOptions.java new file mode 100644 index 00000000..705a2c08 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/BCJOptions.java @@ -0,0 +1,57 @@ +/* + * BCJOptions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +abstract class BCJOptions extends FilterOptions { + private final int alignment; + int startOffset = 0; + + BCJOptions(int alignment) { + this.alignment = alignment; + } + + /** + * Sets the start offset for the address conversions. + * Normally this is useless so you shouldn't use this function. + * The default value is 0. + */ + public void setStartOffset(int startOffset) + throws UnsupportedOptionsException { + if ((startOffset & (alignment - 1)) != 0) + throw new UnsupportedOptionsException( + "Start offset must be a multiple of " + alignment); + + this.startOffset = startOffset; + } + + /** + * Gets the start offset. 
+ */ + public int getStartOffset() { + return startOffset; + } + + public int getEncoderMemoryUsage() { + return SimpleOutputStream.getMemoryUsage(); + } + + public int getDecoderMemoryUsage() { + return SimpleInputStream.getMemoryUsage(); + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + assert false; + throw new RuntimeException(); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/BlockInputStream.java b/org.tukani.xz/src/org/tukaani/xz/BlockInputStream.java new file mode 100644 index 00000000..12a8b813 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/BlockInputStream.java @@ -0,0 +1,278 @@ +/* + * BlockInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.util.Arrays; +import org.tukaani.xz.common.DecoderUtil; +import org.tukaani.xz.check.Check; + +class BlockInputStream extends InputStream { + private final DataInputStream inData; + private final CountingInputStream inCounted; + private InputStream filterChain; + private final Check check; + + private long uncompressedSizeInHeader = -1; + private long compressedSizeInHeader = -1; + private long compressedSizeLimit; + private final int headerSize; + private long uncompressedSize = 0; + private boolean endReached = false; + + private final byte[] tempBuf = new byte[1]; + + public BlockInputStream(InputStream in, Check check, int memoryLimit, + long unpaddedSizeInIndex, + long uncompressedSizeInIndex) + throws IOException, IndexIndicatorException { + this.check = check; + inData = new DataInputStream(in); + + byte[] buf = new byte[DecoderUtil.BLOCK_HEADER_SIZE_MAX]; + + // Block Header Size or Index Indicator + inData.readFully(buf, 0, 1); + + // See if this begins the Index field. 
+ if (buf[0] == 0x00) + throw new IndexIndicatorException(); + + // Read the rest of the Block Header. + headerSize = 4 * ((buf[0] & 0xFF) + 1); + inData.readFully(buf, 1, headerSize - 1); + + // Validate the CRC32. + if (!DecoderUtil.isCRC32Valid(buf, 0, headerSize - 4, headerSize - 4)) + throw new CorruptedInputException("XZ Block Header is corrupt"); + + // Check for reserved bits in Block Flags. + if ((buf[1] & 0x3C) != 0) + throw new UnsupportedOptionsException( + "Unsupported options in XZ Block Header"); + + // Memory for the Filter Flags field + int filterCount = (buf[1] & 0x03) + 1; + long[] filterIDs = new long[filterCount]; + byte[][] filterProps = new byte[filterCount][]; + + // Use a stream to parse the fields after the Block Flags field. + // Exclude the CRC32 field at the end. + ByteArrayInputStream bufStream = new ByteArrayInputStream( + buf, 2, headerSize - 6); + + try { + // Set the maximum valid compressed size. This is overriden + // by the value from the Compressed Size field if it is present. + compressedSizeLimit = (DecoderUtil.VLI_MAX & ~3) + - headerSize - check.getSize(); + + // Decode and validate Compressed Size if the relevant flag + // is set in Block Flags. + if ((buf[1] & 0x40) != 0x00) { + compressedSizeInHeader = DecoderUtil.decodeVLI(bufStream); + + if (compressedSizeInHeader == 0 + || compressedSizeInHeader > compressedSizeLimit) + throw new CorruptedInputException(); + + compressedSizeLimit = compressedSizeInHeader; + } + + // Decode Uncompressed Size if the relevant flag is set + // in Block Flags. + if ((buf[1] & 0x80) != 0x00) + uncompressedSizeInHeader = DecoderUtil.decodeVLI(bufStream); + + // Decode Filter Flags. 
+ for (int i = 0; i < filterCount; ++i) { + filterIDs[i] = DecoderUtil.decodeVLI(bufStream); + + long filterPropsSize = DecoderUtil.decodeVLI(bufStream); + if (filterPropsSize > bufStream.available()) + throw new CorruptedInputException(); + + filterProps[i] = new byte[(int)filterPropsSize]; + bufStream.read(filterProps[i]); + } + + } catch (IOException e) { + throw new CorruptedInputException("XZ Block Header is corrupt"); + } + + // Check that the remaining bytes are zero. + for (int i = bufStream.available(); i > 0; --i) + if (bufStream.read() != 0x00) + throw new UnsupportedOptionsException( + "Unsupported options in XZ Block Header"); + + // Validate the Blcok Header against the Index when doing + // random access reading. + if (unpaddedSizeInIndex != -1) { + // Compressed Data must be at least one byte, so if Block Header + // and Check alone take as much or more space than the size + // stored in the Index, the file is corrupt. + int headerAndCheckSize = headerSize + check.getSize(); + if (headerAndCheckSize >= unpaddedSizeInIndex) + throw new CorruptedInputException( + "XZ Index does not match a Block Header"); + + // The compressed size calculated from Unpadded Size must + // match the value stored in the Compressed Size field in + // the Block Header. + long compressedSizeFromIndex + = unpaddedSizeInIndex - headerAndCheckSize; + if (compressedSizeFromIndex > compressedSizeLimit + || (compressedSizeInHeader != -1 + && compressedSizeInHeader != compressedSizeFromIndex)) + throw new CorruptedInputException( + "XZ Index does not match a Block Header"); + + // The uncompressed size stored in the Index must match + // the value stored in the Uncompressed Size field in + // the Block Header. 
+ if (uncompressedSizeInHeader != -1 + && uncompressedSizeInHeader != uncompressedSizeInIndex) + throw new CorruptedInputException( + "XZ Index does not match a Block Header"); + + // For further validation, pretend that the values from the Index + // were stored in the Block Header. + compressedSizeLimit = compressedSizeFromIndex; + compressedSizeInHeader = compressedSizeFromIndex; + uncompressedSizeInHeader = uncompressedSizeInIndex; + } + + // Check if the Filter IDs are supported, decode + // the Filter Properties, and check that they are + // supported by this decoder implementation. + FilterDecoder[] filters = new FilterDecoder[filterIDs.length]; + + for (int i = 0; i < filters.length; ++i) { + if (filterIDs[i] == LZMA2Coder.FILTER_ID) + filters[i] = new LZMA2Decoder(filterProps[i]); + + else if (filterIDs[i] == DeltaCoder.FILTER_ID) + filters[i] = new DeltaDecoder(filterProps[i]); + + else if (BCJDecoder.isBCJFilterID(filterIDs[i])) + filters[i] = new BCJDecoder(filterIDs[i], filterProps[i]); + + else + throw new UnsupportedOptionsException( + "Unknown Filter ID " + filterIDs[i]); + } + + RawCoder.validate(filters); + + // Check the memory usage limit. + if (memoryLimit >= 0) { + int memoryNeeded = 0; + for (int i = 0; i < filters.length; ++i) + memoryNeeded += filters[i].getMemoryUsage(); + + if (memoryNeeded > memoryLimit) + throw new MemoryLimitException(memoryNeeded, memoryLimit); + } + + // Use an input size counter to calculate + // the size of the Compressed Data field. + inCounted = new CountingInputStream(in); + + // Initialize the filter chain. + filterChain = inCounted; + for (int i = filters.length - 1; i >= 0; --i) + filterChain = filters[i].getInputStream(filterChain); + } + + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? 
-1 : (tempBuf[0] & 0xFF); + } + + public int read(byte[] buf, int off, int len) throws IOException { + if (endReached) + return -1; + + int ret = filterChain.read(buf, off, len); + + if (ret > 0) { + check.update(buf, off, ret); + uncompressedSize += ret; + + // Catch invalid values. + long compressedSize = inCounted.getSize(); + if (compressedSize < 0 + || compressedSize > compressedSizeLimit + || uncompressedSize < 0 + || (uncompressedSizeInHeader != -1 + && uncompressedSize > uncompressedSizeInHeader)) + throw new CorruptedInputException(); + + // Check the Block integrity as soon as possible: + // - The filter chain shouldn't return less than requested + // unless it hit the end of the input. + // - If the uncompressed size is known, we know when there + // shouldn't be more data coming. We still need to read + // one byte to let the filter chain catch errors and to + // let it read end of payload marker(s). + if (ret < len || uncompressedSize == uncompressedSizeInHeader) { + if (filterChain.read() != -1) + throw new CorruptedInputException(); + + validate(); + endReached = true; + } + } else if (ret == -1) { + validate(); + endReached = true; + } + + return ret; + } + + private void validate() throws IOException { + long compressedSize = inCounted.getSize(); + + // Validate Compressed Size and Uncompressed Size if they were + // present in Block Header. + if ((compressedSizeInHeader != -1 + && compressedSizeInHeader != compressedSize) + || (uncompressedSizeInHeader != -1 + && uncompressedSizeInHeader != uncompressedSize)) + throw new CorruptedInputException(); + + // Block Padding bytes must be zeros. + while ((compressedSize++ & 3) != 0) + if (inData.readUnsignedByte() != 0x00) + throw new CorruptedInputException(); + + // Validate the integrity check. 
+ byte[] storedCheck = new byte[check.getSize()]; + inData.readFully(storedCheck); + if (!Arrays.equals(check.finish(), storedCheck)) + throw new CorruptedInputException("Integrity check (" + + check.getName() + ") does not match"); + } + + public int available() throws IOException { + return filterChain.available(); + } + + public long getUnpaddedSize() { + return headerSize + inCounted.getSize() + check.getSize(); + } + + public long getUncompressedSize() { + return uncompressedSize; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/BlockOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/BlockOutputStream.java new file mode 100644 index 00000000..03fd0a92 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/BlockOutputStream.java @@ -0,0 +1,134 @@ +/* + * BlockOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import org.tukaani.xz.common.EncoderUtil; +import org.tukaani.xz.check.Check; + +class BlockOutputStream extends FinishableOutputStream { + private final OutputStream out; + private final CountingOutputStream outCounted; + private FinishableOutputStream filterChain; + private final Check check; + + private final int headerSize; + private final long compressedSizeLimit; + private long uncompressedSize = 0; + + private final byte[] tempBuf = new byte[1]; + + public BlockOutputStream(OutputStream out, FilterEncoder[] filters, + Check check) throws IOException { + this.out = out; + this.check = check; + + // Initialize the filter chain. + outCounted = new CountingOutputStream(out); + filterChain = outCounted; + for (int i = filters.length - 1; i >= 0; --i) + filterChain = filters[i].getOutputStream(filterChain); + + // Prepare to encode the Block Header field. 
+ ByteArrayOutputStream bufStream = new ByteArrayOutputStream(); + + // Write a dummy Block Header Size field. The real value is written + // once everything else except CRC32 has been written. + bufStream.write(0x00); + + // Write Block Flags. Storing Compressed Size or Uncompressed Size + // isn't supported for now. + bufStream.write(filters.length - 1); + + // List of Filter Flags + for (int i = 0; i < filters.length; ++i) { + EncoderUtil.encodeVLI(bufStream, filters[i].getFilterID()); + byte[] filterProps = filters[i].getFilterProps(); + EncoderUtil.encodeVLI(bufStream, filterProps.length); + bufStream.write(filterProps); + } + + // Header Padding + while ((bufStream.size() & 3) != 0) + bufStream.write(0x00); + + byte[] buf = bufStream.toByteArray(); + + // Total size of the Block Header: Take the size of the CRC32 field + // into account. + headerSize = buf.length + 4; + + // This is just a sanity check. + if (headerSize > EncoderUtil.BLOCK_HEADER_SIZE_MAX) + throw new UnsupportedOptionsException(); + + // Block Header Size + buf[0] = (byte)(buf.length / 4); + + // Write the Block Header field to the output stream. + out.write(buf); + EncoderUtil.writeCRC32(out, buf); + + // Calculate the maximum allowed size of the Compressed Data field. + // It is hard to exceed it so this is mostly to be pedantic. + compressedSizeLimit = (EncoderUtil.VLI_MAX & ~3) + - headerSize - check.getSize(); + } + + public void write(int b) throws IOException { + tempBuf[0] = (byte)b; + write(tempBuf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + filterChain.write(buf, off, len); + check.update(buf, off, len); + uncompressedSize += len; + validate(); + } + + public void flush() throws IOException { + filterChain.flush(); + validate(); + } + + public void finish() throws IOException { + // Finish the Compressed Data field. 
+ filterChain.finish(); + validate(); + + // Block Padding + for (long i = outCounted.getSize(); (i & 3) != 0; ++i) + out.write(0x00); + + // Check + out.write(check.finish()); + } + + private void validate() throws IOException { + long compressedSize = outCounted.getSize(); + + // It is very hard to trigger this exception. + // This is just to be pedantic. + if (compressedSize < 0 || compressedSize > compressedSizeLimit + || uncompressedSize < 0) + throw new XZIOException("XZ Stream has grown too big"); + } + + public long getUnpaddedSize() { + return headerSize + outCounted.getSize() + check.getSize(); + } + + public long getUncompressedSize() { + return uncompressedSize; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/CorruptedInputException.java b/org.tukani.xz/src/org/tukaani/xz/CorruptedInputException.java new file mode 100644 index 00000000..d7d95207 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/CorruptedInputException.java @@ -0,0 +1,37 @@ +/* + * CorruptedInputException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Thrown when the compressed input data is corrupt. + * However, it is possible that some or all of the data + * already read from the input stream was corrupt too. + */ +public class CorruptedInputException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new CorruptedInputException with + * the default error detail message. + */ + public CorruptedInputException() { + super("Compressed data is corrupt"); + } + + /** + * Creates a new CorruptedInputException with + * the specified error detail message. 
+ * + * @param s error detail message + */ + public CorruptedInputException(String s) { + super(s); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/CountingInputStream.java b/org.tukani.xz/src/org/tukaani/xz/CountingInputStream.java new file mode 100644 index 00000000..ce0935a5 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/CountingInputStream.java @@ -0,0 +1,45 @@ +/* + * CountingInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.FilterInputStream; +import java.io.InputStream; +import java.io.IOException; + +/** + * Counts the number of bytes read from an input stream. + */ +class CountingInputStream extends FilterInputStream { + private long size = 0; + + public CountingInputStream(InputStream in) { + super(in); + } + + public int read() throws IOException { + int ret = in.read(); + if (ret != -1 && size >= 0) + ++size; + + return ret; + } + + public int read(byte[] b, int off, int len) throws IOException { + int ret = in.read(b, off, len); + if (ret > 0 && size >= 0) + size += ret; + + return ret; + } + + public long getSize() { + return size; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/CountingOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/CountingOutputStream.java new file mode 100644 index 00000000..9b3eef37 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/CountingOutputStream.java @@ -0,0 +1,54 @@ +/* + * CountingOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * Counts the number of bytes written to an output stream. + *

+ * The finish method does nothing. + * This is FinishableOutputStream instead + * of OutputStream solely because it allows + * using this as the output stream for a chain of raw filters. + */ +class CountingOutputStream extends FinishableOutputStream { + private final OutputStream out; + private long size = 0; + + public CountingOutputStream(OutputStream out) { + this.out = out; + } + + public void write(int b) throws IOException { + out.write(b); + if (size >= 0) + ++size; + } + + public void write(byte[] b, int off, int len) throws IOException { + out.write(b, off, len); + if (size >= 0) + size += len; + } + + public void flush() throws IOException { + out.flush(); + } + + public void close() throws IOException { + out.close(); + } + + public long getSize() { + return size; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/DeltaCoder.java b/org.tukani.xz/src/org/tukaani/xz/DeltaCoder.java new file mode 100644 index 00000000..808834c8 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/DeltaCoder.java @@ -0,0 +1,26 @@ +/* + * DeltaCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +abstract class DeltaCoder implements FilterCoder { + public static final long FILTER_ID = 0x03; + + public boolean changesSize() { + return false; + } + + public boolean nonLastOK() { + return true; + } + + public boolean lastOK() { + return false; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/DeltaDecoder.java b/org.tukani.xz/src/org/tukaani/xz/DeltaDecoder.java new file mode 100644 index 00000000..445d1782 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/DeltaDecoder.java @@ -0,0 +1,32 @@ +/* + * DeltaDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; + +class DeltaDecoder extends DeltaCoder implements FilterDecoder { + private final int distance; + + DeltaDecoder(byte[] props) throws UnsupportedOptionsException { + if (props.length != 1) + throw new UnsupportedOptionsException( + "Unsupported Delta filter properties"); + + distance = (props[0] & 0xFF) + 1; + } + + public int getMemoryUsage() { + return 1; + } + + public InputStream getInputStream(InputStream in) { + return new DeltaInputStream(in, distance); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/DeltaEncoder.java b/org.tukani.xz/src/org/tukaani/xz/DeltaEncoder.java new file mode 100644 index 00000000..384afe44 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/DeltaEncoder.java @@ -0,0 +1,36 @@ +/* + * DeltaEncoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +class DeltaEncoder extends DeltaCoder implements FilterEncoder { + private final DeltaOptions options; + private final byte[] props = new byte[1]; + + DeltaEncoder(DeltaOptions options) { + props[0] = (byte)(options.getDistance() - 1); + this.options = (DeltaOptions)options.clone(); + } + + public long getFilterID() { + return FILTER_ID; + } + + public byte[] getFilterProps() { + return props; + } + + public boolean supportsFlushing() { + return true; + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/DeltaInputStream.java b/org.tukani.xz/src/org/tukaani/xz/DeltaInputStream.java new file mode 100644 index 00000000..56478f51 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/DeltaInputStream.java @@ -0,0 +1,146 @@ +/* + * DeltaInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; +import org.tukaani.xz.delta.DeltaDecoder; + +/** + * Decodes raw Delta-filtered data (no XZ headers). + *

+ * The delta filter doesn't change the size of the data and thus it + * cannot have an end-of-payload marker. It will simply decode until + * its input stream indicates end of input. + */ +public class DeltaInputStream extends InputStream { + /** + * Smallest supported delta calculation distance. + */ + public static final int DISTANCE_MIN = 1; + + /** + * Largest supported delta calculation distance. + */ + public static final int DISTANCE_MAX = 256; + + private InputStream in; + private final DeltaDecoder delta; + + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + /** + * Creates a new Delta decoder with the given delta calculation distance. + * + * @param in input stream from which Delta filtered data + * is read + * + * @param distance delta calculation distance, must be in the + * range [DISTANCE_MIN, + * DISTANCE_MAX] + */ + public DeltaInputStream(InputStream in, int distance) { + // Check for null because otherwise null isn't detect + // in this constructor. + if (in == null) + throw new NullPointerException(); + + this.in = in; + this.delta = new DeltaDecoder(distance); + } + + /** + * Decode the next byte from this input stream. + * + * @return the next decoded byte, or -1 to indicate + * the end of input on the input stream in + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); + } + + /** + * Decode into an array of bytes. + *

+ * This calls in.read(buf, off, len) and defilters the + * returned data. + * + * @param buf target buffer for decoded data + * @param off start offset in buf + * @param len maximum number of bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the input stream in + * + * @throws XZIOException if the stream has been closed + * + * @throws IOException may be thrown by underlaying input + * stream in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + int size; + try { + size = in.read(buf, off, len); + } catch (IOException e) { + exception = e; + throw e; + } + + if (size == -1) + return -1; + + delta.decode(buf, off, size); + return size; + } + + /** + * Calls in.available(). + * + * @return the value returned by in.available() + */ + public int available() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + return in.available(); + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. + * + * @throws IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/DeltaOptions.java b/org.tukani.xz/src/org/tukaani/xz/DeltaOptions.java new file mode 100644 index 00000000..145130bf --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/DeltaOptions.java @@ -0,0 +1,102 @@ +/* + * DeltaOptions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; + +/** + * Delta filter options. 
The Delta filter can be used only as a non-last + * filter in the chain, for example Delta + LZMA2. + *

+ * Currently only simple byte-wise delta is supported. The only option + * is the delta distance, which you should set to match your data. + * It's not possible to provide a generic default value for it. + *

+ * For example, with distance = 2 and eight-byte input + * A1 B1 A2 B3 A3 B5 A4 B7, the output will be A1 B1 01 02 01 02 01 02. + *

+ * The Delta filter can be good with uncompressed bitmap images. It can + * also help with PCM audio, although special-purpose compressors like + * FLAC will give much smaller result at much better compression speed. + */ +public class DeltaOptions extends FilterOptions { + /** + * Smallest supported delta calculation distance. + */ + public static final int DISTANCE_MIN = 1; + + /** + * Largest supported delta calculation distance. + */ + public static final int DISTANCE_MAX = 256; + + private int distance = DISTANCE_MIN; + + /** + * Creates new Delta options and sets the delta distance to 1 byte. + */ + public DeltaOptions() {} + + /** + * Creates new Delta options and sets the distance to the given value. + */ + public DeltaOptions(int distance) throws UnsupportedOptionsException { + setDistance(distance); + } + + /** + * Sets the delta distance in bytes. The new distance must be in + * the range [DISTANCE_MIN, DISTANCE_MAX]. + */ + public void setDistance(int distance) throws UnsupportedOptionsException { + if (distance < DISTANCE_MIN || distance > DISTANCE_MAX) + throw new UnsupportedOptionsException( + "Delta distance must be in the range [" + DISTANCE_MIN + + ", " + DISTANCE_MAX + "]: " + distance); + + this.distance = distance; + } + + /** + * Gets the delta distance. 
+ */ + public int getDistance() { + return distance; + } + + public int getEncoderMemoryUsage() { + return DeltaOutputStream.getMemoryUsage(); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new DeltaOutputStream(out, this); + } + + public int getDecoderMemoryUsage() { + return 1; + } + + public InputStream getInputStream(InputStream in) { + return new DeltaInputStream(in, distance); + } + + FilterEncoder getFilterEncoder() { + return new DeltaEncoder(this); + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + assert false; + throw new RuntimeException(); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/DeltaOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/DeltaOutputStream.java new file mode 100644 index 00000000..bd880db4 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/DeltaOutputStream.java @@ -0,0 +1,113 @@ +/* + * DeltaOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.IOException; +import org.tukaani.xz.delta.DeltaEncoder; + +class DeltaOutputStream extends FinishableOutputStream { + private static final int FILTER_BUF_SIZE = 4096; + + private FinishableOutputStream out; + private final DeltaEncoder delta; + private final byte[] filterBuf = new byte[FILTER_BUF_SIZE]; + + private boolean finished = false; + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + static int getMemoryUsage() { + return 1 + FILTER_BUF_SIZE / 1024; + } + + DeltaOutputStream(FinishableOutputStream out, DeltaOptions options) { + this.out = out; + delta = new DeltaEncoder(options.getDistance()); + } + + public void write(int b) throws IOException { + tempBuf[0] = (byte)b; + write(tempBuf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished"); + + try { + while (len > FILTER_BUF_SIZE) { + delta.encode(buf, off, FILTER_BUF_SIZE, filterBuf); + out.write(filterBuf); + off += FILTER_BUF_SIZE; + len -= FILTER_BUF_SIZE; + } + + delta.encode(buf, off, len, filterBuf); + out.write(filterBuf, 0, len); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void flush() throws IOException { + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + out.flush(); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void finish() throws IOException { + if (!finished) { + if (exception != null) + throw exception; + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + } + + public void close() throws IOException { + if (out != null) { + try { + out.close(); + } catch 
(IOException e) { + if (exception == null) + exception = e; + } + + out = null; + } + + if (exception != null) + throw exception; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/FilterCoder.java b/org.tukani.xz/src/org/tukaani/xz/FilterCoder.java new file mode 100644 index 00000000..1e95e37f --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/FilterCoder.java @@ -0,0 +1,16 @@ +/* + * FilterCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +interface FilterCoder { + boolean changesSize(); + boolean nonLastOK(); + boolean lastOK(); +} diff --git a/org.tukani.xz/src/org/tukaani/xz/FilterDecoder.java b/org.tukani.xz/src/org/tukaani/xz/FilterDecoder.java new file mode 100644 index 00000000..8e2d0061 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/FilterDecoder.java @@ -0,0 +1,17 @@ +/* + * FilterDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; + +interface FilterDecoder extends FilterCoder { + int getMemoryUsage(); + InputStream getInputStream(InputStream in); +} diff --git a/org.tukani.xz/src/org/tukaani/xz/FilterEncoder.java b/org.tukani.xz/src/org/tukaani/xz/FilterEncoder.java new file mode 100644 index 00000000..4558aad9 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/FilterEncoder.java @@ -0,0 +1,17 @@ +/* + * FilterEncoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +interface FilterEncoder extends FilterCoder { + long getFilterID(); + byte[] getFilterProps(); + boolean supportsFlushing(); + FinishableOutputStream getOutputStream(FinishableOutputStream out); +} diff --git a/org.tukani.xz/src/org/tukaani/xz/FilterOptions.java b/org.tukani.xz/src/org/tukaani/xz/FilterOptions.java new file mode 100644 index 00000000..a2398b40 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/FilterOptions.java @@ -0,0 +1,80 @@ +/* + * FilterOptions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; + +/** + * Base class for filter-specific options classes. + */ +public abstract class FilterOptions implements Cloneable { + /** + * Gets how much memory the encoder will need with + * the given filter chain. This function simply calls + * getEncoderMemoryUsage() for every filter + * in the array and returns the sum of the returned values. + */ + public static int getEncoderMemoryUsage(FilterOptions[] options) { + int m = 0; + + for (int i = 0; i < options.length; ++i) + m += options[i].getEncoderMemoryUsage(); + + return m; + } + + /** + * Gets how much memory the decoder will need with + * the given filter chain. This function simply calls + * getDecoderMemoryUsage() for every filter + * in the array and returns the sum of the returned values. + */ + public static int getDecoderMemoryUsage(FilterOptions[] options) { + int m = 0; + + for (int i = 0; i < options.length; ++i) + m += options[i].getDecoderMemoryUsage(); + + return m; + } + + /** + * Gets how much memory the encoder will need with these options. + */ + public abstract int getEncoderMemoryUsage(); + + /** + * Gets a raw (no XZ headers) encoder output stream using these options. + * Raw streams are an advanced feature. 
In most cases you want to store + * the compressed data in the .xz container format instead of using + * a raw stream. To use this filter in a .xz file, pass this object + * to XZOutputStream. + */ + public abstract FinishableOutputStream getOutputStream( + FinishableOutputStream out); + + /** + * Gets how much memory the decoder will need to decompress the data + * that was encoded with these options. + */ + public abstract int getDecoderMemoryUsage(); + + /** + * Gets a raw (no XZ headers) decoder input stream using these options. + */ + public abstract InputStream getInputStream(InputStream in) + throws IOException; + + abstract FilterEncoder getFilterEncoder(); + + FilterOptions() {} +} diff --git a/org.tukani.xz/src/org/tukaani/xz/FinishableOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/FinishableOutputStream.java new file mode 100644 index 00000000..b360628b --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/FinishableOutputStream.java @@ -0,0 +1,31 @@ +/* + * FinishableOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * Output stream that supports finishing without closing + * the underlying stream. + */ +public abstract class FinishableOutputStream extends OutputStream { + /** + * Finish the stream without closing the underlying stream. + * No more data may be written to the stream after finishing. + *

+ * The finish method of FinishableOutputStream + * does nothing. Subclasses should override it if they need finishing + * support, which is the case, for example, with compressors. + * + * @throws IOException + */ + public void finish() throws IOException {}; +} diff --git a/org.tukani.xz/src/org/tukaani/xz/FinishableWrapperOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/FinishableWrapperOutputStream.java new file mode 100644 index 00000000..2e0ac99f --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/FinishableWrapperOutputStream.java @@ -0,0 +1,70 @@ +/* + * FinishableWrapperOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; + +/** + * Wraps an output stream to a finishable output stream for use with + * raw encoders. This is not needed for XZ compression and thus most + * people will never need this. + */ +public class FinishableWrapperOutputStream extends FinishableOutputStream { + /** + * The {@link java.io.OutputStream OutputStream} that has been + * wrapped into a FinishableWrapperOutputStream. + */ + protected OutputStream out; + + /** + * Creates a new output stream which support finishing. + * The finish() method will do nothing. + */ + public FinishableWrapperOutputStream(OutputStream out) { + this.out = out; + } + + /** + * Calls {@link java.io.OutputStream#write(int) out.write(b)}. + */ + public void write(int b) throws IOException { + out.write(b); + } + + /** + * Calls {@link java.io.OutputStream#write(byte[]) out.write(buf)}. + */ + public void write(byte[] buf) throws IOException { + out.write(buf); + } + + /** + * Calls {@link java.io.OutputStream#write(byte[],int,int) + out.write(buf, off, len)}. 
+ */ + public void write(byte[] buf, int off, int len) throws IOException { + out.write(buf, off, len); + } + + /** + * Calls {@link java.io.OutputStream#flush() out.flush()}. + */ + public void flush() throws IOException { + out.flush(); + } + + /** + * Calls {@link java.io.OutputStream#close() out.close()}. + */ + public void close() throws IOException { + out.close(); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/IA64Options.java b/org.tukani.xz/src/org/tukaani/xz/IA64Options.java new file mode 100644 index 00000000..ba578708 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/IA64Options.java @@ -0,0 +1,36 @@ +/* + * IA64Options + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.IA64; + +/** + * BCJ filter for Itanium (IA-64) instructions. + */ +public class IA64Options extends BCJOptions { + private static final int ALIGNMENT = 16; + + public IA64Options() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new IA64(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new IA64(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.IA64_FILTER_ID); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/IndexIndicatorException.java b/org.tukani.xz/src/org/tukaani/xz/IndexIndicatorException.java new file mode 100644 index 00000000..fc6bc038 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/IndexIndicatorException.java @@ -0,0 +1,14 @@ +/* + * IndexIndicatorException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +class IndexIndicatorException extends Exception { + private static final long serialVersionUID = 1L; +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMA2Coder.java b/org.tukani.xz/src/org/tukaani/xz/LZMA2Coder.java new file mode 100644 index 00000000..b0963b75 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMA2Coder.java @@ -0,0 +1,26 @@ +/* + * LZMA2Coder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +abstract class LZMA2Coder implements FilterCoder { + public static final long FILTER_ID = 0x21; + + public boolean changesSize() { + return true; + } + + public boolean nonLastOK() { + return false; + } + + public boolean lastOK() { + return true; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMA2Decoder.java b/org.tukani.xz/src/org/tukaani/xz/LZMA2Decoder.java new file mode 100644 index 00000000..82075c21 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMA2Decoder.java @@ -0,0 +1,35 @@ +/* + * LZMA2Decoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; + +class LZMA2Decoder extends LZMA2Coder implements FilterDecoder { + private int dictSize; + + LZMA2Decoder(byte[] props) throws UnsupportedOptionsException { + // Up to 1.5 GiB dictionary is supported. The bigger ones + // are too big for int. 
+ if (props.length != 1 || (props[0] & 0xFF) > 37) + throw new UnsupportedOptionsException( + "Unsupported LZMA2 properties"); + + dictSize = 2 | (props[0] & 1); + dictSize <<= (props[0] >>> 1) + 11; + } + + public int getMemoryUsage() { + return LZMA2InputStream.getMemoryUsage(dictSize); + } + + public InputStream getInputStream(InputStream in) { + return new LZMA2InputStream(in, dictSize); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMA2Encoder.java b/org.tukani.xz/src/org/tukaani/xz/LZMA2Encoder.java new file mode 100644 index 00000000..7c7facc4 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMA2Encoder.java @@ -0,0 +1,50 @@ +/* + * LZMA2Encoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import org.tukaani.xz.lzma.LZMAEncoder; + +class LZMA2Encoder extends LZMA2Coder implements FilterEncoder { + private final LZMA2Options options; + private final byte[] props = new byte[1]; + + LZMA2Encoder(LZMA2Options options) { + if (options.getPresetDict() != null) + throw new IllegalArgumentException( + "XZ doesn't support a preset dictionary for now"); + + if (options.getMode() == LZMA2Options.MODE_UNCOMPRESSED) { + props[0] = (byte)0; + } else { + int d = Math.max(options.getDictSize(), + LZMA2Options.DICT_SIZE_MIN); + props[0] = (byte)(LZMAEncoder.getDistSlot(d - 1) - 23); + } + + // Make a private copy so that the caller is free to change its copy. 
+ this.options = (LZMA2Options)options.clone(); + } + + public long getFilterID() { + return FILTER_ID; + } + + public byte[] getFilterProps() { + return props; + } + + public boolean supportsFlushing() { + return true; + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return options.getOutputStream(out); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMA2InputStream.java b/org.tukani.xz/src/org/tukaani/xz/LZMA2InputStream.java new file mode 100644 index 00000000..4e865c60 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMA2InputStream.java @@ -0,0 +1,358 @@ +/* + * LZMA2InputStream + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.lz.LZDecoder; +import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer; +import org.tukaani.xz.lzma.LZMADecoder; + +/** + * Decompresses a raw LZMA2 stream (no XZ headers). + */ +public class LZMA2InputStream extends InputStream { + /** + * Smallest valid LZMA2 dictionary size. + *

+ * Very tiny dictionaries would be a performance problem, so + * the minimum is 4 KiB. + */ + public static final int DICT_SIZE_MIN = 4096; + + /** + * Largest dictionary size supported by this implementation. + *

+ * The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB. + * This implementation supports only 16 bytes less than 2 GiB for raw + * LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This + * limitation is due to Java using signed 32-bit integers for array + * indexing. The limitation shouldn't matter much in practice since so + * huge dictionaries are not normally used. + */ + public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; + + private static final int COMPRESSED_SIZE_MAX = 1 << 16; + + private DataInputStream in; + + private final LZDecoder lz; + private final RangeDecoderFromBuffer rc + = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX); + private LZMADecoder lzma; + + private int uncompressedSize = 0; + private boolean isLZMAChunk; + + private boolean needDictReset = true; + private boolean needProps = true; + private boolean endReached = false; + + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + /** + * Gets approximate decompressor memory requirements as kibibytes for + * the given dictionary size. + * + * @param dictSize LZMA2 dictionary size as bytes, must be + * in the range [DICT_SIZE_MIN, + * DICT_SIZE_MAX] + * + * @return approximate memory requirements as kibibytes (KiB) + */ + public static int getMemoryUsage(int dictSize) { + // The base state is around 30-40 KiB (probabilities etc.), + // range decoder needs COMPRESSED_SIZE_MAX bytes for buffering, + // and LZ decoder needs a dictionary buffer. + return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024; + } + + private static int getDictSize(int dictSize) { + if (dictSize < DICT_SIZE_MIN || dictSize > DICT_SIZE_MAX) + throw new IllegalArgumentException( + "Unsupported dictionary size " + dictSize); + + // Round dictionary size upward to a multiple of 16. This way LZMA + // can use LZDecoder.getPos() for calculating LZMA's posMask. 
+ // Note that this check is needed only for raw LZMA2 streams; it is + // redundant with .xz. + return (dictSize + 15) & ~15; + } + + /** + * Creates a new input stream that decompresses raw LZMA2 data + * from in. + *

+ * The caller needs to know the dictionary size used when compressing; + * the dictionary size isn't stored as part of a raw LZMA2 stream. + *

+ * Specifying too small a dictionary size will prevent decompressing + * the stream. Specifying too big a dictionary is a waste of memory, but + * decompression will still work. + *

+ * There is no need to specify a dictionary bigger than + * the uncompressed size of the data even if a bigger dictionary + * was used when compressing. If you know the uncompressed size + * of the data, this might allow saving some memory. + * + * @param in input stream from which LZMA2-compressed + * data is read + * + * @param dictSize LZMA2 dictionary size as bytes, must be + * in the range [DICT_SIZE_MIN, + * DICT_SIZE_MAX] + */ + public LZMA2InputStream(InputStream in, int dictSize) { + this(in, dictSize, null); + } + + /** + * Creates a new LZMA2 decompressor using a preset dictionary. + *

+ * This is like LZMA2InputStream(InputStream, int) except + * that the dictionary may be initialized using a preset dictionary. + * If a preset dictionary was used when compressing the data, the + * same preset dictionary must be provided when decompressing. + * + * @param in input stream from which LZMA2-compressed + * data is read + * + * @param dictSize LZMA2 dictionary size as bytes, must be + * in the range [DICT_SIZE_MIN, + * DICT_SIZE_MAX] + * + * @param presetDict preset dictionary or null + * to use no preset dictionary + */ + public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) { + // Check for null because otherwise null isn't detect + // in this constructor. + if (in == null) + throw new NullPointerException(); + + this.in = new DataInputStream(in); + this.lz = new LZDecoder(getDictSize(dictSize), presetDict); + + if (presetDict != null && presetDict.length > 0) + needDictReset = false; + } + + /** + * Decompresses the next byte from this input stream. + *

+ * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in java.io.BufferedInputStream + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

+ * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will block until len + * bytes have been decompressed, the end of the LZMA2 stream is reached, + * or an exception is thrown. + * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + try { + int size = 0; + + while (len > 0) { + if (uncompressedSize == 0) { + decodeChunkHeader(); + if (endReached) + return size == 0 ? 
-1 : size; + } + + int copySizeMax = Math.min(uncompressedSize, len); + + if (!isLZMAChunk) { + lz.copyUncompressed(in, copySizeMax); + } else { + lz.setLimit(copySizeMax); + lzma.decode(); + if (!rc.isInBufferOK()) + throw new CorruptedInputException(); + } + + int copiedSize = lz.flush(buf, off); + off += copiedSize; + len -= copiedSize; + size += copiedSize; + uncompressedSize -= copiedSize; + + if (uncompressedSize == 0) + if (!rc.isFinished() || lz.hasPending()) + throw new CorruptedInputException(); + } + + return size; + + } catch (IOException e) { + exception = e; + throw e; + } + } + + private void decodeChunkHeader() throws IOException { + int control = in.readUnsignedByte(); + + if (control == 0x00) { + endReached = true; + return; + } + + if (control >= 0xE0 || control == 0x01) { + needProps = true; + needDictReset = false; + lz.reset(); + } else if (needDictReset) { + throw new CorruptedInputException(); + } + + if (control >= 0x80) { + isLZMAChunk = true; + + uncompressedSize = (control & 0x1F) << 16; + uncompressedSize += in.readUnsignedShort() + 1; + + int compressedSize = in.readUnsignedShort() + 1; + + if (control >= 0xC0) { + needProps = false; + decodeProps(); + + } else if (needProps) { + throw new CorruptedInputException(); + + } else if (control >= 0xA0) { + lzma.reset(); + } + + rc.prepareInputBuffer(in, compressedSize); + + } else if (control > 0x02) { + throw new CorruptedInputException(); + + } else { + isLZMAChunk = false; + uncompressedSize = in.readUnsignedShort() + 1; + } + } + + private void decodeProps() throws IOException { + int props = in.readUnsignedByte(); + + if (props > (4 * 5 + 4) * 9 + 8) + throw new CorruptedInputException(); + + int pb = props / (9 * 5); + props -= pb * 9 * 5; + int lp = props / 9; + int lc = props - lp * 9; + + if (lc + lp > 4) + throw new CorruptedInputException(); + + lzma = new LZMADecoder(lz, rc, lc, lp, pb); + } + + /** + * Returns the number of uncompressed bytes that can be read + * without 
blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + *

+ * In LZMA2InputStream, the return value will be non-zero when the + * decompressor is in the middle of an LZMA2 chunk. The return value + * will then be the number of uncompressed bytes remaining from that + * chunk. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + return uncompressedSize; + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. + * + * @throws IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMA2Options.java b/org.tukani.xz/src/org/tukaani/xz/LZMA2Options.java new file mode 100644 index 00000000..42777f01 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMA2Options.java @@ -0,0 +1,581 @@ +/* + * LZMA2Options + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; +import org.tukaani.xz.lz.LZEncoder; +import org.tukaani.xz.lzma.LZMAEncoder; + +/** + * LZMA2 compression options. + *

+ * While this allows setting the LZMA2 compression options in detail, + * often you only need LZMA2Options() or + * LZMA2Options(int). + */ +public class LZMA2Options extends FilterOptions { + /** + * Minimum valid compression preset level is 0. + */ + public static final int PRESET_MIN = 0; + + /** + * Maximum valid compression preset level is 9. + */ + public static final int PRESET_MAX = 9; + + /** + * Default compression preset level is 6. + */ + public static final int PRESET_DEFAULT = 6; + + /** + * Minimum dictionary size is 4 KiB. + */ + public static final int DICT_SIZE_MIN = 4096; + + /** + * Maximum dictionary size for compression is 768 MiB. + *

+ * The decompressor supports bigger dictionaries, up to almost 2 GiB. + * With HC4 the encoder would support dictionaries bigger than 768 MiB. + * The 768 MiB limit comes from the current implementation of BT4 where + * we would otherwise hit the limits of signed ints in array indexing. + *

+ * If you really need bigger dictionary for decompression, + * use {@link LZMA2InputStream} directly. + */ + public static final int DICT_SIZE_MAX = 768 << 20; + + /** + * The default dictionary size is 8 MiB. + */ + public static final int DICT_SIZE_DEFAULT = 8 << 20; + + /** + * Maximum value for lc + lp is 4. + */ + public static final int LC_LP_MAX = 4; + + /** + * The default number of literal context bits is 3. + */ + public static final int LC_DEFAULT = 3; + + /** + * The default number of literal position bits is 0. + */ + public static final int LP_DEFAULT = 0; + + /** + * Maximum value for pb is 4. + */ + public static final int PB_MAX = 4; + + /** + * The default number of position bits is 2. + */ + public static final int PB_DEFAULT = 2; + + /** + * Compression mode: uncompressed. + * The data is wrapped into a LZMA2 stream without compression. + */ + public static final int MODE_UNCOMPRESSED = 0; + + /** + * Compression mode: fast. + * This is usually combined with a hash chain match finder. + */ + public static final int MODE_FAST = LZMAEncoder.MODE_FAST; + + /** + * Compression mode: normal. + * This is usually combined with a binary tree match finder. + */ + public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL; + + /** + * Minimum value for niceLen is 8. + */ + public static final int NICE_LEN_MIN = 8; + + /** + * Maximum value for niceLen is 273. 
+ */ + public static final int NICE_LEN_MAX = 273; + + /** + * Match finder: Hash Chain 2-3-4 + */ + public static final int MF_HC4 = LZEncoder.MF_HC4; + + /** + * Match finder: Binary tree 2-3-4 + */ + public static final int MF_BT4 = LZEncoder.MF_BT4; + + private static final int[] presetToDictSize = { + 1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22, + 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26 }; + + private static final int[] presetToDepthLimit = { 4, 8, 24, 48 }; + + private int dictSize; + private byte[] presetDict = null; + private int lc; + private int lp; + private int pb; + private int mode; + private int niceLen; + private int mf; + private int depthLimit; + + /** + * Creates new LZMA2 options and sets them to the default values. + * This is equivalent to LZMA2Options(PRESET_DEFAULT). + */ + public LZMA2Options() { + try { + setPreset(PRESET_DEFAULT); + } catch (UnsupportedOptionsException e) { + assert false; + throw new RuntimeException(); + } + } + + /** + * Creates new LZMA2 options and sets them to the given preset. + * + * @throws UnsupportedOptionsException + * preset is not supported + */ + public LZMA2Options(int preset) throws UnsupportedOptionsException { + setPreset(preset); + } + + /** + * Creates new LZMA2 options and sets them to the given custom values. + * + * @throws UnsupportedOptionsException + * unsupported options were specified + */ + public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode, + int niceLen, int mf, int depthLimit) + throws UnsupportedOptionsException { + setDictSize(dictSize); + setLcLp(lc, lp); + setPb(pb); + setMode(mode); + setNiceLen(niceLen); + setMatchFinder(mf); + setDepthLimit(depthLimit); + } + + /** + * Sets the compression options to the given preset. + *

+ * The presets 0-3 are fast presets with medium compression. + * The presets 4-6 are fairly slow presets with high compression. + * The default preset (PRESET_DEFAULT) is 6. + *

+ * The presets 7-9 are like the preset 6 but use bigger dictionaries + * and have higher compressor and decompressor memory requirements. + * Unless the uncompressed size of the file exceeds 8 MiB, + * 16 MiB, or 32 MiB, it is waste of memory to use the + * presets 7, 8, or 9, respectively. + * + * @throws UnsupportedOptionsException + * preset is not supported + */ + public void setPreset(int preset) throws UnsupportedOptionsException { + if (preset < 0 || preset > 9) + throw new UnsupportedOptionsException( + "Unsupported preset: " + preset); + + lc = LC_DEFAULT; + lp = LP_DEFAULT; + pb = PB_DEFAULT; + dictSize = presetToDictSize[preset]; + + if (preset <= 3) { + mode = MODE_FAST; + mf = MF_HC4; + niceLen = preset <= 1 ? 128 : NICE_LEN_MAX; + depthLimit = presetToDepthLimit[preset]; + } else { + mode = MODE_NORMAL; + mf = MF_BT4; + niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64; + depthLimit = 0; + } + } + + /** + * Sets the dictionary size in bytes. + *

+ * The dictionary (or history buffer) holds the most recently seen + * uncompressed data. A bigger dictionary usually means better compression. + * However, using a dictionary bigger than the size of the uncompressed + * data is a waste of memory. + *

+ * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid, + * but sizes of 2^n and 2^n + 2^(n-1) bytes are somewhat + * recommended. + * + * @throws UnsupportedOptionsException + * dictSize is not supported + */ + public void setDictSize(int dictSize) throws UnsupportedOptionsException { + if (dictSize < DICT_SIZE_MIN) + throw new UnsupportedOptionsException( + "LZMA2 dictionary size must be at least 4 KiB: " + + dictSize + " B"); + + if (dictSize > DICT_SIZE_MAX) + throw new UnsupportedOptionsException( + "LZMA2 dictionary size must not exceed " + + (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B"); + + this.dictSize = dictSize; + } + + /** + * Gets the dictionary size in bytes. + */ + public int getDictSize() { + return dictSize; + } + + /** + * Sets a preset dictionary. Use null to disable the use of + * a preset dictionary. By default there is no preset dictionary. + *

+ * The .xz format doesn't support a preset dictionary for now. + * Do not set a preset dictionary unless you use raw LZMA2. + *

+ * Preset dictionary can be useful when compressing many similar, + * relatively small chunks of data independently from each other. + * A preset dictionary should contain typical strings that occur in + * the files being compressed. The most probable strings should be + * near the end of the preset dictionary. The preset dictionary used + * for compression is also needed for decompression. + */ + public void setPresetDict(byte[] presetDict) { + this.presetDict = presetDict; + } + + /** + * Gets the preset dictionary. + */ + public byte[] getPresetDict() { + return presetDict; + } + + /** + * Sets the number of literal context bits and literal position bits. + *

+ * The sum of lc and lp is limited to 4. + * Trying to exceed it will throw an exception. This function lets + * you change both at the same time. + * + * @throws UnsupportedOptionsException + * lc and lp + * are invalid + */ + public void setLcLp(int lc, int lp) throws UnsupportedOptionsException { + if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX + || lc + lp > LC_LP_MAX) + throw new UnsupportedOptionsException( + "lc + lp must not exceed " + LC_LP_MAX + ": " + + lc + " + " + lp); + + this.lc = lc; + this.lp = lp; + } + + /** + * Sets the number of literal context bits. + *

+ * All bytes that cannot be encoded as matches are encoded as literals. + * That is, literals are simply 8-bit bytes that are encoded one at + * a time. + *

+ * The literal coding makes an assumption that the highest lc + * bits of the previous uncompressed byte correlate with the next byte. + * For example, in typical English text, an upper-case letter is often + * followed by a lower-case letter, and a lower-case letter is usually + * followed by another lower-case letter. In the US-ASCII character set, + * the highest three bits are 010 for upper-case letters and 011 for + * lower-case letters. When lc is at least 3, the literal + * coding can take advantage of this property in the uncompressed data. + *

+ * The default value (3) is usually good. If you want maximum compression, + * try setLc(4). Sometimes it helps a little, and sometimes it + * makes compression worse. If it makes it worse, test for example + * setLc(2) too. + * + * @throws UnsupportedOptionsException + * lc is invalid, or the sum + * of lc and lp + * exceed LC_LP_MAX + */ + public void setLc(int lc) throws UnsupportedOptionsException { + setLcLp(lc, lp); + } + + /** + * Sets the number of literal position bits. + *

+ * This affets what kind of alignment in the uncompressed data is + * assumed when encoding literals. See {@link #setPb(int) setPb} for + * more information about alignment. + * + * @throws UnsupportedOptionsException + * lp is invalid, or the sum + * of lc and lp + * exceed LC_LP_MAX + */ + public void setLp(int lp) throws UnsupportedOptionsException { + setLcLp(lc, lp); + } + + /** + * Gets the number of literal context bits. + */ + public int getLc() { + return lc; + } + + /** + * Gets the number of literal position bits. + */ + public int getLp() { + return lp; + } + + /** + * Sets the number of position bits. + *

+ * This affects what kind of alignment in the uncompressed data is + * assumed in general. The default (2) means four-byte alignment + * (2^pb = 2^2 = 4), which is often a good choice when + * there's no better guess. + *

+ * When the alignment is known, setting the number of position bits + * accordingly may reduce the file size a little. For example with text + * files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using + * setPb(0) can improve compression slightly. For UTF-16 + * text, setPb(1) is a good choice. If the alignment is + * an odd number like 3 bytes, setPb(0) might be the best + * choice. + *

+ * Even though the assumed alignment can be adjusted with + * setPb and setLp, LZMA2 still slightly favors + * 16-byte alignment. It might be worth taking into account when designing + * file formats that are likely to be often compressed with LZMA2. + * + * @throws UnsupportedOptionsException + * pb is invalid + */ + public void setPb(int pb) throws UnsupportedOptionsException { + if (pb < 0 || pb > PB_MAX) + throw new UnsupportedOptionsException( + "pb must not exceed " + PB_MAX + ": " + pb); + + this.pb = pb; + } + + /** + * Gets the number of position bits. + */ + public int getPb() { + return pb; + } + + /** + * Sets the compression mode. + *

+ * This specifies the method to analyze the data produced by + * a match finder. The default is MODE_FAST for presets + * 0-3 and MODE_NORMAL for presets 4-9. + *

+ * Usually MODE_FAST is used with Hash Chain match finders + * and MODE_NORMAL with Binary Tree match finders. This is + * also what the presets do. + *

+ * The special mode MODE_UNCOMPRESSED doesn't try to + * compress the data at all (and doesn't use a match finder) and will + * simply wrap it in uncompressed LZMA2 chunks. + * + * @throws UnsupportedOptionsException + * mode is not supported + */ + public void setMode(int mode) throws UnsupportedOptionsException { + if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL) + throw new UnsupportedOptionsException( + "Unsupported compression mode: " + mode); + + this.mode = mode; + } + + /** + * Gets the compression mode. + */ + public int getMode() { + return mode; + } + + /** + * Sets the nice length of matches. + * Once a match of at least niceLen bytes is found, + * the algorithm stops looking for better matches. Higher values tend + * to give better compression at the expense of speed. The default + * depends on the preset. + * + * @throws UnsupportedOptionsException + * niceLen is invalid + */ + public void setNiceLen(int niceLen) throws UnsupportedOptionsException { + if (niceLen < NICE_LEN_MIN) + throw new UnsupportedOptionsException( + "Minimum nice length of matches is " + + NICE_LEN_MIN + " bytes: " + niceLen); + + if (niceLen > NICE_LEN_MAX) + throw new UnsupportedOptionsException( + "Maximum nice length of matches is " + NICE_LEN_MAX + + ": " + niceLen); + + this.niceLen = niceLen; + } + + /** + * Gets the nice length of matches. + */ + public int getNiceLen() { + return niceLen; + } + + /** + * Sets the match finder type. + *

+ * Match finder has a major effect on compression speed, memory usage, + * and compression ratio. Usually Hash Chain match finders are faster + * than Binary Tree match finders. The default depends on the preset: + * 0-3 use MF_HC4 and 4-9 use MF_BT4. + * + * @throws UnsupportedOptionsException + * mf is not supported + */ + public void setMatchFinder(int mf) throws UnsupportedOptionsException { + if (mf != MF_HC4 && mf != MF_BT4) + throw new UnsupportedOptionsException( + "Unsupported match finder: " + mf); + + this.mf = mf; + } + + /** + * Gets the match finder type. + */ + public int getMatchFinder() { + return mf; + } + + /** + * Sets the match finder search depth limit. + *

+ * The default is a special value of 0 which indicates that + * the depth limit should be automatically calculated by the selected + * match finder from the nice length of matches. + *

+ * Reasonable depth limit for Hash Chain match finders is 4-100 and + * 16-1000 for Binary Tree match finders. Using very high values can + * make the compressor extremely slow with some files. Avoid settings + * higher than 1000 unless you are prepared to interrupt the compression + * in case it is taking far too long. + * + * @throws UnsupportedOptionsException + * depthLimit is invalid + */ + public void setDepthLimit(int depthLimit) + throws UnsupportedOptionsException { + if (depthLimit < 0) + throw new UnsupportedOptionsException( + "Depth limit cannot be negative: " + depthLimit); + + this.depthLimit = depthLimit; + } + + /** + * Gets the match finder search depth limit. + */ + public int getDepthLimit() { + return depthLimit; + } + + public int getEncoderMemoryUsage() { + return (mode == MODE_UNCOMPRESSED) + ? UncompressedLZMA2OutputStream.getMemoryUsage() + : LZMA2OutputStream.getMemoryUsage(this); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + if (mode == MODE_UNCOMPRESSED) + return new UncompressedLZMA2OutputStream(out); + + return new LZMA2OutputStream(out, this); + } + + /** + * Gets how much memory the LZMA2 decoder will need to decompress the data + * that was encoded with these options and stored in a .xz file. + *

+ * The returned value may be bigger than the value returned by a direct call + * to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size + * is not 2^n or 2^n + 2^(n-1) bytes. This is because the .xz + * headers store the dictionary size in such a format and other values + * are rounded up to the next such value. Such rounding is harmless except + * it might waste some memory if an unusual dictionary size is used. + *

+ * If you use raw LZMA2 streams and unusual dictioanary size, call + * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder + * memory requirements. + */ + public int getDecoderMemoryUsage() { + // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1). + int d = dictSize - 1; + d |= d >>> 2; + d |= d >>> 3; + d |= d >>> 4; + d |= d >>> 8; + d |= d >>> 16; + return LZMA2InputStream.getMemoryUsage(d + 1); + } + + public InputStream getInputStream(InputStream in) throws IOException { + return new LZMA2InputStream(in, dictSize); + } + + FilterEncoder getFilterEncoder() { + return new LZMA2Encoder(this); + } + + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException e) { + assert false; + throw new RuntimeException(); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMA2OutputStream.java b/org.tukani.xz/src/org/tukaani/xz/LZMA2OutputStream.java new file mode 100644 index 00000000..5724d105 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMA2OutputStream.java @@ -0,0 +1,261 @@ +/* + * LZMA2OutputStream + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.DataOutputStream; +import java.io.IOException; +import org.tukaani.xz.lz.LZEncoder; +import org.tukaani.xz.rangecoder.RangeEncoder; +import org.tukaani.xz.lzma.LZMAEncoder; + +class LZMA2OutputStream extends FinishableOutputStream { + static final int COMPRESSED_SIZE_MAX = 64 << 10; + + private FinishableOutputStream out; + private final DataOutputStream outData; + + private final LZEncoder lz; + private final RangeEncoder rc; + private final LZMAEncoder lzma; + + private final int props; // Cannot change props on the fly for now. 
+ private boolean dictResetNeeded = true; + private boolean stateResetNeeded = true; + private boolean propsNeeded = true; + + private int pendingSize = 0; + private boolean finished = false; + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + private static int getExtraSizeBefore(int dictSize) { + return COMPRESSED_SIZE_MAX > dictSize + ? COMPRESSED_SIZE_MAX - dictSize : 0; + } + + static int getMemoryUsage(LZMA2Options options) { + // 64 KiB buffer for the range encoder + a little extra + LZMAEncoder + int dictSize = options.getDictSize(); + int extraSizeBefore = getExtraSizeBefore(dictSize); + return 70 + LZMAEncoder.getMemoryUsage(options.getMode(), + dictSize, extraSizeBefore, + options.getMatchFinder()); + } + + LZMA2OutputStream(FinishableOutputStream out, LZMA2Options options) { + if (out == null) + throw new NullPointerException(); + + this.out = out; + outData = new DataOutputStream(out); + rc = new RangeEncoder(COMPRESSED_SIZE_MAX); + + int dictSize = options.getDictSize(); + int extraSizeBefore = getExtraSizeBefore(dictSize); + lzma = LZMAEncoder.getInstance(rc, + options.getLc(), options.getLp(), options.getPb(), + options.getMode(), + dictSize, extraSizeBefore, options.getNiceLen(), + options.getMatchFinder(), options.getDepthLimit()); + + lz = lzma.getLZEncoder(); + + byte[] presetDict = options.getPresetDict(); + if (presetDict != null && presetDict.length > 0) { + lz.setPresetDict(dictSize, presetDict); + dictResetNeeded = false; + } + + props = (options.getPb() * 5 + options.getLp()) * 9 + options.getLc(); + } + + public void write(int b) throws IOException { + tempBuf[0] = (byte)b; + write(tempBuf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); 
+ + try { + while (len > 0) { + int used = lz.fillWindow(buf, off, len); + off += used; + len -= used; + pendingSize += used; + + if (lzma.encodeForLZMA2()) + writeChunk(); + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + private void writeChunk() throws IOException { + int compressedSize = rc.finish(); + int uncompressedSize = lzma.getUncompressedSize(); + + assert compressedSize > 0 : compressedSize; + assert uncompressedSize > 0 : uncompressedSize; + + // +2 because the header of a compressed chunk is 2 bytes + // bigger than the header of an uncompressed chunk. + if (compressedSize + 2 < uncompressedSize) { + writeLZMA(uncompressedSize, compressedSize); + } else { + lzma.reset(); + uncompressedSize = lzma.getUncompressedSize(); + assert uncompressedSize > 0 : uncompressedSize; + writeUncompressed(uncompressedSize); + } + + pendingSize -= uncompressedSize; + lzma.resetUncompressedSize(); + rc.reset(); + } + + private void writeLZMA(int uncompressedSize, int compressedSize) + throws IOException { + int control; + + if (propsNeeded) { + if (dictResetNeeded) + control = 0x80 + (3 << 5); + else + control = 0x80 + (2 << 5); + } else { + if (stateResetNeeded) + control = 0x80 + (1 << 5); + else + control = 0x80; + } + + control |= (uncompressedSize - 1) >>> 16; + outData.writeByte(control); + + outData.writeShort(uncompressedSize - 1); + outData.writeShort(compressedSize - 1); + + if (propsNeeded) + outData.writeByte(props); + + rc.write(out); + + propsNeeded = false; + stateResetNeeded = false; + dictResetNeeded = false; + } + + private void writeUncompressed(int uncompressedSize) throws IOException { + while (uncompressedSize > 0) { + int chunkSize = Math.min(uncompressedSize, COMPRESSED_SIZE_MAX); + outData.writeByte(dictResetNeeded ? 
0x01 : 0x02); + outData.writeShort(chunkSize - 1); + lz.copyUncompressed(out, uncompressedSize, chunkSize); + uncompressedSize -= chunkSize; + dictResetNeeded = false; + } + + stateResetNeeded = true; + } + + private void writeEndMarker() throws IOException { + assert !finished; + + if (exception != null) + throw exception; + + lz.setFinishing(); + + try { + while (pendingSize > 0) { + lzma.encodeForLZMA2(); + writeChunk(); + } + + out.write(0x00); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + + public void flush() throws IOException { + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + lz.setFlushing(); + + while (pendingSize > 0) { + lzma.encodeForLZMA2(); + writeChunk(); + } + + out.flush(); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void finish() throws IOException { + if (!finished) { + writeEndMarker(); + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + } + + public void close() throws IOException { + if (out != null) { + if (!finished) { + try { + writeEndMarker(); + } catch (IOException e) {} + } + + try { + out.close(); + } catch (IOException e) { + if (exception == null) + exception = e; + } + + out = null; + } + + if (exception != null) + throw exception; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/LZMAInputStream.java b/org.tukani.xz/src/org/tukaani/xz/LZMAInputStream.java new file mode 100644 index 00000000..9bbd2614 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/LZMAInputStream.java @@ -0,0 +1,569 @@ +/* + * LZMAInputStream + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.lz.LZDecoder; +import org.tukaani.xz.rangecoder.RangeDecoderFromStream; +import org.tukaani.xz.lzma.LZMADecoder; + +/** + * Decompresses legacy .lzma files and raw LZMA streams (no .lzma header). + *

+ * IMPORTANT: In contrast to other classes in this package, this class + * reads data from its input stream one byte at a time. If the input stream + * is for example {@link java.io.FileInputStream}, wrapping it into + * {@link java.io.BufferedInputStream} tends to improve performance a lot. + * This is not automatically done by this class because there may be use + * cases where it is desired that this class won't read any bytes past + * the end of the LZMA stream. + *

+ * Even when using BufferedInputStream, the performance tends + * to be worse (maybe 10-20 % slower) than with {@link LZMA2InputStream} + * or {@link XZInputStream} (when the .xz file contains LZMA2-compressed data). + * + * @since 1.4 + */ +public class LZMAInputStream extends InputStream { + /** + * Largest dictionary size supported by this implementation. + *

+ * LZMA allows dictionaries up to one byte less than 4 GiB. This + * implementation supports only 16 bytes less than 2 GiB. This + * limitation is due to Java using signed 32-bit integers for array + * indexing. The limitation shouldn't matter much in practice since so + * huge dictionaries are not normally used. + */ + public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15; + + private InputStream in; + private LZDecoder lz; + private RangeDecoderFromStream rc; + private LZMADecoder lzma; + + private boolean endReached = false; + + private final byte[] tempBuf = new byte[1]; + + /** + * Number of uncompressed bytes left to be decompressed, or -1 if + * the end marker is used. + */ + private long remainingSize; + + private IOException exception = null; + + /** + * Gets approximate decompressor memory requirements as kibibytes for + * the given dictionary size and LZMA properties byte (lc, lp, and pb). + * + * @param dictSize LZMA dictionary size as bytes, should be + * in the range [0, + * DICT_SIZE_MAX] + * + * @param propsByte LZMA properties byte that encodes the values + * of lc, lp, and pb + * + * @return approximate memory requirements as kibibytes (KiB) + * + * @throws UnsupportedOptionsException + * if dictSize is outside + * the range [0, + * DICT_SIZE_MAX] + * + * @throws CorruptedInputException + * if propsByte is invalid + */ + public static int getMemoryUsage(int dictSize, byte propsByte) + throws UnsupportedOptionsException, CorruptedInputException { + if (dictSize < 0 || dictSize > DICT_SIZE_MAX) + throw new UnsupportedOptionsException( + "LZMA dictionary is too big for this implementation"); + + int props = propsByte & 0xFF; + if (props > (4 * 5 + 4) * 9 + 8) + throw new CorruptedInputException("Invalid LZMA properties byte"); + + props %= 9 * 5; + int lp = props / 9; + int lc = props - lp * 9; + + return getMemoryUsage(dictSize, lc, lp); + } + + /** + * Gets approximate decompressor memory requirements as kibibytes for + * the given 
dictionary size, lc, and lp. Note that pb isn't needed. + * + * @param dictSize LZMA dictionary size as bytes, must be + * in the range [0, + * DICT_SIZE_MAX] + * + * @param lc number of literal context bits, must be + * in the range [0, 8] + * + * @param lp number of literal position bits, must be + * in the range [0, 4] + * + * @return approximate memory requirements as kibibytes (KiB) + */ + public static int getMemoryUsage(int dictSize, int lc, int lp) { + if (lc < 0 || lc > 8 || lp < 0 || lp > 4) + throw new IllegalArgumentException("Invalid lc or lp"); + + // Probability variables have the type "short". There are + // 0x300 (768) probability variables in each literal subcoder. + // The number of literal subcoders is 2^(lc + lp). + // + // Roughly 10 KiB for the base state + LZ decoder's dictionary buffer + // + sizeof(short) * number probability variables per literal subcoder + // * number of literal subcoders + return 10 + getDictSize(dictSize) / 1024 + + ((2 * 0x300) << (lc + lp)) / 1024; + } + + private static int getDictSize(int dictSize) { + if (dictSize < 0 || dictSize > DICT_SIZE_MAX) + throw new IllegalArgumentException( + "LZMA dictionary is too big for this implementation"); + + // For performance reasons, use a 4 KiB dictionary if something + // smaller was requested. It's a rare situation and the performance + // difference isn't huge, and it starts to matter mostly when the + // dictionary is just a few bytes. But we need to handle the special + // case of dictSize == 0 anyway, which is an allowed value but in + // practice means one-byte dictionary. + // + // Note that using a dictionary bigger than specified in the headers + // can hide errors if there is a reference to data beyond the original + // dictionary size but is still within 4 KiB. + if (dictSize < 4096) + dictSize = 4096; + + // Round dictionary size upward to a multiple of 16. This way LZMA + // can use LZDecoder.getPos() for calculating LZMA's posMask. 
+ return (dictSize + 15) & ~15; + } + + /** + * Creates a new .lzma file format decompressor without + * a memory usage limit. + * + * @param in input stream from which .lzma data is read; + * it might be a good idea to wrap it in + * BufferedInputStream, see the + * note at the top of this page + * + * @throws CorruptedInputException + * file is corrupt or perhaps not in + * the .lzma format at all + * + * @throws UnsupportedOptionsException + * dictionary size or uncompressed size is too + * big for this implementation + * + * @throws EOFException + * file is truncated or perhaps not in + * the .lzma format at all + * + * @throws IOException may be thrown by in + */ + public LZMAInputStream(InputStream in) throws IOException { + this(in, -1); + } + + /** + * Creates a new .lzma file format decompressor with an optional + * memory usage limit. + * + * @param in input stream from which .lzma data is read; + * it might be a good idea to wrap it in + * BufferedInputStream, see the + * note at the top of this page + * + * @param memoryLimit memory usage limit in kibibytes (KiB) + * or -1 to impose no + * memory usage limit + * + * @throws CorruptedInputException + * file is corrupt or perhaps not in + * the .lzma format at all + * + * @throws UnsupportedOptionsException + * dictionary size or uncompressed size is too + * big for this implementation + * + * @throws MemoryLimitException + * memory usage limit was exceeded + * + * @throws EOFException + * file is truncated or perhaps not in + * the .lzma format at all + * + * @throws IOException may be thrown by in + */ + public LZMAInputStream(InputStream in, int memoryLimit) + throws IOException { + DataInputStream inData = new DataInputStream(in); + + // Properties byte (lc, lp, and pb) + byte propsByte = inData.readByte(); + + // Dictionary size is an unsigned 32-bit little endian integer. 
+ int dictSize = 0; + for (int i = 0; i < 4; ++i) + dictSize |= inData.readUnsignedByte() << (8 * i); + + // Uncompressed size is an unsigned 64-bit little endian integer. + // The maximum 64-bit value is a special case (becomes -1 here) + // which indicates that the end marker is used instead of knowing + // the uncompressed size beforehand. + long uncompSize = 0; + for (int i = 0; i < 8; ++i) + uncompSize |= (long)inData.readUnsignedByte() << (8 * i); + + // Check the memory usage limit. + int memoryNeeded = getMemoryUsage(dictSize, propsByte); + if (memoryLimit != -1 && memoryNeeded > memoryLimit) + throw new MemoryLimitException(memoryNeeded, memoryLimit); + + initialize(in, uncompSize, propsByte, dictSize, null); + } + + /** + * Creates a new input stream that decompresses raw LZMA data (no .lzma + * header) from in. + *

+ * The caller needs to know if the "end of payload marker (EOPM)" alias + * "end of stream marker (EOS marker)" alias "end marker" is present. + * If the end marker isn't used, the caller must know the exact + * uncompressed size of the stream. + *

+ * The caller also needs to provide the LZMA properties byte that encodes + * the number of literal context bits (lc), literal position bits (lp), + * and position bits (pb). + *

+ * The dictionary size used when compressing is also needed. Specifying + * too small a dictionary size will prevent decompressing the stream. + * Specifying too big a dictionary is a waste of memory but decompression + * will work. + *

+ * There is no need to specify a dictionary bigger than + * the uncompressed size of the data even if a bigger dictionary + * was used when compressing. If you know the uncompressed size + * of the data, this might allow saving some memory. + * + * @param in input stream from which compressed + * data is read + * + * @param uncompSize uncompressed size of the LZMA stream or -1 + * if the end marker is used in the LZMA stream + * + * @param propsByte LZMA properties byte that has the encoded + * values for literal context bits (lc), literal + * position bits (lp), and position bits (pb) + * + * @param dictSize dictionary size as bytes, must be in the range + * [0, DICT_SIZE_MAX] + * + * @throws CorruptedInputException + * if propsByte is invalid or + * the first input byte is not 0x00 + * + * @throws UnsupportedOptionsException + * dictionary size or uncompressed size is too + * big for this implementation + * + * + */ + public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, + int dictSize) throws IOException { + initialize(in, uncompSize, propsByte, dictSize, null); + } + + /** + * Creates a new input stream that decompresses raw LZMA data (no .lzma + * header) from in optionally with a preset dictionary. 
+ * + * @param in input stream from which LZMA-compressed + * data is read + * + * @param uncompSize uncompressed size of the LZMA stream or -1 + * if the end marker is used in the LZMA stream + * + * @param propsByte LZMA properties byte that has the encoded + * values for literal context bits (lc), literal + * position bits (lp), and position bits (pb) + * + * @param dictSize dictionary size as bytes, must be in the range + * [0, DICT_SIZE_MAX] + * + * @param presetDict preset dictionary or null + * to use no preset dictionary + * + * @throws CorruptedInputException + * if propsByte is invalid or + * the first input byte is not 0x00 + * + * @throws UnsupportedOptionsException + * dictionary size or uncompressed size is too + * big for this implementation + * + * @throws EOFException file is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public LZMAInputStream(InputStream in, long uncompSize, byte propsByte, + int dictSize, byte[] presetDict) + throws IOException { + initialize(in, uncompSize, propsByte, dictSize, presetDict); + } + + /** + * Creates a new input stream that decompresses raw LZMA data (no .lzma + * header) from in optionally with a preset dictionary. 
+ * + * @param in input stream from which LZMA-compressed + * data is read + * + * @param uncompSize uncompressed size of the LZMA stream or -1 + * if the end marker is used in the LZMA stream + * + * @param lc number of literal context bits, must be + * in the range [0, 8] + * + * @param lp number of literal position bits, must be + * in the range [0, 4] + * + * @param pb number position bits, must be + * in the range [0, 4] + * + * @param dictSize dictionary size as bytes, must be in the range + * [0, DICT_SIZE_MAX] + * + * @param presetDict preset dictionary or null + * to use no preset dictionary + * + * @throws CorruptedInputException + * if the first input byte is not 0x00 + * + * @throws EOFException file is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public LZMAInputStream(InputStream in, long uncompSize, + int lc, int lp, int pb, + int dictSize, byte[] presetDict) + throws IOException { + initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict); + } + + private void initialize(InputStream in, long uncompSize, byte propsByte, + int dictSize, byte[] presetDict) + throws IOException { + // Validate the uncompressed size since the other "initialize" throws + // IllegalArgumentException if uncompSize < -1. + if (uncompSize < -1) + throw new UnsupportedOptionsException( + "Uncompressed size is too big"); + + // Decode the properties byte. In contrast to LZMA2, there is no + // limit of lc + lp <= 4. + int props = propsByte & 0xFF; + if (props > (4 * 5 + 4) * 9 + 8) + throw new CorruptedInputException("Invalid LZMA properties byte"); + + int pb = props / (9 * 5); + props -= pb * 9 * 5; + int lp = props / 9; + int lc = props - lp * 9; + + // Validate the dictionary size since the other "initialize" throws + // IllegalArgumentException if dictSize is not supported. 
+ if (dictSize < 0 || dictSize > DICT_SIZE_MAX) + throw new UnsupportedOptionsException( + "LZMA dictionary is too big for this implementation"); + + initialize(in, uncompSize, lc, lp, pb, dictSize, presetDict); + } + + private void initialize(InputStream in, long uncompSize, + int lc, int lp, int pb, + int dictSize, byte[] presetDict) + throws IOException { + // getDictSize validates dictSize and gives a message in + // the exception too, so skip validating dictSize here. + if (uncompSize < -1 || lc < 0 || lc > 8 || lp < 0 || lp > 4 + || pb < 0 || pb > 4) + throw new IllegalArgumentException(); + + this.in = in; + + // If uncompressed size is known, use it to avoid wasting memory for + // a uselessly large dictionary buffer. + dictSize = getDictSize(dictSize); + if (uncompSize >= 0 && dictSize > uncompSize) + dictSize = getDictSize((int)uncompSize); + + lz = new LZDecoder(getDictSize(dictSize), presetDict); + rc = new RangeDecoderFromStream(in); + lzma = new LZMADecoder(lz, rc, lc, lp, pb); + remainingSize = uncompSize; + } + + /** + * Decompresses the next byte from this input stream. + *

+ * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in java.io.BufferedInputStream + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

+ * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will block until len + * bytes have been decompressed, the end of the LZMA stream is reached, + * or an exception is thrown. + * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + try { + int size = 0; + + while (len > 0) { + // If uncompressed size is known and thus no end marker will + // be present, set the limit so that the uncompressed size + // won't be exceeded. + int copySizeMax = len; + if (remainingSize >= 0 && remainingSize < len) + copySizeMax = (int)remainingSize; + + lz.setLimit(copySizeMax); + + // Decode into the dictionary buffer. + try { + lzma.decode(); + } catch (CorruptedInputException e) { + // The end marker is encoded with a LZMA symbol that + // indicates maximum match distance. This is larger + // than any supported dictionary and thus causes + // CorruptedInputException from LZDecoder.repeat. + if (remainingSize != -1 || !lzma.endMarkerDetected()) + throw e; + + endReached = true; + + // The exception makes lzma.decode() miss the last range + // decoder normalization, so do it here. This might + // cause an IOException if it needs to read a byte + // from the input stream. 
+ rc.normalize(); + } + + // Copy from the dictionary to buf. + int copiedSize = lz.flush(buf, off); + off += copiedSize; + len -= copiedSize; + size += copiedSize; + + if (remainingSize >= 0) { + // Update the number of bytes left to be decompressed. + remainingSize -= copiedSize; + assert remainingSize >= 0; + + if (remainingSize == 0) + endReached = true; + } + + if (endReached) { + // Checking these helps a lot when catching corrupt + // or truncated .lzma files. LZMA Utils doesn't do + // the first check and thus it accepts many invalid + // files that this implementation and XZ Utils don't. + if (!rc.isFinished() || lz.hasPending()) + throw new CorruptedInputException(); + + return size == 0 ? -1 : size; + } + } + + return size; + + } catch (IOException e) { + exception = e; + throw e; + } + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. + * + * @throws IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/MemoryLimitException.java b/org.tukani.xz/src/org/tukaani/xz/MemoryLimitException.java new file mode 100644 index 00000000..9d766bd7 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/MemoryLimitException.java @@ -0,0 +1,60 @@ +/* + * MemoryLimitException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Thrown when the memory usage limit given to the XZ decompressor + * would be exceeded. + *

+ * The amount of memory required and the memory usage limit are + * included in the error detail message in human readable format. + */ +public class MemoryLimitException extends XZIOException { + private static final long serialVersionUID = 3L; + + private final int memoryNeeded; + private final int memoryLimit; + + /** + * Creates a new MemoryLimitException. + *

+ * The amount of memory needed and the memory usage limit are + * included in the error detail message. + * + * @param memoryNeeded amount of memory needed as kibibytes (KiB) + * @param memoryLimit specified memory usage limit as kibibytes (KiB) + */ + public MemoryLimitException(int memoryNeeded, int memoryLimit) { + super("" + memoryNeeded + " KiB of memory would be needed; limit was " + + memoryLimit + " KiB"); + + this.memoryNeeded = memoryNeeded; + this.memoryLimit = memoryLimit; + } + + /** + * Gets how much memory is required to decompress the data. + * + * @return amount of memory needed as kibibytes (KiB) + */ + public int getMemoryNeeded() { + return memoryNeeded; + } + + /** + * Gets what the memory usage limit was at the time the exception + * was created. + * + * @return memory usage limit as kibibytes (KiB) + */ + public int getMemoryLimit() { + return memoryLimit; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/PowerPCOptions.java b/org.tukani.xz/src/org/tukaani/xz/PowerPCOptions.java new file mode 100644 index 00000000..f36d3618 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/PowerPCOptions.java @@ -0,0 +1,36 @@ +/* + * PowerPCOptions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.PowerPC; + +/** + * BCJ filter for big endian PowerPC instructions. 
+ */ +public class PowerPCOptions extends BCJOptions { + private static final int ALIGNMENT = 4; + + public PowerPCOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new PowerPC(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new PowerPC(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.POWERPC_FILTER_ID); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/RawCoder.java b/org.tukani.xz/src/org/tukaani/xz/RawCoder.java new file mode 100644 index 00000000..12c7da8f --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/RawCoder.java @@ -0,0 +1,33 @@ +/* + * RawCoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +class RawCoder { + static void validate(FilterCoder[] filters) + throws UnsupportedOptionsException { + for (int i = 0; i < filters.length - 1; ++i) + if (!filters[i].nonLastOK()) + throw new UnsupportedOptionsException( + "Unsupported XZ filter chain"); + + if (!filters[filters.length - 1].lastOK()) + throw new UnsupportedOptionsException( + "Unsupported XZ filter chain"); + + int changesSizeCount = 0; + for (int i = 0; i < filters.length; ++i) + if (filters[i].changesSize()) + ++changesSizeCount; + + if (changesSizeCount > 3) + throw new UnsupportedOptionsException( + "Unsupported XZ filter chain"); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SPARCOptions.java b/org.tukani.xz/src/org/tukaani/xz/SPARCOptions.java new file mode 100644 index 00000000..0f20677c --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SPARCOptions.java @@ -0,0 +1,36 @@ +/* + * SPARCOptions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.SPARC; + +/** + * BCJ filter for SPARC. + */ +public class SPARCOptions extends BCJOptions { + private static final int ALIGNMENT = 4; + + public SPARCOptions() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new SPARC(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new SPARC(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.SPARC_FILTER_ID); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SeekableFileInputStream.java b/org.tukani.xz/src/org/tukaani/xz/SeekableFileInputStream.java new file mode 100644 index 00000000..fe2d685f --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SeekableFileInputStream.java @@ -0,0 +1,102 @@ +/* + * SeekableFileInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.File; +import java.io.RandomAccessFile; +import java.io.IOException; +import java.io.FileNotFoundException; + +/** + * Wraps a {@link java.io.RandomAccessFile RandomAccessFile} + * in a SeekableInputStream. + */ +public class SeekableFileInputStream extends SeekableInputStream { + /** + * The RandomAccessFile that has been wrapped + * into a SeekableFileInputStream. + */ + protected RandomAccessFile randomAccessFile; + + /** + * Creates a new seekable input stream that reads from the specified file. + */ + public SeekableFileInputStream(File file) throws FileNotFoundException { + randomAccessFile = new RandomAccessFile(file, "r"); + } + + /** + * Creates a new seekable input stream that reads from a file with + * the specified name. 
+ */ + public SeekableFileInputStream(String name) throws FileNotFoundException { + randomAccessFile = new RandomAccessFile(name, "r"); + } + + /** + * Creates a new seekable input stream from an existing + * RandomAccessFile object. + */ + public SeekableFileInputStream(RandomAccessFile randomAccessFile) { + this.randomAccessFile = randomAccessFile; + } + + /** + * Calls {@link RandomAccessFile#read() randomAccessFile.read()}. + */ + public int read() throws IOException { + return randomAccessFile.read(); + } + + /** + * Calls {@link RandomAccessFile#read(byte[]) randomAccessFile.read(buf)}. + */ + public int read(byte[] buf) throws IOException { + return randomAccessFile.read(buf); + } + + /** + * Calls + * {@link RandomAccessFile#read(byte[],int,int) + * randomAccessFile.read(buf, off, len)}. + */ + public int read(byte[] buf, int off, int len) throws IOException { + return randomAccessFile.read(buf, off, len); + } + + /** + * Calls {@link RandomAccessFile#close() randomAccessFile.close()}. + */ + public void close() throws IOException { + randomAccessFile.close(); + } + + /** + * Calls {@link RandomAccessFile#length() randomAccessFile.length()}. + */ + public long length() throws IOException { + return randomAccessFile.length(); + } + + /** + * Calls {@link RandomAccessFile#getFilePointer() + randomAccessFile.getFilePointer()}. + */ + public long position() throws IOException { + return randomAccessFile.getFilePointer(); + } + + /** + * Calls {@link RandomAccessFile#seek(long) randomAccessFile.seek(long)}. 
+ */ + public void seek(long pos) throws IOException { + randomAccessFile.seek(pos); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SeekableInputStream.java b/org.tukani.xz/src/org/tukaani/xz/SeekableInputStream.java new file mode 100644 index 00000000..a2f908ab --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SeekableInputStream.java @@ -0,0 +1,81 @@ +/* + * SeekableInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; + +/** + * Input stream with random access support. + */ +public abstract class SeekableInputStream extends InputStream { + /** + * Seeks n bytes forward in this stream. + *

+ * This will not seek past the end of the file. If the current position + * is already at or past the end of the file, this doesn't seek at all + * and returns 0. Otherwise, if skipping n bytes + * would cause the position to exceed the stream size, this will do + * the equivalent of seek(length()) and the return value will + * be adjusted accordingly. + *

+ * If n is negative, the position isn't changed and + * the return value is 0. It doesn't seek backward + * because it would conflict with the specification of + * {@link java.io.InputStream#skip(long) InputStream.skip}. + * + * @return 0 if n is negative, + * less than n if skipping n + * bytes would seek past the end of the file, + * n otherwise + * + * @throws IOException might be thrown by {@link #seek(long)} + */ + public long skip(long n) throws IOException { + if (n <= 0) + return 0; + + long size = length(); + long pos = position(); + if (pos >= size) + return 0; + + if (size - pos < n) + n = size - pos; + + seek(pos + n); + return n; + } + + /** + * Gets the size of the stream. + */ + public abstract long length() throws IOException; + + /** + * Gets the current position in the stream. + */ + public abstract long position() throws IOException; + + /** + * Seeks to the specified absolute position in the stream. + *

+ * Seeking past the end of the file should be supported by the subclasses + * unless there is a good reason to do otherwise. If one has seeked + * past the end of the stream, read will return + * -1 to indicate end of stream. + * + * @param pos new read position in the stream + * + * @throws IOException if pos is negative or if + * a stream-specific I/O error occurs + */ + public abstract void seek(long pos) throws IOException; +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SeekableXZInputStream.java b/org.tukani.xz/src/org/tukaani/xz/SeekableXZInputStream.java new file mode 100644 index 00000000..eb5573e6 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SeekableXZInputStream.java @@ -0,0 +1,896 @@ +/* + * SeekableXZInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.util.Arrays; +import java.util.ArrayList; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.EOFException; +import org.tukaani.xz.common.DecoderUtil; +import org.tukaani.xz.common.StreamFlags; +import org.tukaani.xz.check.Check; +import org.tukaani.xz.index.IndexDecoder; +import org.tukaani.xz.index.BlockInfo; + +/** + * Decompresses a .xz file in random access mode. + * This supports decompressing concatenated .xz files. + *

+ * Each .xz file consists of one or more Streams. Each Stream consists of zero + * or more Blocks. Each Stream contains an Index of the Stream's Blocks. + * The Indexes from all Streams are loaded in RAM by a constructor of this + * class. A typical .xz file has only one Stream, and parsing its Index will + * need only three or four seeks. + *

+ * To make random access possible, the data in a .xz file must be split + * into multiple Blocks of reasonable size. Decompression can only start at + * a Block boundary. When seeking to an uncompressed position that is not at + * a Block boundary, decompression starts at the beginning of the Block and + * throws away data until the target position is reached. Thus, smaller Blocks + * mean faster seeks to arbitrary uncompressed positions. On the other hand, + * smaller Blocks mean worse compression. So one has to make a compromise + * between random access speed and compression ratio. + *

+ * Implementation note: This class uses linear search to locate the correct + * Stream from the data structures in RAM. It was the simplest to implement + * and should be fine as long as there aren't too many Streams. The correct + * Block inside a Stream is located using binary search and thus is fast + * even with a huge number of Blocks. + * + *

Memory usage

+ *

+ * The amount of memory needed for the Indexes is taken into account when + * checking the memory usage limit. Each Stream is calculated to need at + * least 1 KiB of memory and each Block 16 bytes of memory, rounded up + * to the next kibibyte. So unless the file has a huge number of Streams or + * Blocks, these don't take a significant amount of memory. + * + *

Creating random-accessible .xz files

+ *

+ * When using {@link XZOutputStream}, a new Block can be started by calling + * its {@link XZOutputStream#endBlock() endBlock} method. If you know + * that the decompressor will only need to seek to certain uncompressed + * positions, it can be a good idea to start a new Block at (some of) these + * positions (and only at these positions to get a better compression ratio). + *

+ * liblzma in XZ Utils supports starting a new Block with + * LZMA_FULL_FLUSH. XZ Utils 5.1.1alpha added threaded + * compression which creates multi-Block .xz files. XZ Utils 5.1.1alpha + * also added the option --block-size=SIZE to the xz command + * line tool. XZ Utils 5.1.2alpha added a partial implementation of + * --block-list=SIZES which allows specifying sizes of + * individual Blocks. + * + * @see SeekableFileInputStream + * @see XZInputStream + * @see XZOutputStream + */ +public class SeekableXZInputStream extends SeekableInputStream { + /** + * The input stream containing XZ compressed data. + */ + private SeekableInputStream in; + + /** + * Memory usage limit after the memory usage of the IndexDecoders have + * been substracted. + */ + private final int memoryLimit; + + /** + * Memory usage of the IndexDecoders. + * memoryLimit + indexMemoryUsage equals the original + * memory usage limit that was passed to the constructor. + */ + private int indexMemoryUsage = 0; + + /** + * List of IndexDecoders, one for each Stream in the file. + * The list is in reverse order: The first element is + * the last Stream in the file. + */ + private final ArrayList streams = new ArrayList(); + + /** + * Bitmask of all Check IDs seen. + */ + private int checkTypes = 0; + + /** + * Uncompressed size of the file (all Streams). + */ + private long uncompressedSize = 0; + + /** + * Uncompressed size of the largest XZ Block in the file. + */ + private long largestBlockSize = 0; + + /** + * Number of XZ Blocks in the file. + */ + private int blockCount = 0; + + /** + * Size and position information about the current Block. + * If there are no Blocks, all values will be -1. + */ + private final BlockInfo curBlockInfo; + + /** + * Temporary (and cached) information about the Block whose information + * is queried via getBlockPos and related functions. + */ + private final BlockInfo queriedBlockInfo; + + /** + * Integrity Check in the current XZ Stream. 
The constructor leaves + * this to point to the Check of the first Stream. + */ + private Check check; + + /** + * Decoder of the current XZ Block, if any. + */ + private BlockInputStream blockDecoder = null; + + /** + * Current uncompressed position. + */ + private long curPos = 0; + + /** + * Target position for seeking. + */ + private long seekPos; + + /** + * True when seek(long) has been called but the actual + * seeking hasn't been done yet. + */ + private boolean seekNeeded = false; + + /** + * True when end of the file was reached. This can be cleared by + * calling seek(long). + */ + private boolean endReached = false; + + /** + * Pending exception from an earlier error. + */ + private IOException exception = null; + + /** + * Temporary buffer for read(). This avoids reallocating memory + * on every read() call. + */ + private final byte[] tempBuf = new byte[1]; + + /** + * Creates a new seekable XZ decompressor without a memory usage limit. + * + * @param in seekable input stream containing one or more + * XZ Streams; the whole input stream is used + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ data is corrupt or truncated + * + * @throws UnsupportedOptionsException + * XZ headers seem valid but they specify + * options not supported by this implementation + * + * @throws EOFException + * less than 6 bytes of input was available + * from in, or (unlikely) the size + * of the underlying stream got smaller while + * this was reading from it + * + * @throws IOException may be thrown by in + */ + public SeekableXZInputStream(SeekableInputStream in) + throws IOException { + this(in, -1); + } + + /** + * Creates a new seekable XZ decomporessor with an optional + * memory usage limit. 
+ * + * @param in seekable input stream containing one or more + * XZ Streams; the whole input stream is used + * + * @param memoryLimit memory usage limit in kibibytes (KiB) + * or -1 to impose no + * memory usage limit + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ data is corrupt or truncated + * + * @throws UnsupportedOptionsException + * XZ headers seem valid but they specify + * options not supported by this implementation + * + * @throws MemoryLimitException + * decoded XZ Indexes would need more memory + * than allowed by the memory usage limit + * + * @throws EOFException + * less than 6 bytes of input was available + * from in, or (unlikely) the size + * of the underlying stream got smaller while + * this was reading from it + * + * @throws IOException may be thrown by in + */ + public SeekableXZInputStream(SeekableInputStream in, int memoryLimit) + throws IOException { + this.in = in; + DataInputStream inData = new DataInputStream(in); + + // Check the magic bytes in the beginning of the file. + { + in.seek(0); + byte[] buf = new byte[XZ.HEADER_MAGIC.length]; + inData.readFully(buf); + if (!Arrays.equals(buf, XZ.HEADER_MAGIC)) + throw new XZFormatException(); + } + + // Get the file size and verify that it is a multiple of 4 bytes. + long pos = in.length(); + if ((pos & 3) != 0) + throw new CorruptedInputException( + "XZ file size is not a multiple of 4 bytes"); + + // Parse the headers starting from the end of the file. + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + long streamPadding = 0; + + while (pos > 0) { + if (pos < DecoderUtil.STREAM_HEADER_SIZE) + throw new CorruptedInputException(); + + // Read the potential Stream Footer. + in.seek(pos - DecoderUtil.STREAM_HEADER_SIZE); + inData.readFully(buf); + + // Skip Stream Padding four bytes at a time. + // Skipping more at once would be faster, + // but usually there isn't much Stream Padding. 
+ if (buf[8] == 0x00 && buf[9] == 0x00 && buf[10] == 0x00 + && buf[11] == 0x00) { + streamPadding += 4; + pos -= 4; + continue; + } + + // It's not Stream Padding. Update pos. + pos -= DecoderUtil.STREAM_HEADER_SIZE; + + // Decode the Stream Footer and check if Backward Size + // looks reasonable. + StreamFlags streamFooter = DecoderUtil.decodeStreamFooter(buf); + if (streamFooter.backwardSize >= pos) + throw new CorruptedInputException( + "Backward Size in XZ Stream Footer is too big"); + + // Check that the Check ID is supported. Store it in case this + // is the first Stream in the file. + check = Check.getInstance(streamFooter.checkType); + + // Remember which Check IDs have been seen. + checkTypes |= 1 << streamFooter.checkType; + + // Seek to the beginning of the Index. + in.seek(pos - streamFooter.backwardSize); + + // Decode the Index field. + IndexDecoder index; + try { + index = new IndexDecoder(in, streamFooter, streamPadding, + memoryLimit); + } catch (MemoryLimitException e) { + // IndexDecoder doesn't know how much memory we had + // already needed so we need to recreate the exception. + assert memoryLimit >= 0; + throw new MemoryLimitException( + e.getMemoryNeeded() + indexMemoryUsage, + memoryLimit + indexMemoryUsage); + } + + // Update the memory usage and limit counters. + indexMemoryUsage += index.getMemoryUsage(); + if (memoryLimit >= 0) { + memoryLimit -= index.getMemoryUsage(); + assert memoryLimit >= 0; + } + + // Remember the uncompressed size of the largest Block. + if (largestBlockSize < index.getLargestBlockSize()) + largestBlockSize = index.getLargestBlockSize(); + + // Calculate the offset to the beginning of this XZ Stream and + // check that it looks sane. + long off = index.getStreamSize() - DecoderUtil.STREAM_HEADER_SIZE; + if (pos < off) + throw new CorruptedInputException("XZ Index indicates " + + "too big compressed size for the XZ Stream"); + + // Seek to the beginning of this Stream. 
+ pos -= off; + in.seek(pos); + + // Decode the Stream Header. + inData.readFully(buf); + StreamFlags streamHeader = DecoderUtil.decodeStreamHeader(buf); + + // Verify that the Stream Header matches the Stream Footer. + if (!DecoderUtil.areStreamFlagsEqual(streamHeader, streamFooter)) + throw new CorruptedInputException( + "XZ Stream Footer does not match Stream Header"); + + // Update the total uncompressed size of the file and check that + // it doesn't overflow. + uncompressedSize += index.getUncompressedSize(); + if (uncompressedSize < 0) + throw new UnsupportedOptionsException("XZ file is too big"); + + // Update the Block count and check that it fits into an int. + blockCount += index.getRecordCount(); + if (blockCount < 0) + throw new UnsupportedOptionsException( + "XZ file has over " + Integer.MAX_VALUE + " Blocks"); + + // Add this Stream to the list of Streams. + streams.add(index); + + // Reset to be ready to parse the next Stream. + streamPadding = 0; + } + + assert pos == 0; + + // Save it now that indexMemoryUsage has been substracted from it. + this.memoryLimit = memoryLimit; + + // Store the relative offsets of the Streams. This way we don't + // need to recalculate them in this class when seeking; the + // IndexDecoder instances will handle them. + IndexDecoder prev = (IndexDecoder)streams.get(streams.size() - 1); + for (int i = streams.size() - 2; i >= 0; --i) { + IndexDecoder cur = (IndexDecoder)streams.get(i); + cur.setOffsets(prev); + prev = cur; + } + + // Initialize curBlockInfo to point to the first Stream. + // The blockNumber will be left to -1 so that .hasNext() + // and .setNext() work to get the first Block when starting + // to decompress from the beginning of the file. + IndexDecoder first = (IndexDecoder)streams.get(streams.size() - 1); + curBlockInfo = new BlockInfo(first); + + // queriedBlockInfo needs to be allocated too. The Stream used for + // initialization doesn't matter though. 
+ queriedBlockInfo = new BlockInfo(first); + } + + /** + * Gets the types of integrity checks used in the .xz file. + * Multiple checks are possible only if there are multiple + * concatenated XZ Streams. + *

+ * The returned value has a bit set for every check type that is present. + * For example, if CRC64 and SHA-256 were used, the return value is + * (1 << XZ.CHECK_CRC64) + * | (1 << XZ.CHECK_SHA256). + */ + public int getCheckTypes() { + return checkTypes; + } + + /** + * Gets the amount of memory in kibibytes (KiB) used by + * the data structures needed to locate the XZ Blocks. + * This is usually useless information but since it is calculated + * for memory usage limit anyway, it is nice to make it available to too. + */ + public int getIndexMemoryUsage() { + return indexMemoryUsage; + } + + /** + * Gets the uncompressed size of the largest XZ Block in bytes. + * This can be useful if you want to check that the file doesn't + * have huge XZ Blocks which could make seeking to arbitrary offsets + * very slow. Note that huge Blocks don't automatically mean that + * seeking would be slow, for example, seeking to the beginning of + * any Block is always fast. + */ + public long getLargestBlockSize() { + return largestBlockSize; + } + + /** + * Gets the number of Streams in the .xz file. + * + * @since 1.3 + */ + public int getStreamCount() { + return streams.size(); + } + + /** + * Gets the number of Blocks in the .xz file. + * + * @since 1.3 + */ + public int getBlockCount() { + return blockCount; + } + + /** + * Gets the uncompressed start position of the given Block. + * + * @throws IndexOutOfBoundsException if + * blockNumber < 0 or + * blockNumber >= getBlockCount(). + * + * @since 1.3 + */ + public long getBlockPos(int blockNumber) { + locateBlockByNumber(queriedBlockInfo, blockNumber); + return queriedBlockInfo.uncompressedOffset; + } + + /** + * Gets the uncompressed size of the given Block. + * + * @throws IndexOutOfBoundsException if + * blockNumber < 0 or + * blockNumber >= getBlockCount(). 
+ * + * @since 1.3 + */ + public long getBlockSize(int blockNumber) { + locateBlockByNumber(queriedBlockInfo, blockNumber); + return queriedBlockInfo.uncompressedSize; + } + + /** + * Gets the position where the given compressed Block starts in + * the underlying .xz file. + * This information is rarely useful to the users of this class. + * + * @throws IndexOutOfBoundsException if + * blockNumber < 0 or + * blockNumber >= getBlockCount(). + * + * @since 1.3 + */ + public long getBlockCompPos(int blockNumber) { + locateBlockByNumber(queriedBlockInfo, blockNumber); + return queriedBlockInfo.compressedOffset; + } + + /** + * Gets the compressed size of the given Block. + * This together with the uncompressed size can be used to calculate + * the compression ratio of the specific Block. + * + * @throws IndexOutOfBoundsException if + * blockNumber < 0 or + * blockNumber >= getBlockCount(). + * + * @since 1.3 + */ + public long getBlockCompSize(int blockNumber) { + locateBlockByNumber(queriedBlockInfo, blockNumber); + return (queriedBlockInfo.unpaddedSize + 3) & ~3; + } + + /** + * Gets integrity check type (Check ID) of the given Block. + * + * @throws IndexOutOfBoundsException if + * blockNumber < 0 or + * blockNumber >= getBlockCount(). + * + * @see #getCheckTypes() + * + * @since 1.3 + */ + public int getBlockCheckType(int blockNumber) { + locateBlockByNumber(queriedBlockInfo, blockNumber); + return queriedBlockInfo.getCheckType(); + } + + /** + * Gets the number of the Block that contains the byte at the given + * uncompressed position. + * + * @throws IndexOutOfBoundsException if + * pos < 0 or + * pos >= length(). + * + * @since 1.3 + */ + public int getBlockNumber(long pos) { + locateBlockByPos(queriedBlockInfo, pos); + return queriedBlockInfo.blockNumber; + } + + /** + * Decompresses the next byte from this input stream. 
+ * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws XZIOException if the stream has been closed + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

+ * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will try to decompress len + * bytes of uncompressed data. Less than len bytes may + * be read only in the following situations: + *

    + *
  • The end of the compressed data was reached successfully.
  • + *
  • An error is detected after at least one but less than + * len bytes have already been successfully + * decompressed. The next call with non-zero len + * will immediately throw the pending exception.
  • + *
  • An exception is thrown.
  • + *
+ * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws XZIOException if the stream has been closed + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + int size = 0; + + try { + if (seekNeeded) + seek(); + + if (endReached) + return -1; + + while (len > 0) { + if (blockDecoder == null) { + seek(); + if (endReached) + break; + } + + int ret = blockDecoder.read(buf, off, len); + + if (ret > 0) { + curPos += ret; + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + blockDecoder = null; + } + } + } catch (IOException e) { + // We know that the file isn't simply truncated because we could + // parse the Indexes in the constructor. So convert EOFException + // to CorruptedInputException. + if (e instanceof EOFException) + e = new CorruptedInputException(); + + exception = e; + if (size == 0) + throw e; + } + + return size; + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. 
+ * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + if (endReached || seekNeeded || blockDecoder == null) + return 0; + + return blockDecoder.available(); + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. + * + * @throws IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } + + /** + * Gets the uncompressed size of this input stream. If there are multiple + * XZ Streams, the total uncompressed size of all XZ Streams is returned. + */ + public long length() { + return uncompressedSize; + } + + /** + * Gets the current uncompressed position in this input stream. + * + * @throws XZIOException if the stream has been closed + */ + public long position() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + return seekNeeded ? seekPos : curPos; + } + + /** + * Seeks to the specified absolute uncompressed position in the stream. + * This only stores the new position, so this function itself is always + * very fast. The actual seek is done when read is called + * to read at least one byte. + *

+ * Seeking past the end of the stream is possible. In that case + * read will return -1 to indicate + * the end of the stream. + * + * @param pos new uncompressed read position + * + * @throws XZIOException + * if pos is negative, or + * if stream has been closed + */ + public void seek(long pos) throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (pos < 0) + throw new XZIOException("Negative seek position: " + pos); + + seekPos = pos; + seekNeeded = true; + } + + /** + * Seeks to the beginning of the given XZ Block. + * + * @throws XZIOException + * if blockNumber < 0 or + * blockNumber >= getBlockCount(), + * or if stream has been closed + * + * @since 1.3 + */ + public void seekToBlock(int blockNumber) throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (blockNumber < 0 || blockNumber >= blockCount) + throw new XZIOException("Invalid XZ Block number: " + blockNumber); + + // This is a bit silly implementation. Here we locate the uncompressed + // offset of the specified Block, then when doing the actual seek in + // seek(), we need to find the Block number based on seekPos. + seekPos = getBlockPos(blockNumber); + seekNeeded = true; + } + + /** + * Does the actual seeking. This is also called when read + * needs a new Block to decode. + */ + private void seek() throws IOException { + // If seek(long) wasn't called, we simply need to get the next Block + // from the same Stream. If there are no more Blocks in this Stream, + // then we behave as if seek(long) had been called. + if (!seekNeeded) { + if (curBlockInfo.hasNext()) { + curBlockInfo.setNext(); + initBlockDecoder(); + return; + } + + seekPos = curPos; + } + + seekNeeded = false; + + // Check if we are seeking to or past the end of the file. 
+ if (seekPos >= uncompressedSize) { + curPos = seekPos; + blockDecoder = null; + endReached = true; + return; + } + + endReached = false; + + // Locate the Block that contains the uncompressed target position. + locateBlockByPos(curBlockInfo, seekPos); + + // Seek in the underlying stream and create a new Block decoder + // only if really needed. We can skip it if the current position + // is already in the correct Block and the target position hasn't + // been decompressed yet. + // + // NOTE: If curPos points to the beginning of this Block, it's + // because it was left there after decompressing an earlier Block. + // In that case, decoding of the current Block hasn't been started + // yet. (Decoding of a Block won't be started until at least one + // byte will also be read from it.) + if (!(curPos > curBlockInfo.uncompressedOffset && curPos <= seekPos)) { + // Seek to the beginning of the Block. + in.seek(curBlockInfo.compressedOffset); + + // Since it is possible that this Block is from a different + // Stream than the previous Block, initialize a new Check. + check = Check.getInstance(curBlockInfo.getCheckType()); + + // Create a new Block decoder. + initBlockDecoder(); + curPos = curBlockInfo.uncompressedOffset; + } + + // If the target wasn't at a Block boundary, decompress and throw + // away data to reach the target position. + if (seekPos > curPos) { + // NOTE: The "if" below is there just in case. In this situation, + // blockDecoder.skip will always skip the requested amount + // or throw an exception. + long skipAmount = seekPos - curPos; + if (blockDecoder.skip(skipAmount) != skipAmount) + throw new CorruptedInputException(); + + curPos = seekPos; + } + } + + /** + * Locates the Block that contains the given uncompressed position. 
+ */ + private void locateBlockByPos(BlockInfo info, long pos) { + if (pos < 0 || pos >= uncompressedSize) + throw new IndexOutOfBoundsException( + "Invalid uncompressed position: " + pos); + + // Locate the Stream that contains the target position. + IndexDecoder index; + for (int i = 0; ; ++i) { + index = (IndexDecoder)streams.get(i); + if (index.hasUncompressedOffset(pos)) + break; + } + + // Locate the Block from the Stream that contains the target position. + index.locateBlock(info, pos); + + assert (info.compressedOffset & 3) == 0; + assert info.uncompressedSize > 0; + assert pos >= info.uncompressedOffset; + assert pos < info.uncompressedOffset + info.uncompressedSize; + } + + /** + * Locates the given Block and stores information about it + * to info. + */ + private void locateBlockByNumber(BlockInfo info, int blockNumber) { + // Validate. + if (blockNumber < 0 || blockNumber >= blockCount) + throw new IndexOutOfBoundsException( + "Invalid XZ Block number: " + blockNumber); + + // Skip the search if info already points to the correct Block. + if (info.blockNumber == blockNumber) + return; + + // Search the Stream that contains the given Block and then + // search the Block from that Stream. + for (int i = 0; ; ++i) { + IndexDecoder index = (IndexDecoder)streams.get(i); + if (index.hasRecord(blockNumber)) { + index.setBlockInfo(info, blockNumber); + return; + } + } + } + + /** + * Initializes a new BlockInputStream. This is a helper function for + * seek(). + */ + private void initBlockDecoder() throws IOException { + try { + // Set it to null first so that GC can collect it if memory + // runs tight when initializing a new BlockInputStream. + blockDecoder = null; + blockDecoder = new BlockInputStream(in, check, memoryLimit, + curBlockInfo.unpaddedSize, curBlockInfo.uncompressedSize); + } catch (MemoryLimitException e) { + // BlockInputStream doesn't know how much memory we had + // already needed so we need to recreate the exception. 
+ assert memoryLimit >= 0; + throw new MemoryLimitException( + e.getMemoryNeeded() + indexMemoryUsage, + memoryLimit + indexMemoryUsage); + } catch (IndexIndicatorException e) { + // It cannot be Index so the file must be corrupt. + throw new CorruptedInputException(); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SimpleInputStream.java b/org.tukani.xz/src/org/tukaani/xz/SimpleInputStream.java new file mode 100644 index 00000000..afd40c77 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SimpleInputStream.java @@ -0,0 +1,138 @@ +/* + * SimpleInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.IOException; +import org.tukaani.xz.simple.SimpleFilter; + +class SimpleInputStream extends InputStream { + private static final int FILTER_BUF_SIZE = 4096; + + private InputStream in; + private final SimpleFilter simpleFilter; + + private final byte[] filterBuf = new byte[FILTER_BUF_SIZE]; + private int pos = 0; + private int filtered = 0; + private int unfiltered = 0; + + private boolean endReached = false; + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + static int getMemoryUsage() { + return 1 + FILTER_BUF_SIZE / 1024; + } + + SimpleInputStream(InputStream in, SimpleFilter simpleFilter) { + // Check for null because otherwise null isn't detect + // in this constructor. + if (in == null) + throw new NullPointerException(); + + // The simpleFilter argument comes from this package + // so it is known to be non-null already. + assert simpleFilter != null; + + this.in = in; + this.simpleFilter = simpleFilter; + } + + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? 
-1 : (tempBuf[0] & 0xFF); + } + + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + try { + int size = 0; + + while (true) { + // Copy filtered data into the caller-provided buffer. + int copySize = Math.min(filtered, len); + System.arraycopy(filterBuf, pos, buf, off, copySize); + pos += copySize; + filtered -= copySize; + off += copySize; + len -= copySize; + size += copySize; + + // If end of filterBuf was reached, move the pending data to + // the beginning of the buffer so that more data can be + // copied into filterBuf on the next loop iteration. + if (pos + filtered + unfiltered == FILTER_BUF_SIZE) { + System.arraycopy(filterBuf, pos, filterBuf, 0, + filtered + unfiltered); + pos = 0; + } + + if (len == 0 || endReached) + return size > 0 ? size : -1; + + assert filtered == 0; + + // Get more data into the temporary buffer. + int inSize = FILTER_BUF_SIZE - (pos + filtered + unfiltered); + inSize = in.read(filterBuf, pos + filtered + unfiltered, + inSize); + + if (inSize == -1) { + // Mark the remaining unfiltered bytes to be ready + // to be copied out. + endReached = true; + filtered = unfiltered; + unfiltered = 0; + } else { + // Filter the data in filterBuf. 
+ unfiltered += inSize; + filtered = simpleFilter.code(filterBuf, pos, unfiltered); + assert filtered <= unfiltered; + unfiltered -= filtered; + } + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + public int available() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + return filtered; + } + + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SimpleOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/SimpleOutputStream.java new file mode 100644 index 00000000..771b1fb2 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SimpleOutputStream.java @@ -0,0 +1,151 @@ +/* + * SimpleOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.IOException; +import org.tukaani.xz.simple.SimpleFilter; + +class SimpleOutputStream extends FinishableOutputStream { + private static final int FILTER_BUF_SIZE = 4096; + + private FinishableOutputStream out; + private final SimpleFilter simpleFilter; + + private final byte[] filterBuf = new byte[FILTER_BUF_SIZE]; + private int pos = 0; + private int unfiltered = 0; + + private IOException exception = null; + private boolean finished = false; + + private final byte[] tempBuf = new byte[1]; + + static int getMemoryUsage() { + return 1 + FILTER_BUF_SIZE / 1024; + } + + SimpleOutputStream(FinishableOutputStream out, + SimpleFilter simpleFilter) { + if (out == null) + throw new NullPointerException(); + + this.out = out; + this.simpleFilter = simpleFilter; + } + + public void write(int b) throws IOException { + tempBuf[0] = (byte)b; + write(tempBuf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + 
len > buf.length) + throw new IndexOutOfBoundsException(); + + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + while (len > 0) { + // Copy more unfiltered data into filterBuf. + int copySize = Math.min(len, FILTER_BUF_SIZE - (pos + unfiltered)); + System.arraycopy(buf, off, filterBuf, pos + unfiltered, copySize); + off += copySize; + len -= copySize; + unfiltered += copySize; + + // Filter the data in filterBuf. + int filtered = simpleFilter.code(filterBuf, pos, unfiltered); + assert filtered <= unfiltered; + unfiltered -= filtered; + + // Write out the filtered data. + try { + out.write(filterBuf, pos, filtered); + } catch (IOException e) { + exception = e; + throw e; + } + + pos += filtered; + + // If end of filterBuf was reached, move the pending unfiltered + // data to the beginning of the buffer so that more data can + // be copied into filterBuf on the next loop iteration. + if (pos + unfiltered == FILTER_BUF_SIZE) { + System.arraycopy(filterBuf, pos, filterBuf, 0, unfiltered); + pos = 0; + } + } + } + + private void writePending() throws IOException { + assert !finished; + + if (exception != null) + throw exception; + + try { + out.write(filterBuf, pos, unfiltered); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + + public void flush() throws IOException { + throw new UnsupportedOptionsException("Flushing is not supported"); + } + + public void finish() throws IOException { + if (!finished) { + // If it fails, don't call out.finish(). + writePending(); + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + } + } + + public void close() throws IOException { + if (out != null) { + if (!finished) { + // out.close() must be called even if writePending() fails. + // writePending() saves the possible exception so we can + // ignore exceptions here. 
+ try { + writePending(); + } catch (IOException e) {} + } + + try { + out.close(); + } catch (IOException e) { + // If there is an earlier exception, the exception + // from out.close() is lost. + if (exception == null) + exception = e; + } + + out = null; + } + + if (exception != null) + throw exception; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/SingleXZInputStream.java b/org.tukani.xz/src/org/tukaani/xz/SingleXZInputStream.java new file mode 100644 index 00000000..a273fdd7 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/SingleXZInputStream.java @@ -0,0 +1,318 @@ +/* + * SingleXZInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.EOFException; +import org.tukaani.xz.common.DecoderUtil; +import org.tukaani.xz.common.StreamFlags; +import org.tukaani.xz.index.IndexHash; +import org.tukaani.xz.check.Check; + +/** + * Decompresses exactly one XZ Stream in streamed mode (no seeking). + * The decompression stops after the first XZ Stream has been decompressed, + * and the read position in the input stream is left at the first byte + * after the end of the XZ Stream. This can be useful when XZ data has + * been stored inside some other file format or protocol. + *

+ * Unless you know what you are doing, don't use this class to decompress + * standalone .xz files. For that purpose, use XZInputStream. + * + *

When uncompressed size is known beforehand

+ *

+ * If you are decompressing complete XZ streams and your application knows + * exactly how much uncompressed data there should be, it is good to try + * reading one more byte by calling read() and checking + * that it returns -1. This way the decompressor will parse the + * file footers and verify the integrity checks, giving the caller more + * confidence that the uncompressed data is valid. + * + * @see XZInputStream + */ +public class SingleXZInputStream extends InputStream { + private InputStream in; + private int memoryLimit; + private StreamFlags streamHeaderFlags; + private Check check; + private BlockInputStream blockDecoder = null; + private final IndexHash indexHash = new IndexHash(); + private boolean endReached = false; + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + /** + * Creates a new XZ decompressor that decompresses exactly one + * XZ Stream from in without a memory usage limit. + *

+ * This constructor reads and parses the XZ Stream Header (12 bytes) + * from in. The header of the first Block is not read + * until read is called. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public SingleXZInputStream(InputStream in) throws IOException { + initialize(in, -1); + } + + /** + * Creates a new XZ decompressor that decompresses exactly one + * XZ Stream from in with an optional memory usage limit. + *

+ * This is identical to SingleXZInputStream(InputStream) + * except that this takes also the memoryLimit argument. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @param memoryLimit memory usage limit in kibibytes (KiB) + * or -1 to impose no + * memory usage limit + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public SingleXZInputStream(InputStream in, int memoryLimit) + throws IOException { + initialize(in, memoryLimit); + } + + SingleXZInputStream(InputStream in, int memoryLimit, + byte[] streamHeader) throws IOException { + initialize(in, memoryLimit, streamHeader); + } + + private void initialize(InputStream in, int memoryLimit) + throws IOException { + byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + new DataInputStream(in).readFully(streamHeader); + initialize(in, memoryLimit, streamHeader); + } + + private void initialize(InputStream in, int memoryLimit, + byte[] streamHeader) throws IOException { + this.in = in; + this.memoryLimit = memoryLimit; + streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader); + check = Check.getInstance(streamHeaderFlags.checkType); + } + + /** + * Gets the ID of the integrity check used in this XZ Stream. + * + * @return the Check ID specified in the XZ Stream Header + */ + public int getCheckType() { + return streamHeaderFlags.checkType; + } + + /** + * Gets the name of the integrity check used in this XZ Stream. 
+ * + * @return the name of the check specified in the XZ Stream Header + */ + public String getCheckName() { + return check.getName(); + } + + /** + * Decompresses the next byte from this input stream. + *

+ * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

+ * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will try to decompress len + * bytes of uncompressed data. Less than len bytes may + * be read only in the following situations: + *

    + *
  • The end of the compressed data was reached successfully.
  • + *
  • An error is detected after at least one but less than len + * bytes have already been successfully decompressed. + * The next call with non-zero len will immediately + * throw the pending exception.
  • + *
  • An exception is thrown.
  • + *
+ * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + int size = 0; + + try { + while (len > 0) { + if (blockDecoder == null) { + try { + blockDecoder = new BlockInputStream( + in, check, memoryLimit, -1, -1); + } catch (IndexIndicatorException e) { + indexHash.validate(in); + validateStreamFooter(); + endReached = true; + return size > 0 ? 
size : -1; + } + } + + int ret = blockDecoder.read(buf, off, len); + + if (ret > 0) { + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + indexHash.add(blockDecoder.getUnpaddedSize(), + blockDecoder.getUncompressedSize()); + blockDecoder = null; + } + } + } catch (IOException e) { + exception = e; + if (size == 0) + throw e; + } + + return size; + } + + private void validateStreamFooter() throws IOException { + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + new DataInputStream(in).readFully(buf); + StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf); + + if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags, + streamFooterFlags) + || indexHash.getIndexSize() != streamFooterFlags.backwardSize) + throw new CorruptedInputException( + "XZ Stream Footer does not match Stream Header"); + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + return blockDecoder == null ? 0 : blockDecoder.available(); + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. 
+ * + * @throws IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/UncompressedLZMA2OutputStream.java b/org.tukani.xz/src/org/tukaani/xz/UncompressedLZMA2OutputStream.java new file mode 100644 index 00000000..1ff96751 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/UncompressedLZMA2OutputStream.java @@ -0,0 +1,153 @@ +/* + * UncompressedLZMA2OutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.DataOutputStream; +import java.io.IOException; + +class UncompressedLZMA2OutputStream extends FinishableOutputStream { + private FinishableOutputStream out; + private final DataOutputStream outData; + + private final byte[] uncompBuf + = new byte[LZMA2OutputStream.COMPRESSED_SIZE_MAX]; + private int uncompPos = 0; + private boolean dictResetNeeded = true; + + private boolean finished = false; + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + static int getMemoryUsage() { + // uncompBuf + a little extra + return 70; + } + + UncompressedLZMA2OutputStream(FinishableOutputStream out) { + if (out == null) + throw new NullPointerException(); + + this.out = out; + outData = new DataOutputStream(out); + } + + public void write(int b) throws IOException { + tempBuf[0] = (byte)b; + write(tempBuf, 0, 1); + } + + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + while (len > 0) { + int copySize = Math.min(uncompBuf.length - uncompPos, len); + System.arraycopy(buf, off, uncompBuf, 
uncompPos, copySize); + len -= copySize; + uncompPos += copySize; + + if (uncompPos == uncompBuf.length) + writeChunk(); + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + private void writeChunk() throws IOException { + outData.writeByte(dictResetNeeded ? 0x01 : 0x02); + outData.writeShort(uncompPos - 1); + outData.write(uncompBuf, 0, uncompPos); + uncompPos = 0; + dictResetNeeded = false; + } + + private void writeEndMarker() throws IOException { + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + if (uncompPos > 0) + writeChunk(); + + out.write(0x00); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void flush() throws IOException { + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + if (uncompPos > 0) + writeChunk(); + + out.flush(); + } catch (IOException e) { + exception = e; + throw e; + } + } + + public void finish() throws IOException { + if (!finished) { + writeEndMarker(); + + try { + out.finish(); + } catch (IOException e) { + exception = e; + throw e; + } + + finished = true; + } + } + + public void close() throws IOException { + if (out != null) { + if (!finished) { + try { + writeEndMarker(); + } catch (IOException e) {} + } + + try { + out.close(); + } catch (IOException e) { + if (exception == null) + exception = e; + } + + out = null; + } + + if (exception != null) + throw exception; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/UnsupportedOptionsException.java b/org.tukani.xz/src/org/tukaani/xz/UnsupportedOptionsException.java new file mode 100644 index 00000000..9aa16e8c --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/UnsupportedOptionsException.java @@ -0,0 +1,34 @@ +/* + * UnsupportedOptionsException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +/** + * Thrown when compression options not supported by this implementation + * are detected. Some other implementation might support those options. + */ +public class UnsupportedOptionsException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new UnsupportedOptionsException with null + * as its error detail message. + */ + public UnsupportedOptionsException() {} + + /** + * Creates a new UnsupportedOptionsException with the given + * error detail message. + * + * @param s error detail message + */ + public UnsupportedOptionsException(String s) { + super(s); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/X86Options.java b/org.tukani.xz/src/org/tukaani/xz/X86Options.java new file mode 100644 index 00000000..e6d241f1 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/X86Options.java @@ -0,0 +1,36 @@ +/* + * X86Options + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import org.tukaani.xz.simple.X86; + +/** + * BCJ filter for x86 (32-bit and 64-bit) instructions. 
+ */ +public class X86Options extends BCJOptions { + private static final int ALIGNMENT = 1; + + public X86Options() { + super(ALIGNMENT); + } + + public FinishableOutputStream getOutputStream(FinishableOutputStream out) { + return new SimpleOutputStream(out, new X86(true, startOffset)); + } + + public InputStream getInputStream(InputStream in) { + return new SimpleInputStream(in, new X86(false, startOffset)); + } + + FilterEncoder getFilterEncoder() { + return new BCJEncoder(this, BCJCoder.X86_FILTER_ID); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/XZ.java b/org.tukani.xz/src/org/tukaani/xz/XZ.java new file mode 100644 index 00000000..4e0857ff --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/XZ.java @@ -0,0 +1,53 @@ +/* + * XZ + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * XZ constants. + */ +public class XZ { + /** + * XZ Header Magic Bytes begin a XZ file. + * This can be useful to detect XZ compressed data. + */ + public static final byte[] HEADER_MAGIC = { + (byte)0xFD, '7', 'z', 'X', 'Z', '\0' }; + + /** + * XZ Footer Magic Bytes are the last bytes of a XZ Stream. + */ + public static final byte[] FOOTER_MAGIC = { 'Y', 'Z' }; + + /** + * Integrity check ID indicating that no integrity check is calculated. + *

+ * Omitting the integrity check is strongly discouraged except when + * the integrity of the data will be verified by other means anyway, + * and calculating the check twice would be useless. + */ + public static final int CHECK_NONE = 0; + + /** + * Integrity check ID for CRC32. + */ + public static final int CHECK_CRC32 = 1; + + /** + * Integrity check ID for CRC64. + */ + public static final int CHECK_CRC64 = 4; + + /** + * Integrity check ID for SHA-256. + */ + public static final int CHECK_SHA256 = 10; + + private XZ() {} +} diff --git a/org.tukani.xz/src/org/tukaani/xz/XZFormatException.java b/org.tukani.xz/src/org/tukaani/xz/XZFormatException.java new file mode 100644 index 00000000..6f63020b --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/XZFormatException.java @@ -0,0 +1,24 @@ +/* + * XZFormatException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Thrown when the input data is not in the XZ format. + */ +public class XZFormatException extends XZIOException { + private static final long serialVersionUID = 3L; + + /** + * Creates a new exception with the default error detail message. + */ + public XZFormatException() { + super("Input is not in the XZ format"); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/XZIOException.java b/org.tukani.xz/src/org/tukaani/xz/XZIOException.java new file mode 100644 index 00000000..14675f58 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/XZIOException.java @@ -0,0 +1,27 @@ +/* + * XZIOException + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +/** + * Generic {@link java.io.IOException IOException} specific to this package. + * The other IOExceptions in this package extend + * from XZIOException. 
+ */ +public class XZIOException extends java.io.IOException { + private static final long serialVersionUID = 3L; + + public XZIOException() { + super(); + } + + public XZIOException(String s) { + super(s); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/XZInputStream.java b/org.tukani.xz/src/org/tukaani/xz/XZInputStream.java new file mode 100644 index 00000000..bfd71f4c --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/XZInputStream.java @@ -0,0 +1,313 @@ +/* + * XZInputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz; + +import java.io.InputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.io.EOFException; +import org.tukaani.xz.common.DecoderUtil; + +/** + * Decompresses a .xz file in streamed mode (no seeking). + *

+ * Use this to decompress regular standalone .xz files. This reads from + * its input stream until the end of the input or until an error occurs. + * This supports decompressing concatenated .xz files. + * + *

Typical use cases

+ *

+ * Getting an input stream to decompress a .xz file: + *

+ * InputStream infile = new FileInputStream("foo.xz");
+ * XZInputStream inxz = new XZInputStream(infile);
+ * 
+ *

+ * It's important to keep in mind that decompressor memory usage depends + * on the settings used to compress the file. The worst-case memory usage + * of XZInputStream is currently 1.5 GiB. Still, very few files will + * require more than about 65 MiB because that's how much decompressing + * a file created with the highest preset level will need, and only a few + * people use settings other than the predefined presets. + *

+ * It is possible to specify a memory usage limit for + * XZInputStream. If decompression requires more memory than + * the specified limit, MemoryLimitException will be thrown when reading + * from the stream. For example, the following sets the memory usage limit + * to 100 MiB: + *

+ * InputStream infile = new FileInputStream("foo.xz");
+ * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
+ * 
+ * + *

When uncompressed size is known beforehand

+ *

+ * If you are decompressing complete files and your application knows + * exactly how much uncompressed data there should be, it is good to try + * reading one more byte by calling read() and checking + * that it returns -1. This way the decompressor will parse the + * file footers and verify the integrity checks, giving the caller more + * confidence that the uncompressed data is valid. (This advice seems to + * apply to + * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.) + * + * @see SingleXZInputStream + */ +public class XZInputStream extends InputStream { + private final int memoryLimit; + private InputStream in; + private SingleXZInputStream xzIn; + private boolean endReached = false; + private IOException exception = null; + + private final byte[] tempBuf = new byte[1]; + + /** + * Creates a new XZ decompressor without a memory usage limit. + *

+ * This constructor reads and parses the XZ Stream Header (12 bytes) + * from in. The header of the first Block is not read + * until read is called. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public XZInputStream(InputStream in) throws IOException { + this(in, -1); + } + + /** + * Creates a new XZ decompressor with an optional memory usage limit. + *

+ * This is identical to XZInputStream(InputStream) except + * that this takes also the memoryLimit argument. + * + * @param in input stream from which XZ-compressed + * data is read + * + * @param memoryLimit memory usage limit in kibibytes (KiB) + * or -1 to impose no + * memory usage limit + * + * @throws XZFormatException + * input is not in the XZ format + * + * @throws CorruptedInputException + * XZ header CRC32 doesn't match + * + * @throws UnsupportedOptionsException + * XZ header is valid but specifies options + * not supported by this implementation + * + * @throws EOFException + * less than 12 bytes of input was available + * from in + * + * @throws IOException may be thrown by in + */ + public XZInputStream(InputStream in, int memoryLimit) throws IOException { + this.in = in; + this.memoryLimit = memoryLimit; + this.xzIn = new SingleXZInputStream(in, memoryLimit); + } + + /** + * Decompresses the next byte from this input stream. + *

+ * Reading lots of data with read() from this input stream + * may be inefficient. Wrap it in {@link java.io.BufferedInputStream} + * if you need to read lots of data one byte at a time. + * + * @return the next decompressed byte, or -1 + * to indicate the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read() throws IOException { + return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF); + } + + /** + * Decompresses into an array of bytes. + *

+ * If len is zero, no bytes are read and 0 + * is returned. Otherwise this will try to decompress len + * bytes of uncompressed data. Less than len bytes may + * be read only in the following situations: + *

    + *
  • The end of the compressed data was reached successfully.
  • + *
  • An error is detected after at least one but less than len + * bytes have already been successfully decompressed. + * The next call with non-zero len will immediately + * throw the pending exception.
  • + *
  • An exception is thrown.
  • + *
+ * + * @param buf target buffer for uncompressed data + * @param off start offset in buf + * @param len maximum number of uncompressed bytes to read + * + * @return number of bytes read, or -1 to indicate + * the end of the compressed stream + * + * @throws CorruptedInputException + * @throws UnsupportedOptionsException + * @throws MemoryLimitException + * + * @throws XZIOException if the stream has been closed + * + * @throws EOFException + * compressed input is truncated or corrupt + * + * @throws IOException may be thrown by in + */ + public int read(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (len == 0) + return 0; + + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + if (endReached) + return -1; + + int size = 0; + + try { + while (len > 0) { + if (xzIn == null) { + prepareNextStream(); + if (endReached) + return size == 0 ? -1 : size; + } + + int ret = xzIn.read(buf, off, len); + + if (ret > 0) { + size += ret; + off += ret; + len -= ret; + } else if (ret == -1) { + xzIn = null; + } + } + } catch (IOException e) { + exception = e; + if (size == 0) + throw e; + } + + return size; + } + + private void prepareNextStream() throws IOException { + DataInputStream inData = new DataInputStream(in); + byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE]; + + // The size of Stream Padding must be a multiple of four bytes, + // all bytes zero. + do { + // First try to read one byte to see if we have reached the end + // of the file. + int ret = inData.read(buf, 0, 1); + if (ret == -1) { + endReached = true; + return; + } + + // Since we got one byte of input, there must be at least + // three more available in a valid file. + inData.readFully(buf, 1, 3); + + } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0); + + // Not all bytes are zero. 
In a valid Stream it indicates the + // beginning of the next Stream. Read the rest of the Stream Header + // and initialize the XZ decoder. + inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4); + + try { + xzIn = new SingleXZInputStream(in, memoryLimit, buf); + } catch (XZFormatException e) { + // Since this isn't the first .xz Stream, it is more + // logical to tell that the data is corrupt. + throw new CorruptedInputException( + "Garbage after a valid XZ Stream"); + } + } + + /** + * Returns the number of uncompressed bytes that can be read + * without blocking. The value is returned with an assumption + * that the compressed input data will be valid. If the compressed + * data is corrupt, CorruptedInputException may get + * thrown before the number of bytes claimed to be available have + * been read from this input stream. + * + * @return the number of uncompressed bytes that can be read + * without blocking + */ + public int available() throws IOException { + if (in == null) + throw new XZIOException("Stream closed"); + + if (exception != null) + throw exception; + + return xzIn == null ? 0 : xzIn.available(); + } + + /** + * Closes the stream and calls in.close(). + * If the stream was already closed, this does nothing. + * + * @throws IOException if thrown by in.close() + */ + public void close() throws IOException { + if (in != null) { + try { + in.close(); + } finally { + in = null; + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/XZOutputStream.java b/org.tukani.xz/src/org/tukaani/xz/XZOutputStream.java new file mode 100644 index 00000000..6a37fede --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/XZOutputStream.java @@ -0,0 +1,488 @@ +/* + * XZOutputStream + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz; + +import java.io.OutputStream; +import java.io.IOException; +import org.tukaani.xz.common.EncoderUtil; +import org.tukaani.xz.common.StreamFlags; +import org.tukaani.xz.check.Check; +import org.tukaani.xz.index.IndexEncoder; + +/** + * Compresses into the .xz file format. + * + *

Examples

+ *

+ * Getting an output stream to compress with LZMA2 using the default + * settings and the default integrity check type (CRC64): + *

+ * FileOutputStream outfile = new FileOutputStream("foo.xz");
+ * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options());
+ * 
+ *

+ * Using the preset level 8 for LZMA2 (the default + * is 6) and SHA-256 instead of CRC64 for integrity checking: + *

+ * XZOutputStream outxz = new XZOutputStream(outfile, new LZMA2Options(8),
+ *                                           XZ.CHECK_SHA256);
+ * 
+ *

+ * Using the x86 BCJ filter together with LZMA2 to compress x86 executables + * and printing the memory usage information before creating the + * XZOutputStream: + *

+ * X86Options x86 = new X86Options();
+ * LZMA2Options lzma2 = new LZMA2Options();
+ * FilterOptions[] options = { x86, lzma2 };
+ * System.out.println("Encoder memory usage: "
+ *                    + FilterOptions.getEncoderMemoryUsage(options)
+ *                    + " KiB");
+ * System.out.println("Decoder memory usage: "
+ *                    + FilterOptions.getDecoderMemoryUsage(options)
+ *                    + " KiB");
+ * XZOutputStream outxz = new XZOutputStream(outfile, options);
+ * 
+ */ +public class XZOutputStream extends FinishableOutputStream { + private OutputStream out; + private final StreamFlags streamFlags = new StreamFlags(); + private final Check check; + private final IndexEncoder index = new IndexEncoder(); + + private BlockOutputStream blockEncoder = null; + private FilterEncoder[] filters; + + /** + * True if the current filter chain supports flushing. + * If it doesn't support flushing, flush() + * will use endBlock() as a fallback. + */ + private boolean filtersSupportFlushing; + + private IOException exception = null; + private boolean finished = false; + + private final byte[] tempBuf = new byte[1]; + + /** + * Creates a new XZ compressor using one filter and CRC64 as + * the integrity check. This constructor is equivalent to passing + * a single-member FilterOptions array to + * XZOutputStream(OutputStream, FilterOptions[]). + * + * @param out output stream to which the compressed data + * will be written + * + * @param filterOptions + * filter options to use + * + * @throws UnsupportedOptionsException + * invalid filter chain + * + * @throws IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions filterOptions) + throws IOException { + this(out, filterOptions, XZ.CHECK_CRC64); + } + + /** + * Creates a new XZ compressor using one filter and the specified + * integrity check type. This constructor is equivalent to + * passing a single-member FilterOptions array to + * XZOutputStream(OutputStream, FilterOptions[], int). 
+ * + * @param out output stream to which the compressed data + * will be written + * + * @param filterOptions + * filter options to use + * + * @param checkType type of the integrity check, + * for example XZ.CHECK_CRC32 + * + * @throws UnsupportedOptionsException + * invalid filter chain + * + * @throws IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions filterOptions, + int checkType) throws IOException { + this(out, new FilterOptions[] { filterOptions }, checkType); + } + + /** + * Creates a new XZ compressor using 1-4 filters and CRC64 as + * the integrity check. This constructor is equivalent + * XZOutputStream(out, filterOptions, XZ.CHECK_CRC64). + * + * @param out output stream to which the compressed data + * will be written + * + * @param filterOptions + * array of filter options to use + * + * @throws UnsupportedOptionsException + * invalid filter chain + * + * @throws IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions[] filterOptions) + throws IOException { + this(out, filterOptions, XZ.CHECK_CRC64); + } + + /** + * Creates a new XZ compressor using 1-4 filters and the specified + * integrity check type. + * + * @param out output stream to which the compressed data + * will be written + * + * @param filterOptions + * array of filter options to use + * + * @param checkType type of the integrity check, + * for example XZ.CHECK_CRC32 + * + * @throws UnsupportedOptionsException + * invalid filter chain + * + * @throws IOException may be thrown from out + */ + public XZOutputStream(OutputStream out, FilterOptions[] filterOptions, + int checkType) throws IOException { + this.out = out; + updateFilters(filterOptions); + + streamFlags.checkType = checkType; + check = Check.getInstance(checkType); + + encodeStreamHeader(); + } + + /** + * Updates the filter chain with a single filter. 
+ * This is equivalent to passing a single-member FilterOptions array + * to updateFilters(FilterOptions[]). + * + * @param filterOptions + * new filter to use + * + * @throws UnsupportedOptionsException + * unsupported filter chain, or trying to change + * the filter chain in the middle of a Block + */ + public void updateFilters(FilterOptions filterOptions) + throws XZIOException { + FilterOptions[] opts = new FilterOptions[1]; + opts[0] = filterOptions; + updateFilters(opts); + } + + /** + * Updates the filter chain with 1-4 filters. + *

+ * Currently this cannot be used to update e.g. LZMA2 options in the + * middle of a XZ Block. Use endBlock() to finish the + * current XZ Block before calling this function. The new filter chain + * will then be used for the next XZ Block. + * + * @param filterOptions + * new filter chain to use + * + * @throws UnsupportedOptionsException + * unsupported filter chain, or trying to change + * the filter chain in the middle of a Block + */ + public void updateFilters(FilterOptions[] filterOptions) + throws XZIOException { + if (blockEncoder != null) + throw new UnsupportedOptionsException("Changing filter options " + + "in the middle of a XZ Block not implemented"); + + if (filterOptions.length < 1 || filterOptions.length > 4) + throw new UnsupportedOptionsException( + "XZ filter chain must be 1-4 filters"); + + filtersSupportFlushing = true; + FilterEncoder[] newFilters = new FilterEncoder[filterOptions.length]; + for (int i = 0; i < filterOptions.length; ++i) { + newFilters[i] = filterOptions[i].getFilterEncoder(); + filtersSupportFlushing &= newFilters[i].supportsFlushing(); + } + + RawCoder.validate(newFilters); + filters = newFilters; + } + + /** + * Writes one byte to be compressed. + * + * @throws XZIOException + * XZ Stream has grown too big + * + * @throws XZIOException + * finish() or close() + * was already called + * + * @throws IOException may be thrown by the underlying output stream + */ + public void write(int b) throws IOException { + tempBuf[0] = (byte)b; + write(tempBuf, 0, 1); + } + + /** + * Writes an array of bytes to be compressed. + * The compressors tend to do internal buffering and thus the written + * data won't be readable from the compressed output immediately. + * Use flush() to force everything written so far to + * be written to the underlaying output stream, but be aware that + * flushing reduces compression ratio. 
+ * + * @param buf buffer of bytes to be written + * @param off start offset in buf + * @param len number of bytes to write + * + * @throws XZIOException + * XZ Stream has grown too big: total file size + * about 8 EiB or the Index field exceeds + * 16 GiB; you shouldn't reach these sizes + * in practice + * + * @throws XZIOException + * finish() or close() + * was already called and len > 0 + * + * @throws IOException may be thrown by the underlying output stream + */ + public void write(byte[] buf, int off, int len) throws IOException { + if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) + throw new IndexOutOfBoundsException(); + + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + if (blockEncoder == null) + blockEncoder = new BlockOutputStream(out, filters, check); + + blockEncoder.write(buf, off, len); + } catch (IOException e) { + exception = e; + throw e; + } + } + + /** + * Finishes the current XZ Block (but not the whole XZ Stream). + * This doesn't flush the stream so it's possible that not all data will + * be decompressible from the output stream when this function returns. + * Call also flush() if flushing is wanted in addition to + * finishing the current XZ Block. + *

+ * If there is no unfinished Block open, this function will do nothing. + * (No empty XZ Block will be created.) + *

+ * This function can be useful, for example, to create + * random-accessible .xz files. + *

+ * Starting a new XZ Block means that the encoder state is reset. + * Doing this very often will increase the size of the compressed + * file a lot (more than plain flush() would do). + * + * @throws XZIOException + * XZ Stream has grown too big + * + * @throws XZIOException + * stream finished or closed + * + * @throws IOException may be thrown by the underlying output stream + */ + public void endBlock() throws IOException { + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + // NOTE: Once there is threading with multiple Blocks, it's possible + // that this function will be more like a barrier that returns + // before the last Block has been finished. + if (blockEncoder != null) { + try { + blockEncoder.finish(); + index.add(blockEncoder.getUnpaddedSize(), + blockEncoder.getUncompressedSize()); + blockEncoder = null; + } catch (IOException e) { + exception = e; + throw e; + } + } + } + + /** + * Flushes the encoder and calls out.flush(). + * All buffered pending data will then be decompressible from + * the output stream. + *

+ * Calling this function very often may increase the compressed + * file size a lot. The filter chain options may affect the size + * increase too. For example, with LZMA2 the HC4 match finder has + * smaller penalty with flushing than BT4. + *

+ * Some filters don't support flushing. If the filter chain has + * such a filter, flush() will call endBlock() + * before flushing. + * + * @throws XZIOException + * XZ Stream has grown too big + * + * @throws XZIOException + * stream finished or closed + * + * @throws IOException may be thrown by the underlying output stream + */ + public void flush() throws IOException { + if (exception != null) + throw exception; + + if (finished) + throw new XZIOException("Stream finished or closed"); + + try { + if (blockEncoder != null) { + if (filtersSupportFlushing) { + // This will eventually call out.flush() so + // no need to do it here again. + blockEncoder.flush(); + } else { + endBlock(); + out.flush(); + } + } else { + out.flush(); + } + } catch (IOException e) { + exception = e; + throw e; + } + } + + /** + * Finishes compression without closing the underlying stream. + * No more data can be written to this stream after finishing + * (calling write with an empty buffer is OK). + *

+ * Repeated calls to finish() do nothing unless + * an exception was thrown by this stream earlier. In that case + * the same exception is thrown again. + *

+ * After finishing, the stream may be closed normally with + * close(). If the stream will be closed anyway, there + * usually is no need to call finish() separately. + * + * @throws XZIOException + * XZ Stream has grown too big + * + * @throws IOException may be thrown by the underlying output stream + */ + public void finish() throws IOException { + if (!finished) { + // This checks for pending exceptions so we don't need to + // worry about it here. + endBlock(); + + try { + index.encode(out); + encodeStreamFooter(); + } catch (IOException e) { + exception = e; + throw e; + } + + // Set it to true only if everything goes fine. Setting it earlier + // would cause repeated calls to finish() do nothing instead of + // throwing an exception to indicate an earlier error. + finished = true; + } + } + + /** + * Finishes compression and closes the underlying stream. + * The underlying stream out is closed even if finishing + * fails. If both finishing and closing fail, the exception thrown + * by finish() is thrown and the exception from the failed + * out.close() is lost. + * + * @throws XZIOException + * XZ Stream has grown too big + * + * @throws IOException may be thrown by the underlying output stream + */ + public void close() throws IOException { + if (out != null) { + // If finish() throws an exception, it stores the exception to + // the variable "exception". So we can ignore the possible + // exception here. + try { + finish(); + } catch (IOException e) {} + + try { + out.close(); + } catch (IOException e) { + // Remember the exception but only if there is no previous + // pending exception. 
+ if (exception == null) + exception = e; + } + + out = null; + } + + if (exception != null) + throw exception; + } + + private void encodeStreamFlags(byte[] buf, int off) { + buf[off] = 0x00; + buf[off + 1] = (byte)streamFlags.checkType; + } + + private void encodeStreamHeader() throws IOException { + out.write(XZ.HEADER_MAGIC); + + byte[] buf = new byte[2]; + encodeStreamFlags(buf, 0); + out.write(buf); + + EncoderUtil.writeCRC32(out, buf); + } + + private void encodeStreamFooter() throws IOException { + byte[] buf = new byte[6]; + long backwardSize = index.getIndexSize() / 4 - 1; + for (int i = 0; i < 4; ++i) + buf[i] = (byte)(backwardSize >>> (i * 8)); + + encodeStreamFlags(buf, 4); + + EncoderUtil.writeCRC32(out, buf); + out.write(buf); + out.write(XZ.FOOTER_MAGIC); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/check/CRC32.java b/org.tukani.xz/src/org/tukaani/xz/check/CRC32.java new file mode 100644 index 00000000..f1828987 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/check/CRC32.java @@ -0,0 +1,33 @@ +/* + * CRC32 + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.check; + +public class CRC32 extends Check { + private final java.util.zip.CRC32 state = new java.util.zip.CRC32(); + + public CRC32() { + size = 4; + name = "CRC32"; + } + + public void update(byte[] buf, int off, int len) { + state.update(buf, off, len); + } + + public byte[] finish() { + long value = state.getValue(); + byte[] buf = { (byte)(value), + (byte)(value >>> 8), + (byte)(value >>> 16), + (byte)(value >>> 24) }; + state.reset(); + return buf; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/check/CRC64.java b/org.tukani.xz/src/org/tukaani/xz/check/CRC64.java new file mode 100644 index 00000000..02b15b74 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/check/CRC64.java @@ -0,0 +1,54 @@ +/* + * CRC64 + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.check; + +public class CRC64 extends Check { + private static final long poly = 0xC96C5795D7870F42L; + private static final long[] crcTable = new long[256]; + + private long crc = -1; + + static { + for (int b = 0; b < crcTable.length; ++b) { + long r = b; + for (int i = 0; i < 8; ++i) { + if ((r & 1) == 1) + r = (r >>> 1) ^ poly; + else + r >>>= 1; + } + + crcTable[b] = r; + } + } + + public CRC64() { + size = 8; + name = "CRC64"; + } + + public void update(byte[] buf, int off, int len) { + int end = off + len; + + while (off < end) + crc = crcTable[(buf[off++] ^ (int)crc) & 0xFF] ^ (crc >>> 8); + } + + public byte[] finish() { + long value = ~crc; + crc = -1; + + byte[] buf = new byte[8]; + for (int i = 0; i < buf.length; ++i) + buf[i] = (byte)(value >> (i * 8)); + + return buf; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/check/Check.java b/org.tukani.xz/src/org/tukaani/xz/check/Check.java new file mode 100644 index 00000000..02c011e4 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/check/Check.java @@ -0,0 +1,57 @@ +/* + * Check + * + * Author: 
Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.check; + +import org.tukaani.xz.XZ; +import org.tukaani.xz.UnsupportedOptionsException; + +public abstract class Check { + int size; + String name; + + public abstract void update(byte[] buf, int off, int len); + public abstract byte[] finish(); + + public void update(byte[] buf) { + update(buf, 0, buf.length); + } + + public int getSize() { + return size; + } + + public String getName() { + return name; + } + + public static Check getInstance(int checkType) + throws UnsupportedOptionsException { + switch (checkType) { + case XZ.CHECK_NONE: + return new None(); + + case XZ.CHECK_CRC32: + return new CRC32(); + + case XZ.CHECK_CRC64: + return new CRC64(); + + case XZ.CHECK_SHA256: + try { + return new SHA256(); + } catch (java.security.NoSuchAlgorithmException e) {} + + break; + } + + throw new UnsupportedOptionsException( + "Unsupported Check ID " + checkType); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/check/None.java b/org.tukani.xz/src/org/tukaani/xz/check/None.java new file mode 100644 index 00000000..b07c8e66 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/check/None.java @@ -0,0 +1,24 @@ +/* + * None + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.check; + +public class None extends Check { + public None() { + size = 0; + name = "None"; + } + + public void update(byte[] buf, int off, int len) {} + + public byte[] finish() { + byte[] empty = new byte[0]; + return empty; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/check/SHA256.java b/org.tukani.xz/src/org/tukaani/xz/check/SHA256.java new file mode 100644 index 00000000..66503c79 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/check/SHA256.java @@ -0,0 +1,30 @@ +/* + * SHA256 + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.check; + +public class SHA256 extends Check { + private final java.security.MessageDigest sha256; + + public SHA256() throws java.security.NoSuchAlgorithmException { + size = 32; + name = "SHA-256"; + sha256 = java.security.MessageDigest.getInstance("SHA-256"); + } + + public void update(byte[] buf, int off, int len) { + sha256.update(buf, off, len); + } + + public byte[] finish() { + byte[] buf = sha256.digest(); + sha256.reset(); + return buf; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/common/DecoderUtil.java b/org.tukani.xz/src/org/tukaani/xz/common/DecoderUtil.java new file mode 100644 index 00000000..77ba4413 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/common/DecoderUtil.java @@ -0,0 +1,121 @@ +/* + * DecoderUtil + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.common; + +import java.io.InputStream; +import java.io.IOException; +import java.io.EOFException; +import java.util.zip.CRC32; +import org.tukaani.xz.XZ; +import org.tukaani.xz.XZFormatException; +import org.tukaani.xz.CorruptedInputException; +import org.tukaani.xz.UnsupportedOptionsException; + +public class DecoderUtil extends Util { + public static boolean isCRC32Valid(byte[] buf, int off, int len, + int ref_off) { + CRC32 crc32 = new CRC32(); + crc32.update(buf, off, len); + long value = crc32.getValue(); + + for (int i = 0; i < 4; ++i) + if ((byte)(value >>> (i * 8)) != buf[ref_off + i]) + return false; + + return true; + } + + public static StreamFlags decodeStreamHeader(byte[] buf) + throws IOException { + for (int i = 0; i < XZ.HEADER_MAGIC.length; ++i) + if (buf[i] != XZ.HEADER_MAGIC[i]) + throw new XZFormatException(); + + if (!isCRC32Valid(buf, XZ.HEADER_MAGIC.length, 2, + XZ.HEADER_MAGIC.length + 2)) + throw new CorruptedInputException("XZ Stream Header is corrupt"); + + try { + return decodeStreamFlags(buf, XZ.HEADER_MAGIC.length); + } catch (UnsupportedOptionsException e) { + throw new UnsupportedOptionsException( + "Unsupported options in XZ Stream Header"); + } + } + + public static StreamFlags decodeStreamFooter(byte[] buf) + throws IOException { + if (buf[10] != XZ.FOOTER_MAGIC[0] || buf[11] != XZ.FOOTER_MAGIC[1]) { + // NOTE: The exception could be XZFormatException too. + // It depends on the situation which one is better. 
+ throw new CorruptedInputException("XZ Stream Footer is corrupt"); + } + + if (!isCRC32Valid(buf, 4, 6, 0)) + throw new CorruptedInputException("XZ Stream Footer is corrupt"); + + StreamFlags streamFlags; + try { + streamFlags = decodeStreamFlags(buf, 8); + } catch (UnsupportedOptionsException e) { + throw new UnsupportedOptionsException( + "Unsupported options in XZ Stream Footer"); + } + + streamFlags.backwardSize = 0; + for (int i = 0; i < 4; ++i) + streamFlags.backwardSize |= (buf[i + 4] & 0xFF) << (i * 8); + + streamFlags.backwardSize = (streamFlags.backwardSize + 1) * 4; + + return streamFlags; + } + + private static StreamFlags decodeStreamFlags(byte[] buf, int off) + throws UnsupportedOptionsException { + if (buf[off] != 0x00 || (buf[off + 1] & 0xFF) >= 0x10) + throw new UnsupportedOptionsException(); + + StreamFlags streamFlags = new StreamFlags(); + streamFlags.checkType = buf[off + 1]; + + return streamFlags; + } + + public static boolean areStreamFlagsEqual(StreamFlags a, StreamFlags b) { + // backwardSize is intentionally not compared. + return a.checkType == b.checkType; + } + + public static long decodeVLI(InputStream in) throws IOException { + int b = in.read(); + if (b == -1) + throw new EOFException(); + + long num = b & 0x7F; + int i = 0; + + while ((b & 0x80) != 0x00) { + if (++i >= VLI_SIZE_MAX) + throw new CorruptedInputException(); + + b = in.read(); + if (b == -1) + throw new EOFException(); + + if (b == 0x00) + throw new CorruptedInputException(); + + num |= (long)(b & 0x7F) << (i * 7); + } + + return num; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/common/EncoderUtil.java b/org.tukani.xz/src/org/tukaani/xz/common/EncoderUtil.java new file mode 100644 index 00000000..57f688b5 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/common/EncoderUtil.java @@ -0,0 +1,36 @@ +/* + * EncoderUtil + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.common; + +import java.io.OutputStream; +import java.io.IOException; +import java.util.zip.CRC32; + +public class EncoderUtil extends Util { + public static void writeCRC32(OutputStream out, byte[] buf) + throws IOException { + CRC32 crc32 = new CRC32(); + crc32.update(buf); + long value = crc32.getValue(); + + for (int i = 0; i < 4; ++i) + out.write((byte)(value >>> (i * 8))); + } + + public static void encodeVLI(OutputStream out, long num) + throws IOException { + while (num >= 0x80) { + out.write((byte)(num | 0x80)); + num >>>= 7; + } + + out.write((byte)num); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/common/StreamFlags.java b/org.tukani.xz/src/org/tukaani/xz/common/StreamFlags.java new file mode 100644 index 00000000..b306987d --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/common/StreamFlags.java @@ -0,0 +1,15 @@ +/* + * StreamFlags + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.common; + +public class StreamFlags { + public int checkType = -1; + public long backwardSize = -1; +} diff --git a/org.tukani.xz/src/org/tukaani/xz/common/Util.java b/org.tukani.xz/src/org/tukaani/xz/common/Util.java new file mode 100644 index 00000000..c4324ce0 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/common/Util.java @@ -0,0 +1,28 @@ +/* + * Util + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
/**
 * Constants and helpers shared by the encoder and decoder utilities.
 */
public class Util {
    public static final int STREAM_HEADER_SIZE = 12;
    public static final long BACKWARD_SIZE_MAX = 1L << 34;
    public static final int BLOCK_HEADER_SIZE_MAX = 1024;
    public static final long VLI_MAX = Long.MAX_VALUE;
    public static final int VLI_SIZE_MAX = 9;

    /**
     * Returns the number of seven-bit groups needed to hold
     * <code>num</code>.
     * <p>
     * The result is between 1 and {@link #VLI_SIZE_MAX} for every
     * non-negative value. A negative argument yields 10, which is larger
     * than <code>VLI_SIZE_MAX</code>.
     *
     * @param num value to measure
     * @return number of seven-bit groups, always at least 1
     */
    public static int getVLISize(long num) {
        int size = 0;
        do {
            ++size;
            // Unsigned shift: an arithmetic shift of a negative value
            // converges to -1 and would never end the loop.
            num >>>= 7;
        } while (num != 0);

        return size;
    }
}
+ */ + +package org.tukaani.xz.delta; + +public class DeltaDecoder extends DeltaCoder { + public DeltaDecoder(int distance) { + super(distance); + } + + public void decode(byte[] buf, int off, int len) { + int end = off + len; + for (int i = off; i < end; ++i) { + buf[i] += history[(distance + pos) & DISTANCE_MASK]; + history[pos-- & DISTANCE_MASK] = buf[i]; + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/delta/DeltaEncoder.java b/org.tukani.xz/src/org/tukaani/xz/delta/DeltaEncoder.java new file mode 100644 index 00000000..17accce9 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/delta/DeltaEncoder.java @@ -0,0 +1,24 @@ +/* + * DeltaEncoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.delta; + +public class DeltaEncoder extends DeltaCoder { + public DeltaEncoder(int distance) { + super(distance); + } + + public void encode(byte[] in, int in_off, int len, byte[] out) { + for (int i = 0; i < len; ++i) { + byte tmp = history[(distance + pos) & DISTANCE_MASK]; + history[pos-- & DISTANCE_MASK] = in[in_off + i]; + out[i] = (byte)(in[in_off + i] - tmp); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/index/BlockInfo.java b/org.tukani.xz/src/org/tukaani/xz/index/BlockInfo.java new file mode 100644 index 00000000..babae7f7 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/index/BlockInfo.java @@ -0,0 +1,38 @@ +/* + * BlockInfo + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.index; + +import org.tukaani.xz.common.StreamFlags; + +public class BlockInfo { + public int blockNumber = -1; + public long compressedOffset = -1; + public long uncompressedOffset = -1; + public long unpaddedSize = -1; + public long uncompressedSize = -1; + + IndexDecoder index; + + public BlockInfo(IndexDecoder indexOfFirstStream) { + index = indexOfFirstStream; + } + + public int getCheckType() { + return index.getStreamFlags().checkType; + } + + public boolean hasNext() { + return index.hasRecord(blockNumber + 1); + } + + public void setNext() { + index.setBlockInfo(this, blockNumber + 1); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/index/IndexBase.java b/org.tukani.xz/src/org/tukaani/xz/index/IndexBase.java new file mode 100644 index 00000000..e5561052 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/index/IndexBase.java @@ -0,0 +1,56 @@ +/* + * IndexBase + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.index; + +import org.tukaani.xz.common.Util; +import org.tukaani.xz.XZIOException; + +abstract class IndexBase { + private final XZIOException invalidIndexException; + long blocksSum = 0; + long uncompressedSum = 0; + long indexListSize = 0; + long recordCount = 0; + + IndexBase(XZIOException invalidIndexException) { + this.invalidIndexException = invalidIndexException; + } + + private long getUnpaddedIndexSize() { + // Index Indicator + Number of Records + List of Records + CRC32 + return 1 + Util.getVLISize(recordCount) + indexListSize + 4; + } + + public long getIndexSize() { + return (getUnpaddedIndexSize() + 3) & ~3; + } + + public long getStreamSize() { + return Util.STREAM_HEADER_SIZE + blocksSum + getIndexSize() + + Util.STREAM_HEADER_SIZE; + } + + int getIndexPaddingSize() { + return (int)((4 - getUnpaddedIndexSize()) & 3); + } + + void add(long unpaddedSize, long uncompressedSize) throws XZIOException { + blocksSum += (unpaddedSize + 3) & ~3; + uncompressedSum += uncompressedSize; + indexListSize += Util.getVLISize(unpaddedSize) + + Util.getVLISize(uncompressedSize); + ++recordCount; + + if (blocksSum < 0 || uncompressedSum < 0 + || getIndexSize() > Util.BACKWARD_SIZE_MAX + || getStreamSize() < 0) + throw invalidIndexException; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/index/IndexDecoder.java b/org.tukani.xz/src/org/tukaani/xz/index/IndexDecoder.java new file mode 100644 index 00000000..a3ae9863 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/index/IndexDecoder.java @@ -0,0 +1,223 @@ +/* + * IndexDecoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
/**
 * Decoded Index of one XZ Stream, read from a seekable input.
 * <p>
 * The Records are stored as running totals in two parallel arrays so that
 * a Block can be located by binary search. When several Streams are
 * concatenated, {@link #setOffsets} shifts this Index by the totals of
 * the Stream before it.
 */
public class IndexDecoder extends IndexBase {
    private final StreamFlags streamFlags;
    private final long streamPadding;
    private final int memoryUsage;

    // Unpadded Size and Uncompressed Size fields, stored as cumulative
    // sums: entry i holds the total up to and including Record i.
    private final long[] unpadded;
    private final long[] uncompressed;

    // Uncompressed size of the largest Block. It is used by
    // SeekableXZInputStream to find out the largest Block of the .xz file.
    private long largestBlockSize = 0;

    // Offsets relative to the beginning of the .xz file. These are all zero
    // for the first Stream in the file.
    private int recordOffset = 0;
    private long compressedOffset = 0;
    private long uncompressedOffset = 0;

    /**
     * Reads and validates the Index at the current position of
     * <code>in</code>.
     *
     * @param in                input positioned at the Index Indicator
     * @param streamFooterFlags Stream Flags from the Stream Footer; its
     *                          <code>backwardSize</code> gives the
     *                          expected size of the Index
     * @param streamPadding     stored and later used by
     *                          {@link #setOffsets}
     * @param memoryLimit       limit compared against the estimated
     *                          memory usage; a negative value disables
     *                          the check
     *
     * @throws CorruptedInputException     the Index is not valid
     * @throws UnsupportedOptionsException the Record count does not fit
     *                                     into an int
     * @throws MemoryLimitException        the estimate exceeds
     *                                     <code>memoryLimit</code>
     * @throws IOException                 may be thrown by <code>in</code>
     */
    public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
                        long streamPadding, int memoryLimit)
            throws IOException {
        super(new CorruptedInputException("XZ Index is corrupt"));
        this.streamFlags = streamFooterFlags;
        this.streamPadding = streamPadding;

        // If endPos is exceeded before the CRC32 field has been decoded,
        // the Index is corrupt.
        long endPos = in.position() + streamFooterFlags.backwardSize - 4;

        // Everything read through inChecked is added to crc32; the stored
        // CRC32 itself is read directly from "in" at the end.
        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        CheckedInputStream inChecked = new CheckedInputStream(in, crc32);

        // Index Indicator
        if (inChecked.read() != 0x00)
            throw new CorruptedInputException("XZ Index is corrupt");

        try {
            // Number of Records
            long count = DecoderUtil.decodeVLI(inChecked);

            // Catch Record counts that are obviously too high to be valid.
            // This test isn't exact because it ignores Index Indicator,
            // Number of Records, and CRC32 fields, but this is good enough
            // to catch the most obvious problems.
            if (count >= streamFooterFlags.backwardSize / 2)
                throw new CorruptedInputException("XZ Index is corrupt");

            // If the Record count doesn't fit into an int, we cannot
            // allocate the arrays to hold the Records.
            if (count > Integer.MAX_VALUE)
                throw new UnsupportedOptionsException("XZ Index has over "
                        + Integer.MAX_VALUE + " Records");

            // Calculate approximate memory requirements and check the
            // memory usage limit.
            memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
            if (memoryLimit >= 0 && memoryUsage > memoryLimit)
                throw new MemoryLimitException(memoryUsage, memoryLimit);

            // Allocate the arrays for the Records.
            unpadded = new long[(int)count];
            uncompressed = new long[(int)count];
            int record = 0;

            // Decode the Records.
            for (int i = (int)count; i > 0; --i) {
                // Get the next Record.
                long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
                long uncompressedSize = DecoderUtil.decodeVLI(inChecked);

                // Check that the input position stays sane. Since this is
                // checked only once per loop iteration instead of for
                // every input byte read, it's still possible that
                // EOFException gets thrown with corrupt input.
                if (in.position() > endPos)
                    throw new CorruptedInputException("XZ Index is corrupt");

                // Add the new Record. The arrays take the totals as they
                // are before super.add() updates blocksSum and
                // uncompressedSum, plus the size of this Record.
                unpadded[record] = blocksSum + unpaddedSize;
                uncompressed[record] = uncompressedSum + uncompressedSize;
                ++record;
                super.add(unpaddedSize, uncompressedSize);
                assert record == recordCount;

                // Remember the uncompressed size of the largest Block.
                if (largestBlockSize < uncompressedSize)
                    largestBlockSize = uncompressedSize;
            }
        } catch (EOFException e) {
            // EOFException is caught just in case a corrupt input causes
            // DecoderUtil.decodeVLI to read too much at once.
            throw new CorruptedInputException("XZ Index is corrupt");
        }

        // Validate that the size of the Index field matches
        // Backward Size.
        int indexPaddingSize = getIndexPaddingSize();
        if (in.position() + indexPaddingSize != endPos)
            throw new CorruptedInputException("XZ Index is corrupt");

        // Index Padding
        while (indexPaddingSize-- > 0)
            if (inChecked.read() != 0x00)
                throw new CorruptedInputException("XZ Index is corrupt");

        // CRC32, stored least significant byte first
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if (((value >>> (i * 8)) & 0xFF) != in.read())
                throw new CorruptedInputException("XZ Index is corrupt");
    }

    /**
     * Positions this Index directly after <code>prev</code> by adding the
     * Record count, Stream size plus padding, and uncompressed size of
     * <code>prev</code> to its own offsets.
     */
    public void setOffsets(IndexDecoder prev) {
        // NOTE: SeekableXZInputStream checks that the total number of Blocks
        // in concatenated Streams fits into an int.
        recordOffset = prev.recordOffset + (int)prev.recordCount;
        compressedOffset = prev.compressedOffset
                           + prev.getStreamSize() + prev.streamPadding;
        assert (compressedOffset & 3) == 0;
        uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
    }

    /** Returns the estimate computed in the constructor. */
    public int getMemoryUsage() {
        return memoryUsage;
    }

    public StreamFlags getStreamFlags() {
        return streamFlags;
    }

    public int getRecordCount() {
        // It was already checked in the constructor that it fits into an int.
        // Otherwise we couldn't have allocated the arrays.
        return (int)recordCount;
    }

    public long getUncompressedSize() {
        return uncompressedSum;
    }

    public long getLargestBlockSize() {
        return largestBlockSize;
    }

    /** Tells whether <code>pos</code> lies inside this Stream's uncompressed data. */
    public boolean hasUncompressedOffset(long pos) {
        return pos >= uncompressedOffset
               && pos < uncompressedOffset + uncompressedSum;
    }

    /** Tells whether the file-wide Block number belongs to this Index. */
    public boolean hasRecord(int blockNumber) {
        return blockNumber >= recordOffset
               && blockNumber < recordOffset + recordCount;
    }

    /**
     * Fills <code>info</code> with the Block that contains the
     * uncompressed position <code>target</code>. The asserts require
     * <code>target</code> to lie inside this Stream.
     */
    public void locateBlock(BlockInfo info, long target) {
        assert target >= uncompressedOffset;
        target -= uncompressedOffset;
        assert target < uncompressedSum;

        // Binary search for the first Record whose cumulative
        // uncompressed total is greater than target.
        int left = 0;
        int right = unpadded.length - 1;

        while (left < right) {
            int i = left + (right - left) / 2;

            if (uncompressed[i] <= target)
                left = i + 1;
            else
                right = i;
        }

        setBlockInfo(info, recordOffset + left);
    }

    /**
     * Fills <code>info</code> with the offsets and sizes of the Block with
     * the given file-wide number.
     */
    public void setBlockInfo(BlockInfo info, int blockNumber) {
        // The caller has checked that the given Block number is inside
        // this Index.
        assert blockNumber >= recordOffset;
        assert blockNumber - recordOffset < recordCount;

        info.index = this;
        info.blockNumber = blockNumber;

        int pos = blockNumber - recordOffset;

        if (pos == 0) {
            info.compressedOffset = 0;
            info.uncompressedOffset = 0;
        } else {
            // The previous cumulative total, rounded up to a multiple of
            // four, is where this Block starts.
            info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
            info.uncompressedOffset = uncompressed[pos - 1];
        }

        info.unpaddedSize = unpadded[pos] - info.compressedOffset;
        info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;

        // Convert the Stream-relative offsets to file-relative ones.
        info.compressedOffset += compressedOffset
                                 + DecoderUtil.STREAM_HEADER_SIZE;
        info.uncompressedOffset += uncompressedOffset;
    }
}
+ */ + +package org.tukaani.xz.index; + +import java.io.OutputStream; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.zip.CheckedOutputStream; +import org.tukaani.xz.common.EncoderUtil; +import org.tukaani.xz.XZIOException; + +public class IndexEncoder extends IndexBase { + private final ArrayList records = new ArrayList(); + + public IndexEncoder() { + super(new XZIOException("XZ Stream or its Index has grown too big")); + } + + public void add(long unpaddedSize, long uncompressedSize) + throws XZIOException { + super.add(unpaddedSize, uncompressedSize); + records.add(new IndexRecord(unpaddedSize, uncompressedSize)); + } + + public void encode(OutputStream out) throws IOException { + java.util.zip.CRC32 crc32 = new java.util.zip.CRC32(); + CheckedOutputStream outChecked = new CheckedOutputStream(out, crc32); + + // Index Indicator + outChecked.write(0x00); + + // Number of Records + EncoderUtil.encodeVLI(outChecked, recordCount); + + // List of Records + for (Iterator i = records.iterator(); i.hasNext(); ) { + IndexRecord record = (IndexRecord)i.next(); + EncoderUtil.encodeVLI(outChecked, record.unpadded); + EncoderUtil.encodeVLI(outChecked, record.uncompressed); + } + + // Index Padding + for (int i = getIndexPaddingSize(); i > 0; --i) + outChecked.write(0x00); + + // CRC32 + long value = crc32.getValue(); + for (int i = 0; i < 4; ++i) + out.write((byte)(value >>> (i * 8))); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/index/IndexHash.java b/org.tukani.xz/src/org/tukaani/xz/index/IndexHash.java new file mode 100644 index 00000000..ab168c69 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/index/IndexHash.java @@ -0,0 +1,94 @@ +/* + * IndexHash + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
/**
 * Index bookkeeping that keeps only a hash of the Records instead of the
 * Records themselves. {@link #validate} compares it against the Index
 * field read from the input.
 */
public class IndexHash extends IndexBase {
    private org.tukaani.xz.check.Check hash;

    /**
     * Uses SHA-256 for the hash and falls back to CRC32 when the runtime
     * has no SHA-256 digest.
     */
    public IndexHash() {
        super(new CorruptedInputException());

        try {
            hash = new org.tukaani.xz.check.SHA256();
        } catch (java.security.NoSuchAlgorithmException e) {
            hash = new org.tukaani.xz.check.CRC32();
        }
    }

    /**
     * Accounts for one Record and adds both sizes to the hash as two
     * 8-byte values.
     *
     * @throws XZIOException thrown by the base class when a total leaves
     *                       its valid range
     */
    public void add(long unpaddedSize, long uncompressedSize)
            throws XZIOException {
        super.add(unpaddedSize, uncompressedSize);

        ByteBuffer buf = ByteBuffer.allocate(2 * 8);
        buf.putLong(unpaddedSize);
        buf.putLong(uncompressedSize);
        hash.update(buf.array());
    }

    /**
     * Reads the Index field from <code>in</code> and checks that it
     * matches the Records added to this object.
     *
     * @throws CorruptedInputException the Index does not match or is not
     *                                 valid
     * @throws IOException             may be thrown by <code>in</code>
     */
    public void validate(InputStream in) throws IOException {
        // Index Indicator (0x00) has already been read by BlockInputStream
        // so add 0x00 to the CRC32 here.
        java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
        crc32.update('\0');
        CheckedInputStream inChecked = new CheckedInputStream(in, crc32);

        // Get and validate the Number of Records field.
        long storedRecordCount = DecoderUtil.decodeVLI(inChecked);
        if (storedRecordCount != recordCount)
            throw new CorruptedInputException("XZ Index is corrupt");

        // Decode and hash the Index field and compare it to
        // the hash value calculated from the decoded Blocks.
        IndexHash stored = new IndexHash();
        for (long i = 0; i < recordCount; ++i) {
            long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
            long uncompressedSize = DecoderUtil.decodeVLI(inChecked);

            try {
                stored.add(unpaddedSize, uncompressedSize);
            } catch (XZIOException e) {
                throw new CorruptedInputException("XZ Index is corrupt");
            }

            // Stop as soon as a stored total exceeds the expected one.
            if (stored.blocksSum > blocksSum
                    || stored.uncompressedSum > uncompressedSum
                    || stored.indexListSize > indexListSize)
                throw new CorruptedInputException("XZ Index is corrupt");
        }

        if (stored.blocksSum != blocksSum
                || stored.uncompressedSum != uncompressedSum
                || stored.indexListSize != indexListSize
                || !Arrays.equals(stored.hash.finish(), hash.finish()))
            throw new CorruptedInputException("XZ Index is corrupt");

        // Index Padding
        DataInputStream inData = new DataInputStream(inChecked);
        for (int i = getIndexPaddingSize(); i > 0; --i)
            if (inData.readUnsignedByte() != 0x00)
                throw new CorruptedInputException("XZ Index is corrupt");

        // CRC32. The expected value is taken before the stored bytes are
        // read, because those reads also pass through inChecked.
        long value = crc32.getValue();
        for (int i = 0; i < 4; ++i)
            if (((value >>> (i * 8)) & 0xFF) != inData.readUnsignedByte())
                throw new CorruptedInputException("XZ Index is corrupt");
    }
}
/**
 * Binary Tree match finder with 2-, 3-, and 4-byte hashing.
 * <p>
 * <code>tree</code> holds two ints for every cyclic position; they are
 * addressed as <code>pos &lt;&lt; 1</code> and
 * <code>(pos &lt;&lt; 1) + 1</code>.
 */
final class BT4 extends LZEncoder {
    private final Hash234 hash;
    private final int[] tree;
    private final Matches matches;
    private final int depthLimit;

    private final int cyclicSize;
    private int cyclicPos = -1;
    private int lzPos;

    /** Returns the memory usage estimate for the given dictionary size. */
    static int getMemoryUsage(int dictSize) {
        return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
    }

    /**
     * @param depthLimit maximum number of tree nodes visited per search;
     *                   a value of zero or less selects
     *                   <code>16 + niceLen / 2</code>
     */
    BT4(int dictSize, int beforeSizeMin, int readAheadMax,
            int niceLen, int matchLenMax, int depthLimit) {
        super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax);

        cyclicSize = dictSize + 1;
        lzPos = cyclicSize;

        hash = new Hash234(dictSize);
        tree = new int[cyclicSize * 2];

        // Subtracting 1 because the shortest match that this match
        // finder can find is 2 bytes, so there's no need to reserve
        // space for one-byte matches.
        matches = new Matches(niceLen - 1);

        this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
    }

    /**
     * Advances by one byte and returns the value of
     * <code>movePos(niceLen, 4)</code>. When that value is non-zero,
     * <code>lzPos</code> and <code>cyclicPos</code> are advanced too.
     */
    private int movePos() {
        int avail = movePos(niceLen, 4);

        if (avail != 0) {
            // Rebase the stored positions before lzPos can overflow.
            if (++lzPos == Integer.MAX_VALUE) {
                int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
                hash.normalize(normalizationOffset);
                normalize(tree, normalizationOffset);
                lzPos -= normalizationOffset;
            }

            if (++cyclicPos == cyclicSize)
                cyclicPos = 0;
        }

        return avail;
    }

    /**
     * Advances by one byte and returns the matches found at the new
     * position. The same <code>Matches</code> object is reused for every
     * call.
     */
    public Matches getMatches() {
        matches.count = 0;

        int matchLenLimit = matchLenMax;
        int niceLenLimit = niceLen;
        int avail = movePos();

        if (avail < matchLenLimit) {
            if (avail == 0)
                return matches;

            matchLenLimit = avail;
            if (niceLenLimit > avail)
                niceLenLimit = avail;
        }

        hash.calcHashes(buf, readPos);
        int delta2 = lzPos - hash.getHash2Pos();
        int delta3 = lzPos - hash.getHash3Pos();
        int currentMatch = hash.getHash4Pos();
        hash.updateTables(lzPos);

        int lenBest = 0;

        // See if the hash from the first two bytes found a match.
        // The hashing algorithm guarantees that if the first byte
        // matches, also the second byte does, so there's no need to
        // test the second byte.
        if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
            lenBest = 2;
            matches.len[0] = 2;
            matches.dist[0] = delta2 - 1;
            matches.count = 1;
        }

        // See if the hash from the first three bytes found a match that
        // is different from the match possibly found by the two-byte hash.
        // Also here the hashing algorithm guarantees that if the first byte
        // matches, also the next two bytes do.
        if (delta2 != delta3 && delta3 < cyclicSize
                && buf[readPos - delta3] == buf[readPos]) {
            lenBest = 3;
            matches.dist[matches.count++] = delta3 - 1;
            delta2 = delta3;
        }

        // If a match was found, see how long it is.
        if (matches.count > 0) {
            while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
                                              == buf[readPos + lenBest])
                ++lenBest;

            matches.len[matches.count - 1] = lenBest;

            // Return if it is long enough (niceLen or reached the end of
            // the dictionary).
            if (lenBest >= niceLenLimit) {
                skip(niceLenLimit, currentMatch);
                return matches;
            }
        }

        // Long enough match wasn't found so easily. Look for better matches
        // from the binary tree.
        if (lenBest < 3)
            lenBest = 3;

        int depth = depthLimit;

        int ptr0 = (cyclicPos << 1) + 1;
        int ptr1 = cyclicPos << 1;
        int len0 = 0;
        int len1 = 0;

        while (true) {
            int delta = lzPos - currentMatch;

            // Return if the search depth limit has been reached or
            // if the distance of the potential match exceeds the
            // dictionary size.
            if (depth-- == 0 || delta >= cyclicSize) {
                tree[ptr0] = 0;
                tree[ptr1] = 0;
                return matches;
            }

            // Index of the tree pair of the candidate, wrapped around
            // the cyclic buffer.
            int pair = (cyclicPos - delta
                        + (delta > cyclicPos ? cyclicSize : 0)) << 1;
            int len = Math.min(len0, len1);

            if (buf[readPos + len - delta] == buf[readPos + len]) {
                while (++len < matchLenLimit)
                    if (buf[readPos + len - delta] != buf[readPos + len])
                        break;

                if (len > lenBest) {
                    lenBest = len;
                    matches.len[matches.count] = len;
                    matches.dist[matches.count] = delta - 1;
                    ++matches.count;

                    if (len >= niceLenLimit) {
                        tree[ptr1] = tree[pair];
                        tree[ptr0] = tree[pair + 1];
                        return matches;
                    }
                }
            }

            // Continue on the side selected by the unsigned comparison
            // of the first differing byte.
            if ((buf[readPos + len - delta] & 0xFF)
                    < (buf[readPos + len] & 0xFF)) {
                tree[ptr1] = currentMatch;
                ptr1 = pair + 1;
                currentMatch = tree[ptr1];
                len1 = len;
            } else {
                tree[ptr0] = currentMatch;
                ptr0 = pair;
                currentMatch = tree[ptr0];
                len0 = len;
            }
        }
    }

    /**
     * Updates the tree for the current position without recording any
     * matches.
     */
    private void skip(int niceLenLimit, int currentMatch) {
        int depth = depthLimit;

        int ptr0 = (cyclicPos << 1) + 1;
        int ptr1 = cyclicPos << 1;
        int len0 = 0;
        int len1 = 0;

        while (true) {
            int delta = lzPos - currentMatch;

            if (depth-- == 0 || delta >= cyclicSize) {
                tree[ptr0] = 0;
                tree[ptr1] = 0;
                return;
            }

            int pair = (cyclicPos - delta
                        + (delta > cyclicPos ? cyclicSize : 0)) << 1;
            int len = Math.min(len0, len1);

            if (buf[readPos + len - delta] == buf[readPos + len]) {
                // No need to look for longer matches than niceLenLimit
                // because we only are updating the tree, not returning
                // matches found to the caller.
                do {
                    if (++len == niceLenLimit) {
                        tree[ptr1] = tree[pair];
                        tree[ptr0] = tree[pair + 1];
                        return;
                    }
                } while (buf[readPos + len - delta] == buf[readPos + len]);
            }

            if ((buf[readPos + len - delta] & 0xFF)
                    < (buf[readPos + len] & 0xFF)) {
                tree[ptr1] = currentMatch;
                ptr1 = pair + 1;
                currentMatch = tree[ptr1];
                len1 = len;
            } else {
                tree[ptr0] = currentMatch;
                ptr0 = pair;
                currentMatch = tree[ptr0];
                len0 = len;
            }
        }
    }

    /**
     * Advances by <code>len</code> bytes, updating the hash tables and
     * the tree for each position where <code>movePos()</code> returns a
     * non-zero value.
     */
    public void skip(int len) {
        while (len-- > 0) {
            int niceLenLimit = niceLen;
            int avail = movePos();

            if (avail < niceLenLimit) {
                if (avail == 0)
                    continue;

                niceLenLimit = avail;
            }

            hash.calcHashes(buf, readPos);
            int currentMatch = hash.getHash4Pos();
            hash.updateTables(lzPos);

            skip(niceLenLimit, currentMatch);
        }
    }
}
+ */ +class CRC32Hash { + private static final int CRC32_POLY = 0xEDB88320; + + static final int[] crcTable = new int[256]; + + static { + for (int i = 0; i < 256; ++i) { + int r = i; + + for (int j = 0; j < 8; ++j) { + if ((r & 1) != 0) + r = (r >>> 1) ^ CRC32_POLY; + else + r >>>= 1; + } + + crcTable[i] = r; + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lz/HC4.java b/org.tukani.xz/src/org/tukaani/xz/lz/HC4.java new file mode 100644 index 00000000..0f49fd4e --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lz/HC4.java @@ -0,0 +1,200 @@ +/* + * Hash Chain match finder with 2-, 3-, and 4-byte hashing + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lz; + +final class HC4 extends LZEncoder { + private final Hash234 hash; + private final int[] chain; + private final Matches matches; + private final int depthLimit; + + private final int cyclicSize; + private int cyclicPos = -1; + private int lzPos; + + /** + * Gets approximate memory usage of the match finder as kibibytes. + */ + static int getMemoryUsage(int dictSize) { + return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10; + } + + /** + * Creates a new LZEncoder with the HC4 match finder. + * See LZEncoder.getInstance for parameter descriptions. + */ + HC4(int dictSize, int beforeSizeMin, int readAheadMax, + int niceLen, int matchLenMax, int depthLimit) { + super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax); + + hash = new Hash234(dictSize); + + // +1 because we need dictSize bytes of history + the current byte. + cyclicSize = dictSize + 1; + chain = new int[cyclicSize]; + lzPos = cyclicSize; + + // Subtracting 1 because the shortest match that this match + // finder can find is 2 bytes, so there's no need to reserve + // space for one-byte matches. 
+ matches = new Matches(niceLen - 1); + + // Use a default depth limit if no other value was specified. + // The default is just something based on experimentation; + // it's nothing magic. + this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4; + } + + /** + * Moves to the next byte, checks that there is enough available space, + * and possibly normalizes the hash tables and the hash chain. + * + * @return number of bytes available, including the current byte + */ + private int movePos() { + int avail = movePos(4, 4); + + if (avail != 0) { + if (++lzPos == Integer.MAX_VALUE) { + int normalizationOffset = Integer.MAX_VALUE - cyclicSize; + hash.normalize(normalizationOffset); + normalize(chain, normalizationOffset); + lzPos -= normalizationOffset; + } + + if (++cyclicPos == cyclicSize) + cyclicPos = 0; + } + + return avail; + } + + public Matches getMatches() { + matches.count = 0; + int matchLenLimit = matchLenMax; + int niceLenLimit = niceLen; + int avail = movePos(); + + if (avail < matchLenLimit) { + if (avail == 0) + return matches; + + matchLenLimit = avail; + if (niceLenLimit > avail) + niceLenLimit = avail; + } + + hash.calcHashes(buf, readPos); + int delta2 = lzPos - hash.getHash2Pos(); + int delta3 = lzPos - hash.getHash3Pos(); + int currentMatch = hash.getHash4Pos(); + hash.updateTables(lzPos); + + chain[cyclicPos] = currentMatch; + + int lenBest = 0; + + // See if the hash from the first two bytes found a match. + // The hashing algorithm guarantees that if the first byte + // matches, also the second byte does, so there's no need to + // test the second byte. + if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) { + lenBest = 2; + matches.len[0] = 2; + matches.dist[0] = delta2 - 1; + matches.count = 1; + } + + // See if the hash from the first three bytes found a match that + // is different from the match possibly found by the two-byte hash. 
+ // Also here the hashing algorithm guarantees that if the first byte + // matches, also the next two bytes do. + if (delta2 != delta3 && delta3 < cyclicSize + && buf[readPos - delta3] == buf[readPos]) { + lenBest = 3; + matches.dist[matches.count++] = delta3 - 1; + delta2 = delta3; + } + + // If a match was found, see how long it is. + if (matches.count > 0) { + while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2] + == buf[readPos + lenBest]) + ++lenBest; + + matches.len[matches.count - 1] = lenBest; + + // Return if it is long enough (niceLen or reached the end of + // the dictionary). + if (lenBest >= niceLenLimit) + return matches; + } + + // Long enough match wasn't found so easily. Look for better matches + // from the hash chain. + if (lenBest < 3) + lenBest = 3; + + int depth = depthLimit; + + while (true) { + int delta = lzPos - currentMatch; + + // Return if the search depth limit has been reached or + // if the distance of the potential match exceeds the + // dictionary size. + if (depth-- == 0 || delta >= cyclicSize) + return matches; + + currentMatch = chain[cyclicPos - delta + + (delta > cyclicPos ? cyclicSize : 0)]; + + // Test the first byte and the first new byte that would give us + // a match that is at least one byte longer than lenBest. This + // way, matches that are too short get skipped quickly. + if (buf[readPos + lenBest - delta] == buf[readPos + lenBest] + && buf[readPos - delta] == buf[readPos]) { + // Calculate the length of the match. + int len = 0; + while (++len < matchLenLimit) + if (buf[readPos + len - delta] != buf[readPos + len]) + break; + + // Use the match if and only if it is better than the longest + // match found so far. + if (len > lenBest) { + lenBest = len; + matches.len[matches.count] = len; + matches.dist[matches.count] = delta - 1; + ++matches.count; + + // Return if it is long enough (niceLen or reached the + // end of the dictionary). 
+ if (len >= niceLenLimit) + return matches; + } + } + } + } + + public void skip(int len) { + assert len >= 0; + + while (len-- > 0) { + if (movePos() != 0) { + // Update the hash chain and hash tables. + hash.calcHashes(buf, readPos); + chain[cyclicPos] = hash.getHash4Pos(); + hash.updateTables(lzPos); + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lz/Hash234.java b/org.tukani.xz/src/org/tukaani/xz/lz/Hash234.java new file mode 100644 index 00000000..8253bc04 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lz/Hash234.java @@ -0,0 +1,89 @@ +/* + * 2-, 3-, and 4-byte hashing + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lz; + +final class Hash234 extends CRC32Hash { + private static final int HASH_2_SIZE = 1 << 10; + private static final int HASH_2_MASK = HASH_2_SIZE - 1; + + private static final int HASH_3_SIZE = 1 << 16; + private static final int HASH_3_MASK = HASH_3_SIZE - 1; + + private final int hash4Mask; + + private final int[] hash2Table = new int[HASH_2_SIZE]; + private final int[] hash3Table = new int[HASH_3_SIZE]; + private final int[] hash4Table; + + private int hash2Value = 0; + private int hash3Value = 0; + private int hash4Value = 0; + + static int getHash4Size(int dictSize) { + int h = dictSize - 1; + h |= h >>> 1; + h |= h >>> 2; + h |= h >>> 4; + h |= h >>> 8; + h >>>= 1; + h |= 0xFFFF; + if (h > (1 << 24)) + h >>>= 1; + + return h + 1; + } + + static int getMemoryUsage(int dictSize) { + // Sizes of the hash arrays + a little extra + return (HASH_2_SIZE + HASH_3_SIZE + getHash4Size(dictSize)) + / (1024 / 4) + 4; + } + + Hash234(int dictSize) { + hash4Table = new int[getHash4Size(dictSize)]; + hash4Mask = hash4Table.length - 1; + } + + void calcHashes(byte[] buf, int off) { + int temp = crcTable[buf[off] & 0xFF] ^ (buf[off + 1] & 0xFF); + hash2Value = temp & HASH_2_MASK; + + temp ^= (buf[off + 2] 
& 0xFF) << 8; + hash3Value = temp & HASH_3_MASK; + + temp ^= crcTable[buf[off + 3] & 0xFF] << 5; + hash4Value = temp & hash4Mask; + } + + int getHash2Pos() { + return hash2Table[hash2Value]; + } + + int getHash3Pos() { + return hash3Table[hash3Value]; + } + + int getHash4Pos() { + return hash4Table[hash4Value]; + } + + void updateTables(int pos) { + hash2Table[hash2Value] = pos; + hash3Table[hash3Value] = pos; + hash4Table[hash4Value] = pos; + } + + void normalize(int normalizeOffset) { + LZEncoder.normalize(hash2Table, normalizeOffset); + LZEncoder.normalize(hash3Table, normalizeOffset); + LZEncoder.normalize(hash4Table, normalizeOffset); + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lz/LZDecoder.java b/org.tukani.xz/src/org/tukaani/xz/lz/LZDecoder.java new file mode 100644 index 00000000..680fec10 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lz/LZDecoder.java @@ -0,0 +1,126 @@ +/* + * LZDecoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.lz; + +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.CorruptedInputException; + +public final class LZDecoder { + private final byte[] buf; + private int start = 0; + private int pos = 0; + private int full = 0; + private int limit = 0; + private int pendingLen = 0; + private int pendingDist = 0; + + public LZDecoder(int dictSize, byte[] presetDict) { + buf = new byte[dictSize]; + + if (presetDict != null) { + pos = Math.min(presetDict.length, dictSize); + full = pos; + start = pos; + System.arraycopy(presetDict, presetDict.length - pos, buf, 0, pos); + } + } + + public void reset() { + start = 0; + pos = 0; + full = 0; + limit = 0; + buf[buf.length - 1] = 0x00; + } + + public void setLimit(int outMax) { + if (buf.length - pos <= outMax) + limit = buf.length; + else + limit = pos + outMax; + } + + public boolean hasSpace() { + return pos < limit; + } + + public boolean hasPending() { + return pendingLen > 0; + } + + public int getPos() { + return pos; + } + + public int getByte(int dist) { + int offset = pos - dist - 1; + if (dist >= pos) + offset += buf.length; + + return buf[offset] & 0xFF; + } + + public void putByte(byte b) { + buf[pos++] = b; + + if (full < pos) + full = pos; + } + + public void repeat(int dist, int len) throws IOException { + if (dist < 0 || dist >= full) + throw new CorruptedInputException(); + + int left = Math.min(limit - pos, len); + pendingLen = len - left; + pendingDist = dist; + + int back = pos - dist - 1; + if (dist >= pos) + back += buf.length; + + do { + buf[pos++] = buf[back++]; + if (back == buf.length) + back = 0; + } while (--left > 0); + + if (full < pos) + full = pos; + } + + public void repeatPending() throws IOException { + if (pendingLen > 0) + repeat(pendingDist, pendingLen); + } + + public void copyUncompressed(DataInputStream inData, int len) + throws IOException { + int copySize = Math.min(buf.length - pos, len); + inData.readFully(buf, pos, 
copySize); + pos += copySize; + + if (full < pos) + full = pos; + } + + public int flush(byte[] out, int outOff) { + int copySize = pos - start; + if (pos == buf.length) + pos = 0; + + System.arraycopy(buf, start, out, outOff, copySize); + start = pos; + + return copySize; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lz/LZEncoder.java b/org.tukani.xz/src/org/tukaani/xz/lz/LZEncoder.java new file mode 100644 index 00000000..267d7dd4 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lz/LZEncoder.java @@ -0,0 +1,419 @@ +/* + * LZEncoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lz; + +import java.io.OutputStream; +import java.io.IOException; + +public abstract class LZEncoder { + public static final int MF_HC4 = 0x04; + public static final int MF_BT4 = 0x14; + + /** + * Number of bytes to keep available before the current byte + * when moving the LZ window. + */ + private final int keepSizeBefore; + + /** + * Number of bytes that must be available, the current byte included, + * to make hasEnoughData return true. Flushing and finishing are + * naturally exceptions to this since there cannot be any data after + * the end of the uncompressed input. + */ + private final int keepSizeAfter; + + final int matchLenMax; + final int niceLen; + + final byte[] buf; + + int readPos = -1; + private int readLimit = -1; + private boolean finishing = false; + private int writePos = 0; + private int pendingSize = 0; + + static void normalize(int[] positions, int normalizationOffset) { + for (int i = 0; i < positions.length; ++i) { + if (positions[i] <= normalizationOffset) + positions[i] = 0; + else + positions[i] -= normalizationOffset; + } + } + + /** + * Gets the size of the LZ window buffer that needs to be allocated. 
+ */ + private static int getBufSize( + int dictSize, int extraSizeBefore, int extraSizeAfter, + int matchLenMax) { + int keepSizeBefore = extraSizeBefore + dictSize; + int keepSizeAfter = extraSizeAfter + matchLenMax; + int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20); + return keepSizeBefore + keepSizeAfter + reserveSize; + } + + /** + * Gets approximate memory usage of the LZEncoder base structure and + * the match finder as kibibytes. + */ + public static int getMemoryUsage( + int dictSize, int extraSizeBefore, int extraSizeAfter, + int matchLenMax, int mf) { + // Buffer size + a little extra + int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter, + matchLenMax) / 1024 + 10; + + switch (mf) { + case MF_HC4: + m += HC4.getMemoryUsage(dictSize); + break; + + case MF_BT4: + m += BT4.getMemoryUsage(dictSize); + break; + + default: + throw new IllegalArgumentException(); + } + + return m; + } + + /** + * Creates a new LZEncoder. + *

+ * @param dictSize dictionary size + * + * @param extraSizeBefore + * number of bytes to keep available in the + * history in addition to dictSize + * + * @param extraSizeAfter + * number of bytes that must be available + * after current position + matchLenMax + * + * @param niceLen if a match of at least niceLen + * bytes is found, be happy with it and + * stop looking for longer matches + * + * @param matchLenMax don't test for matches longer than + * matchLenMax bytes + * + * @param mf match finder ID + * + * @param depthLimit match finder search depth limit + */ + public static LZEncoder getInstance( + int dictSize, int extraSizeBefore, int extraSizeAfter, + int niceLen, int matchLenMax, int mf, int depthLimit) { + switch (mf) { + case MF_HC4: + return new HC4(dictSize, extraSizeBefore, extraSizeAfter, + niceLen, matchLenMax, depthLimit); + + case MF_BT4: + return new BT4(dictSize, extraSizeBefore, extraSizeAfter, + niceLen, matchLenMax, depthLimit); + } + + throw new IllegalArgumentException(); + } + + /** + * Creates a new LZEncoder. See getInstance. + */ + LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter, + int niceLen, int matchLenMax) { + buf = new byte[getBufSize(dictSize, extraSizeBefore, extraSizeAfter, + matchLenMax)]; + + keepSizeBefore = extraSizeBefore + dictSize; + keepSizeAfter = extraSizeAfter + matchLenMax; + + this.matchLenMax = matchLenMax; + this.niceLen = niceLen; + } + + /** + * Sets a preset dictionary. If a preset dictionary is wanted, this + * function must be called immediately after creating the LZEncoder + * before any data has been encoded. + */ + public void setPresetDict(int dictSize, byte[] presetDict) { + assert !isStarted(); + assert writePos == 0; + + if (presetDict != null) { + // If the preset dictionary buffer is bigger than the dictionary + // size, copy only the tail of the preset dictionary. 
+ int copySize = Math.min(presetDict.length, dictSize); + int offset = presetDict.length - copySize; + System.arraycopy(presetDict, offset, buf, 0, copySize); + writePos += copySize; + skip(copySize); + } + } + + /** + * Moves data from the end of the buffer to the beginning, discarding + * old data and making space for new input. + */ + private void moveWindow() { + // Align the move to a multiple of 16 bytes. LZMA2 needs this + // because it uses the lowest bits from readPos to get the + // alignment of the uncompressed data. + int moveOffset = (readPos + 1 - keepSizeBefore) & ~15; + int moveSize = writePos - moveOffset; + System.arraycopy(buf, moveOffset, buf, 0, moveSize); + + readPos -= moveOffset; + readLimit -= moveOffset; + writePos -= moveOffset; + } + + /** + * Copies new data into the LZEncoder's buffer. + */ + public int fillWindow(byte[] in, int off, int len) { + assert !finishing; + + // Move the sliding window if needed. + if (readPos >= buf.length - keepSizeAfter) + moveWindow(); + + // Try to fill the dictionary buffer. If it becomes full, + // some of the input bytes may be left unused. + if (len > buf.length - writePos) + len = buf.length - writePos; + + System.arraycopy(in, off, buf, writePos, len); + writePos += len; + + // Set the new readLimit but only if there's enough data to allow + // encoding of at least one more byte. + if (writePos >= keepSizeAfter) + readLimit = writePos - keepSizeAfter; + + processPendingBytes(); + + // Tell the caller how much input we actually copied into + // the dictionary. + return len; + } + + /** + * Process pending bytes remaining from preset dictionary initialization + * or encoder flush operation. + */ + private void processPendingBytes() { + // After flushing or setting a preset dictionary there will be + // pending data that hasn't been run through the match finder yet. 
+ // Run it through the match finder now if there is enough new data + // available (readPos < readLimit) that the encoder may encode at + // least one more input byte. This way we don't waste any time + // looping in the match finder (and marking the same bytes as + // pending again) if the application provides very little new data + // per write call. + if (pendingSize > 0 && readPos < readLimit) { + readPos -= pendingSize; + int oldPendingSize = pendingSize; + pendingSize = 0; + skip(oldPendingSize); + assert pendingSize < oldPendingSize; + } + } + + /** + * Returns true if at least one byte has already been run through + * the match finder. + */ + public boolean isStarted() { + return readPos != -1; + } + + /** + * Marks that all the input needs to be made available in + * the encoded output. + */ + public void setFlushing() { + readLimit = writePos - 1; + processPendingBytes(); + } + + /** + * Marks that there is no more input remaining. The read position + * can be advanced until the end of the data. + */ + public void setFinishing() { + readLimit = writePos - 1; + finishing = true; + processPendingBytes(); + } + + /** + * Tests if there is enough input available to let the caller encode + * at least one more byte. + */ + public boolean hasEnoughData(int alreadyReadLen) { + return readPos - alreadyReadLen < readLimit; + } + + public void copyUncompressed(OutputStream out, int backward, int len) + throws IOException { + out.write(buf, readPos + 1 - backward, len); + } + + /** + * Get the number of bytes available, including the current byte. + *

+ * Note that the result is undefined if getMatches or + * skip hasn't been called yet and no preset dictionary + * is being used. + */ + public int getAvail() { + assert isStarted(); + return writePos - readPos; + } + + /** + * Gets the lowest four bits of the absolute offset of the current byte. + * Bits other than the lowest four are undefined. + */ + public int getPos() { + return readPos; + } + + /** + * Gets the byte from the given backward offset. + *

+ * The current byte is at 0, the previous byte + * at 1 etc. To get a byte at zero-based distance, + * use getByte(dist + 1). + *

+ * This function is equivalent to getByte(0, backward). + */ + public int getByte(int backward) { + return buf[readPos - backward] & 0xFF; + } + + /** + * Gets the byte from the given forward minus backward offset. + * The forward offset is added to the current position. This lets + * one read bytes ahead of the current byte. + */ + public int getByte(int forward, int backward) { + return buf[readPos + forward - backward] & 0xFF; + } + + /** + * Get the length of a match at the given distance. + * + * @param dist zero-based distance of the match to test + * @param lenLimit don't test for a match longer than this + * + * @return length of the match; it is in the range [0, lenLimit] + */ + public int getMatchLen(int dist, int lenLimit) { + int backPos = readPos - dist - 1; + int len = 0; + + while (len < lenLimit && buf[readPos + len] == buf[backPos + len]) + ++len; + + return len; + } + + /** + * Get the length of a match at the given distance and forward offset. + * + * @param forward forward offset + * @param dist zero-based distance of the match to test + * @param lenLimit don't test for a match longer than this + * + * @return length of the match; it is in the range [0, lenLimit] + */ + public int getMatchLen(int forward, int dist, int lenLimit) { + int curPos = readPos + forward; + int backPos = curPos - dist - 1; + int len = 0; + + while (len < lenLimit && buf[curPos + len] == buf[backPos + len]) + ++len; + + return len; + } + + /** + * Verifies that the matches returned by the match finder are valid. + * This is meant to be used in an assert statement. This is totally + * useless for actual encoding since match finder's results should + * naturally always be valid if it isn't broken. 
+ * + * @param matches return value from getMatches + * + * @return true if matches are valid, false if match finder is broken + */ + public boolean verifyMatches(Matches matches) { + int lenLimit = Math.min(getAvail(), matchLenMax); + + for (int i = 0; i < matches.count; ++i) + if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i]) + return false; + + return true; + } + + /** + * Moves to the next byte, checks if there is enough input available, + * and returns the amount of input available. + * + * @param requiredForFlushing + * minimum number of available bytes when + * flushing; encoding may be continued with + * new input after flushing + * @param requiredForFinishing + * minimum number of available bytes when + * finishing; encoding must not be continued + * after finishing or the match finder state + * may be corrupt + * + * @return the number of bytes available or zero if there + * is not enough input available + */ + int movePos(int requiredForFlushing, int requiredForFinishing) { + assert requiredForFlushing >= requiredForFinishing; + + ++readPos; + int avail = writePos - readPos; + + if (avail < requiredForFlushing) { + if (avail < requiredForFinishing || !finishing) { + ++pendingSize; + avail = 0; + } + } + + return avail; + } + + /** + * Runs match finder for the next byte and returns the matches found. + */ + public abstract Matches getMatches(); + + /** + * Skips the given number of bytes in the match finder. + */ + public abstract void skip(int len); +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lz/Matches.java b/org.tukani.xz/src/org/tukaani/xz/lz/Matches.java new file mode 100644 index 00000000..2fbee11b --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lz/Matches.java @@ -0,0 +1,22 @@ +/* + * Matches + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.lz; + +public final class Matches { + public final int[] len; + public final int[] dist; + public int count = 0; + + Matches(int countMax) { + len = new int[countMax]; + dist = new int[countMax]; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lzma/LZMACoder.java b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMACoder.java new file mode 100644 index 00000000..c31c9a6c --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMACoder.java @@ -0,0 +1,140 @@ +/* + * LZMACoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import org.tukaani.xz.rangecoder.RangeCoder; + +abstract class LZMACoder { + static final int POS_STATES_MAX = 1 << 4; + + static final int MATCH_LEN_MIN = 2; + static final int MATCH_LEN_MAX = MATCH_LEN_MIN + LengthCoder.LOW_SYMBOLS + + LengthCoder.MID_SYMBOLS + + LengthCoder.HIGH_SYMBOLS - 1; + + static final int DIST_STATES = 4; + static final int DIST_SLOTS = 1 << 6; + static final int DIST_MODEL_START = 4; + static final int DIST_MODEL_END = 14; + static final int FULL_DISTANCES = 1 << (DIST_MODEL_END / 2); + + static final int ALIGN_BITS = 4; + static final int ALIGN_SIZE = 1 << ALIGN_BITS; + static final int ALIGN_MASK = ALIGN_SIZE - 1; + + static final int REPS = 4; + + final int posMask; + + final int[] reps = new int[REPS]; + final State state = new State(); + + final short[][] isMatch = new short[State.STATES][POS_STATES_MAX]; + final short[] isRep = new short[State.STATES]; + final short[] isRep0 = new short[State.STATES]; + final short[] isRep1 = new short[State.STATES]; + final short[] isRep2 = new short[State.STATES]; + final short[][] isRep0Long = new short[State.STATES][POS_STATES_MAX]; + final short[][] distSlots = new short[DIST_STATES][DIST_SLOTS]; + final short[][] distSpecial = { new short[2], new short[2], + new short[4], new short[4], + new short[8], new 
short[8], + new short[16], new short[16], + new short[32], new short[32] }; + final short[] distAlign = new short[ALIGN_SIZE]; + + static final int getDistState(int len) { + return len < DIST_STATES + MATCH_LEN_MIN + ? len - MATCH_LEN_MIN + : DIST_STATES - 1; + } + + LZMACoder(int pb) { + posMask = (1 << pb) - 1; + } + + void reset() { + reps[0] = 0; + reps[1] = 0; + reps[2] = 0; + reps[3] = 0; + state.reset(); + + for (int i = 0; i < isMatch.length; ++i) + RangeCoder.initProbs(isMatch[i]); + + RangeCoder.initProbs(isRep); + RangeCoder.initProbs(isRep0); + RangeCoder.initProbs(isRep1); + RangeCoder.initProbs(isRep2); + + for (int i = 0; i < isRep0Long.length; ++i) + RangeCoder.initProbs(isRep0Long[i]); + + for (int i = 0; i < distSlots.length; ++i) + RangeCoder.initProbs(distSlots[i]); + + for (int i = 0; i < distSpecial.length; ++i) + RangeCoder.initProbs(distSpecial[i]); + + RangeCoder.initProbs(distAlign); + } + + + abstract class LiteralCoder { + private final int lc; + private final int literalPosMask; + + LiteralCoder(int lc, int lp) { + this.lc = lc; + this.literalPosMask = (1 << lp) - 1; + } + + final int getSubcoderIndex(int prevByte, int pos) { + int low = prevByte >> (8 - lc); + int high = (pos & literalPosMask) << lc; + return low + high; + } + + + abstract class LiteralSubcoder { + final short[] probs = new short[0x300]; + + void reset() { + RangeCoder.initProbs(probs); + } + } + } + + + abstract class LengthCoder { + static final int LOW_SYMBOLS = 1 << 3; + static final int MID_SYMBOLS = 1 << 3; + static final int HIGH_SYMBOLS = 1 << 8; + + final short[] choice = new short[2]; + final short[][] low = new short[POS_STATES_MAX][LOW_SYMBOLS]; + final short[][] mid = new short[POS_STATES_MAX][MID_SYMBOLS]; + final short[] high = new short[HIGH_SYMBOLS]; + + void reset() { + RangeCoder.initProbs(choice); + + for (int i = 0; i < low.length; ++i) + RangeCoder.initProbs(low[i]); + + for (int i = 0; i < low.length; ++i) + RangeCoder.initProbs(mid[i]); + + 
RangeCoder.initProbs(high); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lzma/LZMADecoder.java b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMADecoder.java new file mode 100644 index 00000000..8eb4ac2b --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMADecoder.java @@ -0,0 +1,199 @@ +/* + * LZMADecoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import java.io.IOException; +import org.tukaani.xz.lz.LZDecoder; +import org.tukaani.xz.rangecoder.RangeDecoder; + +public final class LZMADecoder extends LZMACoder { + private final LZDecoder lz; + private final RangeDecoder rc; + private final LiteralDecoder literalDecoder; + private final LengthDecoder matchLenDecoder = new LengthDecoder(); + private final LengthDecoder repLenDecoder = new LengthDecoder(); + + public LZMADecoder(LZDecoder lz, RangeDecoder rc, int lc, int lp, int pb) { + super(pb); + this.lz = lz; + this.rc = rc; + this.literalDecoder = new LiteralDecoder(lc, lp); + reset(); + } + + public void reset() { + super.reset(); + literalDecoder.reset(); + matchLenDecoder.reset(); + repLenDecoder.reset(); + } + + /** + * Returns true if LZMA end marker was detected. It is encoded as + * the maximum match distance which with signed ints becomes -1. This + * function is needed only for LZMA1. LZMA2 doesn't use the end marker + * in the LZMA layer. + */ + public boolean endMarkerDetected() { + return reps[0] == -1; + } + + public void decode() throws IOException { + lz.repeatPending(); + + while (lz.hasSpace()) { + int posState = lz.getPos() & posMask; + + if (rc.decodeBit(isMatch[state.get()], posState) == 0) { + literalDecoder.decode(); + } else { + int len = rc.decodeBit(isRep, state.get()) == 0 + ? 
decodeMatch(posState) + : decodeRepMatch(posState); + + // NOTE: With LZMA1 streams that have the end marker, + // this will throw CorruptedInputException. LZMAInputStream + // handles it specially. + lz.repeat(reps[0], len); + } + } + + rc.normalize(); + } + + private int decodeMatch(int posState) throws IOException { + state.updateMatch(); + + reps[3] = reps[2]; + reps[2] = reps[1]; + reps[1] = reps[0]; + + int len = matchLenDecoder.decode(posState); + int distSlot = rc.decodeBitTree(distSlots[getDistState(len)]); + + if (distSlot < DIST_MODEL_START) { + reps[0] = distSlot; + } else { + int limit = (distSlot >> 1) - 1; + reps[0] = (2 | (distSlot & 1)) << limit; + + if (distSlot < DIST_MODEL_END) { + reps[0] |= rc.decodeReverseBitTree( + distSpecial[distSlot - DIST_MODEL_START]); + } else { + reps[0] |= rc.decodeDirectBits(limit - ALIGN_BITS) + << ALIGN_BITS; + reps[0] |= rc.decodeReverseBitTree(distAlign); + } + } + + return len; + } + + private int decodeRepMatch(int posState) throws IOException { + if (rc.decodeBit(isRep0, state.get()) == 0) { + if (rc.decodeBit(isRep0Long[state.get()], posState) == 0) { + state.updateShortRep(); + return 1; + } + } else { + int tmp; + + if (rc.decodeBit(isRep1, state.get()) == 0) { + tmp = reps[1]; + } else { + if (rc.decodeBit(isRep2, state.get()) == 0) { + tmp = reps[2]; + } else { + tmp = reps[3]; + reps[3] = reps[2]; + } + + reps[2] = reps[1]; + } + + reps[1] = reps[0]; + reps[0] = tmp; + } + + state.updateLongRep(); + + return repLenDecoder.decode(posState); + } + + + private class LiteralDecoder extends LiteralCoder { + LiteralSubdecoder[] subdecoders; + + LiteralDecoder(int lc, int lp) { + super(lc, lp); + + subdecoders = new LiteralSubdecoder[1 << (lc + lp)]; + for (int i = 0; i < subdecoders.length; ++i) + subdecoders[i] = new LiteralSubdecoder(); + } + + void reset() { + for (int i = 0; i < subdecoders.length; ++i) + subdecoders[i].reset(); + } + + void decode() throws IOException { + int i = 
getSubcoderIndex(lz.getByte(0), lz.getPos()); + subdecoders[i].decode(); + } + + + private class LiteralSubdecoder extends LiteralSubcoder { + void decode() throws IOException { + int symbol = 1; + + if (state.isLiteral()) { + do { + symbol = (symbol << 1) | rc.decodeBit(probs, symbol); + } while (symbol < 0x100); + + } else { + int matchByte = lz.getByte(reps[0]); + int offset = 0x100; + int matchBit; + int bit; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + bit = rc.decodeBit(probs, offset + matchBit + symbol); + symbol = (symbol << 1) | bit; + offset &= (0 - bit) ^ ~matchBit; + } while (symbol < 0x100); + } + + lz.putByte((byte)symbol); + state.updateLiteral(); + } + } + } + + + private class LengthDecoder extends LengthCoder { + int decode(int posState) throws IOException { + if (rc.decodeBit(choice, 0) == 0) + return rc.decodeBitTree(low[posState]) + MATCH_LEN_MIN; + + if (rc.decodeBit(choice, 1) == 0) + return rc.decodeBitTree(mid[posState]) + + MATCH_LEN_MIN + LOW_SYMBOLS; + + return rc.decodeBitTree(high) + + MATCH_LEN_MIN + LOW_SYMBOLS + MID_SYMBOLS; + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoder.java b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoder.java new file mode 100644 index 00000000..95b0004f --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoder.java @@ -0,0 +1,711 @@ +/* + * LZMAEncoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import org.tukaani.xz.lz.LZEncoder; +import org.tukaani.xz.lz.Matches; +import org.tukaani.xz.rangecoder.RangeEncoder; + +public abstract class LZMAEncoder extends LZMACoder { + public static final int MODE_FAST = 1; + public static final int MODE_NORMAL = 2; + + /** + * LZMA2 chunk is considered full when its uncompressed size exceeds + * LZMA2_UNCOMPRESSED_LIMIT. + *

+ * A compressed LZMA2 chunk can hold 2 MiB of uncompressed data. + * A single LZMA symbol may indicate up to MATCH_LEN_MAX bytes + * of data, so the LZMA2 chunk is considered full when there is + * less space than MATCH_LEN_MAX bytes. + */ + private static final int LZMA2_UNCOMPRESSED_LIMIT + = (2 << 20) - MATCH_LEN_MAX; + + /** + * LZMA2 chunk is considered full when its compressed size exceeds + * LZMA2_COMPRESSED_LIMIT. + *

+ * The maximum compressed size of a LZMA2 chunk is 64 KiB. + * A single LZMA symbol might use 20 bytes of space even though + * it usually takes just one byte or so. Two more bytes are needed + * for LZMA2 uncompressed chunks (see LZMA2OutputStream.writeChunk). + * Leave a little safety margin and use 26 bytes. + */ + private static final int LZMA2_COMPRESSED_LIMIT = (64 << 10) - 26; + + private static final int DIST_PRICE_UPDATE_INTERVAL = FULL_DISTANCES; + private static final int ALIGN_PRICE_UPDATE_INTERVAL = ALIGN_SIZE; + + private final RangeEncoder rc; + final LZEncoder lz; + final LiteralEncoder literalEncoder; + final LengthEncoder matchLenEncoder; + final LengthEncoder repLenEncoder; + final int niceLen; + + private int distPriceCount = 0; + private int alignPriceCount = 0; + + private final int distSlotPricesSize; + private final int[][] distSlotPrices; + private final int[][] fullDistPrices + = new int[DIST_STATES][FULL_DISTANCES]; + private final int[] alignPrices = new int[ALIGN_SIZE]; + + int back = 0; + int readAhead = -1; + private int uncompressedSize = 0; + + public static int getMemoryUsage(int mode, int dictSize, + int extraSizeBefore, int mf) { + int m = 80; + + switch (mode) { + case MODE_FAST: + m += LZMAEncoderFast.getMemoryUsage( + dictSize, extraSizeBefore, mf); + break; + + case MODE_NORMAL: + m += LZMAEncoderNormal.getMemoryUsage( + dictSize, extraSizeBefore, mf); + break; + + default: + throw new IllegalArgumentException(); + } + + return m; + } + + public static LZMAEncoder getInstance( + RangeEncoder rc, int lc, int lp, int pb, int mode, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit) { + switch (mode) { + case MODE_FAST: + return new LZMAEncoderFast(rc, lc, lp, pb, + dictSize, extraSizeBefore, + niceLen, mf, depthLimit); + + case MODE_NORMAL: + return new LZMAEncoderNormal(rc, lc, lp, pb, + dictSize, extraSizeBefore, + niceLen, mf, depthLimit); + } + + throw new IllegalArgumentException(); + } + + /** + * 
Gets an integer [0, 63] matching the highest two bits of an integer. + * This is like bit scan reverse (BSR) on x86 except that this also + * cares about the second highest bit. + */ + public static int getDistSlot(int dist) { + if (dist <= DIST_MODEL_START) + return dist; + + int n = dist; + int i = 31; + + if ((n & 0xFFFF0000) == 0) { + n <<= 16; + i = 15; + } + + if ((n & 0xFF000000) == 0) { + n <<= 8; + i -= 8; + } + + if ((n & 0xF0000000) == 0) { + n <<= 4; + i -= 4; + } + + if ((n & 0xC0000000) == 0) { + n <<= 2; + i -= 2; + } + + if ((n & 0x80000000) == 0) + --i; + + return (i << 1) + ((dist >>> (i - 1)) & 1); + } + + /** + * Gets the next LZMA symbol. + *

+ * There are three types of symbols: literal (a single byte), + * repeated match, and normal match. The symbol is indicated + * by the return value and by the variable back. + *

+ * Literal: back == -1 and return value is 1. + * The literal itself needs to be read from lz separately. + *

+ * Repeated match: back is in the range [0, 3] and + * the return value is the length of the repeated match. + *

+ * Normal match: back - REPS (back - 4) + * is the distance of the match and the return value is the length + * of the match. + */ + abstract int getNextSymbol(); + + LZMAEncoder(RangeEncoder rc, LZEncoder lz, + int lc, int lp, int pb, int dictSize, int niceLen) { + super(pb); + this.rc = rc; + this.lz = lz; + this.niceLen = niceLen; + + literalEncoder = new LiteralEncoder(lc, lp); + matchLenEncoder = new LengthEncoder(pb, niceLen); + repLenEncoder = new LengthEncoder(pb, niceLen); + + distSlotPricesSize = getDistSlot(dictSize - 1) + 1; + distSlotPrices = new int[DIST_STATES][distSlotPricesSize]; + + reset(); + } + + public LZEncoder getLZEncoder() { + return lz; + } + + public void reset() { + super.reset(); + literalEncoder.reset(); + matchLenEncoder.reset(); + repLenEncoder.reset(); + distPriceCount = 0; + alignPriceCount = 0; + + uncompressedSize += readAhead + 1; + readAhead = -1; + } + + public int getUncompressedSize() { + return uncompressedSize; + } + + public void resetUncompressedSize() { + uncompressedSize = 0; + } + + /** + * Compresses for LZMA2. + * + * @return true if the LZMA2 chunk became full, false otherwise + */ + public boolean encodeForLZMA2() { + if (!lz.isStarted() && !encodeInit()) + return false; + + while (uncompressedSize <= LZMA2_UNCOMPRESSED_LIMIT + && rc.getPendingSize() <= LZMA2_COMPRESSED_LIMIT) + if (!encodeSymbol()) + return false; + + return true; + } + + private boolean encodeInit() { + assert readAhead == -1; + if (!lz.hasEnoughData(0)) + return false; + + // The first symbol must be a literal unless using + // a preset dictionary. This code isn't run if using + // a preset dictionary. 
+ skip(1); + rc.encodeBit(isMatch[state.get()], 0, 0); + literalEncoder.encodeInit(); + + --readAhead; + assert readAhead == -1; + + ++uncompressedSize; + assert uncompressedSize == 1; + + return true; + } + + private boolean encodeSymbol() { + if (!lz.hasEnoughData(readAhead + 1)) + return false; + + int len = getNextSymbol(); + + assert readAhead >= 0; + int posState = (lz.getPos() - readAhead) & posMask; + + if (back == -1) { + // Literal i.e. eight-bit byte + assert len == 1; + rc.encodeBit(isMatch[state.get()], posState, 0); + literalEncoder.encode(); + } else { + // Some type of match + rc.encodeBit(isMatch[state.get()], posState, 1); + if (back < REPS) { + // Repeated match i.e. the same distance + // has been used earlier. + assert lz.getMatchLen(-readAhead, reps[back], len) == len; + rc.encodeBit(isRep, state.get(), 1); + encodeRepMatch(back, len, posState); + } else { + // Normal match + assert lz.getMatchLen(-readAhead, back - REPS, len) == len; + rc.encodeBit(isRep, state.get(), 0); + encodeMatch(back - REPS, len, posState); + } + } + + readAhead -= len; + uncompressedSize += len; + + return true; + } + + private void encodeMatch(int dist, int len, int posState) { + state.updateMatch(); + matchLenEncoder.encode(len, posState); + + int distSlot = getDistSlot(dist); + rc.encodeBitTree(distSlots[getDistState(len)], distSlot); + + if (distSlot >= DIST_MODEL_START) { + int footerBits = (distSlot >>> 1) - 1; + int base = (2 | (distSlot & 1)) << footerBits; + int distReduced = dist - base; + + if (distSlot < DIST_MODEL_END) { + rc.encodeReverseBitTree( + distSpecial[distSlot - DIST_MODEL_START], + distReduced); + } else { + rc.encodeDirectBits(distReduced >>> ALIGN_BITS, + footerBits - ALIGN_BITS); + rc.encodeReverseBitTree(distAlign, distReduced & ALIGN_MASK); + --alignPriceCount; + } + } + + reps[3] = reps[2]; + reps[2] = reps[1]; + reps[1] = reps[0]; + reps[0] = dist; + + --distPriceCount; + } + + private void encodeRepMatch(int rep, int len, int posState) 
{ + if (rep == 0) { + rc.encodeBit(isRep0, state.get(), 0); + rc.encodeBit(isRep0Long[state.get()], posState, len == 1 ? 0 : 1); + } else { + int dist = reps[rep]; + rc.encodeBit(isRep0, state.get(), 1); + + if (rep == 1) { + rc.encodeBit(isRep1, state.get(), 0); + } else { + rc.encodeBit(isRep1, state.get(), 1); + rc.encodeBit(isRep2, state.get(), rep - 2); + + if (rep == 3) + reps[3] = reps[2]; + + reps[2] = reps[1]; + } + + reps[1] = reps[0]; + reps[0] = dist; + } + + if (len == 1) { + state.updateShortRep(); + } else { + repLenEncoder.encode(len, posState); + state.updateLongRep(); + } + } + + Matches getMatches() { + ++readAhead; + Matches matches = lz.getMatches(); + assert lz.verifyMatches(matches); + return matches; + } + + void skip(int len) { + readAhead += len; + lz.skip(len); + } + + int getAnyMatchPrice(State state, int posState) { + return RangeEncoder.getBitPrice(isMatch[state.get()][posState], 1); + } + + int getNormalMatchPrice(int anyMatchPrice, State state) { + return anyMatchPrice + + RangeEncoder.getBitPrice(isRep[state.get()], 0); + } + + int getAnyRepPrice(int anyMatchPrice, State state) { + return anyMatchPrice + + RangeEncoder.getBitPrice(isRep[state.get()], 1); + } + + int getShortRepPrice(int anyRepPrice, State state, int posState) { + return anyRepPrice + + RangeEncoder.getBitPrice(isRep0[state.get()], 0) + + RangeEncoder.getBitPrice(isRep0Long[state.get()][posState], + 0); + } + + int getLongRepPrice(int anyRepPrice, int rep, State state, int posState) { + int price = anyRepPrice; + + if (rep == 0) { + price += RangeEncoder.getBitPrice(isRep0[state.get()], 0) + + RangeEncoder.getBitPrice( + isRep0Long[state.get()][posState], 1); + } else { + price += RangeEncoder.getBitPrice(isRep0[state.get()], 1); + + if (rep == 1) + price += RangeEncoder.getBitPrice(isRep1[state.get()], 0); + else + price += RangeEncoder.getBitPrice(isRep1[state.get()], 1) + + RangeEncoder.getBitPrice(isRep2[state.get()], + rep - 2); + } + + return price; + } + + int 
getLongRepAndLenPrice(int rep, int len, State state, int posState) { + int anyMatchPrice = getAnyMatchPrice(state, posState); + int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); + int longRepPrice = getLongRepPrice(anyRepPrice, rep, state, posState); + return longRepPrice + repLenEncoder.getPrice(len, posState); + } + + int getMatchAndLenPrice(int normalMatchPrice, + int dist, int len, int posState) { + int price = normalMatchPrice + + matchLenEncoder.getPrice(len, posState); + int distState = getDistState(len); + + if (dist < FULL_DISTANCES) { + price += fullDistPrices[distState][dist]; + } else { + // Note that distSlotPrices includes also + // the price of direct bits. + int distSlot = getDistSlot(dist); + price += distSlotPrices[distState][distSlot] + + alignPrices[dist & ALIGN_MASK]; + } + + return price; + } + + private void updateDistPrices() { + distPriceCount = DIST_PRICE_UPDATE_INTERVAL; + + for (int distState = 0; distState < DIST_STATES; ++distState) { + for (int distSlot = 0; distSlot < distSlotPricesSize; ++distSlot) + distSlotPrices[distState][distSlot] + = RangeEncoder.getBitTreePrice( + distSlots[distState], distSlot); + + for (int distSlot = DIST_MODEL_END; distSlot < distSlotPricesSize; + ++distSlot) { + int count = (distSlot >>> 1) - 1 - ALIGN_BITS; + distSlotPrices[distState][distSlot] + += RangeEncoder.getDirectBitsPrice(count); + } + + for (int dist = 0; dist < DIST_MODEL_START; ++dist) + fullDistPrices[distState][dist] + = distSlotPrices[distState][dist]; + } + + int dist = DIST_MODEL_START; + for (int distSlot = DIST_MODEL_START; distSlot < DIST_MODEL_END; + ++distSlot) { + int footerBits = (distSlot >>> 1) - 1; + int base = (2 | (distSlot & 1)) << footerBits; + + int limit = distSpecial[distSlot - DIST_MODEL_START].length; + for (int i = 0; i < limit; ++i) { + int distReduced = dist - base; + int price = RangeEncoder.getReverseBitTreePrice( + distSpecial[distSlot - DIST_MODEL_START], + distReduced); + + for (int distState = 0; 
distState < DIST_STATES; ++distState) + fullDistPrices[distState][dist] + = distSlotPrices[distState][distSlot] + price; + + ++dist; + } + } + + assert dist == FULL_DISTANCES; + } + + private void updateAlignPrices() { + alignPriceCount = ALIGN_PRICE_UPDATE_INTERVAL; + + for (int i = 0; i < ALIGN_SIZE; ++i) + alignPrices[i] = RangeEncoder.getReverseBitTreePrice(distAlign, + i); + } + + /** + * Updates the lookup tables used for calculating match distance + * and length prices. The updating is skipped for performance reasons + * if the tables haven't changed much since the previous update. + */ + void updatePrices() { + if (distPriceCount <= 0) + updateDistPrices(); + + if (alignPriceCount <= 0) + updateAlignPrices(); + + matchLenEncoder.updatePrices(); + repLenEncoder.updatePrices(); + } + + + class LiteralEncoder extends LiteralCoder { + LiteralSubencoder[] subencoders; + + LiteralEncoder(int lc, int lp) { + super(lc, lp); + + subencoders = new LiteralSubencoder[1 << (lc + lp)]; + for (int i = 0; i < subencoders.length; ++i) + subencoders[i] = new LiteralSubencoder(); + } + + void reset() { + for (int i = 0; i < subencoders.length; ++i) + subencoders[i].reset(); + } + + void encodeInit() { + // When encoding the first byte of the stream, there is + // no previous byte in the dictionary so the encode function + // wouldn't work. + assert readAhead >= 0; + subencoders[0].encode(); + } + + void encode() { + assert readAhead >= 0; + int i = getSubcoderIndex(lz.getByte(1 + readAhead), + lz.getPos() - readAhead); + subencoders[i].encode(); + } + + int getPrice(int curByte, int matchByte, + int prevByte, int pos, State state) { + int price = RangeEncoder.getBitPrice( + isMatch[state.get()][pos & posMask], 0); + + int i = getSubcoderIndex(prevByte, pos); + price += state.isLiteral() + ? 
subencoders[i].getNormalPrice(curByte) + : subencoders[i].getMatchedPrice(curByte, matchByte); + + return price; + } + + private class LiteralSubencoder extends LiteralSubcoder { + void encode() { + int symbol = lz.getByte(readAhead) | 0x100; + + if (state.isLiteral()) { + int subencoderIndex; + int bit; + + do { + subencoderIndex = symbol >>> 8; + bit = (symbol >>> 7) & 1; + rc.encodeBit(probs, subencoderIndex, bit); + symbol <<= 1; + } while (symbol < 0x10000); + + } else { + int matchByte = lz.getByte(reps[0] + 1 + readAhead); + int offset = 0x100; + int subencoderIndex; + int matchBit; + int bit; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + subencoderIndex = offset + matchBit + (symbol >>> 8); + bit = (symbol >>> 7) & 1; + rc.encodeBit(probs, subencoderIndex, bit); + symbol <<= 1; + offset &= ~(matchByte ^ symbol); + } while (symbol < 0x10000); + } + + state.updateLiteral(); + } + + int getNormalPrice(int symbol) { + int price = 0; + int subencoderIndex; + int bit; + + symbol |= 0x100; + + do { + subencoderIndex = symbol >>> 8; + bit = (symbol >>> 7) & 1; + price += RangeEncoder.getBitPrice(probs[subencoderIndex], + bit); + symbol <<= 1; + } while (symbol < (0x100 << 8)); + + return price; + } + + int getMatchedPrice(int symbol, int matchByte) { + int price = 0; + int offset = 0x100; + int subencoderIndex; + int matchBit; + int bit; + + symbol |= 0x100; + + do { + matchByte <<= 1; + matchBit = matchByte & offset; + subencoderIndex = offset + matchBit + (symbol >>> 8); + bit = (symbol >>> 7) & 1; + price += RangeEncoder.getBitPrice(probs[subencoderIndex], + bit); + symbol <<= 1; + offset &= ~(matchByte ^ symbol); + } while (symbol < (0x100 << 8)); + + return price; + } + } + } + + + class LengthEncoder extends LengthCoder { + /** + * The prices are updated after at least + * PRICE_UPDATE_INTERVAL many lengths + * have been encoded with the same posState. + */ + private static final int PRICE_UPDATE_INTERVAL = 32; // FIXME? 
+ + private final int[] counters; + private final int[][] prices; + + LengthEncoder(int pb, int niceLen) { + int posStates = 1 << pb; + counters = new int[posStates]; + + // Always allocate at least LOW_SYMBOLS + MID_SYMBOLS because + // it makes updatePrices slightly simpler. The prices aren't + // usually needed anyway if niceLen < 18. + int lenSymbols = Math.max(niceLen - MATCH_LEN_MIN + 1, + LOW_SYMBOLS + MID_SYMBOLS); + prices = new int[posStates][lenSymbols]; + } + + void reset() { + super.reset(); + + // Reset counters to zero to force price update before + // the prices are needed. + for (int i = 0; i < counters.length; ++i) + counters[i] = 0; + } + + void encode(int len, int posState) { + len -= MATCH_LEN_MIN; + + if (len < LOW_SYMBOLS) { + rc.encodeBit(choice, 0, 0); + rc.encodeBitTree(low[posState], len); + } else { + rc.encodeBit(choice, 0, 1); + len -= LOW_SYMBOLS; + + if (len < MID_SYMBOLS) { + rc.encodeBit(choice, 1, 0); + rc.encodeBitTree(mid[posState], len); + } else { + rc.encodeBit(choice, 1, 1); + rc.encodeBitTree(high, len - MID_SYMBOLS); + } + } + + --counters[posState]; + } + + int getPrice(int len, int posState) { + return prices[posState][len - MATCH_LEN_MIN]; + } + + void updatePrices() { + for (int posState = 0; posState < counters.length; ++posState) { + if (counters[posState] <= 0) { + counters[posState] = PRICE_UPDATE_INTERVAL; + updatePrices(posState); + } + } + } + + private void updatePrices(int posState) { + int choice0Price = RangeEncoder.getBitPrice(choice[0], 0); + + int i = 0; + for (; i < LOW_SYMBOLS; ++i) + prices[posState][i] = choice0Price + + RangeEncoder.getBitTreePrice(low[posState], i); + + choice0Price = RangeEncoder.getBitPrice(choice[0], 1); + int choice1Price = RangeEncoder.getBitPrice(choice[1], 0); + + for (; i < LOW_SYMBOLS + MID_SYMBOLS; ++i) + prices[posState][i] = choice0Price + choice1Price + + RangeEncoder.getBitTreePrice(mid[posState], + i - LOW_SYMBOLS); + + choice1Price = 
RangeEncoder.getBitPrice(choice[1], 1); + + for (; i < prices[posState].length; ++i) + prices[posState][i] = choice0Price + choice1Price + + RangeEncoder.getBitTreePrice(high, i - LOW_SYMBOLS + - MID_SYMBOLS); + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoderFast.java b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoderFast.java new file mode 100644 index 00000000..ca6e13ac --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoderFast.java @@ -0,0 +1,151 @@ +/* + * LZMAEncoderFast + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import org.tukaani.xz.lz.LZEncoder; +import org.tukaani.xz.lz.Matches; +import org.tukaani.xz.rangecoder.RangeEncoder; + +final class LZMAEncoderFast extends LZMAEncoder { + private static int EXTRA_SIZE_BEFORE = 1; + private static int EXTRA_SIZE_AFTER = MATCH_LEN_MAX - 1; + + private Matches matches = null; + + static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { + return LZEncoder.getMemoryUsage( + dictSize, Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf); + } + + LZMAEncoderFast(RangeEncoder rc, int lc, int lp, int pb, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit) { + super(rc, LZEncoder.getInstance(dictSize, + Math.max(extraSizeBefore, + EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, + niceLen, MATCH_LEN_MAX, + mf, depthLimit), + lc, lp, pb, dictSize, niceLen); + } + + private boolean changePair(int smallDist, int bigDist) { + return smallDist < (bigDist >>> 7); + } + + int getNextSymbol() { + // Get the matches for the next byte unless readAhead indicates + // that we already got the new matches during the previous call + // to this function. 
+ if (readAhead == -1) + matches = getMatches(); + + back = -1; + + // Get the number of bytes available in the dictionary, but + // not more than the maximum match length. If there aren't + // enough bytes remaining to encode a match at all, return + // immediately to encode this byte as a literal. + int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); + if (avail < MATCH_LEN_MIN) + return 1; + + // Look for a match from the previous four match distances. + int bestRepLen = 0; + int bestRepIndex = 0; + for (int rep = 0; rep < REPS; ++rep) { + int len = lz.getMatchLen(reps[rep], avail); + if (len < MATCH_LEN_MIN) + continue; + + // If it is long enough, return it. + if (len >= niceLen) { + back = rep; + skip(len - 1); + return len; + } + + // Remember the index and length of the best repeated match. + if (len > bestRepLen) { + bestRepIndex = rep; + bestRepLen = len; + } + } + + int mainLen = 0; + int mainDist = 0; + + if (matches.count > 0) { + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + + if (mainLen >= niceLen) { + back = mainDist + REPS; + skip(mainLen - 1); + return mainLen; + } + + while (matches.count > 1 + && mainLen == matches.len[matches.count - 2] + 1) { + if (!changePair(matches.dist[matches.count - 2], mainDist)) + break; + + --matches.count; + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + } + + if (mainLen == MATCH_LEN_MIN && mainDist >= 0x80) + mainLen = 1; + } + + if (bestRepLen >= MATCH_LEN_MIN) { + if (bestRepLen + 1 >= mainLen + || (bestRepLen + 2 >= mainLen && mainDist >= (1 << 9)) + || (bestRepLen + 3 >= mainLen && mainDist >= (1 << 15))) { + back = bestRepIndex; + skip(bestRepLen - 1); + return bestRepLen; + } + } + + if (mainLen < MATCH_LEN_MIN || avail <= MATCH_LEN_MIN) + return 1; + + // Get the next match. Test if it is better than the current match. + // If so, encode the current byte as a literal. 
+ matches = getMatches(); + + if (matches.count > 0) { + int newLen = matches.len[matches.count - 1]; + int newDist = matches.dist[matches.count - 1]; + + if ((newLen >= mainLen && newDist < mainDist) + || (newLen == mainLen + 1 + && !changePair(mainDist, newDist)) + || newLen > mainLen + 1 + || (newLen + 1 >= mainLen + && mainLen >= MATCH_LEN_MIN + 1 + && changePair(newDist, mainDist))) + return 1; + } + + int limit = Math.max(mainLen - 1, MATCH_LEN_MIN); + for (int rep = 0; rep < REPS; ++rep) + if (lz.getMatchLen(reps[rep], limit) == limit) + return 1; + + back = mainDist + REPS; + skip(mainLen - 2); + return mainLen; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java new file mode 100644 index 00000000..6f8d81b9 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java @@ -0,0 +1,566 @@ +/* + * LZMAEncoderNormal + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +import org.tukaani.xz.lz.LZEncoder; +import org.tukaani.xz.lz.Matches; +import org.tukaani.xz.rangecoder.RangeEncoder; + +final class LZMAEncoderNormal extends LZMAEncoder { + private static final int OPTS = 4096; + + private static int EXTRA_SIZE_BEFORE = OPTS; + private static int EXTRA_SIZE_AFTER = OPTS; + + private final Optimum[] opts = new Optimum[OPTS]; + private int optCur = 0; + private int optEnd = 0; + + private Matches matches; + + // These are fields solely to avoid allocating the objects again and + // again on each function call. 
+ private final int[] repLens = new int[REPS]; + private final State nextState = new State(); + + static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) { + return LZEncoder.getMemoryUsage(dictSize, + Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf) + + OPTS * 64 / 1024; + } + + LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb, + int dictSize, int extraSizeBefore, + int niceLen, int mf, int depthLimit) { + super(rc, LZEncoder.getInstance(dictSize, + Math.max(extraSizeBefore, + EXTRA_SIZE_BEFORE), + EXTRA_SIZE_AFTER, + niceLen, MATCH_LEN_MAX, + mf, depthLimit), + lc, lp, pb, dictSize, niceLen); + + for (int i = 0; i < OPTS; ++i) + opts[i] = new Optimum(); + } + + public void reset() { + optCur = 0; + optEnd = 0; + super.reset(); + } + + /** + * Converts the opts array from backward indexes to forward indexes. + * Then it will be simple to get the next symbol from the array + * in later calls to getNextSymbol(). + */ + private int convertOpts() { + optEnd = optCur; + + int optPrev = opts[optCur].optPrev; + + do { + Optimum opt = opts[optCur]; + + if (opt.prev1IsLiteral) { + opts[optPrev].optPrev = optCur; + opts[optPrev].backPrev = -1; + optCur = optPrev--; + + if (opt.hasPrev2) { + opts[optPrev].optPrev = optPrev + 1; + opts[optPrev].backPrev = opt.backPrev2; + optCur = optPrev; + optPrev = opt.optPrev2; + } + } + + int temp = opts[optPrev].optPrev; + opts[optPrev].optPrev = optCur; + optCur = optPrev; + optPrev = temp; + } while (optCur > 0); + + optCur = opts[0].optPrev; + back = opts[optCur].backPrev; + return optCur; + } + + int getNextSymbol() { + // If there are pending symbols from an earlier call to this + // function, return those symbols first. 
+ if (optCur < optEnd) { + int len = opts[optCur].optPrev - optCur; + optCur = opts[optCur].optPrev; + back = opts[optCur].backPrev; + return len; + } + + assert optCur == optEnd; + optCur = 0; + optEnd = 0; + back = -1; + + if (readAhead == -1) + matches = getMatches(); + + // Get the number of bytes available in the dictionary, but + // not more than the maximum match length. If there aren't + // enough bytes remaining to encode a match at all, return + // immediately to encode this byte as a literal. + int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX); + if (avail < MATCH_LEN_MIN) + return 1; + + // Get the lengths of repeated matches. + int repBest = 0; + for (int rep = 0; rep < REPS; ++rep) { + repLens[rep] = lz.getMatchLen(reps[rep], avail); + + if (repLens[rep] < MATCH_LEN_MIN) { + repLens[rep] = 0; + continue; + } + + if (repLens[rep] > repLens[repBest]) + repBest = rep; + } + + // Return if the best repeated match is at least niceLen bytes long. + if (repLens[repBest] >= niceLen) { + back = repBest; + skip(repLens[repBest] - 1); + return repLens[repBest]; + } + + // Initialize mainLen and mainDist to the longest match found + // by the match finder. + int mainLen = 0; + int mainDist = 0; + if (matches.count > 0) { + mainLen = matches.len[matches.count - 1]; + mainDist = matches.dist[matches.count - 1]; + + // Return if it is at least niceLen bytes long. + if (mainLen >= niceLen) { + back = mainDist + REPS; + skip(mainLen - 1); + return mainLen; + } + } + + int curByte = lz.getByte(0); + int matchByte = lz.getByte(reps[0] + 1); + + // If the match finder found no matches and this byte cannot be + // encoded as a repeated match (short or long), we must return + // to have the byte encoded as a literal. + if (mainLen < MATCH_LEN_MIN && curByte != matchByte + && repLens[repBest] < MATCH_LEN_MIN) + return 1; + + + int pos = lz.getPos(); + int posState = pos & posMask; + + // Calculate the price of encoding the current byte as a literal. 
+ { + int prevByte = lz.getByte(1); + int literalPrice = literalEncoder.getPrice(curByte, matchByte, + prevByte, pos, state); + opts[1].set1(literalPrice, 0, -1); + } + + int anyMatchPrice = getAnyMatchPrice(state, posState); + int anyRepPrice = getAnyRepPrice(anyMatchPrice, state); + + // If it is possible to encode this byte as a short rep, see if + // it is cheaper than encoding it as a literal. + if (matchByte == curByte) { + int shortRepPrice = getShortRepPrice(anyRepPrice, + state, posState); + if (shortRepPrice < opts[1].price) + opts[1].set1(shortRepPrice, 0, 0); + } + + // Return if there is neither normal nor long repeated match. Use + // a short rep instead of a literal if it is possible and cheaper. + optEnd = Math.max(mainLen, repLens[repBest]); + if (optEnd < MATCH_LEN_MIN) { + assert optEnd == 0 : optEnd; + back = opts[1].backPrev; + return 1; + } + + + // Update the lookup tables for distances and lengths before using + // those price calculation functions. (The price functions above + // don't need these tables.) + updatePrices(); + + // Initialize the state and reps of this position in opts[]. + // updateOptStateAndReps() will need these to get the new + // state and reps for the next byte. + opts[0].state.set(state); + System.arraycopy(reps, 0, opts[0].reps, 0, REPS); + + // Initialize the prices for latter opts that will be used below. + for (int i = optEnd; i >= MATCH_LEN_MIN; --i) + opts[i].reset(); + + // Calculate the prices of repeated matches of all lengths. + for (int rep = 0; rep < REPS; ++rep) { + int repLen = repLens[rep]; + if (repLen < MATCH_LEN_MIN) + continue; + + int longRepPrice = getLongRepPrice(anyRepPrice, rep, + state, posState); + do { + int price = longRepPrice + repLenEncoder.getPrice(repLen, + posState); + if (price < opts[repLen].price) + opts[repLen].set1(price, 0, rep); + } while (--repLen >= MATCH_LEN_MIN); + } + + // Calculate the prices of normal matches that are longer than rep0. 
+ { + int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN); + if (len <= mainLen) { + int normalMatchPrice = getNormalMatchPrice(anyMatchPrice, + state); + + // Set i to the index of the shortest match that is + // at least len bytes long. + int i = 0; + while (len > matches.len[i]) + ++i; + + while (true) { + int dist = matches.dist[i]; + int price = getMatchAndLenPrice(normalMatchPrice, + dist, len, posState); + if (price < opts[len].price) + opts[len].set1(price, 0, dist + REPS); + + if (len == matches.len[i]) + if (++i == matches.count) + break; + + ++len; + } + } + } + + + avail = Math.min(lz.getAvail(), OPTS - 1); + + // Get matches for later bytes and optimize the use of LZMA symbols + // by calculating the prices and picking the cheapest symbol + // combinations. + while (++optCur < optEnd) { + matches = getMatches(); + if (matches.count > 0 + && matches.len[matches.count - 1] >= niceLen) + break; + + --avail; + ++pos; + posState = pos & posMask; + + updateOptStateAndReps(); + anyMatchPrice = opts[optCur].price + + getAnyMatchPrice(opts[optCur].state, posState); + anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state); + + calc1BytePrices(pos, posState, avail, anyRepPrice); + + if (avail >= MATCH_LEN_MIN) { + int startLen = calcLongRepPrices(pos, posState, + avail, anyRepPrice); + if (matches.count > 0) + calcNormalMatchPrices(pos, posState, avail, + anyMatchPrice, startLen); + } + } + + return convertOpts(); + } + + /** + * Updates the state and reps for the current byte in the opts array. 
/**
 * Updates the state and reps for the current byte in the opts array.
 * <p>
 * {@code opts[optCur]} already records how it was reached (optPrev,
 * backPrev and, for multi-symbol paths, prev1IsLiteral, hasPrev2, optPrev2
 * and backPrev2). This method replays those recorded symbols to fill in
 * {@code opts[optCur].state} and {@code opts[optCur].reps}.
 */
private void updateOptStateAndReps() {
    int optPrev = opts[optCur].optPrev;
    assert optPrev < optCur;

    if (opts[optCur].prev1IsLiteral) {
        // For paths recorded with prev1IsLiteral, optPrev points one past
        // the literal; step back so that it refers to the literal itself.
        --optPrev;

        if (opts[optCur].hasPrev2) {
            // Three-symbol path: start from the state at optPrev2 and
            // apply the first symbol (long rep or normal match).
            opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
            if (opts[optCur].backPrev2 < REPS)
                opts[optCur].state.updateLongRep();
            else
                opts[optCur].state.updateMatch();
        } else {
            opts[optCur].state.set(opts[optPrev].state);
        }

        // Apply the literal that precedes the final symbol.
        opts[optCur].state.updateLiteral();
    } else {
        opts[optCur].state.set(opts[optPrev].state);
    }

    if (optPrev == optCur - 1) {
        // Must be either a short rep or a literal.
        assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;

        if (opts[optCur].backPrev == 0)
            opts[optCur].state.updateShortRep();
        else
            opts[optCur].state.updateLiteral();

        // A one-byte symbol leaves the rep distances unchanged.
        System.arraycopy(opts[optPrev].reps, 0,
                         opts[optCur].reps, 0, REPS);
    } else {
        int back;
        if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
            // Three-symbol path: the reps are derived from the first
            // symbol (optPrev2/backPrev2); the final symbol is applied
            // to the state as a long rep.
            optPrev = opts[optCur].optPrev2;
            back = opts[optCur].backPrev2;
            opts[optCur].state.updateLongRep();
        } else {
            back = opts[optCur].backPrev;
            if (back < REPS)
                opts[optCur].state.updateLongRep();
            else
                opts[optCur].state.updateMatch();
        }

        if (back < REPS) {
            // Repeated match: move reps[back] to the front and shift the
            // entries that were before it down by one.
            opts[optCur].reps[0] = opts[optPrev].reps[back];

            int rep;
            for (rep = 1; rep <= back; ++rep)
                opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];

            for (; rep < REPS; ++rep)
                opts[optCur].reps[rep] = opts[optPrev].reps[rep];
        } else {
            // Normal match: the new distance (back - REPS) goes to the
            // front and the oldest rep is dropped.
            opts[optCur].reps[0] = back - REPS;
            System.arraycopy(opts[optPrev].reps, 0,
                             opts[optCur].reps, 1, REPS - 1);
        }
    }
}

/**
 * Calculates prices of a literal, a short rep, and literal + rep0.
 * <p>
 * Candidates that are cheaper than what is already stored are written to
 * {@code opts[optCur + 1]} (literal, short rep) or to
 * {@code opts[optCur + 1 + len]} (literal followed by a rep0 match).
 *
 * @param pos         position of the current byte, passed on to the
 *                    literal price calculation
 * @param posState    {@code pos & posMask} as computed by the caller
 * @param avail       number of bytes the caller allows to be examined
 *                    from the current position
 * @param anyRepPrice price prefix for "some kind of repeated match" at
 *                    {@code opts[optCur]}
 */
private void calc1BytePrices(int pos, int posState,
                             int avail, int anyRepPrice) {
    // This will be set to true if using a literal or a short rep.
    boolean nextIsByte = false;

    int curByte = lz.getByte(0);
    int matchByte = lz.getByte(opts[optCur].reps[0] + 1);

    // Try a literal.
    int literalPrice = opts[optCur].price
            + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
                                      pos, opts[optCur].state);
    if (literalPrice < opts[optCur + 1].price) {
        opts[optCur + 1].set1(literalPrice, optCur, -1);
        nextIsByte = true;
    }

    // Try a short rep.
    if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
                                  || opts[optCur + 1].backPrev != 0)) {
        int shortRepPrice = getShortRepPrice(anyRepPrice,
                                             opts[optCur].state,
                                             posState);
        // Note the "<=": on a tie the short rep replaces the literal.
        if (shortRepPrice <= opts[optCur + 1].price) {
            opts[optCur + 1].set1(shortRepPrice, optCur, 0);
            nextIsByte = true;
        }
    }

    // If neither a literal nor a short rep was the cheapest choice,
    // try literal + long rep0.
    if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
        int lenLimit = Math.min(niceLen, avail - 1);
        int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);

        if (len >= MATCH_LEN_MIN) {
            // Price the rep0 in the state that follows the literal.
            nextState.set(opts[optCur].state);
            nextState.updateLiteral();
            int nextPosState = (pos + 1) & posMask;
            int price = literalPrice
                        + getLongRepAndLenPrice(0, len,
                                                nextState, nextPosState);

            // Make sure every opts entry up to the target is initialized.
            int i = optCur + 1 + len;
            while (optEnd < i)
                opts[++optEnd].reset();

            if (price < opts[i].price)
                opts[i].set2(price, optCur, 0);
        }
    }
}
/**
 * Calculates prices of long rep and long rep + literal + rep0.
 *
 * @param pos         position of the current byte
 * @param posState    {@code pos & posMask} as computed by the caller
 * @param avail       number of bytes the caller allows to be examined
 *                    from the current position
 * @param anyRepPrice price prefix for "some kind of repeated match" at
 *                    {@code opts[optCur]}
 * @return the shortest length that normal matches need to be priced for:
 *         {@code MATCH_LEN_MIN}, or one more than the rep0 match length
 *         when rep0 matched at least {@code MATCH_LEN_MIN} bytes
 */
private int calcLongRepPrices(int pos, int posState,
                              int avail, int anyRepPrice) {
    int startLen = MATCH_LEN_MIN;
    int lenLimit = Math.min(avail, niceLen);

    for (int rep = 0; rep < REPS; ++rep) {
        int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
        if (len < MATCH_LEN_MIN)
            continue;

        // Initialize the opts entries this rep can reach.
        while (optEnd < optCur + len)
            opts[++optEnd].reset();

        int longRepPrice = getLongRepPrice(anyRepPrice, rep,
                                           opts[optCur].state, posState);

        // Price every usable length of this repeated match.
        for (int i = len; i >= MATCH_LEN_MIN; --i) {
            int price = longRepPrice
                        + repLenEncoder.getPrice(i, posState);
            if (price < opts[optCur + i].price)
                opts[optCur + i].set1(price, optCur, rep);
        }

        if (rep == 0)
            startLen = len + 1;

        // Try rep + literal + rep0: see how far the same distance
        // matches again after skipping one byte.
        int len2Limit = Math.min(niceLen, avail - len - 1);
        int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
                                  len2Limit);

        if (len2 >= MATCH_LEN_MIN) {
            // Rep
            int price = longRepPrice
                        + repLenEncoder.getPrice(len, posState);
            nextState.set(opts[optCur].state);
            nextState.updateLongRep();

            // Literal
            int curByte = lz.getByte(len, 0);
            int matchByte = lz.getByte(0); // lz.getByte(len, len)
            int prevByte = lz.getByte(len, 1);
            price += literalEncoder.getPrice(curByte, matchByte, prevByte,
                                             pos + len, nextState);
            nextState.updateLiteral();

            // Rep0
            int nextPosState = (pos + len + 1) & posMask;
            price += getLongRepAndLenPrice(0, len2,
                                           nextState, nextPosState);

            int i = optCur + len + 1 + len2;
            while (optEnd < i)
                opts[++optEnd].reset();

            if (price < opts[i].price)
                opts[i].set3(price, optCur, rep, len, 0);
        }
    }

    return startLen;
}

/**
 * Calculates prices of a normal match and normal match + literal + rep0.
 * <p>
 * May shorten the {@code matches} list in place so that no match is
 * longer than {@code avail}.
 *
 * @param pos           position of the current byte
 * @param posState      {@code pos & posMask} as computed by the caller
 * @param avail         number of bytes the caller allows to be examined
 *                      from the current position
 * @param anyMatchPrice price prefix for "some kind of match" at
 *                      {@code opts[optCur]}
 * @param startLen      shortest match length to price, as returned by
 *                      {@code calcLongRepPrices}
 */
private void calcNormalMatchPrices(int pos, int posState, int avail,
                                   int anyMatchPrice, int startLen) {
    // If the longest match is so long that it would not fit into
    // the opts array, shorten the matches.
    if (matches.len[matches.count - 1] > avail) {
        matches.count = 0;
        while (matches.len[matches.count] < avail)
            ++matches.count;

        matches.len[matches.count++] = avail;
    }

    // Nothing to do if even the longest match is shorter than startLen.
    if (matches.len[matches.count - 1] < startLen)
        return;

    while (optEnd < optCur + matches.len[matches.count - 1])
        opts[++optEnd].reset();

    int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
                                               opts[optCur].state);

    // Skip the matches that are shorter than startLen.
    int match = 0;
    while (startLen > matches.len[match])
        ++match;

    for (int len = startLen; ; ++len) {
        int dist = matches.dist[match];

        // Calculate the price of a match of len bytes from the nearest
        // possible distance.
        int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
                                                   dist, len, posState);
        if (matchAndLenPrice < opts[optCur + len].price)
            opts[optCur + len].set1(matchAndLenPrice,
                                    optCur, dist + REPS);

        // The three-symbol candidate is only tried at the full length
        // of each listed match.
        if (len != matches.len[match])
            continue;

        // Try match + literal + rep0. First get the length of the rep0.
        int len2Limit = Math.min(niceLen, avail - len - 1);
        int len2 = lz.getMatchLen(len + 1, dist, len2Limit);

        if (len2 >= MATCH_LEN_MIN) {
            nextState.set(opts[optCur].state);
            nextState.updateMatch();

            // Literal
            int curByte = lz.getByte(len, 0);
            int matchByte = lz.getByte(0); // lz.getByte(len, len)
            int prevByte = lz.getByte(len, 1);
            int price = matchAndLenPrice
                        + literalEncoder.getPrice(curByte, matchByte,
                                                  prevByte, pos + len,
                                                  nextState);
            nextState.updateLiteral();

            // Rep0
            int nextPosState = (pos + len + 1) & posMask;
            price += getLongRepAndLenPrice(0, len2,
                                           nextState, nextPosState);

            int i = optCur + len + 1 + len2;
            while (optEnd < i)
                opts[++optEnd].reset();

            if (price < opts[i].price)
                opts[i].set3(price, optCur, dist + REPS, len, 0);
        }

        // Move on to the next (longer) match, or stop after the last one.
        if (++match == matches.count)
            break;
    }
}
// Closes the enclosing class, whose header lies outside this section.
}
+++ b/org.tukani.xz/src/org/tukaani/xz/lzma/Optimum.java @@ -0,0 +1,73 @@ +/* + * Optimum + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +package org.tukaani.xz.lzma; + +final class Optimum { + private static final int INFINITY_PRICE = 1 << 30; + + final State state = new State(); + final int[] reps = new int[LZMACoder.REPS]; + + /** + * Cumulative price of arriving to this byte. + */ + int price; + + int optPrev; + int backPrev; + boolean prev1IsLiteral; + + boolean hasPrev2; + int optPrev2; + int backPrev2; + + /** + * Resets the price. + */ + void reset() { + price = INFINITY_PRICE; + } + + /** + * Sets to indicate one LZMA symbol (literal, rep, or match). + */ + void set1(int newPrice, int optCur, int back) { + price = newPrice; + optPrev = optCur; + backPrev = back; + prev1IsLiteral = false; + } + + /** + * Sets to indicate two LZMA symbols of which the first one is a literal. + */ + void set2(int newPrice, int optCur, int back) { + price = newPrice; + optPrev = optCur + 1; + backPrev = back; + prev1IsLiteral = true; + hasPrev2 = false; + } + + /** + * Sets to indicate three LZMA symbols of which the second one + * is a literal. + */ + void set3(int newPrice, int optCur, int back2, int len2, int back) { + price = newPrice; + optPrev = optCur + len2 + 1; + backPrev = back; + prev1IsLiteral = true; + hasPrev2 = true; + optPrev2 = optCur; + backPrev2 = back2; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/lzma/State.java b/org.tukani.xz/src/org/tukaani/xz/lzma/State.java new file mode 100644 index 00000000..0ece8609 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/lzma/State.java @@ -0,0 +1,75 @@ +/* + * State + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
/*
 * State
 *
 * Authors: Lasse Collin
 *          Igor Pavlov
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * The LZMA coder state: a number in the range 0..STATES-1 that encodes
 * which kinds of symbols were coded most recently. Values below
 * LIT_STATES mean that the latest symbol was a literal.
 */
final class State {
    static final int STATES = 12;

    private static final int LIT_STATES = 7;

    private static final int LIT_LIT = 0;
    private static final int MATCH_LIT_LIT = 1;
    private static final int REP_LIT_LIT = 2;
    private static final int SHORTREP_LIT_LIT = 3;
    private static final int MATCH_LIT = 4;
    private static final int REP_LIT = 5;
    private static final int SHORTREP_LIT = 6;
    private static final int LIT_MATCH = 7;
    private static final int LIT_LONGREP = 8;
    private static final int LIT_SHORTREP = 9;
    private static final int NONLIT_MATCH = 10;
    private static final int NONLIT_REP = 11;

    private int state;

    /** Creates a state holding the default value of zero (LIT_LIT). */
    State() {}

    /** Creates a copy of {@code other}. */
    State(State other) {
        this.state = other.state;
    }

    /** Returns to the initial LIT_LIT state. */
    void reset() {
        this.state = LIT_LIT;
    }

    /** Returns the raw state number. */
    int get() {
        return this.state;
    }

    /** Copies the state number from {@code other}. */
    void set(State other) {
        this.state = other.state;
    }

    /** Advances the state after coding a literal. */
    void updateLiteral() {
        if (this.state <= SHORTREP_LIT_LIT) {
            this.state = LIT_LIT;
            return;
        }

        // States up to LIT_SHORTREP drop by three, the rest by six.
        final int step = (this.state <= LIT_SHORTREP) ? 3 : 6;
        this.state -= step;
    }

    /** Advances the state after coding a normal match. */
    void updateMatch() {
        this.state = afterNonLiteral(LIT_MATCH, NONLIT_MATCH);
    }

    /** Advances the state after coding a long repeated match. */
    void updateLongRep() {
        this.state = afterNonLiteral(LIT_LONGREP, NONLIT_REP);
    }

    /** Advances the state after coding a short (one-byte) repeated match. */
    void updateShortRep() {
        this.state = afterNonLiteral(LIT_SHORTREP, NONLIT_REP);
    }

    /** Tells whether the most recently coded symbol was a literal. */
    boolean isLiteral() {
        return this.state < LIT_STATES;
    }

    /**
     * Picks the successor state for a non-literal symbol depending on
     * whether the previous symbol was a literal.
     */
    private int afterNonLiteral(int afterLiteral, int afterOther) {
        return isLiteral() ? afterLiteral : afterOther;
    }
}

Introduction

+ *

+ * This aims to be a complete implementation of XZ data compression + * in pure Java. Features: + *

    + *
  • Full support for the .xz file format specification version 1.0.4
  • + *
  • Single-threaded streamed compression and decompression
  • + *
  • Single-threaded decompression with limited random access support
  • + *
  • Raw streams (no .xz headers) for advanced users, including LZMA2 + * with preset dictionary
  • + *
+ *

+ * Threading is planned but it is unknown when it will be implemented. + *

+ * For the latest source code, see the + * home page of XZ for Java. + * + *

Getting started

+ *

+ * Start by reading the documentation of {@link org.tukaani.xz.XZOutputStream} + * and {@link org.tukaani.xz.XZInputStream}. + * If you use XZ inside another file format or protocol, + * see also {@link org.tukaani.xz.SingleXZInputStream}. + * + *

Licensing

+ *

+ * XZ for Java has been put into the public domain, thus you can do + * whatever you want with it. All the files in the package have been + * written by Lasse Collin and/or Igor Pavlov. + *

/*
 * RangeCoder
 *
 * Authors: Lasse Collin
 *          Igor Pavlov
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * Constants and helpers shared by the range encoder and the range
 * decoders.
 */
public abstract class RangeCoder {
    // Number of bits the range is shifted by on each normalization.
    static final int SHIFT_BITS = 8;
    // When none of these bits is set in the range, it is normalized.
    static final int TOP_MASK = 0xFF000000;
    // Probabilities are stored with this many bits of precision.
    static final int BIT_MODEL_TOTAL_BITS = 11;
    static final int BIT_MODEL_TOTAL = 1 << BIT_MODEL_TOTAL_BITS;
    // Initial probability: half of BIT_MODEL_TOTAL.
    static final short PROB_INIT = (short)(BIT_MODEL_TOTAL / 2);
    // Adaptation speed: a probability moves by 1/2^MOVE_BITS of the
    // remaining distance on each update.
    static final int MOVE_BITS = 5;

    /**
     * Sets every probability in {@code probs} to {@code PROB_INIT}.
     *
     * @param probs the probability array to initialize
     */
    public static final void initProbs(short[] probs) {
        Arrays.fill(probs, PROB_INIT);
    }
}

/*
 * RangeDecoder
 *
 * Authors: Lasse Collin
 *          Igor Pavlov
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * Common decoding logic of the range decoders. Subclasses provide the
 * input by implementing {@link #normalize()}.
 */
public abstract class RangeDecoder extends RangeCoder {
    int range = 0;
    int code = 0;

    /**
     * Called before every decoded bit; implementations shift a new input
     * byte into {@code code} when {@code range} has become too small.
     *
     * @throws IOException if the implementation fails to obtain input
     */
    public abstract void normalize() throws IOException;

    /**
     * Decodes one bit using, and then adapting, the probability
     * {@code probs[index]}.
     *
     * @param probs probability array
     * @param index index of the probability to use
     * @return the decoded bit, 0 or 1
     * @throws IOException if {@link #normalize()} throws
     */
    public int decodeBit(short[] probs, int index) throws IOException {
        normalize();

        int prob = probs[index];
        int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob;
        int bit;

        // Compare code and bound as if they were unsigned 32-bit integers.
        if ((code ^ 0x80000000) < (bound ^ 0x80000000)) {
            // Bit 0: keep the lower part and move prob towards
            // BIT_MODEL_TOTAL.
            range = bound;
            probs[index] = (short)(
                    prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS));
            bit = 0;
        } else {
            // Bit 1: keep the upper part and move prob towards zero.
            range -= bound;
            code -= bound;
            probs[index] = (short)(prob - (prob >>> MOVE_BITS));
            bit = 1;
        }

        return bit;
    }

    /**
     * Decodes a symbol from a bit tree, most significant bit first.
     *
     * @param probs probabilities of the tree; its length determines the
     *              number of decoded bits
     * @return the decoded symbol, in the range 0..probs.length-1
     * @throws IOException if {@link #normalize()} throws
     */
    public int decodeBitTree(short[] probs) throws IOException {
        int symbol = 1;

        do {
            symbol = (symbol << 1) | decodeBit(probs, symbol);
        } while (symbol < probs.length);

        return symbol - probs.length;
    }

    /**
     * Decodes a symbol from a bit tree, least significant bit first.
     *
     * @param probs probabilities of the tree; its length determines the
     *              number of decoded bits
     * @return the decoded symbol
     * @throws IOException if {@link #normalize()} throws
     */
    public int decodeReverseBitTree(short[] probs) throws IOException {
        int symbol = 1;
        int i = 0;
        int result = 0;

        do {
            int bit = decodeBit(probs, symbol);
            symbol = (symbol << 1) | bit;
            result |= bit << i++;
        } while (symbol < probs.length);

        return result;
    }

    /**
     * Decodes {@code count} bits that were coded with fixed 50 %
     * probability, most significant bit first.
     *
     * @param count number of bits to decode; the loop runs at least once
     *              and stops when the decremented count reaches zero, so
     *              this should be at least 1
     * @return the decoded bits
     * @throws IOException if {@link #normalize()} throws
     */
    public int decodeDirectBits(int count) throws IOException {
        int result = 0;

        do {
            normalize();

            range >>>= 1;
            // t is 1 if code - range is negative as a signed int and
            // 0 otherwise; range is subtracted from code only when
            // t is 0, in which case the decoded bit is 1.
            int t = (code - range) >>> 31;
            code -= range & (t - 1);
            result = (result << 1) | (1 - t);
        } while (--count != 0);

        return result;
    }
}
+ */ + +package org.tukaani.xz.rangecoder; + +import java.io.DataInputStream; +import java.io.IOException; +import org.tukaani.xz.CorruptedInputException; + +public final class RangeDecoderFromBuffer extends RangeDecoder { + private static final int INIT_SIZE = 5; + + private final byte[] buf; + private int pos = 0; + private int end = 0; + + public RangeDecoderFromBuffer(int inputSizeMax) { + buf = new byte[inputSizeMax - INIT_SIZE]; + } + + public void prepareInputBuffer(DataInputStream in, int len) + throws IOException { + if (len < INIT_SIZE) + throw new CorruptedInputException(); + + if (in.readUnsignedByte() != 0x00) + throw new CorruptedInputException(); + + code = in.readInt(); + range = 0xFFFFFFFF; + + pos = 0; + end = len - INIT_SIZE; + in.readFully(buf, 0, end); + } + + public boolean isInBufferOK() { + return pos <= end; + } + + public boolean isFinished() { + return pos == end && code == 0; + } + + public void normalize() throws IOException { + if ((range & TOP_MASK) == 0) { + try { + // If the input is corrupt, this might throw + // ArrayIndexOutOfBoundsException. + code = (code << SHIFT_BITS) | (buf[pos++] & 0xFF); + range <<= SHIFT_BITS; + } catch (ArrayIndexOutOfBoundsException e) { + throw new CorruptedInputException(); + } + } + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java b/org.tukani.xz/src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java new file mode 100644 index 00000000..142b518d --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/rangecoder/RangeDecoderFromStream.java @@ -0,0 +1,41 @@ +/* + * RangeDecoderFromStream + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
/*
 * RangeDecoderFromStream
 *
 * Authors: Lasse Collin
 *          Igor Pavlov
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * Range decoder that reads its input byte by byte from a stream.
 */
public final class RangeDecoderFromStream extends RangeDecoder {
    private final DataInputStream inData;

    /**
     * Wraps {@code in} and reads the five initialization bytes.
     *
     * @param in source of the compressed data
     * @throws CorruptedInputException if the first byte is not zero
     * @throws IOException if reading from {@code in} fails
     */
    public RangeDecoderFromStream(InputStream in) throws IOException {
        inData = new DataInputStream(in);

        if (inData.readUnsignedByte() != 0x00)
            throw new CorruptedInputException();

        code = inData.readInt();
        range = 0xFFFFFFFF;
    }

    /** Tells whether the code has reached zero. */
    public boolean isFinished() {
        return code == 0;
    }

    /**
     * Shifts the next input byte into the code when the range has dropped
     * below the top byte.
     *
     * @throws IOException if reading from the stream fails
     */
    public void normalize() throws IOException {
        if ((range & TOP_MASK) == 0) {
            code = (code << SHIFT_BITS) | inData.readUnsignedByte();
            range <<= SHIFT_BITS;
        }
    }
}

/*
 * RangeEncoder
 *
 * Authors: Lasse Collin
 *          Igor Pavlov
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * Range encoder that collects its output in an internal buffer, plus
 * static helpers that estimate the price of coding bits with given
 * probabilities.
 */
public final class RangeEncoder extends RangeCoder {
    // Probabilities are reduced by this many bits before being used as
    // an index into the price table.
    private static final int MOVE_REDUCING_BITS = 4;
    // Prices are expressed in units of 1/2^BIT_PRICE_SHIFT_BITS bits.
    private static final int BIT_PRICE_SHIFT_BITS = 4;

    // Price lookup table, filled in by the static initializer below.
    private static final int[] prices
            = new int[BIT_MODEL_TOTAL >>> MOVE_REDUCING_BITS];

    // Low end of the current range. A long is used so that a carry out
    // of the low 32 bits can be detected in shiftLow().
    private long low;
    private int range;

    // NOTE: int is OK for LZMA2 because a compressed chunk
    // is not more than 64 KiB, but with LZMA1 there is no chunking
    // so in theory cacheSize can grow very big. To be very safe,
    // use long instead of int if you adapt this code for LZMA1.
    private int cacheSize;
    private byte cache;

    private final byte[] buf;
    private int bufPos;

    static {
        // One table entry per group of 2^MOVE_REDUCING_BITS probabilities,
        // computed for the midpoint of the group.
        for (int i = (1 << MOVE_REDUCING_BITS) / 2; i < BIT_MODEL_TOTAL;
                i += (1 << MOVE_REDUCING_BITS)) {
            int w = i;
            int bitCount = 0;

            // Repeatedly square w and count the bits shifted out to keep
            // it within 16 bits.
            for (int j = 0; j < BIT_PRICE_SHIFT_BITS; ++j) {
                w *= w;
                bitCount <<= 1;

                while ((w & 0xFFFF0000) != 0) {
                    w >>>= 1;
                    ++bitCount;
                }
            }

            prices[i >> MOVE_REDUCING_BITS]
                    = (BIT_MODEL_TOTAL_BITS << BIT_PRICE_SHIFT_BITS)
                      - 15 - bitCount;
        }
    }

    /**
     * @param bufSize size of the internal output buffer in bytes
     */
    public RangeEncoder(int bufSize) {
        buf = new byte[bufSize];
        reset();
    }

    /** Discards any buffered output and returns to the initial state. */
    public void reset() {
        low = 0;
        range = 0xFFFFFFFF;
        cache = 0x00;
        cacheSize = 1;
        bufPos = 0;
    }

    /**
     * Returns the number of bytes already in the buffer plus the pending
     * cache bytes plus the bytes that {@link #finish()} will still flush.
     */
    public int getPendingSize() {
        return bufPos + cacheSize + 5 - 1;
    }

    /**
     * Flushes the remaining bytes of {@code low} into the buffer.
     *
     * @return the total number of bytes in the buffer
     */
    public int finish() {
        for (int i = 0; i < 5; ++i)
            shiftLow();

        return bufPos;
    }

    /**
     * Writes the buffered output to {@code out}.
     *
     * @param out destination stream
     * @throws IOException if writing fails
     */
    public void write(OutputStream out) throws IOException {
        out.write(buf, 0, bufPos);
    }

    /**
     * Moves the top byte of {@code low} towards the output. Bytes are
     * held back in cache/cacheSize for as long as a later carry could
     * still change them.
     */
    private void shiftLow() {
        int lowHi = (int)(low >>> 32);

        // Either a carry occurred (lowHi != 0) or the top byte is not
        // 0xFF, so the pending bytes can no longer change.
        if (lowHi != 0 || low < 0xFF000000L) {
            int temp = cache;

            do {
                // Adding lowHi propagates the carry into the pending bytes.
                buf[bufPos++] = (byte)(temp + lowHi);
                temp = 0xFF;
            } while (--cacheSize != 0);

            cache = (byte)(low >>> 24);
        }

        ++cacheSize;
        low = (low & 0x00FFFFFF) << 8;
    }

    /**
     * Encodes one bit using, and then adapting, the probability
     * {@code probs[index]}.
     *
     * @param probs probability array
     * @param index index of the probability to use
     * @param bit   the bit to encode; any non-zero value counts as 1
     */
    public void encodeBit(short[] probs, int index, int bit) {
        int prob = probs[index];
        int bound = (range >>> BIT_MODEL_TOTAL_BITS) * prob;

        // NOTE: Any non-zero value for bit is taken as 1.
        if (bit == 0) {
            range = bound;
            probs[index] = (short)(
                    prob + ((BIT_MODEL_TOTAL - prob) >>> MOVE_BITS));
        } else {
            // The mask makes bound be added as an unsigned 32-bit value.
            low += bound & 0xFFFFFFFFL;
            range -= bound;
            probs[index] = (short)(prob - (prob >>> MOVE_BITS));
        }

        if ((range & TOP_MASK) == 0) {
            range <<= SHIFT_BITS;
            shiftLow();
        }
    }

    /**
     * Returns the price of coding {@code bit} with probability
     * {@code prob}.
     *
     * @param prob probability as stored in a probability array
     * @param bit  the bit to price; must be 0 or 1
     * @return the price looked up from the price table
     */
    public static int getBitPrice(int prob, int bit) {
        // NOTE: Unlike in encodeBit(), here bit must be 0 or 1.
        assert bit == 0 || bit == 1;
        // For bit 1 the probability is flipped (XOR with all ones within
        // BIT_MODEL_TOTAL - 1) before the table lookup.
        return prices[(prob ^ ((-bit) & (BIT_MODEL_TOTAL - 1)))
                      >>> MOVE_REDUCING_BITS];
    }

    /**
     * Encodes {@code symbol} with a bit tree, most significant bit first.
     *
     * @param probs  probabilities of the tree; its length determines the
     *               number of encoded bits
     * @param symbol the symbol to encode
     */
    public void encodeBitTree(short[] probs, int symbol) {
        int index = 1;
        int mask = probs.length;

        do {
            mask >>>= 1;
            // bit is either zero or mask; encodeBit() accepts any
            // non-zero value as 1.
            int bit = symbol & mask;
            encodeBit(probs, index, bit);

            index <<= 1;
            if (bit != 0)
                index |= 1;

        } while (mask != 1);
    }

    /**
     * Returns the price of encoding {@code symbol} with
     * {@link #encodeBitTree}.
     */
    public static int getBitTreePrice(short[] probs, int symbol) {
        int price = 0;
        // The added top bit makes the loop stop once symbol is down to 1.
        symbol |= probs.length;

        do {
            int bit = symbol & 1;
            symbol >>>= 1;
            price += getBitPrice(probs[symbol], bit);
        } while (symbol != 1);

        return price;
    }

    /**
     * Encodes {@code symbol} with a bit tree, least significant bit
     * first.
     *
     * @param probs  probabilities of the tree; its length determines the
     *               number of encoded bits
     * @param symbol the symbol to encode
     */
    public void encodeReverseBitTree(short[] probs, int symbol) {
        int index = 1;
        symbol |= probs.length;

        do {
            int bit = symbol & 1;
            symbol >>>= 1;
            encodeBit(probs, index, bit);
            index = (index << 1) | bit;
        } while (symbol != 1);
    }

    /**
     * Returns the price of encoding {@code symbol} with
     * {@link #encodeReverseBitTree}.
     */
    public static int getReverseBitTreePrice(short[] probs, int symbol) {
        int price = 0;
        int index = 1;
        symbol |= probs.length;

        do {
            int bit = symbol & 1;
            symbol >>>= 1;
            price += getBitPrice(probs[index], bit);
            index = (index << 1) | bit;
        } while (symbol != 1);

        return price;
    }

    /**
     * Encodes the lowest {@code count} bits of {@code value} with fixed
     * 50 % probability, most significant bit first.
     *
     * @param value the bits to encode
     * @param count number of bits; it is decremented before its first
     *              use as a shift amount, so it should be at least 1
     */
    public void encodeDirectBits(int value, int count) {
        do {
            range >>>= 1;
            // Adds range to low when the selected bit of value is 1.
            low += range & (0 - ((value >>> --count) & 1));

            if ((range & TOP_MASK) == 0) {
                range <<= SHIFT_BITS;
                shiftLow();
            }
        } while (count != 0);
    }

    /**
     * Returns the price of encoding {@code count} direct bits.
     */
    public static int getDirectBitsPrice(int count) {
        return count << BIT_PRICE_SHIFT_BITS;
    }
}
+ */ + +package org.tukaani.xz.simple; + +public final class ARM implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public ARM(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos + 8; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 4) { + if ((buf[i + 3] & 0xFF) == 0xEB) { + int src = ((buf[i + 2] & 0xFF) << 16) + | ((buf[i + 1] & 0xFF) << 8) + | (buf[i] & 0xFF); + src <<= 2; + + int dest; + if (isEncoder) + dest = src + (pos + i - off); + else + dest = src - (pos + i - off); + + dest >>>= 2; + buf[i + 2] = (byte)(dest >>> 16); + buf[i + 1] = (byte)(dest >>> 8); + buf[i] = (byte)dest; + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/simple/ARMThumb.java b/org.tukani.xz/src/org/tukaani/xz/simple/ARMThumb.java new file mode 100644 index 00000000..b8e7ca92 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/simple/ARMThumb.java @@ -0,0 +1,53 @@ +/* + * BCJ filter for little endian ARM-Thumb instructions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.simple; + +public final class ARMThumb implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public ARMThumb(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos + 4; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 2) { + if ((buf[i + 1] & 0xF8) == 0xF0 && (buf[i + 3] & 0xF8) == 0xF8) { + int src = ((buf[i + 1] & 0x07) << 19) + | ((buf[i] & 0xFF) << 11) + | ((buf[i + 3] & 0x07) << 8) + | (buf[i + 2] & 0xFF); + src <<= 1; + + int dest; + if (isEncoder) + dest = src + (pos + i - off); + else + dest = src - (pos + i - off); + + dest >>>= 1; + buf[i + 1] = (byte)(0xF0 | ((dest >>> 19) & 0x07)); + buf[i] = (byte)(dest >>> 11); + buf[i + 3] = (byte)(0xF8 | ((dest >>> 8) & 0x07)); + buf[i + 2] = (byte)dest; + i += 2; + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/simple/IA64.java b/org.tukani.xz/src/org/tukaani/xz/simple/IA64.java new file mode 100644 index 00000000..776a1b79 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/simple/IA64.java @@ -0,0 +1,81 @@ +/* + * BCJ filter for Itanium (IA-64) instructions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
+ */ + +package org.tukaani.xz.simple; + +public final class IA64 implements SimpleFilter { + private static final int[] BRANCH_TABLE = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 }; + + private final boolean isEncoder; + private int pos; + + public IA64(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 16; + int i; + + for (i = off; i <= end; i += 16) { + int instrTemplate = buf[i] & 0x1F; + int mask = BRANCH_TABLE[instrTemplate]; + + for (int slot = 0, bitPos = 5; slot < 3; ++slot, bitPos += 41) { + if (((mask >>> slot) & 1) == 0) + continue; + + int bytePos = bitPos >>> 3; + int bitRes = bitPos & 7; + + long instr = 0; + for (int j = 0; j < 6; ++j) + instr |= (buf[i + bytePos + j] & 0xFFL) << (8 * j); + + long instrNorm = instr >>> bitRes; + + if (((instrNorm >>> 37) & 0x0F) != 0x05 + || ((instrNorm >>> 9) & 0x07) != 0x00) + continue; + + int src = (int)((instrNorm >>> 13) & 0x0FFFFF); + src |= ((int)(instrNorm >>> 36) & 1) << 20; + src <<= 4; + + int dest; + if (isEncoder) + dest = src + (pos + i - off); + else + dest = src - (pos + i - off); + + dest >>>= 4; + + instrNorm &= ~(0x8FFFFFL << 13); + instrNorm |= (dest & 0x0FFFFFL) << 13; + instrNorm |= (dest & 0x100000L) << (36 - 20); + + instr &= (1 << bitRes) - 1; + instr |= instrNorm << bitRes; + + for (int j = 0; j < 6; ++j) + buf[i + bytePos + j] = (byte)(instr >>> (8 * j)); + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/simple/PowerPC.java b/org.tukani.xz/src/org/tukaani/xz/simple/PowerPC.java new file mode 100644 index 00000000..b7400ab5 --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/simple/PowerPC.java @@ -0,0 +1,50 @@ +/* + * BCJ filter for big endian PowerPC instructions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. 
+ * You can do whatever you want with this file. + */ + +package org.tukaani.xz.simple; + +public final class PowerPC implements SimpleFilter { + private final boolean isEncoder; + private int pos; + + public PowerPC(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos; + } + + public int code(byte[] buf, int off, int len) { + int end = off + len - 4; + int i; + + for (i = off; i <= end; i += 4) { + if ((buf[i] & 0xFC) == 0x48 && (buf[i + 3] & 0x03) == 0x01) { + int src = ((buf[i] & 0x03) << 24) + | ((buf[i + 1] & 0xFF) << 16) + | ((buf[i + 2] & 0xFF) << 8) + | (buf[i + 3] & 0xFC); + + int dest; + if (isEncoder) + dest = src + (pos + i - off); + else + dest = src - (pos + i - off); + + buf[i] = (byte)(0x48 | ((dest >>> 24) & 0x03)); + buf[i + 1] = (byte)(dest >>> 16); + buf[i + 2] = (byte)(dest >>> 8); + buf[i + 3] = (byte)((buf[i + 3] & 0x03) | dest); + } + } + + i -= off; + pos += i; + return i; + } +} diff --git a/org.tukani.xz/src/org/tukaani/xz/simple/SPARC.java b/org.tukani.xz/src/org/tukaani/xz/simple/SPARC.java new file mode 100644 index 00000000..913c8acc --- /dev/null +++ b/org.tukani.xz/src/org/tukaani/xz/simple/SPARC.java @@ -0,0 +1,56 @@ +/* + * BCJ filter for SPARC instructions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. 
/*
 * BCJ filter for SPARC instructions
 *
 * Authors: Lasse Collin
 *          Igor Pavlov
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * Converts the operand of aligned 4-byte words that start with 0x40
 * followed by two zero bits, or with 0x7F followed by two one bits,
 * between its relative and absolute form.
 */
public final class SPARC implements SimpleFilter {
    // true adds the position to the operand, false subtracts it.
    private final boolean isEncoder;
    // Position assigned to buf[off] of the next code() call.
    private int pos;

    /**
     * @param isEncoder true to add the position to the operand, false to
     *                  subtract it
     * @param startPos  position assigned to the first byte that will be
     *                  passed to {@link #code}
     */
    public SPARC(boolean isEncoder, int startPos) {
        this.isEncoder = isEncoder;
        pos = startPos;
    }

    /**
     * Filters {@code buf[off .. off + len)} in place, one 4-byte word at
     * a time.
     *
     * @return the number of bytes processed; a trailing partial word is
     *         not counted
     */
    public int code(byte[] buf, int off, int len) {
        int end = off + len - 4;
        int i;

        for (i = off; i <= end; i += 4) {
            if ((buf[i] == 0x40 && (buf[i + 1] & 0xC0) == 0x00)
                    || (buf[i] == 0x7F && (buf[i + 1] & 0xC0) == 0xC0)) {
                // Read the word as big endian and scale it to a byte
                // offset; the shift also drops the two top bits.
                int src = ((buf[i] & 0xFF) << 24)
                          | ((buf[i + 1] & 0xFF) << 16)
                          | ((buf[i + 2] & 0xFF) << 8)
                          | (buf[i + 3] & 0xFF);
                src <<= 2;

                int dest;
                if (isEncoder)
                    dest = src + (pos + i - off);
                else
                    dest = src - (pos + i - off);

                dest >>>= 2;
                // Copy bit 22 into bits 22..29 (sign extension), keep the
                // low 22 bits and set bit 30.
                dest = (((0 - ((dest >>> 22) & 1)) << 22) & 0x3FFFFFFF)
                       | (dest & 0x3FFFFF) | 0x40000000;

                buf[i] = (byte)(dest >>> 24);
                buf[i + 1] = (byte)(dest >>> 16);
                buf[i + 2] = (byte)(dest >>> 8);
                buf[i + 3] = (byte)dest;
            }
        }

        i -= off;
        pos += i;
        return i;
    }
}

/*
 * Common interface of the BCJ filters
 *
 * Author: Lasse Collin
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

/**
 * Interface implemented by the BCJ filters in this package.
 */
public interface SimpleFilter {
    /**
     * Filters {@code buf[off .. off + len)} in place.
     *
     * @param buf buffer holding the data to filter
     * @param off index of the first byte to filter
     * @param len number of bytes available starting at {@code off}
     * @return the number of bytes processed, which may be less than
     *         {@code len}
     */
    int code(byte[] buf, int off, int len);
}
+ * You can do whatever you want with this file. + */ + +package org.tukaani.xz.simple; + +public final class X86 implements SimpleFilter { + private static final boolean[] MASK_TO_ALLOWED_STATUS + = {true, true, true, false, true, false, false, false}; + + private static final int[] MASK_TO_BIT_NUMBER = {0, 1, 2, 2, 3, 3, 3, 3}; + + private final boolean isEncoder; + private int pos; + private int prevMask = 0; + + private static boolean test86MSByte(byte b) { + int i = b & 0xFF; + return i == 0x00 || i == 0xFF; + } + + public X86(boolean isEncoder, int startPos) { + this.isEncoder = isEncoder; + pos = startPos + 5; + } + + public int code(byte[] buf, int off, int len) { + int prevPos = off - 1; + int end = off + len - 5; + int i; + + for (i = off; i <= end; ++i) { + if ((buf[i] & 0xFE) != 0xE8) + continue; + + prevPos = i - prevPos; + if ((prevPos & ~3) != 0) { // (unsigned)prevPos > 3 + prevMask = 0; + } else { + prevMask = (prevMask << (prevPos - 1)) & 7; + if (prevMask != 0) { + if (!MASK_TO_ALLOWED_STATUS[prevMask] || test86MSByte( + buf[i + 4 - MASK_TO_BIT_NUMBER[prevMask]])) { + prevPos = i; + prevMask = (prevMask << 1) | 1; + continue; + } + } + } + + prevPos = i; + + if (test86MSByte(buf[i + 4])) { + int src = (buf[i + 1] & 0xFF) + | ((buf[i + 2] & 0xFF) << 8) + | ((buf[i + 3] & 0xFF) << 16) + | ((buf[i + 4] & 0xFF) << 24); + int dest; + while (true) { + if (isEncoder) + dest = src + (pos + i - off); + else + dest = src - (pos + i - off); + + if (prevMask == 0) + break; + + int index = MASK_TO_BIT_NUMBER[prevMask] * 8; + if (!test86MSByte((byte)(dest >>> (24 - index)))) + break; + + src = dest ^ ((1 << (32 - index)) - 1); + } + + buf[i + 1] = (byte)dest; + buf[i + 2] = (byte)(dest >>> 8); + buf[i + 3] = (byte)(dest >>> 16); + buf[i + 4] = (byte)(~(((dest >>> 24) & 1) - 1)); + i += 4; + } else { + prevMask = (prevMask << 1) | 1; + } + } + + prevPos = i - prevPos; + prevMask = ((prevPos & ~3) != 0) ? 
0 : prevMask << (prevPos - 1); + + i -= off; + pos += i; + return i; + } +} diff --git a/runtime/Jetty.launch b/runtime/Jetty.launch index 75b5ee1f..8bd16417 100644 --- a/runtime/Jetty.launch +++ b/runtime/Jetty.launch @@ -63,5 +63,5 @@ - + diff --git a/runtime/build.ant b/runtime/build.ant index addf8b81..ff022ce2 100644 --- a/runtime/build.ant +++ b/runtime/build.ant @@ -233,9 +233,6 @@ - - -