From dbdcb77d181f40c07aea6752b4cdac8ca90afea6 Mon Sep 17 00:00:00 2001 From: Patrick Favre-Bulle Date: Tue, 31 Jul 2018 10:34:22 +0200 Subject: [PATCH] Swap Base64 implementation with Apache Commons Codec ref #8 --- .../lib/crypto/bcrypt/Radix64ApacheCodec.java | 568 ------------------ .../lib/crypto/bcrypt/Radix64Encoder.java | 427 ++++++++----- .../favre/lib/crypto/bcrypt/Radix64Test.java | 24 +- 3 files changed, 293 insertions(+), 726 deletions(-) delete mode 100644 modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64ApacheCodec.java diff --git a/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64ApacheCodec.java b/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64ApacheCodec.java deleted file mode 100644 index 4643ab8..0000000 --- a/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64ApacheCodec.java +++ /dev/null @@ -1,568 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package at.favre.lib.crypto.bcrypt; - -import java.nio.charset.StandardCharsets; -import java.util.Arrays; - -/** - * Provides Radix64ApacheCodec encoding and decoding as defined by RFC 2045. - * - *

- * This class implements section 6.8. Radix64ApacheCodec Content-Transfer-Encoding from RFC 2045 Multipurpose - * Internet Mail Extensions (MIME) Part One: Format of Internet Message Bodies by Freed and Borenstein. - *

- *

- * The class can be parameterized in the following manner with various constructors: - *

- * - *

- * The URL-safe parameter is only applied to encode operations. Decoding seamlessly handles both modes. - *

- *

- * Since this class operates directly on byte streams, and not character streams, it is hard-coded to only - * encode/decode character encodings which are compatible with the lower 127 ASCII chart (ISO-8859-1, Windows-1252, - * UTF-8, etc). - *

- *

- * This class is thread-safe. - *

- * - * @version $Id: Radix64ApacheCodec.java 1789158 2017-03-28 15:04:58Z sebb $ - * @see RFC 2045 - * @since 1.0 - */ -public class Radix64ApacheCodec implements Radix64Encoder { - - - /** - * Chunk separator per RFC 2045 section 2.1. - * - *

- * N.B. The next major release may break compatibility and make this field private. - *

- * - * @see RFC 2045 section 2.1 - */ - private static final byte[] CHUNK_SEPARATOR = {'\r', '\n'}; - /** - * BASE32 characters are 6 bits in length. - * They are formed by taking a block of 3 octets to form a 24-bit string, - * which is converted into 4 BASE64 characters. - */ - private static final int BITS_PER_ENCODED_BYTE = 6; - private static final int BYTES_PER_UNENCODED_BLOCK = 3; - private static final int BYTES_PER_ENCODED_BLOCK = 4; - /** - * This array is a lookup table that translates 6-bit positive integer index values into their "Radix64ApacheCodec Alphabet" - * equivalents as specified in Table 1 of RFC 2045. - *

- * Thanks to "commons" project in ws.apache.org for this code. - * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ - */ - private static final byte[] STANDARD_ENCODE_TABLE = { - '.', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', - 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', - 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', - 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', - 'u', 'v', 'w', 'x', 'y', 'z', '0', '1', '2', '3', '4', '5', - '6', '7', '8', '9' - }; - - /** - * This array is a lookup table that translates Unicode characters drawn from the "Radix64ApacheCodec Alphabet" (as specified - * in Table 1 of RFC 2045) into their 6-bit positive integer equivalents. Characters that are not in the Radix64ApacheCodec - * alphabet but fall within the bounds of the array are translated to -1. - *

- * Note: '+' and '-' both decode to 62. '/' and '_' both decode to 63. This means decoder seamlessly handles both - * URL_SAFE and STANDARD base64. (The encoder, on the other hand, needs to know ahead of time what to emit). - *

- * Thanks to "commons" project in ws.apache.org for this code. - * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ - */ - private static final byte[] DECODE_TABLE = { - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 54, 55, 56, 57, - 58, 59, 60, 61, 62, 63, -1, -1, -1, -2, -1, -1, -1, 2, 3, 4, 5, 6, 7, - 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, - 26, 27, -1, -1, -1, -1, -1, -1, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, - 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1 - }; - - /** - * Mask used to extract 6 bits, used when encoding - */ - private static final int MASK_6BITS = 0x3f; - private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; - /** - * Defines the default buffer size - currently {@value} - * - must be large enough for at least one encoded block+separator - */ - private static final int DEFAULT_BUFFER_SIZE = 8192; - /** - * Mask used to extract 8 bits, used in decoding bytes - */ - private static final int MASK_8BITS = 0xff; - /** - * Encode table to use: either STANDARD or URL_SAFE. Note: the DECODE_TABLE above remains static because it is able - * to decode both STANDARD and URL_SAFE streams, but the encodeTable must be a member variable so we can switch - * between the two modes. 
- */ - private final byte[] encodeTable; - /** - * Line separator for encoding. Not used when decoding. Only used if lineLength > 0. - */ - private final byte[] lineSeparator; - /** - * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. - * decodeSize = 3 + lineSeparator.length; - */ - private final int decodeSize; - /** - * Convenience variable to help us determine when our buffer is going to run out of room and needs resizing. - * encodeSize = 4 + lineSeparator.length; - */ - private final int encodeSize; - private final byte pad; // instance variable just in case it needs to vary later - - /** - * Chunksize for encoding. Not used when decoding. - * A value of zero or less implies no chunking of the encoded data. - * Rounded down to nearest multiple of encodedBlockSize. - */ - private final int lineLength; - - /** - * Creates a Radix64ApacheCodec codec used for decoding (all modes) and encoding in URL-unsafe mode. - *

- * When encoding the line length is 0 (no chunking), and the encoding table is STANDARD_ENCODE_TABLE. - *

- * - *

- * When decoding all variants are supported. - *

- */ - Radix64ApacheCodec() { - this(0, CHUNK_SEPARATOR); - } - - /** - * Creates a Radix64ApacheCodec codec used for decoding (all modes) and encoding in URL-unsafe mode. - *

- * When encoding the line length and line separator are given in the constructor, and the encoding table is - * STANDARD_ENCODE_TABLE. - *

- *

- * Line lengths that aren't multiples of 4 will still essentially end up being multiples of 4 in the encoded data. - *

- *

- * When decoding all variants are supported. - *

- * - * @param lineLength Each line of encoded data will be at most of the given length (rounded down to nearest multiple of - * 4). If lineLength <= 0, then the output will not be divided into lines (chunks). Ignored when - * decoding. - * @param lineSeparator Each line of encoded data will end with this sequence of bytes. - * @throws IllegalArgumentException The provided lineSeparator included some base64 characters. That's not going to work! - * @since 1.4 - */ - private Radix64ApacheCodec(final int lineLength, final byte[] lineSeparator) { - int encodedBlockSize = BYTES_PER_ENCODED_BLOCK; - int chunkSeparatorLength = lineSeparator == null ? 0 : lineSeparator.length; - final boolean useChunking = lineLength > 0 && chunkSeparatorLength > 0; - this.lineLength = useChunking ? (lineLength / encodedBlockSize) * encodedBlockSize : 0; - this.pad = '='; - - if (lineSeparator != null) { - if (containsAlphabetOrPad(lineSeparator)) { - final String sep = newStringUtf8(lineSeparator); - throw new IllegalArgumentException("lineSeparator must not contain base64 characters: [" + sep + "]"); - } - if (lineLength > 0) { // null line-sep forces no chunking rather than throwing IAE - this.encodeSize = BYTES_PER_ENCODED_BLOCK + lineSeparator.length; - this.lineSeparator = new byte[lineSeparator.length]; - System.arraycopy(lineSeparator, 0, this.lineSeparator, 0, lineSeparator.length); - } else { - this.encodeSize = BYTES_PER_ENCODED_BLOCK; - this.lineSeparator = null; - } - } else { - this.encodeSize = BYTES_PER_ENCODED_BLOCK; - this.lineSeparator = null; - } - this.decodeSize = this.encodeSize - 1; - this.encodeTable = STANDARD_ENCODE_TABLE; - } - - private static String newStringUtf8(final byte[] bytes) { - return bytes == null ? null : new String(bytes, StandardCharsets.UTF_8); - } - - /** - * Tests a given byte array to see if it contains any characters within the alphabet or PAD. - *

- * Intended for use in checking line-ending arrays - * - * @param arrayOctet byte array to test - * @return true if any byte is a valid character in the alphabet or PAD; false otherwise - */ - private boolean containsAlphabetOrPad(final byte[] arrayOctet) { - if (arrayOctet == null) { - return false; - } - for (final byte element : arrayOctet) { - if (pad == element || isInAlphabet(element)) { - return true; - } - } - return false; - } - - /** - * Returns whether or not the octet is in the Radix64ApacheCodec alphabet. - * - * @param octet The value to test - * @return true if the value is defined in the the Radix64ApacheCodec alphabet false otherwise. - */ - private boolean isInAlphabet(final byte octet) { - return octet >= 0 && DECODE_TABLE[octet] != -1; - } - - /** - *

- * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with - * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last - * remaining bytes (if not multiple of 3). - *

- *

Note: no padding is added when encoding using the URL-safe alphabet.

- *

- * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. - * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ - *

- * - * @param in byte[] array of binary data to base64 encode. - * @param inPos Position to start reading data from. - * @param inAvail Amount of bytes available from input for encoding. - * @param context the context to be used - */ - private void encode(final byte[] in, int inPos, final int inAvail, final Context context) { - if (context.eof) { - return; - } - // inAvail < 0 is how we're informed of EOF in the underlying data we're - // encoding. - if (inAvail < 0) { - context.eof = true; - if (0 == context.modulus && lineLength == 0) { - return; // no leftovers to process and not using chunking - } - final byte[] buffer = ensureBufferSize(encodeSize, context); - final int savedPos = context.pos; - switch (context.modulus) { // 0-2 - case 0: // nothing to do here - break; - case 1: // 8 bits = 6 + 2 - // top 6 bits: - buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; - // remaining 2: - buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; - break; - - case 2: // 16 bits = 6 + 6 + 4 - buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS]; - buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS]; - buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS]; - break; - default: - throw new IllegalStateException("Impossible modulus " + context.modulus); - } - context.currentLinePos += context.pos - savedPos; // keep track of current line position - // if currentPos == 0 we are at the start of a line, so don't add CRLF - if (lineLength > 0 && context.currentLinePos > 0) { - System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); - context.pos += lineSeparator.length; - } - } else { - for (int i = 0; i < inAvail; i++) { - final byte[] buffer = ensureBufferSize(encodeSize, context); - context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK; - int b = in[inPos++]; - if (b < 0) { - b += 256; - } - 
context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE - if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract - buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS]; - buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 12) & MASK_6BITS]; - buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS]; - buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS]; - context.currentLinePos += BYTES_PER_ENCODED_BLOCK; - if (lineLength > 0 && lineLength <= context.currentLinePos) { - System.arraycopy(lineSeparator, 0, buffer, context.pos, lineSeparator.length); - context.pos += lineSeparator.length; - context.currentLinePos = 0; - } - } - } - } - } - - /** - * Ensure that the buffer has room for size bytes - * - * @param size minimum spare space required - * @param context the context to be used - * @return the buffer - */ - private byte[] ensureBufferSize(final int size, final Context context) { - if ((context.buffer == null) || (context.buffer.length < context.pos + size)) { - return resizeBuffer(context); - } - return context.buffer; - } - - /** - * Increases our buffer by the {@link #DEFAULT_BUFFER_RESIZE_FACTOR}. - * - * @param context the context to be used - */ - private byte[] resizeBuffer(final Context context) { - if (context.buffer == null) { - context.buffer = new byte[DEFAULT_BUFFER_SIZE]; - context.pos = 0; - context.readPos = 0; - } else { - final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; - System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); - context.buffer = b; - } - return context.buffer; - } - - /** - *

- * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once - * with the data to decode, and once with inAvail set to "-1" to alert decoder that EOF has been reached. The "-1" - * call is not necessary when decoding, but it doesn't hurt, either. - *

- *

- * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are - * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, - * garbage-out philosophy: it will not check the provided data for validity. - *

- *

- * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. - * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ - *

- * - * @param in byte[] array of ascii data to base64 decode. - * @param inPos Position to start reading data from. - * @param inAvail Amount of bytes available from input for encoding. - * @param context the context to be used - */ - private void decode(final byte[] in, int inPos, final int inAvail, final Context context) { - if (context.eof) { - return; - } - if (inAvail < 0) { - context.eof = true; - } - for (int i = 0; i < inAvail; i++) { - final byte[] buffer = ensureBufferSize(decodeSize, context); - final byte b = in[inPos++]; - if (b == pad) { - // We're done. - context.eof = true; - break; - } - if (b >= 0) { - final int result = DECODE_TABLE[b]; - if (result >= 0) { - context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK; - context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; - if (context.modulus == 0) { - buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS); - buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); - buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); - } - } - } - } - - // Two forms of EOF as far as base64 decoder is concerned: actual - // EOF (-1) and first time '=' character is encountered in stream. - // This approach makes the '=' padding characters completely optional. - if (context.eof && context.modulus != 0) { - final byte[] buffer = ensureBufferSize(decodeSize, context); - - // We have some spare bits remaining - // Output all whole multiples of 8 bits and ignore the rest - switch (context.modulus) { - // case 0 : // impossible, as excluded above - case 1: // 6 bits - ignore entirely - // TODO not currently tested; perhaps it is impossible? 
- break; - case 2: // 12 bits = 8 + 4 - context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits - buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); - break; - case 3: // 18 bits = 8 + 8 + 2 - context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits - buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); - buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); - break; - default: - throw new IllegalStateException("Impossible modulus " + context.modulus); - } - } - } - - @Override - public byte[] encode(byte[] rawBytes) { - final Context c = new Context(); - encode(rawBytes, 0, rawBytes.length, c); - encode(rawBytes, 0, -1, c); // Notify encoder of EOF. - final byte[] buf = new byte[c.pos - c.readPos]; - readResults(buf, 0, buf.length, c); - return buf; - } - - /** - * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail - * bytes. Returns how many bytes were actually extracted. - *

- * Package protected for access from I/O streams. - * - * @param b byte[] array to extract the buffered data into. - * @param bPos position in byte[] array to start extraction at. - * @param bAvail amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). - * @param context the context to be used - * @return The number of bytes successfully extracted into the provided byte[] array. - */ - private int readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { - if (context.buffer != null) { - final int len = Math.min(available(context), bAvail); - System.arraycopy(context.buffer, context.readPos, b, bPos, len); - context.readPos += len; - if (context.readPos >= context.pos) { - context.buffer = null; // so hasData() will return false, and this method can return -1 - } - return len; - } - return context.eof ? -1 : 0; - } - - private int available(final Context context) { // package protected for access from I/O streams - return context.buffer != null ? context.pos - context.readPos : 0; - } - - @Override - public byte[] decode(byte[] utf8EncodedRadix64String) { - final Context c = new Context(); - decode(utf8EncodedRadix64String, 0, utf8EncodedRadix64String.length, c); - decode(utf8EncodedRadix64String, 0, -1, c); // Notify decoder of EOF. - final byte[] result = new byte[c.pos]; - readResults(result, 0, result.length, c); - return result; - } - - /** - * Holds thread context so classes can be thread-safe. - *

- * This class is not itself thread-safe; each thread must allocate its own copy. - * - * @since 1.7 - */ - static class Context { - - /** - * Place holder for the bytes we're dealing with for our based logic. - * Bitwise operations store and extract the encoding or decoding from this variable. - */ - int ibitWorkArea; - - /** - * Place holder for the bytes we're dealing with for our based logic. - * Bitwise operations store and extract the encoding or decoding from this variable. - */ - long lbitWorkArea; - - /** - * Buffer for streaming. - */ - byte[] buffer; - - /** - * Position where next character should be written in the buffer. - */ - int pos; - - /** - * Position where next character should be read from the buffer. - */ - int readPos; - - /** - * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, - * and must be thrown away. - */ - boolean eof; - - /** - * Variable tracks how many characters have been written to the current line. Only used when encoding. We use - * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). - */ - int currentLinePos; - - /** - * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This - * variable helps track that. - */ - int modulus; - - Context() { - } - - /** - * Returns a String useful for debugging (especially within a debugger.) - * - * @return a String useful for debugging. 
- */ - @SuppressWarnings("boxing") // OK to ignore boxing here - @Override - public String toString() { - return String.format("%s[buffer=%s, currentLinePos=%s, eof=%s, ibitWorkArea=%s, lbitWorkArea=%s, " + - "modulus=%s, pos=%s, readPos=%s]", this.getClass().getSimpleName(), Arrays.toString(buffer), - currentLinePos, eof, ibitWorkArea, lbitWorkArea, modulus, pos, readPos); - } - } -} diff --git a/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64Encoder.java b/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64Encoder.java index f46aadd..e6f5531 100644 --- a/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64Encoder.java +++ b/modules/bcrypt/src/main/java/at/favre/lib/crypto/bcrypt/Radix64Encoder.java @@ -25,8 +25,6 @@ * questions. */ -import java.util.Arrays; - /** * Encoder for the custom Base64 variant of BCrypt (called Radix64 here). It has the same rules as Base64 but uses a * different mapping table than the various RFCs @@ -59,30 +57,23 @@ public interface Radix64Encoder { byte[] decode(byte[] utf8EncodedRadix64String); /** - - * - * This class implements an encoder for encoding byte data using - * the Base64 encoding scheme as used in OpenBSD which is not compatible - * with the RFC Base64 schemas. 
- * - * Required Information for GPL-2 License with Classpath Exception (http://openjdk.java.net/legal/gplv2+ce.html) - * - * Original: http://hg.openjdk.java.net/jdk8/jdk8/jdk/file/687fd7c7986d/src/share/classes/java/util/Base64.java - * - * Changes: - * - simplified alias method - * - simplified code - * - removed most features (padding, url encoding, MIME) - * - replaced with Base64 mapping table to use OpenBSD Radix64 table - * + * A mod of the Apache Commons Codec Base64 logic */ - final class Default implements Radix64Encoder { + class Default implements Radix64Encoder { + + private static final int BITS_PER_ENCODED_BYTE = 6; + private static final int BYTES_PER_UNENCODED_BLOCK = 3; + private static final int BYTES_PER_ENCODED_BLOCK = 4; + private static final int MASK_6BITS = 0x3f; + private static final int MASK_8BITS = 0xff; + private static final int DEFAULT_BUFFER_RESIZE_FACTOR = 2; + private static final int DEFAULT_BUFFER_SIZE = 8192; /** - * This array is a lookup table that translates 6-bit positive integer - * index values into their "Base64 Alphabet" equivalents + * This array is a lookup table that translates 6-bit positive integer index values into their "Radix64ApacheCodec Alphabet" + * equivalents. */ - private static final char[] toBase64 = { + private static final byte[] STANDARD_ENCODE_TABLE = { '.', '/', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', @@ -91,148 +82,310 @@ final class Default implements Radix64Encoder { '6', '7', '8', '9' }; - private int outLength(int srclen) { - int n = srclen % 3; - return 4 * (srclen / 3) + (n == 0 ? 0 : n + 1); - } + /** + * This array is a lookup table that translates Unicode characters drawn from the "Radix64ApacheCodec Alphabet" into their 6-bit positive + * integer equivalents. 
+ */ + private static final byte[] DECODE_TABLE = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 1, 54, 55, 56, 57, + 58, 59, 60, 61, 62, 63, -1, -1, -1, -2, -1, -1, -1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, + 26, 27, -1, -1, -1, -1, -1, -1, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, + 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + -1, -1, -1, -1, -1 + }; + + private final byte[] encodeTable; + private final int decodeSize; + private final int encodeSize; /** - * Encodes all bytes from the specified byte array into a newly-allocated - * byte array using the encoding scheme. The returned byte - * array is of the length of the resulting bytes. + * Creates a Radix64ApacheCodec codec used for decoding (all modes) and encoding in URL-unsafe mode. + *

+ * When encoding, the output is never chunked into lines (this constructor takes no line length or line separator), and the encoding table is + * STANDARD_ENCODE_TABLE. + *

+ *

+ * Every three input bytes are encoded into four output characters; no line length applies because the output is not split into lines. + *

+ *

+ * When decoding all variants are supported. + *

* - * @param src the byte array to encode - * @return A newly-allocated byte array containing the resulting - * encoded bytes. + * @throws IllegalArgumentException The provided lineSeparator included some base64 characters. That's not going to work! + * @since 1.4 */ - - private int encode0(byte[] src, int end, byte[] dst) { - char[] base64 = toBase64; - int sp = 0; - int slen = (end) / 3 * 3; - int dp = 0; - while (sp < slen) { - int sl0 = Math.min(sp + slen, slen); - for (int sp0 = sp, dp0 = dp; sp0 < sl0; ) { - int bits = (src[sp0++] & 0xff) << 16 | - (src[sp0++] & 0xff) << 8 | - (src[sp0++] & 0xff); - dst[dp0++] = (byte) base64[(bits >>> 18) & 0x3f]; - dst[dp0++] = (byte) base64[(bits >>> 12) & 0x3f]; - dst[dp0++] = (byte) base64[(bits >>> 6) & 0x3f]; - dst[dp0++] = (byte) base64[bits & 0x3f]; - } - int dlen = (sl0 - sp) / 3 * 4; - dp += dlen; - sp = sl0; - } - if (sp < end) { // 1 or 2 leftover bytes - int b0 = src[sp++] & 0xff; - dst[dp++] = (byte) base64[b0 >> 2]; - if (sp == end) { - dst[dp++] = (byte) base64[(b0 << 4) & 0x3f]; - } else { - int b1 = src[sp++] & 0xff; - dst[dp++] = (byte) base64[(b0 << 4) & 0x3f | (b1 >> 4)]; - dst[dp++] = (byte) base64[(b1 << 2) & 0x3f]; - } - } - return dp; + public Default() { + this.encodeSize = BYTES_PER_ENCODED_BLOCK; + this.decodeSize = this.encodeSize - 1; + this.encodeTable = STANDARD_ENCODE_TABLE; } @Override public byte[] encode(byte[] rawBytes) { - int len = outLength(rawBytes.length); // dst array size - byte[] dst = new byte[len]; - int ret = encode0(rawBytes, rawBytes.length, dst); - if (ret != dst.length) - return Arrays.copyOf(dst, ret); - return dst; + final Context c = new Context(); + encode(rawBytes, 0, rawBytes.length, c); + encode(rawBytes, 0, -1, c); // Notify encoder of EOF. 
+ final byte[] buf = new byte[c.pos - c.readPos]; + readResults(buf, 0, buf.length, c); + return buf; } /** - * Lookup table for decoding unicode characters drawn from the - * "Base64 Alphabet" into their 6-bit positive integer equivalents. - * Characters that are not in the Base64 alphabet but fall within the bounds of - * the array are encoded to -1. + * Extracts buffered data into the provided byte[] array, starting at position bPos, up to a maximum of bAvail + * bytes. Returns how many bytes were actually extracted. + *

+ * Package protected for access from I/O streams. + * + * @param b byte[] array to extract the buffered data into. + * @param bPos position in byte[] array to start extraction at. + * @param bAvail amount of bytes we're allowed to extract. We may extract fewer (if fewer are available). + * @param context the context to be used */ - private static final int[] fromBase64 = new int[256]; - - static { - Arrays.fill(fromBase64, -1); - for (int i = 0; i < toBase64.length; i++) - fromBase64[toBase64[i]] = i; - fromBase64['='] = -2; + private void readResults(final byte[] b, final int bPos, final int bAvail, final Context context) { + if (context.buffer != null) { + final int len = Math.min(context.pos - context.readPos, bAvail); + System.arraycopy(context.buffer, context.readPos, b, bPos, len); + context.readPos += len; + if (context.readPos >= context.pos) { + context.buffer = null; // so hasData() will return false, and this method can return -1 + } + } } - @Override - public byte[] decode(byte[] src) { - byte[] dst = new byte[outLengthDecode(src.length)]; - int ret = decode0(src, 0, src.length, dst); - if (ret != dst.length) { - dst = Arrays.copyOf(dst, ret); + /** + *

+ * Encodes all of the provided data, starting at inPos, for inAvail bytes. Must be called at least twice: once with + * the data to encode, and once with inAvail set to "-1" to alert encoder that EOF has been reached, to flush last + * remaining bytes (if not multiple of 3). + *

+ *

Note: no padding is added when encoding.

+ *

+ * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. + * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ + *

+ * + * @param in byte[] array of binary data to base64 encode. + * @param inPos Position to start reading data from. + * @param inAvail Amount of bytes available from input for encoding. + * @param context the context to be used + */ + private void encode(final byte[] in, int inPos, final int inAvail, final Context context) { + if (context.eof) { + return; + } + // inAvail < 0 is how we're informed of EOF in the underlying data we're + // encoding. + if (inAvail < 0) { + context.eof = true; + if (0 == context.modulus) { + return; // no leftovers to process and not using chunking + } + final byte[] buffer = ensureBufferSize(encodeSize, context); + final int savedPos = context.pos; + switch (context.modulus) { // 0-2 + case 0: // nothing to do here + break; + case 1: // 8 bits = 6 + 2 + // top 6 bits: + buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 2) & MASK_6BITS]; + // remaining 2: + buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 4) & MASK_6BITS]; + break; + case 2: // 16 bits = 6 + 6 + 4 + buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 10) & MASK_6BITS]; + buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 4) & MASK_6BITS]; + buffer[context.pos++] = encodeTable[(context.ibitWorkArea << 2) & MASK_6BITS]; + break; + default: + throw new IllegalStateException("Impossible modulus " + context.modulus); + } + context.currentLinePos += context.pos - savedPos; // keep track of current line position + } else { + for (int i = 0; i < inAvail; i++) { + final byte[] buffer = ensureBufferSize(encodeSize, context); + context.modulus = (context.modulus + 1) % BYTES_PER_UNENCODED_BLOCK; + int b = in[inPos++]; + if (b < 0) { + b += 256; + } + context.ibitWorkArea = (context.ibitWorkArea << 8) + b; // BITS_PER_BYTE + if (0 == context.modulus) { // 3 bytes = 24 bits = 4 * 6 bits to extract + buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 18) & MASK_6BITS]; + buffer[context.pos++] = encodeTable[(context.ibitWorkArea 
>> 12) & MASK_6BITS]; + buffer[context.pos++] = encodeTable[(context.ibitWorkArea >> 6) & MASK_6BITS]; + buffer[context.pos++] = encodeTable[context.ibitWorkArea & MASK_6BITS]; + context.currentLinePos += BYTES_PER_ENCODED_BLOCK; + } + } } - return dst; } - private int outLengthDecode(int len) { - int paddings = 0; - if (len == 0) - return 0; - if (len < 2) { - throw new IllegalArgumentException("Input byte[] should at least have 2 bytes for radix64 bytes"); + /** + * Ensure that the buffer has room for size bytes + * + * @param size minimum spare space required + * @param context the context to be used + * @return the buffer + */ + private byte[] ensureBufferSize(final int size, final Context context) { + if ((context.buffer == null) || (context.buffer.length < context.pos + size)) { + if (context.buffer == null) { + context.buffer = new byte[DEFAULT_BUFFER_SIZE]; + context.pos = 0; + context.readPos = 0; + } else { + final byte[] b = new byte[context.buffer.length * DEFAULT_BUFFER_RESIZE_FACTOR]; + System.arraycopy(context.buffer, 0, b, 0, context.buffer.length); + context.buffer = b; + } + return context.buffer; } - if ((len & 0x3) != 0) - paddings = 4 - (len & 0x3); - return 3 * ((len + 3) / 4) - paddings; + return context.buffer; } - private int decode0(byte[] src, int sp, int sl, byte[] dst) { - int dp = 0; - int bits = 0; - int shiftto = 18; // pos of first byte of 4-byte atom - while (sp < sl) { - int b = src[sp++] & 0xff; - if ((b = fromBase64[b]) < 0) { - if (b == -2) { - // padding byte '=' - // = shiftto==18 unnecessary padding - // x= shiftto==12 a dangling single x - // x to be handled together with non-padding case - // xx= shiftto==6&&sp==sl missing last = - // xx=y shiftto==6 last is not = - if (shiftto == 6 && (sp == sl || src[sp++] != '=') || shiftto == 18) { - throw new IllegalArgumentException("Input byte array has wrong 4-byte ending unit"); + /** + *

+ * Decodes all of the provided data, starting at inPos, for inAvail bytes. Should be called at least twice: once + * with the data to decode, and once with inAvail set to "-1" to alert the decoder that EOF has been reached. The "-1" + * call is required here: only it sets the EOF flag that flushes a trailing partial block of 2 or 3 characters. + *

+ *

+ * Ignores all non-base64 characters. This is how chunked (e.g. 76 character) data is handled, since CR and LF are + * silently ignored, but has implications for other bytes, too. This method subscribes to the garbage-in, + * garbage-out philosophy: it will not check the provided data for validity. + *

+ *

+ * Thanks to "commons" project in ws.apache.org for the bitwise operations, and general approach. + * http://svn.apache.org/repos/asf/webservices/commons/trunk/modules/util/ + *

+ * + * @param in byte[] array of ascii data to base64 decode. + * @param inPos Position to start reading data from. + * @param inAvail Amount of bytes available from input for decoding. + * @param context the context to be used + */ + private void decode(final byte[] in, int inPos, final int inAvail, final Context context) { + if (context.eof) { + return; + } + if (inAvail < 0) { + context.eof = true; + } + for (int i = 0; i < inAvail; i++) { + final byte[] buffer = ensureBufferSize(decodeSize, context); + final byte b = in[inPos++]; + if (b >= 0) { + final int result = DECODE_TABLE[b]; + if (result >= 0) { + context.modulus = (context.modulus + 1) % BYTES_PER_ENCODED_BLOCK; + context.ibitWorkArea = (context.ibitWorkArea << BITS_PER_ENCODED_BYTE) + result; + if (context.modulus == 0) { + buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 16) & MASK_8BITS); + buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); + buffer[context.pos++] = (byte) (context.ibitWorkArea & MASK_8BITS); } - break; } - throw new IllegalArgumentException("Illegal base64 character " + Integer.toString(src[sp - 1], 16)); - } - bits |= (b << shiftto); - shiftto -= 6; - if (shiftto < 0) { - dst[dp++] = (byte) (bits >> 16); - dst[dp++] = (byte) (bits >> 8); - dst[dp++] = (byte) (bits); - shiftto = 18; - bits = 0; } } - // reached end of byte array or hit padding '=' characters. - if (shiftto == 6) { - dst[dp++] = (byte) (bits >> 16); - } else if (shiftto == 0) { - dst[dp++] = (byte) (bits >> 16); - dst[dp++] = (byte) (bits >> 8); - } else if (shiftto == 12) { - // dangling single "x", incorrectly encoded. - throw new IllegalArgumentException("Last unit does not have enough valid bits"); + + // Two forms of EOF as far as base64 decoder is concerned: actual + // EOF (-1) and first time '=' character is encountered in stream. + // This approach makes the '=' padding characters completely optional. 
+ if (context.eof && context.modulus != 0) { + final byte[] buffer = ensureBufferSize(decodeSize, context); + + // We have some spare bits remaining + // Output all whole multiples of 8 bits and ignore the rest + switch (context.modulus) { + // case 0 : // impossible, as excluded above + case 1: // 6 bits - ignore entirely + // reachable when one character is left over (e.g. decoding "A"); 6 bits cannot fill a byte, so nothing is written + break; + case 2: // 12 bits = 8 + 4 + context.ibitWorkArea = context.ibitWorkArea >> 4; // dump the extra 4 bits + buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); + break; + case 3: // 18 bits = 8 + 8 + 2 + context.ibitWorkArea = context.ibitWorkArea >> 2; // dump 2 bits + buffer[context.pos++] = (byte) ((context.ibitWorkArea >> 8) & MASK_8BITS); + buffer[context.pos++] = (byte) ((context.ibitWorkArea) & MASK_8BITS); + break; + default: + throw new IllegalStateException("Impossible modulus " + context.modulus); + } } + } + + @Override + public byte[] decode(byte[] utf8EncodedRadix64String) { + final Context c = new Context(); + decode(utf8EncodedRadix64String, 0, utf8EncodedRadix64String.length, c); + decode(utf8EncodedRadix64String, 0, -1, c); // Notify decoder of EOF. + final byte[] result = new byte[c.pos]; + readResults(result, 0, result.length, c); + return result; + } + + /** + * Holds thread context so classes can be thread-safe. + *

+ * This class is not itself thread-safe; each thread must allocate its own copy. + * + * @since 1.7 + */ + static class Context { + + /** + * Place holder for the bytes we're dealing with for our based logic. + * Bitwise operations store and extract the encoding or decoding from this variable. + */ + int ibitWorkArea; + + /** + * Buffer for streaming. + */ + byte[] buffer; + + /** + * Position where next character should be written in the buffer. + */ + int pos; + + /** + * Position where next character should be read from the buffer. + */ + int readPos; + + /** + * Boolean flag to indicate the EOF has been reached. Once EOF has been reached, this object becomes useless, + * and must be thrown away. + */ + boolean eof; + + /** + * Variable tracks how many characters have been written to the current line. Only used when encoding. We use + * it to make sure each encoded line never goes beyond lineLength (if lineLength > 0). + */ + int currentLinePos; + + /** + * Writes to the buffer only occur after every 3/5 reads when encoding, and every 4/8 reads when decoding. This + * variable helps track that. 
+ */ + int modulus; - if (sp < sl) { - throw new IllegalArgumentException("Input byte array has incorrect ending byte at " + sp); + Context() { } - return dp; } } } diff --git a/modules/bcrypt/src/test/java/at/favre/lib/crypto/bcrypt/Radix64Test.java b/modules/bcrypt/src/test/java/at/favre/lib/crypto/bcrypt/Radix64Test.java index a788e2d..982f1e3 100644 --- a/modules/bcrypt/src/test/java/at/favre/lib/crypto/bcrypt/Radix64Test.java +++ b/modules/bcrypt/src/test/java/at/favre/lib/crypto/bcrypt/Radix64Test.java @@ -119,29 +119,12 @@ private void testSingleEncode(int length) { //System.out.println("new EncodeTestCase(\"" + Bytes.wrap(encoded).encodeUtf8() + "\"," + new JavaByteArrayEncoder().encode(rnd) + "),"); } - @Test - public void testBigBlobApache() { - int length = 1024 * 1024 * 10; - byte[] rnd = Bytes.random(length).array(); - byte[] encoded = new Radix64ApacheCodec().encode(rnd); - byte[] decoded = new Radix64ApacheCodec().decode(encoded); - - assertArrayEquals(rnd, decoded); - if (length < 1024) { - System.out.println(Bytes.wrap(encoded).encodeUtf8()); - } else { - System.out.println(Bytes.wrap(encoded).toString()); - } - //System.out.println("new EncodeTestCase(\"" + Bytes.wrap(encoded).encodeUtf8() + "\"," + new JavaByteArrayEncoder().encode(rnd) + "),"); - } - @Test public void testEncodeAgainstRefTable() { for (TestCase encodeTestCase : referenceRadix64Table) { byte[] encoded = encoder.encode(encodeTestCase.raw); assertArrayEquals(encodeTestCase.encoded.getBytes(StandardCharsets.UTF_8), encoded); - assertArrayEquals(encodeTestCase.encoded.getBytes(StandardCharsets.UTF_8), new Radix64ApacheCodec().encode(encodeTestCase.raw)); } } @@ -150,7 +133,6 @@ public void testDecodeAgainstRefTable() { for (TestCase encodeTestCase : referenceRadix64Table) { byte[] decoded = encoder.decode(encodeTestCase.encoded.getBytes(StandardCharsets.UTF_8)); assertArrayEquals(encodeTestCase.raw, decoded); - assertArrayEquals(encodeTestCase.raw, new 
Radix64ApacheCodec().decode(encodeTestCase.encoded.getBytes(StandardCharsets.UTF_8))); } } @@ -164,9 +146,9 @@ public void testEmptyDecode() { assertArrayEquals(new byte[0], encoder.decode(new byte[0])); } - @Test(expected = IllegalArgumentException.class) - public void testSingleCharDecodeShouldThrow() { - encoder.decode("A".getBytes(StandardCharsets.UTF_8)); + @Test + public void testSingleCharDecode() { + assertArrayEquals(new byte[0], encoder.decode("A".getBytes(StandardCharsets.UTF_8))); } @Test