Azure · alzimmermsft · Jan 10, 2023 · Nov 21, 2022 · Nov 21, 2022 · Nov 29, 2022
@@ -4,12 +4,17 @@
 package com.azure.core.implementation;
 
 import java.io.ByteArrayOutputStream;
+import java.nio.ByteBuffer;
 import java.nio.charset.Charset;
+import java.util.Arrays;
 
 /**
  * This class is an extension of {@link ByteArrayOutputStream} which allows access to the backing {@code byte[]} without
  * requiring a copying of the data. The only use of this class is for internal purposes where we know it is safe to
  * directly access the {@code byte[]} without copying.
+ * <p>
+ * This class isn't meant to be thread-safe as usage should be internal to azure-core and should be guarded
+ * appropriately when used.
  */
 public class AccessibleByteArrayOutputStream extends ByteArrayOutputStream {
     /**
@@ -30,10 +35,39 @@ public AccessibleByteArrayOutputStream(int initialCapacity) {
     }
 
     @Override
-    public synchronized byte[] toByteArray() {
+    public byte[] toByteArray() {
+        return Arrays.copyOf(buf, count); // Copy only the 'count' written bytes, not the whole backing array.
+    }
+
+    /**
+     * Returns the internal {@code byte[]} without copying.
+     * <p>
+     * This will be the full {@code byte[]}; for example, if writing required it to be resized to 8192 bytes but only
+     * 6000 bytes were written, the final 2192 bytes will be undefined data. When passing the array to an API that
+     * accepts a {@code byte[]}, use the range-based overload together with {@link #count()}; if no range-based
+     * overload is available, use {@link #toByteArray()}, which copies only the range of bytes written.
+     *
+     * @return A direct reference to the internal {@code byte[]} where data is being written.
+     */
+    public byte[] toByteArrayUnsafe() {
         return buf;
     }
 
+    /**
+     * Exposes the bytes written so far as a read-only {@link ByteBuffer} without copying them.
+     * <p>
+     * The returned {@link ByteBuffer} wraps the internal {@code byte[]} directly, which is why it is handed out as
+     * read-only; changes made to the already-written content remain visible through it. Its range is fixed when
+     * this method is called, using {@code ByteBuffer.wrap(bytes, 0, count())}, so bytes written to this stream
+     * afterwards are not reflected in the {@link ByteBuffer}.
+     *
+     * @return A read-only {@link ByteBuffer} view over the written portion of the internal buffer.
+     */
+    public ByteBuffer toByteBuffer() {
+        ByteBuffer writtenRange = ByteBuffer.wrap(buf, 0, count);
+        return writtenRange.asReadOnlyBuffer();
+    }
+
     /**
      * The number of bytes that have been written to the stream.
      *
@@ -52,4 +86,17 @@ public int count() {
     public String toString(Charset charset) {
         return new String(buf, 0, count, charset);
     }
+
+    /**
+     * Gets a BOM aware string representation of the stream.
+     * <p>
+     * This method is the equivalent of calling
+     * {@code ImplUtils.bomAwareToString(toByteArrayUnsafe(), 0, count(), contentType)}.
+     *
+     * @param contentType The {@code Content-Type} header value.
+     * @return A string representation of the stream decoded using the encoding that was found.
+     */
+    public String bomAwareToString(String contentType) {
+        return ImplUtils.bomAwareToString(buf, 0, count, contentType);
+    }
 }
@@ -15,6 +15,10 @@
 import java.io.OutputStream;
 import java.net.URL;
 import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.StandardCharsets;
+import java.nio.charset.UnsupportedCharsetException;
 import java.time.DateTimeException;
 import java.time.Duration;
 import java.time.OffsetDateTime;
@@ -26,6 +30,8 @@
 import java.util.NoSuchElementException;
 import java.util.function.Function;
 import java.util.function.Supplier;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * Utility class containing implementation specific methods.
@@ -37,6 +43,16 @@ public final class ImplUtils {
     // future improvement - make this configurable
     public static final int MAX_CACHE_SIZE = 10000;
 
+    // Charsets used to decode content that starts with a UTF-32 byte order mark (BOM).
+    private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
+    private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
+    // Byte values that bomAwareToString compares against the leading bytes when looking for a BOM.
+    private static final byte ZERO = (byte) 0x00;
+    private static final byte BB = (byte) 0xBB;
+    private static final byte BF = (byte) 0xBF;
+    private static final byte EF = (byte) 0xEF;
+    private static final byte FE = (byte) 0xFE;
+    private static final byte FF = (byte) 0xFF;
+    // Captures the charset name that follows 'charset=' in a Content-Type value, ignoring case.
+    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=(\\S+)\\b", Pattern.CASE_INSENSITIVE);
+
     /**
      * Attempts to extract a retry after duration from a given set of {@link HttpHeaders}.
      * <p>
@@ -247,6 +263,61 @@ public Map.Entry<String, String> next() {
         }
     }
 
+    /**
+     * Attempts to convert a byte stream into the properly encoded String.
+     * <p>
+     * Only the {@code count} bytes starting at {@code offset} are decoded; the encoding is determined in this order.
+     * <ol>
+     *     <li>Find the byte order mark at {@code offset}; when found, it is left out of the returned String.</li>
+     *     <li>Find the charset in the {@code contentType} header.</li>
+     *     <li>Default to {@code UTF-8}.</li>
+     * </ol>
+     *
+     * @param bytes The byte array.
+     * @param offset The starting offset in the byte array.
+     * @param count The number of bytes to process in the byte array.
+     * @param contentType The {@code Content-Type} header value.
+     * @return A string representation of the byte encoded to the found encoding, or null if {@code bytes} is null.
+     */
+    public static String bomAwareToString(byte[] bytes, int offset, int count, String contentType) {
+        if (bytes == null) {
+            return null;
+        }
+
+        if (count >= 3 && bytes[offset] == EF && bytes[offset + 1] == BB && bytes[offset + 2] == BF) {
+            return new String(bytes, offset + 3, count - 3, StandardCharsets.UTF_8);
+        } else if (count >= 4 && bytes[offset] == ZERO && bytes[offset + 1] == ZERO
+            && bytes[offset + 2] == FE && bytes[offset + 3] == FF) {
+            return new String(bytes, offset + 4, count - 4, UTF_32BE);
+        } else if (count >= 4 && bytes[offset] == FF && bytes[offset + 1] == FE
+            && bytes[offset + 2] == ZERO && bytes[offset + 3] == ZERO) {
+            return new String(bytes, offset + 4, count - 4, UTF_32LE);
+        } else if (count >= 2 && bytes[offset] == FE && bytes[offset + 1] == FF) {
+            return new String(bytes, offset + 2, count - 2, StandardCharsets.UTF_16BE);
+        } else if (count >= 2 && bytes[offset] == FF && bytes[offset + 1] == FE) {
+            return new String(bytes, offset + 2, count - 2, StandardCharsets.UTF_16LE);
+        } else {
+            /*
+             * No BOM was found, so decode the whole 'offset'/'count' range with the charset named in the
+             * 'Content-Type' header; if the value isn't present or is invalid fall back to 'UTF-8'.
+             */
+            if (!CoreUtils.isNullOrEmpty(contentType)) {
+                try {
+                    Matcher charsetMatcher = CHARSET_PATTERN.matcher(contentType);
+                    if (charsetMatcher.find()) {
+                        return new String(bytes, offset, count, Charset.forName(charsetMatcher.group(1)));
+                    } else {
+                        return new String(bytes, offset, count, StandardCharsets.UTF_8);
+                    }
+                } catch (IllegalCharsetNameException | UnsupportedCharsetException ex) {
+                    return new String(bytes, offset, count, StandardCharsets.UTF_8);
+                }
+            } else {
+                return new String(bytes, offset, count, StandardCharsets.UTF_8);
+            }
+        }
+    }
+
     private ImplUtils() {
     }
 }
@@ -202,7 +202,7 @@ static ByteBuffer serializeAsJsonSerializable(Object jsonSerializable) throws IO
             JSON_WRITER_WRITE_JSON_SERIALIZABLE.writeJson(jsonWriter, jsonSerializable);
             JSON_WRITER_FLUSH.flush(jsonWriter);
 
-            return ByteBuffer.wrap(outputStream.toByteArray(), 0, outputStream.count());
+            return outputStream.toByteBuffer();
         }
     }
 
@@ -289,7 +289,7 @@ static ByteBuffer serializeAsXmlSerializable(Object bodyContent) throws IOExcept
             XML_WRITER_WRITE_XML_SERIALIZABLE.writeXml(xmlWriter, bodyContent);
             XML_WRITER_FLUSH.flush(xmlWriter);
 
-            return ByteBuffer.wrap(outputStream.toByteArray(), 0, outputStream.count());
+            return outputStream.toByteBuffer();
         } catch (IOException ex) {
             throw ex;
         } catch (Exception ex) {

@@ -6,16 +6,13 @@
 import com.azure.core.http.HttpHeaders;
 import com.azure.core.http.policy.HttpLogOptions;
 import com.azure.core.http.rest.PagedResponse;
+import com.azure.core.implementation.ImplUtils;
 import com.azure.core.util.logging.ClientLogger;
 import org.reactivestreams.Publisher;
 import reactor.core.publisher.Flux;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.StandardCharsets;
-import java.nio.charset.UnsupportedCharsetException;
 import java.time.Duration;
 import java.util.Arrays;
 import java.util.Collection;
@@ -27,8 +24,6 @@
 import java.util.Properties;
 import java.util.function.BiFunction;
 import java.util.function.Function;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 import java.util.stream.Collectors;
 
 /**
@@ -37,15 +32,6 @@
 public final class CoreUtils {
     // CoreUtils is a commonly used utility, use a static logger.
     private static final ClientLogger LOGGER = new ClientLogger(CoreUtils.class);
-    private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
-    private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
-    private static final byte ZERO = (byte) 0x00;
-    private static final byte BB = (byte) 0xBB;
-    private static final byte BF = (byte) 0xBF;
-    private static final byte EF = (byte) 0xEF;
-    private static final byte FE = (byte) 0xFE;
-    private static final byte FF = (byte) 0xFF;
-    private static final Pattern CHARSET_PATTERN = Pattern.compile("charset=([\\S]+)\\b", Pattern.CASE_INSENSITIVE);
 
     private CoreUtils() {
         // Exists only to defeat instantiation.
@@ -235,36 +221,7 @@ public static String bomAwareToString(byte[] bytes, String contentType) {
             return null;
         }
 
-        if (bytes.length >= 3 && bytes[0] == EF && bytes[1] == BB && bytes[2] == BF) {
-            return new String(bytes, 3, bytes.length - 3, StandardCharsets.UTF_8);
-        } else if (bytes.length >= 4 && bytes[0] == ZERO && bytes[1] == ZERO && bytes[2] == FE && bytes[3] == FF) {
-            return new String(bytes, 4, bytes.length - 4, UTF_32BE);
-        } else if (bytes.length >= 4 && bytes[0] == FF && bytes[1] == FE && bytes[2] == ZERO && bytes[3] == ZERO) {
-            return new String(bytes, 4, bytes.length - 4, UTF_32LE);
-        } else if (bytes.length >= 2 && bytes[0] == FE && bytes[1] == FF) {
-            return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16BE);
-        } else if (bytes.length >= 2 && bytes[0] == FF && bytes[1] == FE) {
-            return new String(bytes, 2, bytes.length - 2, StandardCharsets.UTF_16LE);
-        } else {
-            /*
-             * Attempt to retrieve the default charset from the 'Content-Encoding' header, if the value isn't
-             * present or invalid fallback to 'UTF-8' for the default charset.
-             */
-            if (!isNullOrEmpty(contentType)) {
-                try {
-                    Matcher charsetMatcher = CHARSET_PATTERN.matcher(contentType);
-                    if (charsetMatcher.find()) {
-                        return new String(bytes, Charset.forName(charsetMatcher.group(1)));
-                    } else {
-                        return new String(bytes, StandardCharsets.UTF_8);
-                    }
-                } catch (IllegalCharsetNameException | UnsupportedCharsetException ex) {
-                    return new String(bytes, StandardCharsets.UTF_8);
-                }
-            } else {
-                return new String(bytes, StandardCharsets.UTF_8);
-            }
-        }
+        return ImplUtils.bomAwareToString(bytes, 0, bytes.length, contentType);
     }
 
     /**