From b87e1f19a819ba9590cd0987e655cb94b90f60d4 Mon Sep 17 00:00:00 2001 From: lukes Date: Mon, 17 Jul 2017 11:44:52 -0700 Subject: [PATCH] Automated g4 rollback of changelist 162220754. *** Reason for rollback *** app-engine has a buggy implementation of FileInputStream.available() which is throwing IOException *** Original change description *** Implement ByteSource.asCharSource(charset).read() using the decoding string constructor instead of streaming the contents into a StringBuilder. this allows us to avoid a number of copies that are currently happening for each character (1. into a temporary CharBuffer, 2. into a StringBuilder, 3 into the String char[]) and replace it with simply whatever is required by ByteSource.read() and the String(byte[], charset) constructor. For certain ByteSource implementations (like FileByteSource) Byte... *** ------------- Created by MOE: https://github.com/google/moe MOE_MIGRATED_REVID=162245286 --- .../ByteSourceAsCharSourceReadBenchmark.java | 137 ------------------ .../src/com/google/common/io/ByteSource.java | 12 -- .../ByteSourceAsCharSourceReadBenchmark.java | 137 ------------------ .../src/com/google/common/io/ByteSource.java | 12 -- 4 files changed, 298 deletions(-) delete mode 100644 android/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java delete mode 100644 guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java diff --git a/android/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java b/android/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java deleted file mode 100644 index ee81ea1259b2..000000000000 --- a/android/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2017 The Guava Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License - * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express - * or implied. See the License for the specific language governing permissions and limitations under - * the License. - */ - -package com.google.common.io; - -import com.google.caliper.BeforeExperiment; -import com.google.caliper.Benchmark; -import com.google.caliper.Param; -import com.google.caliper.api.VmOptions; -import com.google.common.base.Optional; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.Charset; -import java.util.Random; - -/** - * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}. - */ -// These benchmarks allocate a lot of data so use a large heap -@VmOptions({"-Xms12g", "-Xmx12g", "-d64"}) -public class ByteSourceAsCharSourceReadBenchmark { - enum ReadStrategy { - TO_BYTE_ARRAY_NEW_STRING { - @Override - String read(ByteSource byteSource, Charset cs) throws IOException { - return new String(byteSource.read(), cs); - } - }, - USING_CHARSTREAMS_COPY { - @Override - String read(ByteSource byteSource, Charset cs) throws IOException { - StringBuilder sb = new StringBuilder(); - try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { - CharStreams.copy(reader, sb); - } - return sb.toString(); - } - }, - // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING. But it just isn't - // my best guess is that the jdk authors have spent more time optimizing that callpath than this - // one. (StringCoding$StringDecoder vs. StreamDecoder). StringCoding has a ton of special cases - // theoretically we could duplicate all that logic here to try to beat 'new String' or at least - // come close. - USING_DECODER_WITH_SIZE_HINT { - @Override - String read(ByteSource byteSource, Charset cs) throws IOException { - Optional size = byteSource.sizeIfKnown(); - // if we know the size and it fits in an int - if (size.isPresent() && size.get().longValue() == size.get().intValue()) { - // otherwise try to presize a StringBuilder - // it is kind of lame that we need to construct a decoder to access this value. - // if this is a concern we could add special cases for some known charsets (like utf8) - // or we could avoid inputstreamreader and use the decoder api directly - // TODO(lukes): in a real implementation we would need to handle overflow conditions - int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte()); - char[] buffer = new char[maxChars]; - int bufIndex = 0; - int remaining = buffer.length; - try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { - int nRead = 0; - while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) { - bufIndex += nRead; - remaining -= nRead; - } - if (nRead == -1) { - // we reached EOF - return new String(buffer, 0, bufIndex); - } - // otherwise we got the size wrong. This can happen if the size changes between when - // we called sizeIfKnown and when we started reading the file (or i guess if - // maxCharsPerByte is wrong) - // Fallback to an incremental approach - StringBuilder builder = new StringBuilder(bufIndex + 32); - builder.append(buffer, 0, bufIndex); - buffer = null; // release for gc - CharStreams.copy(reader, builder); - return builder.toString(); - } - - } else { - return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs); - } - } - }; - - abstract String read(ByteSource byteSource, Charset cs) throws IOException; - } - - @Param({"UTF-8"}) - String charsetName; - - @Param ReadStrategy strategy; - - @Param({"10", "1024", "1048576"}) - int size; - - Charset charset; - ByteSource data; - - @BeforeExperiment - public void setUp() { - charset = Charset.forName(charsetName); - StringBuilder sb = new StringBuilder(); - Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior - sb.ensureCapacity(size); - for (int k = 0; k < size; k++) { - // [9-127) includes all ascii non-control characters - sb.append((char) (random.nextInt(127 - 9) + 9)); - } - String string = sb.toString(); - sb.setLength(0); - data = ByteSource.wrap(string.getBytes(charset)); - } - - @Benchmark - public int timeCopy(int reps) throws IOException { - int r = 0; - final Charset localCharset = charset; - final ByteSource localData = data; - final ReadStrategy localStrategy = strategy; - for (int i = 0; i < reps; i++) { - r += localStrategy.read(localData, localCharset).hashCode(); - } - return r; - } -} diff --git a/android/guava/src/com/google/common/io/ByteSource.java b/android/guava/src/com/google/common/io/ByteSource.java index 7fa64b89c8ee..1beb9cacefca 100644 --- a/android/guava/src/com/google/common/io/ByteSource.java +++ b/android/guava/src/com/google/common/io/ByteSource.java @@ -455,18 +455,6 @@ public Reader openStream() throws IOException { return new InputStreamReader(ByteSource.this.openStream(), charset); } - @Override - public String read() throws IOException { - // Reading all the data as a byte array is more efficient than the default read() - // implementation because: - // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the - // internal char array (hard to avoid using StringBuilder) - // 2. we avoid extra copies into temporary buffers altogether - // The downside is that this will cause us to store the file bytes in memory twice for a short - // amount of time. - return new String(ByteSource.this.read(), charset); - } - @Override public String toString() { return ByteSource.this.toString() + ".asCharSource(" + charset + ")"; diff --git a/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java b/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java deleted file mode 100644 index ee81ea1259b2..000000000000 --- a/guava-tests/benchmark/com/google/common/io/ByteSourceAsCharSourceReadBenchmark.java +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (C) 2017 The Guava Authors - * - * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except - * in compliance with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software distributed under the License - * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express - * or implied. See the License for the specific language governing permissions and limitations under - * the License. - */ - -package com.google.common.io; - -import com.google.caliper.BeforeExperiment; -import com.google.caliper.Benchmark; -import com.google.caliper.Param; -import com.google.caliper.api.VmOptions; -import com.google.common.base.Optional; -import java.io.IOException; -import java.io.InputStreamReader; -import java.nio.charset.Charset; -import java.util.Random; - -/** - * Benchmarks for various potential implementations of {@code ByteSource.asCharSource(...).read()}. - */ -// These benchmarks allocate a lot of data so use a large heap -@VmOptions({"-Xms12g", "-Xmx12g", "-d64"}) -public class ByteSourceAsCharSourceReadBenchmark { - enum ReadStrategy { - TO_BYTE_ARRAY_NEW_STRING { - @Override - String read(ByteSource byteSource, Charset cs) throws IOException { - return new String(byteSource.read(), cs); - } - }, - USING_CHARSTREAMS_COPY { - @Override - String read(ByteSource byteSource, Charset cs) throws IOException { - StringBuilder sb = new StringBuilder(); - try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { - CharStreams.copy(reader, sb); - } - return sb.toString(); - } - }, - // It really seems like this should be faster than TO_BYTE_ARRAY_NEW_STRING. But it just isn't - // my best guess is that the jdk authors have spent more time optimizing that callpath than this - // one. (StringCoding$StringDecoder vs. StreamDecoder). StringCoding has a ton of special cases - // theoretically we could duplicate all that logic here to try to beat 'new String' or at least - // come close. - USING_DECODER_WITH_SIZE_HINT { - @Override - String read(ByteSource byteSource, Charset cs) throws IOException { - Optional size = byteSource.sizeIfKnown(); - // if we know the size and it fits in an int - if (size.isPresent() && size.get().longValue() == size.get().intValue()) { - // otherwise try to presize a StringBuilder - // it is kind of lame that we need to construct a decoder to access this value. - // if this is a concern we could add special cases for some known charsets (like utf8) - // or we could avoid inputstreamreader and use the decoder api directly - // TODO(lukes): in a real implementation we would need to handle overflow conditions - int maxChars = (int) (size.get().intValue() * cs.newDecoder().maxCharsPerByte()); - char[] buffer = new char[maxChars]; - int bufIndex = 0; - int remaining = buffer.length; - try (InputStreamReader reader = new InputStreamReader(byteSource.openStream(), cs)) { - int nRead = 0; - while (remaining > 0 && (nRead = reader.read(buffer, bufIndex, remaining)) != -1) { - bufIndex += nRead; - remaining -= nRead; - } - if (nRead == -1) { - // we reached EOF - return new String(buffer, 0, bufIndex); - } - // otherwise we got the size wrong. This can happen if the size changes between when - // we called sizeIfKnown and when we started reading the file (or i guess if - // maxCharsPerByte is wrong) - // Fallback to an incremental approach - StringBuilder builder = new StringBuilder(bufIndex + 32); - builder.append(buffer, 0, bufIndex); - buffer = null; // release for gc - CharStreams.copy(reader, builder); - return builder.toString(); - } - - } else { - return TO_BYTE_ARRAY_NEW_STRING.read(byteSource, cs); - } - } - }; - - abstract String read(ByteSource byteSource, Charset cs) throws IOException; - } - - @Param({"UTF-8"}) - String charsetName; - - @Param ReadStrategy strategy; - - @Param({"10", "1024", "1048576"}) - int size; - - Charset charset; - ByteSource data; - - @BeforeExperiment - public void setUp() { - charset = Charset.forName(charsetName); - StringBuilder sb = new StringBuilder(); - Random random = new Random(0xdeadbeef); // for unpredictable but reproducible behavior - sb.ensureCapacity(size); - for (int k = 0; k < size; k++) { - // [9-127) includes all ascii non-control characters - sb.append((char) (random.nextInt(127 - 9) + 9)); - } - String string = sb.toString(); - sb.setLength(0); - data = ByteSource.wrap(string.getBytes(charset)); - } - - @Benchmark - public int timeCopy(int reps) throws IOException { - int r = 0; - final Charset localCharset = charset; - final ByteSource localData = data; - final ReadStrategy localStrategy = strategy; - for (int i = 0; i < reps; i++) { - r += localStrategy.read(localData, localCharset).hashCode(); - } - return r; - } -} diff --git a/guava/src/com/google/common/io/ByteSource.java b/guava/src/com/google/common/io/ByteSource.java index 7fa64b89c8ee..1beb9cacefca 100644 --- a/guava/src/com/google/common/io/ByteSource.java +++ b/guava/src/com/google/common/io/ByteSource.java @@ -455,18 +455,6 @@ public Reader openStream() throws IOException { return new InputStreamReader(ByteSource.this.openStream(), charset); } - @Override - public String read() throws IOException { - // Reading all the data as a byte array is more efficient than the default read() - // implementation because: - // 1. the string constructor can avoid an extra copy most of the time by correctly sizing the - // internal char array (hard to avoid using StringBuilder) - // 2. we avoid extra copies into temporary buffers altogether - // The downside is that this will cause us to store the file bytes in memory twice for a short - // amount of time. - return new String(ByteSource.this.read(), charset); - } - @Override public String toString() { return ByteSource.this.toString() + ".asCharSource(" + charset + ")";