From 40f668a2124a090f9b879e92c574f58e89444182 Mon Sep 17 00:00:00 2001 From: Oscar Boykin Date: Fri, 22 Jan 2016 11:11:06 -1000 Subject: [PATCH 1/2] Make string Injections safer. Fix #199 --- .../bijection/AtomicSharedMutable.scala | 21 ++++++++++++++++++ .../twitter/bijection/StringInjections.scala | 22 +++++++++++++++---- .../bijection/StringBijectionLaws.scala | 2 +- 3 files changed, 40 insertions(+), 5 deletions(-) create mode 100644 bijection-core/src/main/scala/com/twitter/bijection/AtomicSharedMutable.scala diff --git a/bijection-core/src/main/scala/com/twitter/bijection/AtomicSharedMutable.scala b/bijection-core/src/main/scala/com/twitter/bijection/AtomicSharedMutable.scala new file mode 100644 index 000000000..fa0b83eaa --- /dev/null +++ b/bijection-core/src/main/scala/com/twitter/bijection/AtomicSharedMutable.scala @@ -0,0 +1,21 @@ +package com.twitter.bijection + +import java.util.concurrent.atomic.AtomicReference + +/** + * This class is for sharing an object between threads + * when allocation of a new one is not that cheap, but + * better than blocking. + */ +private[bijection] class AtomicSharedState[T <: AnyRef](cons: () => T) { + private[this] val ref = new AtomicReference[T](cons()) + + def get: T = { + val borrow = ref.getAndSet(null.asInstanceOf[T]) + if (null == borrow) cons() + else borrow + } + def release(t: T): Unit = { + ref.lazySet(t) + } +} diff --git a/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala b/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala index de1db2085..13aad4650 100644 --- a/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala +++ b/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala @@ -16,13 +16,14 @@ limitations under the License. package com.twitter.bijection +import com.twitter.bijection.Inversion.attempt import java.net.{ URLDecoder, URLEncoder, URL } -import java.util.UUID +import java.nio.charset.{Charset, CodingErrorAction} +import java.nio.ByteBuffer +import java.util.UUID import scala.annotation.tailrec import scala.collection.generic.CanBuildFrom - -import com.twitter.bijection.Inversion.attempt import scala.util.Try trait StringInjections extends NumericInjections { @@ -32,9 +33,22 @@ trait StringInjections extends NumericInjections { def withEncoding(encoding: String): Injection[String, Array[Byte]] = new AbstractInjection[String, Array[Byte]] { + private[this] val decRef = { + val cs = Charset.forName(encoding) + new AtomicSharedState(() => cs.newDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)) + } + def apply(s: String) = s.getBytes(encoding) override def invert(b: Array[Byte]) = - attempt(b) { new String(_, encoding) } + // toString on ByteBuffer is nicer, so use it + attempt(ByteBuffer.wrap(b)) { bb => + //these are mutable, so it can't be shared trivially + //avoid GC pressure and (probably) perform better + val dec = decRef.get + val str = dec.decode(bb).toString + decRef.release(dec) + str + } } // Some bijections with string from standard java/scala classes: diff --git a/bijection-core/src/test/scala/com/twitter/bijection/StringBijectionLaws.scala b/bijection-core/src/test/scala/com/twitter/bijection/StringBijectionLaws.scala index 095e9e7f4..e6d592906 100644 --- a/bijection-core/src/test/scala/com/twitter/bijection/StringBijectionLaws.scala +++ b/bijection-core/src/test/scala/com/twitter/bijection/StringBijectionLaws.scala @@ -38,7 +38,7 @@ object StringArbs extends BaseProperties { class StringBijectionLaws extends CheckProperties with BaseProperties { property("round trips string -> Array[String]") { - isLooseInjection[String, Array[Byte]] + isInjection[String, Array[Byte]] } implicit val symbol = arbitraryViaFn { (s: String) => Symbol(s) } From 42c0503acbd09506e2128b5747385a8011709c1e Mon Sep 17 00:00:00 2001 From: Oscar Boykin Date: Fri, 22 Jan 2016 12:50:08 -1000 Subject: [PATCH 2/2] Cache ths CharBuffer --- .../twitter/bijection/StringInjections.scala | 37 ++++++++++++++----- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala b/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala index 13aad4650..869032ae3 100644 --- a/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala +++ b/bijection-core/src/main/scala/com/twitter/bijection/StringInjections.scala @@ -18,9 +18,8 @@ package com.twitter.bijection import com.twitter.bijection.Inversion.attempt import java.net.{ URLDecoder, URLEncoder, URL } -import java.nio.charset.{Charset, CodingErrorAction} -import java.nio.ByteBuffer - +import java.nio.charset.{ Charset, CoderResult, CodingErrorAction } +import java.nio.{ ByteBuffer, CharBuffer } import java.util.UUID import scala.annotation.tailrec import scala.collection.generic.CanBuildFrom @@ -33,10 +32,14 @@ trait StringInjections extends NumericInjections { def withEncoding(encoding: String): Injection[String, Array[Byte]] = new AbstractInjection[String, Array[Byte]] { - private[this] val decRef = { - val cs = Charset.forName(encoding) - new AtomicSharedState(() => cs.newDecoder.onUnmappableCharacter(CodingErrorAction.REPORT)) - } + private[this] val decRef = + new AtomicSharedState({ () => + val dec = Charset.forName(encoding) + .newDecoder + .onUnmappableCharacter(CodingErrorAction.REPORT) + val buf = CharBuffer.allocate(1024) // something big enough, if not big enough, allocate + (dec, buf) + }) def apply(s: String) = s.getBytes(encoding) override def invert(b: Array[Byte]) = @@ -44,9 +47,23 @@ trait StringInjections extends NumericInjections { attempt(ByteBuffer.wrap(b)) { bb => //these are mutable, so it can't be shared trivially //avoid GC pressure and (probably) perform better - val dec = decRef.get - val str = dec.decode(bb).toString - decRef.release(dec) + val decBuf = decRef.get + val dec = decBuf._1 + val buf = decBuf._2 + val maxSpace = (b.length * dec.maxCharsPerByte).toInt + 1 + val thisBuf = if (maxSpace > buf.limit) CharBuffer.allocate(maxSpace) else buf + + // this is the error free result + @inline def assertUnderFlow(cr: CoderResult) { if(!cr.isUnderflow) cr.throwException } + assertUnderFlow(dec.reset.decode(bb, thisBuf, true)) + assertUnderFlow(dec.flush(thisBuf)) + // set the limit to be the position + thisBuf.flip + val str = thisBuf.toString + // make sure the buffer we store is clear. + buf.clear + // we don't reset with the larger buffer to avoid memory leaks + decRef.release(decBuf) str } }