Optimize parsing headers for pekko-http and akka-http (#3575)

softwaremill · Mar 8, 2024 · 937a96c · 937a96c
1 parent e76a854
commit 937a96c
Show file tree

Hide file tree

Showing 6 changed files with 62 additions and 4 deletions.
diff --git a/server/akka-http-server/src/main/scala/sttp/tapir/server/akkahttp/AkkaModel.scala b/server/akka-http-server/src/main/scala/sttp/tapir/server/akkahttp/AkkaModel.scala
@@ -13,8 +13,8 @@ private[akkahttp] object AkkaModel {
 
   def parseHeadersOrThrowWithoutContentHeaders(hs: HasHeaders): Seq[HttpHeader] =
     hs.headers
-      .map(parseHeaderOrThrow)
       .filterNot(h => h.is(ctHeaderNameLowerCase) || h.is(clHeaderNameLowerCase) || h.is(teHeaderNameLowerCase))
+      .map(parseHeaderOrThrow)
 
   def parseHeaderOrThrow(h: Header): HttpHeader =
     HttpHeader.parse(h.name, h.value) match {

diff --git a/server/akka-http-server/src/main/scala/sttp/tapir/server/akkahttp/AkkaToResponseBody.scala b/server/akka-http-server/src/main/scala/sttp/tapir/server/akkahttp/AkkaToResponseBody.scala
@@ -132,7 +132,7 @@ private[akkahttp] class AkkaToResponseBody(implicit m: Materializer, ec: Executi
   }
 
   private def parseContentType(ct: String): ContentType =
-    ContentType.parse(ct).getOrElse(throw new IllegalArgumentException(s"Cannot parse content type: $ct"))
+    ContentTypeCache.getOrParse(ct)
 
   private def charsetToHttpCharset(charset: Charset): HttpCharset = HttpCharset.custom(charset.name())
 

diff --git a/server/akka-http-server/src/main/scala/sttp/tapir/server/akkahttp/ContentTypeCache.scala b/server/akka-http-server/src/main/scala/sttp/tapir/server/akkahttp/ContentTypeCache.scala
@@ -0,0 +1,29 @@
+package sttp.tapir.server.akkahttp
+
+import akka.http.scaladsl.model.ContentType
+import scala.collection.concurrent.TrieMap
+
+/** An Akka-specific ContentType has to be created if an endpoint overrides it, but we want to reduce the overhead of the expensive
+  * ContentType.parse operation if possible. Parsing may also happen for cases not listed explicitly in
+  * AkkaToResponseBody.formatToContentType. This cache doesn't have to save atomically, because the worst-case scenario is that we parse
+  * the same header a few times before it's saved. The cache is not cleared, because the number of different content types is limited and
+  * the cache is not expected to grow too much. The only exception is when there is a boundary in the header, but in such a situation the
+  * endpoint contentType shouldn't be overridden. Just in case this happens, we limit the cache size.
+  */
+private[akkahttp] object ContentTypeCache {
+  private val cache = TrieMap[String, ContentType]()
+  private val Limit = 100 // soft cap: new entries are only added while cache.size <= Limit
+
+  def getOrParse(headerValue: String): ContentType = {
+    cache.get(headerValue) match {
+      case Some(contentType) =>
+        contentType
+      case None =>
+        val contentType =
+          ContentType.parse(headerValue).getOrElse(throw new IllegalArgumentException(s"Cannot parse content type: $headerValue"))
+        // We don't want to fill the cache with parameterized media types (note: charset does not appear in params)
+        val _ = if (cache.size <= Limit && contentType.mediaType.params.isEmpty) cache.putIfAbsent(headerValue, contentType)
+        contentType
+    }
+  }
+}
diff --git a/server/pekko-http-server/src/main/scala/sttp/tapir/server/pekkohttp/ContentTypeCache.scala b/server/pekko-http-server/src/main/scala/sttp/tapir/server/pekkohttp/ContentTypeCache.scala
@@ -0,0 +1,29 @@
+package sttp.tapir.server.pekkohttp
+
+import org.apache.pekko.http.scaladsl.model.ContentType
+import scala.collection.concurrent.TrieMap
+
+/** A Pekko-specific ContentType has to be created if an endpoint overrides it, but we want to reduce the overhead of the expensive
+  * ContentType.parse operation if possible. Parsing may also happen for cases not listed explicitly in
+  * PekkoToResponseBody.formatToContentType. This cache doesn't have to save atomically, because the worst-case scenario is that we parse
+  * the same header a few times before it's saved. The cache is not cleared, because the number of different content types is limited and
+  * the cache is not expected to grow too much. The only exception is when there is a boundary in the header, but in such a situation the
+  * endpoint contentType shouldn't be overridden. Just in case this happens, we limit the cache size.
+  */
+private[pekkohttp] object ContentTypeCache {
+  private val cache = TrieMap[String, ContentType]()
+  private val Limit = 100 // soft cap: new entries are only added while cache.size <= Limit
+
+  def getOrParse(headerValue: String): ContentType = {
+    cache.get(headerValue) match {
+      case Some(contentType) =>
+        contentType
+      case None =>
+        val contentType =
+          ContentType.parse(headerValue).getOrElse(throw new IllegalArgumentException(s"Cannot parse content type: $headerValue"))
+        // We don't want to fill the cache with parameterized media types (note: charset does not appear in params)
+        val _ = if (cache.size <= Limit && contentType.mediaType.params.isEmpty) cache.putIfAbsent(headerValue, contentType)
+        contentType
+    }
+  }
+}
diff --git a/server/pekko-http-server/src/main/scala/sttp/tapir/server/pekkohttp/PekkoModel.scala b/server/pekko-http-server/src/main/scala/sttp/tapir/server/pekkohttp/PekkoModel.scala
@@ -13,8 +13,8 @@ private[pekkohttp] object PekkoModel {
 
   def parseHeadersOrThrowWithoutContentHeaders(hs: HasHeaders): Seq[HttpHeader] =
     hs.headers
-      .map(parseHeaderOrThrow)
       .filterNot(h => h.is(ctHeaderNameLowerCase) || h.is(clHeaderNameLowerCase) || h.is(teHeaderNameLowerCase))
+      .map(parseHeaderOrThrow)
 
   def parseHeaderOrThrow(h: Header): HttpHeader =
     HttpHeader.parse(h.name, h.value) match {

diff --git a/server/pekko-http-server/src/main/scala/sttp/tapir/server/pekkohttp/PekkoToResponseBody.scala b/server/pekko-http-server/src/main/scala/sttp/tapir/server/pekkohttp/PekkoToResponseBody.scala
@@ -132,7 +132,7 @@ private[pekkohttp] class PekkoToResponseBody(implicit m: Materializer, ec: Execu
   }
 
   private def parseContentType(ct: String): ContentType =
-    ContentType.parse(ct).getOrElse(throw new IllegalArgumentException(s"Cannot parse content type: $ct"))
+    ContentTypeCache.getOrParse(ct)
 
   private def charsetToHttpCharset(charset: Charset): HttpCharset = HttpCharset.custom(charset.name())