Performance improvement for sigv4 signing. (#4867)

1. When trimming and collapsing consecutive whitespace during SigV4 normalization, copy word-by-word instead of character-by-character. This reduces the overhead of the range and encoding checks in StringBuilder. 2. Increase the initial StringBuilder capacity for canonical headers to limit resizing (2048 worked well for DynamoDB's get-item). 3. Use a switch statement for whitespace checks instead of a chain of `||` equality comparisons. On my compiler, the switch statement compiles to a jump table, which runs faster.
aws · Feb 1, 2024 · 0ab7f75 · 0ab7f75
1 parent 093501d
commit 0ab7f75
Showing 1 changed file with 45 additions and 25 deletions.
diff --git a/...rc/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/V4CanonicalRequest.java b/...rc/main/java/software/amazon/awssdk/http/auth/aws/internal/signer/V4CanonicalRequest.java
@@ -181,7 +181,10 @@ public static List<Pair<String, List<String>>> getCanonicalHeaders(Map<String, L
      * Each header-value pair is separated by a newline.
      */
     public static String getCanonicalHeadersString(List<Pair<String, List<String>>> canonicalHeaders) {
-        StringBuilder result = new StringBuilder(512);
+        // 2048 chosen experimentally to avoid always needing to resize the string builder's internal byte array.
+        // The minimal DynamoDB get-item request at the time of testing used ~1100 bytes. 2048 was chosen as the
+        // next-highest power-of-two.
+        StringBuilder result = new StringBuilder(2048);
         canonicalHeaders.forEach(header -> {
             result.append(header.left());
             result.append(":");
@@ -246,35 +249,42 @@ private static String getCanonicalRequestString(String httpMethod, String canoni
      * Matcher object as well.
      */
     private static void addAndTrim(StringBuilder result, String value) {
-        int lengthBefore = result.length();
-        boolean isStart = true;
-        boolean previousIsWhiteSpace = false;
-
-        for (int i = 0; i < value.length(); i++) {
-            char ch = value.charAt(i);
-            if (isWhiteSpace(ch)) {
-                if (previousIsWhiteSpace || isStart) {
-                    continue;
-                }
-                result.append(' ');
-                previousIsWhiteSpace = true;
-            } else {
-                result.append(ch);
-                isStart = false;
-                previousIsWhiteSpace = false;
+        int start = 0;
+        int valueLength = value.length();
+
+        // Find first non-whitespace; the bounds checks make empty or all-whitespace values append nothing
+        while (start < valueLength && isWhiteSpace(value.charAt(start))) {
+            ++start;
+            if (start >= valueLength) {
+                return;
             }
         }
 
-        if (lengthBefore == result.length()) {
-            return;
+        // Add things word-by-word
+        int lastWordStart = start;
+        boolean lastWasWhitespace = false;
+        for (int i = start; i < valueLength; i++) {
+            char c = value.charAt(i);
+
+            if (isWhiteSpace(c)) {
+                if (!lastWasWhitespace) {
+                    // End of word, add word
+                    result.append(value, lastWordStart, i);
+                    lastWasWhitespace = true;
+                }
+            } else {
+                if (lastWasWhitespace) {
+                    // Start of new word, add space
+                    result.append(' ');
+                    lastWordStart = i;
+                    lastWasWhitespace = false;
+                }
+            }
         }
 
-        int lastNonWhitespaceChar = result.length() - 1;
-        while (isWhiteSpace(result.charAt(lastNonWhitespaceChar))) {
-            --lastNonWhitespaceChar;
+        if (!lastWasWhitespace) {
+            result.append(value, lastWordStart, valueLength);
         }
-
-        result.setLength(lastNonWhitespaceChar + 1);
     }
 
     /**
@@ -365,7 +375,17 @@ private static String getCanonicalQueryString(SortedMap<String, List<String>> ca
     }
 
     private static boolean isWhiteSpace(char ch) {
-        return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\u000b' || ch == '\r' || ch == '\f';
+        switch (ch) {
+            case ' ':
+            case '\t':
+            case '\n':
+            case '\u000b':
+            case '\r':
+            case '\f':
+                return true;
+            default:
+                return false;
+        }
     }
 
     /**