pw_tokenizer: Add support for detokenizing nested Base64 tokens

Change-Id: I37a602015b6b1e617a91b5336b79e9e91983075b
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/93603
Reviewed-by: Wyatt Hepler <[email protected]>
Commit-Queue: Asad Memon <[email protected]>
google · May 9, 2022 · 64c083b · 64c083b
1 parent ef742fd
commit 64c083b
Show file tree

Hide file tree

Showing 3 changed files with 51 additions and 4 deletions.
diff --git a/pw_tokenizer/docs.rst b/pw_tokenizer/docs.rst
@@ -900,6 +900,7 @@ package, and instantiate it with a CSV token database.
    }
 
 For messages that are encoded in Base64, use ``Detokenizer::detokenizeBase64``.
+``detokenizeBase64`` will also attempt to detokenize nested Base64 tokens.
 
 Protocol buffers
 ----------------

diff --git a/pw_tokenizer/ts/detokenizer.ts b/pw_tokenizer/ts/detokenizer.ts
@@ -18,6 +18,7 @@ import {Frame} from '@pigweed/pw_hdlc';
 import {TokenDatabase} from './token_database';
 import {PrintfDecoder} from './printf_decoder';
 
+const MAX_RECURSIONS = 9;
 const BASE64CHARS = '[A-Za-z0-9+/-_]';
 const PATTERN = new RegExp(
   // Base64 tokenized strings start with the prefix character ($)
@@ -66,14 +67,32 @@ export class Detokenizer {
    * If the frame doesn't match any token from database, the frame will be
    * returned as string as-is.
    */
-  detokenizeBase64(tokenizedFrame: Frame): string {
-    const base64Frame = new TextDecoder().decode(tokenizedFrame.data);
-    return base64Frame.replace(PATTERN, base64Substring => {
+  detokenizeBase64(
+    tokenizedFrame: Frame,
+    maxRecursion: number = MAX_RECURSIONS
+  ): string {
+    const base64String = new TextDecoder().decode(tokenizedFrame.data);
+    return this.detokenizeBase64String(base64String, maxRecursion);
+  }
+
+  private detokenizeBase64String(
+    base64String: string,
+    recursions: number
+  ): string {
+    return base64String.replace(PATTERN, base64Substring => {
       const {token, args} = this.decodeBase64TokenFrame(base64Substring);
       const format = this.database.get(token);
       // Parse arguments if this is printf-style text.
       if (format) {
-        return new PrintfDecoder().decode(String(format), args);
+        const decodedOriginal = new PrintfDecoder().decode(
+          String(format),
+          args
+        );
+        // Detokenize nested Base64 tokens and their arguments.
+        if (recursions > 0) {
+          return this.detokenizeBase64String(decodedOriginal, recursions - 1);
+        }
+        return decodedOriginal;
       }
       return base64Substring;
     });

diff --git a/pw_tokenizer/ts/detokenizer_test.ts b/pw_tokenizer/ts/detokenizer_test.ts
@@ -21,6 +21,9 @@ import {Detokenizer} from './detokenizer';
 const CSV = `
 64636261,          ,"regular token"
 86fc33f3,          ,"base64 token"
+0d6bd33c,          ,"Regular Token: %s and Nested Token: %s"
+97185e6f,          ,"(token: %s, string: %s, int: %d, float: %f)"
+451d86ed,          ,"Cat"
 `;
 
 function generateFrame(text: string): Frame {
@@ -51,4 +54,28 @@ describe('Detokenizer', () => {
       '$8zP7hg=='
     );
   });
+  it('recursive detokenize all nested base64 tokens', () => {
+    expect(
+      detokenizer.detokenizeBase64(
+        generateFrame(
+          '$PNNrDQkkN1lZZFJRPT0lJGIxNFlsd2trTjFsWlpGSlJQVDBGUTJGdFpXeFlwSENkUHc9PQ=='
+        )
+      )
+    ).toEqual(
+      'Regular Token: Cat and Nested Token: (token: Cat, string: Camel, int: 44, float: 1.2300000190734863)'
+    );
+  });
+
+  it('recursion detokenize with limits on max recursion', () => {
+    expect(
+      detokenizer.detokenizeBase64(
+        generateFrame(
+          '$PNNrDQkkN1lZZFJRPT0lJGIxNFlsd2trTjFsWlpGSlJQVDBGUTJGdFpXeFlwSENkUHc9PQ=='
+        ),
+        1
+      )
+    ).toEqual(
+      'Regular Token: Cat and Nested Token: (token: $7YYdRQ==, string: Camel, int: 44, float: 1.2300000190734863)'
+    );
+  });
 });
-Original file line number
+Diff line change
@@ Expand Up / @@ -900,6 +900,7 @@ package, and instantiate it with a CSV token database. @@
        }
     For messages that are encoded in Base64, use ``Detokenizer::detokenizeBase64``.
+    ``detokenizeBase64`` will also attempt to detokenize nested Base64 tokens.
     Protocol buffers
     ----------------
@@ Expand Down @@