Skip to content

Commit

Permalink
pw_tokenizer: Add support for detokenizing nested Base64 tokens
Browse files Browse the repository at this point in the history
Change-Id: I37a602015b6b1e617a91b5336b79e9e91983075b
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/93603
Reviewed-by: Wyatt Hepler <[email protected]>
Commit-Queue: Asad Memon <[email protected]>
  • Loading branch information
asadm authored and CQ Bot Account committed May 9, 2022
1 parent ef742fd commit 64c083b
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 4 deletions.
1 change: 1 addition & 0 deletions pw_tokenizer/docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -900,6 +900,7 @@ package, and instantiate it with a CSV token database.
}
For messages that are encoded in Base64, use ``Detokenizer::detokenizeBase64``.
``detokenizeBase64`` will also attempt to detokenize nested Base64 tokens.

Protocol buffers
----------------
Expand Down
27 changes: 23 additions & 4 deletions pw_tokenizer/ts/detokenizer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {Frame} from '@pigweed/pw_hdlc';
import {TokenDatabase} from './token_database';
import {PrintfDecoder} from './printf_decoder';

// Default limit on how many further passes are made over already-detokenized
// text in order to expand nested Base64 tokens.
const MAX_RECURSIONS = 9;
// Regex character class matching one Base64 character. Both the standard
// ('+', '/') and URL-safe ('-', '_') symbols are accepted. The hyphen must be
// escaped: an unescaped "/-_" is parsed as the range '/'..'_', which fails to
// match a literal '-' and wrongly matches '=', ':', '?', '@', '[' and others.
const BASE64CHARS = '[A-Za-z0-9+/\\-_]';
const PATTERN = new RegExp(
// Base64 tokenized strings start with the prefix character ($)
Expand Down Expand Up @@ -66,14 +67,32 @@ export class Detokenizer {
* If the frame doesn't match any token from database, the frame will be
* returned as string as-is.
*/
detokenizeBase64(tokenizedFrame: Frame): string {
const base64Frame = new TextDecoder().decode(tokenizedFrame.data);
return base64Frame.replace(PATTERN, base64Substring => {
detokenizeBase64(
  tokenizedFrame: Frame,
  maxRecursion: number = MAX_RECURSIONS
): string {
  // The frame payload is raw bytes; turn it into text before scanning it for
  // Base64 tokens.
  const textDecoder = new TextDecoder();
  const frameText = textDecoder.decode(tokenizedFrame.data);
  // maxRecursion bounds how many extra passes are made over detokenized
  // output to expand tokens nested inside other tokens' arguments.
  const detokenized = this.detokenizeBase64String(frameText, maxRecursion);
  return detokenized;
}

private detokenizeBase64String(
base64String: string,
recursions: number
): string {
return base64String.replace(PATTERN, base64Substring => {
const {token, args} = this.decodeBase64TokenFrame(base64Substring);
const format = this.database.get(token);
// Parse arguments if this is printf-style text.
if (format) {
return new PrintfDecoder().decode(String(format), args);
const decodedOriginal = new PrintfDecoder().decode(
String(format),
args
);
// Detokenize nested Base64 tokens and their arguments.
if (recursions > 0) {
return this.detokenizeBase64String(decodedOriginal, recursions - 1);
}
return decodedOriginal;
}
return base64Substring;
});
Expand Down
27 changes: 27 additions & 0 deletions pw_tokenizer/ts/detokenizer_test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,9 @@ import {Detokenizer} from './detokenizer';
// Token database fixture in CSV form. Each row holds a hex token, an empty
// second column, and the format string that the token expands to. The last
// three rows back the nested-token tests: a two-string outer message, a
// message mixing token/string/int/float arguments, and a plain "Cat" token.
const CSV = `
64636261, ,"regular token"
86fc33f3, ,"base64 token"
0d6bd33c, ,"Regular Token: %s and Nested Token: %s"
97185e6f, ,"(token: %s, string: %s, int: %d, float: %f)"
451d86ed, ,"Cat"
`;

function generateFrame(text: string): Frame {
Expand Down Expand Up @@ -51,4 +54,28 @@ describe('Detokenizer', () => {
'$8zP7hg=='
);
});
it('recursive detokenize all nested base64 tokens', () => {
  // Outer token 0d6bd33c carries two string arguments that are themselves
  // Base64 tokens; the second one (97185e6f) in turn embeds another token.
  const nestedFrame = generateFrame(
    '$PNNrDQkkN1lZZFJRPT0lJGIxNFlsd2trTjFsWlpGSlJQVDBGUTJGdFpXeFlwSENkUHc9PQ=='
  );
  // With the default recursion limit every nesting level is expanded.
  const detokenized = detokenizer.detokenizeBase64(nestedFrame);
  expect(detokenized).toEqual(
    'Regular Token: Cat and Nested Token: (token: Cat, string: Camel, int: 44, float: 1.2300000190734863)'
  );
});

it('recursion detokenize with limits on max recursion', () => {
  // Same nested frame as the unlimited case, but only one extra pass is allowed.
  const nestedFrame = generateFrame(
    '$PNNrDQkkN1lZZFJRPT0lJGIxNFlsd2trTjFsWlpGSlJQVDBGUTJGdFpXeFlwSENkUHc9PQ=='
  );
  const recursionLimit = 1;
  const detokenized = detokenizer.detokenizeBase64(nestedFrame, recursionLimit);
  // The innermost token is left in its Base64 form ($7YYdRQ==) because the
  // recursion budget is exhausted before reaching it.
  expect(detokenized).toEqual(
    'Regular Token: Cat and Nested Token: (token: $7YYdRQ==, string: Camel, int: 44, float: 1.2300000190734863)'
  );
});
});

0 comments on commit 64c083b

Please sign in to comment.