Skip to content

Commit

Permalink
pw_tokenizer: Add a basic typescript implementation
Browse files Browse the repository at this point in the history
Change-Id: Iae9528e65f5d39d7198c544d74901722f6512948
Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/92460
Reviewed-by: Anthony DiGirolamo <[email protected]>
Reviewed-by: Wyatt Hepler <[email protected]>
Commit-Queue: Asad Memon <[email protected]>
  • Loading branch information
asadm authored and CQ Bot Account committed May 2, 2022
1 parent 199208c commit 74faca4
Show file tree
Hide file tree
Showing 15 changed files with 661 additions and 5 deletions.
2 changes: 2 additions & 0 deletions pw_presubmit/py/pw_presubmit/pigweed_presubmit.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,6 +375,7 @@ def cmake_gcc(ctx: PresubmitContext):
'//pw_sys_io_baremetal_stm32f429/...',
'//pw_sys_io_stdio/...',
'//pw_thread_stl/...',
'//pw_tokenizer/ts/...',
'//pw_tool/...',
'//pw_toolchain/...',
'//pw_transfer/...',
Expand Down Expand Up @@ -411,6 +412,7 @@ def cmake_gcc(ctx: PresubmitContext):
'//pw_stream/...',
'//pw_string/...',
'//pw_thread_stl/...',
'//pw_tokenizer/ts/...',
'//pw_transfer/...',
'//pw_unit_test/...',
'//pw_varint/...',
Expand Down
25 changes: 23 additions & 2 deletions pw_tokenizer/docs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ pw_tokenizer
:bdg-primary:`device`
:bdg-secondary:`Python`
:bdg-secondary:`C++`
:bdg-secondary:`TypeScript`
:bdg-success:`stable`

Logging is critical, but developers are often forced to choose between
Expand Down Expand Up @@ -768,8 +769,8 @@ with the ``paths`` option.
Detokenization
==============
Detokenization is the process of expanding a token to the string it represents
and decoding its arguments. This module provides Python, C++ and TypeScript
detokenization libraries.

**Example: decoding tokenized logs**

Expand Down Expand Up @@ -880,6 +881,26 @@ this check can be done at compile time.
return Detokenizer(kDefaultDatabase);
}
TypeScript
----------
To detokenize in TypeScript, import ``Detokenizer`` from the ``pw_tokenizer``
package, and instantiate it with a CSV token database.

.. code-block:: typescript

   import {Detokenizer} from "@pigweed/pw_tokenizer";
   import {Frame} from '@pigweed/pw_hdlc';

   const detokenizer = new Detokenizer(String(tokenCsv));

   function processLog(frame: Frame){
     const result = detokenizer.detokenize(frame);
     console.log(result);
   }
For messages that are encoded in Base64, use ``Detokenizer.detokenizeBase64``.

Protocol buffers
----------------
``pw_tokenizer`` provides utilities for handling tokenized fields in protobufs.
Expand Down
64 changes: 64 additions & 0 deletions pw_tokenizer/ts/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
# Copyright 2022 The Pigweed Authors
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not
# use this file except in compliance with the License. You may obtain a copy of
# the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.

load("@build_bazel_rules_nodejs//:index.bzl", "js_library")
load("@npm//@bazel/typescript:index.bzl", "ts_library", "ts_project")
load("@npm//@bazel/jasmine:index.bzl", "jasmine_node_test")

package(default_visibility = ["//visibility:public"])

# Compiles the tokenizer TypeScript sources, emitting .d.ts declarations
# and source maps. Depends on pw_hdlc for the Frame type.
ts_project(
    name = "lib",
    srcs = [
        "detokenizer.ts",
        "index.ts",
        "printf_decoder.ts",
        "token_database.ts",
    ],
    declaration = True,
    source_map = True,
    deps = [
        "//pw_hdlc/ts:pw_hdlc",
        "@npm//:node_modules",
    ],  # can't use fine-grained deps
)

# Wraps the compiled output plus package.json as the importable
# @pigweed/pw_tokenizer library.
js_library(
    name = "pw_tokenizer",
    package_name = "@pigweed/pw_tokenizer",
    srcs = ["package.json"],
    deps = [":lib"],
)

# Test sources for the detokenizer and printf decoder.
ts_library(
    name = "tokenizer_test_lib",
    srcs = [
        "detokenizer_test.ts",
        "printf_decoder_test.ts",
    ],
    deps = [
        ":lib",
        "//pw_hdlc/ts:pw_hdlc",
        "@npm//@types/jasmine",
        "@npm//@types/node",
        "@npm//buffer",
    ],
)

# Runs the compiled test sources under Jasmine on Node.
jasmine_node_test(
    name = "tokenizer_test",
    srcs = [
        ":tokenizer_test_lib",
    ],
)
109 changes: 109 additions & 0 deletions pw_tokenizer/ts/detokenizer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// Copyright 2022 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

/** Decodes and detokenizes strings from binary or Base64 input. */
import {Buffer} from 'buffer';
import {Frame} from '@pigweed/pw_hdlc';
import {TokenDatabase} from './token_database';
import {PrintfDecoder} from './printf_decoder';

// Matches a single character of the Base64 alphabet, including the URL-safe
// variants '-' and '_'. The dash is escaped so it is a literal inside the
// character class: the original unescaped '[...+/-_]' formed a range from
// '/' (0x2F) to '_' (0x5F), incorrectly matching '=', ':', '@', '[' and
// other non-Base64 characters.
const BASE64CHARS = '[A-Za-z0-9+/\\-_]';

// Matches a '$'-prefixed Base64-tokenized message embedded in text.
const PATTERN = new RegExp(
  // Base64 tokenized strings start with the prefix character ($)
  '\\$' +
    // Tokenized strings contain 0 or more blocks of four Base64 chars.
    `(?:${BASE64CHARS}{4})*` +
    // The last block of 4 chars may have one or two padding chars (=).
    `(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?`,
  'g'
);

/** A decoded tokenized message: the 32-bit token and its raw argument bytes. */
interface TokenAndArgs {
  token: number;
  args: Uint8Array;
}

/** Expands tokens from binary or Base64-encoded frames into strings. */
export class Detokenizer {
  // Maps 32-bit tokens to printf-style format strings.
  private readonly database: TokenDatabase;

  /**
   * @param csvDatabase A token database in CSV format
   *     (`token,date,"format string"` per line).
   */
  constructor(csvDatabase: string) {
    this.database = new TokenDatabase(csvDatabase);
  }

  /**
   * Detokenize frame data into actual string messages using the provided
   * token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   */
  detokenize(tokenizedFrame: Frame): string {
    const {token, args} = this.decodeTokenFrame(tokenizedFrame);
    // Parse arguments if this is printf-style text.
    const format = this.database.get(token);
    if (format) {
      return new PrintfDecoder().decode(String(format), args);
    }

    // Unknown token: fall back to decoding the raw frame bytes as text.
    return new TextDecoder().decode(tokenizedFrame.data);
  }

  /**
   * Detokenize Base64-encoded frame data into actual string messages using
   * the provided token database.
   *
   * If the frame doesn't match any token from database, the frame will be
   * returned as string as-is.
   */
  detokenizeBase64(tokenizedFrame: Frame): string {
    const base64Frame = new TextDecoder().decode(tokenizedFrame.data);
    // Replace each embedded '$<base64>' message that resolves in the
    // database; leave unrecognized substrings untouched.
    return base64Frame.replace(PATTERN, base64Substring => {
      const {token, args} = this.decodeBase64TokenFrame(base64Substring);
      const format = this.database.get(token);
      // Parse arguments if this is printf-style text.
      if (format) {
        return new PrintfDecoder().decode(String(format), args);
      }
      return base64Substring;
    });
  }

  /** Splits a binary frame into its leading little-endian token and args. */
  private decodeTokenFrame(frame: Frame): TokenAndArgs {
    const token = new DataView(
      frame.data.buffer,
      frame.data.byteOffset,
      4
    ).getUint32(0, true);
    // Slice the Uint8Array view itself rather than its underlying buffer:
    // frame.data may be a view at a nonzero byteOffset, in which case the
    // previous `frame.data.buffer.slice(4)` returned bytes outside the
    // frame (and any bytes past the view's end).
    const args = frame.data.slice(4);

    return {token, args};
  }

  /** Decodes a '$'-prefixed Base64 message into its token and arguments. */
  private decodeBase64TokenFrame(base64Data: string): TokenAndArgs {
    // Remove the prefix '$' and convert from Base64. Copying into a fresh
    // Uint8Array guarantees byteOffset 0 regardless of Buffer pooling.
    const bytes = new Uint8Array(Buffer.from(base64Data.slice(1), 'base64'));
    const token = new DataView(bytes.buffer, bytes.byteOffset, 4).getUint32(
      0,
      true
    );
    const args = bytes.slice(4);

    return {token, args};
  }
}
54 changes: 54 additions & 0 deletions pw_tokenizer/ts/detokenizer_test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
// Copyright 2022 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

/* eslint-env browser, jasmine */
import 'jasmine';

import {Frame, Encoder, Decoder} from '@pigweed/pw_hdlc';
import {Detokenizer} from './detokenizer';

// Test token database. CSV columns: 32-bit token (hex), removal date
// (blank = active), format string.
const CSV = `
64636261, ,"regular token"
86fc33f3, ,"base64 token"
`;

/**
 * Builds an HDLC frame whose payload is the UTF-8 encoding of `text`.
 *
 * The payload is wrapped in a UI frame and then run back through the HDLC
 * decoder, so the returned Frame matches what a real receiver would yield.
 */
function generateFrame(text: string): Frame {
  const payload = new TextEncoder().encode(text);
  const wire = new Encoder().uiFrame(1, payload);
  const [firstFrame] = Array.from(new Decoder().process(wire));
  return firstFrame;
}

describe('Detokenizer', () => {
  let detokenizer: Detokenizer;

  // Fresh detokenizer per spec so specs cannot affect each other.
  beforeEach(() => {
    detokenizer = new Detokenizer(CSV);
  });

  it('parses a base64 correct frame properly', () => {
    // '8zP8hg==' decodes to bytes f3 33 fc 86 -> little-endian token
    // 0x86fc33f3, present in CSV.
    const frame = generateFrame('$8zP8hg==');
    expect(detokenizer.detokenizeBase64(frame)).toEqual('base64 token');
  });
  it('parses a correct frame properly', () => {
    // 'abcd' = bytes 61 62 63 64 -> little-endian token 0x64636261,
    // present in CSV.
    const frame = generateFrame('abcde');
    expect(detokenizer.detokenize(frame)).toEqual('regular token');
  });
  it('failure to detokenize returns original string', () => {
    // Tokens absent from the database: input is returned unchanged.
    expect(detokenizer.detokenize(generateFrame('aabbcc'))).toEqual('aabbcc');
    expect(detokenizer.detokenizeBase64(generateFrame('$8zP7hg=='))).toEqual(
      '$8zP7hg=='
    );
  });
});
16 changes: 16 additions & 0 deletions pw_tokenizer/ts/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// Copyright 2022 The Pigweed Authors
//
// Licensed under the Apache License, Version 2.0 (the "License"); you may not
// use this file except in compliance with the License. You may obtain a copy of
// the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
// License for the specific language governing permissions and limitations under
// the License.

// Public API of @pigweed/pw_tokenizer.
export {Detokenizer} from './detokenizer';
export {PrintfDecoder} from './printf_decoder';
18 changes: 18 additions & 0 deletions pw_tokenizer/ts/package.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"name": "@pigweed/pw_tokenizer",
"version": "1.0.0",
"main": "index.js",
"license": "Apache-2.0",
"dependencies": {
"@bazel/jasmine": "^4.1.0",
"@types/crc": "^3.4.0",
"@types/jasmine": "^3.9.0",
"buffer": "^6.0.3",
"crc": "^3.8.0",
"jasmine": "^3.9.0",
"jasmine-core": "^3.9.0"
},
"engines": {
"node": ">=14.0.0"
}
}
Loading

0 comments on commit 74faca4

Please sign in to comment.