-
Notifications
You must be signed in to change notification settings - Fork 78
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
pw_tokenizer: Add a basic typescript implementation
Change-Id: Iae9528e65f5d39d7198c544d74901722f6512948 Reviewed-on: https://pigweed-review.googlesource.com/c/pigweed/pigweed/+/92460 Reviewed-by: Anthony DiGirolamo <[email protected]> Reviewed-by: Wyatt Hepler <[email protected]> Commit-Queue: Asad Memon <[email protected]>
- Loading branch information
Showing
15 changed files
with
661 additions
and
5 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
# Copyright 2022 The Pigweed Authors | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
# use this file except in compliance with the License. You may obtain a copy of | ||
# the License at | ||
# | ||
# https://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
# License for the specific language governing permissions and limitations under | ||
# the License. | ||
|
||
load("@build_bazel_rules_nodejs//:index.bzl", "js_library") | ||
load("@npm//@bazel/typescript:index.bzl", "ts_library", "ts_project") | ||
load("@npm//@bazel/jasmine:index.bzl", "jasmine_node_test") | ||
|
||
# Every target in this package is visible to the whole workspace.
package(default_visibility = ["//visibility:public"])

# Compiles the detokenizer sources, emitting declaration files and source maps.
ts_project(
    name = "lib",
    srcs = [
        "detokenizer.ts",
        "index.ts",
        "printf_decoder.ts",
        "token_database.ts",
    ],
    declaration = True,
    source_map = True,
    # can't use fine-grained deps
    deps = [
        "//pw_hdlc/ts:pw_hdlc",
        "@npm//:node_modules",
    ],
)

# Wraps the compiled output and package.json as the @pigweed/pw_tokenizer
# package.
js_library(
    name = "pw_tokenizer",
    package_name = "@pigweed/pw_tokenizer",
    srcs = ["package.json"],
    deps = [":lib"],
)

# Compiles the Jasmine specs against the library.
ts_library(
    name = "tokenizer_test_lib",
    srcs = [
        "detokenizer_test.ts",
        "printf_decoder_test.ts",
    ],
    deps = [
        ":lib",
        "//pw_hdlc/ts:pw_hdlc",
        "@npm//@types/jasmine",
        "@npm//@types/node",
        "@npm//buffer",
    ],
)

# Runs the compiled specs under Node.
jasmine_node_test(
    name = "tokenizer_test",
    srcs = [":tokenizer_test_lib"],
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,109 @@ | ||
// Copyright 2022 The Pigweed Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
// use this file except in compliance with the License. You may obtain a copy of | ||
// the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
// License for the specific language governing permissions and limitations under | ||
// the License. | ||
|
||
/** Decodes and detokenizes strings from binary or Base64 input. */ | ||
import {Buffer} from 'buffer'; | ||
import {Frame} from '@pigweed/pw_hdlc'; | ||
import {TokenDatabase} from './token_database'; | ||
import {PrintfDecoder} from './printf_decoder'; | ||
|
||
// Characters allowed inside a Base64 token: the standard alphabet (+ and /)
// plus the URL-safe variants (- and _). The hyphen is deliberately the LAST
// character of the class so it is a literal; written as `/-_` it would be the
// range U+002F..U+005F, which admits ':', '=', '[' and similar characters while
// excluding '-' itself.
const BASE64CHARS = '[A-Za-z0-9+/_-]';

/**
 * Global pattern that finds every Base64-encoded tokenized string embedded in
 * a piece of text. Note that a lone prefix character ('$') also matches,
 * because both the block group and the padded final block are optional.
 */
const PATTERN = new RegExp(
  // Base64 tokenized strings start with the prefix character ($)
  '\\$' +
    // Tokenized strings contain 0 or more blocks of four Base64 chars.
    `(?:${BASE64CHARS}{4})*` +
    // The last block of 4 chars may have one or two padding chars (=).
    `(?:${BASE64CHARS}{3}=|${BASE64CHARS}{2}==)?`,
  'g'
);
|
||
/** A tokenized message split into its token and its encoded arguments. */
interface TokenAndArgs {
  /** The 32-bit token read from the first four bytes of the message. */
  token: number;
  /** The bytes that follow the token (the still-encoded arguments). */
  args: Uint8Array;
}
|
||
/**
 * Turns tokenized messages back into readable strings by looking each token
 * up in a token database and decoding the printf-style arguments.
 */
export class Detokenizer {
  /** Number of bytes occupied by the token at the start of a message. */
  private static readonly TOKEN_SIZE_BYTES = 4;

  private database: TokenDatabase;

  /**
   * @param csvDatabase Token database in CSV form; handed unchanged to
   *     `TokenDatabase`.
   */
  constructor(csvDatabase: string) {
    this.database = new TokenDatabase(csvDatabase);
  }

  /**
   * Detokenize frame data into actual string messages using the provided
   * token database.
   *
   * If the frame is too short to hold a token, or its token is not in the
   * database, the frame data is returned decoded as text, as-is.
   *
   * @param tokenizedFrame Frame whose `data` holds the binary tokenized
   *     message.
   * @returns The detokenized message, or the frame data as text.
   */
  detokenize(tokenizedFrame: Frame): string {
    const decoded = this.decodeTokenFrame(tokenizedFrame);
    if (decoded !== undefined) {
      // Parse arguments if this is printf-style text.
      const format = this.database.get(decoded.token);
      if (format) {
        return new PrintfDecoder().decode(String(format), decoded.args);
      }
    }

    return new TextDecoder().decode(tokenizedFrame.data);
  }

  /**
   * Detokenize Base64-encoded frame data into actual string messages using the
   * provided token database.
   *
   * Every `$`-prefixed Base64 substring in the frame text is handled on its
   * own: if it decodes to a known token it is replaced with the detokenized
   * message, otherwise (unknown token, or too short to contain a token) it is
   * left in the text unchanged.
   *
   * @param tokenizedFrame Frame whose `data` is text containing Base64 tokens.
   * @returns The frame text with all recognized tokens expanded.
   */
  detokenizeBase64(tokenizedFrame: Frame): string {
    const base64Frame = new TextDecoder().decode(tokenizedFrame.data);
    return base64Frame.replace(PATTERN, base64Substring => {
      const decoded = this.decodeBase64TokenFrame(base64Substring);
      if (decoded === undefined) {
        // E.g. a bare '$' in ordinary text: not a token, keep it verbatim.
        return base64Substring;
      }
      const format = this.database.get(decoded.token);
      // Parse arguments if this is printf-style text.
      if (format) {
        return new PrintfDecoder().decode(String(format), decoded.args);
      }
      return base64Substring;
    });
  }

  /** Splits the binary payload of a frame into token and arguments. */
  private decodeTokenFrame(frame: Frame): TokenAndArgs | undefined {
    return Detokenizer.splitTokenAndArgs(frame.data);
  }

  /** Decodes a `$`-prefixed Base64 string into token and arguments. */
  private decodeBase64TokenFrame(base64Data: string): TokenAndArgs | undefined {
    // Remove the prefix '$' and convert from Base64 straight to bytes.
    const prefixRemoved = base64Data.slice(1);
    const bytes = new Uint8Array(Buffer.from(prefixRemoved, 'base64'));
    return Detokenizer.splitTokenAndArgs(bytes);
  }

  /**
   * Reads the little-endian 32-bit token from the start of `bytes` and copies
   * the remaining bytes out as the arguments.
   *
   * The view's own offset and length are honored, so `bytes` may be a window
   * into a larger buffer.
   *
   * @returns `undefined` when `bytes` is too short to contain a token.
   */
  private static splitTokenAndArgs(bytes: Uint8Array): TokenAndArgs | undefined {
    const tokenSize = Detokenizer.TOKEN_SIZE_BYTES;
    if (bytes.byteLength < tokenSize) {
      return undefined;
    }
    const token = new DataView(
      bytes.buffer,
      bytes.byteOffset,
      tokenSize
    ).getUint32(0, true);
    // Copy so the returned arguments do not alias the caller's buffer.
    const args = new Uint8Array(bytes.subarray(tokenSize));

    return {token, args};
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
// Copyright 2022 The Pigweed Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
// use this file except in compliance with the License. You may obtain a copy of | ||
// the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
// License for the specific language governing permissions and limitations under | ||
// the License. | ||
|
||
/* eslint-env browser, jasmine */ | ||
import 'jasmine'; | ||
|
||
import {Frame, Encoder, Decoder} from '@pigweed/pw_hdlc'; | ||
import {Detokenizer} from './detokenizer'; | ||
|
||
// Token database fixture. Each row starts with a token in hex and ends with
// the string it stands for. 64636261 is the little-endian value of the bytes
// 'abcd'; 86fc33f3 is what the Base64 text '8zP8hg==' decodes to.
// NOTE(review): the empty middle column is presumably a removal date - confirm
// against TokenDatabase.
const CSV = `
64636261, ,"regular token"
86fc33f3, ,"base64 token"
`;
|
||
/**
 * Builds a decoded HDLC frame whose payload is the UTF-8 encoding of `text`,
 * by encoding the bytes as a UI frame and running the result through a
 * decoder.
 *
 * @param text Payload to place in the frame.
 * @returns The first frame produced by the decoder.
 * @throws Error if the decoder yields no frame, instead of silently returning
 *     `undefined` typed as a `Frame`.
 */
function generateFrame(text: string): Frame {
  const payload = new TextEncoder().encode(text);
  // NOTE(review): the first argument (1) is presumably the HDLC address.
  const encodedFrame = new Encoder().uiFrame(1, payload);
  const decodedFrames = Array.from(new Decoder().process(encodedFrame));
  const firstFrame = decodedFrames[0];
  if (firstFrame === undefined) {
    throw new Error(`No HDLC frame could be decoded for payload "${text}"`);
  }
  return firstFrame;
}
|
||
// Specs for Detokenizer, covering the binary and the Base64 entry points.
describe('Detokenizer', () => {
  let detokenizer: Detokenizer;

  // A fresh instance per spec keeps the specs independent of each other.
  beforeEach(() => {
    detokenizer = new Detokenizer(CSV);
  });

  it('parses a base64 correct frame properly', () => {
    // '$8zP8hg==' carries token 86fc33f3 from the fixture.
    const decoded = detokenizer.detokenizeBase64(generateFrame('$8zP8hg=='));
    expect(decoded).toEqual('base64 token');
  });

  it('parses a correct frame properly', () => {
    // 'abcd' is token 64636261 from the fixture; 'e' is a trailing arg byte.
    const decoded = detokenizer.detokenize(generateFrame('abcde'));
    expect(decoded).toEqual('regular token');
  });

  it('failure to detokenize returns original string', () => {
    const unknownBinary = 'aabbcc';
    expect(detokenizer.detokenize(generateFrame(unknownBinary))).toEqual(
      'aabbcc'
    );

    const unknownBase64 = '$8zP7hg==';
    expect(detokenizer.detokenizeBase64(generateFrame(unknownBase64))).toEqual(
      '$8zP7hg=='
    );
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
// Copyright 2022 The Pigweed Authors | ||
// | ||
// Licensed under the Apache License, Version 2.0 (the "License"); you may not | ||
// use this file except in compliance with the License. You may obtain a copy of | ||
// the License at | ||
// | ||
// https://www.apache.org/licenses/LICENSE-2.0 | ||
// | ||
// Unless required by applicable law or agreed to in writing, software | ||
// distributed under the License is distributed on an "AS IS" BASIS, WITHOUT | ||
// WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the | ||
// License for the specific language governing permissions and limitations under | ||
// the License. | ||
|
||
export {Detokenizer} from './detokenizer'; | ||
export {PrintfDecoder} from './printf_decoder'; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
{ | ||
"name": "@pigweed/pw_tokenizer", | ||
"version": "1.0.0", | ||
"main": "index.js", | ||
"license": "Apache-2.0", | ||
"dependencies": { | ||
"@bazel/jasmine": "^4.1.0", | ||
"@types/crc": "^3.4.0", | ||
"@types/jasmine": "^3.9.0", | ||
"buffer": "^6.0.3", | ||
"crc": "^3.8.0", | ||
"jasmine": "^3.9.0", | ||
"jasmine-core": "^3.9.0" | ||
}, | ||
"engines": { | ||
"node": ">=14.0.0" | ||
} | ||
} |
Oops, something went wrong.