From b8e9396dd1ab75f54ac71efcd94c523be64c051c Mon Sep 17 00:00:00 2001 From: homura Date: Wed, 6 Dec 2023 23:50:46 +0800 Subject: [PATCH] perf(codec): improve bytify and hexify perf (#580) --- .changeset/big-ears-allow.md | 5 ++ package.json | 1 + packages/codec/src/bytes.ts | 47 +++++++++++++----- packages/codec/src/utils.ts | 96 +++++++++++++++++++++++------------- pnpm-lock.yaml | 16 +++++- 5 files changed, 119 insertions(+), 46 deletions(-) create mode 100644 .changeset/big-ears-allow.md diff --git a/.changeset/big-ears-allow.md b/.changeset/big-ears-allow.md new file mode 100644 index 000000000..9c0d6396d --- /dev/null +++ b/.changeset/big-ears-allow.md @@ -0,0 +1,5 @@ +--- +"@ckb-lumos/codec": patch +--- + +improving `hexify` and `bytify` performance diff --git a/package.json b/package.json index afe1923bf..b1ab27e2d 100644 --- a/package.json +++ b/package.json @@ -23,6 +23,7 @@ "@typescript-eslint/eslint-plugin": "^5.59.2", "@typescript-eslint/parser": "^5.59.2", "ava": "^3.8.2", + "benchmark": "^2.1.4", "c8": "^7.10.0", "eslint": "^8.40.0", "eslint-import-resolver-typescript": "^2.7.0", diff --git a/packages/codec/src/bytes.ts b/packages/codec/src/bytes.ts index d423efdc9..7a879f7fd 100644 --- a/packages/codec/src/bytes.ts +++ b/packages/codec/src/bytes.ts @@ -14,22 +14,39 @@ export function bytifyRawString(rawString: string): Uint8Array { return new Uint8Array(buffer); } +const CHAR_0 = "0".charCodeAt(0); // 48 +const CHAR_9 = "9".charCodeAt(0); // 57 +const CHAR_A = "A".charCodeAt(0); // 65 +const CHAR_F = "F".charCodeAt(0); // 70 +const CHAR_a = "a".charCodeAt(0); // 97 +// const CHAR_f = "f".charCodeAt(0); // 102 + function bytifyHex(hex: string): Uint8Array { assertHexString(hex); - hex = hex.slice(2); - const uint8s = []; - for (let i = 0; i < hex.length; i += 2) { - uint8s.push(parseInt(hex.substr(i, 2), 16)); + const u8a = Uint8Array.from({ length: hex.length / 2 - 1 }); + + for (let i = 2, j = 0; i < hex.length; i = i + 2, j++) { + const c1 = hex.charCodeAt(i); + const c2 = hex.charCodeAt(i + 1); + + // prettier-ignore + const n1 = c1 <= CHAR_9 ? c1 - CHAR_0 : c1 <= CHAR_F ? c1 - CHAR_A + 10 : c1 - CHAR_a + 10 + // prettier-ignore + const n2 = c2 <= CHAR_9 ? c2 - CHAR_0 : c2 <= CHAR_F ? c2 - CHAR_A + 10 : c2 - CHAR_a + 10 + + u8a[j] = (n1 << 4) | n2; } - return Uint8Array.from(uint8s); + return u8a; } function bytifyArrayLike(xs: ArrayLike): Uint8Array { - const isValidU8Vec = Array.from(xs).every((v) => v >= 0 && v <= 255); - if (!isValidU8Vec) { - throw new Error("invalid ArrayLike, all elements must be 0-255"); + for (let i = 0; i < xs.length; i++) { + const v = xs[i]; + if (v < 0 || v > 255 || !Number.isInteger(v)) { + throw new Error("invalid ArrayLike, all elements must be 0-255"); + } } return Uint8Array.from(xs); @@ -61,6 +78,10 @@ function equalUint8Array(a: Uint8Array, b: Uint8Array): boolean { } return true; } + +const HEX_CACHE = Array.from({ length: 256 }).map((_, i) => + i.toString(16).padStart(2, "0") +); /** * convert a {@link BytesLike} to an even length hex string prefixed with "0x" * @param buf @@ -69,9 +90,13 @@ function equalUint8Array(a: Uint8Array, b: Uint8Array): boolean { * hexify(Buffer.from([1, 2, 3])) // "0x010203" */ export function hexify(buf: BytesLike): string { - const hex = Array.from(bytify(buf)) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); + let hex = ""; + + const u8a = bytify(buf); + for (let i = 0; i < u8a.length; i++) { + hex += HEX_CACHE[u8a[i]]; + } + return "0x" + hex; } diff --git a/packages/codec/src/utils.ts b/packages/codec/src/utils.ts index 474d1f2c1..dc4abcb93 100644 --- a/packages/codec/src/utils.ts +++ b/packages/codec/src/utils.ts @@ -4,48 +4,76 @@ import { isCodecExecuteError, } from "./error"; -const HEX_DECIMAL_REGEX = /^0x([0-9a-fA-F])+$/; -const HEX_DECIMAL_WITH_BYTELENGTH_REGEX_MAP = new Map(); +const CHAR_0 = "0".charCodeAt(0); // 48 +const CHAR_9 = "9".charCodeAt(0); // 57 +const CHAR_A = "A".charCodeAt(0); // 65 +const CHAR_F = "F".charCodeAt(0); // 70 +const CHAR_a = "a".charCodeAt(0); // 97 +const CHAR_f = "f".charCodeAt(0); // 102 -export function assertHexDecimal(str: string, byteLength?: number): void { - if (byteLength) { - let regex = HEX_DECIMAL_WITH_BYTELENGTH_REGEX_MAP.get(byteLength); - if (!regex) { - const newRegex = new RegExp(`^0x([0-9a-fA-F]){1,${byteLength * 2}}$`); - HEX_DECIMAL_WITH_BYTELENGTH_REGEX_MAP.set(byteLength, newRegex); - regex = newRegex; - } - if (!regex.test(str)) { - throw new Error("Invalid hex decimal!"); - } - } else { - if (!HEX_DECIMAL_REGEX.test(str)) { - throw new Error("Invalid hex decimal!"); +function assertStartsWith0x(str: string): void { + if (!str || !str.startsWith("0x")) { + throw new Error("Invalid hex string"); + } +} + +function assertHexChars(str: string): void { + const strLen = str.length; + + for (let i = 2; i < strLen; i++) { + const char = str[i].charCodeAt(0); + if ( + (char >= CHAR_0 && char <= CHAR_9) || + (char >= CHAR_a && char <= CHAR_f) || + (char >= CHAR_A && char <= CHAR_F) + ) { + continue; } + + throw new Error(`Invalid hex character ${str[i]} in the string ${str}`); } } -const HEX_STRING_REGEX = /^0x([0-9a-fA-F][0-9a-fA-F])*$/; -const HEX_STRING_WITH_BYTELENGTH_REGEX_MAP = new Map(); +export function assertHexDecimal(str: string, byteLength?: number): void { + assertStartsWith0x(str); + if (str.length === 2) { + throw new Error( + "Invalid hex decimal length, should be at least 1 character, the '0x' is incorrect, should be '0x0'" + ); + } + + const strLen = str.length; + if (typeof byteLength === "number" && strLen > byteLength * 2 + 2) { + throw new Error( + `Invalid hex decimal length, should be less than ${byteLength} bytes, got ${ + strLen / 2 - 1 + } bytes` + ); + } + + assertHexChars(str); +} + +/** + * Assert if a string is a valid hex string that is matched with /^0x([0-9a-fA-F][0-9a-fA-F])*$/ + * @param str + * @param byteLength + */ export function assertHexString(str: string, byteLength?: number): void { - if (byteLength) { - let regex = HEX_STRING_WITH_BYTELENGTH_REGEX_MAP.get(byteLength); - if (!regex) { - const newRegex = new RegExp( - `^0x([0-9a-fA-F][0-9a-fA-F]){${byteLength}}$` - ); - HEX_STRING_WITH_BYTELENGTH_REGEX_MAP.set(byteLength, newRegex); - regex = newRegex; - } - if (!regex.test(str)) { - throw new Error("Invalid hex string!"); - } - } else { - if (!HEX_STRING_REGEX.test(str)) { - throw new Error("Invalid hex string!"); - } + assertStartsWith0x(str); + + const strLen = str.length; + + if (strLen % 2) { + throw new Error("Invalid hex string length, must be even!"); } + + if (typeof byteLength === "number" && strLen !== byteLength * 2 + 2) { + throw new Error("Invalid hex string length, not match with byteLength!"); + } + + assertHexChars(str); } export function assertUtf8String(str: string): void { diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 2d59e0688..8fb01ba9b 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -1,4 +1,4 @@ -lockfileVersion: '6.1' +lockfileVersion: '6.0' settings: autoInstallPeers: true @@ -56,6 +56,9 @@ importers: ava: specifier: ^3.8.2 version: 3.8.2 + benchmark: + specifier: ^2.1.4 + version: 2.1.4 c8: specifier: ^7.10.0 version: 7.10.0 @@ -7363,6 +7366,13 @@ packages: resolution: {integrity: sha512-LcknSilhIGatDAsY1ak2I8VtGaHNhgMSYVxFrGLXv+xLHytaKZKcaUJJUE7qmBr7h33o5YQwP55pMI0xmkpJwg==} dev: false + /benchmark@2.1.4: + resolution: {integrity: sha512-l9MlfN4M1K/H2fbhfMy3B7vJd6AGKJVQn2h6Sg/Yx+KckoUA7ewS5Vv6TjSq18ooE1kS9hhAlQRH3AkXIh/aOQ==} + dependencies: + lodash: 4.17.21 + platform: 1.3.6 + dev: true + /better-path-resolve@1.0.0: resolution: {integrity: sha512-pbnl5XzGBdrFU/wT4jqmJVPn2B6UHPBOhzMQkY/SPUPB6QtUXtmBHBIwCbXJol93mOpGMnQyP/+BB19q04xj7g==} engines: {node: '>=4'} @@ -14367,6 +14377,10 @@ packages: find-up: 3.0.0 dev: false + /platform@1.3.6: + resolution: {integrity: sha512-fnWVljUchTro6RiCFvCXBbNhJc2NijN7oIQxbwsyL0buWJPG85v81ehlHI9fXrJsMNgTofEoWIQeClKpgxFLrg==} + dev: true + /plur@4.0.0: resolution: {integrity: sha512-4UGewrYgqDFw9vV6zNV+ADmPAUAfJPKtGvb/VdpQAx25X5f3xXdGdyOEVFwkl8Hl/tl7+xbeHqSEM+D5/TirUg==} engines: {node: '>=10'}