From ee8a35272bb1794794fc195c5b2bc7f0985c5da3 Mon Sep 17 00:00:00 2001 From: homura Date: Fri, 15 Dec 2023 15:27:30 +0800 Subject: [PATCH] feat(codec): supported union with custom id (#586) --- .changeset/fluffy-rabbits-tell.md | 6 ++ .changeset/pre.json | 28 +++++++++ commitlint.config.js | 1 + packages/codec/src/molecule/layout.ts | 77 +++++++++++++++++-------- packages/codec/tests/molecule.test.ts | 44 +++++++++++++- packages/molecule/README.md | 14 +++++ packages/molecule/src/codec.ts | 53 ++++++++++++----- packages/molecule/src/grammar/mol.js | 20 ++++++- packages/molecule/src/grammar/mol.ne | 34 +++++++---- packages/molecule/src/nearley.ts | 11 +++- packages/molecule/src/type.ts | 2 +- packages/molecule/tests/codec.test.ts | 25 ++++++++ packages/molecule/tests/grammar.test.ts | 45 +++++++++++++++ 13 files changed, 302 insertions(+), 58 deletions(-) create mode 100644 .changeset/fluffy-rabbits-tell.md create mode 100644 .changeset/pre.json create mode 100644 packages/molecule/README.md diff --git a/.changeset/fluffy-rabbits-tell.md b/.changeset/fluffy-rabbits-tell.md new file mode 100644 index 000000000..8f3ecec0c --- /dev/null +++ b/.changeset/fluffy-rabbits-tell.md @@ -0,0 +1,6 @@ +--- +"@ckb-lumos/molecule": minor +"@ckb-lumos/codec": minor +--- + +feat: union codec now supports custom ids diff --git a/.changeset/pre.json b/.changeset/pre.json new file mode 100644 index 000000000..98e59c42a --- /dev/null +++ b/.changeset/pre.json @@ -0,0 +1,28 @@ +{ + "mode": "pre", + "tag": "next", + "initialVersions": { + "@ckb-lumos/base": "0.21.1", + "@ckb-lumos/bi": "0.21.1", + "@ckb-lumos/ckb-indexer": "0.21.1", + "@ckb-lumos/codec": "0.21.1", + "@ckb-lumos/common-scripts": "0.21.1", + "@ckb-lumos/config-manager": "0.21.1", + "@ckb-lumos/debugger": "0.21.1", + "@ckb-lumos/e2e-test": "0.21.1", + "@ckb-lumos/experiment-tx-assembler": "0.21.1", + "@ckb-lumos/hd": "0.21.1", + "@ckb-lumos/hd-cache": "0.21.1", + "@ckb-lumos/helpers": "0.21.1", + "@ckb-lumos/light-client": 
"0.21.1", + "@ckb-lumos/lumos": "0.21.1", + "@ckb-lumos/molecule": "0.21.1", + "@ckb-lumos/rpc": "0.21.1", + "@ckb-lumos/runner": "0.21.1", + "@ckb-lumos/testkit": "0.21.1", + "@ckb-lumos/toolkit": "0.21.1", + "@ckb-lumos/transaction-manager": "0.21.1", + "@ckb-lumos/utils": "0.21.1" + }, + "changesets": [] +} diff --git a/commitlint.config.js b/commitlint.config.js index d3e6b8928..54fecde56 100644 --- a/commitlint.config.js +++ b/commitlint.config.js @@ -22,6 +22,7 @@ const scopeEnumValues = [ "utils", "runner", "e2e-test", + "molecule", ]; const Configuration = { extends: ["@commitlint/config-conventional"], diff --git a/packages/codec/src/molecule/layout.ts b/packages/codec/src/molecule/layout.ts index c927f70b2..b0490db56 100644 --- a/packages/codec/src/molecule/layout.ts +++ b/packages/codec/src/molecule/layout.ts @@ -131,11 +131,9 @@ export function struct>( }, Uint8Array.from([])); }, unpack(buf) { - const result = {} as PartialNullable< - { - [key in keyof T]: UnpackResult; - } - >; + const result = {} as PartialNullable<{ + [key in keyof T]: UnpackResult; + }>; let offset = 0; fields.forEach((field) => { @@ -296,11 +294,9 @@ export function table>( ); } if (totalSize <= 4 || fields.length === 0) { - return {} as PartialNullable< - { - [key in keyof T]: UnpackResult; - } - >; + return {} as PartialNullable<{ + [key in keyof T]: UnpackResult; + }>; } else { const offsets = fields.map((_, index) => Uint32LE.unpack(buf.slice(4 + index * 4, 8 + index * 4)) @@ -315,11 +311,9 @@ export function table>( const itemBuf = buf.slice(start, end); Object.assign(obj, { [field]: itemCodec.unpack(itemBuf) }); } - return obj as PartialNullable< - { - [key in keyof T]: UnpackResult; - } - >; + return obj as PartialNullable<{ + [key in keyof T]: UnpackResult; + }>; } }, }); @@ -328,19 +322,36 @@ export function table>( /** * Union is a dynamic-size type. 
* Serializing a union has two steps: - * - Serialize a item type id in bytes as a 32 bit unsigned integer in little-endian. The item type id is the index of the inner items, and it's starting at 0. + * - Serialize an item type id in bytes as a 32 bit unsigned integer in little-endian. By default the item type id is the zero-based index of the item in `fields`; when `fields` is an object, the custom id given there is used instead. * - Serialize the inner item. * @param itemCodec the union item record - * @param fields the list of itemCodec's keys. It's also provide an order for pack/unpack. + * @param fields the union item keys: either an array (each item's id is its index) or an object mapping each key to its custom id + * @example + * // without custom id + * union({ cafe: Uint8, bee: Uint8 }, ['cafe', 'bee']) + * // with custom id + * union({ cafe: Uint8, bee: Uint8 }, { cafe: 0xcafe, bee: 0xbee }) */ export function union>( itemCodec: T, - fields: (keyof T)[] + fields: (keyof T)[] | Record ): UnionCodec { + checkShape(itemCodec, Array.isArray(fields) ? fields : Object.keys(fields)); + + // check duplicated id + if (!Array.isArray(fields)) { + const ids = Object.values(fields); + if (ids.length !== new Set(ids).size) { + throw new Error(`Duplicated id in union: ${ids.join(", ")}`); + } + } + return createBytesCodec({ pack(obj) { + const availableFields: (keyof T)[] = Object.keys(itemCodec); + const type = obj.type; - const typeName = `Union(${fields.join(" | ")})`; + const typeName = `Union(${availableFields.join(" | ")})`; /* c8 ignore next */ if (typeof type !== "string") { @@ -350,20 +361,38 @@ export function union>( ); } - const fieldIndex = fields.indexOf(type); - if (fieldIndex === -1) { + const fieldId = Array.isArray(fields) + ? 
fields.indexOf(type) + : fields[type]; + + if (fieldId < 0) { throw new CodecBaseParseError( `Unknown union type: ${String(obj.type)}`, typeName ); } - const packedFieldIndex = Uint32LE.pack(fieldIndex); + const packedFieldIndex = Uint32LE.pack(fieldId); const packedBody = itemCodec[type].pack(obj.value); return concat(packedFieldIndex, packedBody); }, unpack(buf) { - const typeIndex = Uint32LE.unpack(buf.slice(0, 4)); - const type = fields[typeIndex]; + const fieldId = Uint32LE.unpack(buf.slice(0, 4)); + + const type: keyof T | undefined = (() => { + if (Array.isArray(fields)) { + return fields[fieldId]; + } + + const entry = Object.entries(fields).find(([, id]) => id === fieldId); + return entry?.[0]; + })(); + + if (!type) { + throw new Error( + `Unknown union field id: ${fieldId}, only ${fields} are allowed` + ); + } + return { type, value: itemCodec[type].unpack(buf.slice(4)) }; }, }); diff --git a/packages/codec/tests/molecule.test.ts b/packages/codec/tests/molecule.test.ts index fdd8b2f7a..aa9a0538f 100644 --- a/packages/codec/tests/molecule.test.ts +++ b/packages/codec/tests/molecule.test.ts @@ -12,7 +12,7 @@ import { import { Bytes, createFixedHexBytesCodec } from "../src/blockchain"; import { bytify } from "../src/bytes"; import test, { ExecutionContext } from "ava"; -import { Uint16, Uint16BE, Uint32, Uint8 } from "../src/number"; +import { Uint16, Uint16BE, Uint32, Uint32LE, Uint8 } from "../src/number"; import { byteOf } from "../src/molecule"; import { CodecExecuteError } from "../src/error"; @@ -193,6 +193,46 @@ test("test layout-union", (t) => { t.throws(() => codec.pack({ type: "unknown", value: [] })); }); +test("test union with custom id", (t) => { + const codec = union( + { key1: Uint8, key2: Uint32LE }, + { key1: 0xaa, key2: 0xbb } + ); + + // prettier-ignore + const case1 = bytify([ + 0xaa, 0x00, 0x00, 0x00, // key1 + 0x11, // value + ]); + + t.deepEqual(codec.unpack(case1), { type: "key1", value: 0x11 }); + t.deepEqual(codec.pack({ type: 
"key1", value: 0x11 }), case1); + + // prettier-ignore + const case2 = bytify([ + 0xbb, 0x00, 0x00, 0x00, // key2 + 0x00, 0x00, 0x00, 0x11, // value u32le + ]) + + t.deepEqual(codec.unpack(case2), { type: "key2", value: 0x11_00_00_00 }); + t.deepEqual(codec.pack({ type: "key2", value: 0x11_00_00_00 }), case2); + + // @ts-expect-error + t.throws(() => codec.pack({ type: "unknown", value: 0x11 })); + + // @ts-expect-error + t.throws(() => union({ key1: Uint8, key2: Uint32LE }, { unknown: 0x1 })); + // prettier-ignore + t.throws(() => codec.unpack([ + 0x00, 0x00, 0x00, 0x00, // unknown key + 0x11, + ])); +}); + +test("test union with duplicated custom id", (t) => { + t.throws(() => union({ key1: Uint8, key2: Uint32LE }, { key1: 0, key2: 0 })); +}); + test("test byteOf", (t) => { t.deepEqual(byteOf(Uint8).pack(1), bytify([1])); t.throws(() => byteOf(Uint16).pack(1)); @@ -316,7 +356,7 @@ test("nested type", (t) => { ["byteField", "arrayField", "structField", "fixedVec", "dynVec", "option"] ); - const validInput: Parameters[0] = { + const validInput: Parameters<(typeof codec)["pack"]>[0] = { byteField: 0x1, arrayField: [0x2, 0x3, 0x4], structField: { f1: 0x5, f2: 0x6 }, diff --git a/packages/molecule/README.md b/packages/molecule/README.md new file mode 100644 index 000000000..6501d6165 --- /dev/null +++ b/packages/molecule/README.md @@ -0,0 +1,14 @@ +# @ckb-lumos/molecule + +A molecule parser written in JavaScript that helps developers to parse molecule into a codec map. 
+ +```js +const { createParser } = require("@ckb-lumos/molecule"); + +const parser = createParser(); +const codecMap = parser.parse(` + array Uint8 [byte; 1]; +`); + +codecMap.Uint8.pack(1); +``` diff --git a/packages/molecule/src/codec.ts b/packages/molecule/src/codec.ts index e66a7963a..624b1ce0b 100644 --- a/packages/molecule/src/codec.ts +++ b/packages/molecule/src/codec.ts @@ -1,8 +1,8 @@ import { - FixedBytesCodec, - createBytesCodec, - BytesLike, BytesCodec, + BytesLike, + createBytesCodec, + FixedBytesCodec, } from "@ckb-lumos/codec/lib/base"; import { array, @@ -46,7 +46,7 @@ export const toCodec = ( } const molType: MolType = molTypeMap[key]; nonNull(molType); - let codec = null; + let codec: BytesCodec | null = null; switch (molType.type) { case "array": { if (molType.name.startsWith("Uint")) { @@ -104,21 +104,42 @@ export const toCodec = ( break; } case "union": { - const unionCodecs: Record = {}; - molType.items.forEach((itemMolTypeName) => { - if (itemMolTypeName === byte) { - unionCodecs[itemMolTypeName] = createFixedHexBytesCodec(1); + // Tuple of [UnionFieldName, UnionFieldId, UnionTypeCodec] + const unionCodecs: [string, number, BytesCodec][] = []; + + molType.items.forEach((unionTypeItem, index) => { + if (unionTypeItem === byte) { + unionCodecs.push([unionTypeItem, index, createFixedHexBytesCodec(1)]); } else { - const itemMolType = toCodec( - itemMolTypeName, - molTypeMap, - result, - refs - ); - unionCodecs[itemMolTypeName] = itemMolType; + if (typeof unionTypeItem === "string") { + const itemMolType = toCodec( + unionTypeItem, + molTypeMap, + result, + refs + ); + unionCodecs.push([unionTypeItem, index, itemMolType]); + } else if (Array.isArray(unionTypeItem)) { + const [key, fieldId] = unionTypeItem; + + const itemMolType = toCodec(key, molTypeMap, result, refs); + unionCodecs.push([key, fieldId, itemMolType]); + } } }); - codec = union(unionCodecs, Object.keys(unionCodecs)); + + const unionFieldsCodecs: Record = unionCodecs.reduce( + 
(codecMap, [fieldName, _fieldId, fieldCodec]) => + Object.assign(codecMap, { [fieldName]: fieldCodec }), + {} + ); + const unionFieldIds: Record = unionCodecs.reduce( + (idMap, [fieldName, fieldId, _fieldCodec]) => + Object.assign(idMap, { [fieldName]: fieldId }), + {} + ); + + codec = union(unionFieldsCodecs, unionFieldIds); break; } case "table": { diff --git a/packages/molecule/src/grammar/mol.js b/packages/molecule/src/grammar/mol.js index 0ae14d11c..a8799817c 100644 --- a/packages/molecule/src/grammar/mol.js +++ b/packages/molecule/src/grammar/mol.js @@ -228,12 +228,26 @@ }; }, }, + { + name: "union_item_decl", + symbols: ["identifier", "_", { literal: ":" }, "_", "number"], + postprocess: function (data) { + return [data[0].value, Number(data[4].value)]; + }, + }, + { + name: "union_item_decl", + symbols: ["identifier"], + postprocess: function (data) { + return data[0].value; + }, + }, { name: "union_definition$ebnf$1$subexpression$1", symbols: [ "multi_line_ws_char", "_", - "identifier", + "union_item_decl", "_", "comma", "_", @@ -251,7 +265,7 @@ symbols: [ "multi_line_ws_char", "_", - "identifier", + "union_item_decl", "_", "comma", "_", @@ -287,7 +301,7 @@ return { type: "union", name: data[2].value, - items: data[6].map((d) => d[2].value), + items: data[6].map((d) => d[2]), }; }, }, diff --git a/packages/molecule/src/grammar/mol.ne b/packages/molecule/src/grammar/mol.ne index 9259bb196..91751defc 100644 --- a/packages/molecule/src/grammar/mol.ne +++ b/packages/molecule/src/grammar/mol.ne @@ -117,20 +117,20 @@ top_level_statement array_definition -> "array" __ identifier _ lbracket _ identifier _ semicolon _ number _ rbracket _ semicolon _ comment_opt - {% + {% function(data) { return { type: "array", name: data[2].value, item: data[6].value, - item_count: data[10].value + item_count: data[10].value }; } %} vector_definition -> "vector" __ identifier _ labracket _ identifier _ rabracket _ semicolon _ comment_opt - {% + {% function(data) { return { type: 
"vector", @@ -142,7 +142,7 @@ vector_definition option_definition -> "option" __ identifier _ lparan _ identifier _ rparan _ semicolon _ comment_opt - {% + {% function(data) { return { type: "option", @@ -152,21 +152,35 @@ option_definition } %} +union_item_decl + -> identifier _ ":" _ number + {% + function (data) { + return [data[0].value, Number(data[4].value)] + } + %} + | identifier + {% + function (data) { + return data[0].value + } + %} + union_definition - -> "union" __ identifier _ lbrace _ (multi_line_ws_char _ identifier _ comma _ comment_opt _ multi_line_ws_char):+ _ rbrace - {% + -> "union" __ identifier _ lbrace _ (multi_line_ws_char _ union_item_decl _ comma _ comment_opt _ multi_line_ws_char):+ _ rbrace + {% function(data) { return { type: "union", name: data[2].value, - items: data[6].map(d => d[2].value), + items: data[6].map(d => d[2]), }; } %} struct_definition -> "struct" __ identifier _ block_definition - {% + {% function(data) { return { type: "struct", @@ -178,7 +192,7 @@ struct_definition table_definition -> "table" __ identifier _ block_definition - {% + {% function(data) { return { type: "table", @@ -217,4 +231,4 @@ multi_line_ws_char __ -> %ws:+ -_ -> %ws:* \ No newline at end of file +_ -> %ws:* diff --git a/packages/molecule/src/nearley.ts b/packages/molecule/src/nearley.ts index d66044571..b9704eba7 100644 --- a/packages/molecule/src/nearley.ts +++ b/packages/molecule/src/nearley.ts @@ -13,6 +13,7 @@ import { ParseOptions, } from "./type"; import { nonNull, toMolTypeMap } from "./utils"; +import { Uint32 } from "@ckb-lumos/codec/lib/number"; // eslint-disable-next-line @typescript-eslint/no-var-requires const grammar = require("./grammar/mol.js"); @@ -92,10 +93,16 @@ export const checkDependencies = (results: MolType[]): void => { } case "union": { const unionDeps = (molItem as Union).items; - unionDeps.forEach((dep: string) => { - if (dep !== byte) { + unionDeps.forEach((dep) => { + if (typeof dep === "string" && dep !== byte) { 
nonNull(map[dep]); } + if (Array.isArray(dep)) { + const [key, id] = dep; + // check if the id is a valid uint32 + Uint32.pack(id); + nonNull(map[key]); + } }); break; } diff --git a/packages/molecule/src/type.ts b/packages/molecule/src/type.ts index 72fb83cdd..21f07b7eb 100644 --- a/packages/molecule/src/type.ts +++ b/packages/molecule/src/type.ts @@ -27,7 +27,7 @@ export type Option = { export type Union = { type: "union"; name: string; - items: string[]; + items: (string | [string, number])[]; }; export type Struct = { diff --git a/packages/molecule/tests/codec.test.ts b/packages/molecule/tests/codec.test.ts index 10692b522..662aa90d9 100644 --- a/packages/molecule/tests/codec.test.ts +++ b/packages/molecule/tests/codec.test.ts @@ -525,3 +525,28 @@ test("should unpack only one WitnessArgs", (t) => { } ); }); + +test("checkDependencies should work correctly with union", (t) => { + checkDependencies([ + { type: "array", name: "Uint8", item: "byte", item_count: 1 }, + { type: "array", name: "Uint16", item: "byte", item_count: 2 }, + { type: "array", name: "Uint32", item: "byte", item_count: 4 }, + + // without custom id + { type: "union", name: "Test1", items: ["Uint8", "Uint16", "Uint32"] }, + // prettier-ignore + // with custom id + { type: "union", name: "Test2", items: [["Uint8", 111], ["Uint16", 222], ["Uint32", 333]] }, + ]); + + t.pass(); +}); + +test("checkDependencies should throw when id is larger than uint32", (t) => { + t.throws(() => + checkDependencies([ + { type: "array", name: "Uint8", item: "byte", item_count: 1 }, + { type: "union", name: "Test", items: [["Uint8", 0xff_ff_ff_ff + 1]] }, + ]) + ); +}); diff --git a/packages/molecule/tests/grammar.test.ts b/packages/molecule/tests/grammar.test.ts index c1a11eac6..7f0a3788c 100644 --- a/packages/molecule/tests/grammar.test.ts +++ b/packages/molecule/tests/grammar.test.ts @@ -45,6 +45,51 @@ test("should parse sample with refs", (t) => { ); }); +test("union with custom id", (t) => { + const parser = 
createParser(); + const withCustomId = parser.parse(` + array Uint8 [byte; 1]; + array Uint16 [byte; 2]; + array Uint32 [byte; 4]; + + union JSNumber { + Uint8: 8, + Uint16: 16, + Uint32: 32, + } + `); + + t.deepEqual( + withCustomId.JSNumber.pack({ type: "Uint8", value: 1 }), + // prettier-ignore + Uint8Array.from([ + 0x08, 0x00, 0x00, 0x00, // id should be 8 + 0x01 + ]) + ); + + const withoutCustomId = parser.parse(` + array Uint8 [byte; 1]; + array Uint16 [byte; 2]; + array Uint32 [byte; 4]; + + union JSNumber { + Uint8, + Uint16, + Uint32, + } + `); + + t.deepEqual( + withoutCustomId.JSNumber.pack({ type: "Uint8", value: 1 }), + // prettier-ignore + Uint8Array.from([ + 0x00, 0x00, 0x00, 0x00, // id should be 0 + 0x01 + ]) + ); +}); + test("should parse blockchain.mol", (t) => { const parser = createParser(); // https://github.com/nervosnetwork/ckb/blob/5a7efe7a0b720de79ff3761dc6e8424b8d5b22ea/util/types/schemas/blockchain.mol