Skip to content

Commit

Permalink
Refactor public API for better extensibility
Browse files Browse the repository at this point in the history
  • Loading branch information
ewfian committed Apr 24, 2023
1 parent bac6b82 commit f4ba891
Show file tree
Hide file tree
Showing 9 changed files with 181 additions and 128 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ import { Parser } from 'pickleparser';
async function unpickle(fname: string) {
const pkl = await fs.readFile(path.join(fname), 'binary');
const buffer = Buffer.from(pkl, 'binary');
const parser = new Parser(buffer);
return parser.load();
const parser = new Parser();
return parser.parse(buffer);
}

const obj = await unpickle('pickled.pkl');
Expand All @@ -67,8 +67,8 @@ fileSelector.addEventListener('change', function (e) {

reader.onload = function (event) {
const buffer = new Uint8Array(event.target.result);
const parser = new pickleparser.Parser(buffer);
const obj = parser.load();
const parser = new pickleparser.Parser();
const obj = parser.parse(buffer);
const json = JSON.stringify(obj, null, 4);
jsonResultPreviewer.innerText = json;
}
Expand Down
4 changes: 2 additions & 2 deletions bin/pickletojson.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,8 @@ const { Parser } = require('../dist/index');
const argv = process.argv;
const pkl = fs.readFileSync(path.join(argv[2]), 'binary');
const buffer = Buffer.from(pkl, 'binary');
const parser = new Parser(buffer);
const obj = parser.load();
const parser = new Parser();
const obj = parser.parse(buffer);

const replacer = (_, value) => {
if (value instanceof Map) {
Expand Down
5 changes: 2 additions & 3 deletions examples/index.html
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,8 @@ <h1>Pickle Parser Demo</h1>
reader.onload = function (event) {
try {
const buffer = new Uint8Array(event.target.result);
const parser = new pickleparser.Parser(buffer);
const obj = parser.load();
const parser = new pickleparser.Parser();
const obj = parser.parse(buffer);
window.obj = obj;
const replacer = (_, value) => {
if (value instanceof Map) {
Expand All @@ -53,7 +53,6 @@ <h1>Pickle Parser Demo</h1>
} catch (error) {
jsonResultPreviewer.innerText = error;
}

}

reader.readAsArrayBuffer(file);
Expand Down
16 changes: 10 additions & 6 deletions examples/index.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
import fs from 'node:fs/promises';
import path from 'node:path';
import { Parser } from '../';
import { Parser, NameRegistry } from '../';

class Document extends Map {}

async function unpickle(fname: string) {
const pkl = await fs.readFile(path.join(fname), 'binary');
const buffer = Buffer.from(pkl, 'binary');
const parser = new Parser(buffer, {

const registry = new NameRegistry();
registry.register('pathlib', 'WindowsPath', (...args) => args.join('\\'));
registry.register('pathlib', 'PosixPath', (...args) => args.join('/'));
registry.register('langchain.schema', 'Document', Document);

const parser = new Parser({
nameResolver: registry,
unpicklingTypeOfDictionary: 'Map',
unpicklingTypeOfSet: 'Set',
});
parser.registry.register('pathlib', 'WindowsPath', (...args) => args.join('\\'));
parser.registry.register('pathlib', 'PosixPath', (...args) => args.join('/'));
parser.registry.register('langchain.schema', 'Document', Document);
return parser.load();
return parser.parse(buffer);
}

const obj = await unpickle('wiki.pkl');
Expand Down
36 changes: 36 additions & 0 deletions src/PObject.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
/**
 * Attaches `value` to `target` under `key` as an own property that is
 * non-enumerable, non-writable and non-configurable, so it stays out of
 * `Object.keys` / `JSON.stringify` output and cannot be reassigned.
 */
function defineHiddenProperty(target: any, key: string, value: any): void {
    Object.defineProperty(target, key, {
        configurable: false,
        enumerable: false,
        writable: false,
        value,
    });
}

/**
 * Creates a placeholder callable for the global identified by `module` and `name`.
 *
 * The returned value can be used in two ways:
 *
 * - With `new`: the instance records the constructor arguments in a hidden
 *   `args` property and inherits `__module__`, `__name__` and
 *   `__setnewargs_ex__` from the placeholder's prototype.
 * - As a plain call: a fresh one-off constructor is built and instantiated
 *   with the call arguments. The result also carries a hidden `args` property
 *   and inherits `__module__` / `__name__`, but it is NOT an instance of the
 *   returned placeholder and has no `__setnewargs_ex__`.
 *
 * @param module Module name stored as `__module__` on the prototypes.
 * @param name Global name stored as `__name__` on the prototypes.
 * @returns The placeholder, usable both as a constructor and as a function.
 */
export function createPObject(module: string, name: string): (new (...args: any[]) => any) | ((...args: any[]) => any) {
    const PObject = function (this: any, ...args: any[]): any {
        if (!new.target) {
            // Plain call: build a throwaway constructor so the result is a
            // distinct object type that still exposes the module/name tags.
            const PFunction = function (this: any, ...callArgs: any[]) {
                defineHiddenProperty(this, 'args', callArgs);
            };
            Object.assign(PFunction.prototype, { __module__: module, __name__: name });
            return Reflect.construct(PFunction, args);
        }
        // Invoked with `new`: remember the constructor arguments on the instance.
        defineHiddenProperty(this, 'args', args);
    } as unknown as (new (...args: any[]) => any) | ((...args: any[]) => any);

    Object.assign(PObject.prototype, { __module__: module, __name__: name });
    // Stores keyword arguments on the instance as a hidden `kwargs` property.
    PObject.prototype.__setnewargs_ex__ = function (this: any, kwargs: any) {
        defineHiddenProperty(this, 'kwargs', kwargs);
    };
    return PObject;
}
3 changes: 2 additions & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
export { Parser } from './parser';
export { Reader } from './reader';
export { BufferReader } from './reader';
export { NameRegistry } from './registry';
112 changes: 52 additions & 60 deletions src/parser.ts
Original file line number Diff line number Diff line change
@@ -1,48 +1,72 @@
/* eslint-disable @typescript-eslint/no-explicit-any */
import { OP } from './opcode';
import { Reader } from './reader';
import { Registry } from './registry';
import { IReader, BufferReader, readUint64, readUint64WithBigInt } from './reader';
import { ISetProvider, SetProviderFactory } from './setProvider';
import { IDictionaryProvider, DictionaryProviderFactory } from './dictionaryProvider';
import { createPObject } from './PObject';

export type UnpicklingTypeOfSet = 'array' | 'Set';
export type UnpicklingTypeOfDictionary = 'object' | 'Map';

/**
 * Strategy used by the parser to turn a pickled global reference into a callable.
 *
 * The parser calls `resolve` with the module and name read for the `GLOBAL`
 * and `STACK_GLOBAL` opcodes and pushes the result onto its stack. When no
 * resolver is supplied in the options, the default one returns
 * `createPObject(module, name)`.
 */
export interface NameResolver {
/** Returns the constructor or function to use for the global `name` in `module`. */
resolve(module: string, name: string): (new (...args: any[]) => any) | ((...args: any[]) => any);
}
/**
 * Strategy used by the parser to resolve persistent ids.
 *
 * The parser calls `resolve` for the `PERSID` and `BINPERSID` opcodes and
 * pushes the returned value onto its stack. The default resolver throws an
 * "Unregistered persistent id" error.
 */
export interface PersistentResolver {
/** Returns the value that the persistent id `pid` stands for. */
resolve(pid: string): any;
}
/**
 * Strategy used by the parser to resolve extension codes.
 *
 * The parser calls `resolve` with the code read for the `EXT1`, `EXT2` and
 * `EXT4` opcodes and pushes the returned value onto its stack. The default
 * resolver throws an "Unregistered extension code" error.
 */
export interface ExtensionResolver {
/** Returns the value registered for the numeric extension code `extCode`. */
resolve(extCode: number): any;
}

export interface ParserOptions {
onPersistentLoad: (pid: string) => any;
onExtensionLoad: (extCode: number) => any;
nameResolver: NameResolver;
persistentResolver: PersistentResolver;
extensionResolver: ExtensionResolver;
unpicklingTypeOfSet: UnpicklingTypeOfSet;
unpicklingTypeOfDictionary: UnpicklingTypeOfDictionary;
}

const DefualtOptions: ParserOptions = {
onPersistentLoad(pid) {
throw new Error(`Unregistered persistent id: \`${pid}\`.`);
nameResolver: {
resolve: (module, name) => createPObject(module, name),
},
onExtensionLoad(extCode) {
throw new Error(`Unregistered extension code: \`${extCode.toString(16)}\`.`);
persistentResolver: {
resolve: (pid) => {
throw new Error(`Unregistered persistent id: \`${pid}\`.`);
},
},
extensionResolver: {
resolve: (extCode) => {
throw new Error(`Unregistered extension code: \`${extCode.toString(16)}\`.`);
},
},
unpicklingTypeOfSet: 'array',
unpicklingTypeOfDictionary: 'object',
};

export class Parser {
private _options: ParserOptions;
private _reader: Reader;
private _setProvider: ISetProvider;
private _dictionaryProvider: IDictionaryProvider;

registry: Registry = new Registry();
private readonly _options: ParserOptions;
private readonly _nameResolver: NameResolver;
private readonly _persistentResolver: PersistentResolver;
private readonly _extensionResolver: ExtensionResolver;
private readonly _setProvider: ISetProvider;
private readonly _dictionaryProvider: IDictionaryProvider;

constructor(buffer: Uint8Array | Int8Array | Uint8ClampedArray, options?: Partial<ParserOptions>) {
constructor(options?: Partial<ParserOptions>) {
this._options = { ...DefualtOptions, ...options };
this._reader = new Reader(buffer);
this._nameResolver = this._options.nameResolver;
this._persistentResolver = this._options.persistentResolver;
this._extensionResolver = this._options.extensionResolver;
this._setProvider = SetProviderFactory(this._options.unpicklingTypeOfSet);
this._dictionaryProvider = DictionaryProviderFactory(this._options.unpicklingTypeOfDictionary);
}

load() {
const reader = this._reader;
parse<T>(buffer: Uint8Array | Int8Array | Uint8ClampedArray): T {
const reader = new BufferReader(buffer);
return this.read(reader);
}

read<T>(reader: IReader): T {
let stack: any[] = [];
const metastack: any[] = [];
const memo = new Map();
Expand Down Expand Up @@ -167,14 +191,14 @@ export class Parser {
case OP.LONG1: {
const length = reader.byte();
const data = reader.bytes(length);
const number = this.readUint64(data);
const number = readUint64(data);
stack.push(number);
break;
}
case OP.LONG4: {
const length = reader.uint32();
const data = reader.bytes(length);
const number = this.readUint64WithBigInt(data);
const number = readUint64WithBigInt(data);
stack.push(number);
break;
}
Expand Down Expand Up @@ -319,19 +343,19 @@ export class Parser {
// Exts
case OP.EXT1: {
const extCode = reader.byte();
const cls = this._options.onExtensionLoad(extCode);
const cls = this._extensionResolver.resolve(extCode);
stack.push(cls);
break;
}
case OP.EXT2: {
const extCode = reader.uint16();
const cls = this._options.onExtensionLoad(extCode);
const cls = this._extensionResolver.resolve(extCode);
stack.push(cls);
break;
}
case OP.EXT4: {
const extCode = reader.uint32();
const cls = this._options.onExtensionLoad(extCode);
const cls = this._extensionResolver.resolve(extCode);
stack.push(cls);
break;
}
Expand All @@ -340,14 +364,14 @@ export class Parser {
case OP.GLOBAL: {
const module = reader.line();
const name = reader.line();
const cls = this.registry.resolve(module, name);
const cls = this._nameResolver.resolve(module, name);
stack.push(cls);
break;
}
case OP.STACK_GLOBAL: {
const name = stack.pop();
const module = stack.pop();
const cls = this.registry.resolve(module, name);
const cls = this._nameResolver.resolve(module, name);
stack.push(cls);
break;
}
Expand All @@ -358,7 +382,7 @@ export class Parser {
const name = reader.line();
const args = stack;
stack = metastack.pop();
const cls = this.registry.resolve(module, name);
const cls = this._nameResolver.resolve(module, name);
const obj = Reflect.construct(cls, args);
stack.push(obj);
break;
Expand Down Expand Up @@ -391,13 +415,13 @@ export class Parser {
}
case OP.PERSID: {
const pid = reader.line();
const cls = this._options.onPersistentLoad(pid);
const cls = this._persistentResolver.resolve(pid);
stack.push(cls);
break;
}
case OP.BINPERSID: {
const pid = stack.pop();
const cls = this._options.onPersistentLoad(pid);
const cls = this._persistentResolver.resolve(pid);
stack.push(cls);
break;
}
Expand Down Expand Up @@ -444,36 +468,4 @@ export class Parser {
}
throw new Error('Unexpected end of file.');
}

private readUint64(data: Uint8Array | Int8Array | Uint8ClampedArray) {
if (data.length > 8) {
throw new Error('Value too large to unpickling');
}
// Padding to 8 bytes
const buffer = new ArrayBuffer(8);
const uint8 = new Uint8Array(buffer);
uint8.set(data);
const subReader = new Reader(uint8);
const number = subReader.uint64();
return number;
}

private readUint64WithBigInt(data: Uint8Array | Int8Array | Uint8ClampedArray) {
let fixedLength = 0;
let partCount = 0;
while (fixedLength < data.length) {
fixedLength += 4;
partCount += 1;
}
const buffer = new ArrayBuffer(fixedLength);
const uint8 = new Uint8Array(buffer);
uint8.set(data);
const view = new DataView(buffer, 0, fixedLength);
let number = BigInt(0);
for (let partIndex = 0; partIndex < partCount; partIndex++) {
const part = BigInt(view.getUint32(partIndex * 4, true));
number |= part << BigInt(partIndex * 32);
}
return number;
}
}
Loading

0 comments on commit f4ba891

Please sign in to comment.