-
Notifications
You must be signed in to change notification settings - Fork 29
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
546 additions
and
60 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,4 +8,6 @@ dist/test/* | |
debug.ts | ||
*.map | ||
*.tiktoken | ||
.eslintrc.js | ||
.eslintrc.js | ||
/perf/* | ||
*.map |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
package.json | ||
package-lock.json | ||
*.cpuprofile |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,65 @@ | ||
const fs = require('fs/promises'); | ||
const path = require('path'); | ||
const inspector = require('inspector'); | ||
const { promisify } = require('util'); | ||
|
||
// Command line: node <script> <encoderName> <folderPath> <method> <modulePath>
//   encoderName - passed to createByEncoderName to build the tokenizer
//   folderPath  - folder scanned (recursively) for files to tokenize
//   method      - tokenizer method to benchmark ('encode' or 'encodeTrimSuffix')
//   modulePath  - module that exports createByEncoderName
const [,, encoderName, folderPath, method, modulePath] = process.argv;
if (!encoderName || !folderPath || !method || !modulePath) {
  // Fail with a readable message instead of the opaque error that
  // require(undefined) would raise for a missing argument.
  throw new Error('usage: node <script> <encoderName> <folderPath> <method> <modulePath>');
}
const { createByEncoderName } = require(modulePath);

// The benchmark loop keeps running until BOTH thresholds are reached.
const minTime = 10_000; // minimum total run time, in milliseconds
const minCycles = 5; // minimum number of full passes over the input files

// Only files whose path ends with one of these suffixes are read.
const fileExtensions = ['.ts', '.js', '.py'];
|
||
/**
 * Recursively collects the contents of every file under `folderPath` whose
 * path ends with one of the `fileExtensions` suffixes.
 *
 * @param {string} folderPath Folder to scan; subfolders are visited too.
 * @returns {Promise<string[]>} The UTF-8 decoded contents of all matching files.
 */
async function readAllFilesInFolder(folderPath) {
  const entries = await fs.readdir(folderPath, { withFileTypes: true });

  const pending = entries.map(async (entry) => {
    const entryPath = path.resolve(folderPath, entry.name);

    if (entry.isDirectory()) {
      return readAllFilesInFolder(entryPath);
    }

    const matches = fileExtensions.some(ext => entryPath.endsWith(ext));
    // Non-matching entries yield an empty array, which disappears when the
    // results are flattened below.
    return matches ? fs.readFile(entryPath, 'utf8') : [];
  });

  // Each result is a string, a (already flat) string[] from a subfolder, or
  // []; flattening one level produces the final list.
  const results = await Promise.all(pending);
  return results.flat();
}
|
||
// Benchmark driver: loads the input files and the tokenizer, repeatedly runs
// the selected tokenizer method over every file while the CPU profiler is
// recording, then writes the profile to 'profile.cpuprofile' and prints the
// input size and per-cycle timings as JSON on stdout.
Promise.all([
  readAllFilesInFolder(folderPath),
  createByEncoderName(encoderName)
]).then(async ([files, tokenizer]) => {
  // Resolve the method once, before profiling starts: an unknown method then
  // fails fast and the dispatch is not repeated inside the timed loop.
  let runFile;
  switch (method) {
    case 'encode':
      runFile = file => tokenizer.encode(file);
      break;
    case 'encodeTrimSuffix':
      runFile = file => tokenizer.encodeTrimSuffix(file, 1337);
      break;
    default:
      throw new Error(`unknown method ${method}`);
  }

  // Total input size measured in string length (UTF-16 code units).
  const totalSize = files.reduce((sum, file) => sum + file.length, 0);

  const session = new inspector.Session();
  session.connect();
  try {
    const post = promisify(session.post).bind(session);
    await post('Profiler.enable');
    await post('Profiler.start');

    const start = performance.now();
    const cycles = []; // duration of each full pass over the files, in ms
    while (performance.now() - start < minTime || cycles.length < minCycles) {
      const cycleStart = performance.now();
      files.forEach(runFile);
      cycles.push(performance.now() - cycleStart);
    }

    const data = await post('Profiler.stop');
    await fs.writeFile('profile.cpuprofile', JSON.stringify(data.profile));

    process.stdout.write(JSON.stringify({ totalSize, cycles }));
  } finally {
    // Always release the inspector session, even when a step above fails.
    session.disconnect();
  }
}).catch((err) => {
  // Report the failure on stderr and signal it through the exit code so the
  // caller does not mistake an empty stdout for a successful run.
  console.error(err);
  process.exitCode = 1;
});
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,56 @@ | ||
// Copyright (c) Microsoft Corporation. | ||
// Licensed under the MIT License. | ||
|
||
/**
 * A text encoder interface.
 */
export interface ITextEncoder {
  /**
   * Number of bytes written in the last call to {@link encode}.
   */
  length: number;

  /**
   * Encodes the text and returns the Uint8Array it was written to. The
   * returned array may be larger than the encoded data: only its first
   * {@link length} bytes are meaningful.
   *
   * The data returned in the array is only valid until the next call to encode.
   */
  encode(text: string): Uint8Array;
}
|
||
/**
 * {@link ITextEncoder} backed by the standard `TextEncoder`. Each call
 * returns a freshly encoded array, so the whole array is encoded data and
 * `length` equals the array's length.
 */
class UniversalTextEncoder implements ITextEncoder {
  /** Number of bytes produced by the last call to {@link encode}. */
  public length = 0;
  private readonly encoder = new TextEncoder();

  /**
   * Encodes `text` as UTF-8.
   *
   * @param text The string to encode.
   * @returns The UTF-8 bytes of `text`.
   */
  public encode(text: string): Uint8Array {
    const encoded = this.encoder.encode(text);
    this.length = encoded.length;
    return encoded;
  }
}
|
||
/**
 * {@link ITextEncoder} that encodes into a reusable Node.js `Buffer` to avoid
 * allocating on every call. The returned buffer is shared between calls and
 * is usually longer than the encoded data; only its first `length` bytes
 * belong to the last encoded text.
 */
class NodeTextEncoder implements ITextEncoder {
  private buffer = Buffer.alloc(256);
  /** Number of bytes written by the last call to {@link encode}. */
  public length = 0;

  /**
   * Encodes `text` as UTF-8 into the internal buffer, growing it if needed.
   *
   * @param text The string to encode.
   * @returns The internal buffer; valid until the next call to encode.
   */
  public encode(text: string): Uint8Array {
    this.length = this.buffer.write(text, 'utf8');

    // buffer.write returns the number of bytes written and writes less than
    // the whole string if the buffer is too small (it never writes a partial
    // character). More than 4 bytes of slack (4 bytes is the longest utf8
    // codepoint) proves that nothing was cut off.
    if (this.length < this.buffer.length - 4) {
      return this.buffer;
    }

    // The write may have been truncated. Measure the exact size once so the
    // buffer is grown in a single step and the text is re-encoded only once.
    const byteLength = Buffer.byteLength(text, 'utf8');
    if (byteLength > this.length) {
      // Allocate twice what is needed to leave headroom for later calls.
      this.buffer = Buffer.alloc(byteLength * 2);
      this.length = this.buffer.write(text, 'utf8');
    }

    return this.buffer;
  }
}
|
||
/**
 * Creates the text encoder suited to the current runtime: the Buffer-based
 * encoder when a global `Buffer` is defined, otherwise the one built on the
 * standard `TextEncoder`.
 */
export const makeTextEncoder = (): ITextEncoder => {
  if (typeof Buffer === 'undefined') {
    return new UniversalTextEncoder();
  }
  return new NodeTextEncoder();
};
Oops, something went wrong.