Skip to content

Commit

Permalink
ベンチ
Browse files Browse the repository at this point in the history
  • Loading branch information
mei23 committed Dec 25, 2024
1 parent 68c10a5 commit dda22eb
Show file tree
Hide file tree
Showing 8 changed files with 155 additions and 0 deletions.
1 change: 1 addition & 0 deletions built/t/t1.d.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
export {};
57 changes: 57 additions & 0 deletions built/t/t1.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
"use strict";
// Helper that drives a generator function as an asynchronous function.
// Reuses an `__awaiter` already present on `this` when there is one; otherwise defines it here.
//   thisArg    - `this` value the generator function is applied with
//   _arguments - arguments passed to the generator function (defaults to [])
//   P          - promise constructor to use; falls back to the global Promise
//   generator  - the generator function whose yields are awaited
// Returns a promise resolved with the generator's final value, or rejected
// with whatever the generator (or a yielded promise) throws.
var __awaiter = (this && this.__awaiter) || function (thisArg, _arguments, P, generator) {
// Wrap a non-P value in a P so that `.then` can always be called on it.
function adopt(value) { return value instanceof P ? value : new P(function (resolve) { resolve(value); }); }
return new (P || (P = Promise))(function (resolve, reject) {
// Resume the generator with the fulfilled value; an exception thrown while resuming rejects the outer promise.
function fulfilled(value) { try { step(generator.next(value)); } catch (e) { reject(e); } }
// Throw the rejection reason into the generator; an exception escaping it rejects the outer promise.
function rejected(value) { try { step(generator["throw"](value)); } catch (e) { reject(e); } }
// Either finish with the generator's final value or wait for the yielded value and continue.
function step(result) { result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected); }
// Instantiate the generator (replacing the `generator` binding with the iterator) and start it.
step((generator = generator.apply(thisArg, _arguments || [])).next());
});
};
Object.defineProperty(exports, "__esModule", { value: true });
const fs_1 = require("fs");
const encoding_1 = require("../utils/encoding");
// Entry point of the benchmark script: runs p1 against the `_jis.html` sample
// and returns the promise produced by __awaiter.
function main() {
    return __awaiter(this, void 0, void 0, function* () {
        // Alternative input that is currently disabled: `${__dirname}/../../sample/sjis.html`
        const sample = `${__dirname}/../../sample/_jis.html`;
        p1(sample);
    });
}
// Reads `file`, logs the encoding reported by detectEncoding, then times
// `count` repeated conversions of the same buffer with each converter.
// For every converter it logs a label line followed by the elapsed
// milliseconds (difference of two performance.now() readings).
function p1(file) {
    const content = (0, fs_1.readFileSync)(file);
    const enc = (0, encoding_1.detectEncoding)(content);
    console.log('enc', enc);
    const count = 10000;
    // Logs the label, runs `decode` `count` times, and logs the elapsed time.
    const bench = (label, decode) => {
        console.log(label);
        const startedAt = performance.now();
        for (let remaining = count; remaining > 0; remaining--) {
            decode(content, enc);
        }
        const finishedAt = performance.now();
        console.log(`${finishedAt - startedAt}`);
    };
    // The iconv-lite run is disabled; to enable it:
    // bench(`iconv-lite`, encoding_1.toUtf8);
    bench(`iconv`, encoding_1.toUtf8i);
    bench(`encode-japanese`, encoding_1.toUtf8j);
}
// Run the benchmark and report completion. A failure inside main (for example
// an unreadable sample file) is logged and reflected in the exit code instead
// of being left as an unhandled promise rejection.
main()
    .then(() => {
        console.log('Done');
    })
    .catch((err) => {
        console.error(err);
        process.exitCode = 1;
    });
2 changes: 2 additions & 0 deletions built/utils/encoding.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@
*/
export declare function detectEncoding(body: Buffer): string;
export declare function toUtf8(body: Buffer, encoding: string): string;
export declare function toUtf8i(body: Buffer, encoding: string): string;
export declare function toUtf8j(body: Buffer, encoding: string): string;
14 changes: 14 additions & 0 deletions built/utils/encoding.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,13 @@
Object.defineProperty(exports, "__esModule", { value: true });
exports.detectEncoding = detectEncoding;
exports.toUtf8 = toUtf8;
exports.toUtf8i = toUtf8i;
exports.toUtf8j = toUtf8j;
const iconv = require("iconv-lite");
//var Iconv = require('iconv').Iconv;
const iconv_1 = require("iconv");
const jschardet = require("jschardet");
const Encoding = require('encoding-japanese');
// Case-insensitive pattern for `charset=<name>`: allows whitespace around `=`,
// an optional opening quote, and captures the name (word characters and `-`).
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;
/**
* Detect HTML encoding
Expand Down Expand Up @@ -32,7 +37,16 @@ function detectEncoding(body) {
// Decodes `body` with iconv-lite using the given encoding name and returns
// the result of iconv.decode.
function toUtf8(body, encoding) {
    const decoded = iconv.decode(body, encoding);
    return decoded;
}
// Converts `body` from `encoding` to UTF-8 with the `iconv` package and
// returns the converted bytes as a string. A fresh Iconv converter is
// constructed on every call.
function toUtf8i(body, encoding) {
    const converter = new iconv_1.Iconv(encoding, 'UTF-8');
    const utf8Bytes = converter.convert(body);
    return utf8Bytes.toString();
}
// Converts `body` from `encoding` to 'UNICODE' with encoding-japanese, then
// turns the converted codes into a string via Encoding.codeToString.
function toUtf8j(body, encoding) {
    const codes = Encoding.convert(body, 'UNICODE', encoding);
    return Encoding.codeToString(codes);
}
function toEncoding(candicate) {
if (candicate.toUpperCase() === 'ISO-2022-JP')
return 'ISO-2022-JP';
if (iconv.encodingExists(candicate)) {
if (['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'].includes(candicate.toLowerCase()))
return 'cp932';
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
},
"dependencies": {
"cheerio": "1.0.0",
"encoding-japanese": "2.2.0",
"escape-regexp": "0.0.1",
"got": "11.8.6",
"h3": "1.13.0",
"h3-typebox": "0.6.0",
"html-entities": "2.5.2",
"iconv": "3.0.1",
"iconv-lite": "0.6.3",
"js-yaml": "4.1.0",
"jschardet": "3.1.4",
Expand Down
54 changes: 54 additions & 0 deletions src/t/t1.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
import { readFile, readFileSync } from 'fs';
import { detectEncoding, toUtf8, toUtf8i, toUtf8j } from '../utils/encoding';


/**
 * Entry point of the benchmark script: runs `p1` against the `_jis.html` sample.
 */
async function main() {
  // Alternative input that is currently disabled: `${__dirname}/../../sample/sjis.html`
  const sample = `${__dirname}/../../sample/_jis.html`;
  p1(sample);
}

/**
 * Reads `file`, logs the encoding reported by `detectEncoding`, then times
 * `count` repeated conversions of the same buffer with each converter.
 * For every converter it logs a label line followed by the elapsed
 * milliseconds (difference of two `performance.now()` readings).
 *
 * @param file Path of the sample file to read and convert.
 */
function p1(file: string) {
  const content = readFileSync(file);
  const enc = detectEncoding(content);
  console.log('enc', enc);

  const count = 10000;

  // Logs the label, runs `decode` `count` times, and logs the elapsed time.
  const bench = (label: string, decode: typeof toUtf8i): void => {
    console.log(label);
    const startedAt = performance.now();
    for (let remaining = count; remaining > 0; remaining--) {
      decode(content, enc);
    }
    const finishedAt = performance.now();
    console.log(`${finishedAt - startedAt}`);
  };

  // The iconv-lite run is disabled; to enable it:
  // bench(`iconv-lite`, toUtf8);
  bench(`iconv`, toUtf8i);
  bench(`encode-japanese`, toUtf8j);
}


// Run the benchmark and report completion. A failure inside main (for example
// an unreadable sample file) is logged and reflected in the exit code instead
// of being left as an unhandled promise rejection.
main()
  .then(() => {
    console.log('Done');
  })
  .catch((err: unknown) => {
    console.error(err);
    process.exitCode = 1;
  });
15 changes: 15 additions & 0 deletions src/utils/encoding.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import * as iconv from 'iconv-lite';
//var Iconv = require('iconv').Iconv;
import { Iconv } from 'iconv';
import * as jschardet from 'jschardet';
const Encoding = require('encoding-japanese');

// Case-insensitive pattern for `charset=<name>`: allows whitespace around `=`,
// an optional opening quote, and captures the name (word characters and `-`).
const regCharset = /charset\s*=\s*["']?([\w-]+)/i;

Expand Down Expand Up @@ -32,7 +35,19 @@ export function toUtf8(body: Buffer, encoding: string): string {
return iconv.decode(body, encoding);
}

/**
 * Converts `body` from `encoding` to UTF-8 with the `iconv` package.
 * A fresh `Iconv` converter is constructed on every call.
 *
 * @param body Raw bytes to convert.
 * @param encoding Source encoding name handed to the `Iconv` constructor.
 * @returns The converted bytes rendered with `toString()`.
 */
export function toUtf8i(body: Buffer, encoding: string): string {
  const converter = new Iconv(encoding, 'UTF-8');
  const utf8Bytes = converter.convert(body);
  return utf8Bytes.toString();
}

/**
 * Converts `body` from `encoding` to 'UNICODE' with encoding-japanese and
 * turns the converted codes into a string via `Encoding.codeToString`.
 *
 * @param body Raw bytes to convert.
 * @param encoding Source encoding name handed to `Encoding.convert`.
 * @returns The string produced by `Encoding.codeToString`.
 */
export function toUtf8j(body: Buffer, encoding: string): string {
  const codes = Encoding.convert(body, 'UNICODE', encoding);
  return Encoding.codeToString(codes);
}


function toEncoding(candicate: string): string | null {
if (candicate.toUpperCase() === 'ISO-2022-JP') return 'ISO-2022-JP';

if (iconv.encodingExists(candicate)) {
if (['shift_jis', 'shift-jis', 'windows-31j', 'x-sjis'].includes(candicate.toLowerCase())) return 'cp932';
return candicate;
Expand Down
10 changes: 10 additions & 0 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,11 @@ domutils@^3.0.1, domutils@^3.1.0:
domelementtype "^2.3.0"
domhandler "^5.0.3"

encoding-japanese@2.2.0:
version "2.2.0"
resolved "https://registry.yarnpkg.com/encoding-japanese/-/encoding-japanese-2.2.0.tgz#0ef2d2351250547f432a2dd155453555c16deb59"
integrity sha512-EuJWwlHPZ1LbADuKTClvHtwbaFn4rOD+dRAbWysqEOXRc2Uui0hJInNJrsdH0c+OhJA4nrCBdSkW4DD5YxAo6A==

encoding-sniffer@^0.2.0:
version "0.2.0"
resolved "https://registry.yarnpkg.com/encoding-sniffer/-/encoding-sniffer-0.2.0.tgz#799569d66d443babe82af18c9f403498365ef1d5"
Expand Down Expand Up @@ -496,6 +501,11 @@ iconv-lite@0.6.3, iconv-lite@^0.6.3:
dependencies:
safer-buffer ">= 2.1.2 < 3.0.0"

iconv@3.0.1:
version "3.0.1"
resolved "https://registry.yarnpkg.com/iconv/-/iconv-3.0.1.tgz#31d319d71b59415cf348362c382961b1eb6f0e81"
integrity sha512-lJnFLxVc0d82R7GfU7a9RujKVUQ3Eee19tPKWZWBJtAEGRHVEyFzCtbNl3GPKuDnHBBRT4/nDS4Ru9AIDT72qA==

inflight@^1.0.4:
version "1.0.6"
resolved "https://registry.yarnpkg.com/inflight/-/inflight-1.0.6.tgz#49bd6331d7d02d0c09bc910a1075ba8165b56df9"
Expand Down

0 comments on commit dda22eb

Please sign in to comment.