From 3766246b2d0730cefb4b42f0f9b73e321369f3d7 Mon Sep 17 00:00:00 2001 From: Yichuan Shen Date: Thu, 12 Mar 2020 18:12:16 +0100 Subject: [PATCH] Increasing size of buffer for first encoding guess Addresses #7 --- src/tokenize.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tokenize.js b/src/tokenize.js index 460d0ae..4888e69 100644 --- a/src/tokenize.js +++ b/src/tokenize.js @@ -46,7 +46,7 @@ exports.tokenizeBufferIter = function*(buffer, {encoding = null} = {}) { // Guess encoding - let detectedEncoding = jschardet.detect(buffer.slice(0, 100)).encoding + let detectedEncoding = jschardet.detect(buffer.slice(0, 1000)).encoding let contents = iconv.decode(buffer, detectedEncoding) let tokens = exports.tokenizeIter(contents)