Skip to content

Commit

Permalink
Compress JSON data
Browse files Browse the repository at this point in the history
  • Loading branch information
arai-a committed Nov 14, 2021
1 parent 3120ba0 commit 5366e7d
Show file tree
Hide file tree
Showing 8 changed files with 183 additions and 12 deletions.
5 changes: 4 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,7 @@ ecma262/
token.json
venv/
node_modules/
package-lock.json
package-lock.json
lib/pkg
lib/target
lib/Cargo.lock
33 changes: 25 additions & 8 deletions build.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import distutils
import distutils.dir_util
import glob
import gzip
import json
import os
import re
Expand Down Expand Up @@ -44,14 +45,31 @@ def read(path):
with open(path, 'r') as in_file:
return in_file.read()

def read_b(path):
    """Return the full contents of the file at `path` as raw bytes."""
    with open(path, mode='rb') as stream:
        payload = stream.read()
    return payload

@classmethod
def read_json(cls, path):
    """Read the text file at `path` via `cls.read` and parse it as JSON."""
    document = cls.read(path)
    return json.loads(document)

@classmethod
def read_json_gz(cls, path):
    """Load a gzip-compressed JSON file.

    The bytes returned by `cls.read_b(path)` are gunzipped, decoded with
    the default codec of `bytes.decode()` and parsed as JSON.
    """
    # NOTE(review): this print looks like leftover debugging output
    # (nearby code logs through Logger) -- confirm before removing.
    print(path)
    compressed = cls.read_b(path)
    document = gzip.decompress(compressed).decode()
    return json.loads(document)

def write(path, text):
    """Create or truncate the file at `path` and write the string `text` to it."""
    with open(path, mode='w') as sink:
        sink.write(text)

def write_b(path, text):
    """Create or truncate the file at `path` and write `text` to it in binary mode.

    Despite its name, `text` must be a bytes-like object because the file
    is opened with mode 'wb'.
    """
    with open(path, mode='wb') as sink:
        sink.write(text)

@classmethod
def write_json_gz(cls, path, data):
    """Serialize `data` to JSON, gzip it and store it via `cls.write_b(path, ...)`."""
    serialized = json.dumps(data)
    compressed = gzip.compress(serialized.encode())
    cls.write_b(path, compressed)

def write_formatted_json(path, data):
text = json.dumps(data,
indent=1,
Expand Down Expand Up @@ -106,11 +124,11 @@ def pr_info_path(cls, prnum):

@classmethod
def sections_path(cls, sha, prnum=None):
return os.path.join(cls.rev_dir(sha, prnum), 'sections.json')
return os.path.join(cls.rev_dir(sha, prnum), 'sections.json.gz')

@classmethod
def parent_diff_path(cls, sha, prnum=None):
return os.path.join(cls.rev_dir(sha, prnum), 'parent_diff.json')
return os.path.join(cls.rev_dir(sha, prnum), 'parent_diff.json.gz')

@classmethod
def index_path(cls, sha, prnum=None):
Expand Down Expand Up @@ -671,8 +689,7 @@ def __json(sha, prnum, skip_cache):
data = SectionsExtractor.extract(
FileUtils.read(Paths.index_path(sha, prnum)))

sections_json = json.dumps(data)
FileUtils.write(sections_path, sections_json)
FileUtils.write_json_gz(sections_path, data)

return True

Expand All @@ -693,18 +710,18 @@ def __parent_json(sha, prnum, parent, skip_cache):

Logger.info(parent_diff_path)

to_json = FileUtils.read_json(sections_path)
from_json = FileUtils.read_json(parent_sections_path)
to_json = FileUtils.read_json_gz(sections_path)
from_json = FileUtils.read_json_gz(parent_sections_path)

diff_json = SectionsComparator.compare(from_json, to_json)

result = json.dumps(diff_json)
result = gzip.compress(json.dumps(diff_json).encode())
if len(result) > 1024 * 1024:
Logger.info('SKIP: {} is too large ({})'.format(
parent_diff_path, len(result)))
return False

FileUtils.write(parent_diff_path, result)
FileUtils.write_b(parent_diff_path, result)

return True

Expand Down
2 changes: 1 addition & 1 deletion index.html
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
<link href="./style/ecmarkup.css" rel="stylesheet">
<link href="./style/base.css?20200930-a" rel="stylesheet">
<link href="./style/compare.css?20200930-a" rel="stylesheet">
<script type="text/javascript" src="./js/compare.js?20200930-a"></script>
<script type="text/javascript" src="./js/compare.js?20211114-a"></script>
</head>
<body onload="onBodyLoad()">
<div id="container">
Expand Down
117 changes: 115 additions & 2 deletions js/compare.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,108 @@ function sleep(t) {
return new Promise(r => setTimeout(r, t));
}

// ==== BEGIN: copied and modified from wasm-pack binding ====

// Exports object of the instantiated gunzip module; assigned by init().
let wasm;

// Last Uint8Array view handed out over the module's linear memory.
let cachegetUint8Memory0 = null;

/**
 * Return a Uint8Array view over the wasm module's memory.
 *
 * The view is cached and rebuilt only when there is no cached view yet or
 * when `wasm.memory.buffer` is no longer the buffer the cached view wraps.
 *
 * @returns {Uint8Array}
 */
function getUint8Memory0() {
  const liveBuffer = wasm.memory.buffer;
  const isStale =
    cachegetUint8Memory0 === null || cachegetUint8Memory0.buffer !== liveBuffer;
  if (isStale) {
    cachegetUint8Memory0 = new Uint8Array(liveBuffer);
  }
  return cachegetUint8Memory0;
}

// Length of the array most recently copied into wasm memory by
// passArray8ToWasm0(); callers read it right after that call.
let WASM_VECTOR_LEN = 0;

/**
 * Copy a byte array into wasm memory.
 *
 * @param {Uint8Array} arg - bytes to copy
 * @param {function(number): number} malloc - allocator returning the offset
 *     of a region of at least the requested size
 * @returns {number} offset at which the bytes were written; the byte count
 *     is published through WASM_VECTOR_LEN
 */
function passArray8ToWasm0(arg, malloc) {
  const byteCount = arg.length;
  // Elements are one byte wide, so the element count is the allocation size
  // and the pointer is usable directly as an index into the Uint8Array view.
  const ptr = malloc(byteCount);
  getUint8Memory0().set(arg, ptr);
  WASM_VECTOR_LEN = byteCount;
  return ptr;
}

// Last Int32Array view handed out over the module's linear memory.
let cachegetInt32Memory0 = null;

/**
 * Return an Int32Array view over the wasm module's memory.
 *
 * The view is cached and rebuilt only when there is no cached view yet or
 * when `wasm.memory.buffer` is no longer the buffer the cached view wraps.
 *
 * @returns {Int32Array}
 */
function getInt32Memory0() {
  const liveBuffer = wasm.memory.buffer;
  const isStale =
    cachegetInt32Memory0 === null || cachegetInt32Memory0.buffer !== liveBuffer;
  if (isStale) {
    cachegetInt32Memory0 = new Int32Array(liveBuffer);
  }
  return cachegetInt32Memory0;
}

/**
 * Return a view (not a copy) of `len` bytes of wasm memory starting at `ptr`.
 *
 * @param {number} ptr - byte offset into wasm memory
 * @param {number} len - number of bytes
 * @returns {Uint8Array} subarray sharing storage with the wasm memory view
 */
function getArrayU8FromWasm0(ptr, len) {
  const memory = getUint8Memory0();
  const begin = ptr;
  const end = begin + len;
  return memory.subarray(begin, end);
}

// Pending (or settled) initialisation of the wasm module. Sharing it lets
// concurrent first callers of decompress() wait on a single init() instead
// of each fetching and instantiating the module.
let wasmInitPromise = null;

/**
 * Run `data` through the wasm module's `decompress` export.
 *
 * The module is initialised lazily on first use. The result is copied out
 * of wasm memory with `slice()` before the wasm-side buffer is freed, so the
 * returned array stays valid afterwards.
 *
 * @param {Uint8Array} data - compressed bytes
 * @returns {Promise<Uint8Array>} the bytes produced by the wasm export
 * @throws whatever `init()` rejects with when the module cannot be loaded
 */
async function decompress(data) {
  if (!wasm) {
    if (wasmInitPromise === null) {
      wasmInitPromise = init().catch((error) => {
        // Forget the failed attempt so that a later call can retry.
        wasmInitPromise = null;
        throw error;
      });
    }
    await wasmInitPromise;
  }

  try {
    // Reserve 16 bytes on the wasm stack; the export stores its result as
    // two 32-bit values (pointer, length) at that address.
    const retptr = wasm.__wbindgen_add_to_stack_pointer(-16);
    const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_malloc);
    const len0 = WASM_VECTOR_LEN;
    wasm.decompress(retptr, ptr0, len0);
    const r0 = getInt32Memory0()[retptr / 4 + 0];
    const r1 = getInt32Memory0()[retptr / 4 + 1];
    const v1 = getArrayU8FromWasm0(r0, r1).slice();
    wasm.__wbindgen_free(r0, r1 * 1);
    return v1;
  } finally {
    // Always release the stack space reserved above.
    wasm.__wbindgen_add_to_stack_pointer(16);
  }
}

/**
 * Instantiate a WebAssembly module from one of several kinds of input.
 *
 * - A `Response`: streaming instantiation is tried first when available.
 *   If it fails and the response's Content-Type is not `application/wasm`,
 *   a warning is logged and the body is instantiated from an ArrayBuffer
 *   instead; otherwise the error is rethrown.
 * - Anything else is passed straight to `WebAssembly.instantiate`.
 *
 * @param {Response|WebAssembly.Module|BufferSource} module
 * @param {object} imports - import object passed to the instantiation call
 * @returns {Promise<{instance: WebAssembly.Instance, module: WebAssembly.Module}>}
 */
async function load(module, imports) {
  const isResponse = typeof Response === 'function' && module instanceof Response;

  if (!isResponse) {
    const result = await WebAssembly.instantiate(module, imports);
    // With a compiled Module as input only the Instance comes back, so pair
    // it with the module to give every path the same return shape.
    return result instanceof WebAssembly.Instance
      ? { instance: result, module }
      : result;
  }

  if (typeof WebAssembly.instantiateStreaming === 'function') {
    try {
      return await WebAssembly.instantiateStreaming(module, imports);
    } catch (e) {
      if (module.headers.get('Content-Type') === 'application/wasm') {
        throw e;
      }
      console.warn("`WebAssembly.instantiateStreaming` failed because your server does not serve wasm with `application/wasm` MIME type. Falling back to `WebAssembly.instantiate` which is slower. Original error:\n", e);
    }
  }

  const bytes = await module.arrayBuffer();
  return await WebAssembly.instantiate(bytes, imports);
}

/**
 * Fetch and instantiate `js/gunzip.wasm`.
 *
 * The instance's exports are stored in the module-level `wasm` variable and
 * the compiled module is recorded on `init.__wbindgen_wasm_module`.
 *
 * @returns {Promise<object>} the exports of the instantiated module
 */
async function init() {
  const response = await fetch("js/gunzip.wasm");
  // The module is loaded with an empty import object.
  const loaded = await load(response, {});

  wasm = loaded.instance.exports;
  init.__wbindgen_wasm_module = loaded.module;

  return wasm;
}

// ==== END ====

// Insert list marker into list element.
//
// While creating diff, extra list element can be added.
Expand Down Expand Up @@ -616,6 +718,17 @@ class Comparator {
return response.json();
}

async getJSON_GZ(path) {
const response = await fetch(path);
if (!response.ok) {
return undefined;
}
const buf = await response.arrayBuffer();
const arr = await decompress(new Uint8Array(buf));
const decoder = new TextDecoder();
return JSON.parse(decoder.decode(arr));
}

populateLists() {
this.populatePRs(this.prFilter);
this.populateRevs(this.revFilter);
Expand Down Expand Up @@ -1082,7 +1195,7 @@ class Comparator {
let found = false;
const toHash = this.toRev.value;
if (!this.secAll.checked && this.getParentOf(toHash) === this.fromRev.value) {
const result = await this.getJSON(`./history/${toHash}/parent_diff.json`);
const result = await this.getJSON_GZ(`./history/${toHash}/parent_diff.json.gz`);
if (result) {
this.fromSecData = result.from;
this.toSecData = result.to;
Expand Down Expand Up @@ -1270,7 +1383,7 @@ class Comparator {
}

async getSecData(hash) {
return this.getJSON(`./history/${hash}/sections.json`);
return this.getJSON_GZ(`./history/${hash}/sections.json.gz`);
}

// Returns a string representation of section number+title that is comparable
Expand Down
Binary file added js/gunzip.wasm
Binary file not shown.
17 changes: 17 additions & 0 deletions lib/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[package]
name = "wasm-gunzip"
version = "1.0.0"
authors = ["arai <[email protected]>"]
edition = "2018"

[lib]
crate-type = ["cdylib"]

[dependencies]
wasm-bindgen = "0.2.78"
libflate= "1.0"

[profile.release]
lto = true
codegen-units = 1
opt-level = "z"
7 changes: 7 additions & 0 deletions lib/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Build the gunzip WebAssembly module and write it to ../js/gunzip.wasm.
#
# The real target is the file the recipe produces, so make can skip the build
# when it is newer than its prerequisites. `gunzip.wasm` is kept as a phony
# alias (and the default goal) so existing `make` / `make gunzip.wasm`
# invocations keep working.
OUT := ../js/gunzip.wasm

.PHONY: gunzip.wasm
gunzip.wasm: $(OUT)

$(OUT): src/lib.rs Cargo.toml
	wasm-pack build --release --target web
	wasm-snip \
		--snip-rust-panicking-code \
		--snip-rust-fmt-code \
		-o $(OUT) \
		pkg/wasm_gunzip_bg.wasm
14 changes: 14 additions & 0 deletions lib/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
use libflate::gzip::Decoder;
use std::io::Read;
use wasm_bindgen::prelude::*;

/// Decode a gzip stream and return the decompressed bytes.
///
/// Any failure, whether while constructing the decoder or while reading
/// the stream, yields an empty vector; partially decoded output is
/// discarded rather than returned.
#[wasm_bindgen]
pub fn decompress(data: &[u8]) -> Vec<u8> {
    let mut output = Vec::new();
    let outcome = Decoder::new(data).and_then(|mut decoder| decoder.read_to_end(&mut output));
    match outcome {
        Ok(_) => output,
        // Drop whatever was decoded before the error occurred.
        Err(_) => Vec::new(),
    }
}

0 comments on commit 5366e7d

Please sign in to comment.