Skip to content

Commit

Permalink
feat: differential update — use content defined chunking
Browse files Browse the repository at this point in the history
  • Loading branch information
develar committed Oct 11, 2017
1 parent 323f850 commit 1dc2e49
Show file tree
Hide file tree
Showing 16 changed files with 355 additions and 136 deletions.
4 changes: 4 additions & 0 deletions .idea/dictionaries/develar.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/runConfigurations/Debug_differential_update_buider.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 8 additions & 0 deletions .idea/runConfigurations/Debug_differential_update_builder.xml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 4 additions & 3 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
"7zip-bin": "^2.2.4",
"archiver": "^2.0.3",
"async-exit-hook": "^2.0.1",
"aws-sdk": "^2.130.0",
"aws-sdk": "^2.131.0",
"bluebird-lst": "^1.0.4",
"chalk": "^2.1.0",
"chromium-pickle-js": "^0.2.0",
Expand Down Expand Up @@ -60,14 +60,15 @@
"normalize-package-data": "^2.4.0",
"parse-color": "^1.0.0",
"plist": "^2.1.0",
"rabin-bindings": "~1.7.3",
"read-config-file": "1.1.1",
"sanitize-filename": "^1.6.1",
"semver": "^5.4.1",
"source-map-support": "^0.5.0",
"stat-mode": "^0.2.2",
"temp-file": "^2.0.3",
"tunnel-agent": "^0.6.0",
"update-notifier": "^2.2.0",
"update-notifier": "^2.3.0",
"xelement": "^1.0.16",
"yargs": "^9.0.1"
},
Expand All @@ -92,7 +93,7 @@
"babel-preset-ts-node4-bluebird": "^0.1.1",
"convert-source-map": "^1.5.0",
"decompress-zip": "^0.3.0",
"depcheck": "^0.6.7",
"depcheck": "^0.6.8",
"develar-typescript-json-schema": "0.17.0",
"electron-builder-tslint-config": "^1.0.4",
"env-paths": "^1.0.0",
Expand Down
3 changes: 2 additions & 1 deletion packages/app-package-builder/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
"int64-buffer": "^0.1.9",
"builder-util-runtime": "^0.0.0-semantic-release",
"builder-util": "^0.0.0-semantic-release",
"js-yaml": "^3.10.0"
"js-yaml": "^3.10.0",
"rabin-bindings": "~1.7.3"
},
"types": "./out/main.d.ts"
}
86 changes: 86 additions & 0 deletions packages/app-package-builder/src/ContentDefinedChunker.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
import { createHash } from "crypto"
import { read } from "fs-extra-p"
import { FileChunks } from "builder-util-runtime/out/blockMapApi"
import { Rabin } from "rabin-bindings"

/**
 * Splits a byte range of an already opened file into content-defined chunks
 * (boundaries are chosen by the `rabin-bindings` fingerprinter) and computes
 * a base64-encoded SHA-256 checksum for every chunk.
 */
export class ContentDefinedChunker {
  /**
   * Computes chunk sizes and checksums for the byte range `[start, end)` of `fd`.
   *
   * @param fd    descriptor of the file to read from
   * @param start offset of the first byte of the range
   * @param end   offset just past the last byte of the range
   * @param name  name used in debug output and in error messages
   * @returns parallel arrays: `checksums[i]` is the checksum of the chunk whose length is `sizes[i]`;
   *          both arrays are empty for an empty range
   * @throws Error if the sum of the produced chunk sizes differs from `end - start`
   */
  async computeChunks(fd: number, start: number, end: number, name: string): Promise<FileChunks> {
    // debug output only, gated in the same way as the other block map diagnostics
    if (process.env.DEBUG_BLOCKMAP) {
      console.log(name)
    }

    const fileSize = end - start
    const checksums: Array<string> = []
    const allSizes: Array<number> = []
    if (fileSize === 0) {
      // nothing to read and nothing to chunk
      return {checksums, sizes: allSizes}
    }

    // data is processed in portions of at most 4 MiB
    const buffer = Buffer.allocUnsafe(Math.min(4 * 1024 * 1024, fileSize))

    const rabin = Rabin()
    const avgBits = 12
    const min = 8 * 1024
    // see note in the nsis.ts about archive dict size
    const max = 32 * 1024
    rabin.configure(avgBits, min, max)

    // bytes already fed to rabin for which no chunk boundary has been reported yet
    let tailBufferData: Buffer | null = null
    let readOffset = start
    while (true) {
      const actualBufferSize = Math.min(end - readOffset, buffer.length)
      // NOTE(review): the result of read (number of bytes actually read) is not checked — presumably the full request is always satisfied; confirm
      await read(fd, buffer, 0, actualBufferSize, readOffset)

      const dataBuffer: Buffer = buffer.length === actualBufferSize ? buffer : buffer.slice(0, actualBufferSize)
      const sizes: Array<number> = []
      rabin.fingerprint([dataBuffer], sizes)

      let chunkStart = 0
      for (const size of sizes) {
        allSizes.push(size)
        let chunkEnd = chunkStart + size

        const hash = createHash("sha256")
        if (tailBufferData !== null) {
          hash.update(tailBufferData)
          // if there is the tail data (already processed by rabin data), first size includes it
          chunkEnd -= tailBufferData.length
          tailBufferData = null
        }
        hash.update(dataBuffer.slice(chunkStart, chunkEnd))
        checksums.push(hash.digest("base64"))
        chunkStart = chunkEnd
      }

      readOffset += actualBufferSize
      const isLastRead = readOffset >= end

      if (chunkStart < actualBufferSize) {
        const remainder = dataBuffer.slice(chunkStart, actualBufferSize)
        if (tailBufferData === null) {
          // `remainder` is a view of `buffer`, so copy it if `buffer` is going to be overwritten by the next read
          tailBufferData = isLastRead ? remainder : Buffer.from(remainder)
        }
        else {
          // no boundary was found in this portion (e.g. a short final read) — extend the pending tail instead of failing
          tailBufferData = Buffer.concat([tailBufferData, remainder])
        }
      }

      if (isLastRead) {
        if (tailBufferData !== null) {
          // rabin does not report the trailing data as a chunk, so it is emitted here as the last chunk
          allSizes.push(tailBufferData.length)
          checksums.push(computeChecksum(tailBufferData))
        }
        break
      }
    }

    // sanity check: chunks must cover the whole range
    const totalSize = allSizes.reduce((accumulator, currentValue) => accumulator + currentValue, 0)
    if (totalSize !== fileSize) {
      throw new Error(`Internal error (${name}): size mismatch: expected: ${fileSize}, got: ${totalSize}`)
    }

    return {checksums, sizes: allSizes}
  }
}

/**
 * Returns the SHA-256 digest of `chunk` encoded as base64.
 *
 * base64 is used rather than node-base91: base91 gives almost no gain (29KB vs 30KB), because a base64 string
 * value is never escaped in the yml, whereas a base91 value is often escaped (single quotes), which adds 2 extra symbols.
 */
function computeChecksum(chunk: Buffer) {
  const hasher = createHash("sha256")
  hasher.update(chunk)
  return hasher.digest("base64")
}
80 changes: 53 additions & 27 deletions packages/app-package-builder/src/blockMap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,12 @@ import BluebirdPromise from "bluebird-lst"
import { hashFile } from "builder-util"
import { PackageFileInfo } from "builder-util-runtime"
import { BlockMap, SIGNATURE_HEADER_SIZE } from "builder-util-runtime/out/blockMapApi"
import { createHash } from "crypto"
import { appendFile, read, stat } from "fs-extra-p"
import { appendFile, stat, writeFile } from "fs-extra-p"
import { safeDump } from "js-yaml"
import { Archive } from "./Archive"
import { SevenZArchiveEntry } from "./SevenZArchiveEntry"
import { SevenZFile } from "./SevenZFile"
import { ContentDefinedChunker } from "./ContentDefinedChunker"

const deflateRaw: any = BluebirdPromise.promisify(require("zlib").deflateRaw)

Expand All @@ -27,6 +27,9 @@ export async function createDifferentialPackage(archiveFile: string): Promise<Pa
sevenZFile.close()

const blockMapDataString = safeDump(blockMap)
if (process.env.DEBUG_BLOCKMAP) {
await writeFile(archiveFile + ".blockMap.yml", blockMapDataString)
}
const blockMapFileData = await deflateRaw(blockMapDataString, {level: 9})
await appendFile(archiveFile, blockMapFileData)
const packageFileInfo = await createPackageFileInfo(archiveFile)
Expand All @@ -49,25 +52,6 @@ export async function createPackageFileInfo(file: string): Promise<PackageFileIn
}
}

/**
 * Computes base64-encoded MD5 checksums of consecutive fixed-size (64 KiB) blocks
 * of the byte range `[start, end)` of `fd`; the last block may be shorter.
 */
async function computeBlocks(fd: number, start: number, end: number): Promise<Array<string>> {
  const chunkSize = 64 * 1024
  const buffer = Buffer.allocUnsafe(chunkSize)
  const digests = []

  let position = start
  while (position < end) {
    const length = Math.min(end - position, chunkSize)
    await read(fd, buffer, 0, length, position)

    // hash only the part of the buffer that was filled for this block
    const payload = length === chunkSize ? buffer : buffer.slice(0, length)
    // base64 rather than node-base91: base91 gives almost no gain (29KB vs 30KB), because a base64 string value
    // is never escaped in the yml, whereas a base91 value is often escaped (single quotes), which adds 2 extra symbols.
    // And in any case the data is stored deflated in the package.
    digests.push(createHash("md5").update(payload).digest("base64"))

    position += chunkSize
  }

  return digests
}

class BlockMapBuilder {
private currentFolderIndex = -1

Expand Down Expand Up @@ -129,19 +113,61 @@ export async function computeBlockMap(sevenZFile: SevenZFile): Promise<BlockMap>
}
}

const stats: Array<string> = []
const blocks = await BluebirdPromise.map(files, async entry => {
const blocks = await computeBlocks(sevenZFile.fd, entry.dataStart, entry.dataEnd)
const chunker = new ContentDefinedChunker()
const blocks = await chunker.computeChunks(sevenZFile.fd, entry.dataStart, entry.dataEnd, entry.name)

if (process.env.DEBUG_BLOCKMAP) {
stats.push(getStat(blocks.sizes, entry.name))
}

return {
name: entry.name.replace(/\\/g, "/"),
offset: entry.dataStart,
size: entry.dataEnd - entry.dataStart,
blocks,
...blocks,
}
}, {concurrency: 2})

if (process.env.DEBUG_BLOCKMAP) {
let duplicate = 0
let savedSize = 0
// noinspection JSMismatchedCollectionQueryUpdate
const checksums: Array<string> = []
// noinspection JSMismatchedCollectionQueryUpdate
const sizes: Array<number> = []
const index = new Map<string, number>()
for (const file of blocks) {
for (let i = 0; i < file.checksums.length; i++) {
const checksum = file.checksums[i]
const size = file.sizes[i]
if (index.has(checksum)) {
duplicate++
savedSize += size
}
else {
index.set(checksum, checksums.length)
checksums.push(checksum)
sizes.push(size)
}
}
}
}, {concurrency: 4})

console.log(stats.join("\n"))
console.log(`duplicates: ${duplicate}, saved: ${savedSize}`)
}

return {
blockSize: 64,
hashMethod: "md5",
compressionLevel: 9,
version: "2",
files: blocks,
}
}

/**
 * Builds a one-line, human-readable summary of the chunk sizes of a file (used for the debug output).
 *
 * @param sizes chunk sizes; the array is not modified (a sorted copy is used)
 * @param name  file name to include in the message
 * @returns message with the chunk count and the min, max and median chunk size;
 *          for an empty list only the count is reported, because min/max/median are undefined
 */
function getStat(sizes: Array<number>, name: string): string {
  if (sizes.length === 0) {
    // avoid "min: undefined, max: undefined, median: NaN"
    return `0 chunks generated for ${name}`
  }

  const sortedSizes = sizes.slice().sort((a, b) => a - b)
  const middle = Math.floor(sortedSizes.length / 2)
  const isEven = sortedSizes.length % 2 === 0
  // for an even count the median is the mean of the two middle values
  const median = isEven ? (sortedSizes[middle] + sortedSizes[middle - 1]) / 2 : sortedSizes[middle]
  return `${sizes.length} chunks generated for ${name} (min: ${sortedSizes[0]}, max: ${sortedSizes[sortedSizes.length - 1]}, median: ${median})`
}
3 changes: 1 addition & 2 deletions packages/app-package-builder/src/main.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@ if (process.mainModule === module) {
require(a + "map-support").install()

async function main() {
// const file = new SevenZFile("/Users/develar/Documents/onshape-desktop-shell/dist/Onshape-0.5.13-x64.nsis.7z")
const file = "/Users/develar/Documents/onshape-desktop-shell/dist/Onshape-0.5.13-x64.nsis.7z"
const file = "/Volumes/test/electron-builder-test/dist/nsis-web/TestApp-1.0.1-x64.nsis.7z"
await createDifferentialPackage(file)
// const archive = await file.read()
// for (const entry of archive.files) {
Expand Down
16 changes: 7 additions & 9 deletions packages/builder-util-runtime/src/blockMapApi.ts
Original file line number Diff line number Diff line change
@@ -1,20 +1,18 @@
export const BLOCK_MAP_FILE_NAME = "_blockMap.yml"
export const SIGNATURE_HEADER_SIZE = 12 /* signature + 2 bytes version + 4 bytes CRC */ + 20

export interface BlockMap {
blockSize: number
hashMethod: "sha256" | "md5"

compressionLevel: 9 | 1
/**
 * Chunking result for a single file: two parallel arrays describing its chunks in order.
 */
export interface FileChunks {
// checksum of each chunk (ContentDefinedChunker produces base64-encoded SHA-256 digests)
checksums: Array<string>
// size of each chunk in bytes; the entry at index i belongs to the chunk described by checksums[i]
sizes: Array<number>
}

/**
 * Root object of the block map: the format version and the per-file chunk information.
 */
export interface BlockMap {
// block map format version
version: "1" | "2"
// one entry per file
files: Array<BlockMapFile>
}

export interface BlockMapFile {
export interface BlockMapFile extends FileChunks {
name: string
offset: number
size: number

// size of block 64K, last block size `size % (64 * 1024)`
blocks: Array<string>
}
2 changes: 1 addition & 1 deletion packages/electron-builder/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@
"plist": "^2.1.0",
"sanitize-filename": "^1.6.1",
"semver": "^5.4.1",
"update-notifier": "^2.2.0",
"update-notifier": "^2.3.0",
"yargs": "^9.0.1",
"debug": "^3.1.0",
"asar-integrity": "0.0.0-semantic-release",
Expand Down
9 changes: 9 additions & 0 deletions packages/electron-builder/src/targets/nsis/nsis.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,15 @@ export class NsisTarget extends Target {
archiveOptions.solid = false
// our reader doesn't support compressed headers
archiveOptions.isArchiveHeaderCompressed = false
/*
* dict size 64 MB: Full: 33,744.88 KB, To download: 17,630.3 KB (52%)
* dict size 16 MB: Full: 33,936.84 KB, To download: 16,175.9 KB (48%)
* dict size 8 MB: Full: 34,187.59 KB, To download: 8,229.9 KB (24%)
* dict size 4 MB: Full: 34,628.73 KB, To download: 3,782.97 KB (11%)
As we can see, if a file is changed in one place, the whole block is invalidated (and the update size is approximately equal to the dict size).
*/
archiveOptions.dictSize = 8
// do not allow to change compression level to avoid different packages
compression = "normal"
}
Expand Down
2 changes: 1 addition & 1 deletion packages/electron-publisher-s3/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
],
"dependencies": {
"fs-extra-p": "^4.4.3",
"aws-sdk": "^2.130.0",
"aws-sdk": "^2.131.0",
"mime": "^2.0.3",
"electron-publish": "~0.0.0-semantic-release",
"builder-util": "^0.0.0-semantic-release",
Expand Down
6 changes: 6 additions & 0 deletions packages/electron-updater/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 2.11.0

### Features

* Differential updater: use [content defined chunking](https://github.com/electron-userland/electron-builder/releases/tag/v19.36.0)

# 2.10.2

### Bug Fixes
Expand Down
5 changes: 4 additions & 1 deletion packages/electron-updater/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,8 @@
"xelement": "^1.0.16",
"lodash.isequal": "^4.5.0"
},
"typings": "./out/main.d.ts"
"typings": "./out/main.d.ts",
"publishConfig": {
"tag": "next"
}
}
Loading

0 comments on commit 1dc2e49

Please sign in to comment.