This repository has been archived by the owner on Sep 17, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 9
/
encode.go
146 lines (132 loc) · 4.18 KB
/
encode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"io"
"unsafe"
)
// #include <snappy-c.h>
import "C"
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
func Encode(dst, src []byte) ([]byte, error) {
if n := MaxEncodedLen(len(src)); len(dst) < n {
dst = make([]byte, n)
}
var srcPtr unsafe.Pointer
if len(src) != 0 {
srcPtr = unsafe.Pointer(&src[0])
}
dLen := C.size_t(len(dst))
status := C.snappy_compress((*C.char)(srcPtr), C.size_t(len(src)),
(*C.char)(unsafe.Pointer(&dst[0])), &dLen)
if status != C.SNAPPY_OK {
return nil, ErrCorrupt
}
return dst[:dLen], nil
}
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
func MaxEncodedLen(srcLen int) int {
// Compressed data can be defined as:
// compressed := item* literal*
// item := literal* copy
//
// The trailing literal sequence has a space blowup of at most 62/60
// since a literal of length 60 needs one tag byte + one extra byte
// for length information.
//
// Item blowup is trickier to measure. Suppose the "copy" op copies
// 4 bytes of data. Because of a special check in the encoding code,
// we produce a 4-byte copy only if the offset is < 65536. Therefore
// the copy op takes 3 bytes to encode, and this type of item leads
// to at most the 62/60 blowup for representing literals.
//
// Suppose the "copy" op copies 5 bytes of data. If the offset is big
// enough, it will take 5 bytes to encode the copy op. Therefore the
// worst case here is a one-byte literal followed by a five-byte copy.
// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
//
// This last factor dominates the blowup, so the final estimate is:
return 32 + srcLen + srcLen/6
}
// NewWriter returns a new Writer that compresses to w, using the framing
// format described at
// https://code.google.com/p/snappy/source/browse/trunk/framing_format.txt
func NewWriter(w io.Writer) *Writer {
return &Writer{
w: w,
enc: make([]byte, MaxEncodedLen(maxUncompressedChunkLen)),
}
}
// Writer is an io.Writer than can write Snappy-compressed bytes.
type Writer struct {
w io.Writer
err error
enc []byte
buf [checksumSize + chunkHeaderSize]byte
wroteHeader bool
}
// Reset discards the writer's state and switches the Snappy writer to write to
// w. This permits reusing a Writer rather than allocating a new one.
func (w *Writer) Reset(writer io.Writer) {
w.w = writer
w.err = nil
w.wroteHeader = false
}
// Write satisfies the io.Writer interface.
func (w *Writer) Write(p []byte) (n int, errRet error) {
if w.err != nil {
return 0, w.err
}
if !w.wroteHeader {
copy(w.enc, magicChunk)
if _, err := w.w.Write(w.enc[:len(magicChunk)]); err != nil {
w.err = err
return n, err
}
w.wroteHeader = true
}
for len(p) > 0 {
var uncompressed []byte
if len(p) > maxUncompressedChunkLen {
uncompressed, p = p[:maxUncompressedChunkLen], p[maxUncompressedChunkLen:]
} else {
uncompressed, p = p, nil
}
checksum := crc(uncompressed)
// Compress the buffer, discarding the result if the improvement
// isn't at least 12.5%.
chunkType := uint8(chunkTypeCompressedData)
chunkBody, err := Encode(w.enc, uncompressed)
if err != nil {
w.err = err
return n, err
}
if len(chunkBody) >= len(uncompressed)-len(uncompressed)/8 {
chunkType, chunkBody = chunkTypeUncompressedData, uncompressed
}
chunkLen := 4 + len(chunkBody)
w.buf[0] = chunkType
w.buf[1] = uint8(chunkLen >> 0)
w.buf[2] = uint8(chunkLen >> 8)
w.buf[3] = uint8(chunkLen >> 16)
w.buf[4] = uint8(checksum >> 0)
w.buf[5] = uint8(checksum >> 8)
w.buf[6] = uint8(checksum >> 16)
w.buf[7] = uint8(checksum >> 24)
if _, err = w.w.Write(w.buf[:]); err != nil {
w.err = err
return n, err
}
if _, err = w.w.Write(chunkBody); err != nil {
w.err = err
return n, err
}
n += len(uncompressed)
}
return n, nil
}