klauspost · klauspost · Feb 4, 2020 · Feb 1, 2020 · Feb 1, 2020 · Feb 3, 2020
diff --git a/flate/flate_test.go b/flate/flate_test.go
@@ -138,6 +138,31 @@ func TestRegressions(t *testing.T) {
 				}
 			})
 		}
+		t.Run(tt.Name+"stateless", func(t *testing.T) {
+			// Split into two and use history...
+			buf := new(bytes.Buffer)
+			err = StatelessDeflate(buf, data1[:len(data1)/2], false)
+			if err != nil {
+				t.Error(err)
+			}
+
+			// Use top half as dictionary...
+			dict := data1[:len(data1)/2]
+			err = StatelessDeflate(buf, data1[len(data1)/2:], true, dict...)
+			if err != nil {
+				t.Error(err)
+			}
+			t.Log(buf.Len())
+			fr1 := NewReader(buf)
+			data2, err := ioutil.ReadAll(fr1)
+			if err != nil {
+				t.Error(err)
+			}
+			if bytes.Compare(data1, data2) != 0 {
+				fmt.Printf("want:%x\ngot: %x\n", data1, data2)
+				t.Error("not equal")
+			}
+		})
 	}
 }
 

diff --git a/flate/huffman_bit_writer.go b/flate/huffman_bit_writer.go
@@ -177,6 +177,11 @@ func (w *huffmanBitWriter) flush() {
 		w.nbits = 0
 		return
 	}
+	if w.lastHeader > 0 {
+		// We owe an EOB
+		w.writeCode(w.literalEncoding.codes[endBlockMarker])
+		w.lastHeader = 0
+	}
 	n := w.nbytes
 	for w.nbits != 0 {
 		w.bytes[n] = byte(w.bits)
@@ -594,8 +599,8 @@ func (w *huffmanBitWriter) writeBlockDynamic(tokens *tokens, eof bool, input []b
 		tokens.AddEOB()
 	}
 
-	// We cannot reuse pure huffman table.
-	if w.lastHuffMan && w.lastHeader > 0 {
+	// We cannot reuse pure huffman table, and must mark as EOF.
+	if (w.lastHuffMan || eof) && w.lastHeader > 0 {
 		// We will not try to reuse.
 		w.writeCode(w.literalEncoding.codes[endBlockMarker])
 		w.lastHeader = 0

diff --git a/flate/stateless.go b/flate/stateless.go
@@ -8,6 +8,8 @@ import (
 
 const (
 	maxStatelessBlock = math.MaxInt16
+	// dictionary will be taken from maxStatelessBlock, so limit it.
+	maxStatelessDict = 8 << 10
 
 	slTableBits  = 13
 	slTableSize  = 1 << slTableBits
@@ -59,7 +61,7 @@ var bitWriterPool = sync.Pool{
 
 // StatelessDeflate allows to compress directly to a Writer without retaining state.
 // When returning everything will be flushed.
-func StatelessDeflate(out io.Writer, in []byte, eof bool) error {
+func StatelessDeflate(out io.Writer, in []byte, eof bool, dict ...byte) error {
 	var dst tokens
 	bw := bitWriterPool.Get().(*huffmanBitWriter)
 	bw.reset(out)
@@ -76,35 +78,53 @@ func StatelessDeflate(out io.Writer, in []byte, eof bool) error {
 		return bw.err
 	}
 
+	// Truncate dict
+	if len(dict) > maxStatelessDict {
+		dict = dict[len(dict)-maxStatelessDict:]
+	}
+
 	for len(in) > 0 {
 		todo := in
-		if len(todo) > maxStatelessBlock {
-			todo = todo[:maxStatelessBlock]
+		if len(todo) > maxStatelessBlock-len(dict) {
+			todo = todo[:maxStatelessBlock-len(dict)]
 		}
 		in = in[len(todo):]
+		uncompressed := todo
+		if len(dict) > 0 {
+			// combine dict and source
+			bufLen := len(todo) + len(dict)
+			combined := make([]byte, bufLen)
+			copy(combined, dict)
+			copy(combined[len(dict):], todo)
+			todo = combined
+		}
 		// Compress
-		statelessEnc(&dst, todo)
+		statelessEnc(&dst, todo, int16(len(dict)))
 		isEof := eof && len(in) == 0
 
 		if dst.n == 0 {
-			bw.writeStoredHeader(len(todo), isEof)
+			bw.writeStoredHeader(len(uncompressed), isEof)
 			if bw.err != nil {
 				return bw.err
 			}
-			bw.writeBytes(todo)
-		} else if int(dst.n) > len(todo)-len(todo)>>4 {
+			bw.writeBytes(uncompressed)
+		} else if int(dst.n) > len(uncompressed)-len(uncompressed)>>4 {
 			// If we removed less than 1/16th, huffman compress the block.
-			bw.writeBlockHuff(isEof, todo, false)
+			bw.writeBlockHuff(isEof, uncompressed, len(in) == 0)
 		} else {
-			bw.writeBlockDynamic(&dst, isEof, todo, false)
+			bw.writeBlockDynamic(&dst, isEof, uncompressed, len(in) == 0)
+		}
+		if len(in) > 0 {
+			// Retain a dict if we have more
+			dict = todo[len(todo)-maxStatelessDict:]
+			dst.Reset()
 		}
 		if bw.err != nil {
 			return bw.err
 		}
-		dst.Reset()
 	}
 	if !eof {
-		// Align.
+		// Align, only a stored block can do that.
 		bw.writeStoredHeader(0, false)
 	}
 	bw.flush()
@@ -130,7 +150,7 @@ func load6416(b []byte, i int16) uint64 {
 		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
 }
 
-func statelessEnc(dst *tokens, src []byte) {
+func statelessEnc(dst *tokens, src []byte, startAt int16) {
 	const (
 		inputMargin            = 12 - 1
 		minNonLiteralBlockSize = 1 + 1 + inputMargin
@@ -144,15 +164,23 @@ func statelessEnc(dst *tokens, src []byte) {
 
 	// This check isn't in the Snappy implementation, but there, the caller
 	// instead of the callee handles this case.
-	if len(src) < minNonLiteralBlockSize {
+	if len(src)-int(startAt) < minNonLiteralBlockSize {
 		// We do not fill the token table.
 		// This will be picked up by caller.
-		dst.n = uint16(len(src))
+		dst.n = 0
 		return
 	}
+	// Index until startAt
+	if startAt > 0 {
+		cv := load3232(src, 0)
+		for i := int16(0); i < startAt; i++ {
+			table[hashSL(cv)] = tableEntry{offset: i}
+			cv = (cv >> 8) | (uint32(src[i+4]) << 24)
+		}
+	}
 
-	s := int16(1)
-	nextEmit := int16(0)
+	s := startAt + 1
+	nextEmit := startAt
 	// sLimit is when to stop looking for offset/length copies. The inputMargin
 	// lets us use a fast path for emitLiteral in the main loop, while we are
 	// looking for copies.