Skip to content

Commit

Permalink
zstd: Inline nextFast call (#259)
Browse files: browse the repository at this point in the history
```
benchmark                                               old ns/op      new ns/op      delta
BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32          4704626        4312341        -8.34%
BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32      1068883        984396         -7.90%
BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32       18021576       15813727       -12.25%
BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32         13398648       11872693       -11.39%
BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32       3892600        3691969        -5.15%
BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32        5005336        4613696        -7.82%
BenchmarkDecoder_DecoderSmall/html_x_4.zst-32           2099041        2012320        -4.13%
BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32     189971         179829         -5.34%
BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32     79873          80641          +0.96%
BenchmarkDecoder_DecoderSmall/urls.10K.zst-32           14376218       12387421       -13.83%
BenchmarkDecoder_DecoderSmall/html.zst-32               1209031        1131450        -6.42%
BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32      84458          79599          -5.75%
BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32             586411         534015         -8.94%
BenchmarkDecoder_DecodeAll/geo.protodata.zst-32         133502         123999         -7.12%
BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32          1866430        1718598        -7.92%
BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32            1407331        1282011        -8.90%
BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32          484561         453559         -6.40%
BenchmarkDecoder_DecodeAll/alice29.txt.zst-32           623368         573715         -7.97%
BenchmarkDecoder_DecodeAll/html_x_4.zst-32              260694         249948         -4.12%
BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32        23118          21642          -6.38%
BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32        9556           9489           -0.70%
BenchmarkDecoder_DecodeAll/urls.10K.zst-32              1627793        1500977        -7.79%
BenchmarkDecoder_DecodeAll/html.zst-32                  149442         139560         -6.61%
BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32         10589          10084          -4.77%
BenchmarkDecoderSilesia-32                              403091267      354700567      -12.00%
BenchmarkDecoderEnwik9-32                               2119570000     1782801500     -15.89%

benchmark                                               old MB/s     new MB/s     speedup
BenchmarkDecoder_DecoderSmall/kppkn.gtb.zst-32          313.43       341.94       1.09x
BenchmarkDecoder_DecoderSmall/geo.protodata.zst-32      887.57       963.74       1.09x
BenchmarkDecoder_DecoderSmall/plrabn12.txt.zst-32       213.90       243.77       1.14x
BenchmarkDecoder_DecoderSmall/lcet10.txt.zst-32         254.80       287.55       1.13x
BenchmarkDecoder_DecoderSmall/asyoulik.txt.zst-32       257.27       271.25       1.05x
BenchmarkDecoder_DecoderSmall/alice29.txt.zst-32        243.08       263.72       1.08x
BenchmarkDecoder_DecoderSmall/html_x_4.zst-32           1561.09      1628.37      1.04x
BenchmarkDecoder_DecoderSmall/paper-100k.pdf.zst-32     4312.23      4555.45      1.06x
BenchmarkDecoder_DecoderSmall/fireworks.jpeg.zst-32     12328.85     12211.44     0.99x
BenchmarkDecoder_DecoderSmall/urls.10K.zst-32           390.69       453.42       1.16x
BenchmarkDecoder_DecoderSmall/html.zst-32               677.57       724.03       1.07x
BenchmarkDecoder_DecoderSmall/comp-data.bin.zst-32      386.08       409.65       1.06x
BenchmarkDecoder_DecodeAll/kppkn.gtb.zst-32             314.32       345.16       1.10x
BenchmarkDecoder_DecodeAll/geo.protodata.zst-32         888.28       956.37       1.08x
BenchmarkDecoder_DecodeAll/plrabn12.txt.zst-32          258.17       280.38       1.09x
BenchmarkDecoder_DecodeAll/lcet10.txt.zst-32            303.24       332.88       1.10x
BenchmarkDecoder_DecodeAll/asyoulik.txt.zst-32          258.33       275.99       1.07x
BenchmarkDecoder_DecodeAll/alice29.txt.zst-32           243.98       265.09       1.09x
BenchmarkDecoder_DecodeAll/html_x_4.zst-32              1571.19      1638.74      1.04x
BenchmarkDecoder_DecodeAll/paper-100k.pdf.zst-32        4429.42      4731.47      1.07x
BenchmarkDecoder_DecodeAll/fireworks.jpeg.zst-32        12880.61     12972.79     1.01x
BenchmarkDecoder_DecodeAll/urls.10K.zst-32              431.31       467.75       1.08x
BenchmarkDecoder_DecodeAll/html.zst-32                  685.22       733.73       1.07x
BenchmarkDecoder_DecodeAll/comp-data.bin.zst-32         384.93       404.20       1.05x
BenchmarkDecoderSilesia-32                              525.81       597.54       1.14x
BenchmarkDecoderEnwik9-32                               471.79       560.91       1.19x

```
  • Loading branch information
klauspost authored May 22, 2020
1 parent 9e8715a commit b276b9a
Showing 1 changed file with 84 additions and 29 deletions.
113 changes: 84 additions & 29 deletions zstd/seqdec.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -100,23 +100,78 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
printf("reading sequence %d, exceeded available data\n", seqs-i)
return io.ErrUnexpectedEOF
}
var litLen, matchOff, matchLen int
var ll, mo, ml int
if br.off > 4+((maxOffsetBits+16+16)>>3) {
litLen, matchOff, matchLen = s.nextFast(br, llState, mlState, ofState)
// inlined function:
// ll, mo, ml = s.nextFast(br, llState, mlState, ofState)

// Final will not read from stream.
var llB, mlB, moB uint8
ll, llB = llState.final()
ml, mlB = mlState.final()
mo, moB = ofState.final()

// extra bits are stored in reverse order.
br.fillFast()
mo += br.getBits(moB)
if s.maxBits > 32 {
br.fillFast()
}
ml += br.getBits(mlB)
ll += br.getBits(llB)

if moB > 1 {
s.prevOffset[2] = s.prevOffset[1]
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = mo
} else {
// mo = s.adjustOffset(mo, ll, moB)
// Inlined for rather big speedup
if ll == 0 {
// There is an exception though, when current sequence's literals_length = 0.
// In this case, repeated offsets are shifted by one, so an offset_value of 1 means Repeated_Offset2,
// an offset_value of 2 means Repeated_Offset3, and an offset_value of 3 means Repeated_Offset1 - 1_byte.
mo++
}

if mo == 0 {
mo = s.prevOffset[0]
} else {
var temp int
if mo == 3 {
temp = s.prevOffset[0] - 1
} else {
temp = s.prevOffset[mo]
}

if temp == 0 {
// 0 is not valid; input is corrupted; force offset to 1
println("temp was 0")
temp = 1
}

if mo != 1 {
s.prevOffset[2] = s.prevOffset[1]
}
s.prevOffset[1] = s.prevOffset[0]
s.prevOffset[0] = temp
mo = temp
}
}
br.fillFast()
} else {
litLen, matchOff, matchLen = s.next(br, llState, mlState, ofState)
ll, mo, ml = s.next(br, llState, mlState, ofState)
br.fill()
}

if debugSequences {
println("Seq", seqs-i-1, "Litlen:", litLen, "matchOff:", matchOff, "(abs) matchLen:", matchLen)
println("Seq", seqs-i-1, "Litlen:", ll, "mo:", mo, "(abs) ml:", ml)
}

if litLen > len(s.literals) {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", litLen, len(s.literals))
if ll > len(s.literals) {
return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, len(s.literals))
}
size := litLen + matchLen + len(s.out)
size := ll + ml + len(s.out)
if size-startSize > maxBlockSize {
return fmt.Errorf("output (%d) bigger than max block size", size)
}
Expand All @@ -127,52 +182,52 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
s.out = append(s.out, make([]byte, maxBlockSize)...)
s.out = s.out[:len(s.out)-maxBlockSize]
}
if matchLen > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", matchLen)
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
}
if matchOff > len(s.out)+len(hist)+litLen {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", matchOff, len(s.out)+len(hist)+litLen)
if mo > len(s.out)+len(hist)+ll {
return fmt.Errorf("match offset (%d) bigger than current history (%d)", mo, len(s.out)+len(hist)+ll)
}
if matchOff > s.windowSize {
return fmt.Errorf("match offset (%d) bigger than window size (%d)", matchOff, s.windowSize)
if mo > s.windowSize {
return fmt.Errorf("match offset (%d) bigger than window size (%d)", mo, s.windowSize)
}
if matchOff == 0 && matchLen > 0 {
if mo == 0 && ml > 0 {
return fmt.Errorf("zero matchoff and matchlen > 0")
}

s.out = append(s.out, s.literals[:litLen]...)
s.literals = s.literals[litLen:]
s.out = append(s.out, s.literals[:ll]...)
s.literals = s.literals[ll:]
out := s.out

// Copy from history.
// TODO: Blocks without history could be made to ignore this completely.
if v := matchOff - len(s.out); v > 0 {
if v := mo - len(s.out); v > 0 {
// v is the start position in history from end.
start := len(s.hist) - v
if matchLen > v {
if ml > v {
// Some goes into current block.
// Copy remainder of history
out = append(out, s.hist[start:]...)
matchOff -= v
matchLen -= v
mo -= v
ml -= v
} else {
out = append(out, s.hist[start:start+matchLen]...)
matchLen = 0
out = append(out, s.hist[start:start+ml]...)
ml = 0
}
}
// We must be in current buffer now
if matchLen > 0 {
start := len(s.out) - matchOff
if matchLen <= len(s.out)-start {
if ml > 0 {
start := len(s.out) - mo
if ml <= len(s.out)-start {
// No overlap
out = append(out, s.out[start:start+matchLen]...)
out = append(out, s.out[start:start+ml]...)
} else {
// Overlapping copy
// Extend destination slice and copy one byte at a time.
out = out[:len(out)+matchLen]
src := out[start : start+matchLen]
out = out[:len(out)+ml]
src := out[start : start+ml]
// Destination is the space we just added.
dst := out[len(out)-matchLen:]
dst := out[len(out)-ml:]
dst = dst[:len(src)]
for i := range src {
dst[i] = src[i]
Expand Down

0 comments on commit b276b9a

Please sign in to comment.