Skip to content

Commit

Permalink
refactor(read): make header validation more flexible
Browse files Browse the repository at this point in the history
  • Loading branch information
lzambarda committed Mar 15, 2022
1 parent 7dfab7e commit a962e84
Show file tree
Hide file tree
Showing 2 changed files with 62 additions and 2 deletions.
10 changes: 8 additions & 2 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ import (
"io"
"io/ioutil"
"os"
"regexp"
"sort"
"strconv"
)
Expand Down Expand Up @@ -122,14 +123,19 @@ func NewReader(f io.ReaderAt, size int64) (*Reader, error) {
return NewReaderEncrypted(f, size, nil)
}

// headerRegexp is used to check the validity of the header line of a PDF.
// Anchored at the start of the input, it matches the literal "%PDF-1."
// followed by a single minor-version digit in the range 0-7, any run of
// whitespace, an optional CR, and a mandatory LF. This supports extra spaces
// between the version and the newline (as inserted by libtiff/tiff2pdf) as
// well as both CRLF and LF line endings; input with no LF does not match.
var headerRegexp = regexp.MustCompile(`^%PDF-1\.[0-7]\s*\r?\n`)

// NewReaderEncrypted opens a file for reading, using the data in f with the given total size.
// If the PDF is encrypted, NewReaderEncrypted calls pw repeatedly to obtain passwords
// to try. If pw returns the empty string, NewReaderEncrypted stops trying to decrypt
// the file and returns an error.
func NewReaderEncrypted(f io.ReaderAt, size int64, pw func() string) (*Reader, error) {
buf := make([]byte, 10)
buf := make([]byte, 11)
f.ReadAt(buf, 0)
if !bytes.HasPrefix(buf, []byte("%PDF-1.")) || buf[7] < '0' || buf[7] > '7' || buf[8] != '\r' || buf[8] != '\n' {
if !headerRegexp.Match(buf) {
return nil, fmt.Errorf("not a PDF file: invalid header")
}
end := size
Expand Down
54 changes: 54 additions & 0 deletions read_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
package pdf

import (
"testing"
)

// TestRead is the top-level entry point for the reader tests; each area under
// test is run as a named subtest.
func TestRead(t *testing.T) {
t.Run("HeaderValidation", testHeaderValidation)
}

// testHeaderValidation checks headerRegexp against a table of candidate PDF
// header lines: absent input, a missing line terminator, LF and CRLF
// terminators, an out-of-range minor version, and a trailing space before the
// line ending.
func testHeaderValidation(t *testing.T) {
	// Inputs are spelled as string literals so that every case visibly matches
	// its name. Hand-encoded byte values are easy to get wrong: the previous
	// "invalid version 1.8" case used 58, which is ':' rather than '8' (56),
	// so version 1.8 was never actually exercised.
	tscs := map[string]struct {
		input         []byte
		expectedValid bool
	}{
		"nil": {
			input:         nil,
			expectedValid: false,
		},
		"empty": {
			input:         []byte{},
			expectedValid: false,
		},
		"missing LF": {
			input:         []byte("%PDF-1.7"),
			expectedValid: false,
		},
		"ok LF": {
			input:         []byte("%PDF-1.7\n"),
			expectedValid: true,
		},
		"invalid version 1.8": {
			input:         []byte("%PDF-1.8\n"),
			expectedValid: false,
		},
		"ok CRLF": {
			input:         []byte("%PDF-1.7\r\n"),
			expectedValid: true,
		},
		"ok space + CRLF": {
			input:         []byte("%PDF-1.7 \r\n"),
			expectedValid: true,
		},
	}
	for name, data := range tscs {
		// Copy the loop variable so the closure below captures this
		// iteration's value (needed with pre-1.22 loop semantics).
		data := data
		t.Run(name, func(t *testing.T) {
			gotValid := headerRegexp.Match(data.input)
			if gotValid != data.expectedValid {
				t.Errorf("expected %t, got %t", data.expectedValid, gotValid)
			}
		})
	}
}

0 comments on commit a962e84

Please sign in to comment.