From abb03b2b935c8e9cd12a7dd9c35924d1da98706b Mon Sep 17 00:00:00 2001 From: Johan Walles Date: Mon, 6 Jan 2025 00:20:35 +0100 Subject: [PATCH] Detect compressed files by magic numbers Rather than by file extension. This makes us handle misnamed files correctly. Also fix a bug in how we display the filename in the UI. Before this change, we just displayed the plain file name even if we uncompressed it. With this change in place, we now display the filename without any compression extension. --- m/reader.go | 2 +- m/zopen.go | 71 ++++++++++++++++++++++++++++++++++++++--------------- 2 files changed, 52 insertions(+), 21 deletions(-) diff --git a/m/reader.go b/m/reader.go index 227ec71..19fedd1 100644 --- a/m/reader.go +++ b/m/reader.go @@ -535,7 +535,7 @@ func NewReaderFromFilename(filename string, formatter chroma.Formatter, options options.Lexer = lexers.Match(highlightingFilename) } - returnMe := newReaderFromStream(stream, &filename, formatter, options) + returnMe := newReaderFromStream(stream, &highlightingFilename, formatter, options) if options.Lexer == nil { returnMe.highlightingDone.Store(true) diff --git a/m/zopen.go b/m/zopen.go index 91ded1f..aec44b3 100644 --- a/m/zopen.go +++ b/m/zopen.go @@ -14,6 +14,11 @@ import ( "github.com/ulikunitz/xz" ) +var gzipMagic = []byte{0x1f, 0x8b} +var bzip2Magic = []byte{0x42, 0x5a, 0x68} +var zstdMagic = []byte{0x28, 0xb5, 0x2f, 0xfd} +var xzMagic = []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00} + // The second return value is the file name with any compression extension removed. func ZOpen(filename string) (io.ReadCloser, string, error) { file, err := os.Open(filename) @@ -21,30 +26,56 @@ func ZOpen(filename string) (io.ReadCloser, string, error) { return nil, "", err } + // Read the first 6 bytes to determine the compression type + firstBytes := make([]byte, 6) + _, err = file.Read(firstBytes) + if err != nil { + if err == io.EOF { + // File was empty + return file, filename, nil + } + return nil, "", fmt.Errorf("failed to read file: %w", err) + } + + // Reset file reader to start of file + _, err = file.Seek(0, 0) + if err != nil { + return nil, "", fmt.Errorf("failed to seek to start of file: %w", err) + } + switch { - case strings.HasSuffix(filename, ".gz"): + case bytes.HasPrefix(firstBytes, gzipMagic): reader, err := gzip.NewReader(file) - return reader, strings.TrimSuffix(filename, ".gz"), err + if err != nil { + return nil, "", err + } - // Ref: https://github.com/walles/moar/issues/194 - case strings.HasSuffix(filename, ".tgz"): - reader, err := gzip.NewReader(file) - return reader, strings.TrimSuffix(filename, ".tgz"), err + newName := strings.TrimSuffix(filename, ".gz") - case strings.HasSuffix(filename, ".bz2"): + // Ref: https://github.com/walles/moar/issues/194 + if strings.HasSuffix(newName, ".tgz") { + newName = strings.TrimSuffix(newName, ".tgz") + ".tar" + } + + return reader, newName, err + + case bytes.HasPrefix(firstBytes, bzip2Magic): return struct { io.Reader io.Closer }{bzip2.NewReader(file), file}, strings.TrimSuffix(filename, ".bz2"), nil - case strings.HasSuffix(filename, ".zst") || strings.HasSuffix(filename, ".zstd"): + case bytes.HasPrefix(firstBytes, zstdMagic): decoder, err := zstd.NewReader(file) if err != nil { return nil, "", err } - return decoder.IOReadCloser(), strings.TrimSuffix(filename, ".zst"), nil - case strings.HasSuffix(filename, ".xz"): + newName := strings.TrimSuffix(filename, ".zst") + newName = strings.TrimSuffix(newName, ".zstd") + return decoder.IOReadCloser(), newName, nil + + case bytes.HasPrefix(firstBytes, xzMagic): xzReader, err := xz.NewReader(file) if err != nil { return nil, "", err @@ -66,30 +97,30 @@ func ZOpen(filename string) (io.ReadCloser, string, error) { // Ref: https://github.com/walles/moar/issues/261 func ZReader(input io.Reader) (io.Reader, error) { // Read the first 6 bytes to determine the compression type - buffer := make([]byte, 6) - _, err := input.Read(buffer) + firstBytes := make([]byte, 6) + _, err := input.Read(firstBytes) if err != nil { if err == io.EOF { - // Return a reader for the short input - return bytes.NewReader(buffer), nil + // Stream was empty + return input, nil } - return nil, fmt.Errorf("failed to read input: %w", err) + return nil, fmt.Errorf("failed to read stream: %w", err) } // Reset input reader to start of stream - input = io.MultiReader(bytes.NewReader(buffer), input) + input = io.MultiReader(bytes.NewReader(firstBytes), input) switch { - case bytes.HasPrefix(buffer, []byte{0x1f, 0x8b}): // Gzip magic numbers + case bytes.HasPrefix(firstBytes, gzipMagic): log.Info("Input stream is gzip compressed") return gzip.NewReader(input) - case bytes.HasPrefix(buffer, []byte{0x28, 0xb5, 0x2f, 0xfd}): // Zstd magic numbers + case bytes.HasPrefix(firstBytes, zstdMagic): log.Info("Input stream is zstd compressed") return zstd.NewReader(input) - case bytes.HasPrefix(buffer, []byte{0x42, 0x5a, 0x68}): // Bzip2 magic numbers + case bytes.HasPrefix(firstBytes, bzip2Magic): log.Info("Input stream is bzip2 compressed") return bzip2.NewReader(input), nil - case bytes.HasPrefix(buffer, []byte{0xfd, 0x37, 0x7a, 0x58, 0x5a, 0x00}): // XZ magic numbers + case bytes.HasPrefix(firstBytes, xzMagic): log.Info("Input stream is xz compressed") return xz.NewReader(input) default: