From cf1d1e4f0030c28caaca9a81776c7c69f695d04e Mon Sep 17 00:00:00 2001 From: Chris North Date: Wed, 21 Feb 2024 11:01:55 -0800 Subject: [PATCH] fix: change text detect to check first and last 512 bytes (#2310) ## Description Alters the text detection logic to read both the first and last 512 bytes of a file. Tested with 5 files: - [NVIDIA installer](https://us.download.nvidia.com/XFree86/Linux-x86_64/535.154.05/NVIDIA-Linux-x86_64-535.154.05.run) Detected as an application type when reading the last 512 bytes. - Three 4 KB files of junk text with a ZARF_CONST replacement, in plain text, YAML, and JSON. All three were detected as text/plain and ZARF_CONST was replaced. - One small 100-byte file with a ZARF_CONST replacement. It was still detected as text and ZARF_CONST was replaced. Existing unit tests succeeded. ## Related Issue Fixes #2308 Relates to # ## Type of change - [x] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds functionality) - [ ] Other (security config, docs update, etc) ## Checklist before merging - [x] Test, docs, adr added or updated as needed - [x] [Contributor Guide Steps](https://github.com/defenseunicorns/zarf/blob/main/CONTRIBUTING.md#developer-workflow) followed --------- Co-authored-by: Wayne Starr --- src/pkg/utils/io.go | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/src/pkg/utils/io.go b/src/pkg/utils/io.go index bb6ae79db2..f69599802e 100755 --- a/src/pkg/utils/io.go +++ b/src/pkg/utils/io.go @@ -475,22 +475,41 @@ func IsTextFile(path string) (bool, error) { } defer f.Close() // Make sure to close the file when we're done - // Read the first 512 bytes of the file - data := make([]byte, 512) - n, err := f.Read(data) - if err != nil && err != io.EOF { + // Get file stat + stat, err := f.Stat() + if err != nil { return false, err } - // Use http.DetectContentType to determine the MIME type of the file - mimeType := http.DetectContentType(data[:n]) + // Clip offset to 
minimum of 0 + lastOffset := max(0, stat.Size()-512) + + // Take two passes checking front and back of the file + offsetPasses := []int64{0, lastOffset} + isTextCheck := []bool{false, false} + for idx, offset := range offsetPasses { + // Create 512 byte buffer + data := make([]byte, 512) + + n, err := f.ReadAt(data, offset) + if err != nil && err != io.EOF { + return false, err + } - // Check if the MIME type indicates that the file is text - hasText := strings.HasPrefix(mimeType, "text/") - hasJSON := strings.Contains(mimeType, "json") - hasXML := strings.Contains(mimeType, "xml") + // Use http.DetectContentType to determine the MIME type of the file + mimeType := http.DetectContentType(data[:n]) + + // Check if the MIME type indicates that the file is text + hasText := strings.HasPrefix(mimeType, "text/") + hasJSON := strings.Contains(mimeType, "json") + hasXML := strings.Contains(mimeType, "xml") + + // Save result + isTextCheck[idx] = hasText || hasJSON || hasXML + } - return hasText || hasJSON || hasXML, nil + // Returns true if both front and back show they are text + return isTextCheck[0] && isTextCheck[1], nil } // IsTrashBin checks if the given directory path corresponds to an operating system's trash bin.