From 59c615a5e9001224d1dd4049f92f046ca620498c Mon Sep 17 00:00:00 2001 From: Dustin Decker Date: Mon, 7 Oct 2024 15:55:07 -0500 Subject: [PATCH] Fix git binary handling and add a smoke test (#3379) * Fix git binary handling and add a smoke test * hide stdout * add failure case to smoke test * run again with deadlock fix * Add logic to drain reader in the event of an error * add tests * be picky * set author identity * suppress linter --------- Co-authored-by: Ahrav Dutta --- .github/workflows/smoke.yml | 37 +++++- pkg/handlers/default.go | 3 + pkg/handlers/handlers.go | 16 ++- pkg/handlers/handlers_test.go | 205 ++++++++++++++++++++++++++++++++++ pkg/sources/git/git.go | 33 +----- 5 files changed, 263 insertions(+), 31 deletions(-) diff --git a/.github/workflows/smoke.yml b/.github/workflows/smoke.yml index 68f6f91b0c05..a36b0f590c62 100644 --- a/.github/workflows/smoke.yml +++ b/.github/workflows/smoke.yml @@ -16,5 +16,38 @@ jobs: - name: Smoke run: | set -e - go run . git https://github.com/dustin-decker/secretsandstuff.git - go run . github --repo https://github.com/dustin-decker/secretsandstuff.git + go run . git https://github.com/dustin-decker/secretsandstuff.git > /dev/null + go run . github --repo https://github.com/dustin-decker/secretsandstuff.git > /dev/null + zombies: + runs-on: ubuntu-latest + timeout-minutes: 5 + steps: + - name: Install Go + uses: actions/setup-go@v5 + with: + go-version: "1.23" + - name: Checkout code + uses: actions/checkout@v4 + - name: Run trufflehog + run: | + set -e + go run . git --no-verification file://. > /dev/null + # This case previously had a deadlock issue and left zombies after trufflehog exited #3379 + go run . git --no-verification https://github.com/git-test-fixtures/binary.git > /dev/null + - name: Check for running git processes and zombies + run: | + if pgrep -x "git" > /dev/null + then + echo "Git processes are still running" + exit 1 + else + echo "No git processes found" + fi + + if ps -A -ostat,ppid | grep -e '[zZ]' > /dev/null + then + echo "Zombie processes found" + exit 1 + else + echo "No zombie processes found" + fi diff --git a/pkg/handlers/default.go b/pkg/handlers/default.go index 33a75acd4a4e..84e58f07b2c4 100644 --- a/pkg/handlers/default.go +++ b/pkg/handlers/default.go @@ -3,6 +3,7 @@ package handlers import ( "context" "errors" + "io" "time" "github.com/trufflesecurity/trufflehog/v3/pkg/common" @@ -77,6 +78,8 @@ func (h *defaultHandler) handleNonArchiveContent(ctx logContext.Context, reader if common.SkipFile(mimeExt) || common.IsBinary(mimeExt) { ctx.Logger().V(5).Info("skipping file", "ext", mimeExt) h.metrics.incFilesSkipped() + // Make sure we consume the reader to avoid potentially blocking indefinitely. + _, _ = io.Copy(io.Discard, reader) return nil } diff --git a/pkg/handlers/handlers.go b/pkg/handlers/handlers.go index dd3d43144c52..8f1f5e106bef 100644 --- a/pkg/handlers/handlers.go +++ b/pkg/handlers/handlers.go @@ -283,7 +283,21 @@ func HandleFile( } return fmt.Errorf("error creating custom reader: %w", err) } - defer rdr.Close() + defer func() { + // Ensure all data is read to prevent broken pipe. + _, copyErr := io.Copy(io.Discard, rdr) + if copyErr != nil { + err = fmt.Errorf("error discarding remaining data: %w", copyErr) + } + closeErr := rdr.Close() + if closeErr != nil { + if err != nil { + err = fmt.Errorf("%v; error closing reader: %w", err, closeErr) + } else { + err = fmt.Errorf("error closing reader: %w", closeErr) + } + } + }() ctx = logContext.WithValues(ctx, "mime", rdr.mime.String()) diff --git a/pkg/handlers/handlers_test.go b/pkg/handlers/handlers_test.go index e9e0e5f73b86..418a54db8517 100644 --- a/pkg/handlers/handlers_test.go +++ b/pkg/handlers/handlers_test.go @@ -1,10 +1,14 @@ package handlers import ( + "archive/zip" + "bytes" + "fmt" "io" "net/http" "os" "os/exec" + "path/filepath" "strings" "testing" "time" @@ -475,3 +479,204 @@ func TestHandleZipCommandStdoutPipe(t *testing.T) { assert.Equal(t, wantCount, count) } + +func TestHandleGitCatFile(t *testing.T) { + tests := []struct { + name string + fileName string + fileSize int + supportedType bool + expectedChunks int + }{ + { + name: "LargeBlob", + fileName: "largefile.bin", + fileSize: 50 * 1024 * 1024, // 50 MB + supportedType: true, + expectedChunks: 5120, + }, + { + name: "UnsupportedType", + fileName: "unsupported.so", + fileSize: 1024 * 1024, // 1 MB + supportedType: false, + expectedChunks: 0, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Set up a temporary git repository with the specified file. + var gitDir string + if tt.supportedType { + gitDir = setupTempGitRepo(t, tt.fileName, tt.fileSize) + } else { + gitDir = setupTempGitRepoWithUnsupportedFile(t, tt.fileName, tt.fileSize) + } + defer os.RemoveAll(gitDir) + + cmd := exec.Command("git", "-C", gitDir, "rev-parse", "HEAD") + hashBytes, err := cmd.Output() + assert.NoError(t, err, "Failed to get commit hash") + commitHash := strings.TrimSpace(string(hashBytes)) + + // Create a pipe to simulate the git cat-file stdout. + cmd = exec.Command("git", "-C", gitDir, "cat-file", "blob", fmt.Sprintf("%s:%s", commitHash, tt.fileName)) + + var stderr bytes.Buffer + cmd.Stderr = &stderr + + stdout, err := cmd.StdoutPipe() + assert.NoError(t, err, "Failed to create stdout pipe") + + err = cmd.Start() + assert.NoError(t, err, "Failed to start git cat-file command") + + ctx, cancel := context.WithTimeout(context.Background(), 20*time.Second) + defer cancel() + + chunkCh := make(chan *sources.Chunk, 1000) + + go func() { + defer close(chunkCh) + err := HandleFile(ctx, stdout, &sources.Chunk{}, sources.ChanReporter{Ch: chunkCh}, WithSkipArchives(false)) + assert.NoError(t, err, "HandleFile should not return an error") + }() + + err = cmd.Wait() + assert.NoError(t, err, "git cat-file command should complete without error") + + count := 0 + for range chunkCh { + count++ + } + + assert.Equal(t, tt.expectedChunks, count, "Number of chunks should match the expected value") + }) + } +} + +func setupTempGitRepoWithUnsupportedFile(t *testing.T, fileName string, fileSize int) string { + t.Helper() + return setupTempGitRepoCommon(t, fileName, fileSize, true) +} + +func setupTempGitRepo(t *testing.T, archiveName string, fileSize int) string { + t.Helper() + return setupTempGitRepoCommon(t, archiveName, fileSize, false) +} + +func setupTempGitRepoCommon(t *testing.T, fileName string, fileSize int, isUnsupported bool) string { + t.Helper() + + tempDir := t.TempDir() + + cmd := exec.Command("git", "init", tempDir) + var initStderr bytes.Buffer + cmd.Stderr = &initStderr + err := cmd.Run() + if err != nil { + t.Fatalf("Failed to initialize git repository: %v, stderr: %s", err, initStderr.String()) + } + + cmds := [][]string{ + {"git", "-C", tempDir, "config", "user.name", "Test User"}, + {"git", "-C", tempDir, "config", "user.email", "test@example.com"}, + } + + for _, cmdArgs := range cmds { + cmd := exec.Command(cmdArgs[0], cmdArgs[1:]...) //nolint:gosec + var cmdStderr bytes.Buffer + cmd.Stderr = &cmdStderr + err := cmd.Run() + if err != nil { + t.Fatalf("Failed to set git config: %v, stderr: %s", err, cmdStderr.String()) + } + } + + filePath := filepath.Join(tempDir, fileName) + + // Create the file with appropriate content. + f, err := os.Create(filePath) + if err != nil { + t.Fatalf("Failed to create file: %v", err) + } + defer f.Close() + + if isUnsupported { + // Write ELF header for unsupported file. + // https://refspecs.linuxfoundation.org/elf/gabi4+/ch4.eheader.html + elfHeader := []byte{ + 0x7f, 'E', 'L', 'F', // ELF magic number + 2, // 64-bit format + 1, // Little endian + 1, // Current version of ELF + 0, // Target OS ABI + 0, // ABI Version + 0, 0, 0, 0, 0, 0, 0, // 7 bytes of padding + 3, 0, // Relocatable file + 0x3e, 0, // AMD x86-64 architecture + 1, 0, 0, 0, // ELF version + 0, 0, 0, 0, 0, 0, 0, 0, // Entry point + 0, 0, 0, 0, 0, 0, 0, 0, // Program header offset + 0, 0, 0, 0, 0, 0, 0, 0, // Section header offset + } + _, err = f.Write(elfHeader) + if err != nil { + t.Fatalf("Failed to write ELF header: %v", err) + } + } else { + // Write ZIP content for supported file. + zipWriter := zip.NewWriter(f) + header := &zip.FileHeader{ + Name: "largefile.txt", + Method: zip.Store, // No compression + } + zipFileWriter, err := zipWriter.CreateHeader(header) + if err != nil { + t.Fatalf("Failed to create file in ZIP archive: %v", err) + } + + dataChunk := bytes.Repeat([]byte("A"), 1024) // 1KB chunk + totalWritten := 0 + for totalWritten < fileSize { + remaining := fileSize - totalWritten + if remaining < len(dataChunk) { + _, err = zipFileWriter.Write(dataChunk[:remaining]) + if err != nil { + t.Fatalf("Failed to write to inner file in ZIP archive: %v", err) + } + totalWritten += remaining + } else { + _, err = zipFileWriter.Write(dataChunk) + if err != nil { + t.Fatalf("Failed to write to inner file in ZIP archive: %v", err) + } + totalWritten += len(dataChunk) + } + } + + if err := zipWriter.Close(); err != nil { + t.Fatalf("Failed to close ZIP writer: %v", err) + } + } + + // Add and commit the file to Git. + cmd = exec.Command("git", "-C", tempDir, "add", fileName) + var addStderr bytes.Buffer + cmd.Stderr = &addStderr + err = cmd.Run() + if err != nil { + t.Fatalf("Failed to add file to git: %v, stderr: %s", err, addStderr.String()) + } + + cmd = exec.Command("git", "-C", tempDir, "commit", "-m", "Add file") + var commitStderr bytes.Buffer + cmd.Stderr = &commitStderr + err = cmd.Run() + if err != nil { + t.Fatalf("Failed to commit file to git: %v, stderr: %s", err, commitStderr.String()) + } + + return tempDir +} diff --git a/pkg/sources/git/git.go b/pkg/sources/git/git.go index 5843f49b83a2..d9992dadecfd 100644 --- a/pkg/sources/git/git.go +++ b/pkg/sources/git/git.go @@ -1241,44 +1241,21 @@ func (s *Git) handleBinary(ctx context.Context, gitDir string, reporter sources. } cmd := exec.Command("git", "-C", gitDir, "cat-file", "blob", commitHash.String()+":"+path) - stdout, catCmd, err := s.executeCatFileCmd(cmd) - if err != nil { - return err - } - // Wait must be called after closing the pipe (defer is a stack, so first defer is executed last) - defer func() { - _ = catCmd.Wait() - }() - defer stdout.Close() - - err = handlers.HandleFile(ctx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives)) - - // Always call Wait() to ensure the process is properly cleaned up - waitErr := cmd.Wait() - - // If there was an error in HandleFile, return that error - if err != nil { - return err - } - - // If Wait() resulted in an error, return that error - return waitErr -} - -func (s *Git) executeCatFileCmd(cmd *exec.Cmd) (io.ReadCloser, *exec.Cmd, error) { var stderr bytes.Buffer cmd.Stderr = &stderr stdout, err := cmd.StdoutPipe() if err != nil { - return nil, nil, fmt.Errorf("error running git cat-file: %w\n%s", err, stderr.Bytes()) + return fmt.Errorf("error running git cat-file: %w\n%s", err, stderr.Bytes()) } if err := cmd.Start(); err != nil { - return nil, nil, fmt.Errorf("error starting git cat-file: %w\n%s", err, stderr.Bytes()) + return fmt.Errorf("error starting git cat-file: %w\n%s", err, stderr.Bytes()) } - return stdout, cmd, nil + defer func() { _ = cmd.Wait() }() + + return handlers.HandleFile(ctx, stdout, chunkSkel, reporter, handlers.WithSkipArchives(s.skipArchives)) } func (s *Source) Enumerate(ctx context.Context, reporter sources.UnitReporter) error {