From fa3d63f9a937610788a0e8f35943c7dc855101d6 Mon Sep 17 00:00:00 2001
From: Richard Gomez <rmgomez368@gmail.com>
Date: Mon, 7 Oct 2024 19:44:33 -0400
Subject: [PATCH 1/2] chore: log skipped files at debug level

---
 pkg/engine/engine.go         | 2 +-
 pkg/handlers/archive.go      | 8 ++++----
 pkg/handlers/default.go      | 2 +-
 pkg/sources/docker/docker.go | 2 +-
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/pkg/engine/engine.go b/pkg/engine/engine.go
index 454330d52d52..4e0ee318df07 100644
--- a/pkg/engine/engine.go
+++ b/pkg/engine/engine.go
@@ -773,7 +773,7 @@ func (e *Engine) scannerWorker(ctx context.Context) {
 			decodeLatency.WithLabelValues(decoder.Type().String(), chunk.SourceName).Observe(float64(decodeTime))
 
 			if decoded == nil {
-				ctx.Logger().V(4).Info("decoder not applicable for chunk", "decoder", decoder.Type().String(), "chunk", chunk)
+				ctx.Logger().V(5).Info("decoder not applicable for chunk", "decoder", decoder.Type().String(), "chunk", chunk)
 				continue
 			}
 
diff --git a/pkg/handlers/archive.go b/pkg/handlers/archive.go
index d33329f76e89..f5777fa52b6c 100644
--- a/pkg/handlers/archive.go
+++ b/pkg/handlers/archive.go
@@ -154,10 +154,10 @@ func (h *archiveHandler) extractorHandler(archiveChan chan []byte) func(context.
 			"filename", file.Name(),
 			"size", file.Size(),
 		)
-		lCtx.Logger().V(5).Info("Handling extracted file.")
+		lCtx.Logger().V(3).Info("Handling extracted file.")
 
 		if file.IsDir() || file.LinkTarget != "" {
-			lCtx.Logger().V(5).Info("skipping directory or symlink")
+			lCtx.Logger().V(3).Info("skipping directory or symlink")
 			return nil
 		}
 
@@ -172,13 +172,13 @@ func (h *archiveHandler) extractorHandler(archiveChan chan []byte) func(context.
 
 		fileSize := file.Size()
 		if int(fileSize) > maxSize {
-			lCtx.Logger().V(3).Info("skipping file due to size", "size", fileSize)
+			lCtx.Logger().V(2).Info("skipping file: size exceeds max allowed", "size", fileSize, "limit", maxSize)
 			h.metrics.incFilesSkipped()
 			return nil
 		}
 
 		if common.SkipFile(file.Name()) || common.IsBinary(file.Name()) {
-			lCtx.Logger().V(5).Info("skipping file")
+			lCtx.Logger().V(2).Info("skipping file: extension is ignored")
 			h.metrics.incFilesSkipped()
 			return nil
 		}
diff --git a/pkg/handlers/default.go b/pkg/handlers/default.go
index 84e58f07b2c4..31ebbf8b4162 100644
--- a/pkg/handlers/default.go
+++ b/pkg/handlers/default.go
@@ -76,7 +76,7 @@ func (h *defaultHandler) handleNonArchiveContent(ctx logContext.Context, reader
 	mimeExt := reader.mimeExt
 
 	if common.SkipFile(mimeExt) || common.IsBinary(mimeExt) {
-		ctx.Logger().V(5).Info("skipping file", "ext", mimeExt)
+		ctx.Logger().V(2).Info("skipping file: extension is ignored", "ext", mimeExt)
 		h.metrics.incFilesSkipped()
 		// Make sure we consume the reader to avoid potentially blocking indefinitely.
 		_, _ = io.Copy(io.Discard, reader)
diff --git a/pkg/sources/docker/docker.go b/pkg/sources/docker/docker.go
index 88cc971a8e57..473131e0a078 100644
--- a/pkg/sources/docker/docker.go
+++ b/pkg/sources/docker/docker.go
@@ -345,7 +345,7 @@ type chunkProcessingInfo struct {
 func (s *Source) processChunk(ctx context.Context, info chunkProcessingInfo, chunksChan chan *sources.Chunk) error {
 	const filesizeLimitBytes int64 = 50 * 1024 * 1024 // 50MB
 	if info.size > filesizeLimitBytes {
-		ctx.Logger().V(4).Info("skipping large file", "file", info.name, "size", info.size)
+		ctx.Logger().V(2).Info("skipping file: size exceeds max allowed", "file", info.name, "size", info.size, "limit", filesizeLimitBytes)
 		return nil
 	}
 

From a904023cc026d03806342eb726bf77c597c914ec Mon Sep 17 00:00:00 2001
From: Richard Gomez <rmgomez368@gmail.com>
Date: Mon, 7 Oct 2024 19:51:23 -0400
Subject: [PATCH 2/2] chore(github): remove duplicate 'scanning repo' message

The same 'scanning repo' message is already logged in git.ScanCommits.
---
 pkg/sources/github/github.go | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/pkg/sources/github/github.go b/pkg/sources/github/github.go
index 043df01c52bf..e3e492bba359 100644
--- a/pkg/sources/github/github.go
+++ b/pkg/sources/github/github.go
@@ -13,7 +13,6 @@ import (
 	"sync/atomic"
 	"time"
 
-	"github.com/go-logr/logr"
 	"github.com/gobwas/glob"
 	"github.com/google/go-github/v63/github"
 	"golang.org/x/exp/rand"
@@ -726,15 +725,6 @@ func (s *Source) cloneAndScanRepo(ctx context.Context, repoURL string, repoInfo
 	// TODO: Can this be set once or does it need to be set on every iteration? Is |s.scanOptions| set every clone?
 	s.setScanOptions(s.conn.Base, s.conn.Head)
 
-	// Repo size is not collected for wikis.
-	var logger logr.Logger
-	if !strings.HasSuffix(repoURL, ".wiki.git") && repoInfo.size > 0 {
-		logger = ctx.Logger().WithValues("repo_size_kb", repoInfo.size)
-	} else {
-		logger = ctx.Logger()
-	}
-	logger.V(2).Info("scanning repo")
-
 	start := time.Now()
 	if err = s.git.ScanRepo(ctx, repo, path, s.scanOptions, reporter); err != nil {
 		return duration, fmt.Errorf("error scanning repo %s: %w", repoURL, err)