From 0d3b52dd4e6afe136030a2ef8ebfec8779a4f8aa Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:18:10 +0200 Subject: [PATCH 01/14] setting Large Cluster opti as default --- pkg/config/builder.go | 2 ++ pkg/config/config.go | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/pkg/config/builder.go b/pkg/config/builder.go index 8e5ae8018..dc82d1946 100644 --- a/pkg/config/builder.go +++ b/pkg/config/builder.go @@ -11,6 +11,8 @@ const ( DefaultVertexBatchSizeSmall = DefaultVertexBatchSize / 5 DefaultStopOnError = false + + DefaultLargeClusterOptimizations = true ) // VertexBuilderConfig configures vertex builder parameters. diff --git a/pkg/config/config.go b/pkg/config/config.go index 8921e794a..b5a5ed7c1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -122,6 +122,7 @@ func SetDefaultValues(c *viper.Viper) { c.SetDefault("builder.edge.batch_size_small", DefaultEdgeBatchSizeSmall) c.SetDefault("builder.edge.batch_size_cluster_impact", DefaultEdgeBatchSizeClusterImpact) c.SetDefault("builder.stop_on_error", DefaultStopOnError) + c.SetDefault("builder.edge.large_cluster_optimizations", DefaultLargeClusterOptimizations) c.SetDefault(IngestorAPIEndpoint, DefaultIngestorAPIEndpoint) c.SetDefault(IngestorAPIInsecure, DefaultIngestorAPIInsecure) @@ -189,8 +190,12 @@ func NewConfig(v *viper.Viper, configPath string) (*KubehoundConfig, error) { // NewConfig creates a new config instance from the provided file using viper. func NewInlineConfig(v *viper.Viper) (*KubehoundConfig, error) { + // Load default embedded config file + SetDefaultValues(v) + // Configure environment variable override SetEnvOverrides(v) + kc, err := unmarshalConfig(v) if err != nil { return nil, fmt.Errorf("unmarshaling config data: %w", err) From f3707edf4ca8f059e9f285453961fe9971c8e9a6 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:18:43 +0200 Subject: [PATCH 02/14] bumping default size for max archive --- pkg/config/ingestor.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/config/ingestor.go b/pkg/config/ingestor.go index a4a9c6bfb..b39f4216e 100644 --- a/pkg/config/ingestor.go +++ b/pkg/config/ingestor.go @@ -6,7 +6,7 @@ const ( DefaultBucketName = "" // we want to let it empty because we can easily abort if it's not configured DefaultTempDir = "/tmp/kubehound" DefaultArchiveName = "archive.tar.gz" - DefaultMaxArchiveSize = int64(1 << 30) // 1GB + DefaultMaxArchiveSize = int64(2 << 30) // 1GB IngestorAPIEndpoint = "ingestor.api.endpoint" IngestorAPIInsecure = "ingestor.api.insecure" From 332ce2d311f10ead9713e403173a4dc34b748000 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:29:52 +0200 Subject: [PATCH 03/14] adding check mod for ExtractTagGz --- pkg/ingestor/puller/blob/blob.go | 2 +- pkg/ingestor/puller/puller.go | 17 +++++++++++++++-- pkg/ingestor/puller/puller_test.go | 2 +- test/system/setup_test.go | 2 +- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/pkg/ingestor/puller/blob/blob.go b/pkg/ingestor/puller/blob/blob.go index 8f6397a66..b42519891 100644 --- a/pkg/ingestor/puller/blob/blob.go +++ b/pkg/ingestor/puller/blob/blob.go @@ -194,7 +194,7 @@ func (bs *BlobStore) Extract(ctx context.Context, archivePath string) error { return fmt.Errorf("Dangerous file path used during extraction, aborting: %w", err) } - err = puller.ExtractTarGz(archivePath, basePath, bs.cfg.Ingestor.MaxArchiveSize) + err = puller.ExtractTarGz(false, archivePath, basePath, bs.cfg.Ingestor.MaxArchiveSize) if err != nil { return err } diff --git a/pkg/ingestor/puller/puller.go b/pkg/ingestor/puller/puller.go index 3f020b933..8df5b955c 100644 --- a/pkg/ingestor/puller/puller.go +++ b/pkg/ingestor/puller/puller.go @@ -44,25 +44,29 @@ func sanitizeExtractPath(filePath string, destination string) error { return nil } -func ExtractTarGz(archivePath string, basePath string, maxArchiveSize int64) error { +func ExtractTarGz(checkOnly bool, archivePath string, basePath string, maxArchiveSize int64) error { gzipFileReader, err := os.Open(archivePath) if err != nil { return err } + defer gzipFileReader.Close() uncompressedStream, err := gzip.NewReader(gzipFileReader) if err != nil { return err } + defer uncompressedStream.Close() + tarReader := tar.NewReader(io.LimitReader(uncompressedStream, maxArchiveSize)) for { header, err := tarReader.Next() - if errors.Is(err, io.EOF) { + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { break } if err != nil { return err } + err = sanitizeExtractPath(basePath, header.Name) if err != nil { return err @@ -72,11 +76,17 @@ func ExtractTarGz(archivePath string, basePath string, maxArchiveSize int64) err switch header.Typeflag { // Handle sub folder containing namespaces case tar.TypeDir: + if checkOnly { + continue + } err := mkdirIfNotExists(cleanPath) if err != nil { return err } case tar.TypeReg: + if checkOnly { + continue + } err := mkdirIfNotExists(filepath.Dir(cleanPath)) if err != nil { return err @@ -89,6 +99,9 @@ func ExtractTarGz(archivePath string, basePath string, maxArchiveSize int64) err // We don't really have an upper limit of archive size and adding a limited writer is not trivial without importing // a third party library (like our internal secure lib) _, err = io.Copy(outFile, tarReader) //nolint:gosec + if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + return fmt.Errorf("archive size exceeds the limit: %d", maxArchiveSize) + } if err != nil { return fmt.Errorf("copying file %s: %w", cleanPath, err) } diff --git a/pkg/ingestor/puller/puller_test.go b/pkg/ingestor/puller/puller_test.go index 8f0ebd791..93037f411 100644 --- a/pkg/ingestor/puller/puller_test.go +++ b/pkg/ingestor/puller/puller_test.go @@ -130,7 +130,7 @@ func TestExtractTarGz(t *testing.T) { if err != nil { t.Error(err) } - if err := ExtractTarGz("./testdata/archive.tar.gz", tmpPath, tt.args.maxArchiveSize); (err != nil) != tt.wantErr { + if err := ExtractTarGz(false, "./testdata/archive.tar.gz", tmpPath, tt.args.maxArchiveSize); (err != nil) != tt.wantErr { t.Errorf("ExtractTarGz() error = %v, wantErr %v", err, tt.wantErr) } for _, file := range tt.expectedFiles { diff --git a/test/system/setup_test.go b/test/system/setup_test.go index e3badd666..c3a27560a 100644 --- a/test/system/setup_test.go +++ b/test/system/setup_test.go @@ -125,7 +125,7 @@ func RunLocal(ctx context.Context, runArgs *runArgs, compress bool, p *providers runID := runArgs.runID if compress { - err := puller.ExtractTarGz(runArgs.resultPath, collectorDir, config.DefaultMaxArchiveSize) + err := puller.ExtractTarGz(false, runArgs.resultPath, collectorDir, config.DefaultMaxArchiveSize) if err != nil { log.I.Fatalf(err.Error()) } From a016b9dfc2ee904b5d5849c5ee96110794e57203 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:31:22 +0200 Subject: [PATCH 04/14] Adding IsTarGz to detect targz files --- pkg/ingestor/puller/puller.go | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/pkg/ingestor/puller/puller.go b/pkg/ingestor/puller/puller.go index 8df5b955c..5f7f7cd37 100644 --- a/pkg/ingestor/puller/puller.go +++ b/pkg/ingestor/puller/puller.go @@ -44,6 +44,27 @@ func sanitizeExtractPath(filePath string, destination string) error { return nil } +func IsTarGz(filePath string, maxArchiveSize int64) (bool, error) { + fileInfo, err := os.Stat(filePath) + if err != nil { + return false, fmt.Errorf("stat %s: %w", filePath, err) + } + + switch mod := fileInfo.Mode(); { + case mod.IsDir(): + return false, nil + case mod.IsRegular(): + err = ExtractTarGz(true, filePath, "/tmp", maxArchiveSize) + if err != nil { + return false, err + } + + return true, nil + } + + return false, fmt.Errorf("file type not supported") +} + func ExtractTarGz(checkOnly bool, archivePath string, basePath string, maxArchiveSize int64) error { gzipFileReader, err := os.Open(archivePath) if err != nil { From 5aabf29f294ede5c5f3be594643012f391aad563 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:32:05 +0200 Subject: [PATCH 05/14] adding new local ingest method --- cmd/kubehound/grpc_client.go | 40 ------------- cmd/kubehound/ingest.go | 75 +++++++++++++++++++++++++ pkg/kubehound/core/core_ingest_local.go | 40 +++++++++++++ 3 files changed, 115 insertions(+), 40 deletions(-) delete mode 100644 cmd/kubehound/grpc_client.go create mode 100644 cmd/kubehound/ingest.go create mode 100644 pkg/kubehound/core/core_ingest_local.go diff --git a/cmd/kubehound/grpc_client.go b/cmd/kubehound/grpc_client.go deleted file mode 100644 index 6c1dbfdce..000000000 --- a/cmd/kubehound/grpc_client.go +++ /dev/null @@ -1,40 +0,0 @@ -package main - -import ( - "fmt" - - "github.com/DataDog/KubeHound/pkg/cmd" - "github.com/DataDog/KubeHound/pkg/config" - "github.com/DataDog/KubeHound/pkg/kubehound/core" - "github.com/spf13/cobra" - "github.com/spf13/viper" -) - -var ( - grpcClientIngestCmd = &cobra.Command{ - Use: "ingest", - Short: "Start an ingestion on KubeHoud as a Service server", - Long: `Run an ingestion on KHaaS from a bucket to build the attack path`, - PreRunE: func(cobraCmd *cobra.Command, args []string) error { - viper.BindPFlag(config.IngestorAPIEndpoint, cobraCmd.Flags().Lookup("khaas-server")) //nolint: errcheck - viper.BindPFlag(config.IngestorAPIInsecure, cobraCmd.Flags().Lookup("insecure")) //nolint: errcheck - - return cmd.InitializeKubehoundConfig(cobraCmd.Context(), "", false, true) - }, - RunE: func(cobraCmd *cobra.Command, args []string) error { - // Passing the Kubehound config from viper - khCfg, err := cmd.GetConfig() - if err != nil { - return fmt.Errorf("get config: %w", err) - } - - return core.CoreClientGRPCIngest(khCfg.Ingestor, khCfg.Ingestor.ClusterName, khCfg.Ingestor.RunID) - }, - } -) - -func init() { - cmd.InitGrpcClientCmd(grpcClientIngestCmd, true) - - rootCmd.AddCommand(grpcClientIngestCmd) -} diff --git a/cmd/kubehound/ingest.go b/cmd/kubehound/ingest.go new file mode 100644 index 000000000..2351a2e09 --- /dev/null +++ b/cmd/kubehound/ingest.go @@ -0,0 +1,75 @@ +package main + +import ( + "fmt" + + "github.com/DataDog/KubeHound/pkg/cmd" + "github.com/DataDog/KubeHound/pkg/config" + "github.com/DataDog/KubeHound/pkg/kubehound/core" + "github.com/spf13/cobra" + "github.com/spf13/viper" +) + +var ( + inputFilePath string +) + +var ( + ingestCmd = &cobra.Command{ + Use: "ingest", + Short: "Start an ingestion locally or remotely", + Long: `Run an ingestion locally (local) or on KHaaS from a bucket to build the attack path (remote)`, + } + + localIngestCmd = &cobra.Command{ + Use: "local", + Short: "Ingest data locally from a KubeHound dump", + Long: `Run an ingestion locally using a previous dump (directory or tar.gz)`, + PreRunE: func(cobraCmd *cobra.Command, args []string) error { + return cmd.InitializeKubehoundConfig(cobraCmd.Context(), "", true, true) + }, + RunE: func(cobraCmd *cobra.Command, args []string) error { + // Passing the Kubehound config from viper + khCfg, err := cmd.GetConfig() + if err != nil { + return fmt.Errorf("get config: %w", err) + } + + return core.CoreLocalIngest(cobraCmd.Context(), khCfg, inputFilePath) + }, + } + + remoteIngestCmd = &cobra.Command{ + Use: "remote", + Short: "Ingest data remotely on a KHaaS instance", + Long: `Run an ingestion on KHaaS from a bucket to build the attack path`, + PreRunE: func(cobraCmd *cobra.Command, args []string) error { + return cmd.InitializeKubehoundConfig(cobraCmd.Context(), "", false, true) + }, + PreRun: func(cobraCmd *cobra.Command, args []string) { + viper.BindPFlag(config.IngestorAPIEndpoint, cobraCmd.Flags().Lookup("khaas-server")) //nolint: errcheck + viper.BindPFlag(config.IngestorAPIInsecure, cobraCmd.Flags().Lookup("insecure")) //nolint: errcheck + }, + RunE: func(cobraCmd *cobra.Command, args []string) error { + // Passing the Kubehound config from viper + khCfg, err := cmd.GetConfig() + if err != nil { + return fmt.Errorf("get config: %w", err) + } + + return core.CoreClientGRPCIngest(khCfg.Ingestor, khCfg.Ingestor.ClusterName, khCfg.Ingestor.RunID) + }, + } +) + +func init() { + + ingestCmd.AddCommand(localIngestCmd) + cmd.InitLocalIngestCmd(localIngestCmd) + localIngestCmd.Flags().StringVar(&inputFilePath, "data", "", "Filepath for the data to process (directory or tar.gz path)") + + ingestCmd.AddCommand(remoteIngestCmd) + cmd.InitRemoteIngestCmd(remoteIngestCmd, true) + + rootCmd.AddCommand(ingestCmd) +} diff --git a/pkg/kubehound/core/core_ingest_local.go b/pkg/kubehound/core/core_ingest_local.go new file mode 100644 index 000000000..96a4db4af --- /dev/null +++ b/pkg/kubehound/core/core_ingest_local.go @@ -0,0 +1,40 @@ +package core + +import ( + "context" + "fmt" + "os" + + "github.com/DataDog/KubeHound/pkg/config" + "github.com/DataDog/KubeHound/pkg/ingestor/puller" +) + +func CoreLocalIngest(ctx context.Context, khCfg *config.KubehoundConfig, resultPath string) error { + // Using the collector config to ingest the data + khCfg.Collector.Type = config.CollectorTypeFile + // Applying the clusterName from the dynamic config (from CLI or env var) to the collector config + khCfg.Collector.File.ClusterName = khCfg.Dynamic.ClusterName + // Treating by default as data not compressed (directory of the results) + khCfg.Collector.File.Directory = resultPath + + // Checking dynamically if the data is being compressed + compress, err := puller.IsTarGz(resultPath, khCfg.Ingestor.MaxArchiveSize) + if err != nil { + return err + } + + if compress { + tmpDir, err := os.MkdirTemp("/tmp/", "kh-local-ingest-*") + if err != nil { + return fmt.Errorf("creating temp dir: %w", err) + } + // Resetting the directory to the temp directory used to extract the data + khCfg.Collector.File.Directory = tmpDir + err = puller.ExtractTarGz(false, resultPath, tmpDir, config.DefaultMaxArchiveSize) + if err != nil { + return err + } + } + + return CoreLive(ctx, khCfg) +} From 53c2b2308fe56da8c7c83e649eb780697167d02a Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:34:20 +0200 Subject: [PATCH 06/14] renaming cloud to remote --- cmd/kubehound/dumper.go | 16 ++++++++-------- pkg/cmd/dump.go | 12 +++++++++--- 2 files changed, 17 insertions(+), 11 deletions(-) diff --git a/cmd/kubehound/dumper.go b/cmd/kubehound/dumper.go index 9eb3e8640..c3d4e473e 100644 --- a/cmd/kubehound/dumper.go +++ b/cmd/kubehound/dumper.go @@ -22,8 +22,8 @@ var ( }, } - cloudCmd = &cobra.Command{ - Use: "cloud", + dumpRemoteCmd = &cobra.Command{ + Use: "remote", Short: "Push collected k8s resources to an s3 bucket of a targeted cluster. Run the ingestion on KHaaS if khaas-server is set.", Long: `Collect all Kubernetes resources needed to build the attack path in an offline format on a s3 bucket. If KubeHound as a Service (KHaaS) is set, then run the ingestion on KHaaS.`, PreRunE: func(cobraCmd *cobra.Command, args []string) error { @@ -63,7 +63,7 @@ var ( return err }, } - localCmd = &cobra.Command{ + dumpLocalCmd = &cobra.Command{ Use: "local", Short: "Dump locally the k8s resources of a targeted cluster", Long: `Collect all Kubernetes resources needed to build the attack path in an offline format locally (compressed or flat)`, @@ -85,11 +85,11 @@ var ( func init() { cmd.InitDumpCmd(dumpCmd) - cmd.InitLocalCmd(localCmd) - cmd.InitCloudCmd(cloudCmd) - cmd.InitGrpcClientCmd(cloudCmd, false) + cmd.InitLocalDumpCmd(dumpLocalCmd) + cmd.InitRemoteDumpCmd(dumpRemoteCmd) + cmd.InitRemoteIngestCmd(dumpRemoteCmd, false) - dumpCmd.AddCommand(cloudCmd) - dumpCmd.AddCommand(localCmd) + dumpCmd.AddCommand(dumpRemoteCmd) + dumpCmd.AddCommand(dumpLocalCmd) rootCmd.AddCommand(dumpCmd) } diff --git a/pkg/cmd/dump.go b/pkg/cmd/dump.go index 36e69255c..0e71bf532 100644 --- a/pkg/cmd/dump.go +++ b/pkg/cmd/dump.go @@ -40,7 +40,7 @@ func InitDumpCmd(cmd *cobra.Command) { viper.BindPFlag(config.GlobalDebug, cmd.PersistentFlags().Lookup("debug")) //nolint: errcheck } -func InitLocalCmd(cmd *cobra.Command) { +func InitLocalDumpCmd(cmd *cobra.Command) { cmd.Flags().Bool("compress", false, "Enable compression for the dumped data (generates a tar.gz file)") viper.BindPFlag(config.CollectorFileArchiveFormat, cmd.Flags().Lookup("compress")) //nolint: errcheck @@ -49,7 +49,7 @@ func InitLocalCmd(cmd *cobra.Command) { cmd.MarkFlagRequired("output-dir") //nolint: errcheck } -func InitCloudCmd(cmd *cobra.Command) { +func InitRemoteDumpCmd(cmd *cobra.Command) { cmd.Flags().String("bucket", "", "Bucket to use to push k8s resources (e.g.: s3://)") viper.BindPFlag(config.CollectorFileBlobBucket, cmd.Flags().Lookup("bucket")) //nolint: errcheck cmd.MarkFlagRequired("bucket") //nolint: errcheck @@ -58,7 +58,7 @@ func InitCloudCmd(cmd *cobra.Command) { viper.BindPFlag(config.CollectorFileBlobRegion, cmd.Flags().Lookup("region")) //nolint: errcheck } -func InitGrpcClientCmd(cmd *cobra.Command, standalone bool) { +func InitRemoteIngestCmd(cmd *cobra.Command, standalone bool) { cmd.Flags().String("khaas-server", "", "GRPC endpoint exposed by KubeHound as a Service (KHaaS) server (e.g.: localhost:9000)") cmd.Flags().Bool("insecure", config.DefaultIngestorAPIInsecure, "Allow insecure connection to the KHaaS server grpc endpoint") @@ -77,3 +77,9 @@ func InitGrpcClientCmd(cmd *cobra.Command, standalone bool) { cmd.MarkFlagRequired("khaas-server") //nolint: errcheck } } + +func InitLocalIngestCmd(cmd *cobra.Command) { + cmd.PersistentFlags().String("cluster", "", "Cluster name to ingest (e.g.: my-cluster-1)") + viper.BindPFlag(config.IngestorClusterName, cmd.Flags().Lookup("cluster")) //nolint: errcheck + cmd.MarkFlagRequired("cluster") +} From 6d22b3c7adaffcba0d33c1a95f279181f129be7a Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:34:38 +0200 Subject: [PATCH 07/14] fix unit tests --- pkg/dump/writer/tar_writer_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/dump/writer/tar_writer_test.go b/pkg/dump/writer/tar_writer_test.go index f13d55b41..c08d22247 100644 --- a/pkg/dump/writer/tar_writer_test.go +++ b/pkg/dump/writer/tar_writer_test.go @@ -71,7 +71,7 @@ func TestTarWriter_Write(t *testing.T) { writer.Flush(ctx) writer.Close(ctx) - err = puller.ExtractTarGz(writer.OutputPath(), tmpTarExtractDir, config.DefaultMaxArchiveSize) + err = puller.ExtractTarGz(false, writer.OutputPath(), tmpTarExtractDir, config.DefaultMaxArchiveSize) if err != nil { t.Fatalf("failed to extract tar.gz: %v", err) } From b97a10a6d885ce34b6aad019abdbdebeb7bd27a8 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:50:42 +0200 Subject: [PATCH 08/14] fixing unit test --- pkg/config/config_test.go | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index b738b689f..85dd5a392 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -3,6 +3,7 @@ package config import ( "testing" + "github.com/DataDog/KubeHound/pkg/config" "github.com/spf13/viper" "github.com/stretchr/testify/assert" ) @@ -65,7 +66,7 @@ func TestMustLoadConfig(t *testing.T) { BatchSizeSmall: 100, }, Edge: EdgeBuilderConfig{ - LargeClusterOptimizations: false, + LargeClusterOptimizations: config.DefaultLargeClusterOptimizations, WorkerPoolSize: 5, WorkerPoolCapacity: 100, BatchSize: 500, @@ -84,7 +85,7 @@ func TestMustLoadConfig(t *testing.T) { }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", - MaxArchiveSize: 1073741824, + MaxArchiveSize: config.DefaultMaxArchiveSize, }, }, wantErr: false, From d839a4f1d78bb97f9f6d0a648ec22a9756520f5e Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 19:58:32 +0200 Subject: [PATCH 09/14] typo --- pkg/config/config_test.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 85dd5a392..e752dfc7a 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -3,7 +3,6 @@ package config import ( "testing" - "github.com/DataDog/KubeHound/pkg/config" "github.com/spf13/viper" "github.com/stretchr/testify/assert" ) @@ -66,7 +65,7 @@ func TestMustLoadConfig(t *testing.T) { BatchSizeSmall: 100, }, Edge: EdgeBuilderConfig{ - LargeClusterOptimizations: config.DefaultLargeClusterOptimizations, + LargeClusterOptimizations: DefaultLargeClusterOptimizations, WorkerPoolSize: 5, WorkerPoolCapacity: 100, BatchSize: 500, @@ -85,7 +84,7 @@ func TestMustLoadConfig(t *testing.T) { }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", - MaxArchiveSize: config.DefaultMaxArchiveSize, + MaxArchiveSize: DefaultMaxArchiveSize, }, }, wantErr: false, From f146cd06fa0f662519559874887175c8f004789d Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 20:00:37 +0200 Subject: [PATCH 10/14] fix linter --- pkg/cmd/dump.go | 2 +- pkg/ingestor/puller/puller.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/cmd/dump.go b/pkg/cmd/dump.go index 0e71bf532..6e8bd235c 100644 --- a/pkg/cmd/dump.go +++ b/pkg/cmd/dump.go @@ -81,5 +81,5 @@ func InitRemoteIngestCmd(cmd *cobra.Command, standalone bool) { func InitLocalIngestCmd(cmd *cobra.Command) { cmd.PersistentFlags().String("cluster", "", "Cluster name to ingest (e.g.: my-cluster-1)") viper.BindPFlag(config.IngestorClusterName, cmd.Flags().Lookup("cluster")) //nolint: errcheck - cmd.MarkFlagRequired("cluster") + cmd.MarkFlagRequired("cluster") //nolint: errcheck } diff --git a/pkg/ingestor/puller/puller.go b/pkg/ingestor/puller/puller.go index 5f7f7cd37..f5f66c734 100644 --- a/pkg/ingestor/puller/puller.go +++ b/pkg/ingestor/puller/puller.go @@ -65,7 +65,7 @@ func IsTarGz(filePath string, maxArchiveSize int64) (bool, error) { return false, fmt.Errorf("file type not supported") } -func ExtractTarGz(checkOnly bool, archivePath string, basePath string, maxArchiveSize int64) error { +func ExtractTarGz(checkOnly bool, archivePath string, basePath string, maxArchiveSize int64) error { //nolint:gocognit gzipFileReader, err := os.Open(archivePath) if err != nil { return err From 1d6f24f7d1acf1371390c7784a16bc36dbedf87a Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 20:05:05 +0200 Subject: [PATCH 11/14] fix unit test --- pkg/config/config_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index e752dfc7a..7e951e8dc 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -150,7 +150,7 @@ func TestMustLoadConfig(t *testing.T) { }, TempDir: "/tmp/kubehound", ArchiveName: "archive.tar.gz", - MaxArchiveSize: 1073741824, + MaxArchiveSize: DefaultMaxArchiveSize, }, }, wantErr: false, From c36167419290070ee67f4f5e776eaca320394372 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 20:14:22 +0200 Subject: [PATCH 12/14] fixing unit tests --- pkg/ingestor/puller/puller.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pkg/ingestor/puller/puller.go b/pkg/ingestor/puller/puller.go index f5f66c734..5a37f0fae 100644 --- a/pkg/ingestor/puller/puller.go +++ b/pkg/ingestor/puller/puller.go @@ -81,7 +81,7 @@ func ExtractTarGz(checkOnly bool, archivePath string, basePath string, maxArchiv tarReader := tar.NewReader(io.LimitReader(uncompressedStream, maxArchiveSize)) for { header, err := tarReader.Next() - if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { + if errors.Is(err, io.EOF) { break } if err != nil { @@ -121,7 +121,7 @@ func ExtractTarGz(checkOnly bool, archivePath string, basePath string, maxArchiv // a third party library (like our internal secure lib) _, err = io.Copy(outFile, tarReader) //nolint:gosec if errors.Is(err, io.EOF) || errors.Is(err, io.ErrUnexpectedEOF) { - return fmt.Errorf("archive size exceeds the limit: %d", maxArchiveSize) + return fmt.Errorf("archive size exceeds the limit: %d: %w", maxArchiveSize, err) } if err != nil { return fmt.Errorf("copying file %s: %w", cleanPath, err) From 5bd3035218f44aa32f71cfb9c73e392694a744c6 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Mon, 17 Jun 2024 20:51:31 +0200 Subject: [PATCH 13/14] fixing system-tests --- test/system/kubehound.yaml | 3 +++ test/system/kubehound_dump.yaml | 4 +++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/test/system/kubehound.yaml b/test/system/kubehound.yaml index 7c3ef5a3a..52df9603a 100644 --- a/test/system/kubehound.yaml +++ b/test/system/kubehound.yaml @@ -9,6 +9,9 @@ janusgraph: mongodb: url: "mongodb://localhost:27018" connection_timeout: 60s +builder: + edge: + large_cluster_optimizations: false telemetry: enabled: true tags: diff --git a/test/system/kubehound_dump.yaml b/test/system/kubehound_dump.yaml index 5855d1c7c..6be91ee4d 100644 --- a/test/system/kubehound_dump.yaml +++ b/test/system/kubehound_dump.yaml @@ -22,7 +22,9 @@ telemetry: profiler: period: "5s" cpu_duration: "5s" - +builder: + edge: + large_cluster_optimizations: false # Ingestor configuration (for KHaaS) ingestor: blob: From a203fc2e4932d291215f489ef3db50984e02e006 Mon Sep 17 00:00:00 2001 From: jt-dd Date: Tue, 18 Jun 2024 10:19:08 +0200 Subject: [PATCH 14/14] nit PR comment --- pkg/config/ingestor.go | 2 +- pkg/dump/writer/tar_writer_test.go | 3 ++- pkg/ingestor/puller/blob/blob.go | 3 ++- pkg/ingestor/puller/puller.go | 3 ++- pkg/kubehound/core/core_ingest_local.go | 3 ++- 5 files changed, 9 insertions(+), 5 deletions(-) diff --git a/pkg/config/ingestor.go b/pkg/config/ingestor.go index b39f4216e..ccd06d027 100644 --- a/pkg/config/ingestor.go +++ b/pkg/config/ingestor.go @@ -6,7 +6,7 @@ const ( DefaultBucketName = "" // we want to let it empty because we can easily abort if it's not configured DefaultTempDir = "/tmp/kubehound" DefaultArchiveName = "archive.tar.gz" - DefaultMaxArchiveSize = int64(2 << 30) // 1GB + DefaultMaxArchiveSize = int64(2 << 30) // 2GB IngestorAPIEndpoint = "ingestor.api.endpoint" IngestorAPIInsecure = "ingestor.api.insecure" diff --git a/pkg/dump/writer/tar_writer_test.go b/pkg/dump/writer/tar_writer_test.go index c08d22247..4b14a94f8 100644 --- a/pkg/dump/writer/tar_writer_test.go +++ b/pkg/dump/writer/tar_writer_test.go @@ -71,7 +71,8 @@ func TestTarWriter_Write(t *testing.T) { writer.Flush(ctx) writer.Close(ctx) - err = puller.ExtractTarGz(false, writer.OutputPath(), tmpTarExtractDir, config.DefaultMaxArchiveSize) + dryRun := false + err = puller.ExtractTarGz(dryRun, writer.OutputPath(), tmpTarExtractDir, config.DefaultMaxArchiveSize) if err != nil { t.Fatalf("failed to extract tar.gz: %v", err) } diff --git a/pkg/ingestor/puller/blob/blob.go b/pkg/ingestor/puller/blob/blob.go index b42519891..1b8775239 100644 --- a/pkg/ingestor/puller/blob/blob.go +++ b/pkg/ingestor/puller/blob/blob.go @@ -194,7 +194,8 @@ func (bs *BlobStore) Extract(ctx context.Context, archivePath string) error { return fmt.Errorf("Dangerous file path used during extraction, aborting: %w", err) } - err = puller.ExtractTarGz(false, archivePath, basePath, bs.cfg.Ingestor.MaxArchiveSize) + dryRun := false + err = puller.ExtractTarGz(dryRun, archivePath, basePath, bs.cfg.Ingestor.MaxArchiveSize) if err != nil { return err } diff --git a/pkg/ingestor/puller/puller.go b/pkg/ingestor/puller/puller.go index 5a37f0fae..54c0be13a 100644 --- a/pkg/ingestor/puller/puller.go +++ b/pkg/ingestor/puller/puller.go @@ -54,7 +54,8 @@ func IsTarGz(filePath string, maxArchiveSize int64) (bool, error) { case mod.IsDir(): return false, nil case mod.IsRegular(): - err = ExtractTarGz(true, filePath, "/tmp", maxArchiveSize) + dryRun := true + err = ExtractTarGz(dryRun, filePath, "/tmp", maxArchiveSize) if err != nil { return false, err } diff --git a/pkg/kubehound/core/core_ingest_local.go b/pkg/kubehound/core/core_ingest_local.go index 96a4db4af..5f69982ea 100644 --- a/pkg/kubehound/core/core_ingest_local.go +++ b/pkg/kubehound/core/core_ingest_local.go @@ -30,7 +30,8 @@ func CoreLocalIngest(ctx context.Context, khCfg *config.KubehoundConfig, resultP } // Resetting the directory to the temp directory used to extract the data khCfg.Collector.File.Directory = tmpDir - err = puller.ExtractTarGz(false, resultPath, tmpDir, config.DefaultMaxArchiveSize) + dryRun := false + err = puller.ExtractTarGz(dryRun, resultPath, tmpDir, config.DefaultMaxArchiveSize) if err != nil { return err }