Skip to content

Commit

Permalink
New Source: HuggingFace (trufflesecurity#3000)
Browse files Browse the repository at this point in the history
* initial spike on hf

* added in user and org enum

* adding huggingface source

* updated with lint suggestions

* updated readme

* addressing resources that require org approval to access

* removing unneeded code

* updating with new error msg for 403

* deleted unused code + added resource check in main
  • Loading branch information
joeleonjr authored Jun 27, 2024
1 parent e9206c6 commit 01a1499
Show file tree
Hide file tree
Showing 14 changed files with 4,564 additions and 960 deletions.
21 changes: 21 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,27 @@ trufflehog elasticsearch \
--api-key 'MlVtVjBZ...ZSYlduYnF1djh3NG5FQQ=='
```

## 15. Scan HuggingFace

### Scan a HuggingFace Model, Dataset or Space

```bash
trufflehog huggingface --model <username/modelname> --space <username/spacename> --dataset <username/datasetname>
```

### Scan all Models, Datasets and Spaces belonging to a HuggingFace Org/User

```bash
trufflehog huggingface --org <orgname> --user <username>
```

Optionally, skip scanning a type of resource with `--skip-all-models`, `--skip-all-datasets`, `--skip-all-spaces`, or skip a particular resource with `--ignore-models <resource-name>`, `--ignore-datasets <resource-name>` or `--ignore-spaces <resource-name>`.

### Scan Discussion and PR Comments
```bash
trufflehog huggingface --model <username/modelname> --include-discussions --include-prs
```

# :question: FAQ

- All I see is `🐷🔑🐷 TruffleHog. Unearth your secrets. 🐷🔑🐷` and the program exits, what gives?
Expand Down
54 changes: 54 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,27 @@ var (
jenkinsPassword = jenkinsScan.Flag("password", "Jenkins password").Envar("JENKINS_PASSWORD").String()
jenkinsInsecureSkipVerifyTLS = jenkinsScan.Flag("insecure-skip-verify-tls", "Skip TLS verification").Envar("JENKINS_INSECURE_SKIP_VERIFY_TLS").Bool()

huggingfaceScan = cli.Command("huggingface", "Find credentials in HuggingFace datasets, models and spaces.")
huggingfaceEndpoint = huggingfaceScan.Flag("endpoint", "HuggingFace endpoint.").Default("https://huggingface.co").String()
huggingfaceModels = huggingfaceScan.Flag("model", "HuggingFace model to scan. You can repeat this flag. Example: 'username/model'").Strings()
huggingfaceSpaces = huggingfaceScan.Flag("space", "HuggingFace space to scan. You can repeat this flag. Example: 'username/space'").Strings()
huggingfaceDatasets = huggingfaceScan.Flag("dataset", "HuggingFace dataset to scan. You can repeat this flag. Example: 'username/dataset'").Strings()
huggingfaceOrgs = huggingfaceScan.Flag("org", `HuggingFace organization to scan. You can repeat this flag. Example: "trufflesecurity"`).Strings()
huggingfaceUsers = huggingfaceScan.Flag("user", `HuggingFace user to scan. You can repeat this flag. Example: "trufflesecurity"`).Strings()
huggingfaceToken = huggingfaceScan.Flag("token", "HuggingFace token. Can be provided with environment variable HUGGINGFACE_TOKEN.").Envar("HUGGINGFACE_TOKEN").String()

huggingfaceIncludeModels = huggingfaceScan.Flag("include-models", "Models to include in scan. You can repeat this flag. Must use HuggingFace model full name. Example: 'username/model' (Only used with --user or --org)").Strings()
huggingfaceIncludeSpaces = huggingfaceScan.Flag("include-spaces", "Spaces to include in scan. You can repeat this flag. Must use HuggingFace space full name. Example: 'username/space' (Only used with --user or --org)").Strings()
huggingfaceIncludeDatasets = huggingfaceScan.Flag("include-datasets", "Datasets to include in scan. You can repeat this flag. Must use HuggingFace dataset full name. Example: 'username/dataset' (Only used with --user or --org)").Strings()
huggingfaceIgnoreModels = huggingfaceScan.Flag("ignore-models", "Models to ignore in scan. You can repeat this flag. Must use HuggingFace model full name. Example: 'username/model' (Only used with --user or --org)").Strings()
huggingfaceIgnoreSpaces = huggingfaceScan.Flag("ignore-spaces", "Spaces to ignore in scan. You can repeat this flag. Must use HuggingFace space full name. Example: 'username/space' (Only used with --user or --org)").Strings()
huggingfaceIgnoreDatasets = huggingfaceScan.Flag("ignore-datasets", "Datasets to ignore in scan. You can repeat this flag. Must use HuggingFace dataset full name. Example: 'username/dataset' (Only used with --user or --org)").Strings()
huggingfaceSkipAllModels = huggingfaceScan.Flag("skip-all-models", "Skip all model scans. (Only used with --user or --org)").Bool()
huggingfaceSkipAllSpaces = huggingfaceScan.Flag("skip-all-spaces", "Skip all space scans. (Only used with --user or --org)").Bool()
huggingfaceSkipAllDatasets = huggingfaceScan.Flag("skip-all-datasets", "Skip all dataset scans. (Only used with --user or --org)").Bool()
huggingfaceIncludeDiscussions = huggingfaceScan.Flag("include-discussions", "Include discussions in scan.").Bool()
huggingfaceIncludePrs = huggingfaceScan.Flag("include-prs", "Include pull requests in scan.").Bool()

usingTUI = false
)

Expand Down Expand Up @@ -738,6 +759,39 @@ func runSingleScan(ctx context.Context, cmd string, cfg engine.Config) (metrics,
if err := eng.ScanJenkins(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan Jenkins: %v", err)
}
case huggingfaceScan.FullCommand():
if *huggingfaceEndpoint != "" {
*huggingfaceEndpoint = strings.TrimRight(*huggingfaceEndpoint, "/")
}

if len(*huggingfaceModels) == 0 && len(*huggingfaceSpaces) == 0 && len(*huggingfaceDatasets) == 0 && len(*huggingfaceOrgs) == 0 && len(*huggingfaceUsers) == 0 {
return scanMetrics, fmt.Errorf("invalid config: you must specify at least one organization, user, model, space or dataset")
}

cfg := engine.HuggingfaceConfig{
Endpoint: *huggingfaceEndpoint,
Models: *huggingfaceModels,
Spaces: *huggingfaceSpaces,
Datasets: *huggingfaceDatasets,
Organizations: *huggingfaceOrgs,
Users: *huggingfaceUsers,
Token: *huggingfaceToken,
IncludeModels: *huggingfaceIncludeModels,
IncludeSpaces: *huggingfaceIncludeSpaces,
IncludeDatasets: *huggingfaceIncludeDatasets,
IgnoreModels: *huggingfaceIgnoreModels,
IgnoreSpaces: *huggingfaceIgnoreSpaces,
IgnoreDatasets: *huggingfaceIgnoreDatasets,
SkipAllModels: *huggingfaceSkipAllModels,
SkipAllSpaces: *huggingfaceSkipAllSpaces,
SkipAllDatasets: *huggingfaceSkipAllDatasets,
IncludeDiscussions: *huggingfaceIncludeDiscussions,
IncludePrs: *huggingfaceIncludePrs,
Concurrency: *concurrency,
}
if err := eng.ScanHuggingface(ctx, cfg); err != nil {
return scanMetrics, fmt.Errorf("failed to scan HuggingFace: %v", err)
}
default:
return scanMetrics, fmt.Errorf("invalid command: %s", cmd)
}
Expand Down
80 changes: 80 additions & 0 deletions pkg/engine/huggingface.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
package engine

import (
"google.golang.org/protobuf/proto"
"google.golang.org/protobuf/types/known/anypb"

"github.com/trufflesecurity/trufflehog/v3/pkg/context"
"github.com/trufflesecurity/trufflehog/v3/pkg/pb/sourcespb"
"github.com/trufflesecurity/trufflehog/v3/pkg/sources/huggingface"
)

// HuggingfaceConfig represents the configuration for a HuggingFace scan.
//
// ScanHuggingface copies every field except Token and Concurrency verbatim
// into the sourcespb.Huggingface connection message; Token selects the
// credential type and Concurrency is handed to the source's Init call.
type HuggingfaceConfig struct {
	// Endpoint is the HuggingFace endpoint to talk to.
	Endpoint string

	// Models, Spaces and Datasets list individual resources to scan.
	Models   []string
	Spaces   []string
	Datasets []string

	// Organizations and Users list the orgs and users to scan.
	Organizations []string
	Users         []string

	// Include*/Ignore* list resources to include in or ignore from the scan.
	// Per the CLI help text they are only used together with Users or
	// Organizations.
	IncludeModels   []string
	IgnoreModels    []string
	IncludeSpaces   []string
	IgnoreSpaces    []string
	IncludeDatasets []string
	IgnoreDatasets  []string

	// SkipAll* skip every resource of the given kind. Per the CLI help text
	// they are only used together with Users or Organizations.
	SkipAllModels   bool
	SkipAllSpaces   bool
	SkipAllDatasets bool

	// IncludeDiscussions and IncludePrs request that discussions and pull
	// requests be included in the scan.
	IncludeDiscussions bool
	IncludePrs         bool

	// Token is the HuggingFace token. When empty, ScanHuggingface configures
	// an unauthenticated credential instead.
	Token string

	// Concurrency is passed through to the HuggingFace source's Init.
	Concurrency int
}

// ScanHuggingface scans HuggingFace with the provided options.
//
// It builds a sourcespb.Huggingface connection from c, using token
// authentication when c.Token is non-empty and an unauthenticated credential
// otherwise. The connection is marshaled into an anypb.Any, used to
// initialize a huggingface.Source, and the source is then run through the
// engine's source manager.
//
// It returns the first error from marshaling the connection, initializing
// the source, or running it.
func (e *Engine) ScanHuggingface(ctx context.Context, c HuggingfaceConfig) error {
	connection := sourcespb.Huggingface{
		Endpoint:           c.Endpoint,
		Models:             c.Models,
		Spaces:             c.Spaces,
		Datasets:           c.Datasets,
		Organizations:      c.Organizations,
		Users:              c.Users,
		IncludeModels:      c.IncludeModels,
		IgnoreModels:       c.IgnoreModels,
		IncludeSpaces:      c.IncludeSpaces,
		IgnoreSpaces:       c.IgnoreSpaces,
		IncludeDatasets:    c.IncludeDatasets,
		IgnoreDatasets:     c.IgnoreDatasets,
		SkipAllModels:      c.SkipAllModels,
		SkipAllSpaces:      c.SkipAllSpaces,
		SkipAllDatasets:    c.SkipAllDatasets,
		IncludeDiscussions: c.IncludeDiscussions,
		IncludePrs:         c.IncludePrs,
	}

	// Exactly one credential variant is always set on the connection.
	if len(c.Token) > 0 {
		connection.Credential = &sourcespb.Huggingface_Token{
			Token: c.Token,
		}
	} else {
		connection.Credential = &sourcespb.Huggingface_Unauthenticated{}
	}

	// The source's Init receives its connection as an anypb.Any.
	var conn anypb.Any
	if err := anypb.MarshalFrom(&conn, &connection, proto.MarshalOptions{}); err != nil {
		ctx.Logger().Error(err, "failed to marshal huggingface connection")
		return err
	}

	sourceName := "trufflehog - huggingface"
	// NOTE(review): the error returned by GetIDs is discarded here; confirm
	// this is intentional before relying on sourceID/jobID being valid.
	sourceID, jobID, _ := e.sourceManager.GetIDs(ctx, sourceName, sourcespb.SourceType_SOURCE_TYPE_HUGGINGFACE)

	huggingfaceSource := &huggingface.Source{}
	// NOTE(review): the literal true is presumably Init's "verify" argument —
	// confirm against huggingface.Source.Init.
	if err := huggingfaceSource.Init(ctx, sourceName, jobID, sourceID, true, &conn, c.Concurrency); err != nil {
		return err
	}

	_, err := e.sourceManager.Run(ctx, sourceName, huggingfaceSource)
	return err
}
Loading

0 comments on commit 01a1499

Please sign in to comment.