Skip to content

Commit

Permalink
Add support for determineversions API (#612).
Browse files Browse the repository at this point in the history
Tested on https://github.com/opencv/opencv

We need to set up an e2e test for this as well (maybe add some
submodules + vendored libs to https://github.com/ossf-tests/scorecard-check-osv-e2e).

```
Scanning dir /tmp/opencv
Scanning /tmp/opencv/ at commit e9e6b1e22c1a966a81aca1217b16a51fe7311b3b
Scanning directory for vendored libs: /tmp/opencv/3rdparty
Scanning potential vendored dir: /tmp/opencv/3rdparty/carotene
...
Scanning potential vendored dir: /tmp/opencv/3rdparty/libjpeg
Identified /tmp/opencv/3rdparty/libjpeg as https://github.com/libjpeg-turbo/libjpeg-turbo at 9fc018fd1aa9598f21c9bc4d8d53c0cef007bdcf.
Scanning potential vendored dir: /tmp/opencv/3rdparty/libjpeg-turbo
Identified /tmp/opencv/3rdparty/libjpeg-turbo as https://github.com/libjpeg-turbo/libjpeg-turbo at c5f269eb9665435271c05fbcaf8721fa58e9eafa.
Scanning potential vendored dir: /tmp/opencv/3rdparty/libpng
...
Scanning potential vendored dir: /tmp/opencv/3rdparty/libwebp
Identified /tmp/opencv/3rdparty/libwebp as https://chromium.googlesource.com/webm/libwebp at fd7bb21c0cb56e8a82e9bfa376164b842f433f3b.
Scanning potential vendored dir: /tmp/opencv/3rdparty/openexr
...
Scanning potential vendored dir: /tmp/opencv/3rdparty/zlib
Scanning directory for vendored libs: /tmp/opencv/modules/core/3rdparty
Scanning potential vendored dir: /tmp/opencv/modules/core/3rdparty/SoftFloat
Scanning directory for vendored libs: /tmp/opencv/modules/features2d/3rdparty
Scanning potential vendored dir: /tmp/opencv/modules/features2d/3rdparty/mscr
Scanned /tmp/opencv/platforms/maven/opencv/pom.xml file and found 0 packages
...
Scanned /tmp/opencv/platforms/maven/opencv-it/pom.xml file and found 12 packages
...
+-------------------------------------+------+-----------+---------------------+---------------------+-----------------------------------------------------------------------------------
| OSV URL                             | CVSS | ECOSYSTEM | PACKAGE             | VERSION             | SOURCE
+-------------------------------------+------+-----------+---------------------+---------------------+-----------------------------------------------------------------------------------
| https://osv.dev/OSV-2022-394        |      | GIT       |  e9e6b1e22c1a966a81aca1217b16a51fe7311b3b | ../../../../../../tmp/opencv
| https://osv.dev/OSV-2023-444        |      | GIT       |  e9e6b1e22c1a966a81aca1217b16a51fe7311b3b | ../../../../../../tmp/opencv
| https://osv.dev/CVE-2021-29390      | 7.1  | GIT       |  9fc018fd1aa9598f21c9bc4d8d53c0cef007bdcf | ../../../../../../tmp/opencv/3rdparty/libjpeg
| https://osv.dev/CVE-2021-46822      | 5.5  | GIT       |  9fc018fd1aa9598f21c9bc4d8d53c0cef007bdcf | ../../../../../../tmp/opencv/3rdparty/libjpeg
| https://osv.dev/CVE-2023-4863       | 8.8  | GIT       |  fd7bb21c0cb56e8a82e9bfa376164b842f433f3b | ../../../../../../tmp/opencv/3rdparty/libwebp
...
```
  • Loading branch information
oliverchang committed Oct 30, 2023
1 parent f819495 commit b1a1814
Show file tree
Hide file tree
Showing 2 changed files with 180 additions and 2 deletions.
63 changes: 63 additions & 0 deletions pkg/osv/osv.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ const (
QueryEndpoint = "https://api.osv.dev/v1/querybatch"
// GetEndpoint is the URL for getting vulnerabilities from OSV.
GetEndpoint = "https://api.osv.dev/v1/vulns"
// DetermineVersionEndpoint is the URL for posting determineversion queries to OSV.
DetermineVersionEndpoint = "https://api.osv.dev/v1experimental/determineversion"
// BaseVulnerabilityURL is the base URL for detailed vulnerability views.
BaseVulnerabilityURL = "https://osv.dev/"
// maxQueriesPerRequest splits up querybatch into multiple requests if
Expand Down Expand Up @@ -76,6 +78,30 @@ type HydratedBatchedResponse struct {
Results []Response `json:"results"`
}

// DetermineVersionHash holds the per file hash and path information for determineversion.
// One value describes a single file; a slice of them forms the "file_hashes"
// array of a determineversion request.
type DetermineVersionHash struct {
// Path is the file's path as sent to the API under the JSON key "path".
Path string `json:"path"`
// Hash is the raw digest of the file's contents, sent under the JSON key
// "hash". encoding/json serializes a []byte as a base64 string.
// NOTE(review): the scanner in this change fills it with an MD5 sum; confirm
// that is the only digest the endpoint accepts.
Hash []byte `json:"hash"`
}

// DetermineVersionResponse is the decoded JSON body returned by the
// determineversion endpoint. It lists candidate upstream repositories for the
// set of file hashes that was submitted.
type DetermineVersionResponse struct {
// Matches holds the candidates. The scanner only inspects the first entry,
// so it presumably arrives ordered best-first — TODO confirm against the API.
Matches []struct {
// Score is the match confidence; callers compare it against a threshold.
Score float64 `json:"score"`
// RepoInfo identifies the upstream source the files were matched to.
RepoInfo struct {
Type string `json:"type"`
// Address is the repository location (printed as a URL by the scanner).
Address string `json:"address"`
Tag string `json:"tag"`
Version string `json:"version"`
// Commit is the matched commit hash; the scanner queries vulnerabilities for it.
Commit string `json:"commit"`
} `json:"repo_info"`
} `json:"matches"`
}

// determineVersionsRequest is the JSON body posted to DetermineVersionEndpoint.
type determineVersionsRequest struct {
// Name is a label for the library being identified, sent as "name".
Name string `json:"name"`
// FileHashes lists one path/hash entry per file, sent as "file_hashes".
FileHashes []DetermineVersionHash `json:"file_hashes"`
}

// MakeCommitRequest makes a commit hash request.
func MakeCommitRequest(commit string) *Query {
return &Query{
Expand Down Expand Up @@ -302,3 +328,40 @@ func makeRetryRequest(action func() (*http.Response, error)) (*http.Response, er

return resp, err
}

// determineVersionStatusError reports a non-2xx HTTP reply from the
// determineversion endpoint.
type determineVersionStatusError struct {
	// status is the response's status line text, e.g. "503 Service Unavailable".
	status string
}

// Error implements the error interface.
func (e *determineVersionStatusError) Error() string {
	return "determineversion request failed: " + e.status
}

// MakeDetermineVersionRequest posts the given file hashes to
// DetermineVersionEndpoint and decodes the JSON reply.
//
// name is sent as the request's "name" field and hashes as its "file_hashes"
// array. The User-Agent header is only set when RequestUserAgent is non-empty.
// The request goes through http.DefaultClient, so no client-side timeout is
// applied unless the default client has been reconfigured.
//
// It returns an error if the request cannot be encoded or sent, if the server
// answers with a non-2xx status, or if the response body is not valid JSON for
// DetermineVersionResponse. On error the returned response is nil.
func MakeDetermineVersionRequest(name string, hashes []DetermineVersionHash) (*DetermineVersionResponse, error) {
	var buf bytes.Buffer

	request := determineVersionsRequest{
		Name:       name,
		FileHashes: hashes,
	}
	if err := json.NewEncoder(&buf).Encode(request); err != nil {
		return nil, err
	}

	req, err := http.NewRequest(http.MethodPost, DetermineVersionEndpoint, &buf)
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/json")
	if RequestUserAgent != "" {
		req.Header.Set("User-Agent", RequestUserAgent)
	}

	resp, err := http.DefaultClient.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	// Without this check a JSON error body would decode into an empty result
	// and be indistinguishable from "no matches".
	if resp.StatusCode < http.StatusOK || resp.StatusCode >= http.StatusMultipleChoices {
		return nil, &determineVersionStatusError{status: resp.Status}
	}

	var result DetermineVersionResponse
	if err := json.NewDecoder(resp.Body).Decode(&result); err != nil {
		return nil, err
	}

	return &result, nil
}
119 changes: 117 additions & 2 deletions pkg/osvscanner/osvscanner.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,10 @@ package osvscanner

import (
"bufio"
"crypto/md5"
"errors"
"fmt"
"io/fs"
"os"
"os/exec"
"path"
Expand Down Expand Up @@ -58,12 +60,34 @@ var VulnerabilitiesFoundErr = errors.New("vulnerabilities found")
//nolint:errname,stylecheck // Would require version bump to change
var OnlyUncalledVulnerabilitiesFoundErr = errors.New("only uncalled vulnerabilities found")

// vendoredLibNames is the set of directory base names that are treated as
// containers of vendored third-party code. scanDir looks up each directory's
// base name here to decide whether to scan its children as vendored libraries.
var vendoredLibNames = map[string]struct{}{
	"3rdparty":    {},
	"dep":         {},
	"deps":        {},
	"thirdparty":  {},
	"third-party": {},
	"third_party": {},
	"libs":        {},
	"external":    {},
	"externals":   {},
	"vendor":      {},
	"vendored":    {},
}

const (
// determineVersionThreshold is the score the best determineversion match must
// exceed before a vendored directory is treated as identified.
// This value may need to be tweaked.
determineVersionThreshold = 0.5
// maxDetermineVersionFiles caps how many file hashes queryDetermineVersions
// collects for a single directory before it gives up with an error.
maxDetermineVersionFiles = 10000
)

// scanDir walks through the given directory to try to find any relevant files
// These include:
// - Any lockfiles with scanLockfile
// - Any SBOM files with scanSBOMFile
// - Any git repositories with scanGit
func scanDir(r reporter.Reporter, query *osv.BatchedQuery, dir string, skipGit bool, recursive bool, useGitIgnore bool) error {
func scanDir(r reporter.Reporter, query *osv.BatchedQuery, dir string, skipGit bool, recursive bool, useGitIgnore bool, compareOffline bool) error {
var ignoreMatcher *gitIgnoreMatcher
if useGitIgnore {
var err error
Expand Down Expand Up @@ -128,6 +152,15 @@ func scanDir(r reporter.Reporter, query *osv.BatchedQuery, dir string, skipGit b
_ = scanSBOMFile(r, query, path, true)
}

if info.IsDir() && !compareOffline {
if _, ok := vendoredLibNames[filepath.Base(path)]; ok {
err := scanDirWithVendoredLibs(r, query, path)
if err != nil {
r.PrintText(fmt.Sprintf("scan failed for dir containing vendored libs %s: %v\n", path, err))
}
}
}

if !root && !recursive && info.IsDir() {
return filepath.SkipDir
}
Expand Down Expand Up @@ -176,6 +209,88 @@ func parseGitIgnores(path string) (*gitIgnoreMatcher, error) {
return &gitIgnoreMatcher{matcher: matcher, repoPath: repopath}, nil
}

// queryDetermineVersions hashes the C/C++ sources under repoDir and asks the
// OSV determineversion API which upstream repository and commit they match.
//
// The base name of repoDir is sent as the library name. If no matching source
// files are found, an empty response is returned without contacting the API.
// Errors from hashing or from the API call are wrapped and returned with a nil
// response.
func queryDetermineVersions(repoDir string) (*osv.DetermineVersionResponse, error) {
	hashes, err := hashDetermineVersionFiles(repoDir)
	if err != nil {
		return nil, fmt.Errorf("failed during hashing: %w", err)
	}

	// Nothing to identify; skip the network round trip.
	if len(hashes) == 0 {
		return &osv.DetermineVersionResponse{}, nil
	}

	result, err := osv.MakeDetermineVersionRequest(filepath.Base(repoDir), hashes)
	if err != nil {
		return nil, fmt.Errorf("failed to determine versions: %w", err)
	}

	return result, nil
}

// hashDetermineVersionFiles walks repoDir and returns an MD5 hash entry for
// every C/C++ header or source file, skipping nested git repositories.
func hashDetermineVersionFiles(repoDir string) ([]osv.DetermineVersionHash, error) {
	var hashes []osv.DetermineVersionHash

	err := filepath.Walk(repoDir, func(p string, info fs.FileInfo, err error) error {
		// Walk passes a non-nil err (and possibly a nil info) when it cannot
		// stat or read an entry; info must not be touched in that case.
		if err != nil {
			return err
		}

		if info.IsDir() {
			if _, statErr := os.Stat(filepath.Join(p, ".git")); statErr == nil {
				// Found a git repo, stop here as otherwise we may get duplicated
				// results with our regular git commit scanning.
				return filepath.SkipDir
			}

			return nil
		}

		switch filepath.Ext(p) {
		case ".hpp", ".h", ".hh", ".cc", ".c", ".cpp":
		default:
			return nil
		}

		// Enforce the cap before reading, so at most
		// maxDetermineVersionFiles files are ever hashed.
		if len(hashes) >= maxDetermineVersionFiles {
			return errors.New("too many files to hash")
		}

		buf, err := os.ReadFile(p)
		if err != nil {
			return err
		}

		// Strip only the repoDir prefix. A plain string replace would also
		// remove later occurrences of repoDir inside the path.
		rel, err := filepath.Rel(repoDir, p)
		if err != nil {
			return err
		}

		sum := md5.Sum(buf)
		hashes = append(hashes, osv.DetermineVersionHash{
			// Keep the leading separator so paths look the same as before
			// (e.g. "/src/foo.c").
			Path: string(filepath.Separator) + rel,
			Hash: sum[:],
		})

		return nil
	})
	if err != nil {
		return nil, err
	}

	return hashes, nil
}

// scanDirWithVendoredLibs treats every immediate sub-directory of path as a
// potential vendored library. It tries to identify each one through
// queryDetermineVersions and, when the best match scores above
// determineVersionThreshold, queues a commit query for it via scanGitCommit.
//
// Progress and per-library identification failures are reported through r.
// A failure to identify one library does not stop the remaining ones from
// being scanned. An error is returned only when path itself cannot be listed
// or when scanGitCommit fails.
func scanDirWithVendoredLibs(r reporter.Reporter, query *osv.BatchedQuery, path string) error {
	r.PrintText(fmt.Sprintf("Scanning directory for vendored libs: %s\n", path))

	entries, err := os.ReadDir(path)
	if err != nil {
		return err
	}

	for _, entry := range entries {
		if !entry.IsDir() {
			continue
		}

		libPath := filepath.Join(path, entry.Name())
		r.PrintText(fmt.Sprintf("Scanning potential vendored dir: %s\n", libPath))

		results, err := queryDetermineVersions(libPath)
		if err != nil {
			// One oversized or unreadable library, or a transient API error,
			// should not hide findings in its siblings.
			r.PrintText(fmt.Sprintf("Error scanning sub-directory '%s' with error: %v\n", libPath, err))
			continue
		}

		// Only the first match is considered.
		if len(results.Matches) > 0 && results.Matches[0].Score > determineVersionThreshold {
			match := results.Matches[0]
			r.PrintText(fmt.Sprintf("Identified %s as %s at %s.\n", libPath, match.RepoInfo.Address, match.RepoInfo.Commit))
			if err := scanGitCommit(query, match.RepoInfo.Commit, libPath); err != nil {
				return err
			}
		}
	}

	return nil
}

// gitIgnoreMatcher.match will return true if the file/directory matches a gitignore entry
// i.e. true if it should be ignored
func (m *gitIgnoreMatcher) match(absPath string, isDir bool) (bool, error) {
Expand Down Expand Up @@ -616,7 +731,7 @@ func DoScan(actions ScannerActions, r reporter.Reporter) (models.VulnerabilityRe

for _, dir := range actions.DirectoryPaths {
r.PrintText(fmt.Sprintf("Scanning dir %s\n", dir))
err := scanDir(r, &query, dir, actions.SkipGit, actions.Recursive, !actions.NoIgnore)
err := scanDir(r, &query, dir, actions.SkipGit, actions.Recursive, !actions.NoIgnore, actions.CompareOffline)
if err != nil {
return models.VulnerabilityResults{}, err
}
Expand Down

0 comments on commit b1a1814

Please sign in to comment.