-
Notifications
You must be signed in to change notification settings - Fork 9
/
extractor.go
100 lines (76 loc) · 2.27 KB
/
extractor.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
package srcfingerprint
import (
"bufio"
"encoding/json"
"fmt"
"os"
"os/exec"
"regexp"
log "github.com/sirupsen/logrus"
)
// BaseExtractor is the minimal contract of an extractor: a single Next
// method yielding an untyped value together with an error.
//
// NOTE(review): no implementation is visible in this file; presumably Next
// returns the next extracted item on each call and signals exhaustion or
// failure through the error — confirm against the implementers.
type BaseExtractor interface {
	// Next returns one value and an error.
	Next() (item interface{}, err error)
}
// GitFile describes one git object as reported by the
// `git cat-file --batch-check` format that FastExtractor.Run uses.
// Every field, including the size, is carried as a JSON string.
type GitFile struct {
	// Sha is the object name (%(objectname)).
	Sha string `json:"sha"`
	// Type is the object type (%(objecttype)); Run only keeps "blob" lines.
	Type string `json:"type"`
	// Filepath is the rest of the rev-list line (%(rest)).
	Filepath string `json:"filepath"`
	// Size is the object size (%(objectsize)), kept as text.
	Size string `json:"size"`
}
// NewFastExtractor returns a FastExtractor whose ChanGitFiles is a freshly
// created, unbuffered channel.
func NewFastExtractor() *FastExtractor {
	files := make(chan *GitFile)
	return &FastExtractor{ChanGitFiles: files}
}
// FastExtractor extracts file information directly, without going through an
// Analyzer. It is designed to rely on raw git commands to obtain what is
// needed.
type FastExtractor struct {
	// ChanGitFiles is the channel on which Run delivers each GitFile.
	ChanGitFiles chan *GitFile
}
// Run lists the blobs of the git repository located at path and streams one
// GitFile per blob on fe.ChanGitFiles, which is also the returned channel.
//
// When after is non-empty it is passed to `git rev-list --after`.
//
// The listing comes from a bash pipeline (git rev-list | git cat-file | grep)
// executed in path; its output is consumed by a background goroutine. Once the
// output is exhausted the goroutine reaps the pipeline, removes the directory
// at path and closes the channel, so callers should receive until the channel
// is closed. Lines that cannot be decoded are logged and skipped.
//
// Failing to create the stdout pipe or to start the pipeline terminates the
// process through log.Fatal.
func (fe *FastExtractor) Run(path string, after string) chan *GitFile {
	log.Infof("Extracting commits from path %s\n", path)

	const (
		// batchCheckFormat makes git cat-file print one JSON-shaped line per object.
		batchCheckFormat = `{"sha": "%(objectname)", "type": "%(objecttype)", "filepath": "%(rest)", "size": "%(objectsize)"}` //nolint
		// blobFilter is the grep pattern that keeps only blob objects.
		blobFilter = `"type": "blob"`
	)

	revList := "git rev-list --objects --all"
	var positional []string
	if after != "" {
		// after reaches git as the positional parameter $1 instead of being
		// spliced into the script, so quotes or shell metacharacters in it
		// cannot alter the command line.
		revList += ` --after "$1"`
		positional = []string{"bash", after} // $0, $1
	}
	script := fmt.Sprintf("%s | git cat-file --batch-check='%s' | grep '%s'", revList, batchCheckFormat, blobFilter)

	cmd := exec.Command("bash", append([]string{"-c", script}, positional...)...)
	cmd.Dir = path
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		log.Fatalln(err)
	}
	if err := cmd.Start(); err != nil {
		log.Fatal(err)
	}

	// Compiled once per run rather than once per line.
	backslash := regexp.MustCompile(`\\`)
	// A Scanner returns whole lines (up to 64 KiB) and reports read errors,
	// unlike ReadLine, which hands back fragments of lines longer than its buffer.
	scanner := bufio.NewScanner(stdout)

	go func() {
		defer close(fe.ChanGitFiles)

		collected := 0
		for scanner.Scan() {
			line := scanner.Bytes()
			log.Debugf("parsing line %s", line)

			// The batch-check format itself contains no backslash, so any
			// backslash comes from a file path; double it so the JSON decoder
			// reads it literally instead of as an escape sequence.
			cleaned := backslash.ReplaceAllLiteral(line, []byte(`\\`))

			gitFile := new(GitFile)
			if err := json.Unmarshal(cleaned, gitFile); err != nil {
				// If an error occurs, print a warning and do nothing with the line
				log.Warnln("Error while parsing", string(line), err)
				continue
			}
			collected++
			fe.ChanGitFiles <- gitFile
		}
		if err := scanner.Err(); err != nil {
			log.Errorln("Error while reading git output", err)
			// Stop reading so the pipeline cannot block on a full pipe and
			// keep Wait below from returning.
			_ = stdout.Close()
		}
		log.Infoln("finished reading all files from stdout from git")

		// Reap the child process and release its pipe. grep exits non-zero
		// when no blob line matched, so an error here is not always a failure.
		if err := cmd.Wait(); err != nil {
			log.Warnln("git pipeline exited with an error", err)
		}

		log.Infof("finished iterating over files, %d file(s) collected.\n", collected)
		if err := os.RemoveAll(path); err != nil {
			log.Errorln("Unable to remove directory ", path, err)
		} else {
			log.Infof("Correctly removed cloned directory %s", path)
		}
	}()

	return fe.ChanGitFiles
}