Skip to content

Commit

Permalink
fix github perf issue (#1421)
Browse files Browse the repository at this point in the history
Co-authored-by: Sandeep Singh <[email protected]>
  • Loading branch information
dogancanbakir and ehsandeep authored Nov 27, 2024
1 parent df17c01 commit 1d4a29c
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 25 deletions.
2 changes: 1 addition & 1 deletion v2/pkg/runner/enumerate.go
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,7 @@ func (r *Runner) EnumerateSingleDomainWithCtx(ctx context.Context, domain string
for result := range passiveResults {
switch result.Type {
case subscraping.Error:
gologger.Warning().Msgf("Could not run source %s: %s\n", result.Source, result.Error)
gologger.Warning().Msgf("Encountered an error with source %s: %s\n", result.Source, result.Error)
case subscraping.Subdomain:
// Validate the subdomain found and remove wildcards from it
if !strings.HasSuffix(result.Value, "."+domain) {
Expand Down
67 changes: 43 additions & 24 deletions v2/pkg/subscraping/sources/github/github.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"regexp"
"strconv"
"strings"
"sync"
"time"

jsoniter "github.com/json-iterator/go"
Expand Down Expand Up @@ -142,40 +143,58 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *

// proccesItems processes GitHub response items
func (s *Source) proccesItems(ctx context.Context, items []item, domainRegexp *regexp.Regexp, name string, session *subscraping.Session, results chan subscraping.Result) error {
for _, item := range items {
// find subdomains in code
resp, err := session.SimpleGet(ctx, rawURL(item.HTMLURL))
if err != nil {
if resp != nil && resp.StatusCode != http.StatusNotFound {
session.DiscardHTTPResponse(resp)
var wg sync.WaitGroup
errChan := make(chan error, len(items))

for _, responseItem := range items {
wg.Add(1)
go func(responseItem item) {
defer wg.Done()

// find subdomains in code
resp, err := session.SimpleGet(ctx, rawURL(responseItem.HTMLURL))
if err != nil {
if resp != nil && resp.StatusCode != http.StatusNotFound {
session.DiscardHTTPResponse(resp)
}
errChan <- err
return
}
return err
}

if resp.StatusCode == http.StatusOK {
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
line := scanner.Text()
if line == "" {
continue
if resp.StatusCode == http.StatusOK {
scanner := bufio.NewScanner(resp.Body)
for scanner.Scan() {
line := scanner.Text()
if line == "" {
continue
}
for _, subdomain := range domainRegexp.FindAllString(normalizeContent(line), -1) {
results <- subscraping.Result{Source: name, Type: subscraping.Subdomain, Value: subdomain}
s.results++
}
}
for _, subdomain := range domainRegexp.FindAllString(normalizeContent(line), -1) {
resp.Body.Close()
}

// find subdomains in text matches
for _, textMatch := range responseItem.TextMatches {
for _, subdomain := range domainRegexp.FindAllString(normalizeContent(textMatch.Fragment), -1) {
results <- subscraping.Result{Source: name, Type: subscraping.Subdomain, Value: subdomain}
s.results++

}
}
resp.Body.Close()
}
}(responseItem)
}

// find subdomains in text matches
for _, textMatch := range item.TextMatches {
for _, subdomain := range domainRegexp.FindAllString(normalizeContent(textMatch.Fragment), -1) {
results <- subscraping.Result{Source: name, Type: subscraping.Subdomain, Value: subdomain}
s.results++
}
wg.Wait()
close(errChan)

for err := range errChan {
if err != nil {
return err
}
}

return nil
}

Expand Down

0 comments on commit 1d4a29c

Please sign in to comment.