From 5ac45675603430e8b94548fa256078e86d226cf4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Do=C4=9Fan=20Can=20Bak=C4=B1r?=
Date: Sat, 26 Oct 2024 23:33:48 +0300
Subject: [PATCH] fix github perf issue

---
 v2/pkg/runner/enumerate.go                  |  2 +-
 v2/pkg/subscraping/sources/github/github.go | 67 +++++++++++++--------
 2 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/v2/pkg/runner/enumerate.go b/v2/pkg/runner/enumerate.go
index 59c5d5041..727359f50 100644
--- a/v2/pkg/runner/enumerate.go
+++ b/v2/pkg/runner/enumerate.go
@@ -61,7 +61,7 @@ func (r *Runner) EnumerateSingleDomainWithCtx(ctx context.Context, domain string
 	for result := range passiveResults {
 		switch result.Type {
 		case subscraping.Error:
-			gologger.Warning().Msgf("Could not run source %s: %s\n", result.Source, result.Error)
+			gologger.Warning().Msgf("Encountered an error with source %s: %s\n", result.Source, result.Error)
 		case subscraping.Subdomain:
 			// Validate the subdomain found and remove wildcards from
 			if !strings.HasSuffix(result.Value, "."+domain) {
diff --git a/v2/pkg/subscraping/sources/github/github.go b/v2/pkg/subscraping/sources/github/github.go
index e1a77d727..6034b2dbc 100644
--- a/v2/pkg/subscraping/sources/github/github.go
+++ b/v2/pkg/subscraping/sources/github/github.go
@@ -11,6 +11,7 @@ import (
 	"regexp"
 	"strconv"
 	"strings"
+	"sync"
 	"time"
 
 	jsoniter "github.com/json-iterator/go"
@@ -142,40 +143,58 @@ func (s *Source) enumerate(ctx context.Context, searchURL string, domainRegexp *
 
 // proccesItems process github response items
 func (s *Source) proccesItems(ctx context.Context, items []item, domainRegexp *regexp.Regexp, name string, session *subscraping.Session, results chan subscraping.Result) error {
-	for _, item := range items {
-		// find subdomains in code
-		resp, err := session.SimpleGet(ctx, rawURL(item.HTMLURL))
-		if err != nil {
-			if resp != nil && resp.StatusCode != http.StatusNotFound {
-				session.DiscardHTTPResponse(resp)
+	var wg sync.WaitGroup
+	errChan := make(chan error, len(items))
+
+	for _, responseItem := range items {
+		wg.Add(1)
+		go func(responseItem item) {
+			defer wg.Done()
+
+			// find subdomains in code
+			resp, err := session.SimpleGet(ctx, rawURL(responseItem.HTMLURL))
+			if err != nil {
+				if resp != nil && resp.StatusCode != http.StatusNotFound {
+					session.DiscardHTTPResponse(resp)
+				}
+				errChan <- err
+				return
 			}
-			return err
-		}
 
-		if resp.StatusCode == http.StatusOK {
-			scanner := bufio.NewScanner(resp.Body)
-			for scanner.Scan() {
-				line := scanner.Text()
-				if line == "" {
-					continue
+			if resp.StatusCode == http.StatusOK {
+				scanner := bufio.NewScanner(resp.Body)
+				for scanner.Scan() {
+					line := scanner.Text()
+					if line == "" {
+						continue
+					}
+					for _, subdomain := range domainRegexp.FindAllString(normalizeContent(line), -1) {
+						results <- subscraping.Result{Source: name, Type: subscraping.Subdomain, Value: subdomain}
+						s.results++
+					}
 				}
-				for _, subdomain := range domainRegexp.FindAllString(normalizeContent(line), -1) {
+				resp.Body.Close()
+			}
+
+			// find subdomains in text matches
+			for _, textMatch := range responseItem.TextMatches {
+				for _, subdomain := range domainRegexp.FindAllString(normalizeContent(textMatch.Fragment), -1) {
 					results <- subscraping.Result{Source: name, Type: subscraping.Subdomain, Value: subdomain}
 					s.results++
 				}
 			}
-			resp.Body.Close()
-		}
+		}(responseItem)
+	}
 
-		// find subdomains in text matches
-		for _, textMatch := range item.TextMatches {
-			for _, subdomain := range domainRegexp.FindAllString(normalizeContent(textMatch.Fragment), -1) {
-				results <- subscraping.Result{Source: name, Type: subscraping.Subdomain, Value: subdomain}
-				s.results++
-			}
+	wg.Wait()
+	close(errChan)
+
+	for err := range errChan {
+		if err != nil {
+			return err
 		}
 	}
+
 	return nil
 }
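
Note on the approach (outside the patch itself): the github.go change swaps the sequential per-result loop for a fan-out: one goroutine per search result, a sync.WaitGroup to wait for all of them, and an error channel buffered to len(items) so every goroutine can report a failure without blocking. The sketch below is a minimal, self-contained illustration of that pattern, not subfinder code; processItem and processAll are hypothetical names standing in for the per-item work (fetching the raw file and scanning it for subdomains).

package main

import (
	"errors"
	"fmt"
	"sync"
)

// processItem is a hypothetical stand-in for the per-item work.
func processItem(n int) error {
	if n%3 == 0 {
		return errors.New("simulated fetch failure")
	}
	return nil
}

func processAll(items []int) error {
	var wg sync.WaitGroup
	// Buffered to len(items) so each goroutine can send one error without
	// blocking, even though the channel is only drained after wg.Wait().
	errChan := make(chan error, len(items))

	for _, it := range items {
		wg.Add(1)
		go func(it int) {
			defer wg.Done()
			if err := processItem(it); err != nil {
				errChan <- err
				return
			}
		}(it)
	}

	wg.Wait()
	close(errChan)

	// Report the first error any goroutine sent, as the patch does.
	for err := range errChan {
		if err != nil {
			return err
		}
	}
	return nil
}

func main() {
	fmt.Println(processAll([]int{1, 2, 3, 4, 5}))
}

A consequence of this shape is that every goroutine runs to completion before errors are inspected: a single failed fetch no longer aborts the remaining items part-way through, and the first recorded error is returned only after the whole batch has finished.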