Skip to content

Commit

Permalink
Sanitize URLs for bucket fetch error messages.
Browse files Browse the repository at this point in the history
Co-authored-by: Jagpreet Singh Tamber <[email protected]>
Signed-off-by: Stefan Prodan <[email protected]>
  • Loading branch information
2 people authored and stefanprodan committed Apr 4, 2024
1 parent 9a0f9af commit c564e7a
Show file tree
Hide file tree
Showing 3 changed files with 187 additions and 2 deletions.
4 changes: 2 additions & 2 deletions internal/controller/bucket_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -728,7 +728,7 @@ func fetchEtagIndex(ctx context.Context, provider BucketProvider, obj *bucketv1.
path := filepath.Join(tempDir, sourceignore.IgnoreFile)
if _, err := provider.FGetObject(ctxTimeout, obj.Spec.BucketName, sourceignore.IgnoreFile, path); err != nil {
if !provider.ObjectIsNotFound(err) {
return err
return fmt.Errorf("failed to get Etag for '%s' object: %w", sourceignore.IgnoreFile, serror.SanitizeError(err))
}
}
ps, err := sourceignore.ReadIgnoreFile(path, nil)
Expand Down Expand Up @@ -792,7 +792,7 @@ func fetchIndexFiles(ctx context.Context, provider BucketProvider, obj *bucketv1
index.Delete(k)
return nil
}
return fmt.Errorf("failed to get '%s' object: %w", k, err)
return fmt.Errorf("failed to get '%s' object: %w", k, serror.SanitizeError(err))
}
if t != etag {
index.Add(k, etag)
Expand Down
60 changes: 60 additions & 0 deletions internal/error/sanitized.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package error

import (
"fmt"
"net/url"
"regexp"
)

type SanitizedError struct {
err string
}

func (e SanitizedError) Error() string {
return e.err
}

// SanitizeError extracts all URLs from the error message
// and replaces them with the URL without the query string.
func SanitizeError(err error) SanitizedError {
errorMessage := err.Error()
for _, u := range extractURLs(errorMessage) {
urlWithoutQueryString, err := removeQueryString(u)
if err == nil {
re, err := regexp.Compile(fmt.Sprintf("%s*", regexp.QuoteMeta(u)))
if err == nil {
errorMessage = re.ReplaceAllString(errorMessage, urlWithoutQueryString)
}
}
}

return SanitizedError{errorMessage}
}

// removeQueryString takes a URL string as input and returns the URL without the query string.
func removeQueryString(urlStr string) (string, error) {
// Parse the URL.
u, err := url.Parse(urlStr)
if err != nil {
return "", err
}

// Rebuild the URL without the query string.
u.RawQuery = ""
return u.String(), nil
}

// extractURLs takes a log message as input and returns the URLs found.
func extractURLs(logMessage string) []string {
// Define a regular expression to match a URL.
// This is a simple pattern and might need to be adjusted depending on the log message format.
urlRegex := regexp.MustCompile(`https?://[^\s]+`)

// Find the first match in the log message.
matches := urlRegex.FindAllString(logMessage, -1)
if len(matches) == 0 {
return []string{}
}

return matches
}
125 changes: 125 additions & 0 deletions internal/error/sanitized_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
package error

import (
"errors"
"testing"

. "github.com/onsi/gomega"
)

func Test_extractURLs(t *testing.T) {

tests := []struct {
name string
logMessage string
wantUrls []string
}{
{
name: "Log Contains single URL",
logMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\": dial tcp 20.60.53.129:443: connect: connection refused",
wantUrls: []string{"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\":"},
},
{
name: "Log Contains multiple URL",
logMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no : dial tcp 20.60.53.129:443: connect: connection refused",
wantUrls: []string{
"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es",
"https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no",
},
},
{
name: "Log Contains No URL",
logMessage: "Log message without URL",
wantUrls: []string{},
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

urls := extractURLs(tt.logMessage)

g.Expect(len(urls)).To(Equal(len(tt.wantUrls)))
for i := range tt.wantUrls {
g.Expect(urls[i]).To(Equal(tt.wantUrls[i]))
}
})
}
}

func Test_removeQueryString(t *testing.T) {

tests := []struct {
name string
urlStr string
wantUrl string
}{
{
name: "URL with query string",
urlStr: "https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02",
wantUrl: "https://blobstorage.blob.core.windows.net/container/index.yaml",
},
{
name: "URL without query string",
urlStr: "https://blobstorage.blob.core.windows.net/container/index.yaml",
wantUrl: "https://blobstorage.blob.core.windows.net/container/index.yaml",
},
{
name: "URL with query string and port",
urlStr: "https://blobstorage.blob.core.windows.net:443/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02",
wantUrl: "https://blobstorage.blob.core.windows.net:443/container/index.yaml",
},
{
name: "Invalid URL",
urlStr: "NoUrl",
wantUrl: "NoUrl",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

urlWithoutQueryString, err := removeQueryString(tt.urlStr)

g.Expect(err).To(BeNil())
g.Expect(urlWithoutQueryString).To(Equal(tt.wantUrl))
})
}
}

func Test_SanitizeError(t *testing.T) {

tests := []struct {
name string
errMessage string
wantErrMessage string
}{
{
name: "Log message with URL with query string",
errMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?se=2024-05-01T16%3A28%3A26Z&sig=Signature&sp=rl&sr=c&st=2024-02-01T16%3A28%3A26Z&sv=2022-11-02\": dial tcp 20.60.53.129:443: connect: connection refused",
wantErrMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml dial tcp 20.60.53.129:443: connect: connection refused",
},
{
name: "Log message without URL",
errMessage: "Log message contains no URL",
wantErrMessage: "Log message contains no URL",
},

{
name: "Log message with multiple Urls",
errMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml?abc=es https://blobstorage1.blob.core.windows.net/container/index.yaml?abc=no dial tcp 20.60.53.129:443: connect: connection refused",
wantErrMessage: "Get \"https://blobstorage.blob.core.windows.net/container/index.yaml https://blobstorage1.blob.core.windows.net/container/index.yaml dial tcp 20.60.53.129:443: connect: connection refused",
},
}

for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
g := NewWithT(t)

err := SanitizeError(errors.New(tt.errMessage))
g.Expect(err.Error()).To(Equal(tt.wantErrMessage))
})
}
}

0 comments on commit c564e7a

Please sign in to comment.