From 0359bb590c19458d5ab824904b241da5cb782a4c Mon Sep 17 00:00:00 2001 From: Icaro Motta Date: Wed, 24 May 2023 16:08:03 -0300 Subject: [PATCH 01/12] Unfurl more websites --- protocol/linkpreview/linkpreview.go | 160 ++++++++++++++++++----- protocol/linkpreview/linkpreview_test.go | 88 ++++++++++--- 2 files changed, 199 insertions(+), 49 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index 52888ea6837..ce2d49d7d86 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -2,11 +2,13 @@ package linkpreview import ( "context" + "encoding/json" "errors" "fmt" "io/ioutil" "net/http" neturl "net/url" + "regexp" "strings" "time" @@ -47,9 +49,10 @@ type Unfurler interface { const ( requestTimeout = 15000 * time.Millisecond - // Certain websites return an HTML error page if the user agent is unknown to - // them, e.g. IMDb. - defaultUserAgent = "Mozilla/5.0 (X11; Linux x86_64; rv:109.0) Gecko/20100101 Firefox/109.0" + // Without an user agent, many providers treat status-go as a gluttony bot, + // and either respond more frequently with a 429 (Too Many Requests), or + // simply refuse to return valid data. + defaultUserAgent = "status-go/v0.151.15" // Currently set to English, but we could make this setting dynamic according // to the user's language of choice. @@ -98,20 +101,6 @@ func newDefaultLinkPreview(url *neturl.URL) common.LinkPreview { } } -func httpGETForOpenGraph(url string) (*http.Response, context.CancelFunc, error) { - ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) - - req, err := http.NewRequestWithContext(ctx, "GET", url, nil) - if err != nil { - return nil, cancel, err - } - req.Header.Set("User-Agent", defaultUserAgent) - req.Header.Set("Accept-Language", defaultAcceptLanguage) - - res, err := httpClient.Do(req) - return res, cancel, err -} - func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail, error) { var thumbnail common.LinkPreviewThumbnail @@ -136,16 +125,101 @@ func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail return thumbnail, nil } +type OEmbedUnfurler struct { + logger *zap.Logger + // oembedEndpoint describes where the consumer may request representations for + // the supported URL scheme. For example, for YouTube, it is + // https://www.youtube.com/oembed. + oembedEndpoint string + // url is the actual URL to be unfurled. + url neturl.URL +} + +type OEmbedResponse struct { + Title string `json:"title"` + ThumbnailURL string `json:"thumbnail_url"` +} + +func (u OEmbedUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { + preview := newDefaultLinkPreview(url) + + requestURL, err := neturl.Parse(u.oembedEndpoint) + if err != nil { + return preview, err + } + + // When format is specified, the provider MUST return data in the request + // format, else return an error. + requestURL.RawQuery = neturl.Values{ + "url": {u.url.String()}, + "format": {"json"}, + }.Encode() + + ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) + defer cancel() + req, err := http.NewRequestWithContext(ctx, "GET", requestURL.String(), nil) + if err != nil { + return preview, err + } + req.Header.Set("accept", "application/json; charset=utf-8") + req.Header.Set("accept-language", defaultAcceptLanguage) + req.Header.Set("user-agent", defaultUserAgent) + + res, err := httpClient.Do(req) + defer func() { + if res != nil { + if err = res.Body.Close(); err != nil { + u.logger.Error("failed to close response body", zap.Error(err)) + } + } + }() + if err != nil { + return preview, UnfurlError{ + msg: "failed to get oEmbed", + url: u.url.String(), + err: err, + } + } + + if res.StatusCode >= http.StatusBadRequest { + return preview, UnfurlError{ + msg: fmt.Sprintf("failed to fetch oEmbed metadata, statusCode='%d'", res.StatusCode), + url: u.url.String(), + err: nil, + } + } + + var oembedResponse OEmbedResponse + oembedBytes, err := ioutil.ReadAll(res.Body) + if err != nil { + return preview, err + } + err = json.Unmarshal(oembedBytes, &oembedResponse) + if err != nil { + return preview, err + } + + if oembedResponse.Title == "" { + return preview, UnfurlError{ + msg: "missing title", + url: url.String(), + err: errors.New(""), + } + } + + preview.Title = oembedResponse.Title + return preview, nil +} + type OpenGraphMetadata struct { Title string `json:"title" meta:"og:title"` Description string `json:"description" meta:"og:description"` ThumbnailURL string `json:"thumbnailUrl" meta:"og:image"` } -// OpenGraphUnfurler can be used either as the default unfurler for some websites -// (e.g. GitHub), or as a fallback strategy. It parses HTML and extract -// OpenGraph meta tags. If an oEmbed endpoint is available, it should be -// preferred. +// OpenGraphUnfurler should be preferred over OEmbedUnfurler because oEmbed +// gives back a JSON response with a "html" field that's supposed to be embedded +// in an iframe (hardly useful for existing Status' clients). type OpenGraphUnfurler struct { logger *zap.Logger } @@ -153,8 +227,17 @@ type OpenGraphUnfurler struct { func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { preview := newDefaultLinkPreview(url) - res, cancel, err := httpGETForOpenGraph(url.String()) + ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) defer cancel() + req, err := http.NewRequestWithContext(ctx, "GET", url.String(), nil) + if err != nil { + return preview, err + } + req.Header.Set("accept", "text/html; charset=utf-8") + req.Header.Set("accept-language", defaultAcceptLanguage) + req.Header.Set("user-agent", defaultUserAgent) + + res, err := httpClient.Do(req) defer func() { if res != nil { if err = res.Body.Close(); err != nil { @@ -170,15 +253,11 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { } } - // Behave like WhatsApp, i.e. if the response is a 404, consider the URL - // unfurleable. We can try to unfurl from the 404 HTML, which works well for - // certain websites, like GitHub, but it also potentially confuses users - // because they'll be sharing previews that don't match the actual URLs. - if res.StatusCode == http.StatusNotFound { + if res.StatusCode >= http.StatusBadRequest { return preview, UnfurlError{ - msg: "could not find page", + msg: fmt.Sprintf("failed to fetch OpenGraph metadata, statusCode='%d'", res.StatusCode), url: url.String(), - err: errors.New(""), + err: nil, } } @@ -219,10 +298,23 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { return preview, nil } +func normalizeHostname(hostname string) string { + hostname = strings.ToLower(hostname) + re := regexp.MustCompile(`^www\.(.*)$`) + return re.ReplaceAllString(hostname, "$1") +} + func newUnfurler(logger *zap.Logger, url *neturl.URL) Unfurler { - u := new(OpenGraphUnfurler) - u.logger = logger - return u + switch normalizeHostname(url.Hostname()) { + case "reddit.com": + return OEmbedUnfurler{ + oembedEndpoint: "https://www.reddit.com/oembed", + url: *url, + logger: logger, + } + default: + return OpenGraphUnfurler{logger: logger} + } } func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) { @@ -264,6 +356,10 @@ func parseValidURL(rawURL string) (*neturl.URL, error) { } // GetURLs returns only what we consider unfurleable URLs. +// +// If we wanted to be extra precise and help improve UX, we could ignore URLs +// that we know can't be unfurled. This is at least possible with the oEmbed +// protocol because providers must specify an endpoint scheme. func GetURLs(text string) []string { parsedText := markdown.Parse([]byte(text), nil) visitor := common.RunLinksVisitor(parsedText) diff --git a/protocol/linkpreview/linkpreview_test.go b/protocol/linkpreview/linkpreview_test.go index 39c52e207ba..b6d8d19e42e 100644 --- a/protocol/linkpreview/linkpreview_test.go +++ b/protocol/linkpreview/linkpreview_test.go @@ -89,25 +89,12 @@ func TestGetLinks(t *testing.T) { } func TestUnfurlURLs(t *testing.T) { - examples := []struct { + type Example struct { url string expected common.LinkPreview - }{ - { - url: "https://github.com/", - expected: common.LinkPreview{ - Description: "GitHub is where over 100 million developers shape the future of software, together. Contribute to the open source community, manage your Git repositories, review code like a pro, track bugs and fea...", - Hostname: "github.com", - Title: "GitHub: Let’s build from here", - URL: "https://github.com/", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 1200, - Height: 630, - URL: "", - DataURI: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABLAAAAJ2CAMAAAB4", - }, - }, - }, + } + + examples := []Example{ { url: "https://github.com/status-im/status-mobile/issues/15469", expected: common.LinkPreview{ @@ -138,6 +125,31 @@ func TestUnfurlURLs(t *testing.T) { }, }, }, + { + url: "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share", + expected: common.LinkPreview{ + Description: "", + Hostname: "www.reddit.com", + Title: "The best bitcoin explanation of all times.", + URL: "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share", + Thumbnail: common.LinkPreviewThumbnail{}, + }, + }, + { + url: "https://open.spotify.com/album/0Wn5sHYtC7vPPX0n2AVJmF?si=iXmxsFJyQ62F2yMElt086A", + expected: common.LinkPreview{ + Description: "SadSvit · Album · 2021 · 8 songs.", + Hostname: "open.spotify.com", + Title: "Cassette", + URL: "https://open.spotify.com/album/0Wn5sHYtC7vPPX0n2AVJmF?si=iXmxsFJyQ62F2yMElt086A", + Thumbnail: common.LinkPreviewThumbnail{ + Width: 640, + Height: 640, + URL: "", + DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEBLAEsAAD//gA8Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcgS", + }, + }, + }, { url: "https://www.youtube.com/watch?v=lE4UXdJSJM4", expected: common.LinkPreview{ @@ -152,6 +164,48 @@ func TestUnfurlURLs(t *testing.T) { }, }, }, + { + url: "https://music.youtube.com/watch?v=1TTAXENxbM0", + expected: common.LinkPreview{ + URL: "https://music.youtube.com/watch?v=1TTAXENxbM0", + Hostname: "music.youtube.com", + Title: "Telegraph Road - YouTube Music", + Description: "Provided to YouTube by Universal Music Group Telegraph Road · Dire Straits Love Over Gold ℗ 1982 Mercury Records Limited Released on: 1982-01-01 Produce...", + Thumbnail: common.LinkPreviewThumbnail{ + Width: 1280, + Height: 720, + DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAUDBAg", + }, + }, + }, + { + url: "https://media.giphy.com/media/dTlyIvBdEzIQM/giphy.gif", + expected: common.LinkPreview{ + URL: "https://media.giphy.com/media/dTlyIvBdEzIQM/giphy.gif", + Hostname: "media.giphy.com", + Title: "Cat GIF - Find & Share on GIPHY", + Description: "Discover & share this Cat GIF with everyone you know. GIPHY is how you search, share, discover, and create GIFs.", + Thumbnail: common.LinkPreviewThumbnail{ + Width: 339, + Height: 200, + DataURI: "data:image/gif;base64,R0lGODlhUwHIAPf/ACslIC4oIzEqIjIqJTMsKDQw", + }, + }, + }, + { + url: "https://t.co/TLEVzWCTkV", + expected: common.LinkPreview{ + URL: "https://t.co/TLEVzWCTkV", + Hostname: "t.co", + Title: "Casting announced for The Way, the bold new BBC drama from Michael Sheen, James Graham and Adam Curtis", + Description: "Steffan Rhodri, Mali Harries, Sophie Melville and Callum Scott Howells lead the cast, Luke Evans also stars", + Thumbnail: common.LinkPreviewThumbnail{ + Width: 1920, + Height: 1080, + DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAUEBAU", + }, + }, + }, } var urls []string From 08451151d327a940a5c29d3efc32b3de1e710f6e Mon Sep 17 00:00:00 2001 From: Icaro Motta Date: Thu, 25 May 2023 10:03:29 -0300 Subject: [PATCH 02/12] Refactor tests to fail/pass deterministically --- protocol/linkpreview/linkpreview.go | 52 ++-- protocol/linkpreview/linkpreview_test.go | 311 +++++++++++++---------- protocol/messenger.go | 2 +- 3 files changed, 204 insertions(+), 161 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index ce2d49d7d86..ec79d2177dd 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -47,7 +47,7 @@ type Unfurler interface { } const ( - requestTimeout = 15000 * time.Millisecond + defaultRequestTimeout = 15000 * time.Millisecond // Without an user agent, many providers treat status-go as a gluttony bot, // and either respond more frequently with a 429 (Too Many Requests), or @@ -59,12 +59,8 @@ const ( defaultAcceptLanguage = "en-US,en;q=0.5" ) -var ( - httpClient = http.Client{Timeout: requestTimeout} -) - -func fetchResponseBody(logger *zap.Logger, url string) ([]byte, error) { - ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) +func fetchResponseBody(logger *zap.Logger, httpClient http.Client, url string) ([]byte, error) { + ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) defer cancel() req, err := http.NewRequestWithContext(ctx, "GET", url, nil) @@ -101,10 +97,10 @@ func newDefaultLinkPreview(url *neturl.URL) common.LinkPreview { } } -func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail, error) { +func fetchThumbnail(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreviewThumbnail, error) { var thumbnail common.LinkPreviewThumbnail - imgBytes, err := fetchResponseBody(logger, url) + imgBytes, err := fetchResponseBody(logger, httpClient, url) if err != nil { return thumbnail, fmt.Errorf("could not fetch thumbnail: %w", err) } @@ -126,7 +122,8 @@ func fetchThumbnail(logger *zap.Logger, url string) (common.LinkPreviewThumbnail } type OEmbedUnfurler struct { - logger *zap.Logger + logger *zap.Logger + httpClient http.Client // oembedEndpoint describes where the consumer may request representations for // the supported URL scheme. For example, for YouTube, it is // https://www.youtube.com/oembed. @@ -148,14 +145,14 @@ func (u OEmbedUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { return preview, err } - // When format is specified, the provider MUST return data in the request + // When format is specified, the provider MUST return data in the requested // format, else return an error. requestURL.RawQuery = neturl.Values{ "url": {u.url.String()}, "format": {"json"}, }.Encode() - ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) + ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) defer cancel() req, err := http.NewRequestWithContext(ctx, "GET", requestURL.String(), nil) if err != nil { @@ -165,7 +162,7 @@ func (u OEmbedUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { req.Header.Set("accept-language", defaultAcceptLanguage) req.Header.Set("user-agent", defaultUserAgent) - res, err := httpClient.Do(req) + res, err := u.httpClient.Do(req) defer func() { if res != nil { if err = res.Body.Close(); err != nil { @@ -221,13 +218,14 @@ type OpenGraphMetadata struct { // gives back a JSON response with a "html" field that's supposed to be embedded // in an iframe (hardly useful for existing Status' clients). type OpenGraphUnfurler struct { - logger *zap.Logger + logger *zap.Logger + httpClient http.Client } func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { preview := newDefaultLinkPreview(url) - ctx, cancel := context.WithTimeout(context.Background(), requestTimeout) + ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) defer cancel() req, err := http.NewRequestWithContext(ctx, "GET", url.String(), nil) if err != nil { @@ -237,7 +235,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { req.Header.Set("accept-language", defaultAcceptLanguage) req.Header.Set("user-agent", defaultUserAgent) - res, err := httpClient.Do(req) + res, err := u.httpClient.Do(req) defer func() { if res != nil { if err = res.Body.Close(); err != nil { @@ -283,7 +281,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { } if ogMetadata.ThumbnailURL != "" { - t, err := fetchThumbnail(u.logger, ogMetadata.ThumbnailURL) + t, err := fetchThumbnail(u.logger, u.httpClient, ogMetadata.ThumbnailURL) if err != nil { // Given we want to fetch thumbnails on a best-effort basis, if an error // happens we simply log it. @@ -304,20 +302,24 @@ func normalizeHostname(hostname string) string { return re.ReplaceAllString(hostname, "$1") } -func newUnfurler(logger *zap.Logger, url *neturl.URL) Unfurler { +func newUnfurler(logger *zap.Logger, httpClient http.Client, url *neturl.URL) Unfurler { switch normalizeHostname(url.Hostname()) { case "reddit.com": return OEmbedUnfurler{ oembedEndpoint: "https://www.reddit.com/oembed", url: *url, logger: logger, + httpClient: httpClient, } default: - return OpenGraphUnfurler{logger: logger} + return OpenGraphUnfurler{ + logger: logger, + httpClient: httpClient, + } } } -func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) { +func unfurl(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreview, error) { var preview common.LinkPreview parsedURL, err := neturl.Parse(url) @@ -325,7 +327,7 @@ func unfurl(logger *zap.Logger, url string) (common.LinkPreview, error) { return preview, err } - unfurler := newUnfurler(logger, parsedURL) + unfurler := newUnfurler(logger, httpClient, parsedURL) preview, err = unfurler.unfurl(parsedURL) if err != nil { return preview, err @@ -393,9 +395,13 @@ func GetURLs(text string) []string { return urls } +func NewDefaultHTTPClient() http.Client { + return http.Client{Timeout: defaultRequestTimeout} +} + // UnfurlURLs assumes clients pass URLs verbatim that were validated and // processed by GetURLs. -func UnfurlURLs(logger *zap.Logger, urls []string) ([]common.LinkPreview, error) { +func UnfurlURLs(logger *zap.Logger, httpClient http.Client, urls []string) ([]common.LinkPreview, error) { var err error if logger == nil { logger, err = zap.NewDevelopment() @@ -407,7 +413,7 @@ func UnfurlURLs(logger *zap.Logger, urls []string) ([]common.LinkPreview, error) previews := make([]common.LinkPreview, 0, len(urls)) for _, url := range urls { - p, err := unfurl(logger, url) + p, err := unfurl(logger, httpClient, url) if err != nil { if unfurlErr, ok := err.(UnfurlError); ok { logger.Info("failed to unfurl", zap.Error(unfurlErr)) diff --git a/protocol/linkpreview/linkpreview_test.go b/protocol/linkpreview/linkpreview_test.go index b6d8d19e42e..67ee50ac7c1 100644 --- a/protocol/linkpreview/linkpreview_test.go +++ b/protocol/linkpreview/linkpreview_test.go @@ -1,14 +1,81 @@ package linkpreview import ( + "bytes" + "fmt" + "io/ioutil" "math" + "net/http" + "regexp" "testing" + "time" "github.com/stretchr/testify/require" "github.com/status-im/status-go/protocol/common" ) +// StubMatcher should either return an http.Response or nil in case the request +// doesn't match. +type StubMatcher func(req *http.Request) *http.Response + +type StubTransport struct { + // fallbackToDefaultTransport when true will make the transport use + // http.DefaultTransport in case no matcher is found. + fallbackToDefaultTransport bool + // disabledStubs when true, will skip all matchers and use + // http.DefaultTransport. + // + // Useful while testing to toggle between the original and stubbed responses. + disabledStubs bool + // matchers are http.RoundTripper functions. + matchers []StubMatcher +} + +// RoundTrip returns a stubbed response if any matcher returns a non-nil +// http.Response. If no matcher is found and fallbackToDefaultTransport is true, +// then it executes the HTTP request using the default http transport. +// +// If StubTransport#disabledStubs is true, the default http transport is used. +func (t *StubTransport) RoundTrip(req *http.Request) (*http.Response, error) { + if t.disabledStubs { + return http.DefaultTransport.RoundTrip(req) + } + + for _, matcher := range t.matchers { + res := matcher(req) + if res != nil { + return res, nil + } + } + + if t.fallbackToDefaultTransport { + return http.DefaultTransport.RoundTrip(req) + } + + return nil, fmt.Errorf("no HTTP matcher found") +} + +// Add a matcher based on a URL regexp. If a given request URL matches the +// regexp, then responseBody will be returned with a hardcoded 200 status code. +func (st *StubTransport) AddURLMatcher(urlRegexp string, responseBody []byte) { + matcher := func(req *http.Request) *http.Response { + rx, err := regexp.Compile(regexp.QuoteMeta(urlRegexp)) + if err != nil { + return nil + } + if rx.MatchString(req.URL.String()) { + return &http.Response{ + StatusCode: http.StatusOK, + Body: ioutil.NopCloser(bytes.NewBuffer(responseBody)), + } + } + return nil + } + + st.matchers = append(st.matchers, matcher) +} + // assertContainsLongString verifies if actual contains a slice of expected and // correctly prints the cause of the failure. The default behavior of // require.Contains with long strings is to not print the formatted message @@ -37,7 +104,7 @@ func assertContainsLongString(t *testing.T, expected string, actual string, maxL ) } -func TestGetLinks(t *testing.T) { +func Test_GetLinks(t *testing.T) { examples := []struct { args string expected []string @@ -88,160 +155,130 @@ func TestGetLinks(t *testing.T) { } } -func TestUnfurlURLs(t *testing.T) { - type Example struct { - url string - expected common.LinkPreview - } +func readAsset(t *testing.T, filename string) []byte { + b, err := ioutil.ReadFile("../../_assets/tests/" + filename) + require.NoError(t, err) + return b +} - examples := []Example{ - { - url: "https://github.com/status-im/status-mobile/issues/15469", - expected: common.LinkPreview{ - Description: "Designs https://www.figma.com/file/wA8Epdki2OWa8Vr067PCNQ/Composer-for-Mobile?node-id=2102-232933&t=tTYKjMpICnzwF5Zv-0 Out of scope Enable link previews (we can assume for now that is always on) Mu...", - Hostname: "github.com", - Title: "Allow users to customize links · Issue #15469 · status-im/status-mobile", - URL: "https://github.com/status-im/status-mobile/issues/15469", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 1200, - Height: 600, - URL: "", - DataURI: "data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAABLAAAAJYCAYAAABy", - }, - }, - }, - { - url: "https://www.imdb.com/title/tt0117500/", - expected: common.LinkPreview{ - Description: "The Rock: Directed by Michael Bay. With Sean Connery, Nicolas Cage, Ed Harris, John Spencer. A mild-mannered chemist and an ex-con must lead the counterstrike when a rogue group of military men, led by a renegade general, threaten a nerve gas attack from Alcatraz against San Francisco.", - Hostname: "www.imdb.com", - Title: "The Rock (1996) - IMDb", - URL: "https://www.imdb.com/title/tt0117500/", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 1000, - Height: 1481, - URL: "", - DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAkJCgg", - }, - }, - }, - { - url: "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share", - expected: common.LinkPreview{ - Description: "", - Hostname: "www.reddit.com", - Title: "The best bitcoin explanation of all times.", - URL: "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share", - Thumbnail: common.LinkPreviewThumbnail{}, - }, - }, - { - url: "https://open.spotify.com/album/0Wn5sHYtC7vPPX0n2AVJmF?si=iXmxsFJyQ62F2yMElt086A", - expected: common.LinkPreview{ - Description: "SadSvit · Album · 2021 · 8 songs.", - Hostname: "open.spotify.com", - Title: "Cassette", - URL: "https://open.spotify.com/album/0Wn5sHYtC7vPPX0n2AVJmF?si=iXmxsFJyQ62F2yMElt086A", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 640, - Height: 640, - URL: "", - DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEBLAEsAAD//gA8Q1JFQVRPUjogZ2QtanBlZyB2MS4wICh1c2luZyBJSkcgS", - }, - }, - }, - { - url: "https://www.youtube.com/watch?v=lE4UXdJSJM4", - expected: common.LinkPreview{ - URL: "https://www.youtube.com/watch?v=lE4UXdJSJM4", - Hostname: "www.youtube.com", - Title: "Interview with a GNU/Linux user - Partition 1", - Description: "GNU/Linux Operating SystemInterview with a GNU/Linux user with Richie Guix - aired on © The GNU Linux.Programmer humorLinux humorProgramming jokesProgramming...", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 1280, - Height: 720, - DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAUDBA8", - }, - }, - }, - { - url: "https://music.youtube.com/watch?v=1TTAXENxbM0", - expected: common.LinkPreview{ - URL: "https://music.youtube.com/watch?v=1TTAXENxbM0", - Hostname: "music.youtube.com", - Title: "Telegraph Road - YouTube Music", - Description: "Provided to YouTube by Universal Music Group Telegraph Road · Dire Straits Love Over Gold ℗ 1982 Mercury Records Limited Released on: 1982-01-01 Produce...", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 1280, - Height: 720, - DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAUDBAg", - }, - }, - }, - { - url: "https://media.giphy.com/media/dTlyIvBdEzIQM/giphy.gif", - expected: common.LinkPreview{ - URL: "https://media.giphy.com/media/dTlyIvBdEzIQM/giphy.gif", - Hostname: "media.giphy.com", - Title: "Cat GIF - Find & Share on GIPHY", - Description: "Discover & share this Cat GIF with everyone you know. GIPHY is how you search, share, discover, and create GIFs.", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 339, - Height: 200, - DataURI: "data:image/gif;base64,R0lGODlhUwHIAPf/ACslIC4oIzEqIjIqJTMsKDQw", - }, - }, - }, - { - url: "https://t.co/TLEVzWCTkV", - expected: common.LinkPreview{ - URL: "https://t.co/TLEVzWCTkV", - Hostname: "t.co", - Title: "Casting announced for The Way, the bold new BBC drama from Michael Sheen, James Graham and Adam Curtis", - Description: "Steffan Rhodri, Mali Harries, Sophie Melville and Callum Scott Howells lead the cast, Luke Evans also stars", - Thumbnail: common.LinkPreviewThumbnail{ - Width: 1920, - Height: 1080, - DataURI: "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQEASABIAAD/2wBDAAUEBAU", - }, - }, +func Test_UnfurlURLs_YouTube(t *testing.T) { + url := "https://www.youtube.com/watch?v=lE4UXdJSJM4" + thumbnailURL := "https://i.ytimg.com/vi/lE4UXdJSJM4/maxresdefault.jpg" + expected := common.LinkPreview{ + URL: url, + Hostname: "www.youtube.com", + Title: "Interview with a GNU/Linux user - Partition 1", + Description: "GNU/Linux Operating SystemInterview with a GNU/Linux user with Richie Guix - aired on © The GNU Linux.Programmer humorLinux humorProgramming jokesProgramming...", + Thumbnail: common.LinkPreviewThumbnail{ + Width: 1, + Height: 1, + DataURI: "data:image/webp;base64,UklGRiQAAABXRUJQVlA4IBgAAAAwAQCdASoBAAEAAQAaJaQAA3AA/vpMgAA", }, } - var urls []string - for _, e := range examples { - urls = append(urls, e.url) + disabledStubs := false + if disabledStubs { + expected.Thumbnail.Width = 1280 + expected.Thumbnail.Height = 720 + expected.Thumbnail.DataURI = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAUDBA8" } + transport := StubTransport{disabledStubs: disabledStubs} + transport.AddURLMatcher( + url, + []byte(fmt.Sprintf(` + + + + + + + + `, thumbnailURL)), + ) + transport.AddURLMatcher(thumbnailURL, readAsset(t, "1.jpg")) + stubbedClient := http.Client{Transport: &transport} - links, err := UnfurlURLs(nil, urls) + previews, err := UnfurlURLs(nil, stubbedClient, []string{url}) require.NoError(t, err) - require.Len(t, links, len(examples), "all URLs should have been unfurled successfully") - - for i, link := range links { - e := examples[i] - require.Equal(t, e.expected.URL, link.URL, e.url) - require.Equal(t, e.expected.Hostname, link.Hostname, e.url) - require.Equal(t, e.expected.Title, link.Title, e.url) - require.Equal(t, e.expected.Description, link.Description, e.url) - - require.Equal(t, e.expected.Thumbnail.Width, link.Thumbnail.Width, e.url) - require.Equal(t, e.expected.Thumbnail.Height, link.Thumbnail.Height, e.url) - require.Equal(t, e.expected.Thumbnail.URL, link.Thumbnail.URL, e.url) - assertContainsLongString(t, e.expected.Thumbnail.DataURI, link.Thumbnail.DataURI, 100) + require.Len(t, previews, 1) + preview := previews[0] + + require.Equal(t, expected.URL, preview.URL) + require.Equal(t, expected.Hostname, preview.Hostname) + require.Equal(t, expected.Title, preview.Title) + require.Equal(t, expected.Description, preview.Description) + require.Equal(t, expected.Thumbnail.Width, preview.Thumbnail.Width) + require.Equal(t, expected.Thumbnail.Height, preview.Thumbnail.Height) + require.Equal(t, expected.Thumbnail.URL, preview.Thumbnail.URL) + assertContainsLongString(t, expected.Thumbnail.DataURI, preview.Thumbnail.DataURI, 100) +} + +func Test_UnfurlURLs_Reddit(t *testing.T) { + url := "https://www.reddit.com/r/Bitcoin/comments/13j0tzr/the_best_bitcoin_explanation_of_all_times/?utm_source=share" + expected := common.LinkPreview{ + URL: url, + Hostname: "www.reddit.com", + Title: "The best bitcoin explanation of all times.", + Description: "", + Thumbnail: common.LinkPreviewThumbnail{}, } + transport := StubTransport{} + transport.AddURLMatcher( + "https://www.reddit.com/oembed", + []byte(` + { + "provider_url": "https://www.reddit.com/", + "version": "1.0", + "title": "The best bitcoin explanation of all times.", + "provider_name": "reddit", + "type": "rich", + "author_name": "DTheDev" + } + `), + ) + stubbedClient := http.Client{Transport: &transport} + + previews, err := UnfurlURLs(nil, stubbedClient, []string{url}) + require.NoError(t, err) + require.Len(t, previews, 1) + preview := previews[0] + + require.Equal(t, expected.URL, preview.URL) + require.Equal(t, expected.Hostname, preview.Hostname) + require.Equal(t, expected.Title, preview.Title) + require.Equal(t, expected.Description, preview.Description) + require.Equal(t, expected.Thumbnail, preview.Thumbnail) +} + +func Test_UnfurlURLs_Timeout(t *testing.T) { + httpClient := http.Client{Timeout: time.Nanosecond} + previews, err := UnfurlURLs(nil, httpClient, []string{"https://status.im"}) + require.NoError(t, err) + require.Empty(t, previews) +} + +func Test_UnfurlURLs_CommonFailures(t *testing.T) { + httpClient := http.Client{} + // Test URL that doesn't return any OpenGraph title. - previews, err := UnfurlURLs(nil, []string{"https://wikipedia.org"}) + transport := StubTransport{} + transport.AddURLMatcher( + "https://wikipedia.org", + []byte(""), + ) + stubbedClient := http.Client{Transport: &transport} + previews, err := UnfurlURLs(nil, stubbedClient, []string{"https://wikipedia.org"}) require.NoError(t, err) require.Empty(t, previews) // Test 404. - previews, err = UnfurlURLs(nil, []string{"https://github.com/status-im/i_do_not_exist"}) + previews, err = UnfurlURLs(nil, httpClient, []string{"https://github.com/status-im/i_do_not_exist"}) require.NoError(t, err) require.Empty(t, previews) // Test no response when trying to get OpenGraph metadata. - previews, err = UnfurlURLs(nil, []string{"https://wikipedia.o"}) + previews, err = UnfurlURLs(nil, httpClient, []string{"https://wikipedia.o"}) require.NoError(t, err) require.Empty(t, previews) } diff --git a/protocol/messenger.go b/protocol/messenger.go index 00a5ab9061b..f35b48725d6 100644 --- a/protocol/messenger.go +++ b/protocol/messenger.go @@ -5937,7 +5937,7 @@ func generateAliasAndIdenticon(pk string) (string, string, error) { } func (m *Messenger) UnfurlURLs(urls []string) ([]common.LinkPreview, error) { - return linkpreview.UnfurlURLs(m.logger, urls) + return linkpreview.UnfurlURLs(m.logger, linkpreview.NewDefaultHTTPClient(), urls) } func (m *Messenger) SendEmojiReaction(ctx context.Context, chatID, messageID string, emojiID protobuf.EmojiReaction_Type) (*MessengerResponse, error) { From ef74f7e2961235442151fabe19c3e1a6369dcc22 Mon Sep 17 00:00:00 2001 From: Icaro Motta Date: Thu, 25 May 2023 11:04:57 -0300 Subject: [PATCH 03/12] Make both types of unfurlers use an url field --- protocol/linkpreview/linkpreview.go | 32 +++++++++++++++-------------- 1 file changed, 17 insertions(+), 15 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index ec79d2177dd..99d1cde8545 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -43,7 +43,7 @@ type LinkPreview struct { } type Unfurler interface { - unfurl(*neturl.URL) (common.LinkPreview, error) + unfurl() (common.LinkPreview, error) } const ( @@ -129,7 +129,7 @@ type OEmbedUnfurler struct { // https://www.youtube.com/oembed. oembedEndpoint string // url is the actual URL to be unfurled. - url neturl.URL + url *neturl.URL } type OEmbedResponse struct { @@ -137,8 +137,8 @@ type OEmbedResponse struct { ThumbnailURL string `json:"thumbnail_url"` } -func (u OEmbedUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { - preview := newDefaultLinkPreview(url) +func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) { + preview := newDefaultLinkPreview(u.url) requestURL, err := neturl.Parse(u.oembedEndpoint) if err != nil { @@ -199,7 +199,7 @@ func (u OEmbedUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { if oembedResponse.Title == "" { return preview, UnfurlError{ msg: "missing title", - url: url.String(), + url: u.url.String(), err: errors.New(""), } } @@ -218,16 +218,17 @@ type OpenGraphMetadata struct { // gives back a JSON response with a "html" field that's supposed to be embedded // in an iframe (hardly useful for existing Status' clients). type OpenGraphUnfurler struct { + url *neturl.URL logger *zap.Logger httpClient http.Client } -func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { - preview := newDefaultLinkPreview(url) +func (u OpenGraphUnfurler) unfurl() (common.LinkPreview, error) { + preview := newDefaultLinkPreview(u.url) ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) defer cancel() - req, err := http.NewRequestWithContext(ctx, "GET", url.String(), nil) + req, err := http.NewRequestWithContext(ctx, "GET", u.url.String(), nil) if err != nil { return preview, err } @@ -246,7 +247,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { if err != nil { return preview, UnfurlError{ msg: "failed to get HTML page", - url: url.String(), + url: u.url.String(), err: err, } } @@ -254,7 +255,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { if res.StatusCode >= http.StatusBadRequest { return preview, UnfurlError{ msg: fmt.Sprintf("failed to fetch OpenGraph metadata, statusCode='%d'", res.StatusCode), - url: url.String(), + url: u.url.String(), err: nil, } } @@ -264,7 +265,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { if err != nil { return preview, UnfurlError{ msg: "failed to parse OpenGraph data", - url: url.String(), + url: u.url.String(), err: err, } } @@ -275,7 +276,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { if ogMetadata.Title == "" { return preview, UnfurlError{ msg: "missing title", - url: url.String(), + url: u.url.String(), err: errors.New(""), } } @@ -285,7 +286,7 @@ func (u OpenGraphUnfurler) unfurl(url *neturl.URL) (common.LinkPreview, error) { if err != nil { // Given we want to fetch thumbnails on a best-effort basis, if an error // happens we simply log it. - u.logger.Info("failed to fetch thumbnail", zap.String("url", url.String()), zap.Error(err)) + u.logger.Info("failed to fetch thumbnail", zap.String("url", u.url.String()), zap.Error(err)) } else { preview.Thumbnail = t } @@ -307,12 +308,13 @@ func newUnfurler(logger *zap.Logger, httpClient http.Client, url *neturl.URL) Un case "reddit.com": return OEmbedUnfurler{ oembedEndpoint: "https://www.reddit.com/oembed", - url: *url, + url: url, logger: logger, httpClient: httpClient, } default: return OpenGraphUnfurler{ + url: url, logger: logger, httpClient: httpClient, } @@ -328,7 +330,7 @@ func unfurl(logger *zap.Logger, httpClient http.Client, url string) (common.Link } unfurler := newUnfurler(logger, httpClient, parsedURL) - preview, err = unfurler.unfurl(parsedURL) + preview, err = unfurler.unfurl() if err != nil { return preview, err } From c6e56c58c21e0d611ba29591a4b1db101cf83746 Mon Sep 17 00:00:00 2001 From: Icaro Motta Date: Thu, 25 May 2023 11:16:10 -0300 Subject: [PATCH 04/12] Remove duplication in test --- protocol/linkpreview/linkpreview_test.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/protocol/linkpreview/linkpreview_test.go b/protocol/linkpreview/linkpreview_test.go index 67ee50ac7c1..e8a51031742 100644 --- a/protocol/linkpreview/linkpreview_test.go +++ b/protocol/linkpreview/linkpreview_test.go @@ -188,12 +188,12 @@ func Test_UnfurlURLs_YouTube(t *testing.T) { []byte(fmt.Sprintf(` - - + + - `, thumbnailURL)), + `, expected.Title, expected.Description, thumbnailURL)), ) transport.AddURLMatcher(thumbnailURL, readAsset(t, "1.jpg")) stubbedClient := http.Client{Transport: &transport} From 6bab739ec8c1aff443c002a2106ea09aec302e17 Mon Sep 17 00:00:00 2001 From: Icaro Motta Date: Thu, 25 May 2023 11:29:47 -0300 Subject: [PATCH 05/12] Remove unnecessary error handling complexity --- protocol/linkpreview/linkpreview.go | 66 ++++------------------------- 1 file changed, 9 insertions(+), 57 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index 99d1cde8545..88c26e32ff5 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -22,22 +22,6 @@ import ( "github.com/status-im/status-go/protocol/common" ) -// UnfurlError means a non-critical error, and that processing of the preview -// should be interrupted and the preview probably ignored. -type UnfurlError struct { - msg string - url string - err error -} - -func (ue UnfurlError) Error() string { - return fmt.Sprintf("%s, url='%s'", ue.msg, ue.url) -} - -func (ue UnfurlError) Unwrap() error { - return ue.err -} - type LinkPreview struct { common.LinkPreview } @@ -171,19 +155,11 @@ func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) { } }() if err != nil { - return preview, UnfurlError{ - msg: "failed to get oEmbed", - url: u.url.String(), - err: err, - } + return preview, fmt.Errorf("failed to fetch oEmbed metadata: %w", err) } if res.StatusCode >= http.StatusBadRequest { - return preview, UnfurlError{ - msg: fmt.Sprintf("failed to fetch oEmbed metadata, statusCode='%d'", res.StatusCode), - url: u.url.String(), - err: nil, - } + return preview, fmt.Errorf("failed to fetch oEmbed metadata, statusCode='%d'", res.StatusCode) } var oembedResponse OEmbedResponse @@ -197,11 +173,7 @@ func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) { } if oembedResponse.Title == "" { - return preview, UnfurlError{ - msg: "missing title", - url: u.url.String(), - err: errors.New(""), - } + return preview, fmt.Errorf("missing required title in oEmbed response") } preview.Title = oembedResponse.Title @@ -245,40 +217,24 @@ func (u OpenGraphUnfurler) unfurl() (common.LinkPreview, error) { } }() if err != nil { - return preview, UnfurlError{ - msg: "failed to get HTML page", - url: u.url.String(), - err: err, - } + return preview, fmt.Errorf("failed to get HTML page: %w", err) } if res.StatusCode >= http.StatusBadRequest { - return preview, UnfurlError{ - msg: fmt.Sprintf("failed to fetch OpenGraph metadata, statusCode='%d'", res.StatusCode), - url: u.url.String(), - err: nil, - } + return preview, fmt.Errorf("failed to fetch OpenGraph metadata, statusCode='%d'", res.StatusCode) } var ogMetadata OpenGraphMetadata err = metabolize.Metabolize(res.Body, &ogMetadata) if err != nil { - return preview, UnfurlError{ - msg: "failed to parse OpenGraph data", - url: u.url.String(), - err: err, - } + return preview, fmt.Errorf("failed to parse OpenGraph data") } // There are URLs like https://wikipedia.org/ that don't have an OpenGraph // title tag, but article pages do. In the future, we can fallback to the // website's title by using the tag. if ogMetadata.Title == "" { - return preview, UnfurlError{ - msg: "missing title", - url: u.url.String(), - err: errors.New(""), - } + return preview, fmt.Errorf("missing required title in OpenGraph response") } if ogMetadata.ThumbnailURL != "" { @@ -417,12 +373,8 @@ func UnfurlURLs(logger *zap.Logger, httpClient http.Client, urls []string) ([]co for _, url := range urls { p, err := unfurl(logger, httpClient, url) if err != nil { - if unfurlErr, ok := err.(UnfurlError); ok { - logger.Info("failed to unfurl", zap.Error(unfurlErr)) - continue - } - - return nil, err + logger.Info("failed to unfurl", zap.String("url", url), zap.Error(err)) + continue } previews = append(previews, p) } From edb8e0d39542e037c4ac9a58bcfc8508150ff60e Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Thu, 25 May 2023 13:38:35 -0300 Subject: [PATCH 06/12] Remove code duplication --- protocol/linkpreview/linkpreview.go | 106 ++++++++++++---------------- 1 file changed, 47 insertions(+), 59 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index 88c26e32ff5..e482854216b 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -1,6 +1,7 @@ package linkpreview import ( + "bytes" "context" "encoding/json" "errors" @@ -30,6 +31,8 @@ type Unfurler interface { unfurl() (common.LinkPreview, error) } +type Headers map[string]string + const ( defaultRequestTimeout = 15000 * time.Millisecond @@ -43,32 +46,40 @@ const ( defaultAcceptLanguage = "en-US,en;q=0.5" ) -func fetchResponseBody(logger *zap.Logger, httpClient http.Client, url string) ([]byte, error) { +func fetchBody(logger *zap.Logger, httpClient http.Client, url string, headers Headers) ([]byte, error) { ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) defer cancel() req, err := http.NewRequestWithContext(ctx, "GET", url, nil) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to perform HTTP request: %w", err) } - res, err := httpClient.Do(req) - if err != nil { - return nil, err + if headers != nil { + for k, v := range headers { + req.Header.Set(k, v) + } } + + res, err := httpClient.Do(req) defer func() { - if err = res.Body.Close(); err != nil { - logger.Error("Failed to close response body", zap.Error(err)) + if res != nil { + if err = res.Body.Close(); err != nil { + logger.Error("failed to close response body", zap.Error(err)) + } } }() + if err != nil { + return nil, err + } if res.StatusCode >= http.StatusBadRequest { - return nil, errors.New(http.StatusText(res.StatusCode)) + return nil, fmt.Errorf("http request failed, statusCode='%d'", res.StatusCode) } bodyBytes, err := ioutil.ReadAll(res.Body) if err != nil { - return nil, err + return nil, fmt.Errorf("failed to read body bytes: %w", err) } return bodyBytes, nil @@ -84,7 +95,7 @@ func newDefaultLinkPreview(url *neturl.URL) common.LinkPreview { func fetchThumbnail(logger *zap.Logger, httpClient http.Client, url string) (common.LinkPreviewThumbnail, error) { var thumbnail common.LinkPreviewThumbnail - imgBytes, err := fetchResponseBody(logger, httpClient, url) + imgBytes, err := fetchBody(logger, httpClient, url, nil) if err != nil { return thumbnail, fmt.Errorf("could not fetch thumbnail: %w", err) } @@ -121,49 +132,41 @@ type OEmbedResponse struct { ThumbnailURL string `json:"thumbnail_url"` } -func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) { - preview := newDefaultLinkPreview(u.url) - - requestURL, err := neturl.Parse(u.oembedEndpoint) +func (u OEmbedUnfurler) newOEmbedURL() (*neturl.URL, error) { + oembedURL, err := neturl.Parse(u.oembedEndpoint) if err != nil { - return preview, err + return nil, err } // When format is specified, the provider MUST return data in the requested // format, else return an error. - requestURL.RawQuery = neturl.Values{ + oembedURL.RawQuery = neturl.Values{ "url": {u.url.String()}, "format": {"json"}, }.Encode() - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - req, err := http.NewRequestWithContext(ctx, "GET", requestURL.String(), nil) + return oembedURL, nil +} + +func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) { + preview := newDefaultLinkPreview(u.url) + + oembedURL, err := u.newOEmbedURL() if err != nil { return preview, err } - req.Header.Set("accept", "application/json; charset=utf-8") - req.Header.Set("accept-language", defaultAcceptLanguage) - req.Header.Set("user-agent", defaultUserAgent) - res, err := u.httpClient.Do(req) - defer func() { - if res != nil { - if err = res.Body.Close(); err != nil { - u.logger.Error("failed to close response body", zap.Error(err)) - } - } - }() - if err != nil { - return preview, fmt.Errorf("failed to fetch oEmbed metadata: %w", err) + headers := map[string]string{ + "accept": "application/json; charset=utf-8", + "accept-language": defaultAcceptLanguage, + "user-agent": defaultUserAgent, } - - if res.StatusCode >= http.StatusBadRequest { - return preview, fmt.Errorf("failed to fetch oEmbed metadata, statusCode='%d'", res.StatusCode) + oembedBytes, err := fetchBody(u.logger, u.httpClient, oembedURL.String(), headers) + if err != nil { + return preview, err } var oembedResponse OEmbedResponse - oembedBytes, err := ioutil.ReadAll(res.Body) if err != nil { return preview, err } @@ -198,34 +201,18 @@ type OpenGraphUnfurler struct { func (u OpenGraphUnfurler) unfurl() (common.LinkPreview, error) { preview := newDefaultLinkPreview(u.url) - ctx, cancel := context.WithTimeout(context.Background(), defaultRequestTimeout) - defer cancel() - req, err := http.NewRequestWithContext(ctx, "GET", u.url.String(), nil) - if err != nil { - return preview, err + headers := map[string]string{ + "accept": "text/html; charset=utf-8", + "accept-language": defaultAcceptLanguage, + "user-agent": defaultUserAgent, } - req.Header.Set("accept", "text/html; charset=utf-8") - req.Header.Set("accept-language", defaultAcceptLanguage) - req.Header.Set("user-agent", defaultUserAgent) - - res, err := u.httpClient.Do(req) - defer func() { - if res != nil { - if err = res.Body.Close(); err != nil { - u.logger.Error("failed to close response body", zap.Error(err)) - } - } - }() + bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers) if err != nil { - return preview, fmt.Errorf("failed to get HTML page: %w", err) - } - - if res.StatusCode >= http.StatusBadRequest { - return preview, fmt.Errorf("failed to fetch OpenGraph metadata, statusCode='%d'", res.StatusCode) + return preview, err } var ogMetadata OpenGraphMetadata - err = metabolize.Metabolize(res.Body, &ogMetadata) + err = metabolize.Metabolize(ioutil.NopCloser(bytes.NewBuffer(bodyBytes)), &ogMetadata) if err != nil { return preview, fmt.Errorf("failed to parse OpenGraph data") } @@ -371,6 +358,7 @@ func UnfurlURLs(logger *zap.Logger, httpClient http.Client, urls []string) ([]co previews := make([]common.LinkPreview, 0, len(urls)) for _, url := range urls { + logger.Debug("unfurling", zap.String("url", url)) p, err := unfurl(logger, httpClient, url) if err != nil { logger.Info("failed to unfurl", zap.String("url", url), zap.Error(err)) From 0c37f5584236858287032bdf28510e12a271bc76 Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Thu, 25 May 2023 15:48:56 -0300 Subject: [PATCH 07/12] Fix linter --- protocol/linkpreview/linkpreview.go | 6 ++---- protocol/linkpreview/linkpreview_test.go | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index e482854216b..ea5b69f49da 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -55,10 +55,8 @@ func fetchBody(logger *zap.Logger, httpClient http.Client, url string, headers H return nil, fmt.Errorf("failed to perform HTTP request: %w", err) } - if headers != nil { - for k, v := range headers { - req.Header.Set(k, v) - } + for k, v := range headers { + req.Header.Set(k, v) } res, err := httpClient.Do(req) diff --git a/protocol/linkpreview/linkpreview_test.go b/protocol/linkpreview/linkpreview_test.go index e8a51031742..723e13f2773 100644 --- a/protocol/linkpreview/linkpreview_test.go +++ b/protocol/linkpreview/linkpreview_test.go @@ -58,7 +58,7 @@ func (t *StubTransport) RoundTrip(req *http.Request) (*http.Response, error) { // Add a matcher based on a URL regexp. If a given request URL matches the // regexp, then responseBody will be returned with a hardcoded 200 status code. -func (st *StubTransport) AddURLMatcher(urlRegexp string, responseBody []byte) { +func (t *StubTransport) AddURLMatcher(urlRegexp string, responseBody []byte) { matcher := func(req *http.Request) *http.Response { rx, err := regexp.Compile(regexp.QuoteMeta(urlRegexp)) if err != nil { @@ -73,7 +73,7 @@ func (st *StubTransport) AddURLMatcher(urlRegexp string, responseBody []byte) { return nil } - st.matchers = append(st.matchers, matcher) + t.matchers = append(t.matchers, matcher) } // assertContainsLongString verifies if actual contains a slice of expected and From 788d17c91e47ff767832b67ce85763e40ba51b62 Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Fri, 26 May 2023 09:11:09 -0300 Subject: [PATCH 08/12] Check for error first before deferring --- protocol/linkpreview/linkpreview.go | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index ea5b69f49da..ba1b251472b 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -60,16 +60,14 @@ func fetchBody(logger *zap.Logger, httpClient http.Client, url string, headers H } res, err := httpClient.Do(req) - defer func() { - if res != nil { - if err = res.Body.Close(); err != nil { - logger.Error("failed to close response body", zap.Error(err)) - } - } - }() if err != nil { return nil, err } + defer func() { + if err := res.Body.Close(); err != nil { + logger.Error("failed to close response body", zap.Error(err)) + } + }() if res.StatusCode >= http.StatusBadRequest { return nil, fmt.Errorf("http request failed, statusCode='%d'", res.StatusCode) From a9348b9df12d0aa1fc4c91b00eed8645f165b678 Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Fri, 26 May 2023 09:13:38 -0300 Subject: [PATCH 09/12] Extract magic strings to consts --- protocol/linkpreview/linkpreview.go | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index ba1b251472b..60e1559562a 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -36,14 +36,17 @@ type Headers map[string]string const ( defaultRequestTimeout = 15000 * time.Millisecond + headerAcceptJSON = "application/json; charset=utf-8" + headerAcceptText = "text/html; charset=utf-8" + // Without an user agent, many providers treat status-go as a gluttony bot, // and either respond more frequently with a 429 (Too Many Requests), or // simply refuse to return valid data. - defaultUserAgent = "status-go/v0.151.15" + headerUserAgent = "status-go/v0.151.15" // Currently set to English, but we could make this setting dynamic according // to the user's language of choice. - defaultAcceptLanguage = "en-US,en;q=0.5" + headerAcceptLanguage = "en-US,en;q=0.5" ) func fetchBody(logger *zap.Logger, httpClient http.Client, url string, headers Headers) ([]byte, error) { @@ -153,9 +156,9 @@ func (u OEmbedUnfurler) unfurl() (common.LinkPreview, error) { } headers := map[string]string{ - "accept": "application/json; charset=utf-8", - "accept-language": defaultAcceptLanguage, - "user-agent": defaultUserAgent, + "accept": headerAcceptJSON, + "accept-language": headerAcceptLanguage, + "user-agent": headerUserAgent, } oembedBytes, err := fetchBody(u.logger, u.httpClient, oembedURL.String(), headers) if err != nil { @@ -198,9 +201,9 @@ func (u OpenGraphUnfurler) unfurl() (common.LinkPreview, error) { preview := newDefaultLinkPreview(u.url) headers := map[string]string{ - "accept": "text/html; charset=utf-8", - "accept-language": defaultAcceptLanguage, - "user-agent": defaultUserAgent, + "accept": headerAcceptText, + "accept-language": headerAcceptLanguage, + "user-agent": headerUserAgent, } bodyBytes, err := fetchBody(u.logger, u.httpClient, u.url.String(), headers) if err != nil { From 29c33764df9508e9a39bd319871864577a65c23a Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Fri, 26 May 2023 09:17:03 -0300 Subject: [PATCH 10/12] Update comment --- protocol/linkpreview/linkpreview.go | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/protocol/linkpreview/linkpreview.go b/protocol/linkpreview/linkpreview.go index 60e1559562a..647b5c997d2 100644 --- a/protocol/linkpreview/linkpreview.go +++ b/protocol/linkpreview/linkpreview.go @@ -39,9 +39,11 @@ const ( headerAcceptJSON = "application/json; charset=utf-8" headerAcceptText = "text/html; charset=utf-8" - // Without an user agent, many providers treat status-go as a gluttony bot, - // and either respond more frequently with a 429 (Too Many Requests), or - // simply refuse to return valid data. + // Without a particular user agent, many providers treat status-go as a + // gluttony bot, and either respond more frequently with a 429 (Too Many + // Requests), or simply refuse to return valid data. Note that using a known + // browser UA doesn't work well with some providers, such as Spotify, + // apparently they still flag status-go as a bad actor. headerUserAgent = "status-go/v0.151.15" // Currently set to English, but we could make this setting dynamic according From 876170202f9b9fac38e0c1233712dcc436cc2db1 Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Tue, 30 May 2023 21:14:18 -0300 Subject: [PATCH 11/12] Remove confusing test flag --- protocol/linkpreview/linkpreview_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/protocol/linkpreview/linkpreview_test.go b/protocol/linkpreview/linkpreview_test.go index 723e13f2773..8e646aed065 100644 --- a/protocol/linkpreview/linkpreview_test.go +++ b/protocol/linkpreview/linkpreview_test.go @@ -176,13 +176,7 @@ func Test_UnfurlURLs_YouTube(t *testing.T) { }, } - disabledStubs := false - if disabledStubs { - expected.Thumbnail.Width = 1280 - expected.Thumbnail.Height = 720 - expected.Thumbnail.DataURI = "data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD/2wCEAAUDBA8" - } - transport := StubTransport{disabledStubs: disabledStubs} + transport := StubTransport{} transport.AddURLMatcher( url, []byte(fmt.Sprintf(` From 6481196fd2a4c29f287d84d4354ee19f9e4e4afb Mon Sep 17 00:00:00 2001 From: Icaro Motta <icaro.ldm@gmail.com> Date: Fri, 2 Jun 2023 15:05:53 -0300 Subject: [PATCH 12/12] Bump version --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 21ecc9d99f3..0ad5d4cba3e 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -0.154.4 +0.154.5