From 64704bec7433e3d339e17d120b40a45fb2297f32 Mon Sep 17 00:00:00 2001 From: nasvidia Date: Sat, 18 May 2024 11:19:11 +0100 Subject: [PATCH 1/5] (maint) amazon use a helper function for requests --- TODO | 2 + amazon/amazon.go | 106 +++++++++++++++++------------------------------ 2 files changed, 40 insertions(+), 68 deletions(-) diff --git a/TODO b/TODO index 8f996ae..250b833 100644 --- a/TODO +++ b/TODO @@ -2,6 +2,8 @@ ## features +- parallelise amazon search tv/movie + ## bugs - allow amazon tv search for indivdual series diff --git a/amazon/amazon.go b/amazon/amazon.go index 153bb42..4d2006e 100644 --- a/amazon/amazon.go +++ b/amazon/amazon.go @@ -50,34 +50,13 @@ func ScrapeTitles(searchResults *types.SearchResults) (scrapedResults []types.Mo } func scrapeTitle(movie *types.MovieSearchResult, dateAdded time.Time, ch chan<- *types.MovieSearchResult) { - req, err := http.NewRequestWithContext(context.Background(), "GET", movie.URL, bytes.NewBuffer([]byte{})) movie.ReleaseDate = time.Time{} + rawData, err := makeRequest(movie.URL, "") if err != nil { - fmt.Println("Error creating request:", err) - ch <- movie - return - } - - req.Header.Set("User-Agent", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") - - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - fmt.Println("Error sending request:", err) - ch <- movie - return - } - - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - fmt.Println("Error reading response body:", err) + fmt.Println("scrapeTitle: Error making request:", err) ch <- movie return } - rawData := string(body) movie.ReleaseDate = findTitleDetails(rawData) if movie.ReleaseDate.After(dateAdded) { movie.NewRelease = true @@ -104,34 +83,50 @@ func findTitleDetails(response string) (releaseDate time.Time) { } func SearchAmazonMovie(plexMovie types.PlexMovie, filter string) (movieSearchResult types.SearchResults, err error) { + movieSearchResult.PlexMovie = plexMovie + movieSearchResult.SearchURL = amazonURL + urlEncodedTitle := url.QueryEscape(plexMovie.Title) amazonURL := amazonURL + urlEncodedTitle if filter != "" { amazonURL += filter } amazonURL += "&submit=Search&action=search" - req, err := http.NewRequestWithContext(context.Background(), "GET", amazonURL, bytes.NewBuffer([]byte{})) - movieSearchResult.PlexMovie = plexMovie - movieSearchResult.SearchURL = amazonURL + rawData, err := makeRequest(amazonURL, "") // fix the german filter here + if err != nil { + return movieSearchResult, err + } + + moviesFound, _ := findTitlesInResponse(rawData, true) + movieSearchResult.MovieSearchResults = moviesFound + movieSearchResult = utils.MarkBestMatch(&movieSearchResult) + return movieSearchResult, nil +} + +func makeRequest(inputURL, country string) (response string, err error) { + req, err := http.NewRequestWithContext(context.Background(), "GET", inputURL, bytes.NewBuffer([]byte{})) req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") - country := "uk" - if strings.Contains(filter, "german") { - country = "de" + + switch country { + case "german": + req.Header.Set("Cookie", "country=de;") + default: + req.Header.Set("Cookie", "country=uk;") } - req.Header.Set("Cookie", fmt.Sprintf("country=%s;", country)) + if err != nil { fmt.Println("Error creating request:", err) - return movieSearchResult, err + return response, err } client := &http.Client{} resp, err := client.Do(req) if err != nil { fmt.Println("Error sending request:", err) - return movieSearchResult, err + return response, err } defer resp.Body.Close() @@ -139,55 +134,30 @@ func SearchAmazonMovie(plexMovie types.PlexMovie, filter string) (movieSearchRes body, err := io.ReadAll(resp.Body) if err != nil { fmt.Println("Error reading response body:", err) - return movieSearchResult, err + return response, err } - rawData := string(body) - - moviesFound, _ := findTitlesInResponse(rawData, true) - movieSearchResult.MovieSearchResults = moviesFound - movieSearchResult = utils.MarkBestMatch(&movieSearchResult) - return movieSearchResult, nil + rawResponse := string(body) + return rawResponse, nil } func SearchAmazonTV(plexTVShow *types.PlexTVShow, filter string) (tvSearchResult types.SearchResults, err error) { + tvSearchResult.PlexTVShow = *plexTVShow + tvSearchResult.SearchURL = amazonURL + urlEncodedTitle := url.QueryEscape(fmt.Sprintf("%s complete series", plexTVShow.Title)) // complete series amazonURL := amazonURL + urlEncodedTitle if filter != "" { amazonURL += filter } amazonURL += "&submit=Search&action=search" - req, err := http.NewRequestWithContext(context.Background(), "GET", amazonURL, bytes.NewBuffer([]byte{})) - - tvSearchResult.PlexTVShow = *plexTVShow - tvSearchResult.SearchURL = amazonURL - - req.Header.Set("User-Agent", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") - country := "uk" - if strings.Contains(filter, "german") { - country = "de" - } - req.Header.Set("Cookie", fmt.Sprintf("country=%s;", country)) - if err != nil { - fmt.Println("Error creating request:", err) - return tvSearchResult, err - } - - client := &http.Client{} - resp, err := client.Do(req) + // + //fix the filter for german here + // + // + rawData, err := makeRequest(amazonURL, "") if err != nil { - fmt.Println("Error sending request:", err) - return tvSearchResult, err - } - - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - fmt.Println("Error reading response body:", err) return tvSearchResult, err } - rawData := string(body) _, titlesFound := findTitlesInResponse(rawData, false) tvSearchResult.TVSearchResults = titlesFound From f4e38d6f3e7c4415ae0acc0eb61bc58c36c1d220 Mon Sep 17 00:00:00 2001 From: nasvidia Date: Sat, 18 May 2024 12:09:57 +0100 Subject: [PATCH 2/5] (feat) parallelise amazon search --- amazon/amazon.go | 82 +++++++++++++++++++++++++++++++------------ amazon/amazon_test.go | 6 ++-- cmd/amazon.go | 14 +++----- web/movies/movies.go | 67 +++++++++++------------------------ web/tv/tv.go | 2 +- 5 files changed, 89 insertions(+), 82 deletions(-) diff --git a/amazon/amazon.go b/amazon/amazon.go index 4d2006e..85caede 100644 --- a/amazon/amazon.go +++ b/amazon/amazon.go @@ -16,9 +16,38 @@ import ( ) const ( - amazonURL = "https://www.blu-ray.com/movies/search.php?keyword=" + amazonURL = "https://www.blu-ray.com/movies/search.php?keyword=" + LanguageGerman = "german" ) +var numberMoviesProcessed int = 0 + +func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language string) (searchResults []types.SearchResults) { + ch := make(chan types.SearchResults, len(plexMovies)) + semaphore := make(chan struct{}, types.ConcurrencyLimit) + + for i := range plexMovies { + go func(i int) { + semaphore <- struct{}{} + defer func() { <-semaphore }() + searchAmazonMovie(plexMovies[i], language, ch) + }(i) + } + + searchResults = make([]types.SearchResults, 0, len(plexMovies)) + for range plexMovies { + result := <-ch + searchResults = append(searchResults, result) + numberMoviesProcessed++ + } + numberMoviesProcessed = 0 // job is done + return searchResults +} + +func GetMovieJobProgress() int { + return numberMoviesProcessed +} + func ScrapeTitles(searchResults *types.SearchResults) (scrapedResults []types.MovieSearchResult) { var results, lookups []types.MovieSearchResult for _, searchResult := range searchResults.MovieSearchResults { @@ -82,36 +111,44 @@ func findTitleDetails(response string) (releaseDate time.Time) { return releaseDate } -func SearchAmazonMovie(plexMovie types.PlexMovie, filter string) (movieSearchResult types.SearchResults, err error) { - movieSearchResult.PlexMovie = plexMovie - movieSearchResult.SearchURL = amazonURL +func searchAmazonMovie(plexMovie types.PlexMovie, language string, movieSearchResult chan<- types.SearchResults) { + result := types.SearchResults{} + result.PlexMovie = plexMovie + result.SearchURL = "" urlEncodedTitle := url.QueryEscape(plexMovie.Title) amazonURL := amazonURL + urlEncodedTitle - if filter != "" { - amazonURL += filter + // this searches for the movie in a language + switch language { + case LanguageGerman: + amazonURL += "&audio=" + language + default: + // do nothing } amazonURL += "&submit=Search&action=search" - rawData, err := makeRequest(amazonURL, "") // fix the german filter here + rawData, err := makeRequest(amazonURL, language) if err != nil { - return movieSearchResult, err + fmt.Println("searchAmazonMovie: Error making request:", err) + movieSearchResult <- result + return } moviesFound, _ := findTitlesInResponse(rawData, true) - movieSearchResult.MovieSearchResults = moviesFound - movieSearchResult = utils.MarkBestMatch(&movieSearchResult) - return movieSearchResult, nil + result.MovieSearchResults = moviesFound + result = utils.MarkBestMatch(&result) + movieSearchResult <- result } -func makeRequest(inputURL, country string) (response string, err error) { +func makeRequest(inputURL, language string) (response string, err error) { req, err := http.NewRequestWithContext(context.Background(), "GET", inputURL, bytes.NewBuffer([]byte{})) req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") - switch country { - case "german": + // this forces results from a specific amazon region + switch language { + case LanguageGerman: req.Header.Set("Cookie", "country=de;") default: req.Header.Set("Cookie", "country=uk;") @@ -140,21 +177,20 @@ func makeRequest(inputURL, country string) (response string, err error) { return rawResponse, nil } -func SearchAmazonTV(plexTVShow *types.PlexTVShow, filter string) (tvSearchResult types.SearchResults, err error) { +func SearchAmazonTV(plexTVShow *types.PlexTVShow, language string) (tvSearchResult types.SearchResults, err error) { tvSearchResult.PlexTVShow = *plexTVShow tvSearchResult.SearchURL = amazonURL urlEncodedTitle := url.QueryEscape(fmt.Sprintf("%s complete series", plexTVShow.Title)) // complete series amazonURL := amazonURL + urlEncodedTitle - if filter != "" { - amazonURL += filter + // this searches for the movie in a language + switch language { + case LanguageGerman: + amazonURL += "&audio=" + language + default: + // do nothing } - amazonURL += "&submit=Search&action=search" - // - //fix the filter for german here - // - // - rawData, err := makeRequest(amazonURL, "") + rawData, err := makeRequest(amazonURL, language) if err != nil { return tvSearchResult, err } diff --git a/amazon/amazon_test.go b/amazon/amazon_test.go index dc88c68..24c3c9d 100644 --- a/amazon/amazon_test.go +++ b/amazon/amazon_test.go @@ -40,9 +40,9 @@ func TestFindMoviesInResponse(t *testing.T) { } func TestSearchAmazon(t *testing.T) { - result, err := SearchAmazonMovie(types.PlexMovie{Title: "napoleon dynamite", Year: "2004"}, "") - if err != nil { - t.Errorf("Error searching Amazon: %s", err) + result := SearchAmazonMoviesInParallel([]types.PlexMovie{{Title: "napoleon dynamite", Year: "2004"}}, "") + if len(result) == 0 { + t.Errorf("Expected search results, but got none") } fmt.Println(result) } diff --git a/cmd/amazon.go b/cmd/amazon.go index f4a5663..b3a11f2 100644 --- a/cmd/amazon.go +++ b/cmd/amazon.go @@ -24,16 +24,12 @@ func performAmazonLookup() { if libraryType == types.PlexMovieType { plexMovies := initializePlexMovies() // lets search movies in amazon - for _, movie := range plexMovies { - movieResult, err := amazon.SearchAmazonMovie(movie, "") - if err != nil { - fmt.Printf("Error searching for movie %s: %s\n", movieResult.PlexMovie.Title, err) - continue - } - // if hit, and contains any format that isnt dvd, print the movie - for _, individualResult := range movieResult.MovieSearchResults { + searchResults := amazon.SearchAmazonMoviesInParallel(plexMovies, "") + for i := range searchResults { + for _, individualResult := range searchResults[i].MovieSearchResults { if individualResult.BestMatch && (individualResult.Format == types.DiskBluray || individualResult.Format == types.Disk4K) { - fmt.Printf("%s %v: %s\n", movieResult.PlexMovie.Title, movieResult.PlexMovie.Year, individualResult.URL) + fmt.Printf("%s - %s (%s): %s\n", searchResults[i].PlexMovie.Title, individualResult.Format, + searchResults[i].PlexMovie.Year, individualResult.URL) } } } diff --git a/web/movies/movies.go b/web/movies/movies.go index 06c33d2..fa0bf6d 100644 --- a/web/movies/movies.go +++ b/web/movies/movies.go @@ -51,9 +51,8 @@ func (c MoviesConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { } filters = newfilters //nolint: gocritic - // plexMovies = plexMovies[:10] + plexMovies = plexMovies[:10] //lint: gocritic - var searchResult types.SearchResults jobRunning = true numberOfMoviesProcessed = 0 totalMovies = len(plexMovies) - 1 @@ -67,24 +66,13 @@ func (c MoviesConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { if lookup == "cinemaParadiso" { searchResults = cinemaparadiso.GetCinemaParadisoMoviesInParallel(plexMovies) } else { - for i, movie := range plexMovies { - fmt.Print(".") - - if filters.AudioLanguage == "german" { - searchResult, _ = amazon.SearchAmazonMovie(movie, "&audio=german") - } else { - searchResult, _ = amazon.SearchAmazonMovie(movie, "") - } - // if we are filtering by newer version, we need to search again - if filters.NewerVersion { - scrapedResults := amazon.ScrapeTitles(&searchResult) - searchResult.MovieSearchResults = scrapedResults - } - - searchResults = append(searchResults, searchResult) - numberOfMoviesProcessed = i - } + searchResults = amazon.SearchAmazonMoviesInParallel(plexMovies, filters.AudioLanguage) + // if we are filtering by newer version, we need to search again + // if filters.NewerVersion { + // searchResults = amazon.ScrapeTitles(&searchResults) + // } } + jobRunning = false fmt.Printf("\nProcessed %d movies in %v\n", totalMovies, time.Since(startTime)) }() @@ -94,36 +82,23 @@ func ProgressBarHTML(w http.ResponseWriter, _ *http.Request) { if lookup == "cinemaParadiso" { // check job status numberOfMoviesProcessed = cinemaparadiso.GetMovieJobProgress() - if jobRunning { - fmt.Fprintf(w, `
-
`, numberOfMoviesProcessed, totalMovies) - } else { - // display a table - fmt.Fprintf(w, - `%s
- `, - renderTable(searchResults)) - // reset variables - numberOfMoviesProcessed = 0 - totalMovies = 0 - searchResults = []types.SearchResults{} - } } else { - if jobRunning { - fmt.Fprintf(w, `
+ // check job status + numberOfMoviesProcessed = amazon.GetMovieJobProgress() + } + if jobRunning { + fmt.Fprintf(w, `
`, numberOfMoviesProcessed, totalMovies) - } - if totalMovies == numberOfMoviesProcessed && totalMovies != 0 { - // display a table - fmt.Fprintf(w, - `%s
+ } else { + // display a table + fmt.Fprintf(w, + `%s
`, - renderTable(searchResults)) - // reset variables - numberOfMoviesProcessed = 0 - totalMovies = 0 - searchResults = []types.SearchResults{} - } + renderTable(searchResults)) + // reset variables + numberOfMoviesProcessed = 0 + totalMovies = 0 + searchResults = []types.SearchResults{} } } diff --git a/web/tv/tv.go b/web/tv/tv.go index 4e21ee5..4ab8fbd 100644 --- a/web/tv/tv.go +++ b/web/tv/tv.go @@ -51,7 +51,7 @@ func (c TVConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { } filters = newFilters //nolint: gocritic - // plexTV = plexTV[:10] + plexTV = plexTV[:10] //lint: gocritic var searchResult types.SearchResults From 4d41ef15e2b6a66fbf2884c21484112fce8ff550 Mon Sep 17 00:00:00 2001 From: nasvidia Date: Sat, 18 May 2024 12:59:53 +0100 Subject: [PATCH 3/5] (feat) parallelise amazon tv search --- amazon/amazon.go | 124 +++++++++++++++++++++++++++--------------- amazon/amazon_test.go | 11 ++-- web/tv/tv.go | 49 +++++------------ 3 files changed, 97 insertions(+), 87 deletions(-) diff --git a/amazon/amazon.go b/amazon/amazon.go index 85caede..83f9cf5 100644 --- a/amazon/amazon.go +++ b/amazon/amazon.go @@ -20,7 +20,10 @@ const ( LanguageGerman = "german" ) -var numberMoviesProcessed int = 0 +var ( + numberMoviesProcessed int = 0 + numberTVProcessed int = 0 +) func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language string) (searchResults []types.SearchResults) { ch := make(chan types.SearchResults, len(plexMovies)) @@ -44,10 +47,36 @@ func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language string) return searchResults } +func SearchAmazonTVInParallel(plexTVShows []types.PlexTVShow, language string) (searchResults []types.SearchResults) { + ch := make(chan types.SearchResults, len(plexTVShows)) + semaphore := make(chan struct{}, types.ConcurrencyLimit) + + for i := range plexTVShows { + go func(i int) { + semaphore <- struct{}{} + defer func() { <-semaphore }() + searchAmazonTV(&plexTVShows[i], language, ch) + }(i) + } + + searchResults = make([]types.SearchResults, 0, len(plexTVShows)) + for range plexTVShows { + result := <-ch + searchResults = append(searchResults, result) + numberTVProcessed++ + } + numberTVProcessed = 0 // job is done + return searchResults +} + func GetMovieJobProgress() int { return numberMoviesProcessed } +func GetTVJobProgress() int { + return numberTVProcessed +} + func ScrapeTitles(searchResults *types.SearchResults) (scrapedResults []types.MovieSearchResult) { var results, lookups []types.MovieSearchResult for _, searchResult := range searchResults.MovieSearchResults { @@ -140,46 +169,10 @@ func searchAmazonMovie(plexMovie types.PlexMovie, language string, movieSearchRe movieSearchResult <- result } -func makeRequest(inputURL, language string) (response string, err error) { - req, err := http.NewRequestWithContext(context.Background(), "GET", inputURL, bytes.NewBuffer([]byte{})) - - req.Header.Set("User-Agent", - "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") - - // this forces results from a specific amazon region - switch language { - case LanguageGerman: - req.Header.Set("Cookie", "country=de;") - default: - req.Header.Set("Cookie", "country=uk;") - } - - if err != nil { - fmt.Println("Error creating request:", err) - return response, err - } - - client := &http.Client{} - resp, err := client.Do(req) - if err != nil { - fmt.Println("Error sending request:", err) - return response, err - } - - defer resp.Body.Close() - - body, err := io.ReadAll(resp.Body) - if err != nil { - fmt.Println("Error reading response body:", err) - return response, err - } - rawResponse := string(body) - return rawResponse, nil -} - -func SearchAmazonTV(plexTVShow *types.PlexTVShow, language string) (tvSearchResult types.SearchResults, err error) { - tvSearchResult.PlexTVShow = *plexTVShow - tvSearchResult.SearchURL = amazonURL +func searchAmazonTV(plexTVShow *types.PlexTVShow, language string, tvSearchResult chan<- types.SearchResults) { + result := types.SearchResults{} + result.PlexTVShow = *plexTVShow + result.SearchURL = amazonURL urlEncodedTitle := url.QueryEscape(fmt.Sprintf("%s complete series", plexTVShow.Title)) // complete series amazonURL := amazonURL + urlEncodedTitle @@ -190,15 +183,19 @@ func SearchAmazonTV(plexTVShow *types.PlexTVShow, language string) (tvSearchResu default: // do nothing } + amazonURL += "&submit=Search&action=search" + rawData, err := makeRequest(amazonURL, language) if err != nil { - return tvSearchResult, err + fmt.Println("searchAmazonTV: Error making request:", err) + tvSearchResult <- result + return } _, titlesFound := findTitlesInResponse(rawData, false) - tvSearchResult.TVSearchResults = titlesFound - tvSearchResult = utils.MarkBestMatch(&tvSearchResult) - return tvSearchResult, nil + result.TVSearchResults = titlesFound + result = utils.MarkBestMatch(&result) + tvSearchResult <- result } func findTitlesInResponse(response string, movie bool) (movieResults []types.MovieSearchResult, tvResults []types.TVSearchResult) { @@ -262,3 +259,40 @@ func findTitlesInResponse(response string, movie bool) (movieResults []types.Mov return movieResults, tvResults } + +func makeRequest(inputURL, language string) (response string, err error) { + req, err := http.NewRequestWithContext(context.Background(), "GET", inputURL, bytes.NewBuffer([]byte{})) + + req.Header.Set("User-Agent", + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3") + + // this forces results from a specific amazon region + switch language { + case LanguageGerman: + req.Header.Set("Cookie", "country=de;") + default: + req.Header.Set("Cookie", "country=uk;") + } + + if err != nil { + fmt.Println("Error creating request:", err) + return response, err + } + + client := &http.Client{} + resp, err := client.Do(req) + if err != nil { + fmt.Println("Error sending request:", err) + return response, err + } + + defer resp.Body.Close() + + body, err := io.ReadAll(resp.Body) + if err != nil { + fmt.Println("Error reading response body:", err) + return response, err + } + rawResponse := string(body) + return rawResponse, nil +} diff --git a/amazon/amazon_test.go b/amazon/amazon_test.go index 24c3c9d..52e8a31 100644 --- a/amazon/amazon_test.go +++ b/amazon/amazon_test.go @@ -72,11 +72,10 @@ func TestSearchAmazonTV(t *testing.T) { // Title: "Adventure Time", // Year: "2010", } - result, err := SearchAmazonTV(&show, "") - if err != nil { - t.Errorf("Error searching for TV show: %s", err) - } - if result.SearchURL == "" { - t.Errorf("Expected searchurl, but got none") + result := SearchAmazonTVInParallel([]types.PlexTVShow{show}, "") + + if len(result) == 0 { + t.Errorf("Expected search results, but got none") } + fmt.Println(result) } diff --git a/web/tv/tv.go b/web/tv/tv.go index 4ab8fbd..98aa9db 100644 --- a/web/tv/tv.go +++ b/web/tv/tv.go @@ -54,7 +54,6 @@ func (c TVConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { plexTV = plexTV[:10] //lint: gocritic - var searchResult types.SearchResults tvJobRunning = true numberOfTVProcessed = 0 totalTV = len(plexTV) - 1 @@ -67,16 +66,7 @@ func (c TVConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { if lookup == "cinemaParadiso" { tvSearchResults = cinemaparadiso.GetCinemaParadisoTVInParallel(plexTV) } else { - for i := range plexTV { - fmt.Print(".") - if filters.AudioLanguage == "german" { - searchResult, _ = amazon.SearchAmazonTV(&plexTV[i], fmt.Sprintf("&audio=%s", filters.AudioLanguage)) - } else { - searchResult, _ = amazon.SearchAmazonTV(&plexTV[i], "") - } - tvSearchResults = append(tvSearchResults, searchResult) - numberOfTVProcessed = i - } + tvSearchResults = amazon.SearchAmazonTVInParallel(plexTV, filters.AudioLanguage) } tvJobRunning = false fmt.Printf("\nProcessed %d TV Shows in %v\n", totalTV, time.Since(startTime)) @@ -86,34 +76,21 @@ func (c TVConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { func ProgressBarHTML(w http.ResponseWriter, _ *http.Request) { if lookup == "cinemaParadiso" { numberOfTVProcessed = cinemaparadiso.GetTVJobProgress() - if tvJobRunning { - fmt.Fprintf(w, `
-
`, numberOfTVProcessed, totalTV) - } else { - fmt.Fprintf(w, - `%s
- `, - renderTVTable(tvSearchResults)) - // reset variables - numberOfTVProcessed = 0 - totalTV = 0 - tvSearchResults = []types.SearchResults{} - } } else { - if tvJobRunning { - fmt.Fprintf(w, `
+ numberOfTVProcessed = amazon.GetTVJobProgress() + } + if tvJobRunning { + fmt.Fprintf(w, `
`, numberOfTVProcessed, totalTV) - } - if totalTV == numberOfTVProcessed && totalTV != 0 { - fmt.Fprintf(w, - `%s
+ } else { + fmt.Fprintf(w, + `%s
`, - renderTVTable(tvSearchResults)) - // reset variables - numberOfTVProcessed = 0 - totalTV = 0 - tvSearchResults = []types.SearchResults{} - } + renderTVTable(tvSearchResults)) + // reset variables + numberOfTVProcessed = 0 + totalTV = 0 + tvSearchResults = []types.SearchResults{} } } From f043bbc859ec6f4b52ce0b3dbb0316be3fe334d2 Mon Sep 17 00:00:00 2001 From: nasvidia Date: Mon, 20 May 2024 13:21:08 +0100 Subject: [PATCH 4/5] (feat) parallelise amazon movie search --- TODO | 4 +-- amazon/amazon.go | 79 ++++++++++++++++++------------------------- amazon/amazon_test.go | 14 -------- plex/plex.go | 4 ++- web/movies/movies.go | 42 ++++++++++++----------- 5 files changed, 61 insertions(+), 82 deletions(-) diff --git a/TODO b/TODO index 250b833..116a012 100644 --- a/TODO +++ b/TODO @@ -2,14 +2,13 @@ ## features -- parallelise amazon search tv/movie - ## bugs - allow amazon tv search for indivdual series - allow amazon tv search for newer series - music, a-ha/ash doesnt match as an artist why ? - move language filtering out of plex search,should only happens in web tv & movie +- move newer show out of amazon and cinema-paradiso, move to web page ## done @@ -35,3 +34,4 @@ - parallelise cinema-paradiso movie search 6m20 to 2m25 - parallelise cinema-paradiso tv search - for movies/tc dont refresh plex list every time, unless necessary +- parallelise amazon search tv/movie diff --git a/amazon/amazon.go b/amazon/amazon.go index 83f9cf5..c79905e 100644 --- a/amazon/amazon.go +++ b/amazon/amazon.go @@ -77,67 +77,54 @@ func GetTVJobProgress() int { return numberTVProcessed } -func ScrapeTitles(searchResults *types.SearchResults) (scrapedResults []types.MovieSearchResult) { - var results, lookups []types.MovieSearchResult - for _, searchResult := range searchResults.MovieSearchResults { - if !searchResult.BestMatch { - results = append(results, searchResult) - } else { - lookups = append(lookups, searchResult) - } - } - - if len(lookups) > 0 { - ch := make(chan *types.MovieSearchResult, len(lookups)) - // Limit number of concurrent requests - semaphore := make(chan struct{}, types.ConcurrencyLimit) - for i := range lookups { - go func() { - semaphore <- struct{}{} - defer func() { <-semaphore }() - scrapeTitle(&lookups[i], searchResults.PlexMovie.DateAdded, ch) - }() - } - - for i := 0; i < len(lookups); i++ { - lookup := <-ch - results = append(results, *lookup) +func ScrapeTitlesParallel(searchResults []types.SearchResults) (scrapedResults []types.SearchResults) { + numberMoviesProcessed = 0 + + for i := range searchResults { + // check if the search result is a movie + if len(searchResults[i].MovieSearchResults) > 0 { + ch := make(chan *types.MovieSearchResult, len(searchResults[i].MovieSearchResults)) + semaphore := make(chan struct{}, types.ConcurrencyLimit) + for j := range searchResults[i].MovieSearchResults { + go func(j int) { + semaphore <- struct{}{} + defer func() { <-semaphore }() + scrapeTitle(&searchResults[i].MovieSearchResults[j], searchResults[i].PlexMovie.DateAdded, ch) + }(j) + } + movieResults := make([]types.MovieSearchResult, 0, len(searchResults[i].MovieSearchResults)) + for range searchResults[i].MovieSearchResults { + result := <-ch + movieResults = append(movieResults, *result) + } + fmt.Println("Scraped", len(movieResults), "titles for", searchResults[i].PlexMovie.Title) + searchResults[i].MovieSearchResults = movieResults } + scrapedResults = append(scrapedResults, searchResults[i]) + numberMoviesProcessed++ } - return results + return scrapedResults } func scrapeTitle(movie *types.MovieSearchResult, dateAdded time.Time, ch chan<- *types.MovieSearchResult) { - movie.ReleaseDate = time.Time{} rawData, err := makeRequest(movie.URL, "") if err != nil { fmt.Println("scrapeTitle: Error making request:", err) ch <- movie return } - movie.ReleaseDate = findTitleDetails(rawData) - if movie.ReleaseDate.After(dateAdded) { - movie.NewRelease = true - } - ch <- movie -} - -func findTitleDetails(response string) (releaseDate time.Time) { + // Find the release date + movie.ReleaseDate = time.Time{} // default to zero time r := regexp.MustCompile(`(.*?)`) - - match := r.FindStringSubmatch(response) + match := r.FindStringSubmatch(rawData) if match != nil { stringDate := match[1] - var err error - releaseDate, err = time.Parse("Jan 02, 2006", stringDate) - if err != nil { - releaseDate = time.Time{} - } - } else { - releaseDate = time.Time{} + movie.ReleaseDate, _ = time.Parse("Jan 02, 2006", stringDate) } - - return releaseDate + if movie.ReleaseDate.After(dateAdded) { + movie.NewRelease = true + } + ch <- movie } func searchAmazonMovie(plexMovie types.PlexMovie, language string, movieSearchResult chan<- types.SearchResults) { diff --git a/amazon/amazon_test.go b/amazon/amazon_test.go index 52e8a31..2f29d2c 100644 --- a/amazon/amazon_test.go +++ b/amazon/amazon_test.go @@ -4,7 +4,6 @@ import ( "fmt" "os" "testing" - "time" "github.com/tphoney/plex-lookup/types" ) @@ -47,19 +46,6 @@ func TestSearchAmazon(t *testing.T) { fmt.Println(result) } -func TestFindMovieDetails(t *testing.T) { - rawdata, err := os.ReadFile("testdata/anchorman.html") - if err != nil { - t.Errorf("Error reading testdata/anchorman.html: %s", err) - } - - processed := findTitleDetails(string(rawdata)) - expected := time.Date(2010, time.October, 4, 0, 0, 0, 0, time.UTC) - if processed.Compare(expected) != 0 { - t.Errorf("Expected %s, but got %s", expected, processed) - } -} - func TestSearchAmazonTV(t *testing.T) { if plexIP == "" || plexToken == "" { t.Skip("ACCEPTANCE TEST: PLEX environment variables not set") diff --git a/plex/plex.go b/plex/plex.go index 6c6c10d..5596403 100644 --- a/plex/plex.go +++ b/plex/plex.go @@ -509,7 +509,7 @@ func GetPlexMovies(ipAddress, libraryID, plexToken string, filters []Filter) (mo } movieList = extractMovies(string(body)) - fmt.Printf("Movies: %v\n", movieList) + fmt.Printf("Plex movies: %d.\n", len(movieList)) return movieList } @@ -570,6 +570,7 @@ func GetPlexTV(ipAddress, libraryID, plexToken string) (tvShowList []types.PlexT filteredTVShows = append(filteredTVShows, tvShowList[i]) } } + fmt.Printf("Plex TV shows: %d.\n", len(filteredTVShows)) return filteredTVShows } @@ -753,6 +754,7 @@ func GetPlexMusicArtists(ipAddress, libraryID, plexToken string) (artists []type artists[i].Albums = GetPlexMusicAlbums(ipAddress, plexToken, libraryID, artists[i].RatingKey) } + fmt.Printf("Plex music artists: %d.\n", len(artists)) return artists } diff --git a/web/movies/movies.go b/web/movies/movies.go index fa0bf6d..1dedfa7 100644 --- a/web/movies/movies.go +++ b/web/movies/movies.go @@ -51,7 +51,7 @@ func (c MoviesConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { } filters = newfilters //nolint: gocritic - plexMovies = plexMovies[:10] + // plexMovies = plexMovies[:10] //lint: gocritic jobRunning = true numberOfMoviesProcessed = 0 @@ -68,9 +68,9 @@ func (c MoviesConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { } else { searchResults = amazon.SearchAmazonMoviesInParallel(plexMovies, filters.AudioLanguage) // if we are filtering by newer version, we need to search again - // if filters.NewerVersion { - // searchResults = amazon.ScrapeTitles(&searchResults) - // } + if filters.NewerVersion { + searchResults = amazon.ScrapeTitlesParallel(searchResults) + } } jobRunning = false @@ -102,24 +102,24 @@ func ProgressBarHTML(w http.ResponseWriter, _ *http.Request) { } } -func renderTable(movieCollection []types.SearchResults) (tableRows string) { - tableRows = `Plex TitlePlex ResolutionBlu-ray4K-rayDisc` //nolint: lll - for i := range movieCollection { +func renderTable(searchResults []types.SearchResults) (tableRows string) { + searchResults = filterMovieSearchResults(searchResults) + tableRows = `Plex TitlePlex Resolution + Blu-ray4K-rayNew releaseDisc` //nolint: lll + for i := range searchResults { + newRelease := "no" + if len(searchResults[i].MovieSearchResults) > 0 && searchResults[i].MovieSearchResults[0].NewRelease { + newRelease = "yes" + } tableRows += fmt.Sprintf( - `%s [%v]%s%d%d`, - movieCollection[i].SearchURL, movieCollection[i].PlexMovie.Title, movieCollection[i].PlexMovie.Year, - movieCollection[i].PlexMovie.Resolution, movieCollection[i].MatchesBluray, movieCollection[i].Matches4k) - if movieCollection[i].MatchesBluray+movieCollection[i].Matches4k > 0 { + `%s [%v]%s%d%d%s`, + searchResults[i].SearchURL, searchResults[i].PlexMovie.Title, searchResults[i].PlexMovie.Year, + searchResults[i].PlexMovie.Resolution, searchResults[i].MatchesBluray, searchResults[i].Matches4k, newRelease) + if searchResults[i].MatchesBluray+searchResults[i].Matches4k > 0 { tableRows += "" - for _, result := range movieCollection[i].MovieSearchResults { + for _, result := range searchResults[i].MovieSearchResults { if result.BestMatch && (result.Format == types.DiskBluray || result.Format == types.Disk4K) { - tableRows += fmt.Sprintf( - `%v`, - result.URL, result.UITitle) - if result.NewRelease { - tableRows += "(new)" - } - tableRows += " " + tableRows += fmt.Sprintf(`%v `, result.URL, result.UITitle) } } tableRows += "" @@ -150,3 +150,7 @@ func fetchPlexMovies(plexIP, plexMovieLibraryID, plexToken, language string) (al allMovies = append(allMovies, plex.GetPlexMovies(plexIP, plexMovieLibraryID, plexToken, filter)...) return allMovies } + +func filterMovieSearchResults(searchResults []types.SearchResults) []types.SearchResults { + return searchResults +} From c1f8296c778f0a0146deac3bdd3da4e27e3b05de Mon Sep 17 00:00:00 2001 From: nasvidia Date: Mon, 20 May 2024 13:55:01 +0100 Subject: [PATCH 5/5] (fix) change amazon scrape title approach --- TODO | 11 +++-- amazon/amazon.go | 96 ++++++++++++++++++++++++------------------- amazon/amazon_test.go | 26 ++++++++++++ web/tv/tv.go | 2 +- 4 files changed, 88 insertions(+), 47 deletions(-) diff --git a/TODO b/TODO index 116a012..ff9ff24 100644 --- a/TODO +++ b/TODO @@ -2,13 +2,15 @@ ## features +- new release for amazon tv series +- allow amazon tv search for indivdual series +- new release for cinema-paradiso tv / movie + ## bugs -- allow amazon tv search for indivdual series -- allow amazon tv search for newer series - music, a-ha/ash doesnt match as an artist why ? -- move language filtering out of plex search,should only happens in web tv & movie -- move newer show out of amazon and cinema-paradiso, move to web page +- move language filtering out of plex search, should only happen in web tv & movie web pages +- when scraping movies, do we stop at the first best match ? ## done @@ -35,3 +37,4 @@ - parallelise cinema-paradiso tv search - for movies/tc dont refresh plex list every time, unless necessary - parallelise amazon search tv/movie +- move newer show out of amazon and cinema-paradiso, move to web page diff --git a/amazon/amazon.go b/amazon/amazon.go index c79905e..3bf9673 100644 --- a/amazon/amazon.go +++ b/amazon/amazon.go @@ -26,6 +26,7 @@ var ( ) func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language string) (searchResults []types.SearchResults) { + numberMoviesProcessed = 0 ch := make(chan types.SearchResults, len(plexMovies)) semaphore := make(chan struct{}, types.ConcurrencyLimit) @@ -44,10 +45,12 @@ func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language string) numberMoviesProcessed++ } numberMoviesProcessed = 0 // job is done + fmt.Println("amazon movies found:", len(searchResults)) return searchResults } func SearchAmazonTVInParallel(plexTVShows []types.PlexTVShow, language string) (searchResults []types.SearchResults) { + numberMoviesProcessed = 0 ch := make(chan types.SearchResults, len(plexTVShows)) semaphore := make(chan struct{}, types.ConcurrencyLimit) @@ -66,6 +69,7 @@ func SearchAmazonTVInParallel(plexTVShows []types.PlexTVShow, language string) ( numberTVProcessed++ } numberTVProcessed = 0 // job is done + fmt.Println("amazon TV shows found:", len(searchResults)) return searchResults } @@ -79,52 +83,53 @@ func GetTVJobProgress() int { func ScrapeTitlesParallel(searchResults []types.SearchResults) (scrapedResults []types.SearchResults) { numberMoviesProcessed = 0 - + ch := make(chan types.SearchResults, len(searchResults)) + semaphore := make(chan struct{}, types.ConcurrencyLimit) for i := range searchResults { - // check if the search result is a movie - if len(searchResults[i].MovieSearchResults) > 0 { - ch := make(chan *types.MovieSearchResult, len(searchResults[i].MovieSearchResults)) - semaphore := make(chan struct{}, types.ConcurrencyLimit) - for j := range searchResults[i].MovieSearchResults { - go func(j int) { - semaphore <- struct{}{} - defer func() { <-semaphore }() - scrapeTitle(&searchResults[i].MovieSearchResults[j], searchResults[i].PlexMovie.DateAdded, ch) - }(j) - } - movieResults := make([]types.MovieSearchResult, 0, len(searchResults[i].MovieSearchResults)) - for range searchResults[i].MovieSearchResults { - result := <-ch - movieResults = append(movieResults, *result) - } - fmt.Println("Scraped", len(movieResults), "titles for", searchResults[i].PlexMovie.Title) - searchResults[i].MovieSearchResults = movieResults - } - scrapedResults = append(scrapedResults, searchResults[i]) + go func(i int) { + semaphore <- struct{}{} + defer func() { <-semaphore }() + scrapeTitles(&searchResults[i], ch) + }(i) + } + + scrapedResults = make([]types.SearchResults, 0, len(searchResults)) + for range searchResults { + result := <-ch + scrapedResults = append(scrapedResults, result) numberMoviesProcessed++ } + numberMoviesProcessed = 0 + fmt.Println("amazon Movie titles scraped:", len(scrapedResults)) return scrapedResults } -func scrapeTitle(movie *types.MovieSearchResult, dateAdded time.Time, ch chan<- *types.MovieSearchResult) { - rawData, err := makeRequest(movie.URL, "") - if err != nil { - fmt.Println("scrapeTitle: Error making request:", err) - ch <- movie - return - } - // Find the release date - movie.ReleaseDate = time.Time{} // default to zero time - r := regexp.MustCompile(`(.*?)`) - match := r.FindStringSubmatch(rawData) - if match != nil { - stringDate := match[1] - movie.ReleaseDate, _ = time.Parse("Jan 02, 2006", stringDate) - } - if movie.ReleaseDate.After(dateAdded) { - movie.NewRelease = true +func scrapeTitles(searchResult *types.SearchResults, ch chan<- types.SearchResults) { + dateAdded := searchResult.PlexMovie.DateAdded + for i := range searchResult.MovieSearchResults { + // this is to limit the number of requests + if !searchResult.MovieSearchResults[i].BestMatch { + continue + } + rawData, err := makeRequest(searchResult.MovieSearchResults[i].URL, "") + if err != nil { + fmt.Println("scrapeTitle: Error making request:", err) + ch <- *searchResult + return + } + // Find the release date + searchResult.MovieSearchResults[i].ReleaseDate = time.Time{} // default to zero time + r := regexp.MustCompile(`(.*?)`) + match := r.FindStringSubmatch(rawData) + if match != nil { + stringDate := match[1] + searchResult.MovieSearchResults[i].ReleaseDate, _ = time.Parse("Jan 02, 2006", stringDate) + } + if searchResult.MovieSearchResults[i].ReleaseDate.After(dateAdded) { + searchResult.MovieSearchResults[i].NewRelease = true + } } - ch <- movie + ch <- *searchResult } func searchAmazonMovie(plexMovie types.PlexMovie, language string, movieSearchResult chan<- types.SearchResults) { @@ -262,14 +267,14 @@ func makeRequest(inputURL, language string) (response string, err error) { } if err != nil { - fmt.Println("Error creating request:", err) + fmt.Println("makeRequest: error creating request:", err) return response, err } client := &http.Client{} resp, err := client.Do(req) if err != nil { - fmt.Println("Error sending request:", err) + fmt.Println("makeRequest: error sending request:", err) return response, err } @@ -277,9 +282,16 @@ func makeRequest(inputURL, language string) (response string, err error) { body, err := io.ReadAll(resp.Body) if err != nil { - fmt.Println("Error reading response body:", err) + fmt.Println("makeRequest: error reading response body:", err) return response, err } + + // check for a 200 status code + if resp.StatusCode != http.StatusOK { + fmt.Println("amazon: status code not OK, probably rate limited:", resp.StatusCode) + return response, fmt.Errorf("amazon: status code not OK: %d", resp.StatusCode) + } + rawResponse := string(body) return rawResponse, nil } diff --git a/amazon/amazon_test.go b/amazon/amazon_test.go index 2f29d2c..9aa193e 100644 --- a/amazon/amazon_test.go +++ b/amazon/amazon_test.go @@ -65,3 +65,29 @@ func TestSearchAmazonTV(t *testing.T) { } fmt.Println(result) } + +func TestScrapeTitlesParallel(t *testing.T) { + result := ScrapeTitlesParallel([]types.SearchResults{ + { + PlexMovie: types.PlexMovie{ + Title: "napoleon dynamite", + Year: "2001", + }, + MovieSearchResults: []types.MovieSearchResult{ + { + FoundTitle: "Napoleon Dynamite", + URL: "https://www.blu-ray.com/movies/Napoleon-Dynamite-Blu-ray/2535/", + BestMatch: true, + }, + }, + }, + }) + + if len(result) == 0 { + t.Errorf("Expected search results, but got none") + } + if result[0].MovieSearchResults[0].ReleaseDate.Year() == 1 { + t.Errorf("Expected a sensible release date year but got: %+v", result[0].MovieSearchResults[0].ReleaseDate) + } + fmt.Println(result) +} diff --git a/web/tv/tv.go b/web/tv/tv.go index 98aa9db..d69c59b 100644 --- a/web/tv/tv.go +++ b/web/tv/tv.go @@ -51,7 +51,7 @@ func (c TVConfig) ProcessHTML(w http.ResponseWriter, r *http.Request) { } filters = newFilters //nolint: gocritic - plexTV = plexTV[:10] + // plexTV = plexTV[:10] //lint: gocritic tvJobRunning = true