Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

(feat) improve tv season search cinema-paradiso #36

Merged
merged 6 commits into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions TODO
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@

## features

- new release for amazon tv series
- allow amazon tv search for individual series
- new release for cinema-paradiso tv
- allow the use of playlists for finding music / TV / movies

## bugs

- music: a-ha/ash doesn't match as an artist, why?
- improve cinema-paradiso movie scrape, many search results are the same page. wasted processing
- flatten amazon tv search results
- improve best match for tv series

## done

Expand Down Expand Up @@ -43,3 +42,6 @@
- speed up plex fetch of movie details
- speed up plex fetch of tv shows
- when scraping movies, do we stop at the first best match ?
- new release for cinema-paradiso tv
- new release for amazon tv series
- allow amazon tv search for individual series
185 changes: 161 additions & 24 deletions amazon/amazon.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
"net/http"
"net/url"
"regexp"
"strconv"
"strings"
"time"

Expand All @@ -23,10 +24,56 @@ const (
var (
	// Package-level progress counters. They are reset and incremented by the
	// parallel search/scrape functions in this file; numberTVProcessed is the
	// value reported by GetTVJobProgress.
	numberMoviesProcessed int = 0
	numberTVProcessed     int = 0
	// seasonNumberToInt maps a lower-case cardinal word ("one" .. "twenty")
	// to its season number. decipherTVName looks for these words directly
	// after "season " / "seasons " in a lower-cased title.
	// Note that some keys are prefixes of others (e.g. "six" and "sixteen").
	//nolint: mnd
	seasonNumberToInt = map[string]int{
		"one":       1,
		"two":       2,
		"three":     3,
		"four":      4,
		"five":      5,
		"six":       6,
		"seven":     7,
		"eight":     8,
		"nine":      9,
		"ten":       10,
		"eleven":    11,
		"twelve":    12,
		"thirteen":  13,
		"fourteen":  14,
		"fifteen":   15,
		"sixteen":   16,
		"seventeen": 17,
		"eighteen":  18,
		"nineteen":  19,
		"twenty":    20,
	}
	// ordinalNumberToSeason maps a lower-case "<ordinal> season" phrase
	// ("first season" .. "twentieth season") to its season number.
	// decipherTVName searches for these phrases anywhere in the lower-cased
	// text that follows the first colon of a title.
	//nolint: mnd
	ordinalNumberToSeason = map[string]int{
		"first season":       1,
		"second season":      2,
		"third season":       3,
		"fourth season":      4,
		"fifth season":       5,
		"sixth season":       6,
		"seventh season":     7,
		"eighth season":      8,
		"ninth season":       9,
		"tenth season":       10,
		"eleventh season":    11,
		"twelfth season":     12,
		"thirteenth season":  13,
		"fourteenth season":  14,
		"fifteenth season":   15,
		"sixteenth season":   16,
		"seventeenth season": 17,
		"eighteenth season":  18,
		"nineteenth season":  19,
		"twentieth season":   20,
	}
)

// nolint: dupl, nolintlint
func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language, region string) (searchResults []types.SearchResults) {
func MoviesInParallel(plexMovies []types.PlexMovie, language, region string) (searchResults []types.SearchResults) {
numberMoviesProcessed = 0
ch := make(chan types.SearchResults, len(plexMovies))
semaphore := make(chan struct{}, types.ConcurrencyLimit)
Expand All @@ -35,7 +82,7 @@ func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language, region
go func(i int) {
semaphore <- struct{}{}
defer func() { <-semaphore }()
searchAmazonMovie(&plexMovies[i], language, region, ch)
searchMovie(&plexMovies[i], language, region, ch)
}(i)
}

Expand All @@ -51,7 +98,7 @@ func SearchAmazonMoviesInParallel(plexMovies []types.PlexMovie, language, region
}

// nolint: dupl, nolintlint
func SearchAmazonTVInParallel(plexTVShows []types.PlexTVShow, language, region string) (searchResults []types.SearchResults) {
func TVInParallel(plexTVShows []types.PlexTVShow, language, region string) (searchResults []types.SearchResults) {
numberMoviesProcessed = 0
ch := make(chan types.SearchResults, len(plexTVShows))
semaphore := make(chan struct{}, types.ConcurrencyLimit)
Expand All @@ -60,7 +107,7 @@ func SearchAmazonTVInParallel(plexTVShows []types.PlexTVShow, language, region s
go func(i int) {
semaphore <- struct{}{}
defer func() { <-semaphore }()
searchAmazonTV(&plexTVShows[i], language, region, ch)
searchTV(&plexTVShows[i], language, region, ch)
}(i)
}

Expand All @@ -83,30 +130,48 @@ func GetTVJobProgress() int {
return numberTVProcessed
}

func ScrapeTitlesParallel(searchResults []types.SearchResults, region string) (scrapedResults []types.SearchResults) {
numberMoviesProcessed = 0
func ScrapeTitlesParallel(searchResults []types.SearchResults, region string, isTV bool) (scrapedResults []types.SearchResults) {
// are we tv or movie
if isTV {
numberTVProcessed = 0
} else {
numberMoviesProcessed = 0
}
ch := make(chan types.SearchResults, len(searchResults))
semaphore := make(chan struct{}, types.ConcurrencyLimit)
for i := range searchResults {
go func(i int) {
semaphore <- struct{}{}
defer func() { <-semaphore }()
scrapeTitles(&searchResults[i], region, ch)
if isTV {
scrapeTVTitles(&searchResults[i], region, ch)
} else {
scrapeMovieTitles(&searchResults[i], region, ch)
}
}(i)
}

scrapedResults = make([]types.SearchResults, 0, len(searchResults))
for range searchResults {
result := <-ch
scrapedResults = append(scrapedResults, result)
numberMoviesProcessed++
if isTV {
numberTVProcessed++
} else {
numberMoviesProcessed++
}
}
numberMoviesProcessed = 0
fmt.Println("amazon Movie titles scraped:", len(scrapedResults))
if isTV {
numberTVProcessed = 0
} else {
numberMoviesProcessed = 0
}
fmt.Println("amazon titles scraped:", len(scrapedResults))
return scrapedResults
}

func scrapeTitles(searchResult *types.SearchResults, region string, ch chan<- types.SearchResults) {
// nolint: dupl, nolintlint
func scrapeMovieTitles(searchResult *types.SearchResults, region string, ch chan<- types.SearchResults) {
dateAdded := searchResult.PlexMovie.DateAdded
for i := range searchResult.MovieSearchResults {
// this is to limit the number of requests
Expand Down Expand Up @@ -134,7 +199,36 @@ func scrapeTitles(searchResult *types.SearchResults, region string, ch chan<- ty
ch <- *searchResult
}

func searchAmazonMovie(plexMovie *types.PlexMovie, language, region string, movieSearchResult chan<- types.SearchResults) {
// tvReleaseDateRe captures the link text that scrapeTVTitles interprets as the
// release date of a title page. It is compiled once at package scope instead
// of once per scraped result.
var tvReleaseDateRe = regexp.MustCompile(`<a class="grey noline" alt=".*">(.*?)</a></span>`)

// scrapeTVTitles requests the page of every TV search result that is flagged
// as BestMatch, extracts its release date and marks the result as a new
// release when that date is after the Plex show's DateAdded.
//
// Results that are not a best match are skipped to limit the number of
// requests. Exactly one value is always sent on ch: the (possibly partially
// updated) search result. If a request fails the error is printed and the
// result is sent immediately, leaving the remaining entries untouched.
//
// nolint: dupl, nolintlint
func scrapeTVTitles(searchResult *types.SearchResults, region string, ch chan<- types.SearchResults) {
	dateAdded := searchResult.PlexTVShow.DateAdded
	for i := range searchResult.TVSearchResults {
		tvResult := &searchResult.TVSearchResults[i]
		// this is to limit the number of requests
		if !tvResult.BestMatch {
			continue
		}
		rawData, err := makeRequest(tvResult.URL, region)
		if err != nil {
			fmt.Println("scrapeTitle: Error making request:", err)
			ch <- *searchResult
			return
		}
		// Default to the zero time; it is only replaced when a date is found
		// on the page AND it parses in the expected "Jan 02, 2006" layout.
		tvResult.ReleaseDate = time.Time{}
		if match := tvReleaseDateRe.FindStringSubmatch(rawData); match != nil {
			if releaseDate, parseErr := time.Parse("Jan 02, 2006", match[1]); parseErr == nil {
				tvResult.ReleaseDate = releaseDate
			}
		}
		if tvResult.ReleaseDate.After(dateAdded) {
			tvResult.NewRelease = true
		}
	}
	ch <- *searchResult
}

func searchMovie(plexMovie *types.PlexMovie, language, region string, movieSearchResult chan<- types.SearchResults) {
result := types.SearchResults{}
result.PlexMovie = *plexMovie

Expand All @@ -152,7 +246,7 @@ func searchAmazonMovie(plexMovie *types.PlexMovie, language, region string, movi
result.SearchURL = amazonURL
rawData, err := makeRequest(amazonURL, region)
if err != nil {
fmt.Println("searchAmazonMovie: Error making request:", err)
fmt.Println("searchMovie: Error making request:", err)
movieSearchResult <- result
return
}
Expand All @@ -163,12 +257,12 @@ func searchAmazonMovie(plexMovie *types.PlexMovie, language, region string, movi
movieSearchResult <- result
}

func searchAmazonTV(plexTVShow *types.PlexTVShow, language, region string, tvSearchResult chan<- types.SearchResults) {
func searchTV(plexTVShow *types.PlexTVShow, language, region string, tvSearchResult chan<- types.SearchResults) {
result := types.SearchResults{}
result.PlexTVShow = *plexTVShow
result.SearchURL = amazonURL

urlEncodedTitle := url.QueryEscape(fmt.Sprintf("%s complete series", plexTVShow.Title)) // complete series
urlEncodedTitle := url.QueryEscape(plexTVShow.Title)
amazonURL := amazonURL + urlEncodedTitle
// this searches for the movie in a language
switch language {
Expand All @@ -181,7 +275,7 @@ func searchAmazonTV(plexTVShow *types.PlexTVShow, language, region string, tvSea

rawData, err := makeRequest(amazonURL, region)
if err != nil {
fmt.Println("searchAmazonTV: Error making request:", err)
fmt.Println("searchTV: Error making request:", err)
tvSearchResult <- result
return
}
Expand All @@ -202,7 +296,6 @@ func findTitlesInResponse(response string, movie bool) (movieResults []types.Mov
}
response = response[startIndex:]
endIndex := strings.Index(response, `</div></div>`)

// If both start and end index are found
if endIndex != -1 {
// Extract the entry
Expand Down Expand Up @@ -235,13 +328,14 @@ func findTitlesInResponse(response string, movie bool) (movieResults []types.Mov
movieResults = append(movieResults, types.MovieSearchResult{
URL: returnURL, Format: format, Year: year, FoundTitle: foundTitle, UITitle: format})
} else {
boxSet := false
if strings.Contains(foundTitle, ": The Complete Series") {
foundTitle = strings.TrimSuffix(foundTitle, ": The Complete Series")
boxSet = true
}
tvResults = append(tvResults, types.TVSearchResult{
URL: returnURL, Format: []string{format}, Year: year, FoundTitle: foundTitle, UITitle: foundTitle, BoxSet: boxSet})
decipheredTitle, number, boxSet := decipherTVName(foundTitle)
// split year
splitYear := strings.Split(year, "-")
year = splitYear[0]
tvResult := types.TVSearchResult{
URL: returnURL, Format: []string{format}, Year: year, FoundTitle: decipheredTitle, UITitle: decipheredTitle}
tvResult.Seasons = append(tvResult.Seasons, types.TVSeasonResult{URL: returnURL, Format: format, Number: number, BoxSet: boxSet})
tvResults = append(tvResults, tvResult)
}
}
// remove the movie entry from the response
Expand Down Expand Up @@ -291,3 +385,46 @@ func makeRequest(inputURL, region string) (response string, err error) {
rawResponse := string(body)
return rawResponse, nil
}

// seasonDigitsRe captures the digits that directly follow "season " or
// "seasons " in a lower-cased title suffix. It is compiled once at package
// scope rather than on every call.
var seasonDigitsRe = regexp.MustCompile(`seasons?\ (\d+)`)

// decipherTVName splits a scraped title such as "Friends: Season 3" into the
// show title and the season it refers to.
//
// title is always the text before the first colon. The remaining text is
// lower-cased and inspected in this order:
//
//  1. "complete series" / "complete seasons" -> boxset is true, number is 0.
//  2. "season(s) <digits>"                   -> number is the parsed integer.
//  3. "season(s) <word>" (seasonNumberToInt) -> number from the table.
//  4. "<ordinal> season" (ordinalNumberToSeason) -> number from the table.
//
// number is -1 when the name has no colon, when nothing matches, or when the
// digits do not fit in an int.
//
// NOTE(review): a show whose own title contains a colon (for example
// "Star Trek: Enterprise: Season 1") is still truncated at the first colon;
// confirm against the callers whether that is intended.
func decipherTVName(name string) (title string, number int, boxset bool) {
	parts := strings.Split(name, ":")
	title = parts[0]
	if len(parts) == 1 {
		return title, -1, false
	}

	// everything after the first colon
	seasonBlock := strings.ToLower(strings.Join(parts[1:], ""))
	if strings.Contains(seasonBlock, "complete series") || strings.Contains(seasonBlock, "complete seasons") {
		return title, 0, true
	}

	// does the block carry the season number as an integer?
	if match := seasonDigitsRe.FindStringSubmatch(seasonBlock); len(match) > 1 {
		parsed, err := strconv.Atoi(match[1])
		if err != nil {
			// Only reachable when the digits overflow int; treat as unknown
			// instead of returning the clamped value.
			return title, -1, false
		}
		return title, parsed, false
	}

	// ... or as a cardinal word directly after "season(s) "?
	if n, ok := pickSeason(seasonNumberToInt, func(word string) bool {
		return strings.Contains(seasonBlock, "season "+word) || strings.Contains(seasonBlock, "seasons "+word)
	}); ok {
		return title, n, false
	}

	// ... or as an "<ordinal> season" phrase?
	if n, ok := pickSeason(ordinalNumberToSeason, func(phrase string) bool {
		return strings.Contains(seasonBlock, phrase)
	}); ok {
		return title, n, false
	}

	return title, -1, false
}

// pickSeason returns the season number of the table key accepted by matches.
// Map iteration order is random and some keys are prefixes of others
// ("six"/"sixteen"), so when several keys are accepted the longest key wins;
// equal-length ties go to the lowest season number. This keeps the result
// deterministic.
func pickSeason(table map[string]int, matches func(key string) bool) (number int, found bool) {
	bestLen := 0
	for key, n := range table {
		if !matches(key) {
			continue
		}
		if !found || len(key) > bestLen || (len(key) == bestLen && n < number) {
			number, bestLen, found = n, len(key), true
		}
	}
	return number, found
}
13 changes: 7 additions & 6 deletions amazon/amazon_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ func TestFindMoviesInResponse(t *testing.T) {
}

func TestSearchAmazon(t *testing.T) {
result := SearchAmazonMoviesInParallel([]types.PlexMovie{{Title: "napoleon dynamite", Year: "2004"}}, "", amazonRegion)
result := MoviesInParallel([]types.PlexMovie{{Title: "napoleon dynamite", Year: "2004"}}, "", amazonRegion)
if len(result) == 0 {
t.Errorf("Expected search results, but got none")
}
Expand All @@ -57,19 +57,20 @@ func TestSearchAmazonTV(t *testing.T) {
t.Skip("ACCEPTANCE TEST: PLEX environment variables not set")
}
show := types.PlexTVShow{
Title: "Friends",
Year: "1994",
// Title: "Friends",
// Year: "1994",
// Title: "Charmed",
// Year: "1998",
// Title: "Adventure Time",
// Year: "2010",
Title: "Star Trek: Enterprise",
Year: "2001",
}
result := SearchAmazonTVInParallel([]types.PlexTVShow{show}, "", amazonRegion)
result := TVInParallel([]types.PlexTVShow{show}, "", amazonRegion)

if len(result) == 0 {
t.Errorf("Expected search results, but got none")
}
fmt.Println(result)
}

func TestScrapeTitlesParallel(t *testing.T) {
Expand All @@ -87,7 +88,7 @@ func TestScrapeTitlesParallel(t *testing.T) {
},
},
},
}, amazonRegion)
}, amazonRegion, false)

if len(result) == 0 {
t.Errorf("Expected search results, but got none")
Expand Down
Loading
Loading