Merge branch 'url-matching' of https://github.com/Domi250/spotify-downloader into Domi250-url-matching
xnetcat committed Dec 10, 2022
2 parents 49abfa2 + d5c7a15 commit 85c307d
Showing 4 changed files with 220 additions and 62 deletions.
10 changes: 5 additions & 5 deletions spotdl/console/meta.py
@@ -7,8 +7,8 @@

from spotdl.download.downloader import Downloader
from spotdl.utils.ffmpeg import FFMPEG_FORMATS
from spotdl.utils.metadata import embed_metadata, get_song_metadata
from spotdl.utils.search import create_empty_song, get_search_results
from spotdl.utils.metadata import embed_metadata, get_song_metadata, find_song
from spotdl.utils.search import create_empty_song


def meta(query: List[str], downloader: Downloader, **_) -> None:
@@ -63,7 +63,7 @@ def process_file(file: Path):
or song_meta["title"][0] == ""
or song_meta["tracknumber"][0] == ""
):
song = get_search_results(file.name.rsplit(".", 1)[0])[0]
song = find_song(Path(test_path))
else:
try:
song = create_empty_song(
@@ -87,7 +87,7 @@ def process_file(file: Path):
copyright_text=song_meta["copyright"],
)
except Exception:
song = get_search_results(file.name.rsplit(".", 1)[0])[0]
song = find_song(Path(test_path))

# Check if the song has lyric
# if not use downloader to find lyrics
@@ -99,7 +99,7 @@ def process_file(file: Path):
if lyrics:
song.lyrics = lyrics
downloader.progress_handler.log(
f"No lyrics found for song: {song.display_name}"
f"Found lyrics for song: {song.display_name}"
)

# Apply metadata to the song
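
The meta.py hunks above replace the plain text search (get_search_results on the file name with its extension stripped) with find_song, and only fall back to a lookup when the embedded tags are incomplete. Below is a minimal, self-contained sketch of that tag check, assuming mutagen's "easy" tags; needs_lookup is an illustrative helper, not part of spotdl.

from pathlib import Path

from mutagen import File

REQUIRED_TAGS = ("artist", "title", "tracknumber")


def needs_lookup(path: Path) -> bool:
    """Return True when the file lacks the tags needed to rebuild a Song object."""
    audio = File(str(path), easy=True)  # easy=True exposes plain keys like "artist"
    if audio is None:
        return True  # unrecognized file type, a full search is needed
    for key in REQUIRED_TAGS:
        values = audio.get(key)
        if not values or values[0] == "":
            return True
    return False
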
73 changes: 28 additions & 45 deletions spotdl/console/sync.py
@@ -89,58 +89,45 @@ def sync(
raise ValueError("Sync file is not a valid sync file.")

# Parse the query
new_songs = parse_query(sync_data["query"], downloader.threads)
new_files = [
create_file_name(song, downloader.output, downloader.output_format)
for song in new_songs
]

# Get all the old files based on the songs from sync file
old_songs = [Song.from_dict(song) for song in sync_data["songs"]]
old_files = [
create_file_name(song, downloader.output, downloader.output_format)
for song in old_songs
]

# Get all files that are no longer in the song lists
to_delete = set(old_files) - set(new_files)

# Get all files that are new and have to be downloaded
to_download = []
for song in new_songs:
song_path = create_file_name(
song, downloader.output, downloader.output_format
songs_playlist = parse_query(sync_data["query"], downloader.threads)

# Get the names and URLs of previously downloaded songs from the sync file
old_files = []
for entry in sync_data["songs"]:
file_name = create_file_name(
Song.from_dict(entry), downloader.output, downloader.output_format
)
old_files.append((file_name, entry["url"]))

# Skip the songs that are already downloaded
if Path(song_path).exists():
# Add the song to the to_download list
# if overwrite is set to force
if downloader.overwrite == "force":
downloader.progress_handler.log(f"Overwriting {song.display_name}")
to_download.append(song)
else:
# Add the song to the to_download list
to_download.append(song)
new_urls = [song.url for song in songs_playlist]

downloader.progress_handler.log(
f"Found {len(to_download)} songs to download and {len(to_delete)} files to delete."
)
# Delete all song files whose URL is no longer part of the latest playlist
to_delete = [path for (path, url) in old_files if url not in new_urls]

# Delete all files that are no longer in the song lists
for file in to_delete:
if file.exists():
file.unlink()
downloader.progress_handler.log(f"Removed {file}")
downloader.progress_handler.log(f"Deleting {file}")
try:
file.unlink()
except (PermissionError, OSError) as exc:
downloader.progress_handler.debug(
f"Could not remove temp file: {file}, error: {exc}"
)
else:
downloader.progress_handler.debug(f"{file} does not exist.")

# Create m3u file
if len(to_delete) == 0:
downloader.progress_handler.log("Nothing to delete...")
else:
downloader.progress_handler.log(
f"{len(to_delete)} old songs were deleted."
)

if m3u_file:
gen_m3u_files(
sync_data["query"],
m3u_file,
new_songs,
songs_playlist,
downloader.output,
downloader.output_format,
False,
@@ -152,18 +139,14 @@
{
"type": "sync",
"query": sync_data["query"],
"songs": [song.json for song in new_songs],
"songs": [song.json for song in songs_playlist],
},
save_file,
indent=4,
ensure_ascii=False,
)

if len(to_download) == 0:
downloader.progress_handler.log("Nothing to do...")
return None

downloader.download_multiple_songs(to_download)
downloader.download_multiple_songs(songs_playlist)

return None

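The rewritten sync logic above keys deletions on the Spotify URLs stored in the sync file rather than on generated file names, so a file is only removed when its track has actually left the playlist, even if the file name no longer matches the current output template. A minimal sketch of that URL diffing, assuming each previously synced entry is remembered as a (path, url) pair; files_to_delete is an illustrative helper, not spotdl's API.

from pathlib import Path
from typing import Iterable, List, Tuple


def files_to_delete(
    old_files: Iterable[Tuple[Path, str]], new_urls: Iterable[str]
) -> List[Path]:
    """Return the paths whose Spotify URL has dropped out of the fresh playlist."""
    url_set = set(new_urls)
    return [path for path, url in old_files if url not in url_set]


# Example: the first track left the playlist, so only its file is flagged for deletion.
old = [
    (Path("Artist - Gone Song.mp3"), "https://open.spotify.com/track/aaa"),
    (Path("Artist - Kept Song.mp3"), "https://open.spotify.com/track/bbb"),
]
print(files_to_delete(old, ["https://open.spotify.com/track/bbb"]))
# [PosixPath('Artist - Gone Song.mp3')]
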
116 changes: 110 additions & 6 deletions spotdl/download/downloader.py
@@ -13,12 +13,14 @@
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type

from mutagen import File

from yt_dlp.postprocessor.sponsorblock import SponsorBlockPP
from yt_dlp.postprocessor.modify_chapters import ModifyChaptersPP

from spotdl.types import Song
from spotdl.utils.ffmpeg import FFmpegError, convert, get_ffmpeg_path
from spotdl.utils.metadata import embed_metadata, MetadataError
from spotdl.utils.metadata import embed_metadata, MetadataError, get_song_metadata
from spotdl.utils.formatter import create_file_name, restrict_filename
from spotdl.providers.audio.base import AudioProvider
from spotdl.providers.lyrics import Genius, MusixMatch, AzLyrics
@@ -58,10 +60,29 @@ class DownloaderError(Exception):
"""


class KnownSong:
"""
Represents a song file already present in the output directory. Used for determining
which songs to skip when downloading.
"""

def __init__(self, path: Path = None, spotify_url: str = ""):
"""
Initialize the KnownSong class.
### Arguments
path: Path to the file.
spotify_url: The song's corresponding URL on Spotify.
"""

self.path = path
self.spotify_url = spotify_url


class Downloader:
"""
Downloader class, this is where all the downloading pre/post processing happens etc.
It handles the downloading/moving songs, multthreading, metadata embedding etc.
It handles the downloading/moving songs, multithreading, metadata embedding etc.
"""

def __init__(
@@ -87,6 +108,7 @@ def __init__(
loop: Optional[asyncio.AbstractEventLoop] = None,
playlist_numbering: bool = False,
preserve_original_audio: bool = False,
known_songs: KnownSong = None,
):
"""
Initialize the Downloader class.
@@ -95,7 +117,7 @@ def __init__(
- audio_provider: Audio providers to use.
- lyrics_provider: The lyrics providers to use.
- ffmpeg: The ffmpeg executable to use.
- bitrate: The bitrate to use.
- bitrate: The bit rate to use.
- ffmpeg_args: The ffmpeg arguments to use.
- output_format: The output format to use.
- threads: The number of threads to use.
Expand All @@ -107,12 +129,13 @@ def __init__(
- search_query: The search query to use.
- log_level: The log level to use.
- simple_tui: Whether to use simple tui.
- loop: The event loop to use.
- restrict: Whether to restrict the filename to ASCII characters.
- print_errors: Whether to print errors on exit.
- sponsor_block: Whether to remove sponsor segments using sponsor block postprocessor.
- loop: The event loop to use.
- playlist_numbering: Whether to convert tracks in a playlist into an album
- preserve_original_audio: Whether to preserve the original audio file
- known_songs: List of song files already present in the output directory.
### Notes
- `search-query` uses the same format as `output`.
@@ -174,6 +197,21 @@ def __init__(

ffmpeg = str(ffmpeg_exec.absolute())

# Gather already present songs
# todo: Use output dir instead of "." directory
if known_songs is None:
known_songs = []
paths = Path(".").glob("." + output_format)
for path in paths:
if path.is_file():
audio_file = File(str(path.resolve()), easy=False)

if audio_file.get("COMM::XXX") is not None:
comment = str(audio_file.get("COMM::XXX"))
if "|" in comment:
url = comment.split("|", 1)[1]
known_songs.append(KnownSong(path, url))

self.output = output
self.output_format = output_format
self.save_file = save_file
@@ -193,6 +231,7 @@ def __init__(
self.progress_handler = ProgressHandler(NAME_TO_LEVEL[log_level], simple_tui)
self.playlist_numbering = playlist_numbering
self.preserve_original_audio = preserve_original_audio
self.known_songs = known_songs

self.lyrics_providers: List[LyricsProvider] = []
for lyrics_provider_class in lyrics_providers_classes:
@@ -265,7 +304,7 @@ async def pool_download(self, song: Song) -> Tuple[Song, Optional[Path]]:
# only certain amount of tasks can acquire the semaphore at the same time
async with self.semaphore:
# The following function calls blocking code, which would block whole event loop.
# Therefore it has to be called in a separate thread via ThreadPoolExecutor. This
# Therefore, it has to be called in a separate thread via ThreadPoolExecutor. This
# is not a problem, since GIL is released for the I/O operations, so it shouldn't
# hurt performance.
return await self.loop.run_in_executor(
@@ -382,6 +421,16 @@ def search_and_download(self, song: Song) -> Tuple[Song, Optional[Path]]:
return song, None

if output_file.exists() and self.overwrite == "metadata":
song_meta = get_song_metadata(output_file)
if song_meta is None:
self.progress_handler.log(
f"Metadata not found for {song.display_name}, " "overwriting file"
)
else:
self.progress_handler.log(
f"Metadata found for {song.display_name}, " "overwriting file"
)

embed_metadata(
output_file=output_file, song=song, file_format=self.output_format
)
@@ -393,7 +442,62 @@ def search_and_download(self, song: Song) -> Tuple[Song, Optional[Path]]:

# Don't skip if the file exists and overwrite is set to force
if output_file.exists() and self.overwrite == "force":
self.progress_handler.debug(f"Overwriting {song.display_name}")
self.progress_handler.log(f"Overwriting {song.display_name}")

# Check if there is an already existing song file, with the same spotify URL in its
# metadata, but saved under a different name. If so, save its path.
known_path = ""
if not output_file.exists():
for song_file in self.known_songs:
if song_file.spotify_url == song.url:
known_path = song_file.path
break

if known_path != "":

# Print warning that the songs metadata is outdated
if self.overwrite == "skip":
self.progress_handler.log(f"Skipping {song.display_name}")
self.progress_handler.warn(
f"Metadata of {song.display_name} is outdated. "
f"Use spotdl meta to update."
)
self.progress_handler.overall_completed_tasks += 1
self.progress_handler.update_overall()
return song, None

# Update filename and other metadata
if self.overwrite == "metadata":
known_path.replace(output_file.with_suffix(self.output_format))

song_meta = get_song_metadata(output_file)
if song_meta is None:
self.progress_handler.debug(
f"Metadata not found for {song.display_name}, "
"overwriting file"
)
else:
self.progress_handler.debug(
f"Metadata found for {song.display_name}, " "overwriting file"
)

embed_metadata(
output_file=output_file, song=song, file_format=self.output_format
)
return song, output_file

# Delete old file with outdated filename
if self.overwrite == "force":
self.progress_handler.debug(f"Overwriting {known_path}")
try:
known_path.unlink()
except (PermissionError, OSError) as exc:
self.progress_handler.debug(
f"Could not remove temp file: {known_path}, error: {exc}"
)

# Initialize the progress tracker
display_progress_tracker = self.progress_handler.get_new_tracker(song)

# Create the output directory if it doesn't exist
output_file.parent.mkdir(parents=True, exist_ok=True)
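
The downloader changes above build a known_songs index on startup and, in search_and_download, match an existing file to a song by the Spotify URL embedded in its comment tag instead of by the expected file name. Below is a standalone sketch of that indexing step, assuming (as in the hunk above) that the URL is the part after the "|" separator in the COMM::XXX frame; index_known_songs and the "*.<format>" glob pattern are illustrative assumptions, while the committed hunk globs "." + output_format against the current directory.

from pathlib import Path
from typing import Dict

from mutagen import File


def index_known_songs(directory: Path, output_format: str = "mp3") -> Dict[str, Path]:
    """Map Spotify URL -> file path for every tagged audio file in the directory."""
    known: Dict[str, Path] = {}
    for path in directory.glob(f"*.{output_format}"):
        if not path.is_file():
            continue
        audio = File(str(path.resolve()), easy=False)
        if audio is None:
            continue
        comment = audio.get("COMM::XXX")
        if comment is None:
            continue
        text = str(comment)
        if "|" in text:
            known[text.split("|", 1)[1]] = path
    return known


# A download run can then consult the index before searching:
# known = index_known_songs(Path("."), "mp3")
# existing_path = known.get(song.url)  # None means the track has not been downloaded yet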
