Merge branch 'url-matching' of https://github.com/Domi250/spotify-downloader into Domi250-url-matching
xnetcat committed Dec 10, 2022
2 parents 49abfa2 + d5c7a15 commit 85c307d
Showing 4 changed files with 220 additions and 62 deletions.
10 changes: 5 additions & 5 deletions spotdl/console/meta.py
@@ -7,8 +7,8 @@

from spotdl.download.downloader import Downloader
from spotdl.utils.ffmpeg import FFMPEG_FORMATS
from spotdl.utils.metadata import embed_metadata, get_song_metadata
from spotdl.utils.search import create_empty_song, get_search_results
from spotdl.utils.metadata import embed_metadata, get_song_metadata, find_song
from spotdl.utils.search import create_empty_song


def meta(query: List[str], downloader: Downloader, **_) -> None:
@@ -63,7 +63,7 @@ def process_file(file: Path):
or song_meta["title"][0] == ""
or song_meta["tracknumber"][0] == ""
):
song = get_search_results(file.name.rsplit(".", 1)[0])[0]
song = find_song(Path(test_path))
else:
try:
song = create_empty_song(
@@ -87,7 +87,7 @@ def process_file(file: Path):
copyright_text=song_meta["copyright"],
)
except Exception:
song = get_search_results(file.name.rsplit(".", 1)[0])[0]
song = find_song(Path(test_path))

# Check if the song has lyric
# if not use downloader to find lyrics
@@ -99,7 +99,7 @@ def process_file(file: Path):
if lyrics:
song.lyrics = lyrics
downloader.progress_handler.log(
f"No lyrics found for song: {song.display_name}"
f"Found lyrics for song: {song.display_name}"
)

# Apply metadata to the song
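
The meta.py hunks above replace the plain text search (get_search_results on the file name with its extension stripped) with find_song, and only fall back to a lookup when the embedded tags are incomplete. Below is a minimal, self-contained sketch of that tag check, assuming mutagen's "easy" tags; needs_lookup is an illustrative helper, not part of spotdl.

from pathlib import Path

from mutagen import File

REQUIRED_TAGS = ("artist", "title", "tracknumber")


def needs_lookup(path: Path) -> bool:
    """Return True when the file lacks the tags needed to rebuild a Song object."""
    audio = File(str(path), easy=True)  # easy=True exposes plain keys like "artist"
    if audio is None:
        return True  # unrecognized file type, a full search is needed
    for key in REQUIRED_TAGS:
        values = audio.get(key)
        if not values or values[0] == "":
            return True
    return False
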
73 changes: 28 additions & 45 deletions spotdl/console/sync.py
@@ -89,58 +89,45 @@ def sync(
raise ValueError("Sync file is not a valid sync file.")

# Parse the query
new_songs = parse_query(sync_data["query"], downloader.threads)
new_files = [
create_file_name(song, downloader.output, downloader.output_format)
for song in new_songs
]

# Get all the old files based on the songs from sync file
old_songs = [Song.from_dict(song) for song in sync_data["songs"]]
old_files = [
create_file_name(song, downloader.output, downloader.output_format)
for song in old_songs
]

# Get all files that are no longer in the song lists
to_delete = set(old_files) - set(new_files)

# Get all files that are new and have to be downloaded
to_download = []
for song in new_songs:
song_path = create_file_name(
song, downloader.output, downloader.output_format
songs_playlist = parse_query(sync_data["query"], downloader.threads)

# Get the names and URLs of previously downloaded songs from the sync file
old_files = []
for entry in sync_data["songs"]:
file_name = create_file_name(
Song.from_dict(entry), downloader.output, downloader.output_format
)
old_files.append((file_name, entry["url"]))

# Skip the songs that are already downloaded
if Path(song_path).exists():
# Add the song to the to_download list
# if overwrite is set to force
if downloader.overwrite == "force":
downloader.progress_handler.log(f"Overwriting {song.display_name}")
to_download.append(song)
else:
# Add the song to the to_download list
to_download.append(song)
new_urls = [song.url for song in songs_playlist]

downloader.progress_handler.log(
f"Found {len(to_download)} songs to download and {len(to_delete)} files to delete."
)
# Delete all song files whose URL is no longer part of the latest playlist
to_delete = [path for (path, url) in old_files if url not in new_urls]

# Delete all files that are no longer in the song lists
for file in to_delete:
if file.exists():
file.unlink()
downloader.progress_handler.log(f"Removed {file}")
downloader.progress_handler.log(f"Deleting {file}")
try:
file.unlink()
except (PermissionError, OSError) as exc:
downloader.progress_handler.debug(
f"Could not remove temp file: {file}, error: {exc}"
)
else:
downloader.progress_handler.debug(f"{file} does not exist.")

# Create m3u file
if len(to_delete) == 0:
downloader.progress_handler.log("Nothing to delete...")
else:
downloader.progress_handler.log(
f"{len(to_delete)} old songs were deleted."
)

if m3u_file:
gen_m3u_files(
sync_data["query"],
m3u_file,
new_songs,
songs_playlist,
downloader.output,
downloader.output_format,
False,
@@ -152,18 +139,14 @@
{
"type": "sync",
"query": sync_data["query"],
"songs": [song.json for song in new_songs],
"songs": [song.json for song in songs_playlist],
},
save_file,
indent=4,
ensure_ascii=False,
)

if len(to_download) == 0:
downloader.progress_handler.log("Nothing to do...")
return None

downloader.download_multiple_songs(to_download)
downloader.download_multiple_songs(songs_playlist)

return None

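The rewritten sync logic above keys deletions on the Spotify URLs stored in the sync file rather than on generated file names, so a file is only removed when its track has actually left the playlist, even if the file name no longer matches the current output template. A minimal sketch of that URL diffing, assuming each previously synced entry is remembered as a (path, url) pair; files_to_delete is an illustrative helper, not spotdl's API.

from pathlib import Path
from typing import Iterable, List, Tuple


def files_to_delete(
    old_files: Iterable[Tuple[Path, str]], new_urls: Iterable[str]
) -> List[Path]:
    """Return the paths whose Spotify URL has dropped out of the fresh playlist."""
    url_set = set(new_urls)
    return [path for path, url in old_files if url not in url_set]


# Example: the first track left the playlist, so only its file is flagged for deletion.
old = [
    (Path("Artist - Gone Song.mp3"), "https://open.spotify.com/track/aaa"),
    (Path("Artist - Kept Song.mp3"), "https://open.spotify.com/track/bbb"),
]
print(files_to_delete(old, ["https://open.spotify.com/track/bbb"]))
# [PosixPath('Artist - Gone Song.mp3')]
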
116 changes: 110 additions & 6 deletions spotdl/download/downloader.py
@@ -13,12 +13,14 @@
from pathlib import Path
from typing import Dict, List, Optional, Tuple, Type

from mutagen import File

from yt_dlp.postprocessor.sponsorblock import SponsorBlockPP
from yt_dlp.postprocessor.modify_chapters import ModifyChaptersPP

from spotdl.types import Song
from spotdl.utils.ffmpeg import FFmpegError, convert, get_ffmpeg_path
from spotdl.utils.metadata import embed_metadata, MetadataError
from spotdl.utils.metadata import embed_metadata, MetadataError, get_song_metadata
from spotdl.utils.formatter import create_file_name, restrict_filename
from spotdl.providers.audio.base import AudioProvider
from spotdl.providers.lyrics import Genius, MusixMatch, AzLyrics
@@ -58,10 +60,29 @@ class DownloaderError(Exception):
"""


class KnownSong:
"""
Represents a song file already present in the output directory. Used for determining
which songs to skip when downloading.
"""

def __init__(self, path: Path = None, spotify_url: str = ""):
"""
Initialize the KnownSong class.
### Arguments
path: Path to the file.
spotify_url: The song's corresponding URL on Spotify.
"""

self.path = path
self.spotify_url = spotify_url


class Downloader:
"""
Downloader class, this is where all the downloading pre/post processing happens etc.
It handles the downloading/moving songs, multthreading, metadata embedding etc.
It handles the downloading/moving songs, multithreading, metadata embedding etc.
"""

def __init__(
@@ -87,6 +108,7 @@ def __init__(
loop: Optional[asyncio.AbstractEventLoop] = None,
playlist_numbering: bool = False,
preserve_original_audio: bool = False,
known_songs: KnownSong = None,
):
"""
Initialize the Downloader class.
@@ -95,7 +117,7 @@ def __init__(
- audio_provider: Audio providers to use.
- lyrics_provider: The lyrics providers to use.
- ffmpeg: The ffmpeg executable to use.
- bitrate: The bitrate to use.
- bitrate: The bit rate to use.
- ffmpeg_args: The ffmpeg arguments to use.
- output_format: The output format to use.
- threads: The number of threads to use.
Expand All @@ -107,12 +129,13 @@ def __init__(
- search_query: The search query to use.
- log_level: The log level to use.
- simple_tui: Whether to use simple tui.
- loop: The event loop to use.
- restrict: Whether to restrict the filename to ASCII characters.
- print_errors: Whether to print errors on exit.
- sponsor_block: Whether to remove sponsor segments using sponsor block postprocessor.
- loop: The event loop to use.
- playlist_numbering: Whether to convert tracks in a playlist into an album
- preserve_original_audio: Whether to preserve the original audio file
- known_songs: List of song files already present in the output directory.
### Notes
- `search-query` uses the same format as `output`.
@@ -174,6 +197,21 @@ def __init__(

ffmpeg = str(ffmpeg_exec.absolute())

# Gather already present songs
# todo: Use output dir instead of "." directory
if known_songs is None:
known_songs = []
paths = Path(".").glob("." + output_format)
for path in paths:
if path.is_file():
audio_file = File(str(path.resolve()), easy=False)

if audio_file.get("COMM::XXX") is not None:
comment = str(audio_file.get("COMM::XXX"))
if "|" in comment:
url = comment.split("|", 1)[1]
known_songs.append(KnownSong(path, url))

self.output = output
self.output_format = output_format
self.save_file = save_file
@@ -193,6 +231,7 @@ def __init__(
self.progress_handler = ProgressHandler(NAME_TO_LEVEL[log_level], simple_tui)
self.playlist_numbering = playlist_numbering
self.preserve_original_audio = preserve_original_audio
self.known_songs = known_songs

self.lyrics_providers: List[LyricsProvider] = []
for lyrics_provider_class in lyrics_providers_classes:
@@ -265,7 +304,7 @@ async def pool_download(self, song: Song) -> Tuple[Song, Optional[Path]]:
# only certain amount of tasks can acquire the semaphore at the same time
async with self.semaphore:
# The following function calls blocking code, which would block whole event loop.
# Therefore it has to be called in a separate thread via ThreadPoolExecutor. This
# Therefore, it has to be called in a separate thread via ThreadPoolExecutor. This
# is not a problem, since GIL is released for the I/O operations, so it shouldn't
# hurt performance.
return await self.loop.run_in_executor(
@@ -382,6 +421,16 @@ def search_and_download(self, song: Song) -> Tuple[Song, Optional[Path]]:
return song, None

if output_file.exists() and self.overwrite == "metadata":
song_meta = get_song_metadata(output_file)
if song_meta is None:
self.progress_handler.log(
f"Metadata not found for {song.display_name}, " "overwriting file"
)
else:
self.progress_handler.log(
f"Metadata found for {song.display_name}, " "overwriting file"
)

embed_metadata(
output_file=output_file, song=song, file_format=self.output_format
)
@@ -393,7 +442,62 @@ def search_and_download(self, song: Song) -> Tuple[Song, Optional[Path]]:

# Don't skip if the file exists and overwrite is set to force
if output_file.exists() and self.overwrite == "force":
self.progress_handler.debug(f"Overwriting {song.display_name}")
self.progress_handler.log(f"Overwriting {song.display_name}")

# Check if there is an already existing song file, with the same spotify URL in its
# metadata, but saved under a different name. If so, save its path.
known_path = ""
if not output_file.exists():
for song_file in self.known_songs:
if song_file.spotify_url == song.url:
known_path = song_file.path
break

if known_path != "":

# Print warning that the songs metadata is outdated
if self.overwrite == "skip":
self.progress_handler.log(f"Skipping {song.display_name}")
self.progress_handler.warn(
f"Metadata of {song.display_name} is outdated. "
f"Use spotdl meta to update."
)
self.progress_handler.overall_completed_tasks += 1
self.progress_handler.update_overall()
return song, None

# Update filename and other metadata
if self.overwrite == "metadata":
known_path.replace(output_file.with_suffix(self.output_format))

song_meta = get_song_metadata(output_file)
if song_meta is None:
self.progress_handler.debug(
f"Metadata not found for {song.display_name}, "
"overwriting file"
)
else:
self.progress_handler.debug(
f"Metadata found for {song.display_name}, " "overwriting file"
)

embed_metadata(
output_file=output_file, song=song, file_format=self.output_format
)
return song, output_file

# Delete old file with outdated filename
if self.overwrite == "force":
self.progress_handler.debug(f"Overwriting {known_path}")
try:
known_path.unlink()
except (PermissionError, OSError) as exc:
self.progress_handler.debug(
f"Could not remove temp file: {known_path}, error: {exc}"
)

# Initialize the progress tracker
display_progress_tracker = self.progress_handler.get_new_tracker(song)

# Create the output directory if it doesn't exist
output_file.parent.mkdir(parents=True, exist_ok=True)
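
The downloader changes above build a known_songs index on startup and, in search_and_download, match an existing file to a song by the Spotify URL embedded in its comment tag instead of by the expected file name. Below is a standalone sketch of that indexing step, assuming (as in the hunk above) that the URL is the part after the "|" separator in the COMM::XXX frame; index_known_songs and the "*.<format>" glob pattern are illustrative assumptions, while the committed hunk globs "." + output_format against the current directory.

from pathlib import Path
from typing import Dict

from mutagen import File


def index_known_songs(directory: Path, output_format: str = "mp3") -> Dict[str, Path]:
    """Map Spotify URL -> file path for every tagged audio file in the directory."""
    known: Dict[str, Path] = {}
    for path in directory.glob(f"*.{output_format}"):
        if not path.is_file():
            continue
        audio = File(str(path.resolve()), easy=False)
        if audio is None:
            continue
        comment = audio.get("COMM::XXX")
        if comment is None:
            continue
        text = str(comment)
        if "|" in text:
            known[text.split("|", 1)[1]] = path
    return known


# A download run can then consult the index before searching:
# known = index_known_songs(Path("."), "mp3")
# existing_path = known.get(song.url)  # None means the track has not been downloaded yet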
