Skip to content

Commit

Permalink
web-add: improve audio metadata extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
chapmanjacobd committed Jan 23, 2025
1 parent 1b17260 commit fe0230e
Show file tree
Hide file tree
Showing 7 changed files with 95 additions and 73 deletions.
2 changes: 1 addition & 1 deletion SECURITY.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Security Policy
# Coordinated Disclosure Form

## Reporting a Vulnerability

Expand Down
115 changes: 58 additions & 57 deletions library/createdb/av.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,8 @@
import math
from datetime import datetime

from library.createdb import subtitle
from library.mediafiles import media_check
from library.utils import consts, file_utils, iterables, nums, objects, path_utils, processes, strings
from library.utils import consts, date_utils, file_utils, iterables, nums, objects, path_utils, processes, strings
from library.utils.consts import DBType
from library.utils.log_utils import log

Expand Down Expand Up @@ -37,52 +36,64 @@ def get_subtitle_tags(path, streams, scan_subtitles=False) -> dict:


def parse_tags(mu: dict, ti: dict) -> dict:
mu = objects.dumbcopy(mu)
ti = objects.dumbcopy(ti)

tags = {
"mood": strings.combine(
mu.get("albummood"),
mu.get("MusicMatch_Situation"),
mu.get("Songs-DB_Occasion"),
mu.get("albumgrouping"),
mu.pop("albummood", None),
mu.pop("MusicMatch_Situation", None),
mu.pop("Songs-DB_Occasion", None),
mu.pop("albumgrouping", None),
),
"genre": strings.combine(mu.pop("genre", None), ti.pop("genre", None), mu.pop("albumgenre", None)),
"time_created": date_utils.specific_date(
mu.pop("originalyear", None),
mu.pop("TDOR", None),
mu.pop("TORY", None),
mu.pop("date", None),
mu.pop("TDRC", None),
mu.pop("TDRL", None),
ti.pop("year", None),
),
"bpm": nums.safe_int(
iterables.safe_unpack(mu.pop("fBPM", None), mu.pop("bpm", None), mu.pop("bpm_start", None))
),
"genre": strings.combine(mu.get("genre"), ti.get("genre"), mu.get("albumgenre")),
"year": strings.combine(
mu.get("originalyear"),
mu.get("TDOR"),
mu.get("TORY"),
mu.get("date"),
mu.get("TDRC"),
mu.get("TDRL"),
ti.get("year"),
"key": iterables.safe_unpack(
mu.pop("TIT1", None), mu.pop("key", None), mu.pop("TKEY", None), mu.pop("key_start", None)
),
"bpm": nums.safe_int(iterables.safe_unpack(mu.get("fBPM"), mu.get("bpm"), mu.get("bpm_start"))),
"key": iterables.safe_unpack(mu.get("TIT1"), mu.get("key"), mu.get("TKEY"), mu.get("key_start")),
"decade": iterables.safe_unpack(mu.get("Songs-DB_Custom1")),
"categories": iterables.safe_unpack(mu.get("Songs-DB_Custom2")),
"city": iterables.safe_unpack(mu.get("Songs-DB_Custom3")),
"decade": iterables.safe_unpack(mu.pop("Songs-DB_Custom1", None)),
"categories": iterables.safe_unpack(mu.pop("Songs-DB_Custom2", None)),
"city": iterables.safe_unpack(mu.pop("Songs-DB_Custom3", None)),
"country": strings.combine(
mu.get("Songs-DB_Custom4"),
mu.get("MusicBrainz Album Release Country"),
mu.get("musicbrainz album release country"),
mu.get("language"),
mu.pop("Songs-DB_Custom4", None),
mu.pop("MusicBrainz Album Release Country", None),
mu.pop("musicbrainz album release country", None),
mu.pop("language", None),
),
"description": strings.combine(
mu.get("description"),
mu.get("lyrics"),
ti.get("comment"),
mu.pop("description", None),
mu.pop("synopsis", None),
mu.pop("lyrics", None),
mu.pop("publisher", None),
mu.pop("comment", None),
ti.pop("comment", None),
),
"album": iterables.safe_unpack(ti.get("album"), mu.get("album")),
"title": iterables.safe_unpack(ti.get("title"), mu.get("title")),
"album": iterables.safe_unpack(ti.pop("album", None), mu.pop("album", None)),
"title": iterables.safe_unpack(ti.pop("title", None), mu.pop("title", None)),
"artist": strings.combine(
ti.get("artist"),
mu.get("artist"),
mu.get("artists"),
ti.get("albumartist"),
ti.get("composer"),
ti.pop("artist", None),
mu.pop("artist", None),
mu.pop("artists", None),
ti.pop("albumartist", None),
ti.pop("composer", None),
),
}

# print(mutagen)
# breakpoint()
mu = {k: v for k, v in mu.items() if not (k in consts.MEDIA_KNOWN_KEYS or v is None)}
if mu != {}:
log.debug("Extra av data %s", mu)
# breakpoint()

return tags

Expand Down Expand Up @@ -126,11 +137,15 @@ def munge_av_tags(args, media) -> dict:
raise e
elif e.errno == 5: # IO Error
raise e
raise e
raise
except processes.UnplayableFile as e:
log.error(f"Failed reading header. {path}")
log.debug(e)
if getattr(args, "delete_unplayable", False) and not file_utils.is_file_open(path):
if (
getattr(args, "delete_unplayable", False)
and not path.startswith("http")
and not file_utils.is_file_open(path)
):
file_utils.trash(args, path, detach=False)
media["time_deleted"] = consts.APPLICATION_START
media["error"] = "Metadata check failed"
Expand Down Expand Up @@ -193,26 +208,11 @@ def munge_av_tags(args, media) -> dict:

tags = format_.pop("tags", None)
if tags:
upload_date = tags.get("DATE")
upload_time = None
if upload_date:
try:
upload_time = nums.to_timestamp(datetime.strptime(upload_date, "%Y%m%d"))
except Exception:
upload_time = None

tags = objects.dict_filter_bool(
{
"title": tags.get("title"),
"webpath": tags.get("PURL"),
"description": strings.combine(
tags.get("DESCRIPTION"),
tags.get("SYNOPSIS"),
tags.get("ARTIST"),
tags.get("COMMENT"),
tags.get("comment"),
),
"time_uploaded": upload_time,
"title": tags.pop("title", None),
"webpath": tags.pop("PURL", None),
**{k: v for k, v in parse_tags(tags, tags).items() if v},
},
)

Expand Down Expand Up @@ -275,8 +275,9 @@ def munge_av_tags(args, media) -> dict:
if objects.is_profile(args, DBType.video):
video_tags = get_subtitle_tags(path, streams, scan_subtitles=getattr(args, "scan_subtitles", False))
media = {**media, **video_tags}
elif objects.is_profile(args, DBType.audio):
elif objects.is_profile(args, DBType.audio) and not str(path).startswith("http"):
stream_tags = get_audio_tags(path)
stream_tags = {k: v for k, v in stream_tags.items() if v}
media = {**media, **stream_tags}

return media
30 changes: 19 additions & 11 deletions library/createdb/web_add.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def parse_args(action, **kwargs):
arggroups.selenium_post(args)

if not args.profiles:
if args.size:
if args.sizes:
args.profiles = [DBType.filesystem]
else:
args.profiles = []
Expand Down Expand Up @@ -206,31 +206,39 @@ def spider(args, paths: list):
media = [consolidate_media(args, k) | (v or {}) for k, v in new_paths.items()]
new_media_count += len(media)

# get basic metadata
if DBType.filesystem in args.profiles or args.hash:
with concurrent.futures.ThreadPoolExecutor(max_workers=args.threads) as executor:
enriched_media = []
with concurrent.futures.ThreadPoolExecutor(
max_workers=1 if args.verbose >= consts.LOG_DEBUG else args.threads
) as executor:
gen_media = (f.result() for f in [executor.submit(add_basic_metadata, args, m) for m in media])
for i, m in enumerate(gen_media):
media[i] = m
enriched_media.append(m)
printing.print_overwrite(
f"Pages to scan {len(paths)} link scan: {new_media_count} new [{len(known_paths)} known]; basic metadata {i + 1} of {len(media)}"
)
media = enriched_media
if media:
add_media(args, media)

# get extra_metadata
if args.sizes:
extra_metadata = [d for d in media if d.get("size") is None or args.sizes(d["size"])]
else:
extra_metadata = media
media = [d for d in media if d.get("size") is None or args.sizes(d["size"])]

with concurrent.futures.ThreadPoolExecutor(max_workers=args.threads) as executor:
gen_media = (f.result() for f in [executor.submit(add_extra_metadata, args, m) for m in extra_metadata])
enriched_media = []
with concurrent.futures.ThreadPoolExecutor(
max_workers=1 if args.verbose >= consts.LOG_DEBUG else args.threads
) as executor:
gen_media = (f.result() for f in [executor.submit(add_extra_metadata, args, m) for m in media])
for i, m in enumerate(gen_media):
extra_metadata[i] = m
enriched_media.append(m)
printing.print_overwrite(
f"Pages to scan {len(paths)} link scan: {new_media_count} new [{len(known_paths)} known]; extra metadata {i + 1} of {len(media)}"
)
if extra_metadata:
add_media(args, extra_metadata)
media = enriched_media
if media:
add_media(args, media)

printing.print_overwrite(
f"Pages to scan {len(paths)} link scan: {new_media_count} new [{len(known_paths)} known]"
Expand Down
2 changes: 1 addition & 1 deletion library/playback/torrents_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ def shorten(s, width):
qbt_client = torrents_start.start_qBittorrent(args)
torrents = qbt_client.torrents_info()

error_torrents = [t for t in torrents if t.state in ["missingFiles", "error"]]
error_torrents = [t for t in torrents if t.state_enum.is_errored]
error_torrents = sorted(
error_torrents, key=lambda t: (t.amount_left == t.total_size, t.eta, t.amount_left), reverse=True
)
Expand Down
3 changes: 2 additions & 1 deletion library/usage.py
Original file line number Diff line number Diff line change
Expand Up @@ -1949,6 +1949,7 @@ def play(action) -> str:
Stop incomplete downloads
library torrents --time-unseeded=+90days --time-active=+60days --time-stalled=+30days --stop
library torrents --time-active=+45days --inactive --progress=0 --stop
Move files
Expand All @@ -1964,7 +1965,7 @@ def play(action) -> str:
When --mark-deleted is provided, the torrents are tagged with 'delete' in qBittorrent
When --delete-rows is provided, the metadata is removed from qBittorrent
When --delete-files is provided, the downloaded files are deleted
When --delete-incomplete 80%% is provided, any files that were downloaded less than 80%% are deleted
When --delete-incomplete 80% is provided, any files that were downloaded less than 80% are deleted
"""


Expand Down
14 changes: 13 additions & 1 deletion library/utils/date_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,18 @@
from library.utils import iterables, nums


def specific_date(*dates):
    """Return the timestamp of the earliest plausible (past) date among *dates*.

    Each argument is a free-form date string (or falsy value, which is
    skipped) as typically found in audio-tag metadata. Strings that
    dateutil cannot parse are ignored instead of aborting the whole call.

    Returns:
        float | None: timestamp of the chosen date, or None when no
        argument parses to a date in the past.
    """
    now = datetime.datetime.now()  # hoisted: one consistent "now" for all comparisons

    valid_dates = []
    for s in dates:
        if not s:
            continue
        try:
            d = dateutil.parser.parse(str(s), fuzzy=True)
        except (ValueError, OverflowError):
            continue  # unparseable tag value; skip rather than crash metadata extraction
        if d.tzinfo is not None:
            # normalize to naive so comparison with naive now() cannot raise TypeError
            d = d.replace(tzinfo=None)
        valid_dates.append(d)

    past_dates = [d for d in valid_dates if d < now]
    if not past_dates:
        return None

    # Prefer dates with month/day specified, then the earliest date.
    # NOTE(review): parsed datetimes always have month/day >= 1, so the
    # bool(...) specificity keys are currently always True — confirm whether
    # a dateutil default-sentinel trick was intended here.
    earliest_specific_date = sorted(
        past_dates, key=lambda d: (bool(d.month), bool(d.day), -d.timestamp()), reverse=True
    )[0]
    return nums.to_timestamp(earliest_specific_date)


def tube_date(v):
upload_date = iterables.safe_unpack(
v.pop("release_date", None),
Expand All @@ -23,7 +35,7 @@ def tube_date(v):
upload_date = nums.to_timestamp(upload_date)
else:
try:
upload_date = nums.to_timestamp(dateutil.parser.parse(upload_date))
upload_date = nums.to_timestamp(dateutil.parser.parse(str(upload_date)))
except Exception:
upload_date = None
return upload_date
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ deluxe = [
"pdf2image",
"pillow",
"PyExifTool",
"pymcdm<1.3", # dominant_alts.size > 0 ValueError should be a warning instead...
"pymcdm",
"pyvirtualdisplay",
"qbittorrent-api",
"scikit-learn",
Expand Down

0 comments on commit fe0230e

Please sign in to comment.