+
+
+
THE ARCHIVE OF ALL EPISODES OF THE PODCAST + some extra content
+
+
+
+ """ # noqa: E501,B950
+ requests_mock.get(conf.ARCHIVE_URL, text=fake_html)
+ result = run_cli_with_args(["parse"])
+ assert "[ERROR]: No episode links on archive page" in result.output
+ assert f"\t{conf.ARCHIVE_URL}" in result.output
+ assert "Can't parse any episodes. Exit." in result.output
assert result.exit_code == 0
@@ -93,8 +123,8 @@ def test_parse_json_db_does_not_contain_episodes_in_plain_str(
assert "[WARNING]" in result.output
assert f"({conf.JSON_DB_URL})" in result.output
assert "has no valid episode objects" in result.output
- assert "JSON is available, but" in result.output
- assert "there are NO episode in this file. Exit." in result.output
+ assert "\tJSON is available, but" in result.output
+ assert "there are NO episodes in this file. Exit." in result.output
assert result.exit_code == 0
@@ -121,8 +151,8 @@ def test_parse_json_db_invalid_document(
assert "[ERROR]" in result.output
assert "Data is not a valid JSON document" in result.output
assert f"URL: {conf.JSON_DB_URL}" in result.output
- assert "JSON is available, but " in result.output
- assert "there are NO episode in this file. Exit." in result.output
+ assert "\tJSON is available, but" in result.output
+ assert "there are NO episodes in this file. Exit." in result.output
assert result.exit_code == 0
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index 97ab88d..9f9bdeb 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -27,78 +27,81 @@
from pytest import CaptureFixture
from requests_mock.mocker import Mocker as rm_Mocker
-from lep_downloader import data_getter
+from lep_downloader import config as conf
from lep_downloader import downloader
+from lep_downloader.downloader import ATrack
+from lep_downloader.downloader import Audio
+from lep_downloader.downloader import LepDL
+from lep_downloader.downloader import LepFile
+from lep_downloader.downloader import LepFileList
+from lep_downloader.downloader import PagePDF
+from lep_downloader.lep import Lep
from lep_downloader.lep import LepEpisode
+from lep_downloader.lep import LepEpisodeList
def test_selecting_only_audio_episodes(
only_audio_episodes: List[LepEpisode],
) -> None:
"""It returns filtered list with only audio episodes."""
- assert len(only_audio_episodes) == 15
+ assert len(only_audio_episodes) == 14 # Without duplicates
def test_extracting_audio_data(
- only_audio_episodes: List[LepEpisode],
+ only_audio_episodes: LepEpisodeList,
+ lep_dl: LepDL,
) -> None:
- """It returns list of tuples with audio data."""
- expected_ep = (
- "2009-10-19",
- "15. Extra Podcast – 12 Phrasal Verbs", # noqa: E501,B950 # dash as Unicode character here.
- [
- [
- "http://traffic.libsyn.com/teacherluke/15-extra-podcast-12-phrasal-verbs.mp3" # noqa: E501,B950
- ]
- ],
- False,
+ """It returns list of Audio files."""
+ expected_audio = Audio(
+ ep_id=2009101908, # many posts on that day
+ name="15. Extra Podcast – 12 Phrasal Verbs",
+ short_date="2009-10-19",
+ filename="[2009-10-19] # 15. Extra Podcast – 12 Phrasal Verbs",
+ primary_url="http://traffic.libsyn.com/teacherluke/15-extra-podcast-12-phrasal-verbs.mp3", # noqa: E501,B950
)
- audio_data = downloader.get_audios_data(only_audio_episodes)
- assert audio_data[1] == expected_ep
+ lep_dl.files = downloader.gather_all_files(only_audio_episodes)
+ audio_files = lep_dl.files.filter_by_type(Audio)
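+ # gather_all_files() returns mixed LepFile objects; keep only Audio items.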
+ assert audio_files[1] == expected_audio
def test_forming_multipart_download_links(
- only_audio_links: downloader.NamesWithAudios,
+ only_audio_links: List[Tuple[str, str]],
) -> None:
- """It returns list of URLs with titles for files."""
+ """It returns list of URLs with titles for multipart episode."""
excepted_link = (
- "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 02]", # noqa: E501,B950
- [
- "https://audioboom.com/posts/5621870-episode-167-luke-back-on-zep-part-2.mp3", # noqa: E501,B950
- ],
+ "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 02].mp3", # noqa: E501,B950
+ "https://audioboom.com/posts/5621870-episode-167-luke-back-on-zep-part-2.mp3", # noqa: E501,B950
)
- assert only_audio_links[11] == excepted_link
+ assert only_audio_links[10] == excepted_link
def test_forming_numbered_download_link(
- only_audio_links: downloader.NamesWithAudios,
+ only_audio_links: List[Tuple[str, str]],
) -> None:
"""It returns list of URLs with titles for files."""
excepted_link = (
- "[2021-02-03] # 703. Walaa from Syria – WISBOLEP Competition Winner",
- [
- "https://traffic.libsyn.com/secure/teacherluke/703._Walaa_from_Syria_-_WISBOLEP_Competition_Winner_.mp3", # noqa: E501,B950
- ],
+ "[2021-02-03] # 703. Walaa from Syria – WISBOLEP Competition Winner.mp3",
+ "https://traffic.libsyn.com/secure/teacherluke/703._Walaa_from_Syria_-_WISBOLEP_Competition_Winner_.mp3", # noqa: E501,B950
)
- assert only_audio_links[15] == excepted_link
+ assert only_audio_links[14] == excepted_link
def test_forming_safe_filename_for_downloading(
- only_audio_links: downloader.NamesWithAudios,
+ only_audio_links: List[Tuple[str, str]],
) -> None:
"""It replaces invalid path characters with '_'."""
excepted_link = (
- "[2016-08-07] # 370. In Conversation with Rob Ager from Liverpool (PART 1_ Life in Liverpool _ Interest in Film Analysis)", # noqa: E501,B950
- [
- "http://traffic.libsyn.com/teacherluke/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis.mp3", # noqa: E501,B950
- ],
+ "[2016-08-07] # 370. In Conversation with Rob Ager from Liverpool (PART 1_ Life in Liverpool _ Interest in Film Analysis).mp3", # noqa: E501,B950
+ "http://traffic.libsyn.com/teacherluke/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis.mp3", # noqa: E501,B950
)
- assert only_audio_links[9] == excepted_link
+ assert only_audio_links[8] == excepted_link
def test_separating_existing_and_non_existing_mp3(
- only_audio_links: downloader.NamesWithAudios,
+ requests_mock: rm_Mocker,
+ json_db_mock: str,
tmp_path: Path,
+ lep_dl: LepDL,
) -> None:
"""It detects when file has already been downloaded."""
filename_1 = "[2021-08-03] # 733. A Summer Ramble.mp3"
@@ -106,16 +109,22 @@ def test_separating_existing_and_non_existing_mp3(
Path(tmp_path / filename_1).write_text("Here are mp3 1 bytes")
Path(tmp_path / filename_2).write_text("Here are mp3 2 bytes")
- existing, non_existing = downloader.detect_existing_files(
- only_audio_links,
- tmp_path,
+ requests_mock.get(
+ conf.JSON_DB_URL,
+ text=json_db_mock,
)
- assert len(existing) == 2
- assert len(non_existing) == 17
+ lep_dl.use_or_get_db_episodes()
+ lep_dl.files = downloader.gather_all_files(lep_dl.db_episodes)
+ audio_files = lep_dl.files.filter_by_type(Audio)
+ lep_dl.detach_existed_files(tmp_path, audio_files)
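+ # 2 of the 18 audio files gathered from the mocked DB were written to disc above; 16 remain.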
+ assert len(lep_dl.existed) == 2
+ assert len(lep_dl.non_existed) == 16
-def test_retrieving_audios_as_none() -> None:
- """It replaces None to empty list."""
+def test_retrieving_audios_as_none(
+ lep_dl: LepDL,
+) -> None:
+ """It sets None to empty list and skip it."""
json_test = """\
[
{
@@ -124,16 +133,22 @@ def test_retrieving_audios_as_none() -> None:
"url": "https://teacherluke.co.uk/2009/04/15/episode-3-musicthe-beatles/",
"post_title": "3. Music/The Beatles",
"post_type": "",
- "audios": null,
- "parsing_utc": "2021-10-14T07:35:24.575575Z",
+ "files": {
+ "audios": null,
+ "page_pdf": []
+ },
+ "parsed_at": "2021-10-14T07:35:24.575575Z",
"index": 2009041501,
"admin_note": "Edge case - null in 'audios'"
}
]
""" # noqa: E501,B950
- db_episodes = data_getter.get_list_of_valid_episodes(json_test)
- audio_data = downloader.get_audios_data(db_episodes)
- assert audio_data[0][2] == []
+ db_episodes = Lep.extract_only_valid_episodes(json_test)
+ db_episodes[0].files["audios"] = None
+ # Check that 'empty' files (lists) are ignored.
+ lep_dl.files = downloader.gather_all_files(db_episodes)
+ assert len(lep_dl.files) == 1
+ assert isinstance(lep_dl.files[0], PagePDF)
def test_downloading_mocked_mp3_files(
@@ -141,20 +156,17 @@ def test_downloading_mocked_mp3_files(
mp3_file1_mock: bytes,
mp3_file2_mock: bytes,
tmp_path: Path,
+ lep_dl: LepDL,
) -> None:
"""It downloads file on disc."""
- test_downloads: List[Tuple[str, List[str]]] = []
- file_1 = (
- "Test File #1",
- [
- "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3" # noqa: E501,B950
- ],
+ test_downloads: LepFileList = LepFileList()
+ file_1 = LepFile(
+ filename="Test File #1.mp3",
+ primary_url="https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
)
- file_2 = (
- "Test File #2",
- [
- "https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3" # noqa: E501,B950
- ],
+ file_2 = LepFile(
+ filename="Test File #2.mp3",
+ primary_url="https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3", # noqa: E501,B950
)
test_downloads.append(file_1)
test_downloads.append(file_2)
@@ -168,53 +180,15 @@ def test_downloading_mocked_mp3_files(
content=mp3_file2_mock,
)
- downloader.download_files(test_downloads, tmp_path)
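+ # download_files() takes its download queue from the 'non_existed' list.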
+ lep_dl.non_existed = test_downloads
+ lep_dl.download_files(tmp_path)
expected_file_1 = tmp_path / "Test File #1.mp3"
expected_file_2 = tmp_path / "Test File #2.mp3"
assert expected_file_1.exists()
assert 21460 < expected_file_1.stat().st_size < 22000
assert expected_file_2.exists()
assert 18300 < expected_file_2.stat().st_size < 18350
- assert len(downloader.successful_downloaded) == 2
-
-
-def test_skipping_downloaded_url(
- requests_mock: rm_Mocker,
- mp3_file1_mock: bytes,
- mp3_file2_mock: bytes,
- tmp_path: Path,
-) -> None:
- """It skips URL if it was downloaded before."""
- test_downloads: List[Tuple[str, List[str]]] = []
- file_1 = (
- "Test File #1",
- [
- "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" # noqa: E501,B950
- ],
- )
- file_2 = (
- "Test File #2",
- [
- "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" # noqa: E501,B950
- ],
- )
- test_downloads.append(file_1)
- test_downloads.append(file_2)
-
- requests_mock.get(
- "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", # noqa: E501,B950
- content=mp3_file1_mock,
- )
- requests_mock.get(
- "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", # noqa: E501,B950
- content=mp3_file2_mock,
- )
-
- downloader.download_files(test_downloads, tmp_path)
- expected_file_1 = tmp_path / "Test File #1.mp3"
- assert expected_file_1.exists()
- assert len(list(tmp_path.iterdir())) == 1
- assert len(downloader.duplicated_links) == 1
+ assert len(lep_dl.downloaded) == 2
def test_skipping_downloaded_file_on_disc(
@@ -222,22 +196,19 @@ def test_skipping_downloaded_file_on_disc(
mp3_file1_mock: bytes,
mp3_file2_mock: bytes,
tmp_path: Path,
+ lep_dl: LepDL,
) -> None:
"""It skips (and does not override) URL if file was downloaded before."""
- downloader.successful_downloaded = {} # Clear from previous tests
- test_downloads: List[Tuple[str, List[str]]] = []
- file_1 = (
- "Test File #1",
- [
- "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" # noqa: E501,B950
- ],
+ test_downloads: LepFileList = LepFileList()
+ file_1 = LepFile(
+ filename="Test File #1.mp3",
+ primary_url="http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", # noqa: E501,B950
)
- file_2 = (
- "Test File #2",
- [
- "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3" # noqa: E501,B950
- ],
+ file_2 = LepFile(
+ filename="Test File #2.mp3",
+ primary_url="https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
)
+
test_downloads.append(file_1)
test_downloads.append(file_2)
@@ -250,31 +221,31 @@ def test_skipping_downloaded_file_on_disc(
content=mp3_file2_mock,
)
+ lep_dl.files = test_downloads
+ lep_dl.detach_existed_files(tmp_path)
existing_file_1 = tmp_path / "Test File #1.mp3"
existing_file_1.write_text("Here are mp3 1 bytes")
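+ # File #1 now exists on disc, so download_files() should skip it without overwriting.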
- downloader.download_files(test_downloads, tmp_path)
+ lep_dl.download_files(tmp_path)
expected_file_2 = tmp_path / "Test File #2.mp3"
assert existing_file_1.read_text() == "Here are mp3 1 bytes"
assert expected_file_2.exists()
assert len(list(tmp_path.iterdir())) == 2
- assert len(downloader.already_on_disc) == 1
+ assert len(lep_dl.existed) == 1
def test_try_auxiliary_download_links(
requests_mock: rm_Mocker,
mp3_file1_mock: bytes,
tmp_path: Path,
+ lep_dl: LepDL,
) -> None:
"""It downloads file by auxiliary link."""
- downloader.successful_downloaded = {} # Clear from previous tests
- test_downloads: List[Tuple[str, List[str]]] = []
- file_1 = (
- "Test File #1",
- [
- "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
- "https://hotenov.com/d/lep/some_auxiliary_1.mp3",
- "https://hotenov.com/d/lep/some_auxiliary_2.mp3",
- ],
+ test_downloads: LepFileList = LepFileList()
+ file_1 = LepFile(
+ filename="Test File #1.mp3",
+ primary_url="https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
+ secondary_url="https://hotenov.com/d/lep/some_auxiliary_1.mp3",
+ tertiary_url="https://hotenov.com/d/lep/some_auxiliary_2.mp3",
)
test_downloads.append(file_1)
@@ -293,27 +264,26 @@ def test_try_auxiliary_download_links(
content=mp3_file1_mock,
)
- downloader.download_files(test_downloads, tmp_path)
+ lep_dl.files = test_downloads
+ lep_dl.detach_existed_files(tmp_path)
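+ # If the primary URL fails, the secondary and tertiary links are tried in turn.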
+ lep_dl.download_files(tmp_path)
expected_file_1 = tmp_path / "Test File #1.mp3"
assert expected_file_1.exists()
assert len(list(tmp_path.iterdir())) == 1
- assert len(downloader.successful_downloaded) == 1
+ assert len(lep_dl.downloaded) == 1
def test_primary_link_unavailable(
requests_mock: rm_Mocker,
tmp_path: Path,
capsys: CaptureFixture[str],
+ lep_dl: LepDL,
) -> None:
"""It records unavailable file and prints about that."""
- downloader.successful_downloaded = {} # Clear from previous tests
- downloader.unavailable_links = {}
- test_downloads: List[Tuple[str, List[str]]] = []
- file_1 = (
- "Test File #1",
- [
- "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
- ],
+ test_downloads: LepFileList = LepFileList()
+ file_1 = LepFile(
+ filename="Test File #1.mp3",
+ primary_url="https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
)
test_downloads.append(file_1)
@@ -322,11 +292,13 @@ def test_primary_link_unavailable(
exc=Exception("Something wrong!"),
)
- downloader.download_files(test_downloads, tmp_path)
+ lep_dl.files = test_downloads
+ lep_dl.detach_existed_files(tmp_path)
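+ # The mocked request raises an exception, so the file should land in 'not_found'.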
+ lep_dl.download_files(tmp_path)
captured = capsys.readouterr()
assert len(list(tmp_path.iterdir())) == 0
- assert len(downloader.successful_downloaded) == 0
- assert len(downloader.unavailable_links) == 1
+ assert len(lep_dl.downloaded) == 0
+ assert len(lep_dl.not_found) == 1
assert "[ERROR]: Unknown error:" in captured.out
assert "Something wrong!" in captured.out
assert "[INFO]: Can't download:" in captured.out
@@ -337,18 +309,16 @@ def test_both_primary_and_auxiliary_links_404(
requests_mock: rm_Mocker,
tmp_path: Path,
capsys: CaptureFixture[str],
+ lep_dl: LepDL,
) -> None:
"""It records unavailable files and prints about that."""
- downloader.successful_downloaded = {} # Clear from previous tests
- downloader.unavailable_links = {}
- test_downloads: List[Tuple[str, List[str]]] = []
- file_1 = (
- "Test File #1",
- [
- "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
- "https://hotenov.com/d/lep/some_auxiliary_1.mp3",
- ],
+ test_downloads: LepFileList = LepFileList()
+ file_1 = LepFile(
+ filename="Test File #1.mp3",
+ primary_url="https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950
+ secondary_url="https://hotenov.com/d/lep/some_auxiliary_1.mp3",
)
+
test_downloads.append(file_1)
requests_mock.get(
@@ -362,10 +332,317 @@ def test_both_primary_and_auxiliary_links_404(
status_code=404,
)
- downloader.download_files(test_downloads, tmp_path)
+ lep_dl.files = test_downloads
+ lep_dl.detach_existed_files(tmp_path, lep_dl.files)
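+ # Both mocked URLs return 404, so the file is recorded in 'not_found'.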
+ lep_dl.download_files(tmp_path)
captured = capsys.readouterr()
assert len(list(tmp_path.iterdir())) == 0
- assert len(downloader.successful_downloaded) == 0
- assert len(downloader.unavailable_links) == 1
+ assert len(lep_dl.downloaded) == 0
+ assert len(lep_dl.not_found) == 1
assert "[INFO]: Can't download:" in captured.out
assert "Test File #1.mp3" in captured.out
+
+
+def test_gathering_audio_files(
+ requests_mock: rm_Mocker,
+ json_db_mock: str,
+ lep_dl: LepDL,
+) -> None:
+ """It gets all audio files from mocked episodes."""
+ requests_mock.get(
+ conf.JSON_DB_URL,
+ text=json_db_mock,
+ )
+ lep_dl.use_or_get_db_episodes()
+ lep_dl.files = downloader.gather_all_files(lep_dl.db_episodes)
+ audio_files = lep_dl.files.filter_by_type(Audio)
+ assert len(audio_files) == 18
+
+
+def test_collecting_auxiliary_audio_links(
+ lep_dl: LepDL,
+) -> None:
+ """It collects secondary and tertiary links as well."""
+ json_test = """\
+ [
+ {
+ "episode": 3,
+ "date": "2000-01-01T00:00:00+00:00",
+ "url": "https://teacherluke.co.uk/2009/04/15/episode-3-musicthe-beatles/",
+ "post_title": "3. Music/The Beatles",
+ "post_type": "",
+ "files": {
+ "audios": [
+ [
+ "https://someurl1.local", "https://someurl2.local", "https://someurl3.local"
+ ],
+ [
+ "https://part2-someurl1.local", "https://part2-someurl2.local"
+ ]
+ ],
+ "page_pdf": []
+ },
+ "parsed_at": "2021-10-14T07:35:24.575575Z",
+ "index": 2009041501,
+ "admin_note": "Edge case - null in 'audios'"
+ }
+ ]
+ """ # noqa: E501,B950
+ db_episodes = Lep.extract_only_valid_episodes(json_test)
+ lep_dl.files = downloader.gather_all_files(db_episodes)
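+ # Expected: 2 multi-part audio files + 1 default page PDF.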
+ assert len(lep_dl.files) == 3
+ assert lep_dl.files[0].secondary_url == "https://someurl2.local"
+ assert lep_dl.files[0].tertiary_url == "https://someurl3.local"
+ assert lep_dl.files[1].secondary_url == "https://part2-someurl2.local"
+
+
+def test_using_db_episodes_after_parsing(
+ lep_dl: LepDL,
+) -> None:
+ """It uses database episodes retrieved during parsing stage."""
+ ep_1 = LepEpisode()
+ ep_1.index = 2022011101
+ ep_1.episode = 888
+ ep_1.post_title = "888. Some title."
+ ep_1._short_date = "2022-01-11"
+ ep_1.post_type = "AUDIO"
+ ep_1.files = {
+ "audios": [
+ [
+ "https://someurl1.local",
+ "https://someurl2.local",
+ "https://someurl3.local",
+ ]
+ ],
+ "page_pdf": [],
+ }
+ lep_dl.db_episodes.append(ep_1)
+ lep_dl.use_or_get_db_episodes()
+ lep_dl.files = downloader.gather_all_files(lep_dl.db_episodes)
+ assert len(lep_dl.files) == 2 # + 1 PDF file
+ assert lep_dl.files[0].primary_url == "https://someurl1.local"
+ assert lep_dl.files[0].filename == "[2022-01-11] # 888. Some title..mp3"
+
+
+def test_no_valid_episodes_in_database(
+ requests_mock: rm_Mocker,
+ lep_dl: LepDL,
+) -> None:
+ """It raises exception if there are no valid episodes in JSON file."""
+ json_test = """\
+ [
+ {
+ "episode_number": 666,
+ "date": "2000-01-01T00:00:00+00:00",
+ "url": "https://teacherluke.co.uk/2009/04/15/episode-3-musicthe-beatles/",
+ }
+ ]
+ """ # noqa: E501,B950
+ requests_mock.get(
+ conf.JSON_DB_URL,
+ text=json_test,
+ )
+ lep_dl.use_or_get_db_episodes()
+ lep_dl.files = downloader.gather_all_files(lep_dl.db_episodes)
+ assert len(lep_dl.files) == 0
+ # assert "No episodes for gathering files. Exit." in ex.value.args[0]
+
+
+def test_populating_secondary_url(
+ lep_dl: LepDL,
+) -> None:
+ """It populates secondary links for empty values."""
+ json_test = """\
+ [
+ {
+ "episode": 3,
+ "date": "2000-01-01T00:00:00+00:00",
+ "url": "https://teacherluke.co.uk/2009/04/15/episode-3-musicthe-beatles/",
+ "post_title": "3. Music/The Beatles",
+ "post_type": "",
+ "files": {
+ "audios": [
+ [
+ "https://someurl1.local", "", "https://someurl3.local"
+ ],
+ [
+ "https://part2-someurl1.local", "https://part2-someurl2.local"
+ ]
+ ],
+ "page_pdf": []
+ },
+ "parsed_at": "2021-10-14T07:35:24.575575Z",
+ "index": 2009041501,
+ "admin_note": ""
+ },
+ {
+ "episode": 135,
+ "date": "2013-06-17T00:00:00+00:00",
+ "url": "https://teacherluke.co.uk/2013/06/17/episode-3-musicthe-beatles/",
+ "post_title": "135. Raining Animals",
+ "post_type": "",
+ "files": {
+ "audios": [
+ [
+ "https://someurl1.local135"
+ ]
+ ],
+ "page_pdf": []
+ },
+ "parsed_at": "2022-01-12T13:50:24.575575Z",
+ "index": 2009041501,
+ "admin_note": "default url for PDF"
+ }
+ ]
+ """ # noqa: E501,B950
+ db_episodes = Lep.extract_only_valid_episodes(json_test)
+ lep_dl.files = downloader.gather_all_files(db_episodes)
+ lep_dl.populate_default_url()
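+ # populate_default_url() fills empty secondary URLs with the default mirror on hotenov.com.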
+ assert len(lep_dl.files) == 5
+ assert (
+ lep_dl.files[0].secondary_url
+ == "https://hotenov.com/d/lep/%5B2013-06-17%5D%20%23%20135.%20Raining%C2%A0Animals.mp3" # noqa: E501,B950
+ )
+ assert (
+ lep_dl.files[2].secondary_url
+ == "https://hotenov.com/d/lep/%5B2000-01-01%5D%20%23%203.%20Music/The%20Beatles%20%5BPart%2001%5D.mp3" # noqa: E501,B950
+ )
+ assert lep_dl.files[2].tertiary_url == "https://someurl3.local"
+ assert lep_dl.files[3].secondary_url == "https://part2-someurl2.local"
+
+
+def test_gathering_page_pdf_urls(
+ lep_dl: LepDL,
+) -> None:
+ """It gatheres pdf links if they are provided in JSON."""
+ json_test = """\
+ [
+ {
+ "episode": 555,
+ "files": {
+ "audios": [],
+ "page_pdf": ["https://someurl555.local"]
+ },
+ "index": 2022011303
+ },
+ {
+ "episode": 554,
+ "files": {
+ "audios": [],
+ "page_pdf": ["https://someurl554.local1", "https://someurl554.local2"]
+ },
+ "index": 2022011302
+ },
+ {
+ "episode": 553,
+ "files": {
+ "audios": [],
+ "page_pdf": [
+ "https://someurl553.local1",
+ "https://someurl553.local2",
+ "https://someurl553.local3"
+ ]
+ },
+ "index": 2022011302
+ }
+ ]
+ """ # noqa: E501,B950
+ db_episodes = Lep.extract_only_valid_episodes(json_test)
+ lep_dl.files = downloader.gather_all_files(db_episodes)
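+ # Up to three page_pdf links map to primary, secondary, and tertiary URLs.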
+
+ assert len(lep_dl.files) == 3
+
+ assert lep_dl.files[0].primary_url == "https://someurl553.local1"
+ assert lep_dl.files[0].secondary_url == "https://someurl553.local2"
+ assert lep_dl.files[0].tertiary_url == "https://someurl553.local3"
+
+ assert lep_dl.files[1].primary_url == "https://someurl554.local1"
+ assert lep_dl.files[1].secondary_url == "https://someurl554.local2"
+
+ assert lep_dl.files[2].primary_url == "https://someurl555.local"
+ assert lep_dl.files[2].secondary_url == ""
+
+
+def test_gathering_links_for_audio_track(
+ lep_dl: LepDL,
+) -> None:
+ """It collects URLs for audio track."""
+ json_test = """\
+ [
+ {
+ "episode": 3,
+ "date": "2000-01-01T00:00:00+00:00",
+ "url": "https://teacherluke.co.uk/2009/04/15/episode-3-musicthe-beatles/",
+ "post_title": "3. Music/The Beatles",
+ "post_type": "",
+ "files": {
+ "audios": [],
+ "atrack": [
+ [
+ "https://someurl1.local", "https://someurl2.local", "https://someurl3.local"
+ ]
+ ]
+ },
+ "parsed_at": "2021-10-14T07:35:24.575575Z",
+ "index": 2009041501,
+ "admin_note": "Check audio track."
+ }
+ ]
+ """ # noqa: E501,B950
+ db_episodes = Lep.extract_only_valid_episodes(json_test)
+ lep_dl.files = downloader.gather_all_files(db_episodes)
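+ # Expected: 1 audio track file + 1 default page PDF.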
+ assert len(lep_dl.files) == 2
+ assert lep_dl.files[0].primary_url == "https://someurl1.local"
+ assert lep_dl.files[0].secondary_url == "https://someurl2.local"
+ assert lep_dl.files[0].tertiary_url == "https://someurl3.local"
+ assert isinstance(lep_dl.files[0], ATrack)
+ assert (
+ lep_dl.files[0].filename == "[2000-01-01] # 3. Music/The Beatles _aTrack_.mp3"
+ )
+
+
+def test_gathering_multi_part_audio_track(
+ lep_dl: LepDL,
+) -> None:
+ """It collects multi-part audio track."""
+ json_test = """\
+ [
+ {
+ "episode": 3,
+ "date": "2000-01-01T00:00:00+00:00",
+ "url": "https://teacherluke.co.uk/2009/04/15/episode-3-musicthe-beatles/",
+ "post_title": "3. Music/The Beatles",
+ "post_type": "",
+ "files": {
+ "audios": [],
+ "atrack": [
+ [
+ "https://someurl1.local", "https://someurl2.local", "https://someurl3.local"
+ ],
+ [
+ "https://part2-someurl1.local", "https://part2-someurl2.local"
+ ]
+ ]
+ },
+ "parsed_at": "2021-10-14T07:35:24.575575Z",
+ "index": 2009041501,
+ "admin_note": "Check audio track."
+ }
+ ]
+ """ # noqa: E501,B950
+ db_episodes = Lep.extract_only_valid_episodes(json_test)
+ lep_dl.files = downloader.gather_all_files(db_episodes)
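+ # Expected: 2 audio track parts + 1 default page PDF.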
+ assert len(lep_dl.files) == 3
+ assert lep_dl.files[0].secondary_url == "https://someurl2.local"
+ assert lep_dl.files[0].tertiary_url == "https://someurl3.local"
+ assert lep_dl.files[1].secondary_url == "https://part2-someurl2.local"
+ assert isinstance(lep_dl.files[0], ATrack)
+ assert isinstance(lep_dl.files[1], ATrack)
+ assert (
+ lep_dl.files[0].filename
+ == "[2000-01-01] # 3. Music/The Beatles [Part 01] _aTrack_.mp3"
+ )
+ assert (
+ lep_dl.files[1].filename
+ == "[2000-01-01] # 3. Music/The Beatles [Part 02] _aTrack_.mp3"
+ )
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 5b0bd9a..3b67751 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -20,13 +20,16 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
"""Test cases for the parser module."""
+import copy
import json
-import typing as t
+from datetime import datetime
+from datetime import timedelta
+from datetime import timezone
from pathlib import Path
from typing import Callable
+from typing import Dict
from typing import List
from typing import Optional
-from typing import Tuple
import pytest
import requests
@@ -39,9 +42,15 @@
from lep_downloader import config as conf
from lep_downloader import lep
from lep_downloader import parser
-from lep_downloader.data_getter import get_web_page_html_text
+from lep_downloader.exceptions import DataBaseUnavailable
+from lep_downloader.exceptions import NoEpisodeLinksError
+from lep_downloader.exceptions import NoEpisodesInDataBase
+from lep_downloader.exceptions import NotEpisodeURLError
from lep_downloader.lep import as_lep_episode_obj
+from lep_downloader.lep import Lep
from lep_downloader.lep import LepEpisode
+from lep_downloader.lep import LepEpisodeList
+from lep_downloader.parser import Archive
lep_date_format = "%Y-%m-%dT%H:%M:%S%z"
@@ -53,7 +62,7 @@ def test_getting_success_page_response(
) -> None:
"""It gets HTML content as text."""
requests_mock.get(req_mock.ANY, text="Response OK")
- resp = get_web_page_html_text(conf.ARCHIVE_URL, req_ses)[0]
+ resp = Lep.get_web_document(conf.ARCHIVE_URL, req_ses)[0]
assert resp == "Response OK"
@@ -63,7 +72,7 @@ def test_getting_404_page_response(
) -> None:
"""It handles HTTPError if page is not found."""
requests_mock.get(req_mock.ANY, text="Response OK", status_code=404)
- resp = get_web_page_html_text("http://example.com", req_ses)[0]
+ resp = Lep.get_web_document("http://example.com", req_ses)[0]
assert "[ERROR]" in resp
assert "404" in resp
@@ -74,7 +83,7 @@ def test_getting_503_page_response(
) -> None:
"""It handle HTTPError if service is unavailable."""
requests_mock.get(req_mock.ANY, text="Response OK", status_code=503)
- resp = get_web_page_html_text("http://example.com", req_ses)[0]
+ resp = Lep.get_web_document("http://example.com", req_ses)[0]
assert "[ERROR]" in resp
assert "503" in resp
@@ -85,7 +94,7 @@ def test_timeout_error(
) -> None:
"""It handle any Timeout exception for page."""
requests_mock.get(req_mock.ANY, exc=requests.exceptions.Timeout)
- resp = get_web_page_html_text("http://example.com", req_ses)[0]
+ resp = Lep.get_web_document("http://example.com", req_ses)[0]
assert "[ERROR]" in resp
assert "Timeout" in resp
@@ -96,7 +105,7 @@ def test_connection_error(
) -> None:
"""It handles ConnectionError exception for bad request."""
requests_mock.get(req_mock.ANY, exc=requests.exceptions.ConnectionError)
- resp = get_web_page_html_text("http://example.com", req_ses)[0]
+ resp = Lep.get_web_document("http://example.com", req_ses)[0]
assert "[ERROR]" in resp
assert "Bad request" in resp
@@ -107,7 +116,7 @@ def test_unknown_error(
) -> None:
"""It handles any other exceptions during getting response from URL."""
requests_mock.get(req_mock.ANY, exc=Exception("Something Bad"))
- resp = get_web_page_html_text("http://example.com", req_ses)[0]
+ resp = Lep.get_web_document("http://example.com", req_ses)[0]
assert "[ERROR]" in resp
assert "Unhandled error" in resp
@@ -124,7 +133,7 @@ def test_final_location_for_good_redirect(
headers={"Location": "https://final.location/"},
)
requests_mock.get("https://final.location", text="Final location")
- text, final_location, is_url_ok = get_web_page_html_text(
+ text, final_location, is_url_ok = Lep.get_web_document(
"https://re.direct",
req_ses,
)
@@ -149,7 +158,7 @@ def test_final_location_for_bad_redirect(
text="Final location",
status_code=404,
)
- text, final_location, is_url_ok = get_web_page_html_text(
+ text, final_location, is_url_ok = Lep.get_web_document(
"https://re.direct",
req_ses,
)
@@ -159,7 +168,9 @@ def test_final_location_for_bad_redirect(
assert final_location == "https://bad.final.location/"
-def test_retrieve_all_episode_links_from_soup() -> None:
+def test_retrieve_all_episode_links_from_soup(
+ archive: Archive,
+) -> None:
"""It returns only