From fbbb8e34860d7aba4ec1b6b3ae0118e2f751d607 Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Tue, 9 Nov 2021 20:18:46 +0300
Subject: [PATCH 01/16] test(parser): :recycle: Add fixtures for returning
 mock paths and text of mocked archive HTML page

---
 tests/conftest.py | 51 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 51 insertions(+)
 create mode 100644 tests/conftest.py

diff --git a/tests/conftest.py b/tests/conftest.py
new file mode 100644
index 0000000..fe56d3d
--- /dev/null
+++ b/tests/conftest.py
@@ -0,0 +1,51 @@
+# MIT License
+#
+# Copyright (c) 2021 Artem Hotenov
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+"""Package-wide test fixtures."""
+from pathlib import Path
+
+import pytest
+
+from lep_downloader import config as conf
+
+
+@pytest.fixture(scope="session")
+def mocks_dir_path() -> Path:
+    """Returns path to 'fixtures' directory."""
+    fixtures_dir = Path(
+        Path(__file__).resolve().parent,
+        "fixtures",
+    )
+    return fixtures_dir
+
+
+@pytest.fixture(scope="module")
+def html_mocks_path(mocks_dir_path: Path) -> Path:
+    """Returns path to 'ep_htmls' sub-directory of mocks."""
+    html_dir = mocks_dir_path / "ep_htmls"
+    return html_dir
+
+
+@pytest.fixture(scope="module")
+def archive_page_mock(mocks_dir_path: Path) -> str:
+    """Returns str object of archive HTML mocked page."""
+    page_path = mocks_dir_path / conf.LOCAL_ARCHIVE_HTML
+    return page_path.read_text(encoding="utf-8")

From af8ee3a511f732c7070f05b681e8b940594be161 Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Tue, 9 Nov 2021 20:21:58 +0300
Subject: [PATCH 02/16] test(parser): :white_check_mark: Update parser tests
 to use the 'archive_page_mock' fixture instead of a module-level function

---
 tests/test_parser.py | 53 ++++++++++++++++++++++++++------------------
 1 file changed, 31 insertions(+), 22 deletions(-)

diff --git a/tests/test_parser.py b/tests/test_parser.py
index 39e36d6..66bbcad 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -232,17 +232,12 @@ def test_short_links_substitution() -> None:
     assert replaced == expected
 
 
-def mock_archive_page(request: requests.Request, context: rm_Context) -> t.IO[bytes]:
-    """Callback for creating mocked Response of archive page."""
-    context.status_code = 200
-    # resp = io.StringIO()
-    resp = OFFLINE_HTML_DIR / conf.LOCAL_ARCHIVE_HTML
-    return open(resp, "rb")
-
-
-def test_parsing_result(requests_mock: rm_Mocker) -> None:
+def test_parsing_result(
+    requests_mock: rm_Mocker,
+    archive_page_mock: str,
+) -> None:
     """It parses mocked
archived page.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL) all_links = parsing_result[0] all_texts = parsing_result[2] @@ -314,9 +309,12 @@ def mock_single_page(request: requests.Request, context: rm_Context) -> t.IO[byt return open(local_path, "rb") -def test_mocking_single_page(requests_mock: rm_Mocker) -> None: +def test_mocking_single_page( + requests_mock: rm_Mocker, + archive_page_mock: str, +) -> None: """It parses mocked episode page.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) parsing_result: t.Tuple[t.List[str], ...] = parser.get_archive_parsing_results( conf.ARCHIVE_URL ) @@ -414,10 +412,13 @@ def test_parsing_non_episode_link(requests_mock: rm_Mocker) -> None: assert episode is None -def test_parsing_links_to_audio_for_mocked_episodes(requests_mock: rm_Mocker) -> None: +def test_parsing_links_to_audio_for_mocked_episodes( + requests_mock: rm_Mocker, + archive_page_mock: str, +) -> None: """It parses links to audio (if they exist).""" # TODO: Complete test (now it's simple copy-paste) - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) parsing_result: t.Tuple[t.List[str], ...] = parser.get_archive_parsing_results( conf.ARCHIVE_URL ) @@ -567,10 +568,11 @@ def mock_json_db(request: requests.Request, context: rm_Context) -> t.IO[bytes]: def test_no_new_episodes_on_archive_vs_json_db( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints when no new episodes on archive page.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, @@ -588,10 +590,11 @@ def test_no_new_episodes_on_archive_vs_json_db( def test_no_valid_episode_objects_in_json_db( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning when there are no valid episode objects.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, @@ -614,10 +617,11 @@ def test_no_valid_episode_objects_in_json_db( def test_json_db_not_valid( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints error for invalid JSON document.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, @@ -636,10 +640,11 @@ def test_json_db_not_valid( def test_json_db_not_available( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints error for unavailable JSON database.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, @@ -658,10 +663,11 @@ def test_json_db_not_available( def test_json_db_contains_only_string( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning for JSON as str.""" - requests_mock.get(conf.ARCHIVE_URL, 
body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, @@ -680,10 +686,11 @@ def test_json_db_contains_only_string( def test_invalid_objects_in_json_not_included( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It skips invalid objects in JSON database.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, @@ -716,9 +723,10 @@ def modified_json_db(request: requests.Request, context: rm_Context) -> str: def test_updating_json_database_with_new_episodes( requests_mock: rm_Mocker, + archive_page_mock: str, ) -> None: """It retrives and saves new episodes from archive.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, @@ -755,10 +763,11 @@ def modified_json_with_extra_episode( def test_updating_json_database_with_extra_episodes( requests_mock: rm_Mocker, + archive_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning if database contains more episodes than archive.""" - requests_mock.get(conf.ARCHIVE_URL, body=mock_archive_page) + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, additional_matcher=mocked_single_page_matcher, From 42ad947274312b595954ef15d532694da21065db Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Thu, 11 Nov 2021 09:14:24 +0300 Subject: [PATCH 03/16] test(parser): :white_check_mark: Update tests with new fixtures for parsing single page Move mapping URL - HTML dictionary into conftest.py --- tests/conftest.py | 97 ++++++++++++++++++++++++++++++++++++++ tests/test_parser.py | 108 ++++++++++++++++++------------------------- 2 files changed, 143 insertions(+), 62 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index fe56d3d..105b0ac 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,12 +21,71 @@ # SOFTWARE. 
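The conftest fixtures below mix `scope="session"` and `scope="module"`. As a minimal sketch of what those scopes mean for caching (the fixture names here are illustrative, not part of this suite):

import pytest


@pytest.fixture(scope="session")
def shared_resource() -> dict:
    # Built once per test run and reused by every test that requests it.
    return {"ready": True}


@pytest.fixture(scope="module")
def per_module_resource(shared_resource: dict) -> dict:
    # Rebuilt for each test module; a narrower scope may depend on a wider one.
    return dict(shared_resource)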
"""Package-wide test fixtures.""" from pathlib import Path +from typing import Callable +from typing import Dict +from typing import List +from typing import Optional import pytest +import requests +from requests_mock.request import _RequestObjectProxy +from requests_mock.response import _Context as rm_Context from lep_downloader import config as conf +# yapf: disable +URL_HTML_MAPPING = { + "https://teacherluke.co.uk/2009/04/12/episode-1-introduction/": + "2021-09-13_05-37-36 teacherluke.co.uk _2009_04_12_episode-1-introduction_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2009/10/19/extra-podcast-12-phrasal-verbs/": + "2021-09-07_09-14-02 teacherluke.co.uk _2009_10_19_extra-podcast-12-phrasal-verbs_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2009/10/19/episode-11-michael-jackson/": + "2021-09-07_09-14-02 teacherluke.co.uk _2009_10_19_episode-11-michael-jackson_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2010/03/25/london-video-interviews-pt-1/": + "2021-09-07_09-14-02 teacherluke.co.uk _2010_03_25_london-video-interviews-pt-1_.html", # noqa: E501,B950 + "http://teacherluke.wordpress.com/2012/09/27/113-setting-the-world-to-rights/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.wordpress.com _2012_09_27_113-setting-the-world-to-rights_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2014/06/30/193-culture-shock-life-in-london-pt-2/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.co.uk _2014_06_30_193-culture-shock-life-in-london-pt-2_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2015/10/21/304-back-to-the-future-part-1/": + "2021-09-07_09-14-02 teacherluke.co.uk _2015_10_07_300-episode-300-part-1_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2015/10/22/305-back-to-the-future-part-2/": + "2021-09-07_09-14-02 teacherluke.co.uk _2015_10_07_300-episode-300-part-2_.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2016/08/07/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.co.uk _2016_08_07_370-in-conversation-with-rob-ager-from.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2017/03/11/lep-on-zep-my-recent-interview-on-zdeneks-english-podcast/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.co.uk _2017_03_11_lep-on-zep-my-recent-interview-on-zden.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2017/05/26/i-was-invited-onto-the-english-across-the-pond-podcast/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.co.uk _2017_05_26_i-was-invited-onto-the-english-across-.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2017/08/26/website-only-a-history-of-british-pop-a-musical-tour-through-james-vinyl-collection/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.co.uk _2017_08_26_website-only-a-history-of-british-pop-.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2021/02/03/703-walaa-from-syria-wisbolep-competition-winner-%f0%9f%8f%86/": # noqa: E501,B950 + "2021-08-11_lep-e703-page-content-pretty.html", + "https://teacherluke.co.uk/2021/03/26/711-william-from-france-%f0%9f%87%ab%f0%9f%87%b7-wisbolep-runner-up/": # noqa: E501,B950 + "2021-08-11_lep-e711-page-content-pretty.html", + "https://teacherluke.co.uk/2021/04/11/714-robin-from-hamburg-%f0%9f%87%a9%f0%9f%87%aa-wisbolep-runner-up/": # noqa: E501,B950 + "2021-09-07_09-14-02 teacherluke.co.uk _2021_04_11_714-robin-from-hamburg-🇩🇪-wisbolep-run.html", # noqa: E501,B950 + "https://teacherluke.co.uk/2021/08/03/733-a-summer-ramble/": 
+ "2021-08-11_lep-e733-page-content-pretty.html", + "https://teacherluke.co.uk/premium/archive-comment-section/": + "2021-09-28_10-44-00 Archive & Comment Section _ (premium archive).html", # noqa: E501,B950 # None-episode link +} +# yapf: enable + + +@pytest.fixture(scope="session") +def url_html_map() -> Dict[str, str]: + """Returns dictionary of mocked URLs and their HTML files.""" + return URL_HTML_MAPPING + + +@pytest.fixture(scope="session") +def mocked_urls(url_html_map: Dict[str, str]) -> List[str]: + """Returns list of mocked URLs.""" + return [*url_html_map] + + @pytest.fixture(scope="session") def mocks_dir_path() -> Path: """Returns path to 'fixtures' direcory.""" @@ -49,3 +108,41 @@ def archive_page_mock(mocks_dir_path: Path) -> str: """Returns str object of archive HTML mocked page.""" page_path = mocks_dir_path / conf.LOCAL_ARCHIVE_HTML return page_path.read_text(encoding="utf-8") + + +@pytest.fixture(scope="module") +def single_page_mock( + html_mocks_path: Path, + url_html_map: Dict[str, str], +) -> Callable[[requests.Request, rm_Context], str]: + """Returns custom callback for mocking.""" + + def _mock_single_page( + request: requests.Request, + context: rm_Context, + ) -> str: + """Callback for creating mocked Response of episode page.""" + # context.status_code = 200 + url = request.url.lower() + # local_path = OFFLINE_HTML_DIR / "ep_htmls" / LINK_FILE_MAPPING[url] + page_path = html_mocks_path / url_html_map[url] + # return open(local_path, "rb") + return page_path.read_text(encoding="utf-8") + + return _mock_single_page + + +@pytest.fixture(scope="module") +def single_page_matcher( + mocked_urls: List[str], +) -> Optional[Callable[[_RequestObjectProxy], bool]]: + """Returns custom matcher callback.""" + + def _single_page_matcher( + request: _RequestObjectProxy, + ) -> bool: + """Return True response if URL has mocked (pre-saved) local file.""" + url = request.url.lower() + return url in mocked_urls + + return _single_page_matcher diff --git a/tests/test_parser.py b/tests/test_parser.py index 66bbcad..dfc31ed 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -4,6 +4,9 @@ import typing as t from datetime import datetime from pathlib import Path +from typing import Callable +from typing import List +from typing import Optional import pytest import requests @@ -11,6 +14,7 @@ from bs4 import BeautifulSoup from pytest import CaptureFixture from requests_mock.mocker import Mocker as rm_Mocker +from requests_mock.request import _RequestObjectProxy from requests_mock.response import _Context as rm_Context from lep_downloader import config as conf @@ -26,27 +30,6 @@ "fixtures", ) -LINK_FILE_MAPPING = { - "https://teacherluke.co.uk/2009/04/12/episode-1-introduction/": "2021-09-13_05-37-36 teacherluke.co.uk _2009_04_12_episode-1-introduction_.html", - "https://teacherluke.co.uk/2009/10/19/extra-podcast-12-phrasal-verbs/": "2021-09-07_09-14-02 teacherluke.co.uk _2009_10_19_extra-podcast-12-phrasal-verbs_.html", - "https://teacherluke.co.uk/2009/10/19/episode-11-michael-jackson/": "2021-09-07_09-14-02 teacherluke.co.uk _2009_10_19_episode-11-michael-jackson_.html", - "https://teacherluke.co.uk/2010/03/25/london-video-interviews-pt-1/": "2021-09-07_09-14-02 teacherluke.co.uk _2010_03_25_london-video-interviews-pt-1_.html", - "http://teacherluke.wordpress.com/2012/09/27/113-setting-the-world-to-rights/": "2021-09-07_09-14-02 teacherluke.wordpress.com _2012_09_27_113-setting-the-world-to-rights_.html", - 
"https://teacherluke.co.uk/2014/06/30/193-culture-shock-life-in-london-pt-2/": "2021-09-07_09-14-02 teacherluke.co.uk _2014_06_30_193-culture-shock-life-in-london-pt-2_.html", - "https://teacherluke.co.uk/2015/10/21/304-back-to-the-future-part-1/": "2021-09-07_09-14-02 teacherluke.co.uk _2015_10_07_300-episode-300-part-1_.html", - "https://teacherluke.co.uk/2015/10/22/305-back-to-the-future-part-2/": "2021-09-07_09-14-02 teacherluke.co.uk _2015_10_07_300-episode-300-part-2_.html", - "https://teacherluke.co.uk/2016/08/07/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis/": "2021-09-07_09-14-02 teacherluke.co.uk _2016_08_07_370-in-conversation-with-rob-ager-from.html", - "https://teacherluke.co.uk/2017/03/11/lep-on-zep-my-recent-interview-on-zdeneks-english-podcast/": "2021-09-07_09-14-02 teacherluke.co.uk _2017_03_11_lep-on-zep-my-recent-interview-on-zden.html", - "https://teacherluke.co.uk/2017/05/26/i-was-invited-onto-the-english-across-the-pond-podcast/": "2021-09-07_09-14-02 teacherluke.co.uk _2017_05_26_i-was-invited-onto-the-english-across-.html", - "https://teacherluke.co.uk/2017/08/26/website-only-a-history-of-british-pop-a-musical-tour-through-james-vinyl-collection/": "2021-09-07_09-14-02 teacherluke.co.uk _2017_08_26_website-only-a-history-of-british-pop-.html", - "https://teacherluke.co.uk/2021/02/03/703-walaa-from-syria-wisbolep-competition-winner-%f0%9f%8f%86/": "2021-08-11_lep-e703-page-content-pretty.html", - "https://teacherluke.co.uk/2021/03/26/711-william-from-france-%f0%9f%87%ab%f0%9f%87%b7-wisbolep-runner-up/": "2021-08-11_lep-e711-page-content-pretty.html", - "https://teacherluke.co.uk/2021/04/11/714-robin-from-hamburg-%f0%9f%87%a9%f0%9f%87%aa-wisbolep-runner-up/": "2021-09-07_09-14-02 teacherluke.co.uk _2021_04_11_714-robin-from-hamburg-🇩🇪-wisbolep-run.html", - "https://teacherluke.co.uk/2021/08/03/733-a-summer-ramble/": "2021-08-11_lep-e733-page-content-pretty.html", - "https://teacherluke.co.uk/premium/archive-comment-section/": "2021-09-28_10-44-00 Archive & Comment Section _ (premium archive).html", # None-episode link -} - -MAPPING_KEYS: t.List[str] = [*LINK_FILE_MAPPING] s = requests.Session() @@ -235,6 +218,7 @@ def test_short_links_substitution() -> None: def test_parsing_result( requests_mock: rm_Mocker, archive_page_mock: str, + mocked_urls: List[str], ) -> None: """It parses mocked archived page.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) @@ -245,7 +229,7 @@ def test_parsing_result( assert len(all_links) > 781 assert "/2009/04/12/episode-1-introduction" in all_links[-1] # Intersection of mocked pages and all links - intersection = set(MAPPING_KEYS) & set(all_links) + intersection = set(mocked_urls) & set(all_links) assert len(intersection) > 15 link_strings = parsing_result[2] @@ -289,29 +273,11 @@ def test_parsing_archive_with_known_duplicates() -> None: assert len(texts) == 0 -def mocked_single_page_matcher( - request: requests.Request, -) -> t.Optional[requests.Response]: - """Return OK response if URL has mocked (pre-saved) local file.""" - url = request.url.lower() - if url in MAPPING_KEYS: - resp = requests.Response() - resp.status_code = 200 - return resp - return None - - -def mock_single_page(request: requests.Request, context: rm_Context) -> t.IO[bytes]: - """Callback for creating mocked Response of episode page.""" - # context.status_code = 200 - url = request.url.lower() - local_path = OFFLINE_HTML_DIR / "ep_htmls" / LINK_FILE_MAPPING[url] - return open(local_path, "rb") - - 
def test_mocking_single_page( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, ) -> None: """It parses mocked episode page.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) @@ -325,8 +291,8 @@ def test_mocking_single_page( requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) parsed_episodes = parser.get_parsed_episodes(all_links, session, all_texts) @@ -415,6 +381,8 @@ def test_parsing_non_episode_link(requests_mock: rm_Mocker) -> None: def test_parsing_links_to_audio_for_mocked_episodes( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, ) -> None: """It parses links to audio (if they exist).""" # TODO: Complete test (now it's simple copy-paste) @@ -429,8 +397,8 @@ def test_parsing_links_to_audio_for_mocked_episodes( requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) parsed_episodes = parser.get_parsed_episodes(all_links, session, all_texts) @@ -569,14 +537,16 @@ def mock_json_db(request: requests.Request, context: rm_Context) -> t.IO[bytes]: def test_no_new_episodes_on_archive_vs_json_db( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints when no new episodes on archive page.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, @@ -591,6 +561,8 @@ def test_no_new_episodes_on_archive_vs_json_db( def test_no_valid_episode_objects_in_json_db( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning when there are no valid episode objects.""" @@ -598,8 +570,8 @@ def test_no_valid_episode_objects_in_json_db( requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( @@ -618,14 +590,16 @@ def test_no_valid_episode_objects_in_json_db( def test_json_db_not_valid( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints error for invalid JSON document.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, @@ -641,14 +615,16 @@ def test_json_db_not_valid( def test_json_db_not_available( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints error for unavailable JSON database.""" 
requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, @@ -664,14 +640,16 @@ def test_json_db_not_available( def test_json_db_contains_only_string( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning for JSON as str.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, @@ -687,14 +665,16 @@ def test_json_db_contains_only_string( def test_invalid_objects_in_json_not_included( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It skips invalid objects in JSON database.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, @@ -724,13 +704,15 @@ def modified_json_db(request: requests.Request, context: rm_Context) -> str: def test_updating_json_database_with_new_episodes( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, ) -> None: """It retrives and saves new episodes from archive.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, @@ -764,14 +746,16 @@ def modified_json_with_extra_episode( def test_updating_json_database_with_extra_episodes( requests_mock: rm_Mocker, archive_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + single_page_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning if database contains more episodes than archive.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) requests_mock.get( req_mock.ANY, - additional_matcher=mocked_single_page_matcher, - body=mock_single_page, + additional_matcher=single_page_matcher, + text=single_page_mock, ) requests_mock.get( conf.JSON_DB_URL, From 14c9e7b1d46add59e69f5d487d2045a627bf04f5 Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Thu, 11 Nov 2021 11:04:20 +0300 Subject: [PATCH 04/16] chore: :wrench: Add LOCAL_JSON_DB constant in config.py Add license boilerplate Format map dict with long lines --- src/lep_downloader/config.py | 55 ++++++++++++++++++++++++++++-------- 1 file changed, 43 insertions(+), 12 deletions(-) diff --git a/src/lep_downloader/config.py b/src/lep_downloader/config.py index 5e270c4..40b97a6 100644 --- a/src/lep_downloader/config.py +++ b/src/lep_downloader/config.py @@ -1,3 +1,24 @@ +# MIT License +# +# Copyright (c) 2021 Artem Hotenov +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), 
to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. """App configuration module.""" @@ -6,23 +27,33 @@ JSON_DB_URL = "https://hotenov.com/some_json.json" LOCAL_ARCHIVE_HTML = "2021-08-10_lep-archive-page-content-pretty.html" +LOCAL_JSON_DB = "mocked-db-json-equal-786-objects.json" +# yapf: disable SHORT_LINKS_MAPPING_DICT = { - "http://wp.me/p4IuUx-7PL": "https://teacherluke.co.uk/2017/06/20/460-catching-up-with-amber-paul-6-feat-sarah-donnelly/", - "http://wp.me/p4IuUx-7C6": "https://teacherluke.co.uk/2017/04/25/444-the-rick-thompson-report-snap-general-election-2017/", - "http://wp.me/p4IuUx-7C4": "https://teacherluke.co.uk/2017/04/21/443-the-trip-to-japan-part-2/", - "http://wp.me/p4IuUx-7BQ": "https://teacherluke.co.uk/2017/04/21/442-the-trip-to-japan-part-1/", - "http://wp.me/p4IuUx-7BO": "https://teacherluke.co.uk/2017/04/18/441-andy-johnson-at-the-iatefl-conference/", - "http://wp.me/p4IuUx-7Av": "https://teacherluke.co.uk/2017/03/28/436-the-return-of-the-lying-game-with-amber-paul-video/", - "http://wp.me/p4IuUx-7zK": "https://teacherluke.co.uk/2017/03/26/i-was-interviewed-on-my-fluent-podcast-with-daniel-goodson/", - "http://wp.me/p4IuUx-7sg": "https://teacherluke.co.uk/2017/01/10/415-with-the-family-part-3-more-encounters-with-famous-people/", - "https://wp.me/p4IuUx-29": "https://teacherluke.co.uk/2011/10/11/notting-hill-carnival-video-frustration-out-takes/", + "http://wp.me/p4IuUx-7PL": + "https://teacherluke.co.uk/2017/06/20/460-catching-up-with-amber-paul-6-feat-sarah-donnelly/", # noqa: E501,B950 + "http://wp.me/p4IuUx-7C6": + "https://teacherluke.co.uk/2017/04/25/444-the-rick-thompson-report-snap-general-election-2017/", # noqa: E501,B950 + "http://wp.me/p4IuUx-7C4": + "https://teacherluke.co.uk/2017/04/21/443-the-trip-to-japan-part-2/", + "http://wp.me/p4IuUx-7BQ": + "https://teacherluke.co.uk/2017/04/21/442-the-trip-to-japan-part-1/", + "http://wp.me/p4IuUx-7BO": + "https://teacherluke.co.uk/2017/04/18/441-andy-johnson-at-the-iatefl-conference/", # noqa: E501,B950 + "http://wp.me/p4IuUx-7Av": + "https://teacherluke.co.uk/2017/03/28/436-the-return-of-the-lying-game-with-amber-paul-video/", # noqa: E501,B950 + "http://wp.me/p4IuUx-7zK": + "https://teacherluke.co.uk/2017/03/26/i-was-interviewed-on-my-fluent-podcast-with-daniel-goodson/", # noqa: E501,B950 + "http://wp.me/p4IuUx-7sg": + "https://teacherluke.co.uk/2017/01/10/415-with-the-family-part-3-more-encounters-with-famous-people/", # noqa: E501,B950 + "https://wp.me/p4IuUx-29": + "https://teacherluke.co.uk/2011/10/11/notting-hill-carnival-video-frustration-out-takes/", # noqa: E501,B950 } - -# MISSPELLED_LTD = ".co.ukm" +# yapf: enable IRRELEVANT_LINKS = 
("https://wp.me/P4IuUx-82H",) -EPISODE_LINK_RE = r"https?://((?Pwp\.me/p4IuUx-[\w-]+)|(teacherluke\.(co\.uk|wordpress\.com)/(?P\d{4}/\d{2}/\d{2})/))" +EPISODE_LINK_RE = r"https?://((?Pwp\.me/p4IuUx-[\w-]+)|(teacherluke\.(co\.uk|wordpress\.com)/(?P\d{4}/\d{2}/\d{2})/))" # noqa: E501,B950 INVALID_PATH_CHARS_RE = r"[<>:\"/\\\\|?*]" From 60b95e132287c774a2cf4f8e80f908dfa4481237 Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Thu, 11 Nov 2021 11:05:54 +0300 Subject: [PATCH 05/16] test(parser): :white_check_mark: Add fixtures for mocking JSON db and update tests with them --- tests/conftest.py | 44 ++++++++++++++++++++++++++++++++++--- tests/test_parser.py | 52 +++++--------------------------------------- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 105b0ac..cce9c94 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -20,6 +20,7 @@ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. """Package-wide test fixtures.""" +import json from pathlib import Path from typing import Callable from typing import Dict @@ -32,6 +33,7 @@ from requests_mock.response import _Context as rm_Context from lep_downloader import config as conf +from lep_downloader import lep # yapf: disable @@ -122,11 +124,8 @@ def _mock_single_page( context: rm_Context, ) -> str: """Callback for creating mocked Response of episode page.""" - # context.status_code = 200 url = request.url.lower() - # local_path = OFFLINE_HTML_DIR / "ep_htmls" / LINK_FILE_MAPPING[url] page_path = html_mocks_path / url_html_map[url] - # return open(local_path, "rb") return page_path.read_text(encoding="utf-8") return _mock_single_page @@ -146,3 +145,42 @@ def _single_page_matcher( return url in mocked_urls return _single_page_matcher + + +@pytest.fixture(scope="session") +def json_db_mock(mocks_dir_path: Path) -> str: + """Returns str object of JSON mocked database.""" + json_path = mocks_dir_path / conf.LOCAL_JSON_DB + return json_path.read_text(encoding="utf-8") + + +@pytest.fixture +def db_episodes(json_db_mock: str) -> List[lep.LepEpisode]: + """Returns reusable list of LepEpisode objects from JSON mocked database.""" + db_episodes: List[lep.LepEpisode] = json.loads( + json_db_mock, + object_hook=lep.as_lep_episode_obj, + ) + return db_episodes + + +@pytest.fixture +def modified_json_less_db_mock(db_episodes: List[lep.LepEpisode]) -> str: + """Returns mocked JSON database with less episodes.""" + # Delete three episodes + del db_episodes[0] + del db_episodes[1] + del db_episodes[6] + modified_json = json.dumps(db_episodes, cls=lep.LepJsonEncoder) + del db_episodes + return modified_json + + +@pytest.fixture +def modified_json_extra_db_mock(db_episodes: List[lep.LepEpisode]) -> str: + """Returns mocked JSON database with plus one episode.""" + lep_ep = lep.LepEpisode(episode=999, post_title="Extra episode") + db_episodes.append(lep_ep) # Add extra episode + modified_json = json.dumps(db_episodes, cls=lep.LepJsonEncoder) + del db_episodes + return modified_json diff --git a/tests/test_parser.py b/tests/test_parser.py index dfc31ed..758fa90 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -15,7 +15,6 @@ from pytest import CaptureFixture from requests_mock.mocker import Mocker as rm_Mocker from requests_mock.request import _RequestObjectProxy -from requests_mock.response import _Context as rm_Context from lep_downloader import config as conf from lep_downloader import lep @@ -25,12 +24,6 @@ from lep_downloader.lep import 
LepEpisode -OFFLINE_HTML_DIR = Path( - Path(__file__).resolve().parent, - "fixtures", -) - - s = requests.Session() @@ -527,18 +520,12 @@ def test_writing_lep_episodes_to_json() -> None: file.unlink() -def mock_json_db(request: requests.Request, context: rm_Context) -> t.IO[bytes]: - """Callback for creating mocked Response of episode page.""" - # context.status_code = 200 - local_path = OFFLINE_HTML_DIR / "mocked-db-json-equal-786-objects.json" - return open(local_path, "rb") - - def test_no_new_episodes_on_archive_vs_json_db( requests_mock: rm_Mocker, archive_page_mock: str, single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], single_page_mock: str, + json_db_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints when no new episodes on archive page.""" @@ -550,7 +537,7 @@ def test_no_new_episodes_on_archive_vs_json_db( ) requests_mock.get( conf.JSON_DB_URL, - body=mock_json_db, + text=json_db_mock, ) parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL) @@ -687,25 +674,12 @@ def test_invalid_objects_in_json_not_included( assert "no valid episode objects" in captured.out -def modified_json_db(request: requests.Request, context: rm_Context) -> str: - """Callback for creating mocked JSON database with less episodes.""" - # context.status_code = 200 - local_path = OFFLINE_HTML_DIR / "mocked-db-json-equal-786-objects.json" - mocked_json = local_path.read_text(encoding="utf-8") - db_episodes = json.loads(mocked_json, object_hook=as_lep_episode_obj) - # Delete three episodes - del db_episodes[0] - del db_episodes[1] - del db_episodes[6] - modified_json = json.dumps(db_episodes, cls=lep.LepJsonEncoder) - return modified_json - - def test_updating_json_database_with_new_episodes( requests_mock: rm_Mocker, archive_page_mock: str, single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], single_page_mock: str, + modified_json_less_db_mock: str, ) -> None: """It retrives and saves new episodes from archive.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) @@ -716,7 +690,7 @@ def test_updating_json_database_with_new_episodes( ) requests_mock.get( conf.JSON_DB_URL, - text=modified_json_db, + text=modified_json_less_db_mock, ) with tempfile.NamedTemporaryFile(prefix="LEP_tmp_", delete=False) as temp_file: @@ -728,26 +702,12 @@ def test_updating_json_database_with_new_episodes( assert len(py_from_json) == 786 -def modified_json_with_extra_episode( - request: requests.Request, - context: rm_Context, -) -> str: - """Callback for creating mocked JSON database with more episodes.""" - local_path = OFFLINE_HTML_DIR / "mocked-db-json-equal-786-objects.json" - mocked_json = local_path.read_text(encoding="utf-8") - db_episodes = json.loads(mocked_json, object_hook=as_lep_episode_obj) - # Add extra episode - lep_ep = LepEpisode(episode=999, post_title="Extra episode") - db_episodes.append(lep_ep) - modified_json = json.dumps(db_episodes, cls=lep.LepJsonEncoder) - return modified_json - - def test_updating_json_database_with_extra_episodes( requests_mock: rm_Mocker, archive_page_mock: str, single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], single_page_mock: str, + modified_json_extra_db_mock: str, capsys: CaptureFixture[str], ) -> None: """It prints warning if database contains more episodes than archive.""" @@ -759,7 +719,7 @@ def test_updating_json_database_with_extra_episodes( ) requests_mock.get( conf.JSON_DB_URL, - text=modified_json_with_extra_episode, + text=modified_json_extra_db_mock, ) 
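    # For context: 'modified_json_extra_db_mock' (defined in conftest.py)
    # builds this payload by round-tripping the mocked database through the
    # package's JSON helpers, roughly:
    #
    #   episodes = json.loads(json_db_mock, object_hook=lep.as_lep_episode_obj)
    #   episodes.append(lep.LepEpisode(episode=999, post_title="Extra episode"))
    #   payload = json.dumps(episodes, cls=lep.LepJsonEncoder)
    #
    # so the mocked database claims one episode that the mocked archive
    # lacks, which is the condition this test expects to be reported.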
parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL) From 4769f663651745042b4a55b2f896ee7c62f0389c Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Thu, 11 Nov 2021 11:28:16 +0300 Subject: [PATCH 06/16] test(parser): :white_check_mark: Add fixture with requests.Session and update parser tests with it --- tests/conftest.py | 7 ++++ tests/test_parser.py | 80 ++++++++++++++++++++++++++++---------------- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index cce9c94..b5ecf6d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -76,6 +76,13 @@ # yapf: enable +@pytest.fixture(scope="session") +def req_ses() -> requests.Session: + """Returns global (for all tests) requests session.""" + s = requests.Session() + return s + + @pytest.fixture(scope="session") def url_html_map() -> Dict[str, str]: """Returns dictionary of mocked URLs and their HTML files.""" diff --git a/tests/test_parser.py b/tests/test_parser.py index 758fa90..c97ce0a 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -24,57 +24,75 @@ from lep_downloader.lep import LepEpisode -s = requests.Session() - - -def test_getting_success_page_response(requests_mock: rm_Mocker) -> None: +def test_getting_success_page_response( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It gets HTML content as text.""" requests_mock.get(req_mock.ANY, text="Response OK") - resp = get_web_page_html_text(conf.ARCHIVE_URL, s)[0] + resp = get_web_page_html_text(conf.ARCHIVE_URL, req_ses)[0] assert resp == "Response OK" -def test_getting_404_page_response(requests_mock: rm_Mocker) -> None: +def test_getting_404_page_response( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It handles HTTPError if page is not found.""" requests_mock.get(req_mock.ANY, text="Response OK", status_code=404) - resp = get_web_page_html_text("http://example.com", s)[0] + resp = get_web_page_html_text("http://example.com", req_ses)[0] assert "[ERROR]" in resp assert "404" in resp -def test_getting_503_page_response(requests_mock: rm_Mocker) -> None: +def test_getting_503_page_response( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It handle HTTPError if service is unavailable.""" requests_mock.get(req_mock.ANY, text="Response OK", status_code=503) - resp = get_web_page_html_text("http://example.com", s)[0] + resp = get_web_page_html_text("http://example.com", req_ses)[0] assert "[ERROR]" in resp assert "503" in resp -def test_timeout_error(requests_mock: rm_Mocker) -> None: +def test_timeout_error( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It handle any Timeout exception for page.""" requests_mock.get(req_mock.ANY, exc=requests.exceptions.Timeout) - resp = get_web_page_html_text("http://example.com", s)[0] + resp = get_web_page_html_text("http://example.com", req_ses)[0] assert "[ERROR]" in resp assert "Timeout" in resp -def test_connection_error(requests_mock: rm_Mocker) -> None: +def test_connection_error( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It handles ConnectionError exception for bad request.""" requests_mock.get(req_mock.ANY, exc=requests.exceptions.ConnectionError) - resp = get_web_page_html_text("http://example.com", s)[0] + resp = get_web_page_html_text("http://example.com", req_ses)[0] assert "[ERROR]" in resp assert "Bad request" in resp -def test_unknown_error(requests_mock: rm_Mocker) -> None: +def test_unknown_error( + requests_mock: rm_Mocker, + req_ses: 
requests.Session, +) -> None: """It handles any other exceptions during attempt to get response from URL.""" requests_mock.get(req_mock.ANY, exc=Exception("Something Bad")) - resp = get_web_page_html_text("http://example.com", s)[0] + resp = get_web_page_html_text("http://example.com", req_ses)[0] assert "[ERROR]" in resp assert "Unhandled error" in resp -def test_final_location_for_good_redirect(requests_mock: rm_Mocker) -> None: +def test_final_location_for_good_redirect( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It retrieves final location during redirect.""" requests_mock.get( "https://re.direct", @@ -85,14 +103,17 @@ def test_final_location_for_good_redirect(requests_mock: rm_Mocker) -> None: requests_mock.get("https://final.location", text="Final location") text, final_location, is_url_ok = get_web_page_html_text( "https://re.direct", - s, + req_ses, ) assert is_url_ok assert text == "Final location" assert final_location == "https://final.location/" -def test_final_location_for_bad_redirect(requests_mock: rm_Mocker) -> None: +def test_final_location_for_bad_redirect( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It retrieves final location during redirect.""" requests_mock.get( "https://re.direct", @@ -107,7 +128,7 @@ def test_final_location_for_bad_redirect(requests_mock: rm_Mocker) -> None: ) text, final_location, is_url_ok = get_web_page_html_text( "https://re.direct", - s, + req_ses, ) assert not is_url_ok assert "[ERROR]" in text @@ -117,15 +138,15 @@ def test_final_location_for_bad_redirect(requests_mock: rm_Mocker) -> None: def test_retrieve_all_episode_links_from_soup() -> None: """It returns only tags from soup object.""" - html_doc = """The Dormouse's story + html_doc = """The Dormouse'req_ses story
-

The Dormouse's story

+

The Dormouse's story

Once upon a time there were three little sisters; and their names were Elsie, Tillie; 723. Bahar from Iran  - 🇮🇷 + 🇮🇷  (WISBOLEP Runner-Up) and they lived at the bottom of a well. @@ -140,7 +161,7 @@ def test_retrieve_all_episode_links_from_soup() -> None: def test_replacing_misspelled_link() -> None: """It replaces misspelled link and returns modified soup object.""" - html_doc = """The Dormouse's story + html_doc = """The Dormouse'req_ses story

Once upon a time there were three little sisters; and their names were Elsie, @@ -157,7 +178,7 @@ def test_replacing_misspelled_link() -> None: def test_replacing_nothing_when_no_misspelled_link() -> None: """It replaces nothing when there is no misspelled link and returns the same soup object.""" - html_doc = """The Dormouse's story + html_doc = """The Dormouse'req_ses story

Once upon a time there were three little sisters; and their names were Elsie, @@ -239,7 +260,7 @@ def test_parsing_invalid_html(requests_mock: rm_Mocker) -> None: def test_parsing_archive_without_episodes() -> None: """It collects links only matched by episode link pattern.""" - markup = """The Dormouse's story + markup = """The Dormouse'req_ses story

Once upon a time there were three little sisters; and their names were Tillie; @@ -358,7 +379,10 @@ def test_generating_new_post_index_on_same_day() -> None: assert index2 == expected_index + 1 -def test_parsing_non_episode_link(requests_mock: rm_Mocker) -> None: +def test_parsing_non_episode_link( + requests_mock: rm_Mocker, + req_ses: requests.Session, +) -> None: """It returns None (empty episode) for non-episode link.""" non_episode_url = "https://teacherluke.co.uk/premium/archive-comment-section/" requests_mock.get( @@ -367,7 +391,7 @@ def test_parsing_non_episode_link(requests_mock: rm_Mocker) -> None: status_code=200, ) link_title = "Some title" - episode = parser.parse_single_page(non_episode_url, s, link_title) + episode = parser.parse_single_page(non_episode_url, req_ses, link_title) assert episode is None @@ -378,7 +402,7 @@ def test_parsing_links_to_audio_for_mocked_episodes( single_page_mock: str, ) -> None: """It parses links to audio (if they exist).""" - # TODO: Complete test (now it's simple copy-paste) + # TODO: Complete test (now it'req_ses simple copy-paste) requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) parsing_result: t.Tuple[t.List[str], ...] = parser.get_archive_parsing_results( conf.ARCHIVE_URL From 16e66d10891658567088929ce54eb1e7a9b80a24 Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Thu, 11 Nov 2021 12:38:42 +0300 Subject: [PATCH 07/16] style(parser): :art: Format code with max line length = 80 (ignore in test data and long URLs) Add license boilerplate --- tests/test_parser.py | 110 ++++++++++++++++++++++++++++--------------- 1 file changed, 72 insertions(+), 38 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index c97ce0a..d86ab69 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,3 +1,24 @@ +# MIT License +# +# Copyright (c) 2021 Artem Hotenov +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
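This patch reflows the test module to an 80-character maximum and adds the license header above. Long string literals that cannot be wrapped, such as fixture URLs, are instead marked for the linters; a generic example of the suppression used throughout (not a line from this diff):

SOME_LONG_URL = "https://example.com/a/path/far/too/long/to/wrap/cleanly/"  # noqa: E501,B950

E501 is pycodestyle's line-too-long check and B950 is the flake8-bugbear equivalent with a small tolerance.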
"""Test cases for the parser module.""" import json import tempfile @@ -24,6 +45,9 @@ from lep_downloader.lep import LepEpisode +lep_date_format = "%Y-%m-%dT%H:%M:%S%z" + + def test_getting_success_page_response( requests_mock: rm_Mocker, req_ses: requests.Session, @@ -82,7 +106,7 @@ def test_unknown_error( requests_mock: rm_Mocker, req_ses: requests.Session, ) -> None: - """It handles any other exceptions during attempt to get response from URL.""" + """It handles any other exceptions during getting response from URL.""" requests_mock.get(req_mock.ANY, exc=Exception("Something Bad")) resp = get_web_page_html_text("http://example.com", req_ses)[0] assert "[ERROR]" in resp @@ -152,7 +176,7 @@ def test_retrieve_all_episode_links_from_soup() -> None: and they lived at the bottom of a well.

...

- """ + """ # noqa: E501,B950 soup = BeautifulSoup(html_doc, "lxml") only_links, only_strings = parser.get_all_episode_links_from_soup(soup) assert len(only_links) == 2 @@ -169,15 +193,16 @@ def test_replacing_misspelled_link() -> None: Tillie; and they lived at the bottom of a well.

- """ + """ # noqa: E501,B950 soup = BeautifulSoup(html_doc, "lxml") modified_soup = parser.replace_misspelled_link(soup) new_href = modified_soup("a")[1]["href"] - assert new_href == "https://teacherluke.co.uk/2012/08/06/london-olympics-2012/" + expected = "https://teacherluke.co.uk/2012/08/06/london-olympics-2012/" + assert new_href == expected def test_replacing_nothing_when_no_misspelled_link() -> None: - """It replaces nothing when there is no misspelled link and returns the same soup object.""" + """It replaces nothing when there is no misspelled link.""" html_doc = """The Dormouse'req_ses story

Once upon a time there were three little sisters; and their names were @@ -186,7 +211,7 @@ def test_replacing_nothing_when_no_misspelled_link() -> None: Tillie; and they lived at the bottom of a well.

- """ + """ # noqa: E501,B950 soup = BeautifulSoup(html_doc, "lxml") modified_soup = parser.replace_misspelled_link(soup) assert soup == modified_soup @@ -197,8 +222,8 @@ def test_removing_irrelevant_links() -> None: test_list: t.List[str] = [ "https://teacherluke.co.uk/2020/11/23/wisbolep/", "https://wp.me/P4IuUx-82H", # <- Link to app - "https://teacherluke.co.uk/2014/04/01/177-what-londoners-say-vs-what-they-mean/", - "https://teacherluke.co.uk/2021/03/26/711-william-from-france-%f0%9f%87%ab%f0%9f%87%b7-wisbolep-runner-up/", + "https://teacherluke.co.uk/2014/04/01/177-what-londoners-say-vs-what-they-mean/", # noqa: E501,B950 + "https://teacherluke.co.uk/2021/03/26/711-william-from-france-%f0%9f%87%ab%f0%9f%87%b7-wisbolep-runner-up/", # noqa: E501,B950 ] test_texts: t.List[str] = [ "1. Link", @@ -216,15 +241,15 @@ def test_short_links_substitution() -> None: test_list: t.List[str] = [ "http://wp.me/p4IuUx-7sg", "https://wp.me/P4IuUx-82H", # <- Link to app (no replacing) - "https://teacherluke.co.uk/2014/04/01/177-what-londoners-say-vs-what-they-mean/", + "https://teacherluke.co.uk/2014/04/01/177-what-londoners-say-vs-what-they-mean/", # noqa: E501,B950 "https://wp.me/p4IuUx-29", ] replaced: t.List[str] = parser.substitute_short_links(test_list) expected: t.List[str] = [ - "https://teacherluke.co.uk/2017/01/10/415-with-the-family-part-3-more-encounters-with-famous-people/", + "https://teacherluke.co.uk/2017/01/10/415-with-the-family-part-3-more-encounters-with-famous-people/", # noqa: E501,B950 "https://wp.me/P4IuUx-82H", - "https://teacherluke.co.uk/2014/04/01/177-what-londoners-say-vs-what-they-mean/", - "https://teacherluke.co.uk/2011/10/11/notting-hill-carnival-video-frustration-out-takes/", + "https://teacherluke.co.uk/2014/04/01/177-what-londoners-say-vs-what-they-mean/", # noqa: E501,B950 + "https://teacherluke.co.uk/2011/10/11/notting-hill-carnival-video-frustration-out-takes/", # noqa: E501,B950 ] assert replaced == expected @@ -267,7 +292,7 @@ def test_parsing_archive_without_episodes() -> None: and they lived at the bottom of a well.

...

- """ + """ # noqa: E501,B950 soup = BeautifulSoup(markup, "lxml") links, texts = parser.get_all_episode_links_from_soup(soup) assert len(links) == 0 @@ -280,7 +305,7 @@ def test_parsing_archive_with_known_duplicates() -> None: [VIDEO]; episode 522; [Website content]; - """ + """ # noqa: E501,B950 soup = BeautifulSoup(markup, "lxml") links, texts = parser.get_all_episode_links_from_soup(soup) assert len(links) == 0 @@ -295,9 +320,8 @@ def test_mocking_single_page( ) -> None: """It parses mocked episode page.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) - parsing_result: t.Tuple[t.List[str], ...] = parser.get_archive_parsing_results( - conf.ARCHIVE_URL - ) + parsing_result: t.Tuple[t.List[str], ...] + parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL) all_links: t.List[str] = parsing_result[0] all_texts: t.List[str] = parsing_result[2] session = requests.Session() @@ -320,11 +344,14 @@ def test_mocking_single_page( assert len(parsed_episodes) > 781 - min_date = datetime.strptime("2009-03-03T03:03:03+02:00", "%Y-%m-%dT%H:%M:%S%z") + min_date = datetime.strptime( + "2009-03-03T03:03:03+02:00", + lep_date_format, + ) mocked_episodes = [ ep for ep in parsed_episodes - if datetime.strptime(ep.__dict__["date"], "%Y-%m-%dT%H:%M:%S%z") > min_date + if datetime.strptime(ep.__dict__["date"], lep_date_format) > min_date ] assert len(mocked_episodes) > 15 @@ -340,7 +367,7 @@ def test_parsing_post_datetime() -> None: html_doc = """ - """ + """ # noqa: E501,B950 soup = BeautifulSoup(html_doc, "lxml") post_date = parser.parse_post_publish_datetime(soup) excepted = "2009-04-12T15:23:33+02:00" @@ -352,7 +379,7 @@ def test_parsing_post_datetime_without_element() -> None: html_doc = """ - """ + """ # noqa: E501,B950 soup = BeautifulSoup(html_doc, "lxml") post_date = parser.parse_post_publish_datetime(soup) excepted = "1999-01-01T01:01:01+02:00" @@ -384,7 +411,9 @@ def test_parsing_non_episode_link( req_ses: requests.Session, ) -> None: """It returns None (empty episode) for non-episode link.""" - non_episode_url = "https://teacherluke.co.uk/premium/archive-comment-section/" + non_episode_url = ( + "https://teacherluke.co.uk/premium/archive-comment-section/" # noqa: E501,B950 + ) requests_mock.get( non_episode_url, text="No need to parse this page", @@ -404,9 +433,8 @@ def test_parsing_links_to_audio_for_mocked_episodes( """It parses links to audio (if they exist).""" # TODO: Complete test (now it'req_ses simple copy-paste) requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) - parsing_result: t.Tuple[t.List[str], ...] = parser.get_archive_parsing_results( - conf.ARCHIVE_URL - ) + parsing_result: t.Tuple[t.List[str], ...] 
+ parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL) all_links: t.List[str] = parsing_result[0] all_texts: t.List[str] = parsing_result[2] session = requests.Session() @@ -422,11 +450,14 @@ def test_parsing_links_to_audio_for_mocked_episodes( assert len(parsed_episodes) > 781 - min_date = datetime.strptime("2009-03-03T03:03:03+02:00", "%Y-%m-%dT%H:%M:%S%z") + min_date = datetime.strptime( + "2009-03-03T03:03:03+02:00", + lep_date_format, + ) mocked_episodes = [ ep for ep in parsed_episodes - if datetime.strptime(ep.__dict__["date"], "%Y-%m-%dT%H:%M:%S%z") > min_date + if datetime.strptime(ep.__dict__["date"], lep_date_format) > min_date ] assert len(mocked_episodes) > 15 @@ -438,7 +469,7 @@ def test_no_appropriate_mp3_links_by_title() -> None: Get Episode - """ + """ # noqa: E501,B950 soup = BeautifulSoup(markup, "lxml") list_of_audio = parser.parse_post_audio(soup) assert len(list_of_audio) == 0 @@ -464,12 +495,12 @@ def test_selecting_appropriate_mp3_links_by_href() -> None: Download episode - """ + """ # noqa: E501,B950 soup = BeautifulSoup(markup, "lxml") list_of_audio = parser.parse_post_audio(soup) assert len(list_of_audio) == 2 assert list_of_audio[0] == [ - "http://traffic.libsyn.com/teacherluke/600._Episode_600_Livestream_Ask_Me_Anything_Audio.mp3", + "http://traffic.libsyn.com/teacherluke/600._Episode_600_Livestream_Ask_Me_Anything_Audio.mp3", # noqa: E501,B950 ] assert list_of_audio[1] == [ "https://audioboom.com/boos/2550583-101-a-note-from-luke.mp3", @@ -483,7 +514,7 @@ def test_appropriate_mp3_link_with_word_audio() -> None: DOWNLOAD AUDIO - """ + """ # noqa: E501,B950 soup = BeautifulSoup(markup, "lxml") list_of_audio = parser.parse_post_audio(soup) assert len(list_of_audio) == 1 @@ -520,11 +551,9 @@ def test_writing_lep_episodes_to_json() -> None: """It creates JSON file from list of LepEpisode objects.""" lep_ep_1 = LepEpisode( 702, - url="https://teacherluke.co.uk/2021/01/25/702-emergency-questions-with-james/", + url="https://teacherluke.co.uk/2021/01/25/702-emergency-questions-with-james/", # noqa: E501,B950 index=2021012501, ) - # lep_ep_2_dict = {"episode": 2, "post_title": "2. Test episode #2"} # type: t.Dict[str, object] - # lep_ep_2 = LepEpisode(**lep_ep_2_dict) lep_ep_2 = LepEpisode(episode=2, post_title="2. Test episode #2") episodes = [ lep_ep_1, @@ -538,7 +567,7 @@ def test_writing_lep_episodes_to_json() -> None: assert len(py_from_json) == 2 assert ( py_from_json[0]["url"] - == "https://teacherluke.co.uk/2021/01/25/702-emergency-questions-with-james/" + == "https://teacherluke.co.uk/2021/01/25/702-emergency-questions-with-james/" # noqa: E501,B950 ) file = Path(temp_file.name) file.unlink() @@ -717,7 +746,10 @@ def test_updating_json_database_with_new_episodes( text=modified_json_less_db_mock, ) - with tempfile.NamedTemporaryFile(prefix="LEP_tmp_", delete=False) as temp_file: + with tempfile.NamedTemporaryFile( + prefix="LEP_tmp_", + delete=False, + ) as temp_file: json_file = Path(temp_file.name) parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL, json_file) py_from_json = json.load(temp_file, object_hook=as_lep_episode_obj) @@ -748,8 +780,9 @@ def test_updating_json_database_with_extra_episodes( parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL) captured = capsys.readouterr() + expected_message = "Database contains more episodes than current archive!" assert "[WARNING]" in captured.out - assert "Database contains more episodes than current archive!" 
+    assert expected_message in captured.out


 def test_parsing_invalid_html_in_main_actions(
@@ -761,8 +794,9 @@
     requests_mock.get(conf.ARCHIVE_URL, text=markup)
     parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL)
     captured = capsys.readouterr()
+    expected_message = "Can't parse any episodes from archive page."
     assert "[ERROR]" in captured.out
-    assert "Can't parse any episodes from archive page." in captured.out
+    assert expected_message in captured.out


 def test_encoding_non_serializable_json_object() -> None:

From 2b491cc6c3ac2c95c499f9aa0c8befdb8170314f Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Wed, 17 Nov 2021 11:17:49 +0300
Subject: [PATCH 08/16] chore: :arrow_up: Update typeguard (2.12.1 -> 2.13.0)

---
 poetry.lock | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index 3f51039..294bceb 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -996,7 +996,7 @@ python-versions = "*"

 [[package]]
 name = "typeguard"
-version = "2.12.1"
+version = "2.13.0"
 description = "Run-time type checker for Python"
 category = "dev"
 optional = false
@@ -1736,8 +1736,8 @@ typed-ast = [
     {file = "typed_ast-1.4.3.tar.gz", hash = "sha256:fb1bbeac803adea29cedd70781399c99138358c26d05fcbd23c13016b7f5ec65"},
 ]
 typeguard = [
-    {file = "typeguard-2.12.1-py3-none-any.whl", hash = "sha256:cc15ef2704c9909ef9c80e19c62fb8468c01f75aad12f651922acf4dbe822e02"},
-    {file = "typeguard-2.12.1.tar.gz", hash = "sha256:c2af8b9bdd7657f4bd27b45336e7930171aead796711bc4cfc99b4731bb9d051"},
+    {file = "typeguard-2.13.0-py3-none-any.whl", hash = "sha256:0bc44d1ff865b522eda969627868b0e001c8329296ce50aededbea03febc79ee"},
+    {file = "typeguard-2.13.0.tar.gz", hash = "sha256:04e38f92eb59410c9375d3be23df65e0a7643f2e8bcbd421423d808d2f9e99df"},
 ]
 typing-extensions = [
     {file = "typing_extensions-3.10.0.0-py2-none-any.whl", hash = "sha256:0ac0f89795dd19de6b97debb0c6af1c70987fd80a2d62d1958f7e56fcc31b497"},

From 8bac02557eef7bfd873b990134e9660d92bd0ae1 Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Wed, 17 Nov 2021 12:24:50 +0300
Subject: [PATCH 09/16] test: Move package's imports to fixture definitions
 (to fix typeguard import error)

---
 tests/conftest.py | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index b5ecf6d..d7ce4ab 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -32,9 +32,6 @@
 from requests_mock.request import _RequestObjectProxy
 from requests_mock.response import _Context as rm_Context

-from lep_downloader import config as conf
-from lep_downloader import lep
-

 # yapf: disable
 URL_HTML_MAPPING = {
@@ -115,6 +112,8 @@ def html_mocks_path(mocks_dir_path: Path) -> Path:
 @pytest.fixture(scope="module")
 def archive_page_mock(mocks_dir_path: Path) -> str:
     """Returns str object of archive HTML mocked page."""
+    from lep_downloader import config as conf
+
     page_path = mocks_dir_path / conf.LOCAL_ARCHIVE_HTML
     return page_path.read_text(encoding="utf-8")
@@ -157,14 +156,18 @@ def _single_page_matcher(
 @pytest.fixture(scope="session")
 def json_db_mock(mocks_dir_path: Path) -> str:
     """Returns str object of JSON mocked database."""
+    from lep_downloader import config as conf
+
     json_path = mocks_dir_path / conf.LOCAL_JSON_DB
     return json_path.read_text(encoding="utf-8")


 @pytest.fixture
-def db_episodes(json_db_mock: str) -> List[lep.LepEpisode]:
+def db_episodes(json_db_mock: str) -> List[object]:
     """Returns reusable list of LepEpisode objects from JSON mocked database."""
from JSON mocked database.""" - db_episodes: List[lep.LepEpisode] = json.loads( + from lep_downloader import lep + + db_episodes: List[object] = json.loads( json_db_mock, object_hook=lep.as_lep_episode_obj, ) @@ -172,8 +175,10 @@ def db_episodes(json_db_mock: str) -> List[lep.LepEpisode]: @pytest.fixture -def modified_json_less_db_mock(db_episodes: List[lep.LepEpisode]) -> str: +def modified_json_less_db_mock(db_episodes: List[object]) -> str: """Returns mocked JSON database with less episodes.""" + from lep_downloader import lep + # Delete three episodes del db_episodes[0] del db_episodes[1] @@ -184,8 +189,10 @@ def modified_json_less_db_mock(db_episodes: List[lep.LepEpisode]) -> str: @pytest.fixture -def modified_json_extra_db_mock(db_episodes: List[lep.LepEpisode]) -> str: +def modified_json_extra_db_mock(db_episodes: List[object]) -> str: """Returns mocked JSON database with plus one episode.""" + from lep_downloader import lep + lep_ep = lep.LepEpisode(episode=999, post_title="Extra episode") db_episodes.append(lep_ep) # Add extra episode modified_json = json.dumps(db_episodes, cls=lep.LepJsonEncoder) From 0fab7188068987dc974a2c4c84f3b8aa0a46ed92 Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Fri, 19 Nov 2021 12:10:23 +0300 Subject: [PATCH 10/16] test(parser): :recycle: Add fixtures with parsing mocked results --- tests/conftest.py | 64 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tests/conftest.py b/tests/conftest.py index d7ce4ab..4dd1d81 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -21,14 +21,19 @@ # SOFTWARE. """Package-wide test fixtures.""" import json +from datetime import datetime from pathlib import Path +from typing import Any from typing import Callable from typing import Dict from typing import List from typing import Optional +from typing import Tuple import pytest import requests +import requests_mock as req_mock +from requests_mock.mocker import Mocker as rm_Mocker from requests_mock.request import _RequestObjectProxy from requests_mock.response import _Context as rm_Context @@ -198,3 +203,62 @@ def modified_json_extra_db_mock(db_episodes: List[object]) -> str: modified_json = json.dumps(db_episodes, cls=lep.LepJsonEncoder) del db_episodes return modified_json + + +@pytest.fixture +def archive_parsing_results_mock( + requests_mock: rm_Mocker, + archive_page_mock: str, +) -> Tuple[List[str], List[str]]: + """Returns two lists: links and texts from mocked archive page.""" + from lep_downloader import config as conf + from lep_downloader import parser + + requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) + parsing_result: Tuple[List[str], ...] + parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL) + all_links: List[str] = parsing_result[0] + all_texts: List[str] = parsing_result[2] + return all_links, all_texts + + +@pytest.fixture +def parsed_episodes_mock( + requests_mock: rm_Mocker, + archive_parsing_results_mock: Tuple[List[str], List[str]], + single_page_mock: str, + single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], + req_ses: requests.Session, +) -> List[Any]: + """Returns list of LepEpisode objects. + + Mocked episodes among others with correct post date. 
+ """ + from lep_downloader import parser + + all_links, all_texts = archive_parsing_results_mock + requests_mock.get( + req_mock.ANY, + additional_matcher=single_page_matcher, + text=single_page_mock, + ) + parsed_episodes = parser.get_parsed_episodes(all_links, req_ses, all_texts) + return parsed_episodes + + +@pytest.fixture +def mocked_episodes( + parsed_episodes_mock: List[Any], +) -> List[Any]: + """Only episodes which have HTML mock page.""" + lep_date_format = "%Y-%m-%dT%H:%M:%S%z" + min_date = datetime.strptime( + "2009-03-03T03:03:03+02:00", + lep_date_format, + ) + mocked_episodes = [ + ep + for ep in parsed_episodes_mock + if datetime.strptime(ep.__dict__["date"], lep_date_format) > min_date + ] + return mocked_episodes From a652afa98aaae39304d56c6b5f1f85458d066cd1 Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Fri, 19 Nov 2021 12:11:43 +0300 Subject: [PATCH 11/16] test(parser): :white_check_mark: Update parsing archive tests with new fixtures --- tests/test_parser.py | 110 ++++++++----------------------------------- 1 file changed, 19 insertions(+), 91 deletions(-) diff --git a/tests/test_parser.py b/tests/test_parser.py index d86ab69..7758d29 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -23,11 +23,11 @@ import json import tempfile import typing as t -from datetime import datetime from pathlib import Path from typing import Callable from typing import List from typing import Optional +from typing import Tuple import pytest import requests @@ -254,16 +254,12 @@ def test_short_links_substitution() -> None: assert replaced == expected -def test_parsing_result( - requests_mock: rm_Mocker, - archive_page_mock: str, +def test_parsing_posts_from_archive_page( + archive_parsing_results_mock: Tuple[List[str], List[str]], mocked_urls: List[str], ) -> None: - """It parses mocked archived page.""" - requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) - parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL) - all_links = parsing_result[0] - all_texts = parsing_result[2] + """It parses links and texts from mocked archived page.""" + all_links, all_texts = archive_parsing_results_mock assert len(all_links) == len(all_texts) assert len(all_links) > 781 assert "/2009/04/12/episode-1-introduction" in all_links[-1] @@ -271,9 +267,6 @@ def test_parsing_result( intersection = set(mocked_urls) & set(all_links) assert len(intersection) > 15 - link_strings = parsing_result[2] - assert len(link_strings) > 781 - def test_parsing_invalid_html(requests_mock: rm_Mocker) -> None: """It returns None if page does not comply with the parsing rules.""" @@ -312,54 +305,11 @@ def test_parsing_archive_with_known_duplicates() -> None: assert len(texts) == 0 -def test_mocking_single_page( - requests_mock: rm_Mocker, - archive_page_mock: str, - single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], - single_page_mock: str, +def test_parsing_all_episodes_from_mocked_archive( + parsed_episodes_mock: List[lep.LepEpisode], ) -> None: - """It parses mocked episode page.""" - requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) - parsing_result: t.Tuple[t.List[str], ...] 
-    parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL)
-    all_links: t.List[str] = parsing_result[0]
-    all_texts: t.List[str] = parsing_result[2]
-    session = requests.Session()
-    parsed_episodes = []
-
-    requests_mock.get(
-        req_mock.ANY,
-        additional_matcher=single_page_matcher,
-        text=single_page_mock,
-    )
-
-    parsed_episodes = parser.get_parsed_episodes(all_links, session, all_texts)
-
-    non_episode_list = parser.get_parsed_episodes(
-        ["https://teacherluke.co.uk/premium/archive-comment-section/"],
-        session,
-        ["Non-episode link"],
-    )
-    assert len(non_episode_list) == 0
-
-    assert len(parsed_episodes) > 781
-
-    min_date = datetime.strptime(
-        "2009-03-03T03:03:03+02:00",
-        lep_date_format,
-    )
-    mocked_episodes = [
-        ep
-        for ep in parsed_episodes
-        if datetime.strptime(ep.__dict__["date"], lep_date_format) > min_date
-    ]
-    assert len(mocked_episodes) > 15
-
-    sorted_episodes = parser.sort_episodes_by_post_date(parsed_episodes)
-    assert (
-        sorted_episodes[0].__dict__["url"]
-        == "https://teacherluke.co.uk/2021/08/03/733-a-summer-ramble/"
-    )
+    """It parses all episodes from mocked archive HTML."""
+    assert len(parsed_episodes_mock) == 786


 def test_parsing_post_datetime() -> None:
@@ -425,41 +375,19 @@


 def test_parsing_links_to_audio_for_mocked_episodes(
-    requests_mock: rm_Mocker,
-    archive_page_mock: str,
-    single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]],
-    single_page_mock: str,
+    mocked_episodes: List[lep.LepEpisode],
 ) -> None:
     """It parses links to audio (if they exist)."""
-    # TODO: Complete test (now it's simple copy-paste)
-    requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock)
-    parsing_result: t.Tuple[t.List[str], ...]
-    parsing_result = parser.get_archive_parsing_results(conf.ARCHIVE_URL)
-    all_links: t.List[str] = parsing_result[0]
-    all_texts: t.List[str] = parsing_result[2]
-    session = requests.Session()
-    parsed_episodes = []
-
-    requests_mock.get(
-        req_mock.ANY,
-        additional_matcher=single_page_matcher,
-        text=single_page_mock,
-    )
-
-    parsed_episodes = parser.get_parsed_episodes(all_links, session, all_texts)
-
-    assert len(parsed_episodes) > 781
-
-    min_date = datetime.strptime(
-        "2009-03-03T03:03:03+02:00",
-        lep_date_format,
-    )
-    mocked_episodes = [
-        ep
-        for ep in parsed_episodes
-        if datetime.strptime(ep.__dict__["date"], lep_date_format) > min_date
+    assert len(mocked_episodes) == 17
+    assert mocked_episodes[3].episode == 35
+    assert mocked_episodes[3].audios == [
+        [
+            "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3"  # noqa: E501,B950
+        ]
     ]
-    assert len(mocked_episodes) > 15
+    assert mocked_episodes[12].audios == []
+    if mocked_episodes[10].audios is not None:
+        assert len(mocked_episodes[10].audios) == 5


 def test_no_appropriate_mp3_links_by_title() -> None:

From 6e68f5f9aa3c799b715411aa1762e688e989939b Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Sat, 20 Nov 2021 13:33:56 +0300
Subject: [PATCH 12/16] test(parser): :recycle: Add fixture with temp
 directory for pytest session

Update tests with temp file in 'test_parser.py'
---
 tests/conftest.py    | 15 +++++++++++++++
 tests/test_parser.py | 38 ++++++++++++++----------------------
 2 files changed, 31 insertions(+), 22 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 4dd1d81..09a84f1 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -21,11 +21,13 @@
 # SOFTWARE.
"""Package-wide test fixtures.""" import json +import shutil from datetime import datetime from pathlib import Path from typing import Any from typing import Callable from typing import Dict +from typing import Iterator from typing import List from typing import Optional from typing import Tuple @@ -33,6 +35,7 @@ import pytest import requests import requests_mock as req_mock +from pytest import TempPathFactory from requests_mock.mocker import Mocker as rm_Mocker from requests_mock.request import _RequestObjectProxy from requests_mock.response import _Context as rm_Context @@ -262,3 +265,15 @@ def mocked_episodes( if datetime.strptime(ep.__dict__["date"], lep_date_format) > min_date ] return mocked_episodes + + +@pytest.fixture(scope="session") +def lep_temp_path(tmp_path_factory: TempPathFactory) -> Iterator[Path]: + """Returns path to custom temp directory.""" + temp_path = tmp_path_factory.mktemp("lep_tmp") + yield temp_path + # Cleanup all folders in fixture's base temp directory + shutil.rmtree(tmp_path_factory.getbasetemp().resolve()) + # If we clean base temp directory itself + # 'typeguard' will warn on Windows hosts + # tmp_path_factory.getbasetemp().parent.rmdir() diff --git a/tests/test_parser.py b/tests/test_parser.py index 7758d29..24e6640 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -21,7 +21,6 @@ # SOFTWARE. """Test cases for the parser module.""" import json -import tempfile import typing as t from pathlib import Path from typing import Callable @@ -475,7 +474,7 @@ def test_episodes_sorting_by_date() -> None: assert sorted_episodes == expected_sorted -def test_writing_lep_episodes_to_json() -> None: +def test_writing_lep_episodes_to_json(lep_temp_path: Path) -> None: """It creates JSON file from list of LepEpisode objects.""" lep_ep_1 = LepEpisode( 702, @@ -487,18 +486,16 @@ def test_writing_lep_episodes_to_json() -> None: lep_ep_1, lep_ep_2, ] - file = Path() - with tempfile.NamedTemporaryFile(delete=False) as temp_file: - json_file = Path(temp_file.name) - parser.write_parsed_episodes_to_json(episodes, json_file) - py_from_json = json.load(temp_file) - assert len(py_from_json) == 2 - assert ( - py_from_json[0]["url"] - == "https://teacherluke.co.uk/2021/01/25/702-emergency-questions-with-james/" # noqa: E501,B950 - ) - file = Path(temp_file.name) - file.unlink() + + json_file = lep_temp_path / "json_db_tmp.json" + parser.write_parsed_episodes_to_json(episodes, json_file) + with open(json_file, "rb") as f: + py_from_json = json.load(f) + assert len(py_from_json) == 2 + assert ( + py_from_json[0]["url"] + == "https://teacherluke.co.uk/2021/01/25/702-emergency-questions-with-james/" # noqa: E501,B950 + ) def test_no_new_episodes_on_archive_vs_json_db( @@ -661,6 +658,7 @@ def test_updating_json_database_with_new_episodes( single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]], single_page_mock: str, modified_json_less_db_mock: str, + lep_temp_path: Path, ) -> None: """It retrives and saves new episodes from archive.""" requests_mock.get(conf.ARCHIVE_URL, text=archive_page_mock) @@ -674,14 +672,10 @@ def test_updating_json_database_with_new_episodes( text=modified_json_less_db_mock, ) - with tempfile.NamedTemporaryFile( - prefix="LEP_tmp_", - delete=False, - ) as temp_file: - json_file = Path(temp_file.name) - parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL, json_file) - py_from_json = json.load(temp_file, object_hook=as_lep_episode_obj) - json_file.unlink() + json_file = lep_temp_path / "json_db_tmp.json" + 
+    parser.do_parsing_actions(conf.JSON_DB_URL, conf.ARCHIVE_URL, json_file)
+    with open(json_file, "rb") as f:
+        py_from_json = json.load(f, object_hook=as_lep_episode_obj)

     assert len(py_from_json) == 786

From 00ca932fc6aaf9e6138052bd6a1f0ee110075131 Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Sat, 20 Nov 2021 15:57:38 +0300
Subject: [PATCH 13/16] test(downloader): :recycle: Update tests with built-in
 'tmp_path' fixture

---
 tests/test_downloader.py | 137 ++++++++++++++++++++-------------------
 1 file changed, 69 insertions(+), 68 deletions(-)

diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index 88ca9c3..179b33d 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -20,7 +20,6 @@
 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 # SOFTWARE.
 """Test cases for the downloader module."""
-import tempfile
 from pathlib import Path
 from typing import List
 from typing import Tuple
@@ -108,7 +107,9 @@ def test_forming_safe_filename_for_downloading() -> None:
     assert audio_links[9] == excepted_link


-def test_separating_existing_and_non_existing_mp3() -> None:
+def test_separating_existing_and_non_existing_mp3(
+    tmp_path: Path,
+) -> None:
     """It detects when file has already been downloaded."""
     audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
     audio_data = downloader.get_audios_data(audio_episodes)
     audio_links = downloader.bind_name_and_file_url(audio_data)

     filename_1 = "[2021-08-03] # 733. A Summer Ramble.mp3"
     filename_2 = "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 05].mp3"

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        Path(save_tmp_dir / filename_1).write_text("Here are mp3 1 bytes")
-        Path(save_tmp_dir / filename_2).write_text("Here are mp3 2 bytes")
+    Path(tmp_path / filename_1).write_text("Here are mp3 1 bytes")
+    Path(tmp_path / filename_2).write_text("Here are mp3 2 bytes")

-        existing, non_existing = downloader.detect_existing_files(
-            audio_links,
-            save_tmp_dir,
-        )
+    existing, non_existing = downloader.detect_existing_files(
+        audio_links,
+        tmp_path,
+    )
     assert len(existing) == 2
     assert len(non_existing) == 17
@@ -152,7 +151,10 @@ def test_retrieving_audios_as_none() -> None:
     assert audio_data[0][2] == []


-def test_downloading_mocked_mp3_files(requests_mock: rm_Mocker) -> None:
+def test_downloading_mocked_mp3_files(
+    requests_mock: rm_Mocker,
+    tmp_path: Path,
+) -> None:
     """It downloads file on disc."""
     test_downloads: List[Tuple[str, List[str]]] = []
     file_1 = (
         "Test File #1",
         ["https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3"],
     )
     file_2 = (
         "Test File #2",
         ["https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3"],
     )
     test_downloads.append(file_1)
     test_downloads.append(file_2)

     mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3"
     requests_mock.get(
         "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
         content=mocked_file_1.read_bytes(),
     )

     mocked_file_2 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio2.mp3"
     requests_mock.get(
         "https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3",
         content=mocked_file_2.read_bytes(),
     )

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        downloader.download_files(test_downloads, save_tmp_dir)
-        expected_file_1 = Path(save_tmp_dir / "Test File #1.mp3")
-        expected_file_2 = Path(save_tmp_dir / "Test File #2.mp3")
-        assert expected_file_1.exists()
-        assert 21460 < expected_file_1.stat().st_size < 22000
-        assert expected_file_2.exists()
-        assert 18300 < expected_file_2.stat().st_size < 18350
-        assert len(downloader.successful_downloaded) == 2
+    downloader.download_files(test_downloads, tmp_path)
+    expected_file_1 = tmp_path / "Test File #1.mp3"
+    expected_file_2 = tmp_path / "Test File #2.mp3"
+    assert expected_file_1.exists()
+    assert 21460 < expected_file_1.stat().st_size < 22000
+    assert expected_file_2.exists()
+    assert 18300 < expected_file_2.stat().st_size < 18350
+    assert len(downloader.successful_downloaded) == 2


-def test_skipping_downloaded_url(requests_mock: rm_Mocker) -> None:
+def test_skipping_downloaded_url(
+    requests_mock: rm_Mocker,
+    tmp_path: Path,
+) -> None:
     """It skips URL if it was downloaded before."""
     test_downloads: List[Tuple[str, List[str]]] = []
     file_1 = (
         "Test File #1",
         [
             "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3"
         ],
     )
     file_2 = (
         "Test File #2",
         [
             "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3"
         ],
     )
     test_downloads.append(file_1)
     test_downloads.append(file_2)

     mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3"
     requests_mock.get(
         "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3",
         content=mocked_file_1.read_bytes(),
     )

     mocked_file_2 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio2.mp3"
     requests_mock.get(
         "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3",
         content=mocked_file_2.read_bytes(),
     )

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        downloader.download_files(test_downloads, save_tmp_dir)
-        expected_file_1 = Path(save_tmp_dir / "Test File #1.mp3")
-        assert expected_file_1.exists()
-        assert len(list(save_tmp_dir.iterdir())) == 1
-        assert len(downloader.duplicated_links) == 1
+    downloader.download_files(test_downloads, tmp_path)
+    expected_file_1 = tmp_path / "Test File #1.mp3"
+    assert expected_file_1.exists()
+    assert len(list(tmp_path.iterdir())) == 1
+    assert len(downloader.duplicated_links) == 1


-def test_skipping_downloaded_file_on_disc(requests_mock: rm_Mocker) -> None:
+def test_skipping_downloaded_file_on_disc(
+    requests_mock: rm_Mocker,
+    tmp_path: Path,
+) -> None:
     """It skips (and does not overwrite) URL if file was downloaded before."""
     downloader.successful_downloaded = {}  # Clear from previous tests
     test_downloads: List[Tuple[str, List[str]]] = []
     file_1 = (
         "Test File #1",
         [
             "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3"
         ],
     )
     file_2 = (
         "Test File #2",
         ["https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3"],
     )
     test_downloads.append(file_1)
     test_downloads.append(file_2)

     mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3"
     requests_mock.get(
         "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3",
         content=mocked_file_1.read_bytes(),
     )

     mocked_file_2 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio2.mp3"
     requests_mock.get(
         "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
         content=mocked_file_2.read_bytes(),
     )

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        existing_file_1 = Path(save_tmp_dir / "Test File #1.mp3")
-        existing_file_1.write_text("Here are mp3 1 bytes")
-        downloader.download_files(test_downloads, save_tmp_dir)
-        expected_file_2 = Path(save_tmp_dir / "Test File #2.mp3")
-        assert existing_file_1.read_text() == "Here are mp3 1 bytes"
-        assert expected_file_2.exists()
-        assert len(list(save_tmp_dir.iterdir())) == 2
-        assert len(downloader.already_on_disc) == 1
+    existing_file_1 = tmp_path / "Test File #1.mp3"
+    existing_file_1.write_text("Here are mp3 1 bytes")
+    downloader.download_files(test_downloads, tmp_path)
+    expected_file_2 = tmp_path / "Test File #2.mp3"
+    assert existing_file_1.read_text() == "Here are mp3 1 bytes"
+    assert expected_file_2.exists()
+    assert len(list(tmp_path.iterdir())) == 2
+    assert len(downloader.already_on_disc) == 1


-def test_try_auxiliary_download_links(requests_mock: rm_Mocker) -> None:
+def test_try_auxiliary_download_links(
+    requests_mock: rm_Mocker,
+    tmp_path: Path,
+) -> None:
     """It downloads file by auxiliary link."""
     downloader.successful_downloaded = {}  # Clear from previous tests
     test_downloads: List[Tuple[str, List[str]]] = []
     file_1 = (
         "Test File #1",
         [
             "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
             "https://hotenov.com/d/lep/some_auxiliary_1.mp3",
             "https://hotenov.com/d/lep/some_auxiliary_2.mp3",
         ],
     )
     test_downloads.append(file_1)

     mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3"

     requests_mock.get(
         "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
         text="Response not OK",
         status_code=404,
     )
     requests_mock.get(
         "https://hotenov.com/d/lep/some_auxiliary_1.mp3",
         text="Response not OK",
         status_code=404,
     )
     requests_mock.get(
         "https://hotenov.com/d/lep/some_auxiliary_2.mp3",
         content=mocked_file_1.read_bytes(),
     )

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        downloader.download_files(test_downloads, save_tmp_dir)
-        expected_file_1 = Path(save_tmp_dir / "Test File #1.mp3")
-        assert expected_file_1.exists()
-        assert len(list(save_tmp_dir.iterdir())) == 1
-        assert len(downloader.successful_downloaded) == 1
+    downloader.download_files(test_downloads, tmp_path)
+    expected_file_1 = tmp_path / "Test File #1.mp3"
+    assert expected_file_1.exists()
+    assert len(list(tmp_path.iterdir())) == 1
+    assert len(downloader.successful_downloaded) == 1


 def test_primary_link_unavailable(
     requests_mock: rm_Mocker,
+    tmp_path: Path,
     capsys: CaptureFixture[str],
 ) -> None:
     """It records an unavailable file and prints a message about it."""
     downloader.successful_downloaded = {}  # Clear from previous tests
     test_downloads: List[Tuple[str, List[str]]] = []
     file_1 = (
         "Test File #1",
         [
             "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
         ],
     )
     test_downloads.append(file_1)

     requests_mock.get(
         "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
         exc=Exception("Something wrong!"),
     )

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        downloader.download_files(test_downloads, save_tmp_dir)
-        captured = capsys.readouterr()
-        assert len(list(save_tmp_dir.iterdir())) == 0
-        assert len(downloader.successful_downloaded) == 0
-        assert len(downloader.unavailable_links) == 1
-        assert "[ERROR]: Unknown error:" in captured.out
-        assert "Something wrong!" in captured.out
-        assert "[INFO]: Can't download:" in captured.out
-        assert "Test File #1.mp3" in captured.out
+    downloader.download_files(test_downloads, tmp_path)
+    captured = capsys.readouterr()
+    assert len(list(tmp_path.iterdir())) == 0
+    assert len(downloader.successful_downloaded) == 0
+    assert len(downloader.unavailable_links) == 1
+    assert "[ERROR]: Unknown error:" in captured.out
+    assert "Something wrong!" in captured.out
+    assert "[INFO]: Can't download:" in captured.out
+    assert "Test File #1.mp3" in captured.out


 def test_both_primary_and_auxiliary_links_404(
     requests_mock: rm_Mocker,
+    tmp_path: Path,
     capsys: CaptureFixture[str],
 ) -> None:
     """It records unavailable files and prints a message about them."""
     downloader.successful_downloaded = {}  # Clear from previous tests
     test_downloads: List[Tuple[str, List[str]]] = []
     file_1 = (
         "Test File #1",
         [
             "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
             "https://hotenov.com/d/lep/some_auxiliary_1.mp3",
         ],
     )
     test_downloads.append(file_1)

     requests_mock.get(
         "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3",
         text="Response not OK",
         status_code=404,
     )
     requests_mock.get(
         "https://hotenov.com/d/lep/some_auxiliary_1.mp3",
         text="Response not OK",
         status_code=404,
     )

-    with tempfile.TemporaryDirectory(prefix="LEP_save_") as temp_dir:
-        save_tmp_dir = Path(temp_dir)
-        downloader.download_files(test_downloads, save_tmp_dir)
-        captured = capsys.readouterr()
-        assert len(list(save_tmp_dir.iterdir())) == 0
-        assert len(downloader.successful_downloaded) == 0
-        assert len(downloader.unavailable_links) == 1
-        assert "[INFO]: Can't download:" in captured.out
-        assert "Test File #1.mp3" in captured.out
+    downloader.download_files(test_downloads, tmp_path)
+    captured = capsys.readouterr()
+    assert len(list(tmp_path.iterdir())) == 0
+    assert len(downloader.successful_downloaded) == 0
+    assert len(downloader.unavailable_links) == 1
+    assert "[INFO]: Can't download:" in captured.out
+    assert "Test File #1.mp3" in captured.out

From 29ce393a57424d6737a3be8de3261dde5cd29e0f Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Sun, 21 Nov 2021 10:57:05 +0300
Subject: [PATCH 14/16] test(downloader): :recycle: Add fixtures to mock mp3
 files and update tests with them

Add ignore tags for URL lines exceeding the 80-character limit
---
 tests/conftest.py        | 21 +++++++++
 tests/test_downloader.py | 92 ++++++++++++++++++++--------------------
 2 files changed, 68 insertions(+), 45 deletions(-)

diff --git a/tests/conftest.py b/tests/conftest.py
index 09a84f1..db4b951 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -277,3 +277,24 @@
     # If we clean base temp directory itself
     # 'typeguard' will warn on Windows hosts
     # tmp_path_factory.getbasetemp().parent.rmdir()
+
+
+@pytest.fixture(scope="module")
+def mp3_mocks_path(mocks_dir_path: Path) -> Path:
+    """Returns path to 'mp3' sub-directory of mocks."""
+    mp3_dir = mocks_dir_path / "mp3"
+    return mp3_dir
+
+
+@pytest.fixture(scope="module")
+def mp3_file1_mock(mp3_mocks_path: Path) -> bytes:
+    """Returns bytes of the first mocked mp3 file."""
+    mocked_file_1 = mp3_mocks_path / "test_lep_audio1.mp3"
+    return mocked_file_1.read_bytes()
+
+
+@pytest.fixture(scope="module")
+def mp3_file2_mock(mp3_mocks_path: Path) -> bytes:
+    """Returns bytes of the second mocked mp3 file."""
+    mocked_file_2 = mp3_mocks_path / "test_lep_audio2.mp3"
+    return mocked_file_2.read_bytes()
diff --git a/tests/test_downloader.py b/tests/test_downloader.py
index 179b33d..ce0185b 100644
--- a/tests/test_downloader.py
+++ b/tests/test_downloader.py
@@ -53,10 +53,10 @@ def test_extracting_audio_data() -> None:
     audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
     expected_ep = (
         "2009-10-19",
-        "15. Extra Podcast – 12 Phrasal Verbs",  # dash as Unicode character here.
+        "15. Extra Podcast – 12 Phrasal Verbs",  # noqa: E501,B950 # dash as Unicode character here.
         [
             [
-                "http://traffic.libsyn.com/teacherluke/15-extra-podcast-12-phrasal-verbs.mp3"
+                "http://traffic.libsyn.com/teacherluke/15-extra-podcast-12-phrasal-verbs.mp3"  # noqa: E501,B950
             ]
         ],
         False,
@@ -71,9 +71,9 @@ def test_forming_multipart_download_links() -> None:
     audio_data = downloader.get_audios_data(audio_episodes)
     audio_links = downloader.bind_name_and_file_url(audio_data)
     excepted_link = (
-        "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 02]",
+        "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 02]",  # noqa: E501,B950
         [
-            "https://audioboom.com/posts/5621870-episode-167-luke-back-on-zep-part-2.mp3",
+            "https://audioboom.com/posts/5621870-episode-167-luke-back-on-zep-part-2.mp3",  # noqa: E501,B950
         ],
     )
     assert audio_links[11] == excepted_link
@@ -87,7 +87,7 @@ def test_forming_numbered_download_link() -> None:
     excepted_link = (
         "[2021-02-03] # 703. Walaa from Syria – WISBOLEP Competition Winner",
         [
-            "https://traffic.libsyn.com/secure/teacherluke/703._Walaa_from_Syria_-_WISBOLEP_Competition_Winner_.mp3",
+            "https://traffic.libsyn.com/secure/teacherluke/703._Walaa_from_Syria_-_WISBOLEP_Competition_Winner_.mp3",  # noqa: E501,B950
         ],
     )
     assert audio_links[15] == excepted_link
@@ -99,9 +99,9 @@ def test_forming_safe_filename_for_downloading() -> None:
     excepted_link = (
-        "[2016-08-07] # 370. In Conversation with Rob Ager from Liverpool (PART 1_ Life in Liverpool _ Interest in Film Analysis)",
+        "[2016-08-07] # 370. In Conversation with Rob Ager from Liverpool (PART 1_ Life in Liverpool _ Interest in Film Analysis)",  # noqa: E501,B950
         [
-            "http://traffic.libsyn.com/teacherluke/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis.mp3",
+            "http://traffic.libsyn.com/teacherluke/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis.mp3",  # noqa: E501,B950
         ],
     )
     assert audio_links[9] == excepted_link
@@ -116,7 +116,7 @@ def test_separating_existing_and_non_existing_mp3(
     filename_1 = "[2021-08-03] # 733. A Summer Ramble.mp3"
A Summer Ramble.mp3" - filename_2 = "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 05].mp3" + filename_2 = "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 05].mp3" # noqa: E501,B950 Path(tmp_path / filename_1).write_text("Here are mp3 1 bytes") Path(tmp_path / filename_2).write_text("Here are mp3 2 bytes") @@ -145,7 +145,7 @@ def test_retrieving_audios_as_none() -> None: "admin_note": "Edge case - null in 'audios'" } ] - """ + """ # noqa: E501,B950 db_episodes = get_list_of_valid_episodes(json_test) audio_data = downloader.get_audios_data(db_episodes) assert audio_data[0][2] == [] @@ -153,31 +153,34 @@ def test_retrieving_audios_as_none() -> None: def test_downloading_mocked_mp3_files( requests_mock: rm_Mocker, + mp3_file1_mock: bytes, + mp3_file2_mock: bytes, tmp_path: Path, ) -> None: """It downloads file on disc.""" test_downloads: List[Tuple[str, List[str]]] = [] file_1 = ( "Test File #1", - ["https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3"], + [ + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3" # noqa: E501,B950 + ], ) file_2 = ( "Test File #2", - ["https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3"], + [ + "https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3" # noqa: E501,B950 + ], ) test_downloads.append(file_1) test_downloads.append(file_2) - mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3" requests_mock.get( - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", - content=mocked_file_1.read_bytes(), + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 + content=mp3_file1_mock, ) - - mocked_file_2 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio2.mp3" requests_mock.get( - "https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3", - content=mocked_file_2.read_bytes(), + "https://audioboom.com/posts/5678762-episode-169-luke-back-on-zep-part-4.mp3", # noqa: E501,B950 + content=mp3_file2_mock, ) downloader.download_files(test_downloads, tmp_path) @@ -192,6 +195,8 @@ def test_downloading_mocked_mp3_files( def test_skipping_downloaded_url( requests_mock: rm_Mocker, + mp3_file1_mock: bytes, + mp3_file2_mock: bytes, tmp_path: Path, ) -> None: """It skips URL if it was downloaded before.""" @@ -199,28 +204,25 @@ def test_skipping_downloaded_url( file_1 = ( "Test File #1", [ - "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" + "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" # noqa: E501,B950 ], ) file_2 = ( "Test File #2", [ - "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" + "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" # noqa: E501,B950 ], ) test_downloads.append(file_1) test_downloads.append(file_2) - mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3" requests_mock.get( - "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", - content=mocked_file_1.read_bytes(), + "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", # noqa: E501,B950 + content=mp3_file1_mock, ) - - mocked_file_2 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio2.mp3" requests_mock.get( - "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", - content=mocked_file_2.read_bytes(), + 
"http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", # noqa: E501,B950 + content=mp3_file2_mock, ) downloader.download_files(test_downloads, tmp_path) @@ -232,6 +234,8 @@ def test_skipping_downloaded_url( def test_skipping_downloaded_file_on_disc( requests_mock: rm_Mocker, + mp3_file1_mock: bytes, + mp3_file2_mock: bytes, tmp_path: Path, ) -> None: """It skips (and does not override) URL if file was downloaded before.""" @@ -240,26 +244,25 @@ def test_skipping_downloaded_file_on_disc( file_1 = ( "Test File #1", [ - "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" + "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3" # noqa: E501,B950 ], ) file_2 = ( "Test File #2", - ["https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3"], + [ + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3" # noqa: E501,B950 + ], ) test_downloads.append(file_1) test_downloads.append(file_2) - mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3" requests_mock.get( - "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", - content=mocked_file_1.read_bytes(), + "http://traffic.libsyn.com/teacherluke/36-london-video-interviews-pt-1-audio-only.mp3", # noqa: E501,B950 + content=mp3_file1_mock, ) - - mocked_file_2 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio2.mp3" requests_mock.get( - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", - content=mocked_file_2.read_bytes(), + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 + content=mp3_file2_mock, ) existing_file_1 = tmp_path / "Test File #1.mp3" @@ -274,6 +277,7 @@ def test_skipping_downloaded_file_on_disc( def test_try_auxiliary_download_links( requests_mock: rm_Mocker, + mp3_file1_mock: bytes, tmp_path: Path, ) -> None: """It downloads file by auxiliary link.""" @@ -282,17 +286,15 @@ def test_try_auxiliary_download_links( file_1 = ( "Test File #1", [ - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 "https://hotenov.com/d/lep/some_auxiliary_1.mp3", "https://hotenov.com/d/lep/some_auxiliary_2.mp3", ], ) test_downloads.append(file_1) - mocked_file_1 = OFFLINE_HTML_DIR / "mp3" / "test_lep_audio1.mp3" - requests_mock.get( - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 text="Response not OK", status_code=404, ) @@ -303,7 +305,7 @@ def test_try_auxiliary_download_links( ) requests_mock.get( "https://hotenov.com/d/lep/some_auxiliary_2.mp3", - content=mocked_file_1.read_bytes(), + content=mp3_file1_mock, ) downloader.download_files(test_downloads, tmp_path) @@ -325,13 +327,13 @@ def test_primary_link_unavailable( file_1 = ( "Test File #1", [ - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 ], ) test_downloads.append(file_1) requests_mock.get( - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 exc=Exception("Something wrong!"), ) @@ -358,14 +360,14 @@ def test_both_primary_and_auxiliary_links_404( file_1 = ( "Test File #1", [ - 
"https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 "https://hotenov.com/d/lep/some_auxiliary_1.mp3", ], ) test_downloads.append(file_1) requests_mock.get( - "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", + "https://traffic.libsyn.com/secure/teacherluke/733._A_Summer_Ramble.mp3", # noqa: E501,B950 text="Response not OK", status_code=404, ) From 3fade5562d470c7b174b270dda86b4c33093d446 Mon Sep 17 00:00:00 2001 From: Artem Hotenov Date: Sun, 21 Nov 2021 12:09:20 +0300 Subject: [PATCH 15/16] test(downloader): :recycle: Add fixtures to mock different extractions drom JSON db Update tests with new fixtures, refactor imports --- tests/conftest.py | 47 +++++++++++++++++++++++++++- tests/test_downloader.py | 67 ++++++++++++++++------------------------ 2 files changed, 72 insertions(+), 42 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index db4b951..40778b0 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -41,6 +41,9 @@ from requests_mock.response import _Context as rm_Context +DataForEpisodeAudio = List[Tuple[str, str, List[List[str]], bool]] +NamesWithAudios = List[Tuple[str, List[str]]] + # yapf: disable URL_HTML_MAPPING = { "https://teacherluke.co.uk/2009/04/12/episode-1-introduction/": @@ -269,7 +272,13 @@ def mocked_episodes( @pytest.fixture(scope="session") def lep_temp_path(tmp_path_factory: TempPathFactory) -> Iterator[Path]: - """Returns path to custom temp directory.""" + """Returns path to custom temp directory. + + This fixture is redundant, but it lets to clear + the base pytest temp directory at the end of session. + https://github.com/pytest-dev/pytest/issues/8141 + If disc space is not a problem - can be replaced with 'tmp_path' + """ temp_path = tmp_path_factory.mktemp("lep_tmp") yield temp_path # Cleanup all folders in fixture's base temp directory @@ -298,3 +307,39 @@ def mp3_file2_mock(mp3_mocks_path: Path) -> bytes: """Returns bytes of the second mocked mp3 file.""" mocked_file_2 = mp3_mocks_path / "test_lep_audio2.mp3" return mocked_file_2.read_bytes() + + +@pytest.fixture(scope="session") +def only_valid_episodes(json_db_mock: str) -> List[Any]: + """Returns list of valid LepEpisode objects from JSON mocked database.""" + from lep_downloader import data_getter + + mocked_db_episodes = data_getter.get_list_of_valid_episodes(json_db_mock) + return mocked_db_episodes + + +@pytest.fixture(scope="session") +def only_audio_episodes(only_valid_episodes: List[Any]) -> List[Any]: + """Returns only audio episodes from all.""" + from lep_downloader import downloader + + audio_episodes = downloader.select_all_audio_episodes(only_valid_episodes) + return audio_episodes + + +@pytest.fixture(scope="session") +def only_audio_data(only_audio_episodes: List[Any]) -> DataForEpisodeAudio: + """Returns only extracted audio data from audio episodes.""" + from lep_downloader import downloader + + audio_data = downloader.get_audios_data(only_audio_episodes) + return audio_data + + +@pytest.fixture(scope="session") +def only_audio_links(only_audio_data: DataForEpisodeAudio) -> NamesWithAudios: + """Returns only links and names for audio files.""" + from lep_downloader import downloader + + audio_links = downloader.bind_name_and_file_url(only_audio_data) + return audio_links diff --git a/tests/test_downloader.py b/tests/test_downloader.py index ce0185b..97ab88d 100644 --- a/tests/test_downloader.py +++ 
@@ -27,30 +27,22 @@
 from pytest import CaptureFixture
 from requests_mock.mocker import Mocker as rm_Mocker

-from lep_downloader import downloader as downloader
-from lep_downloader.data_getter import get_list_of_valid_episodes
+from lep_downloader import data_getter
+from lep_downloader import downloader
+from lep_downloader.lep import LepEpisode


-# TODO: Duplicated code (move to conftest.py)
-OFFLINE_HTML_DIR = Path(
-    Path(__file__).resolve().parent,
-    "fixtures",
-)
-
-local_path = OFFLINE_HTML_DIR / "mocked-db-json-equal-786-objects.json"
-MOCKED_JSON_DB = local_path.read_text(encoding="utf-8")
-MOCKED_DB_EPISODES = get_list_of_valid_episodes(MOCKED_JSON_DB)
-
-
-def test_selecting_only_audio_episodes() -> None:
+def test_selecting_only_audio_episodes(
+    only_audio_episodes: List[LepEpisode],
+) -> None:
     """It returns filtered list with only audio episodes."""
-    audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
-    assert len(audio_episodes) == 15
+    assert len(only_audio_episodes) == 15


-def test_extracting_audio_data() -> None:
+def test_extracting_audio_data(
+    only_audio_episodes: List[LepEpisode],
+) -> None:
     """It returns list of tuples with audio data."""
-    audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
     expected_ep = (
         "2009-10-19",
         "15. Extra Podcast – 12 Phrasal Verbs",  # noqa: E501,B950 # dash as Unicode character here.
         [
             [
                 "http://traffic.libsyn.com/teacherluke/15-extra-podcast-12-phrasal-verbs.mp3"  # noqa: E501,B950
             ]
         ],
         False,
     )
-    audio_data = downloader.get_audios_data(audio_episodes)
+    audio_data = downloader.get_audios_data(only_audio_episodes)
     assert audio_data[1] == expected_ep


-def test_forming_multipart_download_links() -> None:
+def test_forming_multipart_download_links(
+    only_audio_links: downloader.NamesWithAudios,
+) -> None:
     """It returns list of URLs with titles for files."""
-    audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
-    audio_data = downloader.get_audios_data(audio_episodes)
-    audio_links = downloader.bind_name_and_file_url(audio_data)
     excepted_link = (
         "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 02]",  # noqa: E501,B950
         [
             "https://audioboom.com/posts/5621870-episode-167-luke-back-on-zep-part-2.mp3",  # noqa: E501,B950
         ],
     )
-    assert audio_links[11] == excepted_link
+    assert only_audio_links[11] == excepted_link


-def test_forming_numbered_download_link() -> None:
+def test_forming_numbered_download_link(
+    only_audio_links: downloader.NamesWithAudios,
+) -> None:
     """It returns list of URLs with titles for files."""
-    audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
-    audio_data = downloader.get_audios_data(audio_episodes)
-    audio_links = downloader.bind_name_and_file_url(audio_data)
     excepted_link = (
         "[2021-02-03] # 703. Walaa from Syria – WISBOLEP Competition Winner",
         [
             "https://traffic.libsyn.com/secure/teacherluke/703._Walaa_from_Syria_-_WISBOLEP_Competition_Winner_.mp3",  # noqa: E501,B950
         ],
     )
-    assert audio_links[15] == excepted_link
+    assert only_audio_links[15] == excepted_link


-def test_forming_safe_filename_for_downloading() -> None:
+def test_forming_safe_filename_for_downloading(
+    only_audio_links: downloader.NamesWithAudios,
+) -> None:
     """It replaces invalid path characters with '_'."""
-    audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
-    audio_data = downloader.get_audios_data(audio_episodes)
-    audio_links = downloader.bind_name_and_file_url(audio_data)
     excepted_link = (
         "[2016-08-07] # 370. In Conversation with Rob Ager from Liverpool (PART 1_ Life in Liverpool _ Interest in Film Analysis)",  # noqa: E501,B950
         [
             "http://traffic.libsyn.com/teacherluke/370-in-conversation-with-rob-ager-from-liverpool-part-1-life-in-liverpool-interest-in-film-analysis.mp3",  # noqa: E501,B950
         ],
     )
-    assert audio_links[9] == excepted_link
+    assert only_audio_links[9] == excepted_link


 def test_separating_existing_and_non_existing_mp3(
+    only_audio_links: downloader.NamesWithAudios,
     tmp_path: Path,
 ) -> None:
     """It detects when file has already been downloaded."""
-    audio_episodes = downloader.select_all_audio_episodes(MOCKED_DB_EPISODES)
-    audio_data = downloader.get_audios_data(audio_episodes)
-    audio_links = downloader.bind_name_and_file_url(audio_data)
-
     filename_1 = "[2021-08-03] # 733. A Summer Ramble.mp3"
     filename_2 = "[2017-03-11] # LEP on ZEP – My recent interview on Zdenek’s English Podcast [Part 05].mp3"  # noqa: E501,B950
     Path(tmp_path / filename_1).write_text("Here are mp3 1 bytes")
     Path(tmp_path / filename_2).write_text("Here are mp3 2 bytes")

     existing, non_existing = downloader.detect_existing_files(
-        audio_links,
+        only_audio_links,
         tmp_path,
     )
-
     assert len(existing) == 2
     assert len(non_existing) == 17
@@ ... def test_retrieving_audios_as_none() -> None:
         }
     ]
     """  # noqa: E501,B950
-    db_episodes = get_list_of_valid_episodes(json_test)
+    db_episodes = data_getter.get_list_of_valid_episodes(json_test)
     audio_data = downloader.get_audios_data(db_episodes)
     assert audio_data[0][2] == []

From 7bceb20ac8ed37e878ff14fa08dbfa9bdd6dd3e2 Mon Sep 17 00:00:00 2001
From: Artem Hotenov
Date: Sun, 21 Nov 2021 12:58:54 +0300
Subject: [PATCH 16/16] test(parser): :white_check_mark: Add test to check
 skipping non-episode URL during parsing

---
 tests/test_parser.py | 30 ++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/tests/test_parser.py b/tests/test_parser.py
index 24e6640..5b0bd9a 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -373,6 +373,36 @@ def test_parsing_non_episode_link(
     assert episode is None


+def test_skipping_non_episode_link(
+    requests_mock: rm_Mocker,
+    single_page_matcher: Optional[Callable[[_RequestObjectProxy], bool]],
+    single_page_mock: str,
+    req_ses: requests.Session,
+) -> None:
+    """It skips non-episode link."""
+    test_urls = [
+        "https://teacherluke.co.uk/2009/04/12/episode-1-introduction/",
+        "https://teacherluke.co.uk/premium/archive-comment-section/",  # noqa: E501,B950
+    ]
+    test_texts = [
+        "Episode 1 Link",
+        "Non Episode Title Link",
+    ]
+    requests_mock.get(
+        req_mock.ANY,
+        additional_matcher=single_page_matcher,
+        text=single_page_mock,
+    )
+    requests_mock.get(
+        "https://teacherluke.co.uk/premium/archive-comment-section/",
+        text="No need to parse this page",
+        status_code=200,
+    )
+    episodes = parser.get_parsed_episodes(test_urls, req_ses, test_texts)
+    assert len(episodes) == 1
+    assert episodes[0].post_title == "Episode 1 Link"