diff --git a/.github/ISSUE_TEMPLATE/1_bug.yml b/.github/ISSUE_TEMPLATE/1_bug.yml index fa7a4a33195..73d57204874 100644 --- a/.github/ISSUE_TEMPLATE/1_bug.yml +++ b/.github/ISSUE_TEMPLATE/1_bug.yml @@ -2,35 +2,46 @@ name: Bug report description: Report a bug labels: [ bug ] body: + - type: checkboxes + attributes: + label: Don't skip these steps + description: Carefully complete the following steps before submitting an issue + options: + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field + required: true + - label: I have checked through the search that there are no similar issues that already exist + required: true + - label: I will not submit any issues that are not related to this project + required: true - type: checkboxes id: checklist attributes: - label: 发生环境 (Occurrence environment) + label: Occurrence environment description: | - 选择该Bug触发于什么环境 (Please select what environment is the Bug triggered in): + Please select what environment is the Bug triggered in: options: - - label: 工作流 (Workflow) + - label: Workflow required: false - label: GUI required: false - label: Docker required: false - - label: 命令行 (Command line) + - label: Command line required: false - type: textarea id: description attributes: - label: Bug描述 (Bug description) - description: 对于该Bug的具体描述,好的描述将有助于理解与解决 (As for the specific description of the Bug, a good description will help to understand and solve it) - placeholder: 提供任何附加信息,任何建议的解决方案,以及尽可能多的上下文和示例 (Provide any additional information, any suggested solutions, and as much context and examples as possible) + label: Bug description + description: As for the specific description of the Bug, a good description will help to understand and solve it + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible validations: required: true - type: textarea id: log attributes: - label: 报错日志 (Error log) - description: 提供对于该Bug的详细报错日志 (Provides a detailed error log for the Bug) - placeholder: 请输入详细的报错日志 (Please enter a detailed error log) + label: Error log + description: Provides a detailed error log for the Bug + placeholder: Please enter a detailed error log render: shell validations: - required: false \ No newline at end of file + required: false diff --git a/.github/ISSUE_TEMPLATE/2_question.yml b/.github/ISSUE_TEMPLATE/2_question.yml index e69de29bb2d..361f9859ce6 100644 --- a/.github/ISSUE_TEMPLATE/2_question.yml +++ b/.github/ISSUE_TEMPLATE/2_question.yml @@ -0,0 +1,47 @@ +name: Ask Question +description: Ask iptv-api related question +labels: [ question ] +body: + - type: checkboxes + attributes: + label: Don't skip these steps + description: Carefully complete the following steps before submitting an issue + options: + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field + required: true + - label: I have checked through the search that there are no similar issues that already exist + required: true + - label: I will not submit any issues that are not related to this project + required: true + - type: checkboxes + id: checklist + attributes: + label: Occurrence environment + description: | + Select the context in which the question is located: + options: + - label: Workflow + required: false + - label: GUI + required: false + - label: Docker + required: false + - label: Command line + required: false + - type: textarea + id: description + attributes: + label: Question description + description: As 
for the specific description of the Question, a good description will help to understand and solve it + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + - type: textarea + id: log + attributes: + label: Related log + description: Provides detailed log for the question + placeholder: Please enter a detailed log + render: shell + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/3_enhancement.yml b/.github/ISSUE_TEMPLATE/3_enhancement.yml index e69de29bb2d..25851269391 100644 --- a/.github/ISSUE_TEMPLATE/3_enhancement.yml +++ b/.github/ISSUE_TEMPLATE/3_enhancement.yml @@ -0,0 +1,47 @@ +name: Enhancement request +description: Request a feature enhancement +labels: [ enhancement ] +body: + - type: checkboxes + attributes: + label: Don't skip these steps + description: Carefully complete the following steps before submitting an issue + options: + - label: I understand that I will be **blocked** if I *intentionally* remove or skip any mandatory\* field + required: true + - label: I have checked through the search that there are no similar issues that already exist + required: true + - label: I will not submit any issues that are not related to this project + required: true + - type: checkboxes + id: checklist + attributes: + label: Application environment + description: | + Select the context in which the enhancement is located: + options: + - label: Workflow + required: false + - label: GUI + required: false + - label: Docker + required: false + - label: Command line + required: false + - type: textarea + id: description + attributes: + label: Enhancement description + description: As for the specific description of the Enhancement, a good description will help to understand and solve it + placeholder: Provide any additional information, any suggested solutions, and as much context and examples as possible + validations: + required: true + - type: textarea + id: log + attributes: + label: Related log + description: Provides detailed log for the enhancement + placeholder: Please enter a detailed log + render: shell + validations: + required: false diff --git a/.github/ISSUE_TEMPLATE/4_documentation.yml b/.github/ISSUE_TEMPLATE/4_documentation.yml deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/config/cache.pkl b/config/cache.pkl new file mode 100644 index 00000000000..6838378182f Binary files /dev/null and b/config/cache.pkl differ diff --git a/config/config.ini b/config/config.ini index 250421dda89..4a48df8205b 100644 --- a/config/config.ini +++ b/config/config.ini @@ -2,6 +2,8 @@ open_service = True open_update = True open_use_old_result = True +open_use_cache = True +open_request = False source_file = config/demo.txt final_file = output/result.txt open_online_search = False @@ -24,16 +26,16 @@ ipv6_num = 5 open_m3u_result = True url_keywords_blacklist = epg.pw,skype.serv00.net,iptv.yjxfz.com,live-hls-web-ajb.getaj.net,live.goodiptv.club,hc73k3dhwo5gfkt.wcetv.com,stream1.freetv.fun,zw9999.cnstream.top,zsntlqj.xicp.net open_subscribe = True -subscribe_urls = 
https://live.zbds.top/tv/iptv6.txt,https://live.zbds.top/tv/iptv4.txt,https://live.fanmingming.com/tv/m3u/ipv6.m3u,https://ghp.ci/https://raw.githubusercontent.com/joevess/IPTV/main/home.m3u8,https://aktv.top/live.txt,http://175.178.251.183:6689/live.txt,https://ghp.ci/https://raw.githubusercontent.com/kimwang1978/collect-tv-txt/main/merged_output.txt,https://m3u.ibert.me/txt/fmml_dv6.txt,https://m3u.ibert.me/txt/o_cn.txt,https://m3u.ibert.me/txt/j_iptv.txt,https://ghp.ci/https://raw.githubusercontent.com/xzw832/cmys/main/S_CCTV.txt,https://ghp.ci/https://raw.githubusercontent.com/xzw832/cmys/main/S_weishi.txt,http://itv.22m.top/ITVBox/tv/tvonline.txt,https://ghp.ci//https://raw.githubusercontent.com/asdjkl6/tv/tv/.m3u/整套直播源/测试/整套直播源/l.txt,https://ghp.ci//https://raw.githubusercontent.com/asdjkl6/tv/tv/.m3u/整套直播源/测试/整套直播源/kk.txt +subscribe_urls = https://iptv.b2og.com/txt/fmml_ipv6.txt,https://ghp.ci/raw.githubusercontent.com/suxuang/myIPTV/main/ipv6.m3u,https://live.zbds.top/tv/iptv6.txt,https://live.zbds.top/tv/iptv4.txt,https://live.fanmingming.com/tv/m3u/ipv6.m3u,https://ghp.ci/https://raw.githubusercontent.com/joevess/IPTV/main/home.m3u8,https://aktv.top/live.txt,http://175.178.251.183:6689/live.txt,https://ghp.ci/https://raw.githubusercontent.com/kimwang1978/collect-tv-txt/main/merged_output.txt,https://m3u.ibert.me/txt/fmml_dv6.txt,https://m3u.ibert.me/txt/o_cn.txt,https://m3u.ibert.me/txt/j_iptv.txt,https://ghp.ci/https://raw.githubusercontent.com/xzw832/cmys/main/S_CCTV.txt,https://ghp.ci/https://raw.githubusercontent.com/xzw832/cmys/main/S_weishi.txt,http://itv.22m.top/ITVBox/tv/tvonline.txt,https://ghp.ci//https://raw.githubusercontent.com/asdjkl6/tv/tv/.m3u/整套直播源/测试/整套直播源/l.txt,https://ghp.ci//https://raw.githubusercontent.com/asdjkl6/tv/tv/.m3u/整套直播源/测试/整套直播源/kk.txt open_multicast = True -open_multicast_foodie = False +open_multicast_foodie = True open_multicast_fofa = True multicast_region_list = 全部 multicast_page_num = 1 open_proxy = False open_driver = False open_hotel = True -open_hotel_foodie = False +open_hotel_foodie = True open_hotel_fofa = True hotel_region_list = 全部 hotel_page_num = 1 diff --git a/docs/config.md b/docs/config.md index b8564016139..102088f806f 100644 --- a/docs/config.md +++ b/docs/config.md @@ -3,6 +3,8 @@ | open_service | True | 开启页面服务,用于控制是否启动结果页面服务;如果使用青龙等平台部署,有专门设定的定时任务,需要更新完成后停止运行,可以关闭该功能 | | open_update | True | 开启更新,用于控制是否更新接口,若关闭则所有工作模式(获取接口和测速)均停止 | | open_use_old_result | True | 开启使用历史更新结果(包含模板与结果文件的接口),合并至本次更新中 | +| open_use_cache | True | 开启使用本地缓存数据,适用于查询请求失败场景 | +| open_request | False | 开启查询请求,数据来源于网络 | | open_driver | True | 开启浏览器运行,若更新无数据可开启此模式,较消耗性能 | | open_proxy | False | 开启代理,自动获取免费可用代理,若更新无数据可开启此模式 | | source_file | config/demo.txt | 模板文件路径 | diff --git a/docs/config_en.md b/docs/config_en.md index 5b6fa8d429c..5bc60980008 100644 --- a/docs/config_en.md +++ b/docs/config_en.md @@ -3,6 +3,8 @@ | open_service | True | Enable page service, used to control whether to start the result page service; if deployed on platforms like Qinglong with dedicated scheduled tasks, the function can be turned off after updates are completed and the task is stopped | | open_update | True | Enable updates, if disabled then only the result page service is run | | open_use_old_result | True | Enable the use of historical update results (including the interface for template and result files) and merge them into the current update | +| open_use_cache | True | Enable the use of local cache data, applicable to the query request failure scenario | +| 
open_request | False | Enable query request, the data is obtained from the network | | open_driver | True | Enable browser execution, If there are no updates, this mode can be enabled, which consumes more performance | | open_proxy | False | Enable proxy, automatically obtains free available proxies, If there are no updates, this mode can be enabled | | source_file | config/demo.txt | Template file path | diff --git a/updates/fofa/request.py b/updates/fofa/request.py index 0d3b9f15f61..28978789646 100644 --- a/updates/fofa/request.py +++ b/updates/fofa/request.py @@ -71,115 +71,116 @@ async def get_channels_by_fofa(urls=None, multicast=False, callback=None): """ Get the channel by FOFA """ - fofa_urls = urls if urls else get_fofa_urls_from_region_list() - fofa_urls_len = len(fofa_urls) - pbar = tqdm_asyncio( - total=fofa_urls_len, - desc=f"Processing fofa for {'multicast' if multicast else 'hotel'}", - ) - start_time = time() fofa_results = {} - mode_name = "组播" if multicast else "酒店" - if callback: - callback( - f"正在获取Fofa{mode_name}源, 共{fofa_urls_len}个查询地址", - 0, + if config.open_use_cache: + fofa_results = get_fofa_region_result_tmp(multicast=multicast) + if config.open_request: + fofa_urls = urls if urls else get_fofa_urls_from_region_list() + fofa_urls_len = len(fofa_urls) + pbar = tqdm_asyncio( + total=fofa_urls_len, + desc=f"Processing fofa for {'multicast' if multicast else 'hotel'}", ) - proxy = None - open_proxy = config.open_proxy - open_driver = config.open_driver - if open_driver: - from driver.setup import setup_driver - open_sort = config.open_sort - if open_proxy: - test_url = fofa_urls[0][0] - proxy = await get_proxy(test_url, best=True, with_test=True) - cancel_event = threading.Event() - hotel_name = constants.origin_map["hotel"] + start_time = time() + mode_name = "组播" if multicast else "酒店" + if callback: + callback( + f"正在获取Fofa{mode_name}源, 共{fofa_urls_len}个查询地址", + 0, + ) + proxy = None + open_proxy = config.open_proxy + open_driver = config.open_driver + if open_driver: + from driver.setup import setup_driver + open_sort = config.open_sort + if open_proxy: + test_url = fofa_urls[0][0] + proxy = await get_proxy(test_url, best=True, with_test=True) + cancel_event = threading.Event() + hotel_name = constants.origin_map["hotel"] - def process_fofa_channels(fofa_info): - nonlocal proxy - if cancel_event.is_set(): - return {} - fofa_url = fofa_info[0] - results = defaultdict(lambda: defaultdict(list)) - driver = None - try: - if open_driver: - driver = setup_driver(proxy) - try: - retry_func(lambda: driver.get(fofa_url), name=fofa_url) - except Exception as e: - if open_proxy: - proxy = get_proxy_next() + def process_fofa_channels(fofa_info): + nonlocal proxy + if cancel_event.is_set(): + return {} + fofa_url = fofa_info[0] + results = defaultdict(lambda: defaultdict(list)) + driver = None + try: + if open_driver: + driver = setup_driver(proxy) + try: + retry_func(lambda: driver.get(fofa_url), name=fofa_url) + except Exception as e: + if open_proxy: + proxy = get_proxy_next() + driver.close() + driver.quit() + driver = setup_driver(proxy) + driver.get(fofa_url) + page_source = driver.page_source + else: + page_source = retry_func( + lambda: get_source_requests(fofa_url), name=fofa_url + ) + if any(keyword in page_source for keyword in ["访问异常", "禁止访问", "资源访问每天限制"]): + cancel_event.set() + raise ValueError("Limited access to fofa page") + fofa_source = re.sub(r"", "", page_source, flags=re.DOTALL) + urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source)) + if 
multicast: + region = fofa_info[1] + type = fofa_info[2] + multicast_result = [(url, None, None) for url in urls] + results[region][type] = multicast_result + else: + with ThreadPoolExecutor(max_workers=100) as executor: + futures = [ + executor.submit( + process_fofa_json_url, + url, + fofa_info[1], + open_sort, + hotel_name, + ) + for url in urls + ] + for future in futures: + results = merge_objects(results, future.result()) + return results + except ValueError as e: + raise e + except Exception as e: + print(e) + finally: + if driver: driver.close() driver.quit() - driver = setup_driver(proxy) - driver.get(fofa_url) - page_source = driver.page_source - else: - page_source = retry_func( - lambda: get_source_requests(fofa_url), name=fofa_url - ) - if any(keyword in page_source for keyword in ["访问异常", "禁止访问", "资源访问每天限制"]): - cancel_event.set() - raise ValueError("Limited access to fofa page") - fofa_source = re.sub(r"", "", page_source, flags=re.DOTALL) - urls = set(re.findall(r"https?://[\w\.-]+:\d+", fofa_source)) - if multicast: - region = fofa_info[1] - type = fofa_info[2] - multicast_result = [(url, None, None) for url in urls] - results[region][type] = multicast_result - else: - with ThreadPoolExecutor(max_workers=100) as executor: - futures = [ - executor.submit( - process_fofa_json_url, - url, - fofa_info[1], - open_sort, - hotel_name, - ) - for url in urls - ] - for future in futures: - results = merge_objects(results, future.result()) - return results - except ValueError as e: - raise e - except Exception as e: - print(e) - finally: - if driver: - driver.close() - driver.quit() - pbar.update() - remain = fofa_urls_len - pbar.n - if callback: - callback( - f"正在获取Fofa{mode_name}源, 剩余{remain}个查询地址待获取, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}", - int((pbar.n / fofa_urls_len) * 100), - ) + pbar.update() + remain = fofa_urls_len - pbar.n + if callback: + callback( + f"正在获取Fofa{mode_name}源, 剩余{remain}个查询地址待获取, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}", + int((pbar.n / fofa_urls_len) * 100), + ) - max_workers = 3 if open_driver else 10 - with ThreadPoolExecutor(max_workers=max_workers) as executor: - futures = [ - executor.submit(process_fofa_channels, fofa_url) for fofa_url in fofa_urls - ] - try: - for future in as_completed(futures): - result = future.result() - if result: - fofa_results = merge_objects(fofa_results, result) - except ValueError as e: - if "Limited access to fofa page" in str(e): - for future in futures: - future.cancel() - if fofa_results: - update_fofa_region_result_tmp(fofa_results, multicast=multicast) - else: - fofa_results = get_fofa_region_result_tmp(multicast=multicast) + max_workers = 3 if open_driver else 10 + with ThreadPoolExecutor(max_workers=max_workers) as executor: + futures = [ + executor.submit(process_fofa_channels, fofa_url) for fofa_url in fofa_urls + ] + try: + for future in as_completed(futures): + result = future.result() + if result: + fofa_results = merge_objects(fofa_results, result) + except ValueError as e: + if "Limited access to fofa page" in str(e): + for future in futures: + future.cancel() + if fofa_results: + update_fofa_region_result_tmp(fofa_results, multicast=multicast) pbar.n = fofa_urls_len pbar.update(0) if callback: @@ -187,9 +188,9 @@ def process_fofa_channels(fofa_info): f"正在获取Fofa{mode_name}源", 100, ) - if not open_driver: - close_session() - pbar.close() + if not open_driver: + close_session() + pbar.close() return fofa_results diff --git 
a/updates/hotel/request.py b/updates/hotel/request.py index 1f8e21a230b..7dbba6c7514 100644 --- a/updates/hotel/request.py +++ b/updates/hotel/request.py @@ -1,26 +1,29 @@ -from utils.config import config +import pickle +import urllib.parse as urlparse +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from time import time +from urllib.parse import parse_qs + +from tqdm.asyncio import tqdm_asyncio + +import updates.fofa.fofa_map as fofa_map import utils.constants as constants +from driver.setup import setup_driver +from driver.utils import search_submit +from requests_custom.utils import get_soup_requests, close_session +from updates.proxy import get_proxy, get_proxy_next +from updates.subscribe import get_channels_by_subscribe_urls from utils.channel import ( get_results_from_multicast_soup, get_results_from_multicast_soup_requests, ) -from utils.tools import get_pbar_remaining, get_soup -from updates.proxy import get_proxy, get_proxy_next -from time import time -from driver.setup import setup_driver -from driver.utils import search_submit +from utils.config import config from utils.retry import ( retry_func, find_clickable_element_with_retry, ) -from tqdm.asyncio import tqdm_asyncio -from concurrent.futures import ThreadPoolExecutor, as_completed -from requests_custom.utils import get_soup_requests, close_session -import urllib.parse as urlparse -from urllib.parse import parse_qs -from updates.subscribe import get_channels_by_subscribe_urls -from collections import defaultdict -import updates.fofa.fofa_map as fofa_map +from utils.tools import get_pbar_remaining, get_soup, merge_objects if config.open_driver: try: @@ -34,157 +37,169 @@ async def get_channels_by_hotel(callback=None): Get the channels by multicase """ channels = {} - pageUrl = constants.foodie_hotel_url - proxy = None - open_proxy = config.open_proxy - open_driver = config.open_driver - page_num = config.hotel_page_num - region_list = config.hotel_region_list - if "all" in region_list or "ALL" in region_list or "全部" in region_list: - region_list = list(getattr(fofa_map, "region_url").keys()) - if open_proxy: - proxy = await get_proxy(pageUrl, best=True, with_test=True) - start_time = time() - - def process_region_by_hotel(region): - nonlocal proxy - name = f"{region}" - info_list = [] - driver = None + if config.open_use_cache: try: - if open_driver: - driver = setup_driver(proxy) - try: - retry_func( - lambda: driver.get(pageUrl), - name=f"Foodie hotel search:{name}", - ) - except Exception as e: - if open_proxy: - proxy = get_proxy_next() - driver.close() - driver.quit() + with open( + constants.config_cache_path, + "rb", + ) as file: + cache = pickle.load(file) or {} + channels = cache.get("hotel", {}) + except: + pass + if config.open_request: + pageUrl = constants.foodie_hotel_url + proxy = None + open_proxy = config.open_proxy + open_driver = config.open_driver + page_num = config.hotel_page_num + region_list = config.hotel_region_list + if "all" in region_list or "ALL" in region_list or "全部" in region_list: + region_list = list(getattr(fofa_map, "region_url").keys()) + if open_proxy: + proxy = await get_proxy(pageUrl, best=True, with_test=True) + start_time = time() + + def process_region_by_hotel(region): + nonlocal proxy + name = f"{region}" + info_list = [] + driver = None + try: + if open_driver: driver = setup_driver(proxy) - driver.get(pageUrl) - search_submit(driver, name) - else: - page_soup = None - post_form = {"saerch": name} - code = None - try: - 
page_soup = retry_func( - lambda: get_soup_requests(pageUrl, data=post_form, proxy=proxy), - name=f"Foodie hotel search:{name}", - ) - except Exception as e: - if open_proxy: - proxy = get_proxy_next() - page_soup = get_soup_requests(pageUrl, data=post_form, proxy=proxy) - if not page_soup: - print(f"{name}:Request fail.") - return info_list + try: + retry_func( + lambda: driver.get(pageUrl), + name=f"Foodie hotel search:{name}", + ) + except Exception as e: + if open_proxy: + proxy = get_proxy_next() + driver.close() + driver.quit() + driver = setup_driver(proxy) + driver.get(pageUrl) + search_submit(driver, name) else: - a_tags = page_soup.find_all("a", href=True) - for a_tag in a_tags: - href_value = a_tag["href"] - parsed_url = urlparse.urlparse(href_value) - code = parse_qs(parsed_url.query).get("code", [None])[0] - if code: - break - # retry_limit = 3 - for page in range(1, page_num + 1): - # retries = 0 - # if not open_driver and page == 1: - # retries = 2 - # while retries < retry_limit: - try: - if page > 1: - if open_driver: - page_link = find_clickable_element_with_retry( - driver, - ( - By.XPATH, - f'//a[contains(@href, "={page}") and contains(@href, "{name}")]', - ), - ) - if not page_link: - break - driver.execute_script("arguments[0].click();", page_link) - else: - request_url = ( - f"{pageUrl}?net={name}&page={page}&code={code}" - ) - page_soup = retry_func( - lambda: get_soup_requests(request_url, proxy=proxy), - name=f"hotel search:{name}, page:{page}", - ) - soup = get_soup(driver.page_source) if open_driver else page_soup - if soup: - if "About 0 results" in soup.text: - break - results = ( - get_results_from_multicast_soup(soup, hotel=True) - if open_driver - else get_results_from_multicast_soup_requests( - soup, hotel=True - ) + page_soup = None + post_form = {"saerch": name} + code = None + try: + page_soup = retry_func( + lambda: get_soup_requests(pageUrl, data=post_form, proxy=proxy), + name=f"Foodie hotel search:{name}", ) - print(name, "page:", page, "results num:", len(results)) - if len(results) == 0: - print(f"{name}:No results found") - info_list = info_list + results + except Exception as e: + if open_proxy: + proxy = get_proxy_next() + page_soup = get_soup_requests(pageUrl, data=post_form, proxy=proxy) + if not page_soup: + print(f"{name}:Request fail.") + return info_list else: - print(f"{name}:No page soup found") - if page != page_num and open_driver: - driver.refresh() - except Exception as e: - print(f"{name}:Error on page {page}: {e}") - continue - except Exception as e: - print(f"{name}:Error on search: {e}") - pass - finally: - if driver: - driver.close() - driver.quit() - pbar.update() - if callback: - callback( - f"正在获取Foodie酒店源, 剩余{region_list_len - pbar.n}个地区待查询, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}", - int((pbar.n / region_list_len) * 100), - ) - return info_list + a_tags = page_soup.find_all("a", href=True) + for a_tag in a_tags: + href_value = a_tag["href"] + parsed_url = urlparse.urlparse(href_value) + code = parse_qs(parsed_url.query).get("code", [None])[0] + if code: + break + # retry_limit = 3 + for page in range(1, page_num + 1): + # retries = 0 + # if not open_driver and page == 1: + # retries = 2 + # while retries < retry_limit: + try: + if page > 1: + if open_driver: + page_link = find_clickable_element_with_retry( + driver, + ( + By.XPATH, + f'//a[contains(@href, "={page}") and contains(@href, "{name}")]', + ), + ) + if not page_link: + break + driver.execute_script("arguments[0].click();", 
page_link) + else: + request_url = ( + f"{pageUrl}?net={name}&page={page}&code={code}" + ) + page_soup = retry_func( + lambda: get_soup_requests(request_url, proxy=proxy), + name=f"hotel search:{name}, page:{page}", + ) + soup = get_soup(driver.page_source) if open_driver else page_soup + if soup: + if "About 0 results" in soup.text: + break + results = ( + get_results_from_multicast_soup(soup, hotel=True) + if open_driver + else get_results_from_multicast_soup_requests( + soup, hotel=True + ) + ) + print(name, "page:", page, "results num:", len(results)) + if len(results) == 0: + print(f"{name}:No results found") + info_list = info_list + results + else: + print(f"{name}:No page soup found") + if page != page_num and open_driver: + driver.refresh() + except Exception as e: + print(f"{name}:Error on page {page}: {e}") + continue + except Exception as e: + print(f"{name}:Error on search: {e}") + pass + finally: + if driver: + driver.close() + driver.quit() + pbar.update() + if callback: + callback( + f"正在获取Foodie酒店源, 剩余{region_list_len - pbar.n}个地区待查询, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}", + int((pbar.n / region_list_len) * 100), + ) + return info_list - region_list_len = len(region_list) - pbar = tqdm_asyncio(total=region_list_len, desc="Foodie hotel search") - if callback: - callback(f"正在获取Foodie酒店源, 共{region_list_len}个地区", 0) - search_region_result = defaultdict(list) - with ThreadPoolExecutor(max_workers=3) as executor: - futures = { - executor.submit(process_region_by_hotel, region): region - for region in region_list - } + region_list_len = len(region_list) + pbar = tqdm_asyncio(total=region_list_len, desc="Foodie hotel search") + if callback: + callback(f"正在获取Foodie酒店源, 共{region_list_len}个地区", 0) + search_region_result = defaultdict(list) + with ThreadPoolExecutor(max_workers=3) as executor: + futures = { + executor.submit(process_region_by_hotel, region): region + for region in region_list + } - for future in as_completed(futures): - region = futures[future] - result = future.result() + for future in as_completed(futures): + region = futures[future] + result = future.result() - if result: - for item in result: - url = item.get("url") - date = item.get("date") - if url: - search_region_result[region].append((url, date, None)) - urls = [ - {region: region, url: f"http://{url}/ZHGXTV/Public/json/live_interface.txt"} - for region, result in search_region_result.items() - for url, _, _ in result - ] - channels = await get_channels_by_subscribe_urls( - urls, hotel=True, retry=False, error_print=False - ) - if not open_driver: - close_session() - pbar.close() + if result: + for item in result: + url = item.get("url") + date = item.get("date") + if url: + search_region_result[region].append((url, date, None)) + urls = [ + {"region": region, "url": f"http://{url}/ZHGXTV/Public/json/live_interface.txt"} + for region, result in search_region_result.items() + for url, _, _ in result + ] + request_channels = await get_channels_by_subscribe_urls( + urls, hotel=True, retry=False, error_print=False + ) + channels = merge_objects(channels, request_channels) + if not open_driver: + close_session() + pbar.close() return channels diff --git a/updates/multicast/request.py b/updates/multicast/request.py index 1706e2da2df..aad5644b1f5 100644 --- a/updates/multicast/request.py +++ b/updates/multicast/request.py @@ -1,5 +1,18 @@ -from utils.config import config +import pickle +import urllib.parse as urlparse +from collections import defaultdict +from 
concurrent.futures import ThreadPoolExecutor, as_completed +from time import time +from urllib.parse import parse_qs + +from tqdm.asyncio import tqdm_asyncio + import utils.constants as constants +from driver.setup import setup_driver +from driver.utils import search_submit +from requests_custom.utils import get_soup_requests, close_session +from updates.fofa import get_channels_by_fofa +from updates.proxy import get_proxy, get_proxy_next from utils.channel import ( get_results_from_multicast_soup, get_results_from_multicast_soup_requests, @@ -8,22 +21,12 @@ get_channel_multicast_result, get_multicast_fofa_search_urls, ) -from utils.tools import get_pbar_remaining, get_soup, merge_objects -from updates.proxy import get_proxy, get_proxy_next -from updates.fofa import get_channels_by_fofa -from time import time -from driver.setup import setup_driver -from driver.utils import search_submit +from utils.config import config from utils.retry import ( retry_func, find_clickable_element_with_retry, ) -from tqdm.asyncio import tqdm_asyncio -from concurrent.futures import ThreadPoolExecutor, as_completed -from requests_custom.utils import get_soup_requests, close_session -import urllib.parse as urlparse -from urllib.parse import parse_qs -from collections import defaultdict +from utils.tools import get_pbar_remaining, get_soup, merge_objects from .update_tmp import get_multicast_region_result_by_rtp_txt if config.open_driver: @@ -35,165 +38,178 @@ async def get_channels_by_multicast(names, callback=None): """ - Get the channels by multicase + Get the channels by multicast """ channels = {} - pageUrl = constants.foodie_hotel_url - proxy = None - open_proxy = config.open_proxy - open_driver = config.open_driver - page_num = config.multicast_page_num - if open_proxy: - proxy = await get_proxy(pageUrl, best=True, with_test=True) - multicast_region_result = get_multicast_region_result_by_rtp_txt(callback=callback) - name_region_type_result = get_channel_multicast_name_region_type_result( - multicast_region_result, names - ) - region_type_list = get_channel_multicast_region_type_list(name_region_type_result) - search_region_type_result = defaultdict(lambda: defaultdict(list)) - if config.open_multicast_fofa: - fofa_search_urls = get_multicast_fofa_search_urls() - fofa_result = await get_channels_by_fofa( - fofa_search_urls, multicast=True, callback=callback + if config.open_use_cache: + try: + with open( + constants.config_cache_path, + "rb", + ) as file: + cache = pickle.load(file) or {} + multicast_data = cache.get("multicast", {}) + channels = {key: value for key, value in multicast_data.items() if key in names} + except: + pass + if config.open_request: + pageUrl = constants.foodie_hotel_url + proxy = None + open_proxy = config.open_proxy + open_driver = config.open_driver + page_num = config.multicast_page_num + if open_proxy: + proxy = await get_proxy(pageUrl, best=True, with_test=True) + multicast_region_result = get_multicast_region_result_by_rtp_txt(callback=callback) + name_region_type_result = get_channel_multicast_name_region_type_result( + multicast_region_result, names ) - merge_objects(search_region_type_result, fofa_result) + region_type_list = get_channel_multicast_region_type_list(name_region_type_result) + search_region_type_result = defaultdict(lambda: defaultdict(list)) + if config.open_multicast_fofa: + fofa_search_urls = get_multicast_fofa_search_urls() + fofa_result = await get_channels_by_fofa( + fofa_search_urls, multicast=True, callback=callback + ) + search_region_type_result = 
merge_objects(search_region_type_result, fofa_result) - def process_channel_by_multicast(region, type): - nonlocal proxy - name = f"{region}{type}" - info_list = [] - driver = None - try: - if open_driver: - driver = setup_driver(proxy) - try: - retry_func( - lambda: driver.get(pageUrl), name=f"multicast search:{name}" - ) - except Exception as e: - if open_proxy: - proxy = get_proxy_next() - driver.close() - driver.quit() + def process_channel_by_multicast(region, type): + nonlocal proxy + name = f"{region}{type}" + info_list = [] + driver = None + try: + if open_driver: driver = setup_driver(proxy) - driver.get(pageUrl) - search_submit(driver, name) - else: - page_soup = None - post_form = {"saerch": name} - code = None - try: - page_soup = retry_func( - lambda: get_soup_requests(pageUrl, data=post_form, proxy=proxy), - name=f"multicast search:{name}", - ) - except Exception as e: - if open_proxy: - proxy = get_proxy_next() - page_soup = get_soup_requests(pageUrl, data=post_form, proxy=proxy) - if not page_soup: - print(f"{name}:Request fail.") - return {"region": region, "type": type, "data": info_list} + try: + retry_func( + lambda: driver.get(pageUrl), name=f"multicast search:{name}" + ) + except Exception as e: + if open_proxy: + proxy = get_proxy_next() + driver.close() + driver.quit() + driver = setup_driver(proxy) + driver.get(pageUrl) + search_submit(driver, name) else: - a_tags = page_soup.find_all("a", href=True) - for a_tag in a_tags: - href_value = a_tag["href"] - parsed_url = urlparse.urlparse(href_value) - code = parse_qs(parsed_url.query).get("code", [None])[0] - if code: - break - for page in range(1, page_num + 1): - try: - if page > 1: - if open_driver: - page_link = find_clickable_element_with_retry( - driver, - ( - By.XPATH, - f'//a[contains(@href, "={page}") and contains(@href, "{name}")]', - ), - ) - if not page_link: - break - driver.execute_script("arguments[0].click();", page_link) - else: - request_url = ( - f"{pageUrl}?net={name}&page={page}&code={code}" - ) - page_soup = retry_func( - lambda: get_soup_requests(request_url, proxy=proxy), - name=f"multicast search:{name}, page:{page}", - ) - soup = get_soup(driver.page_source) if open_driver else page_soup - if soup: - if "About 0 results" in soup.text: - break - results = ( - get_results_from_multicast_soup(soup) - if open_driver - else get_results_from_multicast_soup_requests(soup) + page_soup = None + post_form = {"saerch": name} + code = None + try: + page_soup = retry_func( + lambda: get_soup_requests(pageUrl, data=post_form, proxy=proxy), + name=f"multicast search:{name}", ) - print(name, "page:", page, "results num:", len(results)) - if len(results) == 0: - print(f"{name}:No results found") - info_list = info_list + results + except Exception as e: + if open_proxy: + proxy = get_proxy_next() + page_soup = get_soup_requests(pageUrl, data=post_form, proxy=proxy) + if not page_soup: + print(f"{name}:Request fail.") + return {"region": region, "type": type, "data": info_list} else: - print(f"{name}:No page soup found") - if page != page_num and open_driver: - driver.refresh() - except Exception as e: - print(f"{name}:Error on page {page}: {e}") - continue - except Exception as e: - print(f"{name}:Error on search: {e}") - pass - finally: - if driver: - driver.close() - driver.quit() - pbar.update() + a_tags = page_soup.find_all("a", href=True) + for a_tag in a_tags: + href_value = a_tag["href"] + parsed_url = urlparse.urlparse(href_value) + code = parse_qs(parsed_url.query).get("code", [None])[0] + if code: + 
break + for page in range(1, page_num + 1): + try: + if page > 1: + if open_driver: + page_link = find_clickable_element_with_retry( + driver, + ( + By.XPATH, + f'//a[contains(@href, "={page}") and contains(@href, "{name}")]', + ), + ) + if not page_link: + break + driver.execute_script("arguments[0].click();", page_link) + else: + request_url = ( + f"{pageUrl}?net={name}&page={page}&code={code}" + ) + page_soup = retry_func( + lambda: get_soup_requests(request_url, proxy=proxy), + name=f"multicast search:{name}, page:{page}", + ) + soup = get_soup(driver.page_source) if open_driver else page_soup + if soup: + if "About 0 results" in soup.text: + break + results = ( + get_results_from_multicast_soup(soup) + if open_driver + else get_results_from_multicast_soup_requests(soup) + ) + print(name, "page:", page, "results num:", len(results)) + if len(results) == 0: + print(f"{name}:No results found") + info_list = info_list + results + else: + print(f"{name}:No page soup found") + if page != page_num and open_driver: + driver.refresh() + except Exception as e: + print(f"{name}:Error on page {page}: {e}") + continue + except Exception as e: + print(f"{name}:Error on search: {e}") + pass + finally: + if driver: + driver.close() + driver.quit() + pbar.update() + if callback: + callback( + f"正在进行Foodie组播更新, 剩余{region_type_list_len - pbar.n}个地区待查询, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}", + int((pbar.n / region_type_list_len) * 100), + ) + return {"region": region, "type": type, "data": info_list} + + if config.open_multicast_foodie: + region_type_list_len = len(region_type_list) + pbar = tqdm_asyncio(total=region_type_list_len, desc="Multicast search") if callback: callback( - f"正在进行Foodie组播更新, 剩余{region_type_list_len - pbar.n}个地区待查询, 预计剩余时间: {get_pbar_remaining(n=pbar.n, total=pbar.total, start_time=start_time)}", - int((pbar.n / region_type_list_len) * 100), - ) - return {"region": region, "type": type, "data": info_list} - - if config.open_multicast_foodie: - region_type_list_len = len(region_type_list) - pbar = tqdm_asyncio(total=region_type_list_len, desc="Multicast search") - if callback: - callback( - f"正在进行Foodie组播更新, {len(names)}个频道, 共{region_type_list_len}个地区", - 0, - ) - start_time = time() - with ThreadPoolExecutor(max_workers=3) as executor: - futures = { - executor.submit(process_channel_by_multicast, region, type): ( - region, - type, + f"正在进行Foodie组播更新, {len(names)}个频道, 共{region_type_list_len}个地区", + 0, ) - for region, type in region_type_list - } + start_time = time() + with ThreadPoolExecutor(max_workers=3) as executor: + futures = { + executor.submit(process_channel_by_multicast, region, type): ( + region, + type, + ) + for region, type in region_type_list + } - for future in as_completed(futures): - region, type = futures[future] - result = future.result() - data = result.get("data") + for future in as_completed(futures): + region, type = futures[future] + result = future.result() + data = result.get("data") - if data: - for item in data: - url = item.get("url") - date = item.get("date") - if url: - search_region_type_result[region][type].append( - (url, date, None) - ) - pbar.close() - channels = get_channel_multicast_result( - name_region_type_result, search_region_type_result - ) - if not open_driver: - close_session() + if data: + for item in data: + url = item.get("url") + date = item.get("date") + if url: + search_region_type_result[region][type].append( + (url, date, None) + ) + pbar.close() + request_channels = 
get_channel_multicast_result( + name_region_type_result, search_region_type_result + ) + channels = merge_objects(channels, request_channels) + if not open_driver: + close_session() return channels diff --git a/utils/config.py b/utils/config.py index da70ff7fe23..66c03ffc552 100644 --- a/utils/config.py +++ b/utils/config.py @@ -50,6 +50,14 @@ def open_service(self): def open_update(self): return self.config.getboolean("Settings", "open_update", fallback=True) + @property + def open_use_cache(self): + return self.config.getboolean("Settings", "open_use_cache", fallback=True) + + @property + def open_request(self): + return self.config.getboolean("Settings", "open_request", fallback=False) + @property def open_filter_resolution(self): return self.config.getboolean( diff --git a/utils/constants.py b/utils/constants.py index 5de5691820d..63ab039cf83 100644 --- a/utils/constants.py +++ b/utils/constants.py @@ -1,14 +1,18 @@ import os -output_dir = "output" +config_path = "config" -result_path = os.path.join(output_dir, "result_new.txt") +output_path = "output" -cache_path = os.path.join(output_dir, "cache.pkl") +config_cache_path = os.path.join(config_path, "cache.pkl") -sort_log_path = os.path.join(output_dir, "sort.log") +result_path = os.path.join(output_path, "result_new.txt") -log_path = os.path.join(output_dir, "log.log") +cache_path = os.path.join(output_path, "cache.pkl") + +sort_log_path = os.path.join(output_path, "sort.log") + +log_path = os.path.join(output_path, "log.log") url_pattern = r"((https?):\/\/)?(\[[0-9a-fA-F:]+\]|([\w-]+\.)+[\w-]+)(:[0-9]{1,5})?(\/[^\s]*)?(\$[^\s]+)?" diff --git a/utils/speed.py b/utils/speed.py index 742a1c3831b..409a3f5a67c 100644 --- a/utils/speed.py +++ b/utils/speed.py @@ -248,8 +248,16 @@ def sort_urls_by_speed_and_resolution(name, data, logger=None): """ filter_data = [] for url, date, resolution, origin in data: + result = { + "url": url, + "date": date, + "delay": None, + "speed": None, + "resolution": resolution, + "origin": origin + } if origin == "important": - filter_data.append((url, date, resolution, origin)) + filter_data.append(result) continue cache_key_match = re.search(r"cache:(.*)", url.partition("$")[2]) cache_key = cache_key_match.group(1) if cache_key_match else None @@ -267,16 +275,10 @@ def sort_urls_by_speed_and_resolution(name, data, logger=None): ) except Exception as e: print(e) - filter_data.append( - { - "url": url, - "date": date, - "delay": delay, - "speed": speed, - "resolution": resolution, - "origin": origin - } - ) + result["delay"] = delay + result["speed"] = speed + result["resolution"] = resolution + filter_data.append(result) def combined_key(item): speed, delay, resolution, origin = item["speed"], item["delay"], item["resolution"], item["origin"] diff --git a/utils/tools.py b/utils/tools.py index 0feae9c4daf..785fada204c 100644 --- a/utils/tools.py +++ b/utils/tools.py @@ -1,27 +1,29 @@ -from time import time import datetime -import os -import urllib.parse import ipaddress -import socket -from utils.config import config -import utils.constants as constants +import logging +import os import re -from bs4 import BeautifulSoup -from flask import render_template_string, send_file import shutil -import requests +import socket import sys -import logging +import urllib.parse from logging.handlers import RotatingFileHandler +from time import time + +import requests +from bs4 import BeautifulSoup +from flask import render_template_string, send_file + +import utils.constants as constants +from utils.config import config 
def get_logger(path, level=logging.ERROR, init=False): """ get the logger """ - if not os.path.exists(constants.output_dir): - os.makedirs(constants.output_dir) + if not os.path.exists(constants.output_path): + os.makedirs(constants.output_path) if init and os.path.exists(path): os.remove(path) handler = RotatingFileHandler(path, encoding="utf-8") @@ -144,8 +146,8 @@ def get_total_urls_from_info_list(infoList, ipv6=False): Get the total urls from info list """ ipv_type_prefer = list(config.ipv_type_prefer) - if "自动" in ipv_type_prefer or "auto" in ipv_type_prefer or not ipv_type_prefer: - ipv_type_prefer = ["ipv6", "ipv4"] if ipv6 else ["ipv4", "ipv6"] + if any(pref in ipv_type_prefer for pref in ["自动", "auto"]) or not ipv_type_prefer: + ipv_type_prefer = ["ipv6", "ipv4"] if (ipv6 or os.environ.get("GITHUB_ACTIONS")) else ["ipv4", "ipv6"] origin_type_prefer = config.origin_type_prefer categorized_urls = { origin: {"ipv4": [], "ipv6": []} for origin in origin_type_prefer @@ -204,8 +206,8 @@ def get_total_urls_from_info_list(infoList, ipv6=False): break if ipv_num[ipv_type] < config.ipv_limit[ipv_type]: limit = min( - config.source_limits[origin] - ipv_num[ipv_type], - config.ipv_limit[ipv_type] - ipv_num[ipv_type], + max(config.source_limits[origin] - ipv_num[ipv_type], 0), + max(config.ipv_limit[ipv_type] - ipv_num[ipv_type], 0), ) urls = categorized_urls[origin][ipv_type][:limit] total_urls.extend(urls) @@ -222,8 +224,8 @@ def get_total_urls_from_info_list(infoList, ipv6=False): if len(total_urls) >= urls_limit: break extra_urls = categorized_urls[origin][ipv_type][ - : config.source_limits[origin] - ] + : config.source_limits[origin] + ] total_urls.extend(extra_urls) total_urls = list(dict.fromkeys(total_urls))[:urls_limit] @@ -283,10 +285,10 @@ def check_url_ipv_type(url): ipv6 = is_ipv6(url) ipv_type = config.ipv_type return ( - (ipv_type == "ipv4" and not ipv6) - or (ipv_type == "ipv6" and ipv6) - or ipv_type == "全部" - or ipv_type == "all" + (ipv_type == "ipv4" and not ipv6) + or (ipv_type == "ipv6" and ipv6) + or ipv_type == "全部" + or ipv_type == "all" ) @@ -382,7 +384,7 @@ def convert_to_m3u(): processed_channel_name = re.sub( r"(CCTV|CETV)-(\d+)(\+.*)?", lambda m: f"{m.group(1)}{m.group(2)}" - + ("+" if m.group(3) else ""), + + ("+" if m.group(3) else ""), original_channel_name, ) m3u_output += f'#EXTINF:-1 tvg-name="{processed_channel_name}" tvg-logo="https://live.fanmingming.com/tv/{processed_channel_name}.png"'
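
Note (not part of the patch): a minimal sketch of the cache-first flow this diff introduces in updates/hotel/request.py and updates/multicast/request.py. When open_use_cache is enabled, previously pickled results are read from constants.config_cache_path (config/cache.pkl); when open_request is enabled, fresh results are fetched and merged on top. The helper names fetch, load_cached_channels, and merge_results below are placeholders for illustration, and the merge is a simplified stand-in for the repository's merge_objects.

# Hedged sketch only -- assumed/simplified names, not code from this repository.
import os
import pickle

CONFIG_CACHE_PATH = os.path.join("config", "cache.pkl")  # constants.config_cache_path in the patch


def load_cached_channels(key):
    # Return cached results for key ("hotel" or "multicast"); fall back to {} on any failure,
    # mirroring the bare try/except around pickle.load in the patch.
    try:
        with open(CONFIG_CACHE_PATH, "rb") as file:
            cache = pickle.load(file) or {}
        return cache.get(key, {})
    except (OSError, pickle.UnpicklingError, EOFError):
        return {}


def merge_results(base, extra):
    # Simplified shallow merge: freshly requested data overrides cached entries.
    merged = dict(base)
    merged.update(extra)
    return merged


def get_channels(key, open_use_cache=True, open_request=False, fetch=None):
    # Cache-first lookup: read the local pickle cache, then optionally refresh from the network.
    channels = load_cached_channels(key) if open_use_cache else {}
    if open_request and fetch is not None:
        channels = merge_results(channels, fetch())  # fetch() stands in for the Foodie/FOFA crawlers
    return channels

With the defaults this patch sets (open_use_cache = True, open_request = False), hotel and multicast updates are served from config/cache.pkl only; enabling open_request re-queries the network sources and merges the fresh results over the cached ones.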