From 51fed4a9443e4cac2e9f2f988380f8202ecb6cd2 Mon Sep 17 00:00:00 2001 From: hawkeye116477 Date: Fri, 8 Sep 2023 00:14:34 +0200 Subject: [PATCH] Update ECODFF to 2.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Rewritten in Python, cuz Pythonization is revelation 😊 * Removed code which adds files to git repo * Performance improvements --- Dockerfile_ED | 2 +- scripts/CI/CI_LETS_DO_THIS.sh | 4 +- scripts/DSC.sh | 4 +- scripts/ECODFF.py | 296 ++++++++++++++++++++++++++++++++++ scripts/ECODFF.sh | 278 ------------------------------- scripts/Sd2D.py | 13 +- 6 files changed, 307 insertions(+), 290 deletions(-) create mode 100755 scripts/ECODFF.py delete mode 100755 scripts/ECODFF.sh diff --git a/Dockerfile_ED b/Dockerfile_ED index d0f46327..8b4b2b38 100644 --- a/Dockerfile_ED +++ b/Dockerfile_ED @@ -3,7 +3,7 @@ FROM ubuntu:latest # make Apt non-interactive ENV DEBIAN_FRONTEND=noninteractive -RUN apt-get update && apt-get install -y git openssh-client ca-certificates wget tzdata pcregrep whois host language-pack-pl curl python3-tldextract nodejs +RUN apt-get update && apt-get install -y git openssh-client ca-certificates wget tzdata whois language-pack-pl curl python3-tldextract nodejs python3-dnspython python3-aiohttp # uncomment chosen locale to enable it's generation RUN sed -i -e 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen diff --git a/scripts/CI/CI_LETS_DO_THIS.sh b/scripts/CI/CI_LETS_DO_THIS.sh index 0135f5a7..f4f02c4d 100755 --- a/scripts/CI/CI_LETS_DO_THIS.sh +++ b/scripts/CI/CI_LETS_DO_THIS.sh @@ -19,7 +19,7 @@ function letsGo() { F_NAME="polish_rss_filters_supp.txt" fi wget -O "$F_NAME" "$i" - ./scripts/ECODFF.sh "$F_NAME" + ./scripts/ECODFF.py "$F_NAME" rm -rf ./"$F_NAME" done } @@ -31,7 +31,7 @@ if [[ $1 == "KAD" ]]; then mkdir -p "$MAIN_PATH"/split/ split --numeric=1 -d -n l/"$numberParts" "$MAIN_PATH"/KAD.txt "$MAIN_PATH"/split/KAD_ elif [[ $1 =~ KAD_ || $1 =~ KADhosts_ ]]; then - 
./scripts/ECODFF.sh ./split/"$1" + ./scripts/ECODFF.py ./split/"$1" rm -rf ./"$1" elif [[ $1 == "KADhosts" ]]; then wget -O KADhosts.txt https://raw.githubusercontent.com/FiltersHeroes/KADhosts/master/sections/hostsplus.txt diff --git a/scripts/DSC.sh b/scripts/DSC.sh index bec3c218..b1e42678 100755 --- a/scripts/DSC.sh +++ b/scripts/DSC.sh @@ -67,7 +67,7 @@ check_domain_status() NOW=${EPOCHSECONDS:-$(date +%s)} # Avoid failing whole job on CI/gracefully fail script - if [ "$CI" = "true" ]; + if [ "$CI" = "true" ]; then if [ "$NOW" -ge "$END_TIME" ]; then @@ -75,7 +75,7 @@ check_domain_status() exit 0 fi fi - + # Avoid WHOIS LIMIT EXCEEDED - slowdown our whois client by adding 3 sec sleep 3 # Save the domain since set will trip up the ordering diff --git a/scripts/ECODFF.py b/scripts/ECODFF.py new file mode 100755 index 00000000..1da48291 --- /dev/null +++ b/scripts/ECODFF.py @@ -0,0 +1,296 @@ +#!/usr/bin/env python3 +# coding=utf-8 +# pylint: disable=C0103 +# pylint: disable=missing-module-docstring +# pylint: disable=missing-class-docstring +# pylint: disable=missing-function-docstring +# +# ECODFF - Expiration Check Of Domains From Filterlists +"""MIT License + +Copyright (c) 2023 Filters Heroes + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE.""" + +import os +import sys +import argparse +import re +import subprocess +import shutil +import asyncio +from tempfile import NamedTemporaryFile +import importlib.util +import git +from dns.asyncresolver import resolve +from dns.resolver import NoNameservers, NXDOMAIN, NoAnswer, LifetimeTimeout +import aiohttp + +# Version number +SCRIPT_VERSION = "2.0" + +# Parse arguments +parser = argparse.ArgumentParser() +parser.add_argument('path_to_file', type=str, nargs='+', action='store') +parser.add_argument("-v", "--version", action='version', + version="ECODFF" + ' ' + SCRIPT_VERSION) +args = parser.parse_args() + +pj = os.path.join +pn = os.path.normpath + +script_path = os.path.dirname(os.path.realpath(__file__)) + +git_repo = git.Repo(script_path, search_parent_directories=True) +# Main_path is where the root of the repository is located +main_path = git_repo.git.rev_parse("--show-toplevel") + +temp_path = pj(main_path, "temp") + +os.chdir(main_path) + +DSC = [pj(script_path, "DSC.sh")] + +if "CI_TIME_LIMIT" in os.environ: + DSC += ["-t", os.getenv("CI_TIME_LIMIT")] + +EXPIRED_DIR = pj(main_path, "expired-domains") +if not os.path.isdir(EXPIRED_DIR): + os.mkdir(EXPIRED_DIR) + with open(pj(EXPIRED_DIR, ".keep"), 'w', encoding="utf-8") as fp: + pass + +for path_to_file in args.path_to_file: + FILTERLIST = os.path.splitext(os.path.basename(path_to_file))[0] + EXPIRED_FILE = pj(EXPIRED_DIR, FILTERLIST + "-expired.txt") + UNKNOWN_FILE = pj(EXPIRED_DIR, FILTERLIST + "-unknown.txt") + LIMIT_FILE = pj(EXPIRED_DIR, FILTERLIST + "-unknown_limit.txt") + NO_INTERNET_FILE = pj(EXPIRED_DIR, FILTERLIST + "-unknown_no_internet.txt") + PARKED_FILE = pj(EXPIRED_DIR, FILTERLIST + "-parked.txt") + + if not "NO_RM" in 
os.environ: + FILES_TO_REMOVE = [EXPIRED_FILE, UNKNOWN_FILE, + LIMIT_FILE, NO_INTERNET_FILE, PARKED_FILE] + for file_to_remove in FILES_TO_REMOVE: + if os.path.isfile(file_to_remove): + os.remove(file_to_remove) + + PAGE_DOUBLE_PIPE_PAT = re.compile(r"^@?@?\|\|([^\/|^|$\*]+\.\w+)") + PAGE_PIPE_PAT = re.compile( + r"(?:\$|\,)(denyallow|domain|from|method|to)\=([^\,\s]+)$") + PAGE_COMMA_PAT = re.compile(r"^([a-z0-9-~][^\/\*\|\@\"\!]*?)(#|\$\$)") + PAGE_HOSTS_PAT = re.compile(r"^.*?\d+\.\d+\.\d+\.\d+ (.*)") + + pages = [] + if os.path.isfile(path_to_file): + with open(path_to_file, "r", encoding="utf-8") as lf: + for line_lf in lf: + if match_2p := PAGE_DOUBLE_PIPE_PAT.match(line_lf): + pages.append(match_2p.group(1)) + if match_p := PAGE_PIPE_PAT.search(line_lf): + pages.extend(match_p.group(2).split("|")) + if match_c := PAGE_COMMA_PAT.match(line_lf): + pages.extend(match_c.group(1).split(",")) + if match_h := PAGE_HOSTS_PAT.match(line_lf): + pages.append(match_h.group(1)) + + WWW_PAT = re.compile(r"^(www[0-9]\.|www\.)") + for i, page in enumerate(pages): + pages[i] = page.replace("~", "") + pages[i] = re.sub(WWW_PAT, "", page, count=1) + + PAGE_PAT = re.compile(r".*(? 
0: + DSC_result = subprocess.run( + DSC + ["-f", no_internet_temp_file.name], check=True, capture_output=True) + DSC_decoded_result = DSC_result.stdout.decode() + + os.remove(no_internet_temp_file.name) + + if DSC_decoded_result: + print(DSC_decoded_result) + with open(EXPIRED_FILE, 'a', encoding="utf-8") as e_f, open(LIMIT_FILE, 'a', encoding="utf-8") as l_f, open(NO_INTERNET_FILE, 'w', encoding="utf-8") as no_i_f, open(valid_pages_temp_file.name, "a", encoding="utf-8") as valid_temp_file, open(unknown_pages_temp_file.name, "a", encoding="utf-8") as unknown_temp_file: + for entry in DSC_decoded_result.strip().splitlines(): + splitted_entry = entry.split() + if splitted_entry[1] in EXPIRED_SW: + e_f.write(f"{splitted_entry[0]}\n") + elif splitted_entry[1] == "Limit_exceeded": + l_f.write(f"{splitted_entry[0]}\n") + elif splitted_entry[1] == "Unknown": + unknown_temp_file.write( + f"{splitted_entry[0]}\n".encode()) + elif splitted_entry[1] == "No_internet": + no_i_f.write(splitted_entry[0]) + # We need to know which domains of subdomains are working + elif splitted_entry[1] == "Valid": + valid_temp_file.write( + f"{splitted_entry[0]}\n".encode()) + os.remove(no_internet_temp_file.name) + + if os.path.isfile(valid_pages_temp_file.name) and os.path.isfile(sub_temp_file.name): + valid_domains = [] + with open(valid_pages_temp_file.name, "r", encoding="utf-8") as valid_tmp_file: + for entry in valid_tmp_file: + if entry := entry.strip(): + valid_domains.append(entry) + if valid_domains: + regex_domains = re.compile(f"({'|'.join(valid_domains)})") + + with open(sub_temp_file.name, "r", encoding="utf-8") as sub_tmp_file, open(unknown_pages_temp_file.name, "a", encoding="utf-8") as unknown_temp_file: + for sub_entry in sub_tmp_file: + # If subdomains aren't working, but their domains are working, then include subdomains for additional checking + if regex_domains.search(sub_entry): + if not sub_entry in valid_domains: + unknown_temp_file.write(f"{sub_entry}\n") + 
os.remove(sub_temp_file.name) + del valid_domains + + unknown_pages = [] + if os.path.isfile(unknown_pages_temp_file.name): + with open(unknown_pages_temp_file.name, "r", encoding="utf-8") as unknown_temp_file: + for unknown_page in set(unknown_temp_file): + if unknown_page := unknown_page.strip(): + unknown_pages.append(unknown_page) + os.remove(unknown_pages_temp_file.name) + + async def get_status_code(session: aiohttp.ClientSession, url: str): + try: + print("Checking the status of domains...") + resp = await session.head("http://"+url, timeout=10) + status_code = resp.status + if status_code == "301": + resp = await session.head("https://"+url, timeout=10) + status_code = resp.status + except aiohttp.ClientConnectorError: + status_code = "000" + result = url + if not "NO_SC" in os.environ: + result += " " + status_code + return result + + async def save_status_code(): + async with aiohttp.ClientSession() as session: + statuses = await asyncio.gather(*[get_status_code(session, url) for url in unknown_pages], return_exceptions=True) + with open(UNKNOWN_FILE, 'w', encoding="utf-8") as u_f: + for status in statuses: + if status != "200": + u_f.write(f"{status}\n") + + if unknown_pages: + asyncio.run(save_status_code()) + + # Sort and remove duplicated domains + for e_file in [EXPIRED_FILE, UNKNOWN_FILE, LIMIT_FILE, NO_INTERNET_FILE, PARKED_FILE]: + if os.path.isfile(e_file): + with open(e_file, "r", encoding="utf-8") as f_f, NamedTemporaryFile(dir=temp_path, delete=False) as f_t: + for line in sorted(set(f_f)): + if line: + f_t.write(f"{line.strip()}\n".encode()) + os.replace(f_t.name, e_file) + + if os.path.exists(temp_path): + shutil.rmtree(temp_path) diff --git a/scripts/ECODFF.sh b/scripts/ECODFF.sh deleted file mode 100755 index af5cc9e4..00000000 --- a/scripts/ECODFF.sh +++ /dev/null @@ -1,278 +0,0 @@ -#!/bin/bash - -# ECODFF - Expiration Check Of Domains From Filterlists -# v1.19.6 - -# MIT License - -# Copyright (c) 2023 Filters Heroes - -# Permission is 
hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: - -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. - -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. - -SCRIPT_PATH=$(dirname "$(realpath -s "$0")") - -# MAIN_PATH to miejsce, w którym znajduje się główny katalog repozytorium -# Zakładamy, że skrypt znajduje się gdzieś w repozytorium git, -# w którym są pliki listy filtrów, którą chcemy zaktualizować. -# Jednakże jeżeli skrypt znajduje się gdzieś indziej, to -# zezwalamy na nadpisanie zmiennej MAIN_PATH. 
-if [ -z "$MAIN_PATH" ]; then - MAIN_PATH=$(git -C "$SCRIPT_PATH" rev-parse --show-toplevel) -fi - -cd "$MAIN_PATH" || exit - -DSC=$SCRIPT_PATH/DSC.sh -if [ -n "$CI_TIME_LIMIT" ]; then - DSC=("$SCRIPT_PATH"/DSC.sh -t "$CI_TIME_LIMIT") -fi - -for i in "$@"; do - - pageComma=$(pcregrep -o1 '^([a-z0-9-~][^\/\*\|\@\"\!]*?)(#|\$\$)' "$i") - - pagePipe=$(pcregrep -o3 '(domain)(=)([^,]+)' "$i") - - pageDoublePipe=$(pcregrep -o1 '^@?@?\|\|([^\/|^|$]+\.\w+)' "$i") - - hosts=$(pcregrep -o1 '^.*?0.0.0.0 (.*)' "$i") - - FILTERLIST_FILE=$(basename "$i") - FILTERLIST="${FILTERLIST_FILE%.*}" - TEMPORARY=$MAIN_PATH/${FILTERLIST}.temp - - if [ ! -d "$MAIN_PATH/expired-domains" ]; then - mdkir "$MAIN_PATH"/expired-domains - touch "$MAIN_PATH"/expired-domains/.keep - fi - - if [ -z "$NO_RM" ]; then - rm -rf "$MAIN_PATH"/expired-domains/"$FILTERLIST"-expired.txt - rm -rf "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - rm -rf "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_limit.txt - rm -rf "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_no_internet.txt - rm -rf "$MAIN_PATH"/expired-domains/"$FILTERLIST"-parked.txt - fi - - { - echo "$pageComma" - echo "$pagePipe" - echo "$pageDoublePipe" - echo "$hosts" - } >>"$TEMPORARY" - - sed -i "s/[|]/\n/g" "$TEMPORARY" - sed -i "s/\,/\n/g" "$TEMPORARY" - sed -i "s/\ /\n/g" "$TEMPORARY" - sed -i "s|\~||" "$TEMPORARY" - sed -i '/[/\*]/d' "$TEMPORARY" - sed -ni '/\./p' "$TEMPORARY" - sed -i -r "s/[0-9]?[0-9]?[0-9]\.[0-9]?[0-9]?[0-9]\.[0-9]?[0-9]?[0-9]\.[0-9]?[0-9]?[0-9]//" "$TEMPORARY" - sed -i '/^$/d' "$TEMPORARY" - sort -u -o "$TEMPORARY" "$TEMPORARY" - - while IFS= read -r domain; do - hostname=$(host -t ns "${domain}") - parked=$(echo "${hostname}" | grep -E "parkingcrew.net|parklogic.com|sedoparking.com") - echo "Checking the status of domains..." 
- if [[ "${hostname}" =~ "NXDOMAIN" ]]; then - echo "$domain" >>"$TEMPORARY".2 - elif [ -n "${parked}" ]; then - echo "$domain" >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-parked.txt - fi - done <"$TEMPORARY" - - if [ -f "$TEMPORARY.2" ]; then - sed -i "s/^www[0-9]\.//" "$TEMPORARY".2 - sed -i "s/^www\.//" "$TEMPORARY".2 - sort -u -o "$TEMPORARY".2 "$TEMPORARY".2 - - # Kopiujemy adresy zawieraj膮ce subdomeny do osobnego pliku - grep -E '(.+\.)+.+\..+$' "$TEMPORARY".2 >"$TEMPORARY".sub - - # Zamieniamy subdomeny na domeny - Sd2Dresult=$( - python3 <>"$TEMPORARY".3 - sed -i '/^$/d' "$TEMPORARY".3 - sort -u -o "$TEMPORARY".3 "$TEMPORARY".3 - fi - - if [ -f "$TEMPORARY.3" ]; then - "${DSC[@]}" -f "$TEMPORARY".3 | tee "$TEMPORARY".4 - - touch "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - - { - sed '/Expired/!d' "$TEMPORARY".4 | cut -d' ' -f1 - sed '/Book_blocked/!d' "$TEMPORARY".4 | cut -d' ' -f1 - sed '/Suspended/!d' "$TEMPORARY".4 | cut -d' ' -f1 - sed '/Removed/!d' "$TEMPORARY".4 | cut -d' ' -f1 - sed '/Free/!d' "$TEMPORARY".4 | cut -d' ' -f1 - sed '/Redemption_period/!d' "$TEMPORARY".4 | cut -d' ' -f1 - sed '/Suspended_or_reserved/!d' "$TEMPORARY".4 | cut -d' ' -f1 - } >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-expired.txt - - awk -F' ' '$2=="Unknown"' "$TEMPORARY".4 | cut -d' ' -f1 >>"$TEMPORARY".5 - awk -F' ' '$2=="Limit_exceeded"' "$TEMPORARY".4 | cut -d' ' -f1 >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_limit.txt - awk -F' ' '$2=="No_internet"' "$TEMPORARY".4 | cut -d' ' -f1 >>"$TEMPORARY".6 - - # Musimy wiedzie膰, kt贸re domeny subdomen s膮 ok - sed '/Valid/!d' "$TEMPORARY".4 | cut -d' ' -f1 >>"$TEMPORARY".d - fi - - if [ -f "$TEMPORARY.5" ]; then - while IFS= read -r domain; do - status_code=$(curl -o /dev/null --silent --head --write-out '%{http_code}\n' "$domain") - echo "Checking the status of domains..." - if [ "$status_code" -ne "200" ] && [ ! 
"$NO_SC" ]; then - echo "$domain $status_code" >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - elif [ "$status_code" -ne "200" ] && [ "$NO_SC" = "true" ]; then - echo "$domain" >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - fi - done <"$TEMPORARY".5 - fi - - if [ -f "$TEMPORARY.6" ]; then - "${DSC[@]}" -f "$TEMPORARY".6 | tee "$TEMPORARY".7 - - { - sed '/Expired/!d' "$TEMPORARY".7 | cut -d' ' -f1 - sed '/Book_blocked/!d' "$TEMPORARY".7 | cut -d' ' -f1 - sed '/Suspended/!d' "$TEMPORARY".7 | cut -d' ' -f1 - sed '/Removed/!d' "$TEMPORARY".7 | cut -d' ' -f1 - sed '/Free/!d' "$TEMPORARY".7 | cut -d' ' -f1 - sed '/Redemption_period/!d' "$TEMPORARY".7 | cut -d' ' -f1 - sed '/Suspended_or_reserved/!d' "$TEMPORARY".7 | cut -d' ' -f1 - } >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-expired.txt - - awk -F' ' '$2=="Unknown"' "$TEMPORARY".7 | cut -d' ' -f1 >>"$TEMPORARY".8 - awk -F' ' '$2=="Limit_exceeded"' "$TEMPORARY".7 | cut -d' ' -f1 >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_limit.txt - awk -F' ' '$2=="No_internet"' "$TEMPORARY".7 | cut -d' ' -f1 >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_no_internet.txt - - # Musimy wiedzie膰, kt贸re domeny subdomen s膮 ok - sed '/Valid/!d' "$TEMPORARY".7 | cut -d' ' -f1 >>"$TEMPORARY".d - fi - - if [ -f "$TEMPORARY.d" ]; then - while IFS= read -r domain; do - # Je偶eli subdomeny pad艂y, ale ich domeny dzia艂aj膮, to subdomeny trafiaj膮 do kolejnego pliku tymczasowego - if grep -q "$domain" "$TEMPORARY.sub"; then - subdomain=$(grep "$domain" "$TEMPORARY.sub") - if [ "$domain" != "$subdomain" ]; then - echo "$subdomain" >>"$TEMPORARY".8 - fi - fi - done <"$TEMPORARY".d - - rm -rf "$TEMPORARY.d" - fi - - if [ -f "$TEMPORARY.sub" ]; then - rm -rf "$TEMPORARY.sub" - fi - - if [ -f "$TEMPORARY.8" ]; then - while IFS= read -r domain; do - status_code=$(curl -o /dev/null --silent --head --write-out '%{http_code}\n' "$domain") - echo "Checking the status of domains..." - if [ "$status_code" -ne "200" ] && [ ! 
"$NO_SC" ]; then - echo "$domain $status_code" >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - elif [ "$status_code" -ne "200" ] && [ "$NO_SC" = "true" ]; then - echo "$domain" >>"$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - fi - done <"$TEMPORARY".8 - fi - - if [ -f "$MAIN_PATH"/expired-domains/"$FILTERLIST"-expired.txt ]; then - sort -u -o "$MAIN_PATH"/expired-domains/"$FILTERLIST"-expired.txt "$MAIN_PATH"/expired-domains/"$FILTERLIST"-expired.txt - fi - if [ -f "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt ]; then - sort -u -o "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown.txt - fi - if [ -f "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_limit.txt ]; then - sort -u -o "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_limit.txt "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_limit.txt - fi - if [ -f "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_no_internet.txt ]; then - sort -u -o "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_no_internet.txt "$MAIN_PATH"/expired-domains/"$FILTERLIST"-unknown_no_internet.txt - fi - - rm -rf "$TEMPORARY".* - rm -rf "$TEMPORARY" - -done - -# Lokalizacja pliku konfiguracyjnego -CONFIG=$SCRIPT_PATH/ECODFF.config -if [ -f "$CONFIG" ]; then - COMMIT_MODE=$(grep -oP -m 1 '@commit_mode true' "$CONFIG") - commit_message=$(grep -oP -m 1 '@commit \K.*' "$CONFIG") -fi - -if [ "$COMMIT_MODE" ] && [ -n "$(git status --porcelain)" ]; then - cd ./expired-domains || exit - for file in *.txt; do if [[ ! -s $file ]]; then rm -r "$file"; fi; done - cd "$MAIN_PATH" || exit - if [ "$CI" = "true" ]; then - CI_USERNAME=$(grep -oP -m 1 '@CIusername \K.*' "$CONFIG") - CI_EMAIL=$(grep -oP -m 1 '@CIemail \K.*' "$CONFIG") - git config --global user.name "${CI_USERNAME}" - git config --global user.email "${CI_EMAIL}" - fi - git add "$MAIN_PATH"/expired-domains/ - if [ "$commit_message" ] && [ ! "$CI" ]; then - git commit -m "$commit_message" - elif [ ! 
"$commit_message" ] && [ ! "$CI" ]; then - git commit -m "Expired domains check" - elif [ ! "$commit_message" ] && [ "$CI" ]; then - git commit -m "Expired domains check [ci skip]" - elif [ "$commit_message" ] && [ "$CI" ]; then - git commit -m "$commit_message [ci skip]" - fi - commited=$(git cherry -v) - if [ "$commited" ]; then - if [ "$CI" = "true" ]; then - GIT_SLUG=$(git ls-remote --get-url | sed "s|https://||g" | sed "s|git@||g" | sed "s|:|/|g") - git push https://"${CI_USERNAME}":"${GIT_TOKEN}"@"${GIT_SLUG}" HEAD:master >/dev/null 2>&1 - else - printf "%s\n" "Do you want to send changed files to git now?" - select yn in "Yes" "No"; do - case $yn in - Yes) - git push - break - ;; - No) break ;; - esac - done - fi - fi -fi diff --git a/scripts/Sd2D.py b/scripts/Sd2D.py index 98f75695..c6d79b3d 100755 --- a/scripts/Sd2D.py +++ b/scripts/Sd2D.py @@ -31,7 +31,7 @@ import tldextract -def main(subdomains_file): +def main(subdomains): domains = [] home = str(Path.home()) cachePath = os.path.join(home, '.cache') @@ -42,10 +42,9 @@ def main(subdomains_file): extract = tldextract.TLDExtract(include_psl_private_domains=True) extract.update() - with open(subdomains_file, "r", encoding='utf-8') as subdomains: - for line in subdomains: - if extract(line).registered_domain != "": - domains.append(extract(line).registered_domain) - else: - domains.append(line.strip()) + for line in subdomains: + if extract(line).registered_domain != "": + domains.append(extract(line).registered_domain) + else: + domains.append(line.strip()) return domains