From 9f3d47e40051d55773383c7594ec2de80838c46a Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Wed, 25 Sep 2024 00:10:02 +0200 Subject: [PATCH] Convert shell scripts to single Python script --- script/remove_blank.sh | 32 --- script/scanRear.sh | 114 ---------- script/scanner.py | 386 ++++++++++++++++++++++++++++++++++ script/scantoemail-0.2.4-1.py | 16 ++ script/scantoemail-0.2.4-1.sh | 17 +- script/scantoemail.py | 1 + script/scantofile-0.2.4-1.py | 15 ++ script/scantofile-0.2.4-1.sh | 94 +-------- script/scantofile.py | 1 + script/scantoocr-0.2.4-1.py | 15 ++ script/scantoocr-0.2.4-1.sh | 13 +- script/scantoocr.py | 1 + script/sendtoftps.py | 30 +++ script/sendtoftps.sh | 28 --- script/trigger_inotify.py | 34 +++ script/trigger_inotify.sh | 16 -- script/trigger_telegram.py | 39 ++++ script/trigger_telegram.sh | 19 -- 18 files changed, 559 insertions(+), 312 deletions(-) delete mode 100755 script/remove_blank.sh delete mode 100755 script/scanRear.sh create mode 100755 script/scanner.py create mode 100755 script/scantoemail-0.2.4-1.py create mode 120000 script/scantoemail.py create mode 100755 script/scantofile-0.2.4-1.py create mode 120000 script/scantofile.py create mode 100755 script/scantoocr-0.2.4-1.py create mode 120000 script/scantoocr.py create mode 100755 script/sendtoftps.py delete mode 100755 script/sendtoftps.sh create mode 100755 script/trigger_inotify.py delete mode 100755 script/trigger_inotify.sh create mode 100755 script/trigger_telegram.py delete mode 100755 script/trigger_telegram.sh diff --git a/script/remove_blank.sh b/script/remove_blank.sh deleted file mode 100755 index 9a25c1b..0000000 --- a/script/remove_blank.sh +++ /dev/null @@ -1,32 +0,0 @@ -#!/bin/bash -# remove_blank - git.waldenlabs.net/calvinrw/brother-paperless-workflow -# Heavily based on from Anthony Street's (and other contributors') -# StackExchange answer: https://superuser.com/a/1307895 - -if [ -n "$REMOVE_BLANK_THRESHOLD" ]; then - IN="$1" - FILENAME="$(basename "${IN}")" - FILENAME="${FILENAME%.*}" - SCRIPTNAME="remove_blank.sh" - PAGES="$(pdfinfo "$IN" | grep ^Pages: | tr -dc '0-9')" - echo "$SCRIPTNAME: threshold=$REMOVE_BLANK_THRESHOLD; analyzing $PAGES pages" - - cd "$(dirname "$IN")" || exit - pwd - - function non_blank() { - for i in $(seq 1 "$PAGES"); do - PERCENT=$(gs -o - -dFirstPage="${i}" -dLastPage="${i}" -sDEVICE=ink_cov "$IN" | grep CMYK | nawk 'BEGIN { sum=0; } {sum += $1 + $2 + $3 + $4;} END { printf "%.5f\n", sum } ') - if [ $(echo "$PERCENT > $REMOVE_BLANK_THRESHOLD" | bc) -eq 1 ]; then - echo "$i" - echo "Page $i: keep" 1>&2 - else - echo "Page $i: delete" 1>&2 - fi - done | tee "$FILENAME.tmp" - } - - set +x - pdftk "${IN}" cat $(non_blank) output "${FILENAME}_noblank.pdf" && - mv "${FILENAME}_noblank.pdf" "$IN" -fi diff --git a/script/scanRear.sh b/script/scanRear.sh deleted file mode 100755 index d9388fa..0000000 --- a/script/scanRear.sh +++ /dev/null @@ -1,114 +0,0 @@ -#!/bin/bash -# $1 = scanner device -# $2 = friendly name - -#override environment, as brscan is screwing it up: -export $(grep -v '^#' /opt/brother/scanner/env.txt | xargs) - -resolution="${RESOLUTION:-300}" - -gm_opts=(-page A4+0+0) -if [ "$USE_JPEG_COMPRESSION" = "true" ]; then - gm_opts+=(-compress JPEG -quality 80) -fi - -device="$1" -script_dir="/opt/brother/scanner/brscan-skey/script" -remove_blank="${script_dir}/remove_blank.sh" - -set -e # Exit on error - -mkdir -p /tmp -cd /tmp -date=$(ls -rd */ | grep "$(date +"%Y-%m-%d")" | head -1) -date=${date%/} -tmp_dir="/tmp/${date}" -filename_base="${tmp_dir}/${date}-back-page" -tmp_output_file="${filename_base}%04d.pnm" -tmp_output_pdf_file="${tmp_dir}/${date}.pdf" -output_pdf_file="/scans/${date}.pdf" - -cd "$tmp_dir" - -kill -9 "$(cat scan_pid)" -rm scan_pid - -function scan_cmd() { - # `brother4:net1;dev0` device name gets passed to scanimage, which it refuses as an invalid device name for some reason. - # Let's use the default scanner for now - # scanimage -l 0 -t 0 -x 215 -y 297 --device-name="$1" --resolution="$2" --batch="$3" - scanimage -l 0 -t 0 -x 215 -y 297 --format=pnm --resolution="$2" --batch="$3" -} - -if [ "$(which usleep 2>/dev/null)" != '' ]; then - usleep 100000 -else - sleep 0.1 -fi -scan_cmd "$device" "$resolution" "$tmp_output_file" -if [ ! -s "${filename_base}0001.pnm" ]; then - if [ "$(which usleep 2>/dev/null)" != '' ]; then - usleep 1000000 - else - sleep 1 - fi - scan_cmd "$device" "$resolution" "$tmp_output_file" -fi - -( - - #rename pages: - numberOfPages=$(find . -maxdepth 1 -name "*front-page*" | wc -l) - echo "number of pages scanned: $numberOfPages" - - cnt=0 - for filename in *front*.pnm; do - cnt=$((cnt + 1)) - cntFormatted=$(printf "%03d" $cnt) - if [[ $filename = *"front"* ]]; then - mv "$filename" "index${cntFormatted}-1-${filename}" - fi - done - cnt=0 - for filename in *back*.pnm; do - cnt=$((cnt + 1)) - if [[ $filename = *"back"* ]]; then - rearIndex=$((numberOfPages - cnt + 1)) - rearIndexFormatted=$(printf "%03d" $rearIndex) - mv "$filename" "index${rearIndexFormatted}-2-${filename}" - fi - done - - ( - echo "converting to PDF for $date..." - gm convert ${gm_opts[@]} ./*.pnm "$tmp_output_pdf_file" - ${script_dir}/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${output_pdf_file}" - ${script_dir}/trigger_telegram.sh "${date}.pdf (rear) scanned" - - $remove_blank "$tmp_output_pdf_file" - mv "$tmp_output_pdf_file" "$output_pdf_file" - - $script_dir/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${output_pdf_file}" - - echo "cleaning up for $date..." - cd /scans || exit - rm -rf "$tmp_dir" - - if [ -z "${OCR_SERVER}" ] || [ -z "${OCR_PORT}" ] || [ -z "${OCR_PATH}" ]; then - echo "OCR environment variables not set, skipping OCR." - else - echo "starting OCR for $date..." - ( - curl -F "userfile=@${output_pdf_file}" -H "Expect:" -o "/scans/${date}-ocr.pdf" "${OCR_SERVER}":"${OCR_PORT}"/"${OCR_PATH}" - ${script_dir}/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${date}-ocr.pdf" - ${script_dir}/trigger_telegram.sh "${date}-ocr.pdf (rear) OCR finished" - ${script_dir}/sendtoftps.sh \ - "${FTP_USER}" \ - "${FTP_PASSWORD}" \ - "${FTP_HOST}" \ - "${FTP_PATH}" \ - "${output_pdf_file}" - ) & - fi - ) & -) & diff --git a/script/scanner.py b/script/scanner.py new file mode 100755 index 0000000..73a3d24 --- /dev/null +++ b/script/scanner.py @@ -0,0 +1,386 @@ +#!/usr/bin/python3 +# $1 = scanner device +# $2 = friendly name + +import glob +import os +import re +import shutil +import signal +import subprocess +import sys +import tempfile +import time +from datetime import datetime +from typing import List, TextIO + +from sendtoftps import sendtoftps +from trigger_inotify import trigger_inotify +from trigger_telegram import trigger_telegram + +SCRIPT_DIR = "/opt/brother/scanner/brscan-skey/script" +SCAN_DIR = "/scans" + +# +# Utility methods +# +def execute_command(log: TextIO, command: List[str], **kwargs) -> None: + log.flush() + print(f" DEBUG: Executing command: {command}, kwargs={kwargs}") + log.flush() + + subprocess.run(command, text=True, stdout=log, stderr=log, **kwargs) + +def execute_command_pid(log: TextIO, command: List[str], **kwargs) -> int: + log.flush() + print(f" DEBUG: Executing command: {command}, kwargs={kwargs}") + log.flush() + + process = subprocess.Popen(command, start_new_session=True, text=True, stdout=log, stderr=log, **kwargs) + return process.pid + +def scan_cmd(log: TextIO, device: str, output_batch: str, scanimage_args: List[str]) -> None: + log.flush() # Required, otherwise scanimage output will appear before the already printed output + + resolution = os.environ.get("RESOLUTION", 300) + # `brother4:net1;dev0` device name gets passed to scanimage, which it refuses as an invalid device name for some reason. + # Let's use the default scanner for now + scan_command = [ + "scanimage", + "-l", "0", "-t", "0", "-x", "215", "-y", "297", + "--format=pnm", + *scanimage_args, + f"--resolution={resolution}", + f"--batch={output_batch}", + ] + execute_command(log, scan_command, check=True) + +def notify(log: TextIO, file_path: str, message: str) -> None: + trigger_inotify(log, os.getenv("SSH_USER"), os.getenv("SSH_PASSWORD"), os.getenv("SSH_HOST"), os.getenv("SSH_PATH"), + file_path) + trigger_telegram(log, os.getenv("TELEGRAM_TOKEN"), os.getenv("TELEGRAM_CHATID"), f"Scanner: {message}") + +def latest_batch_dir() -> str: + prefix = datetime.today().strftime("%Y-%m-%d") + dir_entries = glob.glob(os.path.join(tempfile.gettempdir(), f"{prefix}*")) + dirs = filter(os.path.isdir, dir_entries) + sorted_dirs = sorted(dirs, key=os.path.getctime) + if len(sorted_dirs) == 0: + return None + return os.path.basename(sorted_dirs[-1]) + +def move_across_mounts(source: str, destination: str) -> None: + """Moves a file across mounts. + + Args: + source (str): The source path. + destination (str): The destination path. + """ + + try: + print(f" DEBUG: Moving {source} to {destination}") + shutil.copy2(source, destination) + os.remove(source) + except Exception as e: + print(f" ERROR: moving file - {e}") + +# +# PDF manipulation methods +# +def remove_blank_pages(log: TextIO, input_file: str, remove_blank_threshold: float) -> None: + """Removes blank pages from a PDF file based on a threshold. + + remove_blank - git.waldenlabs.net/calvinrw/brother-paperless-workflow + Heavily based on from Anthony Street's (and other contributors') + StackExchange answer: https://superuser.com/a/1307895 + + Args: + input_file (str): The path to the input PDF file. + remove_blank_threshold (float): The threshold for ink coverage to consider a page non-blank. + """ + + filename = os.path.splitext(os.path.basename(input_file))[0] + dirname = os.path.dirname(input_file) + + # Get the number of pages in the PDF + process = subprocess.Popen(["pdfinfo", input_file], stdout=subprocess.PIPE, stderr=log) + output, _ = process.communicate() + if process.returncode != 0: + print(f" ERROR: getting number of pages from {input_file}: {output}") + return + pages_line = re.search(r"^Pages:\s*(\d+)", output.decode(), re.MULTILINE) + page_count = int(pages_line.group(1)) + + print(f" Analyzing {page_count} pages in {input_file} with threshold {remove_blank_threshold}%") + os.chdir(dirname) + + def non_blank_pages() -> List[str]: + picked_pages = [] + for page in range(1, page_count + 1): + # Use subprocess to run gs and get ink coverage + process = subprocess.Popen( + ["gs", "-o", "-", "-dFirstPage=" + str(page), "-dLastPage=" + str(page), "-sDEVICE=ink_cov", input_file], + stdout=subprocess.PIPE, stderr=log + ) + output, _ = process.communicate() + ink_coverage_line = re.search(r"^\s*([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+([\d\.]+)\s+CMYK", output.decode(), re.MULTILINE) + ink_coverage = sum(map(float, ink_coverage_line.groups())) + + if ink_coverage < remove_blank_threshold: + print(f" Page {page}: delete (ink coverage: {ink_coverage:.2f}%)") + else: + picked_pages += str(page) + print(f" Page {page}: keep (ink coverage: {ink_coverage:.2f}%)") + + return picked_pages + + # Use pdftk to remove pages + try: + output_file = os.path.join(dirname, f"{filename}_noblank.pdf") + selected_pages = non_blank_pages() + command = ["/usr/bin/pdftk", input_file, "cat", *selected_pages, "output", output_file] + execute_command(log, command, check=True) + + log.flush() + if len(selected_pages) == page_count: + print(f" No blank pages detected in {input_file}") + else: + os.replace(output_file, input_file) + print(f" Removed blank pages and saved as {input_file}") + except FileNotFoundError: + print(f" WARNING: '{command[0]}' executable not found. Skipping PDF manipulation.") + except subprocess.CalledProcessError: + print(f" ERROR: manipulating {input_file}. Skipping PDF manipulation.") + +# +# Async job methods +# +def convert_and_post_process(job_name: str, side: str, remove_blank_threshold: float) -> None: + log = sys.stdout + log.flush() + + print(f" {side} side: converting to PDF for {job_name}...") + + # Find job pages, sorted in the correct order + job_dir = os.path.join(tempfile.gettempdir(), job_name) + tmp_output_pdf_file = os.path.join(job_dir, f"{job_name}.pdf") + output_pdf_file = os.path.join(SCAN_DIR, f"{job_name}.pdf") + if side == 'front': + filepath_base = os.path.join(job_dir, f"{job_name}-{side}-page") + input_files = glob.glob(f"{filepath_base}*.pnm") + else: + input_files = glob.glob(os.path.join(job_dir, "*.pnm")) + input_files.sort() + + # Convert pages to single PDF with optional JPEG compression + gm_opts = [] + USE_JPEG_COMPRESSION = os.environ.get("USE_JPEG_COMPRESSION", "false") + if USE_JPEG_COMPRESSION == "true": + gm_opts += ["-compress", "JPEG", "-quality", "80"] + execute_command(log, ["gm", "convert", *gm_opts, *input_files, tmp_output_pdf_file], check=True) + + if not remove_blank_threshold is None: + remove_blank_pages(log, tmp_output_pdf_file, remove_blank_threshold) + + move_across_mounts(tmp_output_pdf_file, output_pdf_file) + + notify(log, output_pdf_file, f"{job_name}.pdf ({side}) scanned") + + # Cleanup temporary files + print(f" {side} side: cleaning up for {job_name}...") + subprocess.run( + f"rm -rf '{job_dir}' {tempfile.gettempdir()}/brscan_jpeg_*", shell=True, check=True, stdout=log, stderr=log) + + # Check for OCR environment variables + ocr_server = os.getenv("OCR_SERVER") + ocr_port = os.getenv("OCR_PORT") + ocr_path = os.getenv("OCR_PATH") + + if not any([ocr_server, ocr_port, ocr_path]): + print(f" {side} side: OCR environment variables not set, skipping OCR.") + else: + ocr_pdf_name = f"{job_name}-ocr.pdf" + ocr_pdf_path = os.path.join(SCAN_DIR, ocr_pdf_name) + + # Perform OCR in the background + print(f" {side} side: starting OCR for {job_name}...") + execute_command(log, + [ + "curl", "-F", "userfile=@${output_pdf_file}", "-H", "Expect:", "-o", ocr_pdf_path, + f"{ocr_server}:{ocr_port}/{ocr_path}", + ], check=True) + + notify(log, ocr_pdf_name, f"{ocr_pdf_name} ({side}) OCR finished") + sendtoftps( + log, + os.getenv("FTP_USER"), + os.getenv("FTP_PASSWORD"), + os.getenv("FTP_HOST"), + os.getenv("FTP_PATH"), + ocr_pdf_path) + + if os.getenv("REMOVE_ORIGINAL_AFTER_OCR") == "true" and os.path.isfile(ocf_pdf_path): + os.remove(output_pdf_file) + + print(f" {side} side: Conversion and post-processing for finished.") + print("-----------------------------------") + +def wait_for_rear_pages_or_convert(job_name: str) -> None: + # Wait for 2 minutes in case there is a rear side scan + print(f" front side: Waiting for 2 minutes before starting file conversion for {job_name}") + time.sleep(120) + + convert_and_post_process(job_name, 'front', None) + +# +# Reading/writing of temp state files +# + +def scanimage_args_path(job_dir: str) -> str: + # File where the arguments to scanimage are saved across steps in the job + return os.path.join(job_dir, ".scanimage_args") + +def save_scanimage_args(job_dir: str, scanimage_args: List[str]) -> None: + # Save scanimage_args in a file for use with future rear side scans + path = scanimage_args_path(job_dir) + with open(path, "w") as scanimage_args_file: + for arg in scanimage_args: + scanimage_args_file.write(arg + "\n") + +def read_scanimage_args(job_dir: str) -> List[str]: + # Read scanimage_args used for front scanning + path = scanimage_args_path(job_dir) + scanimage_args = [] + try: + with open(path, "r") as scanimage_args_file: + scanimage_args = [line.rstrip() for line in scanimage_args_file] + + os.remove(path) + except FileNotFoundError: + print(f" ERROR: scanimage_args file {path} not found.") + + return scanimage_args + +def scan_pid_path(job_dir: str) -> str: + return os.path.join(job_dir, ".scan_pid") + +def save_front_processing_pid(job_dir: str, pid: int) -> None: + with open(scan_pid_path(job_dir), "w") as pid_file: + pid_file.write(str(pid)) + +def kill_front_processing_from_pid(job_dir: str) -> int: + path = scan_pid_path(job_dir) + pid = None + try: + with open(path, "r") as scan_pid_file: + pid = int(scan_pid_file.read().strip()) + print(f" rear side: Read pid from {path}, killing front processing job {pid}") + os.kill(pid, signal.SIGKILL) + except FileNotFoundError: + print(" rear side: ERROR: scan_pid file {path} not found.") + except ProcessLookupError: + print(" rear side: ERROR: process with pid {pid} not found.") + else: + os.remove(path) + return pid + + return None + +# +# Scan entry points +# +def scan_front(log: TextIO, device: str, scanimage_args = []) -> None: + job_name = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") # Generate unique timestamp + job_dir = os.path.join(tempfile.gettempdir(), job_name) + filepath_base = os.path.join(job_dir, f"{job_name}-front-page") + tmp_output_batch = f"{filepath_base}%04d.pnm" + + # Create temporary directory + os.makedirs(job_dir, exist_ok=True) + os.chdir(job_dir) + print(f"- Scanning front to batch {tmp_output_batch}") + + # Save scanimage_args in a file for use with future rear side scans + save_scanimage_args(job_dir, scanimage_args) + + # Perform scan with retry + time.sleep(0.1) + scan_cmd(log, device, tmp_output_batch, scanimage_args) + if not os.path.exists(f"{filepath_base}0001.pnm"): + time.sleep(1) # Short delay before retry + scan_cmd(log, device, tmp_output_batch, scanimage_args) + + # Run conversion process in the background + pid = os.fork() + if pid == 0: # Child process + wait_for_rear_pages_or_convert(job_name) + os._exit(0) # Exit child process cleanly + elif pid > 0: + save_front_processing_pid(job_dir, pid) + print(f" front side: INFO: Waiting to start conversion process for {job_name} in process with PID {pid}") + else: + print(f" front side: ERROR: Fork failed ({pid}).") + +def scan_rear(log: TextIO, device: str, scanimage_args = None) -> None: + # Find latest directory in temp directory + job_name = latest_batch_dir() + print(f"- Scanning rear to latest batch {job_name}") + if job_name is None: + print(" rear side: ERROR: Could not find front scan directory") + return + + print(f" rear side: Found front-side batch: {job_name}") + job_dir = os.path.join(tempfile.gettempdir(), job_name) + filepath_base = os.path.join(job_dir, f"{job_name}-back-page") + tmp_output_batch = f"{filepath_base}%04d.pnm" + + os.chdir(job_dir) + + # Interrupt front scanning process which is waiting from a rear side scan + if kill_front_processing_from_pid(job_dir) is None: + return + + if scanimage_args is None: + # Read scanimage_args used for front scanning + scanimage_args = read_scanimage_args(job_dir) + + # Perform scan with retry + time.sleep(0.1) + scan_cmd(log, device, tmp_output_batch, scanimage_args) + if not os.path.exists(f"{filepath_base}0001.pnm"): + time.sleep(1) # Short delay before retry + scan_cmd(log, device, tmp_output_batch, scanimage_args) + + # Rename pages + number_of_pages = len( + [f for f in os.listdir(".") if (os.path.isfile(f) and "front-page" in f)] + ) + print(f" rear side: INFO: number of pages scanned: {number_of_pages}") + + cnt = 0 + for filename in glob.glob('*front*.pnm'): + cnt += 1 + cnt_formatted = f"{cnt:03d}" + os.rename(filename, f"index{cnt_formatted}-1-{filename}") + print(f" rear side: DEBUG: renamed {filename} to index{cnt_formatted}-1-{filename}") + + cnt = 0 + for filename in glob.glob('*back*.pnm'): + cnt += 1 + rear_index = number_of_pages - cnt + 1 + rear_index_formatted = f"{rear_index:03d}" + os.rename(filename, f"index{rear_index_formatted}-2-{filename}") + print(f" rear side: DEBUG: renamed {filename} to index{rear_index_formatted}-2-{filename}") + + # Convert to PDF + remove_blank_threshold = os.getenv("REMOVE_BLANK_THRESHOLD") + if not remove_blank_threshold is None: + remove_blank_threshold = float(remove_blank_threshold) + + pid = os.fork() + if pid == 0: # Child process + convert_and_post_process(job_name, 'rear', remove_blank_threshold) + os._exit(0) # Exit child process cleanly + + elif pid < 0: + print(f" rear side: ERROR: Fork failed ({pid}).") diff --git a/script/scantoemail-0.2.4-1.py b/script/scantoemail-0.2.4-1.py new file mode 100755 index 0000000..4231bf7 --- /dev/null +++ b/script/scantoemail-0.2.4-1.py @@ -0,0 +1,16 @@ +#!/usr/bin/python3 +# $1 = scanner device +# $2 = friendly name + +import sys +from scanner import scan_rear + +if __name__ == "__main__": + # Open the log file in append mode + with open('/var/log/scanner.log', 'a') as f: + # Redirect stdout to the log file + sys.stdout = f + + device = sys.argv[1] + scan_rear(f, device) + diff --git a/script/scantoemail-0.2.4-1.sh b/script/scantoemail-0.2.4-1.sh index 2090633..f321950 100755 --- a/script/scantoemail-0.2.4-1.sh +++ b/script/scantoemail-0.2.4-1.sh @@ -2,15 +2,12 @@ # $1 = scanner device # $2 = friendly name -{ - echo "scantoemail.sh triggered" - #override environment, as brscan is screwing it up: - export $(grep -v '^#' /opt/brother/scanner/env.txt | xargs) +SCRIPTPATH="$( + cd "$(dirname "$0")" || exit + pwd -P +)" - SCRIPTPATH="$( - cd "$(dirname "$0")" || exit - pwd -P - )" - /bin/bash "$SCRIPTPATH"/scanRear.sh $@ +#override environment, as brscan is screwing it up: +export $(grep -v '^#' /opt/brother/scanner/env.txt | xargs) -} >>/var/log/scanner.log 2>&1 +"$SCRIPTPATH"/scantoemail.py $@ diff --git a/script/scantoemail.py b/script/scantoemail.py new file mode 120000 index 0000000..672e1fb --- /dev/null +++ b/script/scantoemail.py @@ -0,0 +1 @@ +scantoemail-0.2.4-1.py \ No newline at end of file diff --git a/script/scantofile-0.2.4-1.py b/script/scantofile-0.2.4-1.py new file mode 100755 index 0000000..d2f1cb5 --- /dev/null +++ b/script/scantofile-0.2.4-1.py @@ -0,0 +1,15 @@ +#!/usr/bin/python3 +# $1 = scanner device +# $2 = friendly name + +import sys +from scanner import scan_front + +if __name__ == "__main__": + # Open the log file in append mode + with open('/var/log/scanner.log', 'a') as f: + # Redirect stdout to the log file + sys.stdout = f + + device = sys.argv[1] + scan_front(f, device) diff --git a/script/scantofile-0.2.4-1.sh b/script/scantofile-0.2.4-1.sh index 247287d..691bdb4 100755 --- a/script/scantofile-0.2.4-1.sh +++ b/script/scantofile-0.2.4-1.sh @@ -2,92 +2,12 @@ # $1 = scanner device # $2 = friendly name -{ - #override environment, as brscan is screwing it up: - export $(grep -v '^#' /opt/brother/scanner/env.txt | xargs) +SCRIPTPATH="$( + cd "$(dirname "$0")" || exit + pwd -P +)" - resolution="${RESOLUTION:-300}" +#override environment, as brscan is screwing it up: +export $(grep -v '^#' /opt/brother/scanner/env.txt | xargs) - gm_opts=(-page A4+0+0) - if [ "$USE_JPEG_COMPRESSION" = "true" ]; then - gm_opts+=(-compress JPEG -quality 80) - fi - - device="$1" - date=$(date +%Y-%m-%d-%H-%M-%S) - script_dir="/opt/brother/scanner/brscan-skey/script" - tmp_dir="/tmp/$date" - filename_base="${tmp_dir}/${date}-front-page" - tmp_output_file="${filename_base}%04d.pnm" - output_pdf_file="/scans/${date}.pdf" - - set -e # Exit on error - - mkdir -p "$tmp_dir" - cd "$tmp_dir" - filename_base="/tmp/${date}/${date}-front-page" - output_file="${filename_base}%04d.pnm" - echo "filename: $tmp_output_file" - - function scan_cmd() { - # `brother4:net1;dev0` device name gets passed to scanimage, which it refuses as an invalid device name for some reason. - # Let's use the default scanner for now - # scanimage -l 0 -t 0 -x 215 -y 297 --device-name="$1" --resolution="$2" --batch="$3" - scanimage -l 0 -t 0 -x 215 -y 297 --format=pnm --resolution="$2" --batch="$3" - } - - if [ "$(which usleep 2>/dev/null)" != '' ]; then - usleep 100000 - else - sleep 0.1 - fi - scan_cmd "$device" "$resolution" "$tmp_output_file" - if [ ! -s "${filename_base}0001.pnm" ]; then - if [ "$(which usleep 2>/dev/null)" != '' ]; then - usleep 1000000 - else - sleep 1 - fi - scan_cmd "$device" "$resolution" "$tmp_output_file" - fi - - #only convert when no back pages are being scanned: - ( - if [ "$(which usleep 2>/dev/null)" != '' ]; then - usleep 120000000 - else - sleep 120 - fi - - ( - echo "converting to PDF for $date..." - gm convert ${gm_opts[@]} "$filename_base"*.pnm "$output_pdf_file" - ${script_dir}/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${output_pdf_file}" - ${script_dir}/trigger_telegram.sh "${date}.pdf (front) scanned" - - echo "cleaning up for $date..." - cd /scans || exit - rm -rf "$tmp_dir" - - if [ -z "${OCR_SERVER}" ] || [ -z "${OCR_PORT}" ] || [ -z "${OCR_PATH}" ]; then - echo "OCR environment variables not set, skipping OCR." - else - echo "starting OCR for $date..." - ( - curl -F "userfile=@${output_pdf_file}" -H "Expect:" -o "/scans/${date}-ocr.pdf" "${OCR_SERVER}":"${OCR_PORT}"/"${OCR_PATH}" - ${script_dir}/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${date}-ocr.pdf" - ${script_dir}/trigger_telegram.sh "${date}-ocr.pdf (front) OCR finished" - ${script_dir}/sendtoftps.sh \ - "${FTP_USER}" \ - "${FTP_PASSWORD}" \ - "${FTP_HOST}" \ - "${FTP_PATH}" \ - "${output_pdf_file}" - ) & - fi - ) & - ) & - echo $! >scan_pid - echo "conversion process for $date is running in PID: $(cat scan_pid)" - -} >>/var/log/scanner.log 2>&1 +"$SCRIPTPATH"/scantofile.py $@ diff --git a/script/scantofile.py b/script/scantofile.py new file mode 120000 index 0000000..64bff2a --- /dev/null +++ b/script/scantofile.py @@ -0,0 +1 @@ +scantofile-0.2.4-1.py \ No newline at end of file diff --git a/script/scantoocr-0.2.4-1.py b/script/scantoocr-0.2.4-1.py new file mode 100755 index 0000000..9ad4514 --- /dev/null +++ b/script/scantoocr-0.2.4-1.py @@ -0,0 +1,15 @@ +#!/usr/bin/python3 +# $1 = scanner device +# $2 = friendly name + +import sys +from scanner import scan_front + +if __name__ == "__main__": + # Open the log file in append mode + with open('/var/log/scanner.log', 'a') as f: + # Redirect stdout to the log file + sys.stdout = f + + device = sys.argv[1] + scan_front(f, device, ["--mode=True Gray"]) diff --git a/script/scantoocr-0.2.4-1.sh b/script/scantoocr-0.2.4-1.sh index fc86ca2..2c32def 100755 --- a/script/scantoocr-0.2.4-1.sh +++ b/script/scantoocr-0.2.4-1.sh @@ -2,11 +2,12 @@ # $1 = scanner device # $2 = friendly name -{ +SCRIPTPATH="$( + cd "$(dirname "$0")" || exit + pwd -P +)" - echo "ERROR!" - echo "This function is not implemented." - echo "You may implement your own script and mount under $0." - echo "Check out scripts in same folder or https://github.com/PhilippMundhenk/BrotherScannerDocker for examples." +#override environment, as brscan is screwing it up: +export $(grep -v '^#' /opt/brother/scanner/env.txt | xargs) -} >>/var/log/scanner.log 2>&1 +"$SCRIPTPATH"/scantoocr.py $@ diff --git a/script/scantoocr.py b/script/scantoocr.py new file mode 120000 index 0000000..ccdc918 --- /dev/null +++ b/script/scantoocr.py @@ -0,0 +1 @@ +scantoocr-0.2.4-1.py \ No newline at end of file diff --git a/script/sendtoftps.py b/script/sendtoftps.py new file mode 100755 index 0000000..0b2d2a4 --- /dev/null +++ b/script/sendtoftps.py @@ -0,0 +1,30 @@ +#!/usr/bin/python3 + +def sendtoftps(log, user, password, address, filepath, file): + """Uploads a file to an FTP server. + + Args: + user (str): The FTP username. + password (str): The FTP password. + address (str): The FTP address. + filepath (str): The file path on the FTP server. + file (str): The file to upload. + """ + + command = [ + "curl", "--silent", "--show-error", "--ssl-reqd", + "--user", f"{user}:{password}", + "--upload-file", file, + f"ftp://{address}{filepath}" + ] + + try: + subprocess.run(command, check=True, stdout=log, stderr=log) + print(f"Uploading to FTP server {address} successful.") + except subprocess.CalledProcessError: + print("Uploading to FTP failed while using curl") + print(f"user: {user}") + print(f"address: {address}") + print(f"filepath: {filepath}") + print(f"file: {file}") + exit(1) diff --git a/script/sendtoftps.sh b/script/sendtoftps.sh deleted file mode 100755 index f2106ec..0000000 --- a/script/sendtoftps.sh +++ /dev/null @@ -1,28 +0,0 @@ -user=$1 -password=$2 -address=$3 -filepath=$4 -file=$5 - -cd /scans - -if [ -z "${user}" ] || [ -z "${password}" ] || [ -z "${address}" ] || [ -z "${filepath}" ] || [ -z "${file}" ]; then - echo "FTP environment variables not set, skipping inotify trigger." -else - if curl --silent \ - --show-error \ - --ssl-reqd \ - --user "${user}:${password}" \ - --upload-file "${file}" \ - "ftp://${address}${filepath}" ; then - echo "Uploading to ftp server ${address} successful." - else - echo "Uploading to ftp failed while using curl" - echo "user: ${user}" - echo "address: ${address}" - echo "filepath: ${filepath}" - echo "file: ${file}" - exit 1 - fi -fi - diff --git a/script/trigger_inotify.py b/script/trigger_inotify.py new file mode 100755 index 0000000..47cdc56 --- /dev/null +++ b/script/trigger_inotify.py @@ -0,0 +1,34 @@ +#!/usr/bin/python3 + +import subprocess +import os +from typing import TextIO + +def trigger_inotify(log: TextIO, user: str, password: str, address: str, filepath: str, file: str) -> None: + """Triggers inotify for a file. + + Args: + user (str): The SSH username. + password (str): The SSH password. + address (str): The SSH address. + filepath (str): The file path. + file (str): The file name. + """ + + if not user or not password or not address or not filepath: + print(" INFO: SSH environment variables not set, skipping inotify trigger.") + return + + command = [ + "sshpass", "-p", password, + "ssh", "-o", "StrictHostKeyChecking=no", + f"{user}@{address}", + f"sed \"\" -i {filepath}/{file}" + ] + + try: + subprocess.run(command, check=True, stdout=log, stderr=log) + print("Trigger inotify successful") + except subprocess.CalledProcessError: + print("Trigger inotify failed") + exit(1) diff --git a/script/trigger_inotify.sh b/script/trigger_inotify.sh deleted file mode 100755 index 4077b22..0000000 --- a/script/trigger_inotify.sh +++ /dev/null @@ -1,16 +0,0 @@ -user=$1 -password=$2 -address=$3 -filepath=$4 -file=$5 - -if [ -z "${user}" ] || [ -z "${password}" ] || [ -z "${address}" ] || [ -z "${filepath}" ]; then - echo "SSH environment variables not set, skipping inotify trigger." -else - if sshpass -p "$password" ssh -o StrictHostKeyChecking=no "$user"@"$address" "sed \"\" -i $filepath/$file"; then - echo "trigger inotify successful" - else - echo "trigger inotify failed" - exit 1 - fi -fi diff --git a/script/trigger_telegram.py b/script/trigger_telegram.py new file mode 100755 index 0000000..8015bcc --- /dev/null +++ b/script/trigger_telegram.py @@ -0,0 +1,39 @@ +#!/usr/bin/python3 + +import os +import json +import urllib.parse +from typing import TextIO + +def trigger_telegram(log: TextIO, token: str, chat_id: str, message: str) -> None: + """Sends a Telegram message using the provided token and chat ID. + """ + + if not token or not chat_id: + print(" INFO: TELEGRAM_TOKEN or TELEGRAM_CHATID environment variables not set, skipping Telegram trigger.") + exit(1) + + # URL encode the message + encoded_message = urllib.parse.quote(message, safe="") + + # Build the URL + url = f"https://api.telegram.org/{token}/sendMessage" + + # Prepare data payload + payload = { + "chat_id": chat_id, + "text": encoded_message + } + + # Use requests library for a more robust solution (install with 'pip install requests') + try: + import requests + response = requests.post(url, json=payload) + response.raise_for_status() # Raise an exception for non-200 response + print(" Telegram message sent successfully.") + except ModuleNotFoundError: + print(" WARNING: 'requests' library not found. Using wget fallback.") + # Fallback using wget (not recommended for production due to limited feedback) + os.system(f"wget -qO- --post-data='chat_id={chat_id}&text={encoded_message}' '{url}' >/dev/null") + except requests.exceptions.RequestException as e: + print(f" ERROR: sending Telegram message: {e}") diff --git a/script/trigger_telegram.sh b/script/trigger_telegram.sh deleted file mode 100755 index f96f6e6..0000000 --- a/script/trigger_telegram.sh +++ /dev/null @@ -1,19 +0,0 @@ -#!/bin/bash - -# Check if TELEGRAM_TOKEN and TELEGRAM_CHATID are both set -if [ -z "${TELEGRAM_TOKEN}" ] || [ -z "${TELEGRAM_CHATID}" ]; then - echo "TELEGRAM_TOKEN or TELEGRAM_CHATID is not set" -else - # Use the environment variables TELEGRAM_TOKEN and TELEGRAM_CHATID - TOKEN="$TELEGRAM_TOKEN" - CHAT_ID="$TELEGRAM_CHATID" - - # The message is passed as a parameter - MESSAGE="Scanner: $1" - - # URL encode the message to handle spaces and special characters - ENCODED_MESSAGE=$(echo "$MESSAGE" | jq -sRr @uri) - - # Send the message using wget - wget -qO- --post-data="chat_id=$CHAT_ID&text=$ENCODED_MESSAGE" "https://api.telegram.org/$TOKEN/sendMessage" >/dev/null -fi