From b53bdaa3a40688c19c20c3741d77c1414133af1b Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Sun, 22 Sep 2024 17:08:24 +0200 Subject: [PATCH 1/3] Add support for removing of blank pages --- Dockerfile | 52 +++++++++++++++++++++++------------------- README.md | 35 ++++++++++++++++++++++------ script/remove_blank.sh | 32 ++++++++++++++++++++++++++ script/scanRear.sh | 8 ++++++- 4 files changed, 95 insertions(+), 32 deletions(-) create mode 100755 script/remove_blank.sh diff --git a/Dockerfile b/Dockerfile index d12c0b4..f31ad4c 100644 --- a/Dockerfile +++ b/Dockerfile @@ -7,33 +7,36 @@ RUN apt-get update && apt-get install -y --no-install-recommends apt-utils && ap RUN apt-get -y update && apt-get -y upgrade && apt-get -y clean RUN apt-get -y install \ - sane \ - sane-utils \ - ghostscript \ - netpbm \ - x11-common \ - wget \ - graphicsmagick \ - curl \ - ssh \ - sshpass \ - lighttpd \ - php-cgi \ - php-curl \ - sudo \ - iproute2 \ - jq \ - && apt-get -y clean + sane \ + sane-utils \ + ghostscript \ + netpbm \ + x11-common \ + wget \ + graphicsmagick \ + curl \ + ssh \ + sshpass \ + lighttpd \ + php-cgi \ + php-curl \ + sudo \ + iproute2 \ + jq \ + bc \ + pdftk \ + poppler-utils \ + && apt-get -y clean RUN cd /tmp && \ - wget https://download.brother.com/welcome/dlf105200/brscan4-0.4.11-1.amd64.deb && \ - dpkg -i /tmp/brscan4-0.4.11-1.amd64.deb && \ - rm /tmp/brscan4-0.4.11-1.amd64.deb + wget https://download.brother.com/welcome/dlf105200/brscan4-0.4.11-1.amd64.deb && \ + dpkg -i /tmp/brscan4-0.4.11-1.amd64.deb && \ + rm /tmp/brscan4-0.4.11-1.amd64.deb RUN cd /tmp && \ - wget https://download.brother.com/welcome/dlf006652/brscan-skey-0.3.1-2.amd64.deb && \ - dpkg -i /tmp/brscan-skey-0.3.1-2.amd64.deb && \ - rm /tmp/brscan-skey-0.3.1-2.amd64.deb + wget https://download.brother.com/welcome/dlf006652/brscan-skey-0.3.1-2.amd64.deb && \ + dpkg -i /tmp/brscan-skey-0.3.1-2.amd64.deb && \ + rm /tmp/brscan-skey-0.3.1-2.amd64.deb ADD files/runScanner.sh 
/opt/brother/runScanner.sh COPY script /opt/brother/scanner/brscan-skey/script @@ -50,6 +53,7 @@ ENV NAME="Scanner" ENV MODEL="MFC-L2700DW" ENV IPADDRESS="192.168.1.123" ENV USERNAME="NAS" +ENV REMOVE_BLANK_THRESHOLD="0.3" #only set these variables in the compose file, if inotify needs to be triggered (e.g., for Synology Drive): ENV SSH_USER="" @@ -67,7 +71,7 @@ ENV TELEGRAM_TOKEN="" ENV TELEGRAM_CHATID="" # Make sure this ends in a slash. -ENV FTP_PATH="/scans/" +ENV FTP_PATH="/scans/" EXPOSE 54925 EXPOSE 54921 diff --git a/README.md b/README.md index 5074e80..5429e22 100644 --- a/README.md +++ b/README.md @@ -1,20 +1,24 @@ # Dockerized Brother Scanner + This is a dockerized scanner setup for Brother scanners. It allows you to run your scan server in a Docker environment and thus also on devices such as a Synology DiskStation. Additionally, some scripts are included that allow you to easily create duplex documents on non-duplex scanners. A configurable web-interface is provided, allowing you to trigger scans from your smartphone or PC. ## Setup + You have two options to set up your container: Preferred and Fallback. The preferred method is more complex but is able to address more situations, whereas the fallback method is much simpler, but might not work in all scenarios. Both are described in the following. ### Preferred + The preferred setup is slightly more complex, but can be applied in a larger number of settings, such as containers running in virtual machines, etc. Here, we require the IP address under which the container is reachable, as it will be contacted by the scanner, when scanning via the shortcut buttons. This may be the IP address of the Docker host, your virtual machine containing the Docker environment, etc. Additionally, we will need to forward the correct ports in Docker. 
Consider the following docker-compose file as an example for the preferred setup: + ```yaml version: '3' @@ -22,7 +26,7 @@ services: brother-scanner: image: ghcr.io/philippmundhenk/brotherscannerdocker volumes: - - /path/on/host:/scans + - /path/on/host:/scans ports: - 54925:54925/udp # mandatory, for scanner tools - 54921:54921 # mandatory, for scanner tools @@ -42,6 +46,7 @@ Here, the scanner (an MFC-L2700DW), is running on IP 192.168.1.10 and the contai The startup scripts will automatically configure the included Brother tooling, to set up the scanner accordingly. ### Fallback + The fallback setup might be a little more stable, but requires that your container can be bridged to the host network, rather than using Docker NAT. This is not possible in all situations (e.g., Docker on Win/Mac, limited underlying VM configuration, etc.). Consider the following docker-compose file: @@ -53,7 +58,7 @@ services: brother-scanner: image: ghcr.io/philippmundhenk/brotherscannerdocker volumes: - - /path/on/host:/scans + - /path/on/host:/scans environment: - NAME=Scanner - MODEL=MFC-L2700DW @@ -69,6 +74,7 @@ Note, that we do not need to specify the host IP address in this case, as we ass The startup scripts automatically tries to guess the host interface and adjust the Brother driver settings correctly. ### Further Notes + Note that the mounted folder /scans needs to have the correct permissions. By default, the scanner will run with user uid 1000 and gid 1000. You may change this through setting the environment variables UID and GID. @@ -82,6 +88,7 @@ If OCR, FTP, SSH options are specified, these will be executed, as well. There are a number of additional options explained in the following. 
## Options + You can configure the tool via environment variables: | Variable | Type | Description | @@ -90,6 +97,7 @@ You can configure the tool via environment variables: | MODEL | mandatory | Model of your scanner (e.g., MFC-L2700DW) | | IPADDRESS | mandatory | IP Address of your scanner | | RESOLUTION | optional | DPI resolution of scan, refer to capabilities of printer on startup | +| REMOVE_BLANK_THRESHOLD | optional | Percentage of content in page until which a page is considered blank. Blank pages are removed if this variable is defined | | FTP_USER | optional | Username of an FTP(S) server to upload the completed scan to (see below) | | FTP_PASSWORD | optional | Username of an FTP(S) server to upload the completed scan to (see below) | | FTP_HOST | optional | Address of an FTP(S) server to upload the completed scan to (see below) | @@ -115,10 +123,11 @@ You can configure the tool via environment variables: | TELEGRAM_TOKEN | optional | If TELEGRAM_TOKEN and TELEGRAM_CHATID are set, then this sends notification | | TELEGRAM_CHATID | optional | If TELEGRAM_TOKEN and TELEGRAM_CHATID are set, then this sends notification | - ### FTPS upload + In addition to the storage in the mounted volume, you can use FTPS (Secure FTP) Upload. To do so, set the following environment variables to your values: + ``` - FTP_USER="scanner" - FTP_PASSWORD="scanner" @@ -129,40 +138,48 @@ To do so, set the following environment variables to your values: This only works with the scripts offered here in folder script/ (see Customize). ### Automatic Synchronization Solutions + Many automatic synchronization solutions, such as Synology CloudStation, are notified -about changes in the filesystem through inotify (see http://man7.org/linux/man-pages/man7/inotify.7.html). +about changes in the filesystem through inotify (see <http://man7.org/linux/man-pages/man7/inotify.7.html>). As the volume is mounted in Docker, the security mechanisms isolate the host and container filesystem. This means that such systems do not work. 
To solve this issue, a simple 'sed "" -i' can be performed on the file. The scripts in folder script/ use SSH to execute this command. This generates an inotify event, in turn starting synchronisation. To use this option, set the following variables to your values: + ``` - SSH_USER="admin" - SSH_PASSWORD="admin" - SSH_HOST="localhost" - SSH_PATH="/path/to/scans/folder/" ``` + Of course this requires SSH access to the host. If this is not available, consider the FTPS option. ### OCR + This image is prepared to utilize an OCR service, such as [my TesseractOCRMicroservice](https://github.com/PhilippMundhenk/TesseractOCRMicroservice). This uploads, waits for OCR to complete and downloads the file again. The resulting PDF file is saved in the /scans directory, with the appendix "-ocr" in the filename. To use this option, set the following variables to your values: + ``` - OCR_SERVER=192.168.1.101 - OCR_PORT=8080 - OCR_PATH=ocr.php ``` -This will call the OCR service at https://192.168.1.101:8080/ocr.php. + +This will call the OCR service at <https://192.168.1.101:8080/ocr.php>. ### Webserver + This image comes with an integrated webserver, allowing you to control the scanning functions also via API or GUI. To activate the webserver, you need to set an according environment variable. By default, the image uses port 80, but you may configure that. Additionally, for the GUI, you can rename and hide individual functions. here is an example of the environment: + ``` - WEBSERVER=true # optional, activates GUI & API - PORT=33355 # optional, sets port for webserver (default: 80) @@ -173,21 +190,24 @@ here is an example of the environment: ``` #### GUI + You can access the GUI under the IP of your container and the set port (or 80 in default case). 
With the full config example below, the result will look something like this: -![Screenshot of main web interface](doc/gui-main.jpg) +![Screenshot of main web interface](doc/gui-main.jpg) ![Screenshot of file list web interface](doc/gui-filelist.jpg) Note that the interface does not block when pressing a button. Thus, make sure to wait for your scan to complete, before pressing another button. #### API + The GUI uses a minimal "API" at the backend, which you can also use from other tooling (e.g., Home Assistant or a control panel near your printer). To scan, simply call `http://:/scan.php?target=` Also check out the endpoints `list.php`, `download.php`, `active.php`. Maybe one day an OpenAPI Spec will be included. ## Full Docker Compose Example + This docker-compose file can be run with minimal adaptions (environment variables MODEL, IPADDRESS, HOST_IPADDRESS & volume where files are to be stored): ```yaml @@ -231,12 +251,13 @@ services: ``` ## Customize Scan Scripts + As the standard scripts might not working particularly well for your purpose, you may customize them to your needs. You may also add additional scripts, as currently "Scan to Image" and "Scan to OCR" are not being used. Have a look in the folder `script/` in this repository for ideas. These scripts show some examples on how one might use the buttons on the printer. If you change these scripts, make sure to leave the filename as is, as the Brother drivers will call these scripts (or adapt /opt/brother/scanner/brscan-skey/brscan-skey.config). -Each script corresponds to a shortcut button on the scanner. +Each script corresponds to a shortcut button on the scanner. This way you can customize the actions running on your scanner. Hint: These scripts don't necessarily need to do scanning tasks. 
diff --git a/script/remove_blank.sh b/script/remove_blank.sh
new file mode 100755
index 0000000..9a25c1b
--- /dev/null
+++ b/script/remove_blank.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+# remove_blank.sh <pdf> - removes blank pages from <pdf> in place.
+# No-op unless REMOVE_BLANK_THRESHOLD is set. A page is kept when the sum of its
+# C, M, Y and K values from Ghostscript's ink_cov device exceeds the threshold.
+# From git.waldenlabs.net/calvinrw/brother-paperless-workflow, heavily based on
+# Anthony Street's (and other contributors') answer: https://superuser.com/a/1307895
+
+[ -n "$REMOVE_BLANK_THRESHOLD" ] || exit 0
+IN="$1"
+die() { echo "remove_blank.sh: $*" >&2; exit 1; }
+# Reject a malformed threshold up front instead of mis-classifying every page.
+[[ "$REMOVE_BLANK_THRESHOLD" =~ ^([0-9]+\.?[0-9]*|\.[0-9]+)$ ]] || die "invalid threshold '$REMOVE_BLANK_THRESHOLD'"
+PAGES="$(pdfinfo "$IN" | grep ^Pages: | tr -dc '0-9')"
+[ -n "$PAGES" ] || die "cannot read page count of '$IN'"
+echo "remove_blank.sh: threshold=$REMOVE_BLANK_THRESHOLD; analyzing $PAGES pages"
+
+# Prints the numbers of the pages to keep on stdout; verdicts go to stderr.
+non_blank() {
+  local i
+  for ((i = 1; i <= PAGES; i++)); do
+    if gs -o - -dFirstPage="$i" -dLastPage="$i" -sDEVICE=ink_cov "$IN" | grep CMYK |
+      awk -v t="$REMOVE_BLANK_THRESHOLD" '{ s += $1 + $2 + $3 + $4 } END { exit !(s > t + 0) }'; then
+      echo "$i" && echo "Page $i: keep" 1>&2
+    else
+      echo "Page $i: delete" 1>&2
+    fi
+  done
+}
+mapfile -t keep < <(non_blank)
+[ "${#keep[@]}" -gt 0 ] || die "all pages blank; leaving '$IN' unchanged"
+OUT="${IN%.pdf}_noblank.pdf" # temporary result, written next to the input (no cd needed)
+pdftk "$IN" cat "${keep[@]}" output "$OUT" && mv "$OUT" "$IN"
diff --git a/script/scanRear.sh b/script/scanRear.sh index 2addbc1..0569739 100755 --- a/script/scanRear.sh +++ b/script/scanRear.sh @@ -14,6 +14,7 @@ fi device="$1" script_dir="/opt/brother/scanner/brscan-skey/script" +remove_blank="${script_dir}/remove_blank.sh" mkdir -p /tmp cd /tmp || exit @@ -78,10 +79,15 @@ fi ( echo "converting to PDF for $date..." 
- gm convert ${gm_opts[@]} ./*.pnm "$output_pdf_file" + gm convert ${gm_opts[@]} ./*.pnm "$tmp_output_pdf_file" ${script_dir}/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${output_pdf_file}" ${script_dir}/trigger_telegram.sh "${date}.pdf (rear) scanned" + $remove_blank "$tmp_output_pdf_file" + mv "$tmp_output_pdf_file" "$output_pdf_file" + + $script_dir/trigger_inotify.sh "${SSH_USER}" "${SSH_PASSWORD}" "${SSH_HOST}" "${SSH_PATH}" "${output_pdf_file}" + echo "cleaning up for $date..." cd /scans || exit rm -rf "$tmp_dir" From e2f1ca60294ce71de028283b7c22b77a6e275162 Mon Sep 17 00:00:00 2001 From: Philipp Mundhenk Date: Sun, 22 Sep 2024 19:53:12 +0200 Subject: [PATCH 2/3] Update scanRear.sh revert to old kill method --- script/scanRear.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/scanRear.sh b/script/scanRear.sh index 0569739..db6329d 100755 --- a/script/scanRear.sh +++ b/script/scanRear.sh @@ -28,7 +28,7 @@ output_pdf_file="/scans/${date}.pdf" cd "$tmp_dir" || exit -pkill -P "$(cat scan_pid)" +kill -9 "$(cat scan_pid)" rm scan_pid function scan_cmd() { From 110fbde6114cb4818b7ed87a0f785434237b52c5 Mon Sep 17 00:00:00 2001 From: Pedro Pombeiro Date: Sun, 22 Sep 2024 20:49:18 +0200 Subject: [PATCH 3/3] Add default value to README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 5429e22..20846a8 100644 --- a/README.md +++ b/README.md @@ -97,7 +97,7 @@ You can configure the tool via environment variables: | MODEL | mandatory | Model of your scanner (e.g., MFC-L2700DW) | | IPADDRESS | mandatory | IP Address of your scanner | | RESOLUTION | optional | DPI resolution of scan, refer to capabilities of printer on startup | -| REMOVE_BLANK_THRESHOLD | optional | Percentage of content in page until which a page is considered blank. 
Blank pages are removed if this variable is defined | +| REMOVE_BLANK_THRESHOLD | optional | Maximum percentage of content (ink coverage) a page may have and still be considered blank; a page is kept only if its content exceeds this value. A good default is 0.3. Blank pages are removed only if this variable is defined | | FTP_USER | optional | Username of an FTP(S) server to upload the completed scan to (see below) | | FTP_PASSWORD | optional | Username of an FTP(S) server to upload the completed scan to (see below) | | FTP_HOST | optional | Address of an FTP(S) server to upload the completed scan to (see below) |