added option to add extra tesseract config

- config list available at https://muthu.co/all-tesseract-ocr-options
- bump version to 2.1.1
Dadangdut33 · Feb 8, 2023 · dc86b14 · dc86b14
1 parent bf84833
commit dc86b14
Show file tree

Hide file tree

Showing 9 changed files with 52 additions and 21 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,10 +1,18 @@
-test/
+# runtime project files
 *.pyc
 build/
 dist/
 captured/
 user/
 log/
-.vscode/
+test/
+
+# virtual environment
 venv/
+
+# user
+.vscode/
+
+# created when running build_pyinstaller
+LICENSE.txt 
 *.spec
diff --git a/installer_example.iss b/installer_example.iss
@@ -2,7 +2,7 @@
 ; SEE THE DOCUMENTATION FOR DETAILS ON CREATING INNO SETUP SCRIPT FILES!
 
 #define MyAppName "Screen Translate"
-#define MyAppVersion "2.1.0"
+#define MyAppVersion "2.1.1"
 #define MyAppPublisher "Dadangdut33"
 #define MyAppURL "https://github.com/Dadangdut33/Screen-Translate"
 #define MyAppExeName "ScreenTranslate.exe"

diff --git a/screen_translate/_version.py b/screen_translate/_version.py
@@ -1,2 +1,2 @@
-__version__ = "2.1.0"
+__version__ = "2.1.1"
 __setting_version__ = "2.0.0" # update only if significant setting field changes
diff --git a/screen_translate/components/window/Settings.py b/screen_translate/components/window/Settings.py
@@ -10,7 +10,7 @@
 
 from screen_translate.Globals import gClass, path_logo_icon, dir_captured, fJson, app_name, reg_key_name
 from screen_translate.Logging import logger, current_log, dir_log
-from screen_translate.utils.Helper import nativeNotify, startFile, tb_copy_only
+from screen_translate.utils.Helper import nativeNotify, startFile, tb_copy_only, OpenUrl
 from screen_translate.utils.Monitor import get_offset, getScreenTotalGeometry
 from screen_translate.utils.AutoStart import set_autostart_registry, check_autostart_registry
 from screen_translate.utils.Capture import seeFullWindow
@@ -275,22 +275,33 @@ def __init__(self, master: tk.Tk):
         self.lf_OCR_setting = tk.LabelFrame(self.f_cat_2_ocr, text="• Tesseract OCR Settings")
         self.lf_OCR_setting.pack(side=tk.TOP, fill=tk.X, expand=True, padx=5, pady=(0, 5))
 
-        self.f_OCR_setting = ttk.Frame(self.lf_OCR_setting)
-        self.f_OCR_setting.pack(side=tk.TOP, fill=tk.X, expand=True)
+        self.f_OCR_setting_1 = ttk.Frame(self.lf_OCR_setting)
+        self.f_OCR_setting_1.pack(side=tk.TOP, fill=tk.X, expand=True)
 
-        self.lbl_OCR_tesseract_path = ttk.Label(self.f_OCR_setting, text="Tesseract Path")
+        self.f_OCR_setting_2 = ttk.Frame(self.lf_OCR_setting)
+        self.f_OCR_setting_2.pack(side=tk.TOP, fill=tk.X, expand=True)
+
+        self.lbl_OCR_tesseract_path = ttk.Label(self.f_OCR_setting_1, text="Tesseract Path")
         self.lbl_OCR_tesseract_path.pack(side=tk.LEFT, padx=5, pady=5)
-        CreateToolTip(self.f_OCR_setting, "Tesseract.exe location")
+        CreateToolTip(self.f_OCR_setting_1, "Tesseract.exe location")
 
-        self.entry_OCR_tesseract_path = ttk.Entry(self.f_OCR_setting, width=70)
+        self.entry_OCR_tesseract_path = ttk.Entry(self.f_OCR_setting_1, width=70)
         self.entry_OCR_tesseract_path.bind("<Key>", lambda event: tb_copy_only(event))  # Disable textbox input
         self.entry_OCR_tesseract_path.pack(side=tk.LEFT, padx=5, pady=5, fill=tk.X, expand=True)
         CreateToolTip(self.entry_OCR_tesseract_path, "Tesseract.exe location")
 
-        self.btnSearchTesseract = ttk.Button(self.f_OCR_setting, text="...", command=self.searchTesseract)
+        self.btnSearchTesseract = ttk.Button(self.f_OCR_setting_1, text="...", command=self.searchTesseract)
         self.btnSearchTesseract.pack(side=tk.LEFT, padx=5, pady=5)
 
-        # [Ocr enhancement]
+        self.lbl_extra_config = ttk.Label(self.f_OCR_setting_2, text="Extra Config")
+        self.lbl_extra_config.pack(side=tk.LEFT, padx=5, pady=5)
+        CreateToolTip(self.lbl_extra_config, "Extra config for Tesseract.\n\nClick here to see available options")
+        self.lbl_extra_config.bind("<Button-1>", lambda event: OpenUrl("https://muthu.co/all-tesseract-ocr-options/"))
+
+        self.entry_OCR_config = ttk.Entry(self.f_OCR_setting_2, width=70)
+        self.entry_OCR_config.pack(side=tk.LEFT, padx=5, pady=5, fill=tk.X, expand=True)
+        CreateToolTip(self.entry_OCR_config, "Extra config for Tesseract. Click on the label to see the available config.\n\nExample input: --psm 5 --oem 1")
+
         self.lf_OCR_enhancement = tk.LabelFrame(self.f_cat_2_ocr, text="• OCR Enhancement", width=900, height=75)
         self.lf_OCR_enhancement.pack(side=tk.TOP, fill=tk.X, expand=False, padx=5, pady=5)
 
@@ -991,6 +1002,9 @@ def init_setting(self):
         self.entry_OCR_tesseract_path.delete(0, tk.END)
         self.entry_OCR_tesseract_path.insert(0, fJson.settingCache["tesseract_loc"])
 
+        self.entry_OCR_config.delete(0, tk.END)
+        self.entry_OCR_config.insert(0, fJson.settingCache["tesseract_config"])
+
         self.cb_OCR_bg.set(fJson.settingCache["enhance_background"])
         self.cbtnInvoker(fJson.settingCache["enhance_with_cv2_Contour"], self.cbtn_OCR_cv2contour)
         self.cbtnInvoker(fJson.settingCache["enhance_with_grayscale"], self.cbtn_OCR_grayscale)
@@ -1134,6 +1148,7 @@ def saveSettings(self):
             # ------------------ #
             # Capture
             "tesseract_loc": tesseractPathInput,
+            "tesseract_config": self.entry_OCR_config.get(),
             "replaceNewLine": self.cbtn_OCR_replace_newline.instate(["selected"]),
             "replaceNewLineWith": self.entry_OCR_replace_newline_with.get(),
             "captureLastValDelete": self.sb_OCR_delete_lastchar.get(),

diff --git a/screen_translate/utils/Capture.py b/screen_translate/utils/Capture.py
@@ -62,6 +62,7 @@ def ocrFromCoords(coords: List[int]):
 
         # Set variables
         pytesseract.pytesseract.tesseract_cmd = fJson.settingCache["tesseract_loc"]
+        config = fJson.settingCache["tesseract_config"] if fJson.settingCache["tesseract_config"] else ""
         enhance_withCv2 = fJson.settingCache["enhance_with_cv2_Contour"]
         grayscale = fJson.settingCache["enhance_with_grayscale"]
         debugmode = fJson.settingCache["enhance_debugmode"]
@@ -130,7 +131,7 @@ def ocrFromCoords(coords: List[int]):
                 cropped = imgFinal[y : y + h, x : x + w]
 
                 # Apply OCR on the cropped image
-                text = pytesseract.image_to_string(cropped, langCode)
+                text = pytesseract.image_to_string(cropped, langCode, config=config)
 
                 # Append the text into wordsarr
                 result += text.strip() + "\n"
@@ -147,9 +148,9 @@ def ocrFromCoords(coords: List[int]):
                 if debugmode:
                     cv2.imshow("Grayscale Image", grayImg)
 
-                result = pytesseract.image_to_string(grayImg, langCode)
+                result = pytesseract.image_to_string(grayImg, langCode, config=config)
             else:  # no enhancement
-                result = pytesseract.image_to_string(captured, langCode)
+                result = pytesseract.image_to_string(captured, langCode, config=config)
 
             if saveImg:
                 createPicDirIfGone()
@@ -166,6 +167,9 @@ def ocrFromCoords(coords: List[int]):
 
         if not fJson.settingCache["supress_no_text_alert"] and len(result) == 0:
             Mbox("No text detected", "No text detected in the image. Please try again.", 1)
+
+        if debugmode:
+            cv2.waitKey(0)
     except Exception as e:
         logger.exception(e)
         result = str(e)

diff --git a/screen_translate/utils/Helper.py b/screen_translate/utils/Helper.py
@@ -19,16 +19,16 @@ def startFile(filename: str):
         os.startfile(filename)
     except FileNotFoundError:
         logger.exception("Cannot find the file specified.")
-        nativeNotify("Error", "Cannot find the file specified.", "", "Speech Translate")
+        nativeNotify("Error", "Cannot find the file specified.", "", "Screen Translate")
     except Exception:
         try:
             subprocess.Popen(["xdg-open", filename])
         except FileNotFoundError:
             logger.exception("Cannot open the file specified.")
-            nativeNotify("Error", "Cannot find the file specified.", "", "Speech Translate")
+            nativeNotify("Error", "Cannot find the file specified.", "", "Screen Translate")
         except Exception as e:
             logger.exception(e)
-            nativeNotify("Error", f"Uncaught error {str(e)}", "", "Speech Translate")
+            nativeNotify("Error", f"Uncaught error {str(e)}", "", "Screen Translate")
 
 
 def OpenUrl(url: str):
@@ -39,7 +39,7 @@ def OpenUrl(url: str):
         webbrowser.open_new(url)
     except Exception as e:
         logger.exception(e)
-        nativeNotify("Error", "Cannot open the url specified.", "", "Speech Translate")
+        nativeNotify("Error", "Cannot open the url specified.", "", "Screen Translate")
 
 
 def nativeNotify(title: str, message: str, logo: str, app_name: str):

diff --git a/screen_translate/utils/Json.py b/screen_translate/utils/Json.py
@@ -46,6 +46,7 @@
     # ------------------ #
     # Capture
     "tesseract_loc": "C:/Program Files/Tesseract-OCR/tesseract.exe",
+    "tesseract_config": "",
     "replaceNewLine": True,
     "replaceNewLineWith": " ",
     "captureLastValDelete": 0,

diff --git a/user_manual/Changelog.txt b/user_manual/Changelog.txt
@@ -1,5 +1,8 @@
 # Changelog
-Latest V2.1.0
+Latest V2.1.1
+
+[V2.1.1 Add options for tesseract config]
+- added option to add extra tesseract config (config list available at https://muthu.co/all-tesseract-ocr-options)
 
 [V2.1.0 UI improvement - Dark theme]
 - added sun valley theme with dark / light mode support

diff --git a/version.txt b/version.txt
@@ -1 +1 @@
-2.1.0
+2.1.1