Merge pull request #97 from ROBERT-MCDOWELL/v2.0

Updated custom_model, which is now managed by the session; fixed various bugs.
DrewThomasson · Dec 21, 2024 · 82793bd
2 parents b8c8200 + bc38df7
commit 82793bd
Show file tree

Hide file tree

Showing 3 changed files with 169 additions and 91 deletions.
diff --git a/app.py b/app.py
@@ -95,11 +95,8 @@ def main():
     options = [
         '--script_mode', '--share', '--headless', 
         '--session', '--ebook', '--ebooks_dir',
-        '--voice', '--language', '--device', 
-        #'--custom_model',
-        #'--custom_model_url',
-        '--temperature',
-        '--length_penalty', '--repetition_penalty', 
+        '--voice', '--language', '--device', '--custom_model',
+        '--temperature', '--length_penalty', '--repetition_penalty', 
         '--top_k', '--top_p', '--speed',
         '--enable_text_splitting', '--fine_tuned',
         '--version', '--help'
@@ -122,32 +119,25 @@ def main():
                         help=f'Language for the audiobook conversion. Options: {lang_list_str}. Default to English (eng).')
     parser.add_argument(options[8], type=str, default='cpu', choices=['cpu', 'gpu'],
                         help=f'Type of processor unit for the audiobook conversion. If not specified: check first if gpu available, if not cpu is selected.')
-    """
     parser.add_argument(options[9], type=str,
-                        help='Path to the custom model file (.pth). Required if using a custom model.')
-    parser.add_argument(options[10], type=str,
-                        help=("URL to download the custom model as a zip file. Optional, but will be used if provided. "
-                              "Examples include David Attenborough's model: "
-                              "'https://huggingface.co/drewThomasson/xtts_David_Attenborough_fine_tune/resolve/main/Finished_model_files.zip?download=true'. "
-                              "More XTTS fine-tunes can be found on my Hugging Face at 'https://huggingface.co/drewThomasson'."))
-    """
-    parser.add_argument(options[9], type=float, default=0.65,
+                        help=f'Path to the custom model (.zip file containing {default_model_files}). Required if using a custom model.')
+    parser.add_argument(options[10], type=float, default=0.65,
                         help='Temperature for the model. Default to 0.65. Higher temperatures lead to more creative outputs.')
-    parser.add_argument(options[10], type=float, default=1.0,
+    parser.add_argument(options[11], type=float, default=1.0,
                         help='A length penalty applied to the autoregressive decoder. Default to 1.0. Not applied to custom models.')
-    parser.add_argument(options[11], type=float, default=2.5,
+    parser.add_argument(options[12], type=float, default=2.5,
                         help='A penalty that prevents the autoregressive decoder from repeating itself. Default to 2.5')
-    parser.add_argument(options[12], type=int, default=50,
+    parser.add_argument(options[13], type=int, default=50,
                         help='Top-k sampling. Lower values mean more likely outputs and increased audio generation speed. Default to 50')
-    parser.add_argument(options[13], type=float, default=0.8,
+    parser.add_argument(options[14], type=float, default=0.8,
                         help='Top-p sampling. Lower values mean more likely outputs and increased audio generation speed. Default to 0.8')
-    parser.add_argument(options[14], type=float, default=1.0,
+    parser.add_argument(options[15], type=float, default=1.0,
                         help='Speed factor for the speech generation. Default to 1.0')
-    parser.add_argument(options[15], type=str, default=default_fine_tuned,
+    parser.add_argument(options[16], type=str, default=default_fine_tuned,
                         help='Name of the fine tuned model. Optional, uses the standard model according to the TTS engine and language.')
-    parser.add_argument(options[16], action='store_true',
+    parser.add_argument(options[17], action='store_true',
                         help='Enable splitting text into sentences. Default to False.')
-    parser.add_argument(options[17], action='version',version=f'ebook2audiobook version {version}',
+    parser.add_argument(options[18], action='version',version=f'ebook2audiobook version {version}',
                         help='Show the version of the script and exit')
 
     for arg in sys.argv:

diff --git a/lib/conf.py b/lib/conf.py
@@ -12,20 +12,23 @@
 requirements_file = os.path.abspath(os.path.join('.','requirements.txt'))
 
 docker_utils_image = 'utils'
+
+interface_host = '0.0.0.0'
 interface_port = 7860
 interface_shared_expire = 72 # hours
 interface_concurrency_limit = 8 # or None for unlimited
 interface_component_options = {
     "gr_tab_preferences": True,
     "gr_voice_file": True,
-    "gr_custom_model_file": True,
-    "gr_custom_model_url": True
+    "gr_group_custom_model": True
 }
 
 python_env_dir = os.path.abspath(os.path.join('.','python_env'))
+
 models_dir = os.path.abspath(os.path.join('.','models'))
 ebooks_dir = os.path.abspath(os.path.join('.','ebooks'))
 processes_dir = os.path.abspath(os.path.join('.','tmp'))
+
 audiobooks_gradio_dir = os.path.abspath(os.path.join('.','audiobooks','gui','gradio'))
 audiobooks_host_dir = os.path.abspath(os.path.join('.','audiobooks','gui','host'))
 audiobooks_cli_dir = os.path.abspath(os.path.join('.','audiobooks','cli'))
@@ -52,6 +55,7 @@
 
 default_tts_engine = 'xtts'
 default_fine_tuned = 'std'
+default_model_files = ['config.json', 'vocab.json', 'model.pth', 'ref.wav']
 
 models = {
     "xtts": {