Skip to content

Commit

Permalink
Update models to include faster .en (English-only) models
Browse files Browse the repository at this point in the history
* Includes .en models
* Flag update
* Fixed double model loading (oops); models now load only once.
  • Loading branch information
cyberofficial committed Aug 8, 2023
1 parent 0477bb2 commit 5b1fa7e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 16 deletions.
31 changes: 21 additions & 10 deletions modules/parser_args.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,27 +10,38 @@ def valid_port_number(value):
raise argparse.ArgumentTypeError(f"Invalid port number: {value}. Please choose a number between 1 and 65535.")
return port

def set_model_by_ram(ram, language):
def set_model_by_ram(ram, language, target_language):
ram = ram.lower()

if ram == "1gb":
model = "tiny"
if language == "en" or language == "English":
model = "tiny.en"
else:
model = "tiny"
elif ram == "2gb":
model = "base"
if language == "en" or language == "English":
model = "base.en"
else:
model = "base"
elif ram == "4gb":
model = "small"
if language == "en" or language == "English":
model = "small.en"
else:
model = "small"
elif ram == "6gb":
model = "medium"
if language == "en" or language == "English":
model = "medium.en"
else:
model = "medium"
elif ram == "12gb":
model = "large"
if language == "en":
if language == "en" or language == "English":
red_text = Fore.RED + Back.BLACK
green_text = Fore.GREEN + Back.BLACK
yellow_text = Fore.YELLOW + Back.BLACK
reset_text = Style.RESET_ALL
print(f"{red_text}WARNING{reset_text}: {yellow_text}12gb{reset_text} is overkill for English. Do you want to swap to {green_text}6gb{reset_text} model?")
print(f"{red_text}WARNING{reset_text}: {yellow_text}12gb{reset_text} is overkill for English. Do you want to swap to {green_text}6gb{reset_text} model? If you are transcribing a language other than English, you can ignore this warning and press {green_text}n{reset_text}.")
if input("y/n: ").lower() == "y":
model = "medium"
model = "medium.en"
else:
model = "large"
else:
Expand All @@ -45,7 +56,7 @@ def parse_arguments():
parser.add_argument("--non_english", action='store_true', help="Don't use the english model.")
parser.add_argument("--energy_threshold", default=100, help="Energy level for mic to detect.", type=int)
parser.add_argument("--record_timeout", default=1, help="How real time the recording is in seconds.", type=float)
parser.add_argument("--phrase_timeout", default=1, help="How much empty space between recordings before we "
parser.add_argument("--phrase_timeout", default=5, help="How much empty space between recordings before we "
"consider it a new line in the transcription.", type=float)
parser.add_argument("--no_log", action='store_true', help="Only show the last line of the transcription.")
parser.add_argument("--translate", action='store_true', help="Translate the transcriptions to English.")
Expand Down
15 changes: 9 additions & 6 deletions transcribe_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def is_input_device(device_index):
from modules.version_checker import ScriptCreator, GitHubRepo
contributors(ScriptCreator, GitHubRepo)

model = parser_args.set_model_by_ram(args.ram, args.language)
model = ""

hardmodel = None

Expand Down Expand Up @@ -183,10 +183,10 @@ def is_input_device(device_index):
except AssertionError as e:
print(e)

if args.language == "en" or args.language == "English":
model += ".en"
if model == "large" or model == "large.en":
model = "large"
#if args.language == "en" or args.language == "English":
# model += ".en"
# if model == "large" or model == "large.en":
# model = "large"

if not os.path.exists("models"):
print("Creating models folder...")
Expand Down Expand Up @@ -282,7 +282,10 @@ def is_input_device(device_index):
args.ram = hardmodel


# Select the Whisper model exactly once, from available RAM and the
# source/target language settings.
#
# Pass args.target_language through directly, even when it is None/empty:
# the previous else-branch mistakenly passed the *boolean* expression
# `args.target_language==None` (always True on that branch) as the
# target_language argument instead of the value itself.  A single
# unconditional call is both correct and simpler.
model = parser_args.set_model_by_ram(args.ram, args.language, args.target_language)
print(f"Loading model {model}...")

audio_model = whisper.load_model(model, device=device, download_root="models")
Expand Down

0 comments on commit 5b1fa7e

Please sign in to comment.