From 30e989b2eeae2592c5e28f8fa71140c703724fc6 Mon Sep 17 00:00:00 2001
From: Josh XT
Date: Sat, 27 Jan 2024 12:21:23 -0500
Subject: [PATCH 01/12] Add context manager

---
 app.py            | 38 ++++++++++++++++++++++-------
 local_llm/LLM.py  | 61 ++++++++++++++++++++--------------
 tests/tests.ipynb | 83 +++++++++++++++++++++++++++--------------------
 3 files changed, 110 insertions(+), 72 deletions(-)

diff --git a/app.py b/app.py
index 78f28d0..9505a3f 100644
--- a/app.py
+++ b/app.py
@@ -8,11 +8,36 @@
 from local_llm.CTTS import CTTS
 import os
 from dotenv import load_dotenv
+from contextlib import asynccontextmanager
 
 load_dotenv()
+DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "phi-2-dpo")
+WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base.en")
+
+CURRENT_MODEL = DEFAULT_MODEL if DEFAULT_MODEL else "phi-2-dpo"
+CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else "base.en"
+LOADED_LLM = None
+LOADED_STT = None
+LOADED_CTTS = None
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    global CURRENT_MODEL
+    global CURRENT_STT_MODEL
+    global LOADED_LLM
+    global LOADED_STT
+    global LOADED_CTTS
+    print(f"[LLM] {CURRENT_MODEL} model loading...")
+    LOADED_LLM = LLM(model=CURRENT_MODEL)
+    print(f"[STT] {WHISPER_MODEL} model loading...")
+    LOADED_STT = STT(model=WHISPER_MODEL)
+    print(f"[CTTS] xttsv2_2.0.2 model loading...")
+    LOADED_CTTS = CTTS()
+    yield
 
-app = FastAPI(title="Local-LLM Server", docs_url="/")
+app = FastAPI(title="Local-LLM Server", docs_url="/", lifespan=lifespan)
 app.add_middleware(
     CORSMiddleware,
     allow_origins=["*"],
@@ -20,17 +45,6 @@
     allow_methods=["*"],
     allow_headers=["*"],
 )
-DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "phi-2-dpo")
-WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base.en")
-
-CURRENT_MODEL = DEFAULT_MODEL if DEFAULT_MODEL else "phi-2-dpo"
-CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else "base.en"
-print(f"[LLM] {CURRENT_MODEL} model loading...")
-LOADED_LLM = LLM(model=CURRENT_MODEL)
-print(f"[STT] {WHISPER_MODEL} model loading...")
-LOADED_STT = STT(model=WHISPER_MODEL)
-print(f"[CTTS] xttsv2_2.0.2 model loading...")
-LOADED_CTTS = CTTS()
 
 
 def verify_api_key(authorization: str = Header(None)):
diff --git a/local_llm/LLM.py b/local_llm/LLM.py
index 9abf013..a5326d5 100644
--- a/local_llm/LLM.py
+++ b/local_llm/LLM.py
@@ -10,21 +10,7 @@
 import torch
 
-GPU_LAYERS = os.environ.get("GPU_LAYERS", 0)
-if torch.cuda.is_available() and int(GPU_LAYERS) == 0:
-    VRAM = round(torch.cuda.get_device_properties(0).total_memory / 1024**3)
-    print(f"[LLM] {VRAM} GB of VRAM detected.")
-    GPU_LAYERS = min(2 * max(0, (VRAM - 1) // 2), 36)
-RAM = round(psutil.virtual_memory().total / 1024**3)
-MAIN_GPU = os.environ.get("MAIN_GPU", 0)
-THREADS = os.environ.get("THREADS", psutil.cpu_count() - 2)
-DOWNLOAD_MODELS = (
-    True if os.environ.get("DOWNLOAD_MODELS", "true").lower() == "true" else False
-)
 DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "phi-2-dpo")
-print(
-    f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading."
-)
 
 
 def get_models():
@@ -49,7 +35,10 @@ def get_models():
     return model_names
 
 
-def get_model_url(model_name=DEFAULT_MODEL):
+def get_model_url(model_name=""):
+    if model_name == "":
+        global DEFAULT_MODEL
+        model_name = DEFAULT_MODEL
     model_url = ""
     try:
         models = get_models()
@@ -78,7 +67,10 @@ def get_model_name(model_url="TheBloke/phi-2-dpo-GGUF"):
     return model_name
 
 
-def get_readme(model_name=DEFAULT_MODEL, models_dir="models"):
+def get_readme(model_name="", models_dir="models"):
+    if model_name == "":
+        global DEFAULT_MODEL
+        model_name = DEFAULT_MODEL
     model_url = get_model_url(model_name=model_name)
     model_name = model_name.lower()
     if not os.path.exists(f"{models_dir}/{model_name}/README.md"):
@@ -92,7 +84,10 @@ def get_readme(model_name=DEFAULT_MODEL, models_dir="models"):
     return readme
 
 
-def get_max_tokens(model_name=DEFAULT_MODEL, models_dir="models"):
+def get_max_tokens(model_name="", models_dir="models"):
+    if model_name == "":
+        global DEFAULT_MODEL
+        model_name = DEFAULT_MODEL
     readme = get_readme(model_name=model_name, models_dir=models_dir)
     if "200k" in readme:
         return 200000
@@ -113,7 +108,10 @@ def get_max_tokens(model_name=DEFAULT_MODEL, models_dir="models"):
     return 8192
 
 
-def get_prompt(model_name=DEFAULT_MODEL, models_dir="models"):
+def get_prompt(model_name="", models_dir="models"):
+    if model_name == "":
+        global DEFAULT_MODEL
+        model_name = DEFAULT_MODEL
     model_name = model_name.lower()
     if os.path.exists(f"{models_dir}/{model_name}/prompt.txt"):
         with open(f"{models_dir}/{model_name}/prompt.txt", "r") as f:
@@ -129,10 +127,15 @@ def get_prompt(model_name=DEFAULT_MODEL, models_dir="models"):
     return prompt_template
 
 
-def get_model(model_name=DEFAULT_MODEL, models_dir="models"):
-    global RAM
-    global DOWNLOAD_MODELS
-    if RAM > 16:
+def get_model(model_name="", models_dir="models"):
+    if model_name == "":
+        global DEFAULT_MODEL
+        model_name = DEFAULT_MODEL
+    DOWNLOAD_MODELS = (
+        True if os.environ.get("DOWNLOAD_MODELS", "true").lower() == "true" else False
+    )
+    ram = round(psutil.virtual_memory().total / 1024**3)
+    if ram > 16:
         default_quantization_type = "Q5_K_M"
     else:
         default_quantization_type = "Q4_K_M"
@@ -253,9 +256,17 @@ def __init__(
         system_message: str = "",
         **kwargs,
     ):
-        global THREADS
-        global GPU_LAYERS
-        global MAIN_GPU
+        global DEFAULT_MODEL
+        THREADS = os.environ.get("THREADS", psutil.cpu_count() - 2)
+        MAIN_GPU = os.environ.get("MAIN_GPU", 0)
+        GPU_LAYERS = os.environ.get("GPU_LAYERS", 0)
+        if torch.cuda.is_available() and int(GPU_LAYERS) == 0:
+            VRAM = round(torch.cuda.get_device_properties(0).total_memory / 1024**3)
+            print(f"[LLM] {VRAM} GB of VRAM detected.")
+            GPU_LAYERS = min(2 * max(0, (VRAM - 1) // 2), 36)
+        print(
+            f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading."
+ ) self.params = {} self.model_name = model if model != "": diff --git a/tests/tests.ipynb b/tests/tests.ipynb index b430a8a..83ea91a 100644 --- a/tests/tests.ipynb +++ b/tests/tests.ipynb @@ -23,14 +23,14 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "['bakllava-1-7b', 'llava-v1.5-7b', 'llava-v1.5-13b', 'Etheria-55b-v0.1', 'EstopianMaid-13B', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Etheria-55b-v0.1', 'EstopianMaid-13B', 'Everyone-Coder-33B-Base', 'FusionNet_34Bx2_MoE', 'WestLake-7B-v2', 'WestSeverus-7B-DPO', 'DiscoLM_German_7b_v1', 'Garrulus', 'DareVox-7B', 'NexoNimbus-7B', 'Lelantos-Maid-DPO-7B', 'stable-code-3b', 'Dr_Samantha-7B', 'NeuralBeagle14-7B', 'tigerbot-13B-chat-v5', 'Nous-Hermes-2-Mixtral-8x7B-SFT', 'Thespis-13B-DPO-v0.7', 'Code-290k-13B', 'Nous-Hermes-2-Mixtral-8x7B-DPO', 'Venus-120b-v1.2', 'LLaMA2-13B-Estopia', 'medicine-LLM', 'finance-LLM-13B', 'Yi-34B-200K-DARE-megamerge-v8', 'phi-2-orange', 'laser-dolphin-mixtral-2x7b-dpo', 'bagel-dpo-8x7b-v0.2', 'Everyone-Coder-4x7b-Base', 'phi-2-electrical-engineering', 'Cosmosis-3x34B', 'HamSter-0.1', 'Helion-4x34B', 'Bagel-Hermes-2x34b', 'deepmoney-34b-200k-chat-evaluator', 'deepmoney-34b-200k-base', 'TowerInstruct-7B-v0.1', 'PiVoT-SUS-RP', 'Noromaid-v0.4-Mixtral-Instruct-8x7b-Zloss', 'TenyxChat-7B-v1', 'UNA-TheBeagle-7B-v1', 'WhiteRabbitNeo-33B-v1', 'WinterGoliath-123b', 'Open_Gpt4_8x7B_v0.2', 'neuronovo-7B-v0.3', 'Nous-Capybara-limarpv3-34B', 'MegaDolphin-120b', 'Sensualize-Mixtral', 'openchat-3.5-0106', 'Lumosia-MoE-4x10.7', 'OrcaMaid-v3-13B-32k', 'Mixtral-8x7B-Instruct-v0.1-LimaRP-ZLoss-DARE-TIES', 'Velara-11B-V2', 'medicine-chat', 'law-chat', 'finance-chat', 'dolphin-2.6-mistral-7B-dpo-laser', 'speechless-mistral-moloras-7B', 'Mixtral_34Bx2_MoE_60B', 'Noromaid-13B-v0.3', 'LLaMA-Pro-8B-Instruct', 'sonya-medium-x8-MoE', 'Sensualize-Solar-10.7B', 'zephyr-quiklang-3b-4K', 'bagel-8x7b-v0.2', 'Pallas-0.5-frankenmerge', 'LLaMA-Pro-8B', 'zephyr-quiklang-3b', 'Open_Gpt4_8x7B', 'Rosa_v2_7B', 'Mistral-7B-Instruct-v0.2-code-ft', 'phi-2-dpo', 'Mixtral-8x7B-Instruct-v0.1-LimaRP-ZLoss', 'Beyonder-4x7B-v2', 'Mixtral_11Bx2_MoE_19B', 'Kunoichi-7B', 'Norobara-ZLoss-8x7B', 'Iambe-RP-DARE-20B-DENSE', 'WizardCoder-33B-V1.1', 'Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v2', 'Nous-Hermes-2-SUS-Chat-34B-Slerp', 'Iambe-RP-v3-20B', 'WordWoven-13B', 'FlatDolphinMaid-8x7B', 'Panda-7B-v0.1', 'Unholy-v2-13B', 'OpenCAI-13B', 'OpenCAI-7B', 'openbuddy-mixtral-7bx8-v16.3-32k', 'Thespis-Mistral-7B-Alpha-v0.7', 'Pallas-0.5-LASER-0.6', 'nontoxic-bagel-34b-v0.2', 'bagel-dpo-34b-v0.2', 'bagel-34b-v0.2', 'Nous-Hermes-2-SOLAR-10.7B', 'medicine-LLM-13B', 'sonya-7B-x8-MoE', 'dolphin-2.7-mixtral-8x7b', 'yayi2-30B-llama', 'toxicqa-Llama2-7B', 'toxicqa-Llama2-13B', 'Sonya-7B', 'TinyLlama-1.1B-intermediate-step-1431k-3T', 'TinyLlama-1.1B-Chat-v1.0', 'dolphin-2.6-mistral-7B-dpo', 'CarbonVillain-en-10.7B-v4', 'trinity-v1.2-x8-MoE', 'deita-7B-v1.0-sft', 'law-LLM-13B', 'Lelantos-7B', 'UNAversal-8x7B-v1beta', 'mistral-ft-optimized-1227', 'Seraph-openchat-3.5-1210-Slerp', 'openchat-3.5-1210-Seraph-Slerp', 'MetaMath-NeuralHermes-2.5-Mistral-7B-Linear', 'NeuralQuant-9B', 'NeuralPipe-9B-merged', 'NeuralPipe-7B-ties', 'NeuralPipe-7B-slerp', 'You_can_cry_Snowman-13B', 'MLewd-v2.4-13B', 'Pallas-0.5', 'Synthia-v3.0-11B', 'Sarah_StoryTeller_13b', 'notux-8x7b-v1', 'docsgpt-7B-mistral', 'Metis-0.5', 'Aurora-Nights-103B-v1.0', 'Sakura-SOLAR-Instruct', 'SOLARC-MOE-10.7Bx4', 
'Aurora-Nights-70B-v1.0', 'dolphin-2.6-mistral-7B', 'Silicon-Maid-7B', 'MixtralOrochi8x7B', 'openchat-3.5-1210-starling-slerp', 'neural-chat-7B-v3-3-wizardmath-dare-me', 'openthaigpt-1.0.0-beta-13B-chat', 'Yi-34B-200K-AEZAKMI-v2', 'xDAN-L1-Chat-RL-v1', 'Nous-Hermes-2-Yi-34B', 'SauerkrautLM-Mixtral-8x7B', 'SauerkrautLM-Mixtral-8x7B-Instruct', 'finance-LLM', 'openbuddy-mixtral-8x7b-v15.4', 'Loyal-Macaroni-Maid-7B', 'DaringMaid-13B', 'apricot-wildflower-20', 'LMCocktail-phi-2-v1', 'Instruct_Mixtral-8x7B-v0.1_Dolly15K', 'LMCocktail-10.7B-v1', 'typhoon-7B', 'SauerkrautLM-UNA-SOLAR-Instruct', 'dolphin-2_6-phi-2', 'Orca2myth7.2', 'Sensei-7B-V1', 'LUNA-SOLARkrautLM-Instruct', 'Valkyrie-V1', 'Mixtral_7Bx2_MoE', 'bun_mistral_7b_v2', 'SAM', 'firefly-mixtral-8x7b', 'SauerkrautLM-SOLAR-Instruct', 'CodeNinja-1.0-OpenChat-7B', 'MixtralRPChat-ZLoss', 'WizardMath-7B-V1.1', 'dolphin-2.6-mixtral-8x7b', 'orangetin-OpenHermes-Mixtral-8x7B', 'mistral-ft-optimized-1218', 'mixtral_spanish_ft', 'OrcaMaid-v2-FIX-13B-32k', 'Llama-2-7B-ft-instruct-es', 'FlatOrcamaid-13B-v0.2', 'ShiningValiantXS-1.1', 'DaringFortitude', 'OrcaMaidXL-17B-32k', 'WhiteRabbitNeo-13B', 'dragon-mistral-7B-v0', 'DaringMaid-20B', 'openbuddy-mixtral-8x7b-v15.2', 'Frostwind-10.7B-v1', 'Noromaid-13B-v0.2', 'Swallow-7B-Instruct', 'Swallow-7B', 'Swallow-13B-Instruct', 'Swallow-70B', 'Swallow-13B', 'Swallow-70B-instruct', 'Yi-34B-200K-DARE-merge-v5', 'GEITje-7B-chat', 'Metis-0.4', 'SOLAR-10.7B-Instruct-v1.0-uncensored', 'Fennec-Mixtral-8x7B', 'OpenZephyrChat-v0.2', 'Metis-0.3', 'phi-2', 'quantum-v0.01', 'quantum-dpo-v0.1', 'Mistral-7B-Merge-14-v0.1', 'Norocetacean-20B-10k', 'Pirouette-7B', 'Saily_220B', 'GreenNodeLM-7B-v4leo', 'BigPlap-8x20B', 'PlatYi-34B-Llama-Q-v3', 'PiVoT-MoE', 'PiVoT-10.7B-Mistral-v0.2', 'PiVoT-10.7B-Mistral-v0.2-RP', 'Mixtral-Fusion-4x7B-Instruct-v0.1', 'go-bruins-v2.1.1', 'Falkor-8x7B-MoE', 'Chupacabra-8x7B-MoE', '8x7B-MoE-test-NOT-MIXTRAL', 'Starling-LM-alpha-8x7B-MoE', 'MelloGPT', 'mindy-7B', 'SOLAR-Platypus-10.7B-v2', 'LeoScorpius-GreenNode-Platypus-7B-v1', 'LeoScorpius-GreenNode-Alpaca-7B-v1', 'Mistral-7B-AEZAKMI-v1', 'agiin-13.6B-v0.1', 'Rogue-Rose-103b-v0.2', 'Venus-120b-v1.1', 'Venus-103b-v1.2', 'Metis-0.1', 'yi-34B-v3', 'Mixtral-8x7B-MoE-RP-Story', 'openchat-3.5-1210', 'mistral-7B-dpo-v5', 'Tiamat-7B-1.1-DPO', 'Llamix2-MLewd-4x13B', 'LeoScorpius-GreenNode-7B-v1', 'WinterGoddess-1.4x-70B-L2', 'cutie', 'Orca-2-13B-SFT_v5', 'Pandora-v1-13B', 'una-cybertron-7B-v3-OMA', 'BruinsV2-OpHermesNeu-11B', 'DynamicFactor', 'ShiningValiant-1.3', 'Mistral-7B-Instruct-v0.2-DARE', 'Solar-10.7B-SLERP', 'Pandora-v1-10.7B', 'Mythalion-Kimiko-v2', 'openbuddy-mixtral-8x7b-v15.1', 'Rose-Kimiko-20B', 'Mixtral-SlimOrca-8x7B', 'Mixtral-8x7B-v0.1', 'Mixtral-8x7B-Instruct-v0.1', 'dolphin-2.5-mixtral-8x7b', 'LeoScorpius-7B', 'v1olet_merged_dpo_7B_v4', 'v1olet_merged_dpo_7B_v3', 'bagel-dpo-7B-v0.1', 'bagel-7B-v0.1', 'Ana-v1-m7', 'meditron-7B-chat', 'Nyxene-v3-11B', 'blossom-v3_1-yi-34b', 'SOLAR-10.7B-v1.0', 'SOLAR-10.7B-Instruct-v1.0', 'MadMix-v0.2', 'openbuddy-llama2-13b64k-v15', 'mixtralnt-4x7b-test', 'OpenZephyrChat', 'Code-33B', 'Marcoroni-neural-chat-7B-v1', 'SeraphMarcoroni-7B', 'pee', 'Terminis-7B', 'Pallas-0.4', 'Synthia-MoE-v3-Mixtral-8x7B', 'Pallas-0.3', 'supermario-v2', 'Merged-DPO-7B', 'v1olet_marcoroni-go-bruins-merge-7B', 'Seraph-7B', 'Amber', 'AmberChat', 'neural-chat-7B-v3-3', 'dopeystableplats-3b-v1', 'Clover3-17B', 'Mistral-7B-Instruct-v0.2', 'LlamaGuard-7B', 'leo-hessianai-70B', 'OpenHermes-2.5-neural-chat-v3-3-Slerp', 
'leo-hessianai-70B-chat', 'Velara', 'Nethena-20B-Glued', 'open-instruct-human-mix-65B', 'Marcoroni-7B-v3', 'Merged-AGI-7B', 'NexusRaven-V2-13B', 'CaPlatTessDolXaBoros-Yi-34B-200K-DARE-Ties-HighDensity', 'Marcoroni-7B-v2', 'go-bruins', 'go-bruins-v2', 'openbuddy-deepseek-67b-v15-base', 'smol-7B', 'Venus-103b-v1.1', 'Mistral-7B-Instruct-v0.1', 'OrcaMaid-13B', 'Synthia-7B-v3.0', 'bling-stable-lm-3b-4e1t-v0', 'Thespis-13B-Alpha-v0.7', 'Iambe-Storyteller-20B', 'Code-13B', 'SG-Raccoon-Yi-200k-2.0', 'Sydney_Overthinker_13B', 'dolphin-2.2-yi-34b-200k', 'stablelm-zephyr-3b', 'Magicoder-S-DS-6.7B', 'Iambe-RP-cDPO-20B', 'Tiamat-7B', 'deepsex-34b', 'Optimus-7B', 'MetaMath-Cybertron-Starling', 'DPOpenHermes-7B-v2', 'una-xaberius-34b-v1beta', 'Nyxene-v2-11B', 'PiVoT-Merge-A-7B', 'DiscoLM-70B', 'Tess-34B-v1.4', 'Tess-7B-v1.4', 'una-cybertron-7B-v2', 'NeuralOrca-7B-v1', 'SUS-Chat-34B', 'sabia-7B', 'OpenOrca-Zephyr-7B', 'Poro-34B', 'RpBird-Yi-34B-200k', 'notus-7B-v1', 'Chronomaid-Storytelling-13B', 'OpenHermes-2.5-neural-chat-7B-v3-2-7B', 'DPOpenHermes-7B', 'loyal-piano-m7', 'Inairtra-7B', 'DiscoLM-120b', 'neural-chat-7B-v3-2', 'SG-Raccoon-Yi-55B-200k', 'juanako-7B-UNA', 'cinematika-7B-v0.1', 'meditron-7B', 'SlimOrca-13B', 'meditron-70B', 'tigerbot-70B-chat-v2', 'Open-Hermes-2.5-neural-chat-3.1-frankenmerge-11b', 'SG-Raccoon-Yi-55B', 'NeuralHermes-2.5-Mistral-7B', 'LLaMA2-13B-Psyfighter2', 'open-llama-3b-v2-wizard-evol-instuct-v2-196k', 'deepseek-llm-7B-chat', 'deepseek-llm-7B-base', 'Iambe-20B-DARE', 'deepseek-llm-67b-base', 'deepseek-llm-67b-chat', 'psyonic-cetacean-20B', 'SauerkrautLM-7B-HerO', 'Venus-120b-v1.0', 'Aetheria-L2-70B', 'Astrid-Mistral-7B', 'saiga_mistral_7b', 'evolvedSeeker_1_3', 'PiVoT-0.1-Starling-LM-RP', 'CapyTessBorosYi-34B-200K-DARE-Ties', 'Starling-LM-7B-alpha', 'Stheno-v2-Delta', 'Solus-70B-L2', 'Solus-103B-L2', 'Lila-103B-L2', 'Lila-70B-L2', 'OpenHermes-2.5-neural-chat-7B-v3-1-7B', 'SunsetBoulevard', 'PiVoT-0.1-Evil-a', 'MergeMonster-13B-20231124', 'openinstruct-mistral-7B', 'Capybara-Tess-Yi-34B-200K-DARE-Ties', 'MysticFusion-13B', 'Tess-M-v1.3', 'PiVoT-0.1-early', 'Karen_TheEditor_V2_CREATIVE_Mistral_7B', 'juanako-7B-v1', 'smartyplats-7B-v2', 'X-MythoChronos-13B', 'Rose-20B', 'Chupacabra-7B-v3', 'Mini_Synatra_7B_02', 'tulu-2-7B', 'tulu-2-dpo-7B', 'tulu-2-dpo-70B', 'tulu-2-dpo-13B', 'tulu-2-70B', 'tulu-2-13B', 'Tess-M-v1.2', 'cockatrice-7B-v0.1', 'Synatra-7B-v0.3-base', 'Synatra-V0.1-7B-Instruct', 'Yi-34B-Chat', 'Synatra-RP-Orca-2-7B-v0.1', 'Synatra-7B-v0.3-RP', 'Synatra-7B-v0.3-dpo', 'koOpenChat-sft', 'llama-polyglot-13B', 'digital-socrates-7B', 'digital-socrates-13B', 'rocket-3B', 'Noromaid-20B-v0.1.1', 'SynthIA-7B-v1.3-dare-0.85', 'CollectiveCognition-v1.1-Mistral-7B-dare-0.85', 'airoboros-m-7B-3.1.2-dare-0.85', 'speechless-mistral-7B-dare-0.85', 'Ferret_7B', 'Chupacabra-7B-v2', 'Tess-XS-v1.1', 'Tess-XS-Creative-v1.0', 'OrionStar-Yi-34B-Chat-Llama', 'Tess-M-v1.1', 'MythoMist-7B', 'Llama-2-70B-Chat', 'Nous-Capybara-7B-v1.9', 'gorilla-openfunctions-v1', 'Orca-2-7B', 'Orca-2-13B', 'Akins-3B', 'Marx-3B-v3', 'Yarn-Llama-2-70B-32k', 'zephyr_7b_norobots', 'mistral_7b_norobots', 'llama2_7b_merge_orcafamily', 'Writing_Partner_Mistral_7B', 'Generate_Question_Mistral_7B', 'Free_Sydney_V2_Mistral_7b', 'Autolycus-Mistral_7B', 'GOAT-70B-Storytelling', 'Karen_TheEditor_V2_STRICT_Mistral_7B', 'blossom-v3-mistral-7B', 'mistral-7B-finetuned-orca-dpo-v2', 'llama2-7B-layla', 'LLaMA_2_13B_SFT_v1', 'blossom-v3-baichuan2-7B', 'XwinCoder-34B', 'XwinCoder-13B', 'X-NoroChronos-13B', 
'Capybara-Tess-Yi-34B-200K', 'Tess-M-Creative-v1.0', 'Kaori-70B-v1', 'Nanbeige-16B-Chat', 'Nanbeige-16B-Base', 'Nanbeige-16B-Base-32K', 'Nanbeige-16B-Chat-32K', 'tigerbot-70B-chat-v4', 'nucleus-22B-token-500B', 'nsql-llama-2-7B', 'sqlcoder-34b-alpha', 'opus-v0.5-70B', 'Nous-Capybara-34B', 'dolphin-2_2-yi-34b', 'Euryale-1.4-L2-70B', 'alfred-40B-1023', 'firefly-llama2-7B-chat', 'neural-chat-7B-v3-1', 'firefly-llama2-13B-chat', 'dragon-yi-6B-v0', 'platypus-yi-34b', 'OpenHermes-2.5-Mistral-7B-16k', 'TimeCrystal-L2-13B', 'merlyn-education-safety', 'merlyn-education-corpus-qa-v2', 'Deacon-34B', 'MoMo-70B-V1.1', 'Tai-70B', 'Thespis-Mistral-7B-v0.6', 'sqlcoder-7B', 'Tess-XL-v1.0', 'zephyr-7B-beta-pl', 'SynthIA-7B-v2.0-16k', 'ShiningValiantXS', 'Python-Code-33B', 'Python-Code-13B', 'opus-v0-70B', 'GodziLLa2-70B', 'Claire-7B-0.1', 'speechless-mistral-dolphin-orca-platypus-samantha-7B', 'Noromaid-13B-v0.1.1', 'Noromaid-13B-v0.1', 'Thespis-13B-v0.6', 'Augmental-ReMM-13B', 'Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v1', 'Yi-34B-200K-Llamafied', 'Yi-34B-200K', 'Yi-6B-200K', 'cat-v1.0-13B', 'Augmental-Unholy-13B', 'openchat_3.5-16k', 'claude2-alpaca-13B', 'prometheus-7B-v1.0', 'prometheus-13B-v1.0', 'Yi-6B', 'claude2-alpaca-7B', 'dolphin-2.2.1-AshhLimaRP-Mistral-7B', 'Trion-M-7B', 'Dolphin2.1-OpenOrca-7B', 'MonadGPT', 'goliath-120b', 'Yi-34B-GiftedConvo-merged', 'KAI-7B-Instruct', 'KAI-7B-beta', 'opus-v0-7B', 'dolphin-2.2-70B', 'LLaMA2-13B-TiefighterLR', 'Psyfighter-13B', 'Dawn-v2-70B', 'vigogne-2-70B-chat', 'Toppy-M-7B', 'TinyLlama-1.1B-intermediate-step-715k-1.5T', 'Yi-34B', 'Barcenas-Mistral-7B', 'openbuddy-zephyr-7B-v14.1', 'japanese-stablelm-base-beta-70B', 'Hexoteric-7B', 'echidna-tiefigther-25', 'UtopiaXL-13B', 'Hermes-Trismegistus-Mistral-7B', 'stockmark-13B', 'calm2-7B-chat', 'deepseek-coder-33B-instruct', 'deepseek-coder-6.7B-instruct', 'deepseek-coder-1.3b-instruct', 'deepseek-coder-33B-base', 'deepseek-coder-6.7B-base', 'deepseek-coder-5.7bmqa-base', 'deepseek-coder-1.3b-base', 'Naberius-7B', 'Utopia-13B', 'japanese-stablelm-instruct-beta-7B', 'Thespis-Mistral-7B-v0.5', 'Yarn-Mistral-7B-64k', 'Yarn-Mistral-7B-128k', 'OpenHermes-2.5-Mistral-7B', 'openchat_3.5', 'japanese-stablelm-instruct-beta-70B', 'basilisk-7B-v0.2', 'openbuddy-llama2-70B-v13.2', 'SciPhi-Self-RAG-Mistral-7B-32k', 'vietnamese-llama2-7B-40GB', 'Amethyst-13B', 'Skywork-13B-base', 'Xwin-MLewd-7B-V0.2', 'MetaMath-Mistral-7B', 'Nethena-MLewd-Xwin-23B', 'dolphin-2.2.1-mistral-7B', 'Uncensored-Jordan-7B', 'Uncensored-Jordan-33B', 'Uncensored-Jordan-13B', 'Nethena-20B', 'Nethena-13B', 'Thespis-13B-v0.5', 'SciPhi-Mistral-7B-32k', 'Free_Sydney_V2_13B', 'Scarlett-7B', 'AquilaChat2-34B', 'AquilaChat2-34B-16K', 'Athnete-13B', 'Augmental-13B-v1.50_B', 'Augmental-13B-v1.50_A', 'Mistral-ClaudeLimaRP-v3-7B', 'Mistral-7B-Claude-Chat', 'japanese-stablelm-instruct-gamma-7B', 'SauerkrautLM-70B-v1', 'Echidna-13B-v0.3', 'Mistral-7B-codealpaca-lora', 'Mistral_7B_Dolphin2.1_LIMA0.5', 'openbuddy-mistral-7B-v13.1', 'Echidna-13B-v0.2', 'Gale-medium-init-3B', 'Lewd-Sydney-20B', 'med42-70B', 'MistRP-Airoboros-7B', 'AshhLimaRP-Mistral-7B', 'Nete-13B', 'zephyr-7B-beta', 'SynthIA-70B-v1.5', 'lzlv_70B', 'Cat-13B-0.5', 'Dolphin-2.1-70B', 'Augmental-13B', 'Vigostral-7B-Chat', 'HornyEchidna-13B-v0.1', 'Llama-2-7B-32K-Instruct', 'Llama-2-7B', 'CausalLM-7B', 'CausalLM-14B', 'SynthIA-7B-v2.0', 'Mistral-7B-SciPhi-32k', 'Thespis-13B-v0.4', 'LLaMA2-13B-Tiefighter', 'dolphin-2.1-mistral-7B', 'Airoboros-L2-70B-3.1.2', 'Airoboros-c34B-3.1.2', 
'MLewdBoros-LRPSGPT-2Char-13B', 'vicuna-33B-coder', 'MistralMakise-Merged-13B', 'agentlm-7B', 'CodeBooga-34B-v0.1', 'agentlm-70B', 'agentlm-13B', 'Arithmo-Mistral-7B', 'Airoboros-M-7B-3.1.2', 'Falcon-180B', 'Falcon-180B-Chat', 'Mistral-Pygmalion-7B', 'MistralLite-7B', 'SlimOpenOrca-Mistral-7B', 'PsyMedRP-v1-13B', 'Xwin-LM-7B-V0.2', 'Airoboros-L2-13B-3.1.1', 'rpguild-chatml-13B', 'Euryale-1.3-L2-70B', 'Dans-AdventurousWinds-Mk2-7B', 'llemma_34b', 'Mistral-7B-Phibrarian-32K', 'llemma_7b', 'Thespis-13B-v0.3', 'Pandalyst-7B-v1.2', 'Airoboros-M-7B-3.1.1', 'Mistral-7B-Code-16K-qlora', 'OpenHermes-2-Mistral-7B', 'Leo-Mistral-Hessianai-7B-Chat', 'Airoboros-L2-70B-3.1', 'openbuddy-mistral-7B-v13-base', 'Xwin-MLewd-13B-v0.2', 'openbuddy-llama2-70B-v13-base', 'openbuddy-mistral-7B-v13', 'ShiningValiant-1.2', 'StellarBright', 'Mistral-11B-OmniMix', 'Airoboros-M-7B-3.1', 'SynthIA-7B-v1.5', 'LongAlpaca-70B', 'airoboros-l2-13B-3.1', 'tora-code-7B-v1.0', 'Xwin-LM-13B-v0.2', 'tora-code-34b-v1.0', 'tora-code-13B-v1.0', 'Mistral-11B-CC-Air-RP', 'tora-7B-v1.0', 'genz-13B-v2', 'SauerkrautLM-7B-v1-mistral', 'SauerkrautLM-3B-v1', 'Llama-2-7B-Chat', 'tora-70B-v1.0', 'SauerkrautLM-7B-v1', 'tora-13B-v1.0', 'ALMA-7B', 'ALMA-13B', 'SauerkrautLM-13B-v1', 'Tinyllama-2-1b-miniguanaco', 'zephyr-7B-alpha', 'speechless-codellama-34b-v2.0', 'speechless-tora-code-7B-v1.0', 'CollectiveCognition-v1-Mistral-7B', 'speechless-code-mistral-7B-v1.0', 'FashionGPT-70B-v1.2', 'chronos007-70B', 'samantha-1.2-mistral-7B', 'sqlcoder2', 'ANIMA-Phi-Neptune-Mistral-7B', 'ZephRP-m7b', 'jackalope-7B', 'UndiMix-v3-13B', 'tinyllama-1.1b-chat-v0.3_platypus', 'TinyLlama-1.1B-1T-OpenOrca', 'em_german_leo_mistral', 'em_german_mistral_v01', 'Ziya-Coding-34B-v1.0', 'PsyMedRP-v1-20B', 'Athena-v4', 'Llama2-chat-AYB-13B', 'sheep-duck-llama-2-13B', 'Mistral-Trismegistus-7B', 'UndiMix-v4-13B', 'llama-2-7B-Arguments', 'airoboros-m-7B-3.0', 'airoboros-l2-7B-3.0', 'airoboros-l2-13B-3.0', 'CollectiveCognition-v1.1-Mistral-7B', 'Amethyst-13B-Mistral', 'Mistralic-7B-1', 'Llama-2-7B-vietnamese-20k', 'Dans-TotSirocco-7B', 'Dans-AdventurousWinds-7B', 'airoboros-mistral2.2-7B', 'TinyLlama-1.1B-intermediate-step-480k-1T', 'TinyLlama-1.1B-python-v0.1', 'TinyLlama-1.1B-Chat-v0.3', 'Nous-Hermes-13B', 'Mistral-7B-OpenOrca', 'dolphin-2.0-mistral-7B', 'Nous-Capybara-7B', 'Inkbot-13B-8k-0.2', 'em_german_7b_v01', 'em_german_70b_v01', 'em_german_13b_v01', 'UltraLM-13B-v2.0', 'MythoMakiseMerged-13B', 'lince-zero', 'sheep-duck-llama-2-70B-v1.1', 'MegaMix-T1-13B', 'MegaMix-S1-13B', 'Megamix-A1-13B', 'Kimiko-Mistral-7B', 'Pandalyst_13B_V1.0', 'Pandalyst-7B-V1.1', 'samantha-mistral-instruct-7B', 'samantha-mistral-7B', 'Synthia-7B-v1.3', 'NexusRaven-13B', 'Mistral-7B-v0.1', 'leo-hessianai-7B', 'leo-hessianai-7B-chat', 'leo-hessianai-7B-chat-bilingual', 'leo-hessianai-13B', 'leo-hessianai-13B-chat', 'leo-hessianai-13B-chat-bilingual', 'Emerhyst-20B', 'Emerhyst-13B', 'openbuddy-openllama-7B-v12-bf16', 'Marcoroni-70B-v1', 'Athena-v3', 'Llama-2-13B-Ensemble-v6', 'Llama-2-13B-Ensemble-v5', 'COTHuginn-4.5-19B', 'Chronos-Hermes-13b-v2', 'LlongOrca-13B-16K', 'Llama2-22B-GPLATTY', 'L2-MythoMax22b-Instruct-Falseblock', 'chronos-13b-v2', 'Asclepius-13B', 'Airoboros-33B-2.1', 'Huginn-22B-Prototype', 'U-Amethyst-20B', 'sqlcoder', 'vicuna-33B', 'law-LLM', 'openbuddy-llama2-34b-v11.1-bf16', 'openbuddy-coder-34b-v11-bf16', 'Synthia-7B-v1.2', 'Synthia-13B-v1.2', 'MetaMath-7B-V1.0', 'MetaMath-70B-V1.0', 'MAmmoTH-Coder-13B', 'MetaMath-13B-V1.0', 'MXLewdMini-L2-13B', 'storytime-13B', 'MXLewd-L2-20B', 
'PuddleJumper-13B-V2', 'Athena-v2', 'MAmmoTH-7B', 'MAmmoTH-13B', 'airoboros-l2-7B-2.2.1', 'airoboros-l2-13B-2.2.1', 'EverythingLM-13B-V3-16K', 'airoboros-c34b-2.2.1', 'ALMA-13B-Pretrain', 'ALMA-7B-Pretrain', 'MLewd-ReMM-L2-Chat-20B-Inverted', 'MLewd-ReMM-L2-Chat-20B', 'Airoboros-L2-70b-2.2.1', 'Xwin-LM-70B-V0.1', 'Inkbot-13B-4k', 'Xwin-LM-7B-V0.1', 'MAmmoTH-Coder-34B', 'MAmmoTH-70B', 'Xwin-LM-13B-V0.1', 'tulu-30B', 'tulu-7B', 'manticore-13b-chat-pyg', 'hippogriff-30b-chat', 'VicUnlocked-30B-LoRA', 'ARIA-70B-V2', 'WizardLM-Uncensored-SuperCOT-StoryTelling-30B', 'Platypus-30B', 'GPlatty-30B', 'medalpaca-13B', 'Vicuna-7B-CoT', 'airochronos-33B', 'Vicuna-13B-CoT', 'wizard-vicuna-13B', 'Chronoboros-33B', 'llama-30b-supercot', 'gorilla-7B', 'Karen_theEditor_13B', 'CAMEL-33B-Combined-Data', 'fin-llama-33B', 'CAMEL-13B-Role-Playing-Data', 'stable-vicuna-13B', 'CAMEL-13B-Combined-Data', 'chronos-wizardlm-uc-scot-st-13B', 'chronos-hermes-13B', 'llama-13b-supercot', 'wizard-mega-13B', 'Manticore-13B', 'minotaur-13B-fixed', 'wizardLM-7B', 'FashionGPT-70B-V1.1', 'WizardLM-30B', 'WizardLM-13B-V1.1', 'WizardLM-13B-1.0', 'Upstage-Llama1-65B-Instruct', 'upstage-llama-30b-instruct-2048', 'Uncensored-Frank-33b', 'guanaco-7B', 'guanaco-65B', 'guanaco-33B', 'SuperPlatty-30B', 'guanaco-13B', 'WizardLM-7B-V1.0-Uncensored', 'Uncensored-Frank-13b', 'WizardLM-33B-V1.0-Uncensored', 'WizardLM-7B-uncensored', 'WizardLM-30B-Uncensored', 'Uncensored-Frank-7B', 'Wizard-Vicuna-7B-Uncensored', 'WizardLM-13B-V1.0-Uncensored', 'WizardLM-13B-Uncensored', 'Wizard-Vicuna-30B-Uncensored', 'Wizard-Vicuna-13B-Uncensored', 'Dolphin-Llama-13B', 'based-30B', 'based-7B', 'based-13b', 'MythoLogic-13B', 'MythoBoros-13B', 'chronos-33b', '30B-Lazarus', '30B-Epsilon', 'chronos-13B', '13B-HyperMantis', '13B-Chimera', '13B-Ouroboros', 'tulu-13B', '13B-BlueMethod', 'AlpacaCielo-13B', 'llama2_7b_chat_uncensored', 'Llama-2-70B-LoRA-Assemble-v2', 'ReMM-v2.1-L2-13B', 'MLewd-L2-Chat-13B', 'WizardCoder-Python-7B-V1.0', 'TigerBot-70B-Chat', 'Pygmalion-2-13B-SuperCOT-weighed', 'Airoboros-c34B-2.2', 'Synthia-34B-v1.2', 'Kuchiki-1.1-L2-7B', 'CalliopeDS-L2-13B', 'OpenOrca_Stx', 'ChatAYT-Lora-Assamble-Marcoroni', 'Synthia-70B-v1.2b', 'CodeFuse-CodeLlama-34B', 'Chinese-Llama-2-13B', 'Chinese-Alpaca-2-13B', 'Chinese-Llama-2-7B', 'Chinese-Alpaca-2-7B', 'Marcoroni-70B', 'Pygmalion-2-13B-SuperCOT2', 'Llama-2-13B-LoRA-Assemble', 'Llama-2-7B-LoRA-Assemble', 'BerrySauce-L2-13B', 'AppleSauce-L2-13B', 'Llama-2-Coder-7B', 'Llama2-Chat-AYT-13B', 'Kuchiki-L2-7B', 'Euryale-Inverted-L2-70B', 'Euryale-L2-70B', 'Spicyboros-c34b-2.2', 'Airoboros-L2-70b-2.2', 'Airoboros-L2-13B-2.2', 'Airoboros-L2-7B-2.2', 'Sheep-Duck-Llama-2-70B', 'JanniesBasedLigma-L2-13B', 'Llama-2-13B-Chat-Dutch', 'Marcoroni-13B', 'Marcoroni-7b', 'Spicyboros-70B-2.2', 'Llama-2-70B-Ensemble-v5', 'Unholy-v1-12L-13B', 'Unholy-v1-10l-13B', 'Nous-Hermes-13B-Code', 'ReMM-v2-L2-13B', 'MLewdBoros-L2-13B', 'Tulpar-7B-v0', 'Spicyboros-13B-2.2', 'Pygmalion-2-13B-SuperCOT', 'ORCA_LLaMA_70B_QLoRA', 'Uni-TianYan-70B', 'airoboros-l2-7B-gpt4-m2.0', 'Spicyboros-7B-2.2', 'airoboros-l2-7B-gpt4-2.0', 'airoboros-l2-13b-gpt4-2.0', 'airoboros-l2-13b-gpt4-m2.0', 'Airoboros-L2-13B-2_1-YaRN-64K', 'Guanaco-13B-Uncensored', 'Guanaco-7B-Uncensored', 'Llama-2-PeanutButter_v19_R8-7B', '13B-Thorns-L2', 'YuLan-Chat-2-13B', 'Kimiko-7B', 'Kimiko-13B', 'llama-2-70b-Guanaco-QLoRA', 'llama2_70b_chat_uncensored', 'Llama-2-70B-OASST-1-200', 'WizardLM-70B-V1.0', 'airoboros-l2-70B-gpt4-1.4.1', 'Airoboros-L2-70B-GPT4-m2.0', 'Synthia-70B-v1.2', 
'Camel-Platypus2-70B', 'WizardMath-70B-V1.0', 'Trurl-2-7B', 'airoboros-l2-70B-GPT4-2.0', 'Platypus2-70B', 'Trurl-2-13B', 'orca_mini_v3_70B', 'llama2-7b-chat-codeCherryPop-qLoRA', 'Luna-AI-Llama2-Uncensored', 'Upstage-Llama-2-70B-instruct-v2', 'StableBeluga-13B', 'StableBeluga2-70B', 'LosslessMegaCoder-Llama2-7B-Mini', 'Chronos-70B-v2', 'Platypus2-70B-Instruct', 'StableBeluga-7B', 'Llama2-13B-MegaCode2-OASST', 'LosslessMegaCoder-Llama2-13B-Mini', 'Mythalion-13B', 'LlongOrca-7B-16K', 'Pygmalion-2-7B', 'Pygmalion-2-13B', 'llama-2-7B-Guanaco-QLoRA', 'Llama2-22B-Daydreamer-v3', 'OpenAssistant-Llama2-13B-Orca-8K-3319', 'Samantha-1.1-70B', 'Synthia-7B', 'OpenOrca-Platypus2-13B', 'Vigogne-2-7B-Instruct', 'WizardLM-1.0-Uncensored-CodeLlama-34B', 'Vigogne-2-13B-Instruct', 'Chronoboros-Grad-L2-13B', 'Airochronos-L2-13B', 'llama2-22B-daydreamer-v2', 'llama-2-13B-German-Assistant-v2', 'llama-2-13B-Guanaco-QLoRA', 'CodeUp-Alpha-13B-HF', 'Synthia-13B', 'Hermes-LLongMA-2-7B-8K', 'Chronorctypus-Limarobormes-13b', 'Vigogne-2-7B-Chat', 'Chronolima-Airo-Grad-L2-13B', 'Airolima-Chronos-Grad-L2-13B', 'qCammel-70-x', 'Spring-Dragon', 'CodeUp-Llama-2-13B-Chat-HF', 'Llama-2-13B-German-Assistant-v4', 'Carl-Llama-2-13B', 'WizardLM-1.0-Uncensored-Llama2-13B', 'LLongMA-2-7B', 'huginnv1.2', 'Hermes-LLongMA-2-13B-8K', 'qCammel-13', 'Chronos-Beluga-v2-13B', '13B-Legerdemain-L2', 'orca_mini_v3_13B', 'Dolphin-Llama2-7B', 'Huginn-v3-13B', 'orca_mini_v3_7B', 'Huginn-13B', 'Zarablend-MX-L2-7B', 'Zarablend-L2-7B', 'HermesLimaRP-L2-7B', 'OpenBuddy-Llama2-70b-v10.1', 'WizardMath-7B-V1.0', 'Firefly-Llama2-13B-v1.2', 'WizardMath-13B-V1.0', 'WizardLM-13B-V1.2', 'ReMM-SLERP-L2-13B', 'Redmond-Puffin-13B', 'EverythingLM-13b-V2-16K', 'AlpacaCielo2-7B-8K', 'EverythingLM-13B-16K', 'Nous-Hermes-Llama2', 'Nous-Hermes-Llama-2-7B', 'airoboros-l2-7b-gpt4-1.4.1', 'airoboros-l2-13B-gpt4-1.4.1', 'Platypus2-13B', 'Stable-Platypus2-13B', 'Chronohermes-Grad-L2-13B', 'Camel-Platypus2-13B', 'vicuna-7B-v1.5', 'vicuna-13B-v1.5', 'vicuna-7B-v1.5-16K', 'vicuna-13B-v1.5-16K', 'MythoMax-L2-13B', 'MythoMix-L2-13B', 'MythoLogic-Mini-7B', 'MythoLogic-L2-13B', 'openchat_v3.2_super', 'Llama-2-70B', 'Llama-2-13B', 'Llama-2-13B-chat', 'OpenBuddy-Llama2-13B-v11.1', 'Speechless-Llama2-Hermes-Orca-Platypus-WizardLM-13B', 'Speechless-Llama2-13B', 'UndiMix-v2-13B', 'Stheno-L2-13B', 'UndiMix-v1-13B', 'Stheno-Inverted-L2-13B', 'Yarn-Llama-2-13B-64K', 'Yarn-Llama-2-13B-128K', 'Yarn-Llama-2-7B-128K', 'Yarn-Llama-2-7B-64K', 'Synthia-70B-v1.1', 'LoKuS-13B', 'llama-2-13B-chat-limarp-v2-merged', 'Airoboros-L2-70B-2.1-Creative', 'MythoMax-L2-Kimiko-v2-13B', 'fiction.live-Kimiko-V2-70B', 'Kimiko-v2-13B', 'Luban-13B', 'Athena-v1', 'Huginn-13B-v4', 'Huginn-13B-v4.5', 'Mythical-Destroyer-V2-L2-13B', 'Airoboros-L2-7B-2.1', 'model_007-70B', 'Airoboros-L2-13B-2.1', 'Mythical-Destroyer-L2-13B', 'MythoMax-Kimiko-Mix', 'Samantha-1.11-13B', 'Lemur-70B-Chat-v1', 'Phind-CodeLlama-34B-v2', 'WizardCoder-Python-13B-V1.0', 'CodeLlama-13B-oasst-sft-v10', 'Airoboros-L2-70B-2.1', 'Airoboros-c34B-2.1', 'Llama-2-70B-Orca-200k', 'Genz-70b', 'Synthia-70B', 'Zarafusionex-1.1-L2-7B', 'WizardCoder-Python-34B-V1.0', 'Phind-CodeLlama-34B-Python-v1', 'Phind-CodeLlama-34B-v1', 'Llama2-70B-OASST-SFT-v10', 'Samantha-1.11-CodeLlama-34B', 'Samantha-1.11-70B', 'CodeLlama-34B-Instruct', 'CodeLlama-34B-Python', 'CodeLlama-34B', 'CodeLlama-13B-Python', 'CodeLlama-13B-Instruct', 'CodeLlama-13B', 'CodeLlama-7B', 'CodeLlama-7B-Python', 'CodeLlama-7B-Instruct', 'Nous-Puffin-70B', 'Nous-Hermes-Llama2-70B', 
'PuddleJumper-13B', 'LLaMA-7b', 'LLaMA-65B', 'LLaMA-30b', 'LLaMA-13b', 'minotaur-13B']\n" + "['bakllava-1-7b', 'llava-v1.5-7b', 'llava-v1.5-13b', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Goliath-longLORA-120b-rope8-32k-fp16', 'Etheria-55b-v0.1', 'EstopianMaid-13B', 'Everyone-Coder-33B-Base', 'FusionNet_34Bx2_MoE', 'WestLake-7B-v2', 'WestSeverus-7B-DPO', 'DiscoLM_German_7b_v1', 'Garrulus', 'DareVox-7B', 'NexoNimbus-7B', 'Lelantos-Maid-DPO-7B', 'stable-code-3b', 'Dr_Samantha-7B', 'NeuralBeagle14-7B', 'tigerbot-13B-chat-v5', 'Nous-Hermes-2-Mixtral-8x7B-SFT', 'Thespis-13B-DPO-v0.7', 'Code-290k-13B', 'Nous-Hermes-2-Mixtral-8x7B-DPO', 'Venus-120b-v1.2', 'LLaMA2-13B-Estopia', 'medicine-LLM', 'finance-LLM-13B', 'Yi-34B-200K-DARE-megamerge-v8', 'phi-2-orange', 'laser-dolphin-mixtral-2x7b-dpo', 'bagel-dpo-8x7b-v0.2', 'Everyone-Coder-4x7b-Base', 'phi-2-electrical-engineering', 'Cosmosis-3x34B', 'HamSter-0.1', 'Helion-4x34B', 'Bagel-Hermes-2x34b', 'deepmoney-34b-200k-chat-evaluator', 'deepmoney-34b-200k-base', 'TowerInstruct-7B-v0.1', 'PiVoT-SUS-RP', 'Noromaid-v0.4-Mixtral-Instruct-8x7b-Zloss', 'TenyxChat-7B-v1', 'UNA-TheBeagle-7B-v1', 'WhiteRabbitNeo-33B-v1', 'WinterGoliath-123b', 'Open_Gpt4_8x7B_v0.2', 'neuronovo-7B-v0.3', 'Nous-Capybara-limarpv3-34B', 'MegaDolphin-120b', 'Sensualize-Mixtral', 'openchat-3.5-0106', 'Lumosia-MoE-4x10.7', 'OrcaMaid-v3-13B-32k', 'Mixtral-8x7B-Instruct-v0.1-LimaRP-ZLoss-DARE-TIES', 'Velara-11B-V2', 'medicine-chat', 'law-chat', 'finance-chat', 'dolphin-2.6-mistral-7B-dpo-laser', 'speechless-mistral-moloras-7B', 'Mixtral_34Bx2_MoE_60B', 'Noromaid-13B-v0.3', 'LLaMA-Pro-8B-Instruct', 'sonya-medium-x8-MoE', 'Sensualize-Solar-10.7B', 'zephyr-quiklang-3b-4K', 'bagel-8x7b-v0.2', 'Pallas-0.5-frankenmerge', 'LLaMA-Pro-8B', 'zephyr-quiklang-3b', 'Open_Gpt4_8x7B', 'Rosa_v2_7B', 'Mistral-7B-Instruct-v0.2-code-ft', 'phi-2-dpo', 'Mixtral-8x7B-Instruct-v0.1-LimaRP-ZLoss', 'Beyonder-4x7B-v2', 'Mixtral_11Bx2_MoE_19B', 'Kunoichi-7B', 'Norobara-ZLoss-8x7B', 'Iambe-RP-DARE-20B-DENSE', 'WizardCoder-33B-V1.1', 'Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v2', 'Nous-Hermes-2-SUS-Chat-34B-Slerp', 'Iambe-RP-v3-20B', 'WordWoven-13B', 'FlatDolphinMaid-8x7B', 'Panda-7B-v0.1', 'Unholy-v2-13B', 'OpenCAI-13B', 'OpenCAI-7B', 'openbuddy-mixtral-7bx8-v16.3-32k', 'Thespis-Mistral-7B-Alpha-v0.7', 'Pallas-0.5-LASER-0.6', 'nontoxic-bagel-34b-v0.2', 'bagel-dpo-34b-v0.2', 'bagel-34b-v0.2', 'Nous-Hermes-2-SOLAR-10.7B', 'medicine-LLM-13B', 'sonya-7B-x8-MoE', 'dolphin-2.7-mixtral-8x7b', 'yayi2-30B-llama', 'toxicqa-Llama2-7B', 'toxicqa-Llama2-13B', 'Sonya-7B', 'TinyLlama-1.1B-intermediate-step-1431k-3T', 'TinyLlama-1.1B-Chat-v1.0', 'dolphin-2.6-mistral-7B-dpo', 'CarbonVillain-en-10.7B-v4', 'trinity-v1.2-x8-MoE', 'deita-7B-v1.0-sft', 'law-LLM-13B', 'Lelantos-7B', 'UNAversal-8x7B-v1beta', 'mistral-ft-optimized-1227', 'Seraph-openchat-3.5-1210-Slerp', 'openchat-3.5-1210-Seraph-Slerp', 'MetaMath-NeuralHermes-2.5-Mistral-7B-Linear', 'NeuralQuant-9B', 'NeuralPipe-9B-merged', 'NeuralPipe-7B-ties', 'NeuralPipe-7B-slerp', 'You_can_cry_Snowman-13B', 'MLewd-v2.4-13B', 'Pallas-0.5', 'Synthia-v3.0-11B', 'Sarah_StoryTeller_13b', 'notux-8x7b-v1', 'docsgpt-7B-mistral', 'Metis-0.5', 'Aurora-Nights-103B-v1.0', 'Sakura-SOLAR-Instruct', 'SOLARC-MOE-10.7Bx4', 'Aurora-Nights-70B-v1.0', 'dolphin-2.6-mistral-7B', 'Silicon-Maid-7B', 'MixtralOrochi8x7B', 'openchat-3.5-1210-starling-slerp', 'neural-chat-7B-v3-3-wizardmath-dare-me', 'openthaigpt-1.0.0-beta-13B-chat', 'Yi-34B-200K-AEZAKMI-v2', 'xDAN-L1-Chat-RL-v1', 'Nous-Hermes-2-Yi-34B', 
'SauerkrautLM-Mixtral-8x7B', 'SauerkrautLM-Mixtral-8x7B-Instruct', 'finance-LLM', 'openbuddy-mixtral-8x7b-v15.4', 'Loyal-Macaroni-Maid-7B', 'DaringMaid-13B', 'apricot-wildflower-20', 'LMCocktail-phi-2-v1', 'Instruct_Mixtral-8x7B-v0.1_Dolly15K', 'LMCocktail-10.7B-v1', 'typhoon-7B', 'SauerkrautLM-UNA-SOLAR-Instruct', 'dolphin-2_6-phi-2', 'Orca2myth7.2', 'Sensei-7B-V1', 'LUNA-SOLARkrautLM-Instruct', 'Valkyrie-V1', 'Mixtral_7Bx2_MoE', 'bun_mistral_7b_v2', 'SAM', 'firefly-mixtral-8x7b', 'SauerkrautLM-SOLAR-Instruct', 'CodeNinja-1.0-OpenChat-7B', 'MixtralRPChat-ZLoss', 'WizardMath-7B-V1.1', 'dolphin-2.6-mixtral-8x7b', 'orangetin-OpenHermes-Mixtral-8x7B', 'mistral-ft-optimized-1218', 'mixtral_spanish_ft', 'OrcaMaid-v2-FIX-13B-32k', 'Llama-2-7B-ft-instruct-es', 'FlatOrcamaid-13B-v0.2', 'ShiningValiantXS-1.1', 'DaringFortitude', 'OrcaMaidXL-17B-32k', 'WhiteRabbitNeo-13B', 'dragon-mistral-7B-v0', 'DaringMaid-20B', 'openbuddy-mixtral-8x7b-v15.2', 'Frostwind-10.7B-v1', 'Noromaid-13B-v0.2', 'Swallow-7B-Instruct', 'Swallow-7B', 'Swallow-13B-Instruct', 'Swallow-70B', 'Swallow-13B', 'Swallow-70B-instruct', 'Yi-34B-200K-DARE-merge-v5', 'GEITje-7B-chat', 'Metis-0.4', 'SOLAR-10.7B-Instruct-v1.0-uncensored', 'Fennec-Mixtral-8x7B', 'OpenZephyrChat-v0.2', 'Metis-0.3', 'phi-2', 'quantum-v0.01', 'quantum-dpo-v0.1', 'Mistral-7B-Merge-14-v0.1', 'Norocetacean-20B-10k', 'Pirouette-7B', 'Saily_220B', 'GreenNodeLM-7B-v4leo', 'BigPlap-8x20B', 'PlatYi-34B-Llama-Q-v3', 'PiVoT-MoE', 'PiVoT-10.7B-Mistral-v0.2', 'PiVoT-10.7B-Mistral-v0.2-RP', 'Mixtral-Fusion-4x7B-Instruct-v0.1', 'go-bruins-v2.1.1', 'Falkor-8x7B-MoE', 'Chupacabra-8x7B-MoE', '8x7B-MoE-test-NOT-MIXTRAL', 'Starling-LM-alpha-8x7B-MoE', 'MelloGPT', 'mindy-7B', 'SOLAR-Platypus-10.7B-v2', 'LeoScorpius-GreenNode-Platypus-7B-v1', 'LeoScorpius-GreenNode-Alpaca-7B-v1', 'Mistral-7B-AEZAKMI-v1', 'agiin-13.6B-v0.1', 'Rogue-Rose-103b-v0.2', 'Venus-120b-v1.1', 'Venus-103b-v1.2', 'Metis-0.1', 'yi-34B-v3', 'Mixtral-8x7B-MoE-RP-Story', 'openchat-3.5-1210', 'mistral-7B-dpo-v5', 'Tiamat-7B-1.1-DPO', 'Llamix2-MLewd-4x13B', 'LeoScorpius-GreenNode-7B-v1', 'WinterGoddess-1.4x-70B-L2', 'cutie', 'Orca-2-13B-SFT_v5', 'Pandora-v1-13B', 'una-cybertron-7B-v3-OMA', 'BruinsV2-OpHermesNeu-11B', 'DynamicFactor', 'ShiningValiant-1.3', 'Mistral-7B-Instruct-v0.2-DARE', 'Solar-10.7B-SLERP', 'Pandora-v1-10.7B', 'Mythalion-Kimiko-v2', 'openbuddy-mixtral-8x7b-v15.1', 'Rose-Kimiko-20B', 'Mixtral-SlimOrca-8x7B', 'Mixtral-8x7B-v0.1', 'Mixtral-8x7B-Instruct-v0.1', 'dolphin-2.5-mixtral-8x7b', 'LeoScorpius-7B', 'v1olet_merged_dpo_7B_v4', 'v1olet_merged_dpo_7B_v3', 'bagel-dpo-7B-v0.1', 'bagel-7B-v0.1', 'Ana-v1-m7', 'meditron-7B-chat', 'Nyxene-v3-11B', 'blossom-v3_1-yi-34b', 'SOLAR-10.7B-v1.0', 'SOLAR-10.7B-Instruct-v1.0', 'MadMix-v0.2', 'openbuddy-llama2-13b64k-v15', 'mixtralnt-4x7b-test', 'OpenZephyrChat', 'Code-33B', 'Marcoroni-neural-chat-7B-v1', 'SeraphMarcoroni-7B', 'pee', 'Terminis-7B', 'Pallas-0.4', 'Synthia-MoE-v3-Mixtral-8x7B', 'Pallas-0.3', 'supermario-v2', 'Merged-DPO-7B', 'v1olet_marcoroni-go-bruins-merge-7B', 'Seraph-7B', 'Amber', 'AmberChat', 'neural-chat-7B-v3-3', 'dopeystableplats-3b-v1', 'Clover3-17B', 'Mistral-7B-Instruct-v0.2', 'LlamaGuard-7B', 'leo-hessianai-70B', 'OpenHermes-2.5-neural-chat-v3-3-Slerp', 'leo-hessianai-70B-chat', 'Velara', 'Nethena-20B-Glued', 'open-instruct-human-mix-65B', 'Marcoroni-7B-v3', 'Merged-AGI-7B', 'NexusRaven-V2-13B', 'CaPlatTessDolXaBoros-Yi-34B-200K-DARE-Ties-HighDensity', 'Marcoroni-7B-v2', 'go-bruins', 'go-bruins-v2', 'openbuddy-deepseek-67b-v15-base', 
'smol-7B', 'Venus-103b-v1.1', 'Mistral-7B-Instruct-v0.1', 'OrcaMaid-13B', 'Synthia-7B-v3.0', 'bling-stable-lm-3b-4e1t-v0', 'Thespis-13B-Alpha-v0.7', 'Iambe-Storyteller-20B', 'Code-13B', 'SG-Raccoon-Yi-200k-2.0', 'Sydney_Overthinker_13B', 'dolphin-2.2-yi-34b-200k', 'stablelm-zephyr-3b', 'Magicoder-S-DS-6.7B', 'Iambe-RP-cDPO-20B', 'Tiamat-7B', 'deepsex-34b', 'Optimus-7B', 'MetaMath-Cybertron-Starling', 'DPOpenHermes-7B-v2', 'una-xaberius-34b-v1beta', 'Nyxene-v2-11B', 'PiVoT-Merge-A-7B', 'DiscoLM-70B', 'Tess-34B-v1.4', 'Tess-7B-v1.4', 'una-cybertron-7B-v2', 'NeuralOrca-7B-v1', 'SUS-Chat-34B', 'sabia-7B', 'OpenOrca-Zephyr-7B', 'Poro-34B', 'RpBird-Yi-34B-200k', 'notus-7B-v1', 'Chronomaid-Storytelling-13B', 'OpenHermes-2.5-neural-chat-7B-v3-2-7B', 'DPOpenHermes-7B', 'loyal-piano-m7', 'Inairtra-7B', 'DiscoLM-120b', 'neural-chat-7B-v3-2', 'SG-Raccoon-Yi-55B-200k', 'juanako-7B-UNA', 'cinematika-7B-v0.1', 'meditron-7B', 'SlimOrca-13B', 'meditron-70B', 'tigerbot-70B-chat-v2', 'Open-Hermes-2.5-neural-chat-3.1-frankenmerge-11b', 'SG-Raccoon-Yi-55B', 'NeuralHermes-2.5-Mistral-7B', 'LLaMA2-13B-Psyfighter2', 'open-llama-3b-v2-wizard-evol-instuct-v2-196k', 'deepseek-llm-7B-chat', 'deepseek-llm-7B-base', 'Iambe-20B-DARE', 'deepseek-llm-67b-base', 'deepseek-llm-67b-chat', 'psyonic-cetacean-20B', 'SauerkrautLM-7B-HerO', 'Venus-120b-v1.0', 'Aetheria-L2-70B', 'Astrid-Mistral-7B', 'saiga_mistral_7b', 'evolvedSeeker_1_3', 'PiVoT-0.1-Starling-LM-RP', 'CapyTessBorosYi-34B-200K-DARE-Ties', 'Starling-LM-7B-alpha', 'Stheno-v2-Delta', 'Solus-70B-L2', 'Solus-103B-L2', 'Lila-103B-L2', 'Lila-70B-L2', 'OpenHermes-2.5-neural-chat-7B-v3-1-7B', 'SunsetBoulevard', 'PiVoT-0.1-Evil-a', 'MergeMonster-13B-20231124', 'openinstruct-mistral-7B', 'Capybara-Tess-Yi-34B-200K-DARE-Ties', 'MysticFusion-13B', 'Tess-M-v1.3', 'PiVoT-0.1-early', 'Karen_TheEditor_V2_CREATIVE_Mistral_7B', 'juanako-7B-v1', 'smartyplats-7B-v2', 'X-MythoChronos-13B', 'Rose-20B', 'Chupacabra-7B-v3', 'Mini_Synatra_7B_02', 'tulu-2-7B', 'tulu-2-dpo-7B', 'tulu-2-dpo-70B', 'tulu-2-dpo-13B', 'tulu-2-70B', 'tulu-2-13B', 'Tess-M-v1.2', 'cockatrice-7B-v0.1', 'Synatra-7B-v0.3-base', 'Synatra-V0.1-7B-Instruct', 'Yi-34B-Chat', 'Synatra-RP-Orca-2-7B-v0.1', 'Synatra-7B-v0.3-RP', 'Synatra-7B-v0.3-dpo', 'koOpenChat-sft', 'llama-polyglot-13B', 'digital-socrates-7B', 'digital-socrates-13B', 'rocket-3B', 'Noromaid-20B-v0.1.1', 'SynthIA-7B-v1.3-dare-0.85', 'CollectiveCognition-v1.1-Mistral-7B-dare-0.85', 'airoboros-m-7B-3.1.2-dare-0.85', 'speechless-mistral-7B-dare-0.85', 'Ferret_7B', 'Chupacabra-7B-v2', 'Tess-XS-v1.1', 'Tess-XS-Creative-v1.0', 'OrionStar-Yi-34B-Chat-Llama', 'Tess-M-v1.1', 'MythoMist-7B', 'Llama-2-70B-Chat', 'Nous-Capybara-7B-v1.9', 'gorilla-openfunctions-v1', 'Orca-2-7B', 'Orca-2-13B', 'Akins-3B', 'Marx-3B-v3', 'Yarn-Llama-2-70B-32k', 'zephyr_7b_norobots', 'mistral_7b_norobots', 'llama2_7b_merge_orcafamily', 'Writing_Partner_Mistral_7B', 'Generate_Question_Mistral_7B', 'Free_Sydney_V2_Mistral_7b', 'Autolycus-Mistral_7B', 'GOAT-70B-Storytelling', 'Karen_TheEditor_V2_STRICT_Mistral_7B', 'blossom-v3-mistral-7B', 'mistral-7B-finetuned-orca-dpo-v2', 'llama2-7B-layla', 'LLaMA_2_13B_SFT_v1', 'blossom-v3-baichuan2-7B', 'XwinCoder-34B', 'XwinCoder-13B', 'X-NoroChronos-13B', 'Capybara-Tess-Yi-34B-200K', 'Tess-M-Creative-v1.0', 'Kaori-70B-v1', 'Nanbeige-16B-Chat', 'Nanbeige-16B-Base', 'Nanbeige-16B-Base-32K', 'Nanbeige-16B-Chat-32K', 'tigerbot-70B-chat-v4', 'nucleus-22B-token-500B', 'nsql-llama-2-7B', 'sqlcoder-34b-alpha', 'opus-v0.5-70B', 'Nous-Capybara-34B', 
'dolphin-2_2-yi-34b', 'Euryale-1.4-L2-70B', 'alfred-40B-1023', 'firefly-llama2-7B-chat', 'neural-chat-7B-v3-1', 'firefly-llama2-13B-chat', 'dragon-yi-6B-v0', 'platypus-yi-34b', 'OpenHermes-2.5-Mistral-7B-16k', 'TimeCrystal-L2-13B', 'merlyn-education-safety', 'merlyn-education-corpus-qa-v2', 'Deacon-34B', 'MoMo-70B-V1.1', 'Tai-70B', 'Thespis-Mistral-7B-v0.6', 'sqlcoder-7B', 'Tess-XL-v1.0', 'zephyr-7B-beta-pl', 'SynthIA-7B-v2.0-16k', 'ShiningValiantXS', 'Python-Code-33B', 'Python-Code-13B', 'opus-v0-70B', 'GodziLLa2-70B', 'Claire-7B-0.1', 'speechless-mistral-dolphin-orca-platypus-samantha-7B', 'Noromaid-13B-v0.1.1', 'Noromaid-13B-v0.1', 'Thespis-13B-v0.6', 'Augmental-ReMM-13B', 'Mistral-7B-OpenOrca-oasst_top1_2023-08-25-v1', 'Yi-34B-200K-Llamafied', 'Yi-34B-200K', 'Yi-6B-200K', 'cat-v1.0-13B', 'Augmental-Unholy-13B', 'openchat_3.5-16k', 'claude2-alpaca-13B', 'prometheus-7B-v1.0', 'prometheus-13B-v1.0', 'Yi-6B', 'claude2-alpaca-7B', 'dolphin-2.2.1-AshhLimaRP-Mistral-7B', 'Trion-M-7B', 'Dolphin2.1-OpenOrca-7B', 'MonadGPT', 'goliath-120b', 'Yi-34B-GiftedConvo-merged', 'KAI-7B-Instruct', 'KAI-7B-beta', 'opus-v0-7B', 'dolphin-2.2-70B', 'LLaMA2-13B-TiefighterLR', 'Psyfighter-13B', 'Dawn-v2-70B', 'vigogne-2-70B-chat', 'Toppy-M-7B', 'TinyLlama-1.1B-intermediate-step-715k-1.5T', 'Yi-34B', 'Barcenas-Mistral-7B', 'openbuddy-zephyr-7B-v14.1', 'japanese-stablelm-base-beta-70B', 'Hexoteric-7B', 'echidna-tiefigther-25', 'UtopiaXL-13B', 'Hermes-Trismegistus-Mistral-7B', 'stockmark-13B', 'calm2-7B-chat', 'deepseek-coder-33B-instruct', 'deepseek-coder-6.7B-instruct', 'deepseek-coder-1.3b-instruct', 'deepseek-coder-33B-base', 'deepseek-coder-6.7B-base', 'deepseek-coder-5.7bmqa-base', 'deepseek-coder-1.3b-base', 'Naberius-7B', 'Utopia-13B', 'japanese-stablelm-instruct-beta-7B', 'Thespis-Mistral-7B-v0.5', 'Yarn-Mistral-7B-64k', 'Yarn-Mistral-7B-128k', 'OpenHermes-2.5-Mistral-7B', 'openchat_3.5', 'japanese-stablelm-instruct-beta-70B', 'basilisk-7B-v0.2', 'openbuddy-llama2-70B-v13.2', 'SciPhi-Self-RAG-Mistral-7B-32k', 'vietnamese-llama2-7B-40GB', 'Amethyst-13B', 'Skywork-13B-base', 'Xwin-MLewd-7B-V0.2', 'MetaMath-Mistral-7B', 'Nethena-MLewd-Xwin-23B', 'dolphin-2.2.1-mistral-7B', 'Uncensored-Jordan-7B', 'Uncensored-Jordan-33B', 'Uncensored-Jordan-13B', 'Nethena-20B', 'Nethena-13B', 'Thespis-13B-v0.5', 'SciPhi-Mistral-7B-32k', 'Free_Sydney_V2_13B', 'Scarlett-7B', 'AquilaChat2-34B', 'AquilaChat2-34B-16K', 'Athnete-13B', 'Augmental-13B-v1.50_B', 'Augmental-13B-v1.50_A', 'Mistral-ClaudeLimaRP-v3-7B', 'Mistral-7B-Claude-Chat', 'japanese-stablelm-instruct-gamma-7B', 'SauerkrautLM-70B-v1', 'Echidna-13B-v0.3', 'Mistral-7B-codealpaca-lora', 'Mistral_7B_Dolphin2.1_LIMA0.5', 'openbuddy-mistral-7B-v13.1', 'Echidna-13B-v0.2', 'Gale-medium-init-3B', 'Lewd-Sydney-20B', 'med42-70B', 'MistRP-Airoboros-7B', 'AshhLimaRP-Mistral-7B', 'Nete-13B', 'zephyr-7B-beta', 'SynthIA-70B-v1.5', 'lzlv_70B', 'Cat-13B-0.5', 'Dolphin-2.1-70B', 'Augmental-13B', 'Vigostral-7B-Chat', 'HornyEchidna-13B-v0.1', 'Llama-2-7B-32K-Instruct', 'Llama-2-7B', 'CausalLM-7B', 'CausalLM-14B', 'SynthIA-7B-v2.0', 'Mistral-7B-SciPhi-32k', 'Thespis-13B-v0.4', 'LLaMA2-13B-Tiefighter', 'dolphin-2.1-mistral-7B', 'Airoboros-L2-70B-3.1.2', 'Airoboros-c34B-3.1.2', 'MLewdBoros-LRPSGPT-2Char-13B', 'vicuna-33B-coder', 'MistralMakise-Merged-13B', 'agentlm-7B', 'CodeBooga-34B-v0.1', 'agentlm-70B', 'agentlm-13B', 'Arithmo-Mistral-7B', 'Airoboros-M-7B-3.1.2', 'Falcon-180B', 'Falcon-180B-Chat', 'Mistral-Pygmalion-7B', 'MistralLite-7B', 'SlimOpenOrca-Mistral-7B', 'PsyMedRP-v1-13B', 
'Xwin-LM-7B-V0.2', 'Airoboros-L2-13B-3.1.1', 'rpguild-chatml-13B', 'Euryale-1.3-L2-70B', 'Dans-AdventurousWinds-Mk2-7B', 'llemma_34b', 'Mistral-7B-Phibrarian-32K', 'llemma_7b', 'Thespis-13B-v0.3', 'Pandalyst-7B-v1.2', 'Airoboros-M-7B-3.1.1', 'Mistral-7B-Code-16K-qlora', 'OpenHermes-2-Mistral-7B', 'Leo-Mistral-Hessianai-7B-Chat', 'Airoboros-L2-70B-3.1', 'openbuddy-mistral-7B-v13-base', 'Xwin-MLewd-13B-v0.2', 'openbuddy-llama2-70B-v13-base', 'openbuddy-mistral-7B-v13', 'ShiningValiant-1.2', 'StellarBright', 'Mistral-11B-OmniMix', 'Airoboros-M-7B-3.1', 'SynthIA-7B-v1.5', 'LongAlpaca-70B', 'airoboros-l2-13B-3.1', 'tora-code-7B-v1.0', 'Xwin-LM-13B-v0.2', 'tora-code-34b-v1.0', 'tora-code-13B-v1.0', 'Mistral-11B-CC-Air-RP', 'tora-7B-v1.0', 'genz-13B-v2', 'SauerkrautLM-7B-v1-mistral', 'SauerkrautLM-3B-v1', 'Llama-2-7B-Chat', 'tora-70B-v1.0', 'SauerkrautLM-7B-v1', 'tora-13B-v1.0', 'ALMA-7B', 'ALMA-13B', 'SauerkrautLM-13B-v1', 'Tinyllama-2-1b-miniguanaco', 'zephyr-7B-alpha', 'speechless-codellama-34b-v2.0', 'speechless-tora-code-7B-v1.0', 'CollectiveCognition-v1-Mistral-7B', 'speechless-code-mistral-7B-v1.0', 'FashionGPT-70B-v1.2', 'chronos007-70B', 'samantha-1.2-mistral-7B', 'sqlcoder2', 'ANIMA-Phi-Neptune-Mistral-7B', 'ZephRP-m7b', 'jackalope-7B', 'UndiMix-v3-13B', 'tinyllama-1.1b-chat-v0.3_platypus', 'TinyLlama-1.1B-1T-OpenOrca', 'em_german_leo_mistral', 'em_german_mistral_v01', 'Ziya-Coding-34B-v1.0', 'PsyMedRP-v1-20B', 'Athena-v4', 'Llama2-chat-AYB-13B', 'sheep-duck-llama-2-13B', 'Mistral-Trismegistus-7B', 'UndiMix-v4-13B', 'llama-2-7B-Arguments', 'airoboros-m-7B-3.0', 'airoboros-l2-7B-3.0', 'airoboros-l2-13B-3.0', 'CollectiveCognition-v1.1-Mistral-7B', 'Amethyst-13B-Mistral', 'Mistralic-7B-1', 'Llama-2-7B-vietnamese-20k', 'Dans-TotSirocco-7B', 'Dans-AdventurousWinds-7B', 'airoboros-mistral2.2-7B', 'TinyLlama-1.1B-intermediate-step-480k-1T', 'TinyLlama-1.1B-python-v0.1', 'TinyLlama-1.1B-Chat-v0.3', 'Nous-Hermes-13B', 'Mistral-7B-OpenOrca', 'dolphin-2.0-mistral-7B', 'Nous-Capybara-7B', 'Inkbot-13B-8k-0.2', 'em_german_7b_v01', 'em_german_70b_v01', 'em_german_13b_v01', 'UltraLM-13B-v2.0', 'MythoMakiseMerged-13B', 'lince-zero', 'sheep-duck-llama-2-70B-v1.1', 'MegaMix-T1-13B', 'MegaMix-S1-13B', 'Megamix-A1-13B', 'Kimiko-Mistral-7B', 'Pandalyst_13B_V1.0', 'Pandalyst-7B-V1.1', 'samantha-mistral-instruct-7B', 'samantha-mistral-7B', 'Synthia-7B-v1.3', 'NexusRaven-13B', 'Mistral-7B-v0.1', 'leo-hessianai-7B', 'leo-hessianai-7B-chat', 'leo-hessianai-7B-chat-bilingual', 'leo-hessianai-13B', 'leo-hessianai-13B-chat', 'leo-hessianai-13B-chat-bilingual', 'Emerhyst-20B', 'Emerhyst-13B', 'openbuddy-openllama-7B-v12-bf16', 'Marcoroni-70B-v1', 'Athena-v3', 'Llama-2-13B-Ensemble-v6', 'Llama-2-13B-Ensemble-v5', 'COTHuginn-4.5-19B', 'Chronos-Hermes-13b-v2', 'LlongOrca-13B-16K', 'Llama2-22B-GPLATTY', 'L2-MythoMax22b-Instruct-Falseblock', 'chronos-13b-v2', 'Asclepius-13B', 'Airoboros-33B-2.1', 'Huginn-22B-Prototype', 'U-Amethyst-20B', 'sqlcoder', 'vicuna-33B', 'law-LLM', 'openbuddy-llama2-34b-v11.1-bf16', 'openbuddy-coder-34b-v11-bf16', 'Synthia-7B-v1.2', 'Synthia-13B-v1.2', 'MetaMath-7B-V1.0', 'MetaMath-70B-V1.0', 'MAmmoTH-Coder-13B', 'MetaMath-13B-V1.0', 'MXLewdMini-L2-13B', 'storytime-13B', 'MXLewd-L2-20B', 'PuddleJumper-13B-V2', 'Athena-v2', 'MAmmoTH-7B', 'MAmmoTH-13B', 'airoboros-l2-7B-2.2.1', 'airoboros-l2-13B-2.2.1', 'EverythingLM-13B-V3-16K', 'airoboros-c34b-2.2.1', 'ALMA-13B-Pretrain', 'ALMA-7B-Pretrain', 'MLewd-ReMM-L2-Chat-20B-Inverted', 'MLewd-ReMM-L2-Chat-20B', 'Airoboros-L2-70b-2.2.1', 'Xwin-LM-70B-V0.1', 
'Inkbot-13B-4k', 'Xwin-LM-7B-V0.1', 'MAmmoTH-Coder-34B', 'MAmmoTH-70B', 'Xwin-LM-13B-V0.1', 'tulu-30B', 'tulu-7B', 'manticore-13b-chat-pyg', 'hippogriff-30b-chat', 'VicUnlocked-30B-LoRA', 'ARIA-70B-V2', 'WizardLM-Uncensored-SuperCOT-StoryTelling-30B', 'Platypus-30B', 'GPlatty-30B', 'medalpaca-13B', 'Vicuna-7B-CoT', 'airochronos-33B', 'Vicuna-13B-CoT', 'wizard-vicuna-13B', 'Chronoboros-33B', 'llama-30b-supercot', 'gorilla-7B', 'Karen_theEditor_13B', 'CAMEL-33B-Combined-Data', 'fin-llama-33B', 'CAMEL-13B-Role-Playing-Data', 'stable-vicuna-13B', 'CAMEL-13B-Combined-Data', 'chronos-wizardlm-uc-scot-st-13B', 'chronos-hermes-13B', 'llama-13b-supercot', 'wizard-mega-13B', 'Manticore-13B', 'minotaur-13B-fixed', 'wizardLM-7B', 'FashionGPT-70B-V1.1', 'WizardLM-30B', 'WizardLM-13B-V1.1', 'WizardLM-13B-1.0', 'Upstage-Llama1-65B-Instruct', 'upstage-llama-30b-instruct-2048', 'Uncensored-Frank-33b', 'guanaco-7B', 'guanaco-65B', 'guanaco-33B', 'SuperPlatty-30B', 'guanaco-13B', 'WizardLM-7B-V1.0-Uncensored', 'Uncensored-Frank-13b', 'WizardLM-33B-V1.0-Uncensored', 'WizardLM-7B-uncensored', 'WizardLM-30B-Uncensored', 'Uncensored-Frank-7B', 'Wizard-Vicuna-7B-Uncensored', 'WizardLM-13B-V1.0-Uncensored', 'WizardLM-13B-Uncensored', 'Wizard-Vicuna-30B-Uncensored', 'Wizard-Vicuna-13B-Uncensored', 'Dolphin-Llama-13B', 'based-30B', 'based-7B', 'based-13b', 'MythoLogic-13B', 'MythoBoros-13B', 'chronos-33b', '30B-Lazarus', '30B-Epsilon', 'chronos-13B', '13B-HyperMantis', '13B-Chimera', '13B-Ouroboros', 'tulu-13B', '13B-BlueMethod', 'AlpacaCielo-13B', 'llama2_7b_chat_uncensored', 'Llama-2-70B-LoRA-Assemble-v2', 'ReMM-v2.1-L2-13B', 'MLewd-L2-Chat-13B', 'WizardCoder-Python-7B-V1.0', 'TigerBot-70B-Chat', 'Pygmalion-2-13B-SuperCOT-weighed', 'Airoboros-c34B-2.2', 'Synthia-34B-v1.2', 'Kuchiki-1.1-L2-7B', 'CalliopeDS-L2-13B', 'OpenOrca_Stx', 'ChatAYT-Lora-Assamble-Marcoroni', 'Synthia-70B-v1.2b', 'CodeFuse-CodeLlama-34B', 'Chinese-Llama-2-13B', 'Chinese-Alpaca-2-13B', 'Chinese-Llama-2-7B', 'Chinese-Alpaca-2-7B', 'Marcoroni-70B', 'Pygmalion-2-13B-SuperCOT2', 'Llama-2-13B-LoRA-Assemble', 'Llama-2-7B-LoRA-Assemble', 'BerrySauce-L2-13B', 'AppleSauce-L2-13B', 'Llama-2-Coder-7B', 'Llama2-Chat-AYT-13B', 'Kuchiki-L2-7B', 'Euryale-Inverted-L2-70B', 'Euryale-L2-70B', 'Spicyboros-c34b-2.2', 'Airoboros-L2-70b-2.2', 'Airoboros-L2-13B-2.2', 'Airoboros-L2-7B-2.2', 'Sheep-Duck-Llama-2-70B', 'JanniesBasedLigma-L2-13B', 'Llama-2-13B-Chat-Dutch', 'Marcoroni-13B', 'Marcoroni-7b', 'Spicyboros-70B-2.2', 'Llama-2-70B-Ensemble-v5', 'Unholy-v1-12L-13B', 'Unholy-v1-10l-13B', 'Nous-Hermes-13B-Code', 'ReMM-v2-L2-13B', 'MLewdBoros-L2-13B', 'Tulpar-7B-v0', 'Spicyboros-13B-2.2', 'Pygmalion-2-13B-SuperCOT', 'ORCA_LLaMA_70B_QLoRA', 'Uni-TianYan-70B', 'airoboros-l2-7B-gpt4-m2.0', 'Spicyboros-7B-2.2', 'airoboros-l2-7B-gpt4-2.0', 'airoboros-l2-13b-gpt4-2.0', 'airoboros-l2-13b-gpt4-m2.0', 'Airoboros-L2-13B-2_1-YaRN-64K', 'Guanaco-13B-Uncensored', 'Guanaco-7B-Uncensored', 'Llama-2-PeanutButter_v19_R8-7B', '13B-Thorns-L2', 'YuLan-Chat-2-13B', 'Kimiko-7B', 'Kimiko-13B', 'llama-2-70b-Guanaco-QLoRA', 'llama2_70b_chat_uncensored', 'Llama-2-70B-OASST-1-200', 'WizardLM-70B-V1.0', 'airoboros-l2-70B-gpt4-1.4.1', 'Airoboros-L2-70B-GPT4-m2.0', 'Synthia-70B-v1.2', 'Camel-Platypus2-70B', 'WizardMath-70B-V1.0', 'Trurl-2-7B', 'airoboros-l2-70B-GPT4-2.0', 'Platypus2-70B', 'Trurl-2-13B', 'orca_mini_v3_70B', 'llama2-7b-chat-codeCherryPop-qLoRA', 'Luna-AI-Llama2-Uncensored', 'Upstage-Llama-2-70B-instruct-v2', 'StableBeluga-13B', 'StableBeluga2-70B', 
'LosslessMegaCoder-Llama2-7B-Mini', 'Chronos-70B-v2', 'Platypus2-70B-Instruct', 'StableBeluga-7B', 'Llama2-13B-MegaCode2-OASST', 'LosslessMegaCoder-Llama2-13B-Mini', 'Mythalion-13B', 'LlongOrca-7B-16K', 'Pygmalion-2-7B', 'Pygmalion-2-13B', 'llama-2-7B-Guanaco-QLoRA', 'Llama2-22B-Daydreamer-v3', 'OpenAssistant-Llama2-13B-Orca-8K-3319', 'Samantha-1.1-70B', 'Synthia-7B', 'OpenOrca-Platypus2-13B', 'Vigogne-2-7B-Instruct', 'WizardLM-1.0-Uncensored-CodeLlama-34B', 'Vigogne-2-13B-Instruct', 'Chronoboros-Grad-L2-13B', 'Airochronos-L2-13B', 'llama2-22B-daydreamer-v2', 'llama-2-13B-German-Assistant-v2', 'llama-2-13B-Guanaco-QLoRA', 'CodeUp-Alpha-13B-HF', 'Synthia-13B', 'Hermes-LLongMA-2-7B-8K', 'Chronorctypus-Limarobormes-13b', 'Vigogne-2-7B-Chat', 'Chronolima-Airo-Grad-L2-13B', 'Airolima-Chronos-Grad-L2-13B', 'qCammel-70-x', 'Spring-Dragon', 'CodeUp-Llama-2-13B-Chat-HF', 'Llama-2-13B-German-Assistant-v4', 'Carl-Llama-2-13B', 'WizardLM-1.0-Uncensored-Llama2-13B', 'LLongMA-2-7B', 'huginnv1.2', 'Hermes-LLongMA-2-13B-8K', 'qCammel-13', 'Chronos-Beluga-v2-13B', '13B-Legerdemain-L2', 'orca_mini_v3_13B', 'Dolphin-Llama2-7B', 'Huginn-v3-13B', 'orca_mini_v3_7B', 'Huginn-13B', 'Zarablend-MX-L2-7B', 'Zarablend-L2-7B', 'HermesLimaRP-L2-7B', 'OpenBuddy-Llama2-70b-v10.1', 'WizardMath-7B-V1.0', 'Firefly-Llama2-13B-v1.2', 'WizardMath-13B-V1.0', 'WizardLM-13B-V1.2', 'ReMM-SLERP-L2-13B', 'Redmond-Puffin-13B', 'EverythingLM-13b-V2-16K', 'AlpacaCielo2-7B-8K', 'EverythingLM-13B-16K', 'Nous-Hermes-Llama2', 'Nous-Hermes-Llama-2-7B', 'airoboros-l2-7b-gpt4-1.4.1', 'airoboros-l2-13B-gpt4-1.4.1', 'Platypus2-13B', 'Stable-Platypus2-13B', 'Chronohermes-Grad-L2-13B', 'Camel-Platypus2-13B', 'vicuna-7B-v1.5', 'vicuna-13B-v1.5', 'vicuna-7B-v1.5-16K', 'vicuna-13B-v1.5-16K', 'MythoMax-L2-13B', 'MythoMix-L2-13B', 'MythoLogic-Mini-7B', 'MythoLogic-L2-13B', 'openchat_v3.2_super', 'Llama-2-70B', 'Llama-2-13B', 'Llama-2-13B-chat', 'OpenBuddy-Llama2-13B-v11.1', 'Speechless-Llama2-Hermes-Orca-Platypus-WizardLM-13B', 'Speechless-Llama2-13B', 'UndiMix-v2-13B', 'Stheno-L2-13B', 'UndiMix-v1-13B', 'Stheno-Inverted-L2-13B', 'Yarn-Llama-2-13B-64K', 'Yarn-Llama-2-13B-128K', 'Yarn-Llama-2-7B-128K', 'Yarn-Llama-2-7B-64K', 'Synthia-70B-v1.1', 'LoKuS-13B', 'llama-2-13B-chat-limarp-v2-merged', 'Airoboros-L2-70B-2.1-Creative', 'MythoMax-L2-Kimiko-v2-13B', 'fiction.live-Kimiko-V2-70B', 'Kimiko-v2-13B', 'Luban-13B', 'Athena-v1', 'Huginn-13B-v4', 'Huginn-13B-v4.5', 'Mythical-Destroyer-V2-L2-13B', 'Airoboros-L2-7B-2.1', 'model_007-70B', 'Airoboros-L2-13B-2.1', 'Mythical-Destroyer-L2-13B', 'MythoMax-Kimiko-Mix', 'Samantha-1.11-13B', 'Lemur-70B-Chat-v1', 'Phind-CodeLlama-34B-v2', 'WizardCoder-Python-13B-V1.0', 'CodeLlama-13B-oasst-sft-v10', 'Airoboros-L2-70B-2.1', 'Airoboros-c34B-2.1', 'Llama-2-70B-Orca-200k', 'Genz-70b', 'Synthia-70B', 'Zarafusionex-1.1-L2-7B', 'WizardCoder-Python-34B-V1.0', 'Phind-CodeLlama-34B-Python-v1', 'Phind-CodeLlama-34B-v1', 'Llama2-70B-OASST-SFT-v10', 'Samantha-1.11-CodeLlama-34B', 'Samantha-1.11-70B', 'CodeLlama-34B-Instruct', 'CodeLlama-34B-Python', 'CodeLlama-34B', 'CodeLlama-13B-Python', 'CodeLlama-13B-Instruct', 'CodeLlama-13B', 'CodeLlama-7B', 'CodeLlama-7B-Python', 'CodeLlama-7B-Instruct', 'Nous-Puffin-70B', 'Nous-Hermes-Llama2-70B', 'PuddleJumper-13B', 'LLaMA-7b', 'LLaMA-65B', 'LLaMA-30b', 'LLaMA-13b', 'minotaur-13B']\n" ] } ], @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 34, "metadata": {}, "outputs": [ { @@ -91,7 +91,7 @@ }, { "cell_type": "code", - "execution_count": 4, + 
"execution_count": 35, "metadata": {}, "outputs": [ { @@ -129,16 +129,21 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 42, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Crunchy Dorito shells\n", - " Filled with spicy loco meat\n", - " Taco Bell delight\n" + "Crunchy shells ring loud,\n", + " Dorito Locos Tacos sizzle hot,\n", + " Burst of flavor hits you.\n", + "\n", + "\n", + "Taco Bell's Locos Tacos,\n", + "A fiery feast for your taste buds,\n", + "Satisfaction guaranteed!\n" ] } ], @@ -146,7 +151,7 @@ "import openai\n", "\n", "# Modify this prompt to generate different outputs\n", - "prompt = \"Write a haiku about Taco Bell's Doritos Locos Tacos, then end with .\"\n", + "prompt = \"Write a haiku about Taco Bell's Doritos Locos Tacos.\"\n", "\n", "openai.api_key = \"Your LOCAL_LLM_API_KEY from your .env file\"\n", "openai.base_url = \"http://localhost:8091/v1/\"\n", @@ -155,11 +160,13 @@ "response = openai.chat.completions.create(\n", " model=\"phi-2-dpo\",\n", " messages=messages,\n", - " temperature=0.3,\n", - " max_tokens=1024,\n", + " temperature=0.2,\n", + " max_tokens=256,\n", " top_p=0.90,\n", " stream=False,\n", - " extra_body={\"system_message\": \"You are a creative assistant.\"},\n", + " extra_body={\n", + " \"system_message\": \"Act as a creative writer. After each request is fulfilled, end with before further explanation.\",\n", + " },\n", ")\n", "print(response.messages[1][\"content\"])" ] @@ -175,16 +182,20 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 44, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Fire breathers soar high,\n", - " Majestic wings spread wide and proud,\n", - " Dragons rule the skies.\n" + "Crunchy Doritos,\n", + " Locos Tacos with fiery salsa,\n", + " Burst of flavor.\n", + "\n", + "A taste sensation,\n", + "From the streets to your table,\n", + "Taco Bell magic.\n" ] } ], @@ -192,7 +203,7 @@ "import openai\n", "\n", "# Modify this prompt to generate different outputs\n", - "prompt = \"Write a haiku about dragons, then end with .\"\n", + "prompt = \"Write a haiku about Taco Bell's Doritos Locos Tacos.\"\n", "\n", "openai.base_url = \"http://localhost:8091/v1/\"\n", "openai.api_key = \"Your LOCAL_LLM_API_KEY from your .env file\"\n", @@ -200,12 +211,14 @@ "completion = openai.completions.create(\n", " model=\"phi-2-dpo\",\n", " prompt=prompt,\n", - " temperature=0.3,\n", - " max_tokens=1024,\n", + " temperature=0.2,\n", + " max_tokens=256,\n", " top_p=0.90,\n", " n=1,\n", " stream=False,\n", - " extra_body={\"system_message\": \"You are a creative assistant.\"},\n", + " extra_body={\n", + " \"system_message\": \"Act as a creative writer. 
After each request is fulfilled, end with before further explanation.\",\n", + " },\n", ")\n", "print(completion.choices[0].text)" ] @@ -221,7 +234,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 45, "metadata": {}, "outputs": [ { @@ -229,7 +242,7 @@ "text/html": [ "\n", " \n", " " @@ -238,7 +251,7 @@ "" ] }, - "execution_count": 27, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -246,7 +259,7 @@ "source": [ "import requests\n", "\n", - "prompt = \"Write a haiku about taco bell's delicious doritos locos tacos.\"\n", + "prompt = \"Write a haiku about Taco Bell's Doritos Locos Tacos.\"\n", "response = requests.post(\n", " \"http://localhost:8091/v1/audio/generation\",\n", " json={\n", @@ -274,14 +287,14 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 49, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "{'data': \" Write a haiku about Taco Bell's delicious Doritos Locos Tacos.\"}\n" + "{'data': \" Ride a haiku about Taco Bell's Doritos Locos Tacos\"}\n" ] } ], @@ -308,20 +321,20 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 50, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Crunchy Doritos,\n", - " Locos Tacos dance on my tongue,\n", - " Taco Bell delight.\n", + "Crunchy shells ring loud,\n", + " Cheesy salsa and meat fill 'em up,\n", + " Dorito Locos Tacos.\n", "\n", - "A symphony of flavors,\n", - "From spicy salsa to creamy guac,\n", - "Doritos Locos Tacos.\n", + "A bite of heaven on earth,\n", + "Bursts of flavor in every mouthful,\n", + "Taco Bell's true delight.\n", "\n" ] }, @@ -330,7 +343,7 @@ "text/html": [ "\n", " \n", " " @@ -339,7 +352,7 @@ "" ] }, - "execution_count": 29, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } From fcc61e50437692590899979c129e0899d91a9220 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 12:23:25 -0500 Subject: [PATCH 02/12] Fix STT model load in --- app.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/app.py b/app.py index 9505a3f..5279594 100644 --- a/app.py +++ b/app.py @@ -30,8 +30,8 @@ async def lifespan(app: FastAPI): global LOADED_CTTS print(f"[LLM] {CURRENT_MODEL} model loading...") LOADED_LLM = LLM(model=CURRENT_MODEL) - print(f"[STT] {WHISPER_MODEL} model loading...") - LOADED_STT = STT(model=WHISPER_MODEL) + print(f"[STT] {CURRENT_STT_MODEL} model loading...") + LOADED_STT = STT(model=CURRENT_STT_MODEL) print(f"[CTTS] xttsv2_2.0.2 model loading...") LOADED_CTTS = CTTS() From ca757a2f53c8b7bc0f09fe6812f22f21e2858e2b Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 12:35:33 -0500 Subject: [PATCH 03/12] STT improvements --- .env | 1 + app.py | 4 ++-- local_llm/STT.py | 12 ++++++++---- 3 files changed, 11 insertions(+), 6 deletions(-) diff --git a/.env b/.env index 912f2a4..4fc09e4 100644 --- a/.env +++ b/.env @@ -2,3 +2,4 @@ GPU_LAYERS=0 MAIN_GPU=0 LOCAL_LLM_API_KEY= DEFAULT_MODEL=phi-2-dpo +WHISPER_MODEL=large-v3 diff --git a/app.py b/app.py index 5279594..c3938bb 100644 --- a/app.py +++ b/app.py @@ -12,10 +12,10 @@ load_dotenv() DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "phi-2-dpo") -WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base.en") +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3") CURRENT_MODEL = DEFAULT_MODEL if DEFAULT_MODEL else "phi-2-dpo" -CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else "base.en" +CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else 
"large-v3" LOADED_LLM = None LOADED_STT = None LOADED_CTTS = None diff --git a/local_llm/STT.py b/local_llm/STT.py index 1694e4f..94fc983 100644 --- a/local_llm/STT.py +++ b/local_llm/STT.py @@ -3,11 +3,12 @@ import io import requests import uuid +import torch from whisper_cpp import Whisper from pydub import AudioSegment -def download_whisper_model(model="base.en"): +def download_whisper_model(model="large-v3"): # https://huggingface.co/ggerganov/whisper.cpp if model not in [ "tiny", @@ -22,7 +23,7 @@ def download_whisper_model(model="base.en"): "large-v2", "large-v3", ]: - model = "base.en" + model = "large-v3" os.makedirs(os.path.join(os.getcwd(), "whispercpp"), exist_ok=True) model_path = os.path.join(os.getcwd(), "whispercpp", f"ggml-{model}.bin") if not os.path.exists(model_path): @@ -35,9 +36,12 @@ def download_whisper_model(model="base.en"): class STT: - def __init__(self, model="base.en"): + def __init__(self, model="large-v3"): + self.device = "GPU" if torch.cuda.is_available() else "CPU" model_path = download_whisper_model(model=model) - self.w = Whisper(model_path=model_path, verbose=False) + self.w = Whisper( + model_path=model_path, verbose=False, openvino_encode_device=self.device + ) async def transcribe_audio(self, base64_audio, audio_format="m4a"): filename = f"{uuid.uuid4().hex}.wav" From 3b36c6523fe192eac44719c53c781f617f805b7b Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 12:42:17 -0500 Subject: [PATCH 04/12] Logic improvement --- local_llm/LLM.py | 35 ++++++++++++++++++++++++++++++++--- 1 file changed, 32 insertions(+), 3 deletions(-) diff --git a/local_llm/LLM.py b/local_llm/LLM.py index a5326d5..5789d3c 100644 --- a/local_llm/LLM.py +++ b/local_llm/LLM.py @@ -261,9 +261,38 @@ def __init__( MAIN_GPU = os.environ.get("MAIN_GPU", 0) GPU_LAYERS = os.environ.get("GPU_LAYERS", 0) if torch.cuda.is_available() and int(GPU_LAYERS) == 0: - VRAM = round(torch.cuda.get_device_properties(0).total_memory / 1024**3) - print(f"[LLM] {VRAM} GB of VRAM detected.") - GPU_LAYERS = min(2 * max(0, (VRAM - 1) // 2), 36) + vram = round(torch.cuda.get_device_properties(0).total_memory / 1024**3) + print(f"[LLM] {vram} GB of VRAM detected.") + if vram >= 48: + GPU_LAYERS = vram + elif vram >= 24: + GPU_LAYERS = 48 + elif vram >= 18: + GPU_LAYERS = 36 + elif vram >= 16: + GPU_LAYERS = 30 + elif vram >= 12: + GPU_LAYERS = 20 + elif vram >= 10: + GPU_LAYERS = 18 + elif vram >= 8: + GPU_LAYERS = 16 + elif vram >= 7: + GPU_LAYERS = 14 + elif vram >= 6: + GPU_LAYERS = 12 + elif vram >= 5: + GPU_LAYERS = 10 + elif vram >= 4: + GPU_LAYERS = 8 + elif vram >= 3: + GPU_LAYERS = 6 + elif vram >= 2: + GPU_LAYERS = 4 + elif vram >= 1: + GPU_LAYERS = 2 + else: + GPU_LAYERS = 1 print( f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading." 
) From 3405303cb5818c45cae040e8d29df53f262bebc1 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 12:51:05 -0500 Subject: [PATCH 05/12] Better GPU layer logic --- local_llm/LLM.py | 30 ++---------------------------- 1 file changed, 2 insertions(+), 28 deletions(-) diff --git a/local_llm/LLM.py b/local_llm/LLM.py index 5789d3c..76ab997 100644 --- a/local_llm/LLM.py +++ b/local_llm/LLM.py @@ -263,36 +263,10 @@ def __init__( if torch.cuda.is_available() and int(GPU_LAYERS) == 0: vram = round(torch.cuda.get_device_properties(0).total_memory / 1024**3) print(f"[LLM] {vram} GB of VRAM detected.") - if vram >= 48: + if vram >= 48 or vram <= 2: GPU_LAYERS = vram - elif vram >= 24: - GPU_LAYERS = 48 - elif vram >= 18: - GPU_LAYERS = 36 - elif vram >= 16: - GPU_LAYERS = 30 - elif vram >= 12: - GPU_LAYERS = 20 - elif vram >= 10: - GPU_LAYERS = 18 - elif vram >= 8: - GPU_LAYERS = 16 - elif vram >= 7: - GPU_LAYERS = 14 - elif vram >= 6: - GPU_LAYERS = 12 - elif vram >= 5: - GPU_LAYERS = 10 - elif vram >= 4: - GPU_LAYERS = 8 - elif vram >= 3: - GPU_LAYERS = 6 - elif vram >= 2: - GPU_LAYERS = 4 - elif vram >= 1: - GPU_LAYERS = 2 else: - GPU_LAYERS = 1 + GPU_LAYERS = vram * 2 print( f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading." ) From 666674813749cc9b91f847fea5d5295be34dfa05 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 12:54:14 -0500 Subject: [PATCH 06/12] Use logging --- app.py | 11 ++++++++--- local_llm/CTTS.py | 3 ++- local_llm/LLM.py | 11 ++++++----- local_llm/STT.py | 3 ++- 4 files changed, 18 insertions(+), 10 deletions(-) diff --git a/app.py b/app.py index c3938bb..b29d7b3 100644 --- a/app.py +++ b/app.py @@ -7,6 +7,7 @@ from local_llm.STT import STT from local_llm.CTTS import CTTS import os +import logging from dotenv import load_dotenv from contextlib import asynccontextmanager @@ -19,6 +20,10 @@ LOADED_LLM = None LOADED_STT = None LOADED_CTTS = None +logging.basicConfig( + level=os.environ.get("LOGLEVEL", "INFO"), + format="%(asctime)s | %(levelname)s | %(message)s", +) @asynccontextmanager @@ -28,11 +33,11 @@ async def lifespan(app: FastAPI): global LOADED_LLM global LOADED_STT global LOADED_CTTS - print(f"[LLM] {CURRENT_MODEL} model loading...") + logging.info(f"[LLM] {CURRENT_MODEL} model loading...") LOADED_LLM = LLM(model=CURRENT_MODEL) - print(f"[STT] {CURRENT_STT_MODEL} model loading...") + logging.info(f"[STT] {CURRENT_STT_MODEL} model loading...") LOADED_STT = STT(model=CURRENT_STT_MODEL) - print(f"[CTTS] xttsv2_2.0.2 model loading...") + logging.info(f"[CTTS] xttsv2_2.0.2 model loading...") LOADED_CTTS = CTTS() diff --git a/local_llm/CTTS.py b/local_llm/CTTS.py index 92a5905..f51ecb7 100644 --- a/local_llm/CTTS.py +++ b/local_llm/CTTS.py @@ -5,6 +5,7 @@ import torch import torchaudio import requests +import logging from TTS.tts.configs.xtts_config import XttsConfig from TTS.tts.models.xtts import Xtts @@ -31,7 +32,7 @@ def download_xtts(): for filename, url in files_to_download.items(): destination = os.path.join(os.getcwd(), "xttsv2_2.0.2", filename) if not os.path.exists(destination): - print(f"[CTTS] Downloading {filename} for XTTSv2...") + logging.info(f"[CTTS] Downloading {filename} for XTTSv2...") response = requests.get(url, stream=True) block_size = 1024 # 1 Kibibyte with open(destination, "wb") as file: diff --git a/local_llm/LLM.py b/local_llm/LLM.py index 76ab997..64c1be6 100644 --- a/local_llm/LLM.py +++ b/local_llm/LLM.py @@ -8,6 +8,7 @@ import json import psutil import torch 
+import logging DEFAULT_MODEL = os.environ.get("DEFAULT_MODEL", "phi-2-dpo") @@ -169,13 +170,13 @@ def get_model(model_name="", models_dir="models"): if model_name != "mistrallite-7b" else f"https://huggingface.co/TheBloke/MistralLite-7B-GGUF/resolve/main/mistrallite.{quantization_type}.gguf" ) - print(f"[LLM] Downloading {model_name}...") + logging.info(f"[LLM] Downloading {model_name}...") with requests.get(url, stream=True, allow_redirects=True) as r: with open(file_path, "wb") as f: for chunk in r.iter_content(chunk_size=8192): f.write(chunk) if clip_url != "": - print(f"[LLM] Downloading {model_name} CLIP...") + logging.info(f"[LLM] Downloading {model_name} CLIP...") with requests.get(clip_url, stream=True, allow_redirects=True) as r: with open( f"{models_dir}/{model_name}/mmproj-model-f16.gguf", "wb" @@ -262,12 +263,12 @@ def __init__( GPU_LAYERS = os.environ.get("GPU_LAYERS", 0) if torch.cuda.is_available() and int(GPU_LAYERS) == 0: vram = round(torch.cuda.get_device_properties(0).total_memory / 1024**3) - print(f"[LLM] {vram} GB of VRAM detected.") + logging.info(f"[LLM] {vram} GB of VRAM detected.") if vram >= 48 or vram <= 2: GPU_LAYERS = vram else: GPU_LAYERS = vram * 2 - print( + logging.info( f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading." ) self.params = {} @@ -408,5 +409,5 @@ def models(self): if __name__ == "__main__": - print(f"[LLM] Downloading {DEFAULT_MODEL} model...") + logging.info(f"[LLM] Downloading {DEFAULT_MODEL} model...") get_model(model_name=DEFAULT_MODEL, models_dir="models") diff --git a/local_llm/STT.py b/local_llm/STT.py index 94fc983..dbf0f43 100644 --- a/local_llm/STT.py +++ b/local_llm/STT.py @@ -4,6 +4,7 @@ import requests import uuid import torch +import logging from whisper_cpp import Whisper from pydub import AudioSegment @@ -56,7 +57,7 @@ async def transcribe_audio(self, base64_audio, audio_format="m4a"): raise RuntimeError(f"Failed to load audio.") self.w.transcribe(file_path) user_input = self.w.output(output_txt=False) - print(f"[STT] Transcribed User Input: {user_input}") + logging.info(f"[STT] Transcribed User Input: {user_input}") user_input = user_input.replace("[BLANK_AUDIO]", "") os.remove(file_path) return user_input From 265168939028e61b1a1aedc37474270d12e94d98 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 13:11:26 -0500 Subject: [PATCH 07/12] Updates --- app.py | 4 ++-- docker-compose-cuda.yml | 1 + docker-compose-dev.yml | 1 + docker-compose-multi.yml | 36 ------------------------------------ docker-compose.yml | 1 + local_llm/STT.py | 11 ++++------- start.ps1 | 33 +++++++++++---------------------- 7 files changed, 20 insertions(+), 67 deletions(-) delete mode 100644 docker-compose-multi.yml diff --git a/app.py b/app.py index b29d7b3..3b98b28 100644 --- a/app.py +++ b/app.py @@ -13,10 +13,10 @@ load_dotenv() DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "phi-2-dpo") -WHISPER_MODEL = os.getenv("WHISPER_MODEL", "large-v3") +WHISPER_MODEL = os.getenv("WHISPER_MODEL", "base.en") CURRENT_MODEL = DEFAULT_MODEL if DEFAULT_MODEL else "phi-2-dpo" -CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else "large-v3" +CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else "base.en" LOADED_LLM = None LOADED_STT = None LOADED_CTTS = None diff --git a/docker-compose-cuda.yml b/docker-compose-cuda.yml index ae678a0..da0874b 100644 --- a/docker-compose-cuda.yml +++ b/docker-compose-cuda.yml @@ -8,6 +8,7 @@ services: - GPU_LAYERS=${GPU_LAYERS-0} - MAIN_GPU=${MAIN_GPU-0} - 
DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo} + - WHISPER_MODEL=${WHISPER_MODEL-base.en} - CMAKE_ARGS="-DLLAMA_CUBLAS=on" - LLAMA_CUBLAS=1 - CUDA_DOCKER_ARCH=all diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index e7d62e5..f13663a 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -7,6 +7,7 @@ services: - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - GPU_LAYERS=0 - DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo} + - WHISPER_MODEL=${WHISPER_MODEL-large-v3} ports: - "8091:8091" volumes: diff --git a/docker-compose-multi.yml b/docker-compose-multi.yml deleted file mode 100644 index 28ab648..0000000 --- a/docker-compose-multi.yml +++ /dev/null @@ -1,36 +0,0 @@ -version: '3.8' -services: - zephyr-llm: - image: joshxt/local-llm:cuda - environment: - - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - - GPU_LAYERS=${GPU_LAYERS-32} - - MAIN_GPU=${MAIN_GPU-1} - - DEFAULT_MODEL=zephyr-7b-beta - restart: unless-stopped - ports: - - "8091:8091" - volumes: - - ./models:/app/models - - ./outputs:/app/outputs - - ./voices:/app/voices - deploy: - resources: - reservations: - devices: - - driver: nvidia - count: all - capabilities: [ gpu ] - phi-llm: - image: joshxt/local-llm:cpu - environment: - - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - - THREADS=${THREADS-10} - - DEFAULT_MODEL=phi-2-dpo - restart: unless-stopped - ports: - - "8092:8091" - volumes: - - ./models:/app/models - - ./outputs:/app/outputs - - ./voices:/app/voices diff --git a/docker-compose.yml b/docker-compose.yml index fe43639..51928cf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,6 +6,7 @@ services: environment: - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo} + - WHISPER_MODEL=${WHISPER_MODEL-large-v3} restart: unless-stopped ports: - "8091:8091" diff --git a/local_llm/STT.py b/local_llm/STT.py index dbf0f43..8071efb 100644 --- a/local_llm/STT.py +++ b/local_llm/STT.py @@ -9,7 +9,7 @@ from pydub import AudioSegment -def download_whisper_model(model="large-v3"): +def download_whisper_model(model="base.en"): # https://huggingface.co/ggerganov/whisper.cpp if model not in [ "tiny", @@ -24,7 +24,7 @@ def download_whisper_model(model="large-v3"): "large-v2", "large-v3", ]: - model = "large-v3" + model = "base.en" os.makedirs(os.path.join(os.getcwd(), "whispercpp"), exist_ok=True) model_path = os.path.join(os.getcwd(), "whispercpp", f"ggml-{model}.bin") if not os.path.exists(model_path): @@ -37,12 +37,9 @@ def download_whisper_model(model="large-v3"): class STT: - def __init__(self, model="large-v3"): - self.device = "GPU" if torch.cuda.is_available() else "CPU" + def __init__(self, model="base.en"): model_path = download_whisper_model(model=model) - self.w = Whisper( - model_path=model_path, verbose=False, openvino_encode_device=self.device - ) + self.w = Whisper(model_path=model_path, verbose=False) async def transcribe_audio(self, base64_audio, audio_format="m4a"): filename = f"{uuid.uuid4().hex}.wav" diff --git a/start.ps1 b/start.ps1 index fac53f4..3f8d2f7 100644 --- a/start.ps1 +++ b/start.ps1 @@ -30,10 +30,6 @@ $env:GPU_LAYERS = Get-Content -Path ".env" | Select-String -Pattern "GPU_LAYERS" if ($null -eq $env:GPU_LAYERS) { $env:GPU_LAYERS = "0" } -$env:MULTI_SERVER = Get-Content -Path ".env" | Select-String -Pattern "MULTI_SERVER" | ForEach-Object { $_.ToString().Split("=")[1] } -if ($null -eq $env:MULTI_SERVER) { - $env:MULTI_SERVER = "" -} $env:CMAKE_ARGS = Get-Content -Path ".env" | Select-String -Pattern "CMAKE_ARGS" | ForEach-Object { $_.ToString().Split("=")[1] } if ($null 
-eq $env:CMAKE_ARGS) { $env:CMAKE_ARGS = "" @@ -72,28 +68,21 @@ if( $env:RUN_WITHOUT_DOCKER.Length -ne 0) { } & uvicorn app:app --host 0.0.0.0 --port 8091 --workers 1 --proxy-headers } else { - if ($env:MULTI_SERVER.Length -ne 0) { - docker-compose -f docker-compose-multi.yml down + if ($env:CUDA_DOCKER_ARCH.Length -ne 0) { + docker-compose -f docker-compose-cuda.yml down if ($env:AUTO_UPDATE -eq "true") { - docker-compose -f docker-compose-multi.yml pull + Write-Host "Pulling latest images, please wait.." + docker-compose -f docker-compose-cuda.yml pull } Write-Host "Starting server, please wait.." - docker-compose -f docker-compose-multi.yml up + docker-compose -f docker-compose-cuda.yml up } else { - if ($env:CUDA_DOCKER_ARCH.Length -ne 0) { - docker-compose -f docker-compose-cuda.yml down - if ($env:AUTO_UPDATE -eq "true") { - docker-compose -f docker-compose-cuda.yml pull - } - Write-Host "Starting server, please wait.." - docker-compose -f docker-compose-cuda.yml up - } else { - docker-compose down - if ($env:AUTO_UPDATE -eq "true") { - docker-compose pull - } - Write-Host "Starting server, please wait.." - docker-compose up + docker-compose down + if ($env:AUTO_UPDATE -eq "true") { + Write-Host "Pulling latest images, please wait.." + docker-compose pull } + Write-Host "Starting server, please wait.." + docker-compose up } } \ No newline at end of file From fe26c13d9972e90b0f90c356aa928ec7268897d0 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 13:18:18 -0500 Subject: [PATCH 08/12] Use base.en by default --- .env | 2 +- docker-compose-dev.yml | 2 +- docker-compose.yml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.env b/.env index 4fc09e4..dc405b8 100644 --- a/.env +++ b/.env @@ -2,4 +2,4 @@ GPU_LAYERS=0 MAIN_GPU=0 LOCAL_LLM_API_KEY= DEFAULT_MODEL=phi-2-dpo -WHISPER_MODEL=large-v3 +WHISPER_MODEL=base.en diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index f13663a..5369f6b 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -7,7 +7,7 @@ services: - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - GPU_LAYERS=0 - DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo} - - WHISPER_MODEL=${WHISPER_MODEL-large-v3} + - WHISPER_MODEL=${WHISPER_MODEL-base.en} ports: - "8091:8091" volumes: diff --git a/docker-compose.yml b/docker-compose.yml index 51928cf..a1389ae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: environment: - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo} - - WHISPER_MODEL=${WHISPER_MODEL-large-v3} + - WHISPER_MODEL=${WHISPER_MODEL-base.en} restart: unless-stopped ports: - "8091:8091" From eb4fa82ebba956b31f6f77cbc9803bc58398e66d Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 13:52:44 -0500 Subject: [PATCH 09/12] Fix issue with context lib --- app.py | 25 +++++++------------------ local_llm/STT.py | 1 - 2 files changed, 7 insertions(+), 19 deletions(-) diff --git a/app.py b/app.py index 3b98b28..9abd4e0 100644 --- a/app.py +++ b/app.py @@ -9,7 +9,6 @@ import os import logging from dotenv import load_dotenv -from contextlib import asynccontextmanager load_dotenv() DEFAULT_MODEL = os.getenv("DEFAULT_MODEL", "phi-2-dpo") @@ -17,31 +16,21 @@ CURRENT_MODEL = DEFAULT_MODEL if DEFAULT_MODEL else "phi-2-dpo" CURRENT_STT_MODEL = WHISPER_MODEL if WHISPER_MODEL else "base.en" -LOADED_LLM = None -LOADED_STT = None -LOADED_CTTS = None logging.basicConfig( level=os.environ.get("LOGLEVEL", "INFO"), format="%(asctime)s | %(levelname)s | %(message)s", ) 
-@asynccontextmanager -async def lifespan(app: FastAPI): - global CURRENT_MODEL - global CURRENT_STT_MODEL - global LOADED_LLM - global LOADED_STT - global LOADED_CTTS - logging.info(f"[LLM] {CURRENT_MODEL} model loading...") - LOADED_LLM = LLM(model=CURRENT_MODEL) - logging.info(f"[STT] {CURRENT_STT_MODEL} model loading...") - LOADED_STT = STT(model=CURRENT_STT_MODEL) - logging.info(f"[CTTS] xttsv2_2.0.2 model loading...") - LOADED_CTTS = CTTS() +logging.info(f"[LLM] {CURRENT_MODEL} model loading...") +LOADED_LLM = LLM(model=CURRENT_MODEL) +logging.info(f"[STT] {CURRENT_STT_MODEL} model loading...") +LOADED_STT = STT(model=CURRENT_STT_MODEL) +logging.info(f"[CTTS] xttsv2_2.0.2 model loading...") +LOADED_CTTS = CTTS() -app = FastAPI(title="Local-LLM Server", docs_url="/", lifespan=lifespan) +app = FastAPI(title="Local-LLM Server", docs_url="/") app.add_middleware( CORSMiddleware, allow_origins=["*"], diff --git a/local_llm/STT.py b/local_llm/STT.py index 8071efb..1d06f48 100644 --- a/local_llm/STT.py +++ b/local_llm/STT.py @@ -3,7 +3,6 @@ import io import requests import uuid -import torch import logging from whisper_cpp import Whisper from pydub import AudioSegment From d6489066dede30ee7bf417240edfee85db7c2d47 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 13:55:44 -0500 Subject: [PATCH 10/12] Improve logging --- app.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/app.py b/app.py index 9abd4e0..e5ae4e6 100644 --- a/app.py +++ b/app.py @@ -21,13 +21,18 @@ format="%(asctime)s | %(levelname)s | %(message)s", ) +logging.info(f"[CTTS] xttsv2_2.0.2 model loading. Please wait...") +LOADED_CTTS = CTTS() +logging.info(f"[CTTS] xttsv2_2.0.2 model loaded successfully.") -logging.info(f"[LLM] {CURRENT_MODEL} model loading...") -LOADED_LLM = LLM(model=CURRENT_MODEL) -logging.info(f"[STT] {CURRENT_STT_MODEL} model loading...") +logging.info(f"[STT] {CURRENT_STT_MODEL} model loading. Please wait...") LOADED_STT = STT(model=CURRENT_STT_MODEL) -logging.info(f"[CTTS] xttsv2_2.0.2 model loading...") -LOADED_CTTS = CTTS() +logging.info(f"[STT] {CURRENT_STT_MODEL} model loaded successfully.") + +logging.info(f"[LLM] {CURRENT_MODEL} model loading. Please wait...") +LOADED_LLM = LLM(model=CURRENT_MODEL) +logging.info(f"[LLM] {CURRENT_MODEL} model loaded successfully.") +logging.info(f"[Local-LLM] Server is ready.") app = FastAPI(title="Local-LLM Server", docs_url="/") From 818146145c86adc1b281f7e1c87774d437686027 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 14:07:12 -0500 Subject: [PATCH 11/12] v0.1.1 --- README.md | 10 ++++++---- setup.py | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 766c9c5..5c1ea96 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ [![GitHub](https://img.shields.io/badge/GitHub-Local%20LLM-blue?logo=github&style=plastic)](https://github.com/Josh-XT/Local-LLM) [![Dockerhub](https://img.shields.io/badge/Docker-Local%20LLM-blue?logo=docker&style=plastic)](https://hub.docker.com/r/joshxt/local-llm) -Local-LLM is a simple [llama.cpp](https://github.com/ggerganov/llama.cpp) server that easily exposes a list of local language models to choose from to run on your own computer. It is designed to be as easy as possible to get started with running local models. It automatically handles downloading the model of your choice and configuring the server based on your CPU, RAM, and GPU. 
It also includes [OpenAI Style](https://pypi.org/project/openai/) endpoints for easy integration with other applications.
+Local-LLM is a simple [llama.cpp](https://github.com/ggerganov/llama.cpp) server that easily exposes a list of local language models to choose from to run on your own computer. It is designed to be as easy as possible to get started with running local models. It automatically handles downloading the model of your choice and configuring the server based on your CPU, RAM, and GPU. It also includes [OpenAI Style](https://pypi.org/project/openai/) endpoints for easy integration with other applications. Additional functionality is built in for voice-cloning text-to-speech and voice-to-text transcription, enabling easy voice communication entirely offline after the initial setup.
 
 ## Prerequisites
 
@@ -26,7 +26,9 @@
 git clone https://github.com/Josh-XT/Local-LLM
 cd Local-LLM
 ```
 
-Expand Environment Setup if you would like to modify the default environment variables, otherwise skip to Usage.
+### Environment Setup
+
+Expand Environment Setup below if you would like to modify the default environment variables; otherwise, skip to Usage. All environment variables are optional and have useful defaults. To change the default model that Local-LLM starts with, edit your `.env` file.
Environment Setup (Optional)
 
@@ -39,10 +41,10 @@
 Replace the environment variables with your desired settings. Assumptions will be made for any values you leave unset.
 
 - `LOCAL_LLM_API_KEY` - The API key to use for the server. If not set, the server will not require an API key when accepting requests.
 - `DEFAULT_MODEL` - The default model to use when no model is specified. Default is `phi-2-dpo`.
-- `MULTI_SERVER` - This will run two servers, one with `zephyr-7b-beta` running on GPU, and one with `phi-2-dpo` running on CPU. If set, this will run both, otherwise it will only run one server.
+- `WHISPER_MODEL` - The model to use for speech-to-text. Default is `base.en`.
 - `AUTO_UPDATE` - Whether or not to automatically update Local-LLM. Default is `true`.
 - `THREADS` - The number of CPU threads Local-LLM is allowed to use. Default is `your CPU thread count minus 2`.
-- `GPU_LAYERS` (Only applicable to NVIDIA GPU) - The number of layers to use on the GPU. Default is `0`.
+- `GPU_LAYERS` (Only applicable to NVIDIA GPU) - The number of layers to use on the GPU. Default is `0`. If left at `0` on a system with an NVIDIA GPU, Local-LLM will automatically determine the optimal number of layers based on your GPU's memory.
 - `MAIN_GPU` (Only applicable to NVIDIA GPU) - The GPU to use for the main model. Default is `0`.
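Taken together, the variables documented above correspond to a `.env` along these lines. This is a minimal sketch using the documented defaults, not a required configuration; adjust the values to your hardware:

```
# Example .env — every variable is optional (illustrative defaults shown)
LOCAL_LLM_API_KEY=
DEFAULT_MODEL=phi-2-dpo
WHISPER_MODEL=base.en
AUTO_UPDATE=true
GPU_LAYERS=0
MAIN_GPU=0
```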
diff --git a/setup.py b/setup.py index 61d59bc..b79208e 100644 --- a/setup.py +++ b/setup.py @@ -10,7 +10,7 @@ setup( name="local-llm", - version="0.1.0", + version="0.1.1", description="Local-LLM is a llama.cpp server in Docker with OpenAI Style Endpoints.", long_description=long_description, long_description_content_type="text/markdown", From 77a1f8b96442e7b2221d0fc5dda659b8ef9984e4 Mon Sep 17 00:00:00 2001 From: Josh XT Date: Sat, 27 Jan 2024 14:31:49 -0500 Subject: [PATCH 12/12] Fix workflow and messages --- .github/workflows/publish-docker-dev.yml | 2 +- docker-compose-dev.yml | 16 ++++++++++++++-- local_llm/LLM.py | 4 ++-- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/.github/workflows/publish-docker-dev.yml b/.github/workflows/publish-docker-dev.yml index 70788b1..8ab3193 100644 --- a/.github/workflows/publish-docker-dev.yml +++ b/.github/workflows/publish-docker-dev.yml @@ -58,7 +58,7 @@ jobs: - name: Get full image path id: get_image_path run: | - echo "IMAGE_PATH=$(echo ghcr.io/${{ env.GITHUB_USER }}/${{ env.REPO_NAME }}:${{ matrix.tag_name }}-${{ env.BRANCH_NAME }}-${{ github.sha }})" >> $GITHUB_ENV + echo "IMAGE_PATH=$(echo ghcr.io/${{ env.GITHUB_USER }}/${{ env.REPO_NAME }}:cpu-dev-${{ env.BRANCH_NAME }}-${{ github.sha }})" >> $GITHUB_ENV test-local-llm: uses: josh-xt/AGiXT/.github/workflows/operation-test-with-jupyter.yml@main diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index 5369f6b..c33a4fe 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -2,15 +2,27 @@ version: '3.8' services: local-llm: - image: ghcr.io/josh-xt/local-llm:cpu-dev-dev + image: ghcr.io/josh-xt/local-llm:cpu-dev environment: - LOCAL_LLM_API_KEY=${LOCAL_LLM_API_KEY-} - - GPU_LAYERS=0 + - GPU_LAYERS=${GPU_LAYERS-0} + - MAIN_GPU=${MAIN_GPU-0} - DEFAULT_MODEL=${DEFAULT_MODEL-phi-2-dpo} - WHISPER_MODEL=${WHISPER_MODEL-base.en} + - CMAKE_ARGS="-DLLAMA_CUBLAS=on" + - LLAMA_CUBLAS=1 + - CUDA_DOCKER_ARCH=all + restart: unless-stopped ports: - "8091:8091" volumes: - ./models:/app/models - ./outputs:/app/outputs - ./voices:/app/voices + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: all + capabilities: [ gpu ] diff --git a/local_llm/LLM.py b/local_llm/LLM.py index 64c1be6..ed10237 100644 --- a/local_llm/LLM.py +++ b/local_llm/LLM.py @@ -263,13 +263,13 @@ def __init__( GPU_LAYERS = os.environ.get("GPU_LAYERS", 0) if torch.cuda.is_available() and int(GPU_LAYERS) == 0: vram = round(torch.cuda.get_device_properties(0).total_memory / 1024**3) - logging.info(f"[LLM] {vram} GB of VRAM detected.") + logging.info(f"[LLM] {vram}GB of VRAM detected.") if vram >= 48 or vram <= 2: GPU_LAYERS = vram else: GPU_LAYERS = vram * 2 logging.info( - f"[LLM] Running {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading." + f"[LLM] Loading {DEFAULT_MODEL} with {GPU_LAYERS} GPU layers and {THREADS} CPU threads available for offloading. Please wait..." ) self.params = {} self.model_name = model
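For readers following the series, patches 04, 05, and 12 converge on a simple VRAM-based heuristic for `GPU_LAYERS`, applied only when the environment variable is left at `0` on a CUDA-capable machine. A standalone sketch of that logic (the helper name is ours for illustration; the arithmetic mirrors the final diff above):

```python
import torch


def auto_gpu_layers() -> int:
    """Mirror the GPU_LAYERS heuristic from local_llm/LLM.py.

    Cards at the extremes (>= 48 GB or <= 2 GB of VRAM) offload one
    layer per GB of VRAM; everything in between offloads two layers
    per GB.
    """
    if not torch.cuda.is_available():
        return 0  # CPU-only: nothing to offload
    vram = round(torch.cuda.get_device_properties(0).total_memory / 1024**3)
    if vram >= 48 or vram <= 2:
        return vram
    return vram * 2
```

On a 24 GB card, for example, this yields 48 layers — the same value as the top of the hand-tuned ladder that patch 05 replaced, but without maintaining a long `elif` chain.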