Commit
Merge pull request #33 from DevXT-LLC/Add-basic-ui
Added a basic Streamlit UI to test inference
Josh-XT authored Mar 19, 2024
2 parents 51e135e + 40f9add commit 5b8ca7e
Showing 4 changed files with 183 additions and 38 deletions.
27 changes: 20 additions & 7 deletions Pipes.py
@@ -73,13 +73,16 @@ def __init__(self):

async def get_response(self, data, completion_type="chat"):
data["local_uri"] = self.local_uri
images_uploaded = False
if "messages" in data:
if isinstance(data["messages"][-1]["content"], list):
messages = data["messages"][-1]["content"]
for message in messages:
if "text" in message:
prompt = message["text"]
for message in messages:
if "image_url" in message:
images_uploaded = True
if "audio_url" in message:
audio_url = (
message["audio_url"]["url"]
@@ -133,12 +136,27 @@ async def get_response(self, data, completion_type="chat"):
if completion_type == "chat"
else data["prompt"]
)
if isinstance(user_message, list):
user_message = prompt
for message in messages:
if "image_url" in message:
if "url" in message["image_url"]:
if not message["image_url"]["url"].startswith("data:"):
user_message += (
"Uploaded Image:"
+ message["image_url"]["url"]
+ "\n"
)
response_text = (
response["choices"][0]["text"]
if completion_type != "chat"
else response["choices"][0]["message"]["content"]
)
img_gen_prompt = f"Users message: {user_message} \nAssistant response: {response_text} \n\n**The assistant is acting as sentiment analysis expert and only responds with a concise YES or NO answer on if the user would like an image as visual or a picture generated. No other explanation is needed!**\nShould an image be created to accompany the assistant response?\nAssistant: "
if "data:" in user_message:
user_message = user_message.replace(
user_message.split("data:")[1].split("'")[0], ""
)
img_gen_prompt = f"Users message: {user_message} \n\n{'The user uploaded an image, one does not need generated unless the user is specifically asking.' if images_uploaded else ''} **The assistant is acting as sentiment analysis expert and only responds with a concise YES or NO answer on if the user would like an image as visual or a picture generated. No other explanation is needed!**\nWould the user potentially like an image generated based on their message?\nAssistant: "
logging.info(f"[IMG] Decision maker prompt: {img_gen_prompt}")
create_img = self.llm.chat(
messages=[{"role": "system", "content": img_gen_prompt}],
@@ -149,12 +167,7 @@ async def get_response(self, data, completion_type="chat"):
create_img = str(create_img["choices"][0]["message"]["content"]).lower()
logging.info(f"[IMG] Decision maker response: {create_img}")
if "yes" in create_img or "es," in create_img:
prompt = (
data["messages"][-1]["content"]
if completion_type == "chat"
else data["prompt"]
)
img_prompt = f"**The assistant is acting as a Stable Diffusion Prompt Generator.**\n\nUsers message: {prompt} \nAssistant response: {response} \n\nImportant rules to follow:\n- Describe subjects in detail, specify image type (e.g., digital illustration), art style (e.g., steampunk), and background. Include art inspirations (e.g., Art Station, specific artists). Detail lighting, camera (type, lens, view), and render (resolution, style). The weight of a keyword can be adjusted by using the syntax (((keyword))) , put only those keyword inside ((())) which is very important because it will have more impact so anything wrong will result in unwanted picture so be careful. Realistic prompts: exclude artist, specify lens. Separate with double lines. Max 60 words, avoiding 'real' for fantastical.\n- Based on the message from the user and response of the assistant, you will need to generate one detailed stable diffusion image generation prompt based on the context of the conversation to accompany the assistant response.\n- The prompt can only be up to 60 words long, so try to be concise while using enough descriptive words to make a proper prompt.\n- Following all rules will result in a $2000 tip that you can spend on anything!\n- Must be in markdown code block to be parsed out and only provide prompt in the code block, nothing else.\nStable Diffusion Prompt Generator: "
img_prompt = f"**The assistant is acting as a Stable Diffusion Prompt Generator.**\n\nUsers message: {user_message} \nAssistant response: {response_text} \n\nImportant rules to follow:\n- Describe subjects in detail, specify image type (e.g., digital illustration), art style (e.g., steampunk), and background. Include art inspirations (e.g., Art Station, specific artists). Detail lighting, camera (type, lens, view), and render (resolution, style). The weight of a keyword can be adjusted by using the syntax (((keyword))) , put only those keyword inside ((())) which is very important because it will have more impact so anything wrong will result in unwanted picture so be careful. Realistic prompts: exclude artist, specify lens. Separate with double lines. Max 60 words, avoiding 'real' for fantastical.\n- Based on the message from the user and response of the assistant, you will need to generate one detailed stable diffusion image generation prompt based on the context of the conversation to accompany the assistant response.\n- The prompt can only be up to 60 words long, so try to be concise while using enough descriptive words to make a proper prompt.\n- Following all rules will result in a $2000 tip that you can spend on anything!\n- Must be in markdown code block to be parsed out and only provide prompt in the code block, nothing else.\nStable Diffusion Prompt Generator: "
image_generation_prompt = self.llm.chat(
messages=[{"role": "system", "content": img_prompt}],
max_tokens=100,
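
Note: the decision-maker change above reduces to a reusable pattern: ask the LLM a closed YES/NO question about the conversation, then branch on its answer. A minimal sketch of the pattern as a standalone function, assuming a hypothetical OpenAI-style `chat` callable standing in for `self.llm.chat` (the helper name, `max_tokens` value, and response shape are illustrative, not a pinned API):

    # Sketch: using the LLM as a YES/NO classifier for "should an image be generated?"
    # `chat` is a stand-in for self.llm.chat in Pipes.py; max_tokens=10 is a guess.
    def should_generate_image(chat, user_message: str, images_uploaded: bool) -> bool:
        hint = (
            "The user uploaded an image, one does not need generated unless "
            "the user is specifically asking."
            if images_uploaded
            else ""
        )
        prompt = (
            f"Users message: {user_message} \n\n{hint} **The assistant is acting as "
            "sentiment analysis expert and only responds with a concise YES or NO "
            "answer on if the user would like an image as visual or a picture "
            "generated. No other explanation is needed!**\nWould the user "
            "potentially like an image generated based on their message?\nAssistant: "
        )
        response = chat(messages=[{"role": "system", "content": prompt}], max_tokens=10)
        answer = str(response["choices"][0]["message"]["content"]).lower()
        # The extra "es," check mirrors the condition in the diff.
        return "yes" in answer or "es," in answer
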
51 changes: 21 additions & 30 deletions ezlocalai/VLM.py
@@ -10,6 +10,7 @@
import uuid
import tiktoken
import os
import base64


def get_tokens(text: str) -> int:
@@ -40,30 +41,20 @@ def __init__(self, model="deepseek-ai/deepseek-vl-1.3b-chat"):
def chat(self, messages, **kwargs):
pil_images = []
images = []
conversation = []
prompt = ""
for message in messages:
if isinstance(message["content"], str):
role = message["role"] if "role" in message else "User"
if role.lower() == "user":
role = "User"
conversation.append(
{
"role": role,
"content": message["content"],
}
)
prompt += f"{message['content']}\n\n"
if role.lower() == "system":
prompt = f"System: {message['content']}\n\nUser: {prompt}"
if isinstance(message["content"], list):
for msg in message["content"]:
if "text" in msg:
role = message["role"] if "role" in message else "User"
if role.lower() == "user":
role = "User"
conversation.append(
{
"role": role,
"content": "<image_placeholder>" + msg["text"],
}
)
prompt += f"{msg['text']}\n\n"
if "image_url" in msg:
url = (
msg["image_url"]["url"]
@@ -73,25 +64,25 @@ def chat(self, messages, **kwargs):
image_path = f"./outputs/{uuid.uuid4().hex}.jpg"
if url.startswith("http"):
image = requests.get(url).content
with open(image_path, "wb") as f:
f.write(image)
images.append(image_path)
else:
with open(image_path, "wb") as f:
f.write(url)
images.append(image_path)
file_type = url.split(",")[0].split("/")[1].split(";")[0]
if file_type == "jpeg":
file_type = "jpg"
image_path = f"./outputs/{uuid.uuid4().hex}.{file_type}"
image = base64.b64decode(url.split(",")[1])
with open(image_path, "wb") as f:
f.write(image)
images.append(image_path)
pil_img = PIL.Image.open(image_path)
pil_img = pil_img.convert("RGB")
pil_images.append(pil_img)
if conversation == []:
conversation.append(
{
"role": "User",
"content": messages[0]["content"],
}
)
conversation[0]["images"] = images
conversation.append({"role": "Assistant", "content": ""})
if len(images) > 0:
for image in images:
prompt = f"<image_placeholder> {prompt}"
conversation = [
{"role": "User", "content": prompt, "images": images},
{"role": "Assistant", "content": ""},
]
prepare_inputs = self.vl_chat_processor(
conversations=conversation, images=pil_images, force_batchify=True
).to(self.vl_gpt.device)
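
Note: the rewritten image handling in VLM.py now distinguishes remote URLs from base64 data URLs: remote images are fetched with requests, while data URLs get their file type parsed out of the MIME header and their payload base64-decoded before being written to disk (the old code wrote the raw URL string as the file contents). A standalone sketch of the data-URL branch, assuming a well-formed "data:image/<type>;base64,<payload>" input (the helper name is illustrative):

    import base64
    import os
    import uuid

    def save_data_url(url: str, out_dir: str = "./outputs") -> str:
        """Decode a base64 data URL to a file and return its path."""
        os.makedirs(out_dir, exist_ok=True)
        # "data:image/jpeg;base64,<payload>" -> "jpeg" -> "jpg"
        file_type = url.split(",")[0].split("/")[1].split(";")[0]
        if file_type == "jpeg":
            file_type = "jpg"
        image_path = f"{out_dir}/{uuid.uuid4().hex}.{file_type}"
        with open(image_path, "wb") as f:
            f.write(base64.b64decode(url.split(",")[1]))
        return image_path
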
2 changes: 1 addition & 1 deletion setup.py
@@ -10,7 +10,7 @@

setup(
name="ezlocalai",
version="0.1.6",
version="0.1.7",
description="ezlocalai is an easy to set up local artificial intelligence server with OpenAI Style Endpoints.",
long_description=long_description,
long_description_content_type="text/markdown",
141 changes: 141 additions & 0 deletions ui.py
@@ -0,0 +1,141 @@
import streamlit as st
import openai
import requests
import time
import base64
import os
import re
from datetime import datetime
from dotenv import load_dotenv

load_dotenv()
st.title("ezLocalai")

EZLOCALAI_SERVER = os.getenv("EZLOCALAI_URL", "http://localhost:8091")
EZLOCALAI_API_KEY = os.getenv("EZLOCALAI_API_KEY", "none")
DEFAULT_LLM = os.getenv("DEFAULT_LLM", "phi-2-dpo")
openai.base_url = f"{EZLOCALAI_SERVER}/v1/"
openai.api_key = EZLOCALAI_API_KEY if EZLOCALAI_API_KEY else EZLOCALAI_SERVER
HEADERS = {
"Content-Type": "application/json",
"Authorization": f"{EZLOCALAI_API_KEY}",
"ngrok-skip-browser-warning": "true",
}


def get_voices():
global EZLOCALAI_SERVER
global HEADERS
voices = requests.get(f"{EZLOCALAI_SERVER}/v1/audio/voices", headers=HEADERS)
return voices.json()


waiting_for_server = False

while True:
try:
voices = get_voices()
break
except:
if waiting_for_server == False:
st.spinner("Waiting for server to start...")
waiting_for_server = True
time.sleep(1)
waiting_for_server = False


def display_content(content):
global EZLOCALAI_SERVER
global HEADERS
outputs_url = f"{EZLOCALAI_SERVER}/outputs/"
os.makedirs("outputs", exist_ok=True)
if "http://localhost:8091/outputs/" in content:
if outputs_url != "http://localhost:8091/outputs/":
content = content.replace("http://localhost:8091/outputs/", outputs_url)
if "<audio controls>" in content or " " not in content:
try:
audio_response = content.split("data:audio/wav;base64,")[1].split('" type')[
0
]
except:
audio_response = content
file_name = f"outputs/{datetime.now().strftime('%Y-%m-%d-%H-%M-%S')}.wav"
with open(file_name, "wb") as fh:
fh.write(base64.b64decode(audio_response))
st.audio(file_name, format="audio/wav", start_time=0)
if outputs_url in content:
urls = re.findall(f"{re.escape(outputs_url)}[^\"' ]+", content)
urls = urls[0].split("\n\n")
for url in urls:
file_name = url.split("/")[-1]
url = f"{outputs_url}{file_name}"
data = requests.get(url, headers=HEADERS).content
if url.endswith(".jpg") or url.endswith(".png"):
content = content.replace(url, "")
st.image(data, use_column_width=True)
elif url.endswith(".mp4"):
content = content.replace(url, "")
st.audio(data, format="audio/mp4", start_time=0)
elif url.endswith(".wav"):
content = content.replace(url, "")
st.audio(data, format="audio/wav", start_time=0)
st.markdown(content, unsafe_allow_html=True)


with st.form("chat"):
SYSTEM_MESSAGE = st.text_area(
"System Prompt",
"The assistant is acting as a creative writer. All of your text responses are transcribed to audio and sent to the user. Be concise with all responses. After the request is fulfilled, end with </s>.",
)
DEFAULT_MAX_TOKENS = st.number_input(
"Max Output Tokens", min_value=10, max_value=300000, value=256
)
DEFAULT_TEMPERATURE = st.number_input(
"Temperature", min_value=0.0, max_value=1.0, value=0.5
)
DEFAULT_TOP_P = st.number_input("Top P", min_value=0.0, max_value=1.0, value=0.9)
voice_drop_down = st.selectbox(
"Text-to-Speech Response Voice", ["None"] + voices["voices"], index=0
)
uploaded_file = st.file_uploader("Upload an image")
prompt = st.text_area("Your Message:", "Describe each stage of this image.")
send = st.form_submit_button("Send")
if prompt != "" and send:
st.markdown("---")
st.spinner("Thinking...")
messages = []
if SYSTEM_MESSAGE != "":
messages.append({"role": "system", "content": SYSTEM_MESSAGE})
if uploaded_file:
messages.append(
{
"role": "user",
"content": [
{"type": "text", "text": prompt},
{
"type": f"{uploaded_file.type.split('/')[0]}_url",
f"{uploaded_file.type.split('/')[0]}_url": {
"url": f"data:{uploaded_file.type};base64,{base64.b64encode(uploaded_file.read()).decode('utf-8')}",
},
},
],
},
)
if uploaded_file.type.startswith("image"):
st.image(uploaded_file, use_column_width=True)
if messages == []:
messages = [
{"role": "user", "content": prompt},
]
extra_body = {} if voice_drop_down == "None" else {"voice": voice_drop_down}
response = openai.chat.completions.create(
model=DEFAULT_LLM,
messages=messages,
temperature=DEFAULT_TEMPERATURE,
max_tokens=DEFAULT_MAX_TOKENS,
top_p=DEFAULT_TOP_P,
stream=False,
extra_body=extra_body,
)
display_content(response.choices[0].message.content)
st.balloons()
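
Note: ui.py also documents the request shape the server expects for multimodal input: plain text goes in a "text" content part, and an upload becomes an "image_url" (or "audio_url") part whose "url" field is a base64 data URL. A minimal non-Streamlit sketch of the same request, assuming a server running with the defaults above; the image path is a placeholder:

    import base64
    import openai

    openai.base_url = "http://localhost:8091/v1/"
    openai.api_key = "none"

    with open("example.jpg", "rb") as f:  # placeholder local image
        encoded = base64.b64encode(f.read()).decode("utf-8")

    response = openai.chat.completions.create(
        model="phi-2-dpo",
        messages=[
            {
                "role": "user",
                "content": [
                    {"type": "text", "text": "Describe each stage of this image."},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/jpeg;base64,{encoded}"},
                    },
                ],
            }
        ],
        max_tokens=256,
    )
    print(response.choices[0].message.content)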
