
Commit

Merge pull request #5347 from oobabooga/dev
Merge dev branch
oobabooga authored Jan 22, 2024
2 parents d8c3a5b + 821dd65 commit 1343aa3
Showing 23 changed files with 187 additions and 131 deletions.
6 changes: 4 additions & 2 deletions README.md
@@ -412,8 +412,10 @@ If you would like to contribute to the project, check out the [Contributing guid
* Subreddit: https://www.reddit.com/r/oobabooga/
* Discord: https://discord.gg/jwZCF2dPQN

## Acknowledgment & support
## Acknowledgment

In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.

If you find this project useful, I have a [Ko-fi page](https://ko-fi.com/oobabooga) where you can make a donation. Your support helps me continue maintaining and improving this project.
## Support

[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/B0B5JFPBO)
5 changes: 4 additions & 1 deletion css/main.css
@@ -490,7 +490,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
#show-controls label {
z-index: 1000;
position: absolute;
left: calc(100% - 168px);
right: 0;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}

#show-controls span {
41 changes: 39 additions & 2 deletions extensions/openai/completions.py
@@ -22,7 +22,12 @@
load_instruction_template_memoized
)
from modules.presets import load_preset_memoized
from modules.text_generation import decode, encode, generate_reply
from modules.text_generation import (
decode,
encode,
generate_reply,
get_reply_from_output_ids
)


class LogitsBiasProcessor(LogitsProcessor):
@@ -56,7 +61,7 @@ def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:
if self.logprobs is not None: # 0-5
log_e_probabilities = F.log_softmax(logits, dim=1)
top_values, top_indices = torch.topk(log_e_probabilities, k=self.logprobs + 1)
top_tokens = [decode(tok) for tok in top_indices[0]]
top_tokens = [get_reply_from_output_ids([tok]) for tok in top_indices[0]]
top_probs = [float(x) for x in top_values[0]]
self.token_alternatives = dict(zip(top_tokens, top_probs))
debug_msg(repr(self))
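
For context, here is a minimal standalone sketch (not the extension's actual code path) of the logprobs computation this hunk touches: the next-token logits are turned into log-probabilities, the top candidates are kept, and each candidate id is decoded on its own, which is what the switch from `decode(tok)` to `get_reply_from_output_ids([tok])` preserves for tokenizers that drop the leading space. The `gpt2` checkpoint is only a placeholder.

```python
import torch
import torch.nn.functional as F
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder model
model = AutoModelForCausalLM.from_pretrained("gpt2")

input_ids = tokenizer("The capital of France is", return_tensors="pt").input_ids
with torch.no_grad():
    logits = model(input_ids).logits[:, -1, :]      # logits for the next token

log_probs = F.log_softmax(logits, dim=-1)
top_values, top_indices = torch.topk(log_probs, k=5)

# Decode each candidate id individually and pair it with its log-probability,
# mirroring the token_alternatives dictionary built above.
alternatives = {
    tokenizer.decode([int(tok)]): float(val)
    for tok, val in zip(top_indices[0], top_values[0])
}
print(alternatives)
```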
@@ -87,6 +92,10 @@ def process_parameters(body, is_legacy=False):
if generate_params['truncation_length'] == 0:
generate_params['truncation_length'] = shared.settings['truncation_length']

if generate_params['temperature'] == 0:
generate_params['do_sample'] = False
generate_params['top_k'] = 1

if body['preset'] is not None:
preset = load_preset_memoized(body['preset'])
generate_params.update(preset)
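
As a hedged usage sketch of what this change means through the OpenAI-compatible API: a request with temperature set to 0 is now mapped to greedy decoding (do_sample=False, top_k=1) instead of being handed to the sampler unchanged. The host and port below assume the API extension's default address; adjust them to your setup.

```python
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"   # assumed default address
payload = {
    "messages": [{"role": "user", "content": "Name the planets in order."}],
    "temperature": 0,   # handled as greedy decoding by process_parameters()
    "max_tokens": 64,
}
response = requests.post(url, json=payload, timeout=60)
print(response.json()["choices"][0]["message"]["content"])
```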
@@ -144,6 +153,30 @@ def convert_history(history):
user_input = ""
system_message = ""

# Multimodal: convert OpenAI format to multimodal extension format
if any(isinstance(entry['content'], list) for entry in history):
new_history = []
for entry in history:
if isinstance(entry['content'], list):
image_url = None
content = None
for item in entry['content']:
if not isinstance(item, dict):
continue

if item['type'] == 'image_url' and isinstance(item['image_url'], dict):
image_url = item['image_url']['url']
elif item['type'] == 'text' and isinstance(item['text'], str):
content = item['text']

if image_url and content:
new_history.append({"image_url": image_url, "role": "user"})
new_history.append({"content": content, "role": "user"})
else:
new_history.append(entry)

history = new_history

for entry in history:
if "image_url" in entry:
image_url = entry['image_url']
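
To illustrate the input this new branch accepts, the sketch below shows an OpenAI-style multimodal user turn whose content is a list mixing an image_url item and a text item (the URL is a placeholder), followed by the two flat entries the conversion produces for the multimodal extension format.

```python
# One OpenAI-style user turn with mixed image and text content.
history = [
    {
        "role": "user",
        "content": [
            {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            {"type": "text", "text": "What is in this picture?"},
        ],
    }
]

# After the conversion above, the same turn is represented as two entries:
#   {"image_url": "https://example.com/cat.jpg", "role": "user"}
#   {"content": "What is in this picture?", "role": "user"}
```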
@@ -158,6 +191,9 @@ def convert_history(history):
raise 'Image cannot be loaded from the URL!'

buffered = BytesIO()
if img.mode in ("RGBA", "P"):
img = img.convert("RGB")

img.save(buffered, format="JPEG")
img_str = base64.b64encode(buffered.getvalue()).decode('utf-8')
content = f'<img src="data:image/jpeg;base64,{img_str}">'
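
A minimal Pillow sketch, independent of the extension, of the failure the two added lines avoid: JPEG has no alpha channel, so saving an RGBA (or palette-mode) image directly raises an OSError unless it is converted to RGB first.

```python
from io import BytesIO
from PIL import Image

img = Image.new("RGBA", (8, 8), (255, 0, 0, 128))   # synthetic RGBA image

try:
    img.save(BytesIO(), format="JPEG")               # fails: JPEG has no alpha channel
except OSError as err:
    print(f"Saving RGBA directly fails: {err}")

img = img.convert("RGB")                             # the same fix as above
img.save(BytesIO(), format="JPEG")                   # succeeds
```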
@@ -171,6 +207,7 @@ def convert_history(history):
if current_message:
chat_dialogue.append([current_message, ''])
current_message = ""

current_message = content
elif role == "assistant":
current_reply = content
1 change: 1 addition & 0 deletions extensions/openai/typing.py
@@ -37,6 +37,7 @@ class GenerationOptions(BaseModel):
early_stopping: bool = False
truncation_length: int = 0
max_tokens_second: int = 0
prompt_lookup_num_tokens: int = 0
custom_token_bans: str = ""
auto_max_new_tokens: bool = False
ban_eos_token: bool = False
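
Because GenerationOptions fields are accepted as extra request parameters, clients can pass the new field next to the standard OpenAI ones. A hedged sketch, assuming the API extension's default address:

```python
import requests

payload = {
    "prompt": "Continue the list: one, two, three,",
    "max_tokens": 32,
    "prompt_lookup_num_tokens": 10,   # new field; the default 0 disables it
}
response = requests.post("http://127.0.0.1:5000/v1/completions", json=payload, timeout=60)
print(response.json()["choices"][0]["text"])
```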
2 changes: 1 addition & 1 deletion extensions/whisper_stt/readme.md
@@ -12,4 +12,4 @@ whisper_stt-whipser_model: tiny
whisper_stt-auto_submit: False
```

See source documentation for [model names](https://github.com/openai/whisper#available-models-and-languages) and (languages)[https://github.com/openai/whisper/blob/main/whisper/tokenizer.py] you can use.
See source documentation for [model names](https://github.com/openai/whisper#available-models-and-languages) and [languages](https://github.com/openai/whisper/blob/main/whisper/tokenizer.py) you can use.
2 changes: 1 addition & 1 deletion modules/html_generator.py
@@ -7,8 +7,8 @@
import markdown
from PIL import Image, ImageOps

from modules.utils import get_available_chat_styles
from modules import shared
from modules.utils import get_available_chat_styles

# This is to store the paths to the thumbnails of the profile pictures
image_cache = {}
1 change: 1 addition & 0 deletions modules/loaders.py
@@ -192,6 +192,7 @@ def transformers_samplers():
'add_bos_token',
'skip_special_tokens',
'auto_max_new_tokens',
'prompt_lookup_num_tokens'
}


1 change: 1 addition & 0 deletions modules/shared.py
@@ -47,6 +47,7 @@
'truncation_length_max': 200000,
'max_tokens_second': 0,
'max_updates_second': 0,
'prompt_lookup_num_tokens': 0,
'custom_stopping_strings': '',
'custom_token_bans': '',
'auto_max_new_tokens': False,
7 changes: 5 additions & 2 deletions modules/text_generation.py
@@ -268,8 +268,8 @@ def apply_stopping_strings(reply, all_stop_strings):
return reply, stop_found


def get_reply_from_output_ids(output_ids, state, starting_from=0):
reply = decode(output_ids[starting_from:], state['skip_special_tokens'])
def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)

# Handle tokenizers that do not add the leading space for the first token
if (hasattr(shared.tokenizer, 'convert_ids_to_tokens') and len(output_ids) > starting_from) and not reply.startswith(' '):
@@ -291,6 +291,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
if state['negative_prompt'] != '':
generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])

if state['prompt_lookup_num_tokens'] > 0:
generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']

for k in ['epsilon_cutoff', 'eta_cutoff']:
if state[k] > 0:
generate_params[k] = state[k] * 1e-4
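
For reference, a standalone sketch of what the forwarded option does downstream: recent transformers releases (around 4.37 and later) accept prompt_lookup_num_tokens in generate() to enable Prompt Lookup Decoding, an assisted-generation scheme that drafts candidate tokens from n-grams already present in the prompt. The gpt2 checkpoint is only a placeholder.

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")   # placeholder model
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("def fibonacci(n):\n    ", return_tensors="pt")
output_ids = model.generate(
    **inputs,
    max_new_tokens=64,
    prompt_lookup_num_tokens=10,   # 0 in the webui means "disabled"
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```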
5 changes: 5 additions & 0 deletions modules/training.py
@@ -605,6 +605,11 @@ def on_log(self, args: transformers.TrainingArguments, state: transformers.Train
control.should_training_stop = True
print(f"\033[1;31;1mStop Loss {stop_at_loss} reached.\033[0;37;0m")

# Fix training for mixed precision models
for param in shared.model.parameters():
if param.requires_grad:
param.data = param.data.float()

trainer = transformers.Trainer(
model=lora_model,
train_dataset=train_data,
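
A small self-contained sketch of the idea behind the added loop: with a half-precision base model, only the parameters that still require gradients (for example, LoRA adapter weights) are upcast to float32 before the Trainer runs, while the frozen weights keep their half-precision dtype and memory footprint.

```python
import torch.nn as nn

# Stand-ins for a frozen half-precision base model and a small trainable adapter.
base = nn.Linear(16, 16).half()
adapter = nn.Linear(16, 2).half()
for p in base.parameters():
    p.requires_grad_(False)               # frozen base weights stay float16

for param in list(base.parameters()) + list(adapter.parameters()):
    if param.requires_grad:
        param.data = param.data.float()   # same upcast as the loop above

print(base.weight.dtype, adapter.weight.dtype)   # torch.float16 torch.float32
```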
1 change: 1 addition & 0 deletions modules/ui.py
@@ -112,6 +112,7 @@ def list_interface_input_elements():
'auto_max_new_tokens',
'max_tokens_second',
'max_updates_second',
'prompt_lookup_num_tokens',
'seed',
'temperature',
'temperature_last',
2 changes: 1 addition & 1 deletion modules/ui_model_menu.py
@@ -86,7 +86,7 @@ def create_ui():
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type)
shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)

shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=128, value=shared.args.n_gpu_layers)
shared.gradio['n_gpu_layers'] = gr.Slider(label="n-gpu-layers", minimum=0, maximum=256, value=shared.args.n_gpu_layers)
shared.gradio['n_ctx'] = gr.Slider(minimum=0, maximum=shared.settings['truncation_length_max'], step=256, label="n_ctx", value=shared.args.n_ctx, info='Context length. Try lowering this if you run out of memory while loading the model.')
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=32, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=32, value=shared.args.threads_batch)
1 change: 1 addition & 0 deletions modules/ui_parameters.py
@@ -72,6 +72,7 @@ def create_ui(default_preset):
shared.gradio['truncation_length'] = gr.Slider(value=get_truncation_length(), minimum=shared.settings['truncation_length_min'], maximum=shared.settings['truncation_length_max'], step=256, label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. Most models require this to be at most 2048.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')

shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=1, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='In addition to the defaults. Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Custom token bans', info='Specific token IDs to ban from generating, comma-separated. The IDs can be found in the Default or Notebook tab.')
