Skip to content

Commit

Permalink
feat: drop predefined models for ollama (#1165)
Browse files (browse the repository at this point in the history)
  • Loading branch information
sigoden authored Feb 10, 2025
1 parent 4fab4c5 commit 78f3d4f
Show file tree
Hide file tree
Showing 5 changed files with 29 additions and 59 deletions.
1 change: 0 additions & 1 deletion Argcfile.sh
Original file line number Diff line number Diff line change
Expand Up @@ -313,7 +313,6 @@ _argc_before() {
moonshot,moonshot-v1-8k,https://api.moonshot.cn/v1 \
novita,meta-llama/llama-3.1-8b-instruct,https://api.novita.ai/v3/openai \
openrouter,openai/gpt-4o-mini,https://openrouter.ai/api/v1 \
ollama,llama3.1:latest,http://${OLLAMA_HOST:-"127.0.0.1:11434"}/v1 \
perplexity,llama-3.1-8b-instruct,https://api.perplexity.ai \
qianwen,qwen-turbo-latest,https://dashscope.aliyuncs.com/compatible-mode/v1 \
siliconflow,meta-llama/Meta-Llama-3.1-8B-Instruct,https://api.siliconflow.cn/v1 \
Expand Down
26 changes: 11 additions & 15 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,10 @@ clients:
# supports_reasoning: true
# - name: xxxx # Embedding model
# type: embedding
# max_input_tokens: 200000
# max_tokens_per_chunk: 2000
# default_chunk_size: 1500
# max_batch_size: 100
# - name: xxxx # Reranker model
# type: reranker
# max_input_tokens: 2048
# patch: # Patch api
# chat_completions: # Api type, possible values: chat_completions, embeddings, and rerank
# <regex>: # The regex to match model names, e.g. '.*' 'gpt-4o' 'gpt-4o|gpt-4-.*'
Expand All @@ -125,19 +122,23 @@ clients:

# For any platform compatible with OpenAI's API
- type: openai-compatible
name: local
api_base: http://localhost:8080/v1
name: ollama
api_base: http://localhost:11434/v1
api_key: xxx # Optional
models:
- name: deepseek-r1
max_input_tokens: 131072
supports_reasoning: true
- name: llama3.1
max_input_tokens: 128000
supports_function_calling: true
- name: jina-embeddings-v2-base-en
- name: llama3.2-vision
max_input_tokens: 131072
supports_vision: true
- name: nomic-embed-text
type: embedding
default_chunk_size: 1500
max_batch_size: 100
- name: jina-reranker-v2-base-multilingual
type: reranker
default_chunk_size: 1000
max_batch_size: 50

# See https://ai.google.dev/docs
- type: gemini
Expand Down Expand Up @@ -197,11 +198,6 @@ clients:
api_base: https://api.groq.com/openai/v1
api_key: xxx

# See https://github.com/jmorganca/ollama
- type: openai-compatible
name: ollama
api_base: http://localhost:11434/v1

# See https://learn.microsoft.com/en-us/azure/ai-services/openai/chatgpt-quickstart
- type: azure-openai
api_base: https://{RESOURCE}.openai.azure.com
Expand Down
29 changes: 0 additions & 29 deletions models.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -440,35 +440,6 @@
output_price: 0
supports_reasoning: true

# Links:
# - https://ollama.com/library
# - https://github.com/ollama/ollama/blob/main/docs/openai.md
- provider: ollama
models:
- name: llama3.1
max_input_tokens: 131072
supports_function_calling: true
- name: llama3.2
max_input_tokens: 131072
supports_function_calling: true
- name: llama3.2-vision
max_input_tokens: 131072
supports_vision: true
- name: qwen2.5
max_input_tokens: 131072
supports_function_calling: true
- name: qwen2.5-coder
max_input_tokens: 32768
supports_function_calling: true
- name: deepseek-r1
max_input_tokens: 131072
supports_reasoning: true
- name: nomic-embed-text
type: embedding
max_tokens_per_chunk: 8192
default_chunk_size: 1000
max_batch_size: 50

# Links:
# - https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models
# - https://cloud.google.com/vertex-ai/generative-ai/docs/model-garden/explore-models
Expand Down
29 changes: 17 additions & 12 deletions src/client/common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -553,24 +553,29 @@ async fn set_client_models_config(client_config: &mut Value, client: &str) -> Re
std::env::var(&env_name).ok()
}),
) {
if let Ok(fetched_models) = abortable_run_with_spinner(
match abortable_run_with_spinner(
fetch_models(api_base, api_key.as_deref()),
"Fetching models",
create_abort_signal(),
)
.await
{
model_names = MultiSelect::new("LLM models (required):", fetched_models)
.with_validator(|list: &[ListOption<&String>]| {
if list.is_empty() {
Ok(Validation::Invalid(
"At least one item must be selected".into(),
))
} else {
Ok(Validation::Valid)
}
})
.prompt()?;
Ok(fetched_models) => {
model_names = MultiSelect::new("LLM models (required):", fetched_models)
.with_validator(|list: &[ListOption<&String>]| {
if list.is_empty() {
Ok(Validation::Invalid(
"At least one item must be selected".into(),
))
} else {
Ok(Validation::Valid)
}
})
.prompt()?;
}
Err(err) => {
eprintln!("✗ Unable to fetch models: {err}");
}
}
}
if model_names.is_empty() {
Expand Down
3 changes: 1 addition & 2 deletions src/client/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ register_client!(
(bedrock, "bedrock", BedrockConfig, BedrockClient),
);

pub const OPENAI_COMPATIBLE_PROVIDERS: [(&str, &str); 25] = [
pub const OPENAI_COMPATIBLE_PROVIDERS: [(&str, &str); 24] = [
("ai21", "https://api.ai21.com/studio/v1"),
(
"cloudflare",
Expand All @@ -53,7 +53,6 @@ pub const OPENAI_COMPATIBLE_PROVIDERS: [(&str, &str); 25] = [
("moonshot", "https://api.moonshot.cn/v1"),
("novita", "https://api.novita.ai/v3/openai"),
("openrouter", "https://openrouter.ai/api/v1"),
("ollama", "http://{OLLAMA_HOST}:11434/v1"),
("perplexity", "https://api.perplexity.ai"),
(
"qianwen",
Expand Down

0 comments on commit 78f3d4f

Please sign in to comment.