From 05b3eff175d9985bef61cf16fd7ba20831465e4a Mon Sep 17 00:00:00 2001 From: Anosh Rezaei Date: Sun, 7 Jul 2024 13:22:45 +0200 Subject: [PATCH 1/8] initial model configuration to run model on localai Signed-off-by: Anosh Rezaei --- src/scripts/GUI/model_configuration.yaml | 19 +++++++++++++++++++ .../stackoverflow_extractor.py | 16 +++++++++++++--- src/scripts/preprocessing.py | 3 ++- 3 files changed, 34 insertions(+), 4 deletions(-) create mode 100644 src/scripts/GUI/model_configuration.yaml diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml new file mode 100644 index 0000000..0fa2c71 --- /dev/null +++ b/src/scripts/GUI/model_configuration.yaml @@ -0,0 +1,19 @@ +name: phi-2 +context_size: 2048 +f16: true +threads: 11 +gpu_layers: 90 +mmap: true +parameters: + # Reference any HF model or a local file here + model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf + temperature: 0.2 + top_k: 40 + top_p: 0.95 +template: + + chat: &template | + Instruct: {{.Input}} + Output: + # Modify the prompt template here ^^^ as per your requirements + completion: *template \ No newline at end of file diff --git a/src/scripts/data_preparation/stackoverflow_extractor.py b/src/scripts/data_preparation/stackoverflow_extractor.py index 36c7aad..591dda1 100644 --- a/src/scripts/data_preparation/stackoverflow_extractor.py +++ b/src/scripts/data_preparation/stackoverflow_extractor.py @@ -57,7 +57,7 @@ import multiprocessing load_dotenv() -API_KEY = os.getenv('API_KEY', 'Replace your api key') +API_KEY = os.getenv('API_KEY', '9voMDokb2mzeteewaiUXaw((') REQUEST_DELAY = 0 # Number of seconds to wait between requests CSV_FILE = 'sources/stackoverflow_Q&A/cncf_stackoverflow_qas.csv' @@ -115,6 +115,11 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: request_count = 0 while True: + if start_page > 3: + # only extract 3 page per tag(project) + # with page size 300 is equal to 300 Q&As per project + save_progress(tag, "finished") + break if request_count >= DAILY_REQUEST_LIMIT: break @@ -122,7 +127,9 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: 'page': start_page, 'pagesize': page_size, 'order': 'desc', - 'sort': 'activity', + 'sort': 'votes', + 'min': 5, # Minimum score greater than 10 + 'accepted': 'True', 'answers': 1, 'tagged': tag, 'site': 'stackoverflow', @@ -150,7 +157,8 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: answers = fetch_answers(question_id) for count, answer in enumerate(answers, start=1): - if count > 3: + #only one anwer per question too reduce number of answers + if count > 1: break if answer['score'] < 0: continue @@ -160,6 +168,8 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: "question": question_text, "answer": answer_text, "tag": tag, + "question_id": question_id, + "score": question['score'] }) processed_question_ids.add(question_id) diff --git a/src/scripts/preprocessing.py b/src/scripts/preprocessing.py index 1e4a131..a98e9db 100644 --- a/src/scripts/preprocessing.py +++ b/src/scripts/preprocessing.py @@ -49,7 +49,8 @@ def merge_data() -> None: 'answer': 'Answer', 'tag': 'Project' }) - + # Drop additional columns + df2 = df2.drop(['question_id', 'score'], axis=1) # Concatenate the selected and renamed columns merged_df = pd.concat([df1_selected, df2_selected]) From 6f153944de1d76acf7aa1e4596dc366790d7a99c Mon Sep 17 00:00:00 2001 From: Anosh Rezaei Date: Sun, 7 Jul 2024 15:04:42 +0200 Subject: [PATCH 2/8] modify 
model_configuration file Signed-off-by: Anosh Rezaei --- src/scripts/GUI/model_configuration.yaml | 22 ++++++++++++++++------ src/scripts/GUI/preparation_scripts.sh | 16 ++++++++++++++++ 2 files changed, 32 insertions(+), 6 deletions(-) create mode 100644 src/scripts/GUI/preparation_scripts.sh diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml index 0fa2c71..ac18983 100644 --- a/src/scripts/GUI/model_configuration.yaml +++ b/src/scripts/GUI/model_configuration.yaml @@ -1,19 +1,29 @@ name: phi-2 context_size: 2048 f16: true -threads: 11 gpu_layers: 90 mmap: true +trimsuffix: +- "\n" parameters: - # Reference any HF model or a local file here model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf temperature: 0.2 top_k: 40 top_p: 0.95 + seed: -1 + +mirostat: 2 +mirostat_eta: 1.0 +mirostat_tau: 1.0 template: - - chat: &template | + chat: &template |- Instruct: {{.Input}} Output: - # Modify the prompt template here ^^^ as per your requirements - completion: *template \ No newline at end of file + completion: *template + +usage: | + To use this model, interact with the API (in another terminal) with curl for instance: + curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{ + "model": "phi-2", + "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}] + }' \ No newline at end of file diff --git a/src/scripts/GUI/preparation_scripts.sh b/src/scripts/GUI/preparation_scripts.sh new file mode 100644 index 0000000..a509631 --- /dev/null +++ b/src/scripts/GUI/preparation_scripts.sh @@ -0,0 +1,16 @@ + +## install NVIDIA Container Toolkit +# source: https://www.server-world.info/en/note?os=Ubuntu_22.04&p=nvidia&f=2 +curl -s -L https://nvidia.github.io/nvidia-docker/gpgkey | apt-key add - +# result: OK +curl -s -L https://nvidia.github.io/nvidia-docker/ubuntu22.04/nvidia-docker.list > /etc/apt/sources.list.d/nvidia-docker.list +apt update +apt -y install nvidia-container-toolkit +systemctl restart docker +docker run --gpus all nvidia/cuda:11.5.2-base-ubuntu20.04 nvidia-smi # checks if NVIDIA Container Toolkit is installed + + +docker run -it --gpus all nvidia/cuda:12.5.0-base-ubuntu22.04 bash +docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 + +docker run -p 8080:8080 --gpus all localai/localai:v2.18.1-cublas-cuda12-ffmpeg-core https://github.com/amosproj/amos2024ss08-cloud-native-llm/blob/05b3eff175d9985bef61cf16fd7ba20831465e4a/src/scripts/GUI/model_configuration.yaml \ No newline at end of file From b84a4e8592015c545f4370f05d6d8aac36c7a432 Mon Sep 17 00:00:00 2001 From: Anosh Rezaei Date: Sun, 7 Jul 2024 21:44:32 +0200 Subject: [PATCH 3/8] modify model configuration Signed-off-by: Anosh Rezaei --- src/scripts/GUI/model_configuration.yaml | 33 +++++++++--------------- 1 file changed, 12 insertions(+), 21 deletions(-) diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml index ac18983..ada93a2 100644 --- a/src/scripts/GUI/model_configuration.yaml +++ b/src/scripts/GUI/model_configuration.yaml @@ -1,29 +1,20 @@ -name: phi-2 -context_size: 2048 +name: gemma +context_size: 512 f16: true +threads: 14 gpu_layers: 90 mmap: true -trimsuffix: -- "\n" parameters: - model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf - temperature: 0.2 - top_k: 40 + # Reference any HF model or a local file here + #model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf + model: 
huggingface://google/gemma-2b-it/gemma-2b-it.gguf
+  temperature: 0.7
+  top_k: 50
   top_p: 0.95
-  seed: -1
-
-mirostat: 2
-mirostat_eta: 1.0
-mirostat_tau: 1.0
 template:
-  chat: &template |-
+
+  chat: &template |
     Instruct: {{.Input}}
     Output:
-  completion: *template
-
-usage: |
-  To use this model, interact with the API (in another terminal) with curl for instance:
-  curl http://localhost:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
-    "model": "phi-2",
-    "messages": [{"role": "user", "content": "How are you doing?", "temperature": 0.1}]
-  }'
\ No newline at end of file
+    # Modify the prompt template here ^^^ as per your requirements
+  completion: *template
\ No newline at end of file

From 27ac6b9ff72563012a152a58b6cb914cc924910d Mon Sep 17 00:00:00 2001
From: Anosh Rezaei
Date: Sun, 7 Jul 2024 23:28:55 +0200
Subject: [PATCH 4/8] modify model configuration

Signed-off-by: Anosh Rezaei
---
 src/scripts/GUI/model_configuration.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml
index ada93a2..f10bc01 100644
--- a/src/scripts/GUI/model_configuration.yaml
+++ b/src/scripts/GUI/model_configuration.yaml
@@ -11,6 +11,9 @@ parameters:
   temperature: 0.7
   top_k: 50
   top_p: 0.95
+
+backend: langchain-huggingface
+
 template:

   chat: &template |

From 94319d2e71e8dcc00da96a1defabdbbe656b6590 Mon Sep 17 00:00:00 2001
From: Anosh Rezaei
Date: Mon, 8 Jul 2024 16:02:18 +0200
Subject: [PATCH 5/8] modify model configuration

Signed-off-by: Anosh Rezaei
---
 src/scripts/GUI/model_configuration.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml
index f10bc01..db851da 100644
--- a/src/scripts/GUI/model_configuration.yaml
+++ b/src/scripts/GUI/model_configuration.yaml
@@ -7,7 +7,8 @@ mmap: true
 parameters:
   # Reference any HF model or a local file here
   #model: huggingface://TheBloke/phi-2-GGUF/phi-2.Q8_0.gguf
-  model: huggingface://google/gemma-2b-it/gemma-2b-it.gguf
+  #model: huggingface://google/gemma-2b-it/gemma-2b-it.gguf
+  model: huggingface://Kubermatic/DeepCNCFQuantized/ggml-model-Q4_K_M.gguf
   temperature: 0.7
   top_k: 50
   top_p: 0.95

From 48d114f89675e6916d5a661636503bffaf273e7d Mon Sep 17 00:00:00 2001
From: Anosh Rezaei
Date: Mon, 8 Jul 2024 20:04:23 +0200
Subject: [PATCH 6/8] trial and error with configuration

Signed-off-by: Anosh Rezaei
---
 src/scripts/GUI/model_configuration.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml
index db851da..e981f7d 100644
--- a/src/scripts/GUI/model_configuration.yaml
+++ b/src/scripts/GUI/model_configuration.yaml
@@ -13,7 +13,7 @@ parameters:
   top_k: 50
   top_p: 0.95

-backend: langchain-huggingface
+#backend: langchain-huggingface

 template:

From 090b90019a93a783893c7cd2f1e740eaaf31145e Mon Sep 17 00:00:00 2001
From: Anosh Rezaei
Date: Wed, 10 Jul 2024 09:49:00 +0200
Subject: [PATCH 7/8] Final values

Signed-off-by: Anosh Rezaei
---
 src/scripts/GUI/model_configuration.yaml | 11 +++++++----
 src/scripts/GUI/preparation_scripts.sh   | 10 +++++++---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/src/scripts/GUI/model_configuration.yaml b/src/scripts/GUI/model_configuration.yaml
index e981f7d..1a0f24e 100644
--- a/src/scripts/GUI/model_configuration.yaml
+++ b/src/scripts/GUI/model_configuration.yaml
@@ -1,5 +1,5 @@
-name: gemma
-context_size: 512
+name:
DeepCNCFQuantized +context_size: 2048 f16: true threads: 14 gpu_layers: 90 @@ -12,13 +12,16 @@ parameters: temperature: 0.7 top_k: 50 top_p: 0.95 + #repetition_penalty: 1.0 + #max_new_tokens: 1024 + #stop: ["<|im_end|>"] #backend: langchain-huggingface template: chat: &template | - Instruct: {{.Input}} - Output: + Question: {{.Input}} + Answer: # Modify the prompt template here ^^^ as per your requirements completion: *template \ No newline at end of file diff --git a/src/scripts/GUI/preparation_scripts.sh b/src/scripts/GUI/preparation_scripts.sh index a509631..3e4a52e 100644 --- a/src/scripts/GUI/preparation_scripts.sh +++ b/src/scripts/GUI/preparation_scripts.sh @@ -10,7 +10,11 @@ systemctl restart docker docker run --gpus all nvidia/cuda:11.5.2-base-ubuntu20.04 nvidia-smi # checks if NVIDIA Container Toolkit is installed -docker run -it --gpus all nvidia/cuda:12.5.0-base-ubuntu22.04 bash -docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 +docker run -it --gpus all nvidia/cuda:12.5.0-base-ubuntu22.04 bash +docker run -p 8080:8080 --gpus all --name local-ai -ti localai/localai:latest-aio-gpu-nvidia-cuda-12 -docker run -p 8080:8080 --gpus all localai/localai:v2.18.1-cublas-cuda12-ffmpeg-core https://github.com/amosproj/amos2024ss08-cloud-native-llm/blob/05b3eff175d9985bef61cf16fd7ba20831465e4a/src/scripts/GUI/model_configuration.yaml \ No newline at end of file +docker run -p 8080:8080 --gpus all --env-file .env localai/localai:v2.18.1-cublas-cuda12-ffmpeg-core https://raw.githubusercontent.com/amosproj/amos2024ss08-cloud-native-llm/110-implement-the-chat-bot-user-interface/src/scripts/GUI/model_configuration.yaml + +# Run localai with model config from gist + +docker run -p 8080:8080 --gpus all --env-file .env localai/localai:v2.18.1-cublas-cuda12-ffmpeg-core https://gist.githubusercontent.com/anosh-ar/91658012cccb8f74abb72ddc78bb71c8/raw/e00cca94739213ebf83e9074e3e9e3f74e55d7fb/model_config.yaml From 744edaa12c32e340608d4326dbb10992c827edce Mon Sep 17 00:00:00 2001 From: Anosh Rezaei Date: Wed, 10 Jul 2024 10:10:21 +0200 Subject: [PATCH 8/8] Final vlaues-testing error fixed Signed-off-by: Anosh Rezaei --- .../stackoverflow_extractor.py | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) diff --git a/src/scripts/data_preparation/stackoverflow_extractor.py b/src/scripts/data_preparation/stackoverflow_extractor.py index 591dda1..ce65bf8 100644 --- a/src/scripts/data_preparation/stackoverflow_extractor.py +++ b/src/scripts/data_preparation/stackoverflow_extractor.py @@ -57,7 +57,7 @@ import multiprocessing load_dotenv() -API_KEY = os.getenv('API_KEY', '9voMDokb2mzeteewaiUXaw((') +API_KEY = os.getenv('API_KEY', 'Replace your api key') REQUEST_DELAY = 0 # Number of seconds to wait between requests CSV_FILE = 'sources/stackoverflow_Q&A/cncf_stackoverflow_qas.csv' @@ -115,11 +115,6 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: request_count = 0 while True: - if start_page > 3: - # only extract 3 page per tag(project) - # with page size 300 is equal to 300 Q&As per project - save_progress(tag, "finished") - break if request_count >= DAILY_REQUEST_LIMIT: break @@ -127,9 +122,7 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: 'page': start_page, 'pagesize': page_size, 'order': 'desc', - 'sort': 'votes', - 'min': 5, # Minimum score greater than 10 - 'accepted': 'True', + 'sort': 'activity', 'answers': 1, 'tagged': tag, 'site': 'stackoverflow', @@ -157,8 +150,7 @@ 
def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: answers = fetch_answers(question_id) for count, answer in enumerate(answers, start=1): - #only one anwer per question too reduce number of answers - if count > 1: + if count > 3: break if answer['score'] < 0: continue @@ -168,8 +160,6 @@ def qa_extractor(tag: str, start_page: int, page_size: int = 100) -> int: "question": question_text, "answer": answer_text, "tag": tag, - "question_id": question_id, - "score": question['score'] }) processed_question_ids.add(question_id) @@ -389,4 +379,4 @@ def load_tags() -> list: print("directory is created") tags = load_tags() - extract_all_projects(tags) + extract_all_projects(tags) \ No newline at end of file
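
Usage note: after applying these patches, the chatbot model is served through LocalAI's OpenAI-compatible API. The commands below are a minimal smoke-test sketch, assuming the container from preparation_scripts.sh is listening on port 8080 and has loaded the final model_configuration.yaml (model name DeepCNCFQuantized); the example question is arbitrary.

# Start LocalAI with the final model configuration (same command as in preparation_scripts.sh)
docker run -p 8080:8080 --gpus all --env-file .env localai/localai:v2.18.1-cublas-cuda12-ffmpeg-core https://gist.githubusercontent.com/anosh-ar/91658012cccb8f74abb72ddc78bb71c8/raw/e00cca94739213ebf83e9074e3e9e3f74e55d7fb/model_config.yaml

# List the models LocalAI has registered; the configured name should appear here
curl http://localhost:8080/v1/models

# Send a chat completion request against the configured model
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{
        "model": "DeepCNCFQuantized",
        "messages": [{"role": "user", "content": "What is Kubernetes?"}],
        "temperature": 0.7
      }'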