Re-releasing into submit06 after overlay2 folder cleanup. #121

Merged 37 commits on Oct 25, 2023
efb0c92
Merge pull request #82 from mit-submit/main
julius-heitkoetter Sep 29, 2023
ed2e14a
Merge pull request #85 from mit-submit/main
julius-heitkoetter Sep 29, 2023
8170f20
Merge pull request #90 from mit-submit/main
julius-heitkoetter Oct 6, 2023
a7c2b41
test add logger
Oct 10, 2023
c48f477
change workflow for testing purposes
Oct 10, 2023
d0bd20a
add loguru to requirements
Oct 10, 2023
55e19a0
have id's for chunks also include a hash for timestamp
julius-heitkoetter Oct 13, 2023
01b14cc
Adding time import
julius-heitkoetter Oct 13, 2023
e1b0669
Merge pull request #94 from mit-submit/bug/chunk_id_clash
julius-heitkoetter Oct 13, 2023
623c229
Merge pull request #97 from mit-submit/main
julius-heitkoetter Oct 13, 2023
2a65ff0
checked for conflicts in file hashes
julius-heitkoetter Oct 13, 2023
364f4c9
fix workflow after testing
Oct 13, 2023
e80d4dc
typo fix
julius-heitkoetter Oct 13, 2023
4221958
added some comments. logfile name now in config
Oct 13, 2023
4cf95d7
Merge pull request #98 from mit-submit/feature/logging
ludomori99 Oct 13, 2023
c2a4567
Merge pull request #99 from mit-submit/release-ROOT
ludomori99 Oct 13, 2023
0326e82
Adding meta release for submit06
mdr223 Oct 13, 2023
0a30924
pushing update for first meta release
mdr223 Oct 13, 2023
e602623
adding prompt and initial workflow
mdr223 Oct 13, 2023
8cf70c6
Merge pull request #100 from mit-submit/feature/add-meta-release
mdr223 Oct 13, 2023
48d0ba9
fix filenames
mdr223 Oct 13, 2023
2975332
acquiring lock before commiting feedback to conversations_test.json file
mdr223 Oct 16, 2023
f0d8ba7
Dev test for Merge pull request #106 from mit-submit/hotfix/malformed…
julius-heitkoetter Oct 16, 2023
9acbf7a
adding timestamp
julius-heitkoetter Oct 18, 2023
2a2f1f0
adding last response to discussion
julius-heitkoetter Oct 18, 2023
31e5ec1
testing workflow
julius-heitkoetter Oct 18, 2023
b044001
fixing typo
julius-heitkoetter Oct 18, 2023
22b13af
preparing for merge to main
julius-heitkoetter Oct 18, 2023
be05936
adding other time meta info
julius-heitkoetter Oct 18, 2023
fe2251f
fixing typo
julius-heitkoetter Oct 18, 2023
f946742
preparing for merge
julius-heitkoetter Oct 18, 2023
10eb2ff
Merge pull request #108 from mit-submit/fix/conversation_loggin_fix
julius-heitkoetter Oct 18, 2023
f508ff3
pushing change to add tag and limit logs
mdr223 Oct 19, 2023
e356752
Merge pull request #114 from mit-submit/bugfix/docker-overlay2-storag…
mdr223 Oct 20, 2023
e038c2b
add missing quote
mdr223 Oct 20, 2023
edf9afa
add logging on lock release(s)
mdr223 Oct 21, 2023
e220643
fix log message
mdr223 Oct 21, 2023
8 changes: 8 additions & 0 deletions .github/workflows/dev-ci-cd.yaml
@@ -76,6 +76,14 @@ jobs:
echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt
chmod 400 ${{ github.workspace }}/deploy/dev/secrets/hf_token.txt

# create env file to set tag(s) for docker-compose
- name: Create Env File
run: |
touch ${{ github.workspace }}/deploy/dev/.env
export tag="${GITHUB_REF#refs/heads/}"
export tag="${tag//\//-}.${GITHUB_SHA}"
echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/dev/.env
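The shell expansions in this step turn a branch ref into a docker-compose-safe image tag: strip the `refs/heads/` prefix, replace `/` with `-` (docker tags may not contain slashes), and append the commit SHA. A Python sketch of the same transformation, with an illustrative function name:

```python
def derive_tag(github_ref: str, github_sha: str) -> str:
    # Equivalent of: tag="${GITHUB_REF#refs/heads/}"
    branch = github_ref.removeprefix("refs/heads/")
    # Equivalent of: tag="${tag//\//-}.${GITHUB_SHA}"
    return branch.replace("/", "-") + "." + github_sha

print(derive_tag("refs/heads/bugfix/docker-overlay2-storage", "e356752"))
# bugfix-docker-overlay2-storage.e356752
```

The resulting `TAG` value lands in the deployment's `.env` file, where docker-compose picks it up to select the image built for this commit.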

# stop any existing docker compose that's running
- name: Stop Docker Compose
run: |
8 changes: 8 additions & 0 deletions .github/workflows/prod-801-ci-cd.yaml
@@ -43,6 +43,14 @@ jobs:
echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt
chmod 400 ${{ github.workspace }}/deploy/prod-801/secrets/hf_token.txt

# create env file to set tag(s) for docker-compose
- name: Create Env File
run: |
touch ${{ github.workspace }}/deploy/prod-801/.env
export tag="${GITHUB_REF#refs/heads/}"
export tag="${tag//\//-}.${GITHUB_SHA}"
echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod-801/.env

# stop any existing docker compose that's running
- name: Stop Docker Compose
run: |
8 changes: 8 additions & 0 deletions .github/workflows/prod-ci-cd.yaml
@@ -76,6 +76,14 @@ jobs:
echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt
chmod 400 ${{ github.workspace }}/deploy/prod/secrets/hf_token.txt

# create env file to set tag(s) for docker-compose
- name: Create Env File
run: |
touch ${{ github.workspace }}/deploy/prod/.env
export tag="${GITHUB_REF#refs/heads/}"
export tag="${tag//\//-}.${GITHUB_SHA}"
echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod/.env

# stop any existing docker compose that's running
- name: Stop Docker Compose
run: |
103 changes: 103 additions & 0 deletions .github/workflows/prod-meta-ci-cd.yaml
@@ -0,0 +1,103 @@
name: Deploy A2rchi Meta
run-name: ${{ github.actor }} deploys A2rchi Meta to prod
on:
push:
branches:
- release-meta
jobs:
deploy-prod-system:
runs-on: ubuntu-latest
env:
SSH_AUTH_SOCK: /tmp/ssh_agent.sock
steps:
# boilerplate message and pull repository to CI runner
- run: echo "🎉 The job was automatically triggered by a ${{ github.event_name }} event."
- uses: actions/checkout@v3
- run: echo "The ${{ github.repository }} repository has been cloned to the runner."

# setup SSH
- name: Setup SSH
run: |
mkdir -p /home/runner/.ssh/
echo "${{ secrets.SSH_PRIVATE_KEY_MDRUSSO }}" > /home/runner/.ssh/id_rsa_submit
chmod 600 /home/runner/.ssh/id_rsa_submit
echo "${{ secrets.SSH_SUBMIT_KNOWN_HOSTS }}" > ~/.ssh/known_hosts
cp ${{ github.workspace }}/deploy/ssh_config /home/runner/.ssh/config
ssh-agent -a $SSH_AUTH_SOCK > /dev/null
ssh-add /home/runner/.ssh/id_rsa_submit

# TODO: update variables to be A2rchi meta-specific if need be
# create secrets files for docker-compose
- name: Create Secrets Files
run: |
mkdir -p ${{ github.workspace }}/deploy/prod-meta/secrets/
touch ${{ github.workspace }}/deploy/prod-meta/secrets/imap_user.txt
echo "${{ secrets.PROD_IMAP_USER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/imap_user.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/imap_user.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/imap_pw.txt
echo "${{ secrets.PROD_IMAP_PW }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/imap_pw.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/imap_pw.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_url.txt
echo "${{ secrets.PROD_CLEO_URL }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_url.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_url.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_user.txt
echo "${{ secrets.PROD_CLEO_USER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_user.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_user.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_pw.txt
echo "${{ secrets.PROD_CLEO_PW }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_pw.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_pw.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_project.txt
echo "${{ secrets.PROD_META_CLEO_PROJECT }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_project.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/cleo_project.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_server.txt
echo "${{ secrets.PROD_SENDER_SERVER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_server.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_server.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_port.txt
echo "${{ secrets.PROD_SENDER_PORT }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_port.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_port.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_replyto.txt
echo "${{ secrets.PROD_META_SENDER_REPLYTO }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_replyto.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_replyto.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_user.txt
echo "${{ secrets.PROD_SENDER_USER }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_user.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_user.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/sender_pw.txt
echo "${{ secrets.PROD_SENDER_PW }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/sender_pw.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/sender_pw.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/openai_api_key.txt
echo "${{ secrets.OPENAI_API_KEY }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/openai_api_key.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/openai_api_key.txt
touch ${{ github.workspace }}/deploy/prod-meta/secrets/hf_token.txt
echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-meta/secrets/hf_token.txt
chmod 400 ${{ github.workspace }}/deploy/prod-meta/secrets/hf_token.txt

# create env file to set tag(s) for docker-compose
- name: Create Env File
run: |
touch ${{ github.workspace }}/deploy/prod-meta/.env
export tag="${GITHUB_REF#refs/heads/}"
export tag="${tag//\//-}.${GITHUB_SHA}"
echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod-meta/.env

# stop any existing docker compose that's running
- name: Stop Docker Compose
run: |
ssh submit06 'bash -s' < ${{ github.workspace }}/deploy/prod-meta/prod-meta-stop.sh

# copy repository to machine
- name: Copy Repository
run: |
rsync -e ssh -r ${{ github.workspace}}/* --exclude .git/ --delete submit06:~/A2rchi-prod-meta/

# run deploy script
- name: Run Deploy Script
run: |
ssh submit06 'bash -s' < ${{ github.workspace }}/deploy/prod-meta/prod-meta-install.sh

# clean up secret files
- name: Remove Secrets from Runner
run: |
rm ${{ github.workspace }}/deploy/prod-meta/secrets/*.txt

# print job status
- run: echo "🍏 This job's status is ${{ job.status }}."
8 changes: 8 additions & 0 deletions .github/workflows/prod-root-ci-cd.yaml
@@ -43,6 +43,14 @@ jobs:
echo "${{ secrets.HF_TOKEN }}" >> ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt
chmod 400 ${{ github.workspace }}/deploy/prod-root/secrets/hf_token.txt

# create env file to set tag(s) for docker-compose
- name: Create Env File
run: |
touch ${{ github.workspace }}/deploy/prod-root/.env
export tag="${GITHUB_REF#refs/heads/}"
export tag="${tag//\//-}.${GITHUB_SHA}"
echo "TAG=${tag}" >> ${{ github.workspace }}/deploy/prod-root/.env

# stop any existing docker compose that's running
- name: Stop Docker Compose
run: |
15 changes: 14 additions & 1 deletion A2rchi/chains/base.py
@@ -1,5 +1,7 @@
"""Chain for chatting with a vector database."""
from __future__ import annotations
from loguru import logger
from langchain.callbacks import FileCallbackHandler

from A2rchi.chains.prompts import CONDENSE_QUESTION_PROMPT, QA_PROMPT
from A2rchi.utils.config_loader import Config_Loader
@@ -11,10 +13,12 @@
from langchain.schema import BaseRetriever, Document
from langchain.schema.prompt_template import BasePromptTemplate
from typing import Any, Dict, List, Optional, Tuple
import os


# DEFINITIONS
config = Config_Loader().config["chains"]["base"]
data_path = Config_Loader().config["global"]["DATA_PATH"]


def _get_chat_history(chat_history: List[Tuple[str, str]]) -> str:
@@ -88,19 +92,27 @@ def from_llm(
combine_docs_chain_kwargs = combine_docs_chain_kwargs or {}
_prompt = QA_PROMPT
document_variable_name = "context"

#Add logger for storing input to the QA chain, ie filled QA template
logfile = os.path.join(data_path,config["logging"]["input_output_filename"])
logger.add(logfile, colorize=True, enqueue=True)
handler = FileCallbackHandler(logfile)

llm_chain = LLMChain(
llm=llm,
prompt=_prompt,
callbacks = [handler],
verbose=verbose,
)
doc_chain = StuffDocumentsChain(
llm_chain=llm_chain,
document_variable_name=document_variable_name,
callbacks = [handler],
verbose=verbose)

_llm = condense_question_llm or llm
condense_question_chain = LLMChain(
llm=_llm, prompt=condense_question_prompt, verbose=verbose
llm=_llm, prompt=condense_question_prompt, callbacks = [handler], verbose=verbose
)

return cls(
@@ -109,3 +121,4 @@ def from_llm(
question_generator=condense_question_chain,
**kwargs,
)
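The `base.py` changes above route every filled QA prompt through loguru's `logger.add(logfile, ...)` and LangChain's `FileCallbackHandler` into a single logfile under `DATA_PATH`. A minimal stdlib stand-in for that pattern — function and variable names here are illustrative, not A2rchi's actual API:

```python
import os
import tempfile

def make_prompt_logger(data_path: str, filename: str = "chain_input_output.log"):
    """Append chain inputs/outputs to a logfile under data_path,
    standing in for logger.add(logfile) + FileCallbackHandler(logfile)."""
    logfile = os.path.join(data_path, filename)
    def log(text: str) -> None:
        with open(logfile, "a") as f:  # append so successive chain calls accumulate
            f.write(text + "\n")
    return log, logfile

data_path = tempfile.mkdtemp()
log, logfile = make_prompt_logger(data_path)
log("filled QA template: context=..., question=...")
with open(logfile) as f:
    print(f.read(), end="")
# filled QA template: context=..., question=...
```

Passing the same handler to all three chains (`llm_chain`, `doc_chain`, and the condense-question chain) means one file captures the full prompt flow, which is what makes the `logging.input_output_filename` config entry below sufficient.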

2 changes: 1 addition & 1 deletion A2rchi/chains/chain.py
@@ -140,7 +140,7 @@ def __call__(self, history):
print(f" INFO - answer: {answer}")
self.lock.release()

# delet echain object to release chain, vectorstore, and client for garbage collection
# delete chain object to release chain, vectorstore, and client for garbage collection
del chain

return answer
31 changes: 27 additions & 4 deletions A2rchi/interfaces/chat_app/app.py
@@ -11,6 +11,7 @@
import json
import os
import yaml
import time

# DEFINITIONS
QUERY_LIMIT = 1000 # max number of queries
@@ -72,9 +73,15 @@ def update_or_add_discussion(data_path, json_file, discussion_id, discussion_con
# update or add discussion
discussion_dict = data.get(str(discussion_id), {})

discussion_dict["meta"] = discussion_dict.get("meta", {})
if str(discussion_id) not in data.keys(): #first time in discussion
discussion_dict["meta"]["time_first_used"] = time.time()
discussion_dict["meta"]["time_last_used"] = time.time()

if discussion_contents is not None:
print(" INFO - found contents.")
discussion_dict["contents"] = discussion_contents
discussion_dict["meta"]["times_chain_was_called"] = discussion_dict["meta"]["times_chain_was_called"] + [time.time()] if ("times_chain_was_called" in discussion_dict["meta"].keys()) else [time.time()]
if discussion_feedback is not None:
print(" INFO - found feedback.")
discussion_dict["feedback"] = discussion_dict["feedback"] + [discussion_feedback] if ("feedback" in discussion_dict.keys() and isinstance(discussion_dict["feedback"], List)) else [discussion_feedback]
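The hunk above adds per-discussion "meta" bookkeeping: first-use and last-use timestamps, plus one timestamp per chain call. A standalone sketch of that logic against a plain dict (field names follow the diff; the storage and call signature are simplified):

```python
import time

def touch_discussion(data: dict, discussion_id) -> dict:
    discussion = data.setdefault(str(discussion_id), {})
    meta = discussion.setdefault("meta", {})
    now = time.time()
    meta.setdefault("time_first_used", now)   # set once, on first use
    meta["time_last_used"] = now              # refreshed on every call
    # append to the call-time list, creating it on first use
    meta["times_chain_was_called"] = meta.get("times_chain_was_called", []) + [now]
    return discussion

data = {}
touch_discussion(data, 7)
touch_discussion(data, 7)
print(len(data["7"]["meta"]["times_chain_was_called"]))  # 2
```

Keys are stringified (`str(discussion_id)`) because the store round-trips through JSON, where object keys are always strings.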
@@ -94,6 +101,7 @@ def __call__(self, history: Optional[List[Tuple[str, str]]], discussion_id: Opti
Execute the chat functionality.
"""
self.lock.acquire()
print("INFO - acquired lock file")
try:
# convert the history to native A2rchi form (because javascript does not have tuples)
history = self.convert_to_chain_history(history)
@@ -139,12 +147,13 @@ def __call__(self, history: Optional[List[Tuple[str, str]]], discussion_id: Opti
else:
output = "<p>" + result["answer"] + "</p>"

ChatWrapper.update_or_add_discussion(self.data_path, "conversations_test.json", discussion_id, discussion_contents = history)
ChatWrapper.update_or_add_discussion(self.data_path, "conversations_test.json", discussion_id, discussion_contents = history + [("A2rchi", output)])

except Exception as e:
raise e
finally:
self.lock.release()
print("INFO - released lock file")
return output, discussion_id


@@ -212,6 +221,8 @@ def terms(self):
return render_template('terms.html')

def like(self):
self.chat.lock.acquire()
print("INFO - acquired lock file")
try:
# Get the JSON data from the request body
data = request.json
@@ -231,11 +242,18 @@ def like(self):
response = {'message': 'Liked', 'content': chat_content}
return jsonify(response), 200


except Exception as e:
return jsonify({'error': str(e)}), 500


# According to the Python documentation: https://docs.python.org/3/tutorial/errors.html#defining-clean-up-actions
# this will still execute, before the function returns in the try or except block.
finally:
self.chat.lock.release()
print("INFO - released lock file")

def dislike(self):
self.chat.lock.acquire()
print("INFO - acquired lock file")
try:
# Get the JSON data from the request body
data = request.json
@@ -263,6 +281,11 @@ def dislike(self):
response = {'message': 'Disliked', 'content': chat_content}
return jsonify(response), 200


except Exception as e:
return jsonify({'error': str(e)}), 500

# According to the Python documentation: https://docs.python.org/3/tutorial/errors.html#defining-clean-up-actions
# this will still execute, before the function returns in the try or except block.
finally:
self.chat.lock.release()
print("INFO - released lock file")
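The `like`/`dislike` handlers above wrap the conversations-file update in acquire/`try`/`finally`-release, so the lock is released even when the handler raises or returns early — the clean-up behavior described in the Python tutorial section the comments link to. A minimal sketch with `threading.Lock` and a plain dict (A2rchi's actual lock object and Flask response plumbing differ):

```python
import threading

lock = threading.Lock()

def record_feedback(store: dict, discussion_id: str, feedback: str) -> dict:
    lock.acquire()
    print("INFO - acquired lock file")
    try:
        entry = store.setdefault(discussion_id, {})
        # append to the feedback list, creating it on first use
        entry["feedback"] = entry.get("feedback", []) + [feedback]
        return {"message": feedback.capitalize(), "content": entry}
    finally:
        # runs even if the try block raises or returns
        lock.release()
        print("INFO - released lock file")

store = {}
print(record_feedback(store, "42", "like")["message"])  # Like
```

Without the `finally`, an exception between `acquire()` and `release()` would leave the lock held and deadlock every later request that touches the file — the failure mode this hunk guards against.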
11 changes: 9 additions & 2 deletions A2rchi/utils/data_manager.py
@@ -11,6 +11,7 @@
import hashlib
import os
import yaml
import time


class DataManager():
@@ -215,13 +216,19 @@ def _add_to_vectorstore(self, collection, files_to_add, sources={}):
metadata["filename"] = filename

# create unique id for each chunk
# the first 12 bits of the id being the filename and the other 6 based on the chunk itself
# the first 12 bits of the id being the filename, 6 more based on the chunk itself, and the last 6 hashing the time
ids = []
for chunk in chunks:
identifier = hashlib.md5()
identifier.update(chunk.encode('utf-8'))
chunk_hash = str(int(identifier.hexdigest(),16))[0:6]
ids.append(str(filehash) + str(chunk_hash))
time_identifier = hashlib.md5()
time_identifier.update(str(time.time()).encode('utf-8'))
time_hash = str(int(identifier.hexdigest(),16))[0:6]
while str(filehash) + str(chunk_hash) + str(time_hash) in ids:
print("INFO: Found conflict with hash: " + str(filehash) + str(chunk_hash) + str(time_hash) + ". Trying again")
time_hash = str(int(time_hash) + 1)
ids.append(str(filehash) + str(chunk_hash) + str(time_hash))

print("Ids: ",ids)
collection.add(embeddings=embeddings, ids=ids, documents=chunks, metadatas=metadatas)
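The id scheme above concatenates a 12-character file hash, a 6-character chunk hash, and a 6-character time hash, then probes forward on collision. A standalone sketch of the intended behavior (note an assumption: the diff derives `time_hash` from the chunk's `identifier` digest rather than `time_identifier`; this sketch hashes the timestamp itself, which appears to be the intent):

```python
import hashlib
import time

def chunk_ids(filehash: str, chunks: list) -> list:
    """Unique ids: <12-char file hash><6-char chunk hash><6-char time hash>."""
    ids = []
    for chunk in chunks:
        chunk_hash = str(int(hashlib.md5(chunk.encode("utf-8")).hexdigest(), 16))[:6]
        time_hash = str(int(hashlib.md5(str(time.time()).encode("utf-8")).hexdigest(), 16))[:6]
        # linear probe: bump the time component until the id is unused
        while filehash + chunk_hash + time_hash in ids:
            time_hash = str(int(time_hash) + 1)
        ids.append(filehash + chunk_hash + time_hash)
    return ids

ids = chunk_ids("123456789012", ["same chunk", "same chunk"])
print(len(set(ids)))  # 2 — identical chunks still get distinct ids
```

The time component is what resolves the chunk-id clashes from PR #94: two byte-identical chunks in one file previously hashed to the same id and silently overwrote each other in the vectorstore.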
5 changes: 5 additions & 0 deletions config/dev-config.yaml
@@ -33,6 +33,10 @@ chains:
- User
- A2rchi
- Expert
#logging within base chain
logging:
#name of .log logfile to be saved in data folder.
input_output_filename: chain_input_output.log
prompts:
# prompt that serves to condense a history and a question into a single question
CONDENSING_PROMPT: config/prompts/condense.prompt
@@ -71,6 +75,7 @@ chains:
length_penalty: 1 #[optional] Exponential penalty to the length that is used with beam-based generation.
max_padding_length: null # the max padding length to be used with tokenizer padding the prompts.
chain_update_time: 10 # the amount of time (in seconds) which passes between when the chain updates to the newest version of the vectorstore

utils:
cleo:
cleo_update_time: 10
8 changes: 6 additions & 2 deletions config/prod-801-config.yaml
@@ -33,6 +33,10 @@ chains:
- User
- A2rchi
- Expert
#logging within base chain
logging:
#name of .log logfile to be saved in data folder.
input_output_filename: chain_input_output.log
prompts:
# prompt that serves to condense a history and a question into a single question
CONDENSING_PROMPT: config/prompts/condense.prompt
@@ -71,8 +75,8 @@ chains:
length_penalty: 1 #[optional] Exponential penalty to the length that is used with beam-based generation.
max_padding_length: null # the max padding length to be used with tokenizer padding the prompts.
chain_update_time: 10 # the amount of time (in seconds) which passes between when the chain updates to the newest version of the vectorstore
utils:
cleo:
utils:
cleo:
cleo_update_time: 10
mailbox:
IMAP4_PORT: 143