diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2aba83a..d9aab3c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -21,8 +21,6 @@ on:
         "!.gitignore",
         "!README.md",
       ]
-  pull_request:
-    types: [opened, synchronize, reopened]
   workflow_dispatch:
 
 env:
@@ -55,65 +53,121 @@ jobs:
           draft: true
           prerelease: false
 
-  # Get the latest version of the release
-  set-cortex-llamacpp-version:
-    runs-on: ubuntu-latest
-    outputs:
-      version: ${{ steps.version_update.outputs.new_version }}
-    steps:
-      - name: Get latest release
-        id: version_update
-        run: |
-          if [[ ${{ github.event_name }} == push && ${{ github.ref }} == refs/tags/* ]]; then
-            echo "VERSION=${GITHUB_REF#refs/tags/}"
-            NEW_VERSION="${VERSION#v}"
-            echo "::set-output name=new_version::$NEW_VERSION"
-          else
-            # Function to get the latest release tag
-            get_latest_tag() {
-              local retries=0
-              local max_retries=3
-              local tag
-              while [ $retries -lt $max_retries ]; do
-                tag=$(curl -s https://api.github.com/repos/janhq/cortex.llamacpp/releases/latest | jq -r .tag_name)
-                if [ -n "$tag" ] && [ "$tag" != "null" ]; then
-                  echo $tag
-                  return
-                else
-                  let retries++
-                  sleep 2
-                fi
-              done
-              echo "Failed to fetch latest tag after $max_retries attempts."
-              exit 1
-            }
-            # Get the latest release tag from GitHub API
-            LATEST_TAG=$(get_latest_tag)
-            
-            # Remove the 'v' and append the build number to the version
-            NEW_VERSION="${LATEST_TAG#v}-${GITHUB_RUN_NUMBER}"
-            echo "New version: $NEW_VERSION"
-            echo "::set-output name=new_version::$NEW_VERSION"
-          fi
-          echo "Version: $NEW_VERSION"
-
   ubuntu-amd64-build:
-    runs-on: ubuntu-18-04-cuda-11-7
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
+    runs-on: ${{ matrix.runs-on }}  # each matrix entry names its own runner label
+    needs: [create-draft-release]
     timeout-minutes: 40
-
     strategy:
       matrix:
         include:
-          - build: "amd64-avx2"
-            defines: "-DLLAMA_NATIVE=OFF"
-          - build: "amd64-avx"
-            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
-          - build: "amd64-avx512"
-            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
-          - build: "amd64-vulkan"
-            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+          - os: "linux"  # os + name form the artifact and release-asset suffix
+            name: "amd64-avx2"
+            runs-on: "ubuntu-18-04"  # consumed by the job-level runs-on expression
+            cmake-flags: "-DLLAMA_NATIVE=OFF"  # forwarded to make as CMAKE_EXTRA_FLAGS
+            run-e2e: true  # gates the "Run e2e testing" step
+            vulkan: false  # gates the "Prepare Vulkan SDK" step
+          - os: "linux"
+            name: "amd64-avx"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-avx512"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-vulkan"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: true
+          - os: "linux"
+            name: "amd64-cuda-11-7"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-cuda-12-0"
+            runs-on: "ubuntu-18-04-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "mac"
+            name: "amd64"
+            runs-on: "macos-13"
+            cmake-flags: "-DLLAMA_METAL=OFF"
+            run-e2e: true
+            vulkan: false
+          - os: "mac"
+            name: "arm64"
+            runs-on: "mac-silicon"
+            cmake-flags: "-DLLAMA_METAL_EMBED_LIBRARY=ON"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-vulkan"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: true
+          - os: "windows"
+            name: "amd64-avx2-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
 
     steps:
       - name: Clone
@@ -123,101 +177,37 @@ jobs:
           submodules: recursive
 
       - name: Prepare Vulkan SDK
-        if: ${{ matrix.build == 'amd64-vulkan' }}
+        if: ${{ matrix.vulkan }}  # runs only for matrix entries declared with vulkan: true
         uses: humbletim/setup-vulkan-sdk@v1.2.0
         with:
           vulkan-query-version: 1.3.275.0
           vulkan-components: Vulkan-Headers, Vulkan-Loader
           vulkan-use-cache: true
 
-      - name: Build library
+      - name: Install make on Windows
+        if: runner.os == 'Windows'  # skipped on the Linux and macOS runners
         run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="${{ matrix.defines }}"
+          choco install make -y
 
-      - name: Build server example
+      - name: Build  # Makefile target build-example-server depends on build-lib, so one call builds both
         run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
+          make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"
 
       - name: Package
-        shell: bash
         run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.so cortex.llamacpp
-          tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
+          make package
 
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-        with:
-          name: cortex.llamacpp-linux-${{ matrix.build }}
-          path: ./cortex.llamacpp
-      
       - name: Run e2e testing
-        shell: bash
-        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
+        if: ${{ matrix.run-e2e }}
         run: |
-          mkdir -p examples/server/build/engines/cortex.llamacpp
-          cd examples/server/build/
-          cp ../../../build/libengine.so engines/cortex.llamacpp/
-          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-linux-${{ matrix.build }}.tar.gz
-          asset_content_type: application/gzip
-
-  ubuntu-amd64-cuda-build:
-    runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
+          make run-e2e-test LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
 
-    strategy:
-      matrix:
-        cuda: ["12-0", "11-7"]
-    
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-      
-      - name: Build library
-        run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
-      
-      - name: Package
-        shell: bash
-        run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.so cortex.llamacpp
-          tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
-          
       - name: Upload Artifact
         uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
         with:
-          name: cortex.llamacpp-linux-amd64-cuda-${{ matrix.cuda }}
+          name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }}  # distinct per matrix entry, e.g. cortex.llamacpp-linux-amd64-avx2
           path: ./cortex.llamacpp
 
-      - name: Build server example
-        run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON
-          cmake --build . --config Release
-          
       - uses: actions/upload-release-asset@v1.0.1
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
         env:
@@ -225,314 +215,5 @@ jobs:
         with:
           upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
           asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-linux-amd64-cuda-${{ matrix.cuda }}.tar.gz
+          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
           asset_content_type: application/gzip
-
-  macOS-silicon-build:
-    runs-on: mac-silicon
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Build library
-        run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="-DLLAMA_METAL_EMBED_LIBRARY=ON"
-          
-      - name: Build server example
-        run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake ..
-          cmake --build . --config Release
-
-      - name: Package
-        shell: bash
-        run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.dylib cortex.llamacpp/
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-arm64
-          path: ./cortex.llamacpp
-
-      - name: Run e2e testing
-        shell: bash
-        run: |
-          mkdir -p examples/server/build/engines/cortex.llamacpp
-          cd examples/server/build/
-          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
-          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-  
-
-  macOS-amd64-build:
-    runs-on: macos-13
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Build library
-        id: cmake_build
-        run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="-DLLAMA_METAL=OFF"
-
-      - name: Build server example
-        run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake ..
-          cmake --build . --config Release
-
-      - name: Package
-        shell: bash
-        run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.dylib cortex.llamacpp/
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-amd64
-          path: ./cortex.llamacpp
-      
-      - name: Run e2e testing
-        shell: bash
-        run: |
-          mkdir -p examples/server/build/engines/cortex.llamacpp
-          cd examples/server/build/
-          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
-          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-    
-  universal-cortex-llamacpp-artifact-macos:
-    runs-on: macos-latest
-    needs: [create-draft-release, set-cortex-llamacpp-version, macOS-silicon-build, macOS-amd64-build]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success' 
-    timeout-minutes: 40
-    permissions:
-      contents: write
-    steps:
-      - name: download artifact amd64
-        uses: actions/download-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-amd64
-          path: ./cortex.llamacpp-mac-amd64
-        
-      - name: download artifact arm64
-        uses: actions/download-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-arm64
-          path: ./cortex.llamacpp-mac-arm64
-              
-      - name: bundle universal binary
-        run: |
-          mkdir -p cortex.llamacpp
-          ls ./cortex.llamacpp-mac-amd64
-          lipo -create ./cortex.llamacpp-mac-amd64/libengine.dylib ./cortex.llamacpp-mac-arm64/libengine.dylib -output ./cortex.llamacpp/libengine.dylib
-          tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
-        
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-universal
-          path: ./cortex.llamacpp
-        
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-mac-universal.tar.gz
-          asset_content_type: application/gzip
-
-  windows-amd64-build:
-    runs-on: windows-latest
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-
-    strategy:
-      matrix:
-        include:
-          - build: "amd64-avx2"
-            defines: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - build: "amd64-avx"
-            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - build: "amd64-avx512"
-            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - build: "amd64-vulkan"
-            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
- 
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: install make-gnu
-        run: |
-          choco install make -y
-
-      - name: Prepare Vulkan SDK
-        uses: humbletim/setup-vulkan-sdk@v1.2.0
-        if: ${{ matrix.build == 'amd64-vulkan' }}
-        with:
-          vulkan-query-version: 1.3.275.0
-          vulkan-components: Vulkan-Headers, Vulkan-Loader
-          vulkan-use-cache: true
-
-      - name: Build library
-        shell: cmd
-        run: |
-          cmake -S ./third-party -B ./build_deps/third-party
-          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
-          mkdir -p build
-          cd build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
-      
-      - name: Build server example
-        shell: cmd
-        run: |
-          mkdir .\examples\server\build
-          cd .\examples\server\build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
-      
-      - name: Pack artifacts
-        id: pack_artifacts
-        shell: cmd
-        run: |
-          dotnet tool install --global AzureSignTool
-          azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\Release\engine.dll"
-          7z a -ttar temp.tar .\build\Release\*
-          7z a -tgzip cortex.llamacpp.tar.gz temp.tar
-
-      - name: Run e2e testing
-        shell: cmd
-        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
-        run: |
-          mkdir examples\server\build\Release\engines\cortex.llamacpp
-          cd examples\server\build\Release
-          copy ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp\
-          ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-      
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-        with:
-          name: cortex.llamacpp-win-${{ matrix.build }}
-          path: ./build/Release
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-win-${{ matrix.build }}.tar.gz
-          asset_content_type: application/gzip
-        
-
-  windows-amd64-cuda-build:
-    runs-on: windows-cuda-${{ matrix.cuda }}
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-
-    strategy:
-      matrix:
-        include:
-          - cuda: "12-0"
-            instructions: "amd64-avx2"
-            inst-flags: "-DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "12-0"
-            instructions: "amd64-avx"
-            inst-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "12-0"
-            instructions: "amd64-avx512"
-            inst-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "11-7"
-            instructions: "amd64-avx2"
-            inst-flags: "-DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "11-7"
-            instructions: "amd64-avx"
-            inst-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "11-7"
-            instructions: "amd64-avx512"
-            inst-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-    
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
- 
-      - name: Build library
-        shell: cmd
-        run: |
-          cmake -S ./third-party -B ./build_deps/third-party
-          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
-          mkdir -p build
-          cd build
-          cmake .. ${{ matrix.inst-flags }} ${{ matrix.cmake-flags }}
-          cmake --build . --config Release
-          
-      - name: Build server example
-        shell: cmd
-        run: |
-          mkdir .\examples\server\build
-          cd .\examples\server\build
-          cmake .. ${{ matrix.inst-flags }} ${{ matrix.cmake-flags }}
-          cmake --build . --config Release
-      
-      - name: Pack artifacts
-        id: pack_artifacts
-        shell: cmd
-        run: |
-          dotnet tool install --global AzureSignTool
-          azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\Release\engine.dll"
-          7z a -ttar temp.tar .\build\Release\*
-          7z a -tgzip cortex.llamacpp.tar.gz temp.tar
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-        with:
-          name: cortex.llamacpp-win-${{ matrix.instructions }}-cuda-${{ matrix.cuda }}
-          path: ./build/Release
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-win-${{ matrix.instructions }}-cuda-${{ matrix.cuda }}.tar.gz
-          asset_content_type: application/gzip
\ No newline at end of file
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
new file mode 100644
index 0000000..53d8e15
--- /dev/null
+++ b/.github/workflows/quality-gate.yml
@@ -0,0 +1,166 @@
+name: CI Quality Gate
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+env:  # model download URLs handed to `make run-e2e-test` in the e2e step
+  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
+
+jobs:
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}  # runner label comes from the matrix entry
+    timeout-minutes: 40
+    strategy:
+      fail-fast: false  # keep the remaining variants running when one of them fails
+      matrix:
+        include:
+          - os: "linux"  # os + name form the uploaded artifact name
+            name: "amd64-avx2"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_NATIVE=OFF"  # forwarded to make as CMAKE_EXTRA_FLAGS
+            run-e2e: true  # gates the "Run e2e testing" step
+            vulkan: false  # gates the "Prepare Vulkan SDK" step
+          - os: "linux"
+            name: "amd64-avx"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-avx512"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-vulkan"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: true
+          - os: "linux"
+            name: "amd64-cuda-11-7"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-cuda-12-0"
+            runs-on: "ubuntu-18-04-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "mac"
+            name: "amd64"
+            runs-on: "macos-13"
+            cmake-flags: "-DLLAMA_METAL=OFF"
+            run-e2e: true
+            vulkan: false
+          - os: "mac"
+            name: "arm64"
+            runs-on: "mac-silicon"
+            cmake-flags: "-DLLAMA_METAL_EMBED_LIBRARY=ON"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-vulkan"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: true
+          - os: "windows"
+            name: "amd64-avx2-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Prepare Vulkan SDK
+        if: ${{ matrix.vulkan }}  # only for entries declared with vulkan: true
+        uses: humbletim/setup-vulkan-sdk@v1.2.0
+        with:
+          vulkan-query-version: 1.3.275.0
+          vulkan-components: Vulkan-Headers, Vulkan-Loader
+          vulkan-use-cache: true
+
+      - name: Install make on Windows
+        if: runner.os == 'Windows'  # skipped on the Linux and macOS runners
+        run: |
+          choco install make -y
+
+      - name: Build  # Makefile target build-example-server depends on build-lib, so one call builds both
+        run: |
+          make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"
+
+      - name: Package
+        run: |
+          make package
+
+      - name: Run e2e testing
+        if: ${{ matrix.run-e2e }}  # only for entries declared with run-e2e: true
+        run: |
+          make run-e2e-test LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }}  # distinct per matrix entry
+          path: ./cortex.llamacpp
diff --git a/Makefile b/Makefile
index 5ec8616..9c17400 100644
--- a/Makefile
+++ b/Makefile
@@ -1,42 +1,77 @@
 # Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean
 
 CMAKE_EXTRA_FLAGS ?= ""
+RUN_TESTS ?= false
+LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
+EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf"
 
-# Default target, does nothing
+# Default target: builds nothing and only prints a reminder to name a target
 all:
 	@echo "Specify a target to run"
 
 # Build the Cortex engine
-build:
+# Builds ./third-party in ./build_deps/third-party, then the engine in ./build.
+# Unix steps are chained with && so a failed mkdir/cd/cmake aborts the recipe.
+build-lib:
 ifeq ($(OS),Windows_NT)
-	mkdir -p build
-	cd build; \
-	cmake .. $(CMAKE_EXTRA_FLAGS); \
-	cmake --build . --config Release;
+	@powershell -Command "cmake -S ./third-party -B ./build_deps/third-party;"
+	@powershell -Command "cmake --build ./build_deps/third-party --config Release -j4;"
+	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
 else ifeq ($(shell uname -s),Linux)
-	mkdir build && cd build; \
-	cmake .. $(CMAKE_EXTRA_FLAGS); \
-	make -j$(nproc);
+	@cmake -S ./third-party -B ./build_deps/third-party;
+	@make -C ./build_deps/third-party -j4;
+	@rm -rf ./build_deps/third-party;
+	@mkdir -p build && cd build && \
+	cmake .. $(CMAKE_EXTRA_FLAGS) && make -j4
 else
-	mkdir build && cd build; \
-	cmake .. $(CMAKE_EXTRA_FLAGS); \
-	make -j$(sysctl -n hw.ncpu);
+	@cmake -S ./third-party -B ./build_deps/third-party
+	@make -C ./build_deps/third-party -j4
+	@rm -rf ./build_deps/third-party
+	@mkdir -p build && cd build && \
+	cmake .. $(CMAKE_EXTRA_FLAGS) && make -j4
 endif
 
-code-sign: build
+# Build the example server in examples/server/build (builds the engine first).
+# NOTE(review): the macOS branch omits CMAKE_EXTRA_FLAGS - confirm that is intended.
+build-example-server: build-lib
 ifeq ($(OS),Windows_NT)
-	@echo "Hello Windows";
+	@powershell -Command "mkdir -p .\examples\server\build; cd .\examples\server\build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
 else ifeq ($(shell uname -s),Linux)
-	@echo "Hello Linux";
+	@mkdir -p examples/server/build && cd examples/server/build && \
+	cmake .. $(CMAKE_EXTRA_FLAGS) && cmake --build . --config Release
 else
-	@echo "Hello MacOS";
+	@mkdir -p examples/server/build && cd examples/server/build && \
+	cmake .. && cmake --build . --config Release
 endif
 
-package: build
+# Stage the built engine library in ./cortex.llamacpp and archive it as
+# cortex.llamacpp.tar.gz; Unix steps are &&-chained so a failed copy fails the target.
+package:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p cortex.llamacpp; cp build\Release\engine.dll cortex.llamacpp\; 7z a -ttar temp.tar cortex.llamacpp\*; 7z a -tgzip cortex.llamacpp.tar.gz temp.tar;"
+else ifeq ($(shell uname -s),Linux)
+	@mkdir -p cortex.llamacpp && cp build/libengine.so cortex.llamacpp/ && \
+	tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
+else
+	@mkdir -p cortex.llamacpp && cp build/libengine.dylib cortex.llamacpp/ && \
+	tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
+endif
+
+# Run the e2e script against the example server; skipped while RUN_TESTS is false.
+# The skip is a conditional branch: an `exit` recipe line cannot stop later lines.
+run-e2e-test:
+ifeq ($(RUN_TESTS),false)
+	@echo "Skipping tests"
-ifeq ($(OS),Windows_NT)
-	@echo "Hello Windows";
+else ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p examples\server\build\Release\engines\cortex.llamacpp; cd examples\server\build\Release; cp ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp; ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
 else ifeq ($(shell uname -s),Linux)
-	@echo "Hello Linux";
+	@mkdir -p examples/server/build/engines/cortex.llamacpp; \
+	cd examples/server/build/; \
+	cp ../../../build/libengine.so engines/cortex.llamacpp/; \
+	chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);
 else
-	@echo "Hello MacOS";
+	@mkdir -p examples/server/build/engines/cortex.llamacpp; \
+	cd examples/server/build/; \
+	cp ../../../build/libengine.dylib engines/cortex.llamacpp/; \
+	chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);
 endif
\ No newline at end of file
diff --git a/configure.bat b/configure.bat
deleted file mode 100755
index 7f24733..0000000
--- a/configure.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-cmake -S ./third-party -B ./build_deps/third-party
-cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
\ No newline at end of file
diff --git a/configure.sh b/configure.sh
deleted file mode 100755
index 842bbb2..0000000
--- a/configure.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-cmake -S ./third-party -B ./build_deps/third-party
-make -C ./build_deps/third-party -j 10
-rm -rf ./build_deps/third-party
\ No newline at end of file