diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 2aba83a..d9aab3c 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -21,8 +21,6 @@ on:
         "!.gitignore",
         "!README.md",
       ]
-  pull_request:
-    types: [opened, synchronize, reopened]
   workflow_dispatch:
 
 env:
@@ -55,65 +53,123 @@ jobs:
           draft: true
           prerelease: false
 
-  # Get the latest version of the release
-  set-cortex-llamacpp-version:
-    runs-on: ubuntu-latest
-    outputs:
-      version: ${{ steps.version_update.outputs.new_version }}
-    steps:
-      - name: Get latest release
-        id: version_update
-        run: |
-          if [[ ${{ github.event_name }} == push && ${{ github.ref }} == refs/tags/* ]]; then
-            echo "VERSION=${GITHUB_REF#refs/tags/}"
-            NEW_VERSION="${VERSION#v}"
-            echo "::set-output name=new_version::$NEW_VERSION"
-          else
-            # Function to get the latest release tag
-            get_latest_tag() {
-              local retries=0
-              local max_retries=3
-              local tag
-              while [ $retries -lt $max_retries ]; do
-                tag=$(curl -s https://api.github.com/repos/janhq/cortex.llamacpp/releases/latest | jq -r .tag_name)
-                if [ -n "$tag" ] && [ "$tag" != "null" ]; then
-                  echo $tag
-                  return
-                else
-                  let retries++
-                  sleep 2
-                fi
-              done
-              echo "Failed to fetch latest tag after $max_retries attempts."
-              exit 1
-            }
-            # Get the latest release tag from GitHub API
-            LATEST_TAG=$(get_latest_tag)
-
-            # Remove the 'v' and append the build number to the version
-            NEW_VERSION="${LATEST_TAG#v}-${GITHUB_RUN_NUMBER}"
-            echo "New version: $NEW_VERSION"
-            echo "::set-output name=new_version::$NEW_VERSION"
-          fi
-          echo "Version: $NEW_VERSION"
-
   ubuntu-amd64-build:
-    runs-on: ubuntu-18-04-cuda-11-7
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
+    runs-on: ${{ matrix.runs-on }} # each matrix entry names its own runner
+    needs: [create-draft-release]
+    # NOTE(review): a skipped create-draft-release would otherwise skip this job too; confirm it is skipped on non-tag runs.
+    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped')
     timeout-minutes: 40
-
     strategy:
       matrix:
         include:
-          - build: "amd64-avx2"
-            defines: "-DLLAMA_NATIVE=OFF"
-          - build: "amd64-avx"
-            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
-          - build: "amd64-avx512"
-            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
-          - build: "amd64-vulkan"
-            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+          - os: "linux"
+            name: "amd64-avx2"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_NATIVE=OFF"
+            run-e2e: true
+            vulkan: false
+          - os: "linux"
+            name: "amd64-avx"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-avx512"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-vulkan"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: true
+          - os: "linux"
+            name: "amd64-cuda-11-7"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-cuda-12-0"
+            runs-on: "ubuntu-18-04-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "mac"
+            name: "amd64"
+            runs-on: "macos-13"
+            cmake-flags: "-DLLAMA_METAL=OFF"
+            run-e2e: true
+            vulkan: false
+          - os: "mac"
+            name: "arm64"
+            runs-on: "mac-silicon"
+            cmake-flags: "-DLLAMA_METAL_EMBED_LIBRARY=ON"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-vulkan"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: true
+          - os: "windows"
+            name: "amd64-avx2-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
 
     steps:
       - name: Clone
@@ -123,101 +179,39 @@ jobs:
           submodules: recursive
 
       - name: Prepare Vulkan SDK
-        if: ${{ matrix.build == 'amd64-vulkan' }}
+        if: ${{ matrix.vulkan }}
         uses: humbletim/setup-vulkan-sdk@v1.2.0
         with:
           vulkan-query-version: 1.3.275.0
           vulkan-components: Vulkan-Headers, Vulkan-Loader
           vulkan-use-cache: true
 
-      - name: Build library
+      # Every later step is driven through the Makefile, so make must be on PATH.
+      - name: Install make on Windows
+        if: runner.os == 'Windows'
         run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="${{ matrix.defines }}"
+          choco install make -y
 
-      - name: Build server example
+      - name: Build
         run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
+          make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"
 
       - name: Package
-        shell: bash
         run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.so cortex.llamacpp
-          tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
+          make package
 
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-        with:
-          name: cortex.llamacpp-linux-${{ matrix.build }}
-          path: ./cortex.llamacpp
-
       - name: Run e2e testing
-        shell: bash
-        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
+        # RUN_TESTS=true is required: the Makefile target only echoes a skip message while RUN_TESTS is false.
+        if: ${{ matrix.run-e2e }}
         run: |
-          mkdir -p examples/server/build/engines/cortex.llamacpp
-          cd examples/server/build/
-          cp ../../../build/libengine.so engines/cortex.llamacpp/
-          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-linux-${{ matrix.build }}.tar.gz
-          asset_content_type: application/gzip
-
-  ubuntu-amd64-cuda-build:
-    runs-on: ubuntu-18-04-cuda-${{ matrix.cuda }}
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
+          make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
 
-    strategy:
-      matrix:
-        cuda: ["12-0", "11-7"]
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Build library
-        run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
-
-      - name: Package
-        shell: bash
-        run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.so cortex.llamacpp
-          tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
-
       - name: Upload Artifact
         uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
         with:
-          name: cortex.llamacpp-linux-amd64-cuda-${{ matrix.cuda }}
+          name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }}
           path: ./cortex.llamacpp
 
-      - name: Build server example
-        run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake .. -DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON
-          cmake --build . --config Release
-
       - uses: actions/upload-release-asset@v1.0.1
         if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
         env:
@@ -225,314 +219,5 @@ jobs:
         with:
           upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
           asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-linux-amd64-cuda-${{ matrix.cuda }}.tar.gz
+          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-${{ matrix.os }}-${{ matrix.name }}.tar.gz
           asset_content_type: application/gzip
-
-  macOS-silicon-build:
-    runs-on: mac-silicon
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Build library
-        run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="-DLLAMA_METAL_EMBED_LIBRARY=ON"
-
-      - name: Build server example
-        run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake ..
-          cmake --build . --config Release
-
-      - name: Package
-        shell: bash
-        run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.dylib cortex.llamacpp/
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-arm64
-          path: ./cortex.llamacpp
-
-      - name: Run e2e testing
-        shell: bash
-        run: |
-          mkdir -p examples/server/build/engines/cortex.llamacpp
-          cd examples/server/build/
-          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
-          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-
-
-  macOS-amd64-build:
-    runs-on: macos-13
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Build library
-        id: cmake_build
-        run: |
-          ./configure.sh
-          make build CMAKE_EXTRA_FLAGS="-DLLAMA_METAL=OFF"
-
-      - name: Build server example
-        run: |
-          mkdir -p examples/server/build
-          cd examples/server/build
-          cmake ..
-          cmake --build . --config Release
-
-      - name: Package
-        shell: bash
-        run: |
-          mkdir -p cortex.llamacpp
-          cp build/libengine.dylib cortex.llamacpp/
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-amd64
-          path: ./cortex.llamacpp
-
-      - name: Run e2e testing
-        shell: bash
-        run: |
-          mkdir -p examples/server/build/engines/cortex.llamacpp
-          cd examples/server/build/
-          cp ../../../build/libengine.dylib engines/cortex.llamacpp/
-          chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-
-  universal-cortex-llamacpp-artifact-macos:
-    runs-on: macos-latest
-    needs: [create-draft-release, set-cortex-llamacpp-version, macOS-silicon-build, macOS-amd64-build]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-    permissions:
-      contents: write
-    steps:
-      - name: download artifact amd64
-        uses: actions/download-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-amd64
-          path: ./cortex.llamacpp-mac-amd64
-
-      - name: download artifact arm64
-        uses: actions/download-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-arm64
-          path: ./cortex.llamacpp-mac-arm64
-
-      - name: bundle universal binary
-        run: |
-          mkdir -p cortex.llamacpp
-          ls ./cortex.llamacpp-mac-amd64
-          lipo -create ./cortex.llamacpp-mac-amd64/libengine.dylib ./cortex.llamacpp-mac-arm64/libengine.dylib -output ./cortex.llamacpp/libengine.dylib
-          tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        with:
-          name: cortex.llamacpp-mac-universal
-          path: ./cortex.llamacpp
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-mac-universal.tar.gz
-          asset_content_type: application/gzip
-
-  windows-amd64-build:
-    runs-on: windows-latest
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-
-    strategy:
-      matrix:
-        include:
-          - build: "amd64-avx2"
-            defines: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - build: "amd64-avx"
-            defines: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - build: "amd64-avx512"
-            defines: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - build: "amd64-vulkan"
-            defines: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: install make-gnu
-        run: |
-          choco install make -y
-
-      - name: Prepare Vulkan SDK
-        uses: humbletim/setup-vulkan-sdk@v1.2.0
-        if: ${{ matrix.build == 'amd64-vulkan' }}
-        with:
-          vulkan-query-version: 1.3.275.0
-          vulkan-components: Vulkan-Headers, Vulkan-Loader
-          vulkan-use-cache: true
-
-      - name: Build library
-        shell: cmd
-        run: |
-          cmake -S ./third-party -B ./build_deps/third-party
-          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
-          mkdir -p build
-          cd build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
-
-      - name: Build server example
-        shell: cmd
-        run: |
-          mkdir .\examples\server\build
-          cd .\examples\server\build
-          cmake .. ${{ matrix.defines }}
-          cmake --build . --config Release
-
-      - name: Pack artifacts
-        id: pack_artifacts
-        shell: cmd
-        run: |
-          dotnet tool install --global AzureSignTool
-          azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\Release\engine.dll"
-          7z a -ttar temp.tar .\build\Release\*
-          7z a -tgzip cortex.llamacpp.tar.gz temp.tar
-
-      - name: Run e2e testing
-        shell: cmd
-        if: ${{ matrix.build != 'arm64' && matrix.build != 'amd64-vulkan' && matrix.build != 'amd64-avx512' }}
-        run: |
-          mkdir examples\server\build\Release\engines\cortex.llamacpp
-          cd examples\server\build\Release
-          copy ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp\
-          ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe ${{ env.LLM_MODEL_URL }} ${{ env.EMBEDDING_MODEL_URL }}
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-        with:
-          name: cortex.llamacpp-win-${{ matrix.build }}
-          path: ./build/Release
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-win-${{ matrix.build }}.tar.gz
-          asset_content_type: application/gzip
-
-
-  windows-amd64-cuda-build:
-    runs-on: windows-cuda-${{ matrix.cuda }}
-    needs: [create-draft-release, set-cortex-llamacpp-version]
-    if: always() && (needs.create-draft-release.result == 'success' || needs.create-draft-release.result == 'skipped') && needs.set-cortex-llamacpp-version.result == 'success'
-    timeout-minutes: 40
-
-    strategy:
-      matrix:
-        include:
-          - cuda: "12-0"
-            instructions: "amd64-avx2"
-            inst-flags: "-DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "12-0"
-            instructions: "amd64-avx"
-            inst-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "12-0"
-            instructions: "amd64-avx512"
-            inst-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "11-7"
-            instructions: "amd64-avx2"
-            inst-flags: "-DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "11-7"
-            instructions: "amd64-avx"
-            inst-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-          - cuda: "11-7"
-            instructions: "amd64-avx512"
-            inst-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
-            cmake-flags: "-DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
-
-    steps:
-      - name: Clone
-        id: checkout
-        uses: actions/checkout@v3
-        with:
-          submodules: recursive
-
-      - name: Build library
-        shell: cmd
-        run: |
-          cmake -S ./third-party -B ./build_deps/third-party
-          cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
-          mkdir -p build
-          cd build
-          cmake .. ${{ matrix.inst-flags }} ${{ matrix.cmake-flags }}
-          cmake --build . --config Release
-
-      - name: Build server example
-        shell: cmd
-        run: |
-          mkdir .\examples\server\build
-          cd .\examples\server\build
-          cmake .. ${{ matrix.inst-flags }} ${{ matrix.cmake-flags }}
-          cmake --build . --config Release
-
-      - name: Pack artifacts
-        id: pack_artifacts
-        shell: cmd
-        run: |
-          dotnet tool install --global AzureSignTool
-          azuresigntool.exe sign -kvu "${{ secrets.AZURE_KEY_VAULT_URI }}" -kvi "${{ secrets.AZURE_CLIENT_ID }}" -kvt "${{ secrets.AZURE_TENANT_ID }}" -kvs "${{ secrets.AZURE_CLIENT_SECRET }}" -kvc ${{ secrets.AZURE_CERT_NAME }} -tr http://timestamp.globalsign.com/tsa/r6advanced1 -v ".\build\Release\engine.dll"
-          7z a -ttar temp.tar .\build\Release\*
-          7z a -tgzip cortex.llamacpp.tar.gz temp.tar
-
-      - name: Upload Artifact
-        uses: actions/upload-artifact@v2
-        if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event_name == 'pull_request'
-        with:
-          name: cortex.llamacpp-win-${{ matrix.instructions }}-cuda-${{ matrix.cuda }}
-          path: ./build/Release
-
-      - uses: actions/upload-release-asset@v1.0.1
-        if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/')
-        env:
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
-        with:
-          upload_url: ${{ needs.create-draft-release.outputs.upload_url }}
-          asset_path: ./cortex.llamacpp.tar.gz
-          asset_name: cortex.llamacpp-${{ needs.create-draft-release.outputs.version }}-win-${{ matrix.instructions }}-cuda-${{ matrix.cuda }}.tar.gz
-          asset_content_type: application/gzip
\ No newline at end of file
diff --git a/.github/workflows/quality-gate.yml b/.github/workflows/quality-gate.yml
new file mode 100644
index 0000000..53d8e15
--- /dev/null
+++ b/.github/workflows/quality-gate.yml
@@ -0,0 +1,169 @@
+# Pull-request quality gate: build, package and (where the matrix enables it) e2e-test every engine flavour.
+name: CI Quality Gate
+
+on:
+  pull_request:
+    types: [opened, synchronize, reopened]
+  workflow_dispatch:
+
+env:
+  LLM_MODEL_URL: https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf
+  EMBEDDING_MODEL_URL: https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf
+
+jobs:
+  build-and-test:
+    runs-on: ${{ matrix.runs-on }}
+    timeout-minutes: 40
+    strategy:
+      fail-fast: false
+      matrix:
+        include:
+          - os: "linux"
+            name: "amd64-avx2"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_NATIVE=OFF"
+            run-e2e: true
+            vulkan: false
+          - os: "linux"
+            name: "amd64-avx"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-avx512"
+            runs-on: "ubuntu-18-04"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-vulkan"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF"
+            run-e2e: false
+            vulkan: true
+          - os: "linux"
+            name: "amd64-cuda-11-7"
+            runs-on: "ubuntu-18-04-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "linux"
+            name: "amd64-cuda-12-0"
+            runs-on: "ubuntu-18-04-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_CUDA=ON"
+            run-e2e: false
+            vulkan: false
+          - os: "mac"
+            name: "amd64"
+            runs-on: "macos-13"
+            cmake-flags: "-DLLAMA_METAL=OFF"
+            run-e2e: true
+            vulkan: false
+          - os: "mac"
+            name: "arm64"
+            runs-on: "mac-silicon"
+            cmake-flags: "-DLLAMA_METAL_EMBED_LIBRARY=ON"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: true
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_BLAS=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-vulkan"
+            runs-on: "windows-latest"
+            cmake-flags: "-DLLAMA_VULKAN=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: true
+          - os: "windows"
+            name: "amd64-avx2-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-12-0"
+            runs-on: "windows-cuda-12-0"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx2-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX2=OFF -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+          - os: "windows"
+            name: "amd64-avx512-cuda-11-7"
+            runs-on: "windows-cuda-11-7"
+            cmake-flags: "-DLLAMA_AVX512=ON -DLLAMA_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CUDA=ON -DBUILD_SHARED_LIBS=OFF -DCMAKE_BUILD_TYPE=RELEASE"
+            run-e2e: false
+            vulkan: false
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+        with:
+          submodules: recursive
+
+      - name: Prepare Vulkan SDK
+        if: ${{ matrix.vulkan }}
+        uses: humbletim/setup-vulkan-sdk@v1.2.0
+        with:
+          vulkan-query-version: 1.3.275.0
+          vulkan-components: Vulkan-Headers, Vulkan-Loader
+          vulkan-use-cache: true
+
+      # Every later step is driven through the Makefile, so make must be on PATH.
+      - name: Install make on Windows
+        if: runner.os == 'Windows'
+        run: |
+          choco install make -y
+
+      - name: Build
+        run: |
+          make build-example-server CMAKE_EXTRA_FLAGS="${{ matrix.cmake-flags }}"
+
+      - name: Package
+        run: |
+          make package
+
+      - name: Run e2e testing
+        # RUN_TESTS=true is required: the Makefile target only echoes a skip message while RUN_TESTS is false.
+        if: ${{ matrix.run-e2e }}
+        run: |
+          make run-e2e-test RUN_TESTS=true LLM_MODEL_URL=${{ env.LLM_MODEL_URL }} EMBEDDING_MODEL_URL=${{ env.EMBEDDING_MODEL_URL }}
+
+      - name: Upload Artifact
+        uses: actions/upload-artifact@v2
+        with:
+          name: cortex.llamacpp-${{ matrix.os }}-${{ matrix.name }}
+          path: ./cortex.llamacpp
diff --git a/Makefile b/Makefile
index 5ec8616..9c17400 100644
--- a/Makefile
+++ b/Makefile
@@ -1,42 +1,79 @@
 # Makefile for Cortex llamacpp engine - Build, Lint, Test, and Clean
 
 CMAKE_EXTRA_FLAGS ?= ""
+# run-e2e-test only echoes a skip message unless RUN_TESTS is overridden (e.g. RUN_TESTS=true).
+RUN_TESTS ?= false
+LLM_MODEL_URL ?= "https://delta.jan.ai/tinyllama-1.1b-chat-v0.3.Q2_K.gguf"
+EMBEDDING_MODEL_URL ?= "https://catalog.jan.ai/dist/models/embeds/nomic-embed-text-v1.5.f16.gguf"
 
 # Default target, does nothing
 all:
 	@echo "Specify a target to run"
 
 # Build the Cortex engine
-build:
+build-lib:
 ifeq ($(OS),Windows_NT)
-	mkdir -p build
-	cd build; \
-	cmake .. $(CMAKE_EXTRA_FLAGS); \
-	cmake --build . --config Release;
+	@powershell -Command "cmake -S ./third-party -B ./build_deps/third-party;"
+	@powershell -Command "cmake --build ./build_deps/third-party --config Release -j4;"
+	@powershell -Command "mkdir -p build; cd build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
 else ifeq ($(shell uname -s),Linux)
-	mkdir build && cd build; \
-	cmake .. $(CMAKE_EXTRA_FLAGS); \
-	make -j$(nproc);
+	@cmake -S ./third-party -B ./build_deps/third-party;
+	@make -C ./build_deps/third-party -j4;
+	@rm -rf ./build_deps/third-party;
+	@mkdir -p build && cd build && \
+	cmake .. $(CMAKE_EXTRA_FLAGS) && \
+	make -j4
 else
-	mkdir build && cd build; \
-	cmake .. $(CMAKE_EXTRA_FLAGS); \
-	make -j$(sysctl -n hw.ncpu);
+	@cmake -S ./third-party -B ./build_deps/third-party
+	@make -C ./build_deps/third-party -j4
+	@rm -rf ./build_deps/third-party
+	@mkdir -p build && cd build && \
+	cmake .. $(CMAKE_EXTRA_FLAGS) && \
+	make -j4
 endif
 
-code-sign: build
+# Configure and build examples/server after the engine library; each step runs only if the previous one succeeded.
+build-example-server: build-lib
 ifeq ($(OS),Windows_NT)
-	@echo "Hello Windows";
+	@powershell -Command "mkdir -p .\examples\server\build; cd .\examples\server\build; cmake .. $(CMAKE_EXTRA_FLAGS); cmake --build . --config Release;"
 else ifeq ($(shell uname -s),Linux)
-	@echo "Hello Linux";
+	@mkdir -p examples/server/build && cd examples/server/build && \
+	cmake .. $(CMAKE_EXTRA_FLAGS) && \
+	cmake --build . --config Release
 else
-	@echo "Hello MacOS";
+	@mkdir -p examples/server/build && cd examples/server/build && \
+	cmake .. && \
+	cmake --build . --config Release
 endif
 
-package: build
+# Copy the built engine library into cortex.llamacpp/ and archive it; a failed copy aborts before the archive is made.
+package:
+ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p cortex.llamacpp; cp build\Release\engine.dll cortex.llamacpp\; 7z a -ttar temp.tar cortex.llamacpp\*; 7z a -tgzip cortex.llamacpp.tar.gz temp.tar;"
+else ifeq ($(shell uname -s),Linux)
+	@mkdir -p cortex.llamacpp && \
+	cp build/libengine.so cortex.llamacpp/ && \
+	tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
+else
+	@mkdir -p cortex.llamacpp && \
+	cp build/libengine.dylib cortex.llamacpp/ && \
+	tar -czvf cortex.llamacpp.tar.gz cortex.llamacpp
+endif
+
+# Run the server e2e script against the built engine; the skip is its own branch so nothing else runs when RUN_TESTS is false.
+run-e2e-test:
+ifeq ($(RUN_TESTS),false)
+	@echo "Skipping tests"
-ifeq ($(OS),Windows_NT)
-	@echo "Hello Windows";
+else ifeq ($(OS),Windows_NT)
+	@powershell -Command "mkdir -p examples\server\build\Release\engines\cortex.llamacpp; cd examples\server\build\Release; cp ..\..\..\..\build\Release\engine.dll engines\cortex.llamacpp; ..\..\..\..\.github\scripts\e2e-test-server-windows.bat server.exe $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL);"
 else ifeq ($(shell uname -s),Linux)
-	@echo "Hello Linux";
+	@mkdir -p examples/server/build/engines/cortex.llamacpp && \
+	cd examples/server/build/ && \
+	cp ../../../build/libengine.so engines/cortex.llamacpp/ && \
+	chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL)
 else
-	@echo "Hello MacOS";
+	@mkdir -p examples/server/build/engines/cortex.llamacpp && \
+	cd examples/server/build/ && \
+	cp ../../../build/libengine.dylib engines/cortex.llamacpp/ && \
+	chmod +x ../../../.github/scripts/e2e-test-server-linux-and-mac.sh && ../../../.github/scripts/e2e-test-server-linux-and-mac.sh ./server $(LLM_MODEL_URL) $(EMBEDDING_MODEL_URL)
 endif
\ No newline at end of file
diff --git a/configure.bat b/configure.bat
deleted file mode 100755
index 7f24733..0000000
--- a/configure.bat
+++ /dev/null
@@ -1,2 +0,0 @@
-cmake -S ./third-party -B ./build_deps/third-party
-cmake --build ./build_deps/third-party --config Release -j %NUMBER_OF_PROCESSORS%
\ No newline at end of file
diff --git a/configure.sh b/configure.sh
deleted file mode 100755
index 842bbb2..0000000
--- a/configure.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-cmake -S ./third-party -B ./build_deps/third-party
-make -C ./build_deps/third-party -j 10
-rm -rf ./build_deps/third-party
\ No newline at end of file