diff --git a/.github/CODE_OF_CONDUCT.md b/.github/CODE_OF_CONDUCT.md new file mode 100644 index 000000000..2db16038a --- /dev/null +++ b/.github/CODE_OF_CONDUCT.md @@ -0,0 +1,134 @@ + +# Contributor Covenant Code of Conduct + +## Our Pledge + +We as members, contributors, and leaders pledge to make participation in our +community a harassment-free experience for everyone, regardless of age, body +size, visible or invisible disability, ethnicity, sex characteristics, gender +identity and expression, level of experience, education, socio-economic status, +nationality, personal appearance, race, caste, color, religion, or sexual +identity and orientation. + +We pledge to act and interact in ways that contribute to an open, welcoming, +diverse, inclusive, and healthy community. + +## Our Standards + +Examples of behavior that contributes to a positive environment for our +community include: + +* Demonstrating empathy and kindness toward other people +* Being respectful of differing opinions, viewpoints, and experiences +* Giving and gracefully accepting constructive feedback +* Accepting responsibility and apologizing to those affected by our mistakes, + and learning from the experience +* Focusing on what is best not just for us as individuals, but for the overall + community + +Examples of unacceptable behavior include: + +* The use of sexualized language or imagery, and sexual attention or advances of + any kind +* Trolling, insulting or derogatory comments, and personal or political attacks +* Public or private harassment +* Publishing others' private information, such as a physical or email address, + without their explicit permission +* Other conduct which could reasonably be considered inappropriate in a + professional setting + +## Enforcement Responsibilities + +Community leaders are responsible for clarifying and enforcing our standards of +acceptable behavior and will take appropriate and fair corrective action in +response to any behavior that they deem inappropriate, threatening, offensive, +or harmful. + +Community leaders have the right and responsibility to remove, edit, or reject +comments, commits, code, wiki edits, issues, and other contributions that are +not aligned to this Code of Conduct, and will communicate reasons for moderation +decisions when appropriate. + +## Scope + +This Code of Conduct applies within all community spaces, and also applies when +an individual is officially representing the community in public spaces. +Examples of representing our community include using an official email address, +posting via an official social media account, or acting as an appointed +representative at an online or offline event. + +## Enforcement + +Instances of abusive, harassing, or otherwise unacceptable behavior may be +reported to the community leaders responsible for enforcement at +`leapfrogai [@] defenseunicorns.com`. + +All complaints will be reviewed and investigated promptly and fairly. + +All community leaders are obligated to respect the privacy and security of the +reporter of any incident. + +## Enforcement Guidelines + +Community leaders will follow these Community Impact Guidelines in determining +the consequences for any action they deem in violation of this Code of Conduct: + +### 1. Correction + +**Community Impact**: Use of inappropriate language or other behavior deemed +unprofessional or unwelcome in the community. 
+ +**Consequence**: A private, written warning from community leaders, providing +clarity around the nature of the violation and an explanation of why the +behavior was inappropriate. A public apology may be requested. + +### 2. Warning + +**Community Impact**: A violation through a single incident or series of +actions. + +**Consequence**: A warning with consequences for continued behavior. No +interaction with the people involved, including unsolicited interaction with +those enforcing the Code of Conduct, for a specified period of time. This +includes avoiding interactions in community spaces as well as external channels +like social media. Violating these terms may lead to a temporary or permanent +ban. + +### 3. Temporary Ban + +**Community Impact**: A serious violation of community standards, including +sustained inappropriate behavior. + +**Consequence**: A temporary ban from any sort of interaction or public +communication with the community for a specified period of time. No public or +private interaction with the people involved, including unsolicited interaction +with those enforcing the Code of Conduct, is allowed during this period. +Violating these terms may lead to a permanent ban. + +### 4. Permanent Ban + +**Community Impact**: Demonstrating a pattern of violation of community +standards, including sustained inappropriate behavior, harassment of an +individual, or aggression toward or disparagement of classes of individuals. + +**Consequence**: A permanent ban from any sort of public interaction within the +community. + +## Attribution + +This Code of Conduct is adapted from the [Contributor Covenant][homepage], +version 2.1, available at +[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. + +Community Impact Guidelines were inspired by +[Mozilla's code of conduct enforcement ladder][Mozilla CoC]. + +For answers to common questions about this code of conduct, see the FAQ at +[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at +[https://www.contributor-covenant.org/translations][translations]. + +[homepage]: https://www.contributor-covenant.org +[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html +[Mozilla CoC]: https://github.com/mozilla/diversity +[FAQ]: https://www.contributor-covenant.org/faq +[translations]: https://www.contributor-covenant.org/translations diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md new file mode 100644 index 000000000..e8672f740 --- /dev/null +++ b/.github/CONTRIBUTING.md @@ -0,0 +1,54 @@ +# Welcome to LeapfrogAI + +Thank you for your interest in LeapfrogAI! + +This document describes the process and requirements for contributing. + +## Developer Experience + +Continuous Delivery is core to our development philosophy. Check out [https://minimumcd.org](https://minimumcd.org) for a good baseline agreement on what that means. + +Specifically: + +- We do trunk-based development (main) with short-lived feature branches that originate from the trunk, get merged into the trunk, and are deleted after the merge +- We don't merge code into main that isn't releasable +- We perform automated testing on all changes before they get merged to main +- Continuous integration (CI) pipeline tests are definitive +- We create immutable release artifacts + +### Developer Workflow + +:key: == Required by automation + +1. Drop a comment in any issue to let everyone know you're working on it and submit a Draft PR (step 4) as soon as you are able. +2. 
:key: Set up your Git config to GPG sign all commits. [Here's some documentation on how to set it up](https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits). You won't be able to merge your PR if you have any unverified commits.
+3. Use the [pre-commit](https://pre-commit.com/) hooks to provide localized checks against your new or modified code to catch mistakes before pushing.
+
+   - Pre-commit must be activated in a Python-enabled environment and installed to the local `.git` repository to activate the commit hooks properly
+   - UDS and Zarf lints require the [UDS tasks](../tasks.schema.json), [UDS](../uds.schema.json), and [Zarf](../zarf.schema.json) JSON schemas to be up-to-date with the current UDS CLI version (e.g., v0.14.0)
+
+   ```bash
+   wget https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/uds.schema.json
+   wget https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/zarf.schema.json
+   wget https://raw.githubusercontent.com/defenseunicorns/uds-cli/v0.14.0/tasks.schema.json
+   ```
+
+4. Create a Draft Pull Request as soon as you can, even if it is just 5 minutes after you started working on it. We lean towards working in the open as much as we can.
+
+   > ⚠️ **NOTE:** _:key: We use [Conventional Commit messages](https://www.conventionalcommits.org/) in PR titles so, if you can, use one of `fix:`, `feat:`, `chore:`, `docs:` or similar. If you need help, just use `wip:` and we'll help with the rest_
+
+5. :key: Automated tests will begin based on the paths you have edited in your Pull Request.
+
+   > ⚠️ **NOTE:** _If you are an external third-party contributor, the pipelines won't run until a [CODEOWNER](./CODEOWNERS) approves the pipeline run._
+
+6. :key: Be sure to heed the `needs-adr`, `needs-docs`, and `needs-tests` labels as appropriate for the PR. Once you have addressed all of the needs, remove the label or request that a maintainer remove it.
+7. Once the review is complete and approved, a core member of the project will merge your PR. If you are an external third-party contributor, two core members (CODEOWNERS) of the project will be required to approve the PR.
+8. Close the issue if it is fully resolved by your PR. _Hint: You can add "Fixes #XX" to the PR description to automatically close an issue when the PR is merged._
+
+### Release Please
+
+We've chosen Google's [release-please](https://github.com/googleapis/release-please#release-please) as our automated tag and release solution. Below are some basic usage instructions. Read the documentation provided in the link for more advanced usage.
+
+- Use the Conventional Commits specification for all PRs that are merged into the `main` branch.
+- To force a specific version, such as a patch or minor release, provide an empty commit like this: `git commit --allow-empty -m "chore: release 0.1.0" -m "Release-As: 0.1.0"`
+- Maintain and provide a `secrets.RELEASE_PLEASE_TOKEN` Personal Access Token (PAT) as identified in the GitHub workflow YAML.
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index d35438a69..00236e769 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -7,21 +7,30 @@ assignees: ''
 ---
 
 ### Environment
-Device and OS:
-App/package versions:
-Kubernetes distro being used:
-Other:
+
+1. OS and Architecture:
+2. App or Package Name:
+3. App or Package Version:
+4. Kubernetes Distribution:
+5. Kubernetes Version:
+6. Other:
 
 ### Steps to reproduce
+
 1.
### Expected result
+
+-
+
 ### Actual Result
+
+-
+
 ### Visual Proof (screenshots, videos, text, etc)
 
-### Severity/Priority
+-
 
 ### Additional Context
+
 Add any other context or screenshots about the bug here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
index cbbc3ddb9..19ea5246d 100644
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -19,7 +19,9 @@ assignees: ''
 **Then** [something happens]
 
 ### Describe alternatives you've considered
+
 (optional) A clear and concise description of any alternative solutions or features you've considered.
 
 ### Additional context
+
 Add any other context or screenshots about the feature request here.
diff --git a/.github/ISSUE_TEMPLATE/tech_debt.md b/.github/ISSUE_TEMPLATE/tech_debt.md
index 729bf25ce..b718b9fce 100644
--- a/.github/ISSUE_TEMPLATE/tech_debt.md
+++ b/.github/ISSUE_TEMPLATE/tech_debt.md
@@ -7,10 +7,13 @@ assignees: ''
 ---
 
 ### Describe what should be investigated or refactored
+
 A clear and concise description of what should be changed/researched. Ex. This piece of the code is not DRY enough [...]
 
 ### Links to any relevant code
-(optional) i.e. - https://github.com/defenseunicorns/uds-software-factory/blob/main/README.md?plain=1#L1
+
+(optional) i.e. -
 
 ### Additional context
-Add any other context or screenshots about the technical debt here.
\ No newline at end of file
+
+Add any other context or screenshots about the technical debt here.
diff --git a/.github/SECURITY.md b/.github/SECURITY.md
new file mode 100644
index 000000000..6a4387f85
--- /dev/null
+++ b/.github/SECURITY.md
@@ -0,0 +1,9 @@
+# Security Policy
+
+## Supported Versions
+
+As LeapfrogAI has not yet reached v1.0.0, only the latest minor release is supported.
+
+## Reporting a Vulnerability
+
+Please email `leapfrogai [@] defenseunicorns.com` to report a vulnerability. If you are unable to disclose details via email, please let us know and we can coordinate alternate communications.
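Tying back to the GPG-signing requirement in the contributor workflow above, here is a minimal configuration sketch. It assumes a GPG key already exists on your machine, and the key ID shown is a placeholder:

```bash
# Find your signing key ID (the long hex value on the "sec" line)
gpg --list-secret-keys --keyid-format=long

# Enable GPG signing for all commits (key ID below is a placeholder)
git config --global user.signingkey 3AA5C34371567BD2
git config --global commit.gpgsign true

# Verify that new commits are signed
git log --show-signature -1
```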
diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md
new file mode 100644
index 000000000..8074a83da
--- /dev/null
+++ b/.github/pull_request_template.md
@@ -0,0 +1,16 @@
+## Description
+
+### BREAKING CHANGES
+
+### CHANGES
+
+## Related Issue
+
+Fixes #
+
+Relates to #
+
+## Checklist before merging
+
+- [ ] Tests, documentation, ADR added or updated as needed
+- [ ] Followed the [Contributor Guide Steps](https://github.com/defenseunicorns/leapfrogai/blob/main/.github/CONTRIBUTING.md)
diff --git a/.github/release-please-config.json b/.github/release-please-config.json
index 2c61e1446..6cf564cdb 100644
--- a/.github/release-please-config.json
+++ b/.github/release-please-config.json
@@ -40,6 +40,11 @@
       "type": "generic",
       "path": "**/zarf-config.yaml",
       "glob": true
+    },
+    {
+      "type": "generic",
+      "path": "**/hugo.toml",
+      "glob": true
     }
   ]
 }
diff --git a/.github/workflows/markdown-lint.yaml b/.github/workflows/markdown-lint.yaml
new file mode 100644
index 000000000..867a6ea2d
--- /dev/null
+++ b/.github/workflows/markdown-lint.yaml
@@ -0,0 +1,43 @@
+name: Markdown Lint
+
+on:
+  push:
+    branches:
+      - "main"
+    paths:
+      - README.md
+      - .github/*.md
+      - docs/**/*.md
+      - ".github/workflows/markdown-lint.yaml"
+  pull_request:
+    branches:
+      - "main"
+    paths:
+      - README.md
+      - .github/*.md
+      - docs/**/*.md
+      - ".github/workflows/markdown-lint.yaml"
+
+concurrency:
+  group: markdown-lint-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  markdown-lint:
+    runs-on: ubuntu-latest
+    name: Lint Markdown Files
+
+    permissions:
+      contents: read
+
+    steps:
+      - name: Checkout Repo
+        uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
+
+      - uses: DavidAnson/markdownlint-cli2-action@b4c9feab76d8025d1e83c653fa3990936df0e6c8 # v16.0.0
+        with:
+          config: "./.markdownlint.json"
+          globs: |
+            README.md
+            .github/*.md
+            docs/**/*.md
diff --git a/.markdownlint.json b/.markdownlint.json
new file mode 100644
index 000000000..9a9ca0851
--- /dev/null
+++ b/.markdownlint.json
@@ -0,0 +1,16 @@
+{
+  "MD053": false,
+  "MD034": false,
+  "MD013": false,
+  "MD029": false,
+  "MD041": false,
+  "MD033": false,
+  "MD004": false,
+  "MD024": false,
+  "MD036": false,
+  "MD028": false,
+  "MD049": false,
+  "MD007": false,
+  "MD022": false,
+  "MD025": false
+}
diff --git a/.markdownlintignore b/.markdownlintignore
new file mode 100644
index 000000000..2066086cd
--- /dev/null
+++ b/.markdownlintignore
@@ -0,0 +1,5 @@
+LICENSE
+CHANGELOG.md
+CODEOWNERS
+.github/ISSUE_TEMPLATE/
+.github/pull_request_template.md
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 3e6df886e..24785cab5 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,32 +1,78 @@
 repos:
-  # Generic pre-commit checks
+  ################
+  # GENERAL CHECKS
+  ################
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v4.0.1
     hooks:
       - id: check-added-large-files
+        name: Large Files Check
         args: ["--maxkb=1024"]
+
       - id: detect-aws-credentials
+        name: Check AWS Credentials
         args:
           - "--allow-missing-credentials"
+
       - id: detect-private-key
+        name: Check Private Keys
+
       - id: check-merge-conflict
+        name: Merge Conflict Resolution Check
+
       - id: end-of-file-fixer
+        name: Newline EOF Checker
+
      - id: fix-byte-order-marker
+        name: Fix UTF-8 byte order marker
+
       - id: trailing-whitespace
+        name: Whitespace Cleaning Check
         args: [--markdown-linebreak-ext=md]
 
-  # Python linting and formatting
+  - repo:
https://github.com/scop/pre-commit-shfmt + rev: v3.8.0-1 + hooks: + - id: shfmt + name: Shell Script Format + + - repo: https://github.com/gitleaks/gitleaks + rev: v8.18.0 + hooks: + - id: gitleaks + name: GitLeaks Checks + + - repo: https://github.com/sirosen/fix-smartquotes + rev: 0.2.0 + hooks: + - id: fix-smartquotes + name: Fix Quotes + + ############ + # CODE LINT + ############ + + - repo: https://github.com/DavidAnson/markdownlint-cli2 + rev: v0.12.1 + hooks: + - id: markdownlint-cli2 + name: Markdown Lint + + - repo: local + hooks: + - id: eslint + name: ESLint + language: system + entry: sh -c 'npm --prefix src/leapfrogai_ui/ run lint' + files: \.(js|jsx|ts|tsx|svelte|cjs|mjs)$ # *.js, *.jsx, *.ts, *.tsx, *.svelte, *.cjs, *.mjs + - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.3.4 hooks: - id: ruff # Run the linter. + name: Ruff Lint - id: ruff-format # Run the formatter. - - # Local Eslint w/ Prettier - - repo: local - hooks: - - id: eslint - name: eslint - language: system - entry: sh -c 'npm --prefix src/leapfrogai_ui/ run lint' - files: \.(js|jsx|ts|tsx|svelte|cjs|mjs)$ # *.js, *.jsx, *.ts, *.tsx, *.svelte, *.cjs, *.mjs + name: Ruff Format diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index b764d1ccd..000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,66 +0,0 @@ -# Contributing to LeapfrogAI (LFAI) - -First off, thanks so much for wanting to help out! :tada: - -This document describes the steps and requirements for contributing a bug fix or feature in a Pull Request to all LeapfrogAI products! If you have any questions about the process or the pull request you are working on feel free to reach out in the [LFAI Discord Channel](https://discord.gg/s2Ja5cmZRQ). - -## Developer Experience - -Continuous Delivery is core to our development philosophy. Check out [https://minimumcd.org](https://minimumcd.org/) for a good baseline agreement on what that means. - -Specifically: - -- We do trunk-based development (`main`) with short-lived feature branches that originate from the trunk, get merged into the trunk, and are deleted after the merge -- We don't merge code into `main` that isn't releasable -- We perform automated testing on all changes before they get merged to `main` -- We create ADRs for all architectural decisions -- Merges are always squashed and use [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) -- We create immutable release artifacts - -### Developer Workflow - -:key: == Required by automation - -1. Look at the next due [issue] and pick an issue that you want to work on. If you don't see anything that interests you, create an issue and assign it to yourself. -2. Drop a comment in the issue to let everyone know you're working on it and submit a Draft PR (step 4) as soon as you are able. If you have any questions as you work through the code, reach out in the [LFAI Discord Channel](https://discord.gg/s2Ja5cmZRQ). -3. :key: Set up your Git config to GPG sign all commits. [Here's some documentation on how to set it up](https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits). You won't be able to merge your PR if you have any unverified commits. -4. Create a Draft Pull Request as soon as you can, even if it is just 5 minutes after you started working on it. We lean towards working in the open as much as we can. If you're not sure what to put in the PR description, just put a link to the issue you're working on. 
If you're not sure what to put in the PR title, just put "WIP" (Work In Progress) and we'll help you out with the rest. -5. :key: Automated tests will begin based on the paths you have edited in your Pull Request. - > ⚠️ **NOTE:** _If you are an external third-party contributor, the pipelines won't run until a [CODEOWNER](https://github.com/zarf-dev/zarf/blob/main/CODEOWNERS) approves the pipeline run._ -6. :key: Be sure to use the [needs-adr,needs-docs,needs-tests](https://github.com/zarf-dev/zarf/labels?q=needs) labels as appropriate for the PR. Once you have addressed all of the needs, remove the label. -7. Once the review is complete and approved, a core member of the LeapfrogAI project will merge your PR. If you are an external third-party contributor, two core members of the zarf project will be required to approve the PR. -8. Close the issue if it is fully resolved by your PR. _Hint: You can add "Fixes #XX" to the PR description to automatically close an issue when the PR is merged._ - -## Testing - -TBD - -## Documentation - -### Updating Our Documentation - -Under construction. - -### Architecture Decision Records (ADR) - -We've chosen to use ADRs to document architecturally significant decisions. We primarily use the guidance found in [this article by Michael Nygard](http://thinkrelevance.com/blog/2011/11/15/documenting-architecture-decisions) with a couple of tweaks: - -- The criteria for when an ADR is needed is undefined. The team will decide when the team needs an ADR. -- We will use the tool [adr-tools](https://github.com/npryce/adr-tools) to make it easier on us to create and maintain ADRs. -- We will keep ADRs in the repository under `adr/NNNN-name-of-adr.md`. `adr-tools` is configured with a dotfile to automatically use this directory and format. - -### How to use `adr-tools` - -```bash -# Create a new ADR titled "Use Bisquick for all waffle making" -adr new Use Bisquick for all waffle making - -# Create a new ADR that supersedes a previous one. Let's say, for example, that the previous ADR about Bisquick was ADR number 9. -adr new -s 9 Use scratch ingredients for all waffle making - -# Create a new ADR that amends a previous one. Let's say the previous one was ADR number 15 -adr new -l "15:Amends:Amended by" Use store-bought butter for all waffle making - -# Get full help docs. There are all sorts of other helpful commands that help manage the decision log. 
-adr help -``` diff --git a/Makefile b/Makefile index bf5442755..3515ff30a 100644 --- a/Makefile +++ b/Makefile @@ -164,7 +164,7 @@ build-gpu: build-supabase build-api build-ui build-vllm build-text-embeddings bu build-all: build-cpu build-gpu ## Build all of the LFAI packages -include tests/make-tests.mk +include tests/Makefile include packages/k3d-gpu/Makefile diff --git a/README.md b/README.md index 92a1b8af6..e60eb4af3 100644 --- a/README.md +++ b/README.md @@ -11,16 +11,13 @@ - [Getting Started](#getting-started) - [Components](#components) - [API](#api) - - [Backends](#backends) - [SDK](#sdk) - - [User Interface](#user-interface) - - [Repeater](#repeater) - - [Image Hardening](#image-hardening) + - [UI](#ui) + - [Backends](#backends) + - [Repeater](#repeater) - [Usage](#usage) - - [UDS](#uds) - - [UDS Latest](#uds-latest) - - [UDS Dev](#uds-dev) - - [Local Dev](#local-dev) +- [Local Development](#local-development) +- [Contributing](#contributing) - [Community](#community) ## Overview @@ -39,11 +36,24 @@ Large Language Models (LLMs) are a powerful resource for AI-driven decision maki - **Mission Integration**: By hosting your own LLM, you have the ability to customize the model's parameters, training data, and more, tailoring the AI to your specific needs. +## Demo Video + + + 2 minute demo of features of LeapfrogAI + + +LeapfrogAI, built on top of [Unicorn Delivery Service (UDS)](https://github.com/defenseunicorns/uds-core), which includes several features including: + +- **Single Sign-On** +- **Non-proprietary API Compatible with OpenAI's API** +- **Retrieval Augmented Generation (RAG)** +- **And More!** + ## Structure -The LeapfrogAI repository follows a monorepo structure based around an [API](#api) with each of the [components](#components) included in a dedicated `packages` directory. Each of these package directories contains the source code for each component as well as the deployment infrastructure. The UDS bundles that handle the development and latest deployments of LeapfrogAI are in the `uds-bundles` directory. The structure looks as follows: +The LeapfrogAI repository follows a monorepo structure based around an [API](#api) with each of the [components](#components) included in a dedicated `packages` directory. The UDS bundles that handle the development and latest deployments of LeapfrogAI are in the `uds-bundles` directory. The structure looks as follows: -```shell +```bash leapfrogai/ ├── src/ │ ├── leapfrogai_api/ # source code for the API @@ -52,7 +62,7 @@ leapfrogai/ ├── packages/ │ ├── api/ # deployment infrastructure for the API │ ├── llama-cpp-python/ # source code & deployment infrastructure for the llama-cpp-python backend -│ ├── repeater/ # source code & deployment infrastructure for the repeater model backend +│ ├── repeater/ # source code & deployment infrastructure for the repeater model backend │ ├── supabase/ # deployment infrastructure for the Supabase backend and postgres database │ ├── text-embeddings/ # source code & deployment infrastructure for the text-embeddings backend │ ├── ui/ # deployment infrastructure for the UI @@ -69,81 +79,78 @@ leapfrogai/ ## Getting Started -The preferred method for running LeapfrogAI is a local [Kubernetes](https://kubernetes.io/) deployment using [UDS](https://github.com/defenseunicorns/uds-core). Refer to the [Quick Start](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/) section of the LeapfrogAI documentation site for instructions on this type of deployment. 
+The preferred method for running LeapfrogAI is a local [Kubernetes](https://kubernetes.io/) deployment using [UDS](https://github.com/defenseunicorns/uds-core). + +Please refer to the [Quick Start](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/) section of the LeapfrogAI documentation website for system requirements and instructions. ## Components ### API -LeapfrogAI provides an API that closely matches that of OpenAI's. This feature allows tools that have been built with OpenAI/ChatGPT to function seamlessly with a LeapfrogAI backend. - -### Backends - -LeapfrogAI provides several backends for a variety of use cases. - -> Available Backends: -> | Backend | AMD64 Support | ARM64 Support | Cuda Support | Docker Ready | K8s Ready | Zarf Ready | -> | --- | --- | --- | --- | --- | --- | --- | -> | [llama-cpp-python](packages/llama-cpp-python/) | ✅ | 🚧 | ✅ | ✅ | ✅ | ✅ | -> | [whisper](packages/whisper/) | ✅ | 🚧 | ✅ | ✅ | ✅ | ✅ | -> | [text-embeddings](packages/text-embeddings/) | ✅ | 🚧 | ✅ | ✅ | ✅ | ✅ | -> | [vllm](packages/vllm/) | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ | +LeapfrogAI provides an [API](src/leapfrogai_api/) that closely matches that of OpenAI's. This feature allows tools that have been built with OpenAI/ChatGPT to function seamlessly with a LeapfrogAI backend. ### SDK -The LeapfrogAI [SDK](src/leapfrogai_sdk/) provides a standard set of protobuff and python utilities for implementing backends and gRPC. +The LeapfrogAI [SDK](src/leapfrogai_sdk/) provides a standard set of protobufs and Python utilities for implementing backends with gRPC. -### User Interface +### UI -LeapfrogAI provides a [User Interface](src/leapfrogai_ui/) with support for common use-cases such as chat, summarization, and transcription. +LeapfrogAI provides a [UI](src/leapfrogai_ui/) with support for common use-cases such as general chat and "Q&A with your documents". -### Repeater +### Backends + +LeapfrogAI provides several backends for a variety of use cases. Below is the backends support and compatibility matrix: -The [repeater](packages/repeater/) "model" is a basic "backend" that parrots all inputs it receives back to the user. It is built out the same way all the actual backends are and it primarily used for testing the API. +| Backend | AMD64 | ARM64 | CUDA | Docker | Kubernetes | UDS | +| ---------------------------------------------- | ------- | ------- | ------ | ------ | ---------- | ------- | +| [llama-cpp-python](packages/llama-cpp-python/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [whisper](packages/whisper/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [text-embeddings](packages/text-embeddings/) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | +| [vllm](packages/vllm/) | ✅ | ❌[^1] | ✅ | ✅ | ✅ | ✅ | -### Image Hardening +[^1]: vLLM requires a CUDA-enabled PyTorch built for ARM64, which is not available via pip or conda -> GitHub Repo: -> -> - [leapfrogai-images](https://github.com/defenseunicorns/leapfrogai-images) +#### Repeater -LeapfrogAI leverages Chainguard's [apko](https://github.com/chainguard-dev/apko) to harden base python images - pinning Python versions to the latest supported version by the other components of the LeapfrogAI stack. +The [repeater](packages/repeater/) "model" is a basic "backend" that parrots all inputs it receives back to the user. It is built out the same way all the actual backends are and it is primarily used for testing the API. ## Usage -### UDS +To build a LeapfrogAI UDS bundle and deploy it, please refer to the [LeapfrogAI Documentation Website](https://docs.leapfrog.ai/docs/). 
In the documentation website, you'll find system requirements and instructions for all things LeapfrogAI that aren't associated with local development and contributing.
+
+For contributing guidelines, and for local deployment and development of each component in a local Python or Node.js environment, please continue on to the [next section](#local-development).
 
-LeapfrogAI can be deployed and run locally via UDS and Kubernetes, built out using [Zarf](https://zarf.dev) packages. See the [Quick Start](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/#prerequisites) for a list of prerequisite packages that must be installed first.
+## Local Development
 
-Prior to deploying any LeapfrogAI packages, a UDS Kubernetes cluster must be deployed using the most recent k3d bundle:
+> [!NOTE]
+> Please start with the [LeapfrogAI documentation website](https://docs.leapfrog.ai/docs/local-deploy-guide/) prior to attempting local development
 
-```sh
-make create-uds-cpu-cluster
-```
+Each of the LeapfrogAI components can also be run individually outside of a Kubernetes or containerized environment. This is useful when testing changes to a specific component, but will not assist in a full deployment of LeapfrogAI. Please refer to the [above section](#usage) for deployment instructions. Please refer to the [next section](#contributing) for rules on contributing to LeapfrogAI.
 
-#### UDS Latest
+**_First_** refer to the [DEVELOPMENT.md](docs/DEVELOPMENT.md) document for general development details.
 
-This type of deployment pulls the most recent package images and is the most stable way of running a local LeapfrogAI deployment. These instructions can be found on the [LeapfrogAI Docs](https://docs.leapfrog.ai/docs/) site.
+**_Then_** refer to the linked READMEs for each individual sub-directory's local development instructions.
 
-#### UDS Dev
-
-If you want to make some changes to LeapfrogAI before deploying via UDS (for example in a dev environment), follow the [UDS Dev Instructions](/uds-bundles/dev/README.md).
+- [SDK](src/leapfrogai_sdk/README.md)[^2]
+- [API](packages/api/README.md)[^3]
+- [UI](packages/ui/README.md)[^3]
+- [LLaMA C++ Python](packages/llama-cpp-python/README.md)
+- [vLLM](packages/vllm/README.md)
+- [Supabase](packages/supabase/README.md)
+- [Text Embeddings](packages/text-embeddings/README.md)
+- [Faster Whisper](packages/whisper/README.md)
+- [Repeater](packages/repeater/README.md)
+- [Tests](tests/README.md)
 
+[^2]: The SDK is not a functionally independent unit, and only becomes a functional unit when combined and packaged with the API and Backends as a dependency.
 
-### Local Dev
+[^3]: Please be aware that the API and UI have artifacts under 2 sub-directories. The sub-directories related to `packages/` are focused on the Zarf packaging and Helm charts, whereas the sub-directories related to `src/` contain the actual source code and development instructions.
 
-Each of the LFAI components can also be run individually outside of a Kubernetes environment via local development. This is useful when testing changes to a specific component, but will not assist in a full deployment of LeapfrogAI. Please refer to the above sections for deployment instructions.
+## Contributing -Please refer to the linked READMEs for each individual packages local development instructions: +All potential and current contributors must ensure that they have read the [Contributing documentation](.github/CONTRIBUTING.md), [Security Policies](.github/SECURITY.md) and [Code of Conduct](.github/CODE_OF_CONDUCT.md) prior to opening an issue or pull request to this repository. -- [API](/src/leapfrogai_api/README.md) -- [llama-cpp-python](/packages/llama-cpp-python/README.md) -- [repeater](/packages/repeater/README.md) -- [supabase](/packages/supabase/README.md) -- [text-embeddings](/packages/text-embeddings/README.md) -- [ui](/src/leapfrogai_ui/README.md) -- [vllm](/packages/vllm/README.md) -- [whisper](/packages/whisper/README.md) +When submitting an issue or opening a PR, please first ensure that you have searched your potential issue or PR against the existing or closed issues and PRs. Perceived duplicates will be closed, so please reference and differentiate your contributions from tangential or similar issues and PRs. ## Community @@ -162,4 +169,4 @@ LeapfrogAI is supported by a community of users and contributors, including: [![Defense Unicorns logo](/docs/imgs/user-logos/defense-unicorns.png)](https://defenseunicorns.com)[![Beast Code logo](/docs/imgs/user-logos/beast-code.png)](https://beast-code.com)[![Hypergiant logo](/docs/imgs/user-logos/hypergiant.png)](https://hypergiant.com)[![Pulze logo](/docs/imgs/user-logos/pulze.png)](https://pulze.ai) -*Want to add your organization or logo to this list? [Open a PR!](https://github.com/defenseunicorns/leapfrogai/edit/main/README.md)* +_Want to add your organization or logo to this list? [Open a PR!](https://github.com/defenseunicorns/leapfrogai/edit/main/README.md)_ diff --git a/docs/DEVELOPMENT.md b/docs/DEVELOPMENT.md new file mode 100644 index 000000000..06fdd8255 --- /dev/null +++ b/docs/DEVELOPMENT.md @@ -0,0 +1,324 @@ +# Development + +> [!IMPORTANT] +> Please read the entirety of the root [README.md](../README.md) and the [LeapfrogAI documentation website](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/) prior to reading this document. Also, please refer to the [CONTRIBUTING.md](../.github/CONTRIBUTING.md) for rules on contributing to the LeapfrogAI project. + +The purpose of this document is to describe how to run a development loop on the LeapfrogAI tech stack. Specifics for each component are within the sub-directories identified in the root [README.md](../README.md). + +## Local Development + +Please first see the pre-requisites listed on the LeapfrogAI documentation website's [Requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [Dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/), before going to each component's subdirectory README + +## PyEnv + +It is **_HIGHLY RECOMMENDED_** that PyEnv be installed on your machine, and a new virtual environment is created for every new development branch. 
+ +Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.6: + + ```bash + # install the correct python version + pyenv install 3.11.6 + + # create a new virtual environment named "leapfrogai" + pyenv virtualenv 3.11.6 leapfrogai + + # activate the virtual environment + pyenv activate leapfrogai + ``` + +If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.6: + + ```bash + sudo apt-get install build-essential zlib1g-dev libffi-dev \ + libssl-dev libbz2-dev libreadline-dev libsqlite3-dev \ + liblzma-dev libncurses-dev + ``` + +## UDS CLI Aliasing + +Below are instructions for adding UDS CLI aliases that are useful for deployments that occur in an air-gap where the UDS CLI binary is available to the engineer. + +For general CLI UX, put the following in your shell configuration (e.g., `/root/.bashrc`, `~/.zshrc`): + +```bash +alias k="uds zarf tools kubectl" +alias kubectl="uds zarf tools kubectl" +alias zarf='uds zarf' +alias k9s='uds zarf tools monitor' +alias udsclean="uds zarf tools clear-cache && rm -rf ~/.uds-cache && rm -rf /tmp/zarf-*" +``` + +For fulfilling `kubectl` binary requirements necessary for running some of the _optional_ deployment helper scripts and for full functionality within `uds zarf tools monitor`: + +```bash +touch /usr/local/bin/kubectl +echo -e '#!/bin/bash\nuds zarf tools kubectl "$@"' > /usr/local/bin/kubectl +chmod +x /usr/local/bin/kubectl +``` + +## Makefiles + +Many of the directories and sub-directories within this project contain Make targets that can be executed to simplify repetitive command-line tasks. + +Please refer to each Makefile for more arguments and details on what each target does and is dependent on. + +## Environment Variables + +Be wary of `*config*.yaml` or `.env*` files that are in individual components of the stack. The component's README will usually tell the developer when to fill them out or supply environment variables to a script. + +For example, the LeapfrogAI API requires a `config.yaml` be supplied when spun up locally. Use the `config.example.yaml` as an example, and make sure the [ports chosen for applicable backends do not conflict on localhost](#port-conflicts). + +## Package Development + +If you don't want to [build an entire bundle](#bundle-development), or you want to "dev-loop" on a single package in an existing [UDS Kubernetes cluster](../packages/k3d-gpu/README.md) you can do so by performing the following. + +For example, this is how you build and (re)deploy a local DEV version of a package: + +```bash +# if package is already in the cluster, and you are deploying a new one +uds zarf package remove leapfrogai-api --confirm +uds zarf tools registry prune --confirm + +# create and deploy the new package +LOCAL_VERSION=dev REGISTRY_PORT=5000 ARCH=amd64 make build-api +LOCAL_VERSION=dev REGISTRY_PORT=5000 ARCH=amd64 make deploy-api +``` + +For example, this is how you pull and deploy a LATEST version of a package: + +```bash +# pull and deploy latest versions +uds zarf package pull oci://ghcr.io/defenseunicorns/leapfrogai/leapfrogai-api:latest -a amd64 +uds zarf package deploy zarf-package-*.tar.zst --confirm +``` + +## Bundle Development + +1. 
Install all the necessary package creation dependencies:
+
+   ```bash
+   python -m pip install "huggingface_hub[cli,hf_transfer]" "transformers[torch]" ctranslate2
+   ```
+
+2. Build all of the packages you need at once with **ONE** of the following Make targets:
+
+   ```bash
+   LOCAL_VERSION=dev ARCH=amd64 make build-cpu  # ui, api, llama-cpp-python, text-embeddings, whisper, supabase
+   # OR
+   LOCAL_VERSION=dev ARCH=amd64 make build-gpu  # ui, api, vllm, text-embeddings, whisper, supabase
+   # OR
+   LOCAL_VERSION=dev ARCH=amd64 make build-all  # all of the components
+   ```
+
+   **OR**
+
+   You can build components individually using the following Make targets:
+
+   ```bash
+   LOCAL_VERSION=dev ARCH=amd64 make build-ui
+   LOCAL_VERSION=dev ARCH=amd64 make build-api
+   LOCAL_VERSION=dev ARCH=amd64 make build-supabase
+   LOCAL_VERSION=dev ARCH=amd64 make build-vllm              # if you have NVIDIA GPUs (ARM64 not supported)
+   LOCAL_VERSION=dev ARCH=amd64 make build-llama-cpp-python  # if you have CPU only
+   LOCAL_VERSION=dev ARCH=amd64 make build-text-embeddings
+   LOCAL_VERSION=dev ARCH=amd64 make build-whisper
+   ```
+
+3. Create the UDS bundle, modifying the `uds-config.yaml` as required:
+
+   ```bash
+   cd uds-bundles/dev/
+   uds create . --confirm
+   ```
+
+4. Deploy the UDS bundle to an existing [UDS Kubernetes cluster](../packages/k3d-gpu/README.md):
+
+   ```bash
+   cd uds-bundles/dev/
+   uds deploy --confirm
+   ```
+
+### MacOS Specifics
+
+To run the same commands on macOS, you will need to prepend your command with a couple of env vars like so:
+
+**All Macs:** `REG_PORT=5001`
+
+**Apple Silicon (M1/M2/M3/M4 series) Macs:** `ARCH=arm64`
+
+To demonstrate what this would look like for an Apple Silicon Mac:
+
+```shell
+REG_PORT=5001 ARCH=arm64 LOCAL_VERSION=dev make build-cpu
+```
+
+To demonstrate what this would look like for an older Intel Mac:
+
+```shell
+REG_PORT=5001 ARCH=amd64 LOCAL_VERSION=dev make build-cpu
+```
+
+## Access
+
+All LeapfrogAI components exposed as `VirtualService` resources within a [UDS Kubernetes cluster](../packages/k3d-gpu/README.md) can be accessed without port-forwarding if [UDS Core Slim Dev](../packages/k3d-gpu/README.md) is installed with the LeapfrogAI packages.
+
+For example, when developing the API and you need access to Supabase, you can point your locally running API at the in-cluster Supabase by setting the Supabase base URL to the in-cluster domain (https://supabase-kong.uds.dev).
+
+The preferred method of testing changes is to fully deploy something to a cluster and run local smoke tests as needed. The GitHub workflows will run all integration and E2E test suites.
+
+### Supabase
+
+Supabase is a special case when spun up inside of a UDS Kubernetes cluster. All of the Bitnami Supabase components are served through Kong, which is exposed as https://supabase-kong.uds.dev through our Istio tenant gateway. All of the Make commands, and our UI and API, correctly route to the right endpoint for interacting with each sub-component of Supabase. The UI and API use the `supabase` TypeScript or Python package to interact with Supabase without issue.
+
+Although not recommended, example endpoints for direct interaction with the Supabase sub-components are as follows:
+
+- https://supabase-kong.uds.dev/auth/v1/* -> to access auth endpoints
+- https://supabase-kong.uds.dev/rest/v1/ -> for Postgres
+
+We highly recommend using the published `supabase` packages, or interacting with Supabase via the LeapfrogAI API or UI.
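As a quick smoke test of the Kong-exposed routes above, something like the following should confirm reachability. This is a hedged sketch: the secret name and namespace come from the port-forwarding script later in this document, while the `/auth/v1/health` route is an assumption based on Supabase's GoTrue auth service:

```bash
# Fetch the anon key from the cluster and probe the auth sub-component
export SUPABASE_ANON_KEY=$(uds zarf tools kubectl get secret supabase-bootstrap-jwt \
  -n leapfrogai -o jsonpath='{.data.anon-key}' | base64 --decode)

# A 200 response with JSON output suggests Kong and GoTrue are healthy
curl -H "apikey: ${SUPABASE_ANON_KEY}" https://supabase-kong.uds.dev/auth/v1/health
```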
+Go to https://leapfrogai-api.uds.dev/docs to see the exposed Supabase sub-component routes under the `leapfrogai` routes.
+
+### Backends
+
+The following sections discuss the nuances of developing on or with the LeapfrogAI model backends.
+
+#### Locally
+
+Backends can also be run locally as Python applications. See each model backend's README in the `packages/` directory for more details on running each in development mode.
+
+#### Cluster
+
+The model backends are the only components within the LeapfrogAI stack that are not readily accessible via a `VirtualService`. These must be port-forwarded if a user wants to test a local deployment of the API against an in-cluster backend.
+
+For example, the following bash script can be used to set up CPU RAG between a deployed UDS Kubernetes cluster and a locally running LeapfrogAI API:
+
+```bash
+#!/bin/bash
+
+# Function to kill all background processes when the script exits or is interrupted
+cleanup() {
+    echo "Cleaning up..."
+    kill $PID1 $PID2
+}
+
+# Set environment variables
+export SUPABASE_URL="https://supabase-kong.uds.dev"
+export SUPABASE_ANON_KEY=$(kubectl get secret supabase-bootstrap-jwt -n leapfrogai -o jsonpath='{.data.anon-key}' | base64 --decode)
+
+# Trap SIGINT (Ctrl-C) and SIGTERM (termination signal) to call the cleanup function
+trap cleanup SIGINT SIGTERM
+
+# Start kubectl port-forward services in the background and save their PIDs
+# Expose the backends at different ports to prevent a localhost conflict
+# Make sure to change the config.yaml in the api source directory accordingly
+uds zarf tools kubectl port-forward svc/text-embeddings-model -n leapfrogai 50052:50051 &
+PID1=$!
+uds zarf tools kubectl port-forward svc/llama-cpp-python-model -n leapfrogai 50051:50051 &
+PID2=$!
+
+# Wait for all background processes to finish
+wait $PID1 $PID2
+```
+
+#### Port Conflicts
+
+In all cases, port conflicts may arise when outside of a cluster service mesh. As seen in the [Cluster sub-section](#cluster), the backends all listen on port `50051` by default; however, on a host machine's localhost, only one process can bind to 50051 at a time. Using the [LeapfrogAI API config](../src/leapfrogai_api/config.example.yaml), define the ports at which you plan on making each backend accessible.
+
+## Troubleshooting
+
+Occasionally, a package you are trying to re-deploy, or a namespace you are trying to delete, may hang. To work around this, be sure to check the events and logs of all resources, including pods, deployments, daemonsets, clusterpolicies, etc. There may be finalizers, Pepr hooks, or similar mechanisms causing the re-deployment or deletion to fail. Use the [`k9s`](https://k9scli.io/topics/commands/) and `kubectl` tools that are vendored with the UDS CLI, as in the examples below:
+
+### Clusters
+
+```bash
+# k9s CLI for debugging
+uds zarf tools monitor
+
+# kubectl command for logs
+uds zarf tools kubectl logs -l app=api -n leapfrogai --all-containers=true --follow
+```
+
+To describe node-level data, like resource usage, non-terminated pods, taints, etc., run the following command:
+
+```bash
+uds zarf tools kubectl describe node
+```
+
+### NVIDIA GPUs
+
+#### NVML Errors or Missing CUDA Dependencies
+
+None of the following should ever error or return `unknown version`:
+
+1. Check if your NVIDIA GPU drivers are installed:
+
+   ```bash
+   nvidia-smi
+   ```
+
+2. Check the version of your NVIDIA Container Toolkit:
+
+   ```bash
+   nvidia-ctk --version
+   ```
+3. Check the version of your CUDA Toolkit (if compiling vLLM locally):
+
+   ```bash
+   nvcc --version
+   ```
+
+Try looking at your Docker runtime information and make sure the following returns with several lines of information:
+
+```bash
+docker info | grep "nvidia"
+```
+
+Try running the CUDA sample tests in the cluster: [CUDA Vector Add](../packages/k3d-gpu/test/cuda-vector-add.yaml). This can be deployed by executing the following on an existing cluster with the NVIDIA GPU operator and/or NVIDIA device plugin daemonset installed:
+
+```bash
+uds zarf tools kubectl apply -f packages/k3d-gpu/test/cuda-vector-add.yaml
+```
+
+#### Memory Errors or Process Locks
+
+If you are:
+
+1. not deploying a fresh cluster or fresh packages (e.g., vLLM is already deployed), or
+2. using a GPU that has other workloads on it (e.g., display),
+
+then there may not be enough resources to offload the model weights to the NVIDIA GPU.
+
+To see what host-level processes are on your NVIDIA GPU(s), run the following:
+
+```bash
+nvidia-smi
+```
+
+To check which pods in particular are consuming GPUs, you can run the following `yq` command:
+
+```bash
+uds zarf tools kubectl get pods \
+--all-namespaces \
+--output=yaml \
+| uds zarf tools yq eval -o=json '
+  ["Pod", "Namespace", "Container", "GPU"] as $header |
+  [$header] + [
+    .items[] |
+    .metadata as $metadata |
+    .spec.containers[] |
+    select(.resources.requests["nvidia.com/gpu"]) |
+    [
+      $metadata.name,
+      $metadata.namespace,
+      .name,
+      .resources.requests["nvidia.com/gpu"]
+    ]
+  ]' - \
+| uds zarf tools yq -r '(.[0] | @tsv), (.[1:][] | @tsv)' \
+| column -t -s $'\t'
+```
+
+When you reinstall or start a new GPU-dependent pod, the previous PID (process) on the GPU may not have been flushed yet.
+
+1. Scale the previous GPU-dependent pod deployment down to 0, as the current `RollingUpdate` strategy for vLLM relies on back-up/secondary GPUs being available for a graceful turnover
+2. Use `nvidia-smi` to check whether the process has been flushed upon Pod termination BEFORE you deploy a new GPU-dependent pod, and if not, use `kill -9 <PID>` to manually flush the process
diff --git a/packages/api/README.md b/packages/api/README.md
index ea0b6eeb6..3d451decb 100644
--- a/packages/api/README.md
+++ b/packages/api/README.md
@@ -1,7 +1,32 @@
 # LeapfrogAI Python API
 
-A Python API that exposes LLM backends, via FastAPI and gRPC, in the [OpenAI API specification](https://platform.openai.com/docs/api-reference).
+A Python API that exposes AI backends, via FastAPI and gRPC, in the [OpenAI API specification](https://platform.openai.com/docs/api-reference).
 
 ## Usage
 
-:construction_worker: This documentation is still under construction. :construction_worker:
\ No newline at end of file
+### Pre-Requisites
+
+See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/).
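Since the API mirrors the OpenAI specification, a deployed instance can be probed with any OpenAI-compatible client. Below is a hedged sketch using `curl`; the `/openai/v1` route prefix and bearer-token auth are assumptions based on that compatibility, not confirmed routes, and `<YOUR_API_KEY>` is a placeholder:

```bash
# List the models served by a deployed LeapfrogAI API instance;
# replace <YOUR_API_KEY> with a real key before running
curl -s https://leapfrogai-api.uds.dev/openai/v1/models \
  -H "Authorization: Bearer <YOUR_API_KEY>"
```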
+ +#### Dependent Components + +- [UDS Kubernetes cluster bootstrapped with UDS Core Slim Dev](../k3d-gpu/README.md) for local KeyCloak authentication, Istio Service Mesh, and MetalLB advertisement +- [Supabase](../supabase/README.md) for a vector database to store resulting embeddings in, and user management and authentication +- [Text Embeddings](../text-embeddings/README.md) for RAG +- [LLaMA C++ Python](../llama-cpp-python/README.md) or [vLLM](../vllm/README.md) for completions and chat completions + +### Deployment + +To build and deploy the API Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): + +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository + +```bash +make build-api LOCAL_VERSION=dev +uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*-dev.tar.zst --confirm +``` + +### Local Development + +See the [source code documentation](../../src/leapfrogai_api/README.md) for running the API from the source code for local Python environment development. diff --git a/packages/k3d-gpu/README.md b/packages/k3d-gpu/README.md index dbe1e534a..1a67e353b 100644 --- a/packages/k3d-gpu/README.md +++ b/packages/k3d-gpu/README.md @@ -1,28 +1,39 @@ # K3D GPU -Prepares `k3s` + `nvidia/cuda` base image that enables a K3D cluster to have access to your host machine's NVIDIA, CUDA-capable GPU(s). +Prepares a `k3s` + `nvidia/cuda` base image that enables a K3D cluster to utilize your host machine's NVIDIA, CUDA-capable GPU(s). -## Pre-Requisites - -* Docker: https://www.docker.com/ -* K3D: https://k3d.io/ -* UDS-CLI: https://github.com/defenseunicorns/uds-cli -* Modern NVIDIA GPU with CUDA cores and drivers must be present. Additionally, the CUDA toolkit and NVIDIA container toolkit must be installed. +This is for development and demonstration purposes, and should not be used to deploy LeapfrogAI in a production environment. ## Usage -Check out the Make targets for the various options. +### Pre-Requisites -### Local +All system requirements and pre-requisites from the [LeapfrogAI documentation website](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/). -```shell -make build-k3d-gpu # build the image +### Deployment + +> [!NOTE] +> The following Make targets can be executed from the root of the LeapfrogAI repository or within this sub-directory. +To deploy a new K3d cluster with [UDS Core Slim Dev](https://github.com/defenseunicorns/uds-core#uds-package-development), use one of the following Make targets. + +```bash make create-uds-gpu-cluster # create a uds cluster equipped with the k3d-gpu image make test-uds-gpu-cluster # deploy a test gpu pod to see if everything is working ``` +### Local Development + +> [!NOTE] +> The following Make targets can be executed from the root of the LeapfrogAI repository or within this sub-directory + +To build **just** the K3s CUDA image for container debugging, use the following Make target. + +```bash +make build-k3d-gpu # build the image +``` + ## References * https://k3d.io/v5.7.2/usage/advanced/cuda/ diff --git a/packages/llama-cpp-python/Makefile b/packages/llama-cpp-python/Makefile index 780f8c36a..b7ab569eb 100644 --- a/packages/llama-cpp-python/Makefile +++ b/packages/llama-cpp-python/Makefile @@ -1,7 +1,6 @@ install: python -m pip install ../../src/leapfrogai_sdk - python -m pip install -e . + python -m pip install -e ".[dev]" dev: - make install python -m leapfrogai_sdk.cli --app-dir=. 
main:Model diff --git a/packages/llama-cpp-python/README.md b/packages/llama-cpp-python/README.md index 9aed7f7c7..24917fbcb 100644 --- a/packages/llama-cpp-python/README.md +++ b/packages/llama-cpp-python/README.md @@ -1,26 +1,20 @@ -# LeapfrogAI llama-cpp-python Backend +# LeapfrogAI LLaMA C++ Python Backend -A LeapfrogAI API-compatible [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) w wrapper for quantized and un-quantized model inferencing across CPU infrastructures. +A LeapfrogAI API-compatible [llama-cpp-python](https://github.com/abetlen/llama-cpp-python) wrapper for quantized and un-quantized model inferencing across CPU infrastructures. +## Usage +### Pre-Requisites -See [instructions](#instructions) to get the backend up and running. Then, use the [LeapfrogAI API server](https://github.com/defenseunicorns/leapfrogai-api) to interact with the backend. +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/). -## Instructions +#### Dependent Components -The instructions in this section assume the following: - -1. Properly installed and configured Python 3.11.x, to include its development tools -2. The LeapfrogAI API server is deployed and running - -The following are additional assumptions for GPU inferencing: - -3. You have properly installed one or more NVIDIA GPUs and GPU drivers -4. You have properly installed and configured the [cuda-toolkit](https://developer.nvidia.com/cuda-toolkit) and [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) +- [LeapfrogAI API](../api/README.md) for a fully RESTful application ### Model Selection -The default model that comes with this backend in this repository's officially released images is a [4-bit quantization of the Synthia-7b model](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ). +The default model that comes with this backend in this repository's officially released images is a [quantization of the Synthia-7b model](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ). Models are pulled from [HuggingFace Hub](https://huggingface.co/models) via the [model_download.py](/packages/llama-cpp-python/scripts/model_download.py) script. To change what model comes with the llama-cpp-python backend, set the following environment variables: @@ -30,44 +24,37 @@ FILENAME # eg: "synthia-7b-v2.0.Q4_K_M.gguf" REVISION # eg: "3f65d882253d1f15a113dabf473a7c02a004d2b5" ``` -## Zarf Package Deployment +If you choose a different model, make sure to modify the default [config.yaml](./config.yaml) using the Hugging Face model repository's model files and model card. 
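For example, a hedged sketch of swapping in a different model before building follows; the repository, file name, and revision values are illustrative, not project defaults:

```bash
# Download alternate model weights using the env vars described above
export REPO_ID="TheBloke/SynthIA-7B-v2.0-GGUF"  # illustrative repository
export FILENAME="synthia-7b-v2.0.Q4_K_M.gguf"   # illustrative file name
export REVISION="main"                          # pin a specific commit if desired
python scripts/model_download.py

# Rename the downloaded weights to the expected file name
mv .model/*.gguf .model/model.gguf
```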
-To build and deploy just the llama-cpp-python Zarf package (from the root of the repository): +### Deployment -> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already +To build and deploy the llama-cpp-python backend Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): -```shell +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository + +```bash pip install 'huggingface_hub[cli,hf_transfer]' # Used to download the model weights from huggingface make build-llama-cpp-python LOCAL_VERSION=dev uds zarf package deploy packages/llama-cpp-python/zarf-package-llama-cpp-python-*-dev.tar.zst --confirm ``` -## Run Locally +### Local Development +To run the llama-cpp-python backend locally: -To run the llama-cpp-python backend locally (starting from the root directory of the repository): +> [!IMPORTANT] +> Execute the following commands from this sub-directory -From this directory: ```bash -# Setup Virtual Environment -python -m venv .venv -source .venv/bin/activate -``` +# Install dev and runtime dependencies +make install -```bash -# Install dependencies -python -m pip install src/leapfrogai_sdk -cd packages/llama-cpp-python -python -m pip install ".[dev]" -``` - -```bash # Clone Model -# Supply a REPO_ID, FILENAME and REVISION if a different model is desired +# Supply a REPO_ID, FILENAME and REVISION, as seen in the "Model Selection" section python scripts/model_download.py - mv .model/*.gguf .model/model.gguf -# Start Model Backend -lfai-cli --app-dir=. main:Model +# Start the model backend +make dev ``` diff --git a/packages/llama-cpp-python/pyproject.toml b/packages/llama-cpp-python/pyproject.toml index 521f4c944..8e893f3fe 100644 --- a/packages/llama-cpp-python/pyproject.toml +++ b/packages/llama-cpp-python/pyproject.toml @@ -15,7 +15,7 @@ readme = "README.md" [project.optional-dependencies] dev = [ - "huggingface_hub", + "huggingface_hub[cli,hf_transfer]" ] [tool.pip-tools] diff --git a/packages/repeater/Makefile b/packages/repeater/Makefile new file mode 100644 index 000000000..780f8c36a --- /dev/null +++ b/packages/repeater/Makefile @@ -0,0 +1,7 @@ +install: + python -m pip install ../../src/leapfrogai_sdk + python -m pip install -e . + +dev: + make install + python -m leapfrogai_sdk.cli --app-dir=. main:Model diff --git a/packages/repeater/README.md b/packages/repeater/README.md index 86f362ea5..b11de60d3 100644 --- a/packages/repeater/README.md +++ b/packages/repeater/README.md @@ -1,46 +1,48 @@ # LeapfrogAI Repeater Backend -A LeapfrogAI API-compatible repeater model that simply parrots the input it is provided back to the user. This is primarily used for quick-testing the API. +A LeapfrogAI API-compatible repeater backend that simply parrots the input it is provided back to the user. This is primarily used for quick-testing the API. +The repeater backend is used to verify that the API is able to both load configs for and send inputs to a very simple backend. The repeater backend fulfills this role by returning the input it recieves as output. -# Usage +## Usage -The repeater model is used to verify that the API is able to both load configs for and send inputs to a very simple model. The repeater model fulfills this role by returning the input it recieves as output. 
+### Pre-Requisites -## Zarf Package Deployment +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/). -To build and deploy just the repeater Zarf package (from the root of the repository): +#### Dependent Components -> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already +- Have the LeapfrogAI API deployed, running, and accessible in order to provide a fully RESTful application -```shell +### Deployment + +To build and deploy the repeater backend Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): + +> [!IMPORTANT] +> Execute the following Make targets from the root of the LeapfrogAI repository + +```bash make build-repeater LOCAL_VERSION=dev uds zarf package deploy packages/repeater/zarf-package-repeater-*-dev.tar.zst --confirm ``` -## Local Usage +### Local Development -Here is how to run the repeater model locally to test the API: +To run the repeater backend locally: -It's easiest to set up a virtual environment to keep things clean: -```bash -python -m venv .venv -source .venv/bin/activate -``` +> [!IMPORTANT] +> Execute the following commands from this sub-directory -First install the lfai-repeater project and dependencies. From the root of the project repository: ```bash -pip install src/leapfrogai_sdk -cd packages/repeater -pip install . +# Install dependencies and start the model backend +make dev ``` -Next, launch the repeater model: -```bash -python repeater.py -``` +Now the basic API tests can be run in full with the following commands. + +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository -Now the basic API tests can be run in full. In a new terminal, starting from the root of the project repository: ```bash export LFAI_RUN_REPEATER_TESTS=true # this is needed to run the tests that require the repeater model, otherwise they get skipped pytest tests/pytest/test_api_auth.py diff --git a/packages/supabase/README.md b/packages/supabase/README.md index 35cdb276f..94472542a 100644 --- a/packages/supabase/README.md +++ b/packages/supabase/README.md @@ -1,50 +1,71 @@ -# Setting up Supabase locally +# Supabase -## Step 1: Create a Zarf package +A comprehensive relational and vector database operator and multi-functional API layer. See the [Supabase documentation](https://supabase.com/docs) and the [Bitnami package](https://bitnami.com/stack/supabase) for more details. -From `leapfrogai/packages/supabase` run `zarf package create` +## Usage -## Step 2: Create the uds bundle +### Pre-Requisites -From `leapfrogai/uds-bundles/dev//` run `uds create` +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/).
+#### Dependent Components -## Step 3: Deploy the UDS bundle or deploy the Zarf package +- [UDS Kubernetes cluster bootstrapped with UDS Core Slim Dev](../k3d-gpu/README.md) for local Keycloak authentication, Istio Service Mesh, and MetalLB advertisement +- [LeapfrogAI API](../api/README.md) for RESTful interaction +- [Text Embeddings](../text-embeddings/README.md) for vector generation +- [LeapfrogAI UI](../ui/README.md) for a Supabase- and API-compatible frontend -To deploy only Supabase for UDS bundle run the following from `leapfrogai/uds-bundles/dev//`: * `uds deploy -p supabase uds-bundle-leapfrogai-*.tar.zst` +### Deployment -To deploy the Zarf package run the following from `leapfrogai/packages/supabase`: * `uds zarf package deploy zarf-package-supabase-*.tar.zst` +To build and deploy the Supabase Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): -## Step 4: Accessing Supabase +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository -Go to `https://supabase-kong.uds.dev`. The login is `supabase-admin` the password is randomly generated in a cluster secret named `supabase-dashboard-secret` +```bash +make build-supabase LOCAL_VERSION=dev +uds zarf package deploy packages/supabase/zarf-package-supabase-*-dev.tar.zst --confirm +``` -**NOTE:** The `uds.dev` domain is only used for locally deployed LeapfrogAI packages, so this domain will be unreachable without first manually deploying the UDS bundle. +### Accessing Supabase -## Local Supabase Troubleshooting +Go to `https://supabase-kong.uds.dev`. The login username is `supabase-admin`, and the password is randomly generated in a cluster secret named `supabase-dashboard-secret`. Run the following to grab the password in a single-line command: -* If you cannot reach `https://supabase-kong.uds.dev`, check if the `Packages` CRDs and `VirtualServices` contain `supabase-kong.uds.dev`. If they do not, try restarting the `pepr-uds-core-watcher` pod. -* If logging in to the UI through keycloak returns a `500`, check and see if the `sql` migrations have been run in Supabase. - * You can find those in `leapfrogai/src/leapfrogai_ui/supabase/migrations`. They can be run in the studios SQL Editor. -* To obtain a jwt token for testing, create a test user and run the following: -``` -curl -X POST 'https://supabase-kong.uds.dev/auth/v1/token?grant_type=password' \-H "apikey: " \-H "Content-Type: application/json" \-d '{ "email": "", "password": ""}' +```bash +uds zarf tools kubectl get secret -n leapfrogai supabase-dashboard-secret -o json | uds zarf tools yq '.data.password' | base64 -d ``` -By following these steps, you'll have successfully set up Keycloak for your application, allowing secure authentication and authorization for your users. +**NOTE:** The `uds.dev` domain is only used for locally deployed LeapfrogAI packages, so this domain will be unreachable without first manually deploying the UDS bundle. + +## Troubleshooting + +- If you cannot reach `https://supabase-kong.uds.dev`, check if the `Packages` CRDs and `VirtualServices` contain `supabase-kong.uds.dev`. If they do not, try restarting the `pepr-uds-core-watcher` pod. +- If logging in to the UI through Keycloak returns a `500`, check and see if the `sql` migrations have been run in Supabase.
+ - You can find those in `leapfrogai/src/leapfrogai_ui/supabase/migrations` - Migrations can be run in the Supabase studio SQL editor +- To obtain a 1-hour JWT for testing, run the following: + + ```bash + # Replace , , and with your desired credentials + # Grab the Supabase Anon Key from the JWT Secret in the UDS Kubernetes cluster and use it with xargs + uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o json | uds zarf tools yq '.data.anon-key' | base64 -d | xargs -I {} curl -X POST 'https://supabase-kong.uds.dev/auth/v1/signup' \ + -H "apikey: {}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer {}" \ + -d '{ "email": "", "password": "", "confirmPassword": ""}' + ``` +- Longer-term API tokens (30, 60, or 90 days) can be created from the API key workflow within the LeapfrogAI UI +- Longer-term API tokens (30 days) can also be created using the [API documentation](../../src/leapfrogai_api/README.md) -# Supabase Migrations +## Supabase Migrations -## Motivation +### Motivation A database migration is the process of modifying a database's schema in a controlled and versioned way. Migrations are used to modify the functionality of a database as its supported applications evolve over time. As time goes on, an application may require new tables, or tables may need new columns/indexes. Migrations allow for smooth changes to be applied to deployed databases, regardless of the current version the application is on. Migrations catalog a history of the database and provide an inherent form of database documentation, as each migration is stored in the Git repository chronologically (and by release). Migrations are automated on new deployments of LeapfrogAI such that all of the migrations (i.e., database changes) are applied in order to ensure that the database has the most up-to-date schema. Migrations can also be run anytime a new version of LeapfrogAI is released, regardless of which version of LeapfrogAI is being updated from. -## Approach +### Approach Migrations are handled using the [Supabase CLI](https://supabase.com/docs/guides/cli/getting-started?queryGroups=platform&platform=linux). The Supabase CLI automatically handles new migrations and keeps track of which migrations have already been run, regardless of whether the database instance is brand new or pre-existing. @@ -54,7 +75,7 @@ In order to submit migrations at deploy time, [K8s jobs](https://kubernetes.io/d The K8s jobs themselves simply pull any existing migrations from the remote database within the same cluster, then push up the local migrations. Due to the [schema migrations table](https://supabase.com/docs/reference/cli/usage#supabase-db-push), any migrations that have already been run on the remote database will be skipped, ensuring migrations are not repeated. Since each package's migrations should be separate, a different template is used for each job.
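For local work, the CLI side of this flow is short; a sketch of creating and applying a new migration with the Supabase CLI (the migration name is illustrative):

```bash
# Create a timestamped SQL file under supabase/migrations/ (name is illustrative)
supabase migration new add_example_table
# Edit the generated SQL file, then push all unapplied local migrations to the target database
supabase db push
```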
-## Managing Migrations +### Managing Migrations Keep the following in mind when adding new migrations: diff --git a/packages/supabase/bitnami-values.yaml b/packages/supabase/bitnami-values.yaml index af26cb2eb..100a485ab 100644 --- a/packages/supabase/bitnami-values.yaml +++ b/packages/supabase/bitnami-values.yaml @@ -1,4 +1,4 @@ -## @section Leapfrog parameters +## @section LeapfrogAI parameters ## Parameters not defined in the upstream chart that are related to LeapfrogAI's specific configuration leapfrogai: package: diff --git a/packages/supabase/chart/templates/uds-package.yaml b/packages/supabase/chart/templates/uds-package.yaml index 8c80894d0..932586690 100644 --- a/packages/supabase/chart/templates/uds-package.yaml +++ b/packages/supabase/chart/templates/uds-package.yaml @@ -4,7 +4,7 @@ metadata: name: {{ .Values.leapfrogai.package.name }} spec: sso: - - name: Leapfrog AI + - name: LeapfrogAI description: Client for logging into Supabase clientId: {{ .Values.leapfrogai.sso.clientId }} redirectUris: diff --git a/packages/text-embeddings/Dockerfile b/packages/text-embeddings/Dockerfile index 9af7c5537..96becc0de 100644 --- a/packages/text-embeddings/Dockerfile +++ b/packages/text-embeddings/Dockerfile @@ -12,7 +12,7 @@ RUN python3.11 -m venv .venv ENV PATH="/leapfrogai/.venv/bin:$PATH" # copy and install all python dependencies -# NOTE: We are copying the leapfrog whl to this filename because installing 'optional extras' from +# NOTE: We are copying the leapfrogai whl to this filename because installing 'optional extras' from # a wheel requires the absolute path to the wheel file (instead of a wildcard whl) COPY --from=sdk /leapfrogai/${SDK_DEST} ${SDK_DEST} COPY packages/text-embeddings packages/text-embeddings diff --git a/packages/text-embeddings/Makefile b/packages/text-embeddings/Makefile new file mode 100644 index 000000000..19600e1d3 --- /dev/null +++ b/packages/text-embeddings/Makefile @@ -0,0 +1,8 @@ +install: + python -m pip install ../../src/leapfrogai_sdk + python -m pip install -e ".[dev]" + + +dev: + make install + python -m leapfrogai_sdk.cli --app-dir=. main:Model diff --git a/packages/text-embeddings/README.md b/packages/text-embeddings/README.md index 09bef42b4..e71256a76 100644 --- a/packages/text-embeddings/README.md +++ b/packages/text-embeddings/README.md @@ -1,41 +1,49 @@ +# LeapfrogAI Text Embeddings Backend +A LeapfrogAI API-compatible text embeddings wrapper for producing embeddings from text content. -# LeapfrogAI llama-cpp-python Backend +## Usage -A LeapfrogAI API-compatible [instructor-xl](https://huggingface.co/hkunlp/instructor-xl) model for creating embeddings across CPU and GPU infrastructures. +### Pre-Requisites +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/). -# Usage +#### Dependent Components -## Zarf Package Deployment +- [LeapfrogAI API](../api/README.md) for a fully RESTful application +- [Supabase](../supabase/README.md) for a vector database to store resulting embeddings in -To build and deploy just the text-embeddings Zarf package (from the root of the repository): +### Model Selection -> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already +The default model that comes with this backend in this repository's officially released images is [instructor-xl](https://huggingface.co/hkunlp/instructor-xl). 
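Once deployed behind the API, the backend can be exercised through the OpenAI-style embeddings route; a hypothetical request, where the URL, key, and model name are illustrative assumptions:

```bash
# Hypothetical request against the API's OpenAI-compatible embeddings route
curl -s https://leapfrogai-api.uds.dev/openai/v1/embeddings \
  -H "Authorization: Bearer $LFAI_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"model": "text-embeddings", "input": "LeapfrogAI deploys AI on-premises"}'
```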
-```shell +### Deployment + +To build and deploy the text-embeddings backend Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): + +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository + +```bash pip install 'huggingface_hub[cli,hf_transfer]' # Used to download the model weights from huggingface make build-text-embeddings LOCAL_VERSION=dev uds zarf package deploy packages/text-embeddings/zarf-package-text-embeddings-*-dev.tar.zst --confirm ``` -## Local Development +### Local Development -To run the text-embeddings backend locally (starting from the root directory of the repository): +To run the text-embeddings backend locally: -```shell -# Setup Virtual Environment if you haven't done so already -python -m venv .venv -source .venv/bin/activate +> [!IMPORTANT] +> Execute the following commands from this sub-directory -# install dependencies -python -m pip install src/leapfrogai_sdk -cd packages/text-embeddings -python -m pip install ".[dev]" +```bash +# Install dev and runtime dependencies +make install -# download the model +# Clone Model python scripts/model_download.py -# start the model backend -python -u main.py +# Start the model backend +make dev ``` diff --git a/packages/ui/README.md b/packages/ui/README.md new file mode 100644 index 000000000..6f9ab9f01 --- /dev/null +++ b/packages/ui/README.md @@ -0,0 +1,33 @@ +# LeapfrogAI UI + +A Svelte UI that provides an easy-to-use frontend for interacting with all components of the LeapfrogAI tech stack. + +## Usage + +### Pre-Requisites + +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/). + +#### Dependent Components + +- [UDS Kubernetes cluster bootstrapped with UDS Core Slim Dev](../k3d-gpu/README.md) for local Keycloak authentication, Istio Service Mesh, and MetalLB advertisement +- [LeapfrogAI API](../api/README.md) for OpenAI API-like AI model backend interaction +- [Supabase](../supabase/README.md) for a vector database to store resulting embeddings in, and user management and authentication +- [Text Embeddings](../text-embeddings/README.md) for RAG +- [LLaMA C++ Python](../llama-cpp-python/README.md) or [vLLM](../vllm/README.md) for completions and chat completions + +### Deployment + +To build and deploy the UI Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): + +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository + +```bash +make build-ui LOCAL_VERSION=dev +uds zarf package deploy packages/ui/zarf-package-leapfrogai-ui-*-dev.tar.zst --confirm +``` + +### Local Development + +See the [source code documentation](../../src/leapfrogai_ui/README.md) for running the UI from source in a local Node development environment. diff --git a/packages/ui/zarf.yaml b/packages/ui/zarf.yaml index 4f6392a53..93ae95896 100644 --- a/packages/ui/zarf.yaml +++ b/packages/ui/zarf.yaml @@ -16,7 +16,7 @@ variables: prompt: true sensitive: true - name: OPENAI_API_KEY - description: OpenAI API Key.
If specified, app will use OpenAI instead of LeapfrogAI prompt: true default: "" sensitive: true diff --git a/packages/vllm/Makefile b/packages/vllm/Makefile index 0434d4101..98e8b29db 100644 --- a/packages/vllm/Makefile +++ b/packages/vllm/Makefile @@ -1,7 +1,6 @@ install: python -m pip install ../../src/leapfrogai_sdk - python -m pip install -e . + python -m pip install -e ".[dev]" dev: - make install python -m leapfrogai_sdk.cli --app-dir=src/ main:Model diff --git a/packages/vllm/README.md b/packages/vllm/README.md index eac93f3ef..a55238cfd 100644 --- a/packages/vllm/README.md +++ b/packages/vllm/README.md @@ -1,23 +1,16 @@ # LeapfrogAI vLLM Backend -A LeapfrogAI API-compatible [vLLM](https://github.com/vllm-project/vllm) wrapper for quantized and un-quantized model inferencing across GPU infrastructures. - +A LeapfrogAI API-compatible [vLLM](https://github.com/vllm-project/vllm) wrapper for quantized and un-quantized model inferencing across GPU infrastructures. ## Usage -See [instructions](#instructions) to get the backend up and running. Then, use the [LeapfrogAI API server](https://github.com/defenseunicorns/leapfrogai-api) to interact with the backend. - -## Instructions - -The instructions in this section assume the following: +### Pre-Requisites -1. Properly installed and configured Python 3.11.x, to include its development tools -2. The LeapfrogAI API server is deployed and running +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/). -The following are additional assumptions for GPU inferencing: +#### Dependent Components -3. You have properly installed one or more NVIDIA GPUs and GPU drivers -4.
You have properly installed and configured the [cuda-toolkit](https://developer.nvidia.com/cuda-toolkit) and [nvidia-container-toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/index.html) +- [LeapfrogAI API](../api/README.md) for a fully RESTful application ### Model Selection @@ -31,48 +24,33 @@ You can optionally specify different models or quantization types using the foll - `--build-arg QUANTIZATION="gptq"`: Quantization type (e.g., gptq, awq, or empty for un-quantized) - `--build-arg TENSOR_PARALLEL_SIZE="1"`: The number of gpus to spread the tensor processing across -## Zarf Package Deployment +### Deployment -To build and deploy just the VLLM Zarf package (from the root of the repository): +To build and deploy the vllm backend Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): -> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository -```shell +```bash pip install 'huggingface_hub[cli,hf_transfer]' # Used to download the model weights from huggingface make build-vllm LOCAL_VERSION=dev uds zarf package deploy packages/vllm/zarf-package-vllm-*-dev.tar.zst --confirm ``` -## Run Locally +### Local Development -To run the vllm backend locally (starting from the root directory of the repository): -```bash -# Setup Virtual Environment if you haven't done so already -python -m venv .venv -source .venv/bin/activate -``` +To run the vllm backend locally: -```bash -# Install dependencies -python -m pip install src/leapfrogai_sdk -cd packages/vllm -# To support Huggingface Hub model downloads -python -m pip install ".[dev]" -``` +> [!IMPORTANT] +> Execute the following commands from this sub-directory ```bash -# Copy the environment variable file, change this if different params are needed -cp .env.example .env - -# Make sure environment variables are set -source .env +# Install dev and runtime dependencies +make install # Clone Model -# Supply a REPO_ID, FILENAME and REVISION if a different model is desired python src/model_download.py -mv .model/*.gguf .model/model.gguf - -# Start Model Backend -lfai-cli --app-dir=src/ main:Model +# Start the model backend +make dev ``` diff --git a/packages/vllm/pyproject.toml b/packages/vllm/pyproject.toml index 7e9a19cd5..7d54cd810 100644 --- a/packages/vllm/pyproject.toml +++ b/packages/vllm/pyproject.toml @@ -19,7 +19,7 @@ readme = "README.md" [project.optional-dependencies] dev = [ - "huggingface_hub", + "huggingface_hub[cli,hf_transfer]" ] [tool.pip-tools] diff --git a/packages/whisper/Makefile b/packages/whisper/Makefile new file mode 100644 index 000000000..730458a21 --- /dev/null +++ b/packages/whisper/Makefile @@ -0,0 +1,7 @@ +install: + python -m pip install ../../src/leapfrogai_sdk + python -m pip install -e ".[dev]" + +dev: + make install + python -m leapfrogai_sdk.cli --app-dir=src/ main:Model diff --git a/packages/whisper/README.md b/packages/whisper/README.md index ddf0b1008..bdb9ca960 100644 --- a/packages/whisper/README.md +++ b/packages/whisper/README.md @@ -1,31 +1,47 @@ # LeapfrogAI Whisper Backend -A LeapfrogAI API-compatible [whisper](https://huggingface.co/openai/whisper-base) wrapper for audio transcription inferencing across CPU & GPU infrastructures. +A LeapfrogAI API-compatible [faster-whisper](https://github.com/SYSTRAN/faster-whisper) wrapper for audio transcription inferencing across CPU & GPU infrastructures. 
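For a sense of how the backend is exercised once deployed behind the API, a transcription request might look like the following; the URL, key, route, and model name are illustrative assumptions based on the API's OpenAI-compatible surface:

```bash
# Hypothetical request against the API's OpenAI-compatible transcriptions route
curl -s https://leapfrogai-api.uds.dev/openai/v1/audio/transcriptions \
  -H "Authorization: Bearer $LFAI_API_KEY" \
  -F model="whisper" \
  -F file="@sample.wav"
```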
+## Usage -# Usage +### Pre-Requisites -## Zarf Package Deployment +See the LeapfrogAI documentation website for [system requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) and [dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/). -To build and deploy just the whisper Zarf package (from the root of the repository): +#### Dependent Components -> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already +- [LeapfrogAI API](../api/README.md) for a fully RESTful application -```shell +### Model Selection + +See the [Deployment section](#deployment) for the CTranslate2 command for pulling and converting a model for inferencing. + +### Deployment + +To build and deploy the whisper backend Zarf package into an existing [UDS Kubernetes cluster](../k3d-gpu/README.md): + +> [!IMPORTANT] +> Execute the following commands from the root of the LeapfrogAI repository + +```bash pip install 'ctranslate2' # Used to download and convert the model weights pip install 'transformers[torch]' # Used to download and convert the model weights make build-whisper LOCAL_VERSION=dev uds zarf package deploy packages/whisper/zarf-package-whisper-*-dev.tar.zst --confirm ``` -## Local Development +### Local Development To run the whisper backend locally without K8s (starting from the root directory of the repository): -```shell -python -m pip install src/leapfrogai_sdk -cd packages/whisper -python -m pip install ".[dev]" +```bash +# Install dev and runtime dependencies +make install + +# Download and convert model +# Change the value for --model to use a different whisper model variant ct2-transformers-converter --model openai/whisper-base --output_dir .model --copy_files tokenizer.json --quantization float32 -python -u main.py + +# Start the model backend +make dev ``` diff --git a/src/leapfrogai_api/Makefile b/src/leapfrogai_api/Makefile index adc2a2b20..5e78b3f06 100644 --- a/src/leapfrogai_api/Makefile +++ b/src/leapfrogai_api/Makefile @@ -1,44 +1,45 @@ -MAKEFILE_DIR := $(dir $(abspath $(lastword $(MAKEFILE_LIST)))) -SHELL := /bin/bash - - -export SUPABASE_URL=$(shell supabase status | grep -oP '(?<=API URL: ).*') -export SUPABASE_ANON_KEY=$(shell supabase status | grep -oP '(?<=anon key: ).*') - -install-api: - @cd ${MAKEFILE_DIR} && \ +API_PORT := 8080 +# This can be pointed at the localhost:8080 instance of the API as well +API_BASE_URL := https://leapfrogai-api.uds.dev +SUPABASE_BASE_URL := https://supabase-kong.uds.dev +EXPIRATION_TIME := $(shell date -d "+30 days" +%s) +SUPABASE_ANON_KEY := $(shell uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o json | uds zarf tools yq '.data.anon-key' | base64 -d) + +install: set-env python -m pip install ../../src/leapfrogai_sdk - @cd ${MAKEFILE_DIR} && \ - python -m pip install -e .
- python -m pip install "../../.[dev]" + python -m pip install -e ".[dev]" -dev-run-api: - @cd ${MAKEFILE_DIR} && \ - python -m uvicorn main:app --port 3000 --reload --log-level info - -define get_jwt_token - echo "Getting JWT token from ${SUPABASE_URL}..."; \ - TOKEN_RESPONSE=$$(curl -s -X POST $(1) \ - -H "apikey: ${SUPABASE_ANON_KEY}" \ - -H "Content-Type: application/json" \ - -d '{ "email": "admin@localhost", "password": "$$SUPABASE_PASS"}'); \ - echo "Extracting token from $(TOKEN_RESPONSE)"; \ - JWT=$$(echo $$TOKEN_RESPONSE | grep -oP '(?<="access_token":")[^"]*'); \ - echo -n "$$JWT" | xclip -selection clipboard; \ - echo "SUPABASE_USER_JWT=$$JWT" > .env; \ - echo "SUPABASE_URL=$$SUPABASE_URL" >> .env; \ - echo "SUPABASE_ANON_KEY=$$SUPABASE_ANON_KEY" >> .env; \ - echo "DONE - JWT token copied to clipboard" -endef +set-env: + echo "SUPABASE_URL=${SUPABASE_BASE_URL}" > .env + echo "SUPABASE_ANON_KEY=${SUPABASE_ANON_KEY}" >> .env -user: - @read -s -p "Enter a new DEV API password: " SUPABASE_PASS; echo; \ - echo "Creating new supabase user..."; \ - $(call get_jwt_token,"${SUPABASE_URL}/auth/v1/signup") +dev: set-env + python -m uvicorn main:app --port ${API_PORT} --reload --log-level info --env-file .env -env: - @read -s -p "Enter your DEV API password: " SUPABASE_PASS; echo; \ - $(call get_jwt_token,"${SUPABASE_URL}/auth/v1/token?grant_type=password") +api-key: + curl -s -X POST '${SUPABASE_BASE_URL}/auth/v1/signup' \ + -H "apikey: ${SUPABASE_ANON_KEY}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${SUPABASE_ANON_KEY}" \ + -d '{ "email": "leapfrogai@defenseunicorns.com", "password": "password", "confirmPassword": "password"}' | \ + uds zarf tools yq '.access_token' | \ + xargs -I {} curl -s --insecure -X POST '${API_BASE_URL}/leapfrogai/v1/auth/api-keys' \ + -H "apikey: {}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer {}" \ + -d '{ "name": "api-key", "expires_at": "${EXPIRATION_TIME}" }' | \ + uds zarf tools yq '.api_key' -test-integration: - @cd ${MAKEFILE_DIR} && python -m pytest ../../tests/integration/api/ -vv -s +new-api-key: + curl -s -X POST '${SUPABASE_BASE_URL}/auth/v1/token?grant_type=password' \ + -H "apikey: ${SUPABASE_ANON_KEY}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${SUPABASE_ANON_KEY}" \ + -d '{ "email": "leapfrogai@defenseunicorns.com", "password": "password"}' | \ + uds zarf tools yq '.access_token' | \ + xargs -I {} curl -s --insecure -X POST '${API_BASE_URL}/leapfrogai/v1/auth/api-keys' \ + -H "apikey: {}" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer {}" \ + -d '{ "name": "api-key", "expires_at": "${EXPIRATION_TIME}" }' | \ + uds zarf tools yq '.api_key' diff --git a/src/leapfrogai_api/README.md b/src/leapfrogai_api/README.md index 383f271cb..eec4dd0c6 100644 --- a/src/leapfrogai_api/README.md +++ b/src/leapfrogai_api/README.md @@ -1,79 +1,58 @@ # LeapfrogAI API -A mostly OpenAI compliant API surface. +> [!IMPORTANT] +> See the [API package documentation](../../packages/api/README.md) for general pre-requisites, dependent components, and package deployment instructions -## Zarf Package Deployment +This document is only applicable for spinning up the API in a local Python development environment. 
-To build and deploy just the API Zarf package (from the root of the repository): +## Local Development Setup -> Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already +> [!IMPORTANT] +> Execute the following commands from this sub-directory -```shell -make build-api LOCAL_VERSION=dev -uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*-dev.tar.zst --confirm -``` +### Running -## Local Development Setup +> [!IMPORTANT] +> The following steps assume that you already have a deployed and accessible UDS Kubernetes cluster and LeapfrogAI. Please follow the steps within the [DEVELOPMENT.md](../../docs/DEVELOPMENT.md) for details. 1. Install dependencies + ```bash make install ``` -2. Create a local Supabase instance (requires [Supabase CLI](https://supabase.com/docs/guides/cli/getting-started)): - ```bash - brew install supabase/tap/supabases +2. Create a config.yaml using the config.example.yaml as a template. - supabase start # from this directory +3. Run the FastAPI application - supabase stop --project-id leapfrogai # stop api containers - - supabase db reset # clears all data and reinitializes migrations - - supabase status # to check status and see your keys + ```bash + make dev API_PORT=8080 ``` -### Session Authentication +4. Create an API key with test user "leapfrogai@defenseunicorns.com" and test password "password", lasting 30 days from creation time -3. Create a local api user ```bash - make user + # If the in-cluster API is up, and not testing the API workflow + make api-key API_BASE_URL=http://localhost:8080 ``` -4. Create a JWT token + To create a new 30-day API key, use the following: + ```bash - make jwt - source .env + # If the in-cluster API is up, and not testing the API workflow + make new-api-key API_BASE_URL=http://localhost:8080 ``` - This will copy the JWT token to your clipboard. - - -5. Make calls to the api swagger endpoint at `http://localhost:8080/docs` using your JWT token as the `HTTPBearer` token. - * Hit `Authorize` on the swagger page to enter your JWT token - -## Integration Tests - -The integration tests serve to identify any mismatches between components: - -- Check all API routes -- Validate Request/Response types -- DB CRUD operations -- Schema mismatches -### Prerequisites + The newest API key will be printed to a `.env` file located within this directory. -Integration tests require a Supabase instance and environment variables configured (see [Local Development](#local-development)). +5. Make calls to the API Swagger endpoint at `http://localhost:8080/docs` using your API token as the `HTTPBearer` token. -### Authentication + - Hit `Authorize` on the Swagger page to enter your API key -Tests require a JWT token environment variable `SUPABASE_USER_JWT`. See [Session Authentication](#session-authentication) to set this up. +### Access -### Running the tests -After obtaining the JWT token, run the following: -``` -make test-integration -``` +See the ["Access" section of the DEVELOPMENT.md](../../docs/DEVELOPMENT.md#access) for different ways to connect the API to a model backend or Supabase. -## Notes +### Tests -* All API calls must be authenticated via a Supabase JWT token in the message's `Authorization` header, including swagger docs. +See the [tests directory documentation](../../tests/README.md) for more details. 
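As a quick check that a freshly minted key works, a request like the following should list the available models; the route is an assumption based on the API's OpenAI-compatible surface, and `<api-key>` stands in for the key produced by `make api-key`:

```bash
# Hypothetical smoke test against the locally running API
curl -s http://localhost:8080/openai/v1/models \
  -H "Authorization: Bearer <api-key>"
```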
diff --git a/src/leapfrogai_api/pyproject.toml b/src/leapfrogai_api/pyproject.toml index 66f44b824..0190d4f8e 100644 --- a/src/leapfrogai_api/pyproject.toml +++ b/src/leapfrogai_api/pyproject.toml @@ -28,6 +28,14 @@ dependencies = [ ] requires-python = "~=3.11" +[project.optional-dependencies] +dev = [ + "pytest-asyncio", + "requests", + "requests-toolbelt", + "pytest" +] + [tool.pip-tools] generate-hashes = true diff --git a/src/leapfrogai_sdk/README.md b/src/leapfrogai_sdk/README.md new file mode 100644 index 000000000..b38917190 --- /dev/null +++ b/src/leapfrogai_sdk/README.md @@ -0,0 +1,17 @@ +# LeapfrogAI SDK + +> [!IMPORTANT] +> The SDK is not a functionally independent component! Please see the root README for more context. + +This document is only applicable for integrating the SDK into an API and model backend in a local Python development environment. + +## Local Development Setup + +1. Make changes to the SDK code +2. Re-install dependencies and spin up a model backend (e.g., [vLLM](../../packages/vllm/README.md)) +3. Re-install dependencies and spin up the [LeapfrogAI API](../leapfrogai_api/README.md) +4. Test changes as required + +## Integration Tests + +See the [API documentation](../leapfrogai_api/README.md) for instructions on running tests. diff --git a/src/leapfrogai_sdk/__init__.py b/src/leapfrogai_sdk/__init__.py index bea8fb196..946c37b42 100644 --- a/src/leapfrogai_sdk/__init__.py +++ b/src/leapfrogai_sdk/__init__.py @@ -58,4 +58,4 @@ ) from leapfrogai_sdk.serve import serve -print("Initializing Leapfrog") +print("Initializing LeapfrogAI") diff --git a/src/leapfrogai_sdk/cli.py b/src/leapfrogai_sdk/cli.py index d2c8912de..48322adad 100644 --- a/src/leapfrogai_sdk/cli.py +++ b/src/leapfrogai_sdk/cli.py @@ -32,7 +32,7 @@ @click.command() def cli(app: str, host: str, port: str, app_dir: str): sys.path.insert(0, app_dir) - """Leapfrog AI CLI""" + """LeapfrogAI CLI""" app = import_app(app) asyncio.run(serve(app(), host, port)) diff --git a/src/leapfrogai_ui/.env.example b/src/leapfrogai_ui/.env.example index 584bb5b5b..316894377 100644 --- a/src/leapfrogai_ui/.env.example +++ b/src/leapfrogai_ui/.env.example @@ -15,7 +15,7 @@ SUPABASE_AUTH_KEYCLOAK_CLIENT_ID=uds-supabase SUPABASE_AUTH_KEYCLOAK_SECRET= #ORIGIN=http://localhost:3000 # set if running in Docker locally (variable is also used in deployment) -#If specified, app will use OpenAI instead of Leapfrog +#If specified, app will use OpenAI instead of LeapfrogAI OPENAI_API_KEY= # PLAYWRIGHT diff --git a/src/leapfrogai_ui/.prettierignore b/src/leapfrogai_ui/.prettierignore index 06c9c69b9..9b8360d5f 100644 --- a/src/leapfrogai_ui/.prettierignore +++ b/src/leapfrogai_ui/.prettierignore @@ -5,4 +5,6 @@ yarn.lock ../../packages/ui/chart/ -supabase \ No newline at end of file +supabase + +README.md diff --git a/src/leapfrogai_ui/README.md b/src/leapfrogai_ui/README.md index 7b38cdb86..2c296603a 100644 --- a/src/leapfrogai_ui/README.md +++ b/src/leapfrogai_ui/README.md @@ -1,43 +1,64 @@ # LeapfrogAI UI -## Getting Started +> [!IMPORTANT] +> See the [UI package documentation](../../packages/ui/README.md) for general pre-requisites, dependent components, and package deployment instructions -### Requirements: +This document is only applicable for spinning up the UI in a local Node development environment. -This application requires Supabase, and either Leapfrog API or OpenAI to function. Additionally, it can optionally use -Keycloak for authentication.
There are several different ways to run it, so please see the "Configuration Options" section -below for more information. +## Local Development Setup -### Running the UI +> [!IMPORTANT] +> Execute the following commands from this sub-directory -1. Change directories: `cd src/leapfrogai_ui` -2. Create a `.env` file at the root of the UI project (src/leapfrogai_ui), reference the `.env.example` file for values to put in the .env file -3. Install dependencies: `npm install` -4. Run `npm run dev -- --open` +### Running + +1. Install dependencies + + ```bash + npm install + ``` + +2. Create a .env using the .env.example as a template. + +3. Run the Node application and open in your default browser + + ```bash + npm run dev -- --open + ``` + +### Building + +To create a production version of the app: + +```bash +npm run build +``` + +You can preview the production build with `npm run preview`. ### Configuration Options +#### API + It is recommended to run LeapfrogAI with UDS, but if you want to run the UI locally (on localhost, e.g. for local development), you can either: -1. Connect to a UDS deployed version of the Leapfrog API and Supabase - or -2. Connect to OpenAI and UDS deployed Supabase or locally running Supabase. +1. Connect to a UDS deployed version of the LeapfrogAI API and Supabase -_Note - most data CRUD operations utilize Leapfrog API or OpenAI, but some functionality still depends on a direct connection with Supabase._ + **OR** -#### Running everything with UDS +2. Connect to OpenAI and UDS deployed Supabase or locally running Supabase. -This is the easiest way to use the UI. Follow the documentation for running the entire [LeapfrogAI stack](https://github.com/defenseunicorns/leapfrogai) +**NOTE:** most data CRUD operations utilize LeapfrogAI API or OpenAI, but some functionality still depends on a direct connection with Supabase. -#### Running UI Locally with LeapfrogAI +If running the UI locally and utilizing LeapfrogAI API, **you must use the same Supabase instance that the LeapfrogAI API is utilizing**. -If running the UI locally and utilizing LeapfrogAPI, **you must use the same Supabase instance that the Leapfrog API is utilizing**. +#### Cluster -1. Connect the UI to a UDS deployed version of Supabase and Leapfrog API. +1. Connect the UI to a UDS deployed version of Supabase and LeapfrogAI API. Ensure these env variables are set appropriately in your .env file: -``` +```bash PUBLIC_SUPABASE_URL=https://supabase-kong.uds.dev PUBLIC_SUPABASE_ANON_KEY= ... @@ -45,14 +66,38 @@ LEAPFROGAI_API_BASE_URL=https://leapfrogai-api.uds.dev DEFAULT_MODEL=llama-cpp-python # or vllm ``` -2. Run the frontend migrations +2. Run the UI migrations + + If you deploy the UI with UDS, the necessary database migrations will be applied. You can still run a local version of the UI, but the deployed version will have set up the database properly for you. + +#### Standalone Supabase + +1. Install [Supabase](https://supabase.com/docs/guides/cli/getting-started?platform=macos) +2. Run: `supabase start` + The configuration files at src/leapfrogai_ui/supabase will ensure your Supabase is configured to work with Keycloak if + you set these .env variables: + +```bash +SUPABASE_AUTH_KEYCLOAK_CLIENT_ID=uds-supabase +SUPABASE_AUTH_KEYCLOAK_SECRET= #this is the client secret for the client in Keycloak +SUPABASE_AUTH_EXTERNAL_KEYCLOAK_URL=https://sso.uds.dev/realms/uds +``` + +After it starts, the Supabase API URL and Anon key are printed to the console. 
These are used in the .env file to connect to Supabase. +After starting supabase for the first time, you need to initialize the database with migrations and seed data: + +`supabase db reset` -If you deploy the UI with UDS, the necessary database migrations will be applied. You can still run a local version of the UI, but the deployed version will have set up the -database properly for you. +After this initial reset, if you start Supabase again it will already have the data and you don't need to run this command unless you want to restore it to the default. -Further instructions will be coming soon in a future release. +Stop Supabase: -##### Authentication +`npm run supabase:stop` + +**WARNING:** if switching the application from utilizing LeapfrogAI API to OpenAI or vice versa, and you encounter this error: `Server responded with status code 431. See https://vitejs.dev/guide/troubleshooting.html#_431-request-header-fields-too-large.`, then you need to clear your browser cookies. + +#### Authentication You can choose to use Keycloak (with UDS) or turn Keycloak off and just use Supabase. @@ -64,9 +109,9 @@ run the UI outside of UDS on localhost (e.g. for development work), there are so Add these values to the "GOTRUE_URI_ALLOW_LIST" (no spaces!). This variable may not exist and you will need to add it. Restart the supabase-auth pod after updating the config: `http://localhost:5173/auth/callback,http://localhost:4173/auth/callback` - Note - Port 4173 is utilized by Playwright for E2E tests. You do not need this if you are not concerned about Playwright. + **NOTE:** Port 4173 is utilized by Playwright for E2E tests. You do not need this if you are not concerned about Playwright. -###### With Keycloak authentication +##### With Keycloak 1. If Supabase was deployed with UDS, it will automatically configure a Keycloak Client for you. We need to modify this client to allow localhost URIs. @@ -76,88 +121,54 @@ run the UI outside of UDS on localhost (e.g. for development work), there are so http://localhost:4173/auth/callback (for Playwright tests) 2. If you want to connect Keycloak to a locally running Supabase instance (non UDS deployed), see the "Running Supabase locally" section below. -###### Without Keycloak authentication +##### Without Keycloak 1. To turn off Keycloak, set this .env variable: `PUBLIC_DISABLE_KEYCLOAK=true` -##### Running UI Locally with OpenAI +#### OpenAI Set the following .env variables: -``` +```bash DEFAULT_MODEL=gpt-3.5-turbo LEAPFROGAI_API_BASE_URL=https://api.openai.com -#If specified, app will use OpenAI instead of Leapfrog +# If specified, app will use OpenAI instead of LeapfrogAI OPENAI_API_KEY= ``` You still need Supabase, so you can connect to UDS deployed Supabase, or run Supabase locally. To connect to UDS deployed Supabase, set these .env variables: -``` +```bash PUBLIC_SUPABASE_URL=https://supabase-kong.uds.dev PUBLIC_SUPABASE_ANON_KEY= ``` -Running Supabase locally: - -1. Install [Supabase](https://supabase.com/docs/guides/cli/getting-started?platform=macos) -2. Run: `supabase start` - The configuration files at src/leapfrogai_ui/supabase will ensure your Supabase is configured to work with Keycloak if - you set these .env variables: - -``` -SUPABASE_AUTH_KEYCLOAK_CLIENT_ID=uds-supabase -SUPABASE_AUTH_KEYCLOAK_SECRET= #this is the client secret for the client in Keycloak -SUPABASE_AUTH_EXTERNAL_KEYCLOAK_URL=https://sso.uds.dev/realms/uds -``` - -After it starts, the Supabase API URL and Anon key are printed to the console.
These are used in the .env file to connect to Supabase. -After starting supabase for the first time, you need to initialize the database with migrations and seed data: -`supabase db reset` -After this initial reset, if you start Supabase again it will already have the data and you don't need to run this command unless you want to restore it to the default. -Stop Supabase: -`npm run supabase:stop` -_Warning - if switching the application from utilizing Leapfrog API to OpenAI or vice versa, -and you encounter this error:_ -`Server responded with status code 431. See https://vitejs.dev/guide/troubleshooting.html#_431-request-header-fields-too-large.` -_you need to clear your browser cookies_ -### Building -To create a production version of the app: -``` -npm run build -``` -You can preview the production build with `npm run preview`. -## Developer Notes -### Tooling ### Supabase We use Supabase for authentication and a database. Application-specific data (e.g., user profile images, application settings like feature flags, etc.) should be stored directly in Supabase and -would not normally utilize the Leapfrog API for CRUD operations. +would not normally utilize the LeapfrogAI API for CRUD operations. ### Playwright End-to-End Tests -First install Playwright: `npm init playwright@latest` +1. Install Playwright + + ```bash + npm init playwright@latest + ``` -To run the E2E tests: -`npm run test:integration:ui` -Click the play button in the Playwright UI. -Playwright will run it's own production build and server the app at `http://localhost:4173`. If you make server side changes, -restart playwright for them to take effect. +2. Run the E2E tests: + + ```bash + npm run test:integration:ui + ``` + + Click the play button in the Playwright UI. + Playwright will run its own production build and serve the app at `http://localhost:4173`. If you make server-side changes, + restart Playwright for them to take effect. Notes: @@ -165,19 +176,19 @@ Notes: .env file. See the "Configuration Options" section above to configure which database Playwright is using. 2. If you run the tests in headless mode (`npm run test:integration`) you do not need the app running, it will build the app and run on port 4173. -# Supabase and Keycloak Integration +### Supabase and Keycloak Integration The Supabase docs are inadequate for properly integrating with Keycloak. Additionally, they only support integration with the Supabase Cloud SaaS offering. Before reading the section below, first reference the [Supabase docs](https://supabase.com/docs/guides/auth/social-login/auth-keycloak). -### The following steps are required to integrate Supabase with Keycloak for local development: +**The following steps are required to integrate Supabase with Keycloak for local development** The supabase/config.toml file contains configuration options for Supabase when running it locally. When running locally, the Supabase UI dashboard does not offer all the same configuration options that the cloud version does, so you have to specify some options in this file instead. The variables that had to be overridden were: ```toml [auth] site_url = "http://localhost:5173" @@ -201,8 +212,8 @@ Under a realm in Keycloak that is not the master realm (if using UDS, its "uds") 4. Copy the Client Secret under the Clients -> Credentials tab and use in the env variables below 5.
You can create users under the "Users" tab and either have them verify their email (if you set up SMTP), or manually mark them as verified. -``` -#.env +```bash +# .env SUPABASE_AUTH_KEYCLOAK_CLIENT_ID= SUPABASE_AUTH_KEYCLOAK_SECRET= SUPABASE_AUTH_EXTERNAL_KEYCLOAK_URL= @@ -225,26 +236,27 @@ If you need to use a different Keycloak server for local development, you will n If your Keycloak server is not at a hosted domain, you will also need to modify the /etc/hosts on your machine: -``` -Example: -sudo nano /etc/hosts -*add this line (edit as required)* +```bash +vim /etc/hosts + +# add the following line to the opened `/etc/hosts` file +# replace the `xxx.xxx.xx.xx` placeholder with the correct IP address xxx.xxx.xx.xx keycloak.admin.uds.dev ``` Ensure the -``` +```bash PUBLIC_SUPABASE_URL= PUBLIC_SUPABASE_ANON_KEY= ``` -variables in your .env file are pointing to the correct Supabase instance. +variables in your `.env` file are pointing to the correct Supabase instance. -Note - if connecting to a hosted Supabase instance, or in a cluster with networking, you will not need to override /etc/host files. +**NOTE:** if connecting to a hosted Supabase instance, or in a cluster with networking, you will not need to override the /etc/hosts file. The: -``` +```bash SUPABASE_AUTH_KEYCLOAK_CLIENT_ID= SUPABASE_AUTH_KEYCLOAK_SECRET= SUPABASE_AUTH_EXTERNAL_KEYCLOAK_URL= diff --git a/src/leapfrogai_ui/playwright.config.ts b/src/leapfrogai_ui/playwright.config.ts index 27f1dff05..f544586ba 100644 --- a/src/leapfrogai_ui/playwright.config.ts +++ b/src/leapfrogai_ui/playwright.config.ts @@ -45,7 +45,7 @@ const chromeConfig = { const defaultConfig: PlaywrightTestConfig = { // running more than 1 worker can cause flakiness due to test files being run at the same time in different browsers // (e.g. navigation history is incorrect) - // Additionally, Leapfrog API is slow when attaching files to assistants, resulting in flaky tests + // Additionally, LeapfrogAI API is slow when attaching files to assistants, resulting in flaky tests // We can attempt to increase the number of browsers and workers in the pipeline when the API is faster workers: 1, projects: [ diff --git a/src/leapfrogai_ui/src/lib/components/Message.svelte b/src/leapfrogai_ui/src/lib/components/Message.svelte index 79fe0c88e..bf2c939f1 100644 --- a/src/leapfrogai_ui/src/lib/components/Message.svelte +++ b/src/leapfrogai_ui/src/lib/components/Message.svelte @@ -130,7 +130,7 @@ {:else} - LeapfrogAI + LeapfrogAI {/if}
diff --git a/src/leapfrogai_ui/tests/chat.test.ts b/src/leapfrogai_ui/tests/chat.test.ts index bb096963d..bae117f05 100644 --- a/src/leapfrogai_ui/tests/chat.test.ts +++ b/src/leapfrogai_ui/tests/chat.test.ts @@ -99,7 +99,7 @@ test('it cancels responses when clicking enter instead of pause button and does await deleteActiveThread(page, openAIClient); }); -// TODO - Leapfrog API is currently too slow when sending assistant responses so when this test +// TODO - LeapfrogAI API is currently too slow when sending assistant responses so when this test // runs with multiple browsers in parallel, it times out. It should usually work for individual // browsers unless the API is receiving additional run requests simultaneously test('it can switch between normal chat and chat with an assistant', async ({ @@ -130,7 +130,7 @@ test('it can switch between normal chat and chat with an assistant', async ({ await expect(messages).toHaveCount(4); await expect(page.getByTestId('user-icon')).toHaveCount(2); - await expect(page.getByTestId('leapfrog-icon')).toHaveCount(1); + await expect(page.getByTestId('leapfrogai-icon')).toHaveCount(1); await expect(page.getByTestId('assistant-icon')).toHaveCount(1); // Test selected assistant has a checkmark and clicking it again de-selects the assistant @@ -145,7 +145,7 @@ test('it can switch between normal chat and chat with an assistant', async ({ await expect(messages).toHaveCount(6); await expect(page.getByTestId('user-icon')).toHaveCount(3); - await expect(page.getByTestId('leapfrog-icon')).toHaveCount(2); + await expect(page.getByTestId('leapfrogai-icon')).toHaveCount(2); await expect(page.getByTestId('assistant-icon')).toHaveCount(1); // Cleanup diff --git a/src/leapfrogai_ui/tests/helpers/fileHelpers.ts b/src/leapfrogai_ui/tests/helpers/fileHelpers.ts index a2fd457cc..38cdafe0f 100644 --- a/src/leapfrogai_ui/tests/helpers/fileHelpers.ts +++ b/src/leapfrogai_ui/tests/helpers/fileHelpers.ts @@ -21,7 +21,7 @@ export const uploadFileWithApi = async ( type: contentType }); - // This can also be done IAW the OpenAI API documentation with fs.createReadStream, but Leapfrog API does not currently + // This can also be done IAW the OpenAI API documentation with fs.createReadStream, but LeapfrogAI API does not currently // support a ReadStream. 
Open Issue: https://github.com/defenseunicorns/leapfrogai/issues/710 return openAIClient.files.create({ @@ -82,7 +82,7 @@ export const createExcelFile = (options: CreateFileOptions = {}) => { const filenameWithExtension = `${filename}${extension}`; const workbook = XLSX.utils.book_new(); - const worksheet = XLSX.utils.json_to_sheet([{ Name: 'Leapfrog', Age: 1, Type: 'AI' }]); + const worksheet = XLSX.utils.json_to_sheet([{ Name: 'LeapfrogAI', Age: 1, Type: 'AI' }]); XLSX.utils.book_append_sheet(workbook, worksheet, 'Sheet1'); XLSX.writeFile(workbook, `./tests/fixtures/${filenameWithExtension}`); diff --git a/tests/make-tests.mk b/tests/Makefile similarity index 80% rename from tests/make-tests.mk rename to tests/Makefile index 00b24dc1a..53441cd0f 100644 --- a/tests/make-tests.mk +++ b/tests/Makefile @@ -1,7 +1,8 @@ +SUPABASE_URL := https://supabase-kong.uds.dev set-supabase: - SUPABASE_URL := $(shell cd src/leapfrogai_api; supabase status | awk '/API URL:/ {print $$3}') - SUPABASE_ANON_KEY := $(shell cd src/leapfrogai_api; supabase status | awk '/anon key:/ {print $$3}') + SUPABASE_URL := ${SUPABASE_URL} + SUPABASE_ANON_KEY := $(shell uds zarf tools kubectl get secret -n leapfrogai supabase-bootstrap-jwt -o json | uds zarf tools yq '.data.anon-key' | base64 -d) define get_jwt_token echo "Getting JWT token from ${SUPABASE_URL}..."; \ @@ -26,10 +27,10 @@ test-env: set-supabase @read -s -p "Enter your DEV API password: " SUPABASE_PASS; echo; \ $(call get_jwt_token,"${SUPABASE_URL}/auth/v1/token?grant_type=password") -test-int-api: set-supabase +test-api-integration: set-supabase source .env; PYTHONPATH=$$(pwd) pytest -vv -s tests/integration/api -test-unit: set-supabase +test-api-unit: set-supabase PYTHONPATH=$$(pwd) pytest -vv -s tests/unit test-load: diff --git a/tests/README.md b/tests/README.md new file mode 100644 index 000000000..46951f643 --- /dev/null +++ b/tests/README.md @@ -0,0 +1,64 @@ +# Testing + +This document outlines tests related to the LeapfrogAI API and backends. + +Please see the [documentation in the LeapfrogAI UI sub-directory](../src/leapfrogai_ui/README.md) for Svelte UI Playwright tests. + +## API Tests + +For the unit and integration tests within this directory, the following components must be running and accessible: + +- [LeapfrogAI API](../src/leapfrogai_api/README.md) +- [Repeater](../packages/repeater/README.md) +- [Supabase](../packages/supabase/README.md) + +Please see the [Makefile](./Makefile) for more details. Below is a quick synopsis of the available Make targets: + +```bash +# create a test user for the tests +make test-user SUPABASE_URL=https://supabase-kong.uds.dev + +# setup the environment variables for the tests +make test-env SUPABASE_URL=https://supabase-kong.uds.dev + +# run the unit tests +make test-api-unit SUPABASE_URL=https://supabase-kong.uds.dev + +# run the integration tests +make test-api-integration SUPABASE_URL=https://supabase-kong.uds.dev +``` + +## Load Tests + +Please see the [Load Test documentation](./load/README.md) and directory for more details. + +## End-To-End Tests + +End-to-End (E2E) tests are located in the `e2e/` sub-directory. Each E2E test runs independently based on the model backend that we are trying to test. + +### Running Tests + +Run the tests on an existing [UDS Kubernetes cluster](../k3d-gpu/README.md) with the applicable backend deployed to the cluster. 
+ +For example, the following sequence of commands runs tests against the llama-cpp-python backend: + +```bash +# Build and Deploy the LFAI API +make build-api +uds zarf package deploy packages/api/zarf-package-leapfrogai-api-*.tar.zst + +# Build and Deploy the model backend you want to test. +# NOTE: In this case we are showing llama-cpp-python +make build-llama-cpp-python +uds zarf package deploy packages/llama-cpp-python/zarf-package-llama-cpp-python-*.tar.zst + +# Install the python dependencies +python -m pip install ".[dev]" + +# Run the tests! +# NOTE: Each model backend has its own e2e test files +python -m pytest tests/e2e/test_llama.py -v + +# Cleanup after yourself +k3d cluster delete uds +``` diff --git a/tests/e2e/README.md b/tests/e2e/README.md deleted file mode 100644 index b566ffc04..000000000 --- a/tests/e2e/README.md +++ /dev/null @@ -1,41 +0,0 @@ -# LeapfrogAI End-To-End Tests - -This directory holds our e2e tests that we use to verify LFAI-API + various model backend functionality in an environment that replicates a live setting. The tests in this directory are automatically run against a [UDS Core](https://github.com/defenseunicorns/uds-core) cluster whenever a PR is opened or updated. - -## Running Tests Locally - -The tests in this directory are also able to be run locally! We are currently opinionated towards running on a cluster that is configured with UDS, as we mature out tests & documentations we'll potentially lose some of that opinionation. - -### Dependencies - -1. Python >= 3.11.6 -2. k3d >= v5.6.0 -3. uds >= v0.7.0 - -### Actually Running The Test - -There are several ways you can setup and run these tests. Here is one such way: - -> Deploy the [UDS cluster](/README.md#uds) \ -> NOTE: This stands up a k3d cluster and installs istio & pepr - -```bash -# Build and Deploy the LFAI API -make build-api -uds zarf package deploy zarf-package-leapfrogai-api-*.tar.zst - -# Build and Deploy the model backend you want to test. -# NOTE: In this case we are showing llama-cpp-python -make build-llama-cpp-python -uds zarf package deploy zarf-package-llama-cpp-python-*.tar.zst - -# Install the python dependencies -python -m pip install ".[dev]" - -# Run the tests! -# NOTE: Each model backend has its own e2e test files -python -m pytest tests/e2e/test_llama.py -v - -# Cleanup after yourself -k3d cluster delete -``` diff --git a/tests/load/README.md b/tests/load/README.md index 7985a6463..86d186e0e 100644 --- a/tests/load/README.md +++ b/tests/load/README.md @@ -1,29 +1,28 @@ # LeapfrogAI Load Tests -## Overview - These tests check the API's ability to handle different amounts of load. The tests simulate a specified number of users hitting the endpoints with some number of requests per second. -# Requirements - -### Environment Setup +## Pre-Requisites -Before running the tests, ensure that your API URL and bearer token are properly configured in your environment variables. Follow these steps: +Before running the tests, ensure that your API URL and API key are properly configured in your environment variables. Follow these steps: 1. Set the API URL: + ```bash export API_URL="https://leapfrogai-api.uds.dev" ``` 2. Set the API token: + ```bash - export BEARER_TOKEN="" + export BEARER_TOKEN="" ``` - **Note:** The bearer token should be your Supabase user JWT. For information on generating a JWT, please refer to the [Supabase README.md](../../packages/supabase/README.md).
While an API key generated from the LeapfrogAI API endpoint can be used, it will cause the token generation load tests to fail. + **Note:** See the [API documentation](../../src/leapfrogai_api/README.md) to create an API key. 3. (Optional) - Set the model backend, this will default to `vllm` if unset: - ```bash + + ```bash export DEFAULT_MODEL="llama-cpp-python" ``` @@ -32,6 +31,7 @@ Before running the tests, ensure that your API URL and bearer token are properly To start the Locust web interface and run the tests: 1. Install dependencies from the project root. + ```bash pip install ".[dev]" ``` @@ -39,6 +39,7 @@ To start the Locust web interface and run the tests: 2. Navigate to the directory containing `loadtest.py`. 3. Execute the following command: + ```bash locust -f loadtest.py --web-port 8089 ``` @@ -49,4 +50,4 @@ To start the Locust web interface and run the tests: - Set the number of users to simulate - Set the spawn rate (users per second) - Choose the host to test against (should match your `API_URL`) - - Start the test and monitor results in real-time \ No newline at end of file + - Start the test and monitor results in real-time diff --git a/uds-bundles/dev/README.md b/uds-bundles/dev/README.md deleted file mode 100644 index deb25ddde..000000000 --- a/uds-bundles/dev/README.md +++ /dev/null @@ -1,118 +0,0 @@ -# LeapfrogAI UDS Dev Deployment Instructions - -Follow these instructions to create a local development deployment of LeapfrogAI using [UDS](https://github.com/defenseunicorns/uds-core). - -Make sure your system has the [required dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/#prerequisites). - -For ease, it's best to create a virtual environment: - -```shell -python -m venv .venv -source .venv/bin/activate -``` - -#### Linux and Windows (via WSL2) - -Each component is built into its own Zarf package. You can build all of the packages you need at once with the following `Make` targets: -> ***Note:*** You need to build with `make build-* LOCAL_VERSION=dev` to set the tag to `dev` instead of the commit hash locally. - -> ***NOTE:*** Some of the packages have Python dev dependencies that need to be installed when building them locally. These dependencies are used to download the model weights that will be included in the final Zarf package. These dependencies are listed as `dev` in the `project.optional-dependencies` section of each models `pyproject.toml`. 
- -You can build all of the packages you need at once with the following `Make` targets: - -```shell -LOCAL_VERSION=dev make build-cpu # api, llama-cpp-python, text-embeddings, whisper, supabase -LOCAL_VERSION=dev make build-gpu # api, vllm, text-embeddings, whisper, supabase -LOCAL_VERSION=dev make build-all # all of the backends -``` - -**OR** - -You can build components individually using the following `Make` targets: - -```shell -LOCAL_VERSION=dev make build-api -LOCAL_VERSION=dev make build-supabase -LOCAL_VERSION=dev make build-vllm # if you have GPUs (macOS not supported) -LOCAL_VERSION=dev make build-llama-cpp-python # if you have CPU only -LOCAL_VERSION=dev make build-text-embeddings -LOCAL_VERSION=dev make build-whisper -``` - -**NOTE: If you do not prepend your commands with `LOCAL_VERSION=dev`, uds will not find the generated zarf packages, as -they will be tagged with your current git hash instead of `dev` which uds expects** - -#### macOS - -To run the same commands in macOS, you will need to prepend your command with a couple of env vars like so: - -All Macs: `REG_PORT=5001` - -Apple Silicon (M1/M2/M3/M4 series) Macs: `ARCH=arm64` - -To demonstrate what this would look like for an Apple Silicon Mac: -``` shell -REG_PORT=5001 ARCH=arm64 LOCAL_VERSION=dev make build-cpu -``` - -To demonstrate what this would look like for an older Intel Mac (not officially supported): -``` shell -REG_PORT=5001 LOCAL_VERSION=dev make build-cpu -``` - -**OR** - -You can build components individually using the following `Make` targets, just like in the Linux section except ensuring -to prepend the env vars detailed above. - -#### Once the packages are created, you can deploy either a CPU or GPU-enabled deployment via one of the UDS bundles (macOS only supports cpu) - -# Deploying via UDS bundle - -## CPU UDS Deployment - -Create the uds CPU bundle: -```shell -cd uds-bundles/dev/cpu -uds create . -``` - -Deploy a [UDS cluster](/README.md#uds) if one isn't deployed already - -Deploy the LeapfrogAI bundle: -```shell -uds deploy uds-bundle-leapfrogai*.tar.zst -``` - -## GPU UDS Deployment - -Create the uds GPU bundle: -```shell -cd uds-bundles/dev/gpu -uds create . -``` - -Deploy a [UDS cluster](/README.md#uds) with the following flags, as so: - -```shell -uds deploy {k3d-cluster-name} --set K3D_EXTRA_ARGS="--gpus=all --image=ghcr.io/justinthelaw/k3d-gpu-support:v1.27.4-k3s1-cuda" -``` - - -Deploy the LeapfrogAI bundle: -```shell -uds deploy uds-bundle-leapfrogai-*.tar.zst --confirm -``` - -Once running you can access the various components, if deployed and exposed, at the following URLS: - -```shell -https://ai.uds.dev # UI -https://leapfrogai-api.uds.dev # API -https://supabase-kong.uds.dev # Supabase Kong -https://keycloak.uds.dev # Keycloak -``` - -## Checking and Managing the Deployment - -For tips on how to monitor the deployment, accessing the UI, and clean up, please reference the [Quick Start](https://docs.leapfrog.ai/docs/local-deploy-guide/quick_start/#checking-deployment) guide in the LeapfrogAI docs. 
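Once the bundle is up, a quick reachability check against the exposed hostnames above can confirm each component answers; this loop is only a sketch and assumes the URLs resolve from your workstation:

```bash
# expect a 2xx/3xx status code from each exposed component
for url in https://ai.uds.dev https://leapfrogai-api.uds.dev https://supabase-kong.uds.dev https://keycloak.uds.dev; do
  curl -ks -o /dev/null -w "%{http_code}  $url\n" "$url"
done
```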
diff --git a/website/.markdownlint.json b/website/.markdownlint.json deleted file mode 100644 index 721182d36..000000000 --- a/website/.markdownlint.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "default": true, - "MD033": false, - "MD013": false, - "MD036": false -} diff --git a/website/LICENSE b/website/LICENSE index 1bf24df1d..8dd2f8876 100644 --- a/website/LICENSE +++ b/website/LICENSE @@ -186,7 +186,7 @@ same "printed page" as the copyright notice for easier identification within third-party archives. - Copyright [2023] [Defense Unicorns] + Copyright 2024 Defense Unicorns Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/website/README.md b/website/README.md index 374420575..3e4434799 100644 --- a/website/README.md +++ b/website/README.md @@ -10,10 +10,12 @@ which is a fork of the Google Docsy theme. The Docsy documentation can be used a This repository enforces [Conventional Commit](https://www.conventionalcommits.org/en/v1.0.0/) messages. See the documentation for [`release-please`](https://github.com/googleapis/release-please#how-should-i-write-my-commits) for correctly formatting commit messages. [This video](https://www.youtube.com/watch?v=lwGcnDgwmFc&ab_channel=Syntax) does a good job of showing how to add the `Conventional Commit` VSCode extension to use when creating the commit messages. -#### Prerequisites +### Pre-Requisites [Hugo](https://gohugo.io/documentation/) is required in order to utilize the doc site template. You can run `brew install hugo` to quickly install or see the [installation page](https://gohugo.io/installation/) for additional install methods. +Go and Node are also required dependencies for running a Hugo site. Refer to the [Go installation documentation](https://go.dev/doc/install) and [NVM documentation](https://github.com/nvm-sh/nvm) for details. + ## Getting Started Create a new repository from this template: @@ -23,9 +25,9 @@ Create a new repository from this template: Clone your new site: ```bash -git clone -cd -npm ci +git clone https://github.com/defenseunicorns/leapfrogai.git +cd website +npm install ``` To run the site for local development: diff --git a/website/assets/img/walkthrough_thumbnail.jpg b/website/assets/img/walkthrough_thumbnail.jpg new file mode 100644 index 000000000..e3ccdcb80 Binary files /dev/null and b/website/assets/img/walkthrough_thumbnail.jpg differ diff --git a/website/content/en/docs/leapfrogai/tadpole/tadpole-deploy.md b/website/content/en/docs/leapfrogai/tadpole/tadpole-deploy.md deleted file mode 100644 index 4a735113f..000000000 --- a/website/content/en/docs/leapfrogai/tadpole/tadpole-deploy.md +++ /dev/null @@ -1,111 +0,0 @@ ---- -title: Sandbox Deployment -type: docs -draft: true ---- - -## Overview - -The Tadpole sandbox deployment is the lightweight method to initiate your LeapfrogAI experience and is exclusively designed for **local testing and development purposes only**. Tadpole facilitates a non-Kubernetes deployment that executes a `docker compose` build of the LeapfrogAI API, language backend, and user interface. To ensure a smooth start, there are a collection of straightforward basic recipes. Executing any of these recipes initiates the automated processes within Tadpole, encompassing the build, configuration, and initiation of the necessary components. The culmination of this process results in a locally hosted "Chat with an LLM" demonstration. 
- -### Prerequisites - -- Have [Docker](https://docs.docker.com/get-docker/) installed. -- Have [Continue.dev](https://continue.dev/) installed. - -### System Requirements - -- `chat` and `code` recipes require a minimum of 16GB RAM. -- `chat-gpu` recipe requires a minimum of 8GB VRAM and a CUDA capable NVIDIA GPU with drivers setup in order to function correctly with Docker. - -{{% alert-note %}} -To set up your CUDA capable NVIDIA GPU, please see the following instructions: - -- Prepare your machine for [NVIDIA Driver installation.](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#pre-installation-actions) -- Install the proper [NVIDIA Drivers.](https://docs.nvidia.com/datacenter/tesla/tesla-installation-notes/index.html#pre-install) -- Find the correct [CUDA for your environment.](https://developer.nvidia.com/cuda-downloads) -- [Install CUDA](https://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#pre-installation-actions) properly. -- Prepare Docker for [GPU accessibility.](https://docs.docker.com/config/containers/resource_constraints/#gpu) -- Obtain the [NVIDIA device plugin](https://github.com/NVIDIA/k8s-device-plugin) for Kubernetes. -{{% /alert-note %}} - -### Operating Systems - -- macOS: Only CPU recipes are compatible at this time. -- Windows: CPU recipes are compatible. GPU recipes require that the [Windows Subsystem for Linux](https://learn.microsoft.com/en-us/windows/wsl/install) is installed. -- Linux: CPU and GPU recipes are compatible. - -## Getting Started - -### Clone - -Clone into the [Tadpole GitHub repository](https://github.com/defenseunicorns/tadpole). - -{{% alert-note %}} -Cloning into this repository requires that users have an SSH key associated with your GitHub account. Please follow the [GitHub SSH documentation](https://docs.github.com/en/authentication/connecting-to-github-with-ssh) to obtain this key. -{{% /alert-note %}} - -### Chat - -To spin up the Tadpole chatbot to your local environment: - -```git -make chat -``` - -The Leapfrog-UI will be running at `http://localhost:3000/`. - -### Code - -{{% alert-note %}} -This recipe is intended for use with a code extension such as [Continue.dev](https://continue.dev/) and has been tested with the v0.7.53 prerelease. -{{% /alert-note %}} - -To build and run the code backend: - -```git -make code -``` - -Navigate to `$HOME/.continue/config.json` and modify your [Continue.dev](https://continue.dev/) configuration: - -```git -{ - "models": - [{ - "title": "leapfrogai", - "provider": "openai", - "model": "leapfrogai", - "apiKey": "freeTheModels", - "apiBase": "http://localhost:8080/openai" - }], - "modelRoles": - { - "default": "leapfrogai" - } -} -``` - -### Chat-GPU - -{{% alert-note %}} -This requires a CUDA capable NVIDIA GPU with drivers setup. -{{% /alert-note %}} - -To activate GPU resources and increase response time for your chatbot: - -```git -make chat-gpu -``` - -The Leapfrog-UI will be running at `http://localhost:3000/`. 
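To verify a running recipe outside the UI, the local OpenAI-style endpoint can be queried directly; the `/openai/v1/models` route is an assumption based on the `apiBase` in the example config above, and `freeTheModels` is the API key that config uses:

```bash
# list the models the local backend is currently serving
curl -s http://localhost:8080/openai/v1/models \
  -H "Authorization: Bearer freeTheModels"
```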
- -### Cleanup - -When you are finished, run this cleanup command to remove Tadpole from your system: - -```git -make clean -``` - -For any additional information, or to report an issue, please see the [Tadpole GitHub repository.](https://github.com/defenseunicorns/tadpole/tree/main) diff --git a/website/content/en/docs/local deploy guide/_index.md b/website/content/en/docs/local deploy guide/_index.md deleted file mode 100644 index 7e0a67448..000000000 --- a/website/content/en/docs/local deploy guide/_index.md +++ /dev/null @@ -1,13 +0,0 @@ ---- -title: Local Deployment Guide -type: docs -weight: 1 ---- - -This documentation serves as a comprehensive guide for users, providing instructions on the system requirements, setup, and deployment of LeapfrogAI to their local environment. Please note that the specified order of steps must be followed to ensure an efficient deployment process. - -## Overview - -LeapfrogAI stands as a cutting-edge self-hosted generative AI platform, strategically designed for secure and disconnected environments, offering a ChatGPT-like experience that mirrors OpenAI and Hugging Face API surfaces. This unique solution empowers your teams to seamlessly navigate a ChatGPT-like interface without the need for internet connectivity. Beyond its core capabilities, LeapfrogAI excels in efficient similarity searches across large-scale databases, providing robust generative embeddings for applications such as semantic similarity, clustering, and more. - -LeapfrogAI has the ability to leverage customer-specific data for fine-tuning models. This capability allows LeapfrogAI to gain insights into specific domains, ensuring the delivery of highly accurate contextual outputs tailored to your team's unique requirements and objectives. diff --git a/website/content/en/docs/local deploy guide/components.md b/website/content/en/docs/local deploy guide/components.md deleted file mode 100644 index d2a0e3d63..000000000 --- a/website/content/en/docs/local deploy guide/components.md +++ /dev/null @@ -1,35 +0,0 @@ ---- -title: Components -type: docs -weight: 3 ---- - -## Components - -### LeapfrogAI API - -LeapfrogAI offers an API closely aligned with OpenAI's, facilitating seamless compatibility for tools developed with OpenAI/ChatGPT to operate seamlessly with a LeapfrogAI backend. The LeapfrogAI API is a Python API that exposes LLM backends, via FastAPI and gRPC, in the OpenAI API specification. 
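To illustrate that alignment, a request shaped exactly like an OpenAI chat completion can be pointed at a LeapfrogAI deployment; the host, API key, and model name below are placeholders, not fixed values:

```bash
# the same request body an OpenAI client would send, aimed at a LeapfrogAI API
curl -s "https://leapfrogai-api.uds.dev/openai/v1/chat/completions" \
  -H "Authorization: Bearer $LEAPFROGAI_API_KEY" \
  -H "Content-Type: application/json" \
  -d '{"model": "llama-cpp-python", "messages": [{"role": "user", "content": "Hello, LeapfrogAI!"}]}'
```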
- -### Backend - -LeapfrogAI offers several backends for a variety of use cases: - -| Backend | Support | -| ------------------------------------------------------------------------------------------ | ------------------------------- | -| [llama-cpp-python](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/llama-cpp-python) | AMD64, Docker, Kubernetes, Zarf | -| [whisper](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/whisper) | AMD64, Docker, Kubernetes, Zarf | -| [text-embeddings](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/text-embeddings) | AMD64, Docker, Kubernetes, Zarf | -| [VLLM](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/vllm) | AMD64, Docker, Kubernetes, Zarf | -| [RAG](https://github.com/defenseunicorns/leapfrogai-backend-rag) | AMD64, Docker, Kubernetes, Zarf | - -### Image Hardening - -LeapfrogAI utilizes Chainguard's [apko](https://github.com/chainguard-dev/apko) to fortify base Python images by adhering to a version-pinning approach, ensuring compatibility with the latest supported version by other components within the LeapfrogAI stack. Please see the [leapfrogai-images](https://github.com/defenseunicorns/leapfrogai-images) GitHub repository for additional information. - -### Software Development Kit - -The LeapfrogAI SDK offers a standardized collection of Protobuf and Python utilities designed to facilitate the implementation of backends and gRPC. Please see the [leapfrogai-sdk](https://github.com/defenseunicorns/leapfrogai-sdk) GitHub repository for additional information. - -### User Interface - -LeapfrogAI offers user-friendly interfaces tailored for common use-cases, including chat, summarization, and transcription, providing accessible options for users to initiate these tasks. Please see the [leapfrogai-ui](https://github.com/defenseunicorns/leapfrogai-ui) GitHub repository for additional information. diff --git a/website/content/en/docs/local deploy guide/dependencies.md b/website/content/en/docs/local deploy guide/dependencies.md deleted file mode 100644 index a67440d35..000000000 --- a/website/content/en/docs/local deploy guide/dependencies.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -title: Dependencies -type: docs -weight: 5 ---- - -This documentation addresses the local deployment dependencies of LeapfrogAI, a self-hosted generative AI platform. LeapfrogAI extends the diverse capabilities and modalities of AI models to various environments, ranging from cloud-based deployments to servers with ingress and egress limitations. With LeapfrogAI, teams can deploy APIs aligned with OpenAI's API specifications, empowering teams to create and utilize tools compatible with nearly any model and code library available. Importantly, all operations take place locally, ensuring users can maintain the security of their information and sensitive data within their own environments - -Follow the outlined steps to ensure that your device is configured to execute LeapfrogAI workloads across local development scenarios. Please note that these instructions presume you have root access. 
- -### Host Dependencies - -Ensure that the following tools and packages are present in your environment: - -- [Jq](https://jqlang.github.io/jq/) -- [Docker](https://www.docker.com/get-started/) -- [build-essential](https://packages.ubuntu.com/focal/build-essential) -- [iptables](https://help.ubuntu.com/community/IptablesHowTo?action=show&redirect=Iptables) -- [Git](https://git-scm.com/) -- [procps](https://gitlab.com/procps-ng/procps) - -### Install pyenv - -- Follow the installation instructions outlined in the [pyenv](https://github.com/pyenv/pyenv?tab=readme-ov-file#installation) repository to install Python 3.11.6. -- If your installation process completes successfully but indicates missing packages such as `sqlite3`, execute the following command to install the required packages then proceed with the reinstallation of Python 3.11.6: - -```git -sudo apt-get install build-essential zlib1g-dev libffi-dev -libssl-dev libbz2-dev libreadline-dev libsqlite3-dev -liblzma-dev libncurses-dev -``` - -### Install Homebrew - -- Follow the [instructions](https://brew.sh/) to install the Homebrew package manager onto your system. - -### Install Docker - -- Follow the [instructions](https://docs.docker.com/engine/install/) to install Docker onto your system. -- For systems using an NVIDIA GPU, it is necessary to modify the Docker runtime to NVIDIA. Refer to the GPU instructions below for guidance on making this adjustment. - -### Install Kubectl - -- Follow the [instructions](https://kubernetes.io/docs/tasks/tools/#kubectl) to install kubectl onto your system. - -### Install K3d - -- Follow the [instructions](https://k3d.io/) to install k3d onto your system. - -### Install Zarf - -- Install [Zarf](https://zarf.dev/) using Homebrew: - -```git -brew tap defenseunicorns/tap && brew install zarf -``` - -- As Homebrew does not install packages to the root directory, it is advisable to manually add the `zarf` binary to the root. Even in cases where Docker is installed in a rootless configuration, certain systems may encounter container access issues if Docker is not executed with root privileges. -- To install as root, execute the following command in your terminal and ensure that the version number is replaced with the most recent [release](https://github.com/zarf-dev/zarf/releases): - -```git -# switch to sudo -sudo su -# download and store on removable media -wget https://github.com/defenseunicorns/ uds-cli /releases/download/v0. 9.0/ uds-cli _v0. 9.0 _Linux_amd64 -# upload from removable media and install -mv uds-cli_v0.9.0_Linux_amd64 /bin/uds -chmod +x /bin/uds -``` - -## GPU Specific Intructions - -LeapfrogAI exclusively supports NVIDIA GPUs at this point in time. The following instructions are tailored for users utilizing an NVIDIA GPU. - -### NVIDIA Drivers - -- Ensure that the proper [NVIDIA drivers](https://www.nvidia.com/download/index.aspx) are installed (>=525.60). -- Follow the [driver download](https://www.nvidia.com/download/index.aspx) by identifying your hardware from the provided list. - -### CUDA Toolkit - -- Follow the [instructions](https://developer.nvidia.com/cuda-downloads) to download the CUDA toolkit (>=12.2x). This toolkit is only required on the system that is building the Zarf Packages. - -### NVIDIA Container Toolkit - -- Follow the [instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt) to download the NVIDIA container toolkit (>=1.14). 
-- After the successful installation of the toolkit, follow the [toolkit instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt) to verify that your default Docker runtime is configured for NVIDIA. -- Verify that the default runtime is changed by running the following command: - -```git -docker info | grep "Default Runtime" -``` - -- The expected output should be similar to: `Default Runtime: nvidia`. - -### GPU Support Test - -- Test that your GPU is visible through Docker by deploying the [GPU Support Test](https://github.com/justinthelaw/gpu-support-test). - -### Deploy LeapfrogAI - -- After ensuring that all system dependencies and requirements are fulfilled, refer to the LeapfrogAI deployment guide for comprehensive instructions on deploying LeapfrogAI within your local environment. diff --git a/website/content/en/docs/local deploy guide/deploy.md b/website/content/en/docs/local deploy guide/deploy.md deleted file mode 100644 index 74d0dec78..000000000 --- a/website/content/en/docs/local deploy guide/deploy.md +++ /dev/null @@ -1,428 +0,0 @@ ---- -title: Advanced Deployments & Air Gap -type: docs -weight: 6 ---- - -These instructions are for users who are looking for a more customizable deployment of LeapfrogAI or require air gap deployment considerations. - -To successfully proceed with the installation and deployment of LeapfrogAI, steps must be executed in the order that they are presented in the following instructions. The LeapfrogAI deployment instructions are designed to guide advanced users through the process of deploying the latest version of LeapfrogAI on Kubernetes. - -## Switch to Sudo - -```bash -# login as required -sudo su -``` - -## Deploy Tools - -### Zarf - -Internet Access: - -```bash -# deploys latest version of Zarf -brew install zarf -``` - -Isolated Network: - -```bash -# download and store on removable media -wget https://github.com/zarf-dev/zarf/releases/download/v0.31.0/zarf_v0.31.0_Linux_amd64 - -# upload from removable media and install -mv zarf_v0.31.0_Linux_amd64 /usr/local/bin/zarf -chmod +x /usr/local/bin/zarf - -# check -zarf version -``` - -### Kubectl - -Internet Access: - -```bash -apt install kubectl -``` - -Isolated Network: - -```bash -# download and store on removable media -wget https://dl.k8s.io/release/v1.28.3/bin/linux/amd64/kubectl - -# upload from removable media and install -install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl - -# check -kubectl version -``` - -## Deploy Kubernetes Cluster - -The following commands are divided into three parts: download, create, and deploy. The main variation between "Internet Access" and "Isolated Network" is that, for an isolated network, users will execute the download and create steps outside the isolated network's environment, while the deploy step is done inside the isolated network. 
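In outline, the split looks like the following sketch; the concrete packages and flags are covered in the sections below:

```bash
# connected environment: fetch sources and build the package
git clone https://github.com/zarf-dev/zarf-package-k3d-airgap.git
cd zarf-package-k3d-airgap
zarf package create --confirm
# copy the resulting zarf-package-*.tar.zst to removable media

# isolated environment: only the deploy step runs here
zarf package deploy zarf-package-*.tar.zst --confirm
```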
- -### Bootstrap k3d - -```git -# download -git clone https://github.com/zarf-dev/zarf-package-k3d-airgap.git -cd zarf-package-k3d-airgap - -# create -zarf package create --confirm - -zarf tools download-init - -cd metallb -zarf package create --confirm -``` - -```git -# deploy -cd ../ # if still in metallb folder -mkdir temp && cd temp -zarf package deploy --set enable_traefik=false --set enable_service_lb=true --set enable_metrics_server=false --set enable_gpus=false ../zarf-package-*.tar.zst - -cd ../ -zarf init --components git-server --confirm - -cd metallb -zarf package deploy --confirm zarf-package-*.tar.zst -``` - -Additional considerations are necessary for GPU deployments: - -```git -# deploy -cd ../ # if still in metallb folder -# temp folder to catch extra files generated during deploy -mkdir temp && cd temp -# largest difference is setting `enable_gpus` to `true` -zarf package deploy --set enable_traefik=false --set enable_service_lb=true --set enable_metrics_server=false --set enable_gpus=true ../zarf-package-*.tar.zst - -cd ../ -zarf init --components git-server --confirm - -cd metallb -zarf package deploy --confirm zarf-package-*.tar.zst -``` - -### UDS DUBBD - -```git -# download -git clone https://github.com/defenseunicorns/uds-package-dubbd.git -cd uds-package-dubbd/k3d/ - -# create -docker login registry1.dso.mil # account creation is required -zarf package create --confirm - -# deploy -zarf package deploy --confirm zarf-package-*.tar.zst -``` - -### Kyverno Configuration - -As of UDS DUBBD, v0.12+, a recently implemented Kyverno policy is causing certain LeapfrogAI pods to be restricted from execution. As we undergo refactoring efforts transitioning towards [Pepr](https://github.com/defenseunicorns/pepr), Kyverno's abstract replacement, the following guidelines outline the process for temporarily modifying the policy status from `Enforce` to `Audit`. 
- -```git -zarf tools kubectl patch clusterpolicy require-non-root-user --type='json' -p='[{"op": "replace", "path": "/spec/validationFailureAction", "value":"Audit"}]' -zarf tools kubectl patch clusterpolicy require-non-root-group --type='json' -p='[{"op": "replace", "path": "/spec/validationFailureAction", "value":"Audit"}]' -``` - -### GPU Support Test (Optional) - -The following support test is an optional addition for GPU deployments and helps confirm that the cluster's pods have access to expected GPU resources: - -```git -# download -git clone https://github.com/justinthelaw/gpu-support-test -cd leapfrogai-gpu-support-test - -# create -zarf package create --confirm - -# deploy -zarf package deploy zarf-package-*.tar.zst -# press "y" for prompt on deployment confirmation -# enter the number of GPU(s) that are expected to be available when prompted - -# clean-up -zarf package remove gpu-support-test -zarf tools registry prune --confirm -``` - -## Deploy LeapfrogAI - -### LeapfrogAI API - -```git -# download -git clone https://github.com/defenseunicorns/leapfrogai-api.git -cd leapfrogai-api/ - -# create -zarf package create --confirm - -# deploy -zarf package deploy zarf-package-*.zst --set ISTIO_ENABLED=true --set ISTIO_INJECTION=enabled --set ISTIO_GATEWAY=leapfrogai --components metallb-config --confirm -# if used without the `--confirm` flag, there are many prompted variables -# please read the variable descriptions in the zarf.yaml for more details -# after deploying the leapfrogai gateway, you may need to terminate the existing tenant gateway - -# configure, this will be removed in a future API release -zarf tools kubectl patch virtualservice leapfrogai -n leapfrogai --type='json' -p ' -[ - { - "op": "replace", - "path": "/spec", - "value": { - "gateways": [ - "istio-system/leapfrogai" - ], - "hosts": [ - "*" - ], - "http": [ - { - "match": [ - { - "uri": { - "prefix": "/leapfrogai-api/" - } - } - ], - "rewrite": { - "uri": "/" - }, - "route": [ - { - "destination": { - "host": "api", - "port": { - "number": 8080 - } - } - } - ] - }, - { - "match": [ - { - "uri": { - "prefix": "/openapi.json" - } - } - ], - "redirect": { - "uri": "/leapfrogai-api/openapi.json" - } - } - ] - } - } -]' -``` - -### Whisper Model (Optional) - -Deploy the Whisper Model for automatic speech recognition that transcribes speech to text. The Whisper Model backend is bundled with pre-packaged components, including Whisper-Base (limited to English language) and Faster-Whisper, which serves as the dedicated inferencing engine. - -```git -# download -git clone https://github.com/defenseunicorns/leapfrogai-backend-whisper.git -cd leapfrogai-backend-whisper - -# create -zarf package create --confirm - -# deploy -zarf package deploy zarf-package-*.tar.zst --confirm -``` - -Additional considerations are necessary for GPU deployments: - -The package deployment command is modified for GPU deployments: - -```git -# deploy -zarf package deploy zarf-package-*.tar.zst --set GPU_ENABLED=true --confirm -``` - -### LLaMA CPP Python - -This backend comes pre-packaged with [synthia-7b-v2.0.Q4_K_M](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GGUF#:~:text=v2.0.Q4_K_M.gguf-,Q4_K_M,-4), and `llama-cpp-python` as the inferencing engine and is primarily aimed at single user CPU deployments. 
- -```git -# download -git clone https://github.com/defenseunicorns/leapfrogai-backend-llama-cpp-python.git -cd leapfrogai-backend-llama-cpp-python - -# create -zarf package create --confirm -``` - -Additional considerations are necessary for GPU deployments: - -The package deployment command is modified for GPU deployments: - -```git -# deploy -zarf package deploy zarf-package-*.tar.zst --set GPU_ENABLED=true --confirm -``` - -### VLLM - -This backend comes pre-packaged with [synthia-7b-v2.0-awq](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-AWQ), and `vllm` as the inferencing engine and is primarily aimed at multi-user GPU deployments. - -```git -# download -git clone https://github.com/defenseunicorns/leapfrogai-backend-vllm.git -cd leapfrogai-backend-vllm - -# create -zarf package create --confirm -``` - -Additional considerations are necessary for GPU deployments: - -The package deployment command is modified for GPU deployments: - -```git -# deploy -zarf package deploy zarf-package-*.tar.zst --set GPU_ENABLED=true --set REQUESTS_GPU=1 --set LIMITS_GPU=1 --set REQUESTS_CPU=0 --set LIMITS_CPU=0 --confirm -``` - -### LeapfrogAI UI (Optional) - -```git -# download -git clone https://github.com/defenseunicorns/leapfrogai-ui -cd leapfrogai-ui - -# create -zarf package create --confirm - -# deploy -cd leapfrogai-ui -zarf package deploy zarf-package-*.tar.zst --confirm -# if used without the `--confirm` flag, there are many prompted variables -# please read the variable descriptions in the zarf.yaml for more details -``` - -### Setup Ingress/Egress - -```git -k3d cluster edit zarf-k3d --port-add "443:30535@loadbalancer" -k3d cluster edit zarf-k3d --port-add "8080:30535@loadbalancer" - -# if the load balancer does not restart -k3d cluster start zarf-k3d -``` - -## LeapfrogAI UI and API - -- Navigate to `https://localhost:8080` to interact with LeapfrogAI UI. -- Navigate to `https://localhost:8080/leapfrogai-api/docs` to see usage details for the LeapfrogAI API. - -## Termination and Cleanup Procedures - -### Stop k3d Cluster - -Perform one of the following cleanup methods. The k3d command is the preferred method: - -```git -k3d cluster stop zarf-k3d -``` - -OR: - -```git -docker ps -# obtain the k3d cluster's container ID -docker stop -``` - -### Stop Zarf Registry - -```git -docker ps -# obtain the registry container ID -docker stop -``` - -### Cleanup - -Executing this command will remove all entities that are not associated with an active process. - -```git -docker system prune -a -f && docker volume prune -f -zarf tools clear-cache -rm -rf /tmp/zarf-* -``` - -## Troubleshooting - -The following outlines occasional deployment issues our teams have identified, which you may also encounter. - -### Cluster Connection - -**Issue:** After performing a restart or restarting the docker service, the cluster cannot be connected with. - -**Action:** - -```git -k3d cluster list -# verify that the cluster has `LOADBALANCER` set to true -# if not, try the following -k3d cluster stop zarf-k3d -k3d cluster start zarf-k3d -``` - -### Disk Pressure - -**Issue:** In certain scenarios, uploading multiple large AI models may lead to storage issues. To address this, there are several measures you can take to either optimize disk space usage or augment available space within a designated partition. - -**Action:** Remove unused files and storage. 
*Executing this command will remove all entities that are not associated with an active process.* Execute the following command sets to eliminate dangling or extraneous items. Additionally, consider deleting any previously deployed Zarf Packages to free up storage space. - -```git -# prune images stored in the local registry -zarf tools registry prune --confirm -# prune docker images, press "y" to confirm -docker image prune -# prune volumes, press "y" to confirm -docker volume prune -# clear zarf cache and temp files -zarf tools clear-cache -rm -rf /tmp/zarf-* -``` - -OR: - -**Action:** - -Check your disk's or mount's remaining space and utilization. - -```git -df -h -``` - -Go to the disk or mount in question, and check on the following paths: - -```git -ls -la /tmp -ls -la /var/lib/docker -``` - -In addition to your present working directory, the above paths are commonly identified as potential sources of excessive space consumption. To resolve this issue, it may be necessary to conduct manual cleanup or allocate additional space for the disks or mounts associated with these paths. - -### GPU Acceleration - -**Issue:** GPU access for Docker containers or pods in the Kubernetes cluster. - -**Action:** Please navigate to and read the [`gpu-support-test`](https://github.com/justinthelaw/gpu-support-test) repository. diff --git a/website/content/en/docs/local deploy guide/quick_start.md b/website/content/en/docs/local deploy guide/quick_start.md deleted file mode 100644 index 2b94d21b9..000000000 --- a/website/content/en/docs/local deploy guide/quick_start.md +++ /dev/null @@ -1,152 +0,0 @@ ---- -title: Quick Start -type: docs -weight: 2 ---- - -# LeapfrogAI UDS Deployment - -The fastest and easiest way to get started with a deployment of LeapfrogAI is by using [UDS](https://github.com/defenseunicorns/uds-core). These quick start instructions show how to deploy LeapfrogAI in either a CPU or GPU-enabled environment. - -## System Requirements - -Please review the following table to ensure your system meets the minimum requirements. LFAI can be run with or without GPU-access, but GPU-enabled systems are recommended due to the performance gains. The following assumes a single personal device: - -| | Minimum | Recommended (Performance) | -|-----|-------------------|---------------------------| -| RAM | 32 GB | 128 GB | -| CPU | 8 Cores @ 3.0 GHz | 32 Cores @ 3.0 GHz | -| GPU | N/A | 2x NVIDIA RTX 4090 GPUs | - -Additionally, please check the list of tested [operating systems](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/#operating-systems) for compatibility. - -## Prerequisites - -- [Python 3.11](https://www.python.org/downloads/release/python-3116/) - - NOTE: Different model packages will require different Python libraries. The libraries required will be listed in the `dev` optional dependencies in each projects `pyproject.toml` file. -- [Docker](https://docs.docker.com/engine/install/) -- [K3D](https://k3d.io/) -- [Zarf](https://docs.zarf.dev/getting-started/install/) -- [UDS CLI](https://github.com/defenseunicorns/uds-cli) - -GPU considerations (NVIDIA GPUs only): - -- NVIDIA GPU must have the most up-to-date drivers installed. -- NVIDIA GPU drivers compatible with CUDA (>=12.2). -- NVIDIA Container Toolkit is available via internet access, pre-installed, or on a mirrored package repository in the air gap. - -## Default Models -LeapfrogAI deploys with certain default models. 
The following models were selected to balance portability and performance for a base deployment: - -| Backend | CPU/GPU Support | Default Model | -|------------------|-----------------|------------------------------------------------------------------------------| -| llama-cpp-python | CPU | [SynthIA-7B-v2.0-GGUF](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GGUF) | -| vllm | GPU | [Synthia-7B-v2.0-GPTQ](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ) | -| text-embeddings | CPU/GPU | [Instructor-XL](https://huggingface.co/hkunlp/instructor-xl) | -| whisper | CPU/GPU | [OpenAI whisper-base](https://huggingface.co/openai/whisper-base) | - -**NOTE:** If a user's system specifications exceed the minimum requirements, advanced users are able to swap out the default model choices with larger or fine-tuned models. - -## Disclaimers - -The default configuration when deploying with GPU support assumes a single GPU. `vllm` is assigned the GPU resource. GPU workloads **_WILL NOT_** run if GPU resources are unavailable to the pod(s). You must provide sufficient NVIDIA GPU scheduling or else the pod(s) will go into a crash loop. - -If you have additional GPU resources, set `gpu_limit: ` in `uds-config.yaml`. The total number of GPUs in your configuration should be less than or equal to the number of GPUs for your hardware. - -If `vllm` is being used with: - -- A quantized model, then `QUANTIZATION` must be set to the quantization method (e.g., `awq`, `gptq`, etc.) -- Tensor parallelism for spreading a model's heads across multiple GPUs, then `TENSOR_PARALLEL_SIZE` must be set to an integer value that: - a) falls within the number of GPU resources (`nvidia.com/gpu`) that are allocatable in the cluster - b) divisible by the number of attention heads in the model architecture (if number of heads is 32, then `TENSOR_PARALLEL_SIZE` could be 2, 4, etc.) - -These `vllm` specific environment variables must be set at the model skeleton level or when the model is deployed into the cluster. - -## Instructions - -Start by cloning the [LeapfrogAI Repository](https://github.com/defenseunicorns/leapfrogai.git): - -``` bash -git clone https://github.com/defenseunicorns/leapfrogai.git -``` - -### CPU - -From within the cloned repository, deploy K3D and the LeapfrogAI bundle: - -``` bash -make create-uds-cpu-cluster - -cd uds-bundles/latest/cpu/ -uds create . -uds deploy uds-bundle-leapfrogai-*.tar.zst --confirm -``` - -### GPU - -In order to test the GPU deployment locally on K3d, use the following command when deploying UDS-Core: - -```bash - make build-k3d-gpu # build the image - make create-uds-gpu-cluster # create a uds cluster equipped with the k3d-gpu image - make test-uds-gpu-cluster # deploy a test gpu pod to see if everything is working - - cd uds-bundles/latest/gpu/ - uds create . - uds deploy uds-bundle-leapfrogai-*.tar.zst --confirm -``` - -## Checking Deployment -Once the cluster and LFAI have deployed, the cluster and pods can be inspected using uds: - -```bash -uds zarf tools monitor -``` - -The following URLs should now also be available to view LFAI resources: - -**DISCLAIMER**: These URls will only be available *after* both K3D-core and LFAI have been deployed. They will also only be available on the host system that deployed the cluster. - -| Tool | URL | -| ---------- | ------------------------------------- | -| UI | | -| API | | - -## Accessing the UI - -LeapfrogAI is integrated with the UDS Core KeyCloak service, which provides authentication via SSO. 
Below are general instructions for accessing the LeapfrogAI UI after a successful UDS deployment of UDS Core and LeapfrogAI. - -1. Connect to the KeyCloak admin panel - a. Run the following to get a port-forwarded tunnel: `uds zarf connect keycloak` - b. Go to the resulting localhost URL and create an admin account -2. Go to ai.uds.dev and press "Login using SSO" -3. Register a new user by pressing "Register Here" -4. Fill-in all of the information - a. The bot detection requires you to scroll and click around in a natural way, so if the Register button is not activated despite correct information, try moving around the page until the bot detection says 100% verified -5. Using an authenticator, follow the MFA steps -6. Go to sso.uds.dev - a. Login using the admin account you created earlier -7. Approve the newly registered user - a. Click on the hamburger menu in the top left to open/close the sidebar - b. Go to the dropdown that likely says "Keycloak" and switch to the "uds" context - c. Click "Users" in the sidebar - d. Click on the newly registered user's username - e. Go to the "Email Verified" switch and toggle it to be "Yes" - f. Scroll to the bottom and press "Save" -8. Go back to ai.uds.dev and login as the registered user to access the UI - -## Clean-up - -To clean-up or perform a fresh install, run the following commands in the context in which you had previously installed UDS Core and LeapfrogAI: - -```bash -k3d cluster delete uds # kills a running uds cluster -uds zarf tools clear-cache # clears the Zarf tool cache -rm -rf ~/.uds-cache # clears the UDS cache -docker system prune -a -f # removes all hanging containers and images -docker volume prune -f # removes all hanging container volumes -``` - -## References - -- [UDS-Core](https://github.com/defenseunicorns/uds-core) diff --git a/website/content/en/docs/local deploy guide/requirements.md b/website/content/en/docs/local deploy guide/requirements.md deleted file mode 100644 index 279ac1403..000000000 --- a/website/content/en/docs/local deploy guide/requirements.md +++ /dev/null @@ -1,90 +0,0 @@ ---- -title: Requirements -type: docs -weight: 4 ---- - -Prior to deploying LeapfrogAI, ensure that the following tools, packages, and requirements are met and present in your environment. - -## Tested Environments - -The following operating systems, hardware, architectures, and system specifications have been tested and validated for our deployment instructions: - -### Operating Systems - -- Ubuntu LTS (jammy) - - 22.04.2 - - 22.04.3 - - 22.04.4 - - 22.04.5 -- Pop!_OS 22.04 LTS -- MacOS Sonoma 14.x / ARM64 (CPU-based deployments only) - -### Hardware - -- 64 CPU cores (`Unknown Compute via Virtual Machine`) and ~250 GB RAM, no GPU. -- 32 CPU cores (`AMD Ryzen Threadripper PRO 5955WX`) and ~250 GB RAM, 2x `NVIDIA RTX A4000` (16Gb vRAM each). -- 64 CPU cores (`Intel Xeon Platinum 8358 CPU`) and ~200Gb RAM, 1x `NVIDIA RTX A10` (16Gb vRAM each). -- 10 CPU cores (`Apple M1 Pro`) and ~32 GB of free RAM, 1x `Apple M1 Pro`. -- 32 CPU cores (`13th Gen Intel Core i9-13900KF`) and ~190GB RAM, 1x `NVIDIA RTX 4090` (24Gb vRAM each). -- 2x 128 CPU cores (`AMD EPYC 9004`) and ~1.4Tb RAM, 8x `NVIDIA H100` (80Gb vRAM each). -- 32 CPU cores (`13th Gen Intel Core i9-13900HX`) and ~64Gb RAM, 1x `NVIDIA RTX 4070` (8Gb vRAM each). 
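To see where your own machine falls in this range before deploying, the driver version and available VRAM can be queried directly (assumes `nvidia-smi` ships with your installed NVIDIA drivers):

```bash
# report GPU model, driver version, and total VRAM for each device
nvidia-smi --query-gpu=name,driver_version,memory.total --format=csv
```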
- -### Architecure - -- Linux/AMD64 -- Linux/ARM64 - - Differentiated instructions will be provided for two scenarios: "Internet Access" and "Isolated Network": - -- **Internet Access:** - - Indicates a system capable of fetching and executing remote dependencies from the internet. -- **Isolated Network:** - - Indicates a system that is isolated and lacks connectivity to external networks or remote repositories. - - Note that "Isolated Network" instructions are also compatible with devices that have internet access. - - For all "Isolated Network" installs, `wget`, `git` `clone` and `zarf package create` commands are assumed to have been completed prior to entering the isolated network. - - For "Isolated Network" installs, ensure files and binaries from these commands are stored on a removable media device and subsequently uploaded to the isolated machine. - - For specific tool versions, it is recommended to follow the "Isolated Network" instructions. - -## System Requirements - -- Standard Unix-based operating system installed. - - Some commands may need to be modified depending on your CLI and package manager. -- Have root `sudo su` access. - - Rootless mode details can be found in the [Docker documentation](https://docs.docker.com/engine/security/rootless/). - -Additional considerations are necessary for GPU deployments: - -- NVIDIA GPU must have the most up-to-date drivers installed. -- NVIDIA GPU drivers compatible with CUDA (>=12.2). -- NVIDIA Container Toolkit is available via internet access, pre-installed, or on a mirrored package repository in the air gap. - -## GPU Deployments - -- The speed and quality of LeapfrogAI, along with its hosted AI models, are significantly influenced by the availability of a robust GPU for offloading model layers. -- By default, each backend is configured to request 1x GPU device. -- Presently, these instructions do not support time-slicing or configuring multi-instance GPU setups. -- Over-scheduling GPU resources beyond their availability may result in the crash of backend pods. -- To prevent crashing, install backends as CPU-only if all available GPU devices are already allocated. - -## Additional User Information - -- All `cd` commands should be executed with respect to your project's working directory (PWD) within the development environment. Each new step should be considered as initiating from the root of that directory. -- For optimal organization, we recommend creating a new PWD named `/leapfrogai` in your home directory and consolidating all components there. -- In cases where a tagged version of a LeapfrogAI or Defense Unicorns release is not desired, the option to build an image from source prior to executing `zarf package create` is available: - -``` bash -docker build -t "ghcr.io/defenseunicorns/leapfrogai/:" . -# find and replace any manifests referencing the image tag (e.g., zarf.yaml, zarf-config.yaml, etc.) -zarf package create zarf-package--*.tar.zst -``` - -- When building your Docker image from source, it is advisable to re-tag and push these images to a local registry container. This practice enhances the efficiency of zarf package creation. Below is an example of how to accomplish this using our whisper backend: - -``` bash -docker run -d -p 5000:5000 --restart=always --name registry registry:2 -docker build -t ghcr.io/defenseunicorns/leapfrogai/whisper:0.4.0 . 
-docker tag ghcr.io/defenseunicorns/leapfrogai/whisper:0.4.0 localhost:5000/defenseunicorns/leapfrogai/whisper:0.4.0
-docker push localhost:5000/defenseunicorns/leapfrogai/whisper:0.4.0
-zarf package create --registry-override ghcr.io=localhost:5000 --set IMG=defenseunicorns/leapfrogai/whisper:0.4.0
-```
diff --git a/website/content/en/docs/local-deploy-guide/_index.md b/website/content/en/docs/local-deploy-guide/_index.md
new file mode 100644
index 000000000..e5d8a1b19
--- /dev/null
+++ b/website/content/en/docs/local-deploy-guide/_index.md
@@ -0,0 +1,9 @@
+---
+title: Local Deployment Guide
+type: docs
+weight: 1
+---
+
+This documentation serves as a comprehensive guide for users, providing instructions on the system requirements, setup, and deployment of LeapfrogAI to their local environment. Please note that the specified order of steps must be followed to ensure an efficient deployment process.
+
+This guide should only be used for demonstration purposes and does not provide instructions for production deployments of LeapfrogAI.
diff --git a/website/content/en/docs/local-deploy-guide/components.md b/website/content/en/docs/local-deploy-guide/components.md
new file mode 100644
index 000000000..7bf1b6550
--- /dev/null
+++ b/website/content/en/docs/local-deploy-guide/components.md
@@ -0,0 +1,61 @@
+---
+title: Components
+type: docs
+weight: 4
+---
+
+## Components
+
+### LeapfrogAI API
+
+LeapfrogAI offers an API closely aligned with OpenAI's, facilitating compatibility so that tools developed with OpenAI/ChatGPT operate seamlessly with a LeapfrogAI backend. The LeapfrogAI API is a Python API that exposes LLM backends, via FastAPI and gRPC, in the OpenAI API specification.
+
+### Backend
+
+LeapfrogAI offers several backends for a variety of use cases:
+
+| Backend | AMD64 Support | ARM64 Support | CUDA Support | Docker Ready | K8s Ready | Zarf Ready |
+| --- | --- | --- | --- | --- | --- | --- |
+| [llama-cpp-python](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/llama-cpp-python) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [whisper](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/whisper) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [text-embeddings](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/text-embeddings) | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| [vllm](https://github.com/defenseunicorns/leapfrogai/tree/main/packages/vllm) | ✅ | ❌ | ✅ | ✅ | ✅ | ✅ |
+
+### Flavors
+
+Each component has different images and values that refer to a specific image registry and/or hardening source. These images are packaged using [Zarf Flavors](https://docs.zarf.dev/ref/examples/package-flavors/):
+
+1. `upstream`: uses upstream vendor images from open source container registries and repositories
+2. 🚧 `registry1`: uses [IronBank hardened images](https://repo1.dso.mil/dsop) from the Repo1 Harbor registry
+3. 🚧 `unicorn`: uses [Chainguard hardened images](https://www.chainguard.dev/chainguard-images) from the Chainguard registry
+
+### Artifact Support
+
+LeapfrogAI contains built-in embeddings for RAG and transcription / translation solutions that can handle many different file types. Many of these capabilities are accessible via the LeapfrogAI API. The supported artifact types are as follows:
+
+#### Transcription / Translation
+
+- All formats supported by `ffmpeg -formats`, e.g., `.mp3`, `.wav`, `.mp4`, etc.
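For example, a transcription request through the OpenAI-style surface might look like the following; the route mirrors OpenAI's audio API, and the host and model name are assumptions:

```bash
# transcribe a local audio file with the whisper backend
curl -s "https://leapfrogai-api.uds.dev/openai/v1/audio/transcriptions" \
  -H "Authorization: Bearer $LEAPFROGAI_API_KEY" \
  -F model="whisper" \
  -F file="@sample.wav"
```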
+ +#### Embeddings for RAG + +- `.pdf` +- `.txt` +- `.html` +- `.htm` +- `.csv` +- `.md` +- `.doc` +- `.docx` +- `.xlsx` +- `.xls` +- `.pptx` +- `.ppt` + +### Software Development Kit + +The LeapfrogAI SDK offers a standardized collection of Protobuf and Python utilities designed to facilitate the implementation of backends and gRPC. Please see the [LeapfrogAI SDK](https://github.com/defenseunicorns/leapfrogai/tree/main/src/leapfrogai_sdk) sub-directory for the source code and details. + +### User Interface + +LeapfrogAI offers user-friendly interfaces tailored for common use-cases, including chat, summarization, and transcription, providing accessible options for users to initiate these tasks. Please see the [LeapfrogAI UI](https://github.com/defenseunicorns/leapfrogai/tree/main/src/leapfrogai_ui) GitHub repository for additional information. diff --git a/website/content/en/docs/local-deploy-guide/dependencies.md b/website/content/en/docs/local-deploy-guide/dependencies.md new file mode 100644 index 000000000..8c6edf4a7 --- /dev/null +++ b/website/content/en/docs/local-deploy-guide/dependencies.md @@ -0,0 +1,99 @@ +--- +title: Dependencies +type: docs +weight: 2 +--- + +This documentation addresses the local deployment dependencies of LeapfrogAI, a self-hosted generative AI platform. LeapfrogAI extends the diverse capabilities and modalities of AI models to various environments, ranging from cloud-based deployments to servers with ingress and egress limitations. With LeapfrogAI, teams can deploy APIs aligned with OpenAI's API specifications, empowering teams to create and utilize tools compatible with nearly any model and code library available. Importantly, all operations take place locally, ensuring users can maintain the security of their information and sensitive data within their own environments + +Follow the outlined steps to ensure that your device is configured to execute LeapfrogAI workloads across local development scenarios. Please note that these instructions presume you have root access. + +### Host Dependencies + +Ensure that the following tools and packages are installed in your environment according to the instructions below: + +- [Git](https://git-scm.com/) +- [Docker](https://docs.docker.com/engine/install/) +- [K3D](https://k3d.io/) +- [UDS CLI](https://github.com/defenseunicorns/uds-cli) + +### Install Git + +- Download [Git](https://git-scm.com/downloads) and follow the instructions on the [Git documentation website](https://git-scm.com/book/en/v2/Getting-Started-Installing-Git) + +### Install Docker + +- Follow the [instructions](https://docs.docker.com/engine/install/) to install Docker onto your system. +- Systems using an NVIDIA GPU must also follow the [GPU instructions below](#gpu-specific-instructions) + +### Install Kubectl + +- Follow the [instructions](https://kubernetes.io/docs/tasks/tools/#kubectl) to install kubectl onto your system. + +### Install K3d + +- Follow the [instructions](https://k3d.io/) to install k3d onto your system. + +### Install UDS CLI + +- Follow the [instructions](https://github.com/defenseunicorns/uds-cli#install) to install UDS CLI onto your system. 
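On systems with Homebrew, the install can be as simple as the following one-liner (assuming the `defenseunicorns/tap` formula; see the releases page linked below for alternatives):

```bash
brew tap defenseunicorns/tap && brew install uds
```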
+
+- As Homebrew does not install packages to the root directory, it is advisable to manually add the `uds` binary to the root.
+- In cases where Docker is installed in a rootless configuration, certain systems may encounter container access issues if Docker is not executed with root privileges.
+- To install `uds` as root, execute the following command in your terminal and ensure that the version number is replaced with the most recent [release](https://github.com/defenseunicorns/uds-cli/releases):
+
+```bash
+# where $UDS_VERSION is the latest UDS CLI release
+wget -O uds https://github.com/defenseunicorns/uds-cli/releases/download/$UDS_VERSION/uds-cli_${UDS_VERSION}_Linux_amd64 && \
+  sudo chmod +x uds && \
+  sudo mv uds /usr/local/bin/
+```
+
+## GPU Specific Instructions
+
+LeapfrogAI exclusively supports NVIDIA GPUs at this point in time. The following instructions are tailored for users utilizing an NVIDIA GPU.
+
+If you are experiencing issues even after carefully following the instructions below, please refer to the [Developer Documentation](https://github.com/defenseunicorns/leapfrogai/tree/main/docs/DEVELOPMENT.md) troubleshooting section in the GitHub repository.
+
+### NVIDIA Drivers
+
+- Ensure that the proper [NVIDIA drivers](https://www.nvidia.com/download/index.aspx) are installed (>=525.60).
+- Follow the [driver download](https://www.nvidia.com/download/index.aspx) by identifying your hardware from the provided list.
+
+### CUDA Toolkit
+
+- Follow the [instructions](https://developer.nvidia.com/cuda-downloads) to download the CUDA toolkit (>=12.2x). This toolkit is only required on the system that is building the Zarf Packages.
+
+### NVIDIA Container Toolkit
+
+- [Read the pre-requisites for installation and follow the instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#installing-with-apt) to download and install the NVIDIA container toolkit (>=1.14).
+- After the successful installation of the toolkit, follow the [toolkit instructions](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html#configuring-docker) to configure the default Docker runtime for NVIDIA:
+
+  ```bash
+  nvidia-ctk runtime configure --runtime=docker --config=$HOME/.config/docker/daemon.json
+  ```
+
+- Verify that `nvidia` is now a runtime available to the Docker daemon to use:
+
+  ```bash
+  # the expected output should be similar to: `Runtimes: io.containerd.runc.v2 nvidia runc`
+  docker info | grep -i nvidia
+  ```
+
+- [Try out a sample CUDA workload](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/sample-workload.html) to ensure your Docker containers have access to the GPUs after configuration.
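  The linked sample workload amounts to running `nvidia-smi` from inside a container, for example:

  ```bash
  # should print the same GPU table inside the container as on the host
  docker run --rm --runtime=nvidia --gpus all ubuntu nvidia-smi
  ```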
+- (OPTIONAL) You can configure Docker to use the `nvidia` runtime by default by re-running the container toolkit post-installation configuration step with the `--set-as-default` flag:
+
+  ```bash
+  nvidia-ctk runtime configure --runtime=docker --config=$HOME/.config/docker/daemon.json --set-as-default
+  ```
+
+- (OPTIONAL) Verify that the default runtime is changed by running the following command:
+
+  ```bash
+  # the expected output should be similar to: `Default Runtime: nvidia`
+  docker info | grep "Default Runtime"
+  ```
+
+### Deploy LeapfrogAI
+
+- After ensuring that all system dependencies and requirements are fulfilled, refer to the Quick Start guide for comprehensive instructions on deploying LeapfrogAI within your local environment.
diff --git a/website/content/en/docs/local-deploy-guide/quick_start.md b/website/content/en/docs/local-deploy-guide/quick_start.md
new file mode 100644
index 000000000..cb65bd5ba
--- /dev/null
+++ b/website/content/en/docs/local-deploy-guide/quick_start.md
@@ -0,0 +1,115 @@
+---
+title: Quick Start
+type: docs
+weight: 3
+---
+
+# LeapfrogAI UDS Deployment
+
+The fastest and easiest way to get started with a deployment of LeapfrogAI is by using [UDS](https://github.com/defenseunicorns/uds-core). These quick start instructions show how to deploy LeapfrogAI in either a CPU or GPU-enabled environment.
+
+## Pre-Requisites
+
+See the [Dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/) and [Requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/) pages for more details.
+
+## Default Models
+
+LeapfrogAI deploys with certain default models. The following models were selected to balance portability and performance for a base deployment:
+
+| Backend            | CPU/GPU Support   | Default Model                                                                    |
+| ------------------ | ----------------- | ------------------------------------------------------------------------------ |
+| llama-cpp-python   | CPU               | [SynthIA-7B-v2.0-GGUF](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GGUF)    |
+| vllm               | GPU               | [Synthia-7B-v2.0-GPTQ](https://huggingface.co/TheBloke/SynthIA-7B-v2.0-GPTQ)    |
+| text-embeddings    | CPU/GPU           | [Instructor-XL](https://huggingface.co/hkunlp/instructor-xl)                    |
+| whisper            | CPU/GPU           | [OpenAI whisper-base](https://huggingface.co/openai/whisper-base)               |
+
+If a user's system specifications exceed the [minimum requirements](https://docs.leapfrog.ai/docs/local-deploy-guide/requirements/), advanced users are able to swap out the default model choices with larger or fine-tuned models.
+
+Examples of other models to put into vLLM or LLaMA C++ Python that are neither sponsored nor owned by Defense Unicorns include:
+
+- [defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g](https://huggingface.co/defenseunicorns/Hermes-2-Pro-Mistral-7B-4bit-32g)
+- [hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4](https://huggingface.co/hugging-quants/Meta-Llama-3.1-70B-Instruct-AWQ-INT4)
+- [justinthelaw/Phi-3-mini-128k-instruct-4bit-128g](https://huggingface.co/justinthelaw/Phi-3-mini-128k-instruct-4bit-128g)
+- [NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF](https://huggingface.co/NousResearch/Nous-Hermes-2-Mixtral-8x7B-DPO-GGUF)
+
+The default configuration when deploying with GPU support assumes a single GPU. `vllm` is assigned the GPU resource. GPU workloads **_WILL NOT_** run if GPU resources are unavailable to the pod(s). You must provide sufficient NVIDIA GPU scheduling or else the pod(s) will go into a crash loop.
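Before deploying a GPU bundle, it is worth confirming that the cluster actually advertises a schedulable GPU; this check is a sketch using the bundled kubectl, with `nvidia.com/gpu` being the standard NVIDIA device-plugin resource name:

```bash
# prints the allocatable NVIDIA GPU count per node; empty output means none are advertised
uds zarf tools kubectl get nodes -o jsonpath='{.items[*].status.allocatable.nvidia\.com/gpu}'
```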
## Building the UDS Bundle

If you already have a pre-built UDS bundle, please skip to [Deploying the UDS Bundle](#deploying-the-uds-bundle).

1. Start by cloning the [LeapfrogAI Repository](https://github.com/defenseunicorns/leapfrogai):

   ```bash
   git clone https://github.com/defenseunicorns/leapfrogai.git
   ```

2. From within the cloned repository, create the LeapfrogAI bundle using **ONE** of the following (deployment is covered in the next section):

   ```bash
   # For CPU-only
   cd bundles/latest/cpu/
   uds create .

   # For compatible AMD64, NVIDIA CUDA-capable GPU machines
   cd bundles/latest/gpu/
   uds create .
   ```

## Deploying the UDS Bundle

1. Deploy a UDS Kubernetes cluster with **ONE** of the following:

   ```bash
   make create-uds-cpu-cluster # if you have CPUs only
   # OR
   make create-uds-gpu-cluster # if you have GPUs (macOS not supported)
   ```

2. Deploy the bundle you created in the [previous steps](#building-the-uds-bundle):

   ```bash
   # make sure you are in the directory with the UDS bundle archive
   uds deploy uds-bundle-leapfrogai*.tar.zst
   ```

## Checking Deployment

Once the cluster and LeapfrogAI have deployed, the cluster and its pods can be inspected using the UDS CLI:

```bash
uds zarf tools monitor
```

These URLs will only be accessible *after* the UDS Kubernetes cluster and LeapfrogAI have been deployed:

| Tool                  | URL                                    |
| --------------------- | -------------------------------------- |
| LeapfrogAI UI         |                                        |
| LeapfrogAI API        |                                        |
| Supabase Console      |                                        |
| KeyCloak User Page    |                                        |
| KeyCloak Admin Panel  |                                        |

## Clean-up

To clean up or perform a fresh install, run the following commands in the context in which you previously installed UDS Core and LeapfrogAI:

```bash
k3d cluster delete uds                     # kills a running uds cluster
uds zarf tools clear-cache                 # clears the Zarf tool cache
rm -rf ~/.uds-cache && rm -rf /tmp/zarf-*  # clears the UDS and Zarf temporary files
docker system prune -a -f                  # removes all unused containers and images
docker volume prune -f                     # removes all unused container volumes
```

## References

- [UDS Core](https://github.com/defenseunicorns/uds-core)
- [UDS CLI](https://github.com/defenseunicorns/uds-cli)

## Further Tinkering

For more LeapfrogAI customization options and developer-level documentation, please visit the [LeapfrogAI GitHub](https://github.com/defenseunicorns/leapfrogai) project.

diff --git a/website/content/en/docs/local-deploy-guide/requirements.md b/website/content/en/docs/local-deploy-guide/requirements.md
new file mode 100644
index 000000000..5d599c0a2
--- /dev/null
+++ b/website/content/en/docs/local-deploy-guide/requirements.md
@@ -0,0 +1,51 @@
---
title: Requirements
type: docs
weight: 1
---

Prior to deploying LeapfrogAI, ensure that the following tools and packages are present in your environment and that the requirements below are met. See the [Dependencies](https://docs.leapfrog.ai/docs/local-deploy-guide/dependencies/) page for more details.

## System Requirements

Please review the following table to ensure your system meets the minimum requirements.
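To compare your own machine against the table below, standard Linux utilities can report the relevant specifications (a minimal sketch; the `nvidia-smi` line applies only once NVIDIA drivers are installed):

```bash
nproc                                                  # CPU core count
free -h | grep -i mem                                  # total and available RAM
df -h /                                                # free disk space on the root volume
nvidia-smi --query-gpu=name,memory.total --format=csv  # GPU model and VRAM, if present
```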
The GPU requirements apply only if you plan to deploy a GPU-accelerated version of the LeapfrogAI stack.

|      | Minimum (CPU)      | Minimum (GPU)              | Recommended (Performance) |
|------|--------------------|----------------------------|---------------------------|
| DISK | 256 GB             | 256 GB                     | 1 TB                      |
| RAM  | 32 GB              | 32 GB                      | 128 GB                    |
| CPU  | 8 Cores @ 3.0 GHz  | 8 Cores @ 3.0 GHz          | 32 Cores @ 3.0 GHz        |
| GPU  | N/A                | 1x NVIDIA GPU @ 12 GB VRAM | 2x NVIDIA RTX 4090 GPUs   |

## Tested Environments

The following is a non-exhaustive list of operating systems, hardware, architectures, and system specifications that have been tested and validated for our deployment instructions:

### Operating Systems

- Ubuntu LTS
  - 22.04.2
  - 22.04.3
  - 22.04.4
  - 22.04.5
- Ubuntu
  - 20.04.6
- Pop!_OS LTS
  - 22.04.x
- macOS Sonoma / ARM64 (CPU-only)
  - 14.x

### Hardware

- 64 CPU cores (`Unknown Compute via Virtual Machine`) and ~250 GB RAM, no GPU.
- 32 CPU cores (`AMD Ryzen Threadripper PRO 5955WX`) and ~250 GB RAM, 2x `NVIDIA RTX A4000` (16 GB VRAM each).
- 64 CPU cores (`Intel Xeon Platinum 8358 CPU`) and ~200 GB RAM, 1x `NVIDIA RTX A10` (16 GB VRAM).
- 10 CPU cores (`Apple M1 Pro`) and ~32 GB of free RAM, 1x `Apple M1 Pro`.
- 32 CPU cores (`13th Gen Intel Core i9-13900KF`) and ~190 GB RAM, 1x `NVIDIA RTX 4090` (24 GB VRAM).
- 2x 128 CPU cores (`AMD EPYC 9004`) and ~1.4 TB RAM, 8x `NVIDIA H100` (80 GB VRAM each).
- 32 CPU cores (`13th Gen Intel Core i9-13900HX`) and ~64 GB RAM, 1x `NVIDIA RTX 4070` (8 GB VRAM).

### Architectures

- Linux/AMD64
- Linux/ARM64

diff --git a/website/content/en/docs/prod deployment/_index.md b/website/content/en/docs/production-guide/_index.md
similarity index 66%
rename from website/content/en/docs/prod deployment/_index.md
rename to website/content/en/docs/production-guide/_index.md
index eba9b1810..23263d101 100644
--- a/website/content/en/docs/prod deployment/_index.md
+++ b/website/content/en/docs/production-guide/_index.md
@@ -5,4 +5,4 @@ weight: 1
 draft: true
 ---
 
-## Overview
+## 🚧 _**UNDER CONSTRUCTION**_ 🚧

diff --git a/website/hugo.toml b/website/hugo.toml
index f17719b46..8534a153c 100644
--- a/website/hugo.toml
+++ b/website/hugo.toml
@@ -15,8 +15,8 @@ uglyURLs = false
 # project-relative or absolute and even a symbolic link. For other modules it must be
 # project-relative.
 # target
-# Where it should be mounted into Hugo’s virtual filesystem. It must start with one of
-# Hugo’s component folders: static, content, layouts, data, assets, i18n, or archetypes.
+# Where it should be mounted into Hugo's virtual filesystem. It must start with one of
+# Hugo's component folders: static, content, layouts, data, assets, i18n, or archetypes.
 # E.g. content/blog.
 # [[module.mounts]]
@@ -49,9 +49,11 @@ proxy = "direct"
 archived_version = false
 copyright = "Defense Unicorns"
-  github_project_repo = "https://github.com/defenseunicorns/leapfrogai-docs"
-  github_repo = "https://github.com/defenseunicorns/leapfrogai-docs"
-  version = "v1.0.0"
+  github_project_repo = "https://github.com/defenseunicorns/leapfrogai"
+  github_repo = "https://github.com/defenseunicorns/leapfrogai"
+  # x-release-please-start-version
+  version = "v0.11.0"
+  # x-release-please-end
 # version_menu = "v1"
 # url_latest_version = "https://latest-version"