diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 00000000..16b40db3 --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1 @@ +* david.b.kinder@intel.com suyue.chen@intel.com feng.tian@intel.com malini.bhandaru@intel.com preethi.venkatesh@intel.com rachel.roumeliotis@intel.com tom.f.lenth@intel.com diff --git a/.gitignore b/.gitignore index e35d8850..e76724e0 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ _build +.vscode diff --git a/.known-issues/sphinx.conf b/.known-issues/sphinx.conf index d99cf8d9..2affba84 100644 --- a/.known-issues/sphinx.conf +++ b/.known-issues/sphinx.conf @@ -13,8 +13,14 @@ # ^.*WARNING: toctree contains reference to document .*that doesn't have a title: no link will be generated$ # ignore intradoc targets -#^.*WARNING: 'myst' cross-reference target not found: '[-a-zA-Z0-9]*' \[myst.xref_missing\]$ # let's ignore all missing targets for now ^.*WARNING: 'myst' cross-reference target not found: '[^\']*' \[myst.xref_missing\]$ -# need to ignore .md files not is a toc tree for now too +# +^.*WARNING: local id not found in doc .* \[myst.xref_missing\]$ +# ignore .md files not in a toctree (used this temporarily) #^.*md: WARNING: document isn't included in any toctree$ +# +# Mermaid config options +^.*WARNING: 'mermaid': Unknown option keys: .*\[myst.directive_option\]$ +# Ignore unknown pygments lexer names +^.*WARNING: Pygments lexer name .* is not known$ diff --git a/README.md b/README.md index 3864c7c9..a9285060 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ **Mission**: Create an open platform project that enables the creation of open, multi-provider, robust, and composable GenAI solutions that harness the best innovation across the ecosystem. -OPEA sites within the Linux Foundation AI & Data Organization: +Here are useful OPEA sites within the Linux Foundation AI & Data Organization: * Website: [https://opea.dev](https://opea.dev) * X/Twitter: [https://twitter.com/opeadev](https://twitter.com/opeadev) @@ -12,11 +12,15 @@ OPEA sites within the Linux Foundation AI & Data Organization: The OPEA platform includes: -- Detailed framework of composable building blocks for state-of-the-art generative AI systems including LLMs, data stores, and prompt engines -- Architectural blueprints of retrieval-augmented generative AI component stack structure, and end-to-end workflows -- A four-step assessment for grading generative AI systems around performance, features, trustworthiness, and enterprise-grade readiness +- Composable building blocks for state-of-the-art generative AI systems + including LLMs, data stores, and prompt engines +- Architectural blueprints of retrieval-augmented generative AI end-to-end workflows +- A four-step assessment for grading generative AI systems around performance, + features, trustworthiness, and enterprise-grade readiness -Check out the [LF AI & Data Press Release](https://lfaidata.foundation/blog/2024/04/16/lf-ai-data-foundation-launches-open-platform-for-enterprise-ai-opea-for-groundbreaking-enterprise-ai-collaboration/) and [Intel's blog post](https://www.intel.com/content/www/us/en/developer/articles/news/introducing-the-open-platform-for-enterprise-ai.html). 
+Check out the +[LF AI & Data Press Release](https://lfaidata.foundation/blog/2024/04/16/lf-ai-data-foundation-launches-open-platform-for-enterprise-ai-opea-for-groundbreaking-enterprise-ai-collaboration/) and +[Intel's blog post](https://www.intel.com/content/www/us/en/developer/articles/news/introducing-the-open-platform-for-enterprise-ai.html). ## Technical Steering Committee diff --git a/codeowner.md b/codeowner.md deleted file mode 100644 index 5a34fba4..00000000 --- a/codeowner.md +++ /dev/null @@ -1,50 +0,0 @@ -# OPEA Project Code Owners - -These tables list the GitHub IDs of code owners. For a PR review, please contact the corresponding owner. -- [GenAIExamples](#genaiexamples) -- [GenAIComps](#genaicomps) -- [GenAIEval](#genaieval) -- [GenAIInfra](#genaiinfra) -- [CICD](#cicd) - -## GenAIExamples - -| examples | owner | -|:-------------:|:-----------:| -| AudioQnA | Spycsh | -| ChatQnA | lvliang-intel| -| CodeGen | lvliang-intel| -| CodeTrans | Spycsh | -| DocSum | Spycsh | -| SearchQnA | letonghan | -| Language Translation |letonghan| -| VisualQnA | lvliang-intel| -| Others | lvliang-intel| - -## GenAIComps - -| comps | owner | -|:-------------:|:-----------:| -|asr |Spycsh | -|cores |lvliang-intel| -|dataprep |XinyuYe-Intel| -|embedding |XuhuiRen | -|guardrails |letonghan | -|llms |lvliang-intel| -|reranks |XuhuiRen | -|retrievers |XuhuiRen | -|tts |Spycsh | - -## GenAIEval - -lvliang-intel, changwangss, lkk12014402 - -## GenAIInfra - -mkbhanda, irisdingbj, jfding, ftian1, yongfengdu - -## CICD - -chensuyue,daisy-ycguo, ashahba, preethivenkatesh - - diff --git a/community/CONTRIBUTING.md b/community/CONTRIBUTING.md index 0d64ac25..7c4ac39d 100644 --- a/community/CONTRIBUTING.md +++ b/community/CONTRIBUTING.md @@ -3,27 +3,6 @@ Thanks for considering contributing to OPEA project. The contribution process is similar with other open source projects on Github, involving an amount of open discussion in issues and feature requests between the maintainers, contributors and users. -## Table of Contents - - - -- [All The Ways to Contribute](#all-the-ways-to-contribute) - - [Community Discussions](#community-discussions) - - [Documentations](#documentations) - - [Reporting Issues](#reporting-issues) - - [Proposing New Features](#proposing-new-features) - - [Submitting Pull Requests](#submitting-pull-requests) - - [Create Pull Request](#create-pull-request) - - [Pull Request Checklist](#pull-request-checklist) - - [Pull Request Template](#pull-request-template) - - [Pull Request Acceptance Criteria](#pull-request-acceptance-criteria) - - [Pull Request Status Checks Overview](#pull-request-status-checks-overview) - - [Pull Request Review](#pull-request-review) -- [Support](#support) -- [Contributor Covenant Code of Conduct](#contributor-covenant-code-of-conduct) - - - ## All The Ways To Contribute ### Community Discussions @@ -57,7 +36,11 @@ It is not necessary for changes like: #### Step-by-Step guidelines -- Follow the [RFC Template](./rfc_template.md) to propose your idea. +- Follow this RFC Template to propose your idea (found in the docs repo community/rfcs/rfc_template.txt): + + ```{literalinclude} rfcs/rfc_template.txt + ``` + - Submit the proposal to the `Issues` page of the corresponding OPEA github repository. - Reach out to your RFC's assignee if you need any help with the RFC process. - Amend your proposal in response to reviewer's feedback. 
@@ -80,7 +63,11 @@ If you are new to GitHub, view the pull request [How To](https://help.github.com #### Pull Request Template -See [PR template](./pull_request_template.md) +When you submit a PR, you'll be presented with a PR template that looks +something like this: + +```{literalinclude} pull_request_template.txt +``` #### Pull Request Acceptance Criteria diff --git a/community/TSC.rst b/community/TSC.rst new file mode 100644 index 00000000..b8892dd5 --- /dev/null +++ b/community/TSC.rst @@ -0,0 +1,51 @@ +Technical Steering Committee (TSC) +################################## + +As defined in the :doc:`OPEA Charter `, the Technical Steering +Committee is responsible for all technical oversight of the OPEA project +including: + +* coordinating the technical direction of the OPEA project; +* approving project or system proposals; +* creating committees or working groups (for example, an executive or + architectural committee or end-user advisory committee) to support the + OPEA project; +* appointing representatives to work with other open source or open standards + communities; +* establishing community norms, workflows, issuing releases, and security issue + reporting policies; +* approving and implementing policies and processes for contributing; +* discussions, seeking consensus, and where necessary, voting on technical + matters relating to the code base that affect multiple sub-projects; +* coordinating any marketing, events, or communications regarding the OPEA project. + +Refer to the :doc:`OPEA Charter ` for more details. + +Technical Steering Committee Members +************************************ + +.. list-table:: TSC Members (as of September 6, 2024) + :header-rows: 1 + + * - TSC Member Name + - Member's Title and Company + * - `Malini Bhandaru `_ (Chair) + - Senior Principal Engineer, Intel + * - `Amr Abdelhalem `_ + - SVP, Head of Cloud Platforms, Fidelity + * - `Nathan Cartwright `_ + - Chief Architect - AI, CDW + * - `Justin Cormack `_ + - CTO, Docker + * - `Ke Ding `_ + - Senior Principal AI Engineer, Intel + * - Steve Grubb + - Senior Principal Engineer, Red Hat + * - `Robert Hafner `_ + - Senior Principal Architect, Comcast + * - `Melissa Mckay `_ + - Head of Developer Relations, JFrog + * - `Logan Markewich `_ + - Founding Software Developer, LlamaIndex + * - `Nick Ni `_ (Interim) + - Senior Director AI Product Management, AMD diff --git a/community/TSC_Gives_and_gets.md b/community/TSC_Gives_and_gets.md deleted file mode 100644 index 25f5b8a8..00000000 --- a/community/TSC_Gives_and_gets.md +++ /dev/null @@ -1,18 +0,0 @@ -# Technical Steering Committee (TSC) Gives & Gets - -## TSC Member Gives - -- Attend two 1-hour meetings a month -- Keep up with technical changes/suggestions in order to vote -- Facilitate contributions from your company and others -- Help resolve issues -- Promote the project via your company’s social media channels (optional) -- Attend OPEA events (optional) - -## TSC Member Gets - -- A vote and voice in steering the direction of the project - - Control of project scope and budget allocation -- Thought leadership -- Build your company’s reputation in GenAI technology space -- Collaborate/partner with other companies to build business opportunities diff --git a/community/codeowner.md b/community/codeowner.md new file mode 100644 index 00000000..0cede2b1 --- /dev/null +++ b/community/codeowner.md @@ -0,0 +1,28 @@ +# OPEA Project Code Owners + +These tables list the GitHub code owners, as found in the CODEOWNERS file in the +corresponding OPEA repository. Code owners are responsible for code and +documentation in a repository. They are automatically requested for +review when someone opens a pull request (PR) that modifies code or +documentation that they own.
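+
+For reference, each line of a CODEOWNERS file maps a file pattern (using
+`.gitignore`-style syntax) to one or more owners, identified by GitHub
+username or email address. The patterns and addresses below are illustrative
+only, not entries from an actual OPEA CODEOWNERS file:
+
+```text
+# Fallback owners for everything in the repository
+*  docs-maintainer@example.com
+
+# Owners for all files under one folder
+/community/  community-owner@example.com
+```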
+ +Select or contact the corresponding area owner for a PR review or questions +about content within a repository. + + +```{include} codeowners.txt +``` + +----- + +## GenAIInfra + +* mkbhanda, irisdingbj, jfding, ftian1, yongfengdu + +## Continuous Integration (CI/CD) Owners + +CI/CD processing is defined and managed by these owners: + +* chensuyue, daisy-ycguo, ashahba, preethivenkatesh + + diff --git a/community/index.rst b/community/index.rst index cff9ad6f..da60a330 100644 --- a/community/index.rst +++ b/community/index.rst @@ -30,7 +30,7 @@ support systems: * **Source Code in GitHub**: OPEA Project source code is maintained on a public GitHub repository at https://github.com/opea-project. You'll find information about getting access to the repository and how to - contribute to the project in this `Contribution Guide`_ document. + contribute to the project in this :doc:`Contribution Guide `. * **Documentation**: Project technical documentation is developed along with the project's code, and can be found at @@ -43,9 +43,6 @@ support systems: * **Mailing List**: TBD -.. _Contribution Guide: https://opea-project.github.io/latest/community/CONTRIBUTING.html - - Contributing Guides ******************* @@ -54,11 +51,11 @@ Contributing Guides ../README CONTRIBUTING - ../codeowner + codeowner SECURITY ../developer-guides/doc_guidelines - ../developer-guides/graphviz ../developer-guides/docbuild + ../developer-guides/graphviz Roadmaps ******** @@ -77,18 +74,10 @@ Project Governance :maxdepth: 1 charter + TSC CODE_OF_CONDUCT SECURITY -Technical Steering Committee -**************************** - -.. toctree:: - :maxdepth: 1 - - TSC_Gives_and_gets - - RFC Proposals ************* diff --git a/community/pull_request_template.txt b/community/pull_request_template.txt index 9de5e89f..25b5629d 100644 --- a/community/pull_request_template.txt +++ b/community/pull_request_template.txt @@ -1,10 +1,9 @@ -# OPEA Pull Request Template - ## Description The summary of the proposed changes as long as the relevant motivation and context. ## Issues + List the issue or RFC link this PR is working on. If there is no such link, please mark it as `n/a`. ## Type of change @@ -14,6 +13,7 @@ List the type of change like below. Please delete options that are not relevant. - [ ] Bug fix (non-breaking change which fixes an issue) - [ ] New feature (non-breaking change which adds new functionality) - [ ] Breaking change (fix or feature that would break existing design and interface) +- [ ] Others (enhancement, documentation, validation, etc.) ## Dependencies @@ -21,5 +21,4 @@ List the newly introduced 3rd party dependency if exists. ## Tests -Describe the tests that you ran to verify your changes. Please list the relevant details for your test configuration and step-by-step reproduce instructioins. - +Describe the tests that you ran to verify your changes. 
diff --git a/community/rfcs/24-06-21-OPEA-001-Guardrails-Gateway.md b/community/rfcs/24-06-21-OPEA-001-Guardrails-Gateway.md new file mode 100644 index 00000000..f413b6b9 --- /dev/null +++ b/community/rfcs/24-06-21-OPEA-001-Guardrails-Gateway.md @@ -0,0 +1,176 @@ +## RFC Title + +Guardrails Gateway + +## RFC Content + +### Author + +[zhxie](https://github.com/zhxie), [Forrest-zhao](https://github.com/Forrest-zhao), [ruijin-intel](https://github.com/ruijin-intel) + +### Status + +Under Review + +### Objective + +Deploy opt-in guardrails in the gateway on the deployment environment. + +### Motivation + +- Reduce latency in network transmission and protocol encoding/decoding. - Support stateful guardrails. - Enhance observability. - Leverage OpenVINO for AI acceleration instructions including AVX, AVX512 and AMX. + +### Design Proposal + +#### Inference In Place + +The LangChain-like workflow is presented below. + +```mermaid graph LR Entry(Entry)-->Gateway Gateway-->Embedding Embedding-->Gateway Gateway-->Retrieve Retrieve-->Gateway Gateway-->Rerank Rerank-->Gateway Gateway-->LLM LLM-->Guardrails Guardrails-->LLM LLM-->Gateway ``` + +All services use RESTful API calling to communicate. There is overhead in network transmission and protocol encoding/decoding. Early studies have shown that each hop adds about 3 ms of latency, which can be even longer when mTLS is turned on for security reasons in inter-node deployments. + +The opt-in guardrails in the gateway work in the architecture given below. + +```mermaid graph LR Entry(Entry)-->Gateway["Gateway\nGuardrails"] Gateway-->Embedding Embedding-->Gateway Gateway-->Retrieve Retrieve-->Gateway Gateway-->Rerank Rerank-->Gateway Gateway-->LLM LLM-->Gateway ``` + +The gateway can host multiple guardrails without extra network transmission or protocol encoding/decoding. In real-world deployments, there may be many guardrails covering many perspectives, and the gateway is the best place to provide guardrails for the system. + +The gateway consists of two basic components: the inference runtime and the guardrails. + +```mermaid graph TD Gateway---Runtime[Inference Runtime API] Runtime---OpenVINO Runtime---PyTorch Runtime---Others[...] Gateway---Guardrails Guardrails---Load[Load Model] Guardrails---Inference Guardrails---Access[Access Control] ``` + +A unified inference runtime API provides a general interface for inference runtimes. Any inference runtime, including OpenVINO, can be integrated into the system. The guardrails leverage the inference runtime and decide whether the request/response is valid. + +#### Stateful Guardrails + +The traditional workflow from ingress to egress is presented below. + +```mermaid flowchart LR Entry(Entry)-->GuardrailsA GuardrailsA["Guardrails\nAnti-Jailbreaking"]-->Embedding Embedding-->Retrieve Retrieve-->Rerank Rerank-->LLM LLM-->GuardrailsB["Guardrails\nAnti-Profanity"] ``` + +The guardrails service provides certain protections for the LLM, such as anti-jailbreaking and anti-poisoning for the input side, anti-toxicity and factuality checks for the output side, and PII detection for both the input and output sides. + +Guardrails can also be split into two types: stateless and stateful. Guardrails including anti-jailbreaking, anti-toxicity, and PII detection are considered stateless guards, since they do not rely on both the prompt input and the response output, while anti-hallucination is regarded as a stateful guard, since it needs both the input and the output to judge the relationship between them.
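+
+To make the stateless/stateful distinction concrete, here is a minimal,
+hypothetical sketch (illustration only, not the proposed gateway API):
+
+```python
+def stateless_guard(text: str) -> bool:
+    """A stateless guard inspects one message in isolation, e.g.,
+    anti-jailbreaking or PII detection on the prompt alone."""
+    return "ignore previous instructions" not in text.lower()
+
+def stateful_guard(prompt: str, response: str) -> bool:
+    """A stateful guard such as anti-hallucination needs the prompt
+    and the response together to judge the relation between them."""
+    shared = set(prompt.lower().split()) & set(response.lower().split())
+    return len(shared) > 0  # placeholder grounding heuristic
+```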
 + +[Guardrails Microservice](https://github.com/xuechendi/GenAIComps/tree/pii_detection/comps/guardrails) provides certain guardrails as microservices, but due to the limitations of the microservice model, it is not able to correlate requests with responses, leading to difficulty in providing stateful guard ability. + +The opt-in guardrails in the gateway work in the architecture given below. + +```mermaid flowchart LR Entry(Entry)-->GuardrailsA subgraph Gateway GuardrailsA["Guardrails\nAnti-Jailbreaking"]-->GuardrailsC GuardrailsB-->GuardrailsC end GuardrailsC["Guardrails\nAnti-Hallucination"]-->Embedding Embedding-->Retrieve Retrieve-->Rerank Rerank-->LLM LLM-->GuardrailsB["Guardrails\nAnti-Profanity"] ``` + +As an alternative choice, the gateway will also provide guardrails ability, whether stateful or stateless. + +#### Observability + +Envoy is the most popular proxy in the cloud native world. It contains out-of-the-box access logs, stats, and metrics, and integrates naturally with observability platforms including OpenTelemetry and Prometheus. + +Guardrails in the gateway will leverage these observability capabilities to meet potential regulatory and compliance needs. + +#### Multi-Services Deployment + +Let's say the embedding and LLM services are AI-powered and require guardrails protection. + +The opt-in gateway can be deployed as a gateway or as sidecar services. + +```mermaid graph LR Entry(Entry)-->Embedding subgraph SidecarA[Sidecar] Embedding end Embedding-->Retrieve Retrieve-->Rerank Rerank-->LLM subgraph SidecarB[Sidecar] LLM end ``` + +The gateway can also work with guardrails microservices. + +```mermaid graph LR Entry(Entry)-->GuardrailsC["Guardrails\nAnti-Hallucination"] GuardrailsC["Guardrails\nAnti-Hallucination"]-->GuardrailsA["Guardrails\nAnti-Jailbreaking"] GuardrailsA-->Embedding Embedding-->Retrieve Retrieve-->Rerank Rerank-->GuardrailsB["Guardrails\nAnti-Jailbreaking"] GuardrailsB-->LLM LLM-->GuardrailsD["Guardrails\nAnti-Profanity"] subgraph Gateway GuardrailsD-->GuardrailsC end ``` + +### Alternatives Considered + +[Guardrails Microservice](https://github.com/xuechendi/GenAIComps/tree/pii_detection/comps/guardrails): provides certain guardrails; however, it only supports stateless guardrails. + +### Compatibility + +N/A + +### Miscs + +- TODO + + - [ ] API definitions for meta service deployment and Kubernetes deployment - [ ] Envoy inference framework and guardrails HTTP filter diff --git a/community/rfcs/24-08-07-OPEA-GenAIStudio.md b/community/rfcs/24-08-07-OPEA-GenAIStudio.md new file mode 100644 index 00000000..5465ab8b --- /dev/null +++ b/community/rfcs/24-08-07-OPEA-GenAIStudio.md @@ -0,0 +1,171 @@ +# 24-08-07 OPEA-001 OPEA GenAIStudio + +## Author + +[ongsoonee](https://github.com/OngSoonEe) +[chinyixiang](https://github.com/chinyixiang) + +## Status + +Under Review + +## RFC Content + +### Objective + +The purpose of this RFC is to propose the creation of GenAI Studio, a platform designed to facilitate the development of custom large language model (LLM) applications, leveraging insights from the playground experimentation phase. GenAI Studio will enable users to construct, evaluate, and benchmark their LLM applications through a user-friendly no-code/low-code interface. The platform also provides the capability to export the developed application as a ready-to-deploy package for immediate enterprise integration. 
This initiative aims to streamline the transition from concept to production, ensuring a seamless deployment process for day-0 enterprise applications. + +### Motivation + +This RFC outlines the creation of the Enterprise GenAI Assembly Framework, a streamlined platform for OPEA users. The framework's key goals include: +- Assembly and Configuration: Simplify the process of assembling and configuring GenAI components, such as GenAIComps, with an interactive interface for crafting functional applications. +- Benchmarking and Evaluation: Perform benchmarking and evaluation on the application for tuning and optimization, including use of [GenAIEval](https://github.com/opea-project/GenAIEval) facilities. +- Enterprise Deployment Package Creation: Provide tools to create ready-to-deploy Enterprise Packages, including integration of [GenAIInfra](https://github.com/opea-project/GenAIInfra). +The framework is designed to democratize development, evaluation, and deployment of GenAI applications for OPEA users, promoting innovation and operational efficiency in the enterprise AI landscape with OPEA. + +### Value Proposition #### Current Approach ![Current Approach of GenAI Solution for enterprise](https://github.com/user-attachments/assets/adb10f29-b506-46d6-abd3-ed5f70049bee) + +Days or weeks before the first working solution + +#### GenAI Studio Approach ![Proposed GenAIStudio Approach](https://github.com/user-attachments/assets/e0c59dd2-0ff5-4deb-9561-8cba4ab5defe) + +A Day-0 solution that offers users a foundational skeleton, allowing them to focus on business use-cases rather than building the basic framework. + +### Persona OPEA is a framework designed to streamline the automation of enterprise processes through a series of microservices. The GenAI Studio enhances OPEA by enabling users to develop, deploy, and optimize AI-driven solutions. This scenario demonstrates how different personas (OPEA Developers, Enterprise Users/DevOps, and End Users) can leverage the GenAI Studio to build and deploy enterprise-ready solutions efficiently. + +Scenarios: 1. Developer Persona - Objective: Develop and integrate a GenAI application for a specific business use-case within the OPEA microservice architecture. - Use of the Studio: - The OPEA Developer uses the GenAI Studio to create a GenAI model that helps enhance the business use-case. - The Studio's advanced development tools allow the developer to fine-tune the model based on enterprise-specific data, ensuring optimal performance. - After development, the Studio automatically generates a ready-to-use enterprise deployment package that includes all necessary components, such as configurations and resource management tools, ensuring seamless integration with the existing OPEA infrastructure. - This package is designed to be easily deployable at the customer’s site, minimizing the need for additional configuration and setup. 2. Enterprise User Persona - Objective: Optimize and deploy the GenAI application with OPEA microservices to meet specific enterprise needs. - Use of the Studio: - The enterprise user uses the GenAI Studio to test and optimize the deployment package generated. - With the Studio’s benchmarking tools, they evaluate the AI model's performance from both inference and compute perspectives, ensuring it meets the enterprise's operational requirements. - The Studio provides insights into resource allocation, helping DevOps fine-tune the deployment to achieve the best possible performance. 
Once optimized, the deployment package is easily launched, allowing the enterprise to immediately benefit from the AI enhancements. 3. End User Persona - Objective: Implement and utilize the AI-enhanced OPEA solution for specific business tasks. - Use of the Studio: - The End User accesses the GenAI Studio to explore the ready-to-use deployment package provided by the DevOps team. - The Studio offers tools to evaluate the solution's performance in real-world scenarios, ensuring it aligns with the business’s objectives. - With minimal setup, the End User can deploy the AI-enhanced solution in their environment, automating complex workflows and optimizing resource usage to achieve business goals more efficiently. + +The Generative AI Studio empowers Developers, Enterprise Users, and End Users to create, optimize, and deploy AI-driven enterprise solutions effortlessly. By providing tools to generate ready-to-use deployment packages and benchmark performance, the Studio ensures that AI solutions are not only powerful but also easy to deploy and maintain, making them highly effective for business applications. + +### Strategy and Scope of Work + +Not reinventing the wheel: leverage existing work from OPEA, open source, and EasyData foundation works. +- GMC on configuration/deploy +- Langflow/flowise.ai for app configuration +- Suites of performance evaluation (VictoriaMetric, OpenTelemetry (Otel), Tempo, Loki, Grafana) +- Istio for workload management + +Scope of model development/optimization + +| Scope of Work | Status | | --- | --- | | Prompt engineering, RAG | In scope | | Model Finetune | Stretch Goal | | Model Pre-train | Out of Scope | + +### GenAI Studio High Level Architecture ![OPEA GenAI Studio Architecture](https://github.com/user-attachments/assets/fa55aeae-158b-4035-8325-25821c24a27f) + +### Design Proposal + +### Design Space Provide an interactive user interface for users to build, configure, test, and generate the final deployment package. Users may utilize the YAML data file for creating and modifying a studio project, as an alternative to the GUI. + +#### Part 1: Application Build and Configuration Users build a GenAI application with configuration options such as +- model selection +- model parameter setting (temp, top-p, top-k, max response) +- system instruction + +Provides two modes of configuration: +- Wizard Mode: The user is guided through a step-by-step process to create an application - ![screenshot sample of wizard mode](https://github.com/user-attachments/assets/1c780be1-d6dc-47fb-8a23-5229392ab45b) +- Drag-n-drop Workflow Mode: Allows users to create their own flow from available components (leverage Flowise AI) - Utilize Flowise AI - https://docs.flowiseai.com/ - Note: Need further feasibility study on - Ease of customization or adding new UI components - Connectivity and integration to Mega Service (HelmChart, DockerCompose, GMC) + + +#### Part 2: Benchmark and Evaluation A. Inference Performance +- General Benchmarking + - GLUE/SuperGLUE + - GPQA + - SQuAD + - ImageNet + - MLPerf +- Hallucination +- Vertical/Domain Specific Benchmarking (with ground truth) +- Finetuning – next phase + +B. Model Compute Performance +- Tokens per second (TPS) +- First-token latency +- Throughput +- RAG performance
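+
+As a sketch of how two of these metrics might be collected against a streaming
+chat endpoint (the URL and payload shape here are placeholders, not a defined
+OPEA API):
+
+```python
+import time
+import requests
+
+def measure(url: str, prompt: str) -> dict:
+    """Measure first-token latency and tokens per second for a server
+    that streams one token chunk per line."""
+    start = time.perf_counter()
+    first_token = None
+    tokens = 0
+    with requests.post(url, json={"prompt": prompt}, stream=True) as resp:
+        for chunk in resp.iter_lines():
+            if not chunk:
+                continue
+            if first_token is None:
+                first_token = time.perf_counter() - start
+            tokens += 1
+    total = time.perf_counter() - start
+    return {"first_token_latency_s": first_token,
+            "tokens_per_sec": tokens / total if total else 0.0}
+```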
C. Resource Monitoring: CPU utilization and memory utilization ![Diagram on resource monitoring architecture](https://github.com/user-attachments/assets/0fe9fed7-0979-4325-b242-fcd753b19f09) + +Enablement of components for compute performance evaluation: +- VictoriaMetric: as metrics store for resource utilization +- OpenTelemetry (Otel): tracing probing mechanism +- Tempo: Trace store for OpenTelemetry +- Loki: log store for pod/Kubernetes +- Grafana: visualization of metrics, trace and logs +- Prometheus + +#### Part 3: Enterprise Deployment +Generate an Enterprise Deployment Package based on the application with enterprise facility features. +Features: +- Application UI +- User management (login, create, update, delete) +- Session management (e.g. Chat sessions) +- Inference parameter setting (top-p, top-k, temperature) +- Vector Store +- Token generation +- API access + +Applications: +- QnA +- AudioChat +- VisualChat +- Translation +- CodeGen +- CodeTrans +- Summarizer + +Deployment configuration - Sample UI +- OS +- Cloud/ OnPrem +- Cluster /single Machine +- Feature selection (API access, user management, etc.) +- Monitoring dashboard for Resource Management + +![GenAI Deployment Package Configuration](https://github.com/user-attachments/assets/8dd43bff-26a6-4c3e-a80c-127bccdff7f3) + +The generated Deployment Package generally contains the following parts: +- Ansible playbooks: used to set up and initialize important services such as K8s, SQL Database, local image registry, etc. +- App UI codes +- App backend server codes +- Other OPEA microservice component images can be pulled from OPEA registry directly during setup. + + +### Compatibility This RFC will require a feasibility study on tools to use for Part 1 Drag-n-Drop Workflow Mode design. Flowise AI is a good candidate, but it needs to run as a separate service, which will add to the complexity of the UI/UX design. \ No newline at end of file diff --git a/deploy/index.rst b/deploy/index.rst index 57d06027..1c966c1e 100644 --- a/deploy/index.rst +++ b/deploy/index.rst @@ -4,14 +4,11 @@ Deploying GenAI ############### GenAIInfra is the containerization and cloud native suite for OPEA, including -artifacts to deploy `GenAIExamples`_ in a cloud native way so enterprise users +artifacts to deploy :ref:`GenAIExamples` in a cloud native way so enterprise users can deploy to their own cloud. -As we're building this documentation site, for now, read more about what's -in the `GenAIInfra GitHub repository`_. - -.. _GenAIExamples: https://github.com/opea-project/GenAIExamples/blob/main/README.md -.. _GenAIInfra Github repository: https://github.com/opea-project/GenAIInfra/blob/main/README.md +We're building this documentation from content in the +:GenAIInfra_blob:`GenAIInfra` GitHub repository. .. toctree:: @@ -38,6 +35,7 @@ Authentication and Authorization :glob: /GenAIInfra/authN-authZ/* + /GenAIInfra/authN-authZ/*/* Helm Charts *********** diff --git a/developer-guides/doc_guidelines.rst b/developer-guides/doc_guidelines.rst index ef938734..dcaf0ff1 100644 --- a/developer-guides/doc_guidelines.rst +++ b/developer-guides/doc_guidelines.rst @@ -3,26 +3,34 @@ Documentation Guidelines ######################## -OPEA Project content is written using the `markdown`_ (``.md``) and `reStructuredText`_ markup -language (``.rst``) with Sphinx extensions, and processed -using Sphinx to create a formatted stand-alone website. 
Developers can +OPEA Project content is written using the `markdown`_ (``.md``) with `MyST extensions`_ and `reStructuredText`_ markup +language (``.rst``) with `Sphinx extensions`_, and processed +using `Sphinx`_ to create a formatted stand-alone website. Developers can view this content either in its raw form as ``.md`` and ``.rst`` markup files, or (with Sphinx installed) they can build the documentation using the Makefile (on Linux systems) to generate the HTML content. The HTML content can then be -viewed using a web browser. This same ``.md`` and ``.rst`` content is fed into the +viewed using a web browser. These ``.md`` and ``.rst`` files are maintained in +the project's GitHub repos and processed to create the `OPEA Project documentation`_ website. -You can read details about `reStructuredText`_ and about `Sphinx extensions`_ -from their respective websites. +.. note:: While GitHub supports viewing `.md` and `.rst` content with your browser on the + `github.com` site, markdown and reST extensions are not recognized there, so the + best viewing experience is through the `OPEA Project documentation`_ github.io + website. +You can read details about `reStructuredText`_ and `Sphinx extensions`_, and +`markdown`_ and `MyST extensions`_ from their respective websites. + +.. _MyST extensions: https://mystmd.org/guide/quickstart-myst-markdown .. _Sphinx extensions: https://www.sphinx-doc.org/en/stable/contents.html .. _reStructuredText: http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html +.. _Sphinx: https://www.sphinx-doc.org .. _Sphinx Inline Markup: https://www.sphinx-doc.org/en/master/usage/restructuredtext/roles.html .. _OPEA Project documentation: https://opea-project.github.io .. _markdown: https://docs.github.com/en/get-started/writing-on-github/getting-started-with-writing-and-formatting-on-github/basic-writing-and-formatting-syntax This document provides a quick reference for commonly used markdown and reST -with Sphinx-defined directives and roles used to create the documentation +with MyST and Sphinx-defined directives and roles used to create the documentation you're reading. Markdown vs. RestructuredText ============================= Both markdown and ReStructureText (reST) let you create individual documentation GitHub can render when viewing them in your browser on github.com. Markdown is popular because of it's familarity with developers and is the default markup language for StackOverflow, Reddit, GitHub, and others. ReStructuredText came -from the Python community for quite a while and became noticed outside that -community with the release of Sphinx. These days, reST is supported by GitHub +from the Python community in 2001 and became noticed outside that +community with the release of Sphinx in 2008. These days, reST is supported by GitHub and major projects use it for their documentation, including the Linux kernel, OpenCV and LLVM/Clang. @@ -44,7 +52,9 @@ mechanism, which leads to many different "flavors" of markdown. If you stick to the core and common markdown syntax (headings, paragraphs, lists, and such), using markdown is just fine. However, slipping in raw HTML to do formatting (such as centering) or using HTML for tables creates problems when publishing to the -https://opea-project.github.io site. +https://opea-project.github.io site. The MyST parser provides extensions to +markdown that integrate well with Sphinx, so we use this as a bridge for the +markdown content within the OPEA project. 
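+For example, MyST lets you write Sphinx directives in a markdown file using a
+fenced block with the directive name in braces, such as this note admonition::
+
+   ```{note}
+   This note is written in a markdown file but rendered by Sphinx.
+   ```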
Within the OPEA documentation, we use both markdown and reST files for the @@ -64,8 +74,8 @@ files, along with other project related documents that are maintained in the ``docs`` repo. The root of the generated documentation starts with the ``docs/index.rst`` file that starts off the organizational structure that's shown as the left navigation in the generated HTML site at -https://opea-project.github.io. That ``index.rst`` file uses a toctree -directive to point to other documents that may include additional toctree +https://opea-project.github.io. That ``index.rst`` file uses a ``toctree`` +directive to point to other documents that may include additional ``toctree`` directives of their own, ultimately collecting all the content into an organizational structure you can navigate. @@ -126,7 +136,7 @@ Headings In markdown, headings are indicated as a line beginning with a ``#`` character, with additional ``#`` characters indicating a deeper heading level, e.g., ``#`` for H1 (title), ``##`` for H2 headings, ``###`` for H3 - headdings, and so on.) + headings, and so on.) * The ``#`` character for a heading must be the first character on the line, then a space, followed by the heading. For example:: @@ -141,10 +151,10 @@ Headings * There must be only one ``#`` H1 heading at the beginning of the document indicating the document's title. - * You must not skip heading levels on the way down in the document hierarchy, e.g., do not go from a H1 ``#`` to an - H3 ``###`` without an intervening H2 ``##``. You may skip heading levels - on the way back up, for example, from an H4 ``####`` back up to an H2 ``##`` - as appropriate. + * You must not skip heading levels on the way down in the document + hierarchy, e.g., do not go from a H1 ``#`` to an H3 ``###`` without an + intervening H2 ``##``. You may skip heading levels on the way back up, + for example, from an H4 ``####`` back up to an H2 ``##`` as appropriate. @@ -155,13 +165,22 @@ Content Highlighting Some common reST and markdown inline markup samples: * one asterisk: ``*text*`` for emphasis (*italics*), -* two asterisks: ``**text**`` for strong emphasis (**boldface**), and -* two back quotes: ````text```` for ``inline code`` samples. +* two asterisks: ``**text**`` for strong emphasis (**boldface**) + +.. tabs:: + + .. group-tab:: reST + + * two back quotes: ````text```` for ``inline code`` samples. -ReST rules for inline markup try to be forgiving to account for common -cases of using these marks. For example, using an asterisk to indicate -multiplication, such as ``2 * (x + y)`` will not be interpreted as an -unterminated italics section. + ReST rules for inline markup try to be forgiving to account for common + cases of using these marks. For example, using an asterisk to indicate + multiplication, such as ``2 * (x + y)`` will not be interpreted as an + unterminated italics section. + + .. group-tab:: markdown + + * one back quote: ```text``` for `inline code` samples. For inline markup, the characters between the beginning and ending characters must not start or end with a space, @@ -290,8 +309,9 @@ Tables There are a few ways to create tables, each with their limitations or quirks. 
`Grid tables `_ -offer the most capability for defining merged rows and columns, but are -hard to maintain:: +offer the most capability for defining merged rows and columns (where content +spans multiple rows or columns), but are hard to maintain because the grid +characters must be aligned throughout the table:: +------------------------+------------+----------+----------+ | Header row, column 1 | Header 2 | Header 3 | Header 4 | +------------------------+------------+----------+----------+ @@ -365,18 +385,28 @@ can use ``:widths: auto``. File Names and Commands *********************** -Sphinx extends reST by supporting additional inline markup elements (called -"roles") used to tag text with special meanings and enable output formatting. -(You can refer to the `Sphinx Inline Markup`_ documentation for the full -list). +.. tabs:: + + .. group-tab:: reST + + Sphinx extends reST by supporting additional inline markup elements (called + "roles") used to tag text with special meanings and enable output formatting. + (You can refer to the `Sphinx Inline Markup`_ documentation for the full + list). + + For example, there are roles for marking :file:`filenames` + (``:file:`name```) and command names such as :command:`make` + (``:command:`make```). You can also use the \`\`inline code\`\` + markup (double backticks) to indicate a ``filename``. -For example, there are roles for marking :file:`filenames` -(``:file:`name```) and command names such as :command:`make` -(``:command:`make```). You can also use the \`\`inline code\`\` -markup (double backticks) to indicate a ``filename``. + Don't use items within a single backtick, for example ```word```. Instead + use double backticks: ````word````. -Don't use items within a single backtick, for example ```word```. Instead -use double backticks: ````word````. + .. group-tab:: markdown + + MyST extends markdown by supporting additional inline markup elements + (called "roles") used to tag text with special meanings and enable output + formatting. Branch-Specific File Links ************************** @@ -396,8 +426,8 @@ creates a hyperlink to that file in the branch currently checked out. For example, a GitHub link to the reST file used to create this document can be generated -using ``:docs_file:`developer-guides/doc_guidelines```, which will -appear as :docs_file:`developer-guides/doc_guidelines.rst`, a link to +using ``:docs_blob:`developer-guides/doc_guidelines```, which will +appear as :docs_blob:`developer-guides/doc_guidelines.rst`, a link to the "blob" file in the GitHub repo as displayed by GitHub. There's also an ``:docs_raw:`developer-guides/doc_guidelines.rst``` role that will link to the "raw" uninterpreted file, @@ -406,78 +436,86 @@ to see the difference. If you don't want the whole path to the file name to appear in the text, you use the usual linking notation to define what link text -is shown, for example, ``:docs_file:`Guidelines ``` -would show up as simply :docs_file:`Guidelines `. +is shown, for example, ``:docs_blob:`Guidelines ``` +would show up as simply :docs_blob:`Guidelines `. .. _internal-linking: Internal Cross-Reference Linking ******************************** .. tabs:: -Traditional ReST links are supported only within the current file using the -notation: +.. code-block:: rest .. group-tab:: reST - refer to the `internal-linking`_ page + Traditional reST links are supported only within the current file using the + notation: -which renders as, + .. 
code-block:: rest - refer to the `internal-linking`_ page + refer to the `internal-linking`_ documentation -Note the use of a trailing underscore to indicate an outbound link. In this -example, the label was added immediately before a heading, so the text that's -displayed is the heading text itself. + which renders as, -With Sphinx, however, we can create link-references to any tagged text within -the project documentation. + refer to the `internal-linking`_ documentation -Target locations within documents are defined with a label directive: + Note that the trailing underscore indicates an **outbound link**. In this + example, the label was added immediately before a heading, so the text that's + displayed is the heading text itself. - .. code-block:: rst + With Sphinx, we can create link-references to any tagged text within + the project documentation. - .. _my label name: + Target locations within documents are defined with a label directive: -Note the leading underscore indicating an inbound link. The content -immediately following this label is the target for a ``:ref:`my label name``` -reference from anywhere within the documentation set. The label **must** be -added immediately before a heading so that there's a natural phrase to show -when referencing this label (for example, the heading text). + .. code-block:: rst -This directive is also used to define a label that's a reference to a URL: + .. _my label name: -.. code-block:: rest + Note the leading underscore indicating an **inbound link**. The content + immediately following this label is the target for a ``:ref:`my label name``` + reference from anywhere within the documentation set. The label **must** be + added immediately before a heading so that there's a natural phrase to show + when referencing this label (for example, the heading text). - .. _Hypervisor Wikipedia Page: - https://en.wikipedia.org/wiki/Hypervisor + This directive is also used to define a label that's a reference to a URL: -To enable easy cross-page linking within the site, each file should have a -reference label before its title so that it can be referenced from another -file. + .. code-block:: rest -.. note:: These reference labels must be unique across the whole site, so generic - names such as "samples" should be avoided. + .. _Hypervisor Wikipedia Page: + https://en.wikipedia.org/wiki/Hypervisor -For example, the top of this -document's ``.rst`` file is: + To enable easy cross-page linking within the site, each file should have a + reference label before its title so that it can be referenced from another + file. -.. code-block:: rst + .. note:: These reference labels must be unique across the whole site, so generic + names such as "samples" or "introduction" should be avoided. - .. _doc_guidelines: + For example, the top of this document's ``.rst`` file is: - Documentation Guidelines - ######################## + .. code-block:: rst -Other ``.rst`` documents can link to this document using the -``:ref:`doc_guidelines``` tag, and it will appear as :ref:`doc_guidelines`. -This type of internal cross-reference works across multiple files. + .. _doc_guidelines: + + Documentation Guidelines + ######################## + + Other ``.rst`` documents can link to this document using the + ``:ref:`doc_guidelines``` tag, and it will appear as :ref:`doc_guidelines`. + This type of internal cross-reference works across multiple files. 
The link + text is obtained from the document source, so if the title changes, the link + text will automatically update as well. + + There may be times when you'd like to change the link text that's shown in the + generated document. In this case, you can specify alternate text using + ``:ref:`alternate text ``` (renders as + :ref:`alternate text `). + + .. group-tab:: markdown + + TODO -There may be times when you'd like to change the link text that's shown in the -generated document. In this case, you can specify alternate text using -``:ref:`alternate text ``` (renders as -:ref:`alternate text `). Non-ASCII Characters @@ -497,6 +535,86 @@ special characters such as \™ and are defined in the file We've kept the substitutions list small but you can add others as needed by submitting a change to the ``substitutions.txt`` file. +Include Content from Other Files +******************************** + +You can directly incorporate a document fragment from another file into your reST or +markdown content by using an ``include`` directive. + +.. important:: Be aware that references to content within the included content + are relative to the file doing the including. For example a relative reference + to an image must be correct from the point-of-view of the file doing the + inclusion, not the point-of-view of the included file. Also, the included + file must be appropriate in the current document's context at the point of + the directive. If an included document fragment contains section structure, + the title structure must match and be consistent in context. + +.. tabs:: + + .. group-tab:: reST + + In reST, you incorporate content from another file using an include + directive. Unless options are given, the included file is parsed in the + current document's context:: + + Here is some text in the reST document. + + .. include:: path/to/file + + And now we're back to the original document after the content in the + included file, as if that content were directly in the current file. + + You can use options to alter how the included file is processed: + + \:code\: language + The included content is treated as a ``code-block`` with ``language`` + highlighting. + + \:parser\: text + By default, the included content is parsed the same as the current + document (e.g., rst). This option specifies another parser such as + ``:parser: myst_parser.sphinx_`` if the included file is markdown. + + \:start-after\: text + Only the content after the first occurrence of the specified ``text`` in + the external file will be included. + + \:end-before\: + Only the content before the first occurrence of the specified ``text`` + in the external file will be included. + + These and other options are described in the `docutils include directive `_ + documentation. + + .. group-tab:: markdown + + MyST directives can be used to incorporate content from another file + into the current document as if it were part of the current document:: + + ```{include} relativepath/to/file + ``` + + The ``relativepath/to/file`` can start with a ``/`` to indicate a path + starting from the root of the document directory tree (not the root of + the underlying file system). You can reference files outside the + document tree root using ``../../`` syntax to get to the file. 
 + + You can include an external file and show it as if it were a code block + by using the ``literalinclude`` directive:: + + ```{literalinclude} relativepath/to/file + ``` + + You can include reST content, interpreted as reST, by using the + ``eval-rst`` directive and using the reST syntax and options for an + ``include`` directive, such as:: + + ```{eval-rst} + .. include:: path/to-file + :start-after: + :end-before: + ``` + + Code and Command Examples ************************* @@ -619,11 +737,20 @@ Code and Command Examples Images ****** +The image file name specified is relative to the document source file. We +recommend putting images into an ``images`` folder where the document source +is found. The usual image formats handled by a web browser are supported: +JPEG, PNG, GIF, and SVG. Keep the image size only as large as needed, +generally at least 500 px wide but no more than 1000 px, and no more than +250 KB unless a particularly large image is needed for clarity. + +You can also specify a URL to an image file if needed. + .. tabs:: .. group-tab:: reST - In reST, images are included in documentation by using an image directive:: + In reST, images are placed in the document using an image directive:: .. image:: ../images/opea-horizontal-color-w200.png :align: center @@ -638,17 +765,10 @@ Images .. group-tab:: markdown - In markdown, images are included in documentation using this syntax:: +      In markdown, images are placed in documentation using this syntax:: ![OPEA Logo](../images/opea-horizontal-color-w200.png) -The file name specified is relative to the document source file. We -recommend putting images into an ``images`` folder where the document source -is found. The usual image formats handled by a web browser are supported: -JPEG, PNG, GIF, and SVG. Keep the image size only as large as needed, -generally at least 500 px wide but no more than 1000 px, and no more than -250 KB unless a particularly large image is needed for clarity. - Tabs, Spaces, and Indenting @@ -696,9 +816,63 @@ or column span: Drawings ******** -In reST, we've included the ``graphviz`` Sphinx extension to enable you to use a -text description language to render drawings. For more information, see -:ref:`graphviz-examples`. +.. tabs:: + + .. group-tab:: reST + + In reST, we've included the ``graphviz`` Sphinx extension to enable that + text description language to render drawings. For more information, see + :ref:`graphviz-examples`. + + We've also included an extension providing ``mermaid`` support that also enables + that text description language to render drawings using:: + + .. mermaid:: + + graph LR; + A--> B & C; + B--> A & C; + C--> A & B; + + This will be rendered into this graph drawing: + + .. mermaid:: + + graph LR; + A--> B & C; + B--> A & C; + C--> A & B; + + See the `Mermaid User Guide `_ for more + information. + + .. group-tab:: markdown + + In markdown, we've included the MyST ``mermaid`` extensions to enable that text + description language to render drawings using:: + + ```{mermaid} + graph LR; + A--> B & C & D; + B--> A & E; + C--> A & E; + D--> A & E; + E--> B & C & D; + ``` + + This will be rendered into this graph drawing: + + .. mermaid:: + + graph LR; + A--> B & C & D; + B--> A & E; + C--> A & E; + D--> A & E; + E--> B & C & D; + + See the `Mermaid User Guide `_ for more + information. Alternative Tabbed Content ************************** @@ -772,7 +946,7 @@ changes all tabs with the same name throughout the page. For example: In this latter case, we're using a ``.. 
group-tab::`` directive instead of a ``.. tab::`` directive. Under the hood, we're using the `sphinx-tabs `_ extension that's included -in the ACRN (requirements.txt) setup. Within a tab, you can have most +in the OPEA docs (requirements.txt) setup. Within a tab, you can have most any content *other than a heading* (code-blocks, ordered and unordered lists, pictures, paragraphs, and such). diff --git a/developer-guides/docbuild.rst b/developer-guides/docbuild.rst index 99b33c4f..6cd83ed6 100644 --- a/developer-guides/docbuild.rst +++ b/developer-guides/docbuild.rst @@ -69,6 +69,11 @@ publishing repo opea-project.github.io later in these steps. In the following steps, you'll clone the upstream repos You'll only need to do this once to set up the folder structure: +.. note:: These instructions are cloning the upstream OPEA project repositories + to do documentation generation. If you're doing OPEA development, you may + already have forked copies of these repos on your system. You may have to + adjust these instructions accordingly. + #. At a command prompt, create the working folder on your development computer and clone all the opea-project repos containing documentation: @@ -81,7 +86,10 @@ this once to set up the folder structure: done After this, you'll have ``origin`` for each cloned repo pointing to the - upstream project repos. + upstream project repos. (Again, if you're doing OPEA development, you may + have forked the upstream repos to your personal GitHub account and cloned + your personal repo locally as ``origin`` and set the OPEA project repos as the + ``upstream`` repo.) #. If you haven't done so already, be sure to configure git with your name and email address for the ``signed-off-by`` line in your commit messages: diff --git a/developer-guides/graphviz.rst b/developer-guides/graphviz.rst index 41f9bdf6..d3f3e2f9 100644 --- a/developer-guides/graphviz.rst +++ b/developer-guides/graphviz.rst @@ -18,7 +18,7 @@ itself, and included by using a Graphviz directive: .. graphviz:: images/boot-flow.dot :name: boot-flow-example :align: center - :caption: ACRN Hypervisor Boot Flow + :caption: Hypervisor Boot Flow where the boot-flow.dot file contains the drawing commands: @@ -29,7 +29,7 @@ and the generated output would appear as: .. graphviz:: images/boot-flow.dot :name: boot-flow-example :align: center - :caption: ACRN Hypervisor Boot Flow + :caption: Hypervisor Boot Flow Let's look at some more examples and then we'll get into more details about diff --git a/eval/index.rst b/eval/index.rst index 6b98ac10..174b6ebb 100644 --- a/eval/index.rst +++ b/eval/index.rst @@ -5,12 +5,8 @@ Evaluating GenAI GenAIEval provides evaluation, benchmark, scorecard, and targeting for performance on throughpuut and latency, accuracy on popular evaluation harnesses, safety, and hallucination. -As we're building this documentation site, for now, read more about what's -in the `GenAIEval GitHub repository`_. - -.. _GenAIExamples: https://github.com/opea-project/GenAIExamples/blob/main/README.md -.. _GenAIEval Github repository: https://github.com/opea-project/GenAIEval/blob/main/README.md - +We're building this documentation from content in the +:GenAIEval_blob:`GenAIEval` GitHub repository. .. toctree:: :maxdepth: 1 diff --git a/examples/ChatQnA/ChatQnA_Guide.rst b/examples/ChatQnA/ChatQnA_Guide.rst index fba274bd..6d2cabe5 100644 --- a/examples/ChatQnA/ChatQnA_Guide.rst +++ b/examples/ChatQnA/ChatQnA_Guide.rst @@ -3,74 +3,193 @@ ChatQnA Sample Guide #################### +.. 
note:: This guide is in its early development and is a work-in-progress with + placeholder content. + Introduction/Purpose -********************* +******************** + +TODO: Tom to provide. Overview/Intro ============== +Chatbots are a widely adopted use case for leveraging the powerful chat and +reasoning capabilities of large language models (LLMs). The ChatQnA example +provides the starting point for developers to begin working in the GenAI space. +Consider it the “hello world” of GenAI applications; it can be leveraged for +solutions across wide enterprise verticals, both internally and externally. + Purpose ======= -AI Catalog Preview -================== +The ChatQnA example uses retrieval augmented generation (RAG) architecture, +which is quickly becoming the industry standard for chatbot development. It +combines the benefits of a knowledge base (via a vector store) and generative +models to reduce hallucinations, maintain up-to-date information, and leverage +domain-specific knowledge. + +RAG bridges the knowledge gap by dynamically fetching relevant information from +external sources, ensuring that responses generated remain factual and current. +At the core of this architecture are vector databases, which are instrumental in +enabling efficient and semantic retrieval of information. These databases store +data as vectors, allowing RAG to swiftly access the most pertinent documents or +data points based on semantic similarity. + +Central to the RAG architecture is the use of a generative model, which is +responsible for generating responses to user queries. The generative model is +trained on a large corpus of customized and relevant text data and is capable of +generating human-like responses. Developers can easily swap out the generative +model or vector database with their own custom models or databases. This allows +developers to build chatbots that are tailored to their specific use cases and +requirements. By combining the generative model with the vector database, RAG +can provide accurate and contextually relevant responses specific to your users' +queries. + +The ChatQnA example is designed to be a simple, yet powerful, demonstration of +the RAG architecture. It is a great starting point for developers looking to +build chatbots that can provide accurate and up-to-date information to users. + +GMC is the GenAI Microservices Connector. GMC facilitates sharing of services across +GenAI applications/pipelines and dynamic switching between models used in any stage +of a GenAI pipeline. For instance, in the ChatQnA GenAI pipeline, it supports +changing the model used in the embedder, re-ranker, and/or the LLM. + +One can use upstream vanilla Kubernetes or RHOCP, with or without GMC; +as indicated, GMC provides additional features. + +The ChatQnA example provides several deployment options, including single-node +deployments on-premises or in a cloud environment using hardware such as Xeon +Scalable Processors, Gaudi servers, NVIDIA GPUs, and even on AI PCs. It also +supports Kubernetes deployments with and without the GenAI Microservices Connector +(GMC), as well as cloud-native deployments using Red Hat OpenShift Container +Platform (RHOCP). + + +Preview ======= -(if applicable) +To get a preview of the ChatQnA example, visit the +`AI Explore site `_. 
+The **ChatQnA Solution** provides a basic chatbot, while the **ChatQnA with
+Augmented Context** solution allows you to upload your own files in order to
+quickly experiment with a RAG solution and see how a developer-supplied corpus
+can provide relevant and up-to-date responses.

-Key Implementation Details 
+Key Implementation Details
==========================

-Tech overview
-*************
-
-How it works
+Embedding:
+  The process of transforming user queries into numerical representations
+  called embeddings.
+Vector Database:
+  The storage and retrieval of relevant data points using vector databases.
+RAG Architecture:
+  The use of the RAG architecture to combine knowledge bases and generative
+  models for development of chatbots with relevant and up-to-date query
+  responses.
+Large Language Models (LLMs):
+  The training and utilization of LLMs for generating responses.
+Deployment Options:
+  Production-ready deployment options for the ChatQnA example, including
+  single-node deployments and Kubernetes deployments.

+How It Works
============

+The ChatQnA example follows a basic flow of information in the chatbot system,
+starting from the user input and going through the retrieve, re-ranker, and
+generate components, ultimately resulting in the bot's output.
+
+.. figure:: /GenAIExamples/ChatQnA/assets/img/chatqna_architecture.png
+   :alt: ChatQnA Architecture Diagram
+
+   This diagram illustrates the flow of information in the chatbot system,
+   starting from the user input and going through the retrieve, analyze, and
+   generate components, ultimately resulting in the bot's output.
+
+The architecture follows a series of steps to process user queries and
+generate responses:
+
+1. **Embedding**: The user query is first transformed into a numerical
+   representation called an embedding. This embedding captures the semantic
+   meaning of the query and allows for efficient comparison with other
+   embeddings.
+#. **Vector Database**: The embedding is then used to search a vector database,
+   which stores relevant data points as vectors. The vector database enables
+   efficient and semantic retrieval of information based on the similarity
+   between the query embedding and the stored vectors.
+#. **Re-ranker**: Uses a model to rank the retrieved data points by their
+   saliency to the query. The most relevant data points are kept; these can
+   include documents, articles, or any other relevant information that can
+   help generate accurate responses.
+#. **LLM**: The retrieved data points are then passed to a large language
+   model (LLM) for further processing. LLMs are powerful generative models
+   that have been trained on a large corpus of text data. They can generate
+   human-like responses based on the input data.
+#. **Generate Response**: The LLM generates a response based on the input data
+   and the user query. This response is then returned to the user as the
+   chatbot's answer.
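To make these steps concrete, here is a minimal, self-contained sketch of the
same five-step flow. It is illustrative only: the embedding, vector store,
re-ranker, and LLM below are toy stand-ins, not the OPEA microservices.

.. code-block:: python

   import math

   # Toy corpus standing in for a vector database.
   CORPUS = [
       "OPEA is an open platform for enterprise GenAI.",
       "RAG combines retrieval with generation.",
       "Paris is the capital of France.",
   ]

   def embed(text):
       # Stand-in embedding: a bag-of-letters vector. A real deployment
       # would call an embedding microservice instead.
       vec = [0.0] * 26
       for ch in text.lower():
           if "a" <= ch <= "z":
               vec[ord(ch) - ord("a")] += 1.0
       return vec

   def cosine(u, v):
       dot = sum(a * b for a, b in zip(u, v))
       norm = math.sqrt(sum(a * a for a in u)) * math.sqrt(sum(b * b for b in v))
       return dot / norm if norm else 0.0

   def retrieve(query_vec, k=2):
       # Vector database step: rank stored vectors by similarity to the query.
       ranked = sorted(CORPUS, key=lambda d: cosine(query_vec, embed(d)),
                       reverse=True)
       return ranked[:k]

   def rerank(query, docs):
       # Re-ranker step: a dedicated model would re-score here; this toy
       # version just keeps the retrieval order.
       return docs

   def generate(query, context):
       # LLM step: a real deployment would call the model serving endpoint.
       return f"Answer to {query!r}, grounded in: {context[0]}"

   query = "What is OPEA?"
   docs = rerank(query, retrieve(embed(query)))
   print(generate(query, docs))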
+
Expected Output
===============

-Customization
-==============
-
Validation Matrix and Prerequisites
***********************************

+See :doc:`/GenAIExamples/supported_examples`
+
Architecture
************

-Need to include the architecture with microservices. Like the ones Xigui/Chun made and explain in a para or 2 on the highlights of the arch including Gateway, UI, mega service, how models are deployed and how the microservices use the deployment service. The architecture can be laid out as general as possible, maybe calling out “for e.g” on variable pieces. Will also be good to include a linw or 2 on what the overall use case is. For e.g. This chatqna is setup to assist in ansewering question on OPEA. The microservices are set up with RAG and llm pipeline to query on OPEA pdf documents
+TODO: Include microservice-level graphics.

-Microservice outline and diagram
+TODO: Need to include the architecture with microservices, like the ones
+Xigui/Chun made, and explain in a paragraph or 2 the highlights of the
+architecture, including the Gateway, UI, megaservice, how models are deployed,
+and how the microservices use the deployment service. The architecture can be
+laid out as generally as possible, maybe calling out examples for the variable
+pieces. It will also be good to include a line or 2 on what the overall use
+case is. For example, this ChatQnA is set up to assist in answering questions
+on OPEA: the microservices are set up with a RAG and LLM pipeline to query
+OPEA PDF documents.

+Microservice Outline and Diagram
================================

Deployment
**********

-.. tabs::
-
-   .. tab:: Single Node deployment
-      (IDC or on prem metal: Xeon, Gaudi, AI PC, Nvidia?)
+Single Node
+===========

-   .. tab:: Kubernetes
+.. toctree::
+   :maxdepth: 1

-      K8S for Clusters.
+   deploy/xeon
+   deploy/gaudi
+   deploy/nvidia
+   deploy/AIPC

-   .. tab:: Cloud Deployment
+Kubernetes
+==========

-      AWS and Azure.
+* Xeon & Gaudi with GMC
+* Xeon & Gaudi without GMC
+* Using Helm Charts

-   .. tab:: Managed Services Deployment
+Cloud Native
+============

-      Such as..
+* Red Hat OpenShift Container Platform (RHOCP)

Troubleshooting
***************

-Monitoring 
+Monitoring
**********

Evaluate performance and accuracy

Summary and Next Steps
-**********************
\ No newline at end of file
+**********************

diff --git a/examples/ChatQnA/deploy/AIPC.rst b/examples/ChatQnA/deploy/AIPC.rst
new file mode 100644
index 00000000..c1cde73e
--- /dev/null
+++ b/examples/ChatQnA/deploy/AIPC.rst
@@ -0,0 +1,7 @@
+.. _ChatQnA_deploy_aiPC:
+
+
+Single Node On-Prem Deployment: AI PC
+#####################################
+
+TODO

diff --git a/examples/ChatQnA/deploy/gaudi.rst b/examples/ChatQnA/deploy/gaudi.rst
new file mode 100644
index 00000000..6ae1008f
--- /dev/null
+++ b/examples/ChatQnA/deploy/gaudi.rst
@@ -0,0 +1,7 @@
+.. _ChatQnA_deploy_gaudi:
+
+
+Single Node On-Prem Deployment: Gaudi Servers
+#############################################
+
+TODO

diff --git a/examples/ChatQnA/deploy/nvidia.rst b/examples/ChatQnA/deploy/nvidia.rst
new file mode 100644
index 00000000..de6e94f2
--- /dev/null
+++ b/examples/ChatQnA/deploy/nvidia.rst
@@ -0,0 +1,7 @@
+.. _ChatQnA_deploy_nvidia:
+
+
+Single Node On-Prem Deployment: NVIDIA GPUs
+###########################################
+
+TODO

diff --git a/examples/ChatQnA/deploy/xeon.rst b/examples/ChatQnA/deploy/xeon.rst
new file mode 100755
index 00000000..54bc54ed
--- /dev/null
+++ b/examples/ChatQnA/deploy/xeon.rst
@@ -0,0 +1,75 @@
+.. _ChatQnA_deploy_xeon:
+
+
+Single Node On-Prem Deployment: Xeon Scalable Processors
+########################################################
+
+Example use case:
+Should provide context for selecting between vLLM and TGI.
+
+.. tabs::
+
+   .. tab:: Deploy with Docker Compose with vLLM
+
+      TODO: The section must cover how the architecture described above can be
+      implemented with vLLM, or whichever serving model is chosen.
+      Show a basic E2E use case set up with one type of DB, e.g., Redis, based
+      on what is already covered in the ChatQnA example (others can be called
+      out or referenced accordingly). Show how to use one SOTA model, such as
+      Llama 3, with a sample configuration. The outcome must demonstrate a
+      real use case showing both productivity and performance. For
+      consistency, let's use the OPEA documentation for the RAG use cases.
+
+      Sample titles:
+
+      1. Overview
+         Say a few lines about what is expected in this tutorial, e.g., the
+         Redis DB used and the Llama 3 model run to showcase an E2E use case
+         using OPEA and vLLM.
+      #. Pre-requisites
+         Includes cloning the repos, pulling the necessary containers if
+         available (UI, pipeline, etc.), setting the environment variables
+         such as proxies, getting access to model weights, getting tokens on
+         HF, LG, etc., and sanity checks if needed.
+      #. Prepare (Building / Pulling) Docker images
+         a) This step will involve building/pulling (maybe, in the future) the
+            relevant Docker images with a step-by-step process, along with a
+            sanity check at the end
+         #) If customization is needed, we show one case of how to do it
+
+      #. Use case setup
+
+         This section will include how to get the data and other dependencies
+         needed, followed by getting all the microservice environments ready.
+         Use this section also to talk about how to set other models if
+         needed, how to use other DBs, etc.
+
+      #. Deploy the ChatQnA use case based on the docker_compose
+
+         This should cover the steps involved in starting the microservices
+         and megaservices, also explaining some key highlights of what's
+         covered in the Docker compose file. Include sanity checks as needed.
+         Each microservice/megaservice start command, along with what it does
+         and the expected output, will be good to add.
+
+      #. Interacting with the ChatQnA deployment (or navigating the ChatQnA
+         workflow)
+
+         This section covers how to use a different machine to interact with
+         and validate the microservices, and walks through how to navigate
+         each service. For example, uploading a local document for data prep
+         and how to get answers. Customers will be interested in getting the
+         output for a query while also measuring the quality of the model and
+         the performance metrics (health and statistics to also be covered).
+         Please check whether these details can also be retrieved by curling
+         the endpoints. Is uploading templates available now? A custom
+         template is available today.
+
+         Show all the customization and features available.
+
+      #. Additional Capabilities (optional)
+         Use-case-specific features to call out
+
+      #. Launch the UI service
+         Show the steps to launch the UI, with a sample screenshot of a query
+         and its output.
+
+
+   .. tab:: Deploy with Docker Compose with TGI
+
+      This section will be similar to the vLLM one; it should be worth trying
+      to single-source the two.
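As a placeholder for the endpoint validation discussed in the "Interacting
with the ChatQnA deployment" step above, a smoke test along the following
lines could eventually be included. This is an unvalidated sketch: the host,
port (8888), and route (/v1/chatqna) are assumptions based on the ChatQnA
megaservice compose defaults and should be checked against the actual
deployment.

.. code-block:: python

   import json
   import urllib.request

   # Assumed defaults for the ChatQnA megaservice; adjust the host, port,
   # and route to match your docker compose deployment.
   HOST = "localhost"
   URL = f"http://{HOST}:8888/v1/chatqna"

   payload = json.dumps({"messages": "What is OPEA?"}).encode("utf-8")
   req = urllib.request.Request(
       URL,
       data=payload,
       headers={"Content-Type": "application/json"},
   )

   # A successful round trip through gateway -> embedding -> retriever ->
   # re-ranker -> LLM returns the generated answer.
   with urllib.request.urlopen(req, timeout=120) as resp:
       print(resp.status)
       print(resp.read().decode("utf-8"))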
diff --git a/examples/index.rst b/examples/index.rst
index 87950648..d81b8d0b 100644
--- a/examples/index.rst
+++ b/examples/index.rst
@@ -8,11 +8,15 @@ testing, and scalability. All examples are fully compatible with Docker and
Kubernetes, supporting a wide range of hardware platforms such as Gaudi,
Xeon, and other hardwares.

-As we're building this documentation site, for now, read more about these
-Examples in the `GenAIExamples GitHub repository`_.
+.. toctree::
+   :maxdepth: 1
+
+   ChatQnA/ChatQnA_Guide

-.. _GenAIExamples Github repository: https://github.com/opea-project/GenAIExamples/blob/main/README.md
+----

+We're building this documentation from content in the
+:GenAIExamples_blob:`GenAIExamples` GitHub repository.

.. toctree::
   :maxdepth: 1

@@ -117,6 +121,18 @@ FaqGen Application
   /GenAIExamples/FaqGen/*/*/*/*
   /GenAIExamples/FaqGen/*/*/*/*/*

+Instruction Tuning Application
+******************************
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   /GenAIExamples/InstructionTuning/*
+   /GenAIExamples/InstructionTuning/*/*
+   /GenAIExamples/InstructionTuning/*/*/*
+
+
ProductivitySuite Application
*****************************

@@ -130,6 +146,18 @@ ProductivitySuite Application
   /GenAIExamples/ProductivitySuite/*/*/*/*
   /GenAIExamples/ProductivitySuite/*/*/*/*/*

+Rerank Model Fine Tuning
+************************
+
+.. toctree::
+   :maxdepth: 1
+   :glob:
+
+   /GenAIExamples/RerankFinetuning/*
+   /GenAIExamples/RerankFinetuning/*/*
+   /GenAIExamples/RerankFinetuning/*/*/*
+
+
SearchQnA Application
*********************

diff --git a/faq.md b/faq.md
index 131aa41d..b5186d9a 100644
--- a/faq.md
+++ b/faq.md
@@ -1,53 +1,38 @@
# OPEA Frequently Asked Questions

-## What is OPEA’s mission?
-OPEA’s mission is to offer a validated enterprise-grade GenAI (Generative Artificial Intelligence) RAG reference implementation. This will simplify GenAI development and deployment, thereby accelerating time-to-market.
+## What is OPEA's mission?
+OPEA’s mission is to offer a validated enterprise-grade GenAI (Generative Artificial Intelligence) RAG reference implementation. This will simplify GenAI development and deployment, thereby accelerating time-to-market.

## What is OPEA?
-The project currently consists of a technical conceptual framework that enables GenAI implementations to meet enterprise-grade requirements. The project offers a set of reference implementations for a wide range of enterprise use cases that can be used out-of-the-box. The project additionally offers a set of validation and compliance tools to ensure the reference implementations meet the needs outlined in the conceptual framework. This enables new reference implementations to be contributed and validated in an open manner. Partnering with the LF AI & Data places in the perfect spot for multi-partner development, evolution, and expansion.
+The project currently consists of a technical conceptual framework that enables GenAI implementations to meet enterprise-grade requirements. The project offers a set of reference implementations for a wide range of enterprise use cases that can be used out-of-the-box. Additionally, the project provides a set of validation and compliance tools to ensure the reference implementations meet the needs outlined in the conceptual framework. This enables new reference implementations to be contributed and validated in an open manner. Partnering with the LF AI & Data places it in the perfect spot for multi-partner development, evolution, and expansion.

## What problems are faced by GenAI deployments within the enterprise?
-Enterprises face a myriad of challenges in development and deployment of Gen AI. The development of new models, algorithms, fine tuning techniques, detecting and resolving bias and how to deploy large solutions at scale continues to evolve at a rapid pace. One of the biggest challenges enterprises come up against is a lack of standardized software tools and technologies from which to choose. Additionally, enterprises want the flexibility to innovate rapidly, extend the functionality to meet their business needs while ensuring the solution is secure and trustworthy. The lack of a framework that encompasses both proprietary and open solutions impedes enterprises from charting their destiny.
This results in enormous investment of time and money impacting time-to-market advantage. OPEA answers the need for a multi-provider, ecosystem-supported framework that enables the evaluation, selection, customization, and trusted deployment of solutions that businesses can rely on. +Enterprises face a myriad of challenges in the development and deployment of GenAI. The development of new models, algorithms, fine-tuning techniques, detecting and resolving bias, and how to deploy large solutions at scale continues to evolve at a rapid pace. One of the biggest challenges enterprises come up against is a lack of standardized software tools and technologies from which to choose. Additionally, enterprises want the flexibility to innovate rapidly, extend functionality to meet their business needs while ensuring the solution is secure and trustworthy. The lack of a framework that encompasses both proprietary and open solutions impedes enterprises from charting their destiny. This results in an enormous investment of time and money, impacting the time-to-market advantage. OPEA answers the need for a multi-provider, ecosystem-supported framework that enables the evaluation, selection, customization, and trusted deployment of solutions that businesses can rely on. ## Why now? -The major adoption and deployment cycle of robust, secure, enterprise-grade Gen AI solutions across all industries is at its early stages. Enterprise-grade solutions will require collaboration in the open ecosystem. The time is now for the ecosystem to come together and accelerate GenAI deployments across enterprises by offering a standardized set of tools and technologies while supporting three key tenets – open, security, and scalability. This will require the ecosystem to work together to build reference implementations that are performant, trustworthy and enterprise-grade ready. +The major adoption and deployment cycle of robust, secure, enterprise-grade GenAI solutions across all industries is in its early stages. Enterprise-grade solutions will require collaboration in the open ecosystem. The time is now for the ecosystem to come together and accelerate GenAI deployments across enterprises by offering a standardized set of tools and technologies while supporting three key tenets – openness, security, and scalability. This will require the ecosystem to work together to build reference implementations that are performant, trustworthy, and enterprise-grade ready. ## How does it compare to other options for deploying Gen AI solutions within the enterprise? -There is not an alternative that brings the entire ecosystem together in a vendor neutral manner and delivers on the promise of open, security and scalability. This is our primary motivation for creating OPEA project. +There is no alternative that brings the entire ecosystem together in a vendor-neutral manner and delivers on the promise of openness, security, and scalability. This is our primary motivation for creating the OPEA project. ## Will OPEA reference implementations work with proprietary components? -Like any other open-source project, the community will determine which components are needed by the broader ecosystem. Enterprises can always extend OPEA project with other multi-vendor proprietary solutions to achieve their business goals. +Like any other open-source project, the community will determine which components are needed by the broader ecosystem. 
+Enterprises can always extend the OPEA project with other multi-vendor
+proprietary solutions to achieve their business goals.

## What does the OPEA acronym stand for?
-Open Platform for Enterprise AI
+Open Platform for Enterprise AI.

## How do I pronounce OPEA?
-It is said ‘OH-PEA-AY'
-
-## What companies and open-source projects are part of OPEA?
-AnyScale
-Cloudera
-DataStax
-Domino Data Lab
-HuggingFace
-Intel
-KX
-MariaDB Foundation
-MinIO
-Qdrant
-Red Hat
-SAS
-VMware by Broadcom
-Yellowbrick Data
-Zilliz
+It is pronounced ‘OH-PEA-AY.’
+
+## What initial companies and open-source projects joined OPEA?
+AnyScale, Cloudera, DataStax, Domino Data Lab, HuggingFace, Intel, KX, MariaDB Foundation, MinIO, Qdrant, Red Hat, SAS, VMware by Broadcom, Yellowbrick Data, Zilliz.

## What is Intel contributing?
-OPEA is to be defined jointly by several community partners, with a call for broad ecosystem contribution, under the well-established LF AI & Data Foundation. As a starting point, Intel has contributed a Technical Conceptual Framework that shows how to construct and optimize curated GenAI pipelines built for secure, turnkey enterprise deployment. At launch, Intel contributed several reference implementations on Intel hardware across Intel® Xeon® 5, Intel® Xeon® 6 and Intel® Gaudi® 2, which you can see in a Github repo here. Over time we intend to add to that contribution including a software infrastructure stack to enable fully containerized AI workload deployments as well as potentially implementations of those containerized workloads.
+OPEA is to be defined jointly by several community partners, with a call for broad ecosystem contribution, under the well-established LF AI & Data Foundation. As a starting point, Intel has contributed a Technical Conceptual Framework that shows how to construct and optimize curated GenAI pipelines built for secure, turnkey enterprise deployment. At launch, Intel contributed several reference implementations on Intel hardware across Intel® Xeon® 5, Intel® Xeon® 6, and Intel® Gaudi® 2, which you can see in a GitHub repo here. Over time we intend to add to that contribution, including a software infrastructure stack to enable fully containerized AI workload deployments, as well as potentially implementations of those containerized workloads.

-## When you say Technical Conceptual Framework, what components are included?
-The models and modules can be part of an OPEA repository, or be published in a stable unobstructed repository (e.g., Hugging Face) and cleared for use by an OPEA assessment. These include:
+## When you say Technical Conceptual Framework, what components are included?
+The models and modules can be part of an OPEA repository or be published in a stable, unobstructed repository (e.g., Hugging Face) and cleared for use by an OPEA assessment. These include:

-GenAI models – Large Language Models (LLMs), Large Vision Models (LVMs), multimodal models, etc.
* Ingest/Data Processing
* Embedding Models/Services
* Indexing/Vector/Graph data stores

@@ -68,17 +53,13 @@ There are different ways partners can contribute to this project:
* Build the infrastructure to support OPEA projects

## Where can partners see the latest draft of the Conceptual Framework spec?
-A version of the spec is available in the docs repo in this project
+A version of the spec is available in the documentation (["docs"](https://github.com/opea-project/docs)) repository within this project.

## Is there a cost for joining?
-There is no cost for anyone to join and contribute.
+There is no cost for anyone to join and contribute to the OPEA project. -## Do I need to be Linux Foundation member to join? +## Do I need to be a Linux Foundation member to join? Anyone can join and contribute. You don’t need to be a Linux Foundation member. -## Where can I report a bug? -Vulnerability reports can be sent to info@opea.dev. - - - - +## Where can I report a bug or vulnerability? +Vulnerability reports and bug submissions can be sent to [info@opea.dev](mailto:info@opea.dev). \ No newline at end of file diff --git a/index.rst b/index.rst index 0d9f9c21..6472921f 100644 --- a/index.rst +++ b/index.rst @@ -4,11 +4,11 @@ OPEA Project Documentation ########################## Welcome to the OPEA Project (|version|) documentation published |today|. -OPEA streamlines implementation of enterprise-grade Generative AI by efficiently -integrating secure, performant, and cost-effective Generative AI workflows to business value. +OPEA streamlines the implementation of enterprise-grade Generative AI by efficiently +integrating secure, performant, and cost-effective Generative AI workflows into business processes. Source code for the OPEA Project is maintained in the -`OPEA Project GitHub repo`_. +`OPEA Project GitHub repository`_. .. comment The links in this grid display can't use :ref: because we're using raw html. There's a risk of broken links if referenced content is @@ -20,17 +20,23 @@ Source code for the OPEA Project is maintained in the

[Raw-HTML quick-links grid diff condensed: these hunks of index.rst retitle
and reorder the landing-page cards. "What is OPEA" becomes "What is OPEA?",
with its description changed from "Overview, architecture, and features" to
"Learn about the OPEA architecture, features, and benefits."; the "Getting
Started" description changes from "Getting started guide for running ChatQnA
example" to "Start building GenAI solutions or contribute to the community.";
an "OPEA Community" card ("Join the OPEA community with your contributions and
solution ideas.") replaces the old "Community Guides" text and moves up in the
grid; "Deploy AI Solutions" is retitled "Deploy GenAI Solutions" ("Select from
several deployment strategies that best match your enterprise needs."); and
the "Release Notes" card ("OPEA release notes archive") is replaced by "Browse
GenAI Microservices" ("Use modular building blocks to build robust GenAI
solutions.").]
@@ -79,4 +77,4 @@ Source code for the OPEA Project is maintained in the
   release_notes/index
   faq

-.. _OPEA Project GitHub repo: https://github.com/opea-project
+.. _OPEA Project GitHub repository: https://github.com/opea-project

diff --git a/introduction/index.rst b/introduction/index.rst
index 1eb0c84d..a9bab2ab 100644
--- a/introduction/index.rst
+++ b/introduction/index.rst
@@ -4,16 +4,85 @@ OPEA Overview
#############

OPEA (Open Platform for Enterprise AI) is a framework that enables the creation
-and evaluation of open, multi-provider, robust and composable GenAI solutions
-that harness the best innovation across the ecosystem.
-
-OPEA is an ecosystem-wide program within the Linux Foundation Data & AI
-framework that aims to accelerate enterprise adoption of GenAI end-to-end
-solutions and realize business value. OPEA will simplify the implementation of
-enterprise-grade composite GenAI solutions, including Retrieval Augmented
-Generative AI (RAG). The platform is designed to facilitate efficient
-integration of secure, performant, and cost-effective GenAI workflows into
-business systems and manage its deployments.
+and evaluation of open, multi-provider, robust, and composable generative AI
+(GenAI) solutions. It harnesses the best innovations across the ecosystem while
+keeping enterprise-level needs front and center.
+
+OPEA simplifies the implementation of enterprise-grade composite GenAI
+solutions, starting with a focus on Retrieval Augmented Generative AI (RAG).
+The platform is designed to facilitate efficient integration of secure,
+performant, and cost-effective GenAI workflows into business systems and manage
+its deployments, leading to quicker GenAI adoption and business value.
+
+The OPEA platform includes:
+
+* Detailed framework of composable microservices building blocks for
+  state-of-the-art GenAI systems including LLMs, data stores, and prompt engines
+
+* Architectural blueprints of retrieval-augmented GenAI component stack
+  structure and end-to-end workflows
+
+* Multiple micro- and megaservices to get your GenAI application into
+  production
+
+* A four-step assessment for grading GenAI systems around performance,
+  features, trustworthiness, and enterprise-grade readiness
+
+OPEA Project Architecture
+*************************
+
+OPEA uses microservices to create high-quality GenAI applications for
+enterprises, simplifying the scaling and deployment process for production.
+These microservices leverage a service composer that assembles them into a
+megaservice, thereby creating real-world Enterprise AI applications.
+
+Microservices: Flexible and Scalable Architecture
+=================================================
+
+The :ref:`GenAIComps` documentation describes
+a suite of microservices. Each microservice is designed to perform a specific
+function or task within the application architecture. By breaking down the
+system into these smaller, self-contained services, microservices promote
+modularity, flexibility, and scalability. This modular approach allows
+developers to independently develop, deploy, and scale individual components of
+the application, making it easier to maintain and evolve over time. All of the
+microservices are containerized, allowing cloud native deployment.
+
+Megaservices: A Comprehensive Solution
+======================================
+
+Megaservices are higher-level architectural constructs composed of one or more
+microservices.
+Unlike individual microservices, which focus on specific tasks or functions,
+a megaservice orchestrates multiple microservices to deliver a comprehensive
+solution. Megaservices encapsulate complex business logic and workflow
+orchestration, coordinating the interactions between various microservices to
+fulfill specific application requirements. This approach enables the creation
+of modular yet integrated applications. You can find a collection of use
+case-based applications in the :ref:`GenAIExamples` documentation.
+
+Gateways: Customized Access to Mega- and Microservices
+======================================================
+
+The Gateway serves as the interface for users to access a megaservice,
+providing customized access based on user requirements. It acts as the entry
+point for incoming requests, routing them to the appropriate microservices
+within the megaservice architecture.
+
+Gateways support API definition, API versioning, rate limiting, and request
+transformation, allowing for fine-grained control over how users interact with
+the underlying microservices. By abstracting the complexity of the underlying
+infrastructure, gateways provide a seamless and user-friendly experience for
+interacting with the megaservice.
+
+Next Steps
+**********
+
+Links to:
+
+* Getting Started Guide
+* Get Involved with the OPEA Open Source Community
+* Browse the OPEA wiki, mailing lists, and working groups:
+  https://wiki.lfaidata.foundation/display/DL/OPEA+Home

.. toctree::
   :maxdepth: 1
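To illustrate the composition idea just described, here is a toy sketch of a
megaservice coordinating microservices behind a gateway-style entry point. All
names here are hypothetical; this is not the GenAIComps API, just the shape of
the pattern.

.. code-block:: python

   from typing import Callable, Dict, List

   # Toy stand-ins for microservices: each is a named, self-contained step.
   # In OPEA these would be containerized services; here they are functions.
   Microservice = Callable[[str], str]

   def embedder(text: str) -> str:
       return f"embedded({text})"

   def llm(text: str) -> str:
       return f"answer based on {text}"

   class Megaservice:
       """Toy service composer: runs its microservices in order."""

       def __init__(self, stages: List[Microservice]):
           self.stages = stages

       def handle(self, request: str) -> str:
           for stage in self.stages:
               request = stage(request)
           return request

   # Gateway-style entry point routing requests to a megaservice.
   ROUTES: Dict[str, Megaservice] = {
       "/v1/chatqna": Megaservice([embedder, llm]),
   }

   print(ROUTES["/v1/chatqna"].handle("What is OPEA?"))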
diff --git a/microservices/index.rst b/microservices/index.rst
index b1969f4d..941fa1a6 100644
--- a/microservices/index.rst
+++ b/microservices/index.rst
@@ -1,4 +1,5 @@
.. _GenAIComps:
+.. _GenAIMicroservices:

GenAI Microservices
###################
@@ -6,13 +7,10 @@ GenAI Microservices
GenAI microservices leverage a service composer to assemble a
mega-service tailored for real-world Enterprise AI applications. All
the microservices are containerized, allowing cloud native deployment. Check out
-how the microservices are used in `GenAIExamples`_.
+how the microservices are used in :ref:`GenAIExamples`.

-As we're building this documentation site, for now, read more about these
-microservice components found in the `GenAIComps GitHub repository`_.
-
-.. _GenAIExamples: https://github.com/opea-project/GenAIExamples/blob/main/README.md
-.. _GenAIComps Github repository: https://github.com/opea-project/GenAIComps/blob/main/README.md
+We're building this microservices documentation from content in the
+:GenAIComps_blob:`GenAIComps` GitHub repository.

.. toctree::

diff --git a/release_notes/v0.9.md b/release_notes/v0.9.md
index 51e65457..420782dc 100644
--- a/release_notes/v0.9.md
+++ b/release_notes/v0.9.md
@@ -6,7 +6,7 @@
   - Initialize two Agent examples: AgentQnA and DocIndexRetriever
   - Support for authentication and authorization
   - Add Nginx Component to strengthen backend security
-  - Provid Toxicity Detection Microservice
+  - Provide Toxicity Detection Microservice
   - Support the experimental Fine-tuning microservice

- Enhancement
@@ -20,7 +20,6 @@
   - Support Red Hat OpenShift Container Platform (RHOCP)
   - GenAI Microservices Connector (GMC) successfully tested on Nvidia GPUs
   - Add Kubernetes support for AudioQnA and VisualQnA examples
-  - Support the experimental Horizontal Pod Autoscaling (HPA)

- OPEA Docker Hub: https://hub.docker.com/u/opea
- GitHub IO: https://opea-project.github.io/latest/index.html

diff --git a/scripts/checkmd.sh b/scripts/checkmd.sh
new file mode 100755
index 00000000..0adaae6b
--- /dev/null
+++ b/scripts/checkmd.sh
@@ -0,0 +1,13 @@
+#!/bin/bash
+# Copyright (C) 2024 Intel Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
+# Use pymarkdown to recursively scan all markdown files for problems.
+# Disable rules we don't care to check. If you find others that you'd like to
+# ignore, simply add them to this list.
+
+drules=line-length,no-bare-urls,no-multiple-blanks,blanks-around-fences,no-hard-tabs,blanks-around-headings
+drules=$drules,fenced-code-language,no-duplicate-heading,no-emphasis-as-heading,no-trailing-spaces
+
+pymarkdown --disable-rules $drules scan -r .
+

diff --git a/scripts/codeowners_to_md.py b/scripts/codeowners_to_md.py
new file mode 100755
index 00000000..8c681cff
--- /dev/null
+++ b/scripts/codeowners_to_md.py
@@ -0,0 +1,72 @@
+#!/usr/bin/env python3
+#
+# Copyright (c) 2024, Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+# Convert CODEOWNERS files provided as command line arguments into markdown
+# with H2 heading titles followed by a table with path and owners
+
+import sys
+
+def parse_codeowners(file_path):
+    codeowners = []
+
+    with open(file_path, 'r') as file:
+        for line in file:
+            line = line.strip()
+            # Skip comments and empty lines
+            if not line or line.startswith('#'):
+                continue
+
+            parts = line.split()
+            if len(parts) >= 2:
+                path = parts[0]
+                owners = ', '.join(parts[1:])
+                codeowners.append((path, owners))
+
+    return codeowners
+
+
+def convert_to_markdown_table(codeowners, file_name):
+    # ./.github/CODEOWNERS ./GenAIComps/.github/CODEOWNERS ./GenAIExamples/.github/CODEOWNERS
+    parts = file_name.split('/')
+    # If the repo name is missing, it's the docs repo. Also handle the case when
+    # CODEOWNERS is in the root of the docs repo instead of in a .github directory.
+    repo = parts[1]
+    if repo == '.github':
+        repo = "docs"
+    elif repo == "CODEOWNERS":
+        repo = "docs"
+
+    table = f"\n## {repo} Repository Code Owners\n\n"
+    table += "| Path | Owners |\n"
+    table += "|------|--------|\n"
+
+    for path, owners in codeowners:
+        table += f"| `{path}` | {owners} |\n"
+
+    return table
+
+
+def main():
+    if len(sys.argv) < 2:
+        print("Usage: python codeowners_to_md.py ...")
+        sys.exit(1)
+
+    markdown_output = ""
+
+    for file_path in sys.argv[1:]:
+        try:
+            codeowners = parse_codeowners(file_path)
+            markdown_table = convert_to_markdown_table(codeowners, file_path)
+            markdown_output += markdown_table + "\n"
+        except FileNotFoundError:
+            print(f"Error: File '{file_path}' not found.")
+            sys.exit(1)
+
+    print(markdown_output)
+
+
+if __name__ == "__main__":
+    main()
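For a quick look at what this converter emits, the following throwaway check
could be run from the docs repo root (so the script path resolves). The
CODEOWNERS content here is hypothetical, and note that the H2 heading's repo
name is derived from the file's path, so for a temp file it will be the temp
directory name.

.. code-block:: python

   # Hypothetical sanity check for codeowners_to_md.py.
   import subprocess
   import tempfile
   from pathlib import Path

   with tempfile.TemporaryDirectory() as tmp:
       f = Path(tmp) / "CODEOWNERS"
       # One rule: everything owned by two people (comment lines are skipped).
       f.write_text("# example\n* alice@example.com bob@example.com\n")
       out = subprocess.run(
           ["python3", "scripts/codeowners_to_md.py", str(f)],
           capture_output=True, text=True, check=True,
       ).stdout
       print(out)

   # Expected shape of the output table:
   #
   # ## <repo> Repository Code Owners
   #
   # | Path | Owners |
   # |------|--------|
   # | `*` | alice@example.com, bob@example.com |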
diff --git a/scripts/fix-github-md-refs.sh b/scripts/fix-github-md-refs.sh
index 54cb59aa..7e21ccd0 100755
--- a/scripts/fix-github-md-refs.sh
+++ b/scripts/fix-github-md-refs.sh
@@ -1,28 +1,47 @@
#!/bin/bash

+# Copyright (C) 2024 Intel Corporation.
+# SPDX-License-Identifier: Apache-2.0
+
# We'll post process the markdown files copied to the _build/rst folder to look
# for hard references to the github.com markdown file, for example
# (https://github.com/opea-project/.../blob/.../README.md) and make them
# relative to the _build/rst directory structure where docs are being built

# Work on the current directory or the directory passed as the first argument
-# (as done in the makefile)
+# (as done in the makefile). Normally this is _build/rst

cd ${1:-.}

-files=`grep -ril --include="*.md" 'github.com/opea-project.*\/[^\)]*\.md'`
+# look for markdown files containing a hard github.com/opea-project/...
+# reference to a markdown file
+
+mdfiles=`grep -ril --include="*.md" 'github.com/opea-project.*\/[^\)]*\.md'`

-# fix references to opea-project/blob/main/.../*.md to be to the repo name and
-# subsequent path to the md file
+# fix references to opea-project/tree/main/.../*.md or blob/.../*.md to be to
+# the repo name and subsequent path to the md file (\1 is the repo, \3 is the
+# file path)

-sed -i 's/https:\/\/github.com\/opea-project\/\([^\/]*\)\/\(blob\|tree\)\/main\/\([^)]*\.md\)/\/\1\/\3/g' $files
+sed -i 's/(https:\/\/github.com\/opea-project\/\([^\/]*\)\/\(blob\|tree\)\/main\/\([^)]*\.md\)/(\/\1\/\3/g' $mdfiles

-# links such as (docs/...) should change to (/...) since docs repo is the build root
+# links such as (docs/...) should have the repo name removed since the docs
+# repo is the build root

-sed -i 's/(\/docs\//(\//g' $files
+sed -i 's/(\/docs\//(\//g' $mdfiles

# links to a folder should instead be to the folder's README.md
+# Not automating this for now since there are valid folder references

# sed -i 's/\(\/[a-zA-z]*\))/\1\/README.md)/g' $files

-# fix tagging on code blocks, sigh.
+# fix tagging on code blocks, for the myst parser (vs. GFM syntax)

sed -i 's/^```mermaid/```{mermaid}/' `grep -ril --include="*.md" '\`\`\`mermaid'`
+
+# fix references to opea-project/blob/main/... to use the special role
+# :{repo}_raw:`{path to file}`
+# alas, using sphinx roles doesn't work in markdown files, so leave them alone
+# mdfiles=`grep -ril --include="*.md" '(https:\/\/github.com\/opea-project\/[^\/]*\/blob\/main\/[^\)]*)'`
+# sed -i 's/(https:\/\/github.com\/opea-project\/\([^\/]*\)\/blob\/main\/\([^)]*\)/(:\1_blob:`\2`/g' $mdfiles
+
+# find CODEOWNERS files and generate a markdown table for each one found. This
+# file is included by the codeowners.md file during the doc build so we keep
+# these lists up-to-date.
+
+cfiles=`find -name CODEOWNERS | sort`
+scripts/codeowners_to_md.py $cfiles > community/codeowners.txt

diff --git a/scripts/requirements.txt b/scripts/requirements.txt
index c4ad1425..a186dff4 100644
--- a/scripts/requirements.txt
+++ b/scripts/requirements.txt
@@ -5,3 +5,4 @@ sphinx-tabs==3.4.5
myst-parser>=3.0
sphinx-md==0.0.3
sphinxcontrib-mermaid
+pymarkdownlnt

diff --git a/scripts/show-versions.py b/scripts/show-versions.py
index 3c9f481f..714f7087 100755
--- a/scripts/show-versions.py
+++ b/scripts/show-versions.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python3
#
-# Copyright (c) 2018-2022, Intel Corporation
-#
-# SPDX-License-Identifier: BSD-3-Clause
+# Copyright (c) 2018-2024, Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
#
# Show installed versions of doc building tools (per requirements.txt)

@@ -43,5 +42,5 @@ class color:
     rf.close()

-# Print out the version of Doxygen (not installed via pip3)
+# Print out the version of relevant packages not installed via pip
# print (" " + "doxygen".ljust(25," ") + " version: " + subprocess.check_output(["doxygen", "-v"]).decode("utf-8"))

diff --git a/sphinx/extensions/link_roles.py b/sphinx/extensions/link_roles.py
index cd03b0d2..d11a5639 100644
--- a/sphinx/extensions/link_roles.py
+++ b/sphinx/extensions/link_roles.py
@@ -38,7 +38,7 @@ def setup(app):
     repos = ["GenAIComps", "GenAIEval", "GenAIExamples", "GenAIInfra",
              "Governance", "docs"]
     for r in repos:
-        app.add_role('{}_file'.format(r), autolink('{}{}/blob/{}/%s'.format(baseurl, r, rev)))
+        app.add_role('{}_blob'.format(r), autolink('{}{}/blob/{}/%s'.format(baseurl, r, rev)))
         app.add_role('{}_raw'.format(r), autolink('{}{}/raw/{}/%s'.format(baseurl, r, rev)))

# The role just creates new nodes based on information in the
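As a usage note on the renamed ``_blob`` role: a reference like
``:docs_blob:`README.md``` should expand to a GitHub blob URL. The following
tiny mirror of the expansion is a sketch; the ``baseurl`` and ``rev`` values
are assumptions rather than values read from the extension (the extension may
pin ``rev`` to a release tag instead of ``main``).

.. code-block:: python

   # Sketch of what the autolink pattern in link_roles.py produces.
   baseurl = "https://github.com/opea-project/"
   rev = "main"  # assumption: the extension may substitute a release tag here

   def blob_url(repo: str, path: str) -> str:
       # mirrors autolink('{}{}/blob/{}/%s'.format(baseurl, repo, rev)) % path
       return "{}{}/blob/{}/{}".format(baseurl, repo, rev, path)

   print(blob_url("docs", "README.md"))
   # -> https://github.com/opea-project/docs/blob/main/README.md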