From ae6b8104aa9b2241440a4b093a7b5f6df377b239 Mon Sep 17 00:00:00 2001 From: Andrei Fajardo Date: Tue, 10 Dec 2024 12:43:11 -0500 Subject: [PATCH 1/3] rm --- .../.gitignore | 0 .../BUILD | 3 - .../Makefile | 17 - .../README.md | 1 - .../embeddings/huggingface_itrex/BUILD | 1 - .../embeddings/huggingface_itrex/__init__.py | 3 - .../embeddings/huggingface_itrex/base.py | 171 ------ .../pyproject.toml | 68 --- .../llama-index-embeddings-octoai/.gitignore | 155 ------ .../llama-index-embeddings-octoai/BUILD | 3 - .../llama-index-embeddings-octoai/Makefile | 17 - .../llama-index-embeddings-octoai/README.md | 36 -- .../llama_index/embeddings/octoai/BUILD | 1 - .../llama_index/embeddings/octoai/__init__.py | 3 - .../llama_index/embeddings/octoai/base.py | 74 --- .../llama_index/embeddings/octoai/utils.py | 33 -- .../pyproject.toml | 57 -- .../llms/llama-index-llms-solar/.gitignore | 153 ------ .../llms/llama-index-llms-solar/BUILD | 3 - .../llms/llama-index-llms-solar/Makefile | 17 - .../llms/llama-index-llms-solar/README.md | 1 - .../llama_index/llms/solar/BUILD | 1 - .../llama_index/llms/solar/__init__.py | 3 - .../llms/llama-index-llms-unify/.gitignore | 153 ------ .../llms/llama-index-llms-unify/BUILD | 3 - .../llms/llama-index-llms-unify/Makefile | 17 - .../llms/llama-index-llms-unify/README.md | 5 - .../llama_index/llms/unify/BUILD | 1 - .../llama_index/llms/unify/__init__.py | 4 - .../llama_index/llms/unify/base.py | 28 - .../llama-index-llms-unify/pyproject.toml | 57 -- .../llms/llama-index-llms-unify/tests/BUILD | 1 - .../llama-index-llms-unify/tests/__init__.py | 0 .../tests/test_llms_unify.py | 7 - .../.gitignore | 153 ------ .../llama-index-readers-azure-devops/BUILD | 3 - .../llama-index-readers-azure-devops/Makefile | 17 - .../README.md | 30 -- .../llama_index/readers/azure_devops/BUILD | 1 - .../readers/azure_devops/__init__.py | 4 - .../llama_index/readers/azure_devops/base.py | 182 ------- .../pyproject.toml | 57 -- .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_readers_azure_devops.py | 7 - .../llama-index-readers-clickhouse/.gitignore | 153 ------ .../llama-index-readers-clickhouse/BUILD | 3 - .../llama-index-readers-clickhouse/Makefile | 17 - .../llama-index-readers-clickhouse/README.md | 47 -- .../llama_index/readers/clickhouse/BUILD | 1 - .../readers/clickhouse/__init__.py | 7 - .../llama_index/readers/clickhouse/base.py | 165 ------ .../pyproject.toml | 63 --- .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_readers_clickhouse.py | 7 - .../.gitignore | 153 ------ .../llama-index-readers-feishu-wiki/BUILD | 3 - .../CHANGELOG.md | 5 - .../llama-index-readers-feishu-wiki/Makefile | 17 - .../llama-index-readers-feishu-wiki/README.md | 22 - .../llama_index/readers/feishu_wiki/BUILD | 1 - .../readers/feishu_wiki/__init__.py | 3 - .../llama_index/readers/feishu_wiki/base.py | 150 ------ .../pyproject.toml | 64 --- .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_readers_feishu_wiki.py | 7 - .../llama-index-readers-openapi/.gitignore | 153 ------ .../readers/llama-index-readers-openapi/BUILD | 3 - .../llama-index-readers-openapi/Makefile | 17 - .../llama-index-readers-openapi/README.md | 14 - .../llama_index/readers/openapi/BUILD | 1 - .../llama_index/readers/openapi/__init__.py | 3 - .../llama_index/readers/openapi/base.py | 84 --- .../pyproject.toml | 53 -- .../llama-index-readers-openapi/tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_readers_openapi.py | 7 - .../llama-index-readers-readme/.gitignore | 153 ------ 
.../readers/llama-index-readers-readme/BUILD | 7 - .../llama-index-readers-readme/Makefile | 17 - .../llama-index-readers-readme/README.md | 20 - .../llama_index/readers/readme/BUILD | 1 - .../llama_index/readers/readme/__init__.py | 3 - .../llama_index/readers/readme/base.py | 154 ------ .../llama-index-readers-readme/pyproject.toml | 56 -- .../requirements.txt | 2 - .../llama-index-readers-readme/tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_readers_readme.py | 7 - .../.gitignore | 153 ------ .../BUILD | 7 - .../CHANGELOG.md | 5 - .../Makefile | 17 - .../README.md | 22 - .../readers/snscrape_twitter/BUILD | 1 - .../readers/snscrape_twitter/__init__.py | 3 - .../readers/snscrape_twitter/base.py | 39 -- .../pyproject.toml | 64 --- .../requirements.txt | 1 - .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_readers_snscrape_twitter.py | 7 - .../.gitignore | 153 ------ .../.pre-commit-config.yaml | 6 - .../BUILD | 3 - .../Makefile | 17 - .../README.md | 44 -- .../readers/youtube_metadata/BUILD | 1 - .../readers/youtube_metadata/__init__.py | 6 - .../readers/youtube_metadata/base.py | 66 --- .../pants | 510 ------------------ .../pyproject.toml | 58 -- .../tests/BUILD | 5 - .../tests/__init__.py | 0 .../tests/test_readers_youtube_metadata.py | 12 - .../.gitignore | 153 ------ .../BUILD | 3 - .../CHANGELOG.md | 5 - .../Makefile | 17 - .../README.md | 25 - .../examples/passio_nutrition_ai.ipynb | 170 ------ .../tools/passio_nutrition_ai/BUILD | 1 - .../tools/passio_nutrition_ai/__init__.py | 7 - .../tools/passio_nutrition_ai/base.py | 144 ----- .../pyproject.toml | 63 --- .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_tools_nutrition_ai.py | 7 - .../.gitignore | 153 ------ .../BUILD | 3 - .../Makefile | 17 - .../README.md | 1 - .../llama_index/BUILD | 4 - .../llama_index/py.typed | 0 .../vector_stores/chatgpt_plugin/BUILD | 1 - .../vector_stores/chatgpt_plugin/__init__.py | 3 - .../vector_stores/chatgpt_plugin/base.py | 189 ------- .../pyproject.toml | 62 --- .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../test_vector_stores_chatgpt_plugin.py | 7 - .../.gitignore | 153 ------ .../llama-index-vector-stores-metal/BUILD | 3 - .../llama-index-vector-stores-metal/Makefile | 17 - .../llama-index-vector-stores-metal/README.md | 1 - .../llama_index/BUILD | 4 - .../llama_index/py.typed | 0 .../llama_index/vector_stores/metal/BUILD | 1 - .../vector_stores/metal/__init__.py | 3 - .../llama_index/vector_stores/metal/base.py | 183 ------- .../pyproject.toml | 63 --- .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_vector_stores_metal.py | 7 - .../llama-index-packs-docugami-kg-rag/BUILD | 3 - .../README.md | 45 -- .../examples/BUILD | 1 - .../examples/example.py | 21 - .../llama_index/packs/docugami_kg_rag/BUILD | 1 - .../packs/docugami_kg_rag/__init__.py | 5 - .../llama_index/packs/docugami_kg_rag/base.py | 97 ---- .../packs/docugami_kg_rag/config/BUILD | 1 - .../packs/docugami_kg_rag/config/__init__.py | 50 -- .../packs/docugami_kg_rag/helpers/BUILD | 1 - .../packs/docugami_kg_rag/helpers/__init__.py | 0 .../helpers/fused_summary_retriever.py | 183 ------- .../packs/docugami_kg_rag/helpers/indexing.py | 189 ------- .../packs/docugami_kg_rag/helpers/prompts.py | 103 ---- .../packs/docugami_kg_rag/helpers/reports.py | 183 ------- .../docugami_kg_rag/helpers/retrieval.py | 126 ----- .../docugami_kg_rag/helpers/summaries.py | 113 ---- .../docugami_kg_rag/helpers/vector_store.py | 24 - .../pyproject.toml | 49 -- .../llama-index-packs-finchat/.gitignore 
| 153 ------ .../llama-index-packs-finchat/BUILD | 3 - .../llama-index-packs-finchat/Makefile | 17 - .../llama-index-packs-finchat/README.md | 61 --- .../llama-index-packs-finchat/examples/BUILD | 1 - .../examples/example.py | 37 -- .../llama_index/packs/finchat/BUILD | 1 - .../llama_index/packs/finchat/__init__.py | 3 - .../llama_index/packs/finchat/base.py | 375 ------------- .../llama-index-packs-finchat/pyproject.toml | 61 --- .../.gitignore | 153 ------ .../BUILD | 7 - .../CHANGELOG.md | 5 - .../Makefile | 17 - .../README.md | 58 -- .../packs/redis_ingestion_pipeline/BUILD | 1 - .../redis_ingestion_pipeline/__init__.py | 3 - .../packs/redis_ingestion_pipeline/base.py | 57 -- .../pyproject.toml | 66 --- .../requirements.txt | 1 - .../tests/BUILD | 1 - .../tests/__init__.py | 0 .../test_packs_redis_ingestion_pipeline.py | 7 - .../llama-index-packs-searchain/.gitignore | 153 ------ .../llama-index-packs-searchain/BUILD | 3 - .../llama-index-packs-searchain/Makefile | 17 - .../llama-index-packs-searchain/README.md | 44 -- .../examples/searchain.ipynb | 108 ---- .../llama_index/packs/searchain/BUILD | 1 - .../llama_index/packs/searchain/__init__.py | 4 - .../llama_index/packs/searchain/base.py | 244 --------- .../pyproject.toml | 60 --- .../llama-index-packs-searchain/tests/BUILD | 1 - .../tests/__init__.py | 0 .../tests/test_packs_searchain.py | 7 - .../llama-index-packs-subdoc-summary/BUILD | 3 - .../llama-index-packs-subdoc-summary/Makefile | 17 - .../README.md | 53 -- .../examples/subdoc-summary.ipynb | 357 ------------ .../llama_index/packs/subdoc_summary/BUILD | 1 - .../packs/subdoc_summary/__init__.py | 4 - .../llama_index/packs/subdoc_summary/base.py | 93 ---- .../pyproject.toml | 62 --- .../tests/__init__.py | 0 .../llama-index-packs-vanna/.gitignore | 153 ------ .../llama-index-packs-vanna/BUILD | 3 - .../llama-index-packs-vanna/CHANGELOG.md | 5 - .../llama-index-packs-vanna/Makefile | 17 - .../llama-index-packs-vanna/README.md | 55 -- .../examples/vanna.ipynb | 427 --------------- .../llama_index/packs/vanna/BUILD | 1 - .../llama_index/packs/vanna/__init__.py | 4 - .../llama_index/packs/vanna/base.py | 117 ---- .../llama-index-packs-vanna/pyproject.toml | 67 --- .../llama-index-packs-vanna/tests/BUILD | 3 - .../llama-index-packs-vanna/tests/__init__.py | 0 .../tests/test_packs_vanna.py | 7 - 232 files changed, 10247 deletions(-) delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/.gitignore delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/Makefile delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/README.md delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/__init__.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/base.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/pyproject.toml delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/.gitignore delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/BUILD delete mode 
100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/Makefile delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/README.md delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/BUILD delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/__init__.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/base.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/utils.py delete mode 100644 llama-index-integrations/embeddings/llama-index-embeddings-octoai/pyproject.toml delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/.gitignore delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/BUILD delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/Makefile delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/README.md delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/BUILD delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/__init__.py delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/.gitignore delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/BUILD delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/Makefile delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/README.md delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/BUILD delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/__init__.py delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/base.py delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/pyproject.toml delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/tests/BUILD delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/tests/__init__.py delete mode 100644 llama-index-integrations/llms/llama-index-llms-unify/tests/test_llms_unify.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/base.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-azure-devops/tests/test_readers_azure_devops.py delete mode 100644 
llama-index-integrations/readers/llama-index-readers-clickhouse/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/base.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-clickhouse/tests/test_readers_clickhouse.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/CHANGELOG.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/base.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/test_readers_feishu_wiki.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/base.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-openapi/tests/test_readers_openapi.py delete mode 100644 
llama-index-integrations/readers/llama-index-readers-readme/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/requirements.txt delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-readme/tests/test_readers_readme.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/CHANGELOG.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/base.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/requirements.txt delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/test_readers_snscrape_twitter.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/base.py delete mode 100644 
llama-index-integrations/readers/llama-index-readers-youtube-metadata/pants delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/pyproject.toml delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/__init__.py delete mode 100644 llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/.gitignore delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/BUILD delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/CHANGELOG.md delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/Makefile delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/README.md delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/examples/passio_nutrition_ai.ipynb delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/BUILD delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/__init__.py delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/base.py delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/pyproject.toml delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/BUILD delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/__init__.py delete mode 100644 llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/test_tools_nutrition_ai.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/.gitignore delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/Makefile delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/README.md delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/py.typed delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/__init__.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/__init__.py delete mode 100644 
llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/.gitignore delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/Makefile delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/README.md delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/py.typed delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/__init__.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/BUILD delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/__init__.py delete mode 100644 llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/BUILD delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/README.md delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/examples/BUILD delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/examples/example.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/BUILD delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/BUILD delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/BUILD delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/fused_summary_retriever.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/indexing.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/prompts.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/reports.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/retrieval.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/summaries.py delete mode 100644 
llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/vector_store.py delete mode 100644 llama-index-packs/llama-index-packs-docugami-kg-rag/pyproject.toml delete mode 100644 llama-index-packs/llama-index-packs-finchat/.gitignore delete mode 100644 llama-index-packs/llama-index-packs-finchat/BUILD delete mode 100644 llama-index-packs/llama-index-packs-finchat/Makefile delete mode 100644 llama-index-packs/llama-index-packs-finchat/README.md delete mode 100644 llama-index-packs/llama-index-packs-finchat/examples/BUILD delete mode 100644 llama-index-packs/llama-index-packs-finchat/examples/example.py delete mode 100644 llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/BUILD delete mode 100644 llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/base.py delete mode 100644 llama-index-packs/llama-index-packs-finchat/pyproject.toml delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/.gitignore delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/BUILD delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/CHANGELOG.md delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/Makefile delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/README.md delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/BUILD delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/base.py delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/pyproject.toml delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/requirements.txt delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/BUILD delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/test_packs_redis_ingestion_pipeline.py delete mode 100644 llama-index-packs/llama-index-packs-searchain/.gitignore delete mode 100644 llama-index-packs/llama-index-packs-searchain/BUILD delete mode 100644 llama-index-packs/llama-index-packs-searchain/Makefile delete mode 100644 llama-index-packs/llama-index-packs-searchain/README.md delete mode 100644 llama-index-packs/llama-index-packs-searchain/examples/searchain.ipynb delete mode 100644 llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/BUILD delete mode 100644 llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/base.py delete mode 100644 llama-index-packs/llama-index-packs-searchain/pyproject.toml delete mode 100644 llama-index-packs/llama-index-packs-searchain/tests/BUILD delete mode 100644 llama-index-packs/llama-index-packs-searchain/tests/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-searchain/tests/test_packs_searchain.py delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/BUILD delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/Makefile delete 
mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/README.md delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/examples/subdoc-summary.ipynb delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/BUILD delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/base.py delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/pyproject.toml delete mode 100644 llama-index-packs/llama-index-packs-subdoc-summary/tests/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-vanna/.gitignore delete mode 100644 llama-index-packs/llama-index-packs-vanna/BUILD delete mode 100644 llama-index-packs/llama-index-packs-vanna/CHANGELOG.md delete mode 100644 llama-index-packs/llama-index-packs-vanna/Makefile delete mode 100644 llama-index-packs/llama-index-packs-vanna/README.md delete mode 100644 llama-index-packs/llama-index-packs-vanna/examples/vanna.ipynb delete mode 100644 llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/BUILD delete mode 100644 llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/base.py delete mode 100644 llama-index-packs/llama-index-packs-vanna/pyproject.toml delete mode 100644 llama-index-packs/llama-index-packs-vanna/tests/BUILD delete mode 100644 llama-index-packs/llama-index-packs-vanna/tests/__init__.py delete mode 100644 llama-index-packs/llama-index-packs-vanna/tests/test_packs_vanna.py diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/.gitignore deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. 
- sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/README.md deleted file mode 100644 index f2b9567b73503..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/README.md +++ /dev/null @@ -1 +0,0 @@ -# LlamaIndex Embeddings Integration: Huggingface Intel Extension for Transformers diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/__init__.py deleted file mode 100644 index 79d97e315bd86..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.embeddings.huggingface_itrex.base import ItrexQuantizedBgeEmbedding - -__all__ = ["ItrexQuantizedBgeEmbedding"] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/base.py deleted file mode 100644 index c739d9ff8a72d..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/llama_index/embeddings/huggingface_itrex/base.py +++ /dev/null @@ -1,171 +0,0 @@ -import os -from typing import Any, List, Optional - -from llama_index.core.base.embeddings.base import ( - DEFAULT_EMBED_BATCH_SIZE, - BaseEmbedding, -) -from llama_index.core.bridge.pydantic import Field, PrivateAttr -from llama_index.core.callbacks import CallbackManager -from llama_index.embeddings.huggingface.utils import format_query, format_text -from transformers import AutoTokenizer, AutoConfig - - -class ItrexQuantizedBgeEmbedding(BaseEmbedding): - folder_name: str = Field(description="Folder name to load from.") - pooling: str = Field(description="Pooling strategy. One of ['cls', 'mean'].") - max_length: int = Field(description="Maximum length of input.") - normalize: bool = Field(default=True, description="Normalize embeddings or not.") - query_instruction: Optional[str] = Field( - description="Instruction to prepend to query text." - ) - text_instruction: Optional[str] = Field( - description="Instruction to prepend to text." - ) - onnx_file_name: Optional[str] = Field( - description="File name of onnx optimized model which is exported by itrex."
- ) - - _model: Any = PrivateAttr() - _tokenizer: Any = PrivateAttr() - _hidden_size: Any = PrivateAttr() - - def __init__( - self, - folder_name: str, - pooling: str = "cls", - max_length: Optional[int] = None, - normalize: bool = True, - query_instruction: Optional[str] = None, - text_instruction: Optional[str] = None, - tokenizer: Optional[Any] = None, - embed_batch_size: int = DEFAULT_EMBED_BATCH_SIZE, - callback_manager: Optional[CallbackManager] = None, - onnx_file_name: Optional[str] = "int8-model.onnx", - ): - try: - from intel_extension_for_transformers.transformers import AutoModel - except ImportError: - raise ImportError( - "Itrex requires the following dependencies; please install with " - "`pip install optimum[exporters] " - "optimum-intel neural-compressor intel_extension_for_transformers`" - ) - - from huggingface_hub import hf_hub_download - - onnx_model_path = os.path.join(folder_name, onnx_file_name) - if not os.path.exists(onnx_model_path): - onnx_model_path = hf_hub_download(folder_name, filename=onnx_file_name) - model = AutoModel.from_pretrained(onnx_model_path, use_embedding_runtime=True) - config = AutoConfig.from_pretrained(folder_name) - hidden_size = config.hidden_size - - tokenizer = tokenizer or AutoTokenizer.from_pretrained(folder_name) - - if max_length is None: - try: - max_length = int(config.max_position_embeddings) - except Exception: - raise ValueError( - "Unable to find max_length from model config. " - "Please provide max_length." - ) - try: - max_length = min(max_length, int(tokenizer.model_max_length)) - except Exception as exc: - print(f"An error occurred while retrieving tokenizer max length: {exc}") - - if pooling not in ["cls", "mean"]: - raise ValueError(f"Pooling {pooling} not supported.") - - super().__init__( - embed_batch_size=embed_batch_size, - callback_manager=callback_manager, - folder_name=folder_name, - max_length=max_length, - pooling=pooling, - normalize=normalize, - query_instruction=query_instruction, - text_instruction=text_instruction, - ) - self._model = model - self._tokenizer = tokenizer - self._hidden_size = hidden_size - - @classmethod - def class_name(cls) -> str: - return "ItrexQuantizedBgeEmbedding" - - def _mean_pooling(self, last_hidden_state: Any, attention_mask: Any) -> Any: - """Mean Pooling - Take attention mask into account for correct averaging.""" - import torch - - input_mask_expanded = ( - attention_mask.unsqueeze(-1).expand(last_hidden_state.size()).float() - ) - sum_embeddings = torch.sum(last_hidden_state * input_mask_expanded, 1) - sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9) - return sum_embeddings / sum_mask - - def _cls_pooling(self, last_hidden_state: list) -> Any: - """Use the CLS token as the pooling token.""" - return last_hidden_state[:, 0] - - def _embed(self, sentences: List[str]) -> List[List[float]]: - """Embed sentences.""" - encoded_input = self._tokenizer( - sentences, - padding=True, - max_length=self.max_length, - truncation=True, - return_tensors="pt", - ) - import torch - - engine_input = list(encoded_input.values()) - outputs = self._model.generate(engine_input) - if "last_hidden_state:0" in outputs: - last_hidden_state = outputs["last_hidden_state:0"] - else: - last_hidden_state = next(iter(outputs.values())) - last_hidden_state = torch.tensor(last_hidden_state).reshape( - encoded_input["input_ids"].shape[0], - encoded_input["input_ids"].shape[1], - self._hidden_size, - ) - if self.pooling == "mean": - emb = self._mean_pooling(last_hidden_state, 
encoded_input["attention_mask"]) - elif self.pooling == "cls": - emb = self._cls_pooling(last_hidden_state) - else: - raise ValueError("pooling method no supported") - - if self.normalize: - emb = torch.nn.functional.normalize(emb, p=2, dim=1) - return emb.tolist() - - def _get_query_embedding(self, query: str) -> List[float]: - """Get query embedding.""" - query = format_query(query, self.model_name, self.query_instruction) - return self._embed([query])[0] - - async def _aget_query_embedding(self, query: str) -> List[float]: - """Get query embedding async.""" - return self._get_query_embedding(query) - - async def _aget_text_embedding(self, text: str) -> List[float]: - """Get text embedding async.""" - return self._get_text_embedding(text) - - def _get_text_embedding(self, text: str) -> List[float]: - """Get text embedding.""" - text = format_text(text, self.model_name, self.text_instruction) - return self._embed([text])[0] - - def _get_text_embeddings(self, texts: List[str]) -> List[List[float]]: - """Get text embeddings.""" - texts = [ - format_text(text, self.model_name, self.text_instruction) for text in texts - ] - return self._embed(texts) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/pyproject.toml deleted file mode 100644 index 1763a78e731be..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex/pyproject.toml +++ /dev/null @@ -1,68 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.embeddings.huggingface_itrex" - -[tool.llamahub.class_authors] -QuantizedBgeEmbedding = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.9" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index embeddings Intel Extension for Transformers integration" -exclude = ["**/BUILD"] -license = "MIT" -name = "llama-index-embeddings-itrex" -readme = "README.md" -version = "0.4.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-core = "^0.12.0" -# intel-extension-for-transformers = "^1.3.2" # PEP 517 build error install with pip instead -torch = "^2.2.2" -accelerate = "^0.28.0" -datasets = "^2.18.0" -onnx = "^1.15.0" -llama-index-embeddings-huggingface = "^0.4.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/.gitignore b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/.gitignore deleted file mode 100644 index cf24f70c67a49..0000000000000 --- 
a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/.gitignore +++ /dev/null @@ -1,155 +0,0 @@ -poetry.lock - -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/Makefile b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest.
- pytest tests - -watch-docs: ## Build and watch documentation. - sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/README.md b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/README.md deleted file mode 100644 index cf98b3c8c5312..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/README.md +++ /dev/null @@ -1,36 +0,0 @@ -# LlamaIndex Embeddings Integration: Octoai - -Using the [OctoAI](https://octo.ai) Embeddings Integration is as simple as: - -```python -from llama_index.embeddings.octoai import OctoAIEmbedding -from os import environ - -OCTOAI_API_KEY = environ["OCTOAI_API_KEY"] -embed_model = OctoAIEmbedding(api_key=OCTOAI_API_KEY) -embeddings = embed_model.get_text_embedding("How do I sail to the moon?") -assert len(embeddings) == 1024 -``` - -One can also request a batch of embeddings via: - -```python -texts = [ - "How do I sail to the moon?", - "What is the best way to cook a steak?", - "How do I apply for a job?", -] - -embeddings = embed_model.get_text_embedding_batch(texts) -assert len(embeddings) == 3 -``` - -## API Access - -[Here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) are some instructions on how to get your OctoAI API key. - -## Contributing - -Follow the good practices of all poetry-based projects. - -When in VScode, one may want to manually select the Python interpreter, especially to run the example iPython notebook. For this use `ctrl+shift+p`, then type or select: `Python: Select Interpreter` diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/BUILD b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/__init__.py b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/__init__.py deleted file mode 100644 index 3a7b732943c52..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.embeddings.octoai.base import OctoAIEmbedding - -__all__ = ["OctoAIEmbedding"] diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/base.py b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/base.py deleted file mode 100644 index 30ed557476fc1..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/base.py +++ /dev/null @@ -1,74 +0,0 @@ -from typing import Any, Dict, Optional - -import httpx -from llama_index.core.bridge.pydantic import Field -from llama_index.core.callbacks import CallbackManager -from llama_index.embeddings.octoai.utils import ( - resolve_octoai_credentials, - DEFAULT_OCTOAI_API_BASE, - DEFAULT_OCTOAI_EMBED_BATCH_SIZE, - DEFAULT_OCTOAI_EMBED_MODEL, -) -from llama_index.embeddings.openai import OpenAIEmbedding - - -class OctoAIEmbedding(OpenAIEmbedding): - """ - OctoAI class for embeddings.
- - Args: - model_name (str): Model for embedding. - Defaults to "thenlper/gte-large" - """ - - additional_kwargs: Dict[str, Any] = Field( - default_factory=dict, description="Additional kwargs for the OctoAI API." - ) - - api_key: str = Field(description="The OctoAI API key.") - api_base: str = Field(description="The base URL for OctoAI's API.") - api_version: str = Field(description="The version for the API.") - - def __init__( - self, - model_name: str = DEFAULT_OCTOAI_EMBED_MODEL, - dimensions: Optional[int] = None, - embed_batch_size: int = DEFAULT_OCTOAI_EMBED_BATCH_SIZE, - additional_kwargs: Optional[Dict[str, Any]] = None, - api_key: Optional[str] = None, - api_base: Optional[str] = DEFAULT_OCTOAI_API_BASE, - api_version: Optional[str] = None, - max_retries: int = 10, - timeout: float = 60.0, - reuse_client: bool = True, - callback_manager: Optional[CallbackManager] = None, - default_headers: Optional[Dict[str, str]] = None, - http_client: Optional[httpx.Client] = None, - **kwargs: Any, - ) -> None: - api_key, api_base, api_version = resolve_octoai_credentials( - api_key=api_key, - api_base=api_base, - api_version=api_version, - ) - - super().__init__( - model_name=model_name, - dimensions=dimensions, - embed_batch_size=embed_batch_size, - additional_kwargs=additional_kwargs, - api_key=api_key, - api_base=api_base, - api_version=api_version, - max_retries=max_retries, - timeout=timeout, - reuse_client=reuse_client, - callback_manager=callback_manager, - default_headers=default_headers, - http_client=http_client, - **kwargs, - ) - - @classmethod - def class_name(cls) -> str: - return "OctoAIEmbedding" diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/utils.py b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/utils.py deleted file mode 100644 index 9de2d5ce3055a..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/llama_index/embeddings/octoai/utils.py +++ /dev/null @@ -1,33 +0,0 @@ -from typing import Optional, Tuple - -from llama_index.core.base.llms.generic_utils import get_from_param_or_env - -DEFAULT_OCTOAI_API_BASE = "https://text.octoai.run/v1" -DEFAULT_OCTOAI_API_VERSION = "" -DEFAULT_OCTOAI_EMBED_MODEL = "thenlper/gte-large" -DEFAULT_OCTOAI_EMBED_BATCH_SIZE = 2048 - - -def resolve_octoai_credentials( - api_key: Optional[str] = None, - api_base: Optional[str] = None, - api_version: Optional[str] = None, -) -> Tuple[Optional[str], str, str]: - """ - Resolve OctoAI credentials. - - The order of precedence is: - 1. param - 2. env - 3.
octoai default - """ - # resolve from param or env - api_key = get_from_param_or_env("api_key", api_key, "OCTOAI_API_KEY", "") - api_base = get_from_param_or_env( - "api_base", api_base, "OCTOAI_API_BASE", DEFAULT_OCTOAI_API_BASE - ) - api_version = get_from_param_or_env( - "api_version", api_version, "OCTOAI_API_VERSION", DEFAULT_OCTOAI_API_VERSION - ) - - return api_key, str(api_base), api_version diff --git a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/pyproject.toml b/llama-index-integrations/embeddings/llama-index-embeddings-octoai/pyproject.toml deleted file mode 100644 index e34ac83b7d8a1..0000000000000 --- a/llama-index-integrations/embeddings/llama-index-embeddings-octoai/pyproject.toml +++ /dev/null @@ -1,57 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -# Feel free to un-skip examples, and experimental, you will just need to -# work through many typos (--write-changes and --interactive will help) -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.embeddings.octoai" - -[tool.llamahub.class_authors] -OctoAIEmbeddings = "ptorru" - -[tool.mypy] -disallow_untyped_defs = true -# Remove venv skip when integrated with pre-commit -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Pedro Torruella <5025399+ptorru@users.noreply.github.com>"] -description = "llama-index embeddings octoai integration" -license = "MIT" -name = "llama-index-embeddings-octoai" -packages = [{include = "llama_index/"}] -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-embeddings-openai = "^0.3.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} -codespell = {extras = ["toml"], version = ">=v2.2.6"} -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 -types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/llms/llama-index-llms-solar/.gitignore b/llama-index-integrations/llms/llama-index-llms-solar/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/llms/llama-index-llms-solar/BUILD b/llama-index-integrations/llms/llama-index-llms-solar/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/llms/llama-index-llms-solar/Makefile b/llama-index-integrations/llms/llama-index-llms-solar/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. 
- sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/llms/llama-index-llms-solar/README.md b/llama-index-integrations/llms/llama-index-llms-solar/README.md deleted file mode 100644 index 74c256c1f65ec..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/README.md +++ /dev/null @@ -1 +0,0 @@ -# LlamaIndex Llms Integration: Solar diff --git a/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/BUILD b/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/__init__.py b/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/__init__.py deleted file mode 100644 index b5a36ee495db3..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.llms.solar.base import Solar - -__all__ = ["Solar"] diff --git a/llama-index-integrations/llms/llama-index-llms-unify/.gitignore b/llama-index-integrations/llms/llama-index-llms-unify/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/llms/llama-index-llms-unify/BUILD b/llama-index-integrations/llms/llama-index-llms-unify/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/llms/llama-index-llms-unify/Makefile b/llama-index-integrations/llms/llama-index-llms-unify/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. - sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/llms/llama-index-llms-unify/README.md b/llama-index-integrations/llms/llama-index-llms-unify/README.md deleted file mode 100644 index 2c4d7fbf816cd..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# LlamaIndex Llms Integration: Unify - -[Unify](https://unify.ai/) dynamically routes each query to the best LLM, with support for providers such as OpenAI, MistralAI, Perplexity AI, and Together AI. You can also access all providers individually using a single API key. - -Check out our [live benchmarks](https://unify.ai/benchmarks/mixtral-8x7b-instruct-v0.1) to see where the data is coming from! 
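For reference, here is a minimal usage sketch of the `Unify` class whose source is deleted below. The routed `model` string follows Unify's "model@provider" convention but is an illustrative placeholder, and the API key falls back to the `UNIFY_API_KEY` environment variable (as the class itself does) when not passed explicitly.

```python
import os

from llama_index.llms.unify import Unify

# Hypothetical routed endpoint string; substitute any model/provider route
# supported by your Unify account.
llm = Unify(
    model="llama-2-70b-chat@lowest-input-cost",
    api_key=os.environ.get("UNIFY_API_KEY"),  # same fallback the class uses
)

response = llm.complete("Summarize what dynamic LLM routing does.")
print(response.text)
```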
diff --git a/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/BUILD b/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/__init__.py b/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/__init__.py deleted file mode 100644 index a7d81e38cdb2b..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from llama_index.llms.unify.base import Unify - - -__all__ = ["Unify"] diff --git a/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/base.py b/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/base.py deleted file mode 100644 index b4f0c4a074f71..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/llama_index/llms/unify/base.py +++ /dev/null @@ -1,28 +0,0 @@ -import os -from typing import Any, Optional - -from llama_index.llms.openai_like import OpenAILike - - -class Unify(OpenAILike): - def __init__( - self, - model: str, - api_key: Optional[str] = None, - api_base: str = "https://api.unify.ai/v0", - is_chat_model: bool = True, - **kwargs: Any, - ) -> None: - api_key = api_key or os.environ.get("UNIFY_API_KEY", None) - super().__init__( - model=model, - api_key=api_key, - api_base=api_base, - is_chat_model=is_chat_model, - **kwargs, - ) - - @classmethod - def class_name(cls) -> str: - """Get class name.""" - return "UnifyLLM" diff --git a/llama-index-integrations/llms/llama-index-llms-unify/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-unify/pyproject.toml deleted file mode 100644 index fa7d4ad80683f..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/pyproject.toml +++ /dev/null @@ -1,57 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -# Feel free to un-skip examples, and experimental, you will just need to -# work through many typos (--write-changes and --interactive will help) -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.llms.unify" - -[tool.llamahub.class_authors] -Unify = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -# Remove venv skip when integrated with pre-commit -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index llms unify integration" -license = "MIT" -name = "llama-index-llms-unify" -packages = [{include = "llama_index/"}] -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-llms-openai-like = "^0.3.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} -codespell = {extras = ["toml"], version = ">=v2.2.6"} -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" 
-types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 -types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/llms/llama-index-llms-unify/tests/BUILD b/llama-index-integrations/llms/llama-index-llms-unify/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/llms/llama-index-llms-unify/tests/__init__.py b/llama-index-integrations/llms/llama-index-llms-unify/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/llms/llama-index-llms-unify/tests/test_llms_unify.py b/llama-index-integrations/llms/llama-index-llms-unify/tests/test_llms_unify.py deleted file mode 100644 index 1fa1737157a53..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-unify/tests/test_llms_unify.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.base.llms.base import BaseLLM -from llama_index.llms.unify import Unify - - -def test_text_inference_embedding_class(): - names_of_base_classes = [b.__name__ for b in Unify.__mro__] - assert BaseLLM.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/.gitignore b/llama-index-integrations/readers/llama-index-readers-azure-devops/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-azure-devops/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/BUILD b/llama-index-integrations/readers/llama-index-readers-azure-devops/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-azure-devops/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/Makefile b/llama-index-integrations/readers/llama-index-readers-azure-devops/Makefile
deleted file mode 100644
index b9eab05aa3706..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-azure-devops/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help:	## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format:	## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test:	## Run tests via pytest.
-	pytest tests
-
-watch-docs:	## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/README.md b/llama-index-integrations/readers/llama-index-readers-azure-devops/README.md
deleted file mode 100644
index e1c32da497439..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-azure-devops/README.md
+++ /dev/null
@@ -1,30 +0,0 @@
-# LlamaIndex Readers Integration: Azure DevOps
-
-`pip install llama-index-readers-azure-devops`
-
-The Azure DevOps readers package enables you to read files from your Azure DevOps repositories.
-
-The reader requires a personal access token (which you can generate under your account settings).
-
-## Usage
-
-This reader will read through a repo, with options to filter for specific directories and file extensions.
-
-Here is an example of how to use it:
-
-```python
-from llama_index.readers.azure_devops import AzureDevopsReader
-
-az_devops_loader = AzureDevopsReader(
-    access_token="",
-    organization_name="",
-    project_name="",
-    repo="",
-    file_filter=lambda file_path: file_path.endswith(".py"),
-)  # Optional: you can provide any callable that returns a boolean to filter files of your choice
-
-documents = az_devops_loader.load_data(
-    folder="",  # The folder to load documents from, defaults to root.
-    branch="",
-)  # The branch to load documents from, defaults to head of the repo
-```
diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/BUILD b/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/__init__.py b/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/__init__.py
deleted file mode 100644
index 848b15aada33d..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from llama_index.readers.azure_devops.base import AzureDevopsReader
-
-
-__all__ = ["AzureDevopsReader"]
diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/base.py b/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/base.py
deleted file mode 100644
index 40b97cf57ea11..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-azure-devops/llama_index/readers/azure_devops/base.py
+++ /dev/null
@@ -1,182 +0,0 @@
-from typing import Dict, List, Optional, Callable
-from llama_index.core.readers.base import BaseReader
-from llama_index.core.schema import Document
-
-
-class AzureDevopsReader(BaseReader):
-    """
-    A loader class for Azure DevOps repositories. This class provides methods to authenticate with Azure DevOps,
-    access repositories, and retrieve file content.
-
-    Attributes:
-        access_token (str): The personal access token for Azure DevOps.
-        organization_name (str): The name of the organization in Azure DevOps.
-        project_name (str): The name of the project in Azure DevOps.
-        repo (str): The name of the repository in the project.
-        organization_url (str): The URL to the organization in Azure DevOps.
-        git_client: The Git client for interacting with Azure DevOps.
-        repository_id: The ID of the repository in Azure DevOps.
-    """
-
-    def __init__(
-        self,
-        access_token: str,
-        organization_name: str,
-        project_name: str,
-        repo: str,
-        file_filter: Optional[Callable[[str], bool]] = None,
-    ):
-        """
-        Initializes the AzureDevopsReader with the necessary details to interact with an Azure DevOps repository.
-
-        Parameters:
-            access_token (str): The personal access token for Azure DevOps.
-            organization_name (str): The name of the organization in Azure DevOps.
-            project_name (str): The name of the project in Azure DevOps.
-            repo (str): The name of the repository in the project.
-            file_filter (Callable, optional): A function used to filter files, e.g. `lambda file_path: file_path.endswith(".py")`
-        """
-        self.access_token = access_token
-        self.project_name = project_name
-        self.repo = repo
-        self.organization_url = f"https://dev.azure.com/{organization_name}/"
-        self.file_filter = file_filter
-
-        self.git_client = self.create_git_client()
-        self.repository_id = self._get_repository_id(repo_name=self.repo)
-
-    def create_git_client(self):
-        """
-        Creates and returns a Git client for interacting with Azure DevOps.
-
-        Returns:
-            The Git client object for Azure DevOps.
- """ - try: - from azure.devops.connection import Connection - from msrest.authentication import BasicAuthentication - except ImportError: - raise ImportError( - "Please install azure-devops to use the AzureDevopsLoader. " - "You can do so by running `pip install azure-devops`." - ) - credentials = BasicAuthentication("", self.access_token) - connection = Connection(base_url=self.organization_url, creds=credentials) - return connection.clients.get_git_client() - - def _get_repository_id(self, repo_name: str): - """ - Retrieves the repository ID for a given repository name. - - Parameters: - repo_name (str): The name of the repository. - - Returns: - The ID of the repository. - """ - repositories = self.git_client.get_repositories(project=self.project_name) - return next((repo.id for repo in repositories if repo.name == repo_name), None) - - def _create_version_descriptor(self, branch: Optional[str]): - """ - Creates a version descriptor for a given branch. - - Parameters: - branch (Optional[str]): The name of the branch to create a version descriptor for. - - Returns: - A version descriptor if a branch is specified, otherwise None. - """ - if branch: - from azure.devops.v7_0.git.models import GitVersionDescriptor - - version_descriptor = GitVersionDescriptor( - version=branch, version_type="branch" - ) - else: - version_descriptor = None - return version_descriptor - - def get_file_paths(self, folder: str = "/", version_descriptor=None) -> List[Dict]: - """ - Retrieves the paths of all files within a given folder in the repository. - - Parameters: - folder (str): The folder to retrieve file paths from, defaults to root. - version_descriptor (Optional): The version descriptor to specify a version or branch. - - Returns: - A list of paths of the files. - """ - items = self.git_client.get_items( - repository_id=self.repository_id, - project=self.project_name, - scope_path=folder, - recursion_level="Full", - version_descriptor=version_descriptor, - ) - return [ - {"path": item.path, "url": item.url} - for item in items - if not (self.file_filter and not self.file_filter(item.path)) - and (item.git_object_type == "blob") - ] - - def get_file_content_by_path(self, path: str, version_descriptor=None): - """ - Retrieves the content of a file by its path in the repository. - - Parameters: - path (str): The path of the file in the repository. - version_descriptor (Optional): The version descriptor to specify a version or branch. - - Returns: - The content of the file as a string. - """ - try: - stream = self.git_client.get_item_text( - repository_id=self.repository_id, - path=path, - project=self.project_name, - download=True, - version_descriptor=version_descriptor, - ) - file_content = "" - # Iterate over the generator object - for chunk in stream: - # Assuming the content is encoded in UTF-8, decode each chunk and append to the file_content string - file_content += chunk.decode("utf-8") - return file_content - except Exception as e: - print(f"failed loading {path}") - return None - - def load_data(self, folder: Optional[str] = "/", branch: Optional[str] = None): - """ - Loads the documents from a specified folder and branch in the repository. - - Parameters: - folder (Optional[str]): The folder to load documents from, defaults to root. - branch (Optional[str]): The branch to load documents from. - - Returns: - A list of Document objects representing the loaded documents. 
- """ - documents = [] - version_descriptor = self._create_version_descriptor(branch=branch) - files = self.get_file_paths( - folder=folder, version_descriptor=version_descriptor - ) - for file in files: - path = file["path"] - content = self.get_file_content_by_path( - path=path, version_descriptor=version_descriptor - ) - if content: - metadata = { - "path": path, - "extension": path.split(".")[-1], - "source": file["url"], - } - documents.append(Document(text=content, extra_info=metadata)) - return documents diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-azure-devops/pyproject.toml deleted file mode 100644 index 401ddc138a5af..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-azure-devops/pyproject.toml +++ /dev/null @@ -1,57 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -# Feel free to un-skip examples, and experimental, you will just need to -# work through many typos (--write-changes and --interactive will help) -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.readers.azure_devops" - -[tool.llamahub.class_authors] -AzureDevopsReader = "saurabhgssingh" - -[tool.mypy] -disallow_untyped_defs = true -# Remove venv skip when integrated with pre-commit -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index readers azure devops integration" -license = "MIT" -name = "llama-index-readers-azure-devops" -packages = [{include = "llama_index/"}] -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -azure-devops = "7.1.0b4" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} -codespell = {extras = ["toml"], version = ">=v2.2.6"} -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 -types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/test_readers_azure_devops.py b/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/test_readers_azure_devops.py deleted file mode 100644 index 7dac65f4740b3..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-azure-devops/tests/test_readers_azure_devops.py +++ /dev/null @@ -1,7 +0,0 @@ -from 
llama_index.core.readers.base import BaseReader -from llama_index.readers.azure_devops import AzureDevopsReader - - -def test_class(): - names_of_base_classes = [b.__name__ for b in AzureDevopsReader.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/.gitignore b/llama-index-integrations/readers/llama-index-readers-clickhouse/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/BUILD b/llama-index-integrations/readers/llama-index-readers-clickhouse/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/Makefile b/llama-index-integrations/readers/llama-index-readers-clickhouse/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. 
- @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. - sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/README.md b/llama-index-integrations/readers/llama-index-readers-clickhouse/README.md deleted file mode 100644 index 9601e339a786a..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/README.md +++ /dev/null @@ -1,47 +0,0 @@ -# LlamaIndex Readers Integration: ClickHouse - -## Overview - -ClickHouse Reader is a tool designed to retrieve documents from ClickHouse databases efficiently. - -## Installation - -You can install ClickHouse Reader via pip: - -```bash -pip install llama-index-readers-clickhouse -``` - -## Usage - -```python -from llama_index.core.schema import Document -from llama_index.readers.clickhouse import ClickHouseReader - -# Initialize ClickHouseReader with the connection details and configuration -reader = ClickHouseReader( - clickhouse_host="", - username="", - password="", - clickhouse_port=8123, # Optional: Default port is 8123 - database="", - engine="MergeTree", # Optional: Default engine is "MergeTree" - table="", - index_type="NONE", # Optional: Default index type is "NONE" - metric="cosine", # Optional: Default metric is "cosine" - batch_size=1000, # Optional: Default batch size is 1000 - index_params=None, # Optional: Index parameters - search_params=None, # Optional: Search parameters -) - -# Load data from ClickHouse -documents = reader.load_data( - query_vector=[0.1, 0.2, 0.3], # Query vector - where_str=None, # Optional: Where condition string - limit=10, # Optional: Number of results to return -) -``` - -This loader is designed to be used as a way to load data into -[LlamaIndex](https://github.com/run-llama/llama_index/tree/main/llama_index) and/or subsequently -used as a Tool in a [LangChain](https://github.com/hwchase17/langchain) Agent. 
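To make the retrieval mechanics concrete, here is a short sketch of the SQL statement that the reader's `build_query_statement` (deleted below) generates for a `load_data()` call, assuming the defaults `metric="cosine"`, `database="default"`, `table="llama_index"`, and `limit=10`:

```python
# Mirrors ClickHouseReader's query construction: the query vector is rendered
# inline (format_list_to_string) and rows are ranked by the distance function
# mapped from the metric name (cosine -> cosineDistance).
query_vector = [0.1, 0.2, 0.3]
vector_str = "[" + ",".join(str(v) for v in query_vector) + "]"

sql = f"""
SELECT id, doc_id, text, node_info, metadata,
    cosineDistance(vector, {vector_str}) AS score
FROM default.llama_index
ORDER BY score ASC
LIMIT 10
"""
print(sql)
```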
diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/BUILD b/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/__init__.py b/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/__init__.py
deleted file mode 100644
index 9b8443909fe59..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/__init__.py
+++ /dev/null
@@ -1,7 +0,0 @@
-from llama_index.readers.clickhouse.base import (
-    ClickHouseReader,
-    escape_str,
-    format_list_to_string,
-)
-
-__all__ = ["ClickHouseReader", "escape_str", "format_list_to_string"]
diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/base.py b/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/base.py
deleted file mode 100644
index 57abcbfee144c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-clickhouse/llama_index/readers/clickhouse/base.py
+++ /dev/null
@@ -1,165 +0,0 @@
-"""ClickHouse reader."""
-import logging
-from typing import Any, List, Optional
-import clickhouse_connect
-from llama_index.core.readers.base import BaseReader
-from llama_index.core.schema import Document
-
-logger = logging.getLogger(__name__)
-
-
-def escape_str(value: str) -> str:
-    BS = "\\"
-    must_escape = (BS, "'")
-    return (
-        "".join(f"{BS}{c}" if c in must_escape else c for c in value) if value else ""
-    )
-
-
-def format_list_to_string(lst: List) -> str:
-    return "[" + ",".join(str(item) for item in lst) + "]"
-
-
-DISTANCE_MAPPING = {
-    "l2": "L2Distance",
-    "cosine": "cosineDistance",
-    "dot": "dotProduct",
-}
-
-
-class ClickHouseSettings:
-    """ClickHouse Client Configuration.
-
-    Attributes:
-        table (str): Table name to operate on.
-        database (str): Database name to find the table.
-        engine (str): Engine. Options are "MergeTree" and "Memory". Default is "MergeTree".
-        index_type (str): Index type string.
-        metric (str): Metric type to compute distance, e.g., l2, cosine, or dot.
-        batch_size (int): The size of documents to insert.
-        index_params (dict, optional): Index build parameters.
-        search_params (dict, optional): Index search parameters for ClickHouse query.
- """ - - def __init__( - self, - table: str, - database: str, - engine: str, - index_type: str, - metric: str, - batch_size: int, - index_params: Optional[dict] = None, - search_params: Optional[dict] = None, - **kwargs: Any, - ) -> None: - self.table = table - self.database = database - self.engine = engine - self.index_type = index_type - self.metric = metric - self.batch_size = batch_size - self.index_params = index_params - self.search_params = search_params - - def build_query_statement( - self, - query_embed: List[float], - where_str: Optional[str] = None, - limit: Optional[int] = None, - ) -> str: - query_embed_str = format_list_to_string(query_embed) - where_str = f"WHERE {where_str}" if where_str else "" - distance = DISTANCE_MAPPING[self.metric] - return f""" - SELECT id, doc_id, text, node_info, metadata, - {distance}(vector, {query_embed_str}) AS score - FROM {self.database}.{self.table} {where_str} - ORDER BY score ASC - LIMIT {limit} - """ - - -class ClickHouseReader(BaseReader): - """ClickHouse reader. - - Args: - clickhouse_host (str) : An URL to connect to ClickHouse backend. Default to "localhost". - username (str) : Username to login. Defaults to "default". - password (str) : Password to login. Defaults to "". - clickhouse_port (int) : URL port to connect with HTTP. Defaults to 8123. - database (str) : Database name to find the table. Defaults to 'default'. - engine (str) : Engine. Options are "MergeTree" and "Memory". Default is "MergeTree". - table (str) : Table name to operate on. Defaults to 'vector_table'. - index_type (str): index type string. Default to "NONE", supported are ("NONE", "HNSW", "ANNOY") - metric (str) : Metric to compute distance, supported are ('l2', 'cosine', 'dot'). - Defaults to 'cosine' - batch_size (int, optional): the size of documents to insert. Defaults to 1000. - index_params (dict, optional): The index parameters for ClickHouse. - Defaults to None. - search_params (dict, optional): The search parameters for a ClicKHouse query. - Defaults to None. - """ - - def __init__( - self, - clickhouse_host: str = "localhost", - username: str = "default", - password: str = "", - clickhouse_port: Optional[int] = 8123, - database: str = "default", - engine: str = "MergeTree", - table: str = "llama_index", - index_type: str = "NONE", - metric: str = "cosine", - batch_size: int = 1000, - index_params: Optional[dict] = None, - search_params: Optional[dict] = None, - **kwargs: Any, - ) -> None: - self.client = clickhouse_connect.get_client( - host=clickhouse_host, - port=clickhouse_port, - username=username, - password=password, - ) - - self.config = ClickHouseSettings( - table=table, - database=database, - engine=engine, - index_type=index_type, - metric=metric, - batch_size=batch_size, - index_params=index_params, - search_params=search_params, - **kwargs, - ) - - def load_data( - self, - query_vector: List[float], - where_str: Optional[str] = None, - limit: int = 10, - ) -> List[Document]: - """Load data from ClickHouse. - - Args: - query_vector (List[float]): Query vector. - where_str (Optional[str], optional): where condition string. - Defaults to None. - limit (int): Number of results to return. - - Returns: - List[Document]: A list of documents. 
- """ - query_statement = self.config.build_query_statement( - query_embed=query_vector, - where_str=where_str, - limit=limit, - ) - - return [ - Document(id_=r["doc_id"], text=r["text"], metadata=r["metadata"]) - for r in self.client.query(query_statement).named_results() - ] diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-clickhouse/pyproject.toml deleted file mode 100644 index 4e196df320698..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/pyproject.toml +++ /dev/null @@ -1,63 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.readers.clickhouse" - -[tool.llamahub.class_authors] -ClickHouseReader = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index readers clickhouse integration" -exclude = ["**/BUILD"] -license = "MIT" -name = "llama-index-readers-clickhouse" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -clickhouse-connect = "^0.7.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/test_readers_clickhouse.py b/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/test_readers_clickhouse.py deleted file mode 100644 index a93174c3d536b..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-clickhouse/tests/test_readers_clickhouse.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.readers.base import BaseReader -from llama_index.readers.clickhouse import ClickHouseReader - - -def test_class(): - names_of_base_classes = [b.__name__ for b in ClickHouseReader.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/.gitignore 
b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/BUILD b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/CHANGELOG.md b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/CHANGELOG.md
deleted file mode 100644
index 36bff877abcbe..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/CHANGELOG.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# CHANGELOG
-
-## [0.1.2] - 2024-02-13
-
-- Add maintainers and keywords from library.json (llamahub)
diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/Makefile b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/Makefile
deleted file mode 100644
index b9eab05aa3706..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help:	## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format:	## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test:	## Run tests via pytest.
-	pytest tests
-
-watch-docs:	## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/README.md b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/README.md
deleted file mode 100644
index aa5c3d39f3095..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Feishu Wiki Loader
-
-This loader can traverse all Feishu documents under a Feishu space.
-
-## Usage
-
-To use this loader, you need to:
-
-1. Apply for the `wiki:wiki:readonly` permission for your Feishu app
-2. Add the Feishu app as an admin of your Feishu space; see [here](https://open.feishu.cn/document/server-docs/docs/wiki-v2/wiki-qa#b5da330b) for more help
-3. Finally, pass your Feishu space id to this loader
-
-```python
-from llama_index.readers.feishu_wiki import FeishuWikiReader
-
-app_id = "xxx"
-app_secret = "xxx"
-space_id = "xxx"
-loader = FeishuWikiReader(app_id, app_secret)
-documents = loader.load_data(space_id=space_id)
-```
-
-This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/).
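For completeness, here is a sketch of the reader's optional knobs, taken from the `base.py` deleted below: scoping traversal to a wiki subtree with `parent_node_token`, and repointing the default `https://open.feishu.cn` host with `set_lark_domain()`. The Lark endpoint URL shown is an assumed example value, and the environment variable names mirror the module's own `__main__` block.

```python
import os

from llama_index.readers.feishu_wiki import FeishuWikiReader

reader = FeishuWikiReader(
    app_id=os.environ["FEISHU_APP_ID"],
    app_secret=os.environ["FEISHU_APP_SECRET"],
)
# Assumed international (Lark) endpoint; replace with your actual domain.
reader.set_lark_domain("https://open.larksuite.com")

documents = reader.load_data(
    space_id=os.environ["FEISHU_SPACE_ID"],
    # Optional: restrict traversal to the subtree rooted at this wiki node.
    parent_node_token=os.environ.get("FEISHU_PARENT_NODE_TOKEN"),
)
```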
diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/BUILD b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/__init__.py b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/__init__.py deleted file mode 100644 index 3a4f56d259dcc..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.readers.feishu_wiki.base import FeishuWikiReader - -__all__ = ["FeishuWikiReader"] diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/base.py b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/base.py deleted file mode 100644 index 5743b851c5f16..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/llama_index/readers/feishu_wiki/base.py +++ /dev/null @@ -1,150 +0,0 @@ -"""Feishu wiki reader.""" -import json -import os -import time -from typing import List - -import requests -from llama_index.core.readers.base import BaseReader -from llama_index.core.schema import Document - -# Copyright (2023) Bytedance Ltd. and/or its affiliates -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - - -class FeishuWikiReader(BaseReader): - """Feishu Wiki reader. - - Reads pages from Feishu wiki under the space - - """ - - host = "https://open.feishu.cn" - wiki_nodes_url_path = "/open-apis/wiki/v2/spaces/{}/nodes" - documents_raw_content_url_path = "/open-apis/docx/v1/documents/{}/raw_content" - tenant_access_token_internal_url_path = ( - "/open-apis/auth/v3/tenant_access_token/internal" - ) - - def __init__(self, app_id: str, app_secret: str) -> None: - """ - - Args: - app_id: The unique identifier of the application is obtained after the application is created. - app_secret: Application key, obtained after creating the application. - """ - super().__init__() - self.app_id = app_id - self.app_secret = app_secret - - self.tenant_access_token = "" - self.expire = 0 - - def load_data(self, space_id: str, parent_node_token: str = None) -> List[Document]: - """Load data from the input directory. - - Args: - space_id (str): a space id. 
- parent_node_token (str[optional]): a parent node token of the space - """ - if space_id is None: - raise ValueError('Must specify a "space_id" in `load_kwargs`.') - - document_ids = self._load_space(space_id, parent_node_token=parent_node_token) - document_ids = list(set(document_ids)) - - results = [] - for document_id in document_ids: - doc = self._load_doc(document_id) - results.append(Document(text=doc, extra_info={"document_id": document_id})) - return results - - def _load_space(self, space_id: str, parent_node_token: str = None) -> str: - if self.tenant_access_token == "" or self.expire < time.time(): - self._update_tenant_access_token() - headers = { - "Authorization": f"Bearer {self.tenant_access_token}", - "Content-Type": "application/json; charset=utf-8", - } - - url = self.host + self.wiki_nodes_url_path.format(space_id) - if parent_node_token: - url += f"?parent_node_token={parent_node_token}" - try: - response = requests.get(url, headers=headers) - result = response.json() - except Exception: - return [] - if not result.get("data"): - return [] - obj_token_list = [] - for item in result["data"]["items"]: - obj_token_list.append(item["obj_token"]) - if item["has_child"]: - child_obj_token_list = self._load_space( - space_id=space_id, parent_node_token=item["node_token"] - ) - if child_obj_token_list: - obj_token_list.extend(child_obj_token_list) - return obj_token_list - - def _load_doc(self, document_id: str) -> str: - """Load a document from Feishu Docs. - - Args: - document_id: the document id. - - Returns: - The document text. - """ - url = self.host + self.documents_raw_content_url_path.format(document_id) - if self.tenant_access_token == "" or self.expire < time.time(): - self._update_tenant_access_token() - headers = { - "Authorization": f"Bearer {self.tenant_access_token}", - "Content-Type": "application/json; charset=utf-8", - } - try: - response = requests.get(url, headers=headers) - result = response.json() - except Exception: - return None - if not result.get("data"): - return None - return result["data"]["content"] - - def _update_tenant_access_token(self) -> None: - """For update tenant_access_token.""" - url = self.host + self.tenant_access_token_internal_url_path - headers = {"Content-Type": "application/json; charset=utf-8"} - data = {"app_id": self.app_id, "app_secret": self.app_secret} - response = requests.post(url, data=json.dumps(data), headers=headers) - self.tenant_access_token = response.json()["tenant_access_token"] - self.expire = time.time() + response.json()["expire"] - - def set_lark_domain(self, host: str) -> None: - """Set lark domain.""" - self.host = host - - -if __name__ == "__main__": - app_id = os.environ.get("FEISHU_APP_ID") - app_secret = os.environ.get("FEISHU_APP_SECRET") - reader = FeishuWikiReader(app_id, app_secret) - print( - reader.load_data( - space_id=os.environ.get("FEISHU_SPACE_ID"), - parent_node_token=os.environ.get("FEISHU_PARENT_NODE_TOKEN"), - ) - ) diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/pyproject.toml deleted file mode 100644 index ea9b221f4bc08..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/pyproject.toml +++ /dev/null @@ -1,64 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] 
-contains_example = false -import_path = "llama_index.readers.feishu_wiki" - -[tool.llamahub.class_authors] -FeishuWikiReader = "zhourunlai" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index readers feishu_wiki integration" -exclude = ["**/BUILD"] -license = "MIT" -maintainers = ["zhourunlai"] -name = "llama-index-readers-feishu-wiki" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -requests = "^2.31.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/test_readers_feishu_wiki.py b/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/test_readers_feishu_wiki.py deleted file mode 100644 index 2fef634282e38..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-feishu-wiki/tests/test_readers_feishu_wiki.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.readers.base import BaseReader -from llama_index.readers.feishu_wiki import FeishuWikiReader - - -def test_class(): - names_of_base_classes = [b.__name__ for b in FeishuWikiReader.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/.gitignore b/llama-index-integrations/readers/llama-index-readers-openapi/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-openapi/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest
-*.spec
-
-# Installer logs
-pip-log.txt
-pip-delete-this-directory.txt
-
-# Unit test / coverage reports
-htmlcov/
-.tox/
-.nox/
-.coverage
-.coverage.*
-.cache
-nosetests.xml
-coverage.xml
-*.cover
-*.py,cover
-.hypothesis/
-.pytest_cache/
-.ruff_cache
-
-# Translations
-*.mo
-*.pot
-
-# Django stuff:
-*.log
-local_settings.py
-db.sqlite3
-db.sqlite3-journal
-
-# Flask stuff:
-instance/
-.webassets-cache
-
-# Scrapy stuff:
-.scrapy
-
-# Sphinx documentation
-docs/_build/
-
-# PyBuilder
-target/
-
-# Jupyter Notebook
-.ipynb_checkpoints
-notebooks/
-
-# IPython
-profile_default/
-ipython_config.py
-
-# pyenv
-.python-version
-
-# pipenv
-# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/BUILD b/llama-index-integrations/readers/llama-index-readers-openapi/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/Makefile b/llama-index-integrations/readers/llama-index-readers-openapi/Makefile
deleted file mode 100644
index b9eab05aa3706..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help: ## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format: ## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test: ## Run tests via pytest.
-	pytest tests
-
-watch-docs: ## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/README.md b/llama-index-integrations/readers/llama-index-readers-openapi/README.md
deleted file mode 100644
index 67c703ec0bba0..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/README.md
+++ /dev/null
@@ -1,14 +0,0 @@
-# LlamaIndex Readers Integration: OpenAPI Specification
-
-This module provides a reader for OpenAPI Specification (OAS) JSON files. The reader parses OAS files and splits them into atomic elements, such as paths, operations, parameters, etc.
-
-It also provides some basic customization of the reader, such as changing the depth of the split and the ability to exclude certain elements.
-
-## Usage
-
-```python
-from llama_index.readers.openapi import OpenAPIReader
-
-openapi_reader = OpenAPIReader(exclude=["info", "servers"])
-openapi_reader.load_data("path/to/openapi.json")
-```
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/BUILD b/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/__init__.py b/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/__init__.py
deleted file mode 100644
index 0558c1dc898cc..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from llama_index.readers.openapi.base import OpenAPIReader
-
-__all__ = ["OpenAPIReader"]
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/base.py b/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/base.py
deleted file mode 100644
index c9a5032389d58..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/llama_index/readers/openapi/base.py
+++ /dev/null
@@ -1,84 +0,0 @@
-"""OpenAPI Specification Reader."""
-
-import json
-import re
-from typing import Any, Dict, List, Optional
-
-from llama_index.core.readers.base import BaseReader
-from llama_index.core.schema import Document
-
-
-class OpenAPIReader(BaseReader):
-    """OpenAPI reader.
-
-    Reads OpenAPI specifications, with options for how to parse them.
-
-    Args:
-        depth (Optional[int]): Depth to dive into the JSON before splitting it.
-        exclude (Optional[List[str]]): JSON paths to exclude, with path segments separated by '.'. For example, 'components.pets' will exclude the component 'pets' from the OpenAPI specification. Useful for removing unwanted information from the specification.
-
-    Returns:
-        List[Document]: List of documents.
-
-    """
-
-    def __init__(
-        self, depth: Optional[int] = 1, exclude: Optional[List[str]] = None
-    ) -> None:
-        super().__init__()
-        self.exclude = exclude
-        self.depth = depth
-
-    @classmethod
-    def class_name(cls) -> str:
-        """Get the name identifier of the class."""
-        return "OpenAPIReader"
-
-    def _should_exclude(self, path: str) -> bool:
-        """Check if the path should be excluded."""
-        return self.exclude is not None and any(
-            re.match(exclude_path, path) for exclude_path in self.exclude
-        )
-
-    def _build_docs_from_attributes(
-        self,
-        key: str,
-        value: Any,
-        extra_info: Dict,
-        path: str = "$",
-        level: int = 0,
-    ) -> List[Document]:
-        """Build Documents from the attributes of the OAS JSON."""
-        if self._should_exclude(path):
-            return []
-
-        if self.depth == level or not isinstance(value, dict):
-            return [
-                Document(
-                    text=f"{key}: {value}", metadata={"json_path": path, **extra_info}
-                )
-            ]
-
-        return [
-            doc
-            for k, v in value.items()
-            for doc in self._build_docs_from_attributes(
-                k, v, extra_info, f"{path}.{key}", level + 1
-            )
-        ]
-
-    def load_data(
-        self, input_file: str, extra_info: Optional[Dict] = {}
-    ) -> List[Document]:
-        """Load data from the input file."""
-        try:
-            with open(input_file, encoding="utf-8") as f:
-                data = json.load(f)
-        except json.JSONDecodeError:
-            raise ValueError(f"The file {input_file} is not a valid JSON file.")
-
-        return [
-            doc
-            for key, value in data.items()
-            for doc in self._build_docs_from_attributes(key, value, extra_info)
-        ]
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-openapi/pyproject.toml
deleted file mode 100644
index 9ad6aad17ed37..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/pyproject.toml
+++ /dev/null
@@ -1,53 +0,0 @@
-[build-system]
-build-backend = "poetry.core.masonry.api"
-requires = ["poetry-core"]
-
-[tool.codespell]
-check-filenames = true
-check-hidden = true
-skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
-
-[tool.llamahub]
-contains_example = false
-import_path = "llama_index.readers.openapi"
-
-[tool.llamahub.class_authors]
-OpenAPIReader = "mattzcarey"
-
-[tool.mypy]
-disallow_untyped_defs = true
-exclude = ["_static", "build", "examples", "notebooks", "venv"]
-ignore_missing_imports = true
-python_version = "3.8"
-
-[tool.poetry]
-authors = ["Your Name "]
-description = "llama-index readers openapi integration"
-license = "MIT"
-name = "llama-index-readers-openapi"
-packages = [{include = "llama_index/"}]
-readme = "README.md"
-version = "0.3.0"
-
-[tool.poetry.dependencies]
-python = ">=3.9,<4.0"
-llama-index-core = "^0.12.0"
-
-[tool.poetry.group.dev.dependencies]
-black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
-codespell = {extras = ["toml"], version = ">=v2.2.6"}
-ipython = "8.10.0"
-jupyter = "^1.0.0"
-mypy = "0.991"
-pre-commit = "3.2.0"
-pylint = "2.15.10"
-pytest = "7.2.1"
-pytest-mock = "3.11.1"
-ruff = "0.0.292"
-tree-sitter-languages = "^1.8.0"
-types-Deprecated = ">=0.1.0"
-types-PyYAML = "^6.0.12.12"
-types-protobuf = "^4.24.0.4"
-types-redis = "4.5.5.0"
-types-requests = "2.28.11.8"  # TODO: unpin when mypy>0.991
-types-setuptools = "67.1.0.0"
diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-openapi/tests/BUILD
deleted file mode 100644
index dabf212d7e716..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-openapi/tests/BUILD
+++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-openapi/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/readers/llama-index-readers-openapi/tests/test_readers_openapi.py b/llama-index-integrations/readers/llama-index-readers-openapi/tests/test_readers_openapi.py deleted file mode 100644 index e57a375a67136..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-openapi/tests/test_readers_openapi.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.readers.base import BaseReader -from llama_index.readers.openapi import OpenAPIReader - - -def test_class(): - names_of_base_classes = [b.__name__ for b in OpenAPIReader.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-readme/.gitignore b/llama-index-integrations/readers/llama-index-readers-readme/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/readers/llama-index-readers-readme/BUILD b/llama-index-integrations/readers/llama-index-readers-readme/BUILD deleted file mode 100644 index 2d3d88d1eab9c..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/BUILD +++ /dev/null @@ -1,7 +0,0 @@ -poetry_requirements( - name="poetry", -) - -python_requirements( - name="reqs", -) diff --git a/llama-index-integrations/readers/llama-index-readers-readme/Makefile b/llama-index-integrations/readers/llama-index-readers-readme/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. - sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/readers/llama-index-readers-readme/README.md b/llama-index-integrations/readers/llama-index-readers-readme/README.md deleted file mode 100644 index 430a6b468acac..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/README.md +++ /dev/null @@ -1,20 +0,0 @@ -# Readme.com Loader - -```bash -pip install llama-index-readers-readme -``` - -This loader fetches the text from [Readme](https://readme.com/) docs guides using the Readme API. It also uses the BeautifulSoup library to parse the HTML and extract the text from the docs. - -## Usage - -To use this loader, you need to pass in the API Key of a Readme account. - -```python -from llama_index.readers.readme import ReadmeReader - -loader = ReadmeReader(api_key="YOUR_API_KEY") -documents = loader.load_data() -``` - -This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/). 
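For context on what this removed loader did end to end, here is a minimal sketch of feeding its output into an index. This is illustrative only: it assumes the package is still installed, uses `VectorStoreIndex` from `llama_index.core`, and the API key and query are placeholders.

```python
from llama_index.core import VectorStoreIndex
from llama_index.readers.readme import ReadmeReader

# Fetch every non-hidden guide from the Readme.com project.
loader = ReadmeReader(api_key="YOUR_API_KEY")
documents = loader.load_data()

# Build a queryable index over the fetched guides and ask a question.
index = VectorStoreIndex.from_documents(documents)
print(index.as_query_engine().query("How do I authenticate?"))
```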
diff --git a/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/BUILD b/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/__init__.py b/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/__init__.py
deleted file mode 100644
index e1a23f83368f2..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from llama_index.readers.readme.base import ReadmeReader
-
-__all__ = ["ReadmeReader"]
diff --git a/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py b/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
deleted file mode 100644
index f310f6e23a8c3..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-readme/llama_index/readers/readme/base.py
+++ /dev/null
@@ -1,154 +0,0 @@
-"""Readme reader."""
-
-import requests
-import base64
-import math
-from typing import List
-
-from llama_index.core.readers.base import BaseReader
-from llama_index.core.schema import Document
-
-
-class ReadmeReader(BaseReader):
-    """Readme reader. Reads docs from a Readme.com project.
-
-    Args:
-        api_key (str): Readme.com API Key
-    """
-
-    def __init__(self, api_key: str) -> None:
-        """Initialize Readme reader."""
-        self.api_key = base64.b64encode(bytes(f"{api_key}:", "utf-8")).decode("utf-8")
-        self._headers = {
-            "accept": "*/*",
-            "authorization": f"Basic {self.api_key}",
-            "Content-Type": "application/json",
-        }
-
-    def load_data(self) -> List[Document]:
-        """Load data from the docs (pages).
-
-        Returns:
-            List[Document]: List of documents.
-        """
-        from bs4 import BeautifulSoup
-
-        results = []
-
-        docs = self.get_all_docs()
-        for doc in docs:
-            body = doc["body_html"]
-            if body is None:
-                continue
-            soup = BeautifulSoup(body, "html.parser")
-            body = soup.get_text()
-            extra_info = {
-                "id": doc["id"],
-                "title": doc["title"],
-                "type": doc.get("type"),
-                "slug": doc["slug"],
-                "updated_at": doc["updatedAt"],
-            }
-
-            results.append(
-                Document(
-                    text=body,
-                    extra_info=extra_info,
-                )
-            )
-
-        return results
-
-    def get_all_docs(self):
-        """
-        Retrieves all documents, along with their information, grouped by category.
-
-        Returns:
-            list: A list containing dictionaries with document information.
-        """
-        categories = self.get_all_categories()
-        docs = []
-        for category in categories:
-            category_docs = self.get_docs_in_category(category.get("slug"))
-            documents_slugs = [
-                category_doc.get("slug") for category_doc in category_docs
-            ]
-            for document_slug in documents_slugs:
-                doc = self.get_document_info(document_slug)
-                doc["category_name"] = category["title"]
-                docs.append(doc)
-
-        return docs
-
-    def get_docs_in_category(self, category_slug):
-        """
-        Retrieves documents belonging to a specific category.
-
-        Args:
-            category_slug (str): The slug of the category.
-
-        Returns:
-            list: A list containing dictionaries with document information.
-        """
-        url = f"https://dash.readme.com/api/v1/categories/{category_slug}/docs"
-        response = requests.get(url, headers=self._headers)
-
-        docs = response.json()
-
-        # Keep only documents that are not hidden.
-        return [doc for doc in docs if not doc.get("hidden", True)]
-
-    def get_document_info(self, document_slug):
-        """
-        Retrieves information about a specific document.
-
-        Args:
-            document_slug (str): The slug of the document.
-
-        Returns:
-            dict: A dictionary containing document information.
-        """
-        url = f"https://dash.readme.com/api/v1/docs/{document_slug}"
-        response = requests.get(url, headers=self._headers)
-
-        return response.json()
-
-    def get_categories_page(self, params, page):
-        """
-        Sends a GET request for a specific page of categories.
-
-        Args:
-            params (dict): Parameters of the request, such as perPage and others.
-            page (int): The number of the page to be retrieved.
-
-        Returns:
-            tuple: A tuple containing the total number of items and the retrieved categories.
-        """
-        url = "https://dash.readme.com/api/v1/categories"
-        params["page"] = page
-        response = requests.get(url, params=params, headers=self._headers)
-        # total count and categories
-        return int(response.headers.get("x-total-count", 0)), response.json()
-
-    def get_all_categories(self):
-        """
-        Retrieves all categories from the API.
-
-        Returns:
-            list: A list containing all categories with type "guide".
-        """
-        perPage = 100
-        page = 1
-        params = {
-            "perPage": perPage,
-            "page": page,
-        }
-
-        total_count, categories = self.get_categories_page(params=params, page=1)
-        remaining_pages = math.ceil(total_count / perPage) - 1
-
-        for i in range(2, remaining_pages + 2):
-            categories.extend(self.get_categories_page(params=params, page=i)[1])
-
-        # Include just categories with type: "guide"
-        return [category for category in categories if category.get("type") == "guide"]
diff --git a/llama-index-integrations/readers/llama-index-readers-readme/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-readme/pyproject.toml
deleted file mode 100644
index d856177edd171..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-readme/pyproject.toml
+++ /dev/null
@@ -1,56 +0,0 @@
-[build-system]
-build-backend = "poetry.core.masonry.api"
-requires = ["poetry-core"]
-
-[tool.codespell]
-check-filenames = true
-check-hidden = true
-skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
-
-[tool.llamahub]
-contains_example = false
-import_path = "llama_index.readers.readme"
-
-[tool.llamahub.class_authors]
-ReadmeReader = "gonzariosm"
-
-[tool.mypy]
-disallow_untyped_defs = true
-# Remove venv skip when integrated with pre-commit
-exclude = ["_static", "build", "examples", "notebooks", "venv"]
-ignore_missing_imports = true
-python_version = "3.8"
-
-[tool.poetry]
-authors = ["Gonzalo Rios "]
-description = "llama-index readers readme.com integration"
-license = "MIT"
-name = "llama-index-readers-readme"
-packages = [{include = "llama_index/"}]
-readme = "README.md"
-version = "0.3.0"
-
-[tool.poetry.dependencies]
-python = ">=3.9,<4.0"
-beautifulsoup4 = "^4.12.3"
-requests = "^2.31.0"
-llama-index-core = "^0.12.0"
-
-[tool.poetry.group.dev.dependencies]
-black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"}
-codespell = {extras = ["toml"], version = ">=v2.2.6"}
-ipython = "8.10.0"
-jupyter = "^1.0.0"
-mypy = "0.991"
-pre-commit = "3.2.0"
-pylint = "2.15.10"
-pytest = "7.2.1"
-pytest-mock = "3.11.1"
-ruff = "0.0.292"
-tree-sitter-languages = "^1.8.0"
-types-Deprecated = ">=0.1.0"
-types-PyYAML = 
"^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/readers/llama-index-readers-readme/requirements.txt b/llama-index-integrations/readers/llama-index-readers-readme/requirements.txt deleted file mode 100644 index 1f3e778b5682b..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/requirements.txt +++ /dev/null @@ -1,2 +0,0 @@ -beautifulsoup4 -requests diff --git a/llama-index-integrations/readers/llama-index-readers-readme/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-readme/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/readers/llama-index-readers-readme/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-readme/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/readers/llama-index-readers-readme/tests/test_readers_readme.py b/llama-index-integrations/readers/llama-index-readers-readme/tests/test_readers_readme.py deleted file mode 100644 index 425f20a2329ff..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-readme/tests/test_readers_readme.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.readers.base import BaseReader -from llama_index.readers.readme import ReadmeReader - - -def test_class(): - names_of_base_classes = [b.__name__ for b in ReadmeReader.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/.gitignore b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
-# However, in case of collaboration, if having platform-specific dependencies or dependencies
-# having no cross-platform support, pipenv may install dependencies that don't work, or not
-# install all needed dependencies.
-#Pipfile.lock
-
-# PEP 582; used by e.g. github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/BUILD b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/BUILD
deleted file mode 100644
index 2d3d88d1eab9c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/BUILD
+++ /dev/null
@@ -1,7 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
-
-python_requirements(
-    name="reqs",
-)
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/CHANGELOG.md b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/CHANGELOG.md
deleted file mode 100644
index 36bff877abcbe..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/CHANGELOG.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# CHANGELOG
-
-## [0.1.2] - 2024-02-13
-
-- Add maintainers and keywords from library.json (llamahub)
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/Makefile b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/Makefile
deleted file mode 100644
index b9eab05aa3706..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help: ## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format: ## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test: ## Run tests via pytest.
-	pytest tests
-
-watch-docs: ## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/README.md b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/README.md
deleted file mode 100644
index 049f38805364a..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/README.md
+++ /dev/null
@@ -1,22 +0,0 @@
-# Snscrape Twitter Loader
-
-```bash
-pip install llama-index-readers-snscrape-twitter
-```
-
-This loader loads documents from Twitter using the Snscrape Python package.
-
-## Usage
-
-Here's an example usage of the SnscrapeTwitterReader.
-
-```python
-import os
-
-from llama_index.readers.snscrape_twitter import SnscrapeTwitterReader
-
-loader = SnscrapeTwitterReader()
-documents = loader.load_data(username="elonmusk", num_tweets=10)
-```
-
-This loader is designed to be used as a way to load data into [LlamaIndex](https://github.com/run-llama/llama_index/).
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/BUILD b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/__init__.py b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/__init__.py
deleted file mode 100644
index cb6de685a0a2e..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from llama_index.readers.snscrape_twitter.base import SnscrapeTwitterReader
-
-__all__ = ["SnscrapeTwitterReader"]
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/base.py b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/base.py
deleted file mode 100644
index 1f8dd614c8b34..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/llama_index/readers/snscrape_twitter/base.py
+++ /dev/null
@@ -1,39 +0,0 @@
-"""SnscrapeTwitter reader."""
-from typing import List
-
-from llama_index.core.readers.base import BaseReader
-from llama_index.core.schema import Document
-
-
-class SnscrapeTwitterReader(BaseReader):
-    """SnscrapeTwitter reader. Reads data from a Twitter profile.
-
-    Args:
-        username (str): Twitter username.
-        num_tweets (int): Number of tweets to fetch.
-    """
-
-    def __init__(self) -> None:
-        """Initialize SnscrapeTwitter reader."""
-
-    def load_data(self, username: str, num_tweets: int) -> List[Document]:
-        """Load data from a Twitter profile.
-
-        Args:
-            username (str): Twitter username.
-            num_tweets (int): Number of tweets to fetch.
-
-
-        Returns:
-            List[Document]: List of documents.
-        """
-        import snscrape.modules.twitter as sntwitter
-
-        attributes_container = []
-        for i, tweet in enumerate(
-            sntwitter.TwitterSearchScraper(f"from:{username}").get_items()
-        ):
-            if i >= num_tweets:
-                break
-            attributes_container.append(tweet.rawContent)
-        return [Document(text="\n".join(attributes_container), extra_info={"username": username})]
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/pyproject.toml b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/pyproject.toml
deleted file mode 100644
index 9621e14d35390..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/pyproject.toml
+++ /dev/null
@@ -1,64 +0,0 @@
-[build-system]
-build-backend = "poetry.core.masonry.api"
-requires = ["poetry-core"]
-
-[tool.codespell]
-check-filenames = true
-check-hidden = true
-skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb"
-
-[tool.llamahub]
-contains_example = false
-import_path = "llama_index.readers.snscrape_twitter"
-
-[tool.llamahub.class_authors]
-SnscrapeTwitterReader = "smyja"
-
-[tool.mypy]
-disallow_untyped_defs = true
-exclude = ["_static", "build", "examples", "notebooks", "venv"]
-ignore_missing_imports = true
-python_version = "3.8"
-
-[tool.poetry]
-authors = ["Your Name "]
-description = "llama-index readers snscrape_twitter integration"
-exclude = ["**/BUILD"]
-license = "MIT"
-maintainers = ["smyja"]
-name = "llama-index-readers-snscrape-twitter"
-readme = "README.md"
-version = "0.3.0"
-
-[tool.poetry.dependencies]
-python = ">=3.9,<4.0"
-llama-index-core = "^0.12.0"
-snscrape = "*"
-
-[tool.poetry.group.dev.dependencies]
-ipython = "8.10.0"
-jupyter = "^1.0.0"
-mypy = "0.991"
-pre-commit = "3.2.0"
-pylint = "2.15.10"
-pytest = "7.2.1"
-pytest-mock = "3.11.1"
-ruff = "0.0.292"
-tree-sitter-languages = "^1.8.0"
-types-Deprecated = ">=0.1.0"
-types-PyYAML = "^6.0.12.12"
-types-protobuf = "^4.24.0.4"
-types-redis = "4.5.5.0"
-types-requests = "2.28.11.8"
-types-setuptools = "67.1.0.0"
-
-[tool.poetry.group.dev.dependencies.black]
-extras = ["jupyter"]
-version = "<=23.9.1,>=23.7.0"
-
-[tool.poetry.group.dev.dependencies.codespell]
-extras = ["toml"]
-version = ">=v2.2.6"
-
-[[tool.poetry.packages]]
-include = "llama_index/"
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/requirements.txt b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/requirements.txt
deleted file mode 100644
index ee1d00dd71bc3..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/requirements.txt
+++ /dev/null
@@ -1 +0,0 @@
-snscrape @ git+https://github.com/JustAnotherArchivist/snscrape.git
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/BUILD
deleted file mode 100644
index dabf212d7e716..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_tests()
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/__init__.py
deleted file mode 100644
index e69de29bb2d1d..0000000000000
diff --git a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/test_readers_snscrape_twitter.py
b/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/test_readers_snscrape_twitter.py deleted file mode 100644 index 2e74da42883b4..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-snscrape-twitter/tests/test_readers_snscrape_twitter.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.readers.base import BaseReader -from llama_index.readers.snscrape_twitter import SnscrapeTwitterReader - - -def test_class(): - names_of_base_classes = [b.__name__ for b in SnscrapeTwitterReader.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml
deleted file mode 100644
index 9b472131a0663..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/.pre-commit-config.yaml
+++ /dev/null
@@ -1,6 +0,0 @@
-repos:
-  - repo: https://github.com/psf/black
-    rev: 22.3.0
-    hooks:
-      - id: black
-        language_version: python3
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile
deleted file mode 100644
index 7da024542a5ac..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help: ## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format: ## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files || true
-
-lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test: ## Run tests via pytest.
-	pytest tests
-
-watch-docs: ## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md
deleted file mode 100644
index b47374277229a..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# LlamaIndex Readers Integration: Youtube-Metadata
-
-```bash
-pip install llama-index-readers-youtube-metadata
-```
-
-This loader fetches the metadata of YouTube videos using the Google API (https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id={videos_string}&key={api_key}). You must have a Google API key to use it.
-
-Transcripts of YouTube videos are fetched using the `youtube_transcript_api` Python package.
-
-## Usage
-
-Simply pass a list of YouTube video IDs into `load_data`.
-
-```python
-from llama_index.readers.youtube_metadata import YouTubeMetaData
-
-api_key = "Axxxxx"  # YouTube API key
-
-video_ids = ["S_0hBL4ILAg", "a2skIq6hFiY"]
-
-youtube_meta = YouTubeMetaData(api_key=api_key)
-details = youtube_meta.load_data(video_ids)
-```
-
-This can be combined with the YoutubeTranscriptReader to provide more information for RAG AI inquiries.
-
-```python
-from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
-from llama_index.readers.youtube_metadata import YouTubeMetaData, YouTubeMetaDataAndTranscript
-
-video_ids = ["S_0hBL4ILAg", "a2skIq6hFiY"]  # Example video IDs
-yt_metadata = YouTubeMetaData(api_key=api_key)
-print("Testing YouTubeMetaData...")
-print(yt_metadata.load_data(video_ids))
-
-yt_meta_transcript = YouTubeMetaDataAndTranscript(api_key=api_key)
-print("Testing YouTubeMetaDataAndTranscript...")
-print(yt_meta_transcript.load_data(video_ids))
-```
-
-The video ID for a YouTube video is right in the URL. In this URL: https://www.youtube.com/watch?v=a2skIq6hFiY&t=60s
-
-The video ID is 'a2skIq6hFiY' (the '&t=60s' part is a timestamp parameter, not part of the ID).
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py
deleted file mode 100644
index 258d374441600..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/__init__.py
+++ /dev/null
@@ -1,6 +0,0 @@
-from llama_index.readers.youtube_metadata.base import (
-    YouTubeMetaData,
-    YouTubeMetaDataAndTranscript,
-)
-
-__all__ = ["YouTubeMetaData", "YouTubeMetaDataAndTranscript"]
diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/base.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/base.py
deleted file mode 100644
index 91167b0b27666..0000000000000
--- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/llama_index/readers/youtube_metadata/base.py
+++ /dev/null
@@ -1,66 +0,0 @@
-# YouTubeMetaData.py
-# Class to return YouTube metadata for a video ID
-import requests
-from pydantic import Field
-from typing import Any, List, Dict
-from youtube_transcript_api import YouTubeTranscriptApi
-from llama_index.core.readers.base import BasePydanticReader
-
-
-class YouTubeMetaData(BasePydanticReader):
-    api_key: str
-
-    def load_data(self, video_ids):
-        details = {}
-
-        def chunks(lst, n):
-            """Yield successive n-sized chunks from lst."""
-            for i in range(0, len(lst), n):
-                yield lst[i : i + n]
-
-        video_id_chunks = list(chunks(video_ids, 20))
-        for chunk in video_id_chunks:
-            videos_string = ",".join(chunk)
-            url = f"https://www.googleapis.com/youtube/v3/videos?part=snippet,statistics&id={videos_string}&key={self.api_key}"
-            response = requests.get(url).json()
-            if "items" not in response:
-                print("Error in API response:", response)
-                continue
-
-            for item in response["items"]:
-                video_id = item["id"]
-
details[video_id] = { - "title": item["snippet"]["title"], - "description": item["snippet"]["description"], - "publishDate": item["snippet"]["publishedAt"], - "statistics": item["statistics"], - "tags": item["snippet"].get("tags", []), - "url": f"https://www.youtube.com/watch?v={video_id}", - } - - return details - - -class YouTubeMetaDataAndTranscript(BasePydanticReader): - api_key: str = Field(..., description="API key for YouTube data access") - metadata_loader: YouTubeMetaData = None # Don't instantiate here - transcript_loader: Any = YouTubeTranscriptApi # Assume this is a simple callable - - def initialize_loaders(self): - if not self.metadata_loader: - self.metadata_loader = YouTubeMetaData(api_key=self.api_key) - - def load_data(self, video_ids: List[str]) -> Dict[str, Any]: - self.initialize_loaders() # Make sure loaders are initialized - all_details = {} - for video_id in video_ids: - metadata = self.metadata_loader.load_data([video_id]) - try: - transcripts = self.transcript_loader.get_transcript(video_id) - except Exception as e: - transcripts = str(e) - all_details[video_id] = { - "metadata": metadata.get(video_id, {}), - "transcript": transcripts, - } - return all_details diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/pants b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/pants deleted file mode 100644 index 59f720d702ef6..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/pants +++ /dev/null @@ -1,510 +0,0 @@ -#!/usr/bin/env bash -# Copyright 2020 Pants project contributors (see CONTRIBUTORS.md). -# Licensed under the Apache License, Version 2.0 (see LICENSE). - -# =============================== NOTE =============================== -# This ./pants bootstrap script comes from the pantsbuild/setup -# project. It is intended to be checked into your code repository so -# that other developers have the same setup. -# -# Learn more here: https://www.pantsbuild.org/docs/installation -# ==================================================================== - -set -eou pipefail - -# an arbitrary number: bump when there's a change that someone might want to query for -# (e.g. checking $(PANTS_BOOTSTRAP_TOOLS=1 ./pants version) >= ...) -SCRIPT_VERSION=1 - -# Source any custom bootstrap settings for Pants from PANTS_BOOTSTRAP if it exists. -: ${PANTS_BOOTSTRAP:=".pants.bootstrap"} -if [[ -f "${PANTS_BOOTSTRAP}" ]]; then - source "${PANTS_BOOTSTRAP}" -fi - -# NOTE: To use an unreleased version of Pants from the pantsbuild/pants main branch, -# locate the main branch SHA, set PANTS_SHA= in the environment, and run this script as usual. -# -# E.g., PANTS_SHA=725fdaf504237190f6787dda3d72c39010a4c574 ./pants --version -# -# You can also use PANTS_VERSION= to override the config version that is in the pants.toml file. -# -# E.g., PANTS_VERSION=2.13.0 ./pants --version - -PYTHON_BIN_NAME="${PYTHON:-unspecified}" - -# Set this to specify a non-standard location for this script to read the Pants version from. -# NB: This will *not* cause Pants itself to use this location as a config file. -# You can use PANTS_CONFIG_FILES or --pants-config-files to do so. -PANTS_TOML=${PANTS_TOML:-pants.toml} - -PANTS_BIN_NAME="${PANTS_BIN_NAME:-$0}" - -PANTS_SETUP_CACHE="${PANTS_SETUP_CACHE:-${XDG_CACHE_HOME:-$HOME/.cache}/pants/setup}" -# If given a relative path, we fix it to be absolute. 
-if [[ "$PANTS_SETUP_CACHE" != /* ]]; then - PANTS_SETUP_CACHE="${PWD}/${PANTS_SETUP_CACHE}" -fi - -PANTS_BOOTSTRAP="${PANTS_SETUP_CACHE}/bootstrap-$(uname -s)-$(uname -m)" - -_PEX_VERSION=2.1.103 -_PEX_URL="https://github.com/pantsbuild/pex/releases/download/v${_PEX_VERSION}/pex" -_PEX_EXPECTED_SHA256="4d45336511484100ae4e2bab24542a8b86b12c8cb89230463593c60d08c4b8d3" - -VIRTUALENV_VERSION=20.4.7 -VIRTUALENV_REQUIREMENTS=$( -cat << EOF -virtualenv==${VIRTUALENV_VERSION} --hash sha256:2b0126166ea7c9c3661f5b8e06773d28f83322de7a3ff7d06f0aed18c9de6a76 -filelock==3.0.12 --hash sha256:929b7d63ec5b7d6b71b0fa5ac14e030b3f70b75747cef1b10da9b879fef15836 -six==1.16.0 --hash sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254 -distlib==0.3.2 --hash sha256:23e223426b28491b1ced97dc3bbe183027419dfc7982b4fa2f05d5f3ff10711c -appdirs==1.4.4 --hash sha256:a841dacd6b99318a741b166adb07e19ee71a274450e68237b4650ca1055ab128 -importlib-resources==5.1.4; python_version < "3.7" --hash sha256:e962bff7440364183203d179d7ae9ad90cb1f2b74dcb84300e88ecc42dca3351 -importlib-metadata==4.5.0; python_version < "3.8" --hash sha256:833b26fb89d5de469b24a390e9df088d4e52e4ba33b01dc5e0e4f41b81a16c00 -zipp==3.4.1; python_version < "3.10" --hash sha256:51cb66cc54621609dd593d1787f286ee42a5c0adbb4b29abea5a63edc3e03098 -typing-extensions==3.10.0.0; python_version < "3.8" --hash sha256:779383f6086d90c99ae41cf0ff39aac8a7937a9283ce0a414e5dd782f4c94a84 -EOF -) - -COLOR_RED="\x1b[31m" -COLOR_GREEN="\x1b[32m" -COLOR_YELLOW="\x1b[33m" -COLOR_RESET="\x1b[0m" - -INSTALL_URL="https://www.pantsbuild.org/docs/installation" - -function log() { - echo -e "$@" 1>&2 -} - -function die() { - (($# > 0)) && log "${COLOR_RED}$*${COLOR_RESET}" - exit 1 -} - -function green() { - (($# > 0)) && log "${COLOR_GREEN}$*${COLOR_RESET}" -} - -function warn() { - (($# > 0)) && log "${COLOR_YELLOW}$*${COLOR_RESET}" -} - -function tempdir { - mkdir -p "$1" - mktemp -d "$1"/pants.XXXXXX -} - -function get_exe_path_or_die { - local exe="$1" - if ! command -v "${exe}"; then - die "Could not find ${exe}. Please ensure ${exe} is on your PATH." - fi -} - -function get_pants_config_string_value { - local config_key="$1" - local optional_space="[[:space:]]*" - local prefix="^${config_key}${optional_space}=${optional_space}" - local raw_value - raw_value="$(sed -ne "/${prefix}/ s|${prefix}||p" "${PANTS_TOML}")" - local optional_suffix="${optional_space}(#.*)?$" - echo "${raw_value}" \ - | sed -E \ - -e "s|^'([^']*)'${optional_suffix}|\1|" \ - -e 's|^"([^"]*)"'"${optional_suffix}"'$|\1|' \ - && return 0 - return 0 -} - -function get_python_major_minor_version { - local python_exe="$1" - "$python_exe" </dev/null 2>&1; then - continue - fi - if [[ -n "$(check_python_exe_compatible_version "${interpreter_path}")" ]]; then - echo "${interpreter_path}" && return 0 - fi - done -} - -function determine_python_exe { - local pants_version="$1" - set_supported_python_versions "${pants_version}" - local requirement_str="For \`pants_version = \"${pants_version}\"\`, Pants requires Python ${supported_message} to run." - - local python_exe - if [[ "${PYTHON_BIN_NAME}" != 'unspecified' ]]; then - python_exe="$(get_exe_path_or_die "${PYTHON_BIN_NAME}")" || exit 1 - if [[ -z "$(check_python_exe_compatible_version "${python_exe}")" ]]; then - die "Invalid Python interpreter version for ${python_exe}. ${requirement_str}" - fi - else - python_exe="$(determine_default_python_exe)" - if [[ -z "${python_exe}" ]]; then - die "No valid Python interpreter found. 
${requirement_str} Please check that a valid interpreter is installed and on your \$PATH." - fi - fi - echo "${python_exe}" -} - -function compute_sha256 { - local python="$1" - local path="$2" - - "$python" <&2 || exit 1 - fi - echo "${bootstrapped}" -} - -function scrub_env_vars { - # Ensure the virtualenv PEX runs as shrink-wrapped. - # See: https://github.com/pantsbuild/setup/issues/105 - local -r pex_env_vars=(${!PEX_@}) - if [[ ! ${#pex_env_vars[@]} -eq 0 ]]; then - local -r pex_env_vars_to_scrub="${pex_env_vars[@]/PEX_ROOT}" - if [[ -n "${pex_env_vars_to_scrub[@]}" ]]; then - warn "Scrubbing ${pex_env_vars_to_scrub[@]}" - unset ${pex_env_vars_to_scrub[@]} - fi - fi - # Also ensure pip doesn't think packages on PYTHONPATH - # are already installed. - if [ -n "${PYTHONPATH:-}" ]; then - warn "Scrubbing PYTHONPATH" - unset PYTHONPATH - fi -} - -function bootstrap_virtualenv { - local python="$1" - local bootstrapped="${PANTS_BOOTSTRAP}/virtualenv-${VIRTUALENV_VERSION}/virtualenv.pex" - if [[ ! -f "${bootstrapped}" ]]; then - ( - green "Creating the virtualenv PEX." - pex_path="$(bootstrap_pex "${python}")" || exit 1 - mkdir -p "${PANTS_BOOTSTRAP}" - local staging_dir - staging_dir=$(tempdir "${PANTS_BOOTSTRAP}") - echo "${VIRTUALENV_REQUIREMENTS}" > "${staging_dir}/requirements.txt" - ( - scrub_env_vars - "${python}" "${pex_path}" -r "${staging_dir}/requirements.txt" -c virtualenv -o "${staging_dir}/virtualenv.pex" - ) - mkdir -p "$(dirname "${bootstrapped}")" - mv -f "${staging_dir}/virtualenv.pex" "${bootstrapped}" - rm -rf "${staging_dir}" - ) 1>&2 || exit 1 - fi - echo "${bootstrapped}" -} - -function find_links_url { - local pants_version="$1" - local pants_sha="$2" - echo -n "https://binaries.pantsbuild.org/wheels/pantsbuild.pants/${pants_sha}/${pants_version/+/%2B}/index.html" -} - -function get_version_for_sha { - local sha="$1" - - # Retrieve the Pants version associated with this commit. - local pants_version - pants_version="$(curl --proto "=https" \ - --tlsv1.2 \ - --fail \ - --silent \ - --location \ - "https://raw.githubusercontent.com/pantsbuild/pants/${sha}/src/python/pants/VERSION")" - - # Construct the version as the release version from src/python/pants/VERSION, plus the string `+gitXXXXXXXX`, - # where the XXXXXXXX is the first 8 characters of the SHA. - echo "${pants_version}+git${sha:0:8}" -} - -function bootstrap_pants { - local pants_version="$1" - local python="$2" - local pants_sha="${3:-}" - local pants_debug="${4:-}" - - local pants_requirements=(pantsbuild.pants==${pants_version}) - local maybe_find_links - if [[ -z "${pants_sha}" ]]; then - maybe_find_links="" - else - maybe_find_links="--find-links=$(find_links_url "${pants_version}" "${pants_sha}")" - fi - - local debug_suffix - if [[ -z "${pants_debug}" ]]; then - debug_suffix="" - else - debug_suffix="-debug" - pants_requirements+=(debugpy==1.6.0) - fi - - local python_major_minor_version - python_major_minor_version="$(get_python_major_minor_version "${python}")" - local target_folder_name="${pants_version}_py${python_major_minor_version}${debug_suffix}" - local bootstrapped="${PANTS_BOOTSTRAP}/${target_folder_name}" - - if [[ ! 
-d "${bootstrapped}" ]]; then - ( - green "Bootstrapping Pants using ${python}" - local staging_dir - staging_dir=$(tempdir "${PANTS_BOOTSTRAP}") - local virtualenv_path - virtualenv_path="$(bootstrap_virtualenv "${python}")" || exit 1 - green "Installing ${pants_requirements[@]} into a virtual environment at ${bootstrapped}" - ( - scrub_env_vars - # shellcheck disable=SC2086 - "${python}" "${virtualenv_path}" --quiet --no-download "${staging_dir}/install" && \ - # Grab the latest pip, but don't advance setuptools past 58 which drops support for the - # `setup` kwarg `use_2to3` which Pants 1.x sdist dependencies (pystache) use. - "${staging_dir}/install/bin/pip" install --quiet -U pip "setuptools<58" && \ - "${staging_dir}/install/bin/pip" install ${maybe_find_links} --quiet --progress-bar off "${pants_requirements[@]}" - ) && \ - ln -s "${staging_dir}/install" "${staging_dir}/${target_folder_name}" && \ - mv "${staging_dir}/${target_folder_name}" "${bootstrapped}" && \ - green "New virtual environment successfully created at ${bootstrapped}." - ) 1>&2 || exit 1 - fi - echo "${bootstrapped}" -} - -function run_bootstrap_tools { - # functionality for introspecting the bootstrapping process, without actually doing it - if [[ "${PANTS_BOOTSTRAP_TOOLS}" -gt "${SCRIPT_VERSION}" ]]; then - die "$0 script (bootstrap version ${SCRIPT_VERSION}) is too old for this invocation (with PANTS_BOOTSTRAP_TOOLS=${PANTS_BOOTSTRAP_TOOLS}). -Please update it by following ${INSTALL_URL}" - fi - - case "${1:-}" in - bootstrap-cache-key) - local pants_version=$(determine_pants_version) - local python="$(determine_python_exe "${pants_version}")" - # the python above may be a shim (e.g. pyenv or homebrew), so let's get an estimate of the - # actual path, as will be symlinked in the virtualenv. (NB. virtualenv does more complicated - # things, but we at least emulate the symlink-resolution that it does.) 
- local python_executable_path="$("${python}" -c 'import os, sys; print(os.path.realpath(sys.executable))')" - - local requirements_file="$(mktemp)" - echo "${VIRTUALENV_REQUIREMENTS}" > "${requirements_file}" - local virtualenv_requirements_sha256="$(compute_sha256 "${python}" "${requirements_file}")" - rm "${requirements_file}" - - local parts=( - "os_name=$(uname -s)" - "arch=$(uname -m)" - "python_path=${python}" - "python_executable_path=${python_executable_path}" - # the full interpreter information, for maximum compatibility - "python_version=$("$python" --version)" - "pex_version=${_PEX_VERSION}" - "virtualenv_requirements_sha256=${virtualenv_requirements_sha256}" - "pants_version=${pants_version}" - ) - echo "${parts[*]}" - ;; - bootstrap-version) - echo "${SCRIPT_VERSION}" - ;; - help|"") - cat <"] -description = "llama-index readers youtube-metadata integration" -license = "MIT" -name = "llama-index-readers-youtube-metadata" -packages = [{include = "llama_index/"}] -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -youtube-transcript-api = "^0.6.2" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} -codespell = {extras = ["toml"], version = ">=v2.2.6"} -flake8 = "^7.0.0" -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 -types-setuptools = "67.1.0.0" diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD deleted file mode 100644 index adca2b1c7549a..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/BUILD +++ /dev/null @@ -1,5 +0,0 @@ -python_sources() - -python_tests( - name="tests0", -) diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/__init__.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py b/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py deleted file mode 100644 index 7f8dee48fdf40..0000000000000 --- a/llama-index-integrations/readers/llama-index-readers-youtube-metadata/tests/test_readers_youtube_metadata.py +++ /dev/null @@ -1,12 +0,0 @@ -from llama_index.core.readers.base import BaseReader -from llama_index.readers.youtube_metadata import ( - YouTubeMetaData, - YouTubeMetaDataAndTranscript, -) - - -def test_class(): - names_of_base_classes = [b.__name__ for b in YouTubeMetaData.__mro__] - assert BaseReader.__name__ in names_of_base_classes - names_of_base_classes = [b.__name__ for b in YouTubeMetaDataAndTranscript.__mro__] - assert BaseReader.__name__ in names_of_base_classes diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/.gitignore b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- 
a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/BUILD b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/CHANGELOG.md b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/CHANGELOG.md deleted file mode 100644 index 674be675472ab..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/CHANGELOG.md +++ /dev/null @@ -1,5 +0,0 @@ -# CHANGELOG - -## [0.1.2] - 2024-02-27 - -- Add maintainers and keywords from library.json (llamahub) diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/Makefile b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. 
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format: ## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test: ## Run tests via pytest.
-	pytest tests
-
-watch-docs: ## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/README.md b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/README.md deleted file mode 100644 index e325e463600b8..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/README.md +++ /dev/null @@ -1,25 +0,0 @@ -# Passio Nutrition AI Tool
-
-This tool connects to a Passio Nutrition AI account and allows an Agent to perform searches against a database of over 2.2M foods.
-
-You will need to set up a search key for the Passio Nutrition API; learn more here: https://www.passio.ai/nutrition-ai#nutrition-api-pricing
-
-## Usage
-
-Here's an example usage of the NutritionAIToolSpec.
-
-```python
-from llama_index.tools.passio_nutrition_ai import NutritionAIToolSpec
-from llama_index.agent.openai import OpenAIAgent
-
-tool_spec = NutritionAIToolSpec(api_key="your-key")
-
-agent = OpenAIAgent.from_tools(tool_spec.to_tool_list())
-
-agent.chat("What is the nutritional value of an apple?")
-agent.chat("I had a cobb salad for lunch, how many calories did I eat?")
-```
-
-`passio_nutrition_ai`: Search for foods and their micronutrient results related to a query
-
-This loader is designed to be used as a way to load data as a Tool in an Agent.
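For orientation before the example notebook is removed below, here is a minimal sketch of exercising the deleted tool spec directly, without an agent. The `NutritionAIToolSpec` constructor comes from the README above; the `nutrition_ai_search` method name and the `NUTRITIONAI_SUBSCRIPTION_KEY` environment variable come from the notebook in this same patch; the shape of the returned payload is an assumption based on the tool output captured in the notebook cells that follow.

```python
import os

from llama_index.tools.passio_nutrition_ai import NutritionAIToolSpec

# Key setup mirrors the notebook below; the env var name is the one it reads.
tool_spec = NutritionAIToolSpec(api_key=os.environ["NUTRITIONAI_SUBSCRIPTION_KEY"])

# `nutrition_ai_search` is the function name shown in the notebook's
# "Calling function: nutrition_ai_search" traces. Assumption: called
# directly, it returns the same payload the agent sees, i.e. a dict of
# the form {"results": [...], "alternateNames": [...]}.
payload = tool_spec.nutrition_ai_search("cobb salad")

# Each hit in the captured output carries a `nutritionPreview` with a
# portion weight and a calorie estimate.
for hit in payload["results"][:3]:
    preview = hit["nutritionPreview"]
    weight = preview["portion"]["weight"]
    print(
        f"{hit['displayName']}: ~{preview['calories']:.0f} kcal "
        f"per {weight['value']}{weight['unit']}"
    )
```

When wrapped via `tool_spec.to_tool_list()`, this same method is what the `OpenAIAgent` calls in the notebook transcript that follows.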
diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/examples/passio_nutrition_ai.ipynb b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/examples/passio_nutrition_ai.ipynb deleted file mode 100644 index 7fc5e88c3d8f0..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/examples/passio_nutrition_ai.ipynb +++ /dev/null @@ -1,170 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "id": "a2408d6e-8e07-47e5-a7e3-daf3022a44de", - "metadata": {}, - "outputs": [], - "source": [ - "# Setup OpenAI Agent\n", - "import openai\n", - "import os\n", - "from dotenv import load_dotenv\n", - "\n", - "load_dotenv()\n", - "\n", - "openai_api_key = os.getenv(\"OPENAI_API_KEY\")\n", - "\n", - "openai.api_key = openai_api_key\n", - "from llama_index.agent.openai import OpenAIAgent" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "3e91e13e-d7da-47d1-8122-5e11bb1b5a5a", - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.tools.passio_nutrition_ai.base import NutritionAIToolSpec\n", - "\n", - "nutritionai_subscription_key = os.getenv(\"NUTRITIONAI_SUBSCRIPTION_KEY\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "cd53ab95", - "metadata": {}, - "outputs": [], - "source": [ - "nutrition_ai_tool = NutritionAIToolSpec(api_key=nutritionai_subscription_key)\n", - "agent = OpenAIAgent.from_tools(\n", - " nutrition_ai_tool.to_tool_list(),\n", - " verbose=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6a8541ac", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added user message to memory: I had chicken tikka masala for lunch. 
How good is that for me?\n", - "=== Calling Function ===\n", - "Calling function: nutrition_ai_search with args: {\"query\":\"chicken tikka masala\"}\n", - "Got output: {'results': [{'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 1, 'displayNameScore': 1, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0085239308349', 'resultId': 'openfood0085239308349', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': '', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 142.8571014404297}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Wells Enterprises, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5390003010973', 'resultId': 'openfood5390003010973', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 320}, 'name': 'meal', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 345.6000061035156}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Tesco plc', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5054269267228', 'resultId': 'openfood5054269267228', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 450}, 'name': 'package', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 540}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Sharwoods', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5000187114670', 'resultId': 'openfood5000187114670', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 375}, 'name': 'package', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 453.75}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': \"SUKHI'S GOURMET INDIAN FOOD.\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0767226447919', 'resultId': 'openfood0767226447919', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 140}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 179.9999542236328}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'T.A.C.T. 
Holding, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood00989121', 'resultId': 'openfood00989121', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 454}, 'name': 'package', 'quantity': 1}, 'calories': 580}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'EVOL.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211477023', 'resultId': '1603211477023', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 142}, 'name': 'burrito', 'quantity': 1}, 'calories': 269.79998779296875}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Star Markets Co.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211444746', 'resultId': '1603211444746', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.3}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [10]}, 'calories': 379.219970703125}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': \"SUKHI'S\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211442144', 'resultId': '1603211442144', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.27}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [5.2]}, 'calories': 299.8881530761719}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Safeway, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211440951', 'resultId': '1603211440951', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.36}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [11]}, 'calories': 358.75396728515625}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Ahold USA, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211387832', 'resultId': '1603211387832', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [5]}, 'calories': 229.60000610351562}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Glencourt Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211384069', 'resultId': '1603211384069', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 241}, 'name': 'tray', 'quantity': 1}, 'calories': 260.2799987792969}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken 
Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Us-Nippon Meat Packers', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211383435', 'resultId': '1603211383435', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [5]}, 'calories': 210}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'CONTESSA', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211342300', 'resultId': '1603211342300', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.38}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [10.5]}, 'calories': 318.84930419921875}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'The Kroger Co.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211306659', 'resultId': '1603211306659', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.36}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [11]}, 'calories': 330.6776123046875}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Wal-Mart Stores, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211276873', 'resultId': '1603211276873', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 335}, 'name': 'container', 'quantity': 1}, 'calories': 311.54998779296875}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala', 'score': 0.95, 'displayNameScore': 0.95, 'brandName': 'Target Stores', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211257427', 'resultId': '1603211257427', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.57}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [4.9]}, 'calories': 239.38803100585938}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala Mix', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Mix', 'scoredName': 'Chicken Tikka Masala Mix', 'score': 0.9319047619047619, 'displayNameScore': 0.9319047619047619, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood8906064230044', 'resultId': 'openfood8906064230044', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 80}, 'name': 'package', 'quantity': 1}, 'calories': 325.6000061035156}}, {'type': 'reference', 'displayName': 'Wie Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Wie Chicken Tikka Masala', 'scoredName': 'Wie Chicken Tikka Masala', 'score': 0.9319047619047619, 'displayNameScore': 0.9319047619047619, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood4005009102478', 'resultId': 'openfood4005009102478', 'nutritionPreview': {'portion': {'weight': {'unit': 
'g', 'value': 350}, 'name': 'package', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 145.25}}, {'type': 'reference', 'displayName': 'Hot Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Hot Chicken Tikka Masala', 'scoredName': 'Hot Chicken Tikka Masala', 'score': 0.9319047619047619, 'displayNameScore': 0.9319047619047619, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood00776516', 'resultId': 'openfood00776516', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 200}, 'name': 'serving', 'quantity': 1}, 'calories': 284}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala Bowl', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Bowl', 'scoredName': 'Chicken Tikka Masala Bowl', 'score': 0.927906976744186, 'displayNameScore': 0.927906976744186, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0041415024151', 'resultId': 'openfood0041415024151', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': '', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 134.27560424804688}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala Sauce', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Sauce', 'scoredName': 'Chicken Tikka Masala Sauce', 'score': 0.924090909090909, 'displayNameScore': 0.924090909090909, 'brandName': 'Safeway, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211342703', 'resultId': '1603211342703', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 255}, 'name': 'tray', 'quantity': 1}, 'calories': 239.6999969482422}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala (Main)', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala (Main)', 'scoredName': 'Chicken Tikka Masala (Main)', 'score': 0.9204444444444443, 'displayNameScore': 0.9204444444444443, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5054781137085', 'resultId': 'openfood5054781137085', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 400}, 'name': 'package', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 266}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala Crowns', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Crowns', 'scoredName': 'Chicken Tikka Masala Crowns', 'score': 0.9204444444444443, 'displayNameScore': 0.9204444444444443, 'brandName': 'The Kroger Co.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211285450', 'resultId': '1603211285450', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 25}, 'name': 'piece', 'quantity': 1, 'suggestedQuantity': [3]}, 'calories': 210}}, {'type': 'reference', 'displayName': 'Evol, Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Evol, Chicken Tikka Masala', 'scoredName': 'Evol, Chicken Tikka Masala', 'score': 0.9194139860139859, 'displayNameScore': 0.9194139860139859, 'brandName': 'Pinnacle Foods Group LLC', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211455071', 'resultId': '1603211455071', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 566}, 'name': 'bag', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 299.97998046875}}, {'type': 'reference', 'displayName': 'Chicken 
Tikka Masala, Chicken', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala, Chicken', 'scoredName': 'Chicken Tikka Masala, Chicken', 'score': 0.9136170212765957, 'displayNameScore': 0.9136170212765957, 'brandName': 'LIDL', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606739622', 'resultId': '1628606739622', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.3}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [10]}, 'calories': 311.29998779296875}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala, Chicken', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala, Chicken', 'scoredName': 'Chicken Tikka Masala, Chicken', 'score': 0.9136170212765957, 'displayNameScore': 0.9136170212765957, 'brandName': 'Target Stores', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211557019', 'resultId': '1603211557019', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 202.5}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [0.67]}, 'calories': 191.30177307128906}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala, Chicken Tikka', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala, Chicken Tikka', 'scoredName': 'Chicken Tikka Masala, Chicken Tikka', 'score': 0.8962264150943396, 'displayNameScore': 0.8962264150943396, 'brandName': 'Tiller & Hatch, Co.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606774236', 'resultId': '1628606774236', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 283}, 'name': 'serving', 'quantity': 1}, 'calories': 390.53997802734375}}, {'type': 'reference', 'displayName': 'Vegan Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Vegan Chicken Tikka Masala', 'scoredName': 'Vegan Chicken Tikka Masala', 'score': 0.8890139860139858, 'displayNameScore': 0.8890139860139858, 'brandName': 'VEGETARIAN PLUS', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211549315', 'resultId': '1603211549315', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.4}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [2.5]}, 'calories': 90.16999816894531}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala Soup, Chicken Tikka', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Soup, Chicken Tikka', 'scoredName': 'Chicken Tikka Masala Soup, Chicken Tikka', 'score': 0.8844827586206896, 'displayNameScore': 0.8844827586206896, 'brandName': 'Whole Foods Market, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606752082', 'resultId': '1628606752082', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 245}, 'name': 'serving', 'quantity': 1}, 'calories': 240.10000610351562}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala Pouches, Chicken Tikka', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Pouches, Chicken Tikka', 'scoredName': 'Chicken Tikka Masala Pouches, Chicken Tikka', 'score': 0.8757094929470073, 'displayNameScore': 0.8757094929470073, 'brandName': 'Deep Foods Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606770699', 'resultId': '1628606770699', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 113}, 'name': 'serving', 'quantity': 1}, 'calories': 290.4100036621094}}, {'type': 'reference', 
'displayName': 'Chicken Tikka Masala Kit, Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala Kit, Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala Kit, Chicken Tikka Masala', 'score': 0.8678559782608695, 'displayNameScore': 0.8678559782608695, 'brandName': 'KRAFT HEINZ SAUCES & FROZEN', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211526033', 'resultId': '1603211526033', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [3]}, 'calories': 199.9199981689453}}, {'type': 'reference', 'displayName': 'Tikka Masala Chicken', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Tikka Masala Chicken', 'scoredName': 'Tikka Masala Chicken', 'score': 0.7827000000000001, 'displayNameScore': 0.7827000000000001, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5701410407001', 'resultId': 'openfood5701410407001', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 450}, 'name': 'package', 'quantity': 1}, 'calories': 670.5}}, {'type': 'reference', 'displayName': 'Tikka Masala Chicken', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Tikka Masala Chicken', 'scoredName': 'Tikka Masala Chicken', 'score': 0.7827000000000001, 'displayNameScore': 0.7827000000000001, 'brandName': 'Ahold Usa, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211336796', 'resultId': '1603211336796', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 350}, 'name': 'package', 'quantity': 1}, 'calories': 399}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala With Turmeric Rice, Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala With Turmeric Rice, Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala With Turmeric Rice, Chicken Tikka Masala', 'score': 0.8383104378501763, 'displayNameScore': 0.8383104378501763, 'brandName': 'Amazon Fulfillment Services, Inc. - Consumables Private Brands', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606772116', 'resultId': '1628606772116', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 340}, 'name': 'serving (container)', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 459}}, {'type': 'synonym', 'displayName': 'Chicken Salad Wawa', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': 'Chicken Salad Wawa', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.7651202614379086, 'brandName': 'WAWA', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1603211581514', 'resultId': '1603211581514', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 108}, 'name': 'container', 'quantity': 1}, 'calories': 129.60000610351562}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala With Basmati Rice, Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala With Basmati Rice, Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala With Basmati Rice, Chicken Tikka Masala', 'score': 0.8398974358974357, 'displayNameScore': 0.8398974358974357, 'brandName': 'C. C. 
Creations, Ltd.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606797814', 'resultId': '1628606797814', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 350}, 'name': 'serving', 'quantity': 1}, 'calories': 371}}, {'type': 'reference', 'displayName': 'Chicken Tikka Masala With Basmati Rice, Chicken Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Tikka Masala With Basmati Rice, Chicken Tikka Masala', 'scoredName': 'Chicken Tikka Masala With Basmati Rice, Chicken Tikka Masala', 'score': 0.8398974358974357, 'displayNameScore': 0.8398974358974357, 'brandName': 'American Halal Company, Inc. ', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606772600', 'resultId': '1628606772600', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 283}, 'name': 'serving', 'quantity': 1}, 'calories': 299.97998046875}}, {'type': 'synonym', 'displayName': 'Chicken Salad, Hy-Vee', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': 'Chicken Salad, Hy-Vee', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.7039868531468532, 'brandName': 'Hy-Vee, Inc.', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1603211581507', 'resultId': '1603211581507', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 210}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 199.49998474121094}}, {'type': 'reference', 'displayName': 'Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Tikka Masala', 'scoredName': 'Tikka Masala', 'score': 0.8765333333333333, 'displayNameScore': 0.8765333333333333, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5019503024203', 'resultId': 'openfood5019503024203', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 400}, 'name': 'package', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 388}}, {'type': 'reference', 'displayName': 'Tikka Masala', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Tikka Masala', 'scoredName': 'Tikka Masala', 'score': 0.8765333333333333, 'displayNameScore': 0.8765333333333333, 'brandName': 'The Kroger Co.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211333981', 'resultId': '1603211333981', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 240}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 140.40000915527344}}, {'type': 'synonym', 'displayName': 'Chicken Salad, Willow Tree Poultry Farm, Inc.', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': 'Chicken Salad, Willow Tree Poultry Farm, Inc.', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.661288152173913, 'brandName': 'Willow Tree Poultry Farm, Inc.', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1603211581515', 'resultId': '1603211581515', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 230}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 330.04998779296875}}, {'type': 'synonym', 'displayName': ' Starkist Chicken Creations Chicken Salad', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': ' 
Starkist Chicken Creations Chicken Salad', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.6877124982775251, 'brandName': 'StarKist Co.', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1603211581512', 'resultId': '1603211581512', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 74}, 'name': 'pouch', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70.30000305175781}}, {'type': 'synonym', 'displayName': 'Chicken Or Turkey Salad, Made With Light Mayonnaise-Type Salad Dressing', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': 'Chicken Or Turkey Salad, Made With Light Mayonnaise-Type Salad Dressing', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.7481664630479505, 'brandName': '', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1603211579942', 'resultId': '1603211579942', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 226}, 'name': 'cup', 'quantity': 0.5}, 'calories': 163.85000610351562}}, {'type': 'synonym', 'displayName': 'Heb Meal Simple, Rotisserie Chicken Salad', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': 'Heb Meal Simple, Rotisserie Chicken Salad', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.6018217190829014, 'brandName': 'HEB LP', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1636377087138', 'resultId': '1636377087138', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 200}, 'name': 'cup', 'quantity': 0.5}, 'calories': 310}}, {'type': 'synonym', 'displayName': 'Archer Farms, Hatch Chile Chicken Salad', 'stemmedDisplayName': '', 'shortName': 'creamy chicken salad', 'longName': 'Archer Farms, Hatch Chile Chicken Salad', 'scoredName': 'Chicken Salad', 'score': 0.8817191066997521, 'displayNameScore': 0.6096256410256411, 'brandName': 'Target Stores', 'iconId': 'PRE0075', 'labelId': 'aea34d6a-9f6f-11ea-b4fc-d3aeabf2e08c', 'synonymId': '75d5d48a-7dd6-11eb-8ccb-ef37ca786ece', 'recipeId': '', 'referenceId': '1603211581513', 'resultId': '1603211581513', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 200}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 250}}, {'type': 'synonym', 'displayName': 'Lasagna With Chicken Or Turkey', 'stemmedDisplayName': '', 'shortName': 'chicken lasagna', 'longName': 'Lasagna With Chicken Or Turkey', 'scoredName': 'Chicken Lasagna', 'score': 0.8150696099300334, 'displayNameScore': 0.6178499629103862, 'brandName': '', 'iconId': '1004611', 'labelId': 'e7265647-bf83-11ee-a741-3ea322ece7aa', 'synonymId': 'e726564a-bf83-11ee-a741-3ea322ece7aa', 'recipeId': '', 'referenceId': '1603211207674', 'resultId': '1603211207674', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 250}, 'name': 'cup', 'quantity': 1}, 'calories': 487.5}}, {'type': 'synonym', 'displayName': 'Soft Taco With Chicken And Beans', 'stemmedDisplayName': '', 'shortName': 'chicken taco', 'longName': 'Soft Taco With Chicken And Beans', 'scoredName': 'Chicken Taco', 'score': 0.8147054647507977, 'displayNameScore': 0.5807736465689797, 'brandName': '', 'iconId': 'BAK0371', 'labelId': 
'3ff67dce-cd55-11ea-ade5-9331bcff0e72', 'synonymId': '7a3c170a-7dd6-11eb-8ccb-7b2e436478c5', 'recipeId': '', 'referenceId': '1603211581798', 'resultId': '1603211581798', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 110}, 'name': 'cup', 'quantity': 1}, 'calories': 218.90000915527344}}, {'type': 'synonym', 'displayName': 'Fast Foods, Taco With Chicken, Lettuce And Cheese, Soft', 'stemmedDisplayName': '', 'shortName': 'chicken taco', 'longName': 'Fast Foods, Taco With Chicken, Lettuce And Cheese, Soft', 'scoredName': 'Chicken Taco', 'score': 0.8147054647507977, 'displayNameScore': 0.5269856640035998, 'brandName': '', 'iconId': 'BAK0371', 'labelId': '3ff67dce-cd55-11ea-ade5-9331bcff0e72', 'synonymId': '7a3c170a-7dd6-11eb-8ccb-7b2e436478c5', 'recipeId': '', 'referenceId': '1603211199494', 'resultId': '1603211199494', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 98}, 'name': 'each taco', 'quantity': 2}, 'calories': 370.44000244140625}}, {'type': 'recipe', 'displayName': 'Garden Salad With Chicken And Ranch Dressing', 'stemmedDisplayName': '', 'shortName': 'Garden Salad with Chicken and Ranch Dressing', 'longName': '', 'scoredName': 'Bbq Chicken Salad', 'score': 0.708320523594053, 'displayNameScore': 0.6026293600138004, 'brandName': '', 'iconId': '1001615', 'labelId': '0f7976ff-392b-11ec-a5ad-966f014fd40b', 'synonymId': '5eaefbb8-b9c5-11ee-9c96-12023e19a804', 'recipeId': 'd8a715d6-3c0d-11ec-b382-2a201cb157c2', 'referenceId': '', 'resultId': 'd8a715d6-3c0d-11ec-b382-2a201cb157c2', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 261.05}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 309.3480224609375}}], 'alternateNames': ['chicken lasagna', 'chicken taco', 'chicken caesar salad', 'creamy chicken salad', 'chicken lo mein', 'chicken wrap', 'chicken chili', 'garden salad with chicken', 'roasted chicken breast', 'breaded chicken slices']}\n", - "========================\n", - "\n", - "The nutrition facts for Chicken Tikka Masala vary depending on the brand and portion size. Here are some examples:\n", - "\n", - "1. Chicken Tikka Masala (140g serving) - 180 calories\n", - "2. Chicken Tikka Masala (320g meal) - 346 calories\n", - "3. Chicken Tikka Masala (450g package) - 540 calories\n", - "4. Chicken Tikka Masala (375g package) - 454 calories\n", - "5. Chicken Tikka Masala (142g burrito) - 270 calories\n", - "\n", - "These values give you an idea of the calorie content in different servings of Chicken Tikka Masala. It's important to consider portion sizes and ingredients used in the preparation for a more accurate assessment of its nutritional value.\n" - ] - } - ], - "source": [ - "print(agent.chat(\"I had chicken tikka masala for lunch. How good is that for me?\"))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5dc87515", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added user message to memory: I had eggs for breakfast. 
Give me nutritional information about that.\n", - "=== Calling Function ===\n", - "Calling function: nutrition_ai_search with args: {\"query\":\"eggs\"}\n", - "Got output: {'results': [{'type': 'synonym', 'displayName': 'Egg, Whole, Cooked, Hard-Boiled', 'stemmedDisplayName': '', 'shortName': 'boiled eggs', 'longName': 'Egg, Whole, Cooked, Hard-Boiled', 'scoredName': 'Eggs', 'score': 1, 'displayNameScore': 0.8707579667644183, 'brandName': '', 'iconId': '1001222', 'labelId': '6fbc0d8f-25f7-11ec-a025-1a3f5d843ef0', 'synonymId': '3e93b530-92d8-11ee-931c-3edb8070abce', 'recipeId': '', 'referenceId': '1603211567715', 'resultId': '1603211567715', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'large', 'quantity': 2}, 'calories': 155}}, {'type': 'synonym', 'displayName': 'Trader Joes Cage Free Fresh Hard Cooked Peeled Eggs', 'stemmedDisplayName': '', 'shortName': 'boiled eggs', 'longName': 'Trader Joes Cage Free Fresh Hard Cooked Peeled Eggs', 'scoredName': 'Eggs', 'score': 1, 'displayNameScore': 0.8805519792822789, 'brandName': \"Trader Joe's\", 'iconId': '1001222', 'labelId': '6fbc0d8f-25f7-11ec-a025-1a3f5d843ef0', 'synonymId': '3e93b530-92d8-11ee-931c-3edb8070abce', 'recipeId': '', 'referenceId': 'openfood00919098', 'resultId': 'openfood00919098', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 44}, 'name': 'egg', 'quantity': 1}, 'calories': 59.99998474121094}}, {'type': 'recipe', 'displayName': 'William Sonoma Farmers’ Market Scramble', 'stemmedDisplayName': '', 'shortName': 'william sonoma Farmers’ Market Scramble', 'longName': '', 'scoredName': 'Eggs Scrambled', 'score': 0.95025, 'displayNameScore': 0.7438717948717949, 'brandName': '', 'iconId': 'MEA0505', 'labelId': '77b141d8-8951-11ea-a893-cbbc2d60a2bb', 'synonymId': 'fb55e5c5-b557-11ee-9c96-12023e19a804', 'recipeId': 'b7d0491d-8fa0-11ec-9070-d61d101ba1a6', 'referenceId': '', 'resultId': 'b7d0491d-8fa0-11ec-9070-d61d101ba1a6', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 199.8}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 229.73070444673795}}, {'type': 'synonym', 'displayName': 'Egg Omelet Or Scrambled Egg With Milk, Made With Oil', 'stemmedDisplayName': '', 'shortName': 'scrambled eggs', 'longName': 'Egg Omelet Or Scrambled Egg With Milk, Made With Oil', 'scoredName': 'Eggs Scrambled', 'score': 0.9500068247024784, 'displayNameScore': 0.8956702150158687, 'brandName': '', 'iconId': 'MEA0505', 'labelId': '77b141d8-8951-11ea-a893-cbbc2d60a2bb', 'synonymId': 'fb55e5c5-b557-11ee-9c96-12023e19a804', 'recipeId': '', 'referenceId': '1603211581749', 'resultId': '1603211581749', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 61}, 'name': 'egg', 'quantity': 2}, 'calories': 219.60000610351562}}, {'type': 'synonym', 'displayName': 'Egg Omelet Or Scrambled Egg, From Fast Food / Restaurant', 'stemmedDisplayName': '', 'shortName': 'scrambled eggs', 'longName': 'Egg Omelet Or Scrambled Egg, From Fast Food / Restaurant', 'scoredName': 'Eggs Scrambled', 'score': 0.9500068247024784, 'displayNameScore': 0.8950510775760416, 'brandName': '', 'iconId': 'MEA0505', 'labelId': '77b141d8-8951-11ea-a893-cbbc2d60a2bb', 'synonymId': 'fb55e5c5-b557-11ee-9c96-12023e19a804', 'recipeId': '', 'referenceId': '1603211588557', 'resultId': '1603211588557', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 54}, 'name': 'medium egg', 'quantity': 2}, 'calories': 228.9600067138672}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': 
'', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': \"Eggland's Best, Inc.\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'usda45227866', 'resultId': 'usda45227866', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'egg', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 60}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Morrisons', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood5010251564836', 'resultId': 'openfood5010251564836', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 68}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 89}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'MEIJER, INC.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0713733815086', 'resultId': 'openfood0713733815086', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'egg', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': \"Sam's Club\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0193968321239', 'resultId': 'openfood0193968321239', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'serving (egg)', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Walmart', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0078742127132', 'resultId': 'openfood0078742127132', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 56}, 'name': 'serving (egg)', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 44.85599899291992}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Hy Vee', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0075450095425', 'resultId': 'openfood0075450095425', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'serving (egg)', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Gray Ridge Egg Farm', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0064767343053', 'resultId': 'openfood0064767343053', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 105}, 'name': 'serving (egg)', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 243.59999084472656}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 
0.35, 'brandName': 'Publix', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0041415012660', 'resultId': 'openfood0041415012660', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'CAL-MAINE FOODS, INC.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0028621123984', 'resultId': 'openfood0028621123984', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'serving (egg)', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0011110092434', 'resultId': 'openfood0011110092434', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': '', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 60}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Mr. Beverages Old Time Cocktail Mixes', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211480195', 'resultId': '1603211480195', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 38}, 'name': 'egg', 'quantity': 1}, 'calories': 60.040000915527344}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Safeway, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211474477', 'resultId': '1603211474477', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 44}, 'name': 'egg', 'quantity': 1}, 'calories': 59.84000015258789}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'SUN HARVEST', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211474226', 'resultId': '1603211474226', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'egg', 'quantity': 1}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Kreider Farms', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211473285', 'resultId': '1603211473285', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 56}, 'name': 'egg', 'quantity': 1}, 'calories': 80.08000183105469}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': 'Oakdell Egg Farms, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211428806', 'resultId': '1603211428806', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 44}, 'name': 
'egg', 'quantity': 1}, 'calories': 59.84000015258789}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': \"Raley's\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211428441', 'resultId': '1603211428441', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'egg', 'quantity': 1}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Eggs', 'scoredName': 'Eggs', 'score': 0.35, 'displayNameScore': 0.35, 'brandName': \"Pete and Gerry's Organics\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211305959', 'resultId': '1603211305959', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 56}, 'name': 'egg', 'quantity': 1}, 'calories': 80.08000183105469}}, {'type': 'reference', 'displayName': 'Organic Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Organic Eggs', 'scoredName': 'Organic Eggs', 'score': 0.24466666666666662, 'displayNameScore': 0.24466666666666662, 'brandName': \"Pete and Gerry's Organics\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211305958', 'resultId': '1603211305958', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 56}, 'name': 'egg', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 80.08000183105469}}, {'type': 'reference', 'displayName': 'Brown Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Brown Eggs', 'scoredName': 'Brown Eggs', 'score': 0.2506, 'displayNameScore': 0.2506, 'brandName': 'THE HAPPY EGG CO', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211294697', 'resultId': '1603211294697', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 50}, 'name': 'egg', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 70}}, {'type': 'reference', 'displayName': 'Kirkland, Eggs', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Kirkland, Eggs', 'scoredName': 'Kirkland, Eggs', 'score': 0.16624999999999998, 'displayNameScore': 0.16624999999999998, 'brandName': 'Costco Companies Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211503713', 'resultId': '1603211503713', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 56}, 'name': 'egg', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 80.08000183105469}}], 'alternateNames': ['boiled eggs', 'scrambled eggs', 'fried eggs', 'baked eggs', 'poached eggs', 'deviled eggs', 'raw eggs']}\n", - "========================\n", - "\n", - "Here is the nutritional information for eggs:\n", - "\n", - "- **Egg, Whole, Cooked, Hard-Boiled**\n", - " - Portion: 2 large eggs (50g each)\n", - " - Calories: 155\n", - "\n", - "- **Trader Joe's Cage Free Fresh Hard Cooked Peeled Eggs**\n", - " - Portion: 1 egg (44g)\n", - " - Calories: 60\n", - "\n", - "- **Egg Omelet Or Scrambled Egg With Milk, Made With Oil**\n", - " - Portion: 2 eggs (61g)\n", - " - Calories: 219.6\n", - "\n", - "- **Egg Omelet Or Scrambled Egg, From Fast Food / Restaurant**\n", - " - Portion: 2 medium eggs (54g)\n", - " - Calories: 228.96\n", - "\n", - "These are some variations of eggs with their respective calorie counts.\n" - ] - } - ], - "source": [ - "print(\n", - " agent.chat(\"I had eggs for breakfast. 
Give me nutritional information about that.\")\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e1b8b2c1", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Added user message to memory: I had a cobb salad for lunch, how many calories did I eat?\n", - "=== Calling Function ===\n", - "Calling function: nutrition_ai_search with args: {\"query\":\"cobb salad\"}\n", - "Got output: {'results': [{'type': 'synonym', 'displayName': 'Cobb Salad, No Dressing', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Cobb Salad, No Dressing', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.930208318211283, 'brandName': '', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211552882', 'resultId': '1603211552882', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 105}, 'name': 'cup', 'quantity': 2}, 'calories': 193.1999969482422}}, {'type': 'synonym', 'displayName': 'Meat Lovers Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Meat Lovers Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8482639310416504, 'brandName': 'NOT A BRANDED ITEM', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211572453', 'resultId': '1603211572453', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 220}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [0.5]}, 'calories': 170.5}}, {'type': 'synonym', 'displayName': 'Turkey & Bacon Cobb Salad, Cumberland Farms', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Turkey & Bacon Cobb Salad, Cumberland Farms', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8419398562108725, 'brandName': 'CUMBERLAND FARMS', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580609', 'resultId': '1603211580609', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 205}, 'name': 'container', 'quantity': 1}, 'calories': 289.04998779296875}}, {'type': 'synonym', 'displayName': 'Chicken & Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Chicken & Bacon Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8376892069422205, 'brandName': 'NOT A BRANDED ITEM', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211572454', 'resultId': '1603211572454', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 264}, 'name': 'package', 'quantity': 1}, 'calories': 588.7200317382812}}, {'type': 'synonym', 'displayName': 'Organic Southwest Cobb Salad, Wild Oats Marketing, Llc', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Organic Southwest Cobb Salad, Wild Oats Marketing, Llc', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8298778210942502, 'brandName': 'Wild Oats Marketing, LLC', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580611', 'resultId': '1603211580611', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 
'value': 397}, 'name': 'container', 'quantity': 1}, 'calories': 258.04998779296875}}, {'type': 'synonym', 'displayName': 'Hannaford Bros Chicken Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Hannaford Bros Chicken Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8244169877410487, 'brandName': 'Hannaford Bros. Co.', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580608', 'resultId': '1603211580608', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 199}, 'name': 'salad', 'quantity': 1}, 'calories': 179.10000610351562}}, {'type': 'synonym', 'displayName': 'Taylor Farms Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Taylor Farms Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8239822312547613, 'brandName': 'Taylor Fresh Foods, Inc.', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580614', 'resultId': '1603211580614', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 206}, 'name': 'package', 'quantity': 1}, 'calories': 290.4599914550781}}, {'type': 'synonym', 'displayName': 'Whole Foods Market, Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Whole Foods Market, Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.821303438902211, 'brandName': 'RT Wholesale, LLC.', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580612', 'resultId': '1603211580612', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.41}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [9.82]}, 'calories': 499.3852844238281}}, {'type': 'synonym', 'displayName': 'Archer Farms, Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Archer Farms, Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8167613336390531, 'brandName': 'ARCHER FARMS', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211587568', 'resultId': '1603211587568', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 206}, 'name': 'salad', 'quantity': 1}, 'calories': 290.4599914550781}}, {'type': 'synonym', 'displayName': 'Marketside Ranch Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Marketside Ranch Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8087533971311166, 'brandName': 'Wal-Mart Stores, Inc.', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580610', 'resultId': '1603211580610', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 340}, 'name': 'container', 'quantity': 1, 'suggestedQuantity': [0.3]}, 'calories': 122.4000015258789}}, {'type': 'synonym', 'displayName': 'Lunds & Byerlys, Kobe Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Lunds & Byerlys, Kobe Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.8012613336390532, 'brandName': 'Byerly Foods International', 'iconId': 
'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211580613', 'resultId': '1603211580613', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 354}, 'name': 'salad', 'quantity': 1}, 'calories': 548.7000122070312}}, {'type': 'synonym', 'displayName': 'Fish House Foods Company, Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'Fish House Foods Company, Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.7943522427299622, 'brandName': 'Fish House Foods Company', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211587566', 'resultId': '1603211587566', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 28.37}, 'name': 'oz', 'quantity': 1, 'suggestedQuantity': [13.5]}, 'calories': 651.091552734375}}, {'type': 'synonym', 'displayName': 'A Along The Vine, Chicken & Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'cobb salad', 'longName': 'A Along The Vine, Chicken & Bacon Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 1, 'displayNameScore': 0.7838907042684238, 'brandName': 'A ALONG THE VINE', 'iconId': 'PRE0056', 'labelId': 'e219c51c-9f6e-11ea-84a8-438acb256141', 'synonymId': '76a2efb0-7dd6-11eb-8ccb-83d65b03681f', 'recipeId': '', 'referenceId': '1603211587567', 'resultId': '1603211587567', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 298}, 'name': 'salad', 'quantity': 1}, 'calories': 658.5800170898438}}, {'type': 'recipe', 'displayName': 'Homemade Shrimp Cobb Salad', 'stemmedDisplayName': '', 'shortName': 'Homemade shrimp cobb salad', 'longName': '', 'scoredName': 'Shrimp Cobb Salad', 'score': 0.916306936937167, 'displayNameScore': 0.7978272989281172, 'brandName': '', 'iconId': '1000593', 'labelId': 'c0a75458-e374-11eb-af23-e20d1afceae7', 'synonymId': 'c0a803b2-e374-11eb-b88a-f7731ca67b58', 'recipeId': 'e47cda8f-e5d6-11eb-b49c-3e0c4ccfa888', 'referenceId': '', 'resultId': 'e47cda8f-e5d6-11eb-b49c-3e0c4ccfa888', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 345.5}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 449.82000732421875}}, {'type': 'reference', 'displayName': 'Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 0.6, 'displayNameScore': 0.6, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0681131457934', 'resultId': 'openfood0681131457934', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 149}, 'name': 'serving', 'quantity': 1}, 'calories': 239.88999938964844}}, {'type': 'reference', 'displayName': 'Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 0.6, 'displayNameScore': 0.6, 'brandName': 'Udi, The Sandwichman, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211560486', 'resultId': '1603211560486', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 363}, 'name': 'serving', 'quantity': 1}, 'calories': 631.6199951171875}}, {'type': 'reference', 'displayName': 'Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 0.6, 'displayNameScore': 0.6, 'brandName': 'Renaissance Food Group, LLC', 'iconId': '', 
'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211477359', 'resultId': '1603211477359', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 234}, 'name': 'container', 'quantity': 1}, 'calories': 170.8199920654297}}, {'type': 'reference', 'displayName': 'Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 0.6, 'displayNameScore': 0.6, 'brandName': 'Mr. Beverages Old Time Cocktail Mixes', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211463512', 'resultId': '1603211463512', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 234}, 'name': 'package', 'quantity': 1}, 'calories': 259.739990234375}}, {'type': 'reference', 'displayName': 'Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 0.6, 'displayNameScore': 0.6, 'brandName': 'Entertainment Production House Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211380325', 'resultId': '1603211380325', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 309}, 'name': 'package', 'quantity': 1}, 'calories': 398.6099853515625}}, {'type': 'reference', 'displayName': 'Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad', 'scoredName': 'Cobb Salad', 'score': 0.6, 'displayNameScore': 0.6, 'brandName': 'Target Stores', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211274160', 'resultId': '1603211274160', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 206}, 'name': 'salad', 'quantity': 1}, 'calories': 290.4599914550781}}, {'type': 'synonym', 'displayName': 'Cornsalad, Raw', 'stemmedDisplayName': '', 'shortName': 'lambs lettuce', 'longName': 'Cornsalad, Raw', 'scoredName': 'Corn Salad', 'score': 0.8033989094956995, 'displayNameScore': 0.7139024592792491, 'brandName': '', 'iconId': 'VEG7693', 'labelId': '77ad5514-8951-11ea-a893-bb8db4091806', 'synonymId': '7fd2e84b-7df1-11eb-87be-b24b03f249d1', 'recipeId': '', 'referenceId': '1603211582210', 'resultId': '1603211582210', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 56}, 'name': 'cup', 'quantity': 1}, 'calories': 11.760000228881836}}, {'type': 'recipe', 'displayName': 'Tuna Nicoise Salad', 'stemmedDisplayName': '', 'shortName': 'tuna nicoise salad', 'longName': '', 'scoredName': 'Nicoise Salad', 'score': 0.8254111665310635, 'displayNameScore': 0.7718140969339939, 'brandName': '', 'iconId': 'PRE0068', 'labelId': '943c2910-9f6f-11ea-98eb-bbe90895d054', 'synonymId': '75a881ce-7dd6-11eb-8ccb-6f4a436efcc0', 'recipeId': 'c9f3fca0-e292-11ea-8e83-26ec6fac3df6', 'referenceId': '', 'resultId': 'c9f3fca0-e292-11ea-8e83-26ec6fac3df6', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 252.5}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 361.21002197265625}}, {'type': 'recipe', 'displayName': 'Garden Salad With Chicken And Ranch Dressing', 'stemmedDisplayName': '', 'shortName': 'Garden Salad with Chicken and Ranch Dressing', 'longName': '', 'scoredName': 'Bbq Chicken Salad', 'score': 0.856156862745098, 'displayNameScore': 0.6990734598734598, 'brandName': '', 'iconId': '1001615', 'labelId': '0f7976ff-392b-11ec-a5ad-966f014fd40b', 'synonymId': '5eaefbb8-b9c5-11ee-9c96-12023e19a804', 'recipeId': 'd8a715d6-3c0d-11ec-b382-2a201cb157c2', 'referenceId': '', 'resultId': 'd8a715d6-3c0d-11ec-b382-2a201cb157c2', 
'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 261.05}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 309.3480224609375}}, {'type': 'reference', 'displayName': 'Cobb Style Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Style Salad', 'scoredName': 'Cobb Style Salad', 'score': 0.57, 'displayNameScore': 0.57, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0011110602329', 'resultId': 'openfood0011110602329', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': '', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 14.300000190734863}}, {'type': 'reference', 'displayName': 'Cobb Chicken Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Chicken Salad', 'scoredName': 'Cobb Chicken Salad', 'score': 0.563076923076923, 'displayNameScore': 0.563076923076923, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0762535022987', 'resultId': 'openfood0762535022987', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': '', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 118.7135009765625}}, {'type': 'recipe', 'displayName': 'Caesar Salad With Salmon', 'stemmedDisplayName': '', 'shortName': 'caesar salad with salmon', 'longName': '', 'scoredName': 'Salmon Caesar Salad', 'score': 0.7485789670506343, 'displayNameScore': 0.7474248985460394, 'brandName': '', 'iconId': 'PRE0063', 'labelId': '2d64d7aa-9f6f-11ea-aace-effb8c80e76b', 'synonymId': '789a3ac6-7dd6-11eb-8ccb-db42f285b7d2', 'recipeId': '73f285bc-dcef-11ea-8a18-620f5f69b907', 'referenceId': '', 'resultId': '73f285bc-dcef-11ea-8a18-620f5f69b907', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 255}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 406.97998046875}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken', 'scoredName': 'Cobb Salad With Chicken', 'score': 0.5465469845722299, 'displayNameScore': 0.5465469845722299, 'brandName': 'Taylor Fresh Foods, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211381432', 'resultId': '1603211381432', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 305}, 'name': 'salad', 'quantity': 1}, 'calories': 451.3999938964844}}, {'type': 'reference', 'displayName': 'Southern Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Southern Cobb Salad', 'scoredName': 'Southern Cobb Salad', 'score': 0.5433263157894735, 'displayNameScore': 0.5433263157894735, 'brandName': 'Publix', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0298864307995', 'resultId': 'openfood0298864307995', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 542}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 77}}, {'type': 'reference', 'displayName': 'Colorado Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Colorado Cobb Salad', 'scoredName': 'Colorado Cobb Salad', 'score': 0.5325136842105263, 'displayNameScore': 0.5325136842105263, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0710859383697', 'resultId': 'openfood0710859383697', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 340}, 'name': 'serving', 'quantity': 1}, 
'calories': 449.9998779296875}}, {'type': 'recipe', 'displayName': 'Celery Salad With Dates, Almonds And Parmesan', 'stemmedDisplayName': '', 'shortName': 'Celery Salad with Dates, Almonds and Parmesan', 'longName': '', 'scoredName': 'Celery Salad', 'score': 0.7195266605458763, 'displayNameScore': 0.6431548366465052, 'brandName': '', 'iconId': '1001225', 'labelId': '818089d2-25ff-11ec-9051-1a845e38ff23', 'synonymId': '81812bd2-25ff-11ec-ba9f-27843c256eb8', 'recipeId': '1ba40c49-3bec-11ec-ab8c-661d52839eeb', 'referenceId': '', 'resultId': '1ba40c49-3bec-11ec-ab8c-661d52839eeb', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 797.7}, 'name': 'serving', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 1346.97509765625}}, {'type': 'reference', 'displayName': 'Cobb Salad With Turkey & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Turkey & Bacon', 'scoredName': 'Cobb Salad With Turkey & Bacon', 'score': 0.5288421052631578, 'displayNameScore': 0.5288421052631578, 'brandName': 'MARKET 32', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211459665', 'resultId': '1603211459665', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 206}, 'name': 'package', 'quantity': 1}, 'calories': 290.4599914550781}}, {'type': 'reference', 'displayName': 'Cobb Salad With Turkey & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Turkey & Bacon', 'scoredName': 'Cobb Salad With Turkey & Bacon', 'score': 0.5288421052631578, 'displayNameScore': 0.5288421052631578, 'brandName': 'Wal-Mart Stores, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211433526', 'resultId': '1603211433526', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 206}, 'name': 'container', 'quantity': 1}, 'calories': 319.29998779296875}}, {'type': 'reference', 'displayName': 'Cobb Gourmet Cafe Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Gourmet Cafe Salad', 'scoredName': 'Cobb Gourmet Cafe Salad', 'score': 0.521211332398317, 'displayNameScore': 0.521211332398317, 'brandName': 'Fresh Express Incorporated', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1628606774639', 'resultId': '1628606774639', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': 'serving', 'quantity': 1}, 'calories': 150}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken & Bacon', 'scoredName': 'Cobb Salad With Chicken & Bacon', 'score': 0.5268684863523573, 'displayNameScore': 0.5268684863523573, 'brandName': 'NOT A BRANDED ITEM', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211439766', 'resultId': '1603211439766', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 411}, 'name': 'salad', 'quantity': 1}, 'calories': 641.1600341796875}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken & Bacon', 'scoredName': 'Cobb Salad With Chicken & Bacon', 'score': 0.5268684863523573, 'displayNameScore': 0.5268684863523573, 'brandName': 'Taylor Fresh Foods, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211323565', 'resultId': '1603211323565', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': 'cup', 
'quantity': 1}, 'calories': 160}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken & Bacon', 'scoredName': 'Cobb Salad With Chicken & Bacon', 'score': 0.5268684863523573, 'displayNameScore': 0.5268684863523573, 'brandName': 'FRESH FOODS MARKET', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211298002', 'resultId': '1603211298002', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': 'cup', 'quantity': 1}, 'calories': 150}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken & Bacon', 'scoredName': 'Cobb Salad With Chicken & Bacon', 'score': 0.5268684863523573, 'displayNameScore': 0.5268684863523573, 'brandName': 'ARCHER FARMS', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211274205', 'resultId': '1603211274205', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 292}, 'name': 'container', 'quantity': 1}, 'calories': 499.32000732421875}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken And Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken And Bacon', 'scoredName': 'Cobb Salad With Chicken And Bacon', 'score': 0.5232283813747228, 'displayNameScore': 0.5232283813747228, 'brandName': 'NOT A BRANDED ITEM', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211477697', 'resultId': '1603211477697', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 377}, 'name': 'container', 'quantity': 1}, 'calories': 380.7699890136719}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken And Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken And Bacon', 'scoredName': 'Cobb Salad With Chicken And Bacon', 'score': 0.5232283813747228, 'displayNameScore': 0.5232283813747228, 'brandName': 'Taylor Fresh Foods, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211381345', 'resultId': '1603211381345', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 383}, 'name': 'salad', 'quantity': 1}, 'calories': 478.75}}, {'type': 'reference', 'displayName': 'Cobb Salad With Chicken And Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Chicken And Bacon', 'scoredName': 'Cobb Salad With Chicken And Bacon', 'score': 0.5232283813747228, 'displayNameScore': 0.5232283813747228, 'brandName': 'Safeway, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211310904', 'resultId': '1603211310904', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 66.67}, 'name': 'cup', 'quantity': 1, 'suggestedQuantity': [1.5]}, 'calories': 140.00698852539062}}, {'type': 'recipe', 'displayName': 'Homemade Caprese Salad', 'stemmedDisplayName': '', 'shortName': 'Homemade Caprese Salad', 'longName': '', 'scoredName': 'Caprese Salad', 'score': 0.75286870935433, 'displayNameScore': 0.6958379001635208, 'brandName': '', 'iconId': 'PRE0053', 'labelId': '77aefc34-8951-11ea-a893-b71f9da3b59c', 'synonymId': '78156102-7dd6-11eb-8ccb-ff98802a1b29', 'recipeId': '0a8bda45-cfa1-11ec-936f-ca031f99a026', 'referenceId': '', 'resultId': '0a8bda45-cfa1-11ec-936f-ca031f99a026', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 176.5}, 'name': 'serving', 'quantity': 1, 
'suggestedQuantity': [1]}, 'calories': 354.9100036621094}}, {'type': 'reference', 'displayName': 'Chicken Cobb Side Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Cobb Side Salad', 'scoredName': 'Chicken Cobb Side Salad', 'score': 0.4971591584852735, 'displayNameScore': 0.4971591584852735, 'brandName': 'Renaissance Food Group, LLC', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211347555', 'resultId': '1603211347555', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 156}, 'name': 'container', 'quantity': 1}, 'calories': 269.8800048828125}}, {'type': 'reference', 'displayName': 'Cobb Salad With White Meat Chicken & Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With White Meat Chicken & Bacon', 'scoredName': 'Cobb Salad With White Meat Chicken & Bacon', 'score': 0.5106285714285714, 'displayNameScore': 0.5106285714285714, 'brandName': 'NOT A BRANDED ITEM', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211440280', 'resultId': '1603211440280', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 385}, 'name': 'salad', 'quantity': 1}, 'calories': 608.2999877929688}}, {'type': 'reference', 'displayName': 'Chicken Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Bacon Cobb Salad', 'scoredName': 'Chicken Bacon Cobb Salad', 'score': 0.47541999999999995, 'displayNameScore': 0.47541999999999995, 'brandName': '', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': 'openfood0030223112150', 'resultId': 'openfood0030223112150', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': '', 'quantity': 1, 'suggestedQuantity': [1]}, 'calories': 140}}, {'type': 'reference', 'displayName': 'Chicken And Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken And Bacon Cobb Salad', 'scoredName': 'Chicken And Bacon Cobb Salad', 'score': 0.466182857142857, 'displayNameScore': 0.466182857142857, 'brandName': 'Renaissance Food Group, LLC', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211347911', 'resultId': '1603211347911', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 318}, 'name': 'container', 'quantity': 1}, 'calories': 550.1400146484375}}, {'type': 'reference', 'displayName': 'Chicken And Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken And Bacon Cobb Salad', 'scoredName': 'Chicken And Bacon Cobb Salad', 'score': 0.466182857142857, 'displayNameScore': 0.466182857142857, 'brandName': 'Fresh Food Manufacturing', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211321292', 'resultId': '1603211321292', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 468}, 'name': 'container', 'quantity': 1}, 'calories': 421.1999816894531}}, {'type': 'reference', 'displayName': 'Cobb Salad With Grilled Chicken Breast (With Rib Meat)', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Cobb Salad With Grilled Chicken Breast (With Rib Meat)', 'scoredName': 'Cobb Salad With Grilled Chicken Breast (With Rib Meat)', 'score': 0.4997275985663082, 'displayNameScore': 0.4997275985663082, 'brandName': \"Triple 'B' Corporation\", 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211279799', 'resultId': '1603211279799', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 283}, 'name': 'container', 
'quantity': 1}, 'calories': 350.91998291015625}}, {'type': 'reference', 'displayName': 'Turkey And Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Turkey And Bacon Cobb Salad', 'scoredName': 'Turkey And Bacon Cobb Salad', 'score': 0.44866031746031737, 'displayNameScore': 0.44866031746031737, 'brandName': 'Del Monte Fresh Produce N.A., Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211309610', 'resultId': '1603211309610', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 227}, 'name': 'container', 'quantity': 1}, 'calories': 440.3800048828125}}, {'type': 'reference', 'displayName': 'Turkey And Bacon Cobb Salad', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Turkey And Bacon Cobb Salad', 'scoredName': 'Turkey And Bacon Cobb Salad', 'score': 0.44866031746031737, 'displayNameScore': 0.44866031746031737, 'brandName': 'Ready Pac Produce, Inc.', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211301247', 'resultId': '1603211301247', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 205}, 'name': 'bowl', 'quantity': 1}, 'calories': 289.04998779296875}}, {'type': 'reference', 'displayName': 'Chicken Breast Cobb Salad With Bacon', 'stemmedDisplayName': '', 'shortName': '', 'longName': 'Chicken Breast Cobb Salad With Bacon', 'scoredName': 'Chicken Breast Cobb Salad With Bacon', 'score': 0.4838642424242423, 'displayNameScore': 0.4838642424242423, 'brandName': 'Renaissance Food Group, LLC', 'iconId': '', 'labelId': '', 'synonymId': '', 'recipeId': '', 'referenceId': '1603211434012', 'resultId': '1603211434012', 'nutritionPreview': {'portion': {'weight': {'unit': 'g', 'value': 100}, 'name': 'cup salad with 2 tbsp dressing', 'quantity': 1}, 'calories': 140}}], 'alternateNames': ['cobb salad', 'shrimp cobb salad', 'taco salad', 'bean salad', 'kale salad', 'tuna salad', 'cucumber salad', 'garden salad with chicken', 'egg salad', 'couscous salad', 'caesar salad', 'potato salad', 'fruit salad', 'greek salad', 'quinoa salad', 'garden salad', 'creamy chicken salad', 'creamy pasta salad', 'spinach salad', 'lambs lettuce']}\n", - "========================\n", - "\n", - "The Cobb Salad you had for lunch contained approximately 193.2 calories per cup.\n" - ] - } - ], - "source": [ - "print(agent.chat(\"I had a cobb salad for lunch, how many calories did I eat?\"))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": ".venv", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/BUILD b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/__init__.py b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/__init__.py deleted 
file mode 100644 index a8386021c93ea..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -## init -from llama_index.tools.passio_nutrition_ai.base import ( - ENDPOINT_BASE_URL, - NutritionAIToolSpec, -) - -__all__ = ["NutritionAIToolSpec", "ENDPOINT_BASE_URL"] diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/base.py b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/base.py deleted file mode 100644 index 68acbb9fc948c..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/llama_index/tools/passio_nutrition_ai/base.py +++ /dev/null @@ -1,144 +0,0 @@ -"""Passio Nutrition Search tool spec.""" - -from typing import final, NoReturn -from datetime import datetime, timedelta - -import requests -from llama_index.core.tools.tool_spec.base import BaseToolSpec - -ENDPOINT_BASE_URL = "https://api.passiolife.com/v2/products/napi/food/search/advanced" - - -class NoDiskStorage: - @final - def __getstate__(self) -> NoReturn: - raise AttributeError("Do not store on disk.") - - @final - def __setstate__(self, state) -> NoReturn: - raise AttributeError("Do not store on disk.") - - -try: - from tenacity import ( - retry, - stop_after_attempt, - wait_random, - wait_exponential, - retry_if_result, - ) -except ImportError: - # No retries if tenacity is not installed; these no-op stand-ins keep the - # @retry decorations below import-safe. - def retry(*args, **kwargs): - def decorator(f): - return f - - return decorator - - def retry_if_result(predicate): - return None - - def stop_after_attempt(n): - return None - - def wait_random(a, b): - return 0 - - def wait_exponential(multiplier, min, max): - return 0 - - -def is_http_retryable(rsp): - # return rsp and rsp.status_code >= 500 - return ( - rsp - and not isinstance(rsp, dict) - and rsp.status_code in [408, 425, 429, 500, 502, 503, 504] - ) - - -class ManagedPassioLifeAuth(NoDiskStorage): - """Manages the token for the NutritionAI API.""" - - def __init__(self, subscription_key: str): - self.subscription_key = subscription_key - self._last_token = None - self._access_token_expiry = None - self._access_token = None - self._customer_id = None - - @property - def headers(self) -> dict: - if not self.is_valid_now(): - self.refresh_access_token() - return { - "Authorization": f"Bearer {self._access_token}", - "Passio-ID": self._customer_id, - } - - def is_valid_now(self): - return ( - self._access_token is not None - and self._customer_id is not None - and self._access_token_expiry is not None - and self._access_token_expiry > datetime.now() - ) - - @retry( - retry=retry_if_result(is_http_retryable), - stop=stop_after_attempt(4), - wait=wait_random(0, 0.3) + wait_exponential(multiplier=1, min=0.1, max=2), - ) - def _http_get(self, subscription_key): - return requests.get( - f"https://api.passiolife.com/v2/token-cache/napi/oauth/token/{subscription_key}" - ) - - def refresh_access_token(self): - """Refresh the access token for the NutritionAI API.""" - rsp = self._http_get(self.subscription_key) - if not rsp: - raise ValueError("Could not get access token") - self._last_token = token = rsp.json() - self._customer_id = token["customer_id"] - self._access_token = token["access_token"] - self._access_token_expiry = ( - datetime.now() - + timedelta(seconds=token["expires_in"]) - - timedelta(seconds=5) - ) # 5 seconds: approximate time for a token refresh to be processed.
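The token cache above renews itself five seconds before the reported expiry, so an in-flight request never races a lapsing token. A minimal usage sketch of the helper, assuming the package is installed; the subscription key below is a placeholder, not a real credential:

```python
from llama_index.tools.passio_nutrition_ai.base import ManagedPassioLifeAuth

# Hypothetical key; live calls require a real Passio subscription key.
auth = ManagedPassioLifeAuth(subscription_key="sk-placeholder")

# No token has been fetched yet, so the cached credentials are invalid.
assert not auth.is_valid_now()

# Accessing .headers would call refresh_access_token() against the token
# endpoint and then return the Authorization / Passio-ID header dict:
# headers = auth.headers
```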
- - -class NutritionAIToolSpec(BaseToolSpec): - """Tool that queries the Passio Nutrition AI API.""" - - spec_functions = ["nutrition_ai_search"] - auth_: ManagedPassioLifeAuth - - def __init__(self, api_key: str) -> None: - """Initialize with parameters.""" - self.auth_ = ManagedPassioLifeAuth(api_key) - - @retry( - retry=retry_if_result(is_http_retryable), - stop=stop_after_attempt(4), - wait=wait_random(0, 0.3) + wait_exponential(multiplier=1, min=0.1, max=2), - ) - def _http_get(self, query: str): - return requests.get( - ENDPOINT_BASE_URL, - headers=self.auth_.headers, - params={"term": query}, # type: ignore - ) - - def _nutrition_request(self, query: str): - response = self._http_get(query) - if not response: - raise ValueError("No response from NutritionAI API.") - return response.json() - - def nutrition_ai_search(self, query: str): - """ - Retrieve nutrition facts for a given food item. - Input should be a search query string for the food item. - - Args: - query (str): The food item to look for. - - Returns a JSON result with the nutrition facts for the food item and, if available, alternative food items which sometimes are a better match. - """ - return self._nutrition_request(query) diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/pyproject.toml b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/pyproject.toml deleted file mode 100644 index 0054af8df14d9..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/pyproject.toml +++ /dev/null @@ -1,63 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.tools.passio_nutrition_ai" - -[tool.llamahub.class_authors] -NutritionAIToolSpec = "ivyas21" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.10" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index tools passio_nutrition_ai integration" -exclude = ["**/BUILD"] -license = "MIT" -maintainers = ["ivyas21"] -name = "llama-index-tools-passio-nutrition-ai" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/BUILD b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git 
a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/__init__.py b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/test_tools_nutrition_ai.py b/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/test_tools_nutrition_ai.py deleted file mode 100644 index 0439fa0e805d3..0000000000000 --- a/llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai/tests/test_tools_nutrition_ai.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.tools.tool_spec.base import BaseToolSpec -from llama_index.tools.passio_nutrition_ai import NutritionAIToolSpec - - -def test_class(): - names_of_base_classes = [b.__name__ for b in NutritionAIToolSpec.__mro__] - assert BaseToolSpec.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/.gitignore b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/Makefile b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. 
- sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/README.md b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/README.md deleted file mode 100644 index 1b504826a30b5..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/README.md +++ /dev/null @@ -1 +0,0 @@ -# LlamaIndex Vector_Stores Integration: Chatgpt Plugin diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/BUILD deleted file mode 100644 index 0d583429dff79..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -resource( - name="py_typed", - source="py.typed", -) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/py.typed b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/py.typed deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/__init__.py deleted file mode 100644 index 4f1d0412da9c7..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.vector_stores.chatgpt_plugin.base import ChatGPTRetrievalPluginClient - -__all__ = ["ChatGPTRetrievalPluginClient"] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py deleted file mode 100644 index 0dbe5355c420f..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/llama_index/vector_stores/chatgpt_plugin/base.py +++ /dev/null @@ -1,189 +0,0 @@ -"""ChatGPT Plugin vector store.""" - -import os -from typing import Any, Dict, List, Optional - -import requests - -from llama_index.core.bridge.pydantic import PrivateAttr -from llama_index.core.schema import ( - BaseNode, - MetadataMode, - NodeRelationship, - RelatedNodeInfo, - TextNode, -) -from llama_index.core.utils import get_tqdm_iterable -from llama_index.core.vector_stores.types import ( - BasePydanticVectorStore, - VectorStoreQuery, - VectorStoreQueryResult, -) -from requests.adapters import HTTPAdapter, Retry - - -def convert_docs_to_json(nodes: List[BaseNode]) -> List[Dict]: - """Convert docs to JSON.""" - docs = [] - 
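# Each node below becomes one retrieval-plugin document dict: the node's text, its ref_doc_id as "source_id", and any recognized metadata fields (source, source_id, url, created_at, author).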
for node in nodes: - # TODO: add information for other fields as well - # fields taken from - # https://rb.gy/nmac9u - doc_dict = { - "id": node.node_id, - "text": node.get_content(metadata_mode=MetadataMode.NONE), - # NOTE: this is the doc_id to reference document - "source_id": node.ref_doc_id, - # "url": "...", - # "created_at": ..., - # "author": "...", - } - metadata = node.metadata - if metadata is not None: - if "source" in metadata: - doc_dict["source"] = metadata["source"] - if "source_id" in metadata: - doc_dict["source_id"] = metadata["source_id"] - if "url" in metadata: - doc_dict["url"] = metadata["url"] - if "created_at" in metadata: - doc_dict["created_at"] = metadata["created_at"] - if "author" in metadata: - doc_dict["author"] = metadata["author"] - - docs.append(doc_dict) - return docs - - -class ChatGPTRetrievalPluginClient(BasePydanticVectorStore): - """ChatGPT Retrieval Plugin Client. - - In this client, we make use of the endpoints defined by the ChatGPT Retrieval Plugin. - - Args: - endpoint_url (str): URL of the ChatGPT Retrieval Plugin. - bearer_token (Optional[str]): Bearer token for the ChatGPT Retrieval Plugin. - retries (Optional[Retry]): Retry object for the ChatGPT Retrieval Plugin. - batch_size (int): Batch size for the ChatGPT Retrieval Plugin. - """ - - stores_text: bool = True - is_embedding_query: bool = False - - _endpoint_url: str = PrivateAttr() - _bearer_token: Optional[str] = PrivateAttr() - _retries: Optional[Retry] = PrivateAttr() - _batch_size: int = PrivateAttr() - _s: requests.Session = PrivateAttr() - - def __init__( - self, - endpoint_url: str, - bearer_token: Optional[str] = None, - retries: Optional[Retry] = None, - batch_size: int = 100, - **kwargs: Any, - ) -> None: - """Initialize params.""" - super().__init__() - - self._endpoint_url = endpoint_url - self._bearer_token = bearer_token or os.getenv("BEARER_TOKEN") - self._retries = retries - self._batch_size = batch_size - - self._s = requests.Session() - self._s.mount("http://", HTTPAdapter(max_retries=self._retries)) - - @classmethod - def class_name(cls) -> str: - return "ChatGPTRetrievalPluginClient" - - @property - def client(self) -> None: - """Get client.""" - return - - def add( - self, - nodes: List[BaseNode], - **add_kwargs: Any, - ) -> List[str]: - """Add nodes to index.""" - headers = {"Authorization": f"Bearer {self._bearer_token}"} - - docs_to_upload = convert_docs_to_json(nodes) - iterable_docs = get_tqdm_iterable( - range(0, len(docs_to_upload), self._batch_size), - show_progress=True, - desc="Uploading documents", - ) - for i in iterable_docs: - i_end = min(i + self._batch_size, len(docs_to_upload)) - self._s.post( - f"{self._endpoint_url}/upsert", - headers=headers, - json={"documents": docs_to_upload[i:i_end]}, - ) - - return [result.node_id for result in nodes] - - def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: - """ - Delete nodes using ref_doc_id. - - Args: - ref_doc_id (str): The doc_id of the document to delete.
- - """ - headers = {"Authorization": f"Bearer {self._bearer_token}"} - self._s.post( - f"{self._endpoint_url}/delete", - headers=headers, - json={"ids": [ref_doc_id]}, - ) - - def query( - self, - query: VectorStoreQuery, - **kwargs: Any, - ) -> VectorStoreQueryResult: - """Get nodes for response.""" - if query.filters is not None: - raise ValueError("Metadata filters not implemented for ChatGPT Plugin yet.") - - if query.query_str is None: - raise ValueError("query_str must be provided") - headers = {"Authorization": f"Bearer {self._bearer_token}"} - # TODO: add metadata filter - queries = [{"query": query.query_str, "top_k": query.similarity_top_k}] - res = requests.post( - f"{self._endpoint_url}/query", headers=headers, json={"queries": queries} - ) - - nodes = [] - similarities = [] - ids = [] - for query_result in res.json()["results"]: - for result in query_result["results"]: - result_id = result["id"] - result_txt = result["text"] - result_score = result["score"] - result_ref_doc_id = result["source_id"] - node = TextNode( - id_=result_id, - text=result_txt, - relationships={ - NodeRelationship.SOURCE: RelatedNodeInfo( - node_id=result_ref_doc_id - ) - }, - ) - nodes.append(node) - similarities.append(result_score) - ids.append(result_id) - - # NOTE: there should only be one query - break - - return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml deleted file mode 100644 index 2d6e2729f00aa..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/pyproject.toml +++ /dev/null @@ -1,62 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.vector_stores.chatgpt_plugin" - -[tool.llamahub.class_authors] -ChatGPTRetrievalPluginClient = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index vector_stores chatgpt plugin integration" -exclude = ["**/BUILD"] -license = "MIT" -name = "llama-index-vector-stores-chatgpt-plugin" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/BUILD deleted file mode 
100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py deleted file mode 100644 index a119f0263bf00..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin/tests/test_vector_stores_chatgpt_plugin.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.vector_stores.types import BasePydanticVectorStore -from llama_index.vector_stores.chatgpt_plugin import ChatGPTRetrievalPluginClient - - -def test_class(): - names_of_base_classes = [b.__name__ for b in ChatGPTRetrievalPluginClient.__mro__] - assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/.gitignore b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/Makefile b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. 
- sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/README.md b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/README.md deleted file mode 100644 index 4996a579992d7..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/README.md +++ /dev/null @@ -1 +0,0 @@ -# LlamaIndex Vector_Stores Integration: Metal diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/BUILD deleted file mode 100644 index 0d583429dff79..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/BUILD +++ /dev/null @@ -1,4 +0,0 @@ -resource( - name="py_typed", - source="py.typed", -) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/py.typed b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/py.typed deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/__init__.py deleted file mode 100644 index b6c8ec6b73813..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.vector_stores.metal.base import MetalVectorStore - -__all__ = ["MetalVectorStore"] diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py deleted file mode 100644 index 387389ec3ff17..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/llama_index/vector_stores/metal/base.py +++ /dev/null @@ -1,183 +0,0 @@ -import math -from typing import Any, List - -import metal_sdk # noqa -from llama_index.core.schema import BaseNode, MetadataMode, TextNode -from llama_index.core.vector_stores.types import ( - MetadataFilters, - BasePydanticVectorStore, - VectorStoreQuery, - VectorStoreQueryResult, -) -from llama_index.core.vector_stores.utils import ( - legacy_metadata_dict_to_node, - metadata_dict_to_node, - node_to_metadata_dict, -) -from metal_sdk.metal import Metal - - -def _to_metal_filters(standard_filters: MetadataFilters) -> list: - filters = [] - for filter in standard_filters.legacy_filters(): - filters.append( - { - "field": filter.key, - "value": filter.value, - } - ) - return filters - - -class MetalVectorStore(BasePydanticVectorStore): - """Metal Vector Store. 
- - Examples: - `pip install llama-index-vector-stores-metal` - - ```python - from llama_index.vector_stores.metal import MetalVectorStore - - # Sign up for Metal and generate API key and client ID - api_key = "your_api_key_here" - client_id = "your_client_id_here" - index_id = "your_index_id_here" - - # Initialize Metal Vector Store - vector_store = MetalVectorStore( - api_key=api_key, - client_id=client_id, - index_id=index_id, - ) - ``` - """ - - stores_text: bool = True - flat_metadata: bool = False - is_embedding_query: bool = True - - api_key: str - client_id: str - index_id: str - metal_client: Metal - - def __init__( - self, - api_key: str, - client_id: str, - index_id: str, - ): - """Init params.""" - super().__init__( - api_key=api_key, - client_id=client_id, - index_id=index_id, - metal_client=Metal(api_key, client_id, index_id), - ) - - @classmethod - def class_name(cls) -> str: - return "MetalVectorStore" - - def query(self, query: VectorStoreQuery, **kwargs: Any) -> VectorStoreQueryResult: - if query.filters is not None: - if "filters" in kwargs: - raise ValueError( - "Cannot specify filter via both query and kwargs. " - "Use kwargs only for metal specific items that are " - "not supported via the generic query interface." - ) - filters = _to_metal_filters(query.filters) - else: - filters = kwargs.get("filters", {}) - - payload = { - "embedding": query.query_embedding, # Query Embedding - "filters": filters, # Metadata Filters - } - response = self.metal_client.search(payload, limit=query.similarity_top_k) - - nodes = [] - ids = [] - similarities = [] - - for item in response["data"]: - text = item["text"] - id_ = item["id"] - - # load additional Node data - try: - node = metadata_dict_to_node(item["metadata"]) - node.text = text - except Exception: - # NOTE: deprecated legacy logic for backward compatibility - metadata, node_info, relationships = legacy_metadata_dict_to_node( - item["metadata"] - ) - - node = TextNode( - text=text, - id_=id_, - metadata=metadata, - start_char_idx=node_info.get("start", None), - end_char_idx=node_info.get("end", None), - relationships=relationships, - ) - - nodes.append(node) - ids.append(id_) - - similarity_score = 1.0 - math.exp(-item["dist"]) - similarities.append(similarity_score) - - return VectorStoreQueryResult(nodes=nodes, similarities=similarities, ids=ids) - - @property - def client(self) -> Any: - """Return Metal client.""" - return self.metal_client - - def add(self, nodes: List[BaseNode], **add_kwargs: Any) -> List[str]: - """Add nodes to index. - - Args: - nodes (List[BaseNode]): List of nodes with embeddings. - - """ - if not self.metal_client: - raise ValueError("metal_client not initialized") - - ids = [] - for node in nodes: - ids.append(node.node_id) - - metadata = {} - metadata["text"] = node.get_content(metadata_mode=MetadataMode.NONE) or "" - - additional_metadata = node_to_metadata_dict( - node, remove_text=True, flat_metadata=self.flat_metadata - ) - metadata.update(additional_metadata) - - payload = { - "embedding": node.get_embedding(), - "metadata": metadata, - "id": node.node_id, - } - - self.metal_client.index(payload) - - return ids - - def delete(self, ref_doc_id: str, **delete_kwargs: Any) -> None: - """ - Delete nodes using ref_doc_id. - - Args: - ref_doc_id (str): The doc_id of the document to delete.
- - """ - if not self.metal_client: - raise ValueError("metal_client not initialized") - - self.metal_client.deleteOne(ref_doc_id) diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml deleted file mode 100644 index 220f3b6ee728e..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/pyproject.toml +++ /dev/null @@ -1,63 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.vector_stores.metal" - -[tool.llamahub.class_authors] -MetalVectorStore = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index vector_stores metal integration" -exclude = ["**/BUILD"] -license = "MIT" -name = "llama-index-vector-stores-metal" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -metal-sdk = "^2.5.1" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/BUILD b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/__init__.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py b/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py deleted file mode 100644 index dcd9b4255c1bb..0000000000000 --- a/llama-index-integrations/vector_stores/llama-index-vector-stores-metal/tests/test_vector_stores_metal.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.vector_stores.types import BasePydanticVectorStore -from llama_index.vector_stores.metal import MetalVectorStore - - -def test_class(): - names_of_base_classes = [b.__name__ for b in MetalVectorStore.__mro__] - assert BasePydanticVectorStore.__name__ in names_of_base_classes diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/BUILD b/llama-index-packs/llama-index-packs-docugami-kg-rag/BUILD deleted file mode 100644 index 
0896ca890d8bf..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/README.md b/llama-index-packs/llama-index-packs-docugami-kg-rag/README.md deleted file mode 100644 index e9fb32303b982..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/README.md +++ /dev/null @@ -1,45 +0,0 @@ -# Docugami KG-RAG Pack - -This LlamaPack provides an end-to-end Knowledge Graph Retrieval Augmented Generation flow using Docugami. - -## Process Documents in Docugami (before you use this template) - -Before you use this llamapack, you must have some documents already processed in Docugami. Here's what you need to get started: - -1. Create a [Docugami workspace](https://app.docugami.com/) (free trials available) -1. Create an access token via the Developer Playground for your workspace. [Detailed instructions](https://help.docugami.com/home/docugami-api). -1. Add your documents to Docugami for processing. There are two ways to do this: - - Upload via the simple Docugami web experience. [Detailed instructions](https://help.docugami.com/home/adding-documents). - - Upload via the Docugami API, specifically the [documents](https://api-docs.docugami.com/#tag/documents/operation/upload-document) endpoint. Code samples are available for python and JavaScript or you can use the [docugami](https://pypi.org/project/docugami/) python library. - -Once your documents are in Docugami, they are processed and organized into sets of similar documents, e.g. NDAs, Lease Agreements, and Service Agreements. Docugami is not limited to any particular types of documents, and the clusters created depend on your particular documents. You can [change the docset assignments](https://help.docugami.com/home/working-with-the-doc-sets-view) later if you wish. You can monitor file status in the simple Docugami webapp, or use a [webhook](https://api-docs.docugami.com/#tag/webhooks) to be informed when your documents are done processing. - -## Environment Variables - -You need to set some required environment variables before using your new app based on this template. These are used to index as well as run the application, and exceptions are raised if the following required environment variables are not set: - -1. `OPENAI_API_KEY`: from the OpenAI platform. -1. `DOCUGAMI_API_KEY`: from the [Docugami Developer Playground](https://help.docugami.com/home/docugami-api) - -```shell -export OPENAI_API_KEY=... -export DOCUGAMI_API_KEY=... -``` - -## Using the llamapack - -Once your documents are finished processing, you can build and use the agent by adding the following code - -```python -from llama_index.core.llama_pack import download_llama_pack - -# download and install dependencies -DocugamiKgRagPack = download_llama_pack( - "DocugamiKgRagPack", "./docugami_kg_rag" -) - -docset_id = ... -pack = DocugamiKgRagPack() -pack.build_agent_for_docset(docset_id) -pack.run(...) 
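# NOTE: build_agent_for_docset assumes the docset has been indexed first, so a
# fuller sequence (with a hypothetical query; see also examples/example.py
# below) might look like:
#   pack.index_docset(docset_id)
#   pack.build_agent_for_docset(docset_id)
#   pack.run("What is the notice period across our lease agreements?")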
-```
diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/examples/BUILD b/llama-index-packs/llama-index-packs-docugami-kg-rag/examples/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-packs/llama-index-packs-docugami-kg-rag/examples/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/examples/example.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/examples/example.py
deleted file mode 100644
index a389f5c84e429..0000000000000
--- a/llama-index-packs/llama-index-packs-docugami-kg-rag/examples/example.py
+++ /dev/null
@@ -1,21 +0,0 @@
-# Required Environment Variables: OPENAI_API_KEY
-
-from llama_index.core.llama_pack import download_llama_pack
-
-# download and install dependencies
-DocugamiKgRagPack = download_llama_pack("DocugamiKgRagPack", "./docugami_kg_rag")
-
-# create the pack
-pack = DocugamiKgRagPack()
-
-# list the docsets in your Docugami organization and set the docset_id
-pack.list_docsets()
-docset_id = "5bcy7abew0sd"
-
-pack.index_docset(docset_id)
-pack.build_agent_for_docset(docset_id, use_reports=True)
-
-pack.run("What is the Early Bird Discount for a visit to Indonesia?")
-
-# A query that uses the Docugami reports to find more accurate answers
-pack.run("List all the early bird discounts available")
diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/BUILD b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/__init__.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/__init__.py
deleted file mode 100644
index c08d3e61c9cb3..0000000000000
--- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/__init__.py
+++ /dev/null
@@ -1,5 +0,0 @@
-from llama_index.packs.docugami_kg_rag.base import (
-    DocugamiKgRagPack,
-)
-
-__all__ = ["DocugamiKgRagPack"]
diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
deleted file mode 100644
index 128b661cf503e..0000000000000
--- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/base.py
+++ /dev/null
@@ -1,97 +0,0 @@
-from typing import Dict, Any, List
-
-from docugami import Docugami
-from llama_index.core.tools import BaseTool
-from llama_index.core.llama_pack import BaseLlamaPack
-from llama_index.core.agent import ReActAgent
-
-from llama_index.packs.docugami_kg_rag.helpers.prompts import ASSISTANT_SYSTEM_MESSAGE
-from llama_index.packs.docugami_kg_rag.config import (
-    LARGE_CONTEXT_INSTRUCT_LLM,
-    DEFAULT_USE_REPORTS,
-)
-from llama_index.packs.docugami_kg_rag.helpers.indexing import (
-    read_all_local_index_state,
-    index_docset,
-)
-from llama_index.packs.docugami_kg_rag.helpers.reports import (
-    get_retrieval_tool_for_report,
-)
-from llama_index.packs.docugami_kg_rag.helpers.retrieval import (
-    get_retrieval_tool_for_docset,
-)
-
-
-class DocugamiKgRagPack(BaseLlamaPack):
-    """Docugami KG-RAG Pack.
-
-    A pack that builds a ReAct agent for Knowledge Graph Retrieval Augmented
-    Generation (KG-RAG) over documents processed in Docugami.
- - """ - - def __init__(self) -> None: - self.docugami_client = Docugami() - - def list_docsets(self): - """ - List your Docugami docsets and their docset name and ids. - """ - docsets_response = self.docugami_client.docsets.list() - for idx, docset in enumerate(docsets_response.docsets, start=1): - print(f"{idx}: {docset.name} (ID: {docset.id})") - - def index_docset(self, docset_id: str, overwrite: bool = False): - """ - Build the index for the docset and create the agent for it. - """ - docsets_response = self.docugami_client.docsets.list() - docset = next( - (docset for docset in docsets_response.docsets if docset.id == docset_id), - None, - ) - - if not docset: - raise Exception( - f"Docset with id {docset_id} does not exist in your workspace" - ) - - index_docset(docset_id, docset.name, overwrite) - - def build_agent_for_docset( - self, docset_id: str, use_reports: bool = DEFAULT_USE_REPORTS - ): - local_state = read_all_local_index_state() - - tools: List[BaseTool] = [] - for docset_id in local_state: - docset_state = local_state[docset_id] - direct_retrieval_tool = get_retrieval_tool_for_docset( - docset_id, docset_state - ) - if direct_retrieval_tool: - # Direct retrieval tool for each indexed docset (direct KG-RAG against semantic XML) - tools.append(direct_retrieval_tool) - - if use_reports: - for report in docset_state.reports: - # Report retrieval tool for each published report (user-curated views on semantic XML) - report_retrieval_tool = get_retrieval_tool_for_report(report) - if report_retrieval_tool: - tools.append(report_retrieval_tool) - - self.agent = ReActAgent.from_tools( - tools, - llm=LARGE_CONTEXT_INSTRUCT_LLM, - verbose=True, - context=ASSISTANT_SYSTEM_MESSAGE, - ) - - def get_modules(self) -> Dict[str, Any]: - """Get modules.""" - return { - "agent": self.agent, - } - - def run(self, *args: Any, **kwargs: Any) -> Any: - """Run the pipeline.""" - return self.agent.query(*args, **kwargs) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/BUILD b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/__init__.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/__init__.py deleted file mode 100644 index 920410da7806e..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/config/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -import os -from pathlib import Path - -from llama_index.embeddings.openai import OpenAIEmbedding -from llama_index.llms.openai import OpenAI - -# Docugami API Key -DOCUGAMI_API_KEY = os.environ.get("DOCUGAMI_API_KEY") -if not DOCUGAMI_API_KEY: - raise Exception("Please set the DOCUGAMI_API_KEY environment variable") - -# Language Models -LARGE_CONTEXT_INSTRUCT_LLM = OpenAI( - temperature=0.5, model="gpt-4-turbo-preview", cache=True -) # 128k tokens -SMALL_CONTEXT_INSTRUCT_LLM = OpenAI( - temperature=0.5, model="gpt-3.5-turbo-1106", cache=True -) # 16k tokens - -# Embeddings -EMBEDDINGS = OpenAIEmbedding(model="text-embedding-ada-002") - -MIN_LENGTH_TO_SUMMARIZE = 2048 # chunks and docs below this length are embedded as-is 
-MAX_FULL_DOCUMENT_TEXT_LENGTH = 1024 * 56 # ~14k tokens -MAX_CHUNK_TEXT_LENGTH = 1024 * 26 # ~6.5k tokens -MIN_CHUNK_TEXT_LENGTH = 1024 * 6 # ~1.5k tokens -SUB_CHUNK_TABLES = False -INCLUDE_XML_TAGS = True -PARENT_HIERARCHY_LEVELS = 2 -RETRIEVER_K = 8 - -DEFAULT_USE_REPORTS = False -AGENT_MAX_ITERATIONS = 5 - -# Metadata keys -PARENT_DOC_ID_KEY = "doc_id" -FULL_DOC_SUMMARY_ID_KEY = "full_doc_id" -SOURCE_KEY = "name" - -# Directories -INDEXING_LOCAL_STATE_PATH = os.environ.get( - "INDEXING_LOCAL_STATE_PATH", "/tmp/docugami/indexing_local_state.pkl" -) -os.makedirs(Path(INDEXING_LOCAL_STATE_PATH).parent, exist_ok=True) - -REPORT_DIRECTORY = os.environ.get("REPORT_DIRECTORY", "/tmp/docugami/report_dbs") -os.makedirs(Path(REPORT_DIRECTORY).parent, exist_ok=True) - -CHROMA_DIRECTORY = Path("/tmp/docugami/chroma_db") -CHROMA_DIRECTORY.mkdir(parents=True, exist_ok=True) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/BUILD b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/__init__.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/fused_summary_retriever.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/fused_summary_retriever.py deleted file mode 100644 index 32d45e9f80b4c..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/fused_summary_retriever.py +++ /dev/null @@ -1,183 +0,0 @@ -from typing import Dict, Optional - -from llama_index.core.vector_stores.types import ( - VectorStoreQueryMode, - VectorStoreQueryResult, - VectorStoreQuery, -) - -from llama_index.core.callbacks.base import CallbackManager -from llama_index.core.schema import IndexNode -from llama_index.vector_stores.chroma import ChromaVectorStore - -from llama_index.core.retrievers import BaseRetriever - -from llama_index.core.readers import Document - -from dataclasses import dataclass -from typing import List - -from llama_index.packs.docugami_kg_rag.config import ( - RETRIEVER_K, - FULL_DOC_SUMMARY_ID_KEY, - SOURCE_KEY, - PARENT_DOC_ID_KEY, - EMBEDDINGS, -) -from llama_index.core import QueryBundle - -from llama_index.core.schema import NodeWithScore - -DOCUMENT_SUMMARY_TEMPLATE: str = """ --------------------------------- -**** DOCUMENT NAME: {doc_name} - -**** DOCUMENT SUMMARY: -{summary} - -**** RELEVANT FRAGMENTS: -{fragments} --------------------------------- -""" - - -@dataclass -class FusedDocumentElements: - score: float - summary: str - fragments: List[str] - source: str - - -class FusedSummaryRetriever(BaseRetriever): - """ - Retrieves a fused document that includes pre-calculated summaries. - - - Full document summaries are included in the fused document to give - broader context to the LLM, which may not be in the retrieved chunks - - - Chunk summaries are using to improve retrieval, i.e. 
"big-to-small" - retrieval which is a common use case with the multi-vector retriever - """ - - vectorstore: ChromaVectorStore - """The underlying vectorstore to use to store small chunks - and their embedding vectors.""" - - full_doc_summary_store: Dict[str, Document] - """The storage layer for the parent document summaries.""" - - parent_doc_store: Dict[str, Document] - """The storage layer for the parent (original) docs for summaries in - the vector store.""" - - parent_id_key: str = PARENT_DOC_ID_KEY - """Metadata key for parent doc ID (maps chunk summaries in the vector - store to parent docs).""" - - full_doc_summary_id_key: str = FULL_DOC_SUMMARY_ID_KEY - """Metadata key for full doc summary ID (maps chunk summaries in the - vector store to full doc summaries).""" - - source_key: str = SOURCE_KEY - """Metadata key for source document of chunks.""" - - search_type: VectorStoreQueryMode = VectorStoreQueryMode.DEFAULT - """Type of search to perform (similarity (default)/ mmr / etc.)""" - - def __init__( - self, - vectorstore: ChromaVectorStore, - full_doc_summary_store: Dict[str, Document], - parent_doc_store: Dict[str, Document], - search_type: VectorStoreQueryMode, - callback_manager: Optional[CallbackManager] = None, - object_map: Optional[Dict] = None, - objects: Optional[List[IndexNode]] = None, - verbose: bool = False, - ): - super().__init__( - callback_manager, - object_map, - objects, - verbose, - ) - - self.vectorstore = vectorstore - self.full_doc_summary_store = full_doc_summary_store - self.parent_doc_store = parent_doc_store - self.search_type = search_type - - def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]: - """Get documents relevant to a query. - - Args: - query: String to find relevant documents for - run_manager: The callbacks handler to use - Returns: - List of relevant documents - """ - query = VectorStoreQuery( - query_embedding=EMBEDDINGS.get_text_embedding(query_bundle.query_str), - similarity_top_k=RETRIEVER_K, - query_str=query_bundle.query_str, - mode=self.search_type, - ) - - query_result: VectorStoreQueryResult = self.vectorstore.query(query) - fused_doc_elements: Dict[str, FusedDocumentElements] = {} - - for i in range(RETRIEVER_K): - node = query_result.nodes[i] - - parent_id = node.metadata.get(self.parent_id_key) - full_doc_summary_id = node.metadata.get(self.full_doc_summary_id_key) - - if parent_id and full_doc_summary_id: - parent_in_store = self.parent_doc_store.get(parent_id) - full_doc_summary_in_store = self.full_doc_summary_store.get( - full_doc_summary_id - ) - if parent_in_store and full_doc_summary_in_store: - parent: Document = parent_in_store - full_doc_summary: str = full_doc_summary_in_store - - else: - raise Exception( - f"No parent or full doc summary found for retrieved doc {node}," - "please pre-load parent and full doc summaries." - ) - - source = node.metadata.get(self.source_key) - if not source: - raise Exception( - f"No source doc name found in metadata for: {node}." 
- ) - - if full_doc_summary_id not in fused_doc_elements: - # Init fused parent with information from most relevant sub-doc - fused_doc_elements[full_doc_summary_id] = FusedDocumentElements( - score=query_result.similarities[i], - summary=full_doc_summary, - fragments=[parent.text], - source=source, - ) - else: - fused_doc_elements[full_doc_summary_id].fragments.append( - parent.text - ) - - fused_docs: List[NodeWithScore] = [] - for element in sorted(fused_doc_elements.values(), key=lambda x: x.score): - fragments_str = "\n\n".join([d.strip() for d in element.fragments]) - fused_doc = Document( - text=DOCUMENT_SUMMARY_TEMPLATE.format( - doc_name=element.source, - summary=element.summary, - fragments=fragments_str, - ) - ) - - fused_docs.append(NodeWithScore(node=fused_doc, score=element.score)) - - return fused_docs diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/indexing.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/indexing.py deleted file mode 100644 index 64784def42a49..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/indexing.py +++ /dev/null @@ -1,189 +0,0 @@ -import hashlib -import os -from pathlib import Path -import pickle -from typing import Dict, List - -from llama_index.core import StorageContext, VectorStoreIndex -from llama_index.vector_stores.chroma import ChromaVectorStore - -from llama_index.packs.docugami_kg_rag.helpers.reports import ( - ReportDetails, - build_report_details, -) - -from llama_index.packs.docugami_kg_rag.config import ( - CHROMA_DIRECTORY, - EMBEDDINGS, - FULL_DOC_SUMMARY_ID_KEY, - INCLUDE_XML_TAGS, - MAX_CHUNK_TEXT_LENGTH, - MIN_CHUNK_TEXT_LENGTH, - PARENT_DOC_ID_KEY, - PARENT_HIERARCHY_LEVELS, - SOURCE_KEY, - INDEXING_LOCAL_STATE_PATH, - SUB_CHUNK_TABLES, -) -import chromadb -from llama_index.readers.docugami import DocugamiReader -from llama_index.core.readers import Document - -from llama_index.packs.docugami_kg_rag.helpers.summaries import ( - build_chunk_summary_mappings, - build_full_doc_summary_mappings, -) - -from llama_index.packs.docugami_kg_rag.helpers.retrieval import ( - LocalIndexState, - docset_name_to_direct_retriever_tool_function_name, - chunks_to_direct_retriever_tool_description, -) - - -def read_all_local_index_state() -> Dict[str, LocalIndexState]: - if not Path(INDEXING_LOCAL_STATE_PATH).is_file(): - return {} # not found - - with open(INDEXING_LOCAL_STATE_PATH, "rb") as file: - return pickle.load(file) - - -def update_local_index( - docset_id: str, - full_doc_summaries_by_id: Dict[str, Document], - chunks_by_id: Dict[str, Document], - direct_tool_function_name: str, - direct_tool_description: str, - report_details: List[ReportDetails], -): - """ - Read and update local index. - """ - state = read_all_local_index_state() - - doc_index_state = LocalIndexState( - full_doc_summaries_by_id=full_doc_summaries_by_id, - chunks_by_id=chunks_by_id, - retrieval_tool_function_name=direct_tool_function_name, - retrieval_tool_description=direct_tool_description, - reports=report_details, - ) - state[docset_id] = doc_index_state - - # Serialize state to disk (Deserialized in chain) - store_local_path = Path(INDEXING_LOCAL_STATE_PATH) - os.makedirs(os.path.dirname(store_local_path), exist_ok=True) - with open(store_local_path, "wb") as file: - pickle.dump(state, file) - - -def index_docset(docset_id: str, name: str, overwrite=False): - """ - Indexes the given docset. 
- """ - print(f"Indexing {name} (ID: {docset_id})") - - loader = DocugamiReader( - min_text_length=MIN_CHUNK_TEXT_LENGTH, - max_text_length=MAX_CHUNK_TEXT_LENGTH, - sub_chunk_tables=SUB_CHUNK_TABLES, - include_xml_tags=INCLUDE_XML_TAGS, - parent_hierarchy_levels=PARENT_HIERARCHY_LEVELS, - parent_id_key=PARENT_DOC_ID_KEY, - include_project_metadata_in_doc_metadata=False, # not used, so lighten the vector index - ) - chunks = loader.load_data(docset_id=docset_id) - - # Build separate maps of chunks, and parents - parent_chunks_by_id: Dict[str, Document] = {} - chunks_by_source: Dict[str, List[str]] = {} - for chunk in chunks: - chunk_id = str(chunk.metadata.get("id")) - chunk_source = str(chunk.metadata.get(SOURCE_KEY)) - parent_chunk_id = chunk.metadata.get(loader.parent_id_key) - if not parent_chunk_id: - # parent chunk, we will use this (for expanded context) as our chunk - parent_chunks_by_id[chunk_id] = chunk - else: - # child chunk, we will keep track of this to build up our - # full document summary - if chunk_source not in chunks_by_source: - chunks_by_source[chunk_source] = [] - - chunks_by_source[chunk_source].append(chunk.text) - - # Build up the full docs by concatenating all the child chunks - full_docs_by_id: Dict[str, Document] = {} - full_doc_ids_by_source: Dict[str, str] = {} - for source in chunks_by_source: - chunks_from_source = chunks_by_source[source] - full_doc_text = "\n".join(c for c in chunks_from_source) - full_doc_id = hashlib.md5(full_doc_text.encode()).hexdigest() - full_doc_ids_by_source[source] = full_doc_id - full_docs_by_id[full_doc_id] = Document( - text=full_doc_text, metadata={"id": full_doc_id} - ) - - # Associate parent chunks with full docs - for parent_chunk_id in parent_chunks_by_id: - parent_chunk = parent_chunks_by_id[parent_chunk_id] - parent_chunk_source = parent_chunk.metadata.get(SOURCE_KEY) - if parent_chunk_source: - full_doc_id = full_doc_ids_by_source.get(parent_chunk_source) - if full_doc_id: - parent_chunk.metadata[FULL_DOC_SUMMARY_ID_KEY] = full_doc_id - - full_doc_summaries_by_id = build_full_doc_summary_mappings(full_docs_by_id) - chunk_summaries_by_id = build_chunk_summary_mappings(parent_chunks_by_id) - - direct_tool_function_name = docset_name_to_direct_retriever_tool_function_name(name) - direct_tool_description = chunks_to_direct_retriever_tool_description( - name, list(parent_chunks_by_id.values()) - ) - report_details = build_report_details(docset_id) - - if overwrite: - state = Path(INDEXING_LOCAL_STATE_PATH) - if state.is_file() and state.exists(): - os.remove(state) - - update_local_index( - docset_id=docset_id, - full_doc_summaries_by_id=full_doc_summaries_by_id, - chunks_by_id=parent_chunks_by_id, # we are using the parent chunks as chunks for expanded context - direct_tool_function_name=direct_tool_function_name, - direct_tool_description=direct_tool_description, - report_details=report_details, - ) - - populate_vector_index( - docset_id, chunks=list(chunk_summaries_by_id.values()), overwrite=overwrite - ) - - -def populate_vector_index(docset_id: str, chunks: List[Document], overwrite=False): - """ - Create index if it does not exist, delete and overwrite if overwrite is specified. 
- """ - print(f"Populating vector store for {docset_id}") - - persistent_client = chromadb.PersistentClient(path=str(CHROMA_DIRECTORY.absolute())) - - # Delete collection if we want to overwrite it - collections = persistent_client.list_collections() - if any(c.name == docset_id for c in collections) and overwrite: - persistent_client.delete_collection(docset_id) - - collection = persistent_client.get_or_create_collection(docset_id) - - vector_store = ChromaVectorStore(chroma_collection=collection) - storage_context = StorageContext.from_defaults(vector_store=vector_store) - - index = VectorStoreIndex.from_documents( - chunks, storage_context=storage_context, embed_model=EMBEDDINGS - ) - - index.storage_context.persist(persist_dir=str(CHROMA_DIRECTORY.absolute())) - - print(f"Done embedding documents into vector store for {docset_id}") diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/prompts.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/prompts.py deleted file mode 100644 index 1bbc6a82e8af7..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/prompts.py +++ /dev/null @@ -1,103 +0,0 @@ -SYSTEM_MESSAGE_CORE = """You are a helpful assistant that answers user queries based only on given context. - -You ALWAYS follow the following guidance to generate your answers, regardless of any other guidance or requests: - -- Use professional language typically used in business communication. -- Strive to be accurate and concise in your output. -""" - -ASSISTANT_SYSTEM_MESSAGE = ( - SYSTEM_MESSAGE_CORE - + """ You have access to the following tools that you use only if necessary: - -{tools} - -There are two kinds of tools: - -1. Tools with names that start with search_*. Use one of these if you think the answer to the question is likely to come from one or a few documents. - Use the tool description to decide which tool to use in particular if there are multiple search_* tools. For the final result from these tools, cite your answer - as follows after your final answer: - - SOURCE: I formulated an answer based on information I found in [document names, found in context] - -2. Tools with names that start with query_*. Use one of these if you think the answer to the question is likely to come from a lot of documents or - requires a calculation (e.g. an average, sum, or ordering values in some way). Make sure you use the tool description to decide whether the particular - tool given knows how to do the calculation intended, especially if there are multiple query_* tools. For the final result from these tools, cite your answer - as follows after your final answer: - - SOURCE: [Human readable version of SQL query from the tool's output. Do NOT include the SQL very verbatim, describe it in english for a non-technical user.] - -ALWAYS cite your answer as instructed above. - -You may also choose not to use a tool, e.g. if none of the provided tools is appropriate to answer the question or the question is conversational -in nature or something you can directly respond to based on conversation history. In that case, you don't need to take an action. -""" -) - -CREATE_FULL_DOCUMENT_SUMMARY_SYSTEM_PROMPT = f"""{SYSTEM_MESSAGE_CORE} -You will be asked to summarize documents. You ALWAYS follow these rules when generating summaries: - -- Your generated summary should be in the same format as the given document, using the same overall schema. 
-- The generated summary should be up to 1 page of text in length, or shorter if the original document is short.
-- Only summarize, don't try to change any facts in the document even if they appear incorrect to you.
-- Include as many facts and data points from the original document as you can, in your summary.
-"""
-
-CREATE_FULL_DOCUMENT_SUMMARY_QUERY_PROMPT = """Here is a document, in {format} format:
-
-{document}
-
-Please write a detailed summary of the given document.
-
-Respond only with the summary and no other language before or after.
-"""
-
-CREATE_CHUNK_SUMMARY_SYSTEM_PROMPT = f"""{SYSTEM_MESSAGE_CORE}
-You will be asked to summarize chunks of documents. You ALWAYS follow these rules when generating summaries:
-
-- Your generated summary should be in the same format as the given document, using the same overall schema.
-- The generated summary will be embedded and used to retrieve the raw text or table elements from a vector database.
-- Only summarize, don't try to change any facts in the chunk even if they appear incorrect to you.
-- Include as many facts and data points from the original chunk as you can, in your summary.
-- Pay special attention to monetary amounts, dates, names of people and companies, etc., and include them in your summary.
-"""
-
-CREATE_CHUNK_SUMMARY_QUERY_PROMPT = """Here is a chunk from a document, in {format} format:
-
-{document}
-
-Respond only with the summary and no other language before or after.
-"""
-
-CREATE_DIRECT_RETRIEVAL_TOOL_SYSTEM_PROMPT = f"""{SYSTEM_MESSAGE_CORE}
-You will be asked to write short descriptions of document types, given a particular sample document
-as a guide. You ALWAYS follow these rules when generating descriptions:
-
-- Make sure your description is text only, regardless of any markup in the given sample document.
-- The generated description must apply to all documents of the given type, similar to the sample
-  document given, not just the exact same document.
-- The generated description will be used to describe this type of document in general in a product. When users ask
-  a question, an AI agent will use the description you produce to decide whether the
-  answer for that question is likely to be found in this type of document or not.
-- Do NOT include any data or details from this particular sample document but DO use this sample
-  document to get a better understanding of what types of information this type of document might contain.
-- The generated description should be very short and up to 2 sentences max.
-
-"""
-
-CREATE_DIRECT_RETRIEVAL_TOOL_DESCRIPTION_QUERY_PROMPT = """Here is a snippet from a sample document of type {docset_name}:
-
-{document}
-
-Please write a short general description of the given document type, using the given sample as a guide.
-
-Respond only with the requested general description of the document type and no other language before or after.
-"""
-
-EXPLAINED_QUERY_PROMPT = f"""{SYSTEM_MESSAGE_CORE}
-Given the following user question, corresponding SQL query, and SQL result, answer the user question.
-
-    Question: {{question}}
-    SQL Query: {{query}}
-    SQL Result: {{result}}
-    Answer:"""
diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/reports.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/reports.py
deleted file mode 100644
index 67010b7ee6fd8..0000000000000
--- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/reports.py
+++ /dev/null
@@ -1,183 +0,0 @@
-from dataclasses import dataclass
-import os
-from pathlib import Path
-from typing import List, Optional, Union
-import re
-import pandas as pd
-import requests
-import sqlite3
-import tempfile
-from llama_index.packs.docugami_kg_rag.config import REPORT_DIRECTORY, DOCUGAMI_API_KEY
-
-from docugami import Docugami
-from llama_index.core import SQLDatabase
-from llama_index.core.query_engine import NLSQLTableQueryEngine
-from llama_index.core.tools import BaseTool, QueryEngineTool, ToolMetadata
-
-HEADERS = {"Authorization": f"Bearer {DOCUGAMI_API_KEY}"}
-
-
-@dataclass
-class ReportDetails:
-    id: str
-    """ID of report."""
-
-    name: str
-    """Name of report."""
-
-    local_xlsx_path: Path
-    """Local path to XLSX of the report."""
-
-    retrieval_tool_function_name: str
-    """Function name for retrieval tool e.g. query_earnings_calls."""
-
-    retrieval_tool_description: str
-    """
-    Description of retrieval tool e.g. Runs a SQL query over the REPORT_NAME report,
-    represented as the following SQL Table... etc."""
-
-
-def report_name_to_report_query_tool_function_name(name: str) -> str:
-    """
-    Converts a report name to a report query tool function name.
-
-    Report query tool function names follow these conventions:
-    1. Retrieval tool function names always start with "query_".
-    2. The rest of the name should be a lowercased string, with underscores for whitespace.
-    3. Exclude any characters other than a-z (lowercase) from the function name, replacing them with underscores.
-    4. The final function name should not have more than one underscore together.
-
-    >>> report_name_to_report_query_tool_function_name('Earnings Calls')
-    'query_earnings_calls'
-    >>> report_name_to_report_query_tool_function_name('COVID-19 Statistics')
-    'query_covid_19_statistics'
-    >>> report_name_to_report_query_tool_function_name('2023 Market Report!!!')
-    'query_2023_market_report'
-    """
-    # Replace non-letter characters with underscores and remove extra whitespaces
-    name = re.sub(r"[^a-z\d]", "_", name.lower())
-    # Replace whitespace with underscores and remove consecutive underscores
-    name = re.sub(r"\s+", "_", name)
-    name = re.sub(r"_{2,}", "_", name)
-    name = name.strip("_")
-
-    return f"query_{name}"
-
-
-def report_details_to_report_query_tool_description(name: str, table_info: str) -> str:
-    """
-    Converts a report name and its SQL table info to a report query tool description.
- """ - table_info = re.sub(r"\s+", " ", table_info) - description = f"Given a single input 'query' parameter, runs a SQL query over the {name} report, represented as the following SQL Table:\n\n{table_info}" - - return description[:2048] # cap to avoid failures when the description is too long - - -def download_project_latest_xlsx(project_url: str, local_xlsx: Path) -> Optional[Path]: - response = requests.get( - f"{project_url}/artifacts/latest?name=spreadsheet.xlsx", - headers=HEADERS, - ) - - if response.ok: - response_json = response.json()["artifacts"] - xlsx_artifact = next( - ( - item - for item in response_json - if str(item["name"]).lower().endswith(".xlsx") - ), - None, - ) - if xlsx_artifact: - artifact_id = xlsx_artifact["id"] - response = requests.get( - f"{project_url}/artifacts/latest/{artifact_id}/content", - headers=HEADERS, - ) - if response.ok: - os.makedirs(str(local_xlsx.parent), exist_ok=True) - with open(local_xlsx, "wb") as f: - f.write(response.content) - return local_xlsx - - raise Exception(f"Failed to download XLSX for {project_url}") - elif response.status_code == 404: - return None # No artifacts found: this project has never been published - else: - raise Exception(f"Failed to download XLSX for {project_url}") - - -def connect_to_excel( - file_path: Union[Path, str], table_name: str, sample_rows_in_table_info=0 -) -> SQLDatabase: - conn = sqlite3.connect(":memory:") - - file_path = Path(file_path) - if not (file_path.exists() and file_path.suffix.lower() == ".xlsx"): - raise Exception(f"Invalid file path: {file_path}") - - df = pd.read_excel(file_path, sheet_name=0) - - df.to_sql(table_name, conn, if_exists="replace", index=False) - - temp_db_file = tempfile.NamedTemporaryFile(suffix=".sqlite") - with sqlite3.connect(temp_db_file.name) as disk_conn: - conn.backup(disk_conn) # dumps the connection to disk - - return SQLDatabase.from_uri( - f"sqlite:///{temp_db_file.name}", - sample_rows_in_table_info=sample_rows_in_table_info, - ) - - -def build_report_details(docset_id: str) -> List[ReportDetails]: - docugami_client = Docugami() - - projects_response = docugami_client.projects.list() - if not projects_response or not projects_response.projects: - return [] # no projects found - - projects = [p for p in projects_response.projects if p.docset.id == docset_id] - details: List[ReportDetails] = [] - for project in projects: - local_xlsx_path = download_project_latest_xlsx( - project.url, Path(REPORT_DIRECTORY) / f"{project.id}.xlsx" - ) - if local_xlsx_path: - report_name = project.name or local_xlsx_path.name - db = connect_to_excel(local_xlsx_path, report_name) - table_info = db.get_single_table_info(table_name=report_name) - details.append( - ReportDetails( - id=project.id, - name=report_name, - local_xlsx_path=local_xlsx_path, - retrieval_tool_function_name=report_name_to_report_query_tool_function_name( - project.name - ), - retrieval_tool_description=report_details_to_report_query_tool_description( - project.name, table_info - ), - ) - ) - - return details - - -def get_retrieval_tool_for_report(report_details: ReportDetails) -> Optional[BaseTool]: - if not report_details.local_xlsx_path: - return None - - db = connect_to_excel(report_details.local_xlsx_path, report_details.name) - - query_engine = NLSQLTableQueryEngine(sql_database=db, tables=[report_details.name]) - - return QueryEngineTool( - query_engine=query_engine, - metadata=ToolMetadata( - name=report_details.retrieval_tool_function_name, - description=report_details.retrieval_tool_description, - ), 
- ) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/retrieval.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/retrieval.py deleted file mode 100644 index 49bfb48b294f6..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/retrieval.py +++ /dev/null @@ -1,126 +0,0 @@ -from typing import Dict, List, Optional -from dataclasses import dataclass - -from llama_index.packs.docugami_kg_rag.helpers.reports import ReportDetails -from llama_index.core.readers import Document -from llama_index.packs.docugami_kg_rag.config import ( - MAX_CHUNK_TEXT_LENGTH, - LARGE_CONTEXT_INSTRUCT_LLM, -) -import re -from llama_index.packs.docugami_kg_rag.helpers.prompts import ( - CREATE_DIRECT_RETRIEVAL_TOOL_DESCRIPTION_QUERY_PROMPT, - CREATE_DIRECT_RETRIEVAL_TOOL_SYSTEM_PROMPT, -) -from llama_index.core.vector_stores.types import VectorStoreQueryMode - -from llama_index.core.llms import ChatMessage, MessageRole -from llama_index.core.query_engine import RetrieverQueryEngine -from llama_index.core.tools import BaseTool, ToolMetadata, QueryEngineTool - -from llama_index.packs.docugami_kg_rag.helpers.vector_store import get_vector_store -from llama_index.packs.docugami_kg_rag.helpers.fused_summary_retriever import ( - FusedSummaryRetriever, -) - - -@dataclass -class LocalIndexState: - full_doc_summaries_by_id: Dict[str, Document] - """Mapping of ID to full document summaries.""" - - chunks_by_id: Dict[str, Document] - """Mapping of ID to chunks.""" - - retrieval_tool_function_name: str - """Function name for retrieval tool e.g. "search_earnings_calls.""" - - retrieval_tool_description: str - """Description of retrieval tool e.g. Searches for and returns chunks from earnings call documents.""" - - reports: List[ReportDetails] - """Details about any reports for this docset.""" - - -def docset_name_to_direct_retriever_tool_function_name(name: str) -> str: - """ - Converts a docset name to a direct retriever tool function name. - - Direct retriever tool function names follow these conventions: - 1. Retrieval tool function names always start with "search_". - 2. The rest of the name should be a lowercased string, with underscores for whitespace. - 3. Exclude any characters other than a-z (lowercase) from the function name, replacing them with underscores. - 4. The final function name should not have more than one underscore together. - - >>> docset_name_to_direct_retriever_tool_function_name('Earnings Calls') - 'search_earnings_calls' - >>> docset_name_to_direct_retriever_tool_function_name('COVID-19 Statistics') - 'search_covid_19_statistics' - >>> docset_name_to_direct_retriever_tool_function_name('2023 Market Report!!!') - 'search_2023_market_report' - """ - # Replace non-letter characters with underscores and remove extra whitespaces - name = re.sub(r"[^a-z\d]", "_", name.lower()) - # Replace whitespace with underscores and remove consecutive underscores - name = re.sub(r"\s+", "_", name) - name = re.sub(r"_{2,}", "_", name) - name = name.strip("_") - - return f"search_{name}" - - -def chunks_to_direct_retriever_tool_description(name: str, chunks: List[Document]): - """ - Converts a set of chunks to a direct retriever tool description. 
- """ - texts = [c.text for c in chunks[:100]] - document = "\n".join(texts)[:MAX_CHUNK_TEXT_LENGTH] - - chat_messages = [ - ChatMessage( - role=MessageRole.SYSTEM, - content=CREATE_DIRECT_RETRIEVAL_TOOL_SYSTEM_PROMPT, - ), - ChatMessage( - role=MessageRole.USER, - content=CREATE_DIRECT_RETRIEVAL_TOOL_DESCRIPTION_QUERY_PROMPT.format( - docset_name=name, document=document - ), - ), - ] - - summary = LARGE_CONTEXT_INSTRUCT_LLM.chat(chat_messages).message.content - - return f"Given a single input 'query' parameter, searches for and returns chunks from {name} documents. {summary}" - - -def get_retrieval_tool_for_docset( - docset_id: str, docset_state: LocalIndexState -) -> Optional[BaseTool]: - """ - Gets a retrieval tool for an agent. - """ - chunk_vectorstore = get_vector_store(docset_id) - - if not chunk_vectorstore: - return None - - retriever = FusedSummaryRetriever( - vectorstore=chunk_vectorstore, - parent_doc_store=docset_state.chunks_by_id, - full_doc_summary_store=docset_state.full_doc_summaries_by_id, - search_type=VectorStoreQueryMode.MMR, - ) - - if not retriever: - return None - - query_engine = RetrieverQueryEngine(retriever=retriever) - - return QueryEngineTool( - query_engine=query_engine, - metadata=ToolMetadata( - name=docset_state.retrieval_tool_function_name, - description=docset_state.retrieval_tool_description, - ), - ) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/summaries.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/summaries.py deleted file mode 100644 index d78a74489d8b6..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/summaries.py +++ /dev/null @@ -1,113 +0,0 @@ -import hashlib -from typing import Dict - -from tqdm import tqdm - -from llama_index.llms.openai import OpenAI - -from llama_index.packs.docugami_kg_rag.config import ( - LARGE_CONTEXT_INSTRUCT_LLM, - MAX_CHUNK_TEXT_LENGTH, - INCLUDE_XML_TAGS, - MIN_LENGTH_TO_SUMMARIZE, - MAX_FULL_DOCUMENT_TEXT_LENGTH, - SMALL_CONTEXT_INSTRUCT_LLM, -) -from llama_index.core.readers import Document - -from llama_index.packs.docugami_kg_rag.helpers.prompts import ( - CREATE_FULL_DOCUMENT_SUMMARY_QUERY_PROMPT, - CREATE_FULL_DOCUMENT_SUMMARY_SYSTEM_PROMPT, - CREATE_CHUNK_SUMMARY_QUERY_PROMPT, - CREATE_CHUNK_SUMMARY_SYSTEM_PROMPT, -) -from llama_index.packs.docugami_kg_rag.config import PARENT_DOC_ID_KEY -from llama_index.core.llms import ChatMessage, MessageRole - -FORMAT = ( - "text" - if not INCLUDE_XML_TAGS - else "semantic XML without any namespaces or attributes" -) - - -def _build_summary_mappings( - docs_by_id: Dict[str, Document], - system_message: str, - prompt_template: str, - llm: OpenAI, - min_length_to_summarize=MIN_LENGTH_TO_SUMMARIZE, - max_length_cutoff=MAX_CHUNK_TEXT_LENGTH, - label="summaries", -) -> Dict[str, Document]: - """ - Build summaries for all the given documents. 
- """ - summaries: Dict[str, Document] = {} - - for id, doc in tqdm(docs_by_id.items(), desc=f"Building {label}", unit="chunks"): - text_content = doc.text[:max_length_cutoff] - - query_str = prompt_template.format(format=FORMAT, document=text_content) - - chat_messages = [ - ChatMessage( - role=MessageRole.SYSTEM, - content=system_message, - ), - ChatMessage(role=MessageRole.USER, content=query_str), - ] - # Only summarize when content is longer than min_length_to_summarize - summary_txt = ( - llm.chat(chat_messages).message.content - if len(text_content) < min_length_to_summarize - else text_content - ) - - # Create new hashed id for the summary and add original id as parent doc id - summaries[id] = summary_txt - summary_id = hashlib.md5(summary_txt.encode()).hexdigest() - meta = doc.metadata - meta["id"] = summary_id - meta[PARENT_DOC_ID_KEY] = id - - summaries[id] = Document( - text=summary_txt, - metadata=meta, - ) - - return summaries - - -def build_full_doc_summary_mappings( - docs_by_id: Dict[str, Document] -) -> Dict[str, Document]: - """ - Build summaries for all the given full documents. - """ - return _build_summary_mappings( - docs_by_id=docs_by_id, - system_message=CREATE_FULL_DOCUMENT_SUMMARY_SYSTEM_PROMPT, - prompt_template=CREATE_FULL_DOCUMENT_SUMMARY_QUERY_PROMPT, - llm=LARGE_CONTEXT_INSTRUCT_LLM, - min_length_to_summarize=MIN_LENGTH_TO_SUMMARIZE, - max_length_cutoff=MAX_FULL_DOCUMENT_TEXT_LENGTH, - label="full document summaries", - ) - - -def build_chunk_summary_mappings( - docs_by_id: Dict[str, Document] -) -> Dict[str, Document]: - """ - Build summaries for all the given chunks. - """ - return _build_summary_mappings( - docs_by_id=docs_by_id, - system_message=CREATE_CHUNK_SUMMARY_SYSTEM_PROMPT, - prompt_template=CREATE_CHUNK_SUMMARY_QUERY_PROMPT, - llm=SMALL_CONTEXT_INSTRUCT_LLM, - min_length_to_summarize=MIN_LENGTH_TO_SUMMARIZE, - max_length_cutoff=MAX_CHUNK_TEXT_LENGTH, - label="chunk summaries", - ) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/vector_store.py b/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/vector_store.py deleted file mode 100644 index 68052bc05d2a2..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/llama_index/packs/docugami_kg_rag/helpers/vector_store.py +++ /dev/null @@ -1,24 +0,0 @@ -from typing import Optional - -from llama_index.core import VectorStoreIndex -from llama_index.vector_stores.chroma import ChromaVectorStore - -from llama_index.packs.docugami_kg_rag.config import CHROMA_DIRECTORY, EMBEDDINGS - -import chromadb - - -def get_vector_store(docset_id) -> Optional[ChromaVectorStore]: - db = chromadb.PersistentClient(path=str(CHROMA_DIRECTORY.absolute())) - chroma_collection = db.get_or_create_collection(docset_id) - return ChromaVectorStore( - chroma_collection=chroma_collection, embed_model=EMBEDDINGS - ) - - -def get_vector_store_index(docset_id, embedding) -> Optional[VectorStoreIndex]: - vector_store = get_vector_store(docset_id) - return VectorStoreIndex.from_vector_store( - vector_store, - embed_model=embedding, - ) diff --git a/llama-index-packs/llama-index-packs-docugami-kg-rag/pyproject.toml b/llama-index-packs/llama-index-packs-docugami-kg-rag/pyproject.toml deleted file mode 100644 index 420f2888b5925..0000000000000 --- a/llama-index-packs/llama-index-packs-docugami-kg-rag/pyproject.toml +++ /dev/null @@ -1,49 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = 
["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = true -import_path = "llama_index.packs.docugami_kg_rag" - -[tool.llamahub.class_authors] -DocugamiKgRagPack = "docugami" - -[tool.poetry] -authors = [ - "Docugami ", -] -description = "This template contains a reference architecture for Retrieval Augmented Generation against a set of documents using Docugami's XML Knowledge Graph (KG-RAG)." -exclude = ["**/BUILD"] -keywords = ["infer", "rag", "retrieve", "retriever"] -name = "llama-index-packs-docugami-kg-rag" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = "^3.9" -dgml-utils = "^0.3.2" -docugami = "^0.1.2" -lxml = "4.9.3" -openpyxl = "^3.1.2" -llama-index-embeddings-openai = "^0.3.0" -llama-index-llms-openai = "^0.3.0" -llama-index-vector-stores-chroma = "^0.4.0" -llama-index-readers-docugami = "^0.3.0" -pandas = "*" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-packs/llama-index-packs-finchat/.gitignore b/llama-index-packs/llama-index-packs-finchat/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-packs/llama-index-packs-finchat/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-packs/llama-index-packs-finchat/BUILD b/llama-index-packs/llama-index-packs-finchat/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-packs/llama-index-packs-finchat/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-packs/llama-index-packs-finchat/Makefile b/llama-index-packs/llama-index-packs-finchat/Makefile
deleted file mode 100644
index b9eab05aa3706..0000000000000
--- a/llama-index-packs/llama-index-packs-finchat/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help:	## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format:	## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test:	## Run tests via pytest.
-	pytest tests
-
-watch-docs:	## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-packs/llama-index-packs-finchat/README.md b/llama-index-packs/llama-index-packs-finchat/README.md
deleted file mode 100644
index 87a1cb502da7f..0000000000000
--- a/llama-index-packs/llama-index-packs-finchat/README.md
+++ /dev/null
@@ -1,61 +0,0 @@
-# Finance Chat Llama Pack based on OpenAIAgent
-
-This LlamaPack implements a hierarchical LLM-based agent for financial chat and information extraction purposes.
-
-The LLM agent is connected to various open financial APIs, as well as a daily-updated S&P 500 Postgres SQL database storing
-opening and closing prices, trading volume, and past earnings.
-
-Based on the query, the agent reasons and routes to the available tools, or runs SQL queries, to retrieve
-and combine information into an answer.
-
-### Installation
-
-```bash
-pip install llama-index llama-index-packs-finchat
-```
-
-## CLI Usage
-
-You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package:
-
-```bash
-llamaindex-cli download-llamapack FinanceChatPack --download-dir ./finchat_pack
-```
-
-You can then inspect the files at `./finchat_pack` and use them as a template for your own project.
-
-## Code Usage
-
-You can download the pack to the `./finchat_pack` directory:
-
-```python
-from llama_index.core.llama_pack import download_llama_pack
-
-FinanceChatPack = download_llama_pack("FinanceChatPack", "./finchat_pack")
-```
-
-To use this tool, you'll need a few API keys:
-
-- POLYGON_API_KEY --
-- FINNHUB_API_KEY --
-- ALPHA_VANTAGE_API_KEY --
-- NEWSAPI_API_KEY --
-- POSTGRES_DB_URI -- 'postgresql://postgres.xhlcobfkbhtwmckmszqp:fingptpassword#123@aws-0-us-east-1.pooler.supabase.com:5432/postgres' (You can also host your own postgres SQL DB with the same table signatures. To use different signatures, modification is required in giving query examples for SQL code generation.)
-
-```python
-fin_agent = FinanceChatPack(
-    POLYGON_API_KEY,
-    FINNHUB_API_KEY,
-    ALPHA_VANTAGE_API_KEY,
-    NEWSAPI_API_KEY,
-    OPENAI_API_KEY,
-    POSTGRES_DB_URI,
-)
-```
-
-From here, you can use the pack, or inspect and modify the pack in `./finchat_pack`.
-
-The `run()` function chats with the agent and returns the agent's response to the input query.
-
-```python
-response = fin_agent.run("")
-```
diff --git a/llama-index-packs/llama-index-packs-finchat/examples/BUILD b/llama-index-packs/llama-index-packs-finchat/examples/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-packs/llama-index-packs-finchat/examples/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-packs/llama-index-packs-finchat/examples/example.py b/llama-index-packs/llama-index-packs-finchat/examples/example.py
deleted file mode 100644
index b414f15ebc821..0000000000000
--- a/llama-index-packs/llama-index-packs-finchat/examples/example.py
+++ /dev/null
@@ -1,37 +0,0 @@
-"""
-This example demonstrates how to set up and test chatting with a finance agent using the FinanceChatPack.
-It involves collecting necessary API keys and initializing the FinanceChatPack with these keys and a PostgreSQL database URI.
-"""
-
-import getpass
-from llama_index.packs.finchat import FinanceChatPack
-
-# Prompting the user to enter all necessary API keys for finance data access and OpenAI
-openai_api_key = getpass.getpass("Enter your OpenAI API key: ")
-polygon_api_key = getpass.getpass("Enter your Polygon API key: ")
-finnhub_api_key = getpass.getpass("Enter your Finnhub API key: ")
-alpha_vantage_api_key = getpass.getpass("Enter your Alpha Vantage API key: ")
-newsapi_api_key = getpass.getpass("Enter your NewsAPI API key: ")
-
-# PostgreSQL database URI for storing and accessing financial data
-postgres_db_uri = "postgresql://postgres.xhlcobfkbhtwmckmszqp:fingptpassword#123@aws-0-us-east-1.pooler.supabase.com:5432/postgres"
-
-# Initializing the FinanceChatPack with the collected API keys and database URI
-finance_chat_pack = FinanceChatPack(
-    polygon_api_key=polygon_api_key,
-    finnhub_api_key=finnhub_api_key,
-    alpha_vantage_api_key=alpha_vantage_api_key,
-    newsapi_api_key=newsapi_api_key,
-    openai_api_key=openai_api_key,
-    postgres_db_uri=postgres_db_uri,
-)
-
-# Notifying the user that the FinanceChatPack has been initialized and is ready for testing
-print(
-    "FinanceChatPack initialized successfully. Ready for testing chat interactions with the finance agent."
-)
-
-
-user_query = "Find similar companies to Rivian?"
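# Per the pack README, the agent routes each query: a company-similarity
# question like this one is expected to go to the financial API tools, while
# aggregation questions over prices and volumes would instead be answered via
# SQL against the S&P 500 Postgres database (the exact tool chosen is decided
# by the LLM at runtime).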
-response = finance_chat_pack.run(user_query) -print("Finance agent response:", response) diff --git a/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/BUILD b/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/__init__.py b/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/__init__.py deleted file mode 100644 index e85603546160d..0000000000000 --- a/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.packs.finchat.base import FinanceChatPack - -__all__ = ["FinanceChatPack"] diff --git a/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/base.py b/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/base.py deleted file mode 100644 index 4a7082d7fe647..0000000000000 --- a/llama-index-packs/llama-index-packs-finchat/llama_index/packs/finchat/base.py +++ /dev/null @@ -1,375 +0,0 @@ -"""Finance Chat LlamaPack class.""" - -from typing import Optional, List, Any - -# The following imports have been adjusted to fix the ModuleNotFoundError -from llama_index.core.llama_pack.base import BaseLlamaPack -from llama_index.llms.openai import OpenAI -from llama_index.tools.finance import FinanceAgentToolSpec -from llama_index.core.tools.tool_spec.base import BaseToolSpec -from llama_index.core.readers.base import BaseReader -from llama_index.core.utilities.sql_wrapper import SQLDatabase -from llama_index.core.tools.query_engine import QueryEngineTool -from llama_index.agent.openai import OpenAIAgent -from llama_index.core.schema import Document -from llama_index.core.base.llms.types import ChatMessage -from sqlalchemy import MetaData, text -from sqlalchemy.engine import Engine -from sqlalchemy.exc import NoSuchTableError -from sqlalchemy.schema import CreateTable - - -class SQLDatabaseToolSpec(BaseToolSpec, BaseReader): - """ - A tool to query and retrieve results from a SQL Database. - - Args: - sql_database (Optional[SQLDatabase]): SQL database to use, - including table names to specify. - See :ref:`Ref-Struct-Store` for more details. - - OR - - engine (Optional[Engine]): SQLAlchemy Engine object of the database connection. - - OR - - uri (Optional[str]): uri of the database connection. - - OR - - scheme (Optional[str]): scheme of the database connection. - host (Optional[str]): host of the database connection. - port (Optional[int]): port of the database connection. - user (Optional[str]): user of the database connection. - password (Optional[str]): password of the database connection. - dbname (Optional[str]): dbname of the database connection. 
- - """ - - spec_functions = ["run_sql_query", "describe_tables", "list_tables"] - - def __init__( - self, - sql_database: Optional[SQLDatabase] = None, - engine: Optional[Engine] = None, - uri: Optional[str] = None, - scheme: Optional[str] = None, - host: Optional[str] = None, - port: Optional[str] = None, - user: Optional[str] = None, - password: Optional[str] = None, - dbname: Optional[str] = None, - *args: Optional[Any], - **kwargs: Optional[Any], - ) -> None: - """Initialize with parameters.""" - if sql_database: - self.sql_database = sql_database - elif engine: - self.sql_database = SQLDatabase(engine, *args, **kwargs) - elif uri: - self.uri = uri - self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) - elif scheme and host and port and user and password and dbname: - uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}" - self.uri = uri - self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) - else: - raise ValueError( - "You must provide either a SQLDatabase, " - "a SQL Alchemy Engine, a valid connection URI, or a valid " - "set of credentials." - ) - self._metadata = MetaData() - self._metadata.reflect(bind=self.sql_database.engine) - - def run_sql_query(self, query: str) -> Document: - r"""Runs SQL query on the provided SQL database, returning a Document storing all the rows separated by \n. - - Args: - query (str): SQL query in text format which can directly be executed using SQLAlchemy engine. - - Returns: - Document: Document storing all the output result of the sql-query generated. - """ - with self.sql_database.engine.connect() as connection: - if query is None: - raise ValueError("A query parameter is necessary to filter the data") - else: - result = connection.execute(text(query)) - all_doc_str = "" - for item in result.fetchall(): - if all_doc_str: - all_doc_str += "\n" - # fetch each item - doc_str = ", ".join([str(entry) for entry in item]) - all_doc_str += doc_str - return Document(text=all_doc_str) - - def list_tables(self) -> List[str]: - """ - Returns a list of available tables in the database. - """ - return [x.name for x in self._metadata.sorted_tables] - - def describe_tables(self, tables: Optional[List[str]] = None) -> str: - """ - Describes the specified tables in the database. 
-
-        Args:
-            tables (List[str]): A list of table names to retrieve details about.
-        """
-        table_names = tables or [table.name for table in self._metadata.sorted_tables]
-        table_schemas = []
-
-        for table_name in table_names:
-            table = next(
-                (
-                    table
-                    for table in self._metadata.sorted_tables
-                    if table.name == table_name
-                ),
-                None,
-            )
-            if table is None:
-                raise NoSuchTableError(f"Table '{table_name}' does not exist.")
-            schema = str(CreateTable(table).compile(self.sql_database._engine))
-            table_schemas.append(f"{schema}\n")
-
-        return "\n".join(table_schemas)
-
-    def get_table_info(self) -> str:
-        """Construct table info for all tables in the DB, including information about each table's columns and its top row."""
-        all_table_info = ""
-        for table_name in self.list_tables():
-            table_info = self.sql_database.get_single_table_info(table_name)
-            table_info += "\n\nHere is the DDL statement for this table:\n"
-            table_info += self.describe_tables([table_name])
-            _, output = self.sql_database.run_sql(f"SELECT * FROM {table_name} LIMIT 1")
-            table_info += f"\nTop row of {table_name}:\n\n"
-            for colname in output["col_keys"]:
-                table_info += colname + "\t"
-            table_info += "\n"
-            for data in output["result"]:
-                for val in data:
-                    table_info += str(val) + "\t"
-                table_info += "\n"
-            all_table_info += f"\n{table_info}\n"
-        return all_table_info
-
-
-class FinanceChatPack(BaseLlamaPack):
-    def __init__(
-        self,
-        polygon_api_key: str,
-        finnhub_api_key: str,
-        alpha_vantage_api_key: str,
-        newsapi_api_key: str,
-        openai_api_key: str,
-        postgres_db_uri: str,
-        gpt_model_name: str = "gpt-4-0613",
-    ):
-        llm = OpenAI(temperature=0, model=gpt_model_name, api_key=openai_api_key)
-        self.db_tool_spec = SQLDatabaseToolSpec(uri=postgres_db_uri)
-        self.fin_tool_spec = FinanceAgentToolSpec(
-            polygon_api_key, finnhub_api_key, alpha_vantage_api_key, newsapi_api_key
-        )
-
-        self.db_table_info = self.db_tool_spec.get_table_info()
-        prefix_messages = self.construct_prefix_db_message(self.db_table_info)
-        # Add some role play to the system prompt.
-        database_agent = OpenAIAgent.from_tools(
-            [
-                tool
-                for tool in self.db_tool_spec.to_tool_list()
-                if tool.metadata.name == "run_sql_query"
-            ],
-            prefix_messages=prefix_messages,
-            llm=llm,
-            verbose=True,
-        )
-        database_agent_tool = QueryEngineTool.from_defaults(
-            database_agent,
-            name="database_agent",
-            description=f"""
-            This agent analyzes a text query and adds further explanations and thoughts to help a data scientist who has access to the following tables:
-
-            {self.db_table_info}
-
-            Be concise and do not lose any information about the original query while passing it to the data scientist.
-            """,
-        )
-
-        fin_api_agent = OpenAIAgent.from_tools(
-            self.fin_tool_spec.to_tool_list(),
-            system_prompt="""
-            You are a helpful AI financial assistant designed to understand the intent of the user query and then use relevant tools/APIs to help answer it.
-            You can use more than one tool/API, but only if needed; the final response should be concise and relevant. If you are not able to find a
-            relevant tool/API, respond respectfully suggesting that you don't know. Think step by step.""",
-            llm=llm,
-            verbose=True,
-        )
-
-        fin_api_agent_tool = QueryEngineTool.from_defaults(
-            fin_api_agent,
-            name="fin_api_agent",
-            description="""
-            This agent has access to another agent which can access certain open APIs to provide information based on the user query.
-            Analyze the query and add any information needed to help decide which API to call.
-            Be concise and do not lose any information about the original query.
-            """,
-        )
-
-        self.fin_hierarchical_agent = OpenAIAgent.from_tools(
-            [database_agent_tool, fin_api_agent_tool],
-            system_prompt="""
-            You are a specialized financial assistant with access to certain tools which can access open APIs and an S&P 500 companies database containing information on
-            daily opening price, closing price, high, low, volume, reported earnings, and estimated earnings from 2010 to 2023. Before answering a query you should check
-            whether the question can be answered via querying the database or by using specific open APIs. If you try to find the answer via querying the database first and it does
-            not work out, think about whether you can use the other tool APIs available before replying gracefully.
-            """,
-            llm=llm,
-            verbose=True,
-        )
-
-    def construct_prefix_db_message(self, table_info: str) -> List[ChatMessage]:
-        system_prompt = f"""
-        You are a smart data scientist working in a reputed trading firm like Jump Trading developing automated trading algorithms. Take a deep breath and think
-        step by step to design queries over a SQL database.
-
-        Here is a complete description of the tables in the SQL database you have access to:
-
-        {table_info}
-
-        Also use responses to past questions to guide you.
-        """
-
-        prefix_messages = []
-        prefix_messages.append(ChatMessage(role="system", content=system_prompt))
-
-        prefix_messages.append(
-            ChatMessage(
-                role="user",
-                content="What is the average price of Google in the month of July in 2023",
-            )
-        )
-        prefix_messages.append(
-            ChatMessage(
-                role="assistant",
-                content="""
-                SELECT AVG(close) AS AvgPrice
-                FROM stock_data
-                WHERE stock = 'GOOG'
-                    AND date >= '2023-07-01'
-                    AND date <= '2023-07-31';
-                """,
-            )
-        )
-
-        prefix_messages.append(
-            ChatMessage(
-                role="user",
-                content="Which stock has the maximum % change in any month in 2023",
-            )
-        )
-        # prefix_messages.append(ChatMessage(role="user", content="Which stocks gave more than 2% return constantly in month of July from past 5 years"))
-        prefix_messages.append(
-            ChatMessage(
-                role="assistant",
-                content="""
-                WITH MonthlyPrices AS (
-                    SELECT
-                        stock,
-                        EXTRACT(YEAR FROM date) AS year,
-                        EXTRACT(MONTH FROM date) AS month,
-                        FIRST_VALUE(close) OVER (PARTITION BY stock, EXTRACT(YEAR FROM date), EXTRACT(MONTH FROM date) ORDER BY date ASC) AS opening_price,
-                        LAST_VALUE(close) OVER (PARTITION BY stock, EXTRACT(YEAR FROM date), EXTRACT(MONTH FROM date) ORDER BY date ASC ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS closing_price
-                    FROM
-                        stock_data
-                    WHERE
-                        EXTRACT(YEAR FROM date) = 2023
-                ),
-                PercentageChanges AS (
-                    SELECT
-                        stock,
-                        year,
-                        month,
-                        CASE
-                            WHEN opening_price IS NULL OR closing_price IS NULL THEN NULL
-                            WHEN opening_price = 0 THEN NULL
-                            ELSE ((closing_price - opening_price) / opening_price) * 100
-                        END AS pct
-                    FROM
-                        MonthlyPrices
-                )
-                SELECT *
-                FROM
-                    PercentageChanges
-                WHERE pct IS NOT NULL
-                ORDER BY
-                    pct DESC
-                LIMIT 1;
-                """,
-            )
-        )
-
-        prefix_messages.append(
-            ChatMessage(
-                role="user",
-                content="How many times Microsoft beat earnings estimates in 2022",
-            )
-        )
-        prefix_messages.append(
-            ChatMessage(
-                role="assistant",
-                content="""
-                SELECT
-                    COUNT(*)
-                FROM
-                    earnings
-                WHERE
-                    stock = 'MSFT' AND reported > estimated AND EXTRACT(YEAR FROM date) = 2022
-                """,
-            )
-        )
-
-        prefix_messages.append(
-            ChatMessage(
-                role="user",
-                content="Which stocks have beaten earnings estimates by more than $1 consecutively in their last 4 reportings?",
-            )
-        )
-        prefix_messages.append(
-            ChatMessage(
-                role="assistant",
-
content=""" - WITH RankedEarnings AS( - SELECT - stock, - date, - reported, - estimated, - RANK() OVER (PARTITION BY stock ORDER BY date DESC) as ranking - FROM - earnings - ) - SELECT - stock - FROM - RankedEarnings - WHERE - ranking <= 4 AND reported - estimated > 1 - GROUP BY - stock - HAVING COUNT(*) = 4 - """, - ) - ) - - return prefix_messages - - def run(self, query: str): - return self.fin_hierarchical_agent.chat(query) diff --git a/llama-index-packs/llama-index-packs-finchat/pyproject.toml b/llama-index-packs/llama-index-packs-finchat/pyproject.toml deleted file mode 100644 index 2dbd4839c7c83..0000000000000 --- a/llama-index-packs/llama-index-packs-finchat/pyproject.toml +++ /dev/null @@ -1,61 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -# Feel free to un-skip examples, and experimental, you will just need to -# work through many typos (--write-changes and --interactive will help) -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = true -import_path = "llama_index.packs.finchat" - -[tool.llamahub.class_authors] -FinanceChatPack = "345ishaan" - -[tool.mypy] -disallow_untyped_defs = true -# Remove venv skip when integrated with pre-commit -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Ishan Gupta <345ishaan@gmail.com>"] -description = "llama-index packs implementation of a hierarchical agent for finance chat." -keywords = ["agent", "finance", "finchat"] -license = "MIT" -maintainers = ["345ishaan"] -name = "llama-index-packs-finchat" -packages = [{include = "llama_index/"}] -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<3.12" -tavily-python = "^0.3.1" -llama-index-agent-openai = "^0.4.0" -llama-index-tools-finance = "^0.3.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} -codespell = {extras = ["toml"], version = ">=v2.2.6"} -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 -types-setuptools = "67.1.0.0" diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/.gitignore b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. 
-*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/BUILD b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/BUILD deleted file mode 100644 index 2d3d88d1eab9c..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/BUILD +++ /dev/null @@ -1,7 +0,0 @@ -poetry_requirements( - name="poetry", -) - -python_requirements( - name="reqs", -) diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/CHANGELOG.md b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/CHANGELOG.md deleted file mode 100644 index 36bff877abcbe..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/CHANGELOG.md +++ /dev/null @@ -1,5 +0,0 @@ -# CHANGELOG - -## [0.1.2] - 2024-02-13 - -- Add maintainers and keywords from library.json (llamahub) diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/Makefile b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. 
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/README.md b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/README.md
deleted file mode 100644
index cbe2a65e5148c..0000000000000
--- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/README.md
+++ /dev/null
@@ -1,58 +0,0 @@
-# Redis Ingestion Pipeline Pack
-
-This LlamaPack creates an [ingestion pipeline](https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/root.html), with both a cache and vector store backed by Redis.
-
-## CLI Usage
-
-You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package:
-
-```bash
-llamaindex-cli download-llamapack RedisIngestionPipelinePack --download-dir ./redis_ingestion_pack
-```
-
-You can then inspect the files at `./redis_ingestion_pack` and use them as a template for your own project!
-
-## Code Usage
-
-You can download the pack to a `./redis_ingestion_pack` directory:
-
-```python
-from llama_index.core.llama_pack import download_llama_pack
-
-# download and install dependencies
-RedisIngestionPipelinePack = download_llama_pack(
-    "RedisIngestionPipelinePack", "./redis_ingestion_pack"
-)
-```
-
-From here, you can use the pack, or inspect and modify the pack in `./redis_ingestion_pack`.
-
-Then, you can set up the pack like so:
-
-```python
-from llama_index.core.node_parser import SentenceSplitter
-from llama_index.embeddings.openai import OpenAIEmbedding
-
-transformations = [SentenceSplitter(), OpenAIEmbedding()]
-
-# create the pack
-ingest_pack = RedisIngestionPipelinePack(
-    transformations,
-    hostname="localhost",
-    port=6379,
-    cache_collection_name="ingest_cache",
-    vector_collection_name="vector_store",
-)
-```
-
-The `run()` function is a light wrapper around `pipeline.run()`.
-
-You can use this to ingest data and then create an index from the vector store.
-
-```python
-from llama_index.core import VectorStoreIndex
-
-ingest_pack.run(documents)
-
-index = VectorStoreIndex.from_vector_store(ingest_pack.vector_store)
-```
-
-You can learn more about the ingestion pipeline at the [LlamaIndex documentation](https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/root.html).
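-
-For a runnable end-to-end picture, here is a small sketch tying the steps above together. It is illustrative only: the `data/` directory and the query string are assumptions, `ingest_pack` is the pack instance created above, and the same OpenAI credentials as the setup above are assumed for the embedding model.
-
-```python
-from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
-
-# Load documents from a local directory (any reader works here).
-documents = SimpleDirectoryReader("data").load_data()
-
-# Ingest: runs the transformations, with results cached in Redis and the
-# embedded nodes written to the Redis-backed vector store.
-ingest_pack.run(documents)
-
-# Query: build an index directly over the same Redis vector store.
-index = VectorStoreIndex.from_vector_store(ingest_pack.vector_store)
-print(index.as_query_engine().query("What do these documents cover?"))
-```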
diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/BUILD b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/__init__.py b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/__init__.py deleted file mode 100644 index 1209be7b461e4..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from llama_index.packs.redis_ingestion_pipeline.base import RedisIngestionPipelinePack - -__all__ = ["RedisIngestionPipelinePack"] diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/base.py b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/base.py deleted file mode 100644 index 478e32b4c29e9..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/llama_index/packs/redis_ingestion_pipeline/base.py +++ /dev/null @@ -1,57 +0,0 @@ -"""Redis Ingestion Pipeline Completion pack.""" - - -from typing import Any, Dict, List - -from llama_index.core.ingestion.cache import IngestionCache -from llama_index.core.ingestion.pipeline import IngestionPipeline -from llama_index.core.llama_pack.base import BaseLlamaPack -from llama_index.core.schema import BaseNode, TransformComponent -from llama_index.storage.kvstore.redis import RedisKVStore as RedisCache -from llama_index.vector_stores.redis import RedisVectorStore - - -class RedisIngestionPipelinePack(BaseLlamaPack): - """Redis Ingestion Pipeline Completion pack.""" - - def __init__( - self, - transformations: List[TransformComponent], - hostname: str = "localhost", - port: int = 6379, - cache_collection_name: str = "ingest_cache", - vector_collection_name: str = "vector_store", - **kwargs: Any, - ) -> None: - """Init params.""" - self.vector_store = RedisVectorStore( - hostname=hostname, - port=port, - collection_name=vector_collection_name, - ) - - self.ingest_cache = IngestionCache( - cache=RedisCache( - hostname=hostname, - port=port, - ), - collection_name=cache_collection_name, - ) - - self.pipeline = IngestionPipeline( - transformations=transformations, - cache=self.ingest_cache, - vector_store=self.vector_store, - ) - - def get_modules(self) -> Dict[str, Any]: - """Get modules.""" - return { - "pipeline": self.pipeline, - "vector_store": self.vector_store, - "ingest_cache": self.ingest_cache, - } - - def run(self, inputs: List[BaseNode], **kwargs: Any) -> List[BaseNode]: - """Run the pipeline.""" - return self.pipeline.run(nodes=inputs, **kwargs) diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/pyproject.toml b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/pyproject.toml deleted file mode 100644 index 51c78469eff15..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/pyproject.toml +++ /dev/null @@ -1,66 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = 
true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.packs.redis_ingestion_pipeline" - -[tool.llamahub.class_authors] -RedisIngestionPipelinePack = "logan-markewich" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index packs redis_ingestion_pipeline integration" -exclude = ["**/BUILD"] -keywords = ["index", "ingestion", "pipeline", "redis"] -license = "MIT" -maintainers = ["logan-markewich"] -name = "llama-index-packs-redis-ingestion-pipeline" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-storage-kvstore-redis = "^0.3.0" -llama-index-vector-stores-redis = "^0.4.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/requirements.txt b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/requirements.txt deleted file mode 100644 index 7800f0fad3fff..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/requirements.txt +++ /dev/null @@ -1 +0,0 @@ -redis diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/BUILD b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/__init__.py b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/test_packs_redis_ingestion_pipeline.py b/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/test_packs_redis_ingestion_pipeline.py deleted file mode 100644 index ee0d8260816cc..0000000000000 --- a/llama-index-packs/llama-index-packs-redis-ingestion-pipeline/tests/test_packs_redis_ingestion_pipeline.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.llama_pack import BaseLlamaPack -from llama_index.packs.redis_ingestion_pipeline import RedisIngestionPipelinePack - - -def test_class(): - names_of_base_classes = [b.__name__ for b in RedisIngestionPipelinePack.__mro__] - assert BaseLlamaPack.__name__ in names_of_base_classes diff --git a/llama-index-packs/llama-index-packs-searchain/.gitignore b/llama-index-packs/llama-index-packs-searchain/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ 
-llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. github.com/David-OConnor/pyflow -__pypackages__/ - -# Celery stuff -celerybeat-schedule -celerybeat.pid - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ -pyvenv.cfg - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ -.dmypy.json -dmypy.json - -# Pyre type checker -.pyre/ - -# Jetbrains -.idea -modules/ -*.swp - -# VsCode -.vscode - -# pipenv -Pipfile -Pipfile.lock - -# pyright -pyrightconfig.json diff --git a/llama-index-packs/llama-index-packs-searchain/BUILD b/llama-index-packs/llama-index-packs-searchain/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-packs/llama-index-packs-searchain/Makefile b/llama-index-packs/llama-index-packs-searchain/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. 
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-packs/llama-index-packs-searchain/README.md b/llama-index-packs/llama-index-packs-searchain/README.md
deleted file mode 100644
index daaaabe9ddbcb..0000000000000
--- a/llama-index-packs/llama-index-packs-searchain/README.md
+++ /dev/null
@@ -1,44 +0,0 @@
-# LlamaIndex Packs Integration: Searchain
-
-This LlamaPack implements SearChain, a framework that structures the interaction between an LLM and information retrieval (IR) as a global reasoning chain called a Chain-of-Query (CoQ).
-
-This follows the idea in the paper [Search-in-the-Chain: Towards Accurate, Credible and Traceable Large Language Models for Knowledge-intensive Tasks](https://arxiv.org/abs/2304.14732).
-
-Making content generated by large language models (LLMs) such as ChatGPT accurate, trustworthy, and traceable is critical, especially for knowledge-intensive tasks. Introducing information retrieval (IR) to provide the LLM with external knowledge is likely to solve this problem; however, where and how to introduce IR is a big challenge. First, the SearChain framework generates a global reasoning chain called a Chain-of-Query (CoQ) for the LLM, where each node contains an IR-oriented query and the answer to that query. Second, IR verifies the answer at each node of the CoQ: when IR has high confidence, it corrects answers that are inconsistent with the retrieved information, which improves credibility. Third, the LLM can mark its missing knowledge in the CoQ, and IR can provide this knowledge to the LLM. These three operations improve the accuracy of the LLM on complex knowledge-intensive tasks in terms of both reasoning ability and knowledge. This pack implements the above🤗! (A schematic sketch of this verify-and-correct loop appears below, after the setup steps.)
-
-You can see its use case in the examples folder.
-
-This implementation is adapted from the author's implementation. You can find the official code repository [here](https://github.com/xsc1234/Search-in-the-Chain).
-
-## Code Usage
-
-First, install SearChainPack using the following code:
-
-```python
-from llama_index.core.llama_pack import download_llama_pack
-
-download_llama_pack("SearChainPack", "./searchain_pack")
-```
-
-Next, you can load and initialize a SearChain object:
-
-```python
-from searchain_pack.base import SearChainPack
-
-searchain = SearChainPack(
-    data_path="data",
-    dprtokenizer_path="dpr_reader_multi",
-    dprmodel_path="dpr_reader_multi",
-    crossencoder_name_or_path="Quora_cross_encoder",
-)
-```
-
-Relevant data can be found [here](https://www.kaggle.com/datasets/anastasiajia/searchain/data).
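-
-As promised above, here is a schematic of SearChain's verify-and-correct loop. This is an illustrative sketch only: the stub functions below are hypothetical stand-ins for the LLM and the retriever (the pack's real control flow lives in `SearChainPack._ir` and `SearChainPack.execute`), and the example question is taken from the pack's own few-shot prompt.
-
-```python
-from typing import Dict, List, Optional
-
-
-def llm_generate_coq(question: str) -> List[Dict[str, Optional[str]]]:
-    """Stub LLM: proposes (query, answer) CoQ nodes; None marks an [Unsolved Query]."""
-    return [
-        {"query": "Who is the performer of Spirit If...?", "answer": "Kevin Drew"},
-        {"query": "Where was Kevin Drew born?", "answer": None},
-    ]
-
-
-def retrieve(query: str) -> str:
-    """Stub IR: returns a supporting passage for the query."""
-    return "Kevin Drew was born in Toronto."
-
-
-def supported(answer: str, evidence: str) -> bool:
-    """Stub confidence check: does the evidence mention the answer?"""
-    return answer in evidence
-
-
-question = "Where do greyhound buses in the birthplace of Spirit If...'s performer leave from?"
-for node in llm_generate_coq(question):
-    evidence = retrieve(node["query"])
-    if node["answer"] is None:
-        # Missing knowledge: IR supplies it and the LLM continues the chain.
-        print(f"provide knowledge for {node['query']!r}: {evidence}")
-    elif not supported(node["answer"], evidence):
-        # Low-confidence node: correct the answer from the retrieved passage.
-        print(f"correct {node['query']!r} using: {evidence}")
-```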
-
-You can then run SearChain over the HotpotQA data using the following method:
-
-```python
-start_idx = 0
-while start_idx != -1:
-    start_idx = searchain.execute(
-        "/hotpotqa/hotpot_dev_fullwiki_v1_line.json", start_idx=start_idx
-    )
-```
diff --git a/llama-index-packs/llama-index-packs-searchain/examples/searchain.ipynb b/llama-index-packs/llama-index-packs-searchain/examples/searchain.ipynb
deleted file mode 100644
index c5ceea081389f..0000000000000
--- a/llama-index-packs/llama-index-packs-searchain/examples/searchain.ipynb
+++ /dev/null
@@ -1,108 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "cef7fbcf-dcc4-4986-998f-5bd6c058b348",
-   "metadata": {},
-   "source": [
-    "# An Example of a SearChain Application"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "ae02aac8-ddc3-481f-9b41-a3c52f2ad9b5",
-   "metadata": {},
-   "source": [
-    "This LlamaPack implements a short-form version of the [SearChain paper by Xu et al.](https://arxiv.org/abs/2304.14732)\n",
-    "\n",
-    "This implementation is adapted from the author's implementation. You can find the official code repository [here](https://github.com/xsc1234/Search-in-the-Chain)."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "d500e8af-c685-4f11-b176-dd534c7824e5",
-   "metadata": {},
-   "source": [
-    "## Load Pack"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "a111a49c-e9c2-4a19-96dc-136fc820bbfb",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from llama_index.core.llama_pack import download_llama_pack\n",
-    "\n",
-    "download_llama_pack(\"SearChainPack\", \"./searchain_pack\")\n",
-    "from searchain_pack.base import SearChainPack"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "37739fc7-6df9-44e3-ac46-22471565af36",
-   "metadata": {},
-   "source": [
-    "## Setup"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "d113b360-739e-4f29-b0db-273ba2d65e2a",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "searchain = SearChainPack(\n",
-    "    data_path=\"data\",\n",
-    "    dprtokenizer_path=\"./model/dpr_reader_multi\",\n",
-    "    dprmodel_path=\"./model/dpr_reader_multi\",\n",
-    "    crossencoder_name_or_path=\"./model/Quora_cross_encoder\",\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "id": "7a531d37-6832-40ab-b579-8dc007e1a1e2",
-   "metadata": {},
-   "source": [
-    "## Execute"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "4dd90f1c-720a-4ef0-9a35-d1b54be8cd53",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "start_idx = 0\n",
-    "while start_idx != -1:\n",
-    "    start_idx = searchain.execute(\n",
-    "        \"/hotpotqa/hotpot_dev_fullwiki_v1_line.json\", start_idx=start_idx\n",
-    "    )"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3 (ipykernel)",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/BUILD b/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/BUILD
deleted file mode 100644
index db46e8d6c978c..0000000000000
--- a/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/BUILD
+++ /dev/null
@@ -1 +0,0 @@
-python_sources()
diff --git a/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/__init__.py
b/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/__init__.py deleted file mode 100644 index 467b3ed0466ce..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from llama_index.packs.searchain.base import SearChainPack - - -__all__ = ["SearChainPack"] diff --git a/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/base.py b/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/base.py deleted file mode 100644 index f6bdf4c8ed6c5..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/llama_index/packs/searchain/base.py +++ /dev/null @@ -1,244 +0,0 @@ -import regex -import string -import json -from llama_index.core.llama_pack.base import BaseLlamaPack -from llama_index.core import VectorStoreIndex, SimpleDirectoryReader -from llama_index.llms.openai import OpenAI -from llama_index.core.llms import ChatMessage -from transformers import DPRReader, DPRReaderTokenizer -import torch -from typing import Any -from sentence_transformers import CrossEncoder -import time - - -def _normalize_answer(s): - def remove_articles(text): - return regex.sub(r"\b(a|an|the)\b", " ", text) - - def white_space_fix(text): - return " ".join(text.split()) - - def remove_punc(text): - exclude = set(string.punctuation) - return "".join(ch for ch in text if ch not in exclude) - - def lower(text): - return text.lower() - - return white_space_fix(remove_articles(remove_punc(lower(s)))) - - -def _match_or_not(prediction, ground_truth): - norm_predict = _normalize_answer(prediction) - norm_answer = _normalize_answer(ground_truth) - return norm_answer in norm_predict - - -def _have_seen_or_not(model_cross_encoder, query_item, query_seen_list, query_type): - if "Unsolved" in query_type: - return False - for query_seen in query_seen_list: - with torch.no_grad(): - if model_cross_encoder.predict([(query_seen, query_item)]) > 0.5: - return True - return False - - -class SearChainPack(BaseLlamaPack): - """Simple short form SearChain pack.""" - - def __init__( - self, - data_path: str, - dprtokenizer_path: str = "/dpr_reader_multi", - dprmodel_path: str = "/dpr_reader_multi", - crossencoder_name_or_path: str = "/Quora_cross_encoder", - device: str = "cuda", - **kwargs: Any, - ) -> None: - """Init params.""" - self.device = device - self.crossencoder = CrossEncoder(crossencoder_name_or_path, device=self.device) - self.documents = SimpleDirectoryReader(data_path).load_data() - self.index = VectorStoreIndex.from_documents(self.documents) - self.query_engine = self.index.as_query_engine() - self.llm = OpenAI() - - self.dprtokenizer = DPRReaderTokenizer.from_pretrained(dprtokenizer_path) - self.dprmodel = DPRReader.from_pretrained(dprmodel_path) - self.dprmodel.eval() - self.dprmodel.to(self.device) - - def _get_answer(self, query, texts, title): - print("texts:" + texts) - encoded_inputs = self.dprtokenizer( - questions=[query], - titles=[title], - texts=[texts], - return_tensors="pt", - max_length=510, - ) - outputs = self.dprmodel(**encoded_inputs.to(self.device)) - start_logits = outputs.start_logits - end_logits = outputs.end_logits - relevance_logits = outputs.relevance_logits - - answer_start_index = outputs.start_logits.argmax() - answer_end_index = outputs.end_logits.argmax() - predict_answer_tokens = encoded_inputs.input_ids[ - 0, answer_start_index : answer_end_index + 1 - ] - answer = self.dprtokenizer.decode(predict_answer_tokens) - return answer, 
relevance_logits - - def _ir(self, query, query_seen_list): - flag_contibue_label = False - query_list = query.split("\n") - message = "" - for idx in range(len(query_list)): - query_item = query_list[idx] - if "Query" in query_item and "]:" in query_item: - temp = query_item.split("]") - if len(temp) < 2: - continue - query_type = temp[0] - query_item = temp[1] - if ":" in query_item: - query_item = query_item[1:] - if not _have_seen_or_not( - self.crossencoder, query_item, query_seen_list, query_type - ): - now_reference = {} - query_seen_list.append(query_item) - response = str(self.query_engine.query(query_item)) - - answer, relevance_score = self._get_answer( - query=query_item, texts="", title=response - ) - now_reference["query"] = query_item - now_reference["answer"] = answer - now_reference["reference"] = response - now_reference["ref_score"] = relevance_score - now_reference["idx"] = response - - if "Unsolved" in query_type: - message = "[Unsolved Query]:{}[Answer]:{}[Reference]:{}".format( - query_item, answer, response - ) - flag_contibue_label = True - break - elif relevance_score > 1.5: - answer_start_idx = idx + 1 - predict_answer = "" - while answer_start_idx < len(query_list): - if "Answer" in query_list[answer_start_idx]: - predict_answer = query_list[answer_start_idx] - break - answer_start_idx += 1 - match_label = _match_or_not( - prediction=predict_answer, ground_truth=answer - ) - if match_label: - continue - else: - message = "[Query]:{}[Answer]:{}[Reference]:{}".format( - query_item, answer, response - ) - flag_contibue_label = True - break - return message, flag_contibue_label, query_seen_list - - def _extract(self, message_keys_list): - text = message_keys_list - idx = len(text) - while idx > 0: - idx = idx - 1 - item = text[idx] - if item.role == "assistant" and "Final Content" in item.content: - list_item = item.content.split("\n") - for sp in list_item: - if "Final Content" in sp: - return item.content - return "Sorry, I still cannot solve this question!" - - def execute(self, data_path, start_idx): - data = open(data_path) - for k, example in enumerate(data): - if k < start_idx: - continue - example = json.loads(example) - q = example["question"] - round_count = 0 - message_keys_list = [ - ChatMessage( - role="user", - content="""Construct a global reasoning chain for this complex [Question] : " {} " You should generate a query to the search engine based on what you already know at each step of the reasoning chain, starting with [Query]. If you know the answer for [Query], generate it starting with [Answer]. You can try to generate the final answer for the [Question] by referring to the [Query]-[Answer] pairs, starting with [Final Content]. If you don't know the answer, generate a query to search engine based on what you already know and do not know, starting with [Unsolved Query]. - For example: - [Question]: "Where do greyhound buses that are in the birthplace of Spirit If...'s performer leave from? " - [Query 1]: Who is the performer of Spirit If... ? - If you don't know the answer: - [Unsolved Query]: Who is the performer of Spirit If... ? - If you know the answer: - [Answer 1]: The performer of Spirit If... is Kevin Drew. - [Query 2]: Where was Kevin Drew born? - If you don't know the answer: - [Unsolved Query]: Where was Kevin Drew born? - If you know the answer: - [Answer 2]: Toronto. - [Query 3]: Where do greyhound buses in Toronto leave from? - If you don't know the answer: - [Unsolved Query]: Where do greyhound buses in Toronto leave from? 
- If you know the answer: - [Answer 3]: Toronto Coach Terminal. - [Final Content]: The performer of Spirit If... is Kevin Drew [1]. Kevin Drew was born in Toronto [2]. Greyhound buses in Toronto leave from Toronto Coach Terminal [3]. So the final answer is Toronto Coach Terminal. - - [Question]:"Which magazine was started first Arthur’s Magazine or First for Women?" - [Query 1]: When was Arthur’s Magazine started? - [Answer 1]: 1844. - [Query 2]: When was First for Women started? - [Answer 2]: 1989 - [Final Content]: Arthur’s Magazine started in 1844 [1]. First for Women started in 1989 [2]. So Arthur’s Magazine was started first. So the answer is Arthur’s Magazi - [Question]: {}""".format( - q, q - ), - ) - ] - feedback_answer = "continue" - predict_answer = "" - query_seen_list = [] - while round_count < 5 and feedback_answer != "end": - time.sleep(0.5) - rsp = self.llm.chat(message_keys_list) - round_count += 1 - input_str = str(rsp.message.content) - message_keys_list.append( - ChatMessage(role="assistant", content=input_str) - ) - predict_answer += input_str - - message, flag_contibue_label, query_seen_list = self._ir( - input_str, query_seen_list - ) - if flag_contibue_label: - feedback = message - else: - feedback = "end" - - if feedback == "end": - break - # [Query]:xxxx[Answer]:xxxx[Reference]:xxxx - feedback_list = feedback.split("") - if "Unsolved Query" not in feedback: - new_prompt = """Reference: {} According to this Reference, the answer for "{}" should be "{}", you can change your answer based on the Reference and continue constructing the reasoning chain to give the final answer for [Question]:{}""".format( - feedback_list[0], feedback_list[1], q, feedback_list[2] - ) - else: - new_prompt = """Reference: {} According to this Reference, the answer for "{}" should be "{}", you can give your answer based on the Reference and continue constructing the reasoning chain to give the final answer for [Question]:{} """.format( - feedback_list[0], feedback_list[1], q, feedback_list[2] - ) - message_keys_list.append(ChatMessage(role="user", content=new_prompt)) - result = self._extract(message_keys_list) - print(result) - - return -1 diff --git a/llama-index-packs/llama-index-packs-searchain/pyproject.toml b/llama-index-packs/llama-index-packs-searchain/pyproject.toml deleted file mode 100644 index 04878e3111955..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/pyproject.toml +++ /dev/null @@ -1,60 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -# Feel free to un-skip examples, and experimental, you will just need to -# work through many typos (--write-changes and --interactive will help) -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.packs.searchain" - -[tool.llamahub.class_authors] -SearChainPack = "DJC-GO-SOLO" - -[tool.mypy] -disallow_untyped_defs = true -# Remove venv skip when integrated with pre-commit -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index packs searchain integration" -license = "MIT" -name = "llama-index-packs-searchain" -packages = [{include = "llama_index/"}] -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -torch = "^2.1.2" -transformers = "^4.38.1" 
-sentence_transformers = "^2.5.1" -llama-index-llms-openai = "^0.3.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -black = {extras = ["jupyter"], version = "<=23.9.1,>=23.7.0"} -codespell = {extras = ["toml"], version = ">=v2.2.6"} -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" # TODO: unpin when mypy>0.991 -types-setuptools = "67.1.0.0" diff --git a/llama-index-packs/llama-index-packs-searchain/tests/BUILD b/llama-index-packs/llama-index-packs-searchain/tests/BUILD deleted file mode 100644 index dabf212d7e716..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/tests/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_tests() diff --git a/llama-index-packs/llama-index-packs-searchain/tests/__init__.py b/llama-index-packs/llama-index-packs-searchain/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-searchain/tests/test_packs_searchain.py b/llama-index-packs/llama-index-packs-searchain/tests/test_packs_searchain.py deleted file mode 100644 index 4d55fb1741217..0000000000000 --- a/llama-index-packs/llama-index-packs-searchain/tests/test_packs_searchain.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.llama_pack import BaseLlamaPack -from llama_index.packs.searchain import SearChainPack - - -def test_class(): - names_of_base_classes = [b.__name__ for b in SearChainPack.__mro__] - assert BaseLlamaPack.__name__ in names_of_base_classes diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/BUILD b/llama-index-packs/llama-index-packs-subdoc-summary/BUILD deleted file mode 100644 index 0896ca890d8bf..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -poetry_requirements( - name="poetry", -) diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/Makefile b/llama-index-packs/llama-index-packs-subdoc-summary/Makefile deleted file mode 100644 index b9eab05aa3706..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/Makefile +++ /dev/null @@ -1,17 +0,0 @@ -GIT_ROOT ?= $(shell git rev-parse --show-toplevel) - -help: ## Show all Makefile targets. - @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}' - -format: ## Run code autoformatters (black). - pre-commit install - git ls-files | xargs pre-commit run black --files - -lint: ## Run linters: pre-commit (black, ruff, codespell) and mypy - pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files - -test: ## Run tests via pytest. - pytest tests - -watch-docs: ## Build and watch documentation. - sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/ diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/README.md b/llama-index-packs/llama-index-packs-subdoc-summary/README.md deleted file mode 100644 index 5a721d5c85c26..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/README.md +++ /dev/null @@ -1,53 +0,0 @@ -# LlamaIndex Packs Integration: Subdoc-Summary - -This LlamaPack provides an advanced technique for injecting each chunk with "sub-document" metadata. 
This context augmentation technique is helpful for both retrieving relevant context and for synthesizing correct answers.
-
-It is a step beyond simply adding a summary of the document as the metadata to each chunk. Within a long document, there can be multiple distinct themes, and we want each chunk to be grounded in global but relevant context.
-
-This technique was inspired by our "Practical Tips and Tricks" video: https://www.youtube.com/watch?v=ZP1F9z-S7T0.
-
-## Installation
-
-```bash
-pip install llama-index llama-index-packs-subdoc-summary
-```
-
-## CLI Usage
-
-You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package:
-
-```bash
-llamaindex-cli download-llamapack SubDocSummaryPack --download-dir ./subdoc_summary_pack
-```
-
-You can then inspect the files at `./subdoc_summary_pack` and use them as a template for your own project.
-
-## Code Usage
-
-You can download the pack to the `./subdoc_summary_pack` directory:
-
-```python
-from llama_index.core.llama_pack import download_llama_pack
-from llama_index.embeddings.openai import OpenAIEmbedding
-from llama_index.llms.openai import OpenAI
-
-# download and install dependencies
-SubDocSummaryPack = download_llama_pack(
-    "SubDocSummaryPack", "./subdoc_summary_pack"
-)
-
-# You can use any llama-hub loader to get documents!
-subdoc_summary_pack = SubDocSummaryPack(
-    documents,
-    parent_chunk_size=8192,  # default
-    child_chunk_size=512,  # default
-    llm=OpenAI(model="gpt-3.5-turbo"),
-    embed_model=OpenAIEmbedding(),
-)
-```
-
-Initializing the pack will split documents into parent chunks and child chunks. It will inject parent chunk summaries into child chunks, and index the child chunks.
-
-Running the pack will run the query engine over the vectorized child chunks.
-
-```python
-response = subdoc_summary_pack.run("<question>", similarity_top_k=2)
-```
diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/examples/subdoc-summary.ipynb b/llama-index-packs/llama-index-packs-subdoc-summary/examples/subdoc-summary.ipynb
deleted file mode 100644
index b07ac5b76980c..0000000000000
--- a/llama-index-packs/llama-index-packs-subdoc-summary/examples/subdoc-summary.ipynb
+++ /dev/null
@@ -1,357 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "8dd0acdb-5aec-4129-8772-81f56d6b25cf",
-   "metadata": {},
-   "source": [
-    "# Sub-Document Summary Metadata Pack\n",
-    "\n",
-    "This LlamaPack provides an advanced technique for injecting each chunk with \"sub-document\" metadata. This context augmentation technique is helpful for both retrieving relevant context and for synthesizing correct answers.\n",
-    "\n",
-    "It is a step beyond simply adding a summary of the document as the metadata to each chunk. Within a long document, there can be multiple distinct themes, and we want each chunk to be grounded in global but relevant context."
- ] - }, - { - "cell_type": "markdown", - "id": "66818da6-a3fb-4537-b30a-922a8a0ef99e", - "metadata": {}, - "source": [ - "## Setup Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "317a3207-1211-4a6a-bd7d-3ab14f399951", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "811.82s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n", - "817.00s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - " % Total % Received % Xferd Average Speed Time Time Time Current\n", - " Dload Upload Total Spent Left Speed\n", - "100 13.0M 100 13.0M 0 0 27.7M 0 --:--:-- --:--:-- --:--:-- 28.0M\n" - ] - } - ], - "source": [ - "!mkdir -p 'data/'\n", - "!curl 'https://arxiv.org/pdf/2307.09288.pdf' -o 'data/llama2.pdf'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "bf6ab9c0-c993-4ab2-8343-b294676d7550", - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.core import SimpleDirectoryReader\n", - "\n", - "documents = SimpleDirectoryReader(\"data\").load_data()" - ] - }, - { - "cell_type": "markdown", - "id": "98bfbe4b-539c-469c-82e6-1f823f28d5f4", - "metadata": {}, - "source": [ - "## Run the Sub-Document Summary Metadata Pack" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "af4b815e-f5ce-406b-9dcb-5a23fc9f96db", - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-packs-subdoc-summary llama-index-llms-openai llama-index-embeddings-openai" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d619362b-ae45-4e47-b400-1c2ce7262496", - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.packs.subdoc_summary import SubDocSummaryPack\n", - "from llama_index.llms.openai import OpenAI\n", - "from llama_index.embeddings.openai import OpenAIEmbedding\n", - "\n", - "subdoc_summary_pack = SubDocSummaryPack(\n", - " documents,\n", - " parent_chunk_size=8192, # default,\n", - " child_chunk_size=512, # default\n", - " llm=OpenAI(model=\"gpt-3.5-turbo\"),\n", - " embed_model=OpenAIEmbedding(),\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "fb11a60d-d356-40c5-84c1-4135382bfbfd", - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "Llama 2 was pretrained using an optimized auto-regressive transformer with robust data cleaning, updated data mixes, training on 40% more total tokens, doubling the context length, and using grouped-query attention to improve inference scalability for larger models." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "**Node ID:** 172a1344-d48d-443b-8383-677037570c06
**Similarity:** 0.8720929924174893
**Text:** page_label: 1\n",
-    "file_name: llama2.pdf\n",
-    "file_path: data/llama2.pdf\n",
-    "file_type: application/pdf\n",
-    "file_size: 13661300\n",
-    "creation_date: 2024-02-17\n",
-    "last_modified_date: 2024-02-17\n",
-    "last_accessed_date: 2024-02-17\n",
-    "context_summary: Llama 2 is a collection of pretrained and fine-tuned large language models optimized for dialogue use cases, ranging from 7 billion to 70 billion parameters. The models, known as Llama 2-Chat, have shown superior performance compared to open-source chat models on various benchmarks and are considered as potential alternatives to closed-source models.\n",
-    "\n",
-    "Llama 2: Open Foundation and Fine-Tuned Chat Models\n",
-    "Hugo Touvron∗ Louis Martin† Kevin Stone†\n",
-    "Peter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\n",
-    "Prajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\n",
-    "Guillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\n",
-    "Cynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\n",
-    "Hakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev\n",
-    "Punit Singh Koura Marie-Anne Lachaux Thibaut Lavril Jenya Lee Diana Liskovich\n",
-    "Yinghai Lu Yuning Mao Xavier Martinet Todor Mihaylov Pushkar Mishra\n",
-    "Igor Molybog Yixin Nie Andrew Poulton Jeremy Reizenstein Rashi Rungta Kalyan Saladi\n",
-    "Alan Schelten Ruan Silva Eric Michael Smith Ranjan Subramanian Xiaoqing Ellen Tan Binh Tang\n",
-    "Ross Taylor Adina Williams Jian Xiang Kuan Puxin Xu Zheng Yan Iliyan Zarov Yuchen Zhang\n",
-    "Angela Fan Melanie Kambadur Sharan Narang Aurelien Rodriguez Robert Stojnic\n",
-    "Sergey Edunov Thomas Scialom∗\n",
-    "GenAI, Meta\n",
-    "Abstract\n",
-    "In this work, we develop and release Llama 2, a collection of pretrained and fine-tuned\n",
-    "large language models (LLMs) ranging in scale from 7 billion to 70 billion parameters.\n",
-    "Our fine-tuned LLMs, called Llama 2-Chat, are optimized for dialogue use cases. Our\n",
-    "models outperform open-source chat models on most benchmarks we tested, and based on\n",
-    "our human evaluations for helpfulness and safety, may be a suitable substitute for closed-\n",
-    "source models. We provide a detailed description of our approach to fine-tuning and safety\n",
-    "improvements of Llama 2-Chat in order to enable the community to build on our work and\n",
-    "contribute to the responsible development of LLMs.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "**Node ID:** dbbde2a7-d51c-4245-959d-ba97ba414b55
**Similarity:** 0.8700958215249326
**Text:** page_label: 5\n",
-    "file_name: llama2.pdf\n",
-    "file_path: data/llama2.pdf\n",
-    "file_type: application/pdf\n",
-    "file_size: 13661300\n",
-    "creation_date: 2024-02-17\n",
-    "last_modified_date: 2024-02-17\n",
-    "last_accessed_date: 2024-02-17\n",
-    "context_summary: Llama 2-Chat is developed through pretraining, supervised fine-tuning, and reinforcement learning with human feedback methodologies, focusing on refining the model iteratively. The training process involves using an optimized auto-regressive transformer, robust data cleaning, updated data mixes, and specific architectural enhancements like increased context length and grouped-query attention.\n",
-    "\n",
-    "Figure 4: Training of Llama 2-Chat: This process begins with the pretraining of Llama 2 using publicly\n",
-    "available online sources. Following this, we create an initial version of Llama 2-Chat through the application\n",
-    "of supervised fine-tuning. Subsequently, the model is iteratively refined using Reinforcement Learning\n",
-    "with Human Feedback (RLHF) methodologies, specifically through rejection sampling and Proximal Policy\n",
-    "Optimization (PPO). Throughout the RLHF stage, the accumulation of iterative reward modeling data in\n",
-    "parallel with model enhancements is crucial to ensure the reward models remain within distribution.\n",
-    "2 Pretraining\n",
-    "To create the new family of Llama 2 models, we began with the pretraining approach described in Touvron et al.\n",
-    "(2023), using an optimized auto-regressive transformer, but made several changes to improve performance.\n",
-    "Specifically, we performed more robust data cleaning, updated our data mixes, trained on 40% more total\n",
-    "tokens, doubled the context length, and used grouped-query attention (GQA) to improve inference scalability\n",
-    "for our larger models. Table 1 compares the attributes of the new Llama 2 models with the Llama 1 models.\n",
-    "2.1 Pretraining Data\n",
-    "Our training corpus includes a new mix of data from publicly available sources, which does not include data\n",
-    "from Meta’s products or services. We made an effort to remove data from certain sites known to contain a\n",
-    "high volume of personal information about private individuals. We trained on 2 trillion tokens of data as this\n",
-    "provides a good performance–cost trade-off, up-sampling the most factual sources in an effort to increase\n",
-    "knowledge and dampen hallucinations.\n",
-    "We performed a variety of pretraining data investigations so that users can better understand the potential\n",
-    "capabilities and limitations of our models; results can be found in Section 4.1.\n",
-    "2.2 Training Details\n",
-    "We adopt most of the pretraining setting and model architecture from Llama 1.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from IPython.display import Markdown, display\n", - "from llama_index.core.response.notebook_utils import display_source_node\n", - "\n", - "response = subdoc_summary_pack.run(\"How was Llama2 pretrained?\")\n", - "display(Markdown(str(response)))\n", - "for n in response.source_nodes:\n", - " display_source_node(n, source_length=10000, metadata_mode=\"all\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1181af9d-680f-4ba3-89e2-f88b12a89cc7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/markdown": [ - "The latest ChatGPT model, equipped with Ghost Attention (GAtt), demonstrates strong multi-turn memory ability by consistently referring to defined attributes for up to 20 turns in a conversation. This integration of GAtt in the ChatGPT model allows for efficient long context attention beyond 2048 tokens, showcasing potential for robust performance in handling extended contexts." - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "**Node ID:** 005a3c23-8d97-4e5d-957e-98ad2dfb93ad
**Similarity:** 0.7923889627946064
**Text:** page_label: 54\n",
-    "file_name: llama2.pdf\n",
-    "file_path: data/llama2.pdf\n",
-    "file_type: application/pdf\n",
-    "file_size: 13661300\n",
-    "creation_date: 2024-02-17\n",
-    "last_modified_date: 2024-02-17\n",
-    "last_accessed_date: 2024-02-17\n",
-    "context_summary: Llama 2-Chat with GAtt consistently refers to defined attributes for up to 20 turns, showcasing strong multi-turn memory ability. The integration of GAtt in Llama 2-Chat enables efficient long context attention beyond 2048 tokens, indicating potential for robust performance in handling extended contexts.\n",
-    "\n",
-    "Dialogue Turn Baseline + GAtt\n",
-    "2 100% 100%\n",
-    "4 10% 100%\n",
-    "6 0% 100%\n",
-    "20 0% 100%\n",
-    "Table 30: GAtt results. Llama 2-Chat with GAtt is able to refer to attributes 100% of the time, for up to 20\n",
-    "turns from our human evaluation. We limited the evaluated attributes to public figures and hobbies.\n",
-    "The attention now spans beyond 20 turns. We tested the model ability to remember the system arguments\n",
-    "through a human evaluation. The arguments (e.g. hobbies, persona) are defined during the first message, and\n",
-    "then from turn 2 to 20. We explicitly asked the model to refer to them (e.g. “What is your favorite hobby?”,\n",
-    "“What is your name?”), to measure the multi-turn memory ability of Llama 2-Chat. We report the results\n",
-    "in Table 30. Equipped with GAtt, Llama 2-Chat maintains 100% accuracy, always referring to the defined\n",
-    "attribute, and so, up to 20 turns (we did not extend the human evaluation more, and all the examples had\n",
-    "less than 4048 tokens in total over the turns). As a comparison, Llama 2-Chat without GAtt can no longer\n",
-    "refer to the attributes after only a few turns: from 100% at turn t+1, to 10% at turn t+3 and then 0%.\n",
-    "GAtt Zero-shot Generalisation. We tried at inference time to set constraints not present in the training of\n",
-    "GAtt. For instance, “answer in one sentence only”, for which the model remained consistent, as illustrated in\n",
-    "Figure 28.\n",
-    "We first applied GAtt to Llama 1, which was pretrained with a context length of 2048 tokens and then\n",
-    "fine-tuned with 4096 max length. We tested if GAtt works beyond 2048 tokens, and the model arguably\n",
-    "managed to understand attributes beyond this window. This promising result indicates that GAtt could be\n",
-    "adapted as an efficient technique for long context attention.\n",
-    "A.3.6 How Far Can Model-Based Evaluation Go?
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/markdown": [ - "**Node ID:** 0b1719e9-d7fa-42af-890b-5eeb946857c5
**Similarity:** 0.7837282816384877
**Text:** page_label: 16\n",
-    "file_name: llama2.pdf\n",
-    "file_path: data/llama2.pdf\n",
-    "file_type: application/pdf\n",
-    "file_size: 13661300\n",
-    "creation_date: 2024-02-17\n",
-    "last_modified_date: 2024-02-17\n",
-    "last_accessed_date: 2024-02-17\n",
-    "context_summary: The text discusses the challenges faced in maintaining multi-turn consistency in dialogue systems and introduces a method called Ghost Attention (GAtt) to address these issues. GAtt involves incorporating instructions throughout a conversation to ensure dialogue control over multiple turns.\n",
-    "\n",
-    "Figure 9: Issues with multi-turn memory (left) can be improved with GAtt (right).\n",
-    "We train for between 200 and 400 iterations for all our models, and use evaluations on held-out prompts for\n",
-    "early stopping. Each iteration of PPO on the 70B model takes on average ≈330 seconds. To train quickly with\n",
-    "large batch sizes, we use FSDP (Zhao et al., 2023). This was effective when using O(1) forward or backward\n",
-    "passes, but caused a large slowdown (≈20×) during generation, even when using a large batch size and KV\n",
-    "cache. We were able to mitigate this by consolidating the model weights to each node once before generation\n",
-    "and then freeing the memory after generation, resuming the rest of the training loop.\n",
-    "3.3 System Message for Multi-Turn Consistency\n",
-    "In a dialogue setup, some instructions should apply for all the conversation turns, e.g., to respond succinctly,\n",
-    "or to “act as” some public figure. When we provided such instructions to Llama 2-Chat, the subsequent\n",
-    "response should always respect the constraint. However, our initial RLHF models tended to forget the initial\n",
-    "instruction after a few turns of dialogue, as illustrated in Figure 9 (left).\n",
-    "To address these limitations, we propose Ghost Attention (GAtt), a very simple method inspired by Context\n",
-    "Distillation (Bai et al., 2022b) that hacks the fine-tuning data to help the attention focus in a multi-stage\n",
-    "process. GAtt enables dialogue control over multiple turns, as illustrated in Figure 9 (right).\n",
-    "GAtt Method. Assume we have access to a multi-turn dialogue dataset between two persons (e.g., a user\n",
-    "and an assistant), with a list of messages [u1, a1, . . . , un, an], where un and an correspond to the user and\n",
-    "assistant messages for turn n, respectively. Then, we define an instruction, inst, that should be respected\n",
-    "throughout the dialogue. For example, inst could be “act as.” We can then synthetically concatenate this\n",
-    "instruction to all the user messages of the conversation.\n",
-    "Next, we can sample from this synthetic data using the latest RLHF model.
" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], - "source": [ - "from IPython.display import Markdown, display\n", - "\n", - "response = subdoc_summary_pack.run(\n", - " \"What is the functionality of latest ChatGPT memory.\"\n", - ")\n", - "display(Markdown(str(response)))\n", - "\n", - "for n in response.source_nodes:\n", - " display_source_node(n, source_length=10000, metadata_mode=\"all\")" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "llama_index_v3", - "language": "python", - "name": "llama_index_v3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/BUILD b/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/__init__.py b/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/__init__.py deleted file mode 100644 index d1e5d547adc6b..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from llama_index.packs.subdoc_summary.base import SubDocSummaryPack - - -__all__ = ["SubDocSummaryPack"] diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/base.py b/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/base.py deleted file mode 100644 index db2df79cadd73..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/llama_index/packs/subdoc_summary/base.py +++ /dev/null @@ -1,93 +0,0 @@ -"""Subdoc Summary.""" - -from typing import Any, Dict, List, Optional, List - -from llama_index.core.llama_pack import BaseLlamaPack -from llama_index.core.schema import Document -from llama_index.core.text_splitter import SentenceSplitter -from llama_index.core.utils import print_text -from llama_index.core import SummaryIndex, VectorStoreIndex -from llama_index.core.embeddings import BaseEmbedding -from llama_index.core.llms import LLM - - -DEFAULT_SUMMARY_PROMPT_STR = """\ -Please give a concise summary of the context in 1-2 sentences. 
-""" - - -class SubDocSummaryPack(BaseLlamaPack): - """Pack for injecting sub-doc metadata into each chunk.""" - - def __init__( - self, - documents: List[Document], - parent_chunk_size: int = 8192, - parent_chunk_overlap: int = 512, - child_chunk_size: int = 512, - child_chunk_overlap: int = 32, - summary_prompt_str: str = DEFAULT_SUMMARY_PROMPT_STR, - verbose: bool = False, - embed_model: Optional[BaseEmbedding] = None, - llm: Optional[LLM] = None, - ) -> None: - """Init params.""" - self.parent_chunk_size = parent_chunk_size - self.child_chunk_size = child_chunk_size - - self.parent_splitter = SentenceSplitter( - chunk_size=parent_chunk_size, chunk_overlap=parent_chunk_overlap - ) - self.child_splitter = SentenceSplitter( - chunk_size=child_chunk_size, chunk_overlap=child_chunk_overlap - ) - - self.summary_prompt_str = summary_prompt_str - self.embed_model = embed_model - self.llm = llm - - parent_nodes = self.parent_splitter.get_nodes_from_documents(documents) - all_child_nodes = [] - # For each parent node, extract the child nodes and print the text - for idx, parent_node in enumerate(parent_nodes): - if verbose: - print_text( - f"> Processing parent chunk {idx + 1} of {len(parent_nodes)}\n", - color="blue", - ) - # get summary - summary_index = SummaryIndex([parent_node]) - summary_query_engine = summary_index.as_query_engine( - response_mode="tree_summarize" - ) - parent_summary = summary_query_engine.query(DEFAULT_SUMMARY_PROMPT_STR) - if verbose: - print_text(f"Extracted summary: {parent_summary}\n", color="pink") - - # attach summary to all child nodes - child_nodes = self.child_splitter.get_nodes_from_documents([parent_node]) - for child_node in child_nodes: - child_node.metadata["context_summary"] = str(parent_summary) - - all_child_nodes.extend(child_nodes) - - # build vector index for child nodes - self.vector_index = VectorStoreIndex( - all_child_nodes, embed_model=self.embed_model - ) - self.vector_retriever = self.vector_index.as_retriever() - self.vector_query_engine = self.vector_index.as_query_engine(llm=llm) - - self.verbose = verbose - - def get_modules(self) -> Dict[str, Any]: - """Get modules.""" - return { - "vector_index": self.vector_index, - "vector_retriever": self.vector_retriever, - "vector_query_engine": self.vector_query_engine, - } - - def run(self, *args: Any, **kwargs: Any) -> Any: - """Run the pipeline.""" - return self.vector_query_engine.query(*args, **kwargs) diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/pyproject.toml b/llama-index-packs/llama-index-packs-subdoc-summary/pyproject.toml deleted file mode 100644 index ed338121c5aa5..0000000000000 --- a/llama-index-packs/llama-index-packs-subdoc-summary/pyproject.toml +++ /dev/null @@ -1,62 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.packs.subdoc_summary" - -[tool.llamahub.class_authors] -SubDocSummaryPack = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index packs subdoc-summary implementation" -exclude = ["**/BUILD"] -license = "MIT" -name = "llama-index-packs-subdoc-summary" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] 
-python = ">=3.9,<4.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-packs/llama-index-packs-subdoc-summary/tests/__init__.py b/llama-index-packs/llama-index-packs-subdoc-summary/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-vanna/.gitignore b/llama-index-packs/llama-index-packs-vanna/.gitignore deleted file mode 100644 index 990c18de22908..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/.gitignore +++ /dev/null @@ -1,153 +0,0 @@ -llama_index/_static -.DS_Store -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -bin/ -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -etc/ -include/ -lib/ -lib64/ -parts/ -sdist/ -share/ -var/ -wheels/ -pip-wheel-metadata/ -share/python-wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.nox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -*.py,cover -.hypothesis/ -.pytest_cache/ -.ruff_cache - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 -db.sqlite3-journal - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints -notebooks/ - -# IPython -profile_default/ -ipython_config.py - -# pyenv -.python-version - -# pipenv -# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. -# However, in case of collaboration, if having platform-specific dependencies or dependencies -# having no cross-platform support, pipenv may install dependencies that don't work, or not -# install all needed dependencies. -#Pipfile.lock - -# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow
-__pypackages__/
-
-# Celery stuff
-celerybeat-schedule
-celerybeat.pid
-
-# SageMath parsed files
-*.sage.py
-
-# Environments
-.env
-.venv
-env/
-venv/
-ENV/
-env.bak/
-venv.bak/
-pyvenv.cfg
-
-# Spyder project settings
-.spyderproject
-.spyproject
-
-# Rope project settings
-.ropeproject
-
-# mkdocs documentation
-/site
-
-# mypy
-.mypy_cache/
-.dmypy.json
-dmypy.json
-
-# Pyre type checker
-.pyre/
-
-# Jetbrains
-.idea
-modules/
-*.swp
-
-# VsCode
-.vscode
-
-# pipenv
-Pipfile
-Pipfile.lock
-
-# pyright
-pyrightconfig.json
diff --git a/llama-index-packs/llama-index-packs-vanna/BUILD b/llama-index-packs/llama-index-packs-vanna/BUILD
deleted file mode 100644
index 0896ca890d8bf..0000000000000
--- a/llama-index-packs/llama-index-packs-vanna/BUILD
+++ /dev/null
@@ -1,3 +0,0 @@
-poetry_requirements(
-    name="poetry",
-)
diff --git a/llama-index-packs/llama-index-packs-vanna/CHANGELOG.md b/llama-index-packs/llama-index-packs-vanna/CHANGELOG.md
deleted file mode 100644
index 36bff877abcbe..0000000000000
--- a/llama-index-packs/llama-index-packs-vanna/CHANGELOG.md
+++ /dev/null
@@ -1,5 +0,0 @@
-# CHANGELOG
-
-## [0.1.2] - 2024-02-13
-
-- Add maintainers and keywords from library.json (llamahub)
diff --git a/llama-index-packs/llama-index-packs-vanna/Makefile b/llama-index-packs/llama-index-packs-vanna/Makefile
deleted file mode 100644
index b9eab05aa3706..0000000000000
--- a/llama-index-packs/llama-index-packs-vanna/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-GIT_ROOT ?= $(shell git rev-parse --show-toplevel)
-
-help:	## Show all Makefile targets.
-	@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[33m%-30s\033[0m %s\n", $$1, $$2}'
-
-format:	## Run code autoformatters (black).
-	pre-commit install
-	git ls-files | xargs pre-commit run black --files
-
-lint:	## Run linters: pre-commit (black, ruff, codespell) and mypy
-	pre-commit install && git ls-files | xargs pre-commit run --show-diff-on-failure --files
-
-test:	## Run tests via pytest.
-	pytest tests
-
-watch-docs:	## Build and watch documentation.
-	sphinx-autobuild docs/ docs/_build/html --open-browser --watch $(GIT_ROOT)/llama_index/
diff --git a/llama-index-packs/llama-index-packs-vanna/README.md b/llama-index-packs/llama-index-packs-vanna/README.md
deleted file mode 100644
index 42fde06f552d0..0000000000000
--- a/llama-index-packs/llama-index-packs-vanna/README.md
+++ /dev/null
@@ -1,55 +0,0 @@
-# Vanna AI LlamaPack
-
-Vanna AI is an open-source RAG framework for SQL generation. It works in two steps:
-
-1. Train a RAG model on your data
-2. Ask questions (the reference corpus is used to generate SQL queries that can run on your db).
-
-Check out the [Github project](https://github.com/vanna-ai/vanna) and the [docs](https://vanna.ai/docs/) for more details.
-
-This LlamaPack creates a simple `VannaQueryEngine` with vanna, ChromaDB and OpenAI, and allows you to train and ask questions over a SQL database.
-
-## CLI Usage
-
-You can download llamapacks directly using `llamaindex-cli`, which comes installed with the `llama-index` python package:
-
-```bash
-llamaindex-cli download-llamapack VannaPack --download-dir ./vanna_pack
-```
-
-You can then inspect the files at `./vanna_pack` and use them as a template for your own project!
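-
-As the pack implementation shows, `run()` returns a llama-index `Response` whose `metadata` dict carries both the generated SQL and the result `DataFrame`. A minimal sketch, assuming a `pack` configured as in the Code Usage section below:
-
-```python
-# Sketch: recover the generated SQL and the result DataFrame after a query.
-# Assumes `pack` is a VannaPack set up as in the Code Usage section below.
-response = pack.run("List some sample albums")
-
-print(response.metadata["sql"])  # the SQL that vanna generated
-print(response.metadata["df"])  # the pandas DataFrame from running that SQL
-```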
-
-## Code Usage
-
-You can download the pack to a `./vanna_pack` directory:
-
-```python
-from llama_index.core.llama_pack import download_llama_pack
-
-# download and install dependencies
-VannaPack = download_llama_pack("VannaPack", "./vanna_pack")
-```
-
-From here, you can use the pack, or inspect and modify the pack in `./vanna_pack`.
-
-Then, you can set up the pack like so:
-
-```python
-pack = VannaPack(
-    openai_api_key="<openai_api_key>",
-    sql_db_url="chinook.db",
-    openai_model="gpt-3.5-turbo",
-)
-```
-
-The `run()` function is a light wrapper around the underlying `VannaQueryEngine`, which generates SQL for your question and runs it against the database.
-
-```python
-response = pack.run("List some sample albums")
-```
-
-You can also use modules individually.
-
-```python
-query_engine = pack.get_modules()["vanna_query_engine"]
-```
diff --git a/llama-index-packs/llama-index-packs-vanna/examples/vanna.ipynb b/llama-index-packs/llama-index-packs-vanna/examples/vanna.ipynb
deleted file mode 100644
index f6f3fff2d0076..0000000000000
--- a/llama-index-packs/llama-index-packs-vanna/examples/vanna.ipynb
+++ /dev/null
@@ -1,427 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "id": "d159d5b6-d456-46ba-8eb6-b88217de2940",
-   "metadata": {},
-   "source": [
-    "# Vanna AI LlamaPack\n",
-    "\n",
-    "Vanna AI is an open-source RAG framework for SQL generation. It works in two steps:\n",
-    "1. Train a RAG model on your data\n",
-    "2. Ask questions (the reference corpus is used to generate SQL queries that can run on your db).\n",
-    "\n",
-    "Check out the [Github project](https://github.com/vanna-ai/vanna) and the [docs](https://vanna.ai/docs/) for more details.\n",
-    "\n",
-    "This LlamaPack creates a simple `VannaQueryEngine` with vanna, ChromaDB and OpenAI, and allows you to train and ask questions over a SQL database."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "41b2ac47",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "%pip install llama-index-packs-vanna"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "24495652-d1e8-48a6-916f-0c947605c5b8",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Option: if developing with the llama_hub package\n",
-    "from llama_index.packs.vanna import VannaPack\n",
-    "\n",
-    "# Option: download_llama_pack\n",
-    "from llama_index.core.llama_pack import download_llama_pack\n",
-    "\n",
-    "VannaPack = download_llama_pack(\n",
-    "    \"VannaPack\",\n",
-    "    \"./vanna_pack\",\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e544a40d-1283-4778-aa44-eb4cb801b506",
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "> Connected to db: chinook.db\n",
-      "> Training on CREATE TABLE \"albums\"\n",
-      "(\n",
-      "    [AlbumId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n",
-      "    [Title] NVARCHAR(160) NOT NULL,\n",
-      "    [ArtistId] INTEGER NOT NULL,\n",
-      "    FOREIGN KEY ([ArtistId]) REFERENCES \"artists\" ([ArtistId]) \n",
-      "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n",
-      ")\n",
-      "Adding ddl: CREATE TABLE \"albums\"\n",
-      "(\n",
-      "    [AlbumId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n",
-      "    [Title] NVARCHAR(160) NOT NULL,\n",
-      "    [ArtistId] INTEGER NOT NULL,\n",
-      "    FOREIGN KEY ([ArtistId]) REFERENCES \"artists\" ([ArtistId]) \n",
-      "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n",
-      ")\n",
-      "> Training on CREATE TABLE sqlite_sequence(name,seq)\n",
-      "Adding ddl: CREATE TABLE sqlite_sequence(name,seq)\n",
-      "> Training on CREATE TABLE \"artists\"\n",
-      "(\n",
-      "    [ArtistId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n",
-      "    [Name]
NVARCHAR(120)\n", - ")\n", - "Adding ddl: CREATE TABLE \"artists\"\n", - "(\n", - " [ArtistId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "> Training on CREATE TABLE \"customers\"\n", - "(\n", - " [CustomerId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [FirstName] NVARCHAR(40) NOT NULL,\n", - " [LastName] NVARCHAR(20) NOT NULL,\n", - " [Company] NVARCHAR(80),\n", - " [Address] NVARCHAR(70),\n", - " [City] NVARCHAR(40),\n", - " [State] NVARCHAR(40),\n", - " [Country] NVARCHAR(40),\n", - " [PostalCode] NVARCHAR(10),\n", - " [Phone] NVARCHAR(24),\n", - " [Fax] NVARCHAR(24),\n", - " [Email] NVARCHAR(60) NOT NULL,\n", - " [SupportRepId] INTEGER,\n", - " FOREIGN KEY ([SupportRepId]) REFERENCES \"employees\" ([EmployeeId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "Adding ddl: CREATE TABLE \"customers\"\n", - "(\n", - " [CustomerId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [FirstName] NVARCHAR(40) NOT NULL,\n", - " [LastName] NVARCHAR(20) NOT NULL,\n", - " [Company] NVARCHAR(80),\n", - " [Address] NVARCHAR(70),\n", - " [City] NVARCHAR(40),\n", - " [State] NVARCHAR(40),\n", - " [Country] NVARCHAR(40),\n", - " [PostalCode] NVARCHAR(10),\n", - " [Phone] NVARCHAR(24),\n", - " [Fax] NVARCHAR(24),\n", - " [Email] NVARCHAR(60) NOT NULL,\n", - " [SupportRepId] INTEGER,\n", - " FOREIGN KEY ([SupportRepId]) REFERENCES \"employees\" ([EmployeeId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "> Training on CREATE TABLE \"employees\"\n", - "(\n", - " [EmployeeId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [LastName] NVARCHAR(20) NOT NULL,\n", - " [FirstName] NVARCHAR(20) NOT NULL,\n", - " [Title] NVARCHAR(30),\n", - " [ReportsTo] INTEGER,\n", - " [BirthDate] DATETIME,\n", - " [HireDate] DATETIME,\n", - " [Address] NVARCHAR(70),\n", - " [City] NVARCHAR(40),\n", - " [State] NVARCHAR(40),\n", - " [Country] NVARCHAR(40),\n", - " [PostalCode] NVARCHAR(10),\n", - " [Phone] NVARCHAR(24),\n", - " [Fax] NVARCHAR(24),\n", - " [Email] NVARCHAR(60),\n", - " FOREIGN KEY ([ReportsTo]) REFERENCES \"employees\" ([EmployeeId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "Adding ddl: CREATE TABLE \"employees\"\n", - "(\n", - " [EmployeeId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [LastName] NVARCHAR(20) NOT NULL,\n", - " [FirstName] NVARCHAR(20) NOT NULL,\n", - " [Title] NVARCHAR(30),\n", - " [ReportsTo] INTEGER,\n", - " [BirthDate] DATETIME,\n", - " [HireDate] DATETIME,\n", - " [Address] NVARCHAR(70),\n", - " [City] NVARCHAR(40),\n", - " [State] NVARCHAR(40),\n", - " [Country] NVARCHAR(40),\n", - " [PostalCode] NVARCHAR(10),\n", - " [Phone] NVARCHAR(24),\n", - " [Fax] NVARCHAR(24),\n", - " [Email] NVARCHAR(60),\n", - " FOREIGN KEY ([ReportsTo]) REFERENCES \"employees\" ([EmployeeId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "> Training on CREATE TABLE \"genres\"\n", - "(\n", - " [GenreId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "Adding ddl: CREATE TABLE \"genres\"\n", - "(\n", - " [GenreId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "> Training on CREATE TABLE \"invoices\"\n", - "(\n", - " [InvoiceId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [CustomerId] INTEGER NOT NULL,\n", - " [InvoiceDate] DATETIME NOT NULL,\n", - " [BillingAddress] NVARCHAR(70),\n", - " [BillingCity] NVARCHAR(40),\n", - " [BillingState] NVARCHAR(40),\n", - " 
[BillingCountry] NVARCHAR(40),\n", - " [BillingPostalCode] NVARCHAR(10),\n", - " [Total] NUMERIC(10,2) NOT NULL,\n", - " FOREIGN KEY ([CustomerId]) REFERENCES \"customers\" ([CustomerId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "Adding ddl: CREATE TABLE \"invoices\"\n", - "(\n", - " [InvoiceId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [CustomerId] INTEGER NOT NULL,\n", - " [InvoiceDate] DATETIME NOT NULL,\n", - " [BillingAddress] NVARCHAR(70),\n", - " [BillingCity] NVARCHAR(40),\n", - " [BillingState] NVARCHAR(40),\n", - " [BillingCountry] NVARCHAR(40),\n", - " [BillingPostalCode] NVARCHAR(10),\n", - " [Total] NUMERIC(10,2) NOT NULL,\n", - " FOREIGN KEY ([CustomerId]) REFERENCES \"customers\" ([CustomerId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "> Training on CREATE TABLE \"invoice_items\"\n", - "(\n", - " [InvoiceLineId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [InvoiceId] INTEGER NOT NULL,\n", - " [TrackId] INTEGER NOT NULL,\n", - " [UnitPrice] NUMERIC(10,2) NOT NULL,\n", - " [Quantity] INTEGER NOT NULL,\n", - " FOREIGN KEY ([InvoiceId]) REFERENCES \"invoices\" ([InvoiceId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([TrackId]) REFERENCES \"tracks\" ([TrackId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "Adding ddl: CREATE TABLE \"invoice_items\"\n", - "(\n", - " [InvoiceLineId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [InvoiceId] INTEGER NOT NULL,\n", - " [TrackId] INTEGER NOT NULL,\n", - " [UnitPrice] NUMERIC(10,2) NOT NULL,\n", - " [Quantity] INTEGER NOT NULL,\n", - " FOREIGN KEY ([InvoiceId]) REFERENCES \"invoices\" ([InvoiceId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([TrackId]) REFERENCES \"tracks\" ([TrackId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "> Training on CREATE TABLE \"media_types\"\n", - "(\n", - " [MediaTypeId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "Adding ddl: CREATE TABLE \"media_types\"\n", - "(\n", - " [MediaTypeId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "> Training on CREATE TABLE \"playlists\"\n", - "(\n", - " [PlaylistId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "Adding ddl: CREATE TABLE \"playlists\"\n", - "(\n", - " [PlaylistId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(120)\n", - ")\n", - "> Training on CREATE TABLE \"playlist_track\"\n", - "(\n", - " [PlaylistId] INTEGER NOT NULL,\n", - " [TrackId] INTEGER NOT NULL,\n", - " CONSTRAINT [PK_PlaylistTrack] PRIMARY KEY ([PlaylistId], [TrackId]),\n", - " FOREIGN KEY ([PlaylistId]) REFERENCES \"playlists\" ([PlaylistId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([TrackId]) REFERENCES \"tracks\" ([TrackId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "Adding ddl: CREATE TABLE \"playlist_track\"\n", - "(\n", - " [PlaylistId] INTEGER NOT NULL,\n", - " [TrackId] INTEGER NOT NULL,\n", - " CONSTRAINT [PK_PlaylistTrack] PRIMARY KEY ([PlaylistId], [TrackId]),\n", - " FOREIGN KEY ([PlaylistId]) REFERENCES \"playlists\" ([PlaylistId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([TrackId]) REFERENCES \"tracks\" ([TrackId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "> Training on CREATE TABLE \"tracks\"\n", - "(\n", - " [TrackId] INTEGER PRIMARY 
KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(200) NOT NULL,\n", - " [AlbumId] INTEGER,\n", - " [MediaTypeId] INTEGER NOT NULL,\n", - " [GenreId] INTEGER,\n", - " [Composer] NVARCHAR(220),\n", - " [Milliseconds] INTEGER NOT NULL,\n", - " [Bytes] INTEGER,\n", - " [UnitPrice] NUMERIC(10,2) NOT NULL,\n", - " FOREIGN KEY ([AlbumId]) REFERENCES \"albums\" ([AlbumId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([GenreId]) REFERENCES \"genres\" ([GenreId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([MediaTypeId]) REFERENCES \"media_types\" ([MediaTypeId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "Adding ddl: CREATE TABLE \"tracks\"\n", - "(\n", - " [TrackId] INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL,\n", - " [Name] NVARCHAR(200) NOT NULL,\n", - " [AlbumId] INTEGER,\n", - " [MediaTypeId] INTEGER NOT NULL,\n", - " [GenreId] INTEGER,\n", - " [Composer] NVARCHAR(220),\n", - " [Milliseconds] INTEGER NOT NULL,\n", - " [Bytes] INTEGER,\n", - " [UnitPrice] NUMERIC(10,2) NOT NULL,\n", - " FOREIGN KEY ([AlbumId]) REFERENCES \"albums\" ([AlbumId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([GenreId]) REFERENCES \"genres\" ([GenreId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION,\n", - " FOREIGN KEY ([MediaTypeId]) REFERENCES \"media_types\" ([MediaTypeId]) \n", - "\t\tON DELETE NO ACTION ON UPDATE NO ACTION\n", - ")\n", - "> Training on CREATE TABLE sqlite_stat1(tbl,idx,stat)\n", - "Adding ddl: CREATE TABLE sqlite_stat1(tbl,idx,stat)\n" - ] - } - ], - "source": [ - "pack = VannaPack(\n", - " openai_api_key=\"sk-...\",\n", - " sql_db_url=\"chinook.db\",\n", - " openai_model=\"gpt-3.5-turbo\",\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e63e8a61-1fcf-40f9-b851-654cceb4d21e", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Number of requested results 10 is greater than number of elements in index 9, updating n_results = 9\n", - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. 
Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using model gpt-3.5-turbo for 1401.5 tokens (approx)\n" - ] - } - ], - "source": [ - "response = pack.run(\"What are some sample albums?\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "c5815dbc-408a-49fc-b471-2863921a764f", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Title\n" - ] - } - ], - "source": [ - "print(str(response))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8346e897-b8d3-4a76-965f-42d3836c052a", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Number of requested results 10 is greater than number of elements in index 9, updating n_results = 9\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Using model gpt-3.5-turbo for 1401.5 tokens (approx)\n" - ] - } - ], - "source": [ - "tmp = pack.get_modules()[\"vanna_query_engine\"].vn.ask(\n", - " \"What are some sample albums?\", visualize=False, print_results=False\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e7a87edb-b285-4f79-8cca-110a73758880", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n" - ] - } - ], - "source": [ - "print(type(tmp))" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "llama_hub", - "language": "python", - "name": "llama_hub" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/BUILD b/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/BUILD deleted file mode 100644 index db46e8d6c978c..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/BUILD +++ /dev/null @@ -1 +0,0 @@ -python_sources() diff --git a/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/__init__.py b/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/__init__.py deleted file mode 100644 index 16edf064c6f2c..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from llama_index.packs.vanna.base import VannaQueryEngine, VannaPack - - -__all__ = ["VannaPack", "VannaQueryEngine"] diff --git a/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/base.py b/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/base.py deleted file mode 100644 index 68f2f361a42ff..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/llama_index/packs/vanna/base.py +++ /dev/null @@ -1,117 +0,0 @@ -"""Vanna AI Pack. - -Uses: https://vanna.ai/. 
- -""" - -from typing import Any, Dict, Optional, cast - -from llama_index.core.llama_pack.base import BaseLlamaPack -from llama_index.core.query_engine import CustomQueryEngine -import pandas as pd -from llama_index.core.base.response.schema import RESPONSE_TYPE, Response - - -class VannaQueryEngine(CustomQueryEngine): - """Vanna query engine. - - Uses chromadb and OpenAI. - - """ - - openai_api_key: str - sql_db_url: str - - ask_kwargs: Dict[str, Any] - vn: Any - - def __init__( - self, - openai_api_key: str, - sql_db_url: str, - openai_model: str = "gpt-3.5-turbo", - ask_kwargs: Optional[Dict[str, Any]] = None, - verbose: bool = True, - **kwargs: Any, - ) -> None: - """Init params.""" - from vanna.openai.openai_chat import OpenAI_Chat - from vanna.chromadb.chromadb_vector import ChromaDB_VectorStore - - class MyVanna(ChromaDB_VectorStore, OpenAI_Chat): - def __init__(self, config: Any = None) -> None: - ChromaDB_VectorStore.__init__(self, config=config) - OpenAI_Chat.__init__(self, config=config) - - vn = MyVanna(config={"api_key": openai_api_key, "model": openai_model}) - vn.connect_to_sqlite(sql_db_url) - if verbose: - print(f"> Connected to db: {sql_db_url}") - - # get every table DDL from db - sql_results = cast( - pd.DataFrame, - vn.run_sql("SELECT sql FROM sqlite_master WHERE type='table';"), - ) - # go through every sql statement, do vn.train(ddl=ddl) on it - for idx, sql_row in sql_results.iterrows(): - if verbose: - print(f"> Training on {sql_row['sql']}") - vn.train(ddl=sql_row["sql"]) - - super().__init__( - openai_api_key=openai_api_key, - sql_db_url=sql_db_url, - vn=vn, - ask_kwargs=ask_kwargs or {}, - **kwargs, - ) - - def custom_query(self, query_str: str) -> RESPONSE_TYPE: - """Query.""" - from vanna.base import VannaBase - - vn = cast(VannaBase, self.vn) - ask_kwargs = {"visualize": False, "print_results": False} - ask_kwargs.update(self.ask_kwargs) - sql = vn.generate_sql( - query_str, - **ask_kwargs, - ) - result = vn.run_sql(sql) - if result is None: - raise ValueError("Vanna returned None.") - sql, df, _ = result - - return Response(response=str(df), metadata={"sql": sql, "df": df}) - - -class VannaPack(BaseLlamaPack): - """Vanna AI pack. - - Uses OpenAI and ChromaDB. 
Of course Vanna.AI allows you to connect to many more dbs - and use more models - feel free to refer to their page for more details: - https://vanna.ai/docs/snowflake-openai-vanna-vannadb.html - - """ - - def __init__( - self, - openai_api_key: str, - sql_db_url: str, - **kwargs: Any, - ) -> None: - """Init params.""" - self.vanna_query_engine = VannaQueryEngine( - openai_api_key=openai_api_key, sql_db_url=sql_db_url, **kwargs - ) - - def get_modules(self) -> Dict[str, Any]: - """Get modules.""" - return { - "vanna_query_engine": self.vanna_query_engine, - } - - def run(self, *args: Any, **kwargs: Any) -> Any: - """Run the pipeline.""" - return self.vanna_query_engine.query(*args, **kwargs) diff --git a/llama-index-packs/llama-index-packs-vanna/pyproject.toml b/llama-index-packs/llama-index-packs-vanna/pyproject.toml deleted file mode 100644 index fadf553675ccb..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/pyproject.toml +++ /dev/null @@ -1,67 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.packs.vanna" - -[tool.llamahub.class_authors] -VannaPack = "jerryjliu" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index packs vanna integration" -exclude = ["**/BUILD"] -keywords = ["ai", "sql", "text-to-sql", "vanna"] -license = "MIT" -maintainers = ["jerryjliu"] -name = "llama-index-packs-vanna" -readme = "README.md" -version = "0.3.0" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -kaleido = "0.2.1" -vanna = ">0.5.5" -pandas = "*" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - -[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/" diff --git a/llama-index-packs/llama-index-packs-vanna/tests/BUILD b/llama-index-packs/llama-index-packs-vanna/tests/BUILD deleted file mode 100644 index 619cac15ff840..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/tests/BUILD +++ /dev/null @@ -1,3 +0,0 @@ -python_tests( - interpreter_constraints=["==3.9.*", "==3.10.*"], -) diff --git a/llama-index-packs/llama-index-packs-vanna/tests/__init__.py b/llama-index-packs/llama-index-packs-vanna/tests/__init__.py deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/llama-index-packs/llama-index-packs-vanna/tests/test_packs_vanna.py b/llama-index-packs/llama-index-packs-vanna/tests/test_packs_vanna.py deleted file mode 100644 index aa9e0575a3bb2..0000000000000 --- a/llama-index-packs/llama-index-packs-vanna/tests/test_packs_vanna.py +++ /dev/null @@ -1,7 +0,0 @@ -from llama_index.core.llama_pack import BaseLlamaPack -from llama_index.packs.vanna import VannaPack - - -def test_class(): 
- names_of_base_classes = [b.__name__ for b in VannaPack.__mro__] - assert BaseLlamaPack.__name__ in names_of_base_classes From 2a5d2501472dc46c841d412fcccd79ed7d1a1a10 Mon Sep 17 00:00:00 2001 From: Andrei Fajardo Date: Tue, 10 Dec 2024 13:22:41 -0500 Subject: [PATCH 2/3] rm from docs --- .../embeddings/huggingface_itrex.md | 4 - docs/docs/api_reference/embeddings/octoai.md | 4 - docs/docs/api_reference/llms/solar.md | 4 - docs/docs/api_reference/llms/unify.md | 4 - .../api_reference/packs/docugami_kg_rag.md | 4 - docs/docs/api_reference/packs/finchat.md | 4 - .../packs/redis_ingestion_pipeline.md | 4 - docs/docs/api_reference/packs/searchain.md | 4 - .../api_reference/packs/subdoc_summary.md | 4 - docs/docs/api_reference/packs/vanna.md | 4 - .../api_reference/readers/azure_devops.md | 4 - docs/docs/api_reference/readers/clickhouse.md | 4 - .../docs/api_reference/readers/feishu_wiki.md | 4 - docs/docs/api_reference/readers/openapi.md | 4 - docs/docs/api_reference/readers/readme.md | 4 - .../api_reference/readers/snscrape_twitter.md | 4 - .../api_reference/readers/youtube_metadata.md | 4 - .../storage/vector_store/chatgpt_plugin.md | 4 - .../storage/vector_store/metal.md | 4 - .../tools/passio_nutrition_ai.md | 4 - docs/docs/examples/embeddings/itrex.ipynb | 94 ------ docs/docs/examples/embeddings/octoai.ipynb | 138 -------- docs/docs/examples/llm/solar.ipynb | 88 ------ docs/docs/examples/llm/unify.ipynb | 294 ------------------ .../vector_stores/MetalIndexDemo.ipynb | 177 ----------- docs/mkdocs.yml | 33 -- .../llama_index/cli/upgrade/mappings.json | 22 -- .../core/command_line/mappings.json | 22 -- 28 files changed, 948 deletions(-) delete mode 100644 docs/docs/api_reference/embeddings/huggingface_itrex.md delete mode 100644 docs/docs/api_reference/embeddings/octoai.md delete mode 100644 docs/docs/api_reference/llms/solar.md delete mode 100644 docs/docs/api_reference/llms/unify.md delete mode 100644 docs/docs/api_reference/packs/docugami_kg_rag.md delete mode 100644 docs/docs/api_reference/packs/finchat.md delete mode 100644 docs/docs/api_reference/packs/redis_ingestion_pipeline.md delete mode 100644 docs/docs/api_reference/packs/searchain.md delete mode 100644 docs/docs/api_reference/packs/subdoc_summary.md delete mode 100644 docs/docs/api_reference/packs/vanna.md delete mode 100644 docs/docs/api_reference/readers/azure_devops.md delete mode 100644 docs/docs/api_reference/readers/clickhouse.md delete mode 100644 docs/docs/api_reference/readers/feishu_wiki.md delete mode 100644 docs/docs/api_reference/readers/openapi.md delete mode 100644 docs/docs/api_reference/readers/readme.md delete mode 100644 docs/docs/api_reference/readers/snscrape_twitter.md delete mode 100644 docs/docs/api_reference/readers/youtube_metadata.md delete mode 100644 docs/docs/api_reference/storage/vector_store/chatgpt_plugin.md delete mode 100644 docs/docs/api_reference/storage/vector_store/metal.md delete mode 100644 docs/docs/api_reference/tools/passio_nutrition_ai.md delete mode 100644 docs/docs/examples/embeddings/itrex.ipynb delete mode 100644 docs/docs/examples/embeddings/octoai.ipynb delete mode 100644 docs/docs/examples/llm/solar.ipynb delete mode 100644 docs/docs/examples/llm/unify.ipynb delete mode 100644 docs/docs/examples/vector_stores/MetalIndexDemo.ipynb diff --git a/docs/docs/api_reference/embeddings/huggingface_itrex.md b/docs/docs/api_reference/embeddings/huggingface_itrex.md deleted file mode 100644 index 98c987f9abda1..0000000000000 --- a/docs/docs/api_reference/embeddings/huggingface_itrex.md 
+++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.embeddings.huggingface_itrex - options: - members: - - QuantizedBgeEmbedding diff --git a/docs/docs/api_reference/embeddings/octoai.md b/docs/docs/api_reference/embeddings/octoai.md deleted file mode 100644 index 60071a4c2cf6d..0000000000000 --- a/docs/docs/api_reference/embeddings/octoai.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.embeddings.octoai - options: - members: - - OctoAIEmbeddings diff --git a/docs/docs/api_reference/llms/solar.md b/docs/docs/api_reference/llms/solar.md deleted file mode 100644 index a3a223bee9901..0000000000000 --- a/docs/docs/api_reference/llms/solar.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.llms.solar - options: - members: - - Solar diff --git a/docs/docs/api_reference/llms/unify.md b/docs/docs/api_reference/llms/unify.md deleted file mode 100644 index b69773c0c9b6a..0000000000000 --- a/docs/docs/api_reference/llms/unify.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.llms.unify - options: - members: - - Unify diff --git a/docs/docs/api_reference/packs/docugami_kg_rag.md b/docs/docs/api_reference/packs/docugami_kg_rag.md deleted file mode 100644 index 6776675e95ff5..0000000000000 --- a/docs/docs/api_reference/packs/docugami_kg_rag.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.packs.docugami_kg_rag - options: - members: - - DocugamiKgRagPack diff --git a/docs/docs/api_reference/packs/finchat.md b/docs/docs/api_reference/packs/finchat.md deleted file mode 100644 index 41f52a108e151..0000000000000 --- a/docs/docs/api_reference/packs/finchat.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.packs.finchat - options: - members: - - FinanceChatPack diff --git a/docs/docs/api_reference/packs/redis_ingestion_pipeline.md b/docs/docs/api_reference/packs/redis_ingestion_pipeline.md deleted file mode 100644 index 1fbef21a5d5e2..0000000000000 --- a/docs/docs/api_reference/packs/redis_ingestion_pipeline.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.packs.redis_ingestion_pipeline - options: - members: - - RedisIngestionPipelinePack diff --git a/docs/docs/api_reference/packs/searchain.md b/docs/docs/api_reference/packs/searchain.md deleted file mode 100644 index 0ca0f9cc168db..0000000000000 --- a/docs/docs/api_reference/packs/searchain.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.packs.searchain - options: - members: - - SearChainPack diff --git a/docs/docs/api_reference/packs/subdoc_summary.md b/docs/docs/api_reference/packs/subdoc_summary.md deleted file mode 100644 index 393009be341e9..0000000000000 --- a/docs/docs/api_reference/packs/subdoc_summary.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.packs.subdoc_summary - options: - members: - - SubDocSummaryPack diff --git a/docs/docs/api_reference/packs/vanna.md b/docs/docs/api_reference/packs/vanna.md deleted file mode 100644 index 639c67c3ab078..0000000000000 --- a/docs/docs/api_reference/packs/vanna.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.packs.vanna - options: - members: - - VannaPack diff --git a/docs/docs/api_reference/readers/azure_devops.md b/docs/docs/api_reference/readers/azure_devops.md deleted file mode 100644 index 968e69eea5324..0000000000000 --- a/docs/docs/api_reference/readers/azure_devops.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.azure_devops - options: - members: - - AzureDevopsReader diff --git a/docs/docs/api_reference/readers/clickhouse.md b/docs/docs/api_reference/readers/clickhouse.md deleted file mode 100644 index d3b2ab411b4b8..0000000000000 --- a/docs/docs/api_reference/readers/clickhouse.md +++ 
/dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.clickhouse - options: - members: - - ClickHouseReader diff --git a/docs/docs/api_reference/readers/feishu_wiki.md b/docs/docs/api_reference/readers/feishu_wiki.md deleted file mode 100644 index fe649b280ad28..0000000000000 --- a/docs/docs/api_reference/readers/feishu_wiki.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.feishu_wiki - options: - members: - - FeishuWikiReader diff --git a/docs/docs/api_reference/readers/openapi.md b/docs/docs/api_reference/readers/openapi.md deleted file mode 100644 index 4e3d7754b23c8..0000000000000 --- a/docs/docs/api_reference/readers/openapi.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.openapi - options: - members: - - OpenAPIReader diff --git a/docs/docs/api_reference/readers/readme.md b/docs/docs/api_reference/readers/readme.md deleted file mode 100644 index 0280cc1899d33..0000000000000 --- a/docs/docs/api_reference/readers/readme.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.readme - options: - members: - - ReadmeReader diff --git a/docs/docs/api_reference/readers/snscrape_twitter.md b/docs/docs/api_reference/readers/snscrape_twitter.md deleted file mode 100644 index 8b7096a2c2ead..0000000000000 --- a/docs/docs/api_reference/readers/snscrape_twitter.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.snscrape_twitter - options: - members: - - SnscrapeTwitterReader diff --git a/docs/docs/api_reference/readers/youtube_metadata.md b/docs/docs/api_reference/readers/youtube_metadata.md deleted file mode 100644 index a8a7fd8d00ed3..0000000000000 --- a/docs/docs/api_reference/readers/youtube_metadata.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.readers.youtube_metadata - options: - members: - - YoutubeTranscriptReader diff --git a/docs/docs/api_reference/storage/vector_store/chatgpt_plugin.md b/docs/docs/api_reference/storage/vector_store/chatgpt_plugin.md deleted file mode 100644 index 76a756c480190..0000000000000 --- a/docs/docs/api_reference/storage/vector_store/chatgpt_plugin.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.vector_stores.chatgpt_plugin - options: - members: - - ChatGPTRetrievalPluginClient diff --git a/docs/docs/api_reference/storage/vector_store/metal.md b/docs/docs/api_reference/storage/vector_store/metal.md deleted file mode 100644 index 7d68ffa95fc71..0000000000000 --- a/docs/docs/api_reference/storage/vector_store/metal.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.vector_stores.metal - options: - members: - - MetalVectorStore diff --git a/docs/docs/api_reference/tools/passio_nutrition_ai.md b/docs/docs/api_reference/tools/passio_nutrition_ai.md deleted file mode 100644 index 322464bc0ed19..0000000000000 --- a/docs/docs/api_reference/tools/passio_nutrition_ai.md +++ /dev/null @@ -1,4 +0,0 @@ -::: llama_index.tools.passio_nutrition_ai - options: - members: - - NutritionAIToolSpec diff --git a/docs/docs/examples/embeddings/itrex.ipynb b/docs/docs/examples/embeddings/itrex.ipynb deleted file mode 100644 index 15585e74ed8a8..0000000000000 --- a/docs/docs/examples/embeddings/itrex.ipynb +++ /dev/null @@ -1,94 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Optimized BGE Embedding Model using Intel® Extension for Transformers\n", - "\n", - "LlamaIndex has support for loading quantized BGE embedding models generated by [Intel® Extension for Transformers](https://github.com/intel/intel-extension-for-transformers) (ITREX) and use ITREX [Neural 
Engine](https://github.com/intel/intel-extension-for-transformers/blob/main/intel_extension_for_transformers/llm/runtime/deprecated/docs/Installation.md), a high-performance NLP backend, to accelerate the inference of models without compromising accuracy.\n", - "\n", - "Refer to our blog of [Efficient Natural Language Embedding Models with Intel Extension for Transformers](https://medium.com/intel-analytics-software/efficient-natural-language-embedding-models-with-intel-extension-for-transformers-2b6fcd0f8f34) and [BGE optimization example](https://github.com/intel/intel-extension-for-transformers/tree/main/examples/huggingface/pytorch/text-embedding/deployment/mteb/bge) for more details.\"\n", - "\n", - "In order to be able to load and use the quantized models, install the required dependency `pip install intel-extension-for-transformers torch accelerate datasets onnx`. \n", - "\n", - "Loading is done using the class `ItrexQuantizedBgeEmbedding`; usage is similar to any HuggingFace local embedding model; See example:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-embeddings-huggingface-itrex" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/yuwenzho/.conda/envs/yuwen/lib/python3.9/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n", - "2024-03-29 15:40:42 [INFO] Start to extarct onnx model ops...\n", - "2024-03-29 15:40:42 [INFO] Extract onnxruntime model done...\n", - "2024-03-29 15:40:42 [INFO] Start to implement Sub-Graph matching and replacing...\n", - "2024-03-29 15:40:43 [INFO] Sub-Graph match and replace done...\n" - ] - } - ], - "source": [ - "from llama_index.embeddings.huggingface_itrex import ItrexQuantizedBgeEmbedding\n", - "\n", - "embed_model = ItrexQuantizedBgeEmbedding(\n", - " \"Intel/bge-small-en-v1.5-sts-int8-static-inc\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "384\n", - "[-0.005477035418152809, -0.000541043293196708, 0.036467909812927246, -0.04861024394631386, 0.0288068987429142]\n" - ] - } - ], - "source": [ - "embeddings = embed_model.get_text_embedding(\"Hello World!\")\n", - "print(len(embeddings))\n", - "print(embeddings[:5])" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "yuwen", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/docs/examples/embeddings/octoai.ipynb b/docs/docs/examples/embeddings/octoai.ipynb deleted file mode 100644 index 1502f72046f9b..0000000000000 --- a/docs/docs/examples/embeddings/octoai.ipynb +++ /dev/null @@ -1,138 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# OctoAI Embeddings\n", - "\n", - "This guide shows you how to use [OctoAI's 
Embeddings](https://octo.ai/docs/text-gen-solution/getting-started) through LlamaIndex." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, let's install LlamaIndex and OctoAI's dependencies" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-embeddings-octoai" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!pip install llama-index" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Include your OctoAI API key below. You can get yours at [OctoAI](https://octo.ai). \n", - "\n", - "[Here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) are some instructions in case you need more guidance." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "OCTOAI_API_KEY = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "We can then query embeddings on OctoAI" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.embeddings.octoai import OctoAIEmbedding\n", - "\n", - "embed_model = OctoAIEmbedding(api_key=OCTOAI_API_KEY)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Basic embedding example\n", - "embeddings = embed_model.get_text_embedding(\"How do I sail to the moon?\")\n", - "print(len(embeddings), embeddings[:10])\n", - "assert len(embeddings) == 1024" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using Batched Embeddings" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "texts = [\n", - " \"How do I sail to the moon?\",\n", - " \"What is the best way to cook a steak?\",\n", - " \"How do I apply for a job?\",\n", - "]\n", - "\n", - "embeddings = embed_model.get_text_embedding_batch(texts)\n", - "print(len(embeddings))\n", - "assert len(embeddings) == 3\n", - "assert len(embeddings[0]) == 1024" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/docs/docs/examples/llm/solar.ipynb b/docs/docs/examples/llm/solar.ipynb deleted file mode 100644 index 8519c63aed4a3..0000000000000 --- a/docs/docs/examples/llm/solar.ipynb +++ /dev/null @@ -1,88 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "id": "cae1b4a8", - "metadata": {}, - "source": [ - "# Solar LLM\n", - "\n", - "Warning: Solar LLM is deprecated. Please use Upstage LLM instead. 
\n", - "See [Upstage LLM](https://docs.llamaindex.ai/en/stable/examples/llm/upstage/)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "715d392e", - "metadata": {}, - "outputs": [], - "source": [ - "!pip install llama-index-llms-solar" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1fdc2dc3-1454-41e9-8862-9dfd75b5b61f", - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"SOLAR_API_KEY\"] = \"SOLAR_API_KEY\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "26b168b8-9ebf-479d-ac53-28bc952da354", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "assistant: Mother also went into the room.\n" - ] - } - ], - "source": [ - "# from llama_index.llms import\n", - "from llama_index.llms.solar import Solar\n", - "from llama_index.core.base.llms.types import ChatMessage, MessageRole\n", - "\n", - "llm = Solar(model=\"solar-1-mini-chat\", is_chat_model=True)\n", - "response = llm.chat(\n", - " messages=[\n", - " ChatMessage(role=\"user\", content=\"아버지가방에들어가셨다\"),\n", - " ChatMessage(role=\"assistant\", content=\"Father went into his room\"),\n", - " ChatMessage(role=\"user\", content=\"엄마도들어가셨다\"),\n", - " ]\n", - ")\n", - "\n", - "print(response)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/docs/docs/examples/llm/unify.ipynb b/docs/docs/examples/llm/unify.ipynb deleted file mode 100644 index 9fd5a75c0ac71..0000000000000 --- a/docs/docs/examples/llm/unify.ipynb +++ /dev/null @@ -1,294 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Unify\n", - "\n", - "[Unify](https://unify.ai) is your centralized platform for LLM endpoints, enabling you to route your queries to the best LLM endpoints, benchmark performance, and seamlessly switch providers with a single API key." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Installation" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "First, let's install LlamaIndex 🦙 and the Unify integration." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-llms-unify llama-index" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Environment Setup\n", - "\n", - "Make sure to set the `UNIFY_API_KEY` environment variable. You can get a key from the [Unify Console](https://console.unify.ai/login)." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "\n", - "os.environ[\"UNIFY_API_KEY\"] = \"\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Using LlamaIndex with Unify" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Basic Usage \n", - "\n", - "Below we initialize and query a chat model using the `llama-3-70b-chat` endpoint from `together-ai`." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "CompletionResponse(text=\"I'm not actually a llama, but I'm doing great, thanks for asking! I'm a large language model, so I don't have feelings like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?\", additional_kwargs={}, raw={'id': '88b5fcf02e259527-LHR', 'choices': [Choice(finish_reason='stop', index=0, logprobs=None, message=ChatCompletionMessage(content=\"I'm not actually a llama, but I'm doing great, thanks for asking! I'm a large language model, so I don't have feelings like humans do, but I'm always happy to chat with you and help with any questions or topics you'd like to discuss. How about you? How's your day going?\", role='assistant', function_call=None, tool_calls=None))], 'created': 1716980504, 'model': 'llama-3-70b-chat@together-ai', 'object': 'chat.completion', 'system_fingerprint': None, 'usage': CompletionUsage(completion_tokens=67, prompt_tokens=17, total_tokens=84, cost=7.56e-05)}, logprobs=None, delta=None)" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "from llama_index.llms.unify import Unify\n", - "\n", - "llm = Unify(model=\"llama-3-70b-chat@together-ai\")\n", - "llm.complete(\"How are you today, llama?\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Single Sign-On\n", - "\n", - "You can use Unify's SSO to query endpoints from different providers without creating accounts with all of them. For example, all of these are valid endpoints:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"llama-2-70b-chat@together-ai\")\n", - "llm = Unify(model=\"gpt-3.5-turbo@openai\")\n", - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@mistral-ai\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "This allows you to quickly switch and test different models and providers. You can look at all the available models/providers [here](https://unify.ai/hub)!" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Runtime Dynamic Routing" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "As evidenced by our [benchmarks](https://unify.ai/benchmarks), the optimal provider for each model varies by geographic location and time of day due to fluctuating API performance. To circumvent this, we automatically direct your requests to the \"top performing provider\" at runtime. To enable this feature, simply replace your query's provider with one of the [available routing modes](https://unify.ai/docs/api/deploy_router.html#optimizing-a-metric). Let's look at some examples:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(\n", - " model=\"llama-2-70b-chat@input-cost\"\n", - ") # route to lowest input cost provider\n", - "llm = Unify(\n", - " model=\"gpt-3.5-turbo@itl\"\n", - ") # route to provider with lowest inter token latency\n", - "llm = Unify(\n", - " model=\"mixtral-8x7b-instruct-v0.1@ttft\"\n", - ") # route to provider with lowest time to first token."
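Since these routing modes resolve to a concrete provider only at request time, it can be useful to confirm which provider actually served a call. Here is a minimal sketch, assuming (as the streaming cell further below shows) that the raw response carries the resolved endpoint in its "model" field:

from llama_index.llms.unify import Unify

# route to the lowest input-cost provider, then check which provider answered
llm = Unify(model="llama-2-70b-chat@input-cost")
response = llm.complete("Hello!")
print(response.raw["model"])  # e.g. "llama-2-70b-chat@<resolved-provider>"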
- ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Quality Routing\n", - "Unify routes your queries to the best LLM on every prompt to consistently achieve better quality outputs than using a single, all-purpose, powerful model, at a fraction of the cost. This is achieved by using smaller models for simpler tasks and only using larger ones to handle complex queries.\n", - "\n", - "The router is benchmarked on various datasets such as `Open Hermes`, `GSM8K`, `HellaSwag`, `MMLU`, and `MT-Bench`, revealing that it can perform better than individual endpoints on average, as explained [here](https://unify.ai/docs/concepts/routing.html#quality-routing). You can choose different configurations of the router for a particular dataset from the [chat interface](https://unify.ai/chat), as shown below:\n", - "\n", - "\"Unify" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"router_2.58e-01_9.51e-04_3.91e-03@unify\")\n", - "llm = Unify(model=\"router_2.12e-01_5.00e-04_2.78e-04@unify\")\n", - "llm = Unify(model=\"router_2.12e-01_5.00e-04_2.78e-04@unify\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "To learn more about quality routing, please refer to this [video](https://www.youtube.com/watch?v=ZpY6SIkBosE&feature=youtu.be)." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Streaming and optimizing for latency\n", - "\n", - "If you are building an application where responsiveness is key, you most likely want to get a streaming response. On top of that, ideally you would use the provider with the lowest Time to First Token to reduce the time your users are waiting for a response. Using Unify, this would look something like:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@ttft\")\n", - "\n", - "response = llm.stream_complete(\n", - " \"Translate the following to German: \"\n", - " \"Hey, there's an emergency in translation street, \"\n", - " \"please send help asap!\"\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model and provider are : mixtral-8x7b-instruct-v0.1@mistral-ai\n", - "\n", - "Hallo, es gibt einen Notfall in der Übersetzungsstraße, bitte senden Sie Hilfe so schnell wie möglich!\n", - "\n", - "(Note: This is a loose translation and the phrase \"Übersetzungsstraße\" does not literally exist, but I tried to convey the same meaning as the original message.)" - ] - } - ], - "source": [ - "show_provider = True\n", - "for r in response:\n", - " if show_provider:\n", - " print(f\"Model and provider are : {r.raw['model']}\\n\")\n", - " show_provider = False\n", - " print(r.delta, end=\"\", flush=True)" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### Async calls and Lowest Input Cost\n", - "\n", - "Last but not least, you can also run multiple requests asynchronously. For tasks such as document summarization, optimizing for input costs is crucial. We can use the `input-cost` dynamic routing mode to route our queries to the cheapest provider."
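The next cell awaits a single `acomplete` call. To fan out several requests concurrently, which is the scenario this section describes, the same coroutine can be gathered; here is a minimal sketch (the document texts are placeholders):

import asyncio

from llama_index.llms.unify import Unify

llm = Unify(model="mixtral-8x7b-instruct-v0.1@input-cost")

documents = [
    "First document text ...",
    "Second document text ...",
    "Third document text ...",
]

async def summarize_all(texts):
    # issue all summarization requests concurrently against the cheapest provider
    tasks = [
        llm.acomplete(f"Summarize this in 10 words or less. {text}")
        for text in texts
    ]
    return await asyncio.gather(*tasks)

# in a notebook, use: summaries = await summarize_all(documents)
summaries = asyncio.run(summarize_all(documents))
for summary in summaries:
    print(summary)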
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model and provider are : mixtral-8x7b-instruct-v0.1@deepinfra\n", - "\n", - " OpenAI: Pioneering 'safe' artificial general intelligence.\n" - ] - } - ], - "source": [ - "llm = Unify(model=\"mixtral-8x7b-instruct-v0.1@input-cost\")\n", - "\n", - "response = await llm.acomplete(\n", - " \"Summarize this in 10 words or less. OpenAI is a U.S. based artificial intelligence \"\n", - " \"(AI) research organization founded in December 2015, researching artificial intelligence \"\n", - " \"with the goal of developing 'safe and beneficial' artificial general intelligence, \"\n", - " \"which it defines as 'highly autonomous systems that outperform humans at most economically \"\n", - " \"valuable work'. As one of the leading organizations of the AI spring, it has developed \"\n", - " \"several large language models, advanced image generation models, and previously, released \"\n", - " \"open-source models. Its release of ChatGPT has been credited with starting the AI spring\"\n", - ")\n", - "\n", - "print(f\"Model and provider are : {response.raw['model']}\\n\")\n", - "print(response)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "base", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/docs/docs/examples/vector_stores/MetalIndexDemo.ipynb b/docs/docs/examples/vector_stores/MetalIndexDemo.ipynb deleted file mode 100644 index d80a3b507899c..0000000000000 --- a/docs/docs/examples/vector_stores/MetalIndexDemo.ipynb +++ /dev/null @@ -1,177 +0,0 @@ -{ - "cells": [ - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "\"Open" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Metal Vector Store" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Creating a Metal Vector Store" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "1. Register an account for [Metal](https://app.getmetal.io/)\n", - "2. Generate an API key in [Metal's Settings](https://app.getmetal.io/settings/organization). Save the `api_key` + `client_id`\n", - "3. Generate an Index in [Metal's Dashboard](https://app.getmetal.io/). 
Save the `index_id`" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Load data into your Index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "%pip install llama-index-vector-stores-metal" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import logging\n", - "import sys\n", - "\n", - "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n", - "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from llama_index.core import VectorStoreIndex, SimpleDirectoryReader\n", - "from llama_index.vector_stores.metal import MetalVectorStore\n", - "from IPython.display import Markdown, display" - ] - }, - { - "attachments": {}, - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Download Data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!mkdir -p 'data/paul_graham/'\n", - "!wget 'https://raw.githubusercontent.com/run-llama/llama_index/main/docs/docs/examples/data/paul_graham/paul_graham_essay.txt' -O 'data/paul_graham/paul_graham_essay.txt'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# load documents\n", - "documents = SimpleDirectoryReader(\"./data/paul_graham/\").load_data()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# initialize Metal Vector Store\n", - "from llama_index.core import StorageContext\n", - "\n", - "api_key = \"api key\"\n", - "client_id = \"client id\"\n", - "index_id = \"index id\"\n", - "\n", - "vector_store = MetalVectorStore(\n", - " api_key=api_key,\n", - " client_id=client_id,\n", - " index_id=index_id,\n", - ")\n", - "storage_context = StorageContext.from_defaults(vector_store=vector_store)\n", - "index = VectorStoreIndex.from_documents(\n", - " documents, storage_context=storage_context\n", - ")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Query Index" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# set Logging to DEBUG for more detailed outputs\n", - "query_engine = index.as_query_engine()\n", - "response = query_engine.query(\"What did the author do growing up?\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "display(Markdown(f\"{response}\"))" - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 34e4431a8bc44..c34877e952aa2 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -245,7 +245,6 @@ nav: - ./examples/embeddings/ibm_watsonx.ipynb - ./examples/embeddings/ipex_llm.ipynb - ./examples/embeddings/ipex_llm_gpu.ipynb - - ./examples/embeddings/itrex.ipynb - ./examples/embeddings/jina_embeddings.ipynb - ./examples/embeddings/jinaai_embeddings.ipynb - 
./examples/embeddings/llamafile.ipynb @@ -313,7 +312,6 @@ nav: - ./examples/ingestion/document_management_pipeline.ipynb - ./examples/ingestion/ingestion_gdrive.ipynb - ./examples/ingestion/parallel_execution_ingestion_pipeline.ipynb - - ./examples/ingestion/redis_ingestion_pipeline.ipynb - LLMs: - ./examples/llm/ai21.ipynb - ./examples/llm/alephalpha.ipynb @@ -903,7 +901,6 @@ nav: - ./api_reference/embeddings/gradient.md - ./api_reference/embeddings/huggingface.md - ./api_reference/embeddings/huggingface_api.md - - ./api_reference/embeddings/huggingface_itrex.md - ./api_reference/embeddings/huggingface_openvino.md - ./api_reference/embeddings/huggingface_optimum.md - ./api_reference/embeddings/huggingface_optimum_intel.md @@ -1089,9 +1086,7 @@ nav: - ./api_reference/packs/deeplake_multimodal_retrieval.md - ./api_reference/packs/dense_x_retrieval.md - ./api_reference/packs/diff_private_simple_dataset.md - - ./api_reference/packs/docugami_kg_rag.md - ./api_reference/packs/evaluator_benchmarker.md - - ./api_reference/packs/finchat.md - ./api_reference/packs/fusion_retriever.md - ./api_reference/packs/fuzzy_citation.md - ./api_reference/packs/gmail_openai_agent.md @@ -1121,10 +1116,8 @@ nav: - ./api_reference/packs/ragatouille_retriever.md - ./api_reference/packs/raptor.md - ./api_reference/packs/recursive_retriever.md - - ./api_reference/packs/redis_ingestion_pipeline.md - ./api_reference/packs/resume_screener.md - ./api_reference/packs/retry_engine_weaviate.md - - ./api_reference/packs/searchain.md - ./api_reference/packs/secgpt.md - ./api_reference/packs/self_discover.md - ./api_reference/packs/self_rag.md @@ -1133,7 +1126,6 @@ nav: - ./api_reference/packs/stock_market_data_query_engine.md - ./api_reference/packs/streamlit_chatbot.md - ./api_reference/packs/sub_question_weaviate.md - - ./api_reference/packs/subdoc_summary.md - ./api_reference/packs/tables.md - ./api_reference/packs/timescale_vector_autoretrieval.md - ./api_reference/packs/trulens_eval_packs.md @@ -1315,7 +1307,6 @@ nav: - ./api_reference/readers/awadb.md - ./api_reference/readers/azcognitive_search.md - ./api_reference/readers/azstorage_blob.md - - ./api_reference/readers/azure_devops.md - ./api_reference/readers/bagel.md - ./api_reference/readers/bilibili.md - ./api_reference/readers/bitbucket.md @@ -1342,7 +1333,6 @@ nav: - ./api_reference/readers/faiss.md - ./api_reference/readers/feedly_rss.md - ./api_reference/readers/feishu_docs.md - - ./api_reference/readers/feishu_wiki.md - ./api_reference/readers/file.md - ./api_reference/readers/firebase_realtimedb.md - ./api_reference/readers/firestore.md @@ -1425,7 +1415,6 @@ nav: - ./api_reference/readers/slack.md - ./api_reference/readers/smart_pdf_loader.md - ./api_reference/readers/snowflake.md - - ./api_reference/readers/snscrape_twitter.md - ./api_reference/readers/spotify.md - ./api_reference/readers/stackoverflow.md - ./api_reference/readers/steamship.md @@ -1445,7 +1434,6 @@ nav: - ./api_reference/readers/wikipedia.md - ./api_reference/readers/wordlift.md - ./api_reference/readers/wordpress.md - - ./api_reference/readers/youtube_metadata.md - ./api_reference/readers/youtube_transcript.md - ./api_reference/readers/zendesk.md - ./api_reference/readers/zep.md @@ -1652,7 +1640,6 @@ nav: - ./api_reference/tools/openai.md - ./api_reference/tools/openapi.md - ./api_reference/tools/oracleai.md - - ./api_reference/tools/passio_nutrition_ai.md - ./api_reference/tools/playgrounds.md - ./api_reference/tools/python_file.md - ./api_reference/tools/query_engine.md @@ -1754,7 +1741,6 
@@ plugins: - ../llama-index-packs/llama-index-packs-vectara-rag - ../llama-index-packs/llama-index-packs-tables - ../llama-index-packs/llama-index-packs-node-parser-semantic-chunking - - ../llama-index-packs/llama-index-packs-redis-ingestion-pipeline - ../llama-index-packs/llama-index-packs-dense-x-retrieval - ../llama-index-packs/llama-index-packs-auto-merging-retriever - ../llama-index-packs/llama-index-packs-agents-llm-compiler @@ -1764,7 +1750,6 @@ plugins: - ../llama-index-packs/llama-index-packs-nebulagraph-query-engine - ../llama-index-packs/llama-index-packs-voyage-query-engine - ../llama-index-packs/llama-index-packs-infer-retrieve-rerank - - ../llama-index-packs/llama-index-packs-subdoc-summary - ../llama-index-packs/llama-index-packs-sentence-window-retriever - ../llama-index-packs/llama-index-packs-recursive-retriever - ../llama-index-packs/llama-index-packs-amazon-product-extraction @@ -1814,11 +1799,9 @@ plugins: - ../llama-index-integrations/vector_stores/llama-index-vector-stores-opensearch - ../llama-index-integrations/vector_stores/llama-index-vector-stores-docarray - ../llama-index-integrations/vector_stores/llama-index-vector-stores-dynamodb - - ../llama-index-integrations/vector_stores/llama-index-vector-stores-chatgpt-plugin - ../llama-index-integrations/vector_stores/llama-index-vector-stores-tair - ../llama-index-integrations/vector_stores/llama-index-vector-stores-redis - ../llama-index-integrations/vector_stores/llama-index-vector-stores-google - - ../llama-index-integrations/vector_stores/llama-index-vector-stores-metal - ../llama-index-integrations/retrievers/llama-index-retrievers-pathway - ../llama-index-integrations/retrievers/llama-index-retrievers-bm25 - ../llama-index-integrations/retrievers/llama-index-retrievers-you @@ -2066,7 +2049,6 @@ plugins: - ../llama-index-integrations/readers/llama-index-readers-whatsapp - ../llama-index-integrations/readers/llama-index-readers-mondaydotcom - ../llama-index-integrations/readers/llama-index-readers-airbyte-stripe - - ../llama-index-integrations/readers/llama-index-readers-snscrape-twitter - ../llama-index-integrations/readers/llama-index-readers-papers - ../llama-index-integrations/readers/llama-index-readers-obsidian - ../llama-index-integrations/readers/llama-index-readers-zep @@ -2076,7 +2058,6 @@ plugins: - ../llama-index-integrations/readers/llama-index-readers-weather - ../llama-index-integrations/readers/llama-index-readers-chatgpt-plugin - ../llama-index-integrations/readers/llama-index-readers-hwp - - ../llama-index-integrations/readers/llama-index-readers-clickhouse - ../llama-index-integrations/readers/llama-index-readers-metal - ../llama-index-integrations/readers/llama-index-readers-boarddocs - ../llama-index-integrations/question_gen/llama-index-question-gen-guidance @@ -2128,20 +2109,16 @@ plugins: - ../llama-index-integrations/extractors/llama-index-extractors-entity - ../llama-index-integrations/extractors/llama-index-extractors-marvin - ../llama-index-packs/llama-index-packs-query-understanding-agent - - ../llama-index-packs/llama-index-packs-searchain - - ../llama-index-packs/llama-index-packs-docugami-kg-rag - ../llama-index-packs/llama-index-packs-raptor - ../llama-index-packs/llama-index-packs-code-hierarchy - ../llama-index-packs/llama-index-packs-cohere-citation-chat - ../llama-index-packs/llama-index-packs-diff-private-simple-dataset - ../llama-index-packs/llama-index-packs-koda-retriever - - ../llama-index-packs/llama-index-packs-finchat - 
../llama-index-integrations/vector_stores/llama-index-vector-stores-baiduvectordb - ../llama-index-integrations/vector_stores/llama-index-vector-stores-astra-db - ../llama-index-integrations/vector_stores/llama-index-vector-stores-databricks - ../llama-index-integrations/vector_stores/llama-index-vector-stores-tidbvector - ../llama-index-integrations/retrievers/llama-index-retrievers-videodb - - ../llama-index-integrations/tools/llama-index-tools-passio-nutrition-ai - ../llama-index-integrations/tools/llama-index-tools-duckduckgo - ../llama-index-integrations/tools/llama-index-tools-finance - ../llama-index-integrations/tools/llama-index-tools-brave-search @@ -2153,8 +2130,6 @@ plugins: - ../llama-index-integrations/postprocessor/llama-index-postprocessor-jinaai-rerank - ../llama-index-integrations/multi_modal_llms/llama-index-multi-modal-llms-anthropic - ../llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-elasticsearch - - ../llama-index-integrations/readers/llama-index-readers-feishu-wiki - - ../llama-index-integrations/llms/llama-index-llms-solar - ../llama-index-integrations/llms/llama-index-llms-friendli - ../llama-index-integrations/llms/llama-index-llms-modelscope - ../llama-index-integrations/llms/llama-index-llms-llamafile @@ -2169,8 +2144,6 @@ plugins: - ../llama-index-integrations/vector_stores/llama-index-vector-stores-analyticdb - ../llama-index-integrations/postprocessor/llama-index-postprocessor-voyageai-rerank - ../llama-index-integrations/readers/llama-index-readers-gcs - - ../llama-index-integrations/readers/llama-index-readers-readme - - ../llama-index-integrations/embeddings/llama-index-embeddings-huggingface-itrex - ../llama-index-integrations/postprocessor/llama-index-postprocessor-rankllm-rerank - ../llama-index-integrations/llms/llama-index-llms-ipex-llm - ../llama-index-integrations/vector_stores/llama-index-vector-stores-neptune @@ -2186,15 +2159,12 @@ plugins: - ../llama-index-integrations/retrievers/llama-index-retrievers-mongodb-atlas-bm25-retriever - ../llama-index-packs/llama-index-packs-agents-lats - ../llama-index-integrations/vector_stores/llama-index-vector-stores-vearch - - ../llama-index-integrations/embeddings/llama-index-embeddings-octoai - ../llama-index-integrations/vector_stores/llama-index-vector-stores-awsdocdb - ../llama-index-integrations/agent/llama-index-agent-llm-compiler - ../llama-index-integrations/agent/llama-index-agent-lats - ../llama-index-integrations/agent/llama-index-agent-coa - - ../llama-index-integrations/readers/llama-index-readers-openapi - ../llama-index-integrations/llms/llama-index-llms-octoai - ../llama-index-integrations/vector_stores/llama-index-vector-stores-firestore - - ../llama-index-integrations/readers/llama-index-readers-youtube-metadata - ../llama-index-integrations/llms/llama-index-llms-mistral-rs - ../llama-index-integrations/agent/llama-index-agent-introspective - ../llama-index-integrations/vector_stores/llama-index-vector-stores-vertexaivectorsearch @@ -2213,7 +2183,6 @@ plugins: - ../llama-index-integrations/llms/llama-index-llms-upstage - ../llama-index-integrations/vector_stores/llama-index-vector-stores-wordlift - ../llama-index-integrations/tools/llama-index-tools-cassandra - - ../llama-index-integrations/llms/llama-index-llms-unify - ../llama-index-integrations/llms/llama-index-llms-lmstudio - ../llama-index-integrations/indices/llama-index-indices-managed-postgresml - ../llama-index-integrations/storage/kvstore/llama-index-storage-kvstore-azure @@ -2228,7 +2197,6 @@ plugins: - 
../llama-index-integrations/node_parser/llama-index-node-parser-relational-dashscope - ../llama-index-integrations/readers/llama-index-readers-dashscope - ../llama-index-integrations/llms/llama-index-llms-oci-genai - - ../llama-index-integrations/readers/llama-index-readers-azure-devops - ../llama-index-integrations/retrievers/llama-index-retrievers-duckdb-retriever - ../llama-index-packs/llama-index-packs-zenguard - ../llama-index-integrations/embeddings/llama-index-embeddings-huggingface-api @@ -2571,7 +2539,6 @@ plugins: ./examples/ingestion/document_management_pipeline.html: https://docs.llamaindex.ai/en/stable/examples/ingestion/document_management_pipeline/ ./examples/ingestion/ingestion_gdrive.html: https://docs.llamaindex.ai/en/stable/examples/ingestion/ingestion_gdrive/ ./examples/ingestion/parallel_execution_ingestion_pipeline.html: https://docs.llamaindex.ai/en/stable/examples/ingestion/parallel_execution_ingestion_pipeline/ - ./examples/ingestion/redis_ingestion_pipeline.html: https://docs.llamaindex.ai/en/stable/examples/ingestion/redis_ingestion_pipeline/ ./examples/instrumentation/basic_usage.html: https://docs.llamaindex.ai/en/stable/examples/instrumentation/basic_usage/ ./examples/instrumentation/observe_api_calls.html: https://docs.llamaindex.ai/en/stable/examples/instrumentation/observe_api_calls/ ./examples/llama_dataset/downloading_llama_datasets.html: https://docs.llamaindex.ai/en/stable/examples/llama_dataset/downloading_llama_datasets/ diff --git a/llama-index-cli/llama_index/cli/upgrade/mappings.json b/llama-index-cli/llama_index/cli/upgrade/mappings.json index 06c04b2c80a86..e554738fabff9 100644 --- a/llama-index-cli/llama_index/cli/upgrade/mappings.json +++ b/llama-index-cli/llama_index/cli/upgrade/mappings.json @@ -471,14 +471,12 @@ "DocArrayInMemoryVectorStore": "llama_index.vector_stores.docarray", "DocArrayHnswVectorStore": "llama_index.vector_stores.docarray", "DynamoDBVectorStore": "llama_index.vector_stores.dynamodb", - "ChatGPTRetrievalPluginClient": "llama_index.vector_stores.chatgpt_plugin", "TairVectorStore": "llama_index.vector_stores.tair", "RedisVectorStore": "llama_index.vector_stores.redis", "set_google_config": "llama_index.vector_stores.google", "GoogleVectorStore": "llama_index.vector_stores.google", "VespaVectorStore": "llama_index.vector_stores.vespa", "hybrid_template": "llama_index.vector_stores.vespa", - "MetalVectorStore": "llama_index.vector_stores.metal", "OceanBaseVectorStore": "llama_index.vector_stores.oceanbase", "DuckDBRetriever": "llama_index.retrievers.duckdb_retriever", "PathwayRetriever": "llama_index.retrievers.pathway", @@ -507,8 +505,6 @@ "IonicShoppingToolSpec": "llama_index.tools.ionic_shopping", "TextToImageToolSpec": "llama_index.tools.text_to_image", "OpenAPIToolSpec": "llama_index.tools.openapi", - "NutritionAIToolSpec": "llama_index.tools.passio_nutrition_ai", - "ENDPOINT_BASE_URL": "llama_index.tools.passio_nutrition_ai", "ShopifyToolSpec": "llama_index.tools.shopify", "MetaphorToolSpec": "llama_index.tools.metaphor", "ACTION_URL_TMPL": "llama_index.tools.zapier", @@ -566,7 +562,6 @@ "TogetherEmbedding": "llama_index.embeddings.together", "NVIDIAEmbedding": "llama_index.embeddings.nvidia", "IpexLLMEmbedding": "llama_index.embeddings.ipex_llm", - "OctoAIEmbedding": "llama_index.embeddings.octoai", "VoyageEmbedding": "llama_index.embeddings.voyageai", "GradientEmbedding": "llama_index.embeddings.gradient", "DashScopeTextEmbeddingType": "llama_index.embeddings.dashscope", @@ -608,7 +603,6 @@ "GooglePaLMEmbedding": 
"llama_index.embeddings.google", "GoogleUnivSentEncoderEmbedding": "llama_index.embeddings.google", "MistralAIEmbedding": "llama_index.embeddings.mistralai", - "ItrexQuantizedBgeEmbedding": "llama_index.embeddings.huggingface_itrex", "BedrockEmbedding": "llama_index.embeddings.bedrock", "Models": "llama_index.embeddings.bedrock", "ClarifaiEmbedding": "llama_index.embeddings.clarifai", @@ -770,7 +764,6 @@ "TranscriptFormat": "llama_index.readers.assemblyai", "GraphDBCypherReader": "llama_index.readers.graphdb_cypher", "ConfluenceReader": "llama_index.readers.confluence", - "OpenAPIReader": "llama_index.readers.openapi", "LilacReader": "llama_index.readers.lilac", "GithubClient": "llama_index.readers.github", "GithubRepositoryReader": "llama_index.readers.github", @@ -800,7 +793,6 @@ "FirebaseRealtimeDatabaseReader": "llama_index.readers.firebase_realtimedb", "PebbloSafeReader": "llama_index.readers.pebblo", "RemoteDepthReader": "llama_index.readers.remote_depth", - "AzureDevopsReader": "llama_index.readers.azure_devops", "MakeWrapper": "llama_index.readers.make_com", "SimpleArangoDBReader": "llama_index.readers.arango_db", "MangaDexReader": "llama_index.readers.mangadex", @@ -815,7 +807,6 @@ "format_list_to_string": "llama_index.readers.myscale", "GuruReader": "llama_index.readers.guru", "LinearReader": "llama_index.readers.linear", - "FeishuWikiReader": "llama_index.readers.feishu_wiki", "TelegramReader": "llama_index.readers.telegram", "SteamshipFileReader": "llama_index.readers.steamship", "OpenMap": "llama_index.readers.maps", @@ -904,8 +895,6 @@ "WeaviateReader": "llama_index.readers.weaviate", "DeepLakeReader": "llama_index.readers.deeplake", "StructuredDataReader": "llama_index.readers.structured_data", - "YouTubeMetaData": "llama_index.readers.youtube_metadata", - "YouTubeMetaDataAndTranscript": "llama_index.readers.youtube_metadata", "WhatsappChatLoader": "llama_index.readers.whatsapp", "MondayReader": "llama_index.readers.mondaydotcom", "BoxReaderBase": "llama_index.readers.box", @@ -914,7 +903,6 @@ "BoxReaderAIPrompt": "llama_index.readers.box", "BoxReaderAIExtract": "llama_index.readers.box", "AirbyteStripeReader": "llama_index.readers.airbyte_stripe", - "SnscrapeTwitterReader": "llama_index.readers.snscrape_twitter", "ArxivReader": "llama_index.readers.papers", "PubmedReader": "llama_index.readers.papers", "ObsidianReader": "llama_index.readers.obsidian", @@ -925,7 +913,6 @@ "ChromaReader": "llama_index.readers.chroma", "WeatherReader": "llama_index.readers.weather", "ChatGPTRetrievalPluginReader": "llama_index.readers.chatgpt_plugin", - "ClickHouseReader": "llama_index.readers.clickhouse", "MetalReader": "llama_index.readers.metal", "BoardDocsReader": "llama_index.readers.boarddocs", "GuidanceQuestionGenerator": "llama_index.question_gen.guidance", @@ -941,7 +928,6 @@ "Cohere": "llama_index.llms.cohere", "NvidiaTriton": "llama_index.llms.nvidia_triton", "AI21": "llama_index.llms.ai21", - "Solar": "llama_index.llms.solar", "Bedrock": "llama_index.llms.bedrock", "completion_with_retry": "llama_index.llms.bedrock", "completion_response_to_chat_response": "llama_index.llms.bedrock", @@ -953,7 +939,6 @@ "DashScopeGenerationModels": "llama_index.llms.dashscope", "LocalTensorRTLLM": "llama_index.llms.nvidia_tensorrt", "Anthropic": "llama_index.llms.anthropic", - "Unify": "llama_index.llms.unify", "Gemini": "llama_index.llms.gemini", "Friendli": "llama_index.llms.friendli", "Clarifai": "llama_index.llms.clarifai", @@ -1030,17 +1015,13 @@ "MultiDocAutoRetrieverPack": 
"llama_index.packs.multidoc_autoretrieval", "GmailOpenAIAgentPack": "llama_index.packs.gmail_openai_agent", "Neo4jQueryEnginePack": "llama_index.packs.neo4j_query_engine", - "SearChainPack": "llama_index.packs.searchain", "ChromaAutoretrievalPack": "llama_index.packs.chroma_autoretrieval", "ResumeScreenerPack": "llama_index.packs.resume_screener", - "DocugamiKgRagPack": "llama_index.packs.docugami_kg_rag", "TruLensRAGTriadPack": "llama_index.packs.trulens_eval_packs", "TruLensHarmlessPack": "llama_index.packs.trulens_eval_packs", "TruLensHelpfulPack": "llama_index.packs.trulens_eval_packs", "EvaluatorBenchmarkerPack": "llama_index.packs.evaluator_benchmarker", "SecGPTPack": "llama_index.packs.secgpt", - "VannaPack": "llama_index.packs.vanna", - "VannaQueryEngine": "llama_index.packs.vanna", "MixtureOfAgentsPack": "llama_index.packs.mixture_of_agents", "RAGFusionPipelinePack": "llama_index.packs.rag_fusion_query_pipeline", "LlamaGuardModeratorPack": "llama_index.packs.llama_guard_moderator", @@ -1069,7 +1050,6 @@ "VectaraRagPack": "llama_index.packs.vectara_rag", "CoAAgentPack": "llama_index.packs.agents_coa", "SemanticChunkingQueryEnginePack": "llama_index.packs.node_parser_semantic_chunking", - "RedisIngestionPipelinePack": "llama_index.packs.redis_ingestion_pipeline", "DenseXRetrievalPack": "llama_index.packs.dense_x_retrieval", "LLMCompilerAgentPack": "llama_index.packs.agents_llm_compiler", "GradioReActAgentPack": "llama_index.packs.gradio_react_agent_chatbot", @@ -1085,7 +1065,6 @@ "KodaRetriever": "llama_index.packs.koda_retriever", "AlphaMatrix": "llama_index.packs.koda_retriever", "DEFAULT_CATEGORIES": "llama_index.packs.koda_retriever", - "SubDocSummaryPack": "llama_index.packs.subdoc_summary", "SentenceWindowRetrieverPack": "llama_index.packs.sentence_window_retriever", "EmbeddedTablesUnstructuredRetrieverPack": "llama_index.packs.recursive_retriever", "RecursiveRetrieverSmallToBigPack": "llama_index.packs.recursive_retriever", @@ -1100,7 +1079,6 @@ "AgentSearchRetrieverPack": "llama_index.packs.agent_search_retriever", "HybridFusionRetrieverPack": "llama_index.packs.fusion_retriever", "QueryRewritingRetrieverPack": "llama_index.packs.fusion_retriever", - "FinanceChatPack": "llama_index.packs.finchat", "BaseNode": "llama_index.core.schema", "TextNode": "llama_index.core.schema", "ImageNode": "llama_index.core.schema", diff --git a/llama-index-core/llama_index/core/command_line/mappings.json b/llama-index-core/llama_index/core/command_line/mappings.json index a2a05ec8d91d1..3bdc2ce5bf3f6 100644 --- a/llama-index-core/llama_index/core/command_line/mappings.json +++ b/llama-index-core/llama_index/core/command_line/mappings.json @@ -470,14 +470,12 @@ "DocArrayInMemoryVectorStore": "llama_index.vector_stores.docarray", "DocArrayHnswVectorStore": "llama_index.vector_stores.docarray", "DynamoDBVectorStore": "llama_index.vector_stores.dynamodb", - "ChatGPTRetrievalPluginClient": "llama_index.vector_stores.chatgpt_plugin", "TairVectorStore": "llama_index.vector_stores.tair", "RedisVectorStore": "llama_index.vector_stores.redis", "set_google_config": "llama_index.vector_stores.google", "GoogleVectorStore": "llama_index.vector_stores.google", "VespaVectorStore": "llama_index.vector_stores.vespa", "hybrid_template": "llama_index.vector_stores.vespa", - "MetalVectorStore": "llama_index.vector_stores.metal", "OceanBaseVectorStore": "llama_index.vector_stores.oceanbase", "DuckDBRetriever": "llama_index.retrievers.duckdb_retriever", "PathwayRetriever": "llama_index.retrievers.pathway", @@ -506,8 
+504,6 @@ "IonicShoppingToolSpec": "llama_index.tools.ionic_shopping", "TextToImageToolSpec": "llama_index.tools.text_to_image", "OpenAPIToolSpec": "llama_index.tools.openapi", - "NutritionAIToolSpec": "llama_index.tools.passio_nutrition_ai", - "ENDPOINT_BASE_URL": "llama_index.tools.passio_nutrition_ai", "ShopifyToolSpec": "llama_index.tools.shopify", "MetaphorToolSpec": "llama_index.tools.metaphor", "ACTION_URL_TMPL": "llama_index.tools.zapier", @@ -565,7 +561,6 @@ "TogetherEmbedding": "llama_index.embeddings.together", "NVIDIAEmbedding": "llama_index.embeddings.nvidia", "IpexLLMEmbedding": "llama_index.embeddings.ipex_llm", - "OctoAIEmbedding": "llama_index.embeddings.octoai", "VoyageEmbedding": "llama_index.embeddings.voyageai", "GradientEmbedding": "llama_index.embeddings.gradient", "DashScopeTextEmbeddingType": "llama_index.embeddings.dashscope", @@ -607,7 +602,6 @@ "GooglePaLMEmbedding": "llama_index.embeddings.google", "GoogleUnivSentEncoderEmbedding": "llama_index.embeddings.google", "MistralAIEmbedding": "llama_index.embeddings.mistralai", - "ItrexQuantizedBgeEmbedding": "llama_index.embeddings.huggingface_itrex", "BedrockEmbedding": "llama_index.embeddings.bedrock", "Models": "llama_index.embeddings.bedrock", "ClarifaiEmbedding": "llama_index.embeddings.clarifai", @@ -769,7 +763,6 @@ "TranscriptFormat": "llama_index.readers.assemblyai", "GraphDBCypherReader": "llama_index.readers.graphdb_cypher", "ConfluenceReader": "llama_index.readers.confluence", - "OpenAPIReader": "llama_index.readers.openapi", "LilacReader": "llama_index.readers.lilac", "GithubClient": "llama_index.readers.github", "GithubRepositoryReader": "llama_index.readers.github", @@ -799,7 +792,6 @@ "FirebaseRealtimeDatabaseReader": "llama_index.readers.firebase_realtimedb", "PebbloSafeReader": "llama_index.readers.pebblo", "RemoteDepthReader": "llama_index.readers.remote_depth", - "AzureDevopsReader": "llama_index.readers.azure_devops", "MakeWrapper": "llama_index.readers.make_com", "SimpleArangoDBReader": "llama_index.readers.arango_db", "MangaDexReader": "llama_index.readers.mangadex", @@ -814,7 +806,6 @@ "format_list_to_string": "llama_index.readers.myscale", "GuruReader": "llama_index.readers.guru", "LinearReader": "llama_index.readers.linear", - "FeishuWikiReader": "llama_index.readers.feishu_wiki", "TelegramReader": "llama_index.readers.telegram", "SteamshipFileReader": "llama_index.readers.steamship", "OpenMap": "llama_index.readers.maps", @@ -903,8 +894,6 @@ "WeaviateReader": "llama_index.readers.weaviate", "DeepLakeReader": "llama_index.readers.deeplake", "StructuredDataReader": "llama_index.readers.structured_data", - "YouTubeMetaData": "llama_index.readers.youtube_metadata", - "YouTubeMetaDataAndTranscript": "llama_index.readers.youtube_metadata", "WhatsappChatLoader": "llama_index.readers.whatsapp", "MondayReader": "llama_index.readers.mondaydotcom", "BoxReaderBase": "llama_index.readers.box", @@ -913,7 +902,6 @@ "BoxReaderAIPrompt": "llama_index.readers.box", "BoxReaderAIExtract": "llama_index.readers.box", "AirbyteStripeReader": "llama_index.readers.airbyte_stripe", - "SnscrapeTwitterReader": "llama_index.readers.snscrape_twitter", "ArxivReader": "llama_index.readers.papers", "PubmedReader": "llama_index.readers.papers", "ObsidianReader": "llama_index.readers.obsidian", @@ -924,7 +912,6 @@ "ChromaReader": "llama_index.readers.chroma", "WeatherReader": "llama_index.readers.weather", "ChatGPTRetrievalPluginReader": "llama_index.readers.chatgpt_plugin", - "ClickHouseReader": 
"llama_index.readers.clickhouse", "MetalReader": "llama_index.readers.metal", "BoardDocsReader": "llama_index.readers.boarddocs", "GuidanceQuestionGenerator": "llama_index.question_gen.guidance", @@ -940,7 +927,6 @@ "Cohere": "llama_index.llms.cohere", "NvidiaTriton": "llama_index.llms.nvidia_triton", "AI21": "llama_index.llms.ai21", - "Solar": "llama_index.llms.solar", "Bedrock": "llama_index.llms.bedrock", "completion_with_retry": "llama_index.llms.bedrock", "completion_response_to_chat_response": "llama_index.llms.bedrock", @@ -952,7 +938,6 @@ "DashScopeGenerationModels": "llama_index.llms.dashscope", "LocalTensorRTLLM": "llama_index.llms.nvidia_tensorrt", "Anthropic": "llama_index.llms.anthropic", - "Unify": "llama_index.llms.unify", "Gemini": "llama_index.llms.gemini", "Friendli": "llama_index.llms.friendli", "Clarifai": "llama_index.llms.clarifai", @@ -1029,17 +1014,13 @@ "MultiDocAutoRetrieverPack": "llama_index.packs.multidoc_autoretrieval", "GmailOpenAIAgentPack": "llama_index.packs.gmail_openai_agent", "Neo4jQueryEnginePack": "llama_index.packs.neo4j_query_engine", - "SearChainPack": "llama_index.packs.searchain", "ChromaAutoretrievalPack": "llama_index.packs.chroma_autoretrieval", "ResumeScreenerPack": "llama_index.packs.resume_screener", - "DocugamiKgRagPack": "llama_index.packs.docugami_kg_rag", "TruLensRAGTriadPack": "llama_index.packs.trulens_eval_packs", "TruLensHarmlessPack": "llama_index.packs.trulens_eval_packs", "TruLensHelpfulPack": "llama_index.packs.trulens_eval_packs", "EvaluatorBenchmarkerPack": "llama_index.packs.evaluator_benchmarker", "SecGPTPack": "llama_index.packs.secgpt", - "VannaPack": "llama_index.packs.vanna", - "VannaQueryEngine": "llama_index.packs.vanna", "MixtureOfAgentsPack": "llama_index.packs.mixture_of_agents", "RAGFusionPipelinePack": "llama_index.packs.rag_fusion_query_pipeline", "LlamaGuardModeratorPack": "llama_index.packs.llama_guard_moderator", @@ -1068,7 +1049,6 @@ "VectaraRagPack": "llama_index.packs.vectara_rag", "CoAAgentPack": "llama_index.packs.agents_coa", "SemanticChunkingQueryEnginePack": "llama_index.packs.node_parser_semantic_chunking", - "RedisIngestionPipelinePack": "llama_index.packs.redis_ingestion_pipeline", "DenseXRetrievalPack": "llama_index.packs.dense_x_retrieval", "LLMCompilerAgentPack": "llama_index.packs.agents_llm_compiler", "GradioReActAgentPack": "llama_index.packs.gradio_react_agent_chatbot", @@ -1084,7 +1064,6 @@ "KodaRetriever": "llama_index.packs.koda_retriever", "AlphaMatrix": "llama_index.packs.koda_retriever", "DEFAULT_CATEGORIES": "llama_index.packs.koda_retriever", - "SubDocSummaryPack": "llama_index.packs.subdoc_summary", "SentenceWindowRetrieverPack": "llama_index.packs.sentence_window_retriever", "EmbeddedTablesUnstructuredRetrieverPack": "llama_index.packs.recursive_retriever", "RecursiveRetrieverSmallToBigPack": "llama_index.packs.recursive_retriever", @@ -1099,7 +1078,6 @@ "AgentSearchRetrieverPack": "llama_index.packs.agent_search_retriever", "HybridFusionRetrieverPack": "llama_index.packs.fusion_retriever", "QueryRewritingRetrieverPack": "llama_index.packs.fusion_retriever", - "FinanceChatPack": "llama_index.packs.finchat", "BaseNode": "llama_index.core.schema", "TextNode": "llama_index.core.schema", "ImageNode": "llama_index.core.schema", From 0b743adf7b7ddc6192d8a135bdd26442bf8d1f38 Mon Sep 17 00:00:00 2001 From: Andrei Fajardo Date: Tue, 10 Dec 2024 13:30:56 -0500 Subject: [PATCH 3/3] rm solar --- .../llama_index/llms/solar/base.py | 248 ------------------ 
.../llama-index-llms-solar/pyproject.toml | 64 ----- 2 files changed, 312 deletions(-) delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/base.py delete mode 100644 llama-index-integrations/llms/llama-index-llms-solar/pyproject.toml diff --git a/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/base.py b/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/base.py deleted file mode 100644 index e15f5058ebba5..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/llama_index/llms/solar/base.py +++ /dev/null @@ -1,248 +0,0 @@ -import warnings -from typing import ( - Any, - Callable, - Dict, - Optional, - Sequence, - Union, - Tuple, -) -from llama_index.core.base.llms.generic_utils import get_from_param_or_env -from llama_index.core.base.llms.types import ( - ChatMessage, - ChatResponse, - ChatResponseAsyncGen, - ChatResponseGen, - CompletionResponse, - CompletionResponseAsyncGen, - CompletionResponseGen, - LLMMetadata, -) -import httpx -from llama_index.core.bridge.pydantic import Field -from llama_index.core.constants import DEFAULT_CONTEXT_WINDOW -from llama_index.core.base.llms.generic_utils import ( - async_stream_completion_response_to_chat_response, - completion_response_to_chat_response, - stream_completion_response_to_chat_response, -) -from llama_index.core.types import BaseOutputParser, PydanticProgramMode -from llama_index.core.bridge.pydantic import Field -from llama_index.core.callbacks import CallbackManager -from llama_index.llms.openai.base import OpenAI, Tokenizer -from transformers import AutoTokenizer - -DEFAULT_SOLAR_API_BASE = "https://api.upstage.ai/v1/solar" -DEFAULT_SOLAR_MODEL = "solar-1-mini-chat" - - -class Solar(OpenAI): - api_key: str = Field(default=None, description="The SOLAR API key.") - api_base: str = Field(default="", description="The base URL for SOLAR API.") - - model: str = Field( - default="solar-1-mini-chat", description="The SOLAR model to use." - ) - - context_window: int = Field( - default=DEFAULT_CONTEXT_WINDOW, - description=LLMMetadata.__fields__["context_window"].field_info.description, - ) - is_chat_model: bool = Field( - default=False, - description=LLMMetadata.__fields__["is_chat_model"].field_info.description, - ) - is_function_calling_model: bool = Field( - default=False, - description=LLMMetadata.__fields__[ - "is_function_calling_model" - ].field_info.description, - ) - tokenizer: Union[Tokenizer, str, None] = Field( - default=None, - description=( - "An instance of a tokenizer object that has an encode method, or the name" - " of a tokenizer model from Hugging Face. If left as None, then this" - " disables inference of max_tokens." 
- ), - ) - - def __init__( - self, - model: str = DEFAULT_SOLAR_MODEL, - temperature: float = 0.1, - max_tokens: Optional[int] = None, - additional_kwargs: Optional[Dict[str, Any]] = None, - max_retries: int = 3, - timeout: float = 60.0, - reuse_client: bool = True, - api_key: Optional[str] = None, - api_base: Optional[str] = None, - api_version: Optional[str] = None, - callback_manager: Optional[CallbackManager] = None, - default_headers: Optional[Dict[str, str]] = None, - http_client: Optional[httpx.Client] = None, - # base class - system_prompt: Optional[str] = None, - messages_to_prompt: Optional[Callable[[Sequence[ChatMessage]], str]] = None, - completion_to_prompt: Optional[Callable[[str], str]] = None, - pydantic_program_mode: PydanticProgramMode = PydanticProgramMode.DEFAULT, - output_parser: Optional[BaseOutputParser] = None, - **kwargs: Any, - ) -> None: - # add warning for this class is deprecated - warnings.warn( - """Solar LLM is deprecated. Please use Upstage LLM instead. - Install the package using `pip install llama-index-llms-upstage` - """, - ) - api_key, api_base = resolve_solar_credentials( - api_key=api_key, - api_base=api_base, - ) - - super().__init__( - model=model, - temperature=temperature, - max_tokens=max_tokens, - additional_kwargs=additional_kwargs, - max_retries=max_retries, - callback_manager=callback_manager, - api_key=api_key, - api_version=api_version, - api_base=api_base, - timeout=timeout, - reuse_client=reuse_client, - default_headers=default_headers, - system_prompt=system_prompt, - messages_to_prompt=messages_to_prompt, - completion_to_prompt=completion_to_prompt, - pydantic_program_mode=pydantic_program_mode, - output_parser=output_parser, - **kwargs, - ) - - @property - def metadata(self) -> LLMMetadata: - return LLMMetadata( - context_window=self.context_window, - num_output=self.max_tokens or -1, - is_chat_model=self.is_chat_model, - is_function_calling_model=self.is_function_calling_model, - model_name=self.model, - ) - - @property - def _tokenizer(self) -> Optional[Tokenizer]: - if isinstance(self.tokenizer, str): - return AutoTokenizer.from_pretrained(self.tokenizer) - return self.tokenizer - - @classmethod - def class_name(cls) -> str: - return "Solar" - - def complete( - self, prompt: str, formatted: bool = False, **kwargs: Any - ) -> CompletionResponse: - """Complete the prompt.""" - if not formatted: - prompt = self.completion_to_prompt(prompt) - - return super().complete(prompt, **kwargs) - - def stream_complete( - self, prompt: str, formatted: bool = False, **kwargs: Any - ) -> CompletionResponseGen: - """Stream complete the prompt.""" - if not formatted: - prompt = self.completion_to_prompt(prompt) - - return super().stream_complete(prompt, **kwargs) - - def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse: - """Chat with the model.""" - if not self.metadata.is_chat_model: - prompt = self.messages_to_prompt(messages) - completion_response = self.complete(prompt, formatted=True, **kwargs) - return completion_response_to_chat_response(completion_response) - - return super().chat(messages, **kwargs) - - def stream_chat( - self, messages: Sequence[ChatMessage], **kwargs: Any - ) -> ChatResponseGen: - if not self.metadata.is_chat_model: - prompt = self.messages_to_prompt(messages) - completion_response = self.stream_complete(prompt, formatted=True, **kwargs) - return stream_completion_response_to_chat_response(completion_response) - - return super().stream_chat(messages, **kwargs) - - # -- Async methods -- - - async 
def acomplete( - self, prompt: str, formatted: bool = False, **kwargs: Any - ) -> CompletionResponse: - """Complete the prompt.""" - if not formatted: - prompt = self.completion_to_prompt(prompt) - - return await super().acomplete(prompt, **kwargs) - - async def astream_complete( - self, prompt: str, formatted: bool = False, **kwargs: Any - ) -> CompletionResponseAsyncGen: - """Stream complete the prompt.""" - if not formatted: - prompt = self.completion_to_prompt(prompt) - - return await super().astream_complete(prompt, **kwargs) - - async def achat( - self, messages: Sequence[ChatMessage], **kwargs: Any - ) -> ChatResponse: - """Chat with the model.""" - if not self.metadata.is_chat_model: - prompt = self.messages_to_prompt(messages) - completion_response = await self.acomplete(prompt, formatted=True, **kwargs) - return completion_response_to_chat_response(completion_response) - - return await super().achat(messages, **kwargs) - - async def astream_chat( - self, messages: Sequence[ChatMessage], **kwargs: Any - ) -> ChatResponseAsyncGen: - if not self.metadata.is_chat_model: - prompt = self.messages_to_prompt(messages) - completion_response = await self.astream_complete( - prompt, formatted=True, **kwargs - ) - return async_stream_completion_response_to_chat_response( - completion_response - ) - - return await super().astream_chat(messages, **kwargs) - - -def resolve_solar_credentials( - api_key: Optional[str] = None, - api_base: Optional[str] = None, -) -> Tuple[Optional[str], str]: - """ "Resolve SOLAR credentials. - - The order of precedence is: - 1. param - 2. env - 3. solar module - 4. default - """ - # resolve from param or env - api_key = get_from_param_or_env("api_key", api_key, "SOLAR_API_KEY", "") - api_base = get_from_param_or_env("api_base", api_base, "SOLAR_API_BASE", "") - - final_api_key = api_key or "" - final_api_base = api_base or DEFAULT_SOLAR_API_BASE - - return final_api_key, str(final_api_base) diff --git a/llama-index-integrations/llms/llama-index-llms-solar/pyproject.toml b/llama-index-integrations/llms/llama-index-llms-solar/pyproject.toml deleted file mode 100644 index 5c949af70c283..0000000000000 --- a/llama-index-integrations/llms/llama-index-llms-solar/pyproject.toml +++ /dev/null @@ -1,64 +0,0 @@ -[build-system] -build-backend = "poetry.core.masonry.api" -requires = ["poetry-core"] - -[tool.codespell] -check-filenames = true -check-hidden = true -skip = "*.csv,*.html,*.json,*.jsonl,*.pdf,*.txt,*.ipynb" - -[tool.llamahub] -contains_example = false -import_path = "llama_index.llms.solar" - -[tool.llamahub.class_authors] -Solar = "llama-index" - -[tool.mypy] -disallow_untyped_defs = true -exclude = ["_static", "build", "examples", "notebooks", "venv"] -ignore_missing_imports = true -python_version = "3.8" - -[tool.poetry] -authors = ["Your Name "] -description = "llama-index llms solar integration" -exclude = ["**/BUILD"] -license = "MIT" -name = "llama-index-llms-solar" -readme = "README.md" -version = "0.3.1" - -[tool.poetry.dependencies] -python = ">=3.9,<4.0" -llama-index-llms-openai = "^0.3.0" -transformers = "^4.37.0" -llama-index-core = "^0.12.0" - -[tool.poetry.group.dev.dependencies] -ipython = "8.10.0" -jupyter = "^1.0.0" -mypy = "0.991" -pre-commit = "3.2.0" -pylint = "2.15.10" -pytest = "7.2.1" -pytest-mock = "3.11.1" -ruff = "0.0.292" -tree-sitter-languages = "^1.8.0" -types-Deprecated = ">=0.1.0" -types-PyYAML = "^6.0.12.12" -types-protobuf = "^4.24.0.4" -types-redis = "4.5.5.0" -types-requests = "2.28.11.8" -types-setuptools = "67.1.0.0" - 
-[tool.poetry.group.dev.dependencies.black] -extras = ["jupyter"] -version = "<=23.9.1,>=23.7.0" - -[tool.poetry.group.dev.dependencies.codespell] -extras = ["toml"] -version = ">=v2.2.6" - -[[tool.poetry.packages]] -include = "llama_index/"