From a9dca505c19bb5b7bc39f2df360d80bc966606ea Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Gomes?= Date: Wed, 6 Nov 2024 10:48:36 +0000 Subject: [PATCH] [119] Architecture diagram (#135) --- .python-version | 1 + README.md | 15 +- docker-compose.yml | 8 + {doc => docs}/diagram/class_diagram | 0 {doc => docs}/diagram/class_diagram.png | Bin {doc => docs}/diagram/class_diagram_script.py | 4 +- docs/rag.md | 68 +++ doc/results.md => docs/rd.md | 4 +- poetry.lock | 526 +++++++++++++++++- 9 files changed, 611 insertions(+), 15 deletions(-) create mode 100644 .python-version rename {doc => docs}/diagram/class_diagram (100%) rename {doc => docs}/diagram/class_diagram.png (100%) rename {doc => docs}/diagram/class_diagram_script.py (94%) create mode 100644 docs/rag.md rename doc/results.md => docs/rd.md (99%) diff --git a/.python-version b/.python-version new file mode 100644 index 0000000..d4b278f --- /dev/null +++ b/.python-version @@ -0,0 +1 @@ +3.11.7 diff --git a/README.md b/README.md index 201639d..445acb2 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,11 @@ # labs - - - + -Runtime Labs +## Topics + +- [Architecture](docs/rag.md) +- [R&D](docs/rd.md) ## Getting Started @@ -38,9 +39,3 @@ Here's the steps to setup the project locally: 3. `make up` 4. `make api` or `ENV=local make api` 5. 
`ENV=test make tests` - - -## Approaches, Tests Conducted and Results - -For details on the approaches and tests conducted on issues encountered during development, -refer to the document [here](./doc/results.md) diff --git a/docker-compose.yml b/docker-compose.yml index 6fbf605..bdfc1d5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,4 +1,12 @@ services: + postgres: + container_name: postgres + image: postgres:13 + environment: + POSTGRES_USER: ${DATABASE_USER} + POSTGRES_PASSWORD: ${DATABASE_PASS} + POSTGRES_DB: ${POSTGRES_DB} + labs-db: container_name: labs-db image: pgvector/pgvector:pg16 diff --git a/doc/diagram/class_diagram b/docs/diagram/class_diagram similarity index 100% rename from doc/diagram/class_diagram rename to docs/diagram/class_diagram diff --git a/doc/diagram/class_diagram.png b/docs/diagram/class_diagram.png similarity index 100% rename from doc/diagram/class_diagram.png rename to docs/diagram/class_diagram.png diff --git a/doc/diagram/class_diagram_script.py b/docs/diagram/class_diagram_script.py similarity index 94% rename from doc/diagram/class_diagram_script.py rename to docs/diagram/class_diagram_script.py index 78af58c..901873e 100644 --- a/doc/diagram/class_diagram_script.py +++ b/docs/diagram/class_diagram_script.py @@ -28,7 +28,9 @@ def extract_classes(file_path): args.append(f"{arg.arg}: {arg.annotation.id}") elif isinstance(arg.annotation, ast.Subscript): if isinstance(arg.annotation.value, ast.Name): - args.append(f"{arg.arg}: {arg.annotation.value.id}[{arg.annotation.slice.value.id}]") + args.append( + f"{arg.arg}: {arg.annotation.value.id}[{arg.annotation.slice.value.id}]" + ) else: args.append(arg.arg) method_signature = f"{visibility} {method_name}({', '.join(args)})" diff --git a/docs/rag.md b/docs/rag.md new file mode 100644 index 0000000..6b780db --- /dev/null +++ b/docs/rag.md @@ -0,0 +1,68 @@ +# RAG Architecture + +## Challenges + +1. Context embeddings creation + + * how are embeddings created? 
which strategy was used? + +2. Query + context + + * Ensure query + context reflect what's intended + * provides an acceptable answer + +3. Include the generated answer in the correct place in the project + +## RAG General view + +```mermaid +flowchart + + subgraph Prompt + A[/Query/] --> B[Embedding model
text-embedding-ada-002] + B --> C[Embeddings] + end + subgraph Context + E[/Data source/] --> F[Embedding model
text-embedding-ada-002] + F --> G[Embeddings] + end + subgraph Vector Database + C --> D[(PostgreSQL PGVector)] + G --> D + end + subgraph LLM + A --> H[/Query + Context/] + D --> H + H --> I[OpenAI o1] + I --> J[/Response/] + end +``` + +## Integrations + +```mermaid +flowchart + subgraph CodeMonkey + API + end + + subgraph Local + C[Repository] + end + + subgraph Github + D[Repository] + Issues + App + end + + Github <--> CodeMonkey + Local <--> CodeMonkey +``` + +## Embeddings and LLM + +| Embeddings | LLM | +| --- | --- | +| text-embedding-ada-002 | OpenAI o1 | +| text-embedding-ada-002 | Llama 3.2 (local) | diff --git a/doc/results.md b/docs/rd.md similarity index 99% rename from doc/results.md rename to docs/rd.md index a2b7606..cfc250d 100644 --- a/doc/results.md +++ b/docs/rd.md @@ -1,10 +1,10 @@ -# Project tests results +# Research & Development This document aims to document some of the approaches tested and the discoveries made on various problems, as well as the results achieved from the tests conducted. -## Embeddins +## Embeddings During the course of the project, various approaches were taken regarding how embeddings are generated, namely: diff --git a/poetry.lock b/poetry.lock index 22340a6..cbe8803 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand. [[package]] name = "aiohappyeyeballs" @@ -825,6 +825,34 @@ files = [ {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, ] +[[package]] +name = "deprecated" +version = "1.2.14" +description = "Python @deprecated decorator to deprecate old python classes, functions or methods." 
+optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +files = [ + {file = "Deprecated-1.2.14-py2.py3-none-any.whl", hash = "sha256:6fac8b097794a90302bdbb17b9b815e732d3c4720583ff1b198499d78470466c"}, + {file = "Deprecated-1.2.14.tar.gz", hash = "sha256:e5323eb936458dccc2582dc6f9c322c852a775a27065ff2b0c4970b9d53d01b3"}, +] + +[package.dependencies] +wrapt = ">=1.10,<2" + +[package.extras] +dev = ["PyTest", "PyTest-Cov", "bump2version (<1)", "sphinx (<2)", "tox"] + +[[package]] +name = "dirtyjson" +version = "1.0.8" +description = "JSON decoder for Python that can extract data from the muck" +optional = false +python-versions = "*" +files = [ + {file = "dirtyjson-1.0.8-py3-none-any.whl", hash = "sha256:125e27248435a58acace26d5c2c4c11a1c0de0a9c5124c5a94ba78e517d74f53"}, + {file = "dirtyjson-1.0.8.tar.gz", hash = "sha256:90ca4a18f3ff30ce849d100dcf4a003953c79d3a2348ef056f1d9c22231a25fd"}, +] + [[package]] name = "distlib" version = "0.3.8" @@ -1655,6 +1683,17 @@ files = [ {file = "jiter-0.5.0.tar.gz", hash = "sha256:1d916ba875bcab5c5f7d927df998c4cb694d27dceddf3392e58beaf10563368a"}, ] +[[package]] +name = "joblib" +version = "1.4.2" +description = "Lightweight pipelining with Python functions" +optional = false +python-versions = ">=3.8" +files = [ + {file = "joblib-1.4.2-py3-none-any.whl", hash = "sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, + {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, +] + [[package]] name = "json5" version = "0.9.25" @@ -2178,6 +2217,310 @@ tokenizers = "*" extra-proxy = ["azure-identity (>=1.15.0,<2.0.0)", "azure-keyvault-secrets (>=4.8.0,<5.0.0)", "google-cloud-kms (>=2.21.3,<3.0.0)", "prisma (==0.11.0)", "resend (>=0.8.0,<0.9.0)"] proxy = ["PyJWT (>=2.8.0,<3.0.0)", "apscheduler (>=3.10.4,<4.0.0)", "backoff", "cryptography (>=42.0.5,<43.0.0)", "fastapi (>=0.111.0,<0.112.0)", "fastapi-sso (>=0.10.0,<0.11.0)", "gunicorn 
(>=22.0.0,<23.0.0)", "orjson (>=3.9.7,<4.0.0)", "pynacl (>=1.5.0,<2.0.0)", "python-multipart (>=0.0.9,<0.0.10)", "pyyaml (>=6.0.1,<7.0.0)", "rq", "uvicorn (>=0.22.0,<0.23.0)"] +[[package]] +name = "llama-cloud" +version = "0.1.2" +description = "" +optional = false +python-versions = "<4,>=3.8" +files = [ + {file = "llama_cloud-0.1.2-py3-none-any.whl", hash = "sha256:f0ec61b88be689895cc4708b13db4503760e29c67131ec76b4d4583d81d571c5"}, + {file = "llama_cloud-0.1.2.tar.gz", hash = "sha256:7c80747e9a7f32d4b739c2d5d4add7351f836d92e633d6ac03fa401896d33097"}, +] + +[package.dependencies] +httpx = ">=0.20.0" +pydantic = ">=1.10" + +[[package]] +name = "llama-index" +version = "0.11.18" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index-0.11.18-py3-none-any.whl", hash = "sha256:dc54c7fdd4c8ee32aa0c5565038894295fc76bd95e21e70fa67ca6fb2413a1b3"}, + {file = "llama_index-0.11.18.tar.gz", hash = "sha256:5c43b46ea9957d539ad823e008c9b6957fbaf4ec5c8bc6903accfb19863edfd9"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.3.4,<0.4.0" +llama-index-cli = ">=0.3.1,<0.4.0" +llama-index-core = ">=0.11.18,<0.12.0" +llama-index-embeddings-openai = ">=0.2.4,<0.3.0" +llama-index-indices-managed-llama-cloud = ">=0.3.0" +llama-index-legacy = ">=0.9.48,<0.10.0" +llama-index-llms-openai = ">=0.2.10,<0.3.0" +llama-index-multi-modal-llms-openai = ">=0.2.0,<0.3.0" +llama-index-program-openai = ">=0.2.0,<0.3.0" +llama-index-question-gen-openai = ">=0.2.0,<0.3.0" +llama-index-readers-file = ">=0.2.0,<0.3.0" +llama-index-readers-llama-parse = ">=0.3.0" +nltk = ">3.8.1" + +[[package]] +name = "llama-index-agent-openai" +version = "0.3.4" +description = "llama-index agent openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_agent_openai-0.3.4-py3-none-any.whl", hash = "sha256:3720ce9bb12417a99a3fe84e52cce23e762b13f88a2dfc4292c76f4df9b26b4a"}, + {file 
= "llama_index_agent_openai-0.3.4.tar.gz", hash = "sha256:80e3408d97121bebca3fa3ffd14b51285870c1c3c73d4ee04d3d18cfe6040466"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-llms-openai = ">=0.2.9,<0.3.0" +openai = ">=1.14.0" + +[[package]] +name = "llama-index-cli" +version = "0.3.1" +description = "llama-index cli" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_cli-0.3.1-py3-none-any.whl", hash = "sha256:2111fbb6973f5b1eabce0d6cca3986499f0f2f625b13d7f48269a49c64c027d4"}, + {file = "llama_index_cli-0.3.1.tar.gz", hash = "sha256:1890dd687cf440f3651365a549e303363162c167b8efbd87a3aa10058d6d5c77"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-embeddings-openai = ">=0.2.0,<0.3.0" +llama-index-llms-openai = ">=0.2.0,<0.3.0" + +[[package]] +name = "llama-index-core" +version = "0.11.18" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_core-0.11.18-py3-none-any.whl", hash = "sha256:8e57522e69d3c8a219b29b5f1624c20269c9c3f87729eff9ecfb796eab51dd55"}, + {file = "llama_index_core-0.11.18.tar.gz", hash = "sha256:f94ae8d740b65c3bf0bc0422b0210613664c1a9f8e98b7328e037a68255bed83"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">3.8.1" +numpy = "<2.0.0" +pillow = ">=9.0.0" +pydantic = ">=2.7.0,<3.0.0" +PyYAML = ">=6.0.1" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<8.4.0 || >8.4.0,<9.0.0" +tiktoken = ">=0.3.3" +tqdm = ">=4.66.1,<5.0.0" +typing-extensions = ">=4.5.0" +typing-inspect = ">=0.8.0" +wrapt = "*" + +[[package]] +name = "llama-index-embeddings-openai" +version = "0.2.5" +description = "llama-index embeddings openai integration" 
+optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_embeddings_openai-0.2.5-py3-none-any.whl", hash = "sha256:823c8311e556349ba19dda408a64a314fa3dafe0e5759709c54d33a0269aa6ba"}, + {file = "llama_index_embeddings_openai-0.2.5.tar.gz", hash = "sha256:0047dd71d747068645ed728c29312aa91b65bbe4c6142180034c64dfc5c6f6e8"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +openai = ">=1.1.0" + +[[package]] +name = "llama-index-indices-managed-llama-cloud" +version = "0.4.0" +description = "llama-index indices llama-cloud integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_indices_managed_llama_cloud-0.4.0-py3-none-any.whl", hash = "sha256:c2c54821f1bf17a7810e6c013fbe7ddfef4154b7e5b100f7bf8673098f8004e4"}, + {file = "llama_index_indices_managed_llama_cloud-0.4.0.tar.gz", hash = "sha256:fbebff7876a219b6ab96892ae7c432a9299195fab8f67d4a4a0ebf6da210b242"}, +] + +[package.dependencies] +llama-cloud = ">=0.0.11" +llama-index-core = ">=0.11.13.post1,<0.12.0" + +[[package]] +name = "llama-index-legacy" +version = "0.9.48.post3" +description = "Interface between LLMs and your data" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_legacy-0.9.48.post3-py3-none-any.whl", hash = "sha256:04221320d84d96ba9ee3e21e5055bd8527cbd769e8f1c60cf0368ed907e012a2"}, + {file = "llama_index_legacy-0.9.48.post3.tar.gz", hash = "sha256:f6969f1085efb0abebd6367e46f3512020f3f6b9c086f458a519830dd61e8206"}, +] + +[package.dependencies] +aiohttp = ">=3.8.6,<4.0.0" +dataclasses-json = "*" +deprecated = ">=1.2.9.3" +dirtyjson = ">=1.0.8,<2.0.0" +fsspec = ">=2023.5.0" +httpx = "*" +nest-asyncio = ">=1.5.8,<2.0.0" +networkx = ">=3.0" +nltk = ">=3.8.1" +numpy = "*" +openai = ">=1.1.0" +pandas = "*" +requests = ">=2.31.0" +SQLAlchemy = {version = ">=1.4.49", extras = ["asyncio"]} +tenacity = ">=8.2.0,<9.0.0" +tiktoken = ">=0.3.3" +typing-extensions = ">=4.5.0" +typing-inspect = 
">=0.8.0" + +[package.extras] +gradientai = ["gradientai (>=1.4.0)"] +html = ["beautifulsoup4 (>=4.12.2,<5.0.0)"] +langchain = ["langchain (>=0.0.303)"] +local-models = ["optimum[onnxruntime] (>=1.13.2,<2.0.0)", "sentencepiece (>=0.1.99,<0.2.0)", "transformers[torch] (>=4.33.1,<5.0.0)"] +postgres = ["asyncpg (>=0.28.0,<0.29.0)", "pgvector (>=0.1.0,<0.2.0)", "psycopg2-binary (>=2.9.9,<3.0.0)"] +query-tools = ["guidance (>=0.0.64,<0.0.65)", "jsonpath-ng (>=1.6.0,<2.0.0)", "lm-format-enforcer (>=0.4.3,<0.5.0)", "rank-bm25 (>=0.2.2,<0.3.0)", "scikit-learn", "spacy (>=3.7.1,<4.0.0)"] + +[[package]] +name = "llama-index-llms-ollama" +version = "0.3.4" +description = "llama-index llms ollama integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_llms_ollama-0.3.4-py3-none-any.whl", hash = "sha256:4c144551c59decd2a993eccde87e451fe61843e31480ebf541951581b43c08e4"}, + {file = "llama_index_llms_ollama-0.3.4.tar.gz", hash = "sha256:56df6e1887081af4522f9ec32be2d4632d8d4f2619f72aa71ea60966231e54a0"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +ollama = ">=0.3.0" + +[[package]] +name = "llama-index-llms-openai" +version = "0.2.15" +description = "llama-index llms openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_llms_openai-0.2.15-py3-none-any.whl", hash = "sha256:a906669397c4c0c3ee55b241dcc22bf0129b3391a8d6ae681a2579affbc5ed48"}, + {file = "llama_index_llms_openai-0.2.15.tar.gz", hash = "sha256:f13655535e8966f5ccf0214c7360e86ef8fc718678557ef248d7fe13f6fde8d0"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.7,<0.12.0" +openai = ">=1.40.0,<2.0.0" + +[[package]] +name = "llama-index-multi-modal-llms-openai" +version = "0.2.2" +description = "llama-index multi-modal-llms openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_multi_modal_llms_openai-0.2.2-py3-none-any.whl", hash = 
"sha256:81813c66c133aab0554b3bee60fe9673e84403dcc57c9fa95fb8be2d7c4c4cee"}, + {file = "llama_index_multi_modal_llms_openai-0.2.2.tar.gz", hash = "sha256:c7205cfd9a23e2201db527ca3f8fa5ef4fb260ab6c9b15e79163630a916ee159"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-llms-openai = ">=0.2.11,<0.3.0" + +[[package]] +name = "llama-index-program-openai" +version = "0.2.0" +description = "llama-index program openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_program_openai-0.2.0-py3-none-any.whl", hash = "sha256:2e10d0c8f21af2e9443eb79e81bb31e7b73835b7c7bbd7ddf20e0a9c846cd368"}, + {file = "llama_index_program_openai-0.2.0.tar.gz", hash = "sha256:4139935541c011257fbfeb9662b3bf1237b729ef4b1c8f4ddf5b6789d2374ac4"}, +] + +[package.dependencies] +llama-index-agent-openai = ">=0.3.0,<0.4.0" +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-llms-openai = ">=0.2.0,<0.3.0" + +[[package]] +name = "llama-index-question-gen-openai" +version = "0.2.0" +description = "llama-index question_gen openai integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_question_gen_openai-0.2.0-py3-none-any.whl", hash = "sha256:a16e68fc5434e9a793f1dfd0cc0354ee19afd167f1d499403b0085b11c5406c0"}, + {file = "llama_index_question_gen_openai-0.2.0.tar.gz", hash = "sha256:3dde1cecbd651000639c20031d7ea23334276aabb181cac40ff424f35e10465e"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-index-llms-openai = ">=0.2.0,<0.3.0" +llama-index-program-openai = ">=0.2.0,<0.3.0" + +[[package]] +name = "llama-index-readers-file" +version = "0.2.2" +description = "llama-index readers file integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_file-0.2.2-py3-none-any.whl", hash = "sha256:ffec878771c1e7575afb742887561059bcca77b97a81c1c1be310ebb73f10f46"}, + {file = "llama_index_readers_file-0.2.2.tar.gz", hash 
= "sha256:48459f90960b863737147b66ed83afec9ce8984f8eda2561b6d2500214365db2"}, +] + +[package.dependencies] +beautifulsoup4 = ">=4.12.3,<5.0.0" +llama-index-core = ">=0.11.0,<0.12.0" +pandas = "*" +pypdf = ">=4.0.1,<5.0.0" +striprtf = ">=0.0.26,<0.0.27" + +[package.extras] +pymupdf = ["pymupdf (>=1.23.21,<2.0.0)"] + +[[package]] +name = "llama-index-readers-llama-parse" +version = "0.3.0" +description = "llama-index readers llama-parse integration" +optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_index_readers_llama_parse-0.3.0-py3-none-any.whl", hash = "sha256:1973cc710dbd5e110c7500c9983ecb45787ad1ff92e6b2113f94a57cf48f3038"}, + {file = "llama_index_readers_llama_parse-0.3.0.tar.gz", hash = "sha256:a5feada0895714dcc41d65dd512c1c38cf70d8ae19947cff82b80d58e6aa367e"}, +] + +[package.dependencies] +llama-index-core = ">=0.11.0,<0.12.0" +llama-parse = ">=0.5.0" + +[[package]] +name = "llama-parse" +version = "0.5.9" +description = "Parse files into RAG-Optimized formats." 
+optional = false +python-versions = "<4.0,>=3.8.1" +files = [ + {file = "llama_parse-0.5.9-py3-none-any.whl", hash = "sha256:7ce1898bfc2ede947f9e695861725874eedd3ff3647ade611cf0ae8deb1ec869"}, + {file = "llama_parse-0.5.9.tar.gz", hash = "sha256:7bec538b5a4c06c44d8bf4ae64059e828e607893c33fbde1a18e258066554af3"}, +] + +[package.dependencies] +click = ">=8.1.7,<9.0.0" +llama-index-core = ">=0.11.0" + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -2543,6 +2886,50 @@ files = [ {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, ] +[[package]] +name = "networkx" +version = "3.4.1" +description = "Python package for creating and manipulating graphs and networks" +optional = false +python-versions = ">=3.10" +files = [ + {file = "networkx-3.4.1-py3-none-any.whl", hash = "sha256:e30a87b48c9a6a7cc220e732bffefaee585bdb166d13377734446ce1a0620eed"}, + {file = "networkx-3.4.1.tar.gz", hash = "sha256:f9df45e85b78f5bd010993e897b4f1fdb242c11e015b101bd951e5c0e29982d8"}, +] + +[package.extras] +default = ["matplotlib (>=3.7)", "numpy (>=1.24)", "pandas (>=2.0)", "scipy (>=1.10,!=1.11.0,!=1.11.1)"] +developer = ["changelist (==0.5)", "mypy (>=1.1)", "pre-commit (>=3.2)", "rtoml"] +doc = ["intersphinx-registry", "myst-nb (>=1.1)", "numpydoc (>=1.8.0)", "pillow (>=9.4)", "pydata-sphinx-theme (>=0.15)", "sphinx (>=7.3)", "sphinx-gallery (>=0.16)", "texext (>=0.6.7)"] +example = ["cairocffi (>=1.7)", "contextily (>=1.6)", "igraph (>=0.11)", "momepy (>=0.7.2)", "osmnx (>=1.9)", "scikit-learn (>=1.5)", "seaborn (>=0.13)"] +extra = ["lxml (>=4.6)", "pydot (>=3.0.1)", "pygraphviz (>=1.14)", "sympy (>=1.10)"] +test = ["pytest (>=7.2)", "pytest-cov (>=4.0)"] + +[[package]] +name = "nltk" +version = "3.9.1" +description = "Natural Language Toolkit" +optional = false +python-versions = ">=3.8" +files = [ + {file = "nltk-3.9.1-py3-none-any.whl", hash = 
"sha256:4fa26829c5b00715afe3061398a8989dc643b92ce7dd93fb4585a70930d168a1"}, + {file = "nltk-3.9.1.tar.gz", hash = "sha256:87d127bd3de4bd89a4f81265e5fa59cb1b199b27440175370f7417d2bc7ae868"}, +] + +[package.dependencies] +click = "*" +joblib = "*" +regex = ">=2021.8.3" +tqdm = "*" + +[package.extras] +all = ["matplotlib", "numpy", "pyparsing", "python-crfsuite", "requests", "scikit-learn", "scipy", "twython"] +corenlp = ["requests"] +machine-learning = ["numpy", "python-crfsuite", "scikit-learn", "scipy"] +plot = ["matplotlib"] +tgrep = ["pyparsing"] +twitter = ["twython"] + [[package]] name = "nodeenv" version = "1.9.1" @@ -2653,6 +3040,20 @@ files = [ [package.dependencies] httpx = ">=0.27.0,<0.28.0" +[[package]] +name = "ollama" +version = "0.3.3" +description = "The official Python client for Ollama." +optional = false +python-versions = "<4.0,>=3.8" +files = [ + {file = "ollama-0.3.3-py3-none-any.whl", hash = "sha256:ca6242ce78ab34758082b7392df3f9f6c2cb1d070a9dede1a4c545c929e16dba"}, + {file = "ollama-0.3.3.tar.gz", hash = "sha256:f90a6d61803117f40b0e8ff17465cab5e1eb24758a473cfe8101aff38bc13b51"}, +] + +[package.dependencies] +httpx = ">=0.27.0,<0.28.0" + [[package]] name = "openai" version = "1.46.1" @@ -2901,6 +3302,98 @@ files = [ [package.dependencies] numpy = "*" +[[package]] +name = "pillow" +version = "11.0.0" +description = "Python Imaging Library (Fork)" +optional = false +python-versions = ">=3.9" +files = [ + {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, + {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a65149d8ada1055029fcb665452b2814fe7d7082fcb0c5bed6db851cb69b2086"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:88a58d8ac0cc0e7f3a014509f0455248a76629ca9b604eca7dc5927cc593c5e9"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:c26845094b1af3c91852745ae78e3ea47abf3dbcd1cf962f16b9a5fbe3ee8488"}, + {file = "pillow-11.0.0-cp310-cp310-manylinux_2_28_x86_64.whl", hash = "sha256:1a61b54f87ab5786b8479f81c4b11f4d61702830354520837f8cc791ebba0f5f"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:674629ff60030d144b7bca2b8330225a9b11c482ed408813924619c6f302fdbb"}, + {file = "pillow-11.0.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:598b4e238f13276e0008299bd2482003f48158e2b11826862b1eb2ad7c768b97"}, + {file = "pillow-11.0.0-cp310-cp310-win32.whl", hash = "sha256:9a0f748eaa434a41fccf8e1ee7a3eed68af1b690e75328fd7a60af123c193b50"}, + {file = "pillow-11.0.0-cp310-cp310-win_amd64.whl", hash = "sha256:a5629742881bcbc1f42e840af185fd4d83a5edeb96475a575f4da50d6ede337c"}, + {file = "pillow-11.0.0-cp310-cp310-win_arm64.whl", hash = "sha256:ee217c198f2e41f184f3869f3e485557296d505b5195c513b2bfe0062dc537f1"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_10_10_x86_64.whl", hash = "sha256:1c1d72714f429a521d8d2d018badc42414c3077eb187a59579f28e4270b4b0fc"}, + {file = "pillow-11.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:499c3a1b0d6fc8213519e193796eb1a86a1be4b1877d678b30f83fd979811d1a"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c8b2351c85d855293a299038e1f89db92a2f35e8d2f783489c6f0b2b5f3fe8a3"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6f4dba50cfa56f910241eb7f883c20f1e7b1d8f7d91c750cd0b318bad443f4d5"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:5ddbfd761ee00c12ee1be86c9c0683ecf5bb14c9772ddbd782085779a63dd55b"}, + {file = "pillow-11.0.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:45c566eb10b8967d71bf1ab8e4a525e5a93519e29ea071459ce517f6b903d7fa"}, 
+ {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b4fd7bd29610a83a8c9b564d457cf5bd92b4e11e79a4ee4716a63c959699b306"}, + {file = "pillow-11.0.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:cb929ca942d0ec4fac404cbf520ee6cac37bf35be479b970c4ffadf2b6a1cad9"}, + {file = "pillow-11.0.0-cp311-cp311-win32.whl", hash = "sha256:006bcdd307cc47ba43e924099a038cbf9591062e6c50e570819743f5607404f5"}, + {file = "pillow-11.0.0-cp311-cp311-win_amd64.whl", hash = "sha256:52a2d8323a465f84faaba5236567d212c3668f2ab53e1c74c15583cf507a0291"}, + {file = "pillow-11.0.0-cp311-cp311-win_arm64.whl", hash = "sha256:16095692a253047fe3ec028e951fa4221a1f3ed3d80c397e83541a3037ff67c9"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:d2c0a187a92a1cb5ef2c8ed5412dd8d4334272617f532d4ad4de31e0495bd923"}, + {file = "pillow-11.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:084a07ef0821cfe4858fe86652fffac8e187b6ae677e9906e192aafcc1b69903"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8069c5179902dcdce0be9bfc8235347fdbac249d23bd90514b7a47a72d9fecf4"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f02541ef64077f22bf4924f225c0fd1248c168f86e4b7abdedd87d6ebaceab0f"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:fcb4621042ac4b7865c179bb972ed0da0218a076dc1820ffc48b1d74c1e37fe9"}, + {file = "pillow-11.0.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:00177a63030d612148e659b55ba99527803288cea7c75fb05766ab7981a8c1b7"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:8853a3bf12afddfdf15f57c4b02d7ded92c7a75a5d7331d19f4f9572a89c17e6"}, + {file = "pillow-11.0.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3107c66e43bda25359d5ef446f59c497de2b5ed4c7fdba0894f8d6cf3822dafc"}, + {file = "pillow-11.0.0-cp312-cp312-win32.whl", hash = 
"sha256:86510e3f5eca0ab87429dd77fafc04693195eec7fd6a137c389c3eeb4cfb77c6"}, + {file = "pillow-11.0.0-cp312-cp312-win_amd64.whl", hash = "sha256:8ec4a89295cd6cd4d1058a5e6aec6bf51e0eaaf9714774e1bfac7cfc9051db47"}, + {file = "pillow-11.0.0-cp312-cp312-win_arm64.whl", hash = "sha256:27a7860107500d813fcd203b4ea19b04babe79448268403172782754870dac25"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:bcd1fb5bb7b07f64c15618c89efcc2cfa3e95f0e3bcdbaf4642509de1942a699"}, + {file = "pillow-11.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:0e038b0745997c7dcaae350d35859c9715c71e92ffb7e0f4a8e8a16732150f38"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0ae08bd8ffc41aebf578c2af2f9d8749d91f448b3bfd41d7d9ff573d74f2a6b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d69bfd8ec3219ae71bcde1f942b728903cad25fafe3100ba2258b973bd2bc1b2"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:61b887f9ddba63ddf62fd02a3ba7add935d053b6dd7d58998c630e6dbade8527"}, + {file = "pillow-11.0.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:c6a660307ca9d4867caa8d9ca2c2658ab685de83792d1876274991adec7b93fa"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:73e3a0200cdda995c7e43dd47436c1548f87a30bb27fb871f352a22ab8dcf45f"}, + {file = "pillow-11.0.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fba162b8872d30fea8c52b258a542c5dfd7b235fb5cb352240c8d63b414013eb"}, + {file = "pillow-11.0.0-cp313-cp313-win32.whl", hash = "sha256:f1b82c27e89fffc6da125d5eb0ca6e68017faf5efc078128cfaa42cf5cb38798"}, + {file = "pillow-11.0.0-cp313-cp313-win_amd64.whl", hash = "sha256:8ba470552b48e5835f1d23ecb936bb7f71d206f9dfeee64245f30c3270b994de"}, + {file = "pillow-11.0.0-cp313-cp313-win_arm64.whl", hash = "sha256:846e193e103b41e984ac921b335df59195356ce3f71dcfd155aa79c603873b84"}, + {file = 
"pillow-11.0.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:4ad70c4214f67d7466bea6a08061eba35c01b1b89eaa098040a35272a8efb22b"}, + {file = "pillow-11.0.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:6ec0d5af64f2e3d64a165f490d96368bb5dea8b8f9ad04487f9ab60dc4bb6003"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c809a70e43c7977c4a42aefd62f0131823ebf7dd73556fa5d5950f5b354087e2"}, + {file = "pillow-11.0.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:4b60c9520f7207aaf2e1d94de026682fc227806c6e1f55bba7606d1c94dd623a"}, + {file = "pillow-11.0.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1e2688958a840c822279fda0086fec1fdab2f95bf2b717b66871c4ad9859d7e8"}, + {file = "pillow-11.0.0-cp313-cp313t-win32.whl", hash = "sha256:607bbe123c74e272e381a8d1957083a9463401f7bd01287f50521ecb05a313f8"}, + {file = "pillow-11.0.0-cp313-cp313t-win_amd64.whl", hash = "sha256:5c39ed17edea3bc69c743a8dd3e9853b7509625c2462532e62baa0732163a904"}, + {file = "pillow-11.0.0-cp313-cp313t-win_arm64.whl", hash = "sha256:75acbbeb05b86bc53cbe7b7e6fe00fbcf82ad7c684b3ad82e3d711da9ba287d3"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_10_10_x86_64.whl", hash = "sha256:2e46773dc9f35a1dd28bd6981332fd7f27bec001a918a72a79b4133cf5291dba"}, + {file = "pillow-11.0.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2679d2258b7f1192b378e2893a8a0a0ca472234d4c2c0e6bdd3380e8dfa21b6a"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eda2616eb2313cbb3eebbe51f19362eb434b18e3bb599466a1ffa76a033fb916"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:20ec184af98a121fb2da42642dea8a29ec80fc3efbaefb86d8fdd2606619045d"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:8594f42df584e5b4bb9281799698403f7af489fba84c34d53d1c4bfb71b7c4e7"}, + {file = "pillow-11.0.0-cp39-cp39-manylinux_2_28_x86_64.whl", hash = 
"sha256:c12b5ae868897c7338519c03049a806af85b9b8c237b7d675b8c5e089e4a618e"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:70fbbdacd1d271b77b7721fe3cdd2d537bbbd75d29e6300c672ec6bb38d9672f"}, + {file = "pillow-11.0.0-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:5178952973e588b3f1360868847334e9e3bf49d19e169bbbdfaf8398002419ae"}, + {file = "pillow-11.0.0-cp39-cp39-win32.whl", hash = "sha256:8c676b587da5673d3c75bd67dd2a8cdfeb282ca38a30f37950511766b26858c4"}, + {file = "pillow-11.0.0-cp39-cp39-win_amd64.whl", hash = "sha256:94f3e1780abb45062287b4614a5bc0874519c86a777d4a7ad34978e86428b8dd"}, + {file = "pillow-11.0.0-cp39-cp39-win_arm64.whl", hash = "sha256:290f2cc809f9da7d6d622550bbf4c1e57518212da51b6a30fe8e0a270a5b78bd"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:1187739620f2b365de756ce086fdb3604573337cc28a0d3ac4a01ab6b2d2a6d2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:fbbcb7b57dc9c794843e3d1258c0fbf0f48656d46ffe9e09b63bbd6e8cd5d0a2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5d203af30149ae339ad1b4f710d9844ed8796e97fda23ffbc4cc472968a47d0b"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:21a0d3b115009ebb8ac3d2ebec5c2982cc693da935f4ab7bb5c8ebe2f47d36f2"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:73853108f56df97baf2bb8b522f3578221e56f646ba345a372c78326710d3830"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e58876c91f97b0952eb766123bfef372792ab3f4e3e1f1a2267834c2ab131734"}, + {file = "pillow-11.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:224aaa38177597bb179f3ec87eeefcce8e4f85e608025e9cfac60de237ba6316"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = 
"sha256:5bd2d3bdb846d757055910f0a59792d33b555800813c3b39ada1829c372ccb06"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:375b8dd15a1f5d2feafff536d47e22f69625c1aa92f12b339ec0b2ca40263273"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:daffdf51ee5db69a82dd127eabecce20729e21f7a3680cf7cbb23f0829189790"}, + {file = "pillow-11.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:7326a1787e3c7b0429659e0a944725e1b03eeaa10edd945a86dead1913383944"}, + {file = "pillow-11.0.0.tar.gz", hash = "sha256:72bacbaf24ac003fea9bff9837d1eedb6088758d41e100c1552930151f677739"}, +] + +[package.extras] +docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline-tabs", "sphinxext-opengraph"] +fpx = ["olefile"] +mic = ["olefile"] +tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] +typing = ["typing-extensions"] +xmp = ["defusedxml"] + [[package]] name = "platformdirs" version = "4.3.6" @@ -3390,6 +3883,24 @@ files = [ [package.extras] windows-terminal = ["colorama (>=0.4.6)"] +[[package]] +name = "pypdf" +version = "4.3.1" +description = "A pure-python PDF library capable of splitting, merging, cropping, and transforming PDF files" +optional = false +python-versions = ">=3.6" +files = [ + {file = "pypdf-4.3.1-py3-none-any.whl", hash = "sha256:64b31da97eda0771ef22edb1bfecd5deee4b72c3d1736b7df2689805076d6418"}, + {file = "pypdf-4.3.1.tar.gz", hash = "sha256:b2f37fe9a3030aa97ca86067a56ba3f9d3565f9a791b305c7355d8392c30d91b"}, +] + +[package.extras] +crypto = ["PyCryptodome", "cryptography"] +dev = ["black", "flit", "pip-tools", "pre-commit (<2.18.0)", "pytest-cov", "pytest-socket", "pytest-timeout", "pytest-xdist", "wheel"] +docs = ["myst_parser", "sphinx", "sphinx_rtd_theme"] +full = ["Pillow (>=8.0.0)", "PyCryptodome", "cryptography"] +image = ["Pillow (>=8.0.0)"] + [[package]] name = 
"pytest" version = "8.3.3" @@ -4363,7 +4874,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\")"} +greenlet = {version = "!=0.4.17", optional = true, markers = "python_version < \"3.13\" and (platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\") or extra == \"asyncio\""} typing-extensions = ">=4.6.0" [package.extras] @@ -4442,6 +4953,17 @@ anyio = ">=3.4.0,<5" [package.extras] full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7)", "pyyaml"] +[[package]] +name = "striprtf" +version = "0.0.26" +description = "A simple library to convert rtf to text" +optional = false +python-versions = "*" +files = [ + {file = "striprtf-0.0.26-py3-none-any.whl", hash = "sha256:8c8f9d32083cdc2e8bfb149455aa1cc5a4e0a035893bedc75db8b73becb3a1bb"}, + {file = "striprtf-0.0.26.tar.gz", hash = "sha256:fdb2bba7ac440072d1c41eab50d8d74ae88f60a8b6575c6e2c7805dc462093aa"}, +] + [[package]] name = "tenacity" version = "8.5.0"