Skip to content
This repository has been archived by the owner on Mar 1, 2024. It is now read-only.

feat: allow use of self-signed cert for MinIO server with MinioReader #935

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions llama_hub/docugami/docugami.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -81,16 +81,16 @@
"source": [
"from base import DocugamiReader\n",
"\n",
"docset_id=\"tjwrr2ekqkc3\"\n",
"docset_name=\"SEC 10-Q reports\"\n",
"document_ids=[\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n",
"docset_id = \"tjwrr2ekqkc3\"\n",
"docset_name = \"SEC 10-Q reports\"\n",
"document_ids = [\"ui7pkriyckwi\", \"1be3o7ch10iy\"]\n",
"\n",
"reader = DocugamiReader()\n",
"chunks = reader.load_data(docset_id=docset_id, document_ids=document_ids)\n",
"\n",
"for chunk in chunks[:5]:\n",
" print(chunk)\n",
" print(\"*\"*32)"
" print(\"*\" * 32)"
]
},
{
Expand Down Expand Up @@ -164,7 +164,7 @@
}
],
"source": [
"reader.min_text_length = 1024 * 4 # ~1k tokens\n",
"reader.min_text_length = 1024 * 4 # ~1k tokens\n",
"reader.max_text_length = 1024 * 24 # ~6k tokens\n",
"reader.include_xml_tags = True\n",
"chunks = reader.load_data(docset_id=docset_id)\n",
Expand Down Expand Up @@ -236,7 +236,9 @@
],
"source": [
"# Try out the query engine with example query\n",
"response = query_engine.query(\"How much did Microsoft spend for opex in the latest quarter?\")\n",
"response = query_engine.query(\n",
" \"How much did Microsoft spend for opex in the latest quarter?\"\n",
")\n",
"print(response.response)"
]
},
Expand Down Expand Up @@ -317,7 +319,9 @@
"response = query_engine.query(\n",
" \"What was Microsoft's weighted average discount rate for operating leases as of March 2023?\"\n",
")\n",
"print(response.response) # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\""
"print(\n",
" response.response\n",
") # the correct answer should be 2.7%, listed on page 24 of \"2023 Q2 MSFT.pdf\""
]
},
{
Expand Down Expand Up @@ -428,7 +432,11 @@
"outputs": [],
"source": [
"from llama_index.indices.vector_store.retrievers import VectorIndexAutoRetriever\n",
"from llama_index.vector_stores.types import MetadataInfo, VectorStoreInfo, VectorStoreQueryMode\n",
"from llama_index.vector_stores.types import (\n",
" MetadataInfo,\n",
" VectorStoreInfo,\n",
" VectorStoreQueryMode,\n",
")\n",
"from llama_index.query_engine import RetrieverQueryEngine\n",
"\n",
"EXCLUDE_KEYS = [\"id\", \"xpath\", \"structure\", \"name\", \"tag\"]\n",
Expand Down
9 changes: 9 additions & 0 deletions llama_hub/minio/minio-client/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ def __init__(
file_metadata: Optional[Callable[[str], Dict]] = None,
minio_endpoint: Optional[str] = None,
minio_secure: bool = False,
minio_cert_check: bool = False,
minio_access_key: Optional[str] = None,
minio_secret_key: Optional[str] = None,
minio_session_token: Optional[str] = None,
Expand Down Expand Up @@ -59,6 +60,8 @@ def __init__(
minio_access_key (Optional[str]): The Minio access key. Default is None.
minio_secret_key (Optional[str]): The Minio secret key. Default is None.
minio_session_token (Optional[str]): The Minio session token.
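minio_secure (bool): Whether to connect to the MinIO server over TLS (HTTPS). Default is False.
minio_cert_check (bool): Whether to verify the MinIO server's TLS certificate. Set to False to allow a self-signed certificate. Default is False.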
"""
super().__init__(*args, **kwargs)

Expand All @@ -74,22 +77,28 @@ def __init__(

self.minio_endpoint = minio_endpoint
self.minio_secure = minio_secure
self.minio_cert_check = minio_cert_check
self.minio_access_key = minio_access_key
self.minio_secret_key = minio_secret_key
self.minio_session_token = minio_session_token

def load_data(self) -> List[Document]:
"""Load file(s) from Minio."""
from minio import Minio
import urllib3

minio_client = Minio(
self.minio_endpoint,
secure=self.minio_secure,
cert_check=self.minio_cert_check,
access_key=self.minio_access_key,
secret_key=self.minio_secret_key,
session_token=self.minio_session_token,
)

if not self.minio_cert_check:
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

with tempfile.TemporaryDirectory() as temp_dir:
if self.key:
suffix = Path(self.key).suffix
Expand Down
Loading