From ebb9f456c5001897ce1bb59e04e4d2923fbf2cf8 Mon Sep 17 00:00:00 2001
From: Jack Luar
Date: Sat, 12 Oct 2024 05:10:55 +0000
Subject: [PATCH] add pyproject.toml for backend/frontend modules

Signed-off-by: Jack Luar
---
 .pre-commit-config.yaml               |   5 -
 backend/build_docs.py                 | 249 +++++++++++++-------------
 backend/chatbot.py                    |   2 +-
 backend/pyproject.toml                |  10 +-
 backend/src/agents/retriever_graph.py |   2 +-
 backend/src/tools/format_docs.py      |   4 +-
 backend/src/tools/process_html.py     |   2 +-
 backend/src/tools/process_json.py     |   4 +-
 backend/src/tools/process_md.py       |   2 +-
 backend/src/tools/process_pdf.py      |   2 +-
 backend/src/vectorstores/faiss.py     |  18 +-
 frontend/mypy.ini                     |   8 -
 frontend/pyproject.toml               |  88 +++++++++
 frontend/ruff.toml                    |  56 ------
 frontend/streamlit_app.py             |  36 ++--
 frontend/utils/feedback.py            |   2 +-
 16 files changed, 257 insertions(+), 233 deletions(-)
 delete mode 100644 frontend/mypy.ini
 create mode 100644 frontend/pyproject.toml
 delete mode 100644 frontend/ruff.toml

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 1a479bd3..26fe4dce 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -15,8 +15,3 @@ repos:
     args:
     - --maxkb=120
     - --enforce-all
-
-- repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.5.1
-  hooks:
-  - id: ruff
diff --git a/backend/build_docs.py b/backend/build_docs.py
index bcb00a0b..d26c2181 100644
--- a/backend/build_docs.py
+++ b/backend/build_docs.py
@@ -1,3 +1,4 @@
+import json
 import os
 import subprocess
 import requests
@@ -42,7 +43,7 @@ def update_src(src_path: str, dst_path: str) -> None:
             'ORQA_RAG_DATASETS_COMMIT', '470c7ecd67d3a22557500a451b73a31fc8c4ec15'
         )
         source_dict[dst_path] = (
-            f'https://huggingface.co/datasets/The-OpenROAD-Project/ORQA_RAG_datasets/raw/{commit_hash}/{manpage_path}'
+            f"https://huggingface.co/datasets/The-OpenROAD-Project/ORQA_RAG_datasets/raw/{commit_hash}/{manpage_path}"
        )
     elif 'yosys' in dst_path:
         source_dict[dst_path] = f"https://{dst_path[len('data/html/yosys_docs') :]}"
@@ -64,13 +65,13 @@ def purge_folders(folder_paths: list[str]) -> None:
     for folder_path in folder_paths:
         if os.path.exists(folder_path):
             shutil.rmtree(folder_path)
-            logging.debug(f'Purging, Folder {folder_path} deleted.')
+            logging.debug(f"Purging, Folder {folder_path} deleted.")


 def track_src(src: str) -> None:
-    logging.debug(f'Updating source dict for {src}...')
+    logging.debug(f"Updating source dict for {src}...")
     if not os.path.exists(src):
-        logging.error(f'File {src} does not exist. Exiting.')
+        logging.error(f"File {src} does not exist. Exiting.")
         sys.exit(1)

     for root, _, files in os.walk(src):
@@ -83,7 +84,7 @@ def track_src(src: str) -> None:

 def copy_file_track_src(src: str, dst: str) -> None:
     if not os.path.exists(src):
-        logging.error(f'File {src} does not exist. Exiting.')
+        logging.error(f"File {src} does not exist. Exiting.")
         sys.exit(1)

     if os.path.isfile(src):
@@ -91,8 +92,8 @@ def copy_file_track_src(src: str, dst: str) -> None:
         base, ext = os.path.splitext(dst)
         counter = 2
         while os.path.exists(dst):
-            new_file_name = f'{base}_{counter}{ext}'
-            logging.debug(f'File {dst} already exists. Renaming to {new_file_name}')
+            new_file_name = f"{base}_{counter}{ext}"
+            logging.debug(f"File {dst} already exists. Renaming to {new_file_name}")
             dst = new_file_name
             counter += 1

@@ -105,7 +106,7 @@ def copy_file_track_src(src: str, dst: str) -> None:

 def copy_tree_track_src(src: str, dst: str) -> None:
     if not os.path.exists(src):
-        logging.debug(f'Folder {src} does not exist. Exiting.')
+        logging.debug(f"Folder {src} does not exist. Exiting.")
         sys.exit(1)

     for root, _, files in os.walk(src):
@@ -121,10 +122,10 @@ def copy_tree_track_src(src: str, dst: str) -> None:
             base, ext = os.path.splitext(file)
             counter = 2
             while os.path.exists(dst_file):
-                new_file_name = f'{base}_{counter}{ext}'
+                new_file_name = f"{base}_{counter}{ext}"
                 dst_file = os.path.join(dst_dir, new_file_name)
                 logging.debug(
-                    f'File {dst_file} already exists. Renaming to {new_file_name}'
+                    f"File {dst_file} already exists. Renaming to {new_file_name}"
                 )
                 counter += 1

@@ -136,15 +137,15 @@ def copy_tree_track_src(src: str, dst: str) -> None:

 def clone_repo(url: str, folder_name: str, commit_hash: Optional[str] = None) -> None:
     target_dir = os.path.join(cur_dir, folder_name)
-    logging.debug(f'Cloning repo from {url} to {target_dir}...')
-    command = f'git clone {url} {target_dir}'
+    logging.debug(f"Cloning repo from {url} to {target_dir}...")
+    command = f"git clone {url} {target_dir}"
     res = subprocess.run(command, shell=True, capture_output=True)
     if res.returncode != 0:
         logging.debug(f"Error in cloning repo: {res.stderr.decode('utf-8')}")
         sys.exit(1)
     if commit_hash:
         os.chdir(target_dir)
-        command = f'git fetch origin {commit_hash} && git checkout {commit_hash}'
+        command = f"git fetch origin {commit_hash} && git checkout {commit_hash}"
         res = subprocess.run(command, shell=True, capture_output=True)
         if res.returncode != 0:
             logging.debug(
@@ -165,32 +166,32 @@ def build_or_docs() -> None:
     md_or_docs = os.path.join(cur_dir, 'OpenROAD/docs/build/html/_sources')

     if not os.path.isdir(md_or_docs):
-        logging.debug(f'Directory {md_or_docs} does not exist. Exiting.')
+        logging.debug(f"Directory {md_or_docs} does not exist. Exiting.")
         sys.exit(1)

     copy_tree_track_src(
-        f'{md_or_docs}/user', f'{cur_dir}/data/markdown/OR_docs/installation'
+        f"{md_or_docs}/user", f"{cur_dir}/data/markdown/OR_docs/installation"
     )
-    copy_tree_track_src(f'{md_or_docs}/main', f'{cur_dir}/data/markdown/OR_docs/tools')
+    copy_tree_track_src(f"{md_or_docs}/main", f"{cur_dir}/data/markdown/OR_docs/tools")
     copy_file_track_src(
-        f'{md_or_docs}/main/README.md',
-        f'{cur_dir}/data/markdown/OR_docs/general/README.md',
+        f"{md_or_docs}/main/README.md",
+        f"{cur_dir}/data/markdown/OR_docs/general/README.md",
     )
     copy_tree_track_src(
-        f'{md_or_docs}/tutorials', f'{cur_dir}/data/markdown/OR_docs/general'
+        f"{md_or_docs}/tutorials", f"{cur_dir}/data/markdown/OR_docs/general"
     )
     copy_tree_track_src(
-        f'{md_or_docs}/contrib', f'{cur_dir}/data/markdown/OR_docs/general'
+        f"{md_or_docs}/contrib", f"{cur_dir}/data/markdown/OR_docs/general"
     )
     copy_tree_track_src(
-        f'{md_or_docs}/src/test', f'{cur_dir}/data/markdown/OR_docs/general'
+        f"{md_or_docs}/src/test", f"{cur_dir}/data/markdown/OR_docs/general"
     )

-    for file in os.listdir(f'{md_or_docs}'):
+    for file in os.listdir(f"{md_or_docs}"):
         if file.endswith('.md'):
             copyfile(
-                f'{md_or_docs}/{file}',
-                f'{cur_dir}/data/markdown/OR_docs/general/{file}',
+                f"{md_or_docs}/{file}",
+                f"{cur_dir}/data/markdown/OR_docs/general/{file}",
             )

     logging.debug('Finished building OR docs.')
@@ -211,14 +212,14 @@ def build_orfs_docs() -> None:
     )

     if not os.path.isdir(md_orfs_docs):
-        logging.debug(f'Directory {md_orfs_docs} does not exist. Exiting.')
+        logging.debug(f"Directory {md_orfs_docs} does not exist. Exiting.")
Exiting.") sys.exit(1) copy_tree_track_src( - f'{md_orfs_docs}/tutorials', f'{cur_dir}/data/markdown/ORFS_docs/general' + f"{md_orfs_docs}/tutorials", f"{cur_dir}/data/markdown/ORFS_docs/general" ) copy_tree_track_src( - f'{md_orfs_docs}/contrib', f'{cur_dir}/data/markdown/ORFS_docs/general' + f"{md_orfs_docs}/contrib", f"{cur_dir}/data/markdown/ORFS_docs/general" ) installation_files = [ @@ -231,30 +232,30 @@ def build_orfs_docs() -> None: 'index2.md', ] - for file in os.listdir(f'{md_orfs_docs}/user'): + for file in os.listdir(f"{md_orfs_docs}/user"): if file.endswith('.md'): if file in installation_files: copy_file_track_src( - f'{md_orfs_docs}/user/{file}', - f'{cur_dir}/data/markdown/ORFS_docs/installation/{file}', + f"{md_orfs_docs}/user/{file}", + f"{cur_dir}/data/markdown/ORFS_docs/installation/{file}", ) else: copy_file_track_src( - f'{md_orfs_docs}/user/{file}', - f'{cur_dir}/data/markdown/ORFS_docs/general/{file}', + f"{md_orfs_docs}/user/{file}", + f"{cur_dir}/data/markdown/ORFS_docs/general/{file}", ) - for file in os.listdir(f'{md_orfs_docs}/'): + for file in os.listdir(f"{md_orfs_docs}/"): if file.endswith('.md'): if file in installation_files: copy_file_track_src( - f'{md_orfs_docs}/{file}', - f'{cur_dir}/data/markdown/ORFS_docs/installation/{file}', + f"{md_orfs_docs}/{file}", + f"{cur_dir}/data/markdown/ORFS_docs/installation/{file}", ) else: copy_file_track_src( - f'{md_orfs_docs}/{file}', - f'{cur_dir}/data/markdown/ORFS_docs/general/{file}', + f"{md_orfs_docs}/{file}", + f"{cur_dir}/data/markdown/ORFS_docs/general/{file}", ) logging.debug('Finished building ORFS docs.') @@ -285,7 +286,7 @@ def build_manpages() -> None: continue os.chdir(os.path.join(cur_dir, 'OpenROAD/docs')) num_cores = os.cpu_count() - command = f'make clean && make preprocess && make -j{num_cores}' + command = f"make clean && make preprocess && make -j{num_cores}" res = subprocess.run(command, shell=True, capture_output=True) logging.debug('Finished building manpages.') @@ -326,23 +327,23 @@ def get_opensta_docs() -> None: else: logging.debug('Failed to download file. Status code:', response.status_code) - track_src(f'{cur_dir}/data/markdown/OpenSTA_docs') - track_src(f'{cur_dir}/data/pdf/OpenSTA') + track_src(f"{cur_dir}/data/markdown/OpenSTA_docs") + track_src(f"{cur_dir}/data/pdf/OpenSTA") def get_or_website_html() -> None: logging.debug('Scraping OR website...') try: subprocess.run( - f'wget -r -A.html -P data/html/or_website {or_website_url}', + f"wget -r -A.html -P data/html/or_website {or_website_url}", shell=True, ) except Exception as e: - logging.debug(f'Error in downloading OR website docs: {e}') + logging.debug(f"Error in downloading OR website docs: {e}") sys.exit(1) logging.debug('OR website docs downloaded successfully.') - track_src(f'{cur_dir}/data/html/or_website') + track_src(f"{cur_dir}/data/html/or_website") def get_or_publications() -> None: @@ -359,11 +360,11 @@ def get_or_publications() -> None: for paper_link in papers: paper_name = paper_link.split('/')[-1] - logging.debug(f'Downloading {paper_name}. . .') + logging.debug(f"Downloading {paper_name}. . .") counter = 2 - while os.path.exists(f'{cur_dir}/data/pdf/OR_publications/{paper_name}'): - logging.debug(f'File {paper_name} already exists. Renaming. . .') + while os.path.exists(f"{cur_dir}/data/pdf/OR_publications/{paper_name}"): + logging.debug(f"File {paper_name} already exists. Renaming. . 
.") paper_name = f"{paper_name.split('.')[0]}_{counter}.pdf" counter += 1 @@ -372,14 +373,14 @@ def get_or_publications() -> None: 'wget', paper_link, '-O', - f'data/pdf/OR_publications/{paper_name}', + f"data/pdf/OR_publications/{paper_name}", ] ) - source_dict[f'data/pdf/OR_publications/{paper_name}'] = paper_link + source_dict[f"data/pdf/OR_publications/{paper_name}"] = paper_link except Exception as e: - logging.debug(f'Error in downloading OR publications: {e}') + logging.debug(f"Error in downloading OR publications: {e}") sys.exit(1) logging.debug('OR publications downloaded successfully.') @@ -389,100 +390,100 @@ def get_yosys_docs_html() -> None: logging.debug('Scraping Yosys RT docs...') try: subprocess.run( - f'wget -r -A.html -P data/html/yosys_docs {yosys_html_url} ', + f"wget -r -A.html -P data/html/yosys_docs {yosys_html_url} ", shell=True, ) except Exception as e: - logging.debug(f'Error in downloading Yosys docs: {e}') + logging.debug(f"Error in downloading Yosys docs: {e}") sys.exit(1) logging.debug('Yosys docs downloaded successfully.') - track_src(f'{cur_dir}/data/html/yosys_docs') + track_src(f"{cur_dir}/data/html/yosys_docs") def get_klayout_docs_html() -> None: logging.debug('Scraping KLayout docs...') try: subprocess.run( - f'wget -r -A.html -l 3 -P data/html/klayout_docs {klayout_html_url} ', + f"wget -r -A.html -l 3 -P data/html/klayout_docs {klayout_html_url} ", shell=True, ) except Exception as e: - logging.debug(f'Error in downloading KLayout docs: {e}') + logging.debug(f"Error in downloading KLayout docs: {e}") sys.exit(1) logging.debug('KLayout docs downloaded successfully.') - track_src(f'{cur_dir}/data/html/klayout_docs') + track_src(f"{cur_dir}/data/html/klayout_docs") if __name__ == '__main__': logging.info('Building knowledge base...') - # docs_paths = [ - # 'data/markdown/manpages', - # 'data/markdown/OR_docs', - # 'data/markdown/ORFS_docs', - # 'data/markdown/OpenSTA_docs', - # 'data/pdf', - # 'data/html', - # ] - # purge_folders(folder_paths=docs_paths) - - # os.makedirs('data/markdown/manpages', exist_ok=True) - # os.makedirs('data/markdown/OR_docs', exist_ok=True) - # os.makedirs('data/markdown/OR_docs/installation', exist_ok=True) - # os.makedirs('data/markdown/OR_docs/tools', exist_ok=True) - # os.makedirs('data/markdown/OR_docs/general', exist_ok=True) - # os.makedirs('data/markdown/ORFS_docs', exist_ok=True) - # os.makedirs('data/markdown/ORFS_docs/installation', exist_ok=True) - # os.makedirs('data/markdown/ORFS_docs/general', exist_ok=True) - # os.makedirs('data/markdown/OpenSTA_docs', exist_ok=True) - # os.makedirs('data/pdf/OpenSTA', exist_ok=True) - # os.makedirs('data/pdf/OR_publications', exist_ok=True) - # os.makedirs('data/html', exist_ok=True) - - # get_klayout_docs_html() - # get_yosys_docs_html() - - # get_or_publications() - # get_or_website_html() - # get_opensta_docs() - - # clone_repo( - # url='https://github.com/The-OpenROAD-Project/OpenROAD.git', - # commit_hash=os.getenv( - # 'OR_REPO_COMMIT', 'ffc5760f2df639cd184c40ceba253c7e02a006d5' - # ), - # folder_name='OpenROAD', - # ) - # clone_repo( - # url='https://github.com/The-OpenROAD-Project/OpenROAD-flow-scripts.git', - # commit_hash=os.getenv( - # 'ORFS_REPO_COMMIT', 'b94834df01cb58915bc0e8dabf85a314fbd8fb9e' - # ), - # folder_name='OpenROAD-flow-scripts', - # ) - - # build_or_docs() - # build_orfs_docs() - # build_manpages() - - # os.chdir(cur_dir) - # copy_file_track_src( - # f'{cur_dir}/data/markdown/OR_docs/installation/MessagesFinal.md', - # 
-    #     f'{cur_dir}/data/markdown/manpages/man3/ErrorMessages.md',
-    # )
-
-    # os.remove(f'{cur_dir}/data/markdown/OR_docs/installation/MessagesFinal.md')
-
-    # gh_disc_src_json = open(f'{cur_dir}/data/markdown/gh_discussions/mapping.json', 'r')
-    # gh_disc_src = json.load(gh_disc_src_json)
-    # gh_disc_path = 'data/markdown/gh_discussions'
-    # for file in gh_disc_src.keys():
-    #     full_path = os.path.join(gh_disc_path, file)
-    #     source_dict[full_path] = gh_disc_src[file]['url']
-
-    # with open('data/source_list.json', 'w+') as src:
-    #     src.write(json.dumps(source_dict))
-
-    # repo_paths = ['OpenROAD', 'OpenROAD-flow-scripts']
-    # purge_folders(folder_paths=repo_paths)
+    docs_paths = [
+        'data/markdown/manpages',
+        'data/markdown/OR_docs',
+        'data/markdown/ORFS_docs',
+        'data/markdown/OpenSTA_docs',
+        'data/pdf',
+        'data/html',
+    ]
+    purge_folders(folder_paths=docs_paths)
+
+    os.makedirs('data/markdown/manpages', exist_ok=True)
+    os.makedirs('data/markdown/OR_docs', exist_ok=True)
+    os.makedirs('data/markdown/OR_docs/installation', exist_ok=True)
+    os.makedirs('data/markdown/OR_docs/tools', exist_ok=True)
+    os.makedirs('data/markdown/OR_docs/general', exist_ok=True)
+    os.makedirs('data/markdown/ORFS_docs', exist_ok=True)
+    os.makedirs('data/markdown/ORFS_docs/installation', exist_ok=True)
+    os.makedirs('data/markdown/ORFS_docs/general', exist_ok=True)
+    os.makedirs('data/markdown/OpenSTA_docs', exist_ok=True)
+    os.makedirs('data/pdf/OpenSTA', exist_ok=True)
+    os.makedirs('data/pdf/OR_publications', exist_ok=True)
+    os.makedirs('data/html', exist_ok=True)
+
+    get_klayout_docs_html()
+    get_yosys_docs_html()
+
+    get_or_publications()
+    get_or_website_html()
+    get_opensta_docs()
+
+    clone_repo(
+        url='https://github.com/The-OpenROAD-Project/OpenROAD.git',
+        commit_hash=os.getenv(
+            'OR_REPO_COMMIT', 'ffc5760f2df639cd184c40ceba253c7e02a006d5'
+        ),
+        folder_name='OpenROAD',
+    )
+    clone_repo(
+        url='https://github.com/The-OpenROAD-Project/OpenROAD-flow-scripts.git',
+        commit_hash=os.getenv(
+            'ORFS_REPO_COMMIT', 'b94834df01cb58915bc0e8dabf85a314fbd8fb9e'
+        ),
+        folder_name='OpenROAD-flow-scripts',
+    )
+
+    build_or_docs()
+    build_orfs_docs()
+    build_manpages()
+
+    os.chdir(cur_dir)
+    copy_file_track_src(
+        f"{cur_dir}/data/markdown/OR_docs/installation/MessagesFinal.md",
+        f"{cur_dir}/data/markdown/manpages/man3/ErrorMessages.md",
+    )
+
+    os.remove(f"{cur_dir}/data/markdown/OR_docs/installation/MessagesFinal.md")
+
+    gh_disc_src_json = open(f"{cur_dir}/data/markdown/gh_discussions/mapping.json", 'r')
+    gh_disc_src = json.load(gh_disc_src_json)
+    gh_disc_path = 'data/markdown/gh_discussions'
+    for file in gh_disc_src.keys():
+        full_path = os.path.join(gh_disc_path, file)
+        source_dict[full_path] = gh_disc_src[file]['url']
+
+    with open('data/source_list.json', 'w+') as src:
+        src.write(json.dumps(source_dict))
+
+    repo_paths = ['OpenROAD', 'OpenROAD-flow-scripts']
+    purge_folders(folder_paths=repo_paths)
diff --git a/backend/chatbot.py b/backend/chatbot.py
index 0d9d6435..fd4384ed 100644
--- a/backend/chatbot.py
+++ b/backend/chatbot.py
@@ -45,7 +45,7 @@ def get_history_str(chat_history: list[dict[str, str]]) -> str:
             urls = output[-2][tool]['urls']

         chat_history.append({'User': user_question, 'AI': llm_response})
-        print(f'LLM: {llm_response} \nSources: {srcs} \nURLs: {urls}\n\n')
+        print(f"LLM: {llm_response} \nSources: {srcs} \nURLs: {urls}\n\n")
     else:
         print('LLM response extraction failed')
diff --git a/backend/pyproject.toml b/backend/pyproject.toml
index 4fe735b9..bef50ad1 100644
--- a/backend/pyproject.toml
+++ b/backend/pyproject.toml
@@ -3,16 +3,16 @@ requires = ['setuptools>=60', 'Cython==3.0.7', 'wheel==0.42.0']
 build-backend = "setuptools.build_meta"

 [project]
-name = "webconsole"
+name = "ora-backend"
 version = "1.0.0"
 dynamic = ["dependencies", "optional-dependencies"]
-requires-python = ">=3.10"
+requires-python = ">=3.12"
 classifiers = [
     "Development Status :: 3 - Alpha",
     "Intended Audience :: Developers",
     "Topic :: Software Development",
     "Programming Language :: Python :: 3",
-    "Programming Language :: Python :: 3.10",
+    "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3 :: Only",
 ]

@@ -21,7 +21,7 @@ dependencies = { file = ["requirements.txt"] }
 optional-dependencies = { test = { file = ["requirements-test.txt"] } }

 [tool.mypy]
-python_version = "3.10"
+python_version = "3.12"
 warn_unused_configs = true
 warn_return_any = true
 warn_unused_ignores = true
@@ -80,7 +80,7 @@ unfixable = []
 dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"

 [tool.ruff.format]
-quote-style = "single"
+quote-style = "double"
 indent-style = "space"
 skip-magic-trailing-comma = false
 line-ending = "auto"
diff --git a/backend/src/agents/retriever_graph.py b/backend/src/agents/retriever_graph.py
index d9aaf3f6..d0ce5676 100644
--- a/backend/src/agents/retriever_graph.py
+++ b/backend/src/agents/retriever_graph.py
@@ -166,7 +166,7 @@ def agent(self, state: AgentState) -> dict[str, list[str]]:
             tool_calls = response.get('tool_names', [])
             for tool in tool_calls:
                 if tool not in self.tool_names:
-                    logging.warning(f'Tool {tool} not found in tool list.')
+                    logging.warning(f"Tool {tool} not found in tool list.")
                     tool_calls.remove(tool)
         else:
             logging.warning('Tool selection failed. Returning empty tool list.')
diff --git a/backend/src/tools/format_docs.py b/backend/src/tools/format_docs.py
index 250fc666..068fda41 100644
--- a/backend/src/tools/format_docs.py
+++ b/backend/src/tools/format_docs.py
@@ -16,13 +16,13 @@ def format_docs(docs: list[Document]) -> tuple[str, list[str], list[str]]:
         if 'man1' in doc_src or 'man2' in doc_src:
             doc_text = f"Command Name: {doc_src.split('/')[-1].replace('.md', '')}\n\n{doc.page_content}"
         elif 'gh_discussions' in doc_src:
-            doc_text = f'{gh_discussion_prompt_template}\n\n{doc.page_content}'
+            doc_text = f"{gh_discussion_prompt_template}\n\n{doc.page_content}"
         else:
             doc_text = doc.page_content

         if 'url' in doc.metadata:
             doc_urls.append(doc.metadata['url'])

-        doc_texts += f'\n\n- - - - - - - - - - - - - - - \n\n{doc_text}'
+        doc_texts += f"\n\n- - - - - - - - - - - - - - - \n\n{doc_text}"

     return doc_texts, doc_srcs, doc_urls
diff --git a/backend/src/tools/process_html.py b/backend/src/tools/process_html.py
index 961266d0..f81efe03 100644
--- a/backend/src/tools/process_html.py
+++ b/backend/src/tools/process_html.py
@@ -33,7 +33,7 @@ def process_html(
     For processing OR/ORFS docs
     """
     if not os.path.exists(folder_path) or not os.listdir(folder_path):
-        logging.error(f'{folder_path} is not populated, returning empty list.')
+        logging.error(f"{folder_path} is not populated, returning empty list.")
         return []

     with open('data/source_list.json') as f:
diff --git a/backend/src/tools/process_json.py b/backend/src/tools/process_json.py
index 298898ce..faf0498d 100644
--- a/backend/src/tools/process_json.py
+++ b/backend/src/tools/process_json.py
@@ -24,7 +24,7 @@ def generate_knowledge_base(file_paths: list[str]) -> list[Document]:
     for file_path in file_paths:
         try:
             with open(file_path, 'r') as file:
-                logging.debug(f'Processing {file_path}...')
+                logging.debug(f"Processing {file_path}...")
                 for line in file:
                     try:
                         json_object = json.loads(line)
@@ -37,6 +37,6 @@ def generate_knowledge_base(file_paths: list[str]) -> list[Document]:
                     except json.JSONDecodeError:
                         logging.error('Error: Invalid JSON format line:', line)
     except FileNotFoundError:
-        logging.error(f'{file_path} not found.')
+        logging.error(f"{file_path} not found.")

     return json_knowledge_base
diff --git a/backend/src/tools/process_md.py b/backend/src/tools/process_md.py
index a2a48c77..a0d09e64 100644
--- a/backend/src/tools/process_md.py
+++ b/backend/src/tools/process_md.py
@@ -51,7 +51,7 @@ def process_md(
     """
     # if no files in the directory
     if not os.path.exists(folder_path) or not os.listdir(folder_path):
-        logging.error(f'{folder_path} is not populated, returning empty list.')
+        logging.error(f"{folder_path} is not populated, returning empty list.")
         return []

     with open('data/source_list.json') as f:
diff --git a/backend/src/tools/process_pdf.py b/backend/src/tools/process_pdf.py
index 1dbbf566..7748681b 100644
--- a/backend/src/tools/process_pdf.py
+++ b/backend/src/tools/process_pdf.py
@@ -30,7 +30,7 @@ def process_pdf_docs(file_path: str) -> list[Document]:
     try:
         documents = loader.load_and_split(text_splitter=text_splitter)
     except PdfStreamError:
-        logging.error(f'Error processing PDF: {file_path} is corrupted or incomplete.')
+        logging.error(f"Error processing PDF: {file_path} is corrupted or incomplete.")

     for doc in documents:
         try:
diff --git a/backend/src/vectorstores/faiss.py b/backend/src/vectorstores/faiss.py
index 8c78cb4f..bb30b1c7 100644
--- a/backend/src/vectorstores/faiss.py
+++ b/backend/src/vectorstores/faiss.py
@@ -91,7 +91,7 @@ def add_md_docs(
         processed_mddocs: list[Document] = []

         for folder_path in folder_paths:
-            logging.debug(f'Processing [{folder_path}]...')
+            logging.debug(f"Processing [{folder_path}]...")
             processed_mddocs.extend(
                 process_md(
                     folder_path=folder_path,
@@ -101,7 +101,7 @@ def add_md_docs(
             )

         if processed_mddocs:
-            logging.info(f'Adding {folder_paths} to FAISS database...\n')
+            logging.info(f"Adding {folder_paths} to FAISS database...\n")
             self._add_to_db(documents=processed_mddocs)
             self.processed_docs.extend(processed_mddocs)
         else:
@@ -119,7 +119,7 @@ def add_md_manpages(
         processed_manpages: list[Document] = []

         for folder_path in folder_paths:
-            logging.debug(f'Processing [{folder_path}]...')
+            logging.debug(f"Processing [{folder_path}]...")
             processed_manpages.extend(
                 process_md(
                     folder_path=folder_path, split_text=False, chunk_size=chunk_size
@@ -127,7 +127,7 @@ def add_md_manpages(
             )

         if processed_manpages:
-            logging.info(f'Adding {folder_paths} to FAISS database...\n')
+            logging.info(f"Adding {folder_paths} to FAISS database...\n")
             self._add_to_db(documents=processed_manpages)
             self.processed_docs.extend(processed_manpages)
         else:
@@ -145,7 +145,7 @@ def add_html(
         processed_html_docs: list[Document] = []

         for folder_path in folder_paths:
-            logging.debug(f'Processing [{folder_path}]...')
+            logging.debug(f"Processing [{folder_path}]...")
             processed_html_docs.extend(
                 process_html(
                     folder_path=folder_path, split_text=True, chunk_size=chunk_size
@@ -153,11 +153,11 @@ def add_html(
             )

         if processed_html_docs:
-            logging.info(f'Adding {folder_paths} to FAISS database...\n')
+            logging.info(f"Adding {folder_paths} to FAISS database...\n")
             self._add_to_db(documents=processed_html_docs)
             self.processed_docs.extend(processed_html_docs)
         else:
-            raise ValueError(f'Could not add {folder_paths}. No HTML docs processed.')
+            raise ValueError(f"Could not add {folder_paths}. No HTML docs processed.")

         if return_docs:
             return processed_html_docs
@@ -172,14 +172,14 @@ def add_documents(
         processed_otherdocs: list[Document] = []

         for file_path in file_paths:
-            logging.debug(f'Processing [{file_path}]...')
+            logging.debug(f"Processing [{file_path}]...")
             if file_type == 'pdf':
                 processed_otherdocs.extend(process_pdf_docs(file_path=file_path))
             else:
                 raise ValueError('File type not supported.')

         if processed_otherdocs:
-            logging.info(f'Adding [{file_paths}] to FAISS database...\n')
+            logging.info(f"Adding [{file_paths}] to FAISS database...\n")
             self._add_to_db(documents=processed_otherdocs)
             self.processed_docs.extend(processed_otherdocs)
         else:
diff --git a/frontend/mypy.ini b/frontend/mypy.ini
deleted file mode 100644
index 59171727..00000000
--- a/frontend/mypy.ini
+++ /dev/null
@@ -1,8 +0,0 @@
-[mypy]
-warn_return_any = True
-warn_unused_configs = True
-ignore_missing_imports = True
-disable_error_code = call-arg
-
-[mypy-transformers.*]
-ignore_missing_imports = True
diff --git a/frontend/pyproject.toml b/frontend/pyproject.toml
new file mode 100644
index 00000000..ad47e4e1
--- /dev/null
+++ b/frontend/pyproject.toml
@@ -0,0 +1,88 @@
+[build-system]
+requires = ['setuptools>=60', 'Cython==3.0.7', 'wheel==0.42.0']
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "ora-frontend"
+version = "1.0.0"
+dynamic = ["dependencies", "optional-dependencies"]
+requires-python = ">=3.12"
+classifiers = [
+    "Development Status :: 3 - Alpha",
+    "Intended Audience :: Developers",
+    "Topic :: Software Development",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.12",
+    "Programming Language :: Python :: 3 :: Only",
+]
+
+[tool.setuptools.dynamic]
+dependencies = { file = ["requirements.txt"] }
+optional-dependencies = { test = { file = ["requirements-test.txt"] } }
+
+[tool.mypy]
+python_version = "3.12"
+warn_unused_configs = true
+warn_return_any = true
+warn_unused_ignores = true
+strict_optional = true
+disable_error_code = ["call-arg"]
+exclude = "src/post_install.py"
+
+[[tool.mypy.overrides]]
+module = "transformers.*"
+ignore_missing_imports = true
+
+[tool.ruff]
+exclude = [
+    ".bzr",
+    ".direnv",
+    ".eggs",
+    ".git",
+    ".git-rewrite",
+    ".hg",
+    ".ipynb_checkpoints",
+    ".mypy_cache",
+    ".nox",
+    ".pants.d",
+    ".pyenv",
+    ".pytest_cache",
+    ".pytype",
+    ".ruff_cache",
+    ".svn",
+    ".tox",
+    ".venv",
+    ".vscode",
+    "__pypackages__",
+    "_build",
+    "buck-out",
+    "build",
+    "dist",
+    "node_modules",
+    "site-packages",
+    "venv",
+]
+line-length = 88
+indent-width = 4
+target-version = "py310"
+
+[tool.ruff.lint]
+select = ["E4", "E7", "E9","E301","E304","E305","E401","E223","E224","E242", "E", "F" ,"N", "W", "C90"]
+extend-select = ["D203", "D204"]
+ignore = ["E501", "C901"]
+preview = true
+
+# Allow fix for all enabled rules (when `--fix` is provided).
+fixable = ["ALL"]
+unfixable = []
+
+# Allow unused variables when underscore-prefixed.
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
+docstring-code-format = false
+docstring-code-line-length = "dynamic"
diff --git a/frontend/ruff.toml b/frontend/ruff.toml
deleted file mode 100644
index b179ab31..00000000
--- a/frontend/ruff.toml
+++ /dev/null
@@ -1,56 +0,0 @@
-exclude = [
-    ".bzr",
-    ".direnv",
-    ".eggs",
-    ".git",
-    ".git-rewrite",
-    ".hg",
-    ".ipynb_checkpoints",
-    ".mypy_cache",
-    ".nox",
-    ".pants.d",
-    ".pyenv",
-    ".pytest_cache",
-    ".pytype",
-    ".ruff_cache",
-    ".svn",
-    ".tox",
-    ".venv",
-    ".vscode",
-    "__pypackages__",
-    "_build",
-    "buck-out",
-    "build",
-    "dist",
-    "node_modules",
-    "site-packages",
-    "venv",
-]
-
-line-length = 88
-indent-width = 4
-
-target-version = "py312"
-
-[lint]
-select = ["E4", "E7", "E9","E301","E304","E305","E401","E223","E224","E242", "F","N"]
-extend-select = ["D203", "D204"]
-ignore = []
-preview = true
-
-# Allow fix for all enabled rules (when `--fix` is provided).
-fixable = ["ALL"]
-unfixable = []
-
-# Allow unused variables when underscore-prefixed.
-dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
-
-[format]
-quote-style = "single"
-
-indent-style = "space"
-skip-magic-trailing-comma = false
-line-ending = "auto"
-docstring-code-format = false
-docstring-code-line-length = "dynamic"
-preview = true
diff --git a/frontend/streamlit_app.py b/frontend/streamlit_app.py
index 22acaa8e..9963244a 100644
--- a/frontend/streamlit_app.py
+++ b/frontend/streamlit_app.py
@@ -33,7 +33,7 @@ def response_generator(user_input: str) -> tuple[str, str] | tuple[None, None]:
     Returns:
     - tuple: Contains the AI response and sources.
     """
-    url = f'{st.session_state.base_url}{st.session_state.selected_endpoint}'
+    url = f"{st.session_state.base_url}{st.session_state.selected_endpoint}"

     headers = {'accept': 'application/json', 'Content-Type': 'application/json'}

@@ -61,20 +61,20 @@ def response_generator(user_input: str) -> tuple[str, str] | tuple[None, None]:
         return data.get('response', ''), sources

     except requests.exceptions.RequestException as e:
-        st.error(f'Request failed: {e}')
+        st.error(f"Request failed: {e}")
         return None, None


 def fetch_endpoints() -> tuple[str, list[str]]:
     base_url = os.getenv('CHAT_ENDPOINT', 'http://localhost:8000')
-    url = f'{base_url}/chains/listAll'
+    url = f"{base_url}/chains/listAll"
     try:
         response = requests.get(url)
         response.raise_for_status()
         endpoints = response.json()
         return base_url, endpoints
     except requests.exceptions.RequestException as e:
-        st.error(f'Failed to fetch endpoints: {e}')
+        st.error(f"Failed to fetch endpoints: {e}")
         return base_url, []
@@ -117,10 +117,12 @@ def main() -> None:
         st.session_state.metadata = {}

     if not st.session_state.chat_history:
-        st.session_state.chat_history.append({
-            'content': 'Hi, I am the OpenROAD assistant. Type your query about OpenROAD',
-            'role': 'ai',
-        })
+        st.session_state.chat_history.append(
+            {
+                'content': 'Hi, I am the OpenROAD assistant. Type your query about OpenROAD',
+                'role': 'ai',
+            }
+        )

     for message in st.session_state.chat_history:
         with st.chat_message(message['role']):
@@ -160,14 +162,16 @@ def main() -> None:
                     message_placeholder.markdown(response_buffer)

                     response_time_text = (
-                        f'Response Time: {response_time / 1000:.2f} seconds'
+                        f"Response Time: {response_time / 1000:.2f} seconds"
                     )
                     response_time_colored = f":{'green' if response_time < 5000 else 'orange' if response_time < 10000 else 'red'}[{response_time_text}]"
                     st.markdown(response_time_colored)
-                    st.session_state.chat_history.append({
-                        'content': response_buffer,
-                        'role': 'ai',
-                    })
+                    st.session_state.chat_history.append(
+                        {
+                            'content': response_buffer,
+                            'role': 'ai',
+                        }
+                    )

                     if sources:
                         with st.expander('Sources:'):
@@ -181,7 +185,7 @@ def main() -> None:
                                 parsed_sources = sources
                             if isinstance(parsed_sources, (list, set)):
                                 sources_list = '\n'.join(
-                                    f'- [{link}]({link})'
+                                    f"- [{link}]({link})"
                                     for link in parsed_sources
                                     if link.strip()
                                 )
@@ -189,7 +193,7 @@ def main() -> None:
                             else:
                                 st.markdown('No valid sources found.')
                         except (ValueError, SyntaxError) as e:
-                            st.markdown(f'Failed to parse sources: {e}')
+                            st.markdown(f"Failed to parse sources: {e}")
                     else:
                         st.error('Invalid response from the API')
@@ -219,7 +223,7 @@ def update_state() -> None:
                 st.session_state.chat_history,
             )
         except Exception as e:
-            st.error(f'Failed to load feedback form: {e}')
+            st.error(f"Failed to load feedback form: {e}")


 if __name__ == '__main__':
diff --git a/frontend/utils/feedback.py b/frontend/utils/feedback.py
index 0592e8f8..2f3a2fa8 100644
--- a/frontend/utils/feedback.py
+++ b/frontend/utils/feedback.py
@@ -142,7 +142,7 @@ def submit_feedback_to_google_sheet(
         sheet.append_row(data_to_append)
         st.sidebar.success('Feedback submitted successfully.')
     else:
-        st.sidebar.error(f'Sheet with GID {target_gid} not found.')
+        st.sidebar.error(f"Sheet with GID {target_gid} not found.")


 def show_feedback_form(