Code refactor. #4291

Merged · 3 commits · Dec 30, 2024

10 changes: 10 additions & 0 deletions agent/component/answer.py
@@ -16,6 +16,7 @@
 import random
 from abc import ABC
 from functools import partial
+from typing import Tuple, Union
 
 import pandas as pd
@@ -76,4 +77,13 @@ def stream_output(self):
     def set_exception(self, e):
         self.exception = e
 
+    def output(self, allow_partial=True) -> Tuple[str, Union[pd.DataFrame, partial]]:
+        if allow_partial:
+            return super().output()
+
+        for r, c in self._canvas.history[::-1]:
+            if r == "user":
+                return self._param.output_var_name, pd.DataFrame([{"content": c}])
+
+        return self._param.output_var_name, pd.DataFrame([])
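
The new `output(allow_partial=False)` path walks the canvas history from newest to oldest and returns the latest user turn as a one-row DataFrame (or an empty one if no user turn exists). A minimal sketch of that fallback, with a plain function standing in for the component; the real method lives on the `Answer` component and reads `self._canvas.history`:

```python
import pandas as pd

def latest_user_message(history, output_var_name="content"):
    # Walk (role, content) pairs from newest to oldest and wrap the
    # first user turn in a one-row DataFrame, mirroring the diff above.
    for role, content in history[::-1]:
        if role == "user":
            return output_var_name, pd.DataFrame([{"content": content}])
    # No user turn found: return an empty frame rather than None.
    return output_var_name, pd.DataFrame([])

hist = [("user", "hi"), ("assistant", "hello"), ("user", "summarize this")]
name, df = latest_user_message(hist)
print(name, df.iloc[0]["content"])  # -> content summarize this
```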

4 changes: 4 additions & 0 deletions api/apps/canvas_app.py
@@ -146,12 +146,16 @@ def sse():
 
             canvas.messages.append({"role": "assistant", "content": final_ans["content"], "id": message_id})
             canvas.history.append(("assistant", final_ans["content"]))
+            if not canvas.path[-1]:
+                canvas.path.pop(-1)
             if final_ans.get("reference"):
                 canvas.reference.append(final_ans["reference"])
             cvs.dsl = json.loads(str(canvas))
             UserCanvasService.update_by_id(req["id"], cvs.to_dict())
         except Exception as e:
             cvs.dsl = json.loads(str(canvas))
+            if not canvas.path[-1]:
+                canvas.path.pop(-1)
             UserCanvasService.update_by_id(req["id"], cvs.to_dict())
             traceback.print_exc()
             yield "data:" + json.dumps({"code": 500, "message": str(e),
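
Both branches now trim a trailing falsy entry from `canvas.path` before the canvas is serialized back into the stored DSL, so an aborted run does not leave a dangling empty hop. The guard in isolation, on toy data rather than the real canvas object:

```python
# Assumed shape for illustration: a list of per-step component-id lists.
path = [["begin"], ["retrieval", "generate"], []]
if not path[-1]:          # empty last step
    path.pop(-1)
assert path == [["begin"], ["retrieval", "generate"]]
```
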
6 changes: 2 additions & 4 deletions api/apps/dialog_app.py
@@ -103,10 +103,7 @@ def set_dialog():
             }
             if not DialogService.save(**dia):
                 return get_data_error_result(message="Fail to new a dialog!")
-            e, dia = DialogService.get_by_id(dia["id"])
-            if not e:
-                return get_data_error_result(message="Fail to new a dialog!")
-            return get_json_result(data=dia.to_json())
+            return get_json_result(data=dia)
         else:
             del req["dialog_id"]
             if "kb_names" in req:
@@ -117,6 +114,7 @@ def set_dialog():
             if not e:
                 return get_data_error_result(message="Fail to update a dialog!")
             dia = dia.to_dict()
+            dia.update(req)
             dia["kb_ids"], dia["kb_names"] = get_kb_names(dia["kb_ids"])
             return get_json_result(data=dia)
         except Exception as e:
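
On the create path the redundant `get_by_id` round-trip is gone: the dict that was just saved is returned directly. On the update path, `dia.update(req)` merges the request payload into the stored record so the response reflects the fields the client just set. The merge semantics in miniature:

```python
# Request fields override the stored dialog dict; untouched fields survive.
stored = {"id": "d1", "name": "old name", "kb_ids": ["kb1"]}
req = {"name": "new name"}
dia = dict(stored)
dia.update(req)
assert dia == {"id": "d1", "name": "new name", "kb_ids": ["kb1"]}
```
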
3 changes: 2 additions & 1 deletion api/apps/kb_app.py
@@ -185,7 +185,8 @@ def rm():
                 return get_data_error_result(
                     message="Database error (Document removal)!")
             f2d = File2DocumentService.get_by_document_id(doc.id)
-            FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
+            if f2d:
+                FileService.filter_delete([File.source_type == FileSource.KNOWLEDGEBASE, File.id == f2d[0].file_id])
             File2DocumentService.delete_by_document_id(doc.id)
         FileService.filter_delete(
             [File.source_type == FileSource.KNOWLEDGEBASE, File.type == "folder", File.name == kbs[0].name])
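
`File2DocumentService.get_by_document_id` can return an empty list for a document with no file mapping, in which case the old unguarded `f2d[0]` raised `IndexError` and aborted the whole knowledge-base removal. A stripped-down sketch of the guard, with a hypothetical callable standing in for `FileService.filter_delete`:

```python
def delete_mapped_file(f2d, filter_delete):
    # Only touch File rows when a file2document record actually exists.
    if f2d:
        filter_delete(f2d[0])

delete_mapped_file([], lambda row: None)   # orphaned doc: no-op, no IndexError
delete_mapped_file(["file-row"], print)    # mapped doc: deletes the file row
```
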
4 changes: 4 additions & 0 deletions api/utils/api_utils.py
@@ -120,6 +120,10 @@ def server_error_response(e):
     if len(e.args) > 1:
         return get_json_result(
             code=settings.RetCode.EXCEPTION_ERROR, message=repr(e.args[0]), data=e.args[1])
+    if repr(e).find("index_not_found_exception") >= 0:
+        return get_json_result(code=settings.RetCode.EXCEPTION_ERROR,
+                               message="No chunk found, please upload file and parse it.")
+
     return get_json_result(code=settings.RetCode.EXCEPTION_ERROR, message=repr(e))
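
The new branch translates the doc engine's `index_not_found_exception` (raised when a knowledge base has no search index yet) into an actionable hint instead of a raw `repr(e)`. A self-contained sketch of just the mapping; the JSON envelope and `settings.RetCode` plumbing are omitted:

```python
def friendly_message(e: Exception) -> str:
    # Substring check mirrors repr(e).find(...) >= 0 in the diff above.
    if "index_not_found_exception" in repr(e):
        return "No chunk found, please upload file and parse it."
    return repr(e)

print(friendly_message(RuntimeError("index_not_found_exception: kb_42")))
```
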
8 changes: 4 additions & 4 deletions graphrag/graph_prompt.py
@@ -11,20 +11,20 @@
 
 -Steps-
 1. Identify all entities. For each identified entity, extract the following information:
-- entity_name: Name of the entity, capitalized
+- entity_name: Name of the entity, capitalized, in language of 'Text'
 - entity_type: One of the following types: [{entity_types}]
-- entity_description: Comprehensive description of the entity's attributes and activities
+- entity_description: Comprehensive description of the entity's attributes and activities in language of 'Text'
 Format each entity as ("entity"{tuple_delimiter}<entity_name>{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>
 
 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other.
 For each pair of related entities, extract the following information:
 - source_entity: name of the source entity, as identified in step 1
 - target_entity: name of the target entity, as identified in step 1
-- relationship_description: explanation as to why you think the source entity and the target entity are related to each other
+- relationship_description: explanation as to why you think the source entity and the target entity are related to each other in language of 'Text'
 - relationship_strength: a numeric score indicating strength of the relationship between the source entity and target entity
 Format each relationship as ("relationship"{tuple_delimiter}<source_entity>{tuple_delimiter}<target_entity>{tuple_delimiter}<relationship_description>{tuple_delimiter}<relationship_strength>)
 
-3. Return output in English as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
+3. Return output as a single list of all the entities and relationships identified in steps 1 and 2. Use **{record_delimiter}** as the list delimiter.
 
 4. When finished, output {completion_delimiter}
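
The prompt edits ask the model to keep entity names and descriptions in the language of the source 'Text' and drop the English-only constraint on the returned list. For context, a sketch of how a template with these delimiter placeholders gets filled; the delimiter values below are illustrative stand-ins, the real ones are defined elsewhere in graphrag:

```python
PROMPT = ('Format each entity as ("entity"{tuple_delimiter}<entity_name>'
          "{tuple_delimiter}<entity_type>{tuple_delimiter}<entity_description>)\n"
          "Use **{record_delimiter}** as the list delimiter. {completion_delimiter}")

# Assumed example delimiters, chosen only for illustration.
print(PROMPT.format(tuple_delimiter="<|>",
                    record_delimiter="##",
                    completion_delimiter="<|COMPLETE|>"))
```
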
2 changes: 1 addition & 1 deletion graphrag/utils.py
@@ -81,7 +81,7 @@ def get_llm_cache(llmnm, txt, history, genconf):
     return bin
 
 
-def set_llm_cache(llmnm, txt, v: str, history, genconf):
+def set_llm_cache(llmnm, txt, v, history, genconf):
     hasher = xxhash.xxh64()
     hasher.update(str(llmnm).encode("utf-8"))
     hasher.update(str(txt).encode("utf-8"))
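
Dropping the `v: str` annotation matches how the cache really behaves: every argument is passed through `str()` before hashing, so non-string values (dicts, lists) can be cached without a misleading signature. A sketch of the key scheme implied by `set_llm_cache`, using the same `xxhash` calls:

```python
import xxhash

def llm_cache_key(llmnm, txt, history, genconf) -> str:
    # Everything is stringified before hashing, so any type is accepted.
    hasher = xxhash.xxh64()
    for part in (llmnm, txt, history, genconf):
        hasher.update(str(part).encode("utf-8"))
    return hasher.hexdigest()

print(llm_cache_key("gpt-4o", "hello", [("user", "hi")], {"temperature": 0.1}))
```
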
8 changes: 3 additions & 5 deletions rag/app/laws.py
@@ -153,11 +153,9 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
 
     if re.search(r"\.docx$", filename, re.IGNORECASE):
         callback(0.1, "Start to parse.")
-        for txt in Docx()(filename, binary):
-            sections.append(txt)
-        callback(0.8, "Finish parsing.")
-        chunks = sections
-        return tokenize_chunks(chunks, doc, eng, pdf_parser)
+        chunks = Docx()(filename, binary)
+        callback(0.7, "Finish parsing.")
+        return tokenize_chunks(chunks, doc, eng, None)
 
     elif re.search(r"\.pdf$", filename, re.IGNORECASE):
         pdf_parser = Pdf() if kwargs.get(
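
The DOCX branch no longer copies parser output item by item into `sections`, and it passes `None` rather than a PDF parser to `tokenize_chunks`, since no `pdf_parser` exists for a .docx file. The simplified control flow, with injected callables standing in for the real parser and tokenizer:

```python
def chunk_docx(parse_docx, tokenize_chunks, doc, eng, callback):
    callback(0.1, "Start to parse.")
    chunks = parse_docx()              # parser output is already a chunk list
    callback(0.7, "Finish parsing.")
    return tokenize_chunks(chunks, doc, eng, None)  # no pdf_parser for .docx

out = chunk_docx(lambda: ["clause 1", "clause 2"],
                 lambda chunks, doc, eng, pdf_parser: chunks,
                 doc={}, eng=True, callback=lambda p, msg: print(p, msg))
assert out == ["clause 1", "clause 2"]
```
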
4 changes: 2 additions & 2 deletions rag/app/manual.py
@@ -193,7 +193,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
         sections = [(t, lvl, [[0] * 5]) for t, lvl in sections]
         # set pivot using the most frequent type of title,
         # then merge between 2 pivot
-        if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.1:
+        if len(sections) > 0 and len(pdf_parser.outlines) / len(sections) > 0.03:
             max_lvl = max([lvl for _, lvl in pdf_parser.outlines])
             most_level = max(0, max_lvl - 1)
             levels = []
@@ -256,7 +256,7 @@ def tag(pn, left, right, top, bottom):
         res.extend(tokenize_chunks(chunks, doc, eng, pdf_parser))
         return res
 
-    if re.search(r"\.docx$", filename, re.IGNORECASE):
+    elif re.search(r"\.docx$", filename, re.IGNORECASE):
         docx_parser = Docx()
         ti_list, tbls = docx_parser(filename, binary,
                                     from_page=0, to_page=10000, callback=callback)
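
Lowering the threshold from 0.1 to 0.03 means the PDF's own outline (bookmarks) is trusted for section pivoting even when it covers only a few percent of the parsed sections; below that, the code still falls back to the layout-based title heuristic. The heuristic in isolation:

```python
def use_outline_pivots(num_outlines: int, num_sections: int) -> bool:
    # Trust bookmarks once they cover more than 3% of sections (was 10%).
    return num_sections > 0 and num_outlines / num_sections > 0.03

assert use_outline_pivots(5, 100)        # 5% coverage now qualifies
assert not use_outline_pivots(2, 100)    # 2% still uses the fallback
```
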
2 changes: 1 addition & 1 deletion rag/app/table.py
@@ -185,7 +185,7 @@ def chunk(filename, binary=None, from_page=0, to_page=10000000000,
                 "datetime": "_dt",
                 "bool": "_kwd"}
     for df in dfs:
-        for n in ["id", "index", "idx"]:
+        for n in ["id", "_id", "index", "idx"]:
             if n in df.columns:
                 del df[n]
         clmns = df.columns.values
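
Adding `"_id"` to the scrub list keeps spreadsheet columns from colliding with the document store's reserved `_id` field (an assumption consistent with Elasticsearch, where `_id` is a metadata field). The loop on a toy frame:

```python
import pandas as pd

df = pd.DataFrame({"_id": [1, 2], "name": ["a", "b"], "idx": [0, 1]})
for n in ["id", "_id", "index", "idx"]:
    if n in df.columns:
        del df[n]      # drop index-like columns before ingestion
assert list(df.columns) == ["name"]
```
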