Skip to content

Commit

Permalink
refactor
Browse files Browse the repository at this point in the history
  • Loading branch information
KevinHuSh committed Oct 9, 2024
1 parent 8f4bd10 commit 14cada8
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 1 deletion.
2 changes: 2 additions & 0 deletions deepdoc/parser/html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,13 @@
import html_text
import chardet


def get_encoding(file):
    """Guess the character encoding of the file located at *file*.

    The file is opened in binary mode, its full contents are read, and the
    bytes are handed to ``chardet.detect``. The ``'encoding'`` entry of the
    result that call returns is passed back to the caller unchanged.
    """
    with open(file, 'rb') as stream:
        raw_bytes = stream.read()
    # Detection only needs the bytes, so it runs after the handle is closed.
    detection = chardet.detect(raw_bytes)
    return detection['encoding']


class RAGFlowHtmlParser:
def __call__(self, fnm, binary=None):
txt = ""
Expand Down
1 change: 1 addition & 0 deletions deepdoc/parser/markdown_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#
import re


class RAGFlowMarkdownParser:
def __init__(self, chunk_token_num=128):
self.chunk_token_num = int(chunk_token_num)
Expand Down
1 change: 0 additions & 1 deletion rag/app/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,6 @@ def __call__(self, filename, binary=None):
sections.append((sec[int(len(sec)/2):], ""))
else:
sections.append((sec, ""))
print(tables)
for table in tables:
tbls.append(((None, markdown(table, extensions=['markdown.extensions.tables'])), ""))
return sections, tbls
Expand Down

0 comments on commit 14cada8

Please sign in to comment.