Skip to content

Commit

Permalink
refactor code (infiniflow#583)
Browse files: browse the repository at this point in the history
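### What problem does this PR solve?

Refactoring only. Judging by the hunks shown below (not all 25 changed files are visible here): `ELASTICSEARCH` and `MINIO` are now imported from `rag.utils.es_conn` and `rag.utils.minio_conn` instead of from `rag.utils`; the `Hu*` parser classes are renamed to `RAGFlow*` (`HuParser` becomes `RAGFlowPdfParser`) and `HuEmbedding` becomes `DefaultEmbedding`; and in `deepdoc/vision/t_ocr.py` and `deepdoc/vision/t_recognizer.py` the `deepdoc`/`api` imports are moved below the `sys.path.insert(...)` call.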

### Type of change

- [x] Refactoring
  • Loading branch information
KevinHuSh authored Apr 28, 2024
1 parent a9816c3 commit 80964cb
Show file tree
Hide file tree
Showing 25 changed files with 48 additions and 525 deletions.
2 changes: 1 addition & 1 deletion api/apps/api_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from itsdangerous import URLSafeTimedSerializer

from api.utils.file_utils import filename_type, thumbnail
from rag.utils import MINIO
from rag.utils.minio_conn import MINIO


def generate_confirmation_token(tenent_id):
Expand Down
3 changes: 2 additions & 1 deletion api/apps/chunk_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@

from rag.app.qa import rmPrefix, beAdoc
from rag.nlp import search, huqie
from rag.utils import ELASTICSEARCH, rmSpace
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils import rmSpace
from api.db import LLMType, ParserType
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.db.services.llm_service import TenantLLMService
Expand Down
2 changes: 1 addition & 1 deletion api/apps/document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
from api.db.services.file2document_service import File2DocumentService
from api.db.services.file_service import FileService
from rag.nlp import search
from rag.utils import ELASTICSEARCH
from rag.utils.es_conn import ELASTICSEARCH
from api.db.services import duplicate_name
from api.db.services.knowledgebase_service import KnowledgebaseService
from api.utils.api_utils import server_error_response, get_data_error_result, validate_request
Expand Down
2 changes: 1 addition & 1 deletion api/apps/file2document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@
from api.settings import RetCode
from api.utils.api_utils import get_json_result
from rag.nlp import search
from rag.utils import ELASTICSEARCH
from rag.utils.es_conn import ELASTICSEARCH


@manager.route('/convert', methods=['POST'])
Expand Down
2 changes: 1 addition & 1 deletion api/apps/file_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from api.utils.api_utils import get_json_result
from api.utils.file_utils import filename_type
from rag.nlp import search
from rag.utils import ELASTICSEARCH
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.minio_conn import MINIO


Expand Down
2 changes: 1 addition & 1 deletion api/apps/kb_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from api.settings import stat_logger, RetCode
from api.utils.api_utils import get_json_result
from rag.nlp import search
from rag.utils import ELASTICSEARCH
from rag.utils.es_conn import ELASTICSEARCH


@manager.route('/create', methods=['post'])
Expand Down
2 changes: 1 addition & 1 deletion api/db/services/document_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
from peewee import Expression

from elasticsearch_dsl import Q
from rag.utils import ELASTICSEARCH
from rag.utils.es_conn import ELASTICSEARCH
from rag.utils.minio_conn import MINIO
from rag.nlp import search

Expand Down
2 changes: 1 addition & 1 deletion api/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
database_logger = getLogger("database")
chat_logger = getLogger("chat")

from rag.utils import ELASTICSEARCH
from rag.utils.es_conn import ELASTICSEARCH
from rag.nlp import search
from api.utils import get_base_config, decrypt_database_config

Expand Down
8 changes: 4 additions & 4 deletions deepdoc/parser/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@


from .pdf_parser import HuParser as PdfParser, PlainParser
from .docx_parser import HuDocxParser as DocxParser
from .excel_parser import HuExcelParser as ExcelParser
from .ppt_parser import HuPptParser as PptParser
from .pdf_parser import RAGFlowPdfParser as PdfParser, PlainParser
from .docx_parser import RAGFlowDocxParser as DocxParser
from .excel_parser import RAGFlowExcelParser as ExcelParser
from .ppt_parser import RAGFlowPptParser as PptParser
2 changes: 1 addition & 1 deletion deepdoc/parser/docx_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from io import BytesIO


class HuDocxParser:
class RAGFlowDocxParser:

def __extract_table_content(self, tb):
df = []
Expand Down
4 changes: 2 additions & 2 deletions deepdoc/parser/excel_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
from rag.nlp import find_codec


class HuExcelParser:
class RAGFlowExcelParser:
def html(self, fnm):
if isinstance(fnm, str):
wb = load_workbook(fnm)
Expand Down Expand Up @@ -74,5 +74,5 @@ def row_number(fnm, binary):


if __name__ == "__main__":
psr = HuExcelParser()
psr = RAGFlowExcelParser()
psr(sys.argv[1])
2 changes: 1 addition & 1 deletion deepdoc/parser/pdf_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
logging.getLogger("pdfminer").setLevel(logging.WARNING)


class HuParser:
class RAGFlowPdfParser:
def __init__(self):
self.ocr = OCR()
if hasattr(self, "model_speciess"):
Expand Down
2 changes: 1 addition & 1 deletion deepdoc/parser/ppt_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
from pptx import Presentation


class HuPptParser(object):
class RAGFlowPptParser(object):
def __init__(self):
super().__init__()

Expand Down
9 changes: 5 additions & 4 deletions deepdoc/vision/t_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,6 @@
# limitations under the License.
#

from deepdoc.vision.seeit import draw_box
from deepdoc.vision import OCR, init_in_out
import argparse
import numpy as np
import os
import sys
sys.path.insert(
Expand All @@ -25,6 +21,11 @@
os.path.abspath(__file__)),
'../../')))

from deepdoc.vision.seeit import draw_box
from deepdoc.vision import OCR, init_in_out
import argparse
import numpy as np


def main(args):
ocr = OCR()
Expand Down
19 changes: 8 additions & 11 deletions deepdoc/vision/t_recognizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#

from deepdoc.vision.seeit import draw_box
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
from api.utils.file_utils import get_project_base_directory
import argparse
import os
import sys
import re

import numpy as np

import os, sys
sys.path.insert(
0,
os.path.abspath(
Expand All @@ -29,6 +19,13 @@
os.path.abspath(__file__)),
'../../')))

from deepdoc.vision.seeit import draw_box
from deepdoc.vision import Recognizer, LayoutRecognizer, TableStructureRecognizer, OCR, init_in_out
from api.utils.file_utils import get_project_base_directory
import argparse
import re
import numpy as np


def main(args):
images, outputs = init_in_out(args)
Expand Down
2 changes: 1 addition & 1 deletion rag/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
"Ollama": OllamaEmbed,
"OpenAI": OpenAIEmbed,
"Xinference": XinferenceEmbed,
"Tongyi-Qianwen": HuEmbedding, #QWenEmbed,
"Tongyi-Qianwen": DefaultEmbedding, #QWenEmbed,
"ZHIPU-AI": ZhipuEmbed,
"FastEmbed": FastEmbed,
"Youdao": YoudaoEmbed
Expand Down
2 changes: 1 addition & 1 deletion rag/llm/embedding_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ def encode_queries(self, text: str):
raise NotImplementedError("Please implement encode method!")


class HuEmbedding(Base):
class DefaultEmbedding(Base):
def __init__(self, *args, **kwargs):
"""
If you have trouble downloading HuggingFace models, -_^ this might help!!
Expand Down
Loading

0 comments on commit 80964cb

Please sign in to comment.