From 275b5d14f2e0bc16a51ce993f83b2b2f964c4967 Mon Sep 17 00:00:00 2001 From: Jin Hai Date: Thu, 12 Dec 2024 20:34:46 +0800 Subject: [PATCH] Fix json file parse (#4004) ### What problem does this PR solve? Fix json file parsing ### Type of change - [x] Bug Fix (non-breaking change which fixes an issue) --------- Signed-off-by: jinhai --- deepdoc/parser/json_parser.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/deepdoc/parser/json_parser.py b/deepdoc/parser/json_parser.py index 08ddc89acf0..fa30debdfe1 100644 --- a/deepdoc/parser/json_parser.py +++ b/deepdoc/parser/json_parser.py @@ -4,6 +4,7 @@ import json from typing import Any + from rag.nlp import find_codec class RAGFlowJsonParser: def __init__( @@ -53,7 +54,7 @@ def _list_to_dict_preprocessing(self, data: Any) -> Any: def _json_split( self, - data: dict[str, Any], + data, current_path: list[str] | None, chunks: list[dict] | None, ) -> list[dict]: @@ -86,13 +87,14 @@ def _json_split( def split_json( self, - json_data: dict[str, Any], + json_data, convert_lists: bool = False, ) -> list[dict]: """Splits JSON into a list of JSON chunks""" if convert_lists: - chunks = self._json_split(self._list_to_dict_preprocessing(json_data), None, None) + preprocessed_data = self._list_to_dict_preprocessing(json_data) + chunks = self._json_split(preprocessed_data, None, None) else: chunks = self._json_split(json_data, None, None)