def load_jsonl(jsonl_path):
    """Load and process jsonl file.

    Each non-blank line of the file is parsed as one standalone JSON value
    and the parsed values are returned in file order.

    Args:
        jsonl_path: Path of the JSON Lines file to read.

    Returns:
        A list with one parsed JSON value per non-blank line; an empty list
        when the file is empty or contains only blank lines.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    content_list = []
    # JSON Lines is defined as UTF-8; do not depend on the locale default.
    with open(jsonl_path, "r", encoding="utf-8") as file:
        for line in file:
            line = line.strip()
            # Blank lines (typically a trailing newline at end of file) carry
            # no record; json.loads("") would raise, so skip them.
            if not line:
                continue
            content_list.append(json.loads(line))
    return content_list