-
Notifications
You must be signed in to change notification settings - Fork 1
/
excel2json.py
60 lines (48 loc) · 1.98 KB
/
excel2json.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import pandas as pd
import json
from datetime import datetime
def convert_datetime(obj):
if isinstance(obj, (pd.Timestamp, datetime)):
return obj.isoformat()
raise TypeError("Type not serializable")
def convert_excel_to_json(excel_file_path, json_file_path):
# Read the Excel file
xls = pd.ExcelFile(excel_file_path)
# Specify the columns to include in the JSON file
columns_to_include = [
"Target Data Name",
"Target Data Schema",
"Target Data Description",
"Source Data Name",
"Source Data Schema",
"Source Data Description",
"Schema Change Hints",
"5 Samples of Source Data",
"Ground Truth SQL",
"Prompt-4 Results",
"Prompt-3 Results",
"Prompt-2 Results",
"Prompt-1 Results",
"Remark or Note"
]
# Read the specified columns from the first sheet
data_to_convert = pd.read_excel(xls, sheet_name=0, usecols=columns_to_include)
# Fill missing values in the specified columns by forward filling
columns_to_fill = ["Target Data Name", "Target Data Schema", "Target Data Description"]
data_to_convert[columns_to_fill] = data_to_convert[columns_to_fill].ffill()
# Create a list to store the dictionaries
json_data = []
# Iterate through each row and append to the list
for _, row in data_to_convert.iterrows():
json_data.append(row.to_dict())
# Open the JSON file for writing
with open(json_file_path, 'w') as json_file:
json_data = [row.to_dict() for _, row in data_to_convert.iterrows()]
json_file.write(json.dumps(json_data, indent=4, default=convert_datetime))
print(f"JSON file has been saved to {json_file_path}")
# Path to the Excel file
excel_file_path = 'chatgpt.xlsx'
# Path to save the JSON file
json_file_path = 'chatgpt.json'
# Call the function to perform the conversion
convert_excel_to_json(excel_file_path, json_file_path)