-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv2excel.py
111 lines (88 loc) · 4.08 KB
/
csv2excel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
import pandas as pd
from openpyxl import Workbook
from openpyxl.styles import Alignment, Font
from openpyxl.utils.dataframe import dataframe_to_rows
from openpyxl.worksheet.hyperlink import Hyperlink
# 定义信源文件地址
source_file_path = r'D:\wechatmonitor\source\招聘公众号信源.txt'
def adjust_cell_format(cell, r_idx, c_idx, link_column_index, summary_column_index):
# 如果是超链接列并且不是第一行,则设置单元格为超链接
if c_idx == link_column_index and r_idx != 1:
cell.hyperlink = Hyperlink(
ref=cell.coordinate, target=cell.value, tooltip=cell.value)
cell.style = 'Hyperlink' # 应用超链接的默认格式
# 如果不是第一行,则设置单元格格式为左右上下居中对齐
if r_idx != 1:
cell.alignment = Alignment(
horizontal='center', vertical='center', wrapText=True)
# 如果是摘要列并且不是第一行,则设置单元格为左对齐
if c_idx == summary_column_index and r_idx != 1:
cell.alignment = Alignment(
horizontal='left', vertical='center', wrapText=True)
def adjust_column_width(ws):
for col in ws.columns:
max_length = 0
column = col[0].column_letter # 获取列名
for cell in col:
try: # 避免空单元格引发错误
if len(str(cell.value)) > max_length:
max_length = len(cell.value)
except:
pass
adjusted_width = (max_length + 2)
# 设置最大列宽为180
if adjusted_width > 180:
adjusted_width = 180
ws.column_dimensions[column].width = adjusted_width
def adjust_row_height(ws):
for row in ws.rows:
max_height = 20
for cell in row:
try: # 避免空单元格引发错误
if len(str(cell.value)) > max_height:
max_height = len(cell.value)
except:
pass
adjusted_height = (max_height + 2) * 1.2
ws.row_dimensions[row[0].row].height = adjusted_height
def sort_dataframe(df, sort_column_index):
# 读取排序依据的文本文件
with open(source_file_path, 'r', encoding='utf-8') as file:
sort_list = file.read().splitlines()
# 创建排序映射字典,未在列表中的元素将放在最后并按原顺序排序
sort_order = {key: i for i, key in enumerate(sort_list)}
max_index = len(sort_order)
# 根据文本文件中的顺序对DataFrame进行排序
df['SortKey'] = df.iloc[:, sort_column_index].map(sort_order)
df['SortKey'].fillna(max_index + len(df), inplace=True)
df.sort_values(by='SortKey', inplace=True)
df.drop('SortKey', axis=1, inplace=True)
return df
def csv_to_excel(csv_file_path, excel_file_path):
link_column_index = 5 # 超链接所在列的索引
summary_column_index = 6 # 摘要所在列的索引
sort_column_index = 1 # 未添加序号列前,根据第B列进行排序,Python中索引为1
# 读取CSV文件
df = pd.read_csv(csv_file_path, quotechar='"', quoting=1)
# 按指定顺序对DataFrame进行排序
df = sort_dataframe(df, sort_column_index)
# 在DataFrame首列添加序号列,序号从1开始
df.insert(0, '序号', list(range(1, len(df) + 1)))
# 创建一个新的Workbook
wb = Workbook()
ws = wb.active
# 将DataFrame转换为Excel行
for r_idx, row in enumerate(dataframe_to_rows(df, index=False, header=True), 1):
for c_idx, value in enumerate(row, 1):
cell = ws.cell(row=r_idx, column=c_idx, value=value)
adjust_cell_format(cell, r_idx, c_idx,
link_column_index, summary_column_index)
# 设置第一行的格式:加粗,字号大一点
for cell in ws[1]:
cell.font = Font(bold=True, size=12)
cell.alignment = Alignment(horizontal='center', vertical='center')
adjust_column_width(ws)
adjust_row_height(ws)
# 保存Workbook到Excel文件
wb.save(excel_file_path)
print(f'CSV文件已成功转换为Excel文件并保存到 {excel_file_path}')