-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse_raw.py
46 lines (39 loc) · 1.52 KB
/
parse_raw.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
#!/usr/bin/env python
"""Parse raw xls files of 2020 TW election results."""
import sys
import os
import shutil
import pandas as pd
from utils import parse_archive, gather_by_level, gather_by_class, zh_to_en
def _export_tables(tables, outdir, prefix, archive_base, *, rename=None):
    """Write each table to ``<outdir>/<prefix>_<name>.csv`` and zip *outdir*.

    Args:
        tables: Mapping of table name to an object exposing ``to_csv``
            (presumably pandas DataFrames -- confirm against ``utils``).
        outdir: Existing directory that receives the CSV files.
        prefix: Filename prefix placed before each table name.
        archive_base: Archive path without the ``.zip`` extension.
        rename: Optional callable applied to each table name before it is
            used in the output filename.
    """
    for name, df in tables.items():
        if rename is not None:
            name = rename(name)
        outfile = f"{prefix}_{name}.csv"
        df.to_csv(os.path.join(outdir, outfile), index=False, encoding="utf-8")

    # Bundle everything that now sits in outdir into <archive_base>.zip.
    shutil.make_archive(archive_base, "zip", outdir)


if __name__ == "__main__":
    # ------------------------------------- #
    # Parse presidential election raw data. #
    # ------------------------------------- #
    outdir1 = "data/processed/presidential"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(outdir1, exist_ok=True)

    inzipfile1 = "data/raw/總統-各投票所得票明細及概況(Excel檔).zip"
    dfs1 = parse_archive(inzipfile1)
    presidential = gather_by_level(dfs1)

    # Write out and zip.
    _export_tables(presidential, outdir1, "presidential", "data/out/presidential")

    # ------------------------------------ #
    # Parse legislative election raw data. #
    # ------------------------------------ #
    outdir2 = "data/processed/legislative"
    os.makedirs(outdir2, exist_ok=True)

    inzipfile2 = "data/raw/立委-各投票所得票明細及概況(Excel檔).zip"
    dfs2 = parse_archive(inzipfile2)
    legislative = gather_by_class(dfs2)

    # Write out and zip; zh_to_en gives each output an English filename.
    _export_tables(
        legislative,
        outdir2,
        "legislative",
        "data/out/legislative",
        rename=zh_to_en,
    )