-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutilities.py
102 lines (82 loc) · 3.34 KB
/
utilities.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
import csv
import subprocess
import platform
import os
import pandas as pd
from typing import List, Union
from pathlib import Path
from configs import OUTPUT_PATH
def open_file(file_path: Union[str, Path]) -> None:
"""
Opens a file using the default application associated with its type.
Args:
file_path (str): The path to the file to be opened.
Returns:
None
"""
if os.path.exists('/.dockerenv'):
print(f"Excel file saved at: {OUTPUT_PATH.with_suffix('.xlsx')}")
return
if platform.system() == 'Darwin': # macOS
subprocess.call(['open', file_path])
elif platform.system() == 'Linux': # Linux
subprocess.call(['xdg-open', file_path])
else: # Windows
os.startfile(file_path)
def write_delim_file(path: Union[str, Path], data: List[List[str]], delim: str) -> None:
"""
Writes a list of lists (data) to a file using a specified delimiter.
Args:
path (str or Path): The path where the file will be written.
data (list of lists): The data to be written to the file.
delim (str): The delimiter to use for separating entries in the file.
Returns:
None
Example:
write_delim_file("output.csv", data, ",")
"""
path = Path(path)
path.parent.mkdir(parents=True, exist_ok=True)
with path.open(mode="w", newline="") as file:
writer = csv.writer(file, delimiter=delim)
writer.writerows(data)
def format_job_data(df: pd.DataFrame) -> pd.DataFrame:
"""
Formats the columns of a DataFrame containing job data.
This function performs the following operations:
1. Converts the 'Job Title', 'Job ID (Job Code)', and 'Job Category' columns to string type.
2. Converts the 'Effective Date' column to a datetime object with the format 'MM/DD/YYYY'.
3. Converts the 'Annual Min', 'Annual Max', 'Monthly Min', and 'Monthly Max' columns from currency strings to float type by removing dollar signs and commas.
Args:
df (pd.DataFrame): A pandas DataFrame containing job data with the following columns:
- 'Job Title'
- 'Job ID (Job Code)'
- 'Job Category'
- 'Effective Date'
- 'Annual Min'
- 'Annual Max'
- 'Monthly Min'
- 'Monthly Max'
Returns:
pd.DataFrame: The formatted DataFrame with appropriate data types for each column.
"""
df['Job Title'] = df['Job Title'].astype(str)
df['Job ID (Job Code)'] = df['Job ID (Job Code)'].astype(str)
df['Job Category'] = df['Job Category'].astype(str)
df['Effective Date'] = pd.to_datetime(df['Effective Date'], format='%m/%d/%Y')
currency_columns = ['Annual Min', 'Annual Max', 'Monthly Min', 'Monthly Max']
for column in currency_columns:
df[column] = df[column].replace(r'[\$,]', '', regex=True).astype(float)
return df
def write_spreadsheet(csv_file_path: Union[str, Path], excel_file_path: Union[str, Path]) -> None:
"""
Converts a CSV file into an Excel spreadsheet and saves it.
Args:
csv_file_path (str): The path to the input CSV file.
excel_file_path (str): The path where the output Excel file should be saved.
Returns:
None
"""
df = pd.read_csv(csv_file_path)
df = format_job_data(df)
df.to_excel(excel_file_path, index=False)