# findExistingCollNamesAndNamesToCreate.py
#
# Matches the titles found in the aggregated creator-role CSVs against
# allCollectionNames.csv, then writes the titles that already have an id and
# the titles that still need to be created to separate CSV files.
import pandas as pd
from datetime import datetime
import os
# ---------------------------------------------------------------------------
# Locations.  Everything is resolved against the current working directory,
# so the script has to be launched from the folder that holds the input data.
# ---------------------------------------------------------------------------
path = os.getcwd()

# Output folders.  makedirs(exist_ok=True) creates them on first run without
# the check-then-create race of os.path.exists() followed by os.mkdir().
termsDone = os.path.join(path, 'termsDone/')
os.makedirs(termsDone, exist_ok=True)
termsToCreate = os.path.join(path, 'termsToCreate/')
os.makedirs(termsToCreate, exist_ok=True)

# Input files.
aggregatedRoles = os.path.join(path, 'aggregated-roles')
typeSheet = 'allCollectionNames.csv'
rolesSheet = os.path.join(path, 'existing-taxonomies/creator_r.csv')
rolesList = ['AggregatedByarranger.csv', 'AggregatedBycomposer.csv',
             'AggregatedBylyricist.csv']

# ---------------------------------------------------------------------------
# Step 1: load every aggregated-role sheet, tag its rows with the role taken
# from the file name, and attach that role's id from the roles taxonomy.
# ---------------------------------------------------------------------------
rolesDF = pd.read_csv(rolesSheet)
rolesDF = rolesDF.rename(columns={'name': 'role', 'id': 'creator_role_id'})

roleFrames = []
for filename in rolesList:
    # 'AggregatedBycomposer.csv' -> 'composer'
    role = filename.replace('AggregatedBy', '')[:-4]
    filename = os.path.join(aggregatedRoles, filename)
    print(filename)
    if not filename.endswith('.csv'):
        continue
    try:
        df = pd.read_csv(filename)
    except FileNotFoundError:
        # A missing role sheet is tolerated (best effort), but say so
        # instead of skipping it silently.
        print('Skipping missing file: ' + filename)
        continue
    df['role'] = role
    roleFrames.append(pd.merge(df, rolesDF, how='left', on=['role']))

if not roleFrames:
    # Without this guard the script would fail further down with an opaque
    # KeyError on the 'title' column.
    raise SystemExit('No aggregated role files could be read from '
                     + aggregatedRoles)

# DataFrame.append was removed in pandas 2.0; collect the frames and
# concatenate once instead (also avoids re-copying the data per file).
newDF = pd.concat(roleFrames, ignore_index=True, sort=True)
newDF = newDF.drop_duplicates()
newDF['title'] = newDF['title'].str.strip()
print(newDF.head())

# ---------------------------------------------------------------------------
# Step 2: left-join the aggregated titles onto the existing collection names.
# Titles without a counterpart in typeSheet end up with a missing 'id'.
# ---------------------------------------------------------------------------
existing = pd.read_csv(typeSheet, header=0)
print(existing.head())
existing['title'] = existing['title'].str.strip()

frame = pd.merge(newDF, existing, how='left', on=['title'])
frame = frame.drop_duplicates()

# 'allCollectionNames.csv' -> 'matched_CollectionNames.csv'
# (only the leading 'all' is replaced).
renamedKey = typeSheet.replace('all', 'matched_', 1)
frame.to_csv(renamedKey, index=False)

# ---------------------------------------------------------------------------
# Step 3: split the matched sheet into "already exists" and "to create".
# The sheet is read back from disk, as in the original flow, so the split
# works on exactly the values stored in the CSV.
# ---------------------------------------------------------------------------
newDF = pd.read_csv(renamedKey, header=0)
hasNoId = newDF['id'].isna()

# Titles with no existing id: only the title column is kept, de-duplicated.
toCreate = newDF.loc[hasNoId, ['title']]
print(toCreate.head())
toCreate = toCreate.drop_duplicates()
filename = 'levy_collection_namesToCreate.csv'
fullname = os.path.join(termsToCreate, filename)
toCreate.to_csv(fullname, index=False)

# Rows that already have an id are written out in full.
done = newDF.loc[~hasNoId]
filename2 = 'levy_collection_namesDone.csv'
fullname2 = os.path.join(termsDone, filename2)
done.to_csv(fullname2, index=False)