-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsponsor.py
203 lines (187 loc) · 8.72 KB
/
sponsor.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
import os
import subprocess
import pandas as pd
import base64
import unicodedata
import re
def strip_accents(s):
    """Return `s` with combining accent marks removed.

    The string is decomposed with Unicode NFD normalization and every
    character whose Unicode category is ``Mn`` (nonspacing mark) is dropped.
    Characters that do not decompose are returned unchanged.
    """
    decomposed = unicodedata.normalize('NFD', s)
    kept = []
    for ch in decomposed:
        if unicodedata.category(ch) == 'Mn':
            continue
        kept.append(ch)
    return ''.join(kept)
def get_sponsor_netid_per_cluster_dict_from_ldap(netid, verbose=True, strip=False):
    """Return the sponsor netid of a user on each large cluster, plus the user's name.

    Runs ``ldapsearch`` against the RC LDAP server for ``uid=netid`` requesting
    the ``displayname``, ``manager`` and ``description`` attributes. The first
    ``manager`` entry is taken as the primary sponsor and is used for every
    cluster unless the text following the last ``manager`` line contains a
    ``<cluster>:<sponsor>=`` entry, which then overrides the primary sponsor
    for that cluster. If LDAP lists no manager, the CSV file
    ``users_left_university_from_robert_knight.csv`` (when present in the
    current directory) is consulted instead.

    Args:
        netid: The netid of the user to look up.
        verbose: If True, print ``W:`` warnings about missing or corrected data.
        strip: If True, remove accents from a base64-encoded display name.

    Returns:
        A dict with the keys ``della``, ``stellar``, ``tiger``,
        ``tigressdata`` and ``traverse`` (sponsor netid or None) and
        ``displayname`` (name of the user or None).

    Raises:
        subprocess.CalledProcessError: If ``ldapsearch`` exits with a non-zero status.
        subprocess.TimeoutExpired: If ``ldapsearch`` runs longer than 5 seconds.
    """
    clusters = ("della", "stellar", "tiger", "tigressdata", "traverse")
    ldap = "ldap://ldap01.rc.princeton.edu"
    cmd = f"ldapsearch -x -H {ldap} -b dc=rc,dc=princeton,dc=edu uid={netid} displayname manager description"
    output = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True, timeout=5, text=True, check=True)
    lines = output.stdout.split('\n')
    if lines and lines[-1] == "":
        lines = lines[:-1]

    # get primary manager (if more than 1 then take first)
    line_index = 0
    displayname = None
    managers = []
    for i, line in enumerate(lines):
        if "displayname:: " in line:
            # a double colon marks a base64-encoded value
            rawname = line.split(":: ")[1].strip()
            displayname = base64.b64decode(rawname).decode("utf-8")
            displayname = strip_accents(displayname) if strip else displayname
        if "displayname: " in line:
            displayname = line.split(": ")[1].strip()
        if "manager: " in line and "uid=" in line:
            managers.append(line.split("uid=")[1].split(",")[0])
            # remember the last manager line; the override scan starts there
            line_index = i

    primary = None
    if not managers:
        # try looking in CSV file if available
        fname = "users_left_university_from_robert_knight.csv"
        if os.path.exists(fname):
            rk = pd.read_csv(fname)
            rows = rk[rk.Netid_ == netid]
            if not rows.empty:
                primary = rows.Sponsor_Netid_.values[0]
                if not displayname:
                    displayname = rows.Name_.values[0]
                if verbose:
                    print(f"W: Primary sponsor for {netid} taken from CSV file.")
        if not primary and verbose:
            print(f"W: No primary sponsor found for {netid} in CSES LDAP or CSV file.")
    else:
        if len(managers) > 1 and verbose:
            print(f"W: User {netid} has multiple primary sponsors: {','.join(managers)}. Using {managers[0]}.")
        primary = managers[0]
    if not displayname and verbose:
        print(f"W: Name not found for user {netid} in CSES LDAP.")

    # get all cluster-specific sponsors and name of user
    sponsor = {cluster: primary for cluster in clusters}
    sponsor["displayname"] = displayname

    # Join the non-comment lines without separators so that a value which
    # ldapsearch wrapped over several lines is searched as one string.
    s = "".join(line.strip() for line in lines[line_index:] if not line.startswith("#"))

    # Only real cluster names are scanned. The "displayname" key must not be
    # treated as a cluster, otherwise a "displayname:" attribute in the scanned
    # text would overwrite the name stored above.
    for cluster in clusters:
        x = f"{cluster}:"
        if x in s:
            # sponsor is the text between "<cluster>:" and the next "="
            sponsor_netid = s.split(x)[1].split("=")[0]
            if sponsor_netid == "USER":
                if verbose:
                    print(f"W: Sponsor entry of {sponsor_netid} found for {netid} on {cluster}. Corrected to {netid}.")
                sponsor_netid = netid
            if "(" in sponsor_netid:
                tmp = sponsor_netid.split("(")[0].strip()
                if verbose:
                    print(f"W: Sponsor entry of {sponsor_netid} found for {netid} on {cluster}. Corrected to {tmp}.")
                sponsor_netid = tmp
            sponsor[cluster] = sponsor_netid
    return sponsor
def get_full_name_from_ldap(netid, use_rc=False, include_netid=False, verbose=True, strip=True):
    """Look up the display name of `netid` with ``ldapsearch``.

    Args:
        netid: The netid of the user to look up.
        use_rc: If True, query the RC LDAP server explicitly; otherwise run
            ``ldapsearch`` with its default server settings.
        include_netid: If True, return ``"<name> (<netid>)"`` instead of the
            bare name.
        verbose: If True, print a warning when no name is found.
        strip: If True, remove accents from a base64-encoded display name.

    Returns:
        The name taken from the first ``displayname`` line of the output, or
        None if the output has no such line.

    Raises:
        subprocess.CalledProcessError: If ``ldapsearch`` exits with a non-zero status.
        subprocess.TimeoutExpired: If ``ldapsearch`` runs longer than 5 seconds.
    """
    # NOTE(review): netid is interpolated into a shell command line, so
    # callers should only pass trusted netids.
    if not use_rc:
        cmd = f"ldapsearch -x uid={netid} displayname"
    else:
        ldap = "ldap://ldap01.rc.princeton.edu"
        cmd = f"ldapsearch -x -H {ldap} -b dc=rc,dc=princeton,dc=edu uid={netid} displayname"
    result = subprocess.run(cmd, stdout=subprocess.PIPE, shell=True, timeout=5, text=True, check=True)

    full_name = None
    for entry in result.stdout.split('\n'):
        if "displayname:: " in entry:
            # a double colon marks a base64-encoded value
            encoded = entry.split(":: ")[1].strip()
            full_name = base64.b64decode(encoded).decode("utf-8")
            if strip:
                full_name = strip_accents(full_name)
            break
        if "displayname: " in entry:
            full_name = entry.split(": ")[1].strip()
            break

    if full_name is None:
        if verbose:
            print(f"W: Name not found in LDAP for {netid} with use_rc={use_rc}.")
        return None
    if include_netid:
        return f"{full_name} ({netid})"
    return full_name
def get_full_name_of_user_from_log(netid, flnm="tigress_user_changes.log"):
    """Return the full name recorded for `netid` in the log file `flnm`.

    Every line is examined, so the name from the last matching
    "Added user" or "Removed user" line is the one returned. Returns None
    when no line matches.
    """
    added_marker = f" Added user {netid} ("
    removed_marker = f" Removed user {netid} ("
    full_name = None
    with open(flnm, "r", encoding="utf-8") as log:
        for entry in log:
            if added_marker in entry:
                # the parenthesized text is "<something> - <name>"; keep the name
                inside = entry.rpartition(added_marker)[2].partition(")")[0]
                full_name = inside.rpartition(" - ")[2]
            if removed_marker in entry:
                # here the parenthesized text is the name itself
                full_name = entry.rpartition(removed_marker)[2].partition(")")[0]
    return full_name
def get_sponsor_netid_of_user_from_log(netid, flnm="tigress_user_changes.log"):
    """Return the sponsor netid recorded for `netid` in the log file `flnm`.

    Every line is examined, so the sponsor from the last matching
    "Added user" or "Removed user" line is the one returned. Returns None
    when no line matches.
    """
    added_marker = f" Added user {netid} "
    removed_marker = f" Removed user {netid} "
    found = None
    with open(flnm, "r", encoding="utf-8") as log:
        for entry in log:
            if added_marker in entry and " with sponsor " in entry:
                # sponsor is the first word after the last " with sponsor "
                found = entry.rpartition(" with sponsor ")[2].split()[0]
            if removed_marker in entry and " sponsor " in entry:
                # sponsor is the text after the last " sponsor " up to a ";"
                found = entry.rpartition(" sponsor ")[2].partition(";")[0]
    return found
def build_uid_username_dictionaries(uids: set[str], flnm="tigress_user_changes.log"):
    """Build uid-to-username and username-to-uid lookup tables.

    Both tables start with root (uid "0") and are then filled from every
    "Added user" line of the log file `flnm`, if that file exists. Any uid in
    `uids` that is still unknown afterwards is looked up with ``ldapsearch``
    on the RC LDAP server. All uids are stored as strings.

    Returns:
        A tuple ``(uid2user, user2uid)`` of dictionaries.

    Raises:
        subprocess.CalledProcessError: If ``ldapsearch`` exits with a non-zero status.
        subprocess.TimeoutExpired: If ``ldapsearch`` runs longer than 5 seconds.
    """
    uid2user = {"0": "root"}
    user2uid = {"root": "0"}

    if not os.path.isfile(flnm):
        print(f"{flnm} was not found.")
    else:
        with open(flnm, "r", encoding="utf-8") as log:
            entries = log.readlines()
        pattern = r"Added user \w+ \(\d+ -"
        for line in entries:
            hit = re.search(pattern, line)
            if hit is None:
                continue
            # matched text looks like "Added user <netid> (<uid> -"
            before_paren, _, after_paren = hit.group().partition("(")
            uid = after_paren.split()[0]
            netid = before_paren.strip().split()[-1]
            assert uid.isnumeric(), f"{uid} is not numeric: {line}"
            uid2user[uid] = netid
            user2uid[netid] = uid

    # fall back to LDAP for the requested uids that the log did not cover
    for uid in uids:
        if uid in uid2user:
            continue
        ldap = "ldap://ldap01.rc.princeton.edu"
        cmd = f"ldapsearch -x -H {ldap} -b dc=rc,dc=princeton,dc=edu uidNumber={uid} uid"
        output = subprocess.run(cmd,
                                stdout=subprocess.PIPE,
                                shell=True,
                                timeout=5,
                                text=True,
                                check=True)
        for line in output.stdout.split('\n'):
            if line.startswith("uid: "):
                netid = line.split()[1]
                uid2user[uid] = netid
                user2uid[netid] = uid
                break
        else:
            print(f"A netid for uid {uid} was not found.")
    return uid2user, user2uid
def user_and_sponsor_with_dept(df: pd.DataFrame, cluster="della", verbose=False, level=0):
    """Given the output of ldap_plus, find the sponsor name and department
    for each user. A user department of "UNSPECIFIED" is replaced by the
    sponsor department.

    Args:
        df: Dataframe with at least the columns NAME, NETID, NETID_TRUE, DEPT
            and POSITION. It is modified in place: helper and sponsor columns
            are added, DEPT is updated, and the rows are sorted by DEPT and
            re-indexed starting at 1.
        cluster: Cluster whose sponsor is used for each user.
        verbose: Passed to the LDAP sponsor lookup to enable its warnings.
        level: Passed through to ``ldap_plus``.

    Returns:
        The columns NAME, NETID, NETID_TRUE, DEPT, POSITION, SPONSOR_NAME and
        SPONSOR_DEPT of the updated dataframe, or an empty dataframe if the
        ``dossier`` module cannot be imported.
    """
    try:
        # wget https://raw.githubusercontent.com/jdh4/tigergpu_visualization/refs/heads/master/dossier.py
        from dossier import ldap_plus
    except ModuleNotFoundError:
        print("dossier module not found. Exiting.")
        return pd.DataFrame()

    def sponsor_name_and_dept(sponsor_netid):
        props = ldap_plus([sponsor_netid], level=level)
        # assumes props[0] is a heading row and props[1] holds the sponsor's
        # values with name first and department second - TODO confirm against dossier
        values = props[1]
        return values[0], values[1]

    # forward `verbose` so that the lookup honors the caller's choice instead
    # of always printing warnings
    df["sponsor_dict"] = df.NETID_TRUE.apply(
        lambda netid: get_sponsor_netid_per_cluster_dict_from_ldap(netid, verbose=verbose))
    df["sponsor"] = df.sponsor_dict.apply(lambda d: d[cluster])
    df["sponsor_ldap"] = df.sponsor.apply(sponsor_name_and_dept)
    cols = ["SPONSOR_NAME", "SPONSOR_DEPT"]
    df[cols] = pd.DataFrame(df["sponsor_ldap"].tolist(), index=df.index)
    df["DEPT"] = df.apply(lambda row: row["SPONSOR_DEPT"] if row["DEPT"] == "UNSPECIFIED" else row["DEPT"], axis="columns")
    df.sort_values("DEPT", inplace=True)
    df.reset_index(drop=True, inplace=True)
    df.index += 1
    return df[["NAME", "NETID", "NETID_TRUE", "DEPT", "POSITION", "SPONSOR_NAME", "SPONSOR_DEPT"]]