From a20d29dd183e9e88b5ccf67818ef806d13a6a498 Mon Sep 17 00:00:00 2001 From: Janosh Riebesell Date: Sat, 7 Oct 2023 12:20:13 -0700 Subject: [PATCH] Add `docs/fetch_pmg_contributors.py` script (#3387) * add script docs/fetch_pmg_contributors_sorted.py for fetching the top contributors of pymatgen from GitHub's API * update top-contributors.csv * mv docs/fetch_pmg_contributors(_sorted->'').py --- .gitignore | 1 + docs/2023-10-07-top-contributors.csv | 92 ++++++++++++++++++++++++++++ docs/fetch_pmg_contributors.py | 84 +++++++++++++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 docs/2023-10-07-top-contributors.csv create mode 100644 docs/fetch_pmg_contributors.py diff --git a/.gitignore b/.gitignore index 21e0621b8fb..052fbd540d1 100644 --- a/.gitignore +++ b/.gitignore @@ -40,3 +40,4 @@ venv/ ENV/ env.bak/ venv.bak/ +**/secrets* diff --git a/docs/2023-10-07-top-contributors.csv b/docs/2023-10-07-top-contributors.csv new file mode 100644 index 00000000000..bed291718e8 --- /dev/null +++ b/docs/2023-10-07-top-contributors.csv @@ -0,0 +1,92 @@ +GitHub username,Weeks with merged PR,Years with merged PR,Number years active,Oldest PR,Total contributions +shyuep,439,"2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2012, 2013, 2014, 2015",12,https://github.com/materialsproject/pymatgen/pull/2048,5876 +setten,63,"2016, 2013, 2014, 2015",4,https://github.com/materialsproject/pymatgen/pull/2048,1126 +gmatteo,164,"2016, 2017, 2018, 2019, 2020, 2021, 2023, 2013, 2014, 2015",10,https://github.com/materialsproject/pymatgen/pull/2048,973 +janosh,83,"2021, 2022, 2023",3,https://github.com/materialsproject/pymatgen/pull/2048,782 +mkhorton,161,"2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023",8,https://github.com/materialsproject/pymatgen/pull/2048,668 +rkingsbury,95,"2019, 2020, 2021, 2022, 2023",5,https://github.com/materialsproject/pymatgen/pull/2048,542 +wmdrichards,126,"2016, 2017, 2012, 2013, 2014, 2015",6,https://github.com/materialsproject/pymatgen/pull/2048,477 +samblau,89,"2018, 2019, 2020, 2021, 2022, 2023",6,https://github.com/materialsproject/pymatgen/pull/2048,392 +montoyjh,80,"2016, 2017, 2018, 2023, 2015",5,https://github.com/materialsproject/pymatgen/pull/2048,374 +computron,95,"2016, 2017, 2018, 2019, 2020, 2011, 2012, 2013, 2014, 2015",10,https://github.com/materialsproject/pymatgen/pull/2048,328 +xhqu1981,51,"2016, 2017, 2013, 2014, 2015",5,https://github.com/materialsproject/pymatgen/pull/2048,298 +CifLord,49,"2016, 2017, 2018, 2019, 2020, 2021, 2023",7,https://github.com/materialsproject/pymatgen/pull/2048,260 +dbroberg,28,"2017, 2018, 2019",3,https://github.com/materialsproject/pymatgen/pull/2048,253 +shyamd,53,"2016, 2017, 2018, 2019, 2020, 2021",6,https://github.com/materialsproject/pymatgen/pull/2048,241 +Andrew-S-Rosen,33,"2021, 2022, 2023",3,https://github.com/materialsproject/pymatgen/pull/2048,233 +matk86,30,"2016, 2017",2,https://github.com/materialsproject/pymatgen/pull/2048,219 +JaGeo,55,"2017, 2018, 2019, 2020, 2021, 2022, 2023",7,https://github.com/materialsproject/pymatgen/pull/2048,199 +sdacek,39,"2016, 2012, 2013, 2014, 2015",5,https://github.com/materialsproject/pymatgen/pull/2048,193 +hautierg,49,"2011, 2012, 2013, 2014, 2015",5,https://github.com/materialsproject/pymatgen/pull/2048,193 +mbkumar,52,"2016, 2017, 2020, 2013, 2014, 2015",6,https://github.com/materialsproject/pymatgen/pull/2048,179 +jmmshn,43,"2018, 2019, 2020, 2021, 2022, 2023",6,https://github.com/materialsproject/pymatgen/pull/2048,150 +fraricci,33,"2016, 2017, 2018, 2019, 2020, 2022, 2023",7,https://github.com/materialsproject/pymatgen/pull/2048,129 +utf,38,"2017, 2018, 2019, 2020, 2021, 2022",6,https://github.com/materialsproject/pymatgen/pull/2048,128 +nisse3000,36,"2016, 2017, 2018, 2015",4,https://github.com/materialsproject/pymatgen/pull/2048,127 +espottesmith,23,"2018, 2020, 2021, 2022, 2023",5,https://github.com/materialsproject/pymatgen/pull/2048,126 +awvio,25,"2019, 2020",2,https://github.com/materialsproject/pymatgen/pull/2048,109 +mattmcdermott,19,"2019, 2020, 2021, 2022, 2023",5,https://github.com/materialsproject/pymatgen/pull/2048,72 +davidwaroquiers,21,"2017, 2018, 2019, 2020, 2021",5,https://github.com/materialsproject/pymatgen/pull/2048,60 +welltemperedpaprika,15,"2019, 2020",2,https://github.com/materialsproject/pymatgen/pull/2048,53 +saijayaram,21,"2013, 2014",2,https://github.com/materialsproject/pymatgen/pull/2048,52 +chc273,12,"2017, 2019, 2020, 2021",4,https://github.com/materialsproject/pymatgen/pull/2048,51 +gpetretto,34,"2016, 2017, 2018, 2019, 2020, 2021, 2022, 2023, 2014, 2015",10,https://github.com/materialsproject/pymatgen/pull/2048,51 +arepstein,14,"2020, 2021, 2023",3,https://github.com/materialsproject/pymatgen/pull/2048,48 +rwoodsrobinson,10,"2019, 2020, 2021",3,https://github.com/materialsproject/pymatgen/pull/2048,48 +gVallverdu,20,"2016, 2018, 2019, 2020, 2014, 2015",6,https://github.com/materialsproject/pymatgen/pull/2048,46 +ncfrey,14,"2019, 2020",2,https://github.com/materialsproject/pymatgen/pull/2048,45 +munrojm,14,"2020, 2021, 2022",3,https://github.com/materialsproject/pymatgen/pull/2048,39 +sivonxay,7,"2018, 2019",2,https://github.com/materialsproject/pymatgen/pull/2048,36 +lbluque,11,"2020, 2022, 2023",3,https://github.com/materialsproject/pymatgen/pull/2048,35 +vorwerkc,17,"2016, 2017, 2020, 2021, 2022",5,https://github.com/materialsproject/pymatgen/pull/2048,35 +mturiansky,11,"2017, 2018, 2019, 2020",4,https://github.com/materialsproject/pymatgen/pull/2048,34 +WardLT,10,"2017, 2018, 2019",3,https://github.com/materialsproject/pymatgen/pull/2048,34 +HanmeiTang,12,"2017, 2018",2,https://github.com/materialsproject/pymatgen/pull/2048,33 +htz1992213,14,"2019, 2020, 2021",3,https://github.com/materialsproject/pymatgen/pull/2048,32 +nheinsdorf,8,2022,1,https://github.com/materialsproject/pymatgen/pull/2048,31 +rees-c,3,2019,1,https://github.com/materialsproject/pymatgen/pull/2048,31 +dwinston,26,"2016, 2017, 2018, 2019",4,https://github.com/materialsproject/pymatgen/pull/2048,30 +blondegeek,10,"2016, 2017, 2018, 2019",4,https://github.com/materialsproject/pymatgen/pull/2048,28 +henriquemiranda,13,"2017, 2018, 2019, 2022",4,https://github.com/materialsproject/pymatgen/pull/2048,28 +kmu,9,"2017, 2018, 2019, 2021",4,https://github.com/materialsproject/pymatgen/pull/2048,26 +saurabh02,6,2016,1,https://github.com/materialsproject/pymatgen/pull/2048,26 +wood-b,11,"2016, 2017, 2018",3,https://github.com/materialsproject/pymatgen/pull/2048,25 +tschaume,10,"2018, 2019, 2014, 2015",4,https://github.com/materialsproject/pymatgen/pull/2048,25 +ucsdlxg,7,"2017, 2018",2,https://github.com/materialsproject/pymatgen/pull/2048,24 +ab5424,10,"2020, 2021, 2022, 2023",4,https://github.com/materialsproject/pymatgen/pull/2048,24 +mcocdawc,3,2017,1,https://github.com/materialsproject/pymatgen/pull/2048,23 +ScottNotFound,4,"2022, 2023",2,https://github.com/materialsproject/pymatgen/pull/2048,23 +yimingchen-eng,5,"2017, 2019, 2020",3,https://github.com/materialsproject/pymatgen/pull/2048,22 +bwjustus,7,"2020, 2021",2,https://github.com/materialsproject/pymatgen/pull/2048,22 +yang-ruoxi,8,"2021, 2022, 2023",3,https://github.com/materialsproject/pymatgen/pull/2048,20 +dangunter,6,"2011, 2012",2,https://github.com/materialsproject/pymatgen/pull/2048,20 +ardunn,5,"2019, 2021",2,https://github.com/materialsproject/pymatgen/pull/2048,19 +aykol,5,"2016, 2017",2,https://github.com/materialsproject/pymatgen/pull/2048,18 +fekad,5,2020,1,https://github.com/materialsproject/pymatgen/pull/2048,17 +knc6,5,"2019, 2020",2,https://github.com/materialsproject/pymatgen/pull/2048,17 +guymoore13,5,"2019, 2020",2,https://github.com/materialsproject/pymatgen/pull/2048,16 +ayushsgupta,5,2020,1,https://github.com/materialsproject/pymatgen/pull/2048,16 +mjwen,3,2022,1,https://github.com/materialsproject/pymatgen/pull/2048,16 +MichaelWolloch,7,"2021, 2022, 2023",3,https://github.com/materialsproject/pymatgen/pull/2048,16 +navnidhirajput,4,2013,1,https://github.com/materialsproject/pymatgen/pull/2048,15 +kavanase,6,"2020, 2021, 2022, 2023",4,https://github.com/materialsproject/pymatgen/pull/2048,15 +acrutt,6,"2020, 2021, 2022",3,https://github.com/materialsproject/pymatgen/pull/2048,14 +rousseab,6,"2014, 2015",2,https://github.com/materialsproject/pymatgen/pull/2048,14 +lan496,5,"2019, 2020, 2022",3,https://github.com/materialsproject/pymatgen/pull/2048,14 +vivid0036,5,"2016, 2014",2,https://github.com/materialsproject/pymatgen/pull/2048,13 +specter119,8,"2016, 2017, 2018",3,https://github.com/materialsproject/pymatgen/pull/2048,13 +orionarcher,7,"2020, 2021, 2022, 2023",4,https://github.com/materialsproject/pymatgen/pull/2048,12 +Bismarrck,5,2015,1,https://github.com/materialsproject/pymatgen/pull/2048,12 +zacharygibbs,4,"2013, 2014",2,https://github.com/materialsproject/pymatgen/pull/2048,11 +albalu,6,"2017, 2018",2,https://github.com/materialsproject/pymatgen/pull/2048,11 +adengz,8,"2017, 2019, 2014, 2015",4,https://github.com/materialsproject/pymatgen/pull/2048,10 +naik-aakash,7,"2022, 2023",2,https://github.com/materialsproject/pymatgen/pull/2048,10 +cnncnnzh,3,"2022, 2023",2,https://github.com/materialsproject/pymatgen/pull/2048,10 +kylebystrom,3,2019,1,https://github.com/materialsproject/pymatgen/pull/2048,10 +Tinaatucsd,4,"2017, 2018, 2019",3,https://github.com/materialsproject/pymatgen/pull/2048,9 +wuxiaohua1011,4,"2020, 2021",2,https://github.com/materialsproject/pymatgen/pull/2048,9 +ml-evs,6,"2021, 2023",2,https://github.com/materialsproject/pymatgen/pull/2048,9 +resnant,4,2018,1,https://github.com/materialsproject/pymatgen/pull/2048,8 +jdagdelen,5,"2017, 2018",2,https://github.com/materialsproject/pymatgen/pull/2048,8 +Qi-max,1,2018,1,https://github.com/materialsproject/pymatgen/pull/2048,8 +jacksund,3,"2020, 2022",2,https://github.com/materialsproject/pymatgen/pull/2048,8 diff --git a/docs/fetch_pmg_contributors.py b/docs/fetch_pmg_contributors.py new file mode 100644 index 00000000000..2c63f88b07b --- /dev/null +++ b/docs/fetch_pmg_contributors.py @@ -0,0 +1,84 @@ +# %% +from __future__ import annotations + +from datetime import datetime + +import pandas as pd +import requests +from tqdm import tqdm +from docs.secrets import GH_TOKEN +from datetime import datetime + +__author__ = "Janosh Riebesell" +__date__ = "2023-08-11" + + +# %% +headers = {"Authorization": f"token {GH_TOKEN}"} # GH personal access token +base_url = "https://api.github.com/repos/materialsproject/pymatgen" + +contributors_url = f"{base_url}/stats/contributors" +pull_requests_url = f"{base_url}/pulls?state=closed&sort=created&direction=asc" + +contributors_response = requests.get(contributors_url, headers=headers).json() + + +# %% +gh_user_data = {} + +for contributor in tqdm(contributors_response): + user_url = f"https://api.github.com/users/{contributor['author']['login']}" + user_data = requests.get(user_url, headers=headers).json() + + if user_data.get("type") == "Bot": # Exclude bots + continue + gh_user_data[contributor["author"]["login"]] = user_data + + +# %% +merged_prs_per_contributor = {} +for login in tqdm(gh_user_data): + merged_pr_response = requests.get(pull_requests_url + f"&creator={login}&state=merged", headers=headers) + merged_prs_per_contributor[login] = merged_pr_response.json() + + +# %% +# Process contributor data and PRs +contributor_dict = {} +weeks_with_prs_col = "Weeks with merged PR" +n_contribs_col = "Total contributions" + +for contributor in tqdm(contributors_response): + login = contributor["author"]["login"] + if login not in merged_prs_per_contributor: + continue + pr_list = merged_prs_per_contributor[login] + + weeks_with_merged_pr = sum(1 for week in contributor["weeks"] if week["c"] > 0) + + years_with_merged_pr = {datetime.fromtimestamp(week["w"]).year for week in contributor["weeks"] if week["c"] > 0} + + + name = gh_user_data[login]["name"] + contributor_dict[name] = { + "GitHub username": login, + weeks_with_prs_col: weeks_with_merged_pr, + "Years with merged PR": ", ".join(map(str,years_with_merged_pr)), + "Number years active": len(years_with_merged_pr), + "Oldest PR": pr_list[0]["html_url"] if pr_list else None, + n_contribs_col: contributor["total"], + } + +df_contributors = pd.DataFrame(contributor_dict.values()).sort_values(n_contribs_col, ascending=False) + +today = f"{datetime.now():%Y-%m-%d}" +df_contributors.to_csv(f"{today}-top-contributors.csv", index=False) + + + +# %% +%store df_contributors # cache the data + + +# restore the data +%store -r df_contributors