From 680b7154360e5d675f0b2f6f077b2d0eae107609 Mon Sep 17 00:00:00 2001 From: James Suplizio Date: Tue, 27 Jun 2023 13:12:50 -0700 Subject: [PATCH] Add GitHub Team User Store to tools sources (#6355) * Add GitHub Team User Store to tools sources * Output a warning if a team is found with no members * Update to add command line parsing for storage and other feedback * Remove unnecessary folder that was added and not used * Pass in storage URI on command line, use BlobUriBuilder to parse the pieces * Create the BlobUriBuilder as part of the GitHubEventClient and store that instead of the URI string --- .../GitHubTeamUserStore.sln | 25 +++ .../Constants/ProductAndTeamConstants.cs | 17 ++ .../GitHubTeamUserStore/GitHubEventClient.cs | 194 ++++++++++++++++++ .../GitHubTeamUserStore.csproj | 16 ++ .../GitHubTeamUserStore/Program.cs | 62 ++++++ .../GitHubTeamUserStore/TeamUserGenerator.cs | 133 ++++++++++++ tools/github-team-user-store/README.md | 28 +++ 7 files changed, 475 insertions(+) create mode 100644 tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore.sln create mode 100644 tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Constants/ProductAndTeamConstants.cs create mode 100644 tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubEventClient.cs create mode 100644 tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubTeamUserStore.csproj create mode 100644 tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Program.cs create mode 100644 tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/TeamUserGenerator.cs create mode 100644 tools/github-team-user-store/README.md diff --git a/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore.sln b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore.sln new file mode 100644 index 00000000000..8a2c1f168f5 --- /dev/null +++ 
b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore.sln @@ -0,0 +1,25 @@ + +Microsoft Visual Studio Solution File, Format Version 12.00 +# Visual Studio Version 17 +VisualStudioVersion = 17.5.33530.505 +MinimumVisualStudioVersion = 10.0.40219.1 +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "GitHubTeamUserStore", "GitHubTeamUserStore\GitHubTeamUserStore.csproj", "{47699B24-3A45-47FC-B6ED-40717A3B568B}" +EndProject +Global + GlobalSection(SolutionConfigurationPlatforms) = preSolution + Debug|Any CPU = Debug|Any CPU + Release|Any CPU = Release|Any CPU + EndGlobalSection + GlobalSection(ProjectConfigurationPlatforms) = postSolution + {47699B24-3A45-47FC-B6ED-40717A3B568B}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {47699B24-3A45-47FC-B6ED-40717A3B568B}.Debug|Any CPU.Build.0 = Debug|Any CPU + {47699B24-3A45-47FC-B6ED-40717A3B568B}.Release|Any CPU.ActiveCfg = Release|Any CPU + {47699B24-3A45-47FC-B6ED-40717A3B568B}.Release|Any CPU.Build.0 = Release|Any CPU + EndGlobalSection + GlobalSection(SolutionProperties) = preSolution + HideSolutionNode = FALSE + EndGlobalSection + GlobalSection(ExtensibilityGlobals) = postSolution + SolutionGuid = {54B86434-B8CB-48F9-99EA-3013047BC952} + EndGlobalSection +EndGlobal diff --git a/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Constants/ProductAndTeamConstants.cs b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Constants/ProductAndTeamConstants.cs new file mode 100644 index 00000000000..1dd0cbcf9e2 --- /dev/null +++ b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Constants/ProductAndTeamConstants.cs @@ -0,0 +1,17 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; + +namespace GitHubTeamUserStore.Constants +{ + internal class ProductAndTeamConstants + { + // The ProductHeaderName is used to register the GitHubClient for this application + public const string 
ProductHeaderName = "azure-sdk-github-team-user-store"; + // Need to do this since Octokit doesn't expose the API to get team by name. + // The team Id won't change even if the team name gets modified. + public const int AzureSdkWriteTeamId = 3057675; + } +} diff --git a/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubEventClient.cs b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubEventClient.cs new file mode 100644 index 00000000000..7c67b8b3a6a --- /dev/null +++ b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubEventClient.cs @@ -0,0 +1,194 @@ +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Threading.Tasks; +using Azure.Storage.Blobs; +using Octokit; +using GitHubTeamUserStore.Constants; + +namespace GitHubTeamUserStore +{ + public class GitHubEventClient + { + private const int MaxPageSize = 100; + // This needs to be done set because both GetAllMembers and GetAllChildTeams API calls auto-paginate + // but default to a page size of 30. Default to 100/page to reduce the number of API calls + private static ApiOptions _apiOptions = new ApiOptions() { PageSize = MaxPageSize }; + public GitHubClient _gitHubClient = null; + public int CoreRateLimit { get; set; } = 0; + private int _numRetries = 5; + private int _delayTimeInMs = 1000; + + private BlobUriBuilder AzureBlobUriBuilder { get; set; } = null; + public GitHubEventClient(string productHeaderName, string azureBlobStorageURI) + { + _gitHubClient = CreateClientWithGitHubEnvToken(productHeaderName); + Uri blobStorageUri = new Uri(azureBlobStorageURI); + BlobUriBuilder blobUriBuilder = new BlobUriBuilder(blobStorageUri); + AzureBlobUriBuilder = blobUriBuilder; + } + + /// + /// This method creates a GitHubClient using the GITHUB_TOKEN from the environment for authentication + /// + /// This is used to generate the User Agent string sent with each request. 
The name used should represent the product, the GitHub Organization, or the GitHub username that's using Octokit.net (in that order of preference). + /// If the product header name is null or empty + /// If there is no GITHUB_TOKEN in the environment + /// Authenticated GitHubClient + public virtual GitHubClient CreateClientWithGitHubEnvToken(string productHeaderName) + { + if (string.IsNullOrEmpty(productHeaderName)) + { + throw new ArgumentException("productHeaderName cannot be null or empty"); + } + var githubToken = Environment.GetEnvironmentVariable("GITHUB_TOKEN"); + if (string.IsNullOrEmpty(githubToken)) + { + throw new ApplicationException("GITHUB_TOKEN cannot be null or empty"); + } + var gitHubClient = new GitHubClient(new ProductHeaderValue(productHeaderName)) + { + Credentials = new Credentials(githubToken) + }; + return gitHubClient; + } + + /// + /// Using the authenticated GitHubClient, call the RateLimit API to get the rate limits. + /// + /// Octokit.MiscellaneousRateLimit which contains the rate limit information. + public async Task GetRateLimits() + { + return await _gitHubClient.RateLimit.GetRateLimits(); + } + + /// + /// Write the current rate limit and remaining number of transactions. + /// + /// Optional message to prepend to the rate limit message. + public async Task WriteRateLimits(string prependMessage = null) + { + var miscRateLimit = await GetRateLimits(); + CoreRateLimit = miscRateLimit.Resources.Core.Limit; + // Get the Minutes till reset. + TimeSpan span = miscRateLimit.Resources.Core.Reset.UtcDateTime.Subtract(DateTime.UtcNow); + // In the message, cast TotalMinutes to an int to get a whole number of minutes. 
+ string rateLimitMessage = $"Limit={miscRateLimit.Resources.Core.Limit}, Remaining={miscRateLimit.Resources.Core.Remaining}, Limit Reset in {(int)span.TotalMinutes} minutes."; + if (prependMessage != null) + { + rateLimitMessage = $"{prependMessage} {rateLimitMessage}"; + } + Console.WriteLine(rateLimitMessage); + } + + // Given a teamId, get the Team from github. Chances are, this is only going to be used to get + // the first team + public async Task GetTeamById(int teamId) + { + return await _gitHubClient.Organization.Team.Get(teamId); + } + + /// + /// Given an Octokit.Team, call to get the team members. Note: GitHub's GetTeamMembers API gets all of the Users + /// for the team which includes all the members of child teams. + /// + /// Octokit.Team, the team whose members to retrieve. + /// IReadOnlyList of Octokit.Users + /// Thrown if GetAllMembers fails after all retries have been exhausted. + public async Task> GetTeamMembers(Team team) + { + // For the cases where exceptions/retries fail and an empty ReadOnlyList needs to be returned + List emptyUserList = new List(); + + int tryNumber = 0; + while (tryNumber < _numRetries) + { + tryNumber++; + try + { + return await _gitHubClient.Organization.Team.GetAllMembers(team.Id, _apiOptions); + } + // This is what gets thrown if we try and get a userList for certain special teams on GitHub. + // None of these teams are used directly in anything and neither team is a child team of + // azure-sdk-write. If a ForbiddenException is encountered, then report it and return an + // empty list. + catch (Octokit.ForbiddenException forbiddenEx) + { + Console.WriteLine($"{team.Name} cannot be retrieved using a GitHub PAT."); + Console.WriteLine(forbiddenEx.Message); + return emptyUserList.AsReadOnly(); + } + // The only time we should get down here is if there's an exception caused by a network hiccup. + // Sleep for a second and try again. 
+                catch (Exception ex)
+                {
+                    Console.WriteLine(ex.Message);
+                    Console.WriteLine($"delaying {_delayTimeInMs} and retrying");
+                    await Task.Delay(_delayTimeInMs);
+                }
+            }
+            throw new ApplicationException($"Unable to get members for team {team.Name}. See above exception(s)");
+        }
+
+        /// <summary>
+        /// Given an Octokit.Team, call to get all child teams.
+        /// </summary>
+        /// <param name="team">Octokit.Team, the team whose child teams to retrieve.</param>
+        /// <returns>IReadOnlyList of Octokit.Team</returns>
+        /// <exception cref="ApplicationException">Thrown if GetAllChildTeams fails after all retries have been exhausted</exception>
+        public async Task<IReadOnlyList<Team>> GetAllChildTeams(Team team)
+        {
+            int tryNumber = 0;
+            while (tryNumber < _numRetries)
+            {
+                tryNumber++;
+                try
+                {
+                    return await _gitHubClient.Organization.Team.GetAllChildTeams(team.Id, _apiOptions);
+                }
+                // Any exception is treated as transient (e.g. a network hiccup): report it, wait
+                // _delayTimeInMs milliseconds and try again until _numRetries attempts have been made.
+                catch (Exception ex)
+                {
+                    Console.WriteLine(ex.Message);
+                    Console.WriteLine($"delaying {_delayTimeInMs} and retrying");
+                    await Task.Delay(_delayTimeInMs);
+                }
+            }
+            throw new ApplicationException($"Unable to get child teams for team {team.Name}. See above exception(s)");
+        }
+
+        /// <summary>
+        /// Store the team/user blob in Azure blob storage.
+        /// </summary>
+        /// <param name="rawJson">The json string, representing the team/user information, that will be uploaded to blob storage.</param>
+        /// <returns></returns>
+        /// <remarks>The target container and blob are taken from the blob storage URI passed to the constructor; an existing blob is overwritten.</remarks>
+        public async Task UploadToBlobStorage(string rawJson)
+        {
+            BlobServiceClient blobServiceClient = new BlobServiceClient(AzureBlobUriBuilder.ToUri());
+            BlobContainerClient blobContainerClient = blobServiceClient.GetBlobContainerClient(AzureBlobUriBuilder.BlobContainerName);
+            BlobClient blobClient = blobContainerClient.GetBlobClient(AzureBlobUriBuilder.BlobName);
+            await blobClient.UploadAsync(BinaryData.FromString(rawJson), overwrite: true);
+        }
+
+        /// <summary>
+        /// Fetch the team/user blob data from Azure Blob storage.
+        /// </summary>
+        /// <returns>The raw json string blob.</returns>
+        /// <exception cref="ApplicationException">Thrown if the HttpResponseMessage does not contain a success status code.</exception>
+        public async Task<string> GetTeamUserBlobFromStorage()
+        {
+            using HttpClient client = new HttpClient(); // disposed when the method exits
+            string blobUri = $"https://{AzureBlobUriBuilder.Host}/{AzureBlobUriBuilder.BlobContainerName}/{AzureBlobUriBuilder.BlobName}";
+            using HttpResponseMessage response = await client.GetAsync(blobUri); // no SAS/query is appended, so this is an anonymous read
+            if (response.IsSuccessStatusCode)
+            {
+                string rawJson = await response.Content.ReadAsStringAsync();
+                return rawJson;
+            }
+            throw new ApplicationException($"Unable to retrieve team user data from blob storage. Status code: {response.StatusCode}, Reason {response.ReasonPhrase}");
+        }
+    }
+}
diff --git a/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubTeamUserStore.csproj b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubTeamUserStore.csproj
new file mode 100644
index 00000000000..5393a5252e5
--- /dev/null
+++ b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/GitHubTeamUserStore.csproj
@@ -0,0 +1,16 @@
+
+
+
+ Exe
+ net6.0
+ enable
+ github-team-user-store
+
+
+
+
+
+
+
+
+
diff --git a/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Program.cs b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Program.cs
new file mode 100644
index 00000000000..771a4bed3f8
--- /dev/null
+++ b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/Program.cs
@@ -0,0 +1,62 @@
+using System.CommandLine;
+using System.Diagnostics;
+using GitHubTeamUserStore.Constants;
+
+namespace GitHubTeamUserStore
+{
+    internal class Program
+    {
+        static async Task Main(string[] args)
+        {
+            var blobStorageURIOption = new Option
+                (name: "--blobStorageURI",
+                description: "The blob storage URI including the SAS.");
+            blobStorageURIOption.IsRequired = true;
+
+            var rootCommand = new RootCommand
+            {
+                blobStorageURIOption,
+            };
+            rootCommand.SetHandler(PopulateTeamUserData,
+                blobStorageURIOption);
+
+            int 
returnCode = await rootCommand.InvokeAsync(args); + Console.WriteLine($"Exiting with return code {returnCode}"); + Environment.Exit(returnCode); + } + + private static async Task PopulateTeamUserData(string blobStorageURI) + { + + // Default the returnCode code to non-zero. If everything is successful it'll be set to 0 + int returnCode = 1; + Stopwatch stopWatch = new Stopwatch(); + stopWatch.Start(); + + GitHubEventClient gitHubEventClient = new GitHubEventClient(ProductAndTeamConstants.ProductHeaderName, blobStorageURI); + + await gitHubEventClient.WriteRateLimits("RateLimit at start of execution:"); + await TeamUserGenerator.GenerateAndStoreTeamUserList(gitHubEventClient); + await gitHubEventClient.WriteRateLimits("RateLimit at end of execution:"); + bool storedEqualsGenerated = await TeamUserGenerator.VerifyStoredTeamUsers(gitHubEventClient); + + stopWatch.Stop(); + TimeSpan ts = stopWatch.Elapsed; + string elapsedTime = String.Format("{0:00}:{1:00}:{2:00}.{3:00}", + ts.Hours, ts.Minutes, ts.Seconds, + ts.Milliseconds / 10); + Console.WriteLine($"Total run time: {elapsedTime}"); + + if (storedEqualsGenerated) + { + Console.WriteLine("List stored successfully."); + returnCode = 0; + } + else + { + Console.WriteLine("There were issues with generated vs stored data. 
See above for specifics."); + } + return returnCode; + } + } +} diff --git a/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/TeamUserGenerator.cs b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/TeamUserGenerator.cs new file mode 100644 index 00000000000..811c2f9dfc6 --- /dev/null +++ b/tools/github-team-user-store/GitHubTeamUserStore/GitHubTeamUserStore/TeamUserGenerator.cs @@ -0,0 +1,133 @@ +using Octokit; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using System.Text.Json; +using System.Threading.Tasks; +using GitHubTeamUserStore.Constants; + +namespace GitHubTeamUserStore +{ + public class TeamUserGenerator + { + // Team/User dictionary where the team name is the key and the list of users is the value. It's worth noting + // that this is the list of Logins (Octokit.User.Login) which are what's used in CODEOWNERS, @mentions, etc. + private static Dictionary> _teamUserDict = new Dictionary>(); + + /// <summary> + /// Generate the team/user lists for each and every team under azure-sdk-write. Every team and user in a CODEOWNERS + /// file must have azure-sdk-write permissions in order to be in there which means every team/user will be under + /// azure-sdk-write. This is done to limit the number of calls made because if team/user data for every team + /// under Azure was pulled there would be roughly 1933 teams. Getting the team/user data for that many teams would + /// end up taking 2500-3000 GitHub API calls whereas getting the team/user data for azure-sdk-write and its child + /// teams is less than 1/10th of that. The team/user data is serialized into json and stored in azure blob storage. 
+ /// </summary> + /// <param name="gitHubEventClient">Authenticated GitHubEventClient</param> + /// <returns></returns> + public static async Task GenerateAndStoreTeamUserList(GitHubEventClient gitHubEventClient) + { + Team azureSdkWrite = await gitHubEventClient.GetTeamById(ProductAndTeamConstants.AzureSdkWriteTeamId); + await CreateTeamUserEntry(gitHubEventClient, azureSdkWrite); + // Serializing the Dictionary<string, List<string>> directly won't work with the JsonSerializer but + // a List<KeyValuePair<string, List<string>>> will and it's easy enough to convert to/from. + var list = _teamUserDict.ToList(); + string jsonString = JsonSerializer.Serialize(list); + await gitHubEventClient.UploadToBlobStorage(jsonString); + } + + /// <summary> + /// Call GitHub to get users for the team and add a dictionary entry for the team/users. Note: GitHub returns + /// a distinct list of all of the users, including users from any/all child teams. After that, get the list + /// of child teams and recursively call CreateTeamUserEntry for each one of those to create their team/user entries. + /// </summary> + /// <param name="gitHubEventClient">Authenticated GitHubEventClient</param> + /// <param name="team">Octokit.Team to get users for.</param> + public static async Task CreateTeamUserEntry(GitHubEventClient gitHubEventClient, Team team) + { + // If this team has already been added to the dictionary then there's nothing to do. This + // should prevent any weirdness if there ends up being some kind of circular team reference + if (_teamUserDict.ContainsKey(team.Name)) + { + return; + } + // Get all of the team members + var teamMembers = await gitHubEventClient.GetTeamMembers(team); + if (teamMembers.Count > 0) + { + // Just need a List<string> containing the logins from the returned + // list of users. 
The Login is what's used in @mentions, assignments etc
+                var members = teamMembers.Select(s => s.Login).ToList();
+                _teamUserDict.Add(team.Name, members);
+            }
+            else
+            {
+                // It seems better to report this than to add a team to the dictionary with no users
+                Console.WriteLine($"Warning: team {team.Name} has no members and will not be added to the dictionary.");
+            }
+            var childTeams = await gitHubEventClient.GetAllChildTeams(team);
+            foreach (Team childTeam in childTeams)
+            {
+                await CreateTeamUserEntry(gitHubEventClient, childTeam);
+            }
+        }
+
+        /// <summary>
+        /// This method is called after the team/user data is stored in blob storage. It verifies that the
+        /// team/user data from blob storage is the same as the in-memory data that was used to create the blob.
+        /// </summary>
+        /// <param name="gitHubEventClient">Authenticated GitHubEventClient</param>
+        /// <returns>True, if the team data in storage matches the in-memory data that was used to create the blob otherwise false.</returns>
+        public static async Task<bool> VerifyStoredTeamUsers(GitHubEventClient gitHubEventClient)
+        {
+            bool hasError = false;
+            string rawJson = await gitHubEventClient.GetTeamUserBlobFromStorage();
+            var list = JsonSerializer.Deserialize<List<KeyValuePair<string, List<string>>>>(rawJson);
+            var storedDictionary = list.ToDictionary((keyItem) => keyItem.Key, (valueItem) => valueItem.Value);
+
+            // Verify the dictionary from storage contains everything from the _teamUserDict
+            if (_teamUserDict.Keys.Count != storedDictionary.Keys.Count)
+            {
+                // A team count mismatch is a verification failure: list the teams and return false, don't bother looking at the users.
+                Console.WriteLine($"Error! Created dictionary has {_teamUserDict.Keys.Count} teams and stored dictionary has {storedDictionary.Keys.Count} teams.");
+                Console.WriteLine(string.Format("created list teams {0}", string.Join(", ", _teamUserDict.Keys)));
+                Console.WriteLine(string.Format("stored list teams {0}", string.Join(", ", storedDictionary.Keys)));
+                return false;
+            }
+
+            // The team counts are equal, look at the users for every team. A team missing from storage is compared against an empty list so it is reported as a mismatch.
+            foreach (string key in _teamUserDict.Keys)
+            {
+                var users = _teamUserDict[key];
+                List<string> storedUsers = storedDictionary.TryGetValue(key, out var stored) && stored is not null ? stored : new List<string>();
+                // Since these are just lists of strings, calling sort will sort them in ascending order.
+                // This makes things easier to find differences if there's an error
+                users.Sort();
+                storedUsers.Sort();
+                if (users.Count != storedUsers.Count)
+                {
+                    hasError = true;
+                    Console.WriteLine($"Error! Created dictionary for team {key} has {users.Count} and stored dictionary has {storedUsers.Count}");
+                    Console.WriteLine(string.Format("created list users {0}", string.Join(", ", users)));
+                    Console.WriteLine(string.Format("stored list users {0}", string.Join(", ", storedUsers)));
+                }
+                else
+                {
+                    foreach (var user in users)
+                    {
+                        // As soon as difference is found, output all the users for each team and move on to the next team
+                        if (!storedUsers.Contains(user))
+                        {
+                            hasError = true;
+                            Console.WriteLine($"Error! Created dictionary for team {key} has different users than the stored dictionary");
+                            Console.WriteLine(string.Format("created list users {0}", string.Join(", ", users)));
+                            Console.WriteLine(string.Format("stored list users {0}", string.Join(", ", storedUsers)));
+                            break;
+                        }
+                    }
+                }
+            }
+            return !hasError;
+        }
+    }
+}
diff --git a/tools/github-team-user-store/README.md b/tools/github-team-user-store/README.md
new file mode 100644
index 00000000000..14d84adbbed
--- /dev/null
+++ b/tools/github-team-user-store/README.md
@@ -0,0 +1,28 @@
+# GitHub Team User Store
+
+## Overview
+
+The github-team-user-store is an internal only tool. The tool will recursively get the list of teams/users, that includes the Azure/azure-sdk-write and all of the teams within that hierarchy, and store the results as a json string in Azure blob storage. The purpose of this is to enable team usage in CODEOWNERS by allowing the CodeOwnersParser to pull the list of users when it encounters a non-user, or team, entry. 
The reason this code isn't directly in the CodeOwnersParser is that it requires a specific, fine-grained Personal Access Token (PAT) to get the users for teams, which isn't something that's readily available or prudent to add to all of the places where CodeOwnersParser is used. + +### github-team-user-store processing + +The tool creates a `Dictionary<string, List<string>>` where the key is the team name and the value is the list of users, specifically the user Logins. The Logins are what's used by GitHub in @mentions, assignments, etc. The specific details about the API calls can be found in the code. + +### Tool requirements + +The tool requires two things: + +1. A GitHub PAT from a user that has organization access in the GITHUB_TOKEN environment variable. This requires a GitHub fine-grained token with Organization->Membership read-only permission. A bot account won't work here; this needs to be created by a user with permissions for the org. Note: Nothing is edited here, only read. + +2. The blob storage URI, including the SAS token with write permissions for the azure-sdk-write-teams container in the azuresdkartifacts storage account, passed on the command line with the required `--blobStorageURI` option. The tool does not read the SAS from the AZURE_SDK_TEAM_USER_STORE_SAS environment variable itself. + +Both the PAT and the SAS token are in the **azuresdkartifacts azure-sdk-write-teams variables** variable group to be used by the pipeline which will run this on, at least, a daily basis. + +### Using the store data + +The json blob is read anonymously. The json blob is created from the dictionary but, because the serializer doesn't handle `Dictionary<string, List<string>>`, the dictionary is converted to a `List<KeyValuePair<string, List<string>>>`. This means that anything wanting to use this data needs to convert it back to a dictionary which is easily done with the following C# code. + +```Csharp + var list = JsonSerializer.Deserialize<List<KeyValuePair<string, List<string>>>>(rawJson); + var teamUserDictionary = list.ToDictionary((keyItem) => keyItem.Key, (valueItem) => valueItem.Value); +```