Skip to content

Commit

Permalink
Only Include Required Modules in the Generated From Source Aggregate …
Browse files Browse the repository at this point in the history
…POM (#19898)

Reduced the number of modules built during From Source run
  • Loading branch information
alzimmermsft authored Mar 23, 2021
1 parent a3f213c commit cade78f
Show file tree
Hide file tree
Showing 3 changed files with 251 additions and 12 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@ jacoco.exec

# Generated client pom file
ClientAggregatePom.xml
ClientFromSourcePom.xml

# Anaconda virtual env
venv
Expand Down
23 changes: 11 additions & 12 deletions eng/pipelines/templates/jobs/ci.tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -88,25 +88,24 @@ jobs:
arguments: '--update-type library --build-type client --sr'
condition: and(succeeded(), eq(variables['ShouldRunSourceTests'],'true'))

- task: PowerShell@2
- template: ../steps/generate-project-list.yml
parameters:
Artifacts: ${{parameters.Artifacts}}
AdditionalModules: ${{parameters.AdditionalModules}}

- task: PythonScript@0
# The root pom.xml contains both track 1, data, and track 2, client libraries.
# This task calls a powershell script that will go through all of the pom files
# This task calls a Python script that will go through all of the pom files
# in the repository looking, specifically, for ones whose parent is
# azure-client-sdk-parent and create a temporary pom file with just those libraries.
# Note: It excludes items within the /eng directory otherwise we'd be picking up
# jacoco and spotbugs which are unnecessary for the From Source run.
displayName: 'Generate Aggregate Pom for From Source Build'
inputs:
pwsh: true
workingDirectory: $(Agent.BuildDirectory)
filePath: eng/scripts/Generate-Client-Aggregate-Pom.ps1
scriptPath: eng/scripts/generate_from_source_pom.py
arguments: '--project-list $(ProjectList)'
condition: and(succeeded(), eq(variables['ShouldRunSourceTests'],'true'))

- template: ../steps/generate-project-list.yml
parameters:
Artifacts: ${{parameters.Artifacts}}
AdditionalModules: ${{parameters.AdditionalModules}}

- task: Maven@3
displayName: 'Run tests'
inputs:
Expand Down Expand Up @@ -138,7 +137,7 @@ jobs:
- task: Maven@3
displayName: 'Build for From Source run'
inputs:
mavenPomFile: ClientAggregatePom.xml
mavenPomFile: ClientFromSourcePom.xml
goals: 'install'
# Build and install every track 2, client, library. The purpose is to have all of
# the libraries installed on the machine so we can simply run tests with the -amd
Expand All @@ -158,7 +157,7 @@ jobs:
- task: Maven@3
displayName: 'Run tests for From Source run'
inputs:
mavenPomFile: ClientAggregatePom.xml
mavenPomFile: ClientFromSourcePom.xml
# For the From Source runs we don't want the -am switch as we don't care about running tests for our dependencies
# but we do want the -amd switch because we want to run tests on things that depend on us.
options: ${{ parameters.TestOptions }} -pl $(PLSkipOptions)$(ProjectList) -amd -T 1C
Expand Down
239 changes: 239 additions & 0 deletions eng/scripts/generate_from_source_pom.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,239 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

# Python version 3.4 or higher is required to run this script.

# Use case: Creates an aggregate POM which contains all modules that will be required in a "From Source" run for the passed
# project list.
#
# Flags
# --project-list/--pl: List of project included in the From Source run.
#
# For example: To create an aggregate POM for Azure Storage
# python eng/scripts/generate_from_source_pom.py --pl com.azure:azure-storage-blob,com.azure:azure-storage-common,...
#
# For example: To create an aggregate POM for Azure Core
# python eng/scripts/generate_from_source_pom.py --pl com.azure:azure-core,com.azure:azure-core-amqp,com.azure:azure-core-test,...
#
# The script must be run at the root of azure-sdk-for-java.

import argparse
from datetime import timedelta
import os
import time
import xml.etree.ElementTree as ET

# Only azure-client-sdk-parent and spring-boot-starter-parent are valid parent POMs for Track 2 libraries.
valid_parents = ['azure-client-sdk-parent', 'spring-boot-starter-parent', 'azure-spring-boot-test-parent']

# From this file get to the root path of the repo.
root_path = os.path.normpath(os.path.abspath(__file__) + '/../../../')

# From the root of the repo get to the version_client.txt file in eng/versioning.
client_versions_path = os.path.normpath(root_path + '/eng/versioning/version_client.txt')

# File path where the aggregate POM will be written.
client_from_source_pom_path = os.path.join(root_path, 'ClientFromSourcePom.xml')

# Beginning XML for the aggregate POM.
pom_file_start = '''<!-- Copyright (c) Microsoft Corporation. All rights reserved. Licensed under the MIT License. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.azure</groupId>
<artifactId>azure-sdk-from-source</artifactId>
<packaging>pom</packaging>
<version>1.0.0</version>
<modules>
'''

# Closing XML for the aggregate POM.
pom_file_end = ''' </modules>
</project>
'''

maven_xml_namespace = '{http://maven.apache.org/POM/4.0.0}'

# Function that creates the aggregate POM.
def create_from_source_pom(project_list: str):
project_list_identifiers = project_list.split(',')

# Get the artifact identifiers from client_versions.txt to act as our source of truth.
artifact_identifier_to_source_version = load_client_artifact_identifiers()

project_dependencies_mapping, dependency_to_project_mapping, project_to_pom_path_mapping = create_dependency_and_path_mappings(project_list_identifiers, artifact_identifier_to_source_version)

dependent_modules = []

# Resolve all projects, including transitively, that are dependent on the projects in the project list.
for project_identifier in project_list_identifiers:
if not project_identifier in project_to_pom_path_mapping:
continue

dependent_modules = resolve_dependent_project(project_identifier, dependent_modules, dependency_to_project_mapping)

# Distinct the dependent modules, even though this should be guarded, to reduce downstream processing requirements.
dependent_modules = list(set(dependent_modules))

# Sort for making debugging easier (since it is cheap)
dependent_modules.sort()

dependency_modules = []

# Resolve all dependencies of the projects in the project list and of the dependent modules.
for project_identifier in project_list_identifiers + dependent_modules:
depencency_modules = resolve_project_dependencies(project_identifier, dependency_modules, project_dependencies_mapping)

modules = []
# Finally map the project identifiers to relative module paths.
for project_identifier in project_list_identifiers + dependent_modules + depencency_modules:
if project_identifier in project_to_pom_path_mapping:
modules.append(project_to_pom_path_mapping[project_identifier])

# Distinct the modules list.
modules = list(set(modules))

# Sort the modules list for easier reading.
modules.sort()

with open(file=client_from_source_pom_path, mode='w') as fromSourcePom:
fromSourcePom.write(pom_file_start)

for module in modules:
fromSourcePom.write(' <module>{}</module>\n'.format(module))

fromSourcePom.write(pom_file_end)

# Function that loads and parses client_versions.txt into a artifact identifier - source version mapping.
def load_client_artifact_identifiers():
artifact_identifiers = {}
with open(file=client_versions_path, mode='r') as f:
for line in f:
stripped_line = line.strip()

# Skip empty lines, comments, and non-standard version lines.
if not stripped_line or stripped_line.startswith('#') or line.startswith('beta_') or line.startswith('unreleased_'):
continue

# Split the version line on ';' which should create 3 substrings of artifact identifier - released version - source version.
splitVersionLine = stripped_line.split(";")

# From the split lines create the artifact identitifer - source version map entry.
artifact_identifiers[splitVersionLine[0]]=splitVersionLine[2]

return artifact_identifiers

# Function which creates project dependencies mapping, dependencies to dependent projects mapping, and project to module relative path mapping.
def create_dependency_and_path_mappings(project_list_identifiers: list, artifact_identifier_to_source_version: dict):
project_dependencies_mapping = {}
dependency_mapping = {}
module_path_mapping = {}

for root, _, files in os.walk(root_path):
for file_name in files:
file_path = root + os.sep + file_name

# Only parse files that are pom.xml files.
if (file_name.startswith('pom') and file_name.endswith('.xml')):
add_project_to_dependency_and_module_mappings(file_path, project_dependencies_mapping, project_list_identifiers, artifact_identifier_to_source_version, dependency_mapping, module_path_mapping)

return project_dependencies_mapping, dependency_mapping, module_path_mapping

# Function that constructs the project dependencies map and adds to dependency to project map and project to module relative path map for a track 2 project.
def add_project_to_dependency_and_module_mappings(file_path: str, project_dependencies_mapping: dict, project_list_identifiers: list, artifact_identifier_to_source_version: dict, dependency_mapping: dict, module_path_mapping: dict):
if 'eng' in file_path.split(os.sep):
return

tree = ET.parse(file_path)
tree_root = tree.getroot()

project_identifier = create_artifact_identifier(tree_root)

# If the project isn't a track 2 POM skip it and not one of the project list identifiers.
if not project_identifier in project_list_identifiers and not is_track_two_pom(tree_root):
return

module_path_mapping[project_identifier] = os.path.dirname(file_path).replace(root_path, '').replace('\\', '/')

dependencies = tree_root.iter(maven_xml_namespace + 'dependency')

# If the project doesn't have a dependencies XML element skip it.
if dependencies is None:
return

if not project_identifier in project_dependencies_mapping:
project_dependencies_mapping[project_identifier] = []

for dependency in dependencies:
dependency_identifier = create_artifact_identifier(dependency)
if not dependency_identifier in artifact_identifier_to_source_version:
continue

if not dependency_identifier in dependency_mapping:
dependency_mapping[dependency_identifier] = []

project_dependencies_mapping[project_identifier].append(dependency_identifier)
dependency_mapping[dependency_identifier].append(project_identifier)

# Function which resolves the dependent projects of the project.
def resolve_dependent_project(pom_identifier: str, dependent_modules: list, dependency_to_project_mapping: dict):
if pom_identifier in dependency_to_project_mapping:
for dependency in dependency_to_project_mapping[pom_identifier]:
# Only continue if the project's dependents haven't already been resolved.
if not dependency in dependent_modules:
dependent_modules = resolve_dependent_project(dependency, dependent_modules, dependency_to_project_mapping)
dependent_modules.append(dependency)

return dependent_modules

# Function which resolves the dependencies of the project.
def resolve_project_dependencies(pom_identifier: str, dependency_modules: list, project_dependencies_mapping: dict):
if pom_identifier in project_dependencies_mapping:
for dependency in project_dependencies_mapping[pom_identifier]:
# Only continue if the project's dependencies haven't already been resolved.
if not dependency in dependency_modules:
dependency_modules = resolve_project_dependencies(dependency, dependency_modules, project_dependencies_mapping)
dependency_modules.append(dependency)

return dependency_modules

# Determines if the passed POM XML is a track 2 library.
def is_track_two_pom(tree_root: ET.Element):
parent_node = element_find(tree_root, 'parent')

return not parent_node is None and element_find(parent_node, 'artifactId').text in valid_parents

# Creates an artifacts identifier.
def create_artifact_identifier(element: ET.Element):
group_id = element_find(element, 'groupId')

# POMs allow the groupId to be inferred from the parent POM.
# This is a guard to prevent this from raising an error.
if group_id is None:
group_id = element_find(element_find(element, 'parent'), 'groupId')

return group_id.text + ':' + element_find(element, 'artifactId').text

# Helper function for finding an XML element which handles adding the namespace.
def element_find(element: ET.Element, path: str):
return element.find(maven_xml_namespace + path)

def main():
parser = argparse.ArgumentParser(description='Generated an aggregate POM for a From Source run.')
parser.add_argument('--project-list', '--pl', type=str)
args = parser.parse_args()
if args.project_list == None:
raise ValueError('Missing project list.')
start_time = time.time()
create_from_source_pom(args.project_list)
elapsed_time = time.time() - start_time

print('Effective From Source POM File')
with open(file=client_from_source_pom_path, mode='r') as fromSourcePom:
print(fromSourcePom.read())

print('elapsed_time={}'.format(elapsed_time))
print('Total time for replacement: {} seconds'.format(str(timedelta(seconds=elapsed_time))))

if __name__ == '__main__':
main()

0 comments on commit cade78f

Please sign in to comment.