Skip to content

Commit

Permalink
[IntelliJ] Export only modulizable targets when in export-dep-as-jar (
Browse files Browse the repository at this point in the history
#8812)

### Problem

As an optimization for exporting source dependencies as jars, we want to export `target` entries only for targets that are either:
- Target Roots, or
- Between target roots (e.g. if A -> B -> C and we `./pants export-dep-as-jar :A :C`, we want `:B` to also be in the output).

### Solution

Split the function `generate_targets_map` into three parts:
1.- We iterate over all targets to create the resource map. This is `O(|all_targets|)`
2.- We iterate over all targets to fill up the `graph_info['libraries']` section. This is also `O(|all_targets|)`.
3.- We calculate the modulizable targets (that fulfil the constraints above), and populate `targets_map` with entries for those. Figuring out the modulizable targets boils down to a call to `BuildGraph.transitive_dependees_of_addresses`, which is a DFS graph traversal. Populating `targets_map` can be quadratic, because for each modulizable target we must walk its transitive dependency graph to include all of those dependencies as libraries.
  • Loading branch information
blorente authored and wisechengyi committed Jan 6, 2020
1 parent ca9ed54 commit d33e078
Show file tree
Hide file tree
Showing 4 changed files with 237 additions and 94 deletions.
24 changes: 24 additions & 0 deletions src/docs/export.md
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,30 @@ The following is an abbreviated export file from a command in the pants repo:

# Export Format Changes

## 1.0.14

Export only modulizable targets for `export-dep-as-jar`, and the rest of targets will appear as libraries.

Definition of `modulizable_targets`:
1. Conceptually: targets that should appear as modules in IntelliJ.
2. Computationally: dependees of target roots within the transitive context.

For example, A -> B -> C -> D
Given `./panst export-dep-as-jar A`,
```
modulizable_targets = [A]
libraries = [B,C,D]
```

Given `./panst export-dep-as-jar A C`,
```
modulizable_targets = [A, B, C]
libraries = [D]
```
In this case, `B` is forced into a module even though it is not a target root because IntelliJ
does not allow a library to depend back onto a source module,
i.e. `B` has to be a module to be able to depend on `C`.

## 1.0.13

Add `--available-target-types` option, which exports currently available target types.
Expand Down
175 changes: 106 additions & 69 deletions src/python/pants/backend/project_info/tasks/export_dep_as_jar.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,7 @@
import zipfile
from collections import defaultdict

from twitter.common.collections import OrderedSet

from pants.backend.jvm.subsystems.dependency_context import DependencyContext
from pants.backend.jvm.subsystems.jvm_platform import JvmPlatform
from pants.backend.jvm.subsystems.scala_platform import ScalaPlatform
from pants.backend.jvm.targets.jar_library import JarLibrary
Expand All @@ -33,6 +32,10 @@ class ExportDepAsJar(ConsoleTask):
jvm dependencies instead of sources.
"""

@classmethod
def subsystem_dependencies(cls):
return super().subsystem_dependencies() + (DependencyContext,)

@classmethod
def register_options(cls, register):
super().register_options(register)
Expand Down Expand Up @@ -140,7 +143,18 @@ def _zip_sources(target, location, suffix='.jar'):
zip_file.write(os.path.join(get_buildroot(), src_from_build_root), src_from_source_root)
return f

def _process_target(self, current_target, target_roots_set, resource_target_map, runtime_classpath):
def _dependencies_to_include_in_libraries(self, t, modulizable_target_set):
dependencies_to_include = set([])
self.context.build_graph.walk_transitive_dependency_graph(
[direct_dep.address for direct_dep in t.dependencies],
# NB: Dependency graph between modulizable targets is represented with modules,
# so we don't need to expand those branches of the dep graph.
predicate=lambda dep: dep not in modulizable_target_set,
work=lambda dep: dependencies_to_include.add(dep),
)
return dependencies_to_include

def _process_target(self, current_target, modulizable_target_set, resource_target_map, runtime_classpath):
"""
:type current_target:pants.build_graph.target.Target
"""
Expand All @@ -153,7 +167,7 @@ def _process_target(self, current_target, target_roots_set, resource_target_map,
'target_type': ExportDepAsJar._get_target_type(current_target, resource_target_map),
'is_synthetic': current_target.is_synthetic,
'pants_target_type': self._get_pants_target_alias(type(current_target)),
'is_target_root': current_target in target_roots_set,
'is_target_root': current_target in modulizable_target_set,
'transitive': current_target.transitive,
'scope': str(current_target.scope)
}
Expand All @@ -175,16 +189,36 @@ def iter_transitive_jars(jar_lib):
# libraries dict and here we just want the key into that dict (see `_jar_id`).
yield M2Coordinate(org=coordinate.org, name=coordinate.name, rev=coordinate.rev)

target_libraries = OrderedSet()
if isinstance(current_target, JarLibrary):
target_libraries = OrderedSet(iter_transitive_jars(current_target))
for dep in current_target.dependencies:
info['targets'].append(dep.address.spec)
if isinstance(dep, JarLibrary):
for jar in dep.jar_dependencies:
target_libraries.add(M2Coordinate(jar.org, jar.name, jar.rev))
def _full_library_set_for_target(target):
"""
Get the full library set for a target, including jar dependencies and jars of the library itself.
"""
libraries = set([])
if isinstance(target, JarLibrary):
jars = set([])
for jar in target.jar_dependencies:
jars.add(M2Coordinate(jar.org, jar.name, jar.rev))
# Add all the jars pulled in by this jar_library
target_libraries.update(iter_transitive_jars(dep))
jars.update(iter_transitive_jars(target))
libraries = [self._jar_id(jar) for jar in jars]
else:
libraries.add(target.id)
return libraries

libraries_for_target = set([self._jar_id(jar) for jar in iter_transitive_jars(current_target)])
for dep in self._dependencies_to_include_in_libraries(current_target, modulizable_target_set):
libraries_for_target.update(_full_library_set_for_target(dep))
info['libraries'].extend(libraries_for_target)

if current_target in modulizable_target_set:
info['roots'] = [{
'source_root': os.path.realpath(source_root_package_prefix[0]),
'package_prefix': source_root_package_prefix[1]
} for source_root_package_prefix in self._source_roots_for_target(current_target)]

for dep in current_target.dependencies:
if dep in modulizable_target_set:
info['targets'].append(dep.address.spec)

if isinstance(current_target, ScalaLibrary):
for dep in current_target.java_sources:
Expand All @@ -196,18 +230,9 @@ def iter_transitive_jars(jar_lib):
if hasattr(current_target, 'test_platform'):
info['test_platform'] = current_target.test_platform.name

if runtime_classpath:
info['libraries'].extend(self._jar_id(lib) for lib in target_libraries)

if current_target in target_roots_set:
info['roots'] = [{
'source_root': os.path.realpath(source_root_package_prefix[0]),
'package_prefix': source_root_package_prefix[1]
} for source_root_package_prefix in self._source_roots_for_target(current_target)]

return info

def initialize_graph_info(self, targets_map):
def initialize_graph_info(self):
scala_platform = ScalaPlatform.global_instance()
scala_platform_map = {
'scala_version': scala_platform.version,
Expand All @@ -229,7 +254,7 @@ def initialize_graph_info(self, targets_map):

graph_info = {
'version': DEFAULT_EXPORT_VERSION,
'targets': targets_map,
'targets': {},
'jvm_platforms': jvm_platforms_map,
'scala_platform': scala_platform_map,
# `jvm_distributions` are static distribution settings from config,
Expand All @@ -252,67 +277,79 @@ def initialize_graph_info(self, targets_map):

return graph_info

def generate_targets_map(self, all_targets, runtime_classpath):
def _get_all_targets(self, targets):
additional_java_targets = []
for t in targets:
if isinstance(t, ScalaLibrary):
additional_java_targets.extend(t.java_sources)
targets.extend(additional_java_targets)
return set(targets)

def _get_targets_to_make_into_modules(self, target_roots_set):
target_root_addresses = [t.address for t in target_roots_set]
dependees_of_target_roots = self.context.build_graph.transitive_dependees_of_addresses(target_root_addresses)
return dependees_of_target_roots

def _make_libraries_entry(self, target, resource_target_map, runtime_classpath):
# Using resolved path in preparation for VCFS.
resource_jar_root = os.path.realpath(self.versioned_workdir)
library_entry = {}
target_type = ExportDepAsJar._get_target_type(target, resource_target_map)
if target_type == SourceRootTypes.RESOURCE or target_type == SourceRootTypes.TEST_RESOURCE:
# yic assumed that the cost to fingerprint the target may not be that lower than
# just zipping up the resources anyway.
jarred_resources = ExportDepAsJar._zip_sources(target, resource_jar_root)
library_entry['default'] = jarred_resources.name
else:
jar_products = runtime_classpath.get_for_target(target)
for conf, jar_entry in jar_products:
# TODO(yic): check --compile-rsc-use-classpath-jars is enabled.
# If not, zip up the classes/ dir here.
if 'z.jar' in jar_entry:
library_entry[conf] = jar_entry
if self.get_options().sources:
# NB: We create the jar in the same place as we create the resources
# (as opposed to where we store the z.jar), because the path to the z.jar depends
# on tasks outside of this one.
# In addition to that, we may not want to depend on z.jar existing to export source jars.
jarred_sources = ExportDepAsJar._zip_sources(target, resource_jar_root, suffix='-sources.jar')
library_entry['sources'] = jarred_sources.name
return library_entry

def generate_targets_map(self, targets, runtime_classpath):
"""Generates a dictionary containing all pertinent information about the target graph.
The return dictionary is suitable for serialization by json.dumps.
:param all_targets: The list of targets to generate the map for.
:param classpath_products: Optional classpath_products. If not provided when the --libraries
option is `True`, this task will perform its own jar resolution.
"""
target_roots_set = set(self.context.target_roots)

all_targets = self._get_all_targets(targets)
libraries_map = self._resolve_jars_info(all_targets, runtime_classpath)

targets_map = {}
resource_target_map = {}
target_roots_set = set(self.context.target_roots)

additional_java_targets = []
for t in all_targets:
if isinstance(t, ScalaLibrary):
additional_java_targets.extend(t.java_sources)

all_targets.extend(additional_java_targets)
for dep in t.dependencies:
if isinstance(dep, Resources):
resource_target_map[dep] = t

for target in all_targets:
info = self._process_target(target, target_roots_set, resource_target_map, runtime_classpath)
targets_map[target.address.spec] = info
modulizable_targets = self._get_targets_to_make_into_modules(target_roots_set)
non_modulizable_targets = all_targets.difference(modulizable_targets)

for dep in target.dependencies:
if isinstance(dep, Resources):
resource_target_map[dep] = target
for t in non_modulizable_targets:
libraries_map[t.id] = self._make_libraries_entry(t, resource_target_map, runtime_classpath)

graph_info = self.initialize_graph_info(targets_map)
graph_info['libraries'] = self._resolve_jars_info(all_targets, runtime_classpath)
for target in modulizable_targets:
info = self._process_target(target, modulizable_targets, resource_target_map, runtime_classpath)
targets_map[target.address.spec] = info

# Using resolved path in preparation for VCFS.
resource_jar_root = os.path.realpath(self.versioned_workdir)
for t in all_targets:
target_type = ExportDepAsJar._get_target_type(t, resource_target_map)
# If it is a target root or it is already a jar_library target, then no-op.
if t in target_roots_set or targets_map[t.address.spec]['pants_target_type'] == 'jar_library':
continue

targets_map[t.address.spec]['pants_target_type'] = 'jar_library'
targets_map[t.address.spec]['libraries'] = [t.id]

if target_type == SourceRootTypes.RESOURCE or target_type == SourceRootTypes.TEST_RESOURCE:
# yic assumed that the cost to fingerprint the target may not be that lower than
# just zipping up the resources anyway.
jarred_resources = ExportDepAsJar._zip_sources(t, resource_jar_root)
graph_info['libraries'][t.id]['default'] = jarred_resources.name
else:
jar_products = runtime_classpath.get_for_target(t)
for conf, jar_entry in jar_products:
# TODO(yic): check --compile-rsc-use-classpath-jars is enabled.
# If not, zip up the classes/ dir here.
if 'z.jar' in jar_entry:
graph_info['libraries'][t.id][conf] = jar_entry
if self.get_options().sources:
# NB: We create the jar in the same place as we create the resources
# (as opposed to where we store the z.jar), because the path to the z.jar depends
# on tasks outside of this one.
# In addition to that, we may not want to depend on z.jar existing to export source jars.
jarred_sources = ExportDepAsJar._zip_sources(t, resource_jar_root, suffix='-sources.jar')
graph_info['libraries'][t.id]['sources'] = jarred_sources.name
graph_info = self.initialize_graph_info()
graph_info['targets'] = targets_map
graph_info['libraries'] = libraries_map

return graph_info

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@
#
# Note format changes in src/docs/export.md and update the Changelog section.
#
DEFAULT_EXPORT_VERSION = '1.0.13'
DEFAULT_EXPORT_VERSION = '1.0.14'
Loading

0 comments on commit d33e078

Please sign in to comment.