Fix bug with linter targets being skipped (#10974) (#10975)

### Problem We noticed an issue where, when running the `./pants lint` command on a large number of targets in a repository, some targets were being completely skipped by the flake8 process, resulting in the flake8 linter output falsely reporting that everything was fine when there were actually files in the repo with linter errors. The problem turned out to lie in the `group_field_sets_by_constraints` method. This method takes as its input an unsorted collection of field sets corresponding to the input targets, and groups them by their Python interpreter constraint. This method is used as part of the pipeline for running the flake8 process on Python source files. Internally, this method calls the Python standard library function `itertools.groupby`. It turns out that `groupby` does not work as expected with unsorted input data: it generates a new sub-iterable every time the value of the grouping key changes (in this case, the interpreter constraint), rather than creating exactly one sub-iterable per distinct key in the input data. Because we were taking the output of this function and using it in a dictionary comprehension, we were accidentally overwriting dictionary values in a non-deterministic way, resulting in some field sets getting skipped before the flake8 process could run on them. ### Solution `group_field_sets_by_constraints` was rewritten to avoid using `itertools.groupby` altogether, so we no longer skip inputs; and a test was added to make sure that we handle unsorted field set inputs to this method correctly. Co-authored-by: gshuflin <[email protected]>
pantsbuild · Oct 16, 2020 · 79ece83 · 79ece83
1 parent d0a86f5
commit 79ece83
Show file tree

Hide file tree

Showing 2 changed files with 34 additions and 10 deletions.
diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py
@@ -5,6 +5,7 @@
 import functools
 import itertools
 import logging
+from collections import defaultdict
 from dataclasses import dataclass
 from textwrap import dedent
 from typing import (
@@ -187,16 +188,19 @@ def create_from_compatibility_fields(
     def group_field_sets_by_constraints(
         cls, field_sets: Iterable[_FS], python_setup: PythonSetup
     ) -> FrozenDict["PexInterpreterConstraints", Tuple[_FS, ...]]:
-        constraints_to_field_sets = {
-            constraints: tuple(sorted(fs_collection, key=lambda fs: fs.address))
-            for constraints, fs_collection in itertools.groupby(
-                field_sets,
-                key=lambda fs: cls.create_from_compatibility_fields(
-                    [fs.compatibility], python_setup
-                ),
-            )
-        }
-        return FrozenDict(sorted(constraints_to_field_sets.items()))
+
+        results = defaultdict(set)
+
+        for fs in field_sets:
+            constraints = cls.create_from_compatibility_fields([fs.compatibility], python_setup)
+            results[constraints].add(fs)
+
+        return FrozenDict(
+            {
+                constraints: tuple(sorted(field_sets, key=lambda fs: fs.address))
+                for constraints, field_sets in sorted(results.items())
+            }
+        )
 
     def generate_pex_arg_list(self) -> List[str]:
         args = []

diff --git a/src/python/pants/backend/python/util_rules/pex_test.py b/src/python/pants/backend/python/util_rules/pex_test.py
@@ -241,6 +241,26 @@ def test_group_field_sets_by_constraints() -> None:
     )
 
 
+def test_group_field_sets_by_constraints_with_unsorted_inputs() -> None:
+    py3_fs = [
+        MockFieldSet.create_for_test("src/python/a_dir/path.py:test", "==3.6.*"),
+        MockFieldSet.create_for_test("src/python/b_dir/path.py:test", ">2.7,<3"),
+        MockFieldSet.create_for_test("src/python/c_dir/path.py:test", "==3.6.*"),
+    ]
+
+    ic_36 = PexInterpreterConstraints([Requirement.parse("CPython==3.6.*")])
+
+    output = PexInterpreterConstraints.group_field_sets_by_constraints(
+        py3_fs,
+        python_setup=create_subsystem(PythonSetup, interpreter_constraints=[]),
+    )
+
+    assert output[ic_36] == (
+        MockFieldSet.create_for_test("src/python/a_dir/path.py:test", "==3.6.*"),
+        MockFieldSet.create_for_test("src/python/c_dir/path.py:test", "==3.6.*"),
+    )
+
+
 @dataclass(frozen=True)
 class ExactRequirement:
     project_name: str