From 1143af45b4cb5ecc06b9f33b3bd44482c5786cda Mon Sep 17 00:00:00 2001
From: gshuflin <greg.shuflin@toolchain.com>
Date: Thu, 15 Oct 2020 17:55:24 -0700
Subject: [PATCH] Fix bug with linter targets being skipped (#10974)

### Problem

We noticed an issue where, when running the `./pants lint` command on a large number of targets in a repository, some targets were being completely skipped by the flake8 process, resulting in the flake8 linter falsely reporting success when there were actually files in the repo with linter errors.

The problem turned out to lie in the `group_field_sets_by_constraints` method. This method takes as its input an unsorted collection of field sets corresponding to the input targets, and groups them by their Python interpreter constraints. This method is used as part of the pipeline for running the flake8 process on Python source files.

Internally, this method called the Python standard library function `itertools.groupby`. It turns out that `groupby` does not work as expected with unsorted input data - it generates a new sub-iterable every time the value of the grouping key changes (in this case, the interpreter constraints), rather than creating one sub-iterable per distinct key in the input data. Because we were taking the output of `groupby` and using it in a dictionary comprehension, we were accidentally overwriting dictionary values in a non-deterministic way, resulting in some field sets getting skipped before the flake8 process could run on them.

### Solution

`group_field_sets_by_constraints` was rewritten to avoid using `itertools.groupby` altogether, so we no longer skip inputs; and a test was added to make sure that we handle unsorted field set inputs to this method correctly.
---
 .../pants/backend/python/util_rules/pex.py    | 24 +++++++++++--------
 .../backend/python/util_rules/pex_test.py     | 20 ++++++++++++++++
 2 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/src/python/pants/backend/python/util_rules/pex.py b/src/python/pants/backend/python/util_rules/pex.py
index 1254d11eb9d..3a27bff96d1 100644
--- a/src/python/pants/backend/python/util_rules/pex.py
+++ b/src/python/pants/backend/python/util_rules/pex.py
@@ -5,6 +5,7 @@
 import functools
 import itertools
 import logging
+from collections import defaultdict
 from dataclasses import dataclass
 from textwrap import dedent
 from typing import (
@@ -187,16 +188,19 @@ def create_from_compatibility_fields(
     def group_field_sets_by_constraints(
         cls, field_sets: Iterable[_FS], python_setup: PythonSetup
     ) -> FrozenDict["PexInterpreterConstraints", Tuple[_FS, ...]]:
-        constraints_to_field_sets = {
-            constraints: tuple(sorted(fs_collection, key=lambda fs: fs.address))
-            for constraints, fs_collection in itertools.groupby(
-                field_sets,
-                key=lambda fs: cls.create_from_compatibility_fields(
-                    [fs.compatibility], python_setup
-                ),
-            )
-        }
-        return FrozenDict(sorted(constraints_to_field_sets.items()))
+
+        results = defaultdict(set)
+
+        for fs in field_sets:
+            constraints = cls.create_from_compatibility_fields([fs.compatibility], python_setup)
+            results[constraints].add(fs)
+
+        return FrozenDict(
+            {
+                constraints: tuple(sorted(field_sets, key=lambda fs: fs.address))
+                for constraints, field_sets in sorted(results.items())
+            }
+        )
 
     def generate_pex_arg_list(self) -> List[str]:
         args = []
diff --git a/src/python/pants/backend/python/util_rules/pex_test.py b/src/python/pants/backend/python/util_rules/pex_test.py
index 4745f435250..146c0538689 100644
--- a/src/python/pants/backend/python/util_rules/pex_test.py
+++ b/src/python/pants/backend/python/util_rules/pex_test.py
@@ -241,6 +241,26 @@ def test_group_field_sets_by_constraints() -> None:
     )
 
 
+def test_group_field_sets_by_constraints_with_unsorted_inputs() -> None:
+    py3_fs = [
+        MockFieldSet.create_for_test("src/python/a_dir/path.py:test", "==3.6.*"),
+        MockFieldSet.create_for_test("src/python/b_dir/path.py:test", ">2.7,<3"),
+        MockFieldSet.create_for_test("src/python/c_dir/path.py:test", "==3.6.*"),
+    ]
+
+    ic_36 = PexInterpreterConstraints([Requirement.parse("CPython==3.6.*")])
+
+    output = PexInterpreterConstraints.group_field_sets_by_constraints(
+        py3_fs,
+        python_setup=create_subsystem(PythonSetup, interpreter_constraints=[]),
+    )
+
+    assert output[ic_36] == (
+        MockFieldSet.create_for_test("src/python/a_dir/path.py:test", "==3.6.*"),
+        MockFieldSet.create_for_test("src/python/c_dir/path.py:test", "==3.6.*"),
+    )
+
+
 @dataclass(frozen=True)
 class ExactRequirement:
     project_name: str