From ca832b2836e0bffa7cf95589acdcd71230f5834e Mon Sep 17 00:00:00 2001 From: Pradyun Gedam Date: Sat, 24 Apr 2021 10:13:15 +0100 Subject: [PATCH] Don't split git references on unicode separators Previously, maliciously formatted tags could be used to hijack a commit-based pin. Using the fact that the split here allowed for all of unicode's whitespace characters as separators -- which git allows as a part of a tag name -- it is possible to force a different revision to be installed; if an attacker gains access to the repository. This change stops splitting the string on unicode characters, by forcing the splits to happen on newlines and ASCII spaces. --- src/pip/_internal/vcs/git.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/pip/_internal/vcs/git.py b/src/pip/_internal/vcs/git.py index 9f24ccdf5ee..b7c1b9fe7b5 100644 --- a/src/pip/_internal/vcs/git.py +++ b/src/pip/_internal/vcs/git.py @@ -131,9 +131,15 @@ def get_revision_sha(cls, dest, rev): on_returncode='ignore', ) refs = {} - for line in output.strip().splitlines(): + # NOTE: We do not use splitlines here since that would split on other + # unicode separators, which can be maliciously used to install a + # different revision. + for line in output.strip().split("\n"): + line = line.rstrip("\r") + if not line: + continue try: - ref_sha, ref_name = line.split() + ref_sha, ref_name = line.split(" ", maxsplit=2) except ValueError: # Include the offending line to simplify troubleshooting if # this error ever occurs.