Skip to content

Commit

Permalink
New hook 'destroyed-symlinks' to detect symlinks which are changed to…
Browse files Browse the repository at this point in the history
… regular files with a content of a path which that symlink was pointing to; move zsplit to util
  • Loading branch information
m-khvoinitsky authored and asottile committed Nov 18, 2020
1 parent 14e9f0e commit 1e87d59
Show file tree
Hide file tree
Showing 9 changed files with 204 additions and 18 deletions.
6 changes: 6 additions & 0 deletions .pre-commit-hooks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,12 @@
entry: debug-statement-hook
language: python
types: [python]
- id: destroyed-symlinks
name: Detect Destroyed Symlinks
description: Detects symlinks which are changed to regular files with a content of a path which that symlink was pointing to.
entry: destroyed-symlinks
language: python
types: [file]
- id: detect-aws-credentials
name: Detect AWS Credentials
description: Detects *your* aws credentials from the aws cli credentials file
Expand Down
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,12 @@ Attempts to load all yaml files to verify syntax.
#### `debug-statements`
Check for debugger imports and py37+ `breakpoint()` calls in python source.

#### `destroyed-symlinks`
Detects symlinks which are changed to regular files with a content of a path
which that symlink was pointing to.
This usually happens on Windows when a user clones a repository that has
symlinks but they do not have the permission to create symlinks.

#### `detect-aws-credentials`
Checks for the existence of AWS secrets that you have set up with the AWS CLI.
The following arguments are available:
Expand Down
9 changes: 1 addition & 8 deletions pre_commit_hooks/check_executables_have_shebangs.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,11 @@
from typing import Set

from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit

EXECUTABLE_VALUES = frozenset(('1', '3', '5', '7'))


def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []


def check_executables(paths: List[str]) -> int:
if sys.platform == 'win32': # pragma: win32 cover
return _check_git_filemode(paths)
Expand Down
96 changes: 96 additions & 0 deletions pre_commit_hooks/destroyed_symlinks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
import argparse
import shlex
import subprocess
from typing import List
from typing import Optional
from typing import Sequence

from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit

ORDINARY_CHANGED_ENTRIES_MARKER = '1'
PERMS_LINK = '120000'
PERMS_NONEXIST = '000000'


def find_destroyed_symlinks(files: Sequence[str]) -> List[str]:
destroyed_links: List[str] = []
if not files:
return destroyed_links
for line in zsplit(
cmd_output('git', 'status', '--porcelain=v2', '-z', '--', *files),
):
splitted = line.split(' ')
if splitted and splitted[0] == ORDINARY_CHANGED_ENTRIES_MARKER:
# https://git-scm.com/docs/git-status#_changed_tracked_entries
(
_, _, _,
mode_HEAD,
mode_index,
_,
hash_HEAD,
hash_index,
*path_splitted,
) = splitted
path = ' '.join(path_splitted)
if (
mode_HEAD == PERMS_LINK and
mode_index != PERMS_LINK and
mode_index != PERMS_NONEXIST
):
if hash_HEAD == hash_index:
# if old and new hashes are equal, it's not needed to check
# anything more, we've found a destroyed symlink for sure
destroyed_links.append(path)
else:
# if old and new hashes are *not* equal, it doesn't mean
# that everything is OK - new file may be altered
# by something like trailing-whitespace and/or
# mixed-line-ending hooks so we need to go deeper
SIZE_CMD = ('git', 'cat-file', '-s')
size_index = int(cmd_output(*SIZE_CMD, hash_index).strip())
size_HEAD = int(cmd_output(*SIZE_CMD, hash_HEAD).strip())

# in the worst case new file may have CRLF added
# so check content only if new file is bigger
# not more than 2 bytes compared to the old one
if size_index <= size_HEAD + 2:
head_content = subprocess.check_output(
('git', 'cat-file', '-p', hash_HEAD),
).rstrip()
index_content = subprocess.check_output(
('git', 'cat-file', '-p', hash_index),
).rstrip()
if head_content == index_content:
destroyed_links.append(path)
return destroyed_links


def main(argv: Optional[Sequence[str]] = None) -> int:
parser = argparse.ArgumentParser()
parser.add_argument('filenames', nargs='*', help='Filenames to check.')
args = parser.parse_args(argv)
destroyed_links = find_destroyed_symlinks(files=args.filenames)
if destroyed_links:
print('Destroyed symlinks:')
for destroyed_link in destroyed_links:
print(f'- {destroyed_link}')
print('You should unstage affected files:')
print(
'\tgit reset HEAD -- {}'.format(
' '.join(shlex.quote(link) for link in destroyed_links),
),
)
print(
'And retry commit. As a long term solution '
'you may try to explicitly tell git that your '
'environment does not support symlinks:',
)
print('\tgit config core.symlinks false')
return 1
else:
return 0


if __name__ == '__main__':
exit(main())
9 changes: 9 additions & 0 deletions pre_commit_hooks/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import subprocess
from typing import Any
from typing import List
from typing import Optional
from typing import Set

Expand All @@ -22,3 +23,11 @@ def cmd_output(*cmd: str, retcode: Optional[int] = 0, **kwargs: Any) -> str:
if retcode is not None and proc.returncode != retcode:
raise CalledProcessError(cmd, retcode, proc.returncode, stdout, stderr)
return stdout


def zsplit(s: str) -> List[str]:
s = s.strip('\0')
if s:
return s.split('\0')
else:
return []
1 change: 1 addition & 0 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ console_scripts =
check-xml = pre_commit_hooks.check_xml:main
check-yaml = pre_commit_hooks.check_yaml:main
debug-statement-hook = pre_commit_hooks.debug_statement_hook:main
destroyed-symlinks = pre_commit_hooks.destroyed_symlinks:main
detect-aws-credentials = pre_commit_hooks.detect_aws_credentials:main
detect-private-key = pre_commit_hooks.detect_private_key:main
double-quote-string-fixer = pre_commit_hooks.string_fixer:main
Expand Down
10 changes: 0 additions & 10 deletions tests/check_executables_have_shebangs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,16 +102,6 @@ def test_check_git_filemode_failing(tmpdir):
assert check_executables_have_shebangs._check_git_filemode(files) == 1


@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_correctly(out):
assert check_executables_have_shebangs.zsplit(out) == ['f1', 'f2']


@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert check_executables_have_shebangs.zsplit(out) == []


@pytest.mark.parametrize(
('content', 'mode', 'expected'),
(
Expand Down
74 changes: 74 additions & 0 deletions tests/destroyed_symlinks_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
import os
import subprocess

import pytest

from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks
from pre_commit_hooks.destroyed_symlinks import main

TEST_SYMLINK = 'test_symlink'
TEST_SYMLINK_TARGET = '/doesnt/really/matters'
TEST_FILE = 'test_file'
TEST_FILE_RENAMED = f'{TEST_FILE}_renamed'


@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
source_repo = tmpdir.join('src')
os.makedirs(source_repo, exist_ok=True)
test_repo = tmpdir.join('test')
with source_repo.as_cwd():
subprocess.check_call(('git', 'init'))
os.symlink(TEST_SYMLINK_TARGET, TEST_SYMLINK)
with open(TEST_FILE, 'w') as f:
print('some random content', file=f)
subprocess.check_call(('git', 'add', '.'))
subprocess.check_call(
('git', 'commit', '--no-gpg-sign', '-m', 'initial'),
)
assert b'120000 ' in subprocess.check_output(
('git', 'cat-file', '-p', 'HEAD^{tree}'),
)
subprocess.check_call(
('git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo),
)
with test_repo.as_cwd():
subprocess.check_call(
('git', 'config', '--local', 'core.symlinks', 'true'),
)
subprocess.check_call(('git', 'mv', TEST_FILE, TEST_FILE_RENAMED))
assert not os.path.islink(test_repo.join(TEST_SYMLINK))
yield test_repo


def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
with repo_with_destroyed_symlink.as_cwd():
assert find_destroyed_symlinks([]) == []
assert main([]) == 0

subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks([TEST_SYMLINK]) == [TEST_SYMLINK]
assert find_destroyed_symlinks([]) == []
assert main([]) == 0
assert find_destroyed_symlinks([TEST_FILE_RENAMED, TEST_FILE]) == []
ALL_STAGED = [TEST_SYMLINK, TEST_FILE_RENAMED]
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0

with open(TEST_SYMLINK, 'a') as f:
print(file=f) # add trailing newline
subprocess.check_call(['git', 'add', TEST_SYMLINK])
assert find_destroyed_symlinks(ALL_STAGED) == [TEST_SYMLINK]
assert main(ALL_STAGED) != 0

with open(TEST_SYMLINK, 'w') as f:
print('0' * len(TEST_SYMLINK_TARGET), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0

with open(TEST_SYMLINK, 'w') as f:
print('0' * (len(TEST_SYMLINK_TARGET) + 3), file=f)
subprocess.check_call(('git', 'add', TEST_SYMLINK))
assert find_destroyed_symlinks(ALL_STAGED) == []
assert main(ALL_STAGED) == 0
11 changes: 11 additions & 0 deletions tests/util_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from pre_commit_hooks.util import CalledProcessError
from pre_commit_hooks.util import cmd_output
from pre_commit_hooks.util import zsplit


def test_raises_on_error():
Expand All @@ -12,3 +13,13 @@ def test_raises_on_error():
def test_output():
ret = cmd_output('sh', '-c', 'echo hi')
assert ret == 'hi\n'


@pytest.mark.parametrize('out', ('\0f1\0f2\0', '\0f1\0f2', 'f1\0f2\0'))
def test_check_zsplits_str_correctly(out):
assert zsplit(out) == ['f1', 'f2']


@pytest.mark.parametrize('out', ('\0\0', '\0', ''))
def test_check_zsplit_returns_empty(out):
assert zsplit(out) == []

0 comments on commit 1e87d59

Please sign in to comment.