-
-
Notifications
You must be signed in to change notification settings - Fork 718
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New hook 'destroyed-symlinks' to detect symlinks which are changed to regular files whose content is the path that the symlink was pointing to
- Loading branch information
1 parent
e1668fe
commit 6bb4c2a
Showing
5 changed files
with
171 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,90 @@ | ||
import argparse | ||
import sys | ||
from operator import methodcaller | ||
from subprocess import check_call | ||
from subprocess import check_output | ||
from typing import Optional | ||
from typing import Sequence | ||
|
||
# First space-separated field of an "ordinary changed entry" line in the
# output of `git status --porcelain=v2`.
ORDINARY_CHANGED_ENTRIES_MARKER = b'1'
# Mode that git records for a symbolic link.
PERMS_LINK = b'120000'
# Compared against the index mode to skip entries that are not staged as a
# file at all -- presumably the mode git reports for a missing entry.
PERMS_NONEXIST = b'000000'
|
||
|
||
def normalize_content(content: bytes) -> bytes:
    """Return *content* with per-line whitespace and blank lines removed.

    Every line is stripped of leading and trailing whitespace, lines that
    end up empty are dropped, and the remaining lines are joined with a
    single ``\\n`` regardless of the original line endings.
    """
    stripped_lines = (raw_line.strip() for raw_line in content.splitlines())
    return b'\n'.join(chunk for chunk in stripped_lines if chunk)
|
||
|
||
def find_destroyed_symlinks(autofix: bool) -> Sequence[bytes]:
    """Return the paths of staged symlinks that were turned into regular files.

    An entry counts as a destroyed symlink when it is a symlink in HEAD, is
    staged as a regular file, and the staged content equals the old link
    target (exactly, or after ``normalize_content`` for small files).

    :param autofix: when true, put the HEAD symlink entry back into the index
        for every destroyed symlink that is found.
    :return: the raw (bytes) paths of the destroyed symlinks, in the order
        ``git status`` reported them.
    :raises subprocess.CalledProcessError: if a git command fails.
    """
    # Mode of a submodule ("gitlink") entry.  Its hash names a commit that
    # normally is not present in this repository, so `git cat-file` on it
    # would fail; a symlink replaced by a submodule is not a destroyed link.
    perms_gitlink = b'160000'
    destroyed_links = []
    status = check_output(['git', 'status', '--porcelain=v2', '-z'])
    entries = iter(status.split(b'\0'))
    for line in entries:
        splitted = line.split(b' ')
        if splitted[0] == b'2':
            # With `-z` a renamed/copied entry is followed by its original
            # path as a separate NUL-terminated token.  Consume it so that a
            # file called e.g. `1` is never parsed as an entry of its own.
            next(entries, None)
            continue
        if splitted[0] != ORDINARY_CHANGED_ENTRIES_MARKER or len(splitted) < 9:
            continue
        # field layout is described at
        # https://git-scm.com/docs/git-status#_changed_tracked_entries
        _, _, _, mH, mI, _, hH, hI, *path_splitted = splitted
        # the path is the last field and may itself contain spaces
        path = b' '.join(path_splitted)
        if mH != PERMS_LINK or mI in (PERMS_LINK, PERMS_NONEXIST, perms_gitlink):
            continue
        if hH == hI:
            # if old and new hashes are equal, nothing more needs checking:
            # we've found a destroyed symlink for sure
            found_destroyed_link = True
        else:
            # if old and new hashes are *not* equal, it doesn't mean that
            # everything is OK - the new file may have been altered by
            # something like the trailing-whitespace and/or mixed-line-ending
            # hooks, so the contents have to be compared
            found_destroyed_link = False
            index_size = int(check_output(['git', 'cat-file', '-s', hI]).strip())
            # Most filesystems limit path length to 4096 bytes.  Even in the
            # worst case (a target made only of newlines and slashes, then
            # converted to Windows line breaks) the file cannot exceed
            # 4096*2 bytes, so anything bigger is a genuinely new file
            # rather than a destroyed symlink.
            if index_size <= 8192:
                head_content = normalize_content(
                    check_output(['git', 'cat-file', '-p', hH]),
                )
                index_content = normalize_content(
                    check_output(['git', 'cat-file', '-p', hI]),
                )
                found_destroyed_link = head_content == index_content
        if not found_destroyed_link:
            continue
        destroyed_links.append(path)
        if autofix:
            # restore the symlink entry (mode + blob from HEAD) in the index
            check_call([
                'git',
                'update-index',
                '--cacheinfo',
                b','.join((PERMS_LINK, hH, path)),
            ])
    return destroyed_links
|
||
|
||
def main(argv: Optional[Sequence[str]] = None) -> int:
    """Command-line entry point of the hook.

    Parses the optional ``--autofix`` flag from *argv*, looks for destroyed
    symlinks and lists them on stdout.

    :return: ``1`` when at least one destroyed symlink was found, else ``0``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--autofix',
        action='store_true',
        help='unstage broken symlinks',
    )
    options = parser.parse_args(argv)

    broken = find_destroyed_symlinks(autofix=options.autofix)
    if not broken:
        return 0

    print('Destroyed symlinks:', flush=True)
    # Paths are raw bytes, so they go through the binary layer of stdout.
    raw_stdout = sys.stdout.buffer
    for link_path in broken:
        raw_stdout.write(b'- ')
        raw_stdout.write(link_path)
        raw_stdout.write(b'\n')
        raw_stdout.flush()
    return 1
|
||
|
||
if __name__ == '__main__':
    # The bare `exit` name is injected by the `site` module for interactive
    # sessions and is missing under `python -S`; raise SystemExit directly.
    raise SystemExit(main())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
import os | ||
from subprocess import check_call | ||
from subprocess import check_output | ||
|
||
import pytest | ||
|
||
from pre_commit_hooks.destroyed_symlinks import find_destroyed_symlinks | ||
from pre_commit_hooks.destroyed_symlinks import main | ||
from pre_commit_hooks.destroyed_symlinks import normalize_content | ||
|
||
# Name of the symlink that the fixture commits and the tests re-stage.
TEST_SYMLINK = 'test_symlink'
|
||
|
||
@pytest.fixture
def repo_with_destroyed_symlink(tmpdir):
    """Yield a clone in which a committed symlink is checked out as a file.

    A source repository containing one committed symlink is created and then
    cloned with ``core.symlinks=false``, so the working tree of the clone
    holds a regular file where HEAD holds a symlink.
    """
    test_repo = tmpdir.join('test')
    source_repo = tmpdir.join('src')
    os.makedirs(source_repo, exist_ok=True)
    with source_repo.as_cwd():
        check_call(['git', 'init'])
        os.symlink('/doesnt/really/matters', TEST_SYMLINK)
        check_call(['git', 'add', '.'])
        check_call(['git', 'commit', '--no-gpg-sign', '-m', 'initial'])
        # the only tree entry must have been recorded with the symlink mode
        head_tree = check_output(['git', 'cat-file', '-p', 'HEAD^{tree}'])
        assert head_tree.startswith(b'120000')
    clone_cmd = ['git', '-c', 'core.symlinks=false', 'clone', source_repo, test_repo]
    check_call(clone_cmd)
    cloned_entry = test_repo.join(TEST_SYMLINK)
    assert not os.path.islink(cloned_entry)
    yield test_repo
|
||
|
||
@pytest.mark.parametrize(
    ('content', 'result'),
    [
        (b'qwer', b'qwer'),
        (b'qwer\n', b'qwer'),
        (b'qwer\nasdf', b'qwer\nasdf'),
        (b'qwer\r\nasdf', b'qwer\nasdf'),
        (b' qwer\r\n\tasdf \r\n', b'qwer\nasdf'),
    ],
)
def test_normalize_content(content: bytes, result: bytes) -> None:
    """normalize_content strips each line and unifies the line endings."""
    normalized = normalize_content(content)
    assert normalized == result
|
||
|
||
def test_find_destroyed_symlinks(repo_with_destroyed_symlink):
    """Walk through detection, autofix and the not-a-symlink cases."""
    with repo_with_destroyed_symlink.as_cwd():
        # nothing is staged yet, so nothing is reported
        assert find_destroyed_symlinks(autofix=False) == []
        assert main([]) == 0

        # staging the regular file in place of the symlink is detected
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == [TEST_SYMLINK.encode()]
        assert main([]) != 0
        assert find_destroyed_symlinks(autofix=True) == [TEST_SYMLINK.encode()]
        # check that file is not staged anymore
        assert check_output(['git', 'status', '--porcelain=v2']).startswith(b'1 .T ')

        # the same through the command-line entry point
        check_call(['git', 'add', TEST_SYMLINK])
        assert main(['--autofix']) != 0
        assert check_output(['git', 'status', '--porcelain=v2']).startswith(b'1 .T ')

        # Close the file explicitly so the data is on disk before `git add`
        # runs, instead of relying on the garbage collector to flush it.
        with open(TEST_SYMLINK, 'a') as f:
            print(file=f)  # add trailing newline
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == [TEST_SYMLINK.encode()]
        assert main([]) != 0

        # a file above the size limit is treated as a genuinely new file
        with open(TEST_SYMLINK, 'w') as f:
            print('0' * 8193, file=f)
        check_call(['git', 'add', TEST_SYMLINK])
        assert find_destroyed_symlinks(autofix=False) == []
        assert main([]) == 0