Skip to content

Commit

Permalink
mtime-window: fix non-deterministic behaviour
Browse files Browse the repository at this point in the history
  • Loading branch information
sahib committed Nov 20, 2016
1 parent 23e3efd commit 78bbe18
Show file tree
Hide file tree
Showing 2 changed files with 54 additions and 22 deletions.
44 changes: 32 additions & 12 deletions lib/preprocess.c
Original file line number Diff line number Diff line change
Expand Up @@ -84,28 +84,47 @@ gint rm_file_cmp(const RmFile *file_a, const RmFile *file_b) {
: 0;
}

if(result == 0 && cfg->mtime_window >= 0) {
gint64 diff = (gint64)file_a->mtime - (gint64)file_b->mtime;
if(ABS(diff) <= cfg->mtime_window) {
result = 0;
} else {
result = diff;
}
}

return result;
}

gint rm_file_cmp_full(const RmFile *file_a, const RmFile *file_b,
static gint rm_file_cmp_full(const RmFile *file_a, const RmFile *file_b,
const RmSession *session) {
gint result = rm_file_cmp(file_a, file_b);
if(result != 0) {
return result;
}

if(session->cfg->mtime_window >= 0) {
return (gint64)file_a->mtime - (gint64)file_b->mtime;
}

return rm_pp_cmp_orig_criteria(file_a, file_b, session);
}

/**
 * Comparator that decides whether two files belong to the same group.
 *
 * The primary criteria are delegated to rm_file_cmp(). If those compare
 * equal and --mtime-window is active (cfg->mtime_window >= 0), the files are
 * additionally required to have modification times that lie within the
 * window of each other.
 *
 * @param file_a   first file to compare
 * @param file_b   second file to compare
 * @param session  session providing the configuration (session->cfg)
 * @return 0 if both files may stay in the same group, otherwise a non-zero
 *         value whose sign tells which file comes first
 */
static gint rm_file_cmp_split(const RmFile *file_a, const RmFile *file_b,
                              const RmSession *session) {
    gint result = rm_file_cmp(file_a, file_b);
    if(result != 0) {
        return result;
    }

    /* If --mtime-window is specified, we need to check if the mtime is inside
     * the window. The file list was sorted by rm_file_cmp_full by taking the
     * diff of mtimes, therefore we have to define the split criteria
     * differently.
     */
    if(session->cfg->mtime_window >= 0) {
        gint64 diff = (gint64)file_a->mtime - (gint64)file_b->mtime;
        if(ABS(diff) <= session->cfg->mtime_window) {
            return 0;
        }

        /* diff is non-zero here. Return just its sign, since converting the
         * 64 bit value to gint could truncate it to 0 and wrongly report the
         * files as belonging to the same group.
         */
        return (diff < 0) ? -1 : 1;
    }

    return 0;
}

static guint rm_node_hash(const RmFile *file) {
return file->inode ^ file->dev;
}
Expand Down Expand Up @@ -639,7 +658,7 @@ void rm_preprocess(RmSession *session) {

/* get next file and check if it is part of the same group */
file = g_queue_pop_head(all_files);
if(!file || rm_file_cmp(file, current_size_file) != 0) {
if(!file || rm_file_cmp_split(file, current_size_file, session) != 0) {
/* process completed group (all same size & other criteria)*/
/* remove path doubles and handle "other" lint */

Expand All @@ -655,8 +674,9 @@ void rm_preprocess(RmSession *session) {
/* zero size group after handling other lint; remove it */
tables->size_groups = g_slist_delete_link(tables->size_groups, tables->size_groups);
}
current_size_file = file;
}

current_size_file = file;
}

session->other_lint_cnt += rm_pp_handler_other_lint(session);
Expand Down
32 changes: 22 additions & 10 deletions tests/test_options/test_mtime_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def test_consider_mtime():
create_file('xxx', 'a')
create_file('xxx', 'b')
create_file('xxx', 'c')
create_file('xxx', 'd')

def set_mtime(path, mtime):
full_path = os.path.join(TESTDIR_NAME, path)
Expand All @@ -20,21 +21,32 @@ def set_mtime(path, mtime):
set_mtime('a', '2004-02-29 16:21:42')
set_mtime('b', '2004-02-29 16:21:42')
set_mtime('c', '2004-02-29 16:21:44')
set_mtime('d', '2004-02-29 16:21:45')

head, *data, footer = run_rmlint('--mtime-window=-1')
assert len(data) == 3
assert footer['total_files'] == 3
assert footer['total_lint_size'] == 6
assert footer['duplicates'] == 2
assert len(data) == 4
assert footer['total_files'] == 4
assert footer['total_lint_size'] == 9
assert footer['duplicates'] == 3
assert footer['duplicate_sets'] == 1

head, *data, footer = run_rmlint('--mtime-window=+1')
head, *data, footer = run_rmlint('--mtime-window=0')
assert len(data) == 2
assert footer['total_files'] == 3
assert footer['total_files'] == 4
assert footer['total_lint_size'] == 3
assert footer['duplicates'] == 1
assert footer['duplicate_sets'] == 1

head, *data, footer = run_rmlint('--mtime-window=+2')
assert len(data) == 3
assert footer['total_files'] == 3
assert footer['total_lint_size'] == 6
head, *data, footer = run_rmlint('--mtime-window=+1')
assert len(data) == 4
assert footer['total_files'] == 4
assert footer['total_lint_size'] == 6 # two originals.
assert footer['duplicates'] == 2
assert footer['duplicate_sets'] == 2

head, *data, footer = run_rmlint('--mtime-window=+2')
assert len(data) == 4 # '2' also chains up to d from c.
assert footer['total_files'] == 4
assert footer['total_lint_size'] == 9
assert footer['duplicates'] == 3
assert footer['duplicate_sets'] == 1

0 comments on commit 78bbe18

Please sign in to comment.