-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.ml
67 lines (58 loc) · 1.97 KB
/
main.ml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
open Common
(* [signature file] reads [file] and returns the hexadecimal form of its
   digest as computed by [Digest.file] (MD5 in the standard library).
   Any exception raised by [Digest.file] (e.g. when the file cannot be
   read) is propagated to the caller. *)
let signature file =
  let digest = Digest.file file in
  Digest.to_hex digest
(* Former implementation, which shelled out to md5sum; kept for reference:
match Common.cmd_to_list (spf "md5sum %s" (Filename.quote file)) with
| [x] ->
if x =~ "^\\([0-9a-f]+\\) .*$"
then Common.matched1 x
else failwith (spf "not a md5sum signature %s for %s" x file)
| _ -> failwith (spf "could not get signature for %s" file)
*)
(* [ask_delete dupe orig] prints the pair ([dupe], then [orig]) and hands a
   shell-quoted [rm -f dupe] command to [Common2.command2_y_or_no]
   (presumably that helper asks for a y/n confirmation before running the
   command -- confirm against Common2). The helper's result is discarded;
   the function always returns unit. *)
let ask_delete dupe orig =
  let report = spf "file %s\n = %s" dupe orig in
  pr report;
  let rm_cmd = spf "rm -f %s" (Filename.quote dupe) in
  ignore (Common2.command2_y_or_no rm_cmd)
(* Entry point of the duplicate finder.

   Command line: find-dupe <dir with suspected dupe> <base dirs...>

   Lists the regular files under the first directory and under the base
   directories (via the external [find] command), indexes the base files
   by size, and for every file of the first directory that shares its size
   with at least one base file, compares signatures. On the first base
   file with an equal signature, [ask_delete] is called with the suspected
   duplicate and the matching base file.

   Raises [Failure] with a usage message when fewer than two directories
   are given. Exceptions from [signature] and the Common/Common2 helpers
   are not caught here. *)
let main () =
  let dir1, base_dirs =
    match Array.to_list Sys.argv with
    | _::dir1::dir2::xs -> dir1, dir2::xs
    | _ -> failwith "usage: find-dupe <dir with suspected dupe> <base dirs...>"
  in
  (* Unquoted, space-separated form: used only for the progress message. *)
  let other_dirs = String.concat " " base_dirs in
  (* Quote every directory individually so that a name containing spaces or
     shell metacharacters reaches find as exactly one argument. *)
  let quoted_base_dirs =
    base_dirs |> List.map Filename.quote |> String.concat " "
  in
  let files =
    Common.cmd_to_list (spf "find %s -type f" (Filename.quote dir1))
  in
  pr2 (spf "Done scanning %s" dir1);
  let other_files =
    Common.cmd_to_list (spf "find %s -type f" quoted_base_dirs)
  in
  (* use the Hashtbl.find_all property: Hashtbl.add keeps earlier bindings,
     so one size key maps to every base file of that size *)
  let hother_size = Hashtbl.create 1001 in
  other_files |> List.iter (fun file ->
    Hashtbl.add hother_size (Common2.filesize file) file
  );
  (* Cache from base file path to its signature, so that each base file is
     hashed at most once however many suspects share its size. *)
  let hdone = Hashtbl.create 1001 in
  let cached_signature candidate =
    try Hashtbl.find hdone candidate
    with Not_found ->
      let md5 = signature candidate in
      Hashtbl.add hdone candidate md5;
      md5
  in
  pr2 (spf "Done scanning %s" other_dirs);
  files |> List.iter (fun file ->
    let size = Common2.filesize file in
    (* CONFIG && size > 5000 *)
    match Hashtbl.find_all hother_size size with
    | [] -> ()
    | candidates ->
      (* Hash the suspect only now that a same-size base file exists. *)
      let k = signature file in
      let found =
        candidates |> Common.find_some_opt (fun candidate ->
          (* Never report a file as a duplicate of itself. This only
             detects textually identical paths; the same file reached
             through a different spelling of its path is not detected. *)
          if candidate <> file && cached_signature candidate = k
          then Some candidate
          else None
        )
      in
      (match found with
       | Some orig -> ask_delete file orig
       | None -> ())
  )
(* Program entry: run [main]. Binding to [()] rather than [_] makes the
   compiler check that [main ()] really returns unit. *)
let () = main ()