-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathautocorrect.py
executable file
·67 lines (46 loc) · 1.93 KB
/
autocorrect.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
#!/usr/bin/env python
import sys, Queue
# Expect dictionary file as first arg, list of words to auto-correct as second argument
dictionary = [line.strip().lower() for line in open(sys.argv[1])]
candidates = [line.strip().lower() for line in open(sys.argv[2])]
max_tries = int(sys.argv[3]) if len(sys.argv) > 3 else 2500
alphabet = "abcdefghijklmnopqrstuvwxyz"
def suggest(word, max_tries):
pq = Queue.PriorityQueue()
pq.put((0, word))
tried = set()
while pq.qsize() > 0 and len(tried) < max_tries:
mods, word = pq.get()
tried.add(word)
if word in dictionary:
return word
# No need to generate more of the search space that we won't use
if pq.qsize() > max_tries - len(tried):
continue
neighbors = []
# Heuristic: Try swaps first since they're common
for i, c in enumerate(word):
if i + 1 < len(word):
neighbors.append(word[:i] + word[i + 1] + word[i] + word[i+2:])
for neighbor in neighbors:
if neighbor not in tried:
pq.put((mods + .5, neighbor))
neighbors = []
# Now do a principled single-edit neighbor enumeration
for i, c in enumerate(word):
for a in alphabet:
# Won't you be my (character changed) neighbor
neighbors.append(word[:i] + a + word[i+1:])
# Won't you be my (character added) neighbor
neighbors.append(word[:i] + a + word[i:])
# Won't you be my (character deleted) neighbor
neighbors.append(word[:i] + word[i+1:])
# Neighbors for adding a character at the end
for a in alphabet:
neighbors.append(word + a)
for neighbor in neighbors:
if neighbor not in tried:
pq.put((mods + 1, neighbor))
return "UNKNOWN"
for candidate in candidates:
print(suggest(candidate, max_tries))