forked from mukeshmk/image-audio-captcha
-
Notifications
You must be signed in to change notification settings - Fork 0
/
generate-audio-captcha.py
87 lines (69 loc) · 3.13 KB
/
generate-audio-captcha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
#!/usr/bin/env python3
import argparse
import os
import random
import sys

from gtts import gTTS
# Extension appended to the name of every generated captcha audio file.
# NOTE(review): gTTS is documented as producing MP3 audio, so files written
# with this extension presumably hold MP3 data -- confirm '.wav' is intended.
file_format = '.wav'
def scramble_captcha_name(captcha_name):
    """Return the SHA-1 hex digest of *captcha_name*.

    The name is encoded as UTF-8 before hashing, so the result is a
    40-character lowercase hexadecimal string that depends only on the
    text passed in.
    """
    # Function-scope import: hashlib is only needed by this helper.
    import hashlib

    encoded_name = captcha_name.encode('utf-8')
    return hashlib.sha1(encoded_name).hexdigest()
def _fail(message):
    """Print *message* and terminate the script with exit status 1."""
    print(message)
    # sys.exit instead of the `exit` builtin, which is injected by the `site`
    # module and is missing when the interpreter runs without it.
    sys.exit(1)


def _read_symbols(path):
    """Return the first line of the file at *path*, stripped of whitespace."""
    # The context manager closes the handle even if reading raises.
    with open(path, 'r') as symbols_file:
        return symbols_file.readline().strip()


def _unused_path(directory, stem):
    """Return a path inside *directory* for *stem* that does not exist yet.

    ``stem + file_format`` is used when free; otherwise ``_1``, ``_2``, ...
    is appended to the stem until an unused file name is found.
    """
    candidate = os.path.join(directory, stem + file_format)
    version = 1
    while os.path.exists(candidate):
        candidate = os.path.join(directory, stem + '_' + str(version) + file_format)
        version += 1
    return candidate


def main():
    """Generate spoken captcha clips with gTTS from command-line options.

    Two modes are supported:

    * Default: ``--count`` captchas are produced, each made of ``--length``
      symbols drawn at random from the first line of the ``--symbols`` file,
      and saved directly in ``--output-dir``. With ``--scramble`` the file
      name is the SHA-1 hex digest of the captcha text instead of the text.
    * ``--audio-dict`` (any value): one clip is produced per symbol, in the
      order the symbols appear, each stored in its own sub-directory of
      ``--output-dir`` named after the symbol. ``--length``, ``--count`` and
      ``--scramble`` are ignored in this mode.

    Existing files are never overwritten; a ``_<n>`` suffix is added to the
    file name instead. A missing required option, or a symbols file whose
    first line is empty, prints a message and exits with status 1.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--length', help='Length of captchas in characters', type=int)
    parser.add_argument('--count', help='How many captchas to generate', type=int)
    parser.add_argument('--scramble', help='Whether to scramble captcha names', default=False, action='store_true')
    parser.add_argument('--output-dir', help='Where to store the generated captchas', type=str)
    parser.add_argument('--symbols', help='File with the symbols to use in captchas', type=str)
    # Still takes a value (interface unchanged); only its presence matters.
    parser.add_argument('--audio-dict',
                        help='Generate one clip per symbol, each in its own sub-directory '
                             '(any value enables this mode)',
                        type=str)
    args = parser.parse_args()

    # A single presence test, so an empty value enables the mode consistently.
    audio_dict_mode = args.audio_dict is not None
    if audio_dict_mode:
        print("Generating Symbol Set Data")
    else:
        # Length and count are only meaningful for random captchas.
        if args.length is None:
            _fail("Please specify the captcha length")
        if args.count is None:
            _fail("Please specify the captcha count to generate")
    if args.output_dir is None:
        _fail("Please specify the captcha output directory")
    if args.symbols is None:
        _fail("Please specify the captcha symbols file")

    captcha_symbols = _read_symbols(args.symbols)
    if not captcha_symbols:
        # random.choice would otherwise raise IndexError on an empty sequence.
        _fail("The captcha symbols file has no symbols on its first line")
    print("Generating captchas with symbol set {" + captcha_symbols + "}")

    if not os.path.exists(args.output_dir):
        print("Creating output directory " + args.output_dir)
        os.makedirs(args.output_dir)

    if audio_dict_mode:
        # One clip per symbol, in file order.
        captcha_texts = iter(captcha_symbols)
    else:
        # Lazy generator: each captcha is drawn right before it is spoken.
        captcha_texts = (
            ''.join(random.choice(captcha_symbols) for _ in range(args.length))
            for _ in range(args.count)
        )
    # Dictionary entries keep their readable names so the folders stay usable.
    scramble = args.scramble and not audio_dict_mode

    for captcha_text in captcha_texts:
        captcha_name = scramble_captcha_name(captcha_text) if scramble else captcha_text

        if audio_dict_mode:
            # os.path.join keeps this portable; a literal '\\' only works on
            # Windows. exist_ok lets reruns reuse the per-symbol directory
            # instead of silently falling back to the top-level directory.
            target_dir = os.path.join(args.output_dir, captcha_name)
            os.makedirs(target_dir, exist_ok=True)
        else:
            target_dir = args.output_dir
        captcha_file_name = _unused_path(target_dir, captcha_name)

        # Pass the language by keyword: in gTTS releases that accept `tld`,
        # the second positional parameter is the Google host TLD, not lang.
        tts = gTTS(captcha_text, lang='en')
        tts.save(captcha_file_name)
# Run the generator only when executed as a script, not when imported.
if __name__ == "__main__":
    main()