forked from mukeshmk/image-audio-captcha
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgenerate-multiTTS-audio-captcha.py
130 lines (106 loc) · 4.44 KB
/
generate-multiTTS-audio-captcha.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
# !/usr/bin/env python3
import os
import random
import argparse
from multiprocessing import Pool
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import librosa
import librosa.display
from pydub import AudioSegment
file_format = '.wav'
# This are the dir name from where audio files will be picked up
file_names = os.listdir(os.getcwd() + '\\symbols\\')
tts_list = []
for file in file_names:
tts_list.append(file)
tempDir = 'multiTTS_data'
def create_audio_and_convert(output_dir, captcha_text):
pwd = os.getcwd()
work_dir = pwd + '\\symbols\\'
combined_sounds = None
for symbol in captcha_text:
tts_service = random.choice(tts_list)
raw_path = work_dir + tts_service + '\\' + symbol
sound = AudioSegment.from_wav(raw_path + ".wav")
if combined_sounds is None:
combined_sounds = sound
else:
combined_sounds = combined_sounds + sound
if not os.path.exists(pwd + '\\' + tempDir):
print("Creating temp directory " + tempDir)
os.makedirs(pwd + '\\' + tempDir)
output_dir_tts = pwd + '\\' + tempDir + '\\'
wav_audio_path = output_dir_tts + captcha_text + '.wav'
combined_sounds.export(wav_audio_path, format="wav")
plt.interactive(False)
clip, sample_rate = librosa.load(wav_audio_path, sr=None)
fig = plt.figure(figsize=[0.415, 0.210])
ax = fig.add_subplot(111)
ax.axes.get_xaxis().set_visible(False)
ax.axes.get_yaxis().set_visible(False)
ax.set_frame_on(False)
S = librosa.feature.melspectrogram(y=clip, sr=sample_rate)
librosa.display.specshow(librosa.power_to_db(S, ref=np.max))
filename = os.path.join(output_dir, captcha_text + '.jpg')
plt.savefig(filename, dpi=400, bbox_inches='tight', pad_inches=0)
plt.close()
fig.clf()
plt.close(fig)
plt.close('all')
del clip, sample_rate, fig, ax, S
def scramble_captcha_name(captcha_name):
import hashlib
m = hashlib.sha1()
m.update(captcha_name.encode('utf-8'))
return m.hexdigest()
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--length', help='Length of captchas in characters', type=int)
parser.add_argument('--count', help='How many captchas to generate', type=int)
parser.add_argument('--scramble', help='Whether to scramble captcha names', default=False, action='store_true')
parser.add_argument('--output-dir', help='Where to store the generated captchas', type=str)
parser.add_argument('--symbols', help='File with the symbols to use in captchas', type=str)
parser.add_argument('--n', help='no of parallel threads to start', type=int, default=6)
args = parser.parse_args()
if args.length is None:
print("Please specify the captcha length")
exit(1)
if args.count is None:
print("Please specify the captcha count to generate")
exit(1)
if args.output_dir is None:
print("Please specify the captcha output directory")
exit(1)
if args.symbols is None:
print("Please specify the captcha symbols file")
exit(1)
if args.n is None:
print("Setting the number of threads to 6")
args.n = 6
symbols_file = open(args.symbols, 'r')
captcha_symbols = symbols_file.readline().strip()
symbols_file.close()
print("Generating captchas with symbol set {" + captcha_symbols + "}")
if not os.path.exists(args.output_dir):
print("Creating output directory " + args.output_dir)
os.makedirs(args.output_dir)
process_args = []
for i in range(args.count):
captcha_text = ''.join([random.choice(captcha_symbols) for j in range(args.length)])
captcha_name_scrambled = captcha_text
if args.scramble:
captcha_name_scrambled = scramble_captcha_name(captcha_text)
captcha_file_name = os.path.join(args.output_dir, captcha_name_scrambled + file_format)
if os.path.exists(captcha_file_name):
version = 1
while os.path.exists(os.path.join(args.output_dir, captcha_name_scrambled + '_' + str(version) + file_format)):
version += 1
captcha_file_name = os.path.join(args.output_dir, captcha_name_scrambled + '_' + str(version) + file_format)
process_args.append((args.output_dir, captcha_name_scrambled))
print('Generating Melspectrogram for Audio Captcha')
with Pool(args.n) as p:
p.starmap(create_audio_and_convert, process_args)
if __name__ == '__main__':
main()