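# outdated_v1.py
#
# Prototype video translator: extracts the audio track from a video file,
# transcribes it with Google Cloud Speech, translates the transcript with
# Google Cloud Translate, synthesizes the translation with Google Cloud
# Text-to-Speech, and muxes the new narration back onto the video.
# Voice gender is guessed from the speaker's pitch using crepe.
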
import io
import os

import crepe
from google.cloud import speech_v1, texttospeech, translate_v2
from moviepy.editor import AudioFileClip, CompositeAudioClip, VideoFileClip
from scipy.io import wavfile

class VideoTranslator:
    def __init__(self):
        # BEGIN TRANSLATION SETUP
        self.translate_client = translate_v2.Client()
        # END TRANSLATION SETUP

        # BEGIN GENERAL SETUP
        # Map human-readable language names ("english") to ISO codes ("en").
        self.languages = {}
        for lng in self.translate_client.get_languages():
            self.languages[lng['name'].lower()] = lng['language']
        # END GENERAL SETUP

        # BEGIN AUDIO-TEXT SETUP
        self.audio_text_client = speech_v1.SpeechClient()
        self.audio_channel_count = 2
        self.enable_separate_recognition_per_channel = True
        # END AUDIO-TEXT SETUP

        # BEGIN TEXT-AUDIO SETUP
        self.text_audio_client = texttospeech.TextToSpeechClient()
        # END TEXT-AUDIO SETUP

    def translate(self, text, lng="english"):
        translation = self.translate_client.translate(
            text, target_language=self.languages[lng.lower()])
        return self.edit_transcript(translation['translatedText'])

    def get_audio(self, local_file_path):
        with io.open(local_file_path, "rb") as f:
            content = f.read()
        return content

    def translate_video(self, url, native_lng, lng="english"):
        # video, audio = self.retrieve_video_and_audio(url)
        audio = {"content": self.get_audio(url)}
        full_transcript = self.split_transcript(self.get_transcript(audio, native_lng))
        translated_transcript = [self.translate(line, lng) for line in full_transcript]

        # Synthesize each translated line, sped up or slowed down so it
        # roughly matches the length of the original line.
        translated_audio = None
        for native_line, translated_line in zip(full_transcript, translated_transcript):
            speed_factor = self.get_speed_factor(native_line, translated_line)
            line_audio = self.text_to_audio(translated_line, lng, speed_factor=speed_factor)
            translated_audio = line_audio if translated_audio is None else translated_audio + line_audio

        with open("output.mp3", "wb") as out:
            out.write(translated_audio)

        # `videoclip` is the module-level clip loaded below; replace its
        # audio track with the translated narration.
        audio_background = AudioFileClip("output.mp3")
        final_audio = CompositeAudioClip([audio_background])
        final_clip = videoclip.set_audio(final_audio)
        final_clip.write_videofile("result.mp4")
        # return self.splice_video_and_audio(video, translated_audio)

    def edit_transcript(self, transcript):
        # The Translate API returns HTML-escaped text; restore apostrophes.
        return transcript.replace("&#39;", "'")

    def split_transcript(self, transcript):
        # Translate and resynthesize word by word for now.
        return transcript.split(' ')

    def retrieve_video_and_audio(self, url):  # ARUSHI HAS THIS CODE
        return None

    def get_transcript(self, audio, native_lng):  # CRYSTAL HAS THIS CODE
        config = {
            "audio_channel_count": self.audio_channel_count,
            "enable_separate_recognition_per_channel": self.enable_separate_recognition_per_channel,
            "language_code": self.languages[native_lng.lower()],
        }
        response = self.audio_text_client.recognize(config=config, audio=audio)
        # Join every recognized segment, not just the first result.
        return " ".join(result.alternatives[0].transcript for result in response.results)

    def get_speed_factor(self, native_line, translated_line):  # CAN EDIT THIS LATER, FUNCTIONAL FOR NOW
        return len(translated_line) / len(native_line)

    def determine_gender(self, frequency):
        # Pitches above 170 Hz are treated as female, otherwise male.
        return "female" if frequency > 170 else "male"

    def text_to_audio(self, text, lng, speed_factor=1, gender=None):  # CRYSTAL IS WORKING ON THIS CODE
        if gender is None:
            # Fall back to the module-level crepe pitch track; take its mean
            # because crepe returns one frequency estimate per frame.
            gender = self.determine_gender(float(frequency.mean()))
        if gender == "female":
            ssml_gender = texttospeech.enums.SsmlVoiceGender.FEMALE
        elif gender == "male":
            ssml_gender = texttospeech.enums.SsmlVoiceGender.MALE
        else:
            ssml_gender = texttospeech.enums.SsmlVoiceGender.NEUTRAL
        synthesis_input = texttospeech.types.SynthesisInput(text=text)
        voice = texttospeech.types.VoiceSelectionParams(
            language_code=self.languages[lng.lower()], ssml_gender=ssml_gender)
        audio_config = texttospeech.types.AudioConfig(
            audio_encoding=texttospeech.enums.AudioEncoding.MP3,
            # Clamp to the range the Text-to-Speech API supports.
            speaking_rate=min(max(speed_factor, 0.25), 4.0))
        response = self.text_audio_client.synthesize_speech(synthesis_input, voice, audio_config)
        return response.audio_content
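
# BEGIN DRIVER SCRIPT
# Prompts for a video file and languages, extracts and translates the
# audio, and writes the translated video to result.mp4.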
s = input("Specify Filename: ")
videoclip = VideoFileClip(s)
audioclip = videoclip.audio
audioclip.write_audiofile("trying.wav", verbose=True)

# Estimate the speaker's pitch from the extracted audio so text_to_audio can
# pick a voice gender. wavfile only reads WAV data, so this must run after
# the audio track has been written out.
sr, audio = wavfile.read("trying.wav")
time, frequency, confidence, activation = crepe.predict(audio, sr, viterbi=True)
# Frequencies up to 170 Hz are treated as a male voice; anything above
# 170 Hz is treated as a female voice.

s1 = str(input("Input Language: "))
s2 = str(input("Output Language: "))
vt = VideoTranslator()
vt.translate_video("trying.wav", s1, s2)
os.remove("trying.wav")
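
# Example session (file name and languages are hypothetical):
#   Specify Filename: nani.mov
#   Input Language: japanese
#   Output Language: english
# This leaves the translated video in result.mp4 alongside output.mp3.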