-
Notifications
You must be signed in to change notification settings - Fork 0
/
merge_audio_files.py
219 lines (170 loc) · 8.44 KB
/
merge_audio_files.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
import subprocess
import json
import math
import os
from tqdm import tqdm
from moviepy.editor import *
from pydub import AudioSegment
# Target-language codes for which merge() never adds the extra catch-up second
# of silence when its accumulated fractional-silence debt reaches 1.5.
slow_langs = ["hi"]
def _concat_quote(path):
    """Return *path* single-quoted for use in an ffmpeg concat list file.

    Inside single quotes ffmpeg takes every character literally, so spaces
    and backslashes survive; an embedded single quote has to close the
    quoting, be backslash-escaped, and reopen it.
    """
    return "'" + str(path).replace("'", "'\\''") + "'"


def merge(file_name: str, target_language: str = "", device: str = "cpu"):
    """Assemble the dubbed audio track and mux it onto the video.

    Steps, all relative to the current working directory:

    1. Load ``subtitles.json`` (a list of dicts read here through the keys
       ``start``, ``end``, ``file_name`` and ``video_slow_down``).
    2. Generate ``silence.wav`` and write ``concat_file.txt``, which
       alternates a trimmed stretch of silence with each subtitle's audio.
    3. For every subtitle whose ``video_slow_down`` is neither 0 nor 1,
       slow down that span of the video (chained through lossless
       intermediates) and of the ``*_Instruments.wav`` track (in place).
    4. Concatenate the list into ``output.wav``, re-encode it to
       ``output_new.wav`` and write the final video with the dubbed and
       instrumental audio mixed together.

    Args:
        file_name: Path of the source video. The instrumental track and the
            output name are derived by replacing ``".mp4"`` in it.
        target_language: Language code used in the output file name and
            checked against ``slow_langs``.
        device: ``"cuda"`` adds ``-hwaccel cuda`` to the ffmpeg calls; any
            other value leaves it out.
    """
    with open("subtitles.json", "r") as file:
        subtitles = json.load(file)

    # Create a silence file longer than the video: end of the last subtitle
    # plus 100 as a safety margin.
    duration = subtitles[-1]['end'] + 100
    generate_silent_wav("silence.wav", duration, device)

    instruments_file = file_name.replace(".mp4", "_Instruments.wav")
    # -hwaccel is an input option, so it has to come before "-i"; appended
    # after the output file ffmpeg treats it as a trailing option and ignores it.
    hwaccel_args = ["-hwaccel", "cuda"] if device == "cuda" else []

    lossless_output = None
    with open("concat_file.txt", "w") as file:
        end = 0
        silence_debt = 0
        added_seconds = 0
        print("Updating the video and audio to sync up")
        for subtitle in tqdm(subtitles):
            update_audio_file(subtitle['file_name'], device)

            # Only whole units of silence are inserted; the fractional
            # remainder of each gap is carried forward as debt.
            # NOTE(review): if a subtitle starts before the previous one
            # ends, the gap and therefore the outpoint go negative - confirm
            # the input never overlaps.
            gap = subtitle['start'] - end
            silence_length = math.floor(gap)
            silence_debt += gap - float(silence_length)
            if silence_debt >= 1.5 and target_language not in slow_langs:
                silence_length += 1
                silence_debt -= 1.5

            file.write("file silence.wav\n")
            file.write(f"outpoint {silence_length}\n")
            # Quote the name so spaces, backslashes and quotes are read
            # literally by the concat demuxer.
            file.write(f"file {_concat_quote(subtitle['file_name'])}\n")
            end = subtitle['end']

            slow_factor = subtitle['video_slow_down']
            if slow_factor != 1 and slow_factor != 0:
                # Earlier slow-downs have already stretched the timeline, so
                # shift this segment by the time added so far.
                segment_start = subtitle['start'] + added_seconds
                segment_end = subtitle['end'] + added_seconds
                if lossless_output is None:
                    # First slow-down works on the original video, which is kept.
                    lossless_output = slow_down_video(
                        file_name, segment_start, segment_end, slow_factor)
                else:
                    # Later ones chain on the previous intermediate and delete it.
                    lossless_output = slow_down_video(
                        lossless_output, segment_start, segment_end, slow_factor, True)
                slow_down_audio(instruments_file, segment_start, segment_end, slow_factor)
                added_seconds += (subtitle['end'] - subtitle['start']) * (slow_factor - 1)

    if lossless_output is not None:
        output_video = final_encode_lossless_to_final(lossless_output, "temp_output.mp4")
    else:
        output_video = file_name

    # Stitch silence and subtitle audio together as listed in concat_file.txt.
    concat_command = ['ffmpeg', "-loglevel", "error", "-y", *hwaccel_args,
                      '-safe', '0', '-f', 'concat', '-i', 'concat_file.txt', 'output.wav']
    subprocess.call(concat_command)

    # Re-encode to a bit-exact 16-bit PCM WAV.
    pcm_command = ["ffmpeg", "-loglevel", "error", "-y", *hwaccel_args,
                   "-i", "output.wav", "-f", "wav", "-bitexact",
                   "-acodec", "pcm_s16le", "output_new.wav"]
    subprocess.call(pcm_command)

    # new_file = update_wav_speed("output_new.wav", 300, "./")
    opened_clips = []
    try:
        videoclip = VideoFileClip(output_video)
        opened_clips.append(videoclip)
        new_clip = videoclip.without_audio()

        audioclip = AudioFileClip("output_new.wav")
        opened_clips.append(audioclip)
        audioclip_instrumental = AudioFileClip(instruments_file)
        opened_clips.append(audioclip_instrumental)

        audioclip_combined = CompositeAudioClip([audioclip, audioclip_instrumental])
        new_clip_with_audio = new_clip.set_audio(audioclip_combined)
        new_clip_with_audio.write_videofile(file_name.replace(".mp4", f"_{target_language}.mp4"))
    finally:
        # Close every clip that was opened, even when encoding fails.
        for clip in opened_clips:
            clip.close()
def generate_silent_wav(output_file, duration, device = "cpu"):
    """Generate a silent mono 24 kHz 16-bit PCM WAV file with ffmpeg.

    Args:
        output_file: Path of the WAV file to write (overwritten if present).
        duration: Length of the silence, passed to ffmpeg's ``-t`` option.
        device: ``"cuda"`` adds ``-hwaccel cuda`` to the command.

    A failing ffmpeg run is reported on stdout and not re-raised.
    """
    command = [
        'ffmpeg', '-y',                     # Overwrite output file if exists
        "-loglevel", "error",               # Change loglevel
    ]
    if device == "cuda":
        # -hwaccel is an input option: it must come before "-i". Placed after
        # the output file it is a trailing option and ffmpeg ignores it.
        command += ["-hwaccel", "cuda"]
    command += [
        '-f', 'lavfi',                      # Use lavfi filter
        '-i', 'anullsrc=r=24000:cl=mono',   # Silence source: sample rate 24000, mono
        '-t', str(duration),                # Duration of the silent audio
        '-acodec', 'pcm_s16le',             # Audio codec PCM signed 16-bit little-endian
        output_file,                        # Output file
    ]

    try:
        # Execute the command
        subprocess.run(command, check=True)
        print(f"Silent WAV file of {duration} seconds saved to {output_file}")
    except subprocess.CalledProcessError as e:
        print(f"Error while generating the silent WAV file: {e}")
def update_audio_file(file_path, device):
    """Re-encode an audio file in place to 24 kHz 16-bit PCM.

    ffmpeg writes to ``<file_path>.temp.wav`` first; the original is replaced
    only after ffmpeg succeeds, and the temporary file is removed in every
    other case.

    Args:
        file_path: Path of the audio file to convert and overwrite.
        device: ``"cuda"`` adds ``-hwaccel cuda`` to the command.

    A failing ffmpeg run is reported on stdout and not re-raised; the
    original file is then left untouched.
    """
    # Define a temporary output file name
    temp_file = file_path + ".temp.wav"

    command = [
        'ffmpeg', '-y',             # Overwrite the temp file without confirmation
        "-loglevel", "error",       # Change loglevel
    ]
    if device == "cuda":
        # -hwaccel is an input option: it must come before "-i". Placed after
        # the output file it is a trailing option and ffmpeg ignores it.
        command += ["-hwaccel", "cuda"]
    command += [
        '-i', file_path,            # Input audio file
        '-ar', '24000',             # Set the sample rate to 24000 Hz
        '-acodec', 'pcm_s16le',     # Set the codec to PCM signed 16-bit little-endian
        '-b:a', '384k',             # Set the bitrate to 384 kbps
        temp_file,                  # Temporary output file
    ]

    try:
        # Execute the command
        subprocess.run(command, check=True)
        # Replace the original file with the updated file
        os.replace(temp_file, file_path)
    except subprocess.CalledProcessError as e:
        print(f"Error while updating the audio file: {e}")
    finally:
        # After a successful replace the temp file no longer exists, so this
        # only fires when ffmpeg or the replace failed part-way.
        if os.path.exists(temp_file):
            os.remove(temp_file)
# Running counter used by slow_down_video() to number its
# intermediate_lossless_<i>.mkv output files; incremented on every call.
i=0
def slow_down_video(input_video, start_time, end_time, slow_factor, remove_input_file = False):
    """Slow down one span of a video and write the result as a lossless file.

    The video is cut into the part before ``start_time``, the span
    ``start_time``-``end_time`` played at ``1/slow_factor`` speed, and the
    part after ``end_time``; the three parts are concatenated and written,
    without audio, to ``intermediate_lossless_<i>.mkv`` using the ``ffv1``
    codec, where ``<i>`` is the module-level counter.

    Args:
        input_video: Path of the video to read.
        start_time: Start of the span to slow down.
        end_time: End of the span to slow down.
        slow_factor: Factor by which the span is slowed; must be non-zero.
        remove_input_file: Delete ``input_video`` after a successful write.

    Returns:
        The path of the intermediate file that was written.
    """
    global i

    # Load the video
    video = VideoFileClip(input_video)
    try:
        source_fps = video.fps

        # Give every part the same fps (to avoid stuck frames).
        # 1. The part before the slow-motion segment
        part1 = video.subclip(0, start_time).set_fps(source_fps)
        # 2. The slow-motion segment with the slow factor applied
        slow_part = (
            video.subclip(start_time, end_time)
            .fx(vfx.speedx, factor=1/slow_factor)
            .set_fps(source_fps)
        )
        # 3. The part after the slow-motion segment
        part3 = video.subclip(end_time, video.duration).set_fps(source_fps)

        # 4. Concatenate the parts together
        final_video = concatenate_videoclips([part1, slow_part, part3], method="compose")

        # 5. Write the result to a lossless format (intermediate step)
        lossless_output = f"intermediate_lossless_{i}.mkv"
        i += 1
        final_video.write_videofile(lossless_output, codec="ffv1", preset="ultrafast",
                                    audio=False, fps=source_fps, logger=None)
    finally:
        # Release the reader even when clip building or encoding fails.
        video.close()

    # Only reached after a successful write, with the reader already closed.
    if remove_input_file:
        os.remove(input_video)
    return lossless_output
def final_encode_lossless_to_final(intermediate_video, output_video, crf=18, bitrate="3000k"):
    """Re-encode the lossless intermediate video into the final output file.

    Args:
        intermediate_video: Path of the lossless intermediate video.
        output_video: Path of the file to write (no audio track is written).
        crf: Accepted for interface compatibility; not used by the encode.
        bitrate: Accepted for interface compatibility; not used by the encode.

    Returns:
        ``output_video``, unchanged.
    """
    # Load the intermediate lossless video
    lossless_clip = VideoFileClip(intermediate_video)
    try:
        # Codec and quality settings are left to write_videofile's defaults.
        lossless_clip.write_videofile(output_video, audio=False, logger=None)
    finally:
        # Release the reader whether or not the encode succeeded.
        lossless_clip.close()
    return output_video
def slow_down_audio(input_audio, start_time, end_time, slow_factor):
    """Slow down one span of an audio file and overwrite the file in place.

    The span ``start_time``-``end_time`` is stretched by re-labelling its raw
    samples with a frame rate of ``frame_rate / slow_factor`` and converting
    back to the original frame rate; the audio before and after the span is
    kept as is.

    Args:
        input_audio: Path of the audio file; it is replaced by the result.
            The export format is the text after the last ``.`` in the path.
        start_time: Start of the span, in seconds.
        end_time: End of the span, in seconds.
        slow_factor: Factor by which the span is slowed; must be non-zero.

    Raises:
        Exception: Whatever the export or the file replacement raised; the
            temporary file is removed first.
    """
    # Load the original audio file
    audio = AudioSegment.from_file(input_audio)

    # Convert start_time and end_time from seconds to milliseconds
    start_time_ms = start_time * 1000
    end_time_ms = end_time * 1000

    # 1. The part before the slow-motion segment
    part1 = audio[:start_time_ms]

    # 2. The slow-motion segment: same samples at a lower nominal frame rate,
    #    then converted back to the original rate
    segment = audio[start_time_ms:end_time_ms]
    slow_part = segment._spawn(segment.raw_data, overrides={
        "frame_rate": int(audio.frame_rate / slow_factor)
    }).set_frame_rate(audio.frame_rate)

    # 3. The part after the slow-motion segment
    part3 = audio[end_time_ms:]

    # 4. Concatenate the audio parts together
    final_audio = part1 + slow_part + part3

    # 5. Write the result to a temporary file, then swap it over the original
    temp_output = input_audio + ".tmp"
    try:
        exported = final_audio.export(temp_output, format=input_audio.split('.')[-1])
        # export() hands back the open output file; close it so the replace
        # below is not blocked by a handle that is still open.
        exported.close()
        os.replace(temp_output, input_audio)
    except Exception:
        # Clean up the temporary file whether the export or the replace failed
        if os.path.exists(temp_output):
            os.remove(temp_output)
        raise
if __name__ == "__main__":
    # Script entry point: dub the sample video into Hindi ("hi").
    merge(
        file_name="Making a smart closet with ML.mp4",
        target_language="hi",
    )