-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathapp.py
103 lines (83 loc) · 3.29 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import os
import sys
import yt_dlp
import subprocess
from groq import Groq
import tempfile
from dotenv import load_dotenv
def download_youtube_audio(url, output_path):
    """Fetch the audio track of a YouTube video and convert it to MP3.

    Args:
        url: Address of the video to download.
        output_path: Output template handed to yt-dlp, given without a
            file extension.

    Returns:
        ``output_path`` with ``.mp3`` appended, which is where the
        extracted audio is expected to be found.
    """
    # Post-processing step that converts the downloaded stream to MP3.
    extract_mp3 = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [extract_mp3],
        'outtmpl': output_path,
    }

    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])

    return output_path + ".mp3"
def speed_up_audio(input_path, output_path, speed_factor=1.5):
    """Re-encode an audio file at a different tempo using ffmpeg.

    The output is resampled to 16 kHz mono, and any existing file at
    ``output_path`` is overwritten.

    Args:
        input_path: Path of the audio file to read.
        output_path: Path of the file ffmpeg should write.
        speed_factor: Tempo multiplier (1.0 keeps the original speed).
            Must be a positive, finite number.

    Raises:
        ValueError: If ``speed_factor`` is not a positive, finite number.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero
            status.
    """
    # Reject zero, negative, NaN and infinite factors before spawning ffmpeg;
    # NaN fails both comparisons, so it is rejected here as well.
    if not 0 < speed_factor < float("inf"):
        raise ValueError(
            f"speed_factor must be a positive, finite number, got {speed_factor!r}"
        )

    # A single atempo instance is only reliable within [0.5, 2.0]: older
    # ffmpeg builds reject values outside it and newer ones skip samples
    # above 2.0. Daisy-chain instances so each stage stays inside the range
    # while their product equals the requested factor.
    stages = []
    remaining = speed_factor
    while remaining > 2.0:
        stages.append('atempo=2.0')
        remaining /= 2.0
    while remaining < 0.5:
        stages.append('atempo=0.5')
        remaining /= 0.5
    stages.append(f'atempo={remaining}')

    command = [
        'ffmpeg', '-i', input_path,
        '-filter:a', ','.join(stages),
        '-ar', '16000',  # Set sample rate to 16kHz as required by Whisper
        '-ac', '1',      # Convert to mono
        '-y',            # Overwrite output file if exists
        output_path,
    ]
    subprocess.run(command, check=True)
def transcribe_audio(audio_path, groq_api_key):
    """Upload an audio file to the Groq API and return its transcription.

    Args:
        audio_path: Path of the audio file; it is read fully into memory
            and also sent as the upload's file name.
        groq_api_key: API key used to construct the Groq client.

    Returns:
        The value returned by the transcription request, made with the
        ``whisper-large-v3`` model and ``response_format="text"``.
    """
    groq_client = Groq(api_key=groq_api_key)

    with open(audio_path, "rb") as audio_file:
        upload = (audio_path, audio_file.read())
        # With response_format="text" the response is already a plain string.
        return groq_client.audio.transcriptions.create(
            file=upload,
            model="whisper-large-v3",
            response_format="text",
        )
def transcribe_youtube_video(video_url, groq_api_key, speed_factor=2.0):
    """Download, speed up and transcribe the audio of a YouTube video.

    All intermediate audio files live in a temporary directory that is
    removed when the function finishes.

    Args:
        video_url: Address of the video to transcribe.
        groq_api_key: API key forwarded to ``transcribe_audio``.
        speed_factor: Tempo multiplier forwarded to ``speed_up_audio``.

    Returns:
        The transcript, or ``None`` if any step raised an exception (the
        error is printed rather than propagated).
    """
    try:
        with tempfile.TemporaryDirectory() as workdir:
            # Step 1: fetch the audio track.
            print("Downloading YouTube audio...")
            download_target = os.path.join(workdir, "audio")
            source_audio = download_youtube_audio(video_url, download_target)

            # Step 2: change the tempo and normalise the format.
            print(f"Processing audio (speed factor: {speed_factor}x)...")
            sped_up_audio = os.path.join(workdir, "processed_audio.mp3")
            speed_up_audio(source_audio, sped_up_audio, speed_factor)

            # Step 3: transcribe while the temporary files still exist.
            print("Transcribing audio...")
            return transcribe_audio(sped_up_audio, groq_api_key)
    except Exception as err:
        print(f"An error occurred: {err!s}")
        return None
if __name__ == "__main__":
    # The video URL is the only required command-line argument.
    if len(sys.argv) <= 1:
        print("Usage: python script.py <youtube_url>")
        sys.exit(1)
    VIDEO_URL = sys.argv[1]

    # Pull settings from a .env file into the environment, then read the key.
    load_dotenv()
    GROQ_API_KEY = os.environ.get('GROQ_API_KEY')
    if not GROQ_API_KEY:
        print("Please set the GROQ_API_KEY in your .env file")
        sys.exit(1)

    # Speed factor (1.0 = normal speed, 2.0 = double speed)
    SPEED_FACTOR = 2.0

    transcript = transcribe_youtube_video(
        video_url=VIDEO_URL,
        groq_api_key=GROQ_API_KEY,
        speed_factor=SPEED_FACTOR,
    )

    if transcript:
        # Show the transcript on screen, then keep a copy on disk.
        print("\nTranscription:", transcript, sep="\n")
        with open("transcript.txt", "w", encoding="utf-8") as out_file:
            out_file.write(transcript)
        print("\nTranscript saved to transcript.txt")