-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathserver.py
164 lines (131 loc) · 4.93 KB
/
server.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
from flask import Flask, request, jsonify
from flask_cors import CORS
import os
import sys
import yt_dlp
import subprocess
from groq import Groq
import tempfile
from cerebras.cloud.sdk import Cerebras
from dotenv import load_dotenv
# Populate os.environ from a .env file before any setting is read.
load_dotenv()

# The Flask application object, wrapped with flask-cors.
app = Flask(__name__)
CORS(app)

# The Cerebras API key comes from the environment; it is None when unset.
CEREBRAS_API_KEY = os.environ.get('CEREBRAS_API_KEY')
def summarize_transcript(transcript):
    """Ask the Cerebras chat model to summarize *transcript*.

    The prompt requests a short summary, the main key points, a main
    takeaway and a list of any technical or instructional details. The
    text content of the first completion choice is returned.
    """
    llm = Cerebras(api_key=CEREBRAS_API_KEY)
    instructions = f"""Please analyze the following transcript and provide:
1. A concise summary (2-3 sentences)
2. 3-5 main key points
3. Main takeaway
4. When encountering contatent that is technical, speicific in detail, or instructional, please ensure to include and preserve all of the details and create list of this extracted knowledge from the content.
Transcript:
{transcript}"""
    user_message = {"role": "user", "content": instructions}
    response = llm.chat.completions.create(
        messages=[user_message],
        model="llama3.1-70b",
    )
    first_choice = response.choices[0]
    return first_choice.message.content
def download_video(url, output_path):
    """Download the video at *url* with yt-dlp and return its title.

    *output_path* is handed to yt-dlp as its ``outtmpl`` option and the
    ``best`` format is requested.
    """
    options = dict(format='best', outtmpl=output_path)
    with yt_dlp.YoutubeDL(options) as downloader:
        metadata = downloader.extract_info(url, download=True)
        return metadata['title']
def download_youtube_audio(url, output_path):
    """Download the audio of *url* and have yt-dlp extract it as MP3.

    *output_path* is used as yt-dlp's ``outtmpl``; the ``FFmpegExtractAudio``
    post-processor is configured with the ``mp3`` codec. Returns
    *output_path* with ``.mp3`` appended.
    """
    mp3_extractor = {
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
    }
    options = {
        'format': 'bestaudio/best',
        'postprocessors': [mp3_extractor],
        'outtmpl': output_path,
    }
    with yt_dlp.YoutubeDL(options) as downloader:
        downloader.download([url])
    return '{}.mp3'.format(output_path)
def _atempo_filter(factor):
    """Return an ffmpeg ``atempo`` filter chain equivalent to *factor*.

    Each stage is kept within 0.5-2.0 and stages are joined with commas, so
    the product of all stages equals *factor*. A factor already inside that
    range yields a single ``atempo=<factor>`` stage.
    """
    stages = []
    remaining = factor
    while remaining > 2.0:
        stages.append(2.0)
        remaining /= 2.0
    while remaining < 0.5:
        stages.append(0.5)
        remaining /= 0.5
    stages.append(remaining)
    return ','.join(f'atempo={stage}' for stage in stages)


def speed_up_audio(input_path, output_path, speed_factor=1.5):
    """Re-encode *input_path* at a different tempo using ffmpeg.

    The output is written to *output_path* as 16 kHz mono audio and any
    existing file there is overwritten (``-y``).

    Args:
        input_path: Path of the audio file to read.
        output_path: Path of the audio file to write.
        speed_factor: Tempo multiplier; must be a positive, finite number
            (numeric strings are accepted).

    Raises:
        ValueError: If *speed_factor* is not a positive, finite number.
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    # Coerce to float before building the filter expression: the value may
    # originate from a request body, and an arbitrary string would otherwise
    # be spliced verbatim into ffmpeg's filter graph.
    try:
        factor = float(speed_factor)
    except (TypeError, ValueError):
        raise ValueError(
            f'speed_factor must be a number, got {speed_factor!r}'
        ) from None
    # NaN fails both comparisons, so it is rejected here as well.
    if not 0 < factor < float('inf'):
        raise ValueError(
            f'speed_factor must be positive and finite, got {speed_factor!r}'
        )

    command = [
        'ffmpeg', '-i', input_path,
        '-filter:a', _atempo_filter(factor),
        '-ar', '16000',
        '-ac', '1',
        '-y',
        output_path,
    ]
    subprocess.run(command, check=True)
def transcribe_audio(audio_path, groq_api_key):
    """Transcribe the file at *audio_path* through the Groq API.

    The file is read fully into memory and submitted to the
    ``whisper-large-v3`` model with ``response_format="text"``; whatever the
    API call returns is passed back unchanged.
    """
    groq_client = Groq(api_key=groq_api_key)
    with open(audio_path, "rb") as audio_file:
        upload = (audio_path, audio_file.read())
        result = groq_client.audio.transcriptions.create(
            file=upload,
            model="whisper-large-v3",
            response_format="text",
        )
    return result
def transcribe_youtube_video(video_url, groq_api_key, speed_factor=2.0):
    """Run the download, speed-up, transcribe and summarize pipeline.

    All intermediate audio files live in a temporary directory that is
    removed when the pipeline finishes. Returns a dict with ``transcript``
    and ``summary`` keys, or ``None`` if any step raises (the error is
    printed rather than propagated).
    """
    try:
        with tempfile.TemporaryDirectory() as workdir:
            print("Downloading YouTube audio...")
            download_target = os.path.join(workdir, "audio")
            source_audio = download_youtube_audio(video_url, download_target)

            print(f"Processing audio (speed factor: {speed_factor}x)...")
            fast_audio = os.path.join(workdir, "processed_audio.mp3")
            speed_up_audio(source_audio, fast_audio, speed_factor)

            print("Transcribing audio...")
            transcript = transcribe_audio(fast_audio, groq_api_key)

            print("Generating summary...")
            summary = summarize_transcript(transcript)

            return dict(transcript=transcript, summary=summary)
    except Exception as exc:
        print(f"An error occurred: {exc!s}")
        return None
@app.route('/download', methods=['POST'])
def download_only():
    """Download the requested video into ``~/Downloads/YTTranscriber``.

    Expects a JSON object body with a ``url`` field.

    Returns:
        A JSON response with ``success``, ``message`` and ``title`` on
        success; ``({'error': ...}, 400)`` when no URL is supplied; and
        ``({'error': ...}, 500)`` carrying the exception text when the
        download fails.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body. Non-object payloads (null, lists, strings) are treated as
    # empty too, so the .get() below cannot raise outside the try block.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    video_url = data.get('url')
    if not video_url:
        return jsonify({'error': 'No URL provided'}), 400

    try:
        downloads_dir = os.path.join(
            os.path.expanduser('~'), 'Downloads', 'YTTranscriber'
        )
        os.makedirs(downloads_dir, exist_ok=True)
        # yt-dlp fills in the title and extension placeholders itself.
        output_template = os.path.join(downloads_dir, '%(title)s.%(ext)s')
        title = download_video(video_url, output_template)
        return jsonify({
            'success': True,
            'message': 'Video downloaded successfully to Downloads/YTTranscriber folder',
            'title': title,
        })
    except Exception as e:
        return jsonify({'error': str(e)}), 500
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Transcribe and summarize the video named in the request body.

    Expects a JSON object body with a ``url`` field and an optional
    ``speed_factor`` (default 2.0).

    Returns:
        The JSON-encoded pipeline result on success, or
        ``({'error': ...}, 400)`` when the URL is missing, ``speed_factor``
        is not a positive finite number, or the pipeline fails.
    """
    # silent=True returns None instead of raising on a missing or malformed
    # JSON body; non-object payloads are treated as empty so .get() is safe.
    payload = request.get_json(silent=True)
    data = payload if isinstance(payload, dict) else {}

    video_url = data.get('url')
    if not video_url:
        return jsonify({'error': 'No URL provided'}), 400

    # speed_factor is client-controlled and ends up in an ffmpeg filter
    # expression, so only a positive, finite number is let through.
    try:
        speed_factor = float(data.get('speed_factor', 2.0))
    except (TypeError, ValueError):
        return jsonify({'error': 'Invalid speed_factor'}), 400
    if not 0 < speed_factor < float('inf'):
        return jsonify({'error': 'Invalid speed_factor'}), 400

    # The Groq key is read from the server environment, not from the request.
    groq_api_key = os.getenv('GROQ_API_KEY')

    result = transcribe_youtube_video(video_url, groq_api_key, speed_factor)
    if result:
        return jsonify(result)
    return jsonify({'error': 'Transcription failed'}), 400
if __name__ == "__main__":
    # Start the Flask server on port 5000 when run as a script.
    app.run(port=5000)