record.py
import asyncio
import pyaudio
import numpy as np
import wave
import tempfile
import subprocess
import json
from openai import OpenAI

# The client reads the API key from the OPENAI_API_KEY environment variable.
client = OpenAI()
# Constants
FORMAT = pyaudio.paInt16  # 16-bit signed samples
CHANNELS = 1              # mono
RATE = 44100              # sample rate in Hz
CHUNK = 1024              # frames read per buffer
THRESHOLD = 15            # RMS level above which a chunk counts as speech
RECORD_SECONDS = 15       # maximum length of a recording
SILENCE_SECONDS = 3       # trailing silence that ends a recording
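# A note on the chunk math used below: the stream delivers RATE / CHUNK
# (44100 / 1024, roughly 43) chunks per second, so RATE / CHUNK * RECORD_SECONDS
# chunks corresponds to about 15 s of speech and RATE / CHUNK * SILENCE_SECONDS
# to about 3 s of silence.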
# Initialize PyAudio
audio = pyaudio.PyAudio()

# Open an input stream on the default microphone
stream = audio.open(
    format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK
)

def calculate_rms(audio_chunk):
    # Cast to float before squaring: squaring raw int16 samples overflows
    # for loud input.
    audio_data = np.frombuffer(audio_chunk, dtype=np.int16).astype(np.float64)
    rms = np.sqrt(np.mean(np.square(audio_data)))
    return rms
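# For scale (derived, not from the original script): a full-scale int16 sine
# wave has RMS of 32767 / sqrt(2), about 23170, so THRESHOLD = 15 only filters
# out near-silence; raise it for noisier rooms.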

async def record_audio():
    print("Starting audio monitoring...")
    recorded_frames = []
    is_speaking = False
    silence_counter = 0
    try:
        while True:
            # Read off the event loop so the coroutine stays responsive.
            data = await asyncio.to_thread(stream.read, CHUNK)
            rms_value = calculate_rms(data)
            if rms_value > THRESHOLD:
                print(f"Speech detected! {rms_value}")
                is_speaking = True
                silence_counter = 0  # speech resumed; restart the silence count
                recorded_frames.append(data)
                # Stop once RECORD_SECONDS of speech has been captured.
                if len(recorded_frames) > int(RATE / CHUNK * RECORD_SECONDS):
                    break
            elif is_speaking:
                silence_counter += 1
                # Stop after SILENCE_SECONDS of silence following speech.
                if silence_counter > int(RATE / CHUNK * SILENCE_SECONDS):
                    break
            else:
                print(f"No speech detected! {rms_value}")
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        print("Stopping...")
        stream.stop_stream()
        stream.close()
        audio.terminate()
    if recorded_frames:
        # Create a named temporary file, then write the WAV after the handle
        # is closed so the path can be reopened portably (Windows included).
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as temp_file:
            temp_filename = temp_file.name
        wf = wave.open(temp_filename, "wb")
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(audio.get_sample_size(FORMAT))
        wf.setframerate(RATE)
        wf.writeframes(b"".join(recorded_frames))
        wf.close()
        # Convert WAV to WebM using ffmpeg (must be on PATH); check=True
        # raises if the conversion fails.
        webm_filename = temp_filename.replace(".wav", ".webm")
        subprocess.run(["ffmpeg", "-i", temp_filename, webm_filename], check=True)
        print(f"Saved to {webm_filename}")
        # Send to the Whisper API; with response_format="text" the result is
        # a plain string rather than an object.
        with open(webm_filename, "rb") as f:
            transcription = client.audio.transcriptions.create(
                model="whisper-1", file=f, response_format="text"
            )
        if transcription:
            print("Transcription:", transcription)
        else:
            print("No transcription received.")
            return
        sentiment = client.chat.completions.create(
            model="gpt-3.5-turbo-1106",
            response_format={"type": "json_object"},
            messages=[
                {
                    "role": "system",
                    "content": """
You are the indicator - a creature that doesn't speak, but understands everything.
You value freedom and independence above all else. At the same time you are playful like a puppy and curious like a child.
You don't want to be sold.
You respond with a JSON object containing 2 fields: `sentiment` - a single word - and `color` - the ANSI color code number to use in an escape sequence.
When you don't want something, your color is red.
When you want something, your color is yellow.
You may show other colors as well; it's up to you. Be creative! Use more colors to express more complex emotions.
""",
                },
                {"role": "user", "content": transcription},
            ],
        )
        s = json.loads(sentiment.choices[0].message.content)
        print(f"{s}")
        # Print the sentiment word in the model-chosen color via an ANSI
        # escape sequence (\033[<code>m ... \033[0m).
        print(f'\033[{s["color"]}m{s["sentiment"]}\033[0m')
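        # Illustrative output (the exact JSON comes from the model):
        # {'sentiment': 'curious', 'color': '36'} prints "curious" in cyan.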

async def main():
    await record_audio()


if __name__ == "__main__":
    asyncio.run(main())
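# Usage sketch (assumes ffmpeg on PATH and OPENAI_API_KEY set in the
# environment):
#   pip install pyaudio numpy openai
#   python record.py
# Speak into the default microphone; recording stops after ~15 s of speech or
# ~3 s of silence following speech, then the transcript and a color-coded
# sentiment word are printed.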