-
Notifications
You must be signed in to change notification settings - Fork 1
/
RecordingTranscriptionSample.py
executable file
·44 lines (38 loc) · 1.74 KB
/
RecordingTranscriptionSample.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import os
import numpy as np
from Sample import Sample
from RecordingArtifact import RecordingArtifact
from TranscriptArtifact import TranscriptArtifact
from AudioTextSample import AudioTextSample
from load_and_resample_if_necessary import load_and_resample_if_necessary
from power_split import power_split
class RecordingTranscriptionSample(Sample):
    """A full recording paired with its transcript file.

    The source artifact holds the (resampled) audio; the target artifact
    holds the transcript.  The two split methods cut the recording into
    smaller AudioTextSample instances, either along the transcript's
    timing rows or along detected silences.
    """

    def __init__(self, _config, _afn, _tfn):
        # Key is (language, basename-without-extension).  NOTE(review):
        # [0:-4] assumes a 3-character extension such as ".wav" -- confirm
        # against the corpus file naming before relying on it.
        _key = (os.path.basename(_afn)[0:-4],)
        _audio = load_and_resample_if_necessary(_config, _afn)
        super().__init__((_config.language,) + _key,
                         RecordingArtifact(_config, _audio, _afn),
                         TranscriptArtifact(_config, _tfn))

    def display(self):
        """Print the sample key, the audio summary, and the transcript."""
        print('KEY', self.key)
        print('SOURCE')
        self.source.display('10-MINUTE RECORDING')
        print('TARGET')
        self.target.display()
        print()

    def gold(self):
        """Return the gold transcription from the transcript artifact."""
        return self.target.gold()

    def transcript_split(self):
        """Cut the recording along the transcript's timed speech rows.

        Returns a list of AudioTextSample, one per speech segment,
        keyed by this sample's key extended with the (start, end)
        sample-index span.  Segments whose text contains 'IGNORE'
        are dropped.
        """
        x_np = self.source.value
        C = self.source.C
        # Transcript rows with exactly 6 fields carry timing: the last
        # three fields are (start_seconds, end_seconds, words).
        speech = [(float(row[-3]), float(row[-2]), row[-1])
                  for row in self.target.value if len(row) == 6]
        # Convert seconds to sample indices; skip segments marked IGNORE.
        speech_segments = [(int(a * C.sample_rate), int(b * C.sample_rate), words)
                           for (a, b, words) in speech
                           if 'IGNORE' not in words]
        return [AudioTextSample(C, self.key + ((lower, upper),),
                                x_np[lower:upper], words.lower())
                for lower, upper, words in speech_segments]

    def split_on_silence(self, goal_length_in_seconds):
        """Cut the recording into roughly goal_length_in_seconds clips
        at low-power (silent) regions, via power_split.

        Returns a list of AudioTextSample with empty (' ') text, keyed
        by this sample's key extended with the clip's parent span.
        """
        C = self.source.C
        audio = self.source.value
        clips = power_split(C, audio, goal_length_in_seconds)
        # BUG FIX: the span must be appended as ONE nested tuple element,
        # ((start, end),), matching transcript_split's key structure.  The
        # original lacked the trailing comma, so the bare parentheses were
        # plain grouping and the two ints were concatenated into the key.
        return [AudioTextSample(C,
                                self.key + ((clip.parent_start, clip.parent_end),),
                                clip.clipped, ' ')
                for clip in clips]