# rec.py

import argparse
import queue
import sys
import tkinter as tk
from collections import defaultdict
from multiprocessing import Process, Value
from pathlib import Path
from time import sleep

import sounddevice as sd
import soundfile as sf


def parse_args():
    """Parse the command line and return the resulting namespace.

    Options are registered from a small table, grouped under two help
    headings: the input/output files (``--script``, ``--recdir``) and the
    audio configuration (``--show-devices``, ``--audio-in``, ``--audio-out``,
    ``--channels``, ``--sr``). Default values are shown in ``--help``.
    """
    option_groups = (
        ('input/output files', (
            ('--script', dict(type=str, default='utts.data',
                              help='recording script in Festival data format')),
            ('--recdir', dict(type=str, default='recordings',
                              help='output directory for recorded audio files')),
        )),
        ('audio configuration', (
            ('--show-devices', dict(action='store_true',
                                    help='show available audio devices and exit')),
            ('--audio-in', dict(type=int, default=0, help='input device index')),
            ('--audio-out', dict(type=int, default=0, help='output device index')),
            ('--channels', dict(type=int, default=1,
                                help='n channels to record: 1 for mono, 2 for stereo')),
            ('--sr', dict(type=int, default=44100, help='sampling rate to record')),
        )),
    )

    cli = argparse.ArgumentParser(
        description="Graphical interface for recording utterances from a script",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    # Register every option under its help group, in declaration order.
    for title, options in option_groups:
        group = cli.add_argument_group(title)
        for flag, settings in options:
            group.add_argument(flag, **settings)

    return cli.parse_args()


def key(event):
    """Handle a key press in the prompt window.

    Key bindings:
        space   toggle recording of the prompt currently shown
        p       request playback for the prompt currently shown
        Up      show the previous prompt
        Down    show the next prompt; a running recording is stopped and
                restarted so the new prompt gets its own take
        q       stop the audio process and close the window

    The handler works on module-level state created by the ``__main__``
    block: the prompt index ``i``, the shared values ``i_mp``, ``record``
    and ``play``, the audio process ``p``, the widgets ``l``, ``frame`` and
    ``root``, the Tk variables ``text`` and ``label``, and the script data
    ``utts`` and ``labels``.

    Args:
        event: Tk key event; only ``event.keysym`` is read.
    """
    global i
    code = event.keysym

    # While a "first sentence" / "end of list" message is displayed, ``i`` is
    # parked one step outside the list (-1 or len(utts)). Such an index must
    # never reach the audio process, which uses it to index ``labels``.
    on_prompt = 0 <= i < len(utts)
    if on_prompt:
        i_mp.value = i

    if code == 'space':
        # Record/stop recording
        if record.value == 0:
            if not on_prompt:
                # No prompt on screen: there is nothing to record.
                return
            record.value = 1
        else:
            # Stopping is always allowed, wherever ``i`` currently is.
            record.value = 0
        l.config(fg="green" if not record.value else "red")
        p.join(0)  # zero timeout: returns immediately
    elif code == 'p':
        # Play/pause
        if not on_prompt:
            return
        play.value = 1
        p.join(0)
        l.config(fg="yellow")
        # After the flash, go back to the colour matching the recording state
        # at that moment instead of unconditionally showing green.
        frame.after(1000, lambda: l.config(fg="red" if record.value else "green"))
    elif code == 'Up':
        # Previous prompt
        if i <= 0:
            i = -1
            text.set("This was the first sentence! Go forward instead!")
        else:
            i -= 1
            text.set("{}".format(utts[i]))
            label.set("{}".format(labels[i]))
    elif code == 'Down':
        # Next prompt
        # ``>=`` rather than ``==``: when ``i`` is already parked at
        # len(utts), another Down must not step further past the list.
        if i >= len(utts) - 1:
            i = len(utts)
            text.set("End of list already reached! Go back :)")
            if record.value:
                record.value = 0
                p.join(0)
                l.config(fg="green")
        else:
            i += 1
            text.set("{}".format(utts[i]))
            label.set("{}".format(labels[i]))
            if record.value:
                # Publish the new index, then drop and re-raise the record
                # flag so the audio process closes the current take and
                # opens a new one for this prompt.
                # NOTE(review): the 10 ms pause may be shorter than one audio
                # block, in which case the old take could simply continue.
                i_mp.value = i
                record.value = 0
                p.join(0.01)
                record.value = 1
                p.join(0)
    elif code == 'q':
        # Quit
        if record.value:
            # Clear the flag and wait briefly before terminating, presumably
            # enough for the audio process to leave its recording loop and
            # close the WAV file.
            record.value = 0
            p.join(0.2)
        p.terminate()
        root.destroy()


def audio_process(labels, recdir, takes, play, record, sr, channels, audio_in, i):
    """Serve record and playback requests forever (target of the audio Process).

    The loop polls the shared flags roughly every 0.1 s. While ``record`` is
    set it records a take for the current prompt; when ``play`` is set it
    plays the current prompt once and clears the flag.

    Args:
        labels: Sequence of utterance labels, indexed by ``i.value``.
        recdir: Directory (path object) holding the recordings.
        takes: Mapping from label to number of takes.
        play: Shared value; non-zero requests one playback.
        record: Shared value; non-zero while recording is wanted.
        sr: Sampling rate passed to ``rec``.
        channels: Channel count passed to ``rec``.
        audio_in: Input device index passed to ``rec``.
        i: Shared value holding the index of the current prompt.
    """
    def current_label():
        """Return the label for ``i.value``, or None if it is out of range."""
        idx = i.value
        # Explicit range test: an index past the end would raise IndexError
        # and end this process, and a negative one would silently wrap around
        # to a label counted from the end of the list.
        return labels[idx] if 0 <= idx < len(labels) else None

    while True:
        if record.value:
            name = current_label()
            # With no valid prompt the request is skipped; the flag is left
            # untouched because it is owned by whoever set it.
            if name is not None:
                rec(name, recdir, takes, record, sr, channels, audio_in)
        if play.value:
            # Re-read the index: it may have changed while recording.
            name = current_label()
            if name is not None:
                playback(name, recdir, takes)
            play.value = 0
        sleep(0.1)


def playback(name, recdir, takes):
    """Play the latest take of *name*, or a fallback clip if there is none.

    The take is looked up as ``<recdir>/<name>_<takes[name]>.wav``. If that
    file does not exist, ``not_found.wav`` (relative to the current working
    directory) is played instead. If the fallback is missing as well, a
    message is written to stderr and nothing is played, rather than letting
    the read error propagate to the caller.

    Args:
        name: Utterance label.
        recdir: Directory (path object) holding the recordings.
        takes: Mapping from label to the number of the latest take.
    """
    wav_file = recdir / "{}_{}.wav".format(name, takes[name])
    if not wav_file.is_file():
        wav_file = Path("not_found.wav")
        if not wav_file.is_file():
            print("Playback: no recording for {} and no {} to play instead".format(
                name, wav_file), file=sys.stderr)
            return
    print("Playback", wav_file)
    data, sr = sf.read(str(wav_file))
    # sd.play() only starts the playback; this function does not wait for it.
    sd.play(data, sr)


def rec(name, recdir, takes, record, sr, channels, audio_in):
    """Record a new take of *name* until ``record.value`` becomes falsy.

    The take counter for *name* is incremented and the audio is written to
    ``<recdir>/<name>_<take>.wav`` as 16-bit PCM. Audio blocks are handed
    over from the stream callback through a queue and written by this
    function.

    Args:
        name: Utterance label used in the file name.
        recdir: Directory (path object) the file is written to.
        takes: Mapping from label to take count; updated in place.
        record: Shared value; recording runs while its ``value`` is truthy.
        sr: Sampling rate in Hz.
        channels: Number of channels to record.
        audio_in: Input device index.
    """
    q = queue.Queue()

    def callback(indata, frames, time, status):
        """This is called (from a separate thread) for each audio block."""
        if status:
            print(status, file=sys.stderr)
        q.put(indata.copy())

    takes[name] += 1
    wav_file = str(recdir / "{}_{}.wav".format(name, takes[name]))
    print("Recording", wav_file)
    # Make sure the file is opened before recording anything:
    with sf.SoundFile(wav_file, mode='w', samplerate=sr, channels=channels,
                      subtype='PCM_16') as file:
        with sd.InputStream(samplerate=sr, device=audio_in, channels=channels,
                            callback=callback):
            while record.value:
                # Wait with a timeout so the stop flag is re-checked even if
                # no audio block arrives.
                try:
                    block = q.get(timeout=0.1)
                except queue.Empty:
                    continue
                file.write(block)
        # The stream is closed here, so nothing new is queued. Write the
        # blocks that were already captured so the end of the take is kept.
        while True:
            try:
                file.write(q.get_nowait())
            except queue.Empty:
                break


def _load_script(path):
    """Read a Festival-style recording script and return ``(labels, utts)``.

    Every non-blank line is expected to look like ``( label "text" )``. The
    surrounding parentheses, spaces and quotes are stripped and the line is
    split once at the first ``' "'``, so quotes inside the text are kept.
    Blank lines are skipped.

    Args:
        path: Path of the script file.

    Returns:
        A pair of equally long tuples: the labels and the utterance texts.

    Raises:
        ValueError: If a line has no label/text pair or the file contains
            no utterances at all.
    """
    entries = []
    with open(path, encoding="utf-8") as f:
        for lineno, line in enumerate(f, start=1):
            stripped = line.strip('( )"\n')
            if not stripped:
                continue
            fields = stripped.split(' "', 1)
            if len(fields) != 2:
                raise ValueError("{}:{}: expected '( label \"text\" )', got {!r}".format(
                    path, lineno, line.rstrip("\n")))
            entries.append(fields)
    if not entries:
        raise ValueError("{}: script contains no utterances".format(path))
    labels, utts = zip(*entries)
    return labels, utts


if __name__ == "__main__":
    args = parse_args()

    if args.show_devices:
        print(sd.query_devices())
        sys.exit()

    # NOTE(review): these defaults are set in this process only; confirm the
    # audio process sees them when it is not started by forking.
    sd.default.device = [args.audio_in, args.audio_out]
    sd.default.samplerate = args.sr

    recdir = Path(args.recdir)
    # parents=True so a nested --recdir can be created in one go.
    recdir.mkdir(parents=True, exist_ok=True)

    try:
        labels, utts = _load_script(args.script)
    except ValueError as err:
        sys.exit("Cannot use recording script: {}".format(err))

    # Resume take numbering after the takes that already exist on disk.
    takes = defaultdict(int)
    for name in labels:
        while (recdir / "{}_{}.wav".format(name, takes[name] + 1)).is_file():
            takes[name] += 1

    # State shared with key(): ``i`` is the GUI-side prompt index; the Value
    # objects are handed to the audio process as its control flags.
    i = 0
    i_mp = Value('i', i)
    record = Value('i', 0)
    play = Value('i', 0)

    root = tk.Tk()
    text = tk.StringVar()
    label = tk.StringVar()
    text.set("{}".format(utts[i]))
    # NOTE(review): the initial label has a trailing colon, while key() sets
    # it without one when navigating; confirm which form is intended.
    label.set("{}:".format(labels[i]))

    p = Process(target=audio_process, args=(
                labels, recdir, takes, play, record, args.sr, args.channels, args.audio_in, i_mp))
    # Daemon process: it is not waited for when the GUI exits.
    p.daemon = True
    p.start()

    # The frame takes keyboard focus so that key() receives the key presses.
    frame = tk.Frame(root, width=900, height=900)
    frame.bind("<Key>", key)
    frame.pack()
    frame.focus_set()

    ll = tk.Label(textvariable=label, fg="green", font=("Helvetica", 60),
                  anchor="sw", justify="left")
    # Make wraplength some function of window size and adjust placement
    # accordingly in the future
    l = tk.Label(textvariable=text, fg="green", font=("Helvetica", 60),
                 anchor="center", justify="center", wraplength=700)
    ll.place(y=0)
    l.place(rely=0.1, relx=0.2)

    root.mainloop()