Implemented audio extraction, adding audio streams, displaying audio stream #4

Open · wants to merge 10 commits into main
58 changes: 31 additions & 27 deletions src/ilabs_streamsync/example_script.py
@@ -1,34 +1,38 @@
-import mne
-from streamsync import StreamSync, extract_audio_from_video
-
-# load an MNE raw file
-raw = None
-cam1 = None
-flux1 = None
-my_events = []
-
-subjects = ["146a", "222b"]
-
-for subj in subjects:
-    # construct the filename/path
-    # load the Raw
-    # figure out where video files are & load them
-    audio1 = extract_audio_from_video(cam1)
-
-    ss = StreamSync(raw, "STIM001")
-    ss.add_stream(audio1)
-    ss.add_camera_events(my_events)
-    ss.add_stream(flux1)
-    result = ss.do_syncing()
-    fig = ss.plot_sync()
-    annot = ss.add_camera_events(my_events)
-    raw.set_annotations(annot)
-    fig.savefig(...)
-    if result < 0.7:
-        write_log_msg(f"subj {subj} had bad pulse syncing, aborting")
-        continue
-
-# apply maxfilter
-# do ICA

from ilabs_streamsync import StreamSync, extract_audio_from_video

if __name__ == "__main__":
    # load an MNE raw file
    raw = None
    cam1 = "/Users/user/VideoSync_NonSubject/sinclair_alexis_audiosync_240110_CAM3.mp4"
    flux1 = None
    my_events = []

    extract_audio_from_video(cam1, "/Users/user/VideoSync_NonSubject/output")
    ss = StreamSync(None, None)  # Raw type not supported yet
    ss.add_stream("/Users/user/VideoSync_NonSubject/output/sinclair_alexis_audiosync_240110_CAM3_16bit.wav", channel=1)
    ss.plot_sync_pulses(tmin=0.998, tmax=1)

    # subjects = ["146a", "222b"]

    # for subj in subjects:
    # construct the filename/path
    # load the Raw
    # figure out where video files are & load them
    # extract_audio_from_video(cam1)

    # ss = StreamSync(raw, "STIM001")
    # ss.add_stream(audio1)
    # ss.add_camera_events(my_events)
    # ss.add_stream(flux1)
    # result = ss.do_syncing()
    # fig = ss.plot_sync()
    # annot = ss.add_camera_events(my_events)
    # raw.set_annotations(annot)
    # fig.savefig(...)
    # if result < 0.7:
    # write_log_msg(f"subj {subj} had bad pulse syncing, aborting")
    # continue

    # apply maxfilter
    # do ICA
108 changes: 92 additions & 16 deletions src/ilabs_streamsync/streamsync.py
@@ -1,3 +1,14 @@
from __future__ import annotations

import os
import pathlib
import subprocess

import matplotlib.pyplot as plt
import numpy as np
from scipy.io.wavfile import read as wavread


class StreamSync:
    """Synchronize two data streams.

@@ -9,39 +20,104 @@ class StreamSync:
    """

    def __init__(self, reference_object, pulse_channel):
-        self.ref_stream = reference_object.get_chan(pulse_channel)
-        self.sfreq = reference_object.info["sfreq"] # Hz
-        self.streams = []
        """Initialize StreamSync object with 'Raw' MEG associated with it."""
        # self.ref_stream = reference_object.get_chan(pulse_channel)
        self.ref_stream = None
        # self.sfreq = reference_object.info["sfreq"] # Hz
        self.sfreq = 0
        self.streams = []  # list of (filename, srate, pulses, data) tuples

    def add_stream(self, stream, channel=None, events=None):
        """Add a new ``Raw`` or video stream, optionally with events.

-        stream : Raw | wav
-            An audio or FIF stream.
        stream : str
            File path to an audio or FIF stream.
        channel : str | int | None
            Which channel of `stream` contains the sync pulse sequence.
        events : array-like | None
            Events associated with the stream. TODO: should they be integer sample
            numbers? Timestamps? Do we support both?
        """
Comment on lines 73 to 74

Member:
As far as the input format for events, I don't really know what researchers/annotators are going to want to do. We have seen cases where the data was in the form of timestamps, probably something like HH:MM:SS.123456. So unless @NeuroLaunch has opinions about what (other) format(s) we should target, I'd say start with parsing HH:MM:SS.123456-formatted data, and we can expand to other formats later.

As far as the output format of events: MNE-Python has two ways of representing events (event arrays and Annotations objects). We should decide which one (or both?) we want to use when converting/syncing camera timestamps to the Raw file's time domain. @NeuroLaunch, do you have an opinion here? @ashtondoane, are you familiar with the two kinds of MNE event representations?

If I had to put a stake in the ground, I'd probably say "use Annotations", but I haven't thought very hard about it yet... maybe implement that first, and if we find that we need to also implement event-array support, we can add that later.

Collaborator Author:
I am not familiar with the MNE representations; I will have to read the documentation. I'll begin with Annotations, as @NeuroLaunch also mentioned this as a possibility, and we can adjust later if necessary.

Member:
Not clear to me that this has actually been addressed, as nothing is done with events in the code; unresolving.
-        pulses = self._extract_pulse_sequence_from_stream(stream, channel=channel)
-        self.streams.append(pulses)
        srate, pulses, data = self._extract_data_from_stream(stream, channel=channel)
        self.streams.append((stream, srate, pulses, data))

-    def _extract_pulse_sequence_from_stream(self, stream, channel):
-        # TODO triage based on input type (e.g., if it's a Raw, pull out a stim chan,
-        # if it's audio, just add it as-is)
    def _extract_data_from_stream(self, stream, channel):
        """Extract pulses and raw data from the stream provided."""
        ext = pathlib.Path(stream).suffix
        if ext == ".fif":
            return self._extract_data_from_raw(stream, channel)
        if ext == ".wav":
            return self._extract_data_from_wav(stream, channel)
        raise TypeError("Stream provided was of unsupported format. Please provide a .fif or .wav file.")


    def _extract_data_from_raw(self, stream, channel):
        pass
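The Raw branch is left as a stub in this PR. A rough sketch of one way it could mirror the WAV path, using the public MNE API (mne.io.read_raw_fif, Raw.get_data); the helper name is hypothetical, and returning None in place of the WAV audio channel is an assumption, not the project's decided design:

import mne


def extract_pulses_from_raw(stream, channel):
    # Hypothetical helper (not in the PR): load the .fif and pull out the pulse channel.
    raw = mne.io.read_raw_fif(stream, preload=False)
    srate = raw.info["sfreq"]
    pulses = raw.get_data(picks=[channel])[0]
    return (srate, pulses, None)  # None: no analogue of the WAV audio channel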

    def _extract_data_from_wav(self, stream, channel):
        """Return tuple of (sample rate, pulse channel, audio channel) from a stereo file."""
        srate, wav_signal = wavread(stream)
        return (srate, wav_signal[:, channel], wav_signal[:, 1 - channel])
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not sure about the API here. we probably want this func and the _extract_data_from_raw func to return the same thing (e.g., tuple of same length), but for the Raw case there's no analogue to "audio channel". I also don't know what the audio channel is useful for --- e.g., if it's downsampled from 44.1kHz (typical audio) to 1kHz (typical MEG), it will be pretty much useless / unintelligible, so I don't see the point of syncing the audio data itself.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Perhaps I don't understand the end goal of this API. I wasn't intending to use the audio data for syncing (the pulses are the goal here), but rather holding onto it to create a file in the future that has been aligned. I'm not sure I understand the point about downsampling. Would you mind clarifying here?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not sure if this question is still relevant, but here goes an answer: as I understand it, the end goal here is to convert researcher-created timestamps (in HH:MM:SS.ssssss format) into an mne.Annotations object, after first figuring out what tranformations (shift and/or stretch) must be done to get the video time domain aligned with the MEG time domain. In that sense, there is no need to write out the camera's audio channel to a WAV file (either before or after it's been warped/synced to the MEG time domain).
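A sketch of the shift/stretch estimation described in the comment above, assuming pulse onset times (in seconds) have already been detected and paired one-to-one in both streams; the function and variable names are illustrative only, not part of this PR:

import numpy as np


def fit_time_warp(video_pulse_times, meg_pulse_times):
    # Illustrative only: least-squares fit of t_meg = slope * t_video + intercept,
    # given matched arrays of pulse onset times in seconds.
    slope, intercept = np.polyfit(video_pulse_times, meg_pulse_times, deg=1)
    return slope, intercept


def video_to_meg_time(t_video, slope, intercept):
    # Map camera timestamps (seconds) into the Raw file's time domain.
    return slope * np.asarray(t_video) + intercept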


    def do_syncing(self):
        """Synchronize all streams with the reference stream."""
        # TODO (waves hands) do the hard part.
        # TODO spit out a report of correlation/association between all pairs of streams
        pass
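One possible direction for the TODO above, not the project's decided algorithm: estimate the lag between a reference pulse channel and another stream via cross-correlation, assuming both signals have already been resampled to a common rate. Once the lag is known, something like np.corrcoef on the aligned pulse channels could feed the per-pair association report mentioned in the second TODO.

import numpy as np
from scipy.signal import correlate, correlation_lags


def estimate_lag_seconds(ref_pulses, other_pulses, sfreq):
    # Illustrative only: find the lag (in seconds) that best aligns `other_pulses`
    # with `ref_pulses`; both are assumed to be 1-D arrays sampled at `sfreq`.
    ref = ref_pulses - np.mean(ref_pulses)
    oth = other_pulses - np.mean(other_pulses)
    corr = correlate(ref, oth, mode="full")
    lags = correlation_lags(len(ref), len(oth), mode="full")
    return lags[np.argmax(corr)] / sfreq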

-    def plot_sync(self):
-        pass
    def plot_sync_pulses(self, tmin=0, tmax=float('inf')):
        """Plot each stream in the class."""
        # TODO Plot the raw file on the first plot.
        # Show individual channels separately; the 0th plot is the combination of these.
        fig, axset = plt.subplots(len(self.streams) + 1, 1, figsize=[8, 6])
        for i, stream in enumerate(self.streams):
            npts = len(stream[2])
            tt = np.arange(npts) / stream[1]
            idx = np.where((tt >= tmin) & (tt < tmax))
            axset[i + 1].plot(tt[idx], stream[2][idx].T)
            axset[i + 1].set_title(pathlib.Path(stream[0]).name)
            # Make label equal to simply the cam number
        plt.show()

-def extract_audio_from_video(path_to_video, channel):
-    """Path can be a regex or glob to allow batch processing."""
-    pass

def extract_audio_from_video(path_to_video, output_dir):
    """Extract audio from the video at the path provided.

    path_to_video: str
        Path to the video file.
        TODO allow path_to_video to take regex?
    output_dir: str
        Path to the directory where extracted audio should be saved.

    Effects:
        Creates the output directory if non-existent. For each video found, creates
        a file with the associated audio labeled the same way.

    Raises:
        ValueError if the video path does not exist,
        Exception if the filename is already taken in output_dir.
    """
    audio_codecout = 'pcm_s16le'
    audio_suffix = '_16bit'
    p = pathlib.Path(path_to_video)
    audio_file = p.stem + audio_suffix + '.wav'
    if not p.exists():
        raise ValueError('Path provided cannot be found.')
    if pathlib.PurePath.joinpath(pathlib.Path(output_dir), pathlib.Path(audio_file)).exists():
        raise Exception(f"Audio already exists for {path_to_video} in output directory.")

    command = ['ffmpeg',
               '-acodec', 'pcm_s24le',  # force little-endian format (req'd for Linux)
               '-i', path_to_video,
               '-map', '0:a',  # audio only (per DM)
               # '-af', 'highpass=f=0.1',
               '-acodec', audio_codecout,
               '-ac', '2',  # no longer mono output, so setting to "2"
               '-y', '-vn',  # overwrite output file without asking; no video
               '-loglevel', 'error',
               audio_file]
    pipe = subprocess.run(command, timeout=50, check=False)

    if pipe.returncode == 0:
        print(f'Audio extraction was successful for {path_to_video}')
        output_path = pathlib.PurePath.joinpath(pathlib.Path(output_dir), pathlib.Path(audio_file))
        os.renames(audio_file, output_path)
    else:
        print(f"Audio extraction unsuccessful for {path_to_video}")