Skip to content

Commit

Permalink
Replace torchaudio with soundfile in python-api-examples (#765)
Browse files: browse the repository at this point in the history
  • Loading branch information
gtf35 authored Apr 13, 2024
1 parent 983df28 commit b0265b2
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@

import numpy as np
import sherpa_onnx
import torchaudio
import soundfile as sf

try:
import sounddevice as sd
Expand Down Expand Up @@ -357,8 +357,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Read an audio file and return its first channel plus the sample rate.

    Decoding is done with ``soundfile`` (imported as ``sf``).

    Args:
      filename: Path of the audio file to read.

    Returns:
      A ``(samples, sample_rate)`` tuple. ``samples`` is a 1-D, C-contiguous
      ``float32`` array holding only the first channel; ``sample_rate`` is
      the value reported by ``sf.read``.
    """
    # always_2d=True makes sf.read return a 2-D array even for mono input,
    # so the column selection below works for any channel count.
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    # A column slice of a multi-channel array is strided; copy it into a
    # contiguous buffer before returning it.
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
Expand Down
12 changes: 9 additions & 3 deletions python-api-examples/speaker-identification-with-vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@

import numpy as np
import sherpa_onnx
import torchaudio
import soundfile as sf

try:
import sounddevice as sd
Expand Down Expand Up @@ -160,8 +160,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Read an audio file and return its first channel plus the sample rate.

    Decoding is done with ``soundfile`` (imported as ``sf``).

    Args:
      filename: Path of the audio file to read.

    Returns:
      A ``(samples, sample_rate)`` tuple. ``samples`` is a 1-D, C-contiguous
      ``float32`` array holding only the first channel; ``sample_rate`` is
      the value reported by ``sf.read``.
    """
    # always_2d=True makes sf.read return a 2-D array even for mono input,
    # so the column selection below works for any channel count.
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    # A column slice of a multi-channel array is strided; copy it into a
    # contiguous buffer before returning it.
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
Expand Down
12 changes: 9 additions & 3 deletions python-api-examples/speaker-identification.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@

import numpy as np
import sherpa_onnx
import torchaudio
import soundfile as sf

try:
import sounddevice as sd
Expand Down Expand Up @@ -145,8 +145,14 @@ def load_speaker_file(args) -> Dict[str, List[str]]:


def load_audio(filename: str) -> Tuple[np.ndarray, int]:
    """Read an audio file and return its first channel plus the sample rate.

    Decoding is done with ``soundfile`` (imported as ``sf``).

    Args:
      filename: Path of the audio file to read.

    Returns:
      A ``(samples, sample_rate)`` tuple. ``samples`` is a 1-D, C-contiguous
      ``float32`` array holding only the first channel; ``sample_rate`` is
      the value reported by ``sf.read``.
    """
    # always_2d=True makes sf.read return a 2-D array even for mono input,
    # so the column selection below works for any channel count.
    data, sample_rate = sf.read(
        filename,
        always_2d=True,
        dtype="float32",
    )
    data = data[:, 0]  # use only the first channel
    # A column slice of a multi-channel array is strided; copy it into a
    # contiguous buffer before returning it.
    samples = np.ascontiguousarray(data)
    return samples, sample_rate


def compute_speaker_embedding(
Expand Down

0 comments on commit b0265b2

Please sign in to comment.