Skip to content

Commit

Permalink
Overlap Speech Detection - Manoj
Browse files Browse the repository at this point in the history
  • Loading branch information
Manoj-2702 committed Nov 24, 2023
1 parent 126ff63 commit 0a5fc49
Show file tree
Hide file tree
Showing 7 changed files with 145 additions and 0 deletions.
1 change: 1 addition & 0 deletions OverlappedSpeech/.env
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
HUGGING_FACE_TOKEN="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"
File renamed without changes.
62 changes: 62 additions & 0 deletions OverlappedSpeech/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# Voice Activity Detection with Overlap Detection

This project uses the WebRTC VAD module to perform Voice Activity Detection (VAD) on an audio file. It also applies noise reduction to the audio file before performing VAD.

## Dependencies

- Python 3
- numpy
- librosa
- matplotlib
- webrtcvad
- soundfile
- noisereduce

You can install these dependencies using pip:

```bash
pip install numpy librosa matplotlib webrtcvad soundfile noisereduce
```

or

```bash
pip install -r requirements.txt
```

## Usage

1. Clone the repo

```bash
git clone https://github.com/Manoj-2702/FacialAnalysis-IISc.git
```

2. Navigate to the directory

```bash
cd FacialAnalysis-IISc/OverlappedSpeech
```

3. Run the script

```bash
python vad.py
```

Alternatively, call the function from your own Python code:

```python
from vad import vad_with_overlap_detection

# Replace 'your_audio_file.wav' with the path to your audio file
audio_file_path = 'your_audio_file.wav'

# Run VAD with overlap detection and plot the graph
vad_with_overlap_detection(audio_file_path)

```

4. The output is a plot with two panels: the audio waveform and the frame-by-frame VAD result (speech / non-speech).

## Audio File

A sample audio file, `iisc_1.mp3`, is included in this repo.
Binary file added OverlappedSpeech/iisc_1.mp3
Binary file not shown.
16 changes: 16 additions & 0 deletions OverlappedSpeech/overlap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# from pyannote.audio import Pipeline
from pyannote.audio.pipelines import OverlappedSpeechDetection
from dotenv import load_dotenv
import os
# Read HUGGING_FACE_TOKEN from a local .env file into the process environment.
load_dotenv()

# The token authenticates the download of the pretrained pipeline from the
# Hugging Face Hub; os.environ.get() yields None when the variable is unset.
hugging_face_token = os.environ.get("HUGGING_FACE_TOKEN")
pipeline = OverlappedSpeechDetection.from_pretrained(
    "pyannote/overlapped-speech-detection",
    use_auth_token=hugging_face_token,
)
audio_file_path = "IIsc_Voice_Sample.mp3"

# The pipeline accepts a mapping with an 'audio' path and a 'uri' label.
output = pipeline({'uri': 'filename', 'audio': audio_file_path})

# The pipeline result is used directly (it is not keyed by 'uri'); following
# the pyannote model card, get_timeline().support() yields the merged
# segments in which overlapped speech was detected.
for speech in output.get_timeline().support():
    # Two or more speakers are active between speech.start and speech.end
    print(f"Overlapped Speech Detected: {speech.start} - {speech.end}")

66 changes: 66 additions & 0 deletions OverlappedSpeech/vad.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import numpy as np
import librosa
import matplotlib.pyplot as plt
import webrtcvad
import soundfile as sf
import librosa.display
import noisereduce as nr

def vad_with_overlap_detection(audio_file):
    """
    Run WebRTC Voice Activity Detection on an audio file and plot the result.

    The audio is loaded at its native sample rate, down-mixed to mono,
    noise-reduced, resampled to 16 kHz, converted to 16-bit PCM and split
    into 20 ms frames. Each frame is classified as speech / non-speech by
    webrtcvad (aggressiveness mode 2).

    Args:
        audio_file (str): Path to the audio file.
    Returns:
        None. Shows a matplotlib figure with the waveform (top) and the
        per-frame VAD decision (bottom), both plotted against time in seconds.
    """
    target_sr = 16000
    frame_duration = 0.02  # 20 ms

    # Load the audio file at its native rate and make sure it is mono
    y, sr = librosa.load(audio_file, sr=None)
    y = librosa.to_mono(y)
    y = nr.reduce_noise(y=y, sr=sr)

    # Resample to 16000 Hz
    if sr != target_sr:
        y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
        sr = target_sr

    # Create a VAD object and set its aggressiveness mode
    vad = webrtcvad.Vad()
    vad.set_mode(2)

    # Convert the float audio data to int16. Noise reduction and resampling
    # can push samples slightly outside [-1, 1]; clip first so the cast
    # saturates instead of wrapping around.
    y = (np.clip(y, -1.0, 1.0) * 32767).astype(np.int16)

    # Frame the audio data. webrtcvad rejects frames whose length is not
    # exactly 10/20/30 ms, so the trailing partial frame is zero-padded
    # (silence) up to a whole frame.
    frame_length = int(sr * frame_duration)
    remainder = len(y) % frame_length
    if remainder:
        y = np.pad(y, (0, frame_length - remainder))
    frames = [y[i:i + frame_length] for i in range(0, len(y), frame_length)]

    # Apply VAD on each frame
    is_speech = [vad.is_speech(frame.tobytes(), sr) for frame in frames]

    # Put both panels on a common time axis (seconds) so they can be compared
    sample_times = np.arange(len(y)) / sr
    frame_times = np.arange(len(frames)) * frame_duration

    # Plot the waveform and VAD result
    plt.figure(figsize=(12, 6))
    plt.subplot(2, 1, 1)
    plt.plot(sample_times, y)
    plt.title('Waveform')
    plt.xlabel('Time (s)')
    plt.subplot(2, 1, 2)
    plt.plot(frame_times, is_speech)
    plt.title('VAD Result')
    plt.xlabel('Time (s)')
    plt.tight_layout()
    plt.show()


# Path of the audio file analysed when this file is run as a script;
# replace it with the path to your own audio file.
audio_file_path = 'iisc_1.mp3'

# Run VAD and plot the graph only when executed directly, so that
# `from vad import vad_with_overlap_detection` does not trigger a run.
if __name__ == "__main__":
    vad_with_overlap_detection(audio_file_path)
Binary file modified requirements.txt
Binary file not shown.

0 comments on commit 0a5fc49

Please sign in to comment.