From 5bbe75f1e5f00659fcc2abdd776b9b35e4661617 Mon Sep 17 00:00:00 2001 From: Hasan Mehdi Date: Thu, 30 May 2024 11:33:20 -0400 Subject: [PATCH 1/6] Updated readme embed-extraction pipeline --- README.md | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index f9a89dbc..b248604f 100644 --- a/README.md +++ b/README.md @@ -284,19 +284,22 @@ Obtain overlap-aware speaker embeddings from a microphone stream: ```python import rx.operators as ops import diart.operators as dops -from diart.sources import MicrophoneAudioSource +from diart.sources import MicrophoneAudioSource #, FileAudioSource from diart.blocks import SpeakerSegmentation, OverlapAwareSpeakerEmbedding -segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation") -embedding = OverlapAwareSpeakerEmbedding.from_pretrained("pyannote/embedding") +segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation", use_hf_token="") +embedding = OverlapAwareSpeakerEmbedding.from_pretrained("pyannote/embedding", use_hf_token="") + mic = MicrophoneAudioSource() +# To take input from file: +# mic = FileAudioSource("", sample_rate=16000) stream = mic.stream.pipe( # Reformat stream to 5s duration and 500ms shift dops.rearrange_audio_stream(sample_rate=segmentation.model.sample_rate), ops.map(lambda wav: (wav, segmentation(wav))), ops.starmap(embedding) -).subscribe(on_next=lambda emb: print(emb.shape)) +).subscribe(on_next=lambda emb: print(emb)) #emb.shape to display shape mic.read() ``` @@ -304,10 +307,13 @@ mic.read() Output: ``` -# Shape is (batch_size, num_speakers, embedding_dim) -torch.Size([1, 3, 512]) -torch.Size([1, 3, 512]) -torch.Size([1, 3, 512]) +# Displaying embeds: +tensor([[[-0.0442, -0.0327, -0.0910, ..., 0.0134, 0.0209, 0.0050], + [-0.0404, -0.0342, -0.0780, ..., 0.0395, 0.0334, -0.0140], + [-0.0404, -0.0342, -0.0780, ..., 0.0395, 0.0334, -0.0140]]]) +tensor([[[-0.0724, 0.0049, -0.0660, ..., 0.0359, 0.0247, -0.0256], + 
[-0.0462, -0.0256, -0.0642, ..., 0.0417, 0.0273, -0.0135], + [-0.0459, -0.0263, -0.0639, ..., 0.0412, 0.0269, -0.0131]]]) ... ``` From 9e4388997853c5211c440c66fd2bf81b122c4cc9 Mon Sep 17 00:00:00 2001 From: Hasan Mehdi Date: Thu, 30 May 2024 11:37:57 -0400 Subject: [PATCH 2/6] Updated readme embed-extraction pipeline --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index b248604f..25138bea 100644 --- a/README.md +++ b/README.md @@ -296,7 +296,7 @@ mic = MicrophoneAudioSource() stream = mic.stream.pipe( # Reformat stream to 5s duration and 500ms shift - dops.rearrange_audio_stream(sample_rate=segmentation.model.sample_rate), + dops.rearrange_audio_stream(sample_rate=16000), ops.map(lambda wav: (wav, segmentation(wav))), ops.starmap(embedding) ).subscribe(on_next=lambda emb: print(emb)) #emb.shape to display shape From 629eb5effb6e2ed7d0ed9c24cb1e84abf0cef60c Mon Sep 17 00:00:00 2001 From: Juan Coria Date: Fri, 28 Jun 2024 23:49:37 +0200 Subject: [PATCH 3/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 25138bea..58e0368c 100644 --- a/README.md +++ b/README.md @@ -284,7 +284,7 @@ Obtain overlap-aware speaker embeddings from a microphone stream: ```python import rx.operators as ops import diart.operators as dops -from diart.sources import MicrophoneAudioSource #, FileAudioSource +from diart.sources import MicrophoneAudioSource, FileAudioSource from diart.blocks import SpeakerSegmentation, OverlapAwareSpeakerEmbedding segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation", use_hf_token="") From 6b79055543f866e9b3a3c7df597565935c2ae59b Mon Sep 17 00:00:00 2001 From: Juan Coria Date: Fri, 28 Jun 2024 23:50:29 +0200 Subject: [PATCH 4/6] Apply suggestions from code review --- README.md | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/README.md b/README.md index 58e0368c..3a3f3f11 
100644 --- a/README.md +++ b/README.md @@ -287,33 +287,30 @@ import diart.operators as dops from diart.sources import MicrophoneAudioSource, FileAudioSource from diart.blocks import SpeakerSegmentation, OverlapAwareSpeakerEmbedding -segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation", use_hf_token="") -embedding = OverlapAwareSpeakerEmbedding.from_pretrained("pyannote/embedding", use_hf_token="") +segmentation = SpeakerSegmentation.from_pretrained("pyannote/segmentation") +embedding = OverlapAwareSpeakerEmbedding.from_pretrained("pyannote/embedding") -mic = MicrophoneAudioSource() +source = MicrophoneAudioSource() # To take input from file: -# mic = FileAudioSource("", sample_rate=16000) +# source = FileAudioSource("", sample_rate=16000) -stream = mic.stream.pipe( +stream = source.stream.pipe( # Reformat stream to 5s duration and 500ms shift - dops.rearrange_audio_stream(sample_rate=16000), + dops.rearrange_audio_stream(sample_rate=source.sample_rate), ops.map(lambda wav: (wav, segmentation(wav))), ops.starmap(embedding) -).subscribe(on_next=lambda emb: print(emb)) #emb.shape to display shape +).subscribe(on_next=lambda emb: print(emb.shape)) -mic.read() +source.read() ``` Output: ``` -# Displaying embeds: -tensor([[[-0.0442, -0.0327, -0.0910, ..., 0.0134, 0.0209, 0.0050], - [-0.0404, -0.0342, -0.0780, ..., 0.0395, 0.0334, -0.0140], - [-0.0404, -0.0342, -0.0780, ..., 0.0395, 0.0334, -0.0140]]]) -tensor([[[-0.0724, 0.0049, -0.0660, ..., 0.0359, 0.0247, -0.0256], - [-0.0462, -0.0256, -0.0642, ..., 0.0417, 0.0273, -0.0135], - [-0.0459, -0.0263, -0.0639, ..., 0.0412, 0.0269, -0.0131]]]) +# Shape is (batch_size, num_speakers, embedding_dim) +torch.Size([1, 3, 512]) +torch.Size([1, 3, 512]) +torch.Size([1, 3, 512]) ...
``` From 7478e872026a18991582cb710adc8ff2843f2269 Mon Sep 17 00:00:00 2001 From: Juan Coria Date: Fri, 28 Jun 2024 23:53:05 +0200 Subject: [PATCH 5/6] Update README.md --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 3a3f3f11..f55f670a 100644 --- a/README.md +++ b/README.md @@ -294,6 +294,9 @@ source = MicrophoneAudioSource() # To take input from file: # source = FileAudioSource("", sample_rate=16000) +# Make sure the model has been trained with the same sample rate +print(source.sample_rate) + stream = source.stream.pipe( # Reformat stream to 5s duration and 500ms shift dops.rearrange_audio_stream(sample_rate=source.sample_rate), From 36e1a52e2823aac76826989d7ce8b98ffbb8938c Mon Sep 17 00:00:00 2001 From: Juan Coria Date: Fri, 28 Jun 2024 23:53:59 +0200 Subject: [PATCH 6/6] Update README.md --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f55f670a..42bf3da3 100644 --- a/README.md +++ b/README.md @@ -294,7 +294,7 @@ source = MicrophoneAudioSource() # To take input from file: # source = FileAudioSource("", sample_rate=16000) -# Make sure the model has been trained with the same sample rate +# Make sure the models have been trained with this sample rate print(source.sample_rate) stream = source.stream.pipe(