diff --git a/training/deepspeech_training/util/signal_augmentations.py b/training/deepspeech_training/util/signal_augmentations.py index f9156c39ac..d69622d435 100644 --- a/training/deepspeech_training/util/signal_augmentations.py +++ b/training/deepspeech_training/util/signal_augmentations.py @@ -141,7 +141,8 @@ def apply(self, sample, clock): n_gaps = pick_value_from_range(self.n_gaps, clock=clock) for _ in range(n_gaps): size = pick_value_from_range(self.size, clock=clock) - offset = max(0, random.randint(0, len(audio) - size - 1)) + size = min(size, len(audio) // 10) # a gap should never exceed 10 percent of the audio + offset = random.randint(0, max(0, len(audio) - size - 1)) audio[offset:offset + size] = 0 sample.audio = audio