jaywalnut310 · AK391 · Jun 14, 2021 · Jun 14, 2021 · Jun 14, 2021 · Jun 14, 2021
diff --git a/README.md b/README.md
@@ -10,6 +10,8 @@ Visit our [demo](https://jaywalnut310.github.io/vits-demo/index.html) for audio
 
 We also provide the [pretrained models](https://drive.google.com/drive/folders/1ksarh-cJf3F5eKJjLVWY0X1j1qsQqiS2?usp=sharing).
 
+** Update note: Our interactive TTS demo is also available as a [Gradio Web Demo](https://gradio.app/hub/AK391/vits).
+
 ** Update note: Thanks to [Rishikesh (ऋषिकेश)](https://github.com/jaywalnut310/vits/issues/1), our interactive TTS demo is now available on [Colab Notebook](https://colab.research.google.com/drive/1CO61pZizDj7en71NQG_aqqKdGaA_SaBf?usp=sharing).
 
 <table style="width:100%">

diff --git a/gradiodemo.py b/gradiodemo.py
@@ -0,0 +1,93 @@
+import matplotlib.pyplot as plt
+
+import os
+import json
+import math
+import torch
+from torch import nn
+from torch.nn import functional as F
+from torch.utils.data import DataLoader
+
+import commons
+import utils
+from data_utils import TextAudioLoader, TextAudioCollate, TextAudioSpeakerLoader, TextAudioSpeakerCollate
+
+import sys
+from subprocess import call
+
+def run_cmd(command):
+    """Echo a shell command, run it, and return its exit status.
+
+    Args:
+        command: Complete command line, passed to the shell as one string.
+
+    Returns:
+        The exit status reported by ``subprocess.call``.
+
+    Pressing Ctrl-C while the command runs prints a message and terminates
+    the script with status 1.
+    """
+    try:
+        print(command)
+        # shell=True because callers hand over a whole command line as a string.
+        status = call(command, shell=True)
+    except KeyboardInterrupt:
+        print("Process interrupted")
+        sys.exit(1)
+    if status != 0:
+        # Report the failure instead of silently dropping the exit status;
+        # execution still continues, as it did before.
+        print("Command exited with status {}: {}".format(status, command))
+    return status
+
+# Startup step: build the extension inside ./monotonic_align in place and
+# install espeak through apt-get, then come back to the starting directory.
+current = os.getcwd()
+print(current)
+# os.path.join instead of manual "/" concatenation.
+full = os.path.join(current, "monotonic_align")
+print(full)
+os.chdir(full)
+print(os.getcwd())
+run_cmd("python3 setup.py build_ext --inplace")
+run_cmd("apt-get install espeak -y")
+# Return to the recorded start directory; a relative ".." would resolve
+# against the physical location if monotonic_align were a symlink.
+os.chdir(current)
+print(os.getcwd())
+
+from models import SynthesizerTrn
+from text.symbols import symbols
+from text import text_to_sequence
+
+from scipy.io.wavfile import write
+import gradio as gr
+import scipy.io.wavfile
+import numpy as np
+import torchtext
+
+
+
+
+
+# Fetch the file behind this Google Drive link into the current directory
+# (root="."). NOTE(review): presumably this is the pretrained_ljs.pth
+# checkpoint loaded further down - confirm the saved file name.
+torchtext.utils.download_from_url("https://drive.google.com/uc?id=1q86w74Ygw2hNzYP9cWkeClGT5X25PvBT", root=".")
+
+
+def get_text(text, hps):
+    """Turn raw text into a ``torch.LongTensor`` sequence for the model.
+
+    The text goes through ``text_to_sequence`` with the cleaners listed in
+    ``hps.data.text_cleaners``. When ``hps.data.add_blank`` is truthy the
+    result is additionally passed through ``commons.intersperse`` with 0.
+    """
+    cleaners = hps.data.text_cleaners
+    sequence = text_to_sequence(text, cleaners)
+    if hps.data.add_blank:
+        sequence = commons.intersperse(sequence, 0)
+    return torch.LongTensor(sequence)
+
+# Read the hyperparameters from the bundled config file.
+hps = utils.get_hparams_from_file("./configs/ljs_base.json")
+# Build the synthesizer from the symbol count, two sizes derived from the
+# config, and every entry of hps.model as keyword arguments.
+net_g = SynthesizerTrn(
+    len(symbols),
+    hps.data.filter_length // 2 + 1,
+    hps.train.segment_size // hps.data.hop_length,
+    **hps.model)
+# Put the network in eval mode; the return value is not needed.
+_ = net_g.eval()
+
+# Load the checkpoint file into net_g.
+# NOTE(review): the third argument is None - presumably the optimizer slot;
+# confirm against utils.load_checkpoint.
+_ = utils.load_checkpoint("pretrained_ljs.pth", net_g, None)
+def inference(text):
+    """Synthesize ``text`` with ``net_g`` and return the path of the WAV file.
+
+    The audio is written to ``out.wav`` (relative to the working directory)
+    at ``hps.data.sampling_rate``; each call overwrites that same file.
+    """
+    token_ids = get_text(text, hps)
+    with torch.no_grad():
+        batch = token_ids.unsqueeze(0)
+        batch_lengths = torch.LongTensor([token_ids.size(0)])
+        result = net_g.infer(
+            batch,
+            batch_lengths,
+            noise_scale=.667,
+            noise_scale_w=0.8,
+            length_scale=1,
+        )
+        # First returned item, entry [0, 0], converted to a float numpy array.
+        waveform = result[0][0, 0].data.float().numpy()
+        scipy.io.wavfile.write("out.wav", hps.data.sampling_rate, waveform)
+    return "./out.wav"
+
+
+# Gradio components: a five-line text box for input, a file component for output.
+inputs = gr.inputs.Textbox(lines=5, label="Input Text")
+outputs = gr.outputs.File(label="Output Audio")
+
+# Text displayed on the demo page.
+title = "VITS"
+description = "demo for VITS: Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech. To use it, simply add your text, or click one of the examples to load them. Read more at the links below."
+article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
+
+# Sample inputs offered by the interface.
+examples = [
+    ["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech."],
+    ["Our method adopts variational inference augmented with normalizing flows and an adversarial training process, which improves the expressive power of generative modeling."],
+]
+
+# Wire everything together and start the web app.
+gr.Interface(
+    fn=inference,
+    inputs=inputs,
+    outputs=outputs,
+    title=title,
+    description=description,
+    article=article,
+    examples=examples,
+).launch()
diff --git a/requirements.txt b/requirements.txt
@@ -5,6 +5,8 @@ numpy==1.18.5
 phonemizer==2.2.1
 scipy==1.5.2
 tensorboard==2.3.0
-torch==1.6.0
-torchvision==0.7.0
+torch
+torchvision
 Unidecode==1.1.1
+gradio
+torchtext