kennethli319 committed on
Commit
2230bd8
·
1 Parent(s): 7f77a52

update tts

Browse files
Files changed (1) hide show
  1. app.py +11 -3
app.py CHANGED
@@ -1,5 +1,8 @@
1
  import gradio as gr
2
  import torch
 
 
 
3
  import numpy as np
4
  from nemo.collections.tts.models import FastPitchModel
5
  from nemo.collections.tts.models import HifiGanModel
@@ -21,16 +24,21 @@ def generate_tts(text: str, speaker: int = 0):
21
  parsed = spec_generator.parse(text)
22
  spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
23
  audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
24
- return sr, audio.to('cpu').detach().numpy()
 
 
 
 
 
25
 
26
  def run():
27
  demo = gr.Interface(
28
  fn=generate_tts,
29
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
30
  gr.Slider(0, 10, step=1, label="Speaker")],
31
- outputs="audio",
32
  )
33
-
34
  demo.launch(server_name="0.0.0.0", server_port=7860)
35
 
36
 
 
1
  import gradio as gr
2
  import torch
3
+ import torchaudio
4
+ import tempfile
5
+
6
  import numpy as np
7
  from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
 
24
  parsed = spec_generator.parse(text)
25
  spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
26
  audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
27
+
28
+ with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
29
+ torchaudio.save(fp.name, audio.to('cpu'), sample_rate=sr)
30
+
31
+ return fp.name
32
+ #return (sr, audio.to('cpu').detach().numpy())
33
 
34
  def run():
35
  demo = gr.Interface(
36
  fn=generate_tts,
37
  inputs=[gr.Textbox(value="This is a test.", label="Text to Synthesize"),
38
  gr.Slider(0, 10, step=1, label="Speaker")],
39
+ outputs=gr.outputs.Audio(label="Output"),
40
  )
41
+
42
  demo.launch(server_name="0.0.0.0", server_port=7860)
43
 
44