kennethli319 committed on
Commit
f690a5a
·
1 Parent(s): 09e6eb0

update tts

Browse files
Files changed (1) hide show
  1. app.py +13 -4
app.py CHANGED
@@ -8,6 +8,12 @@ from nemo.collections.tts.models import FastPitchModel
8
  from nemo.collections.tts.models import HifiGanModel
9
  from nemo.collections.tts.models import MixerTTSModel
10
 
 
 
 
 
 
 
11
  # spec_generator_2 = MixerTTSModel.from_pretrained("tts_en_lj_mixerttsx")
12
  # model1 = HifiGanModel.from_pretrained(model_name="tts_en_lj_hifigan_ft_mixerttsx")
13
 
@@ -16,20 +22,23 @@ spec_generator.eval()
16
  voc_model = HifiGanModel.from_pretrained(model_name="tts_en_hifitts_hifigan_ft_fastpitch")
17
  voc_model.eval()
18
 
 
 
19
  def greet(name):
20
  return "Hello " + name + "!!"
21
 
22
  def generate_tts(text: str, speaker: int = 0):
23
  sr = 44100
24
- parsed = spec_generator.parse(text)
25
- spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
26
- audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
 
27
 
28
  # with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
29
  # torchaudio.save(fp.name, audio.to('cpu'), sample_rate=sr)
30
 
31
  # return fp.name
32
- return (sr, audio.to('cpu').detach().numpy())
33
 
34
  def run():
35
  demo = gr.Interface(
 
8
  from nemo.collections.tts.models import HifiGanModel
9
  from nemo.collections.tts.models import MixerTTSModel
10
 
11
+ from transformers import pipeline
12
+
13
+
14
+ Audio(output["audio"], rate=output["sampling_rate"])
15
+
16
+
17
  # spec_generator_2 = MixerTTSModel.from_pretrained("tts_en_lj_mixerttsx")
18
  # model1 = HifiGanModel.from_pretrained(model_name="tts_en_lj_hifigan_ft_mixerttsx")
19
 
 
22
  voc_model = HifiGanModel.from_pretrained(model_name="tts_en_hifitts_hifigan_ft_fastpitch")
23
  voc_model.eval()
24
 
25
+ pipe = pipeline("text-to-speech", model="suno/bark-small")
26
+
27
  def greet(name):
28
  return "Hello " + name + "!!"
29
 
30
  def generate_tts(text: str, speaker: int = 0):
31
  sr = 44100
32
+ # parsed = spec_generator.parse(text)
33
+ # spectrogram = spec_generator.generate_spectrogram(tokens=parsed, speaker=speaker)
34
+ # audio = voc_model.convert_spectrogram_to_audio(spec=spectrogram)
35
+ output = pipe(text)
36
 
37
  # with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
38
  # torchaudio.save(fp.name, audio.to('cpu'), sample_rate=sr)
39
 
40
  # return fp.name
41
+ return (output["sampling_rate"], output["audio"])
42
 
43
  def run():
44
  demo = gr.Interface(