Spaces:
Runtime error
Runtime error
File size: 3,025 Bytes
003b0ed ae71d4b 27a58ec ae71d4b 6e9b076 ae71d4b 6e9b076 ae71d4b 9483da5 ae71d4b b68b40e 2a963d1 9d22775 b68b40e ae71d4b 32fb746 ae71d4b 9483da5 2a963d1 27fd4f5 bbb7e65 9483da5 183bb88 2a963d1 ae71d4b bbb7e65 ae71d4b 183bb88 ae71d4b 183bb88 a1ec3a4 ae71d4b c335e76 b2dc9fc ae71d4b 9483da5 27fd4f5 caaf71e 867b7ff ae71d4b b2dc9fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
import json
import os
import textwrap

import gradio as gr
import librosa
import numpy as np
import requests
import torch
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan
# Text-to-speech components, created once at import time and shared by every
# call to predict().
checkpoint = "microsoft/speecht5_tts"
# The processor and the acoustic model are both built from the same checkpoint id.
processor = SpeechT5Processor.from_pretrained(checkpoint)
model = SpeechT5ForTextToSpeech.from_pretrained(checkpoint)
# The vocoder comes from a separate checkpoint; predict() hands it to
# model.generate_speech().
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")
# Short speaker code -> relative path of that speaker's stored embedding
# (.npy file under the local "spkemb" directory).
speaker_embeddings = {
    "BDL": "spkemb/cmu_us_bdl_arctic-wav-arctic_a0009.npy",
    "CLB": "spkemb/cmu_us_clb_arctic-wav-arctic_a0144.npy",
    "KSP": "spkemb/cmu_us_ksp_arctic-wav-arctic_b0087.npy",
    "RMS": "spkemb/cmu_us_rms_arctic-wav-arctic_b0353.npy",
    "SLT": "spkemb/cmu_us_slt_arctic-wav-arctic_a0508.npy",
}
def getNews(search_key):
    """Query the NewsAPI "everything" endpoint for *search_key*.

    Args:
        search_key: Free-text query sent as the ``q`` parameter.

    Returns:
        The response body decoded as UTF-8. The HTTP status is not checked,
        so an error response body is returned unchanged.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # NOTE(security): the fallback key is committed to the repository and
    # should be considered leaked. Set NEWSAPI_KEY and rotate the key.
    api_key = os.environ.get("NEWSAPI_KEY", "3bca07c913ec4703a23f6ba03e15b30b")
    response = requests.get(
        "https://newsapi.org/v2/everything",
        # Let requests percent-encode the query so that spaces, "&" or "#" in
        # search_key cannot corrupt the URL.
        # NOTE(review): NewsAPI appears to document this parameter as
        # "pageSize"; confirm the lowercase spelling is honoured.
        params={"pagesize": 3, "apiKey": api_key, "q": search_key},
        # Without a timeout a stalled connection would block the worker forever.
        timeout=10,
    )
    return response.content.decode("utf-8")
def getHeadlines():
    """Fetch the NewsAPI "top-headlines" feed for country ``us``.

    Returns:
        The response body decoded as UTF-8. The HTTP status is not checked,
        so an error response body is returned unchanged.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # NOTE(security): the fallback key is committed to the repository and
    # should be considered leaked. Set NEWSAPI_KEY and rotate the key.
    api_key = os.environ.get("NEWSAPI_KEY", "3bca07c913ec4703a23f6ba03e15b30b")
    response = requests.get(
        "https://newsapi.org/v2/top-headlines",
        params={"country": "us", "apiKey": api_key},
        # Without a timeout a stalled connection would block the worker forever.
        timeout=10,
    )
    return response.content.decode("utf-8")
def _news_to_script(payload):
    """Return the article titles found in a news response as one string.

    *payload* is expected to be a JSON object with an ``articles`` list whose
    items carry a ``title``. If it cannot be parsed that way, or contains no
    usable title, *payload* is returned unchanged so the caller still has
    something to speak.
    """
    try:
        data = json.loads(payload)
    except ValueError:
        return payload
    articles = data.get("articles") if isinstance(data, dict) else None
    if not isinstance(articles, list):
        return payload
    titles = [
        item["title"].strip()
        for item in articles
        if isinstance(item, dict)
        and isinstance(item.get("title"), str)
        and item["title"].strip()
    ]
    return ". ".join(titles) if titles else payload


def predict(text, preset):
    """Search news for *text* and synthesise the result as speech.

    Args:
        text: Search query typed by the user.
        preset: Value of the "Preset" radio control. It is accepted because
            the interface passes it, but it is not used yet.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``sample_rate`` is 16000 and
        ``samples`` is an int16 NumPy array. Blank or missing *text* yields an
        empty array without contacting the news service.
    """
    if not text or not text.strip():
        return (16000, np.zeros(0).astype(np.int16))

    # Speak the headlines rather than the raw JSON document returned by the API.
    script = _news_to_script(getNews(text))
    inputs = processor(text=textwrap.shorten(script, width=250), return_tensors="pt")

    # Limit input length to what the model's configuration allows.
    input_ids = inputs["input_ids"][..., :model.config.max_text_positions]

    # The voice is fixed to the "BDL" speaker; the path comes from the shared
    # table instead of being duplicated here.
    speaker_embedding = np.load(speaker_embeddings["BDL"])
    speaker_embedding = torch.tensor(speaker_embedding).unsqueeze(0)

    speech = model.generate_speech(input_ids, speaker_embedding, vocoder=vocoder)

    # Scale the generated waveform by 32767 and store it as 16-bit integers.
    speech = (speech.numpy() * 32767).astype(np.int16)
    return (16000, speech)
# Static text shown by the Gradio interface.
title = "Create 423: News to Speech"

description = """
Create 423: News to Speech
"""

# HTML footer with references. The Hugging Face links use the canonical host
# "huggingface.co" (the previous "huggingface.co." carried a stray trailing dot).
article = """
<div style='margin:20px auto;'>
<p>References: <a href="https://arxiv.org/abs/2110.07205">SpeechT5 paper</a> |
<a href="https://github.com/microsoft/SpeechT5/">original GitHub</a> |
<a href="https://huggingface.co/mechanicalsea/speecht5-tts">original weights</a></p>
<p>Speaker embeddings were generated from <a href="http://www.festvox.org/cmu_arctic/">CMU ARCTIC</a> using <a href="https://huggingface.co/mechanicalsea/speecht5-vc/blob/main/manifest/utils/prep_cmu_arctic_spkemb.py">this script</a>.</p>
</div>
"""

# Example rows for the interface: [input text, preset].
examples = [
    ["example 1", "US"],
    ["example 2", "International"],
]
# Labels offered by the "Preset" radio control.
preset_choices = ["US", "International", "Technology", "KPop", "Surprise Me!"]

# Build the UI: a text box and a preset selector feed predict(), whose
# (sample_rate, samples) result is rendered by the audio component.
demo = gr.Interface(
    fn=predict,
    inputs=[
        gr.Text(label="Input Text"),
        gr.Radio(label="Preset", choices=preset_choices, value="KPop"),
    ],
    outputs=[gr.Audio(label="Generated Speech", type="numpy")],
    title=title,
    description=description,
    article=article,
    examples=examples,
)

# Start the app as soon as the module runs; sharing is explicitly disabled.
demo.launch(share=False)
|