import os
from speechbrain.inference.separation import SepformerSeparation as separator
import torchaudio
import gradio as gr
from moviepy.editor import VideoFileClip
def convert_video_to_audio(video_input):
    """Extract the audio track from a video file and save it next to it as .m4a (AAC).

    Args:
        video_input: Path to the input video file.

    Returns:
        Normalized path of the written .m4a audio file.
    """
    video_clip = VideoFileClip(video_input)
    audio_clip = video_clip.audio
    # os.path.splitext strips only the final extension; the previous
    # video_input.split('.')[0] truncated at the FIRST dot, mangling paths
    # like "clip.1080p.mp4" or dotted directory names.
    base, _ = os.path.splitext(video_input)
    audio_clip_filepath = os.path.normpath(f"{base}.m4a")
    audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
    audio_clip.close()
    video_clip.close()
    return audio_clip_filepath
def speechbrain(input_obj, input_obj_type):
    """Enhance speech in an uploaded file with the SpeechBrain Sepformer model.

    Args:
        input_obj: Path to the uploaded audio or video file.
        input_obj_type: Either "video" (audio is extracted first) or "audio".

    Returns:
        Path to the enhanced WAV file written in the working directory.
    """
    model = separator.from_hparams(
        source="speechbrain/sepformer-whamr-enhancement",
        savedir='pretrained_models/sepformer-whamr-enhancement',
    )
    # Videos are converted to a standalone audio file before enhancement.
    audio_path = convert_video_to_audio(input_obj) if input_obj_type == "video" else input_obj
    est_sources = model.separate_file(path=audio_path)
    # Keep the first separated source; Sepformer-WHAMR operates at 8 kHz.
    torchaudio.save("clean_audio_file.wav", est_sources[:, :, 0].detach().cpu(), 8000)
    return "clean_audio_file.wav"
def main():
    """Build and launch the Gradio UI: one tab for video input, one for audio.

    Both tabs call ``speechbrain`` with a hidden radio carrying the file type,
    and return the path of the enhanced WAV for playback.
    """
    with gr.Blocks(title="Speech Enhancement", delete_cache=(86400, 86400), theme=gr.themes.Ocean()) as demo:
        gr.Markdown("Gradio demo for Speech Enhancement by SpeechBrain. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below.")
        # Fixed broken anchor: the Github link had a bare '_blank' string
        # with no target= attribute, producing invalid HTML.
        gr.Markdown("<p style='text-align: center'><a href='https://arxiv.org/abs/2010.13154' target='_blank'>Attention is All You Need in Speech Separation</a> | <a href='https://github.com/speechbrain/speechbrain/tree/develop/templates/enhancement' target='_blank'>Github Repo</a></p>")
        with gr.Tabs(selected="video") as tabs:
            with gr.Tab("Video", id="video"):
                gr.Interface(
                    fn=speechbrain,
                    inputs=[
                        gr.Video(),
                        # Fixed-value radio tells speechbrain() to extract audio first.
                        gr.Radio(choices=["video"], value="video", label="File Type")
                    ],
                    outputs=[
                        gr.Audio(label="Output Audio", type="filepath")
                    ]
                )
            with gr.Tab("Audio", id="audio"):
                gr.Interface(
                    fn=speechbrain,
                    inputs=[
                        gr.Audio(type="filepath"),
                        gr.Radio(choices=["audio"], value="audio", label="File Type")
                    ],
                    outputs=[
                        gr.Audio(label="Output Audio", type="filepath")
                    ]
                )
    demo.launch()
# Script entry point; removed a trailing "|" artifact that broke the syntax.
if __name__ == '__main__':
    main()