speech-enhancement

Running

App Files Files Community

speech-enhancement / app.py

marquesafonso

update theme

3be2192 verified about 1 month ago

raw

history blame contribute delete

2.61 kB

	import functools
	import os
	import tempfile

	import gradio as gr
	import torchaudio
	from moviepy.editor import VideoFileClip
	from speechbrain.inference.separation import SepformerSeparation as separator

	def convert_video_to_audio(video_input):
	    """Extract the audio track of a video into an AAC-encoded ``.m4a`` file.

	    The audio file is written next to the video and shares its base name.

	    Args:
	        video_input: Path of the video file to read.

	    Returns:
	        The normalised path of the ``.m4a`` file that was written.

	    Raises:
	        ValueError: If the video clip exposes no audio track.
	    """
	    # splitext strips only the final extension. Splitting on the first "."
	    # would truncate a path such as "/tmp/run.1/clip.mp4" to "/tmp/run".
	    base_path, _ = os.path.splitext(video_input)
	    audio_clip_filepath = os.path.normpath(f"{base_path}.m4a")

	    video_clip = VideoFileClip(video_input)
	    audio_clip = None
	    try:
	        audio_clip = video_clip.audio
	        # NOTE(review): moviepy is expected to leave ``audio`` as None for
	        # videos without sound - confirm against the installed version.
	        if audio_clip is None:
	            raise ValueError(f"No audio track found in {video_input!r}")
	        audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
	    finally:
	        # Release both clips even when encoding fails.
	        if audio_clip is not None:
	            audio_clip.close()
	        video_clip.close()
	    return audio_clip_filepath

	@functools.lru_cache(maxsize=1)
	def _load_enhancement_model():
	    """Load the SepFormer enhancement model once and reuse it afterwards."""
	    return separator.from_hparams(
	        source="speechbrain/sepformer-whamr-enhancement",
	        savedir='pretrained_models/sepformer-whamr-enhancement',
	    )


	def speechbrain(input_obj, input_obj_type):
	    """Enhance the speech in an audio or video file and save it as a WAV file.

	    Args:
	        input_obj: Path of the uploaded file.
	        input_obj_type: ``"video"`` to extract the audio track first; any
	            other value treats ``input_obj`` as an audio file path.

	    Returns:
	        Path of the WAV file containing the enhanced audio.
	    """
	    # The model is cached so that it is not reloaded on every request.
	    model = _load_enhancement_model()

	    if input_obj_type == "video":
	        aud = convert_video_to_audio(input_obj)
	    else:
	        aud = input_obj
	    est_sources = model.separate_file(path=aud)

	    # Write to a unique file: a single fixed output name would let
	    # overlapping requests overwrite each other's result.
	    # NOTE(review): the file is not removed here - confirm that something
	    # downstream cleans it up.
	    with tempfile.NamedTemporaryFile(
	        prefix="clean_audio_", suffix=".wav", delete=False
	    ) as out_file:
	        output_path = out_file.name

	    # The last axis is indexed at 0, i.e. the first estimated source is kept.
	    # NOTE(review): 8000 is presumably the model's sample rate in Hz - confirm.
	    torchaudio.save(output_path, est_sources[:, :, 0].detach().cpu(), 8000)
	    return output_path

	def main():
	    """Build the two-tab (video / audio) speech-enhancement demo and launch it."""
	    # 86400 seconds = 24 hours for both delete_cache values.
	    with gr.Blocks(title="Speech Enhancement", delete_cache=(86400, 86400), theme=gr.themes.Ocean()) as demo:
	        gr.Markdown("Gradio demo for Speech Enhancement by SpeechBrain. To use it, simply upload your audio, or click one of the examples to load them. Read more at the links below.")
	        # Both links carry target='_blank'; the separator is a plain "|"
	        # because "\|" is not a valid escape sequence in a normal string.
	        gr.Markdown("<p style='text-align: center'><a href='https://arxiv.org/abs/2010.13154' target='_blank'>Attention is All You Need in Speech Separation</a> | <a href='https://github.com/speechbrain/speechbrain/tree/develop/templates/enhancement' target='_blank'>Github Repo</a></p>")
	        # Examples are currently disabled:
	        # examples = [
	        #     ['samples_audio_samples_test_mixture.wav']
	        # ]
	        with gr.Tabs(selected="video"):
	            # The single-choice Radio in each tab only exists to pass the
	            # file type to speechbrain() as its second argument.
	            with gr.Tab("Video", id="video"):
	                gr.Interface(
	                    fn=speechbrain,
	                    inputs=[
	                        gr.Video(),
	                        gr.Radio(choices=["video"], value="video", label="File Type"),
	                    ],
	                    outputs=[
	                        gr.Audio(label="Output Audio", type="filepath"),
	                    ],
	                )
	            with gr.Tab("Audio", id="audio"):
	                gr.Interface(
	                    fn=speechbrain,
	                    inputs=[
	                        gr.Audio(type="filepath"),
	                        gr.Radio(choices=["audio"], value="audio", label="File Type"),
	                    ],
	                    outputs=[
	                        gr.Audio(label="Output Audio", type="filepath"),
	                    ],
	                )
	    demo.launch()

	# Start the demo only when this file is run as a script, not on import.
	if __name__ == '__main__':
	    main()