import functools
import os

import gradio as gr
import torchaudio
from moviepy.editor import VideoFileClip
from speechbrain.inference.separation import SepformerSeparation as separator

def convert_video_to_audio(video_input):
    """Extract the audio track of a video file into an AAC ``.m4a`` file.

    The output file is written at the input path with its final extension
    replaced by ``.m4a``.

    Args:
        video_input: Path to the video file.

    Returns:
        The normalized path of the ``.m4a`` file that was written.

    Raises:
        ValueError: If the loaded clip exposes no audio track
            (its ``audio`` attribute is ``None``).
    """
    # splitext strips only the last extension, so dots in directory names or
    # earlier in the file name (e.g. "/tmp/a.b/clip.v2.mp4") do not truncate
    # the output path the way ``split('.')[0]`` would.
    stem, _ = os.path.splitext(video_input)
    audio_clip_filepath = os.path.normpath(f"{stem}.m4a")

    video_clip = VideoFileClip(video_input)
    try:
        audio_clip = video_clip.audio
        if audio_clip is None:
            raise ValueError(f"No audio track found in {video_input!r}")
        try:
            audio_clip.write_audiofile(audio_clip_filepath, codec='aac')
        finally:
            audio_clip.close()
    finally:
        # Always release the clip, even when extraction or encoding fails.
        video_clip.close()
    return audio_clip_filepath

@functools.lru_cache(maxsize=1)
def _load_enhancement_model():
    """Load the pretrained SepFormer enhancement model once and reuse it."""
    return separator.from_hparams(
        source="speechbrain/sepformer-whamr-enhancement",
        savedir='pretrained_models/sepformer-whamr-enhancement',
    )


def speechbrain(input_obj, input_obj_type):
    """Run speech enhancement on an uploaded audio or video file.

    Args:
        input_obj: Path to the uploaded file.
        input_obj_type: ``"video"`` to extract the audio track from the file
            first; any other value treats ``input_obj`` as an audio file.

    Returns:
        The path of the written WAV file (``"clean_audio_file.wav"``).

    Raises:
        ValueError: If ``input_obj`` is empty or ``None`` (nothing uploaded).
    """
    # Fail fast with a clear message before doing any expensive work.
    if not input_obj:
        raise ValueError("No input file provided; please upload a file first.")

    # The model is cached, so it is only instantiated on the first request
    # instead of on every call.
    model = _load_enhancement_model()

    if input_obj_type == "video":
        aud = convert_video_to_audio(input_obj)
    else:
        aud = input_obj

    est_sources = model.separate_file(path=aud)

    # NOTE: the output path is a fixed name, so every call overwrites the
    # previous result.
    output_path = "clean_audio_file.wav"
    # Keeps index 0 of the last axis — presumably the single enhanced source
    # of a (batch, time, source) tensor; TODO confirm against SpeechBrain.
    # 8000 is presumably the model's native sample rate — confirm against the
    # model card.
    torchaudio.save(output_path, est_sources[:, :, 0].detach().cpu(), 8000)
    return output_path

def main():
    """Build the Gradio UI (a Video tab and an Audio tab) and launch it.

    Both tabs call ``speechbrain``; each passes its file input plus a
    single-choice radio whose value ("video" or "audio") tells the handler
    how to treat the upload.
    """
    # delete_cache is presumably (frequency, age) in seconds, i.e. a daily
    # cleanup of files older than a day — see the Gradio Blocks docs.
    with gr.Blocks(title="Speech Enhancement", delete_cache=(86400, 86400), theme=gr.themes.Ocean()) as demo:
        gr.Markdown("Gradio demo for Speech Enhancement by SpeechBrain. To use it, simply upload a video or audio file in the matching tab. Read more at the links below.")
        gr.Markdown("<p style='text-align: center'><a href='https://arxiv.org/abs/2010.13154' target='_blank'>Attention is All You Need in Speech Separation</a> | <a href='https://github.com/speechbrain/speechbrain/tree/develop/templates/enhancement' target='_blank'>Github Repo</a></p>")
        # NOTE: bundled examples are currently disabled; the previous entry
        # was 'samples_audio_samples_test_mixture.wav'.
        with gr.Tabs(selected="video"):
            with gr.Tab("Video", id="video"):
                gr.Interface(
                    fn=speechbrain,
                    inputs=[
                        gr.Video(),
                        gr.Radio(choices=["video"], value="video", label="File Type"),
                    ],
                    outputs=[
                        gr.Audio(label="Output Audio", type="filepath"),
                    ],
                )
            with gr.Tab("Audio", id="audio"):
                gr.Interface(
                    fn=speechbrain,
                    inputs=[
                        gr.Audio(type="filepath"),
                        gr.Radio(choices=["audio"], value="audio", label="File Type"),
                    ],
                    outputs=[
                        gr.Audio(label="Output Audio", type="filepath"),
                    ],
                )
    # Launch after the Blocks context has closed, i.e. once the layout is
    # fully built.
    demo.launch()

# Start the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()