Spaces:
Runtime error
Runtime error
File size: 6,918 Bytes
d57e374 117486a d57e374 117486a 4cf73d6 075c9a6 4cf73d6 117486a 075c9a6 117486a 1834911 d57e374 117486a d57e374 117486a d57e374 075c9a6 117486a d57e374 075c9a6 d57e374 117486a d57e374 075c9a6 117486a d57e374 117486a d57e374 075c9a6 55f08a9 117486a d57e374 117486a 55f08a9 075c9a6 117486a 075c9a6 d57e374 075c9a6 117486a d57e374 075c9a6 117486a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 |
import os
import gc
import torch
import shutil
import atexit
import torchaudio
import numpy as np
import gradio as gr
from pipeline.morph_pipeline_successed_ver1 import AudioLDM2MorphPipeline
# --- Module-level model setup -------------------------------------------------
# Restrict this process to physical GPU 6 (visible afterwards as cuda:0).
# NOTE(review): torch is already imported above, but CUDA initialization is
# lazy, so setting CUDA_VISIBLE_DEVICES here still takes effect as long as
# nothing has touched the GPU yet — confirm nothing above initializes CUDA.
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

# Select the device *before* calling torch.cuda.set_device(): the original
# called set_device(0) unconditionally, which raises on CPU-only machines
# even though the availability check below implies CPU should be supported.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
if device.type == "cuda":
    torch.cuda.set_device(0)

# Initialize the AudioLDM2 morphing pipeline with float32 weights.
dtype = torch.float32
pipeline = AudioLDM2MorphPipeline.from_pretrained(
    "cvssp/audioldm2-large", torch_dtype=dtype
)
pipeline.to(device)
def morph_audio(audio_file1, audio_file2, num_inference_steps, prompt1='', prompt2='', negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two audio files, generating 5 intermediate frames.

    Args:
        audio_file1: Path to the first source audio file.
        audio_file2: Path to the second source audio file.
        num_inference_steps: Diffusion steps used per generated frame.
        prompt1: Optional text prompt describing the first audio.
        prompt2: Optional text prompt describing the second audio.
        negative_prompt1: Negative prompt for the first audio.
        negative_prompt2: Negative prompt for the second audio.

    Returns:
        List of paths to the generated ``.wav`` frames, sorted by frame index.

    Raises:
        RuntimeError: Propagated from the pipeline (including CUDA OOM).
    """
    save_lora_dir = "output"
    # Start from a clean output directory so stale frames are never returned.
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both inputs and clamp the morph length to the shorter clip
    # (truncated to whole seconds).
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2
    duration = int(min(duration1, duration2))

    try:
        # Perform morphing using the pipeline; frames are written to
        # save_lora_dir as a side effect.
        pipeline(
            dtype=dtype,
            audio_file=audio_file1,
            audio_file2=audio_file2,
            audio_length_in_s=duration,
            time_pooling=2,
            freq_pooling=2,
            prompt_1=prompt1,
            prompt_2=prompt2,
            negative_prompt_1=negative_prompt1,
            negative_prompt_2=negative_prompt2,
            save_lora_dir=save_lora_dir,
            use_adain=True,
            use_reschedule=False,
            num_inference_steps=num_inference_steps,
            lamd=0.6,
            output_path=save_lora_dir,
            num_frames=5,
            fix_lora=None,
            use_lora=True,
            lora_steps=2,
            noisy_latent_with_lora=True,
            morphing_with_lora=True,
            use_morph_prompt=True,
            guidance_scale=7.5,
        )
        # Collect the generated frames in numeric order.
        # NOTE(review): assumes frames are named "<index>.wav" — confirm
        # against the pipeline's output naming.
        output_paths = sorted(
            [os.path.join(save_lora_dir, file)
             for file in os.listdir(save_lora_dir) if file.endswith(".wav")],
            key=lambda x: int(os.path.splitext(os.path.basename(x))[0]),
        )
    except RuntimeError as e:
        # Mirror the OOM diagnostics used by morph_audio_with_morphing_factor.
        if "CUDA out of memory" in str(e):
            print("CUDA out of memory. Releasing unused memory...")
        raise
    finally:
        # Release the large waveforms even when the pipeline fails; the
        # original only cleaned up on the success path.
        del waveform1, waveform2
        torch.cuda.empty_cache()
        gc.collect()
    return output_paths
def morph_audio_with_morphing_factor(audio_file1, audio_file2, alpha, num_inference_steps, prompt1='', prompt2='', negative_prompt1="Low quality", negative_prompt2="Low quality"):
    """Morph between two audio files at one interpolation point ``alpha``.

    Args:
        audio_file1: Path to the first source audio file.
        audio_file2: Path to the second source audio file.
        alpha: Morphing factor in [0, 1]; 0 is the first clip, 1 the second.
        num_inference_steps: Diffusion steps used for generation.
        prompt1: Optional text prompt describing the first audio.
        prompt2: Optional text prompt describing the second audio.
        negative_prompt1: Negative prompt for the first audio.
        negative_prompt2: Negative prompt for the second audio.

    Returns:
        Path to the single interpolated ``.wav`` file.

    Raises:
        RuntimeError: Propagated from the pipeline (including CUDA OOM).
    """
    save_lora_dir = "output"
    # Start from a clean output directory so a stale file is never returned.
    if os.path.exists(save_lora_dir):
        shutil.rmtree(save_lora_dir)
    os.makedirs(save_lora_dir, exist_ok=True)

    # Load both inputs and clamp the morph length to the shorter clip
    # (truncated to whole seconds).
    waveform1, sample_rate1 = torchaudio.load(audio_file1)
    duration1 = waveform1.shape[1] / sample_rate1
    waveform2, sample_rate2 = torchaudio.load(audio_file2)
    duration2 = waveform2.shape[1] / sample_rate2
    duration = int(min(duration1, duration2))

    try:
        # Perform morphing using the pipeline; the interpolated clip is
        # written to save_lora_dir as a side effect.
        pipeline(
            dtype=dtype,
            morphing_factor=alpha,
            audio_file=audio_file1,
            audio_file2=audio_file2,
            audio_length_in_s=duration,
            time_pooling=2,
            freq_pooling=2,
            prompt_1=prompt1,
            prompt_2=prompt2,
            negative_prompt_1=negative_prompt1,
            negative_prompt_2=negative_prompt2,
            save_lora_dir=save_lora_dir,
            use_adain=True,
            use_reschedule=False,
            num_inference_steps=num_inference_steps,
            lamd=0.6,
            output_path=save_lora_dir,
            num_frames=5,
            fix_lora=None,
            use_lora=True,
            lora_steps=2,
            noisy_latent_with_lora=True,
            morphing_with_lora=True,
            use_morph_prompt=True,
            guidance_scale=7.5,
        )
        output_paths = os.path.join(save_lora_dir, 'interpolated.wav')
    except RuntimeError as e:
        if "CUDA out of memory" in str(e):
            print("CUDA out of memory. Releasing unused memory...")
            torch.cuda.empty_cache()
            gc.collect()
        # Bare raise preserves the original traceback, and re-raising
        # unconditionally avoids falling through to an undefined
        # output_paths on non-OOM RuntimeErrors.
        raise
    finally:
        # Release the large waveforms on every exit path.
        del waveform1, waveform2
        torch.cuda.empty_cache()
        gc.collect()
    return output_paths
def cleanup_output_dir():
    """Delete the scratch "output" directory, if present, and report it."""
    target = "output"
    if os.path.exists(target):
        shutil.rmtree(target)
        print(f"Cleaned up directory: {target}")


# Ensure the scratch directory does not outlive the process.
atexit.register(cleanup_output_dir)
# Gradio interface function
def interface(audio1, audio2, alpha, num_inference_steps):
    """Gradio callback: delegate to morph_audio_with_morphing_factor."""
    return morph_audio_with_morphing_factor(audio1, audio2, alpha, num_inference_steps)
# Gradio Interface
# demo = gr.Interface(
# fn=interface,
# inputs=[
# gr.Audio(label="Upload Audio File 1", type="filepath"),
# gr.Audio(label="Upload Audio File 2", type="filepath"),
# gr.Slider(0, 1, step=0.01, label="Interpolation Alpha"),
# gr.Slider(10, 50, step=1, label="Inference Steps"),
# # gr.Textbox(label="Prompt for Audio File 1"),
# # gr.Textbox(label="Prompt for Audio File 2"),
# ],
# outputs=gr.Audio(label="Interpolated Audio")
# )
# --- Gradio UI ----------------------------------------------------------------
with gr.Blocks() as demo:
    # Tab 1: morph with a fixed number of intermediate frames.
    # morph_audio uses num_frames=5, hence the five output widgets below.
    with gr.Tab("Sound Morphing with fixed frames."):
        gr.Markdown("### Upload two audio files for morphing")
        with gr.Row():
            audio1 = gr.Audio(label="Upload Audio File 1", type="filepath")
            audio2 = gr.Audio(label="Upload Audio File 2", type="filepath")
        num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50)
        outputs = [
            gr.Audio(label="Morphing audio 1"),
            gr.Audio(label="Morphing audio 2"),
            gr.Audio(label="Morphing audio 3"),
            gr.Audio(label="Morphing audio 4"),
            gr.Audio(label="Morphing audio 5"),
        ]
        submit_btn1 = gr.Button("Submit")
        submit_btn1.click(morph_audio, inputs=[audio1, audio2, num_inference_steps], outputs=outputs)
    # Tab 2: morph at a single user-chosen interpolation factor (one output).
    with gr.Tab("Sound Morphing with specified morphing factor."):
        gr.Markdown("### Upload two audio files for morphing")
        with gr.Row():
            audio1 = gr.Audio(label="Upload Audio File 1", type="filepath")
            audio2 = gr.Audio(label="Upload Audio File 2", type="filepath")
        alpha = gr.Slider(0, 1, step=0.01, label="Interpolation Alpha")
        num_inference_steps = gr.Slider(10, 50, step=1, label="Inference Steps", value=50)
        outputs=gr.Audio(label="Interpolated Audio")
        submit_btn2 = gr.Button("Submit")
        submit_btn2.click(morph_audio_with_morphing_factor, inputs=[audio1, audio2, alpha, num_inference_steps], outputs=outputs)

if __name__ == "__main__":
    # share=True publishes a temporary public Gradio link.
    demo.launch(share=True)