File size: 2,422 Bytes
4cc54d7
 
 
 
1f7b6ee
4cc54d7
2f40a85
4cc54d7
d0a9fdf
 
 
 
 
d6e9825
1f7b6ee
d0a9fdf
 
 
4cc54d7
1dc0f0f
f2c6f1c
4cc54d7
0228cca
f2c6f1c
7ccd379
6c001bc
0228cca
c679ed5
4cc54d7
 
05629f7
7ccd379
4cc54d7
05629f7
4cc54d7
7ccd379
f2c6f1c
05629f7
 
 
7ccd379
 
 
 
 
4cc54d7
05629f7
3754c59
7ccd379
f2c6f1c
4cc54d7
f2c6f1c
46972b6
2f40a85
4cc54d7
2f40a85
4cc54d7
7ccd379
4cc54d7
 
05629f7
 
d0a9fdf
 
 
05629f7
f2c6f1c
05629f7
4cc54d7
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
import gradio as gr
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation
from matplotlib.colors import to_hex
import librosa
import tempfile

def check_rgba(string: str) -> str:
    """Normalise a colour-picker value to a hex colour string.

    Values containing ``#`` are treated as already being hex colours and
    are returned unchanged. Any other value is parsed as functional
    notation of the form ``rgb(r, g, b)`` or ``rgba(r, g, b, a)``: the
    first three components are divided by 255 and the optional fourth
    (alpha) component is used as-is before conversion with matplotlib's
    ``to_hex``.

    Args:
        string: Colour value, e.g. ``"#0021e4"`` or
            ``"rgba(0, 33, 228, 1)"``.

    Returns:
        The unchanged input when it contains ``#``; otherwise the hex
        string produced by ``to_hex(..., keep_alpha=True)``.

    Raises:
        ValueError: If the value has no parenthesised component list, a
            component is not numeric, or there are not 3 or 4 components.
    """
    if '#' in string:
        return string

    _, paren, remainder = string.partition("(")
    if not paren:
        # Fail with a readable message instead of an IndexError from indexing
        # the result of a split that found no "(".
        raise ValueError(f"Unrecognised colour value: {string!r}")

    # float() tolerates surrounding whitespace, so no explicit strip is needed.
    components = [float(part) for part in remainder.split(")")[0].split(",")]
    if len(components) not in (3, 4):
        raise ValueError(
            f"Expected 3 or 4 colour components, got {len(components)}: {string!r}"
        )

    # Colour channels are scaled by 1/255; the alpha component is passed through.
    rgba = [value / 255 for value in components[:3]] + components[3:]
    return to_hex(rgba, keep_alpha=True)

def extract_waveform_animation(audio_file, window_seconds, waveform_color, background_color):
    """Render a scrolling-window waveform animation and return its file path.

    The audio is loaded with librosa, and one frame is drawn per step of
    ``1 / FPS`` seconds. Each frame shows the ``window_seconds`` of samples
    starting at that frame's time offset. The animation is written to a
    temporary ``.gif`` file that is NOT deleted automatically.

    Args:
        audio_file: Path of the audio file to load.
        window_seconds: Length, in seconds, of the visible waveform window.
        waveform_color: Line colour, as accepted by ``check_rgba``.
        background_color: Figure background colour, as accepted by
            ``check_rgba``.

    Returns:
        Path of the temporary ``.gif`` file containing the animation.

    Raises:
        ValueError: If the loaded audio contains no samples.
    """
    y, sr = librosa.load(audio_file, sr=None)
    if len(y) == 0:
        raise ValueError("Audio file contains no samples.")
    duration = librosa.get_duration(y=y, sr=sr)
    FPS = 1
    window_length = int(window_seconds * sr)
    x_vals = np.linspace(0, duration, num=len(y))
    # At least one frame, so clips shorter than one frame period still render.
    total_frames = max(1, int(duration * FPS))

    # Reserve the output path and close the handle before anything writes to
    # it: saving into a still-open temporary file fails on platforms that lock
    # open files.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.gif') as tmpfile:
        video_path = tmpfile.name

    fig, ax = plt.subplots()
    try:
        line, = ax.plot([], [], lw=2, color=check_rgba(waveform_color))
        ax.set_axis_off()
        bg_color = check_rgba(background_color)
        fig.set_facecolor(bg_color)

        def init():
            ax.set_xlim(0, window_seconds)
            # Fix the vertical range to the full signal so it does not rescale
            # from frame to frame.
            ax.set_ylim(np.min(y), np.max(y))
            return line,

        def update(frame):
            # Map the frame index to a time offset and a sample index.
            start_time = frame / FPS
            start = int(frame * sr / FPS)
            end = start + window_length

            # Scroll the visible x-range to the current window.
            ax.set_xlim(start_time, start_time + window_seconds)

            # Update line data
            line.set_data(x_vals[start:end], y[start:end])
            return line,

        # ``interval`` is the delay between frames in milliseconds.
        ani = FuncAnimation(fig, update, frames=range(total_frames),
                            init_func=init, interval=1000 / FPS, blit=False)
        ani.save(video_path, writer='ffmpeg', fps=FPS)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # without this each call would leak a figure.
        plt.close(fig)

    return video_path

# Gradio front end: an audio upload, a window-length slider and two colour
# pickers are passed, in that order, to extract_waveform_animation; the file
# it returns is displayed through an Image component.
iface = gr.Interface(
    description="Scroll through audio waveform with a moving window.",
    fn=extract_waveform_animation,
    outputs=gr.Image(),
    inputs=[
        gr.Audio(type="filepath"),
        gr.Slider(minimum=1, maximum=10, step=1, value=5, label="Window Size (seconds)"),
        gr.ColorPicker(value="#0021e4", label="Waveform color"),
        gr.ColorPicker(value="#00FFFF00", label="Background color"),
    ],
)

# Start the web server only when executed as a script, not on import.
if __name__ == "__main__":
    iface.launch()