Spaces:
Paused
Paused
"""Audio utilities for CSM-1B API.""" | |
import io | |
import tempfile | |
from typing import Optional | |
import os | |
import torch | |
import torchaudio | |
import ffmpeg | |
def convert_audio_format(
    audio_tensor: torch.Tensor,
    sample_rate: int,
    format: str = "mp3",
    bit_rate: Optional[str] = "128k",
) -> bytes:
    """Encode an audio tensor into the requested container/codec.

    The tensor is first written as a WAV file with torchaudio and then, for
    every format other than WAV, transcoded with ffmpeg.

    Args:
        audio_tensor: Audio tensor shaped (channels, samples). A 1-D tensor
            is treated as a single channel.
        sample_rate: Sample rate of the audio in Hz.
        format: Output format: "mp3", "opus", "aac", "flac" or "wav"
            (case-insensitive).
        bit_rate: Target bit rate for the lossy formats (mp3, opus, aac),
            e.g. "128k". ``None`` leaves the choice to the encoder. Ignored
            for flac and wav.

    Returns:
        The encoded audio as bytes.

    Raises:
        ValueError: If ``format`` is not one of the supported formats.
        ffmpeg.Error: If the ffmpeg transcoding step fails.
    """
    # ffmpeg muxer to request for each transcoded format. AAC has no muxer
    # called "aac"; raw AAC streams are written with the ADTS muxer.
    muxers = {"mp3": "mp3", "opus": "opus", "aac": "adts", "flac": "flac"}
    lossy_formats = {"mp3", "opus", "aac"}

    fmt = format.lower()
    # Validate up front: an unknown format used to fall through every branch
    # and silently return empty bytes.
    if fmt != "wav" and fmt not in muxers:
        supported = ", ".join(sorted([*muxers, "wav"]))
        raise ValueError(
            f"Unsupported audio format: {format!r} (expected one of: {supported})"
        )

    # torchaudio.save expects a 2-D (channels, samples) tensor.
    waveform = audio_tensor.unsqueeze(0) if audio_tensor.dim() == 1 else audio_tensor
    # Drop autograd tracking and move to host memory before writing to disk.
    waveform = waveform.detach().cpu()

    # A private temporary directory removes both files on exit (even on
    # error) and guarantees the output path does not exist yet, so ffmpeg
    # never has to ask whether to overwrite it.
    with tempfile.TemporaryDirectory() as tmp_dir:
        wav_path = os.path.join(tmp_dir, "input.wav")
        torchaudio.save(wav_path, waveform, sample_rate)

        if fmt == "wav":
            # The intermediate WAV file already is the requested output.
            result_path = wav_path
        else:
            result_path = os.path.join(tmp_dir, f"output.{fmt}")
            output_kwargs = {"format": muxers[fmt]}
            # Only pass a bit rate when one was actually given; forwarding
            # None would put the literal string "None" on the command line.
            if fmt in lossy_formats and bit_rate is not None:
                output_kwargs["audio_bitrate"] = bit_rate
            ffmpeg.input(wav_path).output(result_path, **output_kwargs).run(quiet=True)

        with open(result_path, "rb") as f:
            return f.read()