Spaces:
Running
on
Zero
Running
on
Zero
积极的屁孩
committed on
Commit
·
defde46
1
Parent(s):
a8377f8
trying to fix vevo style
Browse files
app.py
CHANGED
@@ -385,17 +385,25 @@ def vevo_style(content_wav, style_wav):
|
|
385 |
else:
|
386 |
raise ValueError("Invalid content audio format")
|
387 |
|
388 |
-
if isinstance(style_wav,
|
389 |
-
|
390 |
-
if isinstance(style_wav[0], np.ndarray):
|
391 |
-
style_data, style_sr = style_wav
|
392 |
-
else:
|
393 |
-
style_sr, style_data = style_wav
|
394 |
-
style_tensor = torch.FloatTensor(style_data)
|
395 |
-
if style_tensor.ndim == 1:
|
396 |
-
style_tensor = style_tensor.unsqueeze(0) # 添加通道维度
|
397 |
else:
|
398 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
399 |
|
400 |
# 打印debug信息
|
401 |
print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")
|
|
|
385 |
else:
|
386 |
raise ValueError("Invalid content audio format")
|
387 |
|
388 |
+
if isinstance(style_wav[0], np.ndarray):
|
389 |
+
style_data, style_sr = style_wav
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
390 |
else:
|
391 |
+
style_sr, style_data = style_wav
|
392 |
+
|
393 |
+
# 确保是单声道
|
394 |
+
if len(style_data.shape) > 1 and style_data.shape[1] > 1:
|
395 |
+
style_data = np.mean(style_data, axis=1)
|
396 |
+
|
397 |
+
# 重采样到24kHz
|
398 |
+
if style_sr != 24000:
|
399 |
+
style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
|
400 |
+
style_tensor = torchaudio.functional.resample(style_tensor, style_sr, 24000)
|
401 |
+
style_sr = 24000
|
402 |
+
else:
|
403 |
+
style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
|
404 |
+
|
405 |
+
# 归一化音量
|
406 |
+
style_tensor = style_tensor / (torch.max(torch.abs(style_tensor)) + 1e-6) * 0.95
|
407 |
|
408 |
# 打印debug信息
|
409 |
print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")
|