积极的屁孩 committed on
Commit
defde46
·
1 Parent(s): a8377f8

trying to fix vevo style

Browse files
Files changed (1) hide show
  1. app.py +18 -10
app.py CHANGED
@@ -385,17 +385,25 @@ def vevo_style(content_wav, style_wav):
385
  else:
386
  raise ValueError("Invalid content audio format")
387
 
388
- if isinstance(style_wav, tuple) and len(style_wav) == 2:
389
- # 确保正确的顺序 (data, sample_rate)
390
- if isinstance(style_wav[0], np.ndarray):
391
- style_data, style_sr = style_wav
392
- else:
393
- style_sr, style_data = style_wav
394
- style_tensor = torch.FloatTensor(style_data)
395
- if style_tensor.ndim == 1:
396
- style_tensor = style_tensor.unsqueeze(0) # 添加通道维度
397
  else:
398
- raise ValueError("Invalid style audio format")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
399
 
400
  # 打印debug信息
401
  print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")
 
385
  else:
386
  raise ValueError("Invalid content audio format")
387
 
388
+ if isinstance(style_wav[0], np.ndarray):
389
+ style_data, style_sr = style_wav
 
 
 
 
 
 
 
390
  else:
391
+ style_sr, style_data = style_wav
392
+
393
+ # 确保是单声道
394
+ if len(style_data.shape) > 1 and style_data.shape[1] > 1:
395
+ style_data = np.mean(style_data, axis=1)
396
+
397
+ # 重采样到24kHz
398
+ if style_sr != 24000:
399
+ style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
400
+ style_tensor = torchaudio.functional.resample(style_tensor, style_sr, 24000)
401
+ style_sr = 24000
402
+ else:
403
+ style_tensor = torch.FloatTensor(style_data).unsqueeze(0)
404
+
405
+ # 归一化音量
406
+ style_tensor = style_tensor / (torch.max(torch.abs(style_tensor)) + 1e-6) * 0.95
407
 
408
  # 打印debug信息
409
  print(f"Content audio shape: {content_tensor.shape}, sample rate: {content_sr}")