ZeeAI1 committed on
Commit
f23e3b4
·
verified ·
1 Parent(s): c1b97f6

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. README.md +8 -7
  3. app.py +64 -0
  4. requirements.txt +8 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Container image for the Streamlit voiceover app.
FROM python:3.10-slim

# Environment switches for pip, the Python interpreter and Numba.
ENV PIP_NO_CACHE_DIR=true \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    NUMBA_DISABLE_CACHE=1

# System packages: ffmpeg/libsndfile for audio and video I/O, git for the
# pip-from-git install below, and a compiler toolchain for source builds.
# The package index is removed in the same layer because `apt-get clean`
# alone leaves /var/lib/apt/lists in the image.
RUN apt-get update && apt-get install -y \
    git ffmpeg libsndfile1 build-essential python3-dev libffi-dev wget curl \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Install Python dependencies before copying the application code so that
# editing app.py or README.md does not invalidate the slow dependency layers.
COPY requirements.txt requirements.txt
RUN pip install --upgrade pip setuptools wheel
RUN pip install numpy==1.24.3
RUN pip install git+https://github.com/openai/whisper.git
RUN pip install -r requirements.txt

COPY app.py app.py
COPY README.md README.md

# Serve the app on all interfaces, port 7860.
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
README.md CHANGED
@@ -1,12 +1,13 @@
1
  ---
2
- title: Voice0006
3
- emoji:
4
  colorFrom: gray
5
- colorTo: red
6
- sdk: streamlit
7
- sdk_version: 1.44.1
8
- app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
1
  ---
2
+ title: AI Voiceover App V2 (gTTS)
3
+ emoji: 🧠
4
  colorFrom: gray
5
+ colorTo: green
6
+ sdk: docker
 
 
7
  pinned: false
8
  ---
9
 
10
+ # 🎤 AI Voiceover V2 Replace One Speaker Only (with gTTS)
11
+
12
+ Upload a video, transcribe it with Whisper, and replace one speaker's voice with a Google TTS (gTTS) voiceover mixed over the original audio. No Numba or Librosa dependency issues!
13
+
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ os.environ["NUMBA_DISABLE_CACHE"] = "1"
3
+
4
+ import streamlit as st
5
+ import whisper
6
+ from gtts import gTTS
7
+ from moviepy.editor import VideoFileClip, AudioFileClip
8
+ from tempfile import NamedTemporaryFile
9
+ import torchaudio
10
+
11
# Page title and layout, followed by the visible heading.
st.set_page_config(page_title="AI Voiceover V2", layout="centered")
st.title("🎤 AI Voiceover V2: Replace One Speaker Only")


@st.cache_resource
def load_whisper_model():
    """Return the Whisper "small" model, cached through ``st.cache_resource``."""
    model = whisper.load_model("small")
    return model


# Module-level handle used by the transcription step further down.
whisper_model = load_whisper_model()
19
+
20
def _fit_to_length(track, target_len):
    """Return *track* zero-padded or truncated to exactly *target_len* samples.

    *track* is indexed as (channels, samples), matching how the tensors
    returned by ``torchaudio.load`` are used below.
    """
    fitted = track.new_zeros((track.shape[0], target_len))
    keep = min(track.shape[1], target_len)
    fitted[:, :keep] = track[:, :keep]
    return fitted


def _match_to_original(voice, voice_sr, target_sr, target_len):
    """Resample, downmix and length-fit the TTS clip so it can be mixed.

    Args:
        voice: (channels, samples) tensor holding the TTS audio.
        voice_sr: Sample rate of *voice*.
        target_sr: Sample rate of the original track.
        target_len: Number of samples in the original track.

    Returns:
        A (1, target_len) tensor at *target_sr*; the single channel
        broadcasts against any channel count in the original track.
    """
    if voice_sr != target_sr:
        # Without this the voiceover is mixed at the wrong speed and pitch.
        voice = torchaudio.functional.resample(voice, voice_sr, target_sr)
    voice = voice.mean(dim=0, keepdim=True)
    return _fit_to_length(voice, target_len)


video_file = st.file_uploader("Upload a short video clip (MP4 preferred)", type=["mp4", "mov", "avi"])

if video_file:
    # NOTE(review): Streamlit re-executes this script on every interaction, so
    # the upload copy, audio extraction and transcription repeat on each rerun
    # and the temp files are never removed. Consider caching per upload in
    # st.session_state and cleaning up old files.
    with NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
        # getvalue() returns the full payload regardless of stream position.
        tmp_video.write(video_file.getvalue())
        tmp_video_path = tmp_video.name

    st.video(tmp_video_path)

    # Derive sibling paths from the stem rather than str.replace(), which
    # would rewrite any ".mp4" occurring elsewhere in the path.
    base_path = os.path.splitext(tmp_video_path)[0]
    audio_path = base_path + ".wav"

    video = VideoFileClip(tmp_video_path)
    try:
        if video.audio is None:
            st.error("The uploaded video has no audio track to transcribe.")
            st.stop()
        video.audio.write_audiofile(audio_path)

        st.info("Transcribing using Whisper...")
        result = whisper_model.transcribe(audio_path)
        st.subheader("📝 Detected Speech")
        st.write(result["text"])

        custom_text = st.text_area("Enter your custom voiceover text to replace one speaker:", result["text"])

        if st.button("Replace Only One Speaker's Voice"):
            if not custom_text.strip():
                # gTTS rejects empty input; stop with a readable message.
                st.warning("Please enter some voiceover text first.")
                st.stop()

            # gTTS emits MP3 data, so give the file a matching extension.
            ai_voice_path = base_path + "_ai_voice.mp3"
            gTTS(text=custom_text).save(ai_voice_path)
            st.audio(ai_voice_path)

            original_audio, sr = torchaudio.load(audio_path)
            ai_audio, ai_sr = torchaudio.load(ai_voice_path)
            ai_audio = _match_to_original(ai_audio, ai_sr, sr, original_audio.shape[1])

            # 40 % original bed + 60 % synthetic voice; the weights sum to 1.
            mixed_audio = (original_audio * 0.4) + (ai_audio * 0.6)
            mixed_path = base_path + "_mixed.wav"
            torchaudio.save(mixed_path, mixed_audio, sr)

            final_path = base_path + "_final_v2.mp4"
            mixed_clip = AudioFileClip(mixed_path)
            try:
                final_video = video.set_audio(mixed_clip)
                final_video.write_videofile(final_path, codec="libx264", audio_codec="aac")
            finally:
                mixed_clip.close()

            with open(final_path, "rb") as f:
                st.download_button(label="📥 Download Final Video with Mixed Voiceover", data=f, file_name="final_ai_video_v2.mp4")
    finally:
        # Release the reader held by the clip, including when st.stop() fires.
        video.close()
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
streamlit
gTTS
openai-whisper
# app.py imports `moviepy.editor` and calls `set_audio`; MoviePy 2.0 removed
# the former and renamed the latter, so stay on the 1.x series.
moviepy<2.0
ffmpeg-python
imageio-ffmpeg
torchaudio
torch