Bils committed (verified) · Commit ecc69bf · 1 Parent(s): a3b5047

Update app.py

Files changed (1): app.py (+71 -49)

app.py CHANGED
@@ -9,18 +9,20 @@ from transformers import (
     MusicgenForConditionalGeneration,
 )
 from scipy.io.wavfile import write
+from TTS.api import TTS
 import tempfile
 from dotenv import load_dotenv
 import spaces
 
+# Load environment variables
 load_dotenv()
 hf_token = os.getenv("HF_TOKEN")
 
 # ---------------------------------------------------------------------
 # Load Llama 3 Pipeline with Zero GPU (Encapsulated)
 # ---------------------------------------------------------------------
-@spaces.GPU(duration=300)  # GPU allocation for 300 seconds
-def generate_script(user_prompt: str, model_id: str, token: str):
+@spaces.GPU(duration=300)
+def generate_script(user_prompt: str, duration: int, model_id: str, token: str):
     try:
         tokenizer = AutoTokenizer.from_pretrained(model_id, use_auth_token=token)
         model = AutoModelForCausalLM.from_pretrained(
@@ -34,7 +36,7 @@ def generate_script(user_prompt: str, model_id: str, token: str):
 
         system_prompt = (
             "You are an expert radio imaging producer specializing in sound design and music. "
-            "Take the user's concept and craft a concise, creative promo script with a strong focus on auditory elements and musical appeal."
+            f"Generate a concise, creative promo script for a {duration}-second ad, focusing on auditory elements and musical appeal."
         )
 
         combined_prompt = f"{system_prompt}\nUser concept: {user_prompt}\nRefined script:"
@@ -43,7 +45,6 @@ def generate_script(user_prompt: str, model_id: str, token: str):
     except Exception as e:
         return f"Error generating script: {e}"
 
-
 # ---------------------------------------------------------------------
 # Load MusicGen Model (Encapsulated)
 # ---------------------------------------------------------------------
@@ -53,49 +54,60 @@ def generate_audio(prompt: str, audio_length: int):
         musicgen_model = MusicgenForConditionalGeneration.from_pretrained("facebook/musicgen-small")
         musicgen_processor = AutoProcessor.from_pretrained("facebook/musicgen-small")
 
-        # Ensure everything is on the same device (GPU or CPU)
         device = "cuda" if torch.cuda.is_available() else "cpu"
         musicgen_model.to(device)
 
         inputs = musicgen_processor(text=[prompt], padding=True, return_tensors="pt").to(device)
         outputs = musicgen_model.generate(**inputs, max_new_tokens=audio_length)
 
-        # Move outputs to CPU for further processing
         audio_data = outputs[0, 0].cpu().numpy()
-
-        # Normalize and save the audio file
         normalized_audio = (audio_data / max(abs(audio_data)) * 32767).astype("int16")
+
        output_path = f"{tempfile.gettempdir()}/generated_audio.wav"
         write(output_path, musicgen_model.config.audio_encoder.sampling_rate, normalized_audio)
-
         return output_path
     except Exception as e:
         return f"Error generating audio: {e}"
 
-
 # ---------------------------------------------------------------------
-# Gradio Interface Functions
+# Generate Voice-Over with Coqui XTTS-v2
 # ---------------------------------------------------------------------
-def interface_generate_script(user_prompt, llama_model_id):
-    return generate_script(user_prompt, llama_model_id, hf_token)
+@spaces.GPU(duration=300)
+def generate_voice(script: str, reference_audio: str, language: str):
+    try:
+        tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
+        output_path = f"{tempfile.gettempdir()}/voice_over.wav"
+        tts.tts_to_file(
+            text=script,
+            file_path=output_path,
+            speaker_wav=reference_audio,
+            language=language,
+        )
+        return output_path
+    except Exception as e:
+        return f"Error generating voice-over: {e}"
 
+# ---------------------------------------------------------------------
+# Interface Functions
+# ---------------------------------------------------------------------
+def interface_generate_script(user_prompt, duration, llama_model_id):
+    return generate_script(user_prompt, duration, llama_model_id, hf_token)
 
 def interface_generate_audio(script, audio_length):
     return generate_audio(script, audio_length)
 
+def interface_generate_voice(script, reference_audio, language):
+    return generate_voice(script, reference_audio, language)
 
 # ---------------------------------------------------------------------
 # Interface
 # ---------------------------------------------------------------------
 with gr.Blocks() as demo:
-    # Header
-    gr.Markdown(
-        """
-        # 🎧 AI-Powered Radio Imaging Studio 🚀
-        ### Create stunning **radio promos** with **Llama 3** and **MusicGen**
-        🔥 **Zero GPU** integration for efficiency and ease!
-        """
-    )
+    gr.Markdown("""
+    # 🎧 All-in-One Radio Promo Studio 🚀
+    ### Create professional scripts, soundscapes, and voice-overs in minutes!
+    🔥 Powered by **Llama 3**, **MusicGen**, and **XTTS-v2**
+    """)
 
     # Script Generation Section
     gr.Markdown("## ✍️ Step 1: Generate Your Promo Script")
@@ -103,62 +115,72 @@ with gr.Blocks() as demo:
         user_prompt = gr.Textbox(
             label="🎤 Enter Promo Idea",
             placeholder="E.g., A 15-second energetic jingle for a morning talk show.",
-            lines=2,
-            info="Describe your promo idea clearly to generate a creative script."
+            lines=2
+        )
+        duration = gr.Dropdown(
+            label="⏳ Duration",
+            choices=["15", "30", "60"],
+            value="15",
+            info="Choose the duration of the promo (in seconds)."
         )
         llama_model_id = gr.Textbox(
-            label="🎿 Llama 3 Model ID",
-            value="meta-llama/Meta-Llama-3-8B-Instruct",
-            info="Enter the Hugging Face model ID for Llama 3."
+            label="🎛️ Llama 3 Model ID",
+            value="meta-llama/Meta-Llama-3-8B-Instruct"
        )
     generate_script_button = gr.Button("Generate Script ✨")
-    script_output = gr.Textbox(
-        label="🖌️ Generated Promo Script",
-        lines=4,
-        interactive=False,
-        info="Your generated promo script will appear here."
-    )
+    script_output = gr.Textbox(label="🖌️ Generated Promo Script", lines=4, interactive=False)
 
     # Audio Generation Section
-    gr.Markdown("## 🎵 Step 2: Generate Audio from Your Script")
+    gr.Markdown("## 🎵 Step 2: Generate Background Music")
     with gr.Row():
         audio_length = gr.Slider(
             label="🎶 Audio Length (tokens)",
             minimum=128,
             maximum=1024,
             step=64,
-            value=512,
-            info="Select the desired audio token length."
+            value=512
         )
         generate_audio_button = gr.Button("Generate Audio 🎶")
-        audio_output = gr.Audio(
-            label="🎵 Generated Audio File",
-            type="filepath",
-            interactive=False
-        )
+        audio_output = gr.Audio(label="🎵 Generated Audio", type="filepath")
+
+    # Voice-Over Section
+    gr.Markdown("## 🎙️ Step 3: Generate Voice-Over")
+    with gr.Row():
+        reference_audio = gr.Audio(
+            label="🎤 Upload Reference Voice (6 seconds)",
+            type="filepath"
+        )
+        language = gr.Dropdown(
+            label="🌍 Language",
+            choices=["en", "es", "fr", "de", "it"],
+            value="en"
+        )
+        generate_voice_button = gr.Button("Generate Voice-Over 🎤")
+        voice_output = gr.Audio(label="🔊 Generated Voice-Over", type="filepath")
 
     # Footer
-    gr.Markdown(
-        """
+    gr.Markdown("""
     <br><hr>
     <p style="text-align: center; font-size: 0.9em;">
     Created with ❤️ by <a href="https://bilsimaging.com" target="_blank">bilsimaging.com</a>
     </p>
-        """,
-        elem_id="footer"
-    )
+    """)
 
     # Button Actions
     generate_script_button.click(
         fn=interface_generate_script,
-        inputs=[user_prompt, llama_model_id],
-        outputs=script_output,
+        inputs=[user_prompt, duration, llama_model_id],
+        outputs=script_output
     )
-
     generate_audio_button.click(
         fn=interface_generate_audio,
         inputs=[script_output, audio_length],
-        outputs=audio_output,
+        outputs=audio_output
+    )
+    generate_voice_button.click(
+        fn=interface_generate_voice,
+        inputs=[script_output, reference_audio, language],
+        outputs=voice_output
     )
 
 # ---------------------------------------------------------------------
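
Note on the new voice-over path: the sketch below mirrors generate_voice() from this diff so the XTTS-v2 step can be smoke-tested outside the Gradio UI. It assumes the Coqui TTS package (imported in the diff as TTS.api) is installed; the sample script text, the placeholder clip path reference_voice.wav, and the "en" language code are illustrative values, not part of the commit.

# Standalone sketch of the voice-over step added in this commit (Coqui XTTS-v2).
# Assumptions: the coqui `TTS` package is installed, CUDA is optional, and
# `reference_voice.wav` is a placeholder ~6-second clip of the target voice.
import tempfile

import torch
from TTS.api import TTS

script = "Wake up with the city's boldest morning show, weekdays from six."  # illustrative text
reference_clip = "reference_voice.wav"  # placeholder path, not part of the commit

# Same model ID and call pattern as generate_voice() in the diff above.
tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=torch.cuda.is_available())
output_path = f"{tempfile.gettempdir()}/voice_over.wav"
tts.tts_to_file(
    text=script,
    file_path=output_path,
    speaker_wav=reference_clip,
    language="en",
)
print(f"Voice-over written to {output_path}")

As a rough guide (assuming MusicGen's usual rate of about 50 audio tokens per second), the 128-1024 token slider in Step 2 corresponds to roughly 2.5-20 seconds of music.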