|
import os |
|
import gradio as gr |
|
import requests |
|
import io |
|
import re |
|
from PIL import Image |
|
from groq import Groq |
|
|
|
|
|
|
|
# --- Configuration ---------------------------------------------------------

# API credentials are read from the environment so secrets never live in code.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HF_API_KEY = os.getenv("HF_API_KEY")

# Fail fast at startup if either key is missing.
# BUG FIX: the message previously said "HF_TOKEN", but the code reads the
# HF_API_KEY environment variable — name the variable users must actually set.
if not GROQ_API_KEY or not HF_API_KEY:
    raise ValueError("GROQ_API_KEY and HF_API_KEY must be set in the environment variables.")

# Single Groq client reused by all transcription / chat-completion calls below.
client = Groq(api_key=GROQ_API_KEY)

# Hugging Face Inference API model used for text-to-image generation.
HF_IMAGE_MODEL = "stabilityai/stable-diffusion-2-1"
|
|
|
|
|
def transcribe_audio(audio_path):
    """Transcribe a Tamil audio file to text with Groq's Whisper model.

    Args:
        audio_path: Filesystem path to the uploaded audio file, or a falsy
            value when nothing was uploaded.

    Returns:
        The transcribed Tamil text, or an "Error: ..." string on failure.
    """
    if not audio_path:
        return "Error: Please upload an audio file."

    try:
        # Read the whole file up front; the API takes (filename, bytes).
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()

        result = client.audio.transcriptions.create(
            file=(os.path.basename(audio_path), audio_bytes),
            model="whisper-large-v3",
            language="ta",
            response_format="verbose_json",
        )
        return result.text.strip()
    except Exception as e:
        return f"Error in transcription: {str(e)}"
|
|
|
|
|
def translate_tamil_to_english(tamil_text):
    """Translate Tamil text to English via a Groq-hosted Llama model.

    Args:
        tamil_text: The Tamil source text (typically the transcription).

    Returns:
        The English translation, or an "Error: ..." string on failure.
    """
    if not tamil_text:
        return "Error: Please enter Tamil text for translation."

    request_prompt = (
        f"Translate this Tamil text to English: {tamil_text}\n"
        "Give only the translated text as output."
    )

    try:
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": request_prompt}],
        )
        raw_output = completion.choices[0].message.content.strip()
        # Drop any stray <think>/</think> tags the model may emit.
        return re.sub(r"</?think>", "", raw_output).strip()
    except Exception as e:
        return f"Error in translation: {str(e)}"
|
|
|
|
|
def generate_image(english_text):
    """Generate an image from an English prompt via the HF Inference API.

    Args:
        english_text: Text description to render as an image.

    Returns:
        A PIL.Image on success, or an "Error: ..." string on failure.
    """
    if not english_text:
        return "Error: Please enter a description for image generation."

    try:
        endpoint = f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}"
        response = requests.post(
            endpoint,
            headers={"Authorization": f"Bearer {HF_API_KEY}"},
            json={"inputs": english_text},
        )
        response.raise_for_status()

        image_bytes = response.content
        if not image_bytes:
            return "Error: Received empty response from API."

        # The API returns raw image bytes; decode them with Pillow.
        return Image.open(io.BytesIO(image_bytes))
    except Exception as e:
        return f"Error in image generation: {str(e)}"
|
|
|
|
|
|
|
def generate_text(english_text):
    """Generate creative English text from a prompt via a Groq-hosted model.

    Args:
        english_text: The prompt (typically the translated description).

    Returns:
        The generated text, or an error/help string on failure.
    """
    if not english_text:
        return "Please enter a prompt."

    try:
        completion = client.chat.completions.create(
            model="llama3-8b-8192",
            messages=[{"role": "user", "content": english_text}],
        )
        raw_output = completion.choices[0].message.content.strip()
        # Drop any stray <think>/</think> tags the model may emit.
        return re.sub(r"</?think>", "", raw_output).strip()
    except Exception as e:
        return f"Error in text generation: {str(e)}"
|
|
|
|
|
def process_audio(audio_path):
    """Run the full pipeline: transcribe → translate → image + creative text.

    Args:
        audio_path: Path to the uploaded Tamil audio file.

    Returns:
        A 4-tuple (tamil_text, english_text, image, generated_text).
        When a stage fails, its error string occupies that slot and the
        remaining slots are None.
    """
    # Stage 1: Tamil transcription.
    transcript = transcribe_audio(audio_path)
    if "Error" in transcript:
        return transcript, None, None, None

    # Stage 2: English translation.
    translation = translate_tamil_to_english(transcript)
    if "Error" in translation:
        return transcript, translation, None, None

    # Stage 3: image generation (returns a str only on error).
    artwork = generate_image(translation)
    if isinstance(artwork, str) and "Error" in artwork:
        return transcript, translation, None, None

    # Stage 4: creative text generation.
    story = generate_text(translation)
    return transcript, translation, artwork, story
|
|
|
|
|
|
|
def clear_outputs():
    """Reset the four output widgets: empty text fields and no image."""
    blank = ""
    return blank, blank, None, blank
|
|
|
|
|
# ---------------------------------------------------------------------------
# Gradio UI: wires the processing pipeline above to a simple web interface.
# Layout, top to bottom: header → audio input + buttons → transcription /
# translation textboxes → generated image + generated text.
# NOTE(review): the Markdown/label strings below contain mis-encoded emoji
# characters (mojibake) — preserved as-is here; re-encoding the file as UTF-8
# would restore them.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:

    # App title and tagline.
    gr.Markdown("### π¨ **TransArt: Multimodal Tamil Audio Experience**")
    gr.Markdown("**Transform Tamil audio into captivating content** β from transcription and English translation to stunning AI-generated images and creative narratives! π")

    gr.Markdown("---")  # horizontal rule separating header from inputs

    # Input row: audio upload on the left, action buttons stacked on the right.
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="πΆ Upload Tamil Audio")
        with gr.Column():
            submit_button = gr.Button("β¨ Submit")
            clear_button = gr.Button("π§Ή Clear")

    gr.Markdown("---")

    # First output row: Tamil transcription and its English translation.
    with gr.Row():
        transcribed_text = gr.Textbox(label="π Transcribed Tamil Text")
        translated_text = gr.Textbox(label="π Translated English Text")

    gr.Markdown("---")

    # Second output row: generated image and generated narrative text.
    with gr.Row():
        generated_image = gr.Image(label="π¨ Generated AI Image")
        generated_text = gr.Textbox(label="π‘ Generated English Text")

    # Submit runs the full pipeline; the 4-tuple returned by process_audio
    # maps positionally onto the four output widgets listed here.
    submit_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=[transcribed_text, translated_text, generated_image, generated_text],
    )
    # Clear resets all four output widgets (see clear_outputs above).
    clear_button.click(
        fn=clear_outputs,
        inputs=[],
        outputs=[transcribed_text, translated_text, generated_image, generated_text],
    )

# Launch the web server (blocking call; runs when the module is executed).
demo.launch()