# Lizk75's picture
# [fix] Dark/light mode compatibility for Gradio UI
# c9df750
import os
import gradio as gr
import threading
from src.synth_data_gen import SynthDataGen
# Module-level generator instance shared by update_pipeline (generate_dataset)
# and build_ui (output_dir).
generator = SynthDataGen()
def update_output_format(dataset_type):
    """Return the dropdown update for the output formats of *dataset_type*.

    "Tabular" and "Time-series" offer JSON, csv and Parquet; "Text" offers
    JSON and Markdown. In both cases the selection is reset to "JSON".
    Any other value yields ``None``.
    """
    tabular_like = ("Tabular", "Time-series")

    if dataset_type in tabular_like:
        formats = ["JSON", "csv", "Parquet"]
    elif dataset_type == "Text":
        formats = ["JSON", "Markdown"]
    else:
        # Unknown dataset type: no update object is produced.
        return None

    return gr.update(choices=formats, value="JSON")
# How long a generated file stays on disk before it is auto-deleted.
_DOWNLOAD_TTL_SECONDS = 60


def _remove_file_quietly(path):
    """Delete *path* as best-effort cleanup without raising in the timer thread."""
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already removed (e.g. by another cleanup); nothing left to do.
        pass
    except OSError as exc:
        print(f"⚠️ Could not delete temporary file {path}: {exc}")


def update_pipeline(business_problem, dataset_type, output_format, num_samples, model):
    """Generate a dataset and stream UI updates while doing so.

    This is a generator. Every yielded value is a three-item list whose
    items are, in order: an update for the download file component, an
    update for the run button, and the status message text.

    Args:
        business_problem: Free-text description of the desired dataset.
            ``None`` or whitespace-only input is rejected with a status
            message instead of being sent to the generator.
        dataset_type: Selected dataset type, forwarded to the generator.
        output_format: Selected output format, forwarded to the generator.
        num_samples: Requested number of samples, forwarded to the generator.
        model: Selected model name, forwarded to the generator.

    Yields:
        ``[file_update, button_update, status_text]`` lists.
    """
    # Guard against both None and blank text so validation never raises.
    if not (business_problem or "").strip():
        yield [gr.update(visible=False), gr.update(visible=True), "❌ Please enter a business problem before generating."]
        return

    # Initial feedback while generating (file and button hidden meanwhile).
    yield [gr.update(visible=False), gr.update(visible=False), "⏳ Generating dataset..."]

    try:
        file_path = generator.generate_dataset(
            business_problem=business_problem,
            dataset_type=dataset_type,
            output_format=output_format,
            num_samples=num_samples,
            model=model,
        )
        print("🧪 File result returned:", file_path)

        if isinstance(file_path, str) and os.path.exists(file_path):
            # Schedule cleanup; the helper tolerates the file already being
            # gone so the timer thread never dies with a traceback.
            threading.Timer(
                _DOWNLOAD_TTL_SECONDS, _remove_file_quietly, args=[file_path]
            ).start()
            yield [gr.update(value=file_path, visible=True), gr.update(visible=True), "✅ Dataset ready for download."]
        else:
            # The generator returned something that is not an existing path.
            yield [gr.update(visible=False), gr.update(visible=True), "❌ Error: File not created or path invalid."]
    except Exception as e:
        # UI boundary: surface any pipeline failure as a status message.
        yield [gr.update(visible=False), gr.update(visible=True), f"❌ Pipeline error: {e}"]
# Static page fragments, kept at module level so build_ui stays readable.
_INTRO_HTML = """
<div id="intro-text">
<p>With SynthDataGen, easily generate <strong>diverse datasets in different formats</strong> for testing, development, and AI training.</p>
<h4>🎯 How It Works:</h4>
<ol>
<li>1️⃣ Define your business problem or dataset topic.</li>
<li>2️⃣ Select the dataset type, output format, model, and number of samples.</li>
<li>3️⃣ Receive your synthetic dataset — ready to download and use!</li>
</ol>
</div>
"""

_LEARN_MORE_HTML = """
<div id="learn-more-button">
<a href="https://github.com/lisek75/synthdatagen_app/blob/main/README.md" class="button-link" target="_blank">Learn More</a>
</div>
"""

_EXAMPLES_MD = """
<p><strong>🧠 Need inspiration?</strong> Try one of these examples:</p>
<ul>
<li>Movie summaries for genre classification.</li>
<li>Generate customer chats with realistic dialogue, chat_id, timestamp, names, sentiment label, and aligned transcript.</li>
<li>Create daily stock prices for 2 companies with typical fields like date, ticker, open, close, high, low, and volume.</li>
</ul>
"""


def build_ui(css_path="assets/styles.css"):
    """Build the SynthDataGen Gradio interface.

    Args:
        css_path: Path of the stylesheet passed to ``gr.Blocks``. It is read
            as UTF-8 so decoding does not depend on the platform locale.

    Returns:
        A ``(ui, output_dir)`` tuple: the ``gr.Blocks`` app and the
        ``output_dir`` attribute of the module-level generator
        (presumably where generated files are written — confirm in
        ``SynthDataGen``).
    """
    with open(css_path, "r", encoding="utf-8") as f:
        css = f.read()

    # NOTE(review): container nesting below was reconstructed from a copy
    # without indentation — confirm against the rendered layout.
    with gr.Blocks(css=css, title="🧬SynthDataGen") as ui:
        with gr.Column(elem_id="app-container"):
            # Header and introduction
            gr.Markdown("<h1 id='app-title'>SynthDataGen 🧬 </h1>")
            gr.Markdown("<h2 id='app-subtitle'>AI-Powered Synthetic Dataset Generator</h2>")
            gr.HTML(_INTRO_HTML)
            gr.HTML(_LEARN_MORE_HTML)
            gr.Markdown(_EXAMPLES_MD)
            gr.Markdown("<p><strong>Start generating your synthetic datasets now!</strong> 🗂️✨</p>")

            with gr.Group(elem_id="input-container"):
                business_problem = gr.Textbox(
                    placeholder="Describe the dataset you want (e.g., Job postings, Customer reviews, Sensor data, Movie titles)",
                    lines=2,
                    label="📌 Business Problem",
                    elem_classes=["label-box"],
                    elem_id="business-problem-box",
                )

                with gr.Row(elem_classes="column-gap"):
                    with gr.Column(scale=1):
                        dataset_type = gr.Dropdown(
                            ["Tabular", "Time-series", "Text"],
                            value="Tabular",
                            label="📊 Dataset Type",
                            elem_classes=["label-box"],
                            elem_id="custom-dropdown",
                        )
                    with gr.Column(scale=1):
                        output_format = gr.Dropdown(
                            choices=["JSON", "csv", "Parquet"],
                            value="JSON",
                            label="📁 Output Format",
                            elem_classes=["label-box"],
                            elem_id="custom-dropdown",
                        )

                # Keep the available output formats in sync with the dataset type.
                dataset_type.change(
                    update_output_format,
                    inputs=[dataset_type],
                    outputs=[output_format],
                )

                with gr.Row(elem_classes="row-spacer column-gap"):
                    with gr.Column(scale=1):
                        model = gr.Dropdown(
                            ["GPT", "Claude"],
                            value="GPT",
                            label="🤖 Model",
                            elem_classes=["label-box"],
                            elem_id="custom-dropdown",
                        )
                    with gr.Column(scale=1):
                        num_samples = gr.Slider(
                            minimum=10,
                            maximum=1000,
                            value=10,
                            step=1,
                            interactive=True,
                            label="🔢 Number of Samples",
                            elem_classes=["label-box"],
                        )

            # Hidden file component for dataset download
            file_download = gr.File(visible=False, elem_id="download-box", label=None)

            # Component to display status messages
            status_message = gr.Markdown("", label="Status")

            # Button to trigger dataset generation; the output order must match
            # the three-item lists yielded by update_pipeline.
            run_btn = gr.Button("Create a dataset", elem_id="run-btn")
            run_btn.click(
                update_pipeline,
                inputs=[business_problem, dataset_type, output_format, num_samples, model],
                outputs=[file_download, run_btn, status_message],
            )

    return ui, generator.output_dir