Spaces:
Sleeping
Sleeping
File size: 1,455 Bytes
767fba0 e20d86e 925ba7d 767fba0 e20d86e 925ba7d 501033d 925ba7d ef46523 767fba0 501033d 925ba7d 767fba0 501033d 925ba7d 767fba0 ef46523 767fba0 925ba7d 6c8c083 925ba7d 501033d 6c8c083 925ba7d 767fba0 501033d 925ba7d e20d86e ef46523 62ffb32 767fba0 62ffb32 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 |
# app.py
# Fine-tune microsoft/phi-2 on a Hugging Face Hub dataset from a Gradio UI
# (intended to run inside a Hugging Face Space on port 7860).
import torch
import gradio as gr
import threading
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    TrainingArguments,
    Trainer,
    DataCollatorForLanguageModeling
)
from datasets import load_dataset
import logging
import sys
from urllib.parse import urlparse
# Configure logging
# Log to stdout so messages appear in the Space's container logs.
logging.basicConfig(stream=sys.stdout, level=logging.INFO)
def parse_hf_dataset_url(url: str):
# ... (keep previous URL parsing logic) ...
def train(dataset_url: str) -> str:
    """Fine-tune microsoft/phi-2 on the dataset behind *dataset_url*.

    Runs a single causal-LM fine-tuning pass and returns a human-readable
    status string (shown in the Gradio status box / logs). All failures are
    caught at this boundary and reported as a string rather than raised, so
    a background-thread crash cannot kill the app silently.

    Args:
        dataset_url: Full huggingface.co dataset URL.

    Returns:
        A status message string (success or error).
    """
    try:
        repo_id = parse_hf_dataset_url(dataset_url)
        logging.info("Loading dataset %s", repo_id)
        dataset = load_dataset(repo_id, split="train")

        model_name = "microsoft/phi-2"
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        if tokenizer.pad_token is None:
            # phi-2's tokenizer ships without a pad token; reuse EOS for padding.
            tokenizer.pad_token = tokenizer.eos_token
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        )

        def tokenize(batch):
            # NOTE(review): assumes the dataset has a "text" column — confirm
            # for the chosen dataset (e.g. common_voice uses "sentence").
            return tokenizer(batch["text"], truncation=True, max_length=512)

        tokenized = dataset.map(
            tokenize, batched=True, remove_columns=dataset.column_names
        )

        args = TrainingArguments(
            output_dir="./phi2-finetuned",
            per_device_train_batch_size=1,
            gradient_accumulation_steps=8,
            num_train_epochs=1,
            logging_steps=10,
            save_strategy="epoch",
        )
        trainer = Trainer(
            model=model,
            args=args,
            train_dataset=tokenized,
            # mlm=False -> causal-LM collation (labels = shifted input ids).
            data_collator=DataCollatorForLanguageModeling(tokenizer, mlm=False),
        )
        trainer.train()
        return "✅ Training complete"
    except Exception as e:
        # logging.exception records the traceback, unlike logging.error.
        logging.exception("Critical error")
        return f"❌ Critical error: {str(e)}"
# Gradio interface
# Gradio interface
with gr.Blocks(title="Phi-2 Training") as demo:
    gr.Markdown("# 🚀 Train Phi-2 with HF Hub Data")

    with gr.Row():
        dataset_url = gr.Textbox(
            label="Dataset URL",
            value="https://huggingface.co./datasets/mozilla-foundation/common_voice_11_0"
        )

    start_btn = gr.Button("Start Training", variant="primary")
    status_output = gr.Textbox(label="Status", interactive=False)

    def _start_training(url: str) -> str:
        """Kick off training in a background thread and report status.

        The previous lambda returned ``Thread.start()``'s result (``None``),
        so the status box was always blanked. Returning a string here gives
        the user immediate feedback; the thread is a daemon so it cannot
        block process shutdown.
        """
        threading.Thread(target=train, args=(url,), daemon=True).start()
        return "🚀 Training started — watch the container logs for progress."

    start_btn.click(
        fn=_start_training,
        inputs=[dataset_url],
        outputs=status_output
    )
if __name__ == "__main__":
    # ``enable_queue=`` was deprecated in Gradio 3.x and removed in 4.x;
    # demo.queue() is the supported way to enable request queueing.
    demo.queue()
    demo.launch(
        server_name="0.0.0.0",  # bind all interfaces so the Space proxy can reach it
        server_port=7860,       # standard HF Spaces port
        share=False,
    )