# Spaces: Paused
import os
import subprocess
import sys

import torch
from datasets import load_dataset
from transformers import Trainer, TrainingArguments
# Install required packages
# pip is invoked through the running interpreter (sys.executable -m pip) so the
# packages land in the environment this script actually uses, and the
# arguments are passed as a list so no shell is involved in parsing them.
PIP_INSTALL_GROUPS = (
    ["git+https://github.com/canopyai/Orpheus-TTS.git"],
    ["orpheus-speech", "vllm==0.7.3"],
)
for _packages in PIP_INSTALL_GROUPS:
    _result = subprocess.run([sys.executable, "-m", "pip", "install", *_packages])
    # Installation stays best-effort (the script continues either way), but a
    # failed install is reported instead of being silently ignored.
    if _result.returncode != 0:
        print(
            f"warning: pip install failed for {' '.join(_packages)} "
            f"(exit code {_result.returncode})",
            file=sys.stderr,
        )
# Load the dataset
# No `split=` argument is passed, so whatever load_dataset() returns by
# default is bound to `dataset`.
# NOTE(review): confirm this identifier resolves (Hub repo id or local
# directory) in the environment where the script runs.
DATASET_ID = "Emotional_Speech_Dataset_(ESD)"
dataset = load_dataset(DATASET_ID)
# Get the model
from transformers import AutoModelForCausalLM, AutoTokenizer

# The model weights and the tokenizer are loaded from the same pretrained
# checkpoint, so the identifier is spelled out once.
PRETRAINED_CHECKPOINT = "canopylabs/orpheus-3b-0.1-pretrained"
model = AutoModelForCausalLM.from_pretrained(PRETRAINED_CHECKPOINT)
tokenizer = AutoTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)
# Setup training arguments
# The hyperparameters are gathered in one mapping and then handed to
# TrainingArguments as keyword arguments.
training_config = {
    # Directory passed to the trainer as its output location.
    "output_dir": "./orpheus-finetuned",
    "num_train_epochs": 3,
    "learning_rate": 5e-5,
    # Per-device batch of 2 combined with 4 gradient-accumulation steps.
    "per_device_train_batch_size": 2,
    "gradient_accumulation_steps": 4,
    # Step-based saving with an interval of 500.
    "save_strategy": "steps",
    "save_steps": 500,
}
training_args = TrainingArguments(**training_config)
# Start training
# `dataset` is loaded without a split, which yields a mapping of split name ->
# dataset rather than a single dataset of training examples. Trainer needs the
# latter, so select the training split when a mapping is present; a single
# already-selected split is passed through unchanged.
# NOTE(review): assumes the split is named "train" — confirm for this dataset.
# NOTE(review): no tokenization/preprocessing step is visible before this
# point — confirm the examples are already in the form the model expects.
train_split = dataset["train"] if isinstance(dataset, dict) else dataset
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_split,
    tokenizer=tokenizer,
)
trainer.train()
# Save the model
# The model and then the tokenizer are written to the same directory.
EXPORT_DIR = "./orpheus-finetuned-model"
for _artifact in (model, tokenizer):
    _artifact.save_pretrained(EXPORT_DIR)