IronWolfAI committed
Commit b4f5699 · verified · 1 parent: c59ec53

Update app.py

Files changed (1)
  1. app.py +82 -32
app.py CHANGED
@@ -1,45 +1,95 @@
 import os
+import logging
 from datasets import load_dataset
+from peft import LoraConfig
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments
+import transformers
 from trl import SFTTrainer
 
-# Load the model and tokenizer
-model_name = "microsoft/phi-4-multimodal-instruct"
-model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-
-# Load the dataset
-dataset = load_dataset("openai/gsm8k", "main")["train"]
-
-# Preprocess the dataset
-def preprocess_function(examples):
-    return tokenizer(examples["question"], padding="max_length", truncation=True)
-
-dataset = dataset.map(preprocess_function, batched=True)
-
-# Define the training arguments
-training_args = TrainingArguments(
-    output_dir="./results",
-    per_device_train_batch_size=4,
-    gradient_accumulation_steps=4,
-    learning_rate=2e-5,
-    num_train_epochs=1,
-    fp16=True,
-    logging_dir="./logs",
-    report_to="none",
+# Hyperparameters and configurations
+training_config = {
+    "output_dir": "./results",
+    "per_device_train_batch_size": 4,
+    "gradient_accumulation_steps": 4,
+    "learning_rate": 2e-5,
+    "num_train_epochs": 1,
+    "fp16": True,  # note: the model below is loaded in bfloat16; "bf16": True may be the better match
+    "logging_dir": "./logs",
+    "report_to": "none",
+}
+
+peft_config = {
+    "r": 16,  # LoRA rank
+    "lora_alpha": 64,  # LoRA scaling factor
+    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj"],  # attention projections to adapt
+    "bias": "none",
+    "task_type": "CAUSAL_LM",
+}
+
+# SFTTrainer expects a TrainingArguments object rather than a plain dict
+train_conf = transformers.TrainingArguments(**training_config)
+peft_conf = LoraConfig(**peft_config)
+
+# Set up logging
+logging.basicConfig(
+    format="%(asctime)s - %(levelname)s - %(name)s - %(message)s",
+    datefmt="%Y-%m-%d %H:%M:%S",
+    handlers=[logging.StreamHandler()],
+)
+log_level = logging.INFO  # adjust to preference, e.g. logging.DEBUG for more detail
+logger = logging.getLogger(__name__)
+logger.setLevel(log_level)
+
+# Model loading and tokenizer configuration
+checkpoint_path = "microsoft/Phi-4-mini-instruct"
+model_kwargs = dict(
+    use_cache=False,
+    trust_remote_code=True,
+    attn_implementation="flash_attention_2",
+    torch_dtype=torch.bfloat16,
+    device_map=None,
 )
+model = transformers.AutoModelForCausalLM.from_pretrained(checkpoint_path, **model_kwargs)
+tokenizer = transformers.AutoTokenizer.from_pretrained(checkpoint_path)
 
-# Create the SFT trainer
+# Data processing
+def apply_chat_template(example):
+    messages = example["messages"]
+    # Render the chat messages into a single training string using the tokenizer's chat template
+    example["text"] = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)
+    return example
+
+train_dataset, test_dataset = load_dataset("HuggingFaceH4/ultrachat_200k", split=["train_sft", "test_sft"])
+column_names = list(train_dataset.features)
+processed_train_dataset = train_dataset.map(
+    apply_chat_template,
+    num_proc=10,
+    remove_columns=column_names,
+)
+# The eval split needs the same preprocessing so that it also has a "text" column
+processed_test_dataset = test_dataset.map(
+    apply_chat_template,
+    num_proc=10,
+    remove_columns=column_names,
+)
+
+# Training
 trainer = SFTTrainer(
     model=model,
-    train_dataset=dataset,
-    args=training_args,
+    args=train_conf,
+    peft_config=peft_conf,
+    train_dataset=processed_train_dataset,
+    eval_dataset=processed_test_dataset,  # evaluated on the held-out split after training
+    max_seq_length=2048,
+    dataset_text_field="text",
     tokenizer=tokenizer,
+    packing=True,
 )
 
-# Train the model
-trainer.train()
+train_result = trainer.train()
+metrics = train_result.metrics
+trainer.log_metrics("train", metrics)
+trainer.save_metrics("train", metrics)
+trainer.save_state()
+
+# Evaluation on the held-out split
+metrics = trainer.evaluate()
+metrics["eval_samples"] = len(processed_test_dataset)
+trainer.log_metrics("eval", metrics)
+trainer.save_metrics("eval", metrics)
 
-# Save the model
-trainer.save_model("./results")
+# Save the fine-tuned model (LoRA adapter)
+trainer.save_model(train_conf.output_dir)
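A minimal usage sketch, not part of the commit: assuming the run above saved a LoRA adapter into ./results, it could be loaded on top of the base checkpoint for generation roughly as follows. The prompt text, generation settings, and adapter path are illustrative assumptions.

import torch
import transformers
from peft import PeftModel

base_model = transformers.AutoModelForCausalLM.from_pretrained(
    "microsoft/Phi-4-mini-instruct", torch_dtype=torch.bfloat16, trust_remote_code=True
)
# Attach the LoRA adapter saved by trainer.save_model() (path is an assumption)
model = PeftModel.from_pretrained(base_model, "./results")
tokenizer = transformers.AutoTokenizer.from_pretrained("microsoft/Phi-4-mini-instruct")

# Build a chat-formatted prompt the same way the training data was rendered
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Summarize what LoRA fine-tuning does."}],
    tokenize=False,
    add_generation_prompt=True,
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=128)
print(tokenizer.decode(outputs[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True))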