flytoe committed on
Commit
849f804
·
verified ·
1 Parent(s): 5665c6b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -4
app.py CHANGED
@@ -5,12 +5,12 @@ from datasets import load_dataset
5
  # 1️⃣ Modell & Tokenizer laden
6
  model_name = "allenai/scibert_scivocab_uncased"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
- model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3) # z.B. für 3 Kategorien
9
 
10
- # 2️⃣ Dataset laden (armanc/scientific_papers)
11
- dataset = load_dataset("armanc/scientific_papers")
12
 
13
- # 3️⃣ Tokenisierung der Texte
14
  def tokenize_function(examples):
15
  return tokenizer(examples["text"], padding="max_length", truncation=True)
16
 
 
5
  # 1️⃣ Modell & Tokenizer laden
6
  model_name = "allenai/scibert_scivocab_uncased"
7
  tokenizer = AutoTokenizer.from_pretrained(model_name)
8
+ model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)
9
 
10
+ # 2️⃣ Dataset laden (armanc/scientific_papers) mit trust_remote_code=True
11
+ dataset = load_dataset("armanc/scientific_papers", trust_remote_code=True)
12
 
13
+ # 3️⃣ Tokenisierung der Texte (hier wird die Spalte "text" genutzt; ggf. anpassen, falls andere Spalten vorhanden sind)
14
  def tokenize_function(examples):
15
  return tokenizer(examples["text"], padding="max_length", truncation=True)
16