Hugging Face Space (status: Runtime error) — commit "Update app.py" (Browse files)
Changed file: app.py
Diff hunk: @@ -5,12 +5,12 @@ from datasets import load_dataset
|
|
5 |
# 1) Load model & tokenizer.
model_name = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# SciBERT encoder with a fresh 3-class sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

# 2) Load the dataset. armanc/scientific_papers ships a dataset loading
# script, so recent `datasets` versions require an explicit opt-in via
# trust_remote_code=True — without it load_dataset raises at runtime.
dataset = load_dataset("armanc/scientific_papers", trust_remote_code=True)

# 3) Tokenize the texts.
def tokenize_function(examples):
    """Tokenize a batch of examples, padded/truncated to the model max length.

    NOTE(review): this indexes examples["text"]; the scientific_papers
    dataset exposes "article"/"abstract" columns instead of "text" —
    confirm the column name against the dataset's actual features.
    """
    return tokenizer(examples["text"], padding="max_length", truncation=True)
|
16 |
|
|
|
5 |
# 1) Load model & tokenizer.
model_name = "allenai/scibert_scivocab_uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# SciBERT encoder with a fresh 3-class sequence-classification head.
model = AutoModelForSequenceClassification.from_pretrained(model_name, num_labels=3)

# 2) Load dataset (armanc/scientific_papers); its loading script requires
# trust_remote_code=True on recent `datasets` versions.
dataset = load_dataset("armanc/scientific_papers", trust_remote_code=True)

# 3) Tokenize the texts. armanc/scientific_papers batches carry
# "article"/"abstract" columns rather than "text", so indexing "text"
# unconditionally would raise KeyError during dataset.map().
def tokenize_function(examples):
    """Tokenize a batch, padding and truncating to the model's max length.

    Uses the "text" column when present (backward compatible), otherwise
    falls back to "article" — the full-paper body in scientific_papers.
    """
    column = "text" if "text" in examples else "article"
    return tokenizer(examples[column], padding="max_length", truncation=True)
16 |
|