from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch
import torch.nn.functional as F

# Load the SST-2 fine-tuned DistilBERT checkpoint and its matching tokenizer
model_name = "distilbert-base-uncased-finetuned-sst-2-english"

model = AutoModelForSequenceClassification.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)
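
# For reference, a minimal alternative sketch: pipeline() can also resolve
# the model and tokenizer on its own from the checkpoint name, so building
# the explicit objects above is only needed when you want to customize them.
quick_classifier = pipeline("sentiment-analysis", model=model_name)
print(quick_classifier("We are very happy to show you the Transformers library."))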

# Build a sentiment-analysis pipeline from the explicit model and tokenizer
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)
print(classifier.__class__)

res = classifier(["We are very happy to show you the Transformers library.",
                  "Hope you don't hate it"])

# Each result is a dict with the predicted label and its score
for result in res:
    print(result)
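
# A hedged sketch of inspecting all class scores instead of only the top
# label: recent transformers versions accept top_k=None on text
# classification pipelines (older releases used return_all_scores=True).
scoring_classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer, top_k=None)
for result in scoring_classifier(["We are very happy to show you the Transformers library.",
                                  "Hope you don't hate it"]):
    print(result)  # one list of {label, score} dicts per input sentence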

# tokenize() splits the text into subword tokens, convert_tokens_to_ids()
# maps them to vocabulary ids, and calling the tokenizer directly returns a
# full encoding (input_ids with special tokens, plus an attention_mask)
tokens = tokenizer.tokenize("I am very happy now.")
token_ids = tokenizer.convert_tokens_to_ids(tokens)
encoding = tokenizer("I am very happy now.")

print(f'Tokens: {tokens}')
print(f'Token IDs: {token_ids}')
print(f'Encoding: {encoding}')
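
# A small sketch of the reverse direction, using the encoding from above:
# decode() turns input ids back into text, including the [CLS]/[SEP]
# special tokens the tokenizer added around the sentence.
decoded = tokenizer.decode(encoding["input_ids"])
print(f'Decoded: {decoded}')  # e.g. "[CLS] i am very happy now. [SEP]"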

X_train = ["We are very happy to show you the Transformers library.",
           "Hope you don't hate it"]

# Encode the whole batch at once: pad to the longest sentence, truncate
# anything beyond 512 tokens, and return PyTorch tensors
batch = tokenizer(X_train, padding=True, truncation=True, max_length=512, return_tensors="pt")
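
# For reference, the padded batch can be inspected directly: it is a dict of
# tensors, and attention_mask marks real tokens with 1 and padding with 0 so
# the model knows which positions to ignore.
print(batch["input_ids"].shape)   # (2, length of the longest sentence)
print(batch["attention_mask"])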

# Run inference without tracking gradients
with torch.no_grad():
    # A plain forward pass returns only the logits
    outputs = model(**batch)
    # Passing labels (1 = POSITIVE, 0 = NEGATIVE for this model) also computes the loss
    outputs = model(**batch, labels=torch.tensor([1, 0]))
    print("Outputs: ", outputs)
    # Softmax turns the logits into class probabilities
    predictions = F.softmax(outputs.logits, dim=1)
    print("Predictions: ", predictions)
    label_ids = torch.argmax(predictions, dim=1)
    print("Raw Labels: ", label_ids)
    # Map numeric class ids to the model's human-readable label names
    labels = [model.config.id2label[label_id] for label_id in label_ids.tolist()]
    print("Labels: ", labels)