# Streamlit ABSA demo: extract aspect terms and classify per-aspect sentiment.
import streamlit as st
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import pipeline
import torch
import torch.nn.functional as F
@st.cache_resource
def load_model():
    """Load and cache the ABSA classifier, its tokenizer, and the aspect-term extractor.

    Cached with ``st.cache_resource`` so the (large) models are downloaded and
    instantiated only once per Streamlit server process.

    Returns:
        tuple: ``(absa_model, absa_tokenizer, token_classifier)`` where the first
        two score sentiment for a (sentence, aspect) pair and the last extracts
        aspect spans via token classification.
    """
    # Slow tokenizer is requested explicitly (use_fast=False) for this checkpoint.
    tokenizer = AutoTokenizer.from_pretrained(
        "yangheng/deberta-v3-base-absa-v1.1", use_fast=False
    )
    model = AutoModelForSequenceClassification.from_pretrained(
        "yangheng/deberta-v3-base-absa-v1.1"
    )
    # "simple" aggregation merges sub-word pieces into whole aspect terms.
    extractor = pipeline(
        model="thainq107/abte-restaurants-distilbert-base-uncased",
        aggregation_strategy="simple",
    )
    return model, tokenizer, extractor


absa_model, absa_tokenizer, token_classifier = load_model()
def inference(review):
    """Run aspect-based sentiment analysis on a single review.

    First extracts candidate aspect terms with the token-classification
    pipeline, then scores the sentiment toward each aspect with the ABSA
    sentence-pair model.

    Args:
        review (str): The review text to analyse.

    Returns:
        dict[str, str]: Mapping from each detected aspect term to the label
        with the highest probability: "negative", "neutral", or "positive".
    """
    extracted = token_classifier(review)
    # Deduplicate while preserving order so each aspect term is scored
    # exactly once; the final dict is identical (later duplicates would
    # only overwrite with the same label).
    aspects = list(dict.fromkeys(item["word"] for item in extracted))

    labels = ["negative", "neutral", "positive"]
    results = {}
    for aspect in aspects:
        # Skip spurious spans the aggregator may produce that do not
        # literally occur in the review text.
        if aspect.lower() not in review.lower():
            continue
        # Sentence-pair prompt format used by the
        # yangheng/deberta-v3-base-absa-v1.1 model card.
        inputs = absa_tokenizer(
            f"[CLS] {review} [SEP] {aspect} [SEP]", return_tensors="pt"
        )
        # Inference only — disable autograd instead of detaching afterwards.
        with torch.no_grad():
            outputs = absa_model(**inputs)
        probs = F.softmax(outputs.logits, dim=1)[0].numpy()
        # Keep the label with the highest probability.
        max_label, _ = max(zip(labels, probs), key=lambda pair: pair[1])
        results[aspect] = max_label
    return results
# --- Streamlit UI: text input, analysis trigger, and per-aspect results ---
st.title("ABSA - Aspect-Based Sentiment Analysis")
user_text = st.text_area(
    "Nhập câu cần phân tích:", "The battery life is great, but the screen is dim."
)
if st.button("Phân tích cảm xúc"):
    # Render one "aspect ➝ label" line per detected aspect term.
    sentiments = inference(user_text)
    for term, sentiment in sentiments.items():
        st.markdown(f"{term} ➝ {sentiment}")