import faiss
import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer


# Load the news CSV as three unnamed columns: class label, headline, body.
# NOTE(review): header=None treats the first row as data — confirm that the
# file really has no header row.
csv_path = "train.csv"
df = pd.read_csv(csv_path, header=None, names=["label", "title", "description"])

# Text that gets embedded: headline and body joined into one string. Missing
# values are replaced with "" so the concatenation never produces NaN.
df["content"] = df["title"].fillna("") + ". " + df["description"].fillna("")

# Embed every article once at start-up; queries are embedded with the same
# model so that both live in the same vector space.
model = SentenceTransformer("all-MiniLM-L6-v2")
corpus_embeddings = model.encode(df["content"].tolist(), show_progress_bar=True)

# FAISS operates on C-contiguous float32 matrices; make that explicit instead
# of relying on the encoder's default output format.
corpus_embeddings = np.ascontiguousarray(corpus_embeddings, dtype=np.float32)

# Exact (brute-force) L2 index over all article embeddings.
embedding_dim = corpus_embeddings.shape[1]
index = faiss.IndexFlatL2(embedding_dim)
index.add(corpus_embeddings)


def retrieve_and_respond(claim, k=5):
    """Return the articles closest to *claim*, formatted as Markdown.

    The claim is embedded with the module-level ``model`` and looked up in
    the module-level FAISS ``index``; the matching rows of ``df`` are
    rendered as Title / Description / Label entries.

    Args:
        claim: Free-text claim entered by the user.
        k: Maximum number of articles to return.

    Returns:
        A Markdown string with one entry per retrieved article, or a short
        message when the claim is blank or nothing can be retrieved.
    """
    if claim is None or not claim.strip():
        return "Please enter a claim to search for."

    # Never request more neighbours than the index holds: FAISS reports the
    # missing ones as -1, and df.iloc[-1] would silently be the last row.
    k = min(int(k), index.ntotal)
    if k < 1:
        return "No matching articles found."

    query_embedding = np.asarray(model.encode([claim]), dtype=np.float32)
    _, neighbour_ids = index.search(query_embedding, k)

    results = []
    for idx in neighbour_ids[0]:
        if idx < 0:  # defensive: skip "no neighbour" placeholders
            continue
        row = df.iloc[idx]
        # Two trailing spaces force a Markdown hard line break; a bare "\n"
        # is collapsed by the renderer and the fields would run together.
        results.append(
            f"*Title:* {row['title']}  \n"
            f"*Description:* {row['description']}  \n"
            f"*Label:* {row['label']}\n"
        )

    if not results:
        return "No matching articles found."
    return "\n\n".join(results)


# Web UI: one text box for the claim, retrieved articles rendered as Markdown.
# Only the claim is wired to the function, so retrieve_and_respond runs with
# its default k.
iface = gr.Interface(
    fn=retrieve_and_respond,
    inputs=gr.Textbox(lines=2, placeholder="Enter a news-related claim here..."),
    outputs="markdown",
    title="Claim Verifier using AG News",
    description="Enter a claim and get the most relevant AG News articles to help verify or refute it.",
)

# Start the server only when run as a script, so importing this module
# (e.g. to reuse retrieve_and_respond) does not launch the app.
if __name__ == "__main__":
    iface.launch()