# NewsFactChecker / app.py
# Last commit by rahideer: "Update app.py" (0329a7f, verified)
import streamlit as st
import pandas as pd
import torch
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Load models for embeddings and generation
@st.cache_resource
def _load_models():
    """Build the sentence embedder and the text generator once per process.

    Streamlit re-executes this script from the top on every widget
    interaction, so constructing the models at module level would reload
    their weights each time a claim is submitted. ``st.cache_resource``
    keeps a single shared instance of each model instead.

    Returns:
        An ``(embedder, generator)`` tuple: the ``all-MiniLM-L6-v2``
        ``SentenceTransformer`` and the ``facebook/bart-large``
        ``text2text-generation`` pipeline.
    """
    # NOTE(review): facebook/bart-large looks like a base (not task-tuned)
    # checkpoint -- confirm it is the intended model for fact checking.
    return (
        SentenceTransformer('all-MiniLM-L6-v2'),
        pipeline("text2text-generation", model="facebook/bart-large"),
    )


embedder, generator = _load_models()
# Load and combine train + test datasets
@st.cache_data
def load_data(train_path="train.csv", test_path="test.csv"):
    """Read the train and test CSV files and merge them into one DataFrame.

    Args:
        train_path: Location of the training CSV. Defaults to ``train.csv``.
        test_path: Location of the test CSV. Defaults to ``test.csv``.

    Returns:
        The row-wise concatenation of both files (index renumbered from 0)
        with an added ``text`` column holding ``"<title>. <description>"``.
        Both files must provide ``title`` and ``description`` columns.
    """
    frames = [pd.read_csv(train_path), pd.read_csv(test_path)]
    df = pd.concat(frames, ignore_index=True)

    # Missing values become empty strings; the explicit str cast keeps the
    # concatenation below from failing when a column was parsed as numeric.
    title = df["title"].fillna('').astype(str)
    description = df["description"].fillna('').astype(str)
    df["text"] = title + ". " + description
    return df
# Load the data
df = load_data()


# Build FAISS index for similarity search
@st.cache_resource
def _build_search_index():
    """Embed every corpus text and index the vectors for L2 search.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the result avoids re-embedding the whole corpus
    and rebuilding the index each time a claim is submitted.

    Returns:
        A ``(texts, embeddings, faiss_index)`` tuple: the list of corpus
        strings, the tensor returned by ``embedder.encode`` for them, and
        a ``faiss.IndexFlatL2`` populated with those embeddings. Row ``i``
        of the index corresponds to ``texts[i]``.
    """
    texts = load_data()["text"].tolist()
    embeddings = embedder.encode(texts, convert_to_tensor=True)

    flat_index = faiss.IndexFlatL2(embeddings.shape[1])
    # FAISS consumes NumPy arrays, so move the tensor to host memory first.
    flat_index.add(embeddings.cpu().detach().numpy())
    return texts, embeddings, flat_index


corpus, corpus_embeddings, index = _build_search_index()
# App UI: page title and a short usage hint
st.title("🧠 Climate News Fact Checker")
st.markdown("Enter a **claim** to check if it's supported or refuted by recent climate-related news.")

# User input: free-text claim to be checked against the news corpus
# (the label's emoji was mis-encoded as "πŸ”Ž"; restored to the intended glyph)
user_input = st.text_input("🔎 Enter a claim or statement:")
# Ignore submissions that contain only whitespace.
claim = user_input.strip() if user_input else ""
if claim:
    # Embed the input claim
    query_embedding = embedder.encode([claim])

    # Retrieve top-k similar news snippets
    top_k = 3
    results = []
    if index.ntotal > 0:
        # Never ask for more neighbours than the index holds; FAISS pads
        # missing hits with label -1, which would otherwise silently pick
        # corpus[-1] (the last snippet) instead of a real match.
        _, I = index.search(query_embedding, min(top_k, index.ntotal))
        results = [corpus[i] for i in I[0] if i >= 0]

    if not results:
        st.warning("No news snippets are available to check this claim against.")
    else:
        # Show retrieved snippets
        st.subheader("📄 Retrieved News Snippets")
        for idx, res in enumerate(results):
            st.write(f"**Snippet {idx+1}:** {res}")

        # Generate a response based on context
        context = " ".join(results)
        prompt = f"Claim: {claim}\nContext: {context}\nAnswer:"

        st.subheader("✅ Fact Check Result")
        # truncation=True keeps an over-long claim/context from exceeding
        # the model's maximum input length.
        response = generator(
            prompt, max_length=100, do_sample=False, truncation=True
        )[0]['generated_text']
        st.write(response)