# Spaces: Sleeping
# (Hugging Face Spaces status banner captured during export — not part of the app code.)
# Streamlit climate-news fact checker: dense retrieval (FAISS) + seq2seq generation.
import streamlit as st
import pandas as pd
import torch
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load models once at module import:
# - a sentence encoder to embed news snippets and user claims,
# - a text2text pipeline to generate the fact-check answer.
embedder = SentenceTransformer('all-MiniLM-L6-v2')
# NOTE(review): facebook/bart-large is a base model, not fine-tuned for
# QA/fact-checking — consider an instruction/QA-tuned checkpoint.
generator = pipeline("text2text-generation", model="facebook/bart-large")
# Load and combine train + test datasets
def load_data():
    """Read train.csv and test.csv from the working directory and merge them.

    Returns:
        pd.DataFrame: the concatenated rows with a new ``text`` column built
        as ``"<title>. <description>"``; missing titles/descriptions are
        treated as empty strings so the column never contains NaN.
    """
    train_df = pd.read_csv("train.csv")
    test_df = pd.read_csv("test.csv")
    # ignore_index avoids duplicate index labels from the two files.
    df = pd.concat([train_df, test_df], ignore_index=True)
    df["text"] = df["title"].fillna('') + ". " + df["description"].fillna('')
    return df
# Load the data and build the retrieval structures once at startup.
df = load_data()
corpus = df["text"].tolist()
# convert_to_tensor=True returns a torch tensor (possibly on GPU).
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)

# Build FAISS index for similarity search: exact L2 search over the
# embedding dimension (corpus_embeddings.shape[1]).
index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
# FAISS expects a CPU float32 numpy array, hence the cpu()/numpy() round-trip.
index.add(corpus_embeddings.cpu().detach().numpy())
# App UI
# NOTE(review): the leading "π§"/"π" glyphs look like mojibake of the original
# emoji (lost during an encoding round-trip) — restore the intended emoji.
st.title("π§ Climate News Fact Checker")
st.markdown("Enter a **claim** to check if it's supported or refuted by recent climate-related news.")

# User input
user_input = st.text_input("π Enter a claim or statement:")
if user_input:
    # Embed the input claim. encode([...]) returns a (1, dim) numpy array,
    # matching what index.search expects.
    query_embedding = embedder.encode([user_input])

    # Retrieve top-k most similar news snippets by L2 distance.
    top_k = 3
    distances, neighbor_ids = index.search(query_embedding, top_k)
    results = [corpus[i] for i in neighbor_ids[0]]

    # Show retrieved snippets.
    # NOTE(review): "π" looks like a mojibake'd emoji — restore the original.
    st.subheader("π Retrieved News Snippets")
    for idx, res in enumerate(results):
        st.write(f"**Snippet {idx+1}:** {res}")

    # Generate a verdict conditioned on the retrieved context.
    context = " ".join(results)
    prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"
    st.subheader("β Fact Check Result")
    # do_sample=False → deterministic (greedy/beam) decoding.
    response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
    st.write(response)