import streamlit as st
import pandas as pd
import torch
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load models for embeddings and generation.
# Cached with st.cache_resource so Streamlit does not reload them on every rerun.
@st.cache_resource
def load_models():
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    generator = pipeline("text2text-generation", model="facebook/bart-large")
    return embedder, generator

embedder, generator = load_models()

# Load and combine train + test datasets
@st.cache_data
def load_data():
    train_df = pd.read_csv("train.csv")
    test_df = pd.read_csv("test.csv")
    df = pd.concat([train_df, test_df], ignore_index=True)
    df["text"] = df["title"].fillna('') + ". " + df["description"].fillna('')
    return df

# Load the data
df = load_data()
corpus = df["text"].tolist()

# Embed the corpus and build a FAISS index for similarity search.
# Cached so the (potentially slow) encoding step runs only once, not on every interaction.
@st.cache_resource
def build_index(corpus):
    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
    embeddings_np = corpus_embeddings.cpu().detach().numpy()
    index = faiss.IndexFlatL2(embeddings_np.shape[1])
    index.add(embeddings_np)
    return index

index = build_index(corpus)

# App UI
st.title("🧠 Climate News Fact Checker")
st.markdown("Enter a **claim** to check if it's supported or refuted by recent climate-related news.")

# User input
user_input = st.text_input("🔎 Enter a claim or statement:")

if user_input:
    # Embed the input claim
    query_embedding = embedder.encode([user_input])

    # Retrieve top-k similar news snippets
    top_k = 3
    D, I = index.search(query_embedding, top_k)
    results = [corpus[i] for i in I[0]]

    # Show retrieved snippets
    st.subheader("📄 Retrieved News Snippets")
    for idx, res in enumerate(results):
        st.write(f"**Snippet {idx+1}:** {res}")

    # Generate a response based on the retrieved context
    context = " ".join(results)
    prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"

    st.subheader("✅ Fact Check Result")
    response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
    st.write(response)
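
# Usage note: to try the app locally, assuming this script is saved as app.py
# (the filename is an assumption) and train.csv / test.csv sit in the same
# directory as referenced in load_data above, run:
#     streamlit run app.py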