File size: 1,847 Bytes
b0efa4e
855a31e
ad4cc62
 
 
 
855a31e
0329a7f
ad4cc62
 
855a31e
0329a7f
ad4cc62
 
0329a7f
 
 
ad4cc62
 
855a31e
0329a7f
 
ad4cc62
 
855a31e
0329a7f
ad4cc62
 
855a31e
0329a7f
ad4cc62
0329a7f
855a31e
0329a7f
 
855a31e
ad4cc62
0329a7f
ad4cc62
0329a7f
 
ad4cc62
 
 
0329a7f
 
 
ad4cc62
 
0329a7f
 
ad4cc62
 
0329a7f
ad4cc62
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import streamlit as st
import pandas as pd
import torch
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Load models for embeddings and generation.
# Cached with st.cache_resource so Streamlit reruns (which re-execute the
# whole script on every user interaction) don't reload the model weights.
@st.cache_resource
def _load_models():
    """Return (sentence embedder, text2text generator), loaded once per process."""
    embedder = SentenceTransformer('all-MiniLM-L6-v2')
    generator = pipeline("text2text-generation", model="facebook/bart-large")
    return embedder, generator

embedder, generator = _load_models()

# Load and combine train + test datasets
@st.cache_data
def load_data():
    """Read train/test CSVs, merge them, and add a combined 'text' column.

    The 'text' column joins title and description (missing values become
    empty strings) so the corpus can be embedded as one string per row.
    """
    frames = [pd.read_csv(path) for path in ("train.csv", "test.csv")]
    combined = pd.concat(frames, ignore_index=True)
    titles = combined["title"].fillna('')
    descriptions = combined["description"].fillna('')
    combined["text"] = titles + ". " + descriptions
    return combined

# Load the data, embed the corpus, and build the FAISS similarity index.
# Cached with st.cache_resource: encoding every document is expensive and
# must not be repeated on each Streamlit rerun.
@st.cache_resource
def _build_index():
    """Return (df, corpus, FAISS L2 index) built once over the news corpus."""
    df = load_data()
    corpus = df["text"].tolist()
    # encode() returns a torch tensor; FAISS needs a contiguous numpy array.
    embeddings = embedder.encode(corpus, convert_to_tensor=True)
    vectors = embeddings.cpu().detach().numpy()
    index = faiss.IndexFlatL2(vectors.shape[1])
    index.add(vectors)
    return df, corpus, index

df, corpus, index = _build_index()

# App UI: page header, instructions, and the claim input box.
st.title("🧠 Climate News Fact Checker")

intro_md = "Enter a **claim** to check if it's supported or refuted by recent climate-related news."
st.markdown(intro_md)

# Claim typed by the user; empty string until something is entered.
user_input = st.text_input("πŸ”Ž Enter a claim or statement:")

if user_input:
    # Embed the claim with the same model used for the corpus so the
    # L2 distances returned by FAISS are comparable.
    query_embedding = embedder.encode([user_input])

    # Retrieve up to top_k similar news snippets. FAISS pads missing
    # results with index -1 when the corpus holds fewer than top_k
    # vectors, so filter out-of-range indices before dereferencing
    # (corpus[-1] would otherwise silently return the wrong snippet).
    top_k = 3
    D, I = index.search(query_embedding, top_k)
    results = [corpus[i] for i in I[0] if 0 <= i < len(corpus)]

    # Show retrieved snippets
    st.subheader("πŸ“„ Retrieved News Snippets")
    for idx, res in enumerate(results):
        st.write(f"**Snippet {idx+1}:** {res}")

    # Generate a response grounded in the retrieved context.
    context = " ".join(results)
    prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"

    st.subheader("βœ… Fact Check Result")
    # do_sample=False -> deterministic (greedy) generation for repeatable answers.
    response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
    st.write(response)