# NOTE(review): the following header was web-scrape residue from the hosting
# page (status badges, file size, commit-hash gutter, line numbers) and was
# not part of the program; it has been commented out so the file parses.
import streamlit as st
import pandas as pd
import torch
import faiss
from sentence_transformers import SentenceTransformer
from transformers import pipeline
# Load models once per session. Streamlit re-executes this script on every
# widget interaction, so without caching both models would be re-initialized
# (and potentially re-downloaded) on each rerun; st.cache_resource keeps a
# single shared instance alive across reruns.
@st.cache_resource
def _load_models():
    """Return the (sentence embedder, text2text generator) model pair."""
    embed_model = SentenceTransformer('all-MiniLM-L6-v2')
    gen_model = pipeline("text2text-generation", model="facebook/bart-large")
    return embed_model, gen_model


embedder, generator = _load_models()
# Load and combine train + test datasets
@st.cache_data
def load_data():
    """Load the train and test news CSVs and add a combined ``text`` column.

    The ``text`` column joins each row's title and description with ". ",
    treating missing values as empty strings, for downstream embedding.
    """
    frames = [pd.read_csv(name) for name in ("train.csv", "test.csv")]
    combined = pd.concat(frames, ignore_index=True)
    titles = combined["title"].fillna('')
    descriptions = combined["description"].fillna('')
    combined["text"] = titles + ". " + descriptions
    return combined
# Load the data and build the searchable corpus: one embedding per news item,
# indexed with FAISS for L2 nearest-neighbour retrieval.
df = load_data()
corpus = df["text"].tolist()
# encode() can return a float32 numpy array directly, which is exactly what
# FAISS expects — the original tensor -> cpu -> detach -> numpy round trip
# was unnecessary work.
corpus_embeddings = embedder.encode(corpus, convert_to_numpy=True)
index = faiss.IndexFlatL2(corpus_embeddings.shape[1])
index.add(corpus_embeddings)
# ---- App UI ----
# NOTE(review): the original user-facing strings contained mojibake
# ("π§", "π") from a broken encoding round-trip; restored to plausible
# emoji — confirm the intended glyphs against the deployed app.
st.title("🧠 Climate News Fact Checker")
st.markdown("Enter a **claim** to check if it's supported or refuted by recent climate-related news.")

# User input
user_input = st.text_input("🔍 Enter a claim or statement:")
if user_input:
    # Embed the input claim; convert_to_numpy gives the float32 array that
    # FAISS search expects.
    query_embedding = embedder.encode([user_input], convert_to_numpy=True)

    # Retrieve the top-k most similar news snippets from the index.
    top_k = 3
    distances, indices = index.search(query_embedding, top_k)
    results = [corpus[i] for i in indices[0]]

    # Show retrieved snippets
    st.subheader("📰 Retrieved News Snippets")
    for snippet_no, snippet in enumerate(results, start=1):
        st.write(f"**Snippet {snippet_no}:** {snippet}")

    # Generate a verdict grounded in the retrieved context.
    context = " ".join(results)
    prompt = f"Claim: {user_input}\nContext: {context}\nAnswer:"

    # BUG FIX: the original split this string literal across two physical
    # lines ('st.subheader("β' / 'Fact Check Result")'), a SyntaxError.
    # Restored as one string; leading emoji presumed ✅ from the mojibake —
    # confirm against the deployed app.
    st.subheader("✅ Fact Check Result")
    response = generator(prompt, max_length=100, do_sample=False)[0]['generated_text']
    st.write(response)