rahideer committed on
Commit
431fe4b
·
verified ·
1 Parent(s): a9c9f66

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -18
app.py CHANGED
@@ -1,35 +1,26 @@
1
- import os
2
- import zipfile
3
  import pandas as pd
4
  import gradio as gr
5
  from sentence_transformers import SentenceTransformer
6
  import faiss
7
  import numpy as np
8
 
9
- # Step 1: Unzip the dataset
10
- zip_file = "climate.zip" # your uploaded zip file
11
- extracted_path = "climate_data"
12
-
13
- if not os.path.exists(extracted_path):
14
- with zipfile.ZipFile(zip_file, 'r') as zip_ref:
15
- zip_ref.extractall(extracted_path)
16
-
17
- # Step 2: Load train.csv
18
- csv_path = os.path.join(extracted_path, "train.csv")
19
  df = pd.read_csv(csv_path, header=None, names=["label", "title", "description"])
20
 
21
- # Combine title and description for semantic search
22
  df["content"] = df["title"].fillna("") + ". " + df["description"].fillna("")
23
 
24
- # Step 3: Encode using SentenceTransformer
25
  model = SentenceTransformer('all-MiniLM-L6-v2')
26
  corpus_embeddings = model.encode(df["content"].tolist(), show_progress_bar=True)
27
 
28
- # Step 4: Create FAISS index
29
  embedding_dim = corpus_embeddings.shape[1]
30
  index = faiss.IndexFlatL2(embedding_dim)
31
  index.add(corpus_embeddings)
32
 
 
33
  def retrieve_and_respond(claim, k=5):
34
  query_embedding = model.encode([claim])
35
  D, I = index.search(np.array(query_embedding), k)
@@ -41,13 +32,13 @@ def retrieve_and_respond(claim, k=5):
41
 
42
  return "\n\n".join(results)
43
 
44
- # Step 5: Gradio Interface
45
  iface = gr.Interface(
46
  fn=retrieve_and_respond,
47
  inputs=gr.Textbox(lines=2, placeholder="Enter a news-related claim here..."),
48
  outputs="markdown",
49
- title="Claim Verifier using RAG (AG News)",
50
- description="Enter a claim and retrieve the most relevant AG News articles to verify or refute it."
51
  )
52
 
53
  iface.launch()
 
 
 
1
  import pandas as pd
2
  import gradio as gr
3
  from sentence_transformers import SentenceTransformer
4
  import faiss
5
  import numpy as np
6
 
7
+ # Step 1: Load train.csv directly
8
+ csv_path = "train.csv" # file you uploaded directly
 
 
 
 
 
 
 
 
9
  df = pd.read_csv(csv_path, header=None, names=["label", "title", "description"])
10
 
11
+ # Step 2: Combine title and description
12
  df["content"] = df["title"].fillna("") + ". " + df["description"].fillna("")
13
 
14
+ # Step 3: Encode content using SentenceTransformer
15
  model = SentenceTransformer('all-MiniLM-L6-v2')
16
  corpus_embeddings = model.encode(df["content"].tolist(), show_progress_bar=True)
17
 
18
+ # Step 4: Build FAISS index
19
  embedding_dim = corpus_embeddings.shape[1]
20
  index = faiss.IndexFlatL2(embedding_dim)
21
  index.add(corpus_embeddings)
22
 
23
+ # Step 5: Define retrieval function
24
  def retrieve_and_respond(claim, k=5):
25
  query_embedding = model.encode([claim])
26
  D, I = index.search(np.array(query_embedding), k)
 
32
 
33
  return "\n\n".join(results)
34
 
35
+ # Step 6: Gradio UI
36
  iface = gr.Interface(
37
  fn=retrieve_and_respond,
38
  inputs=gr.Textbox(lines=2, placeholder="Enter a news-related claim here..."),
39
  outputs="markdown",
40
+ title="Claim Verifier using AG News",
41
+ description="Enter a claim and get the most relevant AG News articles to help verify or refute it."
42
  )
43
 
44
  iface.launch()