annikwag committed · verified
Commit 1bfcfd5 · Parent(s): 9812be1

Update app.py

Files changed (1)
  1. app.py (+58 -2)
app.py CHANGED
@@ -1,4 +1,5 @@
 import streamlit as st
+import requests
 import pandas as pd
 from appStore.prep_data import process_giz_worldwide, remove_duplicates, get_max_end_year, extract_year
 from appStore.prep_utils import create_documents, get_client
@@ -10,6 +11,50 @@ from torch import cuda
 import json
 from datetime import datetime
 
+
+###########
+# ToDo move to config file and functions
+# Configuration for the dedicated model
+DEDICATED_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
+DEDICATED_ENDPOINT = "https://qu2d8m6dmsollhly.us-east-1.aws.endpoints.huggingface.cloud"
+# Retrieve your write access token from the settings (assuming you stored it in st.secrets)
+WRITE_ACCESS_TOKEN = st.secrets["WRITE_ACCESS_TOKEN"]
+
+def get_rag_answer(query, top_results):
+    """
+    Constructs a prompt from the query and the page contexts of the top results,
+    then sends it to the dedicated endpoint and returns the generated answer.
+    """
+    # Combine the context from the top results (you may adjust the separator as needed)
+    context = "\n\n".join([res.payload["page_content"] for res in top_results])
+
+    # Create a prompt: you can refine the instructions to better suit your needs.
+    prompt = (
+        f"Using the following context, answer the question concisely.\n\n"
+        f"Context:\n{context}\n\n"
+        f"Question: {query}\n\n"
+        f"Answer:"
+    )
+
+    headers = {"Authorization": f"Bearer {WRITE_ACCESS_TOKEN}"}
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": 150  # Adjust max tokens as needed
+        }
+    }
+
+    response = requests.post(DEDICATED_ENDPOINT, headers=headers, json=payload)
+    if response.status_code == 200:
+        result = response.json()
+        # Depending on the endpoint's response structure, adjust how you extract the generated text.
+        answer = result[0]["generated_text"]
+        return answer.strip()
+    else:
+        return f"Error in generating answer: {response.text}"
+
+#######
+
 # get the device to be used eithe gpu or cpu
 device = 'cuda' if cuda.is_available() else 'cpu'
 
@@ -300,8 +345,19 @@ else:
     if not filtered_semantic_no_dupe:
         st.write("No relevant results found.")
     else:
-        # Show the top 15 from filtered_semantic
-        for res in filtered_semantic_no_dupe[:15]:
+        # Get the top 15 results for the RAG context
+        top_results = filtered_semantic_no_dupe[:15]
+
+        # Call the RAG function to generate an answer
+        rag_answer = get_rag_answer(var, top_results)
+
+        # Display the generated answer at the top of the page
+        st.markdown("### Generated Answer")
+        st.write(rag_answer)
+        st.divider()
+
+        # Now list each individual search result below
+        for res in top_results:
             # Metadata
             metadata = res.payload.get('metadata', {})
             countries = metadata.get('countries', "[]")
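
A note on the response handling in get_rag_answer: the commit extracts the answer with result[0]["generated_text"] and returns the raw HTTP error body otherwise, and its own inline comment flags that the response structure depends on the endpoint. Below is a minimal, hypothetical sketch of a more defensive variant; the query_endpoint name, the 60-second timeout, and the dict fallback are illustrative assumptions, not part of the commit.

import requests

def query_endpoint(prompt: str, endpoint: str, token: str, max_new_tokens: int = 150) -> str:
    """Send a text-generation request and extract the answer defensively (illustrative helper)."""
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens}}
    # A timeout keeps the Streamlit app responsive if the endpoint is cold or scaled to zero.
    response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
    response.raise_for_status()  # Surface HTTP errors instead of returning the raw body as the "answer"
    result = response.json()
    # Text-generation endpoints commonly return [{"generated_text": ...}]; some
    # configurations return a bare {"generated_text": ...} dict, so handle both shapes.
    if isinstance(result, list):
        result = result[0]
    return result.get("generated_text", "").strip()

Depending on how the endpoint is configured, generated_text may echo the prompt along with the completion; if so, strip the prompt prefix before displaying the answer.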
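
For a quick smoke test outside Streamlit, get_rag_answer only needs result objects exposing res.payload["page_content"], the same shape the rest of app.py reads from the search results. A hypothetical stub, where the StubResult class and the sample snippets are invented for illustration and st.secrets must still supply the token:

from dataclasses import dataclass, field

@dataclass
class StubResult:
    # Mimics a search hit: get_rag_answer only reads payload["page_content"]
    payload: dict = field(default_factory=dict)

top_results = [
    StubResult(payload={"page_content": "Project A supports water management in Jordan."}),
    StubResult(payload={"page_content": "Project B funds solar mini-grids in Kenya."}),
]

# Builds one prompt from both snippets and POSTs it to DEDICATED_ENDPOINT.
print(get_rag_answer("Which countries have water projects?", top_results))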