annikwag committed · verified
Commit 1bfcfd5 · Parent(s): 9812be1

Update app.py

Files changed (1)
  1. app.py (+58 -2)
app.py CHANGED
@@ -1,4 +1,5 @@
 import streamlit as st
+import requests
 import pandas as pd
 from appStore.prep_data import process_giz_worldwide, remove_duplicates, get_max_end_year, extract_year
 from appStore.prep_utils import create_documents, get_client
@@ -10,6 +11,50 @@ from torch import cuda
 import json
 from datetime import datetime
 
+
+###########
+# ToDo move to config file and functions
+# Configuration for the dedicated model
+DEDICATED_MODEL = "meta-llama/Llama-3.1-8B-Instruct"
+DEDICATED_ENDPOINT = "https://qu2d8m6dmsollhly.us-east-1.aws.endpoints.huggingface.cloud"
+# Retrieve your write access token from the settings (assuming you stored it in st.secrets)
+WRITE_ACCESS_TOKEN = st.secrets["WRITE_ACCESS_TOKEN"]
+
+def get_rag_answer(query, top_results):
+    """
+    Constructs a prompt from the query and the page contexts of the top results,
+    then sends it to the dedicated endpoint and returns the generated answer.
+    """
+    # Combine the context from the top results (you may adjust the separator as needed)
+    context = "\n\n".join([res.payload["page_content"] for res in top_results])
+
+    # Create a prompt: you can refine the instructions to better suit your needs.
+    prompt = (
+        f"Using the following context, answer the question concisely.\n\n"
+        f"Context:\n{context}\n\n"
+        f"Question: {query}\n\n"
+        f"Answer:"
+    )
+
+    headers = {"Authorization": f"Bearer {WRITE_ACCESS_TOKEN}"}
+    payload = {
+        "inputs": prompt,
+        "parameters": {
+            "max_new_tokens": 150  # Adjust max tokens as needed
+        }
+    }
+
+    response = requests.post(DEDICATED_ENDPOINT, headers=headers, json=payload)
+    if response.status_code == 200:
+        result = response.json()
+        # Depending on the endpoint's response structure, adjust how you extract the generated text.
+        answer = result[0]["generated_text"]
+        return answer.strip()
+    else:
+        return f"Error in generating answer: {response.text}"
+
+#######
+
 # get the device to be used eithe gpu or cpu
 device = 'cuda' if cuda.is_available() else 'cpu'
 
@@ -300,8 +345,19 @@ else:
     if not filtered_semantic_no_dupe:
         st.write("No relevant results found.")
     else:
-        # Show the top 15 from filtered_semantic
-        for res in filtered_semantic_no_dupe[:15]:
+        # Get the top 15 results for the RAG context
+        top_results = filtered_semantic_no_dupe[:15]
+
+        # Call the RAG function to generate an answer
+        rag_answer = get_rag_answer(var, top_results)
+
+        # Display the generated answer at the top of the page
+        st.markdown("### Generated Answer")
+        st.write(rag_answer)
+        st.divider()
+
+        # Now list each individual search result below
+        for res in top_results:
             # Metadata
             metadata = res.payload.get('metadata', {})
             countries = metadata.get('countries', "[]")
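
A note on the response handling in get_rag_answer: the commit extracts the answer with result[0]["generated_text"] and returns the raw HTTP error body otherwise, and its own inline comment flags that the response structure depends on the endpoint. Below is a minimal, hypothetical sketch of a more defensive variant; the query_endpoint name, the 60-second timeout, and the dict fallback are illustrative assumptions, not part of the commit.

import requests

def query_endpoint(prompt: str, endpoint: str, token: str, max_new_tokens: int = 150) -> str:
    """Send a text-generation request and extract the answer defensively (illustrative helper)."""
    headers = {"Authorization": f"Bearer {token}"}
    payload = {"inputs": prompt, "parameters": {"max_new_tokens": max_new_tokens}}
    # A timeout keeps the Streamlit app responsive if the endpoint is cold or scaled to zero.
    response = requests.post(endpoint, headers=headers, json=payload, timeout=60)
    response.raise_for_status()  # Surface HTTP errors instead of returning the raw body as the "answer"
    result = response.json()
    # Text-generation endpoints commonly return [{"generated_text": ...}]; some
    # configurations return a bare {"generated_text": ...} dict, so handle both shapes.
    if isinstance(result, list):
        result = result[0]
    return result.get("generated_text", "").strip()

Depending on how the endpoint is configured, generated_text may echo the prompt along with the completion; if so, strip the prompt prefix before displaying the answer.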
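
For a quick smoke test outside Streamlit, get_rag_answer only needs result objects exposing res.payload["page_content"], the same shape the rest of app.py reads from the search results. A hypothetical stub, where the StubResult class and the sample snippets are invented for illustration and st.secrets must still supply the token:

from dataclasses import dataclass, field

@dataclass
class StubResult:
    # Mimics a search hit: get_rag_answer only reads payload["page_content"]
    payload: dict = field(default_factory=dict)

top_results = [
    StubResult(payload={"page_content": "Project A supports water management in Jordan."}),
    StubResult(payload={"page_content": "Project B funds solar mini-grids in Kenya."}),
]

# Builds one prompt from both snippets and POSTs it to DEDICATED_ENDPOINT.
print(get_rag_answer("Which countries have water projects?", top_results))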