Spaces:

BaRiDo
/

IBMHackRAG

Sleeping

App Files Community

BaRiDo committed on Feb 23

Commit

0daea72

verified ·

1 Parent(s): bafc651

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -23

app.py CHANGED Viewed

@@ -8,7 +8,6 @@ import streamlit as st
 VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
 IBM_API_KEY = os.getenv("IBM_API_KEY")
-IBM_PROJECT_ID = "a0659778-f4ce-4da1-ba01-43b4f43a026f"
 IBM_URL_TOKEN = "https://iam.cloud.ibm.com/identity/token"
 IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-10-25"
@@ -19,8 +18,7 @@ if "user_input" not in st.session_state:
     st.session_state.user_input = ""
 # Load the banner image from the same directory
-#banner_image = Image.open("banner.jpg")
-#st.image("banner.jpg", use_container_width=True)
 ##############################################
 ##
@@ -47,7 +45,7 @@ def IBM_token():
 def IBM_chat (messages):
     body = {
         "model_id": "ibm/granite-3-8b-instruct",
-        "project_id": IBM_PROJECT_ID,
         "messages": messages,
         "max_tokens": 10000,
         "temperature": 0.3,
@@ -76,21 +74,20 @@ def get_credentials():
 		"apikey" : os.getenv("IBM_API_KEY")
 	}
-from ibm_watsonx_ai.foundation_models import ModelInference
 from ibm_watsonx_ai.client import APIClient
 if "client" not in st.session_state:
     with st.spinner("⏳ Waking the wizard ..."):
         IBM_token()
         wml_credentials = get_credentials()
-        project_id = os.getenv("IBM_PROJECT_ID")
-        st.session_state.client = APIClient(credentials=wml_credentials, project_id=project_id)
-vector_index_id = VECTOR_DB
-vector_index_details = st.session_state.client.data_assets.get_details(vector_index_id)
-vector_index_properties = vector_index_details["entity"]["vector_index"]
-top_n = 20 if vector_index_properties["settings"].get("rerank") else int(vector_index_properties["settings"]["top_k"])
 def rerank( client, documents, query, top_n ):
     from ibm_watsonx_ai.foundation_models import Rerank
@@ -116,9 +113,6 @@ def rerank( client, documents, query, top_n ):
     return new_documents
-from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings
-emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
 import subprocess
 import gzip
@@ -128,13 +122,11 @@ import random
 import string
 def hydrate_chromadb():
-    data = st.session_state.client.data_assets.get_content(vector_index_id)
     content = gzip.decompress(data)
     stringified_vectors = str(content, "utf-8")
     vectors = json.loads(stringified_vectors)
-    #chroma_client = chromadb.Client()
-    #chroma_client = chromadb.InMemoryClient()
     chroma_client = chromadb.PersistentClient(path="./chroma_db")
     # make sure collection is empty if it already existed
@@ -180,17 +172,17 @@ if "chroma_collection" not in st.session_state:
         st.session_state.chroma_collection = hydrate_chromadb()
 def proximity_search( question ):
-    query_vectors = emb.embed_query(question)
     query_result = st.session_state.chroma_collection.query(
         query_embeddings=query_vectors,
-        n_results=top_n,
         include=["documents", "metadatas", "distances"]
     )
     documents = list(reversed(query_result["documents"][0]))
-    if vector_index_properties["settings"].get("rerank"):
-        documents = rerank(st.session_state.client, documents, question, vector_index_properties["settings"]["top_k"])
     return "\n".join(documents)
@@ -211,7 +203,7 @@ with col2:
         st.session_state["user_input"] = "How to implement DEI?"
 # User input in Streamlit
-user_input = st.chat_input("Describe your policy or project to find relevant Lab Lab projects...")
 if st.session_state["user_input"]:

 VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
 IBM_API_KEY = os.getenv("IBM_API_KEY")
 IBM_URL_TOKEN = "https://iam.cloud.ibm.com/identity/token"
 IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-10-25"
     st.session_state.user_input = ""
 # Load the banner image from the same directory
+st.image("banner.jpg", use_container_width=True)
 ##############################################
 ##
 def IBM_chat (messages):
     body = {
         "model_id": "ibm/granite-3-8b-instruct",
+        "project_id": os.getenv("IBM_PROJECT_ID"),
         "messages": messages,
         "max_tokens": 10000,
         "temperature": 0.3,
 		"apikey" : os.getenv("IBM_API_KEY")
 	}
 from ibm_watsonx_ai.client import APIClient
+from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings
 if "client" not in st.session_state:
     with st.spinner("⏳ Waking the wizard ..."):
         IBM_token()
         wml_credentials = get_credentials()
+        st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
+        vector_index_details = st.session_state.client.data_assets.get_details(VECTOR_DB)
+        st.session_state.vector_index_properties = vector_index_details["entity"]["vector_index"]
+        st.session_state.top_n = 20 if st.session_state.vector_index_properties["settings"].get("rerank") else int(st.session_state.vector_index_properties["settings"]["top_k"])
+        st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
 def rerank( client, documents, query, top_n ):
     from ibm_watsonx_ai.foundation_models import Rerank
     return new_documents
 import subprocess
 import gzip
 import string
 def hydrate_chromadb():
+    data = st.session_state.client.data_assets.get_content(VECTOR_DB)
     content = gzip.decompress(data)
     stringified_vectors = str(content, "utf-8")
     vectors = json.loads(stringified_vectors)
     chroma_client = chromadb.PersistentClient(path="./chroma_db")
     # make sure collection is empty if it already existed
         st.session_state.chroma_collection = hydrate_chromadb()
 def proximity_search( question ):
+    query_vectors = st.session_state.emb.embed_query(question)
     query_result = st.session_state.chroma_collection.query(
         query_embeddings=query_vectors,
+        n_results=st.session_state.top_n,
         include=["documents", "metadatas", "distances"]
     )
     documents = list(reversed(query_result["documents"][0]))
+    if st.session_state.vector_index_properties["settings"].get("rerank"):
+        documents = rerank(st.session_state.client, documents, question, st.session_state.vector_index_properties["settings"]["top_k"])
     return "\n".join(documents)
         st.session_state["user_input"] = "How to implement DEI?"
 # User input in Streamlit
+user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...")
 if st.session_state["user_input"]: