Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -8,7 +8,6 @@ import streamlit as st
|
|
8 |
VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
|
9 |
|
10 |
IBM_API_KEY = os.getenv("IBM_API_KEY")
|
11 |
-
IBM_PROJECT_ID = "a0659778-f4ce-4da1-ba01-43b4f43a026f"
|
12 |
|
13 |
IBM_URL_TOKEN = "https://iam.cloud.ibm.com/identity/token"
|
14 |
IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-10-25"
|
@@ -19,8 +18,7 @@ if "user_input" not in st.session_state:
|
|
19 |
st.session_state.user_input = ""
|
20 |
|
21 |
# Load the banner image from the same directory
|
22 |
-
|
23 |
-
#st.image("banner.jpg", use_container_width=True)
|
24 |
|
25 |
##############################################
|
26 |
##
|
@@ -47,7 +45,7 @@ def IBM_token():
|
|
47 |
def IBM_chat (messages):
|
48 |
body = {
|
49 |
"model_id": "ibm/granite-3-8b-instruct",
|
50 |
-
"project_id": IBM_PROJECT_ID,
|
51 |
"messages": messages,
|
52 |
"max_tokens": 10000,
|
53 |
"temperature": 0.3,
|
@@ -76,21 +74,20 @@ def get_credentials():
|
|
76 |
"apikey" : os.getenv("IBM_API_KEY")
|
77 |
}
|
78 |
|
79 |
-
from ibm_watsonx_ai.foundation_models import ModelInference
|
80 |
from ibm_watsonx_ai.client import APIClient
|
|
|
81 |
|
82 |
if "client" not in st.session_state:
|
83 |
with st.spinner("⏳ Waking the wizard ..."):
|
84 |
IBM_token()
|
85 |
wml_credentials = get_credentials()
|
86 |
-
|
87 |
-
st.session_state.client
|
88 |
-
|
89 |
-
vector_index_id = VECTOR_DB
|
90 |
-
vector_index_details = st.session_state.client.data_assets.get_details(vector_index_id)
|
91 |
-
vector_index_properties = vector_index_details["entity"]["vector_index"]
|
92 |
|
93 |
-
top_n = 20 if vector_index_properties["settings"].get("rerank") else int(vector_index_properties["settings"]["top_k"])
|
|
|
|
|
94 |
|
95 |
def rerank( client, documents, query, top_n ):
|
96 |
from ibm_watsonx_ai.foundation_models import Rerank
|
@@ -116,9 +113,6 @@ def rerank( client, documents, query, top_n ):
|
|
116 |
|
117 |
return new_documents
|
118 |
|
119 |
-
from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings
|
120 |
-
|
121 |
-
emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
|
122 |
|
123 |
import subprocess
|
124 |
import gzip
|
@@ -128,13 +122,11 @@ import random
|
|
128 |
import string
|
129 |
|
130 |
def hydrate_chromadb():
|
131 |
-
data = st.session_state.client.data_assets.get_content(
|
132 |
content = gzip.decompress(data)
|
133 |
stringified_vectors = str(content, "utf-8")
|
134 |
vectors = json.loads(stringified_vectors)
|
135 |
|
136 |
-
#chroma_client = chromadb.Client()
|
137 |
-
#chroma_client = chromadb.InMemoryClient()
|
138 |
chroma_client = chromadb.PersistentClient(path="./chroma_db")
|
139 |
|
140 |
# make sure collection is empty if it already existed
|
@@ -180,17 +172,17 @@ if "chroma_collection" not in st.session_state:
|
|
180 |
st.session_state.chroma_collection = hydrate_chromadb()
|
181 |
|
182 |
def proximity_search( question ):
|
183 |
-
query_vectors = emb.embed_query(question)
|
184 |
query_result = st.session_state.chroma_collection.query(
|
185 |
query_embeddings=query_vectors,
|
186 |
-
n_results=top_n,
|
187 |
include=["documents", "metadatas", "distances"]
|
188 |
)
|
189 |
|
190 |
documents = list(reversed(query_result["documents"][0]))
|
191 |
|
192 |
-
if vector_index_properties["settings"].get("rerank"):
|
193 |
-
documents = rerank(st.session_state.client, documents, question, vector_index_properties["settings"]["top_k"])
|
194 |
|
195 |
return "\n".join(documents)
|
196 |
|
@@ -211,7 +203,7 @@ with col2:
|
|
211 |
st.session_state["user_input"] = "How to implement DEI?"
|
212 |
|
213 |
# User input in Streamlit
|
214 |
-
user_input = st.
|
215 |
|
216 |
if st.session_state["user_input"]:
|
217 |
|
|
|
8 |
VECTOR_DB ="bbf2ef09-875b-4737-a793-499409a108b0"
|
9 |
|
10 |
IBM_API_KEY = os.getenv("IBM_API_KEY")
|
|
|
11 |
|
12 |
IBM_URL_TOKEN = "https://iam.cloud.ibm.com/identity/token"
|
13 |
IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-10-25"
|
|
|
18 |
st.session_state.user_input = ""
|
19 |
|
20 |
# Load the banner image from the same directory
|
21 |
+
st.image("banner.jpg", use_container_width=True)
|
|
|
22 |
|
23 |
##############################################
|
24 |
##
|
|
|
45 |
def IBM_chat (messages):
|
46 |
body = {
|
47 |
"model_id": "ibm/granite-3-8b-instruct",
|
48 |
+
"project_id": os.getenv("IBM_PROJECT_ID"),
|
49 |
"messages": messages,
|
50 |
"max_tokens": 10000,
|
51 |
"temperature": 0.3,
|
|
|
74 |
"apikey" : os.getenv("IBM_API_KEY")
|
75 |
}
|
76 |
|
|
|
77 |
from ibm_watsonx_ai.client import APIClient
|
78 |
+
from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings
|
79 |
|
80 |
if "client" not in st.session_state:
|
81 |
with st.spinner("⏳ Waking the wizard ..."):
|
82 |
IBM_token()
|
83 |
wml_credentials = get_credentials()
|
84 |
+
st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))
|
85 |
+
vector_index_details = st.session_state.client.data_assets.get_details(VECTOR_DB)
|
86 |
+
st.session_state.vector_index_properties = vector_index_details["entity"]["vector_index"]
|
|
|
|
|
|
|
87 |
|
88 |
+
st.session_state.top_n = 20 if st.session_state.vector_index_properties["settings"].get("rerank") else int(st.session_state.vector_index_properties["settings"]["top_k"])
|
89 |
+
st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
|
90 |
+
|
91 |
|
92 |
def rerank( client, documents, query, top_n ):
|
93 |
from ibm_watsonx_ai.foundation_models import Rerank
|
|
|
113 |
|
114 |
return new_documents
|
115 |
|
|
|
|
|
|
|
116 |
|
117 |
import subprocess
|
118 |
import gzip
|
|
|
122 |
import string
|
123 |
|
124 |
def hydrate_chromadb():
|
125 |
+
data = st.session_state.client.data_assets.get_content(VECTOR_DB)
|
126 |
content = gzip.decompress(data)
|
127 |
stringified_vectors = str(content, "utf-8")
|
128 |
vectors = json.loads(stringified_vectors)
|
129 |
|
|
|
|
|
130 |
chroma_client = chromadb.PersistentClient(path="./chroma_db")
|
131 |
|
132 |
# make sure collection is empty if it already existed
|
|
|
172 |
st.session_state.chroma_collection = hydrate_chromadb()
|
173 |
|
174 |
def proximity_search( question ):
|
175 |
+
query_vectors = st.session_state.emb.embed_query(question)
|
176 |
query_result = st.session_state.chroma_collection.query(
|
177 |
query_embeddings=query_vectors,
|
178 |
+
n_results=st.session_state.top_n,
|
179 |
include=["documents", "metadatas", "distances"]
|
180 |
)
|
181 |
|
182 |
documents = list(reversed(query_result["documents"][0]))
|
183 |
|
184 |
+
if st.session_state.vector_index_properties["settings"].get("rerank"):
|
185 |
+
documents = rerank(st.session_state.client, documents, question, st.session_state.vector_index_properties["settings"]["top_k"])
|
186 |
|
187 |
return "\n".join(documents)
|
188 |
|
|
|
203 |
st.session_state["user_input"] = "How to implement DEI?"
|
204 |
|
205 |
# User input in Streamlit
|
206 |
+
user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...")
|
207 |
|
208 |
if st.session_state["user_input"]:
|
209 |
|