File size: 9,908 Bytes
4be0291
39bc853
4be0291
 
66b3608
a5c9751
7fed2df
0527179
4be0291
8b4657a
 
 
 
 
7de92cf
 
0527179
 
e7a100a
 
7fc8447
8b4657a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b647859
8b4657a
 
0daea72
8b4657a
 
20a9392
45ab87d
8b4657a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b647859
05ec8aa
b647859
05ec8aa
4be0291
 
 
 
 
 
05ec8aa
 
 
 
 
 
4be0291
0daea72
4be0291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5c9751
4be0291
 
 
 
 
 
a5c9751
4be0291
0527179
 
 
 
 
 
 
 
 
 
4be0291
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0527179
 
 
 
4be0291
0527179
 
 
 
 
 
4be0291
0527179
 
4be0291
0527179
 
4be0291
 
 
 
 
 
 
 
 
 
0daea72
dab5624
4be0291
0daea72
4be0291
 
 
 
 
0527179
 
4be0291
 
 
0527179
 
 
 
 
 
 
 
 
 
 
b647859
0527179
 
 
 
 
 
e7a100a
 
 
 
 
 
 
 
4be0291
e7a100a
45ab87d
5c8b7b4
 
 
 
 
 
e7a100a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0527179
 
e7a100a
 
45ab87d
e7a100a
 
 
 
 
 
 
45ab87d
e7a100a
 
 
 
a00bf6e
b647859
7acc416
e7a100a
 
 
 
 
45ab87d
 
 
0e28843
cec2ab4
0e28843
 
0527179
cec2ab4
0527179
0e28843
 
0527179
cec2ab4
0527179
cec2ab4
 
0527179
cec2ab4
0527179
0e28843
4be0291
0527179
91f0302
56a9d79
 
 
 
4be0291
0527179
 
4be0291
0527179
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
import os
import requests
import sentence_transformers

import streamlit as st

# watsonx asset identifiers. In the visible part of this file they are only
# referenced from commented-out code.
VECTOR_DB = "bbf2ef09-875b-4737-a793-499409a108b0"
JSON_DB = "f49e274a-b5c3-4573-81a2-32df8f96e97b"

# API key taken from the environment; None when the variable is unset.
IBM_API_KEY = os.environ.get("IBM_API_KEY")

# Endpoints for the IAM token exchange and the watsonx text-chat API.
IBM_URL_TOKEN = "https://iam.cloud.ibm.com/identity/token"
IBM_URL_CHAT = "https://us-south.ml.cloud.ibm.com/ml/v1/text/chat?version=2023-10-25"

# Seed the per-session state once; the factories are called so that every
# session gets its own fresh list / empty string.
for _state_key, _default_factory in (
    ("messages", list),
    ("query", str),
    ("extended_query", str),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _default_factory()

##############################################
##
##   IBM API
##
##############################################
def IBM_token():
    """Exchange the IBM Cloud API key for an IAM access token.

    Posts ``IBM_API_KEY`` to ``IBM_URL_TOKEN`` and stores the returned
    ``access_token`` in ``st.session_state.IBM_ACCESS_TOKEN``.

    Raises:
        RuntimeError: if no API key is configured, if the endpoint answers
            with a non-200 status, or if the response carries no token.
        requests.RequestException: on network failure or timeout.
    """
    if not IBM_API_KEY:
        raise RuntimeError("IBM_API_KEY is not set; cannot request an access token")

    headers = {
        "Content-Type": "application/x-www-form-urlencoded"
    }
    data = {
        "grant_type": "urn:ibm:params:oauth:grant-type:apikey",
        "apikey": IBM_API_KEY
    }

    # requests waits forever by default; bound the call so the UI cannot hang.
    response = requests.post(IBM_URL_TOKEN, headers=headers, data=data, timeout=30)
    if response.status_code != 200:
        # Fail here with the server's explanation instead of storing an empty
        # token that only surfaces later as an opaque 401 from the chat API.
        raise RuntimeError("Non-200 response: " + str(response.text))

    access_token = response.json().get("access_token", "")
    if not access_token:
        raise RuntimeError("Token response did not contain an access_token")
    st.session_state.IBM_ACCESS_TOKEN = access_token


def IBM_chat(messages, temperature=0.7):
    """Send a chat conversation to the watsonx text-chat endpoint.

    Args:
        messages: list of ``{"role": ..., "content": ...}`` dicts, passed
            through unchanged as the ``messages`` field of the request.
        temperature: sampling temperature forwarded to the model.

    Returns:
        The ``content`` of the first choice's message in the response.

    Raises:
        Exception: if the endpoint answers with a non-200 status (after one
            token refresh when the first answer was 401).
        requests.RequestException: on network failure or timeout.
    """
    body = {
        "model_id": "ibm/granite-3-8b-instruct",
        "project_id": os.getenv("IBM_PROJECT_ID"),
        "messages": messages,
        "max_tokens": 10000,
        "temperature": temperature,
        "time_limit": 40000
    }

    def _post_chat():
        # Headers are rebuilt per attempt so a refreshed token is picked up.
        headers = {
            "Accept": "application/json",
            "Content-Type": "application/json",
            "Authorization": "Bearer " + st.session_state.IBM_ACCESS_TOKEN
        }
        # time_limit above is presumably milliseconds (40 s) - TODO confirm;
        # the client timeout leaves some headroom on top of that.
        return requests.post(IBM_URL_CHAT, headers=headers, json=body, timeout=60)

    if not st.session_state.get("IBM_ACCESS_TOKEN"):
        IBM_token()

    response = _post_chat()
    if response.status_code == 401:
        # The token is fetched once per session and IAM tokens are
        # short-lived, so refresh it and retry exactly once.
        IBM_token()
        response = _post_chat()

    if response.status_code != 200:
        raise Exception("Non-200 response: " + str(response.text))

    payload = response.json()
    return payload["choices"][0]["message"]["content"]

def IBM_query(prompt, temperature=0.7):
    """Ask the model a single-turn question.

    Wraps ``prompt`` in a one-element conversation with role ``user`` and
    returns whatever ``IBM_chat`` returns for it.
    """
    single_turn = [dict(role="user", content=prompt)]
    return IBM_chat(single_turn, temperature)
    
def get_credentials():
    """Return the watsonx credentials mapping.

    The mapping holds the fixed us-south service URL under ``"url"`` and the
    value of the ``IBM_API_KEY`` environment variable (``None`` when unset)
    under ``"apikey"``.
    """
    credentials = {"url": "https://us-south.ml.cloud.ibm.com"}
    credentials["apikey"] = os.environ.get("IBM_API_KEY")
    return credentials

##############################################
##
##   Vector DB
##
##############################################

from ibm_watsonx_ai.client import APIClient
from ibm_watsonx_ai.foundation_models.embeddings.sentence_transformer_embeddings import SentenceTransformerEmbeddings

def rerank( client, documents, query, top_n ):
    """Reorder ``documents`` with the watsonx cross-encoder reranker.

    Args:
        client: API client handed to ``Rerank`` as ``api_client``.
        documents: sequence of texts to rank.
        query: the text the documents are ranked against.
        top_n: passed to the service as ``return_options.top_n``.

    Returns:
        A new list with the entries of ``documents`` selected by the
        ``index`` of each returned result, in the order the service returned
        them (presumably best match first - confirm against the Rerank docs).
    """
    from ibm_watsonx_ai.foundation_models import Rerank

    rerank_params = {
        "return_options": {
            "top_n": top_n
        },
        "truncate_input_tokens": 512
    }
    cross_encoder = Rerank(
        model_id="cross-encoder/ms-marco-minilm-l-12-v2",
        api_client=client,
        params=rerank_params
    )

    response = cross_encoder.generate(query=query, inputs=documents)
    return [documents[hit["index"]] for hit in response["results"]]


import subprocess
import gzip
import json
import chromadb
import random
import string

def hydrate_chromadb():
    """Rebuild the local Chroma collection from the JSON vector dump.

    Reads ``lablab - json.txt`` (a JSON array whose items carry at least an
    ``embedding`` and a ``content`` field), drops any existing collection
    named ``my_collection`` in ``./chroma_db`` and recreates it from the dump.
    Every entry gets the same metadata (``source: "Lablab website"``) and a
    random 10-character id.

    Returns:
        The freshly created Chroma collection (left empty when the dump
        contains no entries).
    """
    with open("lablab - json.txt", "r", encoding="utf-8") as f:
        vectors = json.load(f)

    chroma_client = chromadb.PersistentClient(path="./chroma_db")

    # Start from an empty collection if one already existed.
    collection_name = "my_collection"
    try:
        chroma_client.delete_collection(name=collection_name)
    except Exception:
        # NOTE(review): the exception raised for a missing collection appears
        # to vary across chromadb releases, so this stays broad - but no
        # longer swallows KeyboardInterrupt/SystemExit like a bare except.
        print("Collection didn't exist - nothing to do.")
    collection = chroma_client.create_collection(name=collection_name)

    if not vectors:
        # Nothing to insert; skip add() rather than handing it empty lists.
        return collection

    vector_embeddings = [vector["embedding"] for vector in vectors]
    vector_documents = [vector["content"] for vector in vectors]
    # The dump's own metadata is not used; every entry gets its own copy of
    # the same fixed source tag.
    vector_metadatas = [{"source": "Lablab website"} for _ in vectors]

    # Random ids in the original format, but guaranteed distinct: the set
    # keeps drawing until there is one id per entry.
    alphabet = string.ascii_uppercase + string.digits
    unique_ids = set()
    while len(unique_ids) < len(vectors):
        unique_ids.add(''.join(random.choices(alphabet, k=10)))
    vector_ids = list(unique_ids)

    collection.add(
        embeddings=vector_embeddings,
        documents=vector_documents,
        metadatas=vector_metadatas,
        ids=vector_ids
    )
    return collection

def proximity_search( question ):
    """Retrieve grounding text for ``question`` from the Chroma collection.

    Embeds the question with the session's embedding model, asks the
    session's collection for ``st.session_state.top_n`` results and returns
    the documents of the first (only) query joined by newlines, in reverse
    of the order Chroma returned them (presumably nearest-first, so the
    closest match ends up last - confirm). ``rerank`` is not applied here.
    """
    state = st.session_state
    question_embedding = state.emb.embed_query(question)
    hits = state.chroma_collection.query(
        query_embeddings=question_embedding,
        n_results=state.top_n,
        include=["documents", "metadatas", "distances"]
    )

    first_query_documents = hits["documents"][0]
    return "\n".join(reversed(first_query_documents))

def do_query(query):
    """Answer ``query`` with retrieved context and render the reply.

    The retrieved documents are appended to the prompt only; nothing is
    written to the session's message history. The model is called with
    temperature 0 and its reply is shown as an assistant chat message.
    """
    context = proximity_search(query)
    prompt = "".join([
        query,
        ". For a project share the image as markdown and mention the url as well. The context for the question: ",
        context,
    ])
    conversation = [{"role": "user", "content": prompt}]

    # Temperature 0: this is a lookup over the context, not creative writing.
    with st.spinner("Thinking..."):
        answer = IBM_chat(conversation, 0)

    st.chat_message("assistant").markdown(answer)

############################
##
##   UI
##
############################

# Load the banner image from the same directory
st.image("banner_policy.jpg", use_container_width=True)

# Set up the sidebar. The title's emoji was stored as mis-decoded UTF-8
# (mojibake); it is restored to the intended character here.
st.sidebar.title("🧙 Synergy Scrolling")
st.sidebar.write(
    "Synergy Scrolling analyzes policies and finds relevant past projects. "
    "This tool helps match your policy or business idea with projects from "
    "previous LabLab hackathons."
)

################ INIT

# One-time per-session setup: access token, watsonx client, embedding model.
# The spinner texts' hourglass emoji was mojibake and is restored here.
if "client" not in st.session_state:
    with st.spinner("⏳ Waking the wizard ..."):
        IBM_token()
        wml_credentials = get_credentials()
        st.session_state.client = APIClient(credentials=wml_credentials, project_id=os.getenv("IBM_PROJECT_ID"))

        st.session_state.emb = SentenceTransformerEmbeddings('sentence-transformers/all-MiniLM-L6-v2')
        # Fixed number of retrieved documents; it is not read from the
        # VECTOR_DB asset's index settings.
        st.session_state.top_n = 10

# The collection is rebuilt once per session, not on every rerun.
if "chroma_collection" not in st.session_state:
    with st.spinner("⏳ Dusting off the scroll books ..."):
        st.session_state.chroma_collection = hydrate_chromadb()

# Query selected by a button during this rerun; stays empty when none fired.
query = ""

################ main UI

# The title's emoji was mojibake (mis-decoded UTF-8); restored here.
st.title("🔮 Policy Scroll")
st.subheader("AI-Powered Project & Policy Matching")
st.write("Explore the Lab Lab Library to find relevant past projects that align with your policy or new initiative.")

################ sidebar UI

# Widget labels below had mojibake emoji (mis-decoded UTF-8); restored here.
policy_input = st.sidebar.text_area("📝 Enter Your Policy or Business Idea:")

if st.sidebar.button("🔗 Analyze with IBM Granite"):
    if policy_input.strip():
        prompt = f"Define search criteria for projects to implement: {policy_input}"

        # Let the model turn the policy text into search criteria.
        with st.spinner("Analyzing..."):
            result = IBM_query(prompt, 0.7)
            st.session_state["extended_query"] = "Find 3 projects that best match and explain why, with these criteria: " + result
    else:
        st.sidebar.warning("Please enter a policy or business idea first!")

# Show the generated query in an editable text area and keep what the widget
# returns, so that manual edits are what actually gets searched (previously
# the return value was discarded and edits were silently ignored).
extended_query = st.sidebar.text_area("💡 Extended query:", value=st.session_state.get("extended_query", ""), height=150)
if st.sidebar.button("🔍 Search for synergy"):
    query = extended_query
    
# Suggested search queries: one button per column, the button label doubles
# as the query text.
suggested_queries = (
    "Projects with a link with Solarpunk",
    "DEI aware projects",
    "Decentral projects",
)

for column, suggestion in zip(st.columns(3), suggested_queries):
    with column:
        if st.button(suggestion):
            query = suggestion
        
# User input in Streamlit
user_input = st.text_input("Describe your policy or project to find relevant Lab Lab projects...", "")

# Chat history is not rendered: nothing in the visible code appends to
# st.session_state.messages.

# A button-selected query takes precedence. The text input keeps its value
# across reruns, so running both (as before) meant every button search also
# re-ran the stale typed query: two model calls and two stacked answers.
if query:
    do_query(query)
elif user_input:
    do_query(user_input)