Update app.py
Browse files
app.py
CHANGED
@@ -45,7 +45,7 @@ def retrieve(query, vectorstore, top_k=5):
|
|
45 |
return [
|
46 |
(doc.page_content, float(score)) # Ensure score is a standard float
|
47 |
for doc, score in docs_and_scores
|
48 |
-
if float(score) <=
|
49 |
]
|
50 |
|
51 |
|
@@ -66,10 +66,6 @@ class ChatRequest(BaseModel):
|
|
66 |
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
|
67 |
concise_text = dataset["concise"]["text"]
|
68 |
concise_text_string = "".join(concise_text)
|
69 |
-
sample = "Big bucks like to bed in the tall grass and shade in the summer."
|
70 |
-
concise_text_string += sample
|
71 |
-
print(concise_text_string)
|
72 |
-
|
73 |
|
74 |
# Chunk and index the documents
|
75 |
chunks = chunk_text(concise_text_string, chunk_size=250)
|
@@ -103,8 +99,8 @@ async def chat(request: ChatRequest):
|
|
103 |
|
104 |
|
105 |
# Retrieve relevant text
|
106 |
-
docs = retrieve(request.message, vectorstore, top_k=5)
|
107 |
-
|
108 |
|
109 |
rag_prompt = f"""Use the following information to answer the user's query. You do not have to use all the information, just the pieces that directly
|
110 |
help answer the query most accurately. Start directly with information, NOT with a rhetorical question. Respond in a conversational manner.
|
@@ -119,6 +115,8 @@ async def chat(request: ChatRequest):
|
|
119 |
|
120 |
You have access to the following relevant information retrieved based on the user's query:
|
121 |
|
|
|
|
|
122 |
Using the information above, answer the user's query as accurately as possible:
|
123 |
|
124 |
Query: {request.message}
|
@@ -144,7 +142,7 @@ async def chat(request: ChatRequest):
|
|
144 |
del request.chat_history[-1]
|
145 |
request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})
|
146 |
|
147 |
-
return {"response": response.text, "dataset_str": concise_text_string}
|
148 |
|
149 |
if request.model_choice == "HF":
|
150 |
if hf_token:
|
|
|
45 |
return [
|
46 |
(doc.page_content, float(score)) # Ensure score is a standard float
|
47 |
for doc, score in docs_and_scores
|
48 |
+
if float(score) <= 0.75
|
49 |
]
|
50 |
|
51 |
|
|
|
66 |
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
|
67 |
concise_text = dataset["concise"]["text"]
|
68 |
concise_text_string = "".join(concise_text)
|
|
|
|
|
|
|
|
|
69 |
|
70 |
# Chunk and index the documents
|
71 |
chunks = chunk_text(concise_text_string, chunk_size=250)
|
|
|
99 |
|
100 |
|
101 |
# Retrieve relevant text
|
102 |
+
docs, scores = retrieve(request.message, vectorstore, top_k=5)
|
103 |
+
docs = "\n\n".join(docs)
|
104 |
|
105 |
rag_prompt = f"""Use the following information to answer the user's query. You do not have to use all the information, just the pieces that directly
|
106 |
help answer the query most accurately. Start directly with information, NOT with a rhetorical question. Respond in a conversational manner.
|
|
|
115 |
|
116 |
You have access to the following relevant information retrieved based on the user's query:
|
117 |
|
118 |
+
{docs}
|
119 |
+
|
120 |
Using the information above, answer the user's query as accurately as possible:
|
121 |
|
122 |
Query: {request.message}
|
|
|
142 |
del request.chat_history[-1]
|
143 |
request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})
|
144 |
|
145 |
+
return {"response": response.text, "dataset_str": concise_text_string, "docs": docs, "history": request.chat_history, "RAG_prompt": rag_prompt, "chunks": chunks}
|
146 |
|
147 |
if request.model_choice == "HF":
|
148 |
if hf_token:
|