Update app.py
Browse files
app.py
CHANGED
@@ -45,7 +45,7 @@ def retrieve(query, vectorstore, top_k=5):
|
|
45 |
return [
|
46 |
(doc.page_content, float(score)) # Ensure score is a standard float
|
47 |
for doc, score in docs_and_scores
|
48 |
-
if float(score) <=
|
49 |
]
|
50 |
|
51 |
|
@@ -66,10 +66,6 @@ class ChatRequest(BaseModel):
|
|
66 |
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
|
67 |
concise_text = dataset["concise"]["text"]
|
68 |
concise_text_string = "".join(concise_text)
|
69 |
-
sample = "Big bucks like to bed in the tall grass and shade in the summer."
|
70 |
-
concise_text_string += sample
|
71 |
-
print(concise_text_string)
|
72 |
-
|
73 |
|
74 |
# Chunk and index the documents
|
75 |
chunks = chunk_text(concise_text_string, chunk_size=250)
|
@@ -103,8 +99,8 @@ async def chat(request: ChatRequest):
|
|
103 |
|
104 |
|
105 |
# Retrieve relevant text
|
106 |
-
docs = retrieve(request.message, vectorstore, top_k=5)
|
107 |
-
|
108 |
|
109 |
rag_prompt = f"""Use the following information to answer the user's query. You do not have to use all the information, just the pieces that directly
|
110 |
help answer the query most accurately. Start directly with information, NOT with a rhetorical question. Respond in a conversational manner.
|
@@ -119,6 +115,8 @@ async def chat(request: ChatRequest):
|
|
119 |
|
120 |
You have access to the following relevant information retrieved based on the user's query:
|
121 |
|
|
|
|
|
122 |
Using the information above, answer the user's query as accurately as possible:
|
123 |
|
124 |
Query: {request.message}
|
@@ -144,7 +142,7 @@ async def chat(request: ChatRequest):
|
|
144 |
del request.chat_history[-1]
|
145 |
request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})
|
146 |
|
147 |
-
return {"response": response.text, "dataset_str": concise_text_string}
|
148 |
|
149 |
if request.model_choice == "HF":
|
150 |
if hf_token:
|
|
|
45 |
return [
|
46 |
(doc.page_content, float(score)) # Ensure score is a standard float
|
47 |
for doc, score in docs_and_scores
|
48 |
+
if float(score) <= 0.75
|
49 |
]
|
50 |
|
51 |
|
|
|
66 |
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*"})
|
67 |
concise_text = dataset["concise"]["text"]
|
68 |
concise_text_string = "".join(concise_text)
|
|
|
|
|
|
|
|
|
69 |
|
70 |
# Chunk and index the documents
|
71 |
chunks = chunk_text(concise_text_string, chunk_size=250)
|
|
|
99 |
|
100 |
|
101 |
# Retrieve relevant text
|
102 |
+
docs, scores = retrieve(request.message, vectorstore, top_k=5)
|
103 |
+
docs = "\n\n".join(docs)
|
104 |
|
105 |
rag_prompt = f"""Use the following information to answer the user's query. You do not have to use all the information, just the pieces that directly
|
106 |
help answer the query most accurately. Start directly with information, NOT with a rhetorical question. Respond in a conversational manner.
|
|
|
115 |
|
116 |
You have access to the following relevant information retrieved based on the user's query:
|
117 |
|
118 |
+
{docs}
|
119 |
+
|
120 |
Using the information above, answer the user's query as accurately as possible:
|
121 |
|
122 |
Query: {request.message}
|
|
|
142 |
del request.chat_history[-1]
|
143 |
request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})
|
144 |
|
145 |
+
return {"response": response.text, "dataset_str": concise_text_string, "docs": docs, "history": request.chat_history, "RAG_prompt": rag_prompt, "chunks": chunks}
|
146 |
|
147 |
if request.model_choice == "HF":
|
148 |
if hf_token:
|