from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient
import os
import textwrap
from google import genai
from google.genai.types import GenerateContentConfig
from datasets import load_dataset
from huggingface_hub import login
from typing import List, Dict, Any
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import numpy as np
app = FastAPI()
# Read API tokens from the environment
hf_token = os.environ.get("HF_TOKEN")
google_api_key = os.environ.get("GOOGLE_API_KEY")
if hf_token:
    login(token=hf_token)
def chunk_text(text, chunk_size=250, chunk_overlap=0):
splitter = RecursiveCharacterTextSplitter(
chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n\n", "\n", "."]
)
chunks = splitter.split_text(text)
return chunks
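
# Illustrative call to chunk_text (a sketch only; some_transcript is a hypothetical string,
# the real corpus is built further below as text_string):
#   pieces = chunk_text(some_transcript, chunk_size=500)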
# Embedding model used for both indexing and retrieval
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# embedding_model = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en")

# Function to build the FAISS index
def build_faiss_vectorstore(chunks):
vectorstore = FAISS.from_texts(chunks, embedding_model)
num_documents = len(vectorstore.index_to_docstore_id)
print(f"Total number of documents: {num_documents}")
return vectorstore
# Function to retrieve similar text
def retrieve(query, vectorstore, top_k=8):
docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)
    # Keep only results at or below the distance threshold (FAISS returns L2 distance, so lower means more similar)
    filtered_docs_and_scores = [(doc.page_content, float(score)) for doc, score in docs_and_scores if float(score) <= 0.7]
# Separate docs from the (doc, score) tuples
docs_content = [doc for doc, _ in filtered_docs_and_scores]
return docs_content, filtered_docs_and_scores
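
# Illustrative call to retrieve (the query text is made up; the vectorstore is built below):
#   docs, scored = retrieve("how do bucks bed along clear cut edges?", vectorstore, top_k=5)
#   context = "\n\n".join(docs)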
class ChatRequest(BaseModel):
    message: str = ""
    system_message: str = ""
    temperature: float = 1.5
    max_output_tokens: int = 200
    top_p: float = 0.95  # nucleus-sampling parameter used by the Hugging Face branch; default chosen here
    chat_history: List[Dict[str, Any]] = []
    model_choice: str = "google"
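
# A sketch of a request body this model accepts (values are illustrative). Note that the
# Google branch expects the latest user message to also be the last entry of chat_history,
# since the handler temporarily replaces it with the RAG prompt:
#   {
#     "message": "How do bucks use clear cut edges?",
#     "chat_history": [{"role": "user", "parts": [{"text": "How do bucks use clear cut edges?"}]}],
#     "model_choice": "google"
#   }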
# Load the transcript dataset from the Hugging Face Hub
dataset = load_dataset("Lhumpal/youtube-hunting-beast-transcripts", data_files={"concise": "concise/*", "raw": "raw/*", "facts": "facts/*"})
text = dataset["facts"]["text"]
text_string = "".join(text)
# Chunk and index the documents
chunks = chunk_text(text_string, chunk_size=500)
# Build the vectorstore
vectorstore = build_faiss_vectorstore(chunks)
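
# The transcript corpus is chunked and embedded once at startup, so /chat requests only pay
# for retrieval and generation, not for rebuilding the index.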
@app.post("/chat")
async def chat(request: ChatRequest):
try:
        if request.model_choice == "google":
            if not google_api_key:
                raise ValueError("GOOGLE_API_KEY environment variable not set. Please add it as a secret in your Hugging Face Space.")
            client = genai.Client(api_key=google_api_key)
system_message = f"""You are Dan Infalt, a friendly public land deer hunting expert specializing in targeting mature bucks in pressured areas, but
don’t worry, you won’t take yourself too seriously. You respond in a conversational manner but stay direct. You have dry humor you mix in every once in a while.
You focus on buck bedding, terrain reading, and aggressive yet calculated mobile tactics. Your blue-collar, no-nonsense approach
emphasizes deep scouting, strategic access, and minimalist setups. Through The Hunting Beast, you teach hunters how to kill big bucks
using terrain, wind, and thermals. You speak from firsthand experience, keeping your advice practical and to the point. Provide detailed
yet concise responses that fully articulate your experience and answer the user query. Please keep your responses under {request.max_output_tokens} words."""
# ------------ summarize chat history ------------
summary_thresh = 10
if len(request.chat_history) > summary_thresh:
summarize_prompt = f"""Please summarize the following chat history concisely, focusing on the key points and main topics discussed. Avoid
unnecessary details and provide a clear, straightforward summary. {request.chat_history[:-summary_thresh]}"""  # summarize everything except the most recent summary_thresh messages
summary_response = client.models.generate_content(
model="gemini-2.0-flash",
contents=summarize_prompt,
config=GenerateContentConfig(
system_instruction=["You are a helpful assistant who is an expert at summarization."],
max_output_tokens=250,
temperature=0.5
),
)
                request.chat_history = request.chat_history[-(summary_thresh+2):]  # keep only the most recent messages (summary_thresh plus a small buffer)
request.chat_history.insert(1,
{"role": "user",
"parts": [{"text": f"Here is a summary of this conversation so far: {summary_response.text}"}]})
# ------------ rephrase user question ------------
rephrase_prompt = f"""Given the user question and the chat history, rewrite the user question to improve clarity, specificity, and retrieval accuracy while
maintaining its original intent within the given chat.
- Read the chat history
- Expand only where necessary to remove vagueness.
- Keep the question natural and concise.
- Avoid adding excessive detail or unrelated context.
- Ensure the enhanced question remains true to what the user is asking.
Example Enhancements:
User: "Does camo really matter?"
Refined: "How important is camouflage for a hunter’s success, and how does it compare to other factors like movement and scent control?"
User: "How does attitude affect success?"
Refined: "How do mindset factors like patience, confidence, and adaptability influence a hunter’s success?"
User: "What does it mean if I see does while buck hunting?"
Refined: "If I see does while hunting for a buck, what does that indicate about deer movement and buck activity?"
Chat history:
{request.chat_history}
Now, given the chat history, refine the following user question to improve clarity, specificity, and retrieval accuracy while maintaining its original
intent within the given chat:
{request.message}
"""
rephrase_response = client.models.generate_content(
model="gemini-2.0-flash",
contents=rephrase_prompt,
config=GenerateContentConfig(
system_instruction=["You are a public land deer hunting expert specializing in targeting mature bucks in pressured areas. Your job is to use your deer hunting knowledge to enhance user questions for better retrieval in a Retrieval-Augmented Generation (RAG) system."],
max_output_tokens=250,
temperature=0.5
),
)
# ------------ Retrieve relevant text ------------
rephrase_response = rephrase_response.text
docs, filtered_docs_and_scores = retrieve(rephrase_response, vectorstore, top_k=10)
docs = "\n\n".join(docs)
            # ------------ Retrieval-Augmented Generation ------------
rag_prompt = f"""Use the following information to answer the user's query. You do not have to use all the information, just the pieces that directly
help answer the query most accurately. Start directly with information, NOT with a question, and NOT restating the subject matter of the user query in
any way, or you will be penalized. Respond in a conversational manner.
Here is an example of the style and tone of a response. Notice the bad response and the good response. Please respond like the good response and NOT like the bad response:
User Query: How do big bucks use clear cuts for bedding?
Bad Response: Alright, so you want to know how big bucks use clear cuts for bedding, eh? Well, a lot of people assume big bucks bed right in the middle of a clear
cut because it’s thick, but that’s not really the case. The dense regrowth provides food and cover, but bucks still want the upper hand.
Good Response: Yeah, a lot of guys think big bucks just bed right in the middle of a clear cut because it’s thick, but that’s not really how they use it. The
thick regrowth is great for food and cover, but those bucks still want an advantage. Most of the time, they’re bedding on the edges, right where the cut
meets older timber. They’ll set up with the wind at their back so they can smell anything sneaking up behind them, and they’re looking out into the open
woods, watching for danger.
You have access to the following relevant information retrieved based on the user's query:
{docs}
Using the information above, answer the user's query as accurately as possible in the tone and style of the Good Response:
User Query: {request.message}
"""
            # remove the unformatted user message (assumes the client appended it as the last history entry)
            del request.chat_history[-1]
# add the user message with RAG data
rag_prompt = textwrap.dedent(rag_prompt)
request.chat_history.append({"role": "user", "parts": [{"text": rag_prompt}]})
response = client.models.generate_content(
model="gemini-2.0-flash",
contents=request.chat_history,
config=GenerateContentConfig(
system_instruction=[system_message],
max_output_tokens=request.max_output_tokens,
temperature=request.temperature
),
)
# delete the prompt and put back the unformatted user message
del request.chat_history[-1]
request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})
return {"response": response.text, "dataset_str": text_string, "rephrase_response": rephrase_response, "docs": docs, "filtered_docs_and_scores": filtered_docs_and_scores, "history": request.chat_history, "RAG_prompt": rag_prompt, "chunks": chunks}
if request.model_choice == "HF":
if hf_token:
client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=hf_token)
else:
raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")
messages = [
{"role": "system", "content": request.system_message},
{"role": "user", "content": request.message},
]
response = client.chat_completion(
messages=messages,
                max_tokens=request.max_output_tokens,
temperature=request.temperature,
top_p=request.top_p,
)
return {"response": response.choices[0].message.content}
except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
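
# Minimal local smoke test (a sketch, not part of the app): assumes this file is saved as
# app.py, that uvicorn and requests are installed, and that port 7860 is free.
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
#   import requests
#   payload = {
#       "message": "What terrain features should I scout for buck bedding?",
#       "chat_history": [{"role": "user", "parts": [{"text": "What terrain features should I scout for buck bedding?"}]}],
#       "model_choice": "google",
#   }
#   r = requests.post("http://localhost:7860/chat", json=payload)
#   print(r.json()["response"])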