from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from huggingface_hub import InferenceClient, login
import os
import textwrap
from google import genai
from google.genai.types import GenerateContentConfig
from datasets import load_dataset
from typing import List, Dict, Any
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
import numpy as np
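
# The imports above imply roughly the following dependencies (package names are
# inferred from the imports, not from a pinned requirements file): fastapi,
# uvicorn, pydantic, huggingface_hub, google-genai, datasets, langchain,
# langchain-community, sentence-transformers, faiss-cpu, numpy.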

app = FastAPI()

# Read credentials from the environment (e.g. secrets configured on the Space)
hf_token = os.environ.get("HF_TOKEN")
google_api_key = os.environ.get("GOOGLE_API_KEY")

# Log in to the Hugging Face Hub only when a token is actually available
if hf_token:
    login(token=hf_token)


def chunk_text(text, chunk_size=250, chunk_overlap=0):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size, chunk_overlap=chunk_overlap, separators=["\n\n", "\n", "."]
    )
    chunks = splitter.split_text(text)
    return chunks


embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


def build_faiss_vectorstore(chunks):
    vectorstore = FAISS.from_texts(chunks, embedding_model)
    num_documents = len(vectorstore.index_to_docstore_id)
    print(f"Total number of documents: {num_documents}")
    return vectorstore


def retrieve(query, vectorstore, top_k=8):
    docs_and_scores = vectorstore.similarity_search_with_score(query=query, k=top_k)

    # Keep only chunks whose distance score is at most 0.7; with the default
    # FAISS L2 index, lower scores mean closer matches.
    filtered_docs_and_scores = [
        (doc.page_content, float(score))
        for doc, score in docs_and_scores
        if float(score) <= 0.7
    ]

    docs_content = [doc for doc, _ in filtered_docs_and_scores]
    return docs_content, filtered_docs_and_scores


class ChatRequest(BaseModel):
    message: str = ""
    system_message: str = ""
    temperature: float = 1.5
    max_output_tokens: int = 200
    top_p: float = 0.95  # default assumed; used by the Hugging Face inference branch
    chat_history: List[Dict[str, Any]] = []
    model_choice: str = "google"


# Load the transcript dataset, chunk the "facts" split, and build the FAISS
# index once at startup so every request can reuse it.
dataset = load_dataset(
    "Lhumpal/youtube-hunting-beast-transcripts",
    data_files={"concise": "concise/*", "raw": "raw/*", "facts": "facts/*"},
)
text = dataset["facts"]["text"]
text_string = "".join(text)

chunks = chunk_text(text_string, chunk_size=500)

vectorstore = build_faiss_vectorstore(chunks)


@app.post("/chat")
async def chat(request: ChatRequest):
    try:
        if request.model_choice == "google":
            client = genai.Client(api_key=google_api_key)

            system_message = f"""You are Dan Infalt, a friendly public land deer hunting expert specializing in targeting mature bucks in pressured areas, but
            don’t worry, you won’t take yourself too seriously. You respond in a conversational manner but are still direct. You have a dry humor that you mix in every once in a while.
            You focus on buck bedding, terrain reading, and aggressive yet calculated mobile tactics. Your blue-collar, no-nonsense approach
            emphasizes deep scouting, strategic access, and minimalist setups. Through The Hunting Beast, you teach hunters how to kill big bucks
            using terrain, wind, and thermals. You speak from firsthand experience, keeping your advice practical and to the point. Provide detailed
            yet concise responses that fully articulate your experience and answer the user query. Please keep your responses between 0 and {request.max_output_tokens} words."""

            # Once the history grows past summary_thresh turns, summarize the older
            # turns so the prompt stays short.
            summary_thresh = 10
            if len(request.chat_history) > summary_thresh:
                summarize_prompt = f"""Please summarize the following chat history concisely, focusing on the key points and main topics discussed. Avoid
                unnecessary details and provide a clear, straightforward summary. {request.chat_history[:-summary_thresh]}"""
                summary_response = client.models.generate_content(
                    model="gemini-2.0-flash",
                    contents=summarize_prompt,
                    config=GenerateContentConfig(
                        system_instruction=["You are a helpful assistant who is an expert at summarization."],
                        max_output_tokens=250,
                        temperature=0.5,
                    ),
                )
                # Keep only the most recent turns, then insert the summary near the
                # top of the trimmed history so earlier context is not lost entirely.
                request.chat_history = request.chat_history[-(summary_thresh + 2):]
                request.chat_history.insert(
                    1,
                    {"role": "user",
                     "parts": [{"text": f"Here is a summary of this conversation so far: {summary_response.text}"}]},
                )

            rephrase_prompt = f"""Given the user question and the chat history, rewrite the user question to improve clarity, specificity, and retrieval accuracy while
            maintaining its original intent within the given chat.

            - Read the chat history.
            - Expand only where necessary to remove vagueness.
            - Keep the question natural and concise.
            - Avoid adding excessive detail or unrelated context.
            - Ensure the enhanced question remains true to what the user is asking.

            Example Enhancements:

            User: "Does camo really matter?"
            Refined: "How important is camouflage for a hunter’s success, and how does it compare to other factors like movement and scent control?"

            User: "How does attitude affect success?"
            Refined: "How do mindset factors like patience, confidence, and adaptability influence a hunter’s success?"

            User: "What does it mean if I see does while buck hunting?"
            Refined: "If I see does while hunting for a buck, what does that indicate about deer movement and buck activity?"

            Chat history:
            {request.chat_history}

            Now, given the chat history, refine the following user question to improve clarity, specificity, and retrieval accuracy while maintaining its original
            intent within the given chat:
            {request.message}
            """

            rephrase_response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=rephrase_prompt,
                config=GenerateContentConfig(
                    system_instruction=["You are a public land deer hunting expert specializing in targeting mature bucks in pressured areas. Your job is to use your deer hunting knowledge to enhance user questions for better retrieval in a Retrieval-Augmented Generation (RAG) system."],
                    max_output_tokens=250,
                    temperature=0.5,
                ),
            )

            # Retrieve the most relevant transcript chunks for the rephrased query
            rephrase_response = rephrase_response.text
            docs, filtered_docs_and_scores = retrieve(rephrase_response, vectorstore, top_k=10)
            docs = "\n\n".join(docs)

            rag_prompt = f"""Use the following information to answer the user's query. You do not have to use all the information, just the pieces that directly
            help answer the query most accurately. Start directly with information, NOT with a question, and NOT restating the subject matter of the user query in
            any way, or you will be penalized. Respond in a conversational manner.

            Here is an example of the style and tone of a response. Notice the good response and the bad response. Please respond like the good response and NOT like the bad response:

            User Query: How do big bucks use clear cuts for bedding?

            Bad Response: Alright, so you want to know how big bucks use clear cuts for bedding, eh? Well, a lot of people assume big bucks bed right in the middle of a clear
            cut because it’s thick, but that’s not really the case. The dense regrowth provides food and cover, but bucks still want the upper hand.

            Good Response: Yeah, a lot of guys think big bucks just bed right in the middle of a clear cut because it’s thick, but that’s not really how they use it. The
            thick regrowth is great for food and cover, but those bucks still want an advantage. Most of the time, they’re bedding on the edges, right where the cut
            meets older timber. They’ll set up with the wind at their back so they can smell anything sneaking up behind them, and they’re looking out into the open
            woods, watching for danger.

            You have access to the following relevant information retrieved based on the user's query:

            {docs}

            Using the information above, answer the user's query as accurately as possible in the tone and style of the Good Response:

            User Query: {request.message}
            """

            # Swap the user's raw message (the last history entry) for the full RAG
            # prompt before generation.
            del request.chat_history[-1]

            rag_prompt = textwrap.dedent(rag_prompt)
            request.chat_history.append({"role": "user", "parts": [{"text": rag_prompt}]})

            response = client.models.generate_content(
                model="gemini-2.0-flash",
                contents=request.chat_history,
                config=GenerateContentConfig(
                    system_instruction=[system_message],
                    max_output_tokens=request.max_output_tokens,
                    temperature=request.temperature,
                ),
            )

            # Restore the user's raw message in the history returned to the client.
            del request.chat_history[-1]
            request.chat_history.append({"role": "user", "parts": [{"text": request.message}]})

            return {
                "response": response.text,
                "dataset_str": text_string,
                "rephrase_response": rephrase_response,
                "docs": docs,
                "filtered_docs_and_scores": filtered_docs_and_scores,
                "history": request.chat_history,
                "RAG_prompt": rag_prompt,
                "chunks": chunks,
            }

        if request.model_choice == "HF":
            if hf_token:
                client = InferenceClient("meta-llama/Llama-3.2-3B-Instruct", token=hf_token)
            else:
                raise ValueError("HF_TOKEN environment variable not set. Please add it as a secret in your Hugging Face Space.")

            messages = [
                {"role": "system", "content": request.system_message},
                {"role": "user", "content": request.message},
            ]

            response = client.chat_completion(
                messages=messages,
                max_tokens=request.max_output_tokens,
                temperature=request.temperature,
                top_p=request.top_p,
            )

            return {"response": response.choices[0].message.content}

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))
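

# A minimal sketch of how the /chat endpoint could be called from a separate
# client script. It assumes this module is saved as app.py, served locally with
# `uvicorn app:app --port 8000`, and that HF_TOKEN and GOOGLE_API_KEY are set;
# the filename, port, and example message are assumptions, not part of this app.
#
#   import requests
#
#   question = "How do big bucks bed in marsh country?"
#   payload = {
#       "message": question,
#       "model_choice": "google",
#       "max_output_tokens": 200,
#       # The Google branch swaps the last history entry for the RAG prompt,
#       # so the raw question is included as the final history item.
#       "chat_history": [{"role": "user", "parts": [{"text": question}]}],
#   }
#   print(requests.post("http://localhost:8000/chat", json=payload).json()["response"])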