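"""Conversational RAG service: wires a history-aware retriever and a Gemini
chat model into a retrieval chain, with per-session chat histories kept in a
capacity-bounded LFU cache."""
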
from langchain_core.prompts import (
    ChatPromptTemplate,
    MessagesPlaceholder,
)
from langchain.chains import create_history_aware_retriever, create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.vectorstores import VectorStoreRetriever
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.chat_history import BaseChatMessageHistory
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory

from caching.lfu import LFUCache


def _initialize_llm(model_name: str) -> ChatGoogleGenerativeAI:
    """
    Initializes and returns the Gemini chat model.
    """
    return ChatGoogleGenerativeAI(model=model_name)


class LLMService:
    def __init__(
        self,
        logger,
        system_prompt: str,
        web_retriever: VectorStoreRetriever,
        cache_capacity: int = 50,
        llm_model_name: str = "gemini-2.0-flash-thinking-exp-01-21",
    ):
        self._conversational_rag_chain = None
        self._logger = logger
        self.system_prompt = system_prompt
        self._web_retriever = web_retriever
        self.llm = _initialize_llm(llm_model_name)
        ### Statefully manage chat history ###
        # Created before the chain so _get_session_history never sees an
        # uninitialized store.
        self.store = LFUCache(capacity=cache_capacity)
        self._initialize_conversational_rag_chain()

    def _initialize_conversational_rag_chain(self):
        """
        Initializes the conversational RAG chain.
        """
        ### Contextualize question ###
        contextualize_q_system_prompt = (
            "Given a chat history and the latest user question "
            "which might reference context in the chat history, formulate a standalone question "
            "which can be understood without the chat history. Do NOT answer the question, "
            "just reformulate it if needed and otherwise return it as is."
        )
        contextualize_q_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", contextualize_q_system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        history_aware_retriever = create_history_aware_retriever(
            self.llm, self._web_retriever, contextualize_q_prompt
        )

        ### Answer question over the retrieved documents ###
        # Note: create_stuff_documents_chain expects the prompt to contain a
        # "{context}" variable, so self.system_prompt must include one.
        qa_prompt = ChatPromptTemplate.from_messages(
            [
                ("system", self.system_prompt),
                MessagesPlaceholder("chat_history"),
                ("human", "{input}"),
            ]
        )
        question_answer_chain = create_stuff_documents_chain(self.llm, qa_prompt)
        rag_chain = create_retrieval_chain(history_aware_retriever, question_answer_chain)

        self._conversational_rag_chain = RunnableWithMessageHistory(
            rag_chain,
            self._get_session_history,
            input_messages_key="input",
            history_messages_key="chat_history",
            output_messages_key="answer",
        )
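        # Invocation shape (per the keys above): the chain takes
        # {"input": <question>} plus config={"configurable": {"session_id": ...}},
        # and returns a dict whose "answer" key holds the reply, alongside the
        # retrieved "context" documents.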

    def _get_session_history(self, session_id: str) -> BaseChatMessageHistory:
        """
        Returns the chat history for a session, creating it on first use.
        """
        history = self.store.get(session_id)
        if history is None:
            history = ChatMessageHistory()
            self.store.put(session_id, history)
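        # Note: self.store is a capacity-bounded LFU cache, so a rarely used
        # session may be evicted; its history is then lost and a fresh, empty
        # ChatMessageHistory is created on the next request.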
        return history

    def conversational_rag_chain(self):
        """
        Returns the initialized conversational RAG chain.

        Returns:
            The conversational RAG chain instance.
        """
        return self._conversational_rag_chain

    def get_llm(self) -> ChatGoogleGenerativeAI:
        """
        Returns the LLM instance.
        """
        if self.llm is None:
            raise RuntimeError("llm is not initialized")
        return self.llm
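

# --- Usage sketch (illustrative, not part of the service) ---
# Assumes a configured `logger` and a VectorStoreRetriever named `retriever`
# already exist; the system prompt must contain "{context}" so the
# stuff-documents chain can inject the retrieved pages.
#
# service = LLMService(
#     logger,
#     system_prompt="Answer using only the following context:\n\n{context}",
#     web_retriever=retriever,
# )
# chain = service.conversational_rag_chain()
# result = chain.invoke(
#     {"input": "What does the crawled site say about pricing?"},
#     config={"configurable": {"session_id": "user-123"}},
# )
# print(result["answer"])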