import os
import gradio as gr
from langchain.chains import RetrievalQA
from langchain_pinecone import Pinecone
from langchain_openai import ChatOpenAI
from langchain_community.llms import HuggingFacePipeline
from langchain_community.embeddings import HuggingFaceEmbeddings
#from dotenv import load_dotenv
import torch
from transformers import AutoModelForCausalLM, AutoModelForSequenceClassification, pipeline, AutoTokenizer
from huggingface_hub import login
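# Authenticate against the Hugging Face Hub; HF_TOKEN is read from the environment (e.g. a Space secret).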
login(token=os.getenv("HF_TOKEN"))
# Initialize Embedding Model
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
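# all-MiniLM-L6-v2 yields 384-dimensional embeddings, so the Pinecone index must be built with the same dimension.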
# Pinecone Retriever
api_key = os.getenv("PINCE_CONE_LIGHT")
if api_key is None:
    raise ValueError("Pinecone API key missing.")
else:
    pc = Pinecone(pinecone_api_key=api_key, embedding=embedding_model, index_name='rag-rubic', namespace='vectors_lightmodel')
retriever = pc.as_retriever()
# LLM Options
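# Display names shown in the dropdown, mapped to the underlying model identifiers.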
llm_options = {
"OpenAI": "gpt-4o-mini",
"Microsoft-Phi": "microsoft/Phi-3.5-mini-instruct",
"DeepSeek-R1": "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
"Intel-tinybert": "Intel/dynamic_tinybert"
}
def load_llm(name, model_name):
    """Loads the selected LLM only when it is needed."""
    if name == "OpenAI":
        openai_api_key = os.getenv("OPEN_AI_KEY")
        return ChatOpenAI(model='gpt-4o-mini', openai_api_key=openai_api_key)
    if "Phi" in name or "DeepSeek" in name:
        # Causal LMs are served through a text-generation pipeline with greedy decoding and short answers.
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto", torch_dtype=torch.float16)
        tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, max_length=4096, eos_token_id=tokenizer.eos_token_id,
                        return_full_text=False, do_sample=False, num_return_sequences=1, max_new_tokens=50, temperature=0.1)
    elif "tinybert" in name:
        # Note: a feature-extraction pipeline returns embeddings rather than text, so this option does not generate answers.
        model = AutoModelForSequenceClassification.from_pretrained(model_name)
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        pipe = pipeline("feature-extraction", model=model, tokenizer=tokenizer, truncation=True, padding=True, max_length=512)
    else:
        return None
    return HuggingFacePipeline(pipeline=pipe)
# Initialize default LLM
selected_llm = list(llm_options.keys())[0]
llm = load_llm(selected_llm, llm_options[selected_llm])
current_llm_name = selected_llm  # tracks which dropdown choice is currently loaded
# Create QA Retrieval Chain
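# RetrievalQA fetches the most relevant chunks from Pinecone and passes them to the LLM to compose the answer.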
qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
# Chatbot function
def chatbot(selected_llm, user_input, chat_history):
    global llm, qa, current_llm_name
    # Reload the model and rebuild the QA chain only when the dropdown selection has changed.
    if selected_llm != current_llm_name:
        llm = load_llm(selected_llm, llm_options[selected_llm])
        qa = RetrievalQA.from_llm(llm=llm, retriever=retriever)
        current_llm_name = selected_llm
    response = qa.invoke({"query": user_input})
    answer = response.get("result", "No response received.")
    chat_history.append((f"🧑‍💻 {user_input}", f"🤖 {answer}"))
    return chat_history, ""
# Gradio Interface
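# Both the Send button and pressing Enter call the same handler; the second output ("") clears the textbox.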
with gr.Blocks() as demo:
    gr.Markdown("# 🤖 RAG-Powered Chatbot")
    llm_selector = gr.Dropdown(choices=list(llm_options.keys()), value=selected_llm, label="Choose an LLM")
    chat_history = gr.State([])
    chatbot_ui = gr.Chatbot()
    user_input = gr.Textbox(label="💬 Type your message and press Enter:")
    send_button = gr.Button("Send")
    send_button.click(chatbot, inputs=[llm_selector, user_input, chat_history], outputs=[chatbot_ui, user_input])
    user_input.submit(chatbot, inputs=[llm_selector, user_input, chat_history], outputs=[chatbot_ui, user_input])

demo.launch()