import os

import streamlit as st
import torch
from huggingface_hub import login
from transformers import AutoTokenizer, AutoModelForCausalLM

# Get the token securely from Hugging Face secrets
hf_token = os.getenv("HF_TOKEN")

# Authenticate with the token
login(token=hf_token)

# Define model path on Hugging Face Hub
model_name = "Somya1834/fc-deepseek-finetuned-50"  # Replace with your repo

# Load tokenizer and model from Hugging Face (cached so it only happens once per session)
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)

    # Some causal-LM tokenizers ship without a pad token; falling back to EOS
    # (an assumption, but a common default) keeps padding=True below from raising.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    device = "cuda" if torch.cuda.is_available() else "cpu"
    model.to(device)
    model.eval()  # inference only: disable dropout
    return tokenizer, model, device

# Load model once when app starts
tokenizer, model, device = load_model()

# Streamlit UI
st.title("🚀 AI Chatbot - Powered by Your Fine-Tuned Model!")
st.markdown("Ask me anything and get an AI-generated response!")

# User input
prompt = st.text_area("Enter your query:", "")

# Generate response when button is clicked
if st.button("Generate Response"):
    if prompt.strip() != "":
        # Tokenize input
        inputs = tokenizer(
            prompt, return_tensors="pt", max_length=512, truncation=True, padding=True
        ).to(device)

        # Generate response (note: max_length counts the prompt tokens too;
        # max_new_tokens is an alternative if longer prompts are expected)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_length=512, num_return_sequences=1)

        # Decode and display the generated response
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
        st.success(f"💬 Response: {response}")
    else:
        st.warning("Please enter a query to generate a response.")
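
# Usage sketch (assumptions: this file is saved as app.py and HF_TOKEN is set
# in the environment, e.g. via Hugging Face Spaces secrets). The standard
# Streamlit entry point is:
#
#   streamlit run app.py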