Spaces:

Avinash109
/

qwen2.5

Sleeping

App Files Files Community

Avinash109 committed on Nov 12, 2024

Commit

80351f4

verified ·

1 Parent(s): fb19b6e

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -29

app.py CHANGED Viewed

@@ -1,35 +1,39 @@
 import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import datetime
-# Page configuration
 st.set_page_config(
     page_title="Qwen2.5-Coder Chat",
     page_icon="💬",
     layout="wide"
 )
 # Initialize session state for conversation history
 if 'messages' not in st.session_state:
     st.session_state.messages = []
-# Cache the model loading
 @st.cache_resource
 def load_model_and_tokenizer():
-    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Using smaller 7B model
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         trust_remote_code=True
     )
-    # Determine device
     device = "cuda" if torch.cuda.is_available() else "cpu"
     st.info(f"Using device: {device}")
-    # Load model with appropriate settings for CPU/GPU
     if device == "cuda":
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
@@ -48,7 +52,7 @@ def load_model_and_tokenizer():
     return tokenizer, model
-# Main title
 st.title("💬 Qwen2.5-Coder Chat")
 # Sidebar settings
@@ -58,7 +62,7 @@ with st.sidebar:
     max_length = st.slider(
         "Maximum Length",
         min_value=64,
-        max_value=2048,  # Reduced for CPU usage
         value=512,
         step=64,
         help="Maximum number of tokens to generate"
@@ -86,7 +90,7 @@ with st.sidebar:
         st.session_state.messages = []
         st.rerun()
-# Load model with error handling
 try:
     with st.spinner("Loading model... Please wait..."):
         tokenizer, model = load_model_and_tokenizer()
@@ -94,13 +98,12 @@ except Exception as e:
     st.error(f"Error loading model: {str(e)}")
     st.stop()
 def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
     """Generate response from the model"""
     try:
-        # Tokenize input
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-        # Generate response
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -112,24 +115,21 @@ def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
                 eos_token_id=tokenizer.eos_token_id,
             )
-        # Decode and return response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract only the model's response (after the prompt)
-        response = response[len(prompt):].strip()
-        return response
     except Exception as e:
         st.error(f"Error generating response: {str(e)}")
         return None
-# Display chat history
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.write(f"{message['content']}\n\n_{message['timestamp']}_")
 # Chat input
 if prompt := st.chat_input("Ask me anything about coding..."):
-    # Add user message to chat
     timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     st.session_state.messages.append({
         "role": "user",
@@ -144,14 +144,11 @@ if prompt := st.chat_input("Ask me anything about coding..."):
     # Generate and display response
     with st.chat_message("assistant"):
         with st.spinner("Thinking..."):
-            # Prepare conversation history
-            conversation = ""
-            for msg in st.session_state.messages:
-                if msg["role"] == "user":
-                    conversation += f"Human: {msg['content']}\n"
-                else:
-                    conversation += f"Assistant: {msg['content']}\n"
-            conversation += "Assistant:"
             response = generate_response(
                 conversation,
@@ -164,9 +161,9 @@ if prompt := st.chat_input("Ask me anything about coding..."):
                 timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                 st.write(f"{response}\n\n_{timestamp}_")
-                # Add assistant response to chat history
                 st.session_state.messages.append({
                     "role": "assistant",
                     "content": response,
                     "timestamp": timestamp
-                })

+import os
 import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import datetime
+# Set up page configuration
 st.set_page_config(
     page_title="Qwen2.5-Coder Chat",
     page_icon="💬",
     layout="wide"
 )
+# Set cache directory explicitly
+os.environ["TRANSFORMERS_CACHE"] = "/root/.cache/huggingface"
 # Initialize session state for conversation history
 if 'messages' not in st.session_state:
     st.session_state.messages = []
+# Cache model loading
 @st.cache_resource
 def load_model_and_tokenizer():
+    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Model identifier
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         trust_remote_code=True
     )
+    # Device configuration
     device = "cuda" if torch.cuda.is_available() else "cpu"
     st.info(f"Using device: {device}")
+    # Load model
     if device == "cuda":
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
     return tokenizer, model
+# Title
 st.title("💬 Qwen2.5-Coder Chat")
 # Sidebar settings
     max_length = st.slider(
         "Maximum Length",
         min_value=64,
+        max_value=2048,
         value=512,
         step=64,
         help="Maximum number of tokens to generate"
         st.session_state.messages = []
         st.rerun()
+# Load model with caching
 try:
     with st.spinner("Loading model... Please wait..."):
         tokenizer, model = load_model_and_tokenizer()
     st.error(f"Error loading model: {str(e)}")
     st.stop()
+# Response generation function
 def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
     """Generate response from the model"""
     try:
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
                 eos_token_id=tokenizer.eos_token_id,
             )
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+        return response[len(prompt):].strip()  # Extract only the response
     except Exception as e:
         st.error(f"Error generating response: {str(e)}")
         return None
+# Display conversation history
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.write(f"{message['content']}\n\n_{message['timestamp']}_")
 # Chat input
 if prompt := st.chat_input("Ask me anything about coding..."):
+    # Add user message
     timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     st.session_state.messages.append({
         "role": "user",
     # Generate and display response
     with st.chat_message("assistant"):
         with st.spinner("Thinking..."):
+            # Prepare conversation context
+            conversation = "\n".join(
+                f"{'Human' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}"
+                for msg in st.session_state.messages
+            ) + "\nAssistant:"
             response = generate_response(
                 conversation,
                 timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
                 st.write(f"{response}\n\n_{timestamp}_")
+                # Add response to chat history
                 st.session_state.messages.append({
                     "role": "assistant",
                     "content": response,
                     "timestamp": timestamp
+                })