Avinash109 committed on
Commit 80351f4 · verified · 1 Parent(s): fb19b6e

Update app.py

Files changed (1)
  1. app.py +26 -29
app.py CHANGED
@@ -1,35 +1,39 @@
+import os
 import streamlit as st
 import torch
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import datetime
 
-# Page configuration
+# Set up page configuration
 st.set_page_config(
     page_title="Qwen2.5-Coder Chat",
     page_icon="💬",
     layout="wide"
 )
 
+# Set cache directory explicitly
+os.environ["TRANSFORMERS_CACHE"] = "/root/.cache/huggingface"
+
 # Initialize session state for conversation history
 if 'messages' not in st.session_state:
     st.session_state.messages = []
 
-# Cache the model loading
+# Cache model loading
 @st.cache_resource
 def load_model_and_tokenizer():
-    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Using smaller 7B model
-
+    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Model identifier
+
     # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         trust_remote_code=True
     )
-
-    # Determine device
+
+    # Device configuration
     device = "cuda" if torch.cuda.is_available() else "cpu"
     st.info(f"Using device: {device}")
-
-    # Load model with appropriate settings for CPU/GPU
+
+    # Load model
     if device == "cuda":
         model = AutoModelForCausalLM.from_pretrained(
             model_name,
@@ -48,7 +52,7 @@ def load_model_and_tokenizer():
 
     return tokenizer, model
 
-# Main title
+# Title
 st.title("💬 Qwen2.5-Coder Chat")
 
 # Sidebar settings
@@ -58,7 +62,7 @@ with st.sidebar:
     max_length = st.slider(
         "Maximum Length",
         min_value=64,
-        max_value=2048,  # Reduced for CPU usage
+        max_value=2048,
         value=512,
         step=64,
         help="Maximum number of tokens to generate"
@@ -86,7 +90,7 @@ with st.sidebar:
         st.session_state.messages = []
         st.rerun()
 
-# Load model with error handling
+# Load model with caching
 try:
     with st.spinner("Loading model... Please wait..."):
         tokenizer, model = load_model_and_tokenizer()
@@ -94,13 +98,12 @@ except Exception as e:
     st.error(f"Error loading model: {str(e)}")
     st.stop()
 
+# Response generation function
 def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
     """Generate response from the model"""
     try:
-        # Tokenize input
         inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-        # Generate response
         with torch.no_grad():
             outputs = model.generate(
                 **inputs,
@@ -112,24 +115,21 @@ def generate_response(prompt, max_new_tokens=512, temperature=0.7, top_p=0.9):
                 eos_token_id=tokenizer.eos_token_id,
             )
 
-        # Decode and return response
         response = tokenizer.decode(outputs[0], skip_special_tokens=True)
-        # Extract only the model's response (after the prompt)
-        response = response[len(prompt):].strip()
-        return response
+        return response[len(prompt):].strip()  # Extract only the response
 
     except Exception as e:
         st.error(f"Error generating response: {str(e)}")
         return None
 
-# Display chat history
+# Display conversation history
 for message in st.session_state.messages:
     with st.chat_message(message["role"]):
         st.write(f"{message['content']}\n\n_{message['timestamp']}_")
 
 # Chat input
 if prompt := st.chat_input("Ask me anything about coding..."):
-    # Add user message to chat
+    # Add user message
     timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
     st.session_state.messages.append({
         "role": "user",
@@ -144,14 +144,11 @@ if prompt := st.chat_input("Ask me anything about coding..."):
     # Generate and display response
     with st.chat_message("assistant"):
         with st.spinner("Thinking..."):
-            # Prepare conversation history
-            conversation = ""
-            for msg in st.session_state.messages:
-                if msg["role"] == "user":
-                    conversation += f"Human: {msg['content']}\n"
-                else:
-                    conversation += f"Assistant: {msg['content']}\n"
-            conversation += "Assistant:"
+            # Prepare conversation context
+            conversation = "\n".join(
+                f"{'Human' if msg['role'] == 'user' else 'Assistant'}: {msg['content']}"
+                for msg in st.session_state.messages
+            ) + "\nAssistant:"
 
             response = generate_response(
                 conversation,
@@ -164,9 +161,9 @@ if prompt := st.chat_input("Ask me anything about coding..."):
             timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
             st.write(f"{response}\n\n_{timestamp}_")
 
-            # Add assistant response to chat history
+            # Add response to chat history
             st.session_state.messages.append({
                 "role": "assistant",
                 "content": response,
                 "timestamp": timestamp
-            })
+            })