ratyim committed on
Commit
93807bd
·
verified ·
1 Parent(s): 9ae94bb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -56
app.py CHANGED
@@ -1,6 +1,5 @@
1
  #!/usr/bin/env python
2
  # encoding: utf-8
3
-
4
  import spaces
5
  import gradio as gr
6
  from PIL import Image
@@ -8,13 +7,15 @@ import traceback
8
  import re
9
  import torch
10
  import argparse
11
- import logging
12
  from transformers import AutoModel, AutoTokenizer
13
- from huggingface_hub import hf_hub_download
14
 
15
- # Set up logging
16
- logging.basicConfig(level=logging.INFO)
17
- logger = logging.getLogger(__name__)
 
 
 
 
18
 
19
  # Argparser
20
  parser = argparse.ArgumentParser(description='demo')
@@ -25,44 +26,30 @@ assert device in ['cuda', 'mps']
25
 
26
  # Load model
27
  model_path = 'openbmb/MiniCPM-Llama3-V-2_5'
 
 
 
 
 
 
 
 
 
 
28
 
29
- def download_model_files(repo_id, filenames):
30
- for filename in filenames:
31
- try:
32
- file_path = hf_hub_download(repo_id=repo_id, filename=filename, resume_download=True)
33
- logger.info(f"Downloaded {filename} successfully.")
34
- except Exception as e:
35
- logger.error(f"Error downloading {filename}: {e}")
36
- raise
37
-
38
- model_files = ["configuration_minicpm.py", "resampler.py", "modeling_minicpmv.py"]
39
- download_model_files(model_path, model_files)
40
 
41
- try:
42
- if 'int4' in model_path:
43
- if device == 'mps':
44
- logger.error('Error: running int4 model with bitsandbytes on Mac is not supported right now.')
45
- exit()
46
- model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
47
- else:
48
- model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
49
- model = model.to(device=device)
50
- tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
51
- model.eval()
52
- except Exception as e:
53
- logger.error(f"Error loading model or tokenizer: {e}")
54
- raise
55
 
56
  ERROR_MSG = "Error, please retry"
57
  model_name = 'MiniCPM-Llama3-V 2.5'
58
 
59
  form_radio = {
60
  'choices': ['Beam Search', 'Sampling'],
 
61
  'value': 'Sampling',
62
  'interactive': True,
63
  'label': 'Decode Type'
64
  }
65
-
66
  num_beams_slider = {
67
  'minimum': 0,
68
  'maximum': 5,
@@ -71,7 +58,6 @@ num_beams_slider = {
71
  'interactive': True,
72
  'label': 'Num Beams'
73
  }
74
-
75
  repetition_penalty_slider = {
76
  'minimum': 0,
77
  'maximum': 3,
@@ -80,7 +66,6 @@ repetition_penalty_slider = {
80
  'interactive': True,
81
  'label': 'Repetition Penalty'
82
  }
83
-
84
  repetition_penalty_slider2 = {
85
  'minimum': 0,
86
  'maximum': 3,
@@ -89,14 +74,13 @@ repetition_penalty_slider2 = {
89
  'interactive': True,
90
  'label': 'Repetition Penalty'
91
  }
92
-
93
  max_new_tokens_slider = {
94
  'minimum': 1,
95
  'maximum': 4096,
96
  'value': 1024,
97
  'step': 1,
98
  'interactive': True,
99
- 'label': 'Max New Tokens'
100
  }
101
 
102
  top_p_slider = {
@@ -105,27 +89,26 @@ top_p_slider = {
105
  'value': 0.8,
106
  'step': 0.05,
107
  'interactive': True,
108
- 'label': 'Top P'
109
  }
110
-
111
  top_k_slider = {
112
  'minimum': 0,
113
  'maximum': 200,
114
  'value': 100,
115
  'step': 1,
116
  'interactive': True,
117
- 'label': 'Top K'
118
  }
119
-
120
  temperature_slider = {
121
  'minimum': 0,
122
  'maximum': 2,
123
  'value': 0.7,
124
  'step': 0.05,
125
  'interactive': True,
126
- 'label': 'Temperature'
127
  }
128
 
 
129
  def create_component(params, comp='Slider'):
130
  if comp == 'Slider':
131
  return gr.Slider(
@@ -151,7 +134,7 @@ def create_component(params, comp='Slider'):
151
 
152
  @spaces.GPU(duration=120)
153
  def chat(img, msgs, ctx, params=None, vision_hidden_states=None):
154
- default_params = {"stream": False, "sampling": False, "num_beams": 3, "repetition_penalty": 1.2, "max_new_tokens": 1024}
155
  if params is None:
156
  params = default_params
157
  if img is None:
@@ -165,24 +148,34 @@ def chat(img, msgs, ctx, params=None, vision_hidden_states=None):
165
  tokenizer=tokenizer,
166
  **params
167
  )
 
 
 
 
 
 
 
168
  for char in answer:
169
  yield char
170
  except Exception as err:
171
- logger.error(f"Error during chat: {err}")
172
  traceback.print_exc()
173
  yield ERROR_MSG
174
 
 
175
  def upload_img(image, _chatbot, _app_session):
176
  image = Image.fromarray(image)
177
- _app_session['sts'] = None
178
- _app_session['ctx'] = []
179
- _app_session['img'] = image
 
180
  _chatbot.append(('', 'Image uploaded successfully, you can talk to me now'))
181
  return _chatbot, _app_session
182
 
 
183
  def respond(_chat_bot, _app_cfg, params_form, num_beams, repetition_penalty, repetition_penalty_2, top_p, top_k, temperature):
184
  _question = _chat_bot[-1][0]
185
- logger.info(f'<Question>: {_question}')
186
  if _app_cfg.get('ctx', None) is None:
187
  _chat_bot[-1][1] = 'Please upload an image to start'
188
  yield (_chat_bot, _app_cfg)
@@ -198,7 +191,7 @@ def respond(_chat_bot, _app_cfg, params_form, num_beams, repetition_penalty, rep
198
  'stream': False,
199
  'num_beams': num_beams,
200
  'repetition_penalty': repetition_penalty,
201
- "max_new_tokens": 896
202
  }
203
  else:
204
  params = {
@@ -208,9 +201,9 @@ def respond(_chat_bot, _app_cfg, params_form, num_beams, repetition_penalty, rep
208
  'top_k': top_k,
209
  'temperature': temperature,
210
  'repetition_penalty': repetition_penalty_2,
211
- "max_new_tokens": 896
212
  }
213
-
214
  gen = chat(_app_cfg['img'], _context, None, params)
215
  _chat_bot[-1][1] = ""
216
  for _char in gen:
@@ -218,10 +211,12 @@ def respond(_chat_bot, _app_cfg, params_form, num_beams, repetition_penalty, rep
218
  _context[-1]["content"] += _char
219
  yield (_chat_bot, _app_cfg)
220
 
 
221
  def request(_question, _chat_bot, _app_cfg):
222
  _chat_bot.append((_question, None))
223
  return '', _chat_bot, _app_cfg
224
 
 
225
  def regenerate_button_clicked(_question, _chat_bot, _app_cfg):
226
  if len(_chat_bot) <= 1:
227
  _chat_bot.append(('Regenerate', 'No question for regeneration.'))
@@ -233,6 +228,8 @@ def regenerate_button_clicked(_question, _chat_bot, _app_cfg):
233
  _chat_bot = _chat_bot[:-1]
234
  _app_cfg['ctx'] = _app_cfg['ctx'][:-2]
235
  return request(_question, _chat_bot, _app_cfg)
 
 
236
 
237
  def clear_button_clicked(_question, _chat_bot, _app_cfg, _bt_pic):
238
  _chat_bot.clear()
@@ -241,6 +238,7 @@ def clear_button_clicked(_question, _chat_bot, _app_cfg, _bt_pic):
241
  _app_cfg['img'] = None
242
  _bt_pic = None
243
  return '', _chat_bot, _app_cfg, _bt_pic
 
244
 
245
  with gr.Blocks() as demo:
246
  with gr.Row():
@@ -257,11 +255,11 @@ with gr.Blocks() as demo:
257
  regenerate = create_component({'value': 'Regenerate'}, comp='Button')
258
  clear = create_component({'value': 'Clear'}, comp='Button')
259
  with gr.Column(scale=3, min_width=500):
260
- app_session = gr.State({'sts': None, 'ctx': None, 'img': None})
261
  bt_pic = gr.Image(label="Upload an image to start")
262
  chat_bot = gr.Chatbot(label=f"Chat with {model_name}")
263
  txt_message = gr.Textbox(label="Input text")
264
-
265
  clear.click(
266
  clear_button_clicked,
267
  [txt_message, chat_bot, app_session, bt_pic],
@@ -269,7 +267,8 @@ with gr.Blocks() as demo:
269
  queue=False
270
  )
271
  txt_message.submit(
272
- request,
 
273
  [txt_message, chat_bot, app_session],
274
  [txt_message, chat_bot, app_session],
275
  queue=False
@@ -288,8 +287,9 @@ with gr.Blocks() as demo:
288
  [chat_bot, app_session, params_form, num_beams, repetition_penalty, repetition_penalty_2, top_p, top_k, temperature],
289
  [chat_bot, app_session]
290
  )
291
- bt_pic.upload(lambda: None, None, chat_bot, queue=False).then(upload_img, inputs=[bt_pic, chat_bot, app_session], outputs=[chat_bot, app_session])
292
 
293
- # Launch the demo
 
294
  demo.queue()
295
- demo.launch()
 
1
  #!/usr/bin/env python
2
  # encoding: utf-8
 
3
  import spaces
4
  import gradio as gr
5
  from PIL import Image
 
7
  import re
8
  import torch
9
  import argparse
 
10
  from transformers import AutoModel, AutoTokenizer
 
11
 
12
+ # README, How to run demo on different devices
13
+
14
+ # For Nvidia GPUs.
15
+ # python web_demo_2.5.py --device cuda
16
+
17
+ # For Mac with MPS (Apple silicon or AMD GPUs).
18
+ # PYTORCH_ENABLE_MPS_FALLBACK=1 python web_demo_2.5.py --device mps
19
 
20
  # Argparser
21
  parser = argparse.ArgumentParser(description='demo')
 
26
 
27
  # Load model
28
  model_path = 'openbmb/MiniCPM-Llama3-V-2_5'
29
+ if 'int4' in model_path:
30
+ if device == 'mps':
31
+ print('Error: running int4 model with bitsandbytes on Mac is not supported right now.')
32
+ exit()
33
+ model = AutoModel.from_pretrained(model_path, trust_remote_code=True)
34
+ else:
35
+ model = AutoModel.from_pretrained(model_path, trust_remote_code=True).to(dtype=torch.float16)
36
+ model = model.to(device=device)
37
+ tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
38
+ model.eval()
39
 
 
 
 
 
 
 
 
 
 
 
 
40
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  ERROR_MSG = "Error, please retry"
43
  model_name = 'MiniCPM-Llama3-V 2.5'
44
 
45
  form_radio = {
46
  'choices': ['Beam Search', 'Sampling'],
47
+ #'value': 'Beam Search',
48
  'value': 'Sampling',
49
  'interactive': True,
50
  'label': 'Decode Type'
51
  }
52
+ # Beam Form
53
  num_beams_slider = {
54
  'minimum': 0,
55
  'maximum': 5,
 
58
  'interactive': True,
59
  'label': 'Num Beams'
60
  }
 
61
  repetition_penalty_slider = {
62
  'minimum': 0,
63
  'maximum': 3,
 
66
  'interactive': True,
67
  'label': 'Repetition Penalty'
68
  }
 
69
  repetition_penalty_slider2 = {
70
  'minimum': 0,
71
  'maximum': 3,
 
74
  'interactive': True,
75
  'label': 'Repetition Penalty'
76
  }
 
77
  max_new_tokens_slider = {
78
  'minimum': 1,
79
  'maximum': 4096,
80
  'value': 1024,
81
  'step': 1,
82
  'interactive': True,
83
+ 'label': 'Max New Tokens'
84
  }
85
 
86
  top_p_slider = {
 
89
  'value': 0.8,
90
  'step': 0.05,
91
  'interactive': True,
92
+ 'label': 'Top P'
93
  }
 
94
  top_k_slider = {
95
  'minimum': 0,
96
  'maximum': 200,
97
  'value': 100,
98
  'step': 1,
99
  'interactive': True,
100
+ 'label': 'Top K'
101
  }
 
102
  temperature_slider = {
103
  'minimum': 0,
104
  'maximum': 2,
105
  'value': 0.7,
106
  'step': 0.05,
107
  'interactive': True,
108
+ 'label': 'Temperature'
109
  }
110
 
111
+
112
  def create_component(params, comp='Slider'):
113
  if comp == 'Slider':
114
  return gr.Slider(
 
134
 
135
  @spaces.GPU(duration=120)
136
  def chat(img, msgs, ctx, params=None, vision_hidden_states=None):
137
+ default_params = {"stream": False, "sampling": False, "num_beams":3, "repetition_penalty": 1.2, "max_new_tokens": 1024}
138
  if params is None:
139
  params = default_params
140
  if img is None:
 
148
  tokenizer=tokenizer,
149
  **params
150
  )
151
+ # if params['stream'] is False:
152
+ # res = re.sub(r'(<box>.*</box>)', '', answer)
153
+ # res = res.replace('<ref>', '')
154
+ # res = res.replace('</ref>', '')
155
+ # res = res.replace('<box>', '')
156
+ # answer = res.replace('</box>', '')
157
+ # else:
158
  for char in answer:
159
  yield char
160
  except Exception as err:
161
+ print(err)
162
  traceback.print_exc()
163
  yield ERROR_MSG
164
 
165
+
166
  def upload_img(image, _chatbot, _app_session):
167
  image = Image.fromarray(image)
168
+
169
+ _app_session['sts']=None
170
+ _app_session['ctx']=[]
171
+ _app_session['img']=image
172
  _chatbot.append(('', 'Image uploaded successfully, you can talk to me now'))
173
  return _chatbot, _app_session
174
 
175
+
176
  def respond(_chat_bot, _app_cfg, params_form, num_beams, repetition_penalty, repetition_penalty_2, top_p, top_k, temperature):
177
  _question = _chat_bot[-1][0]
178
+ print('<Question>:', _question)
179
  if _app_cfg.get('ctx', None) is None:
180
  _chat_bot[-1][1] = 'Please upload an image to start'
181
  yield (_chat_bot, _app_cfg)
 
191
  'stream': False,
192
  'num_beams': num_beams,
193
  'repetition_penalty': repetition_penalty,
194
+ "max_new_tokens": 896
195
  }
196
  else:
197
  params = {
 
201
  'top_k': top_k,
202
  'temperature': temperature,
203
  'repetition_penalty': repetition_penalty_2,
204
+ "max_new_tokens": 896
205
  }
206
+
207
  gen = chat(_app_cfg['img'], _context, None, params)
208
  _chat_bot[-1][1] = ""
209
  for _char in gen:
 
211
  _context[-1]["content"] += _char
212
  yield (_chat_bot, _app_cfg)
213
 
214
+
215
  def request(_question, _chat_bot, _app_cfg):
216
  _chat_bot.append((_question, None))
217
  return '', _chat_bot, _app_cfg
218
 
219
+
220
  def regenerate_button_clicked(_question, _chat_bot, _app_cfg):
221
  if len(_chat_bot) <= 1:
222
  _chat_bot.append(('Regenerate', 'No question for regeneration.'))
 
228
  _chat_bot = _chat_bot[:-1]
229
  _app_cfg['ctx'] = _app_cfg['ctx'][:-2]
230
  return request(_question, _chat_bot, _app_cfg)
231
+ # return respond(_chat_bot, _app_cfg, params_form, num_beams, repetition_penalty, repetition_penalty_2, top_p, top_k, temperature)
232
+
233
 
234
  def clear_button_clicked(_question, _chat_bot, _app_cfg, _bt_pic):
235
  _chat_bot.clear()
 
238
  _app_cfg['img'] = None
239
  _bt_pic = None
240
  return '', _chat_bot, _app_cfg, _bt_pic
241
+
242
 
243
  with gr.Blocks() as demo:
244
  with gr.Row():
 
255
  regenerate = create_component({'value': 'Regenerate'}, comp='Button')
256
  clear = create_component({'value': 'Clear'}, comp='Button')
257
  with gr.Column(scale=3, min_width=500):
258
+ app_session = gr.State({'sts':None,'ctx':None,'img':None})
259
  bt_pic = gr.Image(label="Upload an image to start")
260
  chat_bot = gr.Chatbot(label=f"Chat with {model_name}")
261
  txt_message = gr.Textbox(label="Input text")
262
+
263
  clear.click(
264
  clear_button_clicked,
265
  [txt_message, chat_bot, app_session, bt_pic],
 
267
  queue=False
268
  )
269
  txt_message.submit(
270
+ request,
271
+ #[txt_message, chat_bot, app_session, params_form, num_beams, repetition_penalty, repetition_penalty_2, top_p, top_k, temperature],
272
  [txt_message, chat_bot, app_session],
273
  [txt_message, chat_bot, app_session],
274
  queue=False
 
287
  [chat_bot, app_session, params_form, num_beams, repetition_penalty, repetition_penalty_2, top_p, top_k, temperature],
288
  [chat_bot, app_session]
289
  )
290
+ bt_pic.upload(lambda: None, None, chat_bot, queue=False).then(upload_img, inputs=[bt_pic,chat_bot,app_session], outputs=[chat_bot,app_session])
291
 
292
+ # launch
293
+ #demo.launch(share=False, debug=True, show_api=False, server_port=8080, server_name="0.0.0.0")
294
  demo.queue()
295
+ demo.launch()