Update vlm.py
vlm.py CHANGED

@@ -127,7 +127,11 @@ def build_messages(input_dict: dict, history: list[tuple]):
 #
 @spaces.GPU
 @torch.inference_mode()
-def stream_response(messages: list[dict]):
+def stream_response(
+    messages: list[dict],
+    max_new_tokens: int=1_024,
+    temperature: float=0.15
+):
     """Stream the model's response to the chat interface.
 
     Args:
@@ -148,7 +152,9 @@ def stream_response(messages: list[dict]):
     generation_args = dict(
         inputs,
         streamer=streamer,
-        max_new_tokens=
+        max_new_tokens=max_new_tokens,
+        temperature=temperature,
+        top_p=0.9,
         do_sample=True
     )
 
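For context, a minimal, self-contained sketch of the streaming pattern these hunks modify. The rest of vlm.py is not shown in this diff, so the model (`gpt2` as a stand-in), the prompt construction, and everything outside the signature and `generation_args` are placeholder assumptions; only the parameter names, defaults, and `generation_args` keys mirror the commit.

```python
# Sketch only: mirrors the updated stream_response signature and
# generation_args from the diff; model/prompt handling is assumed.
from threading import Thread

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

tokenizer = AutoTokenizer.from_pretrained("gpt2")        # placeholder model
model = AutoModelForCausalLM.from_pretrained("gpt2")


@torch.inference_mode()
def stream_response(
    messages: list[dict],
    max_new_tokens: int = 1_024,
    temperature: float = 0.15,
):
    """Yield the growing response string as tokens are generated."""
    # Stand-in for the Space's real chat-template / processor step.
    prompt = "\n".join(m["content"] for m in messages)
    inputs = tokenizer(prompt, return_tensors="pt")
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_args = dict(
        inputs,
        streamer=streamer,
        max_new_tokens=max_new_tokens,   # now caller-tunable, per the diff
        temperature=temperature,
        top_p=0.9,
        do_sample=True,
    )
    # generate() blocks, so it runs in a thread while we drain the streamer.
    Thread(target=model.generate, kwargs=generation_args).start()
    text = ""
    for chunk in streamer:
        text += chunk
        yield text


# Example call exercising the new keyword parameter:
for partial in stream_response([{"role": "user", "content": "Hello!"}], max_new_tokens=32):
    print(partial)
```

Promoting `max_new_tokens` and `temperature` from hardcoded values to parameters (with a conservative default temperature of 0.15 alongside `top_p=0.9`) lets callers trade output length and diversity against determinism without editing the function body.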