Update app.py
Browse files
app.py
CHANGED
@@ -109,9 +109,10 @@ def generate_response_stream(prompt, history, max_length, temperature, top_p, to
|
|
109 |
try:
|
110 |
input_tokens_list = tokenizer.encode(full_prompt) # Encode returns a list/array
|
111 |
# Ensure input_tokens is a numpy array of the correct type (int32 is common)
|
112 |
-
|
113 |
# Reshape to (batch_size, sequence_length), which is (1, N) for single prompt
|
114 |
-
|
|
|
115 |
|
116 |
|
117 |
search_options = {
|
@@ -125,15 +126,13 @@ def generate_response_stream(prompt, history, max_length, temperature, top_p, to
|
|
125 |
params = og.GeneratorParams(model)
|
126 |
params.set_search_options(**search_options)
|
127 |
|
128 |
-
# FIX:
|
129 |
-
#
|
130 |
-
|
131 |
-
|
132 |
-
logging.info(f"Setting inputs with keys: {inputs.keys()} and shape for 'input_ids': {inputs['input_ids'].shape}")
|
133 |
-
params.set_inputs(inputs)
|
134 |
|
135 |
start_time = time.time()
|
136 |
-
# Create generator AFTER setting parameters including inputs
|
137 |
generator = og.Generator(model, params)
|
138 |
model_status = "Generating..." # Update status indicator
|
139 |
logging.info("Streaming response...")
|
@@ -172,17 +171,18 @@ def generate_response_stream(prompt, history, max_length, temperature, top_p, to
|
|
172 |
logging.info(f"Generation complete. Tokens: {token_count}, Total Time: {total_time:.2f}s, TTFT: {ttft:.2f}ms, TPS: {tps:.2f}")
|
173 |
model_status = f"Model Ready ({EXECUTION_PROVIDER.upper()} / {model_variant_name})" # Reset status
|
174 |
|
|
|
|
|
|
|
|
|
|
|
|
|
175 |
except TypeError as te:
|
176 |
# Catch type errors specifically during setup if the input format is still wrong
|
177 |
logging.error(f"TypeError during generation setup: {te}", exc_info=True)
|
178 |
-
logging.error("Check
|
179 |
model_status = f"Generation Setup TypeError: {te}"
|
180 |
yield f"\n\nSorry, a TypeError occurred setting up generation: {te}"
|
181 |
-
except AttributeError as ae:
|
182 |
-
# Catch potential future API changes or issues during generation setup
|
183 |
-
logging.error(f"AttributeError during generation setup: {ae}", exc_info=True)
|
184 |
-
model_status = f"Generation Setup Error: {ae}"
|
185 |
-
yield f"\n\nSorry, an error occurred setting up generation: {ae}"
|
186 |
except Exception as e:
|
187 |
logging.error(f"Error during generation: {e}", exc_info=True)
|
188 |
model_status = f"Error during generation: {e}"
|
|
|
109 |
try:
|
110 |
input_tokens_list = tokenizer.encode(full_prompt) # Encode returns a list/array
|
111 |
# Ensure input_tokens is a numpy array of the correct type (int32 is common)
|
112 |
+
input_tokens_np = np.array(input_tokens_list, dtype=np.int32)
|
113 |
# Reshape to (batch_size, sequence_length), which is (1, N) for single prompt
|
114 |
+
input_tokens_np = input_tokens_np.reshape((1, -1))
|
115 |
+
logging.info(f"Prepared input_tokens shape: {input_tokens_np.shape}, dtype: {input_tokens_np.dtype}")
|
116 |
|
117 |
|
118 |
search_options = {
|
|
|
126 |
params = og.GeneratorParams(model)
|
127 |
params.set_search_options(**search_options)
|
128 |
|
129 |
+
# FIX: Reverting to direct assignment based on official examples,
|
130 |
+
# ensuring the numpy array is correctly shaped *before* assignment.
|
131 |
+
logging.info("Attempting direct assignment: params.input_ids = input_tokens_np")
|
132 |
+
params.input_ids = input_tokens_np # Use the reshaped numpy array
|
|
|
|
|
133 |
|
134 |
start_time = time.time()
|
135 |
+
# Create generator AFTER setting parameters including input_ids
|
136 |
generator = og.Generator(model, params)
|
137 |
model_status = "Generating..." # Update status indicator
|
138 |
logging.info("Streaming response...")
|
|
|
171 |
logging.info(f"Generation complete. Tokens: {token_count}, Total Time: {total_time:.2f}s, TTFT: {ttft:.2f}ms, TPS: {tps:.2f}")
|
172 |
model_status = f"Model Ready ({EXECUTION_PROVIDER.upper()} / {model_variant_name})" # Reset status
|
173 |
|
174 |
+
except AttributeError as ae:
|
175 |
+
# Catching this specifically again after trying direct assignment
|
176 |
+
logging.error(f"AttributeError during generation setup (using params.input_ids): {ae}", exc_info=True)
|
177 |
+
logging.error("This suggests the 'input_ids' attribute is not available in this version, despite examples.")
|
178 |
+
model_status = f"Generation Setup AttributeError: {ae}"
|
179 |
+
yield f"\n\nSorry, an AttributeError occurred setting up generation: {ae}"
|
180 |
except TypeError as te:
|
181 |
# Catch type errors specifically during setup if the input format is still wrong
|
182 |
logging.error(f"TypeError during generation setup: {te}", exc_info=True)
|
183 |
+
logging.error("Check input data types and shapes if this occurs.")
|
184 |
model_status = f"Generation Setup TypeError: {te}"
|
185 |
yield f"\n\nSorry, a TypeError occurred setting up generation: {te}"
|
|
|
|
|
|
|
|
|
|
|
186 |
except Exception as e:
|
187 |
logging.error(f"Error during generation: {e}", exc_info=True)
|
188 |
model_status = f"Error during generation: {e}"
|