Spaces:

Athspi
/

Yyyy

Runtime error

App Files Files Community

Athspi committed 20 days ago

Commit

cd6a828

verified ·

1 Parent(s): e3d67e9

Update app.py

Browse files

Files changed (1) hide show

app.py +15 -15

app.py CHANGED Viewed

@@ -109,9 +109,10 @@ def generate_response_stream(prompt, history, max_length, temperature, top_p, to
     try:
         input_tokens_list = tokenizer.encode(full_prompt) # Encode returns a list/array
         # Ensure input_tokens is a numpy array of the correct type (int32 is common)
-        input_tokens = np.array(input_tokens_list, dtype=np.int32)
         # Reshape to (batch_size, sequence_length), which is (1, N) for single prompt
-        input_tokens = input_tokens.reshape((1, -1))
         search_options = {
@@ -125,15 +126,13 @@ def generate_response_stream(prompt, history, max_length, temperature, top_p, to
         params = og.GeneratorParams(model)
         params.set_search_options(**search_options)
-        # FIX: Create a dictionary mapping input names to tensors (numpy arrays)
-        #      and pass this dictionary to set_inputs.
-        #      Assuming the standard input name "input_ids".
-        inputs = {"input_ids": input_tokens}
-        logging.info(f"Setting inputs with keys: {inputs.keys()} and shape for 'input_ids': {inputs['input_ids'].shape}")
-        params.set_inputs(inputs)
         start_time = time.time()
-        # Create generator AFTER setting parameters including inputs
         generator = og.Generator(model, params)
         model_status = "Generating..." # Update status indicator
         logging.info("Streaming response...")
@@ -172,17 +171,18 @@ def generate_response_stream(prompt, history, max_length, temperature, top_p, to
         logging.info(f"Generation complete. Tokens: {token_count}, Total Time: {total_time:.2f}s, TTFT: {ttft:.2f}ms, TPS: {tps:.2f}")
         model_status = f"Model Ready ({EXECUTION_PROVIDER.upper()} / {model_variant_name})" # Reset status
     except TypeError as te:
         # Catch type errors specifically during setup if the input format is still wrong
         logging.error(f"TypeError during generation setup: {te}", exc_info=True)
-        logging.error("Check if the input format {'input_ids': token_array} is correct.")
         model_status = f"Generation Setup TypeError: {te}"
         yield f"\n\nSorry, a TypeError occurred setting up generation: {te}"
-    except AttributeError as ae:
-         # Catch potential future API changes or issues during generation setup
-         logging.error(f"AttributeError during generation setup: {ae}", exc_info=True)
-         model_status = f"Generation Setup Error: {ae}"
-         yield f"\n\nSorry, an error occurred setting up generation: {ae}"
     except Exception as e:
         logging.error(f"Error during generation: {e}", exc_info=True)
         model_status = f"Error during generation: {e}"

     try:
         input_tokens_list = tokenizer.encode(full_prompt) # Encode returns a list/array
         # Ensure input_tokens is a numpy array of the correct type (int32 is common)
+        input_tokens_np = np.array(input_tokens_list, dtype=np.int32)
         # Reshape to (batch_size, sequence_length), which is (1, N) for single prompt
+        input_tokens_np = input_tokens_np.reshape((1, -1))
+        logging.info(f"Prepared input_tokens shape: {input_tokens_np.shape}, dtype: {input_tokens_np.dtype}")
         search_options = {
         params = og.GeneratorParams(model)
         params.set_search_options(**search_options)
+        # FIX: Reverting to direct assignment based on official examples,
+        #      ensuring the numpy array is correctly shaped *before* assignment.
+        logging.info("Attempting direct assignment: params.input_ids = input_tokens_np")
+        params.input_ids = input_tokens_np # Use the reshaped numpy array
         start_time = time.time()
+        # Create generator AFTER setting parameters including input_ids
         generator = og.Generator(model, params)
         model_status = "Generating..." # Update status indicator
         logging.info("Streaming response...")
         logging.info(f"Generation complete. Tokens: {token_count}, Total Time: {total_time:.2f}s, TTFT: {ttft:.2f}ms, TPS: {tps:.2f}")
         model_status = f"Model Ready ({EXECUTION_PROVIDER.upper()} / {model_variant_name})" # Reset status
+    except AttributeError as ae:
+         # Catching this specifically again after trying direct assignment
+         logging.error(f"AttributeError during generation setup (using params.input_ids): {ae}", exc_info=True)
+         logging.error("This suggests the 'input_ids' attribute is not available in this version, despite examples.")
+         model_status = f"Generation Setup AttributeError: {ae}"
+         yield f"\n\nSorry, an AttributeError occurred setting up generation: {ae}"
     except TypeError as te:
         # Catch type errors specifically during setup if the input format is still wrong
         logging.error(f"TypeError during generation setup: {te}", exc_info=True)
+        logging.error("Check input data types and shapes if this occurs.")
         model_status = f"Generation Setup TypeError: {te}"
         yield f"\n\nSorry, a TypeError occurred setting up generation: {te}"
     except Exception as e:
         logging.error(f"Error during generation: {e}", exc_info=True)
         model_status = f"Error during generation: {e}"