Spestly committed (verified)
Commit 15b94f7 · 1 Parent(s): e7e2534

Update app.py

Files changed (1): app.py (+12 -31)
app.py CHANGED
@@ -14,11 +14,11 @@ login(token=HF_TOKEN)
 # Define models
 MODELS = {
     "athena-1": {
-        "name": "🦁 Atlas-Flash",
+        "name": " Atlas-Flash 1205",
         "sizes": {
             "1.5B": "Spestly/Atlas-R1-1.5B-Preview",
         },
-        "emoji": "🦁",
+        "emoji": "",
         "experimental": True,
         "is_vision": False,  # Enable vision support for this model
     },
@@ -103,12 +103,9 @@ class AtlasInferenceApp:
                 padding=True
             )
 
-            # Generate response with streaming
-            response_container = st.empty()  # Placeholder for streaming text
-            full_response = ""
-            generated_tokens = []  # Track generated tokens to avoid duplicates
+            # Generate response without streaming
             with torch.no_grad():
-                for chunk in st.session_state.current_model["model"].generate(
+                output = st.session_state.current_model["model"].generate(
                     input_ids=inputs.input_ids,
                     attention_mask=inputs.attention_mask,
                     max_new_tokens=max_tokens,
@@ -118,30 +115,14 @@
                     do_sample=True,
                     pad_token_id=st.session_state.current_model["tokenizer"].pad_token_id,
                     eos_token_id=st.session_state.current_model["tokenizer"].eos_token_id,
-                ):
-                    # Ensure chunk is 2D (batch size × sequence length)
-                    if chunk.dim() == 1:
-                        chunk = chunk.unsqueeze(0)  # Add batch dimension
-
-                    # Decode only the new tokens
-                    new_tokens = chunk[:, inputs.input_ids.shape[1]:]  # Exclude input tokens
-                    generated_tokens.extend(new_tokens[0].tolist())  # Add new tokens to the list
-                    chunk_text = st.session_state.current_model["tokenizer"].decode(generated_tokens, skip_special_tokens=True)
-
-                    # Remove the prompt from the response
-                    if prompt in chunk_text:
-                        chunk_text = chunk_text.replace(prompt, "").strip()
-
-                    # Update the response
-                    full_response = chunk_text
-                    response_container.markdown(full_response)
-
-                    # Stop if the response is too long or incomplete
-                    if len(full_response) >= max_tokens * 4:  # Approximate token-to-character ratio
-                        st.warning("⚠️ Response truncated due to length limit.")
-                        break
-
-            return full_response.strip()  # Return the cleaned response
+                )
+            response = st.session_state.current_model["tokenizer"].decode(output[0], skip_special_tokens=True)
+
+            # Remove the prompt from the response
+            if prompt in response:
+                response = response.replace(prompt, "").strip()
+
+            return response
         except Exception as e:
             return f"⚠️ Generation Error: {str(e)}"
         finally:
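
Context on the pattern in this diff: model.generate() in Transformers returns the completed output in a single call, so the removed "for chunk in ... generate(...)" loop was never yielding incremental chunks; the replacement makes one blocking call and decodes the result. If token-by-token output is wanted in a Streamlit app like this one, the usual route is transformers.TextIteratorStreamer. The sketch below is illustrative only, not code from this commit: it reuses the Spestly/Atlas-R1-1.5B-Preview checkpoint named in the diff, while the prompt, the max_new_tokens value, and the plain accumulation loop (standing in for an st.empty() placeholder) are assumptions.

# Illustrative sketch (not part of this commit): token-level streaming with
# transformers.TextIteratorStreamer. The model ID comes from the diff above;
# prompt and generation settings are placeholder values.
from threading import Thread

from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "Spestly/Atlas-R1-1.5B-Preview"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)

prompt = "Explain what a tokenizer does."
inputs = tokenizer(prompt, return_tensors="pt")

# skip_prompt=True makes the streamer emit only newly generated text,
# so no string-level prompt removal is needed afterwards.
streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)

generation_kwargs = dict(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=256,
    do_sample=True,
    streamer=streamer,
)

# generate() blocks until the sequence is finished, so it runs in a background
# thread while the main thread consumes decoded text chunks as they arrive.
thread = Thread(target=model.generate, kwargs=generation_kwargs)
thread.start()

response = ""
for chunk in streamer:   # yields decoded text pieces incrementally
    response += chunk    # a Streamlit app would update an st.empty() placeholder here
thread.join()

print(response.strip())

A simpler variant that keeps the non-streaming call from this commit but avoids the "prompt in response" string check is to decode only the newly generated tokens, e.g. tokenizer.decode(output[0][inputs.input_ids.shape[1]:], skip_special_tokens=True).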