Spaces:

nuojohnchen
/

JudgeLRMDemo

Running

App Files Community

nuojohnchen committed on Mar 31

Commit

cd84e2f

verified ·

1 Parent(s): 39329a7

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -13

app.py CHANGED Viewed

@@ -235,18 +235,25 @@ def generate_and_judge(instruction, model_dropdown_1, custom_model_1, model_drop
     progress(0.4, desc=f"Generating response from {model_path_2}")
     response2 = generate_response(instruction, model_path_2, progress)
-    # Update the response textboxes
-    progress(0.7, desc="Evaluating responses")
-    # Use the judge_responses generator but collect all outputs
-    evaluation_results = ""
-    for result in judge_responses(instruction, response1, response2, judge_model_name, temperature, max_new_tokens):
-        evaluation_results = result
-    progress(1.0, desc="Evaluation complete")
-    return response1, response2, evaluation_results
 # Create Gradio interface
 with gr.Blocks(fill_height=True, css=css) as demo:
     gr.Markdown(DESCRIPTION)
@@ -309,12 +316,21 @@ with gr.Blocks(fill_height=True, css=css) as demo:
             with gr.Row():
                 auto_output = gr.Textbox(label="Evaluation Results", lines=15)
-            # Handle auto-generation and evaluation
             auto_submit_btn.click(
-                fn=generate_and_judge,
-                inputs=[auto_instruction, model_dropdown_1, custom_model_1, model_dropdown_2, custom_model_2, auto_model_dropdown, auto_temperature, auto_max_tokens],
-                outputs=[auto_response1, auto_response2, auto_output]
-            )
             # Examples for auto-generation
             auto_examples = [
                 ["Write a short poem about artificial intelligence",

     progress(0.4, desc=f"Generating response from {model_path_2}")
     response2 = generate_response(instruction, model_path_2, progress)
+    # # Update the response textboxes
+    # progress(0.7, desc="Evaluating responses")
+    # # Use the judge_responses generator but collect all outputs
+    # evaluation_results = ""
+    # for result in judge_responses(instruction, response1, response2, judge_model_name, temperature, max_new_tokens):
+    #     evaluation_results = result
+    # progress(1.0, desc="Evaluation complete")
+    return response1, response2, None
+# Function to stream evaluation results after responses are generated
+@spaces.GPU(duration=120)
+def stream_evaluation(instruction, response1, response2, judge_model_name, temperature=0.1, max_new_tokens=2048):
+    """Stream evaluation results after responses are generated"""
+    for result in judge_responses(instruction, response1, response2, judge_model_name, temperature, max_new_tokens):
+        yield result
 # Create Gradio interface
 with gr.Blocks(fill_height=True, css=css) as demo:
     gr.Markdown(DESCRIPTION)
             with gr.Row():
                 auto_output = gr.Textbox(label="Evaluation Results", lines=15)
+            # # Handle auto-generation and evaluation
+            # auto_submit_btn.click(
+            #     fn=generate_and_judge,
+            #     inputs=[auto_instruction, model_dropdown_1, custom_model_1, model_dropdown_2, custom_model_2, auto_model_dropdown, auto_temperature, auto_max_tokens],
+            #     outputs=[auto_response1, auto_response2, auto_output]
+            # )
             auto_submit_btn.click(
+               fn=generate_and_judge,
+               inputs=[auto_instruction, model_dropdown_1, custom_model_1, model_dropdown_2, custom_model_2, auto_model_dropdown, auto_temperature, auto_max_tokens],
+               outputs=[auto_response1, auto_response2, auto_output]
+           ).then(
+               fn=stream_evaluation,
+               inputs=[auto_instruction, auto_response1, auto_response2, auto_model_dropdown, auto_temperature, auto_max_tokens],
+               outputs=auto_output
+           )
             # Examples for auto-generation
             auto_examples = [
                 ["Write a short poem about artificial intelligence",