Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -235,18 +235,25 @@ def generate_and_judge(instruction, model_dropdown_1, custom_model_1, model_drop
|
|
235 |
progress(0.4, desc=f"Generating response from {model_path_2}")
|
236 |
response2 = generate_response(instruction, model_path_2, progress)
|
237 |
|
238 |
-
# Update the response textboxes
|
239 |
-
progress(0.7, desc="Evaluating responses")
|
240 |
|
241 |
-
# Use the judge_responses generator but collect all outputs
|
242 |
-
evaluation_results = ""
|
243 |
-
for result in judge_responses(instruction, response1, response2, judge_model_name, temperature, max_new_tokens):
|
244 |
-
evaluation_results = result
|
245 |
|
246 |
-
progress(1.0, desc="Evaluation complete")
|
247 |
|
248 |
-
return response1, response2, evaluation_results
|
249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
# Create Gradio interface
|
251 |
with gr.Blocks(fill_height=True, css=css) as demo:
|
252 |
gr.Markdown(DESCRIPTION)
|
@@ -309,12 +316,21 @@ with gr.Blocks(fill_height=True, css=css) as demo:
|
|
309 |
with gr.Row():
|
310 |
auto_output = gr.Textbox(label="Evaluation Results", lines=15)
|
311 |
|
312 |
-
# Handle auto-generation and evaluation
|
|
|
|
|
|
|
|
|
|
|
313 |
auto_submit_btn.click(
|
314 |
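-
fn=generate_and_judge,
|
315 |
-
inputs=[auto_instruction, model_dropdown_1, custom_model_1, model_dropdown_2, custom_model_2, auto_model_dropdown, auto_temperature, auto_max_tokens],
|
316 |
-
outputs=[auto_response1, auto_response2, auto_output]
|
317 |
-
)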
|
|
|
|
|
|
|
|
|
318 |
# Examples for auto-generation
|
319 |
auto_examples = [
|
320 |
["Write a short poem about artificial intelligence",
|
|
|
235 |
progress(0.4, desc=f"Generating response from {model_path_2}")
|
236 |
response2 = generate_response(instruction, model_path_2, progress)
|
237 |
|
238 |
+
# # Update the response textboxes
|
239 |
+
# progress(0.7, desc="Evaluating responses")
|
240 |
|
241 |
+
# # Use the judge_responses generator but collect all outputs
|
242 |
+
# evaluation_results = ""
|
243 |
+
# for result in judge_responses(instruction, response1, response2, judge_model_name, temperature, max_new_tokens):
|
244 |
+
# evaluation_results = result
|
245 |
|
246 |
+
# progress(1.0, desc="Evaluation complete")
|
247 |
|
248 |
+
return response1, response2, None
|
249 |
|
250 |
+
# Function to stream evaluation results after responses are generated
|
251 |
+
@spaces.GPU(duration=120)
|
252 |
+
def stream_evaluation(instruction, response1, response2, judge_model_name, temperature=0.1, max_new_tokens=2048):
|
253 |
+
"""Stream evaluation results after responses are generated"""
|
254 |
+
for result in judge_responses(instruction, response1, response2, judge_model_name, temperature, max_new_tokens):
|
255 |
+
yield result
|
256 |
+
|
257 |
# Create Gradio interface
|
258 |
with gr.Blocks(fill_height=True, css=css) as demo:
|
259 |
gr.Markdown(DESCRIPTION)
|
|
|
316 |
with gr.Row():
|
317 |
auto_output = gr.Textbox(label="Evaluation Results", lines=15)
|
318 |
|
319 |
+
# # Handle auto-generation and evaluation
|
320 |
+
# auto_submit_btn.click(
|
321 |
+
# fn=generate_and_judge,
|
322 |
+
# inputs=[auto_instruction, model_dropdown_1, custom_model_1, model_dropdown_2, custom_model_2, auto_model_dropdown, auto_temperature, auto_max_tokens],
|
323 |
+
# outputs=[auto_response1, auto_response2, auto_output]
|
324 |
+
# )
|
325 |
auto_submit_btn.click(
|
326 |
+
fn=generate_and_judge,
|
327 |
+
inputs=[auto_instruction, model_dropdown_1, custom_model_1, model_dropdown_2, custom_model_2, auto_model_dropdown, auto_temperature, auto_max_tokens],
|
328 |
+
outputs=[auto_response1, auto_response2, auto_output]
|
329 |
+
).then(
|
330 |
+
fn=stream_evaluation,
|
331 |
+
inputs=[auto_instruction, auto_response1, auto_response2, auto_model_dropdown, auto_temperature, auto_max_tokens],
|
332 |
+
outputs=auto_output
|
333 |
+
)
|
334 |
# Examples for auto-generation
|
335 |
auto_examples = [
|
336 |
["Write a short poem about artificial intelligence",
|