import html
import json
import logging
import gradio as gr
import numpy as np
from dotenv import load_dotenv
from pages.summarization_playground import custom_css, generate_answer
from utils.metric import metric_rouge_score
from utils.model import Model
load_dotenv()
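
# Make the INFO-level progress messages emitted in process() visible; the root
# logger defaults to WARNING, so without this the logging.info() calls are silent.
logging.basicConfig(level=logging.INFO)

# Expected structure of test_samples/test_data.json, inferred from process()
# below (the file itself is not shown here): a list of objects with
# "dialogue", "format", and "summary" keys, e.g.
# [
#   {"dialogue": "A: Hi!\nB: Hello.", "format": "bullet points",
#    "summary": "A and B greet each other."}
# ]
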
def display_results(response_list):
    # Aggregate per-sample ROUGE scores into a single headline number.
    overall_score = np.mean([r["metric_score"]["rouge_score"] for r in response_list])
    html_output = f"<h2>Overall Score: {overall_score:.2f}</h2>"
    for i, item in enumerate(response_list, 1):
        rouge_score = item["metric_score"]["rouge_score"]
        # Escape text before embedding it in HTML, and preserve line breaks
        # inside the <pre> panels.
        dialogue = html.escape(item["dialogue"]).replace("\n", "<br>")
        summary = html.escape(item["summary"]).replace("\n", "<br>")
        response = html.escape(item["response"]).replace("\n", "<br>")
        html_output += f"""
        <details>
            <summary>Response {i} (Rouge Score: {rouge_score:.2f})</summary>
            <div style="display: flex; justify-content: space-between;">
                <div style="width: 30%;">
                    <h3>Dialogue</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{dialogue}</pre>
                </div>
                <div style="width: 30%;">
                    <h3>Summary</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{summary}</pre>
                </div>
                <div style="width: 30%;">
                    <h3>Response</h3>
                    <pre style="white-space: pre-wrap; word-wrap: break-word;">{response}</pre>
                </div>
            </div>
        </details>
        """
    return html_output
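
# Contract between process() and display_results(): each response_list entry
# has the shape
#   {"dialogue": str, "summary": str, "response": str,
#    "metric_score": {"rouge_score": float}}
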
def process(model_selection, prompt, num=10):
    response_list = []
    with open("test_samples/test_data.json", "r") as file:
        dataset = json.load(file)
    # `num` caps how many datapoints are evaluated; the parameter was accepted
    # but unused in the original, so treating it as a cap is an assumption.
    for i, data in enumerate(dataset[:num]):
        logging.info(f"Start testing datapoint {i + 1}")
        dialogue = data["dialogue"]
        output_format = data["format"]
        summary = data["summary"]
        # Generate a summary constrained to the sample's requested format.
        response = generate_answer(
            dialogue, model_selection, prompt + f" Output following {output_format} format."
        )
        # Score the generated summary against the reference summary.
        rouge_score = metric_rouge_score(response, summary)
        response_list.append(
            {
                "dialogue": dialogue,
                "summary": summary,
                "response": response,
                "metric_score": {"rouge_score": rouge_score},
            }
        )
        logging.info(f"Complete testing datapoint {i + 1}")
    return display_results(response_list)
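
# Minimal headless usage sketch (an assumption, not part of the app): run the
# batch evaluation without the Gradio UI and save the HTML report. Assumes
# Model.__model_list__ is non-empty and test_samples/test_data.json exists.
#
#   report = process(Model.__model_list__[0], "Summarize the following dialogue")
#   with open("batch_eval_report.html", "w") as f:
#       f.write(report)
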
def create_batch_evaluation_interface():
    with gr.Blocks(
        theme=gr.themes.Soft(spacing_size="sm", text_size="sm"), css=custom_css
    ) as demo:
        gr.Markdown(
            "## Evaluation setup. This runs through the datapoints in test_data.json, generates a summary for each, evaluates it, and shows the results once finished."
        )
        model_dropdown = gr.Dropdown(
            choices=Model.__model_list__,
            label="Choose a model",
            value=Model.__model_list__[0],
        )
        template_text = gr.Textbox(
            value="Summarize the following dialogue",
            label="Input Prompting Template",
            lines=8,
            placeholder="Input your prompts",
        )
        submit_button = gr.Button("✨ Submit ✨")
        output = gr.HTML(label="Results")
        submit_button.click(
            process, inputs=[model_dropdown, template_text], outputs=output
        )
    return demo
if __name__ == "__main__":
    demo = create_batch_evaluation_interface()
    demo.launch()