Spaces:
Running
Running
innit
Browse files- app.py +28 -0
- evaluator.py +64 -0
- helper.py +55 -0
- main.py +78 -0
- requirements.txt +5 -0
- transcriber.py +69 -0
app.py
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from main import flow
|
3 |
+
|
4 |
+
|
5 |
+
with gr.Blocks(theme="JohnSmith9982/small_and_pretty") as demo:
    # Two side-by-side upload slots: the student's answers and the answer key.
    with gr.Row():
        with gr.Column():
            student_pdf_path = gr.File(label="Student's PDF Response")
        with gr.Column():
            standard_pdf_path = gr.File(label="Standard Answer Key PDF")

    with gr.Row():
        submit_btn = gr.Button("Evaluate Exam")

    with gr.Row():
        output = gr.File(label="Output PDF Path")

    def evaluate_exam(student_pdf_path, standard_pdf_path):
        """Run the full evaluation flow on the two uploaded PDFs.

        Args:
            student_pdf_path: Value of the student-response file component.
            standard_pdf_path: Value of the answer-key file component.

        Returns:
            Whatever ``flow`` returns (the path of the generated report PDF),
            which is shown in the ``output`` file component.

        Raises:
            gr.Error: If either PDF has not been uploaded.
        """
        if not student_pdf_path or not standard_pdf_path:
            raise gr.Error("Please upload both the student's response and the answer key PDF.")

        save_dict = True
        student_dict_path = "student_response.json"
        standard_dict_path = "standard_key.json"
        output_pdf_path = "output_report.pdf"

        return flow(student_pdf_path, standard_pdf_path, save_dict, student_dict_path, standard_dict_path, output_pdf_path)

    # Without this listener the button is inert: evaluate_exam was never called.
    submit_btn.click(
        fn=evaluate_exam,
        inputs=[student_pdf_path, standard_pdf_path],
        outputs=output,
    )

demo.launch(share=True, pwa=True, debug=True)
|
evaluator.py
ADDED
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
from openai import OpenAI
|
3 |
+
from dotenv import load_dotenv
|
4 |
+
import json
|
5 |
+
from helper import convert_markdown_to_pdf
|
6 |
+
|
7 |
+
load_dotenv()
|
8 |
+
|
9 |
+
|
10 |
+
def eval_flow(student_resp: dict, standard_key: dict, output_pdf_path: str):
    """Generate the assessment report and write it out as a PDF.

    Args:
        student_resp: Transcribed student answers.
        standard_key: Transcribed standard answer key.
        output_pdf_path: Destination path for the rendered PDF report.

    Returns:
        ``output_pdf_path``, unchanged, once the PDF has been written.
    """
    # The report comes back as Markdown and is rendered straight to PDF.
    convert_markdown_to_pdf(
        generate_report(student_resp, standard_key),
        output_pdf_path,
    )

    print(f"Report has been saved to '{output_pdf_path}'")
    return output_pdf_path
|
20 |
+
|
21 |
+
|
22 |
+
|
23 |
+
def generate_report(student_resp: dict, standard_key: dict) -> str:
    """Ask the OpenAI chat API for an Answer-wise Detailed Assessment Report.

    Both dictionaries are serialised to indented JSON and sent in a single
    user message; the system prompt describes the evaluator role and shows an
    example of the expected Markdown layout.

    Args:
        student_resp: Transcribed student answers.
        standard_key: Transcribed standard answer key.

    Returns:
        The report text returned by the model.

    Raises:
        RuntimeError: If the completion carries no text content.
    """
    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    student_resp_str = json.dumps(student_resp, indent=4)
    standard_key_str = json.dumps(standard_key, indent=4)

    # Adjacent literals are concatenated by the parser; the prompt is split at
    # its own "\n" escapes purely for readability.
    system_prompt = (
        "You are an AI-powered answer sheet evaluator designed to assess students' responses by comparing them with a standard answer key and create Answer-wise Detailed Assessment Report (ADAR) in Markdown format. \n\n"
        "Your tasks include:\n"
        "1) Answer Evaluation: Assess the completeness, relevance, and accuracy of the content in comparison to the standard answers.\n"
        "a) Feedback Generation: Provide detailed and constructive feedback for each question, including:\n"
        "b) Identifying mistakes and misconceptions.\n"
        "c) Suggesting improvements with a focus on clarity, structure, and content.\n"
        "d) Scoring: Assign marks for each question based on accuracy, relevance, and completeness.\n\n"
        "2) Reporting: Generate a comprehensive report including:\n"
        "a) Per-question analysis: Scores and feedback.\n"
        "b) Overall performance: Strengths, weaknesses, and areas for improvement.\n"
        "c) Suggestions: Practical tips for enhancing future performance.\n\n"
        "Ensure your responses are clear, detailed, and supportive to help students understand their mistakes and improve effectively.\n\n\n"
        "Example of ADAR Entry;\n\n"
        "# Answer-wise Detailed Assessment Report (ADAR)\n\n"
        "**Name:** Atiya Salim \n"
        "**Subject:** Business Laws, Ethics and Communication \n"
        "---\n\n"
        "## Q1 (a)\n"
        "**Score:** 2/6 \n\n"
        "**Feedback:**\n\n"
        "● You have defined coercion incorrectly. It is not just committing or threatening others to do/not to do any act but “Coercion” is committing, or threatening to commit any act forbidden by Indian Penal Code 1860. \n\n"
        "● The definition of undue influence seems to be incomplete. You however managed to describe the crux of the definition well. \n\n"
        "● The differentiation mentioned by you was simply a repetition of the introductory part. \n\n"
        "● You have missed out the important distinguishing point that contract entered under coercion is voidable at the option of the party & contract entered under undue influence is voidable or court may set it aside or enforce it in a modified form. \n\n"
        "● It seems you need more practice.\n\n"
        "---\n\n"
        "## Q1 (b)\n"
        "**Score:** 2/6 \n\n"
        "**Feedback:**\n\n"
        "● Your answer was incomplete. You did not mention the 4 basic conditions that need to be satisfied for making the payment of honor. Please refer to suggested answers for the same. \n\n"
        "● Moreover, your introductory part was slightly erroneous and there was not enough clarity. The payment is made after the party fails to make the payment. \n\n"
        "● Work more on the concepts. \n\n"
        "---"
    )

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": system_prompt,
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": f"Standard Answer Key;\n```json\n{standard_key_str}\n```\nStudents Response;\n```json\n{student_resp_str}\n```",
                    }
                ],
            },
        ],
        response_format={"type": "text"},
        temperature=1,
        max_completion_tokens=4048,
        top_p=1,
        frequency_penalty=0,
        presence_penalty=0,
    )

    report = response.choices[0].message.content
    if report is None:
        # Fail here with a clear message rather than handing None to the
        # Markdown-to-PDF step.
        raise RuntimeError("OpenAI returned no text content; the report could not be generated")
    return report
|
helper.py
ADDED
@@ -0,0 +1,55 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import base64
|
2 |
+
import mimetypes
|
3 |
+
import markdown
|
4 |
+
from weasyprint import HTML
|
5 |
+
import fitz # PyMuPDF
|
6 |
+
import os
|
7 |
+
|
8 |
+
def encode_image_v2(image_path: str):
    """Return the file at ``image_path`` as a base64 ``data:`` URL.

    The MIME type is guessed from the file name.

    Raises:
        ValueError: If no MIME type can be guessed for ``image_path``.
    """
    guessed_type = mimetypes.guess_type(image_path)[0]
    if guessed_type is None:
        raise ValueError(f"Cannot determine MIME type for {image_path}")

    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()

    payload = base64.b64encode(raw_bytes).decode('utf-8')
    return f"data:{guessed_type};base64,{payload}"
|
17 |
+
|
18 |
+
|
19 |
+
# Function to encode the image
|
20 |
+
def encode_image(image_path):
    """Read the file at ``image_path`` and return its bytes as a base64 string."""
    with open(image_path, "rb") as handle:
        raw_bytes = handle.read()
    return base64.b64encode(raw_bytes).decode("utf-8")
|
23 |
+
|
24 |
+
|
25 |
+
# Function to convert Markdown to PDF
|
26 |
+
def convert_markdown_to_pdf(md_content, output_pdf_path):
    """Render Markdown text to HTML and write that HTML out as a PDF.

    Args:
        md_content: Markdown source text.
        output_pdf_path: Destination path of the PDF file.
    """
    rendered_html = markdown.markdown(md_content)

    # WeasyPrint lays out the HTML string and writes the PDF to disk.
    document = HTML(string=rendered_html)
    document.write_pdf(output_pdf_path)

    print("Markdown has been successfully converted to PDF!")
|
34 |
+
|
35 |
+
|
36 |
+
|
37 |
+
|
38 |
+
def pdf_to_images(pdf_path, output_folder, zoom_x=2.0, zoom_y=2.0):
    """Render every page of a PDF to a PNG file.

    Args:
        pdf_path: Path of the PDF to open.
        output_folder: Folder that receives the images; created if missing.
        zoom_x: Horizontal zoom factor passed to ``fitz.Matrix``.
        zoom_y: Vertical zoom factor passed to ``fitz.Matrix``.

    Returns:
        Paths of the written images in page order, named ``page_1.png``,
        ``page_2.png``, ... inside ``output_folder``.
    """
    image_paths = []

    # The context manager closes the document even if rendering a page fails;
    # previously the handle was never closed.
    with fitz.open(pdf_path) as pdf_document:
        os.makedirs(output_folder, exist_ok=True)

        # The zoom matrix is the same for every page, so build it once.
        matrix = fitz.Matrix(zoom_x, zoom_y)

        for page_number, page in enumerate(pdf_document, start=1):
            pix = page.get_pixmap(matrix=matrix)
            image_path = os.path.join(output_folder, f'page_{page_number}.png')
            pix.save(image_path)
            image_paths.append(image_path)

    return image_paths
|
main.py
ADDED
@@ -0,0 +1,78 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from transcriber import transcribe_pdf
|
2 |
+
from evaluator import eval_flow
|
3 |
+
import json
|
4 |
+
import os
|
5 |
+
|
6 |
+
def flow(student_pdf_path: str,
         standard_pdf_path: str,
         save_dict: bool,
         student_dict_path: str,
         standard_dict_path: str,
         output_pdf_path: str):
    """Transcribe both PDFs, evaluate the student's answers and write the report.

    Args:
        student_pdf_path: PDF with the student's answers.
        standard_pdf_path: PDF with the standard answer key.
        save_dict: Passed to ``transcribe_pdf`` to control saving the
            transcriptions as JSON.
        student_dict_path: JSON path for the student transcription.
        standard_dict_path: JSON path for the answer-key transcription.
        output_pdf_path: Destination path of the PDF report.

    Returns:
        ``output_pdf_path`` once the report has been written.
    """
    # TODO: the interim folders are fixed names, so concurrent runs share them;
    # create a per-run folder if multiple instances of the flow are run.
    student_folder = "interim_files_student"
    standard_folder = "interim_files_standard"
    separator = "--------------------------------------------"

    # Each PDF is transcribed exactly once (the earlier version repeated both
    # transcriptions back to back).
    print("Transcribing the student's response")
    student_resp = transcribe_pdf(student_pdf_path, student_folder, save_dict, student_dict_path)
    print(separator)

    print("Transcribing the standard answer key")
    standard_key = transcribe_pdf(standard_pdf_path, standard_folder, save_dict, standard_dict_path)
    print(separator)

    # To test the evaluator alone, skip the two transcriptions above and load
    # the saved JSON files from student_dict_path / standard_dict_path instead.

    print("Evaluating the student's response")
    eval_flow(student_resp, standard_key, output_pdf_path)

    print("Flow completed successfully!")
    print(separator)

    print("Clearing interim files folder")
    _clear_folder(student_folder)
    _clear_folder(standard_folder)

    return output_pdf_path


def _clear_folder(folder_path: str) -> None:
    """Remove every entry directly inside ``folder_path``; the folder itself stays."""
    for entry in os.listdir(folder_path):
        os.remove(os.path.join(folder_path, entry))
|
62 |
+
|
63 |
+
|
64 |
+
if __name__ == "__main__":
    # Alternative sample pair:
    #   "examples/Chapter 1 - Indian Regulatory Framework Student Answer Key (1).pdf"
    #   "examples/Chapter 1 Indian Regulatory Frame Work Standard Question And Answer Key (2).pdf"
    flow(
        student_pdf_path="examples/ca inter chapter - amalgamation student answer.pdf",
        standard_pdf_path="examples/ca inter chapter - amalgamation.pdf",
        save_dict=True,
        student_dict_path="student_response.json",
        standard_dict_path="standard_key.json",
        output_pdf_path="output_report.pdf",
    )
|
77 |
+
|
78 |
+
|
requirements.txt
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
pymupdf
|
2 |
+
openai
|
3 |
+
python-dotenv
|
4 |
+
markdown
|
5 |
+
weasyprint
|
transcriber.py
ADDED
@@ -0,0 +1,69 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import base64
|
3 |
+
from openai import OpenAI
|
4 |
+
from dotenv import load_dotenv
|
5 |
+
from helper import encode_image, pdf_to_images
|
6 |
+
# from pdf_processor import pdf_to_images
|
7 |
+
from tqdm import tqdm
|
8 |
+
import json
|
9 |
+
|
10 |
+
load_dotenv()
|
11 |
+
|
12 |
+
|
13 |
+
def transcribe_image(image_path, handwritten_flag=True):
    """Transcribe the text in one image with the OpenAI chat API.

    Args:
        image_path: Path of the image file to transcribe.
        handwritten_flag: When true, the system prompt describes the input as
            handwritten text and asks for raw text only; otherwise the prompt
            for general text is used.

    Returns:
        The message content of the first completion choice.
    """
    # Function-scope import keeps this block self-contained.
    import mimetypes

    # Getting the Base64 string
    base64_image = encode_image(image_path)

    # Label the payload with the file's real type (pdf_to_images writes .png
    # files); fall back to the previously hard-coded JPEG when it is unknown.
    mime_type = mimetypes.guess_type(image_path)[0] or "image/jpeg"

    client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

    if handwritten_flag:
        system_cmd = "You are a professional transcriber, you will be given an input image which has handwritten text and your job is to transcribe it to the best of your ability.\n\nYou are not allowed to correct any mistakes in the imput, the output text should be exactly the same as in the image input. \nJust output raw text."
    else:
        system_cmd = "You are a professional transcriber, you will be given an input image which has text and your job is to transcribe it to the best of your ability.\n\nYou are not allowed to correct any mistakes in the imput, the output text should be exactly the same as in the image input."

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {
                "role": "system",
                "content": [
                    {
                        "type": "text",
                        "text": system_cmd,
                    }
                ],
            },
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": "Transcribe the following image to text in markdown format.",
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:{mime_type};base64,{base64_image}"},
                    },
                ],
            },
        ],
    )

    return response.choices[0].message.content
|
56 |
+
|
57 |
+
|
58 |
+
|
59 |
+
def transcribe_pdf(pdf_path, interim_files_folder_path, save_dict=False, save_dict_path=None):
    """Transcribe a PDF page by page.

    Pages are rendered to images in ``interim_files_folder_path`` and each
    image is transcribed in turn.

    Args:
        pdf_path: Path of the PDF to transcribe.
        interim_files_folder_path: Folder that receives the page images.
        save_dict: When true (and ``save_dict_path`` is set), also write the
            result to disk as JSON.
        save_dict_path: Destination of the JSON file.

    Returns:
        A dict mapping ``"page_1"``, ``"page_2"``, ... to each page's
        transcription.
    """
    page_images = pdf_to_images(pdf_path, interim_files_folder_path, zoom_x=2.0, zoom_y=2.0)

    # Keys are 1-based page labels, in page order.
    transcriptions = {
        f"page_{page_no}": transcribe_image(page_image)
        for page_no, page_image in enumerate(tqdm(page_images, desc="Transcribing PDF"), start=1)
    }

    wants_dump = save_dict and save_dict_path
    if wants_dump:
        with open(save_dict_path, 'w') as out_file:
            json.dump(transcriptions, out_file)

    return transcriptions
|