import streamlit as st
import shelve
import docx2txt
import PyPDF2
import time # Used to simulate typing effect
import nltk
import re
import os
from dotenv import load_dotenv
import torch
from sentence_transformers import SentenceTransformer, util
import hashlib
from nltk import sent_tokenize
nltk.download('punkt')
nltk.download('punkt_tab')
from transformers import LEDTokenizer, LEDForConditionalGeneration
from transformers import pipeline
import asyncio
import dateutil.parser
from datetime import datetime
import sys
from openai import OpenAI
import numpy as np
# Fix for RuntimeError: no running event loop on Windows
if sys.platform.startswith("win"):
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
st.set_page_config(page_title="Legal Document Summarizer", layout="wide")
if "processed" not in st.session_state:
st.session_state.processed = False
if "last_uploaded_hash" not in st.session_state:
st.session_state.last_uploaded_hash = None
if "chat_prompt_processed" not in st.session_state:
st.session_state.chat_prompt_processed = False
if "embedding_text" not in st.session_state:
st.session_state.embedding_text = None
if "document_context" not in st.session_state:
st.session_state.document_context = None
if "last_prompt_hash" not in st.session_state:
st.session_state.last_prompt_hash = None
st.title("πŸ“„ Legal Document Summarizer (Document Augmentation RAG)")
USER_AVATAR = "πŸ‘€"
BOT_AVATAR = "πŸ€–"
# Load chat history
def load_chat_history():
with shelve.open("chat_history") as db:
return db.get("messages", [])
# Save chat history
def save_chat_history(messages):
with shelve.open("chat_history") as db:
db["messages"] = messages
# Function to limit text preview to 500 words
def limit_text(text, word_limit=500):
words = text.split()
return " ".join(words[:word_limit]) + ("..." if len(words) > word_limit else "")
# CLEAN AND NORMALIZE TEXT
def clean_text(text):
# Remove newlines and extra spaces
text = text.replace('\r\n', ' ').replace('\n', ' ')
text = re.sub(r'\s+', ' ', text)
# Remove page number markers like "Page 1 of 10"
text = re.sub(r'Page\s+\d+\s+of\s+\d+', '', text, flags=re.IGNORECASE)
# Remove long dashed or underscored lines
text = re.sub(r'[_]{5,}', '', text) # Lines with underscores: _____
text = re.sub(r'[-]{5,}', '', text) # Lines with hyphens: -----
# Remove long dotted separators
text = re.sub(r'[.]{4,}', '', text) # Dots like "......" or ".............."
# Trim final leading/trailing whitespace
text = text.strip()
return text
#######################################################################################################################
# LOADING MODELS FOR DIVIDING TEXT INTO SECTIONS
# Load token from .env file
load_dotenv()
HF_API_TOKEN = os.getenv("HF_API_TOKEN")
client = OpenAI(
base_url="https://api.studio.nebius.com/v1/",
api_key=os.getenv("OPENAI_API_KEY")
)
# print("API Key:", os.getenv("OPENAI_API_KEY")) # Temporary for debugging
# Load once at the top (cache for performance)
@st.cache_resource
def load_local_zero_shot_classifier():
return pipeline("zero-shot-classification", model="typeform/distilbert-base-uncased-mnli")
local_classifier = load_local_zero_shot_classifier()
SECTION_LABELS = ["Facts", "Arguments", "Judgement", "Others"]
def classify_chunk(text):
result = local_classifier(text, candidate_labels=SECTION_LABELS)
return result["labels"][0]
# NEW: NLP-based sectioning using zero-shot classification
def section_by_zero_shot(text):
sections = {"Facts": "", "Arguments": "", "Judgment": "", "Others": ""}
sentences = sent_tokenize(text)
chunk = ""
for i, sent in enumerate(sentences):
chunk += sent + " "
if (i + 1) % 3 == 0 or i == len(sentences) - 1:
label = classify_chunk(chunk.strip())
print(f"πŸ”Ž Chunk: {chunk[:60]}...\nπŸ”– Predicted Label: {label}")
# πŸ‘‡ Normalize label (title case and fallback)
label = label.capitalize()
if label not in sections:
label = "Others"
sections[label] += chunk + "\n"
chunk = ""
return sections
#######################################################################################################################
# EXTRACTING TEXT FROM UPLOADED FILES
# Function to extract text from uploaded file
def extract_text(file):
if file.name.endswith(".pdf"):
reader = PyPDF2.PdfReader(file)
full_text = "\n".join(page.extract_text() or "" for page in reader.pages)
elif file.name.endswith(".docx"):
full_text = docx2txt.process(file)
elif file.name.endswith(".txt"):
full_text = file.read().decode("utf-8")
else:
return "Unsupported file type."
return full_text # Full text is needed for summarization
#######################################################################################################################
# EXTRACTIVE AND ABSTRACTIVE SUMMARIZATION
@st.cache_resource
def load_legalbert():
return SentenceTransformer("nlpaueb/legal-bert-base-uncased")
legalbert_model = load_legalbert()
@st.cache_resource
def load_led():
tokenizer = LEDTokenizer.from_pretrained("allenai/led-base-16384")
model = LEDForConditionalGeneration.from_pretrained("allenai/led-base-16384")
return tokenizer, model
tokenizer_led, model_led = load_led()
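# Extractive summarization: embed each sentence with Legal-BERT, score it by cosine
# similarity to the mean document embedding, and keep the top-ranked sentences in
# their original order (at least 3, or ~top_ratio of the document).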
def legalbert_extractive_summary(text, top_ratio=0.2):
sentences = sent_tokenize(text)
top_k = max(3, int(len(sentences) * top_ratio))
if len(sentences) <= top_k:
return text
sentence_embeddings = legalbert_model.encode(sentences, convert_to_tensor=True)
doc_embedding = torch.mean(sentence_embeddings, dim=0)
cosine_scores = util.pytorch_cos_sim(doc_embedding, sentence_embeddings)[0]
top_results = torch.topk(cosine_scores, k=top_k)
selected_sentences = [sentences[i] for i in sorted(top_results.indices.tolist())]
return " ".join(selected_sentences)
# Add LED Abstractive Summarization
def led_abstractive_summary(text, max_length=512, min_length=100):
inputs = tokenizer_led(
text, return_tensors="pt", padding="max_length",
truncation=True, max_length=4096
)
global_attention_mask = torch.zeros_like(inputs["input_ids"])
global_attention_mask[:, 0] = 1
outputs = model_led.generate(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
global_attention_mask=global_attention_mask,
max_length=max_length,
min_length=min_length,
num_beams=4, # Use beam search
repetition_penalty=2.0, # Penalize repetition
length_penalty=1.0,
early_stopping=True,
no_repeat_ngram_size=4 # Prevent repeated phrases
)
return tokenizer_led.decode(outputs[0], skip_special_tokens=True)
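# Chunked variant for long documents: group sentences into chunks of at most
# max_tokens LED tokens, summarize each chunk separately, and join the partial summaries.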
def led_abstractive_summary_chunked(text, max_tokens=3000):
sentences = sent_tokenize(text)
current_chunk, chunks, summaries = "", [], []
for sent in sentences:
if len(tokenizer_led(current_chunk + sent)["input_ids"]) > max_tokens:
chunks.append(current_chunk)
current_chunk = sent
else:
current_chunk += " " + sent
if current_chunk:
chunks.append(current_chunk)
for chunk in chunks:
inputs = tokenizer_led(chunk, return_tensors="pt", padding="max_length", truncation=True, max_length=4096)
global_attention_mask = torch.zeros_like(inputs["input_ids"])
global_attention_mask[:, 0] = 1
output = model_led.generate(
inputs["input_ids"],
attention_mask=inputs["attention_mask"],
global_attention_mask=global_attention_mask,
max_length=512,
min_length=100,
num_beams=4,
repetition_penalty=2.0,
length_penalty=1.0,
early_stopping=True,
no_repeat_ngram_size=4,
)
summaries.append(tokenizer_led.decode(output[0], skip_special_tokens=True))
return " ".join(summaries)
def hybrid_summary_hierarchical(text, top_ratio=0.8):
cleaned_text = clean_text(text)
sections = section_by_zero_shot(cleaned_text)
structured_summary = {} # <-- hierarchical summary here
for name, content in sections.items():
if content.strip():
# Extractive summary
extractive = legalbert_extractive_summary(content, top_ratio)
# Abstractive summary
abstractive = led_abstractive_summary_chunked(extractive)
# Store in dictionary (hierarchical structure)
structured_summary[name] = {
"extractive": extractive,
"abstractive": abstractive
}
return structured_summary
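# Character-level chunking with overlap: chunk_text_custom(text, n=1000, overlap=200)
# yields 1000-character windows that share 200 characters with their neighbours.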
def chunk_text_custom(text, n=1000, overlap=200):
chunks = []
for i in range(0, len(text), n - overlap):
chunks.append(text[i:i + n])
return chunks
def get_embedding(text, model="BAAI/bge-en-icl"):
"""
From your notebook:
Creates an embedding for the given text chunk using the BGE-ICL model.
"""
resp = client.embeddings.create(model=model, input=text)
return np.array(resp.data[0].embedding)
def semantic_search(query, text_chunks, chunk_embeddings, k=5):
"""
Compute cosine similarity between the query embedding and each chunk embedding,
then pick the top-k chunks.
"""
q_emb = get_embedding(query)
# simple cosine:
def cosine(a, b): return float(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b)))
scores = [cosine(q_emb, emb) for emb in chunk_embeddings]
top_idxs = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:k]
return [text_chunks[i] for i in top_idxs]
def generate_response(system_prompt, user_message, model="meta-llama/Llama-3.2-3B-Instruct"):
return client.chat.completions.create(
model=model,
temperature=0,
messages=[{"role": "system", "content": system_prompt}, {"role": "user", "content": user_message}]
).choices[0].message.content
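# Document augmentation: for each chunk, generate synthetic questions the chunk can
# answer; their embeddings are indexed alongside the chunk embeddings to improve retrieval.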
def generate_questions(text_chunk, num_questions=5,
model="meta-llama/Llama-3.2-3B-Instruct"):
system_prompt = (
"You are an expert at generating relevant questions from text. "
"Create concise questions that can be answered using only the provided text."
)
user_prompt = f"""
Based on the following text, generate {num_questions} different questions
that can be answered using only this text:
{text_chunk}
Format your response as a numbered list of questions only.
"""
resp = client.chat.completions.create(
model=model,
temperature=0.7,
messages=[
{"role":"system","content":system_prompt},
{"role":"user","content":user_prompt}
]
)
raw = resp.choices[0].message.content.strip()
questions = []
for line in raw.split("\n"):
q = re.sub(r"^\d+\.\s*", "", line).strip()
if q.endswith("?"):
questions.append(q)
return questions
# 2) EMBEDDINGS
def create_embeddings(text, model="BAAI/bge-en-icl"):
resp = client.embeddings.create(model=model, input=text)
return resp.data[0].embedding
def cosine_similarity(a,b):
return float(np.dot(a,b)/(np.linalg.norm(a)*np.linalg.norm(b)))
# 3) VECTOR STORE
class SimpleVectorStore:
def __init__(self):
self.items = [] # each item is dict {text, embedding, metadata}
def add_item(self, text, embedding, metadata):
self.items.append(dict(text=text, embedding=embedding, metadata=metadata))
def search(self, query, k=5):
q_emb = create_embeddings(query)
scores = [(i, cosine_similarity(q_emb, item["embedding"]))
for i,item in enumerate(self.items)]
scores.sort(key=lambda x:x[1], reverse=True)
return [self.items[i] for i,_ in scores[:k]]
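# Illustrative usage of the store (not executed here):
#   store = SimpleVectorStore()
#   store.add_item("some clause", create_embeddings("some clause"), {"type": "chunk", "index": 0})
#   top_items = store.search("termination notice period", k=3)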
# 4) DOCUMENT PROCESSOR
def process_document(raw_text,
chunk_size=1000,
chunk_overlap=200,
questions_per_chunk=5):
# chunk the text
chunks = []
for i in range(0, len(raw_text), chunk_size - chunk_overlap):
chunks.append(raw_text[i:i+chunk_size])
store = SimpleVectorStore()
for idx,chunk in enumerate(chunks):
# chunk embedding
emb = create_embeddings(chunk)
store.add_item(chunk, emb, {"type":"chunk","index":idx})
# generate Qs + their embeddings
qs = generate_questions(chunk, num_questions=questions_per_chunk)
for q in qs:
q_emb = create_embeddings(q)
store.add_item(q, q_emb, {
"type":"question",
"chunk_index":idx,
"original_chunk": chunk
})
return chunks, store
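# process_document indexes both the raw chunks and their generated questions in one store,
# so a user query can match either a chunk directly or a question derived from it.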
# 5) CONTEXT BUILDER
def prepare_context(results):
seen = set()
ctx = []
# first direct chunks
for r in results:
m = r["metadata"]
if m["type"]=="chunk" and m["index"] not in seen:
seen.add(m["index"])
ctx.append(f"Chunk {m['index']}:\n{r['text']}")
# then referenced by questions
for r in results:
m = r["metadata"]
if m["type"]=="question":
ci = m["chunk_index"]
if ci not in seen:
seen.add(ci)
ctx.append(f"Chunk {ci} (via Q β€œ{r['text']}”):\n{m['original_chunk']}")
return "\n\n".join(ctx)
# 6) ANSWER GENERATOR (context-grounded; used in place of generate_response above)
def generate_response_from_context(query, context,
model="meta-llama/Llama-3.2-3B-Instruct"):
sp = (
"You are an AI assistant that strictly answers based on the given context. "
"If the answer cannot be derived directly from the provided context, "
"respond with: 'I do not have enough information to answer that.'"
)
up = f"""
Context:
{context}
Question: {query}
Please answer the question based only on the context above.
"""
resp = client.chat.completions.create(
model=model,
temperature=0,
messages=[{"role":"system","content":sp},
{"role":"user","content":up}]
)
return resp.choices[0].message.content
#######################################################################################################################
# STREAMLIT APP INTERFACE CODE
# Initialize or load chat history
if "messages" not in st.session_state:
st.session_state.messages = load_chat_history()
# Initialize last_uploaded if not set
if "last_uploaded" not in st.session_state:
st.session_state.last_uploaded = None
# Sidebar with a button to delete chat history
with st.sidebar:
st.subheader("βš™οΈ Options")
if st.button("Delete Chat History"):
st.session_state.messages = []
st.session_state.last_uploaded = None
st.session_state.processed = False
st.session_state.chat_prompt_processed = False
save_chat_history([])
# Display chat messages with a typing effect
def display_with_typing_effect(text, speed=0.005):
placeholder = st.empty()
displayed_text = ""
for char in text:
displayed_text += char
placeholder.markdown(displayed_text)
time.sleep(speed)
return displayed_text
# Show existing chat messages
for message in st.session_state.messages:
avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
with st.chat_message(message["role"], avatar=avatar):
st.markdown(message["content"])
# Standard chat input field
prompt = st.chat_input("Type a message...")
# Place uploader before the chat so it's always visible
with st.container():
st.subheader("πŸ“Ž Upload a Legal Document")
uploaded_file = st.file_uploader("Upload a file (PDF, DOCX, TXT)", type=["pdf", "docx", "txt"])
reprocess_btn = st.button("πŸ”„ Reprocess Last Uploaded File")
# Hashing logic
def get_file_hash(file):
file.seek(0)
content = file.read()
file.seek(0)
return hashlib.md5(content).hexdigest()
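# The MD5 hash of the uploaded file is compared against the last processed hash, so the
# document is only re-ingested when a new file arrives or the user forces a reprocess.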
# Function to prepare text for embedding
# This function combines the extractive and abstractive summaries into a single string for embedding
def prepare_text_for_embedding(summary_dict):
    combined_chunks = []
    for section, content in summary_dict.items():
        extractive = content.get("extractive", "").strip()
        abstractive = content.get("abstractive", "").strip()
        if extractive:
            combined_chunks.append(f"{section} - Extractive Summary:\n{extractive}")
        if abstractive:
            combined_chunks.append(f"{section} - Abstractive Summary:\n{abstractive}")
    return "\n\n".join(combined_chunks)
##############################################################################################################
user_role = st.sidebar.selectbox(
"🎭 Select Your Role for Custom Summary",
["General", "Judge", "Lawyer", "Student"]
)
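# role_based_filter maps each role to the sections it should see: Judges get Judgement and
# Facts, Lawyers get Arguments and Facts, Students get Facts only; "General" passes the
# summary through unchanged.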
def role_based_filter(section, summary, role):
if role == "General":
return summary
filtered_summary = {
"extractive": "",
"abstractive": ""
}
if role == "Judge" and section in ["Judgement", "Facts"]:
filtered_summary = summary
elif role == "Lawyer" and section in ["Arguments", "Facts"]:
filtered_summary = summary
elif role == "Student" and section in ["Facts"]:
filtered_summary = summary
return filtered_summary
#########################################################################################################################
if uploaded_file:
file_hash = get_file_hash(uploaded_file)
if file_hash != st.session_state.last_uploaded_hash or reprocess_btn:
st.session_state.processed = False
if not st.session_state.processed:
start_time = time.time()
# 1) extract & summarize as before
raw_text = extract_text(uploaded_file)
summary_dict = hybrid_summary_hierarchical(raw_text)
embedding_text = prepare_text_for_embedding(summary_dict)
# ─── NEW: document‐augmentation ingestion ───
chunks, store = process_document(raw_text,
chunk_size=1000,
chunk_overlap=200,
questions_per_chunk=5)
st.session_state.vector_store = store
# ────────────────────────────────────────────
# 2) generate your β€œrole‐specific prompt” as before
st.session_state.document_context = embedding_text
if user_role == "General":
role_specific_prompt = (
"Summarize the legal document focusing on the most relevant aspects "
"such as facts, arguments, and judgments. Include key legal reasoning "
"and a timeline of events where necessary."
)
else:
role_specific_prompt = (
f"As a {user_role}, summarize the legal document focusing on "
"the most relevant aspects such as facts, arguments, and judgments "
"tailored for your role. Include key legal reasoning and timeline of events."
)
        # ─── document-augmentation RAG: retrieve supporting chunks and answer the role prompt ───
results = store.search(role_specific_prompt, k=5)
context = prepare_context(results)
rag_summary = generate_response_from_context(role_specific_prompt, context)
#
st.session_state.messages.append({
"role": "user",
"content": f"πŸ“€ Uploaded **{uploaded_file.name}**"
})
st.session_state.messages.append({
"role": "assistant",
"content": rag_summary
})
with st.chat_message("assistant", avatar=BOT_AVATAR):
display_with_typing_effect(rag_summary)
processing_time = round((time.time() - start_time) / 60, 2)
st.info(f"⏱️ Response generated in **{processing_time} minutes**.")
st.session_state.generated_summary = rag_summary
st.session_state.last_uploaded_hash = file_hash
st.session_state.processed = True
st.session_state.last_prompt_hash = None
save_chat_history(st.session_state.messages)
if prompt:
words = prompt.split()
word_count = len(words)
prompt_hash = hashlib.md5(prompt.encode("utf-8")).hexdigest()
# 1) LONG prompts – echo & ingest like a β€œpaste‐in” document
if word_count > 30 and prompt_hash != st.session_state.last_prompt_hash:
st.session_state.last_prompt_hash = prompt_hash
raw_text = prompt
st.session_state.messages.append({
"role": "user",
"content": f"πŸ“₯ **Pasted Document Text:**\n\n{limit_text(raw_text,500)}"
})
with st.chat_message("user", avatar=USER_AVATAR):
st.markdown(limit_text(raw_text,500))
start_time = time.time()
# summarization + emb_text as before
summary_dict = hybrid_summary_hierarchical(raw_text)
emb_text = prepare_text_for_embedding(summary_dict)
st.session_state.document_context = emb_text
st.session_state.processed = True
# ─── NEW: ingest via document‐augmentation ───
chunks, store = process_document(raw_text)
st.session_state.vector_store = store
if user_role == "General":
role_prompt = (
"Summarize the document focusing on facts, arguments, judgments, "
"and include a timeline of events."
)
else:
role_prompt = (
f"As a {user_role}, summarize the document focusing on facts, "
"arguments, judgments, plus timeline of events."
)
# ─── doc‐augmentation RAG here too ───
results = store.search(role_prompt, k=5)
context = prepare_context(results)
initial_summary = generate_response_from_context(role_prompt, context)
st.session_state.messages.append({
"role": "assistant",
"content": initial_summary
})
with st.chat_message("assistant", avatar=BOT_AVATAR):
display_with_typing_effect(initial_summary)
st.info(f"⏱️ Summary generated in {round((time.time()-start_time)/60,2)} minutes")
save_chat_history(st.session_state.messages)
# 2) SHORT prompts – normal RAG against last ingested context
elif word_count <= 30 and st.session_state.processed:
with st.chat_message("user", avatar=USER_AVATAR):
st.markdown(prompt)
# 2) save to history
st.session_state.messages.append({"role": "user", "content": prompt})
store = st.session_state.vector_store
        # ─── document-augmentation RAG over the last ingested document ───
results = store.search(prompt, k=5)
context = prepare_context(results)
answer = generate_response_from_context(prompt, context)
st.session_state.messages.append({"role":"assistant","content":answer})
with st.chat_message("assistant", avatar=BOT_AVATAR):
display_with_typing_effect(answer)
save_chat_history(st.session_state.messages)
# 3) not enough input
else:
with st.chat_message("assistant", avatar=BOT_AVATAR):
st.markdown("❗ Paste at least 30 words of your document to ingest it first.")
################################Evaluation###########################
######################################################################################################################
# πŸ“š Imports
import evaluate
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
# πŸ“Œ Load Evaluators Once
@st.cache_resource
def load_evaluators():
rouge = evaluate.load("rouge")
bertscore = evaluate.load("bertscore")
return rouge, bertscore
rouge, bertscore = load_evaluators()
# πŸ“Œ Define Evaluation Functions
def evaluate_summary(generated_summary, ground_truth_summary):
"""Evaluate ROUGE and BERTScore."""
rouge_result = rouge.compute(predictions=[generated_summary], references=[ground_truth_summary])
bert_result = bertscore.compute(predictions=[generated_summary], references=[ground_truth_summary], lang="en")
return rouge_result, bert_result
def exact_match(prediction, ground_truth):
return int(prediction.strip().lower() == ground_truth.strip().lower())
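# Sentence-level BLEU with smoothing (method4) so short answers with few higher-order
# n-gram overlaps do not collapse to a score of zero.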
def compute_bleu(prediction, ground_truth):
reference = [ground_truth.strip().split()]
candidate = prediction.strip().split()
smoothie = SmoothingFunction().method4
return sentence_bleu(reference, candidate, smoothing_function=smoothie)
def compute_f1(prediction, ground_truth):
"""Compute F1 score based on token overlap, like in QA evaluation."""
pred_tokens = prediction.strip().lower().split()
gt_tokens = ground_truth.strip().lower().split()
common_tokens = set(pred_tokens) & set(gt_tokens)
num_common = len(common_tokens)
if num_common == 0:
return 0.0
precision = num_common / len(pred_tokens)
recall = num_common / len(gt_tokens)
f1 = 2 * (precision * recall) / (precision + recall)
return f1
def evaluate_additional_metrics(prediction, ground_truth):
em = exact_match(prediction, ground_truth)
bleu = compute_bleu(prediction, ground_truth)
f1 = compute_f1(prediction, ground_truth)
return {
"Exact Match": em,
"BLEU Score": bleu,
"F1 Score": f1
}
# πŸ“₯ Upload and Evaluate
ground_truth_summary_file = st.file_uploader("πŸ“„ Upload Ground Truth Summary (.txt)", type=["txt"])
if ground_truth_summary_file:
ground_truth_summary = ground_truth_summary_file.read().decode("utf-8").strip()
if "generated_summary" in st.session_state and st.session_state.generated_summary:
prediction = st.session_state.generated_summary
# Evaluate ROUGE and BERTScore
rouge_result, bert_result = evaluate_summary(prediction, ground_truth_summary)
# Display ROUGE and BERTScore
st.subheader("πŸ“Š Evaluation Results")
st.write("πŸ”Ή ROUGE Scores:")
st.json(rouge_result)
st.write("πŸ”Ή BERTScore:")
st.json(bert_result)
# Evaluate and Display Exact Match, BLEU, F1
additional_metrics = evaluate_additional_metrics(prediction, ground_truth_summary)
st.subheader("πŸ”Ž Additional Evaluation Metrics")
st.json(additional_metrics)
else:
st.warning("⚠️ Please generate a summary first by uploading a document.")
######################################################################################################################
# Optional evaluation hook: uncomment the block below (and run `streamlit run app.py`)
# to evaluate the model on a test set. After the first summary is generated, it fires
# off run_eval() once in a background thread.
# import json
# import pandas as pd
# import threading
# def run_eval(doc_context):
# with open("test_case1.json", "r", encoding="utf-8") as f:
# gt_data = json.load(f)
# # 2) map document_id β†’ local file
# records = []
# for entry in gt_data:
# doc_id = entry["document_id"]
# query = entry["query"]
# gt_ans = entry["ground_truth_answer"]
# # model_ans = rag_query_response(query, emb_text)
# model_ans = rag_query_response(query, doc_context)
# records.append({
# "document_id": doc_id,
# "query": query,
# "ground_truth_answer": gt_ans,
# "model_answer": model_ans
# })
# print(f"βœ… Done {doc_id} / β€œ{query}”")
# # 3) push to DataFrame + CSV
# df = pd.DataFrame(records)
# out = "evaluation_results.csv"
# df.to_csv(out, index=False, encoding="utf-8")
# print(f"\nπŸ“ Saved {len(df)} rows to {out}")
# # you could log this somewhere
# def _run_evaluation():
# try:
# run_eval()
# except Exception as e:
# print("‼️ Evaluation script error:", e)
# if st.session_state.processed and not st.session_state.get("evaluation_launched", False):
# st.session_state.evaluation_launched = True
# # inform user
# st.sidebar.info("πŸ”¬ Starting background evaluation run…")
# # *capture* the context
# doc_ctx = st.session_state.document_context
# # spawn the thread, passing doc_ctx in
# threading.Thread(
# target=lambda: run_eval(doc_ctx),
# daemon=True
# ).start()
# st.sidebar.success("βœ… Evaluation launched β€” check evaluation_results.csv when done.")
# # check for file existence & show download button
# eval_path = os.path.abspath("evaluation_results.csv")
# if os.path.exists(eval_path):
# st.sidebar.success(f"βœ… Results saved to:\n`{eval_path}`")
# # load it into a small dataframe (optional)
# df_eval = pd.read_csv(eval_path)
# # add a download button
# st.sidebar.download_button(
# label="⬇️ Download evaluation_results.csv",
# data=df_eval.to_csv(index=False).encode("utf-8"),
# file_name="evaluation_results.csv",
# mime="text/csv"
# )
# else:
# # if you want, display the cwd so you can inspect it
# st.sidebar.info(f"Current working dir:\n`{os.getcwd()}`")