import gradio as gr
import pdfplumber
import docx
import os
import datetime
from transformers import pipeline
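# Runtime dependencies implied by the imports above (assumed requirements for this Space):
# gradio, pdfplumber, python-docx, transformers, and torch as the transformers backend.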
# Load open-source LLMs
summary_llm = pipeline("summarization", model="google/pegasus-xsum", tokenizer="google/pegasus-xsum")
text_llm = pipeline("text2text-generation", model="MBZUAI/LaMini-T5-738M", tokenizer="MBZUAI/LaMini-T5-738M")
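# pegasus-xsum is an abstractive summarizer; LaMini-T5-738M is an instruction-tuned T5 model
# used below for the glossary, verdict, and custom Q&A prompts. Both have limited input windows,
# which is why the document text is truncated (text[:1024] / text[:3000]) before inference.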
# Extract text from files
def extract_text(file):
    # `file` is the temporary upload object from gr.File; its path is used for all formats
    if file.name.endswith(".pdf"):
        with pdfplumber.open(file.name) as pdf:
            return "\n".join([p.extract_text() for p in pdf.pages if p.extract_text()])
    elif file.name.endswith(".docx"):
        doc = docx.Document(file.name)
        return "\n".join([para.text for para in doc.paragraphs])
    elif file.name.endswith(".txt"):
        with open(file.name, "r", encoding="utf-8") as f:
            return f.read()
    else:
        return "Unsupported file format."
# Format glossary visually
def format_glossary_html(glossary_text):
    lines = glossary_text.split('\n')
    html = ""
    for line in lines:
        if ":" in line:
            term, desc = line.split(":", 1)
            html += f"<b style='color:#1e3a8a'>{term.strip()}</b>: {desc.strip()}<br>"
        else:
            html += f"{line}<br>"
    return html
# Generate summary
def generate_summary(text):
    return summary_llm(text[:1024], max_length=250, min_length=80, do_sample=False)[0]["summary_text"]
# Generate text (glossary/verdict/custom)
def generate_text_response(prompt, max_len=512):
    return text_llm(prompt, max_length=max_len, do_sample=True)[0]["generated_text"]
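# Note: max_length / max_len above are token counts (per the transformers pipeline API),
# not character counts; the text[:1024] and text[:3000] slices only bound the raw input size.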
# Main document analyzer
def analyze_document(file):
    filename = os.path.basename(file.name)
    text = extract_text(file)
    if not text.strip():
        return "No content found in file.", "", "", "", "", None, ""
    short_text = text[:3000]

    # Enhanced prompts
    summary_prompt = f"""
You are a legal assistant. Read the following legal document and generate a comprehensive summary.
Include: parties involved, key facts, legal issues, arguments, court observations, and likely outcome.
Document:
{short_text}
"""
    glossary_prompt = f"""
Extract and explain all legal terms, laws, or references. Format:
Term: ...
Explanation: ...
Document:
{short_text}
"""
    verdict_prompt = f"""
Based on the document, predict the likely verdict in 2–3 sentences using standard legal reasoning.
Document:
{short_text}
"""

    # Run LLMs
    summary = generate_summary(short_text)
    glossary = generate_text_response(glossary_prompt)
    verdict = generate_text_response(verdict_prompt)
    glossary_html = format_glossary_html(glossary)

    # Save report
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    output_filename = f"LegalSummary_{timestamp}.txt"
    with open(output_filename, "w", encoding="utf-8") as f:
        f.write(f"File: {filename}\nTime: {timestamp}\n\n")
        f.write("=== Summary ===\n" + summary + "\n\n")
        f.write("=== Glossary ===\n" + glossary + "\n\n")
        f.write("=== Verdict ===\n" + verdict + "\n")
    return text, summary, glossary, glossary_html, verdict, output_filename, short_text
# Custom prompt answer
def custom_prompt_response(doc_text, user_prompt):
    if not doc_text.strip() or not user_prompt.strip():
        return "Please provide both a document and a prompt."
    prompt = f"""
You are a legal expert. Answer the question below using only the document provided.
Question:
{user_prompt.strip()}
Document:
{doc_text.strip()}
"""
    return generate_text_response(prompt)
# Gradio UI
with gr.Blocks(css="body { background-color: #f9f9f9; font-family: 'Segoe UI'; }") as demo:
    with gr.Row():
        with gr.Column(scale=3):
            gr.Markdown("""
            <div style='text-align: center; font-size: 28px; font-weight: bold; color: #1e3a8a; margin-bottom: 10px;'>
                Legal Document Summarizer Using LLMs
            </div>
            <div style='text-align: center; font-size: 16px; color: #444444; margin-bottom: 25px;'>
                Upload legal documents in PDF, DOCX, or TXT format to receive structured summaries, legal term glossaries, and AI-inferred verdicts using open-source language models.
            </div>
            """)
            file_input = gr.File(label="Upload Legal Document")
            submit_btn = gr.Button("Analyze Document")
            download_btn = gr.File(label="Download Report")
        with gr.Column(scale=1):
            gr.Markdown("### Features")
            gr.Markdown("""
            - AI-generated legal summaries
            - Glossary of legal terms
            - Inferred legal verdict
            - Custom Q&A based on the document
            """)

    extracted = gr.Textbox(label="Extracted Text", lines=10, interactive=False)
    summary = gr.Textbox(label="Summary", lines=6, interactive=False)
    glossary_raw = gr.Textbox(visible=False)
    glossary_html = gr.HTML(label="Glossary of Legal Terms")
    final_verdict = gr.Textbox(label="Verdict (AI Inferred)", lines=3, interactive=False)

    with gr.Row():
        gr.Markdown("### Ask a Question About the Document")
    user_prompt = gr.Textbox(label="Your Question", placeholder="e.g., What is the legal issue?")
    custom_response = gr.Textbox(label="AI Answer", lines=4)
    custom_btn = gr.Button("Get Answer")
    hidden_doc_text = gr.Textbox(visible=False)

    submit_btn.click(fn=analyze_document, inputs=[file_input], outputs=[
        extracted, summary, glossary_raw, glossary_html, final_verdict, download_btn, hidden_doc_text
    ])
    custom_btn.click(fn=custom_prompt_response, inputs=[hidden_doc_text, user_prompt], outputs=custom_response)
demo.launch()
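# On Hugging Face Spaces the default launch() is sufficient; when running locally,
# demo.launch(share=True) can optionally be used to get a temporary public link.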