Spaces:

2KKLabs
/

DocumentAI_V1

Running

File size: 12,603 Bytes

bd5a4f4
828a989
 
bd5a4f4
828a989
 
 
 
 
bd5a4f4
828a989
 
 
 
 
 
 
 
 
 
bd5a4f4
765e1e3
 
 
 
 
 
 
 
 
828a989
 
 
 
 
 
 
bd5a4f4
828a989
 
 
 
 
 
 
 
 
bd5a4f4
828a989
 
 
 
 
 
bd5a4f4
828a989
 
bd5a4f4
828a989
0c33edb
828a989
0c33edb
 
 
70c497e
0c33edb
765e1e3
0c33edb
765e1e3
 
0c33edb
 
 
 
 
 
 
 
765e1e3
0c33edb
 
 
 
70c497e
0c33edb
765e1e3
0c33edb
765e1e3
 
0c33edb
 
 
 
 
 
 
 
765e1e3
0c33edb
 
 
765e1e3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0c33edb
 
 
765e1e3
0c33edb
 
 
 
 
 
 
 
 
765e1e3
0c33edb
 
 
 
 
 
 
 
765e1e3
0c33edb
 
765e1e3
 
 
 
 
 
 
 
 
 
0c33edb
 
765e1e3
0c33edb
 
 
 
 
 
 
 
 
 
 
765e1e3
0c33edb
 
 
 
 
765e1e3
 
 
 
 
 
 
 
 
 
0c33edb
 
 
 
765e1e3
 
 
 
0c33edb
 
 
bd5a4f4

import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, AutoModelForQuestionAnswering
import torch

# -------------------------------
# Summarization model
# -------------------------------
# The tokenizer and the seq2seq weights come from the same checkpoint and are
# loaded once, at import time.
SUM_MODEL_NAME = "LaciaStudio/Lacia_sum_small_v1"
sum_tokenizer = AutoTokenizer.from_pretrained(SUM_MODEL_NAME)
sum_model = AutoModelForSeq2SeqLM.from_pretrained(SUM_MODEL_NAME)

def summarize_document(file):
    """Summarize the contents of an uploaded UTF-8 text file.

    Args:
        file: Path of the uploaded file, or ``None`` when nothing was uploaded.

    Returns:
        The generated summary, or a user-facing message when no file was
        supplied, the file could not be read as UTF-8 text, or it is empty.
    """
    if file is None:
        return "Файл не загружен."
    try:
        with open(file, "r", encoding="utf-8") as f:
            text = f.read()
    except (OSError, UnicodeDecodeError) as exc:
        # Uploads can be arbitrary files; report an unreadable or non-UTF-8
        # file in the output box instead of failing the whole request.
        return f"Не удалось прочитать файл: {exc}"
    # Delegate to the text entry point so both input paths share one prompt
    # prefix, truncation limit and generation configuration.
    return summarize_text(text)

def summarize_text(text):
    """Summarize raw document text with the summarization model.

    Args:
        text: Document text typed or pasted by the user.

    Returns:
        The decoded summary, or a user-facing message when ``text`` is
        missing, empty, or consists only of whitespace.
    """
    # Whitespace-only input carries nothing to summarize; treat it as empty.
    if not text or not text.strip():
        return "Текст не предоставлен."
    # The input is truncated to 512 tokens, so only the beginning of a long
    # document contributes to the summary.
    inputs = sum_tokenizer(
        "summarize: " + text,
        return_tensors="pt",
        max_length=512,
        truncation=True,
    )
    summary_ids = sum_model.generate(
        inputs["input_ids"],
        # Forward the mask explicitly instead of letting generate() infer it;
        # .get() keeps this working if the tokenizer does not emit one.
        attention_mask=inputs.get("attention_mask"),
        max_length=150,
        num_beams=4,
        early_stopping=True,
    )
    return sum_tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# -------------------------------
# Question-answering (Q&A) model
# -------------------------------
QA_MODEL_NAME = "LaciaStudio/Kaleidoscope_large_v1"
# Run on the GPU when CUDA is available, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
qa_tokenizer = AutoTokenizer.from_pretrained(QA_MODEL_NAME)
qa_model = AutoModelForQuestionAnswering.from_pretrained(QA_MODEL_NAME)
qa_model.to(device)

def answer_question(context, question):
    """Extract the answer to ``question`` from ``context`` with the Q&A model.

    Args:
        context: Document text to search for the answer.
        question: Question about the document.

    Returns:
        The decoded answer span (possibly an empty string), or a user-facing
        message when the context or the question is missing or blank.
    """
    if not context or not context.strip():
        return "Текст не предоставлен."
    if not question or not question.strip():
        return "Вопрос не предоставлен."
    inputs = qa_tokenizer(question, context, return_tensors="pt", truncation=True, max_length=384)
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Inference only: disable autograd so no computation graph is kept alive.
    with torch.no_grad():
        outputs = qa_model(**inputs)
    start_index = int(torch.argmax(outputs.start_logits))
    end_index = int(torch.argmax(outputs.end_logits))
    # If the predicted end precedes the start, the slice is empty and the
    # decoded answer is an empty string.
    answer_tokens = inputs["input_ids"][0][start_index:end_index + 1]
    return qa_tokenizer.decode(answer_tokens, skip_special_tokens=True)

def answer_question_file(file, question):
    """Answer ``question`` using an uploaded UTF-8 text file as the context.

    Args:
        file: Path of the uploaded file, or ``None`` when nothing was uploaded.
        question: Question about the document.

    Returns:
        Whatever ``answer_question`` returns for the file's text, or a
        user-facing message when no file was supplied or it could not be
        read as UTF-8 text.
    """
    if file is None:
        return "Файл не загружен."
    try:
        with open(file, "r", encoding="utf-8") as f:
            context = f.read()
    except (OSError, UnicodeDecodeError) as exc:
        # Uploads can be arbitrary files; report an unreadable or non-UTF-8
        # file in the output box instead of failing the whole request.
        return f"Не удалось прочитать файл: {exc}"
    return answer_question(context, question)

def answer_question_text(context, question):
    """Answer ``question`` from directly entered ``context`` text.

    Forwards both arguments unchanged to ``answer_question`` and returns
    its result.
    """
    answer = answer_question(context=context, question=question)
    return answer

# -------------------------------
# Language selection and interface label updates
# -------------------------------
def set_language(lang):
    """Build the Gradio updates that localize the UI and reveal the main view.

    Args:
        lang: Selected language. ``"English"`` picks the English strings;
            any other value picks the Russian ones.

    Returns:
        A tuple of 23 ``gr.update(...)`` results: hide the language picker,
        show the main container, set the app title, then the summarization
        updates (section heading, file tab, text tab) followed by the Q&A
        updates (section heading, both tab titles, file tab, text tab).
    """
    english = {
        "app_title": "# DocumentAI by LaciaStudio",
        "sum_section": "Document Summarization",
        "sum_tab_file": "Upload File",
        "sum_file_label": "Attach file for summarization",
        "sum_tab_text": "Enter Text",
        "sum_text_label": "Enter document text",
        "sum_button": "Summarize",
        "sum_output": "Summarization",
        "qa_section": "Document Q&A",
        "qa_tab_file": "Upload File",
        "qa_file_label": "Attach file with document",
        "qa_question_label": "Enter your question",
        "qa_answer_button": "Get Answer",
        "qa_answer_label": "Answer",
        "qa_tab_text": "Enter Text",
        "qa_text_label": "Enter document text",
    }
    russian = {
        "app_title": "# DocumentAI создано LaciaStudio",
        "sum_section": "Суммаризация документа",
        "sum_tab_file": "Загрузить файл",
        "sum_file_label": "Прикрепить файл для суммаризации",
        "sum_tab_text": "Ввести текст",
        "sum_text_label": "Введите текст документа",
        "sum_button": "Суммаризировать",
        "sum_output": "Суммаризация",
        "qa_section": "Вопрос-ответ по документу",
        "qa_tab_file": "Загрузить файл",
        "qa_file_label": "Прикрепить файл с документом",
        "qa_question_label": "Введите вопрос",
        "qa_answer_button": "Получить ответ",
        "qa_answer_label": "Ответ",
        "qa_tab_text": "Ввести текст",
        "qa_text_label": "Введите текст документа",
    }
    # Anything that is not exactly "English" falls back to Russian.
    texts = english if lang == "English" else russian

    def label(key):
        """Update a component's label (or a tab's title) from ``texts``."""
        return gr.update(label=texts[key])

    def value(key):
        """Update a component's value (button caption, markdown body)."""
        return gr.update(value=texts[key])

    def heading(key):
        """Update a markdown section heading, rendered as a level-3 title."""
        return gr.update(value=f"### {texts[key]}")

    # Hide the language picker, reveal the main interface.
    visibility = (gr.update(visible=False), gr.update(visible=True))
    title = (value("app_title"),)
    summarization = (
        heading("sum_section"),
        # File tab: tab title, file input, button, output box.
        label("sum_tab_file"),
        label("sum_file_label"),
        value("sum_button"),
        label("sum_output"),
        # Text tab: tab title, text input, button, output box.
        label("sum_tab_text"),
        label("sum_text_label"),
        value("sum_button"),
        label("sum_output"),
    )
    qa = (
        heading("qa_section"),
        # Both tab titles come first, then the per-tab components.
        label("qa_tab_file"),
        label("qa_tab_text"),
        # File tab: file input, question box, button, answer box.
        label("qa_file_label"),
        label("qa_question_label"),
        value("qa_answer_button"),
        label("qa_answer_label"),
        # Text tab: context box, question box, button, answer box.
        label("qa_text_label"),
        label("qa_question_label"),
        value("qa_answer_button"),
        label("qa_answer_label"),
    )
    return visibility + title + summarization + qa

# -------------------------------
# Gradio interface: custom styling, a language picker, and tabbed inputs
# -------------------------------
# The page starts with only the language picker visible. Pressing the start
# button hides it, reveals the main two-column interface, and replaces every
# placeholder label below with the localized text returned by set_language.
with gr.Blocks(css="""
    body { background-color: #f0f2f5; }
    .gradio-container { border-radius: 10px; box-shadow: 0 0 15px rgba(0,0,0,0.1); padding: 20px; background-color: white; }
    h1, h2, h3 { color: #333; }
    .gr-button { background-color: #4CAF50; color: white; }
""") as demo:
    # NOTE(review): this <style> tag repeats the same rules already passed via
    # css= above — presumably a fallback; confirm whether both are needed.
    gr.HTML("<style>body { background-color: #f0f2f5; } .gradio-container { border-radius: 10px; box-shadow: 0 0 15px rgba(0,0,0,0.1); padding: 20px; background-color: white; } h1, h2, h3 { color: #333; } .gr-button { background-color: #4CAF50; color: white; }</style>")
    
    # Language selection container (visible initially)
    with gr.Column(visible=True) as lang_container:
         lang_radio = gr.Radio(choices=["English", "Русский"], label="Select Language / Выберите язык", value="English")
         start_button = gr.Button("Start / Начать")
    
    # Main interface (hidden initially)
    with gr.Column(visible=False) as main_container:
         header_markdown = gr.Markdown("Placeholder Title")
         with gr.Row():
             # Left column: summarization, with two tabs (file upload / text entry)
             with gr.Column():
                 sum_section_md = gr.Markdown("Placeholder Summarization Section")
                 with gr.Tabs() as sum_tabs:
                     with gr.Tab("Placeholder Tab 1") as sum_file_tab:
                         file_input_sum = gr.File(label="Placeholder File Input", file_count="single", type="filepath")
                         summarize_button_file = gr.Button("Placeholder Summarize Button")
                         summary_output_file = gr.Textbox(label="Placeholder Summarization Output", lines=10)
                     with gr.Tab("Placeholder Tab 2") as sum_text_tab:
                         text_input_sum = gr.Textbox(label="Placeholder Text Input", lines=10, placeholder="Enter document text here")
                         summarize_button_text = gr.Button("Placeholder Summarize Button")
                         summary_output_text = gr.Textbox(label="Placeholder Summarization Output", lines=10)
             # Right column: Q&A, with two tabs (file upload / text entry)
             with gr.Column():
                 qa_section_md = gr.Markdown("Placeholder Q&A Section")
                 with gr.Tabs() as qa_tabs:
                     with gr.Tab("Placeholder Tab 1") as file_tab:
                         file_input_qa = gr.File(label="Placeholder QA File Input", file_count="single", type="filepath")
                         question_input_file = gr.Textbox(label="Placeholder QA Question", placeholder="Your question here")
                         answer_button_file = gr.Button("Placeholder QA Answer Button")
                         answer_output_file = gr.Textbox(label="Placeholder QA Answer Output", lines=5)
                     with gr.Tab("Placeholder Tab 2") as text_tab:
                         context_input = gr.Textbox(label="Placeholder Context Input", lines=10, placeholder="Document text here")
                         question_input_text = gr.Textbox(label="Placeholder QA Question", placeholder="Your question here")
                         answer_button_text = gr.Button("Placeholder QA Answer Button")
                         answer_output_text = gr.Textbox(label="Placeholder QA Answer Output", lines=5)
    
    # On language selection: update every label and show the main interface.
    # The outputs list must stay in the same order as the tuple returned by
    # set_language, since the updates are matched to components by position.
    start_button.click(
         set_language,
         inputs=[lang_radio],
         outputs=[
             lang_container, main_container,
             header_markdown, 
             # Summarization components (file tab)
             sum_section_md,
             sum_file_tab, file_input_sum, summarize_button_file, summary_output_file,
             # Summarization components (text tab)
             sum_text_tab, text_input_sum, summarize_button_text, summary_output_text,
             # Q&A components
             qa_section_md,
             file_tab, text_tab, file_input_qa, question_input_file, answer_button_file, answer_output_file,
             context_input, question_input_text, answer_button_text, answer_output_text
         ]
    )
    
    # Wire the buttons to the model functions
    # Summarization
    summarize_button_file.click(fn=summarize_document, inputs=file_input_sum, outputs=summary_output_file)
    summarize_button_text.click(fn=summarize_text, inputs=text_input_sum, outputs=summary_output_text)
    # Q&A
    answer_button_file.click(fn=answer_question_file, inputs=[file_input_qa, question_input_file], outputs=answer_output_file)
    answer_button_text.click(fn=answer_question_text, inputs=[context_input, question_input_text], outputs=answer_output_text)
    
# Start the Gradio server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()