File size: 1,960 Bytes
790e088
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import docx
from pypdf import PdfReader
from prompts import summary_system_prompt
from llm import LLM
import logging


# Configure the root logger: INFO and above, each record prefixed with a
# "YYYY-MM-DD HH:MM:SS" timestamp and a left-aligned, 8-character level name.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)
# Named logger used by the functions in this module.
logger = logging.getLogger("summary")

def getTextFromDocx(filename):
    """Return the paragraph text of a .docx document as one string.

    Only ``paragraphs`` of the opened document is read; nothing else in the
    document is visited here.

    Args:
        filename: Passed unchanged to ``docx.Document``.

    Returns:
        The ``text`` of each paragraph, in document order, joined with a
        newline character (no trailing newline).
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)

def getTextFromPDF(filename):
    """Return the extracted text of every page of a PDF as one string.

    Args:
        filename: Passed unchanged to ``PdfReader``.

    Returns:
        The result of ``extract_text()`` for each page, in page order, with a
        newline appended after every page. A reader with no pages yields an
        empty string.
    """
    reader = PdfReader(filename)
    # Assemble the result with one join instead of growing a string with
    # `+=` per page, which re-copies the accumulated text on each iteration.
    return "".join(page.extract_text() + "\n" for page in reader.pages)


def getTextFromFile(filename):
    """Read the text of a file, picking the reader from its extension.

    The extension check is case-insensitive. Supported extensions are
    ``.pdf`` (via ``getTextFromPDF``), ``.docx`` (via ``getTextFromDocx``)
    and ``.txt`` (read as UTF-8).

    Args:
        filename: Path of the file to read, as a string.

    Returns:
        The extracted text, or an empty string when the extension is not one
        of the supported ones (a warning is logged in that case).
    """
    # Lower-case once so all extension checks share the same normalized name.
    lowered = filename.lower()

    if lowered.endswith(".pdf"):
        logger.info("pdf detected")
        return getTextFromPDF(filename)

    if lowered.endswith(".docx"):
        logger.info("docx detected")
        return getTextFromDocx(filename)

    if lowered.endswith(".txt"):
        # Previously logged "pdf detected" here, which mislabeled text files.
        logger.info("txt detected")
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read()

    # Keep the historical "" result for unknown types, but make it visible.
    logger.warning("Unsupported file type: %s", filename)
    return ""

# Handles a summarization request to the LLM
def file_summary(file):
    """Send the text of ``file`` to the LLM and return its reply.

    Args:
        file: Object whose ``name`` attribute is handed to
            ``getTextFromFile``, or ``None``. With ``None`` a warning is
            logged and the request is still sent with empty content.

    Returns:
        ``choices[0].message.content`` of the response returned by
        ``LLM('mistral').chat(...)``.
    """
    logger.info("Start summarization")

    # Read the file contents (empty when no file was supplied)
    if file is None:
        logger.warning("Invalid input file")
        document_text = ""
    else:
        document_text = getTextFromFile(file.name)

    # Build the messages for the LLM: system prompt first, then the user task
    system_message = {"role": "system", "content": summary_system_prompt}
    user_message = {"role": "user", "content": f"Задание: {document_text}"}
    conversation = [system_message, user_message]

    client = LLM('mistral')

    # Get the LLM response and pull out the text of the first choice
    completion = client.chat(conversation)
    summary = completion.choices[0].message.content
    logger.info("Finish summarization")
    return summary