"""Extract text from PDF/DOCX/TXT files and summarize it with an LLM."""

import logging

import docx
from pypdf import PdfReader

from llm import LLM
from prompts import summary_system_prompt

logger = logging.getLogger("summary")
logging.basicConfig(
    format="%(asctime)s %(levelname)-8s %(message)s",
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
)


def getTextFromDocx(filename: str) -> str:
    """Return the text of every paragraph in a .docx file, newline-joined.

    Args:
        filename: Path to the .docx document.

    Returns:
        The paragraph texts separated by a single newline character.
    """
    doc = docx.Document(filename)
    return "\n".join(para.text for para in doc.paragraphs)


def getTextFromPDF(filename: str) -> str:
    """Return the extracted text of every page in a PDF file.

    Args:
        filename: Path to the PDF document.

    Returns:
        The text of each page, each followed by a newline character.
    """
    reader = PdfReader(filename)
    # Single join instead of repeated `+=`, which copies the growing string
    # on every page; the result is the same, trailing newline included.
    return "".join(page.extract_text() + "\n" for page in reader.pages)


def getTextFromFile(filename: str) -> str:
    """Read the text content of a file, dispatching on its extension.

    The extension check is case-insensitive. Supported extensions are
    ``.pdf``, ``.docx`` and ``.txt`` (read as UTF-8).

    Args:
        filename: Path to the file to read.

    Returns:
        The extracted text, or an empty string when the extension is not
        one of the supported ones.
    """
    lowered = filename.lower()

    if lowered.endswith(".pdf"):
        logger.info("pdf detected")
        return getTextFromPDF(filename)

    if lowered.endswith(".docx"):
        logger.info("docx detected")
        return getTextFromDocx(filename)

    if lowered.endswith(".txt"):
        # Previously logged "pdf detected" here, which was misleading.
        logger.info("txt detected")
        with open(filename, "r", encoding="utf-8") as f:
            return f.read()

    # Keep the historical best-effort behavior (empty content), but make the
    # fallback visible in the logs instead of failing silently.
    logger.warning("Unsupported file extension: %s", filename)
    return ""


# Handles a summarization request to the LLM.
def file_summary(file):
    """Summarize the content of an uploaded file using the LLM.

    Args:
        file: An object exposing the file path as its ``name`` attribute, or
            ``None``. NOTE(review): presumably an upload/temp-file object
            supplied by the UI layer -- confirm against the caller.

    Returns:
        The ``content`` of the first choice's message in the LLM response.
        When ``file`` is ``None`` a warning is logged and the LLM is still
        queried with empty file content.
    """
    logger.info("Start summarization")

    # Read the file content.
    if file is not None:
        file_content = getTextFromFile(file.name)
    else:
        logger.warning("Invalid input file")
        file_content = ""

    # Build the messages for the LLM.
    messages = [
        {"role": "system", "content": summary_system_prompt},
        {"role": "user", "content": f"Задание: {file_content}"},
    ]
    llm = LLM('mistral')

    # Get the response from the LLM.
    llm_response = llm.chat(messages).choices[0].message.content
    logger.info("Finish summarization")
    return llm_response