Spaces:
Running
Running
import docx | |
from pypdf import PdfReader | |
from prompts import summary_system_prompt | |
from llm import LLM | |
import logging | |
# Configure process-wide logging at import time: timestamped, level-padded
# records at INFO and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)

# Module-level logger shared by every function in this file.
logger = logging.getLogger("summary")
def getTextFromDocx(filename):
    """Return the text of a .docx file, one paragraph per line.

    Args:
        filename: Path (or whatever ``docx.Document`` accepts) of the
            document to read.

    Returns:
        The text of every entry in the document's ``paragraphs``, joined
        with newline characters.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
def getTextFromPDF(filename):
    """Return the extracted text of a PDF, with a newline after each page.

    Args:
        filename: Path (or whatever ``PdfReader`` accepts) of the PDF to read.

    Returns:
        The concatenation of each page's extracted text, each followed by
        a newline. An empty string when the PDF has no pages.
    """
    pdf = PdfReader(filename)
    # Build all page chunks first and join once instead of growing a string.
    return "".join(page.extract_text() + "\n" for page in pdf.pages)
def getTextFromFile(filename):
    """Extract plain text from a file, dispatching on its extension.

    Supported extensions (matched case-insensitively) are ``.pdf``,
    ``.docx`` and ``.txt``. Text files are read as UTF-8.

    Args:
        filename: Path of the file to read.

    Returns:
        The extracted text, or an empty string when the extension is not
        one of the supported types.
    """
    # Lower-case once so every extension check is case-insensitive.
    lowered = filename.lower()

    if lowered.endswith(".pdf"):
        logger.info("pdf detected")
        return getTextFromPDF(filename)

    if lowered.endswith(".docx"):
        logger.info("docx detected")
        return getTextFromDocx(filename)

    if lowered.endswith(".txt"):
        # Previously logged "pdf detected" here (copy-paste slip).
        logger.info("txt detected")
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read()

    # Keep the historical contract (empty string) but make the skip visible.
    logger.warning("Unsupported file type: %s", filename)
    return ""
# Handles a summarization request to the LLM.
def file_summary(file):
    """Summarize the contents of a file using the LLM.

    Args:
        file: Object whose ``name`` attribute is the path of the file to
            summarize, or ``None``. (Presumably an uploaded-file object
            from the UI layer; confirm against the caller.)

    Returns:
        The ``message.content`` of the first choice in the LLM chat
        response.

    Note:
        When ``file`` is ``None`` a warning is logged and the LLM is still
        called, with an empty document.
    """
    logger.info("Start summarization")

    # Read the file contents; a missing file becomes an empty document.
    if file is None:
        logger.warning("Invalid input file")
        document_text = ""
    else:
        document_text = getTextFromFile(file.name)

    # Build the messages for the LLM.
    system_message = {"role": "system", "content": summary_system_prompt}
    user_message = {"role": "user", "content": f"Задание: {document_text}"}

    # Query the LLM and take the text of the first choice.
    client = LLM('mistral')
    completion = client.chat([system_message, user_message])
    summary = completion.choices[0].message.content

    logger.info("Finish summarization")
    return summary