# hakaton / summary.py
# aleksandrrnt's picture
# Upload 11 files
# 790e088 verified
import docx
from pypdf import PdfReader
from prompts import summary_system_prompt
from llm import LLM
import logging
# Configure the root logger at import time: timestamped lines with a
# left-aligned, 8-character level name, emitting INFO and above.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)

# Named logger shared by every function in this module.
logger = logging.getLogger("summary")
def getTextFromDocx(filename):
    """Return the paragraph text of a .docx document as one string.

    Args:
        filename: Passed unchanged to ``docx.Document``.

    Returns:
        The ``text`` of every entry in the document's ``paragraphs``,
        joined with a newline between consecutive paragraphs (no trailing
        newline). Only ``paragraphs`` is read; nothing else in the
        document is visited.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
def getTextFromPDF(filename):
    """Return the extracted text of every page of a PDF as one string.

    Args:
        filename: Passed unchanged to ``PdfReader``.

    Returns:
        The result of ``extract_text()`` for each page, in page order, with
        a newline appended after every page (including the last one). An
        empty string when the reader exposes no pages.
    """
    pdf = PdfReader(filename)
    # Each page contributes "<text>\n"; joining once avoids rebuilding the
    # accumulated string on every iteration.
    return "".join(page.extract_text() + "\n" for page in pdf.pages)
def getTextFromFile(filename):
    """Extract plain text from a file, choosing the reader by extension.

    The extension match is case-insensitive. Supported extensions are
    ``.pdf``, ``.docx`` and ``.txt`` (the latter read as UTF-8).

    Args:
        filename: Path of the file to read, as a string.

    Returns:
        The extracted text. For any other extension a warning is logged
        and an empty string is returned.
    """
    # Lower-case once instead of once per branch.
    lowered = filename.lower()

    if lowered.endswith(".pdf"):
        logger.info("pdf detected")
        return getTextFromPDF(filename)

    if lowered.endswith(".docx"):
        logger.info("docx detected")
        return getTextFromDocx(filename)

    if lowered.endswith(".txt"):
        # Log the format that was actually matched for this branch.
        logger.info("txt detected")
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read()

    # Unknown extension: keep the best-effort empty result, but leave a
    # trace so an empty summary input can be diagnosed from the logs.
    logger.warning("Unsupported file type: %s", filename)
    return ""
# Handles a summarization request to the LLM
def file_summary(file):
    """Summarize the text of an input file with the LLM.

    Args:
        file: Object whose ``name`` attribute is the path handed to
            ``getTextFromFile``, or ``None``. When ``None``, a warning is
            logged and the request is still sent with an empty task body.

    Returns:
        The ``content`` of the first choice's message in the response
        returned by ``LLM('mistral').chat(...)``.
    """
    logger.info("Start summarization")

    # Read the file contents
    if file is None:
        logger.warning("Invalid input file")
        file_content = ""
    else:
        file_content = getTextFromFile(file.name)

    # Build the conversation for the LLM: system prompt, then the task text
    system_turn = {"role": "system", "content": summary_system_prompt}
    user_turn = {"role": "user", "content": f"Задание: {file_content}"}
    conversation = [system_turn, user_turn]

    client = LLM('mistral')

    # Fetch the LLM reply and unwrap the text of its first choice
    reply = client.chat(conversation)
    first_choice = reply.choices[0]
    summary_text = first_choice.message.content

    logger.info("Finish summarization")
    return summary_text