Spaces:
Running
Running
File size: 1,960 Bytes
790e088 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
import docx
from pypdf import PdfReader
from prompts import summary_system_prompt
from llm import LLM
import logging
# Configure root logging once for the module: timestamped, INFO-level records.
logging.basicConfig(
    level=logging.INFO,
    datefmt="%Y-%m-%d %H:%M:%S",
    format="%(asctime)s %(levelname)-8s %(message)s",
)
# Named logger shared by every function in this module.
logger = logging.getLogger("summary")
def getTextFromDocx(filename):
    """Return the text of a .docx file with one paragraph per line.

    Opens ``filename`` with ``docx.Document`` and joins the ``text`` of each
    entry in the document's ``paragraphs`` with newline characters.
    """
    document = docx.Document(filename)
    return '\n'.join(paragraph.text for paragraph in document.paragraphs)
def getTextFromPDF(filename):
    """Return the extracted text of a PDF, page by page.

    Opens ``filename`` with ``PdfReader`` and concatenates the result of
    ``extract_text()`` for every page, appending a newline after each page.
    """
    pdf = PdfReader(filename)
    return "".join(page.extract_text() + "\n" for page in pdf.pages)
def getTextFromFile(filename):
    """Extract plain text from a file, choosing the reader by extension.

    The extension check is case-insensitive. ``.pdf`` files are handed to
    ``getTextFromPDF``, ``.docx`` files to ``getTextFromDocx``, and ``.txt``
    files are read directly as UTF-8.

    Args:
        filename: Path of the file to read, as a string.

    Returns:
        The extracted text, or an empty string when the extension is not one
        of the supported ones.
    """
    # Normalise once so that ".PDF" and ".pdf" are treated alike.
    lowered = filename.lower()

    if lowered.endswith(".pdf"):
        logger.info("pdf detected")
        return getTextFromPDF(filename)

    if lowered.endswith(".docx"):
        logger.info("docx detected")
        return getTextFromDocx(filename)

    if lowered.endswith(".txt"):
        # Log the type that was actually matched, not "pdf".
        logger.info("txt detected")
        with open(filename, 'r', encoding='utf-8') as f:
            return f.read()

    # Unsupported extension: callers still get "", but the log now says why.
    logger.warning("Unsupported file type: %s", filename)
    return ""
# Handles a summarization request to the LLM
def file_summary(file):
    """Summarize the text of ``file`` using an ``LLM('mistral')`` instance.

    Args:
        file: Object whose ``name`` attribute is the path of the file to
            read, or ``None``.

    Returns:
        The ``content`` of the first choice in the LLM chat response.

    A ``None`` input is logged as a warning and the request is still sent,
    with an empty document text.
    """
    logger.info("Start summarization")

    # Read the file contents
    if file is None:
        logger.warning("Invalid input file")
        document_text = ""
    else:
        document_text = getTextFromFile(file.name)

    # Build the messages for the LLM
    system_message = {"role": "system", "content": summary_system_prompt}
    user_message = {"role": "user", "content": f"Задание: {document_text}"}
    conversation = [system_message, user_message]

    # Get the answer from the LLM
    model = LLM('mistral')
    response = model.chat(conversation)
    summary = response.choices[0].message.content

    logger.info("Finish summarization")
    return summary