Spaces:

aleksandrrnt
/

hakaton

Running

App Files Files Community

hakaton / summary.py

aleksandrrnt

Upload 11 files

790e088 verified about 2 months ago

raw

history blame contribute delete

1.96 kB

	import docx
	from pypdf import PdfReader
	from prompts import summary_system_prompt
	from llm import LLM
	import logging


	# Set up process-wide log output (timestamp, padded level name, message)
	# and then obtain the named logger used by the functions in this module.
	logging.basicConfig(
	    level=logging.INFO,
	    datefmt="%Y-%m-%d %H:%M:%S",
	    format="%(asctime)s %(levelname)-8s %(message)s",
	)
	logger = logging.getLogger("summary")

	def getTextFromDocx(filename):
	    """Return the text of a .docx file, one paragraph per line.

	    Opens ``filename`` with ``docx.Document`` and joins the ``text`` of each
	    entry in its ``paragraphs`` with newline characters. Only ``paragraphs``
	    is read here; nothing else in the document is visited.
	    """
	    document = docx.Document(filename)
	    return '\n'.join(paragraph.text for paragraph in document.paragraphs)

	def getTextFromPDF(filename):
	    """Return the extracted text of every page of a PDF file.

	    Opens ``filename`` with ``PdfReader`` and concatenates the result of
	    ``extract_text()`` for each page in order, appending a newline after
	    every page (including the last one). A reader with no pages yields an
	    empty string.
	    """
	    reader = PdfReader(filename)
	    # Build the result with a single join instead of growing a string with
	    # ``+=`` per page, which re-copies the accumulated text on every step.
	    return "".join(page.extract_text() + "\n" for page in reader.pages)


	def getTextFromFile(filename):
	    """Return the text content of a .pdf, .docx or .txt file.

	    The reader is chosen from the end of the file name, compared
	    case-insensitively.

	    Args:
	        filename: Path of the file to read.

	    Returns:
	        The extracted text, or an empty string when the name does not end
	        with one of the supported extensions.
	    """
	    # Lower-case the name once rather than once per branch.
	    lowered = filename.lower()
	    if lowered.endswith(".pdf"):
	        logger.info("pdf detected")
	        return getTextFromPDF(filename)
	    if lowered.endswith(".docx"):
	        logger.info("docx detected")
	        return getTextFromDocx(filename)
	    if lowered.endswith(".txt"):
	        # This branch used to log "pdf detected", which made the log
	        # misleading for plain-text uploads.
	        logger.info("txt detected")
	        with open(filename, 'r', encoding='utf-8') as f:
	            return f.read()
	    # Unsupported extension: still return "" as before, but leave a trace
	    # in the log instead of failing silently.
	    logger.warning("Unsupported file type: %s", filename)
	    return ""

	# Handles a summarization request by sending the file's text to the LLM
	def file_summary(file):
	    """Summarize the text of an uploaded file with the LLM.

	    Args:
	        file: Object whose ``name`` attribute is passed to
	            ``getTextFromFile`` as the file path, or ``None``. With
	            ``None`` a warning is logged and the request is still sent,
	            with empty text.

	    Returns:
	        The ``content`` of the first choice's message in the chat response.
	    """
	    logger.info("Start summarization")

	    # Read the file contents; fall back to empty text when no file is given
	    if file is None:
	        logger.warning("Invalid input file")
	        file_content = ""
	    else:
	        file_content = getTextFromFile(file.name)

	    # Build the messages for the LLM: system prompt first, then the user turn
	    system_turn = {"role": "system", "content": summary_system_prompt}
	    user_turn = {"role": "user", "content": f"Задание: {file_content}"}

	    client = LLM('mistral')

	    # Get the LLM response and pull out the text of the first choice
	    completion = client.chat([system_turn, user_turn])
	    summary_text = completion.choices[0].message.content
	    logger.info("Finish summarization")
	    return summary_text