Spaces:

chbsaikiran
/

Phi3-VLM-On-Cifar10

Runtime error

Phi3-VLM-On-Cifar10 / extract_answers.py

Initial Commit

5e37be9 19 days ago

1.56 kB

	import os
	import re
	import glob

	def extract_assistant_answers(input_file):
	    """Return the text that follows each 'Assistant:' marker in a file.

	    The file is read as UTF-8 and split on the literal "Assistant:".
	    Each answer runs from just after a marker up to the next question
	    label, the next "User:" marker, or the end of the section, whichever
	    comes first. Surrounding whitespace is stripped and answers that end
	    up empty are dropped.

	    A question label is a standalone "Q" or "Question", optionally
	    followed by a number, then a colon (e.g. "Q:", "Q3:", "Question 3:").
	    Matching the label instead of a bare "Q" keeps answers that contain
	    words such as "Quiet" or "Queen" intact.
	    NOTE(review): the label format is assumed; confirm it against the
	    files actually written to qa_outputs/.

	    Args:
	        input_file: Path of the text file to read.

	    Returns:
	        A list of non-empty answer strings, in the order they appear.
	    """
	    with open(input_file, 'r', encoding='utf-8') as f:
	        content = f.read()

	    # \b stops "Q" from matching inside a longer word (e.g. "FAQ:").
	    answer_end = re.compile(r"\bQ(?:uestion)?[ \t]*\d*[ \t]*:|User:")

	    answers = []
	    # The first piece precedes the first "Assistant:" and holds no answer.
	    for section in content.split("Assistant:")[1:]:
	        # Only the text before the first end marker belongs to the answer.
	        answer = answer_end.split(section, maxsplit=1)[0].strip()
	        if answer:
	            answers.append(answer)

	    return answers

	def process_all_files():
	    """Extract answers from every qa_outputs/image_*.txt file.

	    For each matching input file, the answers returned by
	    extract_assistant_answers() are written one per line (UTF-8) to a
	    sibling file named "<input base name>_extr.txt", and a progress line
	    is printed.

	    Files whose base name already ends in "_extr" are skipped. They are
	    outputs of an earlier run that also match the image_*.txt pattern,
	    and reprocessing them would pile up "*_extr_extr.txt" files.
	    """
	    # glob returns matches in arbitrary order; sort for a stable run order.
	    for input_file in sorted(glob.glob("qa_outputs/image_*.txt")):
	        # Path without the ".txt" extension.
	        base_name = os.path.splitext(input_file)[0]
	        if base_name.endswith("_extr"):
	            continue
	        output_file = f"{base_name}_extr.txt"

	        answers = extract_assistant_answers(input_file)

	        # One answer per line.
	        with open(output_file, 'w', encoding='utf-8') as f:
	            f.writelines(f"{answer}\n" for answer in answers)

	        print(f"Processed {input_file} -> {output_file}")

	if __name__ == "__main__":
	    # Run the batch extraction only when this file is executed as a
	    # script, so importing the module has no side effects.
	    process_all_files()
	    print("Extraction complete! Check the files with '_extr' suffix.")