Phi3-VLM-On-Cifar10 / extract_answers.py
chbsaikiran's picture
Initial Commit
5e37be9
import os
import re
import glob
# Start of the next question block: a *new line* that begins with "Q" or
# "Question", an optional number, and a ":" / "." / ")" delimiter
# (e.g. "Q:", "Q1:", "Q 2.", "Question 3:").
# NOTE(review): the exact question-label format is assumed; confirm it against
# the files that are fed to this script.
_NEXT_QUESTION_RE = re.compile(r"\n[ \t]*Q(?:uestion)?[ \t]*\d*[ \t]*[:.)]")


def extract_assistant_answers(input_file):
    """Return the answers that follow each 'Assistant:' marker in a file.

    The file is read as UTF-8 and split on the literal "Assistant:". For each
    piece after a marker, the answer is the text up to whichever comes first:
    the next question label at the start of a line, the next "User:" marker,
    or the end of the piece. Surrounding whitespace is stripped and empty
    answers are dropped.

    Only a question label at the start of a later line ends an answer. A
    capital "Q" inside the answer text itself (e.g. "Quite", "Quality") does
    not truncate it.

    Args:
        input_file: Path of the text file to read.

    Returns:
        A list of answer strings, in the order they appear in the file.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(input_file, 'r', encoding='utf-8') as f:
        content = f.read()

    answers = []
    # Everything before the first "Assistant:" is not an answer, so skip it.
    for section in content.split("Assistant:")[1:]:
        next_question = _NEXT_QUESTION_RE.search(section)
        if next_question:
            section = section[:next_question.start()]
        # A following "User:" turn also ends the answer.
        answer = section.partition("User:")[0].strip()
        if answer:
            answers.append(answer)
    return answers
def process_all_files(input_dir="qa_outputs"):
    """Extract the assistant answers from every image_*.txt file in a directory.

    For each input file ``<input_dir>/image_<x>.txt`` the answers returned by
    ``extract_assistant_answers`` are written, one per line, to
    ``<input_dir>/image_<x>_extr.txt``, and a progress line is printed.

    Files whose name already ends in ``_extr.txt`` are skipped. They are
    outputs of a previous run that also match the ``image_*.txt`` pattern, and
    processing them again would create ``*_extr_extr.txt`` files.

    Args:
        input_dir: Directory containing the ``image_*.txt`` files. Defaults to
            ``"qa_outputs"`` relative to the current working directory.
    """
    # Escape the directory so glob metacharacters in it are taken literally.
    pattern = os.path.join(glob.escape(input_dir), "image_*.txt")

    # Sorted so that files are processed (and reported) in a stable order.
    for input_file in sorted(glob.glob(pattern)):
        base_name, _ = os.path.splitext(input_file)
        if base_name.endswith("_extr"):
            # Output of an earlier run, not a source file.
            continue
        output_file = f"{base_name}_extr.txt"

        answers = extract_assistant_answers(input_file)

        with open(output_file, 'w', encoding='utf-8') as f:
            f.writelines(f"{answer}\n" for answer in answers)
        print(f"Processed {input_file} -> {output_file}")
# Script entry point: run the extraction over the default directory, then
# tell the user where to find the results.
if __name__ == "__main__":
    process_all_files()
    completion_message = "Extraction complete! Check the files with '_extr' suffix."
    print(completion_message)