Dataset / clean_openml.py
vansh9878's picture
files added
825e978
import os
import csv
def _attribute_name(declaration):
    """Return the attribute name from the text that follows ``@attribute``.

    A name wrapped in single or double quotes may contain spaces; the quotes
    are removed. Otherwise the first whitespace-delimited token is the name.
    Returns ``None`` when *declaration* is blank.
    """
    declaration = declaration.strip()
    if not declaration:
        return None
    quote = declaration[0]
    if quote in ("'", '"'):
        closing = declaration.find(quote, 1)
        if closing != -1:
            return declaration[1:closing]
        # Unterminated quote: fall through and use the first token as-is.
    return declaration.split()[0]


def _parse_arff(lines):
    """Split an iterable of ARFF text lines into ``(headers, data_rows)``.

    ``headers`` holds the attribute names in declaration order; ``data_rows``
    holds each non-empty, non-comment line seen after ``@data``, split on
    commas. Keywords are matched case-insensitively.
    """
    headers = []
    data_rows = []
    data_started = False
    for raw_line in lines:
        line = raw_line.strip()
        # '%' starts an ARFF comment; such lines must never reach the CSV.
        if not line or line.startswith("%"):
            continue
        tokens = line.split(None, 1)
        keyword = tokens[0].lower()
        if keyword == "@attribute":
            name = _attribute_name(tokens[1]) if len(tokens) > 1 else None
            if name is not None:
                headers.append(name)
        elif keyword == "@data":
            data_started = True
        elif data_started:
            data_rows.append(line.split(","))
    return headers, data_rows


def clean(user_prompt: str) -> None:
    """Convert every ARFF file under ``input_folder/<user_prompt>`` to CSV.

    For each regular, non-hidden file in the input directory, a CSV named
    ``<filename>.csv`` is written to ``downloads/<user_prompt>``. The first
    CSV row contains the attribute names; the remaining rows contain the
    comma-split data lines. Output is always written as UTF-8, matching the
    encoding used to read the input.

    Args:
        user_prompt: Sub-directory name shared by the input and output
            locations.

    Raises:
        FileNotFoundError: If ``input_folder/<user_prompt>`` does not exist
            (raised by ``os.listdir``). The output directory has already
            been created at that point.
    """
    input_directory = os.path.join("input_folder", user_prompt)
    output_directory = "downloads/" + user_prompt

    # Create the output directory if it doesn't exist
    os.makedirs(output_directory, exist_ok=True)

    # Loop through all files in the user-specified input directory
    for filename in os.listdir(input_directory):
        file_path = os.path.join(input_directory, filename)

        # Skip directories or hidden files
        if os.path.isdir(file_path) or filename.startswith("."):
            continue

        # Output file path (.csv extension added)
        output_file = os.path.join(output_directory, filename + ".csv")

        # Undecodable bytes are dropped rather than aborting the whole batch.
        with open(file_path, "r", encoding="utf-8", errors="ignore") as file:
            headers, data_rows = _parse_arff(file)

        # Write to CSV
        with open(output_file, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(headers)
            writer.writerows(data_rows)

        print(f"βœ… CSV file created for: {filename} β†’ {output_file}")