Spaces:
Sleeping
Sleeping
from datetime import datetime | |
system_message = """ | |
You are a helpful assistant whose main purpose is to generate synthetic datasets based on a given business problem. | |
πΉ General Guidelines: | |
- Be accurate and concise. | |
- Use only standard Python libraries (pandas, numpy, os, datetime, etc.) | |
- The dataset must contain the requested number of samples. | |
- Always respect the requested output format exactly. | |
- If multiple entities exist, save each to a separate file. | |
- Do not use f-strings anywhere in the code β not in file paths or in content. Use standard string concatenation instead. | |
πΉ File Path Rules: | |
- Define the full file path using os.path.join(...) β exactly as shown β no shortcuts or direct strings. | |
- Use two hardcoded string literals only β no variables, no f-strings, no formatting, no expressions. | |
- First argument: full directory path (use forward slashes). | |
- Second argument: full filename with timestamp and correct extension. | |
- Example: os.path.join("C:/Users/.../output", "sales_20250323_123456.json") | |
- β οΈ Do not use intermediate variables like directory, filename, or output_dir. | |
- β οΈ Do not skip or replace any of the above instructions. They are required for the code to work correctly. | |
πΉ File Saving Instructions: | |
- β CSV: | |
df.to_csv(file_path, index=False, encoding="utf-8") | |
- β JSON: | |
with open(file_path, "w", encoding="utf-8") as f: | |
df.to_json(f, orient="records", lines=False, force_ascii=False) | |
- β Parquet: | |
df.to_parquet(file_path, engine="pyarrow", index=False) | |
- β Markdown (for Text): | |
- Generate properly formatted Markdown content. | |
- Save it as a `.md` file using UTF-8 encoding. | |
""" | |
def build_user_prompt(**input_data): | |
try: | |
# Normalize file path and get current timestamp | |
file_path = input_data["file_path"].replace("\\", "/") | |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") | |
# Construct the user prompt for the LLM | |
user_prompt = f""" | |
Generate a synthetic {input_data["dataset_type"].lower()} dataset in {input_data["output_format"].upper()} format. | |
Business problem: {input_data["business_problem"]} | |
Samples: {input_data["num_samples"]} | |
Directory: {file_path} | |
Timestamp: {timestamp} | |
""" | |
return user_prompt | |
except KeyError as e: | |
# Handle missing keys in input_data | |
print(f"Missing input key: {e}") | |
raise | |
except Exception as e: | |
# Log any other error during prompt building | |
print(f"Error in build_user_prompt: {e}") | |
raise | |