File size: 1,279 Bytes
45a01dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import requests
import json
import os
import os
from dotenv import load_dotenv
import streamlit as st

# Hugging Face access token, read from the "hf_token" entry of Streamlit's
# secrets store at import time; generate_schema() sends it as the Bearer token.
API_KEY = st.secrets["hf_token"]



def _extract_json_object(text):
    """Return the first JSON object embedded in *text*, or None if none parses."""
    decoder = json.JSONDecoder()
    start = text.find("{")
    while start != -1:
        try:
            candidate, _ = decoder.raw_decode(text, start)
        except json.JSONDecodeError:
            pass
        else:
            if isinstance(candidate, dict):
                return candidate
        # This "{" did not open a valid object; try the next one.
        start = text.find("{", start + 1)
    return None


def generate_schema(user_prompt, *, timeout=(10, 300)):
    """Generate a synthetic dataset schema using the Hugging Face API.

    The user's dataset description is appended to a fixed system prompt and
    posted to ``HF_MODEL_URL`` with ``API_KEY`` as the Bearer token. The
    model's generated text is then searched for a JSON object.

    NOTE(review): ``HF_MODEL_URL`` is not defined in the visible part of this
    file; confirm it is provided at module level, otherwise the request line
    raises NameError.

    Args:
        user_prompt: Free-text description of the desired dataset.
        timeout: Timeout passed to ``requests.post`` (seconds, or a
            ``(connect, read)`` tuple). The read timeout is generous because
            ``wait_for_model`` makes the API block while the model loads.

    Returns:
        The parsed schema dict on success (the model is asked for the keys
        ``columns``, ``types`` and ``size``, but the content is not
        validated here), or ``{"error": <message>}`` on any failure.
    """
    if not isinstance(user_prompt, str) or not user_prompt.strip():
        return {"error": "Please provide a dataset description."}

    system_prompt = """
You are an expert data scientist designing synthetic datasets.
For any given dataset description, generate:
- Column names
- Data types (string, int, float, date)
- Approximate row count

Output in **pure JSON** format like:
{
    "columns": ["PatientID", "Age", "Gender", "Diagnosis"],
    "types": ["int", "int", "string", "string"],
    "size": 500
}
"""

    full_prompt = system_prompt + "\n\nUser request: " + user_prompt
    payload = {
        "inputs": full_prompt,
        "options": {"wait_for_model": True}
    }

    try:
        response = requests.post(
            HF_MODEL_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json=payload,
            timeout=timeout,
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported like every other
        # failure instead of crashing the caller.
        return {"error": f"API request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"API request failed. Status Code: {response.status_code}"}

    try:
        body = response.json()
    except ValueError:
        # requests' JSON decode error is a ValueError subclass.
        return {"error": "Invalid JSON output from model. Try again."}

    # The original code expected a list of result dicts; a bare dict is also
    # accepted so an unexpected shape yields an error dict, not an exception.
    first = body[0] if isinstance(body, list) and body else body
    generated = first.get("generated_text") if isinstance(first, dict) else None
    if not isinstance(generated, str):
        return {"error": "Unexpected response format from model. Try again."}

    # The model may echo the prompt before its answer. Drop the echo so the
    # example object inside the system prompt is not mistaken for the answer.
    if generated.startswith(full_prompt):
        generated = generated[len(full_prompt):]

    schema = _extract_json_object(generated)
    if schema is None:
        return {"error": "Invalid JSON output from model. Try again."}
    return schema