infinitymatter committed on
Commit
45a01dd
·
verified ·
1 Parent(s): 6fc9ee5

Create generate_schema.py

Browse files
Files changed (1) hide show
  1. generate_schema.py +45 -0
generate_schema.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import json
3
+ import os
4
+ import os
5
+ from dotenv import load_dotenv
6
+ import streamlit as st
7
+
8
+ API_KEY = st.secrets["hf_token"]  # Hugging Face token read from Streamlit secrets ("hf_token"); sent as the Bearer token by generate_schema
9
+
10
+
11
+
12
def generate_schema(user_prompt, timeout=120):
    """Generate a synthetic dataset schema using the Hugging Face API.

    Builds a prompt from a fixed system instruction plus ``user_prompt``,
    POSTs it to ``HF_MODEL_URL`` with ``API_KEY`` as a Bearer token, and
    parses the model's ``generated_text`` as JSON.

    Args:
        user_prompt: Free-text description of the dataset to design.
        timeout: Seconds to wait for the HTTP request before giving up.
            The request sets ``wait_for_model``, so the default is generous.

    Returns:
        The parsed JSON value produced by the model (the prompt asks for a
        dict with ``columns``, ``types`` and ``size``), or a dict of the form
        ``{"error": "<message>"}`` when the request fails, the response does
        not have the expected shape, or the model output is not valid JSON.
    """
    system_prompt = """
    You are an expert data scientist designing synthetic datasets.
    For any given dataset description, generate:
    - Column names
    - Data types (string, int, float, date)
    - Approximate row count

    Output in **pure JSON** format like:
    {
        "columns": ["PatientID", "Age", "Gender", "Diagnosis"],
        "types": ["int", "int", "string", "string"],
        "size": 500
    }
    """
    prompt = system_prompt + "\n\nUser request: " + user_prompt

    payload = {
        "inputs": prompt,
        "options": {"wait_for_model": True},
    }

    # NOTE(review): HF_MODEL_URL is not defined in the part of the file
    # visible here -- confirm it exists at module level, otherwise this call
    # raises NameError.
    try:
        response = requests.post(
            HF_MODEL_URL,
            headers={"Authorization": f"Bearer {API_KEY}"},
            json=payload,
            timeout=timeout,  # without a timeout the call can block forever
        )
    except requests.RequestException as exc:
        # Connection errors and timeouts are reported the same way as every
        # other failure: as an error dict instead of an exception.
        return {"error": f"API request failed: {exc}"}

    if response.status_code != 200:
        return {"error": f"API request failed. Status Code: {response.status_code}"}

    try:
        output = response.json()[0]['generated_text']
    except (ValueError, KeyError, IndexError, TypeError):
        # Body was not JSON, or not a non-empty list of dicts carrying
        # 'generated_text'.
        return {"error": "Unexpected response format from API."}

    if not isinstance(output, str):
        return {"error": "Unexpected response format from API."}

    # First attempt: the whole output is the JSON document.
    try:
        return json.loads(output.strip())
    except json.JSONDecodeError:
        pass

    # Fallback: the model may echo the prompt and/or wrap the JSON in prose
    # or code fences. Drop an echoed prompt first (it contains an example
    # object of its own), then keep the outermost {...} span.
    candidate = output
    if candidate.startswith(prompt):
        candidate = candidate[len(prompt):]
    start = candidate.find("{")
    end = candidate.rfind("}")
    if start == -1 or end <= start:
        return {"error": "Invalid JSON output from model. Try again."}

    try:
        return json.loads(candidate[start:end + 1])
    except json.JSONDecodeError:
        return {"error": "Invalid JSON output from model. Try again."}