abubasith86 committed on
Commit
fc8b17b
·
verified ·
1 Parent(s): c973974

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +47 -72
app.py CHANGED
@@ -8,60 +8,57 @@ st.title("📚 JSONL Dataset Editor")
8
 
9
  TMP_DIR = "temp"
10
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
11
-
12
- # --- Setup temp directory ---
13
  os.makedirs(TMP_DIR, exist_ok=True)
14
 
15
- # --- Reset update flag on rerun ---
16
- if st.session_state.get("updated"):
17
- st.session_state.updated = False
 
 
 
18
 
19
- # --- Load session data ---
20
  if "data" not in st.session_state:
21
- if os.path.exists(TMP_FILE):
22
- with open(TMP_FILE, "r", encoding="utf-8") as f:
23
- st.session_state.data = [json.loads(line) for line in f]
24
- else:
25
- st.session_state.data = []
26
-
27
- # Load all unique fields
28
- def get_all_fields(data):
29
- all_keys = set()
30
- for record in data:
31
- all_keys.update(record.keys())
32
- return sorted(all_keys)
33
-
34
  st.session_state.all_fields = get_all_fields(st.session_state.data)
35
  st.session_state.prev_data = st.session_state.data.copy()
36
 
37
- # --- Upload JSONL File ---
38
  uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
39
  if uploaded_file:
40
  content = uploaded_file.read().decode("utf-8")
41
  st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
42
- st.session_state.all_fields = sorted(set().union(*(record.keys() for record in st.session_state.data)))
43
  st.session_state.prev_data = st.session_state.data.copy()
44
-
45
  with open(TMP_FILE, "w", encoding="utf-8") as f:
46
  for item in st.session_state.data:
47
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
 
48
 
49
- st.success(f"Loaded {len(st.session_state.data)} records.")
50
-
51
- # --- Safe fallback fields if no data yet ---
52
- if not st.session_state.data and not st.session_state.all_fields:
53
  st.session_state.all_fields = ["context", "question", "answer"]
54
 
55
- # --- Edit Existing Records ---
56
  st.markdown("### ✏️ Edit Records")
57
-
58
  df = pd.DataFrame(st.session_state.data)
59
  df = df.reindex(columns=st.session_state.all_fields)
60
 
 
61
  for field in st.session_state.all_fields:
62
  if field.lower() in ["context", "question", "answer"]:
63
  df[field] = df[field].astype(str)
64
 
 
65
  column_configs = {
66
  field: (
67
  st.column_config.TextColumn(label=field, width="large")
@@ -71,25 +68,23 @@ column_configs = {
71
  for field in st.session_state.all_fields
72
  }
73
 
74
- if not st.session_state.get("updated"):
75
- edited_df = st.data_editor(
76
- df,
77
- use_container_width=True,
78
- num_rows="dynamic",
79
- column_config=column_configs,
80
- key="editable_table",
81
- )
82
-
83
- new_data = edited_df.fillna("").to_dict(orient="records")
84
- if new_data != st.session_state.prev_data:
85
- st.session_state.data = new_data
86
- st.session_state.prev_data = new_data.copy()
87
-
88
- with open(TMP_FILE, "w", encoding="utf-8") as f:
89
- for item in st.session_state.data:
90
- f.write(json.dumps(item, ensure_ascii=False) + "\n")
91
 
92
- st.toast("✅ Changes auto-saved!", icon="💾")
 
 
 
 
 
 
 
 
93
 
94
  # --- Add New Entry ---
95
  st.markdown("### ➕ Add New Entry")
@@ -101,12 +96,9 @@ with st.form("new_entry_form"):
101
  if submitted:
102
  st.session_state.data.append(new_record)
103
  st.session_state.prev_data = st.session_state.data.copy()
104
- st.session_state.updated = True
105
-
106
  with open(TMP_FILE, "w", encoding="utf-8") as f:
107
  for item in st.session_state.data:
108
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
109
-
110
  st.success("✅ New entry added!")
111
  st.rerun()
112
 
@@ -116,46 +108,30 @@ with st.expander("➕ Add New Field"):
116
  if st.button("Add Field"):
117
  if new_field and new_field not in st.session_state.all_fields:
118
  st.session_state.all_fields.append(new_field)
119
- st.success(f"✅ Field '{new_field}' added!")
120
  st.rerun()
121
 
122
  # --- Export Section ---
123
  st.markdown("### 📤 Export Dataset")
124
- export_path = st.text_input(
125
- "Custom save path (e.g., ./exports/my_dataset.jsonl)",
126
- value="./exports/exported_dataset.jsonl",
127
- )
128
 
129
  col1, col2, col3 = st.columns(3)
130
 
131
- # Export to path and download
132
  with col1:
133
  if st.button("📁 Export JSONL"):
134
  os.makedirs(os.path.dirname(export_path), exist_ok=True)
135
  with open(export_path, "w", encoding="utf-8") as f:
136
  for row in st.session_state.data:
137
  f.write(json.dumps(row, ensure_ascii=False) + "\n")
138
-
139
- st.success(f"✅ Dataset saved to {export_path}")
140
-
141
  with open(export_path, "r", encoding="utf-8") as f:
142
  content = f.read()
143
-
144
- st.download_button(
145
- "⬇️ Download JSONL",
146
- content,
147
- file_name=os.path.basename(export_path),
148
- mime="application/json",
149
- )
150
-
151
- # Clear session + temp
152
  if os.path.exists(TMP_FILE):
153
  os.remove(TMP_FILE)
154
  st.session_state.clear()
155
- st.success("🧹 Temporary session cleared.")
156
  st.rerun()
157
 
158
- # Temp file download
159
  with col2:
160
  if os.path.exists(TMP_FILE):
161
  with open(TMP_FILE, "r", encoding="utf-8") as f:
@@ -167,13 +143,12 @@ with col2:
167
  mime="application/json",
168
  )
169
  else:
170
- st.warning("⚠️ No temp file found to download.")
171
 
172
- # Clear session button
173
  with col3:
174
  if st.button("🗑️ Clear Session"):
175
  if os.path.exists(TMP_FILE):
176
  os.remove(TMP_FILE)
177
  st.session_state.clear()
178
- st.success("🧹 Session and temp cleared!")
179
  st.rerun()
 
8
 
9
  TMP_DIR = "temp"
10
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
 
 
11
  os.makedirs(TMP_DIR, exist_ok=True)
12
 
13
+ # --- Helpers ---
14
+ def get_all_fields(data):
15
+ all_keys = set()
16
+ for record in data:
17
+ all_keys.update(record.keys())
18
+ return sorted(all_keys)
19
 
20
+ # --- Session Initialization ---
21
  if "data" not in st.session_state:
22
+ st.session_state.data = []
23
+ if "all_fields" not in st.session_state:
24
+ st.session_state.all_fields = []
25
+ if "prev_data" not in st.session_state:
26
+ st.session_state.prev_data = []
27
+
28
+ # --- Load from temp if needed ---
29
+ if not st.session_state.data and os.path.exists(TMP_FILE):
30
+ with open(TMP_FILE, "r", encoding="utf-8") as f:
31
+ st.session_state.data = [json.loads(line) for line in f]
 
 
 
32
  st.session_state.all_fields = get_all_fields(st.session_state.data)
33
  st.session_state.prev_data = st.session_state.data.copy()
34
 
35
+ # --- Upload JSONL ---
36
  uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
37
  if uploaded_file:
38
  content = uploaded_file.read().decode("utf-8")
39
  st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
40
+ st.session_state.all_fields = get_all_fields(st.session_state.data)
41
  st.session_state.prev_data = st.session_state.data.copy()
 
42
  with open(TMP_FILE, "w", encoding="utf-8") as f:
43
  for item in st.session_state.data:
44
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
45
+ st.rerun()
46
 
47
+ # --- Fallback fields if none ---
48
+ if not st.session_state.all_fields:
 
 
49
  st.session_state.all_fields = ["context", "question", "answer"]
50
 
51
+ # --- Edit Records ---
52
  st.markdown("### ✏️ Edit Records")
 
53
  df = pd.DataFrame(st.session_state.data)
54
  df = df.reindex(columns=st.session_state.all_fields)
55
 
56
+ # Ensure fields are strings for editor
57
  for field in st.session_state.all_fields:
58
  if field.lower() in ["context", "question", "answer"]:
59
  df[field] = df[field].astype(str)
60
 
61
+ # TextAreas for longer fields
62
  column_configs = {
63
  field: (
64
  st.column_config.TextColumn(label=field, width="large")
 
68
  for field in st.session_state.all_fields
69
  }
70
 
71
+ edited_df = st.data_editor(
72
+ df,
73
+ use_container_width=True,
74
+ num_rows="dynamic",
75
+ column_config=column_configs,
76
+ key="editable_table",
77
+ )
 
 
 
 
 
 
 
 
 
 
78
 
79
+ # Save if changed
80
+ new_data = edited_df.fillna("").to_dict(orient="records")
81
+ if new_data != st.session_state.prev_data:
82
+ st.session_state.data = new_data
83
+ st.session_state.prev_data = new_data.copy()
84
+ with open(TMP_FILE, "w", encoding="utf-8") as f:
85
+ for item in new_data:
86
+ f.write(json.dumps(item, ensure_ascii=False) + "\n")
87
+ st.toast("✅ Auto-saved!", icon="💾")
88
 
89
  # --- Add New Entry ---
90
  st.markdown("### ➕ Add New Entry")
 
96
  if submitted:
97
  st.session_state.data.append(new_record)
98
  st.session_state.prev_data = st.session_state.data.copy()
 
 
99
  with open(TMP_FILE, "w", encoding="utf-8") as f:
100
  for item in st.session_state.data:
101
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
 
102
  st.success("✅ New entry added!")
103
  st.rerun()
104
 
 
108
  if st.button("Add Field"):
109
  if new_field and new_field not in st.session_state.all_fields:
110
  st.session_state.all_fields.append(new_field)
 
111
  st.rerun()
112
 
113
  # --- Export Section ---
114
  st.markdown("### 📤 Export Dataset")
115
+ export_path = st.text_input("Save path", value="./exports/exported_dataset.jsonl")
 
 
 
116
 
117
  col1, col2, col3 = st.columns(3)
118
 
119
+ # Export
120
  with col1:
121
  if st.button("📁 Export JSONL"):
122
  os.makedirs(os.path.dirname(export_path), exist_ok=True)
123
  with open(export_path, "w", encoding="utf-8") as f:
124
  for row in st.session_state.data:
125
  f.write(json.dumps(row, ensure_ascii=False) + "\n")
 
 
 
126
  with open(export_path, "r", encoding="utf-8") as f:
127
  content = f.read()
128
+ st.download_button("⬇️ Download JSONL", content, file_name=os.path.basename(export_path))
 
 
 
 
 
 
 
 
129
  if os.path.exists(TMP_FILE):
130
  os.remove(TMP_FILE)
131
  st.session_state.clear()
 
132
  st.rerun()
133
 
134
+ # Download temp
135
  with col2:
136
  if os.path.exists(TMP_FILE):
137
  with open(TMP_FILE, "r", encoding="utf-8") as f:
 
143
  mime="application/json",
144
  )
145
  else:
146
+ st.warning("⚠️ No temp file found.")
147
 
148
+ # Clear session
149
  with col3:
150
  if st.button("🗑️ Clear Session"):
151
  if os.path.exists(TMP_FILE):
152
  os.remove(TMP_FILE)
153
  st.session_state.clear()
 
154
  st.rerun()