abubasith86 committed on
Commit
f3a8b9c
·
verified ·
1 Parent(s): 77d363f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -50
app.py CHANGED
@@ -11,47 +11,73 @@ TMP_DIR = "temp"
11
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
12
  os.makedirs(TMP_DIR, exist_ok=True)
13
 
 
14
  # --- Helpers ---
15
  def get_all_fields(data):
16
- all_keys = set()
17
- for record in data:
18
- all_keys.update(record.keys())
19
- return sorted(all_keys)
 
20
 
21
  def save_to_file():
22
  with open(TMP_FILE, "w", encoding="utf-8") as f:
23
- for item in st.session_state.data:
24
- f.write(json.dumps(item, ensure_ascii=False) + "\n")
 
25
 
26
- # --- Initialize state ---
27
  if "data" not in st.session_state:
28
  st.session_state.data = []
29
  if "all_fields" not in st.session_state:
30
  st.session_state.all_fields = []
31
- if "edit_key" not in st.session_state:
32
- st.session_state.edit_key = str(uuid4())
 
 
33
 
34
- # --- Load from temp if file exists ---
35
  if os.path.exists(TMP_FILE) and not st.session_state.data:
36
  with open(TMP_FILE, "r", encoding="utf-8") as f:
37
  st.session_state.data = [json.loads(line) for line in f]
38
  st.session_state.all_fields = get_all_fields(st.session_state.data)
39
 
40
- # --- Upload JSONL ---
41
- uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
42
- if uploaded_file:
43
- content = uploaded_file.read().decode("utf-8")
44
- st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
45
  st.session_state.all_fields = get_all_fields(st.session_state.data)
46
  save_to_file()
47
- st.session_state.edit_key = str(uuid4())
48
  st.rerun()
49
 
50
- # --- Ensure default fields if none ---
51
  if not st.session_state.all_fields:
52
  st.session_state.all_fields = ["context", "question", "answer"]
53
 
54
- # --- Edit Section ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
  st.markdown("### ✏️ Edit Records")
56
  df = pd.DataFrame(st.session_state.data)
57
  df = df.reindex(columns=st.session_state.all_fields)
@@ -60,23 +86,21 @@ for field in st.session_state.all_fields:
60
  df[field] = df[field].astype(str)
61
 
62
  column_configs = {
63
- field: (
64
- st.column_config.TextColumn(label=field, width="large")
65
- if field.lower() in ["context", "question", "answer"]
66
- else None
67
- )
68
  for field in st.session_state.all_fields
69
  }
70
 
71
  edited_df = st.data_editor(
72
  df,
73
- key=st.session_state.edit_key, # Unique key forces Streamlit to rerender editor
74
  use_container_width=True,
75
  num_rows="dynamic",
76
  column_config=column_configs,
77
  )
78
 
79
- # If data changed, update state and save
80
  if edited_df is not None:
81
  new_data = edited_df.fillna("").to_dict(orient="records")
82
  if new_data != st.session_state.data:
@@ -84,20 +108,6 @@ if edited_df is not None:
84
  save_to_file()
85
  st.toast("✅ Changes auto-saved!", icon="💾")
86
 
87
- # --- Add Entry ---
88
- st.markdown("### ➕ Add New Entry")
89
- with st.form("add_form"):
90
- new_item = {}
91
- for field in st.session_state.all_fields:
92
- new_item[field] = st.text_area(field, key=f"add_{field}")
93
- submitted = st.form_submit_button("Add Entry")
94
- if submitted:
95
- st.session_state.data.append(new_item)
96
- save_to_file()
97
- st.session_state.edit_key = str(uuid4()) # Force editor to refresh
98
- st.success("✅ New entry added!")
99
- st.rerun()
100
-
101
  # --- Add New Field ---
102
  with st.expander("➕ Add New Field"):
103
  new_field = st.text_input("New field name", key="new_field_name")
@@ -106,8 +116,8 @@ with st.expander("➕ Add New Field"):
106
  st.session_state.all_fields.append(new_field)
107
  st.rerun()
108
 
109
- # --- Export Section ---
110
- st.markdown("### 📤 Export Dataset")
111
  export_path = st.text_input("Save path", value="./exports/exported_dataset.jsonl")
112
 
113
  col1, col2, col3 = st.columns(3)
@@ -121,24 +131,20 @@ with col1:
121
  with open(export_path, "r", encoding="utf-8") as f:
122
  content = f.read()
123
  st.download_button("⬇️ Download JSONL", content, file_name=os.path.basename(export_path))
 
 
124
  if os.path.exists(TMP_FILE):
125
  os.remove(TMP_FILE)
126
- st.session_state.clear()
127
  st.rerun()
128
 
129
  with col2:
130
  if os.path.exists(TMP_FILE):
131
  with open(TMP_FILE, "r", encoding="utf-8") as f:
132
- tmp_content = f.read()
133
- st.download_button(
134
- "⬇️ Download Temp File",
135
- tmp_content,
136
- file_name="session_dataset.jsonl",
137
- mime="application/json",
138
- )
139
 
140
  with col3:
141
- if st.button("🗑️ Clear Session"):
142
  if os.path.exists(TMP_FILE):
143
  os.remove(TMP_FILE)
144
  st.session_state.clear()
 
11
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
12
  os.makedirs(TMP_DIR, exist_ok=True)
13
 
14
+
15
  # --- Helpers ---
16
  def get_all_fields(data):
17
+ keys = set()
18
+ for d in data:
19
+ keys.update(d.keys())
20
+ return sorted(list(keys))
21
+
22
 
23
  def save_to_file():
24
  with open(TMP_FILE, "w", encoding="utf-8") as f:
25
+ for row in st.session_state.data:
26
+ f.write(json.dumps(row, ensure_ascii=False) + "\n")
27
+
28
 
29
+ # --- Session Initialization ---
30
  if "data" not in st.session_state:
31
  st.session_state.data = []
32
  if "all_fields" not in st.session_state:
33
  st.session_state.all_fields = []
34
+ if "editor_key" not in st.session_state:
35
+ st.session_state.editor_key = str(uuid4())
36
+ if "just_added" not in st.session_state:
37
+ st.session_state.just_added = False
38
 
39
+ # --- Load from TMP file ---
40
  if os.path.exists(TMP_FILE) and not st.session_state.data:
41
  with open(TMP_FILE, "r", encoding="utf-8") as f:
42
  st.session_state.data = [json.loads(line) for line in f]
43
  st.session_state.all_fields = get_all_fields(st.session_state.data)
44
 
45
+ # --- File Upload ---
46
+ uploaded = st.file_uploader("Upload JSONL", type=["jsonl"])
47
+ if uploaded:
48
+ lines = uploaded.read().decode("utf-8").splitlines()
49
+ st.session_state.data = [json.loads(l) for l in lines]
50
  st.session_state.all_fields = get_all_fields(st.session_state.data)
51
  save_to_file()
52
+ st.session_state.editor_key = str(uuid4())
53
  st.rerun()
54
 
55
+ # --- Add New Fields fallback ---
56
  if not st.session_state.all_fields:
57
  st.session_state.all_fields = ["context", "question", "answer"]
58
 
59
+ # --- Add Entry Form ---
60
+ with st.form("add_form"):
61
+ st.markdown("### ➕ Add New Entry")
62
+ new_entry = {}
63
+ for field in st.session_state.all_fields:
64
+ new_entry[field] = st.text_area(field, key=f"add_{field}")
65
+
66
+ submit_add = st.form_submit_button("Add Entry")
67
+
68
+ if submit_add:
69
+ st.session_state.data.append(new_entry)
70
+ save_to_file()
71
+ st.session_state.editor_key = str(uuid4())
72
+ st.session_state.just_added = True
73
+ st.rerun()
74
+
75
+ # --- Wait for rerun before rendering editor ---
76
+ if st.session_state.just_added:
77
+ st.session_state.just_added = False
78
+ st.rerun()
79
+
80
+ # --- Display Editor ---
81
  st.markdown("### ✏️ Edit Records")
82
  df = pd.DataFrame(st.session_state.data)
83
  df = df.reindex(columns=st.session_state.all_fields)
 
86
  df[field] = df[field].astype(str)
87
 
88
  column_configs = {
89
+ field: st.column_config.TextColumn(field, width="large")
90
+ if field.lower() in ["context", "question", "answer"]
91
+ else None
 
 
92
  for field in st.session_state.all_fields
93
  }
94
 
95
  edited_df = st.data_editor(
96
  df,
97
+ key=st.session_state.editor_key,
98
  use_container_width=True,
99
  num_rows="dynamic",
100
  column_config=column_configs,
101
  )
102
 
103
+ # --- Save edits ---
104
  if edited_df is not None:
105
  new_data = edited_df.fillna("").to_dict(orient="records")
106
  if new_data != st.session_state.data:
 
108
  save_to_file()
109
  st.toast("✅ Changes auto-saved!", icon="💾")
110
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
111
  # --- Add New Field ---
112
  with st.expander("➕ Add New Field"):
113
  new_field = st.text_input("New field name", key="new_field_name")
 
116
  st.session_state.all_fields.append(new_field)
117
  st.rerun()
118
 
119
+ # --- Export Dataset ---
120
+ st.markdown("### ���� Export")
121
  export_path = st.text_input("Save path", value="./exports/exported_dataset.jsonl")
122
 
123
  col1, col2, col3 = st.columns(3)
 
131
  with open(export_path, "r", encoding="utf-8") as f:
132
  content = f.read()
133
  st.download_button("⬇️ Download JSONL", content, file_name=os.path.basename(export_path))
134
+ st.success("✅ Exported!")
135
+ st.session_state.clear()
136
  if os.path.exists(TMP_FILE):
137
  os.remove(TMP_FILE)
 
138
  st.rerun()
139
 
140
  with col2:
141
  if os.path.exists(TMP_FILE):
142
  with open(TMP_FILE, "r", encoding="utf-8") as f:
143
+ tmp_data = f.read()
144
+ st.download_button("⬇️ Temp File", tmp_data, file_name="session_dataset.jsonl")
 
 
 
 
 
145
 
146
  with col3:
147
+ if st.button("🧹 Clear Session"):
148
  if os.path.exists(TMP_FILE):
149
  os.remove(TMP_FILE)
150
  st.session_state.clear()