# DataGen / app.py
# warhawkmonk's picture
# Update app.py
# b487c0e verified
# raw
# history blame
# 8 kB
import streamlit as st
# import wikipedia
from streamlit_lottie import st_lottie
import regex as re
from streamlit_js_eval import streamlit_js_eval
# from common.utils import *
from data_collector import *
# from langchain_community.llms import Ollama
# from langchain_community.llms import Ollama
import pandas as pd
import json
# ---------------------------------------------------------------------------
# Page setup and per-session defaults.
# ---------------------------------------------------------------------------
# Wide layout: the page is split into two side-by-side columns further down.
st.set_page_config(layout="wide")

# Ask the browser for the screen size; widget heights below are derived
# from screen_height.
screen_width = streamlit_js_eval(label="screen.width",js_expressions='screen.width')
screen_height = streamlit_js_eval(label="screen.height",js_expressions='screen.height')

# NOTE(review): the JS component is expected to yield None until the browser
# has answered (first script run). Later code computes int(screen_height / ...),
# which raises TypeError on None, so fall back to a common desktop resolution
# until the real value arrives. Confirm the fallback sizes suit the target
# devices.
if screen_width is None:
    screen_width = 1920
if screen_height is None:
    screen_height = 1080

# Short alias for the session store used throughout the script.
condition_capture = st.session_state

# Seed every session key the script reads, without clobbering values that
# survived from a previous rerun. Each literal below is a distinct object, so
# no two keys share the same mutable default.
for _state_key, _initial_value in (
    ('schema', {}),         # schema produced from the user's prompt
    ('prompt', ""),         # last prompt that was turned into a schema
    ('count', 0),           # number of rows requested; 0 means "not started"
    ('prev_schema', {}),    # schema the current rows were generated for
    ('textual_value', {}),  # cached result of relevent_value() for the schema
):
    if _state_key not in condition_capture:
        condition_capture[_state_key] = _initial_value

# Filled in by the right-hand column once a schema exists.
textual_value=None
schema=condition_capture['schema']
# ---------------------------------------------------------------------------
# Right column: reference pages fetched for the current schema.
# NOTE(review): the indentation of this script was lost; the nesting below is
# reconstructed from data flow (html_page is only bound when a schema exists,
# so everything that reads it must live under that check). Confirm against
# the original file.
# ---------------------------------------------------------------------------
column1, column2 = st.columns(2)

# Bound up front so code that reads the flag never hits a NameError when the
# button below is not rendered (no schema yet).
updated_schema = False

with column2:
    # Fetch the reference material only while the session cache is empty, so
    # ordinary reruns (slider moves, button clicks) do not repeat the lookup.
    if len(condition_capture['schema']) != 0 and len(condition_capture['textual_value']) == 0:
        condition_capture['textual_value'] = relevent_value(
            str(condition_capture['schema']).lower(), 50
        )

    if len(condition_capture['schema']) != 0:
        # The cached value is indexed positionally: [0] feeds the data
        # generation step, [1] holds the pages shown here.
        # NOTE(review): html_page is presumably a mapping of page id -> HTML,
        # since it is both iterated for options and indexed by the chosen
        # option; verify against relevent_value().
        textual_value = condition_capture['textual_value'][0]
        html_page = condition_capture['textual_value'][1]

        st.write("<br>", unsafe_allow_html=True)
        with st.container(border=True, height=int(screen_height / 2.3)):
            st.header("Wikipedia insights")
            updated_schema = st.button("Start processing")

            # Placeholders keep the slider above the page body regardless of
            # the order in which they are filled.
            selector = st.empty()
            write = st.empty()

            page_options = list(html_page)
            if page_options:
                # NOTE(review): the label text looks copied from the Streamlit
                # select_slider example and does not describe page selection;
                # left unchanged because it is user-facing text.
                start_page = selector.select_slider(
                    "Select a range of color wavelength",
                    options=page_options,
                    key="start_page",
                )
                write.write(html_page[start_page], unsafe_allow_html=True)
            else:
                # A slider cannot be built from an empty option list; show a
                # hint instead of failing the whole page.
                write.info("No reference pages were found for this schema.")
# ---------------------------------------------------------------------------
# Left column: "Schema" tab (schema preview + row count) and "Data Generation"
# tab (row extraction, sorting and verification).
# NOTE(review): the indentation of this script was lost in this copy, so the
# code lines below are left exactly as found. The nesting described in the
# comments is inferred from data flow and must be confirmed against the
# original file before re-indenting.
# ---------------------------------------------------------------------------
with column1:
# Render the tabs only once a schema exists (compares the repr of schema with
# the repr of an empty dict).
if str(schema)!=str({}):
tabs = st.tabs(["Schema","Data Generation"])
# --- Tab 0: schema preview and row-count input ------------------------------
with tabs[0]:
# Same check as above; redundant if this sits inside the outer check.
if str(schema)!=str({}):
schema_column1,schema_column2 = st.columns(2)
with schema_column1:
# Shows each item yielded by iterating schema, as strings, in an editable
# table. The returned edited_df is not read anywhere in the visible code, and
# the enumerate index is unused.
edited_df = st.data_editor([str(i) for index,i in enumerate(schema)],hide_index=True,use_container_width=True,num_rows='dynamic',height=int(screen_height/3))
with schema_column2:
number = st.number_input("Number of rows",min_value=1,max_value=1000,value=10)
# The stored row count only changes when the "Start processing" button
# (updated_schema) was pressed in this run and the number differs.
if number!=condition_capture['count'] and updated_schema:
condition_capture['count'] = number
# Decorative Lottie animation loaded from an absolute path on the host.
with open("/home/user/app/animation/edit_file.json") as animate:
url_json=json.load(animate)
st_lottie(url_json,height = int(screen_height/3))
# --- Tab 1: data generation -------------------------------------------------
with tabs[1]:
# "No data" animation, shown while no row count has been confirmed.
with open("/home/user/app/animation/no data animation.json") as animate:
url_json=json.load(animate)
# Placeholder that is overwritten with the growing result table.
dataframe=st.empty()
if condition_capture['count']==0:
st_lottie(url_json,height = int(screen_height/3))
else:
# Rows collected during this run, later passed to sorting().
smart_append=[]
# Schema differs from the one the stored rows were generated for: remember it
# and start a fresh result table.
if condition_capture['prev_schema'] != condition_capture['schema']:
condition_capture['prev_schema'] = condition_capture['schema']
condition_capture['current_append']={}
# Walk the candidate rows produced by actual_value(); text_indexing is unused.
for text_indexing,store in enumerate(actual_value(textual_value,schema)):
dummy_value =dictionary_formatting(store)
# Trim every value list from the front until at most one item remains.
for keys in dummy_value:
while len(dummy_value[keys])>=2:
dummy_value[keys].pop(0)
dummy_value = dictionary_formatting(dummy_value)
# NOTE(review): identity comparison (`is not None`) is the idiomatic form.
if dummy_value != None:
smart_append.append(dummy_value)
# Debug output to the server console.
print(dummy_value)
for keys in dummy_value:
# NOTE(review): the membership test uses `keys` but the entry is stored under
# str(keys); for non-string keys the test never matches and the column list is
# re-created on every row.
if keys not in condition_capture['current_append']:
condition_capture['current_append'][str(keys)]=[]
# Each cell is stored as the string repr of the value list.
condition_capture['current_append'][str(keys)].append(str([i for i in dummy_value[keys]]))
# Refresh the table after every accepted row.
dataframe.dataframe(condition_capture['current_append'])
# Stop once the last column holds the requested number of rows.
if len(condition_capture['current_append'][[i for i in condition_capture['current_append']][-1]])>=condition_capture['count']:
break
# print(dummy_value)
# if smart_check(dummy_value)!=True:
# smart_value=verification(dummy_value)
# if statement(condition_capture['schema'],smart_value):
# st.dataframe(smart_value)
# NOTE(review): placement of this reset is ambiguous. If it runs
# unconditionally, the `[0]` lookup two lines below indexes an empty key list
# and raises IndexError whenever smart_append is empty; it presumably belongs
# to the "schema changed" branch above - confirm.
condition_capture['current_append']={}
# Nothing collected in this run: rebuild the row list from the stored table,
# taking element [0] of every stored cell.
if len(smart_append)==0:
ranger=len(condition_capture['current_append'][[i for i in condition_capture['current_append']][0]])
for indexing in range(ranger):
working_dict = {}
for j in condition_capture['current_append']:
working_dict[j]=condition_capture['current_append'][j][indexing][0]
smart_append.append(working_dict)
# Order the rows with the sorting() helper; the items it yields are passed to
# eval() below, so they are presumably string reprs of dicts - verify.
smart_movement = sorting(smart_append)
# First pass: append every sorted row to the table as lists of strings.
# NOTE(review): eval() executes arbitrary expressions; if these strings can
# contain text from external pages, ast.literal_eval would be the safer parser.
# The inner loop also reuses the name `keys` of the outer loop.
for keys in smart_movement:
value=eval(keys)
for keys in value:
if keys not in condition_capture['current_append']:
condition_capture['current_append'][str(keys)]=[]
condition_capture['current_append'][str(keys)].append([str(i) for i in value[keys]])
dataframe.dataframe(condition_capture['current_append'])
# Second pass: verify each row and overwrite its cells in place (row position
# = indexing).
for indexing,j in enumerate(smart_movement):
try:
# Convert string to dictionary
dummy_value = eval(j)
# Process dictionary values
for key in dummy_value:
while len(dummy_value[key]) >= 2:
dummy_value[key].pop(0)
# Format dictionary
formatted = dictionary_formatting(dummy_value)
print(formatted)
# Verify and store result
verification_result = verification(formatted) if formatted else None
# NOTE(review): when verification_result is None this loop raises TypeError,
# which the bare except below hides. The loop variable also shadows the outer
# `j`.
for j in verification_result:
if j in condition_capture['current_append']:
condition_capture['current_append'][j][indexing]=[str(i) for i in verification_result[j]]
dataframe.dataframe(condition_capture['current_append'])
# NOTE(review): bare except silently discards every error for the row,
# including programming errors; consider catching specific exceptions and
# logging them.
except:
pass
# ---------------------------------------------------------------------------
# Prompt box: a changed prompt produces a new schema and restarts the page.
# NOTE(review): the indentation of this script was lost; the widget is placed
# at top level here. Confirm whether it originally lived inside one of the
# layout columns.
# ---------------------------------------------------------------------------
prompt = st.text_input(label="Please use prompt to generate data",value=condition_capture['prompt'])

# Only react when the text differs from the prompt already processed;
# otherwise every rerun would regenerate the schema and loop forever on
# st.rerun().
if prompt != str(condition_capture['prompt']):
    condition_capture['prompt'] = prompt
    schema = schema_generator(prompt)
    condition_capture['schema'] = schema

    # Drop everything derived from the previous schema. The reference text
    # cache is only refilled while it is empty, so leaving it in place would
    # make the new schema's rows come from material fetched for the old one.
    condition_capture['textual_value'] = {}
    condition_capture['current_append']={}

    # Restart the script so the layout above is rebuilt for the new schema.
    st.rerun()