Spaces:
Running
Running
File size: 9,275 Bytes
b487c0e ac812b9 b487c0e ac812b9 b487c0e 3b586d4 eceb8f1 3b586d4 b487c0e 3b586d4 b487c0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 |
import streamlit as st
# import wikipedia
from streamlit_lottie import st_lottie
import regex as re
from streamlit_js_eval import streamlit_js_eval
# from common.utils import *
from data_collector import *
# from langchain_community.llms import Ollama
# from langchain_community.llms import Ollama
import pandas as pd
import json
# Configure the page first; Streamlit requires this before other st.* calls
# that render output.
st.set_page_config(layout="wide")

# The browser-side evaluation has no answer yet on the very first script run,
# so the component hands back None. The layout code further down divides by
# screen_height, which would raise TypeError on None, so fall back to a common
# desktop resolution until the real values arrive and trigger a rerun.
DEFAULT_SCREEN_WIDTH = 1920
DEFAULT_SCREEN_HEIGHT = 1080
screen_width = (
    streamlit_js_eval(label="screen.width", js_expressions='screen.width')
    or DEFAULT_SCREEN_WIDTH
)
screen_height = (
    streamlit_js_eval(label="screen.height", js_expressions='screen.height')
    or DEFAULT_SCREEN_HEIGHT
)
# Alias for the per-session store that survives Streamlit reruns.
condition_capture = st.session_state

# Seed every key this page reads with an empty starting value, leaving any
# entry that already exists in the session untouched. Each factory is called
# so that every session gets its own fresh object.
for state_key, make_default in (
    ('schema', dict),
    ('prompt', str),
    ("count", int),
    ("prev_schema", dict),
    ("textual_value", dict),
):
    if state_key not in condition_capture:
        condition_capture[state_key] = make_default()

# Module-level working values for this run.
textual_value = None
schema = condition_capture['schema']
# Split the page into two equal halves; this block fills the right-hand one.
column1, column2 = st.columns(2)

with column2:
    schema_present = len(condition_capture['schema']) != 0

    # Fetch the supporting lookup only when a schema exists and nothing is
    # cached in the session yet.
    if schema_present and len(condition_capture['textual_value']) == 0:
        lowered_schema = str(condition_capture['schema']).lower()
        condition_capture['textual_value'] = relevent_value(lowered_schema, 50)

    if schema_present:
        # The cached lookup is read positionally: index 1 is later rendered
        # as HTML, index 0 is passed to the row generator.
        cached_lookup = condition_capture['textual_value']
        html_page = cached_lookup[1]
        textual_value = cached_lookup[0]

        st.write("<br>", unsafe_allow_html=True)
        panel_height = int(screen_height/2.3)
        with st.container(border=True, height=panel_height):
            st.header("Wikipedia insights")
            # True only during the run in which the button was clicked.
            updated_schema = st.button("Start processing")
            selector = st.empty()
            # Both names refer to the same progress bar element.
            my_bar = st.progress(0, text="Press start processing .")
            start_page = my_bar
            # Placeholder later filled with page HTML during generation.
            write = st.empty()
# Left half of the page: a "Schema" tab (field list + row count) and a
# "Data Generation" tab (streams rows, sorts them, then verifies each row).
# Nothing is rendered here until a schema exists.
#
# NOTE(review): the nesting below was reconstructed from data flow because the
# source had lost its indentation — confirm against the deployed file.
with column1:
    if str(schema) != str({}):
        tabs = st.tabs(["Schema", "Data Generation"])

        # ---- Tab 0: schema fields and requested number of rows ----
        with tabs[0]:
            schema_column1, schema_column2 = st.columns(2)
            with schema_column1:
                edited_df = st.data_editor(
                    [str(field) for field in schema],
                    hide_index=True,
                    use_container_width=True,
                    num_rows='dynamic',
                    height=int(screen_height/3),
                )
            with schema_column2:
                number = st.number_input("Number of rows", min_value=1, max_value=1000, value=10)
                # The new count is stored only in a run where the
                # "Start processing" button was pressed.
                if number != condition_capture['count'] and updated_schema:
                    condition_capture['count'] = number
                with open("/home/user/app/animation/edit_file.json") as animate:
                    url_json = json.load(animate)
                st_lottie(url_json, height=int(screen_height/3))

        # ---- Tab 1: generated data ----
        with tabs[1]:
            with open("/home/user/app/animation/no data animation.json") as animate:
                url_json = json.load(animate)
            # Placeholder that is overwritten in place as rows arrive.
            dataframe = st.empty()

            if condition_capture['count'] == 0:
                # No row count chosen yet: show the "no data" animation.
                st_lottie(url_json, height=int(screen_height/3))
            else:
                smart_append = []

                # Phase 1: stream fresh rows, only when the schema changed
                # since the last generation.
                if condition_capture['prev_schema'] != condition_capture['schema']:
                    condition_capture['prev_schema'] = condition_capture['schema']
                    condition_capture['current_append'] = {}
                    collected = condition_capture['current_append']

                    for text_indexing, store in enumerate(actual_value(textual_value, schema)):
                        dummy_value = dictionary_formatting(store)
                        # The second formatting pass below can return None,
                        # so guard the first pass as well before iterating.
                        if dummy_value is None:
                            continue
                        # Keep only the last candidate of every field.
                        for keys in dummy_value:
                            while len(dummy_value[keys]) >= 2:
                                dummy_value[keys].pop(0)
                        dummy_value = dictionary_formatting(dummy_value)
                        if dummy_value is None:
                            continue

                        smart_append.append(dummy_value)
                        print(dummy_value)
                        for keys in dummy_value:
                            # Test and insert with the same (string) key; the
                            # previous code tested `keys` but stored under
                            # `str(keys)`, wiping the column for non-str keys.
                            collected.setdefault(str(keys), []).append(str(list(dummy_value[keys])))
                        if not collected:
                            # An empty record added no columns; nothing to show.
                            continue
                        dataframe.dataframe(collected)

                        last_column = list(collected)[-1]
                        if len(collected[last_column]) >= condition_capture['count']:
                            start_page.progress(100, text="Operation completed.")
                            break

                        # st.progress only accepts integers in 0..100, so
                        # clamp; multiplying before dividing also keeps the
                        # bar moving when more than 100 rows are requested.
                        percent = min(100, (text_indexing + 1) * 100 // condition_capture['count'])
                        start_page.progress(percent, text="Operation in progress. Please wait.")
                        page_keys = list(html_page)
                        # Guard against more generated items than pages.
                        if text_indexing < len(page_keys):
                            write.write(html_page[page_keys[text_indexing]], unsafe_allow_html=True)

                # Phase 1b: nothing was streamed in this run (e.g. a rerun with
                # an unchanged schema) — rebuild the row list from the table
                # kept in the session.
                # NOTE(review): this stores the first element of each cell
                # rather than the cell list itself — confirm sorting() accepts
                # both shapes.
                if len(smart_append) == 0:
                    previous_rows = condition_capture['current_append']
                    ranger = len(previous_rows[next(iter(previous_rows))]) if previous_rows else 0
                    for indexing in range(ranger):
                        working_dict = {}
                        for column in previous_rows:
                            working_dict[column] = previous_rows[column][indexing][0]
                        smart_append.append(working_dict)

                # Phase 2: rebuild the table from scratch in sorted order. The
                # reset happens after the snapshot above so that a rerun
                # replaces the rows instead of appending duplicates.
                condition_capture['current_append'] = {}
                # Materialise once: the result is iterated twice below.
                smart_movement = list(sorting(smart_append)) if smart_append else []
                for row_text in smart_movement:
                    # SECURITY NOTE: eval() executes whatever text sorting()
                    # returns. If those strings are plain dict literals,
                    # ast.literal_eval would be the safe replacement — confirm.
                    value = eval(row_text)
                    for field in value:
                        condition_capture['current_append'].setdefault(str(field), []).append(
                            [str(i) for i in value[field]]
                        )
                    dataframe.dataframe(condition_capture['current_append'])

                # Phase 3: verify each row and overwrite it in place. This is
                # best effort: a failing row is reported and left as it was.
                for indexing, row_text in enumerate(smart_movement):
                    try:
                        # Convert string to dictionary (see SECURITY NOTE above).
                        dummy_value = eval(row_text)
                        # Keep only the last candidate of every field.
                        for key in dummy_value:
                            while len(dummy_value[key]) >= 2:
                                dummy_value[key].pop(0)
                        formatted = dictionary_formatting(dummy_value)
                        print(formatted)
                        verification_result = verification(formatted) if formatted else None
                        if verification_result is None:
                            # Nothing to verify for this row; keep it unchanged.
                            continue
                        for column in verification_result:
                            if column in condition_capture['current_append']:
                                condition_capture['current_append'][column][indexing] = [
                                    str(i) for i in verification_result[column]
                                ]
                        dataframe.dataframe(condition_capture['current_append'])
                    except Exception as exc:
                        # Report instead of silently swallowing, but do not
                        # abort the remaining rows.
                        print(f"Verification skipped for row {indexing}: {exc!r}")
# Landing view: shown only while the session holds no schema.
if len(condition_capture['schema']) == 0:
    headline_html = """
<div style="text-align: center; font-size: 50px; font-weight: bold; margin-top: 20px;">
Generate high-quality, realistic data effortlessly.
</div>
"""
    # Instructions for the user
    instructions_md = """
### How it Works:
- **Step 1:** Enter a prompt to generate data.
- **step 2:** Select the number of rows you want to generate and press start processing.
- **Step 3:** AI will use Wikipedia insights to create realistic outputs.
- **Key Feature:** Less hallucination, more accurate data.
"""

    st.markdown(headline_html, unsafe_allow_html=True)

    newline_column1, newline_column2 = st.columns(2)
    # Right half: rocket animation, sized to a quarter of the screen height.
    with newline_column2:
        with open("/home/user/app/animation/Data_rocket.json") as animate:
            url_json = json.load(animate)
        animation_height = int(screen_height/4)
        st_lottie(url_json, height=animation_height)
    # Left half: the how-to text.
    with newline_column1:
        st.markdown(instructions_md)
# Prompt box, pre-filled with the prompt stored in the session.
prompt = st.text_input(label="Please use prompt to generate data", value=condition_capture['prompt'])

# A changed prompt means a new dataset: derive its schema, clear everything
# cached for the previous one, and restart the script so the page redraws.
if prompt != str(condition_capture['prompt']):
    condition_capture['prompt'] = prompt
    schema = schema_generator(prompt)
    condition_capture['schema'] = schema
    condition_capture['current_append'] = {}
    # The lookup in the Wikipedia panel is fetched only while this entry is
    # empty, so it has to be cleared here; otherwise the text gathered for
    # the previous schema would be reused for the new one.
    condition_capture['textual_value'] = {}
    st.rerun()