Spaces:

nolanzandi
/

virtual-data-analyst

Running

App Files Files Community

virtual-data-analyst / functions /chart_functions.py

nolanzandi

Updates to convince LLM to display visualizations more often

1862a57 verified 9 days ago

raw

history blame

13.8 kB

	from typing import List
	from typing import Dict
	import plotly.io as pio
	import plotly.express as px
	import pandas as pd
	from utils import TEMP_DIR
	import os
	import ast
	import json
	from dotenv import load_dotenv

	# Load variables from a local .env file (if any) into the process environment
	# before configuration values are read below.
	load_dotenv()

	# Base URL used as the prefix of the chart/table links embedded in the iframes
	# built by the generation functions in this module; None when ROOT_URL is unset.
	root_url = os.getenv("ROOT_URL")

	def _parse_llm_literal(text, strip_newlines=False):
	    """Parse a dict-like string emitted by the LLM into a Python object.

	    Strict JSON is tried first so that ``null`` and booleans in any position
	    (for example inside lists) are understood and text inside quoted strings
	    is left untouched. If the text is not valid JSON it is treated as a
	    Python literal, after rewriting lowercase ``:true`` / ``:false`` to the
	    Python spellings.

	    Args:
	        text: The string to parse.
	        strip_newlines: Remove every newline before the Python-literal
	            fallback is attempted.

	    Returns:
	        The parsed object (normally a dict).

	    Raises:
	        Whatever ``ast.literal_eval`` raises (typically ``ValueError`` or
	        ``SyntaxError``) when the text is neither JSON nor a Python literal.
	    """
	    try:
	        return json.loads(text)
	    except ValueError:
	        # Not strict JSON (e.g. single-quoted keys); use the literal path.
	        pass

	    if strip_newlines:
	        text = text.replace("\n", "")
	    for needle in (':false', ': false'):
	        text = text.replace(needle, ':False')
	    for needle in (':true', ': true'):
	        text = text.replace(needle, ':True')
	    return ast.literal_eval(text)


	def llm_chart_data_scrub(data, layout):
	    """Normalise the ``data`` and ``layout`` arguments produced by the LLM.

	    Either argument may arrive as a dict, a string representation of a dict
	    (JSON or Python literal), or a list wrapping one of those.

	    Args:
	        data: Trace settings. When a list is given, every element is
	            processed and the last one is returned.
	        layout: Layout settings. When a non-empty list is given, only its
	            first element is used.

	    Returns:
	        A ``(data_dict, layout_dict)`` tuple. ``data_dict`` is ``{}`` when no
	        usable data was supplied; ``layout_dict`` is the parsed layout, or
	        the falsy input value unchanged.

	    Raises:
	        Whatever ``ast.literal_eval`` raises when a string cannot be parsed.
	    """
	    # Processing data to account for variation from LLM
	    data_list = data if isinstance(data, list) else [data]

	    data_dict = {}
	    for data_obj in data_list:
	        if isinstance(data_obj, str):
	            print(data_obj)
	            data_dict = _parse_llm_literal(data_obj, strip_newlines=True)
	        else:
	            data_dict = data_obj

	    # Callers iterate ``data_dict.items()``; never hand them ``None``.
	    if data_dict is None:
	        data_dict = {}

	    if layout and isinstance(layout, list):
	        layout_obj = layout[0]
	    else:
	        layout_obj = layout

	    if layout_obj and isinstance(layout_obj, str):
	        print(layout_obj)
	        layout_dict = _parse_llm_literal(layout_obj)
	    else:
	        layout_dict = layout_obj

	    return data_dict, layout_dict

	def scatter_chart_fig(df, x_column: List[str], y_column: str, category: str="", trendline: str="",
	                      trendline_options: List[dict]=[{}], marginal_x: str="", marginal_y: str="",
	                      size: str=""):
	    """Build a Plotly Express scatter figure from ``df``.

	    Only the optional settings that are truthy are forwarded to
	    ``px.scatter``. When ``size`` is given, negative values in that column
	    are set to 0 in ``df`` itself (in place) before plotting.

	    Args:
	        df: DataFrame holding the data to plot.
	        x_column: Column(s) for the x axis.
	        y_column: Column for the y axis.
	        category: Column passed to ``px.scatter`` as ``color``.
	        trendline: Trendline kind, passed through as ``trendline``.
	        trendline_options: Options dict, a string form of one, or a list
	            whose first element is either of those.
	        marginal_x: Passed through as ``marginal_x``.
	        marginal_y: Passed through as ``marginal_y``.
	        size: Column passed through as ``size``.

	    Returns:
	        The figure returned by ``px.scatter``.
	    """
	    scatter_kwargs = {"data_frame": df, "x": x_column, "y": y_column}

	    simple_options = (
	        ("color", category),
	        ("trendline", trendline),
	        ("marginal_x", marginal_x),
	        ("marginal_y", marginal_y),
	    )
	    for px_name, px_value in simple_options:
	        if px_value:
	            scatter_kwargs[px_name] = px_value

	    if size:
	        # Clamp negative entries of the size column to zero before plotting.
	        negative_rows = df[size] < 0
	        df.loc[negative_rows, size] = 0
	        scatter_kwargs["size"] = size

	    if trendline_options:
	        # Unwrap a list, then evaluate a string form into a real object.
	        options = trendline_options[0] if isinstance(trendline_options, list) else trendline_options
	        if options and isinstance(options, str):
	            options = ast.literal_eval(options)
	        scatter_kwargs["trendline_options"] = options

	    return px.scatter(**scatter_kwargs)

	def scatter_chart_generation_func(x_column: List[str], y_column: str, session_hash, session_folder, data: List[dict]=[{}], layout: List[dict]=[{}],
	                                  category: str="", trendline: str="", trendline_options: List[dict]=[{}], marginal_x: str="", marginal_y: str="",
	                                  size: str="", **kwargs):
	    """Render a scatter plot for the session's query results.

	    Reads ``query.csv`` from ``TEMP_DIR/<session_hash>/<session_folder>``,
	    builds the figure with ``scatter_chart_fig``, applies the LLM-provided
	    ``layout`` and ``data`` settings, and writes ``chart.html`` next to the
	    CSV. Extra keyword arguments are accepted and ignored.

	    Returns:
	        ``{"reply": ...}`` holding either an iframe snippet pointing at the
	        written chart or, if anything raised, an error description.
	    """
	    try:
	        session_dir = TEMP_DIR / str(session_hash) / str(session_folder)
	        html_target = f'{session_dir}/chart.html'
	        query_frame = pd.read_csv(f'{session_dir}/query.csv')

	        fig = scatter_chart_fig(
	            query_frame,
	            x_column=x_column,
	            y_column=y_column,
	            category=category,
	            trendline=trendline,
	            trendline_options=trendline_options,
	            marginal_x=marginal_x,
	            marginal_y=marginal_y,
	            size=size,
	        ).to_dict()

	        print(data)
	        print(layout)

	        data_dict, layout_dict = llm_chart_data_scrub(data, layout)

	        # Applying stylings and settings generated from LLM
	        if layout_dict:
	            fig["layout"] = layout_dict

	        # Keys the LLM may not override; "marker" joins them when a size
	        # column is in use.
	        protected_keys = ("x", "y", "type", "marker") if size else ("x", "y", "type")
	        trace_overrides = {
	            key: value for key, value in data_dict.items() if key not in protected_keys
	        }
	        for trace in fig["data"]:
	            trace.update(trace_overrides)

	        pio.write_html(fig, html_target, full_html=False)

	        chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/{session_folder}/chart.html'

	        iframe_head = 'Please display this iframe: <div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="'
	        iframe_tail = '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'

	        return {"reply": iframe_head + chart_url + iframe_tail}

	    except Exception as e:
	        print("SCATTER PLOT ERROR")
	        print(e)
	        reply = f"""There was an error generating the Plotly Scatter Plot from {x_column}, {y_column}, {data}, and {layout}
	The error is {e},
	You should probably try again.
	"""
	        return {"reply": reply}

	def line_chart_generation_func(x_column: str, y_column: str, session_hash, session_folder, data: List[dict]=[{}], layout: List[dict]=[{}],
	                               category: str="", **kwargs):
	    """Render a line chart for the session's query results.

	    Reads ``query.csv`` from ``TEMP_DIR/<session_hash>/<session_folder>``,
	    plots it with ``px.line``, applies the LLM-provided ``layout`` and
	    ``data`` settings, and writes ``chart.html`` next to the CSV. Extra
	    keyword arguments are accepted and ignored.

	    Returns:
	        ``{"reply": ...}`` holding either an iframe snippet pointing at the
	        written chart or, if anything raised, an error description.
	    """
	    try:
	        session_dir = TEMP_DIR / str(session_hash) / str(session_folder)
	        html_target = f'{session_dir}/chart.html'
	        query_frame = pd.read_csv(f'{session_dir}/query.csv')

	        line_kwargs = {"data_frame": query_frame, "x": x_column, "y": y_column}
	        if category:
	            line_kwargs["color"] = category

	        fig = px.line(**line_kwargs).to_dict()

	        data_dict, layout_dict = llm_chart_data_scrub(data, layout)

	        print(data_dict)
	        print(layout_dict)

	        # Applying stylings and settings generated from LLM
	        if layout_dict:
	            fig["layout"] = layout_dict

	        # The plotted data and trace type stay as computed from the CSV.
	        trace_overrides = {
	            key: value for key, value in data_dict.items() if key not in ("x", "y", "type")
	        }
	        for trace in fig["data"]:
	            trace.update(trace_overrides)

	        print(fig)

	        pio.write_html(fig, html_target, full_html=False)

	        chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/{session_folder}/chart.html'

	        iframe_head = 'Please display this iframe: <div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="'
	        iframe_tail = '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'

	        return {"reply": iframe_head + chart_url + iframe_tail}

	    except Exception as e:
	        print("LINE CHART ERROR")
	        print(e)
	        reply = f"""There was an error generating the Plotly Line Chart from {x_column}, {y_column}, {data}, and {layout}
	The error is {e},
	You should probably try again.
	"""
	        return {"reply": reply}

	def bar_chart_generation_func(x_column: str, y_column: str, session_hash, session_folder, data: List[dict]=[{}], layout: List[dict]=[{}],
	                              category: str="", facet_row: str="", facet_col: str="", **kwargs):
	    """Render a bar chart for the session's query results.

	    Reads ``query.csv`` from ``TEMP_DIR/<session_hash>/<session_folder>``,
	    plots it with ``px.bar`` (forwarding ``category`` as ``color`` plus the
	    facet settings when truthy), applies the LLM-provided ``layout`` and
	    ``data`` settings, and writes ``chart.html`` next to the CSV. Extra
	    keyword arguments are accepted and ignored.

	    Returns:
	        ``{"reply": ...}`` holding either an iframe snippet pointing at the
	        written chart or, if anything raised, an error description.
	    """
	    try:
	        session_dir = TEMP_DIR / str(session_hash) / str(session_folder)
	        html_target = f'{session_dir}/chart.html'
	        query_frame = pd.read_csv(f'{session_dir}/query.csv')

	        bar_kwargs = {"data_frame": query_frame, "x": x_column, "y": y_column}
	        for px_name, px_value in (("color", category), ("facet_row", facet_row), ("facet_col", facet_col)):
	            if px_value:
	                bar_kwargs[px_name] = px_value

	        fig = px.bar(**bar_kwargs).to_dict()

	        data_dict, layout_dict = llm_chart_data_scrub(data, layout)

	        print(data_dict)
	        print(layout_dict)

	        # Applying stylings and settings generated from LLM
	        if layout_dict:
	            fig["layout"] = layout_dict

	        # The plotted data and trace type stay as computed from the CSV.
	        trace_overrides = {
	            key: value for key, value in data_dict.items() if key not in ("x", "y", "type")
	        }
	        for trace in fig["data"]:
	            trace.update(trace_overrides)

	        print(fig)

	        pio.write_html(fig, html_target, full_html=False)

	        chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/{session_folder}/chart.html'

	        iframe_head = 'Please display this iframe: <div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="'
	        iframe_tail = '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'

	        return {"reply": iframe_head + chart_url + iframe_tail}

	    except Exception as e:
	        print("BAR CHART ERROR")
	        print(e)
	        reply = f"""There was an error generating the Plotly Bar Chart from {x_column}, {y_column}, {data}, and {layout}
	The error is {e},
	You should probably try again.
	"""
	        return {"reply": reply}

	def pie_chart_generation_func(values: str, names: str, session_hash, session_folder, data: List[dict]=[{}], layout: List[dict]=[{}], **kwargs):
	    """Render a pie chart for the session's query results.

	    Reads ``query.csv`` from ``TEMP_DIR/<session_hash>/<session_folder>``,
	    plots it with ``px.pie``, applies the LLM-provided ``layout`` and
	    ``data`` settings, and writes ``chart.html`` next to the CSV. Extra
	    keyword arguments are accepted and ignored.

	    Args:
	        values: Column passed to ``px.pie`` as ``values``.
	        names: Column passed to ``px.pie`` as ``names``.
	        session_hash: Session identifier (first path component under TEMP_DIR).
	        session_folder: Folder inside the session directory.
	        data: LLM-provided trace settings (see ``llm_chart_data_scrub``).
	        layout: LLM-provided layout settings (see ``llm_chart_data_scrub``).

	    Returns:
	        ``{"reply": ...}`` holding either an iframe snippet pointing at the
	        written chart or, if anything raised, an error description.
	    """
	    try:
	        dir_path = TEMP_DIR / str(session_hash) / str(session_folder)
	        chart_path = f'{dir_path}/chart.html'
	        csv_query_path = f'{dir_path}/query.csv'

	        df = pd.read_csv(csv_query_path)

	        initial_graph = px.pie(data_frame=df, values=values, names=names)

	        fig = initial_graph.to_dict()

	        data_dict, layout_dict = llm_chart_data_scrub(data, layout)

	        print(data_dict)
	        print(layout_dict)

	        # Applying stylings and settings generated from LLM
	        if layout_dict:
	            fig["layout"] = layout_dict

	        # Pie traces carry their data in "labels"/"values" rather than
	        # "x"/"y", so those keys must be protected too; otherwise settings
	        # from the LLM would replace the data read from the CSV.
	        data_ignore = ("x", "y", "type", "labels", "values")

	        for key, value in data_dict.items():
	            if key not in data_ignore:
	                for data_item in fig["data"]:
	                    data_item[key] = value

	        print(fig)

	        pio.write_html(fig, chart_path, full_html=False)

	        chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/{session_folder}/chart.html'

	        iframe = 'Please display this iframe: <div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + chart_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'

	        return {"reply": iframe}

	    except Exception as e:
	        print("PIE CHART ERROR")
	        print(e)
	        reply = f"""There was an error generating the Plotly Pie Chart from {values}, {names}, {data}, and {layout}
	The error is {e},
	You should probably try again.
	"""
	        return {"reply": reply}

	def histogram_generation_func(x_column: str, session_hash, session_folder, y_column: str="", data: List[dict]=[{}], layout: List[dict]=[{}], histnorm: str="", category: str="",
	                              histfunc: str="", **kwargs):
	    """Render a histogram for the session's query results.

	    Reads ``query.csv`` from ``TEMP_DIR/<session_hash>/<session_folder>``,
	    plots it with ``px.histogram`` (forwarding ``y_column``, ``histnorm``,
	    ``category`` as ``color`` and ``histfunc`` when truthy), applies the
	    LLM-provided ``layout`` and ``data`` settings, and writes ``chart.html``
	    next to the CSV. Extra keyword arguments are accepted and ignored.

	    Returns:
	        ``{"reply": ...}`` holding either an iframe snippet pointing at the
	        written chart or, if anything raised, an error description.
	    """
	    try:
	        session_dir = TEMP_DIR / str(session_hash) / str(session_folder)
	        html_target = f'{session_dir}/chart.html'
	        query_frame = pd.read_csv(f'{session_dir}/query.csv')

	        print(x_column)

	        histogram_kwargs = {"data_frame": query_frame, "x": x_column}
	        optional_settings = (
	            ("y", y_column),
	            ("histnorm", histnorm),
	            ("color", category),
	            ("histfunc", histfunc),
	        )
	        for px_name, px_value in optional_settings:
	            if px_value:
	                histogram_kwargs[px_name] = px_value

	        fig = px.histogram(**histogram_kwargs).to_dict()

	        data_dict, layout_dict = llm_chart_data_scrub(data, layout)

	        print(data_dict)
	        print(layout_dict)

	        # Applying stylings and settings generated from LLM
	        if layout_dict:
	            fig["layout"] = layout_dict

	        # The plotted data and trace type stay as computed from the CSV.
	        trace_overrides = {
	            key: value for key, value in data_dict.items() if key not in ("x", "y", "type")
	        }
	        for trace in fig["data"]:
	            trace.update(trace_overrides)

	        print(fig)

	        pio.write_html(fig, html_target, full_html=False)

	        chart_url = f'{root_url}/gradio_api/file/temp/{session_hash}/{session_folder}/chart.html'

	        iframe_head = 'Please display this iframe: <div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="'
	        iframe_tail = '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'

	        return {"reply": iframe_head + chart_url + iframe_tail}

	    except Exception as e:
	        print("HISTOGRAM ERROR")
	        print(e)
	        reply = f"""There was an error generating the Plotly Histogram from {x_column}.
	The error is {e},
	You should probably try again.
	"""
	        return {"reply": reply}

	def table_generation_func(session_hash, session_folder, **kwargs):
	    """Render the session's query results as an HTML table.

	    Reads ``query.csv`` from ``TEMP_DIR/<session_hash>/<session_folder>``,
	    converts it with ``DataFrame.to_html`` and writes ``table.html`` next to
	    the CSV. Extra keyword arguments are accepted and ignored.

	    Args:
	        session_hash: Session identifier (first path component under TEMP_DIR).
	        session_folder: Folder inside the session directory.

	    Returns:
	        ``{"reply": ...}`` holding either an iframe snippet pointing at the
	        written table or, if anything raised, an error description.
	    """
	    print("TABLE GENERATION")
	    try:
	        dir_path = TEMP_DIR / str(session_hash) / str(session_folder)
	        csv_query_path = f'{dir_path}/query.csv'
	        table_path = f'{dir_path}/table.html'

	        df = pd.read_csv(csv_query_path)

	        html_table = df.to_html()
	        print(html_table)

	        # Write UTF-8 explicitly so non-ASCII cell values do not depend on
	        # the platform's default encoding.
	        with open(table_path, "w", encoding="utf-8") as file:
	            file.write(html_table)

	        table_url = f'{root_url}/gradio_api/file/temp/{session_hash}/{session_folder}/table.html'

	        iframe = 'Please display this iframe: <div style=overflow:auto;><iframe\n scrolling="yes"\n width="1000px"\n height="500px"\n src="' + table_url + '"\n frameborder="0"\n allowfullscreen\n></iframe>\n</div>'
	        print(iframe)
	        return {"reply": iframe}

	    except Exception as e:
	        print("TABLE ERROR")
	        print(e)
	        reply = f"""There was an error generating the Pandas DataFrame table results.
	The error is {e},
	You should probably try again.
	"""
	        return {"reply": reply}