Shreneek committed on
Commit
1c11299
·
verified ·
1 Parent(s): 6201344

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +120 -31
app.py CHANGED
@@ -1,41 +1,130 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  from ydata_profiling import ProfileReport
 
4
 
5
- st.set_page_config(page_title="Dynamic Data Profiling", layout="wide", page_icon="📊")
 
 
 
 
 
6
 
7
- st.title("Dynamic Data Profiling with ydata-profiling")
8
- st.write("Upload your CSV file and get a complete interactive profiling report!")
 
 
 
 
 
 
9
 
10
- uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
11
 
12
  if uploaded_file is not None:
13
- try:
14
- # Read CSV into DataFrame
15
- df = pd.read_csv(uploaded_file)
16
- st.success("File uploaded successfully!")
17
-
18
- # Generate the profile report
19
- with st.spinner("Generating profile report..."):
20
- profile = ProfileReport(df, title="Profiling Report", explorative=True)
21
- # Convert report to HTML
22
- report_html = profile.to_html()
23
-
24
- # Show the report in an iframe
25
- st.components.v1.html(report_html, height=1200, scrolling=True)
26
-
27
- # Provide a download button for the HTML
28
- st.write("### Download the Profiling Report")
29
- # Convert HTML string to bytes
30
- report_bytes = report_html.encode('utf-8')
31
- st.download_button(
32
- label="Download HTML",
33
- data=report_bytes,
34
- file_name="profiling_report.html",
35
- mime="text/html"
36
- )
37
-
38
- except Exception as e:
39
- st.error(f"An error occurred: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
  else:
41
  st.info("Awaiting CSV file upload.")
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import plotly.express as px
4
  from ydata_profiling import ProfileReport
5
+ from statsmodels.stats.outliers_influence import variance_inflation_factor
6
 
7
# 1. Page configuration: browser-tab title, wide layout and icon.
st.set_page_config(page_title="Enhanced Data Profiling", layout="wide", page_icon="📊")

# 2. Custom CSS for a clean, white UI.
#    The stylesheet is injected as raw HTML via st.markdown below.
custom_css = """
<style>
    /* Make the entire background white */
    body {
        background-color: #ffffff !important;
        font-family: 'Roboto', sans-serif;
    }

    /* Headers and titles */
    h1, h2, h3, h4 {
        color: #2c3e50;
        font-weight: 700;
    }

    /* The main Streamlit container */
    [data-testid="stAppViewContainer"] {
        background-color: #ffffff !important;
    }

    /* Individual content containers */
    .css-1d391kg, .css-hxt7ib {
        background-color: #ffffff !important;
        border-radius: 15px;
        padding: 30px;
        margin-bottom: 20px;
        box-shadow: 0 8px 16px rgba(0,0,0,0.1);
    }

    /* Sidebar styling */
    [data-testid="stSidebar"] {
        background-color: #34495e !important;
        color: #ecf0f1 !important;
        font-size: 16px;
    }
    [data-testid="stSidebar"] .css-1d391kg {
        background-color: #2c3e50 !important;
        border-radius: 10px;
    }
</style>
"""
st.markdown(custom_css, unsafe_allow_html=True)

# 3. Title and a centered subtitle rendered as raw HTML.
st.title("Enhanced Data Profiling")
subtitle_html = "<h4 style='text-align: center; color: #2c3e50;'>Upload your CSV and explore it thoroughly!</h4>"
st.markdown(subtitle_html, unsafe_allow_html=True)

# 4. Sidebar: header plus the CSV uploader that drives the rest of the app.
with st.sidebar:
    st.header("Upload & Options")
    uploaded_file = st.file_uploader("Upload a CSV file", type="csv")
64
+
65
# Placeholder for the DataFrame; stays None until a CSV has been parsed.
df = None

if uploaded_file is None:
    st.info("Awaiting CSV file upload.")
else:
    # 4a. Read the CSV. Parsing problems (empty file, malformed rows, wrong
    #     encoding) are reported in the UI instead of crashing the app with
    #     a raw traceback.
    try:
        df = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as exc:
        st.error(f"An error occurred: {exc}")

if df is not None:
    st.success("File uploaded successfully!")

    # 5. KPI Metrics / Quick Summary
    st.subheader("Dataset Quick Summary")
    col1, col2, col3, col4 = st.columns(4)
    col1.metric("Rows", f"{df.shape[0]}")
    col2.metric("Columns", f"{df.shape[1]}")
    # A header-only CSV has zero cells; avoid dividing by zero in that case.
    total_cells = df.size
    missing_percentage = (df.isnull().sum().sum() / total_cells) * 100 if total_cells else 0.0
    col3.metric("Missing %", f"{missing_percentage:.2f}%")
    duplicates = df.duplicated().sum()
    col4.metric("Duplicates", f"{duplicates}")

    st.write("---")

    # 6. Optional Data Transformation: Drop columns with > 50% missing
    if st.checkbox("Drop columns with > 50% missing data?"):
        threshold = df.shape[0] * 0.5
        before_cols = df.shape[1]
        # Keep columns whose missing count is at most half the rows, so only
        # columns with strictly more than 50% missing are dropped (matching
        # the checkbox label). This also keeps all columns of a zero-row frame.
        df = df.loc[:, df.isnull().sum() <= threshold]
        after_cols = df.shape[1]
        st.success(f"Dropped {before_cols - after_cols} columns. Remaining columns: {after_cols}")

    # 7. Optional Quick Histogram (only offered when numeric columns exist)
    numeric_cols = df.select_dtypes(include="number").columns.tolist()
    if numeric_cols:
        st.subheader("Optional Quick Histogram")
        selected_col = st.selectbox("Select a numeric column", numeric_cols)
        if selected_col is not None:
            fig_hist = px.histogram(df, x=selected_col, nbins=50, title=f"Histogram of {selected_col}")
            fig_hist.update_traces(opacity=0.8)
            st.plotly_chart(fig_hist, use_container_width=True)

    # 8. Generate ydata-profiling Report
    st.subheader("Comprehensive Profiling Report")
    if df.empty:
        # Nothing to profile (no rows, or every column was dropped above);
        # skip report generation rather than passing an empty frame on.
        st.warning("The dataset has no rows or columns to profile.")
    else:
        with st.spinner("Generating profiling report..."):
            profile = ProfileReport(df, title="Profiling Report", explorative=True)
            report_html = profile.to_html()

        # 8a. Display the report in an iframe
        st.components.v1.html(report_html, height=1200, scrolling=True)

        # 8b. Download Button for HTML
        st.write("### Download the Profiling Report")
        st.download_button(
            label="Download HTML",
            data=report_html.encode("utf-8"),
            file_name="profiling_report.html",
            mime="text/html",
        )
122
+
123
+ # That's it!
124
+ # Simply copy and paste this into your app.py on Hugging Face Spaces.
125
+ # Make sure you have a requirements.txt that includes:
126
+ # streamlit
127
+ # pandas
128
+ # ydata-profiling
129
+ # plotly
130
+ # statsmodels (for VIF, if you need it)