File size: 5,471 Bytes
d2b9031 49e25d2 ff86828 d2b9031 6a1564b 1ec543a 6a1564b 1ec543a fa41b98 1ec543a 4f2568a 1ec543a 4f2568a 1ec543a 4f2568a 6a1564b fa41b98 1ec543a fa41b98 1ec543a 5ffc072 1ec543a 5ffc072 1ec543a fa41b98 1ec543a fa41b98 5ffc072 1ec543a fa41b98 1ec543a fa41b98 1ec543a fa41b98 1ec543a fa41b98 1ec543a fa41b98 1ec543a fa41b98 1ec543a fa41b98 1ec543a fa41b98 1ec543a 7773ef1 5ffc072 31c7995 1ec543a 31c7995 1ec543a 31c7995 fa41b98 1ec543a fa41b98 31c7995 1ec543a 31c7995 1ec543a 31c7995 fa41b98 1ec543a fa41b98 72dd3ca fa41b98 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
import os
import shutil
import tempfile
from io import BytesIO

import gradio as gr
import pandas as pd
# Encodings tried, in order, when decoding an uploaded CSV.
# "latin1" maps every possible byte, so it can never raise UnicodeDecodeError
# and must come last; placing it earlier makes any later candidate unreachable.
# "cp1252" is tried before it so Windows-exported files keep characters such
# as the euro sign and curly quotes instead of becoming C1 control characters.
_CSV_ENCODINGS = ("utf-8", "cp1252", "latin1")


def _load_upload(input_file):
    """Return ``(file_name, file_bytes)`` for an uploaded file.

    ``input_file`` may be a file-like object exposing ``read()`` and ``name``,
    or a filesystem path. ``OSError`` from opening a path is left to the caller.
    """
    try:
        file_bytes = input_file.read()
        file_name = input_file.name
    except AttributeError:
        # Not file-like: treat the value itself as a path.
        file_name = input_file
        with open(file_name, "rb") as f:
            file_bytes = f.read()
    return file_name, file_bytes


def _read_csv_with_fallback(file_bytes):
    """Parse CSV bytes, returning ``(DataFrame, encoding_used)``.

    Each encoding in ``_CSV_ENCODINGS`` is tried until one decodes cleanly.
    Returns ``(None, None)`` if every candidate raises ``UnicodeDecodeError``;
    any other parsing error propagates to the caller.
    """
    for enc in _CSV_ENCODINGS:
        try:
            return pd.read_csv(BytesIO(file_bytes), encoding=enc), enc
        except UnicodeDecodeError:
            continue
    return None, None


def convert_file(input_file, conversion_type):
    """Convert an uploaded CSV file to Parquet, or a Parquet file to CSV.

    Args:
        input_file: The upload, either a file-like object with ``read()`` and
            ``name`` or a path to the file. ``None`` means nothing was uploaded.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.

    Returns:
        A ``(output_path, message)`` tuple. On success ``output_path`` is the
        converted file, written as ``output.parquet`` / ``output.csv`` inside a
        fresh temporary directory, and ``message`` holds a summary plus a
        preview of the first 10 rows. On any failure ``output_path`` is
        ``None`` and ``message`` describes the problem.
    """
    if input_file is None:
        return None, "Please upload a file."

    try:
        file_name, file_bytes = _load_upload(input_file)
    except OSError as e:
        # Report unreadable uploads the same way as every other failure
        # instead of letting the exception escape to the UI framework.
        return None, f"Error reading uploaded file: {e}"

    # splitext yields "" for names without a dot, so a file literally named
    # "csv" is not mistaken for one with a .csv extension.
    file_extension = os.path.splitext(file_name)[1].lower()

    encoding = None
    output_dir = None
    try:
        if conversion_type == "CSV to Parquet":
            if file_extension != ".csv":
                return None, "For CSV to Parquet conversion, please upload a CSV file."
            try:
                df, encoding = _read_csv_with_fallback(file_bytes)
            except Exception as e:
                return None, f"Error reading CSV: {e}"
            if df is None:
                # Defensive: only reachable if _CSV_ENCODINGS no longer ends
                # with a catch-all encoding.
                return None, "Failed to read CSV with any of the common encodings. Your file might use a rare encoding."
            # A per-request directory keeps concurrent conversions from
            # overwriting each other's result while preserving the file name.
            output_dir = tempfile.mkdtemp(prefix="convert_")
            output_file = os.path.join(output_dir, "output.parquet")
            df.to_parquet(output_file, index=False)
            converted_format = "Parquet"
        elif conversion_type == "Parquet to CSV":
            if file_extension != ".parquet":
                return None, "For Parquet to CSV conversion, please upload a Parquet file."
            df = pd.read_parquet(BytesIO(file_bytes))
            output_dir = tempfile.mkdtemp(prefix="convert_")
            output_file = os.path.join(output_dir, "output.csv")
            df.to_csv(output_file, index=False, encoding='utf-8')
            converted_format = "CSV"
        else:
            return None, "Invalid conversion type selected."

        # Generate a preview of the top 10 rows
        preview = df.head(10).to_string(index=False)
        info_message = (
            f"Input file: {file_name}\n"
            f"Converted file format: {converted_format}\n"
        )
        if conversion_type == "CSV to Parquet":
            info_message += f"Used encoding: {encoding}\n"
        info_message += f"\nPreview (Top 10 Rows):\n{preview}"
        return output_file, info_message
    except Exception as e:
        # Do not leave a half-written result behind when conversion fails.
        if output_dir is not None:
            shutil.rmtree(output_dir, ignore_errors=True)
        return None, f"Error during conversion: {e}"
# Custom CSS for the app: page background and font, a centered card-style
# container, heading color, spacing between inputs/outputs, and a green
# button style for elements carrying the "gradio-button" class.
custom_css = """
body {
background-color: #f4f4f4;
font-family: 'Helvetica Neue', Arial, sans-serif;
}
.gradio-container {
max-width: 900px;
margin: 40px auto;
padding: 20px;
background-color: #ffffff;
border-radius: 12px;
box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
h1, h2 {
color: #333333;
}
.gradio-input, .gradio-output {
margin-bottom: 20px;
}
.gradio-button {
background-color: #4CAF50 !important;
color: white !important;
border: none !important;
padding: 10px 20px !important;
font-size: 16px !important;
border-radius: 6px !important;
cursor: pointer;
}
.gradio-button:hover {
background-color: #45a049 !important;
}
"""
# Build the Gradio UI: a title and description, an upload field next to a
# conversion-type selector, a Convert button, the converted-file download with
# a text preview, and a closing notes section.
# NOTE(review): the nesting below was reconstructed from a copy of this file
# whose indentation had been stripped — confirm that the button and the
# output row sit where intended.
with gr.Blocks(css=custom_css, title="CSV <-> Parquet Converter") as demo:
    gr.Markdown("# CSV <-> Parquet Converter")
    gr.Markdown("Upload a CSV or Parquet file and select the conversion type. The app converts the file to the opposite format and displays a preview of the top 10 rows.")
    # Input row: file upload and conversion direction, in equal-width columns.
    with gr.Row():
        with gr.Column(scale=1):
            input_file = gr.File(label="Upload CSV or Parquet File")
        with gr.Column(scale=1):
            conversion_type = gr.Radio(
                choices=["CSV to Parquet", "Parquet to CSV"],
                label="Conversion Type",
                value="CSV to Parquet" # Set default value
            )
    # The "gradio-button" class is the one styled in custom_css.
    convert_button = gr.Button("Convert", elem_classes=["gradio-button"])
    # Output row: downloadable result and the textual preview.
    with gr.Row():
        output_file = gr.File(label="Converted File")
        preview = gr.Textbox(label="Preview (Top 10 Rows)", lines=15)
    # convert_file returns (output path, message); they map to the two outputs in order.
    convert_button.click(fn=convert_file, inputs=[input_file, conversion_type], outputs=[output_file, preview])
    gr.Markdown("""
### Notes:
- This converter uses UTF-8 as the default encoding
- If UTF-8 fails, it tries Latin-1, ISO-8859-1, and CP1252 encodings
- Parquet files preserve data types better than CSV
- The preview shows only the first 10 rows of data
""")
# Launch the app only when this file is executed as a script, so importing
# the module does not start a server.
if __name__ == "__main__":
    demo.launch()