File size: 5,471 Bytes
d2b9031
49e25d2
ff86828
d2b9031
6a1564b
1ec543a
6a1564b
1ec543a
fa41b98
1ec543a
4f2568a
1ec543a
4f2568a
 
 
1ec543a
4f2568a
 
 
 
6a1564b
 
 
 
fa41b98
 
1ec543a
fa41b98
 
1ec543a
5ffc072
1ec543a
5ffc072
1ec543a
fa41b98
1ec543a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fa41b98
5ffc072
1ec543a
fa41b98
 
 
 
 
1ec543a
fa41b98
 
1ec543a
fa41b98
 
 
 
 
 
1ec543a
fa41b98
1ec543a
fa41b98
 
1ec543a
 
fa41b98
1ec543a
 
fa41b98
1ec543a
fa41b98
 
 
 
1ec543a
7773ef1
5ffc072
31c7995
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ec543a
 
 
31c7995
 
 
1ec543a
31c7995
fa41b98
 
1ec543a
 
fa41b98
31c7995
1ec543a
31c7995
 
1ec543a
 
31c7995
 
fa41b98
 
1ec543a
 
 
 
 
fa41b98
72dd3ca
fa41b98
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
import os
import shutil
import tempfile
from io import BytesIO

import gradio as gr
import pandas as pd

# Encodings tried, in order, when decoding an uploaded CSV.  cp1252 has to come
# before latin1: latin1 maps every possible byte to a character and so never
# raises UnicodeDecodeError, which makes anything listed after it unreachable.
_CSV_ENCODINGS = ("utf-8", "cp1252", "latin1")


def _read_upload(input_file):
    """Return ``(file_name, file_bytes)`` for an upload.

    ``input_file`` is either a file-like object exposing ``read()`` and
    ``name``, or a filesystem path that is opened in binary mode.
    """
    try:
        file_bytes = input_file.read()
        file_name = input_file.name
    except AttributeError:
        # Not file-like: treat the value as a path.
        file_name = os.fspath(input_file)
        with open(file_name, "rb") as f:
            file_bytes = f.read()
    return file_name, file_bytes


def _read_csv_bytes(file_bytes):
    """Parse CSV bytes, returning ``(df, encoding)``.

    Each encoding in ``_CSV_ENCODINGS`` is tried in order.  ``(None, None)`` is
    returned when all of them raise ``UnicodeDecodeError``; any other parsing
    error propagates to the caller.
    """
    for encoding in _CSV_ENCODINGS:
        try:
            return pd.read_csv(BytesIO(file_bytes), encoding=encoding), encoding
        except UnicodeDecodeError:
            continue
    return None, None


def convert_file(input_file, conversion_type):
    """Convert an uploaded CSV file to Parquet, or a Parquet file to CSV.

    Args:
        input_file: The upload, either a file-like object exposing ``read()``
            and ``name`` or a filesystem path.  ``None`` means nothing was
            uploaded.
        conversion_type: ``"CSV to Parquet"`` or ``"Parquet to CSV"``.

    Returns:
        A ``(output_path, message)`` tuple.  On success ``output_path`` points
        to ``output.parquet`` or ``output.csv`` inside a freshly created
        temporary directory and ``message`` summarises the conversion followed
        by a preview of the first 10 rows.  On failure ``output_path`` is
        ``None`` and ``message`` describes the problem.
    """
    if input_file is None:
        return None, "Please upload a file."

    # Report unreadable uploads as a message, like every other failure here,
    # instead of letting the exception escape to the UI.
    try:
        file_name, file_bytes = _read_upload(input_file)
    except (OSError, TypeError) as e:
        return None, f"Error reading uploaded file: {e}"

    file_extension = file_name.lower().split('.')[-1]
    encoding = None
    out_dir = None

    try:
        if conversion_type == "CSV to Parquet":
            if file_extension != "csv":
                return None, "For CSV to Parquet conversion, please upload a CSV file."

            try:
                df, encoding = _read_csv_bytes(file_bytes)
            except Exception as e:
                return None, f"Error reading CSV: {e}"

            # Defensive: only reachable if _CSV_ENCODINGS no longer ends with
            # an encoding that accepts every byte.
            if df is None:
                return None, "Failed to read CSV with any of the common encodings. Your file might use a rare encoding."

            # A per-call directory keeps concurrent requests from overwriting
            # each other's result while preserving the download file name.
            out_dir = tempfile.mkdtemp(prefix="csv_parquet_")
            output_file = os.path.join(out_dir, "output.parquet")
            df.to_parquet(output_file, index=False)
            converted_format = "Parquet"

        elif conversion_type == "Parquet to CSV":
            if file_extension != "parquet":
                return None, "For Parquet to CSV conversion, please upload a Parquet file."

            df = pd.read_parquet(BytesIO(file_bytes))
            out_dir = tempfile.mkdtemp(prefix="csv_parquet_")
            output_file = os.path.join(out_dir, "output.csv")
            df.to_csv(output_file, index=False, encoding='utf-8')
            converted_format = "CSV"
        else:
            return None, "Invalid conversion type selected."

        # Generate a preview of the top 10 rows
        preview = df.head(10).to_string(index=False)
        info_message = (
            f"Input file: {file_name}\n"
            f"Converted file format: {converted_format}\n"
        )
        if conversion_type == "CSV to Parquet":
            info_message += f"Used encoding: {encoding}\n"

        info_message += f"\nPreview (Top 10 Rows):\n{preview}"

        return output_file, info_message

    except Exception as e:
        # Do not leave a half-written result behind.
        if out_dir is not None:
            shutil.rmtree(out_dir, ignore_errors=True)
        return None, f"Error during conversion: {e}"

# Custom CSS for a modern, refined look.
# Passed to gr.Blocks(css=...) below; the .gradio-button rules apply to the
# Convert button, which is created with elem_classes=["gradio-button"].
custom_css = """
body {
    background-color: #f4f4f4;
    font-family: 'Helvetica Neue', Arial, sans-serif;
}
.gradio-container {
    max-width: 900px;
    margin: 40px auto;
    padding: 20px;
    background-color: #ffffff;
    border-radius: 12px;
    box-shadow: 0 8px 16px rgba(0,0,0,0.1);
}
h1, h2 {
    color: #333333;
}
.gradio-input, .gradio-output {
    margin-bottom: 20px;
}
.gradio-button {
    background-color: #4CAF50 !important;
    color: white !important;
    border: none !important;
    padding: 10px 20px !important;
    font-size: 16px !important;
    border-radius: 6px !important;
    cursor: pointer;
}
.gradio-button:hover {
    background-color: #45a049 !important;
}
"""

# Build the UI: upload + conversion selector, a Convert button, then the
# converted file next to a text preview.
with gr.Blocks(title="CSV <-> Parquet Converter", css=custom_css) as demo:
    gr.Markdown(value="# CSV <-> Parquet Converter")
    gr.Markdown(
        value="Upload a CSV or Parquet file and select the conversion type. The app converts the file to the opposite format and displays a preview of the top 10 rows."
    )

    # Inputs share one row, split into two equally weighted columns.
    with gr.Row():
        with gr.Column(scale=1):
            input_file = gr.File(label="Upload CSV or Parquet File")
        with gr.Column(scale=1):
            conversion_type = gr.Radio(
                label="Conversion Type",
                choices=["CSV to Parquet", "Parquet to CSV"],
                value="CSV to Parquet",  # default selection
            )

    # The CSS class hooks this button up to the .gradio-button rules.
    convert_button = gr.Button(value="Convert", elem_classes=["gradio-button"])

    # Results: downloadable file on the left, text preview on the right.
    with gr.Row():
        output_file = gr.File(label="Converted File")
        preview = gr.Textbox(lines=15, label="Preview (Top 10 Rows)")

    # convert_file returns (path, message), matching the two outputs in order.
    convert_button.click(
        fn=convert_file,
        inputs=[input_file, conversion_type],
        outputs=[output_file, preview],
    )

    gr.Markdown(
        value="""
    ### Notes:
    - This converter uses UTF-8 as the default encoding
    - If UTF-8 fails, it tries Latin-1, ISO-8859-1, and CP1252 encodings
    - Parquet files preserve data types better than CSV
    - The preview shows only the first 10 rows of data
    """
    )

# Start the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()