File size: 3,270 Bytes
24371db
 
00dae37
63c3a67
 
dd82d0a
1647e02
dd82d0a
63c3a67
 
 
 
 
 
 
 
 
 
 
 
00dae37
 
 
 
 
 
24371db
73a1633
 
 
 
63c3a67
 
 
 
 
 
 
 
73a1633
 
 
 
 
 
 
 
fb65c41
331a091
 
 
 
 
 
 
 
 
24371db
331a091
 
 
 
 
 
00dae37
331a091
1647e02
61f7b79
1647e02
3d87c18
1647e02
 
61f7b79
 
 
 
 
331a091
 
63c3a67
c101c53
 
 
331a091
dd82d0a
331a091
 
 
 
 
 
 
 
24371db
331a091
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import pandas as pd
import sqlite3
import csv
import json
import time
import os
import re
from utils import TEMP_DIR

def is_file_done_saving(file_path):
    try:
        with open(file_path, 'r') as f:
            contents = f

        if contents:
            return True
        else:
            return False
    except PermissionError:
        return False

def get_delimiter(file_path, bytes = 4096):
    sniffer = csv.Sniffer()
    data = open(file_path, "r").read(bytes)
    delimiter = sniffer.sniff(data).delimiter
    return delimiter

def read_file(file):
    if file.endswith(('.csv', '.tsv', '.txt')) :
        df = pd.read_csv(file, sep=get_delimiter(file))
    elif file.endswith('.json'):
        with open(file, 'r') as f:
            contents = json.load(f)
        df = pd.json_normalize(contents)
    elif file.endswith('.ndjson'):
        with open(file, 'r') as f:
            contents = f.read()
        data = [json.loads(str(item)) for item in contents.strip().split('\n')]
        df = pd.json_normalize(data)
    elif file.endswith('.xml'):
        df = pd.read_xml(file)
    elif file.endswith(('.xls','xlsx')):
        df = pd.read_excel(file)
    else:
        raise ValueError(f'Unsupported filetype: {file}')
    return df

def process_data_upload(data_file, session_hash):
    try:
        total_time = 0
        while not is_file_done_saving(data_file):
            total_time += .5
            time.sleep(.5)
            if total_time > 10:
                break
            
        df = read_file(data_file)

        # Read each sheet and store data in a DataFrame
        #data = df.parse(sheet_name)
        # Process the data as needed
        # ...
        df.columns = df.columns.str.replace(' ', '_')
        df.columns = df.columns.str.replace('/', '_')

        for column in df.columns:
            if type(column) is str:
                if "date" in column.lower() or "time" in column.lower():
                    try: 
                        df[column] = pd.to_datetime(df[column])
                    except:
                        pass
                if 'year' in column.lower():
                    try: 
                        df[column] = pd.to_datetime(df[column], format='%Y')
                    except:
                        pass
            if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
                df[column] = df[column].explode()

        session_path = 'file_upload'

        dir_path = TEMP_DIR / str(session_hash) / str(session_path)
        os.makedirs(dir_path, exist_ok=True)

        connection = sqlite3.connect(f'{dir_path}/data_source.db')
        print("Opened database successfully");
        print(df.columns)

        df.to_sql('data_source', connection, if_exists='replace', index = False)
        
        connection.commit()
        connection.close()

        return ["success","<p style='color:green;text-align:center;font-size:18px;'>Data upload successful</p>"]
    except Exception as e:
        print("UPLOAD ERROR")
        print(e)
        return ["error",f"<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: {e}</p>"]