File size: 3,270 Bytes
24371db 00dae37 63c3a67 dd82d0a 1647e02 dd82d0a 63c3a67 00dae37 24371db 73a1633 63c3a67 73a1633 fb65c41 331a091 24371db 331a091 00dae37 331a091 1647e02 61f7b79 1647e02 3d87c18 1647e02 61f7b79 331a091 63c3a67 c101c53 331a091 dd82d0a 331a091 24371db 331a091 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
import pandas as pd
import sqlite3
import csv
import json
import time
import os
import re
from utils import TEMP_DIR
def is_file_done_saving(file_path):
try:
with open(file_path, 'r') as f:
contents = f
if contents:
return True
else:
return False
except PermissionError:
return False
def get_delimiter(file_path, bytes = 4096):
sniffer = csv.Sniffer()
data = open(file_path, "r").read(bytes)
delimiter = sniffer.sniff(data).delimiter
return delimiter
def read_file(file):
if file.endswith(('.csv', '.tsv', '.txt')) :
df = pd.read_csv(file, sep=get_delimiter(file))
elif file.endswith('.json'):
with open(file, 'r') as f:
contents = json.load(f)
df = pd.json_normalize(contents)
elif file.endswith('.ndjson'):
with open(file, 'r') as f:
contents = f.read()
data = [json.loads(str(item)) for item in contents.strip().split('\n')]
df = pd.json_normalize(data)
elif file.endswith('.xml'):
df = pd.read_xml(file)
elif file.endswith(('.xls','xlsx')):
df = pd.read_excel(file)
else:
raise ValueError(f'Unsupported filetype: {file}')
return df
def process_data_upload(data_file, session_hash):
try:
total_time = 0
while not is_file_done_saving(data_file):
total_time += .5
time.sleep(.5)
if total_time > 10:
break
df = read_file(data_file)
# Read each sheet and store data in a DataFrame
#data = df.parse(sheet_name)
# Process the data as needed
# ...
df.columns = df.columns.str.replace(' ', '_')
df.columns = df.columns.str.replace('/', '_')
for column in df.columns:
if type(column) is str:
if "date" in column.lower() or "time" in column.lower():
try:
df[column] = pd.to_datetime(df[column])
except:
pass
if 'year' in column.lower():
try:
df[column] = pd.to_datetime(df[column], format='%Y')
except:
pass
if df[column].dtype == 'object' and isinstance(df[column].iloc[0], list):
df[column] = df[column].explode()
session_path = 'file_upload'
dir_path = TEMP_DIR / str(session_hash) / str(session_path)
os.makedirs(dir_path, exist_ok=True)
connection = sqlite3.connect(f'{dir_path}/data_source.db')
print("Opened database successfully");
print(df.columns)
df.to_sql('data_source', connection, if_exists='replace', index = False)
connection.commit()
connection.close()
return ["success","<p style='color:green;text-align:center;font-size:18px;'>Data upload successful</p>"]
except Exception as e:
print("UPLOAD ERROR")
print(e)
return ["error",f"<p style='color:red;text-align:center;font-size:18px;font-weight:bold;'>ERROR: {e}</p>"] |