File size: 2,305 Bytes
24371db 00dae37 63c3a67 00dae37 24371db 73a1633 63c3a67 73a1633 fb65c41 63c3a67 73a1633 24371db 00dae37 63c3a67 24371db fb65c41 24371db |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import pandas as pd
import sqlite3
import csv
import json
import time
def is_file_done_saving(file_path):
    """Report whether *file_path* can be opened and already contains data.

    Args:
        file_path: Path of the file to probe.

    Returns:
        True if the file opens and at least one byte can be read from it.
        False if the file is empty or opening it raises PermissionError.

    Raises:
        OSError: Any failure other than PermissionError (for example
            FileNotFoundError) is propagated unchanged.
    """
    try:
        # Binary mode: only the presence of data matters, and reading raw
        # bytes cannot fail with a decode error on non-text files.
        with open(file_path, 'rb') as f:
            # Testing the file object itself would always be truthy, so
            # actually read a byte to find out whether anything was written.
            return bool(f.read(1))
    except PermissionError:
        return False
def get_delimiter(file_path, bytes = 4096):
    """Guess the field delimiter of a delimited text file.

    Args:
        file_path: Path of the text file to inspect.
        bytes: Maximum number of characters read from the start of the file
            and handed to ``csv.Sniffer``. The name shadows the builtin but
            is kept so existing keyword callers continue to work.

    Returns:
        The delimiter character detected by ``csv.Sniffer().sniff``.

    Raises:
        csv.Error: If the sniffer cannot determine a delimiter from the
            sample.
    """
    # Use a context manager so the handle is closed even if read() fails.
    with open(file_path, "r") as f:
        sample = f.read(bytes)
    return csv.Sniffer().sniff(sample).delimiter
def read_file(file):
    """Load a data file into a pandas DataFrame, dispatching on its extension.

    Supported extensions (matched case-insensitively):
        .csv / .tsv / .txt  -> ``pd.read_csv`` with a sniffed delimiter
        .json               -> ``json.load`` + ``pd.json_normalize``
        .ndjson             -> one JSON document per line, normalized
        .xml                -> ``pd.read_xml``
        .xls / .xlsx        -> ``pd.read_excel``

    Args:
        file: Path of the file to read, as a string.

    Returns:
        The DataFrame produced by the matching reader.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    # Compare against a lower-cased copy so "DATA.CSV" is handled like
    # "data.csv"; the original path is still what gets opened.
    name = file.lower()

    if name.endswith(('.csv', '.tsv', '.txt')):
        return pd.read_csv(file, sep=get_delimiter(file))

    if name.endswith('.json'):
        with open(file, 'r') as f:
            return pd.json_normalize(json.load(f))

    if name.endswith('.ndjson'):
        with open(file, 'r') as f:
            # Skip blank lines so stray empty lines between or after records
            # do not abort the whole load.
            records = [json.loads(line) for line in f if line.strip()]
        return pd.json_normalize(records)

    if name.endswith('.xml'):
        return pd.read_xml(file)

    # Both suffixes carry their leading dot so that a name that merely ends
    # in the letters "xlsx" is not mistaken for a spreadsheet.
    if name.endswith(('.xls', '.xlsx')):
        return pd.read_excel(file)

    raise ValueError(f'Unsupported filetype: {file}')
def process_data_upload(data_file, session_hash):
    """Load an uploaded data file into a per-session SQLite database.

    The file is read with ``read_file``, its column names are sanitized, and
    the resulting frame is written to table ``data_source`` in
    ``data_source_{session_hash}.db``, replacing any existing table.

    Args:
        data_file: Path of the uploaded file.
        session_hash: Value interpolated into the database file name.

    Returns:
        None.

    Raises:
        ValueError: Propagated from ``read_file`` for unsupported file types.
    """
    # Poll every half second until the upload is readable, giving up after
    # roughly 10 seconds; the read below is attempted either way.
    total_time = 0
    while not is_file_done_saving(data_file):
        total_time += .5
        time.sleep(.5)
        if total_time > 10:
            break

    df = read_file(data_file)

    # Replace spaces and slashes in column names with underscores.
    df.columns = df.columns.str.replace(' ', '_').str.replace('/', '_')

    # Iterate over a snapshot of the names because ``df`` may be rebound
    # inside the loop when a column is exploded.
    for column in list(df.columns):
        if "date" in column.lower() or "time" in column.lower():
            # The name match is only a heuristic (it also hits names such as
            # "Timezone"), so a column that cannot be parsed is kept as-is
            # instead of aborting the whole upload.
            try:
                df[column] = pd.to_datetime(df[column])
            except (ValueError, TypeError, OverflowError):
                print(f"Column {column!r} could not be parsed as datetime; leaving it unchanged")

        # Expand list-valued cells into one row per element. Exploding the
        # whole frame keeps the other columns aligned; assigning an exploded
        # Series back fails once any list holds more than one element because
        # its index then contains duplicates. ``.any()`` is False on an empty
        # column, so empty frames are handled without indexing row 0.
        if df[column].dtype == 'object' and df[column].map(lambda v: isinstance(v, list)).any():
            df = df.explode(column, ignore_index=True)

    connection = sqlite3.connect(f'data_source_{session_hash}.db')
    try:
        print("Opened database successfully")
        print(df.columns)
        df.to_sql('data_source', connection, if_exists='replace', index = False)
        connection.commit()
    finally:
        # Always release the database handle, even if the write fails.
        connection.close()