Spaces:
Sleeping
Sleeping
Commit
·
ca7444f
1
Parent(s):
5710643
add fix for int64 conversion error
Browse files
- app.py +9 -1
- mpl_data_plotter.py +2 -1
app.py
CHANGED
@@ -8,14 +8,22 @@ from constants import *
|
|
8 |
|
9 |
from mpl_data_plotter import MatplotlibDataPlotter
|
10 |
|
|
|
|
|
|
|
|
|
|
|
|
|
11 |
|
12 |
print(f"Loading domains data...")
|
13 |
single_df = pd.read_csv(SINGLE_DOMAINS_FILE, compression='gzip')
|
14 |
single_df['biosyn_class_index'] = single_df.bgc_class.apply(lambda x: BIOSYN_CLASS_NAMES.index(x))
|
|
|
15 |
|
16 |
pair_df = pd.read_csv(PAIR_DOMAINS_FILE, compression='gzip')
|
17 |
pair_df['biosyn_class_index'] = pair_df.bgc_class.apply(lambda x: BIOSYN_CLASS_NAMES.index(x))
|
18 |
-
|
|
|
19 |
num_domains_in_region_df = single_df.groupby('cds_region_id', as_index=False).agg({'as_domain_id': 'count'}).rename(
|
20 |
columns={'as_domain_id': 'num_domains'})
|
21 |
|
|
|
8 |
|
9 |
from mpl_data_plotter import MatplotlibDataPlotter
|
10 |
|
11 |
+
def convert_int64_to_int32(df):
    """Downcast the int64 columns of *df* to int32, in place.

    Every column whose dtype compares equal to ``'int64'`` is replaced by
    an int32 copy, provided all of its values fit in a signed 32-bit
    integer. A column holding values outside that range is left as int64:
    ``astype('int32')`` would not raise for it, it would silently wrap the
    values around and corrupt the data.

    Columns of any other dtype are not touched.

    Args:
        df: The pandas DataFrame to convert. It is modified in place.

    Returns:
        The same DataFrame object that was passed in.
    """
    # Bounds of a signed 32-bit integer, spelled out so the function needs
    # nothing beyond the DataFrame it is given.
    int32_min = -2 ** 31
    int32_max = 2 ** 31 - 1

    for col in df.columns:
        values = df[col]
        if not (values.dtype == 'int64'):
            continue

        # An empty column has nothing that could overflow; only inspect the
        # range when there is data.
        if not values.empty and (values.min() < int32_min or values.max() > int32_max):
            continue

        # Kept from the original implementation: logs each converted column.
        print(col)
        df[col] = values.astype('int32')
    return df
|
17 |
|
18 |
print(f"Loading domains data...")
|
19 |
single_df = pd.read_csv(SINGLE_DOMAINS_FILE, compression='gzip')
|
20 |
single_df['biosyn_class_index'] = single_df.bgc_class.apply(lambda x: BIOSYN_CLASS_NAMES.index(x))
|
21 |
+
single_df = convert_int64_to_int32(single_df)
|
22 |
|
23 |
pair_df = pd.read_csv(PAIR_DOMAINS_FILE, compression='gzip')
|
24 |
pair_df['biosyn_class_index'] = pair_df.bgc_class.apply(lambda x: BIOSYN_CLASS_NAMES.index(x))
|
25 |
+
pair_df = convert_int64_to_int32(pair_df)
|
26 |
+
|
27 |
num_domains_in_region_df = single_df.groupby('cds_region_id', as_index=False).agg({'as_domain_id': 'count'}).rename(
|
28 |
columns={'as_domain_id': 'num_domains'})
|
29 |
|
mpl_data_plotter.py
CHANGED
@@ -26,6 +26,7 @@ class MatplotlibDataPlotter:
|
|
26 |
'cds_region_id'].values
|
27 |
single_df_subset = self.single_df.loc[self.single_df.cds_region_id.isin(selected_region_ids)]
|
28 |
|
|
|
29 |
split_name = 'stratified'
|
30 |
column_name = f'cosine_similarity_{split_name}'
|
31 |
# single_df_subset = single_df.loc[single_df.dom_location_len >= num_domains]
|
@@ -69,7 +70,7 @@ class MatplotlibDataPlotter:
|
|
69 |
self.num_domains_in_region_df.num_domains >= num_domains,
|
70 |
'cds_region_id'].values
|
71 |
pair_df_subset = self.pair_df.loc[self.pair_df.cds_region_id.isin(selected_region_ids)]
|
72 |
-
|
73 |
split_name = 'stratified'
|
74 |
column_name = f'cosine_similarity_{split_name}'
|
75 |
# pair_df_subset = pair_df.loc[pair_df.dom_location_len >= num_domains]
|
|
|
26 |
'cds_region_id'].values
|
27 |
single_df_subset = self.single_df.loc[self.single_df.cds_region_id.isin(selected_region_ids)]
|
28 |
|
29 |
+
return self.single_domains_fig
|
30 |
split_name = 'stratified'
|
31 |
column_name = f'cosine_similarity_{split_name}'
|
32 |
# single_df_subset = single_df.loc[single_df.dom_location_len >= num_domains]
|
|
|
70 |
self.num_domains_in_region_df.num_domains >= num_domains,
|
71 |
'cds_region_id'].values
|
72 |
pair_df_subset = self.pair_df.loc[self.pair_df.cds_region_id.isin(selected_region_ids)]
|
73 |
+
return self.pair_domains_fig
|
74 |
split_name = 'stratified'
|
75 |
column_name = f'cosine_similarity_{split_name}'
|
76 |
# pair_df_subset = pair_df.loc[pair_df.dom_location_len >= num_domains]
|