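# Page-header residue from the hosting site ("Spaces: Sleeping"), kept as a
# comment so that it no longer breaks parsing of this module.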
import matplotlib

# Select the non-interactive Agg backend *before* pyplot is imported, so the
# choice is already in effect the first time pyplot is used. Older Matplotlib
# releases required this ordering for the call to take effect.
matplotlib.use('agg')

import matplotlib.pyplot as plt  # noqa: E402  (must follow matplotlib.use)

import plot_utils  # noqa: E402
# Star import kept unchanged: the set of names `constants` exports is not
# visible from this file, so it cannot be narrowed safely here.
from constants import *  # noqa: E402,F401,F403
class MatplotlibDataPlotter:
    """Keep the input tables and hand Matplotlib figures back to the caller.

    Rendering is currently disabled: both ``plot_*`` methods return
    Matplotlib's current figure without drawing anything on it.

    NOTE(review): each method used to carry a bar-plot implementation *after*
    an unconditional ``return``.  That code was unreachable and could not
    have run as written -- it read a ``*_df_subset`` variable and a
    ``self.*_domains_fig`` attribute that were never assigned (their
    definitions were commented out).  It has been removed rather than
    guessed at.  For whoever re-enables plotting, the removed code expected:

    * ``single_df`` / ``pair_df`` rows filtered by ``cds_region_id``, with the
      columns ``cosine_similarity_<split_name>``, ``biosyn_class_index`` and
      ``profile_name``;
    * ``num_domains_in_region_df`` with the columns ``num_domains`` and
      ``cds_region_id``;
    * one row per ``cds_region_id`` (the ``idxmax`` of the similarity
      column), passed to ``plot_utils.draw_barplots``.
    """

    def __init__(self, single_df, pair_df, num_domains_in_region_df):
        """Store the three input tables; no figure is created here.

        Args:
            single_df: Table behind the single-domain plot.  Presumably a
                pandas DataFrame -- TODO confirm against the caller.
            pair_df: Table behind the pair-domain plot (same assumption).
            num_domains_in_region_df: Table relating ``cds_region_id`` to
                ``num_domains`` (same assumption).
        """
        self.single_df = single_df
        self.pair_df = pair_df
        self.num_domains_in_region_df = num_domains_in_region_df

    def plot_single_domains(self, num_domains, split_name):
        """Return Matplotlib's current figure; single-domain plotting is off.

        Args:
            num_domains: Currently unused; kept so existing callers keep
                working.  The removed code used it as a lower bound on the
                ``num_domains`` column when choosing regions.
            split_name: Currently unused; kept for the same reason.  The
                removed code used it to pick the
                ``cosine_similarity_<split_name>`` column.

        Returns:
            The figure reported by ``plt.gcf()`` (Matplotlib creates one if
            none exists yet).
        """
        return plt.gcf()

    def plot_pair_domains(self, num_domains, split_name):
        """Return Matplotlib's current figure; pair-domain plotting is off.

        Args:
            num_domains: Currently unused; kept so existing callers keep
                working.  The removed code used it as a lower bound on the
                ``num_domains`` column when choosing regions.
            split_name: Currently unused; kept for the same reason.  The
                removed code used it to pick the
                ``cosine_similarity_<split_name>`` column.

        Returns:
            The figure reported by ``plt.gcf()`` (Matplotlib creates one if
            none exists yet).
        """
        return plt.gcf()