File size: 3,666 Bytes
694c1c6
 
d03fbaa
 
1bcc7b4
694c1c6
 
 
 
 
 
 
 
 
 
 
86b6d3f
 
694c1c6
 
86b6d3f
7f8d6ba
 
 
 
694c1c6
86b6d3f
7f8d6ba
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03fbaa
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86b6d3f
7f8d6ba
 
 
 
 
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03fbaa
694c1c6
d03fbaa
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115

import matplotlib.pyplot as plt
import matplotlib
matplotlib.use('agg')

import plot_utils
from constants import *


class MatplotlibDataPlotter:
    """Matplotlib-backed plotter for single-domain and domain-pair tables.

    The constructor only stores the three tables it is given. Both plotting
    methods are currently placeholders: they ignore their arguments and hand
    back pyplot's current figure, so no bar plot is drawn by this class.

    NOTE(review): the inputs are presumably pandas DataFrames (the names end
    in ``_df``) -- confirm against the caller.
    """

    def __init__(self, single_df, pair_df, num_domains_in_region_df):
        """Store the input tables for later use.

        Args:
            single_df: Table of single-domain records.
            pair_df: Table of domain-pair records.
            num_domains_in_region_df: Table with per-region domain counts.
        """
        self.single_df = single_df
        self.pair_df = pair_df
        self.num_domains_in_region_df = num_domains_in_region_df
        # No dedicated figures are created here; the plot methods fall back
        # to pyplot's current figure.

    def plot_single_domains(self, num_domains, split_name):
        """Return pyplot's current figure (single-domain plotting is disabled).

        Args:
            num_domains: Currently unused; kept so existing callers keep working.
            split_name: Currently unused; kept so existing callers keep working.

        Returns:
            The figure returned by ``plt.gcf()``. pyplot creates a new figure
            if none is open.
        """
        return plt.gcf()

    def plot_pair_domains(self, num_domains, split_name):
        """Return pyplot's current figure (domain-pair plotting is disabled).

        Args:
            num_domains: Currently unused; kept so existing callers keep working.
            split_name: Currently unused; kept so existing callers keep working.

        Returns:
            The figure returned by ``plt.gcf()``. pyplot creates a new figure
            if none is open.
        """
        return plt.gcf()