Spaces:

latticetower
/

keyword-embeddings-space

Sleeping

File size: 3,666 Bytes

694c1c6
 
d03fbaa
 
1bcc7b4
694c1c6
 
 
 
 
 
 
 
 
 
 
86b6d3f
 
694c1c6
 
86b6d3f
7f8d6ba
 
 
 
694c1c6
86b6d3f
7f8d6ba
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03fbaa
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86b6d3f
7f8d6ba
 
 
 
 
694c1c6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d03fbaa
694c1c6
d03fbaa
694c1c6


import matplotlib

# Pick the non-interactive Agg backend *before* pyplot is imported, so pyplot
# starts on it directly instead of being switched after the fact.
matplotlib.use('agg')

import matplotlib.pyplot as plt

import plot_utils
from constants import *


class MatplotlibDataPlotter:
    """Matplotlib-backed plotter for single-domain and domain-pair data.

    The instance only stores the three tables it is given. Both plotting
    methods are currently stubs: they return pyplot's current figure and draw
    nothing on it.

    TODO: the intended bar plots are not implemented here. An earlier draft
    (pick, per ``cds_region_id``, the row with the highest
    ``cosine_similarity_<split_name>`` and pass its ``biosyn_class_index`` /
    ``profile_name`` values to ``plot_utils.draw_barplots``) sat after an
    unconditional ``return`` and relied on names that were never defined
    (the ``*_df_subset`` frames and the per-plot figure attributes), so it
    was unreachable and would have raised if re-enabled as written.
    """

    def __init__(self, single_df, pair_df, num_domains_in_region_df):
        """Store the input tables; no figures are created here.

        Args:
            single_df: Table of single-domain records (stored as-is).
            pair_df: Table of domain-pair records (stored as-is).
            num_domains_in_region_df: Table of per-region domain counts
                (stored as-is).
        """
        self.single_df = single_df
        self.pair_df = pair_df
        self.num_domains_in_region_df = num_domains_in_region_df

    def plot_single_domains(self, num_domains, split_name):
        """Return the figure used for the single-domain plot.

        Args:
            num_domains: Currently unused.
            split_name: Currently unused.

        Returns:
            The current pyplot figure (``plt.gcf()``). Nothing is drawn on it.
        """
        return plt.gcf()

    def plot_pair_domains(self, num_domains, split_name):
        """Return the figure used for the domain-pair plot.

        Args:
            num_domains: Currently unused.
            split_name: Currently unused.

        Returns:
            The current pyplot figure (``plt.gcf()``). Nothing is drawn on it.
        """
        return plt.gcf()