File size: 5,473 Bytes
4fb102d
862e96e
1dde253
 
3d4b0bc
 
862e96e
f4cb8a7
862e96e
 
 
 
 
3d4b0bc
 
 
 
1dde253
 
 
 
 
 
 
 
 
 
 
 
 
b6b3b16
709650d
1dde253
 
 
 
 
 
f4cb8a7
862e96e
 
 
 
 
f4cb8a7
 
862e96e
 
 
 
 
 
 
 
 
3d4b0bc
 
1dde253
 
3d4b0bc
71bc990
 
 
 
3d4b0bc
 
1dde253
3d4b0bc
1dde253
 
3d4b0bc
 
 
 
 
 
 
 
 
 
 
 
1dde253
709650d
1dde253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71bc990
709650d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1dde253
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fb102d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
import re

import streamlit as st
import transformers as tf
import plotly.graph_objects as go
import matplotlib.cm as cm
import pandas as pd


# Function to load and cache models
@st.experimental_singleton(show_spinner=False)
def load_model(username, prefix, model_name):
    """Return a text-classification pipeline for one model.

    The model identifier passed to ``transformers.pipeline`` is assembled as
    ``{username}/{prefix}-{model_name}``. The function is wrapped in
    Streamlit's singleton decorator with the spinner disabled.
    """
    model_id = f'{username}/{prefix}-{model_name}'
    classifier = tf.pipeline('text-classification', model_id)
    return classifier

@st.experimental_singleton(show_spinner=False)
def load_pickle(f):
    """Read the pickled object at ``f`` with pandas and return it.

    Wrapped in Streamlit's singleton decorator with the spinner disabled.
    """
    unpickled = pd.read_pickle(f)
    return unpickled

def get_results(model, c):
    """Run ``model`` on the comment ``c`` and summarize its first prediction.

    Only the first item returned by ``model(c)`` is used. Its ``'label'`` is
    split on underscores and the second piece is converted to a float; its
    ``'score'`` is passed through unchanged.

    Returns:
        A dict with the keys ``'label'`` (float) and ``'score'``.
    """
    prediction = model(c)[0]
    label_parts = prediction['label'].split('_')
    return {
        'label': float(label_parts[1]),
        'score': prediction['score'],
    }

def run_models(model_names, models, c):
    """Score the comment ``c`` with every model listed in ``model_names``.

    Each name is looked up in the ``models`` mapping and the resulting model
    is passed to ``get_results`` together with ``c``.

    Returns:
        A dict mapping each model name to its ``get_results`` output.
    """
    return {name: get_results(models[name], c) for name in model_names}


# Page header: the title followed by a short introduction to the tool
st.title('Assess the *QuAL*ity of your feedback')
intro_text = """Medical education *requires* high-quality feedback, but evaluating feedback
is difficult and time-consuming. This tool uses NLP/ML to predict a validated
feedback quality metric known as the QuAL Score. *Try it for yourself!*
"""
st.caption(intro_text)

### Load models
# Account name and shared prefix used to build each model identifier
USERNAME = 'maxspad'
PREFIX = 'nlp-qual'
# Suffixes of the models to fetch; they also serve as keys of ``models``
models_to_load = ['qual', 'q1', 'q2i', 'q3i']
n_models = float(len(models_to_load))
models = {}
# The caption and progress bar live in a placeholder so that both can be
# removed together once every model is available
lc_placeholder = st.empty()
loader_container = lc_placeholder.container()
loader_container.caption('Loading models... please wait...')
pbar = loader_container.progress(0.0)
for step, mn in enumerate(models_to_load, start=1):
    # The bar is advanced before the load of model ``mn`` starts
    pbar.progress(step / n_models)
    models[mn] = load_model(USERNAME, PREFIX, mn)
lc_placeholder.empty()

### Load example data
examples = load_pickle('test.pkl')

### Process input
# Draw one random example comment; it seeds the text area on first load and
# is what the "Try an example!" button switches to
ex = examples['comment'].sample(1).tolist()[0]
try:
    ex = ex.strip().replace('_x000D_', '')
except AttributeError:
    # Non-string entries have no .strip(); fall back to a placeholder
    ex = 'blank'
else:
    # Replace only a standalone "nan" token with the placeholder, so words that
    # merely contain those letters (e.g. "financial") are left intact
    ex = re.sub(r'\bnan\b', 'blank', ex)
if 'comment' not in st.session_state:
    st.session_state['comment'] = ex
with st.form('comment_form'):
    comment = st.text_area('Try a comment:', value=st.session_state['comment'])
    left_col, right_col = st.columns([1,9], gap='medium')
    submitted = left_col.form_submit_button('Submit')
    trying_example = right_col.form_submit_button('Try an example!')

    # Store the chosen comment in session state, then rerun the script so the
    # rest of the page is rendered for that comment
    if submitted:
        st.session_state['button_clicked'] = 'submit'
        st.session_state['comment'] = comment
        st.experimental_rerun()
    elif trying_example:
        st.session_state['button_clicked'] = 'example'
        st.session_state['comment'] = ex
        st.experimental_rerun()

    
# Score the comment currently held in session state with every loaded model
current_comment = st.session_state['comment']
results = run_models(models_to_load, models, current_comment)

# Tab labels, in display order
tab_titles = [
    'Overview',
    'Q1 - Level of Detail',
    'Q2 - Suggestion Given',
    'Q3 - Suggestion Linked',
    'About',
]
tabs = st.tabs(tab_titles)

with tabs[0]:
    # Pick the gauge bar color from a red-yellow-green colormap based on the
    # overall QuAL score.
    # NOTE(review): the score is divided by 6 while the gauge axis ends at 5,
    # so a score of 5 does not reach the end of the colormap — confirm intended.
    cmap = cm.get_cmap('RdYlGn')
    red, green, blue, alpha = cmap(results['qual']['label'] / 6.0)
    # rgba() expects 0-255 integer channels and a 0-1 alpha; the colormap
    # returns all four components as 0-1 floats
    color = (
        f'rgba({int(round(red * 255))}, {int(round(green * 255))}, '
        f'{int(round(blue * 255))}, {float(alpha)})'
    )

    fig = go.Figure(go.Indicator(
        domain = {'x': [0, 1], 'y': [0, 1]},
        value = results['qual']['label'],
        mode = "gauge+number",
        title = {'text': "QuAL"},
        gauge = {'axis': {'range': [None, 5]},
                'bgcolor': 'lightgray',
                'bar': {'color': color, 'thickness': 1.0},
                }
        ), layout=go.Layout(margin=dict(t=0, b=135)))

    # Gauge on the left (wide column), the three sub-scores on the right
    cols = st.columns([7, 3])
    with cols[0]:
        st.plotly_chart(fig, use_container_width=True)
    with cols[1]:
        # Q1: level of detail. Labels arrive as floats; 0.0 == 0 and both hash
        # alike, so integer keys match them.
        detail_levels = {
            0: '😥 None',
            1: '😐 Low',
            2: '😊 Medium',
            3: '😁 High',
        }
        q1lab = results['q1']['label']
        # Fall back to a visible marker instead of failing on an unexpected label
        md_str = detail_levels.get(q1lab, '❓ Unknown')
        cols[1].metric('Level of Detail', md_str, 
            help='How specific was the evaluator in describing the behavior?')

        # Q2: for this model label 0 is displayed as "Yes"
        q2lab = results['q2i']['label']
        md_str = '✅ Yes' if q2lab == 0 else '❌ No'
        cols[1].metric('Suggestion Given', (md_str),
            help='Did the evaluator give a suggestion for improvement?')

        # Q3: same convention, label 0 is displayed as "Yes"
        q3lab = results['q3i']['label']
        md_str = '✅ Yes' if q3lab == 0 else '❌ No'
        cols[1].metric('Suggestion Linked', md_str,
            help='Is the suggestion for improvement linked to the described behavior?')




# denoms = ['5','3']
# for mn in models_to_load:
#     st.header(mn)
#     cols = st.columns(2)
#     res = models[mn](comment)[0]

#     if mn == 'qual':
#         cols[0].metric('Score', f"{res['label'].split('_')[1]}/5")
#     elif mn == 'q1':
#         cols[0].metric('Score', f"{res['label'].split('_')[1]}/3")
#     elif mn == 'q2i':
#         if res['label'] == 'LABEL_0':
#             cols[0].metric('Suggestion for improvement?', 'Yes')
#         else:
#             cols[0].metric('Suggestion for improvement?', 'No')
#     elif mn == 'q3i':
#         if res['label'] == 'LABEL_0':
#             cols[0].metric('Suggestion linked?', 'Yes')
#        else:
#             cols[0].metric('Suggestion linked?', 'No')

#     cols[1].caption('Confidence')
#     cols[1].progress(res['score'])