import nltk
nltk.download('punkt')
from nltk.tokenize import sent_tokenize
# Alternative generation backend, kept for reference:
# from llama_generate import generate_answer
from tiny_llama import generate_answer
from self_check_gpt import nli_confidence
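
# Self-consistency confidence scoring for generated answers, in the spirit of
# SelfCheckGPT: the model is sampled several times, the first sample is treated
# as the answer, and the remaining samples ("proofs") are used to score how well
# each sentence of the answer is supported, either by prompting the model itself
# (check_score / run_prompt) or via an NLI model (nli_confidence / run_nli).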

def get_yes_or_no(result):
    """Normalize a free-form model response to 'Yes', 'No', or 'N/A'."""
    prefix = result.lower()[:5]
    if 'yes' in prefix:
        return 'Yes'
    if 'no' in prefix:
        return 'No'
    return 'N/A'


def check_score(context, sentences):
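    """Ask the model whether each sentence is supported by `context`.

    Returns one score per sentence: 1.0 for "Yes", 0.0 for "No",
    and 0.5 when the response cannot be parsed.
    """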
    score_mapping = {'Yes':1.0, 'No':0.0}
    template = """
        Context: {a}
        Sentence: {b}
        is the sentence supported by the context above? 
        Answer "Yes" or "No"
    """
    scores, results = list(), list()
    for sentence in sentences:
        content = template.format(a=context.strip().replace('\n', ' '), b=sentence.strip().replace('\n', ' '))
        result = generate_answer(content, sample_num=1)[0]
        #print(result)
        results.append(result)

    results = [get_yes_or_no(r) for r in results]
    scores = [score_mapping.get(result, 0.5) for result in results]

    # for sent, score in zip(sentences, scores):
    #     print(sent.strip(), score)
        #result_string += sent + ' ({a})'.format(a=score)

    return scores


def run_prompt(query, sample_size=5):
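    """Answer `query` and score each sentence by prompting the model itself.

    Draws sample_size + 1 samples: the first is the answer, the rest serve as
    proofs. Each sentence's confidence is its average support across the proofs.
    """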
    sampled = generate_answer(query, sample_size+1)
    answer = sampled[0]
    proofs = sampled[1:]
    sentences = sent_tokenize(answer)

    all_scores = list()
    for proof in proofs:
        scores = check_score(proof, sentences)
        all_scores.append(scores)

    final_content = ''
    avg_confidence = list()
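    # zip(*all_scores) regroups the scores by sentence: one tuple per sentence,
    # containing that sentence's support score from each proof.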
    for index, scores in enumerate(zip(*all_scores)):
        sentence_confidence = sum(scores) / len(scores)
        avg_confidence.append(sentence_confidence)
        final_content += sentences[index].strip() + ' ({a}) '.format(a=sentence_confidence)
    avg_confidence = sum(avg_confidence) / len(avg_confidence)
    final_content += '\nThe confidence score of this answer is {a}'.format(a=avg_confidence)
    return final_content


def run_nli(query, sample_size=5):
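    """Answer `query` and score each sentence with an NLI model.

    Uses the same sampling scheme as run_prompt, but delegates scoring to
    nli_confidence (from self_check_gpt), which returns one score per sentence.
    """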
    sampled = generate_answer(query, sample_size + 1)
    answer = sampled[0]
    proofs = sampled[1:]
    sentences = sent_tokenize(answer)

    scores = nli_confidence(proofs, sentences)

    final_content = ''
    for index, sent in enumerate(sentences):
        final_content += sent.strip() + ' ({a}) '.format(a=scores[index])
    final_content += '\nThe confidence score of this answer is {a}'.format(a=round(sum(scores)/len(scores), 4))
    return final_content



if __name__ == '__main__':
    # result = generate_answer(query="Who is Lihu Chen?", sample_num=3)
    # print(result)

    # context = """
    # Lihu Chen is an American writer and artist who works in comics. They received their degree in psychology from California State University, Fullerton and have worked on titles such as "The Gathering Storm" and "Heartthrob".
    # """
    # sentences = sent_tokenize("""
    # Lihu Chen is an American writer and artist who works in comics. They received their degree in psychology from California State University, Fullerton and have worked on titles such as "The Gathering Storm" and "Heartthrob".
    # """)
    # result = check_score(context, sentences)
    # print(result)

    answer = run_nli(query='tell me something about Albert Einstein', sample_size=5)
    print(answer)