File size: 7,616 Bytes
009d93e
 
 
e6e7506
 
 
009d93e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6e7506
009d93e
e6e7506
009d93e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6e7506
 
 
009d93e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4754e33
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
009d93e
 
 
 
 
 
 
 
4754e33
009d93e
 
 
 
 
 
 
4754e33
 
e6e7506
 
 
009d93e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6e7506
 
 
009d93e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
from langchain.prompts import PromptTemplate
from .prompt_example import *

# ==================================================================== #
#                           SCHEMA AGENT                               #
# ==================================================================== #

# Get Text Analysis
# Prompt body for the text-analysis step. Placeholders filled at format time:
#   {examples} - inserted on its own line right after the instruction,
#   {text}     - the text to analyze and categorize,
#   {schema}   - inserted after the "Output Schema" label.
# The label used to read "Output Shema"; it is spelled correctly here so the
# model sees the same "Output Schema" wording as the other prompts in this file.
TEXT_ANALYSIS_INSTRUCTION = """
**Instruction**: Please analyze and categorize the given text.
{examples}
**Text**: {text}

**Output Schema**: {schema}
"""

# Ready-to-format wrapper around TEXT_ANALYSIS_INSTRUCTION; the declared
# variables mirror the three placeholders in that template.
text_analysis_instruction = PromptTemplate(
    template=TEXT_ANALYSIS_INSTRUCTION,
    input_variables=["examples", "text", "schema"],
)

# Get Deduced Schema Json
# Prompt body asking the model to propose an output schema as a JSON object
# whose attribute values are all None (no real data).
# Placeholders present in the text: {examples}, {instruction},
# {distilled_text} and {text}. There is no {schema} placeholder in this body.
DEDUCE_SCHEMA_JSON_INSTRUCTION = """
**Instruction**: Generate an output format that meets the requirements as described in the task. Pay attention to the following requirements:
    - Format: Return your responses in dictionary format as a JSON object.
    - Content: Do not include any actual data; all attributes values should be set to None.
    - Note: Attributes not mentioned in the task description should be ignored.
{examples}
**Task**: {instruction}

**Text**: {distilled_text}
{text}

Now please deduce the output schema in json format. All attributes values should be set to None.
**Output Schema**:
"""

# Ready-to-format wrapper around DEDUCE_SCHEMA_JSON_INSTRUCTION.
# Only the placeholders that actually occur in that template are declared:
# it contains {examples}, {instruction}, {distilled_text} and {text}, and has
# no {schema} field, so "schema" is not listed as an input variable.
deduced_schema_json_instruction = PromptTemplate(
    input_variables=["examples", "instruction", "distilled_text", "text"],
    template=DEDUCE_SCHEMA_JSON_INSTRUCTION,
)

# Get Deduced Schema Code
# Prompt body asking the model to express the output schema as Pydantic code
# with a final class named 'ExtractionTarget'.
# Placeholders: {examples}, {instruction}, {distilled_text}, {text}.
# The string deliberately ends right after "**Output Schema**: " (no trailing
# newline), so the model's answer continues on that line.
# Wording fixes relative to the earlier text: lower-case "define" after the
# comma, and a space after the comma in "'```', and".
DEDUCE_SCHEMA_CODE_INSTRUCTION = """
**Instruction**: Based on the provided text and task description, define the output schema in Python using Pydantic. Name the final extraction target class as 'ExtractionTarget'.
{examples}
**Task**: {instruction}

**Text**: {distilled_text}
{text}

Now please deduce the output schema. Ensure that the output code snippet is wrapped in '```', and can be directly parsed by the Python interpreter.
**Output Schema**: """
# Ready-to-format wrapper around DEDUCE_SCHEMA_CODE_INSTRUCTION; the declared
# variables match the four placeholders in that template.
deduced_schema_code_instruction = PromptTemplate(
    template=DEDUCE_SCHEMA_CODE_INSTRUCTION,
    input_variables=["examples", "instruction", "distilled_text", "text"],
)


# ==================================================================== #
#                         EXTRACTION AGENT                             #
# ==================================================================== #

# Prompt body for the extraction step.
# {instruction} is appended to the fixed opening sentence; {examples}, {text},
# {additional_info} and {schema} each fill their own slot below it.
# The opening sentence previously misspelled "extraction" as "extarction".
EXTRACT_INSTRUCTION = """
**Instruction**: You are an agent skilled in information extraction. {instruction}
{examples}
**Text**: {text}
{additional_info}
**Output Schema**: {schema}

Now please extract the corresponding information from the text. Ensure that the information you extract has a clear reference in the given text. Set any property not explicitly mentioned in the text to null.
"""

# Ready-to-format wrapper around EXTRACT_INSTRUCTION; the declared variables
# match the five placeholders in that template.
extract_instruction = PromptTemplate(
    template=EXTRACT_INSTRUCTION,
    input_variables=["instruction", "examples", "text", "schema", "additional_info"],
)

# Fixed instruction text per task code. Keys are the task codes "NER", "RE"
# and "EE"; each value is one plain string, written here as adjacent literals
# (one sentence per line) that Python joins at compile time.
instruction_mapper = {
    "NER": (
        "You are an expert in named entity recognition. "
        "Please extract entities that match the schema definition from the input. "
        "Return an empty list if the entity type does not exist. "
        "Please respond in the format of a JSON string."
    ),
    "RE": (
        "You are an expert in relationship extraction. "
        "Please extract relationship triples that match the schema definition from the input. "
        "Return an empty list for relationships that do not exist. "
        "Please respond in the format of a JSON string."
    ),
    "EE": (
        "You are an expert in event extraction. "
        "Please extract events from the input that conform to the schema definition. "
        "Return an empty list for events that do not exist, and return NAN for arguments that do not exist. "
        "If an argument has multiple values, please return a list. "
        "Respond in the format of a JSON string."
    ),
}

# JSON-shaped prompt body with "instruction", "schema" and "input" entries.
# The doubled braces are str.format escapes and render as single "{" / "}".
# {instruction}, {constraint} and {input} are substituted verbatim (no quoting
# is added here), and the last entry keeps its trailing comma, so the rendered
# text is JSON-like rather than guaranteed strict JSON.
EXTRACT_INSTRUCTION_JSON = """
{{
    "instruction": {instruction},
    "schema": {constraint},
    "input": {input},
}}
"""

# Ready-to-format wrapper; note the template variable for the "schema" entry
# is named "constraint".
extract_instruction_json = PromptTemplate(
    template=EXTRACT_INSTRUCTION_JSON,
    input_variables=["instruction", "constraint", "input"],
)

# Prompt body for merging per-segment extraction results into one answer.
# Placeholders: {examples}, {instruction}, {answer_list}, {schema}.
#
# NOTE(review): SUMMARIZE_INSTRUCTION and summarize_instruction are both
# assigned a second time further down this module (after the reflection
# prompt). Module-level assignments run top to bottom, so once the import has
# finished the later pair is the one bound to these names and this pair is
# shadowed. The later variant takes {additional_info} instead of {examples}
# and has a shorter closing sentence -- confirm which one callers expect and
# remove or rename the other.
SUMMARIZE_INSTRUCTION = """
**Instruction**: Below is a list of results obtained after segmenting and extracting information from a long article. Please consolidate all the answers to generate a final response.
{examples}
**Task**: {instruction}

**Result List**: {answer_list}

**Output Schema**: {schema}
Now summarize all the information from the Result List. Filter or merge the redundant information.
"""
# Wrapper declaring the four placeholders used by the body above.
summarize_instruction = PromptTemplate(
    input_variables=["instruction", "examples", "answer_list", "schema"],
    template=SUMMARIZE_INSTRUCTION,
)




# ==================================================================== #
#                          REFLECTION AGENT                            #
# ==================================================================== #
# Prompt body for the reflection step: the model is shown reference
# reflections ({examples}), the task ({instruction}), the source {text}, the
# target {schema} and the {result} to review. Unlike the other prompt bodies
# in this file, the text starts immediately after the opening quotes (no
# leading newline) and ends with a blank line.
REFLECT_INSTRUCTION = """**Instruction**: You are an agent skilled in reflection and optimization based on the original result. Refer to **Reflection Reference** to identify potential issues in the current extraction results.

**Reflection Reference**: {examples}

Now please review each element in the extraction result. Identify and improve any potential issues in the result based on the reflection. NOTE: If the original result is correct, no modifications are needed!

**Task**: {instruction}

**Text**: {text}

**Output Schema**: {schema}

**Original Result**: {result}

"""

# Ready-to-format wrapper; the declared variables match the five placeholders.
reflect_instruction = PromptTemplate(
    template=REFLECT_INSTRUCTION,
    input_variables=["instruction", "examples", "text", "schema", "result"],
)

# Prompt body for merging per-segment extraction results into one answer.
# Placeholders: {instruction}, {answer_list}, {additional_info}, {schema}.
#
# NOTE(review): SUMMARIZE_INSTRUCTION and summarize_instruction are also
# assigned earlier in this module (in the extraction-agent section, with an
# {examples} placeholder instead of {additional_info}). Because this
# assignment runs later, it is the one left bound to both names after import.
# Confirm which variant callers expect and remove or rename the other.
SUMMARIZE_INSTRUCTION = """
**Instruction**: Below is a list of results obtained after segmenting and extracting information from a long article. Please consolidate all the answers to generate a final response.

**Task**: {instruction}

**Result List**: {answer_list}
{additional_info}
**Output Schema**: {schema}
Now summarize the information from the Result List.
"""
# Wrapper declaring the four placeholders used by the body above.
summarize_instruction = PromptTemplate(
    input_variables=["instruction", "answer_list", "additional_info", "schema"],
    template=SUMMARIZE_INSTRUCTION,
)



# ==================================================================== #
#                            CASE REPOSITORY                           #
# ==================================================================== #

# Prompt body asking the model to explain a known-correct extraction result.
# Placeholders: {instruction} (task), {text}, {additional_info} (on its own
# line after the text) and {result} (the correct answer to analyze).
# The closing request previously misspelled "brief" as "breif".
GOOD_CASE_ANALYSIS_INSTRUCTION = """
**Instruction**: Below is an information extraction task and its corresponding correct answer. Provide the reasoning steps that led to the correct answer, along with brief explanation of the answer. Your response should be brief and organized.

**Task**: {instruction}

**Text**: {text}
{additional_info}
**Correct Answer**: {result}

Now please generate the reasoning steps and brief analysis of the **Correct Answer** given above. DO NOT generate your own extraction result.
**Analysis**:
"""
# Ready-to-format wrapper around GOOD_CASE_ANALYSIS_INSTRUCTION; the declared
# variables match the four placeholders in that template.
good_case_analysis_instruction = PromptTemplate(
    template=GOOD_CASE_ANALYSIS_INSTRUCTION,
    input_variables=["instruction", "text", "result", "additional_info"],
)

# Prompt body asking the model to compare a wrong extraction with the correct
# one and produce a short reflection or rule summary.
# Placeholders: {instruction} (task), {text}, {additional_info} (on its own
# line after the text), {original_answer} and {correct_answer}.
BAD_CASE_REFLECTION_INSTRUCTION = """
**Instruction**: Based on the task description, compare the original answer with the correct one. Your output should be a brief reflection or concise summarized rules.

**Task**: {instruction}

**Text**: {text}
{additional_info}
**Original Answer**: {original_answer}

**Correct Answer**: {correct_answer}

Now please generate a brief and organized reflection. DO NOT generate your own extraction result.
**Reflection**:
"""

# Ready-to-format wrapper; the declared variables match the five placeholders.
bad_case_reflection_instruction = PromptTemplate(
    template=BAD_CASE_REFLECTION_INSTRUCTION,
    input_variables=["instruction", "text", "original_answer", "correct_answer", "additional_info"],
)