chore: update
Browse files- .gitattributes +2 -35
- Document.md +767 -0
- README.md +2 -2
- data/input_files/ChineseNewsExample.json +200 -0
- figs/webdemo/demo000.png +3 -0
- figs/webdemo/demo001.png +3 -0
- figs/webdemo/demo010.png +3 -0
- figs/webdemo/demo011.png +3 -0
- figs/webdemo/demo020.png +3 -0
- figs/webdemo/demo021.png +3 -0
- figs/webdemo/demo030.png +3 -0
- figs/webdemo/demo031.png +3 -0
- figs/webdemo/demo040.png +3 -0
- figs/webdemo/demo041.png +3 -0
- figs/webdemo/demo050.png +3 -0
- figs/webdemo/demo051.png +3 -0
- src/models/llm_def.py +2 -2
- src/webui.py +146 -53
.gitattributes
CHANGED
@@ -1,36 +1,3 @@
|
|
1 |
-
|
2 |
-
*.arrow filter=lfs diff=lfs merge=lfs -text
|
3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
4 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
5 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
11 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
12 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
13 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
14 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
15 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
16 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
17 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
18 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
19 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
20 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
21 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
22 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
23 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
24 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
25 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
26 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
27 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
28 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
29 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
30 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
31 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
32 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -textdata/datasets/NYT11/train.json filter=lfs diff=lfs merge=lfs -text
|
36 |
data/input_files/Harry_Potter_Chapter1.pdf filter=lfs diff=lfs merge=lfs -text
|
|
|
|
1 |
+
data/datasets/NYT11/train.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
data/input_files/Harry_Potter_Chapter1.pdf filter=lfs diff=lfs merge=lfs -text
|
3 |
+
figs/webdemo/*.png filter=lfs diff=lfs merge=lfs -text
|
Document.md
ADDED
@@ -0,0 +1,767 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
|
2 |
+
<p align="center">
|
3 |
+
<img src="./figs/logo.png" width="300px">
|
4 |
+
</p>
|
5 |
+
|
6 |
+
<h3 align="center"> A Dockerized Schema-Guided Knowledge Extraction System </h3>
|
7 |
+
|
8 |
+
<p align="center">
|
9 |
+
<a href="https://oneke.openkg.cn/">🌐Web</a> •
|
10 |
+
<a href="http://oneke.openkg.cn/demo.mp4">📹Video</a>
|
11 |
+
</p>
|
12 |
+
|
13 |
+
## Table of Contents
|
14 |
+
- [Table of Contents](#table-of-contents)
|
15 |
+
- [🔔News](#news)
|
16 |
+
- [🌟Overview](#overview)
|
17 |
+
- [🚀Quick Start](#quick-start)
|
18 |
+
- [Step1: Environment Setup](#step1-environment-setup)
|
19 |
+
- [🔩Manual Environment Configuration](#manual-environment-configuration)
|
20 |
+
- [🐳Building With Docker Image](#building-with-docker-image)
|
21 |
+
- [Step2: Start with Examples](#step2-start-with-examples)
|
22 |
+
- [🖊️Start with CLI](#️start-with-cli)
|
23 |
+
- [🖊️Start with Web UI](#️start-with-web-ui)
|
24 |
+
- [📟 Web UI Navigation](#-web-ui-navigation)
|
25 |
+
- [Initial Page](#initial-page)
|
26 |
+
- [Model Configuration](#model-configuration)
|
27 |
+
- [Task Configuration](#task-configuration)
|
28 |
+
- [Text Input](#text-input)
|
29 |
+
- [Set Instruction](#set-instruction)
|
30 |
+
- [Get Result](#get-result)
|
31 |
+
- [🔍Further Usage](#further-usage)
|
32 |
+
- [💡Extraction Task Support](#extraction-task-support)
|
33 |
+
- [1. Named Entity Recognition](#1-named-entity-recognition)
|
34 |
+
- [2. Relation Extraction](#2-relation-extraction)
|
35 |
+
- [3. Event Extraction](#3-event-extraction)
|
36 |
+
- [4. Triple Extraction](#4-triple-extraction)
|
37 |
+
- [Build Knowledge-Graph](#build-knowledge-graph)
|
38 |
+
- [5. Open Domain IE](#5-open-domain-ie)
|
39 |
+
- [💡Data Source Support](#data-source-support)
|
40 |
+
- [💡Extraction Model Support](#extraction-model-support)
|
41 |
+
- [💡Extraction Method Support](#extraction-method-support)
|
42 |
+
- [💡Knowledge Base Configuration](#knowledge-base-configuration)
|
43 |
+
- [1. Schema Repository](#1-schema-repository)
|
44 |
+
- [2. Case Repository](#2-case-repository)
|
45 |
+
- [🛠️Network Issue Solutions](#️network-issue-solutions)
|
46 |
+
- [🎉Contributors](#contributors)
|
47 |
+
- [🌻Acknowledgement](#acknowledgement)
|
48 |
+
|
49 |
+
---
|
50 |
+
|
51 |
+
## 🔔News
|
52 |
+
- **[2025/02] We support the local deployment of the [DeepSeek-R1](https://huggingface.co/collections/deepseek-ai/deepseek-r1-678e1e131c0169c0bc89728d) series in addition to the existing API service, as well as vllm acceleration for other LLMs.**
|
53 |
+
- **[2025/01] [OneKE](https://arxiv.org/abs/2412.20005) is accepted by WWW 2025 Demonstration Track 🎉🎉🎉.**
|
54 |
+
- **[2024/12] We open source the *OneKE* framework, supporting multi-agent knowledge extraction across various scenarios.**
|
55 |
+
- **[2024/04] We release a new bilingual (Chinese and English) schema-based information extraction model called [OneKE](https://huggingface.co/zjunlp/OneKE) based on Chinese-Alpaca-2-13B.**
|
56 |
+
|
57 |
+
## 🌟Overview
|
58 |
+
**OneKE** is a flexible dockerized system for schema-guided knowledge extraction, capable of extracting information from the web and raw PDF books across multiple domains like science and news. It employs a collaborative multi-agent approach and includes a user-customizable knowledge base to enable tailored extraction. Embark on your information extraction journey with OneKE!
|
59 |
+
|
60 |
+
<img src="./figs/main.png" alt="method" style="zoom: 50%;" />
|
61 |
+
|
62 |
+
OneKE currently offers the following features:
|
63 |
+
- [x] Various IE Tasks Support
|
64 |
+
- [x] Various Data Sources Support
|
65 |
+
- [x] Various LLMs Support
|
66 |
+
- [x] Various Extraction Method Support
|
67 |
+
- [x] User-Configurable Knowledge Base
|
68 |
+
|
69 |
+
|
70 |
+
## 🚀Quick Start
|
71 |
+
We have developed a webpage demo for OneKE with Gradio. Click [here](http://120.27.214.45:7876/) to try information extraction in an intuitive way.
|
72 |
+
|
73 |
+
> Note: The demo only displays OneKE's basic capabilities for efficiency. Consider the local deployment steps below for further features.
|
74 |
+
|
75 |
+
### Step1: Environment Setup
|
76 |
+
OneKE supports both manual and docker image environment configuration, choose your preferred method to build.
|
77 |
+
|
78 |
+
#### 🔩Manual Environment Configuration
|
79 |
+
Conda virtual environments offer a light and flexible setup.
|
80 |
+
|
81 |
+
**Prerequisites**
|
82 |
+
- Anaconda Installation
|
83 |
+
- GPU support (recommended CUDA version: 12.4)
|
84 |
+
|
85 |
+
**Configure Steps**
|
86 |
+
|
87 |
+
1. Clone the repository:
|
88 |
+
```bash
|
89 |
+
git clone https://github.com/zjunlp/OneKE.git
|
90 |
+
```
|
91 |
+
2. Enter the working directory, and all subsequent commands should be executed in this directory.
|
92 |
+
```bash
|
93 |
+
cd OneKE
|
94 |
+
```
|
95 |
+
3. Create a virtual environment using `Anaconda`.
|
96 |
+
```bash
|
97 |
+
conda create -n oneke python=3.9
|
98 |
+
conda activate oneke
|
99 |
+
```
|
100 |
+
4. Install all required Python packages.
|
101 |
+
```bash
|
102 |
+
pip install -r requirements.txt
|
103 |
+
# If you encounter network issues, consider setting up a domestic mirror for pip.
|
104 |
+
```
|
105 |
+
|
106 |
+
#### 🐳Building With Docker Image
|
107 |
+
Docker image provides greater reliability and stability.
|
108 |
+
|
109 |
+
**Prerequisites**
|
110 |
+
- Docker Installation
|
111 |
+
- NVIDIA Container Toolkit
|
112 |
+
- GPU support (recommended CUDA version: 12.4)
|
113 |
+
|
114 |
+
**Configure Steps**
|
115 |
+
1. Clone the repository:
|
116 |
+
```bash
|
117 |
+
git clone https://github.com/zjunlp/OneKE.git
|
118 |
+
```
|
119 |
+
2. Pull the docker image from the mirror repository.
|
120 |
+
```bash
|
121 |
+
docker pull zjunlp/oneke:v4
|
122 |
+
# If you encounter network issues, consider setting up domestic registry mirrors for docker.
|
123 |
+
```
|
124 |
+
3. Launch a container from the image.
|
125 |
+
```bash
|
126 |
+
docker run --gpus all \
|
127 |
+
-v ./OneKE:/app/OneKE \
|
128 |
+
-it zjunlp/oneke:v4 /bin/bash
|
129 |
+
```
|
130 |
+
If using locally deployed models, ensure the local model path is mapped to the container:
|
131 |
+
```bash
|
132 |
+
docker run --gpus all \
|
133 |
+
-v ./OneKE:/app/OneKE \
|
134 |
+
-v your_local_model_path:/app/model/your_model_name \
|
135 |
+
-it zjunlp/oneke:v4 /bin/bash
|
136 |
+
```
|
137 |
+
Map any **necessary local files** to the container paths as shown above, and use **container paths** in your code and execution.
|
138 |
+
|
139 |
+
Upon starting, the container will enter the `/app/OneKE` directory as its working directory. Just modify the code locally as needed, and the changes will sync to the container through mapping.
|
140 |
+
|
141 |
+
### Step2: Start with Examples
|
142 |
+
|
143 |
+
We offer two quick-start options. Choose your preferred method to swiftly explore OneKE with predefined examples.
|
144 |
+
|
145 |
+
> Note:
|
146 |
+
> - **Ensure** that your working directory is set to the **`OneKE`** folder, whether in a virtual environment or a docker container.
|
147 |
+
> - Refer to [here](#network-issue-solutions) to resolve the **network issues**. If you have more questions, feel free to open an issue with us.
|
148 |
+
|
149 |
+
|
150 |
+
#### 🖊️Start with CLI
|
151 |
+
**Step1: Prepare the configuration file**
|
152 |
+
|
153 |
+
Several YAML configuration files are available in the `examples/config`. These extraction scenarios cover different extraction data, methods, and models, allowing you to easily explore all the features of OneKE.
|
154 |
+
|
155 |
+
***Web News Extraction:***
|
156 |
+
|
157 |
+
Here is the example for the web news knowledge extraction scenario, with the source extraction text in `HTML` format:
|
158 |
+
```yaml
|
159 |
+
# model configuration
|
160 |
+
model:
|
161 |
+
category: DeepSeek # model category, chosen from ChatGPT, DeepSeek, LLaMA, Qwen, ChatGLM, MiniCPM, OneKE.
|
162 |
+
model_name_or_path: deepseek-chat # model name, chosen from deepseek-chat and deepseek-reasoner. Choose deepseek-chat to use DeepSeek-V3 or choose deepseek-reasoner to use DeepSeek-R1.
|
163 |
+
api_key: your_api_key # your API key for the model with API service. No need for open-source models.
|
164 |
+
base_url: https://api.deepseek.com # base URL for the API service. No need for open-source models.
|
165 |
+
|
166 |
+
# extraction configuration
|
167 |
+
extraction:
|
168 |
+
task: Base # task type, chosen from Base, NER, RE, EE.
|
169 |
+
instruction: Extract key information from the given text. # description for the task. No need for NER, RE, EE task.
|
170 |
+
use_file: true # whether to use a file for the input text. Default set to false.
|
171 |
+
file_path: ./data/input_files/Tulsi_Gabbard_News.html # path to the input file. No need if use_file is set to false.
|
172 |
+
output_schema: NewsReport # output schema for the extraction task. Selected from the schema repository.
|
173 |
+
mode: customized # extraction mode, chosen from quick, detailed, customized. Default set to quick. See src/config.yaml for more details.
|
174 |
+
update_case: false # whether to update the case repository. Default set to false.
|
175 |
+
show_trajectory: false # whether to display the extracted intermediate steps
|
176 |
+
```
|
177 |
+
|
178 |
+
The `model` section contains information about the extraction model, while the `extraction` section configures the settings for the extraction process.
|
179 |
+
|
180 |
+
You can choose an existing configuration file or customize the extraction settings as you wish. Note that when using an API service like ChatGPT and DeepSeek, please **set your API key**.
|
181 |
+
|
182 |
+
**Step2: Run the shell script**
|
183 |
+
|
184 |
+
Specify the configuration file path and run the code to start the extraction process.
|
185 |
+
```bash
|
186 |
+
config_file=your_yaml_file_path # configuration file path, use the container path if inside a container
|
187 |
+
python src/run.py --config $config_file # start extraction, executed in the OneKE directory
|
188 |
+
```
|
189 |
+
|
190 |
+
If you want to deploy the local models using vllm, run the following code:
|
191 |
+
```bash
|
192 |
+
config_file=your_yaml_file_path # REMEMBER to set vllm_serve to TRUE!
|
193 |
+
python src/models/vllm_serve.py --config $config_file # deploy local model via vllm, executed in the OneKE directory
|
194 |
+
python src/run.py --config $config_file # start extraction, executed in the OneKE directory
|
195 |
+
```
|
196 |
+
|
197 |
+
Refer to [here](https://github.com/zjunlp/OneKE/tree/main/examples/results) to get an overview of the knowledge extraction results.
|
198 |
+
|
199 |
+
> Note: You can also try OneKE by directly running the `example.py` file located in the `examples` directory. In this way, you can explore more advanced uses flexibly.
|
200 |
+
|
201 |
+
#### 🖊️Start with Web UI
|
202 |
+
|
203 |
+
> Note: Before starting with the web UI, make sure the package `gradio 4.44.0` is already installed in your [Environment](https://github.com/zjunlp/OneKE/tree/main/requirements.txt).
|
204 |
+
|
205 |
+
**Step1: Execute Command**
|
206 |
+
|
207 |
+
Execute the following commands in the `OneKE` directory:
|
208 |
+
|
209 |
+
```bash
|
210 |
+
python src/webui.py
|
211 |
+
```
|
212 |
+
|
213 |
+
**Step2: Open your Web Browser**
|
214 |
+
|
215 |
+
The front-end is built with Gradio, and the default port of Gradio is 7860. Therefore, please enter the following URL in your browser's address bar to open the web interface:
|
216 |
+
|
217 |
+
```
|
218 |
+
http://127.0.0.1:7860
|
219 |
+
```
|
220 |
+
|
221 |
+
The web service interface is now complete, so you can visually configure tasks and obtain results through it.
|
222 |
+
|
223 |
+
## 📟 Web UI Navigation
|
224 |
+
|
225 |
+
### Initial Page
|
226 |
+
|
227 |
+
<table>
|
228 |
+
<tr>
|
229 |
+
<td><img src="./figs/webdemo/demo000.png"></td>
|
230 |
+
<td><img src="./figs/webdemo/demo001.png"></td>
|
231 |
+
</tr>
|
232 |
+
</table>
|
233 |
+
|
234 |
+
Here are three main function buttons:
|
235 |
+
|
236 |
+
1. `🎲 Quick Start with an Example 🎲`: Quickly get a simple example to try OneKE.
|
237 |
+
2. `Submit`: After configuring your customized tasks, click this button to run.
|
238 |
+
3. `Clear`: When a task is completed, click this button to restore.
|
239 |
+
|
240 |
+
### Model Configuration
|
241 |
+
|
242 |
+
<table>
|
243 |
+
<tr>
|
244 |
+
<td><img src="./figs/webdemo/demo010.png"></td>
|
245 |
+
<td><img src="./figs/webdemo/demo011.png"></td>
|
246 |
+
</tr>
|
247 |
+
</table>
|
248 |
+
|
249 |
+
1. `🪄 Enter your Model`: You can enter your model name here, such as *gpt-4o-mini*, *o3-mini*, *deepseek-chat*, *deepseek-reasoner*, etc. We also support local models — just input the local model path. For more details, please read [this part](#extraction-model-support).
|
250 |
+
2. `🔑 Enter your API-Key`: Enter your model's API key here. We ensure the security of your information. If you are using a local model, you don’t need to fill in this field.
|
251 |
+
3. `🔗 Enter your Base-URL`: We support any custom Base-URL. If you are using the default URL, please leave this field empty.
|
252 |
+
|
253 |
+
### Task Configuration
|
254 |
+
|
255 |
+
<table>
|
256 |
+
<tr>
|
257 |
+
<td><img src="./figs/webdemo/demo020.png"></td>
|
258 |
+
<td><img src="./figs/webdemo/demo021.png"></td>
|
259 |
+
</tr>
|
260 |
+
</table>
|
261 |
+
|
262 |
+
|
263 |
+
1. `🎯 Select your Task`: Choose your task. We support both **Traditional IE** (NER, RE, EE, and Triple for knowledge graph) and **Open Domain IE** such as Web News Extraction, Book Knowledge Extraction, and **any other extraction task** you want. For more details, please read [this part](#extraction-task-support).
|
264 |
+
2. `🧭 Select your Mode`: Choose your extraction method. We offer predefined agent combinations, or you can fully `customize` your own agent strategy. For longer text extraction tasks, we recommend using the `direct mode`; for shorter tasks requiring high accuracy, you can try the `standard mode`. If you're customizing the mode and don't need any agent, simply select `Not Required`. For more details, please read [this part](#extraction-method-support).
|
265 |
+
|
266 |
+
### Text Input
|
267 |
+
|
268 |
+
<table>
|
269 |
+
<tr>
|
270 |
+
<td><img src="./figs/webdemo/demo030.png"></td>
|
271 |
+
<td><img src="./figs/webdemo/demo031.png"></td>
|
272 |
+
</tr>
|
273 |
+
</table>
|
274 |
+
|
275 |
+
1. `📂 Use File`: You can either input the text or upload a file.
|
276 |
+
2. `📖 Upload a File`: If you want to upload a file, make sure the `📂 Use File` is checked first. Then you can drop a file here or click to upload. We support various file formats such as *.pdf*, *.html*, etc. For more details, please read [this part](#data-source-support).
|
277 |
+
3. `📖 Text`: You can enter text in any language here.
|
278 |
+
|
279 |
+
### Set Instruction
|
280 |
+
|
281 |
+
<table>
|
282 |
+
<tr>
|
283 |
+
<td><img src="./figs/webdemo/demo040.png"></td>
|
284 |
+
<td><img src="./figs/webdemo/demo041.png"></td>
|
285 |
+
</tr>
|
286 |
+
</table>
|
287 |
+
|
288 |
+
1. `🕹️ Instruction`: You can enter any type of information you want to extract here, for example: *Please help me extract all the person names*.
|
289 |
+
2. `💰 Update Case`: Check this box if you want to update the **Case Repository**, then you need to provide your truth. For more details, please read [this part](#2-case-repository).
|
290 |
+
3. `🪙 Truth`: You can enter the truth you want LLMs to know, for example: *{"relation_list": [{"head": "Guinea", "tail": "Conakry", "relation": "country capital"}]}*.
|
291 |
+
|
292 |
+
### Get Result
|
293 |
+
|
294 |
+
<table>
|
295 |
+
<tr>
|
296 |
+
<td><img src="./figs/webdemo/demo050.png"></td>
|
297 |
+
<td><img src="./figs/webdemo/demo051.png"></td>
|
298 |
+
</tr>
|
299 |
+
</table>
|
300 |
+
|
301 |
+
After configuring your customized tasks, click the `Submit` button to run. The results will be displayed here:
|
302 |
+
|
303 |
+
1. **`🤔 Generated Schema`**: This is the extraction schema automatically generated by OneKE for your task, presented in a structured **Python Class** format.
|
304 |
+
2. **`😉 Final Answer`**: This is the final extraction result, presented in structured **JSON** format.
|
305 |
+
3. `😵💫 Error capture`: This section displays all error messages caught during the process, such as network issues, etc. For more details, please read [this part](#️network-issue-solutions).
|
306 |
+
|
307 |
+
## 🔍Further Usage
|
308 |
+
### 💡Extraction Task Support
|
309 |
+
You can try different types of information extraction tasks within the OneKE framework.
|
310 |
+
| **Task** | **Description** |
|
311 |
+
| :---: | :---: |
|
312 |
+
| ***Traditional IE*** | |
|
313 |
+
| NER | Named Entity Recognition, identifies and classifies various named entities such as names, locations, and organizations in text. |
|
314 |
+
| RE | Relation Extraction, identifies relationships between entities, and typically returns results as entity-relation-entity triples. |
|
315 |
+
| EE | Event Extraction, identifies events in text, focusing on event triggers and associated participants, known as event arguments. |
|
316 |
+
| Triple | Triple Extraction, identifies subject-predicate-object triples in text. A triple is a fundamental data structure in information extraction, representing a piece of knowledge or fact. Knowledge graph can be quickly constructed after the Triple Extraction. |
|
317 |
+
| ***Open Domain IE***||
|
318 |
+
| Web News Extraction| Involves extracting key entities and events from online news articles to generate structured insights. |
|
319 |
+
| Book Knowledge Extraction | Extracts information such as key concepts, themes, and facts from book chapters. |
|
320 |
+
| Other | Encompasses information extraction from different types of content, such as social media and research papers, each tailored to the specific context and data type. |
|
321 |
+
|
322 |
+
In subsequent code processing, we categorize tasks into five types: `NER` for Named Entity Recognition, `RE` for Relation Extraction, `EE` for Event Extraction, `Triple` for Triple Extraction, and `Base` for any other user-defined open-domain extraction tasks.
|
323 |
+
|
324 |
+
|
325 |
+
#### 1. Named Entity Recognition
|
326 |
+
Named entity recognition seeks to locate and classify named entities mentioned in unstructured text into pre-defined entity types such as person names, organizations, locations, etc.
|
327 |
+
|
328 |
+
Refer to the case defined in `examples/config/NER.yaml` as an example:
|
329 |
+
| Text | Entity Types |
|
330 |
+
| --- |--- |
|
331 |
+
| Finally, every other year, ELRA organizes a major conference LREC, the International Language Resources and Evaluation Conference. | Algorithm, Conference, Else, Product, Task, Field, Metrics, Organization, Researcher, Program Language, Country, Location, Person, University |
|
332 |
+
|
333 |
+
In this task setting, `Text` represents the text to be extracted, while `Entity Types` denote the constraint on the types of entities to be extracted. Accordingly, we set the `text` and `constraint` attributes in the YAML file to their respective values.
|
334 |
+
|
335 |
+
Next, follow the steps below to complete the NER task:
|
336 |
+
|
337 |
+
- Complete `./examples/config/NER.yaml`:
|
338 |
+
|
339 |
+
configure the necessary model and extraction settings.
|
340 |
+
|
341 |
+
- Run the shell script below:
|
342 |
+
```bash
|
343 |
+
config_file=./examples/config/NER.yaml
|
344 |
+
python src/run.py --config $config_file
|
345 |
+
```
|
346 |
+
( Refer to [issues](#network-issue-solutions) for any network issues. )
|
347 |
+
|
348 |
+
The final extraction result should be:
|
349 |
+
| Text | Conference |
|
350 |
+
| --- | --- |
|
351 |
+
| Finally, every other year, ELRA organizes a major conference LREC, the International Language Resources and Evaluation Conference. | ELRA, LREC, International Language Resources and Evaluation Conference |
|
352 |
+
|
353 |
+
Click [here](https://github.com/zjunlp/OneKE/tree/main/examples/results/NER.json) to obtain the raw results in `json` format.
|
354 |
+
> Note: The actual extraction results may not exactly match this due to LLM randomness.
|
355 |
+
|
356 |
+
The result indicates that, given the text and entity type constraint, entities of type `conference` have been extracted: `ELRA`, `LREC`, `International Language Resources and Evaluation Conference`.
|
357 |
+
|
358 |
+
You can either specify entity type constraints or omit them. Without constraints, OneKE will extract all entities from the sentence.
|
359 |
+
|
360 |
+
|
361 |
+
#### 2. Relation Extraction
|
362 |
+
Relation extraction is the task of extracting semantic relations between entities from unstructured text.
|
363 |
+
|
364 |
+
Refer to the case defined in `examples/config/RE.yaml` as an example:
|
365 |
+
| Text | Relation Types |
|
366 |
+
| --- |--- |
|
367 |
+
| The aid group Doctors Without Borders said that since Saturday , more than 275 wounded people had been admitted and treated at Donka Hospital in the capital of Guinea , Conakry . | Nationality, Country Capital, Place of Death, Children, Location Contains, Place of Birth, Place Lived, Administrative Division of Country, Country of Administrative Divisions, Company, Neighborhood of, Company Founders |
|
368 |
+
|
369 |
+
In this task setting, `Text` represents the text to be extracted, while `Relation Types` denote the constraint on the types of relations of entities to be extracted. Accordingly, we set the `text` and `constraint` attributes in the YAML file to their respective values.
|
370 |
+
|
371 |
+
Next, follow the steps below to complete the RE task:
|
372 |
+
- Complete `./examples/config/RE.yaml`:
|
373 |
+
configure the necessary model and extraction settings
|
374 |
+
- Run the shell script below:
|
375 |
+
```bash
|
376 |
+
config_file=./examples/config/RE.yaml
|
377 |
+
python src/run.py --config $config_file
|
378 |
+
```
|
379 |
+
( Refer to [issues](#network-issue-solutions) for any network issues. )
|
380 |
+
|
381 |
+
The final extraction result should be:
|
382 |
+
|
383 |
+
| Text | Head Entity | Tail Entity | Relationship |
|
384 |
+
| --- | --- | --- | --- |
|
385 |
+
| The aid group Doctors Without Borders said that since Saturday , more than 275 wounded people had been admitted and treated at Donka Hospital in the capital of Guinea , Conakry . | Guinea | Conakry | Country-Capital |
|
386 |
+
|
387 |
+
Click [here](https://github.com/zjunlp/OneKE/tree/main/examples/results/RE.json) to obtain the raw results in `json` format.
|
388 |
+
> Note: The actual extraction results may not exactly match this due to LLM randomness.
|
389 |
+
|
390 |
+
The result indicates that the relation `Country-Capital` is extracted from the given text based on the relation list, accompanied by the corresponding head entity `Guinea` and tail entity `Conakry`, which denotes that `Conakry is the capital of Guinea`.
|
391 |
+
|
392 |
+
You can either specify relation type constraints or omit them. Without constraints, OneKE will extract all relation triples from the sentence.
|
393 |
+
|
394 |
+
|
395 |
+
#### 3. Event Extraction
|
396 |
+
Event extraction is the task of extracting event types, event trigger words, and event arguments from unstructured text, which is a more complex IE task compared to the first two.
|
397 |
+
|
398 |
+
Refer to the case defined in `examples/config/EE.yaml` as an example:
|
399 |
+
|
400 |
+
The extraction text is:
|
401 |
+
|
402 |
+
```
|
403 |
+
UConn Health , an academic medical center , says in a media statement that it identified approximately 326,000 potentially impacted individuals whose personal information was contained in the compromised email accounts.
|
404 |
+
```
|
405 |
+
while the event type constraint is formatted as follows:
|
406 |
+
| Event Type | Event Argument |
|
407 |
+
| --- | --- |
|
408 |
+
| phishing | damage amount, attack pattern, tool, victim, place, attacker, purpose, trusted entity, time |
|
409 |
+
| data breach | damage amount, attack pattern, number of data, number of victim, tool, compromised data, victim, place, attacker, purpose, time |
|
410 |
+
| ransom | damage amount, attack pattern, payment method, tool, victim, place, attacker, price, time |
|
411 |
+
| discover vulnerability | vulnerable system, vulnerability, vulnerable system owner, vulnerable system version, supported platform, common vulnerabilities and exposures, capabilities, time, discoverer |
|
412 |
+
| patch vulnerability | vulnerable system, vulnerability, issues addressed, vulnerable system version, releaser, supported platform, common vulnerabilities and exposures, patch number, time, patch |
|
413 |
+
|
414 |
+
Each event type has its own corresponding event arguments.
|
415 |
+
|
416 |
+
Next, follow the steps below to complete the EE task:
|
417 |
+
- Complete `./examples/config/EE.yaml`:
|
418 |
+
configure the necessary model and extraction settings
|
419 |
+
- Run the shell script below:
|
420 |
+
```bash
|
421 |
+
config_file=./examples/config/EE.yaml
|
422 |
+
python src/run.py --config $config_file
|
423 |
+
```
|
424 |
+
( Refer to [issues](#network-issue-solutions) for any network issues. )
|
425 |
+
|
426 |
+
The final extraction result should be:
|
427 |
+
|
428 |
+
<table>
|
429 |
+
<tr>
|
430 |
+
<th>Text</th>
|
431 |
+
<th>Event Type</th>
|
432 |
+
<th>Event Trigger</th>
|
433 |
+
<th>Argument</th>
|
434 |
+
<th>Role</th>
|
435 |
+
</tr>
|
436 |
+
<tr>
|
437 |
+
<td rowspan="4">UConn Health , an academic medical center , says in a media statement that it identified approximately 326,000 potentially impacted individuals whose personal information was contained in the compromised email accounts.</td>
|
438 |
+
<td rowspan="4">data breach</td>
|
439 |
+
<td rowspan="4">compromised</td>
|
440 |
+
<td>email accounts</td>
|
441 |
+
<td>compromised data</td>
|
442 |
+
</tr>
|
443 |
+
<tr>
|
444 |
+
<td>326,000</td>
|
445 |
+
<td>number of victim</td>
|
446 |
+
</tr>
|
447 |
+
<tr>
|
448 |
+
<td>individuals</td>
|
449 |
+
<td>victim</td>
|
450 |
+
</tr>
|
451 |
+
<tr>
|
452 |
+
<td>personal information</td>
|
453 |
+
<td>compromised data</td>
|
454 |
+
</tr>
|
455 |
+
</table>
|
456 |
+
|
457 |
+
Click [here](https://github.com/zjunlp/OneKE/tree/main/examples/results/EE.json) to obtain the raw results in `json` format.
|
458 |
+
> Note: The actual extraction results may not exactly match this due to LLM randomness.
|
459 |
+
|
460 |
+
The extraction results show that the `data breach` event is identified using the trigger `compromised`, and the specific contents of different event arguments such as `compromised data` and `victim` have also been extracted.
|
461 |
+
|
462 |
+
You can either specify event constraints or omit them. Without constraints, OneKE will extract all events from the sentence.
|
463 |
+
|
464 |
+
#### 4. Triple Extraction
|
465 |
+
|
466 |
+
Triple Extraction identifies subject-predicate-object triples in text. A triple is a fundamental data structure in information extraction, representing a piece of knowledge or a fact. A Knowledge Graph (KG) can be quickly constructed from the Triple Extraction results.
|
467 |
+
|
468 |
+
Here is an example:
|
469 |
+
|
470 |
+
| Text | Subject Entity Types | Relation Types | Object Entity Types |
|
471 |
+
| ------------------------------------------------------------ | -------------------- | ---------------- | ------------------- |
|
472 |
+
| The international conference on renewable energy technologies was held in Berlin. Several researchers presented their findings, discussing new innovations and challenges. The event was attended by experts from all over the world, and it is expected to continue in various locations. | Event, Person | Action, Location | Place, Concept |
|
473 |
+
|
474 |
+
The final extraction result should be:
|
475 |
+
|
476 |
+
| Subject Entity | Relation | Object Entity |
|
477 |
+
| -------------------- | --------------------------- | ------------------------- |
|
478 |
+
| Conference (Event) | was held in (Location) | Berlin (Place) |
|
479 |
+
| Researchers (Person) | presented (Action) | findings (Concept) |
|
480 |
+
| Researchers (Person) | discussed (Action) | innovations (Concept) |
|
481 |
+
| Conference (Event) | will continue in (Location) | various locations (Place) |
|
482 |
+
| Experts (Person) | attended (Action) | event (Event) |
|
483 |
+
| Event (Event) | is attended by (Location) | experts (Person) |
|
484 |
+
|
485 |
+
Let's start in OneKE ~
|
486 |
+
|
487 |
+
The constraint can be customized in multiple styles, formatted as follows:
|
488 |
+
|
489 |
+
* Define `entity types` only:
|
490 |
+
|
491 |
+
If you only need to specify the entity types, the `constraint` should be a single list of strings representing the different entity types.
|
492 |
+
|
493 |
+
```python
|
494 |
+
["Person", "Place", "Event", "property"]
|
495 |
+
```
|
496 |
+
|
497 |
+
* Define `entity types` and `relation types`:
|
498 |
+
|
499 |
+
If you need to specify both entity types and relation types, the `constraint` should be a nested list. The first list contains the entity types, and the second list contains the relation types.
|
500 |
+
|
501 |
+
```python
|
502 |
+
[["Person", "Place", "Event", "property"], ["Interpersonal", "Located", "Ownership", "Action"]]
|
503 |
+
```
|
504 |
+
|
505 |
+
* Define `subject entities types`, `relation types`, and `object entities types`:
|
506 |
+
|
507 |
+
If you need to define the types of subject entities, relation types, and object entities, the `constraint` should be a nested list. The first list contains the subject entity types, the second list contains the relation types, and the third list contains the object entity types.
|
508 |
+
|
509 |
+
```python
|
510 |
+
[["Person"], ["Interpersonal", "Ownership"], ["Person", "property"]]
|
511 |
+
```
|
512 |
+
|
513 |
+
Next, follow the steps below to complete the Triple extraction task:
|
514 |
+
|
515 |
+
- Complete `./examples/config/Triple2KG.yaml`:
|
516 |
+
|
517 |
+
configure the necessary model and extraction settings.
|
518 |
+
|
519 |
+
- Run the shell script below:
|
520 |
+
|
521 |
+
```bash
|
522 |
+
config_file=./examples/config/Triple2KG.yaml
|
523 |
+
python src/run.py --config $config_file
|
524 |
+
```
|
525 |
+
|
526 |
+
( Refer to [issues](#network-issue-solutions) for any network issues. )
|
527 |
+
|
528 |
+
Here is an [example](https://github.com/zjunlp/OneKE/tree/main/examples/config/Triple2KG.yaml) to start. And access a raw [results](https://github.com/zjunlp/OneKE/tree/main/examples/results/TripleExtraction.json) in JSON format here.
|
529 |
+
|
530 |
+
> ⚠️ Warning: If you do not intend to build a Knowledge Graph, make sure to remove or comment out the construct field in the yaml file. This will help avoid errors related to database connection issues.
|
531 |
+
|
532 |
+
##### Build Knowledge-Graph
|
533 |
+
|
534 |
+
✨ If you need to construct your Knowledge Graph (KG) **with your Triple Extraction result**, you can refer to this [example](https://github.com/zjunlp/OneKE/tree/main/examples/config/Triple2KG.yaml) for guidance. Mimic this example and add the `construct` field. Just update the field with your own database parameters.
|
535 |
+
|
536 |
+
```yaml
|
537 |
+
construct: # (Optional) If you want to construct a Knowledge Graph, you need to set the construct field, or you must delete this field.
|
538 |
+
database: Neo4j # your database type.
|
539 |
+
url: neo4j://localhost:7687 # your database URL. Neo4j's default port is 7687.
|
540 |
+
username: your_username # your database username.
|
541 |
+
password: "your_password" # your database password.
|
542 |
+
```
|
543 |
+
|
544 |
+
Once your database is set up, you can access your graph database through a browser. For Neo4j, the web interface connection URL is usually:
|
545 |
+
|
546 |
+
```
|
547 |
+
http://localhost:7474/browser
|
548 |
+
```
|
549 |
+
|
550 |
+
For additional information regarding the Neo4j database, please refer to its [documentation](https://neo4j.com/docs).
|
551 |
+
|
552 |
+
> ⚠️ Warning Again: If you do not intend to build a Knowledge Graph, make sure to remove or comment out the construct field in the yaml file. This will help avoid errors related to database connection issues.
|
553 |
+
|
554 |
+
#### 5. Open Domain IE
|
555 |
+
|
556 |
+
This type of task is represented as `Base` in the code, signifying any other user-defined open-domain extraction tasks.
|
557 |
+
|
558 |
+
We refer to the [example](#step1-prepare-the-configuration-file) above for guidance.
|
559 |
+
|
560 |
+
In the context of customized **Web News Extraction**, we first set the extraction instruction to `Extract key information from the given text`, and provide the file path to extract content from the file. We specify the output schema from the schema repository as the predefined `NewsReport`, and then proceed with the extraction.
|
561 |
+
|
562 |
+
Next, follow the steps below to complete this task:
|
563 |
+
- Complete `./examples/config/NewsExtraction.yaml`:
|
564 |
+
configure the necessary model and extraction settings
|
565 |
+
- Run the shell script below:
|
566 |
+
```bash
|
567 |
+
config_file=./examples/config/NewsExtraction.yaml
|
568 |
+
python src/run.py --config $config_file
|
569 |
+
```
|
570 |
+
( Refer to [issues](#network-issue-solutions) for any network issues. )
|
571 |
+
|
572 |
+
Here is an excerpt of the extracted content:
|
573 |
+
| **Title** | Meet Trump's pick for director of national intelligence |
|
574 |
+
|----------------------------------|--------------------------------------------------------------------------------|
|
575 |
+
| **Summary** | Tulsi Gabbard, chosen by President-elect Donald Trump for director of national intelligence, faces a Senate confirmation challenge due to her lack of experience and controversial views. Accusations include promoting an anti-American agenda and having troubling ties with U.S. adversaries. |
|
576 |
+
| **Publication Date** | 2024-12-04T17:06:00Z |
|
577 |
+
| **Keywords** | Tulsi Gabbard; director of national intelligence; Donald Trump; Senate confirmation; intelligence agencies |
|
578 |
+
| **Events** | Tulsi Gabbard's nomination leads to a Senate confirmation battle due to controversies. |
|
579 |
+
| **People Involved** | Tulsi Gabbard: Nominee for director of national intelligence; Donald Trump: President-elect; Tammy Duckworth: Democratic Senator; Olivia Troye: Former Trump administration national security official |
|
580 |
+
| **Quotes** | "The U.S. intelligence community has identified her as having troubling relationships with America’s foes."; "If Gabbard is confirmed, America’s allies may not share as much information with the U.S." |
|
581 |
+
| **Viewpoints** | Gabbard's nomination is considered alarming and dangerous for U.S. national security; Her anti-war stance and criticism of military interventions draw both support and criticism. |
|
582 |
+
|
583 |
+
Click [here](https://github.com/zjunlp/OneKE/tree/main/examples/results/NewsExtraction.json) to obtain the raw results in `json` format.
|
584 |
+
> Note: The actual extraction results may not exactly match this due to LLM randomness.
|
585 |
+
|
586 |
+
In contrast to earlier tasks, the `Base-Type` Task requires you to provide an explicit `Instruction` that clearly defines your extraction task, while not allowing the setting of `constraint` values.
|
587 |
+
|
588 |
+
|
589 |
+
|
590 |
+
### 💡Data Source Support
|
591 |
+
You can choose source texts of various lengths and forms for extraction.
|
592 |
+
| **Source Format** | **Description** |
|
593 |
+
| :---: | :---: |
|
594 |
+
| Plain Text | String form of raw natural language text. |
|
595 |
+
| HTML Source | Markup language for structuring web pages. |
|
596 |
+
| PDF File | Portable format for fixed-layout documents. |
|
597 |
+
| Word File | Microsoft Word document format, with rich text. |
|
598 |
+
| TXT File | Basic text format, easily opened and edited. |
|
599 |
+
| Json File | Lightweight format for structured data interchange. |
|
600 |
+
|
601 |
+
In practice, you can use the YAML file configuration to handle different types of text input:
|
602 |
+
- **Plain Text**: Set `use_file` to `false` and enter the text to be extracted in the `text` field.
|
603 |
+
For example:
|
604 |
+
```yaml
|
605 |
+
use_file: false
|
606 |
+
text: Finally , every other year , ELRA organizes a major conference LREC , the International Language Resources and Evaluation Conference .
|
607 |
+
```
|
608 |
+
- **File Content**: Set `use_file` to `true` and specify the file path in `file_path` for the text to be extracted.
|
609 |
+
For example:
|
610 |
+
```yaml
|
611 |
+
use_file: true
|
612 |
+
file_path: ./data/input_files/Tulsi_Gabbard_News.html
|
613 |
+
```
|
614 |
+
|
615 |
+
|
616 |
+
### 💡Extraction Model Support
|
617 |
+
You can choose from various open-source or proprietary model APIs to perform information extraction tasks.
|
618 |
+
> Note: For complex IE tasks, we recommend using powerful models like **OpenAI**'s or **large-scale** open-source LLMs.
|
619 |
+
|
620 |
+
| **Model** | **Description** |
|
621 |
+
| :---: | :---: |
|
622 |
+
| ***API Service*** | |
|
623 |
+
| OpenAI | A series of GPT foundation models offered by OpenAI, such as GPT-3.5 and GPT-4-turbo, which are renowned for their outstanding capabilities in natural language processing. |
|
624 |
+
| DeepSeek | High-performance LLMs that have demonstrated exceptional capabilities in both English and Chinese benchmarks. |
|
625 |
+
| ***Local Deploy***|
|
626 |
+
| LLaMA3-Instruct series| Meta's series of large language models, with tens to hundreds of billions of parameters, have shown advanced performance on industry-standard benchmarks. |
|
627 |
+
| Qwen2.5-Instruct series| LLMs developed by the Qwen team, come in various parameter sizes and exhibit strong capabilities in both English and Chinese. |
|
628 |
+
| ChatGLM4-9B | The latest model series by the Zhipu team, which achieve breakthroughs in multiple metrics, excel as bilingual (Chinese-English) chat models. |
|
629 |
+
| MiniCPM3-4B | A lightweight language model with 4B parameters, matches or even surpasses 7B-9B models in most evaluation benchmarks.|
|
630 |
+
| OneKE | A large-scale model for knowledge extraction jointly developed by Ant Group and Zhejiang University. |
|
631 |
+
| DeepSeek-R1 series| A bilingual Chinese-English strong reasoning model series provided by DeepSeek, featuring the original DeepSeek-R1 and various distilled versions based on smaller models. |
|
632 |
+
> Note: We recommend deploying the DeepSeek-R1 models with VLLM.
|
633 |
+
|
634 |
+
|
635 |
+
|
636 |
+
|
637 |
+
In practice, you can use the YAML file configuration to employ various LLMs:
|
638 |
+
- **API Service**: Set the `model_name_or_path` to the available model name provided by the company, and enter your `api_key` as well as the `base_url`.
|
639 |
+
For example:
|
640 |
+
```yaml
|
641 |
+
model:
|
642 |
+
category: DeepSeek # model category, chosen from ChatGPT and DeepSeek
|
643 |
+
model_name_or_path: deepseek-chat # model name, chosen from deepseek-chat and deepseek-reasoner. Choose deepseek-chat to use DeepSeek-V3 or choose deepseek-reasoner to use DeepSeek-R1.
|
644 |
+
api_key: your_api_key # your API key for the model with API service.
|
645 |
+
base_url: https://api.deepseek.com # base URL for the API service. No need for open-source models.
|
646 |
+
```
|
647 |
+
- **Local Deploy**: Set the `model_name_or_path` to either the model name on Hugging Face or the path to the local model. We support using either `Transformer` or `vllm` to access the models.
|
648 |
+
- Transformer Example:
|
649 |
+
```yaml
|
650 |
+
model:
|
651 |
+
category: LLaMA # model category, chosen from LLaMA, Qwen, ChatGLM, MiniCPM, OneKE.
|
652 |
+
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct # model name to download from huggingface or use the local model path.
|
653 |
+
vllm_serve: false # whether to use the vllm. Default set to false.
|
654 |
+
```
|
655 |
+
Note that the category of deployment model **must** be chosen from LLaMA, Qwen, ChatGLM, MiniCPM, OneKE.
|
656 |
+
- VLLM Example:
|
657 |
+
```yaml
|
658 |
+
model:
|
659 |
+
category: DeepSeek # model category
|
660 |
+
model_name_or_path: meta-llama/Meta-Llama-3-8B-Instruct # model name to download from huggingface or use the local model path.
|
661 |
+
vllm_serve: true # whether to use the vllm. Default set to false.
|
662 |
+
```
|
663 |
+
Note that the **DeepSeek-R1** series models only support **VLLM** deployment. Remember to **start the VLLM service** before running the extraction task. The reference code is as follows:
|
664 |
+
```shell
|
665 |
+
config_file=your_yaml_file_path # REMEMBER to set vllm_serve to TRUE!
|
666 |
+
python src/models/vllm_serve.py --config $config_file # deploy local model via vllm, executed in the OneKE directory
|
667 |
+
```
|
668 |
+
You can also run the command `vllm serve model_name_or_path` directly to start the VLLM service. See the [official documents](https://docs.vllm.ai/en/latest/getting_started/quickstart.html) for more details.
|
669 |
+
|
670 |
+
### 💡Extraction Method Support
|
671 |
+
You can freely combine different extraction methods to complete the information extraction task.
|
672 |
+
| **Method** | **Description** |
|
673 |
+
| :---: | :---: |
|
674 |
+
| ***Schema Agent*** | |
|
675 |
+
| Default Schema | Use the default JSON output format. |
|
676 |
+
| Predefined Schema | Utilize the predefined output schema retrieved from the knowledge base. |
|
677 |
+
| Self Schema Deduction | Generate the output schema by inferring from the task description and the source text. |
|
678 |
+
| ***Extraction Agent***|
|
679 |
+
| Direct IE | Directly extract information from the given text based on the task description. |
|
680 |
+
| Case Retrieval | Retrieve similar good cases from the knowledge base to aid in the extraction. |
|
681 |
+
| ***Reflection Agent***|
|
682 |
+
| No Reflection| Directly return the extraction results. |
|
683 |
+
| Case Reflection | Use the self-consistency approach, and if inconsistencies appear, reflect on the original answer by retrieving similar bad cases from the knowledge base. |
|
684 |
+
|
685 |
+
The detailed configuration of extraction methods and modes can be found in `src/config.yaml`. You can customize the extraction methods by modifying the `customized` section within this file and setting the `mode` to `customized` in an external configuration file.
|
686 |
+
|
687 |
+
For example, first configure the `src/config.yaml` as follows:
|
688 |
+
```yaml
|
689 |
+
# src/config.yaml
|
690 |
+
customized:
|
691 |
+
schema_agent: get_deduced_schema
|
692 |
+
extraction_agent: extract_information_direct
|
693 |
+
reflection_agent: reflect_with_case
|
694 |
+
```
|
695 |
+
Then, set the `mode` of your custom extraction task in `examples/customized.yaml` to `customized`:
|
696 |
+
|
697 |
+
```yaml
|
698 |
+
# examples/customized.yaml
|
699 |
+
mode: customized
|
700 |
+
```
|
701 |
+
This allows you to experience the customized extraction methods.
|
702 |
+
|
703 |
+
> Tips:
|
704 |
+
> - For longer text extraction tasks, we recommend using the `direct mode` to avoid issues like attention dispersion and increased processing time.
|
705 |
+
> - For shorter tasks requiring high accuracy, you can try the `standard mode` to ensure precision.
|
706 |
+
|
707 |
+
|
708 |
+
### 💡Knowledge Base Configuration
|
709 |
+
#### 1. Schema Repository
|
710 |
+
You can view the predefined schemas within the `src/modules/knowledge_base/schema_repository.py` file. The Schema Repository is designed to be easily extendable. You just need to define your output schema in the form of a pydantic class following the format defined in the file, and it can be directly used in subsequent extractions.
|
711 |
+
|
712 |
+
For example, add a new schema in the schema repository:
|
713 |
+
```python
|
714 |
+
# src/modules/knowledge_base/schema_repository.py
|
715 |
+
class ChemicalSubstance(BaseModel):
|
716 |
+
name: str = Field(description="Name of the chemical substance")
|
717 |
+
formula: str = Field(description="Molecular formula")
|
718 |
+
appearance: str = Field(description="Physical appearance")
|
719 |
+
uses: List[str] = Field(description="Primary uses")
|
720 |
+
hazards: str = Field(description="Hazard classification")
|
721 |
+
|
722 |
+
class ChemicalList(BaseModel):
|
723 |
+
chemicals: List[ChemicalSubstance] = Field(description="List of chemicals")
|
724 |
+
```
|
725 |
+
|
726 |
+
Then, set the method for `schema_agent` under `customized` to `get_retrieved_schema` in `src/config.yaml`. Finally, set the `mode` to `customized` in the external configuration file to enable custom schema extraction.
|
727 |
+
|
728 |
+
In this example, the extraction results will be a list of **chemical substances** that strictly adhere to the defined schema, ensuring a high level of accuracy and flexibility in the extraction results.
|
729 |
+
|
730 |
+
Note that the names of newly created objects **should not conflict with** existing ones.
|
731 |
+
|
732 |
+
#### 2. Case Repository
|
733 |
+
You can directly view the case storage in the `src/modules/knowledge_base/case_repository.json` file, but we do not recommend modifying it directly.
|
734 |
+
|
735 |
+
The Case Repository is automatically updated with each extraction process once `update_case` is set to `true` in the configuration file.
|
736 |
+
|
737 |
+
When updating the Case Repository, you must provide external feedback to generate case information, either by including the truth answer in the configuration file or by supplying it during the extraction process.
|
738 |
+
|
739 |
+
Here is an example:
|
740 |
+
```yaml
|
741 |
+
# examples/config/RE.yaml
|
742 |
+
truth: {"relation_list": [{"head": "Guinea", "tail": "Conakry", "relation": "country capital"}]} # Truth data for the relation
|
743 |
+
update_case: true
|
744 |
+
```
|
745 |
+
|
746 |
+
After extraction, OneKE compares results with the truth answer, generates analysis, and finally stores the case in the repository.
|
747 |
+
|
748 |
+
|
749 |
+
## 🛠️Network Issue Solutions
|
750 |
+
Here are some network issues you might encounter and the corresponding solutions.
|
751 |
+
|
752 |
+
- Pip Installation Failure: Use mirror websites, run the command as `pip install -i [mirror-source] ...`.
|
753 |
+
- Docker Image Pull Failure: Configure the docker daemon to add repository mirrors.
|
754 |
+
- Nltk Download Failure: Manually download the `nltk` package and place it in the proper directory.
|
755 |
+
- Model Download Failure: Use the `Hugging Face Mirror` site or `ModelScope` to download the model, and specify the local path to the model when using it.
|
756 |
+
> Note: We use `all-MiniLM-L6-v2` model by default for case matching, so it needs to be downloaded during execution. If network issues occur, manually download the model, and update the `embedding_model` to its local path in the `src/config.yaml` file.
|
757 |
+
|
758 |
+
|
759 |
+
## 🎉Contributors
|
760 |
+
|
761 |
+
[Ningyu Zhang](https://person.zju.edu.cn/en/ningyu), [Haofen Wang](https://tjdi.tongji.edu.cn/TeacherDetail.do?id=4991&lang=_en), Yujie Luo, Xiangyuan Ru, Kangwei Liu, Lin Yuan, Mengshu Sun, Lei Liang, Zhiqiang Zhang, Jun Zhou, Lanning Wei, Da Zheng, Huajun Chen.
|
762 |
+
|
763 |
+
We deeply appreciate the collaborative efforts of everyone involved. We will continue to enhance and maintain this repository over the long term. If you encounter any issues, feel free to submit them to us!
|
764 |
+
|
765 |
+
|
766 |
+
## 🌻Acknowledgement
|
767 |
+
We reference [itext2kg](https://github.com/AuvaLab/itext2kg) to aid in building the schema repository and utilize tools from [LangChain](https://www.langchain.com/) for file parsing. The experimental datasets we use are curated from the [IEPile](https://huggingface.co/datasets/zjunlp/iepile) repository. We appreciate their valuable contributions!
|
README.md
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
---
|
2 |
title: OneKE
|
3 |
-
emoji:
|
4 |
colorFrom: blue
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
-
sdk_version: 5.
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
|
|
1 |
---
|
2 |
title: OneKE
|
3 |
+
emoji: 👌
|
4 |
colorFrom: blue
|
5 |
colorTo: indigo
|
6 |
sdk: gradio
|
7 |
+
sdk_version: 5.23.3
|
8 |
app_file: app.py
|
9 |
pinned: false
|
10 |
license: mit
|
data/input_files/ChineseNewsExample.json
ADDED
@@ -0,0 +1,200 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{"title_id": "33366", "title": "不熄灯的长沙“钥匙阿姨”,坚持凡人善举21年", "topic": "社会", "event_info": [{"event1_trigger": "坚持", "event1_triple": "'长沙“钥匙阿姨”', '坚持', '凡人善举'"}]}
|
2 |
+
{"title_id": "9116", "title": "“迎新春,庆元旦”扈胡镇新时代文明实践所举办掼蛋比赛", "topic": "社会", "event_info": [{"event1_trigger": "举办", "event1_triple": "'扈胡镇新时代文明实践所', '举办', '掼蛋比赛'"}]}
|
3 |
+
{"title_id": "6199", "title": "寿仙谷上涨7%,现报62.06元", "topic": "财经", "event_info": [{"event1_trigger": "上涨", "event1_triple": "'寿仙谷', '上涨', ''"}]}
|
4 |
+
{"title_id": "32775", "title": "津媒:亚泰退出亚冠 是更现实的选择!打好联赛是最基本的诉求", "topic": "体育", "event_info": [{"event1_trigger": "退出", "event1_triple": "'亚泰', '退出', '亚冠'"}]}
|
5 |
+
{"title_id": "9930", "title": "联想童夫尧亮相2021人民财经高峰论坛,以“新IT”塑造新时代高质量增长曲线", "topic": "科技", "event_info": [{"event1_trigger": "亮相", "event1_triple": "'童夫尧', '亮相', '2021人民财经高峰论坛'"}]}
|
6 |
+
{"title_id": "12550", "title": "深夜突发!韩国政坛大洗牌,文在寅在做拼死一搏!", "topic": "时事", "event_info": [{"event1_trigger": "洗牌", "event1_triple": "'韩国政坛', '洗牌', ''"}, {"event2_trigger": "做拼死一搏", "event2_triple": "'文在寅', '做拼死一搏', ''"}]}
|
7 |
+
{"title_id": "27402", "title": "国内航线燃油附加费复征两个月后将取消,已有30家航司发布通知", "topic": "时事", "event_info": [{"event1_trigger": "复征", "event1_triple": "'国内航线燃油附加费', '复征', ''"}, {"event2_trigger": "将取消", "event2_triple": "'国内航线燃油附加费复征', '将取消', ''"}, {"event3_trigger": "发布", "event3_triple": "'30家航司', '发布', '通知'"}]}
|
8 |
+
{"title_id": "41547", "title": "央行开展2100亿元“MLF+逆回购”,专家:预计二季度降息可能性大", "topic": "财经", "event_info": [{"event1_trigger": "开展", "event1_triple": "'央行', '开展', '2100亿元“MLF+逆回购”'"}, {"event2_trigger": "预计", "event2_triple": "'专家', '预计', 'MLF二季度降息可能性大'"}]}
|
9 |
+
{"title_id": "36089", "title": "追讨欠款19亿!恒大把贵州首富公司给告了", "topic": "财经", "event_info": [{"event1_trigger": "把给告了", "event1_triple": "'恒大', '把给告了', '贵州首富公司'"}, {"event2_trigger": "追讨", "event2_triple": "'恒大', '追讨', '欠款'"}]}
|
10 |
+
{"title_id": "4840", "title": "河北廊坊开发区开展元旦期间安全检查", "topic": "社会", "event_info": [{"event1_trigger": "开展", "event1_triple": "'廊坊开发区', '开展', '安全检查'"}]}
|
11 |
+
{"title_id": "25899", "title": "县委常委、宣传部长纵文华到葛集镇开展“新春访万企”活动", "topic": "社会", "event_info": [{"event1_trigger": "到", "event1_triple": "'县委常委宣传部长', '到', '葛集镇'"}, {"event2_trigger": "开展", "event2_triple": "'县委常委宣传部长', '开展', '“新春访万企”活动'"}]}
|
12 |
+
{"title_id": "6084", "title": "意天空盘点史上冬窗最贵转会:库蒂尼奥1.45亿欧转会巴萨居首", "topic": "体育", "event_info": [{"event1_trigger": "盘点", "event1_triple": "'意天空', '盘点', '史上冬窗最贵转会'"}, {"event2_trigger": "转会", "event2_triple": "'库蒂尼奥', '转会', '巴萨'"}]}
|
13 |
+
{"title_id": "22873", "title": "海南榴莲种植面积突破3万亩 专家:榴莲可做为特色产业适当发展", "topic": "三农", "event_info": [{"event1_trigger": "突破3万亩", "event1_triple": "'海南榴莲种植面积', '突破3万亩', ''"}]}
|
14 |
+
{"title_id": "28401", "title": "“中超莫德里奇”回来了!张修维真的被郑智成功改造了回去", "topic": "体育", "event_info": [{"event1_trigger": "回来了", "event1_triple": "'“中超莫德里奇”', '回来了', ''"}, {"event2_trigger": "被成功改造", "event2_triple": "'张修维', '被成功改造', '郑智'"}]}
|
15 |
+
{"title_id": "10786", "title": "【市场】特斯拉去年销量仅百万,350元哨子上架", "topic": "科技", "event_info": [{"event1_trigger": "上架", "event1_triple": "'特斯拉350元哨子', '上架', ''"}]}
|
16 |
+
{"title_id": "41896", "title": "内蒙古自治区托克托县发布大雾黄色预警", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'内蒙古自治区托克托县', '发布', '大雾黄色预警'"}]}
|
17 |
+
{"title_id": "35642", "title": "黑龙江省哈尔滨市通河县发布大风蓝色预警【2022-03-06】", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'哈尔滨市通河县', '发布', '大风蓝色预警'"}]}
|
18 |
+
{"title_id": "23802", "title": "理发师已就位!CBA顺德赛区为球员安排理发活动", "topic": "体育", "event_info": [{"event1_trigger": "为安排理发活动", "event1_triple": "'CBA顺德赛区', '为安排理发活动', '球员'"}, {"event2_trigger": "已就位", "event2_triple": "'理发师', '已就位', ''"}]}
|
19 |
+
{"title_id": "37583", "title": "小摩全线下调中国互联网板块评级 互联网OUT了?", "topic": "财经", "event_info": [{"event1_trigger": "下调", "event1_triple": "'小摩', '下调', '中国互联网板块评级'"}]}
|
20 |
+
{"title_id": "7872", "title": "华宝中证800地产ETF净值上涨1.76% 请保持关注", "topic": "财经", "event_info": [{"event1_trigger": "上涨", "event1_triple": "'华宝中证800地产ETF净值', '上涨', ''"}]}
|
21 |
+
{"title_id": "12844", "title": "政协上蔡县第十届委员会常务委员会第二十一次会议召开", "topic": "时事", "event_info": [{"event1_trigger": "召开", "event1_triple": "'政协上蔡县第十届委员会常务委员会第二十一次会议', '召开', ''"}]}
|
22 |
+
{"title_id": "35896", "title": "碧水源跌0.16%,创年度新低,报6.17元", "topic": "财经", "event_info": [{"event1_trigger": "跌0.16%", "event1_triple": "'碧水源', '跌0.16%', ''"}, {"event2_trigger": "创", "event2_triple": "'碧水源', '创', '年度新低'"}, {"event3_trigger": "报6.17元", "event3_triple": "'碧水源', '报6.17元', ''"}]}
|
23 |
+
{"title_id": "28452", "title": "上饶一男子,致2名交警受伤!", "topic": "社会", "event_info": [{"event1_trigger": "致受伤", "event1_triple": "'上饶一男子', '致受伤', '2名交警'"}]}
|
24 |
+
{"title_id": "12468", "title": "管理产品近3年平均收益超230% 金牛名将李晓星新品银华心兴1月4日起发行", "topic": "财经", "event_info": [{"event1_trigger": "发行", "event1_triple": "'银华心兴', '发行', ''"}, {"event2_trigger": "管理", "event2_triple": "'李晓星', '管理', '产品'"}, {"event3_trigger": "超230%", "event3_triple": "'李晓星管理产品平均收益', '超230%', ''"}]}
|
25 |
+
{"title_id": "19125", "title": "热闹!湖人官宣3方大交易,王哲林成筹码,下一步交易威少塔克?", "topic": "体育", "event_info": [{"event1_trigger": "官宣", "event1_triple": "'湖人', '官宣', '3方大交易'"}, {"event2_trigger": "成", "event2_triple": "'王哲林', '成', '筹码'"}]}
|
26 |
+
{"title_id": "9161", "title": "*ST围海(002586.SZ):副总经理、财务总监陈静玉辞职 由王可飞接任", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'*ST围海', ':', '副总经理、财务总监陈静玉辞职由王可飞接任'"}, {"event2_trigger": "辞职", "event2_triple": "'陈静玉', '辞职', ''"}, {"event3_trigger": "接任", "event3_triple": "'王可飞', '接任', 'ST围海副总经理'"}]}
|
27 |
+
{"title_id": "17910", "title": "中远海运大连投资与中远海运物流签署长兴岛化学品物流园项目合资协议", "topic": "财经", "event_info": [{"event1_trigger": "签署", "event1_triple": "'中远海运大连投资与中远海运物流', '签署', '长兴岛化学品物流园项目合资协议'"}]}
|
28 |
+
{"title_id": "7710", "title": "郑州“黄牌车”过渡期延长两年 车主和管理部门都松了一口气", "topic": "社会", "event_info": [{"event1_trigger": "延长", "event1_triple": "'郑州黄牌车过渡期', '延长', ''"}, {"event2_trigger": "松了", "event2_triple": "'车主和管理部门', '松了', '一口气'"}]}
|
29 |
+
{"title_id": "24112", "title": "3月3日河北省沧县气象台发布大风蓝色预警", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'沧县气象台', '发布', '大风蓝色预警'"}]}
|
30 |
+
{"title_id": "13508", "title": "钟丽缇:为三女儿考拉11岁庆生,随继父张伦硕姓改严稚棱为张凯琳", "topic": "娱乐", "event_info": [{"event1_trigger": "为庆生", "event1_triple": "'钟丽缇', '为庆生', '三女儿'"}, {"event2_trigger": "随", "event2_triple": "'钟丽缇三女儿', '随', '张伦硕姓'"}]}
|
31 |
+
{"title_id": "35387", "title": "西甲:席尔瓦破门 马略卡0-2皇家社会遭遇三连败", "topic": "体育", "event_info": [{"event1_trigger": "破门", "event1_triple": "'席尔瓦', '破门', ''"}, {"event2_trigger": "-", "event2_triple": "'马略卡0', '-', '皇家社会2'"}, {"event3_trigger": "遭遇", "event3_triple": "'马略卡', '遭遇', '三连败'"}]}
|
32 |
+
{"title_id": "15200", "title": "中物联:2021年12月份中国大宗商品指数(CBMI)为100.9%;", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'中物联', ':', '2021年12月份中国大宗商品指数为100.9%;'"}, {"event2_trigger": "为100.9%;", "event2_triple": "'中物联2021年12月份中国大宗商品指数', '为100.9%;', ''"}]}
|
33 |
+
{"title_id": "19266", "title": "三星 Galaxy Z Flip3 5G 国行推送 One UI 4.0 正式版更新", "topic": "科技", "event_info": [{"event1_trigger": "推送", "event1_triple": "'三星 Galaxy Z Flip3 5G 国行', '推送', 'One UI 4.0 正式版'"}, {"event2_trigger": "更新", "event2_triple": "'三星 Galaxy Z Flip3 5G 国行One UI 4.0 正式版', '更新', ''"}]}
|
34 |
+
{"title_id": "14657", "title": "中国首届旅行+大会在即,看生而破界的捷途汽车潮玩跨界", "topic": "汽车", "event_info": [{"event1_trigger": "跨界", "event1_triple": "'捷途汽车', '跨界', ''"}, {"event2_trigger": "在即", "event2_triple": "'中国首届旅行+大会', '在即', ''"}]}
|
35 |
+
{"title_id": "5888", "title": "尹颂:主持台里年度推介活动稳居C位,32岁成江西卫视头牌男主持", "topic": "综艺", "event_info": [{"event1_trigger": "主持", "event1_triple": "'尹颂', '主持', '年度推介活动'"}, {"event2_trigger": "稳居", "event2_triple": "'尹颂', '稳居', 'C位'"}, {"event3_trigger": "成", "event3_triple": "'尹颂', '成', '江西卫视头牌男主持'"}]}
|
36 |
+
{"title_id": "172", "title": "生日快乐!詹姆斯迎37岁生日 征战联盟19季传奇延续", "topic": "体育", "event_info": [{"event1_trigger": "迎", "event1_triple": "'詹姆斯', '迎', '37岁生日'"}, {"event2_trigger": "征战", "event2_triple": "'詹姆斯', '征战', '联盟19季传奇延续'"}]}
|
37 |
+
{"title_id": "36923", "title": "北京3月16日16时至17日16时新增本土新冠肺炎病毒感染者8例", "topic": "社会", "event_info": [{"event1_trigger": "新增", "event1_triple": "'北京', '新增', '本土新冠肺炎病毒感染者'"}]}
|
38 |
+
{"title_id": "41035", "title": "【天眼问政·追踪】观山湖区商城西路,路灯亮了", "topic": "社会", "event_info": [{"event1_trigger": "亮了", "event1_triple": "'观山湖区商城西路路灯', '亮了', ''"}]}
|
39 |
+
{"title_id": "36338", "title": "澳门出台应对大规模新冠疫情应急处置预案", "topic": "社会", "event_info": [{"event1_trigger": "出台", "event1_triple": "'澳门', '出台', '应对大规模新冠疫情应急处置预案'"}]}
|
40 |
+
{"title_id": "32077", "title": "浙江杭州桐庐县发布通告,暂时关闭城南街道江南农贸市场", "topic": "社会", "event_info": [{"event1_trigger": "发布", "event1_triple": "'桐庐县', '发布', '通告'"}, {"event2_trigger": "关闭", "event2_triple": "'桐庐县', '关闭', '城南街道江南农贸市场'"}]}
|
41 |
+
{"title_id": "5088", "title": "云南丽江市宁蒗县地震已致4人受伤,云南省地震局启动应急响应", "topic": "社会", "event_info": [{"event1_trigger": "已致", "event1_triple": "'丽江市宁蒗县地震', '已致', '4人受伤'"}, {"event2_trigger": "启动", "event2_triple": "'云南省地震局', '启动', '应急响应'"}]}
|
42 |
+
{"title_id": "19079", "title": "波兰央行加息应对通胀 系今年首个宣布加息的国家银行;", "topic": "财经", "event_info": [{"event1_trigger": "加息应对", "event1_triple": "'波兰央行', '加息应对', '通胀'"}, {"event2_trigger": "系", "event2_triple": "'波兰央行', '系', '今年首个宣布加息的国家银行'"}]}
|
43 |
+
{"title_id": "17288", "title": "中欧明睿新常态混合A : 中欧明睿新常态混合型证券投资基金恢复大额申购、转换转入及定期定额投资业务", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'中欧明睿新常态混合A', ':', '中欧明睿新常态混合型证券投资基金恢复大额申购、转换转入及定期定额投资业务'"}, {"event2_trigger": "恢复", "event2_triple": "'中欧明睿新常态混合型证券投资基金', '恢复', '大额申购、转换转入及定期定额投资业务'"}]}
|
44 |
+
{"title_id": "40127", "title": "西方不香了?英美澳加出台一项政策,中国留学生数量下降超四成", "topic": "教育", "event_info": [{"event1_trigger": "出台", "event1_triple": "'英美澳加', '出台', '一项政策'"}, {"event2_trigger": "超四成", "event2_triple": "'中国留学生数量下降', '超四成', ''"}]}
|
45 |
+
{"title_id": "11640", "title": "大批武器刚运抵叙利亚,就被以色列炸个精光,俄S-400全程哑火", "topic": "军事", "event_info": [{"event1_trigger": "运抵", "event1_triple": "'大批武器', '运抵', '叙利亚'"}, {"event2_trigger": "被炸", "event2_triple": "'大批武器', '被炸', '以色列'"}, {"event3_trigger": "哑火", "event3_triple": "'俄S-400', '哑火', ''"}]}
|
46 |
+
{"title_id": "27355", "title": "为了针对我国,可以说已是不遗余力!美禁售C919航发", "topic": "时事", "event_info": [{"event1_trigger": "禁售", "event1_triple": "'美', '禁售', 'C919航发'"}]}
|
47 |
+
{"title_id": "7170", "title": "河南省森防办第四督导组莅临陕州区督导森林防灭火工作", "topic": "社会", "event_info": [{"event1_trigger": "莅临", "event1_triple": "'河南省森防办第四督导组', '莅临', '陕州区'"}, {"event2_trigger": "督导", "event2_triple": "'河南省森防办第四督导组', '督导', '森林防灭火工作'"}]}
|
48 |
+
{"title_id": "41022", "title": "张镇麟:篮球世家出身,未满14岁赴美学习,创CBA单场纪录", "topic": "体育", "event_info": [{"event1_trigger": "赴", "event1_triple": "'张镇麟', '赴', '美'"}, {"event2_trigger": "创", "event2_triple": "'张镇麟', '创', 'CBA单场纪录'"}, {"event3_trigger": "学习", "event3_triple": "'张镇麟', '学习', ''"}]}
|
49 |
+
{"title_id": "9234", "title": "文在寅刚宣布特赦,韩国民众爆发了,朴槿惠又要回监狱?", "topic": "时事", "event_info": [{"event1_trigger": "爆发了", "event1_triple": "'韩国民众', '爆发了', ''"}]}
|
50 |
+
{"title_id": "29744", "title": "1月1日意大利新法令:所有意大利市民必须强制收集湿垃圾", "topic": "时事", "event_info": [{"event1_trigger": "必须强制收集", "event1_triple": "'意大利市民', '必须强制收集', '湿垃圾'"}]}
|
51 |
+
{"title_id": "34553", "title": "3月8日广州最新疫情数据消息通报 广州截至3月8日17时34分(北京时间)疫情数据统计", "topic": "社会", "event_info": [{"event1_trigger": "通报", "event1_triple": "'广州最新疫情数据消息', '通报', ''"}]}
|
52 |
+
{"title_id": "30389", "title": "西安累计确诊1117例 新增病例40%来自核筛", "topic": "社会", "event_info": [{"event1_trigger": "累计确诊", "event1_triple": "'西安', '累计确诊', '1117例'"}, {"event2_trigger": "来自", "event2_triple": "'新增病例40%', '来自', '核筛'"}]}
|
53 |
+
{"title_id": "34763", "title": "好消息!四川调整省本级职工医保待遇!", "topic": "时事", "event_info": [{"event1_trigger": "调整", "event1_triple": "'四川', '调整', '省本级职工医保待遇'"}]}
|
54 |
+
{"title_id": "32824", "title": "残奥小将张梦秋“爱心型”双马尾亮相 网友:已经会了", "topic": "体育", "event_info": [{"event1_trigger": "亮相", "event1_triple": "'张梦秋“爱心型”双马尾', '亮相', ''"}]}
|
55 |
+
{"title_id": "5543", "title": "云南丽江宁蒗县5.5级地震已致15人受伤!专家:后续还可能发生地震", "topic": "社会", "event_info": [{"event1_trigger": "地震", "event1_triple": "'丽江宁蒗县', '地震', ''"}, {"event2_trigger": "已致", "event2_triple": "'丽江宁蒗县地震', '已致', '15人受伤'"}]}
|
56 |
+
{"title_id": "891", "title": "张韬:罕见换发型,入职央视4年以来首次以毛寸造型出镜超显青春", "topic": "娱乐", "event_info": [{"event1_trigger": "换", "event1_triple": "'张韬', '换', '发型'"}, {"event2_trigger": "入职", "event2_triple": "'张韬', '入职', '央视'"}, {"event3_trigger": "出镜", "event3_triple": "'张韬毛寸造型', '出镜', ''"}]}
|
57 |
+
{"title_id": "31265", "title": "海关总署公布跨境电商进口消费品质量风险信息 多款婴幼儿产品在列", "topic": "科技", "event_info": [{"event1_trigger": "公布", "event1_triple": "'海关总署', '公布', '跨境电商进口消费品质量风险信息'"}, {"event2_trigger": "在列", "event2_triple": "'多款婴幼儿产品', '在列', ''"}]}
|
58 |
+
{"title_id": "38989", "title": "博尔塔拉蒙古自治州气象台发布大雾黄色预警[Ⅲ级/较重]", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'博尔塔拉蒙古自治州气象台', '发布', '大雾黄色预警'"}]}
|
59 |
+
{"title_id": "11468", "title": "龙华人 专注龙华大小事的“龙华之声”全新改版上线", "topic": "社会", "event_info": [{"event1_trigger": "上线", "event1_triple": "'“龙华之声”改版', '上线', ''"}]}
|
60 |
+
{"title_id": "8188", "title": "第一!品质定力绽放品牌魅力,江苏卫视2021收视登顶", "topic": "综艺", "event_info": [{"event1_trigger": "登顶", "event1_triple": "'江苏卫视收视', '登顶', ''"}]}
|
61 |
+
{"title_id": "26754", "title": "广州举办首期数字政府创新应用平台政企对接会", "topic": "科技", "event_info": [{"event1_trigger": "举办", "event1_triple": "'广州', '举办', '首期数字政府创新应用平台政企对接会'"}]}
|
62 |
+
{"title_id": "28795", "title": "这就是西帝?安芬尼-西蒙斯三分16中9轰下43分3板7助创生涯新高", "topic": "体育", "event_info": [{"event1_trigger": "创", "event1_triple": "'安芬尼-西蒙斯', '创', '生涯新高'"}, {"event2_trigger": "轰下", "event2_triple": "'安芬尼-西蒙斯', '轰下', '43分3板7助'"}, {"event3_trigger": "16中9", "event3_triple": "'西蒙斯三分', '16中9', ''"}]}
|
63 |
+
{"title_id": "17522", "title": "丰台区城管委积极做好元旦路灯照明和景观布置工作", "topic": "社会", "event_info": [{"event1_trigger": "做好", "event1_triple": "'丰台区城管委', '做好', '元旦路灯照明、景观布置工作'"}]}
|
64 |
+
{"title_id": "26940", "title": "果然冲着仇家来的!朴槿惠妹妹突然“弃暗投明”,韩政坛要变天?", "topic": "时事", "event_info": [{"event1_trigger": "“弃暗投明”", "event1_triple": "'朴槿惠妹妹', '“弃暗投明”', ''"}]}
|
65 |
+
{"title_id": "14807", "title": "台湾艺人方芳元旦在天安门看升旗", "topic": "社会", "event_info": [{"event1_trigger": "看", "event1_triple": "'方芳', '看', '升旗'"}]}
|
66 |
+
{"title_id": "42632", "title": "“帮我报警,勿回!”她深夜在短视频平台收到闺蜜求助信息", "topic": "社会", "event_info": [{"event1_trigger": "收到", "event1_triple": "'她', '收到', '求助信息'"}]}
|
67 |
+
{"title_id": "38331", "title": "3月15日山东省利津县气象台发布大风黄色预警", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'山东省利津县气象台', '发布', '大风黄色预警'"}]}
|
68 |
+
{"title_id": "14109", "title": "山东南水北调公司领导到泰安局开展基层联系点工作并指导调水工作", "topic": "社会", "event_info": [{"event1_trigger": "开展", "event1_triple": "'山东南水北调公司领导', '开展', '基层联系点工作'"}, {"event2_trigger": "到", "event2_triple": "'山东南水北调公司领导', '到', '泰安局'"}, {"event3_trigger": "指导", "event3_triple": "'山东南水北调公司领导', '指导', '调水工作'"}]}
|
69 |
+
{"title_id": "1124", "title": "山东省成立第一届学科评议组和专业学位研究生教育指导委员会", "topic": "教育", "event_info": [{"event1_trigger": "成立", "event1_triple": "'山东省', '成立', '第一届学科评议组和专业学位研究生教育指导委员会'"}]}
|
70 |
+
{"title_id": "21174", "title": "CSGO: HLTV二月最新世界排名,银河战舰FaZe上升5名", "topic": "游戏", "event_info": [{"event1_trigger": ":", "event1_triple": "'CSGO', ':', 'HLTV二月最新世界排名'"}, {"event2_trigger": "上升", "event2_triple": "'银河战舰FaZe', '上升', '5名'"}]}
|
71 |
+
{"title_id": "34950", "title": "大量证据曝光!德云社张九南家暴疑实锤,前妻:他算计了我们母子", "topic": "娱乐", "event_info": [{"event1_trigger": "曝光", "event1_triple": "'张九南家暴证据', '曝光', ''"}, {"event2_trigger": "疑实锤", "event2_triple": "'张九南家暴', '疑实锤', ''"}, {"event3_trigger": "家暴", "event3_triple": "'张九南', '家暴', ''"}]}
|
72 |
+
{"title_id": "28013", "title": "100余组花灯人气高 “最美花灯”点亮泉城", "topic": "文化", "event_info": [{"event1_trigger": "点亮", "event1_triple": "'最美花灯', '点亮', '泉城'"}]}
|
73 |
+
{"title_id": "41225", "title": "佳力图:603912:佳力图关于非公开发行股票申请获得中国证监会发行审核委员会审核通过", "topic": "财经", "event_info": [{"event1_trigger": "获得审核通过", "event1_triple": "'佳力图关于非公开发行股票申请', '获得审核通过', '中国证监会发行审核委员会'"}]}
|
74 |
+
{"title_id": "1059", "title": "建行广州分行服务专业市场,畅通金融“活水”", "topic": "财经", "event_info": [{"event1_trigger": "畅通", "event1_triple": "'建行广州分行服务专业市场', '畅通', '金融“活水”'"}]}
|
75 |
+
{"title_id": "18219", "title": "公私募规模合计突破44万亿元!规模激增背后业绩助力", "topic": "财经", "event_info": [{"event1_trigger": "突破", "event1_triple": "'公私募规模', '突破', '44万亿元'"}, {"event2_trigger": "助力", "event2_triple": "'业绩', '助力', '公私募规模激增'"}]}
|
76 |
+
{"title_id": "8171", "title": "“家·圆”网络新年文艺晚会圆满落幕", "topic": "社会", "event_info": [{"event1_trigger": "落幕", "event1_triple": "'“家·圆”网络新年文艺晚会', '落幕', ''"}]}
|
77 |
+
{"title_id": "1973", "title": "安徽省望江县发布大雾黄色预警", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'安徽省望江县', '发布', '大雾黄色预警'"}]}
|
78 |
+
{"title_id": "23067", "title": "吴小姐想买盆胧月,带回家发现是冬美人,哭笑不得,太难区分了", "topic": "搞笑", "event_info": [{"event1_trigger": "想", "event1_triple": "'吴小姐', '想', '买盆胧月'"}, {"event2_trigger": "带回家", "event2_triple": "'吴小姐', '带回家', '胧月'"}, {"event3_trigger": "发现", "event3_triple": "'吴小姐', '发现', '是冬美人'"}]}
|
79 |
+
{"title_id": "13784", "title": "惊险!广东一4岁男孩悬吊三楼防盗网外,危急时刻……", "topic": "社会", "event_info": [{"event1_trigger": "悬吊", "event1_triple": "'广东男孩', '悬吊', '防盗网外'"}]}
|
80 |
+
{"title_id": "3810", "title": "刚刚!江西又一条高铁开通!", "topic": "时事", "event_info": [{"event1_trigger": "开通", "event1_triple": "'江西又一条高铁', '开通', ''"}]}
|
81 |
+
{"title_id": "22595", "title": "19岁欧阳娜娜终于放弃了高跟鞋,换上“果冻匡威”,潮到没朋友", "topic": "娱乐", "event_info": [{"event1_trigger": "放弃", "event1_triple": "'欧阳娜娜', '放弃', '高跟鞋'"}, {"event2_trigger": "换上", "event2_triple": "'欧阳娜娜', '换上', '“果冻匡威”'"}]}
|
82 |
+
{"title_id": "16825", "title": "海丰:红色旅游景区成为新年打卡“热地”", "topic": "旅游", "event_info": [{"event1_trigger": "成为", "event1_triple": "'海丰红色旅游景区', '成为', '新年打卡“热地”'"}]}
|
83 |
+
{"title_id": "28938", "title": "2022年起取消权益性投资核定征收,直播带货首当其冲!", "topic": "财经", "event_info": [{"event1_trigger": "取消", "event1_triple": "'权益性投资核定征收', '取消', ''"}]}
|
84 |
+
{"title_id": "10334", "title": "世体:巴萨在2019年时就想签莫拉塔,但因担心苏亚雷斯不高兴放弃", "topic": "体育", "event_info": [{"event1_trigger": "想签", "event1_triple": "'巴萨', '想签', '莫拉塔'"}, {"event2_trigger": "担心", "event2_triple": "'巴萨', '担心', '苏亚雷斯不高兴'"}, {"event3_trigger": "放弃签", "event3_triple": "'巴萨', '放弃签', '莫拉塔'"}]}
|
85 |
+
{"title_id": "7781", "title": "外交部提醒中国公民近期暂勿前往阿富汗", "topic": "时事", "event_info": [{"event1_trigger": "提醒", "event1_triple": "'外交部', '提醒', '中国公民'"}, {"event2_trigger": "暂勿前往", "event2_triple": "'中国公民', '暂勿前往', '阿富汗'"}]}
|
86 |
+
{"title_id": "1103", "title": "要求房子加名后,男友却提出婚后租房住,女友:算了,这婚不结了", "topic": "情感", "event_info": [{"event1_trigger": "要求", "event1_triple": "'女友', '要求', '房子加名'"}, {"event2_trigger": "提出", "event2_triple": "'男友', '提出', '婚后租房住'"}]}
|
87 |
+
{"title_id": "22721", "title": "中共嘉祥县纪委十五届二次全体会议举行", "topic": "时事", "event_info": [{"event1_trigger": "举行", "event1_triple": "'中共嘉祥县纪委十五届二次全体会议', '举行', ''"}]}
|
88 |
+
{"title_id": "34448", "title": "3月6日0时至14时青岛市新增27例新冠肺炎确诊病例", "topic": "社会", "event_info": [{"event1_trigger": "新增", "event1_triple": "'青岛市', '新增', '27例新冠肺炎确诊病例'"}]}
|
89 |
+
{"title_id": "17355", "title": "国家税务总局:对各种偷逃税行为一律严惩不贷", "topic": "时事", "event_info": [{"event1_trigger": ":", "event1_triple": "'国家税务总局', ':', '对各种偷逃税行为一律严惩不贷'"}]}
|
90 |
+
{"title_id": "25119", "title": "镍价一度破10万美元!印尼将增产40万吨镍,又想征收镍铁出口税?", "topic": "财经", "event_info": [{"event1_trigger": "将增产", "event1_triple": "'印尼', '将增产', '40万吨镍'"}, {"event2_trigger": "破10万美元", "event2_triple": "'镍价', '破10万美元', ''"}]}
|
91 |
+
{"title_id": "20769", "title": "云天化发布2021年度业绩快报 归属净利润盈利36.366亿元", "topic": "财经", "event_info": [{"event1_trigger": "发布", "event1_triple": "'云天化', '发布', '2021年度业绩快报'"}, {"event2_trigger": "盈利36.366亿元", "event2_triple": "'云天化归属净利润', '盈利36.366亿元', ''"}]}
|
92 |
+
{"title_id": "37307", "title": "体育老师办公室集体涂防晒,一天晒7节课,网友:精致起来了", "topic": "教育", "event_info": [{"event1_trigger": "涂", "event1_triple": "'体育老师', '涂', '防晒'"}]}
|
93 |
+
{"title_id": "23028", "title": "格力没有接班人,董明珠再干三年", "topic": "科技", "event_info": [{"event1_trigger": "没有", "event1_triple": "'格力', '没有', '接班人'"}, {"event2_trigger": "再干三年", "event2_triple": "'董明珠', '再干三年', ''"}]}
|
94 |
+
{"title_id": "16631", "title": "江西日报广告经营创收首破亿元,同比增长104.56%", "topic": "财经", "event_info": [{"event1_trigger": "首破亿元", "event1_triple": "'江西日报广告经营创收', '首破亿元', ''"}, {"event2_trigger": "增长104.56%", "event2_triple": "'江西日报广告经营创收', '增长104.56%', ''"}]}
|
95 |
+
{"title_id": "2528", "title": "CCTV5直播!广州男篮复仇之战,大外援正式就位,郭士强重振军心", "topic": "体育", "event_info": [{"event1_trigger": "就位", "event1_triple": "'广州男篮大外援', '就位', ''"}, {"event2_trigger": "重振", "event2_triple": "'郭士强', '重振', '军心'"}]}
|
96 |
+
{"title_id": "1019", "title": "特斯拉2021全年交付近百万辆,同比暴涨87%,马斯克:了不起!", "topic": "科技", "event_info": [{"event1_trigger": "交付", "event1_triple": "'特斯拉', '交付', '近百万辆'"}, {"event2_trigger": "暴涨87%", "event2_triple": "'特斯拉2021全年交付', '暴涨87%', ''"}]}
|
97 |
+
{"title_id": "10943", "title": "上海新年第一个工作日项目开工掀热潮", "topic": "社会", "event_info": [{"event1_trigger": "开工", "event1_triple": "'上海新年第一个工作日项目', '开工', ''"}, {"event2_trigger": "掀", "event2_triple": "'上海新年第一个工作日项目', '掀', '热潮'"}]}
|
98 |
+
{"title_id": "25780", "title": "有大动作?熊磊公开发声暂停直播,将与网暴抗争到底", "topic": "综艺", "event_info": [{"event1_trigger": "公开发声", "event1_triple": "'熊磊', '公开发声', '暂停直播'"}, {"event2_trigger": "将与抗争到底", "event2_triple": "'熊磊', '将与抗争到底', '网暴'"}]}
|
99 |
+
{"title_id": "24585", "title": "举办逾600场活动,“粤新年·有虎气”活动奉上粤式文化盛宴", "topic": "文化", "event_info": [{"event1_trigger": "奉上", "event1_triple": "'“粤新年·有虎气”活动', '奉上', '粤式文化盛宴'"}]}
|
100 |
+
{"title_id": "31831", "title": "林心如9年前旧照曝光,一个小动作遭吐槽,身旁的蒋欣意外抢镜", "topic": "娱乐", "event_info": [{"event1_trigger": "曝光", "event1_triple": "'林心如9年前旧照', '曝光', ''"}, {"event2_trigger": "遭吐槽", "event2_triple": "'林心如', '遭吐槽', ''"}, {"event3_trigger": "抢镜", "event3_triple": "'蒋欣', '抢镜', ''"}]}
|
101 |
+
{"title_id": "10855", "title": "传丰田汽车(TM.US)计划2025年前推出汽车操作系统Arene", "topic": "汽车", "event_info": [{"event1_trigger": "计划推出", "event1_triple": "'丰田汽车', '计划推出', '汽车操作系统Arene'"}]}
|
102 |
+
{"title_id": "30355", "title": "博山交警连出重拳!假期3天16台超载车落网", "topic": "社会", "event_info": [{"event1_trigger": "连出重拳", "event1_triple": "'博山交警', '连出重拳', ''"}, {"event2_trigger": "落网", "event2_triple": "'超载车', '落网', ''"}]}
|
103 |
+
{"title_id": "35041", "title": "信义玻璃(00868)与信义储电(08328)订立蒸汽供应协议", "topic": "财经", "event_info": [{"event1_trigger": "订立", "event1_triple": "'信义玻璃与信义储电', '订立', '蒸汽供应协议'"}]}
|
104 |
+
{"title_id": "31285", "title": "水谷隼回应抱伊藤美诚遭拒绝:私下也不熟!曾爆料伊藤美诚想放弃", "topic": "动漫", "event_info": [{"event1_trigger": "回应", "event1_triple": "'水谷隼', '回应', '抱伊藤美诚遭拒绝'"}, {"event2_trigger": "抱", "event2_triple": "'水谷隼', '抱', '伊藤美诚'"}, {"event3_trigger": "遭拒绝", "event3_triple": "'水谷隼抱伊藤美诚', '遭拒绝', ''"}]}
|
105 |
+
{"title_id": "16181", "title": "1月1日河北无新增新型冠状病毒肺炎确诊病例 无新增无症状感染者", "topic": "社会", "event_info": [{"event1_trigger": "无新增", "event1_triple": "'河北', '无新增', '新型冠状病毒肺炎确诊病例'"}]}
|
106 |
+
{"title_id": "2497", "title": "哪吒汽车:2021年12月交付量为10127台 全年累计交付量达69674台", "topic": "汽车", "event_info": [{"event1_trigger": "为", "event1_triple": "'哪吒汽车2021年12月交付量', '为', '10127台'"}, {"event2_trigger": "达69674台", "event2_triple": "'哪吒汽车全年累计交付量', '达69674台', ''"}]}
|
107 |
+
{"title_id": "32340", "title": "中国石油化工股份(00386.HK):出让赛科公司部分股权引入战略投资者", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'中国石油化工股份', ':', '出让赛科公司部分股权引入战略投资者'"}, {"event2_trigger": "出让", "event2_triple": "'中国石油化工股份', '出让', '赛科公司部分股权'"}, {"event3_trigger": "引入", "event3_triple": "'中国石油化工股份', '引入', '战略投资者'"}]}
|
108 |
+
{"title_id": "38845", "title": "第18金!国家轮椅冰壶集训队在这里立下的夺冠誓言实现了", "topic": "体育", "event_info": [{"event1_trigger": "立下", "event1_triple": "'国家轮椅冰壶集训队', '立下', '夺冠誓言'"}, {"event2_trigger": "实现了", "event2_triple": "'国家轮椅冰壶集训队夺冠誓言', '实现了', ''"}]}
|
109 |
+
{"title_id": "31775", "title": "特色农业“接二连三”贵州山地成为“聚宝盆”", "topic": "三农", "event_info": [{"event1_trigger": "成为", "event1_triple": "'贵州山地', '成为', '“聚宝盆”'"}, {"event2_trigger": "“接二连三”", "event2_triple": "'贵州特色农业', '“接二连三”', ''"}]}
|
110 |
+
{"title_id": "42353", "title": "瓦科公司迎来十周年 \\", "topic": "社会", "event_info": [{"event1_trigger": "迎来", "event1_triple": "'瓦科公司', '迎来', '十周年'"}]}
|
111 |
+
{"title_id": "27986", "title": "中信证券(600030.SH)预计2021年度归母净利润不低于176.45亿元", "topic": "财经", "event_info": [{"event1_trigger": "预计", "event1_triple": "'中信证券', '预计', '2021年度归母净利润不低于176.45亿元'"}]}
|
112 |
+
{"title_id": "38254", "title": "高圆圆出道前的容颜实在太美,因照片被流出,十八岁比现在美十倍", "topic": "娱乐", "event_info": [{"event1_trigger": "被流出", "event1_triple": "'高圆圆照片', '被流出', ''"}]}
|
113 |
+
{"title_id": "22522", "title": "3月7日起 海口秀英港试行船舶智能配载全预约过海模式", "topic": "科技", "event_info": [{"event1_trigger": "试行", "event1_triple": "'海口秀英港', '试行', '船舶智能配载全预约过海模式'"}]}
|
114 |
+
{"title_id": "10166", "title": "冬奥会延庆赛区临建和形象景观基本完工!记者现场探访~", "topic": "体育", "event_info": [{"event1_trigger": "基本完工", "event1_triple": "'冬奥会延庆赛区临建和形象景观', '基本完工', ''"}, {"event2_trigger": "探访", "event2_triple": "'记者', '探访', '冬奥会延庆赛区临建和形象景观'"}]}
|
115 |
+
{"title_id": "312", "title": "威海市区道路运输行业工会联合会成立", "topic": "社会", "event_info": [{"event1_trigger": "成立", "event1_triple": "'威海市区道路运输行业工会联合会', '成立', ''"}]}
|
116 |
+
{"title_id": "29634", "title": "人民日报:江苏句容着力解决群众急难愁盼问题", "topic": "社会", "event_info": [{"event1_trigger": "解决", "event1_triple": "'句容', '解决', '群众急难愁盼问题'"}]}
|
117 |
+
{"title_id": "39744", "title": "双喜临门!李亚鹏宣布结婚生子", "topic": "娱乐", "event_info": [{"event1_trigger": "宣布", "event1_triple": "'李亚鹏', '宣布', '结婚生子'"}]}
|
118 |
+
{"title_id": "27981", "title": "ST德豪:公司高级管理人员退休离任", "topic": "财经", "event_info": [{"event1_trigger": "退休离任", "event1_triple": "'ST德豪公司高级管理人员', '退休离任', ''"}]}
|
119 |
+
{"title_id": "13928", "title": "增2.0T汽油发动机 江淮瑞风L5售价15.58万元起", "topic": "汽车", "event_info": [{"event1_trigger": "增", "event1_triple": "'江淮瑞风L5', '增', '2.0T汽油发动机'"}]}
|
120 |
+
{"title_id": "9325", "title": "迎冬奥 爱冰雪丨兵团第十二师冰雪文化旅游节精彩开幕", "topic": "旅游", "event_info": [{"event1_trigger": "开幕", "event1_triple": "'兵团第十二师冰雪文化旅游节', '开幕', ''"}]}
|
121 |
+
{"title_id": "4273", "title": "百度CTO王海峰获中国专利金奖,百度连续四年AI专利申请和授予量全国第一", "topic": "科技", "event_info": [{"event1_trigger": "获", "event1_triple": "'王海峰', '获', '中国专利金奖'"}]}
|
122 |
+
{"title_id": "28503", "title": "豫金刚石(300064)与柏堡龙(002776)拟被行政处罚,投资者可准备索赔", "topic": "财经", "event_info": [{"event1_trigger": "拟被行政处罚", "event1_triple": "'豫金刚石与柏堡龙', '拟被行政处罚', ''"}, {"event2_trigger": "可准备索赔", "event2_triple": "'投资者', '可准备索赔', ''"}]}
|
123 |
+
{"title_id": "7579", "title": "今天起 中国全面履行RCEP所有承诺和义务", "topic": "时事", "event_info": [{"event1_trigger": "履行", "event1_triple": "'中国', '履行', 'RCEP所有承诺和义务'"}]}
|
124 |
+
{"title_id": "729", "title": "任城区干部助企攀登活动推进会召开", "topic": "社会", "event_info": [{"event1_trigger": "召开", "event1_triple": "'任城区干部助企攀登活动推进会', '召开', ''"}]}
|
125 |
+
{"title_id": "34523", "title": "国际原油价格攀上110美元/桶关口,今晚进行油价调整!", "topic": "财经", "event_info": [{"event1_trigger": "攀上", "event1_triple": "'国际原油价格', '攀上', '110美元/桶关口,'"}, {"event2_trigger": "进行", "event2_triple": "'国际原油价格', '进行', '油价调整'"}]}
|
126 |
+
{"title_id": "9804", "title": "再创新高 跨年夜南昌地铁客流达181.5万人次", "topic": "社会", "event_info": [{"event1_trigger": "再创", "event1_triple": "'南昌地铁客流', '再创', '新高'"}, {"event2_trigger": "达181.5万人次", "event2_triple": "'南昌地铁客流', '达181.5万人次', ''"}]}
|
127 |
+
{"title_id": "7482", "title": "菲律宾2021年12月核心通货膨胀同比增长3.0%", "topic": "财经", "event_info": [{"event1_trigger": "增长", "event1_triple": "'菲律宾核心通货膨胀', '增长', ''"}]}
|
128 |
+
{"title_id": "18384", "title": "恒大回应海南海花岛39栋楼被责令拆除:不涉及已收楼业主", "topic": "房产", "event_info": [{"event1_trigger": "回应", "event1_triple": "'恒大', '回应', '海南海花岛39栋楼被责令拆除'"}, {"event2_trigger": "被责令拆除", "event2_triple": "'海南海花岛39栋楼', '被责令拆除', ''"}]}
|
129 |
+
{"title_id": "22403", "title": "霞寨镇团结村:村道硬化 村民出行更方便", "topic": "社会", "event_info": [{"event1_trigger": "硬化", "event1_triple": "'霞寨镇团结村村道', '硬化', ''"}]}
|
130 |
+
{"title_id": "4581", "title": "五大卫视跨年没有出圈作品,但杨紫、周深大赢家,王一博罕见落第", "topic": "综艺", "event_info": [{"event1_trigger": "没有", "event1_triple": "'五大卫视跨年', '没有', '出圈作品'"}, {"event2_trigger": "落第", "event2_triple": "'王一博', '落第', ''"}]}
|
131 |
+
{"title_id": "42138", "title": "广东政法系统反腐纪实:15名厅级干部被查", "topic": "时事", "event_info": [{"event1_trigger": "被查", "event1_triple": "'广东政法系统15名厅级干部', '被查', ''"}]}
|
132 |
+
{"title_id": "29191", "title": "资环学院举办“耕耘三十载,英‘资’向未来” 2022年元旦晚会", "topic": "教育", "event_info": [{"event1_trigger": "举办", "event1_triple": "'资环学院', '举办', '2022年元旦晚会'"}]}
|
133 |
+
{"title_id": "25668", "title": "谷歌:Steam已经登陆Chromebook", "topic": "科技", "event_info": [{"event1_trigger": "登陆", "event1_triple": "'Steam', '登陆', 'Chromebook'"}]}
|
134 |
+
{"title_id": "1692", "title": "【聚焦东莞党代会·现场直击】中国共产党东莞市第十五次代表大会开幕,肖亚非代表十四届市委向大会作报告", "topic": "时事", "event_info": [{"event1_trigger": "开幕", "event1_triple": "'中国共产党东莞市第十五次代表大会', '开幕', ''"}, {"event2_trigger": "代表", "event2_triple": "'肖亚非', '代表', '十四届市委'"}, {"event3_trigger": "作", "event3_triple": "'肖亚非', '作', '报告'"}]}
|
135 |
+
{"title_id": "8593", "title": "数百万财付通小微商家享受政策利好", "topic": "科技", "event_info": [{"event1_trigger": "享受", "event1_triple": "'财付通小微商家', '享受', '政策利好'"}]}
|
136 |
+
{"title_id": "12726", "title": "*ST海航:公司及十家子公司重整计划执行完毕", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'海航', ':', '公司及十家子公司重整计划执行完毕'"}, {"event2_trigger": "执行完毕", "event2_triple": "'海航公司及十家子公司重整计划', '执行完毕', ''"}]}
|
137 |
+
{"title_id": "24976", "title": "全国首条免费高速公路来了!", "topic": "时事", "event_info": [{"event1_trigger": "来了", "event1_triple": "'全国首条免费高速公路', '来了', ''"}]}
|
138 |
+
{"title_id": "5099", "title": "市疫情防控督查组督导检查曲沃县疫情防控工作", "topic": "社会", "event_info": [{"event1_trigger": "督导", "event1_triple": "'市疫情防控督查组', '督导', '曲沃县疫情防控工作'"}, {"event2_trigger": "检查", "event2_triple": "'市疫情防控督查组', '检查', '曲沃县疫情防控工作'"}]}
|
139 |
+
{"title_id": "7666", "title": "历史第一人!德罗赞两连三分压哨绝杀神似科比,詹皇被好兄弟打服", "topic": "体育", "event_info": [{"event1_trigger": "两连三分", "event1_triple": "'德罗赞', '两连三分', ''"}, {"event2_trigger": "压哨绝杀", "event2_triple": "'德罗赞', '压哨绝杀', ''"}, {"event3_trigger": "被打服", "event3_triple": "'詹皇', '被打服', ''"}]}
|
140 |
+
{"title_id": "15444", "title": "大方县气象局发布大雾黄色预警【III级/较重】", "topic": "天气", "event_info": [{"event1_trigger": "发布", "event1_triple": "'大方县气象局', '发布', '大雾黄色预警'"}]}
|
141 |
+
{"title_id": "20376", "title": "安顺供电局电力“满格”助春耕备耕", "topic": "社会", "event_info": [{"event1_trigger": "助", "event1_triple": "'安顺供电局', '助', '春耕备耕'"}, {"event2_trigger": "“满格”", "event2_triple": "'安顺供电局电力', '“满格”', ''"}]}
|
142 |
+
{"title_id": "13167", "title": "粤电力A:以所持风电公司股权向广东省风力发电有限公司增资,增资金额32.18亿元", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'粤电力A', ':', '以所持风电公司股权向广东省风力发电有限公司增资增资金额32.18亿元'"}, {"event2_trigger": "增资32.18亿元", "event2_triple": "'粤电力A', '增资32.18亿元', '广东省风力发电有限公司'"}]}
|
143 |
+
{"title_id": "17868", "title": "跨年福利 有“李”相伴丨羊晚邀广州街坊免费参观“百年味道馆”+“湿地公园”", "topic": "社会", "event_info": [{"event1_trigger": "邀", "event1_triple": "'羊晚', '邀', '免费参观“百年味道馆”“湿地公园”'"}]}
|
144 |
+
{"title_id": "33063", "title": "甘肃防疫最新数据消息通报-甘肃今日截至2月28日05时31分防控疫情数据统计情况公布", "topic": "社会", "event_info": [{"event1_trigger": "通报", "event1_triple": "'甘肃防疫最新数据消息', '通报', ''"}, {"event2_trigger": "公布", "event2_triple": "'甘肃防控疫情数据统计情况', '公布', ''"}]}
|
145 |
+
{"title_id": "7492", "title": "俄罗斯男网终于爆发惊人力量!梅德韦杰夫能在新赛季表现最好么?", "topic": "体育", "event_info": [{"event1_trigger": "爆发", "event1_triple": "'俄罗斯男网', '爆发', '惊人力量'"}]}
|
146 |
+
{"title_id": "23738", "title": "贵州检察机关综合运用“四大检察” 做深做实未成年人检察工作", "topic": "时事", "event_info": [{"event1_trigger": "综合运用", "event1_triple": "'贵州检察机关', '综合运用', '“四大检察”'"}, {"event2_trigger": "做深做实", "event2_triple": "'贵州检察机关', '做深做实', '未成年人检察工作'"}]}
|
147 |
+
{"title_id": "8629", "title": "大名城(600094.SH):截至12月底已累计回购0.65%股份", "topic": "财经", "event_info": [{"event1_trigger": ":", "event1_triple": "'大名城', ':', '截至12月底已累计回购0.65%股份'"}, {"event2_trigger": "已回购", "event2_triple": "'大名城', '已回购', '0.65%股份'"}]}
|
148 |
+
{"title_id": "39888", "title": "66岁成龙再受伤,一张照片告诉你,被叫一声大哥有多不容易", "topic": "娱乐", "event_info": [{"event1_trigger": "受伤", "event1_triple": "'成龙', '受伤', ''"}]}
|
149 |
+
{"title_id": "16990", "title": "揭秘|鼎鑫生物携“秒吖健康”重出江湖,现行制度涉嫌多层次团队计酬?", "topic": "财经", "event_info": [{"event1_trigger": "重出江湖", "event1_triple": "'鼎鑫生物', '重出江湖', ''"}, {"event2_trigger": "携", "event2_triple": "'鼎鑫生物', '携', '“秒吖健康”'"}]}
|
150 |
+
{"title_id": "24485", "title": "国际油价持续上涨,布油站上110美元/桶!", "topic": "财经", "event_info": [{"event1_trigger": "持续上涨", "event1_triple": "'国际油价', '持续上涨', ''"}, {"event2_trigger": "站上", "event2_triple": "'布油', '站上', '110美元/桶'"}]}
|
151 |
+
{"title_id": "10137", "title": "华商记者帮|老人高血压药吃完了 记者帮助联系了药店", "topic": "社会", "event_info": [{"event1_trigger": "吃完", "event1_triple": "'老人', '吃完', '高血压药'"}, {"event2_trigger": "帮助", "event2_triple": "'记者', '帮助', '老人'"}, {"event3_trigger": "联系", "event3_triple": "'记者', '联系', '药店'"}]}
|
152 |
+
{"title_id": "21062", "title": "文水县检察院办理全省首例伪造出生证明犯罪案件", "topic": "社会", "event_info": [{"event1_trigger": "办理", "event1_triple": "'文水县检察院', '办理', '全省首例伪造出生证明犯罪案件'"}]}
|
153 |
+
{"title_id": "31129", "title": "嗨起来了!内马尔在个人豪宅里举办跨年“马拉松派对”", "topic": "体育", "event_info": [{"event1_trigger": "举办", "event1_triple": "'内马尔', '举办', '跨年马拉松派对'"}]}
|
154 |
+
{"title_id": "3646", "title": "国产自研飞机型号冠一GA20取证试飞机在黔东南州首飞", "topic": "军事", "event_info": [{"event1_trigger": "首飞", "event1_triple": "'国产自研飞机型号冠一GA20取证试飞机', '首飞', ''"}]}
|
155 |
+
{"title_id": "36997", "title": "5G智能通信助力冬残奥会沟通无障碍,消除鸿沟一起向未来", "topic": "体育", "event_info": [{"event1_trigger": "助力", "event1_triple": "'5G智能通信', '助力', '冬残奥会'"}]}
|
156 |
+
{"title_id": "23801", "title": "数模混合芯片供应商「聆思半导体」完成数千万元天使轮融资,高性能、可重构芯片已量产交付", "topic": "科技", "event_info": [{"event1_trigger": "完成", "event1_triple": "'聆思半导体', '完成', '天使轮融资'"}, {"event2_trigger": "已量产交付", "event2_triple": "'聆思半导体高性能可重构芯片', '已量产交付', ''"}]}
|
157 |
+
{"title_id": "4599", "title": "太龙股份于深圳新设电子科技公司,注册资本5000万元", "topic": "财经", "event_info": [{"event1_trigger": "新设", "event1_triple": "'太龙股份', '新设', '电子科技公司'"}]}
|
158 |
+
{"title_id": "30833", "title": "安顺市疾控系统援铜仁医护人员安全返回", "topic": "社会", "event_info": [{"event1_trigger": "安全返回", "event1_triple": "'安顺市疾控系统援铜仁医护人员', '安全返回', ''"}]}
|
159 |
+
{"title_id": "33536", "title": "国务院免去徐英伟香港特别行政区政府民政事务局局长职务", "topic": "时事", "event_info": [{"event1_trigger": "免去", "event1_triple": "'国务院', '免去', '徐英伟香港特别行政区政府民政事务局局长职务'"}]}
|
160 |
+
{"title_id": "26911", "title": "【植此青绿】巴中市文明办赴清江开展2022年春季义务植树活动", "topic": "社会", "event_info": [{"event1_trigger": "赴", "event1_triple": "'巴中市文明办', '赴', '清江'"}, {"event2_trigger": "开展", "event2_triple": "'巴中市文明办', '开展', '春季义务植树活动'"}]}
|
161 |
+
{"title_id": "649", "title": "波士顿开启急冻模式", "topic": "社会", "event_info": [{"event1_trigger": "开启", "event1_triple": "'波士顿', '开启', '急冻模式'"}]}
|
162 |
+
{"title_id": "28315", "title": "13年来首次下跌!韩国“地王”掉价了!昔日知名商圈,店铺接连关门!商家感叹前景迷茫...", "topic": "房产", "event_info": [{"event1_trigger": "掉价", "event1_triple": "'韩国“地王”', '掉价', ''"}, {"event2_trigger": "下跌", "event2_triple": "'韩国“地王”', '下跌', ''"}, {"event3_trigger": "关门", "event3_triple": "'知名商圈店铺', '关门', ''"}, {"event4_trigger": "感叹", "event4_triple": "'商家', '感叹', ''"}]}
|
163 |
+
{"title_id": "1486", "title": "顶级CV姜广涛、季冠霖加盟,三国杀2022新年狂欢夜嘉宾首曝", "topic": "游戏", "event_info": [{"event1_trigger": "首曝", "event1_triple": "'三国杀2022新年狂欢夜嘉宾', '首曝', ''"}, {"event2_trigger": "加盟", "event2_triple": "'姜广涛季冠霖', '加盟', '三国杀'"}]}
|
164 |
+
{"title_id": "14360", "title": "陕西咸阳烟草:彰显使命担当 30万爱心捐款助力疫情防控", "topic": "社会", "event_info": [{"event1_trigger": "彰显", "event1_triple": "'咸阳烟草', '彰显', '使命担当'"}, {"event2_trigger": "助力", "event2_triple": "'咸阳烟草30万爱心捐款', '助力', '疫情防控'"}, {"event3_trigger": "捐款30万", "event3_triple": "'咸阳烟草', '捐款30万', ''"}]}
|
165 |
+
{"title_id": "23207", "title": "苹果2022春季发布会将天两天后举行(3月9日凌晨)", "topic": "科技", "event_info": [{"event1_trigger": "将举行", "event1_triple": "'苹果2022春季发布会', '将举行', ''"}]}
|
166 |
+
{"title_id": "20749", "title": "药家鑫被判死刑,死前的愿望仍被拒绝,父亲希望他能把罪恶都带走", "topic": "历史", "event_info": [{"event1_trigger": "被判", "event1_triple": "'药家鑫', '被判', '死刑'"}]}
|
167 |
+
{"title_id": "19204", "title": "张令华就节日期间疫情防控、安全生产情况现场办公", "topic": "社会", "event_info": [{"event1_trigger": "现场办公", "event1_triple": "'张令华', '现场办公', ''"}]}
|
168 |
+
{"title_id": "13030", "title": "事业编制!卫生系统事业单位公开招聘588人!报名时间截止2022年1月8日", "topic": "教育", "event_info": [{"event1_trigger": "招聘", "event1_triple": "'卫生系统事业单位', '招聘', '588人'"}]}
|
169 |
+
{"title_id": "42506", "title": "湖南开通首条至南美洲货运航线", "topic": "时事", "event_info": [{"event1_trigger": "开通", "event1_triple": "'湖南', '开通', '首条至南美洲货运航线'"}]}
|
170 |
+
{"title_id": "32481", "title": "中金招科技硬件方向实习生(二级市场,3.6截止)", "topic": "科技", "event_info": [{"event1_trigger": "招", "event1_triple": "'中金', '招', '科技硬件方向实习生'"}]}
|
171 |
+
{"title_id": "24338", "title": "“基金股票”再上热搜!超3400只个股下跌!黄金概念逆市走强", "topic": "财经", "event_info": [{"event1_trigger": "再上", "event1_triple": "'“基金股票”', '再上', '热搜'"}, {"event2_trigger": "下跌", "event2_triple": "'超3400只个股', '下跌', ''"}, {"event3_trigger": "走强", "event3_triple": "'黄金概念', '走强', ''"}]}
|
172 |
+
{"title_id": "36559", "title": "福建成立文旅营销推广联盟", "topic": "旅游", "event_info": [{"event1_trigger": "成立", "event1_triple": "'福建', '成立', '文旅营销推广联盟'"}]}
|
173 |
+
{"title_id": "41096", "title": "泰康人寿至今最高额赔案,1717万元快速给付", "topic": "财经", "event_info": [{"event1_trigger": "给付", "event1_triple": "'泰康人寿', '给付', '1717万元'"}]}
|
174 |
+
{"title_id": "42056", "title": "县粮食局直属库招聘国有企业工作人员!3月14-17日报名!", "topic": "教育", "event_info": [{"event1_trigger": "招聘", "event1_triple": "'县粮食局直属库', '招聘', '国有企业工作人员'"}]}
|
175 |
+
{"title_id": "42527", "title": "\\ufeff经验丰富/重症专家云集 队员提前了解港情况", "topic": "社会", "event_info": [{"event1_trigger": "云集", "event1_triple": "'重症专家', '云集', ''"}, {"event2_trigger": "提前了解", "event2_triple": "'重症专家队员', '提前了解', '港情况'"}]}
|
176 |
+
{"title_id": "26918", "title": "近期避免聚餐聚会!共同聚餐成陕西本轮疫情主要传播方式", "topic": "社会", "event_info": [{"event1_trigger": "成", "event1_triple": "'共同聚餐', '成', '陕西本轮疫情主要传播方式'"}]}
|
177 |
+
{"title_id": "42781", "title": "海鸥在得物发售首个年轻系列潮表,实现品牌“年轻化”破圈", "topic": "时尚", "event_info": [{"event1_trigger": "发售", "event1_triple": "'海鸥', '发售', '首个年轻系列潮表'"}, {"event2_trigger": "实现", "event2_triple": "'海鸥', '实现', '品牌“年轻化”破圈'"}]}
|
178 |
+
{"title_id": "20348", "title": "细节见人品!吴昕节目中无意戳到工作人员,下一秒的举动看出素养", "topic": "综艺", "event_info": [{"event1_trigger": "戳到", "event1_triple": "'吴昕', '戳到', '工作人员'"}]}
|
179 |
+
{"title_id": "16573", "title": "沈阳开展“专项行动”全力推动产业转型、城市转型、社会转型", "topic": "时事", "event_info": [{"event1_trigger": "开展", "event1_triple": "'沈阳', '开展', '“专项行动”'"}, {"event2_trigger": "推动", "event2_triple": "'沈阳', '推动', '产业转型、城市转型、社会转型'"}]}
|
180 |
+
{"title_id": "10114", "title": "高苗在定边督导疫情防控等工作时要求 加强联动协作落实防控措施", "topic": "社会", "event_info": [{"event1_trigger": "督导", "event1_triple": "'高苗', '督导', '疫情防控等工作'"}, {"event2_trigger": "要求", "event2_triple": "'高苗', '要求', '加强联动协作落实防控措施'"}]}
|
181 |
+
{"title_id": "295", "title": "稀罕!内蒙古一牧民喜接65对双胞胎羊羔", "topic": "社会", "event_info": [{"event1_trigger": "接", "event1_triple": "'内蒙古牧民', '接', '65对双胞胎羊羔'"}]}
|
182 |
+
{"title_id": "1272", "title": "绍兴文化旅游商品征集大赛揭晓,古越龙山斩获金银大奖", "topic": "旅游", "event_info": [{"event1_trigger": "揭晓", "event1_triple": "'绍兴文化旅游商品征集大赛', '揭晓', ''"}, {"event2_trigger": "斩获", "event2_triple": "'古越龙山', '斩获', '金银大奖'"}]}
|
183 |
+
{"title_id": "30093", "title": "规模化、系统化冷链物流体系建设开启破冰之旅", "topic": "科技", "event_info": [{"event1_trigger": "开启", "event1_triple": "'规模化系统化冷链物流体系建设', '开启', '破冰之旅'"}]}
|
184 |
+
{"title_id": "4355", "title": "【一线调研】10万亿穿戴国货开启新征途,1月6日首发江西瑞声", "topic": "科技", "event_info": [{"event1_trigger": "开启", "event1_triple": "'穿戴国货', '开启', '征途'"}, {"event2_trigger": "首发", "event2_triple": "'穿戴国货征途', '首发', ''"}]}
|
185 |
+
{"title_id": "18665", "title": "东风雪铁龙2021年销量同比增长137% 凡尔赛成销量王", "topic": "汽车", "event_info": [{"event1_trigger": "增长137%", "event1_triple": "'东风雪铁龙2021年销量', '增长137%', ''"}, {"event2_trigger": "成", "event2_triple": "'凡尔赛', '成', '销量王'"}]}
|
186 |
+
{"title_id": "37023", "title": "魔咒or无奈?巴黎遭皇马逆转无缘八强,梅西近7年无缘欧冠决赛", "topic": "体育", "event_info": [{"event1_trigger": "遭逆转", "event1_triple": "'巴黎', '遭逆转', '皇马'"}, {"event2_trigger": "无缘", "event2_triple": "'巴黎', '无缘', '八强'"}, {"event3_trigger": "无缘", "event3_triple": "'梅西', '无缘', '欧冠决赛'"}]}
|
187 |
+
{"title_id": "6098", "title": "丹东打掉一犯罪集团!34人落网!", "topic": "社会", "event_info": [{"event1_trigger": "打掉", "event1_triple": "'丹东', '打掉', '一犯罪集团'"}, {"event2_trigger": "落网", "event2_triple": "'犯罪集团34人', '落网', ''"}]}
|
188 |
+
{"title_id": "21400", "title": "图赫尔:詹姆斯没有出现不好反应,T-席尔瓦和查洛巴恢复训练", "topic": "体育", "event_info": [{"event1_trigger": "没有出现", "event1_triple": "'詹姆斯', '没有出现', '不好反应'"}, {"event2_trigger": "恢复", "event2_triple": "'T-席尔瓦和查洛巴', '恢复', '训练'"}]}
|
189 |
+
{"title_id": "34451", "title": "天津市高校排名公布!天津大学遥遥领先,天津工业大学有望突围", "topic": "教育", "event_info": [{"event1_trigger": "公布", "event1_triple": "'天津市高校排名', '公布', ''"}, {"event2_trigger": "领先", "event2_triple": "'天津大学', '领先', ''"}, {"event3_trigger": "有望突围", "event3_triple": "'天津工业大学', '有望突围', ''"}]}
|
190 |
+
{"title_id": "34170", "title": "【全职】4000-5000元/月,多个岗位要人!佛山盛博绳带辅料公司招聘", "topic": "职场", "event_info": [{"event1_trigger": "招聘", "event1_triple": "'佛山盛博绳带辅料公司', '招聘', ''"}]}
|
191 |
+
{"title_id": "10267", "title": "C罗射门越老越妖,30岁后欧冠63球!热身射晕保安场边替索帅指挥", "topic": "体育", "event_info": [{"event1_trigger": "射门", "event1_triple": "'C罗', '射门', ''"}, {"event2_trigger": "射晕", "event2_triple": "'C罗', '射晕', '保安'"}, {"event3_trigger": "指挥", "event3_triple": "'C罗', '指挥', ''"}, {"event4_trigger": "替指挥", "event4_triple": "'C罗', '替指挥', '索帅'"}]}
|
192 |
+
{"title_id": "36317", "title": "415套!赣州中心城区第二批人才住房明天启动申请", "topic": "房产", "event_info": [{"event1_trigger": "启动申请", "event1_triple": "'赣州中心城区第二批人才住房', '启动申请', ''"}]}
|
193 |
+
{"title_id": "24814", "title": "Windows 11 Build 22567预览版更新 支持更绿色的后台更新时段", "topic": "科技", "event_info": [{"event1_trigger": "更新", "event1_triple": "'Windows 11 Build 22567预览版', '更新', ''"}, {"event2_trigger": "支持", "event2_triple": "'Windows 11 Build 22567预览版', '支持', '更绿色的后台更新时段'"}]}
|
194 |
+
{"title_id": "12085", "title": "金鹰元盛债券(LOF)E一个月来收益0.72%,同公司基金表现如何?(12月31日)", "topic": "财经", "event_info": [{"event1_trigger": "收益0.72%", "event1_triple": "'金鹰元盛债券(LOF)E', '收益0.72%', ''"}]}
|
195 |
+
{"title_id": "38928", "title": "大学男生被家长安排相亲,不料女方是高中班主任,乌龙闹得有点大", "topic": "情感", "event_info": [{"event1_trigger": "被安排相亲", "event1_triple": "'大学男生', '被安排相亲', '家长'"}, {"event2_trigger": "是", "event2_triple": "'女方', '是', '男生高中班主任'"}]}
|
196 |
+
{"title_id": "37046", "title": "橄榄古典音乐作为“音乐向导”正式入驻Apple Music", "topic": "音乐", "event_info": [{"event1_trigger": "入驻", "event1_triple": "'橄榄古典音乐', '入驻', 'Apple Music'"}]}
|
197 |
+
{"title_id": "7149", "title": "公司拖欠工资,员工集体上门讨薪,老板:敢来闹事,再延期半个月", "topic": "社会", "event_info": [{"event1_trigger": "拖欠", "event1_triple": "'公司', '拖欠', '工资'"}, {"event2_trigger": "上门讨薪", "event2_triple": "'员工', '上门讨薪', ''"}]}
|
198 |
+
{"title_id": "1682", "title": "重庆vs申花:双方全华班出战,毕津浩、曹赟定首发", "topic": "体育", "event_info": [{"event1_trigger": "vs", "event1_triple": "'重庆', 'vs', '申花'"}, {"event2_trigger": "出战", "event2_triple": "'重庆申花全华班', '出战', ''"}, {"event3_trigger": "首发", "event3_triple": "'毕津浩曹赟定', '首发', ''"}]}
|
199 |
+
{"title_id": "33192", "title": "【春暖万家】《焦作日报》:第十五届“春暖万家·关爱女性送健康”大型公益活动正式启动", "topic": "社会", "event_info": [{"event1_trigger": "正式启动", "event1_triple": "'第十五届“春暖万家·关爱女性送健康”大型公益活动', '正式启动', ''"}]}
|
200 |
+
{"title_id": "22364", "title": "吴佩慈为范玮琪新歌打气,无意间透露豪门生活似乎不太理想", "topic": "娱乐", "event_info": [{"event1_trigger": "为打气", "event1_triple": "'吴佩慈', '为打气', '范玮琪新歌'"}, {"event2_trigger": "透露", "event2_triple": "'吴佩慈', '透露', '豪门生活似乎不太理想'"}]}
|
figs/webdemo/demo000.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo001.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo010.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo011.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo020.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo021.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo030.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo031.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo040.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo041.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo050.png
ADDED
![]() |
Git LFS Details
|
figs/webdemo/demo051.png
ADDED
![]() |
Git LFS Details
|
src/models/llm_def.py
CHANGED
@@ -201,7 +201,7 @@ class ChatGPT(BaseEngine):
|
|
201 |
self.base_url = base_url
|
202 |
self.temperature = 0.2
|
203 |
self.top_p = 0.9
|
204 |
-
self.max_tokens =
|
205 |
if api_key != "":
|
206 |
self.api_key = api_key
|
207 |
else:
|
@@ -228,7 +228,7 @@ class DeepSeek(BaseEngine):
|
|
228 |
self.base_url = base_url
|
229 |
self.temperature = 0.2
|
230 |
self.top_p = 0.9
|
231 |
-
self.max_tokens =
|
232 |
if api_key != "":
|
233 |
self.api_key = api_key
|
234 |
else:
|
|
|
201 |
self.base_url = base_url
|
202 |
self.temperature = 0.2
|
203 |
self.top_p = 0.9
|
204 |
+
self.max_tokens = 4096 # Close source model
|
205 |
if api_key != "":
|
206 |
self.api_key = api_key
|
207 |
else:
|
|
|
228 |
self.base_url = base_url
|
229 |
self.temperature = 0.2
|
230 |
self.top_p = 0.9
|
231 |
+
self.max_tokens = 4096 # Close source model
|
232 |
if api_key != "":
|
233 |
self.api_key = api_key
|
234 |
else:
|
src/webui.py
CHANGED
@@ -6,19 +6,31 @@
|
|
6 |
import gradio as gr
|
7 |
import json
|
8 |
import random
|
|
|
9 |
|
10 |
from models import *
|
11 |
from pipeline import Pipeline
|
12 |
|
13 |
|
14 |
examples = [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
{
|
16 |
"task": "NER",
|
17 |
"mode": "quick",
|
18 |
"use_file": False,
|
19 |
"text": "Finally, every other year , ELRA organizes a major conference LREC , the International Language Resources and Evaluation Conference .",
|
20 |
"instruction": "",
|
21 |
-
"constraint": """["
|
22 |
"file_path": None,
|
23 |
"update_case": False,
|
24 |
"truth": "",
|
@@ -45,6 +57,17 @@ examples = [
|
|
45 |
"update_case": False,
|
46 |
"truth": "",
|
47 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
{
|
49 |
"task": "Base",
|
50 |
"mode": "quick",
|
@@ -68,13 +91,35 @@ examples = [
|
|
68 |
"truth": "",
|
69 |
},
|
70 |
{
|
71 |
-
"task": "
|
72 |
"mode": "quick",
|
73 |
-
"use_file":
|
74 |
-
"
|
75 |
-
"instruction": "",
|
76 |
-
"constraint": ""
|
77 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
78 |
"update_case": False,
|
79 |
"truth": "",
|
80 |
}
|
@@ -86,15 +131,15 @@ def create_interface():
|
|
86 |
gr.HTML("""
|
87 |
<div style="text-align:center;">
|
88 |
<p align="center">
|
89 |
-
<a
|
90 |
-
<img src="https://raw.githubusercontent.com/zjunlp/
|
91 |
</a>
|
92 |
</p>
|
93 |
<h1>OneKE: A Dockerized Schema-Guided LLM Agent-based Knowledge Extraction System</h1>
|
94 |
<p>
|
95 |
🌐[<a href="https://oneke.openkg.cn/" target="_blank">Home</a>]
|
96 |
📹[<a href="http://oneke.openkg.cn/demo.mp4" target="_blank">Video</a>]
|
97 |
-
📝[<a href="https://arxiv.org/abs/
|
98 |
💻[<a href="https://github.com/zjunlp/OneKE" target="_blank">Code</a>]
|
99 |
</p>
|
100 |
</div>
|
@@ -106,16 +151,25 @@ def create_interface():
|
|
106 |
with gr.Column():
|
107 |
model_gr = gr.Dropdown(
|
108 |
label="🪄 Select your Model",
|
109 |
-
choices=["deepseek-chat", "deepseek-reasoner",
|
|
|
|
|
110 |
value="deepseek-chat",
|
111 |
)
|
|
|
|
|
|
|
|
|
|
|
112 |
api_key_gr = gr.Textbox(
|
113 |
label="🔑 Enter your API-Key",
|
114 |
-
|
|
|
115 |
)
|
116 |
base_url_gr = gr.Textbox(
|
117 |
label="🔗 Enter your Base-URL",
|
118 |
-
|
|
|
119 |
)
|
120 |
with gr.Column():
|
121 |
task_gr = gr.Dropdown(
|
@@ -128,36 +182,54 @@ def create_interface():
|
|
128 |
choices=["quick", "standard", "customized"],
|
129 |
value="quick",
|
130 |
)
|
131 |
-
schema_agent_gr = gr.Dropdown(choices=["
|
132 |
-
extraction_Agent_gr = gr.Dropdown(choices=["
|
133 |
-
reflection_agent_gr = gr.Dropdown(choices=["
|
134 |
|
135 |
use_file_gr = gr.Checkbox(label="📂 Use File", value=True)
|
136 |
file_path_gr = gr.File(label="📖 Upload a File", visible=True)
|
137 |
-
text_gr = gr.Textbox(label="📖 Text", placeholder="Enter your Text", visible=False)
|
138 |
-
instruction_gr = gr.Textbox(label="🕹️ Instruction", visible=True)
|
139 |
-
constraint_gr = gr.Textbox(label="🕹️
|
140 |
|
141 |
update_case_gr = gr.Checkbox(label="💰 Update Case", value=False)
|
142 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
|
144 |
def customized_mode(mode):
|
145 |
if mode == "customized":
|
146 |
return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|
147 |
else:
|
148 |
-
return gr.update(visible=False, value="
|
149 |
|
150 |
def update_fields(task):
|
151 |
if task == "Base" or task == "":
|
152 |
-
return gr.update(visible=True, label="🕹️ Instruction", placeholder="
|
153 |
elif task == "NER":
|
154 |
-
return gr.update(visible=False), gr.update(visible=True, label="🕹️
|
155 |
elif task == "RE":
|
156 |
-
return gr.update(visible=False), gr.update(visible=True, label="🕹️
|
157 |
elif task == "EE":
|
158 |
-
return gr.update(visible=False), gr.update(visible=True, label="🕹️
|
159 |
elif task == "Triple":
|
160 |
-
return gr.update(visible=False), gr.update(visible=True, label="🕹️
|
161 |
|
162 |
def update_input_fields(use_file):
|
163 |
if use_file:
|
@@ -171,9 +243,28 @@ def create_interface():
|
|
171 |
else:
|
172 |
return gr.update(visible=False)
|
173 |
|
|
|
|
|
|
|
|
|
|
|
|
|
174 |
def start_with_example():
|
175 |
-
example_index = random.randint(
|
|
|
176 |
example = examples[example_index]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
177 |
return (
|
178 |
gr.update(value=example["task"]),
|
179 |
gr.update(value=example["mode"]),
|
@@ -183,24 +274,25 @@ def create_interface():
|
|
183 |
gr.update(value=example["instruction"], visible=example["task"] == "Base"),
|
184 |
gr.update(value=example["constraint"], visible=example["task"] in ["NER", "RE", "EE", "Triple"]),
|
185 |
gr.update(value=example["update_case"]),
|
186 |
-
gr.update(value=example["truth"]),
|
187 |
-
gr.update(value="
|
188 |
-
gr.update(value="
|
189 |
-
gr.update(value="
|
190 |
)
|
191 |
|
192 |
def submit(model, api_key, base_url, task, mode, instruction, constraint, text, use_file, file_path, update_case, truth, schema_agent, extraction_Agent, reflection_agent):
|
193 |
try:
|
194 |
-
|
195 |
-
|
196 |
-
|
197 |
-
|
198 |
-
|
|
|
199 |
else:
|
200 |
-
if
|
201 |
-
pipeline = Pipeline(
|
202 |
-
|
203 |
-
pipeline = Pipeline(
|
204 |
|
205 |
if task == "Base":
|
206 |
instruction = instruction
|
@@ -219,11 +311,11 @@ def create_interface():
|
|
219 |
|
220 |
agent3 = {}
|
221 |
if mode == "customized":
|
222 |
-
if schema_agent not in ["", "
|
223 |
agent3["schema_agent"] = schema_agent
|
224 |
-
if extraction_Agent not in ["", "
|
225 |
agent3["extraction_agent"] = extraction_Agent
|
226 |
-
if reflection_agent not in ["", "
|
227 |
agent3["reflection_agent"] = reflection_agent
|
228 |
|
229 |
# use 'Pipeline'
|
@@ -253,9 +345,9 @@ def create_interface():
|
|
253 |
|
254 |
def clear_all():
|
255 |
return (
|
256 |
-
gr.update(value="
|
257 |
-
gr.update(value="
|
258 |
-
gr.update(value="
|
259 |
gr.update(value="Base"), # task
|
260 |
gr.update(value="quick"), # mode
|
261 |
gr.update(value="", visible=False), # instruction
|
@@ -264,9 +356,9 @@ def create_interface():
|
|
264 |
gr.update(value="", visible=False), # text
|
265 |
gr.update(value=None, visible=True), # file_path
|
266 |
gr.update(value=False), # update_case
|
267 |
-
gr.update(value="", visible=False), #
|
268 |
-
gr.update(value=""),
|
269 |
-
gr.update(value=""),
|
270 |
gr.update(value="", visible=False), # error_output
|
271 |
)
|
272 |
|
@@ -280,7 +372,7 @@ def create_interface():
|
|
280 |
<span style="position: absolute; right: 0; top: 50%; transform: translateY(-50%); width: 45%; border-top: 1px solid #ccc;"></span>
|
281 |
</div>
|
282 |
""")
|
283 |
-
error_output_gr = gr.Textbox(label="😵💫 Ops, an Error Occurred", visible=False)
|
284 |
with gr.Row():
|
285 |
with gr.Column(scale=1):
|
286 |
py_output_gr = gr.Code(label="🤔 Generated Schema", language="python", lines=10, interactive=False)
|
@@ -291,6 +383,7 @@ def create_interface():
|
|
291 |
mode_gr.change(fn=customized_mode, inputs=mode_gr, outputs=[schema_agent_gr, extraction_Agent_gr, reflection_agent_gr])
|
292 |
use_file_gr.change(fn=update_input_fields, inputs=use_file_gr, outputs=[text_gr, file_path_gr])
|
293 |
update_case_gr.change(fn=update_case, inputs=update_case_gr, outputs=[truth_gr])
|
|
|
294 |
|
295 |
example_button_gr.click(
|
296 |
fn=start_with_example,
|
@@ -304,7 +397,7 @@ def create_interface():
|
|
304 |
instruction_gr,
|
305 |
constraint_gr,
|
306 |
update_case_gr,
|
307 |
-
truth_gr,
|
308 |
schema_agent_gr,
|
309 |
extraction_Agent_gr,
|
310 |
reflection_agent_gr,
|
@@ -324,7 +417,7 @@ def create_interface():
|
|
324 |
use_file_gr,
|
325 |
file_path_gr,
|
326 |
update_case_gr,
|
327 |
-
truth_gr,
|
328 |
schema_agent_gr,
|
329 |
extraction_Agent_gr,
|
330 |
reflection_agent_gr,
|
@@ -346,7 +439,7 @@ def create_interface():
|
|
346 |
text_gr,
|
347 |
file_path_gr,
|
348 |
update_case_gr,
|
349 |
-
truth_gr,
|
350 |
py_output_gr,
|
351 |
json_output_gr,
|
352 |
error_output_gr,
|
|
|
6 |
import gradio as gr
|
7 |
import json
|
8 |
import random
|
9 |
+
import re
|
10 |
|
11 |
from models import *
|
12 |
from pipeline import Pipeline
|
13 |
|
14 |
|
15 |
examples = [
|
16 |
+
{
|
17 |
+
"task": "Base",
|
18 |
+
"mode": "quick",
|
19 |
+
"use_file": False,
|
20 |
+
"text": "合力治堵!济南交通部门在拥堵路段定点研究交通治理方案",
|
21 |
+
"instruction": "请帮我抽取这个新闻事件",
|
22 |
+
"constraint": "",
|
23 |
+
"file_path": None,
|
24 |
+
"update_case": False,
|
25 |
+
"truth": "",
|
26 |
+
},
|
27 |
{
|
28 |
"task": "NER",
|
29 |
"mode": "quick",
|
30 |
"use_file": False,
|
31 |
"text": "Finally, every other year , ELRA organizes a major conference LREC , the International Language Resources and Evaluation Conference .",
|
32 |
"instruction": "",
|
33 |
+
"constraint": """["algorithm", "conference", "else", "product", "task", "field", "metrics", "organization", "researcher", "program language", "country", "location", "person", "university"]""",
|
34 |
"file_path": None,
|
35 |
"update_case": False,
|
36 |
"truth": "",
|
|
|
57 |
"update_case": False,
|
58 |
"truth": "",
|
59 |
},
|
60 |
+
{
|
61 |
+
"task": "Triple",
|
62 |
+
"mode": "quick",
|
63 |
+
"use_file": True,
|
64 |
+
"file_path": "data/input_files/Artificial_Intelligence_Wikipedia.txt",
|
65 |
+
"instruction": "",
|
66 |
+
"constraint": """[["Person", "Place", "Event", "property"], ["Interpersonal", "Located", "Ownership", "Action"]]""",
|
67 |
+
"text": "",
|
68 |
+
"update_case": False,
|
69 |
+
"truth": "",
|
70 |
+
},
|
71 |
{
|
72 |
"task": "Base",
|
73 |
"mode": "quick",
|
|
|
91 |
"truth": "",
|
92 |
},
|
93 |
{
|
94 |
+
"task": "Base",
|
95 |
"mode": "quick",
|
96 |
+
"use_file": False,
|
97 |
+
"text": "John Smith, a 45-year-old male, presents with persistent headaches that have lasted for the past 10 days. The headaches are described as moderate and occur primarily in the frontal region, often accompanied by mild nausea. The patient reports no significant medical history except for seasonal allergies, for which he occasionally takes antihistamines. Physical examination reveals a heart rate of 78 beats per minute, blood pressure of 125/80 mmHg, and normal temperature. A neurological examination showed no focal deficits. A CT scan of the head was performed, which revealed no acute abnormalities, and a sinus X-ray suggested mild sinusitis. Based on the clinical presentation and imaging results, the diagnosis is sinusitis, and the patient is advised to take decongestants and rest for recovery.",
|
98 |
+
"instruction": "Please extract the key medical information from this case description.",
|
99 |
+
"constraint": "",
|
100 |
+
"file_path": None,
|
101 |
+
"update_case": False,
|
102 |
+
"truth": "",
|
103 |
+
},
|
104 |
+
{
|
105 |
+
"task": "Base",
|
106 |
+
"mode": "quick",
|
107 |
+
"use_file": False,
|
108 |
+
"text": "张三,男,60岁,主诉背部酸痛已持续约两周,伴有轻微的头晕。患者有高血压病史,已服用降压药物多年,且控制良好;此外,患者曾在五年前接受过一次胆囊切除手术。体检时,心率为75次/分钟,血压为130/85 mmHg。背部触诊时无明显压痛,但活动时出现轻微不适。胸部X光显示无异常,腰部CT检查提示轻度腰椎退行性变。经医生诊断,患者被认为是由于长时间的不良姿势引起的腰椎退行性病变,建议进行物理治疗,并配合止痛药物。",
|
109 |
+
"instruction": "请从这个病例描述中,提取出重要的医疗信息",
|
110 |
+
"constraint": "",
|
111 |
+
"file_path": None,
|
112 |
+
"update_case": False,
|
113 |
+
"truth": "",
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"task": "Base",
|
117 |
+
"mode": "quick",
|
118 |
+
"use_file": False,
|
119 |
+
"text": "中国政府近日宣布了一项新的环保政策,旨在减少工业污染,并改善空气质量。此次政策将在全国范围内实施,涉及多个行业,尤其是钢铁和煤炭行业。环保部门负责人表示,这项政策的实施标志着中国环保工作的新阶段,预计将在未来五年内显著改善空气质量。",
|
120 |
+
"instruction": "请从这段新闻描述中提取出重要的事件信息,包括事件名称、时间、参与人员、事件目的、实施过程及预期结果。",
|
121 |
+
"constraint": "",
|
122 |
+
"file_path": None,
|
123 |
"update_case": False,
|
124 |
"truth": "",
|
125 |
}
|
|
|
131 |
gr.HTML("""
|
132 |
<div style="text-align:center;">
|
133 |
<p align="center">
|
134 |
+
<a>
|
135 |
+
<img src="https://raw.githubusercontent.com/zjunlp/OneKE/refs/heads/main/figs/logo.png" width="240"/>
|
136 |
</a>
|
137 |
</p>
|
138 |
<h1>OneKE: A Dockerized Schema-Guided LLM Agent-based Knowledge Extraction System</h1>
|
139 |
<p>
|
140 |
🌐[<a href="https://oneke.openkg.cn/" target="_blank">Home</a>]
|
141 |
📹[<a href="http://oneke.openkg.cn/demo.mp4" target="_blank">Video</a>]
|
142 |
+
📝[<a href="https://arxiv.org/abs/2412.20005v2" target="_blank">Paper</a>]
|
143 |
💻[<a href="https://github.com/zjunlp/OneKE" target="_blank">Code</a>]
|
144 |
</p>
|
145 |
</div>
|
|
|
151 |
with gr.Column():
|
152 |
model_gr = gr.Dropdown(
|
153 |
label="🪄 Select your Model",
|
154 |
+
choices=["deepseek-chat", "deepseek-reasoner",
|
155 |
+
"gpt-3.5-turbo", "gpt-4o-mini", "gpt-4o",
|
156 |
+
],
|
157 |
value="deepseek-chat",
|
158 |
)
|
159 |
+
# model_gr = gr.Textbox(
|
160 |
+
# label="🪄 Enter your Model",
|
161 |
+
# placeholder="Supports online-models like gpt-4o-mini, deepseek-chat, etc., HuggingFace Demo is not supported for local models.",
|
162 |
+
# value="deepseek-chat",
|
163 |
+
# )
|
164 |
api_key_gr = gr.Textbox(
|
165 |
label="🔑 Enter your API-Key",
|
166 |
+
placeholder="If using a local-model, this field should be left empty.",
|
167 |
+
value="sk-xxxxx"
|
168 |
)
|
169 |
base_url_gr = gr.Textbox(
|
170 |
label="🔗 Enter your Base-URL",
|
171 |
+
placeholder="If using the default Base-URL or a local-model, this field should be left empty.",
|
172 |
+
value="Default",
|
173 |
)
|
174 |
with gr.Column():
|
175 |
task_gr = gr.Dropdown(
|
|
|
182 |
choices=["quick", "standard", "customized"],
|
183 |
value="quick",
|
184 |
)
|
185 |
+
schema_agent_gr = gr.Dropdown(choices=["Not Required", "get_default_schema", "get_retrieved_schema", "get_deduced_schema"], value="Not Required", label="🤖 Select your Schema-Agent", visible=False)
|
186 |
+
extraction_Agent_gr = gr.Dropdown(choices=["Not Required", "extract_information_direct", "extract_information_with_case"], value="Not Required", label="🤖 Select your Extraction-Agent", visible=False)
|
187 |
+
reflection_agent_gr = gr.Dropdown(choices=["Not Required", "reflect_with_case"], value="Not Required", label="🤖 Select your Reflection-Agent", visible=False)
|
188 |
|
189 |
use_file_gr = gr.Checkbox(label="📂 Use File", value=True)
|
190 |
file_path_gr = gr.File(label="📖 Upload a File", visible=True)
|
191 |
+
text_gr = gr.Textbox(label="📖 Text", lines=5, placeholder="Enter your Text please.", visible=False)
|
192 |
+
instruction_gr = gr.Textbox(label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names.", visible=True)
|
193 |
+
constraint_gr = gr.Textbox(label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names.", visible=False)
|
194 |
|
195 |
update_case_gr = gr.Checkbox(label="💰 Update Case", value=False)
|
196 |
+
# update_schema_gr = gr.Checkbox(label="📟 Update Schema", value=False)
|
197 |
+
truth_gr = gr.Textbox(label="🪙 Truth", lines=2, placeholder="""You can enter the truth you want LLM know, for example: {"relation_list": [{"head": "Guinea", "tail": "Conakry", "relation": "country capital"}]}""", visible=False)
|
198 |
+
# selfschema_gr = gr.Textbox(label="📟 Schema", lines=5, placeholder="Enter your New Schema", visible=False, interactive=True)
|
199 |
+
|
200 |
+
def get_model_category(model_name_or_path):
|
201 |
+
if model_name_or_path in ["gpt-3.5-turbo", "gpt-4o-mini", "gpt-4o", "o3-mini"]:
|
202 |
+
return ChatGPT
|
203 |
+
elif model_name_or_path in ["deepseek-chat", "deepseek-reasoner"]:
|
204 |
+
return DeepSeek
|
205 |
+
elif re.search(r'(?i)llama', model_name_or_path):
|
206 |
+
return LLaMA
|
207 |
+
elif re.search(r'(?i)qwen', model_name_or_path):
|
208 |
+
return Qwen
|
209 |
+
elif re.search(r'(?i)minicpm', model_name_or_path):
|
210 |
+
return MiniCPM
|
211 |
+
elif re.search(r'(?i)chatglm', model_name_or_path):
|
212 |
+
return ChatGLM
|
213 |
+
else:
|
214 |
+
return BaseEngine
|
215 |
|
216 |
def customized_mode(mode):
|
217 |
if mode == "customized":
|
218 |
return gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
|
219 |
else:
|
220 |
+
return gr.update(visible=False, value="Not Required"), gr.update(visible=False, value="Not Required"), gr.update(visible=False, value="Not Required")
|
221 |
|
222 |
def update_fields(task):
|
223 |
if task == "Base" or task == "":
|
224 |
+
return gr.update(visible=True, label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names."), gr.update(visible=False)
|
225 |
elif task == "NER":
|
226 |
+
return gr.update(visible=False), gr.update(visible=True, label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names.")
|
227 |
elif task == "RE":
|
228 |
+
return gr.update(visible=False), gr.update(visible=True, label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names.")
|
229 |
elif task == "EE":
|
230 |
+
return gr.update(visible=False), gr.update(visible=True, label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names.")
|
231 |
elif task == "Triple":
|
232 |
+
return gr.update(visible=False), gr.update(visible=True, label="🕹️ Instruction", lines=3, placeholder="You can enter any type of information you want to extract here, for example: Please help me extract all the person names.")
|
233 |
|
234 |
def update_input_fields(use_file):
|
235 |
if use_file:
|
|
|
243 |
else:
|
244 |
return gr.update(visible=False)
|
245 |
|
246 |
+
# def update_schema(update_schema):
|
247 |
+
# if update_schema:
|
248 |
+
# return gr.update(visible=True)
|
249 |
+
# else:
|
250 |
+
# return gr.update(visible=False)
|
251 |
+
|
252 |
def start_with_example():
|
253 |
+
example_index = random.randint(-3, len(examples) - 1)
|
254 |
+
example_index = max(example_index, 0)
|
255 |
example = examples[example_index]
|
256 |
+
|
257 |
+
if example_index == 0:
|
258 |
+
with open("data/input_files/ChineseNewsExample.json", "r", encoding="utf-8") as file:
|
259 |
+
lines = file.readlines()
|
260 |
+
random_line = random.choice(lines).strip()
|
261 |
+
try:
|
262 |
+
json_data = json.loads(random_line)
|
263 |
+
title = json_data.get("title", "No title found")
|
264 |
+
except json.JSONDecodeError:
|
265 |
+
title = "Error decoding JSON"
|
266 |
+
example["text"] = title
|
267 |
+
|
268 |
return (
|
269 |
gr.update(value=example["task"]),
|
270 |
gr.update(value=example["mode"]),
|
|
|
274 |
gr.update(value=example["instruction"], visible=example["task"] == "Base"),
|
275 |
gr.update(value=example["constraint"], visible=example["task"] in ["NER", "RE", "EE", "Triple"]),
|
276 |
gr.update(value=example["update_case"]),
|
277 |
+
gr.update(value=example["truth"]), # gr.update(value=example["update_schema"]), gr.update(value=example["selfschema"]),
|
278 |
+
gr.update(value="Not Required", visible=False),
|
279 |
+
gr.update(value="Not Required", visible=False),
|
280 |
+
gr.update(value="Not Required", visible=False),
|
281 |
)
|
282 |
|
283 |
def submit(model, api_key, base_url, task, mode, instruction, constraint, text, use_file, file_path, update_case, truth, schema_agent, extraction_Agent, reflection_agent):
|
284 |
try:
|
285 |
+
ModelClass = get_model_category(model)
|
286 |
+
if base_url == "Default" or base_url == "":
|
287 |
+
if api_key == "":
|
288 |
+
pipeline = Pipeline(ModelClass(model_name_or_path=model))
|
289 |
+
else:
|
290 |
+
pipeline = Pipeline(ModelClass(model_name_or_path=model, api_key=api_key))
|
291 |
else:
|
292 |
+
if api_key == "":
|
293 |
+
pipeline = Pipeline(ModelClass(model_name_or_path=model, base_url=base_url))
|
294 |
+
else:
|
295 |
+
pipeline = Pipeline(ModelClass(model_name_or_path=model, api_key=api_key, base_url=base_url))
|
296 |
|
297 |
if task == "Base":
|
298 |
instruction = instruction
|
|
|
311 |
|
312 |
agent3 = {}
|
313 |
if mode == "customized":
|
314 |
+
if schema_agent not in ["", "Not Required"]:
|
315 |
agent3["schema_agent"] = schema_agent
|
316 |
+
if extraction_Agent not in ["", "Not Required"]:
|
317 |
agent3["extraction_agent"] = extraction_Agent
|
318 |
+
if reflection_agent not in ["", "Not Required"]:
|
319 |
agent3["reflection_agent"] = reflection_agent
|
320 |
|
321 |
# use 'Pipeline'
|
|
|
345 |
|
346 |
def clear_all():
|
347 |
return (
|
348 |
+
gr.update(value="Not Required", visible=False), # sechema_agent
|
349 |
+
gr.update(value="Not Required", visible=False), # extraction_Agent
|
350 |
+
gr.update(value="Not Required", visible=False), # reflection_agent
|
351 |
gr.update(value="Base"), # task
|
352 |
gr.update(value="quick"), # mode
|
353 |
gr.update(value="", visible=False), # instruction
|
|
|
356 |
gr.update(value="", visible=False), # text
|
357 |
gr.update(value=None, visible=True), # file_path
|
358 |
gr.update(value=False), # update_case
|
359 |
+
gr.update(value="", visible=False), # truth # gr.update(value=False), # update_schema gr.update(value="", visible=False), # selfschema
|
360 |
+
gr.update(value=""), # py_output_gr
|
361 |
+
gr.update(value=""), # json_output_gr
|
362 |
gr.update(value="", visible=False), # error_output
|
363 |
)
|
364 |
|
|
|
372 |
<span style="position: absolute; right: 0; top: 50%; transform: translateY(-50%); width: 45%; border-top: 1px solid #ccc;"></span>
|
373 |
</div>
|
374 |
""")
|
375 |
+
error_output_gr = gr.Textbox(label="😵💫 Ops, an Error Occurred", visible=False, interactive=False)
|
376 |
with gr.Row():
|
377 |
with gr.Column(scale=1):
|
378 |
py_output_gr = gr.Code(label="🤔 Generated Schema", language="python", lines=10, interactive=False)
|
|
|
383 |
mode_gr.change(fn=customized_mode, inputs=mode_gr, outputs=[schema_agent_gr, extraction_Agent_gr, reflection_agent_gr])
|
384 |
use_file_gr.change(fn=update_input_fields, inputs=use_file_gr, outputs=[text_gr, file_path_gr])
|
385 |
update_case_gr.change(fn=update_case, inputs=update_case_gr, outputs=[truth_gr])
|
386 |
+
# update_schema_gr.change(fn=update_schema, inputs=update_schema_gr, outputs=[selfschema_gr])
|
387 |
|
388 |
example_button_gr.click(
|
389 |
fn=start_with_example,
|
|
|
397 |
instruction_gr,
|
398 |
constraint_gr,
|
399 |
update_case_gr,
|
400 |
+
truth_gr, # update_schema_gr, selfschema_gr,
|
401 |
schema_agent_gr,
|
402 |
extraction_Agent_gr,
|
403 |
reflection_agent_gr,
|
|
|
417 |
use_file_gr,
|
418 |
file_path_gr,
|
419 |
update_case_gr,
|
420 |
+
truth_gr, # update_schema_gr, selfschema_gr,
|
421 |
schema_agent_gr,
|
422 |
extraction_Agent_gr,
|
423 |
reflection_agent_gr,
|
|
|
439 |
text_gr,
|
440 |
file_path_gr,
|
441 |
update_case_gr,
|
442 |
+
truth_gr, # update_schema_gr, selfschema_gr,
|
443 |
py_output_gr,
|
444 |
json_output_gr,
|
445 |
error_output_gr,
|