File size: 11,973 Bytes
36b7b70
ab78124
b495719
36b7b70
ab78124
36b7b70
 
 
ab78124
36b7b70
ab78124
 
 
 
 
 
36b7b70
b495719
ab78124
 
 
 
 
 
 
 
36b7b70
 
 
 
ab78124
36b7b70
 
 
 
 
 
 
 
 
 
b495719
36b7b70
 
 
 
 
 
 
 
b495719
36b7b70
 
 
 
 
 
 
 
b495719
36b7b70
 
b495719
36b7b70
 
 
3a8258b
36b7b70
3a8258b
 
 
 
 
 
 
 
 
 
 
 
 
 
36b7b70
 
 
 
 
 
 
 
 
ab78124
36b7b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b495719
36b7b70
 
 
 
 
b495719
36b7b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ab78124
36b7b70
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
import os
import json
import gradio as gr
from gliner import GLiNER

# Read the shared example rows from examples.json (parsed once at import time)
with open("examples.json", mode="r", encoding="utf-8") as examples_file:
    common_examples = json.load(examples_file)

# Utility function to merge adjacent entities (used in NuNER Zero)
def merge_entities(entities):
    """Merge runs of consecutive entities that share a label and touch.

    Two neighbouring entities are merged when they have the same
    ``'entity'`` value and the second one starts either exactly where the
    first one ends or one character after it.

    Args:
        entities: Sequence of dicts with at least the keys ``'entity'``,
            ``'word'``, ``'start'`` and ``'end'``, in the order they should
            be scanned. The dicts are not modified.

    Returns:
        A new list of new dicts. A merged entity keeps the first entity's
        other keys, spans from the first ``'start'`` to the last ``'end'``,
        and its ``'word'`` is the concatenation of the merged words.
    """
    if not entities:
        return []
    merged = []
    # Copy so that extending the running entity never mutates the caller's dicts.
    current = dict(entities[0])
    for next_entity in entities[1:]:
        gap = next_entity['start'] - current['end']
        # Merge if same label and adjacent (touching, or one character apart)
        if next_entity['entity'] == current['entity'] and gap in (0, 1):
            # Only a one-character gap has something between the two words;
            # touching spans are joined without inventing a space.
            separator = ' ' if gap == 1 else ''
            current['word'] += separator + next_entity['word']
            current['end'] = next_entity['end']
        else:
            merged.append(current)
            current = dict(next_entity)
    merged.append(current)
    return merged

# Instantiate the three GLiNER checkpoints, in the same order as before:
# NuNER Zero, multilingual PII, and GLiNER medium v2.1.
model_nuner, model_pii, model_med = (
    GLiNER.from_pretrained(checkpoint)
    for checkpoint in (
        "numind/NuZero_token",
        "urchade/gliner_multi_pii-v1",
        "urchade/gliner_medium-v2.1",
    )
)

# Define NER functions for each model
def ner_nuner(text, labels, threshold, nested_ner):
    """Run the NuNER Zero model on *text* and merge adjacent same-label hits.

    Args:
        text: Input string to analyse.
        labels: Comma-separated entity labels. Surrounding whitespace is
            stripped and blank entries (e.g. from a trailing comma) are
            ignored.
        threshold: Forwarded to ``predict_entities`` as ``threshold``.
        nested_ner: When true, ``predict_entities`` is called with
            ``flat_ner=False``.

    Returns:
        ``{"text": text, "entities": [...]}``. Each entity dict has the keys
        ``entity``, ``word``, ``start``, ``end`` and ``score`` (always 0).
        The list is empty when the text is blank or no usable label is given.
    """
    label_list = [lbl.strip() for lbl in (labels or "").split(",") if lbl.strip()]
    # Nothing to predict: skip the model call instead of sending it blank input.
    if not text or not text.strip() or not label_list:
        return {"text": text, "entities": []}
    pred_entities = model_nuner.predict_entities(text, label_list, flat_ner=not nested_ner, threshold=threshold)
    entities = [
        {"entity": entity["label"], "word": entity["text"], "start": entity["start"], "end": entity["end"], "score": 0}
        for entity in pred_entities
    ]
    merged_entities = merge_entities(entities)
    return {"text": text, "entities": merged_entities}

def ner_pii(text, labels, threshold, nested_ner):
    """Run the GLiNER multi-PII model on *text*.

    Args:
        text: Input string to analyse.
        labels: Comma-separated entity labels. Surrounding whitespace is
            stripped and blank entries (e.g. from a trailing comma) are
            ignored.
        threshold: Forwarded to ``predict_entities`` as ``threshold``.
        nested_ner: When true, ``predict_entities`` is called with
            ``flat_ner=False``.

    Returns:
        ``{"text": text, "entities": [...]}``. Each entity dict has the keys
        ``entity``, ``word``, ``start``, ``end`` and ``score`` (always 0).
        The list is empty when the text is blank or no usable label is given.
    """
    label_list = [lbl.strip() for lbl in (labels or "").split(",") if lbl.strip()]
    # Nothing to predict: skip the model call instead of sending it blank input.
    if not text or not text.strip() or not label_list:
        return {"text": text, "entities": []}
    pred_entities = model_pii.predict_entities(text, label_list, flat_ner=not nested_ner, threshold=threshold)
    entities = [
        {"entity": entity["label"], "word": entity["text"], "start": entity["start"], "end": entity["end"], "score": 0}
        for entity in pred_entities
    ]
    return {"text": text, "entities": entities}

def ner_med(text, labels, threshold, nested_ner):
    """Run the GLiNER medium v2.1 model on *text*.

    Args:
        text: Input string to analyse.
        labels: Comma-separated entity labels. Surrounding whitespace is
            stripped and blank entries (e.g. from a trailing comma) are
            ignored.
        threshold: Forwarded to ``predict_entities`` as ``threshold``.
        nested_ner: When true, ``predict_entities`` is called with
            ``flat_ner=False``.

    Returns:
        ``{"text": text, "entities": [...]}``. Each entity dict has the keys
        ``entity``, ``word``, ``start``, ``end`` and ``score`` (always 0).
        The list is empty when the text is blank or no usable label is given.
    """
    label_list = [lbl.strip() for lbl in (labels or "").split(",") if lbl.strip()]
    # Nothing to predict: skip the model call instead of sending it blank input.
    if not text or not text.strip() or not label_list:
        return {"text": text, "entities": []}
    pred_entities = model_med.predict_entities(text, label_list, flat_ner=not nested_ner, threshold=threshold)
    entities = [
        {"entity": entity["label"], "word": entity["text"], "start": entity["start"], "end": entity["end"], "score": 0}
        for entity in pred_entities
    ]
    return {"text": text, "entities": entities}

# The first shared example row supplies the initial widget values
# (text, labels, threshold, nested flag — in that order).
(
    default_text,
    default_labels,
    default_threshold,
    default_nested,
) = common_examples[0]

def _build_model_tab(tab_label, heading, model_id, usage_goal, predict_fn):
    """Add one model tab to the Gradio container that is currently open.

    Every tab has the same layout: a heading, a collapsed "run locally"
    accordion, the four inputs (text, labels, threshold, nested flag), the
    highlighted output, a submit button and the shared examples table.
    Only the texts and the prediction callback differ between tabs.

    Args:
        tab_label: Text shown on the tab itself.
        heading: Markdown rendered at the top of the tab.
        model_id: Model identifier shown in the local-usage snippet.
        usage_goal: Ending of the usage sentence, e.g. "perform zero-shot NER".
        predict_fn: Callback invoked as
            ``predict_fn(text, labels, threshold, nested_ner)``; its return
            value is sent to the ``HighlightedText`` output.
    """
    with gr.Tab(tab_label):
        gr.Markdown(heading)
        with gr.Accordion("How to run this model locally", open=False):
            gr.Markdown(
                "\n".join(
                    [
                        "**Installation:**",
                        "```",
                        "!pip install gliner",
                        "```",
                        "**Usage:**",
                        f'Load the model with `GLiNER.from_pretrained("{model_id}")`',
                        f"and call `predict_entities` to {usage_goal}.",
                    ]
                )
            )
            gr.Code(
                f'from gliner import GLiNER\nmodel = GLiNER.from_pretrained("{model_id}")',
                language="python",
            )
        input_text = gr.Textbox(value=default_text, label="Text input", placeholder="Enter your text here")
        with gr.Row():
            labels = gr.Textbox(value=default_labels, label="Labels", placeholder="Enter labels (comma separated)", scale=2)
            threshold = gr.Slider(0, 1, value=default_threshold, step=0.01, label="Threshold", info="Lower threshold to increase predictions", scale=1)
            nested_ner = gr.Checkbox(value=default_nested, label="Nested NER", info="Allow for nested NER?", scale=0)
        output = gr.HighlightedText(label="Predicted Entities")
        submit_btn = gr.Button("Submit")

        inputs = [input_text, labels, threshold, nested_ner]
        gr.Examples(
            common_examples,
            fn=predict_fn,
            inputs=inputs,
            outputs=output,
            cache_examples=False,
        )
        # Re-run the prediction whenever any input is committed, in the same
        # registration order as before: text submit, labels submit, slider
        # release, button click, checkbox change.
        for register in (
            input_text.submit,
            labels.submit,
            threshold.release,
            submit_btn.click,
            nested_ner.change,
        ):
            register(predict_fn, inputs=inputs, outputs=output)


# Build the combined Gradio app with three tabs
with gr.Blocks(title="GLiNER NER Testbed") as demo:
    gr.Markdown("# GLiNER NER Testbed")
    with gr.Accordion("This interface allows you to compare different zero-shot Named Entity Recognition models...", open=True):
        gr.Markdown(
            """
            ## Models Available:
            - **GLiNER Medium v2.1**: The original GLiNER medium model
            - **GLiNER Multi PII**: Fine-tuned for detecting personally identifiable information across multiple languages
            - **NuNER Zero**: A specialized token-based NER model

            ## Features:
            - Select different models
            - Select examples based on different use cases
            - Toggle nested entity recognition
            - Entity merging is currently enabled for NuNER Zero only

            ## About GLiNER:

            **GLiNER** is a state-of-the-art Named Entity Recognition (NER) system that leverages a BERT-like bidirectional transformer encoder to identify a wide range of entity types in text. Unlike conventional NER models that are restricted to fixed entity categories, GLiNER supports flexible, zero-shot extraction, making it ideal for diverse real-world applications. It also provides a resource-efficient alternative to large language models (LLMs) for scenarios where cost and speed are critical. Distributed under the Apache 2.0 license, GLiNER is commercially friendly and readily deployable.

            **Useful Links**

            - **Model:** [gliner_medium-v2.1](https://huggingface.co/urchade/gliner_medium-v2.1)
            - **All GLiNER Models:** [Hugging Face GLiNER Models](https://huggingface.co/models?library=gliner)
            - **Research Paper:** [arXiv:2311.08526](https://arxiv.org/abs/2311.08526)
            - **Repository:** [GitHub - GLiNER](https://github.com/urchade/GLiNER)
            """
        )

    with gr.Tabs():
        # One tab per model, in display order:
        # (tab label, heading, model id, usage sentence ending, callback)
        for tab_spec in (
            ("GLiNER-medium", "## GLiNER-medium-v2.1", "urchade/gliner_medium-v2.1", "perform zero-shot NER", ner_med),
            ("GLiNER-PII", "## GLiNER-PII", "urchade/gliner_multi_pii-v1", "extract PII", ner_pii),
            ("NuNER Zero", "## NuNER Zero", "numind/NuZero_token", "perform zero-shot NER", ner_nuner),
        ):
            _build_model_tab(*tab_spec)

# Enable queuing and launch the app (after the Blocks context has been closed)
demo.queue()
demo.launch(debug=True)