File size: 2,971 Bytes
cabf51c
a109861
ca99229
ef08f4e
01ddf42
 
ef08f4e
 
 
 
01ddf42
 
 
 
 
 
 
 
 
fccb2d8
68aee5f
01ddf42
 
ca99229
bc2dc1a
 
 
 
6148b9b
1265529
cabf51c
1265529
 
a7e2698
02a0351
 
cabf51c
 
 
bc2dc1a
 
8d27209
bc2dc1a
 
687617e
bc2dc1a
 
 
 
 
 
 
1265529
6ab8d85
1265529
bc2dc1a
 
dfa7b0a
1265529
 
 
 
 
 
dfa7b0a
 
1265529
 
bc2dc1a
df2d919
dfa7b0a
 
 
 
 
 
 
4fea159
 
 
 
 
 
1265529
bc2dc1a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
import spaces
import os
import gradio as gr
import shutil
import sys 
import subprocess




def install_setup():
    """Install the runtime dependencies that the imports further down rely on.

    Runs three steps in order:

    1. pip-installs detectron2 straight from its GitHub repository;
    2. clones the Mr.DETR repository into ``MrDETR/`` and deletes the
       ``requirements.txt`` inside the clone;
    3. pip-installs the detrex wheel, given as a path relative to the
       current working directory.

    Exit statuses are not checked, so a failed step does not raise here;
    this keeps the best-effort behaviour of the original top-level calls.
    """
    os.system("pip install git+https://github.com/facebookresearch/detectron2.git")
    os.system("git clone https://github.com/Visual-AI/Mr.DETR.git MrDETR && cd MrDETR && rm -f requirements.txt && cd ..")
    # Pass the command as an argument list: the previous shlex.split(...) call
    # raised NameError because `shlex` is not imported in this file, and a
    # fixed command needs no shell-style splitting anyway.
    subprocess.run(["pip", "install", "detrex-0.3.0-cp310-cp310-linux_x86_64.whl"])


install_setup()

# Put the clone on the module search path so packages inside it can be
# imported (presumably `demo`, which is imported right after this setup).
sys.path.append("MrDETR/")


from demo.predictors import VisualizationDemo
from detectron2.checkpoint import DetectionCheckpointer
from detectron2.config import LazyConfig, instantiate
import numpy as np
from PIL import Image


if __name__ == "__main__":
    # Close any Gradio apps that are still running before starting this one.
    gr.close_all()

    # --- Model construction -------------------------------------------------
    config_file = "MrDETR/projects/mr_detr_align/configs/deformable_detr_swinl_two_stage_12ep_plusplus.py"
    weights_url = "https://github.com/Visual-AI/Mr.DETR/releases/download/weights/MrDETR_align_swinL_12ep_900q_safe.pth"

    cfg = LazyConfig.load(config_file)
    # Both the model and the train sections of the config are pointed at CUDA.
    for section in ("model", "train"):
        cfg[section].device = "cuda"

    model = instantiate(cfg.model)
    DetectionCheckpointer(model).load(weights_url)

    model.eval()
    model.cuda()

    vis_demo = VisualizationDemo(
        model=model, min_size_test=800, max_size_test=1333,
        img_format="RGB", metadata_dataset="coco_2017_val",
    )

    # --- Inference callback -------------------------------------------------
    @spaces.GPU
    def inference(img, confidence):
        """Run the detector on one image and return the rendered visualization.

        ``img`` is the image handed over by the Gradio image input (configured
        below with ``type="pil"``); ``confidence`` is the slider value and is
        forwarded unchanged to ``vis_demo.run_on_image``. Returns a PIL image.
        """
        pixels = np.array(img)
        _, visualized = vis_demo.run_on_image(pixels, confidence)
        # The rendered array has its last (channel) axis reversed before it is
        # wrapped in a PIL image.
        rendered = visualized.get_image()[:, :, ::-1]
        return Image.fromarray(rendered)

    # --- Gradio UI ----------------------------------------------------------
    image_input = gr.Image(type="pil", image_mode="RGB")
    confidence_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.5, step=0.05)

    # Each example pairs a bundled asset image with a slider value.
    example_rows = [
        ["MrDETR/assets/000000014226.jpg", 0.5],
        ["MrDETR/assets/000000028449.jpg", 0.3],
        ["MrDETR/assets/000000070048.jpg", 0.5],
        ["MrDETR/assets/000000218997.jpg", 0.5],
        ["MrDETR/assets/000000279774.jpg", 0.5],
        ["MrDETR/assets/000000434459.jpg", 0.5],
        ["MrDETR/assets/000000448448.jpg", 0.5],
        ["MrDETR/assets/000000560474.jpg", 0.5],
    ]

    demo = gr.Interface(
        fn=inference,
        inputs=[image_input, confidence_slider],
        outputs="image",
        examples=example_rows,
        title="[CVPR 2025] Mr. DETR: Instructive Multi-Route Training for Detection Transformers",
        description='''
            [![Paper](https://img.shields.io/badge/arXiv-2412.10028-red)](https://arxiv.org/abs/2412.10028)
            [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/mr-detr-instructive-multi-route-training-for/object-detection-on-coco-2017-val)](https://paperswithcode.com/sota/object-detection-on-coco-2017-val?p=mr-detr-instructive-multi-route-training-for)
        ''',
    )
    demo.launch()