Spaces:
Running
Running
FIX: video
Browse files
- __pycache__/detection.cpython-310.pyc +0 -0
- __pycache__/model.cpython-310.pyc +0 -0
- detection.py +11 -4
__pycache__/detection.cpython-310.pyc
ADDED
Binary file (2.23 kB). View file
|
|
__pycache__/model.cpython-310.pyc
ADDED
Binary file (795 Bytes). View file
|
|
detection.py
CHANGED
@@ -70,10 +70,17 @@ def detect_video(frames, processor, clip_model, detection_model):
|
|
70 |
|
71 |
pred_score = float(detection_model(last_hidden_states)[0][0].cpu().detach().numpy())
|
72 |
assert 0 <= pred_score <= 1
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
77 |
|
78 |
blended_image = vis_attn(image, cls_attention_map)
|
79 |
|
|
|
70 |
|
71 |
pred_score = float(detection_model(last_hidden_states)[0][0].cpu().detach().numpy())
|
72 |
assert 0 <= pred_score <= 1
|
73 |
+
|
74 |
+
for layer_idx in range(len(outputs.attentions)):
|
75 |
+
attn_map = outputs.attentions[layer_idx]
|
76 |
+
if layer_idx == 0:
|
77 |
+
last_layer_attn = attn_map
|
78 |
+
else:
|
79 |
+
if layer_idx < 6:
|
80 |
+
last_layer_attn += attn_map
|
81 |
+
|
82 |
+
head_mean_attn = last_layer_attn.mean(dim=1)[0]
|
83 |
+
cls_attention_map = head_mean_attn[0, 1:]
|
84 |
|
85 |
blended_image = vis_attn(image, cls_attention_map)
|
86 |
|