Explainable-Vision-Language-Model

Running on Zero

App Files Files Community

khang119966 committed 15 days ago

Commit

23191fb

verified ·

1 Parent(s): c3d33a4

Update app.py

Browse files

Files changed (1) hide show

app.py +33 -14

app.py CHANGED Viewed

@@ -316,31 +316,50 @@ def visualize_attention_hiddenstate(attention_tensor, head=None, start_img_token
     return heat_maps, top_5_tokens
 def adjust_overlay(overlay, text_img):
     h_o, w_o = overlay.shape[:2]
     h_t, w_t = text_img.shape[:2]
-    if h_o > w_o:  # Overlay là ảnh đứng
-        # Resize overlay sao cho h = h_t, giữ nguyên tỷ lệ
-        new_h = h_t
-        new_w = int(w_o * (new_h / h_o))
-        overlay_resized = cv2.resize(overlay, (new_w, new_h))
-    else:  # Overlay là ảnh ngang
-        # Giữ nguyên overlay, nhưng nếu h < h_t thì thêm padding trắng
-        overlay_resized = overlay.copy()
-    # Thêm padding trắng nếu overlay có h < h_t
     if overlay_resized.shape[0] < h_t:
         pad_h = h_t - overlay_resized.shape[0]
         padding = np.ones((pad_h, overlay_resized.shape[1], 3), dtype=np.uint8) * 255
-        overlay_resized = np.vstack((overlay_resized, padding))  # Padding vào dưới
-    # Đảm bảo overlay có cùng chiều cao với text_img
-    if overlay_resized.shape[0] != h_t:
-        overlay_resized = cv2.resize(overlay_resized, (overlay_resized.shape[1], h_t))
     return overlay_resized
 def extract_next_token_table_data(model, tokenizer, response, index_focus):
     next_token_table = []
     for layer_index in range(len(response.hidden_states[index_focus])):

     return heat_maps, top_5_tokens
+# def adjust_overlay(overlay, text_img):
+#     h_o, w_o = overlay.shape[:2]
+#     h_t, w_t = text_img.shape[:2]
+#     if h_o > w_o:  # Overlay là ảnh đứng
+#         # Resize overlay sao cho h = h_t, giữ nguyên tỷ lệ
+#         new_h = h_t
+#         new_w = int(w_o * (new_h / h_o))
+#         overlay_resized = cv2.resize(overlay, (new_w, new_h))
+#     else:  # Overlay là ảnh ngang
+#         # Giữ nguyên overlay, nhưng nếu h < h_t thì thêm padding trắng
+#         overlay_resized = overlay.copy()
+#     # Thêm padding trắng nếu overlay có h < h_t
+#     if overlay_resized.shape[0] < h_t:
+#         pad_h = h_t - overlay_resized.shape[0]
+#         padding = np.ones((pad_h, overlay_resized.shape[1], 3), dtype=np.uint8) * 255
+#         overlay_resized = np.vstack((overlay_resized, padding))  # Padding vào dưới
+#     # Đảm bảo overlay có cùng chiều cao với text_img
+#     if overlay_resized.shape[0] != h_t:
+#         overlay_resized = cv2.resize(overlay_resized, (overlay_resized.shape[1], h_t))
+#     return overlay_resized
 def adjust_overlay(overlay, text_img):
     h_o, w_o = overlay.shape[:2]
     h_t, w_t = text_img.shape[:2]
+    # Resize the overlay so that its width is <= 500 and its height is <= 1000 (aspect ratio preserved)
+    scale = min(500 / w_o, 1000 / h_o, 1.0)  # capped at 1.0 so the overlay is never upscaled beyond its original size
+    new_w = int(w_o * scale)
+    new_h = int(h_o * scale)
+    overlay_resized = cv2.resize(overlay, (new_w, new_h))
+    # If the overlay is shorter than text_img, append white padding rows below it (a taller overlay is returned as resized, without cropping)
     if overlay_resized.shape[0] < h_t:
         pad_h = h_t - overlay_resized.shape[0]
         padding = np.ones((pad_h, overlay_resized.shape[1], 3), dtype=np.uint8) * 255
+        overlay_resized = np.vstack((overlay_resized, padding))
     return overlay_resized
 def extract_next_token_table_data(model, tokenizer, response, index_focus):
     next_token_table = []
     for layer_index in range(len(response.hidden_states[index_focus])):