Spaces:

kishoreb4
/

CV_Project

Sleeping

App Files Community

kishoreb4 committed 18 days ago

Commit

46aa1b6

verified ·

1 Parent(s): 0caf545

Update app.py

Browse files

Files changed (1) hide show

app.py +52 -121

app.py CHANGED Viewed

@@ -10,7 +10,7 @@ import io
 import gdown
 from transformers import TFSegformerForSemanticSegmentation
-# Set page config at the very beginning of the app
 st.set_page_config(
     page_title="Pet Segmentation with SegFormer",
     page_icon="🐶",
@@ -18,13 +18,13 @@ st.set_page_config(
     initial_sidebar_state="expanded"
 )
-# Constants for image preprocessing - matching colab_code.py
 IMAGE_SIZE = 512
 OUTPUT_SIZE = 128
 MEAN = tf.constant([0.485, 0.456, 0.406])
 STD = tf.constant([0.229, 0.224, 0.225])
-# Class labels - DO NOT CHANGE
 ID2LABEL = {0: "background", 1: "border", 2: "foreground/pet"}
 NUM_CLASSES = len(ID2LABEL)
@@ -38,7 +38,8 @@ def download_model_from_drive():
     if not os.path.exists(model_path):
         # Correct format for gdown
-        url = "https://drive.google.com/file/d/1XObpqG8qZ7YUyiRKbpVvxX11yQSK8Y_3/view?usp=sharing"
         try:
             gdown.download(url, model_path, quiet=False)
             st.success("Model downloaded successfully from Google Drive.")
@@ -54,7 +55,7 @@ def download_model_from_drive():
 def load_model():
     """Load the SegFormer model"""
     try:
-        # First create a base model with the correct architecture
         base_model = TFSegformerForSemanticSegmentation.from_pretrained(
             "nvidia/mit-b0",
             num_labels=NUM_CLASSES,
@@ -70,9 +71,8 @@ def load_model():
                 base_model.load_weights(model_path)
                 st.success("Model weights loaded successfully!")
             except Exception as e:
-                st.success("Model weights loaded successfully!")
-                # st.error(f"Error loading weights: {e}")
-                # st.warning("Using base pretrained model instead.")
         return base_model
@@ -88,36 +88,27 @@ def normalize_image(input_image):
     return input_image
-def preprocess_image(image, is_dataset_image=False):
-    """
-    Preprocess image exactly like in colab_code.py
-    Args:
-        image: PIL Image to preprocess
-        is_dataset_image: Whether the image is from the Oxford-IIIT Pet dataset
-    Returns:
-        Preprocessed image tensor, original image
-    """
     # Convert PIL Image to numpy array
     img_array = np.array(image.convert('RGB'))
     # Store original image for display
     original_img = img_array.copy()
-    # Resize to target size with preserve_aspect_ratio=False
     img_resized = tf.image.resize(
         img_array,
         (IMAGE_SIZE, IMAGE_SIZE),
         method='bilinear',
-        preserve_aspect_ratio=False,  # Ensure exact dimensions
         antialias=True
     )
     # Normalize
     img_normalized = normalize_image(img_resized)
-    # Transpose from HWC to CHW (SegFormer expects channels first)
     img_transposed = tf.transpose(img_normalized, (2, 0, 1))
     # Add batch dimension
@@ -126,13 +117,12 @@ def preprocess_image(image, is_dataset_image=False):
     return img_batch, original_img
-def process_uploaded_mask(mask_array, from_dataset=True):
     """
-    Process an uploaded mask from the dataset to match app's format
     Args:
         mask_array: Numpy array of the mask
-        from_dataset: Whether the mask is from the original dataset
     Returns:
         Processed mask with values 0,1,2
@@ -145,50 +135,37 @@ def process_uploaded_mask(mask_array, from_dataset=True):
     if len(mask_array.shape) == 3 and mask_array.shape[2] >= 3:
         mask_array = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)
-    # For dataset masks, convert from original values (1,2,3) to app values (0,1,2)
-    if from_dataset:
         processed_mask = np.zeros_like(mask_array)
-        # Map dataset values to app values
         processed_mask[mask_array == 1] = 2  # Foreground/pet (1→2)
         processed_mask[mask_array == 2] = 1  # Border (2→1)
         processed_mask[mask_array == 3] = 0  # Background (3→0)
         return processed_mask
-    else:
-        # For non-dataset masks, assume they're already in the right format
         return mask_array
 def create_mask(pred_mask):
-    """
-    Convert model prediction to displayable mask
-    Args:
-        pred_mask: Prediction logits from the model
-    Returns:
-        Processed mask (2D array)
-    """
-    # Take argmax along the class dimension
     pred_mask = tf.math.argmax(pred_mask, axis=1)
-    # Remove batch dimension and convert to numpy
     pred_mask = tf.squeeze(pred_mask)
     return pred_mask.numpy()
 def colorize_mask(mask):
-    """
-    Colorize a segmentation mask for visualization
-    Args:
-        mask: Segmentation mask (2D array with class indices)
-    Returns:
-        Colorized mask (3D array with RGB colors)
-    """
     # Define colors for visualization
     colors = [
         [0, 0, 0],       # Black for background (0)
@@ -208,17 +185,7 @@ def colorize_mask(mask):
 def create_overlay(image, mask, alpha=0.5):
-    """
-    Create an overlay of mask on original image
-    Args:
-        image: Original image
-        mask: Colorized segmentation mask
-        alpha: Transparency level (0-1)
-    Returns:
-        Overlay image
-    """
     # Ensure mask shape matches image
     if image.shape[:2] != mask.shape[:2]:
         mask = cv2.resize(mask, (image.shape[1], image.shape[0]))
@@ -236,17 +203,7 @@ def create_overlay(image, mask, alpha=0.5):
 def calculate_iou(y_true, y_pred, class_idx=None):
-    """
-    Calculate IoU (Intersection over Union)
-    Args:
-        y_true: Ground truth mask
-        y_pred: Predicted mask
-        class_idx: Class index to compute IoU for (if None, compute mean IoU)
-    Returns:
-        IoU score
-    """
     if class_idx is not None:
         # Convert to binary masks for specific class
         y_true_class = (y_true == class_idx).astype(np.float32)
@@ -265,23 +222,11 @@ def calculate_iou(y_true, y_pred, class_idx=None):
             class_iou = calculate_iou(y_true, y_pred, idx)
             class_ious.append(class_iou)
-        iou = np.mean(class_ious)
-    return iou
 def calculate_dice(y_true, y_pred, class_idx=None):
-    """
-    Calculate Dice coefficient (F1 score)
-    Args:
-        y_true: Ground truth mask
-        y_pred: Predicted mask
-        class_idx: Class index to compute Dice for (if None, compute mean Dice)
-    Returns:
-        Dice score
-    """
     if class_idx is not None:
         # Convert to binary masks for specific class
         y_true_class = (y_true == class_idx).astype(np.float32)
@@ -300,37 +245,18 @@ def calculate_dice(y_true, y_pred, class_idx=None):
             class_dice = calculate_dice(y_true, y_pred, idx)
             class_dices.append(class_dice)
-        dice = np.mean(class_dices)
-    return dice
 def calculate_pixel_accuracy(y_true, y_pred):
-    """
-    Calculate pixel accuracy
-    Args:
-        y_true: Ground truth mask
-        y_pred: Predicted mask
-    Returns:
-        Pixel accuracy
-    """
     correct = np.sum(y_true == y_pred)
     total = y_true.size
     return float(correct) / float(total)
 def display_side_by_side(original_img, gt_mask=None, pred_mask=None, overlay=None):
-    """
-    Display images side by side
-    Args:
-        original_img: Original input image
-        gt_mask: Ground truth segmentation mask (optional)
-        pred_mask: Predicted segmentation mask
-        overlay: Overlay of mask on original image
-    """
     # Determine number of columns based on available images
     columns = 1  # Start with original image
     if gt_mask is not None:
@@ -385,9 +311,6 @@ def main():
     # Debug mode toggle
     debug_mode = st.sidebar.checkbox("Debug Mode", value=False)
-    # Dataset image toggle - important for processing Oxford-IIIT Pet masks
-    dataset_image = st.sidebar.checkbox("Image is from Oxford-IIIT Pet dataset", value=True)
     # Overlay opacity control
     overlay_opacity = st.sidebar.slider(
         "Overlay Opacity",
@@ -418,11 +341,15 @@ def main():
             # Read the image
             image_bytes = uploaded_image.read()
             image = Image.open(io.BytesIO(image_bytes))
             # Preprocess and predict
             with st.spinner("Generating segmentation mask..."):
                 # Preprocess the image
-                img_tensor, original_img = preprocess_image(image, is_dataset_image=dataset_image)
                 # Make prediction
                 outputs = model(pixel_values=img_tensor, training=False)
@@ -437,7 +364,7 @@ def main():
                 # Create overlay
                 overlay = create_overlay(original_img, colorized_mask, alpha=overlay_opacity)
-            # Prepare for metrics calculation (if ground truth is provided)
             gt_mask = None
             gt_mask_colorized = None
             metrics_calculated = False
@@ -457,15 +384,15 @@ def main():
                         st.write(f"Ground truth mask shape: {gt_mask_raw.shape}")
                         st.write(f"Ground truth mask unique values: {np.unique(gt_mask_raw)}")
-                    # Process the mask based on source
-                    gt_mask = process_uploaded_mask(gt_mask_raw, from_dataset=dataset_image)
                     # Colorize for display
                     gt_mask_colorized = colorize_mask(gt_mask)
                     # Resize for comparison
                     gt_mask_resized = cv2.resize(gt_mask, (mask.shape[0], mask.shape[1]),
-                                                interpolation=cv2.INTER_NEAREST)
                     if debug_mode:
                         st.write(f"Processed GT mask shape: {gt_mask_resized.shape}")
@@ -485,6 +412,7 @@ def main():
                         st.code(traceback.format_exc())
             # Display results
             display_side_by_side(
                 original_img,
                 gt_mask_colorized,
@@ -577,7 +505,7 @@ def main():
                     # Create CSV with metrics
                     metrics_csv = f"Metric,Overall,Background,Border,Foreground\n"
                     metrics_csv += f"IoU,{iou_score:.4f},{calculate_iou(gt_mask_resized, mask, 0):.4f},{calculate_iou(gt_mask_resized, mask, 1):.4f},{calculate_iou(gt_mask_resized, mask, 2):.4f}\n"
-                    metrics_csv += f"Dice,{dice_score:.4f},{calculate_dice(gt_mask_resized, mask, 0):.4f},{calculate_dice(gt_mask_resized, mask, 1):.4f},{calculate_dice(gt_mask_resized, mask, 2)::.4f}\n"
                     metrics_csv += f"Accuracy,{accuracy:.4f},,,"
                     st.download_button(
@@ -592,6 +520,9 @@ def main():
             if debug_mode:
                 import traceback
                 st.code(traceback.format_exc())
 if __name__ == "__main__":

 import gdown
 from transformers import TFSegformerForSemanticSegmentation
+# Set page config at the very beginning
 st.set_page_config(
     page_title="Pet Segmentation with SegFormer",
     page_icon="🐶",
     initial_sidebar_state="expanded"
 )
+# Constants for image preprocessing
 IMAGE_SIZE = 512
 OUTPUT_SIZE = 128
 MEAN = tf.constant([0.485, 0.456, 0.406])
 STD = tf.constant([0.229, 0.224, 0.225])
+# Class labels
 ID2LABEL = {0: "background", 1: "border", 2: "foreground/pet"}
 NUM_CLASSES = len(ID2LABEL)
     if not os.path.exists(model_path):
         # Correct format for gdown
+        file_id = "1XObpqG8qZ7YUyiRKbpVvxX11yQSK8Y_3"
+        url = f"https://drive.google.com/uc?id={file_id}"
         try:
             gdown.download(url, model_path, quiet=False)
             st.success("Model downloaded successfully from Google Drive.")
 def load_model():
     """Load the SegFormer model"""
     try:
+        # Create a base model with the correct architecture
         base_model = TFSegformerForSemanticSegmentation.from_pretrained(
             "nvidia/mit-b0",
             num_labels=NUM_CLASSES,
                 base_model.load_weights(model_path)
                 st.success("Model weights loaded successfully!")
             except Exception as e:
+                st.error(f"Error loading weights: {e}")
+                st.warning("Using base pretrained model instead.")
         return base_model
     return input_image
+def preprocess_image(image):
+    """Preprocess image exactly like in colab_code.py"""
     # Convert PIL Image to numpy array
     img_array = np.array(image.convert('RGB'))
     # Store original image for display
     original_img = img_array.copy()
+    # Resize to target size
     img_resized = tf.image.resize(
         img_array,
         (IMAGE_SIZE, IMAGE_SIZE),
         method='bilinear',
+        preserve_aspect_ratio=False,
         antialias=True
     )
     # Normalize
     img_normalized = normalize_image(img_resized)
+    # Transpose from HWC to CHW (channels first)
     img_transposed = tf.transpose(img_normalized, (2, 0, 1))
     # Add batch dimension
     return img_batch, original_img
+def process_uploaded_mask(mask_array):
     """
+    Process an uploaded mask from save_image_and_mask_to_local function
     Args:
         mask_array: Numpy array of the mask
     Returns:
         Processed mask with values 0,1,2
     if len(mask_array.shape) == 3 and mask_array.shape[2] >= 3:
         mask_array = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)
+    # Check the unique values in the mask to determine processing
+    unique_values = np.unique(mask_array)
+    # If mask has values 1,2,3 (from the dataset), convert to 0,1,2
+    if 3 in unique_values:
         processed_mask = np.zeros_like(mask_array)
         processed_mask[mask_array == 1] = 2  # Foreground/pet (1→2)
         processed_mask[mask_array == 2] = 1  # Border (2→1)
         processed_mask[mask_array == 3] = 0  # Background (3→0)
         return processed_mask
+    # If mask has values 0,1,2 already, just return it
+    elif 0 in unique_values and 2 in unique_values:
         return mask_array
+    # If we can't determine the format, use binary threshold as fallback
+    else:
+        # Use binary threshold to create a simple foreground/background mask
+        _, binary_mask = cv2.threshold(mask_array, 127, 2, cv2.THRESH_BINARY)
+        return binary_mask
 def create_mask(pred_mask):
+    """Convert model prediction to mask"""
     pred_mask = tf.math.argmax(pred_mask, axis=1)
     pred_mask = tf.squeeze(pred_mask)
     return pred_mask.numpy()
 def colorize_mask(mask):
+    """Colorize a segmentation mask for visualization"""
     # Define colors for visualization
     colors = [
         [0, 0, 0],       # Black for background (0)
 def create_overlay(image, mask, alpha=0.5):
+    """Create an overlay of mask on original image"""
     # Ensure mask shape matches image
     if image.shape[:2] != mask.shape[:2]:
         mask = cv2.resize(mask, (image.shape[1], image.shape[0]))
 def calculate_iou(y_true, y_pred, class_idx=None):
+    """Calculate IoU (Intersection over Union)"""
     if class_idx is not None:
         # Convert to binary masks for specific class
         y_true_class = (y_true == class_idx).astype(np.float32)
             class_iou = calculate_iou(y_true, y_pred, idx)
             class_ious.append(class_iou)
+        return np.mean(class_ious)
 def calculate_dice(y_true, y_pred, class_idx=None):
+    """Calculate Dice coefficient (F1 score)"""
     if class_idx is not None:
         # Convert to binary masks for specific class
         y_true_class = (y_true == class_idx).astype(np.float32)
             class_dice = calculate_dice(y_true, y_pred, idx)
             class_dices.append(class_dice)
+        return np.mean(class_dices)
 def calculate_pixel_accuracy(y_true, y_pred):
+    """Calculate pixel accuracy"""
     correct = np.sum(y_true == y_pred)
     total = y_true.size
     return float(correct) / float(total)
 def display_side_by_side(original_img, gt_mask=None, pred_mask=None, overlay=None):
+    """Display images side by side"""
     # Determine number of columns based on available images
     columns = 1  # Start with original image
     if gt_mask is not None:
     # Debug mode toggle
     debug_mode = st.sidebar.checkbox("Debug Mode", value=False)
     # Overlay opacity control
     overlay_opacity = st.sidebar.slider(
         "Overlay Opacity",
             # Read the image
             image_bytes = uploaded_image.read()
             image = Image.open(io.BytesIO(image_bytes))
+            # Display the original image first
+            st.subheader("Original Image")
+            st.image(image, caption="Uploaded Image", use_column_width=True)
             # Preprocess and predict
             with st.spinner("Generating segmentation mask..."):
                 # Preprocess the image
+                img_tensor, original_img = preprocess_image(image)
                 # Make prediction
                 outputs = model(pixel_values=img_tensor, training=False)
                 # Create overlay
                 overlay = create_overlay(original_img, colorized_mask, alpha=overlay_opacity)
+            # Prepare for metrics calculation
             gt_mask = None
             gt_mask_colorized = None
             metrics_calculated = False
                         st.write(f"Ground truth mask shape: {gt_mask_raw.shape}")
                         st.write(f"Ground truth mask unique values: {np.unique(gt_mask_raw)}")
+                    # Process the mask
+                    gt_mask = process_uploaded_mask(gt_mask_raw)
                     # Colorize for display
                     gt_mask_colorized = colorize_mask(gt_mask)
                     # Resize for comparison
                     gt_mask_resized = cv2.resize(gt_mask, (mask.shape[0], mask.shape[1]),
+                                               interpolation=cv2.INTER_NEAREST)
                     if debug_mode:
                         st.write(f"Processed GT mask shape: {gt_mask_resized.shape}")
                         st.code(traceback.format_exc())
             # Display results
+            st.subheader("Segmentation Results")
             display_side_by_side(
                 original_img,
                 gt_mask_colorized,
                     # Create CSV with metrics
                     metrics_csv = f"Metric,Overall,Background,Border,Foreground\n"
                     metrics_csv += f"IoU,{iou_score:.4f},{calculate_iou(gt_mask_resized, mask, 0):.4f},{calculate_iou(gt_mask_resized, mask, 1):.4f},{calculate_iou(gt_mask_resized, mask, 2):.4f}\n"
+                    metrics_csv += f"Dice,{dice_score:.4f},{calculate_dice(gt_mask_resized, mask, 0):.4f},{calculate_dice(gt_mask_resized, mask, 1):.4f},{calculate_dice(gt_mask_resized, mask, 2):.4f}\n"
                     metrics_csv += f"Accuracy,{accuracy:.4f},,,"
                     st.download_button(
             if debug_mode:
                 import traceback
                 st.code(traceback.format_exc())
+    else:
+        # Display sample images if no image is uploaded
+        st.info("Please upload an image to get started.")
 if __name__ == "__main__":