Spaces:

kishoreb4
/

CV_Project

Sleeping

App Files Community

kishoreb4 committed 17 days ago

Commit

cb7125b

verified ·

1 Parent(s): 3deb70d

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -236

app.py CHANGED Viewed

@@ -1,12 +1,4 @@
 import streamlit as st
-# THIS MUST BE THE FIRST STREAMLIT COMMAND
-st.set_page_config(
-    page_title="Pet Segmentation with SegFormer",
-    page_icon="🐶",
-    layout="wide",
-    initial_sidebar_state="expanded"
-)
 import tensorflow as tf
 from tensorflow.keras import backend
 import numpy as np
@@ -18,18 +10,12 @@ import io
 import gdown
 from transformers import TFSegformerForSemanticSegmentation
-try:
-    # Limit GPU memory growth
-    gpus = tf.config.experimental.list_physical_devices('GPU')
-    if gpus:
-        for gpu in gpus:
-            tf.config.experimental.set_memory_growth(gpu, True)
-        st.sidebar.success(f"GPU available: {len(gpus)} device(s)")
-    else:
-        st.sidebar.warning("No GPU detected, using CPU")
-except Exception as e:
-    st.sidebar.error(f"GPU config error: {e}")
 # Constants for image preprocessing
 IMAGE_SIZE = 512
@@ -49,7 +35,7 @@ def download_model_from_drive():
     model_path = "models/tf_model.h5"
     if not os.path.exists(model_path):
-        # Extract the file ID from the sharing URL
         url = "https://drive.google.com/file/d/1XObpqG8qZ7YUyiRKbpVvxX11yQSK8Y_3/view?usp=sharing"
         try:
             gdown.download(url, model_path, quiet=False)
@@ -124,13 +110,12 @@ def normalize_image(input_image):
     input_image = (input_image - MEAN) / tf.maximum(STD, backend.epsilon())
     return input_image
-def preprocess_image(image, from_dataset=False):
     """
-    Preprocess image for model input with special handling for dataset images
     Args:
         image: PIL Image to preprocess
-        from_dataset: Whether the image is from the original dataset
     Returns:
         Preprocessed image tensor, original image
@@ -142,24 +127,12 @@ def preprocess_image(image, from_dataset=False):
     original_img = img_array.copy()
     # Resize to target size
-    img_resized = tf.image.resize(
-        img_array,
-        (IMAGE_SIZE, IMAGE_SIZE),
-        method='bilinear',
-        preserve_aspect_ratio=False,
-        antialias=True
-    )
-    # Special handling for dataset images
-    if from_dataset:
-        # The dataset already has specific dimensions, just normalize
-        # Skip additional preprocessing that might have been applied
-        img_normalized = normalize_image(img_resized)
-    else:
-        # Regular preprocessing for uploaded images
-        img_normalized = normalize_image(img_resized)
-    # Transpose from HWC to CHW (channels first)
     img_transposed = tf.transpose(img_normalized, (2, 0, 1))
     # Add batch dimension
@@ -177,21 +150,8 @@ def create_mask(pred_mask):
     Returns:
         Processed mask (2D array)
     """
-    # Take argmax along the class dimension (axis=1 for batch data)
     pred_mask = tf.math.argmax(pred_mask, axis=1)
-    # Remove batch dimension and convert to numpy
     pred_mask = tf.squeeze(pred_mask)
-    # Resize to match original image size if needed
-    if pred_mask.shape[0] != IMAGE_SIZE or pred_mask.shape[1] != IMAGE_SIZE:
-        pred_mask = tf.image.resize(
-            tf.expand_dims(pred_mask, axis=-1),
-            (IMAGE_SIZE, IMAGE_SIZE),
-            method='nearest'
-        )
-        pred_mask = tf.squeeze(pred_mask)
     return pred_mask.numpy()
 def colorize_mask(mask):
@@ -284,91 +244,6 @@ def create_overlay(image, mask, alpha=0.5):
     return overlay
-def display_results_side_by_side(original_image, ground_truth_mask=None, predicted_mask=None):
-    """
-    Display results in a side-by-side format similar to colab_code.py
-    Args:
-        original_image: Original input image
-        ground_truth_mask: Optional ground truth segmentation mask
-        predicted_mask: Predicted segmentation mask
-    """
-    # Determine how many images to display
-    cols = 1 + (ground_truth_mask is not None) + (predicted_mask is not None)
-    # Create a figure with multiple columns
-    st.write("### Segmentation Results Comparison")
-    col_list = st.columns(cols)
-    # Display original image
-    with col_list[0]:
-        st.markdown("**Original Image**")
-        st.image(original_image, use_column_width=True)
-    # Display ground truth if available
-    if ground_truth_mask is not None:
-        with col_list[1]:
-            st.markdown("**Ground Truth Mask**")
-            # Colorize ground truth if needed
-            if len(ground_truth_mask.shape) == 2:
-                gt_display = colorize_mask(ground_truth_mask)
-            else:
-                gt_display = ground_truth_mask
-            st.image(gt_display, use_column_width=True)
-    # Display prediction
-    if predicted_mask is not None:
-        with col_list[2 if ground_truth_mask is not None else 1]:
-            st.markdown("**Predicted Mask**")
-            # Colorize prediction if needed
-            if len(predicted_mask.shape) == 2:
-                pred_display = colorize_mask(predicted_mask)
-            else:
-                pred_display = predicted_mask
-            st.image(pred_display, use_column_width=True)
-def process_uploaded_mask(mask_array, from_dataset=False):
-    """
-    Process an uploaded mask to ensure it has the correct format
-    Args:
-        mask_array: Numpy array of the mask
-        from_dataset: Whether the mask is from the original dataset
-    Returns:
-        Processed mask with values 0,1,2
-    """
-    # Check for RGBA format and convert to RGB if needed
-    if len(mask_array.shape) == 3 and mask_array.shape[2] == 4:
-        # Convert RGBA to RGB (discard alpha channel)
-        mask_array = mask_array[:,:,:3]
-    # Convert RGB to grayscale if needed
-    if len(mask_array.shape) == 3 and mask_array.shape[2] >= 3:
-        # Convert RGB to grayscale
-        mask_array = cv2.cvtColor(mask_array, cv2.COLOR_RGB2GRAY)
-    if from_dataset:
-        # For dataset masks (saved from your colab code):
-        # Create an empty mask with the same shape
-        processed_mask = np.zeros_like(mask_array)
-        # Map the values correctly:
-        # Original dataset uses 1,2,3 but your app expects 0,1,2
-        processed_mask[mask_array == 1] = 2  # Foreground/pet (1→2)
-        processed_mask[mask_array == 2] = 1  # Border (2→1)
-        processed_mask[mask_array == 3] = 0  # Background (3→0)
-        return processed_mask
-    else:
-        # For non-dataset masks, we assume they have correct class values
-        return mask_array
 def main():
     st.title("🐶 Pet Segmentation with SegFormer")
     st.markdown("""
@@ -404,9 +279,6 @@ def main():
         step=0.1
     )
-    # Add this checkbox to your app's UI
-    dataset_image = st.sidebar.checkbox("Image is from the Oxford-IIIT Pet dataset")
     # Load model
     with st.spinner("Loading SegFormer model..."):
         model = load_model()
@@ -436,46 +308,21 @@ def main():
             # Preprocess and predict
             with st.spinner("Generating segmentation mask..."):
-                try:
-                    # Preprocess the image
-                    img_tensor, original_img = preprocess_image(image, from_dataset=dataset_image)
-                    # Print shape to debug
-                    st.write(f"DEBUG - Input tensor shape: {img_tensor.shape}")
-                    # Make prediction with error handling
-                    try:
-                        outputs = model(pixel_values=img_tensor, training=False)
-                        logits = outputs.logits
-                        # Create visualization mask
-                        mask = create_mask(logits)
-                        # Colorize the mask
-                        colorized_mask = colorize_mask(mask)
-                        # Create overlay
-                        overlay = create_overlay(original_img, colorized_mask, alpha=overlay_opacity)
-                    except Exception as inference_error:
-                        st.error(f"Inference error: {inference_error}")
-                        st.write("Trying alternative approach...")
-                        # Alternative: resize to exactly 512x512 with crop_or_pad
-                        img_resized = tf.image.resize_with_crop_or_pad(
-                            original_img, IMAGE_SIZE, IMAGE_SIZE
-                        )
-                        img_normalized = normalize_image(img_resized)
-                        img_transposed = tf.transpose(img_normalized, (2, 0, 1))
-                        img_tensor = tf.expand_dims(img_transposed, axis=0)
-                        outputs = model(pixel_values=img_tensor, training=False)
-                        logits = outputs.logits
-                        mask = create_mask(logits)
-                        colorized_mask = colorize_mask(mask)
-                        overlay = create_overlay(original_img, colorized_mask, alpha=overlay_opacity)
-                except Exception as e:
-                    st.error(f"Failed to process image: {e}")
-                    st.stop()
             # Display results
             with col2:
@@ -507,81 +354,37 @@ def main():
             # Calculate IoU if ground truth is uploaded
             if uploaded_mask is not None:
                 try:
-                    # Reset the file pointer to the beginning
-                    uploaded_mask.seek(0)
                     # Read the mask file
                     mask_data = uploaded_mask.read()
                     mask_io = io.BytesIO(mask_data)
-                    # Load the raw mask
-                    raw_mask = np.array(Image.open(mask_io))
-                    # Show debug info
-                    st.write(f"Debug - Raw mask shape: {raw_mask.shape}")
-                    st.write(f"Debug - Raw mask unique values: {np.unique(raw_mask)}")
-                    # Process the mask based on source
-                    processed_gt_mask = process_uploaded_mask(raw_mask, from_dataset=dataset_image)
-                    # Resize for IoU calculation
-                    gt_mask_resized = cv2.resize(processed_gt_mask, (OUTPUT_SIZE, OUTPUT_SIZE),
-                                               interpolation=cv2.INTER_NEAREST)
-                    # Resize prediction for comparison
-                    pred_mask_resized = cv2.resize(mask, (OUTPUT_SIZE, OUTPUT_SIZE),
-                                                 interpolation=cv2.INTER_NEAREST)
-                    # Show processed values
-                    st.write(f"Debug - Processed GT mask unique values: {np.unique(gt_mask_resized)}")
-                    st.write(f"Debug - Prediction mask unique values: {np.unique(pred_mask_resized)}")
                     # Calculate and display IoU
-                    iou_score = calculate_iou(gt_mask_resized, pred_mask_resized)
                     st.success(f"Mean IoU: {iou_score:.4f}")
                     # Display specific class IoUs
                     st.markdown("### IoU by Class")
                     col1, col2, col3 = st.columns(3)
                     with col1:
-                        bg_iou = calculate_iou(gt_mask_resized, pred_mask_resized, 0)
                         st.metric("Background IoU", f"{bg_iou:.4f}")
                     with col2:
-                        border_iou = calculate_iou(gt_mask_resized, pred_mask_resized, 1)
                         st.metric("Border IoU", f"{border_iou:.4f}")
                     with col3:
-                        fg_iou = calculate_iou(gt_mask_resized, pred_mask_resized, 2)
                         st.metric("Foreground IoU", f"{fg_iou:.4f}")
-                    # For display, create a colorized version of the ground truth
-                    gt_mask_for_display = colorize_mask(processed_gt_mask)
-                    # Side-by-side display
-                    display_results_side_by_side(
-                        original_img,
-                        ground_truth_mask=gt_mask_for_display,
-                        predicted_mask=colorized_mask
-                    )
                 except Exception as e:
                     st.error(f"Error processing ground truth mask: {e}")
                     st.write("Please ensure the mask is valid and has the correct format.")
-                    import traceback
-                    st.code(traceback.format_exc())  # Show detailed error trace
-                    # Even with an error, try to display results without the ground truth
-                    display_results_side_by_side(
-                        original_img,
-                        ground_truth_mask=None,
-                        predicted_mask=colorized_mask
-                    )
-            else:
-                # No ground truth, just display original and prediction
-                display_results_side_by_side(
-                    original_img,
-                    ground_truth_mask=None,
-                    predicted_mask=colorized_mask
-                )
             # Download buttons
             col1, col2 = st.columns(2)

 import streamlit as st
 import tensorflow as tf
 from tensorflow.keras import backend
 import numpy as np
 import gdown
 from transformers import TFSegformerForSemanticSegmentation
+st.set_page_config(
+    page_title="Pet Segmentation with SegFormer",
+    page_icon="🐶",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
 # Constants for image preprocessing
 IMAGE_SIZE = 512
     model_path = "models/tf_model.h5"
     if not os.path.exists(model_path):
+        # NOTE: this is still a Drive sharing link (/file/d/<id>/view); gdown.download only resolves that form when called with fuzzy=True
         url = "https://drive.google.com/file/d/1XObpqG8qZ7YUyiRKbpVvxX11yQSK8Y_3/view?usp=sharing"
         try:
             gdown.download(url, model_path, quiet=False)
     input_image = (input_image - MEAN) / tf.maximum(STD, backend.epsilon())
     return input_image
+def preprocess_image(image):
     """
+    Preprocess image for model input
     Args:
         image: PIL Image to preprocess
     Returns:
         Preprocessed image tensor, original image
     original_img = img_array.copy()
     # Resize to target size
+    img_resized = tf.image.resize(img_array, (IMAGE_SIZE, IMAGE_SIZE))
+    # Normalize
+    img_normalized = normalize_image(img_resized)
+    # Transpose from HWC to CHW (SegFormer expects channels first)
     img_transposed = tf.transpose(img_normalized, (2, 0, 1))
     # Add batch dimension
     Returns:
         Processed mask (2D array)
     """
     pred_mask = tf.math.argmax(pred_mask, axis=1)
     pred_mask = tf.squeeze(pred_mask)
     return pred_mask.numpy()
 def colorize_mask(mask):
     return overlay
 def main():
     st.title("🐶 Pet Segmentation with SegFormer")
     st.markdown("""
         step=0.1
     )
     # Load model
     with st.spinner("Loading SegFormer model..."):
         model = load_model()
             # Preprocess and predict
             with st.spinner("Generating segmentation mask..."):
+                # Preprocess the image
+                img_tensor, original_img = preprocess_image(image)
+                # Make prediction
+                outputs = model(pixel_values=img_tensor, training=False)
+                logits = outputs.logits
+                # Create visualization mask
+                mask = create_mask(logits)
+                # Colorize the mask
+                colorized_mask = colorize_mask(mask)
+                # Create overlay
+                overlay = create_overlay(original_img, colorized_mask, alpha=overlay_opacity)
             # Display results
             with col2:
             # Calculate IoU if ground truth is uploaded
             if uploaded_mask is not None:
                 try:
                     # Read the mask file
                     mask_data = uploaded_mask.read()
                     mask_io = io.BytesIO(mask_data)
+                    gt_mask = np.array(Image.open(mask_io).resize((OUTPUT_SIZE, OUTPUT_SIZE), Image.NEAREST))
+                    # Handle different mask formats
+                    if len(gt_mask.shape) == 3 and gt_mask.shape[2] == 3:
+                        # Convert RGB to single channel if needed
+                        gt_mask = cv2.cvtColor(gt_mask, cv2.COLOR_RGB2GRAY)
                     # Calculate and display IoU
+                    resized_mask = cv2.resize(mask, (OUTPUT_SIZE, OUTPUT_SIZE), interpolation=cv2.INTER_NEAREST)
+                    iou_score = calculate_iou(gt_mask, resized_mask)
                     st.success(f"Mean IoU: {iou_score:.4f}")
                     # Display specific class IoUs
                     st.markdown("### IoU by Class")
                     col1, col2, col3 = st.columns(3)
                     with col1:
+                        bg_iou = calculate_iou(gt_mask, resized_mask, 0)
                         st.metric("Background IoU", f"{bg_iou:.4f}")
                     with col2:
+                        border_iou = calculate_iou(gt_mask, resized_mask, 1)
                         st.metric("Border IoU", f"{border_iou:.4f}")
                     with col3:
+                        fg_iou = calculate_iou(gt_mask, resized_mask, 2)
                         st.metric("Foreground IoU", f"{fg_iou:.4f}")
                 except Exception as e:
                     st.error(f"Error processing ground truth mask: {e}")
                     st.write("Please ensure the mask is valid and has the correct format.")
             # Download buttons
             col1, col2 = st.columns(2)