Spaces:

obichimav
/

Object-Detection-and-Plant-Analysis-System

Running

App Files Files Community

obichimav committed on Feb 21

Commit

2bd484d

verified ·

1 Parent(s): 7f884bb

Update app.py

Browse files

Files changed (1) hide show

app.py +25 -37

app.py CHANGED Viewed

@@ -352,7 +352,7 @@ PLANTNET_API_KEY = os.getenv('PLANTNET_API_KEY', 'your-plantnet-key-here')
 MODEL = "gpt-4o"
 openai = OpenAI()
-# Initialize VisionAgent
 agent = VisionAgentCoderV2(verbose=False)
 system_message = """You are an expert in object detection. When users mention:
@@ -360,10 +360,8 @@ system_message = """You are an expert in object detection. When users mention:
 2. "detect [object(s)]" - Same as count
 3. "show [object(s)]" - Same as count
-Always use object detection tool when counting/detecting is mentioned."""
-system_message += "Always be accurate. If you don't know the answer, say so."
 class State:
     def __init__(self):
@@ -398,38 +396,28 @@ def detect_objects(query_text):
         # Clean query text to get the object name
         object_name = query_text[0].replace("a photo of ", "").strip()
-        # Let VisionAgent handle the detection with its agent-based approach
-        # Create agent message for object detection
-        agent_message = [
-            AgentMessage(
-                role="user",
-                content=f"Count the number of {object_name} in this image. Only show detections with high confidence (>0.75).",
-                media=[image_path]
-            )
-        ]
-        # Generate code using VisionAgent
-        code_context = agent.generate_code(agent_message)
-        # Load the image for visualization
         image = T.load_image(image_path)
-        # Use multiple models for detection and get high confidence results
-        # First try the specialized detector
         detections = T.countgd_object_detection(object_name, image, conf_threshold=0.55)
-        # If no high-confidence detections, try the more general object detector
         if not detections:
-            # Try a different model with the same high threshold
             try:
                 detections = T.grounding_dino_detection(object_name, image, box_threshold=0.55)
-            except:
-                pass
-        # Only keep high confidence detections
-        high_conf_detections = [det for det in detections if det.get("score", 0) > 0.55]
-        # Visualize only high confidence results with clear labeling
         result_image = T.overlay_bounding_boxes(
             image,
             high_conf_detections,
@@ -442,7 +430,7 @@ def detect_objects(query_text):
         return {
             "count": len(high_conf_detections),
             "confidence": [det["score"] for det in high_conf_detections],
-            "message": f"Detected {len(high_conf_detections)} {object_name}(s) with high confidence (>0.75)"
         }
     except Exception as e:
         print(f"Error in detect_objects: {str(e)}")
@@ -539,8 +527,6 @@ def chat(message, image, history):
     # Extract objects to detect from user message
     objects_to_detect = message.lower()
-    # Format query for object detection - keep it simple and direct
     cleaned_query = objects_to_detect.replace("count", "").replace("detect", "").replace("show", "").strip()
     query = ["a photo of " + cleaned_query]
@@ -559,11 +545,13 @@ def chat(message, image, history):
         max_tokens=300
     )
     if response.choices[0].finish_reason == "tool_calls":
-        message = response.choices[0].message
-        messages.append(message)
-        for tool_call in message.tool_calls:
             if tool_call.function.name == "detect_objects":
                 results = detect_objects(query)
             else:
@@ -604,8 +592,8 @@ with gr.Blocks() as demo:
             output_image = gr.Image(type="numpy", label="Detection Results")
     def process_interaction(message, image, history):
-        response, pred_image = chat(message, image, history)
-        history.append((message, response))
         return "", pred_image, history
     def reset_interface():
@@ -636,4 +624,4 @@ Examples:
 - "What species is this plant?"
 """)
-demo.launch(share=True)

 MODEL = "gpt-4o"
 openai = OpenAI()
+# Initialize VisionAgent (kept for potential future use, though not used directly in detection below)
 agent = VisionAgentCoderV2(verbose=False)
 system_message = """You are an expert in object detection. When users mention:
 2. "detect [object(s)]" - Same as count
 3. "show [object(s)]" - Same as count
+Always use object detection tool when counting/detecting is mentioned.
+Always be accurate. If you don't know the answer, say so."""
 class State:
     def __init__(self):
         # Clean query text to get the object name
         object_name = query_text[0].replace("a photo of ", "").strip()
+        # Load the image for detection and visualization
         image = T.load_image(image_path)
+        # Use the specialized detector first with a threshold of 0.55
         detections = T.countgd_object_detection(object_name, image, conf_threshold=0.55)
+        if detections is None:
+            detections = []
+        # If no detections, try the more general grounding_dino detector
         if not detections:
             try:
                 detections = T.grounding_dino_detection(object_name, image, box_threshold=0.55)
+                if detections is None:
+                    detections = []
+            except Exception as e:
+                print(f"Error in grounding_dino_detection: {str(e)}")
+                detections = []
+        # Only keep detections with a confidence of at least 0.55
+        high_conf_detections = [det for det in detections if det.get("score", 0) >= 0.55]
+        # Visualize the high confidence detections with clear labeling
         result_image = T.overlay_bounding_boxes(
             image,
             high_conf_detections,
         return {
             "count": len(high_conf_detections),
             "confidence": [det["score"] for det in high_conf_detections],
+            "message": f"Detected {len(high_conf_detections)} {object_name}(s) with high confidence (>=0.55)"
         }
     except Exception as e:
         print(f"Error in detect_objects: {str(e)}")
     # Extract objects to detect from user message
     objects_to_detect = message.lower()
     cleaned_query = objects_to_detect.replace("count", "").replace("detect", "").replace("show", "").strip()
     query = ["a photo of " + cleaned_query]
         max_tokens=300
     )
+    # Check if a tool call is required based on the response
     if response.choices[0].finish_reason == "tool_calls":
+        message_obj = response.choices[0].message
+        messages.append(message_obj)
+        # Process each tool call from the message
+        for tool_call in message_obj.tool_calls:
             if tool_call.function.name == "detect_objects":
                 results = detect_objects(query)
             else:
             output_image = gr.Image(type="numpy", label="Detection Results")
     def process_interaction(message, image, history):
+        response_text, pred_image = chat(message, image, history)
+        history.append((message, response_text))
         return "", pred_image, history
     def reset_interface():
 - "What species is this plant?"
 """)
+demo.launch(share=True)