Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Community

ApsidalSolid4 committed on Feb 20

Commit

8373deb

verified ·

1 Parent(s): 79ae2f7

Update app.py

Browse files

Files changed (1) hide show

app.py +45 -39

app.py CHANGED Viewed

@@ -163,7 +163,7 @@ class TextClassifier:
         }
     def detailed_scan(self, text: str) -> Dict:
-        """Perform a detailed scan with sentence-level analysis and improved boundary handling."""
         if not text.strip():
             return {
                 'sentence_predictions': [],
@@ -180,18 +180,22 @@ class TextClassifier:
         if not sentences:
             return {}
-        # Create centered windows for each sentence
         windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
-        # Track scores for each sentence
-        sentence_appearances = {i: 0 for i in range(len(sentences))}
-        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
-        # Process windows in batches
-        for i in range(0, len(windows), BATCH_SIZE):
-            batch_windows = windows[i:i + BATCH_SIZE]
-            batch_indices = window_sentence_indices[i:i + batch_size]
             inputs = self.tokenizer(
                 batch_windows,
                 truncation=True,
@@ -204,45 +208,46 @@ class TextClassifier:
                 outputs = self.model(**inputs)
                 probs = F.softmax(outputs.logits, dim=-1)
-                # Attribute predictions with center-weighted approach
                 for window_idx, indices in enumerate(batch_indices):
                     center_idx = len(indices) // 2
-                    center_weight = 0.7  # Higher weight for center sentence
-                    edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
                     for pos, sent_idx in enumerate(indices):
-                        # Apply higher weight to center sentence
                         weight = center_weight if pos == center_idx else edge_weight
-                        sentence_appearances[sent_idx] += weight
-                        sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
-                        sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
-        # Calculate final predictions with boundary smoothing
         sentence_predictions = []
         for i in range(len(sentences)):
-            if sentence_appearances[i] > 0:
-                human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
-                ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
-                # Apply minimal smoothing at prediction boundaries
-                if i > 0 and i < len(sentences) - 1:
-                    prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
-                    prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
-                    next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
-                    next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
-                    # Check if we're at a prediction boundary
-                    current_pred = 'human' if human_prob > ai_prob else 'ai'
-                    prev_pred = 'human' if prev_human > prev_ai else 'ai'
-                    next_pred = 'human' if next_human > next_ai else 'ai'
-                    if current_pred != prev_pred or current_pred != next_pred:
-                        # Small adjustment at boundaries
-                        smooth_factor = 0.1
-                        human_prob = (human_prob * (1 - smooth_factor) +
-                                    (prev_human + next_human) * smooth_factor / 2)
-                        ai_prob = (ai_prob * (1 - smooth_factor) +
-                                 (prev_ai + next_ai) * smooth_factor / 2)
                 sentence_predictions.append({
                     'sentence': sentences[i],
@@ -251,6 +256,7 @@ class TextClassifier:
                     'prediction': 'human' if human_prob > ai_prob else 'ai',
                     'confidence': max(human_prob, ai_prob)
                 })
         return {
             'sentence_predictions': sentence_predictions,

         }
     def detailed_scan(self, text: str) -> Dict:
+        """Optimized detailed scan with sentence-level analysis."""
         if not text.strip():
             return {
                 'sentence_predictions': [],
         if not sentences:
             return {}
+        # Pre-calculate window information
         windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
+        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0, 'appearances': 0} for i in range(len(sentences))}
+        # Calculate weights once
+        center_weight = 0.7
+        edge_weight = 0.3 / (WINDOW_SIZE - 1) if WINDOW_SIZE > 1 else 0.3
+        # Process all windows in larger batches
+        batch_size = min(32, len(windows))  # Increased batch size
+        for i in range(0, len(windows), batch_size):
+            batch_end = min(i + batch_size, len(windows))
+            batch_windows = windows[i:batch_end]
+            batch_indices = window_sentence_indices[i:batch_end]
+            # Process batch
             inputs = self.tokenizer(
                 batch_windows,
                 truncation=True,
                 outputs = self.model(**inputs)
                 probs = F.softmax(outputs.logits, dim=-1)
+                # Process each window in the batch
                 for window_idx, indices in enumerate(batch_indices):
                     center_idx = len(indices) // 2
+                    window_human_prob = probs[window_idx][1].item()
+                    window_ai_prob = probs[window_idx][0].item()
+                    # Update scores for all sentences in this window
                     for pos, sent_idx in enumerate(indices):
                         weight = center_weight if pos == center_idx else edge_weight
+                        sentence_scores[sent_idx]['human_prob'] += weight * window_human_prob
+                        sentence_scores[sent_idx]['ai_prob'] += weight * window_ai_prob
+                        sentence_scores[sent_idx]['appearances'] += weight
+            del inputs, outputs, probs
+            if torch.cuda.is_available():
+                torch.cuda.empty_cache()
+        # Calculate final predictions
         sentence_predictions = []
+        prev_pred = None
         for i in range(len(sentences)):
+            scores = sentence_scores[i]
+            if scores['appearances'] > 0:
+                # Calculate base probabilities
+                human_prob = scores['human_prob'] / scores['appearances']
+                ai_prob = scores['ai_prob'] / scores['appearances']
+                current_pred = 'human' if human_prob > ai_prob else 'ai'
+                # Only apply smoothing at actual prediction boundaries
+                if i > 0 and prev_pred and current_pred != prev_pred:
+                    # Simple smoothing only at boundaries
+                    smooth_factor = 0.1
+                    if i < len(sentences) - 1:
+                        next_scores = sentence_scores[i + 1]
+                        next_human = next_scores['human_prob'] / next_scores['appearances']
+                        next_ai = next_scores['ai_prob'] / next_scores['appearances']
+                        # Apply minimal smoothing
+                        human_prob = human_prob * (1 - smooth_factor) + next_human * smooth_factor
+                        ai_prob = ai_prob * (1 - smooth_factor) + next_ai * smooth_factor
                 sentence_predictions.append({
                     'sentence': sentences[i],
                     'prediction': 'human' if human_prob > ai_prob else 'ai',
                     'confidence': max(human_prob, ai_prob)
                 })
+                prev_pred = current_pred
         return {
             'sentence_predictions': sentence_predictions,