Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Community

ApsidalSolid4 committed on Feb 20

Commit

99608c9

verified ·

1 Parent(s): 444f8bc

Update app.py

Browse files

Files changed (1) hide show

app.py +96 -93

app.py CHANGED Viewed

@@ -176,102 +176,105 @@ class TextClassifier:
             'num_windows': len(predictions)
         }
-    def detailed_scan(self, text: str) -> Dict:
-        """Original prediction method with modified window handling"""
-        if self.model is None or self.tokenizer is None:
-            self.load_model()
-        self.model.eval()
-        sentences = self.processor.split_into_sentences(text)
-        if not sentences:
-            return {}
-        # Create centered windows for each sentence
-        windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
-        # Track scores for each sentence
-        sentence_appearances = {i: 0 for i in range(len(sentences))}
-        sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
-        # Process windows in batches
-        batch_size = 16
-        for i in range(0, len(windows), batch_size):
-            batch_windows = windows[i:i + batch_size]
-            batch_indices = window_sentence_indices[i:i + batch_size]
-            inputs = self.tokenizer(
-                batch_windows,
-                truncation=True,
-                padding=True,
-                max_length=MAX_LENGTH,
-                return_tensors="pt"
-            ).to(self.device)
-            with torch.no_grad():
-                outputs = self.model(**inputs)
-                probs = F.softmax(outputs.logits, dim=-1)
-                # Attribute predictions more carefully
-                for window_idx, indices in enumerate(batch_indices):
-                    center_idx = len(indices) // 2
-                    center_weight = 0.7  # Higher weight for center sentence
-                    edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
-                    for pos, sent_idx in enumerate(indices):
-                        # Apply higher weight to center sentence
-                        weight = center_weight if pos == center_idx else edge_weight
-                        sentence_appearances[sent_idx] += weight
-                        sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
-                        sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
-            del inputs, outputs, probs
-            if torch.cuda.is_available():
-                torch.cuda.empty_cache()
-        # Calculate final predictions
-        sentence_predictions = []
-        for i in range(len(sentences)):
-            if sentence_appearances[i] > 0:
-                human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
-                ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
-                # Only apply minimal smoothing at prediction boundaries
-                if i > 0 and i < len(sentences) - 1:
-                    prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
-                    prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
-                    next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
-                    next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
-                    # Check if we're at a prediction boundary
-                    current_pred = 'human' if human_prob > ai_prob else 'ai'
-                    prev_pred = 'human' if prev_human > prev_ai else 'ai'
-                    next_pred = 'human' if next_human > next_ai else 'ai'
-                    if current_pred != prev_pred or current_pred != next_pred:
-                        # Small adjustment at boundaries
-                        smooth_factor = 0.1
-                        human_prob = (human_prob * (1 - smooth_factor) +
-                                    (prev_human + next_human) * smooth_factor / 2)
-                        ai_prob = (ai_prob * (1 - smooth_factor) +
-                                (prev_ai + next_ai) * smooth_factor / 2)
-                sentence_predictions.append({
-                    'sentence': sentences[i],
-                    'human_prob': human_prob,
-                    'ai_prob': ai_prob,
-                    'prediction': 'human' if human_prob > ai_prob else 'ai',
-                    'confidence': max(human_prob, ai_prob)
-                })
-        return {
-            'sentence_predictions': sentence_predictions,
-            'highlighted_text': self.format_predictions_html(sentence_predictions),
-            'full_text': text,
-            'overall_prediction': self.aggregate_predictions(sentence_predictions)
-        }
     def detailed_scan(self, text: str) -> Dict:
         """Perform a detailed scan with improved sentence-level analysis."""
         if not text.strip():
             return {
                 'sentence_predictions': [],

             'num_windows': len(predictions)
         }
+    # def detailed_scan(self, text: str) -> Dict:
+    #     """Original prediction method with modified window handling"""
+    #     if self.model is None or self.tokenizer is None:
+    #         self.load_model()
+    #     self.model.eval()
+    #     sentences = self.processor.split_into_sentences(text)
+    #     if not sentences:
+    #         return {}
+    #     # Create centered windows for each sentence
+    #     windows, window_sentence_indices = self.processor.create_centered_windows(sentences, WINDOW_SIZE)
+    #     # Track scores for each sentence
+    #     sentence_appearances = {i: 0 for i in range(len(sentences))}
+    #     sentence_scores = {i: {'human_prob': 0.0, 'ai_prob': 0.0} for i in range(len(sentences))}
+    #     # Process windows in batches
+    #     batch_size = 16
+    #     for i in range(0, len(windows), batch_size):
+    #         batch_windows = windows[i:i + batch_size]
+    #         batch_indices = window_sentence_indices[i:i + batch_size]
+    #         inputs = self.tokenizer(
+    #             batch_windows,
+    #             truncation=True,
+    #             padding=True,
+    #             max_length=MAX_LENGTH,
+    #             return_tensors="pt"
+    #         ).to(self.device)
+    #         with torch.no_grad():
+    #             outputs = self.model(**inputs)
+    #             probs = F.softmax(outputs.logits, dim=-1)
+    #             # Attribute predictions more carefully
+    #             for window_idx, indices in enumerate(batch_indices):
+    #                 center_idx = len(indices) // 2
+    #                 center_weight = 0.7  # Higher weight for center sentence
+    #                 edge_weight = 0.3 / (len(indices) - 1)  # Distribute remaining weight
+    #                 for pos, sent_idx in enumerate(indices):
+    #                     # Apply higher weight to center sentence
+    #                     weight = center_weight if pos == center_idx else edge_weight
+    #                     sentence_appearances[sent_idx] += weight
+    #                     sentence_scores[sent_idx]['human_prob'] += weight * probs[window_idx][1].item()
+    #                     sentence_scores[sent_idx]['ai_prob'] += weight * probs[window_idx][0].item()
+    #         del inputs, outputs, probs
+    #         if torch.cuda.is_available():
+    #             torch.cuda.empty_cache()
+    #     # Calculate final predictions
+    #     sentence_predictions = []
+    #     for i in range(len(sentences)):
+    #         if sentence_appearances[i] > 0:
+    #             human_prob = sentence_scores[i]['human_prob'] / sentence_appearances[i]
+    #             ai_prob = sentence_scores[i]['ai_prob'] / sentence_appearances[i]
+    #             # Only apply minimal smoothing at prediction boundaries
+    #             if i > 0 and i < len(sentences) - 1:
+    #                 prev_human = sentence_scores[i-1]['human_prob'] / sentence_appearances[i-1]
+    #                 prev_ai = sentence_scores[i-1]['ai_prob'] / sentence_appearances[i-1]
+    #                 next_human = sentence_scores[i+1]['human_prob'] / sentence_appearances[i+1]
+    #                 next_ai = sentence_scores[i+1]['ai_prob'] / sentence_appearances[i+1]
+    #                 # Check if we're at a prediction boundary
+    #                 current_pred = 'human' if human_prob > ai_prob else 'ai'
+    #                 prev_pred = 'human' if prev_human > prev_ai else 'ai'
+    #                 next_pred = 'human' if next_human > next_ai else 'ai'
+    #                 if current_pred != prev_pred or current_pred != next_pred:
+    #                     # Small adjustment at boundaries
+    #                     smooth_factor = 0.1
+    #                     human_prob = (human_prob * (1 - smooth_factor) +
+    #                                 (prev_human + next_human) * smooth_factor / 2)
+    #                     ai_prob = (ai_prob * (1 - smooth_factor) +
+    #                             (prev_ai + next_ai) * smooth_factor / 2)
+    #             sentence_predictions.append({
+    #                 'sentence': sentences[i],
+    #                 'human_prob': human_prob,
+    #                 'ai_prob': ai_prob,
+    #                 'prediction': 'human' if human_prob > ai_prob else 'ai',
+    #                 'confidence': max(human_prob, ai_prob)
+    #             })
+    #     return {
+    #         'sentence_predictions': sentence_predictions,
+    #         'highlighted_text': self.format_predictions_html(sentence_predictions),
+    #         'full_text': text,
+    #         'overall_prediction': self.aggregate_predictions(sentence_predictions)
+    #     }
     def detailed_scan(self, text: str) -> Dict:
         """Perform a detailed scan with improved sentence-level analysis."""
+        # Clean up trailing whitespace
+        text = text.rstrip()
         if not text.strip():
             return {
                 'sentence_predictions': [],