Spaces:

ApsidalSolid4
/

CITProjectAIDetector

Running

App Files Community

ApsidalSolid4 committed on Feb 16

Commit

3fe982e

verified ·

1 Parent(s): a627ffe

Update app.py

Browse files

Files changed (1) hide show

app.py +28 -26

app.py CHANGED Viewed

@@ -69,33 +69,35 @@ class TextClassifier:
         self.initialize_model()
     def initialize_model(self):
-            """Initialize the model and tokenizer."""
-            logger.info("Initializing model and tokenizer...")
-            # Download and save tokenizer files locally
-            local_tokenizer_path = "tokenizer"
-            if not os.path.exists(local_tokenizer_path):
-                AutoTokenizer.from_pretrained(self.model_name).save_pretrained(local_tokenizer_path)
-            # Load from local files
-            self.tokenizer = AutoTokenizer.from_pretrained(local_tokenizer_path)
-            # First initialize the base model
-            self.model = AutoModelForSequenceClassification.from_pretrained(
-                self.model_name,
-                num_labels=2
-            ).to(self.device)
-            # Look for model file in the same directory as the code
-            model_path = "model.pt"  # Your model file should be uploaded as model.pt
-            if os.path.exists(model_path):
-                logger.info(f"Loading custom model from {model_path}")
-                checkpoint = torch.load(model_path, map_location=self.device)
-                self.model.load_state_dict(checkpoint['model_state_dict'])
-            else:
-                logger.warning("Custom model file not found. Using base model.")
-            self.model.eval()
     def predict_with_sentence_scores(self, text: str) -> Dict:
         """Predict with sentence-level granularity using overlapping windows."""

         self.initialize_model()
     def initialize_model(self):
+        """Initialize the model and tokenizer."""
+        logger.info("Initializing model and tokenizer...")
+        from transformers import DebertaV2TokenizerFast
+        # Try to load tokenizer directly from the Hub
+        self.tokenizer = DebertaV2TokenizerFast.from_pretrained(
+            self.model_name,
+            model_max_length=MAX_LENGTH,
+            use_fast=False,
+            from_slow=True
+        )
+        # Initialize the model as before
+        self.model = AutoModelForSequenceClassification.from_pretrained(
+            self.model_name,
+            num_labels=2
+        ).to(self.device)
+        # Your existing model loading code
+        model_path = "model.pt"
+        if os.path.exists(model_path):
+            logger.info(f"Loading custom model from {model_path}")
+            checkpoint = torch.load(model_path, map_location=self.device)
+            self.model.load_state_dict(checkpoint['model_state_dict'])
+        else:
+            logger.warning("Custom model file not found. Using base model.")
+        self.model.eval()
     def predict_with_sentence_scores(self, text: str) -> Dict:
         """Predict with sentence-level granularity using overlapping windows."""