ApsidalSolid4 committed on
Commit
1419b33
·
verified ·
1 Parent(s): bc0ac29

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +57 -117
app.py CHANGED
@@ -77,12 +77,6 @@ class OCRProcessor:
77
  def process_file(self, file_path: str) -> Dict:
78
  """
79
  Process a file using OCR.space API
80
-
81
- Args:
82
- file_path: Path to the file to be processed
83
-
84
- Returns:
85
- Dictionary containing the OCR results and status
86
  """
87
  start_time = time.time()
88
  ocr_logger.info(f"Starting OCR processing for file: {os.path.basename(file_path)}")
@@ -101,11 +95,6 @@ class OCRProcessor:
101
  file_type = self._get_file_type(file_path)
102
  ocr_logger.info(f"Detected file type: {file_type}")
103
 
104
- # Special handling for Word documents - convert to PDF if needed
105
- if file_type.startswith('application/vnd.openxmlformats-officedocument') or file_type == 'application/msword':
106
- ocr_logger.info("Word document detected, processing directly")
107
- # Note: OCR.space may handle Word directly, but if not, conversion would be needed here
108
-
109
  # Prepare the API request
110
  with open(file_path, 'rb') as f:
111
  file_data = f.read()
@@ -176,12 +165,6 @@ class OCRProcessor:
176
  def _extract_text_from_result(self, result: Dict) -> str:
177
  """
178
  Extract all text from the OCR API result
179
-
180
- Args:
181
- result: The OCR API response JSON
182
-
183
- Returns:
184
- Extracted text as a single string
185
  """
186
  extracted_text = ""
187
 
@@ -195,12 +178,6 @@ class OCRProcessor:
195
  def _get_file_type(self, file_path: str) -> str:
196
  """
197
  Determine MIME type of a file
198
-
199
- Args:
200
- file_path: Path to the file
201
-
202
- Returns:
203
- MIME type as string
204
  """
205
  mime_type, _ = mimetypes.guess_type(file_path)
206
  if mime_type is None:
@@ -208,11 +185,9 @@ class OCRProcessor:
208
  return 'application/octet-stream'
209
  return mime_type
210
 
211
-
212
  def is_admin_password(input_text: str) -> bool:
213
  """
214
  Check if the input text matches the admin password using secure hash comparison.
215
- This prevents the password from being visible in the source code.
216
  """
217
  # Hash the input text
218
  input_hash = hashlib.sha256(input_text.strip().encode()).hexdigest()
@@ -220,7 +195,6 @@ def is_admin_password(input_text: str) -> bool:
220
  # Compare hashes. NOTE: plain `==` is not a constant-time comparison; hmac.compare_digest would be needed to resist timing attacks
221
  return input_hash == ADMIN_PASSWORD_HASH
222
 
223
-
224
  class TextWindowProcessor:
225
  def __init__(self):
226
  try:
@@ -272,10 +246,8 @@ class TextWindowProcessor:
272
 
273
  return windows, window_sentence_indices
274
 
275
-
276
  class TextClassifier:
277
  def __init__(self):
278
- # FIXED: Removed the thread configuration here, as it's now at the module level
279
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
280
  self.model_name = MODEL_NAME
281
  self.tokenizer = None
@@ -310,6 +282,7 @@ class TextClassifier:
310
 
311
  self.model.eval()
312
 
 
313
  def quick_scan(self, text: str) -> Dict:
314
  """Perform a quick scan using simple window analysis."""
315
  if not text.strip():
@@ -520,19 +493,10 @@ class TextClassifier:
520
  'num_sentences': num_sentences
521
  }
522
 
523
-
524
  # Function to handle file upload, OCR processing, and text analysis
525
  def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
526
  """
527
  Handle file upload, OCR processing, and text analysis
528
-
529
- Args:
530
- file_obj: Uploaded file object from Gradio (bytes when using type="binary")
531
- mode: Analysis mode (quick or detailed)
532
- classifier: The TextClassifier instance
533
-
534
- Returns:
535
- Analysis results as a tuple (same format as original analyze_text function)
536
  """
537
  if file_obj is None:
538
  return (
@@ -542,10 +506,6 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
542
  )
543
 
544
  # Create a temporary file with an appropriate extension based on content
545
- # Since we don't have the original filename when using binary mode,
546
- # we'll use a generic extension based on simple content detection
547
-
548
- # Simple content type detection
549
  content_start = file_obj[:20] # Look at the first few bytes
550
 
551
  # Default to .bin extension
@@ -561,7 +521,6 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
561
  file_ext = ".png"
562
  elif content_start.startswith(b'GIF'): # GIF
563
  file_ext = ".gif"
564
- # Add more content type detection as needed
565
 
566
  # Create a temporary file with the detected extension
567
  with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file:
@@ -600,7 +559,6 @@ def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
600
  if os.path.exists(temp_file_path):
601
  os.remove(temp_file_path)
602
 
603
-
604
  def initialize_excel_log():
605
  """Initialize the Excel log file if it doesn't exist."""
606
  if not os.path.exists(EXCEL_LOG_PATH):
@@ -810,20 +768,11 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
810
  overall_result
811
  )
812
 
 
 
813
 
814
- # Modified Gradio interface setup function to include file upload
815
- def setup_gradio_interface(classifier):
816
- """
817
- Set up Gradio interface with a more aligned and compact file upload
818
-
819
- Args:
820
- classifier: The TextClassifier instance
821
-
822
- Returns:
823
- Gradio Interface object
824
- """
825
- import gradio as gr
826
-
827
  # Create analyzer functions that capture the classifier
828
  def analyze_text_wrapper(text, mode):
829
  return analyze_text(text, mode, classifier)
@@ -833,115 +782,109 @@ def setup_gradio_interface(classifier):
833
  return analyze_text_wrapper("", mode) # Return empty analysis
834
  return handle_file_upload_and_analyze(file_obj, mode, classifier)
835
 
 
836
  with gr.Blocks(title="AI Text Detector") as demo:
837
  gr.Markdown("# AI Text Detector")
838
 
839
  with gr.Row():
840
- # Left column - Input
841
  with gr.Column():
842
  text_input = gr.Textbox(
843
- lines=8,
844
  placeholder="Enter text to analyze...",
845
  label="Input Text"
846
  )
847
 
848
  with gr.Row():
849
- # Left side: Analysis mode radio buttons
850
- with gr.Column(scale=4):
851
- gr.Markdown("Analysis Mode")
852
- gr.Markdown("Quick mode for faster analysis. Detailed mode for sentence-level analysis.", elem_classes=["description-text"])
853
- mode_selection = gr.Radio(
854
- choices=["quick", "detailed"],
855
- value="quick",
856
- label=""
857
- )
858
 
859
- # Right side: File upload (compact and aligned)
860
- with gr.Column(scale=1, elem_classes=["file-upload-container"]):
861
- file_upload = gr.File(
862
- label="File",
863
- file_types=["image", "pdf", "doc", "docx"],
864
- type="binary",
865
- elem_classes=["compact-file-upload"]
866
- )
867
 
868
- # Analyze button
869
  analyze_button = gr.Button("Analyze Text")
870
 
871
- # Right column - Results
872
  with gr.Column():
873
  output_html = gr.HTML(label="Highlighted Analysis")
874
  output_sentences = gr.Textbox(label="Sentence-by-Sentence Analysis", lines=10)
875
  output_result = gr.Textbox(label="Overall Result", lines=4)
876
 
877
- # Connect buttons to functions
878
  analyze_button.click(
879
  analyze_text_wrapper,
880
  inputs=[text_input, mode_selection],
881
  outputs=[output_html, output_sentences, output_result]
882
  )
883
 
884
- # Connect file upload to automatically process when changed
885
  file_upload.change(
886
  handle_file_upload_wrapper,
887
  inputs=[file_upload, mode_selection],
888
  outputs=[output_html, output_sentences, output_result]
889
  )
890
 
891
- # Add custom CSS for alignment and styling
892
  gr.HTML("""
893
  <style>
894
- /* Make file upload more compact */
895
- .compact-file-upload {
896
- max-width: 100%;
 
 
897
  }
898
 
899
- .compact-file-upload > .wrap {
900
- margin: 0;
901
- padding: 0;
902
  }
903
 
904
- .compact-file-upload .file-preview {
905
- min-height: 0;
 
906
  }
907
 
908
- /* Align file upload with radio buttons */
909
- .file-upload-container {
910
- display: flex;
911
- align-items: flex-end;
912
- justify-content: center;
913
- padding-bottom: 10px;
914
  }
915
 
916
- /* Make description text smaller */
917
- .description-text {
918
- font-size: 0.85em;
919
- color: #666;
920
- margin-top: -5px;
921
- margin-bottom: 5px;
 
 
 
 
 
 
922
  }
923
  </style>
924
  """)
925
 
926
  return demo
927
 
928
-
929
- # This function is a replacement for the original main app setup
930
- def setup_app_with_ocr():
931
- """
932
- Setup the application with OCR capabilities
933
- """
934
- # Initialize the classifier (uses the fixed class)
935
- classifier = TextClassifier()
936
-
937
- # Create the Gradio interface with file upload functionality
938
- demo = setup_gradio_interface(classifier)
939
 
940
  # Get the FastAPI app from Gradio
941
  app = demo.app
942
 
943
- # Add CORS middleware (same as original code)
944
- from fastapi.middleware.cors import CORSMiddleware
945
  app.add_middleware(
946
  CORSMiddleware,
947
  allow_origins=["*"], # For development
@@ -950,14 +893,11 @@ def setup_app_with_ocr():
950
  allow_headers=["*"],
951
  )
952
 
953
- # Return the demo for launching
954
  return demo
955
 
956
-
957
  # Initialize the application
958
  if __name__ == "__main__":
959
- # Create the app with OCR functionality
960
- demo = setup_app_with_ocr()
961
 
962
  # Start the server
963
  demo.queue()
 
77
  def process_file(self, file_path: str) -> Dict:
78
  """
79
  Process a file using OCR.space API
 
 
 
 
 
 
80
  """
81
  start_time = time.time()
82
  ocr_logger.info(f"Starting OCR processing for file: {os.path.basename(file_path)}")
 
95
  file_type = self._get_file_type(file_path)
96
  ocr_logger.info(f"Detected file type: {file_type}")
97
 
 
 
 
 
 
98
  # Prepare the API request
99
  with open(file_path, 'rb') as f:
100
  file_data = f.read()
 
165
  def _extract_text_from_result(self, result: Dict) -> str:
166
  """
167
  Extract all text from the OCR API result
 
 
 
 
 
 
168
  """
169
  extracted_text = ""
170
 
 
178
  def _get_file_type(self, file_path: str) -> str:
179
  """
180
  Determine MIME type of a file
 
 
 
 
 
 
181
  """
182
  mime_type, _ = mimetypes.guess_type(file_path)
183
  if mime_type is None:
 
185
  return 'application/octet-stream'
186
  return mime_type
187
 
 
188
  def is_admin_password(input_text: str) -> bool:
189
  """
190
  Check if the input text matches the admin password using secure hash comparison.
 
191
  """
192
  # Hash the input text
193
  input_hash = hashlib.sha256(input_text.strip().encode()).hexdigest()
 
195
  # Compare hashes. NOTE: plain `==` is not a constant-time comparison; hmac.compare_digest would be needed to resist timing attacks
196
  return input_hash == ADMIN_PASSWORD_HASH
197
 
 
198
  class TextWindowProcessor:
199
  def __init__(self):
200
  try:
 
246
 
247
  return windows, window_sentence_indices
248
 
 
249
  class TextClassifier:
250
  def __init__(self):
 
251
  self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
252
  self.model_name = MODEL_NAME
253
  self.tokenizer = None
 
282
 
283
  self.model.eval()
284
 
285
+ # [Other TextClassifier methods remain the same as in paste.txt]
286
  def quick_scan(self, text: str) -> Dict:
287
  """Perform a quick scan using simple window analysis."""
288
  if not text.strip():
 
493
  'num_sentences': num_sentences
494
  }
495
 
 
496
  # Function to handle file upload, OCR processing, and text analysis
497
  def handle_file_upload_and_analyze(file_obj, mode: str, classifier) -> tuple:
498
  """
499
  Handle file upload, OCR processing, and text analysis
 
 
 
 
 
 
 
 
500
  """
501
  if file_obj is None:
502
  return (
 
506
  )
507
 
508
  # Create a temporary file with an appropriate extension based on content
 
 
 
 
509
  content_start = file_obj[:20] # Look at the first few bytes
510
 
511
  # Default to .bin extension
 
521
  file_ext = ".png"
522
  elif content_start.startswith(b'GIF'): # GIF
523
  file_ext = ".gif"
 
524
 
525
  # Create a temporary file with the detected extension
526
  with tempfile.NamedTemporaryFile(delete=False, suffix=file_ext) as temp_file:
 
559
  if os.path.exists(temp_file_path):
560
  os.remove(temp_file_path)
561
 
 
562
  def initialize_excel_log():
563
  """Initialize the Excel log file if it doesn't exist."""
564
  if not os.path.exists(EXCEL_LOG_PATH):
 
768
  overall_result
769
  )
770
 
771
+ # Initialize the classifier globally
772
+ classifier = TextClassifier()
773
 
774
+ # Create Gradio interface with a small file upload button next to the radio buttons
775
+ def setup_interface():
 
 
 
 
 
 
 
 
 
 
 
776
  # Create analyzer functions that capture the classifier
777
  def analyze_text_wrapper(text, mode):
778
  return analyze_text(text, mode, classifier)
 
782
  return analyze_text_wrapper("", mode) # Return empty analysis
783
  return handle_file_upload_and_analyze(file_obj, mode, classifier)
784
 
785
+ # Create the interface similar to the original but with a small file upload button
786
  with gr.Blocks(title="AI Text Detector") as demo:
787
  gr.Markdown("# AI Text Detector")
788
 
789
  with gr.Row():
790
+ # Left column for input
791
  with gr.Column():
792
  text_input = gr.Textbox(
793
+ lines=8,
794
  placeholder="Enter text to analyze...",
795
  label="Input Text"
796
  )
797
 
798
  with gr.Row():
799
+ # Mode selection (same as original)
800
+ mode_selection = gr.Radio(
801
+ choices=["quick", "detailed"],
802
+ value="quick",
803
+ label="Analysis Mode",
804
+ info="Quick mode for faster analysis. Detailed mode for sentence-level analysis."
805
+ )
 
 
806
 
807
+ # Small file upload button (like the Claude paperclip)
808
+ file_upload = gr.File(
809
+ label="",
810
+ file_types=["image", "pdf", "doc", "docx"],
811
+ type="binary",
812
+ elem_classes=["small-file-upload"]
813
+ )
 
814
 
 
815
  analyze_button = gr.Button("Analyze Text")
816
 
817
+ # Right column for output
818
  with gr.Column():
819
  output_html = gr.HTML(label="Highlighted Analysis")
820
  output_sentences = gr.Textbox(label="Sentence-by-Sentence Analysis", lines=10)
821
  output_result = gr.Textbox(label="Overall Result", lines=4)
822
 
823
+ # Connect the components
824
  analyze_button.click(
825
  analyze_text_wrapper,
826
  inputs=[text_input, mode_selection],
827
  outputs=[output_html, output_sentences, output_result]
828
  )
829
 
 
830
  file_upload.change(
831
  handle_file_upload_wrapper,
832
  inputs=[file_upload, mode_selection],
833
  outputs=[output_html, output_sentences, output_result]
834
  )
835
 
836
+ # Custom CSS to style the file upload button like a small paperclip
837
  gr.HTML("""
838
  <style>
839
+ /* Make the file upload small and positioned correctly */
840
+ .small-file-upload {
841
+ width: 40px !important;
842
+ margin-left: 10px !important;
843
+ margin-top: 15px !important;
844
  }
845
 
846
+ .small-file-upload > .wrap {
847
+ padding: 0 !important;
848
+ margin: 0 !important;
849
  }
850
 
851
+ .small-file-upload .file-preview {
852
+ min-height: 0 !important;
853
+ padding: 0 !important;
854
  }
855
 
856
+ /* Make file upload look like a paperclip icon */
857
+ .small-file-upload .icon {
858
+ font-size: 1.2em !important;
859
+ opacity: 0.7 !important;
 
 
860
  }
861
 
862
+ .small-file-upload .upload-button {
863
+ border-radius: 50% !important;
864
+ padding: 5px !important;
865
+ width: 30px !important;
866
+ height: 30px !important;
867
+ display: flex !important;
868
+ align-items: center !important;
869
+ justify-content: center !important;
870
+ }
871
+
872
+ .small-file-upload .upload-button:hover {
873
+ background-color: #f0f0f0 !important;
874
  }
875
  </style>
876
  """)
877
 
878
  return demo
879
 
880
+ # Setup the app with CORS middleware
881
+ def setup_app():
882
+ demo = setup_interface()
 
 
 
 
 
 
 
 
883
 
884
  # Get the FastAPI app from Gradio
885
  app = demo.app
886
 
887
+ # Add CORS middleware
 
888
  app.add_middleware(
889
  CORSMiddleware,
890
  allow_origins=["*"], # For development
 
893
  allow_headers=["*"],
894
  )
895
 
 
896
  return demo
897
 
 
898
  # Initialize the application
899
  if __name__ == "__main__":
900
+ demo = setup_app()
 
901
 
902
  # Start the server
903
  demo.queue()