ApsidalSolid4 committed on
Commit
967f5dd
·
verified ·
1 Parent(s): 5f61427

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -173
app.py CHANGED
@@ -12,12 +12,6 @@ from concurrent.futures import ThreadPoolExecutor
12
  from functools import partial
13
  import time
14
  from datetime import datetime
15
- import openpyxl
16
- from openpyxl import Workbook
17
- from openpyxl.utils import get_column_letter
18
- from io import BytesIO
19
- import base64
20
- import hashlib
21
 
22
  # Configure logging
23
  logging.basicConfig(level=logging.INFO)
@@ -32,26 +26,6 @@ CONFIDENCE_THRESHOLD = 0.65
32
  BATCH_SIZE = 8 # Reduced batch size for CPU
33
  MAX_WORKERS = 4 # Number of worker threads for processing
34
 
35
- # Get password hash from environment variable (more secure)
36
- ADMIN_PASSWORD_HASH = os.environ.get('ADMIN_PASSWORD_HASH')
37
-
38
- if not ADMIN_PASSWORD_HASH:
39
- ADMIN_PASSWORD_HASH = "5e22d1ed71b273b1b2b5331f2d3e0f6cf34595236f201c6924d6bc81de27cdcb"
40
-
41
- # Excel file path for logs
42
- EXCEL_LOG_PATH = "/tmp/prediction_logs.xlsx"
43
-
44
- def is_admin_password(input_text: str) -> bool:
45
- """
46
- Check if the input text matches the admin password using secure hash comparison.
47
- This prevents the password from being visible in the source code.
48
- """
49
- # Hash the input text
50
- input_hash = hashlib.sha256(input_text.strip().encode()).hexdigest()
51
-
52
- # Compare hashes (NOTE: a plain == is not a constant-time comparison; hmac.compare_digest would be needed to resist timing attacks)
53
- return input_hash == ADMIN_PASSWORD_HASH
54
-
55
  class TextWindowProcessor:
56
  def __init__(self):
57
  try:
@@ -354,133 +328,8 @@ class TextClassifier:
354
  'num_sentences': num_sentences
355
  }
356
 
357
- def initialize_excel_log():
358
- """Initialize the Excel log file if it doesn't exist."""
359
- if not os.path.exists(EXCEL_LOG_PATH):
360
- wb = Workbook()
361
- ws = wb.active
362
- ws.title = "Prediction Logs"
363
-
364
- # Set column headers
365
- headers = ["timestamp", "word_count", "prediction", "confidence",
366
- "execution_time_ms", "analysis_mode", "full_text"]
367
-
368
- for col_num, header in enumerate(headers, 1):
369
- ws.cell(row=1, column=col_num, value=header)
370
-
371
- # Adjust column widths for better readability
372
- ws.column_dimensions[get_column_letter(1)].width = 20 # timestamp
373
- ws.column_dimensions[get_column_letter(2)].width = 10 # word_count
374
- ws.column_dimensions[get_column_letter(3)].width = 10 # prediction
375
- ws.column_dimensions[get_column_letter(4)].width = 10 # confidence
376
- ws.column_dimensions[get_column_letter(5)].width = 15 # execution_time_ms
377
- ws.column_dimensions[get_column_letter(6)].width = 15 # analysis_mode
378
- ws.column_dimensions[get_column_letter(7)].width = 100 # full_text
379
-
380
- # Save the workbook
381
- wb.save(EXCEL_LOG_PATH)
382
- logger.info(f"Initialized Excel log file at {EXCEL_LOG_PATH}")
383
-
384
- def log_prediction_data(input_text, word_count, prediction, confidence, execution_time, mode):
385
- """Log prediction data to an Excel file in the /tmp directory."""
386
- # Initialize the Excel file if it doesn't exist
387
- if not os.path.exists(EXCEL_LOG_PATH):
388
- initialize_excel_log()
389
-
390
- try:
391
- # Load the existing workbook
392
- wb = openpyxl.load_workbook(EXCEL_LOG_PATH)
393
- ws = wb.active
394
-
395
- # Get the next row number
396
- next_row = ws.max_row + 1
397
-
398
- # Clean up the input text for Excel storage (replace problematic characters)
399
- cleaned_text = input_text.replace("\n", " ")
400
-
401
- # Prepare row data
402
- timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
403
- row_data = [
404
- timestamp,
405
- word_count,
406
- prediction,
407
- f"{confidence:.2f}",
408
- f"{execution_time:.2f}",
409
- mode,
410
- cleaned_text
411
- ]
412
-
413
- # Add the data to the worksheet
414
- for col_num, value in enumerate(row_data, 1):
415
- ws.cell(row=next_row, column=col_num, value=value)
416
-
417
- # Save the workbook
418
- wb.save(EXCEL_LOG_PATH)
419
- logger.info(f"Successfully logged prediction data to {EXCEL_LOG_PATH}")
420
- return True
421
-
422
- except Exception as e:
423
- logger.error(f"Error logging prediction data to Excel: {str(e)}")
424
- return False
425
-
426
- def get_logs_as_base64():
427
- """Read the Excel logs file and return as base64 for downloading."""
428
- if not os.path.exists(EXCEL_LOG_PATH):
429
- return None
430
-
431
- try:
432
- # Read the Excel file into memory
433
- with open(EXCEL_LOG_PATH, "rb") as f:
434
- file_data = f.read()
435
-
436
- # Encode the file as base64
437
- base64_data = base64.b64encode(file_data).decode('utf-8')
438
- return base64_data
439
-
440
- except Exception as e:
441
- logger.error(f"Error reading Excel logs: {str(e)}")
442
- return None
443
-
444
  def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
445
  """Analyze text using specified mode and return formatted results."""
446
- # Check if the input text matches the admin password using secure comparison
447
- if is_admin_password(text.strip()):
448
- # Return logs instead of analysis
449
- base64_data = get_logs_as_base64()
450
- logs_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
451
- logs_filename = f"prediction_logs_{logs_timestamp}.xlsx"
452
-
453
- if base64_data:
454
- # Create downloadable HTML with the logs
455
- html_content = f"""
456
- <div style="background-color: #e6f7ff; padding: 15px; border-radius: 5px;">
457
- <h3>Admin Access Granted - Prediction Logs</h3>
458
- <p>Logs retrieved at: {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}</p>
459
- <p>Excel file contains all prediction data with full text of all submissions.</p>
460
- <a href="data:application/vnd.openxmlformats-officedocument.spreadsheetml.sheet;base64,{base64_data}"
461
- download="{logs_filename}"
462
- style="display: inline-block; margin-top: 10px; padding: 10px 15px;
463
- background-color: #4CAF50; color: white; text-decoration: none;
464
- border-radius: 4px;">
465
- Download Excel Logs
466
- </a>
467
- </div>
468
- """
469
- else:
470
- html_content = """
471
- <div style="background-color: #ffe6e6; padding: 15px; border-radius: 5px;">
472
- <h3>Admin Access Granted - No Logs Found</h3>
473
- <p>No prediction logs were found or there was an error reading the logs file.</p>
474
- </div>
475
- """
476
-
477
- # Return special admin output instead of normal analysis
478
- return (
479
- html_content,
480
- f"Admin access granted. Logs retrieved at {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
481
- f"ADMIN MODE\nLogs available for download\nFile: {EXCEL_LOG_PATH}"
482
- )
483
-
484
  # Start timing for normal analysis
485
  start_time = time.time()
486
 
@@ -508,16 +357,6 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
508
  # Calculate execution time in milliseconds
509
  execution_time = (time.time() - start_time) * 1000
510
 
511
- # Log the prediction data
512
- log_prediction_data(
513
- input_text=text,
514
- word_count=word_count,
515
- prediction=result['prediction'],
516
- confidence=result['confidence'],
517
- execution_time=execution_time,
518
- mode=original_mode
519
- )
520
-
521
  return (
522
  text, # No highlighting in quick mode
523
  "Quick scan mode - no sentence-level analysis available",
@@ -544,16 +383,6 @@ def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
544
  # Calculate execution time in milliseconds
545
  execution_time = (time.time() - start_time) * 1000
546
 
547
- # Log the prediction data
548
- log_prediction_data(
549
- input_text=text,
550
- word_count=word_count,
551
- prediction=final_pred['prediction'],
552
- confidence=final_pred['confidence'],
553
- execution_time=execution_time,
554
- mode=original_mode
555
- )
556
-
557
  return (
558
  analysis['highlighted_text'],
559
  "\n".join(detailed_analysis),
@@ -609,5 +438,4 @@ if __name__ == "__main__":
609
  server_name="0.0.0.0",
610
  server_port=7860,
611
  share=True
612
- )
613
-
 
12
  from functools import partial
13
  import time
14
  from datetime import datetime
 
 
 
 
 
 
15
 
16
  # Configure logging
17
  logging.basicConfig(level=logging.INFO)
 
26
  BATCH_SIZE = 8 # Reduced batch size for CPU
27
  MAX_WORKERS = 4 # Number of worker threads for processing
28
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  class TextWindowProcessor:
30
  def __init__(self):
31
  try:
 
328
  'num_sentences': num_sentences
329
  }
330
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
331
  def analyze_text(text: str, mode: str, classifier: TextClassifier) -> tuple:
332
  """Analyze text using specified mode and return formatted results."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
333
  # Start timing for normal analysis
334
  start_time = time.time()
335
 
 
357
  # Calculate execution time in milliseconds
358
  execution_time = (time.time() - start_time) * 1000
359
 
 
 
 
 
 
 
 
 
 
 
360
  return (
361
  text, # No highlighting in quick mode
362
  "Quick scan mode - no sentence-level analysis available",
 
383
  # Calculate execution time in milliseconds
384
  execution_time = (time.time() - start_time) * 1000
385
 
 
 
 
 
 
 
 
 
 
 
386
  return (
387
  analysis['highlighted_text'],
388
  "\n".join(detailed_analysis),
 
438
  server_name="0.0.0.0",
439
  server_port=7860,
440
  share=True
441
+ )