hynky HF Staff committed on
Commit
c25fdf1
·
1 Parent(s): 9686506
Files changed (1) hide show
  1. extractor_compare.py +89 -85
extractor_compare.py CHANGED
@@ -226,10 +226,11 @@ def create_interface():
226
  font-family: 'Local Arial', sans-serif;
227
  }
228
  </style>
229
- <meta http-equiv="Content-Security-Policy" content="default-src * blob:; script-src 'self' 'unsafe-inline' 'unsafe-eval'; style-src 'self' 'unsafe-inline';">
230
- <iframe id="pdf-iframe" width="100%" height="100%" style="border:none;" src="about:blank" sandbox="allow-same-origin allow-scripts allow-forms"></iframe>
 
231
  <div id="pdf-fallback" style="position:absolute; top:0; left:0; width:100%; height:100%;
232
- display:flex; align-items:center; justify-content:center; padding:20px; text-align:center;">
233
  Click "Load PDFs" to start viewing documents.
234
  </div>
235
  </div>
@@ -350,110 +351,113 @@ def create_interface():
350
  demo.load(
351
  fn=None,
352
  js="""
353
- // Function to safely setup the MutationObserver for the PDF data
354
- function setupPdfDataObserver() {
355
- console.log('Setting up PDF data observer...');
356
 
357
- // Wait for Gradio components to fully render
358
- setTimeout(() => {
 
 
 
359
  try {
360
- const targetNode = document.getElementById('pdf_base64_data');
361
- if (!targetNode) {
362
- console.error('PDF data container not found!');
 
363
  return;
364
  }
365
 
366
- // Find the textarea within the Gradio component
367
- const hiddenTextArea = targetNode.querySelector('textarea');
368
- if (!hiddenTextArea) {
369
- console.error('Hidden textarea not found within the container!');
370
- return;
371
- }
372
 
373
- console.log('Found hidden textarea to observe');
 
 
 
374
 
375
- // Setup observer configuration
376
- const observerConfig = {
377
- characterData: true,
378
- childList: true,
379
- subtree: true,
380
- attributes: true
381
- };
382
 
383
- // Create and attach the observer
384
- const observer = new MutationObserver(function(mutationsList) {
385
- console.log('Mutation detected, checking textarea value');
386
- if (hiddenTextArea.value && hiddenTextArea.value.length > 100) {
387
- console.log('Valid value found in textarea, displaying PDF');
388
- displayPdfBlob(hiddenTextArea.value);
389
- }
390
- });
391
 
392
- // Observe the textarea itself, not its parent
393
- observer.observe(hiddenTextArea, observerConfig);
394
- console.log('MutationObserver attached to textarea');
395
 
396
- // Also check initial value
397
- if (hiddenTextArea.value && hiddenTextArea.value.length > 100) {
398
- console.log('Initial valid value found, displaying PDF');
399
- displayPdfBlob(hiddenTextArea.value);
 
 
 
400
  }
401
  } catch (error) {
402
- console.error('Error setting up observer:', error);
 
 
 
 
 
403
  }
404
- }, 1000); // Wait 1 second for components to render
405
- }
406
-
407
- // Function to display PDF from base64 data
408
- function displayPdfBlob(base64Data) {
409
- try {
410
- // Get iframe and fallback elements
411
- const iframe = document.getElementById('pdf-iframe');
412
- const fallback = document.getElementById('pdf-fallback');
413
-
414
- if (!iframe || !fallback) {
415
- console.error('PDF viewer elements not found');
416
  return;
417
  }
418
 
419
- // Convert base64 to binary
420
- const binaryString = atob(base64Data);
421
- const len = binaryString.length;
422
- const bytes = new Uint8Array(len);
423
-
424
- for (let i = 0; i < len; i++) {
425
- bytes[i] = binaryString.charCodeAt(i);
426
  }
427
 
428
- // Create blob and URL
429
- const blob = new Blob([bytes], { type: 'application/pdf' });
430
- const objectUrl = URL.createObjectURL(blob);
 
431
 
432
- // Update iframe
433
- iframe.src = objectUrl;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
434
 
435
- // Hide fallback message
436
- fallback.style.display = 'none';
 
437
 
438
- // Log success
439
- console.log('PDF displayed successfully');
440
- } catch (error) {
441
- console.error('Error displaying PDF:', error);
442
- }
 
 
 
443
  }
444
-
445
- // Initialize the observer after everything is loaded
446
- window.addEventListener('load', function() {
447
- console.log('Window loaded, initializing PDF observer...');
448
- setupPdfDataObserver();
449
- });
450
-
451
- // Also setup when Gradio mounts the component
452
- document.addEventListener('DOMContentLoaded', function() {
453
- console.log('DOM loaded, waiting for Gradio components...');
454
- // Wait a bit longer for Gradio components to mount
455
- setTimeout(setupPdfDataObserver, 2000);
456
- });
457
  """
458
  )
459
 
 
226
  font-family: 'Local Arial', sans-serif;
227
  }
228
  </style>
229
+ <object id="pdf-object" type="application/pdf" width="100%" height="100%" style="display:none;">
230
+ <p>PDF cannot be displayed</p>
231
+ </object>
232
  <div id="pdf-fallback" style="position:absolute; top:0; left:0; width:100%; height:100%;
233
+ display:flex; align-items:center; justify-content:center; padding:20px; text-align:center; font-family: Arial, sans-serif;">
234
  Click "Load PDFs" to start viewing documents.
235
  </div>
236
  </div>
 
351
  demo.load(
352
  fn=None,
353
  js="""
354
+ function() {
355
+ console.log("Setting up PDF viewer");
 
356
 
357
+ // Store the current blob URL
358
+ var pdfBlobUrl = null;
359
+
360
+ // Function to display PDF from base64 data
361
+ function displayPdfFromBase64(base64Data) {
362
  try {
363
+ if (!base64Data || base64Data.length < 100) {
364
+ console.log("No valid PDF data received");
365
+ document.getElementById('pdf-fallback').style.display = 'flex';
366
+ document.getElementById('pdf-object').style.display = 'none';
367
  return;
368
  }
369
 
370
+ console.log("Displaying PDF from base64 data");
 
 
 
 
 
371
 
372
+ // Clean up previous blob URL
373
+ if (pdfBlobUrl) {
374
+ URL.revokeObjectURL(pdfBlobUrl);
375
+ }
376
 
377
+ // Convert base64 to binary
378
+ const binary = atob(base64Data);
379
+ const bytes = new Uint8Array(binary.length);
380
+ for (let i = 0; i < binary.length; i++) {
381
+ bytes[i] = binary.charCodeAt(i);
382
+ }
 
383
 
384
+ // Create blob and URL
385
+ const blob = new Blob([bytes], {type: 'application/pdf'});
386
+ pdfBlobUrl = URL.createObjectURL(blob);
 
 
 
 
 
387
 
388
+ // Display PDF in the object element
389
+ const pdfObject = document.getElementById('pdf-object');
390
+ const fallback = document.getElementById('pdf-fallback');
391
 
392
+ if (pdfObject && fallback) {
393
+ pdfObject.data = pdfBlobUrl;
394
+ pdfObject.style.display = 'block';
395
+ fallback.style.display = 'none';
396
+ console.log("PDF displayed successfully");
397
+ } else {
398
+ console.error("PDF viewer elements not found");
399
  }
400
  } catch (error) {
401
+ console.error("Error displaying PDF:", error);
402
+ const fallback = document.getElementById('pdf-fallback');
403
+ if (fallback) {
404
+ fallback.innerHTML = '<div style="color:red; font-family: Arial, sans-serif;">Error displaying PDF</div>';
405
+ fallback.style.display = 'flex';
406
+ }
407
  }
408
+ }
409
+
410
+ // Check for PDF data repeatedly
411
+ function checkForPdfData() {
412
+ const dataElement = document.getElementById('pdf_base64_data');
413
+ if (!dataElement) {
414
+ console.log("PDF data element not found, will retry");
415
+ setTimeout(checkForPdfData, 1000);
 
 
 
 
416
  return;
417
  }
418
 
419
+ const textarea = dataElement.querySelector('textarea');
420
+ if (!textarea) {
421
+ console.log("Textarea not found, will retry");
422
+ setTimeout(checkForPdfData, 1000);
423
+ return;
 
 
424
  }
425
 
426
+ // Display initial data if available
427
+ if (textarea.value && textarea.value.length > 100) {
428
+ displayPdfFromBase64(textarea.value);
429
+ }
430
 
431
+ // Set up polling to check for changes
432
+ setInterval(function() {
433
+ if (textarea.value && textarea.value.length > 100) {
434
+ displayPdfFromBase64(textarea.value);
435
+ }
436
+ }, 2000);
437
+ }
438
+
439
+ // Start checking for PDF data
440
+ setTimeout(checkForPdfData, 1000);
441
+
442
+ // Add keyboard shortcuts
443
+ document.addEventListener('keydown', function(event) {
444
+ if (event.target.tagName === 'INPUT' || event.target.tagName === 'TEXTAREA') {
445
+ return;
446
+ }
447
 
448
+ var buttonId = null;
449
+ if (event.key === 'ArrowLeft') buttonId = 'prev_button';
450
+ else if (event.key === 'ArrowRight') buttonId = 'next_button';
451
 
452
+ if (buttonId) {
453
+ var button = document.getElementById(buttonId);
454
+ if (button) {
455
+ event.preventDefault();
456
+ button.click();
457
+ }
458
+ }
459
+ });
460
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
461
  """
462
  )
463