push up
Browse files- extractor_compare.py +89 -85
extractor_compare.py
CHANGED
@@ -226,10 +226,11 @@ def create_interface():
|
|
226 |
font-family: 'Local Arial', sans-serif;
|
227 |
}
|
228 |
</style>
|
229 |
-
<
|
230 |
-
|
|
|
231 |
<div id="pdf-fallback" style="position:absolute; top:0; left:0; width:100%; height:100%;
|
232 |
-
display:flex; align-items:center; justify-content:center; padding:20px; text-align:center;">
|
233 |
Click "Load PDFs" to start viewing documents.
|
234 |
</div>
|
235 |
</div>
|
@@ -350,110 +351,113 @@ def create_interface():
|
|
350 |
demo.load(
|
351 |
fn=None,
|
352 |
js="""
|
353 |
-
|
354 |
-
|
355 |
-
console.log('Setting up PDF data observer...');
|
356 |
|
357 |
-
//
|
358 |
-
|
|
|
|
|
|
|
359 |
try {
|
360 |
-
|
361 |
-
|
362 |
-
|
|
|
363 |
return;
|
364 |
}
|
365 |
|
366 |
-
|
367 |
-
const hiddenTextArea = targetNode.querySelector('textarea');
|
368 |
-
if (!hiddenTextArea) {
|
369 |
-
console.error('Hidden textarea not found within the container!');
|
370 |
-
return;
|
371 |
-
}
|
372 |
|
373 |
-
|
|
|
|
|
|
|
374 |
|
375 |
-
//
|
376 |
-
const
|
377 |
-
|
378 |
-
|
379 |
-
|
380 |
-
|
381 |
-
};
|
382 |
|
383 |
-
// Create and
|
384 |
-
const
|
385 |
-
|
386 |
-
if (hiddenTextArea.value && hiddenTextArea.value.length > 100) {
|
387 |
-
console.log('Valid value found in textarea, displaying PDF');
|
388 |
-
displayPdfBlob(hiddenTextArea.value);
|
389 |
-
}
|
390 |
-
});
|
391 |
|
392 |
-
//
|
393 |
-
|
394 |
-
|
395 |
|
396 |
-
|
397 |
-
|
398 |
-
|
399 |
-
|
|
|
|
|
|
|
400 |
}
|
401 |
} catch (error) {
|
402 |
-
console.error(
|
|
|
|
|
|
|
|
|
|
|
403 |
}
|
404 |
-
}
|
405 |
-
|
406 |
-
|
407 |
-
|
408 |
-
|
409 |
-
|
410 |
-
|
411 |
-
|
412 |
-
const fallback = document.getElementById('pdf-fallback');
|
413 |
-
|
414 |
-
if (!iframe || !fallback) {
|
415 |
-
console.error('PDF viewer elements not found');
|
416 |
return;
|
417 |
}
|
418 |
|
419 |
-
|
420 |
-
|
421 |
-
|
422 |
-
|
423 |
-
|
424 |
-
for (let i = 0; i < len; i++) {
|
425 |
-
bytes[i] = binaryString.charCodeAt(i);
|
426 |
}
|
427 |
|
428 |
-
//
|
429 |
-
|
430 |
-
|
|
|
431 |
|
432 |
-
//
|
433 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
|
435 |
-
|
436 |
-
|
|
|
437 |
|
438 |
-
|
439 |
-
|
440 |
-
|
441 |
-
|
442 |
-
|
|
|
|
|
|
|
443 |
}
|
444 |
-
|
445 |
-
// Initialize the observer after everything is loaded
|
446 |
-
window.addEventListener('load', function() {
|
447 |
-
console.log('Window loaded, initializing PDF observer...');
|
448 |
-
setupPdfDataObserver();
|
449 |
-
});
|
450 |
-
|
451 |
-
// Also setup when Gradio mounts the component
|
452 |
-
document.addEventListener('DOMContentLoaded', function() {
|
453 |
-
console.log('DOM loaded, waiting for Gradio components...');
|
454 |
-
// Wait a bit longer for Gradio components to mount
|
455 |
-
setTimeout(setupPdfDataObserver, 2000);
|
456 |
-
});
|
457 |
"""
|
458 |
)
|
459 |
|
|
|
226 |
font-family: 'Local Arial', sans-serif;
|
227 |
}
|
228 |
</style>
|
229 |
+
<object id="pdf-object" type="application/pdf" width="100%" height="100%" style="display:none;">
|
230 |
+
<p>PDF cannot be displayed</p>
|
231 |
+
</object>
|
232 |
<div id="pdf-fallback" style="position:absolute; top:0; left:0; width:100%; height:100%;
|
233 |
+
display:flex; align-items:center; justify-content:center; padding:20px; text-align:center; font-family: Arial, sans-serif;">
|
234 |
Click "Load PDFs" to start viewing documents.
|
235 |
</div>
|
236 |
</div>
|
|
|
351 |
demo.load(
|
352 |
fn=None,
|
353 |
js="""
|
354 |
+
function() {
|
355 |
+
console.log("Setting up PDF viewer");
|
|
|
356 |
|
357 |
+
// Store the current blob URL
|
358 |
+
var pdfBlobUrl = null;
|
359 |
+
|
360 |
+
// Function to display PDF from base64 data
|
361 |
+
/**
 * Render a base64-encoded PDF inside the `#pdf-object` element.
 *
 * Payloads that are empty or shorter than 100 characters are treated as
 * "no document": the `#pdf-fallback` placeholder is shown and the viewer
 * is hidden. Otherwise the payload is decoded into a Blob, exposed through
 * an object URL and assigned to the viewer's `data` attribute.
 *
 * Reads and writes the enclosing scope's `pdfBlobUrl` variable, which holds
 * the object URL currently shown so it can be revoked once it is replaced.
 *
 * On any failure the error is logged, the viewer is hidden, the tracked
 * object URL is released and an error message is placed in the fallback.
 *
 * @param {string} base64Data - Plain base64 text of the PDF file.
 * @returns {void}
 */
function displayPdfFromBase64(base64Data) {
    const pdfObject = document.getElementById('pdf-object');
    const fallback = document.getElementById('pdf-fallback');

    try {
        if (!base64Data || base64Data.length < 100) {
            console.log("No valid PDF data received");
            // Guard each element separately so a missing node does not
            // turn "no data" into a spurious error message.
            if (fallback) {
                fallback.style.display = 'flex';
            }
            if (pdfObject) {
                pdfObject.style.display = 'none';
            }
            return;
        }

        // Bail out before allocating a blob URL that nobody could display
        // (and that would otherwise never be revoked).
        if (!pdfObject || !fallback) {
            console.error("PDF viewer elements not found");
            return;
        }

        console.log("Displaying PDF from base64 data");

        // Convert base64 to binary
        const binary = atob(base64Data);
        const bytes = new Uint8Array(binary.length);
        for (let i = 0; i < binary.length; i++) {
            bytes[i] = binary.charCodeAt(i);
        }

        // Create blob and URL
        const blob = new Blob([bytes], {type: 'application/pdf'});
        const nextUrl = URL.createObjectURL(blob);

        // Display PDF in the object element
        pdfObject.data = nextUrl;
        pdfObject.style.display = 'block';
        fallback.style.display = 'none';

        // Release the previous URL only after the viewer points at the new
        // one, so a decoding failure never leaves it on a revoked URL.
        if (pdfBlobUrl) {
            URL.revokeObjectURL(pdfBlobUrl);
        }
        pdfBlobUrl = nextUrl;
        console.log("PDF displayed successfully");
    } catch (error) {
        console.error("Error displaying PDF:", error);

        // Do not leave a stale document visible next to the error message.
        if (pdfObject) {
            pdfObject.style.display = 'none';
        }
        if (pdfBlobUrl) {
            URL.revokeObjectURL(pdfBlobUrl);
            pdfBlobUrl = null;
        }
        if (fallback) {
            fallback.innerHTML = '<div style="color:red; font-family: Arial, sans-serif;">Error displaying PDF</div>';
            fallback.style.display = 'flex';
        }
    }
}
|
409 |
+
|
410 |
+
// Check for PDF data repeatedly
|
411 |
+
/**
 * Locate the textarea inside `#pdf_base64_data` and keep the PDF viewer in
 * sync with its value.
 *
 * While the container or its textarea is not in the DOM yet, the function
 * reschedules itself once per second. Once found, the current value is
 * rendered and a 2-second poll watches for later changes.
 *
 * A value is rendered only when it is longer than 100 characters and differs
 * from the value rendered last. Without that comparison every poll tick
 * would rebuild the blob URL and reload the viewer for an unchanged
 * document. A value that failed to render is likewise not retried until it
 * changes.
 *
 * @returns {void}
 */
function checkForPdfData() {
    const dataElement = document.getElementById('pdf_base64_data');
    if (!dataElement) {
        console.log("PDF data element not found, will retry");
        setTimeout(checkForPdfData, 1000);
        return;
    }

    const textarea = dataElement.querySelector('textarea');
    if (!textarea) {
        console.log("Textarea not found, will retry");
        setTimeout(checkForPdfData, 1000);
        return;
    }

    // Last payload handed to the viewer; used to skip redundant re-renders.
    let lastRendered = null;
    const renderIfChanged = function(value) {
        if (!value || value.length <= 100 || value === lastRendered) {
            return;
        }
        lastRendered = value;
        displayPdfFromBase64(value);
    };

    // Display initial data if available
    renderIfChanged(textarea.value);

    // Set up polling to check for changes
    setInterval(function() {
        // Look the textarea up again on every tick so a re-rendered
        // component does not leave us polling a detached node.
        const container = document.getElementById('pdf_base64_data');
        const current = container ? container.querySelector('textarea') : null;
        if (current) {
            renderIfChanged(current.value);
        }
    }, 2000);
}
|
438 |
+
|
439 |
+
// Start checking for PDF data
|
440 |
+
setTimeout(checkForPdfData, 1000);
|
441 |
+
|
442 |
+
// Add keyboard shortcuts
|
443 |
+
// Keyboard navigation: plain ArrowLeft / ArrowRight click the elements with
// ids `prev_button` / `next_button`. The shortcut is skipped when
//   - a modifier key is held, so browser and OS shortcuts such as Alt+Left
//     (history back) or Shift+Arrow (extend selection) keep working, or
//   - focus is in an editable control, where arrows move the caret or
//     change the selected option.
document.addEventListener('keydown', function(event) {
    if (event.altKey || event.ctrlKey || event.metaKey || event.shiftKey) {
        return;
    }

    const target = event.target;
    if (target && (
        target.tagName === 'INPUT' ||
        target.tagName === 'TEXTAREA' ||
        target.tagName === 'SELECT' ||
        target.isContentEditable
    )) {
        return;
    }

    let buttonId = null;
    if (event.key === 'ArrowLeft') {
        buttonId = 'prev_button';
    } else if (event.key === 'ArrowRight') {
        buttonId = 'next_button';
    }
    if (!buttonId) {
        return;
    }

    const button = document.getElementById(buttonId);
    if (button) {
        // Only swallow the key when there is a button to act on.
        event.preventDefault();
        button.click();
    }
});
|
460 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
461 |
"""
|
462 |
)
|
463 |
|