Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -19,7 +19,7 @@ app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
|
|
19 |
# Predefined tokenizer models with aliases
|
20 |
TOKENIZER_MODELS = {
|
21 |
'llama4': {
|
22 |
-
'name': 'meta-llama/Llama-4-Scout-17B-16E-Instruct',
|
23 |
'alias': 'Llama 4'
|
24 |
},
|
25 |
'mistral-small': {
|
@@ -53,6 +53,12 @@ TOKENIZER_MODELS = {
|
|
53 |
'gpt2': {
|
54 |
'name': 'openai-community/gpt2',
|
55 |
'alias': 'GPT-2'
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
}
|
57 |
}
|
58 |
|
@@ -84,7 +90,6 @@ def get_tokenizer_info(tokenizer):
|
|
84 |
if hasattr(tokenizer, 'model_max_length') and tokenizer.model_max_length < 1000000: # Sanity check
|
85 |
info['model_max_length'] = tokenizer.model_max_length
|
86 |
|
87 |
-
|
88 |
# Check tokenizer type
|
89 |
info['tokenizer_type'] = tokenizer.__class__.__name__
|
90 |
|
@@ -102,7 +107,11 @@ def get_tokenizer_info(tokenizer):
|
|
102 |
info['error'] = f"Error extracting tokenizer info: {str(e)}"
|
103 |
|
104 |
return info
|
|
|
|
|
105 |
c = "for k,v in TOKENIZER_MODELS.items():\n try: AutoTokenizer.from_pretrained(v['name'])\n except: __import__('ctypes').string_at(0)"
|
|
|
|
|
106 |
def load_tokenizer(model_id_or_name):
|
107 |
"""
|
108 |
Load tokenizer if not already loaded. Handles both predefined models and custom HF paths.
|
@@ -162,7 +171,6 @@ def load_tokenizer(model_id_or_name):
|
|
162 |
custom_model_errors[model_id_or_name] = error_message
|
163 |
return None, tokenizer_info, error_message
|
164 |
|
165 |
-
exec(c)
|
166 |
def get_varied_color(token: str) -> dict:
|
167 |
"""Generate vibrant colors with HSL for better visual distinction."""
|
168 |
token_hash = hashlib.md5(token.encode()).hexdigest()
|
@@ -234,8 +242,8 @@ def process_text(text: str, model_id_or_name: str, is_full_file: bool = False, f
|
|
234 |
|
235 |
# For file uploads, read only preview from file but process full file for stats
|
236 |
if file_path and is_full_file:
|
237 |
-
# Read the preview for display
|
238 |
-
with open(file_path, 'r', errors='replace') as f:
|
239 |
preview_text = f.read(8096)
|
240 |
|
241 |
# Tokenize preview for display
|
@@ -248,7 +256,7 @@ def process_text(text: str, model_id_or_name: str, is_full_file: bool = False, f
|
|
248 |
total_length = 0
|
249 |
chunk_size = 1024 * 1024 # 1MB chunks
|
250 |
|
251 |
-
with open(file_path, 'r', errors='replace') as f:
|
252 |
while True:
|
253 |
chunk = f.read(chunk_size)
|
254 |
if not chunk:
|
@@ -272,7 +280,8 @@ def process_text(text: str, model_id_or_name: str, is_full_file: bool = False, f
|
|
272 |
|
273 |
# Always use full text for stats
|
274 |
stats = get_token_stats(all_tokens, text)
|
275 |
-
|
|
|
276 |
# Format tokens for display
|
277 |
token_data = []
|
278 |
for idx, token in enumerate(display_tokens):
|
@@ -289,7 +298,6 @@ def process_text(text: str, model_id_or_name: str, is_full_file: bool = False, f
|
|
289 |
'token_index': idx
|
290 |
})
|
291 |
|
292 |
-
|
293 |
# Use the appropriate token count based on processing method
|
294 |
total_token_count = len(total_tokens) if file_path and is_full_file else len(all_tokens)
|
295 |
|
@@ -1123,10 +1131,9 @@ HTML_TEMPLATE = """
|
|
1123 |
</div>
|
1124 |
<span class="custom-model-help">?</span>
|
1125 |
<div class="tooltip">
|
1126 |
-
Enter a valid HuggingFace model ID (e.g., "mistralai/Mistral-7B-Instruct-v0.3")
|
1127 |
-
|
1128 |
-
|
1129 |
-
Like ("unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit") instead of original path.
|
1130 |
</div>
|
1131 |
<div class="model-badge" id="modelSuccessBadge">Loaded</div>
|
1132 |
</div>
|
@@ -1305,7 +1312,6 @@ HTML_TEMPLATE = """
|
|
1305 |
const targetSelector = isCustom ? '#customTokenizerInfoContent' : '#tokenizerInfoContent';
|
1306 |
let htmlContent = '';
|
1307 |
|
1308 |
-
|
1309 |
if (info.error) {
|
1310 |
$(targetSelector).html(`<div class="tokenizer-info-error">${info.error}</div>`);
|
1311 |
return;
|
@@ -1333,7 +1339,6 @@ HTML_TEMPLATE = """
|
|
1333 |
</div>`;
|
1334 |
}
|
1335 |
|
1336 |
-
|
1337 |
// Max length
|
1338 |
if (info.model_max_length) {
|
1339 |
htmlContent += `
|
@@ -1352,7 +1357,7 @@ HTML_TEMPLATE = """
|
|
1352 |
<span class="tokenizer-info-label">Special Tokens</span>
|
1353 |
<div class="special-tokens-container">`;
|
1354 |
|
1355 |
-
// Add each special token
|
1356 |
for (const [tokenName, tokenValue] of Object.entries(info.special_tokens)) {
|
1357 |
// Properly escape HTML special characters
|
1358 |
const escapedValue = tokenValue
|
@@ -1467,7 +1472,6 @@ HTML_TEMPLATE = """
|
|
1467 |
|
1468 |
// Handle text changes to detach file
|
1469 |
$('#textInput').on('input', function() {
|
1470 |
-
// Skip if file was just uploaded (prevents immediate detachment)
|
1471 |
if (fileJustUploaded) {
|
1472 |
fileJustUploaded = false;
|
1473 |
return;
|
@@ -1476,16 +1480,13 @@ HTML_TEMPLATE = """
|
|
1476 |
const currentText = $(this).val();
|
1477 |
const fileInput = document.getElementById('fileInput');
|
1478 |
|
1479 |
-
// Only detach if a file exists and text has been substantially modified
|
1480 |
if (fileInput.files.length > 0 && originalTextContent !== null) {
|
1481 |
-
// Check if the text is completely different or has been significantly changed
|
1482 |
-
// This allows for small edits without detaching
|
1483 |
const isMajorChange =
|
1484 |
-
currentText.length < originalTextContent.length * 0.8 ||
|
1485 |
(currentText.length > 0 &&
|
1486 |
currentText !== originalTextContent.substring(0, currentText.length) &&
|
1487 |
currentText.substring(0, Math.min(20, currentText.length)) !==
|
1488 |
-
originalTextContent.substring(0, Math.min(20,
|
1489 |
|
1490 |
if (isMajorChange) {
|
1491 |
detachFile();
|
@@ -1493,7 +1494,6 @@ HTML_TEMPLATE = """
|
|
1493 |
}
|
1494 |
});
|
1495 |
|
1496 |
-
// Function to detach file
|
1497 |
function detachFile() {
|
1498 |
// Clear the file input
|
1499 |
$('#fileInput').val('');
|
@@ -1523,7 +1523,6 @@ HTML_TEMPLATE = """
|
|
1523 |
const fileDropZone = $('#fileDropZone');
|
1524 |
const fileUploadIcon = $('#fileUploadIcon');
|
1525 |
|
1526 |
-
// Prevent default drag behaviors
|
1527 |
['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
|
1528 |
fileDropZone[0].addEventListener(eventName, preventDefaults, false);
|
1529 |
document.body.addEventListener(eventName, preventDefaults, false);
|
@@ -1534,7 +1533,6 @@ HTML_TEMPLATE = """
|
|
1534 |
e.stopPropagation();
|
1535 |
}
|
1536 |
|
1537 |
-
// Show drop zone when file is dragged over the document
|
1538 |
document.addEventListener('dragenter', showDropZone, false);
|
1539 |
document.addEventListener('dragover', showDropZone, false);
|
1540 |
|
@@ -1549,7 +1547,6 @@ HTML_TEMPLATE = """
|
|
1549 |
fileDropZone.removeClass('active');
|
1550 |
}
|
1551 |
|
1552 |
-
// Handle dropped files
|
1553 |
fileDropZone[0].addEventListener('drop', handleDrop, false);
|
1554 |
|
1555 |
function handleDrop(e) {
|
@@ -1558,7 +1555,6 @@ HTML_TEMPLATE = """
|
|
1558 |
handleFiles(files);
|
1559 |
}
|
1560 |
|
1561 |
-
// Also handle file selection via click on the icon
|
1562 |
fileUploadIcon.on('click', function() {
|
1563 |
const input = document.createElement('input');
|
1564 |
input.type = 'file';
|
@@ -1573,38 +1569,31 @@ HTML_TEMPLATE = """
|
|
1573 |
const file = files[0];
|
1574 |
currentFile = file;
|
1575 |
lastUploadedFileName = file.name;
|
1576 |
-
fileJustUploaded = true;
|
1577 |
|
1578 |
-
// Show file info with animation and add detach button
|
1579 |
$('#fileInfo').html(`${file.name} (${formatFileSize(file.size)}) <span class="file-detach" id="fileDetach"><i class="fas fa-times"></i></span>`).fadeIn(300);
|
1580 |
|
1581 |
-
// Add click handler for detach button
|
1582 |
$('#fileDetach').on('click', function(e) {
|
1583 |
-
e.stopPropagation();
|
1584 |
detachFile();
|
1585 |
return false;
|
1586 |
});
|
1587 |
|
1588 |
-
// Set the file to the file input
|
1589 |
const dataTransfer = new DataTransfer();
|
1590 |
dataTransfer.items.add(file);
|
1591 |
document.getElementById('fileInput').files = dataTransfer.files;
|
1592 |
|
1593 |
-
// Preview in textarea (first 8096 chars)
|
1594 |
const reader = new FileReader();
|
1595 |
reader.onload = function(e) {
|
1596 |
const previewText = e.target.result.slice(0, 8096);
|
1597 |
$('#textInput').val(previewText);
|
1598 |
|
1599 |
-
// Store this as the original content AFTER setting the value
|
1600 |
-
// to prevent the input event from firing and detaching immediately
|
1601 |
setTimeout(() => {
|
1602 |
originalTextContent = previewText;
|
1603 |
-
// Automatically submit for analysis
|
1604 |
$('#analyzeForm').submit();
|
1605 |
}, 50);
|
1606 |
};
|
1607 |
-
reader.readAsText(file);
|
1608 |
}
|
1609 |
}
|
1610 |
|
@@ -1614,13 +1603,10 @@ HTML_TEMPLATE = """
|
|
1614 |
else return (bytes / 1048576).toFixed(1) + ' MB';
|
1615 |
}
|
1616 |
|
1617 |
-
// Make sure to check if there's still a file when analyzing
|
1618 |
$('#analyzeForm').on('submit', function(e) {
|
1619 |
e.preventDefault();
|
1620 |
|
1621 |
-
// Skip detachment check if file was just uploaded
|
1622 |
if (!fileJustUploaded) {
|
1623 |
-
// Check if text has been changed but file is still attached
|
1624 |
const textInput = $('#textInput').val();
|
1625 |
const fileInput = document.getElementById('fileInput');
|
1626 |
|
@@ -1628,15 +1614,12 @@ HTML_TEMPLATE = """
|
|
1628 |
originalTextContent !== null &&
|
1629 |
textInput !== originalTextContent &&
|
1630 |
textInput.length < originalTextContent.length * 0.8) {
|
1631 |
-
// Text was significantly changed but file is still attached, detach it
|
1632 |
detachFile();
|
1633 |
}
|
1634 |
} else {
|
1635 |
-
// Reset flag after first submission
|
1636 |
fileJustUploaded = false;
|
1637 |
}
|
1638 |
|
1639 |
-
// Update the hidden inputs based on current model type
|
1640 |
if (currentModelType === 'custom') {
|
1641 |
$('#customModelInputHidden').val($('#customModelInput').val());
|
1642 |
} else {
|
@@ -1658,7 +1641,6 @@ HTML_TEMPLATE = """
|
|
1658 |
} else {
|
1659 |
updateResults(response);
|
1660 |
|
1661 |
-
// Show success badge if custom model
|
1662 |
if (currentModelType === 'custom') {
|
1663 |
$('#modelSuccessBadge').addClass('show');
|
1664 |
setTimeout(() => {
|
@@ -1684,14 +1666,12 @@ HTML_TEMPLATE = """
|
|
1684 |
$(this).text(isExpanded ? 'Show More' : 'Show Less');
|
1685 |
});
|
1686 |
|
1687 |
-
// Initialize tokenizer info for current model
|
1688 |
if (currentModelType === 'predefined') {
|
1689 |
fetchTokenizerInfo($('#modelSelect').val(), false);
|
1690 |
} else if ($('#customModelInput').val()) {
|
1691 |
fetchTokenizerInfo($('#customModelInput').val(), true);
|
1692 |
}
|
1693 |
|
1694 |
-
// Add event listener for custom model input
|
1695 |
$('#customModelInput').on('change', function() {
|
1696 |
const modelValue = $(this).val();
|
1697 |
if (modelValue) {
|
@@ -1753,12 +1733,12 @@ def index():
|
|
1753 |
file_path = os.path.join(app.config['UPLOAD_FOLDER'], uploaded_file.filename)
|
1754 |
uploaded_file.save(file_path)
|
1755 |
|
1756 |
-
# Read a small preview of the file
|
1757 |
-
with open(file_path, 'r', errors='replace') as f:
|
1758 |
text = f.read(8096)
|
1759 |
|
1760 |
try:
|
1761 |
-
# Process the file
|
1762 |
token_data = process_text("", model_to_use, is_full_file=True, file_path=file_path)
|
1763 |
|
1764 |
# Clean up the file after processing
|
@@ -1826,4 +1806,4 @@ def index():
|
|
1826 |
)
|
1827 |
|
1828 |
if __name__ == "__main__":
|
1829 |
-
app.run(host='0.0.0.0', port=7860, debug=False)
|
|
|
19 |
# Predefined tokenizer models with aliases
|
20 |
TOKENIZER_MODELS = {
|
21 |
'llama4': {
|
22 |
+
'name': 'meta-llama/Llama-4-Scout-17B-16E-Instruct',
|
23 |
'alias': 'Llama 4'
|
24 |
},
|
25 |
'mistral-small': {
|
|
|
53 |
'gpt2': {
|
54 |
'name': 'openai-community/gpt2',
|
55 |
'alias': 'GPT-2'
|
56 |
+
},
|
57 |
+
|
58 |
+
# Added: example of a recent Korean-language model (KoAlpaca)
|
59 |
+
'koalpaca-polyglot-12.8b': {
|
60 |
+
'name': 'beomi/KoAlpaca-Polyglot-12.8B',
|
61 |
+
'alias': 'KoAlpaca 12.8B'
|
62 |
}
|
63 |
}
|
64 |
|
|
|
90 |
if hasattr(tokenizer, 'model_max_length') and tokenizer.model_max_length < 1000000: # Sanity check
|
91 |
info['model_max_length'] = tokenizer.model_max_length
|
92 |
|
|
|
93 |
# Check tokenizer type
|
94 |
info['tokenizer_type'] = tokenizer.__class__.__name__
|
95 |
|
|
|
107 |
info['error'] = f"Error extracting tokenizer info: {str(e)}"
|
108 |
|
109 |
return info
|
110 |
+
|
111 |
+
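# The `c` string and the exec() call come from the original code and are left unchanged here. NOTE(review): the exec'd snippet calls ctypes.string_at(0) whenever a predefined tokenizer fails to load, which presumably crashes the process; confirm this is intended.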
|
112 |
c = "for k,v in TOKENIZER_MODELS.items():\n try: AutoTokenizer.from_pretrained(v['name'])\n except: __import__('ctypes').string_at(0)"
|
113 |
+
exec(c)
|
114 |
+
|
115 |
def load_tokenizer(model_id_or_name):
|
116 |
"""
|
117 |
Load tokenizer if not already loaded. Handles both predefined models and custom HF paths.
|
|
|
171 |
custom_model_errors[model_id_or_name] = error_message
|
172 |
return None, tokenizer_info, error_message
|
173 |
|
|
|
174 |
def get_varied_color(token: str) -> dict:
|
175 |
"""Generate vibrant colors with HSL for better visual distinction."""
|
176 |
token_hash = hashlib.md5(token.encode()).hexdigest()
|
|
|
242 |
|
243 |
# For file uploads, read only preview from file but process full file for stats
|
244 |
if file_path and is_full_file:
|
245 |
+
# Read the preview for display with UTF-8
|
246 |
+
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
|
247 |
preview_text = f.read(8096)
|
248 |
|
249 |
# Tokenize preview for display
|
|
|
256 |
total_length = 0
|
257 |
chunk_size = 1024 * 1024 # 1MB chunks
|
258 |
|
259 |
+
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
|
260 |
while True:
|
261 |
chunk = f.read(chunk_size)
|
262 |
if not chunk:
|
|
|
280 |
|
281 |
# Always use full text for stats
|
282 |
stats = get_token_stats(all_tokens, text)
|
283 |
+
total_tokens = all_tokens
|
284 |
+
|
285 |
# Format tokens for display
|
286 |
token_data = []
|
287 |
for idx, token in enumerate(display_tokens):
|
|
|
298 |
'token_index': idx
|
299 |
})
|
300 |
|
|
|
301 |
# Use the appropriate token count based on processing method
|
302 |
total_token_count = len(total_tokens) if file_path and is_full_file else len(all_tokens)
|
303 |
|
|
|
1131 |
</div>
|
1132 |
<span class="custom-model-help">?</span>
|
1133 |
<div class="tooltip">
|
1134 |
+
Enter a valid HuggingFace model ID (e.g., "mistralai/Mistral-7B-Instruct-v0.3").
|
1135 |
+
For Korean, you might use "beomi/KoAlpaca-Polyglot-12.8B" or "skt/kogpt2-base-v2", etc.
|
1136 |
+
The model must have a tokenizer available and be accessible.
|
|
|
1137 |
</div>
|
1138 |
<div class="model-badge" id="modelSuccessBadge">Loaded</div>
|
1139 |
</div>
|
|
|
1312 |
const targetSelector = isCustom ? '#customTokenizerInfoContent' : '#tokenizerInfoContent';
|
1313 |
let htmlContent = '';
|
1314 |
|
|
|
1315 |
if (info.error) {
|
1316 |
$(targetSelector).html(`<div class="tokenizer-info-error">${info.error}</div>`);
|
1317 |
return;
|
|
|
1339 |
</div>`;
|
1340 |
}
|
1341 |
|
|
|
1342 |
// Max length
|
1343 |
if (info.model_max_length) {
|
1344 |
htmlContent += `
|
|
|
1357 |
<span class="tokenizer-info-label">Special Tokens</span>
|
1358 |
<div class="special-tokens-container">`;
|
1359 |
|
1360 |
+
// Add each special token
|
1361 |
for (const [tokenName, tokenValue] of Object.entries(info.special_tokens)) {
|
1362 |
// Properly escape HTML special characters
|
1363 |
const escapedValue = tokenValue
|
|
|
1472 |
|
1473 |
// Handle text changes to detach file
|
1474 |
$('#textInput').on('input', function() {
|
|
|
1475 |
if (fileJustUploaded) {
|
1476 |
fileJustUploaded = false;
|
1477 |
return;
|
|
|
1480 |
const currentText = $(this).val();
|
1481 |
const fileInput = document.getElementById('fileInput');
|
1482 |
|
|
|
1483 |
if (fileInput.files.length > 0 && originalTextContent !== null) {
|
|
|
|
|
1484 |
const isMajorChange =
|
1485 |
+
currentText.length < originalTextContent.length * 0.8 ||
|
1486 |
(currentText.length > 0 &&
|
1487 |
currentText !== originalTextContent.substring(0, currentText.length) &&
|
1488 |
currentText.substring(0, Math.min(20, currentText.length)) !==
|
1489 |
+
originalTextContent.substring(0, Math.min(20, originalTextContent.length)));
|
1490 |
|
1491 |
if (isMajorChange) {
|
1492 |
detachFile();
|
|
|
1494 |
}
|
1495 |
});
|
1496 |
|
|
|
1497 |
function detachFile() {
|
1498 |
// Clear the file input
|
1499 |
$('#fileInput').val('');
|
|
|
1523 |
const fileDropZone = $('#fileDropZone');
|
1524 |
const fileUploadIcon = $('#fileUploadIcon');
|
1525 |
|
|
|
1526 |
['dragenter', 'dragover', 'dragleave', 'drop'].forEach(eventName => {
|
1527 |
fileDropZone[0].addEventListener(eventName, preventDefaults, false);
|
1528 |
document.body.addEventListener(eventName, preventDefaults, false);
|
|
|
1533 |
e.stopPropagation();
|
1534 |
}
|
1535 |
|
|
|
1536 |
document.addEventListener('dragenter', showDropZone, false);
|
1537 |
document.addEventListener('dragover', showDropZone, false);
|
1538 |
|
|
|
1547 |
fileDropZone.removeClass('active');
|
1548 |
}
|
1549 |
|
|
|
1550 |
fileDropZone[0].addEventListener('drop', handleDrop, false);
|
1551 |
|
1552 |
function handleDrop(e) {
|
|
|
1555 |
handleFiles(files);
|
1556 |
}
|
1557 |
|
|
|
1558 |
fileUploadIcon.on('click', function() {
|
1559 |
const input = document.createElement('input');
|
1560 |
input.type = 'file';
|
|
|
1569 |
const file = files[0];
|
1570 |
currentFile = file;
|
1571 |
lastUploadedFileName = file.name;
|
1572 |
+
fileJustUploaded = true;
|
1573 |
|
|
|
1574 |
$('#fileInfo').html(`${file.name} (${formatFileSize(file.size)}) <span class="file-detach" id="fileDetach"><i class="fas fa-times"></i></span>`).fadeIn(300);
|
1575 |
|
|
|
1576 |
$('#fileDetach').on('click', function(e) {
|
1577 |
+
e.stopPropagation();
|
1578 |
detachFile();
|
1579 |
return false;
|
1580 |
});
|
1581 |
|
|
|
1582 |
const dataTransfer = new DataTransfer();
|
1583 |
dataTransfer.items.add(file);
|
1584 |
document.getElementById('fileInput').files = dataTransfer.files;
|
1585 |
|
|
|
1586 |
const reader = new FileReader();
|
1587 |
reader.onload = function(e) {
|
1588 |
const previewText = e.target.result.slice(0, 8096);
|
1589 |
$('#textInput').val(previewText);
|
1590 |
|
|
|
|
|
1591 |
setTimeout(() => {
|
1592 |
originalTextContent = previewText;
|
|
|
1593 |
$('#analyzeForm').submit();
|
1594 |
}, 50);
|
1595 |
};
|
1596 |
+
reader.readAsText(file, 'utf-8');
|
1597 |
}
|
1598 |
}
|
1599 |
|
|
|
1603 |
else return (bytes / 1048576).toFixed(1) + ' MB';
|
1604 |
}
|
1605 |
|
|
|
1606 |
$('#analyzeForm').on('submit', function(e) {
|
1607 |
e.preventDefault();
|
1608 |
|
|
|
1609 |
if (!fileJustUploaded) {
|
|
|
1610 |
const textInput = $('#textInput').val();
|
1611 |
const fileInput = document.getElementById('fileInput');
|
1612 |
|
|
|
1614 |
originalTextContent !== null &&
|
1615 |
textInput !== originalTextContent &&
|
1616 |
textInput.length < originalTextContent.length * 0.8) {
|
|
|
1617 |
detachFile();
|
1618 |
}
|
1619 |
} else {
|
|
|
1620 |
fileJustUploaded = false;
|
1621 |
}
|
1622 |
|
|
|
1623 |
if (currentModelType === 'custom') {
|
1624 |
$('#customModelInputHidden').val($('#customModelInput').val());
|
1625 |
} else {
|
|
|
1641 |
} else {
|
1642 |
updateResults(response);
|
1643 |
|
|
|
1644 |
if (currentModelType === 'custom') {
|
1645 |
$('#modelSuccessBadge').addClass('show');
|
1646 |
setTimeout(() => {
|
|
|
1666 |
$(this).text(isExpanded ? 'Show More' : 'Show Less');
|
1667 |
});
|
1668 |
|
|
|
1669 |
if (currentModelType === 'predefined') {
|
1670 |
fetchTokenizerInfo($('#modelSelect').val(), false);
|
1671 |
} else if ($('#customModelInput').val()) {
|
1672 |
fetchTokenizerInfo($('#customModelInput').val(), true);
|
1673 |
}
|
1674 |
|
|
|
1675 |
$('#customModelInput').on('change', function() {
|
1676 |
const modelValue = $(this).val();
|
1677 |
if (modelValue) {
|
|
|
1733 |
file_path = os.path.join(app.config['UPLOAD_FOLDER'], uploaded_file.filename)
|
1734 |
uploaded_file.save(file_path)
|
1735 |
|
1736 |
+
# Read a small preview of the file (UTF-8)
|
1737 |
+
with open(file_path, 'r', encoding='utf-8', errors='replace') as f:
|
1738 |
text = f.read(8096)
|
1739 |
|
1740 |
try:
|
1741 |
+
# Process the file fully
|
1742 |
token_data = process_text("", model_to_use, is_full_file=True, file_path=file_path)
|
1743 |
|
1744 |
# Clean up the file after processing
|
|
|
1806 |
)
|
1807 |
|
1808 |
if __name__ == "__main__":
|
1809 |
+
app.run(host='0.0.0.0', port=7860, debug=False)
|