<!-- sbert-embedder / index.html
UMCU's picture
Add 2 files
e12081b verified -->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SBERT Embedding Generator</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
<style>
/* Design tokens: palette plus shared radius, shadow and transition values.
   The rules below read them through var(). */
:root {
--primary-color: #4361ee;
--secondary-color: #3f37c9;
--accent-color: #4895ef;
--light-color: #f8f9fa;
--dark-color: #212529;
--success-color: #4cc9f0;
--warning-color: #f72585;
--border-radius: 8px;
--box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
--transition: all 0.3s ease;
}
/* Global reset. Note that font-family is assigned to every element directly
   here, so a family set on a parent is NOT inherited by its descendants
   unless they opt in with `font-family: inherit`. */
* {
margin: 0;
padding: 0;
box-sizing: border-box;
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
}
/* Page background, default text color and outer gutter. */
body {
background-color: #f0f2f5;
color: var(--dark-color);
line-height: 1.6;
padding: 20px;
}
/* Centered page column. */
.container {
max-width: 1000px;
margin: 0 auto;
padding: 20px;
}
/* Page heading area; fades in on load (see @keyframes fadeIn). */
header {
text-align: center;
margin-bottom: 30px;
animation: fadeIn 0.5s ease-out;
}
h1 {
color: var(--primary-color);
margin-bottom: 10px;
font-size: 2.5rem;
}
.subtitle {
color: #6c757d;
font-size: 1.1rem;
}
/* White card that wraps the input, loading and output panels;
   slides up on load (see @keyframes slideUp). */
.app-container {
background-color: white;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
overflow: hidden;
transition: var(--transition);
animation: slideUp 0.5s ease-out;
}
/* Shared padding for the two main panels of the card. */
.input-section, .output-section {
padding: 25px;
}
.input-section {
background-color: var(--light-color);
border-bottom: 1px solid #e9ecef;
}
/* Hidden by default; the script sets an inline display:block once a vector
   has been generated and display:none again for a new embedding. */
.output-section {
display: none;
}
/* Vertical spacing between label + control pairs. */
.form-group {
margin-bottom: 20px;
}
/* Labels sit on their own line above the control. */
label {
display: block;
margin-bottom: 8px;
font-weight: 600;
color: var(--dark-color);
}
/* Full-width model picker and sentence textarea. */
select, textarea {
width: 100%;
padding: 12px;
border: 1px solid #ced4da;
border-radius: var(--border-radius);
font-size: 16px;
transition: var(--transition);
}
/* The native outline is removed; the colored border plus the 3px box-shadow
   ring is the visible focus indicator that replaces it. */
select:focus, textarea:focus {
outline: none;
border-color: var(--accent-color);
box-shadow: 0 0 0 3px rgba(72, 149, 239, 0.25);
}
/* The textarea may only be resized vertically and never below 120px. */
textarea {
min-height: 120px;
resize: vertical;
}
/* Primary action button. */
.btn {
display: inline-block;
background-color: var(--primary-color);
color: white;
border: none;
border-radius: var(--border-radius);
padding: 12px 24px;
font-size: 16px;
cursor: pointer;
transition: var(--transition);
font-weight: 600;
}
/* Hover: darker fill and a 2px lift. */
.btn:hover {
background-color: var(--secondary-color);
transform: translateY(-2px);
}
/* Disabled: greyed out. Declared after .btn:hover with the same specificity,
   so a hovered disabled button stays grey and does not lift. */
.btn:disabled {
background-color: #adb5bd;
cursor: not-allowed;
transform: none;
}
/* Row of buttons; stacked vertically on narrow screens by the media query
   at the end of the stylesheet. */
.btn-group {
display: flex;
gap: 10px;
margin-top: 20px;
}
/* Neutral grey variant used together with .btn. */
.secondary-btn {
background-color: #6c757d;
}
.secondary-btn:hover {
background-color: #5a6268;
}
/* Grey informational note shown under the textarea. */
.model-info {
background-color: #e9ecef;
padding: 15px;
border-radius: var(--border-radius);
margin-top: 15px;
font-size: 14px;
color: #495057;
}
/* Wrapper around the preview label and the embedding preview box. */
.output-container {
margin-top: 20px;
}
/* Scrollable box that shows the generated vector in a monospace face.
   position: relative anchors the absolutely positioned .copy-btn inside it. */
.embedding-preview {
  background-color: #f8f9fa;
  border: 1px solid #e9ecef;
  border-radius: var(--border-radius);
  padding: 15px;
  max-height: 300px;
  overflow-y: auto;
  font-family: 'Courier New', Courier, monospace;
  font-size: 14px;
  line-height: 1.5;
  position: relative;
}
/* The universal `*` rule assigns the UI font to every element directly, and a
   directly assigned value always wins over inheritance, so the monospace
   family declared above never reached the text inside the box. Opt the
   output wrapper and the <pre> that the script injects into it back into
   inheriting the monospace family. */
.embedding-preview #embedding-text,
.embedding-preview pre {
  font-family: inherit;
}
/* Small "Copy" button pinned to the top-right corner of its nearest
   positioned ancestor (.embedding-preview in the markup). */
.copy-btn {
position: absolute;
top: 10px;
right: 10px;
background-color: rgba(255, 255, 255, 0.8);
border: none;
border-radius: 4px;
padding: 5px 10px;
cursor: pointer;
transition: var(--transition);
}
.copy-btn:hover {
background-color: white;
}
/* Row of summary figures (dimensions, vector length, processing time);
   becomes a column on narrow screens via the media query below. */
.stats {
display: flex;
gap: 20px;
margin-top: 15px;
font-size: 14px;
color: #6c757d;
}
/* One icon + text pair inside .stats. */
.stat-item {
display: flex;
align-items: center;
gap: 5px;
}
/* Loading panel: hidden by default; the script toggles it with an inline
   display value (block while working, none when done). */
.loading {
display: none;
text-align: center;
margin: 30px 0;
}
/* Ring spinner: a faint full ring with only the top edge in the primary
   color, rotated continuously by the `spin` animation. */
.spinner {
width: 50px;
height: 50px;
border: 5px solid rgba(67, 97, 238, 0.2);
border-radius: 50%;
border-top-color: var(--primary-color);
animation: spin 1s linear infinite;
margin: 0 auto 15px;
}
/* One full turn; used by .spinner. */
@keyframes spin {
to { transform: rotate(360deg); }
}
/* Opacity-only entrance; used by the page header. */
@keyframes fadeIn {
from { opacity: 0; }
to { opacity: 1; }
}
/* Fade in while rising 20px; used by .app-container. */
@keyframes slideUp {
from {
opacity: 0;
transform: translateY(20px);
}
to {
opacity: 1;
transform: translateY(0);
}
}
/* Rounded badge; the script renders one per previewed vector component
   in the form "index: value". */
.dimension-pill {
display: inline-block;
background-color: var(--accent-color);
color: white;
padding: 3px 8px;
border-radius: 20px;
font-size: 12px;
margin-right: 5px;
margin-bottom: 5px;
}
/* Toast notification: fixed to the top-right corner and parked off-screen
   (translated 200% to the right) until the `show` class is added. */
.toast {
position: fixed;
top: 20px;
right: 20px;
background-color: var(--success-color);
color: white;
padding: 12px 20px;
border-radius: var(--border-radius);
box-shadow: var(--box-shadow);
transform: translateX(200%);
transition: transform 0.3s ease;
z-index: 1000;
}
/* Visible state: slides the toast back into the viewport. */
.toast.show {
transform: translateX(0);
}
/* Error variant: swaps the background to the warning color. */
.toast.error {
background-color: var(--warning-color);
}
/* Small centered note below the card. */
footer {
text-align: center;
margin-top: 30px;
color: #6c757d;
font-size: 14px;
}
/* Narrow screens (768px and below): tighter padding, and the button row
   and stats row stack vertically instead of sitting side by side. */
@media (max-width: 768px) {
.container {
padding: 10px;
}
.input-section, .output-section {
padding: 15px;
}
.btn-group {
flex-direction: column;
}
.stats {
flex-direction: column;
gap: 10px;
}
}
</style>
</head>
<body>
<div class="container">
  <header>
    <h1>SBERT Embedding Generator</h1>
    <p class="subtitle">Generate sentence embeddings using pre-trained SBERT models</p>
  </header>
  <!-- Primary content landmark: input form, loading indicator and results. -->
  <main class="app-container">
    <div class="input-section">
      <div class="form-group">
        <label for="model-select">Select SBERT Model</label>
        <!-- NOTE(review): the inline script currently loads the Universal
             Sentence Encoder for every option (see loadModel), so the names
             and dimension counts listed here describe the intended SBERT
             models, not what actually runs. -->
        <select id="model-select">
          <option value="all-MiniLM-L6-v2">all-MiniLM-L6-v2 (384 dimensions, recommended for general use)</option>
          <option value="all-mpnet-base-v2">all-mpnet-base-v2 (768 dimensions, highest quality)</option>
          <option value="multi-qa-mpnet-base-dot-v1">multi-qa-mpnet-base-dot-v1 (768 dimensions, optimized for semantic search)</option>
          <option value="paraphrase-multilingual-MiniLM-L12-v2">paraphrase-multilingual-MiniLM-L12-v2 (384 dimensions, supports 50+ languages)</option>
        </select>
      </div>
      <div class="form-group">
        <label for="sentence-input">Enter Your Sentence</label>
        <textarea id="sentence-input" placeholder="Type or paste your text here to generate embeddings..."></textarea>
      </div>
      <div class="model-info">
        <i class="fas fa-info-circle" aria-hidden="true"></i> The selected model will process your text locally in the browser using TensorFlow.js. No data is sent to external servers.
      </div>
      <div class="btn-group">
        <!-- type="button" keeps these inert if the markup is ever wrapped in a form. -->
        <button type="button" id="generate-btn" class="btn">
          <i class="fas fa-cog" aria-hidden="true"></i> Generate Embeddings
        </button>
        <button type="button" id="clear-btn" class="btn secondary-btn">
          <i class="fas fa-trash-alt" aria-hidden="true"></i> Clear
        </button>
      </div>
    </div>
    <!-- Live region so assistive technology announces progress messages. -->
    <div class="loading" role="status" aria-live="polite">
      <div class="spinner"></div>
      <p>Generating embeddings...</p>
      <p id="loading-details">Loading model and processing text</p>
    </div>
    <div class="output-section">
      <h2>Generated Embeddings</h2>
      <div class="stats">
        <div class="stat-item">
          <i class="fas fa-layer-group" aria-hidden="true"></i>
          <span>Dimensions: <span id="dimension-count">0</span></span>
        </div>
        <div class="stat-item">
          <i class="fas fa-calculator" aria-hidden="true"></i>
          <span>Vector Length: <span id="vector-length">0</span></span>
        </div>
        <div class="stat-item">
          <i class="fas fa-memory" aria-hidden="true"></i>
          <span>Processing Time: <span id="process-time">0</span> ms</span>
        </div>
      </div>
      <div class="output-container">
        <label>Embedding Vector Preview</label>
        <div class="embedding-preview" id="embedding-output">
          <button type="button" class="copy-btn" id="copy-btn">
            <i class="fas fa-copy" aria-hidden="true"></i> Copy
          </button>
          <div id="embedding-text">No embeddings generated yet.</div>
        </div>
      </div>
      <div class="btn-group">
        <button type="button" id="download-btn" class="btn">
          <i class="fas fa-download" aria-hidden="true"></i> Download as JSON
        </button>
        <button type="button" id="new-embedding-btn" class="btn secondary-btn">
          <i class="fas fa-plus" aria-hidden="true"></i> New Embedding
        </button>
      </div>
    </div>
  </main>
  <!-- Live region: the script replaces this text and toggles the `show`
       class, so success and error messages are announced as well as shown. -->
  <div class="toast" id="toast" role="status" aria-live="polite">
    Embeddings copied to clipboard!
  </div>
  <footer>
    <p>This demo uses TensorFlow.js to run SBERT models in your browser. For production use, consider using a backend service.</p>
  </footer>
</div>
<!-- Load TensorFlow.js and the Universal Sentence Encoder package; the inline
     script below calls `use.load()`, presumably the global this second
     script provides.
     NOTE(review): neither URL pins a package version or carries an
     integrity hash, so whatever build the CDN currently serves is executed.
     Consider pinning versions and adding integrity/crossorigin attributes. -->
<script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
<script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/universal-sentence-encoder"></script>
<script>
// ---- Cached DOM references ----
// Looked up once at startup; every function below reads these constants.
const byId = (id) => document.getElementById(id);

// Input panel
const modelSelect = byId('model-select');
const sentenceInput = byId('sentence-input');
const generateBtn = byId('generate-btn');
const clearBtn = byId('clear-btn');

// Panels toggled by the script
const outputSection = document.querySelector('.output-section');
const loadingSection = document.querySelector('.loading');

// Output panel
const embeddingOutput = byId('embedding-text');
const dimensionCount = byId('dimension-count');
const vectorLength = byId('vector-length');
const processTime = byId('process-time');
const copyBtn = byId('copy-btn');
const downloadBtn = byId('download-btn');
const newEmbeddingBtn = byId('new-embedding-btn');

// Feedback elements
const toast = byId('toast');
const loadingDetails = byId('loading-details');

// ---- Mutable state ----
// `model` caches the loaded model; `embeddings` holds the most recent result
// record used by the copy and download actions. Both start out empty.
let model = null;
let embeddings = null;

// ---- Click wiring: one [button, handler] pair per action ----
[
    [generateBtn, generateEmbeddings],
    [clearBtn, clearInput],
    [copyBtn, copyEmbeddings],
    [downloadBtn, downloadEmbeddings],
    [newEmbeddingBtn, newEmbedding]
].forEach(([button, handler]) => button.addEventListener('click', handler));

// Set the initial state of the Generate button from the current textarea value.
checkInput();
// Functions
/**
 * Sync the Generate button with the current state of the page.
 *
 * The button is enabled only when the textarea contains non-whitespace text
 * AND no run is in progress. Without the second condition, typing while the
 * model is loading or embedding would re-enable the button and allow a
 * second, overlapping run to be started.
 */
function checkInput() {
    const isEmpty = sentenceInput.value.trim() === '';
    // showLoading() sets the inline display to 'block' for the duration of a
    // run and hideLoading() sets it back to 'none'; before the first run the
    // inline value is the empty string.
    const isBusy = loadingSection.style.display === 'block';
    generateBtn.disabled = isEmpty || isBusy;
}
/**
 * Reveal the loading panel with a status line and lock the Generate button.
 *
 * @param {string} message - Text placed in the #loading-details paragraph.
 */
function showLoading(message) {
    generateBtn.disabled = true;
    loadingSection.style.display = 'block';
    loadingDetails.textContent = message;
}
/**
 * Hide the loading panel and restore the Generate button.
 *
 * The button state is recomputed from the textarea instead of being forced
 * on: the Clear button stays usable during a run, so the input may be empty
 * by the time the run finishes, and an empty input must leave Generate
 * disabled.
 */
function hideLoading() {
    // Hide the panel first so the recomputation below sees the idle state.
    loadingSection.style.display = 'none';
    checkInput();
}
/**
 * Show a toast notification for three seconds.
 *
 * @param {string} message - Text to display.
 * @param {boolean} [isError=false] - Use the error styling when true.
 */
function showToast(message, isError = false) {
    toast.textContent = message;
    toast.className = isError ? 'toast error show' : 'toast show';
    // Cancel the hide timer of any toast that is still showing; otherwise the
    // older timer fires early and dismisses this newer message before its
    // three seconds are up. clearTimeout ignores an undefined id.
    clearTimeout(showToast.hideTimerId);
    showToast.hideTimerId = setTimeout(() => {
        toast.className = 'toast';
    }, 3000);
}
/**
 * Load the embedding model and cache it in the file-level `model` variable.
 *
 * NOTE: `modelName` is only used in the loading message. Whatever is
 * selected, the Universal Sentence Encoder (`use.load()`) is loaded as a
 * stand-in. Running real SBERT models would require hosting them converted
 * to TF.js format and loading them with tf.loadGraphModel().
 *
 * @param {string} modelName - Name shown to the user while loading.
 * @returns {Promise<Object>} Resolves with the loaded model.
 * @throws Rethrows whatever `use.load()` (or a missing `use` global) throws,
 *         after showing an error toast.
 */
async function loadModel(modelName) {
    try {
        showLoading(`Loading ${modelName} model...`);
        // The former artificial 1.5 s "simulated loading" pause is gone: the
        // real model load below already takes time, so the sleep only added
        // latency to the first run.
        model = await use.load();
        return model;
    } catch (error) {
        console.error('Model loading error:', error);
        showToast('Error loading model. Please try again.', true);
        throw error;
    }
}
/**
 * Click handler for the Generate button: embed the current sentence and show
 * the result.
 *
 * Loads the model on first use, times the embedding call, hands the vector
 * to displayEmbeddings() and reports failures through an error toast. The
 * loading panel is always hidden again when the run ends.
 *
 * @returns {Promise<void>}
 */
async function generateEmbeddings() {
    const sentence = sentenceInput.value.trim();
    // Nothing to embed: leave the UI untouched (in particular, do not run the
    // cleanup below, which would re-evaluate the button state).
    if (!sentence) return;

    const modelName = modelSelect.value;
    // Named so it does not shadow the file-level `embeddings` record, and
    // declared outside `try` so the cleanup in `finally` can reach it.
    let embeddingTensor = null;
    try {
        // Load model if not already loaded
        if (!model) {
            model = await loadModel(modelName);
        }
        showLoading('Generating embeddings...');
        const startTime = performance.now();
        // Generate embeddings - using USE as a placeholder
        embeddingTensor = await model.embed([sentence]);
        const [embeddingVector] = await embeddingTensor.array();
        const elapsedTime = (performance.now() - startTime).toFixed(2);
        // Display results
        displayEmbeddings(embeddingVector, elapsedTime, modelName);
        // displayEmbeddings() re-reads the textarea when it builds the stored
        // record, and the textarea stays editable during a run. Pin the
        // record to the text that was actually embedded.
        if (embeddings) {
            embeddings.sentence = sentence;
        }
    } catch (error) {
        console.error('Embedding generation error:', error);
        showToast('Error generating embeddings. Please try again.', true);
    } finally {
        // Tensors are not garbage collected; release the backing memory
        // explicitly so repeated runs do not leak.
        if (embeddingTensor) {
            embeddingTensor.dispose();
        }
        hideLoading();
    }
}
/**
 * Render an embedding vector and remember it for the copy/download actions.
 *
 * Reveals the output panel, fills in the summary figures (dimension count,
 * Euclidean length, processing time), renders a truncated JSON preview plus
 * pills for the first ten components, and stores the complete record in the
 * file-level `embeddings` variable.
 *
 * @param {number[]} vector - Embedding components.
 * @param {string} elapsedTime - Processing time in ms, already formatted.
 * @param {string} modelName - Model name to show and to store in the record.
 */
function displayEmbeddings(vector, elapsedTime, modelName) {
    // Show output section
    outputSection.style.display = 'block';

    // Euclidean (L2) norm of the vector, rounded to four decimals
    const length = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0)).toFixed(4);

    // Update stats
    dimensionCount.textContent = vector.length;
    vectorLength.textContent = length;
    processTime.textContent = elapsedTime;

    // Preview exactly the first `previewLength` components. The previous
    // version cut the pretty-printed JSON at 1000 characters, which showed
    // far fewer values than the note claimed and could end mid-number.
    const previewLength = 100;
    const previewText = vector.length > previewLength
        ? JSON.stringify(vector.slice(0, previewLength), null, 2) + `\n... [truncated, showing first ${previewLength} of ${vector.length} dimensions]`
        : JSON.stringify(vector, null, 2);

    const pills = vector
        .slice(0, 10)
        .map((value, i) => `<span class="dimension-pill">${i}: ${value.toFixed(6)}</span>`)
        .join('');

    // NOTE: modelName is interpolated into markup unescaped. The only caller
    // in this file passes the value of a fixed <option>; escape it first if
    // it can ever come from free-form input.
    embeddingOutput.innerHTML = `
<p><strong>Model:</strong> ${modelName}</p>
<pre>${previewText}</pre>
<div class="dimensions-container">
${pills}
</div>
`;

    // Store the full embeddings for download/copy
    embeddings = {
        model: modelName,
        sentence: sentenceInput.value.trim(),
        embeddings: vector,
        dimension: vector.length,
        vector_length: parseFloat(length),
        processing_time_ms: parseFloat(elapsedTime),
        timestamp: new Date().toISOString()
    };
}
/**
 * Click handler for the Clear button: empty the sentence textarea, then
 * re-run the input check so the Generate button reflects the empty field.
 */
function clearInput() {
    sentenceInput.value = '';

    checkInput();
}
/**
 * Click handler for the Copy button: put the stored embedding record on the
 * clipboard as pretty-printed JSON and report the outcome in a toast.
 * Does nothing when no embedding has been generated yet.
 */
function copyEmbeddings() {
    if (!embeddings) return;

    // The async Clipboard API is only exposed in secure contexts. Where it is
    // missing, `navigator.clipboard` is undefined and calling writeText would
    // throw synchronously, bypassing the .catch() below, so fail gracefully.
    if (!navigator.clipboard || typeof navigator.clipboard.writeText !== 'function') {
        console.error('Copy failed: Clipboard API is not available in this context.');
        showToast('Failed to copy. Please try again.', true);
        return;
    }

    navigator.clipboard.writeText(JSON.stringify(embeddings, null, 2))
        .then(() => showToast('Embeddings copied to clipboard!'))
        .catch(err => {
            console.error('Copy failed:', err);
            showToast('Failed to copy. Please try again.', true);
        });
}
/**
 * Click handler for the Download button: offer the stored embedding record
 * as a JSON file named after today's date (sbert_embedding_YYYY-MM-DD.json).
 * Does nothing when no embedding has been generated yet.
 */
function downloadEmbeddings() {
    if (!embeddings) return;

    // Serialize the record and percent-encode it for use inside a data: URI.
    const payload = encodeURIComponent(JSON.stringify(embeddings, null, 2));
    // First ten characters of the ISO timestamp are the calendar date.
    const today = new Date().toISOString().slice(0, 10);

    // A detached anchor with a `download` attribute triggers the save
    // when clicked programmatically.
    const anchor = document.createElement('a');
    anchor.setAttribute('href', 'data:application/json;charset=utf-8,' + payload);
    anchor.setAttribute('download', `sbert_embedding_${today}.json`);
    anchor.click();
}
/**
 * Click handler for the New Embedding button: hide the results panel and
 * reset the input so another sentence can be entered.
 */
function newEmbedding() {
    outputSection.style.display = 'none';
    // Emptying the textarea and refreshing the Generate button is exactly
    // what the Clear action does, so reuse it.
    clearInput();
}
// Input validation: re-evaluate the Generate button's disabled state on
// every edit of the sentence textarea.
sentenceInput.addEventListener('input', checkInput);
</script>
<!-- Attribution badge. rel="noopener" stops the page opened in the new tab from reaching back through window.opener; width/height reserve the logo's space before it loads. -->
<p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <a href="https://enzostvs-deepsite.hf.space" style="color: #fff;" target="_blank" rel="noopener">DeepSite</a> <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" width="16" height="16" style="width: 16px; height: 16px; vertical-align: middle;"></p></body>
</html>