UMCU committed on
Commit
e12081b
·
verified ·
1 Parent(s): 9111f41

Add 2 files

Browse files
Files changed (2) hide show
  1. README.md +6 -4
  2. index.html +592 -19
README.md CHANGED
@@ -1,10 +1,12 @@
1
  ---
2
- title: Sbert Embedder
3
- emoji: 👀
4
  colorFrom: blue
5
- colorTo: gray
6
  sdk: static
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
1
  ---
2
+ title: sbert-embedder
3
+ emoji: 🐳
4
  colorFrom: blue
5
+ colorTo: blue
6
  sdk: static
7
  pinned: false
8
+ tags:
9
+ - deepsite
10
  ---
11
 
12
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
index.html CHANGED
@@ -1,19 +1,592 @@
1
- <!doctype html>
2
- <html>
3
- <head>
4
- <meta charset="utf-8" />
5
- <meta name="viewport" content="width=device-width" />
6
- <title>My static Space</title>
7
- <link rel="stylesheet" href="style.css" />
8
- </head>
9
- <body>
10
- <div class="card">
11
- <h1>Welcome to your static Space!</h1>
12
- <p>You can modify this app directly by editing <i>index.html</i> in the Files and versions tab.</p>
13
- <p>
14
- Also don't forget to check the
15
- <a href="https://huggingface.co/docs/hub/spaces" target="_blank">Spaces documentation</a>.
16
- </p>
17
- </div>
18
- </body>
19
- </html>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>SBERT Embedding Generator</title>
7
+ <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.4.0/css/all.min.css">
8
+ <style>
9
+ :root {
10
+ --primary-color: #4361ee;
11
+ --secondary-color: #3f37c9;
12
+ --accent-color: #4895ef;
13
+ --light-color: #f8f9fa;
14
+ --dark-color: #212529;
15
+ --success-color: #4cc9f0;
16
+ --warning-color: #f72585;
17
+ --border-radius: 8px;
18
+ --box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
19
+ --transition: all 0.3s ease;
20
+ }
21
+
22
+ * {
23
+ margin: 0;
24
+ padding: 0;
25
+ box-sizing: border-box;
26
+ font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
27
+ }
28
+
29
+ body {
30
+ background-color: #f0f2f5;
31
+ color: var(--dark-color);
32
+ line-height: 1.6;
33
+ padding: 20px;
34
+ }
35
+
36
+ .container {
37
+ max-width: 1000px;
38
+ margin: 0 auto;
39
+ padding: 20px;
40
+ }
41
+
42
+ header {
43
+ text-align: center;
44
+ margin-bottom: 30px;
45
+ animation: fadeIn 0.5s ease-out;
46
+ }
47
+
48
+ h1 {
49
+ color: var(--primary-color);
50
+ margin-bottom: 10px;
51
+ font-size: 2.5rem;
52
+ }
53
+
54
+ .subtitle {
55
+ color: #6c757d;
56
+ font-size: 1.1rem;
57
+ }
58
+
59
+ .app-container {
60
+ background-color: white;
61
+ border-radius: var(--border-radius);
62
+ box-shadow: var(--box-shadow);
63
+ overflow: hidden;
64
+ transition: var(--transition);
65
+ animation: slideUp 0.5s ease-out;
66
+ }
67
+
68
+ .input-section, .output-section {
69
+ padding: 25px;
70
+ }
71
+
72
+ .input-section {
73
+ background-color: var(--light-color);
74
+ border-bottom: 1px solid #e9ecef;
75
+ }
76
+
77
+ .output-section {
78
+ display: none;
79
+ }
80
+
81
+ .form-group {
82
+ margin-bottom: 20px;
83
+ }
84
+
85
+ label {
86
+ display: block;
87
+ margin-bottom: 8px;
88
+ font-weight: 600;
89
+ color: var(--dark-color);
90
+ }
91
+
92
+ select, textarea {
93
+ width: 100%;
94
+ padding: 12px;
95
+ border: 1px solid #ced4da;
96
+ border-radius: var(--border-radius);
97
+ font-size: 16px;
98
+ transition: var(--transition);
99
+ }
100
+
101
+ select:focus, textarea:focus {
102
+ outline: none;
103
+ border-color: var(--accent-color);
104
+ box-shadow: 0 0 0 3px rgba(72, 149, 239, 0.25);
105
+ }
106
+
107
+ textarea {
108
+ min-height: 120px;
109
+ resize: vertical;
110
+ }
111
+
112
+ .btn {
113
+ display: inline-block;
114
+ background-color: var(--primary-color);
115
+ color: white;
116
+ border: none;
117
+ border-radius: var(--border-radius);
118
+ padding: 12px 24px;
119
+ font-size: 16px;
120
+ cursor: pointer;
121
+ transition: var(--transition);
122
+ font-weight: 600;
123
+ }
124
+
125
+ .btn:hover {
126
+ background-color: var(--secondary-color);
127
+ transform: translateY(-2px);
128
+ }
129
+
130
+ .btn:disabled {
131
+ background-color: #adb5bd;
132
+ cursor: not-allowed;
133
+ transform: none;
134
+ }
135
+
136
+ .btn-group {
137
+ display: flex;
138
+ gap: 10px;
139
+ margin-top: 20px;
140
+ }
141
+
142
+ .secondary-btn {
143
+ background-color: #6c757d;
144
+ }
145
+
146
+ .secondary-btn:hover {
147
+ background-color: #5a6268;
148
+ }
149
+
150
+ .model-info {
151
+ background-color: #e9ecef;
152
+ padding: 15px;
153
+ border-radius: var(--border-radius);
154
+ margin-top: 15px;
155
+ font-size: 14px;
156
+ color: #495057;
157
+ }
158
+
159
+ .output-container {
160
+ margin-top: 20px;
161
+ }
162
+
163
+ .embedding-preview {
164
+ background-color: #f8f9fa;
165
+ border: 1px solid #e9ecef;
166
+ border-radius: var(--border-radius);
167
+ padding: 15px;
168
+ max-height: 300px;
169
+ overflow-y: auto;
170
+ font-family: 'Courier New', Courier, monospace;
171
+ font-size: 14px;
172
+ line-height: 1.5;
173
+ position: relative;
174
+ }
175
+
176
+ .copy-btn {
177
+ position: absolute;
178
+ top: 10px;
179
+ right: 10px;
180
+ background-color: rgba(255, 255, 255, 0.8);
181
+ border: none;
182
+ border-radius: 4px;
183
+ padding: 5px 10px;
184
+ cursor: pointer;
185
+ transition: var(--transition);
186
+ }
187
+
188
+ .copy-btn:hover {
189
+ background-color: white;
190
+ }
191
+
192
+ .stats {
193
+ display: flex;
194
+ gap: 20px;
195
+ margin-top: 15px;
196
+ font-size: 14px;
197
+ color: #6c757d;
198
+ }
199
+
200
+ .stat-item {
201
+ display: flex;
202
+ align-items: center;
203
+ gap: 5px;
204
+ }
205
+
206
+ .loading {
207
+ display: none;
208
+ text-align: center;
209
+ margin: 30px 0;
210
+ }
211
+
212
+ .spinner {
213
+ width: 50px;
214
+ height: 50px;
215
+ border: 5px solid rgba(67, 97, 238, 0.2);
216
+ border-radius: 50%;
217
+ border-top-color: var(--primary-color);
218
+ animation: spin 1s linear infinite;
219
+ margin: 0 auto 15px;
220
+ }
221
+
222
+ @keyframes spin {
223
+ to { transform: rotate(360deg); }
224
+ }
225
+
226
+ @keyframes fadeIn {
227
+ from { opacity: 0; }
228
+ to { opacity: 1; }
229
+ }
230
+
231
+ @keyframes slideUp {
232
+ from {
233
+ opacity: 0;
234
+ transform: translateY(20px);
235
+ }
236
+ to {
237
+ opacity: 1;
238
+ transform: translateY(0);
239
+ }
240
+ }
241
+
242
+ .dimension-pill {
243
+ display: inline-block;
244
+ background-color: var(--accent-color);
245
+ color: white;
246
+ padding: 3px 8px;
247
+ border-radius: 20px;
248
+ font-size: 12px;
249
+ margin-right: 5px;
250
+ margin-bottom: 5px;
251
+ }
252
+
253
+ .toast {
254
+ position: fixed;
255
+ top: 20px;
256
+ right: 20px;
257
+ background-color: var(--success-color);
258
+ color: white;
259
+ padding: 12px 20px;
260
+ border-radius: var(--border-radius);
261
+ box-shadow: var(--box-shadow);
262
+ transform: translateX(200%);
263
+ transition: transform 0.3s ease;
264
+ z-index: 1000;
265
+ }
266
+
267
+ .toast.show {
268
+ transform: translateX(0);
269
+ }
270
+
271
+ .toast.error {
272
+ background-color: var(--warning-color);
273
+ }
274
+
275
+ footer {
276
+ text-align: center;
277
+ margin-top: 30px;
278
+ color: #6c757d;
279
+ font-size: 14px;
280
+ }
281
+
282
+ @media (max-width: 768px) {
283
+ .container {
284
+ padding: 10px;
285
+ }
286
+
287
+ .input-section, .output-section {
288
+ padding: 15px;
289
+ }
290
+
291
+ .btn-group {
292
+ flex-direction: column;
293
+ }
294
+
295
+ .stats {
296
+ flex-direction: column;
297
+ gap: 10px;
298
+ }
299
+ }
300
+ </style>
301
+ </head>
302
+ <body>
303
+ <div class="container">
304
+ <header>
305
+ <h1>SBERT Embedding Generator</h1>
306
+ <p class="subtitle">Generate sentence embeddings using pre-trained SBERT models</p>
307
+ </header>
308
+
309
+ <div class="app-container">
310
+ <div class="input-section">
311
+ <div class="form-group">
312
+ <label for="model-select">Select SBERT Model</label>
313
+ <select id="model-select">
314
+ <option value="all-MiniLM-L6-v2">all-MiniLM-L6-v2 (384 dimensions, recommended for general use)</option>
315
+ <option value="all-mpnet-base-v2">all-mpnet-base-v2 (768 dimensions, highest quality)</option>
316
+ <option value="multi-qa-mpnet-base-dot-v1">multi-qa-mpnet-base-dot-v1 (768 dimensions, optimized for semantic search)</option>
317
+ <option value="paraphrase-multilingual-MiniLM-L12-v2">paraphrase-multilingual-MiniLM-L12-v2 (384 dimensions, supports 50+ languages)</option>
318
+ </select>
319
+ </div>
320
+
321
+ <div class="form-group">
322
+ <label for="sentence-input">Enter Your Sentence</label>
323
+ <textarea id="sentence-input" placeholder="Type or paste your text here to generate embeddings..."></textarea>
324
+ </div>
325
+
326
+ <div class="model-info">
327
+ <i class="fas fa-info-circle"></i> The selected model will process your text locally in the browser using TensorFlow.js. No data is sent to external servers.
328
+ </div>
329
+
330
+ <div class="btn-group">
331
+ <button id="generate-btn" class="btn">
332
+ <i class="fas fa-cog"></i> Generate Embeddings
333
+ </button>
334
+ <button id="clear-btn" class="btn secondary-btn">
335
+ <i class="fas fa-trash-alt"></i> Clear
336
+ </button>
337
+ </div>
338
+ </div>
339
+
340
+ <div class="loading">
341
+ <div class="spinner"></div>
342
+ <p>Generating embeddings...</p>
343
+ <p id="loading-details">Loading model and processing text</p>
344
+ </div>
345
+
346
+ <div class="output-section">
347
+ <h2>Generated Embeddings</h2>
348
+ <div class="stats">
349
+ <div class="stat-item">
350
+ <i class="fas fa-layer-group"></i>
351
+ <span>Dimensions: <span id="dimension-count">0</span></span>
352
+ </div>
353
+ <div class="stat-item">
354
+ <i class="fas fa-calculator"></i>
355
+ <span>Vector Length: <span id="vector-length">0</span></span>
356
+ </div>
357
+ <div class="stat-item">
358
+ <i class="fas fa-memory"></i>
359
+ <span>Processing Time: <span id="process-time">0</span> ms</span>
360
+ </div>
361
+ </div>
362
+
363
+ <div class="output-container">
364
+ <label>Embedding Vector Preview</label>
365
+ <div class="embedding-preview" id="embedding-output">
366
+ <button class="copy-btn" id="copy-btn">
367
+ <i class="fas fa-copy"></i> Copy
368
+ </button>
369
+ <div id="embedding-text">No embeddings generated yet.</div>
370
+ </div>
371
+ </div>
372
+
373
+ <div class="btn-group">
374
+ <button id="download-btn" class="btn">
375
+ <i class="fas fa-download"></i> Download as JSON
376
+ </button>
377
+ <button id="new-embedding-btn" class="btn secondary-btn">
378
+ <i class="fas fa-plus"></i> New Embedding
379
+ </button>
380
+ </div>
381
+ </div>
382
+ </div>
383
+
384
+ <div class="toast" id="toast">
385
+ Embeddings copied to clipboard!
386
+ </div>
387
+
388
+ <footer>
389
+ <p>This demo uses TensorFlow.js to run SBERT models in your browser. For production use, consider using a backend service.</p>
390
+ </footer>
391
+ </div>
392
+
393
+ <!-- Load TensorFlow.js and Universal Sentence Encoder -->
394
+ <script src="https://cdn.jsdelivr.net/npm/@tensorflow/tfjs"></script>
395
+ <script src="https://cdn.jsdelivr.net/npm/@tensorflow-models/universal-sentence-encoder"></script>
396
+
397
+ <script>
398
// Shorthand for id-based lookups of the elements this script drives.
const byId = (id) => document.getElementById(id);

// Input controls
const modelSelect = byId('model-select');
const sentenceInput = byId('sentence-input');
const generateBtn = byId('generate-btn');
const clearBtn = byId('clear-btn');

// Panels toggled while generating and after results arrive
const outputSection = document.querySelector('.output-section');
const loadingSection = document.querySelector('.loading');

// Result fields and actions
const embeddingOutput = byId('embedding-text');
const dimensionCount = byId('dimension-count');
const vectorLength = byId('vector-length');
const processTime = byId('process-time');
const copyBtn = byId('copy-btn');
const downloadBtn = byId('download-btn');
const newEmbeddingBtn = byId('new-embedding-btn');
const toast = byId('toast');
const loadingDetails = byId('loading-details');

// Mutable state: the lazily loaded model and the most recent result record.
let model = null;
let embeddings = null;

// Wire each button to its click handler (same registration order as before).
[
  [generateBtn, generateEmbeddings],
  [clearBtn, clearInput],
  [copyBtn, copyEmbeddings],
  [downloadBtn, downloadEmbeddings],
  [newEmbeddingBtn, newEmbedding],
].forEach(([button, handler]) => {
  button.addEventListener('click', handler);
});

// Set the initial enabled/disabled state of the Generate button.
checkInput();
428
+
429
+ // Functions
430
/**
 * Enable the Generate button only when the textarea holds
 * something other than whitespace.
 */
function checkInput() {
  const hasText = sentenceInput.value.trim() !== '';
  generateBtn.disabled = !hasText;
}
433
+
434
/**
 * Reveal the loading panel with a status line and lock the Generate button.
 *
 * @param {string} message - Text written into the loading-details element.
 */
function showLoading(message) {
  generateBtn.disabled = true;
  loadingSection.style.display = 'block';
  loadingDetails.textContent = message;
}
439
+
440
/**
 * Hide the loading panel and restore the Generate button.
 *
 * The button state is re-derived from the textarea via checkInput() instead
 * of being force-enabled, so clearing the input while work is in flight does
 * not leave an enabled button with nothing to embed.
 */
function hideLoading() {
  loadingSection.style.display = 'none';
  checkInput();
}
444
+
445
// Handle of the pending auto-hide timer, kept so a newer toast can cancel it.
let toastHideTimer = null;

/**
 * Show a transient notification that hides itself after three seconds.
 *
 * @param {string} message - Text to display in the toast.
 * @param {boolean} [isError=false] - Apply the error styling when true.
 */
function showToast(message, isError = false) {
  toast.textContent = message;
  toast.className = isError ? 'toast error show' : 'toast show';

  // Without this, the timer of an earlier toast would hide a newer one early.
  if (toastHideTimer !== null) {
    clearTimeout(toastHideTimer);
  }

  toastHideTimer = setTimeout(() => {
    toast.className = 'toast';
    toastHideTimer = null;
  }, 3000);
}
453
+
454
/**
 * Load the embedding model and cache it in the module-level `model` variable.
 *
 * NOTE: this is still a placeholder. Whatever `modelName` is selected, the
 * Universal Sentence Encoder is loaded; `modelName` is only used for the
 * status message. Real SBERT support would require hosting the models in
 * TFJS format and loading them with tf.loadGraphModel().
 *
 * @param {string} modelName - Name shown in the loading message.
 * @returns {Promise<object>} The loaded model.
 * @throws Rethrows any loading error after logging it and showing an error toast.
 */
async function loadModel(modelName) {
  try {
    showLoading(`Loading ${modelName} model...`);

    // The encoder comes from a CDN script tag; fail with a clear message
    // if that script did not load instead of a bare ReferenceError.
    if (typeof use === 'undefined') {
      throw new Error('Universal Sentence Encoder script is not loaded');
    }

    // The real download already takes time, so no artificial delay is added.
    model = await use.load();

    return model;
  } catch (error) {
    console.error('Model loading error:', error);
    showToast('Error loading model. Please try again.', true);
    throw error;
  }
}
477
+
478
/**
 * Click handler: embed the current textarea content and display the result.
 *
 * Loads the model on first use, times the embedding call, and passes the
 * resulting vector to displayEmbeddings(). Errors are logged and reported
 * through an error toast; they are not rethrown.
 *
 * @returns {Promise<void>}
 */
async function generateEmbeddings() {
  // Guard before entering try/finally so an empty input does not run the
  // cleanup path (which would touch the loading UI and button state).
  const sentence = sentenceInput.value.trim();
  if (!sentence) return;

  const modelName = modelSelect.value;
  // Named distinctly from the module-level `embeddings` result record.
  let embeddingTensor = null;

  try {
    // Load model if not already loaded
    if (!model) {
      model = await loadModel(modelName);
    }

    showLoading('Generating embeddings...');

    const startTime = performance.now();

    // Generate embeddings - using USE as a placeholder
    embeddingTensor = await model.embed([sentence]);
    const embeddingArray = await embeddingTensor.array();
    const embeddingVector = embeddingArray[0];

    const elapsedTime = (performance.now() - startTime).toFixed(2);

    displayEmbeddings(embeddingVector, elapsedTime, modelName);
  } catch (error) {
    console.error('Embedding generation error:', error);
    showToast('Error generating embeddings. Please try again.', true);
  } finally {
    hideLoading();
    // TensorFlow.js tensors are not garbage collected; release the backing
    // memory explicitly once the values have been copied out.
    if (embeddingTensor) {
      embeddingTensor.dispose();
    }
  }
}
512
+
513
/**
 * Escape text for safe interpolation into an HTML string.
 *
 * @param {*} text - Value to escape; converted to a string first.
 * @returns {string} The text with &, <, >, " and ' replaced by entities.
 */
function escapeHtml(text) {
  return String(text)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
}

/**
 * Render an embedding vector and its statistics, and store the full result
 * in the module-level `embeddings` record used by copy/download.
 *
 * @param {number[]} vector - The embedding values.
 * @param {string} elapsedTime - Processing time in ms, already formatted.
 * @param {string} modelName - Model label shown above the preview.
 */
function displayEmbeddings(vector, elapsedTime, modelName) {
  // Show output section
  outputSection.style.display = 'block';

  // Euclidean (L2) norm of the vector, formatted to four decimals.
  const length = Math.sqrt(vector.reduce((sum, val) => sum + val * val, 0)).toFixed(4);

  // Update stats
  dimensionCount.textContent = vector.length;
  vectorLength.textContent = length;
  processTime.textContent = elapsedTime;

  // Truncate by dimension count so the note below matches what is shown
  // and the preview never ends in the middle of a number.
  const previewLength = 100;
  const previewText = vector.length > previewLength
    ? JSON.stringify(vector.slice(0, previewLength), null, 2)
      + `\n... [truncated, showing first ${previewLength} of ${vector.length} dimensions]`
    : JSON.stringify(vector, null, 2);

  const pills = vector
    .slice(0, 10)
    .map((value, i) => `<span class="dimension-pill">${i}: ${value.toFixed(6)}</span>`)
    .join('');

  // modelName is escaped because it is inserted through innerHTML.
  embeddingOutput.innerHTML = `
    <p><strong>Model:</strong> ${escapeHtml(modelName)}</p>
    <pre>${previewText}</pre>
    <div class="dimensions-container">
      ${pills}
    </div>
  `;

  // Store the full embeddings for download/copy
  embeddings = {
    model: modelName,
    sentence: sentenceInput.value.trim(),
    embeddings: vector,
    dimension: vector.length,
    vector_length: parseFloat(length),
    processing_time_ms: parseFloat(elapsedTime),
    timestamp: new Date().toISOString()
  };
}
551
+
552
/**
 * Empty the textarea, then refresh the Generate button state to match.
 */
function clearInput() {
  sentenceInput.value = '';
  checkInput();
}
556
+
557
/**
 * Copy the stored result record to the clipboard as pretty-printed JSON.
 *
 * Does nothing when no embeddings have been generated yet. Failures,
 * including an unavailable Clipboard API, are logged and reported with an
 * error toast rather than thrown.
 *
 * @returns {Promise<void>}
 */
async function copyEmbeddings() {
  if (!embeddings) return;

  try {
    // navigator.clipboard is undefined outside secure contexts; surface that
    // through the same failure toast instead of an uncaught TypeError.
    const clipboard = navigator.clipboard;
    if (!clipboard || typeof clipboard.writeText !== 'function') {
      throw new Error('Clipboard API is not available in this context');
    }

    await clipboard.writeText(JSON.stringify(embeddings, null, 2));
    showToast('Embeddings copied to clipboard!');
  } catch (err) {
    console.error('Copy failed:', err);
    showToast('Failed to copy. Please try again.', true);
  }
}
567
+
568
/**
 * Offer the stored result record as a JSON file download.
 *
 * Builds a data: URI from the pretty-printed record and clicks a temporary
 * anchor whose download name carries today's date (YYYY-MM-DD).
 * Does nothing when no embeddings have been generated yet.
 */
function downloadEmbeddings() {
  if (!embeddings) return;

  const encodedJson = encodeURIComponent(JSON.stringify(embeddings, null, 2));
  const datePart = new Date().toISOString().slice(0, 10);

  const anchor = document.createElement('a');
  anchor.setAttribute('href', 'data:application/json;charset=utf-8,' + encodedJson);
  anchor.setAttribute('download', `sbert_embedding_${datePart}.json`);
  anchor.click();
}
581
+
582
/**
 * Start over: empty the textarea, hide the results panel, and refresh the
 * Generate button state.
 */
function newEmbedding() {
  sentenceInput.value = '';
  outputSection.style.display = 'none';
  checkInput();
}
587
+
588
+ // Input validation
589
+ sentenceInput.addEventListener('input', checkInput);
590
+ </script>
591
+ <p style="border-radius: 8px; text-align: center; font-size: 12px; color: #fff; margin-top: 16px;position: fixed; left: 8px; bottom: 8px; z-index: 10; background: rgba(0, 0, 0, 0.8); padding: 4px 8px;">Made with <a href="https://enzostvs-deepsite.hf.space" style="color: #fff;" target="_blank" >DeepSite</a> <img src="https://enzostvs-deepsite.hf.space/logo.svg" alt="DeepSite Logo" style="width: 16px; height: 16px; vertical-align: middle;"></p></body>
592
+ </html>