<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Nearest Sentence Finder</title>
<!-- @xenova/transformers ships as an ES module; it is imported inside the module script below rather than via a classic script tag (which would not define a global `transformers` object). -->
<style>
body {
font-family: Arial, sans-serif;
text-align: center;
margin-top: 50px;
}
#nearestSentence {
font-size: 1.5em;
color: #333;
}
</style>
</head>
<body>
<h1>Nearest Sentence Finder</h1>
<p>Sentence closest to the spoken words:</p>
<div id="nearestSentence">Loading...</div>
<script type="module">
import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
// Check for browser support
const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
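// webkitSpeechRecognition covers Chromium-based browsers and Safari, which still expose the API under a vendor prefix.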
if (!SpeechRecognition) {
alert("Your browser does not support the Web Speech API. Please use a compatible browser.");
} else {
const recognition = new SpeechRecognition();
recognition.continuous = true;
recognition.interimResults = false;
recognition.lang = 'en-US';
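// continuous keeps the session open across pauses; interimResults = false means onresult fires only with finalized transcripts.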
let wordBuffer = [];
let micBlocked = false; // set when microphone access is denied, so recognition is not restarted in a loop
const bufferDuration = 30 * 1000; // 30 seconds
const nearestSentenceElement = document.getElementById('nearestSentence');
// Predefined sentences
const sampleSentences = ['person', 'man', 'woman', 'camera', 'tv'];
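// Each candidate is embedded once at startup; the rolling transcript is then compared against these fixed vectors.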
let sampleEmbeddings = [];
let extractor; // feature-extraction pipeline, assigned once the model has loaded
// Load the pipeline and compute embeddings for sample sentences
async function initializePipeline() {
extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
// Mean-pool and normalize the token embeddings so each sentence becomes one fixed-length vector.
sampleEmbeddings = await Promise.all(
sampleSentences.map(async (sentence) => {
const output = await extractor(sentence, { pooling: 'mean', normalize: true });
return Array.from(output.data);
})
);
nearestSentenceElement.textContent = 'Model loaded. Start speaking!';
}
// Compute Euclidean distance
function euclideanDistance(vec1, vec2) {
return Math.sqrt(vec1.reduce((sum, val, i) => sum + Math.pow(val - vec2[i], 2), 0));
}
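// With { normalize: true } the embeddings are unit-length, so ranking by Euclidean distance selects the same nearest candidate as cosine similarity would.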
// Find the nearest sentence
function findNearestSentence(transcriptEmbedding) {
let nearestSentence = '';
let minDistance = Infinity;
sampleEmbeddings.forEach((embedding, index) => {
const distance = euclideanDistance(transcriptEmbedding, embedding);
if (distance < minDistance) {
minDistance = distance;
nearestSentence = sampleSentences[index];
}
});
return nearestSentence;
}
// Start speech recognition
recognition.start();
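// Recognition starts before the model finishes loading; early transcripts are buffered but only embedded once the extractor is ready.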
recognition.onresult = async (event) => {
// event.results is cumulative; slice from event.resultIndex so only the newly finalized results are added.
const transcript = Array.from(event.results)
.slice(event.resultIndex)
.map(result => result[0].transcript)
.join(' ');
const timestamp = Date.now();
console.log({ transcript, timestamp });
// Add transcript to the buffer with timestamps
wordBuffer.push({ transcript, timestamp });
// Remove transcripts older than 30 seconds
wordBuffer = wordBuffer.filter(item => timestamp - item.timestamp <= bufferDuration);
// Combine all transcripts in the buffer
const combinedTranscript = wordBuffer.map(item => item.transcript).join(' ');
console.log({ combinedTranscript });
// Compute embedding for the combined transcript
if (extractor) {
const output = await extractor(combinedTranscript, { pooling: 'mean', normalize: true });
const transcriptEmbedding = Array.from(output.data);
// Find and display the nearest sentence
const nearestSentence = findNearestSentence(transcriptEmbedding);
nearestSentenceElement.textContent = nearestSentence;
}
};
recognition.onerror = (event) => {
console.error('Speech recognition error:', event.error);
if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
micBlocked = true;
alert('Microphone access is blocked. Please allow microphone access and reload the page.');
}
};
recognition.onend = () => {
if (micBlocked) return; // don't restart without microphone access
console.warn('Speech recognition stopped. Restarting...');
recognition.start(); // browsers end continuous sessions periodically; restart to keep listening
};
// Initialize the pipeline
initializePipeline();
}
</script>
</body>
</html>