<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Nearest Sentence Finder</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      text-align: center;
      margin-top: 50px;
    }
    #nearestSentence {
      font-size: 1.5em;
      color: #333;
    }
  </style>
</head>
<body>
  <h1>Nearest Sentence Finder</h1>
  <p>The sentence closest to the last 30 seconds of speech:</p>
  <div id="nearestSentence">Loading...</div>

  <script type="module">
    // Transformers.js ships as an ES module, so import the pipeline factory
    // from the CDN instead of expecting a global `transformers` object.
    // Pinning an explicit package version in the URL is recommended.
    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
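
    // Flow: keep a rolling 30-second buffer of recognized speech, embed the
    // combined transcript with all-MiniLM-L6-v2, and display whichever of the
    // predefined sentences has the nearest embedding.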
    // Check for browser support
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      alert("Your browser does not support the Web Speech API. Please use a compatible browser.");
    } else {
      const recognition = new SpeechRecognition();
      recognition.continuous = true;
      recognition.interimResults = false;
      recognition.lang = 'en-US';

      let wordBuffer = [];
      const bufferDuration = 30 * 1000; // 30 seconds
      const nearestSentenceElement = document.getElementById('nearestSentence');

      // Candidate sentences to match the live transcript against
      const sampleSentences = ['person', 'man', 'woman', 'camera', 'tv'];
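      // (single words here for demo purposes; longer phrases embed the same way)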
      let sampleEmbeddings = [];
      let extractor; // the feature-extraction pipeline, set once the model loads

      // Load the model and precompute one embedding per sample sentence.
      // Mean pooling + normalization collapses the per-token output into a
      // single fixed-length sentence vector (the usual setup for MiniLM).
      async function initializePipeline() {
        extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
        sampleEmbeddings = await Promise.all(
          sampleSentences.map(async (sentence) => {
            const output = await extractor(sentence, { pooling: 'mean', normalize: true });
            return output.data; // Float32Array, 384 dimensions for MiniLM-L6
          })
        );
        nearestSentenceElement.textContent = 'Model loaded. Start speaking!';
      }

      // Compute Euclidean distance
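      // With normalized embeddings, ||a - b||^2 = 2 - 2*cos(a, b), so ranking by
      // euclidean distance is equivalent to ranking by cosine similarity.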
      function euclideanDistance(vec1, vec2) {
        return Math.sqrt(vec1.reduce((sum, val, i) => sum + Math.pow(val - vec2[i], 2), 0));
      }

      // Find the nearest sentence
      function findNearestSentence(transcriptEmbedding) {
        let nearestSentence = '';
        let minDistance = Infinity;

        sampleEmbeddings.forEach((embedding, index) => {
          const distance = euclideanDistance(transcriptEmbedding, embedding);
          if (distance < minDistance) {
            minDistance = distance;
            nearestSentence = sampleSentences[index];
          }
        });

        return nearestSentence;
      }

      // Start speech recognition
      recognition.start();
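      // Speech recognized before the model finishes loading is still buffered;
      // only the embedding step below is skipped until the model is ready.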

      recognition.onresult = async (event) => {
        // event.results accumulates over the whole continuous session; slice
        // from event.resultIndex so only newly finalized speech is buffered,
        // otherwise earlier utterances would be re-added on every result.
        const transcript = Array.from(event.results)
          .slice(event.resultIndex)
          .map(result => result[0].transcript)
          .join(' ');

        const timestamp = Date.now();
        console.log({ transcript, timestamp });

        // Add transcript to the buffer with timestamps
        wordBuffer.push({ transcript, timestamp });

        // Remove transcripts older than 30 seconds
        wordBuffer = wordBuffer.filter(item => timestamp - item.timestamp <= bufferDuration);

        // Combine all transcripts in the buffer
        const combinedTranscript = wordBuffer.map(item => item.transcript).join(' ');
        console.log({ combinedTranscript });

        // Compute the embedding for the combined transcript (skipped until the model is ready)
        if (extractor) {
          const output = await extractor(combinedTranscript, { pooling: 'mean', normalize: true });
          const transcriptEmbedding = output.data;

          // Find and display the nearest sentence
          const nearestSentence = findNearestSentence(transcriptEmbedding);
          nearestSentenceElement.textContent = nearestSentence;
        }
      };

      let shouldRestart = true; // cleared on fatal errors so onend does not loop

      recognition.onerror = (event) => {
        console.error('Speech recognition error:', event.error);
        if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
          shouldRestart = false;
          alert('Microphone access is blocked. Please allow microphone access and reload the page.');
        }
      };

      recognition.onend = () => {
        if (!shouldRestart) return;
        console.warn('Speech recognition stopped. Restarting...');
        recognition.start(); // browsers end continuous sessions periodically
      };

      // Initialize the pipeline
      initializePipeline().catch(err => {
        console.error('Failed to load the model:', err);
        nearestSentenceElement.textContent = 'Failed to load the model.';
      });
    }
  </script>
</body>
</html>