<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>Nearest Sentence Finder</title>
  <style>
    body {
      font-family: Arial, sans-serif;
      text-align: center;
      margin-top: 50px;
    }
    #nearestSentence {
      font-size: 1.5em;
      color: #333;
    }
  </style>
</head>
<body>
  <h1>Nearest Sentence Finder</h1>
  <p>The sentence closest to the last 30 seconds of speech:</p>
  <div id="nearestSentence">Loading...</div>

  <script type="module">
    // Transformers.js ships as an ES module, so import the pipeline factory
    // from the CDN instead of expecting a global `transformers` object.
    // Pinning an explicit package version in the URL is recommended.
    import { pipeline } from 'https://cdn.jsdelivr.net/npm/@xenova/transformers';
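
    // Flow: keep a rolling 30-second buffer of recognized speech, embed the
    // combined transcript with all-MiniLM-L6-v2, and display whichever of the
    // predefined sentences has the nearest embedding.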
    // Check for browser support
    const SpeechRecognition = window.SpeechRecognition || window.webkitSpeechRecognition;
    if (!SpeechRecognition) {
      alert("Your browser does not support the Web Speech API. Please use a compatible browser.");
    } else {
      const recognition = new SpeechRecognition();
      recognition.continuous = true;
      recognition.interimResults = false;
      recognition.lang = 'en-US';

      let wordBuffer = [];
      const bufferDuration = 30 * 1000; // 30 seconds
      const nearestSentenceElement = document.getElementById('nearestSentence');

      // Candidate sentences to match the live transcript against
      const sampleSentences = ['person', 'man', 'woman', 'camera', 'tv'];
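      // (single words here for demo purposes; longer phrases embed the same way)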
      let sampleEmbeddings = [];
      let extractor; // the feature-extraction pipeline, set once the model loads

      // Load the model and precompute one embedding per sample sentence.
      // Mean pooling + normalization collapses the per-token output into a
      // single fixed-length sentence vector (the usual setup for MiniLM).
      async function initializePipeline() {
        extractor = await pipeline('feature-extraction', 'Xenova/all-MiniLM-L6-v2');
        sampleEmbeddings = await Promise.all(
          sampleSentences.map(async (sentence) => {
            const output = await extractor(sentence, { pooling: 'mean', normalize: true });
            return output.data; // Float32Array, 384 dimensions for MiniLM-L6
          })
        );
        nearestSentenceElement.textContent = 'Model loaded. Start speaking!';
      }

      // Compute Euclidean distance
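      // With normalized embeddings, ||a - b||^2 = 2 - 2*cos(a, b), so ranking by
      // euclidean distance is equivalent to ranking by cosine similarity.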
      function euclideanDistance(vec1, vec2) {
        return Math.sqrt(vec1.reduce((sum, val, i) => sum + Math.pow(val - vec2[i], 2), 0));
      }

      // Find the nearest sentence
      function findNearestSentence(transcriptEmbedding) {
        let nearestSentence = '';
        let minDistance = Infinity;

        sampleEmbeddings.forEach((embedding, index) => {
          const distance = euclideanDistance(transcriptEmbedding, embedding);
          if (distance < minDistance) {
            minDistance = distance;
            nearestSentence = sampleSentences[index];
          }
        });

        return nearestSentence;
      }

      // Start speech recognition
      recognition.start();
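      // Speech recognized before the model finishes loading is still buffered;
      // only the embedding step below is skipped until the model is ready.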

      recognition.onresult = async (event) => {
        // event.results accumulates over the whole continuous session; slice
        // from event.resultIndex so only newly finalized speech is buffered,
        // otherwise earlier utterances would be re-added on every result.
        const transcript = Array.from(event.results)
          .slice(event.resultIndex)
          .map(result => result[0].transcript)
          .join(' ');

        const timestamp = Date.now();
        console.log({ transcript, timestamp });

        // Add transcript to the buffer with timestamps
        wordBuffer.push({ transcript, timestamp });

        // Remove transcripts older than 30 seconds
        wordBuffer = wordBuffer.filter(item => timestamp - item.timestamp <= bufferDuration);

        // Combine all transcripts in the buffer
        const combinedTranscript = wordBuffer.map(item => item.transcript).join(' ');
        console.log({ combinedTranscript });

        // Compute the embedding for the combined transcript (skipped until the model is ready)
        if (extractor) {
          const output = await extractor(combinedTranscript, { pooling: 'mean', normalize: true });
          const transcriptEmbedding = output.data;

          // Find and display the nearest sentence
          const nearestSentence = findNearestSentence(transcriptEmbedding);
          nearestSentenceElement.textContent = nearestSentence;
        }
      };

      let shouldRestart = true; // cleared on fatal errors so onend does not loop

      recognition.onerror = (event) => {
        console.error('Speech recognition error:', event.error);
        if (event.error === 'not-allowed' || event.error === 'service-not-allowed') {
          shouldRestart = false;
          alert('Microphone access is blocked. Please allow microphone access and reload the page.');
        }
      };

      recognition.onend = () => {
        if (!shouldRestart) return;
        console.warn('Speech recognition stopped. Restarting...');
        recognition.start(); // browsers end continuous sessions periodically
      };

      // Initialize the pipeline
      initializePipeline().catch(err => {
        console.error('Failed to load the model:', err);
        nearestSentenceElement.textContent = 'Failed to load the model.';
      });
    }
  </script>
</body>
</html>