Spaces:
Sleeping
Sleeping
File size: 2,558 Bytes
7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 f28d146 7be7ac1 23cb5b2 f28d146 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 |
import asyncio
import io
import logging

import numpy as np
import soundfile as sf
import streamlit as st
import streamlit.components.v1 as components
import torch
import websockets
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
# The tokenizer and the CTC model are both loaded from the same pre-trained
# checkpoint; the identifier is kept in one place so they cannot drift apart.
_CHECKPOINT = "facebook/wav2vec2-base-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
# Sample rate (Hz) the wav2vec2-base-960h checkpoint expects its input at.
_MODEL_SAMPLE_RATE = 16000
_LOGGER = logging.getLogger(__name__)


async def recognize_speech(websocket):
    """Transcribe every audio message received on *websocket*.

    Each binary message is decoded with ``soundfile``, run through the
    module-level ``tokenizer`` and ``model``, and the decoded text is sent
    back on the same connection. Messages that cannot be used (text frames,
    undecodable or empty audio) are logged and skipped so that one bad chunk
    does not terminate the whole connection.

    Args:
        websocket: The server-side connection handed over by
            ``websockets.serve``.
    """
    async for message in websocket:
        # Text frames arrive as ``str``; only binary frames can carry audio.
        if isinstance(message, str):
            _LOGGER.warning("Ignoring non-binary message of length %d", len(message))
            continue

        try:
            # float32 matches the default dtype of the model weights.
            waveform, sample_rate = sf.read(io.BytesIO(message), dtype="float32")
        except RuntimeError:
            # soundfile reports unreadable/unsupported data as a RuntimeError
            # (sub)class; skip the chunk instead of dropping the client.
            _LOGGER.exception(
                "Could not decode audio chunk (%d bytes); skipping", len(message)
            )
            continue

        # Multi-channel audio comes back as (frames, channels); the model
        # takes a single 1-D waveform, so average the channels down to mono.
        if waveform.ndim > 1:
            waveform = waveform.mean(axis=1)
        if waveform.size == 0:
            continue

        if sample_rate != _MODEL_SAMPLE_RATE:
            # No resampling is done here; the transcription is still attempted.
            _LOGGER.warning(
                "Audio sample rate is %d Hz but the model expects %d Hz; "
                "transcription quality may suffer",
                sample_rate,
                _MODEL_SAMPLE_RATE,
            )

        input_values = tokenizer(waveform, return_tensors="pt").input_values
        with torch.no_grad():
            logits = model(input_values).logits
        # Greedy CTC decoding: most likely token at every time step.
        predicted_ids = torch.argmax(logits, dim=-1)
        transcription = tokenizer.decode(predicted_ids[0])
        await websocket.send(transcription)
async def main_logic():
    """Serve ``recognize_speech`` over WebSocket on localhost:8000 indefinitely."""
    host, port = "localhost", 8000
    async with websockets.serve(recognize_speech, host, port):
        # Nothing ever resolves this future, so awaiting it keeps the server
        # context open until the task is cancelled.
        never_resolved = asyncio.Future()
        await never_resolved
# Create the streamlit interface
st.title("Real-Time ASR with Transformers.js")
# The script can't be run via "streamlit run" because that hangs asyncio loop
#
# The recorder page is rendered through an HTML component (an iframe) rather
# than st.markdown(..., unsafe_allow_html=True): <script> tags injected via
# st.markdown are not executed by the browser, so the recorder never started.
components.html(
    """
<script>
const handleAudio = async (stream) => {
    const websocket = new WebSocket('ws://localhost:8000');
    const mediaRecorder = new MediaRecorder(stream, {mimeType: 'audio/webm'});
    mediaRecorder.addEventListener("dataavailable", event => {
        console.log('dataavailable:', event.data);
        // send() throws while the socket is still connecting or already
        // closed, so only forward chunks once the connection is open.
        if (websocket.readyState === WebSocket.OPEN) {
            websocket.send(event.data);
        }
    });
    websocket.onmessage = (event) => {
        const transcription = event.data;
        const transcriptionDiv = document.getElementById("transcription");
        // Append as a text node (not innerHTML) so server-sent text can
        // never be interpreted as markup.
        transcriptionDiv.appendChild(document.createTextNode(transcription));
        transcriptionDiv.appendChild(document.createElement("br"));
        console.log('Received:', transcription);
    };
    mediaRecorder.start(1000);
    websocket.onopen = () => {
        console.log('Connected to WebSocket');
    };
    websocket.onerror = (error) => {
        console.error('WebSocket Error:', error);
    };
    websocket.onclose = () => {
        console.log('WebSocket Closed');
    };
};
navigator.mediaDevices.getUserMedia({ audio: true })
    .then(handleAudio)
    .catch(error => console.error('getUserMedia Error:', error));
</script>
<div id="transcription">Your transcriptions will appear here:</div>
""",
    height=400,
    scrolling=True,
)
if __name__ == "__main__":
    # Script entry point: hand the server coroutine to a fresh event loop and
    # block here for as long as it runs.
    server_coroutine = main_logic()
    asyncio.run(server_coroutine)