# Spaces: Sleeping
import asyncio
import io
import logging

import numpy as np
import soundfile as sf
import streamlit as st
import torch
import websockets
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
# Load the pre-trained tokenizer and CTC model once, at import time.
# Both are fetched from the same checkpoint name.
_CHECKPOINT = "facebook/wav2vec2-base-960h"
tokenizer = Wav2Vec2Tokenizer.from_pretrained(_CHECKPOINT)
model = Wav2Vec2ForCTC.from_pretrained(_CHECKPOINT)
_logger = logging.getLogger(__name__)


def _transcribe(audio_bytes):
    """Decode one encoded audio payload and return its transcription.

    This function is synchronous; ``recognize_speech`` runs it in a worker
    thread so that decoding and inference do not stall the event loop.

    Args:
        audio_bytes: Contents of an audio file in a format that
            ``soundfile`` can read.

    Returns:
        The text decoded from the arg-max token ids of the model output.

    Raises:
        RuntimeError: If ``soundfile`` cannot decode the payload.
        ValueError: If the payload decodes to zero samples.
    """
    wf, _samplerate = sf.read(io.BytesIO(audio_bytes))
    # NOTE(review): the decoded sample rate is not used. The checkpoint is
    # presumably meant for 16 kHz input -- confirm and resample if needed.
    if wf.size == 0:
        raise ValueError("audio payload contains no samples")
    if wf.ndim > 1:
        # Multi-channel files come back as (frames, channels); average the
        # channels so a single 1-D waveform is handed to the tokenizer.
        wf = wf.mean(axis=1)
    input_values = tokenizer(wf, return_tensors="pt").input_values
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return tokenizer.decode(predicted_ids[0])


async def recognize_speech(websocket):
    """Transcribe every binary audio message received on ``websocket``.

    Each binary message is treated as one complete audio file. Its
    transcription is sent back on the same connection. Text frames and
    payloads that cannot be decoded or transcribed are logged and skipped,
    so one bad message does not tear down the whole session.

    Args:
        websocket: The server-side connection object passed in by
            ``websockets.serve``.
    """
    loop = asyncio.get_running_loop()
    async for message in websocket:
        if not isinstance(message, (bytes, bytearray)):
            _logger.warning("Ignoring non-binary WebSocket message")
            continue
        try:
            # Decoding and inference are blocking; keep them off the loop.
            transcription = await loop.run_in_executor(
                None, _transcribe, message
            )
        except (RuntimeError, ValueError) as exc:
            _logger.warning(
                "Skipping audio message that could not be transcribed: %s",
                exc,
            )
            continue
        await websocket.send(transcription)
async def main_logic(host="localhost", port=8000):
    """Serve ``recognize_speech`` over WebSocket until cancelled.

    Args:
        host: Interface to bind the server to. Defaults to ``"localhost"``.
        port: TCP port to listen on. Defaults to ``8000``; the browser
            script in this file connects to ``ws://localhost:8000``, so
            keep the two in sync when overriding.
    """
    async with websockets.serve(recognize_speech, host, port):
        # A future that is never resolved keeps the server running.
        await asyncio.Future()
# Create the streamlit interface
st.title("Real-Time ASR with Transformers.js")
# The script can't be run via "streamlit run" because that hangs asyncio loop
#
# The embedded page script streams microphone audio to the WebSocket server
# in one-second chunks and appends each transcription it receives.
#   * Recording starts only after the socket has opened, and chunks are sent
#     only while it is open, because WebSocket.send() throws while the
#     connection is still being established.
#   * Transcriptions are appended as text nodes, so server replies are never
#     parsed as HTML.
#   * The recorder is stopped when the socket closes.
# NOTE(review): Streamlit may not execute <script> tags injected through
# st.markdown -- confirm, and consider streamlit.components.v1.html instead.
# NOTE(review): the recorder emits audio/webm chunks; confirm that the
# server-side decoder accepts that container.
st.markdown("""
<script>
const handleAudio = async (stream) => {
    const websocket = new WebSocket('ws://localhost:8000');
    const mediaRecorder = new MediaRecorder(stream, {mimeType: 'audio/webm'});
    mediaRecorder.addEventListener("dataavailable", event => {
        console.log('dataavailable:', event.data);
        if (websocket.readyState === WebSocket.OPEN) {
            websocket.send(event.data);
        }
    });
    websocket.onmessage = (event) => {
        const transcription = event.data;
        const transcriptionDiv = document.getElementById("transcription");
        transcriptionDiv.append(transcription, document.createElement("br"));
        console.log('Received:', transcription);
    };
    websocket.onopen = () => {
        console.log('Connected to WebSocket');
        mediaRecorder.start(1000);
    };
    websocket.onerror = (error) => {
        console.error('WebSocket Error:', error);
    };
    websocket.onclose = () => {
        console.log('WebSocket Closed');
        if (mediaRecorder.state !== 'inactive') {
            mediaRecorder.stop();
        }
    };
};
navigator.mediaDevices.getUserMedia({ audio: true })
    .then(handleAudio)
    .catch(error => console.error('getUserMedia Error:', error));
</script>
<div id="transcription">Your transcriptions will appear here:</div>
""", unsafe_allow_html=True)
if __name__ == "__main__":
    # Direct execution: run the WebSocket server coroutine on a fresh event
    # loop and block until it finishes or the process is interrupted.
    server_coro = main_logic()
    asyncio.run(server_coro)