"""Streamlit demo: transcribe audio files to text with Spark NLP's WhisperForCTC."""

import os

import librosa
import pandas as pd
import pyspark.sql.functions as F
import sparknlp
import streamlit as st
from pyspark.ml import Pipeline
from pyspark.sql.types import *
from sparknlp.annotator import *
from sparknlp.base import *
from sparknlp.common import *
from sparknlp.pretrained import PretrainedPipeline

# Page configuration
st.set_page_config(layout="wide", initial_sidebar_state="auto")

# Custom CSS for styling (empty placeholder in the original source)
st.markdown("""
""", unsafe_allow_html=True)


@st.cache_resource
def init_spark():
    """Start (or reuse) the Spark NLP session; cached across Streamlit reruns."""
    return sparknlp.start()


@st.cache_resource
def create_pipeline(model):
    """Build a Spark ML pipeline: raw audio -> WhisperForCTC transcription.

    Args:
        model: Pretrained model name selected in the sidebar.
            NOTE(review): the original hard-codes "asr_whisper_small_english"
            instead of using this argument — kept for behavioral parity, but
            `model` should probably be passed to `pretrained()`.

    Returns:
        An unfitted pyspark.ml.Pipeline with AudioAssembler + WhisperForCTC.
    """
    audio_assembler = (
        AudioAssembler()
        .setInputCol("audio_content")
        .setOutputCol("audio_assembler")
    )

    speech_to_text = (
        WhisperForCTC.pretrained("asr_whisper_small_english", "en")
        .setInputCols(["audio_assembler"])
        .setOutputCol("text")
    )

    return Pipeline(stages=[audio_assembler, speech_to_text])


def fit_data(pipeline, fed_data):
    """Run the pipeline on one audio file and return its annotation result.

    Args:
        pipeline: Pipeline produced by `create_pipeline`.
        fed_data: Path or file-like object accepted by `librosa.load`.

    Returns:
        The first `fullAnnotate` result from the LightPipeline.
    """
    # Resample to 16 kHz — the rate the Whisper ASR model expects.
    data, _sampling_rate = librosa.load(fed_data, sr=16000)
    data = data.tolist()

    # Fix: the original referenced a global `spark` that is never defined in
    # this file (NameError at runtime). Obtain the cached session explicitly;
    # st.cache_resource guarantees init_spark() returns the same session.
    spark_session = init_spark()
    spark_df = spark_session.createDataFrame([[data]], ["audio_content"])

    model = pipeline.fit(spark_df)
    light_pipeline = LightPipeline(model)
    return light_pipeline.fullAnnotate(data)[0]


def save_uploadedfile(uploadedfile, path):
    """Persist an uploaded file under `path` and return the saved file path."""
    filepath = os.path.join(path, uploadedfile.name)
    with open(filepath, "wb") as f:
        # Streamlit's UploadedFile exposes getbuffer(); plain file-like
        # objects fall back to read().
        if hasattr(uploadedfile, 'getbuffer'):
            f.write(uploadedfile.getbuffer())
        else:
            f.write(uploadedfile.read())
    # Returning the path is new (original returned None) — backward compatible.
    return filepath


# Sidebar content
model_list = ["asr_whisper_small_english"]
model = st.sidebar.selectbox(
    "Choose the pretrained model",
    model_list,
    help="For more info about the models visit: https://sparknlp.org/models",
)

# Main content
# NOTE(review): the original source was truncated mid-string here; the call
# below is a minimal reconstruction preserving the visible description text —
# confirm against the upstream demo (likely HTML markup with
# unsafe_allow_html=True).
st.markdown(
    "This demo transcribes audio files into texts using the WhisperForCTC "
    "Annotator and advanced speech recognition models."
)