Spaces:
Sleeping
Sleeping
File size: 2,774 Bytes
b8a29bf d8682b4 30e4262 b8a29bf e108f29 fe85304 b8a29bf 1ec0e70 b8a29bf 411d6c8 b8a29bf d8682b4 b8a29bf 1ec0e70 38b5697 1ec0e70 61c94e1 fe85304 61c94e1 1ec0e70 30e4262 1ec0e70 e108f29 1ec0e70 fe85304 b8a29bf 30e4262 b8a29bf fe85304 b8a29bf |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
import gradio as gr
import azure.cognitiveservices.speech as speechsdk
def assess_pronunciation(audio_file, reference_text):
    """Score a recorded utterance against a reference text.

    Runs a single-shot Azure Speech recognition on ``audio_file`` with
    pronunciation assessment (phoneme granularity, hundred-mark grading,
    prosody enabled) applied to the recognizer.

    Credentials are read from the environment rather than from source:
    ``AZURE_SPEECH_KEY`` (required) and ``AZURE_SPEECH_REGION`` (optional,
    defaults to ``"westus"``).

    Args:
        audio_file: Path of the audio file to assess.
        reference_text: The text the speaker was expected to read.

    Returns:
        On success, a dict with the keys ``"Accuracy"``, ``"Fluency"``,
        ``"Completeness"`` and ``"Prosody"``. Otherwise a dict with a single
        ``"Error"`` key describing what went wrong.
    """
    import os  # local import so this function does not depend on file-level edits

    # Reject unusable input up front instead of handing it to the SDK.
    if not audio_file:
        return {"Error": "No audio provided. Please record or upload an audio clip."}
    if not reference_text or not reference_text.strip():
        return {"Error": "Reference text is empty. Please enter the text you are pronouncing."}

    # Configure Azure Speech Service.
    # The subscription key must never live in source control; any key that was
    # previously committed here should be treated as leaked and rotated.
    speech_key = os.environ.get("AZURE_SPEECH_KEY")
    if not speech_key:
        return {"Error": "Azure Speech key is not configured. Set the AZURE_SPEECH_KEY environment variable."}
    service_region = os.environ.get("AZURE_SPEECH_REGION", "westus")
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)
    # NOTE(review): no recognition language is set here, although the UI title
    # says "Chinese" - confirm whether a locale such as zh-CN should be
    # configured on speech_config.

    # Set up the audio configuration
    audio_config = speechsdk.audio.AudioConfig(filename=audio_file)

    # Create pronunciation assessment config
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
    )
    pronunciation_config.enable_prosody_assessment()

    # Create the recognizer and attach the assessment settings to it
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    pronunciation_config.apply_to(recognizer)

    # Recognize speech and assess pronunciation
    result = recognizer.recognize_once()

    # Debug information
    print(f"Recognition result reason: {result.reason}")

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
        return {
            "Accuracy": pronunciation_result.accuracy_score,
            "Fluency": pronunciation_result.fluency_score,
            "Completeness": pronunciation_result.completeness_score,
            "Prosody": pronunciation_result.prosody_score,
        }

    if result.reason == speechsdk.ResultReason.NoMatch:
        print("NOMATCH: Speech could not be recognized.")
        return {"Error": "Speech could not be recognized. Please try again with a clearer audio."}

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speechsdk.CancellationDetails(result)
        print(f"CANCELED: Reason={cancellation_details.reason}")
        print(f"CANCELED: ErrorDetails={cancellation_details.error_details}")
        return {"Error": f"Speech recognition canceled: {cancellation_details.error_details}"}

    # Any other reason previously fell through and returned None to the UI.
    return {"Error": f"Unexpected recognition result: {result.reason}"}
# Build the two input widgets first, then wire them into the Gradio UI.
_audio_input = gr.Audio(type="filepath")  # recording/upload, passed on as a file path
_reference_input = gr.Textbox(
    label="Reference Text",
    placeholder="Enter the reference text you are pronouncing",
)  # text the speaker is expected to read

# The assessment result dict is rendered as JSON.
interface = gr.Interface(
    title="Chinese Pronunciation Checker",
    fn=assess_pronunciation,
    inputs=[_audio_input, _reference_input],
    outputs="json",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()
|