File size: 2,774 Bytes
b8a29bf
d8682b4
 
30e4262
b8a29bf
e108f29
 
fe85304
b8a29bf
 
 
 
 
 
1ec0e70
b8a29bf
 
 
 
411d6c8
b8a29bf
 
 
d8682b4
b8a29bf
 
1ec0e70
38b5697
 
1ec0e70
61c94e1
 
 
 
 
 
 
 
fe85304
61c94e1
 
 
 
 
 
1ec0e70
 
30e4262
1ec0e70
 
 
e108f29
1ec0e70
fe85304
b8a29bf
 
 
30e4262
 
 
 
b8a29bf
 
 
fe85304
b8a29bf
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import os

import azure.cognitiveservices.speech as speechsdk
import gradio as gr

def assess_pronunciation(audio_file, reference_text):
    """Score a recorded utterance against a reference text with Azure Speech.

    The Azure credentials are read from the environment:
    ``AZURE_SPEECH_KEY`` (required) and ``AZURE_SPEECH_REGION`` (optional,
    defaults to ``"westus"``).

    Args:
        audio_file: Path of the audio file to assess. A falsy value (``None``
            or an empty string) is treated as "no audio supplied".
        reference_text: The text the speaker was expected to read.

    Returns:
        On success, a dict with the ``Accuracy``, ``Fluency``,
        ``Completeness`` and ``Prosody`` scores. Otherwise a dict with a
        single ``Error`` key describing why no scores are available.
    """
    # Fail early instead of handing the SDK an empty file name.
    if not audio_file:
        return {"Error": "No audio provided. Please record or upload an audio file."}

    # Credentials come from the environment so the subscription key is never
    # committed together with the source.
    speech_key = os.environ.get("AZURE_SPEECH_KEY")
    service_region = os.environ.get("AZURE_SPEECH_REGION", "westus")
    if not speech_key:
        print("CONFIG: AZURE_SPEECH_KEY is not set.")
        return {
            "Error": "Speech service is not configured: "
                     "set the AZURE_SPEECH_KEY environment variable."
        }

    # NOTE(review): no recognition language is set here, so the SDK default
    # applies, while the UI title says "Chinese" -- confirm whether
    # speech_config.speech_recognition_language should be set (e.g. "zh-CN").
    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # Read the utterance from the given file.
    audio_config = speechsdk.audio.AudioConfig(filename=audio_file)

    # Phoneme-level assessment on a 0-100 scale, with prosody scoring enabled.
    pronunciation_config = speechsdk.PronunciationAssessmentConfig(
        reference_text=reference_text,
        grading_system=speechsdk.PronunciationAssessmentGradingSystem.HundredMark,
        granularity=speechsdk.PronunciationAssessmentGranularity.Phoneme,
    )
    pronunciation_config.enable_prosody_assessment()

    # Attach the assessment settings to a recognizer and run a single
    # recognition pass over the audio.
    recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
    pronunciation_config.apply_to(recognizer)
    result = recognizer.recognize_once()

    # Debug information
    print(f"Recognition result reason: {result.reason}")

    if result.reason == speechsdk.ResultReason.RecognizedSpeech:
        pronunciation_result = speechsdk.PronunciationAssessmentResult(result)
        return {
            "Accuracy": pronunciation_result.accuracy_score,
            "Fluency": pronunciation_result.fluency_score,
            "Completeness": pronunciation_result.completeness_score,
            "Prosody": pronunciation_result.prosody_score,
        }

    if result.reason == speechsdk.ResultReason.NoMatch:
        print("NOMATCH: Speech could not be recognized.")
        return {"Error": "Speech could not be recognized. Please try again with a clearer audio."}

    if result.reason == speechsdk.ResultReason.Canceled:
        cancellation_details = speechsdk.CancellationDetails(result)
        print(f"CANCELED: Reason={cancellation_details.reason}")
        print(f"CANCELED: ErrorDetails={cancellation_details.error_details}")
        return {"Error": f"Speech recognition canceled: {cancellation_details.error_details}"}

    # Any other reason previously fell through and returned None; report it
    # explicitly so the caller always receives a dict.
    return {"Error": f"Unexpected recognition result: {result.reason}"}

# Build the Gradio UI: an audio clip and its reference text go in, and the
# dictionary returned by assess_pronunciation is rendered as JSON.
_audio_input = gr.Audio(type="filepath")  # audio is passed on as a file path
_reference_input = gr.Textbox(
    label="Reference Text",
    placeholder="Enter the reference text you are pronouncing",
)

interface = gr.Interface(
    title="Chinese Pronunciation Checker",
    fn=assess_pronunciation,
    inputs=[_audio_input, _reference_input],
    outputs="json",
)

# Start the web app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()