02-HarmonyOS5-SpeechRecognizer-Case
zhousg · 6/11/2025, 9:06:42 AM
Case Description
This case implements real-time speech-to-text on top of the basic AI speech service (Core Speech Kit): it captures audio from the microphone and converts it to text as the user speaks.
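Before the case can record anything, the microphone permission also has to be declared statically, because requestPermissionsFromUser can only grant permissions listed in the module's module.json5. A minimal declaration might look like the following (the reason string resource and the EntryAbility name are placeholders for whatever the project actually uses):

"requestPermissions": [
  {
    "name": "ohos.permission.MICROPHONE",
    "reason": "$string:microphone_reason",
    "usedScene": {
      "abilities": ["EntryAbility"],
      "when": "inuse"
    }
  }
]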
import { speechRecognizer } from '@kit.CoreSpeechKit'
import { abilityAccessCtrl } from '@kit.AbilityKit'
import { promptAction } from '@kit.ArkUI'
@Entry
@ComponentV2
struct SpeechRecognizer {
@Local isRecording: boolean = false // whether a recognition session is in progress
@Local text: string = '' // recognized text shown on screen
hasPermissions: boolean = false // result of the microphone permission request
asrEngine?: speechRecognizer.SpeechRecognitionEngine // the ASR engine instance
aboutToAppear(): void {
// Request microphone permissions
this.requestPermissions()
}
async requestPermissions() {
const atManager = abilityAccessCtrl.createAtManager();
const res = await atManager.requestPermissionsFromUser(getContext(), ['ohos.permission.MICROPHONE'])
this.hasPermissions =
res.authResults.every(grantStatus => grantStatus === abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED)
}
// Start microphone recognition
async startRecord() {
if (canIUse('SystemCapability.AI.SpeechRecognizer')) {
if (!this.hasPermissions) {
return promptAction.showToast({ message: 'Microphone not authorized' })
}
if (this.isRecording) {
return promptAction.showToast({ message: 'Recording...' })
}
this.isRecording = true
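// Create the speech recognition engine for Chinese (zh-CN)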
this.asrEngine = await speechRecognizer.createEngine({
language: 'zh-CN',
online: 1
})
const _this = this
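// Register the listener; onResult delivers the recognized text as it becomes available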
this.asrEngine.setListener({
onStart(sessionId: string, eventMessage: string) {
},
onEvent(sessionId: string, eventCode: number, eventMessage: string) {
},
onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
_this.text = result.result // latest recognized text from the engine
if (result.isLast) {
// isLast marks the final result of this session
_this.isRecording = false
}
},
onComplete(sessionId: string, eventMessage: string) {
},
onError(sessionId: string, errorCode: number, errorMessage: string) {
}
})
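// PCM audio format used for recognition: 16 kHz sample rate, mono, 16-bit samples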
const audioParam: speechRecognizer.AudioInfo = {
audioType: 'pcm',
sampleRate: 16000,
soundChannel: 1,
sampleBit: 16
}
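// Extra session parameters: recognition mode, voice activity detection (VAD) begin/end thresholds in ms, and the maximum audio duration in ms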
const extraParam: Record<string, Object> = {
"recognitionMode": 0,
"vadBegin": 2000,
"vadEnd": 3000,
"maxAudioDuration": 20000
}
const recognizerParams: speechRecognizer.StartParams = {
sessionId: '10000',
audioInfo: audioParam,
extraParams: extraParam
}
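// Start the session; recognition results are pushed to onResult until isLast is true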
this.asrEngine.startListening(recognizerParams)
}
}
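// Stop recognition: end the session and release the engine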
async closeRecord() {
if (canIUse('SystemCapability.AI.SpeechRecognizer')) {
this.asrEngine?.finish('10000')
this.asrEngine?.cancel('10000')
this.asrEngine?.shutdown()
}
}
build() {
Column() {
Row() {
Text(this.text)
.width('100%')
.lineHeight(32)
}
.alignItems(VerticalAlign.Top)
.width('100%')
.layoutWeight(1)
Button(this.isRecording ? 'Speak Now' : 'Press and Hold to Speak')
.width('100%')
.gesture(LongPressGesture()
.onAction(() => {
this.startRecord()
})
.onActionEnd(() => {
this.closeRecord()
})
.onActionCancel(() => {
this.closeRecord()
}))
}
.padding(15)
.height('100%')
.width('100%')
}
}
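One thing the case leaves out is cleanup when the page itself is destroyed while the engine is still alive. A minimal sketch of how that could be handled (not part of the original case; it assumes the default component lifecycle and would sit inside the SpeechRecognizer struct next to aboutToAppear):

aboutToDisappear(): void {
// Release the engine if the component is destroyed mid-session
if (canIUse('SystemCapability.AI.SpeechRecognizer')) {
this.asrEngine?.shutdown()
}
}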