02-HarmonyOS5-SpeechRecognizer-Case

1 zhousg 0 6/11/2025, 9:06:42 AM
Case Description: This is a real-time speech-to-text case implemented on top of the AI basic voice services. It captures audio through the microphone and converts it into text in real time.

import { speechRecognizer } from '@kit.CoreSpeechKit' import { abilityAccessCtrl } from '@kit.AbilityKit' import { promptAction } from '@kit.ArkUI'

/**
 * Real-time speech-to-text demo component.
 *
 * Captures microphone audio while the button is long-pressed and streams the
 * recognized text into the Text view via the CoreSpeechKit ASR engine.
 */
@Entry
@ComponentV2
struct SpeechRecognizer {
  // True while a recognition session is in progress.
  @Local isRecording: boolean = false
  // Latest recognized text shown in the UI (full text so far, not a delta).
  @Local text: string = ''
  // Cached result of the microphone permission request.
  hasPermissions: boolean = false
  // Engine created per session in startRecord and released in closeRecord.
  asrEngine?: speechRecognizer.SpeechRecognitionEngine
  // Single session id shared by startListening/finish so the two calls
  // cannot drift apart (was hard-coded '10000' in two places).
  private readonly sessionId: string = '10000'

  aboutToAppear(): void {
    // Request microphone permission up front so startRecord can rely on the
    // cached hasPermissions flag.
    this.requestPermissions()
  }

  async requestPermissions() {
    const atManager = abilityAccessCtrl.createAtManager();
    const res = await atManager.requestPermissionsFromUser(getContext(), ['ohos.permission.MICROPHONE'])
    // Granted only if every requested permission was approved.
    this.hasPermissions =
      res.authResults.every(grantStatus => grantStatus === abilityAccessCtrl.GrantStatus.PERMISSION_GRANTED)
  }

  // Start microphone recognition (long-press onAction handler).
  async startRecord() {
    if (canIUse('SystemCapability.AI.SpeechRecognizer')) {
      if (!this.hasPermissions) {
        return promptAction.showToast({ message: 'Microphone not authorized' })
      }
      if (this.isRecording) {
        return promptAction.showToast({ message: 'Recording...' })
      }
      this.isRecording = true
      // A fresh engine per session pairs with the shutdown() in closeRecord.
      this.asrEngine = await speechRecognizer.createEngine({
        language: 'zh-CN',
        online: 1
      })
      const _this = this
      this.asrEngine.setListener({
        onStart(sessionId: string, eventMessage: string) {
        },
        onEvent(sessionId: string, eventCode: number, eventMessage: string) {
        },
        onResult(sessionId: string, result: speechRecognizer.SpeechRecognitionResult) {
          _this.text = result.result
          if (result.isLast) {
            _this.isRecording = false
          }
        },
        onComplete(sessionId: string, eventMessage: string) {
        },
        onError(sessionId: string, errorCode: number, errorMessage: string) {
          // Fix: the original left this empty, so a failed session kept
          // isRecording stuck at true (blocking every later startRecord)
          // and swallowed the error silently.
          _this.isRecording = false
          promptAction.showToast({ message: `Recognition error ${errorCode}: ${errorMessage}` })
        }
      })
      const audioParam: speechRecognizer.AudioInfo = {
        audioType: 'pcm',
        sampleRate: 16000,
        soundChannel: 1,
        sampleBit: 16
      }
      // vadBegin/vadEnd are voice-activity-detection silence windows in ms;
      // maxAudioDuration caps one session at 20 s — TODO confirm against
      // the CoreSpeechKit extraParams documentation.
      const extraParam: Record<string, Object> = {
        "recognitionMode": 0,
        "vadBegin": 2000,
        "vadEnd": 3000,
        "maxAudioDuration": 20000
      }
      const recognizerParams: speechRecognizer.StartParams = {
        sessionId: this.sessionId,
        audioInfo: audioParam,
        extraParams: extraParam
      }
      this.asrEngine.startListening(recognizerParams)
    }
  }

  // Stop recognition (long-press end/cancel handler).
  async closeRecord() {
    if (canIUse('SystemCapability.AI.SpeechRecognizer')) {
      // Fix: the original called finish() and then cancel(). finish() ends
      // audio input and requests the final result, while cancel() discards
      // the pending session — calling both aborted the final result, so the
      // contradictory cancel() is dropped.
      this.asrEngine?.finish(this.sessionId)
      this.asrEngine?.shutdown()
      // Fix: clear the reference to the destroyed engine and reset the
      // recording flag (onResult's isLast path never fires for a session
      // that ends without a final result).
      this.asrEngine = undefined
      this.isRecording = false
    }
  }

  build() {
    Column() {
      Row() {
        Text(this.text)
          .width('100%')
          .lineHeight(32)
      }
      .alignItems(VerticalAlign.Top)
      .width('100%')
      .layoutWeight(1)

      // While the press is held the label prompts the user to speak.
      Button(this.isRecording ? 'Start Speaking' : 'Press and Speak')
        .width('100%')
        .gesture(LongPressGesture()
          .onAction(() => {
            this.startRecord()
          })
          .onActionEnd(() => {
            this.closeRecord()
          })
          .onActionCancel(() => {
            this.closeRecord()
          }))

    }
    .padding(15)
    .height('100%')
    .width('100%')
  }
}

Comments (0)

No comments yet