// Source listing metadata: C# · 348 lines · 10 KiB · executable file
using System;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using UnityEngine;

#region Enums

/// <summary>
/// Lifecycle states reported by <c>SpeechRecognitionService</c>.
/// The numeric values are spaced out and must stay stable.
/// </summary>
public enum ESpeechRecognitionState
{
    /// <summary>The speech configuration is being (re)built.</summary>
    StartingUp = 0,

    /// <summary>The service is configured and idle.</summary>
    Ready = 10,

    /// <summary>A single-shot recognition is currently running.</summary>
    Listening = 20,

    /// <summary>The owning component has been disabled.</summary>
    Disabled = 90,
}

#endregion

/// <summary>
/// Unity component that runs single-shot speech recognition through the Azure Cognitive
/// Services Speech SDK and publishes recognizer state, partial and final transcriptions,
/// and errors as events.
/// </summary>
/// <remarks>
/// Speech SDK callbacks and the continuation after <c>RecognizeOnceAsync</c> do not run on the
/// Unity main thread. They only write the "pending" fields while holding <c>threadLocker</c>;
/// <c>Update</c> drains those fields and performs every state change and event invocation
/// on the main thread.
/// </remarks>
public class SpeechRecognitionService : MonoBehaviour
{

    #region Inspector Properties

    [Header("Config Values")]
    [SerializeField]
    private bool debugModeIsActive;

    [SerializeField]
    private string speechKey;

    [SerializeField]
    private string speechRegion;

    [SerializeField]
    private string initialRecognitionLanguageCode; // Format: "de-CH"

    [Header("Asset Objects")]
    [SerializeField]
    private AudioClip startedListeningClip;

    #endregion

    #region Public Properties

    #region SpeechRecognitionState

    // volatile: written on the main thread, read by Speech SDK callback threads.
    private volatile ESpeechRecognitionState _speechRecognitionState = ESpeechRecognitionState.StartingUp;

    /// <summary>
    /// Current recognizer state. Assigning a different value logs the change (debug mode only)
    /// and raises <see cref="OnSpeechRecognitionStateChangedEvent"/>; assigning the current
    /// value does nothing.
    /// </summary>
    public ESpeechRecognitionState SpeechRecognitionState
    {
        get { return this._speechRecognitionState; }
        set
        {
            if (value == this._speechRecognitionState)
            {
                return;
            }

            this.logIfInDebugMode($"SpeechRecognitionState changed, new value= {value}");

            this._speechRecognitionState = value;

            this.OnSpeechRecognitionStateChangedEvent?.Invoke(this, value);
        }
    }

    /// <summary>Raised with the new state whenever <see cref="SpeechRecognitionState"/> changes.</summary>
    public event EventHandler<ESpeechRecognitionState> OnSpeechRecognitionStateChangedEvent;

    #endregion

    #region PartialTranscription

    private string _partialTranscription = "";

    /// <summary>
    /// Latest intermediate transcription. Assigning a different value logs the change
    /// (debug mode only) and raises <see cref="OnPartialTranscriptionChangedEvent"/>.
    /// </summary>
    public string PartialTranscription
    {
        get { return this._partialTranscription; }
        set
        {
            if (value == this._partialTranscription)
            {
                return;
            }

            this.logIfInDebugMode("PartialTranscription changed, new value= " + value);

            this._partialTranscription = value;

            this.OnPartialTranscriptionChangedEvent?.Invoke(this, value);
        }
    }

    /// <summary>Raised with the new text whenever <see cref="PartialTranscription"/> changes.</summary>
    public event EventHandler<string> OnPartialTranscriptionChangedEvent;

    #endregion

    #region FullTranscription

    private string _fullTranscription = "";

    /// <summary>
    /// Latest final transcription. Assigning a different value logs the change
    /// (debug mode only) and raises <see cref="OnFullTranscriptionChangedEvent"/>.
    /// </summary>
    public string FullTranscription
    {
        get { return this._fullTranscription; }
        set
        {
            if (value == this._fullTranscription)
            {
                return;
            }

            this.logIfInDebugMode("FullTranscription changed, new value= " + value);

            this._fullTranscription = value;

            this.OnFullTranscriptionChangedEvent?.Invoke(this, value);
        }
    }

    /// <summary>Raised with the new text whenever <see cref="FullTranscription"/> changes.</summary>
    public event EventHandler<string> OnFullTranscriptionChangedEvent;

    #endregion

    #endregion

    #region Private Properties

    private ViaggioAIManager vaim { get { return ViaggioAIManager.Instance; } }

    // Guards listeningSessionId and every "pending" field below. Both the background writers
    // and the main-thread reader (doMainThreadTasks) must hold it.
    private readonly object threadLocker = new object();
    private SpeechConfig speechConfig;

    // Incremented for every accepted StartListeningOnceAsync call; lets a finished recognition
    // detect that a newer session has replaced it.
    private int listeningSessionId;

    private bool listeningRestartIsPending;

    // Set when a recognition finished in the background; the Listening -> Ready transition is
    // then performed on the main thread so state-changed subscribers are never called off-thread.
    private bool readyStateIsPending;

    private bool invokeUserSpeechInputStartedEventIsPending;
    private string pendingPartialTranscription;
    private string pendingFullTranscription;

    // Despite the "Is" prefix this holds the recognized text to pass to
    // OnUserSpeechInputEndedEvent; null means nothing is pending.
    private string invokeUserSpeechInputEndedEventIsPending;
    private string pendingRecoError;

    #endregion

    #region Framework Functions

    void OnEnable()
    {
        this.initialize(this.initialRecognitionLanguageCode);
    }

    void Update()
    {
        this.doMainThreadTasks();
    }

    void OnDisable()
    {
        // Change the state first so a recognition finishing concurrently sees it inside its
        // locked check, then cancel any restart it may already have queued.
        this.SpeechRecognitionState = ESpeechRecognitionState.Disabled;

        lock (this.threadLocker)
        {
            this.listeningRestartIsPending = false;
        }
    }

    #endregion

    #region Private Events

    /// <summary>
    /// Speech SDK callback: the recognizer detected the start of speech.
    /// Only queues a flag; the public event is raised later from <c>Update</c>.
    /// </summary>
    private void speechRecognizer_SpeechStartDetected(object sender, RecognitionEventArgs e)
    {
        if (this == null || this.SpeechRecognitionState != ESpeechRecognitionState.Listening)
        {
            // Component is gone or the listener was stopped before speech start was detected
            return;
        }

        lock (this.threadLocker)
        {
            this.invokeUserSpeechInputStartedEventIsPending = true;
        }
    }

    /// <summary>
    /// Speech SDK callback: an intermediate recognition result is available.
    /// Only queues the text; <see cref="PartialTranscription"/> is updated later from <c>Update</c>.
    /// </summary>
    private void speechRecognizer_Recognizing(object sender, SpeechRecognitionEventArgs e)
    {
        if (this == null || this.SpeechRecognitionState != ESpeechRecognitionState.Listening)
        {
            // Component is gone or the listener was stopped before this
            return;
        }

        string partialText = e?.Result?.Text;

        lock (this.threadLocker)
        {
            this.pendingPartialTranscription = partialText;
        }
    }

    #endregion

    #region Public Events

    /// <summary>Raised (always with <c>true</c>) after the recognizer detected the start of speech.</summary>
    public event EventHandler<bool> OnUserSpeechInputStartedEvent;

    /// <summary>Raised with the recognized text after a recognition produced a non-empty result.</summary>
    public event EventHandler<string> OnUserSpeechInputEndedEvent;

    /// <summary>Raised with an error description when a recognition was canceled or failed.</summary>
    public event EventHandler<string> OnSpeechRecognitionErrorChangedEvent;

    #endregion

    #region Public Functions

    /// <summary>
    /// Starts one single-shot recognition on the default microphone. Results, errors and the
    /// return to <see cref="ESpeechRecognitionState.Ready"/> are delivered through the events
    /// of this class on a later <c>Update</c>.
    /// </summary>
    /// <remarks>
    /// Intended to be called from the main thread. The call is ignored (with a log entry) when
    /// the service is not initialized yet or a recognition is already running. Failures are
    /// reported through <see cref="OnSpeechRecognitionErrorChangedEvent"/> instead of being
    /// thrown, because nothing can observe an exception from an <c>async void</c> method.
    /// </remarks>
    public async void StartListeningOnceAsync()
    {
        if (this.speechConfig == null)
        {
            Debug.LogError("Speech recognizer is not ready yet. Wait for initialize before calling StartListeningOnceAsync");
            return;
        }

        // A previous recognition may have finished in the background without Update having
        // run yet; finish its Listening -> Ready transition before looking at the state.
        bool previousSessionFinished;

        lock (this.threadLocker)
        {
            previousSessionFinished = this.readyStateIsPending;
            this.readyStateIsPending = false;
        }

        if (previousSessionFinished)
        {
            this.applyReadyStateIfStillListening();
        }

        if (this.SpeechRecognitionState == ESpeechRecognitionState.Listening)
        {
            // A second recognizer on the same microphone would produce duplicate results.
            Debug.LogWarning("StartListeningOnceAsync was called while a recognition is already running. The call is ignored.");
            return;
        }

        int sessionId;

        lock (this.threadLocker)
        {
            sessionId = ++this.listeningSessionId;

            // This start satisfies any restart that was still queued.
            this.listeningRestartIsPending = false;
        }

        try
        {
            using (AudioConfig audioConfigInput = AudioConfig.FromDefaultMicrophoneInput())
            using (SpeechRecognizer recognizer = new SpeechRecognizer(this.speechConfig, audioConfigInput))
            {
                this.SpeechRecognitionState = ESpeechRecognitionState.Listening;

                this.vaim.PlayAudioIfEnabled(this.startedListeningClip);

                recognizer.SpeechStartDetected += this.speechRecognizer_SpeechStartDetected;
                recognizer.Recognizing += this.speechRecognizer_Recognizing;

                try
                {
                    // ConfigureAwait(false): everything after this line may run on a thread-pool
                    // thread, so it must only queue work for doMainThreadTasks.
                    SpeechRecognitionResult result = await recognizer.RecognizeOnceAsync().ConfigureAwait(false);

                    this.queueRecognitionResult(result, sessionId);
                }
                finally
                {
                    recognizer.SpeechStartDetected -= this.speechRecognizer_SpeechStartDetected;
                    recognizer.Recognizing -= this.speechRecognizer_Recognizing;
                }
            }
        }
        catch (Exception ex)
        {
            // Catch-all on purpose: this is the top of an async void call chain. Report the
            // failure through the normal error channel and release the Listening state.
            lock (this.threadLocker)
            {
                this.pendingRecoError = $"SpeechRecognitionService Error: {ex.GetType().Name}: {ex.Message}";

                if (sessionId == this.listeningSessionId)
                {
                    this.readyStateIsPending = true;
                }
            }
        }
    }

    /// <summary>
    /// Stops waiting for a recognition result: cancels a queued restart and sets the state to
    /// <see cref="ESpeechRecognitionState.Ready"/>.
    /// </summary>
    /// <remarks>
    /// This does not call into the Speech SDK. A recognition that is still running finishes on
    /// its own, and its result is discarded because the state is no longer Listening.
    /// </remarks>
    public void StopListening()
    {
        // Change the state first so a recognition finishing concurrently sees it inside its
        // locked check, then cancel any restart it may already have queued.
        this.SpeechRecognitionState = ESpeechRecognitionState.Ready;

        lock (this.threadLocker)
        {
            this.listeningRestartIsPending = false;
        }
    }

    /// <summary>
    /// Rebuilds the speech configuration for another recognition language.
    /// </summary>
    /// <param name="languageCode">Language code in the same format as the inspector value, e.g. "de-CH".</param>
    public void SetRecognitionLanguage(string languageCode)
    {
        this.initialize(languageCode);
    }

    #endregion

    #region Private Functions

    /// <summary>
    /// Creates the speech configuration from the inspector key/region and the given language,
    /// moving the state through StartingUp to Ready.
    /// </summary>
    private void initialize(string languageCode)
    {
        this.SpeechRecognitionState = ESpeechRecognitionState.StartingUp;

        this.speechConfig = SpeechConfig.FromSubscription(this.speechKey, this.speechRegion);
        this.speechConfig.SpeechRecognitionLanguage = languageCode;

        // Read the device list once instead of once per access.
        string[] microphoneNames = Microphone.devices;
        string micName = (microphoneNames != null && microphoneNames.Length > 0) ? microphoneNames[0] : "<none>";
        this.logIfInDebugMode($"SpeechRecognitionService initialized using microphone: {micName}");

        this.SpeechRecognitionState = ESpeechRecognitionState.Ready;
    }

    /// <summary>
    /// Translates a finished recognition into pending work for the main thread.
    /// May run on a background thread, so it must not raise events or change the state itself.
    /// </summary>
    /// <param name="result">Result returned by <c>RecognizeOnceAsync</c>.</param>
    /// <param name="sessionId">Session number handed out when this recognition was started.</param>
    private void queueRecognitionResult(SpeechRecognitionResult result, int sessionId)
    {
        string fullTranscription = null;
        string recoError = null;
        bool restartListening = false;

        if (result.Reason == ResultReason.RecognizedSpeech && !string.IsNullOrEmpty(result.Text))
        {
            fullTranscription = result.Text;
        }
        else if (result.Reason == ResultReason.NoMatch)
        {
            restartListening = true;
        }
        else if (result.Reason == ResultReason.Canceled)
        {
            var cancellation = CancellationDetails.FromResult(result);
            recoError = $"SpeechIntentService Canceled: Reason={cancellation.Reason} ErrorDetails={cancellation.ErrorDetails}";
        }

        lock (this.threadLocker)
        {
            bool sessionWasReplaced = sessionId != this.listeningSessionId;
            bool listenerWasStopped = this.SpeechRecognitionState != ESpeechRecognitionState.Listening;

            if (sessionWasReplaced || listenerWasStopped)
            {
                // Listener was stopped (or restarted) before recognition finished -> drop the result
                return;
            }

            this.readyStateIsPending = true;

            if (restartListening)
            {
                this.listeningRestartIsPending = true;
            }

            if (fullTranscription != null)
            {
                this.pendingFullTranscription = fullTranscription;
                this.invokeUserSpeechInputEndedEventIsPending = fullTranscription;
            }

            if (recoError != null)
            {
                this.pendingRecoError = recoError;
            }
        }

        if (restartListening)
        {
            this.logIfInDebugMode("SpeechIntentService NoMatch: Speech could not be recognized.");
        }
    }

    /// <summary>
    /// Completes the Listening -> Ready transition of a finished recognition. Any other current
    /// state was set by a more recent main-thread action and is left untouched.
    /// </summary>
    private void applyReadyStateIfStillListening()
    {
        if (this.SpeechRecognitionState == ESpeechRecognitionState.Listening)
        {
            this.SpeechRecognitionState = ESpeechRecognitionState.Ready;
        }
    }

    /// <summary>
    /// Drains everything the background callbacks queued and applies it on the main thread:
    /// state change, listening restart, speech-started event, partial and full transcription,
    /// speech-ended event and error, in that order.
    /// </summary>
    private void doMainThreadTasks()
    {
        bool applyReadyState;
        bool restartListening;
        bool raiseSpeechInputStarted;
        string partialTranscription;
        string fullTranscription;
        string speechInputEndedText;
        string recoError;

        // Take a snapshot and clear in one locked step so a value written by a callback
        // between "read" and "clear" cannot be lost.
        lock (this.threadLocker)
        {
            applyReadyState = this.readyStateIsPending;
            restartListening = this.listeningRestartIsPending;
            raiseSpeechInputStarted = this.invokeUserSpeechInputStartedEventIsPending;
            partialTranscription = this.pendingPartialTranscription;
            fullTranscription = this.pendingFullTranscription;
            speechInputEndedText = this.invokeUserSpeechInputEndedEventIsPending;
            recoError = this.pendingRecoError;

            this.readyStateIsPending = false;
            this.listeningRestartIsPending = false;
            this.invokeUserSpeechInputStartedEventIsPending = false;
            this.pendingPartialTranscription = null;
            this.pendingFullTranscription = null;
            this.invokeUserSpeechInputEndedEventIsPending = null;
            this.pendingRecoError = null;
        }

        if (applyReadyState)
        {
            this.applyReadyStateIfStillListening();
        }

        if (restartListening)
        {
            this.StartListeningOnceAsync();
        }

        if (raiseSpeechInputStarted)
        {
            this.OnUserSpeechInputStartedEvent?.Invoke(this, true);
        }

        if (partialTranscription != null)
        {
            this.PartialTranscription = partialTranscription;
        }

        if (fullTranscription != null)
        {
            this.FullTranscription = fullTranscription;
        }

        if (speechInputEndedText != null)
        {
            this.OnUserSpeechInputEndedEvent?.Invoke(this, speechInputEndedText);
        }

        if (recoError != null)
        {
            this.OnSpeechRecognitionErrorChangedEvent?.Invoke(this, recoError);
            this.vaim.RaiseViaggioAIError(recoError);
        }
    }

    /// <summary>Writes <paramref name="message"/> to the Unity log when debug mode is enabled in the inspector.</summary>
    private void logIfInDebugMode(string message)
    {
        if (this.debugModeIsActive)
        {
            Debug.Log($"(SpeechRecognitionService) => {message}");
        }
    }

    #endregion

}