448 lines
13 KiB
C#
Executable File
448 lines
13 KiB
C#
Executable File
using System;
|
|
using System.Collections;
|
|
using System.Collections.Generic;
|
|
using System.Runtime.InteropServices;
|
|
using System.Threading.Tasks;
|
|
using UnityEngine;
|
|
using UnityEngine.Android;
|
|
|
|
#region Enums
|
|
|
|
/// <summary>
/// High-level states reported by <c>ViaggioAIManager.ViaggioAIState</c>.
/// The numeric values are spaced in steps of ten and are assigned explicitly.
/// </summary>
public enum EViaggioAIState
{
    /// <summary>Set while the manager is running its delayed initialization.</summary>
    StartingUp = 0,

    /// <summary>No intent recognition is in progress.</summary>
    Idle = 10,

    /// <summary>Set when intent recognition has been started and speech input is awaited.</summary>
    Listening = 20,

    /// <summary>Set after the recognized text has been sent to the bot and a reply is pending.</summary>
    WaitingForIntentRecognition = 30,
}
|
|
|
|
#endregion
|
|
|
|
/// <summary>
/// Scene singleton that ties together the speech recognition service, the speech
/// synthesizer service and the OpenAI service. It forwards their events, tracks
/// the current <see cref="EViaggioAIState"/> and drives one intent-recognition
/// round trip: listen, send the transcription to the bot, compare the reply
/// against the keys of the caller-supplied intents.
/// </summary>
public class ViaggioAIManager : MonoBehaviour
{
    #region Inspector Properties

    [Header("Config Values")]
    [SerializeField]
    private bool debugModeIsActive;

    [SerializeField]
    private bool soundEffectsEnabled;

    [Header("Scene Objects")]
    [SerializeField]
    private AudioSource audioSource;

    [SerializeField]
    private GameObject servicesContainer;

    [SerializeField]
    private GameObject debugContent;

    [Header("Services")]
    [SerializeField]
    private SpeechRecognitionService speechRecognitionService;

    [SerializeField]
    private SpeechSynthesizerService speechSynthesizerService;

    [SerializeField]
    private OpenAIServices openAIServices;

    [Header("Asset Objects")]
    [SerializeField]
    private AudioClip startedListeningClip;

    [SerializeField]
    private AudioClip waitingForIntentRecognitionClip;

    [SerializeField]
    private AudioClip intentRecognitionCompletedClip;

    #endregion

    #region Public Properties

    #region Singleton

    /// <summary>The manager instance registered in <c>Awake</c>.</summary>
    public static ViaggioAIManager Instance { get; private set; }

    #endregion

    #region ViaggioAIState

    private EViaggioAIState _viaggioAIState = EViaggioAIState.Idle;

    /// <summary>
    /// Current state of the manager. Assigning a different value logs the change
    /// (debug mode only) and raises <see cref="OnViaggioAIStateChangedEvent"/>.
    /// </summary>
    public EViaggioAIState ViaggioAIState
    {
        get { return this._viaggioAIState; }
        private set
        {
            if (value == this._viaggioAIState)
            {
                return;
            }

            this.LogIfInDebugMode("ViaggioAIState changed, new value= " + value);

            this._viaggioAIState = value;

            this.OnViaggioAIStateChangedEvent?.Invoke(this, value);
        }
    }

    /// <summary>Raised with the new state whenever <see cref="ViaggioAIState"/> changes.</summary>
    public event EventHandler<EViaggioAIState> OnViaggioAIStateChangedEvent;

    #endregion

    #region PartialTranscription

    private string _partialTranscription = "";

    /// <summary>
    /// Latest partial transcription. Assigning a different value logs the change
    /// (debug mode only) and raises <see cref="OnPartialTranscriptionChangedEvent"/>.
    /// </summary>
    public string PartialTranscription
    {
        get { return this._partialTranscription; }
        set
        {
            if (value == this._partialTranscription)
            {
                return;
            }

            this.LogIfInDebugMode("PartialTranscription changed, new value= " + value);

            this._partialTranscription = value;

            this.OnPartialTranscriptionChangedEvent?.Invoke(this, value);
        }
    }

    /// <summary>Raised with the new text whenever <see cref="PartialTranscription"/> changes.</summary>
    public event EventHandler<string> OnPartialTranscriptionChangedEvent;

    #endregion

    #region FullTranscription

    private string _fullTranscription = "";

    /// <summary>
    /// Latest full transcription. Assigning a different value logs the change
    /// (debug mode only) and raises <see cref="OnFullTranscriptionChangedEvent"/>.
    /// </summary>
    public string FullTranscription
    {
        get { return this._fullTranscription; }
        set
        {
            if (value == this._fullTranscription)
            {
                return;
            }

            this.LogIfInDebugMode("FullTranscription changed, new value= " + value);

            this._fullTranscription = value;

            this.OnFullTranscriptionChangedEvent?.Invoke(this, value);
        }
    }

    /// <summary>Raised with the new text whenever <see cref="FullTranscription"/> changes.</summary>
    public event EventHandler<string> OnFullTranscriptionChangedEvent;

    #endregion

    /// <summary>
    /// Request created by <see cref="StartIntentRecognition"/>; set back to
    /// <c>null</c> by <see cref="StopIntentRecognition"/>.
    /// </summary>
    public RequestDataModel PendingRequestDataModel { get; private set; } = null;

    #endregion

    #region Private Properties

    // Seconds to wait in initWithDelay before the services container is activated.
    private float initDelay = 0.5f;

    #endregion

    #region Framework Functions

    /// <summary>
    /// Registers this component as the singleton, or schedules it for destruction
    /// when another live instance is already registered.
    /// </summary>
    void Awake()
    {
        if (Instance != null && Instance != this)
        {
            Destroy(this);
            return;
        }

        Instance = this;
    }

    /// <summary>Subscribes to the service events and starts the delayed initialization.</summary>
    void OnEnable()
    {
        // Destroy() issued from Awake is deferred, so a duplicate component can
        // still receive OnEnable. It must not subscribe to the shared services
        // or re-run initialization while it waits to be destroyed.
        if (Instance != null && Instance != this)
        {
            return;
        }

        this.subscribeToEvents();

        this.initWithDelay();
    }

    /// <summary>Removes the service event subscriptions made in <c>OnEnable</c>.</summary>
    void OnDisable()
    {
        this.unsubscribeFromEvents();
    }

    #endregion

    #region Private Events

    /// <summary>Attaches this manager's handlers to every service that is assigned.</summary>
    private void subscribeToEvents()
    {
        if (this.speechRecognitionService != null)
        {
            this.speechRecognitionService.OnUserSpeechInputStartedEvent += this.onUserSpeechInputStarted;
            this.speechRecognitionService.OnPartialTranscriptionChangedEvent += this.onPartialTranscriptionChanged;
            this.speechRecognitionService.OnFullTranscriptionChangedEvent += this.onFullTranscriptionChanged;
            this.speechRecognitionService.OnUserSpeechInputEndedEvent += this.onUserSpeechInputEnded;
            this.speechRecognitionService.OnLastRecoErrorChangedEvent += this.onLastRecoErrorChanged;
        }

        if (this.speechSynthesizerService != null)
        {
            this.speechSynthesizerService.OnSpeechOutputStartedEvent += this.onSpeechOutputStarted;
            this.speechSynthesizerService.OnSpeechOutputEndedEvent += this.onSpeechOutputEnded;
            this.speechSynthesizerService.OnSynthErrorChangedEvent += this.onSynthErrorChanged;
        }

        if (this.openAIServices != null)
        {
            this.openAIServices.OnLastBotReplyChangedEvent += this.onLastBotReplyChanged;
            this.openAIServices.OnLastBotErrorChangedEvent += this.onLastBotErrorChanged;
        }
    }

    /// <summary>Detaches this manager's handlers from every service that is assigned.</summary>
    private void unsubscribeFromEvents()
    {
        if (this.speechRecognitionService != null)
        {
            this.speechRecognitionService.OnUserSpeechInputStartedEvent -= this.onUserSpeechInputStarted;
            this.speechRecognitionService.OnPartialTranscriptionChangedEvent -= this.onPartialTranscriptionChanged;
            this.speechRecognitionService.OnFullTranscriptionChangedEvent -= this.onFullTranscriptionChanged;
            this.speechRecognitionService.OnUserSpeechInputEndedEvent -= this.onUserSpeechInputEnded;
            this.speechRecognitionService.OnLastRecoErrorChangedEvent -= this.onLastRecoErrorChanged;
        }

        if (this.speechSynthesizerService != null)
        {
            this.speechSynthesizerService.OnSpeechOutputStartedEvent -= this.onSpeechOutputStarted;
            this.speechSynthesizerService.OnSpeechOutputEndedEvent -= this.onSpeechOutputEnded;
            this.speechSynthesizerService.OnSynthErrorChangedEvent -= this.onSynthErrorChanged;
        }

        if (this.openAIServices != null)
        {
            this.openAIServices.OnLastBotReplyChangedEvent -= this.onLastBotReplyChanged;
            this.openAIServices.OnLastBotErrorChangedEvent -= this.onLastBotErrorChanged;
        }
    }

    // Re-raises the recognizer's "speech started" notification; the payload is always true.
    private void onUserSpeechInputStarted(object sender, bool startDetected)
    {
        this.LogIfInDebugMode("OnUserSpeechInputStarted");

        this.OnUserSpeechInputStartedEvent?.Invoke(this, true);
    }

    // Mirrors the recognizer's partial transcription into PartialTranscription.
    private void onPartialTranscriptionChanged(object sender, string partialTranscription)
    {
        this.LogIfInDebugMode($"OnPartialTranscriptionChanged: {partialTranscription}");

        this.PartialTranscription = partialTranscription;
    }

    // Mirrors the recognizer's full transcription into FullTranscription.
    private void onFullTranscriptionChanged(object sender, string fullTranscription)
    {
        this.LogIfInDebugMode($"OnFullTranscriptionChanged: {fullTranscription}");

        this.FullTranscription = fullTranscription;
    }

    // Re-raises the "speech ended" notification, then sends the text to the bot.
    private void onUserSpeechInputEnded(object sender, string fullTranscription)
    {
        this.LogIfInDebugMode("OnUserSpeechInputEnded");

        this.OnUserSpeechInputEndedEvent?.Invoke(this, fullTranscription);

        this.sendIntentRequest(fullTranscription);
    }

    private void onLastRecoErrorChanged(object sender, string errorText)
    {
        this.handleError(sender, errorText);
    }

    private void onSpeechOutputStarted(object sender, bool dummy)
    {
        this.OnSpeechOutputStartedEvent?.Invoke(this, dummy);
    }

    private void onSpeechOutputEnded(object sender, bool dummy)
    {
        this.OnSpeechOutputEndedEvent?.Invoke(this, dummy);
    }

    private void onSynthErrorChanged(object sender, string errorText)
    {
        this.handleError(sender, errorText);
    }

    private void onLastBotReplyChanged(object sender, string replyText)
    {
        this.processBotReply(replyText);
    }

    private void onLastBotErrorChanged(object sender, string errorText)
    {
        this.handleError(sender, errorText);
    }

    #endregion

    #region Public Events

    // Speech Recognition
    public event EventHandler<object> OnUserSpeechInputStartedEvent;
    public event EventHandler<string> OnUserSpeechInputEndedEvent;

    // Speech Synthesis
    public event EventHandler<bool> OnSpeechOutputStartedEvent;
    public event EventHandler<bool> OnSpeechOutputEndedEvent;

    // Intent Recognition
    public event EventHandler<bool> OnIntentRecognitionInitiatedEvent;
    public event EventHandler<string> OnIntentRecognitionSucceededEvent;
    public event EventHandler<bool> OnIntentRecognitionFailedEvent;

    // Error Handling
    public event EventHandler<string> OnViaggioAIErrorEvent;

    #endregion

    #region Public Functions

    /// <summary>
    /// Begins one intent-recognition round: plays the "started listening" clip,
    /// stores a new pending request, clears both transcriptions, asks the speech
    /// recognition service to listen once and switches to
    /// <see cref="EViaggioAIState.Listening"/>.
    /// </summary>
    /// <param name="possibleIntents">
    /// Intents stored on the pending request. A bot reply counts as a success
    /// when it equals one of the dictionary keys.
    /// </param>
    public void StartIntentRecognition(Dictionary<string, string> possibleIntents)
    {
        this.playAudioIfEnabled(this.startedListeningClip);

        // Prepare all request state BEFORE listening starts, so a recognizer
        // callback that fires quickly finds the pending request in place and
        // its transcription is not overwritten by the resets below.
        this.PendingRequestDataModel = new RequestDataModel() { PossibleIntents = possibleIntents };
        this.PartialTranscription = null;
        this.FullTranscription = null;

        this.speechRecognitionService.StartListeningOnceAsync();

        this.ViaggioAIState = EViaggioAIState.Listening;
    }

    /// <summary>Forwards the language code to the speech recognition service.</summary>
    /// <param name="languageCode">Value passed unchanged to <c>SetRecognitionLanguage</c>.</param>
    public void SetSpeechRecognitionLanguage(string languageCode)
    {
        this.speechRecognitionService.SetRecognitionLanguage(languageCode);
    }

    /// <summary>
    /// Cancels the current round: drops the pending request, clears both
    /// transcriptions, stops the recognizer and the OpenAI service, and returns
    /// to <see cref="EViaggioAIState.Idle"/>.
    /// </summary>
    public void StopIntentRecognition()
    {
        this.PendingRequestDataModel = null;
        this.PartialTranscription = null;
        this.FullTranscription = null;

        this.speechRecognitionService.StopListening();
        this.openAIServices.Stop();

        this.ViaggioAIState = EViaggioAIState.Idle;
    }

    /// <summary>Forwards a synthesis request to the speech synthesizer service.</summary>
    /// <param name="text">Text passed to the synthesizer.</param>
    /// <param name="languageCode">Language code passed to the synthesizer.</param>
    /// <param name="voiceNameOverride">Optional voice name passed to the synthesizer; <c>null</c> by default.</param>
    public void SynthesizeText(string text, string languageCode, string voiceNameOverride = null)
    {
        this.speechSynthesizerService.Synthesize(text, languageCode, voiceNameOverride);
    }

    /// <summary>Writes <paramref name="message"/> to the Unity log, but only when debug mode is active.</summary>
    /// <param name="message">Text appended after the "(ViaggioAI) => " prefix.</param>
    public void LogIfInDebugMode(string message)
    {
        if (!this.debugModeIsActive)
        {
            return;
        }

        Debug.Log($"(ViaggioAI) => {message}");
    }

    #endregion

    #region Private Functions

    /// <summary>
    /// Enters <see cref="EViaggioAIState.StartingUp"/>, shows or hides the debug
    /// content, waits <c>initDelay</c> seconds, then activates the services
    /// container and switches to <see cref="EViaggioAIState.Idle"/>.
    /// </summary>
    private async void initWithDelay()
    {
        this.ViaggioAIState = EViaggioAIState.StartingUp;

        this.debugContent.SetActive(this.debugModeIsActive);

        await Task.Delay(TimeSpan.FromSeconds(this.initDelay));

        // The component can be destroyed while the delay is pending (scene
        // unload, duplicate singleton). Unity's overloaded == reports a
        // destroyed object as null; do not touch scene objects in that case.
        if (this == null)
        {
            return;
        }

        this.servicesContainer.SetActive(true);

        this.ViaggioAIState = EViaggioAIState.Idle;
    }

    /// <summary>
    /// Stores the recognized text on the pending request, sends the resulting
    /// request text to the bot and switches to
    /// <see cref="EViaggioAIState.WaitingForIntentRecognition"/>.
    /// </summary>
    /// <param name="text">Transcription delivered with the "speech input ended" event.</param>
    private void sendIntentRequest(string text)
    {
        RequestDataModel pendingRequest = this.PendingRequestDataModel;

        // A "speech ended" event can arrive after StopIntentRecognition() or
        // without any StartIntentRecognition() call; there is nothing to send then.
        if (pendingRequest == null)
        {
            this.LogIfInDebugMode("Ignoring recognized speech because no intent recognition is pending.");
            return;
        }

        pendingRequest.InputText = text;

        string requestText = pendingRequest.GetRequestText();
        this.openAIServices.SendTextToBot(requestText);

        this.LogIfInDebugMode("Sending intent request to ChatBot");

        this.ViaggioAIState = EViaggioAIState.WaitingForIntentRecognition;
        this.playAudioIfEnabled(this.waitingForIntentRecognitionClip);

        this.OnIntentRecognitionInitiatedEvent?.Invoke(this, true);
    }

    /// <summary>
    /// Evaluates a bot reply against the pending request: raises the succeeded
    /// event when the reply is one of the possible intent keys, the failed event
    /// otherwise, then returns to <see cref="EViaggioAIState.Idle"/> and plays
    /// the completion clip.
    /// </summary>
    /// <param name="reply">Reply text delivered by the OpenAI service.</param>
    private void processBotReply(string reply)
    {
        RequestDataModel pendingRequest = this.PendingRequestDataModel;

        if (pendingRequest == null)
        {
            this.LogIfInDebugMode($"Ignoring incoming bot reply because intent recognition was stopped.");
            return;
        }

        // ContainsKey throws on a null key, and the intents themselves are
        // caller-supplied and may be null; both cases count as "not recognized".
        bool intentRecognized = reply != null
            && pendingRequest.PossibleIntents != null
            && pendingRequest.PossibleIntents.ContainsKey(reply);

        if (intentRecognized)
        {
            this.LogIfInDebugMode($"Recognition succeeded, Key = {reply}");

            this.OnIntentRecognitionSucceededEvent?.Invoke(this, reply);
        }
        else
        {
            this.LogIfInDebugMode($"Recognition failed, Reply = {reply}");

            this.OnIntentRecognitionFailedEvent?.Invoke(this, false);
        }

        this.ViaggioAIState = EViaggioAIState.Idle;
        this.playAudioIfEnabled(this.intentRecognitionCompletedClip);
    }

    /// <summary>
    /// Plays <paramref name="clip"/> once on the audio source when sound effects
    /// are enabled. Does nothing when the audio source or the clip is unassigned.
    /// </summary>
    /// <param name="clip">Clip to play; may be unassigned.</param>
    private void playAudioIfEnabled(AudioClip clip)
    {
        if (!this.soundEffectsEnabled)
        {
            return;
        }

        if (this.audioSource == null || clip == null)
        {
            return;
        }

        this.audioSource.PlayOneShot(clip);
    }

    /// <summary>
    /// Logs the error to the Unity console and raises <see cref="OnViaggioAIErrorEvent"/>
    /// with the same formatted text.
    /// </summary>
    /// <param name="sender">Object that reported the error; its string form is included in the log text.</param>
    /// <param name="errorText">Error description supplied by the reporting service.</param>
    private void handleError(object sender, string errorText)
    {
        string logText = $"{sender} Error: {errorText}";

        Debug.LogError(logText);
        this.OnViaggioAIErrorEvent?.Invoke(this, logText);
    }

    #endregion
}
|