390 lines
11 KiB
C#
390 lines
11 KiB
C#
using System;
|
|
using System.Threading.Tasks;
|
|
using UnityEngine;
|
|
|
|
|
|
#region Enums
|
|
|
|
/// <summary>
/// Lifecycle state of a conversation as tracked by <c>ConversationService</c>.
/// The numeric values are explicit and spaced in steps of ten.
/// </summary>
public enum EConversationState
{
    /// <summary>Initial state; also the state set by <c>ConversationService.StopConversation()</c>.</summary>
    NotStarted = 0,

    /// <summary>Set once the initial prompt has been handed to the bot service.</summary>
    Running = 10,

    /// <summary>NOTE(review): not assigned anywhere in ConversationService; presumably reserved for the outro phase - confirm.</summary>
    OutroText = 20,

    /// <summary>Set when the conversation ends because of a bot error, the round-trip limit, or a silence timeout.</summary>
    Ended = 30,
}
|
|
|
|
/// <summary>
/// What the bot is currently doing, as tracked by <c>ConversationService</c>.
/// </summary>
public enum EBotState
{
    /// <summary>Initial state; restored when speech output ends while the bot was speaking.</summary>
    Idle = 0,

    /// <summary>Set right after a text has been sent to the bot service.</summary>
    Thinking = 10,

    /// <summary>Set right after a bot reply has been handed to the speech synthesizer.</summary>
    Speaking = 20,
}
|
|
|
|
#endregion
|
|
|
|
/// <summary>
/// Coordinates a spoken conversation: recognized user input is forwarded to the bot service,
/// bot replies are handed to the speech synthesizer, and the conversation is ended after a
/// maximum number of round trips, after a period of silence, or when the bot reports an error.
/// </summary>
public class ConversationService : MonoBehaviour
{
    #region Inspector Properties

    [Header("Config Values")]
    [SerializeField]
    private bool debugModeIsActive;

    [Header("Scene Objects")]
    [SerializeField]
    private OpenAIServices openAIServices_Conversation;

    #endregion

    #region Public Properties

    #region ConversationState

    private EConversationState _conversationState = EConversationState.NotStarted;

    /// <summary>
    /// Current conversation state. Assigning a different value logs the change (debug mode only)
    /// and raises <see cref="OnConversationStateChangedEvent"/>; assigning the same value does nothing.
    /// </summary>
    public EConversationState ConversationState
    {
        get { return this._conversationState; }
        set
        {
            if (value != this._conversationState)
            {
                this.logIfInDebugMode($"ConversationState changed, new value= {value}");

                this._conversationState = value;

                this.OnConversationStateChangedEvent?.Invoke(this, value);
            }
        }
    }

    /// <summary>Raised with the new value whenever <see cref="ConversationState"/> actually changes.</summary>
    public event EventHandler<EConversationState> OnConversationStateChangedEvent;

    #endregion

    #region BotState

    private EBotState _botState = EBotState.Idle;

    /// <summary>
    /// Current bot state. Assigning a different value logs the change (debug mode only)
    /// and raises <see cref="OnBotStateChangedEvent"/>; assigning the same value does nothing.
    /// </summary>
    public EBotState BotState
    {
        get { return this._botState; }
        set
        {
            if (value != this._botState)
            {
                this.logIfInDebugMode($"BotState changed, new value= {value}");

                this._botState = value;

                this.OnBotStateChangedEvent?.Invoke(this, value);
            }
        }
    }

    /// <summary>Raised with the new value whenever <see cref="BotState"/> actually changes.</summary>
    public event EventHandler<EBotState> OnBotStateChangedEvent;

    #endregion

    #endregion

    #region Private Properties

    // Shorthand for the ViaggioAIManager singleton; may be null until the manager has set its Instance.
    private ViaggioAIManager vaim { get { return ViaggioAIManager.Instance; } }

    // Parameters of the current conversation, captured by StartConversation.
    private string languageCode;
    private string instructions;
    private string context;
    private int maxRoundTrips;
    private float maxSilenceBeforeTimeOut;
    private string outroText;

    // Number of texts sent to the bot since the conversation was marked as running.
    private int currentRoundTrips;
    // Accumulated Time.deltaTime while the bot is idle and no transcription arrived.
    private float currentSilenceDuratation;

    // True while this component's handlers are attached; prevents double subscription.
    private bool eventsSubscribed;

    #endregion

    #region Framework Functions

    void OnEnable()
    {
        this.subscribeToEvents();
    }

    void Update()
    {
        this.checkForTimeout();
    }

    void OnDisable()
    {
        this.unsubscribeFromEvents();
    }

    #endregion

    #region Events

    /// <summary>
    /// Waits until the ViaggioAIManager singleton is available and then attaches this
    /// component's handlers to the speech recognition, speech synthesis and bot services.
    /// </summary>
    private async void subscribeToEvents()
    {
        while (vaim == null)
        {
            await Task.Delay(10);

            // The component may have been destroyed or disabled while we were waiting.
            // OnDisable has already run in that case, so subscribing now would leave
            // handlers attached that nobody removes again.
            if (this == null || !this.isActiveAndEnabled)
            {
                return;
            }
        }

        // A quick disable/enable cycle can leave two waiting loops alive; only the first one subscribes.
        if (this.eventsSubscribed)
        {
            return;
        }

        this.eventsSubscribed = true;

        vaim.SpeechRecognitionService.OnPartialTranscriptionChangedEvent += this.onPartialTranscriptionChanged;
        vaim.SpeechRecognitionService.OnFullTranscriptionChangedEvent += this.onFullTranscriptionChangedEvent;

        vaim.SpeechSynthesizerService.OnSpeechOutputEndedEvent += this.onSpeechOutputEnded;

        this.openAIServices_Conversation.OnLastBotErrorChangedEvent += this.onLastBotErrorChanged;
        this.openAIServices_Conversation.OnLastBotReplyChangedEvent += this.onLastBotReplyChanged;
    }

    /// <summary>
    /// Detaches all handlers attached by <see cref="subscribeToEvents"/>, skipping services
    /// that are no longer available.
    /// </summary>
    private void unsubscribeFromEvents()
    {
        this.eventsSubscribed = false;

        if (vaim?.SpeechRecognitionService != null)
        {
            vaim.SpeechRecognitionService.OnPartialTranscriptionChangedEvent -= this.onPartialTranscriptionChanged;
            vaim.SpeechRecognitionService.OnFullTranscriptionChangedEvent -= this.onFullTranscriptionChangedEvent;
        }

        if (vaim?.SpeechSynthesizerService != null)
        {
            vaim.SpeechSynthesizerService.OnSpeechOutputEndedEvent -= this.onSpeechOutputEnded;
        }

        if (this.openAIServices_Conversation != null)
        {
            this.openAIServices_Conversation.OnLastBotErrorChangedEvent -= this.onLastBotErrorChanged;
            this.openAIServices_Conversation.OnLastBotReplyChangedEvent -= this.onLastBotReplyChanged;
        }
    }

    /// <summary>
    /// Bot error handler: reports the error through <see cref="OnConversationErrorEvent"/>,
    /// marks the conversation as ended and asks the manager to stop the conversation.
    /// Ignored while the manager is not in the Conversation state.
    /// </summary>
    private void onLastBotErrorChanged(object sender, string botError)
    {
        if (vaim.ViaggioAIState != EViaggioAIState.Conversation)
        {
            return;
        }

        this.OnConversationErrorEvent?.Invoke(this, $"Conversation ended because of BotError: {botError}");

        this.ConversationState = EConversationState.Ended;
        vaim.StopConversation();
    }

    /// <summary>
    /// Bot reply handler: forwards the reply to <see cref="handleBotReply"/> when both the manager
    /// and this service are in a running conversation; otherwise the reply is dropped.
    /// </summary>
    private void onLastBotReplyChanged(object sender, string botReply)
    {
        if (vaim.ViaggioAIState != EViaggioAIState.Conversation)
        {
            return;
        }

        if (this.ConversationState != EConversationState.Running)
        {
            // Logged through the manager (its own debug switch), not through this component's switch.
            vaim.LogIfInDebugMode($"Got Bot Reply in wrong conversation state ({this.ConversationState}). Ignoring");
            return;
        }

        this.handleBotReply(botReply);
    }

    /// <summary>Any partial transcription counts as user activity and resets the silence timer.</summary>
    private void onPartialTranscriptionChanged(object sender, string e)
    {
        if (vaim.ViaggioAIState != EViaggioAIState.Conversation)
        {
            return;
        }

        this.currentSilenceDuratation = 0;
    }

    /// <summary>
    /// Full transcription handler: resets the silence timer and forwards the user input to
    /// <see cref="handleUserInput"/> when a conversation is running; otherwise the input is dropped.
    /// </summary>
    private void onFullTranscriptionChangedEvent(object sender, string userInput)
    {
        if (vaim.ViaggioAIState != EViaggioAIState.Conversation)
        {
            return;
        }

        if (this.ConversationState != EConversationState.Running)
        {
            // Logged through the manager (its own debug switch), not through this component's switch.
            vaim.LogIfInDebugMode($"Got User Input in wrong conversation state ({this.ConversationState}). Ignoring");
            return;
        }

        this.currentSilenceDuratation = 0;

        this.handleUserInput(userInput);
    }

    /// <summary>Returns the bot to Idle once speech output has ended, but only if it was speaking.</summary>
    private void onSpeechOutputEnded(object sender, bool e)
    {
        if (this.BotState != EBotState.Speaking)
        {
            return;
        }

        this.BotState = EBotState.Idle;
    }

    #endregion

    #region Public Events

    /// <summary>Raised by <see cref="StartConversation"/> after the initial prompt was sent and the state became Running.</summary>
    public event EventHandler<string> OnConversationInitiatedEvent;

    /// <summary>Raised with a description when the bot service reports an error during a conversation.</summary>
    public event EventHandler<string> OnConversationErrorEvent;

    /// <summary>Raised when the conversation is stopped because of the round-trip limit or a silence timeout.</summary>
    public event EventHandler<string> OnConversationEndedEvent;

    #endregion

    #region Public Functions

    /// <summary>
    /// Starts a new conversation: stores the parameters, sends the initial prompt
    /// (<paramref name="_instructions"/> directly followed by <paramref name="_context"/>) to the bot,
    /// then marks the conversation as Running and resets the round-trip and silence counters.
    /// </summary>
    /// <param name="_languageCode">Language code passed to the speech synthesizer for every spoken text.</param>
    /// <param name="_instructions">First part of the initial prompt.</param>
    /// <param name="_context">Second part of the initial prompt, appended without a separator.</param>
    /// <param name="_maxRoundtrips">Round-trip count at which the bot reply ends the conversation.</param>
    /// <param name="_maxSilenceBeforeTimeOut">Idle time (accumulated Time.deltaTime) after which the conversation times out.</param>
    /// <param name="_outroText">Text appended to the final bot reply, or spoken on its own after a timeout.</param>
    public async void StartConversation(string _languageCode, string _instructions, string _context, int _maxRoundtrips, float _maxSilenceBeforeTimeOut, string _outroText)
    {
        this.logIfInDebugMode($"Starting new conversation");

        this.languageCode = _languageCode;
        this.instructions = _instructions;
        this.context = _context;
        this.maxRoundTrips = _maxRoundtrips;
        this.maxSilenceBeforeTimeOut = _maxSilenceBeforeTimeOut;
        this.outroText = _outroText;

        string fullInitialPrompt = this.instructions + this.context;

        bool promptWasSent = await this.sendTextToBot(fullInitialPrompt);

        if (!promptWasSent)
        {
            // The component was destroyed while waiting for the bot service; nothing was sent,
            // so the conversation must not be reported as running.
            return;
        }

        this.ConversationState = EConversationState.Running;
        this.OnConversationInitiatedEvent?.Invoke(this, $"Conversation started");

        // Reset after sending: the initial prompt itself is not counted as a round trip.
        this.currentRoundTrips = 0;
        this.currentSilenceDuratation = 0;
    }

    /// <summary>
    /// Stops listening for speech, stops the bot service and resets the conversation state to NotStarted.
    /// </summary>
    public void StopConversation()
    {
        vaim.SpeechRecognitionService.StopListening();
        this.openAIServices_Conversation.Stop();

        this.ConversationState = EConversationState.NotStarted;
    }

    #endregion

    #region Private Functions

    /// <summary>
    /// Sends <paramref name="newText"/> to the bot service, waiting (polling every 200 ms) until the
    /// service reports it is initialized. Afterwards the bot state becomes Thinking and the
    /// round-trip counter is incremented.
    /// </summary>
    /// <returns>
    /// True when the text was sent; false when this component was destroyed while waiting,
    /// in which case nothing is sent and no state is changed.
    /// </returns>
    private async Task<bool> sendTextToBot(string newText)
    {
        while (!this.openAIServices_Conversation.IsInitialized)
        {
            await Task.Delay(200);

            if (this == null)
            {
                return false;
            }

            this.logIfInDebugMode($"openAIServices_Conversation not initialized yet, wait before sending text...");
        }

        this.openAIServices_Conversation.SendTextToBot(newText);

        this.BotState = EBotState.Thinking;

        this.currentRoundTrips++;

        this.logIfInDebugMode($"Sent text to bot: {newText}. Current Round Trips = {this.currentRoundTrips}");

        return true;
    }

    /// <summary>
    /// Speaks a bot reply. When the round-trip limit is reached the outro text is appended and the
    /// conversation is stopped; otherwise speech recognition is started again for the next user input.
    /// </summary>
    private void handleBotReply(string botReply)
    {
        if (vaim.ViaggioAIState != EViaggioAIState.Conversation)
        {
            return;
        }

        string textToSynthesize = botReply;

        // NOTE(review): strict equality - a limit that is never hit exactly (e.g. a negative value)
        // never ends the conversation here; confirm whether that is intended before changing it.
        if (this.currentRoundTrips == this.maxRoundTrips)
        {
            this.logIfInDebugMode($"Got bot reply {botReply} and reached max roundtrips {this.maxRoundTrips}");

            textToSynthesize += this.outroText;
            this.ConversationState = EConversationState.Ended;

            this.stopConversation(false);
        }
        else
        {
            this.logIfInDebugMode($"Got bot reply {botReply} and started listening again.");
            vaim.SpeechRecognitionService.StartListeningOnceAsync();
        }

        // The final reply (including the outro) is synthesized after the conversation was stopped.
        vaim.SpeechSynthesizerService.Synthesize(textToSynthesize, this.languageCode);

        this.BotState = EBotState.Speaking;
    }

    /// <summary>
    /// Interrupts ongoing speech output, if any, and forwards the user input to the bot.
    /// </summary>
    private async void handleUserInput(string userInput)
    {
        if (vaim.SpeechSynthesizerService.SpeechSynthesizerState == ESpeechSynthesizerState.Speaking)
        {
            await vaim.SpeechSynthesizerService.Stop();
        }

        // Awaited so that failures while sending surface instead of being dropped with the task.
        await this.sendTextToBot(userInput);
    }

    /// <summary>
    /// Called every frame: while a conversation is running and the bot is idle, accumulates the
    /// silence duration and stops the conversation once it exceeds the configured maximum.
    /// </summary>
    private void checkForTimeout()
    {
        if (this.ConversationState != EConversationState.Running)
        {
            return;
        }

        if (this.BotState == EBotState.Speaking || this.BotState == EBotState.Thinking)
        {
            // Can't timeout while bot is speaking or thinking
            return;
        }

        this.currentSilenceDuratation += Time.deltaTime;

        // Per-frame trace: only build and emit the message when debug mode is on.
        if (this.debugModeIsActive)
        {
            this.logIfInDebugMode($"Increased currentSilenceDuratation ({this.currentSilenceDuratation}), ConvState ={this.ConversationState}, BotState={this.BotState}");
        }

        if (this.currentSilenceDuratation > this.maxSilenceBeforeTimeOut)
        {
            this.logIfInDebugMode("Conversation timed out -> Stopping");

            this.stopConversation(true);
        }
    }

    /// <summary>
    /// Ends the conversation: raises <see cref="OnConversationEndedEvent"/>, speaks the outro text
    /// when the stop was caused by a timeout, sets the state to Ended and asks the manager to stop.
    /// </summary>
    /// <param name="timeOut">True when the stop is caused by the silence timeout.</param>
    private void stopConversation(bool timeOut)
    {
        this.logIfInDebugMode("Stopping conversation");

        this.OnConversationEndedEvent?.Invoke(this, $"Conversation stopped");

        if (timeOut)
        {
            vaim.SpeechSynthesizerService.Synthesize(this.outroText, this.languageCode);
        }

        this.ConversationState = EConversationState.Ended;

        vaim.StopConversation();
    }

    /// <summary>Writes <paramref name="message"/> to the Unity log, prefixed with the service name, when debug mode is active.</summary>
    private void logIfInDebugMode(string message)
    {
        if (!this.debugModeIsActive)
        {
            return;
        }

        Debug.Log($"(ViaggioAI ConversationService) => {message}");
    }

    #endregion
}
|