Unity 在 Microsoft Azure 文本转语音处理输入时冻结 2 秒
Unity freezes for 2 seconds while Microsoft Azure Text-To-Speech processes input
我正在使用 Microsoft Azure Text To Speech with Unity。它的工作原理是当按下一个按钮从文本输入中产生语音时,整个应用程序会冻结大约 2 秒,然后输出声音,然后游戏恢复正常。我假设此冻结是由于 Azure 正在处理 TTS?下面是代码。
public void ButtonClick()
{
// Creates an instance of a speech config with specified subscription key and service region.
// Replace with your own subscription key and service region (e.g., "westus").
// NOTE(review): building a new config + synthesizer on every click adds avoidable latency.
var config = SpeechConfig.FromSubscription("[redacted]", "westus");
// Creates a speech synthesizer.
// Make sure to dispose the synthesizer after use!
using (var synthsizer = new SpeechSynthesizer(config, null))
{
lock (threadLocker)
{
waitingForSpeak = true;
}
// Starts speech synthesis, and returns after a single utterance is synthesized.
// NOTE(review): .Result synchronously blocks the Unity main thread until the whole
// Azure round trip finishes — this is the cause of the ~2 second freeze described above.
var result = synthsizer.SpeakTextAsync(inputField.text).Result;
// Checks result.
string newMessage = string.Empty;
if (result.Reason == ResultReason.SynthesizingAudioCompleted)
{
// Since native playback is not yet supported on Unity yet (currently only supported on Windows/Linux Desktop),
// use the Unity API to play audio here as a short term solution.
// Native playback support will be added in the future release.
// Convert little-endian 16-bit PCM bytes to normalized floats in [-1, 1).
var sampleCount = result.AudioData.Length / 2;
var audioData = new float[sampleCount];
for (var i = 0; i < sampleCount; ++i)
{
audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8 | result.AudioData[i * 2]) / 32768.0F;
}
// The default output audio format is 16K 16bit mono
var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount, 1, 16000, false);
audioClip.SetData(audioData, 0);
audioSource.clip = audioClip;
audioSource.Play();
newMessage = "Speech synthesis succeeded!";
}
else if (result.Reason == ResultReason.Canceled)
{
// Canceled usually means bad key/region or a network failure; surface the details.
var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\nErrorDetails=[{cancellation.ErrorDetails}]\nDid you update the subscription info?";
}
lock (threadLocker)
{
message = newMessage;
waitingForSpeak = false;
}
}
}
void Start()
{
    // Validate required scene references up front; bail out with a logged
    // error if either is missing so the failure is visible in the console.
    if (inputField == null)
    {
        message = "inputField property is null! Assign a UI InputField element to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    if (speakButton == null)
    {
        message = "speakButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    // All references present: seed the UI text and wire up the click handler.
    inputField.text = "Enter text you wish spoken here.";
    message = "Click button to synthesize speech";
    speakButton.onClick.AddListener(ButtonClick);
}
我希望 TTS 在按下 TTS 按钮时理想情况下不会冻结整个应用程序,因此在按下 TTS 按钮时应用程序可用。任何帮助将不胜感激。
当您执行 synthesizer.SpeakTextAsync(inputField.text).Result;
时,它会阻塞直到任务完成。相反,尝试调用 Task<SpeechSynthesisResult> task = synthesizer.SpeakTextAsync(inputField.text);
然后设置一个协程,等待直到 task.IsCompleted(注意:IsCompleted 是属性而不是方法,不要加括号调用)
为真然后执行代码中的其余过程
这是帮助您入门的部分(未经测试)解决方案。我将变量从 synthsizer 更改为 synthesizer,并且删除了所有锁定,因为协程在主线程上按顺序发生,因此不需要锁定:
public void ButtonClick()
{
    // Ignore clicks while a previous synthesis request is still in flight.
    if (waitingForSpeak) return;
    waitingForSpeak = true;

    // Creates an instance of a speech config with specified subscription
    // key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    SpeechConfig config = SpeechConfig.FromSubscription("[redacted]", "westus");

    // Creates a speech synthesizer. It is disposed by the coroutine once the
    // synthesis task has completed.
    // (fix: original line had a stray closing parenthesis — a compile error.)
    SpeechSynthesizer synthesizer = new SpeechSynthesizer(config, null);

    // Start synthesis WITHOUT blocking the main thread; the coroutine polls
    // the task and finishes the work (playback, cleanup) when it completes.
    Task<SpeechSynthesisResult> task = synthesizer.SpeakTextAsync(inputField.text);
    StartCoroutine(CheckSynthesizer(task, config, synthesizer));
}
// Coroutine that waits (without blocking the main thread) for the synthesis
// task to finish, then plays the resulting audio or reports the failure.
// Runs entirely on Unity's main thread, so no locking is required.
private IEnumerator CheckSynthesizer(Task<SpeechSynthesisResult> task,
                                     SpeechConfig config,
                                     SpeechSynthesizer synthesizer)
{
    // Task.IsCompleted is a property, not a method (original code called it
    // with parentheses, which does not compile).
    yield return new WaitUntil(() => task.IsCompleted);
    var result = task.Result;

    // Checks result.
    string newMessage = string.Empty;
    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        // Since native playback is not yet supported on Unity (currently
        // only supported on Windows/Linux Desktop), use the Unity API to
        // play audio here as a short term solution.
        // Convert little-endian 16-bit PCM bytes to normalized floats.
        var sampleCount = result.AudioData.Length / 2;
        var audioData = new float[sampleCount];
        for (var i = 0; i < sampleCount; ++i)
        {
            audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8
                                   | result.AudioData[i * 2]) / 32768.0F;
        }
        // The default output audio format is 16K 16bit mono.
        var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount,
                                         1, 16000, false);
        audioClip.SetData(audioData, 0);
        audioSource.clip = audioClip;
        audioSource.Play();
        newMessage = "Speech synthesis succeeded!";
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        // Canceled usually means bad key/region or a network failure.
        // (fix: original string concatenation was malformed — an unterminated
        // literal split across lines.)
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\n" +
                     $"ErrorDetails=[{cancellation.ErrorDetails}]\n" +
                     "Did you update the subscription info?";
    }

    message = newMessage;
    waitingForSpeak = false;
    synthesizer.Dispose();
}
void Start()
{
    // Validate required scene references up front; bail out with a logged
    // error if either is missing so the failure is visible in the console.
    if (inputField == null)
    {
        message = "inputField property is null! Assign a UI InputField element to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    if (speakButton == null)
    {
        message = "speakButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    // All references present: seed the UI text and wire up the click handler.
    inputField.text = "Enter text you wish spoken here.";
    message = "Click button to synthesize speech";
    speakButton.onClick.AddListener(ButtonClick);
}
作为对评论的回应,这是您可以尝试的替代方法的开始,它将数据复制包装在任务中并在该任务完成之前产生:
// Alternative coroutine: offloads the PCM -> float conversion to a worker
// thread so large utterances do not hitch the frame, then plays the clip.
private IEnumerator CheckSynthesizer(Task<SpeechSynthesisResult> task,
                                     SpeechConfig config,
                                     SpeechSynthesizer synthesizer)
{
    // Task.IsCompleted is a property, not a method (original code called it
    // with parentheses, which does not compile).
    yield return new WaitUntil(() => task.IsCompleted);
    var result = task.Result;

    // Checks result.
    string newMessage = string.Empty;
    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        // Only the raw byte-to-float conversion runs on the worker thread.
        // Unity APIs (AudioClip.Create, SetData, AudioSource.Play) are NOT
        // thread-safe and must be called from the main thread, so they run
        // after the copy task completes (fix: the original invoked them
        // inside Task.Factory.StartNew, i.e. on a thread-pool thread).
        var sampleCount = result.AudioData.Length / 2;
        var audioData = new float[sampleCount];
        Task copyTask = Task.Factory.StartNew(() =>
        {
            // Little-endian 16-bit PCM to normalized floats in [-1, 1).
            for (var i = 0; i < sampleCount; ++i)
            {
                audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8
                                       | result.AudioData[i * 2]) / 32768.0F;
            }
        });
        yield return new WaitUntil(() => copyTask.IsCompleted);

        // The default output audio format is 16K 16bit mono.
        var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount,
                                         1, 16000, false);
        audioClip.SetData(audioData, 0);
        audioSource.clip = audioClip;
        audioSource.Play();
        newMessage = "Speech synthesis succeeded!";
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        // (fix: original string concatenation was malformed — an unterminated
        // literal split across lines.)
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\n" +
                     $"ErrorDetails=[{cancellation.ErrorDetails}]\n" +
                     "Did you update the subscription info?";
    }

    message = newMessage;
    waitingForSpeak = false;
    synthesizer.Dispose();
}
我正在使用 Microsoft Azure Text To Speech with Unity。它的工作原理是当按下一个按钮从文本输入中产生语音时,整个应用程序会冻结大约 2 秒,然后输出声音,然后游戏恢复正常。我假设此冻结是由于 Azure 正在处理 TTS?下面是代码。
public void ButtonClick()
{
// Creates an instance of a speech config with specified subscription key and service region.
// Replace with your own subscription key and service region (e.g., "westus").
// NOTE(review): building a new config + synthesizer on every click adds avoidable latency.
var config = SpeechConfig.FromSubscription("[redacted]", "westus");
// Creates a speech synthesizer.
// Make sure to dispose the synthesizer after use!
using (var synthsizer = new SpeechSynthesizer(config, null))
{
lock (threadLocker)
{
waitingForSpeak = true;
}
// Starts speech synthesis, and returns after a single utterance is synthesized.
// NOTE(review): .Result synchronously blocks the Unity main thread until the whole
// Azure round trip finishes — this is the cause of the ~2 second freeze described above.
var result = synthsizer.SpeakTextAsync(inputField.text).Result;
// Checks result.
string newMessage = string.Empty;
if (result.Reason == ResultReason.SynthesizingAudioCompleted)
{
// Since native playback is not yet supported on Unity yet (currently only supported on Windows/Linux Desktop),
// use the Unity API to play audio here as a short term solution.
// Native playback support will be added in the future release.
// Convert little-endian 16-bit PCM bytes to normalized floats in [-1, 1).
var sampleCount = result.AudioData.Length / 2;
var audioData = new float[sampleCount];
for (var i = 0; i < sampleCount; ++i)
{
audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8 | result.AudioData[i * 2]) / 32768.0F;
}
// The default output audio format is 16K 16bit mono
var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount, 1, 16000, false);
audioClip.SetData(audioData, 0);
audioSource.clip = audioClip;
audioSource.Play();
newMessage = "Speech synthesis succeeded!";
}
else if (result.Reason == ResultReason.Canceled)
{
// Canceled usually means bad key/region or a network failure; surface the details.
var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\nErrorDetails=[{cancellation.ErrorDetails}]\nDid you update the subscription info?";
}
lock (threadLocker)
{
message = newMessage;
waitingForSpeak = false;
}
}
}
void Start()
{
    // Validate required scene references up front; bail out with a logged
    // error if either is missing so the failure is visible in the console.
    if (inputField == null)
    {
        message = "inputField property is null! Assign a UI InputField element to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    if (speakButton == null)
    {
        message = "speakButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    // All references present: seed the UI text and wire up the click handler.
    inputField.text = "Enter text you wish spoken here.";
    message = "Click button to synthesize speech";
    speakButton.onClick.AddListener(ButtonClick);
}
我希望 TTS 在按下 TTS 按钮时理想情况下不会冻结整个应用程序,因此在按下 TTS 按钮时应用程序可用。任何帮助将不胜感激。
当您执行 synthesizer.SpeakTextAsync(inputField.text).Result;
时,它会阻塞直到任务完成。相反,尝试调用 Task<SpeechSynthesisResult> task = synthesizer.SpeakTextAsync(inputField.text);
然后设置一个协程,等待直到 task.IsCompleted(注意:IsCompleted 是属性而不是方法,不要加括号调用)
为真然后执行代码中的其余过程
这是帮助您入门的部分(未经测试)解决方案。我将变量从 synthsizer 更改为 synthesizer,并且删除了所有锁定,因为协程在主线程上按顺序发生,因此不需要锁定:
public void ButtonClick()
{
    // Ignore clicks while a previous synthesis request is still in flight.
    if (waitingForSpeak) return;
    waitingForSpeak = true;

    // Creates an instance of a speech config with specified subscription
    // key and service region.
    // Replace with your own subscription key and service region (e.g., "westus").
    SpeechConfig config = SpeechConfig.FromSubscription("[redacted]", "westus");

    // Creates a speech synthesizer. It is disposed by the coroutine once the
    // synthesis task has completed.
    // (fix: original line had a stray closing parenthesis — a compile error.)
    SpeechSynthesizer synthesizer = new SpeechSynthesizer(config, null);

    // Start synthesis WITHOUT blocking the main thread; the coroutine polls
    // the task and finishes the work (playback, cleanup) when it completes.
    Task<SpeechSynthesisResult> task = synthesizer.SpeakTextAsync(inputField.text);
    StartCoroutine(CheckSynthesizer(task, config, synthesizer));
}
// Coroutine that waits (without blocking the main thread) for the synthesis
// task to finish, then plays the resulting audio or reports the failure.
// Runs entirely on Unity's main thread, so no locking is required.
private IEnumerator CheckSynthesizer(Task<SpeechSynthesisResult> task,
                                     SpeechConfig config,
                                     SpeechSynthesizer synthesizer)
{
    // Task.IsCompleted is a property, not a method (original code called it
    // with parentheses, which does not compile).
    yield return new WaitUntil(() => task.IsCompleted);
    var result = task.Result;

    // Checks result.
    string newMessage = string.Empty;
    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        // Since native playback is not yet supported on Unity (currently
        // only supported on Windows/Linux Desktop), use the Unity API to
        // play audio here as a short term solution.
        // Convert little-endian 16-bit PCM bytes to normalized floats.
        var sampleCount = result.AudioData.Length / 2;
        var audioData = new float[sampleCount];
        for (var i = 0; i < sampleCount; ++i)
        {
            audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8
                                   | result.AudioData[i * 2]) / 32768.0F;
        }
        // The default output audio format is 16K 16bit mono.
        var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount,
                                         1, 16000, false);
        audioClip.SetData(audioData, 0);
        audioSource.clip = audioClip;
        audioSource.Play();
        newMessage = "Speech synthesis succeeded!";
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        // Canceled usually means bad key/region or a network failure.
        // (fix: original string concatenation was malformed — an unterminated
        // literal split across lines.)
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\n" +
                     $"ErrorDetails=[{cancellation.ErrorDetails}]\n" +
                     "Did you update the subscription info?";
    }

    message = newMessage;
    waitingForSpeak = false;
    synthesizer.Dispose();
}
void Start()
{
    // Validate required scene references up front; bail out with a logged
    // error if either is missing so the failure is visible in the console.
    if (inputField == null)
    {
        message = "inputField property is null! Assign a UI InputField element to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    if (speakButton == null)
    {
        message = "speakButton property is null! Assign a UI Button to it.";
        UnityEngine.Debug.LogError(message);
        return;
    }

    // All references present: seed the UI text and wire up the click handler.
    inputField.text = "Enter text you wish spoken here.";
    message = "Click button to synthesize speech";
    speakButton.onClick.AddListener(ButtonClick);
}
作为对评论的回应,这是您可以尝试的替代方法的开始,它将数据复制包装在任务中并在该任务完成之前产生:
// Alternative coroutine: offloads the PCM -> float conversion to a worker
// thread so large utterances do not hitch the frame, then plays the clip.
private IEnumerator CheckSynthesizer(Task<SpeechSynthesisResult> task,
                                     SpeechConfig config,
                                     SpeechSynthesizer synthesizer)
{
    // Task.IsCompleted is a property, not a method (original code called it
    // with parentheses, which does not compile).
    yield return new WaitUntil(() => task.IsCompleted);
    var result = task.Result;

    // Checks result.
    string newMessage = string.Empty;
    if (result.Reason == ResultReason.SynthesizingAudioCompleted)
    {
        // Only the raw byte-to-float conversion runs on the worker thread.
        // Unity APIs (AudioClip.Create, SetData, AudioSource.Play) are NOT
        // thread-safe and must be called from the main thread, so they run
        // after the copy task completes (fix: the original invoked them
        // inside Task.Factory.StartNew, i.e. on a thread-pool thread).
        var sampleCount = result.AudioData.Length / 2;
        var audioData = new float[sampleCount];
        Task copyTask = Task.Factory.StartNew(() =>
        {
            // Little-endian 16-bit PCM to normalized floats in [-1, 1).
            for (var i = 0; i < sampleCount; ++i)
            {
                audioData[i] = (short)(result.AudioData[i * 2 + 1] << 8
                                       | result.AudioData[i * 2]) / 32768.0F;
            }
        });
        yield return new WaitUntil(() => copyTask.IsCompleted);

        // The default output audio format is 16K 16bit mono.
        var audioClip = AudioClip.Create("SynthesizedAudio", sampleCount,
                                         1, 16000, false);
        audioClip.SetData(audioData, 0);
        audioSource.clip = audioClip;
        audioSource.Play();
        newMessage = "Speech synthesis succeeded!";
    }
    else if (result.Reason == ResultReason.Canceled)
    {
        // (fix: original string concatenation was malformed — an unterminated
        // literal split across lines.)
        var cancellation = SpeechSynthesisCancellationDetails.FromResult(result);
        newMessage = $"CANCELED:\nReason=[{cancellation.Reason}]\n" +
                     $"ErrorDetails=[{cancellation.ErrorDetails}]\n" +
                     "Did you update the subscription info?";
    }

    message = newMessage;
    waitingForSpeak = false;
    synthesizer.Dispose();
}