2
0
mirror of https://github.com/9ParsonsB/Pulsar.git synced 2025-04-05 17:39:39 -04:00
pulsar/ObservatoryHerald/AzureSpeechManager.cs
F K 1727e5fb2a
Preserve inner XML when setting a voice style (#37)
The former method lost any XML markup (such as say-as tags, etc.) embedded within the voice tag. In the future, if support for setting voice speed is added, it can be inserted here easily as well.
2021-11-20 10:51:11 -03:30

231 lines
7.3 KiB
C#

using System;
using System.IO;
using System.Linq;
using System.Collections.Generic;
using System.Net.Http;
using System.Xml;
using Microsoft.CognitiveServices.Speech;
using System.Collections.ObjectModel;
using Observatory.Framework;
namespace Observatory.Herald
{
internal class VoiceSpeechManager
{
// Azure subscription key; assigned in the constructor from GetAzureKey.
private string azureKey;
// Directory under the user's ApplicationData folder where generated .wav files are stored.
private DirectoryInfo cacheLocation;
// Speech configuration built in the constructor from azureKey and the "eastus" region.
private SpeechConfig speechConfig;
// Synthesizer used to list the available voices and to render SSML to audio.
private SpeechSynthesizer speech;
/// <summary>
/// Prepares the on-disk audio cache directory, obtains the Azure key, creates the
/// speech synthesizer and stores the selectable voices on <paramref name="settings"/>.
/// </summary>
/// <param name="settings">Herald settings; read for the optional key override and written with the voice list.</param>
/// <param name="httpClient">HTTP client used to download the key when no override is configured.</param>
/// <exception cref="PluginException">
/// The key could not be retrieved, the speech configuration could not be created,
/// or the voice list could not be retrieved.
/// </exception>
internal VoiceSpeechManager(HeraldSettings settings, HttpClient httpClient)
{
    char separator = Path.DirectorySeparatorChar;
    string appDataRoot = Environment.GetFolderPath(Environment.SpecialFolder.ApplicationData);

    // <ApplicationData>/ObservatoryCore/ObservatoryHerald/ (the path ends with a separator).
    cacheLocation = new DirectoryInfo(
        $"{appDataRoot}{separator}ObservatoryCore{separator}ObservatoryHerald{separator}");
    if (!cacheLocation.Exists)
    {
        cacheLocation.Create();
    }

    try
    {
        azureKey = GetAzureKey(settings, httpClient);
    }
    catch (Exception keyError)
    {
        throw new PluginException("Herald", "Unable to retrieve Azure API key.", keyError);
    }

    try
    {
        speechConfig = SpeechConfig.FromSubscription(azureKey, "eastus");
    }
    catch (Exception configError)
    {
        throw new PluginException("Herald", "Error retrieving Azure account details.", configError);
    }

    // A null audio configuration is passed as the second argument, exactly as before.
    speech = new SpeechSynthesizer(speechConfig, null);

    settings.Voices = PopulateVoiceSettingOptions();
}
/// <summary>
/// Queries Azure for the available voices and builds the display-name to voice map
/// offered in the settings UI. Only voices whose locale starts with "en-" are included.
/// Each voice contributes one entry for itself plus one entry per non-blank style,
/// all mapped to the same voice object.
/// </summary>
/// <returns>Map from a label such as "British - Name" or "British - Name - style" to the voice.</returns>
/// <exception cref="PluginException">The voice list request threw.</exception>
private Dictionary<string, object> PopulateVoiceSettingOptions()
{
    ReadOnlyCollection<VoiceInfo> voices;
    try
    {
        voices = speech.GetVoicesAsync().Result.Voices;
    }
    catch (Exception ex)
    {
        throw new PluginException("Herald", "Unable to retrieve voice list from Azure.", ex);
    }

    var voiceOptions = new Dictionary<string, object>();

    // Locale codes are identifiers, not linguistic text, so compare ordinally.
    var englishSpeakingVoices = voices.Where(v => v.Locale.StartsWith("en-", StringComparison.Ordinal));

    foreach (var voice in englishSpeakingVoices)
    {
        string voiceLabel = $"{GetDemonymFromLocale(voice.Locale)} - {voice.LocalName}";

        // TryAdd keeps the first entry for a label instead of throwing when the
        // service returns two items that render to the same display name.
        voiceOptions.TryAdd(voiceLabel, voice);

        foreach (var style in voice.StyleList)
        {
            if (!string.IsNullOrWhiteSpace(style))
            {
                voiceOptions.TryAdd($"{voiceLabel} - {style}", voice);
            }
        }
    }

    return voiceOptions;
}
/// <summary>
/// Translates an English locale code into the demonym shown in voice labels.
/// </summary>
/// <param name="locale">Locale code such as "en-GB".</param>
/// <returns>The matching demonym, or <paramref name="locale"/> itself when the code is not in the table.</returns>
private static string GetDemonymFromLocale(string locale) => locale switch
{
    "en-AU" => "Australian",
    "en-CA" => "Canadian",
    "en-GB" => "British",
    "en-HK" => "Hong Konger",
    "en-IE" => "Irish",
    "en-IN" => "Indian",
    "en-KE" => "Kenyan",
    "en-NG" => "Nigerian",
    "en-NZ" => "Kiwi",
    "en-PH" => "Filipino",
    "en-SG" => "Singaporean",
    "en-TZ" => "Tanzanian",
    "en-US" => "American",
    "en-ZA" => "South African",
    // Unknown codes are passed through unchanged.
    _ => locale,
};
/// <summary>
/// Returns the path of a .wav file containing the spoken form of <paramref name="ssml"/>,
/// requesting it from Azure only when it is not already present in the cache directory.
/// </summary>
/// <param name="ssml">SSML document to speak.</param>
/// <param name="voice">Voice name written into the SSML before synthesis.</param>
/// <param name="style">Optional speaking style; blank means no style wrapper.</param>
/// <returns>Full path of the cached audio file.</returns>
internal string GetAudioFileFromSsml(string ssml, string voice, string style)
{
    ssml = AddVoiceToSsml(ssml, voice, style);

    // The cache key is the hash of the final SSML (voice and style included).
    string ssmlHash = FNV64(ssml).ToString("X");

    // Build the path with Path.Combine on the directory's full name rather than
    // relying on DirectoryInfo's implicit string conversion and a trailing separator.
    string audioFile = Path.Combine(cacheLocation.FullName, ssmlHash + ".wav");

    if (!File.Exists(audioFile))
    {
        using var stream = RequestFromAzure(ssml);
        stream.SaveToWaveFileAsync(audioFile).Wait();
    }

    return audioFile;
}
/// <summary>
/// Computes a case-insensitive 64-bit FNV-1 hash (multiply, then XOR) of
/// <paramref name="data"/>, used as the cache key for generated audio.
/// </summary>
/// <remarks>
/// The text is lower-cased with the invariant culture and hashed over its UTF-8 bytes.
/// Previously each UTF-16 unit was truncated to its low byte, so non-ASCII text could
/// collide with unrelated ASCII text, and lower-casing depended on the current culture.
/// Hashes of pure-ASCII input are unchanged by this fix.
/// </remarks>
/// <param name="data">Text to hash.</param>
/// <returns>The 64-bit hash value.</returns>
private static ulong FNV64(string data)
{
    const ulong OffsetBasis = 0xcbf29ce484222325uL;
    const ulong Prime = 1099511628211uL;

    byte[] bytes = System.Text.Encoding.UTF8.GetBytes(data.ToLowerInvariant());

    ulong hash = OffsetBasis;
    foreach (byte b in bytes)
    {
        // Wrap-around on overflow is part of the algorithm; make it explicit so the
        // result does not depend on the project's checked-arithmetic setting.
        unchecked
        {
            hash *= Prime;
        }
        hash ^= b;
    }

    return hash;
}
/// <summary>
/// Sends <paramref name="ssml"/> to the speech service and returns the resulting audio stream.
/// </summary>
/// <param name="ssml">Complete SSML document to synthesize.</param>
/// <returns>Audio stream created from the synthesis result; the caller disposes it.</returns>
/// <exception cref="PluginException">
/// The request threw, or the service reported the synthesis as canceled.
/// </exception>
private AudioDataStream RequestFromAzure(string ssml)
{
    string cancellationMessage;
    try
    {
        var result = speech.SpeakSsmlAsync(ssml).Result;

        if (result.Reason != ResultReason.Canceled)
        {
            return AudioDataStream.FromResult(result);
        }

        // Per the Speech SDK reference, a failed synthesis is reported through the
        // result's Reason rather than an exception. Surface it here so the caller
        // never saves (and then caches) audio from a failed request.
        var details = SpeechSynthesisCancellationDetails.FromResult(result);
        cancellationMessage = $"Speech synthesis canceled ({details.ErrorCode}): {details.ErrorDetails}";
        result.Dispose();
    }
    catch (Exception ex)
    {
        throw new PluginException("Herald", "Unable to retrieve audio from Azure.", ex);
    }

    throw new PluginException(
        "Herald",
        "Unable to retrieve audio from Azure.",
        new InvalidOperationException(cancellationMessage));
}
/// <summary>
/// Writes the chosen voice (and optional speaking style) into an SSML document.
/// </summary>
/// <remarks>
/// The voice name is set on the <c>voice</c> element directly under the root. If the
/// document has no such element, the root's content is wrapped in a new one instead of
/// failing. When a style is given, the voice's existing content, including any nested
/// markup, is wrapped in an <c>express-as</c> element in the mstts namespace.
/// </remarks>
/// <param name="ssml">Well-formed SSML document.</param>
/// <param name="voiceName">Value for the voice element's <c>name</c> attribute.</param>
/// <param name="styleName">Speaking style; null or blank adds no wrapper.</param>
/// <returns>The modified SSML document as a string.</returns>
/// <exception cref="XmlException"><paramref name="ssml"/> is not well-formed XML.</exception>
private static string AddVoiceToSsml(string ssml, string voiceName, string styleName)
{
    const string MsttsNamespace = "http://www.w3.org/2001/mstts";

    XmlDocument ssmlDoc = new();
    ssmlDoc.LoadXml(ssml);

    XmlElement speakElement = ssmlDoc.DocumentElement;
    string ssmlNamespace = speakElement.NamespaceURI;

    XmlNamespaceManager ssmlNs = new(ssmlDoc.NameTable);
    ssmlNs.AddNamespace("ssml", ssmlNamespace);

    var voiceElement = ssmlDoc.SelectSingleNode("/ssml:speak/ssml:voice", ssmlNs) as XmlElement;
    if (voiceElement is null)
    {
        // No voice element supplied: move everything under the root into a new one.
        // AppendChild relocates each node, so the loop ends once the root is empty.
        voiceElement = ssmlDoc.CreateElement("voice", ssmlNamespace);
        while (speakElement.FirstChild is not null)
        {
            voiceElement.AppendChild(speakElement.FirstChild);
        }
        speakElement.AppendChild(voiceElement);
    }

    // SetAttribute updates an existing attribute in place or adds it when missing.
    voiceElement.SetAttribute("name", voiceName);

    if (!string.IsNullOrWhiteSpace(styleName))
    {
        // Round-trip through InnerXml/OuterXml so nested markup (say-as, break, ...) is
        // preserved; kept as-is so the output string for valid input does not change.
        var expressAsElement = ssmlDoc.CreateElement("express-as", MsttsNamespace);
        expressAsElement.SetAttribute("style", styleName);
        expressAsElement.InnerXml = voiceElement.InnerXml;
        voiceElement.InnerXml = expressAsElement.OuterXml;
    }

    return ssmlDoc.OuterXml;
}
/// <summary>
/// Chooses the Azure key to use: the override from settings when one is configured,
/// otherwise the key downloaded from the Observatory endpoint.
/// </summary>
/// <param name="settings">Herald settings holding the optional key override.</param>
/// <param name="httpClient">HTTP client used for the download.</param>
/// <returns>The key string.</returns>
private static string GetAzureKey(HeraldSettings settings, HttpClient httpClient)
{
    // A non-blank override wins; no network request is made in that case.
    if (!string.IsNullOrWhiteSpace(settings.AzureAPIKeyOverride))
    {
        return settings.AzureAPIKeyOverride;
    }

    // Blocks until the download completes; failures surface from .Result.
    return httpClient.GetStringAsync("https://xjph.net/Observatory/ObservatoryHeraldAzureKey").Result;
}
}
}