using System.Text;
using Microsoft.CognitiveServices.Speech;
using Microsoft.CognitiveServices.Speech.Audio;
using Azure;
using Azure.AI.OpenAI;
// This example requires environment variables named "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT" and "AZURE_OPENAI_CHAT_DEPLOYMENT"
// Your endpoint should look like the following https://YOUR_OPEN_AI_RESOURCE_NAME.openai.azure.com/
string openAIKey = Environment.GetEnvironmentVariable("AZURE_OPENAI_API_KEY") ??
    throw new ArgumentException("Missing AZURE_OPENAI_API_KEY");
string openAIEndpoint = Environment.GetEnvironmentVariable("AZURE_OPENAI_ENDPOINT") ??
    throw new ArgumentException("Missing AZURE_OPENAI_ENDPOINT");
// Enter the deployment name you chose when you deployed the model.
string engine = Environment.GetEnvironmentVariable("AZURE_OPENAI_CHAT_DEPLOYMENT") ??
    throw new ArgumentException("Missing AZURE_OPENAI_CHAT_DEPLOYMENT");
// This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
string speechKey = Environment.GetEnvironmentVariable("SPEECH_KEY") ??
    throw new ArgumentException("Missing SPEECH_KEY");
string speechRegion = Environment.GetEnvironmentVariable("SPEECH_REGION") ??
    throw new ArgumentException("Missing SPEECH_REGION");
// Sentence end symbols for splitting the response into sentences.
List<string> sentenceSeparators = new() { ".", "!", "?", ";", "。", "!", "?", ";", "\n" };
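// Each complete sentence in the streaming response is sent to the speech synthesizer as soon as one of these separators is seen.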
try
{
    await ChatWithAzureOpenAI();
}
catch (Exception ex)
{
    Console.WriteLine(ex);
}
// Prompts Azure OpenAI with a request and synthesizes the response.
async Task AskAzureOpenAI(string prompt)
{
    object consoleLock = new();
    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
    // The language of the voice that speaks.
    speechConfig.SpeechSynthesisVoiceName = "en-US-JennyMultilingualNeural";
    var audioOutputConfig = AudioConfig.FromDefaultSpeakerOutput();
    using var speechSynthesizer = new SpeechSynthesizer(speechConfig, audioOutputConfig);
    speechSynthesizer.Synthesizing += (sender, args) =>
    {
        lock (consoleLock)
        {
            Console.ForegroundColor = ConsoleColor.Yellow;
            Console.Write("[Audio]");
            Console.ResetColor();
        }
    };
    // Ask Azure OpenAI
    OpenAIClient client = new(new Uri(openAIEndpoint), new AzureKeyCredential(openAIKey));
    var completionsOptions = new ChatCompletionsOptions()
    {
        DeploymentName = engine,
        Messages = { new ChatRequestUserMessage(prompt) },
        MaxTokens = 100,
    };
    var responseStream = await client.GetChatCompletionsStreamingAsync(completionsOptions);
    StringBuilder gptBuffer = new();
    await foreach (var completionUpdate in responseStream)
    {
        var message = completionUpdate.ContentUpdate;
        if (string.IsNullOrEmpty(message))
        {
            continue;
        }
        lock (consoleLock)
        {
            Console.ForegroundColor = ConsoleColor.DarkBlue;
            Console.Write(message);
            Console.ResetColor();
        }
        gptBuffer.Append(message);
        // Speak the buffered text as soon as a sentence-ending symbol arrives.
        if (sentenceSeparators.Any(message.Contains))
        {
            var sentence = gptBuffer.ToString().Trim();
            if (!string.IsNullOrEmpty(sentence))
            {
                await speechSynthesizer.SpeakTextAsync(sentence);
                gptBuffer.Clear();
            }
        }
    }
}
// Continuously listens for speech input to recognize and send as text to Azure OpenAI
async Task ChatWithAzureOpenAI()
{
    var speechConfig = SpeechConfig.FromSubscription(speechKey, speechRegion);
    // Should be the locale for the speaker's language.
    speechConfig.SpeechRecognitionLanguage = "en-US";
    using var audioConfig = AudioConfig.FromDefaultMicrophoneInput();
    using var speechRecognizer = new SpeechRecognizer(speechConfig, audioConfig);
    var conversationEnded = false;
    while (!conversationEnded)
    {
        Console.WriteLine("Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.");
        // Get audio from the microphone and then send it to the speech recognition service.
        var speechRecognitionResult = await speechRecognizer.RecognizeOnceAsync();
        switch (speechRecognitionResult.Reason)
        {
            case ResultReason.RecognizedSpeech:
                if (speechRecognitionResult.Text == "Stop.")
                {
                    Console.WriteLine("Conversation ended.");
                    conversationEnded = true;
                }
                else
                {
                    Console.WriteLine($"Recognized speech: {speechRecognitionResult.Text}");
                    await AskAzureOpenAI(speechRecognitionResult.Text);
                }
                break;
            case ResultReason.NoMatch:
                Console.WriteLine($"No speech could be recognized: {NoMatchDetails.FromResult(speechRecognitionResult).Reason}");
                break;
            case ResultReason.Canceled:
                var cancellationDetails = CancellationDetails.FromResult(speechRecognitionResult);
                Console.WriteLine($"Speech Recognition canceled: {cancellationDetails.Reason}");
                if (cancellationDetails.Reason == CancellationReason.Error)
                {
                    Console.WriteLine($"Error details: {cancellationDetails.ErrorDetails}");
                }
                break;
        }
    }
}
PS C:\dev\openai\csharp> dotnet run
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
Recognized speech: Make a comma separated list of all continents.
Azure OpenAI response: Africa, Antarctica, Asia, Australia, Europe, North America, South America
Speech synthesized to speaker for text [Africa, Antarctica, Asia, Australia, Europe, North America, South America]
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
Recognized speech: Make a comma separated list of 1 Astronomical observatory for each continent. A list should include each continent name in parentheses.
Azure OpenAI response: Mauna Kea Observatories (North America), La Silla Observatory (South America), Tenerife Observatory (Europe), Siding Spring Observatory (Australia), Beijing Xinglong Observatory (Asia), Naukluft Plateau Observatory (Africa), Rutherford Appleton Laboratory (Antarctica)
Speech synthesized to speaker for text [Mauna Kea Observatories (North America), La Silla Observatory (South America), Tenerife Observatory (Europe), Siding Spring Observatory (Australia), Beijing Xinglong Observatory (Asia), Naukluft Plateau Observatory (Africa), Rutherford Appleton Laboratory (Antarctica)]
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
Conversation ended.
PS C:\dev\openai\csharp>
import os
import azure.cognitiveservices.speech as speechsdk
from openai import AzureOpenAI
# This example requires environment variables named "AZURE_OPENAI_API_KEY", "AZURE_OPENAI_ENDPOINT" and "AZURE_OPENAI_CHAT_DEPLOYMENT"
# Your endpoint should look like the following https://YOUR_OPEN_AI_RESOURCE_NAME.openai.azure.com/
client = AzureOpenAI(
    azure_endpoint=os.environ.get('AZURE_OPENAI_ENDPOINT'),
    api_key=os.environ.get('AZURE_OPENAI_API_KEY'),
    api_version="2023-05-15"
)
# This will correspond to the custom name you chose for your deployment when you deployed a model.
deployment_id=os.environ.get('AZURE_OPENAI_CHAT_DEPLOYMENT')
# This example requires environment variables named "SPEECH_KEY" and "SPEECH_REGION"
speech_config = speechsdk.SpeechConfig(subscription=os.environ.get('SPEECH_KEY'), region=os.environ.get('SPEECH_REGION'))
audio_output_config = speechsdk.audio.AudioOutputConfig(use_default_speaker=True)
audio_config = speechsdk.audio.AudioConfig(use_default_microphone=True)
# Should be the locale for the speaker's language.
speech_config.speech_recognition_language="en-US"
speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config, audio_config=audio_config)
# The language of the voice that responds on behalf of Azure OpenAI.
speech_config.speech_synthesis_voice_name='en-US-JennyMultilingualNeural'
speech_synthesizer = speechsdk.SpeechSynthesizer(speech_config=speech_config, audio_config=audio_output_config)
# Sentence end symbols for splitting the response into sentences.
tts_sentence_end = [ ".", "!", "?", ";", "。", "!", "?", ";", "\n" ]
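# Each complete sentence in the streaming response is spoken as soon as one of these symbols is seen.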
# Prompts Azure OpenAI with a request and synthesizes the response.
def ask_azure_openai(prompt):
    # Ask Azure OpenAI with streaming enabled
    response = client.chat.completions.create(model=deployment_id, max_tokens=200, stream=True, messages=[
        {"role": "user", "content": prompt}
    ])
    collected_messages = []
    last_tts_request = None

    # Iterate through the response stream
    for chunk in response:
        if len(chunk.choices) > 0:
            chunk_message = chunk.choices[0].delta.content  # extract the message
            if chunk_message is not None:
                collected_messages.append(chunk_message)  # save the message
                if chunk_message in tts_sentence_end:  # sentence end found
                    text = ''.join(collected_messages).strip()  # join the received messages into a sentence
                    if text != '':  # skip if the sentence contains only whitespace or newlines
                        print(f"Speech synthesized to speaker for: {text}")
                        last_tts_request = speech_synthesizer.speak_text_async(text)
                        collected_messages.clear()

    # Wait for the last synthesis request to finish before returning.
    if last_tts_request:
        last_tts_request.get()
# Continuously listens for speech input to recognize and send as text to Azure OpenAI
def chat_with_azure_openai():
    while True:
        print("Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.")
        try:
            # Get audio from the microphone and then send it to the speech recognition service.
            speech_recognition_result = speech_recognizer.recognize_once_async().get()

            # If speech is recognized, send it to Azure OpenAI and listen for the response.
            if speech_recognition_result.reason == speechsdk.ResultReason.RecognizedSpeech:
                if speech_recognition_result.text == "Stop.":
                    print("Conversation ended.")
                    break
                print("Recognized speech: {}".format(speech_recognition_result.text))
                ask_azure_openai(speech_recognition_result.text)
            elif speech_recognition_result.reason == speechsdk.ResultReason.NoMatch:
                print("No speech could be recognized: {}".format(speech_recognition_result.no_match_details))
                break
            elif speech_recognition_result.reason == speechsdk.ResultReason.Canceled:
                cancellation_details = speech_recognition_result.cancellation_details
                print("Speech Recognition canceled: {}".format(cancellation_details.reason))
                if cancellation_details.reason == speechsdk.CancellationReason.Error:
                    print("Error details: {}".format(cancellation_details.error_details))
        except EOFError:
            break
# Main
try:
    chat_with_azure_openai()
except Exception as err:
    print("Encountered exception. {}".format(err))
PS C:\dev\openai\python> python.exe .\azure-openai-speech.py
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
Recognized speech: Make a comma separated list of all continents.
Azure OpenAI response: Africa, Antarctica, Asia, Australia, Europe, North America, South America
Speech synthesized to speaker for text [Africa, Antarctica, Asia, Australia, Europe, North America, South America]
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
Recognized speech: Make a comma separated list of 1 Astronomical observatory for each continent. A list should include each continent name in parentheses.
Azure OpenAI response: Mauna Kea Observatories (North America), La Silla Observatory (South America), Tenerife Observatory (Europe), Siding Spring Observatory (Australia), Beijing Xinglong Observatory (Asia), Naukluft Plateau Observatory (Africa), Rutherford Appleton Laboratory (Antarctica)
Speech synthesized to speaker for text [Mauna Kea Observatories (North America), La Silla Observatory (South America), Tenerife Observatory (Europe), Siding Spring Observatory (Australia), Beijing Xinglong Observatory (Asia), Naukluft Plateau Observatory (Africa), Rutherford Appleton Laboratory (Antarctica)]
Azure OpenAI is listening. Say 'Stop' or press Ctrl-Z to end the conversation.
Conversation ended.
PS C:\dev\openai\python>