I was able to get a streamed response with api_version = '2024-12-01-preview' and a slightly different chunking approach:
from openai import AzureOpenAI

api_base = 'https://resourcename.openai.azure.com/'  # your endpoint should look like https://YOUR_RESOURCE_NAME.openai.azure.com/
api_key = "<apikey>"
deployment_name = 'gpt-4'
api_version = '2024-12-01-preview'  # this might change in the future

client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    base_url=f"{api_base}openai/deployments/{deployment_name}",
)
response = client.chat.completions.create(
    model="gpt-4",
    messages=[
        {"role": "system", "content": "You are a helpful assistant."},
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": "Describe this picture:"
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": "https://th.bing.com/th/id/OIP.JQEaFD6izhB9jFmJlO1_NgHaFL?rs=1&pid=ImgDetMain"
                    }
                }
            ]
        }
    ],
    stream=True
    # stream_options={"include_usage": True}
)
# Some chunks (e.g. the very first one) carry no choices, so skip those
for chunk in response:
    if len(chunk.choices) == 0:
        continue
    print(chunk.choices[0].delta.content)
Output
None The image shows a close-up photograph of vibrant pink flowers, possibly dahlias, None
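
If you want to avoid the None entries (the first and last chunks carry no content) and keep the full reply, you can filter out empty deltas and join the rest. This is just a sketch of the same loop; collected and full_reply are names I made up:

collected = []
for chunk in response:
    # skip chunks that carry no choices (e.g. the very first chunk)
    if len(chunk.choices) == 0:
        continue
    delta = chunk.choices[0].delta.content
    # skip chunks whose delta has no content (e.g. the final chunk)
    if delta is None:
        continue
    print(delta, end="", flush=True)  # stream tokens as they arrive, without None lines
    collected.append(delta)

full_reply = "".join(collected)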
Hope it helps. Thank you.
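
One more note: instead of building base_url by hand, recent versions of the openai package also let you pass the endpoint and deployment directly. A sketch, assuming openai>=1.x:

client = AzureOpenAI(
    api_key=api_key,
    api_version=api_version,
    azure_endpoint=api_base,            # https://YOUR_RESOURCE_NAME.openai.azure.com/
    azure_deployment=deployment_name,   # e.g. 'gpt-4'
)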