Azure OpenAI GPT-4o mini 2024-07-18 Structured Outputs `response_format` not working

I’m trying to replicate the Structured Outputs math tutor example using the Azure OpenAI API, but response_format is rejected as an “Invalid parameter”.

Full error message:

	"name": "BadRequestError",
	"message": "Error code: 400 - {'error': {'message': \"Invalid parameter: 'response_format' must be one of 'json_object', 'text'.\", 'type': 'invalid_request_error', 'param': 'response_format', 'code': None}}",
	"stack": "---------------------------------------------------------------------------
BadRequestError                           Traceback (most recent call last)
Cell In[10], line 55
     25 response_format ={
     26     \"type\": \"json_schema\",
     27     \"json_schema\": {
   (...)
     50     }
     51 }
     54 # Ask the model to use the function
---> 55 response = client.chat.completions.create(
     56     model=deployment_name,
     57     messages=messages,
     58     response_format=response_format,
     59 )
     61 json.loads(response.choices[0].message.tool_calls[0].function.arguments)

File ~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\openai\\_utils\\_utils.py:277, in required_args.<locals>.inner.<locals>.wrapper(*args, **kwargs)
    275             msg = f\"Missing required argument: {quote(missing[0])}\"
    276     raise TypeError(msg)
--> 277 return func(*args, **kwargs)

File ~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\openai\\resources\\chat\\completions.py:590, in Completions.create(self, messages, model, frequency_penalty, function_call, functions, logit_bias, logprobs, max_tokens, n, presence_penalty, response_format, seed, stop, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, extra_headers, extra_query, extra_body, timeout)
    558 @required_args([\"messages\", \"model\"], [\"messages\", \"model\", \"stream\"])
    559 def create(
    560     self,
   (...)
    588     timeout: float | httpx.Timeout | None | NotGiven = NOT_GIVEN,
    589 ) -> ChatCompletion | Stream[ChatCompletionChunk]:
--> 590     return self._post(
    591         \"/chat/completions\",
    592         body=maybe_transform(
    593             {
    594                 \"messages\": messages,
    595                 \"model\": model,
    596                 \"frequency_penalty\": frequency_penalty,
    597                 \"function_call\": function_call,
    598                 \"functions\": functions,
    599                 \"logit_bias\": logit_bias,
    600                 \"logprobs\": logprobs,
    601                 \"max_tokens\": max_tokens,
    602                 \"n\": n,
    603                 \"presence_penalty\": presence_penalty,
    604                 \"response_format\": response_format,
    605                 \"seed\": seed,
    606                 \"stop\": stop,
    607                 \"stream\": stream,
    608                 \"stream_options\": stream_options,
    609                 \"temperature\": temperature,
    610                 \"tool_choice\": tool_choice,
    611                 \"tools\": tools,
    612                 \"top_logprobs\": top_logprobs,
    613                 \"top_p\": top_p,
    614                 \"user\": user,
    615             },
    616             completion_create_params.CompletionCreateParams,
    617         ),
    618         options=make_request_options(
    619             extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
    620         ),
    621         cast_to=ChatCompletion,
    622         stream=stream or False,
    623         stream_cls=Stream[ChatCompletionChunk],
    624     )

File ~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\openai\\_base_client.py:1240, in SyncAPIClient.post(self, path, cast_to, body, options, files, stream, stream_cls)
   1226 def post(
   1227     self,
   1228     path: str,
   (...)
   1235     stream_cls: type[_StreamT] | None = None,
   1236 ) -> ResponseT | _StreamT:
   1237     opts = FinalRequestOptions.construct(
   1238         method=\"post\", url=path, json_data=body, files=to_httpx_files(files), **options
   1239     )
-> 1240     return cast(ResponseT, self.request(cast_to, opts, stream=stream, stream_cls=stream_cls))

File ~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\openai\\_base_client.py:921, in SyncAPIClient.request(self, cast_to, options, remaining_retries, stream, stream_cls)
    912 def request(
    913     self,
    914     cast_to: Type[ResponseT],
   (...)
    919     stream_cls: type[_StreamT] | None = None,
    920 ) -> ResponseT | _StreamT:
--> 921     return self._request(
    922         cast_to=cast_to,
    923         options=options,
    924         stream=stream,
    925         stream_cls=stream_cls,
    926         remaining_retries=remaining_retries,
    927     )

File ~\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\openai\\_base_client.py:1020, in SyncAPIClient._request(self, cast_to, options, remaining_retries, stream, stream_cls)
   1017         err.response.read()
   1019     log.debug(\"Re-raising status error\")
-> 1020     raise self._make_status_error_from_response(err.response) from None
   1022 return self._process_response(
   1023     cast_to=cast_to,
   1024     options=options,
   (...)
   1027     stream_cls=stream_cls,
   1028 )

BadRequestError: Error code: 400 - {'error': {'message': \"Invalid parameter: 'response_format' must be one of 'json_object', 'text'.\", 'type': 'invalid_request_error', 'param': 'response_format', 'code': None}}"
}

code:

import os
import json
from openai import AzureOpenAI
from textwrap import dedent

# Initialize the Azure OpenAI client
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"), 
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),  
    api_version=os.getenv("OpenAI_API_VERSION")
)
deployment_name = os.getenv("AZURE_OPENAI_DEPLOYMENT_ID")

# Initial user message
messages = [
    {"role": "system", "content": dedent("""
        You are a helpful math tutor. You will be provided with a math problem,
        and your goal will be to output a step by step solution, along with a final answer.
        For each step, just provide the output as an equation use the explanation field to detail the reasoning.
                                         """)},
    {"role": "user", "content": "solve 8x + 31 = 2"}
]

# Define the JSON schema for structured output
response_format = {
    "type": "json_schema",
    "json_schema": {
        "name": "math_reasoning",
        "schema": {
            "type": "object",
            "properties": {
                "steps": {
                    "type": "array",
                    "items": {
                        "type": "object",
                        "properties": {
                            "explanation": {"type": "string"},
                            "output": {"type": "string"}
                        },
                        "required": ["explanation", "output"],
                        "additionalProperties": False
                    }
                },
                "final_answer": {"type": "string"}
            },
            "required": ["steps", "final_answer"],
            "additionalProperties": False
        },
        "strict": True
    }
}


# Request a structured response
response = client.chat.completions.create(
    model=deployment_name,
    messages=messages,
    response_format=response_format,
)

# Structured Outputs come back as message content, not tool_calls
json.loads(response.choices[0].message.content)

What model are you using, “2024-08-06”? I don’t see where “deployment_name” is defined.

Sorry, I’m using GPT-4o mini 2024-07-18, which should support response_format. (deployment_name comes from the AZURE_OPENAI_DEPLOYMENT_ID environment variable in the snippet above.)

Structured Outputs is simply not supported yet. Azure probably needs weeks to catch up with OpenAI’s new features…
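In the meantime, the error message itself suggests a workaround: the API version you’re on only accepts 'json_object' or 'text', so you can fall back to JSON mode and describe the schema in the prompt. A rough sketch reusing client and deployment_name from the original post (the prompt wording is my own, and unlike json_schema, JSON mode only guarantees valid JSON, not schema conformance):

# Fallback for API versions that only accept 'json_object' / 'text':
# enable JSON mode and spell out the expected shape in the system prompt.
# Note: JSON mode requires the word "JSON" to appear in the messages.
fallback_messages = [
    {"role": "system", "content": (
        "You are a helpful math tutor. Respond only with JSON of the form "
        '{"steps": [{"explanation": "...", "output": "..."}], "final_answer": "..."}'
    )},
    {"role": "user", "content": "solve 8x + 31 = 2"},
]

response = client.chat.completions.create(
    model=deployment_name,
    messages=fallback_messages,
    response_format={"type": "json_object"},  # accepted by older API versions
)
print(json.loads(response.choices[0].message.content))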

Just discovered this still doesn’t work with our eastus deployment. Azure can take months and months to reach parity with OpenAI, and some features seem to end up in a limbo state when the changes they need are at odds with broader platform changes.

gpt-4o with the 2024-08-01-preview API version works:

import os, re, tiktoken
from pprint import pprint
from pydantic import BaseModel
from openai import AzureOpenAI
from IPython.display import display, Markdown, Latex, HTML, JSON

# encoding = tiktoken.encoding_for_model("gpt-3.5-turbo-1106")
encoding = tiktoken.encoding_for_model("gpt-4o")

class Step(BaseModel):
    explanation: str
    output: str


class MathResponse(BaseModel):
    steps: list[Step]
    final_answer: str

client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("OpenAI_API_VERSION"),
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT")
)

# parse() validates the model's reply against the Pydantic schema
response = client.beta.chat.completions.parse(
    model=os.getenv("AZURE_OPENAI_DEPLOYMENT_ID"),
    messages=[
        {"role": "system", "content": "You are a helpful math tutor."},
        {"role": "user", "content": "solve 8x + 31 = 2"},
    ],
    response_format=MathResponse,
    temperature=0.3,
    max_tokens=1024
)

content = response.choices[0].message.content
print(f'word count: {len(content.split()):,}.')
print(f'estimated token count: {len(encoding.encode(content)):,}')
usage_pattern = re.compile(r"usage=CompletionUsage\((.*?)\)")  # response.usage exposes these fields directly, too
print(usage_pattern.search(str(response)).group(1), "\n")
display(JSON(content))
pprint(content)
# print(response.model_dump_json(indent=2))

output:

word count: 40.
estimated token count: 103
completion_tokens=103, prompt_tokens=127, total_tokens=230

{"steps":[{"explanation":"Start by isolating the term with the variable. Subtract 31 from both sides of the equation.","output":"8x + 31 - 31 = 2 - 31"},{"explanation":"Simplify both sides.","output":"8x = -29"},{"explanation":"Divide both sides by 8 to solve for x.","output":"x = -\frac{29}{8}"}],"final_answer":"x = -\frac{29}{8}"}
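Side note: since this goes through client.beta.chat.completions.parse, the SDK also hands back the validated Pydantic object on message.parsed, so the manual string handling above is optional. A minimal sketch using the same response:

# parse() attaches the validated Pydantic model to .parsed, so there is
# no need to re-parse the raw content string yourself.
math = response.choices[0].message.parsed  # a MathResponse instance
print(math.final_answer)
for step in math.steps:
    print(f"{step.output}  <-  {step.explanation}")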

@yl95 which region is your deployment in?

Folks, I appreciate your frustration with issues on the Azure platform; however, Microsoft runs its AI services offerings separately from OpenAI’s API endpoints.

Happy for members to see whether like-minded people with similar issues have found solutions, but the best place for resolution is your Microsoft support contact for Azure OpenAI Services.


Can confirm today that Structured Outputs works on Azure OpenAI with GPT-4o mini using the 2024-08-01-preview API version. Anyone else?
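For anyone landing here later: the decisive bit seems to be the API version handed to the client, not the model alone. A minimal sketch of the working setup (env var names follow the earlier snippets; the hard-coded version string is the one reported above):

import os
from openai import AzureOpenAI

# Structured Outputs (response_format={"type": "json_schema", ...}) needs
# api-version 2024-08-01-preview or later on Azure OpenAI.
client = AzureOpenAI(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version="2024-08-01-preview",
)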