Streaming using Structured Outputs

Hi @expertise.ai.chat, I managed to find a workaround by creating a wrapper for the Pydantic base class that processes the JSON schema the same way the streaming beta API does.

from pydantic.json_schema import (
    DEFAULT_REF_TEMPLATE,
    GenerateJsonSchema,
    JsonSchemaMode,
    model_json_schema
)
from typing import Any
from pydantic import BaseModel, Field
from openai.lib._pydantic import _ensure_strict_json_schema


class BaseModelOpenAI(BaseModel):
    """Pydantic base class whose generated JSON schema is normalized into
    the strict form required by OpenAI structured outputs.

    Subclass this instead of ``pydantic.BaseModel``; callers of
    ``model_json_schema()`` receive a strict-mode-compatible schema.
    """

    @classmethod
    def model_json_schema(
        cls,
        by_alias: bool = True,
        ref_template: str = DEFAULT_REF_TEMPLATE,
        schema_generator: type[GenerateJsonSchema] = GenerateJsonSchema,
        mode: JsonSchemaMode = 'serialization'
    ) -> dict[str, Any]:
        """Build the model's JSON schema, then tighten it for OpenAI.

        Parameters mirror ``pydantic.json_schema.model_json_schema``.

        Returns:
            The schema dict after strict-schema normalization (the same
            post-processing the streaming beta API applies).
        """
        raw_schema = model_json_schema(
            cls,
            by_alias=by_alias,
            ref_template=ref_template,
            schema_generator=schema_generator,
            mode=mode,
        )
        # Reuse the openai SDK's internal strict-schema transform so the
        # output matches what client.beta parsing helpers would produce.
        strict_schema = _ensure_strict_json_schema(raw_schema, path=(), root=raw_schema)
        return strict_schema

Your classes should inherit from BaseModelOpenAI and then you need to pass the response format as follows:

{
    "type": "json_schema",
    "json_schema": {
        "name": response_class.__name__,
        "schema": response_class.model_json_schema(),
        "strict": True
    }
}

Then you can use the standard client.chat.completions.create to send your request and get a streaming response.

2 Likes