Streaming using Structured Outputs

Option 1: use the third-party `tooldantic` package, whose base model already emits the OpenAI `response_format` wrapper:

# pip install git+https://github.com/nicholishen/tooldantic.git

import tooldantic as td

# One reasoning step in the tutor's answer: a prose explanation plus the
# intermediate result it produces. (Comments, not a docstring, so the
# generated JSON schema is unaffected.)
class Step(td.OpenAiResponseFormatBaseModel):
    explanation: str
    output: str

# Top-level structured response: an ordered list of steps followed by the
# final answer. This is the model passed as `response_format`.
class MathReasoning(td.OpenAiResponseFormatBaseModel):
    steps: list[Step]
    final_answer: str

Option 2: stay on plain Pydantic and plug in a custom JSON-schema generator that produces the same wrapper:

class OpenAiResponseFormatGenerator(pydantic.json_schema.GenerateJsonSchema):
    """Schema generator that wraps Pydantic's JSON schema in the envelope
    expected by OpenAI's ``response_format`` parameter for Structured Outputs.

    See: https://docs.pydantic.dev/latest/concepts/json_schema/#customizing-the-json-schema-generation-process
    """

    def generate(self, schema, mode="validation"):
        inner = super().generate(schema, mode=mode)
        return {
            "type": "json_schema",
            "json_schema": {
                # Fall back to a fixed name: a model without a "title" key
                # would otherwise raise KeyError here.
                "name": inner.pop("title", "response"),
                # OpenAI requires strict mode for guaranteed adherence to
                # the schema in Structured Outputs.
                "strict": True,
                "schema": inner,
            },
        }


class StrictBaseModel(pydantic.BaseModel):
    """Base model for Structured Outputs: forbids unknown fields (which
    emits ``additionalProperties: false``) and renders its JSON schema via
    :class:`OpenAiResponseFormatGenerator`.
    """

    model_config = {"extra": "forbid"}

    @classmethod
    def model_json_schema(cls, **kwargs):
        # Route schema generation through the OpenAI-flavoured generator;
        # all other keyword arguments pass straight through to Pydantic.
        generator = OpenAiResponseFormatGenerator
        return super().model_json_schema(schema_generator=generator, **kwargs)


# One step of the worked solution: a prose explanation and the resulting
# expression. (Comments rather than a docstring so the emitted JSON schema
# gains no "description" field.)
class Step(StrictBaseModel):
    explanation: str
    output: str


# Top-level structured response: the ordered solution steps plus the final
# answer. Its model_json_schema() yields the full response_format payload.
class MathReasoning(StrictBaseModel):
    steps: list[Step]
    final_answer: str

Calling the LLM with streaming enabled (either option above produces a schema usable here):

# NOTE(review): `client` is assumed to be an OpenAI client instance created
# elsewhere (e.g. openai.OpenAI()) — confirm against the surrounding code.
stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {
            "role": "system",
            "content": "You are a helpful math tutor. Guide the user through the solution step by step.",
        },
        {"role": "user", "content": "how can I solve 8x + 7 = -23"},
    ],
    # Tokens arrive incrementally; iterate over `stream` to consume chunks.
    stream=True,
    # model_json_schema() already returns the complete response_format
    # wrapper (thanks to the custom schema generator), so it is passed as-is.
    response_format=MathReasoning.model_json_schema(),
)
1 Like