I’ve tried it with a simple test case, but the API rejects the generated schema:
openai.BadRequestError: Error code: 400 - {'error': {'message': "Invalid schema for response_format 'ColorDetection': In context=('properties', 'color'), 'allOf' is not permitted", 'type': 'invalid_request_error', 'param': 'response_format', 'code': None}}
Test case:
from enum import Enum
from openai import OpenAI
from pydantic import BaseModel, Field
class Color(Enum):
    # Closed set of colors that can be reported. Each member's value is the
    # lowercase string form of its name.
    RED = "red"
    BLUE = "blue"
    GREEN = "green"
class ColorDetection(BaseModel):
    # Response shape for the color-detection request: an enum-typed color and
    # its hex code, each with a description attached through Field.
    color: Color = Field(
        description="The detected color",
    )
    hex_color_code: str = Field(
        description="The hex color code of the detected color",
    )
def test_detect_color():
    """Ask the model what color a Coke can is and expect ``Color.RED``.

    Sends a single user message with ``ColorDetection`` as the response
    format, then asserts on the ``color`` of the parsed result.
    """
    question = {"role": "user", "content": "What color is a Coke can?"}
    api = OpenAI()
    response = api.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=[question],
        response_format=ColorDetection,
    )
    detected = response.choices[0].message.parsed
    assert detected.color == Color.RED, f"Invalid color detected: {detected}"
Hey @mcantrell, it looks like this is happening because you added a description to the color property.
If you change your models to this, it should work.
from enum import Enum
from openai import OpenAI
from pydantic import BaseModel, Field
class Color(Enum):
    """The detected color"""

    # The docstring above is intentional: it carries the description that was
    # removed from the `ColorDetection.color` field, so its wording matters.
    RED = "red"
    BLUE = "blue"
    GREEN = "green"
class ColorDetection(BaseModel):
    # `color` is declared without a Field description; only the hex code
    # field has one.
    color: Color
    hex_color_code: str = Field(
        description="The hex color code of the detected color",
    )
def test_detect_color():
    """Check that the parsed ``ColorDetection`` for a Coke can is ``Color.RED``.

    One user message is sent to the chat-completions ``parse`` endpoint with
    ``ColorDetection`` as the response format.
    """
    conversation = [
        {"role": "user", "content": "What color is a Coke can?"},
    ]
    openai_client = OpenAI()
    result = openai_client.beta.chat.completions.parse(
        model="gpt-4o-2024-08-06",
        messages=conversation,
        response_format=ColorDetection,
    )
    first_choice = result.choices[0]
    detected = first_choice.message.parsed
    assert detected.color == Color.RED, f"Invalid color detected: {detected}"
I see that this has been fixed in v1.40.4. Links to the actual commit are not allowed, but the test classes in that release show this:
class Color(Enum):
    # Three string-valued members; the value is the lowercase member name.
    RED = "red"
    BLUE = "blue"
    GREEN = "green"
class ColorDetection(BaseModel):
    # Enum-typed field and string field, both described through Field.
    color: Color = Field(
        description="The detected color",
    )
    hex_color_code: str = Field(
        description="The hex color code of the detected color",
    )
And yet, I’m still seeing this error in version 1.42.0.
class ClassificationType(Enum):
    # Document categories. Values are the hyphenated string forms.
    lease = "lease"
    tax_return = "tax-return"
    profit_and_loss = "profit-and-loss"
    balance_sheet = "balance-sheet"
    unknown = "unknown"

    @classmethod
    def descriptions(cls):
        """Return a new dict mapping each member to its description."""
        pairs = (
            (cls.lease, "A lease agreement document."),
            (cls.tax_return, "A tax return document."),
            (cls.profit_and_loss, "A profit and loss statement."),
            (cls.balance_sheet, "A balance sheet document."),
            (cls.unknown, "An unknown type of document."),
        )
        return dict(pairs)

    def description(self):
        """Return this member's description, or a fallback text if it has none."""
        lookup = type(self).descriptions()
        if self in lookup:
            return lookup[self]
        return "No description available."
class Classification(BaseModel):
    # WARNING: adding a description to this field (or to any enum-typed field)
    # was observed to produce an obscure API error such as
    # "'allOf' is not permitted". The description below is the case that
    # still failed.
    classification_type: ClassificationType = Field(
        description="The classification of the document.",
    )
    justification: str = Field(
        description="The justification for the classification. Use 20 words or less.",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
Is there simply no way to have a property of one model be a class from another model? I’m finding lots of people wrapping the class in a list, but that’s not what I’m trying to model.
class TermType(Enum):
    # Billing/renewal periods; each value is the same string as the member name.
    monthly = "monthly"
    yearly = "yearly"
    quarterly = "quarterly"


# HACK: works around this issue
# https://community.openai.com/t/structured-response-enums-not-supported-in-with-pydantic-schema-generation/901973/2
# which is supposed to be fixed in v1.40.4 but does not seem to be as of Sept 18
# https://community.openai.com/t/structured-response-enums-not-supported-in-with-pydantic-schema-generation/901973/5
#
# Schema fragment passed as `json_schema_extra` on string fields. It must hold
# the members' *values* (plain strings): the Enum members themselves are not
# JSON-serializable and are not what a JSON-schema "enum" list should contain.
term_enum = {"enum": [term.value for term in TermType]}
class RenewalOptions(BaseModel):
    # One renewal option of a lease. `renewal_term` is a plain string; its
    # allowed values are added to the schema through `term_enum`.
    renewal_options_summary: str = Field(
        description="A text summary of what was outlined in the renewal section",
    )
    renewal_amount: float = Field(
        description="The renewal amount in dollars",
    )
    renewal_term: str = Field(
        description="The term of the renewal as an enum",
        json_schema_extra=term_enum,
    )
    notice_period_in_days: int = Field(
        description="The notice period in days",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Basics(BaseModel):
    # Key dates and term length of the lease. Every date description ends with
    # the shared `iso_text` suffix (defined outside this class; presumably a
    # note on the expected date format -- confirm). Each base sentence ends in
    # ". " so the suffix does not run straight into the sentence.
    basics_summary: str = Field(description="A summary in text of all the fields in the basics section")
    start_date: str = Field(description="The start date of the lease. " + iso_text)
    end_date: str = Field(description="The end date of the lease. " + iso_text)
    term_in_years: int = Field(description="The term of the lease in years")
    commencement_date: str = Field(description="The commencement date of the lease. " + iso_text)
    rent_commencement_date: str = Field(
        description="The rent commencement date of the lease. This is when the rent starts. " + iso_text
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Address(BaseModel):
    # Postal address; every component is kept as a string.
    street: str = Field(
        description="Street Address",
    )
    city: str = Field(
        description="City",
    )
    state: str = Field(
        description="State as a two-letter abbreviation. For example, OR for Oregon",
    )
    zip: str = Field(
        description="Zip code as a number in format. Example: 97703",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Contact(BaseModel):
    # A person or company together with contact details and an Address.
    # NOTE(review): the name descriptions ask for None when the contact is a
    # company, but both fields are annotated as plain `str` -- confirm whether
    # they should be optional.
    first_name: str = Field(
        description="First Name. Prefer this field to be None if the contact is a company",
    )
    last_name: str = Field(
        description="Last Name. Prefer this field to be None if the contact is a company",
    )
    company: str = Field(
        description="Company Name",
    )
    email: str = Field(
        description="Email Address",
    )
    phone_number: str = Field(
        description="Phone Number",
    )
    address: Address = Field(
        description="Address",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class PartiesInvolved(BaseModel):
    # The two parties to the lease. The landlord and tenant descriptions must
    # differ: with identical text there is nothing in the schema that tells
    # the two roles apart.
    landlord: Contact = Field(description="The person or company who owns the property and is renting it out")
    tenant: Contact = Field(description="The person or company who's renting the property")

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class OperatingExpenses(BaseModel):
    # Charges on top of base rent. Each amount is paired with a `*_term`
    # string whose allowed values come from `term_enum`.
    cam_charges_amount: float = Field(
        description="Common Area Maintenance Charges",
    )
    cam_charges_term: str = Field(
        description="Common Area Maintenance Charges Term as an enum",
        json_schema_extra=term_enum,
    )
    property_taxes_amount: float = Field(
        description="Property Taxes Owed",
    )
    property_taxes_term: str = Field(
        description="Property Taxes Term as an enum",
        json_schema_extra=term_enum,
    )
    additional_rent: float = Field(
        description="Additional Rent Owed",
    )
    additional_rent_term: str = Field(
        description="Additional Rent Owed term as an enum",
        json_schema_extra=term_enum,
    )
    additional_rent_explanation: str = Field(
        description="Additional Rent Explanation. This is where any text-based details about the additional rent can go.",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class FinancialTerms(BaseModel):
    # Base rent, security deposit and the nested OperatingExpenses model.
    # `base_rent_term` is a string constrained through `term_enum`.
    base_rent_amount: float = Field(
        description="The base rent amount",
    )
    base_rent_term: str = Field(
        description="The base rent term as an enum",
        json_schema_extra=term_enum,
    )
    security_deposit_amount: float = Field(
        description="The security deposit amount",
    )
    operating_expenses: OperatingExpenses = Field(
        description="Any additional operating expenses not covered by the base rent",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Lease(BaseModel):
    # Top-level model that nests the other lease sections.
    # NOTE(review): `Steps` and `List` are not defined in this snippet; they
    # must come from elsewhere for this class to load.
    basics: Basics
    renewal_options: List[RenewalOptions]
    parties_involved: PartiesInvolved
    financial_terms: FinancialTerms
    steps: List[Steps] = Field(
        description="The steps taken to extract the information from the lease document",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
Ok, this is pretty verbose, but anywhere I reference another class as a property, I get the 'allOf' is not permitted error.
Thanks. I can’t reproduce the error with your models. Here’s my script, with the term_enum hack removed (the fields use TermType directly) and the steps field commented out because the Steps class was missing:
from enum import Enum
from typing import List
import rich
from pydantic import Field, BaseModel
from openai import OpenAI
class TermType(Enum):
    # Billing/renewal periods; each value is the same string as the member name.
    monthly = "monthly"
    yearly = "yearly"
    quarterly = "quarterly"


# Stand-in for the suffix that the date descriptions append; the real text
# was not part of the posted models.
iso_text = "placeholder"
class RenewalOptions(BaseModel):
    # One renewal option of a lease; `renewal_term` is typed as the TermType
    # enum and carries a description.
    renewal_options_summary: str = Field(
        description="A text summary of what was outlined in the renewal section",
    )
    renewal_amount: float = Field(
        description="The renewal amount in dollars",
    )
    renewal_term: TermType = Field(
        description="The term of the renewal as an enum",
    )
    notice_period_in_days: int = Field(
        description="The notice period in days",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Basics(BaseModel):
    # Key dates and term length of the lease. Each date description is a base
    # sentence followed by the module-level `iso_text` suffix, so every base
    # sentence ends in ". " to keep the two parts separated.
    basics_summary: str = Field(description="A summary in text of all the fields in the basics section")
    start_date: str = Field(description="The start date of the lease. " + iso_text)
    end_date: str = Field(description="The end date of the lease. " + iso_text)
    term_in_years: int = Field(description="The term of the lease in years")
    commencement_date: str = Field(description="The commencement date of the lease. " + iso_text)
    rent_commencement_date: str = Field(
        description="The rent commencement date of the lease. This is when the rent starts. " + iso_text
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Address(BaseModel):
    # Postal address; all four parts are strings.
    street: str = Field(
        description="Street Address",
    )
    city: str = Field(
        description="City",
    )
    state: str = Field(
        description="State as a two-letter abbreviation. For example, OR for Oregon",
    )
    zip: str = Field(
        description="Zip code as a number in format. Example: 97703",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Contact(BaseModel):
    # A person or company with contact details and a nested Address.
    # NOTE(review): the name descriptions mention None for companies while the
    # annotations are plain `str` -- confirm the intended optionality.
    first_name: str = Field(
        description="First Name. Prefer this field to be None if the contact is a company",
    )
    last_name: str = Field(
        description="Last Name. Prefer this field to be None if the contact is a company",
    )
    company: str = Field(
        description="Company Name",
    )
    email: str = Field(
        description="Email Address",
    )
    phone_number: str = Field(
        description="Phone Number",
    )
    address: Address = Field(
        description="Address",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class PartiesInvolved(BaseModel):
    # Landlord and tenant of the lease. The two descriptions are deliberately
    # different so the roles can be told apart from the schema alone.
    landlord: Contact = Field(description="The person or company who owns the property and is renting it out")
    tenant: Contact = Field(description="The person or company who's renting the property")

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class OperatingExpenses(BaseModel):
    # Charges on top of base rent. Each amount is paired with a TermType-typed
    # `*_term` field.
    cam_charges_amount: float = Field(
        description="Common Area Maintenance Charges",
    )
    cam_charges_term: TermType = Field(
        description="Common Area Maintenance Charges Term as an enum",
    )
    property_taxes_amount: float = Field(
        description="Property Taxes Owed",
    )
    property_taxes_term: TermType = Field(
        description="Property Taxes Term as an enum",
    )
    additional_rent: float = Field(
        description="Additional Rent Owed",
    )
    additional_rent_term: TermType = Field(
        description="Additional Rent Owed term as an enum",
    )
    additional_rent_explanation: str = Field(
        description="Additional Rent Explanation. This is where any text-based details about the additional rent can go.",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class FinancialTerms(BaseModel):
    # Base rent (amount plus TermType period), security deposit and the
    # nested OperatingExpenses model.
    base_rent_amount: float = Field(
        description="The base rent amount",
    )
    base_rent_term: TermType = Field(description="The base rent term as an enum")
    security_deposit_amount: float = Field(
        description="The security deposit amount",
    )
    operating_expenses: OperatingExpenses = Field(
        description="Any additional operating expenses not covered by the base rent",
    )

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
class Lease(BaseModel):
    # Top-level model passed as the response format; it nests the other
    # lease sections.
    basics: Basics
    renewal_options: List[RenewalOptions]
    parties_involved: PartiesInvolved
    financial_terms: FinancialTerms
    # The `steps` field is disabled because the `Steps` model was not part of
    # the posted code:
    # steps: List[Steps] = Field(description="The steps taken to extract the information from the lease document")

    class Config:
        # Do not accept fields that are not declared on the model.
        extra = "forbid"
# Ask the model for fake data shaped like `Lease`, then print the parsed
# model, or the refusal text when nothing was parsed.
client = OpenAI()
completion = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {
            "role": "user",
            "content": "Can you generate some fake data matching the given response format?",
        },
    ],
    response_format=Lease,
)

message = completion.choices[0].message
if not message.parsed:
    print(message.refusal)
else:
    rich.print(message.parsed)
Also, here is a workaround that I put into tooldantic
def _inline_all_of(self, schema):
    """Inline every single-item ``allOf`` found anywhere in ``schema``.

    A dict of the form ``{"allOf": [sub], ...siblings}`` is rewritten to the
    siblings merged with ``sub``; on a key conflict the key from ``sub`` wins.
    ``allOf`` lists with zero or several items are kept, as is a single item
    that is not a dict.

    Args:
        schema: A JSON-schema fragment: a dict, a list, or any other value.

    Returns:
        The processed fragment. Dicts are modified in place and returned,
        lists are returned as new lists (any dicts inside them are still
        modified in place), and any other value is returned unchanged.
    """
    if isinstance(schema, list):
        return [self._inline_all_of(item) for item in schema]
    if not isinstance(schema, dict):
        return schema

    inlined = None
    all_of = schema.get("allOf")
    if isinstance(all_of, list) and len(all_of) == 1:
        candidate = self._inline_all_of(all_of[0])
        if isinstance(candidate, dict):
            inlined = candidate
            # Remove the wrapper BEFORE merging: the inlined schema may itself
            # contain an "allOf" (with several items), and removing the key
            # after the merge would throw that one away.
            del schema["allOf"]

    # Sibling keys are processed whether or not an allOf was inlined, so
    # nested single-item allOf constructs next to the wrapper are handled too.
    # Only existing keys are reassigned, so the dict's size does not change
    # while it is being iterated.
    for key, value in schema.items():
        schema[key] = self._inline_all_of(value)

    if inlined is not None:
        schema.update(inlined)
    return schema