This same behavior was reported, and commented on just recently, in another thread.
While the gpt-4.1 model is the one showing the symptom, I propose tackling it head-on: give the schema a name that itself states the purpose of the structured output response and its character encoding.
{
  "name": "api_utf_8_json",
  "schema": {
    "type": "object",
    "properties": {
      "products": {
        "type": "array",
        "description": "A list of products.",
        "items": {
          "type": "object",
          "properties": {
            "Eans": {
              "type": "array",
              "description": "List of EANs for the product.",
              "items": {
                "type": "string"
              }
            },
            "Name": {
              "type": "string",
              "description": "The name of the product."
            },
            "LabelText": {
              "type": "string",
              "description": "Label text providing details about the product."
            },
            "UsedUrls": {
              "type": "array",
              "description": "List of URLs associated with the product.",
              "items": {
                "type": "string"
              }
            },
            "ProductId": {
              "type": "string",
              "description": "Unique identifier for the product."
            },
            "ProductName": {
              "type": "string",
              "description": "The product's name."
            },
            "LongDescription": {
              "type": "string",
              "description": "A detailed description of the product."
            },
            "ShortDescription": {
              "type": "string",
              "description": "A brief description of the product."
            }
          },
          "required": [
            "Eans",
            "Name",
            "LabelText",
            "UsedUrls",
            "ProductId",
            "ProductName",
            "LongDescription",
            "ShortDescription"
          ],
          "additionalProperties": false
        }
      }
    },
    "required": [
      "products"
    ],
    "additionalProperties": false
  },
  "strict": true
}