I am using GPT-4o-08-06 to extract structured data from partially structured data. The input is a CSV containing 550 rows of data, which I insert into my prompt template at the <text_doc> placeholder.
I am using Structured Outputs to get the result as JSON.
Input Details
prompt template-
{
"role": "system",
"content": "You are a world class algorithm in process engineering industry, extracting information in a structured format."
},
{
"role": "user",
"content": "Use the given format to extract information from the following input: <text_doc>"
},
{
"role": "user",
"content": "Tip: Make sure to answer in the correct format. DO NOT MISS OUT ON ANY ROWS IN THE TABLES."
}
schema-
{
"type": "json_schema",
"json_schema": {
"name": "document_info",
"description": "All key:value extractions from the txt document.",
"strict": true,
"schema": {
"type": "object",
"properties": {
"company_name": {
"title": "company_name",
"description": "Company name/Buyer/From/",
"type": "string"
},
"quotation_no": {
"title": "quotation_no",
"description": "Usually an alphanumeric string as a unique identifier. Example- SHINTECH LOUISIANA, LLC",
"type": "string"
},
"purchase_order_no": {
"title": "purchase_order_no",
"description": "Purchase order No./PO No./No./Purchase Order:/Contract No./PO Number. Example- PQSD-289272",
"type": "string"
},
"buyers_reference_no": {
"title": "buyers_reference_no",
"description": "Buyers reference No. Example- 5429",
"type": "string"
},
"manufacturer": {
"title": "manufacturer",
"description": "MFR./Manufacturer/",
"type": "string"
},
"date_of_order": {
"title": "date_of_order",
"description": "Date of Order/ date/PO date/ Example- 11/29/2023",
"type": "string"
},
"place_of_delivery": {
"title": "place_of_delivery",
"description": "Place of Delivery/Place of Delivery:/Delivery Location/Example- 1234 Main Street, New York, NY 10001",
"type": "string"
},
"payment_term": {
"title": "payment_term",
"description": "Payment Terms/Terms of Payment/Example- Net 30 days",
"type": "string"
},
"shipping_term": {
"title": "shipping_term",
"description": "Shipping Terms/Shipping Method/Example- FOB Destination",
"type": "string"
},
"incoterms": {
"title": "incoterms",
"description": "Incoterms/Incoterms:/Example- FOB",
"type": "string"
},
"delivery_method": {
"title": "delivery_method",
"description": "Delivery Method/Method of Delivery/Example- Truck",
"type": "string"
},
"currency": {
"title": "currency",
"description": "Currency/Currency:/Example- USD",
"type": "string"
},
"entry_number": {
"title": "entry_number",
"description": "Entry No./Entry No.:/Example- 123456",
"type": "string"
},
"part_details": {
"title": "part_details",
"description": "Table containing parts information",
"type": "array",
"items": {
"description": "A single record of parts information",
"type": "object",
"properties": {
"description": {
"title": "description",
"description": "Description & Specifications/Item Description/Description of Goods/Commodity & Quality/ Example- O RING.",
"type": "string"
},
"item_no": {
"title": "item_no",
"description": "Item code/Item No./No./Item Material No./ Example- 1.",
"type": "string"
},
"quantity": {
"title": "quantity",
"description": "Quantity/Qty/Quantity:/QUANTITY/Q'ty/ Example- 1.00.",
"type": "string"
},
"discount": {
"title": "discount",
"description": "Discount/Discount %/Discount Amount/Discount Rate/Discount Price/%nt Dist/DISCOUNTED UNIT PRICE/ Ensure to add the currency symbol preceding the amount Example- $114.00.",
"type": "string"
},
"unit": {
"title": "unit",
"description": "Unit/UOM/ Example- EA.",
"type": "string"
},
"unit_price": {
"title": "unit_price",
"description": "UNIT PRICE/Rate/ Ensure to add the currency symbol preceding the amount Example- $114.00",
"type": "string"
},
"linenet": {
"title": "linenet",
"description": "Amount/Item Total/ Net Value/SUBTOTAL PRICE/ Ensure to add the currency symbol preceding the amount Example- $114.00.",
"type": "string"
},
"total_price": {
"title": "total_price",
"description": "Total Price/Total Value/ Ensure to add the currency symbol preceding the amount Example- $114.00.",
"type": "string"
},
"part_no": {
"title": "part_no",
"description": "Part No./ MHI PART NO./ Example- 471154.",
"type": "string"
},
"dwg_no": {
"title": "dwg_no",
"description": "Project #: Example- P-ECM-001-25.",
"type": "string"
},
"model_no": {
"title": "model_no",
"description": "Model No. Example- NS-290318.",
"type": "string"
},
"end_user": {
"title": "end_user",
"description": "Task #: Example- P-ECM-001-25*1.",
"type": "string"
},
"machine_no": {
"title": "machine_no",
"description": "Machine No./CUSTOMER MACHINE NO./Example- MC-1368,ST-1785.",
"type": "string"
},
"deliverydate": {
"title": "deliverydate",
"description": "Delivery date/Date of Delivery/LEAD TIME/Example- 9,8,27.",
"type": "string"
}
},
"required": [
"description",
"item_no",
"quantity",
"discount",
"unit",
"unit_price",
"linenet",
"total_price",
"part_no",
"dwg_no",
"model_no",
"end_user",
"machine_no",
"deliverydate"
],
"additionalProperties": false
}
},
"port_of_shipment": {
"title": "port_of_shipment",
"description": "Port of Shipment/To be shipped from",
"type": "string"
},
"port_of_destination": {
"title": "port_of_destination",
"description": "Port of Destination/to be shipped to",
"type": "string"
},
"effectiveness_of_contract": {
"title": "effectiveness_of_contract",
"description": "Effectiveness of the contract",
"type": "string"
},
"insurance": {
"title": "insurance",
"description": "Insurance/Insurance:",
"type": "string"
},
"packing": {
"title": "packing",
"description": "Packing/Packing:/Example- Wooden Box",
"type": "string"
},
"delivery_conditions": {
"title": "delivery_conditions",
"description": "Delivery Conditions/Conditions of Delivery/Example- Ex-Works",
"type": "string"
}
},
"required": [
"company_name",
"quotation_no",
"purchase_order_no",
"buyers_reference_no",
"manufacturer",
"date_of_order",
"place_of_delivery",
"payment_term",
"shipping_term",
"incoterms",
"delivery_method",
"currency",
"entry_number",
"part_details",
"port_of_shipment",
"port_of_destination",
"effectiveness_of_contract",
"insurance",
"packing",
"delivery_conditions"
],
"additionalProperties": false
}
}
}
Model & Params
model: gpt-4o-08-06 (supports 16k output)
temperature: 0.0
seed: 0
response_method: structured_output
Output Details (Run#1)
tokens used: output_tokens = 16384, input_tokens = 110073, total_tokens=126457
Excel Records Extracted: 214
Output Details (Run#2)
tokens used: output_tokens = 9862, input_tokens = 110073, total_tokens=119935
Excel Records Extracted: 125
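The model truncates the output unpredictably: Run #1 used the full 16,384 output tokens but returned only 214 records, while Run #2 stopped after 9,862 tokens, well short of the output limit, with only 125 records. What could be the reason? Any help would be appreciated.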