Help! I’m encountering an issue where the script works perfectly with `gpt-4o-mini-2024-07-18`, but fails or behaves unexpectedly when I switch to more powerful models like `gpt-4o` or others. I’m not sure why this is happening, since I would expect the larger models to be compatible or even more capable. Any insights or suggestions would be greatly appreciated!
To help you troubleshoot this issue, it would be great if you could share a bit more detail, like the specific code you’re running, which models it works correctly with, what errors or problems you’re encountering, and your computing setup. This will give the community much better context to provide useful feedback.
“`gpt-4o-mini-2024-07-18` is technically compatible and runs correctly, but its recognition performance is poor. Switching to `gpt-4o` would likely improve accuracy, but the model is not supported or compatible.”
// Guard 1: the document path must name an existing, readable regular file.
// The message is delimited with ASCII double quotes; typographic quotes are
// not string delimiters in PHP and make the statement a parse error.
if (!is_file($urlDocumento) || !is_readable($urlDocumento)) {
    printr(" Error: el archivo no existe o no se puede leer");
    return false;
}

// Guard 2: the OpenAI API key is taken from $_SERVER; a missing or empty
// value aborts before any network request is made.
$apiKey = $_SERVER['OPENAI_API_KEY'] ?? '';
if (empty($apiKey)) {
    printr("❌ Error: falta OPENAI_API_KEY en \$_SERVER");
    return false;
}
// Start the stopwatch used to report the total processing time.
$start_time = microtime(true);

// 1) Upload the PDF to OpenAI
// NOTE(review): when CURLOPT_POSTFIELDS is an array, cURL builds the
// multipart Content-Type (including the boundary) on its own, so the explicit
// header below is probably redundant — confirm before removing it.
$uploadHeaders = [
    "Authorization: Bearer {$apiKey}",
    'Content-Type: multipart/form-data',
];
$uploadFields = [
    'file' => new CURLFile($urlDocumento),
    'purpose' => 'user_data',
];

$uploadCurl = curl_init('https://api.openai.com/v1/files');
curl_setopt_array($uploadCurl, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => $uploadHeaders,
    CURLOPT_POSTFIELDS => $uploadFields,
]);
$uploadBody = curl_exec($uploadCurl);

// Capture a transport-level failure (if any) before releasing the handle.
$uploadFailure = curl_errno($uploadCurl) ? curl_error($uploadCurl) : null;
curl_close($uploadCurl);
if ($uploadFailure !== null) {
    error_log('❌ Error al subir archivo a OpenAI: '.$uploadFailure);
    return false;
}

// A successful upload response carries the new file's "id"; anything else is
// reported using the API's error message when one is present.
$uploadInfo = json_decode($uploadBody, true);
$fileId = $uploadInfo['id'] ?? null;
if ($fileId === null) {
    error_log("❌ Error al subir el archivo: ".($uploadInfo['error']['message'] ?? 'Respuesta inválida'));
    return false;
}
printr("📎 Archivo subido: $fileId");
// 2) Prepare the prompts and the message list
// NOTE(review): inside the prompt each list marker ("-") sits on its own line,
// presumably a copy/paste artifact — confirm against the intended prompt text.
// The text is left untouched here because it is sent to the model verbatim.
$systemPrompt = <<<PROMPT
Eres un extractor de datos de facturas.
Recibirás un PDF de factura adjunto.
Devuélveme ÚNICAMENTE un JSON con estas claves:
-
numero (que es el numero de factura)
-
cifs (CSV de CIFs encontrados)
-
nombre
-
proveedor_nombre
-
total
-
divisa
-
importe (igual a total)
-
ivas: array de objetos { base, cuota, tipo }
-
fecha (YYYY-MM-DD)
-
fecha_vencimiento (YYYY-MM-DD o cadena vacía)
-
irpf (retención si la hay, 0 o null)
Nada más, JSON limpio.
PROMPT;

// The user message has two content parts: a reference to the uploaded file
// (by its id) and a short text instruction. All keys and values use ASCII
// quotes; the typographic quotes from the paste do not parse as PHP strings.
$messages = [
    ['role' => 'system', 'content' => $systemPrompt],
    ['role' => 'user', 'content' => [
        ['type' => 'file', 'file' => ['file_id' => $fileId]],
        ['type' => 'text', 'text' => "Extrae todos los datos de la factura, en la estructura JSON solicitada."],
    ]],
];

// 3) Chat completions request
// Request body for the chat completions endpoint: model, the prepared
// messages and a temperature of 0.0. Keys and values use ASCII quotes;
// typographic quotes are not valid PHP string delimiters.
$payload = json_encode([
    'model' => 'gpt-4o-mini-2024-07-18',
    'messages' => $messages,
    'temperature' => 0.0,
]);

$ch = curl_init('https://api.openai.com/v1/chat/completions');
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_POST => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer {$apiKey}",
        'Content-Type: application/json',
    ],
    CURLOPT_POSTFIELDS => $payload,
]);
$resp = curl_exec($ch);

// Delete the uploaded file from OpenAI immediately after the chat call and
// before its outcome is inspected, so the file is removed whether or not the
// chat request succeeded. The result of the DELETE call is not checked.
$ch1 = curl_init("https://api.openai.com/v1/files/{$fileId}");
curl_setopt_array($ch1, [
    CURLOPT_CUSTOMREQUEST => 'DELETE',
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_HTTPHEADER => [
        "Authorization: Bearer {$apiKey}",
    ],
]);
curl_exec($ch1);
curl_close($ch1);

// Only now evaluate the chat request: a transport error aborts with false.
if (curl_errno($ch)) {
    error_log('Error en la petición chat.completions: '.curl_error($ch));
    curl_close($ch);
    return false;
}
curl_close($ch);
// 4) Decode the chat completions response.
// The assignment below must be on its own line: when it trails a "//" comment
// it is never executed and $result stays undefined.
$result = json_decode($resp, true);
if (!isset($result['choices'][0]['message']['content'])) {
    error_log("Error en la respuesta: ".($result['error']['message'] ?? 'Sin contenido'));
    return false;
}

$jsonText = trim($result['choices'][0]['message']['content']);

// The reply may be wrapped in a Markdown code fence (``` or ```json) even
// though plain JSON was requested; unwrap it so json_decode() sees the JSON.
if (preg_match('/\A```(?:json)?\s*(.*?)\s*```\z/is', $jsonText, $fenced) === 1) {
    $jsonText = $fenced[1];
}

// Valid JSON that is not an object/array (e.g. "null" or a bare number)
// cannot be iterated below, so it is rejected like a syntax error.
$data = json_decode($jsonText);
if (json_last_error() !== JSON_ERROR_NONE || (!is_object($data) && !is_array($data))) {
    error_log("Error al decodificar JSON: $jsonText");
    return false;
}

// 5) Build the output: copy every decoded key onto a fresh object and attach
// the elapsed time (seconds, as a float) under "tiempo".
$out = new \stdClass();
foreach ($data as $k => $v) {
    $out->{$k} = $v;
}
$out->tiempo = microtime(true) - $start_time;

return $out;