gpt-4o-transcribe-diarize only transcribes the first 32 seconds

I have been trying to use gpt-4o-transcribe-diarize with both WAV and Opus-encoded (WebM) files, but I always get a transcript of only the first 32 seconds, as if only the first chunk were processed on the backend. Any clue what I am doing wrong? I tried with and without `extra_body`; same result. Here's the relevant code:
```js
try {
  stream = fh.createReadStream();
  const filePart = await toFile(stream, inputFileName || "input_audio");

  // Build reference clips for known speakers (helper defined elsewhere).
  speakerRefs = await buildSpeakerReferences({
    interviewerPath: interviewerSamplePath,
    panelistPath: panelistSamplePath,
    fallbackPath: monoSamplePath,
    durationSec
  });

  const request = {
    model: "gpt-4o-transcribe-diarize",
    file: filePart,
    response_format: "diarized_json",
    chunking_strategy: "auto"
  };

  // Only attach known speakers when we actually have reference clips.
  if (speakerRefs.names.length) {
    request.extra_body = {
      known_speaker_names: speakerRefs.names,
      known_speaker_references: speakerRefs.references
    };
  }

  console.log("DEBUG0", JSON.stringify(request, null, 2));

  response = await openai.audio.transcriptions.create({ ...request });
  response._speaker_reference_debug = speakerRefs.debug;

  console.log("DEBUG1", JSON.stringify(response, null, 2));
} finally {
  if (stream) stream.destroy();
  await fh.close();
}
```
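
For completeness, here is the minimal call with the speaker-reference logic stripped out, which gives the same truncated result (the `input.wav` path is just a placeholder, and `segments` is the field I'd expect `diarized_json` to populate):

```js
// Minimal repro: no speaker references, no extra_body.
// "input.wav" is a placeholder; any file longer than ~32 s shows the issue.
import fs from "node:fs";
import OpenAI, { toFile } from "openai";

const openai = new OpenAI();

const filePart = await toFile(fs.createReadStream("input.wav"), "input.wav");

const response = await openai.audio.transcriptions.create({
  model: "gpt-4o-transcribe-diarize",
  file: filePart,
  response_format: "diarized_json",
  chunking_strategy: "auto"
});

// Segments still only cover the first ~32 seconds of the file.
console.log(JSON.stringify(response, null, 2));
```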