Delay in polling and assistant runs

I'm seeing a delay of about 5-10 seconds on each polling cycle and assistant run of the API. How can I make it faster? Here is the code I'm using:

import OpenAI from "openai";

// OpenAI SDK client; the API key is read from the OPENAI_KEY environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_KEY,
});

// Per-session state kept in module memory, keyed by session id.
let sessionConversations = {};
// Module-level slot for one Assistants thread; starts empty.
let sharedThread = null;

/**
 * Strips citation markers of the form `【<digits>:<digits>†source】` from text.
 *
 * @param {string} text - Text that may contain citation markers.
 * @returns {string} The text with every such marker removed.
 */
function removeCitations(text) {
  // The replacement must be a plain empty string; typographic quotes pasted
  // in from a rich-text editor are not valid JavaScript string delimiters.
  return text.replace(/【\d+:\d+†source】/g, "");
}

/**
 * Handles a chat request: appends the user's prompt to the session's
 * Assistants thread, runs the assistant, waits for the run to finish and
 * returns the assistant's reply with citation markers removed.
 *
 * Expects a JSON body of the shape `{ prompt, sessionId }`.
 *
 * @param {Request} req - Incoming request whose body is JSON.
 * @returns {Promise<Response>} 200 with `{ message, response }` on success,
 *   400 when `prompt` or `sessionId` is missing, 500 on any other failure
 *   (including a run that fails or does not complete in time).
 */
export async function POST(req) {
  console.time("Total Request Time");
  try {
    const { prompt, sessionId } = await req.json();

    if (typeof prompt !== "string" || prompt.trim() === "" || sessionId == null) {
      return new Response(
        JSON.stringify({ error: "prompt and sessionId are required" }),
        { status: 400 }
      );
    }

    console.time("Initialize Session");
    // Own-property check so inherited keys are never mistaken for a session.
    if (!Object.hasOwn(sessionConversations, sessionId)) {
      sessionConversations[sessionId] = { messages: [], thread: null };
    }
    console.timeEnd("Initialize Session");

    const userSession = sessionConversations[sessionId];

    console.time("Push User Message");
    userSession.messages.push({
      role: "user",
      content: prompt,
    });
    console.timeEnd("Push User Message");

    console.time("Thread Creation");
    // Each session gets its own thread. Sharing one thread between sessions
    // mixes different users' conversations and makes concurrent requests
    // collide on the same thread.
    if (!userSession.thread) {
      userSession.thread = await openai.beta.threads.create();
    }
    console.timeEnd("Thread Creation");

    const thread = userSession.thread;

    console.time("API Calls");
    // The two requests are independent, so they are issued concurrently.
    // NOTE(review): the embeddings are only stored on the session here;
    // if nothing else reads them, this call can be dropped entirely.
    const [, embeddingsResponse] = await Promise.all([
      openai.beta.threads.messages.create(thread.id, {
        role: "user",
        content: prompt,
      }),
      openai.embeddings.create({
        model: "text-embedding-3-small",
        input: prompt,
      }),
    ]);
    console.timeEnd("API Calls");

    userSession.embeddings = embeddingsResponse.data;

    console.time("Run Assistant and Polling");
    const run = await openai.beta.threads.runs.create(thread.id, {
      assistant_id: process.env.ASSISTANT_ID,
    });

    const pollIntervalMs = 500;
    // Upper bound of roughly 30 s of waiting, so slow runs are not cut off
    // after a few seconds.
    const maxAttempts = 60;
    // Statuses from which this handler can never reach "completed":
    // terminal failures, plus "requires_action" because no tool outputs are
    // submitted here.
    const stopStatuses = new Set([
      "failed",
      "cancelled",
      "expired",
      "incomplete",
      "requires_action",
    ]);

    // NOTE: most of the observed latency is the run itself; polling adds at
    // most one interval on top. Streaming the run would remove polling.
    let runStatus = run;
    for (
      let attempt = 0;
      attempt < maxAttempts &&
      runStatus.status !== "completed" &&
      !stopStatuses.has(runStatus.status);
      attempt++
    ) {
      await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
      runStatus = await openai.beta.threads.runs.retrieve(thread.id, run.id);
    }
    console.timeEnd("Run Assistant and Polling");

    // Never read the thread unless the run actually completed; otherwise the
    // newest message is the user's own prompt.
    if (runStatus.status !== "completed") {
      throw new Error(
        `Run ${run.id} did not complete (status: ${runStatus.status})`
      );
    }

    console.time("Fetch and Clean Response");
    // Only the newest message is needed, so request just that one.
    const threadMessages = await openai.beta.threads.messages.list(thread.id, {
      order: "desc",
      limit: 1,
    });
    const lastMessage = threadMessages.data[0];
    // The first content part is not guaranteed to be text.
    const textPart = lastMessage?.content?.find((part) => part.type === "text");
    if (lastMessage?.role !== "assistant" || !textPart) {
      throw new Error("Run completed without an assistant text message");
    }

    const cleanedMessage = removeCitations(textPart.text.value);
    const truncatedResponse = cleanedMessage.substring(0, 2500);

    userSession.messages.push({
      role: "assistant",
      content: cleanedMessage,
    });
    console.timeEnd("Fetch and Clean Response");

    return new Response(
      JSON.stringify({
        message: "Success",
        response: truncatedResponse,
      }),
      { status: 200 }
    );
  } catch (error) {
    console.error("Error details:", error.message || error.stack);
    return new Response(JSON.stringify({ error: "Internal server error" }), {
      status: 500,
    });
  } finally {
    // Runs on every exit path, so the timer is always closed exactly once.
    console.timeEnd("Total Request Time");
  }
}