I'm seeing a delay of about 5–10 seconds on each Assistant run (create run + polling) through the API. How can I make it faster? Here is the code I'm using:
import OpenAI from "openai";

// Client for the OpenAI API; the key is read from the OPENAI_KEY
// environment variable.
const openai = new OpenAI({
  apiKey: process.env.OPENAI_KEY,
});

// In-memory conversation state keyed by the client-supplied sessionId.
// POST creates each entry as { messages: [], thread: null } and appends the
// user/assistant turns to `messages`.
// NOTE(review): this lives in process memory only — presumably it is lost on
// restart and not shared between server instances; confirm that is acceptable.
let sessionConversations = {};

// The first thread POST creates is cached here and handed to every session
// that does not yet have a thread of its own.
// NOTE(review): this means all sessions post into the same thread — confirm
// that sharing one conversation across sessions is intended.
let sharedThread = null;
/**
 * Strips citation markers of the form `【<digits>:<digits>†source】` from a
 * string.
 *
 * @param {string} text - Text that may contain citation markers.
 * @returns {string} The text with every matching marker removed.
 */
function removeCitations(text) {
  // The brackets are the full-width characters U+3010 / U+3011 and need no
  // escaping inside a regular expression.
  return text.replace(/【\d+:\d+†source】/g, "");
}
/**
 * Handles one chat turn.
 *
 * Reads `{ prompt, sessionId }` from the JSON request body, appends the prompt
 * to the session's thread, runs the assistant identified by
 * `process.env.ASSISTANT_ID`, polls until the run leaves its pending states,
 * and returns the assistant's reply with citation markers removed and
 * truncated to 2500 characters.
 *
 * @param {Request} req - Incoming request whose JSON body carries `prompt`
 *   and `sessionId`.
 * @returns {Promise<Response>} 200 with `{ message: "Success", response }`,
 *   400 when `prompt` or `sessionId` is missing, or 500 with
 *   `{ error: "Internal server error" }` on any failure (including a run that
 *   fails or does not complete within the polling budget).
 */
export async function POST(req) {
  console.time("Total Request Time");
  try {
    const { prompt, sessionId } = await req.json();

    // Without this check a missing sessionId would silently land every such
    // caller in the same "undefined" session.
    if (typeof prompt !== "string" || prompt.length === 0 || sessionId == null) {
      return new Response(
        JSON.stringify({ error: "Missing prompt or sessionId" }),
        { status: 400 }
      );
    }

    console.time("Initialize Session");
    if (!sessionConversations[sessionId]) {
      sessionConversations[sessionId] = { messages: [], thread: null };
    }
    console.timeEnd("Initialize Session");
    const userSession = sessionConversations[sessionId];

    console.time("Push User Message");
    userSession.messages.push({
      role: "user",
      content: prompt,
    });
    console.timeEnd("Push User Message");

    console.time("Thread Creation");
    // Reuse shared thread if available, otherwise create a new one.
    // NOTE(review): every session ends up on the same thread, so sessions see
    // each other's messages and presumably cannot run concurrently — confirm
    // this is intended before relying on it.
    if (!userSession.thread) {
      if (sharedThread) {
        userSession.thread = sharedThread;
      } else {
        userSession.thread = await openai.beta.threads.create();
        sharedThread = userSession.thread;
      }
    }
    console.timeEnd("Thread Creation");
    const thread = userSession.thread;

    console.time("API Calls");
    // The two requests are independent, so they are issued in parallel.
    // NOTE(review): the embeddings are stored on the session but not read
    // anywhere in this handler; if nothing else uses them, dropping the call
    // removes one network round-trip from every request.
    const [, embeddingsResponse] = await Promise.all([
      openai.beta.threads.messages.create(thread.id, {
        role: "user",
        content: prompt,
      }),
      openai.embeddings.create({
        model: "text-embedding-3-small",
        input: prompt,
      }),
    ]);
    console.timeEnd("API Calls");
    userSession.embeddings = embeddingsResponse.data;

    console.time("Run Assistant and Polling");
    // Run the assistant and start polling for results
    const run = await openai.beta.threads.runs.create(thread.id, {
      assistant_id: process.env.ASSISTANT_ID,
    });

    // Only these statuses can still progress on their own; anything else
    // (completed, failed, cancelled, expired, incomplete, requires_action)
    // will not change by waiting, so polling stops immediately.
    const pendingStatuses = ["queued", "in_progress", "cancelling"];
    const maxAttempts = 10;
    const delay = 500; // Fixed delay for polling
    let runStatus;
    let attempts = 0;
    do {
      await new Promise((resolve) => setTimeout(resolve, delay));
      runStatus = await openai.beta.threads.runs.retrieve(thread.id, run.id);
      attempts++;
    } while (
      pendingStatuses.includes(runStatus.status) &&
      attempts < maxAttempts
    );
    console.timeEnd("Run Assistant and Polling");

    // Never read the thread unless the run actually finished; otherwise the
    // newest message would be the user's own prompt or a stale reply.
    // NOTE(review): a run abandoned here is presumably still active on the
    // thread — consider cancelling it so later requests are not blocked.
    if (runStatus.status !== "completed") {
      const detail = runStatus.last_error
        ? runStatus.last_error.message
        : `gave up after ${attempts} polling attempts`;
      throw new Error(
        `Run ${run.id} did not complete (status: ${runStatus.status}): ${detail}`
      );
    }

    console.time("Fetch and Clean Response");
    // Fetch the final message and clean it. Messages are matched on run_id so
    // that only the reply produced by this run is returned.
    const threadMessages = await openai.beta.threads.messages.list(thread.id);
    const reply = threadMessages.data.find(
      (message) => message.role === "assistant" && message.run_id === run.id
    );
    if (!reply) {
      throw new Error(`No assistant message found for run ${run.id}`);
    }
    // The first content part is not guaranteed to be text.
    const textPart = reply.content.find((part) => part.type === "text");
    if (!textPart) {
      throw new Error(`Assistant message ${reply.id} has no text content`);
    }
    const cleanedMessage = removeCitations(textPart.text.value);
    const truncatedResponse = cleanedMessage.substring(0, 2500);
    // The session history keeps the full reply; only the HTTP response is
    // truncated.
    userSession.messages.push({
      role: "assistant",
      content: cleanedMessage,
    });
    console.timeEnd("Fetch and Clean Response");

    return new Response(
      JSON.stringify({
        message: "Success",
        response: truncatedResponse,
      }),
      { status: 200 }
    );
  } catch (error) {
    console.error("Error details:", error.message || error.stack);
    return new Response(JSON.stringify({ error: "Internal server error" }), {
      status: 500,
    });
  } finally {
    // Ends the total timer on every exit path (success, 400, or 500).
    console.timeEnd("Total Request Time");
  }
}