Is this an appropriate method to efficiently generate embeddings for multiple chunks?
import asyncio
from typing import Any
from uuid import uuid4

from openai import AsyncOpenAI
from qdrant_client import models


async def create_point(
    client: AsyncOpenAI, example: dict[str, Any], model: str
) -> models.PointStruct:
    """Creates a Point that contains the payload and the vector."""
    # One API call per chunk; the response holds a single embedding.
    embedding_result = await client.embeddings.create(
        input=[example["content"]],
        model=model,
    )
    vector = embedding_result.data[0].embedding
    return models.PointStruct(
        id=str(uuid4()),
        vector=vector,
        payload=dict(
            chunk_id=example.get("id"),
            arxiv_id=example.get("arxiv_id"),
            title=example.get("title"),
            content=example.get("content"),
            prechunk_id=example.get("prechunk_id"),
            postchunk_id=example.get("postchunk_id"),
            # "references" is a numpy array in my data, hence .tolist()
            references=example["references"].tolist(),
        ),
    )

async def process_batch(
    client: AsyncOpenAI, batch: list[dict[str, Any]], model: str
) -> list[models.PointStruct]:
    """Processes a batch of examples to create PointStructs."""
    # Fire all per-chunk embedding requests concurrently and wait for them.
    return await asyncio.gather(
        *[create_point(client, example, model) for example in batch]
    )
Or is it better to pass all the chunks in a single API call and then create the Points (Qdrant objects)? I'm asking because passing 10 elements with this approach takes just a few seconds, but when passing the full list the operation still takes quite some time to complete.
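To make the comparison concrete, this is a sketch of the single-call variant I mean, assuming the embeddings endpoint accepts a list of inputs and returns the vectors in the same order (the helper name create_points_batched is mine):

async def create_points_batched(
    client: AsyncOpenAI, batch: list[dict[str, Any]], model: str
) -> list[models.PointStruct]:
    """Embeds a whole batch in one API call, then builds the Points."""
    response = await client.embeddings.create(
        input=[example["content"] for example in batch],
        model=model,
    )
    # response.data preserves the order of the inputs.
    return [
        models.PointStruct(
            id=str(uuid4()),
            vector=item.embedding,
            payload=dict(
                chunk_id=example.get("id"),
                arxiv_id=example.get("arxiv_id"),
                title=example.get("title"),
                content=example.get("content"),
                prechunk_id=example.get("prechunk_id"),
                postchunk_id=example.get("postchunk_id"),
                references=example["references"].tolist(),
            ),
        )
        for example, item in zip(batch, response.data)
    ]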