I'm using ChromaDB to store my embedded texts, and at retrieval time I need to know the ID of each embedded chunk that ChromaDB returns. I need these IDs so that I can link the generated text back to the source documents that were used.
My code for retrieval is:
def __setRetrieval(self, vectordb, llm):
    """Build a conversational retrieval chain on top of ``vectordb``.

    Args:
        vectordb: Vector store whose ``as_retriever`` method supplies the
            retriever (searched using ``RETRIEVAL_TYPE``).
        llm: Language model passed to ``ConversationalRetrievalChain.from_llm``.

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm``.
    """
    # Conversation history is stored under MEMORY_KEY with return_messages enabled.
    chat_memory = ConversationBufferMemory(memory_key=MEMORY_KEY, return_messages=True)
    doc_retriever = vectordb.as_retriever(search_type=RETRIEVAL_TYPE)
    return ConversationalRetrievalChain.from_llm(
        llm, retriever=doc_retriever, memory=chat_memory
    )
And my code for embedding is:
# Split the document into chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=SPLIT_CHUNK_SIZE_CHARS, chunk_overlap=SPLIT_CHUNK_OVERLAP_CHARS
)
docs_split = splitter.split_documents(docs_txt)

# Log how many chunks the splitter produced.
logger.info(f"""
Chunks: "{len(docs_split)}"
""")

# Store the chunks in the Chroma collection through the existing client.
openai_lc_client = Chroma.from_documents(
    documents=docs_split,
    embedding=embeddings,
    collection_name=COLLECTION_NAME_DEFAULT,
    client=chroma_client,
)
Thanks!