Hello:
I create a DataFrame in the variable `parrafos`, which holds text extracted from a PDF document, using the code below. I want to add a column to `parrafos` that contains the embedding of each text. How can I do this?
pip install langchain pypdf
import pandas as pd
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter
from openai import OpenAI
# Load the PDF; load_and_split() returns a list of Document objects.
loader = PyPDFLoader("./Comercial.pdf")
pages = loader.load_and_split()

# Re-split the loaded documents on ".\n" with a target chunk size of 300.
# The original built this splitter but never applied it, and then iterated
# over `textos` before that name existed (NameError).
split = CharacterTextSplitter(chunk_size=300, separator=".\n")
documentos = split.split_documents(pages)

# List of paragraphs (one string per chunk)
textos = [str(doc.page_content) for doc in documentos]
parrafos = pd.DataFrame(textos, columns=["texto"])
import numpy as np
# Put the API key between the quotes, or drop the api_key argument entirely:
# a bare OpenAI() reads the key from the OPENAI_API_KEY environment variable.
# No trailing comma here: `OpenAI(...),` would make `client` a 1-tuple and
# `client.embeddings` would fail with AttributeError.
client = OpenAI(api_key="")
def obtener_embeddings_batch(textos):
    """Return one embedding vector per text in *textos*.

    Args:
        textos: Iterable of strings to embed (a list, a DataFrame column,
            a slice of either, ...). It is materialized into a list before
            being sent to the API.

    Returns:
        A list of ``numpy.ndarray`` built from ``response.data``, one per
        returned item. An empty input yields an empty list without calling
        the API.
    """
    textos = list(textos)
    if not textos:
        # Nothing to embed; skip the request rather than send an empty input.
        return []

    response = client.embeddings.create(
        model="text-embedding-3-small",  # using the appropriate model
        input=textos,
    )
    # The client returns a response object, not a dict: use attribute access
    # (response.data / item.embedding) instead of subscripting.
    return [np.array(item.embedding) for item in response.data]
# NOTE(review): presumably the number of texts passed to
# obtener_embeddings_batch per call; the code that uses it is not visible
# here, so confirm.
batch_size = 10