I am trying to load multiple files for QnA but the index only remembers the last file uploaded from a folder.
Do I need to change the structure of for loop or have another parameter with the Open Method
from langchain.text_splitter import CharacterTextSplitter
#from langchain.document_loaders import UnstructuredFileLoader
from langchain.document_loaders import UnstructuredPDFLoader
from langchain.vectorstores.faiss import FAISS
from langchain.embeddings import OpenAIEmbeddings
import pickle
import os
print("Loading data...")
pdf_folder_path = "content/"
print(os.listdir(pdf_folder_path))
# Load multiple files
# location of the pdf file/files.
loaders = [UnstructuredPDFLoader(os.path.join(pdf_folder_path, fn)) for fn in os.listdir(pdf_folder_path)]
print(loaders)
alldocument = []
vectorstore = None
for loader in loaders:
print("Loading raw document..." + loader.file_path)
raw_documents = loader.load()
print("Splitting text...")
text_splitter = CharacterTextSplitter(
separator="\n\n",
chunk_size=800,
chunk_overlap=100,
length_function=len,
)
documents = text_splitter.split_documents(raw_documents)
#alldocument = alldocument + documents
print("Creating vectorstore...")
embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents, embeddings)
#with open("vectorstore.pkl", "wb") as f:
with open("vectorstore.pkl", "ab") as f:
pickle.dump(vectorstore, f)
f.close()