Hello, I built an OpenAI chatbot with LangChain that answers questions over my documents, but I have one question: I would like the model to fall back on its general knowledge when it can't find the answer in the documents. What do I mean by this?
Let's say I have two models: one is a plain chat model, the other uses a vector store and reads from files.
I ask the basic chat model what the capital of Germany is, and it answers Berlin.
I ask the document-based model the same question; it checks the documentation, sees it doesn't have the information, and should then act like the basic chat model and find the answer using its own set of tools. But all I get is "the answer is not in the provided context".
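In other words, the behavior I want looks roughly like this (pure pseudocode; none of these helper names exist in my code):

```python
# Illustrative only: the three callables below are made-up names.
answer = document_qa_chain(question)     # try the vector store first
if answer_is_not_in_context(answer):     # e.g. "the answer is not in the provided context"
    answer = plain_chat_model(question)  # fall back to general knowledge
```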
This is the code I use:
**mods.py**
```python
import re
from io import BytesIO
from typing import List

from pypdf import PdfReader  # or: from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter


def parse_pdf(file_path: str) -> List[str]:
    with open(file_path, "rb") as f:
        data = f.read()
    pdf = PdfReader(BytesIO(data))
    output = []
    for page in pdf.pages:
        text = page.extract_text()
        # re-join words hyphenated across line breaks
        text = re.sub(r"(\w+)-\n(\w+)", r"\1\2", text)
        # turn single newlines into spaces, but keep paragraph breaks
        text = re.sub(r"(?<!\n\s)\n(?!\s\n)", " ", text.strip())
        # collapse runs of blank lines into one paragraph break
        text = re.sub(r"\n\s*\n", "\n\n", text)
        output.append(text)
    return output


def text_to_docs(text: List[str]) -> List[str]:
    combined_text = " ".join(text)
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=2000,
        separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
        chunk_overlap=100,
    )
    chunks = text_splitter.split_text(combined_text)
    return chunks
```
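For context, the first regex in `parse_pdf` is there to re-join words that PDF extraction hyphenates across line breaks. A quick illustration (the sample string is made up):

```python
import re

sample = "informa-\ntion retrieval"  # hypothetical extracted text
print(re.sub(r"(\w+)-\n(\w+)", r"\1\2", sample))  # -> "information retrieval"
```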
**main.py**
```python
import os

import streamlit as st
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

import mods

# pdf_files and data_path are defined earlier in my script (omitted here)
all_texts = []
for pdf_file in pdf_files:
    file_path = os.path.join(data_path, pdf_file)
    try:
        text = mods.parse_pdf(file_path)
        all_texts.extend(text)
    except FileNotFoundError:
        st.error(f"File not found: {file_path}")
        st.stop()  # was `return`; st.stop() also works at the top level of a Streamlit script
    except Exception as e:
        st.error(f"Error occurred while reading the PDF: {e}")

documents = mods.text_to_docs(all_texts)
embeddings = OpenAIEmbeddings()
vector_store = FAISS.from_texts(documents, embedding=embeddings)

llm = ChatOpenAI(temperature=0.3, max_tokens=1000, model_name="gpt-4-1106-preview")
qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=vector_store.as_retriever())

if "messages" not in st.session_state:
    st.session_state.messages = []
if not st.session_state.messages:
    welcome_message = {"role": "assistant", "content": "Hello, how can I help?"}
    st.session_state.messages.append(welcome_message)

for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

if prompt := st.chat_input("What is your question"):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # ConversationalRetrievalChain expects chat_history as (human, ai) pairs,
    # not (role, content) tuples; pair up past user/assistant turns, skipping
    # the welcome message and the prompt just appended
    past = st.session_state.messages[1:-1]
    chat_history = [
        (past[i]["content"], past[i + 1]["content"])
        for i in range(0, len(past) - 1, 2)
    ]
    result = qa({"question": prompt, "chat_history": chat_history})

    with st.chat_message("assistant"):
        full_response = result["answer"]
        st.markdown(full_response)
    st.session_state.messages.append({"role": "assistant", "content": full_response})
```
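One idea I had is to replace the chain's default QA prompt, since as far as I can tell the stock prompt tells the model to say it doesn't know when the answer isn't in the context. A sketch of what I mean (untested; I'm assuming `combine_docs_chain_kwargs` is the right place to pass a prompt override):

```python
from langchain.prompts import PromptTemplate

# Hypothetical replacement for the default "stuff" QA prompt.
fallback_prompt = PromptTemplate.from_template(
    "Use the following context to answer the question. If the context does not "
    "contain the answer, answer from your own general knowledge instead of "
    "saying the answer is not in the context.\n\n"
    "Context:\n{context}\n\nQuestion: {question}\nAnswer:"
)

qa = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=vector_store.as_retriever(),
    combine_docs_chain_kwargs={"prompt": fallback_prompt},
)
```

Would something like this work, or is there a better way to get the fallback behavior?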