-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathchat.py
executable file
·127 lines (99 loc) · 4.33 KB
/
chat.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import os
import streamlit as st
# from PyPDF2 import PdfReader
# from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
import pickle
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.vectorstores import FAISS
from keys import OPENAI_API_KEY
# NOTE(review): the blocks below are retained dead code from an earlier
# version that read and chunked PDFs at runtime; the app now loads a
# pre-built FAISS store instead (see main()).
# Function to read PDF content
# def read_pdf(file_path):
# pdf_reader = PdfReader(file_path)
# text = ""
# for page in pdf_reader.pages:
# text += page.extract_text()
# return text
# Mapping of PDFs
# pdf_mapping = {
# 'HealthInsurance Benefits': 'TaxBenefits_HealthInsurance.pdf',
# 'Tax Regime': 'New-vs-Old-Tax.pdf',
# 'Reinforcement Learning': 'SuttonBartoIPRLBook2ndEd.pdf',
# 'GPT4 All Training': '2023_GPT4All_Technical_Report.pdf',
# # Add more mappings as needed
# }
# Expose the API key from keys.py through the environment so the OpenAI
# client libraries (used by OpenAIEmbeddings / ChatOpenAI) can pick it up.
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY
# Main Streamlit app
def main():
    """Streamlit chat UI for querying a pre-built FAISS vector store of a PDF.

    Side effects: renders the Streamlit page, reads/writes
    ``st.session_state.messages``, and calls the OpenAI API via LangChain.
    """
    with st.sidebar:
        st.title('🤗💬 PDF Chat App')
        st.markdown('''
        ## About
        Choose the desired PDF, then perform a query.
        ''')

    selected_custom_name = st.sidebar.selectbox('Choose your PDF', ['', 'ansible'])
    selected_actual_name = selected_custom_name
    if not selected_actual_name:
        return  # nothing selected yet — render the empty sidebar only

    # Load the pre-built FAISS index for the chosen PDF. The store location
    # is configurable via VECTOR_STORE_DIR; the default preserves the
    # original hard-coded path for backward compatibility.
    embeddings = OpenAIEmbeddings()
    store_root = os.environ.get(
        "VECTOR_STORE_DIR", "/home/yuvraj/projects/docai/vector_stores"
    )
    vectorstore = FAISS.load_local(
        os.path.join(store_root, selected_actual_name),
        embeddings,
        index_name=selected_actual_name,
    )

    llm = ChatOpenAI()
    qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever())

    # Initialize and replay the chat transcript kept in session state.
    if "messages" not in st.session_state:
        st.session_state.messages = []
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input(f"Ask your questions from PDF {selected_custom_name}?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        # ConversationalRetrievalChain expects chat_history as a list of
        # (human_message, ai_message) string pairs for *previous* turns.
        # The original passed (role, content) tuples including the current
        # question, which corrupts the condensed follow-up question.
        # Messages alternate user/assistant; the last entry is the current
        # (unanswered) prompt, so pair entries up to len-1.
        history = st.session_state.messages
        chat_history = [
            (history[i]["content"], history[i + 1]["content"])
            for i in range(0, len(history) - 1, 2)
        ]
        result = qa({"question": prompt, "chat_history": chat_history})

        with st.chat_message("assistant"):
            full_response = result["answer"]
            st.empty().markdown(full_response)
        st.session_state.messages.append(
            {"role": "assistant", "content": full_response}
        )


if __name__ == "__main__":
    main()