-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathingest.py
31 lines (25 loc) · 977 Bytes
/
ingest.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
"""
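Ingestion script for the QA system: builds the FAISS vector store from a text file.

When run as a script, it loads data/state_of_the_union.txt, splits it into
chunks, embeds the chunks with OpenAI embeddings, and saves the result to disk
as a FAISS index (docs.index) plus a pickled store (faiss_store.pkl).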
"""
import os
import pickle
from dotenv import load_dotenv
import faiss
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import OpenAIEmbeddings
from loguru import logger
# Load variables from a local .env file into the process environment.
# This runs at import time, before any of the code below executes.
# NOTE(review): presumably this supplies the OpenAI API key that
# OpenAIEmbeddings relies on — confirm against the deployment setup.
load_dotenv()
def build_vector_store(
    source_path: str = "data/state_of_the_union.txt",
    index_path: str = "docs.index",
    store_path: str = "faiss_store.pkl",
    chunk_size: int = 1500,
    chunk_overlap: int = 100,
) -> None:
    """Build a FAISS vector store from a text file and save it to disk.

    The text at ``source_path`` is loaded and split into chunks, the chunks
    are embedded with ``OpenAIEmbeddings``, and the resulting store is
    persisted as two files: the raw FAISS index and a pickle of the store
    object with its index detached.

    Args:
        source_path: Path of the text file to ingest.
        index_path: Destination file for the FAISS index.
        store_path: Destination file for the pickled store (without index).
        chunk_size: ``chunk_size`` passed to ``CharacterTextSplitter``.
        chunk_overlap: ``chunk_overlap`` passed to ``CharacterTextSplitter``.
    """
    logger.info("Loading the document loader...")
    document_loader = TextLoader(source_path)
    text_splitter = CharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = document_loader.load_and_split(text_splitter=text_splitter)

    # Create a vector store from the documents and save it to disk.
    embedding = OpenAIEmbeddings()
    store = FAISS.from_documents(docs, embedding)

    # The index is written separately and detached before pickling.
    # NOTE(review): presumably the native FAISS index object cannot be
    # pickled directly — a loader must re-attach it via faiss.read_index.
    faiss.write_index(store.index, index_path)
    store.index = None
    with open(store_path, "wb") as f:
        pickle.dump(store, f)

    logger.info("Saved FAISS index to {} and store to {}", index_path, store_path)


if __name__ == "__main__":
    build_vector_store()