From 82da6432ce3bf8cc57d1658c26b69b3bbdee38b1 Mon Sep 17 00:00:00 2001 From: Harrison Loh Date: Tue, 16 Jan 2024 14:05:54 -0500 Subject: [PATCH 1/2] Fix errors involving splitting of document text and saving as database --- app/langchain_api.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/app/langchain_api.py b/app/langchain_api.py index 88cb866..d3fed5c 100644 --- a/app/langchain_api.py +++ b/app/langchain_api.py @@ -33,7 +33,7 @@ import pyalex import PyPDF2 import io -#import tiktoken +import tiktoken #from demo import read_single #TODO: IF doi -> then search open alex -> determine relevant metadata to return. -> Together once everything is up to date. @@ -384,10 +384,10 @@ async def langchain_paper_search(node): document = Document(page_content = text) splitter = RecursiveCharacterTextSplitter(chunk_size = 5000, chunk_overlap = 1000) - texts = splitter.split_documents(document) + split_texts = splitter.split_text(text) embeddings = OpenAIEmbeddings() - - db = FAISS.from_documents(texts, embeddings) + db = FAISS.from_texts(split_texts, embeddings) + print("Database built...\n") # Define all the queries and corresponding schemas in a list queries_schemas_docs = [ From 2cf8978eac2e8b1e53e7db83442562bdb0f7ec34 Mon Sep 17 00:00:00 2001 From: Harrison Loh Date: Tue, 16 Jan 2024 14:06:23 -0500 Subject: [PATCH 2/2] Add required import for using FAISS database. --- app/requirements.txt | Bin 2222 -> 2258 bytes 1 file changed, 0 insertions(+), 0 deletions(-) diff --git a/app/requirements.txt b/app/requirements.txt index d8efed684fced38eab7939b05b695f8941bc47f2..0c60e69d0c68bd4992a0e547b655cae2b5b62e34 100755 GIT binary patch delta 40 scmZ1{cu8=>JVxn6hD?THAk<|@W+-4NWv~T8Lk2wta|XT5QyC@M0n`Kt*#H0l delta 12 Tcmca4xK41xJjTt37zNk?B8>!p