deepset-ai · tanaysoni · Aug 17, 2020 · Aug 17, 2020
diff --git a/tutorials/Tutorial6_Better_Retrieval_via_DPR.ipynb b/tutorials/Tutorial6_Better_Retrieval_via_DPR.ipynb
@@ -106,76 +106,26 @@
    "source": [
     "## Document Store\n",
     "\n",
-    "### Start an Elasticsearch server\n",
-    "You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in your environment (e.g. in Colab notebooks), you can also manually download and execute Elasticsearch from source."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "4cc342fd2f0096e6335390e66029716ef452e2853ddd11a5b9802a4fdde20cdc\r\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Recommended: Start Elasticsearch using Docker\n",
-    "#! docker run -d -p 9200:9200 -e \"discovery.type=single-node\" elasticsearch:7.6.2\n",
-    "# wait until ES has started\n",
-    "#! sleep 30"
+    "FAISS is a library for efficient similarity search and clustering of dense vectors.\n",
+    "The `FAISSDocumentStore` uses a SQL database (SQLite in-memory by default) under the hood\n",
+    "to store the document text and other metadata. The vector embeddings of the text are\n",
+    "indexed in a FAISS index that is later queried to search for answers."
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 9,
-   "metadata": {},
    "outputs": [],
    "source": [
-    "# In Colab / No Docker environments: Start Elasticsearch from source\n",
-    "! wget https://artifacts.elastic.co/downloads/elasticsearch/elasticsearch-7.6.2-linux-x86_64.tar.gz -q\n",
-    "! tar -xzf elasticsearch-7.6.2-linux-x86_64.tar.gz\n",
-    "! chown -R daemon:daemon elasticsearch-7.6.2\n",
-    "\n",
-    "import os\n",
-    "from subprocess import Popen, PIPE, STDOUT\n",
-    "es_server = Popen(['elasticsearch-7.6.2/bin/elasticsearch'],\n",
-    "                   stdout=PIPE, stderr=STDOUT,\n",
-    "                   preexec_fn=lambda: os.setuid(1)  # as daemon\n",
-    "                  )\n",
-    "# wait until ES has started\n",
-    "! sleep 30"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
+    "from haystack.database.faiss import FAISSDocumentStore\n",
+    "\n",
+    "document_store = FAISSDocumentStore()"
+   ],
    "metadata": {
     "pycharm": {
      "name": "#%%\n"
     }
-   },
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "07/03/2020 11:46:26 - INFO - elasticsearch -   PUT http://localhost:9200/document [status:200 request:0.343s]\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Connect to Elasticsearch\n",
-    "from haystack.database.elasticsearch import ElasticsearchDocumentStore\n",
-    "\n",
-    "# We need to set `embedding_field` and `embedding_dim`, when we plan to use a dense retriever\n",
-    "document_store = ElasticsearchDocumentStore(host=\"localhost\", username=\"\", password=\"\", index=\"document\", \n",
-    "                                            embedding_field=\"embedding\", embedding_dim=768)"
-   ]
+   }
   },
   {
    "cell_type": "markdown",

diff --git a/tutorials/Tutorial6_Better_Retrieval_via_DPR.py b/tutorials/Tutorial6_Better_Retrieval_via_DPR.py
@@ -1,36 +1,17 @@
-import logging
-import subprocess
-import time
-
 from haystack import Finder
-from haystack.database.elasticsearch import ElasticsearchDocumentStore
+from haystack.database.faiss import FAISSDocumentStore
 from haystack.indexing.cleaning import clean_wiki_text
 from haystack.indexing.utils import convert_files_to_dicts, fetch_archive_from_http
 from haystack.reader.farm import FARMReader
-from haystack.reader.transformers import TransformersReader
 from haystack.utils import print_answers
-from haystack.retriever.sparse import ElasticsearchRetriever
 from haystack.retriever.dense import DensePassageRetriever
 
-LAUNCH_ELASTICSEARCH = True
-
-# Start an Elasticsearch server
-# You can start Elasticsearch on your local machine instance using Docker. If Docker is not readily available in
-# your environment (eg., in Colab notebooks), then you can manually download and execute Elasticsearch from source.
-
-if LAUNCH_ELASTICSEARCH:
-    logging.info("Starting Elasticsearch ...")
-    status = subprocess.run(
-        ['docker run -d -p 9200:9200 -e "discovery.type=single-node" elasticsearch:7.6.2'], shell=True
-    )
-    if status.returncode:
-        raise Exception("Failed to launch Elasticsearch. If you want to connect to an existing Elasticsearch instance"
-                        "then set LAUNCH_ELASTICSEARCH in the script to False.")
-    time.sleep(15)
 
-# Connect to Elasticsearch
-document_store = ElasticsearchDocumentStore(host="localhost", username="", password="",
-                                            index="document", embedding_dim=768, embedding_field="embedding")
+# FAISS is a library for efficient similarity search and clustering of dense vectors.
+# The FAISSDocumentStore uses a SQL database (SQLite in-memory by default) under the hood
+# to store the document text and other metadata. The vector embeddings of the text are
+# indexed in a FAISS index that is later queried to search for answers.
+document_store = FAISSDocumentStore()
 
 # ## Cleaning & indexing documents
 # Let's first get some documents that we want to query
@@ -42,7 +23,7 @@
 dicts = convert_files_to_dicts(dir_path=doc_dir, clean_func=clean_wiki_text, split_paragraphs=True)
 
 # Now, let's write the docs to our DB.
-document_store.write_documents(dicts[:16])
+document_store.write_documents(dicts)
 
 ### Retriever
 retriever = DensePassageRetriever(document_store=document_store, embedding_model="dpr-bert-base-nq",