From fbbca6e3dc348b201ee82ab57de05be0c44af9bb Mon Sep 17 00:00:00 2001 From: ZanSara Date: Mon, 10 Jan 2022 18:28:49 +0100 Subject: [PATCH 01/21] Add unique contraint to avoid PSQL crash and debug hanging tests --- haystack/document_stores/sql.py | 2 +- test/test_faiss_on_postgres.py | 163 ++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 test/test_faiss_on_postgres.py diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py index 666870238a..a175a07a14 100644 --- a/haystack/document_stores/sql.py +++ b/haystack/document_stores/sql.py @@ -20,7 +20,7 @@ class ORMBase(Base): __abstract__ = True - id = Column(String(100), default=lambda: str(uuid4()), primary_key=True) + id = Column(String(100), default=lambda: str(uuid4()), unique=True, primary_key=True) created_at = Column(DateTime, server_default=func.now()) updated_at = Column(DateTime, server_default=func.now(), server_onupdate=func.now()) diff --git a/test/test_faiss_on_postgres.py b/test/test_faiss_on_postgres.py new file mode 100644 index 0000000000..301c53ffe1 --- /dev/null +++ b/test/test_faiss_on_postgres.py @@ -0,0 +1,163 @@ +import uuid +import faiss +import math +import numpy as np +import pytest +import sys +import subprocess +import logging +from time import sleep +from sqlalchemy import create_engine, text +import psycopg + +from haystack.schema import Document +from haystack.pipelines import DocumentSearchPipeline +from haystack.document_stores.faiss import FAISSDocumentStore +from haystack.document_stores.weaviate import WeaviateDocumentStore + +from haystack.pipelines import Pipeline +from haystack.nodes.retriever.dense import EmbeddingRetriever + + +DOCUMENTS = [ + {"meta": {"name": "name_1", "year": "2020", "month": "01"}, "content": "text_1", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_2", "year": "2020", "month": "02"}, "content": "text_2", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_3", "year": "2020", "month": "03"}, "content": "text_3", "embedding": np.random.rand(768).astype(np.float64)}, + {"meta": {"name": "name_4", "year": "2021", "month": "01"}, "content": "text_4", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_5", "year": "2021", "month": "02"}, "content": "text_5", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_6", "year": "2021", "month": "03"}, "content": "text_6", "embedding": np.random.rand(768).astype(np.float64)}, +] + + +# @pytest.fixture +# def sql_url(): + +# # status = subprocess.run(["docker run --name postgres_test -d -e POSTGRES_HOST_AUTH_METHOD=trust -p 5432:5432 postgres"], shell=True) +# # if status.returncode: +# # logging.warning("Tried to start PostgreSQL through Docker but this failed. 
It is likely that there is already an existing instance running.") +# # else: +# # sleep(5) + +# engine = create_engine( +# 'postgresql://postgres:postgres@127.0.0.1/postgres', +# isolation_level='AUTOCOMMIT') + +# # with engine.connect() as connection: +# connection = engine.connect() + +# try: +# connection.execute('DROP SCHEMA public CASCADE;') +# except Exception as e: +# logging.error(e) +# try: +# connection.execute('CREATE SCHEMA public;') +# connection.execute('SET SESSION idle_in_transaction_session_timeout = "1s";') + +# yield "postgresql://postgres:postgres@127.0.0.1/postgres" + +# finally: +# connection.execute('DROP SCHEMA public CASCADE;') +# connection.close() + +# logging.error(" -----------------------> Done") + + # sleep(1) + + # status = subprocess.run(["docker stop postgres_test"], shell=True) + # if status.returncode: + # logging.warning("Tried to start PostgreSQL through Docker but this failed. It is likely that there is already an existing instance running.") + + # status = subprocess.run(["docker rm postgres_test"], shell=True) + # if status.returncode: + # logging.warning("Tried to start PostgreSQL through Docker but this failed. It is likely that there is already an existing instance running.") + + + +# @pytest.fixture +# def sql_url(tmp_path): +# return f"sqlite:////{tmp_path/'haystack_test.db'}" + + + +@pytest.mark.skipif(sys.platform in ['win32', 'cygwin'], reason="Test with tmp_path not working on windows runner") +def test_faiss_index_save_and_load(tmp_path): #, sql_url): + + engine = create_engine( + 'postgresql://postgres:postgres@127.0.0.1/postgres', + isolation_level='AUTOCOMMIT', + future=True) + + with engine.connect() as connection: + #connection = engine.connect() + + try: + connection.execute(text('DROP SCHEMA public CASCADE')) + connection.commit() + except psycopg.errors.ProgrammingError as pe: + logging.error(pe) + except Exception as e: + logging.error(e) + + connection.execute(text('CREATE SCHEMA public')) + connection.commit() + #connection.execute('SET SESSION idle_in_transaction_session_timeout = "1s";') + + sql_url = "postgresql://postgres:postgres@127.0.0.1/postgres" + + + + + document_store = FAISSDocumentStore( + sql_url=sql_url, + index="haystack_test", + progress_bar=False # Just to check if the init parameters are kept + ) + document_store.write_documents(DOCUMENTS) + + # test saving the index + document_store.save(tmp_path / "haystack_test_faiss") + + # clear existing faiss_index + document_store.faiss_indexes[document_store.index].reset() + + # test faiss index is cleared + assert document_store.faiss_indexes[document_store.index].ntotal == 0 + + # test loading the index + new_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss") + + # check faiss index is restored + assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS) + # check if documents are restored + assert len(new_document_store.get_all_documents()) == len(DOCUMENTS) + # Check if the init parameters are kept + assert not new_document_store.progress_bar + + # test saving and loading the loaded faiss index + new_document_store.save(tmp_path / "haystack_test_faiss") + reloaded_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss") + + # check faiss index is restored + assert reloaded_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS) + # check if documents are restored + assert len(reloaded_document_store.get_all_documents()) == len(DOCUMENTS) + # Check if the init parameters are 
kept + assert not reloaded_document_store.progress_bar + + # test loading the index via init + new_document_store = FAISSDocumentStore(faiss_index_path=tmp_path / "haystack_test_faiss") + + # check faiss index is restored + assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS) + # check if documents are restored + assert len(new_document_store.get_all_documents()) == len(DOCUMENTS) + # Check if the init parameters are kept + assert not new_document_store.progress_bar + + + + connection.execute(text('DROP SCHEMA public CASCADE')) + connection.commit() + # connection.close() + + logging.error(" -----------------------> Done") From 7b7a27135f9879f433b8f5a95f477328fe0433dc Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 11 Jan 2022 15:04:55 +0100 Subject: [PATCH 02/21] Tests seems to be runnable on PG now, still to verify --- haystack/document_stores/faiss.py | 4 +- haystack/document_stores/milvus.py | 4 +- haystack/document_stores/milvus2x.py | 2 + haystack/document_stores/sql.py | 11 +- test/conftest.py | 115 ++++++++++++++----- test/test_faiss_and_milvus.py | 31 +++-- test/test_faiss_on_postgres.py | 163 --------------------------- 7 files changed, 125 insertions(+), 205 deletions(-) delete mode 100644 test/test_faiss_on_postgres.py diff --git a/haystack/document_stores/faiss.py b/haystack/document_stores/faiss.py index e1fb2ca8a0..663a06a5f1 100644 --- a/haystack/document_stores/faiss.py +++ b/haystack/document_stores/faiss.py @@ -48,6 +48,7 @@ def __init__( duplicate_documents: str = 'overwrite', faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, + isolation_level: str = None, **kwargs, ): """ @@ -145,7 +146,8 @@ def __init__( super().__init__( url=sql_url, index=index, - duplicate_documents=duplicate_documents + duplicate_documents=duplicate_documents, + isolation_level=isolation_level ) self._validate_index_sync() diff --git a/haystack/document_stores/milvus.py b/haystack/document_stores/milvus.py index 80c19c333f..5ab0fb9a4e 100644 --- a/haystack/document_stores/milvus.py +++ b/haystack/document_stores/milvus.py @@ -51,6 +51,7 @@ def __init__( embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', + isolation_level: str = None, **kwargs, ): """ @@ -129,7 +130,8 @@ def __init__( super().__init__( url=sql_url, index=index, - duplicate_documents=duplicate_documents + duplicate_documents=duplicate_documents, + isolation_level=isolation_level, ) def __del__(self): diff --git a/haystack/document_stores/milvus2x.py b/haystack/document_stores/milvus2x.py index dc3eecfffe..85405138fa 100644 --- a/haystack/document_stores/milvus2x.py +++ b/haystack/document_stores/milvus2x.py @@ -71,6 +71,7 @@ def __init__( custom_fields: Optional[List[Any]] = None, progress_bar: bool = True, duplicate_documents: str = 'overwrite', + isolation_level: str = None, ): """ :param sql_url: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. 
For large scale @@ -165,6 +166,7 @@ def __init__( url=sql_url, index=index, duplicate_documents=duplicate_documents + isolation_level=isolation_level, ) def _create_collection_and_index_if_not_exist( diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py index a175a07a14..4f44f526a0 100644 --- a/haystack/document_stores/sql.py +++ b/haystack/document_stores/sql.py @@ -96,7 +96,8 @@ def __init__( index: str = "document", label_index: str = "label", duplicate_documents: str = "overwrite", - check_same_thread: bool = False + check_same_thread: bool = False, + isolation_level: str = None ): """ An SQL backed DocumentStore. Currently supports SQLite, PostgreSQL and MySQL backends. @@ -112,6 +113,7 @@ def __init__( fail: an error is raised if the document ID of the document being added already exists. :param check_same_thread: Set to False to mitigate multithreading issues in older SQLite versions (see https://docs.sqlalchemy.org/en/14/dialects/sqlite.html?highlight=check_same_thread#threading-pooling-behavior) + :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` """ # save init parameters to enable export of component config as YAML @@ -119,10 +121,13 @@ def __init__( url=url, index=index, label_index=label_index, duplicate_documents=duplicate_documents, check_same_thread=check_same_thread ) + create_engine_params = {} + if isolation_level: + create_engine_params["isolation_level"] = isolation_level if "sqlite" in url: - engine = create_engine(url, connect_args={'check_same_thread': check_same_thread}) + engine = create_engine(url, connect_args={'check_same_thread': check_same_thread}, **create_engine_params) else: - engine = create_engine(url) + engine = create_engine(url, **create_engine_params) ORMBase.metadata.create_all(engine) Session = sessionmaker(bind=engine) self.session = Session() diff --git a/test/conftest.py b/test/conftest.py index eccce83b7f..31bbbb2cb3 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -3,6 +3,8 @@ from subprocess import run from sys import platform import gc +import logging +from sqlalchemy import create_engine, text import numpy as np import psutil @@ -478,41 +480,94 @@ def get_retriever(retriever_type, document_store): @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"]) -def document_store_with_docs(request, test_docs_xs): - document_store = get_document_store(request.param) - document_store.write_documents(test_docs_xs) - yield document_store - document_store.delete_documents() - +def document_store_with_docs(request, test_docs_xs, tmp_path): + for document_store in get_document_store(request.param, tmp_path=tmp_path): + document_store.write_documents(test_docs_xs) + yield document_store + document_store.delete_documents() @pytest.fixture -def document_store(request, test_docs_xs): +def document_store(request, tmp_path): vector_dim = request.node.get_closest_marker("vector_dim", pytest.mark.vector_dim(768)) - document_store = get_document_store(request.param, vector_dim.args[0]) - yield document_store - document_store.delete_documents() + for document_store in get_document_store(request.param, embedding_dim=vector_dim.args[0], tmp_path=tmp_path): + yield document_store + document_store.delete_documents() @pytest.fixture(params=["faiss", "milvus", "weaviate"]) -def document_store_cosine(request, test_docs_xs): +def document_store_cosine(request, tmp_path): vector_dim = request.node.get_closest_marker("vector_dim", pytest.mark.vector_dim(768)) - document_store 
= get_document_store(request.param, vector_dim.args[0], similarity="cosine") - yield document_store - document_store.delete_documents() + for document_store in get_document_store(request.param, embedding_dim=vector_dim.args[0], similarity="cosine", tmp_path=tmp_path): + yield document_store + document_store.delete_documents() @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"]) -def document_store_cosine_small(request, test_docs_xs): +def document_store_cosine_small(request, tmp_path): vector_dim = request.node.get_closest_marker("vector_dim", pytest.mark.vector_dim(3)) - document_store = get_document_store(request.param, vector_dim.args[0], similarity="cosine") - yield document_store - document_store.delete_documents() + for document_store in get_document_store(request.param, embedding_dim=vector_dim.args[0], similarity="cosine", tmp_path=tmp_path): + yield document_store + document_store.delete_documents() + + + +SQL_TYPE = "sqlite" +# SQL_TYPE = "postgres" + + +@pytest.fixture +def sql_url(tmp_path): + if SQL_TYPE == "postgres": + try: + setup_postgres() + yield get_sql_url(tmp_path) + finally: + teardown_postgres() + else: + yield get_sql_url(tmp_path) + -def get_document_store(document_store_type, embedding_dim=768, embedding_field="embedding", index="haystack_test", similarity:str="dot_product"): +def get_sql_url(tmp_path): + if SQL_TYPE == "postgres": + return "postgresql://postgres:postgres@127.0.0.1/postgres" + else: + return f"sqlite:///{tmp_path}/haystack_test.db" + + +def setup_postgres(): + # status = subprocess.run(["docker run --name postgres_test -d -e POSTGRES_HOST_AUTH_METHOD=trust -p 5432:5432 postgres"], shell=True) + # if status.returncode: + # logging.warning("Tried to start PostgreSQL through Docker but this failed. 
It is likely that there is already an existing instance running.") + # else: + # sleep(5) + engine = create_engine('postgresql://postgres:postgres@127.0.0.1/postgres', isolation_level='AUTOCOMMIT') + + with engine.connect() as connection: + try: + connection.execute(text('DROP SCHEMA public CASCADE')) + except Exception as e: + logging.error(e) + connection.execute(text('CREATE SCHEMA public;')) + connection.execute(text('SET SESSION idle_in_transaction_session_timeout = "1s";')) + + +def teardown_postgres(): + engine = create_engine('postgresql://postgres:postgres@127.0.0.1/postgres', isolation_level='AUTOCOMMIT') + with engine.connect() as connection: + connection.execute(text('DROP SCHEMA public CASCADE')) + connection.close() + + +def get_document_store(document_store_type, tmp_path, embedding_dim=768, embedding_field="embedding", index="haystack_test", similarity:str="dot_product"): + + if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]: + setup_postgres() + if document_store_type == "sql": document_store = SQLDocumentStore(url="sqlite://", index=index) + elif document_store_type == "memory": document_store = InMemoryDocumentStore( - return_embedding=True, embedding_dim=embedding_dim, embedding_field=embedding_field, index=index, similarity=similarity - ) + return_embedding=True, embedding_dim=embedding_dim, embedding_field=embedding_field, index=index, similarity=similarity) + elif document_store_type == "elasticsearch": # make sure we start from a fresh index client = Elasticsearch() @@ -520,28 +575,33 @@ def get_document_store(document_store_type, embedding_dim=768, embedding_field=" document_store = ElasticsearchDocumentStore( index=index, return_embedding=True, embedding_dim=embedding_dim, embedding_field=embedding_field, similarity=similarity ) + elif document_store_type == "faiss": document_store = FAISSDocumentStore( vector_dim=embedding_dim, - sql_url="sqlite://", + sql_url=get_sql_url(tmp_path), return_embedding=True, embedding_field=embedding_field, index=index, - similarity=similarity + similarity=similarity, + isolation_level="AUTOCOMMIT" if SQL_TYPE == "postgres" else None ) + elif document_store_type == "milvus": document_store = MilvusDocumentStore( vector_dim=embedding_dim, - sql_url="sqlite://", + sql_url=get_sql_url(tmp_path), return_embedding=True, embedding_field=embedding_field, index=index, - similarity=similarity + similarity=similarity, + isolation_level="AUTOCOMMIT" if SQL_TYPE == "postgres" else None ) _, collections = document_store.milvus_server.list_collections() for collection in collections: if collection.startswith(index): document_store.milvus_server.drop_collection(collection) + elif document_store_type == "weaviate": document_store = WeaviateDocumentStore( weaviate_url="http://localhost:8080", @@ -554,7 +614,10 @@ def get_document_store(document_store_type, embedding_dim=768, embedding_field=" else: raise Exception(f"No document store fixture for '{document_store_type}'") - return document_store + yield document_store + + if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]: + teardown_postgres() @pytest.fixture(scope="function") diff --git a/test/test_faiss_and_milvus.py b/test/test_faiss_and_milvus.py index b2ca1c7c2c..8e7ec6023e 100644 --- a/test/test_faiss_and_milvus.py +++ b/test/test_faiss_and_milvus.py @@ -13,6 +13,7 @@ from haystack.pipelines import Pipeline from haystack.nodes.retriever.dense import EmbeddingRetriever + DOCUMENTS = [ {"meta": {"name": "name_1", "year": "2020", "month": "01"}, 
"content": "text_1", "embedding": np.random.rand(768).astype(np.float32)}, {"meta": {"name": "name_2", "year": "2020", "month": "02"}, "content": "text_2", "embedding": np.random.rand(768).astype(np.float32)}, @@ -23,12 +24,14 @@ ] + @pytest.mark.skipif(sys.platform in ['win32', 'cygwin'], reason="Test with tmp_path not working on windows runner") -def test_faiss_index_save_and_load(tmp_path): +def test_faiss_index_save_and_load(tmp_path, sql_url): document_store = FAISSDocumentStore( - sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}", + sql_url=sql_url, index="haystack_test", - progress_bar=False # Just to check if the init parameters are kept + progress_bar=False, # Just to check if the init parameters are kept + isolation_level="AUTOCOMMIT" ) document_store.write_documents(DOCUMENTS) @@ -74,11 +77,12 @@ def test_faiss_index_save_and_load(tmp_path): @pytest.mark.skipif(sys.platform in ['win32', 'cygwin'], reason="Test with tmp_path not working on windows runner") -def test_faiss_index_save_and_load_custom_path(tmp_path): +def test_faiss_index_save_and_load_custom_path(tmp_path, sql_url): document_store = FAISSDocumentStore( - sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}", + sql_url=sql_url, index="haystack_test", - progress_bar=False # Just to check if the init parameters are kept + progress_bar=False, # Just to check if the init parameters are kept + isolation_level="AUTOCOMMIT" ) document_store.write_documents(DOCUMENTS) @@ -128,13 +132,15 @@ def test_faiss_index_mutual_exclusive_args(tmp_path): with pytest.raises(ValueError): FAISSDocumentStore( sql_url=f"sqlite:////{tmp_path/'haystack_test.db'}", - faiss_index_path=f"{tmp_path/'haystack_test'}" + faiss_index_path=f"{tmp_path/'haystack_test'}", + isolation_level="AUTOCOMMIT" ) with pytest.raises(ValueError): FAISSDocumentStore( f"sqlite:////{tmp_path/'haystack_test.db'}", - faiss_index_path=f"{tmp_path/'haystack_test'}" + faiss_index_path=f"{tmp_path/'haystack_test'}", + isolation_level="AUTOCOMMIT" ) @@ -225,7 +231,9 @@ def test_update_with_empty_store(document_store, retriever): @pytest.mark.parametrize("index_factory", ["Flat", "HNSW", "IVF1,Flat"]) def test_faiss_retrieving(index_factory, tmp_path): document_store = FAISSDocumentStore( - sql_url=f"sqlite:////{tmp_path/'test_faiss_retrieving.db'}", faiss_index_factory_str=index_factory + sql_url=f"sqlite:////{tmp_path/'test_faiss_retrieving.db'}", + faiss_index_factory_str=index_factory, + isolation_level="AUTOCOMMIT" ) document_store.delete_all_documents(index="document") @@ -394,7 +402,6 @@ def test_get_docs_with_many_filters(document_store, retriever): assert "2020" == documents[0].meta["year"] - @pytest.mark.parametrize("retriever", ["embedding"], indirect=True) @pytest.mark.parametrize("document_store", ["faiss", "milvus"], indirect=True) def test_pipeline(document_store, retriever): @@ -421,7 +428,9 @@ def test_faiss_passing_index_from_outside(tmp_path): faiss_index.set_direct_map_type(faiss.DirectMap.Hashtable) faiss_index.nprobe = 2 document_store = FAISSDocumentStore( - sql_url=f"sqlite:////{tmp_path/'haystack_test_faiss.db'}", faiss_index=faiss_index, index=index + sql_url=f"sqlite:////{tmp_path/'haystack_test_faiss.db'}", + faiss_index=faiss_index, index=index, + isolation_level="AUTOCOMMIT" ) document_store.delete_documents() diff --git a/test/test_faiss_on_postgres.py b/test/test_faiss_on_postgres.py deleted file mode 100644 index 301c53ffe1..0000000000 --- a/test/test_faiss_on_postgres.py +++ /dev/null @@ -1,163 +0,0 @@ -import uuid -import faiss -import math 
-import numpy as np -import pytest -import sys -import subprocess -import logging -from time import sleep -from sqlalchemy import create_engine, text -import psycopg - -from haystack.schema import Document -from haystack.pipelines import DocumentSearchPipeline -from haystack.document_stores.faiss import FAISSDocumentStore -from haystack.document_stores.weaviate import WeaviateDocumentStore - -from haystack.pipelines import Pipeline -from haystack.nodes.retriever.dense import EmbeddingRetriever - - -DOCUMENTS = [ - {"meta": {"name": "name_1", "year": "2020", "month": "01"}, "content": "text_1", "embedding": np.random.rand(768).astype(np.float32)}, - {"meta": {"name": "name_2", "year": "2020", "month": "02"}, "content": "text_2", "embedding": np.random.rand(768).astype(np.float32)}, - {"meta": {"name": "name_3", "year": "2020", "month": "03"}, "content": "text_3", "embedding": np.random.rand(768).astype(np.float64)}, - {"meta": {"name": "name_4", "year": "2021", "month": "01"}, "content": "text_4", "embedding": np.random.rand(768).astype(np.float32)}, - {"meta": {"name": "name_5", "year": "2021", "month": "02"}, "content": "text_5", "embedding": np.random.rand(768).astype(np.float32)}, - {"meta": {"name": "name_6", "year": "2021", "month": "03"}, "content": "text_6", "embedding": np.random.rand(768).astype(np.float64)}, -] - - -# @pytest.fixture -# def sql_url(): - -# # status = subprocess.run(["docker run --name postgres_test -d -e POSTGRES_HOST_AUTH_METHOD=trust -p 5432:5432 postgres"], shell=True) -# # if status.returncode: -# # logging.warning("Tried to start PostgreSQL through Docker but this failed. It is likely that there is already an existing instance running.") -# # else: -# # sleep(5) - -# engine = create_engine( -# 'postgresql://postgres:postgres@127.0.0.1/postgres', -# isolation_level='AUTOCOMMIT') - -# # with engine.connect() as connection: -# connection = engine.connect() - -# try: -# connection.execute('DROP SCHEMA public CASCADE;') -# except Exception as e: -# logging.error(e) -# try: -# connection.execute('CREATE SCHEMA public;') -# connection.execute('SET SESSION idle_in_transaction_session_timeout = "1s";') - -# yield "postgresql://postgres:postgres@127.0.0.1/postgres" - -# finally: -# connection.execute('DROP SCHEMA public CASCADE;') -# connection.close() - -# logging.error(" -----------------------> Done") - - # sleep(1) - - # status = subprocess.run(["docker stop postgres_test"], shell=True) - # if status.returncode: - # logging.warning("Tried to start PostgreSQL through Docker but this failed. It is likely that there is already an existing instance running.") - - # status = subprocess.run(["docker rm postgres_test"], shell=True) - # if status.returncode: - # logging.warning("Tried to start PostgreSQL through Docker but this failed. 
It is likely that there is already an existing instance running.") - - - -# @pytest.fixture -# def sql_url(tmp_path): -# return f"sqlite:////{tmp_path/'haystack_test.db'}" - - - -@pytest.mark.skipif(sys.platform in ['win32', 'cygwin'], reason="Test with tmp_path not working on windows runner") -def test_faiss_index_save_and_load(tmp_path): #, sql_url): - - engine = create_engine( - 'postgresql://postgres:postgres@127.0.0.1/postgres', - isolation_level='AUTOCOMMIT', - future=True) - - with engine.connect() as connection: - #connection = engine.connect() - - try: - connection.execute(text('DROP SCHEMA public CASCADE')) - connection.commit() - except psycopg.errors.ProgrammingError as pe: - logging.error(pe) - except Exception as e: - logging.error(e) - - connection.execute(text('CREATE SCHEMA public')) - connection.commit() - #connection.execute('SET SESSION idle_in_transaction_session_timeout = "1s";') - - sql_url = "postgresql://postgres:postgres@127.0.0.1/postgres" - - - - - document_store = FAISSDocumentStore( - sql_url=sql_url, - index="haystack_test", - progress_bar=False # Just to check if the init parameters are kept - ) - document_store.write_documents(DOCUMENTS) - - # test saving the index - document_store.save(tmp_path / "haystack_test_faiss") - - # clear existing faiss_index - document_store.faiss_indexes[document_store.index].reset() - - # test faiss index is cleared - assert document_store.faiss_indexes[document_store.index].ntotal == 0 - - # test loading the index - new_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss") - - # check faiss index is restored - assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS) - # check if documents are restored - assert len(new_document_store.get_all_documents()) == len(DOCUMENTS) - # Check if the init parameters are kept - assert not new_document_store.progress_bar - - # test saving and loading the loaded faiss index - new_document_store.save(tmp_path / "haystack_test_faiss") - reloaded_document_store = FAISSDocumentStore.load(tmp_path / "haystack_test_faiss") - - # check faiss index is restored - assert reloaded_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS) - # check if documents are restored - assert len(reloaded_document_store.get_all_documents()) == len(DOCUMENTS) - # Check if the init parameters are kept - assert not reloaded_document_store.progress_bar - - # test loading the index via init - new_document_store = FAISSDocumentStore(faiss_index_path=tmp_path / "haystack_test_faiss") - - # check faiss index is restored - assert new_document_store.faiss_indexes[document_store.index].ntotal == len(DOCUMENTS) - # check if documents are restored - assert len(new_document_store.get_all_documents()) == len(DOCUMENTS) - # Check if the init parameters are kept - assert not new_document_store.progress_bar - - - - connection.execute(text('DROP SCHEMA public CASCADE')) - connection.commit() - # connection.close() - - logging.error(" -----------------------> Done") From caadc971d65508260ea133ed7b060402ff5470a7 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Tue, 11 Jan 2022 14:06:01 +0000 Subject: [PATCH 03/21] Add latest docstring and tutorial changes --- docs/_src/api/api/document_store.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md index 8464bc47b9..b4705ddf5a 100644 --- 
a/docs/_src/api/api/document_store.md +++ b/docs/_src/api/api/document_store.md @@ -952,7 +952,7 @@ class SQLDocumentStore(BaseDocumentStore) #### \_\_init\_\_ ```python - | __init__(url: str = "sqlite://", index: str = "document", label_index: str = "label", duplicate_documents: str = "overwrite", check_same_thread: bool = False) + | __init__(url: str = "sqlite://", index: str = "document", label_index: str = "label", duplicate_documents: str = "overwrite", check_same_thread: bool = False, isolation_level: str = None) ``` An SQL backed DocumentStore. Currently supports SQLite, PostgreSQL and MySQL backends. @@ -970,6 +970,7 @@ An SQL backed DocumentStore. Currently supports SQLite, PostgreSQL and MySQL bac fail: an error is raised if the document ID of the document being added already exists. - `check_same_thread`: Set to False to mitigate multithreading issues in older SQLite versions (see https://docs.sqlalchemy.org/en/14/dialects/sqlite.html?highlight=check_same_thread#threading-pooling-behavior) +- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()` #### get\_document\_by\_id @@ -1202,7 +1203,7 @@ the vector embeddings are indexed in a FAISS Index. #### \_\_init\_\_ ```python - | __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional["faiss.swigfaiss.Index"] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, **kwargs, ,) + | __init__(sql_url: str = "sqlite:///faiss_document_store.db", vector_dim: int = 768, faiss_index_factory_str: str = "Flat", faiss_index: Optional["faiss.swigfaiss.Index"] = None, return_embedding: bool = False, index: str = "document", similarity: str = "dot_product", embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', faiss_index_path: Union[str, Path] = None, faiss_config_path: Union[str, Path] = None, isolation_level: str = None, **kwargs, ,) ``` **Arguments**: @@ -1478,7 +1479,7 @@ Usage: #### \_\_init\_\_ ```python - | __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', **kwargs, ,) + | __init__(sql_url: str = "sqlite:///", milvus_url: str = "tcp://localhost:19530", connection_pool: str = "SingletonThread", index: str = "document", vector_dim: int = 768, index_file_size: int = 1024, similarity: str = "dot_product", index_type: IndexType = IndexType.FLAT, index_param: Optional[Dict[str, Any]] = None, search_param: Optional[Dict[str, Any]] = None, return_embedding: bool = False, embedding_field: str = "embedding", progress_bar: bool = True, duplicate_documents: str = 'overwrite', isolation_level: str = None, **kwargs, ,) ``` **Arguments**: From 250d44daad76a66e0354f2fd912eb7f81715069d Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 11 Jan 2022 15:35:18 +0100 Subject: [PATCH 04/21] Forgot comma --- 
haystack/document_stores/milvus2x.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/document_stores/milvus2x.py b/haystack/document_stores/milvus2x.py index 85405138fa..5feb5f842d 100644 --- a/haystack/document_stores/milvus2x.py +++ b/haystack/document_stores/milvus2x.py @@ -165,7 +165,7 @@ def __init__( super().__init__( url=sql_url, index=index, - duplicate_documents=duplicate_documents + duplicate_documents=duplicate_documents, isolation_level=isolation_level, ) From 1addbd7ccf4d06731c3a37d27e533b7bed283e84 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 11 Jan 2022 16:35:26 +0100 Subject: [PATCH 05/21] Fix a few more issues left and add isolation_level to set.config() --- haystack/document_stores/faiss.py | 3 +- haystack/document_stores/milvus.py | 1 + haystack/document_stores/milvus2x.py | 1 + haystack/document_stores/sql.py | 2 + test/conftest.py | 18 ++--- test/test_document_store.py | 101 +++++++++++++++++++++++++-- test/test_faiss_and_milvus.py | 77 -------------------- 7 files changed, 113 insertions(+), 90 deletions(-) diff --git a/haystack/document_stores/faiss.py b/haystack/document_stores/faiss.py index 66c24e88b0..676f49f84b 100644 --- a/haystack/document_stores/faiss.py +++ b/haystack/document_stores/faiss.py @@ -116,7 +116,8 @@ def __init__( index=index, similarity=similarity, embedding_field=embedding_field, - progress_bar=progress_bar + progress_bar=progress_bar, + isolation_level=isolation_level ) if similarity in ("dot_product", "cosine"): diff --git a/haystack/document_stores/milvus.py b/haystack/document_stores/milvus.py index 6bf7982e42..538392c1bc 100644 --- a/haystack/document_stores/milvus.py +++ b/haystack/document_stores/milvus.py @@ -105,6 +105,7 @@ def __init__( embedding_dim=embedding_dim, index_file_size=index_file_size, similarity=similarity, index_type=index_type, index_param=index_param, search_param=search_param, duplicate_documents=duplicate_documents, return_embedding=return_embedding, embedding_field=embedding_field, progress_bar=progress_bar, + isolation_level=isolation_level ) self.milvus_server = Milvus(uri=milvus_url, pool=connection_pool) diff --git a/haystack/document_stores/milvus2x.py b/haystack/document_stores/milvus2x.py index 0acd50f863..026c09bcea 100644 --- a/haystack/document_stores/milvus2x.py +++ b/haystack/document_stores/milvus2x.py @@ -128,6 +128,7 @@ def __init__( search_param=search_param, duplicate_documents=duplicate_documents, id_field=id_field, return_embedding=return_embedding, embedding_field=embedding_field, progress_bar=progress_bar, custom_fields=custom_fields, + isolation_level=isolation_level ) logger.warning("Milvus2DocumentStore is in experimental state until Milvus 2.0 is released") diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py index 4f44f526a0..a7ef00f460 100644 --- a/haystack/document_stores/sql.py +++ b/haystack/document_stores/sql.py @@ -124,6 +124,8 @@ def __init__( create_engine_params = {} if isolation_level: create_engine_params["isolation_level"] = isolation_level + + logging.warning(f"create engine params: {create_engine_params}") if "sqlite" in url: engine = create_engine(url, connect_args={'check_same_thread': check_same_thread}, **create_engine_params) else: diff --git a/test/conftest.py b/test/conftest.py index 29387e6d61..b2bf0536cb 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -42,6 +42,11 @@ from haystack.nodes.question_generator import QuestionGenerator +# To manually run the tests with default PostgreSQL instead of SQLite, 
switch the lines below +SQL_TYPE = "sqlite" +# SQL_TYPE = "postgres" + + def pytest_addoption(parser): parser.addoption("--document_store_type", action="store", default="elasticsearch, faiss, memory, milvus, weaviate") @@ -508,11 +513,6 @@ def document_store_cosine_small(request, tmp_path): document_store.delete_documents() - -SQL_TYPE = "sqlite" -# SQL_TYPE = "postgres" - - @pytest.fixture def sql_url(tmp_path): if SQL_TYPE == "postgres": @@ -559,10 +559,11 @@ def teardown_postgres(): def get_document_store(document_store_type, tmp_path, embedding_dim=768, embedding_field="embedding", index="haystack_test", similarity:str="dot_product"): if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]: + logging.warning("Setting up PostgreSQL...") setup_postgres() if document_store_type == "sql": - document_store = SQLDocumentStore(url="sqlite://", index=index) + document_store = SQLDocumentStore(url=get_sql_url(tmp_path), index=index, isolation_level="AUTOCOMMIT") elif document_store_type == "memory": document_store = InMemoryDocumentStore( @@ -584,7 +585,7 @@ def get_document_store(document_store_type, tmp_path, embedding_dim=768, embeddi embedding_field=embedding_field, index=index, similarity=similarity, - isolation_level="AUTOCOMMIT" if SQL_TYPE == "postgres" else None + isolation_level="AUTOCOMMIT"# if SQL_TYPE == "postgres" else None ) elif document_store_type == "milvus": @@ -595,7 +596,7 @@ def get_document_store(document_store_type, tmp_path, embedding_dim=768, embeddi embedding_field=embedding_field, index=index, similarity=similarity, - isolation_level="AUTOCOMMIT" if SQL_TYPE == "postgres" else None + isolation_level="AUTOCOMMIT"# if SQL_TYPE == "postgres" else None ) _, collections = document_store.milvus_server.list_collections() for collection in collections: @@ -617,6 +618,7 @@ def get_document_store(document_store_type, tmp_path, embedding_dim=768, embeddi yield document_store if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]: + logging.warning("Tearing down PostgreSQL...") teardown_postgres() diff --git a/test/test_document_store.py b/test/test_document_store.py index 39bc5caf51..581edb7bb7 100644 --- a/test/test_document_store.py +++ b/test/test_document_store.py @@ -1,4 +1,6 @@ from unittest import mock +import uuid +import math import numpy as np import pandas as pd import pytest @@ -16,6 +18,18 @@ from haystack.document_stores.faiss import FAISSDocumentStore + +DOCUMENTS = [ + {"meta": {"name": "name_1", "year": "2020", "month": "01"}, "content": "text_1", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_2", "year": "2020", "month": "02"}, "content": "text_2", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_3", "year": "2020", "month": "03"}, "content": "text_3", "embedding": np.random.rand(768).astype(np.float64)}, + {"meta": {"name": "name_4", "year": "2021", "month": "01"}, "content": "text_4", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_5", "year": "2021", "month": "02"}, "content": "text_5", "embedding": np.random.rand(768).astype(np.float32)}, + {"meta": {"name": "name_6", "year": "2021", "month": "03"}, "content": "text_6", "embedding": np.random.rand(768).astype(np.float64)}, +] + + + @pytest.mark.elasticsearch def test_init_elastic_client(): # defaults @@ -147,8 +161,8 @@ def test_get_all_documents_with_correct_filters(document_store_with_docs): assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"} -def 
test_get_all_documents_with_correct_filters_legacy_sqlite(test_docs_xs): - document_store_with_docs = get_document_store("sql") +def test_get_all_documents_with_correct_filters_legacy_sqlite(test_docs_xs, tmp_path): + document_store_with_docs = get_document_store("sql", tmp_path) document_store_with_docs.write_documents(test_docs_xs) document_store_with_docs.use_windowed_query = False @@ -819,7 +833,7 @@ def test_update_meta(document_store): @pytest.mark.parametrize("document_store_type", ["elasticsearch", "memory"]) def test_custom_embedding_field(document_store_type): document_store = get_document_store( - document_store_type=document_store_type, embedding_field="custom_embedding_field" + document_store_type=document_store_type, tmp_path=tmp_path, embedding_field="custom_embedding_field" ) doc_to_write = {"content": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)} document_store.write_documents([doc_to_write]) @@ -973,4 +987,83 @@ def test_custom_headers(document_store_with_docs: BaseDocumentStore): args, kwargs = mock_client.search.call_args assert "headers" in kwargs assert kwargs["headers"] == custom_headers - assert len(documents) > 0 \ No newline at end of file + assert len(documents) > 0 + + +def ensure_ids_are_correct_uuids(docs:list,document_store:object)->None: + # Weaviate currently only supports UUIDs + if type(document_store)==WeaviateDocumentStore: + for d in docs: + d["id"] = str(uuid.uuid4()) + + +def test_cosine_similarity(document_store_cosine): + # below we will write documents to the store and then query it to see if vectors were normalized + + ensure_ids_are_correct_uuids(docs=DOCUMENTS, document_store=document_store_cosine) + document_store_cosine.write_documents(documents=DOCUMENTS) + + # note that the same query will be used later when querying after updating the embeddings + query = np.random.rand(768).astype(np.float32) + + query_results = document_store_cosine.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True) + + # check if search with cosine similarity returns the correct number of results + assert len(query_results) == len(DOCUMENTS) + indexed_docs = {} + for doc in DOCUMENTS: + indexed_docs[doc["content"]] = doc["embedding"] + + for doc in query_results: + result_emb = doc.embedding + original_emb = np.array([indexed_docs[doc.content]], dtype="float32") + document_store_cosine.normalize_embedding(original_emb[0]) + + # check if the stored embedding was normalized + assert np.allclose(original_emb[0], result_emb, rtol=0.01) + + # check if the score is plausible for cosine similarity + assert 0 <= doc.score <= 1.0 + + # now check if vectors are normalized when updating embeddings + class MockRetriever(): + def embed_documents(self, docs): + return [np.random.rand(768).astype(np.float32) for doc in docs] + + retriever = MockRetriever() + document_store_cosine.update_embeddings(retriever=retriever) + query_results = document_store_cosine.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True) + + for doc in query_results: + original_emb = np.array([indexed_docs[doc.content]], dtype="float32") + document_store_cosine.normalize_embedding(original_emb[0]) + # check if the original embedding has changed after updating the embeddings + assert not np.allclose(original_emb[0], doc.embedding, rtol=0.01) + + +def test_normalize_embeddings_diff_shapes(document_store_cosine_small): + VEC_1 = np.array([.1, .2, .3], dtype="float32") + document_store_cosine_small.normalize_embedding(VEC_1) + 
assert np.linalg.norm(VEC_1) - 1 < 0.01 + + VEC_1 = np.array([.1, .2, .3], dtype="float32").reshape(1, -1) + document_store_cosine_small.normalize_embedding(VEC_1) + assert np.linalg.norm(VEC_1) - 1 < 0.01 + + +def test_cosine_sanity_check(document_store_cosine_small): + VEC_1 = np.array([.1, .2, .3], dtype="float32") + VEC_2 = np.array([.4, .5, .6], dtype="float32") + + # This is the cosine similarity of VEC_1 and VEC_2 calculated using sklearn.metrics.pairwise.cosine_similarity + # The score is normalized to yield a value between 0 and 1. + KNOWN_COSINE = (0.9746317 + 1) / 2 + + docs = [{"name": "vec_1", "text": "vec_1", "content": "vec_1", "embedding": VEC_1}] + ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_cosine_small) + document_store_cosine_small.write_documents(documents=docs) + + query_results = document_store_cosine_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True) + + # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318 + assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.00002) diff --git a/test/test_faiss_and_milvus.py b/test/test_faiss_and_milvus.py index 8e7ec6023e..3c45b26851 100644 --- a/test/test_faiss_and_milvus.py +++ b/test/test_faiss_and_milvus.py @@ -443,80 +443,3 @@ def test_faiss_passing_index_from_outside(tmp_path): # test if vectors ids are associated with docs for doc in documents_indexed: assert 0 <= int(doc.meta["vector_id"]) <= 7 - -def ensure_ids_are_correct_uuids(docs:list,document_store:object)->None: - # Weaviate currently only supports UUIDs - if type(document_store)==WeaviateDocumentStore: - for d in docs: - d["id"] = str(uuid.uuid4()) - -def test_cosine_similarity(document_store_cosine): - # below we will write documents to the store and then query it to see if vectors were normalized - - ensure_ids_are_correct_uuids(docs=DOCUMENTS,document_store=document_store_cosine) - document_store_cosine.write_documents(documents=DOCUMENTS) - - # note that the same query will be used later when querying after updating the embeddings - query = np.random.rand(768).astype(np.float32) - - query_results = document_store_cosine.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True) - - # check if search with cosine similarity returns the correct number of results - assert len(query_results) == len(DOCUMENTS) - indexed_docs = {} - for doc in DOCUMENTS: - indexed_docs[doc["content"]] = doc["embedding"] - - for doc in query_results: - result_emb = doc.embedding - original_emb = np.array([indexed_docs[doc.content]], dtype="float32") - document_store_cosine.normalize_embedding(original_emb[0]) - - # check if the stored embedding was normalized - assert np.allclose(original_emb[0], result_emb, rtol=0.01) - - # check if the score is plausible for cosine similarity - assert 0 <= doc.score <= 1.0 - - # now check if vectors are normalized when updating embeddings - class MockRetriever(): - def embed_documents(self, docs): - return [np.random.rand(768).astype(np.float32) for doc in docs] - - retriever = MockRetriever() - document_store_cosine.update_embeddings(retriever=retriever) - query_results = document_store_cosine.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True) - - for doc in query_results: - original_emb = np.array([indexed_docs[doc.content]], dtype="float32") - document_store_cosine.normalize_embedding(original_emb[0]) - # check if the original embedding has changed after updating the embeddings - assert not 
np.allclose(original_emb[0], doc.embedding, rtol=0.01) - - -def test_normalize_embeddings_diff_shapes(document_store_cosine_small): - VEC_1 = np.array([.1, .2, .3], dtype="float32") - document_store_cosine_small.normalize_embedding(VEC_1) - assert np.linalg.norm(VEC_1) - 1 < 0.01 - - VEC_1 = np.array([.1, .2, .3], dtype="float32").reshape(1, -1) - document_store_cosine_small.normalize_embedding(VEC_1) - assert np.linalg.norm(VEC_1) - 1 < 0.01 - - -def test_cosine_sanity_check(document_store_cosine_small): - VEC_1 = np.array([.1, .2, .3], dtype="float32") - VEC_2 = np.array([.4, .5, .6], dtype="float32") - - # This is the cosine similarity of VEC_1 and VEC_2 calculated using sklearn.metrics.pairwise.cosine_similarity - # The score is normalized to yield a value between 0 and 1. - KNOWN_COSINE = (0.9746317 + 1) / 2 - - docs = [{"name": "vec_1", "text": "vec_1", "content": "vec_1", "embedding": VEC_1}] - ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_cosine_small) - document_store_cosine_small.write_documents(documents=docs) - - query_results = document_store_cosine_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True) - - # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318 - assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.00002) From 463ba445c55c7b6732f7a1336d9373ce81d52d39 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 11 Jan 2022 18:18:21 +0100 Subject: [PATCH 06/21] Properly fix MetaDocumentORM and MetaLabelORM with composite foreign key constraints --- haystack/document_stores/sql.py | 53 ++++++++++++++------------------- test/test_document_store.py | 22 +++++++------- 2 files changed, 34 insertions(+), 41 deletions(-) diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py index a7ef00f460..3d218e9a43 100644 --- a/haystack/document_stores/sql.py +++ b/haystack/document_stores/sql.py @@ -4,7 +4,7 @@ import itertools import numpy as np from uuid import uuid4 -from sqlalchemy import and_, func, create_engine, Column, String, DateTime, ForeignKey, Boolean, Text, text, JSON +from sqlalchemy import and_, func, create_engine, Column, String, DateTime, ForeignKey, Boolean, Text, text, JSON, ForeignKeyConstraint from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.orm import relationship, sessionmaker from sqlalchemy.sql import case, null @@ -20,7 +20,7 @@ class ORMBase(Base): __abstract__ = True - id = Column(String(100), default=lambda: str(uuid4()), unique=True, primary_key=True) + id = Column(String(100), default=lambda: str(uuid4()), primary_key=True) created_at = Column(DateTime, server_default=func.now()) updated_at = Column(DateTime, server_default=func.now(), server_onupdate=func.now()) @@ -33,9 +33,6 @@ class DocumentORM(ORMBase): # primary key in combination with id to allow the same doc in different indices index = Column(String(100), nullable=False, primary_key=True) vector_id = Column(String(100), unique=True, nullable=True) - - # labels = relationship("LabelORM", back_populates="document") - # speeds up queries for get_documents_by_vector_ids() by having a single query that returns joined metadata meta = relationship("MetaDocumentORM", back_populates="documents", lazy="joined") @@ -45,36 +42,18 @@ class MetaDocumentORM(ORMBase): name = Column(String(100), index=True) value = Column(String(1000), index=True) - document_id = Column( - String(100), - ForeignKey("document.id", ondelete="CASCADE", onupdate="CASCADE"), - nullable=False, 
- index=True - ) - documents = relationship("DocumentORM", back_populates="meta") - -class MetaLabelORM(ORMBase): - __tablename__ = "meta_label" - - name = Column(String(100), index=True) - value = Column(String(1000), index=True) - label_id = Column( - String(100), - ForeignKey("label.id", ondelete="CASCADE", onupdate="CASCADE"), - nullable=False, - index=True - ) - - labels = relationship("LabelORM", back_populates="meta") + document_id = Column(String(100), nullable=False, index=True) + document_index = Column(String(100), nullable=False, index=True) + __table_args__ = (ForeignKeyConstraint([document_id, document_index], + [DocumentORM.id, DocumentORM.index], + ondelete="CASCADE", onupdate="CASCADE"), {}) class LabelORM(ORMBase): __tablename__ = "label" - # document_id = Column(String(100), ForeignKey("document.id", ondelete="CASCADE", onupdate="CASCADE"), nullable=False) - index = Column(String(100), nullable=False, primary_key=True) query = Column(Text, nullable=False) answer = Column(JSON, nullable=True) @@ -86,7 +65,21 @@ class LabelORM(ORMBase): pipeline_id = Column(String(500), nullable=True) meta = relationship("MetaLabelORM", back_populates="labels", lazy="joined") - # document = relationship("DocumentORM", back_populates="labels") + + +class MetaLabelORM(ORMBase): + __tablename__ = "meta_label" + + name = Column(String(100), index=True) + value = Column(String(1000), index=True) + labels = relationship("LabelORM", back_populates="meta") + + label_id = Column(String(100), nullable=False, index=True) + label_index = Column(String(100), nullable=False, index=True) + __table_args__ = (ForeignKeyConstraint([label_id, label_index], + [LabelORM.id, LabelORM.index], + ondelete="CASCADE", onupdate="CASCADE"), {}) + class SQLDocumentStore(BaseDocumentStore): @@ -130,7 +123,7 @@ def __init__( engine = create_engine(url, connect_args={'check_same_thread': check_same_thread}, **create_engine_params) else: engine = create_engine(url, **create_engine_params) - ORMBase.metadata.create_all(engine) + Base.metadata.create_all(engine) Session = sessionmaker(bind=engine) self.session = Session() self.index: str = index diff --git a/test/test_document_store.py b/test/test_document_store.py index 581edb7bb7..9b05716e10 100644 --- a/test/test_document_store.py +++ b/test/test_document_store.py @@ -162,18 +162,18 @@ def test_get_all_documents_with_correct_filters(document_store_with_docs): def test_get_all_documents_with_correct_filters_legacy_sqlite(test_docs_xs, tmp_path): - document_store_with_docs = get_document_store("sql", tmp_path) - document_store_with_docs.write_documents(test_docs_xs) + for document_store_with_docs in get_document_store("sql", tmp_path): + document_store_with_docs.write_documents(test_docs_xs) - document_store_with_docs.use_windowed_query = False - documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test2"]}) - assert len(documents) == 1 - assert documents[0].meta["name"] == "filename2" + document_store_with_docs.use_windowed_query = False + documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test2"]}) + assert len(documents) == 1 + assert documents[0].meta["name"] == "filename2" - documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test1", "test3"]}) - assert len(documents) == 2 - assert {d.meta["name"] for d in documents} == {"filename1", "filename3"} - assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"} + documents = 
document_store_with_docs.get_all_documents(filters={"meta_field": ["test1", "test3"]}) + assert len(documents) == 2 + assert {d.meta["name"] for d in documents} == {"filename1", "filename3"} + assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"} def test_get_all_documents_with_incorrect_filter_name(document_store_with_docs): @@ -831,7 +831,7 @@ def test_update_meta(document_store): @pytest.mark.parametrize("document_store_type", ["elasticsearch", "memory"]) -def test_custom_embedding_field(document_store_type): +def test_custom_embedding_field(document_store_type, tmp_path): document_store = get_document_store( document_store_type=document_store_type, tmp_path=tmp_path, embedding_field="custom_embedding_field" ) From bb534eee209fbc7a43d0a81e0d92fe2fa2d353c6 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Tue, 11 Jan 2022 18:39:14 +0100 Subject: [PATCH 07/21] Fix mypy --- haystack/document_stores/sql.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py index 3d218e9a43..a29c90c04d 100644 --- a/haystack/document_stores/sql.py +++ b/haystack/document_stores/sql.py @@ -48,7 +48,7 @@ class MetaDocumentORM(ORMBase): document_index = Column(String(100), nullable=False, index=True) __table_args__ = (ForeignKeyConstraint([document_id, document_index], [DocumentORM.id, DocumentORM.index], - ondelete="CASCADE", onupdate="CASCADE"), {}) + ondelete="CASCADE", onupdate="CASCADE"), {}) #type: ignore class LabelORM(ORMBase): @@ -78,7 +78,7 @@ class MetaLabelORM(ORMBase): label_index = Column(String(100), nullable=False, index=True) __table_args__ = (ForeignKeyConstraint([label_id, label_index], [LabelORM.id, LabelORM.index], - ondelete="CASCADE", onupdate="CASCADE"), {}) + ondelete="CASCADE", onupdate="CASCADE"), {}) #type: ignore From 0e2ebfba0e6c7a235005755263ca3209111b0928 Mon Sep 17 00:00:00 2001 From: ZanSara Date: Wed, 12 Jan 2022 10:38:34 +0100 Subject: [PATCH 08/21] update_document_meta() was not using index properly --- haystack/document_stores/elasticsearch.py | 4 +++- haystack/document_stores/sql.py | 11 +++++------ haystack/document_stores/weaviate.py | 4 +++- test/conftest.py | 2 +- test/test_document_store.py | 18 ++++++++---------- test/test_weaviate.py | 8 ++++---- 6 files changed, 24 insertions(+), 23 deletions(-) diff --git a/haystack/document_stores/elasticsearch.py b/haystack/document_stores/elasticsearch.py index 6e3951bba8..10e0046adf 100644 --- a/haystack/document_stores/elasticsearch.py +++ b/haystack/document_stores/elasticsearch.py @@ -551,10 +551,12 @@ def write_labels( if labels_to_index: bulk(self.client, labels_to_index, request_timeout=300, refresh=self.refresh_type, headers=headers) - def update_document_meta(self, id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None): + def update_document_meta(self, id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None, index: str = None): """ Update the metadata dictionary of a document by specifying its string id """ + if not index: + index = self.index body = {"doc": meta} self.client.update(index=self.index, id=id, body=body, refresh=self.refresh_type, headers=headers) diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py index a29c90c04d..a5c3c6d493 100644 --- a/haystack/document_stores/sql.py +++ b/haystack/document_stores/sql.py @@ -113,12 +113,9 @@ def __init__( self.set_config( url=url, index=index, label_index=label_index, duplicate_documents=duplicate_documents, 
From 0e2ebfba0e6c7a235005755263ca3209111b0928 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Wed, 12 Jan 2022 10:38:34 +0100
Subject: [PATCH 08/21] update_document_meta() was not using index properly

---
 haystack/document_stores/elasticsearch.py |  4 +++-
 haystack/document_stores/sql.py           | 11 +++++------
 haystack/document_stores/weaviate.py      |  4 +++-
 test/conftest.py                          |  2 +-
 test/test_document_store.py               | 18 ++++++++----------
 test/test_weaviate.py                     |  8 ++++----
 6 files changed, 24 insertions(+), 23 deletions(-)

diff --git a/haystack/document_stores/elasticsearch.py b/haystack/document_stores/elasticsearch.py
index 6e3951bba8..10e0046adf 100644
--- a/haystack/document_stores/elasticsearch.py
+++ b/haystack/document_stores/elasticsearch.py
@@ -551,10 +551,12 @@ def write_labels(
         if labels_to_index:
             bulk(self.client, labels_to_index, request_timeout=300, refresh=self.refresh_type, headers=headers)
 
-    def update_document_meta(self, id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None):
+    def update_document_meta(self, id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None, index: str = None):
         """
         Update the metadata dictionary of a document by specifying its string id
         """
+        if not index:
+            index = self.index
         body = {"doc": meta}
         self.client.update(index=self.index, id=id, body=body, refresh=self.refresh_type, headers=headers)
 
diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py
index a29c90c04d..a5c3c6d493 100644
--- a/haystack/document_stores/sql.py
+++ b/haystack/document_stores/sql.py
@@ -113,12 +113,9 @@ def __init__(
         self.set_config(
             url=url, index=index, label_index=label_index, duplicate_documents=duplicate_documents, check_same_thread=check_same_thread
         )
-
         create_engine_params = {}
         if isolation_level:
             create_engine_params["isolation_level"] = isolation_level
-
-        logging.warning(f"create engine params: {create_engine_params}")
         if "sqlite" in url:
             engine = create_engine(url, connect_args={'check_same_thread': check_same_thread}, **create_engine_params)
         else:
@@ -461,12 +458,14 @@ def reset_vector_ids(self, index: Optional[str] = None):
         self.session.query(DocumentORM).filter_by(index=index).update({DocumentORM.vector_id: null()})
         self.session.commit()
 
-    def update_document_meta(self, id: str, meta: Dict[str, str]):
+    def update_document_meta(self, id: str, meta: Dict[str, str], index: str = None):
         """
         Update the metadata dictionary of a document by specifying its string id
         """
-        self.session.query(MetaDocumentORM).filter_by(document_id=id).delete()
-        meta_orms = [MetaDocumentORM(name=key, value=value, document_id=id) for key, value in meta.items()]
+        if not index:
+            index = self.index
+        self.session.query(MetaDocumentORM).filter_by(document_id=id, document_index=index).delete()
+        meta_orms = [MetaDocumentORM(name=key, value=value, document_id=id, document_index=index) for key, value in meta.items()]
         for m in meta_orms:
             self.session.add(m)
         self.session.commit()
diff --git a/haystack/document_stores/weaviate.py b/haystack/document_stores/weaviate.py
index e17ff98fbe..2f4561cb94 100644
--- a/haystack/document_stores/weaviate.py
+++ b/haystack/document_stores/weaviate.py
@@ -483,10 +483,12 @@ def write_documents(
             progress_bar.update(batch_size)
         progress_bar.close()
 
-    def update_document_meta(self, id: str, meta: Dict[str, str]):
+    def update_document_meta(self, id: str, meta: Dict[str, str], index: str = None):
         """
         Update the metadata dictionary of a document by specifying its string id.
         """
+        if not index:
+            index = self.index
         self.weaviate_client.data_object.update(meta, class_name=self.index, uuid=id)
 
     def get_embedding_count(self, filters: Optional[Dict[str, List[str]]] = None, index: Optional[str] = None) -> int:
diff --git a/test/conftest.py b/test/conftest.py
index b2bf0536cb..aadde5368e 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -44,7 +44,7 @@
 # To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
 SQL_TYPE = "sqlite"
-# SQL_TYPE = "postgres"
+#SQL_TYPE = "postgres"
 
 
 def pytest_addoption(parser):
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 9b05716e10..44029aa797 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -804,7 +804,7 @@ def test_multilabel_no_answer(document_store):
     assert len(multi_labels[0].answers) == 1
 
 
-@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss"], indirect=True)
+@pytest.mark.parametrize("document_store", ["elasticsearch", "faiss", "milvus", "weaviate"], indirect=True)
 # Currently update_document_meta() is not implemented for Memory doc store
 def test_update_meta(document_store):
     documents = [
@@ -832,15 +832,13 @@ def test_update_meta(document_store):
 
 @pytest.mark.parametrize("document_store_type", ["elasticsearch", "memory"])
 def test_custom_embedding_field(document_store_type, tmp_path):
-    document_store = get_document_store(
-        document_store_type=document_store_type, tmp_path=tmp_path, embedding_field="custom_embedding_field"
-    )
-    doc_to_write = {"content": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
-    document_store.write_documents([doc_to_write])
-    documents = document_store.get_all_documents(return_embedding=True)
-    assert len(documents) == 1
-    assert documents[0].content == "test"
-    np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
+    for document_store in get_document_store(document_store_type=document_store_type, tmp_path=tmp_path, embedding_field="custom_embedding_field"):
+        doc_to_write = {"content": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
+        document_store.write_documents([doc_to_write])
+        documents = document_store.get_all_documents(return_embedding=True)
+        assert len(documents) == 1
+        assert documents[0].content == "test"
+        np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
 
 
 @pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
diff --git a/test/test_weaviate.py b/test/test_weaviate.py
index b6a3de9775..49ece2f52c 100644
--- a/test/test_weaviate.py
+++ b/test/test_weaviate.py
@@ -30,16 +30,16 @@ def get_uuid():
 
 
 @pytest.fixture(params=["weaviate"])
-def document_store_with_docs(request):
-    document_store = get_document_store(request.param)
+def document_store_with_docs(request, tmp_path):
+    document_store = get_document_store(request.param, tmp_path=tmp_path)
     document_store.write_documents(DOCUMENTS_XS)
     yield document_store
     document_store.delete_documents()
 
 
 @pytest.fixture(params=["weaviate"])
-def document_store(request):
-    document_store = get_document_store(request.param)
+def document_store(request, tmp_path):
+    document_store = get_document_store(request.param, tmp_path=tmp_path)
     yield document_store
     document_store.delete_documents()
From c1589a898c8cd3560869ff60192e82e45f817bb0 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 12 Jan 2022 09:39:27 +0000
Subject: [PATCH 09/21] Add latest docstring and tutorial changes

---
 docs/_src/api/api/document_store.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index cb14d575b3..eaf9747e21 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -380,7 +380,7 @@ Write annotation labels into document store.
 #### update\_document\_meta
 
 ```python
- | update_document_meta(id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None)
+ | update_document_meta(id: str, meta: Dict[str, str], headers: Optional[Dict[str, str]] = None, index: str = None)
 ```
 
 Update the metadata dictionary of a document by specifying its string id
@@ -1095,7 +1095,7 @@ Set vector IDs for all documents as None
 #### update\_document\_meta
 
 ```python
- | update_document_meta(id: str, meta: Dict[str, str])
+ | update_document_meta(id: str, meta: Dict[str, str], index: str = None)
 ```
 
 Update the metadata dictionary of a document by specifying its string id
@@ -1863,7 +1863,7 @@ None
 #### update\_document\_meta
 
 ```python
- | update_document_meta(id: str, meta: Dict[str, str])
+ | update_document_meta(id: str, meta: Dict[str, str], index: str = None)
 ```
 
 Update the metadata dictionary of a document by specifying its string id.

From 681b0365f91b09435d6f2a0553f73d3779c3508d Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Wed, 12 Jan 2022 12:25:17 +0100
Subject: [PATCH 10/21] Another small fix for test_weaviate.py

---
 test/conftest.py      |  2 +-
 test/test_weaviate.py | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index aadde5368e..05773bb773 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -558,7 +558,7 @@ def teardown_postgres():
 
 def get_document_store(document_store_type, tmp_path, embedding_dim=768, embedding_field="embedding", index="haystack_test", similarity:str="dot_product"):
-    if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]:
+    if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus", "sql"]:
         logging.warning("Setting up PostgreSQL...")
         setup_postgres()
 
diff --git a/test/test_weaviate.py b/test/test_weaviate.py
index 49ece2f52c..60ddb8ca1c 100644
--- a/test/test_weaviate.py
+++ b/test/test_weaviate.py
@@ -31,17 +31,17 @@ def get_uuid():
 
 @pytest.fixture(params=["weaviate"])
 def document_store_with_docs(request, tmp_path):
-    document_store = get_document_store(request.param, tmp_path=tmp_path)
-    document_store.write_documents(DOCUMENTS_XS)
-    yield document_store
-    document_store.delete_documents()
+    for document_store in get_document_store(request.param, tmp_path=tmp_path):
+        document_store.write_documents(DOCUMENTS_XS)
+        yield document_store
+        document_store.delete_documents()
 
 
 @pytest.fixture(params=["weaviate"])
 def document_store(request, tmp_path):
-    document_store = get_document_store(request.param, tmp_path=tmp_path)
-    yield document_store
-    document_store.delete_documents()
+    for document_store in get_document_store(request.param, tmp_path=tmp_path):
+        yield document_store
+        document_store.delete_documents()
 
 
 @pytest.mark.weaviate
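At this point in the series `get_document_store()` is a generator (it yields the store, then runs PostgreSQL teardown after the yield), so fixtures must iterate it rather than call it directly; otherwise the code after the yield never runs. A minimal standalone sketch of the pattern, with hypothetical resource names:

```python
import pytest

def make_resource():
    # Generator-style factory: code before the yield is setup,
    # code after the yield is teardown.
    resource = {"connection": "open"}
    yield resource
    resource["connection"] = "closed"  # runs when iteration resumes

@pytest.fixture
def resource():
    # Iterating the generator keeps it alive for the test's duration, so the
    # factory's teardown runs when pytest resumes the fixture after the test.
    for res in make_resource():
        yield res
```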
From 65e7bf3f11a0b4c30f9807c040b380b5fb60f0f6 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Wed, 12 Jan 2022 13:52:12 +0100
Subject: [PATCH 11/21] Improve docstrings for 'isolation_level'

---
 haystack/document_stores/faiss.py    | 1 +
 haystack/document_stores/milvus.py   | 1 +
 haystack/document_stores/milvus2x.py | 3 ++-
 haystack/document_stores/sql.py      | 2 +-
 4 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/haystack/document_stores/faiss.py b/haystack/document_stores/faiss.py
index 676f49f84b..f79568b7c5 100644
--- a/haystack/document_stores/faiss.py
+++ b/haystack/document_stores/faiss.py
@@ -95,6 +95,7 @@ def __init__(
                                   If specified no other params besides faiss_config_path must be specified.
         :param faiss_config_path: Stored FAISS initial configuration parameters.
                                   Can be created via calling `save()`
+        :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
         """
         # special case if we want to load an existing index from disk
         # load init params from disk and run init again
diff --git a/haystack/document_stores/milvus.py b/haystack/document_stores/milvus.py
index 538392c1bc..4eeec700bf 100644
--- a/haystack/document_stores/milvus.py
+++ b/haystack/document_stores/milvus.py
@@ -98,6 +98,7 @@ def __init__(
                                     overwrite: Update any existing documents with the same ID when adding documents.
                                     fail: an error is raised if the document ID of the document being added already exists.
+        :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
         """
         # save init parameters to enable export of component config as YAML
         self.set_config(
diff --git a/haystack/document_stores/milvus2x.py b/haystack/document_stores/milvus2x.py
index 026c09bcea..4ea6626612 100644
--- a/haystack/document_stores/milvus2x.py
+++ b/haystack/document_stores/milvus2x.py
@@ -73,7 +73,7 @@ def __init__(
         custom_fields: Optional[List[Any]] = None,
         progress_bar: bool = True,
         duplicate_documents: str = 'overwrite',
-        isolation_level: str = None,
+        isolation_level: str = None
     ):
         """
         :param sql_url: SQL connection URL for storing document texts and metadata. It defaults to a local, file based SQLite DB. For large scale
@@ -119,6 +119,7 @@ def __init__(
                                     overwrite: Update any existing documents with the same ID when adding documents.
                                     fail: an error is raised if the document ID of the document being added already exists.
+        :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
         """
 
         # save init parameters to enable export of component config as YAML
diff --git a/haystack/document_stores/sql.py b/haystack/document_stores/sql.py
index a5c3c6d493..7ef5db7912 100644
--- a/haystack/document_stores/sql.py
+++ b/haystack/document_stores/sql.py
@@ -106,7 +106,7 @@ def __init__(
                                     fail: an error is raised if the document ID of the document being added already exists.
         :param check_same_thread: Set to False to mitigate multithreading issues in older SQLite versions (see https://docs.sqlalchemy.org/en/14/dialects/sqlite.html?highlight=check_same_thread#threading-pooling-behavior)
-        :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()`
+        :param isolation_level: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
         """
 
         # save init parameters to enable export of component config as YAML
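The parameter being documented here is forwarded unchanged to SQLAlchemy's `create_engine()`, and matters mostly for the PostgreSQL-backed tests in this series. A hedged sketch of both ways a caller might set it; the connection URL is a placeholder for a locally running instance:

```python
# Sketch: how isolation_level flows into SQLAlchemy.
from sqlalchemy import create_engine
from haystack.document_stores.faiss import FAISSDocumentStore

# Direct SQLAlchemy equivalent of what the store builds internally:
engine = create_engine(
    "postgresql://postgres:postgres@127.0.0.1/postgres",
    isolation_level="AUTOCOMMIT",  # each statement commits on its own
)

# Through the document store, the same value is passed to create_engine():
document_store = FAISSDocumentStore(
    sql_url="postgresql://postgres:postgres@127.0.0.1/postgres",
    isolation_level="AUTOCOMMIT",
)
```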
From 9c3d8fa4cc4c93b360d604eb339c4b101a070bc2 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Wed, 12 Jan 2022 12:53:05 +0000
Subject: [PATCH 12/21] Add latest docstring and tutorial changes

---
 docs/_src/api/api/document_store.md | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index eaf9747e21..bd37d120b5 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -970,7 +970,7 @@ An SQL backed DocumentStore. Currently supports SQLite, PostgreSQL and MySQL bac
                               fail: an error is raised if the document ID of the document being added already exists.
 - `check_same_thread`: Set to False to mitigate multithreading issues in older SQLite versions (see https://docs.sqlalchemy.org/en/14/dialects/sqlite.html?highlight=check_same_thread#threading-pooling-behavior)
-- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()`
+- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
 
 #### get\_document\_by\_id
 
@@ -1249,6 +1249,7 @@ the vector embeddings are indexed in a FAISS Index.
                            If specified no other params besides faiss_config_path must be specified.
 - `faiss_config_path`: Stored FAISS initial configuration parameters.
                        Can be created via calling `save()`
+- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
 
 #### write\_documents
 
@@ -1526,6 +1527,7 @@ Note that an overly large index_file_size value may cause failure to load a segm
                               overwrite: Update any existing documents with the same ID when adding documents.
                               fail: an error is raised if the document ID of the document being added already exists.
+- `isolation_level`: see SQLAlchemy's `isolation_level` parameter for `create_engine()` (https://docs.sqlalchemy.org/en/14/core/engines.html#sqlalchemy.create_engine.params.isolation_level)
 
 #### write\_documents

From 115f072b61a7495564aa36f0aada81d0ff2d3f99 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Thu, 13 Jan 2022 09:50:44 +0100
Subject: [PATCH 13/21] Implement Thomas' feedback

---
 haystack/document_stores/elasticsearch.py |  2 +-
 haystack/document_stores/weaviate.py      |  2 +-
 test/conftest.py                          | 72 ++++++++++++++---------
 test/test_document_store.py               | 34 +++++------
 test/test_weaviate.py                     | 14 ++---
 5 files changed, 69 insertions(+), 55 deletions(-)

diff --git a/haystack/document_stores/elasticsearch.py b/haystack/document_stores/elasticsearch.py
index 10e0046adf..4a4072004d 100644
--- a/haystack/document_stores/elasticsearch.py
+++ b/haystack/document_stores/elasticsearch.py
@@ -558,7 +558,7 @@ def update_document_meta(self, id: str, meta: Dict[str, str], headers: Optional[
         if not index:
             index = self.index
         body = {"doc": meta}
-        self.client.update(index=self.index, id=id, body=body, refresh=self.refresh_type, headers=headers)
+        self.client.update(index=index, id=id, body=body, refresh=self.refresh_type, headers=headers)
 
     def get_document_count(self, filters: Optional[Dict[str, List[str]]] = None, index: Optional[str] = None,
                            only_documents_without_embedding: bool = False, headers: Optional[Dict[str, str]] = None) -> int:
diff --git a/haystack/document_stores/weaviate.py b/haystack/document_stores/weaviate.py
index 2f4561cb94..508e33631c 100644
--- a/haystack/document_stores/weaviate.py
+++ b/haystack/document_stores/weaviate.py
@@ -489,7 +489,7 @@ def update_document_meta(self, id: str, meta: Dict[str, str], index: str = None)
         """
         if not index:
             index = self.index
-        self.weaviate_client.data_object.update(meta, class_name=self.index, uuid=id)
+        self.weaviate_client.data_object.update(meta, class_name=index, uuid=id)
 
     def get_embedding_count(self, filters: Optional[Dict[str, List[str]]] = None, index: Optional[str] = None) -> int:
         """
diff --git a/test/conftest.py b/test/conftest.py
index 05773bb773..44e23d295b 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -44,7 +44,7 @@
 # To manually run the tests with default PostgreSQL instead of SQLite, switch the lines below
 SQL_TYPE = "sqlite"
-#SQL_TYPE = "postgres"
+# SQL_TYPE = "postgres"
 
 
 def pytest_addoption(parser):
@@ -486,43 +486,57 @@ def get_retriever(retriever_type, document_store):
 
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"])
 def document_store_with_docs(request, test_docs_xs, tmp_path):
-    for document_store in get_document_store(request.param, tmp_path=tmp_path):
-        document_store.write_documents(test_docs_xs)
-        yield document_store
-        document_store.delete_documents()
+    document_store = get_document_store(request.param, tmp_path=tmp_path)
+    document_store.write_documents(test_docs_xs)
+    yield document_store
+    document_store.delete_documents()
 
 
 @pytest.fixture
 def document_store(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    for document_store in get_document_store(request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path):
-        yield document_store
-        document_store.delete_documents()
+    document_store = get_document_store(request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path)
+    yield document_store
+    document_store.delete_documents()
 
 
 @pytest.fixture(params=["faiss", "milvus", "weaviate"])
 def document_store_cosine(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    for document_store in get_document_store(request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path):
-        yield document_store
-        document_store.delete_documents()
+    document_store = get_document_store(request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path)
+    yield document_store
+    document_store.delete_documents()
 
 
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"])
 def document_store_cosine_small(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
-    for document_store in get_document_store(request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path):
-        yield document_store
-        document_store.delete_documents()
+    document_store = get_document_store(request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path)
+    yield document_store
+    document_store.delete_documents()
 
 
-@pytest.fixture
-def sql_url(tmp_path):
+@pytest.fixture(scope="function", autouse=True)
+def postgres_fixture():
     if SQL_TYPE == "postgres":
-        try:
-            setup_postgres()
-            yield get_sql_url(tmp_path)
-        finally:
-            teardown_postgres()
+        setup_postgres()
+        yield
+        teardown_postgres()
     else:
-        yield get_sql_url(tmp_path)
+        yield
+
+
+# @pytest.fixture
+# def sql_url(tmp_path):
+#     if SQL_TYPE == "postgres":
+#         try:
+#             setup_postgres()
+#             yield get_sql_url(tmp_path)
+#         finally:
+#             teardown_postgres()
+#     else:
+#         yield get_sql_url(tmp_path)
+
+
+@pytest.fixture
+def sql_url(tmp_path):
+    return get_sql_url(tmp_path)
 
 
 def get_sql_url(tmp_path):
@@ -558,9 +572,9 @@ def teardown_postgres():
 
 def get_document_store(document_store_type, tmp_path, embedding_dim=768, embedding_field="embedding", index="haystack_test", similarity:str="dot_product"):
-    if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus", "sql"]:
-        logging.warning("Setting up PostgreSQL...")
-        setup_postgres()
+    # if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus", "sql"]:
+    #     logging.warning("Setting up PostgreSQL...")
+    #     setup_postgres()
 
     if document_store_type == "sql":
         document_store = SQLDocumentStore(url=get_sql_url(tmp_path), index=index, isolation_level="AUTOCOMMIT")
@@ -615,11 +629,11 @@ def get_document_store(document_store_type, tmp_path, embedding_dim=768, embeddi
     else:
         raise Exception(f"No document store fixture for '{document_store_type}'")
 
-    yield document_store
+    return document_store
 
-    if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]:
-        logging.warning("Tearing down PostgreSQL...")
-        teardown_postgres()
+    # if SQL_TYPE == "postgres" and document_store_type in ["faiss", "milvus"]:
+    #     logging.warning("Tearing down PostgreSQL...")
+    #     teardown_postgres()
 
 
 @pytest.fixture(scope="function")
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 44029aa797..eb5b79cc64 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -162,18 +162,18 @@ def test_get_all_documents_with_correct_filters(document_store_with_docs):
 
 
 def test_get_all_documents_with_correct_filters_legacy_sqlite(test_docs_xs, tmp_path):
-    for document_store_with_docs in get_document_store("sql", tmp_path):
-        document_store_with_docs.write_documents(test_docs_xs)
+    document_store_with_docs = get_document_store("sql", tmp_path)
+    document_store_with_docs.write_documents(test_docs_xs)
 
-        document_store_with_docs.use_windowed_query = False
-        documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test2"]})
-        assert len(documents) == 1
-        assert documents[0].meta["name"] == "filename2"
+    document_store_with_docs.use_windowed_query = False
+    documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test2"]})
+    assert len(documents) == 1
+    assert documents[0].meta["name"] == "filename2"
 
-        documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test1", "test3"]})
-        assert len(documents) == 2
-        assert {d.meta["name"] for d in documents} == {"filename1", "filename3"}
-        assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"}
+    documents = document_store_with_docs.get_all_documents(filters={"meta_field": ["test1", "test3"]})
+    assert len(documents) == 2
+    assert {d.meta["name"] for d in documents} == {"filename1", "filename3"}
+    assert {d.meta["meta_field"] for d in documents} == {"test1", "test3"}
 
 
 def test_get_all_documents_with_incorrect_filter_name(document_store_with_docs):
@@ -832,13 +832,13 @@ def test_update_meta(document_store):
 
 @pytest.mark.parametrize("document_store_type", ["elasticsearch", "memory"])
 def test_custom_embedding_field(document_store_type, tmp_path):
-    for document_store in get_document_store(document_store_type=document_store_type, tmp_path=tmp_path, embedding_field="custom_embedding_field"):
-        doc_to_write = {"content": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
-        document_store.write_documents([doc_to_write])
-        documents = document_store.get_all_documents(return_embedding=True)
-        assert len(documents) == 1
-        assert documents[0].content == "test"
-        np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
+    document_store = get_document_store(document_store_type=document_store_type, tmp_path=tmp_path, embedding_field="custom_embedding_field")
+    doc_to_write = {"content": "test", "custom_embedding_field": np.random.rand(768).astype(np.float32)}
+    document_store.write_documents([doc_to_write])
+    documents = document_store.get_all_documents(return_embedding=True)
+    assert len(documents) == 1
+    assert documents[0].content == "test"
+    np.testing.assert_array_equal(doc_to_write["custom_embedding_field"], documents[0].embedding)
 
 
 @pytest.mark.parametrize("document_store", ["elasticsearch"], indirect=True)
diff --git a/test/test_weaviate.py b/test/test_weaviate.py
index 60ddb8ca1c..49ece2f52c 100644
--- a/test/test_weaviate.py
+++ b/test/test_weaviate.py
@@ -31,17 +31,17 @@ def get_uuid():
 
 @pytest.fixture(params=["weaviate"])
 def document_store_with_docs(request, tmp_path):
-    for document_store in get_document_store(request.param, tmp_path=tmp_path):
-        document_store.write_documents(DOCUMENTS_XS)
-        yield document_store
-        document_store.delete_documents()
+    document_store = get_document_store(request.param, tmp_path=tmp_path)
+    document_store.write_documents(DOCUMENTS_XS)
+    yield document_store
+    document_store.delete_documents()
 
 
 @pytest.fixture(params=["weaviate"])
 def document_store(request, tmp_path):
-    for document_store in get_document_store(request.param, tmp_path=tmp_path):
-        yield document_store
-        document_store.delete_documents()
+    document_store = get_document_store(request.param, tmp_path=tmp_path)
+    yield document_store
+    document_store.delete_documents()
 
 
 @pytest.mark.weaviate
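The new `postgres_fixture` wraps every test automatically (`autouse=True`), which is what lets `get_document_store()` become a plain function again (`return` instead of `yield`). A reduced sketch of that shape; the setup/teardown helpers and `BACKEND` switch are hypothetical stand-ins for the conftest's PostgreSQL helpers and `SQL_TYPE`:

```python
import pytest

BACKEND = "sqlite"  # stand-in for conftest's SQL_TYPE switch

def setup_db():       # hypothetical helper
    print("setting up database")

def teardown_db():    # hypothetical helper
    print("tearing down database")

@pytest.fixture(scope="function", autouse=True)
def db_fixture():
    # autouse=True: runs around every test without being requested by name.
    if BACKEND == "postgres":
        setup_db()
        yield
        teardown_db()
    else:
        yield
```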
From d2a6ad5d88ade265fe9419505cd36dbf188e4bfb Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Thu, 13 Jan 2022 11:36:49 +0100
Subject: [PATCH 14/21] Fix bug introduced in merge

---
 test/conftest.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index cf0043e100..d0c3e0f1af 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -487,7 +487,7 @@ def get_retriever(retriever_type, document_store):
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"])
 def document_store_with_docs(request, test_docs_xs, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    document_store = get_document_store(request.param, embedding_dim.args[0], tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path)
     document_store.write_documents(test_docs_xs)
     yield document_store
     document_store.delete_documents()
@@ -495,21 +495,21 @@ def document_store_with_docs(request, test_docs_xs, tmp_path):
 @pytest.fixture
 def document_store(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    document_store = get_document_store(request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim=embedding_dim.args[0], tmp_path=tmp_path)
     yield document_store
     document_store.delete_documents()
 
 @pytest.fixture(params=["memory", "faiss", "milvus", "elasticsearch"])
 def document_store_dot_product(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    document_store = get_document_store(request.param, embedding_dim.args[0], similarity="dot_product", tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="dot_product", tmp_path=tmp_path)
     yield document_store
     document_store.delete_documents()
 
 @pytest.fixture(params=["memory", "faiss", "milvus", "elasticsearch"])
 def document_store_dot_product_with_docs(request, test_docs_xs, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    document_store = get_document_store(request.param, embedding_dim.args[0], similarity="dot_product", tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="dot_product", tmp_path=tmp_path)
     document_store.write_documents(test_docs_xs)
     yield document_store
     document_store.delete_documents()
@@ -517,14 +517,14 @@ def document_store_dot_product_with_docs(request, test_docs_xs, tmp_path):
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus"])
 def document_store_dot_product_small(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
-    document_store = get_document_store(request.param, embedding_dim.args[0], similarity="dot_product", tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="dot_product", tmp_path=tmp_path)
     yield document_store
     document_store.delete_documents()
 
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"])
 def document_store_cosine_small(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
-    document_store = get_document_store(request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path)
     yield document_store
     document_store.delete_documents()

From b96f83bb99a2beace8b862238572a4c7cf91cdb5 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Thu, 13 Jan 2022 13:42:54 +0100
Subject: [PATCH 15/21] Typo

---
 test/conftest.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/conftest.py b/test/conftest.py
index d0c3e0f1af..426193e438 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -495,7 +495,7 @@ def document_store_with_docs(request, test_docs_xs, tmp_path):
 @pytest.fixture
 def document_store(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
-    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim=embedding_dim.args[0], tmp_path=tmp_path)
+    document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], tmp_path=tmp_path)
     yield document_store
     document_store.delete_documents()
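These two cleanup commits show why the merge fix switched every call to keyword arguments: `get_document_store(request.param, embedding_dim.args[0], tmp_path=tmp_path)` silently binds the embedding dimension to the positional `tmp_path` slot. A self-contained sketch of that failure mode; the function below is an illustrative stand-in, not the conftest helper itself:

```python
# Sketch of the merge bug: a positional argument landing in the wrong slot.
def get_store(store_type, tmp_path, embedding_dim=768):
    return {"type": store_type, "path": tmp_path, "dim": embedding_dim}

# Buggy: 768 is silently bound to tmp_path; embedding_dim only stays 768
# because of the default, so nothing fails loudly.
broken = get_store("faiss", 768)
assert broken["path"] == 768  # not a path at all

# Fixed: keyword arguments cannot mis-bind, which is what these commits enforce.
fixed = get_store(store_type="faiss", tmp_path="/tmp/haystack", embedding_dim=768)
assert fixed["path"] == "/tmp/haystack"
```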
From 0cb19f4f5a82f6d7ae025068d4e7ad7c634b3c84 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Thu, 13 Jan 2022 14:05:52 +0100
Subject: [PATCH 16/21] Another typo from the merge

---
 test/conftest.py              |  2 +-
 test/test_document_store.py   | 32 +++++++-------
 test/test_faiss_and_milvus.py | 79 -----------------------------------
 3 files changed, 17 insertions(+), 96 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index 426193e438..06abc204d0 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -522,7 +522,7 @@ def document_store_dot_product_small(request, tmp_path):
     document_store.delete_documents()
 
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"])
-def document_store_cosine_small(request, tmp_path):
+def document_store_small(request, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(3))
     document_store = get_document_store(document_store_type=request.param, embedding_dim=embedding_dim.args[0], similarity="cosine", tmp_path=tmp_path)
     yield document_store
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 0e7cb69198..3038026fb1 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1014,17 +1014,17 @@ def ensure_ids_are_correct_uuids(docs:list,document_store:object)->None:
         for d in docs:
             d["id"] = str(uuid.uuid4())
 
-
-def test_cosine_similarity(document_store_cosine):
+
+def test_cosine_similarity(document_store):
     # below we will write documents to the store and then query it to see if vectors were normalized
 
-    ensure_ids_are_correct_uuids(docs=DOCUMENTS, document_store=document_store_cosine)
-    document_store_cosine.write_documents(documents=DOCUMENTS)
+    ensure_ids_are_correct_uuids(docs=DOCUMENTS,document_store=document_store)
+    document_store.write_documents(documents=DOCUMENTS)
 
     # note that the same query will be used later when querying after updating the embeddings
     query = np.random.rand(768).astype(np.float32)
 
-    query_results = document_store_cosine.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
+    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
 
     # check if search with cosine similarity returns the correct number of results
     assert len(query_results) == len(DOCUMENTS)
@@ -1035,7 +1035,7 @@ def test_cosine_similarity(document_store_cosine):
     for doc in query_results:
         result_emb = doc.embedding
         original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
-        document_store_cosine.normalize_embedding(original_emb[0])
+        document_store.normalize_embedding(original_emb[0])
 
         # check if the stored embedding was normalized
         assert np.allclose(original_emb[0], result_emb, rtol=0.01)
@@ -1049,27 +1049,27 @@ def embed_documents(self, docs):
             return [np.random.rand(768).astype(np.float32) for doc in docs]
 
     retriever = MockRetriever()
-    document_store_cosine.update_embeddings(retriever=retriever)
-    query_results = document_store_cosine.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
+    document_store.update_embeddings(retriever=retriever)
+    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
 
     for doc in query_results:
         original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
-        document_store_cosine.normalize_embedding(original_emb[0])
+        document_store.normalize_embedding(original_emb[0])
         # check if the original embedding has changed after updating the embeddings
         assert not np.allclose(original_emb[0], doc.embedding, rtol=0.01)
 
 
-def test_normalize_embeddings_diff_shapes(document_store_cosine_small):
+def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
     VEC_1 = np.array([.1, .2, .3], dtype="float32")
-    document_store_cosine_small.normalize_embedding(VEC_1)
+    document_store_dot_product_small.normalize_embedding(VEC_1)
     assert np.linalg.norm(VEC_1) - 1 < 0.01
 
     VEC_1 = np.array([.1, .2, .3], dtype="float32").reshape(1, -1)
-    document_store_cosine_small.normalize_embedding(VEC_1)
+    document_store_dot_product_small.normalize_embedding(VEC_1)
     assert np.linalg.norm(VEC_1) - 1 < 0.01
 
 
-def test_cosine_sanity_check(document_store_cosine_small):
+def test_cosine_sanity_check(document_store_small):
     VEC_1 = np.array([.1, .2, .3], dtype="float32")
     VEC_2 = np.array([.4, .5, .6], dtype="float32")
 
@@ -1078,10 +1078,10 @@ def test_cosine_sanity_check(document_store_cosine_small):
     KNOWN_COSINE = (0.9746317 + 1) / 2
 
     docs = [{"name": "vec_1", "text": "vec_1", "content": "vec_1", "embedding": VEC_1}]
-    ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_cosine_small)
-    document_store_cosine_small.write_documents(documents=docs)
+    ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_small)
+    document_store_small.write_documents(documents=docs)
 
-    query_results = document_store_cosine_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True)
+    query_results = document_store_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True)
 
     # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
     assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.00002)
diff --git a/test/test_faiss_and_milvus.py b/test/test_faiss_and_milvus.py
index 7e4fc01b23..965c28f507 100644
--- a/test/test_faiss_and_milvus.py
+++ b/test/test_faiss_and_milvus.py
@@ -445,82 +445,3 @@ def test_faiss_passing_index_from_outside(tmp_path):
     # test if vectors ids are associated with docs
     for doc in documents_indexed:
         assert 0 <= int(doc.meta["vector_id"]) <= 7
-
-
-def ensure_ids_are_correct_uuids(docs:list,document_store:object)->None:
-    # Weaviate currently only supports UUIDs
-    if type(document_store)==WeaviateDocumentStore:
-        for d in docs:
-            d["id"] = str(uuid.uuid4())
-
-def test_cosine_similarity(document_store):
-    # below we will write documents to the store and then query it to see if vectors were normalized
-
-    ensure_ids_are_correct_uuids(docs=DOCUMENTS,document_store=document_store)
-    document_store.write_documents(documents=DOCUMENTS)
-
-    # note that the same query will be used later when querying after updating the embeddings
-    query = np.random.rand(768).astype(np.float32)
-
-    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
-
-    # check if search with cosine similarity returns the correct number of results
-    assert len(query_results) == len(DOCUMENTS)
-    indexed_docs = {}
-    for doc in DOCUMENTS:
-        indexed_docs[doc["content"]] = doc["embedding"]
-
-    for doc in query_results:
-        result_emb = doc.embedding
-        original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
-        document_store.normalize_embedding(original_emb[0])
-
-        # check if the stored embedding was normalized
-        assert np.allclose(original_emb[0], result_emb, rtol=0.01)
-
-        # check if the score is plausible for cosine similarity
-        assert 0 <= doc.score <= 1.0
-
-    # now check if vectors are normalized when updating embeddings
-    class MockRetriever():
-        def embed_documents(self, docs):
-            return [np.random.rand(768).astype(np.float32) for doc in docs]
-
-    retriever = MockRetriever()
-    document_store.update_embeddings(retriever=retriever)
-    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
-
-    for doc in query_results:
-        original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
-        document_store.normalize_embedding(original_emb[0])
-        # check if the original embedding has changed after updating the embeddings
-        assert not np.allclose(original_emb[0], doc.embedding, rtol=0.01)
-
-
-def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
-    VEC_1 = np.array([.1, .2, .3], dtype="float32")
-    document_store_dot_product_small.normalize_embedding(VEC_1)
-    assert np.linalg.norm(VEC_1) - 1 < 0.01
-
-    VEC_1 = np.array([.1, .2, .3], dtype="float32").reshape(1, -1)
-    document_store_dot_product_small.normalize_embedding(VEC_1)
-    assert np.linalg.norm(VEC_1) - 1 < 0.01
-
-
-def test_cosine_sanity_check(document_store_small):
-    VEC_1 = np.array([.1, .2, .3], dtype="float32")
-    VEC_2 = np.array([.4, .5, .6], dtype="float32")
-
-    # This is the cosine similarity of VEC_1 and VEC_2 calculated using sklearn.metrics.pairwise.cosine_similarity
-    # The score is normalized to yield a value between 0 and 1.
-    KNOWN_COSINE = (0.9746317 + 1) / 2
-
-    docs = [{"name": "vec_1", "text": "vec_1", "content": "vec_1", "embedding": VEC_1}]
-    ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_small)
-    document_store_small.write_documents(documents=docs)
-
-    query_results = document_store_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True)
-
-    # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
-    assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.00002)
-
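The constant these tests check is easy to re-derive: the cosine of VEC_1 and VEC_2 is about 0.9746317, and the stores rescale cosine scores from [-1, 1] into [0, 1] via (cos + 1) / 2. A quick standalone numpy verification, no document store needed:

```python
import numpy as np

v1 = np.array([0.1, 0.2, 0.3], dtype="float32")
v2 = np.array([0.4, 0.5, 0.6], dtype="float32")

# Plain cosine similarity: dot product over the product of the norms.
cos = float(np.dot(v1, v2) / (np.linalg.norm(v1) * np.linalg.norm(v2)))
print(cos)    # ~0.9746318, matching the value quoted in the test comment

# Rescaled into [0, 1], as the stores report it.
score = (cos + 1) / 2
print(score)  # ~0.9873159, the KNOWN_COSINE constant above
```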
From afe8483475729922b42ce9d8a3224f7af8ed3724 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Fri, 14 Jan 2022 09:51:07 +0100
Subject: [PATCH 17/21] Exclude ES and Memory from the cosine_sanity_check test

---
 test/test_document_store.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_document_store.py b/test/test_document_store.py
index 3038026fb1..2e6b6b46b4 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1069,6 +1069,7 @@ def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
     assert np.linalg.norm(VEC_1) - 1 < 0.01
 
 
+@pytest.mark.parametrize("document_store", ["faiss", "milvus", "weaviate"], indirect=True)
 def test_cosine_sanity_check(document_store_small):
     VEC_1 = np.array([.1, .2, .3], dtype="float32")
     VEC_2 = np.array([.4, .5, .6], dtype="float32")

From 3b669b38f51581e4b3a26158ec2390f5024f9769 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Fri, 14 Jan 2022 10:20:01 +0100
Subject: [PATCH 18/21] Bug in fixture name

---
 test/test_document_store.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_document_store.py b/test/test_document_store.py
index 2e6b6b46b4..f960baa895 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1069,7 +1069,7 @@ def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
     assert np.linalg.norm(VEC_1) - 1 < 0.01
 
 
-@pytest.mark.parametrize("document_store", ["faiss", "milvus", "weaviate"], indirect=True)
+@pytest.mark.parametrize("document_store_small", ["faiss", "milvus", "weaviate"], indirect=True)
 def test_cosine_sanity_check(document_store_small):
     VEC_1 = np.array([.1, .2, .3], dtype="float32")
     VEC_2 = np.array([.4, .5, .6], dtype="float32")

From d9e14e013a2eb5d5c106de0660555d9d5bf44c8e Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Fri, 14 Jan 2022 10:45:43 +0100
Subject: [PATCH 19/21] move ensure_ids_are_correct_uuids in conftest and move
 one test back to faiss & milvus suite

---
 test/conftest.py              |  8 ++++++++
 test/test_document_store.py   | 27 +--------------------------
 test/test_faiss_and_milvus.py | 21 +++++++++++++++++++++
 3 files changed, 30 insertions(+), 26 deletions(-)

diff --git a/test/conftest.py b/test/conftest.py
index 06abc204d0..a9a530e924 100644
--- a/test/conftest.py
+++ b/test/conftest.py
@@ -3,6 +3,7 @@
 from subprocess import run
 from sys import platform
 import gc
+import uuid
 import logging
 
 from sqlalchemy import create_engine, text
@@ -484,6 +485,13 @@ def get_retriever(retriever_type, document_store):
     return retriever
 
 
+def ensure_ids_are_correct_uuids(docs:list,document_store:object)->None:
+    # Weaviate currently only supports UUIDs
+    if type(document_store)==WeaviateDocumentStore:
+        for d in docs:
+            d["id"] = str(uuid.uuid4())
+
+
 @pytest.fixture(params=["elasticsearch", "faiss", "memory", "milvus", "weaviate"])
 def document_store_with_docs(request, test_docs_xs, tmp_path):
     embedding_dim = request.node.get_closest_marker("embedding_dim", pytest.mark.embedding_dim(768))
diff --git a/test/test_document_store.py b/test/test_document_store.py
index f960baa895..50053fe624 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -9,7 +9,7 @@
 
 from elasticsearch.exceptions import RequestError
 
-from conftest import get_document_store
+from conftest import get_document_store, ensure_ids_are_correct_uuids
 from haystack.document_stores import WeaviateDocumentStore
 from haystack.document_stores.base import BaseDocumentStore
 from haystack.errors import DuplicateDocumentError
@@ -1008,13 +1008,6 @@ def test_custom_headers(document_store_with_docs: BaseDocumentStore):
     assert len(documents) > 0
 
 
-def ensure_ids_are_correct_uuids(docs:list,document_store:object)->None:
-    # Weaviate currently only supports UUIDs
-    if type(document_store)==WeaviateDocumentStore:
-        for d in docs:
-            d["id"] = str(uuid.uuid4())
-
-
 def test_cosine_similarity(document_store):
     # below we will write documents to the store and then query it to see if vectors were normalized
 
@@ -1068,21 +1061,3 @@ def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
     document_store_dot_product_small.normalize_embedding(VEC_1)
     assert np.linalg.norm(VEC_1) - 1 < 0.01
 
-
-@pytest.mark.parametrize("document_store_small", ["faiss", "milvus", "weaviate"], indirect=True)
-def test_cosine_sanity_check(document_store_small):
-    VEC_1 = np.array([.1, .2, .3], dtype="float32")
-    VEC_2 = np.array([.4, .5, .6], dtype="float32")
-
-    # This is the cosine similarity of VEC_1 and VEC_2 calculated using sklearn.metrics.pairwise.cosine_similarity
-    # The score is normalized to yield a value between 0 and 1.
-    KNOWN_COSINE = (0.9746317 + 1) / 2
-
-    docs = [{"name": "vec_1", "text": "vec_1", "content": "vec_1", "embedding": VEC_1}]
-    ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_small)
-    document_store_small.write_documents(documents=docs)
-
-    query_results = document_store_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True)
-
-    # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
-    assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.00002)
diff --git a/test/test_faiss_and_milvus.py b/test/test_faiss_and_milvus.py
index 965c28f507..24b07fa5c4 100644
--- a/test/test_faiss_and_milvus.py
+++ b/test/test_faiss_and_milvus.py
@@ -13,6 +13,8 @@
 from haystack.pipelines import Pipeline
 from haystack.nodes.retriever.dense import EmbeddingRetriever
 
+from conftest import ensure_ids_are_correct_uuids
+
 
 DOCUMENTS = [
     {"meta": {"name": "name_1", "year": "2020", "month": "01"}, "content": "text_1", "embedding": np.random.rand(768).astype(np.float32)},
@@ -445,3 +447,22 @@ def test_faiss_passing_index_from_outside(tmp_path):
     # test if vectors ids are associated with docs
     for doc in documents_indexed:
         assert 0 <= int(doc.meta["vector_id"]) <= 7
+
+
+@pytest.mark.parametrize("document_store_small", ["faiss", "milvus", "weaviate"], indirect=True)
+def test_cosine_sanity_check(document_store_small):
+    VEC_1 = np.array([.1, .2, .3], dtype="float32")
+    VEC_2 = np.array([.4, .5, .6], dtype="float32")
+
+    # This is the cosine similarity of VEC_1 and VEC_2 calculated using sklearn.metrics.pairwise.cosine_similarity
+    # The score is normalized to yield a value between 0 and 1.
+    KNOWN_COSINE = (0.9746317 + 1) / 2
+
+    docs = [{"name": "vec_1", "text": "vec_1", "content": "vec_1", "embedding": VEC_1}]
+    ensure_ids_are_correct_uuids(docs=docs,document_store=document_store_small)
+    document_store_small.write_documents(documents=docs)
+
+    query_results = document_store_small.query_by_embedding(query_emb=VEC_2, top_k=1, return_embedding=True)
+
+    # check if faiss returns the same cosine similarity. Manual testing with faiss yielded 0.9746318
+    assert math.isclose(query_results[0].score, KNOWN_COSINE, abs_tol=0.00002)
\ No newline at end of file
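The helper promoted into `conftest.py` exists because Weaviate only accepts UUIDs as document ids, while the other stores take arbitrary strings. A standalone sketch of the idea, with a simplified signature:

```python
import uuid

def ensure_uuid_ids(docs: list) -> None:
    # Sketch of the conftest helper: replace whatever id a test document
    # carries with a fresh UUID4 string so Weaviate will accept it.
    for d in docs:
        d["id"] = str(uuid.uuid4())

docs = [{"id": "doc-1", "content": "text_1"}]
ensure_uuid_ids(docs)
print(docs[0]["id"])  # a random UUID4 string instead of "doc-1"
```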
From c1913dfcd6e4323c139cf5c1bf39ae4202cf77a2 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Fri, 14 Jan 2022 11:32:07 +0100
Subject: [PATCH 20/21] Move back tests to faiss&milvus suite, cannot
 parametrize them properly

---
 test/test_document_store.py   | 54 ---------------------------------
 test/test_faiss_and_milvus.py | 56 +++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 54 deletions(-)

diff --git a/test/test_document_store.py b/test/test_document_store.py
index 50053fe624..30d55f4bac 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1007,57 +1007,3 @@ def test_custom_headers(document_store_with_docs: BaseDocumentStore):
     assert kwargs["headers"] == custom_headers
     assert len(documents) > 0
 
-
-def test_cosine_similarity(document_store):
-    # below we will write documents to the store and then query it to see if vectors were normalized
-
-    ensure_ids_are_correct_uuids(docs=DOCUMENTS,document_store=document_store)
-    document_store.write_documents(documents=DOCUMENTS)
-
-    # note that the same query will be used later when querying after updating the embeddings
-    query = np.random.rand(768).astype(np.float32)
-
-    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
-
-    # check if search with cosine similarity returns the correct number of results
-    assert len(query_results) == len(DOCUMENTS)
-    indexed_docs = {}
-    for doc in DOCUMENTS:
-        indexed_docs[doc["content"]] = doc["embedding"]
-
-    for doc in query_results:
-        result_emb = doc.embedding
-        original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
-        document_store.normalize_embedding(original_emb[0])
-
-        # check if the stored embedding was normalized
-        assert np.allclose(original_emb[0], result_emb, rtol=0.01)
-
-        # check if the score is plausible for cosine similarity
-        assert 0 <= doc.score <= 1.0
-
-    # now check if vectors are normalized when updating embeddings
-    class MockRetriever():
-        def embed_documents(self, docs):
-            return [np.random.rand(768).astype(np.float32) for doc in docs]
-
-    retriever = MockRetriever()
-    document_store.update_embeddings(retriever=retriever)
-    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
-
-    for doc in query_results:
-        original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
-        document_store.normalize_embedding(original_emb[0])
-        # check if the original embedding has changed after updating the embeddings
-        assert not np.allclose(original_emb[0], doc.embedding, rtol=0.01)
-
-
-def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
-    VEC_1 = np.array([.1, .2, .3], dtype="float32")
-    document_store_dot_product_small.normalize_embedding(VEC_1)
-    assert np.linalg.norm(VEC_1) - 1 < 0.01
-
-    VEC_1 = np.array([.1, .2, .3], dtype="float32").reshape(1, -1)
-    document_store_dot_product_small.normalize_embedding(VEC_1)
-    assert np.linalg.norm(VEC_1) - 1 < 0.01
-
diff --git a/test/test_faiss_and_milvus.py b/test/test_faiss_and_milvus.py
index 24b07fa5c4..7dedc0ad31 100644
--- a/test/test_faiss_and_milvus.py
+++ b/test/test_faiss_and_milvus.py
@@ -449,6 +449,62 @@ def test_faiss_passing_index_from_outside(tmp_path):
         assert 0 <= int(doc.meta["vector_id"]) <= 7
 
 
+@pytest.mark.parametrize("document_store", ["faiss", "milvus", "weaviate"], indirect=True)
+def test_cosine_similarity(document_store):
+    # below we will write documents to the store and then query it to see if vectors were normalized
+
+    ensure_ids_are_correct_uuids(docs=DOCUMENTS,document_store=document_store)
+    document_store.write_documents(documents=DOCUMENTS)
+
+    # note that the same query will be used later when querying after updating the embeddings
+    query = np.random.rand(768).astype(np.float32)
+
+    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
+
+    # check if search with cosine similarity returns the correct number of results
+    assert len(query_results) == len(DOCUMENTS)
+    indexed_docs = {}
+    for doc in DOCUMENTS:
+        indexed_docs[doc["content"]] = doc["embedding"]
+
+    for doc in query_results:
+        result_emb = doc.embedding
+        original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
+        document_store.normalize_embedding(original_emb[0])
+
+        # check if the stored embedding was normalized
+        assert np.allclose(original_emb[0], result_emb, rtol=0.01)
+
+        # check if the score is plausible for cosine similarity
+        assert 0 <= doc.score <= 1.0
+
+    # now check if vectors are normalized when updating embeddings
+    class MockRetriever():
+        def embed_documents(self, docs):
+            return [np.random.rand(768).astype(np.float32) for doc in docs]
+
+    retriever = MockRetriever()
+    document_store.update_embeddings(retriever=retriever)
+    query_results = document_store.query_by_embedding(query_emb=query, top_k=len(DOCUMENTS), return_embedding=True)
+
+    for doc in query_results:
+        original_emb = np.array([indexed_docs[doc.content]], dtype="float32")
+        document_store.normalize_embedding(original_emb[0])
+        # check if the original embedding has changed after updating the embeddings
+        assert not np.allclose(original_emb[0], doc.embedding, rtol=0.01)
+
+
+@pytest.mark.parametrize("document_store_dot_product_small", ["faiss", "milvus", "weaviate"], indirect=True)
+def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
+    VEC_1 = np.array([.1, .2, .3], dtype="float32")
+    document_store_dot_product_small.normalize_embedding(VEC_1)
+    assert np.linalg.norm(VEC_1) - 1 < 0.01
+
+    VEC_1 = np.array([.1, .2, .3], dtype="float32").reshape(1, -1)
+    document_store_dot_product_small.normalize_embedding(VEC_1)
+    assert np.linalg.norm(VEC_1) - 1 < 0.01
+
+
 @pytest.mark.parametrize("document_store_small", ["faiss", "milvus", "weaviate"], indirect=True)
 def test_cosine_sanity_check(document_store_small):
     VEC_1 = np.array([.1, .2, .3], dtype="float32")
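These last commits lean on `indirect=True`, which routes each parametrize value into the named fixture (here selecting the backing store) instead of into the test function directly. A minimal self-contained sketch of the mechanism, with an illustrative fixture:

```python
import pytest

@pytest.fixture
def store(request):
    # With indirect=True, request.param carries the parametrize value,
    # so the fixture decides how to build the object under test.
    return {"backend": request.param}

@pytest.mark.parametrize("store", ["faiss", "milvus"], indirect=True)
def test_backend(store):
    # Runs once per backend; dropping an entry from the list (as PATCH 21
    # does for weaviate) simply removes that test case.
    assert store["backend"] in ("faiss", "milvus")
```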
From 70bde27fb1306bb0784a6fe6faec2f2e57539fd2 Mon Sep 17 00:00:00 2001
From: ZanSara
Date: Fri, 14 Jan 2022 12:21:24 +0100
Subject: [PATCH 21/21] Remove one failing test for Weaviate

---
 test/test_faiss_and_milvus.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/test/test_faiss_and_milvus.py b/test/test_faiss_and_milvus.py
index 7dedc0ad31..221ce6e4d7 100644
--- a/test/test_faiss_and_milvus.py
+++ b/test/test_faiss_and_milvus.py
@@ -494,7 +494,7 @@ def embed_documents(self, docs):
         assert not np.allclose(original_emb[0], doc.embedding, rtol=0.01)
 
 
-@pytest.mark.parametrize("document_store_dot_product_small", ["faiss", "milvus", "weaviate"], indirect=True)
+@pytest.mark.parametrize("document_store_dot_product_small", ["faiss", "milvus"], indirect=True)
 def test_normalize_embeddings_diff_shapes(document_store_dot_product_small):
     VEC_1 = np.array([.1, .2, .3], dtype="float32")
     document_store_dot_product_small.normalize_embedding(VEC_1)