deepset-ai · mathislucka · Jan 26, 2022 · Jan 25, 2022 · Jan 25, 2022 · Jan 25, 2022
diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
@@ -293,7 +293,8 @@ Fetch a document by specifying its text id string
  | get_documents_by_id(ids: List[str], index: Optional[str] = None, batch_size: int = 10_000, headers: Optional[Dict[str, str]] = None) -> List[Document]
 ```
 
-Fetch documents by specifying a list of text id strings
+Fetch documents by specifying a list of text id strings. Be aware that passing a large number of ids might lead
+to performance issues.
 
 <a name="elasticsearch.ElasticsearchDocumentStore.get_metadata_values_by_key"></a>
 #### get\_metadata\_values\_by\_key

diff --git a/haystack/document_stores/elasticsearch.py b/haystack/document_stores/elasticsearch.py
@@ -363,9 +363,12 @@ def get_document_by_id(self, id: str, index: Optional[str] = None, headers: Opti
             return None
 
     def get_documents_by_id(self, ids: List[str], index: Optional[str] = None, batch_size: int = 10_000, headers: Optional[Dict[str, str]] = None) -> List[Document]:
-        """Fetch documents by specifying a list of text id strings"""
+        """
+        Fetch documents by specifying a list of text id strings. Be aware that passing a large number of ids might lead
+        to performance issues.
+        """
         index = index or self.index
-        query = {"query": {"ids": {"values": ids}}}
+        query = {"size": len(ids), "query": {"ids": {"values": ids}}}
         result = self.client.search(index=index, body=query, headers=headers)["hits"]["hits"]
         documents = [self._convert_es_hit_to_document(hit, return_embedding=self.return_embedding) for hit in result]
         return documents

diff --git a/test/test_document_store.py b/test/test_document_store.py
@@ -187,13 +187,30 @@ def test_get_all_documents_with_incorrect_filter_value(document_store_with_docs)
     assert len(documents) == 0
 
 
-def test_get_documents_by_id(document_store_with_docs):
+def test_get_document_by_id(document_store_with_docs):
     documents = document_store_with_docs.get_all_documents()
     doc = document_store_with_docs.get_document_by_id(documents[0].id)
     assert doc.id == documents[0].id
     assert doc.content == documents[0].content
 
 
+def test_get_documents_by_id(document_store):
+    # generate more documents than the elasticsearch default query size limit of 10
+    docs_to_generate = 15
+    documents = [{'content': 'doc-' + str(i)} for i in range(docs_to_generate)]
+    doc_idx = 'green_fields'
+    document_store.write_documents(documents, index=doc_idx)
+
+    all_docs = document_store.get_all_documents(index=doc_idx)
+    all_ids = [doc.id for doc in all_docs]
+
+    retrieved_by_id = document_store.get_documents_by_id(all_ids, index=doc_idx)
+    retrieved_ids = [doc.id for doc in retrieved_by_id]
+
+    # all documents in the index should be retrieved when passing all document ids in the index
+    assert set(retrieved_ids) == set(all_ids)
+
+
 def test_get_document_count(document_store):
     documents = [
         {"content": "text1", "id": "1", "meta_field_for_count": "a"},