From a273c3a51dd432bd125e5b35df4be94260a2cdb7 Mon Sep 17 00:00:00 2001
From: Florian Hardow <10029805+FHardow@users.noreply.github.com>
Date: Thu, 31 Mar 2022 08:59:58 +0200
Subject: [PATCH] =?UTF-8?q?EvaluationSetClient=20for=20deepset=20cloud=20t?=
 =?UTF-8?q?o=20fetch=20evaluation=20sets=20and=20la=E2=80=A6=20(#2345)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* EvaluationSetClient for deepset Cloud to fetch evaluation sets and labels for one specific evaluation set
* make DeepsetCloudDocumentStore able to fetch uploaded evaluation set names
* fix missing renaming of get_evaluation_set_names in DeepsetCloudDocumentStore
* update documentation for evaluation set functionality in deepset Cloud document store
* DeepsetCloudDocumentStore tests for evaluation set functionality
* rename index to evaluation_set_name for DeepsetCloudDocumentStore evaluation set functionality
* raise DeepsetCloudError when no labels were found for evaluation set
* make use of .get_with_auto_paging in EvaluationSetClient
* Return result of get_with_auto_paging() as it parses the response already
* Make schema import source more specific
* fetch all evaluation sets for a workspace in deepset Cloud
* Rename evaluation_set_name to label_index
* make use of generator functionality for fetching labels
* Update Documentation & Code Style
* Adjust function input for DeepsetCloudDocumentStore.get_all_labels, adjust tests for it, fix typos, make linter happy
* Match error message with pytest.raises
* Update Documentation & Code Style
* DeepsetCloudDocumentStore.get_labels_count raises DeepsetCloudError when no evaluation set was found to count labels on
* remove unneeded import in tests
* DeepsetCloudDocumentStore tests, make response bodies a string through json.dumps
* DeepsetCloudDocumentStore.get_label_count - move raise to return
* stringify uuid before json.dump as uuid is not serializable
* DeepsetCloudDocumentStore - adjust response mocking in tests
* DeepsetCloudDocumentStore - json dump response body in test
* DeepsetCloudDocumentStore introduce label_index, EvaluationSetClient rename label_index to evaluation_set
* Update Documentation & Code Style
* DeepsetCloudDocumentStore rename evaluation_set to evaluation_set_response as there is a name clash with the input variable
* DeepsetCloudDocumentStore - rename missed variable in test
* DeepsetCloudDocumentStore - rename missed label_index to index in docstring, rename label_index to evaluation_set in EvaluationSetClient
* Update Documentation & Code Style
* DeepsetCloudDocumentStore - update docstrings for EvaluationSetClient
* DeepsetCloudDocumentStore - fix typo in docstring

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
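Reviewer note (not part of the diff): a minimal usage sketch of the evaluation set interface this patch adds. The evaluation set name "my-eval-set" is a placeholder, and the sketch assumes DEEPSET_CLOUD_API_KEY and DEEPSET_CLOUD_API_ENDPOINT are set in the environment:

```python
# Illustrative sketch only: "my-eval-set" is a placeholder for an evaluation set
# uploaded to your deepset Cloud workspace; credentials are read from the
# DEEPSET_CLOUD_API_KEY / DEEPSET_CLOUD_API_ENDPOINT environment variables.
from haystack.document_stores import DeepsetCloudDocumentStore

document_store = DeepsetCloudDocumentStore(label_index="my-eval-set")

# List all evaluation sets uploaded to the workspace, e.g. to inspect label counts
for eval_set in document_store.get_evaluation_sets():
    print(eval_set["name"], eval_set["total_labels"])

# Fetch all labels of the default evaluation set (self.label_index) as Label objects
labels = document_store.get_all_labels()

# Count labels of a specific evaluation set instead of the default one;
# raises DeepsetCloudError if no evaluation set with that name exists
label_count = document_store.get_label_count(index="my-eval-set")
```

get_all_labels() falls back to the label_index given at construction time when no index is passed.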
 docs/_src/api/api/document_store.md          |  58 +++++-
 haystack/document_stores/deepsetcloud.py     |  41 +++-
 .../haystack-pipeline-1.2.1rc0.schema.json   |   5 +
 .../haystack-pipeline-unstable.schema.json   |   5 +
 haystack/utils/deepsetcloud.py               | 140 +++++++++++++
 test/test_document_store.py                  | 186 ++++++++++++++++++
 6 files changed, 431 insertions(+), 4 deletions(-)

diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index ffbc18d8fd..0f8c4ee1cc 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -3938,7 +3938,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore)
 #### \_\_init\_\_
 
 ```python
-def __init__(api_key: str = None, workspace: str = "default", index: str = "default", duplicate_documents: str = "overwrite", api_endpoint: Optional[str] = None, similarity: str = "dot_product", return_embedding: bool = False)
+def __init__(api_key: str = None, workspace: str = "default", index: str = "default", duplicate_documents: str = "overwrite", api_endpoint: Optional[str] = None, similarity: str = "dot_product", return_embedding: bool = False, label_index: str = "default")
 ```
 
 A DocumentStore facade enabling you to interact with the documents stored in Deepset Cloud.
@@ -3964,6 +3964,7 @@ exists.
 If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default since it is
 more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+- `label_index`: Name of the default evaluation set that the label interface falls back to.
 - `return_embedding`: To return document embedding.
 
@@ -4257,6 +4258,61 @@ exists.
 
 None
 
+
+
+#### get\_evaluation\_sets
+
+```python
+def get_evaluation_sets() -> List[dict]
+```
+
+Returns a list of evaluation sets uploaded to deepset Cloud.
+
+**Returns**:
+
+list of evaluation sets as dicts.
+These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
+
+
+
+#### get\_all\_labels
+
+```python
+def get_all_labels(index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None) -> List[Label]
+```
+
+Returns a list of labels for the given index name.
+
+**Arguments**:
+
+- `index`: Optional name of the evaluation set for which labels should be searched.
+If None, the DocumentStore's default label_index (self.label_index) will be used.
+- `headers`: Not supported.
+
+**Returns**:
+
+list of Labels.
+
+
+
+#### get\_label\_count
+
+```python
+def get_label_count(index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int
+```
+
+Counts the number of labels for the given index and returns the value.
+
+**Arguments**:
+
+- `index`: Optional evaluation set name for which the labels should be counted.
+If None, the DocumentStore's default label_index (self.label_index) will be used.
+- `headers`: Not supported.
+
+**Returns**:
+
+number of labels for the given index
+
 
 # Module pinecone
 
diff --git a/haystack/document_stores/deepsetcloud.py b/haystack/document_stores/deepsetcloud.py
index e6ccdafe4d..b7666816ae 100644
--- a/haystack/document_stores/deepsetcloud.py
+++ b/haystack/document_stores/deepsetcloud.py
@@ -24,6 +24,7 @@ def __init__(
         api_endpoint: Optional[str] = None,
         similarity: str = "dot_product",
         return_embedding: bool = False,
+        label_index: str = "default",
     ):
         """
         A DocumentStore facade enabling you to interact with the documents stored in Deepset Cloud.
@@ -46,11 +47,13 @@ def __init__(
            If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
         :param similarity: The similarity function used to compare document vectors. 'dot_product' is the default since it is
                            more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+        :param label_index: Name of the default evaluation set that the label interface falls back to.
+
         :param return_embedding: To return document embedding.
""" self.index = index - self.label_index = index + self.label_index = label_index self.duplicate_documents = duplicate_documents self.similarity = similarity self.return_embedding = return_embedding @@ -65,6 +68,10 @@ def __init__( f"{indexing_info['pending_file_count']} files are pending to be indexed. Indexing status: {indexing_info['status']}" ) + self.evaluation_set_client = DeepsetCloud.get_evaluation_set_client( + api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, evaluation_set=label_index + ) + super().__init__() def get_all_documents( @@ -452,16 +459,44 @@ def write_documents( """ raise NotImplementedError("DeepsetCloudDocumentStore currently does not support writing documents.") + def get_evaluation_sets(self) -> List[dict]: + """ + Returns a list of uploaded evaluation sets to deepset cloud. + + :return: list of evaluation sets as dicts + These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields. + """ + return self.evaluation_set_client.get_evaluation_sets() + def get_all_labels( self, index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None, ) -> List[Label]: - raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.") + """ + Returns a list of labels for the given index name. + + :param index: Optional name of evaluation set for which labels should be searched. + If None, the DocumentStore's default label_index (self.label_index) will be used. + :filters: Not supported. + :param headers: Not supported. + + :return: list of Labels. + """ + return self.evaluation_set_client.get_labels(evaluation_set=index) def get_label_count(self, index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int: - raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.") + """ + Counts the number of labels for the given index and returns the value. + + :param index: Optional evaluation set name for which the labels should be counted. + If None, the DocumentStore's default label_index (self.label_index) will be used. + :param headers: Not supported. 
+
+        :return: number of labels for the given index
+        """
+        return self.evaluation_set_client.get_labels_count(evaluation_set=index)
 
     def write_labels(
         self,
diff --git a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json b/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
index 6f5d549a94..ba6f2d3e4a 100644
--- a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
@@ -293,6 +293,11 @@
                     "title": "Return Embedding",
                     "default": false,
                     "type": "boolean"
+                },
+                "label_index": {
+                    "title": "Label Index",
+                    "default": "default",
+                    "type": "string"
                 }
             },
             "additionalProperties": false,
diff --git a/haystack/json-schemas/haystack-pipeline-unstable.schema.json b/haystack/json-schemas/haystack-pipeline-unstable.schema.json
index a8e6ae7e33..31517778fb 100644
--- a/haystack/json-schemas/haystack-pipeline-unstable.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-unstable.schema.json
@@ -296,6 +296,11 @@
                     "title": "Return Embedding",
                     "default": false,
                     "type": "boolean"
+                },
+                "label_index": {
+                    "title": "Label Index",
+                    "default": "default",
+                    "type": "string"
                 }
             },
             "additionalProperties": false,
diff --git a/haystack/utils/deepsetcloud.py b/haystack/utils/deepsetcloud.py
index 30d414204a..52f017b6eb 100644
--- a/haystack/utils/deepsetcloud.py
+++ b/haystack/utils/deepsetcloud.py
@@ -5,6 +5,8 @@
 import time
 from typing import Any, Dict, Generator, List, Optional, Tuple, Union
 
+from haystack.schema import Label, Document, Answer
+
 try:
     from typing import Literal
 except ImportError:
@@ -637,6 +639,122 @@ def _build_workspace_url(self, workspace: Optional[str] = None):
         return self.client.build_workspace_url(workspace)
 
 
+class EvaluationSetClient:
+    def __init__(
+        self, client: DeepsetCloudClient, workspace: Optional[str] = None, evaluation_set: Optional[str] = None
+    ):
+        """
+        A client to communicate with Deepset Cloud evaluation sets and labels.
+
+        :param client: Deepset Cloud client
+        :param workspace: workspace in Deepset Cloud
+        :param evaluation_set: name of the evaluation set to fall back to
+        """
+        self.client = client
+        self.workspace = workspace
+        self.evaluation_set = evaluation_set
+
+    def get_labels(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> List[Label]:
+        """
+        Searches for labels of a given evaluation set in deepset Cloud and returns a list of all found labels.
+        Raises a DeepsetCloudError if the evaluation set does not exist.
+
+        :param evaluation_set: name of the evaluation set for which labels should be fetched
+        :param workspace: Optional workspace in deepset Cloud.
+                          If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: list of Label
+        """
+        try:
+            evaluation_sets_response = next(
+                self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
+            )
+        except StopIteration:
+            raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
+
+        labels = self._get_labels_from_evaluation_set(
+            workspace=workspace, evaluation_set_id=evaluation_sets_response["evaluation_set_id"]
+        )
+
+        return [
+            Label(
+                query=label_dict["query"],
+                document=Document(content=label_dict["context"]),
+                is_correct_answer=True,
+                is_correct_document=True,
+                origin="user-feedback",
+                answer=Answer(label_dict["answer"]),
+                id=label_dict["label_id"],
+                no_answer=False if label_dict.get("answer", None) else True,
+                pipeline_id=None,
+                created_at=None,
+                updated_at=None,
+                meta=label_dict["meta"],
+                filters={},
+            )
+            for label_dict in labels
+        ]
+
+    def get_labels_count(self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None) -> int:
+        """
+        Counts the labels of a given evaluation set in deepset Cloud.
+
+        :param evaluation_set: Optional name of the evaluation set in deepset Cloud.
+                               If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
+        :param workspace: Optional workspace in deepset Cloud.
+                          If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: number of labels for the given (or default) evaluation set
+        """
+        try:
+            evaluation_sets_response = next(
+                self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
+            )
+        except StopIteration:
+            raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
+
+        return evaluation_sets_response["total_labels"]
+
+    def get_evaluation_sets(self, workspace: Optional[str] = None) -> List[dict]:
+        """
+        Searches for all evaluation sets in the given workspace in deepset Cloud.
+
+        :param workspace: Optional workspace in deepset Cloud.
+                          If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: list of dictionaries that represent deepset Cloud evaluation sets.
+                 These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
+        """
+        evaluation_sets_response = self._get_evaluation_set(evaluation_set=None, workspace=workspace)
+
+        return [eval_set for eval_set in evaluation_sets_response]
+
+    def _get_evaluation_set(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> Generator:
+        if not evaluation_set:
+            evaluation_set = self.evaluation_set
+
+        url = self._build_workspace_url(workspace=workspace)
+        evaluation_set_url = f"{url}/evaluation_sets"
+
+        for response in self.client.get_with_auto_paging(url=evaluation_set_url, query_params={"name": evaluation_set}):
+            yield response
+
+    def _get_labels_from_evaluation_set(
+        self, workspace: Optional[str] = None, evaluation_set_id: Optional[str] = None
+    ) -> Generator:
+        url = f"{self._build_workspace_url(workspace=workspace)}/evaluation_sets/{evaluation_set_id}"
+        labels = self.client.get(url=url).json()
+
+        for label in labels:
+            yield label
+
+    def _build_workspace_url(self, workspace: Optional[str] = None):
+        if workspace is None:
+            workspace = self.workspace
+        return self.client.build_workspace_url(workspace)
+
+
 class DeepsetCloud:
     """
     A facade to communicate with Deepset Cloud.
@@ -685,3 +803,25 @@ def get_pipeline_client(
         """
         client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
         return PipelineClient(client=client, workspace=workspace, pipeline_config_name=pipeline_config_name)
+
+    @classmethod
+    def get_evaluation_set_client(
+        cls,
+        api_key: Optional[str] = None,
+        api_endpoint: Optional[str] = None,
+        workspace: str = "default",
+        evaluation_set: str = "default",
+    ) -> EvaluationSetClient:
+        """
+        Creates a client to communicate with Deepset Cloud evaluation sets and labels.
+
+        :param api_key: Secret value of the API key.
+                        If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
+        :param api_endpoint: The URL of the Deepset Cloud API.
+                             If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
+        :param workspace: workspace in Deepset Cloud
+        :param evaluation_set: name of the evaluation set in Deepset Cloud
+        """
+        client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
+        return EvaluationSetClient(client=client, workspace=workspace, evaluation_set=evaluation_set)
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 0b27c2e8db..016d5efcd7 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1,3 +1,6 @@
+from typing import List
+from uuid import uuid4
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -1634,6 +1637,189 @@ def test_DeepsetCloudDocumentStore_query(deepset_cloud_document_store):
     assert len(filtered_docs) < len(docs)
 
 
+@pytest.mark.parametrize(
+    "body, expected_count",
+    [
+        (
+            {
+                "data": [
+                    {
+                        "evaluation_set_id": str(uuid4()),
+                        "name": DC_TEST_INDEX,
+                        "created_at": "2022-03-22T13:40:27.535Z",
+                        "matched_labels": 2,
+                        "total_labels": 10,
+                    }
+                ],
+                "has_more": False,
+                "total": 1,
+            },
+            10,
+        ),
+        (
+            {
+                "data": [
+                    {
+                        "evaluation_set_id": str(uuid4()),
+                        "name": DC_TEST_INDEX,
+                        "created_at": "2022-03-22T13:40:27.535Z",
+                        "matched_labels": 0,
+                        "total_labels": 0,
+                    }
+                ],
+                "has_more": False,
+                "total": 1,
+            },
+            0,
+        ),
+    ],
+)
+@responses.activate
+def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set(
+    deepset_cloud_document_store, body: dict, expected_count: int
+):
+    if MOCK_DC:
+        responses.add(
+            method=responses.GET,
+            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+            status=200,
+            body=json.dumps(body),
+        )
+    else:
+        responses.add_passthru(DC_API_ENDPOINT)
+
+    count = deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
+    assert count == expected_count
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(
+    deepset_cloud_document_store,
+):
+    if MOCK_DC:
+        responses.add(
+            method=responses.GET,
+            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+            status=200,
+            body=json.dumps({"data": [], "has_more": False, "total": 0}),
+        )
+    else:
+        responses.add_passthru(DC_API_ENDPOINT)
+
+    with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
+        deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_lists_evaluation_sets(deepset_cloud_document_store):
+    response_evaluation_set = {
+        "evaluation_set_id": str(uuid4()),
+        "name": DC_TEST_INDEX,
+        "created_at": "2022-03-22T13:40:27.535Z",
+        "matched_labels": 2,
+        "total_labels": 10,
+    }
+    if MOCK_DC:
+        responses.add(
+            method=responses.GET,
+            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+            status=200,
body=json.dumps({"data": [response_evaluation_set], "has_more": False, "total": 1}), + ) + else: + responses.add_passthru(DC_API_ENDPOINT) + + evaluation_sets = deepset_cloud_document_store.get_evaluation_sets() + assert evaluation_sets == [response_evaluation_set] + + +@responses.activate +def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_cloud_document_store): + if MOCK_DC: + eval_set_id = uuid4() + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets?name={DC_TEST_INDEX}&page_number=1", + status=200, + body=json.dumps( + { + "data": [ + { + "evaluation_set_id": str(eval_set_id), + "name": DC_TEST_INDEX, + "created_at": "2022-03-22T13:40:27.535Z", + "matched_labels": 1, + "total_labels": 1, + } + ], + "has_more": False, + "total": 1, + } + ), + ) + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{eval_set_id}", + status=200, + body=json.dumps( + [ + { + "label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "query": "What is berlin?", + "answer": "biggest city in germany", + "answer_start": 0, + "answer_end": 0, + "meta": {}, + "context": "Berlin is the biggest city in germany.", + "external_file_name": "string", + "file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "state": "Label matching status", + "candidates": "Candidates that were found in the label <-> file matching", + } + ] + ), + ) + else: + responses.add_passthru(DC_API_ENDPOINT) + + labels = deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX) + assert labels == [ + Label( + query="What is berlin?", + document=Document(content="Berlin is the biggest city in germany."), + is_correct_answer=True, + is_correct_document=True, + origin="user-feedback", + answer=Answer("biggest city in germany"), + id="3fa85f64-5717-4562-b3fc-2c963f66afa6", + no_answer=False, + pipeline_id=None, + created_at=None, + updated_at=None, + meta={}, + filters={}, + ) + ] + + +@responses.activate +def test_DeepsetCloudDocumentStore_fetches_lables_for_evaluation_set_raises_deepsetclouderror_when_nothing_found( + deepset_cloud_document_store, +): + if MOCK_DC: + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets", + status=200, + body=json.dumps({"data": [], "has_more": False, "total": 0}), + ) + else: + responses.add_passthru(DC_API_ENDPOINT) + + with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"): + deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX) + + @responses.activate def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_store): query_emb = np.random.randn(768)