From a273c3a51dd432bd125e5b35df4be94260a2cdb7 Mon Sep 17 00:00:00 2001
From: Florian Hardow <10029805+FHardow@users.noreply.github.com>
Date: Thu, 31 Mar 2022 08:59:58 +0200
Subject: [PATCH] =?UTF-8?q?EvaluationSetClient=20for=20deepset=20cloud=20t?=
=?UTF-8?q?o=20fetch=20evaluation=20sets=20and=20la=E2=80=A6=20(#2345)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* EvaluationSetClient for deepset Cloud to fetch evaluation sets and labels for one specific evaluation set
* make DeepsetCloudDocumentStore able to fetch uploaded evaluation set names
* fix missing renaming of get_evaluation_set_names in DeepsetCloudDocumentStore
* update documentation for evaluation set functionality in deepset cloud document store
* DeepsetCloudDocumentStore tests for evaluation set functionality
* rename index to evaluation_set_name for DeepsetCloudDocumentStore evaluation set functionality
* raise DeepsetCloudError when no labels were found for evaluation set
* make use of .get_with_auto_paging in EvaluationSetClient
* Return result of get_with_auto_paging() as it parses the response already
* Make schema import source more specific
* fetch all evaluation sets for a workspace in deepset Cloud
* Rename evaluation_set_name to label_index
* make use of generator functionality for fetching labels
* Update Documentation & Code Style
* Adjust function input for DeepsetCloudDocumentStore.get_all_labels, adjust tests for it, fix typos, make linter happy
* Match error message with pytest.raises
* Update Documentation & Code Style
* DeepsetCloudDocumentStore.get_labels_count raises DeepsetCloudError when no evaluation set was found to count labels on
* remove unneeded import in tests
* DeepsetCloudDocumentStore tests, make response bodies a string through json.dumps
* DeepsetCloudDocumentStore.get_label_count - move raise to return
* stringify UUID before json.dumps as UUID is not serializable
* DeepsetCloudDocumentStore - adjust response mocking in tests
* DeepsetCloudDocumentStore - json dump response body in test
* DeepsetCloudDocumentStore introduce label_index, EvaluationSetClient rename label_index to evaluation_set
* Update Documentation & Code Style
* DeepsetCloudDocumentStore rename evaluation_set to evaluation_set_response as there is a name clash with the input variable
* DeepsetCloudDocumentStore - rename missed variable in test
* DeepsetCloudDocumentStore - rename missed label_index to index in doc string, rename label_index to evaluation_set in EvaluationSetClient
* Update Documentation & Code Style
* DeepsetCloudDocumentStore - update docstrings for EvaluationSetClient
* DeepsetCloudDocumentStore - fix typo in doc string
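Usage sketch for the new evaluation set interface (a minimal example; the API key and evaluation set name below are placeholders):

```python
from haystack.document_stores import DeepsetCloudDocumentStore

document_store = DeepsetCloudDocumentStore(
    api_key="<DEEPSET_CLOUD_API_KEY>",  # placeholder; can also be read from the DEEPSET_CLOUD_API_KEY env var
    workspace="default",
    label_index="my-evaluation-set",  # placeholder name of an uploaded evaluation set
)

evaluation_sets = document_store.get_evaluation_sets()  # all evaluation sets of the workspace, as dicts
labels = document_store.get_all_labels()  # Label objects from the default evaluation set (label_index)
label_count = document_store.get_label_count()  # raises DeepsetCloudError if the evaluation set is not found
```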
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
docs/_src/api/api/document_store.md | 58 +++++-
haystack/document_stores/deepsetcloud.py | 41 +++-
.../haystack-pipeline-1.2.1rc0.schema.json | 5 +
.../haystack-pipeline-unstable.schema.json | 5 +
haystack/utils/deepsetcloud.py | 140 +++++++++++++
test/test_document_store.py | 186 ++++++++++++++++++
6 files changed, 431 insertions(+), 4 deletions(-)
diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index ffbc18d8fd..0f8c4ee1cc 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -3938,7 +3938,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore)
#### \_\_init\_\_
```python
-def __init__(api_key: str = None, workspace: str = "default", index: str = "default", duplicate_documents: str = "overwrite", api_endpoint: Optional[str] = None, similarity: str = "dot_product", return_embedding: bool = False)
+def __init__(api_key: str = None, workspace: str = "default", index: str = "default", duplicate_documents: str = "overwrite", api_endpoint: Optional[str] = None, similarity: str = "dot_product", return_embedding: bool = False, label_index: str = "default")
```
A DocumentStore facade enabling you to interact with the documents stored in Deepset Cloud.
@@ -3964,6 +3964,7 @@ exists.
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
- `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default since it is
more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+- `label_index`: Name of the default evaluation set used by the evaluation set interface
- `return_embedding`: To return document embedding.
@@ -4257,6 +4258,61 @@ exists.
None
+
+
+#### get\_evaluation\_sets
+
+```python
+def get_evaluation_sets() -> List[dict]
+```
+
+Returns a list of the evaluation sets uploaded to deepset Cloud.
+
+**Returns**:
+
+list of evaluation sets as dicts
+Each dict contains the fields "name", "evaluation_set_id", "created_at", "matched_labels", and "total_labels".
+
+
+
+#### get\_all\_labels
+
+```python
+def get_all_labels(index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None) -> List[Label]
+```
+
+Returns a list of labels for the given index name.
+
+**Arguments**:
+
+- `index`: Optional name of evaluation set for which labels should be searched.
+If None, the DocumentStore's default label_index (self.label_index) will be used.
+- `filters`: Not supported.
+- `headers`: Not supported.
+
+**Returns**:
+
+list of Labels.
+
+
+
+#### get\_label\_count
+
+```python
+def get_label_count(index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int
+```
+
+Counts the number of labels for the given index and returns the value.
+
+**Arguments**:
+
+- `index`: Optional evaluation set name for which the labels should be counted.
+If None, the DocumentStore's default label_index (self.label_index) will be used.
+- `headers`: Not supported.
+
+**Returns**:
+
+number of labels for the given index
+
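+**Example**:
+
+A minimal sketch (the evaluation set name is a placeholder):
+
+```python
+document_store = DeepsetCloudDocumentStore(label_index="my-evaluation-set")
+labels = document_store.get_all_labels()  # all labels of the evaluation set
+label_count = document_store.get_label_count()  # raises DeepsetCloudError if the evaluation set is not found
+```
+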
# Module pinecone
diff --git a/haystack/document_stores/deepsetcloud.py b/haystack/document_stores/deepsetcloud.py
index e6ccdafe4d..b7666816ae 100644
--- a/haystack/document_stores/deepsetcloud.py
+++ b/haystack/document_stores/deepsetcloud.py
@@ -24,6 +24,7 @@ def __init__(
api_endpoint: Optional[str] = None,
similarity: str = "dot_product",
return_embedding: bool = False,
+ label_index: str = "default",
):
"""
A DocumentStore facade enabling you to interact with the documents stored in Deepset Cloud.
@@ -46,11 +47,13 @@ def __init__(
If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
:param similarity: The similarity function used to compare document vectors. 'dot_product' is the default since it is
more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+    :param label_index: Name of the default evaluation set used by the evaluation set interface
+
:param return_embedding: To return document embedding.
"""
self.index = index
- self.label_index = index
+ self.label_index = label_index
self.duplicate_documents = duplicate_documents
self.similarity = similarity
self.return_embedding = return_embedding
@@ -65,6 +68,10 @@ def __init__(
f"{indexing_info['pending_file_count']} files are pending to be indexed. Indexing status: {indexing_info['status']}"
)
+ self.evaluation_set_client = DeepsetCloud.get_evaluation_set_client(
+ api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, evaluation_set=label_index
+ )
+
super().__init__()
def get_all_documents(
@@ -452,16 +459,44 @@ def write_documents(
"""
raise NotImplementedError("DeepsetCloudDocumentStore currently does not support writing documents.")
+ def get_evaluation_sets(self) -> List[dict]:
+ """
+        Returns a list of the evaluation sets uploaded to deepset Cloud.
+
+ :return: list of evaluation sets as dicts
+            Each dict contains the fields "name", "evaluation_set_id", "created_at", "matched_labels", and "total_labels".
+ """
+ return self.evaluation_set_client.get_evaluation_sets()
+
def get_all_labels(
self,
index: Optional[str] = None,
filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None,
headers: Optional[Dict[str, str]] = None,
) -> List[Label]:
- raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.")
+ """
+ Returns a list of labels for the given index name.
+
+ :param index: Optional name of evaluation set for which labels should be searched.
+ If None, the DocumentStore's default label_index (self.label_index) will be used.
+        :param filters: Not supported.
+ :param headers: Not supported.
+
+ :return: list of Labels.
+ """
+ return self.evaluation_set_client.get_labels(evaluation_set=index)
def get_label_count(self, index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int:
- raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.")
+ """
+ Counts the number of labels for the given index and returns the value.
+
+ :param index: Optional evaluation set name for which the labels should be counted.
+ If None, the DocumentStore's default label_index (self.label_index) will be used.
+ :param headers: Not supported.
+
+ :return: number of labels for the given index
+ """
+ return self.evaluation_set_client.get_labels_count(evaluation_set=index)
def write_labels(
self,
diff --git a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json b/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
index 6f5d549a94..ba6f2d3e4a 100644
--- a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
@@ -293,6 +293,11 @@
"title": "Return Embedding",
"default": false,
"type": "boolean"
+ },
+ "label_index": {
+ "title": "Label Index",
+ "default": "default",
+ "type": "string"
}
},
"additionalProperties": false,
diff --git a/haystack/json-schemas/haystack-pipeline-unstable.schema.json b/haystack/json-schemas/haystack-pipeline-unstable.schema.json
index a8e6ae7e33..31517778fb 100644
--- a/haystack/json-schemas/haystack-pipeline-unstable.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-unstable.schema.json
@@ -296,6 +296,11 @@
"title": "Return Embedding",
"default": false,
"type": "boolean"
+ },
+ "label_index": {
+ "title": "Label Index",
+ "default": "default",
+ "type": "string"
}
},
"additionalProperties": false,
diff --git a/haystack/utils/deepsetcloud.py b/haystack/utils/deepsetcloud.py
index 30d414204a..52f017b6eb 100644
--- a/haystack/utils/deepsetcloud.py
+++ b/haystack/utils/deepsetcloud.py
@@ -5,6 +5,8 @@
import time
from typing import Any, Dict, Generator, List, Optional, Tuple, Union
+from haystack.schema import Label, Document, Answer
+
try:
from typing import Literal
except ImportError:
@@ -637,6 +639,122 @@ def _build_workspace_url(self, workspace: Optional[str] = None):
return self.client.build_workspace_url(workspace)
+class EvaluationSetClient:
+ def __init__(
+ self, client: DeepsetCloudClient, workspace: Optional[str] = None, evaluation_set: Optional[str] = None
+ ):
+ """
+ A client to communicate with Deepset Cloud evaluation sets and labels.
+
+ :param client: Deepset Cloud client
+ :param workspace: workspace in Deepset Cloud
+ :param evaluation_set: name of the evaluation set to fall back to
+
+ """
+ self.client = client
+ self.workspace = workspace
+ self.evaluation_set = evaluation_set
+
+ def get_labels(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> List[Label]:
+ """
+        Searches for labels of a given evaluation set in deepset Cloud and returns a list of all labels found.
+        Raises a DeepsetCloudError if no evaluation set with the given name was found.
+
+        :param evaluation_set: Optional name of the evaluation set whose labels should be fetched.
+            If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
+ :param workspace: Optional workspace in Deepset Cloud
+ If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: list of Labels
+ """
+ try:
+ evaluation_sets_response = next(
+ self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
+ )
+ except StopIteration:
+ raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
+
+ labels = self._get_labels_from_evaluation_set(
+ workspace=workspace, evaluation_set_id=evaluation_sets_response["evaluation_set_id"]
+ )
+
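+        # Map the deepset Cloud label dicts onto haystack Label objects;
+        # labels without an answer are stored as no_answer labels.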
+ return [
+ Label(
+ query=label_dict["query"],
+ document=Document(content=label_dict["context"]),
+ is_correct_answer=True,
+ is_correct_document=True,
+ origin="user-feedback",
+ answer=Answer(label_dict["answer"]),
+ id=label_dict["label_id"],
+                no_answer=not label_dict.get("answer"),
+ pipeline_id=None,
+ created_at=None,
+ updated_at=None,
+ meta=label_dict["meta"],
+ filters={},
+ )
+ for label_dict in labels
+ ]
+
+ def get_labels_count(self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None) -> int:
+ """
+        Counts labels for a given evaluation set in deepset Cloud.
+
+        :param evaluation_set: Optional name of the evaluation set in deepset Cloud
+ If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
+ :param workspace: Optional workspace in deepset Cloud
+ If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: Number of labels for the given (or default) evaluation set
+ """
+ try:
+ evaluation_sets_response = next(
+ self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
+ )
+ except StopIteration:
+ raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
+
+ return evaluation_sets_response["total_labels"]
+
+ def get_evaluation_sets(self, workspace: Optional[str] = None) -> List[dict]:
+ """
+        Fetches all evaluation sets in the given workspace in Deepset Cloud.
+
+ :param workspace: Optional workspace in Deepset Cloud
+ If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+ :return: List of dictionaries that represent deepset Cloud evaluation sets.
+            Each dict contains the fields "name", "evaluation_set_id", "created_at", "matched_labels", and "total_labels".
+ """
+ evaluation_sets_response = self._get_evaluation_set(evaluation_set=None, workspace=workspace)
+
+        return list(evaluation_sets_response)
+
+ def _get_evaluation_set(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> Generator:
+ if not evaluation_set:
+ evaluation_set = self.evaluation_set
+
+ url = self._build_workspace_url(workspace=workspace)
+ evaluation_set_url = f"{url}/evaluation_sets"
+
+ for response in self.client.get_with_auto_paging(url=evaluation_set_url, query_params={"name": evaluation_set}):
+ yield response
+
+ def _get_labels_from_evaluation_set(
+ self, workspace: Optional[str] = None, evaluation_set_id: Optional[str] = None
+ ) -> Generator:
+ url = f"{self._build_workspace_url(workspace=workspace)}/evaluation_sets/{evaluation_set_id}"
+ labels = self.client.get(url=url).json()
+
+ for label in labels:
+ yield label
+
+ def _build_workspace_url(self, workspace: Optional[str] = None):
+ if workspace is None:
+ workspace = self.workspace
+ return self.client.build_workspace_url(workspace)
+
+
class DeepsetCloud:
"""
A facade to communicate with Deepset Cloud.
@@ -685,3 +803,25 @@ def get_pipeline_client(
"""
client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
return PipelineClient(client=client, workspace=workspace, pipeline_config_name=pipeline_config_name)
+
+ @classmethod
+ def get_evaluation_set_client(
+ cls,
+ api_key: Optional[str] = None,
+ api_endpoint: Optional[str] = None,
+ workspace: str = "default",
+ evaluation_set: str = "default",
+ ) -> EvaluationSetClient:
+ """
+        Creates a client to communicate with Deepset Cloud evaluation sets and labels.
+
+ :param api_key: Secret value of the API key.
+ If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
+ :param api_endpoint: The URL of the Deepset Cloud API.
+ If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
+ :param workspace: workspace in Deepset Cloud
+ :param evaluation_set: name of the evaluation set in Deepset Cloud
+
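+        Example (a minimal sketch; the evaluation set name is a placeholder):
+
+            client = DeepsetCloud.get_evaluation_set_client(evaluation_set="my-evaluation-set")
+            labels = client.get_labels(evaluation_set="my-evaluation-set")
+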
+ """
+ client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
+ return EvaluationSetClient(client=client, workspace=workspace, evaluation_set=evaluation_set)
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 0b27c2e8db..016d5efcd7 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1,3 +1,6 @@
+from typing import List
+from uuid import uuid4
+
import numpy as np
import pandas as pd
import pytest
@@ -1634,6 +1637,189 @@ def test_DeepsetCloudDocumentStore_query(deepset_cloud_document_store):
assert len(filtered_docs) < len(docs)
+@pytest.mark.parametrize(
+ "body, expected_count",
+ [
+ (
+ {
+ "data": [
+ {
+ "evaluation_set_id": str(uuid4()),
+ "name": DC_TEST_INDEX,
+ "created_at": "2022-03-22T13:40:27.535Z",
+ "matched_labels": 2,
+ "total_labels": 10,
+ }
+ ],
+ "has_more": False,
+ "total": 1,
+ },
+ 10,
+ ),
+ (
+ {
+ "data": [
+ {
+ "evaluation_set_id": str(uuid4()),
+ "name": DC_TEST_INDEX,
+ "created_at": "2022-03-22T13:40:27.535Z",
+ "matched_labels": 0,
+ "total_labels": 0,
+ }
+ ],
+ "has_more": False,
+ "total": 1,
+ },
+ 0,
+ ),
+ ],
+)
+@responses.activate
+def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set(
+ deepset_cloud_document_store, body: dict, expected_count: int
+):
+ if MOCK_DC:
+ responses.add(
+ method=responses.GET,
+ url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+ status=200,
+ body=json.dumps(body),
+ )
+ else:
+ responses.add_passthru(DC_API_ENDPOINT)
+
+ count = deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
+ assert count == expected_count
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(
+ deepset_cloud_document_store,
+):
+ if MOCK_DC:
+ responses.add(
+ method=responses.GET,
+ url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+ status=200,
+ body=json.dumps({"data": [], "has_more": False, "total": 0}),
+ )
+ else:
+ responses.add_passthru(DC_API_ENDPOINT)
+
+ with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
+ deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_lists_evaluation_sets(deepset_cloud_document_store):
+ response_evaluation_set = {
+ "evaluation_set_id": str(uuid4()),
+ "name": DC_TEST_INDEX,
+ "created_at": "2022-03-22T13:40:27.535Z",
+ "matched_labels": 2,
+ "total_labels": 10,
+ }
+ if MOCK_DC:
+ responses.add(
+ method=responses.GET,
+ url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+ status=200,
+ body=json.dumps({"data": [response_evaluation_set], "has_more": False, "total": 1}),
+ )
+ else:
+ responses.add_passthru(DC_API_ENDPOINT)
+
+ evaluation_sets = deepset_cloud_document_store.get_evaluation_sets()
+ assert evaluation_sets == [response_evaluation_set]
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_cloud_document_store):
+ if MOCK_DC:
+ eval_set_id = uuid4()
+ responses.add(
+ method=responses.GET,
+ url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets?name={DC_TEST_INDEX}&page_number=1",
+ status=200,
+ body=json.dumps(
+ {
+ "data": [
+ {
+ "evaluation_set_id": str(eval_set_id),
+ "name": DC_TEST_INDEX,
+ "created_at": "2022-03-22T13:40:27.535Z",
+ "matched_labels": 1,
+ "total_labels": 1,
+ }
+ ],
+ "has_more": False,
+ "total": 1,
+ }
+ ),
+ )
+ responses.add(
+ method=responses.GET,
+ url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{eval_set_id}",
+ status=200,
+ body=json.dumps(
+ [
+ {
+ "label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+ "query": "What is berlin?",
+ "answer": "biggest city in germany",
+ "answer_start": 0,
+ "answer_end": 0,
+ "meta": {},
+ "context": "Berlin is the biggest city in germany.",
+ "external_file_name": "string",
+ "file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6",
+ "state": "Label matching status",
+ "candidates": "Candidates that were found in the label <-> file matching",
+ }
+ ]
+ ),
+ )
+ else:
+ responses.add_passthru(DC_API_ENDPOINT)
+
+ labels = deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX)
+ assert labels == [
+ Label(
+ query="What is berlin?",
+ document=Document(content="Berlin is the biggest city in germany."),
+ is_correct_answer=True,
+ is_correct_document=True,
+ origin="user-feedback",
+ answer=Answer("biggest city in germany"),
+ id="3fa85f64-5717-4562-b3fc-2c963f66afa6",
+ no_answer=False,
+ pipeline_id=None,
+ created_at=None,
+ updated_at=None,
+ meta={},
+ filters={},
+ )
+ ]
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set_raises_deepsetclouderror_when_nothing_found(
+ deepset_cloud_document_store,
+):
+ if MOCK_DC:
+ responses.add(
+ method=responses.GET,
+ url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+ status=200,
+ body=json.dumps({"data": [], "has_more": False, "total": 0}),
+ )
+ else:
+ responses.add_passthru(DC_API_ENDPOINT)
+
+ with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
+ deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX)
+
+
@responses.activate
def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_store):
query_emb = np.random.randn(768)