From a273c3a51dd432bd125e5b35df4be94260a2cdb7 Mon Sep 17 00:00:00 2001
From: Florian Hardow <10029805+FHardow@users.noreply.github.com>
Date: Thu, 31 Mar 2022 08:59:58 +0200
Subject: [PATCH] =?UTF-8?q?EvaluationSetClient=20for=20deepset=20cloud=20t?=
 =?UTF-8?q?o=20fetch=20evaluation=20sets=20and=20la=E2=80=A6=20(#2345)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* EvaluationSetClient for deepset Cloud to fetch evaluation sets and labels for one specific evaluation set
* make DeepsetCloudDocumentStore able to fetch uploaded evaluation set names
* fix missing renaming of get_evaluation_set_names in DeepsetCloudDocumentStore
* update documentation for evaluation set functionality in deepset Cloud document store
* DeepsetCloudDocumentStore tests for evaluation set functionality
* rename index to evaluation_set_name for DeepsetCloudDocumentStore evaluation set functionality
* raise DeepsetCloudError when no labels were found for evaluation set
* make use of .get_with_auto_paging in EvaluationSetClient
* Return result of get_with_auto_paging() as it parses the response already
* Make schema import source more specific
* fetch all evaluation sets for a workspace in deepset Cloud
* Rename evaluation_set_name to label_index
* make use of generator functionality for fetching labels
* Update Documentation & Code Style
* Adjust function input for DeepsetCloudDocumentStore.get_all_labels, adjust tests for it, fix typos, make linter happy
* Match error message with pytest.raises
* Update Documentation & Code Style
* DeepsetCloudDocumentStore.get_labels_count raises DeepsetCloudError when no evaluation set was found to count labels on
* remove unneeded import in tests
* DeepsetCloudDocumentStore tests, make response bodies a string through json.dumps
* DeepsetCloudDocumentStore.get_label_count - move raise to return
* stringify uuid before json.dump as uuid is not serializable
* DeepsetCloudDocumentStore - adjust response mocking in tests
* DeepsetCloudDocumentStore - json dump response body in test
* DeepsetCloudDocumentStore introduce label_index, EvaluationSetClient rename label_index to evaluation_set
* Update Documentation & Code Style
* DeepsetCloudDocumentStore rename evaluation_set to evaluation_set_response as there is a name clash with the input variable
* DeepsetCloudDocumentStore - rename missed variable in test
* DeepsetCloudDocumentStore - rename missed label_index to index in docstring, rename label_index to evaluation_set in EvaluationSetClient
* Update Documentation & Code Style
* DeepsetCloudDocumentStore - update docstrings for EvaluationSetClient
* DeepsetCloudDocumentStore - fix typo in docstring

Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
---
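Reviewer note (not part of the diff): a minimal usage sketch of the evaluation set interface this patch adds. The evaluation set name "my-eval-set" is a placeholder, and the sketch assumes DEEPSET_CLOUD_API_KEY and DEEPSET_CLOUD_API_ENDPOINT are set in the environment:

```python
# Illustrative sketch only: "my-eval-set" is a placeholder for an evaluation set
# uploaded to your deepset Cloud workspace; credentials are read from the
# DEEPSET_CLOUD_API_KEY / DEEPSET_CLOUD_API_ENDPOINT environment variables.
from haystack.document_stores import DeepsetCloudDocumentStore

document_store = DeepsetCloudDocumentStore(label_index="my-eval-set")

# List all evaluation sets uploaded to the workspace, e.g. to inspect label counts
for eval_set in document_store.get_evaluation_sets():
    print(eval_set["name"], eval_set["total_labels"])

# Fetch all labels of the default evaluation set (self.label_index) as Label objects
labels = document_store.get_all_labels()

# Count labels of a specific evaluation set instead of the default one;
# raises DeepsetCloudError if no evaluation set with that name exists
label_count = document_store.get_label_count(index="my-eval-set")
```

get_all_labels() falls back to the label_index given at construction time when no index is passed.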
 docs/_src/api/api/document_store.md          |  58 +++++-
 haystack/document_stores/deepsetcloud.py     |  41 +++-
 .../haystack-pipeline-1.2.1rc0.schema.json   |   5 +
 .../haystack-pipeline-unstable.schema.json   |   5 +
 haystack/utils/deepsetcloud.py               | 140 +++++++++++++
 test/test_document_store.py                  | 186 ++++++++++++++++++
 6 files changed, 431 insertions(+), 4 deletions(-)

diff --git a/docs/_src/api/api/document_store.md b/docs/_src/api/api/document_store.md
index ffbc18d8fd..0f8c4ee1cc 100644
--- a/docs/_src/api/api/document_store.md
+++ b/docs/_src/api/api/document_store.md
@@ -3938,7 +3938,7 @@ class DeepsetCloudDocumentStore(KeywordDocumentStore)
 #### \_\_init\_\_
 
 ```python
-def __init__(api_key: str = None, workspace: str = "default", index: str = "default", duplicate_documents: str = "overwrite", api_endpoint: Optional[str] = None, similarity: str = "dot_product", return_embedding: bool = False)
+def __init__(api_key: str = None, workspace: str = "default", index: str = "default", duplicate_documents: str = "overwrite", api_endpoint: Optional[str] = None, similarity: str = "dot_product", return_embedding: bool = False, label_index: str = "default")
 ```
 
 A DocumentStore facade enabling you to interact with the documents stored in Deepset Cloud.
@@ -3964,6 +3964,7 @@ exists.
 If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
 - `similarity`: The similarity function used to compare document vectors. 'dot_product' is the default since it is
 more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+- `label_index`: Name of the default evaluation set that the label interface falls back to.
 - `return_embedding`: To return document embedding.
 
@@ -4257,6 +4258,61 @@ exists.
 
 None
 
+
+
+#### get\_evaluation\_sets
+
+```python
+def get_evaluation_sets() -> List[dict]
+```
+
+Returns a list of evaluation sets uploaded to deepset Cloud.
+
+**Returns**:
+
+list of evaluation sets as dicts.
+These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
+
+
+
+#### get\_all\_labels
+
+```python
+def get_all_labels(index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None) -> List[Label]
+```
+
+Returns a list of labels for the given index name.
+
+**Arguments**:
+
+- `index`: Optional name of the evaluation set for which labels should be searched.
+If None, the DocumentStore's default label_index (self.label_index) will be used.
+- `headers`: Not supported.
+
+**Returns**:
+
+list of Labels.
+
+
+
+#### get\_label\_count
+
+```python
+def get_label_count(index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int
+```
+
+Counts the number of labels for the given index and returns the value.
+
+**Arguments**:
+
+- `index`: Optional evaluation set name for which the labels should be counted.
+If None, the DocumentStore's default label_index (self.label_index) will be used.
+- `headers`: Not supported.
+
+**Returns**:
+
+number of labels for the given index
+
 
 # Module pinecone
 
diff --git a/haystack/document_stores/deepsetcloud.py b/haystack/document_stores/deepsetcloud.py
index e6ccdafe4d..b7666816ae 100644
--- a/haystack/document_stores/deepsetcloud.py
+++ b/haystack/document_stores/deepsetcloud.py
@@ -24,6 +24,7 @@ def __init__(
         api_endpoint: Optional[str] = None,
         similarity: str = "dot_product",
         return_embedding: bool = False,
+        label_index: str = "default",
     ):
         """
         A DocumentStore facade enabling you to interact with the documents stored in Deepset Cloud.
@@ -46,11 +47,13 @@ def __init__(
            If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
         :param similarity: The similarity function used to compare document vectors. 'dot_product' is the default since it is
                            more performant with DPR embeddings. 'cosine' is recommended if you are using a Sentence BERT model.
+        :param label_index: Name of the default evaluation set that the label interface falls back to.
+
         :param return_embedding: To return document embedding.
""" self.index = index - self.label_index = index + self.label_index = label_index self.duplicate_documents = duplicate_documents self.similarity = similarity self.return_embedding = return_embedding @@ -65,6 +68,10 @@ def __init__( f"{indexing_info['pending_file_count']} files are pending to be indexed. Indexing status: {indexing_info['status']}" ) + self.evaluation_set_client = DeepsetCloud.get_evaluation_set_client( + api_key=api_key, api_endpoint=api_endpoint, workspace=workspace, evaluation_set=label_index + ) + super().__init__() def get_all_documents( @@ -452,16 +459,44 @@ def write_documents( """ raise NotImplementedError("DeepsetCloudDocumentStore currently does not support writing documents.") + def get_evaluation_sets(self) -> List[dict]: + """ + Returns a list of uploaded evaluation sets to deepset cloud. + + :return: list of evaluation sets as dicts + These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields. + """ + return self.evaluation_set_client.get_evaluation_sets() + def get_all_labels( self, index: Optional[str] = None, filters: Optional[Dict[str, Union[Dict, List, str, int, float, bool]]] = None, headers: Optional[Dict[str, str]] = None, ) -> List[Label]: - raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.") + """ + Returns a list of labels for the given index name. + + :param index: Optional name of evaluation set for which labels should be searched. + If None, the DocumentStore's default label_index (self.label_index) will be used. + :filters: Not supported. + :param headers: Not supported. + + :return: list of Labels. + """ + return self.evaluation_set_client.get_labels(evaluation_set=index) def get_label_count(self, index: Optional[str] = None, headers: Optional[Dict[str, str]] = None) -> int: - raise NotImplementedError("DeepsetCloudDocumentStore currently does not support labels.") + """ + Counts the number of labels for the given index and returns the value. + + :param index: Optional evaluation set name for which the labels should be counted. + If None, the DocumentStore's default label_index (self.label_index) will be used. + :param headers: Not supported. 
+
+        :return: number of labels for the given index
+        """
+        return self.evaluation_set_client.get_labels_count(evaluation_set=index)
 
     def write_labels(
         self,
diff --git a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json b/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
index 6f5d549a94..ba6f2d3e4a 100644
--- a/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-1.2.1rc0.schema.json
@@ -293,6 +293,11 @@
                     "title": "Return Embedding",
                     "default": false,
                     "type": "boolean"
+                },
+                "label_index": {
+                    "title": "Label Index",
+                    "default": "default",
+                    "type": "string"
                 }
             },
             "additionalProperties": false,
diff --git a/haystack/json-schemas/haystack-pipeline-unstable.schema.json b/haystack/json-schemas/haystack-pipeline-unstable.schema.json
index a8e6ae7e33..31517778fb 100644
--- a/haystack/json-schemas/haystack-pipeline-unstable.schema.json
+++ b/haystack/json-schemas/haystack-pipeline-unstable.schema.json
@@ -296,6 +296,11 @@
                     "title": "Return Embedding",
                     "default": false,
                     "type": "boolean"
+                },
+                "label_index": {
+                    "title": "Label Index",
+                    "default": "default",
+                    "type": "string"
                 }
             },
             "additionalProperties": false,
diff --git a/haystack/utils/deepsetcloud.py b/haystack/utils/deepsetcloud.py
index 30d414204a..52f017b6eb 100644
--- a/haystack/utils/deepsetcloud.py
+++ b/haystack/utils/deepsetcloud.py
@@ -5,6 +5,8 @@
 import time
 from typing import Any, Dict, Generator, List, Optional, Tuple, Union
 
+from haystack.schema import Label, Document, Answer
+
 try:
     from typing import Literal
 except ImportError:
@@ -637,6 +639,122 @@ def _build_workspace_url(self, workspace: Optional[str] = None):
         return self.client.build_workspace_url(workspace)
 
 
+class EvaluationSetClient:
+    def __init__(
+        self, client: DeepsetCloudClient, workspace: Optional[str] = None, evaluation_set: Optional[str] = None
+    ):
+        """
+        A client to communicate with Deepset Cloud evaluation sets and labels.
+
+        :param client: Deepset Cloud client
+        :param workspace: workspace in Deepset Cloud
+        :param evaluation_set: name of the evaluation set to fall back to
+        """
+        self.client = client
+        self.workspace = workspace
+        self.evaluation_set = evaluation_set
+
+    def get_labels(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> List[Label]:
+        """
+        Searches for labels of a given evaluation set in deepset Cloud and returns a list of all found labels.
+        Raises a DeepsetCloudError if the evaluation set does not exist.
+
+        :param evaluation_set: name of the evaluation set for which labels should be fetched
+        :param workspace: Optional workspace in deepset Cloud.
+                          If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: list of Label
+        """
+        try:
+            evaluation_sets_response = next(
+                self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
+            )
+        except StopIteration:
+            raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
+
+        labels = self._get_labels_from_evaluation_set(
+            workspace=workspace, evaluation_set_id=evaluation_sets_response["evaluation_set_id"]
+        )
+
+        return [
+            Label(
+                query=label_dict["query"],
+                document=Document(content=label_dict["context"]),
+                is_correct_answer=True,
+                is_correct_document=True,
+                origin="user-feedback",
+                answer=Answer(label_dict["answer"]),
+                id=label_dict["label_id"],
+                no_answer=False if label_dict.get("answer", None) else True,
+                pipeline_id=None,
+                created_at=None,
+                updated_at=None,
+                meta=label_dict["meta"],
+                filters={},
+            )
+            for label_dict in labels
+        ]
+
+    def get_labels_count(self, evaluation_set: Optional[str] = None, workspace: Optional[str] = None) -> int:
+        """
+        Counts the labels of a given evaluation set in deepset Cloud.
+
+        :param evaluation_set: Optional name of the evaluation set in deepset Cloud.
+                               If None, the EvaluationSetClient's default evaluation set (self.evaluation_set) will be used.
+        :param workspace: Optional workspace in deepset Cloud.
+                          If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: number of labels for the given (or default) evaluation set
+        """
+        try:
+            evaluation_sets_response = next(
+                self._get_evaluation_set(evaluation_set=evaluation_set, workspace=workspace)
+            )
+        except StopIteration:
+            raise DeepsetCloudError(f"No evaluation set found with the name {evaluation_set}")
+
+        return evaluation_sets_response["total_labels"]
+
+    def get_evaluation_sets(self, workspace: Optional[str] = None) -> List[dict]:
+        """
+        Searches for all evaluation sets in the given workspace in deepset Cloud.
+
+        :param workspace: Optional workspace in deepset Cloud.
+                          If None, the EvaluationSetClient's default workspace (self.workspace) will be used.
+
+        :return: list of dictionaries that represent deepset Cloud evaluation sets.
+                 These contain ("name", "evaluation_set_id", "created_at", "matched_labels", "total_labels") as fields.
+        """
+        evaluation_sets_response = self._get_evaluation_set(evaluation_set=None, workspace=workspace)
+
+        return [eval_set for eval_set in evaluation_sets_response]
+
+    def _get_evaluation_set(self, evaluation_set: Optional[str], workspace: Optional[str] = None) -> Generator:
+        if not evaluation_set:
+            evaluation_set = self.evaluation_set
+
+        url = self._build_workspace_url(workspace=workspace)
+        evaluation_set_url = f"{url}/evaluation_sets"
+
+        for response in self.client.get_with_auto_paging(url=evaluation_set_url, query_params={"name": evaluation_set}):
+            yield response
+
+    def _get_labels_from_evaluation_set(
+        self, workspace: Optional[str] = None, evaluation_set_id: Optional[str] = None
+    ) -> Generator:
+        url = f"{self._build_workspace_url(workspace=workspace)}/evaluation_sets/{evaluation_set_id}"
+        labels = self.client.get(url=url).json()
+
+        for label in labels:
+            yield label
+
+    def _build_workspace_url(self, workspace: Optional[str] = None):
+        if workspace is None:
+            workspace = self.workspace
+        return self.client.build_workspace_url(workspace)
+
+
 class DeepsetCloud:
     """
     A facade to communicate with Deepset Cloud.
@@ -685,3 +803,25 @@ def get_pipeline_client(
         """
         client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
         return PipelineClient(client=client, workspace=workspace, pipeline_config_name=pipeline_config_name)
+
+    @classmethod
+    def get_evaluation_set_client(
+        cls,
+        api_key: Optional[str] = None,
+        api_endpoint: Optional[str] = None,
+        workspace: str = "default",
+        evaluation_set: str = "default",
+    ) -> EvaluationSetClient:
+        """
+        Creates a client to communicate with Deepset Cloud evaluation sets and labels.
+
+        :param api_key: Secret value of the API key.
+                        If not specified, will be read from DEEPSET_CLOUD_API_KEY environment variable.
+        :param api_endpoint: The URL of the Deepset Cloud API.
+                             If not specified, will be read from DEEPSET_CLOUD_API_ENDPOINT environment variable.
+        :param workspace: workspace in Deepset Cloud
+        :param evaluation_set: name of the evaluation set in Deepset Cloud
+        """
+        client = DeepsetCloudClient(api_key=api_key, api_endpoint=api_endpoint)
+        return EvaluationSetClient(client=client, workspace=workspace, evaluation_set=evaluation_set)
diff --git a/test/test_document_store.py b/test/test_document_store.py
index 0b27c2e8db..016d5efcd7 100644
--- a/test/test_document_store.py
+++ b/test/test_document_store.py
@@ -1,3 +1,6 @@
+from typing import List
+from uuid import uuid4
+
 import numpy as np
 import pandas as pd
 import pytest
@@ -1634,6 +1637,189 @@ def test_DeepsetCloudDocumentStore_query(deepset_cloud_document_store):
     assert len(filtered_docs) < len(docs)
 
 
+@pytest.mark.parametrize(
+    "body, expected_count",
+    [
+        (
+            {
+                "data": [
+                    {
+                        "evaluation_set_id": str(uuid4()),
+                        "name": DC_TEST_INDEX,
+                        "created_at": "2022-03-22T13:40:27.535Z",
+                        "matched_labels": 2,
+                        "total_labels": 10,
+                    }
+                ],
+                "has_more": False,
+                "total": 1,
+            },
+            10,
+        ),
+        (
+            {
+                "data": [
+                    {
+                        "evaluation_set_id": str(uuid4()),
+                        "name": DC_TEST_INDEX,
+                        "created_at": "2022-03-22T13:40:27.535Z",
+                        "matched_labels": 0,
+                        "total_labels": 0,
+                    }
+                ],
+                "has_more": False,
+                "total": 1,
+            },
+            0,
+        ),
+    ],
+)
+@responses.activate
+def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set(
+    deepset_cloud_document_store, body: dict, expected_count: int
+):
+    if MOCK_DC:
+        responses.add(
+            method=responses.GET,
+            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+            status=200,
+            body=json.dumps(body),
+        )
+    else:
+        responses.add_passthru(DC_API_ENDPOINT)
+
+    count = deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
+    assert count == expected_count
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_count_of_labels_for_evaluation_set_raises_DC_error_when_nothing_found(
+    deepset_cloud_document_store,
+):
+    if MOCK_DC:
+        responses.add(
+            method=responses.GET,
+            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+            status=200,
+            body=json.dumps({"data": [], "has_more": False, "total": 0}),
+        )
+    else:
+        responses.add_passthru(DC_API_ENDPOINT)
+
+    with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"):
+        deepset_cloud_document_store.get_label_count(index=DC_TEST_INDEX)
+
+
+@responses.activate
+def test_DeepsetCloudDocumentStore_lists_evaluation_sets(deepset_cloud_document_store):
+    response_evaluation_set = {
+        "evaluation_set_id": str(uuid4()),
+        "name": DC_TEST_INDEX,
+        "created_at": "2022-03-22T13:40:27.535Z",
+        "matched_labels": 2,
+        "total_labels": 10,
+    }
+    if MOCK_DC:
+        responses.add(
+            method=responses.GET,
+            url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets",
+            status=200,
body=json.dumps({"data": [response_evaluation_set], "has_more": False, "total": 1}), + ) + else: + responses.add_passthru(DC_API_ENDPOINT) + + evaluation_sets = deepset_cloud_document_store.get_evaluation_sets() + assert evaluation_sets == [response_evaluation_set] + + +@responses.activate +def test_DeepsetCloudDocumentStore_fetches_labels_for_evaluation_set(deepset_cloud_document_store): + if MOCK_DC: + eval_set_id = uuid4() + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets?name={DC_TEST_INDEX}&page_number=1", + status=200, + body=json.dumps( + { + "data": [ + { + "evaluation_set_id": str(eval_set_id), + "name": DC_TEST_INDEX, + "created_at": "2022-03-22T13:40:27.535Z", + "matched_labels": 1, + "total_labels": 1, + } + ], + "has_more": False, + "total": 1, + } + ), + ) + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets/{eval_set_id}", + status=200, + body=json.dumps( + [ + { + "label_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "query": "What is berlin?", + "answer": "biggest city in germany", + "answer_start": 0, + "answer_end": 0, + "meta": {}, + "context": "Berlin is the biggest city in germany.", + "external_file_name": "string", + "file_id": "3fa85f64-5717-4562-b3fc-2c963f66afa6", + "state": "Label matching status", + "candidates": "Candidates that were found in the label <-> file matching", + } + ] + ), + ) + else: + responses.add_passthru(DC_API_ENDPOINT) + + labels = deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX) + assert labels == [ + Label( + query="What is berlin?", + document=Document(content="Berlin is the biggest city in germany."), + is_correct_answer=True, + is_correct_document=True, + origin="user-feedback", + answer=Answer("biggest city in germany"), + id="3fa85f64-5717-4562-b3fc-2c963f66afa6", + no_answer=False, + pipeline_id=None, + created_at=None, + updated_at=None, + meta={}, + filters={}, + ) + ] + + +@responses.activate +def test_DeepsetCloudDocumentStore_fetches_lables_for_evaluation_set_raises_deepsetclouderror_when_nothing_found( + deepset_cloud_document_store, +): + if MOCK_DC: + responses.add( + method=responses.GET, + url=f"{DC_API_ENDPOINT}/workspaces/default/evaluation_sets", + status=200, + body=json.dumps({"data": [], "has_more": False, "total": 0}), + ) + else: + responses.add_passthru(DC_API_ENDPOINT) + + with pytest.raises(DeepsetCloudError, match=f"No evaluation set found with the name {DC_TEST_INDEX}"): + deepset_cloud_document_store.get_all_labels(index=DC_TEST_INDEX) + + @responses.activate def test_DeepsetCloudDocumentStore_query_by_embedding(deepset_cloud_document_store): query_emb = np.random.randn(768)