From 72ed21c2aa0c09f9995abc3f7a48ceb6891f37a6 Mon Sep 17 00:00:00 2001 From: anakin87 <44616784+anakin87@users.noreply.github.com> Date: Wed, 21 Dec 2022 22:10:26 +0100 Subject: [PATCH 1/2] remove deprecated parameters --- haystack/nodes/summarizer/base.py | 21 +++++---------- haystack/nodes/summarizer/transformers.py | 31 ++--------------------- test/conftest.py | 7 ++--- test/nodes/test_summarizer.py | 2 +- 4 files changed, 11 insertions(+), 50 deletions(-) diff --git a/haystack/nodes/summarizer/base.py b/haystack/nodes/summarizer/base.py index 17198cdadd..917f1723ea 100644 --- a/haystack/nodes/summarizer/base.py +++ b/haystack/nodes/summarizer/base.py @@ -14,43 +14,34 @@ class BaseSummarizer(BaseComponent): outgoing_edges = 1 @abstractmethod - def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]: + def predict(self, documents: List[Document]) -> List[Document]: """ Abstract method for creating a summary. :param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on. - :param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12 :return: List of Documents, where Document.meta["summary"] contains the summarization """ pass @abstractmethod def predict_batch( - self, - documents: Union[List[Document], List[List[Document]]], - generate_single_summary: Optional[bool] = None, - batch_size: Optional[int] = None, + self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None ) -> Union[List[Document], List[List[Document]]]: pass - def run(self, documents: List[Document], generate_single_summary: Optional[bool] = None): # type: ignore + def run(self, documents: List[Document]): # type: ignore results: Dict = {"documents": []} if documents: - results["documents"] = self.predict(documents=documents, generate_single_summary=generate_single_summary) + results["documents"] = self.predict(documents=documents) return results, "output_1" def run_batch( # type: ignore - self, - documents: Union[List[Document], List[List[Document]]], - generate_single_summary: Optional[bool] = None, - batch_size: Optional[int] = None, + self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None ): - results = self.predict_batch( - documents=documents, batch_size=batch_size, generate_single_summary=generate_single_summary - ) + results = self.predict_batch(documents=documents, batch_size=batch_size) return {"documents": results}, "output_1" diff --git a/haystack/nodes/summarizer/transformers.py b/haystack/nodes/summarizer/transformers.py index ab6bc45456..faeede8bd5 100644 --- a/haystack/nodes/summarizer/transformers.py +++ b/haystack/nodes/summarizer/transformers.py @@ -60,8 +60,6 @@ def __init__( min_length: int = 5, use_gpu: bool = True, clean_up_tokenization_spaces: bool = True, - separator_for_single_summary: str = " ", - generate_single_summary: bool = False, batch_size: int = 16, progress_bar: bool = True, use_auth_token: Optional[Union[str, bool]] = None, @@ -81,9 +79,6 @@ def __init__( :param min_length: Minimum length of summarized text :param use_gpu: Whether to use GPU (if available). :param clean_up_tokenization_spaces: Whether or not to clean up the potential extra spaces in the text output - :param separator_for_single_summary: This parameter is deprecated and will be removed in Haystack 1.12 - :param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12. - To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/reference/other-api#module-document_merger). :param batch_size: Number of documents to process at a time. :param progress_bar: Whether to show a progress bar. :param use_auth_token: The API token used to download private models from Huggingface. @@ -98,11 +93,6 @@ def __init__( """ super().__init__() - if generate_single_summary is True: - raise ValueError( - "'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer." - ) - self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False) if len(self.devices) > 1: logger.warning( @@ -128,21 +118,14 @@ def __init__( self.batch_size = batch_size self.progress_bar = progress_bar - def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]: + def predict(self, documents: List[Document]) -> List[Document]: """ Produce the summarization from the supplied documents. These document can for example be retrieved via the Retriever. :param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on. - :param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12. - To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger). :return: List of Documents, where Document.meta["summary"] contains the summarization """ - if generate_single_summary is True: - raise ValueError( - "'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer." - ) - if self.min_length > self.max_length: raise AttributeError("min_length cannot be greater than max_length") @@ -183,10 +166,7 @@ def predict(self, documents: List[Document], generate_single_summary: Optional[b return result def predict_batch( - self, - documents: Union[List[Document], List[List[Document]]], - generate_single_summary: Optional[bool] = None, - batch_size: Optional[int] = None, + self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None ) -> Union[List[Document], List[List[Document]]]: """ Produce the summarization from the supplied documents. @@ -194,15 +174,8 @@ def predict_batch( :param documents: Single list of related documents or list of lists of related documents (e.g. coming from a retriever) that the answer shall be conditioned on. - :param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12. - To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger). :param batch_size: Number of Documents to process at a time. """ - if generate_single_summary is True: - raise ValueError( - "'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer." - ) - if self.min_length > self.max_length: raise AttributeError("min_length cannot be greater than max_length") diff --git a/test/conftest.py b/test/conftest.py index 253869483a..59541d651e 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -263,14 +263,11 @@ def predict(self, query: str, documents: List[Document], top_k: Optional[int]) - class MockSummarizer(BaseSummarizer): def predict_batch( - self, - documents: Union[List[Document], List[List[Document]]], - generate_single_summary: Optional[bool] = None, - batch_size: Optional[int] = None, + self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None ) -> Union[List[Document], List[List[Document]]]: pass - def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]: + def predict(self, documents: List[Document]) -> List[Document]: pass diff --git a/test/nodes/test_summarizer.py b/test/nodes/test_summarizer.py index 3a41736709..d68a3b6950 100644 --- a/test/nodes/test_summarizer.py +++ b/test/nodes/test_summarizer.py @@ -96,7 +96,7 @@ def test_summarization_pipeline(document_store, retriever, summarizer): @fail_in_v1_12 def test_generate_single_summary_deprecated(): summarizer = TransformersSummarizer(model_name_or_path="hf-internal-testing/tiny-random-bart", use_gpu=False) - with pytest.raises(ValueError): + with pytest.raises(TypeError): summarizer.predict([Document(content="irrelevant")], generate_single_summary=True) From 3b215ab6b99b8d915b2a026ea7c561b9c408ec13 Mon Sep 17 00:00:00 2001 From: anakin87 <44616784+anakin87@users.noreply.github.com> Date: Wed, 21 Dec 2022 22:22:12 +0100 Subject: [PATCH 2/2] remove deprecation/removal test --- test/nodes/test_summarizer.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/test/nodes/test_summarizer.py b/test/nodes/test_summarizer.py index d68a3b6950..f4874b0a54 100644 --- a/test/nodes/test_summarizer.py +++ b/test/nodes/test_summarizer.py @@ -86,20 +86,6 @@ def test_summarization_pipeline(document_store, retriever, summarizer): assert " The Eiffel Tower in Paris has officially opened its doors to the public." == answers[0]["answer"] -haystack_version = tuple(int(num) for num in haystack.__version__.split(".")[:2]) -fail_in_v1_12 = pytest.mark.xfail( - haystack_version >= (1, 12), - reason="'generate_single_summary' should be removed in v1.12, as it was deprecated in v1.10", -) - - -@fail_in_v1_12 -def test_generate_single_summary_deprecated(): - summarizer = TransformersSummarizer(model_name_or_path="hf-internal-testing/tiny-random-bart", use_gpu=False) - with pytest.raises(TypeError): - summarizer.predict([Document(content="irrelevant")], generate_single_summary=True) - - # # Document Merger + Summarizer tests #