Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: remove deprecated parameters from Summarizer #3740

Merged
merged 2 commits into from
Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 6 additions & 15 deletions haystack/nodes/summarizer/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,43 +14,34 @@ class BaseSummarizer(BaseComponent):
outgoing_edges = 1

@abstractmethod
def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
def predict(self, documents: List[Document]) -> List[Document]:
"""
Abstract method for creating a summary.

:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12
:return: List of Documents, where Document.meta["summary"] contains the summarization
"""
pass

@abstractmethod
def predict_batch(
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
) -> Union[List[Document], List[List[Document]]]:
pass

def run(self, documents: List[Document], generate_single_summary: Optional[bool] = None): # type: ignore
def run(self, documents: List[Document]): # type: ignore

results: Dict = {"documents": []}

if documents:
results["documents"] = self.predict(documents=documents, generate_single_summary=generate_single_summary)
results["documents"] = self.predict(documents=documents)

return results, "output_1"

def run_batch( # type: ignore
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
):

results = self.predict_batch(
documents=documents, batch_size=batch_size, generate_single_summary=generate_single_summary
)
results = self.predict_batch(documents=documents, batch_size=batch_size)

return {"documents": results}, "output_1"
31 changes: 2 additions & 29 deletions haystack/nodes/summarizer/transformers.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,8 +60,6 @@ def __init__(
min_length: int = 5,
use_gpu: bool = True,
clean_up_tokenization_spaces: bool = True,
separator_for_single_summary: str = " ",
generate_single_summary: bool = False,
batch_size: int = 16,
progress_bar: bool = True,
use_auth_token: Optional[Union[str, bool]] = None,
Expand All @@ -81,9 +79,6 @@ def __init__(
:param min_length: Minimum length of summarized text
:param use_gpu: Whether to use GPU (if available).
:param clean_up_tokenization_spaces: Whether or not to clean up the potential extra spaces in the text output
:param separator_for_single_summary: This parameter is deprecated and will be removed in Haystack 1.12
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/reference/other-api#module-document_merger).
:param batch_size: Number of documents to process at a time.
:param progress_bar: Whether to show a progress bar.
:param use_auth_token: The API token used to download private models from Huggingface.
Expand All @@ -98,11 +93,6 @@ def __init__(
"""
super().__init__()

if generate_single_summary is True:
raise ValueError(
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
)

self.devices, _ = initialize_device_settings(devices=devices, use_cuda=use_gpu, multi_gpu=False)
if len(self.devices) > 1:
logger.warning(
Expand All @@ -128,21 +118,14 @@ def __init__(
self.batch_size = batch_size
self.progress_bar = progress_bar

def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
def predict(self, documents: List[Document]) -> List[Document]:
"""
Produce the summarization from the supplied documents.
These document can for example be retrieved via the Retriever.

:param documents: Related documents (e.g. coming from a retriever) that the answer shall be conditioned on.
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger).
:return: List of Documents, where Document.meta["summary"] contains the summarization
"""
if generate_single_summary is True:
raise ValueError(
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
)

if self.min_length > self.max_length:
raise AttributeError("min_length cannot be greater than max_length")

Expand Down Expand Up @@ -183,26 +166,16 @@ def predict(self, documents: List[Document], generate_single_summary: Optional[b
return result

def predict_batch(
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
) -> Union[List[Document], List[List[Document]]]:
"""
Produce the summarization from the supplied documents.
These documents can for example be retrieved via the Retriever.

:param documents: Single list of related documents or list of lists of related documents
(e.g. coming from a retriever) that the answer shall be conditioned on.
:param generate_single_summary: This parameter is deprecated and will be removed in Haystack 1.12.
To obtain single summaries from multiple documents, consider using the [DocumentMerger](https://docs.haystack.deepset.ai/docs/document_merger).
:param batch_size: Number of Documents to process at a time.
"""
if generate_single_summary is True:
raise ValueError(
"'generate_single_summary' has been removed. Instead, you can use the Document Merger to merge documents before applying the Summarizer."
)

if self.min_length > self.max_length:
raise AttributeError("min_length cannot be greater than max_length")

Expand Down
7 changes: 2 additions & 5 deletions test/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -263,14 +263,11 @@ def predict(self, query: str, documents: List[Document], top_k: Optional[int]) -

class MockSummarizer(BaseSummarizer):
def predict_batch(
self,
documents: Union[List[Document], List[List[Document]]],
generate_single_summary: Optional[bool] = None,
batch_size: Optional[int] = None,
self, documents: Union[List[Document], List[List[Document]]], batch_size: Optional[int] = None
) -> Union[List[Document], List[List[Document]]]:
pass

def predict(self, documents: List[Document], generate_single_summary: Optional[bool] = None) -> List[Document]:
def predict(self, documents: List[Document]) -> List[Document]:
pass


Expand Down
14 changes: 0 additions & 14 deletions test/nodes/test_summarizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,20 +86,6 @@ def test_summarization_pipeline(document_store, retriever, summarizer):
assert " The Eiffel Tower in Paris has officially opened its doors to the public." == answers[0]["answer"]


haystack_version = tuple(int(num) for num in haystack.__version__.split(".")[:2])
fail_in_v1_12 = pytest.mark.xfail(
haystack_version >= (1, 12),
reason="'generate_single_summary' should be removed in v1.12, as it was deprecated in v1.10",
)


@fail_in_v1_12
def test_generate_single_summary_deprecated():
summarizer = TransformersSummarizer(model_name_or_path="hf-internal-testing/tiny-random-bart", use_gpu=False)
with pytest.raises(ValueError):
summarizer.predict([Document(content="irrelevant")], generate_single_summary=True)


#
# Document Merger + Summarizer tests
#
Expand Down