Skip to content

Commit

Permalink
Added diversity parameter to update_topics() function (#887)
Browse files Browse the repository at this point in the history
  • Loading branch information
anubhabdaserrr authored Dec 19, 2022
1 parent e7b7658 commit 70aac96
Showing 1 changed file with 5 additions and 0 deletions.
5 changes: 5 additions & 0 deletions bertopic/_bertopic.py
Original file line number Diff line number Diff line change
Expand Up @@ -990,6 +990,7 @@ def update_topics(self,
docs: List[str],
topics: List[int] = None,
n_gram_range: Tuple[int, int] = None,
diversity: float = None,
vectorizer_model: CountVectorizer = None,
ctfidf_model: ClassTfidfTransformer = None):
""" Updates the topic representation by recalculating c-TF-IDF with the new
Expand All @@ -1008,6 +1009,9 @@ def update_topics(self,
topic reduction techniques are used afterwards. Make sure that
manually assigning topics is the last step in the pipeline
n_gram_range: The n-gram range for the CountVectorizer.
diversity: How strongly to diversify the resulting topic representations using MMR
(Maximal Marginal Relevance). If set to None, MMR will not be used. Accepted values
lie between 0 and 1, with 0 being not at all diverse and 1 being very diverse.
vectorizer_model: Pass in your own CountVectorizer from scikit-learn
ctfidf_model: Pass in your own c-TF-IDF model to update the representations
Expand Down Expand Up @@ -1039,6 +1043,7 @@ def update_topics(self,
if not n_gram_range:
n_gram_range = self.n_gram_range

self.diversity = diversity
self.vectorizer_model = vectorizer_model or CountVectorizer(ngram_range=n_gram_range)
self.ctfidf_model = ctfidf_model or ClassTfidfTransformer()

Expand Down

0 comments on commit 70aac96

Please sign in to comment.