From 73c814c4edd5c9d924152e9db40117f94eeeb452 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mario=20S=C3=A4nger?= <saengema@informatik.hu-berlin.de>
Date: Tue, 14 Mar 2023 10:54:33 +0100
Subject: [PATCH] Update version specifications of spacy and scispacy + revise
 documentation

---
 flair/splitter.py                             |  2 +-
 flair/tokenization.py                         | 10 +++++-----
 resources/docs/HUNFLAIR.md                    | 18 +++++++++++-------
 resources/docs/HUNFLAIR_TUTORIAL_1_TAGGING.md |  4 ++--
 4 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/flair/splitter.py b/flair/splitter.py
index 981ff65c5b..f75ece3112 100644
--- a/flair/splitter.py
+++ b/flair/splitter.py
@@ -107,7 +107,7 @@ def __init__(self, model: Union[Any, str], tokenizer: Tokenizer = None):
             from spacy.language import Language
         except ImportError:
             raise ImportError(
-                "Please install spacy v2.3.2 or higher before using the SpacySentenceSplitter, "
+                "Please install spacy v3.4.4 or higher before using the SpacySentenceSplitter, "
                 "otherwise you can use SegtokSentenceSplitter as alternative implementation."
             )
 
diff --git a/flair/tokenization.py b/flair/tokenization.py
index 543286d8dd..df0c3b3dd6 100644
--- a/flair/tokenization.py
+++ b/flair/tokenization.py
@@ -42,7 +42,7 @@ def __init__(self, model):
             from spacy.language import Language
         except ImportError:
             raise ImportError(
-                "Please install Spacy v2.0 or better before using the Spacy tokenizer, "
+                "Please install Spacy v3.4.4 or better before using the Spacy tokenizer, "
                 "otherwise you can use SegtokTokenizer as advanced tokenizer."
             )
 
@@ -219,12 +219,12 @@ def __init__(self):
             from spacy.lang import char_classes
         except ImportError:
             raise ImportError(
-                "  Please install scispacy version 0.2.5 (recommended) or higher before using the SciSpacy tokenizer, "
+                "  Please install scispacy version 0.5.1 (recommended) or higher before using the SciSpacy tokenizer, "
                 "otherwise you can use SegtokTokenizer as alternative implementation.\n"
-                "  You can install scispacy (version 0.2.5) by running:\n\n"
-                "     pip install scispacy==0.2.5\n\n"
+                "  You can install scispacy (version 0.5.1) by running:\n\n"
+                "     pip install scispacy==0.5.1\n\n"
                 "  By default HunFlair uses the `en_core_sci_sm` model. You can install the model by running:\n\n"
-                "     pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz\n\n"
+                "     pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_sm-0.5.1.tar.gz\n\n"
                 "  Note that the scispacy version and the version of the model must match to work properly!"
             )
 
diff --git a/resources/docs/HUNFLAIR.md b/resources/docs/HUNFLAIR.md
index 77122400be..a85b88c8bc 100644
--- a/resources/docs/HUNFLAIR.md
+++ b/resources/docs/HUNFLAIR.md
@@ -58,8 +58,8 @@ Span[6:7]: "Mouse" → Species (0.9979)
 
 Scientific texts are difficult to tokenize. For this reason, we recommend to install [SciSpaCy](https://allenai.github.io/scispacy/) for improved pre-processing and tokenization of scientific / biomedical texts:
  ```
-pip install scispacy==0.2.5
-pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz
+pip install scispacy==0.5.1
+pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_sm-0.5.1.tar.gz
 ```
 
 Use this code to apply scientific tokenization: 
@@ -121,10 +121,14 @@ We provide a set of quick tutorials to get you started with *HunFlair*:
 ## Citing HunFlair
 Please cite the following paper when using *HunFlair*:
 ~~~
-@article{weber2020hunflair,
-    title={HunFlair: An Easy-to-Use Tool for State-of-the-Art Biomedical Named Entity Recognition},
-    author={Weber, Leon and S{\"a}nger, Mario and M{\"u}nchmeyer, Jannes  and Habibi, Maryam and Leser, Ulf and Akbik, Alan},
-    journal={arXiv preprint arXiv:2008.07347},
-    year={2020}
+@article{weber2021hunflair,
+  title={HunFlair: an easy-to-use tool for state-of-the-art biomedical named entity recognition},
+  author={Weber, Leon and S{\"a}nger, Mario and M{\"u}nchmeyer, Jannes and Habibi, Maryam and Leser, Ulf and Akbik, Alan},
+  journal={Bioinformatics},
+  volume={37},
+  number={17},
+  pages={2792--2794},
+  year={2021},
+  publisher={Oxford University Press}
 }
 ~~~
diff --git a/resources/docs/HUNFLAIR_TUTORIAL_1_TAGGING.md b/resources/docs/HUNFLAIR_TUTORIAL_1_TAGGING.md
index e46ac18d21..7bad25081b 100644
--- a/resources/docs/HUNFLAIR_TUTORIAL_1_TAGGING.md
+++ b/resources/docs/HUNFLAIR_TUTORIAL_1_TAGGING.md
@@ -90,8 +90,8 @@ This can be unfavourable if applied to biomedical texts.
 *HunFlair* integrates [SciSpaCy](https://allenai.github.io/scispacy/), a library specially designed to work with scientific text.
 To use the library we first have to install it and download one of it's models:
 ~~~
-pip install scispacy==0.2.5
-pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.2.5/en_core_sci_sm-0.2.5.tar.gz
+pip install scispacy==0.5.1
+pip install https://s3-us-west-2.amazonaws.com/ai2-s2-scispacy/releases/v0.5.1/en_core_sci_sm-0.5.1.tar.gz
 ~~~
 
 To use the tokenizer we just have to pass it as parameter to when instancing a sentence: