diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fb8426d5d9..020426175e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -11,48 +11,18 @@ jobs: TRANSFORMERS_CACHE: ./cache/transformers FLAIR_CACHE_ROOT: ./cache/flair steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 - name: Set up Python 3.7 id: setup-python - uses: actions/setup-python@v2 + uses: actions/setup-python@v4 with: python-version: 3.7 - #---------------------------------------------- - # Try to load cached poetry installation - #---------------------------------------------- - - name: Load cached Poetry installation - id: cached-poetry - uses: actions/cache@v3 - with: - path: | - ~/.local - ~/.cache/pypoetry - key: poetry-0 # increment to reset cache - #---------------------------------------------- - # Install poetry if not using cached - #---------------------------------------------- - - name: Install Poetry - if: steps.cached-poetry.outputs.cache-hit != 'true' - uses: snok/install-poetry@v1 - with: - virtualenvs-in-project: true - #---------------------------------------------- - # Lock dependencies to get fresh versions - #---------------------------------------------- - - name: Run poetry lock - run: | - poetry config virtualenvs.in-project true - poetry lock - #---------------------------------------------- - # install dependencies if cache does not exist - #---------------------------------------------- - - name: Install dependencies - run: | - poetry config virtualenvs.in-project true - poetry install --no-interaction --no-root - #---------------------------------------------- - # run test suite - #---------------------------------------------- + - name: Install Flair dependencies + run: pip install -e . 
+ - name: Install unittest dependencies + run: pip install -r requirements-dev.txt + - name: Show installed dependencies + run: pip freeze - name: Cache downloaded models/datasets uses: actions/cache@v3 with: @@ -60,6 +30,5 @@ jobs: key: cache-v1.1 - name: Run tests run: | - source .venv/bin/activate python -c 'import flair' pytest --runintegration --durations=0 -vv diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index ce1452607f..d89055a214 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -3,14 +3,11 @@ We are happy to accept your contributions to make `flair` better and more awesome! To avoid unnecessary work on either side, please stick to the following process: -1. Check if there is already [an issue](/~https://github.com/zalandoresearch/flair/issues) for your concern. +1. Check if there is already [an issue](https://github.com/flairNLP/flair/issues) for your concern. 2. If there is not, open a new one to start a discussion. We hate to close finished PRs! 3. If we decide your concern needs code changes, we would be happy to accept a pull request. Please consider the commit guidelines below. -In case you just want to help out and don't know where to start, -[issues with "help wanted" label](/~https://github.com/zalandoresearch/flair/labels/help%20wanted) are good for -first-time contributors. ## Git Commit Guidelines @@ -30,18 +27,10 @@ the code should hopefully be easy. Flair requires python-3.7 or higher. To make sure your code also runs on the oldest supported python version, it is recommended to use python-3.7.x for flair development. -We use [poetry](https://python-poetry.org) for dependency and virtual environment management. -Install poetry following the official guide at https://python-poetry.org/docs/#installation - using the official installer -or via [pipx](https://pypa.github.io/pipx/). - -After the pre-conditions are met, switch into the checked out flair folder.
- +Create a python environment of your preference and run: ```bash -# Install dependencies (including dev dependencies) -poetry install - -# You can turn on the installed environment -poetry shell +pip install -r requirements-dev.txt +pip install -e . ``` ### Tests diff --git a/flair/__init__.py b/flair/__init__.py index 1f8230b7e3..2eb42075fa 100644 --- a/flair/__init__.py +++ b/flair/__init__.py @@ -10,6 +10,10 @@ cache_root = Path(os.getenv("FLAIR_CACHE_ROOT", Path(Path.home(), ".flair"))) +device: torch.device +"""Flair is using a single device for everything. You can set this device by overwriting this variable.""" + + # global variable: device if torch.cuda.is_available(): device_id = os.environ.get("FLAIR_DEVICE") diff --git a/flair/embeddings/token.py b/flair/embeddings/token.py index 8fef855839..771c3a6397 100644 --- a/flair/embeddings/token.py +++ b/flair/embeddings/token.py @@ -77,8 +77,8 @@ def __init__(self, embeddings: List[TokenEmbeddings], overwrite_names: bool = Tr # IMPORTANT: add embeddings as torch modules for i, embedding in enumerate(embeddings): if overwrite_names: - embedding.name = f"{str(i)}-{embedding.name}" - self.add_module(f"list_embedding_{str(i)}", embedding) + embedding.name = f"{i!s}-{embedding.name}" + self.add_module(f"list_embedding_{i!s}", embedding) self.name: str = "Stack" self.__names = [name for embedding in self.embeddings for name in embedding.get_names()] diff --git a/flair/inference_utils.py b/flair/inference_utils.py index 6a4a0bbe4f..e96e16da17 100644 --- a/flair/inference_utils.py +++ b/flair/inference_utils.py @@ -75,7 +75,7 @@ def __init__(self, embedding: WordEmbeddings, backend="sqlite", verbose=True) -> self.name = embedding.name self.store_path: Path = WordEmbeddingsStore._get_store_path(embedding, backend) if verbose: - logger.info(f"store filename: {str(self.store_path)}") + logger.info(f"store filename: {self.store_path!s}") self.backend: Union[WordEmbeddings, WordEmbeddingsStoreBackend] if backend == 
"sqlite": self.backend = SqliteWordEmbeddingsStoreBackend(embedding, verbose) @@ -143,7 +143,7 @@ def delete_stores(model, backend="sqlite"): """Deletes the db versions of all word embeddings.""" for embedding in WordEmbeddingsStore._word_embeddings(model): store_path: Path = WordEmbeddingsStore._get_store_path(embedding) - logger.info(f"delete store: {str(store_path)}") + logger.info(f"delete store: {store_path!s}") if store_path.is_file(): store_path.unlink() elif store_path.is_dir(): @@ -177,7 +177,7 @@ def __init__(self, embedding, verbose) -> None: self.k = len(result[0]) - 1 return except sqlite3.Error as err: - logger.exception(f"Fail to open sqlite database {str(self.store_path)}: {str(err)}") + logger.exception(f"Fail to open sqlite database {self.store_path!s}: {err!s}") # otherwise, push embedding to database if hasattr(embedding, "precomputed_word_embeddings"): self.db = sqlite3.connect(str(self.store_path)) @@ -239,7 +239,7 @@ def __init__(self, embedding, verbose) -> None: cursor.close() return except lmdb.Error as err: - logger.exception(f"Fail to open lmdb database {str(self.store_path)}: {str(err)}") + logger.exception(f"Fail to open lmdb database {self.store_path!s}: {err!s}") # create and load the database in write mode if hasattr(embedding, "precomputed_word_embeddings"): pwe = embedding.precomputed_word_embeddings diff --git a/flair/splitter.py b/flair/splitter.py index 3c082f79fb..cf57b36bdf 100644 --- a/flair/splitter.py +++ b/flair/splitter.py @@ -64,8 +64,8 @@ def split(self, text: str) -> List[Sentence]: sentence_offset = text.index(sentence, sentence_offset) except ValueError as error: raise AssertionError( - f"Can't find the sentence offset for sentence {repr(sentence)} " - f"starting from position {repr(sentence_offset)}" + f"Can't find the sentence offset for sentence {sentence!r} " + f"starting from position {sentence_offset}" ) from error sentences.append( Sentence( diff --git a/flair/training_utils.py b/flair/training_utils.py
index 46cd0ace9f..51894386dc 100644 --- a/flair/training_utils.py +++ b/flair/training_utils.py @@ -39,7 +39,7 @@ def loss(self): return self.scores["loss"] def __str__(self) -> str: - return f"{str(self.detailed_results)}\nLoss: {self.loss}'" + return f"{self.detailed_results!s}\nLoss: {self.loss}'" class MetricRegression: diff --git a/poetry.toml b/poetry.toml deleted file mode 100644 index d11b90b042..0000000000 --- a/poetry.toml +++ /dev/null @@ -1,2 +0,0 @@ -[virtualenvs] -prefer-active-python = true diff --git a/pyproject.toml b/pyproject.toml index 1a13831148..635eee77f1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,68 +1,3 @@ -[tool.poetry] -name = "flair" -version = "0.12.2" -description = "A very simple framework for state-of-the-art NLP" -readme = "README.md" -authors = ["Alan Akbik "] -maintainers = ["Alan Akbik "] -repository = "/~https://github.com/flairNLP/flair" -license = "MIT" - -[tool.poetry.dependencies] -python = ">=3.7,<4.0" - -boto3 = ">=1.20.27" -bpemb = ">=0.3.2" -conllu = ">=4.0" -deprecated = ">=1.2.13" -ftfy = ">=6.1.0" -gdown = ">=4.4.0" -gensim = ">=4.2.0" -huggingface-hub = ">=0.10.0" -janome = ">=0.4.2" -langdetect = ">=1.0.9" -lxml = ">=4.8.0" -matplotlib = ">=2.2.3" -more-itertools = ">=8.13.0" -mpld3 = "==0.3" -numpy = [ - {version = "~1.21.6", python = "<3.8.0"}, - {version = ">=1.22.3", python = ">=3.8.0"}, -] -pptree = ">=3.1" -python-dateutil = ">=2.8.2" -pytorch_revgrad = ">=0.2.0" -regex = ">=2022.1.18" -scikit-learn = ">=1.0.2" -segtok = ">=1.5.11" -sqlitedict = ">=2.0.0" -tabulate = ">=0.8.10" -torch = ">=1.5.0,!=1.8" -tqdm = ">=4.63.0" -transformer-smaller-training-vocab = ">=0.2.3" -transformers = { extras = ["sentencepiece"], version =">=4.18.0" } -wikipedia-api = ">=0.5.7" - - -[tool.poetry.group.dev.dependencies] -black = { extras = ["jupyter"], version = ">=23.3.0" } -konoha = ">=4.6.5,<5.0.0" -mypy = ">=1.2.0" -pytest = ">=7.3.1" -pytest-black-ng = ">=0.4.1" -pytest-github-actions-annotate-failures = 
">=0.1.8" -pytest-mypy = ">=0.10.3" -pytest-ruff = ">=0.0.5" -types-dataclasses = ">=0.6.6" -types-Deprecated = ">=1.2.9.2" -types-requests = ">=2.28.11.17" -types-tabulate = ">=0.9.0.2" - -[build-system] -requires = ["poetry-core"] -build-backend = "poetry.core.masonry.api" - - [tool.black] line-length = 120 target-version = ['py37'] @@ -91,6 +26,8 @@ filterwarnings = [ "ignore:the imp module is deprecated:DeprecationWarning:past", # ignore DeprecationWarning from hyperopt dependency "ignore:.*imp module.*:DeprecationWarning", # ignore DeprecationWarnings that involve imp module "ignore:The class LayoutLMv3FeatureExtractor is deprecated and will be removed in version 5 of Transformers. Please use LayoutLMv3ImageProcessor instead.", # huggingface layoutlmv3 has deprecated calls. + "ignore:pkg_resources", # huggingface has deprecated calls. + 'ignore:Deprecated call to `pkg_resources', # huggingface has deprecated calls. ] markers = [ "integration", diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 0000000000..96d82982f4 --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,12 @@ +black[jupyter]>=23.3.0 +konoha>=4.6.5,<5.0.0 +mypy>=1.2.0 +pytest>=7.3.1 +pytest-black-ng>=0.4.1 +pytest-github-actions-annotate-failures>=0.1.8 +pytest-mypy>=0.10.3 +pytest-ruff>=0.0.5 +types-dataclasses>=0.6.6 +types-Deprecated>=1.2.9.2 +types-requests>=2.28.11.17 +types-tabulate>=0.9.0.2 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000000..d3ca17e303 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,28 @@ +boto3>=1.20.27 +bpemb>=0.3.2 +conllu>=4.0 +deprecated>=1.2.13 +ftfy>=6.1.0 +gdown>=4.4.0 +gensim>=4.2.0 +huggingface-hub>=0.10.0 +janome>=0.4.2 +langdetect>=1.0.9 +lxml>=4.8.0 +matplotlib>=2.2.3 +more-itertools>=8.13.0 +mpld3>=0.3 +pptree>=3.1 +python-dateutil>=2.8.2 +pytorch_revgrad>=0.2.0 +regex>=2022.1.18 +scikit-learn>=1.0.2 +segtok>=1.5.11 +sqlitedict>=2.0.0 +tabulate>=0.8.10 
+torch>=1.5.0,!=1.8
+tqdm>=4.63.0
+transformer-smaller-training-vocab>=0.2.3
+transformers[sentencepiece]>=4.18.0,<5.0.0
+urllib3<2.0.0,>=1.0.0 # pin below 2 to make dependency resolution faster.
+wikipedia-api>=0.5.7
\ No newline at end of file
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000000..fb62c0116a
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,30 @@
+"""Packaging script for flair; install requirements are read from requirements.txt."""
+
+from pathlib import Path
+
+from setuptools import find_packages, setup
+
+# Resolve files relative to this script so the build does not depend on the working directory.
+here = Path(__file__).resolve().parent
+
+# One requirement specifier per non-blank line of requirements.txt.
+required = [
+    line for line in (here / "requirements.txt").read_text(encoding="utf-8").splitlines() if line.strip()
+]
+
+setup(
+    name="flair",
+    version="0.12.2",
+    description="A very simple framework for state-of-the-art NLP",
+    long_description=(here / "README.md").read_text(encoding="utf-8"),
+    long_description_content_type="text/markdown",
+    author="Alan Akbik",
+    author_email="alan.akbik@gmail.com",
+    url="https://github.com/flairNLP/flair",
+    # `exclude` expects a sequence of patterns, not a single string; "tests.*" also covers subpackages.
+    packages=find_packages(exclude=("tests", "tests.*")),
+    license="MIT",
+    install_requires=required,
+    include_package_data=True,
+    python_requires=">=3.7",
+)