diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index fbd68116a1da7..f72450a18312e 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -188,3 +188,8 @@ repos: entry: 'pg8000' files: ^ci/deps types: [yaml] + - id: validate-min-versions-in-sync + name: Check minimum version of dependencies are aligned + entry: python scripts/validate_min_versions_in_sync.py + language: python + files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$ diff --git a/ci/deps/actions-38-minimum_versions.yaml b/ci/deps/actions-38-minimum_versions.yaml index 8505dad542239..467402bb6ef7f 100644 --- a/ci/deps/actions-38-minimum_versions.yaml +++ b/ci/deps/actions-38-minimum_versions.yaml @@ -32,14 +32,14 @@ dependencies: - matplotlib=3.3.2 - numba=0.50.1 - numexpr=2.7.1 - - openpyxl=3.0.3 - odfpy=1.4.1 + - openpyxl=3.0.3 - pandas-gbq=0.14.0 - psycopg2=2.8.4 - pymysql=0.10.1 - pytables=3.6.1 - pyarrow=1.0.1 - - pyreadstat + - pyreadstat=1.1.0 - pyxlsb=1.0.6 - s3fs=0.4.0 - scipy=1.4.1 diff --git a/doc/source/getting_started/install.rst b/doc/source/getting_started/install.rst index 1cc74eeddbddb..df9c258f4aa6d 100644 --- a/doc/source/getting_started/install.rst +++ b/doc/source/getting_started/install.rst @@ -361,7 +361,7 @@ blosc 1.20.1 Compression for HDF5 zlib Compression for HDF5 fastparquet 0.4.0 Parquet reading / writing pyarrow 1.0.1 Parquet, ORC, and feather reading / writing -pyreadstat SPSS files (.sav) reading +pyreadstat 1.1.0 SPSS files (.sav) reading ========================= ================== ============================================================= .. 
_install.warn_orc: diff --git a/pandas/compat/_optional.py b/pandas/compat/_optional.py index a2be663504abe..a26bc94ab883e 100644 --- a/pandas/compat/_optional.py +++ b/pandas/compat/_optional.py @@ -11,17 +11,24 @@ VERSIONS = { "bs4": "4.8.2", + "blosc": "1.20.1", "bottleneck": "1.3.1", - "fsspec": "0.7.4", "fastparquet": "0.4.0", + "fsspec": "0.7.4", + "html5lib": "1.1", "gcsfs": "0.6.0", + "jinja2": "2.11", "lxml.etree": "4.5.0", "matplotlib": "3.3.2", + "numba": "0.50.1", "numexpr": "2.7.1", "odfpy": "1.4.1", "openpyxl": "3.0.3", "pandas_gbq": "0.14.0", + "psycopg2": "2.8.4", # (dt dec pq3 ext lo64) + "pymysql": "0.10.1", "pyarrow": "1.0.1", + "pyreadstat": "1.1.0", "pytest": "6.0", "pyxlsb": "1.0.6", "s3fs": "0.4.0", @@ -33,7 +40,6 @@ "xlrd": "2.0.1", "xlwt": "1.3.0", "xlsxwriter": "1.2.2", - "numba": "0.50.1", "zstandard": "0.15.2", } @@ -46,6 +52,7 @@ "lxml.etree": "lxml", "odf": "odfpy", "pandas_gbq": "pandas-gbq", + "tables": "pytables", "sqlalchemy": "SQLAlchemy", "jinja2": "Jinja2", } @@ -59,6 +66,9 @@ def get_version(module: types.ModuleType) -> str: if version is None: raise ImportError(f"Can't determine version for {module.__name__}") + if module.__name__ == "psycopg2": + # psycopg2 appends " (dt dec pq3 ext lo64)" to its version + version = version.split()[0] return version diff --git a/scripts/validate_min_versions_in_sync.py b/scripts/validate_min_versions_in_sync.py new file mode 100755 index 0000000000000..4dbf6a4cdcef8 --- /dev/null +++ b/scripts/validate_min_versions_in_sync.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 +""" +Check pandas required and optional dependencies are synced across: + +ci/deps/actions-.*-minimum_versions.yaml +pandas/compat/_optional.py + +TODO: doc/source/getting_started/install.rst + +This is meant to be run as a pre-commit hook - to run it manually, you can do: + + pre-commit run validate-min-versions-in-sync --all-files +""" +from __future__ import annotations + +import pathlib +import sys + +DOC_PATH = 
pathlib.Path("doc/source/getting_started/install.rst").resolve()
# First environment file under ci/deps matching the minimum-versions pattern.
# ``next`` raises StopIteration here if the glob finds nothing.
CI_PATH = next(
    pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
)
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
# pandas package is not available
# in pre-commit environment
sys.path.append("pandas/compat")
sys.path.append("pandas/util")
import version

# Register the standalone module under its package-qualified name before
# importing ``_optional`` (presumably ``_optional`` imports it under that
# name -- confirm against pandas/compat/_optional.py).
sys.modules["pandas.util.version"] = version
import _optional


def get_versions_from_code() -> dict[str, str]:
    """
    Return the minimum versions declared in ``pandas/compat/_optional.py``.

    Returns
    -------
    dict of str to str
        Maps each key of ``VERSIONS``, translated through ``INSTALL_MAPPING``
        where an entry exists and then case-folded, to its version string.
        The ``pytest`` entry is left out.
    """
    install_map = _optional.INSTALL_MAPPING
    versions = _optional.VERSIONS
    return {
        install_map.get(k, k).casefold(): v
        for k, v in versions.items()
        if k != "pytest"
    }


def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, str]]:
    """
    Extract pinned versions from the lines of a CI environment file.

    Parameters
    ----------
    content : list of str
        Lines of the YAML file. Entries are collected only after a line
        containing ``# required dependencies``; entries that follow a line
        containing ``# optional dependencies`` are recorded as optional.

    Returns
    -------
    tuple of (dict, dict)
        ``(required, optional)`` mappings of package name to pinned version.
        An entry without an ``=`` pin maps to ``""`` so that it is reported
        as a mismatch rather than aborting the parse.
    """
    # Don't parse with pyyaml because it ignores comments we're looking for
    seen_required = False
    seen_optional = False
    required_deps: dict[str, str] = {}
    optional_deps: dict[str, str] = {}
    for line in content:
        if "# required dependencies" in line:
            seen_required = True
        elif "# optional dependencies" in line:
            seen_optional = True
        elif seen_required and line.strip():
            entry = line.strip()
            if not entry.startswith("- "):
                # Not a YAML list item, so there is no package to record.
                continue
            # partition (rather than a two-way unpack of split) tolerates
            # unpinned entries such as "- pyreadstat"; the name ``pinned``
            # also avoids shadowing the module-level ``version`` import.
            package, _, pinned = entry[2:].partition("=")
            target = optional_deps if seen_optional else required_deps
            target[package.strip()] = pinned.strip()
    return required_deps, optional_deps


def main() -> None:
    """
    Compare optional minimum versions between the CI file and the code.

    Writes the differing ``(package, version)`` pairs to stdout and exits
    with status 1 when the two sources disagree; exits with status 0
    otherwise.
    """
    with open(CI_PATH, encoding="utf-8") as f:
        _, ci_optional = get_versions_from_ci(f.readlines())
    code_optional = get_versions_from_code()
    diff = set(ci_optional.items()).symmetric_difference(code_optional.items())
    if diff:
        sys.stdout.write(
            f"The following minimum version differences were found between "
            f"{CI_PATH} and {CODE_PATH}. Please ensure these are aligned: "
            f"{diff}\n"
        )
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
    main()