Skip to content

Commit

Permalink
Backport PR #45219: MISC: Check that min versions are aligned in CI a…
Browse files Browse the repository at this point in the history
…nd import_optional_dependency (#45537)

Co-authored-by: Matthew Roeschke <emailformattr@gmail.com>
  • Loading branch information
meeseeksmachine and mroeschke authored Jan 21, 2022
1 parent feb5346 commit f939b2c
Show file tree
Hide file tree
Showing 5 changed files with 101 additions and 5 deletions.
5 changes: 5 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -181,3 +181,8 @@ repos:
entry: 'pg8000'
files: ^ci/deps
types: [yaml]
- id: validate-min-versions-in-sync
name: Check minimum version of dependencies are aligned
entry: python scripts/validate_min_versions_in_sync.py
language: python
files: ^(ci/deps/actions-.*-minimum_versions\.yaml|pandas/compat/_optional\.py)$
4 changes: 2 additions & 2 deletions ci/deps/actions-38-minimum_versions.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,14 +32,14 @@ dependencies:
- matplotlib=3.3.2
- numba=0.50.1
- numexpr=2.7.1
- openpyxl=3.0.3
- odfpy=1.4.1
- openpyxl=3.0.3
- pandas-gbq=0.14.0
- psycopg2=2.8.4
- pymysql=0.10.1
- pytables=3.6.1
- pyarrow=1.0.1
- pyreadstat
- pyreadstat=1.1.0
- pyxlsb=1.0.6
- s3fs=0.4.0
- scipy=1.4.1
Expand Down
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ blosc 1.20.1 Compression for HDF5
zlib Compression for HDF5
fastparquet 0.4.0 Parquet reading / writing
pyarrow 1.0.1 Parquet, ORC, and feather reading / writing
pyreadstat SPSS files (.sav) reading
pyreadstat 1.1.0 SPSS files (.sav) reading
========================= ================== =============================================================

.. _install.warn_orc:
Expand Down
14 changes: 12 additions & 2 deletions pandas/compat/_optional.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,24 @@

VERSIONS = {
"bs4": "4.8.2",
"blosc": "1.20.1",
"bottleneck": "1.3.1",
"fsspec": "0.7.4",
"fastparquet": "0.4.0",
"fsspec": "0.7.4",
"html5lib": "1.1",
"gcsfs": "0.6.0",
"jinja2": "2.11",
"lxml.etree": "4.5.0",
"matplotlib": "3.3.2",
"numba": "0.50.1",
"numexpr": "2.7.1",
"odfpy": "1.4.1",
"openpyxl": "3.0.3",
"pandas_gbq": "0.14.0",
"psycopg2": "2.8.4", # (dt dec pq3 ext lo64)
"pymysql": "0.10.1",
"pyarrow": "1.0.1",
"pyreadstat": "1.1.0",
"pytest": "6.0",
"pyxlsb": "1.0.6",
"s3fs": "0.4.0",
Expand All @@ -33,7 +40,6 @@
"xlrd": "2.0.1",
"xlwt": "1.3.0",
"xlsxwriter": "1.2.2",
"numba": "0.50.1",
"zstandard": "0.15.2",
}

Expand All @@ -46,6 +52,7 @@
"lxml.etree": "lxml",
"odf": "odfpy",
"pandas_gbq": "pandas-gbq",
"tables": "pytables",
"sqlalchemy": "SQLAlchemy",
"jinja2": "Jinja2",
}
Expand All @@ -59,6 +66,9 @@ def get_version(module: types.ModuleType) -> str:

if version is None:
raise ImportError(f"Can't determine version for {module.__name__}")
if module.__name__ == "psycopg2":
# psycopg2 appends " (dt dec pq3 ext lo64)" to its version
version = version.split()[0]
return version


Expand Down
81 changes: 81 additions & 0 deletions scripts/validate_min_versions_in_sync.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
"""
Check pandas required and optional dependencies are synced across:
ci/deps/actions-.*-minimum_versions.yaml
pandas/compat/_optional.py
TODO: doc/source/getting_started/install.rst
This is meant to be run as a pre-commit hook - to run it manually, you can do:
pre-commit run validate-min-versions-in-sync --all-files
"""
from __future__ import annotations

import pathlib
import sys

# Documentation page listing the minimum versions. It is not referenced by the
# code visible in this script; the module docstring marks checking it as TODO.
DOC_PATH = pathlib.Path("doc/source/getting_started/install.rst").resolve()
# First CI environment file whose name matches the minimum-versions pattern.
# next() raises StopIteration at import time if the glob matches nothing, and
# any additional matching files are not looked at.
CI_PATH = next(
pathlib.Path("ci/deps").absolute().glob("actions-*-minimum_versions.yaml")
)
# Source file that declares the VERSIONS / INSTALL_MAPPING tables read below.
CODE_PATH = pathlib.Path("pandas/compat/_optional.py").resolve()
# pandas package is not available
# in pre-commit environment
# so the two modules needed are imported as plain top-level modules instead.
# The appended paths are relative -- presumably the script has to be run from
# the repository root for them to resolve (confirm).
sys.path.append("pandas/compat")
sys.path.append("pandas/util")
import version

# Register the standalone module under its packaged name before importing
# _optional, so that an import of ``pandas.util.version`` can be served from
# sys.modules (presumably _optional imports it under that name -- confirm).
sys.modules["pandas.util.version"] = version
import _optional


def get_versions_from_code() -> dict[str, str]:
    """
    Collect the minimum versions declared in ``pandas/compat/_optional.py``.

    Each key of ``_optional.VERSIONS`` is translated through
    ``_optional.INSTALL_MAPPING`` (falling back to the key itself when no
    mapping exists) and case-folded. The ``"pytest"`` entry is left out.

    Returns
    -------
    dict[str, str]
        Case-folded install name mapped to its minimum version string.
    """
    name_overrides = _optional.INSTALL_MAPPING
    declared = _optional.VERSIONS
    result: dict[str, str] = {}
    for import_name, min_version in declared.items():
        if import_name == "pytest":
            continue
        install_name = name_overrides.get(import_name, import_name)
        result[install_name.casefold()] = min_version
    return result


def get_versions_from_ci(content: list[str]) -> tuple[dict[str, str], dict[str, str]]:
    """
    Extract pinned versions from the lines of a CI minimum-versions YAML file.

    The file is scanned line by line instead of being parsed as YAML, because
    the ``# required dependencies`` and ``# optional dependencies`` comments
    are what delimit the two sections. Nothing is collected before the
    required marker has been seen; once the optional marker has been seen,
    entries go to the optional mapping.

    Parameters
    ----------
    content : list[str]
        Lines of the YAML file, e.g. as returned by ``readlines()``.

    Returns
    -------
    tuple[dict[str, str], dict[str, str]]
        ``(required, optional)`` mappings of package name to pinned version.
        A package listed without a pin maps to ``""`` so that callers can
        report it as a mismatch rather than crash while parsing.
    """
    # Don't parse with pyyaml because it ignores comments we're looking for
    seen_required = False
    seen_optional = False
    required_deps: dict[str, str] = {}
    optional_deps: dict[str, str] = {}
    for line in content:
        if "# required dependencies" in line:
            seen_required = True
            continue
        if "# optional dependencies" in line:
            seen_optional = True
            continue
        entry = line.strip()
        # Only "- <spec>" list items are dependencies; blank lines and stray
        # comments inside a section are skipped instead of raising.
        if not seen_required or not entry.startswith("- "):
            continue
        # partition() never raises: an unpinned package yields an empty pin,
        # and lstrip("=") lets a pip-style "==" pin parse like a conda "=" pin.
        package, _, pinned = entry[2:].partition("=")
        target = optional_deps if seen_optional else required_deps
        target[package.strip()] = pinned.lstrip("=").strip()
    return required_deps, optional_deps


def main():
    """
    Check that optional-dependency minimum versions agree between CI and code.

    Reads the CI environment file at ``CI_PATH`` and the versions returned by
    ``get_versions_from_code()``. When the two sets of ``(package, version)``
    pairs differ, the differing pairs are written to stdout in sorted order
    and the process exits with status 1; otherwise it exits with status 0.
    """
    with open(CI_PATH, encoding="utf-8") as f:
        # Only the optional section is compared; the required pins are unused.
        _, ci_optional = get_versions_from_ci(f.readlines())
    code_optional = get_versions_from_code()
    diff = set(ci_optional.items()).symmetric_difference(code_optional.items())
    if diff:
        # Sorted so the report is stable between runs (set order is not).
        sys.stdout.write(
            f"The following minimum version differences were found between "
            f"{CI_PATH} and {CODE_PATH}. Please ensure these are aligned: "
            f"{sorted(diff)}\n"
        )
        sys.exit(1)
    sys.exit(0)


if __name__ == "__main__":
main()

0 comments on commit f939b2c

Please sign in to comment.