Skip to content

Commit

Permalink
solver: invert heuristics for choosing the next dependency to resolve…
Browse files Browse the repository at this point in the history
… so that dependencies with more versions are resolved first

Although the original algorithm proposes to choose dependencies with fewer versions first, we don't know about any real-world examples where this is important. However, we know about real-world examples (especially boto3 vs. urllib3, but also Sphinx vs. docutils) where it's better to choose dependencies with more versions first.

Co-authored-by: David Hotham <david.hotham@blueyonder.co.uk>
  • Loading branch information
radoering and dimbleby committed Aug 17, 2023
1 parent 5fe3a91 commit abd6788
Show file tree
Hide file tree
Showing 3 changed files with 74 additions and 43 deletions.
15 changes: 10 additions & 5 deletions src/poetry/mixology/version_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,8 +440,13 @@ class Preference:
LOCKED = 3
DEFAULT = 4

# Prefer packages with as few remaining versions as possible,
# so that if a conflict is necessary it's forced quickly.
# The original algorithm proposes to prefer packages with as few remaining
# versions as possible, so that if a conflict is necessary it's forced quickly.
# /~https://github.com/dart-lang/pub/blob/master/doc/solver.md#decision-making
# However, this leads to the famous boto3 vs. urllib3 issue, so we prefer
# packages with more remaining versions (see
# /~https://github.com/python-poetry/poetry/pull/8255#issuecomment-1657198242
# for more details).
# In order to provide results that are as deterministic as possible
# and consistent between `poetry lock` and `poetry update`, the return value
# of two different dependencies should not be equal if possible.
Expand All @@ -450,15 +455,15 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]:
# a regular dependency for some package only to find later that we had a
# direct-origin dependency.
if dependency.is_direct_origin():
return False, Preference.DIRECT_ORIGIN, 1
return False, Preference.DIRECT_ORIGIN, -1

is_specific_marker = not dependency.marker.is_any()

use_latest = dependency.name in self._provider.use_latest
if not use_latest:
locked = self._provider.get_locked(dependency)
if locked:
return is_specific_marker, Preference.LOCKED, 1
return is_specific_marker, Preference.LOCKED, -1

num_packages = len(
self._dependency_cache.search_for(
Expand All @@ -472,7 +477,7 @@ def _get_min(dependency: Dependency) -> tuple[bool, int, int]:
preference = Preference.USE_LATEST
else:
preference = Preference.DEFAULT
return is_specific_marker, preference, num_packages
return is_specific_marker, preference, -num_packages

dependency = min(unsatisfied, key=_get_min)

Expand Down
82 changes: 55 additions & 27 deletions tests/mixology/version_solver/test_backtracking.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_backjumps_after_partial_satisfier(
add_to_repo(repo, "y", "1.0.0")
add_to_repo(repo, "y", "2.0.0")

check_solver_result(root, provider, {"c": "1.0.0", "y": "2.0.0"}, tries=2)
check_solver_result(root, provider, {"c": "1.0.0", "y": "2.0.0"}, tries=4)


def test_rolls_back_leaf_versions_first(
Expand Down Expand Up @@ -132,32 +132,6 @@ def test_backjump_to_nearer_unsatisfied_package(
)


def test_traverse_into_package_with_fewer_versions_first(
root: ProjectPackage, provider: Provider, repo: Repository
) -> None:
# Dependencies are ordered so that packages with fewer versions are tried
# first. Here, there are two valid solutions (either a or b must be
# downgraded once). The chosen one depends on which dep is traversed first.
# Since b has fewer versions, it will be traversed first, which means a will
# come later. Since later selections are revised first, a gets downgraded.
root.add_dependency(Factory.create_dependency("a", "*"))
root.add_dependency(Factory.create_dependency("b", "*"))

add_to_repo(repo, "a", "1.0.0", deps={"c": "*"})
add_to_repo(repo, "a", "2.0.0", deps={"c": "*"})
add_to_repo(repo, "a", "3.0.0", deps={"c": "*"})
add_to_repo(repo, "a", "4.0.0", deps={"c": "*"})
add_to_repo(repo, "a", "5.0.0", deps={"c": "1.0.0"})
add_to_repo(repo, "b", "1.0.0", deps={"c": "*"})
add_to_repo(repo, "b", "2.0.0", deps={"c": "*"})
add_to_repo(repo, "b", "3.0.0", deps={"c": "*"})
add_to_repo(repo, "b", "4.0.0", deps={"c": "2.0.0"})
add_to_repo(repo, "c", "1.0.0")
add_to_repo(repo, "c", "2.0.0")

check_solver_result(root, provider, {"a": "4.0.0", "b": "4.0.0", "c": "2.0.0"})


def test_backjump_past_failed_package_on_disjoint_constraint(
root: ProjectPackage, provider: Provider, repo: Repository
) -> None:
Expand All @@ -176,3 +150,57 @@ def test_backjump_past_failed_package_on_disjoint_constraint(
add_to_repo(repo, "foo", "2.0.4")

check_solver_result(root, provider, {"a": "1.0.0", "foo": "2.0.4"})


def test_backtracking_performance_level_1(
root: ProjectPackage, provider: Provider, repo: Repository
) -> None:
"""
This test takes quite long if an unfavorable heuristics is chosen
to select the next package to resolve.
B depends on A, but does not support the latest version of A.
B has a lot more versions than A.
Test for boto3/botocore vs. urllib3 issue in its simple form.
"""
root.add_dependency(Factory.create_dependency("a", "*"))
root.add_dependency(Factory.create_dependency("b", "*"))

add_to_repo(repo, "a", "1")
add_to_repo(repo, "a", "2")

b_max = 500
for i in range(1, b_max + 1):
add_to_repo(repo, "b", str(i), deps={"a": "<=1"})

check_solver_result(root, provider, {"a": "1", "b": str(b_max)})


def test_backtracking_performance_level_2(
root: ProjectPackage, provider: Provider, repo: Repository
) -> None:
"""
Similar to test_backtracking_performance_level_1,
but with one more level of dependencies.
C depends on B depends on A, but B does not support the latest version of A.
The root dependency only requires A and C so there is no direct dependency between
these two.
B and C have a lot more versions than A.
Test for boto3/botocore vs. urllib3 issue in its more complex form.
"""
root.add_dependency(Factory.create_dependency("a", "*"))
root.add_dependency(Factory.create_dependency("c", "*"))

add_to_repo(repo, "a", "1")
add_to_repo(repo, "a", "2")

bc_max = 500
for i in range(1, bc_max + 1):
add_to_repo(repo, "b", str(i), deps={"a": "<=1"})
for i in range(1, bc_max + 1):
add_to_repo(repo, "c", str(i), deps={"b": f"<={i}"})

check_solver_result(root, provider, {"a": "1", "b": str(bc_max), "c": str(bc_max)})
20 changes: 9 additions & 11 deletions tests/puzzle/test_solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -4265,7 +4265,7 @@ def test_update_with_use_latest_vs_lock(
A1 depends on B2, A2 and A3 depend on B1. Same for C.
B1 depends on A2/C2, B2 depends on A1/C1.
Because there are fewer versions B than of A and C, B is resolved first
Because there are more versions of B than of A and C, B is resolved first
so that latest version of B is used.
There shouldn't be a difference between `poetry lock` (not is_locked)
and `poetry update` (is_locked + use_latest)
Expand All @@ -4277,37 +4277,35 @@ def test_update_with_use_latest_vs_lock(
package.add_dependency(Factory.create_dependency("C", "*"))

package_a1 = get_package("A", "1")
package_a1.add_dependency(Factory.create_dependency("B", "2"))
package_a1.add_dependency(Factory.create_dependency("B", "3"))
package_a2 = get_package("A", "2")
package_a2.add_dependency(Factory.create_dependency("B", "1"))
package_a3 = get_package("A", "3")
package_a3.add_dependency(Factory.create_dependency("B", "1"))

package_c1 = get_package("C", "1")
package_c1.add_dependency(Factory.create_dependency("B", "2"))
package_c1.add_dependency(Factory.create_dependency("B", "3"))
package_c2 = get_package("C", "2")
package_c2.add_dependency(Factory.create_dependency("B", "1"))
package_c3 = get_package("C", "3")
package_c3.add_dependency(Factory.create_dependency("B", "1"))

package_b1 = get_package("B", "1")
package_b1.add_dependency(Factory.create_dependency("A", "2"))
package_b1.add_dependency(Factory.create_dependency("C", "2"))
package_b2 = get_package("B", "2")
package_b2.add_dependency(Factory.create_dependency("A", "1"))
package_b2.add_dependency(Factory.create_dependency("C", "1"))
package_b3 = get_package("B", "3")
package_b3.add_dependency(Factory.create_dependency("A", "1"))
package_b3.add_dependency(Factory.create_dependency("C", "1"))

repo.add_package(package_a1)
repo.add_package(package_a2)
repo.add_package(package_a3)
repo.add_package(package_b1)
repo.add_package(package_b2)
repo.add_package(package_b3)
repo.add_package(package_c1)
repo.add_package(package_c2)
repo.add_package(package_c3)

if is_locked:
locked = [package_a1, package_b2, package_c1]
locked = [package_a1, package_b3, package_c1]
use_latest = [package.name for package in locked]
else:
locked = []
Expand All @@ -4320,7 +4318,7 @@ def test_update_with_use_latest_vs_lock(
transaction,
[
{"job": "install", "package": package_c1},
{"job": "install", "package": package_b2},
{"job": "install", "package": package_b3},
{"job": "install", "package": package_a1},
],
)

0 comments on commit abd6788

Please sign in to comment.