-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgolf_search.py
189 lines (166 loc) · 6.72 KB
/
golf_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
import logging
import math
import typing
from _weakref import ref
from datetime import datetime
from itertools import count
from golf import GolfState
from heuristic import Heuristic
logger = logging.getLogger(__name__)
class SearchNode:
# Controls exploration of new nodes vs. exploitation of good nodes.
exploration_weight = 1.0
def __init__(self,
golf_state: GolfState,
parent: typing.Self = None,
move_text: str = None,
depth: int = 0):
""" Initialize an instance.
:param golf_state: the board state that this node represents
:param parent: the board state that this node came from
:param move_text: the move to get from parent to this node
:param depth: the number of moves to get to this node from start state
"""
self.state_bytes = golf_state.to_bytes()
self.state_ref = ref(golf_state)
self.parent = parent
self.move_text = move_text
self.depth = depth
self.children: typing.Dict[str, SearchNode] | None = None
self.average_value = 0.0
self.value_count = 0
def __repr__(self):
game_state = GolfState(state_bytes=self.state_bytes)
return f"SearchNode({game_state!r})"
@property
def golf_state(self):
golf_state = self.state_ref()
if golf_state is not None:
return golf_state
# Resurrect state from compressed version
golf_state = GolfState(state_bytes=self.state_bytes)
self.state_ref = ref(golf_state)
return golf_state
def select_leaf(self):
if self.value_count <= 1:
return self
children = self.find_all_children()
if not children:
return self
best_score = float('-inf')
best_child = None
for move_text, child in children.items():
prior = 1/len(children)
score = child.average_value + (self.exploration_weight * prior *
math.sqrt(self.value_count) /
(1 + child.value_count))
if score > best_score:
best_score = score
best_child = child
# if self.depth == 0:
# print(f'Selected child {best_child.move_text} with score {best_score}.')
return best_child.select_leaf()
def find_all_children(self) -> typing.Dict[str, typing.Self]:
if self.children is not None:
return self.children
children: typing.Dict[str, SearchNode] = {}
current_state = self.golf_state
if current_state.is_solved:
return children
for move in current_state.find_moves():
child_state = current_state.move(move)
children[move.uci()] = SearchNode(child_state, self, move.uci(), self.depth + 1)
self.children = children
return children
def record_value(self, value: float):
self.average_value = ((self.average_value * self.value_count + value) /
(self.value_count + 1))
self.value_count += 1
if self.parent:
self.parent.record_value(value)
def evaluate(self, heuristic: Heuristic):
children = self.find_all_children()
if not children:
self.record_value(self.average_value)
return
for move_text, child in children.items():
value = heuristic.analyse(child.golf_state, move_text, child.depth)
child.record_value(value)
def rank_children(self) -> typing.Tuple[typing.List[str],
typing.List[float]]:
move_list = []
weights = []
for move_text, child in self.find_all_children().items():
move_list.append(move_text)
weights.append(child.average_value)
min_weight = min(weights)
if min_weight <= 0:
weights = [weight - min_weight + 1 for weight in weights]
return move_list, weights
class SearchManager:
def __init__(self,
start_state: GolfState,
heuristic: Heuristic,
process_count: int = 1):
self.start_state = start_state
self.heuristic = heuristic
self.current_node = self.reset()
self.process_count = process_count
self.search_count = 0
self.total_iterations = 0
self.total_milliseconds = 0
self.best_solution_node: SearchNode | None = None
def find_node(self, game_state: GolfState):
state_bytes = game_state.to_bytes()
if not game_state == self.current_node.state_bytes:
for child in self.current_node.find_all_children().values():
if state_bytes == child.state_bytes:
self.current_node = child
break
else:
parent = self.current_node.parent
if parent is not None and state_bytes == parent.state_bytes:
self.current_node = parent
else:
self.current_node = SearchNode(game_state)
def search(self,
board: GolfState,
iterations: int | None = None,
milliseconds: int | None = None):
start_time = datetime.now()
self.find_node(board)
best_solution_depth: int | None = None
self.best_solution_node = None
visited = set()
for iteration in count(1):
if iteration % 1000 == 0:
print(f'Visited {len(visited)} nodes.')
leaf = self.current_node.select_leaf()
visited.add(leaf.state_bytes)
if not leaf.find_all_children():
# Found a solution, is it the best so far?
if best_solution_depth is None or leaf.depth < best_solution_depth:
best_solution_depth = leaf.depth
self.best_solution_node = leaf
leaf.evaluate(self.heuristic)
if iterations is not None:
if iteration >= iterations:
break
else:
assert milliseconds is not None
spent_seconds = (datetime.now() - start_time).total_seconds()
if spent_seconds*1000 > milliseconds:
break
def reset(self) -> SearchNode:
self.current_node = SearchNode(self.start_state)
return self.current_node
def get_solution(self) -> typing.List[str]:
node = self.best_solution_node
if node is None:
raise RuntimeError('No solution found.')
moves = []
while node.move_text is not None:
moves.append(node.move_text)
node = node.parent
moves.reverse()
return moves