Axelrod-Python · marcharper · Jan 30, 2019 · Dec 31, 2018 · Jan 3, 2019 · Jan 4, 2019
diff --git a/axelrod/strategies/finite_state_machines.py b/axelrod/strategies/finite_state_machines.py
@@ -1,9 +1,348 @@
 from axelrod.action import Action
 from axelrod.player import Player
+from collections import defaultdict, namedtuple
 
 C, D = Action.C, Action.D
 
 
+def get_accessible_transitions(transitions: dict, initial_state: int) -> dict:
+    """Return the subset of transitions whose source state can be reached
+    from initial_state (keys are (state, opponent_action), values are
+    (next_state, next_action)).  States without outgoing transitions are ok.
+    """
+    # Adjacency list: source state -> states reachable in one transition.
+    edge_dict = defaultdict(list)
+    for k, v in transitions.items():
+        edge_dict[k[0]].append(v[0])
+
+    # Depth-first search from initial_state.  A set is used (rather than a
+    # dict pre-filled from the keys) so that a state that only ever appears
+    # as a destination does not raise a KeyError.
+    accessible_states = {initial_state}
+    state_stack = [initial_state]
+    while state_stack:
+        state = state_stack.pop()
+        # .get avoids inserting new keys into the defaultdict.
+        for next_state in edge_dict.get(state, ()):
+            if next_state not in accessible_states:
+                accessible_states.add(next_state)
+                state_stack.append(next_state)
+
+    return {
+        k: v for k, v in transitions.items()
+        if k[0] in accessible_states
+    }
+
+
+def get_memory_from_transitions(transitions: dict, initial_state: int = None,
+                                print_trace: bool = False,
+                                print_output: bool = False) -> int:
+    """This function calculates the memory of an FSM from the transitions.
+
+    Assume that transitions are a dict with entries like
+    (state, last_opponent_action): (next_state, next_action)
+
+    We look at all the next_actions for all the transitions.  If these aren't
+    all the same, then we attach 1 turn worth of memory (this strategy's
+    previous action and the opponent's previous action).  We can get the
+    opponent's previous action from the transition, but to get this strategy's
+    previous action, we need to consider all incoming transitions into the
+    current state.  [We call this walking backwards through the graph along the
+    path given by the incoming transition.]  There may be zero or one or
+    multiple incoming transitions into each state, creating multiple paths we
+    could walk along.  We call these branches, and keep track of all branches.
+
+    Along each branch there is a chain of actions.  After 1 step, it may be CC,
+    CD, DC, or DD.  [However we write "CC" like "_/C, C/_" though to establish
+    that letters to the left of the "/" are the opponent's moves, while the
+    letters to the right are this strategy's moves.]  For each chain of actions,
+    we gather the branches that match that chain.  If all these branches have
+    the same next_action, then we know what to do following that chain of
+    actions.  In that case we call these branches decided.  With undecided
+    branches, we continue to walk back.  We repeat until all branches are
+    decided.  The number of steps that this takes is the memory of the FSM.
+
+    If however, there are still undecided branches after E*(E-1) steps (where E
+    is the number of transitions), then the memory must be infinite, and we
+    return float("inf").  This is shown elsewhere.  If print_trace is set,
+    each step's branches are printed; print_output prints decided chains.
+
+    As an example, we show how the Fortress3 (defined below) strategy would
+    work.
+
+    Fortress3 is given by the transitions:
+    transitions = (
+    (1, C, 1, D),
+    (1, D, 2, D),
+    (2, C, 1, D),
+    (2, D, 3, C),
+    (3, C, 3, C),
+    (3, D, 1, D),
+    )
+
+    In the first step, we just check transitions' next-actions.
+    We list transitions as state:prev_opponent_action;next_action:
+    1:C;D | Back-trace: _ | On-state: 1
+    1:D;D | Back-trace: _ | On-state: 1
+    2:C;D | Back-trace: _ | On-state: 2
+    2:D;C | Back-trace: _ | On-state: 2
+    3:C;C | Back-trace: _ | On-state: 3
+    3:D;D | Back-trace: _ | On-state: 3
+
+    In the second step, we walk backwards along incoming transitions.
+    We continue to label each branch by its ending
+    state:previous_opponent_action;next_action, but we list the state that
+    we're on at this point in time:
+    1:C;D | Back-trace: _/D, C/_ | On-state: 1
+    1:C;D | Back-trace: _/D, C/_ | On-state: 2
+    1:C;D | Back-trace: _/D, C/_ | On-state: 3
+    2:C;D | Back-trace: _/D, C/_ | On-state: 1
+    1:D;D | Back-trace: _/D, D/_ | On-state: 1
+    1:D;D | Back-trace: _/D, D/_ | On-state: 2
+    1:D;D | Back-trace: _/D, D/_ | On-state: 3
+    2:D;C | Back-trace: _/D, D/_ | On-state: 1
+    3:C;C | Back-trace: _/C, C/_ | On-state: 2
+    3:C;C | Back-trace: _/C, C/_ | On-state: 3
+    3:D;D | Back-trace: _/C, D/_ | On-state: 2
+    3:D;D | Back-trace: _/C, D/_ | On-state: 3
+
+    From here we can conclude that:
+    If _/D, C/_, then D
+    If _/C, C/_, then C
+    If _/C, D/_, then D
+
+    We remove the branches that correspond to those action chains.  But we
+    continue to walk back the _/D, D/_ branches:
+    1:D;D | Back-trace: _/D, C/D, D/_ | On-state: 1
+    1:D;D | Back-trace: _/D, C/D, D/_ | On-state: 2
+    1:D;D | Back-trace: _/D, C/D, D/_ | On-state: 3
+    1:D;D | Back-trace: _/D, C/D, D/_ | On-state: 1
+    1:D;D | Back-trace: _/C, D/D, D/_ | On-state: 2
+    1:D;D | Back-trace: _/C, D/D, D/_ | On-state: 3
+    2:D;C | Back-trace: _/D, D/D, D/_ | On-state: 1
+    2:D;C | Back-trace: _/D, D/D, D/_ | On-state: 2
+    2:D;C | Back-trace: _/D, D/D, D/_ | On-state: 3
+
+    From here we conclude that:
+    If _/D, C/D, D/_, then D
+    If _/C, D/D, D/_, then D
+    If _/D, D/D, D/_, then C
+
+    There are no more undecided branches, so we stop and say that the memory
+    is 2.
+    """
+    # If initial_state is set, use this to determine which transitions are
+    # reachable from the initial_state and restrict to those.
+    if initial_state is not None:
+        transitions = get_accessible_transitions(transitions, initial_state)
+
+    # First make a back_transitions dict from transitions.  This is keyed on
+    # states, and a list of "BackTrans" (one for each transition incoming to
+    # that state) as values.
+    back_transitions = defaultdict(list)
+    # A "BackTrans" has the previous state and previous action/reaction pair.
+    BackTrans = namedtuple("BackTrans", ["prev_state", "prev_reaction",
+                                         "prev_opp_action"])
+    for k, v in transitions.items():
+        state = k[0]
+        last_opponent_action = k[1]
+        next_state = v[0]
+        next_action = v[1]
+
+        back_transitions[next_state].append(BackTrans(state,
+                                                      next_action,
+                                                      last_opponent_action))
+
+    class ActionChain(object):
+        """A list of actions.  Made a class so that we can hash."""
+        def __init__(self, initial_list: list = None) -> None:
+            if initial_list is None:
+                initial_list = list()
+            self.actions = initial_list[:]
+
+        def __eq__(self, other) -> bool:
+            return self.actions == other.actions
+
+        def __repr__(self) -> str:
+            """
+            This is a way to represent a memory of a certain length.  We
+            represent history as an opponent_action/this_player_reaction
+            separated by commas, with the most recent pair listed last.
+
+            Because knowing the left half of the _/_ action-reaction requires
+            more memory than knowing the right half, we will have a blank on the
+            oldest pair.
+            """
+            if len(self.actions) == 0:
+                return "_"
+
+            # The first action on the list will be the opponent's previous
+            # action.  We don't know yet how we will respond, so we leave a
+            # blank (_).
+            action_str = "{}/_".format(self.actions[0])
+            # Then we go backwards.  The next actions on the list are the
+            # opponent's previous actions, our previous actions, alternatively.
+            i = 1
+            while i < len(self.actions)-2:
+                action_str = "{}/{}, {}".format(self.actions[i+1],
+                                                self.actions[i], action_str)
+                i += 2
+            # The oldest action we'll have will be our response to an unknown
+            # opponent action.
+            action_str = "_/{}, {}".format(self.actions[-1], action_str)
+
+            return action_str
+
+        def __hash__(self) -> int:
+            return hash(repr(self))
+
+        def append(self, action: Action) -> None:
+            self.actions.append(action)
+
+    class Branch(object):
+        """A chain of previous actions.  With other information captured, like
+        state, so that we can continue to walk backwards.
+        """
+        def __init__(self, trans: tuple = None) -> None:
+            if trans is None:
+                return
+
+            state = trans[0]
+            last_opponent_action = trans[1]
+            next_state = trans[2]
+            next_action = trans[3]
+
+            self.num_moves_recorded = 0
+            self.action_chain = ActionChain([])
+            self.next_action = next_action
+            self.on_state = state
+            # The information that we have available at any step will be half of
+            # next step's history.  So we keep this in a buffer.
+            self.buffer = last_opponent_action
+
+            # For debugging
+            self.initial_trans = "{}:{}".format(state, last_opponent_action)
+
+        def step(self, backtrans: BackTrans) -> "Branch":
+            """Continues to walk (or branch) backwards from where the branch
+            leaves off, given a path (backtrans) to walk backwards along.  This
+            will return a Branch instance.
+            """
+            new_branch = Branch()
+            new_branch.num_moves_recorded = self.num_moves_recorded + 1
+            new_branch.action_chain = ActionChain(self.action_chain.actions)
+            new_branch.action_chain.append(self.buffer)
+            new_branch.action_chain.append(backtrans.prev_reaction)
+            new_branch.next_action = self.next_action
+            new_branch.on_state = backtrans.prev_state
+            # This needs one more memory to know.
+            new_branch.buffer = backtrans.prev_opp_action
+
+            new_branch.initial_trans = self.initial_trans
+
+            return new_branch
+
+        def debug_str(self) -> str:
+            return "{};{} | Back-trace: {} | On-state: {}".format(
+                    self.initial_trans, self.next_action,
+                    repr(self.action_chain), self.on_state)
+
+    BranchList = namedtuple("BranchList", ("branch_list", "next_actions"))
+
+    class BranchPool(object):
+        """We keep branches in the branch_pool, grouped by common-end
+        ActionChains.  A common-end ActionChain is a chain of N actions
+        occurring most-recently that is common to all branches in the group.
+        Specifically branch_pool is a dict with keys given by common-end
+        ActionChains, and with dict-values given by a list of branches and the
+        set of possible next_actions for these branches.
+
+        The set of possible next_actions is the set of actions that this FSM may
+        choose to do following the chain of actions given in the key.  When
+        there is a single action, we know that the strategy will make that
+        action; we call the branches with that chain of actions "decided" at
+        this point.
+        """
+        def __init__(self) -> None:
+            self.clear()
+
+        def push(self, branch: Branch) -> None:
+            """Just adds a branch to the branch_pool."""
+            common_branches = self.branch_pool[branch.action_chain]
+            common_branches.next_actions.add(branch.next_action)
+            common_branches.branch_list.append(branch)
+
+        def branches(self) -> Branch:
+            """An iterator that loops through all the branches in the
+            branch_pool.
+            """
+            for v in self.branch_pool.values():
+                for branch in v.branch_list:
+                    yield branch
+
+        def clear(self) -> None:
+            """Empty the branch_pool."""
+            self.branch_pool = defaultdict(lambda: BranchList(list(), set()))
+
+        def remove_decided_branches(self) -> dict:
+            """We call a branch "decided" if all branches with that common-end
+            (end of ActionChain) give the same next_action.  This function
+            removes those from the branch_pool, and returns these as a dict
+            keyed by common-end ActionChain, and with dict-values given by the
+            common next_action.
+            """
+            decided_branches = dict()
+            for k, v in self.branch_pool.items():
+                if len(v.next_actions) == 1:
+                    decided_branches[k] = list(v.next_actions)[0]
+            for k in decided_branches:
+                del self.branch_pool[k]
+            return decided_branches
+
+        def __bool__(self) -> bool:
+            return len(self.branch_pool) > 0
+
+    # Set up variables
+    num_edges = len(transitions)
+    waiting, processed = BranchPool(), BranchPool()
+    if print_trace:
+        print("STEP 0")
+        print("===============")
+    for k, v in transitions.items():
+        trans_branch = Branch((k[0], k[1], v[0], v[1]))
+        processed.push(trans_branch)
+        if print_trace:
+            print(trans_branch.debug_str())
+    processed.remove_decided_branches()
+
+    steps = 0
+    while processed:
+        steps += 1
+        if print_trace:
+            print("STEP {}".format(steps))
+            print("===============")
+        if steps > num_edges*(num_edges-1):
+            return float("inf")
+        # Move processed to waiting
+        for branch in processed.branches():
+            waiting.push(branch)
+        processed.clear()
+        # Now process the waiting list.
+        for branch in waiting.branches():
+            for backtrans in back_transitions[branch.on_state]:
+                processed.push(branch.step(backtrans))
+        if print_trace:
+            for branch in processed.branches():
+                print(branch.debug_str())
+        waiting.clear()
+        # And remove decided branches.
+        decided_branches = processed.remove_decided_branches()
+        if print_output:
+            for k, v in decided_branches.items():
+                print("If {}, then {}".format(k, v))
+    return steps
+
+
 class SimpleFSM(object):
     """Simple implementation of a finite state machine that transitions
     between states based on the last round of play.