Update tracking deception to output in TACC log #18

Open · wants to merge 56 commits into base: old-main
56 commits
3aa5a9e
Update README.md
wwongkamjan Mar 1, 2023
b3676cc
fix two engines phase inconsistency
May 9, 2023
9832e21
fix arg for dipcc R phase
May 15, 2023
f8668a2
math.ceil
May 18, 2023
988177d
silent cicero
Jun 4, 2023
cf3368e
set wait and end phase for human games
Jul 11, 2023
9439d75
fairdiplomacy_external/mila_api.py
Jul 18, 2023
a07831f
update daide2eng
Jul 19, 2023
0e6a1a8
stop comm using remaining time
Aug 1, 2023
0e59aff
fix calling remaining time
Aug 3, 2023
2d42d7f
update should_stop
Aug 3, 2023
65eddb8
presubmit order
Aug 4, 2023
d98070d
fix deadline=0 case
Aug 7, 2023
507b095
fix inconsistent in phase when calling game.schedule
Aug 8, 2023
034d06e
track time_sent of prev message
Sep 20, 2023
587694c
remove internal response
Sep 28, 2023
73a0e87
Update requirements.txt
wwongkamjan Sep 28, 2023
9aecdfe
remove daide2eng for human games
Sep 29, 2023
03bd537
get back daide2eng
Sep 29, 2023
5687751
add cicero intent experiment
Oct 20, 2023
493b155
fix update press: most recent time
Oct 23, 2023
3962137
update: test intent and test value table
Oct 24, 2023
a2424ea
add human AI game
Oct 24, 2023
a5c76aa
fix json error
Oct 25, 2023
ec520b8
update time to submit
Oct 29, 2023
d7ff6d8
add greedy PO
Oct 29, 2023
4c76344
remove message printing in two files
Oct 30, 2023
d06b778
update assert deadline
Oct 30, 2023
5254651
update game view of dipcc
Oct 30, 2023
ed3d547
update msg sleeptime 10 and mila->dipcc with timestamp now
Nov 3, 2023
f6f7a99
add val to them and lie po to deceptive agent
Nov 5, 2023
0cd33cb
update deceptive intent, bug fixes
Nov 10, 2023
b6bca77
searchbot change their po
Nov 17, 2023
b73c406
add stance vector to decide when to lie
Dec 23, 2023
43379ae
force key, value in game features for stance vector error
Dec 30, 2023
207b3af
improve log stance vector
Jan 2, 2024
0233b34
update deceptive cicero
Feb 21, 2024
608e0bb
update persuasion in deception
Mar 2, 2024
826af0b
update obj func and test lie from human games
Mar 15, 2024
c47be2c
find joint action in deceptive PO and BR
Apr 7, 2024
a970b00
trainsl and some lie results
Oct 4, 2024
9d94d77
remove print in c++
Oct 4, 2024
2066353
add get_stance_vectors in game c++ header
Oct 4, 2024
ca4eebd
debug cc++
Oct 4, 2024
aa4a0e2
debug cc++
Oct 4, 2024
7324f22
debug cc++
Oct 4, 2024
1d4a79e
debug cc++
Oct 4, 2024
95680b7
debug cc++
Oct 4, 2024
e243611
debug cc++
Oct 4, 2024
26782bc
debug cc++
Oct 4, 2024
02352df
debug cc++
Oct 4, 2024
8e01fcf
debug cc++
Oct 4, 2024
04732ca
debug cc++
Oct 4, 2024
102497c
debug cc++
Oct 4, 2024
d492481
update dataset with stance, stance_action_prob, stance_loss, action_test
Nov 5, 2024
53e3421
debug and unit test for dataset.py
Dec 2, 2024
4 changes: 2 additions & 2 deletions README.md
@@ -85,11 +85,11 @@ After each pull it's recommended to run `make` to re-compile internal C++ and pr
module load tacc-singularity
git clone --recursive /~https://github.com/ALLAN-DIP/diplomacy_cicero.git

cp -r /corral/projects/DARPA-SHADE/Shared/cicero "$WORK"
cp -r /corral/projects/DARPA-SHADE/Shared/cicero $WORK
cp /corral/projects/DARPA-SHADE/Shared/UMD/pytorch_model.bin "$WORK"/diplomacy_cicero/fairdiplomacy/AMR/amrlib/amrlib/data/model_parse_xfm/checkpoint-9920/
export CICERO=$WORK/cicero

cd "$CICERO"
cd $CICERO
singularity run --nv \
--bind "$WORK"/diplomacy_cicero/fairdiplomacy/agents/:/diplomacy_cicero/fairdiplomacy/agent \
--bind "$WORK"/diplomacy_cicero/fairdiplomacy_external:/diplomacy_cicero/fairdiplomacy_external \
1 change: 1 addition & 0 deletions dipcc/dipcc/cc/data_fields.cc
@@ -35,6 +35,7 @@ TensorDict new_data_fields(long B, int input_version, long max_seq_len,
if (include_power) {
fields["x_power"] = torch::full({B, 7, max_seq_len}, -1, torch::kLong);
}
fields["x_stance_vectors"] = torch::empty({B, 49}, torch::kFloat32);

return fields;
}
10 changes: 10 additions & 0 deletions dipcc/dipcc/cc/game.cc
@@ -91,6 +91,8 @@ void Game::process() {
GameState &Game::get_state() { return *state_; }
const GameState &Game::get_state() const { return *state_; }

std::unordered_map<Power, std::unordered_map<Power, float>> Game::get_stance_vectors() { return stance_vectors_history_[state_->get_phase().to_string()]; }

std::unordered_map<Power, std::vector<Loc>> Game::get_orderable_locations() {
return state_->get_orderable_locations();
}
@@ -313,6 +315,14 @@ Game::Game(const string &json_str) {
std::make_shared<const std::unordered_map<Power, std::vector<Order>>>(
orders_this_phase);

if (j_phase.find("stance_vectors") != j_phase.end()) {
for (auto &it1 : j_phase["stance_vectors"].items()) {
Power power1 = power_from_str(it1.key());
for (auto &it2 : it1.value().items()) {
Power power2 = power_from_str(it2.key());
stance_vectors_history_[phase_str][power1][power2] = it2.value();
}
}
}

if (j_phase.find("messages") != j_phase.end()) {
for (auto &j_msg : j_phase["messages"]) {
JCHECK(message_history_[phase_str].find(j_msg["time_sent"]) ==
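The loop above mirrors the existing `messages` handling: each phase object in the saved game JSON may carry a `stance_vectors` block keyed by sender power and then by target power, holding a float stance value. A minimal sketch of that block, with illustrative powers and values (other per-phase keys the engine reads are omitted):

```python
import json

# Hypothetical fragment of one phase entry in a saved game JSON; only the
# stance-related block parsed by the constructor above is shown.
phase_fragment = {
    "stance_vectors": {
        "FRANCE": {"GERMANY": -0.5, "ENGLAND": 0.25},
        "GERMANY": {"FRANCE": -0.5},
    }
}
print(json.dumps(phase_fragment, indent=2))
```

Powers absent from the block simply get no entry in `stance_vectors_history_` for that phase; the keys are assumed to be the power-name strings accepted by `power_from_str`.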
2 changes: 2 additions & 0 deletions dipcc/dipcc/cc/game.h
@@ -156,6 +156,7 @@ class Game {
pybind11::dict py_get_state();
pybind11::dict py_get_orderable_locations();
std::vector<PhaseData> get_phase_history();
std::unordered_map<Power, std::unordered_map<Power, float>> get_stance_vectors();
PhaseData get_phase_data()
const; // Deliberately weird - does NOT return staged orders and messages
PhaseData get_staged_phase_data()
@@ -228,6 +229,7 @@
order_history_;
std::map<Phase, std::vector<std::shared_ptr<const std::string>>> logs_;
std::map<Phase, std::map<uint64_t, Message>> message_history_;
std::map<Phase, std::unordered_map<Power, std::unordered_map<Power, float>>> stance_vectors_history_;
int draw_on_stalemate_years_ = -1;
bool exception_on_convoy_paradox_ = false;
std::unordered_map<std::string, std::string> metadata_;
20 changes: 20 additions & 0 deletions dipcc/dipcc/cc/thread_pool.cc
@@ -136,6 +136,7 @@ TensorDict ThreadPool::encode_inputs_state_only_multi(vector<Game *> &games,
fields["x_year_encoded"].index({i}).data_ptr<float>(),
fields["x_in_adj_phase"].index({i}).data_ptr<float>(),
fields["x_build_numbers"].index({i}).data_ptr<float>(),
fields["x_stance_vectors"].index({i}).data_ptr<float>(),
fields["x_scoring_system"].index({i}).data_ptr<float>(),
nullptr, // x_loc_idxs
nullptr, // x_possible_actions
@@ -168,6 +169,7 @@ TensorDict ThreadPool::encode_inputs_all_powers_multi(vector<Game *> &games,
fields["x_year_encoded"].index({i}).data_ptr<float>(),
fields["x_in_adj_phase"].index({i}).data_ptr<float>(),
fields["x_build_numbers"].index({i}).data_ptr<float>(),
fields["x_stance_vectors"].index({i}).data_ptr<float>(),
fields["x_scoring_system"].index({i}).data_ptr<float>(),
fields["x_loc_idxs"].index({i}).data_ptr<int8_t>(),
fields["x_possible_actions"].index({i}).data_ptr<int32_t>(),
@@ -199,6 +201,7 @@ TensorDict ThreadPool::encode_inputs_multi(vector<Game *> &games,
fields["x_year_encoded"].index({i}).data_ptr<float>(),
fields["x_in_adj_phase"].index({i}).data_ptr<float>(),
fields["x_build_numbers"].index({i}).data_ptr<float>(),
fields["x_stance_vectors"].index({i}).data_ptr<float>(),
fields["x_scoring_system"].index({i}).data_ptr<float>(),
fields["x_loc_idxs"].index({i}).data_ptr<int8_t>(),
fields["x_possible_actions"].index({i}).data_ptr<int32_t>(),
@@ -371,6 +374,23 @@ void ThreadPool::encode_state_for_game(Game *game, int input_version,
pointers.x_year_encoded[0] =
std::clamp(0.1 * (current_phase.year - 1901), 0.0, 5.0);

// encode x_stance_vectors
memset(pointers.x_stance_vectors, 0, 49 * sizeof(float));
std::unordered_map<Power, std::unordered_map<Power, float>> stance_map = game->get_stance_vectors();
for (const auto& outer_pair : stance_map) {
Power power1 = outer_pair.first;
int index1 = static_cast<int>(power1) - 1;

for (const auto& inner_pair : outer_pair.second) {
Power power2 = inner_pair.first;
int index2 = static_cast<int>(power2) - 1;

// Map 2D coordinates (index1, index2) to 1D array index
int arrayIndex = index1 * 7 + index2;
pointers.x_stance_vectors[arrayIndex] = inner_pair.second;
}
}

// encode x_in_adj_phase, x_build_numbers
if (current_phase.phase_type == 'A') {
*pointers.x_in_adj_phase = 1;
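For reference, the flat 49-float layout written by `encode_state_for_game` above is a row-major 7×7 matrix over the seven powers, with `index = sender * 7 + target` after shifting the `Power` enum value down by one. A small Python sketch of the same mapping, assuming the power ordering matches `fairdiplomacy.models.consts.POWERS`:

```python
import numpy as np

POWERS = ["AUSTRIA", "ENGLAND", "FRANCE", "GERMANY", "ITALY", "RUSSIA", "TURKEY"]

def flatten_stance(stance: dict) -> np.ndarray:
    """Mirror of the C++ encoding: absent pairs stay 0, index = sender*7 + target."""
    x = np.zeros(49, dtype=np.float32)
    for sender, row in stance.items():
        i = POWERS.index(sender)
        for target, value in row.items():
            j = POWERS.index(target)
            x[i * 7 + j] = value
    return x

stance = {"FRANCE": {"GERMANY": -0.5}, "GERMANY": {"FRANCE": 0.25}}
x = flatten_stance(stance)
# the batched field x_stance_vectors has shape (B, 49); view one row as a 7x7 matrix
print(x.reshape(7, 7)[POWERS.index("FRANCE"), POWERS.index("GERMANY")])  # -0.5
```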
1 change: 1 addition & 0 deletions dipcc/dipcc/cc/thread_pool.h
@@ -34,6 +34,7 @@ struct EncodingArrayPointers {
float *x_year_encoded;
float *x_in_adj_phase;
float *x_build_numbers;
float *x_stance_vectors;
float *x_scoring_system;
int8_t *x_loc_idxs;
int32_t *x_possible_actions;
1 change: 1 addition & 0 deletions dipcc/dipcc/pybind/pybind.cc
@@ -53,6 +53,7 @@ PYBIND11_MODULE(pydipcc, m) {
py::return_value_policy::move,
"Gets the phase data for all past phases, not including the current "
"staged phase.")
.def("get_stance_vectors", &Game::get_stance_vectors)
.def("get_staged_phase_data", &Game::get_staged_phase_data,
py::return_value_policy::move,
"Gets the phase data for the current staged phase that is not "
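With the binding above, `get_stance_vectors` becomes available on `pydipcc.Game` and returns the stored stance map for the current phase. A usage sketch, assuming pybind11's STL converters turn the nested `unordered_map` into nested Python mappings:

```python
from fairdiplomacy import pydipcc

# Path reused from the test script later in this PR; assumes the JSON already
# contains per-phase stance vectors.
with open("/data/games_stance/game_111.json") as f:
    game = pydipcc.Game.from_json(f.read())

stances = game.get_stance_vectors()  # nested mapping for the current phase
for sender, row in stances.items():
    for target, value in row.items():
        # keys may surface as pydipcc Power values or strings depending on the binding
        print(sender, target, float(value))
```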
105 changes: 96 additions & 9 deletions fairdiplomacy/agents/base_strategy_model_rollouts.py
@@ -6,17 +6,19 @@
#
import logging
from typing import Callable, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar

import copy
import numpy as np
import torch

from conf import agents_cfgs
from fairdiplomacy import pydipcc
from fairdiplomacy.agents.base_agent import AgentState
from fairdiplomacy.agents.base_search_agent import n_move_phases_later

from fairdiplomacy.agents.base_strategy_model_wrapper import BaseStrategyModelWrapper
from fairdiplomacy.agents.stance_utils import predict_stance_vector_from_to, is_neighbor
from fairdiplomacy.game import sort_phase_key
from fairdiplomacy.models.consts import POWERS
from fairdiplomacy.typedefs import (
@@ -132,6 +134,8 @@ def do_rollouts_multi(
override_max_rollout_length: Optional[int] = None,
timings=None,
log_timings=False,
agent_state: Optional[AgentState] = None,
stance_vector_mode: Optional[str] = 'og',
) -> torch.Tensor:
"""Computes actions of state-action pairs for a bunch of value functions.

@@ -140,7 +144,18 @@

Returns array of shape [len(set_orders_dicts), num_powers, num_value_functions].
"""

if stance_vector_mode != 'og':
print(f'do_rollouts_multi running with stance_vector_mode: {stance_vector_mode}')
logging.info(f'stance vector mode {stance_vector_mode}')
print(f'agent_state stance: {agent_state.stance_vector.stance} and opponent: {agent_state.opponent}')
logging.info(f'agent_state stance: {agent_state.stance_vector.stance} and opponent: {agent_state.opponent}')
st = agent_state.stance_vector
st.set_rollout(is_rollout=True)
mila_game = agent_state.mila_game
opponent = agent_state.opponent
print(f'who is opponent? {opponent}')
logging.info(f'who is opponent? {opponent}')

all_value_functions = [self.base_strategy_model] + (
[] if extra_base_strategy_models is None else extra_base_strategy_models
)
Expand All @@ -156,13 +171,15 @@ def do_rollouts_multi(
games = game_init.clone_n_times(len(set_orders_dicts) * self.average_n_rollouts)
with timings("setup"):
game_ids = [game.game_id for game in games]
logging.info(f'setting up rollout, len(set_orders_dicts) = {len(set_orders_dicts)}')

# set orders if specified
for game, set_orders_dict in zip(
games, repeat(set_orders_dicts, self.average_n_rollouts)
):
for power, orders in set_orders_dict.items():
game.set_orders(power, list(orders))
# logging.info(f'set game: {game.game_id} curr phase {game.current_short_phase} order, {power} = {list(orders)}')

# for each game, a list of powers whose orders need to be generated
# by the model on the first phase.
@@ -199,7 +216,8 @@
rollout_end_phase_id = sort_phase_key(
n_move_phases_later(game_init.current_short_phase, max_rollout_length)
)
max_steps = 1000000
# max_steps = 1000000
max_steps = 10000
else:
# Really far ahead.
rollout_end_phase_id = sort_phase_key(
@@ -271,18 +289,87 @@
top_p=self.top_p,
timings=timings,
)

not_align_games = []
temp_power_orders = dict()
# re-sample until the opponent's sampled orders align with the requested stance mode:
with timings("env.set_orders"):
assert len(games_to_step) == len(batch_orders)
for game, orders_per_power in zip(games_to_step, batch_orders):
for power, orders in zip(POWERS, orders_per_power):
assert len(games_to_step) == len(batch_orders) == len(_logprobs)
for game, orders_per_power, logprob_power_order in zip(games_to_step, batch_orders, _logprobs):
for power, orders, logprob in zip(POWERS, orders_per_power, logprob_power_order):
if step_id == 0 and power not in missing_start_orders[game.game_id]:
continue
game.set_orders(power, list(orders))

if stance_vector_mode != 'og' and opponent == power and is_neighbor(game, agent_power, opponent):
# st.set_rollout_game(game=game)
# if new stance > curr stance and stance_vector_mode == 'foes', re-sample
# if new stance < curr stance and stance_vector_mode == 'ally', re-sample

# if the opponent is our neighbor in this rollout game, check whether its sampled orders align with the stance we want
curr_stance = st.stance[agent_power][opponent]
new_stance = predict_stance_vector_from_to(game, agent_power, opponent, st, orders)
if (new_stance > curr_stance and stance_vector_mode == 'foes') or (new_stance < curr_stance and stance_vector_mode == 'ally'):
# not aligned: queue this game for re-sampling instead of keeping these orders
game.clear_old_all_possible_orders()
not_align_games.append(game)
break

game.set_orders(power, list(orders))
# logging.info(f'set game: {game.game_id} curr phase {game.current_short_phase} order, {power} = {list(orders)} with logprob: {logprob}')
logging.info(f'not aligned games: {len(not_align_games)}/{len(games_to_step)}')


# re-sample ("reorder") games whose opponent orders did not align; max_reorder == 0 disables this pass
max_reorder = 0
reorder_time = 0
while reorder_time < max_reorder and len(not_align_games) > 0:
reorder_time += 1
batch_orders, _logprobs = self.base_strategy_model.forward_policy(
not_align_games,
has_press=self.has_press,
agent_power=agent_power,
game_rating_dict=games_to_step_rating_dict,
feature_encoder=self.feature_encoder,
temperature=self.temperature,
top_p=self.top_p,
timings=timings,
)
new_not_align_games = []
for game, orders_per_power, logprob_power_order in zip(not_align_games, batch_orders, _logprobs):
for power, orders, logprob in zip(POWERS, orders_per_power, logprob_power_order):
if step_id == 0 and power not in missing_start_orders[game.game_id]:
continue
if stance_vector_mode != 'og' and opponent == power and is_neighbor(game, agent_power, opponent):
# st.set_rollout_game(game=game)
# if new stance > curr stance and stance_vector_mode == 'foes', re-sample
# if new stance < curr stance and stance_vector_mode == 'ally', re-sample

# if the opponent is our neighbor in this rollout game, check whether its sampled orders align with the stance we want
curr_stance = st.stance[agent_power][opponent]
new_stance = predict_stance_vector_from_to(game, agent_power, opponent, st, orders)
if (new_stance > curr_stance and stance_vector_mode == 'foes') or (new_stance < curr_stance and stance_vector_mode == 'ally'):
# still not aligned: queue this game for another pass instead of keeping these orders
game.clear_old_all_possible_orders()
new_not_align_games.append(game)
break

game.set_orders(power, list(orders))
# logging.info(f'set game: {game.game_id} curr phase {game.current_short_phase} order, {power} = {list(orders)} with logprob: {logprob}')
logging.info(f'still not aligned after pass {reorder_time}: {len(new_not_align_games)}/{len(not_align_games)}')
not_align_games = new_not_align_games

for game in not_align_games:
# every power holds (submits no orders) in any game still misaligned with the expected stance
for power in POWERS:
game.set_orders(power, [])


with timings("env.step"):
self.feature_encoder.process_multi([game for game in games_to_step])


if stance_vector_mode != 'og':
st.set_rollout(is_rollout=False)
st.set_rollout_game(game=None)

# Shape: [num_games, num_powers, num_value_functions].
final_scores = torch.zeros((len(games), len(POWERS), len(all_value_functions)))

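The two sampling loops above reject an opponent action when the stance it would induce moves in the wrong direction for the requested mode: under 'foes' a sample that would raise the agent's stance toward the opponent is re-sampled, under 'ally' a sample that would lower it is re-sampled, and everything else is kept. A sketch of that accept/reject test factored into one helper (names mirror the locals above; only the condition shown in the diff is implemented):

```python
from fairdiplomacy.agents.stance_utils import predict_stance_vector_from_to

def orders_align_with_stance_mode(game, agent_power, opponent, st, orders, stance_vector_mode):
    """Return True if the opponent's sampled orders are consistent with the rollout's stance mode."""
    if stance_vector_mode == 'og':
        return True  # original behaviour: no filtering
    curr_stance = st.stance[agent_power][opponent]
    new_stance = predict_stance_vector_from_to(game, agent_power, opponent, st, orders)
    if stance_vector_mode == 'foes' and new_stance > curr_stance:
        return False  # opponent looks too friendly for a 'foes' rollout
    if stance_vector_mode == 'ally' and new_stance < curr_stance:
        return False  # opponent looks too hostile for an 'ally' rollout
    return True
```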
49 changes: 49 additions & 0 deletions fairdiplomacy/agents/base_strategy_model_wrapper.py
@@ -34,6 +34,7 @@
)
from fairdiplomacy.utils.thread_pool_encoding import FeatureEncoder
from fairdiplomacy.utils.timing_ctx import DummyCtx, TimingCtx
import json


class BaseStrategyModelWrapper:
@@ -169,6 +170,8 @@ def forward_policy(
agent_power=agent_power,
game_rating_dict=game_rating_dict,
)
if 'x_stance_vectors' in batch:
print(f"test_st: {batch['x_stance_vectors']}")

return self.forward_policy_from_datafields(
batch,
@@ -843,3 +846,49 @@ def create_conditional_teacher_force_orders(batch: DataFields) -> torch.Tensor:
] # Loc id (global) -> local loc idx.
teacher_force_orders[batch_idx, power_idx, local_loc_id] = order_id
return teacher_force_orders

def edit_stance(stance_vectors, power_sen, power_rec, stance_value):
stance_vectors[power_sen][power_rec] = stance_value


def test_action_from_policy(game_path, phase_name, model_type='ft'):
with open(game_path) as f:
game_string = f.read()
game_json = json.loads(game_string)

phase_json = next((p for p in game_json['phases'] if p['name'] == phase_name), None)

game = pydipcc.Game.from_json(game_string)
rolled_back_game = game.rolled_back_to_phase_start(phase_name)
power_sen = 'FRANCE'
power_rec = 'GERMANY'
force = False
if model_type != 'ft':
model = BaseStrategyModelWrapper(
model_path='/diplomacy_cicero/models/human_imitation_joint_policy.ckpt',
)

else:
model = BaseStrategyModelWrapper(
model_path='/diplomacy_cicero/models/human_imitation_joint_policy_stance_5000_04_1.ckpt',
)
if force:
stance_vectors = phase_json['stance_vectors']
print(f'stance_vectors_before: {power_sen}->{power_rec}: {stance_vectors[power_sen][power_rec]}')
print(f'stance_vectors_before: {power_rec}->{power_sen}: {stance_vectors[power_rec][power_sen]}')
stance_vectors[power_sen][power_rec] = 1.0
stance_vectors[power_rec][power_sen] = 1.0
print(f'stance_vectors_after: {power_sen}->{power_rec}: {stance_vectors[power_sen][power_rec]}')
print(f'stance_vectors_after: {power_rec}->{power_sen}: {stance_vectors[power_rec][power_sen]}')
game_string = json.dumps(game_json, indent=4)
game = pydipcc.Game.from_json(game_string)
rolled_back_game = game.rolled_back_to_phase_start(phase_name)

action, actionprob = model.forward_policy([rolled_back_game], has_press=False, agent_power=power_sen, temperature=0.2, top_p=0.9)
print(action)
print(torch.exp(actionprob))

if __name__ == "__main__":
    game_path = '/data/games_stance/game_111.json'
    phase = 'S1902M'
    # test_action_from_policy(game_path, phase, model_type='sl')
    test_action_from_policy(game_path, phase, model_type='ft')