Update tracking deception to output in TACC log #18

Open · wants to merge 56 commits into base: old-main
56 commits
3aa5a9e
Update README.md
wwongkamjan Mar 1, 2023
b3676cc
fix two engines phase inconsistency
May 9, 2023
9832e21
fix arg for dipcc R phase
May 15, 2023
f8668a2
math.ceil
May 18, 2023
988177d
silent cicero
Jun 4, 2023
cf3368e
set wait and end phase for human games
Jul 11, 2023
9439d75
fairdiplomacy_external/mila_api.py
Jul 18, 2023
a07831f
update daide2eng
Jul 19, 2023
0e6a1a8
stop comm using remaining time
Aug 1, 2023
0e59aff
fix calling remaining time
Aug 3, 2023
2d42d7f
update should_stop
Aug 3, 2023
65eddb8
presubmit order
Aug 4, 2023
d98070d
fix deadline=0 case
Aug 7, 2023
507b095
fix inconsistent in phase when calling game.schedule
Aug 8, 2023
034d06e
track time_sent of prev message
Sep 20, 2023
587694c
remove internal response
Sep 28, 2023
73a0e87
Update requirements.txt
wwongkamjan Sep 28, 2023
9aecdfe
remove daide2eng for human games
Sep 29, 2023
03bd537
get back daide2eng
Sep 29, 2023
5687751
add cicero intent experiment
Oct 20, 2023
493b155
fix update press: most recent time
Oct 23, 2023
3962137
update: test intent and test value table
Oct 24, 2023
a2424ea
add human AI game
Oct 24, 2023
a5c76aa
fix json error
Oct 25, 2023
ec520b8
update time to submit
Oct 29, 2023
d7ff6d8
add greedy PO
Oct 29, 2023
4c76344
remove message printing in two files
Oct 30, 2023
d06b778
update assert deadline
Oct 30, 2023
5254651
update game view of dipcc
Oct 30, 2023
ed3d547
update msg sleeptime 10 and mila->dipcc with timestamp now
Nov 3, 2023
f6f7a99
add val to them and lie po to deceptive agent
Nov 5, 2023
0cd33cb
update deceptive intent, bug fixes
Nov 10, 2023
b6bca77
searchbot change their po
Nov 17, 2023
b73c406
add stance vector to decide when to lie
Dec 23, 2023
43379ae
force key, value in game features for stance vector error
Dec 30, 2023
207b3af
improve log stance vector
Jan 2, 2024
0233b34
update deceptive cicero
Feb 21, 2024
608e0bb
update persuasion in deception
Mar 2, 2024
826af0b
update obj func and test lie from human games
Mar 15, 2024
c47be2c
find joint action in deceptive PO and BR
Apr 7, 2024
a970b00
trainsl and some lie results
Oct 4, 2024
9d94d77
remove print in c++
Oct 4, 2024
2066353
add get_stance_vectors in game c++ header
Oct 4, 2024
ca4eebd
debug cc++
Oct 4, 2024
aa4a0e2
debug cc++
Oct 4, 2024
7324f22
debug cc++
Oct 4, 2024
1d4a79e
debug cc++
Oct 4, 2024
95680b7
debug cc++
Oct 4, 2024
e243611
debug cc++
Oct 4, 2024
26782bc
debug cc++
Oct 4, 2024
02352df
debug cc++
Oct 4, 2024
8e01fcf
debug cc++
Oct 4, 2024
04732ca
debug cc++
Oct 4, 2024
102497c
debug cc++
Oct 4, 2024
d492481
update dataset with stance, stance_action_prob, stance_loss, action_test
Nov 5, 2024
53e3421
debug and unit test for dataset.py
Dec 2, 2024
4 changes: 2 additions & 2 deletions README.md
@@ -85,11 +85,11 @@ After each pull it's recommended to run `make` to re-compile internal C++ and pr
module load tacc-singularity
git clone --recursive /~https://github.com/ALLAN-DIP/diplomacy_cicero.git

cp -r /corral/projects/DARPA-SHADE/Shared/cicero "$WORK"
cp -r /corral/projects/DARPA-SHADE/Shared/cicero $WORK
cp /corral/projects/DARPA-SHADE/Shared/UMD/pytorch_model.bin "$WORK"/diplomacy_cicero/fairdiplomacy/AMR/amrlib/amrlib/data/model_parse_xfm/checkpoint-9920/
export CICERO=$WORK/cicero

cd "$CICERO"
cd $CICERO
singularity run --nv \
--bind "$WORK"/diplomacy_cicero/fairdiplomacy/agents/:/diplomacy_cicero/fairdiplomacy/agent \
--bind "$WORK"/diplomacy_cicero/fairdiplomacy_external:/diplomacy_cicero/fairdiplomacy_external \
1 change: 1 addition & 0 deletions dipcc/dipcc/cc/data_fields.cc
@@ -35,6 +35,7 @@ TensorDict new_data_fields(long B, int input_version, long max_seq_len,
if (include_power) {
fields["x_power"] = torch::full({B, 7, max_seq_len}, -1, torch::kLong);
}
fields["x_stance_vectors"] = torch::empty({B, 49}, torch::kFloat32);

return fields;
}
10 changes: 10 additions & 0 deletions dipcc/dipcc/cc/game.cc
@@ -91,6 +91,8 @@ void Game::process() {
GameState &Game::get_state() { return *state_; }
const GameState &Game::get_state() const { return *state_; }

std::unordered_map<Power, std::unordered_map<Power, float>> Game::get_stance_vectors() { return stance_vectors_history_[state_->get_phase().to_string()]; }

std::unordered_map<Power, std::vector<Loc>> Game::get_orderable_locations() {
return state_->get_orderable_locations();
}
@@ -313,6 +315,14 @@ Game::Game(const string &json_str) {
std::make_shared<const std::unordered_map<Power, std::vector<Order>>>(
orders_this_phase);

if (j_phase.find("stance_vectors") != j_phase.end()) {
for (auto &it1 : j_phase["stance_vectors"].items()) {
Power power1 = power_from_str(it1.key());
for (auto &it2 : it1.value().items()) {
Power power2 = power_from_str(it2.key());
stance_vectors_history_[phase_str][power1][power2] = it2.value();
}
}
}

if (j_phase.find("messages") != j_phase.end()) {
for (auto &j_msg : j_phase["messages"]) {
JCHECK(message_history_[phase_str].find(j_msg["time_sent"]) ==
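The loop above mirrors the existing `messages` handling: each phase object in the saved game JSON may carry a `stance_vectors` block keyed by sender power and then by target power, holding a float stance value. A minimal sketch of that block, with illustrative powers and values (other per-phase keys the engine reads are omitted):

```python
import json

# Hypothetical fragment of one phase entry in a saved game JSON; only the
# stance-related block parsed by the constructor above is shown.
phase_fragment = {
    "stance_vectors": {
        "FRANCE": {"GERMANY": -0.5, "ENGLAND": 0.25},
        "GERMANY": {"FRANCE": -0.5},
    }
}
print(json.dumps(phase_fragment, indent=2))
```

Powers absent from the block simply get no entry in `stance_vectors_history_` for that phase; the keys are assumed to be the power-name strings accepted by `power_from_str`.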
2 changes: 2 additions & 0 deletions dipcc/dipcc/cc/game.h
@@ -156,6 +156,7 @@ class Game {
pybind11::dict py_get_state();
pybind11::dict py_get_orderable_locations();
std::vector<PhaseData> get_phase_history();
std::unordered_map<Power, std::unordered_map<Power, float>> get_stance_vectors();
PhaseData get_phase_data()
const; // Deliberately weird - does NOT return staged orders and messages
PhaseData get_staged_phase_data()
@@ -228,6 +229,7 @@
order_history_;
std::map<Phase, std::vector<std::shared_ptr<const std::string>>> logs_;
std::map<Phase, std::map<uint64_t, Message>> message_history_;
std::map<Phase, std::unordered_map<Power, std::unordered_map<Power, float>>> stance_vectors_history_;
int draw_on_stalemate_years_ = -1;
bool exception_on_convoy_paradox_ = false;
std::unordered_map<std::string, std::string> metadata_;
20 changes: 20 additions & 0 deletions dipcc/dipcc/cc/thread_pool.cc
@@ -136,6 +136,7 @@ TensorDict ThreadPool::encode_inputs_state_only_multi(vector<Game *> &games,
fields["x_year_encoded"].index({i}).data_ptr<float>(),
fields["x_in_adj_phase"].index({i}).data_ptr<float>(),
fields["x_build_numbers"].index({i}).data_ptr<float>(),
fields["x_stance_vectors"].index({i}).data_ptr<float>(),
fields["x_scoring_system"].index({i}).data_ptr<float>(),
nullptr, // x_loc_idxs
nullptr, // x_possible_actions
@@ -168,6 +169,7 @@ TensorDict ThreadPool::encode_inputs_all_powers_multi(vector<Game *> &games,
fields["x_year_encoded"].index({i}).data_ptr<float>(),
fields["x_in_adj_phase"].index({i}).data_ptr<float>(),
fields["x_build_numbers"].index({i}).data_ptr<float>(),
fields["x_stance_vectors"].index({i}).data_ptr<float>(),
fields["x_scoring_system"].index({i}).data_ptr<float>(),
fields["x_loc_idxs"].index({i}).data_ptr<int8_t>(),
fields["x_possible_actions"].index({i}).data_ptr<int32_t>(),
@@ -199,6 +201,7 @@ TensorDict ThreadPool::encode_inputs_multi(vector<Game *> &games,
fields["x_year_encoded"].index({i}).data_ptr<float>(),
fields["x_in_adj_phase"].index({i}).data_ptr<float>(),
fields["x_build_numbers"].index({i}).data_ptr<float>(),
fields["x_stance_vectors"].index({i}).data_ptr<float>(),
fields["x_scoring_system"].index({i}).data_ptr<float>(),
fields["x_loc_idxs"].index({i}).data_ptr<int8_t>(),
fields["x_possible_actions"].index({i}).data_ptr<int32_t>(),
@@ -371,6 +374,23 @@ void ThreadPool::encode_state_for_game(Game *game, int input_version,
pointers.x_year_encoded[0] =
std::clamp(0.1 * (current_phase.year - 1901), 0.0, 5.0);

// encode x_stance_vectors
memset(pointers.x_stance_vectors, 0, 49 * sizeof(float));
std::unordered_map<Power, std::unordered_map<Power, float>> stance_map = game->get_stance_vectors();
for (const auto& outer_pair : stance_map) {
Power power1 = outer_pair.first;
int index1 = static_cast<int>(power1) - 1;

for (const auto& inner_pair : outer_pair.second) {
Power power2 = inner_pair.first;
int index2 = static_cast<int>(power2) - 1;

// Map 2D coordinates (index1, index2) to 1D array index
int arrayIndex = index1 * 7 + index2;
pointers.x_stance_vectors[arrayIndex] = inner_pair.second;
}
}

// encode x_in_adj_phase, x_build_numbers
if (current_phase.phase_type == 'A') {
*pointers.x_in_adj_phase = 1;
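For reference, the flat 49-float layout written by `encode_state_for_game` above is a row-major 7×7 matrix over the seven powers, with `index = sender * 7 + target` after shifting the `Power` enum value down by one. A small Python sketch of the same mapping, assuming the power ordering matches `fairdiplomacy.models.consts.POWERS`:

```python
import numpy as np

POWERS = ["AUSTRIA", "ENGLAND", "FRANCE", "GERMANY", "ITALY", "RUSSIA", "TURKEY"]

def flatten_stance(stance: dict) -> np.ndarray:
    """Mirror of the C++ encoding: absent pairs stay 0, index = sender*7 + target."""
    x = np.zeros(49, dtype=np.float32)
    for sender, row in stance.items():
        i = POWERS.index(sender)
        for target, value in row.items():
            j = POWERS.index(target)
            x[i * 7 + j] = value
    return x

stance = {"FRANCE": {"GERMANY": -0.5}, "GERMANY": {"FRANCE": 0.25}}
x = flatten_stance(stance)
# the batched field x_stance_vectors has shape (B, 49); view one row as a 7x7 matrix
print(x.reshape(7, 7)[POWERS.index("FRANCE"), POWERS.index("GERMANY")])  # -0.5
```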
1 change: 1 addition & 0 deletions dipcc/dipcc/cc/thread_pool.h
@@ -34,6 +34,7 @@ struct EncodingArrayPointers {
float *x_year_encoded;
float *x_in_adj_phase;
float *x_build_numbers;
float *x_stance_vectors;
float *x_scoring_system;
int8_t *x_loc_idxs;
int32_t *x_possible_actions;
1 change: 1 addition & 0 deletions dipcc/dipcc/pybind/pybind.cc
@@ -53,6 +53,7 @@ PYBIND11_MODULE(pydipcc, m) {
py::return_value_policy::move,
"Gets the phase data for all past phases, not including the current "
"staged phase.")
.def("get_stance_vectors", &Game::get_stance_vectors)
.def("get_staged_phase_data", &Game::get_staged_phase_data,
py::return_value_policy::move,
"Gets the phase data for the current staged phase that is not "
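With the binding above, `get_stance_vectors` becomes available on `pydipcc.Game` and returns the stored stance map for the current phase. A usage sketch, assuming pybind11's STL converters turn the nested `unordered_map` into nested Python mappings:

```python
from fairdiplomacy import pydipcc

# Path reused from the test script later in this PR; assumes the JSON already
# contains per-phase stance vectors.
with open("/data/games_stance/game_111.json") as f:
    game = pydipcc.Game.from_json(f.read())

stances = game.get_stance_vectors()  # nested mapping for the current phase
for sender, row in stances.items():
    for target, value in row.items():
        # keys may surface as pydipcc Power values or strings depending on the binding
        print(sender, target, float(value))
```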
105 changes: 96 additions & 9 deletions fairdiplomacy/agents/base_strategy_model_rollouts.py
@@ -6,17 +6,19 @@
#
import logging
from typing import Callable, Dict, Generic, Iterable, List, Optional, Tuple, TypeVar

import copy
import numpy as np
import torch

from conf import agents_cfgs
from fairdiplomacy import pydipcc
from fairdiplomacy.agents.base_agent import AgentState
from fairdiplomacy.agents.base_search_agent import n_move_phases_later

from fairdiplomacy.agents.base_strategy_model_wrapper import BaseStrategyModelWrapper
from fairdiplomacy.agents.stance_utils import predict_stance_vector_from_to, is_neighbor
from fairdiplomacy.game import sort_phase_key
from fairdiplomacy.models.consts import POWERS
from fairdiplomacy.typedefs import (
@@ -132,6 +134,8 @@ def do_rollouts_multi(
override_max_rollout_length: Optional[int] = None,
timings=None,
log_timings=False,
agent_state: Optional[AgentState] = None,
stance_vector_mode: Optional[str] = 'og',
) -> torch.Tensor:
"""Computes actions of state-action pairs for a bunch of value functions.

@@ -140,7 +144,18 @@

Returns array of shape [len(set_orders_dicts), num_powers, num_value_functions].
"""

if stance_vector_mode != 'og':
print(f'do_rollouts_multi running with stance_vector_mode: {stance_vector_mode}')
logging.info(f'stance vector mode {stance_vector_mode}')
print(f'agent_state stance: {agent_state.stance_vector.stance} and opponent: {agent_state.opponent}')
logging.info(f'agent_state stance: {agent_state.stance_vector.stance} and opponent: {agent_state.opponent}')
st = agent_state.stance_vector
st.set_rollout(is_rollout=True)
mila_game = agent_state.mila_game
opponent = agent_state.opponent
print(f'who is opponent? {opponent}')
logging.info(f'who is opponent? {opponent}')

all_value_functions = [self.base_strategy_model] + (
[] if extra_base_strategy_models is None else extra_base_strategy_models
)
Expand All @@ -156,13 +171,15 @@ def do_rollouts_multi(
games = game_init.clone_n_times(len(set_orders_dicts) * self.average_n_rollouts)
with timings("setup"):
game_ids = [game.game_id for game in games]
logging.info(f'setting up rollout, len(set_orders_dicts) = {len(set_orders_dicts)}')

# set orders if specified
for game, set_orders_dict in zip(
games, repeat(set_orders_dicts, self.average_n_rollouts)
):
for power, orders in set_orders_dict.items():
game.set_orders(power, list(orders))
# logging.info(f'set game: {game.game_id} curr phase {game.current_short_phase} order, {power} = {list(orders)}')

# for each game, a list of powers whose orders need to be generated
# by the model on the first phase.
@@ -199,7 +216,8 @@
rollout_end_phase_id = sort_phase_key(
n_move_phases_later(game_init.current_short_phase, max_rollout_length)
)
max_steps = 1000000
# max_steps = 1000000
max_steps = 10000
else:
# Really far ahead.
rollout_end_phase_id = sort_phase_key(
@@ -271,18 +289,87 @@
top_p=self.top_p,
timings=timings,
)

not_align_games = []
temp_power_orders = dict()
# re-sample until the opponent's sampled orders align with the requested stance mode:
with timings("env.set_orders"):
assert len(games_to_step) == len(batch_orders)
for game, orders_per_power in zip(games_to_step, batch_orders):
for power, orders in zip(POWERS, orders_per_power):
assert len(games_to_step) == len(batch_orders) == len(_logprobs)
for game, orders_per_power, logprob_power_order in zip(games_to_step, batch_orders, _logprobs):
for power, orders, logprob in zip(POWERS, orders_per_power, logprob_power_order):
if step_id == 0 and power not in missing_start_orders[game.game_id]:
continue
game.set_orders(power, list(orders))

if stance_vector_mode != 'og' and opponent == power and is_neighbor(game, agent_power, opponent):
# st.set_rollout_game(game=game)
# if new stance > curr stance and stance_vector_mode == 'foes', re-sample
# if new stance < curr stance and stance_vector_mode == 'ally', re-sample

# if the opponent is our neighbor in this rollout game, check whether its sampled orders align with the stance we want
curr_stance = st.stance[agent_power][opponent]
new_stance = predict_stance_vector_from_to(game, agent_power, opponent, st, orders)
if (new_stance > curr_stance and stance_vector_mode == 'foes') or (new_stance < curr_stance and stance_vector_mode == 'ally'):
# not aligned: queue this game for re-sampling instead of keeping these orders
game.clear_old_all_possible_orders()
not_align_games.append(game)
break

game.set_orders(power, list(orders))
# logging.info(f'set game: {game.game_id} curr phase {game.current_short_phase} order, {power} = {list(orders)} with logprob: {logprob}')
logging.info(f'not aligned games: {len(not_align_games)}/{len(games_to_step)}')


# re-sample ("reorder") games whose opponent orders did not align; max_reorder == 0 disables this pass
max_reorder = 0
reorder_time = 0
while reorder_time < max_reorder and len(not_align_games) > 0:
reorder_time += 1
batch_orders, _logprobs = self.base_strategy_model.forward_policy(
not_align_games,
has_press=self.has_press,
agent_power=agent_power,
game_rating_dict=games_to_step_rating_dict,
feature_encoder=self.feature_encoder,
temperature=self.temperature,
top_p=self.top_p,
timings=timings,
)
new_not_align_games = []
for game, orders_per_power, logprob_power_order in zip(not_align_games, batch_orders, _logprobs):
for power, orders, logprob in zip(POWERS, orders_per_power, logprob_power_order):
if step_id == 0 and power not in missing_start_orders[game.game_id]:
continue
if stance_vector_mode != 'og' and opponent == power and is_neighbor(game, agent_power, opponent):
# st.set_rollout_game(game=game)
# if new stance > curr stance and stance_vector_mode == 'foes', re-sample
# if new stance < curr stance and stance_vector_mode == 'ally', re-sample

# if the opponent is our neighbor in this rollout game, check whether its sampled orders align with the stance we want
curr_stance = st.stance[agent_power][opponent]
new_stance = predict_stance_vector_from_to(game, agent_power, opponent, st, orders)
if (new_stance > curr_stance and stance_vector_mode == 'foes') or (new_stance < curr_stance and stance_vector_mode == 'ally'):
# still not aligned: queue this game for another pass instead of keeping these orders
game.clear_old_all_possible_orders()
new_not_align_games.append(game)
break

game.set_orders(power, list(orders))
# logging.info(f'set game: {game.game_id} curr phase {game.current_short_phase} order, {power} = {list(orders)} with logprob: {logprob}')
logging.info(f'still not aligned after pass {reorder_time}: {len(new_not_align_games)}/{len(not_align_games)}')
not_align_games = new_not_align_games

for game in not_align_games:
# every power holds (submits no orders) in any game still misaligned with the expected stance
for power in POWERS:
game.set_orders(power, [])


with timings("env.step"):
self.feature_encoder.process_multi([game for game in games_to_step])


if stance_vector_mode != 'og':
st.set_rollout(is_rollout=False)
st.set_rollout_game(game=None)

# Shape: [num_games, num_powers, num_value_functions].
final_scores = torch.zeros((len(games), len(POWERS), len(all_value_functions)))

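The two sampling loops above reject an opponent action when the stance it would induce moves in the wrong direction for the requested mode: under 'foes' a sample that would raise the agent's stance toward the opponent is re-sampled, under 'ally' a sample that would lower it is re-sampled, and everything else is kept. A sketch of that accept/reject test factored into one helper (names mirror the locals above; only the condition shown in the diff is implemented):

```python
from fairdiplomacy.agents.stance_utils import predict_stance_vector_from_to

def orders_align_with_stance_mode(game, agent_power, opponent, st, orders, stance_vector_mode):
    """Return True if the opponent's sampled orders are consistent with the rollout's stance mode."""
    if stance_vector_mode == 'og':
        return True  # original behaviour: no filtering
    curr_stance = st.stance[agent_power][opponent]
    new_stance = predict_stance_vector_from_to(game, agent_power, opponent, st, orders)
    if stance_vector_mode == 'foes' and new_stance > curr_stance:
        return False  # opponent looks too friendly for a 'foes' rollout
    if stance_vector_mode == 'ally' and new_stance < curr_stance:
        return False  # opponent looks too hostile for an 'ally' rollout
    return True
```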
49 changes: 49 additions & 0 deletions fairdiplomacy/agents/base_strategy_model_wrapper.py
@@ -34,6 +34,7 @@
)
from fairdiplomacy.utils.thread_pool_encoding import FeatureEncoder
from fairdiplomacy.utils.timing_ctx import DummyCtx, TimingCtx
import json


class BaseStrategyModelWrapper:
@@ -169,6 +170,8 @@ def forward_policy(
agent_power=agent_power,
game_rating_dict=game_rating_dict,
)
if 'x_stance_vectors' in batch:
print(f"test_st: {batch['x_stance_vectors']}")

return self.forward_policy_from_datafields(
batch,
@@ -843,3 +846,49 @@ def create_conditional_teacher_force_orders(batch: DataFields) -> torch.Tensor:
] # Loc id (global) -> local loc idx.
teacher_force_orders[batch_idx, power_idx, local_loc_id] = order_id
return teacher_force_orders

def edit_stance(stance_vectors, power_sen, power_rec, stance_value):
stance_vectors[power_sen][power_rec] = stance_value


def test_action_from_policy(game_path, phase_name, model_type='ft'):
with open(game_path) as f:
game_string = f.read()
game_json = json.loads(game_string)

phase_json = next((p for p in game_json['phases'] if p['name'] == phase_name), None)

game = pydipcc.Game.from_json(game_string)
rolled_back_game = game.rolled_back_to_phase_start(phase_name)
power_sen = 'FRANCE'
power_rec = 'GERMANY'
force = False
if model_type != 'ft':
model = BaseStrategyModelWrapper(
model_path='/diplomacy_cicero/models/human_imitation_joint_policy.ckpt',
)

else:
model = BaseStrategyModelWrapper(
model_path='/diplomacy_cicero/models/human_imitation_joint_policy_stance_5000_04_1.ckpt',
)
if force:
stance_vectors = phase_json['stance_vectors']
print(f'stance_vectors_before: {power_sen}->{power_rec}: {stance_vectors[power_sen][power_rec]}')
print(f'stance_vectors_before: {power_rec}->{power_sen}: {stance_vectors[power_rec][power_sen]}')
stance_vectors[power_sen][power_rec] = 1.0
stance_vectors[power_rec][power_sen] = 1.0
print(f'stance_vectors_after: {power_sen}->{power_rec}: {stance_vectors[power_sen][power_rec]}')
print(f'stance_vectors_after: {power_rec}->{power_sen}: {stance_vectors[power_rec][power_sen]}')
game_string = json.dumps(game_json, indent=4)
game = pydipcc.Game.from_json(game_string)
rolled_back_game = game.rolled_back_to_phase_start(phase_name)

action, actionprob = model.forward_policy([rolled_back_game], has_press=False, agent_power=power_sen, temperature=0.2, top_p=0.9)
print(action)
print(torch.exp(actionprob))

if __name__ == "__main__":
    game_path = '/data/games_stance/game_111.json'
    phase = 'S1902M'
    # test_action_from_policy(game_path, phase, model_type='sl')
    test_action_from_policy(game_path, phase, model_type='ft')