From 01de65fc6a384805715edc80c3a192cfb26aaf7e Mon Sep 17 00:00:00 2001 From: Pierre Tholoniat Date: Tue, 20 Aug 2024 09:37:17 -0400 Subject: [PATCH] Fix logs verbosity and detail experiment instructions --- README.md | 4 ++-- experiments/run_all.sh | 10 ++++----- experiments/runner.cli.py | 45 +++++++++++++++++++++++++-------------- 3 files changed, 35 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index bbb3015..e6393c3 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ The script [experiments/run_all.sh](/~https://github.com/columbia/cookiemonster/bl ### 3.1. Run all experiments -Reproduce all Cookie Monster experiments by running the cookiemonster docker with the following command: +Reproduce all Cookie Monster experiments by running the cookiemonster docker with the following command, from any directory (e.g., the `cookiemonster` repository root): ``` bash sudo docker run -v $PWD/logs:/cookiemonster/logs -v $PWD/figures:/cookiemonster/figures -v $PWD/cookiemonster/config:/cookiemonster/cookiemonster/config -v $PWD/temp:/tmp --network=host --name cookiemonster --shm-size=204.89gb --rm cookiemonster experiments/run_all.sh @@ -73,7 +73,7 @@ With the `-v` flag we mount directories `cookiemonster/logs`, `cookiemonster/fig ### 3.2. Analyze results -The [experiments/runner.cli.py](/~https://github.com/columbia/cookiemonster/blob/artifact-sosp/experiments/runner.cli.py) script will automatically analyze the execution logs and create plots corresponding to the figures presented in the paper. +The previous `experiments/run_all.sh` command will automatically analyze the execution logs and create plots corresponding to the figures presented in the paper, using the [experiments/runner.cli.py](/~https://github.com/columbia/cookiemonster/blob/artifact-sosp/experiments/runner.cli.py) script. Check the `figures` directory for all the outputs. Due to noise addition not being deterministic results might not be identical but the relative difference between baselines should be the same. diff --git a/experiments/run_all.sh b/experiments/run_all.sh index 35970d0..e128025 100644 --- a/experiments/run_all.sh +++ b/experiments/run_all.sh @@ -1,13 +1,11 @@ -export LOGURU_LEVEL=ERROR - echo "Running Figures 4.a and 4.b.." -python3 experiments/runner.cli.py --exp microbenchmark_varying_knob1 +python3 experiments/runner.cli.py --exp microbenchmark_varying_knob1 --loguru-level ERROR echo "Running Figures 4.c and 4.d.." -python3 experiments/runner.cli.py --exp microbenchmark_varying_knob2 +python3 experiments/runner.cli.py --exp microbenchmark_varying_knob2 --loguru-level ERROR echo "Running Figures 5.a, 5.b and 5.c.." -python3 experiments/runner.cli.py --exp patcg_varying_epoch_granularity +python3 experiments/runner.cli.py --exp patcg_varying_epoch_granularity --loguru-level ERROR echo "Running Figures 6.a, 6.b and 6.c.." -python3 experiments/runner.cli.py --exp criteo_run +python3 experiments/runner.cli.py --exp criteo_run --loguru-level ERROR diff --git a/experiments/runner.cli.py b/experiments/runner.cli.py index e5884a8..e403757 100644 --- a/experiments/runner.cli.py +++ b/experiments/runner.cli.py @@ -1,16 +1,20 @@ +import multiprocessing import os import time -from omegaconf import OmegaConf -import typer -import multiprocessing from copy import deepcopy -from data.criteo.creators.query_pool_creator import QueryPoolDatasetCreator as CriteoQueries + +import typer +from omegaconf import OmegaConf from ray_runner import grid_run -from cookiemonster.utils import BUDGET, BIAS, LOGS_PATH + +from cookiemonster.utils import BIAS, BUDGET, LOGS_PATH +from data.criteo.creators.query_pool_creator import ( + QueryPoolDatasetCreator as CriteoQueries, +) from notebooks.utils import save_data +from plotting.criteo_plot import criteo_plot_experiments_side_by_side from plotting.microbenchmark_plot import microbenchmark_plot_budget_consumption_bars from plotting.patcg_plot import patcg_plot_experiments_side_by_side -from plotting.criteo_plot import criteo_plot_experiments_side_by_side app = typer.Typer() @@ -72,8 +76,9 @@ def microbenchmark_varying_knob1(ray_session_dir): path = "ray/microbenchmark/varying_knob1" save_data(path, type="budget") microbenchmark_plot_budget_consumption_bars( - "knob1", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_a_b.png") - + "knob1", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_a_b.png" + ) + def microbenchmark_varying_knob2(ray_session_dir): dataset = "microbenchmark" @@ -117,7 +122,8 @@ def microbenchmark_varying_knob2(ray_session_dir): path = "ray/microbenchmark/varying_knob2" save_data(path, type="budget") microbenchmark_plot_budget_consumption_bars( - "knob2", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_c_d.png") + "knob2", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_c_d.png" + ) def microbenchmark_varying_epoch_granularity(ray_session_dir): @@ -209,18 +215,23 @@ def criteo_run(ray_session_dir): criteo_plot_experiments_side_by_side( f"{LOGS_PATH.joinpath(path1)}", f"{LOGS_PATH.joinpath(path2)}", - "figures/fig6_a_b_c_d.png") + "figures/fig6_a_b_c_d.png", + ) def criteo_impressions_run(ray_session_dir): dataset = "criteo" conversions_path = f"{dataset}/{dataset}_query_pool_conversions.csv" workload_generation = OmegaConf.load("data/criteo/config.json") - impression_augment_rates = CriteoQueries(workload_generation).get_impression_augment_rates() + impression_augment_rates = CriteoQueries( + workload_generation + ).get_impression_augment_rates() ray_init = True for rate in impression_augment_rates: - impressions_path = f"{dataset}/{dataset}_query_pool_impressions_augment_{rate}.csv" + impressions_path = ( + f"{dataset}/{dataset}_query_pool_impressions_augment_{rate}.csv" + ) logs_dir = f"{dataset}/augment_impressions_{rate}" config = { "baseline": ["ipa", "cookiemonster_base", "cookiemonster"], @@ -228,7 +239,9 @@ def criteo_impressions_run(ray_session_dir): "impressions_path": impressions_path, "conversions_path": conversions_path, "num_days_attribution_window": [30], - "workload_size": [1_000], # force a high number so that we run on all queries + "workload_size": [ + 1_000 + ], # force a high number so that we run on all queries "max_scheduling_batch_size_per_query": workload_generation.max_batch_size, "min_scheduling_batch_size_per_query": workload_generation.min_batch_size, "initial_budget": [1], @@ -273,7 +286,7 @@ def patcg_varying_epoch_granularity(ray_session_dir): grid_run(**config) config["num_days_per_epoch"] = [1, 60] grid_run(**config) - + config["num_days_per_epoch"] = [14, 7] grid_run(**config) @@ -282,9 +295,9 @@ def patcg_varying_epoch_granularity(ray_session_dir): save_data(path, type="bias") os.makedirs("figures", exist_ok=True) patcg_plot_experiments_side_by_side( - f"{LOGS_PATH.joinpath(path)}", "figures/fig5_a_b_c.png") + f"{LOGS_PATH.joinpath(path)}", "figures/fig5_a_b_c.png" + ) - def patcg_varying_initial_budget(ray_session_dir): dataset = "patcg"