Fix logs verbosity and detail experiment instructions
tholop committed Aug 20, 2024
1 parent de5b9ce commit 01de65f
Showing 3 changed files with 35 additions and 24 deletions.
4 changes: 2 additions & 2 deletions README.md
@@ -55,7 +55,7 @@ The script [experiments/run_all.sh](/~https://github.com/columbia/cookiemonster/bl

### 3.1. Run all experiments

Reproduce all Cookie Monster experiments by running the cookiemonster docker with the following command:
Reproduce all Cookie Monster experiments by running the cookiemonster docker with the following command, from any directory (e.g., the `cookiemonster` repository root):

``` bash
sudo docker run -v $PWD/logs:/cookiemonster/logs -v $PWD/figures:/cookiemonster/figures -v $PWD/cookiemonster/config:/cookiemonster/cookiemonster/config -v $PWD/temp:/tmp --network=host --name cookiemonster --shm-size=204.89gb --rm cookiemonster experiments/run_all.sh
@@ -73,7 +73,7 @@ With the `-v` flag we mount directories `cookiemonster/logs`, `cookiemonster/fig

### 3.2. Analyze results

The [experiments/runner.cli.py](/~https://github.com/columbia/cookiemonster/blob/artifact-sosp/experiments/runner.cli.py) script will automatically analyze the execution logs and create plots corresponding to the figures presented in the paper.
The previous `experiments/run_all.sh` command will automatically analyze the execution logs and create plots corresponding to the figures presented in the paper, using the [experiments/runner.cli.py](/~https://github.com/columbia/cookiemonster/blob/artifact-sosp/experiments/runner.cli.py) script.

Check the `figures` directory for all the outputs.
Because noise addition is not deterministic, results might not be identical, but the relative difference between baselines should be the same.
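
As a quick cross-check of the analysis step, a single figure's pipeline can also be re-run outside `run_all.sh`. The sketch below is only an illustration, assuming the repository root as the working directory; the command and flags are taken verbatim from the `experiments/run_all.sh` diff further down, and the output path from the `experiments/runner.cli.py` diff.

``` python
# Illustrative sketch: re-run one experiment's pipeline (and its plot) directly,
# instead of running every figure via experiments/run_all.sh.
# Assumes the repository root is the working directory.
import subprocess

subprocess.run(
    [
        "python3",
        "experiments/runner.cli.py",
        "--exp", "microbenchmark_varying_knob1",
        "--loguru-level", "ERROR",
    ],
    check=True,  # raise if the experiment or plotting step fails
)
# Per the runner code below, this experiment writes figures/fig4_a_b.png.
```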
10 changes: 4 additions & 6 deletions experiments/run_all.sh
@@ -1,13 +1,11 @@
export LOGURU_LEVEL=ERROR

echo "Running Figures 4.a and 4.b.."
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob1
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob1 --loguru-level ERROR

echo "Running Figures 4.c and 4.d.."
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob2
python3 experiments/runner.cli.py --exp microbenchmark_varying_knob2 --loguru-level ERROR

echo "Running Figures 5.a, 5.b and 5.c.."
python3 experiments/runner.cli.py --exp patcg_varying_epoch_granularity
python3 experiments/runner.cli.py --exp patcg_varying_epoch_granularity --loguru-level ERROR

echo "Running Figures 6.a, 6.b and 6.c.."
python3 experiments/runner.cli.py --exp criteo_run
python3 experiments/runner.cli.py --exp criteo_run --loguru-level ERROR
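
The `--loguru-level ERROR` flags above replace the removed `export LOGURU_LEVEL=ERROR` line that previously set verbosity for every run. The option handling itself is not visible in the `runner.cli.py` hunks below, so the following is only a hypothetical sketch of how such a flag can be wired with typer and loguru; the `main` signature and the dispatch comment are placeholders, not the real CLI.

``` python
# Hypothetical sketch only: one way a --loguru-level option could replace the
# removed "export LOGURU_LEVEL=ERROR"; the real handling in
# experiments/runner.cli.py is not shown in this diff.
import sys

import typer
from loguru import logger

app = typer.Typer()


@app.command()
def main(
    exp: str = typer.Option(..., help="Experiment name, e.g. criteo_run"),
    loguru_level: str = typer.Option("INFO", help="Loguru verbosity level"),
):
    # Drop loguru's default sink and re-add stderr at the requested verbosity,
    # so --loguru-level ERROR hides INFO/DEBUG output from the experiments.
    logger.remove()
    logger.add(sys.stderr, level=loguru_level)
    logger.info(f"dispatching experiment {exp}")  # suppressed at ERROR level
    # ... dispatch to the corresponding experiment function here ...


if __name__ == "__main__":
    app()
```

With a single command registered and no callback, typer exposes it directly, so an invocation of the form `--exp ... --loguru-level ERROR` matches the command lines in `run_all.sh` above.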
45 changes: 29 additions & 16 deletions experiments/runner.cli.py
@@ -1,16 +1,20 @@
import multiprocessing
import os
import time
from omegaconf import OmegaConf
import typer
import multiprocessing
from copy import deepcopy
from data.criteo.creators.query_pool_creator import QueryPoolDatasetCreator as CriteoQueries

import typer
from omegaconf import OmegaConf
from ray_runner import grid_run
from cookiemonster.utils import BUDGET, BIAS, LOGS_PATH

from cookiemonster.utils import BIAS, BUDGET, LOGS_PATH
from data.criteo.creators.query_pool_creator import (
QueryPoolDatasetCreator as CriteoQueries,
)
from notebooks.utils import save_data
from plotting.criteo_plot import criteo_plot_experiments_side_by_side
from plotting.microbenchmark_plot import microbenchmark_plot_budget_consumption_bars
from plotting.patcg_plot import patcg_plot_experiments_side_by_side
from plotting.criteo_plot import criteo_plot_experiments_side_by_side

app = typer.Typer()

@@ -72,8 +76,9 @@ def microbenchmark_varying_knob1(ray_session_dir):
path = "ray/microbenchmark/varying_knob1"
save_data(path, type="budget")
microbenchmark_plot_budget_consumption_bars(
"knob1", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_a_b.png")

"knob1", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_a_b.png"
)


def microbenchmark_varying_knob2(ray_session_dir):
dataset = "microbenchmark"
@@ -117,7 +122,8 @@ def microbenchmark_varying_knob2(ray_session_dir):
path = "ray/microbenchmark/varying_knob2"
save_data(path, type="budget")
microbenchmark_plot_budget_consumption_bars(
"knob2", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_c_d.png")
"knob2", f"{LOGS_PATH.joinpath(path)}/budgets.csv", "figures/fig4_c_d.png"
)


def microbenchmark_varying_epoch_granularity(ray_session_dir):
@@ -209,26 +215,33 @@ def criteo_run(ray_session_dir):
criteo_plot_experiments_side_by_side(
f"{LOGS_PATH.joinpath(path1)}",
f"{LOGS_PATH.joinpath(path2)}",
"figures/fig6_a_b_c_d.png")
"figures/fig6_a_b_c_d.png",
)


def criteo_impressions_run(ray_session_dir):
dataset = "criteo"
conversions_path = f"{dataset}/{dataset}_query_pool_conversions.csv"
workload_generation = OmegaConf.load("data/criteo/config.json")
impression_augment_rates = CriteoQueries(workload_generation).get_impression_augment_rates()
impression_augment_rates = CriteoQueries(
workload_generation
).get_impression_augment_rates()
ray_init = True

for rate in impression_augment_rates:
impressions_path = f"{dataset}/{dataset}_query_pool_impressions_augment_{rate}.csv"
impressions_path = (
f"{dataset}/{dataset}_query_pool_impressions_augment_{rate}.csv"
)
logs_dir = f"{dataset}/augment_impressions_{rate}"
config = {
"baseline": ["ipa", "cookiemonster_base", "cookiemonster"],
"dataset_name": dataset,
"impressions_path": impressions_path,
"conversions_path": conversions_path,
"num_days_attribution_window": [30],
"workload_size": [1_000], # force a high number so that we run on all queries
"workload_size": [
1_000
], # force a high number so that we run on all queries
"max_scheduling_batch_size_per_query": workload_generation.max_batch_size,
"min_scheduling_batch_size_per_query": workload_generation.min_batch_size,
"initial_budget": [1],
@@ -273,7 +286,7 @@ def patcg_varying_epoch_granularity(ray_session_dir):
grid_run(**config)
config["num_days_per_epoch"] = [1, 60]
grid_run(**config)

config["num_days_per_epoch"] = [14, 7]
grid_run(**config)

@@ -282,9 +295,9 @@
save_data(path, type="bias")
os.makedirs("figures", exist_ok=True)
patcg_plot_experiments_side_by_side(
f"{LOGS_PATH.joinpath(path)}", "figures/fig5_a_b_c.png")
f"{LOGS_PATH.joinpath(path)}", "figures/fig5_a_b_c.png"
)



def patcg_varying_initial_budget(ray_session_dir):
dataset = "patcg"

0 comments on commit 01de65f
