reformat run.ipynb
jbloom-md committed Apr 18, 2024
1 parent 11a71e1 commit 822882c
Showing 1 changed file with 3 additions and 11 deletions.
14 changes: 3 additions & 11 deletions scripts/run.ipynb
@@ -256,13 +256,11 @@
" d_in=1024, # the width of the mlp output.\n",
" dataset_path=\"apollo-research/roneneldan-TinyStories-tokenizer-gpt2\", # this is a tokenized language dataset on Huggingface for the Tiny Stories corpus.\n",
" is_dataset_tokenized=True,\n",
" \n",
" # SAE Parameters\n",
" mse_loss_normalization=None, # We won't normalize the mse loss,\n",
" expansion_factor=16, # the width of the SAE. Larger will result in better stats but slower training.\n",
" b_dec_init_method=\"geometric_median\", # The geometric median can be used to initialize the decoder weights.\n",
" apply_b_dec_to_input=False, # We won't apply the decoder to the input.\n",
" \n",
" # Training Parameters\n",
" lr=0.0008, # lower the better, we'll go fairly high to speed up the tutorial.\n",
" lr_scheduler_name=\"constant\", # constant learning rate with warmup. Could be better schedules out there.\n",
@@ -271,15 +269,13 @@
" lp_norm=1.0, # the L1 penalty (and not a Lp for p < 1)\n",
" train_batch_size=4096,\n",
" context_size=128, # will control the lenght of the prompts we feed to the model. Larger is better but slower.\n",
" \n",
" # Activation Store Parameters\n",
" n_batches_in_buffer=64, # controls how many activations we store / shuffle.\n",
" training_tokens=1_000_000 * 25, # 100 million tokens is quite a few, but we want to see good stats. Get a coffee, come back.\n",
" training_tokens=1_000_000\n",
" * 25, # 100 million tokens is quite a few, but we want to see good stats. Get a coffee, come back.\n",
" finetuning_method=\"decoder\",\n",
" finetuning_tokens=1_000_000 * 25,\n",
" store_batch_size=32,\n",
" \n",
" \n",
" # Resampling protocol\n",
" use_ghost_grads=False,\n",
" feature_sampling_window=1000, # this controls our reporting of feature sparsity stats\n",
@@ -620,8 +616,6 @@
}
],
"source": [
"\n",
"\n",
"cfg = LanguageModelSAERunnerConfig(\n",
" # Data Generating Function (Model + Training Distibuion)\n",
" model_name=\"gpt2-small\",\n",
@@ -630,7 +624,7 @@
" d_in=768,\n",
" dataset_path=\"apollo-research/Skylion007-openwebtext-tokenizer-gpt2\",\n",
" is_dataset_tokenized=True,\n",
" prepend_bos=True, # should experiment with turning this off.\n",
" prepend_bos=True, # should experiment with turning this off.\n",
" # SAE Parameters\n",
" expansion_factor=32, # determines the dimension of the SAE.\n",
" b_dec_init_method=\"geometric_median\", # geometric median is better but slower to get started\n",
@@ -650,13 +644,11 @@
" finetuning_method=\"decoder\",\n",
" finetuning_tokens=1_000_000 * 100,\n",
" store_batch_size=32,\n",
" \n",
" # Resampling protocol\n",
" use_ghost_grads=False,\n",
" feature_sampling_window=2500,\n",
" dead_feature_window=5000,\n",
" dead_feature_threshold=1e-8,\n",
" \n",
" # WANDB\n",
" log_to_wandb=True,\n",
" wandb_project=\"gpt2_small_experiments_april\",\n",
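
For orientation, the numbers implied by the first (tiny-stories) config can be worked out directly from the values visible in the diff. The sketch below is plain Python arithmetic rather than a call into the library whose config class is shown; the buffer-size line additionally assumes the activation store holds n_batches_in_buffer × store_batch_size × context_size tokens, which is an assumption about how the store is sized, not something shown in this commit.

```python
# Back-of-the-envelope figures implied by the tiny-stories config above.
# Plain arithmetic on values visible in the diff; no library imports required.

d_in = 1024                      # width of the MLP output being dictionary-learned
expansion_factor = 16            # SAE width multiplier
train_batch_size = 4096          # activations per optimizer step
context_size = 128               # tokens per prompt fed to the model
training_tokens = 1_000_000 * 25
n_batches_in_buffer = 64
store_batch_size = 32            # prompts per forward pass when filling the buffer

d_sae = d_in * expansion_factor                        # 16_384 SAE features
total_steps = training_tokens // train_batch_size      # ~6_103 optimizer steps
# Assumption: the activation buffer holds this many tokens at a time.
buffer_tokens = n_batches_in_buffer * store_batch_size * context_size  # 262_144

print(f"d_sae={d_sae}, steps={total_steps:,}, buffer tokens={buffer_tokens:,}")
```

If the second (gpt2-small) config uses comparable batch and buffer settings, the same arithmetic applies with d_in=768 and expansion_factor=32, giving a 24,576-wide SAE.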
