diff --git a/tutorials/neuronpedia/make_batch.py b/tutorials/neuronpedia/make_batch.py index d347ebca..79dbe5d9 100644 --- a/tutorials/neuronpedia/make_batch.py +++ b/tutorials/neuronpedia/make_batch.py @@ -4,13 +4,11 @@ SAE_PATH = sys.argv[1] MODEL_ID = sys.argv[2] SAE_ID = sys.argv[3] -LEFT_BUFFER = int(sys.argv[4]) -RIGHT_BUFFER = int(sys.argv[5]) -N_BATCHES_SAMPLE = int(sys.argv[6]) -N_PROMPTS_SELECT = int(sys.argv[7]) -FEATURES_AT_A_TIME = int(sys.argv[8]) -START_BATCH_INCLUSIVE = int(sys.argv[9]) -END_BATCH_INCLUSIVE = int(sys.argv[10]) +N_BATCHES_SAMPLE = int(sys.argv[4]) +N_PROMPTS_SELECT = int(sys.argv[5]) +FEATURES_AT_A_TIME = int(sys.argv[6]) +START_BATCH_INCLUSIVE = int(sys.argv[7]) +END_BATCH_INCLUSIVE = int(sys.argv[8]) NP_OUTPUT_FOLDER = "../../neuronpedia_outputs" @@ -23,8 +21,6 @@ n_batches_to_sample_from=N_BATCHES_SAMPLE, n_prompts_to_select=N_PROMPTS_SELECT, n_features_at_a_time=FEATURES_AT_A_TIME, - buffer_tokens_left=LEFT_BUFFER, - buffer_tokens_right=RIGHT_BUFFER, start_batch_inclusive=START_BATCH_INCLUSIVE, end_batch_inclusive=END_BATCH_INCLUSIVE, ) diff --git a/tutorials/neuronpedia/make_features.sh b/tutorials/neuronpedia/make_features.sh index a1e23f58..149147b7 100755 --- a/tutorials/neuronpedia/make_features.sh +++ b/tutorials/neuronpedia/make_features.sh @@ -8,55 +8,39 @@ echo "===== This takes input of one SAE directory at a time." echo "===== Features will be output into ./neuronpedia_outputs/{model}_{hook_point}_{d_sae}/batch-{batch_num}.json" echo "" -echo "(Step 1 of 10)" +echo "(Step 1 of 8)" echo "What is the absolute, full local file path to your SAE's directory (with cfg.json, sae_weights.safetensors, sparsity.safetensors)?" read saepath # TODO: support huggingface directories echo "" -echo "(Step 2 of 10)" +echo "(Step 2 of 8)" echo "What's the model ID? This must exactly match (including casing) the model ID you created on Neuronpedia." read modelid echo "" -echo "(Step 3 of 10)" +echo "(Step 3 of 8)" echo "What's the SAE ID?" echo "This was set when you did 'Add SAEs' on Neuronpedia. This must exactly match that ID (including casing). It's in the format [abbrev hook name]-[abbrev author name], like res-jb." read saeid echo "" -echo "(Step 4 of 10)" +echo "(Step 4 of 8)" echo "How many features are in this SAE?" read numfeatures echo "" -echo "(Step 5 of 10)" +echo "(Step 5 of 8)" read -p "How many features do you want generate per batch file? More requires more RAM. (default: 128): " perbatch [ -z "${perbatch}" ] && perbatch='128' echo "" -echo "(Step 6 of 10)" -echo "For each activating text sequence, how many tokens to the LEFT of the top activating token do you want?" -echo "If your text sequences are 128 tokens long, then you might put 64. (default: 64)" -read leftbuffer -[ -z "${leftbuffer}" ] && leftbuffer='64' - -echo "" -echo "(Step 7 of 10)" -echo "For each activating text sequence, how many tokens to the RIGHT of the top activating token do you want?" -echo "Left Buffer + Right Buffer must be < Total Text Length - 1" -echo "For example, text sequences of 128 can have at most buffers of 64 + 62 = 126" -echo "If your text sequences are 128 tokens long, then you might put 62. (default: 62)" -read rightbuffer -[ -z "${rightbuffer}" ] && rightbuffer='62' - -echo "" -echo "(Step 8 of 10)" +echo "(Step 6 of 8)" read -p "Enter number of batches to sample from (default: 4096): " batches [ -z "${batches}" ] && batches='4096' echo "" -echo "(Step 9 of 10)" +echo "(Step 7 of 8)" read -p "Enter number of prompts to select from (default: 24576): " prompts [ -z "${prompts}" ] && prompts='24576' @@ -65,7 +49,7 @@ numbatches=$(expr $numfeatures / $perbatch) echo "===== INFO: We'll generate $numbatches batches of $perbatch features per batch = $numfeatures total features" echo "" -echo "(Step 10 of 10)" +echo "(Step 8 of 8)" read -p "Do you want to resume from a specific batch number? Enter 1 to start from the beginning (default: 1): " startbatch [ -z "${startbatch}" ] && startbatch='1'