Skip to content

Commit

Permalink
Don't use buffer, fix anomalies
Browse files: browse the repository at this point in the history
  • Loading branch information
hijohnnylin committed Apr 15, 2024
1 parent 3837884 commit 2c9ca64
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 33 deletions.
14 changes: 5 additions & 9 deletions tutorials/neuronpedia/make_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,13 +4,11 @@
SAE_PATH = sys.argv[1]
MODEL_ID = sys.argv[2]
SAE_ID = sys.argv[3]
LEFT_BUFFER = int(sys.argv[4])
RIGHT_BUFFER = int(sys.argv[5])
N_BATCHES_SAMPLE = int(sys.argv[6])
N_PROMPTS_SELECT = int(sys.argv[7])
FEATURES_AT_A_TIME = int(sys.argv[8])
START_BATCH_INCLUSIVE = int(sys.argv[9])
END_BATCH_INCLUSIVE = int(sys.argv[10])
N_BATCHES_SAMPLE = int(sys.argv[4])
N_PROMPTS_SELECT = int(sys.argv[5])
FEATURES_AT_A_TIME = int(sys.argv[6])
START_BATCH_INCLUSIVE = int(sys.argv[7])
END_BATCH_INCLUSIVE = int(sys.argv[8])

NP_OUTPUT_FOLDER = "../../neuronpedia_outputs"

Expand All @@ -23,8 +21,6 @@
n_batches_to_sample_from=N_BATCHES_SAMPLE,
n_prompts_to_select=N_PROMPTS_SELECT,
n_features_at_a_time=FEATURES_AT_A_TIME,
buffer_tokens_left=LEFT_BUFFER,
buffer_tokens_right=RIGHT_BUFFER,
start_batch_inclusive=START_BATCH_INCLUSIVE,
end_batch_inclusive=END_BATCH_INCLUSIVE,
)
Expand Down
32 changes: 8 additions & 24 deletions tutorials/neuronpedia/make_features.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,55 +8,39 @@ echo "===== This takes input of one SAE directory at a time."
echo "===== Features will be output into ./neuronpedia_outputs/{model}_{hook_point}_{d_sae}/batch-{batch_num}.json"

echo ""
echo "(Step 1 of 10)"
echo "(Step 1 of 8)"
echo "What is the absolute, full local file path to your SAE's directory (with cfg.json, sae_weights.safetensors, sparsity.safetensors)?"
read saepath
# TODO: support huggingface directories

echo ""
echo "(Step 2 of 10)"
echo "(Step 2 of 8)"
echo "What's the model ID? This must exactly match (including casing) the model ID you created on Neuronpedia."
read modelid

echo ""
echo "(Step 3 of 10)"
echo "(Step 3 of 8)"
echo "What's the SAE ID?"
echo "This was set when you did 'Add SAEs' on Neuronpedia. This must exactly match that ID (including casing). It's in the format [abbrev hook name]-[abbrev author name], like res-jb."
read saeid

echo ""
echo "(Step 4 of 10)"
echo "(Step 4 of 8)"
echo "How many features are in this SAE?"
read numfeatures

echo ""
echo "(Step 5 of 10)"
echo "(Step 5 of 8)"
read -p "How many features do you want generate per batch file? More requires more RAM. (default: 128): " perbatch
[ -z "${perbatch}" ] && perbatch='128'

echo ""
echo "(Step 6 of 10)"
echo "For each activating text sequence, how many tokens to the LEFT of the top activating token do you want?"
echo "If your text sequences are 128 tokens long, then you might put 64. (default: 64)"
read leftbuffer
[ -z "${leftbuffer}" ] && leftbuffer='64'

echo ""
echo "(Step 7 of 10)"
echo "For each activating text sequence, how many tokens to the RIGHT of the top activating token do you want?"
echo "Left Buffer + Right Buffer must be < Total Text Length - 1"
echo "For example, text sequences of 128 can have at most buffers of 64 + 62 = 126"
echo "If your text sequences are 128 tokens long, then you might put 62. (default: 62)"
read rightbuffer
[ -z "${rightbuffer}" ] && rightbuffer='62'

echo ""
echo "(Step 8 of 10)"
echo "(Step 6 of 8)"
read -p "Enter number of batches to sample from (default: 4096): " batches
[ -z "${batches}" ] && batches='4096'

echo ""
echo "(Step 9 of 10)"
echo "(Step 7 of 8)"
read -p "Enter number of prompts to select from (default: 24576): " prompts
[ -z "${prompts}" ] && prompts='24576'

Expand All @@ -65,7 +49,7 @@ numbatches=$(expr $numfeatures / $perbatch)
echo "===== INFO: We'll generate $numbatches batches of $perbatch features per batch = $numfeatures total features"

echo ""
echo "(Step 10 of 10)"
echo "(Step 8 of 8)"
read -p "Do you want to resume from a specific batch number? Enter 1 to start from the beginning (default: 1): " startbatch
[ -z "${startbatch}" ] && startbatch='1'

Expand Down

0 comments on commit 2c9ca64

Please sign in to comment.