Fix all Sphinx warnings (#422)
Fixes all warnings and will now error if new warnings are created (so we catch them in the CD).
alan-cooney authored Oct 19, 2023
1 parent 11cd1c3 commit c49739f
Showing 16 changed files with 177 additions and 162 deletions.
4 changes: 1 addition & 3 deletions .github/workflows/gh-pages.yml
@@ -20,12 +20,10 @@ jobs:
- uses: actions/checkout@v2
- name: Install Poetry
uses: snok/install-poetry@v1
with:
version: 1.4.0
- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: "3.9"
python-version: "3.11"
- name: Install dependencies
run: poetry install --with docs
- name: Build Docs
3 changes: 3 additions & 0 deletions .vscode/settings.json
@@ -30,6 +30,7 @@
"gelu",
"githubpages",
"gptj",
"howpublished",
"huggingface",
"interpretability",
"isort",
@@ -41,6 +42,7 @@
"Nanda",
"neel",
"neox",
"Nitpicky",
"Olah",
"pagename",
"probs",
@@ -51,6 +53,7 @@
"templatedir",
"templatename",
"toctree",
"transformerlens",
"Unembed",
"unembedding"
],
28 changes: 18 additions & 10 deletions docs/make_docs.py
@@ -1,11 +1,4 @@
"""
Generate a markdown table summarizing properties of pretrained models.
This script extracts various properties of pretrained models from the
`easy_transformer` library, such as the number of parameters, layers, and heads,
among others, and generates a markdown table. This table is saved to the
docs directory.
"""
"""Build the API Documentation."""
import subprocess
from functools import lru_cache
from pathlib import Path
@@ -76,7 +69,12 @@ def get_property(name, model_name):


def generate_model_table():
"""Generate a markdown table summarizing properties of pretrained models."""
"""Generate a markdown table summarizing properties of pretrained models.
This script extracts various properties of pretrained models from the `easy_transformer`
library, such as the number of parameters, layers, and heads, among others, and generates a
markdown table.
"""

# Create the table
column_names = [
Expand Down Expand Up @@ -115,7 +113,17 @@ def generate_model_table():
def build_docs():
"""Build the docs."""
generate_model_table()
subprocess.run(["sphinx-build", SOURCE_PATH, BUILD_PATH], check=True)

subprocess.run(
[
"sphinx-build",
SOURCE_PATH,
BUILD_PATH,
# "-n", # Nitpicky mode (warn about all missing references)
"-W", # Turn warnings into errors
],
check=True,
)


def docs_hot_reload():
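For context (not part of the diff): with `-W`, a single Sphinx warning now fails the build, so a local run of `build_docs` surfaces the same errors the CD job would. A rough sketch, assuming the module is importable as `docs.make_docs`:

```python
# Sketch of a strict local docs build; the import path is an assumption.
from docs.make_docs import build_docs  # assumed import path

# With the "-W" flag added above, sphinx-build exits non-zero on the first
# warning; subprocess.run(check=True) then raises CalledProcessError, so the
# build fails instead of silently accumulating warnings.
build_docs()
```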
1 change: 0 additions & 1 deletion docs/source/conf.py
@@ -23,7 +23,6 @@
"sphinx.ext.napoleon",
"myst_parser",
"sphinx.ext.githubpages",
"sphinx.ext.apidoc",
]

source_suffix = {
17 changes: 9 additions & 8 deletions docs/source/content/citation.md
@@ -1,14 +1,15 @@

## Citation
# Citation

Please cite this library as:
```
@misc{nandatransformerlens2022,
title = {TransformerLens},
author = {Nanda, Neel},
url = {/~https://github.com/neelnanda-io/TransformerLens},
year = {2022}

```BibTeX
@misc{nanda2022transformerlens,
title = {TransformerLens},
author = {Neel Nanda},
year = {2022},
howpublished = {\url{/~https://github.com/neelnanda-io/TransformerLens}},
}
```
(This is my best guess for how citing software works, feel free to send a correction!)

Also, if you're actually using this for your research, I'd love to chat! Reach out at neelnanda27@gmail.com
10 changes: 5 additions & 5 deletions docs/source/content/development.md
@@ -1,10 +1,10 @@
## Local Development
# Local Development

### DevContainer
## DevContainer

For a one-click setup of your development environment, this project includes a [DevContainer](https://containers.dev/). It can be used locally with [VS Code](https://marketplace.visualstudio.com/items?itemName=ms-vscode-remote.remote-containers) or with [GitHub Codespaces](/~https://github.com/features/codespaces).

### Manual Setup
## Manual Setup

This project uses [Poetry](https://python-poetry.org/docs/#installation) for package management. Install as follows (this will also setup your virtual environment):

@@ -17,12 +17,12 @@ Optionally, if you want Jupyter Lab you can run `poetry run pip install jupyterlab`

Then the library can be imported as `import transformer_lens`.

### Testing
## Testing

If adding a feature, please add unit tests for it to the tests folder, and check that it hasn't broken anything major using the existing tests (install pytest and run it in the root TransformerLens/ directory).

To run tests, you can use the following command:

```
```shell
poetry run pytest -v transformer_lens/tests
```
3 changes: 2 additions & 1 deletion docs/source/content/gallery.md
@@ -1,5 +1,6 @@
## Gallery
# Gallery

User contributed examples of the library being used in action:

* [Induction Heads Phase Change Replication](https://colab.research.google.com/github/ckkissane/induction-heads-transformer-lens/blob/main/Induction_Heads_Phase_Change.ipynb): A partial replication of [In-Context Learning and Induction Heads](https://transformer-circuits.pub/2022/in-context-learning-and-induction-heads/index.html) from Connor Kissane
* [Decision Transformer Interpretability](/~https://github.com/jbloomAus/DecisionTransformerInterpretability): A set of scripts for training decision transformers which uses transformer lens to view intermediate activations, perform attribution and ablations. A write up of the initial work can be found [here](https://www.lesswrong.com/posts/bBuBDJBYHt39Q5zZy/decision-transformer-interpretability).
9 changes: 4 additions & 5 deletions docs/source/content/getting_started.md
@@ -1,22 +1,21 @@
## Getting Started
# Getting Started

**Start with the [main demo](https://neelnanda.io/transformer-lens-demo) to learn how the library works, and the basic features**.

To see what using it for exploratory analysis in practice looks like, check out [my notebook analysing Indirect Object Identification](https://neelnanda.io/exploratory-analysis-demo) or [my recording of myself doing research](https://www.youtube.com/watch?v=yo4QvDn-vsU)!

Mechanistic interpretability is a very young and small field, and there are a *lot* of open problems - if you would like to help, please try working on one! **Check out my [list of concrete open problems](https://docs.google.com/document/d/1WONBzNqfKIxERejrrPlQMyKqg7jSFW92x5UMXNrMdPo/edit) to figure out where to start.** It begins with advice on skilling up, and key resources to check out.

If you're new to transformers, check out my [what is a transformer tutorial](https://neelnanda.io/transformer-tutorial) and [tutorial on coding GPT-2 from scratch](https://neelnanda.io/transformer-tutorial-2) (with [an accompanying template](https://neelnanda.io/transformer-template) to write one yourself!)

### Advice for Reading the Code
## Advice for Reading the Code

One significant design decision made was to have a single transformer implementation that could support a range of subtly different GPT-style models. This has the upside of interpretability code just working for arbitrary models when you change the model name in `HookedTransformer.from_pretrained`! But it has the significant downside that the code implementing the model (in `HookedTransformer.py` and `components.py`) can be difficult to read. I recommend starting with my [Clean Transformer Demo](https://neelnanda.io/transformer-solution), which is a clean, minimal implementation of GPT-2 with the same internal architecture and activation names as HookedTransformer, but is significantly clearer and better documented.

### Installation
## Installation

`pip install git+/~https://github.com/neelnanda-io/TransformerLens`

Import the library with `import transformer_lens`

(Note: This library used to be known as EasyTransformer, and some breaking changes have been made since the rename. If you need to use the old version with some legacy code, run `pip install git+/~https://github.com/neelnanda-io/TransformerLens@v1`.)

6 changes: 3 additions & 3 deletions docs/source/content/tutorials.md
@@ -1,14 +1,14 @@
## Tutorials
# Tutorials

- **Start with the [main demo](https://neelnanda.io/transformer-lens-demo) to learn how the library works, and the basic features**.

### Where To Start
## Where To Start

- To see what using it for exploratory analysis in practice looks like, check out [my notebook analysing Indirect Object Identification](https://neelnanda.io/exploratory-analysis-demo) or [my recording of myself doing research](https://www.youtube.com/watch?v=yo4QvDn-vsU)!

- [What is a Transformer tutorial](https://neelnanda.io/transformer-tutorial)

### Demos
## Demos

- [**Activation Patching in TransformerLens**](https://colab.research.google.com/github/neelnanda-io/TransformerLens/blob/main/demos/Activation_Patching_in_TL_Demo.ipynb) - Accompanies the [Exploratory Analysis Demo](https://colab.research.google.com/github/neelnanda-io/TransformerLens/blob/main/demos/Exploratory Analysis Demo.ipynb). This demo explains how to use [Activation Patching](https://dynalist.io/d/n2ZWtnoYHrU1s4vnFSAQ519J#z=qeWBvs-R-taFfcCq-S_hgMqx) in TransformerLens, a mechanistic interpretability technique that uses causal intervention to identify which activations in a model matter for producing an output.

39 changes: 18 additions & 21 deletions transformer_lens/ActivationCache.py
@@ -268,24 +268,21 @@ def logit_attrs(
difference attributions for the residual stack if incorrect_tokens is provided.
Args:
residual_stack (Float[torch.Tensor, "num_components *batch_and_pos_dims d_model"]):
stack of components of residual stream to get logit attributions for.
tokens (Union[str, int, Int[torch.Tensor, ""], Int[torch.Tensor, "batch"],
Int[torch.Tensor, "batch position"]]): tokens to compute logit attributions on.
incorrect_tokens (Union[str, int, Int[torch.Tensor, ""], Int[torch.Tensor, "batch"],
Int[torch.Tensor, "batch position"]], optional): if provided, compute attributions
residual_stack: Stack of components of residual stream to get logit attributions for.
tokens: tokens to compute logit attributions on.
incorrect_tokens: if provided, compute attributions
on logit difference between tokens and incorrect_tokens. Must have the same shape as
tokens.
pos_slice (Slice, optional): The slice to apply layer norm scaling on. Defaults to None,
pos_slice: The slice to apply layer norm scaling on. Defaults to None,
do nothing.
batch_slice (Slice, optional): The slice to take on the batch dimension during layer
batch_slice: The slice to take on the batch dimension during layer
norm scaling. Defaults to None, do nothing.
has_batch_dim (bool, optional): Whether residual_stack has a batch dimension. Defaults
has_batch_dim: Whether residual_stack has a batch dimension. Defaults
to True.
Returns:
Components: A [num_components, *batch_and_pos_dims] tensor of the logit attributions or
logit difference attributions if incorrect_tokens was provided.
Components: A tensor of the logit attributions or logit difference attributions if
incorrect_tokens was provided.
"""
if not isinstance(pos_slice, Slice):
pos_slice = Slice(pos_slice)
@@ -352,25 +349,25 @@ def decompose_resid(
useful for attributing model behaviour to different components of the residual stream
Args:
layer (int): The layer to take components up to - by default includes
layer: The layer to take components up to - by default includes
resid_pre for that layer and excludes resid_mid and resid_post for that layer.
layer==n_layers means to return all layer outputs incl in the final layer, layer==0
means just embed and pos_embed. The indices are taken such that this gives the
accumulated streams up to the input to layer l
incl_mid (bool, optional): Whether to return resid_mid for all previous
incl_mid: Whether to return resid_mid for all previous
layers. Defaults to False.
mlp_input (bool, optional): Whether to include attn_out for the current
mlp_input: Whether to include attn_out for the current
layer - essentially decomposing the residual stream that's input to the MLP input
rather than the Attn input. Defaults to False.
mode (str): Values are "all", "mlp" or "attn". "all" returns all
mode: Values are "all", "mlp" or "attn". "all" returns all
components, "mlp" returns only the MLP components, and "attn" returns only the
attention components. Defaults to "all".
apply_ln (bool, optional): Whether to apply LayerNorm to the stack. Defaults to False.
pos_slice (Slice): A slice object to apply to the pos dimension.
apply_ln: Whether to apply LayerNorm to the stack. Defaults to False.
pos_slice: A slice object to apply to the pos dimension.
Defaults to None, do nothing.
incl_embeds (bool): Whether to include embed & pos_embed return_labels (bool, optional):
Whether to return a list of labels for
the residual stream components. Useful for labelling graphs. Defaults to True.
incl_embeds: Whether to include embed & pos_embed
return_labels: Whether to return a list of labels for the residual stream components.
Useful for labelling graphs. Defaults to True.
Returns:
Components: A [num_components, batch_size, pos, d_model] tensor of the accumulated
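For context (not part of the diff), a rough usage sketch of the two methods whose docstrings are cleaned up above; the model name, prompt and answer token are illustrative, and `run_with_cache` returning an `ActivationCache` is assumed:

```python
# Sketch: attribute the final-position logit of an answer token to each
# residual stream component, using decompose_resid and logit_attrs.
from transformer_lens import HookedTransformer

model = HookedTransformer.from_pretrained("gpt2")  # illustrative model
tokens = model.to_tokens("The Eiffel Tower is located in the city of")
_, cache = model.run_with_cache(tokens)

# Stack of per-component contributions to the residual stream, with labels.
residual_stack, labels = cache.decompose_resid(
    layer=model.cfg.n_layers,  # layer == n_layers: include every layer's outputs
    mode="all",
    return_labels=True,
)

# Project each component onto the logit direction of " Paris" (assumed to be
# a single token), applying layer norm scaling at the final position.
attributions = cache.logit_attrs(residual_stack, tokens=" Paris", pos_slice=-1)
for label, value in zip(labels, attributions.mean(dim=-1).tolist()):
    print(f"{label}: {value:.3f}")
```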
3 changes: 1 addition & 2 deletions transformer_lens/HookedTransformer.py
@@ -1338,7 +1338,6 @@ def from_pretrained_no_processing(
def init_weights(self):
"""Initialize weights.
Initialize weights matrices with a normal of std=initializer_range (default=0.02). This
roughly follows the GPT-2 paper's scheme (but with truncation, and not halving the std for
W_pos).
@@ -1348,7 +1347,7 @@ def init_weights():
Weight matrices are set to empty by default (to save space + compute, since they're the bulk
of the parameters), so it is important to call this if you are not loading in pretrained
weights! Note that this function assumes that weight names begin with W_
weights! Note that this function assumes that weight names begin with `W_`.
Set seed here to ensure determinism.
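For context (not part of the diff), a rough sketch of the situation the note above describes: building a model from a config rather than `from_pretrained`, with illustrative config values:

```python
# Sketch: init_weights matters when you are not loading pretrained weights.
from transformer_lens import HookedTransformer, HookedTransformerConfig

cfg = HookedTransformerConfig(
    n_layers=2,
    d_model=128,
    d_head=32,
    n_heads=4,
    n_ctx=256,
    d_vocab=50257,
    act_fn="gelu",
)
model = HookedTransformer(cfg)
# Draws each W_* matrix from a normal with std=cfg.initializer_range
# (roughly the GPT-2 scheme described in the docstring above).
model.init_weights()
```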
50 changes: 33 additions & 17 deletions transformer_lens/SVDInterpreter.py
@@ -32,30 +32,46 @@ def get_singular_vectors(
) -> torch.Tensor:
"""Gets the singular vectors for a given vector type, layer, and optionally head.
Options:
- OV: Get the singular vectors of the OV matrix for a particular layer and head.
- w_in: Get the singular vectors of the w_in matrix for a particular layer.
- w_out: Get the singular vectors of the w_out matrix for a particular layer.
Returns a (d_vocab, 1, num_vectors) tensor.
This tensor can then be plotted using Neel's PySvelte, as demonstrated in the demo for this feature. The demo also points out some "gotchas" in this feature - numerical instability means inconsistency across devices, and the default HookedTransformer parameters don't replicate the original SVD post very well. So I'd recommend checking out the demo if you want to use this!
This tensor can then be plotted using Neel's PySvelte, as demonstrated in the demo for this
feature. The demo also points out some "gotchas" in this feature - numerical instability
means inconsistency across devices, and the default HookedTransformer parameters don't
replicate the original SVD post very well. So I'd recommend checking out the demo if you
want to use this!
Example:
.. code-block:: python
from transformer_lens import HookedTransformer, SVDInterpreter

model = HookedTransformer.from_pretrained('gpt2-medium')
svd_interpreter = SVDInterpreter(model)
ov = svd_interpreter.get_singular_vectors('OV', layer_index=22, head_index=10)

all_tokens = [model.to_str_tokens(np.array([i])) for i in range(model.cfg.d_vocab)]
all_tokens = [all_tokens[i][0] for i in range(model.cfg.d_vocab)]

def plot_matrix(matrix, tokens, k=10, filter="topk"):
    pysvelte.TopKTable(
        tokens=all_tokens,
        activations=matrix,
        obj_type="SVD direction",
        k=k,
        filter=filter
    ).show()

plot_matrix(ov, all_tokens)
Args:
vector_type: Type of the vector:
- "OV": Singular vectors of the OV matrix for a particular layer and head.
- "w_in": Singular vectors of the w_in matrix for a particular layer.
- "w_out": Singular vectors of the w_out matrix for a particular layer.
layer_index: The index of the layer.
num_vectors: Number of vectors.
head_index: Index of the head.
"""

if head_index is None:
assert vector_type in [
26 changes: 13 additions & 13 deletions transformer_lens/evals.py
@@ -174,7 +174,7 @@ class IOIDataset(Dataset):
Paper: https://arxiv.org/pdf/2211.00593.pdf
Example:
--------
.. code-block:: python
>>> from transformer_lens.evals import ioi_eval, IOIDataset
@@ -281,22 +281,22 @@ def get_default_nouns():
}


# %%
@torch.inference_mode()
def ioi_eval(
model, dataset=None, batch_size=8, num_samples=1000, tokenizer=None, symmetric=False
):
"""
Evaluates the model on the Indirect Object Identification task.
dataset must be a torch Dataset that returns a dict:
{
'prompt': torch.LongTensor,
'IO': torch.LongTensor,
'S': torch.LongTensor
}
Returns average logit difference and accuracy.
"""Evaluate the Model on the Indirect Object Identification Task.
Args:
model: HookedTransformer model.
dataset: PyTorch Dataset that returns a dict with keys "prompt", "IO", and "S".
batch_size: Batch size to use.
num_samples: Number of samples to use.
tokenizer: Tokenizer to use.
symmetric: Whether to use the symmetric version of the task.
Returns:
Average logit difference and accuracy.
"""
if tokenizer is None:
tokenizer = model.tokenizer
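For context (not part of the diff), a minimal usage sketch for the rewritten `ioi_eval` docstring; the model name is illustrative, and the built-in default dataset is assumed when `dataset` is omitted:

```python
# Sketch: run the IOI eval with its default prompts and report the results.
from transformer_lens import HookedTransformer
from transformer_lens.evals import ioi_eval

model = HookedTransformer.from_pretrained("gpt2")  # illustrative model

# Per the docstring above, this returns the average logit difference between
# the indirect-object ("IO") and subject ("S") completions, plus accuracy.
results = ioi_eval(model, batch_size=8, num_samples=100)
print(results)
```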