# Operations

Most basic Python operations and torch operations can be applied to proxies; each one is added to the computation graph.

In this example we compute the sum of the hidden states and add it to the hidden states themselves (for whatever reason). By saving each intermediate step, we can see how the values change.

[1]:

from nnsight import LanguageModel
import torch

# Wrap GPT-2 so its internals can be accessed inside a trace.
model = LanguageModel("openai-community/gpt2", device_map="auto")

with model.trace("The Eiffel Tower is in the city of") as tracer:
    # First element of the last transformer block's output; .save() keeps it
    # available for inspection after the trace exits.
    hidden_states_pre = model.transformer.h[-1].output[0].save()

    # A plain torch call on the proxy: sum over every element.
    hs_sum = torch.sum(hidden_states_pre).save()

    # Add that sum back onto the hidden states and save the result directly.
    hs_edited = (hidden_states_pre + hs_sum).save()

/opt/anaconda3/envs/nnsight/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html
  from .autonotebook import tqdm as notebook_tqdm

[2]:

# Show the saved values in order: original hidden states, their sum, and the
# edited hidden states — one per line.
print(hidden_states_pre, hs_sum, hs_edited, sep="\n")

tensor([[[ 0.0505, -0.1728, -0.1690,  ..., -1.0096,  0.1280, -1.0687],
         [ 8.7495,  2.9057,  5.3024,  ..., -8.0418,  1.2964, -2.8677],
         [ 0.2960,  4.6686, -3.6642,  ...,  0.2391, -2.6064,  3.2263],
         ...,
         [ 2.1537,  6.8917,  3.8651,  ...,  0.0588, -1.9866,  5.9188],
         [-0.4460,  7.4285, -9.3065,  ...,  2.0528, -2.7946,  0.5556],
         [ 6.6286,  1.7258,  4.7969,  ...,  7.6714,  3.0682,  2.0481]]],
       device='mps:0', grad_fn=<AddBackward0>)
tensor(501.2962, device='mps:0', grad_fn=<SumBackward0>)
tensor([[[501.3467, 501.1234, 501.1272,  ..., 500.2866, 501.4242, 500.2275],
         [510.0457, 504.2019, 506.5986,  ..., 493.2544, 502.5926, 498.4285],
         [501.5922, 505.9648, 497.6320,  ..., 501.5353, 498.6898, 504.5225],
         ...,
         [503.4499, 508.1880, 505.1613,  ..., 501.3550, 499.3096, 507.2150],
         [500.8502, 508.7247, 491.9897,  ..., 503.3490, 498.5016, 501.8518],
         [507.9248, 503.0220, 506.0931,  ..., 508.9676, 504.3644, 503.3443]]],
       device='mps:0', grad_fn=<AddBackward0>)