nnsight.modeling#

class nnsight.modeling.language.LanguageModel(*args, config: ~transformers.configuration_utils.PretrainedConfig | None = None, tokenizer: ~transformers.tokenization_utils.PreTrainedTokenizer | None = None, automodel: ~typing.Type[~transformers.models.auto.modeling_auto.AutoModel] = <class 'transformers.models.auto.modeling_auto.AutoModelForCausalLM'>, **kwargs)[source]#

LanguageModels are NNsight wrappers around transformers language models.

Inputs can be in the form of:

Prompt: (str)
Prompts: (List[str])
Batched prompts: (List[List[str]])
Tokenized prompt: (Union[List[int], torch.Tensor])
Tokenized prompts: (Union[List[List[int]], torch.Tensor])
Direct input: (Dict[str,Any])

If using a custom model, you also need to provide the tokenizer, e.g. LanguageModel(custom_model, tokenizer=tokenizer). A sketch of this pattern is shown below.
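A minimal sketch of wrapping a custom model, assuming a standard transformers causal LM and its matching tokenizer (the repository id is illustrative):

from transformers import AutoModelForCausalLM, AutoTokenizer
from nnsight import LanguageModel

# Load a locally constructed model and its tokenizer.
custom_model = AutoModelForCausalLM.from_pretrained("openai-community/gpt2")
tokenizer = AutoTokenizer.from_pretrained("openai-community/gpt2")

# For custom models the tokenizer must be passed explicitly.
model = LanguageModel(custom_model, tokenizer=tokenizer)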

Calls to generate() pass their arguments downstream to GenerationMixin.generate().
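A hedged sketch of typical usage (the repository id and the transformer.h module path assume a GPT-2 style model; other architectures expose different submodule names):

from nnsight import LanguageModel

model = LanguageModel("openai-community/gpt2", device_map="auto")

# A prompt (str), a list of prompts, or pre-tokenized input can be passed to trace/generate.
with model.trace("The Eiffel Tower is in the city of"):
    # Save the hidden states output by the last transformer block.
    hidden_states = model.transformer.h[-1].output[0].save()

print(hidden_states.shape)

# Keyword arguments to generate are forwarded to GenerationMixin.generate().
with model.generate("The Eiffel Tower is in the city of", max_new_tokens=3):
    generated_tokens = model.generator.output.save()

print(model.tokenizer.decode(generated_tokens[0]))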

config#

Hugging Face config loaded from the repository or checkpoint.

Type:

PretrainedConfig

tokenizer#

Tokenizer for LMs.

Type:

PreTrainedTokenizer

automodel#

AutoModel type from transformers auto models.

Type:

Type

model#

Meta-device (lazily initialized) version of the underlying auto model.

Type:

PreTrainedModel

class Generator[source]#
class Streamer(*args, **kwargs)[source]#
class nnsight.modeling.diffusion.Diffuser(*args, **kwargs)[source]#
class nnsight.modeling.diffusion.DiffusionModel(*args, **kwargs)[source]#
class nnsight.modeling.vllm.vllm.VLLM(*args, **kwargs)[source]#

NNsight wrapper to conduct interventions on a vLLM inference engine.

vllm_entrypoint#

vLLM language model.

Type:

vllm.LLM

tokenizer#

Tokenizer.

Type:

vllm.transformers_utils.tokenizer.AnyTokenizer

logits#

Logits.

Type:

nnsight.WrapperModule

samples#

Sampled tokens.

Type:

nnsight.WrapperModule
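A hedged sketch of tracing a vLLM-backed model (the model id, import path, and sampling arguments are illustrative; vLLM support requires a compatible vllm installation):

from nnsight.modeling.vllm import VLLM

model = VLLM("openai-community/gpt2", device="auto", dispatch=True)

# Sampling parameters such as temperature and top_p are passed per invocation.
with model.trace("The Eiffel Tower is in the city of", temperature=0.0, top_p=1.0):
    # logits and samples are WrapperModules exposed for interventions.
    logits = model.logits.output.save()
    samples = model.samples.output.save()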

interleave(interleaver: Interleaver, *args, fn: Callable | str | None = None, **kwargs) → Any[source]#

if not self.dispatched:
    self.dispatch()

for param in params:
    param.intervention_graph = intervention_graph

fn(prompts, params, **kwargs)

intervention_graph.alive = False

class nnsight.modeling.vllm.sampling.NNsightSamplingMetadata(*args, intervention_graph: InterventionGraph | None = None, nns_batch_groups: List[Tuple[int, int]] | None = None, batch_groups: Dict[int, Tuple[int, int]] | None = None, **kwargs)[source]#
class nnsight.modeling.vllm.sampling.NNsightSamplingParams(n: int = 1, best_of: int | None = None, _real_n: int | None = None, presence_penalty: float = 0.0, frequency_penalty: float = 0.0, repetition_penalty: float = 1.0, temperature: float = 1.0, top_p: float = 1.0, top_k: int = -1, min_p: float = 0.0, seed: int | None = None, stop: str | ~typing.List[str] | None = None, stop_token_ids: ~typing.List[int] | None = None, bad_words: ~typing.List[str] | None = None, ignore_eos: bool = False, max_tokens: int | None = 16, min_tokens: int = 0, logprobs: int | None = None, prompt_logprobs: int | None = None, detokenize: bool = True, skip_special_tokens: bool = True, spaces_between_special_tokens: bool = True, logits_processors: ~typing.Any | None = None, include_stop_str_in_output: bool = False, truncate_prompt_tokens: ~typing.Annotated[int, msgspec.Meta(ge=1)] | None = None, output_kind: ~vllm.sampling_params.RequestOutputKind = RequestOutputKind.CUMULATIVE, output_text_buffer_length: int = 0, _all_stop_token_ids: ~typing.Set[int] = <factory>, guided_decoding: ~vllm.sampling_params.GuidedDecodingParams | None = None, logit_bias: ~typing.Dict[int, float] | None = None, allowed_token_ids: ~typing.List[int] | None = None, intervention_graph: ~nnsight.intervention.graph.graph.InterventionGraph | None = None, nns_batch_groups: ~typing.List[~typing.Tuple[int, int]] | None = None, invoker_group: int | None = None, is_default_param: bool = True)[source]#
clone() → SamplingParams[source]#

Deep copy excluding LogitsProcessor objects.

LogitsProcessor objects are excluded because they may contain an arbitrary, nontrivial amount of data. See vllm-project/vllm#3087