diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py
index 2f858d2d..cf89e28b 100644
--- a/vllm/model_executor/models/gpt_j.py
+++ b/vllm/model_executor/models/gpt_j.py
@@ -65,7 +65,7 @@ class GPTJAttention(nn.Module):
         self.num_heads = self.total_num_heads // tp_world_size

         scaling = self.head_size**-0.5
-        assert config.rotary
+        assert getattr(config, "rotary", True)
         assert config.rotary_dim % 2 == 0
         self.attn = PagedAttentionWithRoPE(self.num_heads, self.head_size,
                                            scaling, config.rotary_dim)
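
A minimal sketch of what this change buys, under the assumption that some GPT-J-style configs simply do not define a `rotary` attribute (only `rotary_dim`): the old `assert config.rotary` would then raise an `AttributeError` before the assertion is even evaluated, while `getattr(config, "rotary", True)` treats a missing attribute as "rotary enabled". The `OldStyleConfig`/`NewStyleConfig` classes below are hypothetical stand-ins for illustration, not real vLLM or transformers classes:

```python
# Hypothetical configs illustrating the getattr guard (not real classes).
class OldStyleConfig:
    rotary = True      # older configs expose an explicit rotary flag
    rotary_dim = 64


class NewStyleConfig:
    rotary_dim = 64    # newer configs omit the flag entirely


for config in (OldStyleConfig(), NewStyleConfig()):
    # Old code: `assert config.rotary` raises AttributeError on NewStyleConfig.
    # New code: a missing attribute defaults to True, so both configs pass.
    assert getattr(config, "rotary", True)
    assert config.rotary_dim % 2 == 0
    print(type(config).__name__, "accepted")
```

The behavior for configs that do set `rotary` is unchanged: an explicit `rotary = False` still fails the assertion, since `getattr` only supplies the default when the attribute is absent.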