Fix bias in InternLM (#1501)
parent 69be658bba
commit aa9af07cac
@@ -62,6 +62,7 @@ class InternLMAttention(nn.Module):
         self,
         hidden_size: int,
         num_heads: int,
+        bias: bool,
         rope_theta: float = 10000,
         max_position_embeddings: int = 8192,
     ):
@@ -81,13 +82,13 @@ class InternLMAttention(nn.Module):
         self.qkv_proj = ColumnParallelLinear(
             hidden_size,
             3 * self.total_num_heads * self.head_dim,
-            bias=True,
+            bias=bias,
             gather_output=False,
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             hidden_size,
-            bias=True,
+            bias=bias,
             input_is_parallel=True,
         )
         self.attn = PagedAttentionWithRoPE(
@@ -126,6 +127,7 @@ class InternLMDecoderLayer(nn.Module):
         self.self_attn = InternLMAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
+            bias=config.bias,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
         )
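
For context, a minimal self-contained sketch of the pattern this commit adopts: the attention projections take their bias from the model config instead of a hard-coded bias=True. It uses plain nn.Linear in place of vLLM's tensor-parallel ColumnParallelLinear/RowParallelLinear, and SimpleInternLMAttention is a hypothetical stand-in for the real class, not the vLLM implementation.

import torch.nn as nn


class SimpleInternLMAttention(nn.Module):
    # Hypothetical stand-in for InternLMAttention; plain nn.Linear replaces
    # the tensor-parallel layers used in vLLM.
    def __init__(self, hidden_size: int, num_heads: int, bias: bool):
        super().__init__()
        self.head_dim = hidden_size // num_heads
        # The fix: bias comes from the caller (ultimately config.bias)
        # rather than always being True, so InternLM checkpoints trained
        # without bias terms load correctly.
        self.qkv_proj = nn.Linear(hidden_size, 3 * hidden_size, bias=bias)
        self.o_proj = nn.Linear(hidden_size, hidden_size, bias=bias)


# Usage: the bias argument mirrors the config.bias attribute read in
# InternLMDecoderLayer above (sizes here are illustrative).
attn = SimpleInternLMAttention(hidden_size=4096, num_heads=32, bias=False)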