Fix bias in InternLM (#1501)

Woosuk Kwon 2023-10-30 00:24:18 +01:00 committed by GitHub
parent 69be658bba
commit aa9af07cac

@@ -62,6 +62,7 @@ class InternLMAttention(nn.Module):
         self,
         hidden_size: int,
         num_heads: int,
+        bias: bool,
         rope_theta: float = 10000,
         max_position_embeddings: int = 8192,
     ):
@@ -81,13 +82,13 @@ class InternLMAttention(nn.Module):
         self.qkv_proj = ColumnParallelLinear(
             hidden_size,
             3 * self.total_num_heads * self.head_dim,
-            bias=True,
+            bias=bias,
             gather_output=False,
         )
         self.o_proj = RowParallelLinear(
             self.total_num_heads * self.head_dim,
             hidden_size,
-            bias=True,
+            bias=bias,
             input_is_parallel=True,
         )
         self.attn = PagedAttentionWithRoPE(
@@ -126,6 +127,7 @@ class InternLMDecoderLayer(nn.Module):
         self.self_attn = InternLMAttention(
             hidden_size=self.hidden_size,
             num_heads=config.num_attention_heads,
+            bias=config.bias,
             rope_theta=rope_theta,
             max_position_embeddings=max_position_embeddings,
         )