[Bugfix] Fix embedding to support 2D inputs (#5829)

2024-06-26 00:15:22 -07:00 · 2024-06-26 00:15:22 -07:00 · 6806998bf9
commit 6806998bf9
parent 515080ad2f
1 changed file with 2 additions and 2 deletions
--- a/vllm/model_executor/layers/vocab_parallel_embedding.py
+++ b/vllm/model_executor/layers/vocab_parallel_embedding.py
@@ -306,11 +306,11 @@ class VocabParallelEmbedding(torch.nn.Module):
                self.shard_indices.added_vocab_end_index)
        else:
            masked_input = input_
-            # Get the embeddings.
+        # Get the embeddings.
        output_parallel = F.embedding(masked_input.long(), self.weight)
        # Mask the output embedding.
        if self.tp_size > 1:
-            output_parallel.masked_fill_(input_mask.unsqueeze(1), 0)
+            output_parallel.masked_fill_(input_mask.unsqueeze(-1), 0)
        # Reduce across all the model parallel GPUs.
        output = tensor_model_parallel_all_reduce(output_parallel)
        return output