[Bugfix] Fix logit soft cap in flash-attn backend (#7425)
commit cfba4def5d
parent d2bc4510a4
@@ -563,6 +563,7 @@ class FlashAttentionImpl(AttentionImpl):
                 softmax_scale=self.scale,
                 causal=True,
                 alibi_slopes=self.alibi_slopes,
+                softcap=self.logits_soft_cap,
             ).squeeze(1)
 
         # Reshape the output tensor.
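Context on the fix: the decode path above calls the FlashAttention kernel but previously did not forward self.logits_soft_cap, so the soft cap was silently dropped for models that rely on it (e.g. Gemma 2). The softcap argument applies logit soft capping inside the fused kernel; conceptually it squashes the raw scaled QK^T scores through a scaled tanh before the softmax so they stay bounded in (-cap, cap). Below is a minimal reference sketch of that transform, not vLLM's kernel code; soft_cap_logits and the toy shapes are assumptions made for illustration.

import torch

def soft_cap_logits(scores: torch.Tensor, soft_cap: float) -> torch.Tensor:
    # Standard logit soft capping: bound scores to (-soft_cap, soft_cap)
    # via a scaled tanh. A cap of 0 conventionally means "disabled",
    # so the scores pass through unchanged in that case.
    if soft_cap <= 0:
        return scores
    return soft_cap * torch.tanh(scores / soft_cap)

# Toy usage: single-head attention scores over a short sequence.
q = torch.randn(4, 64)
k = torch.randn(4, 64)
scores = (q @ k.t()) * (64 ** -0.5)  # scaled QK^T, analogous to softmax_scale above
probs = torch.softmax(soft_cap_logits(scores, soft_cap=30.0), dim=-1)

Passing softcap through to the kernel keeps the capping fused with the attention computation instead of requiring a separate pre-softmax pass over the scores.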