[Bugfix] Fix logit soft cap in flash-attn backend (#7425)
commit cfba4def5d
parent d2bc4510a4
@@ -563,6 +563,7 @@ class FlashAttentionImpl(AttentionImpl):
                 softmax_scale=self.scale,
                 causal=True,
                 alibi_slopes=self.alibi_slopes,
+                softcap=self.logits_soft_cap,
             ).squeeze(1)
 
         # Reshape the output tensor.
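The one-line change passes the model's logit soft cap through to the flash-attn kernel call on this code path (the trailing `.squeeze(1)` suggests the decode path). For reference, below is a minimal sketch of what soft capping does to the attention logits before softmax; the helper name and the example cap value are illustrative and not part of this commit:

```python
import torch


def soft_cap_logits(logits: torch.Tensor, softcap: float | None) -> torch.Tensor:
    """Squash attention logits into (-softcap, softcap) with a tanh.

    A cap of 0/None means capping is disabled, mirroring how an absent
    softcap argument behaves.
    """
    if not softcap:
        return logits
    return softcap * torch.tanh(logits / softcap)


# Toy usage: scaled dot-product scores, soft-capped before softmax.
q = torch.randn(1, 8, 64)        # (batch, q_len, head_dim)
k = torch.randn(1, 128, 64)      # (batch, kv_len, head_dim)
scores = (q @ k.transpose(-1, -2)) * 64 ** -0.5
attn = torch.softmax(soft_cap_logits(scores, softcap=30.0), dim=-1)
```

Without the `softcap` argument, the kernel leaves the logits uncapped, so models that rely on attention-logit capping would see different scores on this path than on paths that already pass the cap, which is presumably the mismatch this bugfix addresses.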