[CrossEntropy] Fix triton cross_entropy_loss IMA for >=2B elements

This commit is contained in:
Tri Dao 2023-10-24 00:17:34 -07:00
parent 02ac572f3f
commit c79de85ffa

View File

@ -43,7 +43,7 @@ def cross_entropy_fwd_kernel(
):
row_idx = tl.program_id(0)
col_block_idx = tl.program_id(1)
logits_ptr = logits_ptr + row_idx * logits_row_stride
logits_ptr = logits_ptr + row_idx * logits_row_stride.to(tl.int64)
col_offsets = col_block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
label_idx = tl.load(labels_ptr + row_idx)
logits = tl.load(logits_ptr + col_offsets, mask=col_offsets < n_cols, other=-float("inf")).to(
@ -107,8 +107,8 @@ def cross_entropy_bwd_kernel(
):
row_idx = tl.program_id(0)
col_block_idx = tl.program_id(1)
logits_ptr = logits_ptr + row_idx * logits_row_stride
dlogits_ptr = dlogits_ptr + row_idx * dlogits_row_stride
logits_ptr = logits_ptr + row_idx * logits_row_stride.to(tl.int64)
dlogits_ptr = dlogits_ptr + row_idx * dlogits_row_stride.to(tl.int64)
col_offsets = col_block_idx * BLOCK_SIZE + tl.arange(0, BLOCK_SIZE)
label_idx = tl.load(labels_ptr + row_idx)
if label_idx != ignored_index: