From f95c2fc108a1a63e2fdd9aed87190ac3fd492af2 Mon Sep 17 00:00:00 2001 From: Tri Dao Date: Sat, 7 Jan 2023 19:06:39 -0800 Subject: [PATCH] [Gen] Remove commented code --- flash_attn/utils/generation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/flash_attn/utils/generation.py b/flash_attn/utils/generation.py index fa40a64..ea01ea1 100644 --- a/flash_attn/utils/generation.py +++ b/flash_attn/utils/generation.py @@ -97,7 +97,6 @@ def decode(input_ids, model, max_length, top_k=1, top_p=0.0, temperature=1.0, if cg: assert fused_ft_kernel run, cg_cache = capture_cg(model, inference_params, batch_size, seqlen_og, max_length) - # with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA], record_shapes=True) as prof: if timing: start = time.time() while True: @@ -117,8 +116,6 @@ def decode(input_ids, model, max_length, top_k=1, top_p=0.0, temperature=1.0, break if timing: print(f'Decoding time: {time.time() - start}') - # print(prof.key_averages().table(sort_by="cuda_time_total", row_limit=50)) - # prof.export_chrome_trace("gpt2s_generation.json") output_cls = GreedySearchDecoderOnlyOutput if top_k == 1 else SampleDecoderOnlyOutput return output_cls( sequences=torch.cat([input_ids, torch.stack(sequences, dim=1)], dim=1),