cutlass/include/cutlass
cyyever 7a7796afae
Fix is_zero (#1147)
* Fix is_zero

* Use constexpr

* Add CUTLASS_PRAGMA_UNROLL to loops

* Avoid if branches in is_zero
2023-10-23 12:09:37 -04:00
..
arch Support for Mixed Input TensorOp (#1084) 2023-09-27 11:18:30 -04:00
conv CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
detail CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
epilogue set kIsHeavy member variables (#1012) 2023-10-04 12:38:36 -04:00
gemm Support for Mixed Input TensorOp (#1084) 2023-09-27 11:18:30 -04:00
layout CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
pipeline CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
platform Add #include <limits> to platform.h (#1121) 2023-10-02 21:41:25 -04:00
reduction Fix typos 2 (#842) 2023-03-09 23:22:56 -05:00
thread CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
transform CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
aligned_buffer.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
array_planar_complex.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
array_subbyte.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
array.h Change the position of minus sign in line1549 array.h (#1091) 2023-09-26 17:26:39 -04:00
barrier.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
bfloat16.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
blas3_types.h CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
blas3.h CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
block_striped.h CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
cluster_launch.hpp CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
complex.h Add missing int64 and uint64 overloads for conj (#1127) 2023-10-05 20:01:44 -04:00
constants.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
coord.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
core_io.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
cutlass.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
device_kernel.h CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
fast_math.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
float8.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
floating_point_nvrtc.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
functional.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
gemm_coord.h CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
gemm_coord.hpp CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
half.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
integer_subbyte.h [fix] fix comparison operator for integer_subbyte (#1090) 2023-09-26 17:26:12 -04:00
kernel_hardware_info.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
kernel_hardware_info.hpp CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
kernel_launch.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
matrix_coord.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
matrix_shape.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
matrix.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
numeric_conversion.h Support for Mixed Input TensorOp (#1084) 2023-09-27 11:18:30 -04:00
numeric_size.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
numeric_types.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
pitch_linear_coord.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
predicate_vector.h Fix is_zero (#1147) 2023-10-23 12:09:37 -04:00
quaternion.h CUTLASS 3.2 (#1024) 2023-08-07 20:50:32 -04:00
real.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
relatively_equal.h CUTLASS 3.1 (#915) 2023-04-14 23:19:34 -04:00
semaphore.h Updates for 3.1 (#932) 2023-04-29 09:34:27 -04:00
subbyte_reference.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
tensor_coord.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
tensor_ref_planar_complex.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
tensor_ref.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
tensor_view_planar_complex.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
tensor_view.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
tfloat32.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
trace.h New updates for 2.11 (#775) 2023-01-20 16:32:57 -05:00
uint128.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00
wmma_array.h Updates for 3.0 (#857) 2023-03-09 15:27:40 -05:00
workspace.h CUTLASS 3.2.1 (#1113) 2023-09-26 17:24:26 -04:00