#pragma once

// NOTE(review): header names are chosen from what this file uses (assert,
// std::isfinite / std::isnan, printf / std::size_t) -- confirm against the
// project's intended include list.
#include <cassert>
#include <cmath>
#include <cstdio>

////////////////////////////////////////////////////////////////////////////////
// Debugging functions
////////////////////////////////////////////////////////////////////////////////

// Nans & inf detection
//
// Asserts that every element of `frag` converts to a finite float. `frag` must
// provide `size()` and `operator[]`, and is evaluated more than once. The NaN
// check runs first so that a NaN and an infinity trip different assertions.
#define NANCHECK(frag)                           \
  {                                              \
    for (int _i = 0; _i < (frag).size(); ++_i) { \
      assert(!std::isnan(float((frag)[_i])));    \
      assert(std::isfinite(float((frag)[_i])));  \
    }                                            \
  }

// Print on the first thread of the first block
//
// Flip the `#if 0` below to `#if 1` to enable printing. When disabled, the
// print macros expand to nothing and their arguments are NOT evaluated.
#if 0
// threadIdx.y / threadIdx.x value that is allowed to print.
#define PRINT_WARP_ID 0
#define PRINT_LANE_ID 0

// printf-style print (a newline is appended to `msg`, which must be a string
// literal) executed only where blockIdx is (0, 0, 0) and threadIdx is
// (PRINT_LANE_ID, PRINT_WARP_ID, 0).
// Expands to a bare `if` statement, so do not use it as the body of an
// unbraced if/else. `##__VA_ARGS__` (GNU extension) drops the trailing comma
// when no arguments follow `msg`.
#define PRINT_T0_L0(msg, ...)                                         \
  if (blockIdx.x == 0 && blockIdx.y == 0 && blockIdx.z == 0 &&        \
      threadIdx.x == PRINT_LANE_ID && threadIdx.y == PRINT_WARP_ID && \
      threadIdx.z == 0) {                                             \
    printf(msg "\n", ##__VA_ARGS__);                                  \
  }

// Minimal pointer + length view; `data` is not NUL-terminated at `size`.
struct __string_view {
  char const* data;
  std::size_t size;
};

// Extracts the spelling of `T` from __PRETTY_FUNCTION__: skips past the first
// '=', skips spaces, then scans to the ']' that closes the enclosing
// "[... T = <type>]" group while tracking nested brackets.
template <class T>
constexpr __string_view __get_type_name() {
  char const* p = __PRETTY_FUNCTION__;
  while (*p++ != '=')
    ;
  for (; *p == ' '; ++p)
    ;
  char const* p2 = p;
  int count = 1; // the '[' that opened the group has already been passed
  for (;; ++p2) {
    switch (*p2) {
      case '[':
        ++count;
        break;
      case ']':
        --count;
        if (!count)
          return {p, std::size_t(p2 - p)};
    }
  }
  return {};
}

// Print the type of `frag`, then all of its elements eight per line.
// `typeStr.data` is printed with "%s" and is not terminated at `typeStr.size`,
// so the output continues past the type name (through the closing ']').
#define PRINT_FRAG_T0_L0(name, frag)                            \
  {                                                             \
    auto typeStr = __get_type_name<decltype(frag)>();           \
    PRINT_T0_L0("printing %s (%s)", name, typeStr.data);        \
    for (int _start = 0; _start < (frag).size(); _start += 8) { \
      PRINT_ACCUM8_T0_L0_START(" ", frag, _start);              \
    }                                                           \
    /*__syncthreads();                                          \
    NANCHECK(frag); */                                          \
  }
#else
// Printing disabled: function-like no-ops, so that call arguments are dropped
// by the preprocessor instead of being left behind as a comma expression.
#define PRINT_T0_L0(msg, ...)
// __get_type_name is only available when printing is enabled, so the fragment
// printer must be an empty block here.
#define PRINT_FRAG_T0_L0(name, frag) \
  {                                  \
  }
#endif

// Print a given array
//
// Prints the eight elements accum[start] .. accum[start + 7], each converted
// to float, prefixed with "name[start:start+8]".
#define PRINT_ACCUM8_T0_L0_START(name, accum, start) \
  PRINT_T0_L0(                                       \
      "%s[%d:%d] - {%f, %f, %f, %f, %f, %f, %f, %f}", \
      name,                                          \
      int(start),                                    \
      int((start) + 8),                              \
      float((accum)[(start) + 0]),                   \
      float((accum)[(start) + 1]),                   \
      float((accum)[(start) + 2]),                   \
      float((accum)[(start) + 3]),                   \
      float((accum)[(start) + 4]),                   \
      float((accum)[(start) + 5]),                   \
      float((accum)[(start) + 6]),                   \
      float((accum)[(start) + 7]));

// Same as PRINT_ACCUM8_T0_L0_START, starting at element 0.
#define PRINT_ACCUM8_T0_L0(name, accum) PRINT_ACCUM8_T0_L0_START(name, accum, 0)

// Print `array` in groups of eight elements, advancing the group start by
// `incr` while it is below `length`. Each group always reads eight elements,
// whatever `incr` and `length` are.
#define PRINT_ARRAY_T0_L0_INCR(name, array, length, incr)          \
  {                                                                \
    PRINT_T0_L0("printing %s (len=%d)", name, int(length));        \
    for (int _start = 0; _start < (length); _start += (incr)) {    \
      PRINT_ACCUM8_T0_L0_START(" ", array, _start);                \
    }                                                              \
  }

// PRINT_ARRAY_T0_L0_INCR with a stride of eight (non-overlapping groups).
#define PRINT_ARRAY_T0_L0(name, array, length) \
  PRINT_ARRAY_T0_L0_INCR(name, array, length, 8)

// Print a 4x4 matrix
//
// Reads the sixteen values ref.at({start_x + i, start_y + j}) for i, j in
// [0, 4), converts each to float and prints them as four rows.
#define PRINT_TENSOR4x4_T0_L0_START(name, ref, start_x, start_y)                                           \
  PRINT_T0_L0(                                                                                             \
      "%s[%d:%d, %d:%d]:\n %f, %f, %f, %f\n %f, %f, %f, %f\n %f, %f, %f, %f\n %f, %f, %f, %f", \
      name,                                                                                                \
      int(start_x),                                                                                        \
      int((start_x) + 4),                                                                                  \
      int(start_y),                                                                                        \
      int((start_y) + 4),                                                                                  \
      float((ref).at({(start_x) + 0, (start_y) + 0})),                                                     \
      float((ref).at({(start_x) + 0, (start_y) + 1})),                                                     \
      float((ref).at({(start_x) + 0, (start_y) + 2})),                                                     \
      float((ref).at({(start_x) + 0, (start_y) + 3})),                                                     \
      float((ref).at({(start_x) + 1, (start_y) + 0})),                                                     \
      float((ref).at({(start_x) + 1, (start_y) + 1})),                                                     \
      float((ref).at({(start_x) + 1, (start_y) + 2})),                                                     \
      float((ref).at({(start_x) + 1, (start_y) + 3})),                                                     \
      float((ref).at({(start_x) + 2, (start_y) + 0})),                                                     \
      float((ref).at({(start_x) + 2, (start_y) + 1})),                                                     \
      float((ref).at({(start_x) + 2, (start_y) + 2})),                                                     \
      float((ref).at({(start_x) + 2, (start_y) + 3})),                                                     \
      float((ref).at({(start_x) + 3, (start_y) + 0})),                                                     \
      float((ref).at({(start_x) + 3, (start_y) + 1})),                                                     \
      float((ref).at({(start_x) + 3, (start_y) + 2})),                                                     \
      float((ref).at({(start_x) + 3, (start_y) + 3})));

// Same as PRINT_TENSOR4x4_T0_L0_START, for the block whose corner is (0, 0).
#define PRINT_TENSOR4x4_T0_L0(name, ref) \
  PRINT_TENSOR4x4_T0_L0_START(name, ref, 0, 0)

// Print the values returned by ps.m(), ps.n() and ps.k(), converted to int.
// Unlike the macros above, this one carries no trailing ';'.
#define PRINT_PROBLEM_SIZE(name, ps)            \
  PRINT_T0_L0(                                  \
      "%s.problem_size: {.m=%d, .n=%d, .k=%d}", \
      name,                                     \
      int((ps).m()),                            \
      int((ps).n()),                            \
      int((ps).k()))