From ce2b3f695dc05e68348f2e1e14d9479c3578772c Mon Sep 17 00:00:00 2001
From: Artem Belevich
Date: Fri, 8 Dec 2017 10:39:55 -0800
Subject: [PATCH] Fixed debug macros for clang.

Unlike nvcc, clang always sees both host and device-side code during
compilation. CUDA_LOG macro is used in both host and device code, so when
it expanded to contain device-only code, that resulted in errors when it
was used from the host-side functions.

In order to make CUDA_LOG work with clang it was split into two parts --
a pair of target-attribute-based overloaded functions that perform host
or device specific parts of logging, and a printf which works on both
sides.
---
Reviewer notes (placed after the "---" separator, so they are not part of
the commit message):

 * The patch had been flattened onto two physical lines. It is laid out
   again as one header / hunk record per line. Indentation and blank
   lines were reconstructed and may not match the target file
   byte-for-byte; whitespace-tolerant application may be needed.
 * The device-side NVCC definition of CUDA_LOG no longer ends in ';'.
   It now expands to a single expression, like the host-side definition,
   so "if (c) CUDA_LOG(...); else ..." parses the same way in every
   branch. The host-side and clang definitions already require the
   caller to write the ';'.
 * The post-image blob id on the "index" line predates that change.
 * CUDA_LOG still requires at least one argument after the format
   string, because __VA_ARGS__ follows a comma in every definition.

 cutlass/util/debug.h | 24 ++++++++++++++++++++----
 1 file changed, 20 insertions(+), 4 deletions(-)

diff --git a/cutlass/util/debug.h b/cutlass/util/debug.h
index 81650932..3a4b2fd0 100644
--- a/cutlass/util/debug.h
+++ b/cutlass/util/debug.h
@@ -44,10 +44,26 @@ namespace cutlass {
  * Formats and prints the given message to stdout
  */
 #if !defined(CUDA_LOG)
-    #if !defined(__CUDA_ARCH__)
-        #define CUDA_LOG(format, ...) printf(format,__VA_ARGS__)
-    #else
-        #define CUDA_LOG(format, ...) printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, blockIdx.x, blockIdx.y, blockIdx.z, threadIdx.x, threadIdx.y, threadIdx.z, __VA_ARGS__);
+    #if defined(__clang__) && defined(__CUDA__)
+static __device__ void cuda_log_location() {
+  printf("[block (%d,%d,%d), thread (%d,%d,%d)]: ", blockIdx.x, blockIdx.y,
+         blockIdx.z, threadIdx.x, threadIdx.y, threadIdx.z);
+}
+static __host__ void cuda_log_location() {}
+        #define CUDA_LOG(format, ...) \
+            do { \
+                cuda_log_location(); \
+                printf(format, __VA_ARGS__); \
+            } while (0)
+    #else // NVCC
+        #if !defined(__CUDA_ARCH__)
+            #define CUDA_LOG(format, ...) printf(format, __VA_ARGS__)
+        #else
+            #define CUDA_LOG(format, ...) \
+                printf("[block (%d,%d,%d), thread (%d,%d,%d)]: " format, \
+                       blockIdx.x, blockIdx.y, blockIdx.z, threadIdx.x, \
+                       threadIdx.y, threadIdx.z, __VA_ARGS__)
+        #endif
     #endif
 #endif
 