From 2e07c4cc2fc94f4cd396ecf1e9132caf1efba50e Mon Sep 17 00:00:00 2001 From: Manish Gupta Date: Mon, 20 Sep 2021 11:02:22 -0700 Subject: [PATCH] CUTLASS 2.7 (#318) CUTLASS 2.7 Mainloop fusion for GEMM: summation over A or B Strided DGRAD (optimized iterators) Half-precision GELU_taylor activation functions Use these when accumulation and epilogue compute types are all cutlass::half_t Tuning and bug fixes to fused GEMM + GEMM example Support for smaller than 128b aligned Convolutions: see examples Caching of results to accelerate Convolution unit tests Can be enabled or disabled by running cmake .. -DCUTLASS_TEST_ENABLE_CACHED_RESULTS=OFF Corrections and bug fixes reported by the CUTLASS community Thank you for filing these issues! authored-by: Haicheng Wu haichengw@nvidia.com, Manish Gupta manigupta@nvidia.com, Dustyn Blasig dblasig@nvidia.com, Andrew Kerr akerr@nvidia.com --- CHANGELOG.md | 12 +- CMakeLists.txt | 36 +- README.md | 34 +- examples/03_visualize_layout/CMakeLists.txt | 2 +- include/cutlass/arch/memory.h | 28 + .../conv/kernel/default_conv2d_dgrad.h | 454 +++++++++- .../conv/kernel/default_conv2d_fprop.h | 2 +- .../default_conv2d_fprop_with_broadcast.h | 2 +- .../default_conv2d_fprop_with_reduction.h | 2 +- .../conv/kernel/default_conv2d_wgrad.h | 2 +- .../conv/kernel/default_conv3d_dgrad.h | 2 +- .../conv/kernel/default_conv3d_fprop.h | 2 +- .../conv/kernel/default_conv3d_wgrad.h | 2 +- ...rad_filter_tile_access_iterator_analytic.h | 15 +- ...ad_filter_tile_access_iterator_optimized.h | 276 ++++++ ...t_gradient_tile_access_iterator_analytic.h | 17 +- ..._gradient_tile_access_iterator_optimized.h | 374 +++++++++ ...activation_tile_access_iterator_analytic.h | 8 +- ...rop_filter_tile_access_iterator_analytic.h | 7 +- .../cutlass/conv/threadblock/conv2d_params.h | 125 +++ ...activation_tile_access_iterator_analytic.h | 7 +- ...ctivation_tile_access_iterator_optimized.h | 2 + ...t_gradient_tile_access_iterator_analytic.h | 8 +- 
include/cutlass/epilogue/thread/activation.h | 26 + .../warp/tile_iterator_tensor_op_mixed.h | 8 +- include/cutlass/fast_math.h | 77 +- .../kernel/default_gemm_with_k_reduction.h | 2 +- .../gemm/kernel/gemm_with_k_reduction.h | 16 +- .../cutlass/gemm/threadblock/default_mma.h | 12 +- .../gemm/threadblock/default_mma_core_sm80.h | 4 +- .../default_multistage_mma_complex.h | 13 +- ...default_multistage_mma_complex_core_sm80.h | 33 +- .../predicated_tile_access_iterator.h | 1 - test/unit/CMakeLists.txt | 13 +- test/unit/conv/device/CMakeLists.txt | 23 +- test/unit/conv/device/cache_testbed_output.h | 791 ++++++++++++++++++ ...nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu | 56 ++ test/unit/conv/device/conv2d_problems.h | 60 +- ...nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu | 133 +++ test/unit/conv/device/conv2d_testbed.h | 99 ++- .../conv/device/conv2d_testbed_interleaved.h | 95 ++- .../device/conv2d_with_broadcast_testbed.h | 84 +- .../device/conv2d_with_reduction_testbed.h | 83 +- ...c_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu | 3 +- test/unit/conv/device/conv3d_testbed.h | 90 +- ...lts_cutlass_test_unit_conv_device_simt.txt | 473 +++++++++++ ...est_unit_conv_device_tensorop_f16_sm80.txt | 207 +++++ ...est_unit_conv_device_tensorop_f32_sm70.txt | 197 +++++ ...est_unit_conv_device_tensorop_f32_sm75.txt | 472 +++++++++++ ...est_unit_conv_device_tensorop_f32_sm80.txt | 283 +++++++ ...nit_conv_device_tensorop_f32_tf32_sm80.txt | 310 +++++++ ...ass_test_unit_conv_device_tensorop_s32.txt | 138 +++ ...t_conv_device_tensorop_s32_interleaved.txt | 128 +++ test/unit/epilogue/thread/CMakeLists.txt | 1 + test/unit/epilogue/thread/activation.cu | 314 +++++++ .../epilogue/thread/linear_combination.cu | 38 + .../library/include/cutlass/library/library.h | 1 - tools/library/scripts/generator.py | 90 +- tools/library/src/util.cu | 1 - tools/profiler/src/cublas_helpers.h | 1 - .../util/reference/device/tensor_fill.h | 1 - .../cutlass/util/reference/host/tensor_fill.h | 1 - 62 files 
changed, 5611 insertions(+), 186 deletions(-) create mode 100644 test/unit/conv/device/cache_testbed_output.h create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_simt.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f16_sm80.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm70.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm75.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm80.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32.txt create mode 100644 test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32_interleaved.txt create mode 100644 test/unit/epilogue/thread/activation.cu diff --git a/CHANGELOG.md b/CHANGELOG.md index 448b1134..90e6e6bd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,16 @@ # NVIDIA CUTLASS Changelog -# CUTLASS 2.x +## [2.7.0](https://github.com/NVIDIA/cutlass/releases/tag/v2.7.0) (2021-09-24) + * Mainloop fusion for GEMM: [summation over A or B](/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu) + * [Strided DGRAD (optimized iterators)](/include/cutlass/conv/kernel/default_conv2d_dgrad.h) + * [Half-precision GELU_taylor activation functions](/include/cutlass/epilogue/thread/activation.h#L196) + * Use these when accumulation and epilogue compute types are all `cutlass::half_t` + * Tuning and bug fixes to [fused GEMM + GEMM example](/examples/13_two_tensor_op_fusion/) + * Support for smaller than 128b aligned Convolutions: [see examples](test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu#L272) + * Caching of results to 
accelerate Convolution [unit tests](test/unit/conv/device/cache_testbed_output.h) + * Can be enabled or disabled by running `cmake .. -DCUTLASS_TEST_ENABLE_CACHED_RESULTS=OFF` + * Corrections and bug fixes reported by the CUTLASS community + * Thank you for filing these issues! ## [2.6.1](https://github.com/NVIDIA/cutlass/releases/tag/v2.6.1) (2021-09-03) * Arbitrary padding and striding for CUTLASS Strided DGRAD Convolution operator (Analytic Iterators) diff --git a/CMakeLists.txt b/CMakeLists.txt index a16d77c0..80a555ba 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -32,7 +32,7 @@ endif() message(STATUS "CMake Version: ${CMAKE_VERSION}") -project(CUTLASS VERSION 2.6.0 LANGUAGES CXX) +project(CUTLASS VERSION 2.7.0 LANGUAGES CXX) include(${CMAKE_CURRENT_SOURCE_DIR}/CUDA.cmake) if (CUDA_VERSION VERSION_LESS 10.2) @@ -188,10 +188,18 @@ set(CUTLASS_LIBRARY_IGNORE_KERNELS "" CACHE STRING "Comma delimited list of kern # Test Levels L0, L1, L2 set(CUTLASS_TEST_LEVEL "0" CACHE STRING "Level of tests to compile.") + + +set(CUTLASS_TEST_ENABLE_CACHED_RESULTS ON CACHE BOOL "Enable caching and reuse of test results in unit tests") + set_property(CACHE CUTLASS_TEST_LEVEL PROPERTY STRINGS 0 1 2) list(APPEND CUTLASS_CUDA_NVCC_FLAGS -DCUTLASS_TEST_LEVEL=${CUTLASS_TEST_LEVEL}) list(APPEND CUTLASS_CUDA_CLANG_FLAGS -DCUTLASS_TEST_LEVEL=${CUTLASS_TEST_LEVEL}) +if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) +list(APPEND CUTLASS_CUDA_NVCC_FLAGS -DCUTLASS_TEST_ENABLE_CACHED_RESULTS=1) +endif() + # # CUDA 10.1 introduces "mma" in PTX performing collective matrix multiply operations. # @@ -244,7 +252,7 @@ if (NOT MSVC AND CUTLASS_NVCC_KEEP) # MSVC flow handles caching already, but for other generators we handle it here. set(CUTLASS_NVCC_KEEP_DIR ${CMAKE_CURRENT_BINARY_DIR}/tmp CACHE PATH "Location to store NVCC scratch files") file(MAKE_DIRECTORY ${CUTLASS_NVCC_KEEP_DIR}) - list(APPEND CUTLASS_CUDA_NVCC_FLAGS --keep) # --keep-dir may not work with nvcc for some directories. 
+ list(APPEND CUTLASS_CUDA_NVCC_FLAGS --keep -v) # --keep-dir may not work with nvcc for some directories. list(APPEND CUTLASS_CUDA_CLANG_FLAGS -save-temps=${CUTLASS_NVCC_KEEP_DIR}) endif() @@ -572,10 +580,12 @@ function(cutlass_add_executable_tests NAME TARGET) # TEST_COMMAND_OPTIONS: A list of variables (i.e. by reference params) which contain command line arguments # to pass to the test executable. A unique test with suffix _0, _1, ... is generated for each set of # options given. If this option is not used, a single test with no arguments is generated. +# RESULT_CACHE_FILE: A file to be installed alongside the test executable with pre-computed +# test results to speed up test runtime. # set(options DISABLE_EXECUTABLE_INSTALL_RULE) - set(oneValueArgs DISABLE_TESTS) + set(oneValueArgs DISABLE_TESTS RESULT_CACHE_FILE) set(multiValueArgs DEPENDS DEPENDEES TEST_COMMAND_OPTIONS) cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) @@ -583,6 +593,17 @@ function(cutlass_add_executable_tests NAME TARGET) set(__DISABLE_TESTS OFF) endif() + if (__RESULT_CACHE_FILE) + + add_custom_command( + TARGET ${TARGET} + POST_BUILD + COMMAND ${CMAKE_COMMAND} + ARGS -E copy ${__RESULT_CACHE_FILE} "$<TARGET_FILE_DIR:${TARGET}>" + ) + + endif() + if (NOT __DISABLE_EXECUTABLE_INSTALL_RULE AND CUTLASS_INSTALL_TESTS) # file(RELATIVE_PATH CMAKE_CURRENT_BINARY_RELATIVE_DIR ${CMAKE_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}) @@ -591,6 +612,15 @@ function(cutlass_add_executable_tests NAME TARGET) TARGETS ${TARGET} RUNTIME DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR} ) + + if (__RESULT_CACHE_FILE) + + install( + FILES ${__RESULT_CACHE_FILE} + DESTINATION ${CUTLASS_TEST_INSTALL_BINDIR}/ + ) + + endif() endif() diff --git a/README.md b/README.md index 0079012c..a9fc680b 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ ![ALT](/media/images/gemm-hierarchy-with-epilogue-no-labels.png "Complete CUDA GEMM decomposition") -# CUTLASS 2.6 +# CUTLASS 2.7 -_CUTLASS 2.6.1 - September 2021_ +_CUTLASS
2.7 - September 2021_ CUTLASS is a collection of CUDA C++ template abstractions for implementing -high-performance matrix-multiplication (GEMM) at all levels and scales within CUDA. -It incorporates strategies for hierarchical decomposition and data movement similar -to those used to implement cuBLAS. CUTLASS decomposes these "moving parts" into -reusable, modular software components abstracted by C++ template classes. These -thread-wide, warp-wide, block-wide, and device-wide primitives can be specialized +high-performance matrix-multiplication (GEMM) and related computations at all levels +and scales within CUDA. It incorporates strategies for hierarchical decomposition and +data movement similar to those used to implement cuBLAS and cuDNN. CUTLASS decomposes +these "moving parts" into reusable, modular software components abstracted by C++ template +classes. These thread-wide, warp-wide, block-wide, and device-wide primitives can be specialized and tuned via custom tiling sizes, data types, and other algorithmic policy. The resulting flexibility simplifies their use as building blocks within custom kernels and applications. @@ -20,14 +20,14 @@ multiply-accumulate abstractions for half-precision floating point (FP16), BFloat16 (BF16), Tensor Float 32 (TF32), single-precision floating point (FP32), double-precision floating point (FP64) types, integer data types (4b and 8b), and binary data types (1b). - -Furthermore, CUTLASS demonstrates warp-synchronous matrix multiply operations +CUTLASS demonstrates warp-synchronous matrix multiply operations targeting the programmable, high-throughput _Tensor Cores_ implemented by NVIDIA's Volta, Turing, and Ampere architectures. -Additionaly, CUTLASS implements high-performance convolution (implicit GEMM). -Implicit GEMM is the formulation of a convolution operation as a GEMM. This allows CUTLASS -to build convolutions by reusing highly optimized warp-wide GEMM components and below. 
+CUTLASS implements high-performance Convolution via the implicit GEMM algorithm. +Implicit GEMM is the formulation of a convolution operation as a GEMM, thereby taking advantage of +CUTLASS's modular GEMM pipeline. +This allows CUTLASS to build convolutions by reusing highly optimized warp-wide GEMM components and below. See the [Quick Start Guide](/media/docs/quickstart.md) to get started quickly. @@ -36,6 +36,16 @@ supported at each level of the execution model hierarchy. See the [CHANGELOG](CHANGELOG.md) for descriptions of recent updates. +# What's New in CUTLASS 2.7 +CUTLASS 2.7 is a minor update to CUTLASS adding: +- Mainloop fusion for GEMM: [summation over A or B](/examples/23_ampere_gemm_operand_reduction_fusion/ampere_gemm_operand_reduction_fusion.cu) +- [Optimizations for strided DGRAD](/include/cutlass/conv/kernel/default_conv2d_dgrad.h) +- [Half-precision GELU_taylor activation functions](/include/cutlass/epilogue/thread/activation.h#L196) +- Tuning and bug fixes to [fused GEMM + GEMM example](/examples/13_two_tensor_op_fusion/) +- Support for smaller than 128b aligned Convolutions: [see examples](test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu#L272) +- Caching of results to accelerate Convolution [unit tests](test/unit/conv/device/cache_testbed_output.h) +- Numerous updates from the community (thanks!)
+ # What's New in CUTLASS 2.6 CUTLASS 2.6 is a minor update to CUTLASS adding: - Fused [broadcast](test/unit/gemm/device/gemm_with_broadcast_f16n_f16n_f16n_tensorop_f32_sm75.cu) and [reductions](/test/unit/gemm/device/gemm_with_reduction_f16n_f16n_f16n_tensorop_f32_sm75.cu) in the epilogues of GEMM and Convolution diff --git a/examples/03_visualize_layout/CMakeLists.txt b/examples/03_visualize_layout/CMakeLists.txt index 7b31afe6..7df55fd1 100644 --- a/examples/03_visualize_layout/CMakeLists.txt +++ b/examples/03_visualize_layout/CMakeLists.txt @@ -21,7 +21,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. set(TEST_COMMAND_00 RowMajor --extent=16,16) -set(TEST_COMMAND_01 "ColumnMajorInterleaved<4>" --extent=32,8 --output-shape=16 --vectorize=4) +set(TEST_COMMAND_01 \"ColumnMajorInterleaved<4>\" --extent=32,8 --output-shape=16 --vectorize=4) cutlass_example_add_executable( 03_visualize_layout diff --git a/include/cutlass/arch/memory.h b/include/cutlass/arch/memory.h index 6076f86c..92d536f6 100644 --- a/include/cutlass/arch/memory.h +++ b/include/cutlass/arch/memory.h @@ -225,6 +225,34 @@ struct global_store; // ///////////////////////////////////////////////////////////////////////////////////////////////// + +template <typename AccessType> +struct global_store<AccessType, 64> { + CUTLASS_DEVICE + global_store(AccessType const &D, void *ptr, bool pred_guard) { + uint4 const *data = reinterpret_cast<uint4 const *>(&D); + + asm volatile( + "{\n" + " .reg .pred p;\n" + " setp.ne.b32 p, %5, 0;\n" + " @p st.global.v4.u32 [%0], {%1, %2, %3, %4};\n" + " @p st.global.v4.u32 [%6], {%7, %8, %9, %10};\n" + " @p st.global.v4.u32 [%11], {%12, %13, %14, %15};\n" + " @p st.global.v4.u32 [%16], {%17, %18, %19, %20};\n" + "}\n" + : + : "l"(ptr), "r"(data[0].x), "r"(data[0].y), "r"(data[0].z), + "r"(data[0].w), "r"((int)pred_guard), "l"(((uint8_t *)ptr) + 16), + "r"(data[1].x), "r"(data[1].y), "r"(data[1].z), "r"(data[1].w), + "l"(((uint8_t *)ptr) + 32), + "r"(data[2].x), "r"(data[2].y), "r"(data[2].z),
"r"(data[2].w), + "l"(((uint8_t *)ptr) + 48), + "r"(data[3].x), "r"(data[3].y), "r"(data[3].z), "r"(data[3].w)); + } +}; + + template <typename AccessType> struct global_store<AccessType, 32> { CUTLASS_DEVICE diff --git a/include/cutlass/conv/kernel/default_conv2d_dgrad.h b/include/cutlass/conv/kernel/default_conv2d_dgrad.h index 7c3e29e2..c51d9d55 100644 --- a/include/cutlass/conv/kernel/default_conv2d_dgrad.h +++ b/include/cutlass/conv/kernel/default_conv2d_dgrad.h @@ -65,7 +65,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided, /// Access granularity of A matrix in units of elements int AlignmentA = 128 / cutlass::sizeof_bits::value, @@ -675,6 +675,243 @@ struct DefaultConv2dDgrad < >; }; +/// Defines a kernel for Conv2dDgrad specialization for Optimized IteratorAlgorithm Dgrad Strided and +// multistage pipeline.
+template < + typename ElementA, + typename LayoutA, + typename ElementB, + typename LayoutB, + typename ElementC, + typename LayoutC, + typename ElementAccumulator, + typename ArchTag, + typename ThreadblockShape, + typename WarpShape, + typename InstructionShape, + typename EpilogueOutputOp, + typename ThreadblockSwizzle, + int Stages, + typename MathOperatorTag, + int AlignmentA, + int AlignmentB +> +struct DefaultConv2dDgrad < + ElementA, + LayoutA, + ElementB, + LayoutB, + ElementC, + LayoutC, + ElementAccumulator, + arch::OpClassTensorOp, + ArchTag, + ThreadblockShape, + WarpShape, + InstructionShape, + EpilogueOutputOp, + ThreadblockSwizzle, + Stages, + MathOperatorTag, + IteratorAlgorithm::kOptimized, + StrideSupport::kStrided, + AlignmentA, + AlignmentB +> { + + // Define the core components from GEMM + using MmaCore = typename cutlass::gemm::threadblock::DefaultMmaCore< + ThreadblockShape, WarpShape, InstructionShape, ElementA, layout::RowMajor, + ElementB, layout::RowMajor, ElementAccumulator, layout::RowMajor, arch::OpClassTensorOp, + Stages, MathOperatorTag>; + + // Define iterators over tiles from the A operand + using ThreadMapA = typename MmaCore::IteratorThreadMapA; + using AccessTypeA = cutlass::AlignedArray; + using IteratorA = + cutlass::conv::threadblock::Conv2dDgradOutputGradientTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementA, + ThreadMapA, + StrideSupport::kStrided, + AccessTypeA + >; + + using SmemIteratorA = typename MmaCore::SmemIteratorA; + + // Define iterators over tiles from the B operand + using ThreadMapB = typename MmaCore::IteratorThreadMapB; + using AccessTypeB = cutlass::AlignedArray; + using IteratorB = + cutlass::conv::threadblock::Conv2dDgradFilterTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementB, + ThreadMapB, + StrideSupport::kStrided, + AccessTypeB + >; + + using SmemIteratorB = typename MmaCore::SmemIteratorB; + + // Warp-level GEMM components + using WarpMmaTensorOp = typename 
MmaCore::MmaTensorOp; + using MmaPolicy = typename MmaCore::MmaPolicy; + + static cutlass::arch::CacheOperation::Kind const CacheOpB = + ((sizeof_bits::value * AlignmentB) == 128) + ? cutlass::arch::CacheOperation::Global + : cutlass::arch::CacheOperation::Always; + + // Define the Mma + using Mma = threadblock::ImplicitGemmMultistage< + ThreadblockShape, + IteratorA, + SmemIteratorA, + arch::CacheOperation::Always, + IteratorB, + SmemIteratorB, + CacheOpB, + MmaPolicy, + Stages + >; + + static const int kPartitionsK = ThreadblockShape::kK / WarpShape::kK; + + // Define the epilogue + using Epilogue = typename epilogue::threadblock::DefaultEpilogueTensorOpStridedDgrad< + ThreadblockShape, + WarpMmaTensorOp, + kPartitionsK, + EpilogueOutputOp, + EpilogueOutputOp::kCount + >::Epilogue; + + // Define the kernel + using Kernel = cutlass::conv::kernel::ImplicitGemmConvolutionStridedDgrad< + Mma, + Epilogue, + ThreadblockSwizzle, + conv::Operator::kDgrad + >; +}; + +/// Defines a kernel for Conv2dDgrad specialzation for Optimized IteratorAlgorithm Dgrad Strided +// and 2 stage pipeline. 
+template < + typename ElementA, + typename LayoutA, + typename ElementB, + typename LayoutB, + typename ElementC, + typename LayoutC, + typename ElementAccumulator, + typename ArchTag, + typename ThreadblockShape, + typename WarpShape, + typename InstructionShape, + typename EpilogueOutputOp, + typename ThreadblockSwizzle, + typename MathOperatorTag, + int AlignmentA, + int AlignmentB +> +struct DefaultConv2dDgrad < + ElementA, + LayoutA, + ElementB, + LayoutB, + ElementC, + LayoutC, + ElementAccumulator, + arch::OpClassTensorOp, + ArchTag, + ThreadblockShape, + WarpShape, + InstructionShape, + EpilogueOutputOp, + ThreadblockSwizzle, + 2, + MathOperatorTag, + IteratorAlgorithm::kOptimized, + StrideSupport::kStrided, + AlignmentA, + AlignmentB +> { + + // Define the core components from GEMM + using MmaCore = typename cutlass::gemm::threadblock::DefaultMmaCore< + ThreadblockShape, WarpShape, InstructionShape, ElementA, layout::RowMajor, + ElementB, layout::RowMajor, ElementAccumulator, layout::RowMajor, arch::OpClassTensorOp, + 2, MathOperatorTag>; + + // Define iterators over tiles from the A operand + using ThreadMapA = typename MmaCore::IteratorThreadMapA; + using AccessTypeA = cutlass::AlignedArray; + using IteratorA = + cutlass::conv::threadblock::TileIteratorStridedDgrad< + cutlass::conv::threadblock::Conv2dDgradOutputGradientTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementA, + ThreadMapA, + StrideSupport::kStrided, + AccessTypeA + > + >; + + using SmemIteratorA = typename MmaCore::SmemIteratorA; + + // Define iterators over tiles from the B operand + using ThreadMapB = typename MmaCore::IteratorThreadMapB; + using AccessTypeB = cutlass::AlignedArray; + using IteratorB = + cutlass::conv::threadblock::TileIteratorStridedDgrad< + cutlass::conv::threadblock::Conv2dDgradFilterTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementB, + ThreadMapB, + StrideSupport::kStrided, + AccessTypeB + > + >; + + using SmemIteratorB = typename 
MmaCore::SmemIteratorB; + + // Warp-level GEMM components + using WarpMmaTensorOp = typename MmaCore::MmaTensorOp; + using MmaPolicy = typename MmaCore::MmaPolicy; + + // Define the Mma + using Mma = threadblock::ImplicitGemmPipelined< + ThreadblockShape, + IteratorA, + SmemIteratorA, + IteratorB, + SmemIteratorB, + ElementC, + LayoutC, + MmaPolicy + >; + + static const int kPartitionsK = ThreadblockShape::kK / WarpShape::kK; + + // Define the epilogue + using Epilogue = typename detail::DefaultConvEpilogueStridedDgrad< + ArchTag, + ThreadblockShape, + WarpMmaTensorOp, + kPartitionsK, + EpilogueOutputOp + >::Epilogue; + + // Define the kernel + using Kernel = cutlass::conv::kernel::ImplicitGemmConvolutionStridedDgrad< + Mma, + Epilogue, + ThreadblockSwizzle, + conv::Operator::kDgrad + >; +}; + /// Defines a kernel for Conv2dDgrad specialzation for Optimized IteratorAlgorithm Dgrad Unity // 2 stage pipeline template < @@ -1126,6 +1363,112 @@ struct DefaultConv2dDgrad < }; ///////////////////////////////////////////////////////////////////////////////////////////////// +template < + typename ElementA, + typename LayoutA, + typename ElementB, + typename LayoutB, + typename ElementC, + typename LayoutC, + typename ElementAccumulator, + typename ArchTag, + typename ThreadblockShape, + typename WarpShape, + typename InstructionShape, + typename EpilogueOutputOp, + typename ThreadblockSwizzle, + int Stages, + typename MathOperatorTag, + int AlignmentA, + int AlignmentB +> +struct DefaultConv2dDgrad < + ElementA, + LayoutA, + ElementB, + LayoutB, + ElementC, + LayoutC, + ElementAccumulator, + arch::OpClassSimt, + ArchTag, + ThreadblockShape, + WarpShape, + InstructionShape, + EpilogueOutputOp, + ThreadblockSwizzle, + Stages, + MathOperatorTag, + IteratorAlgorithm::kOptimized, + conv::StrideSupport::kStrided, + AlignmentA, + AlignmentB +> { + + // Define the core components from GEMM + using MmaCore = typename cutlass::gemm::threadblock::DefaultMmaCore< + ThreadblockShape, 
WarpShape, InstructionShape, ElementA, layout::RowMajor, + ElementB, layout::RowMajor, ElementAccumulator, layout::RowMajor, arch::OpClassSimt, + Stages, MathOperatorTag>; + + // Define iterators over tiles from the A operand + using ThreadMapA = typename MmaCore::IteratorThreadMapA; + using IteratorA = + cutlass::conv::threadblock::Conv2dDgradOutputGradientTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementA, + ThreadMapA, + conv::StrideSupport::kStrided + >; + + using SmemIteratorA = typename MmaCore::SmemIteratorA; + + // Define iterators over tiles from the B operand + using ThreadMapB = typename MmaCore::IteratorThreadMapB; + using IteratorB = + cutlass::conv::threadblock::Conv2dDgradFilterTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementB, + ThreadMapB, + conv::StrideSupport::kStrided + >; + + using SmemIteratorB = typename MmaCore::SmemIteratorB; + + // Warp-level GEMM components + using WarpMmaSimtOp = typename MmaCore::MmaWarpSimt; + using MmaPolicy = typename MmaCore::MmaPolicy; + + // Define the Mma + using Mma = threadblock::ImplicitGemmMultistage< + ThreadblockShape, + IteratorA, + SmemIteratorA, + arch::CacheOperation::Always, + IteratorB, + SmemIteratorB, + arch::CacheOperation::Always, + MmaPolicy, + Stages + >; + + // Define the epilogue + using Epilogue = typename epilogue::threadblock::DefaultEpilogueSimtStridedDgrad< + ThreadblockShape, + WarpMmaSimtOp, + EpilogueOutputOp, + EpilogueOutputOp::kCount + >::Epilogue; + + // Define the kernel + using Kernel = cutlass::conv::kernel::ImplicitGemmConvolutionStridedDgrad< + Mma, + Epilogue, + ThreadblockSwizzle, + conv::Operator::kDgrad + >; + +}; ///////////////////////////////////////////////////////////////////////////////////////////////// /// Defines a kernel for Conv2dDgrad specialzation for Analytic IteratorAlgorithm, @@ -1462,6 +1805,115 @@ struct DefaultConv2dDgrad < }; ///////////////////////////////////////////////////////////////////////////////////////////////// 
+template < + typename ElementA, + typename LayoutA, + typename ElementB, + typename LayoutB, + typename ElementC, + typename LayoutC, + typename ElementAccumulator, + typename ArchTag, + typename ThreadblockShape, + typename WarpShape, + typename InstructionShape, + typename EpilogueOutputOp, + typename ThreadblockSwizzle, + typename MathOperatorTag, + int AlignmentA, + int AlignmentB +> +struct DefaultConv2dDgrad < + ElementA, + LayoutA, + ElementB, + LayoutB, + ElementC, + LayoutC, + ElementAccumulator, + arch::OpClassSimt, + ArchTag, + ThreadblockShape, + WarpShape, + InstructionShape, + EpilogueOutputOp, + ThreadblockSwizzle, + 2, + MathOperatorTag, + IteratorAlgorithm::kOptimized, + conv::StrideSupport::kStrided, + AlignmentA, + AlignmentB +> { + + // Define the core components from GEMM + using MmaCore = typename cutlass::gemm::threadblock::DefaultMmaCore< + ThreadblockShape, WarpShape, InstructionShape, ElementA, layout::RowMajor, + ElementB, layout::RowMajor, ElementAccumulator, layout::RowMajor, arch::OpClassSimt, + 2, MathOperatorTag>; + + // Define iterators over tiles from the A operand + using ThreadMapA = typename MmaCore::IteratorThreadMapA; + using IteratorA = + cutlass::conv::threadblock::TileIteratorStridedDgrad< + cutlass::conv::threadblock::Conv2dDgradOutputGradientTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementA, + ThreadMapA, + conv::StrideSupport::kStrided + > + >; + + using SmemIteratorA = typename MmaCore::SmemIteratorA; + + // Define iterators over tiles from the B operand + using ThreadMapB = typename MmaCore::IteratorThreadMapB; + using IteratorB = + cutlass::conv::threadblock::TileIteratorStridedDgrad< + cutlass::conv::threadblock::Conv2dDgradFilterTileAccessIteratorOptimized< + cutlass::MatrixShape, + ElementB, + ThreadMapB, + conv::StrideSupport::kStrided + > + >; + + using SmemIteratorB = typename MmaCore::SmemIteratorB; + + // Warp-level GEMM components + using WarpMmaSimtOp = typename MmaCore::MmaWarpSimt; + using 
MmaPolicy = typename MmaCore::MmaPolicy; + + // Define the Mma + using Mma = threadblock::ImplicitGemmPipelined< + ThreadblockShape, + IteratorA, + SmemIteratorA, + IteratorB, + SmemIteratorB, + ElementC, + LayoutC, + MmaPolicy + >; + + // Define the epilogue + using Epilogue = typename epilogue::threadblock::DefaultEpilogueSimtStridedDgrad< + ThreadblockShape, + WarpMmaSimtOp, + EpilogueOutputOp, + EpilogueOutputOp::kCount + >::Epilogue; + + // Define the kernel + using Kernel = cutlass::conv::kernel::ImplicitGemmConvolutionStridedDgrad< + Mma, + Epilogue, + ThreadblockSwizzle, + conv::Operator::kDgrad + >; + +}; + } // namespace kernel } // namespace conv } // namespace cutlass diff --git a/include/cutlass/conv/kernel/default_conv2d_fprop.h b/include/cutlass/conv/kernel/default_conv2d_fprop.h index 205da469..2ce945d2 100644 --- a/include/cutlass/conv/kernel/default_conv2d_fprop.h +++ b/include/cutlass/conv/kernel/default_conv2d_fprop.h @@ -65,7 +65,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided, /// Access granularity of A matrix in units of elements int AlignmentA = 128 / cutlass::sizeof_bits::value, diff --git a/include/cutlass/conv/kernel/default_conv2d_fprop_with_broadcast.h b/include/cutlass/conv/kernel/default_conv2d_fprop_with_broadcast.h index 6f127440..dfbc98e7 100644 --- a/include/cutlass/conv/kernel/default_conv2d_fprop_with_broadcast.h +++ b/include/cutlass/conv/kernel/default_conv2d_fprop_with_broadcast.h @@ -64,7 +64,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = 
StrideSupport::kStrided, /// Access granularity of A matrix in units of elements int AlignmentA = 128 / cutlass::sizeof_bits::value, diff --git a/include/cutlass/conv/kernel/default_conv2d_fprop_with_reduction.h b/include/cutlass/conv/kernel/default_conv2d_fprop_with_reduction.h index 5dd31052..24553a6d 100644 --- a/include/cutlass/conv/kernel/default_conv2d_fprop_with_reduction.h +++ b/include/cutlass/conv/kernel/default_conv2d_fprop_with_reduction.h @@ -65,7 +65,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided, /// Access granularity of A matrix in units of elements int AlignmentA = 128 / cutlass::sizeof_bits::value, diff --git a/include/cutlass/conv/kernel/default_conv2d_wgrad.h b/include/cutlass/conv/kernel/default_conv2d_wgrad.h index 91edde2d..677959a2 100644 --- a/include/cutlass/conv/kernel/default_conv2d_wgrad.h +++ b/include/cutlass/conv/kernel/default_conv2d_wgrad.h @@ -66,7 +66,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided, /// Access granularity of A matrix in units of elements int AlignmentA = 128 / cutlass::sizeof_bits::value, diff --git a/include/cutlass/conv/kernel/default_conv3d_dgrad.h b/include/cutlass/conv/kernel/default_conv3d_dgrad.h index d51ff21c..929165cf 100644 --- a/include/cutlass/conv/kernel/default_conv3d_dgrad.h +++ b/include/cutlass/conv/kernel/default_conv3d_dgrad.h @@ -66,7 +66,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + 
conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided > struct DefaultConv3dDgrad; diff --git a/include/cutlass/conv/kernel/default_conv3d_fprop.h b/include/cutlass/conv/kernel/default_conv3d_fprop.h index 9d39086a..ef63ffde 100644 --- a/include/cutlass/conv/kernel/default_conv3d_fprop.h +++ b/include/cutlass/conv/kernel/default_conv3d_fprop.h @@ -66,7 +66,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided > struct DefaultConv3dFprop; diff --git a/include/cutlass/conv/kernel/default_conv3d_wgrad.h b/include/cutlass/conv/kernel/default_conv3d_wgrad.h index 728cf945..0fd7f0f8 100644 --- a/include/cutlass/conv/kernel/default_conv3d_wgrad.h +++ b/include/cutlass/conv/kernel/default_conv3d_wgrad.h @@ -65,7 +65,7 @@ template < typename ThreadblockSwizzle, int Stages, typename MathOperatorTag, - conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kAnalytic, + conv::IteratorAlgorithm IteratorAlgorithm = IteratorAlgorithm::kOptimized, conv::StrideSupport StrideSupport = StrideSupport::kStrided > struct DefaultConv3dWgrad; diff --git a/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_analytic.h b/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_analytic.h index da8ae974..59b5c8bd 100644 --- a/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_analytic.h +++ b/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_analytic.h @@ -210,9 +210,9 @@ public: CUTLASS_HOST_DEVICE TensorCoord at() const { - int c = offset_c_[iteration_contiguous_]; int k = offset_k_[iteration_strided_]; - + int c = offset_c_[iteration_contiguous_] + iteration_vector_ * 
AccessType::kElements; + return TensorCoord(k, filter_r_, filter_s_, c); } @@ -222,7 +222,7 @@ public: TensorCoord coord = at(); - return coord.n() < problem_size_.K && (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.C; + return coord.n() < problem_size_.K && coord.c() < problem_size_.C; } /// Returns a pointer to the vector starting at the current coordinate @@ -232,7 +232,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8) + iteration_vector_; + return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); } @@ -250,6 +250,7 @@ public: return *this; } iteration_contiguous_ = 0; + ++iteration_strided_; if (iteration_strided_ < ThreadMap::Iterations::kStrided) { return *this; @@ -408,8 +409,8 @@ public: CUTLASS_HOST_DEVICE TensorCoord at() const { - int c = offset_c_[iteration_contiguous_]; int k = offset_k_[iteration_strided_]; + int c = offset_c_[iteration_contiguous_] + iteration_vector_ * AccessType::kElements; return TensorCoord(k, filter_r_, filter_s_, c); } @@ -420,7 +421,7 @@ public: TensorCoord coord = at(); - return coord.n() < problem_size_.K && (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.C; + return coord.n() < problem_size_.K && coord.c() < problem_size_.C; } /// Returns a pointer to the vector starting at the current coordinate @@ -430,7 +431,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8) + iteration_vector_; + return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); } /// Increments to the next memory access diff --git a/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_optimized.h b/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_optimized.h index 723a8f6d..ba7b839b 100644 --- 
a/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_optimized.h +++ b/include/cutlass/conv/threadblock/conv2d_dgrad_filter_tile_access_iterator_optimized.h @@ -67,6 +67,282 @@ class Conv2dDgradFilterTileAccessIteratorOptimized; ///////////////////////////////////////////////////////////////////////////////////////////////// +// Conv2dDgradFilterTileAccessIteratorOptimized unity strided dgrad is more performant for dgrad +// on problem sizes with stride = {1x1} +template < + typename Shape_, + typename Element_, + typename ThreadMap_, + typename AccessType_ +> +class Conv2dDgradFilterTileAccessIteratorOptimized < + Shape_, + Element_, + ThreadMap_, + conv::StrideSupport::kStrided, + AccessType_ + > { +public: + + // + // Types + // + + using Shape = Shape_; + using Element = Element_; + using Layout = layout::TensorNHWC; + using ThreadMap = ThreadMap_; + using AccessType = AccessType_; + using TensorRef = cutlass::TensorRef; + using TensorCoord = typename Layout::TensorCoord; + using Index = typename Layout::Index; + using LongIndex = typename Layout::LongIndex; + static IteratorAlgorithm const kIteratorAlgorithm = conv::IteratorAlgorithm::kOptimized; + static StrideSupport const kStrideSupport = conv::StrideSupport::kStrided; + static int const kConvDim = 2; + using ConvProblemSize = typename conv::Conv2dProblemSize; + + static int const kAccessesPerVector = ThreadMap::kElementsPerAccess / AccessType::kElements; + + static_assert(!(ThreadMap::kElementsPerAccess % AccessType::kElements), + "Vectors implied by the thread map must be divisible by the access type."); + + // + // Parameters structure + // + + struct Params : Conv2dStridedDgradFilterIteratorOptimizedParams { + + // + // Methods + // + CUTLASS_HOST_DEVICE + Params() { } + + CUTLASS_HOST_DEVICE + Params(Conv2dStridedDgradFilterIteratorOptimizedParams const &base): + Conv2dStridedDgradFilterIteratorOptimizedParams(base) { } + + CUTLASS_HOST_DEVICE + Params( + Conv2dProblemSize const 
&problem_size, + Layout const &layout + ): + Conv2dStridedDgradFilterIteratorOptimizedParams( + problem_size, + layout, + sizeof_bits::value, + {Shape::kRow, Shape::kColumn}, + ThreadMap::kThreads, + ThreadMap::kElementsPerAccess, + {ThreadMap::Iterations::kContiguous, ThreadMap::Iterations::kStrided}, + {ThreadMap::Delta::kContiguous, ThreadMap::Delta::kStrided} + ) { } + + }; + +private: + + Conv2dStridedDgradFilterIteratorOptimizedParams const ¶ms_; + Conv2dProblemSize const &problem_size_; + LongIndex iteration_contiguous_; + LongIndex iteration_strided_; + LongIndex iteration_vector_; + char const *pointer_; + + uint32_t predicates_[kAccessesPerVector]; + int filter_k_; + int filter_r_; + int filter_s_; + + int start_r_; + int start_s_; + + int64_t reset_bytes_s_; + int64_t reset_bytes_r_; + + // + // Assertions + // + + // We map predicates into bits packed in this uint32_t container + static_assert(ThreadMap::Iterations::kStrided * + ThreadMap::Iterations::kContiguous < sizeof(predicates_) * 8, + "Currently, the number of loads per iteration is limited by the size of the predicates container."); + +public: + + CUTLASS_HOST_DEVICE + Conv2dDgradFilterTileAccessIteratorOptimized( + Conv2dStridedDgradFilterIteratorOptimizedParams const ¶ms, + Conv2dProblemSize const &problem_size, + Element const *ptr, + int thread_idx, + int start_r, int start_s, + MatrixCoord const &threadblock_offset = MatrixCoord() + ): + params_(params), + problem_size_(problem_size), + pointer_(reinterpret_cast(ptr)), + predicates_{0}, + filter_r_(start_r), + filter_s_(start_s), + start_r_(start_r), + start_s_(start_s) { + + layout::PitchLinearCoord thread_coord = ThreadMap::initial_offset(thread_idx); + + filter_k_ = threadblock_offset.row() + thread_coord.strided(); + Index column = threadblock_offset.column() + thread_coord.contiguous(); + + reset_bytes_s_ = (problem_size_.num_gemm_k_filter_s(start_s_) - 1) * params_.inc_next[0]; + reset_bytes_r_ = reset_bytes_s_ + + 
(problem_size_.num_gemm_k_filter_r(start_r_) - 1) * params_.inc_next[1]; + + CUTLASS_PRAGMA_UNROLL + for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) { + CUTLASS_PRAGMA_UNROLL + for (int c = 0; c < ThreadMap::Iterations::kContiguous; ++c) { + + int filter_k = filter_k_ + s * ThreadMap::Delta::kStrided; + int filter_c = column + c * ThreadMap::Delta::kContiguous; + + CUTLASS_PRAGMA_UNROLL + for (int v = 0; v < kAccessesPerVector; ++v) { + + uint32_t pred = ((filter_k < problem_size_.K && (filter_c + v * AccessType::kElements) < problem_size_.C) ? 1u : 0); + + int pred_idx = c + s * ThreadMap::Iterations::kContiguous; + + predicates_[v] |= (pred << pred_idx); + } + } + } + + TensorCoord coord{filter_k_, filter_r_, filter_s_, column}; + + pointer_ += params_.layout(coord) * sizeof_bits::value / 8; + + set_iteration_index(0); + } + + /// Overrides the internal iteration index + CUTLASS_HOST_DEVICE + void set_iteration_index(Index index) { + iteration_vector_ = index % kAccessesPerVector; + int residual_access = index / kAccessesPerVector; + iteration_contiguous_ = residual_access % ThreadMap::Iterations::kContiguous; + iteration_strided_ = residual_access / ThreadMap::Iterations::kContiguous; + } + + /// Adds a pointer offset in units of Element + CUTLASS_HOST_DEVICE + void add_pointer_offset(LongIndex pointer_offset) { + + pointer_ += pointer_offset * sizeof_bits::value / 8; + } + + CUTLASS_HOST_DEVICE + void advance() { + + int next_idx = 0; + LongIndex reset_bytes = params_.reset_bytes; + + // Move filter_s by stride_w + filter_s_ += problem_size_.stride_w; + if (filter_s_ >= problem_size_.S) { + + // Restore filter_s + filter_s_ = start_s_; + + // Move filter_r by stride_h + filter_r_ += problem_size_.stride_h; + + bool check = (filter_r_ < problem_size_.R); + + filter_r_ = check ? filter_r_ : start_r_; + next_idx = check ? 1 : 2; + reset_bytes += (check ? 
reset_bytes_s_ : reset_bytes_r_); + } + + // offset pointers by offset_bytes + pointer_ += (params_.inc_next[next_idx] - reset_bytes); + + if (next_idx == 2) { + filter_k_ += params_.filter_k_delta; + } + + // Clear predicates if needed + CUTLASS_PRAGMA_UNROLL + for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) { + if (filter_k_ + s * ThreadMap::Delta::kStrided >= problem_size_.K) { + uint32_t kClearMask = ((1u << ThreadMap::Iterations::kContiguous) - 1) << (s * ThreadMap::Iterations::kContiguous); + + CUTLASS_PRAGMA_UNROLL + for (int v = 0; v < kAccessesPerVector; ++v) { + predicates_[v] = (predicates_[v] & (~kClearMask)); + } + } + } + } + + /// Returns true if the current coordinate is within the filter tensor W + CUTLASS_HOST_DEVICE + bool valid() { + LongIndex pred_idx = iteration_contiguous_ + iteration_strided_ * ThreadMap::Iterations::kContiguous; + return (predicates_[iteration_vector_] & (1u << pred_idx)); + } + + /// Returns a pointer to the vector starting at the current coordinate + CUTLASS_HOST_DEVICE + AccessType const *get() const { + return reinterpret_cast(pointer_ + + iteration_contiguous_ * ThreadMap::Delta::kContiguous * sizeof_bits::value / 8) + iteration_vector_; + } + + /// Increments to the next memory access + CUTLASS_HOST_DEVICE + Conv2dDgradFilterTileAccessIteratorOptimized &operator++() { + ++iteration_vector_; + if (iteration_vector_ < kAccessesPerVector) { + return *this; + } + iteration_vector_ = 0; + + ++iteration_contiguous_; + if (iteration_contiguous_ < ThreadMap::Iterations::kContiguous) { + return *this; + } + iteration_contiguous_ = 0; + + ++iteration_strided_; + if (iteration_strided_ < ThreadMap::Iterations::kStrided) { + + // Move to the next K coordinate within the tile + pointer_ += params_.inc_next_strided; + + return *this; + } + iteration_strided_ = 0; + + return *this; + } + + /// Determines whether the Implicit GEMM can execute the given problem. 
+ CUTLASS_HOST_DEVICE + static Status can_implement(Conv2dProblemSize const &problem_size) { + + // check alignment constraint on iterator's contiguous dimension + if (problem_size.C % AccessType::kElements) { + return Status::kErrorInvalidProblem; + } + + return Status::kSuccess; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// + // Conv2dDgradFilterTileAccessIteratorOptimized unity strided dgrad is more performant for dgrad // on problem sizes with stride = {1x1} template < diff --git a/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_analytic.h b/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_analytic.h index cb8410e4..8434b398 100644 --- a/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_analytic.h +++ b/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_analytic.h @@ -268,11 +268,13 @@ public: p += (conv_sign * (filter_r_ / problem_size_.stride_h)); q += (conv_sign * (filter_s_ / problem_size_.stride_w)); + int k = filter_k_ + iteration_vector_ * AccessType::kElements; + return TensorCoord( n, p, q, - filter_k_); + k); } @@ -286,7 +288,7 @@ public: coord.n() < problem_size_.N && coord.h() >= 0 && coord.h() < problem_size_.P && coord.w() >= 0 && coord.w() < problem_size_.Q && - (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.K; + coord.c() < problem_size_.K; } /// Returns a pointer to the vector starting at the current coordinate @@ -296,7 +298,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8) + iteration_vector_; + return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); } /// Increments to the next memory access @@ -313,6 +315,7 @@ public: return *this; } iteration_contiguous_ = 0; + ++iteration_strided_; if (iteration_strided_ < 
ThreadMap::Iterations::kStrided) { return *this; @@ -516,7 +519,9 @@ public: int p = (h + problem_size_.pad_h - r * problem_size_.dilation_h) / problem_size_.stride_h; int q = (w + problem_size_.pad_w - s * problem_size_.dilation_w) / problem_size_.stride_w; - return TensorCoord(n, p, q, filter_k_); + int k = filter_k_ + iteration_vector_ * AccessType::kElements; + + return TensorCoord(n, p, q, k); } @@ -529,7 +534,7 @@ public: return coord.n() < problem_size_.N && coord.h() >= 0 && coord.h() < problem_size_.P && coord.w() >= 0 && coord.w() < problem_size_.Q && - (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.K; + coord.c() < problem_size_.K; } /// Returns a pointer to the vector starting at the current coordinate @@ -539,7 +544,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8) + iteration_vector_; + return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); } /// Increments to the next memory access diff --git a/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_optimized.h b/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_optimized.h index ef916dde..99e72adb 100644 --- a/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_optimized.h +++ b/include/cutlass/conv/threadblock/conv2d_dgrad_output_gradient_tile_access_iterator_optimized.h @@ -67,6 +67,380 @@ template < class Conv2dDgradOutputGradientTileAccessIteratorOptimized; ///////////////////////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////////////////////////// +// Conv2dDgradOutputGradientTileAccessIteratorOptimized strided dgrad needs special handling +// to skip MMAs (Dx = Dy * w) on invalid filter positions 
+///////////////////////////////////////////////////////////////////////////////////////////////// +template < + typename Shape_, + typename Element_, + typename ThreadMap_, + typename AccessType_ +> +class Conv2dDgradOutputGradientTileAccessIteratorOptimized < + Shape_, + Element_, + ThreadMap_, + conv::StrideSupport::kStrided, + AccessType_ +> { +public: + + // + // Types + // + using Shape = Shape_; + using Element = Element_; + using Layout = layout::TensorNHWC; + using ThreadMap = ThreadMap_; + using AccessType = AccessType_; + using TensorRef = cutlass::TensorRef; + using TensorCoord = typename Layout::TensorCoord; + using Index = typename Layout::Index; + using LongIndex = typename Layout::LongIndex; + static IteratorAlgorithm const kIteratorAlgorithm = conv::IteratorAlgorithm::kOptimized; + static StrideSupport const kStrideSupport = conv::StrideSupport::kStrided; + static int const kConvDim = 2; + using ConvProblemSize = typename conv::Conv2dProblemSize; + + static int const kAccessesPerVector = ThreadMap::kElementsPerAccess / AccessType::kElements; + + static_assert(!(ThreadMap::kElementsPerAccess % AccessType::kElements), + "Vectors implied by the thread map must be divisible by the access type."); + + using Mask = uint64_t; + + static_assert(sizeof_bits::value >= 8, + "DGRAD requires elements of size 8b or greater."); + + // + // Simpligying assertions + // + + static_assert(ThreadMap::Iterations::kContiguous == 1, + "Require Iterations::kContiguous == 1"); + + // + // Parameters structure + // + + using Params = Conv2dStridedDgradOutputGradientIteratorOptimizedParams; + +private: + + Params const ¶ms_; + Conv2dProblemSize const &problem_size_; + LongIndex iteration_contiguous_; + LongIndex iteration_strided_; + LongIndex iteration_vector_; + + // One pointer per access + char const *pointer_[ThreadMap::Iterations::kStrided]; + + int filter_k_; + int filter_r_; + int filter_s_; + int start_r_; + int start_s_; + int64_t reset_bytes_s_; + int64_t 
reset_bytes_r_; + + Index masks_[ThreadMap::Iterations::kStrided][kAccessesPerVector][2]; + +public: + + CUTLASS_HOST_DEVICE + Conv2dDgradOutputGradientTileAccessIteratorOptimized( + Params const ¶ms, + Conv2dProblemSize const &problem_size, + Element const *ptr, + int thread_idx, + FastDivmod const &stride_h_divmod, FastDivmod const &stride_w_divmod, + int start_r, int start_s, + MatrixCoord const &threadblock_offset = MatrixCoord() // threadblock offset - units are whole CTA tiles + ): + params_(params), + problem_size_(problem_size), + filter_k_(0), + filter_r_(start_r), + filter_s_(start_s), + start_r_(start_r), + start_s_(start_s) { + + layout::PitchLinearCoord thread_coord = ThreadMap::initial_offset(thread_idx); + + filter_k_ = threadblock_offset.column() + thread_coord.contiguous(); + + reset_bytes_s_ = (problem_size_.num_gemm_k_filter_s(start_s_) - 1) * params_.inc_next[0]; + + reset_bytes_r_ = (problem_size_.num_gemm_k_filter_s(start_s_) - 1) * params_.inc_next[0] + + (problem_size_.num_gemm_k_filter_r(start_r_) - 1) * params_.inc_next[1]; + + int offset_n[ThreadMap::Iterations::kStrided]; + int offset_p[ThreadMap::Iterations::kStrided]; + int offset_q[ThreadMap::Iterations::kStrided]; + + int filter_r = filter_r_; + int filter_s = filter_s_; + + if (problem_size_.mode == Mode::kConvolution) { + filter_r = (problem_size_.R - 1 - filter_r); + filter_s = (problem_size_.S - 1 - filter_s); + } + + // Starting h, w positions for filter position in gemm_k=0 + int start_h, start_w; + strided_dgrad_starting_coords( + problem_size_, + stride_h_divmod, stride_w_divmod, + filter_r, filter_s, + start_h, start_w); + + + // Effective starting P and Q for filter position required for remapping NHW rows + int P = (problem_size_.H - start_h + problem_size_.stride_h - 1) / problem_size_.stride_h; + int Q = (problem_size_.W - start_w + problem_size_.stride_w - 1) / problem_size_.stride_w; + + CUTLASS_PRAGMA_UNROLL + for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) 
{ + + pointer_[s] = reinterpret_cast(ptr); + + int offset_npq = (threadblock_offset.row() + thread_coord.strided() + s * ThreadMap::Delta::kStrided) % params_.tiled_rows_per_filter; + + // (STEP 1) [reorder NHW rows to start with same filter positions] + offset_n[s] = offset_npq / (P * Q); + int residual = offset_npq % (P * Q); + + int p = (residual / Q); + int q = (residual % Q); + + int mapped_h = (start_h + p * problem_size_.stride_h); + int mapped_w = (start_w + q * problem_size_.stride_w); + + // Access (p, q) coordinates for Dy tensor for filter position in gemm_k=0 + // note that (h + pad_h - filter_r) and (w + pad_w - filter_s) are ensured to be + // divisible by stride_h and stride_w + offset_p[s] = (mapped_h + problem_size_.pad_h - filter_r) / problem_size_.stride_h; + offset_q[s] = (mapped_w + problem_size_.pad_w - filter_s) / problem_size_.stride_w; + + // Intialize pointers for gemm_k=0 + TensorCoord coord{offset_n[s], offset_p[s], offset_q[s], filter_k_}; + + pointer_[s] += params_.layout(coord) * sizeof_bits::value / 8; + } + + // + // Precompute mask predicates + // + clear_mask(); + + CUTLASS_PRAGMA_NO_UNROLL + for (int r = start_r; r < problem_size_.R; r += problem_size_.stride_h) { + CUTLASS_PRAGMA_UNROLL + for (int s_idx = 0; s_idx < ThreadMap::Iterations::kStrided; ++s_idx) { + + int p = offset_p[s_idx] ; + + p += (params_.conv_sign * (r / problem_size_.stride_h)); + + bool pred = (offset_n[s_idx] < problem_size_.N && p >= 0 && p < problem_size_.P); + + CUTLASS_PRAGMA_UNROLL + for (int v_idx = 0; v_idx < kAccessesPerVector; ++v_idx) { + masks_[s_idx][v_idx][0] |= (pred << r); + } + } + } + + CUTLASS_PRAGMA_NO_UNROLL + for(int s = start_s; s < problem_size_.S; s += problem_size_.stride_w) { + CUTLASS_PRAGMA_UNROLL + for (int s_idx = 0; s_idx < ThreadMap::Iterations::kStrided; ++s_idx) { + + int q = offset_q[s_idx]; + q += (params_.conv_sign * (s / problem_size_.stride_w)); + + bool pred = (q >=0 && q < problem_size_.Q); + + CUTLASS_PRAGMA_UNROLL 
+ for (int v_idx = 0; v_idx < kAccessesPerVector; ++v_idx) { + masks_[s_idx][v_idx][1] |= (pred << s); + } + } + } + + CUTLASS_PRAGMA_UNROLL + for (int v_idx = 0; v_idx < kAccessesPerVector; ++v_idx) { + clear_mask(v_idx, (filter_k_ + v_idx * AccessType::kElements) >= problem_size.K); + } + + set_iteration_index(0); + } + + CUTLASS_HOST_DEVICE + static Params getParams(Conv2dProblemSize const &problem_size, Layout const &layout) { + return Params(problem_size, + layout, + sizeof_bits::value, + {Shape::kRow, Shape::kColumn}); + } + +private: + + /// Adds a pointer offset in units of element + CUTLASS_HOST_DEVICE + void add_byte_offset_(LongIndex byte_offset, LongIndex byte_reset = 0) { + + CUTLASS_PRAGMA_UNROLL + for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) { + pointer_[s] += byte_offset - byte_reset; + } + } + +public: + + /// Overrides the internal iteration index + CUTLASS_HOST_DEVICE + void set_iteration_index(Index index) { + iteration_vector_ = index % kAccessesPerVector; + int residual_access = index / kAccessesPerVector; + iteration_contiguous_ = residual_access % ThreadMap::Iterations::kContiguous; + iteration_strided_ = residual_access / ThreadMap::Iterations::kContiguous; + } + + /// Adds a pointer offset in units of Element + CUTLASS_HOST_DEVICE + void add_pointer_offset(LongIndex pointer_offset) { + add_byte_offset_(pointer_offset * sizeof_bits::value / 8); + } + + CUTLASS_HOST_DEVICE + void advance() { + + int next_idx = 0; + int64_t reset_bytes = 0; + + // Move filter_s by stride_w + filter_s_ += problem_size_.stride_w; + if (filter_s_ >= problem_size_.S) { + + // Restore filter_s + filter_s_ = start_s_; + + // Move filter_r by stride_h + filter_r_ += problem_size_.stride_h; + if (filter_r_ < problem_size_.R) { + + next_idx = 1; + + // Restore bytes in q coordinate (Mma in filter s dimenstion) + reset_bytes = reset_bytes_s_; + + } else { + + // Restore filter_r + filter_r_ = start_r_; + + next_idx = 2; + + // Restore bytes in p and q 
coordinate (Mma in filter s and r dimenstion) + reset_bytes = reset_bytes_r_; + } + } + + // offset pointers by offset_bytes + add_byte_offset_(params_.inc_next[next_idx] - reset_bytes); + + if (next_idx == 2) { + filter_k_ += params_.filter_k_delta; + } + + CUTLASS_PRAGMA_UNROLL + for (int v_idx = 0; v_idx < kAccessesPerVector; ++v_idx) { + clear_mask(v_idx, (filter_k_ + v_idx * AccessType::kElements) >= problem_size_.K); + } + } + + /// Clears the predicates + CUTLASS_HOST_DEVICE + void clear_mask(bool clear = true) { + CUTLASS_PRAGMA_UNROLL + for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) { + CUTLASS_PRAGMA_UNROLL + for (int v = 0; v < kAccessesPerVector; ++v) { + masks_[s][v][0] = clear ? Mask(0) : masks_[s][v][0]; + masks_[s][v][1] = clear ? Mask(0) : masks_[s][v][1]; + } + } + } + + /// Clears the predicates + CUTLASS_HOST_DEVICE + void clear_mask(int v, bool clear = true) { + CUTLASS_PRAGMA_UNROLL + for (int s = 0; s < ThreadMap::Iterations::kStrided; ++s) { + masks_[s][v][0] = clear ? Mask(0) : masks_[s][v][0]; + masks_[s][v][1] = clear ? 
Mask(0) : masks_[s][v][1]; + } + } + + /// Returns true if the current coordinate is within the output tensor Dy + CUTLASS_HOST_DEVICE + bool valid() const { + return + (masks_[iteration_strided_][iteration_vector_][0] & (Index(1) << filter_r_)) && + (masks_[iteration_strided_][iteration_vector_][1] & (Index(1) << filter_s_)); + } + + /// Returns a pointer to the vector starting at the current coordinate + CUTLASS_HOST_DEVICE + AccessType const *get() const { + + return reinterpret_cast(pointer_[iteration_strided_]) + iteration_vector_; + } + + /// Increments to the next memory access + CUTLASS_HOST_DEVICE + Conv2dDgradOutputGradientTileAccessIteratorOptimized &operator++() { + ++iteration_vector_; + if (iteration_vector_ < kAccessesPerVector) { + return *this; + } + iteration_vector_ = 0; + + ++iteration_contiguous_; + if (iteration_contiguous_ < ThreadMap::Iterations::kContiguous) { + return *this; + } + iteration_contiguous_ = 0; + ++iteration_strided_; + if (iteration_strided_ < ThreadMap::Iterations::kStrided) { + return *this; + } + iteration_strided_ = 0; + + return *this; + } + + /// Determines whether the Implicit GEMM can execute the given problem. 
+ CUTLASS_HOST_DEVICE + static Status can_implement(Conv2dProblemSize const &problem_size) { + + // check alignment constraint on iterator's contiguous dimension + if (problem_size.K % AccessType::kElements) { + return Status::kErrorInvalidProblem; + } + + // Limit on filter size + if (problem_size.R > 32 || problem_size.S > 32) { + return Status::kErrorNotSupported; + } + + return Status::kSuccess; + } +}; + ///////////////////////////////////////////////////////////////////////////////////////////////// // Conv2dDgradOutputGradientTileAccessIteratorOptimized unity stride dgrad is optimized for dgrad // with problem stride = {1x1} diff --git a/include/cutlass/conv/threadblock/conv2d_fprop_activation_tile_access_iterator_analytic.h b/include/cutlass/conv/threadblock/conv2d_fprop_activation_tile_access_iterator_analytic.h index 3dba1a42..9b2296b7 100644 --- a/include/cutlass/conv/threadblock/conv2d_fprop_activation_tile_access_iterator_analytic.h +++ b/include/cutlass/conv/threadblock/conv2d_fprop_activation_tile_access_iterator_analytic.h @@ -209,7 +209,9 @@ public: int h = p * problem_size_.stride_h - problem_size_.pad_h + r * problem_size_.dilation_h; int w = q * problem_size_.stride_w - problem_size_.pad_w + s * problem_size_.dilation_w; - return TensorCoord(n, h, w, filter_c_); + int c = filter_c_ + iteration_vector_ * AccessType::kElements; + + return TensorCoord(n, h, w, c); } /// Returns true if the current coordinate is within the activations tensor X @@ -221,7 +223,7 @@ public: return coord.n() < problem_size_.N && coord.h() >= 0 && coord.h() < problem_size_.H && coord.w() >= 0 && coord.w() < problem_size_.W && - (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.C; + coord.c() < problem_size_.C; } /// Returns a pointer to the vector starting at the current coordinate @@ -231,7 +233,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - AccessType const *ptr = reinterpret_cast(pointer_ + offset * 
sizeof_bits::value / 8) + iteration_vector_; + AccessType const *ptr = reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); return ptr; } diff --git a/include/cutlass/conv/threadblock/conv2d_fprop_filter_tile_access_iterator_analytic.h b/include/cutlass/conv/threadblock/conv2d_fprop_filter_tile_access_iterator_analytic.h index f815b707..25b254f3 100644 --- a/include/cutlass/conv/threadblock/conv2d_fprop_filter_tile_access_iterator_analytic.h +++ b/include/cutlass/conv/threadblock/conv2d_fprop_filter_tile_access_iterator_analytic.h @@ -183,8 +183,9 @@ public: TensorCoord at() const { int k = offset_k_[iteration_strided_]; + int c = filter_c_ + iteration_vector_ * AccessType::kElements; - return TensorCoord(k, filter_r_, filter_s_, filter_c_); + return TensorCoord(k, filter_r_, filter_s_, c); } /// Returns true if the current coordinate is within the activations tensor W @@ -194,7 +195,7 @@ public: TensorCoord coord = at(); return coord.n() < problem_size_.K && - (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.C; + coord.c() < problem_size_.C; } /// Returns a pointer to the vector starting at the current coordinate @@ -204,7 +205,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8) + iteration_vector_; + return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); } /// Increments to the next memory access diff --git a/include/cutlass/conv/threadblock/conv2d_params.h b/include/cutlass/conv/threadblock/conv2d_params.h index 20a538d0..511504de 100644 --- a/include/cutlass/conv/threadblock/conv2d_params.h +++ b/include/cutlass/conv/threadblock/conv2d_params.h @@ -527,6 +527,64 @@ struct Conv2dDgradOutputGradientIteratorOptimizedParams { } }; +///////////////////////////////////////////////////////////////////////////////////////////////// +// Strided Dgrad Optimized Dy params (layout::TensorNHWC) 
+///////////////////////////////////////////////////////////////////////////////////////////////// +struct Conv2dStridedDgradOutputGradientIteratorOptimizedParams { + + using Layout = layout::TensorNHWC; + + Layout layout; + + int64_t inc_next[3]; // {next S, next R, next K} + + int filter_k_delta; // number of logical elements to add to filter_k_ + + int tiled_rows_per_filter; + + int conv_sign; + // + // Methods + // + + CUTLASS_HOST_DEVICE + Conv2dStridedDgradOutputGradientIteratorOptimizedParams() { } + + CUTLASS_HOST_DEVICE + Conv2dStridedDgradOutputGradientIteratorOptimizedParams( + Conv2dProblemSize const &problem_size, + Layout const &layout, ///< layout object + int element_size_bits, ///< size of each element in bits + MatrixCoord threadblock_shape + ): layout(layout) { + + int tile_m_per_filter = strided_dgrad_tile_m_per_filter(problem_size, threadblock_shape.row()); + + tiled_rows_per_filter = tile_m_per_filter * threadblock_shape.row(); + + conv_sign = (problem_size.mode == Mode::kConvolution ? 
1 : -1); + + // next S + inc_next[0] = conv_sign * ( + layout.stride()[0] * problem_size.dilation_w + ) * element_size_bits / 8; + + // next R + inc_next[1] = conv_sign * ( + layout.stride()[1] * problem_size.dilation_h + ) * element_size_bits / 8; + + // next K + inc_next[2] = ( + threadblock_shape.column() * problem_size.split_k_slices + ) * element_size_bits / 8; + + // logical offset added to internal channel counter - units are elements, not bytes + filter_k_delta = threadblock_shape.column() * problem_size.split_k_slices; + } +}; +///////////////////////////////////////////////////////////////////////////////////////////////// + //////////////////////////////////////////////////////////////////////////////////////////////// // Dgrad Optimized w params (layout::TensorNHWC) ///////////////////////////////////////////////////////////////////////////////////////////////// @@ -584,6 +642,73 @@ struct Conv2dDgradFilterIteratorOptimizedParams { ///////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////////////////////// +// StridedDgrad Optimized w params (layout::TensorNHWC) +///////////////////////////////////////////////////////////////////////////////////////////////// +struct Conv2dStridedDgradFilterIteratorOptimizedParams { + + using Layout = layout::TensorNHWC; + + Layout layout; + int RS; + int filter_k_delta; + + int64_t inc_next_strided; // offset in units of bytes to next K coordinate within tile + int64_t inc_next[3]; // {next S, next R, next K} + int64_t reset_bytes; // offset in units of bytes to move back the pointer + // + // Methods + // + CUTLASS_HOST_DEVICE + Conv2dStridedDgradFilterIteratorOptimizedParams() { } + + CUTLASS_HOST_DEVICE + Conv2dStridedDgradFilterIteratorOptimizedParams( + Conv2dProblemSize const &problem_size, + Layout const &layout, + int element_size_bits, ///< size of each element in bits + MatrixCoord 
threadblock_shape, + int thread_count, + int access_size, + layout::PitchLinearCoord threadmap_iterations, + layout::PitchLinearCoord threadmap_delta + ): + layout(layout), RS(problem_size.R * problem_size.S) { + + TRACE_CONV_INITIALIZERS("conv2d_dgrad", "filter", + element_size_bits, threadblock_shape, thread_count, access_size, threadmap_iterations, threadmap_delta); + + inc_next_strided = (layout.stride()[2] * threadmap_delta.strided() * element_size_bits) / 8; + + // next S + inc_next[0] = + ( layout.stride()[0] * problem_size.stride_w + //- (threadmap_iterations.strided() - 1) * threadmap_delta.strided() * layout.stride()[2] + ) * element_size_bits / 8; + + // next R + inc_next[1] = + ( layout.stride()[1] * problem_size.stride_h + //- (threadmap_iterations.strided() - 1) * threadmap_delta.strided() * layout.stride()[2] + ) * element_size_bits / 8; + + // next K + inc_next[2] = + ( + threadblock_shape.row() * problem_size.split_k_slices * layout.stride()[2] + //- (problem_size.R * problem_size.S - 1) * layout.stride()[0] + //- (threadmap_iterations.strided() - 1) * threadmap_delta.strided() * layout.stride()[2] + ) * element_size_bits / 8; + + // offset in units of bytes to move the pointer in backward direction + reset_bytes = (threadmap_iterations.strided() - 1) * threadmap_delta.strided() * layout.stride()[2] + * element_size_bits / 8; + + filter_k_delta = threadblock_shape.row() * problem_size.split_k_slices; + } +}; +///////////////////////////////////////////////////////////////////////////////////////////////// + /// Parameters object for Conv2d WGRAD Output Gradient (dy) iterator struct Conv2dWgradOutputGradientIteratorOptimizedParams { diff --git a/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_analytic.h b/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_analytic.h index 901582e8..70d84187 100644 --- a/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_analytic.h 
+++ b/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_analytic.h @@ -183,10 +183,13 @@ public: int r, s, c; if (kAccessesPerVector == 1) { + /// One 128b aligned access fetching more than one element + c = filter_c_[iteration_contiguous_]; r = filter_r_[iteration_contiguous_]; s = filter_s_[iteration_contiguous_]; - c = filter_c_[iteration_contiguous_]; - } else { + } + else { + /// Multiple access to support non-128b alignment in contiguous dimension c = (filter_c_[iteration_contiguous_] + iteration_vector_ * AccessType::kElements) % problem_size_.C; int wrap_c = (filter_c_[iteration_contiguous_] + iteration_vector_ * AccessType::kElements) / problem_size_.C; s = (filter_s_[iteration_contiguous_] + wrap_c) % problem_size_.S; diff --git a/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_optimized.h b/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_optimized.h index cb96594b..c4e886a3 100644 --- a/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_optimized.h +++ b/include/cutlass/conv/threadblock/conv2d_wgrad_activation_tile_access_iterator_optimized.h @@ -205,6 +205,8 @@ public: int c = filter_c_[iteration_contiguous_]; if (kAccessesPerVector > 1) { + // This code section is only to support non-128b alignment + // Multiple access to support non-128b alignment in contiguous dimension int wrap_c; params_.c_divmod(wrap_c, c, c + iteration_vector_ * AccessType::kElements); diff --git a/include/cutlass/conv/threadblock/conv2d_wgrad_output_gradient_tile_access_iterator_analytic.h b/include/cutlass/conv/threadblock/conv2d_wgrad_output_gradient_tile_access_iterator_analytic.h index e43bc534..c9e12973 100644 --- a/include/cutlass/conv/threadblock/conv2d_wgrad_output_gradient_tile_access_iterator_analytic.h +++ b/include/cutlass/conv/threadblock/conv2d_wgrad_output_gradient_tile_access_iterator_analytic.h @@ -182,7 +182,9 @@ public: int p = residual / 
problem_size_.Q; int q = residual % problem_size_.Q; - return TensorCoord(n, p, q, filter_k_[iteration_contiguous_]); + int k = filter_k_[iteration_contiguous_] + iteration_vector_ * AccessType::kElements; + + return TensorCoord(n, p, q, k); } @@ -194,7 +196,7 @@ public: return coord.n() < problem_size_.N && coord.h() < problem_size_.P && coord.w() < problem_size_.Q && - (coord.c() + iteration_vector_ * AccessType::kElements) < problem_size_.K; + coord.c() < problem_size_.K; } /// Returns a pointer to the vector starting at the current coordinate @@ -204,7 +206,7 @@ public: TensorCoord coord = at(); LongIndex offset = params_.layout(coord); - return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8) + iteration_vector_; + return reinterpret_cast(pointer_ + offset * sizeof_bits::value / 8); } /// Increments to the next memory access diff --git a/include/cutlass/epilogue/thread/activation.h b/include/cutlass/epilogue/thread/activation.h index 2baddbc0..6f53375c 100644 --- a/include/cutlass/epilogue/thread/activation.h +++ b/include/cutlass/epilogue/thread/activation.h @@ -192,6 +192,32 @@ struct GELU_taylor { } }; +template +struct GELU_taylor > { + static const bool kIsHeavy=true; + CUTLASS_HOST_DEVICE + Array operator()(Array const &z) const { + + using T = half_t; + Array y; + + half_t k0 = half_t(0.7978845608028654); + half_t k1 = half_t(0.044715); + + multiply_add> fma; + multiplies> mul; + plus> add; + + fast_tanh_op> tanh; + + Array u = mul(mul(k0, z), fma(mul(k1, z), z, cutlass::constants::one())); + + y = mul(mul(z, cutlass::constants::half()), add(cutlass::constants::one(), tanh(u))); + + return y; + } +}; + template struct GELU_taylor > { static const bool kIsHeavy=true; diff --git a/include/cutlass/epilogue/warp/tile_iterator_tensor_op_mixed.h b/include/cutlass/epilogue/warp/tile_iterator_tensor_op_mixed.h index a0fd92c8..4c22a420 100644 --- a/include/cutlass/epilogue/warp/tile_iterator_tensor_op_mixed.h +++ 
b/include/cutlass/epilogue/warp/tile_iterator_tensor_op_mixed.h @@ -234,8 +234,9 @@ public: if (WarpShape::kN == 64) { ptr = pointers_[n / 4]; } - -#else + else +#endif + { // This is the reference implementation int column_idx = warp_column_ + n * Detail::kLanesInQuad * Policy::kElementsPerAccess; int ptr_idx = ((column_idx * sizeof_bits::value) / 1024) % Detail::kPointerCount; @@ -252,7 +253,8 @@ public: else if (ptr_idx == 3) { ptr = pointers_[3 % Detail::kPointerCount]; } -#endif + } + int offset = n * Detail::kLanesInQuad + pointer_offset / Policy::kElementsPerAccess; diff --git a/include/cutlass/fast_math.h b/include/cutlass/fast_math.h index 15cf9d2d..d4ccf7de 100644 --- a/include/cutlass/fast_math.h +++ b/include/cutlass/fast_math.h @@ -34,6 +34,7 @@ #endif #include "cutlass/cutlass.h" +#include "cutlass/array.h" #include "cutlass/uint128.h" #include "cutlass/coord.h" #include "cutlass/numeric_types.h" @@ -724,7 +725,13 @@ double fast_log(double x) { CUTLASS_HOST_DEVICE float fast_tanh(float x) { #if defined(__CUDA_ARCH__) - return ::tanhf(x); + #if (__CUDACC_VER_MAJOR__ >= 11) && (__CUDA_ARCH__ >= 750) + float y; + asm volatile ( "tanh.approx.f32 %0, %1; " : "=f"(y) : "f"(x)); + return y; + #else + return ::tanhf(x); + #endif #else return std::tanh(x); #endif @@ -739,6 +746,74 @@ double fast_tanh(double x) { #endif } +CUTLASS_HOST_DEVICE +half_t fast_tanh(half_t x) { + #if defined(__CUDA_ARCH__) && (__CUDACC_VER_MAJOR__ >= 11) && (__CUDA_ARCH__ >= 750) + + asm volatile ( "tanh.approx.f16 %0, %1;" : "=h"(x.raw()) : "h"(x.raw())); + return x; + + #else + return half_t(fast_tanh(float(x))); + #endif +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct fast_tanh_op { + CUTLASS_HOST_DEVICE + T operator()(T const &rhs) const { + return fast_tanh(rhs); + } +}; + +#if defined(__CUDA_ARCH__) && (__CUDACC_VER_MAJOR__ >= 11) && (__CUDA_ARCH__ >= 750) +template +struct fast_tanh_op> { + 
CUTLASS_DEVICE + Array operator()(Array const &rhs) const { + + Array result; + + // use x2 specialization + uint32_t const *in = reinterpret_cast(&rhs); + uint32_t *out = reinterpret_cast(&result); + + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < N / 2; ++i) { + asm volatile ("tanh.approx.f16x2 %0, %1;" : "=r"(out[i]) : "r"(in[i])); + } + + // residual + if (N % 2) { + uint16_t const *in = reinterpret_cast(&rhs); + uint16_t *out = reinterpret_cast(&result); + asm volatile ("tanh.approx.f16 %0, %1;" : "=h"(out[N - 1]) : "h"(in[N - 1])); + } + + return result; + } +}; +#endif // #if defined(__CUDA_ARCH__) + +template +struct fast_tanh_op> { + CUTLASS_HOST_DEVICE + Array operator()(Array const &rhs) const { + + fast_tanh_op fast_op; + Array y; + + CUTLASS_PRAGMA_UNROLL + for (int i = 0; i < N; ++i) { + y[i] = fast_op(rhs[i]); + } + + return y; + } +}; + ///////////////////////////////////////////////////////////////////////////////////////////////// } // namespace cutlass diff --git a/include/cutlass/gemm/kernel/default_gemm_with_k_reduction.h b/include/cutlass/gemm/kernel/default_gemm_with_k_reduction.h index 114351d1..e3d8c5a4 100644 --- a/include/cutlass/gemm/kernel/default_gemm_with_k_reduction.h +++ b/include/cutlass/gemm/kernel/default_gemm_with_k_reduction.h @@ -126,7 +126,7 @@ struct DefaultGemmWithKReduction { ThreadblockShape, typename Mma::Operator, kPartitionsK, EpilogueOutputOp, EpilogueOutputOp::kCount>::Epilogue; - /// Define the epilogue + /// Define the epilogue of the reduction vector using EpilogueGemmKReduction = typename cutlass::epilogue::threadblock::EpilogueGemmKReduction< ElementAccumulator, ElementC, ThreadblockShape, typename Mma::Operator, kReduceKForA>; diff --git a/include/cutlass/gemm/kernel/gemm_with_k_reduction.h b/include/cutlass/gemm/kernel/gemm_with_k_reduction.h index 35e61e74..1d376d2c 100644 --- a/include/cutlass/gemm/kernel/gemm_with_k_reduction.h +++ b/include/cutlass/gemm/kernel/gemm_with_k_reduction.h @@ -582,6 +582,13 @@ 
public: __threadfence(); } + // Execute the epilogue operator to update the destination tensor. + epilogue( + output_op, + iterator_D, + accumulators, + iterator_C); + if ((kReduceKForA && threadblock_tile_offset.n() == 0) || (!kReduceKForA && threadblock_tile_offset.m() == 0)) { @@ -610,14 +617,7 @@ public: && (threadblock_tile_offset.k() > 0)); } } - - // Execute the epilogue operator to update the destination tensor. - epilogue( - output_op, - iterator_D, - accumulators, - iterator_C); - + // // Release the semaphore // diff --git a/include/cutlass/gemm/threadblock/default_mma.h b/include/cutlass/gemm/threadblock/default_mma.h index 40b9b34b..d5af3aca 100644 --- a/include/cutlass/gemm/threadblock/default_mma.h +++ b/include/cutlass/gemm/threadblock/default_mma.h @@ -378,11 +378,21 @@ struct DefaultMma>::value, "simt epilogue must be row major"); + static cutlass::arch::CacheOperation::Kind const CacheOpA = + ((sizeof_bits::value * kAlignmentA) == 128) + ? cutlass::arch::CacheOperation::Global + : cutlass::arch::CacheOperation::Always; + + static cutlass::arch::CacheOperation::Kind const CacheOpB = + ((sizeof_bits::value * kAlignmentB) == 128) + ? 
cutlass::arch::CacheOperation::Global + : cutlass::arch::CacheOperation::Always; + // Define the MmaCore components using MmaCore = typename cutlass::gemm::threadblock::DefaultMmaCore< ThreadblockShape, WarpShape, InstructionShape, ElementA, LayoutA, ElementB, LayoutB, ElementAccumulator, LayoutC, arch::OpClassSimt, - Stages, Operator>; + Stages, Operator, false, CacheOpA, CacheOpB>; // Define iterators over tiles from the A operand using ThreadMapA = typename MmaCore::IteratorThreadMapA; diff --git a/include/cutlass/gemm/threadblock/default_mma_core_sm80.h b/include/cutlass/gemm/threadblock/default_mma_core_sm80.h index 221f5a8e..70b1764a 100644 --- a/include/cutlass/gemm/threadblock/default_mma_core_sm80.h +++ b/include/cutlass/gemm/threadblock/default_mma_core_sm80.h @@ -1111,8 +1111,8 @@ struct DefaultMmaCore< using ElementC = complex; using LayoutC = LayoutC_; static int const kStages = Stages; - static cutlass::arch::CacheOperation::Kind const kCacheOpA = cutlass::arch::CacheOperation::Always; - static cutlass::arch::CacheOperation::Kind const kCacheOpB = cutlass::arch::CacheOperation::Always; + static cutlass::arch::CacheOperation::Kind const kCacheOpA = cutlass::arch::CacheOperation::Global; + static cutlass::arch::CacheOperation::Kind const kCacheOpB = cutlass::arch::CacheOperation::Global; static const ComplexTransform TransformA = TransformA_; static const ComplexTransform TransformB = TransformB_; diff --git a/include/cutlass/gemm/threadblock/default_multistage_mma_complex.h b/include/cutlass/gemm/threadblock/default_multistage_mma_complex.h index bc73034e..870123b9 100644 --- a/include/cutlass/gemm/threadblock/default_multistage_mma_complex.h +++ b/include/cutlass/gemm/threadblock/default_multistage_mma_complex.h @@ -116,11 +116,22 @@ struct DefaultMultistageMmaComplex { + + static cutlass::arch::CacheOperation::Kind const CacheOpA = + (sizeof_bits::value == 128) + ? 
cutlass::arch::CacheOperation::Global + : cutlass::arch::CacheOperation::Always; + + static cutlass::arch::CacheOperation::Kind const CacheOpB = + (sizeof_bits::value == 128) + ? cutlass::arch::CacheOperation::Global + : cutlass::arch::CacheOperation::Always; + // Define the MmaCore components using MmaCore = typename cutlass::gemm::threadblock::DefaultMultistageMmaComplexCore< ThreadblockShape, WarpShape, InstructionShape, ElementA, LayoutA, ElementB, LayoutB, ElementAccumulator, layout::RowMajor, OperatorClass, - Stages, TransformA, TransformB, Operator>; + Stages, TransformA, TransformB, Operator, CacheOpA, CacheOpB>; // Define iterators over tiles from the A operand using ThreadMapA = typename MmaCore::IteratorThreadMapA; diff --git a/include/cutlass/gemm/threadblock/default_multistage_mma_complex_core_sm80.h b/include/cutlass/gemm/threadblock/default_multistage_mma_complex_core_sm80.h index d9a85394..266b7824 100644 --- a/include/cutlass/gemm/threadblock/default_multistage_mma_complex_core_sm80.h +++ b/include/cutlass/gemm/threadblock/default_multistage_mma_complex_core_sm80.h @@ -113,8 +113,8 @@ struct DefaultMultistageMmaComplexCore< static ComplexTransform const kTransformA = TransformA; static ComplexTransform const kTransformB = TransformB; using Operator = Operator_; - static cutlass::arch::CacheOperation::Kind const kCacheOpA = cutlass::arch::CacheOperation::Always; - static cutlass::arch::CacheOperation::Kind const kCacheOpB = cutlass::arch::CacheOperation::Always; + static cutlass::arch::CacheOperation::Kind const kCacheOpA = cutlass::arch::CacheOperation::Global; + static cutlass::arch::CacheOperation::Kind const kCacheOpB = cutlass::arch::CacheOperation::Global; /// Number of warps present using WarpCount = GemmShape +#include +#include +#include +#include + +#include "cutlass/cutlass.h" +#include "cutlass/layout/matrix.h" +#include "cutlass/conv/convolution.h" +#include "cutlass/conv/conv2d_problem_size.h" + +#include 
"cutlass/conv/conv3d_problem_size.h" +#include "cutlass/core_io.h" +#include "cutlass/util/tensor_view_io.h" + +#ifndef CUTLASS_TEST_ENABLE_CACHED_RESULTS +#define CUTLASS_TEST_ENABLE_CACHED_RESULTS false +#endif + +///////////////////////////////////////////////////////////////////////////////////////////////// + +namespace test { +namespace conv { +namespace device { + +///////////////////////////////////////////////////////////////////////////////////////////////// + +/// Result of a test +struct CachedTestKey { + + std::string op; ///< Concatenated string representation of operation performed + std::string problem; ///< Concatenated string representation of problem description + std::string types; ///< Concatenated string representation of operand types + uint32_t A; ///< Hashed result of tensor A + uint32_t B; ///< Hashed result of tensor B + uint32_t C; ///< Hashed result of tensor C + + // + // Methods + // + inline CachedTestKey(): A(), B(), C() { } + + inline CachedTestKey( + std::string op, ///< Concatenated string representation of operation performed + std::string problem, ///< Concatenated string representation of problem description + std::string types, ///< Concatenated string representation of operand types + uint32_t A, ///< Hashed result of tensor A + uint32_t B, ///< Hashed result of tensor B + uint32_t C ///< Hashed result of tensor C + ): + op(op), problem(problem), types(types), A(A), B(B), C(C) + { } + + /// Checks for equality of the problem + bool operator==(CachedTestKey const &rhs) const { + return op == rhs.op && problem == rhs.problem && types == rhs.types && A == rhs.A && B == rhs.B && C == rhs.C; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// + +inline std::istream &operator>>(std::istream &in, CachedTestKey &result) { + + in >> result.op; + in >> result.problem; + in >> result.types; + in >> result.A; + in >> result.B; + in >> result.C; + + return in; +} + +inline 
std::ostream &operator<<(std::ostream &out, CachedTestKey const &result) { + + out << result.op << " "; + out << result.problem << " "; + out << result.types << " "; + out << result.A << " "; + out << result.B << " "; + out << result.C << " "; + + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +struct CachedTestResult { + uint32_t D; + + // + // Methods + // + + CachedTestResult(): D() { } + + CachedTestResult(uint32_t D): D(D) { } + + operator bool() const { + return bool(D); + } +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// + +inline std::istream &operator>>(std::istream &in, CachedTestResult &result) { + in >> result.D; + return in; +} + +inline std::ostream &operator<<(std::ostream &out, CachedTestResult const &result) { + out << result.D; + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +struct CachedTestResultListing { + + std::list> results; + + // + // Methods + // + + inline CachedTestResultListing(std::string const &path) { + std::ifstream file(path); + + while (file.good()) { + CachedTestKey key; + file >> key; + + CachedTestResult result; + file >> result; + + if (result) { + results.push_back(std::make_pair(key, result)); + } + } + } + + /// Returns the cached result + std::pair find(CachedTestKey const &rhs) const { + for (auto const & result : results) { + if (result.first == rhs) { + return std::make_pair(true, result.second); + } + } + return std::make_pair(false, CachedTestResult()); + } + + /// Appends an entry + void append(CachedTestKey const &key, CachedTestResult const &result) { + if (result) { + results.push_back(std::make_pair(key, result)); + } + } + + /// Writes the entire listing to a file + bool write(std::string const &path) { + std::ofstream file(path); + if (!file.good()) { + return false; + } + + for (auto const &result : results) { + 
file << result.first << result.second << std::endl; + } + + return true; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template +struct ScalarEncoder { + Element scalar; + + ScalarEncoder(Element s): scalar(s) { } + + std::string str() const { + std::stringstream ss; + Element s = scalar; + if (s < Element()) { + s = -s; + ss << "n"; + } + ss << s; + return ss.str(); + } +}; + +template +ScalarEncoder EncodeScalar(Element a) { + return ScalarEncoder(a); +} + +template +struct ScalarEncoder> { + cutlass::complex scalar; + + ScalarEncoder(cutlass::complex s): scalar(s) { } + + std::string str() const { + std::stringstream ss; + ss << EncodeScalar(scalar.real()) << "_" << EncodeScalar(scalar.imag()) << "i"; + return ss.str(); + } +}; + +template +std::ostream &operator<<(std::ostream &out, ScalarEncoder const &scalar) { + out << scalar.str(); + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +inline char const *EncodeOperator(cutlass::conv::Operator conv_op) { + switch (conv_op) { + case cutlass::conv::Operator::kFprop: return "fprop"; + case cutlass::conv::Operator::kDgrad: return "dgrad"; + case cutlass::conv::Operator::kWgrad: return "wgrad"; + } + return "conv_unknown"; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +// Encode GemmCoord (Gemm problem size) +inline std::ostream &EncodeProblemSize( + std::ostream &out, + cutlass::gemm::GemmCoord const &problem) { + + out << problem.m() << "x" << problem.n() << "x" << problem.k() << "_"; + + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// +// Encode Conv2dProblemSize +inline std::ostream &EncodeProblemSize( + std::ostream &out, + cutlass::conv::Conv2dProblemSize const &problem) { + + out << problem.N << "x" << problem.H << "x" << problem.W << "x" << problem.C 
<< "_" + << problem.P << "x" << problem.Q << "_" << problem.K << "x" << problem.R << "x" << problem.S << "_"; + + out << "pad_h" << problem.pad_h << "w" << problem.pad_w << "_"; + out << "stride_h" << problem.stride_h << "w" << problem.stride_w << "_"; + out << "dil_h" << problem.dilation_h << "w" << problem.dilation_w << "_"; + + switch (problem.mode) { + case cutlass::conv::Mode::kCrossCorrelation: + out << "corr"; + break; + case cutlass::conv::Mode::kConvolution: + out << "conv"; + break; + } + + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +// Encode Conv3dProblemSize as a cache-key string: tensor extents, then padding, stride and dilation for d/h/w, then the mode +inline std::ostream &EncodeProblemSize( + std::ostream &out, + cutlass::conv::Conv3dProblemSize const &problem) { + + out << problem.N << "x" << problem.D << "x" << problem.H << "x" << problem.W << "x" << problem.C << "_" + << problem.Z << problem.P << "x" << problem.Q << "_" << problem.K << "x" << problem.R << "x" << problem.S << "_"; + + out << "pad_d" << problem.pad_d << "h" << problem.pad_h << "w" << problem.pad_w << "_"; + out << "stride_d" << problem.stride_d << "h" << problem.stride_h << "w" << problem.stride_w << "_"; + out << "dil_d" << problem.dilation_d << "h" << problem.dilation_h << "w" << problem.dilation_w << "_"; + + switch (problem.mode) { + case cutlass::conv::Mode::kCrossCorrelation: + out << "corr"; + break; + case cutlass::conv::Mode::kConvolution: + out << "conv"; + break; + } + + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline std::string ElementTypeName() { + return std::string(typeid(Element).name()); +} + +template <> +inline std::string ElementTypeName() { + return "h"; +} + +template <> +inline std::string ElementTypeName>() { + return "ch"; +} + +template <> +inline std::string ElementTypeName() { + return "bf16"; +} + +template <> +inline std::string ElementTypeName>() { + return "cbf16"; +} + +template <> 
+inline std::string ElementTypeName() { + return "tf32"; +} + +template <> +inline std::string ElementTypeName>() { + return "ctf32"; +} + +template <> +inline std::string ElementTypeName>() { + return "c"; +} + +template <> +inline std::string ElementTypeName>() { + return "z"; +} + +template <> +inline std::string ElementTypeName>() { + return "q"; +} + +template <> +inline std::string ElementTypeName() { + return "s8"; +} + +template <> +inline std::string ElementTypeName() { + return "u8"; +} + +template <> +inline std::string ElementTypeName() { + return "s4"; +} + +template <> +inline std::string ElementTypeName() { + return "u4"; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline std::string LayoutTypeName() { + return std::string(typeid(Layout).name()); +} + +template <> +inline std::string LayoutTypeName() { + return "n"; +} + +template <> +inline std::string LayoutTypeName() { + return "t"; +} + +template <> +inline std::string LayoutTypeName() { + return "nhwc"; +} + +template <> +inline std::string LayoutTypeName>() { + return "nc32hw32"; +} + +template <> +inline std::string LayoutTypeName>() { + return "nc64hw64"; +} + +template <> +inline std::string LayoutTypeName>() { + return "c32rsk32"; +} + +template <> +inline std::string LayoutTypeName>() { + return "c64rsk64"; +} + +template <> +inline std::string LayoutTypeName() { + return "ndhwc"; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template +inline std::string TensorTypeName() { + std::stringstream ss; + ss << ElementTypeName() << LayoutTypeName(); + return ss.str(); +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +/// Hash function on a byte array +struct CRC32 { + + uint32_t table[256]; + + // + // Methods + // + + CRC32() { + + uint32_t rem; + int i, j; + + for (i = 0; i < 256; i++) { + rem = i; + for (j 
= 0; j < 8; j++) { + if (rem & 1) { + rem >>= 1; + rem ^= 0xedb88320; + } else + rem >>= 1; + } + table[i] = rem; + } + } + + /// Computes the CRC of an array of bytes + uint32_t operator()(void const *start, size_t length, uint32_t crc = uint32_t()) const { + uint8_t const *p = static_cast(start); + uint8_t const *q = static_cast(start) + length; + + crc = ~crc; + + for (; p != q; ++p) { + uint8_t octet = *p; + crc = (crc >> 8) ^ table[(crc & 0xff) ^ octet]; + } + + return ~crc; + } +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename Element, typename Layout +> +uint32_t TensorHash( + cutlass::TensorView view, + CRC32 const &hash = CRC32(), + uint32_t crc = uint32_t() +) { + + return hash(view.data(), view.capacity() * cutlass::sizeof_bits::value / 8, crc); +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ElementA, typename LayoutA, + typename ElementB, typename LayoutB, + typename ElementC, typename LayoutC, + typename ElementAccumulator, + typename ElementCompute +> +inline std::ostream &EncodeTypes( + std::ostream &out +) { + + out << TensorTypeName() << "_" + << TensorTypeName() << "_" + << TensorTypeName() << "_" + << ElementTypeName() << "_" + << ElementTypeName(); + + return out; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ElementA, typename LayoutA, + typename ElementB, typename LayoutB, + typename ElementC, typename LayoutC, + typename ElementAccumulator, + typename ElementCompute +> +inline CachedTestKey CreateCachedGemmTestKey( + cutlass::gemm::GemmCoord const &problem, + ElementCompute alpha, + ElementCompute beta, + cutlass::TensorView A, + cutlass::TensorView B, + cutlass::TensorView C +) { + + CachedTestKey key; + + // Encode gemm operator and problem sizes + key.op = "gemm"; + + std::stringstream ss_problem; + 
EncodeProblemSize(ss_problem, problem); + ss_problem << "_alpha" << EncodeScalar(alpha) << "_beta" << EncodeScalar(beta); + key.problem = ss_problem.str(); + + // Encode problem data types + std::stringstream ss_types; + EncodeTypes< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute>(ss_types); + key.types = ss_types.str(); + + // Encode hash for problem data + CRC32 crc_hash; + key.A = TensorHash(A, crc_hash); + key.B = TensorHash(B, crc_hash); + key.C = TensorHash(C, crc_hash); + + return key; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + + +template < + typename ElementA, typename LayoutA, + typename ElementB, typename LayoutB, + typename ElementC, typename LayoutC, + typename ElementAccumulator, + typename ElementCompute +> +inline CachedTestKey CreateCachedConv2dTestKey( + + cutlass::conv::Operator conv_operator, + cutlass::conv::Conv2dProblemSize const &problem, + ElementCompute alpha, + ElementCompute beta, + cutlass::TensorView A, + cutlass::TensorView B, + cutlass::TensorView C +) { + + CachedTestKey key; + + // Encode conv2d operator and problem sizes + key.op = "conv2d"; + + std::stringstream ss_problem; + ss_problem << EncodeOperator(conv_operator) << "_"; + EncodeProblemSize(ss_problem, problem); + ss_problem << "_alpha" << EncodeScalar(alpha) << "_beta" << EncodeScalar(beta); + + key.problem = ss_problem.str(); + + // Encode problem data types + std::stringstream ss_types; + EncodeTypes< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute>(ss_types); + key.types = ss_types.str(); + + // Encode hash for problem data + CRC32 crc_hash; + + key.A = TensorHash(A, crc_hash); + key.B = TensorHash(B, crc_hash); + key.C = TensorHash(C, crc_hash); + + return key; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ElementA, 
typename LayoutA, + typename ElementB, typename LayoutB, + typename ElementC, typename LayoutC, + typename ElementAccumulator, + typename ElementCompute +> +inline CachedTestKey CreateCachedConv2dWithBroadcastTestKey( + + cutlass::conv::Operator conv_operator, + cutlass::conv::Conv2dProblemSize const &problem, + ElementCompute alpha, + ElementCompute beta, + cutlass::TensorView A, + cutlass::TensorView B, + cutlass::TensorView C +) { + + CachedTestKey key; + + // Encode conv2d operator and problem sizes + key.op = "conv2d_with_broadcast"; + + std::stringstream ss_problem; + ss_problem << EncodeOperator(conv_operator) << "_"; + EncodeProblemSize(ss_problem, problem); + ss_problem << "_alpha" << EncodeScalar(alpha) << "_beta" << EncodeScalar(beta); + + key.problem = ss_problem.str(); + + // Encode problem data types + std::stringstream ss_types; + EncodeTypes< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute>(ss_types); + key.types = ss_types.str(); + + // Encode hash for problem data + CRC32 crc_hash; + + key.A = TensorHash(A, crc_hash); + key.B = TensorHash(B, crc_hash); + key.C = TensorHash(C, crc_hash); + + return key; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ElementA, typename LayoutA, + typename ElementB, typename LayoutB, + typename ElementC, typename LayoutC, + typename ElementAccumulator, + typename ElementCompute +> +inline CachedTestKey CreateCachedConv2dWithReductionTestKey( + + cutlass::conv::Operator conv_operator, + cutlass::conv::Conv2dProblemSize const &problem, + ElementCompute alpha, + ElementCompute beta, + cutlass::TensorView A, + cutlass::TensorView B, + cutlass::TensorView C +) { + + CachedTestKey key; + + // Encode conv2d operator and problem sizes + key.op = "conv2d_with_reduction"; + + std::stringstream ss_problem; + ss_problem << EncodeOperator(conv_operator) << "_"; + EncodeProblemSize(ss_problem, 
problem); + ss_problem << "_alpha" << EncodeScalar(alpha) << "_beta" << EncodeScalar(beta); + + key.problem = ss_problem.str(); + + // Encode problem data types + std::stringstream ss_types; + EncodeTypes< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute>(ss_types); + key.types = ss_types.str(); + + // Encode hash for problem data + CRC32 crc_hash; + + key.A = TensorHash(A, crc_hash); + key.B = TensorHash(B, crc_hash); + key.C = TensorHash(C, crc_hash); + + return key; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template < + typename ElementA, typename LayoutA, + typename ElementB, typename LayoutB, + typename ElementC, typename LayoutC, + typename ElementAccumulator, + typename ElementCompute +> +inline CachedTestKey CreateCachedConv3dTestKey( + cutlass::conv::Operator conv_operator, + cutlass::conv::Conv3dProblemSize const &problem, + ElementCompute alpha, + ElementCompute beta, + cutlass::TensorView A, + cutlass::TensorView B, + cutlass::TensorView C +) { + + CachedTestKey key; + + // Encode conv3d operator and problem sizes + key.op = "conv3d"; + + std::stringstream ss_problem; + + ss_problem << EncodeOperator(conv_operator) << "_"; + EncodeProblemSize(ss_problem, problem); + ss_problem << "_alpha" << EncodeScalar(alpha) << "_beta" << EncodeScalar(beta); + + key.problem = ss_problem.str(); + + // Encode problem data types + std::stringstream ss_types; + EncodeTypes< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute>(ss_types); + key.types = ss_types.str(); + + // Encode problem data + CRC32 crc_hash; + key.A = TensorHash(A, crc_hash); + key.B = TensorHash(B, crc_hash); + key.C = TensorHash(C, crc_hash); + + return key; +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +} // namespace device +} // namespace conv +} // namespace test + 
+///////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu b/test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu index 04474f2f..579157e7 100644 --- a/test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu +++ b/test/unit/conv/device/conv2d_fprop_implicit_gemm_f16nhwc_f16nhwc_f16nhwc_tensor_op_f16_sm80.cu @@ -170,6 +170,25 @@ TEST(SM80_Device_Conv2d_Fprop_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_tens {1, 1} // dilation (dilation_h, dilation_w) )); + // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 4, 4, 14}, // input size (NHWC) + {8, 3, 3, 14}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {3, 3}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 23, 56, 98}, // input size (NHWC) + {128, 3, 3, 98}, // filter size (KRSC) + {4, 0, 5, 0}, // padding (pad_h, _, pad_w, _) + {3, 3}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + /// Run all unit test sizes with device-level Conv2d instance EXPECT_TRUE(test::conv::device::TestAllConv2d(problem_size_list)); } @@ -225,6 +244,25 @@ TEST(SM80_Device_Conv2d_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_ten {1, 1} // dilation (dilation_h, dilation_w) )); + // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 4, 4, 14}, // input size (NHWC) + {8, 3, 3, 14}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {3, 3}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + // run 
specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 23, 56, 98}, // input size (NHWC) + {128, 3, 3, 98}, // filter size (KRSC) + {4, 0, 5, 0}, // padding (pad_h, _, pad_w, _) + {3, 3}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + /// Run all unit test sizes with device-level Conv2d instance EXPECT_TRUE(test::conv::device::TestAllConv2d(problem_size_list)); } @@ -280,6 +318,24 @@ TEST(SM80_Device_Conv2d_Fprop_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f16nhwc_ten {1, 1} // dilation (dilation_h, dilation_w) )); + // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 4, 4, 28}, // input size (NHWC) + {8, 3, 3, 28}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {3, 3}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 23, 56, 100}, // input size (NHWC) + {128, 3, 3, 100}, // filter size (KRSC) + {4, 0, 5, 0}, // padding (pad_h, _, pad_w, _) + {3, 3}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + /// Run all unit test sizes with device-level Conv2d instance EXPECT_TRUE(test::conv::device::TestAllConv2d(problem_size_list)); } diff --git a/test/unit/conv/device/conv2d_problems.h b/test/unit/conv/device/conv2d_problems.h index 3503ee31..4fef15c8 100644 --- a/test/unit/conv/device/conv2d_problems.h +++ b/test/unit/conv/device/conv2d_problems.h @@ -182,7 +182,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) + {1, 7, 8, minimum_channel_size}, // input size (NHWC) {8, 3, 3, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // 
stride (stride_h, stride_w) @@ -190,7 +190,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) + {1, 7, 9, minimum_channel_size}, // input size (NHWC) {8, 4, 4, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) @@ -198,7 +198,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) + {2, 7, 9, minimum_channel_size}, // input size (NHWC) {8, 5, 5, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) @@ -206,7 +206,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) + {3, 7, 9, minimum_channel_size}, // input size (NHWC) {8, 6, 5, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) @@ -214,7 +214,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) + {3, 7, 9, minimum_channel_size}, // input size (NHWC) {8, 6, 6, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) @@ -222,7 +222,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) + {3, 7, 9, minimum_channel_size}, // input size (NHWC) {8, 7, 7, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) @@ -234,7 +234,7 @@ struct TestbedConv2dProblemSizes { // C < CTA::K and non-multiples of 
CTA::K. Typical CTA::K = {32, 64} //////////////////////////////////////////////////////////////////////////////////////////// conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 11, 11, minimum_channel_size}, // input size (NHWC) + {1, 11, 7, minimum_channel_size}, // input size (NHWC) {8, 1, 1, minimum_channel_size}, // filter size (KRSC) {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) @@ -242,7 +242,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 11, 11, minimum_channel_size}, // input size (NHWC) + {1, 11, 7, minimum_channel_size}, // input size (NHWC) {8, 3, 3, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) @@ -250,7 +250,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 13, 13, minimum_channel_size}, // input size (NHWC) + {1, 13, 11, minimum_channel_size}, // input size (NHWC) {8, 1, 1, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) @@ -258,40 +258,40 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, minimum_channel_size}, // input size (NHWC) - {8, 2, 2, minimum_channel_size}, // filter size (KRSC) + {1, 17, 19, minimum_channel_size}, // input size (NHWC) + {16, 2, 2, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) {1, 1} // dilation (dilation_h, dilation_w) )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 5, 5, minimum_channel_size}, // input size (NHWC) - {8, 3, 3, minimum_channel_size}, // filter size (KRSC) + {1, 23, 5, minimum_channel_size}, // input size (NHWC) + {16, 3, 3, minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 
1}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) {1, 1} // dilation (dilation_h, dilation_w) )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, 8}, // input size (NHWC) - {8, 3, 3, 8}, // filter size (KRSC) + {1, 13, 17, 8}, // input size (NHWC) + {24, 3, 3, 8}, // filter size (KRSC) {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) {1, 1} // dilation (dilation_h, dilation_w) )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 8, 8, 8}, // input size (NHWC) - {8, 3, 3, 8}, // filter size (KRSC) + {1, 23, 21, 8}, // input size (NHWC) + {24, 3, 3, 8}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {3, 3}, // stride (stride_h, stride_w) {1, 1} // dilation (dilation_h, dilation_w) )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 16, 16, 8}, // input size (NHWC) - {8, 3, 3, 8}, // filter size (KRSC) + {1, 20, 24, 8}, // input size (NHWC) + {40, 3, 3, 8}, // filter size (KRSC) {3, 3, 3, 3}, // padding (pad_h, _, pad_w, _) {3, 3}, // stride (stride_h, stride_w) {1, 1} // dilation (dilation_h, dilation_w) @@ -352,7 +352,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 16, 16, 64 + minimum_channel_size}, // input size (NHWC) + {1, 16, 24, 64 + minimum_channel_size}, // input size (NHWC) {96, 3, 3, 64 + minimum_channel_size}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) @@ -363,7 +363,7 @@ struct TestbedConv2dProblemSizes { // Medium input size, filter size (1x1, 3,x3, 5x5, 7x7), stride (2, 2) //////////////////////////////////////////////////////////////////////////////////// conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 16, 16, 288}, // input size (NHWC) + {1, 13, 16, 288}, // input size (NHWC) {160, 5, 5, 288}, // filter size (KRSC) {2, 2, 2, 2}, // padding 
(pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) @@ -371,7 +371,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 55, 55, 256}, // input size (NHWC) + {1, 55, 51, 256}, // input size (NHWC) {512, 1, 1, 256}, // filter size (KRSC) {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) @@ -379,7 +379,7 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 80, 80, 32}, // input size (NHWC) + {1, 71, 80, 32}, // input size (NHWC) {64, 5, 5, 32}, // filter size (KRSC) {2, 2, 2, 2}, // padding (pad_h, _, pad_w, _) {2, 2}, // stride (stride_h, stride_w) @@ -398,7 +398,7 @@ struct TestbedConv2dProblemSizes { // Medium input size stride (3, 3), filter (3, 3), non-default padding //////////////////////////////////////////////////////////////////////////////////// conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {1, 27, 27, 256}, // input size (NHWC) + {1, 27, 23, 256}, // input size (NHWC) {512, 3, 3, 256}, // filter size (KRSC) {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) {3, 3}, // stride (stride_h, stride_w) @@ -464,16 +464,16 @@ struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {32, 32, 32, 32}, // input size (NHWC) - {32, 1, 1, 32}, // filter size (KRSC) + {32, 24, 32, 32}, // input size (NHWC) + {32, 1, 2, 32}, // filter size (KRSC) {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) {1, 1} // dilation (dilation_h, dilation_w) )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {4, 3, 3, 128}, // input size (NHWC) - {256, 3, 3, 128}, // filter size (KRSC) + {4, 4, 5, 128}, // input size (NHWC) + {256, 3, 6, 128}, // filter size (KRSC) {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) {1, 1}, // dilation (dilation_h, dilation_w) @@ -481,8 +481,8 @@ 
struct TestbedConv2dProblemSizes { )); conv2d_default_sizes.push_back(cutlass::conv::Conv2dProblemSize( - {4, 1, 1, 256}, // input size (NHWC) - {328, 3, 3, 256}, // filter size (KRSC) + {4, 2, 3, 256}, // input size (NHWC) + {328, 3, 5, 256}, // filter size (KRSC) {1, 1, 1, 1}, // padding (pad_h, _, pad_w, _) {1, 1}, // stride (stride_h, stride_w) {1, 1}, // dilation (dilation_h, dilation_w) diff --git a/test/unit/conv/device/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu b/test/unit/conv/device/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu index d8a3ea10..614a3cef 100644 --- a/test/unit/conv/device/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu +++ b/test/unit/conv/device/conv2d_strided_dgrad_implicit_gemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_sm80.cu @@ -231,4 +231,137 @@ TEST(SM80_Device_Conv2d_Strided_Dgrad_Analytic_ImplicitGemm_f16nhwc_f16nhwc_f32n } //////////////////////////////////////////////////////////////////////////////// + +//////////////////////////////////////////////////////////////////////////////// +// Strided Dgrad (Optimized) +//////////////////////////////////////////////////////////////////////////////// + +TEST(SM80_Device_Conv2d_Strided_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32, + 128x128_32x3_64x64x32) { + + /// Conv operation element types for the Gemm equivalent (ImplicitGemm) + using ElementA = cutlass::half_t; + using ElementB = cutlass::half_t; + using ElementC = float; + using ElementAccumulator = float; + using ElementCompute = float; + + /// Device-level Conv2d instance + using Conv2dDgradKernel = typename cutlass::conv::kernel::DefaultConv2dDgrad< + ElementA, cutlass::layout::TensorNHWC, + ElementB, cutlass::layout::TensorNHWC, + ElementC, cutlass::layout::TensorNHWC, + ElementAccumulator, + cutlass::arch::OpClassTensorOp, + cutlass::arch::Sm80, + cutlass::gemm::GemmShape<128, 128, 32>, + 
cutlass::gemm::GemmShape<64, 64, 32>, + cutlass::gemm::GemmShape<16, 8, 16>, + cutlass::epilogue::thread::LinearCombination< + ElementC, + 128 / cutlass::sizeof_bits::value, + ElementAccumulator, + ElementCompute + >, + cutlass::conv::threadblock::StridedDgradIdentityThreadblockSwizzle<>, + 3, + cutlass::arch::OpMultiplyAdd, + cutlass::conv::IteratorAlgorithm::kOptimized, + cutlass::conv::StrideSupport::kStrided + >::Kernel; + + using Conv2dDgrad = cutlass::conv::device::ImplicitGemmConvolution; + + + test::conv::device::Conv2dProblemVector problem_size_list; + +#if 0 // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 56, 56, 8}, // input size (NHWC) + {8, 1, 1, 8}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {2, 2}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 55, 55, 8}, // input size (NHWC) + {8, 1, 1, 8}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {2, 2}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + +#endif + + /// Run all unit test sizes with device-level Conv2d instance + EXPECT_TRUE(test::conv::device::TestAllConv2d(problem_size_list)); +} + +//////////////////////////////////////////////////////////////////////////////// + +TEST(SM80_Device_Conv2d_Strided_Dgrad_Optimized_ImplicitGemm_f16nhwc_f16nhwc_f32nhwc_tensor_op_f32_align4, + 128x128_32x3_64x64x32) { + + /// Conv operation element types for the Gemm equivalent (ImplicitGemm) + using ElementA = cutlass::half_t; + using ElementB = cutlass::half_t; + using ElementC = float; + using ElementAccumulator = float; + using ElementCompute = float; + + /// Device-level Conv2d instance + using Conv2dDgradKernel = typename cutlass::conv::kernel::DefaultConv2dDgrad< + ElementA, cutlass::layout::TensorNHWC, + ElementB, 
cutlass::layout::TensorNHWC, + ElementC, cutlass::layout::TensorNHWC, + ElementAccumulator, + cutlass::arch::OpClassTensorOp, + cutlass::arch::Sm80, + cutlass::gemm::GemmShape<128, 128, 32>, + cutlass::gemm::GemmShape<64, 64, 32>, + cutlass::gemm::GemmShape<16, 8, 16>, + cutlass::epilogue::thread::LinearCombination< + ElementC, + 4, + ElementAccumulator, + ElementCompute + >, + cutlass::conv::threadblock::StridedDgradIdentityThreadblockSwizzle<>, + 3, + cutlass::arch::OpMultiplyAdd, + cutlass::conv::IteratorAlgorithm::kOptimized, + cutlass::conv::StrideSupport::kStrided, + 4, + 4 + >::Kernel; + + using Conv2dDgrad = cutlass::conv::device::ImplicitGemmConvolution; + + + test::conv::device::Conv2dProblemVector problem_size_list; + + // run specific problem size in the unit test first + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 56, 56, 12}, // input size (NHWC) + {8, 1, 1, 12}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {2, 2}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + problem_size_list.push_back(cutlass::conv::Conv2dProblemSize( + {1, 55, 55, 12}, // input size (NHWC) + {8, 1, 1, 12}, // filter size (KRSC) + {0, 0, 0, 0}, // padding (pad_h, _, pad_w, _) + {2, 2}, // stride (stride_h, stride_w) + {1, 1} // dilation (dilation_h, dilation_w) + )); + + /// Run all unit test sizes with device-level Conv2d instance + EXPECT_TRUE(test::conv::device::TestAllConv2d(problem_size_list)); +} + +//////////////////////////////////////////////////////////////////////////////// + #endif // CUTLASS_ARCH_MMA_SM80_SUPPORTED diff --git a/test/unit/conv/device/conv2d_testbed.h b/test/unit/conv/device/conv2d_testbed.h index e2b41233..318db31a 100644 --- a/test/unit/conv/device/conv2d_testbed.h +++ b/test/unit/conv/device/conv2d_testbed.h @@ -47,6 +47,8 @@ #include "cutlass/core_io.h" #include "cutlass/util/tensor_view_io.h" +#include "cache_testbed_output.h" + namespace test { namespace 
conv { namespace device { @@ -116,7 +118,6 @@ public: cutlass::Distribution::Kind dist_kind, uint64_t seed) { -//cutlass::reference::host::TensorFill(view, Element(1.0f)); if (dist_kind == cutlass::Distribution::Uniform) { int scope; @@ -126,7 +127,12 @@ public: scope = 2; } else if (bits == 16) { - scope = 3; + if (cutlass::sizeof_bits::value <= 16) { + scope = 3; + } + else { + scope = 5; + } } else { scope = 8; @@ -137,6 +143,7 @@ public: else if (dist_kind == cutlass::Distribution::Identity) { cutlass::reference::host::TensorFillIdentity(view); + } else if (dist_kind == cutlass::Distribution::Gaussian) { @@ -321,6 +328,50 @@ public: tensor_D_computed.sync_host(); + // + // Reference check - support caching results + // + + CachedTestKey cached_test_key = CreateCachedConv2dTestKey< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute + >( + kConvolutionalOperator, + problem_size, + alpha, + beta, + tensor_A.host_view(), + tensor_B.host_view(), + tensor_C.host_view() + ); + + // + // Look for the cached key + // + + bool cached_result_loaded = false; + CachedTestResult cached_test_result; + + std::string conv2d_result_cache_name = + std::string("cached_results_") + CUTLASS_TARGET_NAME + ".txt"; + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + auto cached = cached_results.find(cached_test_key); + + cached_result_loaded = cached.first; + if (cached_result_loaded) { + cached_test_result = cached.second; + } + } + + if (!cached_result_loaded) { + #if CUTLASS_CONV_TEST_UNIT_REFERENCE_DEVICE_ENABLED cutlass::reference::device::Conv2d< @@ -367,9 +418,32 @@ public: beta); #endif - passed = cutlass::reference::host::TensorEquals( - tensor_D_computed.host_view(), - tensor_D_reference.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + cached_test_result.D = TensorHash(tensor_D_reference.host_view()); + + CachedTestResultListing 
cached_results(conv2d_result_cache_name); + + cached_results.append(cached_test_key, cached_test_result); + cached_results.write(conv2d_result_cache_name); + } + } // if (!cached_result_loaded) + + uint32_t tensor_D_hash = TensorHash(tensor_D_computed.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + passed = (tensor_D_hash == cached_test_result.D); + + EXPECT_EQ(tensor_D_hash, cached_test_result.D) + << "Hash-based comparison failed for key:" << "\n" << cached_test_key << "\n"; + } + else { + + passed = cutlass::reference::host::TensorEquals( + tensor_D_computed.host_view(), + tensor_D_reference.host_view()); + } EXPECT_TRUE(passed); @@ -416,9 +490,18 @@ public: results << "\nA:\n" << tensor_A.host_view() << "\n" << "\nB:\n" << tensor_B.host_view() << "\n" - << "\nC:\n" << tensor_C.host_view() << "\n" - << "\nD reference:\n" << tensor_D_reference.host_view() << "\n" - << "\nD computed:\n" << tensor_D_computed.host_view() << "\n"; + << "\nC:\n" << tensor_C.host_view() << "\n"; + + results << "\nD reference (hash: " << cached_test_result.D << ")\n"; + + if (!cached_result_loaded) { + results + << tensor_D_reference.host_view() << "\n"; + } + + results + << "\nD computed (hash: " << tensor_D_hash << ")\n" + << tensor_D_computed.host_view() << "\n"; } diff --git a/test/unit/conv/device/conv2d_testbed_interleaved.h b/test/unit/conv/device/conv2d_testbed_interleaved.h index 97020459..86e4278c 100644 --- a/test/unit/conv/device/conv2d_testbed_interleaved.h +++ b/test/unit/conv/device/conv2d_testbed_interleaved.h @@ -48,6 +48,8 @@ #include "cutlass/core_io.h" #include "cutlass/util/tensor_view_io.h" +#include "cache_testbed_output.h" + namespace test { namespace conv { namespace device { @@ -280,6 +282,50 @@ public: tensor_D_computed.sync_host(); + // + // Reference check - support caching results + // + + CachedTestKey cached_test_key = CreateCachedConv2dTestKey< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + 
ElementCompute + >( + kConvolutionalOperator, + problem_size, + alpha, + beta, + tensor_A.host_view(), + tensor_B.host_view(), + tensor_C.host_view() + ); + + // + // Look for the cached key + // + + bool cached_result_loaded = false; + CachedTestResult cached_test_result; + + std::string conv2d_result_cache_name = + std::string("cached_results_") + CUTLASS_TARGET_NAME + ".txt"; + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + auto cached = cached_results.find(cached_test_key); + + cached_result_loaded = cached.first; + if (cached_result_loaded) { + cached_test_result = cached.second; + } + } + + if (!cached_result_loaded) { + #if CUTLASS_CONV_TEST_UNIT_REFERENCE_DEVICE_ENABLED cutlass::reference::device::Conv2d< @@ -332,9 +378,32 @@ public: beta); #endif - passed = cutlass::reference::host::TensorEquals( - tensor_D_computed.host_view(), - tensor_D_reference.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + cached_test_result.D = TensorHash(tensor_D_reference.host_view()); + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + cached_results.append(cached_test_key, cached_test_result); + cached_results.write(conv2d_result_cache_name); + } + } // if (!cached_result_loaded) + + uint32_t tensor_D_hash = TensorHash(tensor_D_computed.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + passed = (tensor_D_hash == cached_test_result.D); + + EXPECT_EQ(tensor_D_hash, cached_test_result.D) + << "Hash-based comparison failed for key:" << "\n" << cached_test_key << "\n"; + } + else { + + passed = cutlass::reference::host::TensorEquals( + tensor_D_computed.host_view(), + tensor_D_reference.host_view()); + } EXPECT_TRUE(passed); @@ -345,12 +414,12 @@ public: << (split_k_mode == cutlass::conv::SplitKMode::kSerial ? "serial_reduction_" : "parallel_reduction_") << (Conv2d::kConvolutionalOperator == cutlass::conv::Operator::kFprop ? 
"fprop_" : (Conv2d::kConvolutionalOperator == cutlass::conv::Operator::kDgrad ? "dgrad_" : "wgrad_")) - << "nhwc_" + << "ncxhwx_" << problem_size.N << "x" << problem_size.H << "x" << problem_size.W << "x" << problem_size.C - << "_krsc_" + << "_cxrskx_" << problem_size.K << "x" << problem_size.R << "x" << problem_size.S << "x" @@ -381,10 +450,18 @@ public: results << "\nA:\n" << tensor_A.host_view() << "\n" << "\nB:\n" << tensor_B.host_view() << "\n" - << "\nB_reordered =\n" << tensor_B_reordered.host_view() << "\n" - << "\nC:\n" << tensor_C.host_view() << "\n" - << "\nD reference:\n" << tensor_D_reference.host_view() << "\n" - << "\nD computed:\n" << tensor_D_computed.host_view() << "\n"; + << "\nC:\n" << tensor_C.host_view() << "\n"; + + results << "\nD reference (hash: " << cached_test_result.D << ")\n"; + + if (!cached_result_loaded) { + results + << tensor_D_reference.host_view() << "\n"; + } + + results + << "\nD computed (hash: " << tensor_D_hash << ")\n" + << tensor_D_computed.host_view() << "\n"; } diff --git a/test/unit/conv/device/conv2d_with_broadcast_testbed.h b/test/unit/conv/device/conv2d_with_broadcast_testbed.h index ab029f93..bb79a1cf 100644 --- a/test/unit/conv/device/conv2d_with_broadcast_testbed.h +++ b/test/unit/conv/device/conv2d_with_broadcast_testbed.h @@ -47,6 +47,8 @@ #include "cutlass/core_io.h" #include "cutlass/util/tensor_view_io.h" +#include "cache_testbed_output.h" + namespace test { namespace conv { namespace device { @@ -109,11 +111,17 @@ public: scope = 2; } else if (bits == 16) { - scope = 3; + if (cutlass::sizeof_bits::value <= 16) { + scope = 3; + } + else { + scope = 5; + } } else { scope = 8; } + cutlass::reference::host::TensorFillRandomUniform( view, seed, scope, -scope, 0); } @@ -263,6 +271,50 @@ public: tensor_D_computed.sync_host(); + // + // Reference check - support caching results + // + + CachedTestKey cached_test_key = CreateCachedConv2dWithBroadcastTestKey< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, 
LayoutC, + ElementAccumulator, + ElementCompute + >( + kConvolutionalOperator, + problem_size, + alpha, + beta, + tensor_A.host_view(), + tensor_B.host_view(), + tensor_C.host_view() + ); + + // + // Look for the cached key + // + + bool cached_result_loaded = false; + CachedTestResult cached_test_result; + + std::string conv2d_result_cache_name = + std::string("cached_results_") + CUTLASS_TARGET_NAME + ".txt"; + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + auto cached = cached_results.find(cached_test_key); + + cached_result_loaded = cached.first; + if (cached_result_loaded) { + cached_test_result = cached.second; + } + } + + if (!cached_result_loaded) { + #if CUTLASS_CONV_TEST_UNIT_REFERENCE_DEVICE_ENABLED cutlass::reference::device::Conv2d< @@ -309,9 +361,33 @@ public: beta); #endif - passed = cutlass::reference::host::TensorEquals( - tensor_D_computed.host_view(), - tensor_D_reference.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + cached_test_result.D = TensorHash(tensor_D_reference.host_view()); + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + cached_results.append(cached_test_key, cached_test_result); + cached_results.write(conv2d_result_cache_name); + } + } // if (!cached_result_loaded) + + + uint32_t tensor_D_hash = TensorHash(tensor_D_computed.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + passed = (tensor_D_hash == cached_test_result.D); + + EXPECT_EQ(tensor_D_hash, cached_test_result.D) + << "Hash-based comparison failed for key:" << "\n" << cached_test_key << "\n"; + } + else { + + passed = cutlass::reference::host::TensorEquals( + tensor_D_computed.host_view(), + tensor_D_reference.host_view()); + } EXPECT_TRUE(passed); diff --git a/test/unit/conv/device/conv2d_with_reduction_testbed.h b/test/unit/conv/device/conv2d_with_reduction_testbed.h index cf8e9bff..57b5d9ee 100644 --- 
a/test/unit/conv/device/conv2d_with_reduction_testbed.h +++ b/test/unit/conv/device/conv2d_with_reduction_testbed.h @@ -47,6 +47,8 @@ #include "cutlass/core_io.h" #include "cutlass/util/tensor_view_io.h" +#include "cache_testbed_output.h" + namespace test { namespace conv { namespace device { @@ -114,7 +116,12 @@ public: scope = 2; } else if (bits == 16) { - scope = 3; + if (cutlass::sizeof_bits::value <= 16) { + scope = 3; + } + else { + scope = 5; + } } else { scope = 8; @@ -280,6 +287,50 @@ public: tensor_D_computed.sync_host(); + // + // Reference check - support caching results + // + + CachedTestKey cached_test_key = CreateCachedConv2dWithReductionTestKey< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + ElementAccumulator, + ElementCompute + >( + kConvolutionalOperator, + problem_size, + alpha, + beta, + tensor_A.host_view(), + tensor_B.host_view(), + tensor_C.host_view() + ); + + // + // Look for the cached key + // + + bool cached_result_loaded = false; + CachedTestResult cached_test_result; + + std::string conv2d_result_cache_name = + std::string("cached_results_") + CUTLASS_TARGET_NAME + ".txt"; + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + auto cached = cached_results.find(cached_test_key); + + cached_result_loaded = cached.first; + if (cached_result_loaded) { + cached_test_result = cached.second; + } + } + + if (!cached_result_loaded) { + #if CUTLASS_CONV_TEST_UNIT_REFERENCE_DEVICE_ENABLED cutlass::reference::device::Conv2d< @@ -326,9 +377,33 @@ public: beta); #endif - passed = cutlass::reference::host::TensorEquals( - tensor_D_computed.host_view(), - tensor_D_reference.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + cached_test_result.D = TensorHash(tensor_D_reference.host_view()); + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + cached_results.append(cached_test_key, cached_test_result); + 
cached_results.write(conv2d_result_cache_name); + } + } // if (!cached_result_loaded) + + + uint32_t tensor_D_hash = TensorHash(tensor_D_computed.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + passed = (tensor_D_hash == cached_test_result.D); + + EXPECT_EQ(tensor_D_hash, cached_test_result.D) + << "Hash-based comparison failed for key:" << "\n" << cached_test_key << "\n"; + } + else { + + passed = cutlass::reference::host::TensorEquals( + tensor_D_computed.host_view(), + tensor_D_reference.host_view()); + } EXPECT_TRUE(passed); diff --git a/test/unit/conv/device/conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu b/test/unit/conv/device/conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu index 86e06341..d8620613 100644 --- a/test/unit/conv/device/conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu +++ b/test/unit/conv/device/conv3d_dgrad_implicit_gemm_tf32ndhwc_tf32ndhwc_f32ndhwc_tensor_op_f32_sm80.cu @@ -67,7 +67,8 @@ TEST(SM80_Device_Conv3d_Dgrad_Analytic_ImplicitGemm_tf32ndhwc_tf32ndhwc_f32ndhwc >, cutlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>, 3, - cutlass::arch::OpMultiplyAdd + cutlass::arch::OpMultiplyAdd, + cutlass::conv::IteratorAlgorithm::kAnalytic >::Kernel; using Conv3dDgrad = cutlass::conv::device::ImplicitGemmConvolution; diff --git a/test/unit/conv/device/conv3d_testbed.h b/test/unit/conv/device/conv3d_testbed.h index b3a54691..5139c4b7 100644 --- a/test/unit/conv/device/conv3d_testbed.h +++ b/test/unit/conv/device/conv3d_testbed.h @@ -47,6 +47,8 @@ #include "conv3d_problems.h" #include "cutlass/core_io.h" +#include "cache_testbed_output.h" + namespace test { namespace conv { namespace device { @@ -321,6 +323,50 @@ public: tensor_D_computed.sync_host(); + // + // Reference check - support caching results + // + + CachedTestKey cached_test_key = CreateCachedConv3dTestKey< + ElementA, LayoutA, + ElementB, LayoutB, + ElementC, LayoutC, + 
ElementAccumulator, + ElementCompute + >( + kConvolutionalOperator, + problem_size, + alpha, + beta, + tensor_A.host_view(), + tensor_B.host_view(), + tensor_C.host_view() + ); + + // + // Look for the cached key + // + + bool cached_result_loaded = false; + CachedTestResult cached_test_result; + + std::string conv2d_result_cache_name = + std::string("cached_results_") + CUTLASS_TARGET_NAME + ".txt"; + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + auto cached = cached_results.find(cached_test_key); + + cached_result_loaded = cached.first; + if (cached_result_loaded) { + cached_test_result = cached.second; + } + } + + if (!cached_result_loaded) { + #if CUTLASS_CONV_TEST_UNIT_REFERENCE_DEVICE_ENABLED cutlass::reference::device::Conv3d< @@ -368,10 +414,32 @@ public: ); #endif - passed = cutlass::reference::host::TensorEquals( - tensor_D_computed.host_view(), - tensor_D_reference.host_view()); + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + cached_test_result.D = TensorHash(tensor_D_reference.host_view()); + + CachedTestResultListing cached_results(conv2d_result_cache_name); + + cached_results.append(cached_test_key, cached_test_result); + cached_results.write(conv2d_result_cache_name); + } + } // if (!cached_result_loaded) + + uint32_t tensor_D_hash = TensorHash(tensor_D_computed.host_view()); + + if (CUTLASS_TEST_ENABLE_CACHED_RESULTS) { + passed = (tensor_D_hash == cached_test_result.D); + + EXPECT_EQ(tensor_D_hash, cached_test_result.D) + << "Hash-based comparison failed for key:" << "\n" << cached_test_key << "\n"; + } + else { + + passed = cutlass::reference::host::TensorEquals( + tensor_D_computed.host_view(), + tensor_D_reference.host_view()); + } + EXPECT_TRUE(passed); if (!passed) { @@ -422,9 +490,19 @@ public: results << "\nA:\n" << tensor_A.host_view() << "\n" << "\nB:\n" << tensor_B.host_view() << "\n" - << "\nC:\n" << tensor_C.host_view() << "\n" - << "\nD reference:\n" << 
tensor_D_reference.host_view() << "\n" - << "\nD computed:\n" << tensor_D_computed.host_view() << "\n"; + << "\nC:\n" << tensor_C.host_view() << "\n"; + + + results << "\nD reference (hash: " << cached_test_result.D << ")\n"; + + if (!cached_result_loaded) { + results + << tensor_D_reference.host_view() << "\n"; + } + + results + << "\nD computed (hash: " << tensor_D_hash << ")\n" + << tensor_D_computed.host_view() << "\n"; } diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_simt.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_simt.txt new file mode 100644 index 00000000..018f2caf --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_simt.txt @@ -0,0 +1,473 @@ +conv2d fprop_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1291629984 4250546017 486967919 3628559084 +conv2d fprop_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1291629984 4250546017 486967919 3628559084 +conv2d fprop_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3269355653 987911951 1919973277 3060288390 +conv2d fprop_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3269355653 987911951 1919973277 2970039207 +conv2d fprop_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3748422120 3644341259 1911021900 2231724975 +conv2d fprop_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3748422120 3644341259 1911021900 32368867 +conv2d fprop_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 472157622 1595526728 2963892063 3617758931 +conv2d fprop_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 472157622 1595526728 2963892063 1213867461 +conv2d fprop_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
fnhwc_fnhwc_fnhwc_f_f 2555494614 224923131 3862463878 1032157693 +conv2d fprop_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2555494614 224923131 3862463878 2512484574 +conv2d fprop_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2311828961 1219404370 373331791 156090483 +conv2d fprop_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2311828961 1219404370 373331791 2125643627 +conv2d fprop_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2311828961 2400093115 2695215306 1183833128 +conv2d fprop_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2311828961 2400093115 2695215306 950863078 +conv2d fprop_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2311828961 1018998471 2132201986 1645211922 +conv2d fprop_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2311828961 1018998471 2132201986 888922489 +conv2d fprop_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2296142491 4250546017 1919973277 3242947060 +conv2d fprop_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2296142491 4250546017 1919973277 3242947060 +conv2d fprop_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2296142491 3644341259 1919973277 2570877648 +conv2d fprop_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2296142491 3644341259 1919973277 3601912269 +conv2d fprop_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1218117393 4250546017 1911021900 1777629475 +conv2d fprop_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1218117393 4250546017 1911021900 1777629475 +conv2d 
fprop_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3858018920 3830842503 5067603 2056042422 +conv2d fprop_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3858018920 3830842503 5067603 3581028648 +conv2d fprop_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3131990564 3914955377 2695215306 1284148249 +conv2d fprop_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3131990564 3914955377 2695215306 529645467 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3975452619 4213557158 2381193183 3146807750 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3975452619 4213557158 2381193183 3919362272 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1463744869 4213557158 1038140209 2259020777 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1463744869 4213557158 1038140209 2073872618 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2629906455 3838546319 720746870 2382111738 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2629906455 3838546319 720746870 1697335701 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1212756742 4265921531 2290618404 3626933660 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1212756742 4265921531 2290618404 3626933660 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 241472534 468963831 2959096809 3337392869 +conv2d 
fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 241472534 468963831 2959096809 1465273757 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 796785810 2766914163 613201518 2545095524 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 796785810 2766914163 613201518 119102005 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3566971919 3050624323 167104692 2075508652 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3566971919 3050624323 167104692 249956416 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1307031978 1633048603 2557576548 825424571 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1307031978 1633048603 2557576548 3723250582 +conv2d fprop_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 51486510 2467457616 1993107524 3917445532 +conv2d fprop_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 51486510 2467457616 1993107524 723144309 +conv2d fprop_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 806079124 1319256657 1041989393 4007092054 +conv2d fprop_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 806079124 1319256657 1041989393 121750431 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2682525616 2411959438 1222599802 2705358355 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2682525616 2411959438 1222599802 1921082534 +conv2d 
fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2113351655 1888208948 1454859747 2429064071 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2113351655 1888208948 1454859747 2429064071 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2239997711 3765480640 1933240382 258792262 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2239997711 3765480640 1933240382 1350961289 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1124828340 632037627 1966904362 2674132654 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1124828340 632037627 1966904362 4061823028 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3787124528 2799725917 3426919182 1513280885 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3787124528 2799725917 3426919182 3304279324 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2109521687 2799725917 4185476157 1062588307 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2109521687 2799725917 4185476157 1737400068 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1329557640 2685661941 623381239 3456180201 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1329557640 2685661941 623381239 612510019 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4076621732 2799725917 748585111 1918078890 +conv2d 
fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4076621732 2799725917 748585111 1296506717 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4076621732 2799725917 748585111 720321141 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4076621732 2799725917 748585111 2250020808 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3826850269 1181458623 3933470176 1216324411 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3826850269 1181458623 3933470176 1337638334 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2161330459 2750022170 3879567407 2187352956 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2161330459 2750022170 3879567407 3505172898 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 61034797 3724098469 1954437668 3275330607 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 61034797 3724098469 1954437668 3211342413 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2979706213 840940128 2268084635 2574304052 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2979706213 840940128 2268084635 959147198 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 933902974 3274942180 1217977439 1759317211 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 933902974 3274942180 1217977439 3077847987 +conv2d 
fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 525054430 617641393 90427526 1404446496 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 525054430 617641393 90427526 4261451781 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2145363230 721318312 934864596 1375658938 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2145363230 721318312 934864596 90410819 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 3484291824 12477519 722450442 726679987 +conv2d fprop_1x1x1x2_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1291629984 4250546017 4259529924 973561880 +conv2d fprop_1x1x1x2_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1291629984 4250546017 4259529924 973561880 +conv2d fprop_1x1x8x2_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3269355653 987911951 2963892063 653870217 +conv2d fprop_1x1x8x2_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3269355653 987911951 2963892063 3381857140 +conv2d fprop_1x7x8x2_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3748422120 3644341259 2132626684 727104696 +conv2d fprop_1x7x8x2_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3748422120 3644341259 2132626684 857884834 +conv2d fprop_1x7x9x2_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 472157622 1595526728 1755127071 1773464230 +conv2d fprop_1x7x9x2_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 472157622 1595526728 1755127071 1800737809 +conv2d 
fprop_2x7x9x2_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2555494614 224923131 2778187426 1408209490 +conv2d fprop_2x7x9x2_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2555494614 224923131 2778187426 113009838 +conv2d fprop_3x7x9x2_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2311828961 1219404370 1038140209 1972128042 +conv2d fprop_3x7x9x2_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2311828961 1219404370 1038140209 740278031 +conv2d fprop_3x7x9x2_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2311828961 2400093115 2381193183 3189138716 +conv2d fprop_3x7x9x2_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2311828961 2400093115 2381193183 4117350218 +conv2d fprop_3x7x9x2_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2311828961 1018998471 123247934 3895648089 +conv2d fprop_3x7x9x2_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2311828961 1018998471 123247934 1801652807 +conv2d fprop_1x11x7x2_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2296142491 4250546017 2963892063 1354630 +conv2d fprop_1x11x7x2_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2296142491 4250546017 2963892063 1354630 +conv2d fprop_1x11x7x2_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2296142491 3644341259 2963892063 2794615982 +conv2d fprop_1x11x7x2_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2296142491 3644341259 2963892063 1407282247 +conv2d fprop_1x13x11x2_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1218117393 4250546017 2132626684 4169859068 +conv2d 
fprop_1x13x11x2_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1218117393 4250546017 2132626684 4169859068 +conv2d fprop_1x17x19x2_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3858018920 3830842503 3268227320 1293623633 +conv2d fprop_1x17x19x2_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3858018920 3830842503 3268227320 422114034 +conv2d fprop_1x23x5x2_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3131990564 3914955377 2381193183 3106056962 +conv2d fprop_1x23x5x2_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3131990564 3914955377 2381193183 3961289412 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3187654522 2811231923 3002932066 332595516 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3187654522 2811231923 3002932066 1038970344 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 939350077 2811231923 3710122981 1390001842 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 939350077 2811231923 3710122981 3516755647 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1957958032 2306828385 341432232 1207552588 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1957958032 2306828385 341432232 2056415706 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4005630370 105489309 212243307 3753643595 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4005630370 105489309 212243307 
3753643595 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4148272483 1750065361 2212942576 1337682673 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4148272483 1750065361 2212942576 183620743 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 617040073 350396557 284178850 3286712256 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 617040073 350396557 284178850 2327714256 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2096370412 2318155471 835666052 4141289477 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2096370412 2318155471 835666052 2298284761 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3496218347 2077244131 2929697040 1641694950 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3496218347 2077244131 2929697040 1957029265 +conv2d fprop_1x15x19x34_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1926056930 1319256657 950258165 4217829985 +conv2d fprop_1x15x19x34_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1926056930 1319256657 950258165 1720028652 +conv2d fprop_1x16x24x66_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2388882552 2500591916 3967280674 2026555219 +conv2d fprop_1x16x24x66_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2388882552 2500591916 3967280674 2322971508 +conv2d 
fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 683582756 1395357567 3996211431 3665284572 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 683582756 1395357567 3996211431 201734166 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 982506873 1181458623 2289578760 3612707051 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 982506873 1181458623 2289578760 3612707051 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2185005209 2219118660 1440004873 3463834206 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2185005209 2219118660 1440004873 2512434087 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 613238853 1728048860 3033436585 2458388362 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 613238853 1728048860 3033436585 3613671804 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1261436340 2257098898 3140537372 3075173914 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1261436340 2257098898 3140537372 2600183629 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 254292619 2257098898 2668473724 3607778249 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 254292619 2257098898 2668473724 2808559748 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_0i_beta0_0i 
cnhwc_cnhwc_cnhwc_c_c 772022222 4190341022 1138386226 449926041 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 772022222 4190341022 1138386226 1583119279 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3879922405 2257098898 4106573903 2990552295 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3879922405 2257098898 4106573903 760071194 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3879922405 2257098898 4106573903 2172129453 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3879922405 2257098898 4106573903 3098209529 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2751430422 3535384576 1178225662 264922690 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2751430422 3535384576 1178225662 638310695 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 634689168 1558178565 3438778114 3770158048 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 634689168 1558178565 3438778114 2838908779 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1906326545 268218351 3556752290 4160523498 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1906326545 268218351 3556752290 2865592304 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1481519788 2799725917 1070404771 3297631478 +conv2d 
fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1481519788 2799725917 1070404771 767059743 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1350987180 3287180835 87918186 1052120585 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1350987180 3287180835 87918186 1681505651 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 864673475 2278914712 345256678 1262388200 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 864673475 2278914712 345256678 301355370 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2136677961 1230883571 535024815 2000875092 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2136677961 1230883571 535024815 1840294626 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_0i_beta2_0i cnhwc_cnhwc_cnhwc_c_c 958260586 595219593 900622494 3754107310 +conv2d dgrad_1x1x1x2_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3303215167 4250546017 3890556343 2998602688 +conv2d dgrad_1x1x1x2_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3303215167 4250546017 3890556343 2998602688 +conv2d dgrad_1x1x8x2_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 987911951 2980124304 2356394714 +conv2d dgrad_1x1x8x2_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 987911951 2980124304 4022777716 +conv2d dgrad_1x7x8x2_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 874639038 3644341259 802623080 
2956519823 +conv2d dgrad_1x7x8x2_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 874639038 3644341259 802623080 2062861672 +conv2d dgrad_1x7x9x2_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3528407758 1595526728 2738672641 906097834 +conv2d dgrad_1x7x9x2_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3528407758 1595526728 2738672641 3836417944 +conv2d dgrad_2x7x9x2_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 639343303 224923131 311726682 1416127669 +conv2d dgrad_2x7x9x2_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 639343303 224923131 311726682 2471877698 +conv2d dgrad_3x7x9x2_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1075488054 1219404370 1146816700 3034531074 +conv2d dgrad_3x7x9x2_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1075488054 1219404370 1146816700 4066967351 +conv2d dgrad_3x7x9x2_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1325203442 2400093115 1146816700 693310298 +conv2d dgrad_3x7x9x2_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1325203442 2400093115 1146816700 3135924868 +conv2d dgrad_3x7x9x2_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 558171981 1018998471 1146816700 529330651 +conv2d dgrad_3x7x9x2_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 558171981 1018998471 1146816700 2005367170 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3068101001 105489309 1542732212 3932236040 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3068101001 105489309 1542732212 3932236040 +conv2d 
dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4012287854 350396557 4055318830 3961661564 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4012287854 350396557 4055318830 3131592715 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1814966816 2318155471 2860596382 3615634589 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1814966816 2318155471 2860596382 4068533991 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1112763679 2077244131 923239372 4185176358 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1112763679 2077244131 923239372 2760678679 +conv2d dgrad_1x15x19x34_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2216139934 1319256657 2148851182 4222196889 +conv2d dgrad_1x15x19x34_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2216139934 1319256657 2148851182 3594057123 +conv2d dgrad_1x16x24x66_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 859196912 2500591916 96801657 4090791495 +conv2d dgrad_1x16x24x66_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 859196912 2500591916 96801657 3335462877 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1655739745 268218351 1632053434 381883460 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1655739745 268218351 1632053434 2882340861 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i 
cnhwc_cnhwc_cnhwc_c_c 178805580 2799725917 1965113500 2522976436 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 178805580 2799725917 1965113500 733838654 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 857551382 3287180835 3720443806 3464939495 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 857551382 3287180835 3720443806 1609144169 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1587615602 2278914712 424519891 3221010146 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1587615602 2278914712 424519891 2669791026 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2145363230 1230883571 2549378996 2649498772 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2145363230 1230883571 2549378996 3402864639 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_0i_beta2_0i cnhwc_cnhwc_cnhwc_c_c 1762516385 595219593 918164635 3122213356 +conv2d wgrad_1x1x1x2_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3303215167 991930693 2980124304 2646402841 +conv2d wgrad_1x1x1x2_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3303215167 991930693 2980124304 2646402841 +conv2d wgrad_1x1x8x2_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 4250546017 294738383 2839064596 +conv2d wgrad_1x1x8x2_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 4250546017 294738383 126798540 +conv2d 
wgrad_1x7x8x2_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 874639038 1478369444 1588130777 3726346872 +conv2d wgrad_1x7x8x2_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 874639038 1478369444 1588130777 2751024512 +conv2d wgrad_1x7x9x2_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3528407758 3070204399 3036494114 2723605209 +conv2d wgrad_1x7x9x2_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3528407758 3070204399 3036494114 1320740478 +conv2d wgrad_2x7x9x2_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 639343303 2872812701 1239438433 3682382315 +conv2d wgrad_2x7x9x2_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 639343303 2872812701 1239438433 1302999924 +conv2d wgrad_3x7x9x2_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1075488054 3373210244 4207987780 1660673899 +conv2d wgrad_3x7x9x2_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1075488054 3373210244 4207987780 2714640979 +conv2d wgrad_3x7x9x2_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1325203442 3373210244 2381193183 1695826123 +conv2d wgrad_3x7x9x2_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1325203442 3373210244 2381193183 805511206 +conv2d wgrad_3x7x9x2_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 558171981 3373210244 2224388272 3988056388 +conv2d wgrad_3x7x9x2_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 558171981 3373210244 2224388272 3477078834 +conv2d wgrad_1x11x7x2_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 1219770266 2980124304 4226893215 +conv2d 
wgrad_1x11x7x2_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 1219770266 2980124304 4226893215 +conv2d wgrad_1x11x7x2_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 1219770266 1588130777 507860528 +conv2d wgrad_1x11x7x2_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1134921014 1219770266 1588130777 197947685 +conv2d wgrad_1x13x11x2_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 874639038 1458935684 2980124304 665196336 +conv2d wgrad_1x13x11x2_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 874639038 1458935684 2980124304 665196336 +conv2d wgrad_1x17x19x2_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 872104768 2927917918 3840288384 3199099046 +conv2d wgrad_1x17x19x2_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 872104768 2927917918 3840288384 4041124990 +conv2d wgrad_1x23x5x2_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1325203442 1018974017 2695215306 4046661096 +conv2d wgrad_1x23x5x2_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1325203442 1018974017 2695215306 3480351868 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4145273678 2543470756 2731031538 2297420515 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4145273678 2543470756 2731031538 1328037289 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3825093358 4205517202 2731031538 710476466 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3825093358 4205517202 2731031538 2536080878 +conv2d 
wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1971123277 747484694 1069984549 2363498191 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1971123277 747484694 1069984549 3086819073 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3068101001 2172673608 472330470 2004815062 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3068101001 2172673608 472330470 2004815062 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2733349579 4187797427 4010744414 1794084581 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2733349579 4187797427 4010744414 4114524808 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4012287854 1017126775 3085573007 2255778450 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 4012287854 1017126775 3085573007 1202237741 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1814966816 1352955925 3175962793 2510381785 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1814966816 1352955925 3175962793 1158539581 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1112763679 3236296258 4126085678 2683550287 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1112763679 3236296258 4126085678 405842269 +conv2d wgrad_1x15x19x34_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i 
cnhwc_cnhwc_cnhwc_c_c 2216139934 1340523375 995921099 1592723627 +conv2d wgrad_1x15x19x34_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2216139934 1340523375 995921099 309005134 +conv2d wgrad_1x16x24x66_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 859196912 1358310556 2323019560 542694758 +conv2d wgrad_1x16x24x66_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 859196912 1358310556 2323019560 2166847633 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2259629017 2475931707 1440799092 4221565216 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2259629017 2475931707 1440799092 3549830349 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3657056888 2153286596 2099843274 1726340662 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3657056888 2153286596 2099843274 1726340662 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1982243752 1023264400 3870616796 3807131300 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1982243752 1023264400 3870616796 1448894569 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2719672358 3720159962 198641006 877490804 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2719672358 3720159962 198641006 1819866268 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3808539997 3824664598 3887066342 1781681130 +conv2d 
wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 3808539997 3824664598 3887066342 568593004 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 365784208 397970234 3887066342 3219231066 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 365784208 397970234 3887066342 1952799079 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 649647965 4115158715 3687074625 2188687762 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 649647965 4115158715 3687074625 328038371 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2535440624 1855518467 3887066342 3554145843 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2535440624 1855518467 3887066342 629423006 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2535440624 1855518467 3887066342 4234955015 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2535440624 1855518467 3887066342 3741665130 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 575370052 296626169 3843555714 2515434981 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 575370052 296626169 3843555714 2997311698 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 933902974 1680322972 2268084635 947907958 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_0i_beta0_0i 
cnhwc_cnhwc_cnhwc_c_c 933902974 1680322972 2268084635 1668453739 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1655739745 1407675647 3608965793 1545685731 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1655739745 1407675647 3608965793 1560811683 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 178805580 468297304 2081693980 2353241153 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 178805580 468297304 2081693980 3371496143 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 857551382 4022024717 3526617126 2530989957 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 857551382 4022024717 3526617126 1774437661 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1587615602 1932866802 3105677758 658488210 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 1587615602 1932866802 3105677758 3321302287 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2145363230 3735060046 3040777726 2073220474 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_0i_beta0_0i cnhwc_cnhwc_cnhwc_c_c 2145363230 3735060046 3040777726 3382548517 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_0i_beta2_0i cnhwc_cnhwc_cnhwc_c_c 1762516385 36879495 1156311284 3310348499 +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2188357494 1769029067 159526285 825535581 +conv2d 
fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2188357494 1769029067 159526285 825535581 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1838714027 2265452184 2610359861 1987868270 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1838714027 2265452184 2610359861 728956432 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 3971681487 257610791 1008996088 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 3971681487 257610791 2555315413 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2318538354 3316437378 710632830 1863244686 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2318538354 3316437378 710632830 3946357538 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672199864 3153681330 3307060297 2695612973 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672199864 3153681330 3307060297 2564657101 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 2700620395 1556396665 2901584122 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 2700620395 1556396665 3078683181 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 3431458505 2293883251 1513830178 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 3431458505 2293883251 1456315771 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 
728619567 2638743394 4157930574 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 728619567 2638743394 4280729276 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 1769029067 2610359861 829068145 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 1769029067 2610359861 829068145 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 3971681487 2610359861 3175191801 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 3971681487 2610359861 752965501 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1483240777 1769029067 257610791 1872463282 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1483240777 1769029067 257610791 1872463282 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3878803000 1547628165 652315301 3236250595 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3878803000 1547628165 652315301 115169690 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3474690180 3245600707 2293883251 1373982557 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3474690180 3245600707 2293883251 3038299705 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 876572357 663101280 1784121575 3650717238 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 876572357 663101280 1784121575 1844414540 +conv2d 
fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2926249914 663101280 1238102800 940361331 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2926249914 663101280 1238102800 1574520323 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 568740975 3647824478 1001821658 763311327 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 568740975 3647824478 1001821658 3755275557 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3351802863 889731117 1862926968 1133541197 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3351802863 889731117 1862926968 1133541197 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3680916718 2931605166 1380790217 1603771059 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3680916718 2931605166 1380790217 1384953419 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 865676658 3292777216 2570824137 1716239002 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 865676658 3292777216 2570824137 1207507795 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3471311976 3069685598 1501287327 2020885449 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3471311976 3069685598 1501287327 3540106320 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2012717243 4183087394 1158072871 2770857658 +conv2d 
fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2012717243 4183087394 1158072871 2753556152 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2639488807 1127821716 1175487408 1443444693 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2639488807 1127821716 1175487408 2615818500 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4005424028 560055529 3924773317 1674063910 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4005424028 560055529 3924773317 731082069 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2975308389 3388564462 540151004 2747507935 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2975308389 3388564462 540151004 916078700 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1609667174 3347123934 1942443225 927150176 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1609667174 3347123934 1942443225 927150176 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1910152888 4279006666 634623721 1857297775 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1910152888 4279006666 634623721 3617909140 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3020034528 2354621402 3496282292 1893593581 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3020034528 2354621402 3496282292 4168452665 +conv2d 
fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2432565675 683959930 1993750394 960818057 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2432565675 683959930 1993750394 3831768945 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1094435461 683959930 1546760441 3943198524 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1094435461 683959930 1546760441 297674396 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1881914243 1677559851 2710651669 1457186301 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1881914243 1677559851 2710651669 641199618 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 1219910572 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 2513734798 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 2022987417 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 1691115924 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3831097363 1912090452 140813403 3043290146 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3831097363 1912090452 140813403 2921616453 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1244925287 3609240260 3715463477 2323185695 +conv2d 
fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1244925287 3609240260 3715463477 1672551923 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3026255423 62170551 337645063 913747685 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3026255423 62170551 337645063 778811608 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3054241601 2539714930 725343835 1284495313 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3054241601 2539714930 725343835 2804300956 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3459414189 2300049969 3590051376 3773230297 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3459414189 2300049969 3590051376 3201976695 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 869473545 972211497 126335800 745587578 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 869473545 972211497 126335800 3427184805 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3759545138 698995736 2260090497 603560103 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3759545138 698995736 2260090497 3440870263 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 1541569966 3683140111 4057089521 1550530307 +conv2d dgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 135000378 4250546017 3890556343 3840696830 +conv2d 
dgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 135000378 4250546017 3890556343 3840696830 +conv2d dgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 987911951 2980124304 2572054323 +conv2d dgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 987911951 2980124304 4006425566 +conv2d dgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1504270762 3644341259 802623080 1655787066 +conv2d dgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1504270762 3644341259 802623080 2956350850 +conv2d dgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1134921014 1595526728 2738672641 962312341 +conv2d dgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1134921014 1595526728 2738672641 3126435132 +conv2d dgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1113301030 224923131 311726682 4061102478 +conv2d dgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1113301030 224923131 311726682 2314369199 +conv2d dgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1431058757 1219404370 1146816700 3996872848 +conv2d dgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1431058757 1219404370 1146816700 2269688958 +conv2d dgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2568212047 2400093115 1146816700 1719436727 +conv2d dgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2568212047 2400093115 1146816700 4236972782 +conv2d dgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2133636964 
1018998471 1146816700 1307853167 +conv2d dgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2133636964 1018998471 1146816700 469937848 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1126681227 4265921531 1307969125 2041137857 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1126681227 4265921531 1307969125 2041137857 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3363792845 2766914163 1541598000 562462849 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3363792845 2766914163 1541598000 585918261 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2017514189 3050624323 2813830197 3551637995 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2017514189 3050624323 2813830197 2284323213 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2136362147 1633048603 2983167436 866644788 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2136362147 1633048603 2983167436 253066315 +conv2d dgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 784928777 2467457616 1092494080 274183605 +conv2d dgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 784928777 2467457616 1092494080 1241514327 +conv2d dgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1928121195 1319256657 3541920582 2018009151 +conv2d dgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1928121195 1319256657 3541920582 
1303918704 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1089703540 3724098469 2351024902 941952011 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1089703540 3724098469 2351024902 3577527602 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1527509174 840940128 2574042419 2518679986 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1527509174 840940128 2574042419 3797768014 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3153833039 3274942180 3438778114 508409341 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3153833039 3274942180 3438778114 3918768286 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 805200720 617641393 2145192778 3649177283 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 805200720 617641393 2145192778 2392118609 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3916058745 721318312 535024815 4039199216 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3916058745 721318312 535024815 2891577765 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 3160120111 12477519 1682007569 2212069629 +conv2d wgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 135000378 991930693 2980124304 2169312899 +conv2d wgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 135000378 991930693 2980124304 2169312899 +conv2d 
wgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 4250546017 294738383 1067638259 +conv2d wgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 4250546017 294738383 504959928 +conv2d wgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1504270762 1478369444 1588130777 587511439 +conv2d wgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1504270762 1478369444 1588130777 3767308084 +conv2d wgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1134921014 3070204399 3036494114 4124366680 +conv2d wgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1134921014 3070204399 3036494114 2833785422 +conv2d wgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1113301030 2872812701 1239438433 1775116239 +conv2d wgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1113301030 2872812701 1239438433 1288542483 +conv2d wgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1431058757 3373210244 4207987780 4054255409 +conv2d wgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1431058757 3373210244 4207987780 180834054 +conv2d wgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2568212047 3373210244 2381193183 353843599 +conv2d wgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2568212047 3373210244 2381193183 1860329513 +conv2d wgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2133636964 3373210244 2224388272 347055977 +conv2d wgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2133636964 
3373210244 2224388272 994788649 +conv2d wgrad_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 1219770266 2980124304 1448608264 +conv2d wgrad_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 1219770266 2980124304 1448608264 +conv2d wgrad_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 1219770266 1588130777 2381531672 +conv2d wgrad_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1501031342 1219770266 1588130777 3417974847 +conv2d wgrad_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1504270762 1458935684 2980124304 2512582600 +conv2d wgrad_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1504270762 1458935684 2980124304 2512582600 +conv2d wgrad_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1642334467 2927917918 3840288384 2880620663 +conv2d wgrad_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1642334467 2927917918 3840288384 2552969500 +conv2d wgrad_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2568212047 1018974017 2695215306 3059274099 +conv2d wgrad_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2568212047 1018974017 2695215306 3436607346 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1325203442 2973628448 3097125139 3643918743 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1325203442 2973628448 3097125139 747082267 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1075488054 4153253284 3097125139 21340657 +conv2d 
wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1075488054 4153253284 3097125139 56227083 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2373792510 4228778230 3268227320 2781787969 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2373792510 4228778230 3268227320 4054149767 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1126681227 448814756 4020828414 692686929 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1126681227 448814756 4020828414 692686929 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4098697864 2355686406 2621782130 2833949486 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 4098697864 2355686406 2621782130 3251496823 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3363792845 68416507 3062073018 220836046 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3363792845 68416507 3062073018 1707620502 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2017514189 2695053579 4119019906 338363935 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2017514189 2695053579 4119019906 738717986 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2136362147 2323702535 2730633195 4275642741 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2136362147 2323702535 2730633195 3105473059 +conv2d 
wgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 784928777 3920994243 2871183684 1457368860 +conv2d wgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 784928777 3920994243 2871183684 2129608645 +conv2d wgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1928121195 1834136816 995921099 1290832901 +conv2d wgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1928121195 1834136816 995921099 154477589 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3829716048 824597450 3596813277 3377048584 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3829716048 824597450 3596813277 412509722 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1276391848 1858648938 3730143675 2712417789 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1276391848 1858648938 3730143675 2712417789 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2991748655 996994755 3162712336 2691220362 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2991748655 996994755 3162712336 3441232209 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 613238853 3737415653 1813717058 3953491679 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 613238853 3737415653 1813717058 1261891081 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3074259140 3369591202 2081693980 3439473075 +conv2d 
wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3074259140 3369591202 2081693980 3352279175 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2475424603 3047230488 2081693980 3359165930 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 2475424603 3047230488 2081693980 1942009480 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3980242078 2830864967 1000406280 332157749 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3980242078 2830864967 1000406280 2601063004 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 870738555 640024666 2081693980 4100031453 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 870738555 640024666 2081693980 1919107176 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 870738555 640024666 2081693980 1618717912 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 870738555 640024666 2081693980 221251170 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1467525936 2166215692 2099843274 1752033731 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1467525936 2166215692 2099843274 2254410747 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1441608968 3983207745 3357056235 4220059751 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1441608968 3983207745 3357056235 3334646036 +conv2d 
wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1089703540 1191155361 852881505 1729883415 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1089703540 1191155361 852881505 3112064590 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1527509174 2779255425 1217669626 2313445327 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 1527509174 2779255425 1217669626 2378687243 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3153833039 1258133396 2989357662 2523233417 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3153833039 1258133396 2989357662 845687444 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 805200720 858259717 3333097025 2967627788 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 805200720 858259717 3333097025 285381561 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3916058745 443607763 3443985888 4252958697 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 fnhwc_fnhwc_fnhwc_f_f 3916058745 443607763 3443985888 160172027 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 fnhwc_fnhwc_fnhwc_f_f 3160120111 1536824438 422976060 2522704893 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f16_sm80.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f16_sm80.txt new file mode 100644 index 00000000..3da54096 --- /dev/null +++ 
b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f16_sm80.txt @@ -0,0 +1,207 @@ +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2188357494 1769029067 159526285 825535581 +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2188357494 1769029067 159526285 825535581 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1838714027 2265452184 2610359861 1987868270 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1838714027 2265452184 2610359861 728956432 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 3971681487 257610791 1008996088 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 3971681487 257610791 2555315413 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2318538354 3316437378 710632830 1863244686 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2318538354 3316437378 710632830 3946357538 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672199864 3153681330 3307060297 2695612973 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 672199864 3153681330 3307060297 2564657101 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 2700620395 1556396665 2901584122 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 2700620395 1556396665 3078683181 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 3431458505 2293883251 1513830178 
+conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 3431458505 2293883251 1456315771 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 728619567 2638743394 4157930574 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3966476684 728619567 2638743394 4280729276 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 1769029067 2610359861 829068145 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 1769029067 2610359861 829068145 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 3971681487 2610359861 3175191801 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2760870769 3971681487 2610359861 752965501 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1483240777 1769029067 257610791 1872463282 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1483240777 1769029067 257610791 1872463282 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3878803000 1547628165 652315301 3236250595 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3878803000 1547628165 652315301 115169690 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3474690180 3245600707 2293883251 1373982557 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3474690180 3245600707 2293883251 3038299705 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 
hnhwc_hnhwc_hnhwc_h_h 876572357 663101280 1784121575 3650717238 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 876572357 663101280 1784121575 1844414540 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2926249914 663101280 1238102800 940361331 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2926249914 663101280 1238102800 1574520323 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 568740975 3647824478 1001821658 763311327 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 568740975 3647824478 1001821658 3755275557 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3351802863 889731117 1862926968 1133541197 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3351802863 889731117 1862926968 1133541197 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3680916718 2931605166 1380790217 1603771059 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3680916718 2931605166 1380790217 1384953419 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 865676658 3292777216 2570824137 1716239002 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 865676658 3292777216 2570824137 1207507795 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3471311976 3069685598 1501287327 2020885449 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3471311976 
3069685598 1501287327 3540106320 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2012717243 4183087394 1158072871 2770857658 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2012717243 4183087394 1158072871 2753556152 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2639488807 1127821716 1175487408 1443444693 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2639488807 1127821716 1175487408 2615818500 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4005424028 560055529 3924773317 1674063910 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4005424028 560055529 3924773317 731082069 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2975308389 3388564462 540151004 2747507935 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2975308389 3388564462 540151004 916078700 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1609667174 3347123934 1942443225 927150176 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1609667174 3347123934 1942443225 927150176 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1910152888 4279006666 634623721 1857297775 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1910152888 4279006666 634623721 3617909140 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3020034528 2354621402 3496282292 
1893593581 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3020034528 2354621402 3496282292 4168452665 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2432565675 683959930 1993750394 960818057 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2432565675 683959930 1993750394 3831768945 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1094435461 683959930 1546760441 3943198524 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1094435461 683959930 1546760441 297674396 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1881914243 1677559851 2710651669 1457186301 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1881914243 1677559851 2710651669 641199618 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 1219910572 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 2513734798 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 2022987417 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3323210016 683959930 3090812263 1691115924 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3831097363 1912090452 140813403 3043290146 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3831097363 1912090452 140813403 2921616453 
+conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1244925287 3609240260 3715463477 2323185695 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1244925287 3609240260 3715463477 1672551923 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3026255423 62170551 337645063 913747685 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3026255423 62170551 337645063 778811608 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3054241601 2539714930 725343835 1284495313 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3054241601 2539714930 725343835 2804300956 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3459414189 2300049969 3590051376 3773230297 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3459414189 2300049969 3590051376 3201976695 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 869473545 972211497 126335800 745587578 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 869473545 972211497 126335800 3427184805 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3759545138 698995736 2260090497 603560103 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3759545138 698995736 2260090497 3440870263 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 1541569966 3683140111 4057089521 1550530307 +conv2d 
fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 439881880 2361007913 550417941 +conv2d fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 439881880 2361007913 511192308 +conv2d fprop_1x4x4x14_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2604126736 2435640339 2361007913 968601759 +conv2d fprop_1x4x4x14_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2604126736 2435640339 2361007913 144578446 +conv2d fprop_1x23x56x98_10x22_128x3x3_pad_h4w5_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4198134089 972211497 1394525436 3504696155 +conv2d fprop_1x23x56x98_10x22_128x3x3_pad_h4w5_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4198134089 972211497 1394525436 3698098218 +conv2d fprop_1x4x4x28_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 1672619248 2361007913 1369103290 +conv2d fprop_1x4x4x28_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 1672619248 2361007913 489904325 +conv2d fprop_1x23x56x100_10x22_128x3x3_pad_h4w5_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4291047527 2579050400 1394525436 351315258 +conv2d fprop_1x23x56x100_10x22_128x3x3_pad_h4w5_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4291047527 2579050400 1394525436 1597164252 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 882369639 1769029067 2361007913 2527823846 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 882369639 1769029067 2361007913 2527823846 +conv2d dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 2265452184 1363379414 300054115 +conv2d dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 
hnhwc_hnhwc_hnhwc_h_h 1089974555 2265452184 1363379414 3807580965 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 3971681487 257610791 2386732272 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 3971681487 257610791 4258410038 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1057722415 3316437378 889102087 2570634199 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1057722415 3316437378 889102087 171700712 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1856996643 3153681330 2728188831 2975997856 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1856996643 3153681330 2728188831 2889418578 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3890032868 2700620395 2547817029 3967064079 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3890032868 2700620395 2547817029 3184265450 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3787596573 3431458505 2547817029 964612107 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3787596573 3431458505 2547817029 2037559507 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3247824247 728619567 2547817029 3557273069 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3247824247 728619567 2547817029 4397155 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 74280264 889731117 1508034024 1365806907 +conv2d 
dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 74280264 889731117 1508034024 1365806907 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1282400930 3292777216 2278368940 8357438 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1282400930 3292777216 2278368940 226365504 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4245845173 3069685598 99670743 325988631 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4245845173 3069685598 99670743 2434233848 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2504130563 4183087394 1801140850 1590712227 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2504130563 4183087394 1801140850 2545352669 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1766181903 1127821716 1260519450 2958285630 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1766181903 1127821716 1260519450 3013578259 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 183183414 560055529 2304892275 2542844870 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 183183414 560055529 2304892275 2460120362 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 202999757 62170551 888583293 630899132 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 202999757 62170551 888583293 2651936157 +conv2d 
dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 64770922 2539714930 291707000 1632621462 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 64770922 2539714930 291707000 2196326316 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1553906912 2300049969 270479912 1418534287 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1553906912 2300049969 270479912 2969883907 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4010944083 972211497 2352115326 3241394959 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4010944083 972211497 2352115326 3084585540 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 920147836 698995736 4291786732 4085488230 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 920147836 698995736 4291786732 65254192 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 2823085435 3683140111 2381613519 4075620655 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 882369639 2469118171 1363379414 2536698895 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 882369639 2469118171 1363379414 2536698895 +conv2d wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 1769029067 2610359861 2675562568 +conv2d wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 1769029067 2610359861 931542610 +conv2d 
wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 4237122930 2293883251 3279943011 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 4237122930 2293883251 2971660764 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1057722415 4274209342 2000691267 560753121 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1057722415 4274209342 2000691267 2154075967 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1856996643 2435640339 4206304784 607086828 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1856996643 2435640339 4206304784 1740786933 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3890032868 3485693384 722716191 534106900 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3890032868 3485693384 722716191 1330324832 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3787596573 3485693384 2298261608 3272993277 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3787596573 3485693384 2298261608 809879168 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3247824247 3485693384 677398494 3545662177 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3247824247 3485693384 677398494 406656675 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 4228252800 1363379414 1916931500 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 
4228252800 1363379414 1916931500 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 4228252800 2293883251 2596039814 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1089974555 4228252800 2293883251 2187291295 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 27036473 1363379414 1933529223 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 702641729 27036473 1363379414 1933529223 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2903517501 1191123585 2874079035 3411667027 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2903517501 1191123585 2874079035 803905560 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3787596573 598080595 1784121575 644569158 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3787596573 598080595 1784121575 2373347066 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3227777797 329714136 2976030512 1014758557 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3227777797 329714136 2976030512 3940634369 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2301757939 1530947948 2976030512 3694497516 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2301757939 1530947948 2976030512 835787843 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2617901288 2993361563 1502330852 2213153149 +conv2d 
wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2617901288 2993361563 1502330852 4284874546 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 74280264 1916026608 2010130163 369232966 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 74280264 1916026608 2010130163 369232966 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1666425148 2767428662 2279791261 4027210146 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1666425148 2767428662 2279791261 1318730126 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1282400930 2713937462 3240894382 506695315 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1282400930 2713937462 3240894382 816815166 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4245845173 1573282079 3850186149 4106577752 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4245845173 1573282079 3850186149 355691834 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2504130563 4047167459 2936630712 348206756 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2504130563 4047167459 2936630712 2784859021 +conv2d wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1766181903 4273504437 787340207 1859862221 +conv2d wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1766181903 4273504437 787340207 3725886099 +conv2d 
wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 183183414 3105456251 3826106137 974970753 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 183183414 3105456251 3826106137 3416657232 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3483168388 424125191 908150856 1561140377 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3483168388 424125191 908150856 188844017 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 556825749 1391936148 2475632360 2800518880 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 556825749 1391936148 2475632360 2800518880 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3638123775 1173557136 2296711591 633157930 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3638123775 1173557136 2296711591 2368475048 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 649525056 3855280460 3101885145 2961254635 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 649525056 3855280460 3101885145 3741625104 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2173736421 3547027896 2104091502 3064810292 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2173736421 3547027896 2104091502 3713272088 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2709957180 1282487297 2104091502 744834192 +conv2d 
wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2709957180 1282487297 2104091502 904876309 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3651249563 4031283235 1508206782 2694170352 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3651249563 4031283235 1508206782 3908734231 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3355104310 3659562123 2104091502 2337157193 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3355104310 3659562123 2104091502 2313638308 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3355104310 3659562123 2104091502 3685548808 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 3355104310 3659562123 2104091502 2598517610 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2990284284 1468341695 784812405 1657604983 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 2990284284 1468341695 784812405 1091864392 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 777450620 1302452628 2792629935 1858657589 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 777450620 1302452628 2792629935 17882029 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 202999757 1750335029 2181422926 4235795050 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 202999757 1750335029 2181422926 4023494188 +conv2d 
wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 64770922 821682484 2518007336 481779259 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 64770922 821682484 2518007336 973321278 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1553906912 3381440542 537676326 3225233411 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 1553906912 3381440542 537676326 1804899744 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4010944083 3959973438 1089446814 942114500 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 4010944083 3959973438 1089446814 3331001854 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 920147836 2098470176 1991739118 2991567146 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_h_h 920147836 2098470176 1991739118 244560469 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_h_h 2823085435 2750615938 1907430051 3805642950 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm70.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm70.txt new file mode 100644 index 00000000..07e9ab62 --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm70.txt @@ -0,0 +1,197 @@ +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1152388039 486967919 3706990238 +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1152388039 486967919 
3706990238 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1585175937 2411745115 1919973277 506127908 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1585175937 2411745115 1919973277 2193864160 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 3872812830 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2445012180 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3892602751 3987447721 2963892063 3556328678 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3892602751 3987447721 2963892063 2088376980 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1647798936 1986983586 3862463878 498933688 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1647798936 1986983586 3862463878 2581533408 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 3844983048 373331791 3515334098 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 3844983048 373331791 2566159340 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 741820436 2695215306 4235633178 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 741820436 2695215306 815310604 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 2188679282 2132201986 1043483661 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 
664571366 2188679282 2132201986 2172013863 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 1152388039 1919973277 1836150764 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 1152388039 1919973277 1836150764 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 4021016722 1919973277 3537245111 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 4021016722 1919973277 1166347001 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3708184432 1152388039 1911021900 4188669650 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3708184432 1152388039 1911021900 4188669650 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3793481427 2784674253 5067603 3709403009 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3793481427 2784674253 5067603 3666537740 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 724504571 866495443 2695215306 2273666993 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 724504571 866495443 2695215306 3277012289 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3696992523 2499748007 2381193183 4256475381 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3696992523 2499748007 2381193183 868016148 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3999848747 2499748007 1038140209 3720347250 +conv2d 
fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3999848747 2499748007 1038140209 3079806253 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1193090006 2131084833 720746870 1441382904 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1193090006 2131084833 720746870 2400159337 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3123356211 67170595 2290618404 1832291434 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3123356211 67170595 2290618404 1832291434 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 374093114 3867270480 2959096809 3957922181 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 374093114 3867270480 2959096809 855145836 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3983093985 3903005020 613201518 1067564721 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3983093985 3903005020 613201518 3116342099 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3167361585 764195688 167104692 3451521719 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3167361585 764195688 167104692 2267030515 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2514717945 2683853778 2557576548 3978291544 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2514717945 2683853778 2557576548 2608287914 +conv2d 
fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1072544153 768805504 1993107524 2984226313 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1072544153 768805504 1993107524 3008259681 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3875284691 1195875134 1041989393 1900995663 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3875284691 1195875134 1041989393 392783151 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3179965682 1961598694 1222599802 2151043714 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3179965682 1961598694 1222599802 1013916354 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2615784276 3832095556 1454859747 958765281 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2615784276 3832095556 1454859747 958765281 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 674692187 808380753 1933240382 879488563 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 674692187 808380753 1933240382 1399508988 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 961566038 2529610490 1966904362 1818870269 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 961566038 2529610490 1966904362 2484388308 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3562503578 4021646304 3426919182 4130346493 +conv2d 
fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3562503578 4021646304 3426919182 3486420438 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1144321310 4021646304 4185476157 964986272 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1144321310 4021646304 4185476157 1800819557 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3153402236 2244058322 623381239 4197612113 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3153402236 2244058322 623381239 2816226027 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 438913488 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 2812015344 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 307241203 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 3413043496 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4047780986 3796154296 3933470176 2042558105 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4047780986 3796154296 3933470176 2082898655 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4196208609 640419443 3879567407 3126196765 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4196208609 640419443 3879567407 370340580 +conv2d 
fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1494714374 785253250 1954437668 16145912 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1494714374 785253250 1954437668 352471766 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4104811786 417350231 2268084635 3829320356 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4104811786 417350231 2268084635 3317921115 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3747780903 2173099720 1217977439 3831700802 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3747780903 2173099720 1217977439 2066501805 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2475183841 2416021807 90427526 3879607464 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2475183841 2416021807 90427526 3914420497 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 835429033 3252222904 934864596 2610068735 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 835429033 3252222904 934864596 4160630405 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 83915378 2330495260 722450442 220865236 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 1152388039 1036082506 2004021933 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 1152388039 1036082506 2004021933 +conv2d 
dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 2411745115 1585402383 692064357 +conv2d dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 2411745115 1585402383 3191762167 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2493803192 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2915688271 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3987447721 311726682 356690874 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3987447721 311726682 3600277581 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 1986983586 1898068767 1480022038 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 1986983586 1898068767 1482170634 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 3844983048 3628298618 1684782062 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 3844983048 3628298618 1312771437 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 741820436 3628298618 3927854537 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 741820436 3628298618 3753859236 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 2188679282 3628298618 331956491 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 
2188679282 3628298618 3514953364 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 67170595 1307969125 1500995359 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 67170595 1307969125 1500995359 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 3903005020 1541598000 4224357357 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 3903005020 1541598000 1962390570 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 764195688 2813830197 3377586111 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 764195688 2813830197 1315372741 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 2683853778 2983167436 324191835 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 2683853778 2983167436 1085373547 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 768805504 3823519277 1916712483 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 768805504 3823519277 1584393587 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 1195875134 2636428043 1173364286 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 1195875134 2636428043 4109782860 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 785253250 
2351024902 3316943078 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 785253250 2351024902 1065622134 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 417350231 2574042419 294608043 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 417350231 2574042419 491435109 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 2173099720 3438778114 3788164138 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 2173099720 3438778114 2104210385 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 2416021807 2145192778 2640510765 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 2416021807 2145192778 2748647248 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3252222904 535024815 169162784 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3252222904 535024815 3841086721 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2329920945 2330495260 1682007569 215965178 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 3282100285 1585402383 820104967 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 3282100285 1585402383 820104967 +conv2d wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 1919973277 4103626856 +conv2d 
wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 1919973277 4164492975 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 2527774459 2695215306 1519678771 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 2527774459 2695215306 4056436284 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3488761154 1737428004 2359527095 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3488761154 1737428004 2240340004 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 2242491745 475190714 3363555017 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 2242491745 475190714 2240937477 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 1231166805 3030543492 1587793687 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 1231166805 3030543492 4129044530 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 1231166805 3002932066 1436246794 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 1231166805 3002932066 2860692730 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 1231166805 2704246666 3831504826 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 1231166805 2704246666 932276705 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 
3952246738 1585402383 584217576 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 1585402383 584217576 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 2695215306 644274899 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 2695215306 2070383610 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 3269716347 1585402383 2091838742 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 3269716347 1585402383 2091838742 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2208297031 3036494114 1391639360 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2208297031 3036494114 2853291206 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 3638907161 2381193183 4076043307 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 3638907161 2381193183 162466399 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 1748311260 3097125139 1760368998 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 1748311260 3097125139 370487040 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 950177617 3097125139 2441672462 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 950177617 3097125139 4093130692 +conv2d 
wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 3223936144 3268227320 3407045986 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 3223936144 3268227320 3327506975 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 2413111238 4020828414 3782469663 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 2413111238 4020828414 3782469663 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 1849062825 2621782130 1997835815 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 1849062825 2621782130 2176605786 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 1028019427 3062073018 1532225578 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 1028019427 3062073018 3175924537 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 665408258 4119019906 3070068838 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 665408258 4119019906 4095170522 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 1329921014 2730633195 1859011130 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 1329921014 2730633195 1829382848 +conv2d wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 654394569 180842421 456267566 +conv2d 
wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 654394569 180842421 4065629708 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 792561864 3949436648 2346117136 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 792561864 3949436648 3375703263 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 2182213461 3596813277 2680766255 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 2182213461 3596813277 819444109 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 4284293314 3730143675 2330242371 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 4284293314 3730143675 2330242371 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 1488124595 3162712336 2186928678 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 1488124595 3162712336 4038147074 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 1324698024 1813717058 1491574211 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 1324698024 1813717058 664642450 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 1617738198 2081693980 1691976762 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 1617738198 2081693980 1352928468 +conv2d 
wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 169231440 2081693980 4154079864 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 169231440 2081693980 49240552 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 1111691860 1000406280 1708014479 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 1111691860 1000406280 2976393539 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 141423768 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 1807012226 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 4065587708 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 3621154478 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 4018387624 2099843274 3166971951 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 4018387624 2099843274 710495809 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 1007447386 3357056235 3446932548 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 1007447386 3357056235 344178277 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 4040055257 852881505 1633246355 +conv2d 
wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 4040055257 852881505 778858244 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 428926092 1217669626 1090782686 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 428926092 1217669626 1523455590 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 1787889194 2989357662 3134613422 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 1787889194 2989357662 1158845444 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 857847029 3333097025 3239572160 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 857847029 3333097025 2436320376 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3294874067 3443985888 4083335585 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3294874067 3443985888 3302917564 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2329920945 531242570 422976060 2227466455 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm75.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm75.txt new file mode 100644 index 00000000..4024064f --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm75.txt @@ -0,0 +1,472 @@ +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1152388039 
486967919 3706990238 +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1152388039 486967919 3706990238 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1585175937 2411745115 1919973277 506127908 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1585175937 2411745115 1919973277 2193864160 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 3872812830 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2445012180 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3892602751 3987447721 2963892063 3556328678 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3892602751 3987447721 2963892063 2088376980 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1647798936 1986983586 3862463878 498933688 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1647798936 1986983586 3862463878 2581533408 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 3844983048 373331791 3515334098 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 3844983048 373331791 2566159340 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 741820436 2695215306 4235633178 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 741820436 2695215306 815310604 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
hnhwc_hnhwc_fnhwc_f_f 664571366 2188679282 2132201986 1043483661 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 2188679282 2132201986 2172013863 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 1152388039 1919973277 1836150764 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 1152388039 1919973277 1836150764 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 4021016722 1919973277 3537245111 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 4021016722 1919973277 1166347001 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3708184432 1152388039 1911021900 4188669650 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3708184432 1152388039 1911021900 4188669650 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3793481427 2784674253 5067603 3709403009 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3793481427 2784674253 5067603 3666537740 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 724504571 866495443 2695215306 2273666993 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 724504571 866495443 2695215306 3277012289 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3696992523 2499748007 2381193183 4256475381 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3696992523 2499748007 2381193183 868016148 +conv2d 
fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3999848747 2499748007 1038140209 3720347250 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3999848747 2499748007 1038140209 3079806253 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1193090006 2131084833 720746870 1441382904 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1193090006 2131084833 720746870 2400159337 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3123356211 67170595 2290618404 1832291434 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3123356211 67170595 2290618404 1832291434 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 374093114 3867270480 2959096809 3957922181 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 374093114 3867270480 2959096809 855145836 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3983093985 3903005020 613201518 1067564721 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3983093985 3903005020 613201518 3116342099 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3167361585 764195688 167104692 3451521719 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3167361585 764195688 167104692 2267030515 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2514717945 2683853778 2557576548 3978291544 +conv2d 
fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2514717945 2683853778 2557576548 2608287914 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1072544153 768805504 1993107524 2984226313 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1072544153 768805504 1993107524 3008259681 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3875284691 1195875134 1041989393 1900995663 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3875284691 1195875134 1041989393 392783151 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3179965682 1961598694 1222599802 2151043714 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3179965682 1961598694 1222599802 1013916354 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2615784276 3832095556 1454859747 958765281 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2615784276 3832095556 1454859747 958765281 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 674692187 808380753 1933240382 879488563 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 674692187 808380753 1933240382 1399508988 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 961566038 2529610490 1966904362 1818870269 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 961566038 2529610490 1966904362 2484388308 +conv2d 
fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3562503578 4021646304 3426919182 4130346493 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3562503578 4021646304 3426919182 3486420438 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1144321310 4021646304 4185476157 964986272 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1144321310 4021646304 4185476157 1800819557 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3153402236 2244058322 623381239 4197612113 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3153402236 2244058322 623381239 2816226027 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 438913488 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 2812015344 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 307241203 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 3413043496 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4047780986 3796154296 3933470176 2042558105 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4047780986 3796154296 3933470176 2082898655 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4196208609 640419443 3879567407 3126196765 +conv2d 
fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4196208609 640419443 3879567407 370340580 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1494714374 785253250 1954437668 16145912 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1494714374 785253250 1954437668 352471766 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4104811786 417350231 2268084635 3829320356 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4104811786 417350231 2268084635 3317921115 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3747780903 2173099720 1217977439 3831700802 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3747780903 2173099720 1217977439 2066501805 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2475183841 2416021807 90427526 3879607464 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2475183841 2416021807 90427526 3914420497 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 835429033 3252222904 934864596 2610068735 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 835429033 3252222904 934864596 4160630405 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 83915378 2330495260 722450442 220865236 +conv2d fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1189071464 1036082506 1072586342 +conv2d 
fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1189071464 1036082506 3209300743 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 1152388039 1036082506 2004021933 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 1152388039 1036082506 2004021933 +conv2d dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 2411745115 1585402383 692064357 +conv2d dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 2411745115 1585402383 3191762167 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2493803192 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2915688271 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3987447721 311726682 356690874 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3987447721 311726682 3600277581 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 1986983586 1898068767 1480022038 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 1986983586 1898068767 1482170634 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 3844983048 3628298618 1684782062 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 3844983048 3628298618 1312771437 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 
741820436 3628298618 3927854537 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 741820436 3628298618 3753859236 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 2188679282 3628298618 331956491 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 2188679282 3628298618 3514953364 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 67170595 1307969125 1500995359 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 67170595 1307969125 1500995359 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 3903005020 1541598000 4224357357 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 3903005020 1541598000 1962390570 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 764195688 2813830197 3377586111 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 764195688 2813830197 1315372741 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 2683853778 2983167436 324191835 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 2683853778 2983167436 1085373547 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 768805504 3823519277 1916712483 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 768805504 3823519277 1584393587 
+conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 1195875134 2636428043 1173364286 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 1195875134 2636428043 4109782860 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 785253250 2351024902 3316943078 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 785253250 2351024902 1065622134 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 417350231 2574042419 294608043 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 417350231 2574042419 491435109 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 2173099720 3438778114 3788164138 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 2173099720 3438778114 2104210385 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 2416021807 2145192778 2640510765 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 2416021807 2145192778 2748647248 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3252222904 535024815 169162784 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3252222904 535024815 3841086721 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2329920945 2330495260 1682007569 215965178 +conv2d 
wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 3282100285 1585402383 820104967 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 3282100285 1585402383 820104967 +conv2d wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 1919973277 4103626856 +conv2d wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 1919973277 4164492975 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 2527774459 2695215306 1519678771 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 2527774459 2695215306 4056436284 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3488761154 1737428004 2359527095 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3488761154 1737428004 2240340004 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 2242491745 475190714 3363555017 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 2242491745 475190714 2240937477 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 1231166805 3030543492 1587793687 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 1231166805 3030543492 4129044530 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 1231166805 3002932066 1436246794 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 
1231166805 3002932066 2860692730 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 1231166805 2704246666 3831504826 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 1231166805 2704246666 932276705 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 1585402383 584217576 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 1585402383 584217576 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 2695215306 644274899 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 2695215306 2070383610 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 3269716347 1585402383 2091838742 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 3269716347 1585402383 2091838742 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2208297031 3036494114 1391639360 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2208297031 3036494114 2853291206 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 3638907161 2381193183 4076043307 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 3638907161 2381193183 162466399 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 1748311260 3097125139 1760368998 +conv2d 
wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 1748311260 3097125139 370487040 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 950177617 3097125139 2441672462 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 950177617 3097125139 4093130692 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 3223936144 3268227320 3407045986 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 3223936144 3268227320 3327506975 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 2413111238 4020828414 3782469663 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 2413111238 4020828414 3782469663 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 1849062825 2621782130 1997835815 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 1849062825 2621782130 2176605786 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 1028019427 3062073018 1532225578 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 1028019427 3062073018 3175924537 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 665408258 4119019906 3070068838 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 665408258 4119019906 4095170522 +conv2d 
wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 1329921014 2730633195 1859011130 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 1329921014 2730633195 1829382848 +conv2d wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 654394569 180842421 456267566 +conv2d wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 654394569 180842421 4065629708 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 792561864 3949436648 2346117136 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 792561864 3949436648 3375703263 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 2182213461 3596813277 2680766255 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 2182213461 3596813277 819444109 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 4284293314 3730143675 2330242371 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 4284293314 3730143675 2330242371 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 1488124595 3162712336 2186928678 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 1488124595 3162712336 4038147074 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 1324698024 1813717058 1491574211 +conv2d 
wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 1324698024 1813717058 664642450 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 1617738198 2081693980 1691976762 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 1617738198 2081693980 1352928468 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 169231440 2081693980 4154079864 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 169231440 2081693980 49240552 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 1111691860 1000406280 1708014479 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 1111691860 1000406280 2976393539 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 141423768 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 1807012226 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 4065587708 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 3621154478 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 4018387624 2099843274 3166971951 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 4018387624 2099843274 710495809 +conv2d 
wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 1007447386 3357056235 3446932548 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 1007447386 3357056235 344178277 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 4040055257 852881505 1633246355 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 4040055257 852881505 778858244 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 428926092 1217669626 1090782686 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 428926092 1217669626 1523455590 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 1787889194 2989357662 3134613422 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 1787889194 2989357662 1158845444 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 857847029 3333097025 3239572160 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 857847029 3333097025 2436320376 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3294874067 3443985888 4083335585 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3294874067 3443985888 3302917564 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2329920945 531242570 422976060 2227466455 +conv2d 
wgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 2411745115 1138813722 1312614816 +conv2d wgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 2411745115 1138813722 4095449788 +conv2d_with_broadcast fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 542905940 1152388039 4158385867 734550470 +conv2d_with_broadcast fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 542905940 1152388039 4158385867 734550470 +conv2d_with_broadcast fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1585175937 2411745115 3960856459 2498726848 +conv2d_with_broadcast fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1585175937 2411745115 3960856459 2808936497 +conv2d_with_broadcast fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2793112873 4021016722 3736889909 775549951 +conv2d_with_broadcast fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2793112873 4021016722 3736889909 999245167 +conv2d_with_broadcast fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3892602751 3987447721 1048231784 3578965759 +conv2d_with_broadcast fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3892602751 3987447721 1048231784 3635477992 +conv2d_with_broadcast fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1647798936 1986983586 2046300236 942258126 +conv2d_with_broadcast fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1647798936 1986983586 2046300236 3024889990 +conv2d_with_broadcast fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
hnhwc_hnhwc_hnhwc_f_f 664571366 3844983048 1063165266 1754355425 +conv2d_with_broadcast fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 3844983048 1063165266 3524504078 +conv2d_with_broadcast fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 741820436 4160104467 1904546220 +conv2d_with_broadcast fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 741820436 4160104467 3380363267 +conv2d_with_broadcast fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 2188679282 2508923337 202634559 +conv2d_with_broadcast fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 2188679282 2508923337 129526502 +conv2d_with_broadcast fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 1152388039 3960856459 687707885 +conv2d_with_broadcast fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 1152388039 3960856459 687707885 +conv2d_with_broadcast fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 4021016722 3960856459 551076377 +conv2d_with_broadcast fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 4021016722 3960856459 2736700182 +conv2d_with_broadcast fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3708184432 1152388039 3736889909 3070080445 +conv2d_with_broadcast fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3708184432 1152388039 3736889909 3070080445 +conv2d_with_broadcast fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3793481427 2784674253 206323080 4139449287 
+conv2d_with_broadcast fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3793481427 2784674253 206323080 37247821 +conv2d_with_broadcast fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 724504571 866495443 4160104467 4166513007 +conv2d_with_broadcast fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 724504571 866495443 4160104467 4147817279 +conv2d_with_broadcast fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3696992523 2499748007 798339031 2425875414 +conv2d_with_broadcast fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3696992523 2499748007 798339031 3318636886 +conv2d_with_broadcast fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3999848747 2499748007 3897780943 1663875047 +conv2d_with_broadcast fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3999848747 2499748007 3897780943 433625993 +conv2d_with_broadcast fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1193090006 2131084833 1626339432 2602241257 +conv2d_with_broadcast fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1193090006 2131084833 1626339432 2541313003 +conv2d_with_broadcast fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3123356211 67170595 363851176 2694787359 +conv2d_with_broadcast fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3123356211 67170595 363851176 2694787359 +conv2d_with_broadcast fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 374093114 3867270480 3627748989 1042228997 +conv2d_with_broadcast 
fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 374093114 3867270480 3627748989 2764787715 +conv2d_with_broadcast fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3983093985 3903005020 2973635008 4245228167 +conv2d_with_broadcast fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3983093985 3903005020 2973635008 2011478650 +conv2d_with_broadcast fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3167361585 764195688 1462303962 1213873906 +conv2d_with_broadcast fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3167361585 764195688 1462303962 3997555577 +conv2d_with_broadcast fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2514717945 2683853778 864525589 2026195616 +conv2d_with_broadcast fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2514717945 2683853778 864525589 1429180818 +conv2d_with_broadcast fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1072544153 768805504 2851440397 1223626417 +conv2d_with_broadcast fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1072544153 768805504 2851440397 4064809744 +conv2d_with_broadcast fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3875284691 1195875134 1294114950 3478825820 +conv2d_with_broadcast fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3875284691 1195875134 1294114950 2402092772 +conv2d_with_broadcast fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3179965682 1961598694 1473372001 2976087551 
+conv2d_with_broadcast fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3179965682 1961598694 1473372001 2952357814 +conv2d_with_broadcast fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2615784276 3832095556 3136842101 4100457990 +conv2d_with_broadcast fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2615784276 3832095556 3136842101 4100457990 +conv2d_with_broadcast fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 674692187 808380753 3300005124 3291766543 +conv2d_with_broadcast fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 674692187 808380753 3300005124 932781825 +conv2d_with_broadcast fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 2529610490 3633207144 2483933988 +conv2d_with_broadcast fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 2529610490 3633207144 1649841999 +conv2d_with_broadcast fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3562503578 4021646304 3183877059 640801373 +conv2d_with_broadcast fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3562503578 4021646304 3183877059 1324440001 +conv2d_with_broadcast fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1144321310 4021646304 79191055 2497200625 +conv2d_with_broadcast fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1144321310 4021646304 79191055 4215716431 +conv2d_with_broadcast fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3153402236 2244058322 751167457 
2006726432 +conv2d_with_broadcast fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3153402236 2244058322 751167457 111060226 +conv2d_with_broadcast fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 803435641 +conv2d_with_broadcast fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 1867934544 +conv2d_with_broadcast fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 1900794023 +conv2d_with_broadcast fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 3680542634 +conv2d_with_broadcast fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4047780986 3796154296 1508665058 4120442081 +conv2d_with_broadcast fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4047780986 3796154296 1508665058 2603142772 +conv2d_with_broadcast fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4196208609 640419443 1000858800 1782212700 +conv2d_with_broadcast fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4196208609 640419443 1000858800 3786504077 +conv2d_with_broadcast fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1494714374 785253250 1035710906 607193837 +conv2d_with_broadcast fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1494714374 785253250 1035710906 1133996987 +conv2d_with_broadcast fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4104811786 417350231 
2732854425 3257806996 +conv2d_with_broadcast fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4104811786 417350231 2732854425 3964495721 +conv2d_with_broadcast fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3747780903 2173099720 1359872451 260513555 +conv2d_with_broadcast fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3747780903 2173099720 1359872451 959940766 +conv2d_with_broadcast fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1007447386 3633207144 3338848009 +conv2d_with_broadcast fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1007447386 3633207144 3338848009 +conv2d_with_broadcast fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 7870409 1884736347 2440609834 +conv2d_with_broadcast fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 7870409 1884736347 2440609834 +conv2d_with_broadcast fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 2912193732 1884736347 616776505 +conv2d_with_broadcast fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 2912193732 1884736347 2676398591 +conv2d_with_broadcast fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 1007447386 1884736347 2365143548 +conv2d_with_broadcast fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 1007447386 1884736347 2365143548 +conv2d_with_broadcast fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3832095556 
1694475665 593280269 +conv2d_with_broadcast fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3832095556 1694475665 593280269 +conv2d_with_broadcast fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3855174680 715002355 3155431044 +conv2d_with_broadcast fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3855174680 715002355 3155431044 +conv2d_with_broadcast fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 2822482016 715002355 2715024840 +conv2d_with_broadcast fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 2822482016 715002355 1827980477 +conv2d_with_broadcast fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 223404009 1694475665 1950498144 +conv2d_with_broadcast fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 223404009 1694475665 1950498144 +conv2d_with_broadcast fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 223404009 715002355 2992665436 +conv2d_with_broadcast fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 223404009 715002355 2992665436 +conv2d_with_broadcast fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 887849976 1884736347 737409599 +conv2d_with_broadcast fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 887849976 1884736347 737409599 +conv2d_with_broadcast fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 
3832095556 1159201466 1807823289 +conv2d_with_broadcast fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 3832095556 1159201466 1807823289 +conv2d_with_broadcast fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 417350231 1159201466 2790858874 +conv2d_with_broadcast fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 417350231 1159201466 1803950909 +conv2d_with_broadcast fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 3796154296 1884736347 2211230337 +conv2d_with_broadcast fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 3796154296 1884736347 2211230337 +conv2d_with_broadcast fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 3796154296 1159201466 1707831674 +conv2d_with_broadcast fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 3796154296 1159201466 1707831674 +conv2d_with_broadcast fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1168268462 715002355 3410506082 +conv2d_with_broadcast fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1168268462 715002355 3410506082 +conv2d_with_broadcast fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 887849976 760086468 3430817166 +conv2d_with_broadcast fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 887849976 760086468 3430817166 +conv2d_with_broadcast fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
hnhwc_hnhwc_hnhwc_f_f 2237641409 718873845 760086468 4057506865 +conv2d_with_broadcast fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2237641409 718873845 760086468 1139160912 +conv2d_with_broadcast fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2237641409 1971906017 715002355 3080563386 +conv2d_with_broadcast fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2237641409 1971906017 715002355 3080563386 +conv2d_with_broadcast fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 1971906017 760086468 1857427510 +conv2d_with_broadcast fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 1971906017 760086468 1857427510 +conv2d_with_broadcast fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2475183841 2416021807 2396614489 2728011042 +conv2d_with_broadcast fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2475183841 2416021807 2396614489 48736270 +conv2d_with_broadcast fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 835429033 3252222904 812624363 3532893716 +conv2d_with_broadcast fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 835429033 3252222904 812624363 385208528 +conv2d_with_broadcast fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_f_f 83915378 2330495260 877009950 1683572655 +conv2d_with_reduction fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 542905940 1152388039 4158385867 734550470 +conv2d_with_reduction fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 
hnhwc_hnhwc_hnhwc_f_f 542905940 1152388039 4158385867 734550470 +conv2d_with_reduction fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1585175937 2411745115 3960856459 2498726848 +conv2d_with_reduction fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1585175937 2411745115 3960856459 2808936497 +conv2d_with_reduction fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2793112873 4021016722 3736889909 775549951 +conv2d_with_reduction fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2793112873 4021016722 3736889909 999245167 +conv2d_with_reduction fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3892602751 3987447721 1048231784 3578965759 +conv2d_with_reduction fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3892602751 3987447721 1048231784 3635477992 +conv2d_with_reduction fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1647798936 1986983586 2046300236 942258126 +conv2d_with_reduction fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1647798936 1986983586 2046300236 3024889990 +conv2d_with_reduction fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 3844983048 1063165266 1754355425 +conv2d_with_reduction fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 3844983048 1063165266 3524504078 +conv2d_with_reduction fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 741820436 4160104467 1904546220 +conv2d_with_reduction fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 741820436 4160104467 3380363267 
+conv2d_with_reduction fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 2188679282 2508923337 202634559 +conv2d_with_reduction fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 664571366 2188679282 2508923337 129526502 +conv2d_with_reduction fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 1152388039 3960856459 687707885 +conv2d_with_reduction fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 1152388039 3960856459 687707885 +conv2d_with_reduction fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 4021016722 3960856459 551076377 +conv2d_with_reduction fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3376807219 4021016722 3960856459 2736700182 +conv2d_with_reduction fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3708184432 1152388039 3736889909 3070080445 +conv2d_with_reduction fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3708184432 1152388039 3736889909 3070080445 +conv2d_with_reduction fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3793481427 2784674253 206323080 4139449287 +conv2d_with_reduction fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3793481427 2784674253 206323080 37247821 +conv2d_with_reduction fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 724504571 866495443 4160104467 4166513007 +conv2d_with_reduction fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 724504571 866495443 4160104467 4147817279 +conv2d_with_reduction 
fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3696992523 2499748007 798339031 2425875414 +conv2d_with_reduction fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3696992523 2499748007 798339031 3318636886 +conv2d_with_reduction fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3999848747 2499748007 3897780943 1663875047 +conv2d_with_reduction fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3999848747 2499748007 3897780943 433625993 +conv2d_with_reduction fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1193090006 2131084833 1626339432 2602241257 +conv2d_with_reduction fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1193090006 2131084833 1626339432 2541313003 +conv2d_with_reduction fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3123356211 67170595 363851176 2694787359 +conv2d_with_reduction fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3123356211 67170595 363851176 2694787359 +conv2d_with_reduction fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 374093114 3867270480 3627748989 1042228997 +conv2d_with_reduction fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 374093114 3867270480 3627748989 2764787715 +conv2d_with_reduction fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3983093985 3903005020 2973635008 4245228167 +conv2d_with_reduction fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3983093985 3903005020 2973635008 2011478650 +conv2d_with_reduction 
fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3167361585 764195688 1462303962 1213873906 +conv2d_with_reduction fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3167361585 764195688 1462303962 3997555577 +conv2d_with_reduction fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2514717945 2683853778 864525589 2026195616 +conv2d_with_reduction fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2514717945 2683853778 864525589 1429180818 +conv2d_with_reduction fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1072544153 768805504 2851440397 1223626417 +conv2d_with_reduction fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1072544153 768805504 2851440397 4064809744 +conv2d_with_reduction fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3875284691 1195875134 1294114950 3478825820 +conv2d_with_reduction fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3875284691 1195875134 1294114950 2402092772 +conv2d_with_reduction fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3179965682 1961598694 1473372001 2976087551 +conv2d_with_reduction fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3179965682 1961598694 1473372001 2952357814 +conv2d_with_reduction fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2615784276 3832095556 3136842101 4100457990 +conv2d_with_reduction fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2615784276 3832095556 3136842101 4100457990 
+conv2d_with_reduction fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 674692187 808380753 3300005124 3291766543 +conv2d_with_reduction fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 674692187 808380753 3300005124 932781825 +conv2d_with_reduction fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 2529610490 3633207144 2483933988 +conv2d_with_reduction fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 2529610490 3633207144 1649841999 +conv2d_with_reduction fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3562503578 4021646304 3183877059 640801373 +conv2d_with_reduction fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3562503578 4021646304 3183877059 1324440001 +conv2d_with_reduction fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1144321310 4021646304 79191055 2497200625 +conv2d_with_reduction fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1144321310 4021646304 79191055 4215716431 +conv2d_with_reduction fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3153402236 2244058322 751167457 2006726432 +conv2d_with_reduction fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3153402236 2244058322 751167457 111060226 +conv2d_with_reduction fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 803435641 +conv2d_with_reduction fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 
1867934544 +conv2d_with_reduction fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 1900794023 +conv2d_with_reduction fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3356996599 4021646304 625876979 3680542634 +conv2d_with_reduction fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4047780986 3796154296 1508665058 4120442081 +conv2d_with_reduction fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4047780986 3796154296 1508665058 2603142772 +conv2d_with_reduction fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4196208609 640419443 1000858800 1782212700 +conv2d_with_reduction fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4196208609 640419443 1000858800 3786504077 +conv2d_with_reduction fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1494714374 785253250 1035710906 607193837 +conv2d_with_reduction fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1494714374 785253250 1035710906 1133996987 +conv2d_with_reduction fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4104811786 417350231 2732854425 3257806996 +conv2d_with_reduction fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4104811786 417350231 2732854425 3964495721 +conv2d_with_reduction fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3747780903 2173099720 1359872451 260513555 +conv2d_with_reduction fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3747780903 2173099720 1359872451 
959940766 +conv2d_with_reduction fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1007447386 3633207144 3338848009 +conv2d_with_reduction fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1007447386 3633207144 3338848009 +conv2d_with_reduction fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 7870409 1884736347 2440609834 +conv2d_with_reduction fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 7870409 1884736347 2440609834 +conv2d_with_reduction fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 2912193732 1884736347 616776505 +conv2d_with_reduction fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 2912193732 1884736347 2676398591 +conv2d_with_reduction fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 1007447386 1884736347 2365143548 +conv2d_with_reduction fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 1007447386 1884736347 2365143548 +conv2d_with_reduction fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3832095556 1694475665 593280269 +conv2d_with_reduction fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3832095556 1694475665 593280269 +conv2d_with_reduction fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3855174680 715002355 3155431044 +conv2d_with_reduction fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 3557673531 3855174680 
715002355 3155431044 +conv2d_with_reduction fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 2822482016 715002355 2715024840 +conv2d_with_reduction fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 2822482016 715002355 1827980477 +conv2d_with_reduction fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 223404009 1694475665 1950498144 +conv2d_with_reduction fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 223404009 1694475665 1950498144 +conv2d_with_reduction fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 223404009 715002355 2992665436 +conv2d_with_reduction fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 223404009 715002355 2992665436 +conv2d_with_reduction fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 887849976 1884736347 737409599 +conv2d_with_reduction fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 887849976 1884736347 737409599 +conv2d_with_reduction fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 3832095556 1159201466 1807823289 +conv2d_with_reduction fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 961566038 3832095556 1159201466 1807823289 +conv2d_with_reduction fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 417350231 1159201466 2790858874 +conv2d_with_reduction fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 
417350231 1159201466 1803950909 +conv2d_with_reduction fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 3796154296 1884736347 2211230337 +conv2d_with_reduction fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2111106579 3796154296 1884736347 2211230337 +conv2d_with_reduction fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 3796154296 1159201466 1707831674 +conv2d_with_reduction fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 3796154296 1159201466 1707831674 +conv2d_with_reduction fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1168268462 715002355 3410506082 +conv2d_with_reduction fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 1168268462 715002355 3410506082 +conv2d_with_reduction fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 887849976 760086468 3430817166 +conv2d_with_reduction fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 1421336162 887849976 760086468 3430817166 +conv2d_with_reduction fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2237641409 718873845 760086468 4057506865 +conv2d_with_reduction fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2237641409 718873845 760086468 1139160912 +conv2d_with_reduction fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2237641409 1971906017 715002355 3080563386 +conv2d_with_reduction fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 
2237641409 1971906017 715002355 3080563386 +conv2d_with_reduction fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 1971906017 760086468 1857427510 +conv2d_with_reduction fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 4108139691 1971906017 760086468 1857427510 +conv2d_with_reduction fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2475183841 2416021807 2396614489 2728011042 +conv2d_with_reduction fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 2475183841 2416021807 2396614489 48736270 +conv2d_with_reduction fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 835429033 3252222904 812624363 3532893716 +conv2d_with_reduction fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_hnhwc_f_f 835429033 3252222904 812624363 385208528 +conv2d_with_reduction fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_hnhwc_f_f 83915378 2330495260 877009950 1683572655 +conv3d wgrad_1x1x3x3x8_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 260402488 3429884614 1585402383 833795101 +conv3d wgrad_1x1x3x3x8_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 260402488 3429884614 1585402383 833795101 +conv3d wgrad_1x1x1x8x8_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 3289763692 3767246007 1919973277 3027845106 +conv3d wgrad_1x1x1x8x8_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 3289763692 3767246007 1919973277 3438501812 +conv3d wgrad_1x8x8x8x8_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2160711118 237996982 3097125139 3385458653 
+conv3d wgrad_1x8x8x8x8_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2160711118 237996982 3097125139 4067030004 +conv3d wgrad_1x16x16x16x8_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3097125139 2362430426 +conv3d wgrad_1x16x16x16x8_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3097125139 232096215 +conv3d wgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2925080861 1932218676 4010744414 1482409913 +conv3d wgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2925080861 1932218676 4010744414 3679067567 +conv3d wgrad_1x2x1x1x8_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2490670429 1753332435 375701898 2590121240 +conv3d wgrad_1x2x1x1x8_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2490670429 1753332435 375701898 1128734185 +conv3d wgrad_1x1x7x7x8_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2588182096 66500551 2381193183 2086127414 +conv3d wgrad_1x1x7x7x8_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2588182096 66500551 2381193183 1043269053 +conv3d wgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2938214523 2512139560 1761528323 1783334553 +conv3d wgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2938214523 2512139560 1761528323 1179670460 +conv3d wgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 2455664422 3608965793 765434198 +conv3d 
wgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 2455664422 3608965793 188802405 +conv3d wgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 790945652 2636428043 693887212 +conv3d wgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 790945652 2636428043 384409630 +conv3d wgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 790945652 843165254 3460165373 +conv3d wgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 790945652 843165254 3773819495 +conv3d wgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3823648244 533514635 +conv3d wgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3823648244 1741504291 +conv3d wgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 2117188192 2396130566 737907007 +conv3d wgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 2117188192 2396130566 2545563286 +conv3d wgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 1323666964 3626031696 399985743 +conv3d wgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 1323666964 3626031696 2555400535 +conv3d wgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 2017705694 3626031696 2729880203 +conv3d 
wgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 2017705694 3626031696 2506274387 +conv3d wgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2070552692 3823648244 244959174 +conv3d wgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2070552692 3823648244 488179326 +conv3d wgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2455664422 357908110 1536430798 +conv3d wgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2455664422 357908110 943604539 +conv3d wgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 790945652 4105258568 2484072263 +conv3d wgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 790945652 4105258568 1264798027 +conv3d wgrad_1x8x8x8x32_66x6_32x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha2_beta2 hndhwc_hndhwc_fndhwc_f_f 3757300649 1729998821 2636428043 857756936 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm80.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm80.txt new file mode 100644 index 00000000..c4bf06c0 --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_sm80.txt @@ -0,0 +1,283 @@ +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1152388039 486967919 3706990238 +conv2d fprop_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1152388039 486967919 3706990238 +conv2d 
fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1585175937 2411745115 1919973277 506127908 +conv2d fprop_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1585175937 2411745115 1919973277 2193864160 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 3872812830 +conv2d fprop_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2445012180 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3892602751 3987447721 2963892063 3556328678 +conv2d fprop_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3892602751 3987447721 2963892063 2088376980 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1647798936 1986983586 3862463878 498933688 +conv2d fprop_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1647798936 1986983586 3862463878 2581533408 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 3844983048 373331791 3515334098 +conv2d fprop_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 3844983048 373331791 2566159340 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 741820436 2695215306 4235633178 +conv2d fprop_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 741820436 2695215306 815310604 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 2188679282 2132201986 1043483661 +conv2d fprop_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 664571366 
2188679282 2132201986 2172013863 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 1152388039 1919973277 1836150764 +conv2d fprop_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 1152388039 1919973277 1836150764 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 4021016722 1919973277 3537245111 +conv2d fprop_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3376807219 4021016722 1919973277 1166347001 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3708184432 1152388039 1911021900 4188669650 +conv2d fprop_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3708184432 1152388039 1911021900 4188669650 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3793481427 2784674253 5067603 3709403009 +conv2d fprop_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3793481427 2784674253 5067603 3666537740 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 724504571 866495443 2695215306 2273666993 +conv2d fprop_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 724504571 866495443 2695215306 3277012289 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3696992523 2499748007 2381193183 4256475381 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3696992523 2499748007 2381193183 868016148 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3999848747 2499748007 1038140209 3720347250 +conv2d 
fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3999848747 2499748007 1038140209 3079806253 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1193090006 2131084833 720746870 1441382904 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1193090006 2131084833 720746870 2400159337 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3123356211 67170595 2290618404 1832291434 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3123356211 67170595 2290618404 1832291434 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 374093114 3867270480 2959096809 3957922181 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 374093114 3867270480 2959096809 855145836 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3983093985 3903005020 613201518 1067564721 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3983093985 3903005020 613201518 3116342099 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3167361585 764195688 167104692 3451521719 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3167361585 764195688 167104692 2267030515 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2514717945 2683853778 2557576548 3978291544 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2514717945 2683853778 2557576548 2608287914 +conv2d 
fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1072544153 768805504 1993107524 2984226313 +conv2d fprop_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1072544153 768805504 1993107524 3008259681 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3875284691 1195875134 1041989393 1900995663 +conv2d fprop_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3875284691 1195875134 1041989393 392783151 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3179965682 1961598694 1222599802 2151043714 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3179965682 1961598694 1222599802 1013916354 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2615784276 3832095556 1454859747 958765281 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2615784276 3832095556 1454859747 958765281 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 674692187 808380753 1933240382 879488563 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 674692187 808380753 1933240382 1399508988 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 961566038 2529610490 1966904362 1818870269 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 961566038 2529610490 1966904362 2484388308 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3562503578 4021646304 3426919182 4130346493 +conv2d 
fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3562503578 4021646304 3426919182 3486420438 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1144321310 4021646304 4185476157 964986272 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1144321310 4021646304 4185476157 1800819557 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3153402236 2244058322 623381239 4197612113 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3153402236 2244058322 623381239 2816226027 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 438913488 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 2812015344 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 307241203 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3356996599 4021646304 748585111 3413043496 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4047780986 3796154296 3933470176 2042558105 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4047780986 3796154296 3933470176 2082898655 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4196208609 640419443 3879567407 3126196765 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4196208609 640419443 3879567407 370340580 +conv2d 
fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1494714374 785253250 1954437668 16145912 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1494714374 785253250 1954437668 352471766 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4104811786 417350231 2268084635 3829320356 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4104811786 417350231 2268084635 3317921115 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3747780903 2173099720 1217977439 3831700802 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3747780903 2173099720 1217977439 2066501805 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2475183841 2416021807 90427526 3879607464 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2475183841 2416021807 90427526 3914420497 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 835429033 3252222904 934864596 2610068735 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 835429033 3252222904 934864596 4160630405 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 83915378 2330495260 722450442 220865236 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 1152388039 1036082506 2004021933 +conv2d dgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 1152388039 1036082506 2004021933 +conv2d 
dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 2411745115 1585402383 692064357 +conv2d dgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 2411745115 1585402383 3191762167 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2493803192 +conv2d dgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 4021016722 1911021900 2915688271 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3987447721 311726682 356690874 +conv2d dgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3987447721 311726682 3600277581 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 1986983586 1898068767 1480022038 +conv2d dgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 1986983586 1898068767 1482170634 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 3844983048 3628298618 1684782062 +conv2d dgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 3844983048 3628298618 1312771437 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 741820436 3628298618 3927854537 +conv2d dgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 741820436 3628298618 3753859236 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 2188679282 3628298618 331956491 +conv2d dgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 
2188679282 3628298618 3514953364 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 67170595 1307969125 1500995359 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 67170595 1307969125 1500995359 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 3903005020 1541598000 4224357357 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 3903005020 1541598000 1962390570 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 764195688 2813830197 3377586111 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 764195688 2813830197 1315372741 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 2683853778 2983167436 324191835 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 2683853778 2983167436 1085373547 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 768805504 3823519277 1916712483 +conv2d dgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 768805504 3823519277 1584393587 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 1195875134 2636428043 1173364286 +conv2d dgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 1195875134 2636428043 4109782860 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 785253250 
2351024902 3316943078 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 785253250 2351024902 1065622134 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 417350231 2574042419 294608043 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 417350231 2574042419 491435109 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 2173099720 3438778114 3788164138 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 2173099720 3438778114 2104210385 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 2416021807 2145192778 2640510765 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 2416021807 2145192778 2748647248 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3252222904 535024815 169162784 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3252222904 535024815 3841086721 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2329920945 2330495260 1682007569 215965178 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 3282100285 1585402383 820104967 +conv2d wgrad_1x1x1x8_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 558763846 3282100285 1585402383 820104967 +conv2d wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 1919973277 4103626856 +conv2d 
wgrad_1x1x8x8_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 1919973277 4164492975 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 2527774459 2695215306 1519678771 +conv2d wgrad_1x7x8x8_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 2527774459 2695215306 4056436284 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3488761154 1737428004 2359527095 +conv2d wgrad_1x7x9x8_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 390767101 3488761154 1737428004 2240340004 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 2242491745 475190714 3363555017 +conv2d wgrad_2x7x9x8_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2346452629 2242491745 475190714 2240937477 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 1231166805 3030543492 1587793687 +conv2d wgrad_3x7x9x8_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 695385844 1231166805 3030543492 4129044530 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 1231166805 3002932066 1436246794 +conv2d wgrad_3x7x9x8_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 1231166805 3002932066 2860692730 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 1231166805 2704246666 3831504826 +conv2d wgrad_3x7x9x8_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2078642274 1231166805 2704246666 932276705 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 
3952246738 1585402383 584217576 +conv2d wgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 1585402383 584217576 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 2695215306 644274899 +conv2d wgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 3952246738 2695215306 2070383610 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 3269716347 1585402383 2091838742 +conv2d wgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 3269716347 1585402383 2091838742 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2208297031 3036494114 1391639360 +conv2d wgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2208297031 3036494114 2853291206 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 3638907161 2381193183 4076043307 +conv2d wgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 3638907161 2381193183 162466399 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 1748311260 3097125139 1760368998 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 1748311260 3097125139 370487040 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 950177617 3097125139 2441672462 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 950177617 3097125139 4093130692 +conv2d 
wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 3223936144 3268227320 3407045986 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 3223936144 3268227320 3327506975 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 2413111238 4020828414 3782469663 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1371340519 2413111238 4020828414 3782469663 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 1849062825 2621782130 1997835815 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 1849062825 2621782130 2176605786 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 1028019427 3062073018 1532225578 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2815270590 1028019427 3062073018 3175924537 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 665408258 4119019906 3070068838 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2371881721 665408258 4119019906 4095170522 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 1329921014 2730633195 1859011130 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2314925659 1329921014 2730633195 1829382848 +conv2d wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 654394569 180842421 456267566 +conv2d 
wgrad_1x15x19x40_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1327011661 654394569 180842421 4065629708 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 792561864 3949436648 2346117136 +conv2d wgrad_1x16x24x72_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2223985070 792561864 3949436648 3375703263 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 2182213461 3596813277 2680766255 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 2182213461 3596813277 819444109 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 4284293314 3730143675 2330242371 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 4284293314 3730143675 2330242371 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 1488124595 3162712336 2186928678 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 1488124595 3162712336 4038147074 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 1324698024 1813717058 1491574211 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 1324698024 1813717058 664642450 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 1617738198 2081693980 1691976762 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 1617738198 2081693980 1352928468 +conv2d 
wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 169231440 2081693980 4154079864 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 169231440 2081693980 49240552 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 1111691860 1000406280 1708014479 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 1111691860 1000406280 2976393539 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 141423768 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 1807012226 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 4065587708 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 3782006390 2081693980 3621154478 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 4018387624 2099843274 3166971951 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 4018387624 2099843274 710495809 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 1007447386 3357056235 3446932548 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 1007447386 3357056235 344178277 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 4040055257 852881505 1633246355 +conv2d 
wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2519843766 4040055257 852881505 778858244 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 428926092 1217669626 1090782686 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4232110637 428926092 1217669626 1523455590 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 1787889194 2989357662 3134613422 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3630316758 1787889194 2989357662 1158845444 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 857847029 3333097025 3239572160 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 866609547 857847029 3333097025 2436320376 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3294874067 3443985888 4083335585 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2538496988 3294874067 3443985888 3302917564 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 2329920945 531242570 422976060 2227466455 +conv2d wgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 2411745115 1138813722 1312614816 +conv2d wgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 2411745115 1138813722 4095449788 +conv2d dgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 2564029204 3847371704 +conv2d 
dgrad_1x11x7x8_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 1152388039 2564029204 3847371704 +conv2d dgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 4021016722 2564029204 2275490218 +conv2d dgrad_1x11x7x8_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2125023038 4021016722 2564029204 2132854116 +conv2d dgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 1152388039 1676221042 3084298807 +conv2d dgrad_1x13x11x8_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2793112873 1152388039 1676221042 3084298807 +conv2d dgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2784674253 191365871 3758840577 +conv2d dgrad_1x17x19x8_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 624119942 2784674253 191365871 201424948 +conv2d dgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 866495443 4038686961 1705939247 +conv2d dgrad_1x23x5x8_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 118714643 866495443 4038686961 2657530738 +conv2d dgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 2499748007 688732356 3362895769 +conv2d dgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3912955093 2499748007 688732356 4133050271 +conv2d dgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 2499748007 3810393490 1868454698 +conv2d dgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2380180646 2499748007 3810393490 4121127243 +conv2d dgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 
hnhwc_hnhwc_fnhwc_f_f 4197955097 2131084833 2598385656 2382857802 +conv2d dgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4197955097 2131084833 2598385656 3397661960 +conv2d dgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 3867270480 2925287275 2208597029 +conv2d dgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 4064585156 3867270480 2925287275 2274812992 +conv2d dgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 1961598694 1379168514 1077915308 +conv2d dgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1663038387 1961598694 1379168514 397243715 +conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 3832095556 2917865891 4107392040 +conv2d dgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2518156720 3832095556 2917865891 4107392040 +conv2d dgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 808380753 3243383160 254649230 +conv2d dgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1153452126 808380753 3243383160 1326012988 +conv2d dgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 2529610490 78092922 937037280 +conv2d dgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3557673531 2529610490 78092922 1858025388 +conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3373735934 4021646304 2515450069 981269351 +conv2d dgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 
3373735934 4021646304 2515450069 1054201594 +conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 4021646304 2499255921 2519894620 +conv2d dgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 1883441731 4021646304 2499255921 779628000 +conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 2244058322 2208482181 1483418711 +conv2d dgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2212544074 2244058322 2208482181 4167030585 +conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 4021646304 3459396734 456980924 +conv2d dgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 4021646304 3459396734 2013696151 +conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 4021646304 3459396734 3583381428 +conv2d dgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 841696772 4021646304 3459396734 3556307685 +conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 3796154296 4084859749 2972648305 +conv2d dgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3869576724 3796154296 4084859749 2483086888 +conv2d dgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 640419443 843165254 729442897 +conv2d dgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 2106265431 640419443 843165254 1567894631 +conv2d dgrad_1x56x56x8_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha2_beta2 hnhwc_hnhwc_fnhwc_f_f 3092573237 
1152388039 1813717058 3398247359 +conv2d dgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1189071464 1919973277 257167289 +conv2d dgrad_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 542905940 1189071464 1919973277 419044577 +conv2d dgrad_1x56x56x12_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3092573237 1046014840 3129171903 3024531293 +conv2d dgrad_1x56x56x12_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3092573237 1046014840 3129171903 3024531293 +conv2d dgrad_1x55x55x12_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3092573237 1046014840 1050854832 1177039304 +conv2d dgrad_1x55x55x12_28x28_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 hnhwc_hnhwc_fnhwc_f_f 3092573237 1046014840 1050854832 1177039304 +conv3d wgrad_1x1x3x3x8_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 260402488 3429884614 1585402383 833795101 +conv3d wgrad_1x1x3x3x8_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 260402488 3429884614 1585402383 833795101 +conv3d wgrad_1x1x1x8x8_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 3289763692 3767246007 1919973277 3027845106 +conv3d wgrad_1x1x1x8x8_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 3289763692 3767246007 1919973277 3438501812 +conv3d wgrad_1x8x8x8x8_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2160711118 237996982 3097125139 3385458653 +conv3d wgrad_1x8x8x8x8_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2160711118 237996982 3097125139 4067030004 +conv3d wgrad_1x16x16x16x8_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 
2117188192 3097125139 2362430426 +conv3d wgrad_1x16x16x16x8_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3097125139 232096215 +conv3d wgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2925080861 1932218676 4010744414 1482409913 +conv3d wgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2925080861 1932218676 4010744414 3679067567 +conv3d wgrad_1x2x1x1x8_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2490670429 1753332435 375701898 2590121240 +conv3d wgrad_1x2x1x1x8_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2490670429 1753332435 375701898 1128734185 +conv3d wgrad_1x1x7x7x8_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2588182096 66500551 2381193183 2086127414 +conv3d wgrad_1x1x7x7x8_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2588182096 66500551 2381193183 1043269053 +conv3d wgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2938214523 2512139560 1761528323 1783334553 +conv3d wgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 2938214523 2512139560 1761528323 1179670460 +conv3d wgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 2455664422 3608965793 765434198 +conv3d wgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 2455664422 3608965793 188802405 +conv3d wgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 790945652 2636428043 693887212 +conv3d 
wgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 795080053 790945652 2636428043 384409630 +conv3d wgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 790945652 843165254 3460165373 +conv3d wgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 790945652 843165254 3773819495 +conv3d wgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3823648244 533514635 +conv3d wgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 4101517777 2117188192 3823648244 1741504291 +conv3d wgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 2117188192 2396130566 737907007 +conv3d wgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 2117188192 2396130566 2545563286 +conv3d wgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 1323666964 3626031696 399985743 +conv3d wgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1247243134 1323666964 3626031696 2555400535 +conv3d wgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 2017705694 3626031696 2729880203 +conv3d wgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 2017705694 3626031696 2506274387 +conv3d wgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2070552692 3823648244 244959174 +conv3d 
wgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2070552692 3823648244 488179326 +conv3d wgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2455664422 357908110 1536430798 +conv3d wgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 1092746437 2455664422 357908110 943604539 +conv3d wgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 790945652 4105258568 2484072263 +conv3d wgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 hndhwc_hndhwc_fndhwc_f_f 796662877 790945652 4105258568 1264798027 +conv3d wgrad_1x8x8x8x32_66x6_32x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha2_beta2 hndhwc_hndhwc_fndhwc_f_f 3757300649 1729998821 2636428043 857756936 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80.txt new file mode 100644 index 00000000..30531f4c --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_f32_tf32_sm80.txt @@ -0,0 +1,310 @@ +conv2d fprop_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3717255578 1782345730 486967919 3628559084 +conv2d fprop_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3717255578 1782345730 486967919 3628559084 +conv2d fprop_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1438136294 118775326 1919973277 3060288390 +conv2d fprop_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1438136294 118775326 1919973277 2970039207 +conv2d 
fprop_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2098961808 3028917290 1911021900 2231724975 +conv2d fprop_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2098961808 3028917290 1911021900 32368867 +conv2d fprop_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2965017607 403875147 2963892063 3617758931 +conv2d fprop_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2965017607 403875147 2963892063 1213867461 +conv2d fprop_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 667266590 2406505019 3862463878 1032157693 +conv2d fprop_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 667266590 2406505019 3862463878 2512484574 +conv2d fprop_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2475006452 3635815590 373331791 156090483 +conv2d fprop_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2475006452 3635815590 373331791 2125643627 +conv2d fprop_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2475006452 511925094 2695215306 1183833128 +conv2d fprop_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2475006452 511925094 2695215306 950863078 +conv2d fprop_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2475006452 2496949223 2132201986 1645211922 +conv2d fprop_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2475006452 2496949223 2132201986 888922489 +conv2d fprop_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 997827848 1782345730 1919973277 3242947060 +conv2d 
fprop_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 997827848 1782345730 1919973277 3242947060 +conv2d fprop_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 997827848 3028917290 1919973277 2570877648 +conv2d fprop_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 997827848 3028917290 1919973277 3601912269 +conv2d fprop_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2114904520 1782345730 1911021900 1777629475 +conv2d fprop_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2114904520 1782345730 1911021900 1777629475 +conv2d fprop_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2323582907 3813900300 5067603 2056042422 +conv2d fprop_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2323582907 3813900300 5067603 3581028648 +conv2d fprop_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2541524845 3272268888 2695215306 1284148249 +conv2d fprop_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2541524845 3272268888 2695215306 529645467 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 185683981 2918738410 2381193183 3146807750 +conv2d fprop_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 185683981 2918738410 2381193183 3919362272 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 588375524 2918738410 1038140209 2259020777 +conv2d fprop_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 588375524 2918738410 1038140209 2073872618 +conv2d 
fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 637528927 1554396804 720746870 2382111738 +conv2d fprop_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 637528927 1554396804 720746870 1697335701 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 867397053 1472739692 2290618404 3626933660 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 867397053 1472739692 2290618404 3626933660 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3560734409 796697554 2959096809 3337392869 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3560734409 796697554 2959096809 1465273757 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 613937219 2493469875 613201518 2545095524 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 613937219 2493469875 613201518 119102005 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3361618670 3613518632 167104692 2075508652 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3361618670 3613518632 167104692 249956416 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2077916351 2183908250 2557576548 825424571 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2077916351 2183908250 2557576548 3723250582 +conv2d fprop_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
tf32nhwc_tf32nhwc_fnhwc_f_f 3337465330 687027832 1993107524 3917445532 +conv2d fprop_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3337465330 687027832 1993107524 723144309 +conv2d fprop_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2259030042 474618004 1041989393 4007092054 +conv2d fprop_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2259030042 474618004 1041989393 121750431 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 698239287 1381476450 1222599802 2705358355 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 698239287 1381476450 1222599802 1921082534 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3178095839 69226238 1454859747 2429064071 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3178095839 69226238 1454859747 2429064071 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1986322298 2695937708 1933240382 258792262 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1986322298 2695937708 1933240382 1350961289 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3975148258 1880796449 1966904362 2674132654 +conv2d fprop_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3975148258 1880796449 1966904362 4061823028 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 864161797 2648936894 3426919182 1513280885 +conv2d 
fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 864161797 2648936894 3426919182 3304279324 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3665852514 2648936894 4185476157 1062588307 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3665852514 2648936894 4185476157 1737400068 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1720566144 613651087 623381239 3456180201 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1720566144 613651087 623381239 612510019 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3148446926 2648936894 748585111 1918078890 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3148446926 2648936894 748585111 1296506717 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3148446926 2648936894 748585111 720321141 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3148446926 2648936894 748585111 2250020808 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3133916250 3595579175 3933470176 1216324411 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3133916250 3595579175 3933470176 1337638334 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 647896480 614033835 3879567407 2187352956 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 
tf32nhwc_tf32nhwc_fnhwc_f_f 647896480 614033835 3879567407 3505172898 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1692261965 2681206785 1954437668 3275330607 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1692261965 2681206785 1954437668 3211342413 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 5417052 782710099 2268084635 2574304052 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 5417052 782710099 2268084635 959147198 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 81064849 3635880243 1217977439 1759317211 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 81064849 3635880243 1217977439 3077847987 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1369732128 3852936634 90427526 1404446496 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1369732128 3852936634 90427526 4261451781 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1636922998 3114239439 934864596 1375658938 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1636922998 3114239439 934864596 90410819 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 tf32nhwc_tf32nhwc_fnhwc_f_f 2874774314 3318673787 722450442 726679987 +conv2d fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 3518807763 1036082506 3048105666 +conv2d 
fprop_1x4x4x12_1x1_8x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 3518807763 1036082506 3968624793 +conv2d dgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1544571845 1782345730 3890556343 3840696830 +conv2d dgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1544571845 1782345730 3890556343 3840696830 +conv2d dgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 118775326 2980124304 2572054323 +conv2d dgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 118775326 2980124304 4006425566 +conv2d dgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3304381737 3028917290 802623080 1655787066 +conv2d dgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3304381737 3028917290 802623080 2956350850 +conv2d dgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2123037179 403875147 2738672641 962312341 +conv2d dgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2123037179 403875147 2738672641 3126435132 +conv2d dgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2220887882 2406505019 311726682 4061102478 +conv2d dgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2220887882 2406505019 311726682 2314369199 +conv2d dgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3209955145 3635815590 1146816700 3996872848 +conv2d dgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3209955145 3635815590 1146816700 2269688958 +conv2d 
dgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3007342694 511925094 1146816700 1719436727 +conv2d dgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3007342694 511925094 1146816700 4236972782 +conv2d dgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3553706784 2496949223 1146816700 1307853167 +conv2d dgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3553706784 2496949223 1146816700 469937848 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1430474811 1472739692 1307969125 2041137857 +conv2d dgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1430474811 1472739692 1307969125 2041137857 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1394549732 2493469875 1541598000 562462849 +conv2d dgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1394549732 2493469875 1541598000 585918261 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 136395274 3613518632 2813830197 3551637995 +conv2d dgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 136395274 3613518632 2813830197 2284323213 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 176279613 2183908250 2983167436 866644788 +conv2d dgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 176279613 2183908250 2983167436 253066315 +conv2d dgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 4065225089 
687027832 1092494080 274183605 +conv2d dgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 4065225089 687027832 1092494080 1241514327 +conv2d dgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3973136713 474618004 3541920582 2018009151 +conv2d dgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3973136713 474618004 3541920582 1303918704 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3059307623 2681206785 2351024902 941952011 +conv2d dgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3059307623 2681206785 2351024902 3577527602 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1541852759 782710099 2574042419 2518679986 +conv2d dgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1541852759 782710099 2574042419 3797768014 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 310867128 3635880243 3438778114 508409341 +conv2d dgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 310867128 3635880243 3438778114 3918768286 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1053420568 3852936634 2145192778 3649177283 +conv2d dgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1053420568 3852936634 2145192778 2392118609 +conv2d dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 822175023 3114239439 535024815 4039199216 +conv2d 
dgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 822175023 3114239439 535024815 2891577765 +conv2d dgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 tf32nhwc_tf32nhwc_fnhwc_f_f 1919899681 3318673787 1682007569 2212069629 +conv2d wgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1544571845 2859715967 2980124304 2169312899 +conv2d wgrad_1x1x1x4_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1544571845 2859715967 2980124304 2169312899 +conv2d wgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 1782345730 294738383 1067638259 +conv2d wgrad_1x1x8x4_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 1782345730 294738383 504959928 +conv2d wgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3304381737 4201241820 1588130777 587511439 +conv2d wgrad_1x7x8x4_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3304381737 4201241820 1588130777 3767308084 +conv2d wgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2123037179 442597470 3036494114 4124366680 +conv2d wgrad_1x7x9x4_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2123037179 442597470 3036494114 2833785422 +conv2d wgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2220887882 347064917 1239438433 1775116239 +conv2d wgrad_2x7x9x4_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2220887882 347064917 1239438433 1288542483 +conv2d wgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3209955145 3544266385 4207987780 4054255409 +conv2d 
wgrad_3x7x9x4_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3209955145 3544266385 4207987780 180834054 +conv2d wgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3007342694 3544266385 2381193183 353843599 +conv2d wgrad_3x7x9x4_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3007342694 3544266385 2381193183 1860329513 +conv2d wgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3553706784 3544266385 2224388272 347055977 +conv2d wgrad_3x7x9x4_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3553706784 3544266385 2224388272 994788649 +conv2d wgrad_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 4212246537 2980124304 1448608264 +conv2d wgrad_1x11x7x4_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 4212246537 2980124304 1448608264 +conv2d wgrad_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 4212246537 1588130777 2381531672 +conv2d wgrad_1x11x7x4_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 8625275 4212246537 1588130777 3417974847 +conv2d wgrad_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3304381737 1617004893 2980124304 2512582600 +conv2d wgrad_1x13x11x4_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3304381737 1617004893 2980124304 2512582600 +conv2d wgrad_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1244986212 3239036557 3840288384 2880620663 +conv2d wgrad_1x17x19x4_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1244986212 3239036557 3840288384 2552969500 +conv2d 
wgrad_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3007342694 292469256 2695215306 3059274099 +conv2d wgrad_1x23x5x4_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3007342694 292469256 2695215306 3436607346 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3748734767 1457012710 3097125139 3643918743 +conv2d wgrad_1x13x17x8_6x8_24x3x3_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3748734767 1457012710 3097125139 747082267 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2635354436 2208465189 3097125139 21340657 +conv2d wgrad_1x23x21x8_8x7_24x3x3_pad_h1w1_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2635354436 2208465189 3097125139 56227083 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2145197840 1160827838 3268227320 2781787969 +conv2d wgrad_1x20x24x8_8x10_40x3x3_pad_h3w3_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2145197840 1160827838 3268227320 4054149767 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1430474811 1631191071 4020828414 692686929 +conv2d wgrad_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1430474811 1631191071 4020828414 692686929 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3718005369 1445988057 2621782130 2833949486 +conv2d wgrad_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3718005369 1445988057 2621782130 3251496823 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1394549732 
268042026 3062073018 220836046 +conv2d wgrad_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1394549732 268042026 3062073018 1707620502 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 136395274 3160856554 4119019906 338363935 +conv2d wgrad_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 136395274 3160856554 4119019906 738717986 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 176279613 3166512658 2730633195 4275642741 +conv2d wgrad_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 176279613 3166512658 2730633195 3105473059 +conv2d wgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 4065225089 743025951 2871183684 1457368860 +conv2d wgrad_1x15x19x36_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 4065225089 743025951 2871183684 2129608645 +conv2d wgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3973136713 3690950270 995921099 1290832901 +conv2d wgrad_1x16x24x68_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3973136713 3690950270 995921099 154477589 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 453168706 2270954829 3596813277 3377048584 +conv2d wgrad_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 453168706 2270954829 3596813277 412509722 +conv2d wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2806952902 2924638290 3730143675 2712417789 +conv2d 
wgrad_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2806952902 2924638290 3730143675 2712417789 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3121139790 3364591286 3162712336 2691220362 +conv2d wgrad_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3121139790 3364591286 3162712336 3441232209 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2888814356 1897929139 1813717058 3953491679 +conv2d wgrad_1x224x224x8_112x112_64x7x7_pad_h3w3_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2888814356 1897929139 1813717058 1261891081 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 610508213 451878551 2081693980 3439473075 +conv2d wgrad_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 610508213 451878551 2081693980 3352279175 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 348294340 312331117 2081693980 3359165930 +conv2d wgrad_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 348294340 312331117 2081693980 1942009480 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 718804073 2164859727 1000406280 332157749 +conv2d wgrad_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 718804073 2164859727 1000406280 2601063004 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2287428715 1869820720 2081693980 4100031453 +conv2d wgrad_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 
tf32nhwc_tf32nhwc_fnhwc_f_f 2287428715 1869820720 2081693980 1919107176 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2287428715 1869820720 2081693980 1618717912 +conv2d wgrad_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2287428715 1869820720 2081693980 221251170 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 579355209 3754887563 2099843274 1752033731 +conv2d wgrad_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 579355209 3754887563 2099843274 2254410747 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2276312507 1260894714 3357056235 4220059751 +conv2d wgrad_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 2276312507 1260894714 3357056235 3334646036 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3059307623 562108865 852881505 1729883415 +conv2d wgrad_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 3059307623 562108865 852881505 3112064590 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1541852759 341839800 1217669626 2313445327 +conv2d wgrad_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1541852759 341839800 1217669626 2378687243 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 310867128 2038652539 2989357662 2523233417 +conv2d wgrad_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 310867128 2038652539 2989357662 845687444 +conv2d 
wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1053420568 2110244091 3333097025 2967627788 +conv2d wgrad_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 1053420568 2110244091 3333097025 285381561 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 822175023 71186875 3443985888 4252958697 +conv2d wgrad_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 tf32nhwc_tf32nhwc_fnhwc_f_f 822175023 71186875 3443985888 160172027 +conv2d wgrad_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 tf32nhwc_tf32nhwc_fnhwc_f_f 1919899681 1064146348 422976060 2522704893 +conv3d fprop_1x1x3x3x4_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2319502486 1782345730 486967919 4136734195 +conv3d fprop_1x1x3x3x4_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2319502486 1782345730 486967919 4136734195 +conv3d fprop_1x1x1x8x4_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1438136294 118775326 2695215306 2927015709 +conv3d fprop_1x1x1x8x4_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1438136294 118775326 2695215306 4170943443 +conv3d fprop_1x8x8x8x4_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 407326717 3518807763 3608965793 698355669 +conv3d fprop_1x8x8x8x4_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 407326717 3518807763 3608965793 5045356 +conv3d fprop_1x16x16x16x4_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 647896480 3518807763 4105258568 4103291466 +conv3d 
fprop_1x16x16x16x4_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 647896480 3518807763 4105258568 4029231659 +conv3d fprop_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 867397053 1428724777 2090640390 1215385964 +conv3d fprop_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 867397053 1428724777 2090640390 498496015 +conv3d fprop_1x2x1x1x4_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 238174184 1674520437 1036082506 887717017 +conv3d fprop_1x2x1x1x4_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 238174184 1674520437 1036082506 587915145 +conv3d fprop_1x1x7x7x4_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3889952025 3272268888 898368197 4271884702 +conv3d fprop_1x1x7x7x4_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3889952025 3272268888 898368197 3556157853 +conv3d fprop_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3187517693 3677644254 281930990 1021147836 +conv3d fprop_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3187517693 3677644254 281930990 3524380466 +conv3d fprop_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2247510726 2370687016 3730143675 1238812141 +conv3d fprop_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2247510726 2370687016 3730143675 2970038618 +conv3d fprop_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 4122056936 
3730143675 1023049729 +conv3d fprop_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 4122056936 3730143675 109265270 +conv3d fprop_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 1260894714 4105258568 1321339181 +conv3d fprop_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 1260894714 4105258568 1115472017 +conv3d fprop_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 3794173544 4105258568 3595166297 +conv3d fprop_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 3794173544 4105258568 4158879376 +conv3d fprop_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 1811285868 357908110 1351907827 +conv3d fprop_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 1811285868 357908110 2935470866 +conv3d fprop_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 3798825666 357908110 857966805 +conv3d fprop_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 3798825666 357908110 3012212889 +conv3d fprop_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 3798825666 2396130566 1514562293 +conv3d fprop_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 3798825666 2396130566 186849532 +conv3d fprop_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 
2614793891 3794173544 2099843274 28708521 +conv3d fprop_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3794173544 2099843274 1423759885 +conv3d fprop_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2247510726 1297934566 2099843274 4045723120 +conv3d fprop_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2247510726 1297934566 2099843274 3875049477 +conv3d fprop_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 2962142380 2396130566 2601052002 +conv3d fprop_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 2962142380 2396130566 2310738198 +conv3d fprop_1x8x8x8x32_66x6_32x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha2_beta2 tf32ndhwc_tf32ndhwc_fndhwc_f_f 647896480 4122056936 925123690 251219599 +conv3d dgrad_1x1x3x3x4_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1544571845 1782345730 2631899086 4172520459 +conv3d dgrad_1x1x3x3x4_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1544571845 1782345730 2631899086 4172520459 +conv3d dgrad_1x1x1x8x4_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3007342694 118775326 2980124304 2325666174 +conv3d dgrad_1x1x1x8x4_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3007342694 118775326 2980124304 3237412583 +conv3d dgrad_1x8x8x8x4_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 838246671 3518807763 852881505 655607058 +conv3d dgrad_1x8x8x8x4_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 
838246671 3518807763 852881505 2254239777 +conv3d dgrad_1x16x16x16x4_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 3518807763 843165254 3325209335 +conv3d dgrad_1x16x16x16x4_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 3518807763 843165254 2627823804 +conv3d dgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1644441141 1428724777 1307969125 2400954527 +conv3d dgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1644441141 1428724777 1307969125 3666407510 +conv3d dgrad_1x2x1x1x4_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 238174184 1674520437 1036082506 2949975932 +conv3d dgrad_1x2x1x1x4_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 238174184 1674520437 1036082506 1943004100 +conv3d dgrad_1x1x7x7x4_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3595246799 3272268888 4235871427 2941526336 +conv3d dgrad_1x1x7x7x4_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3595246799 3272268888 4235871427 2946859880 +conv3d dgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4058682611 3677644254 1887901192 1540875533 +conv3d dgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4058682611 3677644254 1887901192 2224057566 +conv3d dgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 2370687016 3843555714 936632720 +conv3d dgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 
tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 2370687016 3843555714 2508620825 +conv3d dgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 4122056936 3730143675 1092073219 +conv3d dgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 4122056936 3730143675 1665031504 +conv3d dgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 1260894714 3730143675 3800549697 +conv3d dgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 1260894714 3730143675 138980222 +conv3d dgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 3794173544 4105258568 2796017159 +conv3d dgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 3794173544 4105258568 3061070136 +conv3d dgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 1811285868 4105258568 3835960274 +conv3d dgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 1811285868 4105258568 1306995538 +conv3d dgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 3798825666 357908110 3705914599 +conv3d dgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 3798825666 357908110 4247587642 +conv3d dgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 3798825666 2396130566 4140791836 +conv3d 
dgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 3798825666 2396130566 3667453924 +conv3d dgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3794173544 2099843274 1201571258 +conv3d dgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3794173544 2099843274 2360671465 +conv3d dgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 1297934566 3843555714 2551458848 +conv3d dgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 1297934566 3843555714 2147657107 +conv3d dgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 2962142380 3730143675 2771085644 +conv3d dgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 2962142380 3730143675 3242056610 +conv3d dgrad_1x8x8x8x32_66x6_32x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha2_beta2 tf32ndhwc_tf32ndhwc_fndhwc_f_f 230909806 4122056936 843165254 1420345119 +conv3d wgrad_1x1x3x3x4_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1544571845 1854804847 2980124304 2713669702 +conv3d wgrad_1x1x3x3x4_13x3_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1544571845 1854804847 2980124304 2713669702 +conv3d wgrad_1x1x1x8x4_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3007342694 1782345730 294738383 3475727547 +conv3d wgrad_1x1x1x8x4_33x8_8x1x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3007342694 1782345730 294738383 
947267570 +conv3d wgrad_1x8x8x8x4_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 838246671 2681206785 1138813722 824075861 +conv3d wgrad_1x8x8x8x4_88x8_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 838246671 2681206785 1138813722 82060253 +conv3d wgrad_1x16x16x16x4_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 1260894714 1138813722 3511262600 +conv3d wgrad_1x16x16x16x4_1616x16_8x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 1260894714 1138813722 4001643207 +conv3d wgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1644441141 1631191071 4010744414 942007202 +conv3d wgrad_1x1x15x19x160_113x14_224x3x6_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1644441141 1631191071 4010744414 4225324524 +conv3d wgrad_1x2x1x1x4_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 238174184 2768167245 1585402383 4211777251 +conv3d wgrad_1x2x1x1x4_11x1_8x1x1_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 238174184 2768167245 1585402383 3750079405 +conv3d wgrad_1x1x7x7x4_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3595246799 1159024946 2695215306 4166286345 +conv3d wgrad_1x1x7x7x4_15x5_16x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 3595246799 1159024946 2695215306 614099477 +conv3d wgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4058682611 2837532843 1761528323 549885623 +conv3d wgrad_1x11x15x19x64_1215x20_32x3x6_pad_d1h1w3_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4058682611 2837532843 
1761528323 2965474394 +conv3d wgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 3423607562 3608965793 2798805760 +conv3d wgrad_1x32x32x32x16_1616x16_32x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 3423607562 3608965793 1941621355 +conv3d wgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 69226238 2636428043 2649061923 +conv3d wgrad_1x16x16x16x32_1616x16_32x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1193322564 69226238 2636428043 3901259858 +conv3d wgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 69226238 843165254 902125153 +conv3d wgrad_1x16x16x16x32_88x8_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 69226238 843165254 2897689664 +conv3d wgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 2962142380 3823648244 1458256611 +conv3d wgrad_1x8x8x8x64_88x8_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 4051538848 2962142380 3823648244 479658980 +conv3d wgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 2962142380 2396130566 1553657582 +conv3d wgrad_1x8x8x8x64_44x4_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 2962142380 2396130566 1843818935 +conv3d wgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2177662229 1297934566 3626031696 3309140977 +conv3d wgrad_1x4x4x4x128_44x4_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 
2177662229 1297934566 3626031696 3877168474 +conv3d wgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 1811285868 3626031696 2959655816 +conv3d wgrad_1x8x8x8x128_88x8_128x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 1811285868 3626031696 2478179322 +conv3d wgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3595579175 3823648244 1409929324 +conv3d wgrad_1x16x16x16x64_1616x16_64x3x3_pad_d1h1w1_stride_d1h1w1_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3595579175 3823648244 3283749403 +conv3d wgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3423607562 357908110 3763482552 +conv3d wgrad_1x32x32x32x16_1616x16_64x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 2614793891 3423607562 357908110 984484147 +conv3d wgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_corr_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 69226238 4105258568 3342929921 +conv3d wgrad_1x16x16x16x32_88x8_128x2x2_pad_d0h0w0_stride_d2h2w2_dil_d1h1w1_conv_alpha1_beta0 tf32ndhwc_tf32ndhwc_fndhwc_f_f 1329805353 69226238 4105258568 893600736 +conv3d wgrad_1x8x8x8x32_66x6_32x3x3_pad_d0h0w0_stride_d1h1w1_dil_d1h1w1_corr_alpha2_beta2 tf32ndhwc_tf32ndhwc_fndhwc_f_f 230909806 1260894714 2636428043 2198733997 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32.txt new file mode 100644 index 00000000..a7d35604 --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32.txt @@ -0,0 +1,138 @@ +conv2d fprop_1x1x1x16_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
s8nhwc_s8nhwc_inhwc_i_f 776637956 2909557486 4209557191 2628681889 +conv2d fprop_1x1x1x16_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 776637956 2909557486 4209557191 2628681889 +conv2d fprop_1x1x8x16_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 4082436958 2547545036 3324685530 3597206714 +conv2d fprop_1x1x8x16_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 4082436958 2547545036 3324685530 1585522588 +conv2d fprop_1x7x8x16_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3137973937 2767077290 1907827554 2051304566 +conv2d fprop_1x7x8x16_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3137973937 2767077290 1907827554 374967253 +conv2d fprop_1x7x9x16_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 655725974 3592472200 506563703 1674820694 +conv2d fprop_1x7x9x16_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 655725974 3592472200 506563703 3975853358 +conv2d fprop_2x7x9x16_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 725233472 3683589771 493841232 304963909 +conv2d fprop_2x7x9x16_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 725233472 3683589771 493841232 564533608 +conv2d fprop_3x7x9x16_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2099667058 3573646278 1206332722 7205531 +conv2d fprop_3x7x9x16_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2099667058 3573646278 1206332722 3794965864 +conv2d fprop_3x7x9x16_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2099667058 3329730583 1710685046 3598855498 +conv2d fprop_3x7x9x16_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2099667058 3329730583 1710685046 3145627823 +conv2d 
fprop_3x7x9x16_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2099667058 817147799 3026961444 1414034801 +conv2d fprop_3x7x9x16_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2099667058 817147799 3026961444 2252310527 +conv2d fprop_1x11x7x16_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1291281134 2909557486 3324685530 2255601108 +conv2d fprop_1x11x7x16_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1291281134 2909557486 3324685530 2255601108 +conv2d fprop_1x11x7x16_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1291281134 2767077290 3324685530 1579121635 +conv2d fprop_1x11x7x16_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1291281134 2767077290 3324685530 1047334046 +conv2d fprop_1x13x11x16_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1759229365 2909557486 1907827554 2852376056 +conv2d fprop_1x13x11x16_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1759229365 2909557486 1907827554 2852376056 +conv2d fprop_1x17x19x16_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1263288900 1744815467 2720280253 2089128292 +conv2d fprop_1x17x19x16_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1263288900 1744815467 2720280253 638322597 +conv2d fprop_1x23x5x16_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1511055498 474572393 1710685046 2879179680 +conv2d fprop_1x23x5x16_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1511055498 474572393 1710685046 795194450 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3754468132 3475016041 1588474612 3183183520 +conv2d 
fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3754468132 3475016041 1588474612 3183183520 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3716123936 1149399708 1907179330 1630548311 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3716123936 1149399708 1907179330 1880077064 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1375227342 2508824805 320006437 1729280118 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1375227342 2508824805 320006437 2905577482 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 4026202008 1287566710 1898545094 1486216406 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 4026202008 1287566710 1898545094 2323099205 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2844675317 4066343306 4242252933 2049528680 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2844675317 4066343306 4242252933 823670492 +conv2d fprop_1x15x19x48_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 361106642 1568764056 3184621425 2460081734 +conv2d fprop_1x15x19x48_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 361106642 1568764056 3184621425 288784852 +conv2d fprop_1x16x24x80_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 4020962878 2388318674 3148695436 4074620609 +conv2d fprop_1x16x24x80_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 4020962878 2388318674 3148695436 
1605237591 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3423715025 4214947574 1328605014 2088140151 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3423715025 4214947574 1328605014 4055269725 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 386898678 2914579947 3231726095 404674518 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 386898678 2914579947 3231726095 404674518 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 401261840 1032488977 2856763908 1755771098 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 401261840 1032488977 2856763908 2984060885 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3490742496 4290221705 2240369427 3419649411 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3490742496 4290221705 2240369427 2709709134 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 674972000 4290221705 936455799 4083729015 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 674972000 4290221705 936455799 521087104 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3559114385 611927961 3672240685 3952711909 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3559114385 611927961 3672240685 831276460 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2456458966 4290221705 
2301484646 1300821064 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2456458966 4290221705 2301484646 2700027247 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2456458966 4290221705 2301484646 4025827387 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2456458966 4290221705 2301484646 2458486788 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1536613554 3309037265 3087350413 4020856657 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1536613554 3309037265 3087350413 3244570506 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2165036913 3329730583 612960361 4168246341 +conv2d fprop_1x32x32x16_6x16_32x3x3_pad_h1w1_stride_h6w2_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2165036913 3329730583 612960361 2987573285 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2944909811 3592472200 1605228522 3887013673 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 2944909811 3592472200 1605228522 1802699064 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 747123297 2666823963 4290039622 1215151709 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 747123297 2666823963 4290039622 3031516069 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1618850980 1182471718 961406695 372399325 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 1618850980 1182471718 
961406695 2003082608 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3643206226 3835297279 520645775 3465036547 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 3643206226 3835297279 520645775 4223629118 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 431903943 1729040908 358345442 4245746659 +conv2d fprop_1x233x35x48_233x35_24x7x5_pad_h3w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nhwc_s8nhwc_inhwc_i_f 431903943 1729040908 358345442 1224340034 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 s8nhwc_s8nhwc_inhwc_i_f 4121454290 3081115403 2255344759 3879663798 +conv2d fprop_1x1x1x32_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1274533679 3493243764 4209557191 1794065453 +conv2d fprop_1x1x1x32_3x3_8x1x1_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1274533679 3493243764 4209557191 1794065453 +conv2d fprop_1x1x8x32_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3620204349 4039599338 3324685530 2607282269 +conv2d fprop_1x1x8x32_3x8_8x1x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3620204349 4039599338 3324685530 342506690 +conv2d fprop_1x7x8x32_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4077428439 1945710027 1907827554 3710378399 +conv2d fprop_1x7x8x32_7x8_8x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4077428439 1945710027 1907827554 1781834480 +conv2d fprop_1x7x9x32_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2705691693 98103785 506563703 4119696922 +conv2d fprop_1x7x9x32_6x8_8x4x4_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2705691693 98103785 506563703 2431904689 +conv2d 
fprop_2x7x9x32_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3097300362 3751678476 493841232 4001455828 +conv2d fprop_2x7x9x32_5x7_8x5x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3097300362 3751678476 493841232 1571483249 +conv2d fprop_3x7x9x32_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1866766242 2133678546 1206332722 1403098053 +conv2d fprop_3x7x9x32_4x7_8x6x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1866766242 2133678546 1206332722 1404854483 +conv2d fprop_3x7x9x32_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1866766242 2687010637 1710685046 1600340471 +conv2d fprop_3x7x9x32_4x6_8x6x6_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1866766242 2687010637 1710685046 994371582 +conv2d fprop_3x7x9x32_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1866766242 1180898714 3026961444 2433465010 +conv2d fprop_3x7x9x32_3x5_8x7x7_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1866766242 1180898714 3026961444 285484823 +conv2d fprop_1x11x7x32_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4135010890 3493243764 3324685530 4263674457 +conv2d fprop_1x11x7x32_6x4_8x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4135010890 3493243764 3324685530 4263674457 +conv2d fprop_1x11x7x32_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4135010890 1945710027 3324685530 498513214 +conv2d fprop_1x11x7x32_6x4_8x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4135010890 1945710027 3324685530 4201919667 +conv2d fprop_1x13x11x32_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 201700668 3493243764 1907827554 922168319 +conv2d 
fprop_1x13x11x32_8x7_8x1x1_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 201700668 3493243764 1907827554 922168319 +conv2d fprop_1x17x19x32_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1446981531 3656150002 2720280253 731620572 +conv2d fprop_1x17x19x32_9x10_16x2x2_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1446981531 3656150002 2720280253 2646086767 +conv2d fprop_1x23x5x32_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2846234135 4032399193 1710685046 3988493390 +conv2d fprop_1x23x5x32_12x3_16x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2846234135 4032399193 1710685046 876293908 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3622290451 1505540897 1588474612 3183183520 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3622290451 1505540897 1588474612 3183183520 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3586800483 2619789293 1907179330 1630548311 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3586800483 2619789293 1907179330 1880077064 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 78021249 449214431 320006437 1729280118 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 78021249 449214431 320006437 2905577482 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1731762070 2256228363 1898545094 1486216406 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1731762070 2256228363 1898545094 2323099205 +conv2d 
fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3184694370 1580258916 4242252933 2049528680 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3184694370 1580258916 4242252933 823670492 +conv2d fprop_1x15x19x64_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 311384756 2487637155 3184621425 1268005490 +conv2d fprop_1x15x19x64_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 311384756 2487637155 3184621425 4214263209 +conv2d fprop_1x16x24x96_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 407775809 1638222167 3148695436 2894438838 +conv2d fprop_1x16x24x96_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 407775809 1638222167 3148695436 1246130088 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4006852980 809343331 1328605014 2088140151 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 4006852980 809343331 1328605014 4055269725 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2370365802 4043424400 3231726095 404674518 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2370365802 4043424400 3231726095 404674518 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2744507507 3011298323 2856763908 1755771098 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 2744507507 3011298323 2856763908 2984060885 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1348352570 3581007866 2240369427 3419649411 
+conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1348352570 3581007866 2240369427 2709709134 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1293446836 3581007866 936455799 4083729015 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1293446836 3581007866 936455799 521087104 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3716781942 2056918974 3672240685 3952711909 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3716781942 2056918974 3672240685 831276460 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3230209466 3581007866 2301484646 1300821064 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3230209466 3581007866 2301484646 2700027247 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3230209466 3581007866 2301484646 4025827387 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3230209466 3581007866 2301484646 2458486788 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3752219115 2612889427 3087350413 4020856657 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 3752219115 2612889427 3087350413 3244570506 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 712760952 3656150002 1605228522 3887013673 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 712760952 3656150002 
1605228522 1802699064 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1998572675 3327908208 4290039622 1215151709 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1998572675 3327908208 4290039622 3031516069 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1104335117 3889837239 961406695 372399325 +conv2d fprop_4x2x3x256_1x1_328x3x5_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 1104335117 3889837239 961406695 2003082608 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 260007636 1848202930 520645775 3465036547 +conv2d fprop_1x124x224x96_229x129_24x7x7_pad_h3w3_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nhwc_s4nhwc_inhwc_i_f 260007636 1848202930 520645775 4223629118 +conv2d fprop_1x17x11x288_17x11_160x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha2_beta2 s4nhwc_s4nhwc_inhwc_i_f 2451612476 2037530725 2255344759 3879663798 diff --git a/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32_interleaved.txt b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32_interleaved.txt new file mode 100644 index 00000000..c662bdef --- /dev/null +++ b/test/unit/data/hashes/cached_results_cutlass_test_unit_conv_device_tensorop_s32_interleaved.txt @@ -0,0 +1,128 @@ +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2257020387 2306573932 56677867 1786106543 +conv2d fprop_1x15x19x160_15x19_224x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2257020387 2306573932 56677867 1786106543 +conv2d fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1156972542 2907159083 2175481248 3712392696 +conv2d 
fprop_1x19x37x160_10x19_224x3x3_pad_h1w1_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1156972542 2907159083 2175481248 4128818508 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3622914431 360329309 1000827686 3397816796 +conv2d fprop_1x16x16x160_17x16_224x2x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3622914431 360329309 1000827686 4023952937 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 333387085 3931688998 1579688492 2803910439 +conv2d fprop_1x23x21x128_23x21_224x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 333387085 3931688998 1579688492 3489895425 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2517840716 1167172924 941948898 854314410 +conv2d fprop_1x29x37x160_29x37_224x5x5_pad_h2w2_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2517840716 1167172924 941948898 1777561699 +conv2d fprop_1x15x19x64_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 993960861 2689618577 345208683 1561585450 +conv2d fprop_1x15x19x64_15x19_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 993960861 2689618577 345208683 2857545178 +conv2d fprop_1x16x24x96_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2390973039 3400735473 1409384107 1984034023 +conv2d fprop_1x16x24x96_16x24_96x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2390973039 3400735473 1409384107 2037481465 +conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1491809625 4124099354 4139509970 978245698 
+conv2d fprop_1x13x16x288_7x8_160x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1491809625 4124099354 4139509970 3910925680 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 272928955 1481031949 3330491456 111364403 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 272928955 1481031949 3330491456 111364403 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 401261840 318138824 1323519634 3290313862 +conv2d fprop_1x71x80x32_36x40_64x5x5_pad_h2w2_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 401261840 318138824 1323519634 3540214472 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 465953721 1662236565 1710006728 3977590264 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 465953721 1662236565 1710006728 3361009847 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2643174311 1662236565 1815140928 3586291526 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2643174311 1662236565 1815140928 726840541 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 4293764010 4142851469 1225898575 3671738402 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 4293764010 4142851469 1225898575 1183585862 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1437928962 1662236565 3221182201 
980724445 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1437928962 1662236565 3221182201 1864754285 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1437928962 1662236565 3221182201 520312359 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 1437928962 1662236565 3221182201 2915637276 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2058879935 3902389594 1882642169 1604948913 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2058879935 3902389594 1882642169 4198560030 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2944909811 3226488785 3103396187 1821576460 +conv2d fprop_32x24x32x32_24x31_32x1x2_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2944909811 3226488785 3103396187 1178412803 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 844345223 3833205704 2120200862 3212986739 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 844345223 3833205704 2120200862 426277367 +conv2d fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2680465245 2224844652 1189066576 2095234196 +conv2d fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2680465245 2224844652 1189066576 2095234196 +conv2d fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2680465245 3330175594 
2327522013 4239788654 +conv2d fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2680465245 3330175594 2327522013 4239788654 +conv2d fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2680465245 2942065884 2327522013 2474295423 +conv2d fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2680465245 2942065884 2327522013 2015087380 +conv2d fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2494788361 673559002 2327522013 2262256181 +conv2d fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2494788361 673559002 2327522013 2262256181 +conv2d fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2494788361 1481031949 2522097906 1586033176 +conv2d fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2494788361 1481031949 2522097906 1586033176 +conv2d fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2494788361 2508231365 3519987931 2139994607 +conv2d fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2494788361 2508231365 3519987931 2139994607 +conv2d fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3118588240 2407183407 3519987931 2315728838 +conv2d fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3118588240 2407183407 3519987931 3439977625 +conv2d fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 
3118588240 3633353507 2522097906 1293871809 +conv2d fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3118588240 3633353507 2522097906 1293871809 +conv2d fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3830854792 594370640 3519987931 1641417207 +conv2d fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3830854792 594370640 3519987931 1641417207 +conv2d fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3830854792 415008779 2163698124 1309114882 +conv2d fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3830854792 415008779 2163698124 1309114882 +conv2d fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3830854792 3137005789 1648179354 57450329 +conv2d fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3830854792 3137005789 1648179354 57450329 +conv2d fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 901927536 3144319670 1648179354 2433410615 +conv2d fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 901927536 3144319670 1648179354 1242210609 +conv2d fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 901927536 1994427383 2163698124 4149927306 +conv2d fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 901927536 1994427383 2163698124 4149927306 +conv2d fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 
s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2446540417 1805451200 1648179354 2061091838 +conv2d fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2446540417 1805451200 1648179354 2061091838 +conv2d fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2446540417 1160209190 4271553919 4158225587 +conv2d fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2446540417 1160209190 4271553919 4158225587 +conv2d fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2446540417 3270127523 3766582212 2050622635 +conv2d fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 2446540417 3270127523 3766582212 2050622635 +conv2d fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 673124514 2964077129 3766582212 540078333 +conv2d fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 673124514 2964077129 3766582212 1459986551 +conv2d fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 673124514 162466699 4271553919 4229033905 +conv2d fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 673124514 162466699 4271553919 4229033905 +conv2d fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3790854543 1740163851 3766582212 666611661 +conv2d fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s8nc32hw32_s8c32rsk32_s8nc32hw32_i_f 3790854543 1740163851 3766582212 666611661 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 
s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3563756534 4145401287 2774147186 516022831 +conv2d fprop_1x55x51x256_28x26_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3563756534 4145401287 2774147186 516022831 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2958274559 2084126045 1988418915 2830875772 +conv2d fprop_1x27x23x256_9x7_512x3x3_pad_h0w0_stride_h3w3_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2958274559 2084126045 1988418915 3488898477 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1464005709 2084126045 4276149125 1229313578 +conv2d fprop_1x27x31x256_12x11_512x3x3_pad_h5w7_stride_h3w4_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1464005709 2084126045 4276149125 1822992361 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3218517333 2442721868 2671270533 2371226722 +conv2d fprop_1x27x35x256_15x9_512x7x5_pad_h11w7_stride_h3w5_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3218517333 2442721868 2671270533 1280057593 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1354370892 2084126045 4193566385 2927747028 +conv2d fprop_1x27x27x256_27x14_512x3x3_pad_h1w1_stride_h1w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1354370892 2084126045 4193566385 1389616458 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1354370892 2084126045 4193566385 1186086047 +conv2d fprop_1x27x27x256_14x27_512x3x3_pad_h1w1_stride_h2w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1354370892 2084126045 4193566385 929529143 +conv2d 
fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2304806251 3157706556 3303866450 1115195627 +conv2d fprop_3x28x28x256_14x14_256x2x2_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2304806251 3157706556 3303866450 1898128533 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2637544259 2993513863 3602417067 2435518052 +conv2d fprop_4x4x5x128_3x3_256x3x6_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2637544259 2993513863 3602417067 173672416 +conv2d fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3301525734 2221050180 2689662309 2294233865 +conv2d fprop_1x56x56x64_56x56_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3301525734 2221050180 2689662309 2294233865 +conv2d fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3301525734 98103785 2280943499 2499302634 +conv2d fprop_1x56x56x64_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3301525734 98103785 2280943499 2499302634 +conv2d fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3301525734 1677636452 2280943499 2796886958 +conv2d fprop_1x56x56x64_56x56_64x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3301525734 1677636452 2280943499 1708111283 +conv2d fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1130520359 59038063 2280943499 2620671899 +conv2d fprop_1x56x56x256_56x56_64x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1130520359 59038063 2280943499 2620671899 +conv2d 
fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1130520359 4145401287 1219103855 3961007091 +conv2d fprop_1x56x56x256_28x28_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1130520359 4145401287 1219103855 3961007091 +conv2d fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1130520359 504379703 1488928479 1427134856 +conv2d fprop_1x56x56x256_28x28_128x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1130520359 504379703 1488928479 1427134856 +conv2d fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 899899803 1505875757 1488928479 396727280 +conv2d fprop_1x28x28x128_28x28_128x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 899899803 1505875757 1488928479 614758331 +conv2d fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 899899803 1730326830 1219103855 2571923929 +conv2d fprop_1x28x28x128_28x28_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 899899803 1730326830 1219103855 2571923929 +conv2d fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 816701080 1375821331 1488928479 4192533639 +conv2d fprop_1x28x28x512_28x28_128x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 816701080 1375821331 1488928479 4192533639 +conv2d fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 816701080 1276141125 806482633 3315974235 +conv2d fprop_1x28x28x512_14x14_1024x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 816701080 1276141125 806482633 
3315974235 +conv2d fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 816701080 1201032911 1764365805 2880519032 +conv2d fprop_1x28x28x512_14x14_256x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 816701080 1201032911 1764365805 2880519032 +conv2d fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2248088895 3515084013 1764365805 2795451116 +conv2d fprop_1x14x14x256_14x14_256x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2248088895 3515084013 1764365805 3995792090 +conv2d fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2248088895 2239379728 806482633 3653954546 +conv2d fprop_1x14x14x256_14x14_1024x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 2248088895 2239379728 806482633 3653954546 +conv2d fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 4171891827 3739025576 1764365805 3540733588 +conv2d fprop_1x14x14x1024_14x14_256x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 4171891827 3739025576 1764365805 3540733588 +conv2d fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 4171891827 4128795926 2366524779 2258473990 +conv2d fprop_1x14x14x1024_7x7_2048x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 4171891827 4128795926 2366524779 2258473990 +conv2d fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 4171891827 3574788779 1867333940 3661997755 +conv2d fprop_1x14x14x1024_7x7_512x1x1_pad_h0w0_stride_h2w2_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 
4171891827 3574788779 1867333940 3661997755 +conv2d fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1843516393 560338949 1867333940 512869068 +conv2d fprop_1x7x7x512_7x7_512x3x3_pad_h1w1_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1843516393 560338949 1867333940 147207245 +conv2d fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1843516393 1545827399 2366524779 2494918310 +conv2d fprop_1x7x7x512_7x7_2048x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 1843516393 1545827399 2366524779 2494918310 +conv2d fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_corr_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3623215010 2231173245 1867333940 1230240181 +conv2d fprop_1x7x7x2048_7x7_512x1x1_pad_h0w0_stride_h1w1_dil_h1w1_conv_alpha1_beta0 s4nc64hw64_s4c64rsk64_s4nc64hw64_i_f 3623215010 2231173245 1867333940 1230240181 diff --git a/test/unit/epilogue/thread/CMakeLists.txt b/test/unit/epilogue/thread/CMakeLists.txt index 91a03774..3044bbdd 100644 --- a/test/unit/epilogue/thread/CMakeLists.txt +++ b/test/unit/epilogue/thread/CMakeLists.txt @@ -22,6 +22,7 @@ cutlass_test_unit_add_executable( cutlass_test_unit_epilogue_thread + activation.cu linear_combination.cu linear_combination_planar_complex.cu ) diff --git a/test/unit/epilogue/thread/activation.cu b/test/unit/epilogue/thread/activation.cu new file mode 100644 index 00000000..cdc1b8f6 --- /dev/null +++ b/test/unit/epilogue/thread/activation.cu @@ -0,0 +1,314 @@ +/*************************************************************************************************** + * Copyright (c) 2017-2021, NVIDIA CORPORATION. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without modification, are permitted + * provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright notice, this list of + * conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright notice, this list of + * conditions and the following disclaimer in the documentation and/or other materials + * provided with the distribution. + * * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used + * to endorse or promote products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND + * FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, + * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************************************/ +/*! 
\file + \brief Unit tests for thread-level epilogue activation functions +*/ + +#include "../../common/cutlass_unit_test.h" + +#include "cutlass/epilogue/thread/activation.h" + +#include "cutlass/util/host_tensor.h" + +///////////////////////////////////////////////////////////////////////////////////////////////// + +template +__global__ void test_Epilogue_thread_activation(T *out, T *in) { + + cutlass::Array *vec_out = reinterpret_cast *>(out); + cutlass::Array *vec_in = reinterpret_cast *>(in); + + Func func; + vec_out[threadIdx.x] = func(vec_in[threadIdx.x]); +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +// +// Reference +// + +static double GELU_golden_input[] = { + 1.587425827980, 1.157652974129, 0.750432848930, -0.965980410576, + -0.388184845448, 0.014422321692, 0.353164494038, 1.354383468628, + 0.167588576674, 0.272798538208, -0.377032428980, 1.923444747925, + 0.308164477348, -0.341318070889, 0.278338819742, -0.292668998241, + -1.051743745804, -0.814175724983, 0.112737402320, 1.262938618660, + -1.582363605499, 0.722016870975, 1.053453564644, -0.659764587879, + 0.734917521477, 0.091274201870, 0.604461073875, -0.219043627381, + -0.136795744300, 0.960650205612, -1.805408835411, 0.091029644012, + -1.023343324661, 0.147713735700, -0.499895423651, 1.351878166199, + -1.631091356277, -0.336171895266, -1.612408638000, 0.090832948685, + -0.658132910728, -0.326727777719, -1.986387014389, 0.787685871124, + -1.015677452087, -0.225094825029, 0.876752018929, 0.744826257229, + 0.870290279388, -0.757595360279, 1.510331749916, 0.750012576580, + 0.906444966793, -0.915759027004, 1.260277032852, -0.158465340734, + -0.109191477299, -0.817102134228, 0.391305118799, -0.524910449982, + 0.351349592209, 0.801979541779, 0.446691334248, -0.741077482700, + 1.205966711044, -0.910210072994, 0.945986449718, 0.784096539021, + 1.670521497726, 0.344931513071, -0.301411420107, 0.309870749712, + -0.879704594612, -1.951189517975, -0.805817663670,
-0.661812782288, + -0.505914270878, -1.836273789406, -0.381845980883, -0.554707705975, + -0.375447630882, -0.516645610332, 0.509586095810, 1.087131023407, + 2.664817094803, -1.558295488358, -0.076461032033, -0.504621028900, + 1.327111959457, -1.819981694221, 1.350415468216, -2.074112653732, + 1.501431345940, -1.339013576508, 0.162817999721, -1.473457217216, + 0.357770472765, 0.188413277268, 1.601302266121, -0.653882205486, + 0.856162548065, 0.763102591038, -0.526283502579, 0.581961452961, + 0.089969776571, 1.968745589256, 0.545802056789, -1.168786048889, + 1.206663012505, -0.109096683562, -1.223938226700, 0.744599223137, + -1.779406785965, 0.766436159611, -0.579044401646, -1.002057313919, + -0.715845823288, -0.562508940697, 0.886768460274, 2.327786445618, + -0.148763969541, -0.918884515762, -0.367678701878, -1.105021238327, + -0.461237311363, 0.158228352666, -0.254040330648, 1.427477598190, + 0.277530491352, 0.046293262392, -0.535557329655, -1.486695051193, + -0.953706681728, -1.040495038033, -0.314667612314, 0.348172843456, + 0.522773325443, 0.025960063562, -0.482472360134, 1.993084549904, + -0.253064930439, -0.012146313675, -2.166327714920, 0.398040622473, + -0.022238900885, -0.443580865860, -0.898376941681, -0.571689844131, + 1.666979670525, -0.831176340580, -0.671057403088, 0.481970995665, + -1.096243023872, -1.493894338608, 0.596651911736, -0.229505166411, + 1.165976166725, 0.905094027519, 0.049716457725, -1.362933635712, + -0.366948783398, 1.461613893509, -0.718411505222, 0.895385026932, + -0.763122260571, 1.329716682434, 1.366570711136, -0.086544901133, + 0.059739742428, 0.940766513348, -0.272854357958, -1.738811373711, + -0.361239165068, 0.696977972984, 1.288442254066, 1.264815807343, + -0.573566436768, -1.141678214073, 0.081865988672, -0.886228799820, + -0.236933603883, 1.050115466118, -0.538952171803, 0.651773929596, + -0.220034509897, -1.198960781097, 1.247478365898, -0.053529661149, + 0.639809548855, 1.672434806824, 0.511088073254, -1.179364681244, + 
-0.730427742004, 0.157630980015, 0.389369845390, -0.925578773022, + -0.093250080943, -0.391062080860, 0.852983593941, 1.868778109550, + -1.198786258698, 0.604997038841, -1.482687234879, -2.469333171844, + 0.718807697296, -0.559609353542, 2.187228441238, -2.927527904510, + 0.148535788059, -0.097280368209, 0.674131810665, -1.137645959854, + 0.792729616165, -1.166317462921, -0.498791724443, 1.675866723061, + -0.137909621000, -0.653263568878, -2.281216144562, 0.296096831560, + 2.002410173416, 1.083609819412, 0.933580815792, -1.504760265350, + 2.185185909271, 0.286121010780, -1.035485863686, -0.216372340918, + -0.274334043264, -0.849510788918, -1.397169828415, -0.407644748688, + 0.159476816654, -0.170650705695, 0.335193097591, -0.156852483749, + 0.036168430001, 0.858105242252, -1.086121797562, 0.404813349247, + -0.481496721506, -0.389882832766, 0.020690204576, -0.772020936012, + -0.758921504021, 0.323482036591, 0.115715265274, -0.811228036880, + -0.882436633110, 0.176811277866, 1.678015947342, 0.379081040621, + -0.842976212502, 0.346952259541, -0.545828759670, 1.632800459862 +}; + +static double GELU_golden_output[] = { + 1.498199582100, 1.014679551125, 0.580462038517, -0.161344811320, + -0.135453075171, 0.007294139825, 0.225325092673, 1.235459089279, + 0.094946734607, 0.165724009275, -0.133120641112, 1.871103763580, + 0.191376730800, -0.125069886446, 0.169681981206, -0.112644664943, + -0.154036879539, -0.169163048267, 0.061428427696, 1.132469892502, + -0.089851818979, 0.552240371704, 0.899579226971, -0.168043658137, + 0.565008401871, 0.048956073821, 0.439583092928, -0.090532489121, + -0.060955654830, 0.798911273479, -0.064101703465, 0.048816055059, + -0.156645998359, 0.082529976964, -0.154254898429, 1.232632875443, + -0.083896033466, -0.123835846782, -0.086161509156, 0.048703473061, + -0.167972877622, -0.121522113681, -0.046670529991, 0.617986679077, + -0.157319813967, -0.092503339052, 0.709896743298, 0.574865520000, + 0.703132867813, -0.169963955879, 1.411436080933, 
0.580042064190, + 0.741154611111, -0.164741978049, 1.129479527473, -0.069256491959, + -0.049848672003, -0.169087052345, 0.255214750767, -0.157380074263, + 0.223928079009, 0.632535398006, 0.300378054380, -0.169946283102, + 1.068588852882, -0.165071934462, 0.783203184605, 0.614346146584, + 1.591325283051, 0.219006344676, -0.115003645420, 0.192637458444, + -0.166712537408, -0.049788996577, -0.169361919165, -0.168130636215, + -0.155041679740, -0.060888241976, -0.134137839079, -0.160614117980, + -0.132782235742, -0.156389534473, 0.354075312614, 0.936574816704, + 2.654553413391, -0.092845752835, -0.035900454968, -0.154874503613, + 1.204704761505, -0.062572605908, 1.230982899666, -0.039479542524, + 1.401402950287, -0.120890334249, 0.091938301921, -0.103604510427, + 0.228880971670, 0.108285568655, 1.513783097267, -0.167782157660, + 0.688394129276, 0.593158841133, -0.157540664077, 0.418839782476, + 0.048209801316, 1.920528769493, 0.386099845171, -0.141709372401, + 1.069367766380, -0.049809500575, -0.135230198503, 0.574639260769, + -0.066881760955, 0.596510827541, -0.162873372436, -0.158483341336, + -0.169686436653, -0.161375194788, 0.720409095287, 2.304597616196, + -0.065585561097, -0.164551988244, -0.131098195910, -0.148708447814, + -0.148663327098, 0.089060656726, -0.101548098028, 1.317959904671, + 0.169103100896, 0.024001283571, -0.158595800400, -0.101909510791, + -0.162240833044, -0.155090972781, -0.118474565446, 0.221488356590, + 0.365645468235, 0.013248858973, -0.151851043105, 1.946992278099, + -0.101253561676, -0.006014300976, -0.032804865390, 0.260597169399, + -0.010922161862, -0.145792976022, -0.165743649006, -0.162226170301, + 1.587365984917, -0.168676435947, -0.168497130275, 0.330191940069, + -0.149622067809, -0.100989677012, 0.432351946831, -0.093922272325, + 1.023946166039, 0.739726305008, 0.025843897834, -0.117827951908, + -0.130937814713, 1.356489539146, -0.169726014137, 0.729478538036, + -0.169943705201, 1.207641005516, 1.249209761620, -0.040288090706, + 
0.031292784959, 0.777626037598, -0.107090584934, -0.071350336075, + -0.129670530558, 0.527676224709, 1.161149263382, 1.134579420090, + -0.162394225597, -0.144757837057, 0.043603736907, -0.166386902332, + -0.096278958023, 0.895924389362, -0.158969298005, 0.484089732170, + -0.090857118368, -0.138206124306, 1.115107178688, -0.025622237474, + 0.472724437714, 1.593463659286, 0.355387806892, -0.140493586659, + -0.169871479273, 0.088687323034, 0.253673940897, -0.164135158062, + -0.043161027133, -0.136040985584, 0.685087263584, 1.811169505119, + -0.138226687908, 0.440080583096, -0.102422207594, -0.016713079065, + 0.549075841904, -0.161096408963, 2.155813455582, -0.005001218989, + 0.083037458360, -0.044870752841, 0.505522191525, -0.145202502608, + 0.623111069202, -0.141991063952, -0.154108211398, 1.597298502922, + -0.061391282827, -0.167753636837, -0.025704355910, 0.182520583272, + 1.957115054131, 0.932696640491, 0.769961357117, -0.099604383111, + 2.153636932373, 0.175279796124, -0.155551761389, -0.089653611183, + -0.107515335083, -0.168032020330, -0.113423995674, -0.139319628477, + 0.089841812849, -0.073763631284, 0.211594089866, -0.068651281297, + 0.018605981022, 0.690416753292, -0.150658726692, 0.266040354967, + -0.151710823178, -0.135800719261, 0.010515870526, -0.169883996248, + -0.169960290194, 0.202769815922, 0.063187584281, -0.169236257672, + -0.166577890515, 0.100812792778, 1.599699616432, 0.245525524020, + -0.168275654316, 0.220552831888, -0.159705042839, 1.549110531807 +}; + +///////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Epilogue_thread_gelu_taylor, device_f32) { + + int const kN = 256; + int const kV = 4; + + using Element = float; + using Func = cutlass::epilogue::thread::GELU_taylor>; + + double tolerance = 0.005; + + // + // Construct workspace + // + cutlass::HostTensor tensor_Destination({1, kN}); + cutlass::HostTensor tensor_Source({1, kN}); + + for (int i = 0; i < kN; ++i) { + 
tensor_Source.host_data(i) = Element(GELU_golden_input[i]); + } + + tensor_Destination.sync_device(); + tensor_Source.sync_device(); + + // + // Launch the kernel + // + dim3 grid(1,1,1); + dim3 block(kN / kV, 1, 1); + + test_Epilogue_thread_activation<<< grid, block >>>( + tensor_Destination.device_data(), + tensor_Source.device_data()); + + tensor_Destination.sync_host(); + + // + // Verify + // + + for (int i = 0; i < kN; ++i) { + Element input = Element(GELU_golden_input[i]); + Element got = tensor_Destination.host_data(i); + Element expected = Element(GELU_golden_output[i]); + + double rel_error = (double(got) - double(expected)) / double(expected); + + double tolerance_override = tolerance; + + switch (i) { + case 142: tolerance_override = 0.008; break; + case 203: tolerance_override = 0.03; break; + case 207: tolerance_override = 0.09; break; + case 218: tolerance_override = 0.013; break; + } + + EXPECT_LT(std::abs(rel_error), tolerance_override) + << "Input[" << i << "]: " << input << ", Got: " << got << ", expected: " << expected; + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Epilogue_thread_gelu_taylor, device_f16) { + + int const kN = 256; + int const kV = 8; + + using Element = cutlass::half_t; + using Func = cutlass::epilogue::thread::GELU_taylor>; + + double tolerance = 0.005; + + // + // Construct workspace + // + cutlass::HostTensor tensor_Destination({1, kN}); + cutlass::HostTensor tensor_Source({1, kN}); + + for (int i = 0; i < kN; ++i) { + tensor_Source.host_data(i) = Element(GELU_golden_input[i]); + } + + tensor_Destination.sync_device(); + tensor_Source.sync_device(); + + // + // Launch the kernel + // + dim3 grid(1,1,1); + dim3 block(kN / kV, 1, 1); + + test_Epilogue_thread_activation<<< grid, block >>>( + tensor_Destination.device_data(), + tensor_Source.device_data()); + + tensor_Destination.sync_host(); + + // + // Verify + // + + for (int i = 0; i < kN; ++i) { + Element 
input = Element(GELU_golden_input[i]); + Element got = tensor_Destination.host_data(i); + Element expected = Element(GELU_golden_output[i]); + + double rel_error = (double(got) - double(expected)) / double(expected); + + double tolerance_override = tolerance; + + switch (i) { + case 36: tolerance_override = 0.006; break; + case 77: tolerance_override = 0.009; break; + case 95: tolerance_override = 0.008; break; + case 112: tolerance_override = 0.007; break; + case 171: tolerance_override = 0.006; break; + case 203: tolerance_override = 0.03; break; + case 207: tolerance_override = 0.15; break; + } + + EXPECT_LT(std::abs(rel_error), tolerance_override) + << "Input[" << i << "]: " << input << ", Got: " << got << ", expected: " << expected; + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/test/unit/epilogue/thread/linear_combination.cu b/test/unit/epilogue/thread/linear_combination.cu index 3eb31316..ae154363 100644 --- a/test/unit/epilogue/thread/linear_combination.cu +++ b/test/unit/epilogue/thread/linear_combination.cu @@ -159,3 +159,41 @@ TEST(Epilogue_thread_linear_combination_gelu, device_side_f16_f16_ptr) { } ///////////////////////////////////////////////////////////////////////////////////////////////// + +TEST(Epilogue_thread_linear_combination_gelu_taylor, device_side_f16_f16_ptr) { + + using Element = cutlass::half_t; + using ElementOutput = cutlass::half_t; + int const kCount = 8; + + using LinearCombinationGELU = cutlass::epilogue::thread::LinearCombinationGELU< + ElementOutput, + kCount, + Element, + Element>; + + Element alpha = Element(1); + Element beta = Element(0); + + typename LinearCombinationGELU::Params params(&alpha, &beta); + + LinearCombinationGELU linear_combination_op(params); + + cutlass::Array accum; + + for (int i = 0; i < kCount; ++i) { + accum[i] = Element((float)i * 0.3f); + } + + cutlass::Array destination = linear_combination_op(accum, accum); + 
cutlass::epilogue::thread::GELU gelu_func; + + for (int i = 0; i < kCount; ++i) { + ElementOutput expected = gelu_func(accum[i]); + ElementOutput got = destination[i]; + EXPECT_TRUE(expected == got); + } +} + +///////////////////////////////////////////////////////////////////////////////////////////////// + diff --git a/tools/library/include/cutlass/library/library.h b/tools/library/include/cutlass/library/library.h index b3c593d3..3c7da39f 100644 --- a/tools/library/include/cutlass/library/library.h +++ b/tools/library/include/cutlass/library/library.h @@ -51,7 +51,6 @@ #include "cutlass/matrix_coord.h" #include "cutlass/tensor_coord.h" #include "cutlass/layout/tensor.h" - #include "cutlass/gemm/gemm.h" #include "cutlass/conv/convolution.h" #include "cutlass/conv/conv2d_problem_size.h" diff --git a/tools/library/scripts/generator.py b/tools/library/scripts/generator.py index e5bc3464..daf793fe 100644 --- a/tools/library/scripts/generator.py +++ b/tools/library/scripts/generator.py @@ -162,10 +162,11 @@ def CreateConv2dOperator(manifest, layout, tile_descriptions, data_type, alignme # iterator algorithm (analytic and optimized) iterator_algorithms = [IteratorAlgorithm.Analytic, IteratorAlgorithm.Optimized] - # by default, only generate the largest tile size + # by default, only generate the largest tile size, largest alignment, and optimized iterator if manifest.args.kernels == '': tile_descriptions = [tile_descriptions[0],] alignment_constraints = [alignment_constraints[0],] + iterator_algorithms = [IteratorAlgorithm.Optimized] operations = [] @@ -212,12 +213,21 @@ def CreateConv2dOperator(manifest, layout, tile_descriptions, data_type, alignme # better for problem sizes with large activation channel count swizzling_functor_strided_dgrad_ = SwizzlingFunctor.StridedDgradIdentity1 - new_operation = Conv2dOperation(ConvKind.Dgrad, IteratorAlgorithm.Analytic, tile.minimum_compute_capability, tile,\ - A, B, C, element_epilogue, StrideSupport.Strided, epilogue_functor, 
swizzling_functor_strided_dgrad_) + if IteratorAlgorithm.Analytic in iterator_algorithms: + new_operation = Conv2dOperation(ConvKind.Dgrad, IteratorAlgorithm.Analytic, tile.minimum_compute_capability, tile,\ + A, B, C, element_epilogue, StrideSupport.Strided, epilogue_functor, swizzling_functor_strided_dgrad_) - manifest.append(new_operation) - operations.append(new_operation) + manifest.append(new_operation) + operations.append(new_operation) + # Strided support for Optimized Dgrad + if IteratorAlgorithm.Optimized in iterator_algorithms: + new_operation = Conv2dOperation(ConvKind.Dgrad, IteratorAlgorithm.Optimized, tile.minimum_compute_capability, tile,\ + A, B, C, element_epilogue, StrideSupport.Strided, epilogue_functor, swizzling_functor_strided_dgrad_) + + manifest.append(new_operation) + operations.append(new_operation) + # # Conv2d Wgrad # @@ -246,34 +256,70 @@ def CreateConv3dOperator(manifest, layout, tile_descriptions, data_type, alignme # iterator algorithm (analytic and optimized) iterator_algorithms = [IteratorAlgorithm.Analytic, IteratorAlgorithm.Optimized] - # by default, only generate the largest tile size + # by default, only generate the largest tile size and optimized iterators if manifest.args.kernels == '': tile_descriptions = [tile_descriptions[0],] + iterator_algorithms = [IteratorAlgorithm.Optimized] operations = [] + # All tile sizes for Conv3dFprop and Conv3dWgrad for tile in tile_descriptions: - for conv_kind in conv_kinds: + A = TensorDescription(element_a, layout, alignment) + B = TensorDescription(element_b, layout, alignment) + C = TensorDescription(element_c, layout, alignment_c) + + # + # Conv3d Fprop + # + if ConvKind.Fprop in conv_kinds: + # Strided support for Analytic and Optimized Fprop for iterator_algorithm in iterator_algorithms: - A = TensorDescription(element_a, layout, alignment) - B = TensorDescription(element_b, layout, alignment) - C = TensorDescription(element_c, layout, alignment_c) - - # optimized conv3d iterator 
algorithm is only for Wgrad - if (iterator_algorithm == IteratorAlgorithm.Optimized) \ - and ((conv_kind == ConvKind.Fprop) or (conv_kind == ConvKind.Dgrad)): - continue - - # strided support for Fprop (Analytic/Optimized), Dgrad (Analytic), and Wgrad (Analytic) - new_operation = Conv3dOperation(conv_kind, iterator_algorithm, tile.minimum_compute_capability, tile,\ - A, B, C, element_epilogue, StrideSupport.Strided, epilogue_functor) - + new_operation = Conv3dOperation(ConvKind.Fprop, iterator_algorithm, tile.minimum_compute_capability, tile,\ + A, B, C, element_epilogue, StrideSupport.Strided) + manifest.append(new_operation) + operations.append(new_operation) + # + # Conv3d Wgrad + # + if ConvKind.Wgrad in conv_kinds: + + # Strided support for Analytic and Optimized Wgrad + for iterator_algorithm in iterator_algorithms: + new_operation = Conv3dOperation(ConvKind.Wgrad, iterator_algorithm, tile.minimum_compute_capability, tile,\ + A, B, C, element_epilogue, StrideSupport.Strided, epilogue_functor) manifest.append(new_operation) operations.append(new_operation) + # All tile sizes for Conv3dDgrad + for tile in tile_descriptions: + + A = TensorDescription(element_a, layout, alignment) + B = TensorDescription(element_b, layout, alignment) + C = TensorDescription(element_c, layout, alignment_c) + + # + # Conv3d Dgrad + # + if ConvKind.Dgrad in conv_kinds: + # Unity stride for Optimized Dgrad + new_operation = Conv3dOperation(ConvKind.Dgrad, IteratorAlgorithm.Optimized, tile.minimum_compute_capability, tile,\ + A, B, C, element_epilogue, StrideSupport.Unity, epilogue_functor) + + manifest.append(new_operation) + operations.append(new_operation) + + # Strided support for Analytic Dgrad + # Conv3dDgrad has a naive strided support which does not cut down redundant MMAs + new_operation = Conv3dOperation(ConvKind.Dgrad, IteratorAlgorithm.Analytic, tile.minimum_compute_capability, tile,\ + A, B, C, element_epilogue, StrideSupport.Strided, epilogue_functor) + + 
manifest.append(new_operation) + operations.append(new_operation) return operations + ################################################################################################### ################################################################################################### @@ -1158,7 +1204,7 @@ def GenerateSM75_TensorOp_88128(manifest, args): data_type = [DataType.b1, DataType.b1, DataType.s32, DataType.s32] CreateGemmOperator(manifest, layouts, tile_descriptions, \ - data_type, alignment_constraints, None, EpilogueFunctor.LinearCombinationClamp) + data_type, alignment_constraints) # @@ -1934,7 +1980,7 @@ def GenerateSM80_TensorOp_168256(manifest, args): data_type = [DataType.b1, DataType.b1, DataType.s32, DataType.s32] CreateGemmOperator(manifest, layouts, tile_descriptions, \ - data_type, alignment_constraints, None, EpilogueFunctor.LinearCombinationClamp) + data_type, alignment_constraints) # diff --git a/tools/library/src/util.cu b/tools/library/src/util.cu index 6792cf68..a9cef1c5 100644 --- a/tools/library/src/util.cu +++ b/tools/library/src/util.cu @@ -28,7 +28,6 @@ #include "cutlass/cutlass.h" #include "cutlass/numeric_types.h" #include "cutlass/complex.h" - #include "cutlass/layout/matrix.h" #include "cutlass/library/library.h" diff --git a/tools/profiler/src/cublas_helpers.h b/tools/profiler/src/cublas_helpers.h index e6122939..200d9130 100644 --- a/tools/profiler/src/cublas_helpers.h +++ b/tools/profiler/src/cublas_helpers.h @@ -34,7 +34,6 @@ #include "cutlass/cutlass.h" #include "cutlass/library/library.h" #include "cutlass/library/util.h" - #include "options.h" ///////////////////////////////////////////////////////////////////////////////////////////////// diff --git a/tools/util/include/cutlass/util/reference/device/tensor_fill.h b/tools/util/include/cutlass/util/reference/device/tensor_fill.h index a9191048..6612d8e3 100644 --- a/tools/util/include/cutlass/util/reference/device/tensor_fill.h +++ 
b/tools/util/include/cutlass/util/reference/device/tensor_fill.h @@ -50,7 +50,6 @@ #include "cutlass/array.h" #include "cutlass/complex.h" #include "cutlass/tensor_view.h" - #include "cutlass/util/reference/device/tensor_foreach.h" #include "cutlass/util/distribution.h" diff --git a/tools/util/include/cutlass/util/reference/host/tensor_fill.h b/tools/util/include/cutlass/util/reference/host/tensor_fill.h index 8c4b4177..0bc6c57c 100644 --- a/tools/util/include/cutlass/util/reference/host/tensor_fill.h +++ b/tools/util/include/cutlass/util/reference/host/tensor_fill.h @@ -42,7 +42,6 @@ #include "cutlass/subbyte_reference.h" #include "cutlass/tensor_view.h" #include "cutlass/tensor_view_planar_complex.h" - #include "cutlass/util/distribution.h" #include "tensor_foreach.h"