cutlass/tools/util/host_tensor.h

/***************************************************************************************************
* Copyright (c) 2017-2019, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted
* provided that the following conditions are met:
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright notice, this list of
* conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
* * Neither the name of the NVIDIA CORPORATION nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior written
* permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
* FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NVIDIA CORPORATION BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
#pragma once
/*! \file
\brief HostTensor provides management for both host and device memory.

HostTensor allocates host and device memory upon construction. Basic element-wise operations on
host memory synchronize device memory automatically. Explicit copy operations provide abstractions
for CUDA memcpy operations.

Call device_{data, ref, view} to access device memory allocations.

See cutlass/tensor_ref.h, cutlass/tensor_view.h, and tools/util/host_tensor_view.h for more details.
*/
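
// Example usage (a minimal sketch, assuming the default rank-4 float tensor;
// the kernel shown is hypothetical):
//
//   cutlass::HostTensor<float> tensor(128);       // allocates 128 elements on host and device
//   tensor.fill_sequential();                     // writes host memory, then syncs the device copy
//   my_kernel<<<1, 128>>>(tensor.device_data());  // hypothetical kernel writes device memory
//   tensor.sync_host();                           // copies device results back into host memory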
#include "cutlass/cutlass.h"
#include "cutlass/matrix_traits.h"
#include "cutlass/tensor_ref.h"
#include "tools/util/device_memory.h"
#include "tools/util/host_tensor_view.h"
#include "tools/util/type_traits.h"
#include <vector>
namespace cutlass {
///////////////////////////////////////////////////////////////////////////////////////////////////
/// Host tensor
template <
/// Scalar data type (may be mapped to compatible types for use on host and device)
typename T,
/// Rank of logical tensor
int Rank_ = 4,
/// Maps a Coord<Rank_> in the logical tensor index space to the internal n-D array
typename MapFunc_ = IdentityTensorMapFunc<Rank_>,
/// Rank of internal n-D array
int StorageRank_ = MapFunc_::kStorageRank,
/// Index type used for coordinates
typename Index_ = int,
/// Index type used for offsets and pointer differences
typename LongIndex_ = long long
>
class HostTensor : public HostTensorView<
typename TypeTraits<T>::host_type,
Rank_,
MapFunc_,
StorageRank_,
Index_,
LongIndex_> {
public:
/// Type used for host-side allocations
typedef typename TypeTraits<T>::host_type HostType;
/// Type used for device-side allocations
typedef typename TypeTraits<T>::device_type DeviceType;
/// Base class
typedef HostTensorView<
typename TypeTraits<T>::host_type,
Rank_,
MapFunc_,
StorageRank_,
Index_,
LongIndex_> Base;
/// Type used to compute the offset of an element to the base of a tensor
typedef LongIndex_ LongIndex;
/// Tensor reference to device memory
typedef typename cutlass::TensorRef<
DeviceType,
Rank_,
MapFunc_,
StorageRank_,
Index_,
LongIndex_> DeviceTensorRef;
/// Tensor reference to constant device memory
typedef typename DeviceTensorRef::ConstTensorRef ConstDeviceTensorRef;
/// TensorView to device memory
typedef TensorView<
DeviceType,
Rank_,
MapFunc_,
StorageRank_,
Index_,
LongIndex_> DeviceTensorView;
/// Tensor view to constant device memory
typedef typename DeviceTensorView::ConstTensorView ConstDeviceTensorView;
/// Tensor reference to host memory
typedef typename Base::TensorRef_t TensorRef_t;
/// Tensor view to host memory
typedef HostTensorView<
typename TypeTraits<T>::host_type,
Rank_,
MapFunc_,
StorageRank_,
Index_,
LongIndex_> HostTensorView_t;
/// Tensor view to constant host memory
typedef typename HostTensorView_t::ConstTensorView ConstHostTensorView;
/// Coordinate in logical tensor space
typedef typename TensorRef_t::TensorCoord TensorCoord;
/// Coordinate in storage n-D array
typedef typename TensorRef_t::StorageCoord StorageCoord;
/// Stride vector in storage coordinate space
/// The least significant stride is implicitly 1 and is not stored
typedef typename TensorRef_t::StrideVector StrideVector;
/// Rank of internal storage.
static int const kStorageRank = Base::kStorageRank;
private:
/// Host-side memory allocation
std::vector<HostType> host_;
/// Device-side memory
cutlass::device_memory::allocation<DeviceType> device_;
public:
//
// Device and Host Methods
//
/// Default constructor
HostTensor() {}
/// Constructs a tensor by sizing its least significant rank; all other ranks have extent 1
HostTensor(Index_ size_1D, bool device_backed = true) {
this->resize(size_1D, device_backed);
}
/// Helper to construct from stride and size
HostTensor(
StorageCoord const &_stride,
TensorCoord const& _size,
bool _device_backed = true
) {
this->reset(_stride, _size, _device_backed);
}
/// Clears the HostTensor allocation to size/capacity = 0
void reset() {
host_.clear();
device_.reset();
Base::reset();
}
/// Helper to resize the least significant rank
void resize(
Index_ size_1D,
bool _device_backed = true) {
TensorCoord _size;
_size[Base::kRank - 1] = size_1D;
for (int i = 0; i < Base::kRank - 1; ++i) {
_size[i] = 1;
}
StorageCoord _stride;
_stride[Base::kStorageRank - 1] = 1;
for (int i = 0; i < Base::kStorageRank - 1; ++i) {
_stride[i] = size_1D;
}
this->reset(_stride, _size, _device_backed);
}
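
// For example, assuming the default rank-4 IdentityTensorMapFunc (so that
// kStorageRank == 4), resize(16) yields size (1, 1, 1, 16) and storage
// stride (16, 16, 16, 1): every rank but the least significant collapses
// to extent 1.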
/// Reallocates host and, optionally, device memory and updates the base view's reference and size
void reset(
StorageCoord const& stride,
TensorCoord const& size,
bool _device_backed = true) {
// Construct a temporary TensorView so we can calculate the new capacity
size_t _capacity = Base(nullptr, stride, size).capacity();
// Allocate memory
DeviceType* _device_memory = nullptr;
if (_device_backed) {
_device_memory = cutlass::device_memory::allocate<DeviceType>(_capacity);
}
host_.clear();
host_.resize(_capacity);
device_.reset(_device_memory, _capacity);
Base::reset(TensorRef_t(host_.data(), stride), size);
}
/// Accesses a tensor reference to host memory
TensorRef_t host_ref() { return Base::ref(); }

/// Accesses a tensor reference to host memory
TensorRef_t host_ref() const { return Base::ref(); }
/// Accesses a tensor reference to device memory
DeviceTensorRef device_ref() const {
return DeviceTensorRef(device_data(), this->stride());
}
/// Accesses a tensor view of host memory
HostTensorView_t host_view() {
return HostTensorView_t(host_data(), this->stride(), this->size());
}
/// Accesses a tensor view of constant host memory
ConstHostTensorView host_view() const {
return ConstHostTensorView(host_data(), this->stride(), this->size());
}
/// Accesses a tensor view of device memory
DeviceTensorView device_view() const {
return DeviceTensorView(device_data(), this->stride(), this->size());
}
/// Gets pointer to host data
HostType * host_data() { return host_.data(); }
/// Gets pointer to device data
DeviceType* device_data() { return device_.get(); }
/// Gets pointer to host data
HostType const * host_data() const { return host_.data(); }
/// Gets pointer to device data
DeviceType * device_data() const { return device_.get(); }
/// Returns true if device memory is allocated
bool device_backed() const {
return device_.get() != nullptr;
}
/// Copies data from device to host
void sync_host() {
if (device_.get()) {
device_memory::copy_to_host(
host_.data(), reinterpret_cast<HostType const*>(device_.get()), host_.size());
}
}
/// Copies data from host to device
void sync_device() {
if (device_.get()) {
device_memory::copy_to_device(
device_.get(),
reinterpret_cast<DeviceType const*>(host_.data()),
host_.size());
}
}
/// Copy data from a caller-supplied device pointer into host memory
void copy_to_host(DeviceType const* ptr_device) {
device_memory::copy_to_host(
host_.data(), reinterpret_cast<HostType const*>(ptr_device), host_.size());
}
/// Copies host data to a caller-supplied device pointer
void copy_to_device(DeviceType* ptr_device) {
device_memory::copy_to_device(
ptr_device, reinterpret_cast<DeviceType const*>(host_.data()), host_.size());
}
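
// Example: staging through an externally owned device buffer (a sketch; the
// buffer below is allocated with the same device_memory helpers used above):
//
//   float *external = cutlass::device_memory::allocate<float>(tensor.capacity());
//   tensor.copy_to_device(external);   // host memory -> caller-supplied device pointer
//   tensor.copy_to_host(external);     // caller-supplied device pointer -> host memory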
/// Accumulate in place
template <typename SrcTensorView>
HostTensor& operator+=(SrcTensorView const& tensor) {
Base::operator+=(tensor);
sync_device();
return *this;
}
/// Subtract in place
template <typename SrcTensorView>
HostTensor& operator-=(SrcTensorView const& tensor) {
Base::operator-=(tensor);
sync_device();
return *this;
}
/// Multiply in place
template <typename SrcTensorView>
HostTensor& operator*=(SrcTensorView const& tensor) {
Base::operator*=(tensor);
sync_device();
return *this;
}
/// Divide in place
template <typename SrcTensorView>
HostTensor& operator/=(SrcTensorView const& tensor) {
Base::operator/=(tensor);
sync_device();
return *this;
}
/// Fills with random data
template <typename Gen>
void fill_random(Gen generator) {
Base::fill_random(generator);
sync_device();
}
/// Procedurally assigns elements
template <typename Gen>
void generate(Gen generator) {
Base::generate(generator);
sync_device();
}
/// Procedurally visits elements
template <typename Gen>
void visit(Gen& generator) const {
Base::visit(generator);
}
/// initializes with identity
void fill_identity() {
Base::fill_identity();
sync_device();
}
/// computes elements as a linear combination of their coordinates
void fill_linear(TensorCoord v, HostType offset = HostType(0)) {
Base::fill_linear(v, offset);
sync_device();
}
/// fills elements with a sequential progression from offset, incrementing by v
void fill_sequential(HostType v = HostType(1), HostType offset = HostType(0)) {
Base::fill_sequential(v, offset);
sync_device();
}
/// fills with a value
void fill(HostType val = HostType(0)) {
Base::fill(val);
sync_device();
}
/// copies from external data source and performs type conversion
template <
typename SrcType,
typename SrcMapFunc_,
int SrcStorageRank_,
typename SrcIndex_,
typename SrcLongIndex_
>
void fill(
TensorView<SrcType, Base::kRank, SrcMapFunc_, SrcStorageRank_, SrcIndex_, SrcLongIndex_> const& tensor) {
Base::fill(tensor);
sync_device();
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
} // namespace cutlass