43 using data_store = std::variant<non_owning_buffer<device_type::cpu, T>,
48 buffer() : device_{}, data_{}, size_{}, cached_ptr{nullptr} {}
55 : device_{[mem_type, &
device]() {
63 data_{[
this, mem_type, size, stream]() {
64 auto result = data_store{};
66 case device_type::cpu: result = owning_buffer<device_type::cpu, T>{size};
break;
68 result = owning_buffer<device_type::gpu, T>{std::get<1>(device_), size, stream};
75 auto result =
static_cast<T*
>(
nullptr);
76 switch (data_.index()) {
77 case 0: result = std::get<0>(data_).get();
break;
78 case 1: result = std::get<1>(data_).get();
break;
79 case 2: result = std::get<2>(data_).get();
break;
80 case 3: result = std::get<3>(data_).get();
break;
89 : device_{[mem_type, &device]() {
97 data_{[input_data, mem_type]() {
98 auto result = data_store{};
100 case device_type::cpu: result = non_owning_buffer<device_type::cpu, T>{input_data};
break;
101 case device_type::gpu: result = non_owning_buffer<device_type::gpu, T>{input_data};
break;
106 cached_ptr{[
this]() {
107 auto result =
static_cast<T*
>(
nullptr);
108 switch (data_.index()) {
109 case 0: result = std::get<0>(data_).get();
break;
110 case 1: result = std::get<1>(data_).get();
break;
111 case 2: result = std::get<2>(data_).get();
break;
112 case 3: result = std::get<3>(data_).get();
break;
129 : device_{[mem_type, &device]() {
132 case device_type::cpu: result = device_id<device_type::cpu>{device};
break;
133 case device_type::gpu: result = device_id<device_type::gpu>{device};
break;
137 data_{[
this, &other, mem_type, stream]() {
138 auto result = data_store{};
139 auto result_data =
static_cast<T*
>(
nullptr);
140 if (mem_type == device_type::cpu) {
141 auto buf = owning_buffer<device_type::cpu, T>(other.
size());
142 result_data = buf.get();
143 result = std::move(buf);
144 }
else if (mem_type == device_type::gpu) {
145 auto buf = owning_buffer<device_type::gpu, T>(std::get<1>(device_), other.
size(), stream);
146 result_data = buf.get();
147 result = std::move(buf);
153 cached_ptr{[
this]() {
154 auto result =
static_cast<T*
>(
nullptr);
155 switch (data_.index()) {
156 case 0: result = std::get<0>(data_).get();
break;
157 case 1: result = std::get<1>(data_).get();
break;
158 case 2: result = std::get<2>(data_).get();
break;
159 case 3: result = std::get<3>(data_).get();
break;
182 swap(first.device_, second.device_);
183 swap(first.data_, second.data_);
184 swap(first.size_, second.size_);
185 swap(first.cached_ptr, second.cached_ptr);
199 : device_{[mem_type, &device]() {
207 data_{[&other, mem_type, device, stream]() {
208 auto result = data_store{};
210 result = std::move(other.data_);
212 auto* result_data =
static_cast<T*
>(
nullptr);
213 if (mem_type == device_type::cpu) {
214 auto buf = owning_buffer<device_type::cpu, T>{other.
size()};
215 result_data = buf.get();
216 result = std::move(buf);
217 }
else if (mem_type == device_type::gpu) {
218 auto buf = owning_buffer<device_type::gpu, T>{device, other.
size(), stream};
219 result_data = buf.get();
220 result = std::move(buf);
227 cached_ptr{[
this]() {
228 auto result =
static_cast<T*
>(
nullptr);
229 switch (data_.index()) {
230 case 0: result = std::get<0>(data_).get();
break;
231 case 1: result = std::get<1>(data_).get();
break;
232 case 2: result = std::get<2>(data_).get();
break;
233 case 3: result = std::get<3>(data_).get();
break;
254 data_ = std::move(other.data_);
255 device_ = std::move(other.device_);
256 size_ = std::move(other.size_);
257 cached_ptr = std::move(other.cached_ptr);
263 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
264 buffer(iter_t
const& begin, iter_t
const& end)
265 :
buffer{static_cast<size_t>(std::distance(begin, end))}
267 auto index = std::size_t{};
268 std::for_each(begin, end, [&index,
this](
auto&& val) { data()[index++] = val; });
273 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
281 typename = decltype(*std::declval<iter_t&>(),
void(), ++std::declval<iter_t&>(),
void())>
291 auto size() const noexcept {
return size_; }
296 if (device_.index() == 0) {
297 result = device_type::cpu;
299 result = device_type::gpu;
304 auto device() const noexcept {
return device_; }
309 switch (device_.index()) {
310 case 0: result = std::get<0>(device_).value();
break;
311 case 1: result = std::get<1>(device_).value();
break;
324 template <
bool bounds_check,
typename T,
typename U>
332 if constexpr (bounds_check) {
333 if (src.
size() - src_offset < size || dst.
size() - dst_offset < size) {
334 throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
338 src.
data() + src_offset,
345 template <
bool bounds_check,
typename T,
typename U>
348 copy<bounds_check>(dst, src, 0, 0, src.
size(), stream);
350 template <
bool bounds_check,
typename T,
typename U>
356 template <
bool bounds_check,
typename T,
typename U>
364 if constexpr (bounds_check) {
365 if (src.size() - src_offset < size || dst.
size() - dst_offset < size) {
366 throw out_of_bounds(
"Attempted copy to or from buffer of inadequate size");
369 copy(dst.data() + dst_offset,
370 src.
data() + src_offset,
377 template <
bool bounds_check,
typename T,
typename U>
383 copy<bounds_check>(dst, src, dst_offset, 0, src.
size(), stream);
386 template <
bool bounds_check,
typename T,
typename U>
389 copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
391 template <
bool bounds_check,
typename T,
typename U>
394 copy<bounds_check>(dst, src, 0, 0, src.size(),
cuda_stream{});
#define DEVICE
Definition: gpu_support.hpp:34
#define HOST
Definition: gpu_support.hpp:33
Definition: buffer.hpp:33
const_agnostic_same_t< T, U > copy(buffer< T > &&dst, buffer< U > &&src)
Definition: buffer.hpp:392
int cuda_stream
Definition: cuda_stream.hpp:25
std::enable_if_t< std::is_same_v< std::remove_const_t< T >, std::remove_const_t< U > >, V > const_agnostic_same_t
Definition: const_agnostic.hpp:22
device_type
Definition: device_type.hpp:18
std::variant< device_id< device_type::cpu >, device_id< device_type::gpu > > device_id_variant
Definition: device_id.hpp:30
A container which may or may not own its own data on host or device.
Definition: buffer.hpp:39
T value_type
Definition: buffer.hpp:41
buffer(iter_t const &begin, iter_t const &end, device_type mem_type, int device, cuda_stream stream=cuda_stream{})
Definition: buffer.hpp:282
buffer(buffer< T > const &other, device_type mem_type, int device=0, cuda_stream stream=cuda_stream{})
Construct one buffer from another in the given memory location (either on host or on device). A buffer...
Definition: buffer.hpp:125
buffer(iter_t const &begin, iter_t const &end)
Definition: buffer.hpp:264
std::variant< non_owning_buffer< device_type::cpu, T >, non_owning_buffer< device_type::gpu, T >, owning_buffer< device_type::cpu, T >, owning_buffer< device_type::gpu, T > > data_store
Definition: buffer.hpp:46
buffer(buffer< T > const &other, cuda_stream stream=cuda_stream{})
Create owning copy of existing buffer with given stream. The memory type of this new buffer will be th...
Definition: buffer.hpp:170
buffer()
Definition: buffer.hpp:48
auto size() const noexcept
Definition: buffer.hpp:291
buffer(buffer< T > &&other, device_type mem_type, int device)
Definition: buffer.hpp:239
friend void swap(buffer< T > &first, buffer< T > &second)
Create owning copy of existing buffer. The memory type of this new buffer will be the same as the orig...
Definition: buffer.hpp:179
buffer(index_type size, device_type mem_type=device_type::cpu, int device=0, cuda_stream stream=0)
Definition: buffer.hpp:51
buffer< T > & operator=(buffer< T > &&other) noexcept
Definition: buffer.hpp:252
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:292
buffer(iter_t const &begin, iter_t const &end, device_type mem_type)
Definition: buffer.hpp:274
std::size_t index_type
Definition: buffer.hpp:40
auto memory_type() const noexcept
Definition: buffer.hpp:293
buffer(T *input_data, index_type size, device_type mem_type=device_type::cpu, int device=0)
Definition: buffer.hpp:88
buffer< T > & operator=(buffer< T > const &other)
Definition: buffer.hpp:187
buffer(buffer< T > &&other) noexcept
Definition: buffer.hpp:248
buffer(buffer< T > &&other, device_type mem_type)
Definition: buffer.hpp:243
auto device_index() const noexcept
Definition: buffer.hpp:306
auto device() const noexcept
Definition: buffer.hpp:304
buffer(buffer< T > &&other, device_type mem_type, int device, cuda_stream stream)
Move from existing buffer unless a copy is necessary based on memory location.
Definition: buffer.hpp:198
Definition: exceptions.hpp:29