Loading [MathJax]/extensions/tex2jax.js
cuML C++ API  23.12
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
buffer.hpp
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #pragma once
17 #include <cstddef>
27 #include <iterator>
28 #include <memory>
29 #include <stdint.h>
30 #include <utility>
31 #include <variant>
32 
33 namespace raft_proto {
38 template <typename T>
39 struct buffer {
40  using index_type = std::size_t;
41  using value_type = T;
42 
43  using data_store = std::variant<non_owning_buffer<device_type::cpu, T>,
47 
/** Construct an empty buffer: device_, data_ and size_ are value-initialized
 *  and the cached data pointer is null. */
48  buffer() : device_{}, data_{}, size_{}, cached_ptr{nullptr} {}
49 
/**
 * Allocating constructor. This listing elides the first line of its signature
 * and the switch cases at listing lines 58-59, so only the visible part is
 * described here.
 *
 * The visible initializers create an owning_buffer of `size` elements on the
 * CPU or GPU according to `mem_type`, store `size`, and cache the pointer
 * returned by get() on whichever data_ alternative is active.
 */
52  device_type mem_type = device_type::cpu,
53  int device = 0,
54  cuda_stream stream = 0)
55  : device_{[mem_type, &device]() {
56  auto result = device_id_variant{};
57  switch (mem_type) {
60  }
61  return result;
62  }()},
// The GPU branch reads std::get<1>(device_); device_ is declared before data_
// in the member list, so it has already been initialized at this point.
63  data_{[this, mem_type, size, stream]() {
64  auto result = data_store{};
65  switch (mem_type) {
66  case device_type::cpu: result = owning_buffer<device_type::cpu, T>{size}; break;
67  case device_type::gpu:
68  result = owning_buffer<device_type::gpu, T>{std::get<1>(device_), size, stream};
69  break;
70  }
71  return result;
72  }()},
73  size_{size},
// Cache the raw pointer of the active data_ alternative; stays null if the
// variant index matches none of the four cases.
74  cached_ptr{[this]() {
75  auto result = static_cast<T*>(nullptr);
76  switch (data_.index()) {
77  case 0: result = std::get<0>(data_).get(); break;
78  case 1: result = std::get<1>(data_).get(); break;
79  case 2: result = std::get<2>(data_).get(); break;
80  case 3: result = std::get<3>(data_).get(); break;
81  }
82  return result;
83  }()}
84  {
85  }
86 
/**
 * Construct a buffer around an existing pointer by storing it in a
 * non_owning_buffer.
 *
 * @param input_data Pointer placed in the non_owning_buffer alternative.
 * @param size Value recorded as the buffer size.
 * @param mem_type Selects the cpu or gpu alternative for both the device id
 *                 and the data store.
 * @param device Index wrapped in the device_id for `mem_type`.
 */
88  buffer(T* input_data, index_type size, device_type mem_type = device_type::cpu, int device = 0)
89  : device_{[mem_type, &device]() {
90  auto result = device_id_variant{};
91  switch (mem_type) {
92  case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
93  case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
94  }
95  return result;
96  }()},
97  data_{[input_data, mem_type]() {
98  auto result = data_store{};
99  switch (mem_type) {
100  case device_type::cpu: result = non_owning_buffer<device_type::cpu, T>{input_data}; break;
101  case device_type::gpu: result = non_owning_buffer<device_type::gpu, T>{input_data}; break;
102  }
103  return result;
104  }()},
105  size_{size},
// Cache the raw pointer of the active data_ alternative.
106  cached_ptr{[this]() {
107  auto result = static_cast<T*>(nullptr);
108  switch (data_.index()) {
109  case 0: result = std::get<0>(data_).get(); break;
110  case 1: result = std::get<1>(data_).get(); break;
111  case 2: result = std::get<2>(data_).get(); break;
112  case 3: result = std::get<3>(data_).get(); break;
113  }
114  return result;
115  }()}
116  {
117  }
118 
/**
 * Construct an owning buffer in the requested memory location and copy the
 * contents of `other` into it.
 *
 * @param other Source buffer; its size() elements are copied.
 * @param mem_type Memory location of the new buffer.
 * @param device Index wrapped in the device_id for `mem_type`.
 * @param stream Passed to the GPU owning_buffer and to copy().
 */
125  buffer(buffer<T> const& other,
126  device_type mem_type,
127  int device = 0,
128  cuda_stream stream = cuda_stream{})
129  : device_{[mem_type, &device]() {
130  auto result = device_id_variant{};
131  switch (mem_type) {
132  case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
133  case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
134  }
135  return result;
136  }()},
// The GPU branch reads std::get<1>(device_); device_ is declared before data_
// in the member list, so it has already been initialized at this point.
137  data_{[this, &other, mem_type, stream]() {
138  auto result = data_store{};
139  auto result_data = static_cast<T*>(nullptr);
140  if (mem_type == device_type::cpu) {
141  auto buf = owning_buffer<device_type::cpu, T>(other.size());
// Take the raw pointer before buf is moved into the variant.
142  result_data = buf.get();
143  result = std::move(buf);
144  } else if (mem_type == device_type::gpu) {
145  auto buf = owning_buffer<device_type::gpu, T>(std::get<1>(device_), other.size(), stream);
146  result_data = buf.get();
147  result = std::move(buf);
148  }
// NOTE(review): if mem_type is neither cpu nor gpu, result_data is still null
// when it is passed to copy() here.
149  copy(result_data, other.data(), other.size(), mem_type, other.memory_type(), stream);
150  return result;
151  }()},
152  size_{other.size()},
// Cache the raw pointer of the active data_ alternative.
153  cached_ptr{[this]() {
154  auto result = static_cast<T*>(nullptr);
155  switch (data_.index()) {
156  case 0: result = std::get<0>(data_).get(); break;
157  case 1: result = std::get<1>(data_).get(); break;
158  case 2: result = std::get<2>(data_).get(); break;
159  case 3: result = std::get<3>(data_).get(); break;
160  }
161  return result;
162  }()}
163  {
164  }
165 
/**
 * Copy constructor: delegates to the copy-into-location constructor using
 * `other`'s own memory type and device index, with the given stream.
 */
170  buffer(buffer<T> const& other, cuda_stream stream = cuda_stream{})
171  : buffer(other, other.memory_type(), other.device_index(), stream)
172  {
173  }
174 
/** Exchange all four members (device_, data_, size_, cached_ptr) of two buffers. */
179  friend void swap(buffer<T>& first, buffer<T>& second)
180  {
// Bring std::swap into scope so unqualified calls can also find overloads via ADL.
181  using std::swap;
182  swap(first.device_, second.device_);
183  swap(first.data_, second.data_);
184  swap(first.size_, second.size_);
185  swap(first.cached_ptr, second.cached_ptr);
186  }
// Body of the copy-assignment operator (its signature sits on listing line
// 187, which this listing elides): copy `other`, then swap the copy into *this.
188  {
189  auto copy = other;
190  swap(*this, copy);
191  return *this;
192  }
193 
/**
 * Construct from an rvalue buffer in the requested memory location.
 *
 * If `mem_type` and `device` match `other`'s, other.data_ is moved into the new
 * buffer. Otherwise a new owning_buffer is allocated and `other`'s contents
 * are copied into it.
 *
 * @param other Source buffer.
 * @param mem_type Memory location of the new buffer.
 * @param device Device index of the new buffer.
 * @param stream Passed to the GPU owning_buffer and to copy().
 */
198  buffer(buffer<T>&& other, device_type mem_type, int device, cuda_stream stream)
199  : device_{[mem_type, &device]() {
200  auto result = device_id_variant{};
201  switch (mem_type) {
202  case device_type::cpu: result = device_id<device_type::cpu>{device}; break;
203  case device_type::gpu: result = device_id<device_type::gpu>{device}; break;
204  }
205  return result;
206  }()},
207  data_{[&other, mem_type, device, stream]() {
208  auto result = data_store{};
// Same location: take over other's data store without copying elements.
209  if (mem_type == other.memory_type() && device == other.device_index()) {
210  result = std::move(other.data_);
211  } else {
212  auto* result_data = static_cast<T*>(nullptr);
213  if (mem_type == device_type::cpu) {
214  auto buf = owning_buffer<device_type::cpu, T>{other.size()};
// Take the raw pointer before buf is moved into the variant.
215  result_data = buf.get();
216  result = std::move(buf);
217  } else if (mem_type == device_type::gpu) {
218  auto buf = owning_buffer<device_type::gpu, T>{device, other.size(), stream};
219  result_data = buf.get();
220  result = std::move(buf);
221  }
222  copy(result_data, other.data(), other.size(), mem_type, other.memory_type(), stream);
223  }
224  return result;
225  }()},
226  size_{other.size()},
// Cache the raw pointer of the active data_ alternative.
227  cached_ptr{[this]() {
228  auto result = static_cast<T*>(nullptr);
229  switch (data_.index()) {
230  case 0: result = std::get<0>(data_).get(); break;
231  case 1: result = std::get<1>(data_).get(); break;
232  case 2: result = std::get<2>(data_).get(); break;
233  case 3: result = std::get<3>(data_).get(); break;
234  }
235  return result;
236  }()}
237  {
238  }
/** Same as the four-argument move constructor, with a value-initialized cuda_stream. */
239  buffer(buffer<T>&& other, device_type mem_type, int device)
240  : buffer{std::move(other), mem_type, device, cuda_stream{}}
241  {
242  }
/** Same as the four-argument move constructor, with device 0 and a value-initialized cuda_stream. */
243  buffer(buffer<T>&& other, device_type mem_type)
244  : buffer{std::move(other), mem_type, 0, cuda_stream{}}
245  {
246  }
247 
/**
 * Move constructor. Delegates to the four-argument move constructor with
 * `other`'s own memory type and device index, so that constructor's
 * same-location test holds and other.data_ is moved rather than copied.
 */
248  buffer(buffer<T>&& other) noexcept
249  : buffer{std::move(other), other.memory_type(), other.device_index(), cuda_stream{}}
250  {
251  }
/**
 * Move assignment: move-assigns each member from `other` and returns *this.
 *
 * NOTE(review): size_ and cached_ptr are a plain integer and a raw pointer, so
 * std::move copies them; `other` keeps its previous size_ and cached_ptr
 * values after this call.
 */
252  buffer<T>& operator=(buffer<T>&& other) noexcept
253  {
254  data_ = std::move(other.data_);
255  device_ = std::move(other.device_);
256  size_ = std::move(other.size_);
257  cached_ptr = std::move(other.cached_ptr);
258  return *this;
259  }
260 
/**
 * Construct a buffer from the iterator range [begin, end).
 *
 * Delegates to the size-taking constructor with std::distance(begin, end)
 * elements, then assigns each element of the range into data() in order.
 * The defaulted second template parameter only participates in overload
 * resolution when iter_t supports dereference and pre-increment.
 *
 * NOTE(review): std::for_each is declared in <algorithm>, which is not among
 * the includes visible in this listing (listing lines 18-26 are elided);
 * confirm it is included.
 */
261  template <
262  typename iter_t,
263  typename = decltype(*std::declval<iter_t&>(), void(), ++std::declval<iter_t&>(), void())>
264  buffer(iter_t const& begin, iter_t const& end)
265  : buffer{static_cast<size_t>(std::distance(begin, end))}
266  {
267  auto index = std::size_t{};
268  std::for_each(begin, end, [&index, this](auto&& val) { data()[index++] = val; });
269  }
270 
/**
 * Construct a buffer from [begin, end) in the given memory type: builds a
 * temporary buffer from the range and passes it to the (buffer&&, mem_type)
 * constructor.
 */
271  template <
272  typename iter_t,
273  typename = decltype(*std::declval<iter_t&>(), void(), ++std::declval<iter_t&>(), void())>
274  buffer(iter_t const& begin, iter_t const& end, device_type mem_type)
275  : buffer{buffer{begin, end}, mem_type}
276  {
277  }
278 
/**
 * Construct a buffer from [begin, end) in the given memory type and device:
 * builds a temporary buffer from the range and passes it, with `device` and
 * `stream`, to the four-argument move constructor.
 */
279  template <
280  typename iter_t,
281  typename = decltype(*std::declval<iter_t&>(), void(), ++std::declval<iter_t&>(), void())>
282  buffer(iter_t const& begin,
283  iter_t const& end,
284  device_type mem_type,
285  int device,
286  cuda_stream stream = cuda_stream{})
287  : buffer{buffer{begin, end}, mem_type, device, stream}
288  {
289  }
290 
/** @return The stored size_ value. */
291  auto size() const noexcept { return size_; }
/** @return The cached raw pointer (cached_ptr); null for a default-constructed buffer. */
292  HOST DEVICE auto* data() const noexcept { return cached_ptr; }
/**
 * @return device_type::cpu when device_ holds its first alternative (index 0),
 *         device_type::gpu otherwise.
 */
293  auto memory_type() const noexcept
294  {
295  auto result = device_type{};
296  if (device_.index() == 0) {
297  result = device_type::cpu;
298  } else {
299  result = device_type::gpu;
300  }
301  return result;
302  }
303 
/** @return A copy of the device_ variant. */
304  auto device() const noexcept { return device_; }
305 
/**
 * @return value() of the device id alternative currently held by device_, as
 *         an int; 0 if the variant index is neither 0 nor 1.
 */
306  auto device_index() const noexcept
307  {
308  auto result = int{};
309  switch (device_.index()) {
310  case 0: result = std::get<0>(device_).value(); break;
311  case 1: result = std::get<1>(device_).value(); break;
312  }
313  return result;
314  }
/** Defaulted destructor; members are destroyed by their own destructors. */
315  ~buffer() = default;
316 
317  private:
// Device id variant. Declared first so that data_ initializers may read it.
318  device_id_variant device_;
// Owning or non-owning storage for the elements.
319  data_store data_;
// Value reported by size().
320  index_type size_;
// Raw pointer taken from the active data_ alternative's get() at construction.
321  T* cached_ptr;
322 };
323 
/**
 * Copy `size` elements from `src`, starting at `src_offset`, into `dst`,
 * starting at `dst_offset`, using `stream`. (The first line of the signature
 * sits on listing line 325, which this listing elides.)
 *
 * When `bounds_check` is true, throws out_of_bounds if either range does not
 * fit inside its buffer. The offsets are compared against the sizes before
 * subtracting: index_type is std::size_t, so an offset larger than the buffer
 * size would otherwise wrap around to a huge value and let the check pass.
 */
324 template <bool bounds_check, typename T, typename U>
326  buffer<U> const& src,
327  typename buffer<T>::index_type dst_offset,
328  typename buffer<U>::index_type src_offset,
329  typename buffer<T>::index_type size,
330  cuda_stream stream)
331 {
332  if constexpr (bounds_check) {
333  if (src_offset > src.size() || dst_offset > dst.size() || src.size() - src_offset < size || dst.size() - dst_offset < size) {
334  throw out_of_bounds("Attempted copy to or from buffer of inadequate size");
335  }
336  }
337  copy(dst.data() + dst_offset,
338  src.data() + src_offset,
339  size,
340  dst.memory_type(),
341  src.memory_type(),
342  stream);
343 }
344 
// Copy all src.size() elements of src into dst starting at offset 0 on the
// given stream, by forwarding to the offset-taking copy. (Signature line 346
// is elided in this listing.)
345 template <bool bounds_check, typename T, typename U>
347 {
348  copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
349 }
// Copy all src.size() elements of src into dst starting at offset 0 with a
// value-initialized cuda_stream. (Signature line 351 is elided in this listing.)
350 template <bool bounds_check, typename T, typename U>
352 {
353  copy<bounds_check>(dst, src, 0, 0, src.size(), cuda_stream{});
354 }
355 
/**
 * Rvalue-source overload: copy `size` elements from `src`, starting at
 * `src_offset`, into `dst`, starting at `dst_offset`, using `stream`. (The
 * first line of the signature sits on listing line 357, which this listing
 * elides.)
 *
 * When `bounds_check` is true, throws out_of_bounds if either range does not
 * fit inside its buffer. The offsets are compared against the sizes before
 * subtracting: index_type is std::size_t, so an offset larger than the buffer
 * size would otherwise wrap around to a huge value and let the check pass.
 */
356 template <bool bounds_check, typename T, typename U>
358  buffer<U>&& src,
359  typename buffer<T>::index_type dst_offset,
360  typename buffer<U>::index_type src_offset,
361  typename buffer<T>::index_type size,
362  cuda_stream stream)
363 {
364  if constexpr (bounds_check) {
365  if (src_offset > src.size() || dst_offset > dst.size() || src.size() - src_offset < size || dst.size() - dst_offset < size) {
366  throw out_of_bounds("Attempted copy to or from buffer of inadequate size");
367  }
368  }
369  copy(dst.data() + dst_offset,
370  src.data() + src_offset,
371  size,
372  dst.memory_type(),
373  src.memory_type(),
374  stream);
375 }
376 
// Copy all src.size() elements of src into dst starting at dst_offset (source
// offset 0) on the given stream. (Signature line 378 is elided in this listing.)
377 template <bool bounds_check, typename T, typename U>
379  buffer<U>&& src,
380  typename buffer<T>::index_type dst_offset,
381  cuda_stream stream)
382 {
383  copy<bounds_check>(dst, src, dst_offset, 0, src.size(), stream);
384 }
385 
// Copy all src.size() elements of src into dst starting at offset 0 on the
// given stream. (Signature line 387 is elided in this listing.)
386 template <bool bounds_check, typename T, typename U>
388 {
389  copy<bounds_check>(dst, src, 0, 0, src.size(), stream);
390 }
// Copy all src.size() elements of src into dst starting at offset 0 with a
// value-initialized cuda_stream. (Signature line 392 is elided in this listing.)
391 template <bool bounds_check, typename T, typename U>
393 {
394  copy<bounds_check>(dst, src, 0, 0, src.size(), cuda_stream{});
395 }
396 
397 } // namespace raft_proto
#define DEVICE
Definition: gpu_support.hpp:34
#define HOST
Definition: gpu_support.hpp:33
Definition: buffer.hpp:33
const_agnostic_same_t< T, U > copy(buffer< T > &&dst, buffer< U > &&src)
Definition: buffer.hpp:392
int cuda_stream
Definition: cuda_stream.hpp:25
std::enable_if_t< std::is_same_v< std::remove_const_t< T >, std::remove_const_t< U > >, V > const_agnostic_same_t
Definition: const_agnostic.hpp:22
device_type
Definition: device_type.hpp:18
std::variant< device_id< device_type::cpu >, device_id< device_type::gpu > > device_id_variant
Definition: device_id.hpp:30
A container which may or may not own its own data on host or device.
Definition: buffer.hpp:39
T value_type
Definition: buffer.hpp:41
buffer(iter_t const &begin, iter_t const &end, device_type mem_type, int device, cuda_stream stream=cuda_stream{})
Definition: buffer.hpp:282
buffer(buffer< T > const &other, device_type mem_type, int device=0, cuda_stream stream=cuda_stream{})
Construct one buffer from another in the given memory location (either on host or on device). A buffer...
Definition: buffer.hpp:125
buffer(iter_t const &begin, iter_t const &end)
Definition: buffer.hpp:264
std::variant< non_owning_buffer< device_type::cpu, T >, non_owning_buffer< device_type::gpu, T >, owning_buffer< device_type::cpu, T >, owning_buffer< device_type::gpu, T > > data_store
Definition: buffer.hpp:46
buffer(buffer< T > const &other, cuda_stream stream=cuda_stream{})
Create owning copy of existing buffer with given stream. The memory type of this new buffer will be th...
Definition: buffer.hpp:170
buffer()
Definition: buffer.hpp:48
auto size() const noexcept
Definition: buffer.hpp:291
buffer(buffer< T > &&other, device_type mem_type, int device)
Definition: buffer.hpp:239
friend void swap(buffer< T > &first, buffer< T > &second)
Create owning copy of existing buffer. The memory type of this new buffer will be the same as the orig...
Definition: buffer.hpp:179
buffer(index_type size, device_type mem_type=device_type::cpu, int device=0, cuda_stream stream=0)
Definition: buffer.hpp:51
buffer< T > & operator=(buffer< T > &&other) noexcept
Definition: buffer.hpp:252
HOST DEVICE auto * data() const noexcept
Definition: buffer.hpp:292
buffer(iter_t const &begin, iter_t const &end, device_type mem_type)
Definition: buffer.hpp:274
std::size_t index_type
Definition: buffer.hpp:40
auto memory_type() const noexcept
Definition: buffer.hpp:293
buffer(T *input_data, index_type size, device_type mem_type=device_type::cpu, int device=0)
Definition: buffer.hpp:88
buffer< T > & operator=(buffer< T > const &other)
Definition: buffer.hpp:187
buffer(buffer< T > &&other) noexcept
Definition: buffer.hpp:248
buffer(buffer< T > &&other, device_type mem_type)
Definition: buffer.hpp:243
auto device_index() const noexcept
Definition: buffer.hpp:306
auto device() const noexcept
Definition: buffer.hpp:304
buffer(buffer< T > &&other, device_type mem_type, int device, cuda_stream stream)
Move from existing buffer unless a copy is necessary based on memory location.
Definition: buffer.hpp:198
Definition: base.hpp:22
Definition: base.hpp:26
Definition: exceptions.hpp:29