19 #include <rmm/detail/aligned.hpp>
20 #include <rmm/detail/cuda_util.hpp>
21 #include <rmm/detail/error.hpp>
22 #include <rmm/detail/logging_assert.hpp>
23 #include <rmm/logger.hpp>
24 #include <rmm/mr/device/detail/coalescing_free_list.hpp>
25 #include <rmm/mr/device/detail/stream_ordered_memory_resource.hpp>
28 #include <rmm/detail/thrust_namespace.h>
29 #include <thrust/iterator/counting_iterator.h>
30 #include <thrust/iterator/transform_iterator.h>
31 #include <thrust/optional.h>
35 #include <cuda_runtime_api.h>
45 #include <unordered_map>
66 template <
class PoolResource,
class Upstream,
class Property,
class =
void>
72 template <
class PoolResource,
class Upstream,
class Property>
76 cuda::std::enable_if_t<!cuda::has_property<Upstream, Property>>> {
78 #pragma GCC diagnostic push
79 #pragma GCC diagnostic ignored "-Wnon-template-friend"
85 friend void get_property(const PoolResource&, Property) = delete;
87 #pragma GCC diagnostic pop
102 template <
typename Upstream>
105 maybe_remove_property<pool_memory_resource<Upstream>, Upstream, cuda::mr::device_accessible>,
106 public detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
107 detail::coalescing_free_list>,
108 public cuda::forward_property<pool_memory_resource<Upstream>, Upstream> {
111 detail::coalescing_free_list>;
130 thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
131 thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
132 : upstream_mr_{[upstream_mr]() {
133 RMM_EXPECTS(
nullptr != upstream_mr,
"Unexpected null upstream pointer.");
137 RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0),
138 rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
139 "Error, Initial pool size required to be a multiple of 256 bytes");
140 RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0),
141 rmm::detail::CUDA_ALLOCATION_ALIGNMENT),
142 "Error, Maximum pool size required to be a multiple of 256 bytes");
163 template <
typename Upstream2 = Upstream,
164 cuda::std::enable_if_t<cuda::mr::async_resource<Upstream2>,
int> = 0>
166 thrust::optional<std::size_t> initial_pool_size = thrust::nullopt,
167 thrust::optional<std::size_t> maximum_pool_size = thrust::nullopt)
220 [[nodiscard]] std::size_t
pool_size() const noexcept {
return current_pool_size_; }
225 using typename detail::stream_ordered_memory_resource<pool_memory_resource<Upstream>,
226 detail::coalescing_free_list>::split_block;
239 return std::numeric_limits<std::size_t>::max();
259 while (try_size >= min_size) {
261 if (block.has_value()) {
262 current_pool_size_ += block.value().size();
263 return block.value();
265 if (try_size == min_size) {
268 try_size = std::max(min_size, try_size / 2);
270 RMM_LOG_ERROR(
"[A][Stream {}][Upstream {}B][FAILURE maximum pool size exceeded]",
271 fmt::ptr(stream.
value()),
290 thrust::optional<std::size_t> maximum_size)
292 auto const try_size = [&]() {
293 if (not initial_size.has_value()) {
294 auto const [free, total] = (
get_upstream()->supports_get_mem_info())
296 : rmm::detail::available_device_memory();
297 return rmm::detail::align_up(std::min(free, total / 2),
298 rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
300 return initial_size.value();
303 current_pool_size_ = 0;
304 maximum_pool_size_ = maximum_size;
306 RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits<std::size_t>::max()),
307 "Initial pool size exceeds the maximum pool size!");
310 auto const block =
try_to_expand(try_size, try_size, cuda_stream_legacy);
311 this->insert_block(block, cuda_stream_legacy);
347 if (maximum_pool_size_.has_value()) {
348 auto const unaligned_remaining = maximum_pool_size_.value() -
pool_size();
349 using rmm::detail::align_up;
350 auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
351 auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT);
352 return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0;
366 RMM_LOG_DEBUG(
"[A][Stream {}][Upstream {}B]", fmt::ptr(stream.
value()), size);
368 if (size == 0) {
return {}; }
371 void* ptr =
get_upstream()->allocate_async(size, stream);
372 return thrust::optional<block_type>{
373 *upstream_blocks_.emplace(
static_cast<char*
>(ptr), size,
true).first};
374 }
catch (std::exception
const& e) {
375 return thrust::nullopt;
391 block_type const alloc{block.pointer(), size, block.is_head()};
392 #ifdef RMM_POOL_TRACK_ALLOCATIONS
393 allocated_blocks_.insert(alloc);
396 auto rest = (block.size() > size)
398 ?
block_type{block.pointer() + size, block.size() - size,
false}
400 return {alloc, rest};
413 #ifdef RMM_POOL_TRACK_ALLOCATIONS
415 auto const iter = allocated_blocks_.find(
static_cast<char*
>(ptr));
416 RMM_LOGGING_ASSERT(iter != allocated_blocks_.end());
419 RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment));
420 allocated_blocks_.erase(iter);
424 auto const iter = upstream_blocks_.find(
static_cast<char*
>(ptr));
425 return block_type{
static_cast<char*
>(ptr), size, (iter != upstream_blocks_.end())};
437 for (
auto block : upstream_blocks_) {
438 get_upstream()->deallocate(block.pointer(), block.size());
440 upstream_blocks_.clear();
441 #ifdef RMM_POOL_TRACK_ALLOCATIONS
442 allocated_blocks_.clear();
445 current_pool_size_ = 0;
448 #ifdef RMM_DEBUG_PRINT
459 auto const [free, total] = upstream_mr_->get_mem_info(rmm::cuda_stream_default);
460 std::cout <<
"GPU free memory: " << free <<
" total: " << total <<
"\n";
462 std::cout <<
"upstream_blocks: " << upstream_blocks_.size() <<
"\n";
463 std::size_t upstream_total{0};
465 for (
auto blocks : upstream_blocks_) {
467 upstream_total += blocks.size();
469 std::cout <<
"total upstream: " << upstream_total <<
" B\n";
471 #ifdef RMM_POOL_TRACK_ALLOCATIONS
472 std::cout <<
"allocated_blocks: " << allocated_blocks_.size() <<
"\n";
473 for (
auto block : allocated_blocks_)
477 this->print_free_blocks();
491 std::size_t largest{};
493 std::for_each(blocks.cbegin(), blocks.cend(), [&largest, &total](
auto const& block) {
494 total += block.size();
495 largest = std::max(largest, block.size());
497 return {largest, total};
516 Upstream* upstream_mr_;
517 std::size_t current_pool_size_{};
518 thrust::optional<std::size_t> maximum_pool_size_{};
520 #ifdef RMM_POOL_TRACK_ALLOCATIONS
521 std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> allocated_blocks_;
525 std::set<block_type, rmm::mr::detail::compare_blocks<block_type>> upstream_blocks_;
Strongly-typed non-owning wrapper for CUDA streams with default constructor.
Definition: cuda_stream_view.hpp:41
constexpr cudaStream_t value() const noexcept
Get the wrapped stream.
Definition: cuda_stream_view.hpp:75
A coalescing best-fit suballocator which uses a pool of memory allocated from an upstream memory_reso...
Definition: pool_memory_resource.hpp:108
void initialize_pool(thrust::optional< std::size_t > initial_size, thrust::optional< std::size_t > maximum_size)
Allocate initial memory for the pool.
Definition: pool_memory_resource.hpp:289
block_type free_block(void *ptr, std::size_t size) noexcept
Finds, frees and returns the block associated with pointer ptr.
Definition: pool_memory_resource.hpp:411
split_block allocate_from_block(block_type const &block, std::size_t size)
Splits block if necessary to return a pointer to memory of size bytes.
Definition: pool_memory_resource.hpp:389
std::size_t size_to_grow(std::size_t size) const
Given a minimum size, computes an appropriate size to grow the pool.
Definition: pool_memory_resource.hpp:345
bool supports_streams() const noexcept override
Queries whether the resource supports use of non-null CUDA streams for allocation/deallocation.
Definition: pool_memory_resource.hpp:190
const Upstream & upstream_resource() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:204
thrust::optional< block_type > block_from_upstream(std::size_t size, cuda_stream_view stream)
Allocate a block from upstream to expand the suballocation pool.
Definition: pool_memory_resource.hpp:364
free_list::block_type block_type
The type of block returned by the free list.
Definition: pool_memory_resource.hpp:224
std::pair< std::size_t, std::size_t > free_list_summary(free_list const &blocks)
Get the largest available block size and total free size in the specified free list.
Definition: pool_memory_resource.hpp:489
std::pair< std::size_t, std::size_t > do_get_mem_info(cuda_stream_view stream) const override
Get free and available memory for memory resource.
Definition: pool_memory_resource.hpp:508
std::size_t get_maximum_allocation_size() const
Get the maximum size of allocations supported by this memory resource.
Definition: pool_memory_resource.hpp:237
block_type expand_pool(std::size_t size, free_list &blocks, cuda_stream_view stream)
Allocate space from upstream to supply the suballocation pool and return a sufficiently sized block.
Definition: pool_memory_resource.hpp:324
bool supports_get_mem_info() const noexcept override
Query whether the resource supports the get_mem_info API.
Definition: pool_memory_resource.hpp:197
Upstream * get_upstream() const noexcept
Get the upstream memory_resource object.
Definition: pool_memory_resource.hpp:211
void release()
Free all memory allocated from the upstream memory_resource.
Definition: pool_memory_resource.hpp:433
block_type try_to_expand(std::size_t try_size, std::size_t min_size, cuda_stream_view stream)
Try to expand the pool by allocating a block of at least min_size bytes from upstream.
Definition: pool_memory_resource.hpp:257
std::lock_guard< std::mutex > lock_guard
Type of lock used to synchronize access.
Definition: pool_memory_resource.hpp:227
std::size_t pool_size() const noexcept
Computes the size of the current pool.
Definition: pool_memory_resource.hpp:220
pool_memory_resource(Upstream2 &upstream_mr, thrust::optional< std::size_t > initial_pool_size=thrust::nullopt, thrust::optional< std::size_t > maximum_pool_size=thrust::nullopt)
Construct a pool_memory_resource and allocate the initial device memory pool using upstream_mr.
Definition: pool_memory_resource.hpp:165
~pool_memory_resource() override
Destroy the pool_memory_resource and deallocate all memory it allocated using the upstream resource.
Definition: pool_memory_resource.hpp:176
detail::coalescing_free_list free_list
The free list implementation.
Definition: pool_memory_resource.hpp:223
Exception thrown when RMM runs out of memory.
Definition: error.hpp:89
A helper class to remove the device_accessible property.
Definition: pool_memory_resource.hpp:67