17 #include <cuda_runtime_api.h>
25 namespace experimental {
32 auto thread_local cache = std::vector<int>{};
33 if (cache.size() == 0) {
34 auto device_count =
int{};
36 cache.resize(device_count);
37 for (
auto dev = 0; dev < device_count; ++dev) {
39 cudaDeviceGetAttribute(&(cache[dev]), cudaDevAttrMaxSharedMemoryPerBlockOptin, dev));
47 auto thread_local cache = std::vector<int>{};
48 if (cache.size() == 0) {
49 auto device_count =
int{};
51 cache.resize(device_count);
52 for (
auto dev = 0; dev < device_count; ++dev) {
54 cudaDeviceGetAttribute(&(cache[dev]), cudaDevAttrMultiProcessorCount, dev));
64 cudaDeviceGetAttribute(&result, cudaDevAttrMaxThreadsPerMultiProcessor,
device_id.value()));
70 auto thread_local cache = std::vector<int>{};
71 if (cache.size() == 0) {
72 auto device_count =
int{};
74 cache.resize(device_count);
75 for (
auto dev = 0; dev < device_count; ++dev) {
77 cudaDeviceGetAttribute(&(cache[dev]), cudaDevAttrMaxSharedMemoryPerMultiprocessor, dev));
87 cudaDeviceGetAttribute(&result, cudaDevAttrMemoryClockRate,
device_id.value()));
// Compile-time constants, each built as an index_type value.
// NOTE(review): the leading numbers (99-102) and the three-line split of each
// declaration look like artifacts of a rendered source listing rather than
// code -- confirm against the original header.
// MAX_READ_CHUNK is fixed at 128; the unit is not stated here -- presumably
// bytes, TODO confirm against the code that consumes it.
99 auto constexpr
static const MAX_READ_CHUNK =
index_type{128};
// MAX_BLOCKS is fixed at 65536 -- presumably a cap on blocks per kernel
// launch; verify against callers.
100 auto constexpr
static const MAX_BLOCKS =
index_type{65536};
// WARP_SIZE is fixed at 32.
101 auto constexpr
static const WARP_SIZE =
index_type{32};
// MAX_THREADS_PER_BLOCK is fixed at 256; it is the divisor used when
// MIN_BLOCKS_PER_SM is derived further down.
102 auto constexpr
static const MAX_THREADS_PER_BLOCK =
index_type{256};
// MAX_THREADS_PER_SM is selected by the preprocessor from __CUDA_ARCH__:
// when it equals 720, 750, 860 or 870 the value is 1024.
104 #if __CUDA_ARCH__ == 720 || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870
105 auto constexpr
static const MAX_THREADS_PER_SM =
index_type{1024};
// The two remaining definitions both use 2048.
// NOTE(review): the directives that separate these alternatives and close the
// #if are not visible here (the listing numbers skip 106, 108-109 and
// 111-112) -- confirm the branch conditions against the original header.
107 auto constexpr
static const MAX_THREADS_PER_SM =
index_type{2048};
110 auto constexpr
static const MAX_THREADS_PER_SM =
index_type{2048};
// MIN_BLOCKS_PER_SM is the quotient MAX_THREADS_PER_SM / MAX_THREADS_PER_BLOCK.
// With MAX_THREADS_PER_BLOCK at 256 this gives 4 when the limit is 1024 and
// 8 when it is 2048.
113 auto constexpr
static const MIN_BLOCKS_PER_SM = MAX_THREADS_PER_SM / MAX_THREADS_PER_BLOCK;
auto get_mem_clock_rate(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:83
auto get_sm_count(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:45
auto get_core_clock_rate(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:91
auto get_max_threads_per_sm(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:60
auto get_max_shared_mem_per_sm(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:68
auto get_max_shared_mem_per_block(raft_proto::device_id< raft_proto::device_type::gpu > device_id)
Definition: gpu_introspection.hpp:29
uint32_t index_type
Definition: index_type.hpp:21
Definition: dbscan.hpp:27
void cuda_check(error_t const &err) noexcept(!GPU_ENABLED)
Definition: cuda_check.hpp:26
detail::device_id< D > device_id
Definition: device_id.hpp:28