Hierarchical Clustering#
#include <raft/cluster/single_linkage.cuh>
-
enum raft::cluster::hierarchy::LinkageDistance#
Determines the method for computing the minimum spanning tree (MST)
Values:
-
enumerator PAIRWISE#
Use a pairwise distance matrix as input to the mst. This is very fast and the best option for fairly small datasets (~50k data points)
-
enumerator KNN_GRAPH#
Construct a KNN graph as input to the mst and provide additional edges if the mst does not converge. This is slower but scales to very large datasets.
-
enumerator PAIRWISE#
-
constexpr int raft::cluster::hierarchy::DEFAULT_CONST_C = 15#
-
template<typename value_t, typename idx_t, LinkageDistance dist_type = LinkageDistance::KNN_GRAPH>
void raft::cluster::hierarchy::single_linkage(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, row_major> X, raft::device_matrix_view<idx_t, idx_t, row_major> dendrogram, raft::device_vector_view<idx_t, idx_t> labels, raft::distance::DistanceType metric, size_t n_clusters, std::optional<int> c = std::make_optional<int>(DEFAULT_CONST_C))# Single-linkage clustering, capable of constructing a KNN graph to scale the algorithm beyond the n^2 memory consumption of implementations that use the fully-connected graph of pairwise distances by connecting a knn graph when k is not large enough to connect it.
- Template Parameters:
value_idx –
value_t –
dist_type – method to use for constructing connectivities graph
- Parameters:
handle – [in] raft handle
X – [in] dense input matrix in row-major layout
dendrogram – [out] output dendrogram (size [n_rows - 1] * 2)
labels – [out] output labels vector (size n_rows)
metric – [in] distance metrix to use when constructing connectivities graph
n_clusters – [in] number of clusters to assign data samples
c – [in] a constant used when constructing connectivities from knn graph. Allows the indirect control of k. The algorithm will set
k = log(n) + c