Summary Statistics#
Covariance#
#include <raft/stats/cov.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void cov(raft::resources const &handle, raft::device_matrix_view<value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> mu, raft::device_matrix_view<value_t, idx_t, layout_t> covar, bool sample, bool stable)# Compute covariance of the input matrix.
Mean operation is assumed to be performed on a given column.
Note
if stable=true, then the input data will be mean centered after this function returns!
- Template Parameters:
value_t – the data type
idx_t – the index type
layout_t – Layout type of the input data.
- Parameters:
handle – [in] the raft handle
data – [inout] the input matrix (if stable=true, this will be mean-centered when the function returns!) (length = nrows * ncols)
mu – [in] mean vector of the input matrix (length = ncols)
covar – [out] the output covariance matrix (length = ncols * ncols)
sample – [in] whether to evaluate sample covariance or not. In other words, whether to normalize the output using N-1 or N, for true or false, respectively
stable – [in] whether to run the slower-but-numerically-stable version or not
Histogram#
#include <raft/stats/histogram.cuh>
namespace raft::stats
-
enum HistType#
Types of supported histogram implementations.
Values:
-
enumerator HistTypeSmemBits1#
shared mem atomics but with bins to be 1b int’s
-
enumerator HistTypeSmemBits2#
shared mem atomics but with bins to be 2b int’s
-
enumerator HistTypeSmemBits4#
shared mem atomics but with bins to be 4b int’s
-
enumerator HistTypeSmemBits8#
shared mem atomics but with bins to be 1B int’s
-
enumerator HistTypeSmemBits16#
shared mem atomics but with bins to be 2B int’s
-
enumerator HistTypeGmem#
use only global atomics
-
enumerator HistTypeSmem#
uses shared mem atomics to reduce global traffic
-
enumerator HistTypeSmemMatchAny#
uses shared mem atomics with match_any intrinsic to further reduce shared memory traffic. This can only be enabled on Volta and later architectures. If one tries to enable this for older architectures, it will fall back to HistTypeSmem.
Note
This is to be used only when the input dataset leads to a lot of repetitions in a given warp; otherwise, this algo can be much slower than HistTypeSmem!
-
enumerator HistTypeSmemHash#
builds a hashmap of active bins in shared mem
-
enumerator HistTypeAuto#
decide at runtime the best algo for the given inputs
-
enumerator HistTypeSmemBits1#
-
template<typename value_t, typename idx_t, typename binner_op = IdentityBinner<value_t, idx_t>>
void histogram(raft::resources const &handle, HistType type, raft::device_matrix_view<const value_t, idx_t, raft::col_major> data, raft::device_matrix_view<int, idx_t, raft::col_major> bins, binner_op binner = IdentityBinner<value_t, idx_t>())# Perform histogram on the input data. It chooses the right load size based on the input data vector length. It also supports large-bin cases using a specialized smem-based hashing technique.
Note
signature of binner_op is
int func(value_t, idx_t);
- Template Parameters:
value_t – input data type
idx_t – data type used to compute indices
binner_op – takes the input data and computes its bin index
- Parameters:
handle – [in] the raft handle
type – [in] histogram implementation type to choose
data – [in] input data col-major (length = nrows * ncols)
bins – [out] the output bins col-major (length = nbins * ncols)
binner – [in] the operation that computes the bin index of the input data
Mean#
#include <raft/stats/mean.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void mean(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<value_t, idx_t> mu, bool sample)# Compute mean of the input matrix.
Mean operation is assumed to be performed on a given column.
- Template Parameters:
value_t – the data type
idx_t – index type
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] the input matrix
mu – [out] the output mean vector
sample – [in] whether to evaluate sample mean or not. In other words, whether to normalize the output using N-1 or N, for true or false, respectively
Mean Center#
#include <raft/stats/mean_center.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void mean_center(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> mu, raft::device_matrix_view<value_t, idx_t, layout_t> out, bool bcast_along_rows)# Center the input matrix wrt its mean.
- Template Parameters:
value_t – the data type
idx_t – index type
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] input matrix of size nrows * ncols
mu – [in] the mean vector of size ncols if bcast_along_rows else nrows
out – [out] the output mean-centered matrix
bcast_along_rows – [in] whether to broadcast vector along rows or columns
-
template<typename value_t, typename idx_t, typename layout_t>
void mean_add(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> mu, raft::device_matrix_view<value_t, idx_t, layout_t> out, bool bcast_along_rows)# Add the input matrix wrt its mean.
- Template Parameters:
value_t – the data type
idx_t – index type
layout_t – Layout type of the input matrix.
TPB – threads per block of the cuda kernel launched
- Parameters:
handle – [in] the raft handle
data – [in] input matrix of size nrows * ncols
mu – [in] the mean vector of size ncols if bcast_along_rows else nrows
out – [out] the output matrix with the mean added
bcast_along_rows – [in] whether to broadcast vector along rows or columns
Mean Variance#
#include <raft/stats/mean_var.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void meanvar(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<value_t, idx_t> mean, raft::device_vector_view<value_t, idx_t> var, bool sample)# Compute mean and variance for each column of a given matrix.
The operation is performed in a single sweep. Consider using it when you need to compute both mean and variance, or when you need to compute variance but don’t have the mean. It’s almost twice as fast as running mean and vars sequentially, because all three kernels are memory-bound.
- Template Parameters:
value_t – the data type
idx_t – Integer type used for addressing
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] the input matrix of size [N, D]
mean – [out] the output mean vector of size D
var – [out] the output variance vector of size D
sample – [in] whether to evaluate sample variance or not. In other words, whether to normalize the variance using N-1 or N, for true or false respectively.
Min/Max#
#include <raft/stats/minmax.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t>
void minmax(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, raft::col_major> data, std::optional<raft::device_vector_view<const unsigned, idx_t>> rowids, std::optional<raft::device_vector_view<const unsigned, idx_t>> colids, raft::device_vector_view<value_t, idx_t> globalmin, raft::device_vector_view<value_t, idx_t> globalmax, std::optional<raft::device_vector_view<value_t, idx_t>> sampledcols)# Computes min/max across every column of the input matrix, and optionally allows subsampling based on the given row/col ID mapping vectors.
Note
This method makes the following assumptions:
input and output matrices are assumed to be col-major
ncols is small enough to fit the whole of min/max values across all cols in shared memory
- Template Parameters:
value_t – Data type of input matrix element.
idx_t – Index type of matrix extent.
- Parameters:
handle – [in] the raft handle
data – [in] input data col-major of size [nrows, ncols], unless rowids or colids length is smaller
rowids – [in] optional row ID mappings of length nrows. If you want to skip this index lookup entirely, pass std::nullopt
colids – [in] optional col ID mappings of length ncols. If you want to skip this index lookup entirely, pass std::nullopt
globalmin – [out] final col-wise global minimum (size = ncols)
globalmax – [out] final col-wise global maximum (size = ncols)
sampledcols – [out] output sampled data. Pass std::nullopt if you don’t need this
Standard Deviation#
#include <raft/stats/stddev.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void stddev(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> mu, raft::device_vector_view<value_t, idx_t> std, bool sample)# Compute stddev of the input matrix.
Stddev operation is assumed to be performed on a given column.
- Template Parameters:
value_t – the data type
idx_t – Integer type used for addressing
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] the input matrix
mu – [in] the mean vector
std – [out] the output stddev vector
sample – [in] whether to evaluate sample stddev or not. In other words, whether to normalize the output using N-1 or N, for true or false, respectively
Sum#
#include <raft/stats/sum.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void sum(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> input, raft::device_vector_view<value_t, idx_t> output)# Compute sum of the input matrix.
Sum operation is assumed to be performed on a given column.
- Template Parameters:
value_t – the data type
idx_t – Integer type used for addressing
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
input – [in] the input matrix
output – [out] the output sum vector
Weighted Average#
#include <raft/stats/weighted_mean.cuh>
namespace raft::stats
-
template<typename value_t, typename idx_t, typename layout_t>
void weighted_mean(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> weights, raft::device_vector_view<value_t, idx_t> mu, bool along_rows)# Compute the weighted mean of the input matrix with a vector of weights, along rows or along columns.
- Template Parameters:
value_t – the data type
idx_t – Integer type used for addressing
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] the input matrix of size nrows * ncols
weights – [in] weight vector of size ncols if along_rows is true, else of size nrows
mu – [out] the output mean vector of size nrows if along_rows is true, else of size ncols
along_rows – [in] whether to reduce along rows or columns
-
template<typename value_t, typename idx_t, typename layout_t>
void row_weighted_mean(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> weights, raft::device_vector_view<value_t, idx_t> mu)# Compute the row-wise weighted mean of the input matrix with a vector of column weights.
- Template Parameters:
value_t – the data type
idx_t – Integer type used for addressing
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] the input matrix of size nrows * ncols
weights – [in] weight vector of size ncols
mu – [out] the output mean vector of size nrows
-
template<typename value_t, typename idx_t, typename layout_t>
void col_weighted_mean(raft::resources const &handle, raft::device_matrix_view<const value_t, idx_t, layout_t> data, raft::device_vector_view<const value_t, idx_t> weights, raft::device_vector_view<value_t, idx_t> mu)# Compute the column-wise weighted mean of the input matrix with a vector of row weights.
- Template Parameters:
value_t – the data type
idx_t – Integer type used for addressing
layout_t – Layout type of the input matrix.
- Parameters:
handle – [in] the raft handle
data – [in] the input matrix of size nrows * ncols
weights – [in] weight vector of size nrows
mu – [out] the output mean vector of size ncols