libcudf  24.02.00
row_operators.cuh
1 /*
2  * Copyright (c) 2019-2023, NVIDIA CORPORATION.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #pragma once
18 
20 #include <cudf/detail/utilities/assert.cuh>
21 #include <cudf/hashing/detail/hash_functions.cuh>
22 #include <cudf/hashing/detail/hashing.hpp>
23 #include <cudf/sorting.hpp>
27 
28 #include <thrust/equal.h>
29 #include <thrust/execution_policy.h>
30 #include <thrust/iterator/counting_iterator.h>
31 #include <thrust/swap.h>
32 #include <thrust/transform_reduce.h>
33 
34 #include <limits>
35 
36 namespace cudf {
37 
46 enum class weak_ordering {
47  LESS,
48  EQUIVALENT,
49  GREATER
50 };
51 
52 namespace detail {
61 template <typename Element>
62 __device__ weak_ordering compare_elements(Element lhs, Element rhs)
63 {
64  if (lhs < rhs) {
65  return weak_ordering::LESS;
66  } else if (rhs < lhs) {
68  }
70 }
71 } // namespace detail
72 
82 template <typename Element, std::enable_if_t<std::is_floating_point_v<Element>>* = nullptr>
83 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
84 {
85  if (isnan(lhs) and isnan(rhs)) {
87  } else if (isnan(rhs)) {
88  return weak_ordering::LESS;
89  } else if (isnan(lhs)) {
91  }
92 
93  return detail::compare_elements(lhs, rhs);
94 }
95 
104 inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
105 {
106  if (lhs_is_null and rhs_is_null) { // null <? null
108  } else if (lhs_is_null) { // null <? x
109  return (null_precedence == null_order::BEFORE) ? weak_ordering::LESS : weak_ordering::GREATER;
110  } else if (rhs_is_null) { // x <? null
111  return (null_precedence == null_order::AFTER) ? weak_ordering::LESS : weak_ordering::GREATER;
112  }
114 }
115 
124 template <typename Element, std::enable_if_t<not std::is_floating_point_v<Element>>* = nullptr>
125 __device__ weak_ordering relational_compare(Element lhs, Element rhs)
126 {
127  return detail::compare_elements(lhs, rhs);
128 }
129 
138 template <typename Element, std::enable_if_t<std::is_floating_point_v<Element>>* = nullptr>
139 __device__ bool equality_compare(Element lhs, Element rhs)
140 {
141  if (isnan(lhs) and isnan(rhs)) { return true; }
142  return lhs == rhs;
143 }
144 
153 template <typename Element, std::enable_if_t<not std::is_floating_point_v<Element>>* = nullptr>
154 __device__ bool equality_compare(Element const lhs, Element const rhs)
155 {
156  return lhs == rhs;
157 }
158 
164 template <typename Nullate>
166  public:
178  __host__ __device__
180  column_device_view lhs,
181  column_device_view rhs,
182  null_equality nulls_are_equal = null_equality::EQUAL)
183  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
184  {
185  }
186 
194  template <typename Element,
195  std::enable_if_t<cudf::is_equality_comparable<Element, Element>()>* = nullptr>
196  __device__ bool operator()(size_type lhs_element_index,
197  size_type rhs_element_index) const noexcept
198  {
199  if (nulls) {
200  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
201  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
202  if (lhs_is_null and rhs_is_null) {
203  return nulls_are_equal == null_equality::EQUAL;
204  } else if (lhs_is_null != rhs_is_null) {
205  return false;
206  }
207  }
208 
209  return equality_compare(lhs.element<Element>(lhs_element_index),
210  rhs.element<Element>(rhs_element_index));
211  }
212 
213  // @cond
214  template <typename Element,
215  std::enable_if_t<not cudf::is_equality_comparable<Element, Element>()>* = nullptr>
216  __device__ bool operator()(size_type lhs_element_index, size_type rhs_element_index)
217  {
218  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
219  }
220  // @endcond
221 
222  private:
223  column_device_view lhs;
224  column_device_view rhs;
225  Nullate nulls;
226  null_equality nulls_are_equal;
227 };
228 
234 template <typename Nullate>
236  public:
246  table_device_view lhs,
247  table_device_view rhs,
248  null_equality nulls_are_equal = null_equality::EQUAL)
249  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, nulls_are_equal{nulls_are_equal}
250  {
251  CUDF_EXPECTS(lhs.num_columns() == rhs.num_columns(), "Mismatched number of columns.");
252  }
253 
261  __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
262  {
263  auto equal_elements = [=](column_device_view l, column_device_view r) {
264  return cudf::type_dispatcher(l.type(),
265  element_equality_comparator{nulls, l, r, nulls_are_equal},
266  lhs_row_index,
267  rhs_row_index);
268  };
269 
270  return thrust::equal(thrust::seq, lhs.begin(), lhs.end(), rhs.begin(), equal_elements);
271  }
272 
273  private:
274  table_device_view lhs;
275  table_device_view rhs;
276  Nullate nulls;
277  null_equality nulls_are_equal;
278 };
279 
285 template <typename Nullate>
287  public:
299  __host__ __device__ element_relational_comparator(Nullate has_nulls,
300  column_device_view lhs,
301  column_device_view rhs,
302  null_order null_precedence)
303  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}, null_precedence{null_precedence}
304  {
305  }
306 
315  __host__ __device__ element_relational_comparator(Nullate has_nulls,
316  column_device_view lhs,
317  column_device_view rhs)
318  : lhs{lhs}, rhs{rhs}, nulls{has_nulls}
319  {
320  }
321 
330  template <typename Element,
331  std::enable_if_t<cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
332  __device__ weak_ordering operator()(size_type lhs_element_index,
333  size_type rhs_element_index) const noexcept
334  {
335  if (nulls) {
336  bool const lhs_is_null{lhs.is_null(lhs_element_index)};
337  bool const rhs_is_null{rhs.is_null(rhs_element_index)};
338 
339  if (lhs_is_null or rhs_is_null) { // at least one is null
340  return null_compare(lhs_is_null, rhs_is_null, null_precedence);
341  }
342  }
343 
344  return relational_compare(lhs.element<Element>(lhs_element_index),
345  rhs.element<Element>(rhs_element_index));
346  }
347 
348  // @cond
349  template <typename Element,
350  std::enable_if_t<not cudf::is_relationally_comparable<Element, Element>()>* = nullptr>
351  __device__ weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index)
352  {
353  CUDF_UNREACHABLE("Attempted to compare elements of uncomparable types.");
354  }
355  // @endcond
356 
357  private:
358  column_device_view lhs;
359  column_device_view rhs;
360  Nullate nulls;
361  null_order null_precedence{};
362 };
363 
379 template <typename Nullate>
381  public:
402  table_device_view lhs,
403  table_device_view rhs,
404  order const* column_order = nullptr,
405  null_order const* null_precedence = nullptr)
406  : _lhs{lhs},
407  _rhs{rhs},
408  _nulls{has_nulls},
409  _column_order{column_order},
410  _null_precedence{null_precedence}
411  {
412  CUDF_EXPECTS(_lhs.num_columns() == _rhs.num_columns(), "Mismatched number of columns.");
413  }
414 
424  __device__ bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
425  {
426  for (size_type i = 0; i < _lhs.num_columns(); ++i) {
427  bool ascending = (_column_order == nullptr) or (_column_order[i] == order::ASCENDING);
428 
429  null_order null_precedence =
430  _null_precedence == nullptr ? null_order::BEFORE : _null_precedence[i];
431 
432  auto comparator =
433  element_relational_comparator{_nulls, _lhs.column(i), _rhs.column(i), null_precedence};
434 
435  weak_ordering state =
436  cudf::type_dispatcher(_lhs.column(i).type(), comparator, lhs_index, rhs_index);
437 
438  if (state == weak_ordering::EQUIVALENT) { continue; }
439 
440  return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
441  }
442  return false;
443  }
444 
445  private:
446  table_device_view _lhs;
447  table_device_view _rhs;
448  Nullate _nulls{};
449  null_order const* _null_precedence{};
450  order const* _column_order{};
451 }; // class row_lexicographic_comparator
452 
459 template <template <typename> class hash_function, typename Nullate>
461  public:
470  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
471  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
472  {
473  if (has_nulls && col.is_null(row_index)) { return std::numeric_limits<hash_value_type>::max(); }
474  return hash_function<T>{}(col.element<T>(row_index));
475  }
476 
485  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
486  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
487  {
488  CUDF_UNREACHABLE("Unsupported type in hash.");
489  }
490 
491  Nullate has_nulls;
492 };
493 
500 template <template <typename> class hash_function, typename Nullate>
502  public:
509  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
510  : _seed{seed}, _has_nulls{has_nulls}
511  {
512  }
513 
521  __device__ element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
522  : _seed{seed}, _null_hash{null_hash}, _has_nulls{has_nulls}
523  {
524  }
525 
534  template <typename T, CUDF_ENABLE_IF(column_device_view::has_element_accessor<T>())>
535  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
536  {
537  if (_has_nulls && col.is_null(row_index)) { return _null_hash; }
538  return hash_function<T>{_seed}(col.element<T>(row_index));
539  }
540 
549  template <typename T, CUDF_ENABLE_IF(not column_device_view::has_element_accessor<T>())>
550  __device__ hash_value_type operator()(column_device_view col, size_type row_index) const
551  {
552  CUDF_UNREACHABLE("Unsupported type in hash.");
553  }
554 
555  private:
556  uint32_t _seed{DEFAULT_HASH_SEED};
557  hash_value_type _null_hash{std::numeric_limits<hash_value_type>::max()};
558  Nullate _has_nulls;
559 };
560 
567 template <template <typename> class hash_function, typename Nullate>
568 class row_hasher {
569  public:
570  row_hasher() = delete;
571 
579  : _table{t}, _has_nulls{has_nulls}
580  {
581  }
590  : _table{t}, _seed(seed), _has_nulls{has_nulls}
591  {
592  }
593 
600  __device__ auto operator()(size_type row_index) const
601  {
602  // Hash the first column w/ the seed
603  auto const initial_hash = cudf::hashing::detail::hash_combine(
604  hash_value_type{0},
605  type_dispatcher<dispatch_storage_type>(
606  _table.column(0).type(),
608  _table.column(0),
609  row_index));
610 
611  // Hashes an element in a column
612  auto hasher = [=](size_type column_index) {
613  return cudf::type_dispatcher<dispatch_storage_type>(
614  _table.column(column_index).type(),
616  _table.column(column_index),
617  row_index);
618  };
619 
620  // Hash each element and combine all the hash values together
621  return thrust::transform_reduce(
622  thrust::seq,
623  // note that this starts at 1 and not 0 now since we already hashed the first column
624  thrust::make_counting_iterator(1),
625  thrust::make_counting_iterator(_table.num_columns()),
626  hasher,
627  initial_hash,
628  [](hash_value_type lhs, hash_value_type rhs) {
629  return cudf::hashing::detail::hash_combine(lhs, rhs);
630  });
631  }
632 
633  private:
634  table_device_view _table;
635  Nullate _has_nulls;
636  uint32_t _seed{DEFAULT_HASH_SEED};
637 };
638 
639 } // namespace cudf
An immutable, non-owning view of device data as a column of elements that is trivially copyable and u...
T element(size_type element_index) const noexcept
Returns reference to element at the specified index.
CUDF_HOST_DEVICE data_type type() const noexcept
Returns the element type.
bool is_null(size_type element_index) const noexcept
Returns whether the specified element is null.
ColumnDeviceView const & column(size_type column_index) const noexcept
Returns a reference to the view of the specified column.
size_type num_columns() const noexcept
Returns the number of columns.
Performs an equality comparison between two elements in two columns.
element_equality_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct type-dispatched function object for comparing equality between two elements.
bool operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Compares the specified elements for equality.
Function object for computing the hash value of a row in a column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed)
Constructs a function object for hashing an element in the given column.
element_hasher_with_seed(Nullate has_nulls, uint32_t seed, hash_value_type null_hash)
Constructs a function object for hashing an element in the given column.
Computes the hash value of an element in the given column.
Nullate has_nulls
A cudf::nullate type describing how to check for nulls.
Performs a relational comparison between two elements in two columns.
weak_ordering operator()(size_type lhs_element_index, size_type rhs_element_index) const noexcept
Performs a relational comparison between the specified elements.
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs)
Construct type-dispatched function object for performing a relational comparison between two elements...
element_relational_comparator(Nullate has_nulls, column_device_view lhs, column_device_view rhs, null_order null_precedence)
Construct type-dispatched function object for performing a relational comparison between two elements...
Performs a relational comparison between two elements in two tables.
row_equality_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, null_equality nulls_are_equal=null_equality::EQUAL)
Construct a new row equality comparator object.
bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept
Compares the specified rows for equality.
Computes the hash value of a row in the given table.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t, uint32_t seed)
Constructs a row_hasher object with a seed value.
CUDF_HOST_DEVICE row_hasher(Nullate has_nulls, table_device_view t)
Constructs a row_hasher object.
auto operator()(size_type row_index) const
Computes the hash value of the row at row_index in the table
Computes whether one row is lexicographically less than another row.
bool operator()(size_type lhs_index, size_type rhs_index) const noexcept
Checks whether the row at lhs_index in the lhs table compares lexicographically less than the row at ...
row_lexicographic_comparator(Nullate has_nulls, table_device_view lhs, table_device_view rhs, order const *column_order=nullptr, null_order const *null_precedence=nullptr)
Construct a function object for performing a lexicographic comparison between the rows of two tables.
Table device view that is usable in device memory.
Column device view class definitions.
uint32_t hash_value_type
Type of hash value.
Definition: hashing.hpp:35
CUDF_HOST_DEVICE constexpr decltype(auto) __forceinline__ type_dispatcher(cudf::data_type dtype, Functor f, Ts &&... args)
Invokes an operator() template with the type instantiation based on the specified cudf::data_type's i...
#define CUDF_EXPECTS(...)
Macro for checking (pre-)conditions that throws an exception when a condition is violated.
Definition: error.hpp:176
null_order
Indicates how null values compare against all other values.
Definition: types.hpp:157
null_equality
Enum to consider two nulls as equal or unequal.
Definition: types.hpp:149
int32_t size_type
Row index type for columns and tables.
Definition: types.hpp:93
order
Indicates the order in which elements should be sorted.
Definition: types.hpp:116
@ BEFORE
NULL values ordered before all other values.
@ AFTER
NULL values ordered after all other values.
@ EQUAL
nulls compare equal
@ ASCENDING
Elements ordered from small to large.
cuDF interfaces
Definition: aggregation.hpp:34
bool has_nulls(table_view const &view)
Returns True if the table has nulls in any of its columns.
weak_ordering
Result type of the element_relational_comparator function object.
@ LESS
Indicates a is less than (ordered before) b
@ EQUIVALENT
Indicates a is ordered neither before nor after b
@ GREATER
Indicates a is greater than (ordered after) b
auto null_compare(bool lhs_is_null, bool rhs_is_null, null_order null_precedence)
Compare the nulls according to null order.
weak_ordering relational_compare(Element lhs, Element rhs)
A specialization for floating-point Element type relational comparison to derive the order of the ele...
bool equality_compare(Element lhs, Element rhs)
A specialization for floating-point Element type to check if lhs is equivalent to rhs....
Column APIs for sort and rank.
Table device view class definitions.
Defines the mapping between cudf::type_id runtime type information and concrete C++ types.
#define CUDF_HOST_DEVICE
Indicates that the function or method is usable on host and device.
Definition: types.hpp:32