From 67d6509958751cc31194a845c9f305665a5aaab5 Mon Sep 17 00:00:00 2001 From: Himanshu Date: Thu, 15 Apr 2021 00:35:00 +0530 Subject: [PATCH 1/7] Update device_radix_sort.cuh --- cub/device/device_radix_sort.cuh | 1 + 1 file changed, 1 insertion(+) diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index 7534c508cc..74c9405bea 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -67,6 +67,7 @@ namespace cub { * method can only be applied to unsigned integral types, DeviceRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations * that ensure lexicographic key ordering. For floating-point types -0.0 and +0.0 are + * cub::DeviceRadixSort is stable * considered equal and appear in the result in the same order as they appear in * the input. * From f014cc58bd671bc6dd4eeb0960e26c06749cfa62 Mon Sep 17 00:00:00 2001 From: Himanshu Date: Thu, 15 Apr 2021 00:38:20 +0530 Subject: [PATCH 2/7] Update device_radix_sort.cuh --- cub/device/device_radix_sort.cuh | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index 74c9405bea..33aaf7ceb7 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -66,8 +66,7 @@ namespace cub { * half-precision floating-point type. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. For floating-point types -0.0 and +0.0 are - * cub::DeviceRadixSort is stable + * that ensure lexicographic key ordering. DeviceRadixSort is stable For floating-point types -0.0 and +0.0 are * considered equal and appear in the result in the same order as they appear in * the input. * From bc016afdc37325e1e98f52906bf991d40db49842 Mon Sep 17 00:00:00 2001 From: Himanshu Date: Thu, 15 Apr 2021 22:19:44 +0530 Subject: [PATCH 3/7] Update device_radix_sort.cuh --- cub/device/device_radix_sort.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index 33aaf7ceb7..45f14c614d 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -66,7 +66,7 @@ namespace cub { * half-precision floating-point type. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. DeviceRadixSort is stable For floating-point types -0.0 and +0.0 are + * that ensure lexicographic key ordering. DeviceRadixSort is stable. For floating-point types -0.0 and +0.0 are * considered equal and appear in the result in the same order as they appear in * the input. * From 1966bf33e697f3f00b24e8e0fcaeea0019b20e75 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Wed, 12 May 2021 17:16:46 -0400 Subject: [PATCH 4/7] Add more notices about radix sort stability. --- cub/block/block_radix_sort.cuh | 6 +++--- cub/device/device_radix_sort.cuh | 6 +++--- cub/device/device_segmented_radix_sort.cuh | 6 +++--- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/cub/block/block_radix_sort.cuh b/cub/block/block_radix_sort.cuh index b27148b85c..7d3bc93073 100644 --- a/cub/block/block_radix_sort.cuh +++ b/cub/block/block_radix_sort.cuh @@ -77,9 +77,9 @@ namespace cub { * bit-sequences of \p RADIX_BITS as radix digit places. Although the direct radix sorting * method can only be applied to unsigned integral types, BlockRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. For floating-point types -0.0 and +0.0 are - * considered equal and appear in the result in the same order as they appear in - * the input. + * that ensure lexicographic key ordering. BlockRadixSort is stable. For + * floating-point types -0.0 and +0.0 are considered equal and appear in the + * result in the same order as they appear in the input. * - \rowmajor * * \par Performance Considerations diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index 45f14c614d..983ff0cf57 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -66,9 +66,9 @@ namespace cub { * half-precision floating-point type. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. DeviceRadixSort is stable. For floating-point types -0.0 and +0.0 are - * considered equal and appear in the result in the same order as they appear in - * the input. + * that ensure lexicographic key ordering. DeviceRadixSort is stable. For + * floating-point types -0.0 and +0.0 are considered equal and appear in the + * result in the same order as they appear in the input. * * \par Usage Considerations * \cdp_class{DeviceRadixSort} diff --git a/cub/device/device_segmented_radix_sort.cuh b/cub/device/device_segmented_radix_sort.cuh index b03a2bafb6..c5c7fe9304 100644 --- a/cub/device/device_segmented_radix_sort.cuh +++ b/cub/device/device_segmented_radix_sort.cuh @@ -66,9 +66,9 @@ namespace cub { * half-precision floating-point type. Although the direct radix sorting * method can only be applied to unsigned integral types, DeviceSegmentedRadixSort * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. For floating-point types -0.0 and +0.0 are - * considered equal and appear in the result in the same order as they appear in - * the input. + * that ensure lexicographic key ordering. DeviceSegmentedRadixSort is stable. For + * floating-point types -0.0 and +0.0 are considered equal and appear in the + * result in the same order as they appear in the input. * * \par Usage Considerations * \cdp_class{DeviceSegmentedRadixSort} From 2e459727e7e798427e356daa0a8baa8e50bb0e50 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Thu, 20 May 2021 15:46:41 -0400 Subject: [PATCH 5/7] Document RadixSort twiddling and its impact on bit-range truncation. For both Device and Block algorithms. Note that these do not handle twiddling during descending sort in the same way. --- cub/block/block_radix_sort.cuh | 64 +++++++++++++++++++++++--------- cub/device/device_radix_sort.cuh | 43 +++++++++++++++++---- 2 files changed, 83 insertions(+), 24 deletions(-) diff --git a/cub/block/block_radix_sort.cuh b/cub/block/block_radix_sort.cuh index 7d3bc93073..7919384ba4 100644 --- a/cub/block/block_radix_sort.cuh +++ b/cub/block/block_radix_sort.cuh @@ -64,23 +64,53 @@ namespace cub { * \tparam PTX_ARCH [optional] \ptxversion * * \par Overview - * - The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges - * items into ascending order. It relies upon a positional representation for - * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, - * characters, etc.) specified from least-significant to most-significant. For a - * given input sequence of keys and a set of rules specifying a total ordering - * of the symbolic alphabet, the radix sorting method produces a lexicographic - * ordering of those keys. - * - BlockRadixSort can sort all of the built-in C++ numeric primitive types - * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half - * half-precision floating-point type. Within each key, the implementation treats fixed-length - * bit-sequences of \p RADIX_BITS as radix digit places. Although the direct radix sorting - * method can only be applied to unsigned integral types, BlockRadixSort - * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. BlockRadixSort is stable. For - * floating-point types -0.0 and +0.0 are considered equal and appear in the - * result in the same order as they appear in the input. - * - \rowmajor + * The [radix sorting method](http://en.wikipedia.org/wiki/Radix_sort) arranges + * items into ascending order. It relies upon a positional representation for + * keys, i.e., each key is comprised of an ordered sequence of symbols (e.g., digits, + * characters, etc.) specified from least-significant to most-significant. For a + * given input sequence of keys and a set of rules specifying a total ordering + * of the symbolic alphabet, the radix sorting method produces a lexicographic + * ordering of those keys. + * + * \rowmajor + * + * \par Supported Types + * BlockRadixSort can sort all of the built-in C++ numeric primitive types + * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half + * half-precision floating-point type. + * + * \par Bitwise Key Transformations + * Although the direct radix sorting method can only be applied to unsigned + * integral types, BlockRadixSort is able to sort signed and floating-point + * types via simple bit-wise transformations that ensure lexicographic key + * ordering. + * + * These transformations must be considered when restricting the + * `[begin_bit, end_bit)` range, as the bitwise transformations will occur + * before the bit-range truncation. + * + * Any transformations applied to the keys prior to sorting are reversed + * while writing to the final output buffer. + * + * \par Type Specific Bitwise Transformations + * To convert the input values into a radix-sortable bitwise representation, + * the following transformations take place prior to sorting: + * + * - For unsigned integral values, the keys are used directly. + * - For signed integral values, the sign bit is inverted. + * - For positive floating point values, the sign bit is inverted. + * - For negative floating point values, the full key is inverted. + * + * \par No Descending Sort Transformations + * Unlike `DeviceRadixSort`, `BlockRadixSort` does not invert the input key bits + * when performing a descending sort. Instead, it has special logic to reverse + * the order of the keys while sorting. + * + * \par Stability + * BlockRadixSort is stable. For floating-point types -0.0 and +0.0 + * are considered equal and appear in the result in the same order as they + * appear in the input. + * * * \par Performance Considerations * - \granularity diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index 983ff0cf57..57971159c7 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -60,15 +60,44 @@ namespace cub { * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * - * \par + * \par Supported Types * DeviceRadixSort can sort all of the built-in C++ numeric primitive types * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half - * half-precision floating-point type. Although the direct radix sorting - * method can only be applied to unsigned integral types, DeviceRadixSort - * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. DeviceRadixSort is stable. For - * floating-point types -0.0 and +0.0 are considered equal and appear in the - * result in the same order as they appear in the input. + * half-precision floating-point type. + * + * \par Transformations + * Although the direct radix sorting method can only be applied to unsigned + * integral types, DeviceRadixSort is able to sort signed and floating-point + * types via simple bit-wise transformations that ensure lexicographic key + * ordering. Additional transformations occur for descending sorts. These + * transformations must be considered when restricting the + * `[begin_bit, end_bit)` range, as the bitwise transformations will occur + * before the bit-range truncation. + * + * Any transformations applied to the keys prior to sorting are reversed + * while writing to the final output buffer. + * + * \par Type Specific Bitwise Transformations + * To convert the input values into a radix-sortable bitwise representation, + * the following transformations take place prior to sorting: + * + * - For unsigned integral values, the keys are used directly. + * - For signed integral values, the sign bit is inverted. + * - For positive floating point values, the sign bit is inverted. + * - For negative floating point values, the full key is inverted. + * + * For floating point types, positive and negative zero are a special case and + * will be considered equivalent during sorting. + * + * \par Descending Sort Bitwise Transformations + * If descending sort is used, the keys are inverted after performing any + * type-specific transformations, and the resulting keys are sorted in ascending + * order. + * + * \par Stability + * DeviceRadixSort is stable. For floating-point types, -0.0 and +0.0 are + * considered equal and appear in the result in the same order as they appear in + * the input. * * \par Usage Considerations * \cdp_class{DeviceRadixSort} From d12cb62dbb2add413e0e345cf25b380236dea61a Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Thu, 20 May 2021 15:48:59 -0400 Subject: [PATCH 6/7] Document restrictions on DeviceRadixSort input ranges. --- cub/device/device_radix_sort.cuh | 102 +++++++++++++++++++++++++++---- 1 file changed, 89 insertions(+), 13 deletions(-) diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index 57971159c7..d9c4f3c723 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -122,7 +122,15 @@ struct DeviceRadixSort * \brief Sorts key-value pairs into ascending order. (~2N auxiliary storage required) * * \par - * - The contents of the input data are not altered by the sorting operation + * - The contents of the input data are not altered by the sorting operation. + * - Pointers to contiguous memory must be used; iterators are not currently + * supported. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys_in, d_keys_in + num_items)` + * - `[d_keys_out, d_keys_out + num_items)` + * - `[d_values_in, d_values_in + num_items)` + * - `[d_values_out, d_values_out + num_items)` * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage @@ -191,6 +199,11 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + // We cast away const-ness, but will *not* write to these arrays. + // `DispatchRadixSort::Dispatch` will allocate temporary storage and + // create a new double-buffer internally when the `is_overwrite_ok` flag + // is not set. + constexpr bool is_overwrite_okay = false; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); @@ -202,7 +215,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - false, + is_overwrite_okay, stream, debug_synchronous); } @@ -218,6 +231,12 @@ struct DeviceRadixSort * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys.Current(), d_keys.Current() + num_items)` + * - `[d_keys.Alternate(), d_keys.Alternate() + num_items)` + * - `[d_values.Current(), d_values.Current() + num_items)` + * - `[d_values.Alternate(), d_values.Alternate() + num_items)` * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits @@ -290,6 +309,8 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + constexpr bool is_overwrite_okay = true; + return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, @@ -298,7 +319,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - true, + is_overwrite_okay, stream, debug_synchronous); } @@ -308,7 +329,15 @@ struct DeviceRadixSort * \brief Sorts key-value pairs into descending order. (~2N auxiliary storage required). * * \par - * - The contents of the input data are not altered by the sorting operation + * - The contents of the input data are not altered by the sorting operation. + * - Pointers to contiguous memory must be used; iterators are not currently + * supported. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys_in, d_keys_in + num_items)` + * - `[d_keys_out, d_keys_out + num_items)` + * - `[d_values_in, d_values_in + num_items)` + * - `[d_values_out, d_values_out + num_items)` * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage @@ -372,6 +401,11 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + // We cast away const-ness, but will *not* write to these arrays. + // `DispatchRadixSort::Dispatch` will allocate temporary storage and + // create a new double-buffer internally when the `is_overwrite_ok` flag + // is not set. + constexpr bool is_overwrite_okay = false; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values(const_cast(d_values_in), d_values_out); @@ -383,7 +417,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - false, + is_overwrite_okay, stream, debug_synchronous); } @@ -399,6 +433,12 @@ struct DeviceRadixSort * contains the input data to be sorted). * - The contents of both buffers within each pair may be altered by the sorting * operation. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys.Current(), d_keys.Current() + num_items)` + * - `[d_keys.Alternate(), d_keys.Alternate() + num_items)` + * - `[d_values.Current(), d_values.Current() + num_items)` + * - `[d_values.Alternate(), d_values.Alternate() + num_items)` * - Upon completion, the sorting operation will update the "current" indicator * within each DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits @@ -466,6 +506,8 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + constexpr bool is_overwrite_okay = true; + return DispatchRadixSort::Dispatch( d_temp_storage, temp_storage_bytes, @@ -474,7 +516,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - true, + is_overwrite_okay, stream, debug_synchronous); } @@ -491,7 +533,13 @@ struct DeviceRadixSort * \brief Sorts keys into ascending order. (~2N auxiliary storage required) * * \par - * - The contents of the input data are not altered by the sorting operation + * - The contents of the input data are not altered by the sorting operation. + * - Pointers to contiguous memory must be used; iterators are not currently + * supported. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys_in, d_keys_in + num_items)` + * - `[d_keys_out, d_keys_out + num_items)` * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage @@ -548,8 +596,13 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; - // Null value type + // We cast away const-ness, but will *not* write to these arrays. + // `DispatchRadixSort::Dispatch` will allocate temporary storage and + // create a new double-buffer internally when the `is_overwrite_ok` flag + // is not set. + constexpr bool is_overwrite_okay = false; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); + // Null value type DoubleBuffer d_values; return DispatchRadixSort::Dispatch( @@ -560,7 +613,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - false, + is_overwrite_okay, stream, debug_synchronous); } @@ -574,6 +627,10 @@ struct DeviceRadixSort * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys.Current(), d_keys.Current() + num_items)` + * - `[d_keys.Alternate(), d_keys.Alternate() + num_items)` * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits @@ -636,6 +693,8 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + constexpr bool is_overwrite_okay = true; + // Null value type DoubleBuffer d_values; @@ -647,7 +706,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - true, + is_overwrite_okay, stream, debug_synchronous); } @@ -656,7 +715,13 @@ struct DeviceRadixSort * \brief Sorts keys into descending order. (~2N auxiliary storage required). * * \par - * - The contents of the input data are not altered by the sorting operation + * - The contents of the input data are not altered by the sorting operation. + * - Pointers to contiguous memory must be used; iterators are not currently + * supported. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys_in, d_keys_in + num_items)` + * - `[d_keys_out, d_keys_out + num_items)` * - An optional bit subrange [begin_bit, end_bit) of differentiating key bits can be specified. This can reduce overall sorting overhead and yield a corresponding performance improvement. * - \devicestorageNP For sorting using only O(P) temporary storage, see the sorting interface using DoubleBuffer wrappers below. * - \devicestorage @@ -712,6 +777,11 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + // We cast away const-ness, but will *not* write to these arrays. + // `DispatchRadixSort::Dispatch` will allocate temporary storage and + // create a new double-buffer internally when the `is_overwrite_ok` flag + // is not set. + constexpr bool is_overwrite_okay = false; DoubleBuffer d_keys(const_cast(d_keys_in), d_keys_out); DoubleBuffer d_values; @@ -723,7 +793,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - false, + is_overwrite_okay, stream, debug_synchronous); } @@ -737,6 +807,10 @@ struct DeviceRadixSort * DoubleBuffer structure that indicates which of the two buffers is * "current" (and thus contains the input data to be sorted). * - The contents of both buffers may be altered by the sorting operation. + * - In-place operations are not supported. There must be no overlap between + * any of the provided ranges: + * - `[d_keys.Current(), d_keys.Current() + num_items)` + * - `[d_keys.Alternate(), d_keys.Alternate() + num_items)` * - Upon completion, the sorting operation will update the "current" indicator * within the DoubleBuffer wrapper to reference which of the two buffers * now contains the sorted output sequence (a function of the number of key bits @@ -795,6 +869,8 @@ struct DeviceRadixSort // Signed integer type for global offsets typedef int OffsetT; + constexpr bool is_overwrite_okay = true; + // Null value type DoubleBuffer d_values; @@ -806,7 +882,7 @@ struct DeviceRadixSort num_items, begin_bit, end_bit, - true, + is_overwrite_okay, stream, debug_synchronous); } From e4c3bbf687e2992f67fa5e881b12430d8c7fe8c5 Mon Sep 17 00:00:00 2001 From: Allison Vacanti Date: Tue, 8 Jun 2021 12:01:29 -0400 Subject: [PATCH 7/7] Add "Floating Point Special Cases" section to RadixSort docs. --- cub/block/block_radix_sort.cuh | 7 +++++++ cub/device/device_radix_sort.cuh | 7 +++++++ cub/device/device_segmented_radix_sort.cuh | 12 +++--------- 3 files changed, 17 insertions(+), 9 deletions(-) diff --git a/cub/block/block_radix_sort.cuh b/cub/block/block_radix_sort.cuh index 7919384ba4..1fa7184c11 100644 --- a/cub/block/block_radix_sort.cuh +++ b/cub/block/block_radix_sort.cuh @@ -79,6 +79,13 @@ namespace cub { * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half * half-precision floating-point type. * + * \par Floating-Point Special Cases + * + * - Positive and negative zeros are considered equivalent, and will be treated + * as such in the output. + * - No special handling is implemented for NaN values; these are sorted + * according to their bit representations after any transformations. + * * \par Bitwise Key Transformations * Although the direct radix sorting method can only be applied to unsigned * integral types, BlockRadixSort is able to sort signed and floating-point diff --git a/cub/device/device_radix_sort.cuh b/cub/device/device_radix_sort.cuh index d9c4f3c723..59bb90725b 100644 --- a/cub/device/device_radix_sort.cuh +++ b/cub/device/device_radix_sort.cuh @@ -65,6 +65,13 @@ namespace cub { * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half * half-precision floating-point type. * + * \par Floating-Point Special Cases + * + * - Positive and negative zeros are considered equivalent, and will be treated + * as such in the output. + * - No special handling is implemented for NaN values; these are sorted + * according to their bit representations after any transformations. + * * \par Transformations * Although the direct radix sorting method can only be applied to unsigned * integral types, DeviceRadixSort is able to sort signed and floating-point diff --git a/cub/device/device_segmented_radix_sort.cuh b/cub/device/device_segmented_radix_sort.cuh index c5c7fe9304..d4edc9c01b 100644 --- a/cub/device/device_segmented_radix_sort.cuh +++ b/cub/device/device_segmented_radix_sort.cuh @@ -60,15 +60,9 @@ namespace cub { * of the symbolic alphabet, the radix sorting method produces a lexicographic * ordering of those keys. * - * \par - * DeviceSegmentedRadixSort can sort all of the built-in C++ numeric primitive types - * (unsigned char, \p int, \p double, etc.) as well as CUDA's \p __half - * half-precision floating-point type. Although the direct radix sorting - * method can only be applied to unsigned integral types, DeviceSegmentedRadixSort - * is able to sort signed and floating-point types via simple bit-wise transformations - * that ensure lexicographic key ordering. DeviceSegmentedRadixSort is stable. For - * floating-point types -0.0 and +0.0 are considered equal and appear in the - * result in the same order as they appear in the input. + * \par See Also + * DeviceSegmentedRadixSort shares its implementation with DeviceRadixSort. See + * that algorithm's documentation for more information. * * \par Usage Considerations * \cdp_class{DeviceSegmentedRadixSort}