Skip to content
This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Commit

Permalink
Fixes for rle test failures
Browse files Browse the repository at this point in the history
Former-commit-id: fa35717193b7f11bcc918b4aa5b6b87d462ed4bc
  • Loading branch information
dumerrill committed Oct 11, 2016
1 parent ff028b7 commit d84db8a
Show file tree
Hide file tree
Showing 4 changed files with 77 additions and 7 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<hr>
<h3>About CUB</h3>

Current release: v1.5.2 (03/21/2016)
Current release: v1.5.3 (10/11/2016)

We recommend the [CUB Project Website](http://nvlabs.github.com/cub) and the [cub-users discussion forum](http://groups.google.com/group/cub-users) for further information and examples.

Expand Down
8 changes: 4 additions & 4 deletions cub/agent/agent_rle.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,7 @@ struct AgentRle
LengthOffsetPair (&lengths_and_num_runs)[ITEMS_PER_THREAD])
{
// Perform warpscans
int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
int lane_id = LaneId();

LengthOffsetPair identity;
Expand Down Expand Up @@ -422,7 +422,7 @@ struct AgentRle
Int2Type<true> is_warp_time_slice)
{
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
unsigned int lane_id = LaneId();
int lane_id = LaneId();

// Locally compact items within the warp (first warp)
if (warp_id == 0)
Expand Down Expand Up @@ -479,7 +479,7 @@ struct AgentRle
Int2Type<false> is_warp_time_slice)
{
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
unsigned int lane_id = LaneId();
int lane_id = LaneId();

// Unzip
OffsetT run_offsets[ITEMS_PER_THREAD];
Expand Down Expand Up @@ -733,7 +733,7 @@ struct AgentRle

// First warp computes tile prefix in lane 0
TilePrefixCallbackOpT prefix_op(tile_status, temp_storage.prefix, Sum(), tile_idx);
int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
unsigned int warp_id = ((WARPS == 1) ? 0 : threadIdx.x / WARP_THREADS);
if (warp_id == 0)
{
prefix_op(tile_aggregate);
Expand Down
70 changes: 70 additions & 0 deletions cub/block/block_scan.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -1115,6 +1115,7 @@ public:

//@} end member group
#ifndef DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans

/******************************************************************//**
* \name Exclusive prefix scan operations (no initial value, single datum per thread)
*********************************************************************/
Expand Down Expand Up @@ -1161,10 +1162,79 @@ public:
InternalBlockScan(temp_storage).ExclusiveScan(input, output, scan_op, block_aggregate);
}

//@} end member group
/******************************************************************//**
* \name Exclusive prefix scan operations (no initial value, multiple data per thread)
*********************************************************************/
//@{


/**
* \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. With no initial value, the output computed for <em>thread</em><sub>0</sub> is undefined.
*
* \par
* - Supports non-commutative scan operators.
* - \blocked
* - \granularity
* - \smemreuse
*
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
* \tparam ScanOp <b>[inferred]</b> Binary scan functor type having member <tt>T operator()(const T &a, const T &b)</tt>
*/
template <
    int ITEMS_PER_THREAD,
    typename ScanOp>
__device__ __forceinline__ void ExclusiveScan(
    T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
    T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input)
    ScanOp scan_op) ///< [in] Binary scan functor
{
    // Flag forwarded to ThreadScanExclusive: false only for the thread whose linear_tid is 0
    const bool not_first_thread = (linear_tid != 0);

    // Step 1: fold this thread's consecutive items into one value (kept in registers)
    T thread_prefix = ThreadReduce(input, scan_op);

    // Step 2: run the single-datum exclusive scan across the block, in place on that value
    ExclusiveScan(thread_prefix, thread_prefix, scan_op);

    // Step 3: exclusive scan over this thread's items, passing the block-scan result as the prefix
    ThreadScanExclusive(input, output, scan_op, thread_prefix, not_first_thread);
}


/**
* \brief Computes an exclusive block-wide prefix scan using the specified binary \p scan_op functor. Each thread contributes an array of consecutive input elements. Also provides every thread with the block-wide \p block_aggregate of all inputs. With no initial value, the output computed for <em>thread</em><sub>0</sub> is undefined.
*
* \par
* - Supports non-commutative scan operators.
* - \blocked
* - \granularity
* - \smemreuse
*
* \tparam ITEMS_PER_THREAD <b>[inferred]</b> The number of consecutive items partitioned onto each thread.
* \tparam ScanOp <b>[inferred]</b> Binary scan functor type having member <tt>T operator()(const T &a, const T &b)</tt>
*/
template <
    int ITEMS_PER_THREAD,
    typename ScanOp>
__device__ __forceinline__ void ExclusiveScan(
    T (&input)[ITEMS_PER_THREAD], ///< [in] Calling thread's input items
    T (&output)[ITEMS_PER_THREAD], ///< [out] Calling thread's output items (may be aliased to \p input)
    ScanOp scan_op, ///< [in] Binary scan functor
    T &block_aggregate) ///< [out] block-wide aggregate reduction of input items
{
    // Flag forwarded to ThreadScanExclusive: false only for the thread whose linear_tid is 0
    const bool not_first_thread = (linear_tid != 0);

    // Step 1: fold this thread's consecutive items into one value (kept in registers)
    T thread_prefix = ThreadReduce(input, scan_op);

    // Step 2: run the single-datum exclusive scan across the block, in place on that value;
    // the same call also fills in block_aggregate
    ExclusiveScan(thread_prefix, thread_prefix, scan_op, block_aggregate);

    // Step 3: exclusive scan over this thread's items, passing the block-scan result as the prefix
    ThreadScanExclusive(input, output, scan_op, thread_prefix, not_first_thread);
}


//@} end member group
#endif // DOXYGEN_SHOULD_SKIP_THIS // Do not document no-initial-value scans

/******************************************************************//**
* \name Inclusive prefix sum operations
*********************************************************************/
Expand Down
4 changes: 2 additions & 2 deletions test/test_util.h
Original file line number Diff line number Diff line change
Expand Up @@ -1430,9 +1430,9 @@ void DisplayResults(
/**
* Print the contents of a host array
*/
template <typename T>
template <typename InputIteratorT>
void DisplayResults(
T *h_data,
InputIteratorT h_data,
size_t num_items)
{
// Display data
Expand Down

0 comments on commit d84db8a

Please sign in to comment.