diff --git a/packages/tpetra/core/src/Tpetra_Util.hpp b/packages/tpetra/core/src/Tpetra_Util.hpp index 7b184aefdd07..7a68e1f73aad 100644 --- a/packages/tpetra/core/src/Tpetra_Util.hpp +++ b/packages/tpetra/core/src/Tpetra_Util.hpp @@ -876,6 +876,137 @@ namespace Tpetra { bool congruent (const Teuchos::Comm& comm1, const Teuchos::Comm& comm2); + + /// \brief Search indsToSearch[0 .. numEnt-1] for + /// \c indToFind, using equality comparison. + /// + /// \return If found, return index of \c indToFind in \c indsToSearch; + /// else, return \c numEnt (by analogy with C++ Standard Library + /// functions like std::find, that return "the end of the sequence" + /// in this case). + /// + /// \tparam OffsetType Integer type that can be used to represent any + /// valid index in \c indsToSearch, up to and including \c numEnt. + /// \tparam IndexViewType 1-D array of equality-comparable entries + /// (generally intended to be column indices). + /// + /// \param indsToSearch [in] Array of indices to search. For a sparse + /// graph or matrix, this is the array of all the column indices for + /// some row of the graph / matrix. + /// \param numEnt [in] Number of entries in \c indsToSearch to + /// search. This is a separate argument, first so that this + /// function works with raw arrays as well as Kokkos::View, and + /// second so that users don't have to incur the overhead of + /// calling Kokkos::subview to limit the length of a View. The + /// latter may be particularly helpful for the case of the + /// begin/end-pointer variant of CSR graph/matrix storage. + /// \param indToFind [in] (Local) column index for which to find the + /// offset. This has the same type as that of each entry in + /// \c indsToSearch. + /// \param hint [in] Hint for where to find \c indToFind in the array. + /// If indsToSearch[hint] == indToFind, then the hint is + /// correct. The hint is ignored if it is out of range (that is, + /// greater than or equal to the number of entries in the given + /// row). + /// \param isSorted [in] Whether the input array of indices to search + /// is sorted in increasing order. + /// + /// The hint optimizes for the case of calling this method several + /// times with the same sparse graph / matrix row, when several index + /// inputs occur in consecutive sequence. This may occur (for + /// example) when there are multiple degrees of freedom per mesh + /// point, and users are handling the assignment of degrees of freedom + /// to global indices manually (rather than letting some other class + /// take care of it). In that case, users might choose to assign the + /// degrees of freedom for a mesh point to consecutive global indices. + /// Epetra implements the hint for this reason. + /// + /// The hint only costs two comparisons (one to check range, and the + /// other to see if the hint was correct), and it can save searching + /// for the indices (which may take a lot more than two comparisons). + /// + /// \note To implementers: We put \c indsToSearch before \c indToFind + /// so that we can derive the type of indToFind directly from that + /// of each entry of \c indsToSearch, without needing + /// \c IndexViewType to be a Kokkos::View. Thankfully, arguments to + /// a C++ function behave more like LET* than LET (in ANSI Common + /// Lisp terms). + template + KOKKOS_INLINE_FUNCTION OffsetType + findRelOffset (const IndexViewType& indsToSearch, + const OffsetType numEnt, + /* typename IndexViewType::const_value_type */ + const typename std::decay::type indToFind, + const OffsetType hint, + const bool isSorted) + { + // IndexViewType doesn't have to be a Kokkos::View; it just has to + // implement operator[] like a 1-D array. + // + // static_assert (Kokkos::is_view::value, + // "IndexViewType must be a Kokkos::View"); + // static_assert (static_cast (IndexViewType::rank) == 1, + // "IndexViewType must be a rank-1 Kokkos::View"); + static_assert (std::is_integral::value, + "OffsetType must be an integer."); + + if (hint < numEnt && indsToSearch[hint] == indToFind) { + return hint; // hint was correct + } + +#if 0 + // Even if the array is sorted, use linear search if the number of + // entries is small ("small" is a tuning parameter; feel free to + // tune for your architecture). 'constexpr' promises the compiler + // that it can bake this constant as a literal into the code. + constexpr OffsetType linearSearchThreshold = 16; + + if (! isSorted || numEnt < linearSearchThreshold) { +#else + if (! isSorted) { +#endif + for (OffsetType k = 0; k < numEnt; ++k) { + if (indsToSearch[k] == indToFind) { + return k; + } + } + } + else { // use binary search + OffsetType start = 0; + OffsetType end = numEnt; + // Compare epetra/src/Epetra_Util.cpp, Epetra_Util_binary_search. + // Unlike that function, I don't use end = numEnt-1, because I + // want this code to work also for unsigned OffsetType (signed is + // preferred, though). Thus, in my code, end is always "one past + // the last valid index." + while (end > start) { + // Invariants: 0 <= start < end, thus start + end > 0. + const OffsetType mid = (start + end - 1) / 2; + // Invariants: 0 <= start <= mid < end. + if (indsToSearch[mid] < indToFind) { + // Invariant: start < mid+1 (thus, recursion terminates), + // and for all k <= mid, indsToSearch[k] < indToFind. + start = mid + 1; // Invariant: 0 < mid < start <= end. + } + else { // indsToSearch[mid] >= indToFind + // Invariant: mid < end (thus, recursion terminates), + // and for all k <= mid, indsToSearch[k] >= indToFind. + end = mid; // Invariant: 0 <= start <= mid <= end. + } + } + // Invariant: 0 <= start == end. + + // Don't actually check the first entry if numEnt == 0. If numEnt + // > 0 and indsToSearch == NULL, that's the caller's problem. + if (numEnt > static_cast (0) && + indsToSearch[start] == indToFind) { + return start; + } + } + + return numEnt; // "end of sequence" + } + } // namespace Details } // namespace Tpetra diff --git a/packages/tpetra/core/test/CrsGraph/CMakeLists.txt b/packages/tpetra/core/test/CrsGraph/CMakeLists.txt index 4c376ec2abbb..61ad3252b328 100644 --- a/packages/tpetra/core/test/CrsGraph/CMakeLists.txt +++ b/packages/tpetra/core/test/CrsGraph/CMakeLists.txt @@ -33,6 +33,14 @@ TRIBITS_ADD_EXECUTABLE_AND_TEST( COMM serial mpi ) +TRIBITS_ADD_EXECUTABLE_AND_TEST( + CrsGraph_findRelOffset + SOURCES + CrsGraph_findRelOffset + ${TEUCHOS_STD_UNIT_TEST_MAIN} + COMM serial mpi + ) + # TRIBITS_COPY_FILES_TO_BINARY_DIR(TpetraCrsGraphCopyFiles # SOURCE_FILES west0067.rua mhd1280b.cua # EXEDEPS CrsGraph_UnitTests diff --git a/packages/tpetra/core/test/CrsGraph/CrsGraph_findRelOffset.cpp b/packages/tpetra/core/test/CrsGraph/CrsGraph_findRelOffset.cpp new file mode 100644 index 000000000000..cac33e3a8779 --- /dev/null +++ b/packages/tpetra/core/test/CrsGraph/CrsGraph_findRelOffset.cpp @@ -0,0 +1,285 @@ +/* +// @HEADER +// *********************************************************************** +// +// Tpetra: Templated Linear Algebra Services Package +// Copyright (2008) Sandia Corporation +// +// Under the terms of Contract DE-AC04-94AL85000 with Sandia Corporation, +// the U.S. Government retains certain rights in this software. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// 1. Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the Corporation nor the names of the +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +// Questions? Contact Michael A. Heroux (maherou@sandia.gov) +// +// ************************************************************************ +// @HEADER +*/ + +#include "Tpetra_TestingUtilities.hpp" +#include "Tpetra_Util.hpp" +#include "Kokkos_View.hpp" + +namespace { // (anonymous) + using std::endl; + + // + // UNIT TESTS + // + + TEUCHOS_UNIT_TEST_TEMPLATE_1_DECL( CrsGraph, findRelOffset, Node ) + { + using Tpetra::Details::findRelOffset; + typedef int LO; + typedef typename Node::device_type DT; + typedef Kokkos::View IVT; + + Teuchos::OSTab tab0 (out); + out << "Test findRelOffset" << endl; + Teuchos::OSTab tab1 (out); + + Node node; // just to start the Kokkos execution space + + out << "Test empty arrays" << endl; + + // Test with zero indices to search, using a raw array. + { + LO numEnt = 0; + const LO* indsToSearch = NULL; + const LO indToFind = 42; + for (LO hint = 0; hint < 3; ++hint) { + // Length-zero array is trivially sorted, but try the unsorted + // case just to make sure that branch of the code is right. + LO offset = + findRelOffset (indsToSearch, numEnt, + indToFind, hint, true); + TEST_EQUALITY( offset, numEnt ); // not in the array + offset = findRelOffset (indsToSearch, numEnt, + indToFind, hint, false); + TEST_EQUALITY( offset, numEnt ); // not in the array + } + } + + out << "Test the sorted, nonempty array case" << endl; + + // Test the sorted case, with a raw array. + { + LO numEnt = 7; + const LO indsToSearch[7] = {1, 1, 2, 3, 5, 8, 13}; + const bool isSorted = true; + + for (LO hint = 0; hint < 10; ++hint) { + // Test an index that is not in the array. + // This one is in [min, max]. + LO indNotThere = 4; + LO offset = + findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test another index that is not in the array. + // This one is _not_ in [min, max]. + indNotThere = 42; + offset = findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test all indices that are in the array. + for (LO k = 0; k < numEnt; ++k) { + + const LO indToFind = indsToSearch[k]; // in the array + offset = findRelOffset (indsToSearch, numEnt, + indToFind, hint, isSorted); + if (indToFind == static_cast (1)) { + // 1 is a duplicate in this example. Treat it as a special + // case. We don't specify which instance of duplicates the + // function must return, so either one is fine. + TEST_ASSERT( offset == static_cast (0) || + offset == static_cast (1) ); + } + else { + TEST_EQUALITY( offset, k ); + } + } + } + } + + // Test the sorted case, with a Kokkos::View. + { + LO numEnt = 7; + Kokkos::View indsToSearch ("indsToSearch", numEnt); + // This assumes UVM. + indsToSearch[0] = 1; + indsToSearch[1] = 1; + indsToSearch[2] = 2; + indsToSearch[3] = 3; + indsToSearch[4] = 5; + indsToSearch[5] = 8; + indsToSearch[6] = 13; + const bool isSorted = true; + + for (LO hint = 0; hint < 10; ++hint) { + // Test an index that is not in the array. + // This one is in [min, max]. + LO indNotThere = 4; + LO offset = findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test another index that is not in the array. + // This one is _not_ in [min, max]. + indNotThere = 42; + offset = findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test all indices that are in the array. + for (LO k = 0; k < numEnt; ++k) { + const LO indToFind = indsToSearch[k]; // in the array + offset = findRelOffset (indsToSearch, numEnt, + indToFind, hint, isSorted); + if (indToFind == static_cast (1)) { + // 1 is a duplicate in this example. Treat it as a special + // case. We don't specify which instance of duplicates the + // function must return, so either one is fine. + TEST_ASSERT( offset == static_cast (0) || + offset == static_cast (1) ); + } + else { + TEST_EQUALITY( offset, k ); + } + } + } + } + + out << "Test the unsorted, nonempty array case" << endl; + + // Test the unsorted case, with a raw array. + { + LO numEnt = 7; + const LO indsToSearch[7] = {8, 1, 13, 1, 3, 2, 5}; + const bool isSorted = false; + + for (LO hint = 0; hint < 10; ++hint) { + // Test an index that is not in the array. + // This one is in [min, max]. + LO indNotThere = 4; + LO offset = + findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test another index that is not in the array. + // This one is _not_ in [min, max]. + indNotThere = 42; + offset = findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test all indices that are in the array. + for (LO k = 0; k < numEnt; ++k) { + const LO indToFind = indsToSearch[k]; // in the array + offset = findRelOffset (indsToSearch, numEnt, + indToFind, hint, isSorted); + if (indToFind == static_cast (1)) { + // 1 is a duplicate in this example. Treat it as a special + // case. We don't specify which instance of duplicates the + // function must return, so either one is fine. + TEST_ASSERT( offset == static_cast (1) || + offset == static_cast (3) ); + } + else { + TEST_EQUALITY( offset, k ); + } + } + } + } + + // Test the unsorted case, with a Kokkos::View. + { + LO numEnt = 7; + Kokkos::View indsToSearch ("indsToSearch", numEnt); + // This assumes UVM. + indsToSearch[0] = 8; + indsToSearch[1] = 1; + indsToSearch[2] = 13; + indsToSearch[3] = 1; + indsToSearch[4] = 3; + indsToSearch[5] = 2; + indsToSearch[6] = 5; + const bool isSorted = false; + + for (LO hint = 0; hint < 10; ++hint) { + // Test an index that is not in the array. + // This one is in [min, max]. + LO indNotThere = 4; + LO offset = findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test another index that is not in the array. + // This one is _not_ in [min, max]. + indNotThere = 42; + offset = findRelOffset (indsToSearch, numEnt, + indNotThere, hint, isSorted); + TEST_EQUALITY( offset, numEnt ); // not in the array + + // Test all indices that are in the array. + for (LO k = 0; k < numEnt; ++k) { + const LO indToFind = indsToSearch[k]; // in the array + offset = findRelOffset (indsToSearch, numEnt, + indToFind, hint, isSorted); + if (indToFind == static_cast (1)) { + // 1 is a duplicate in this example. Treat it as a special + // case. We don't specify which instance of duplicates the + // function must return, so either one is fine. + TEST_ASSERT( offset == static_cast (1) || + offset == static_cast (3) ); + } + else { + TEST_EQUALITY( offset, k ); + } + } + } + } + } + +// +// INSTANTIATIONS +// + +#define UNIT_TEST_GROUP( NODE ) \ + TEUCHOS_UNIT_TEST_TEMPLATE_1_INSTANT( CrsGraph, findRelOffset, NODE ) + + TPETRA_ETI_MANGLING_TYPEDEFS() + + TPETRA_INSTANTIATE_N( UNIT_TEST_GROUP ) + +} // namespace (anonymous) + +