Skip to content

Commit

Permalink
Merge pull request #102 from h-2/feature/view_trim
Browse files Browse the repository at this point in the history
[feature] view::trim
  • Loading branch information
h-2 authored Aug 8, 2017
2 parents 563b6ab + 4582c9c commit f991d50
Show file tree
Hide file tree
Showing 4 changed files with 340 additions and 1 deletion.
3 changes: 2 additions & 1 deletion include/seqan3/range/view/all.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,12 +47,13 @@
#include <seqan3/range/view/rank_to.hpp>
#include <seqan3/range/view/to_char.hpp>
#include <seqan3/range/view/to_rank.hpp>
#include <seqan3/range/view/trim.hpp>

/*!\defgroup view View
* \brief Views are "lazy range combinators" that offer modified views onto other ranges.
* \ingroup range
* \sa https://ericniebler.github.io/range-v3/index.html#range-views
* \sa range/view.hpp
* \sa range/view/all.hpp
*
* SeqAn3 makes heavy use of views as defined in the
* [Ranges Technical Specification](http://en.cppreference.com/w/cpp/experimental/ranges). Currently the
Expand Down
228 changes: 228 additions & 0 deletions include/seqan3/range/view/trim.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,228 @@
// ============================================================================
// SeqAn - The Library for Sequence Analysis
// ============================================================================
//
// Copyright (c) 2006-2017, Knut Reinert & Freie Universitaet Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Knut Reinert or the FU Berlin nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ============================================================================

/*!\file
* \ingroup view
* \author Hannes Hauswedell <hannes.hauswedell AT fu-berlin.de>
* \brief Provides seqan3::view::trim.
*/

#pragma once

#include <range/v3/view/take_while.hpp>

#include <seqan3/alphabet/quality/quality_composition.hpp>
#include <seqan3/range/concept.hpp>

namespace seqan3::detail
{

/*!\brief The underlying type of seqan3::view::trim.
* \ingroup view
*
* Under the hood this delegates to ranges::view::take_while.
*/
struct trim_fn
{
    /*!\brief Trim based on minimum phred score.
     * \tparam irng_t    The type of the range being processed. See seqan3::view::trim for requirements.
     * \param  irange    The range being processed.
     * \param  threshold The minimum quality as a phred score [integral type].
     * \returns The result of ranges::view::take_while applied to `irange` with the predicate
     *          `to_phred(value) >= threshold`.
     */
    template <typename irng_t>
    auto operator()(irng_t && irange,
                    underlying_phred_t<ranges::value_type_t<std::decay_t<irng_t>>> const threshold) const
        requires input_range_concept<irng_t> && quality_concept<ranges::value_type_t<std::decay_t<irng_t>>>
    {
        // The threshold is captured by value so that the returned view does not refer back to this call frame.
        return ranges::view::take_while(std::forward<irng_t>(irange), [threshold] (auto && value)
        {
            return to_phred(std::forward<decltype(value)>(value)) >= threshold;
        });
    }

    /*!\brief Trim based on value_type.
     * \tparam irng_t    The type of the range being processed. See seqan3::view::trim for requirements.
     * \param  irange    The range being processed.
     * \param  threshold The minimum quality given by a value of the range's value type.
     * \returns Whatever the phred-score overload returns for `to_phred(threshold)`.
     */
    template <typename irng_t>
    auto operator()(irng_t && irange,
                    std::decay_t<ranges::value_type_t<std::decay_t<irng_t>>> const threshold) const
        requires input_range_concept<irng_t> && quality_concept<ranges::value_type_t<std::decay_t<irng_t>>>
    {
        // Only the phred score of the given letter matters; delegate to the phred-score overload.
        return (*this)(std::forward<irng_t>(irange), to_phred(threshold));
    }

    /*!\brief A functor that behaves like a named version of std::bind around seqan3::detail::trim_fn::operator().
     * \tparam threshold_t Must be an integral type or satisfy the seqan3::quality_concept.
     *
     * \details
     *
     * * The single-parameter-operator() of trim_fn would normally return a bound two-parameter-operator() via
     *   std::bind.
     * * trim_fn's friend operator| would take this as second argument via decltype(std::bind(...)) so that it
     *   is specific to this overload.
     * * However, since the type of the threshold parameter is a template argument, we need function template
     *   overloading. This doesn't work with decltype() and the actual return type of std::bind is
     *   implementation defined.
     * * So we have this helper class template which behaves exactly the same, but has a distinct (named) type.
     *
     * The delegate stores only the threshold. trim_fn has no data members, so the delegate invokes a freshly
     * constructed trim_fn instead of keeping a reference to the functor that created it; such a reference
     * would dangle if that functor were a temporary.
     *
     * \attention You should never instantiate this manually.
     */
    template <typename threshold_t>
    struct delegate
    {
        //!\brief The intermediately stored threshold.
        threshold_t threshold;

        //!\brief The operator() that only takes the range as argument and forwards to the two-parameter operator().
        template <typename irng_t>
        auto operator()(irng_t && irange) const
        {
            return trim_fn{}(std::forward<irng_t>(irange), threshold);
        }
    };

    /*!\brief Range-less interface for use with the pipe notation.
     * \tparam threshold_t Must be an integral type or satisfy the seqan3::quality_concept.
     * \param  threshold   The minimum quality, given either as a phred score (integral) or as a quality letter.
     * \returns A seqan3::detail::trim_fn::delegate that stores `threshold`.
     *
     * \details
     *
     * Binds to one of the other interfaces and forwards the threshold. std::bind cannot be used here, see
     * seqan3::detail::trim_fn::delegate.
     */
    template <typename threshold_t>
    delegate<threshold_t> operator()(threshold_t const threshold) const
        requires std::is_integral_v<std::decay_t<threshold_t>> || quality_concept<std::decay_t<threshold_t>>
    {
        return delegate<threshold_t>{threshold};
    }

    /*!\brief Pipe operator that enables view-typical use of pipe notation.
     * \tparam irng_t      The type of the range being processed. See seqan3::view::trim for requirements.
     * \tparam threshold_t Must be `irng_t`'s `value_type` or be convertible to that `value_type`'s
     *                     seqan3::underlying_phred_t.
     * \param  irange      The range being processed as left argument of the pipe.
     * \param  bound_view  The result of the single-argument operator() (interface with bound threshold parameter).
     * \returns The result of invoking `bound_view` on `irange`.
     */
    template <typename irng_t,
              typename threshold_t>
    //!\cond
        requires input_range_concept<irng_t> && quality_concept<ranges::value_type_t<std::decay_t<irng_t>>> &&
                 (std::is_same_v<std::decay_t<threshold_t>,
                                 std::decay_t<ranges::value_type_t<std::decay_t<irng_t>>>> ||
                  std::is_convertible_v<std::decay_t<threshold_t>,
                                        underlying_phred_t<std::decay_t<ranges::value_type_t<std::decay_t<irng_t>>>>>)
    //!\endcond
    friend auto operator|(irng_t && irange,
                          seqan3::detail::trim_fn::delegate<threshold_t> const & bound_view)
    {
        return bound_view(std::forward<irng_t>(irange));
    }
};

} // namespace seqan3::detail

namespace seqan3::view
{

/*!\brief A view that does quality-threshold trimming on a range of seqan3::quality_concept.
* \tparam irng_t The type of the range being processed. See below for requirements.
* \tparam threshold_t Either of `value_type_t<irng_t>` or of `seqan3::underlying_phred_t<value_type_t<irng_t>>`.
* \param irange The range being processed. [parameter is omitted in pipe notation]
* \param threshold The minimum quality.
* \returns A trimmed range. See below for the properties of the returned range.
* \ingroup view
*
* \details
*
* This view can be used to do easy quality based trimming of sequences.
*
* \par View properties
*
* | | `irng_t` (range input type) | `rrng_t` (range return type) |
* |---------------------|-------------------------------|-----------------------------------------------------------|
* | range | seqan3::input_range_concept | seqan3::view_concept + all range concepts met by `irng_t` except seqan3::sized_range_concept |
* | `range_reference_t` | seqan3::quality_concept | `range_reference_t<irng_t>` |
*
* * The input properties are **requirements** on the range input type.
* * The return properties are **guarantees** given on the range return type.
* * for more details, see \ref view.
*
* \par Example
*
* Operating on a range of seqan3::illumina18:
* ```cpp
* std::vector<illumina18> vec{illumina18{40}, illumina18{40}, illumina18{30}, illumina18{20}, illumina18{10}};
*
* // trim by phred_value
* auto v1 = vec | view::trim(20u); // == ['I','I','?','5']
*
* // trim by quality character
* auto v2 = vec | view::trim(illumina18{40}); // == ['I','I']
*
* // function syntax
* auto v3 = view::trim(vec, 20u); // == ['I','I','?','5']
*
* // combinability
* std::string v4 = view::trim(vec, 20u) | view::to_char; // == "II?5"
* ```
*
* Or operating on a range of seqan3::dna5q:
* ```cpp
* std::vector<dna5q> vec{{dna5::A, illumina18{40}}, {dna5::G, illumina18{40}}, {dna5::G, illumina18{30}},
* {dna5::A, illumina18{20}}, {dna5::T, illumina18{10}}};
* std::vector<dna5q> cmp{{dna5::A, illumina18{40}}, {dna5::G, illumina18{40}}, {dna5::G, illumina18{30}},
* {dna5::A, illumina18{20}}};
*
* // trim by phred_value
* auto v1 = vec | view::trim(20u);
* assert(std::vector<dna5q>(v1) == cmp);
*
* // trim by quality character; in this case the nucleotide part of the character is irrelevant
* auto v2 = vec | view::trim(dna5q{dna5::C, illumina18{20}});
* assert(std::vector<dna5q>(v2) == cmp);
*
* // combinability
* std::string v4 = view::trim(vec, 20u) | view::to_char;
* assert(v4 == "AGGA");
* ```
*/

// Defined as an `inline constexpr` variable: a plain namespace-scope `const` object in a header has internal
// linkage, so every translation unit would get its own distinct `trim` object.
inline constexpr seqan3::detail::trim_fn trim{};

} // namespace seqan3::view
1 change: 1 addition & 0 deletions test/range/view/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ seqan3_test(view_convert_test.cpp)
seqan3_test(view_rank_to_test.cpp)
seqan3_test(view_to_char_test.cpp)
seqan3_test(view_to_rank_test.cpp)
seqan3_test(view_trim_test.cpp)
109 changes: 109 additions & 0 deletions test/range/view/view_trim_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
// ==========================================================================
// SeqAn - The Library for Sequence Analysis
// ==========================================================================
//
// Copyright (c) 2006-2017, Knut Reinert, FU Berlin
// Copyright (c) 2016-2017, Knut Reinert & MPI Molekulare Genetik
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of Knut Reinert or the FU Berlin nor the names of
// its contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL KNUT REINERT OR THE FU BERLIN BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
// OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
// DAMAGE.
//
// ==========================================================================

#include <iostream>

#include <gtest/gtest.h>

#include <range/v3/view/reverse.hpp>

#include <seqan3/alphabet/quality/all.hpp>
#include <seqan3/range/view/to_char.hpp>
#include <seqan3/range/view/trim.hpp>

using namespace seqan3;
using namespace seqan3::view;

TEST(view_trim, standalone)
{
    // Input qualities with phred scores 40, 40, 30, 20, 10 and the prefixes expected for the thresholds used below.
    std::vector<illumina18> qualities{illumina18{40}, illumina18{40}, illumina18{30},
                                      illumina18{20}, illumina18{10}};
    std::vector<illumina18> expect_min20{illumina18{40}, illumina18{40}, illumina18{30}, illumina18{20}};
    std::vector<illumina18> expect_min40{illumina18{40}, illumina18{40}};

    // pipe notation, threshold given as phred score
    auto piped_by_phred = qualities | view::trim(20u);               // == ['I','I','?','5']
    EXPECT_EQ(std::vector<illumina18>(piped_by_phred), expect_min20);

    // pipe notation, threshold given as quality letter
    auto piped_by_letter = qualities | view::trim(illumina18{40});   // == ['I','I']
    EXPECT_EQ(std::vector<illumina18>(piped_by_letter), expect_min40);

    // plain function-call notation
    auto called = view::trim(qualities, 20u);                        // == ['I','I','?','5']
    EXPECT_EQ(std::vector<illumina18>(called), expect_min20);

    // the result can be piped into further views
    std::string as_chars = view::trim(qualities, 20u) | view::to_char; // == "II?5"
    EXPECT_EQ("II?5", as_chars);
}

TEST(view_trim, quality_composition)
{
    // Nucleotide/quality pairs with phred scores 40, 40, 30, 20, 10 and the expected trimmed prefixes.
    std::vector<dna5q> reads{{dna5::A, illumina18{40}},
                             {dna5::G, illumina18{40}},
                             {dna5::G, illumina18{30}},
                             {dna5::A, illumina18{20}},
                             {dna5::T, illumina18{10}}};
    std::vector<dna5q> expect_min20{{dna5::A, illumina18{40}},
                                    {dna5::G, illumina18{40}},
                                    {dna5::G, illumina18{30}},
                                    {dna5::A, illumina18{20}}};
    std::vector<dna5q> expect_min40{{dna5::A, illumina18{40}},
                                    {dna5::G, illumina18{40}}};

    // pipe notation, threshold given as phred score
    auto piped_by_phred = reads | view::trim(20u);
    EXPECT_EQ(std::vector<dna5q>(piped_by_phred), expect_min20);

    // pipe notation, threshold given as a composition letter
    auto piped_by_letter = reads | view::trim(dna5q{dna5::C, 40});
    EXPECT_EQ(std::vector<dna5q>(piped_by_letter), expect_min40);

    // plain function-call notation
    auto called = view::trim(reads, 20u);
    EXPECT_EQ(std::vector<dna5q>(called), expect_min20);

    // the result can be piped into further views
    std::string as_chars = view::trim(reads, 20u) | view::to_char;
    EXPECT_EQ("AGGA", as_chars);
}

TEST(view_trim, concepts)
{
    std::vector<dna5q> reads{{dna5::A, 40}, {dna5::G, 40}, {dna5::G, 30}, {dna5::A, 20}, {dna5::T, 10}};

    // The underlying container models all of the checked range concepts, including sized_range.
    using container_t = decltype(reads);
    EXPECT_TRUE(input_range_concept<container_t>);
    EXPECT_TRUE(forward_range_concept<container_t>);
    EXPECT_TRUE(random_access_range_concept<container_t>);
    EXPECT_TRUE(sized_range_concept<container_t>);

    // The trimmed view keeps the traversal concepts but must not model sized_range.
    auto trimmed = reads | view::trim(20u);
    using trimmed_t = decltype(trimmed);
    EXPECT_TRUE(input_range_concept<trimmed_t>);
    EXPECT_TRUE(forward_range_concept<trimmed_t>);
    EXPECT_TRUE(random_access_range_concept<trimmed_t>);
    EXPECT_FALSE(sized_range_concept<trimmed_t>);
}

0 comments on commit f991d50

Please sign in to comment.