diff --git a/src/k_smallest.rs b/src/k_smallest.rs
index fe699fbd4..b909887f5 100644
--- a/src/k_smallest.rs
+++ b/src/k_smallest.rs
@@ -57,6 +57,7 @@ where
     }
 
     iter.for_each(|val| {
+        debug_assert_eq!(storage.len(), k);
         if is_less_than(&val, &storage[0]) {
             // Treating this as an push-and-pop saves having to write a sift-up implementation.
             // https://en.wikipedia.org/wiki/Binary_heap#Insert_then_extract
diff --git a/src/lib.rs b/src/lib.rs
index ec374c469..a297a6d80 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -55,7 +55,7 @@ extern crate core as std;
 extern crate alloc;
 
 #[cfg(feature = "use_alloc")]
-use alloc::{string::String, vec::Vec};
+use alloc::{collections::VecDeque, string::String, vec::Vec};
 
 pub use either::Either;
 
@@ -72,6 +72,8 @@ use std::fmt::Write;
 use std::hash::Hash;
 use std::iter::{once, IntoIterator};
 #[cfg(feature = "use_alloc")]
+type VecDequeIntoIter<T> = alloc::collections::vec_deque::IntoIter<T>;
+#[cfg(feature = "use_alloc")]
 type VecIntoIter<T> = alloc::vec::IntoIter<T>;
 use std::iter::FromIterator;
 
@@ -3144,6 +3146,67 @@ pub trait Itertools: Iterator {
         self.k_largest_by(k, k_smallest::key_to_cmp(key))
     }
 
+    /// Consumes the iterator and returns an iterator over its last `n` elements.
+    ///
+    /// If the iterator yields fewer than `n` elements, all of them are returned.
+    /// The result, if directly collected to a `VecDeque`, is converted without
+    /// any extra copying or allocation cost. If directly collected to a `Vec`,
+    /// it may need some data movement but no re-allocation.
+    ///
+    /// ```
+    /// use itertools::{assert_equal, Itertools};
+    ///
+    /// let v = vec![5, 9, 8, 4, 2, 12, 0];
+    /// assert_equal(v.iter().tail(3), &[2, 12, 0]);
+    /// assert_equal(v.iter().tail(10), &v);
+    ///
+    /// assert_equal(v.iter().tail(1), v.iter().last());
+    ///
+    /// assert_equal((0..100).tail(10), 90..100);
+    ///
+    /// assert_equal((0..100).filter(|x| x % 3 == 0).tail(10), (72..100).step_by(3));
+    /// ```
+    ///
+    /// For double-ended iterators without side effects, you might prefer
+    /// `.rev().take(n).rev()` to get a similar result (lazy and non-allocating)
+    /// without consuming the entire iterator.
+    #[cfg(feature = "use_alloc")]
+    fn tail(self, n: usize) -> VecDequeIntoIter<Self::Item>
+    where
+        Self: Sized,
+    {
+        match n {
+            0 => {
+                self.last();
+                VecDeque::new()
+            }
+            1 => self.last().into_iter().collect(),
+            _ => {
+                // At least `low` items remain, so the first `low - n` cannot be in the tail.
+                let (low, _) = self.size_hint();
+                let mut iter = self.fuse().skip(low.saturating_sub(n));
+                // TODO: If `VecDeque` gains a more efficient method than
+                // `.pop_front(); .push_back(val)` in the future then maybe revisit this.
+                let mut data: Vec<_> = iter.by_ref().take(n).collect();
+                // Overwrite cyclically: `i` is always the slot of the oldest element.
+                let idx = iter.fold(0, |i, val| {
+                    debug_assert_eq!(data.len(), n);
+                    data[i] = val;
+                    if i + 1 == n {
+                        0
+                    } else {
+                        i + 1
+                    }
+                });
+                // The oldest element sits at `idx`; rotate it to the front to restore order.
+                let mut data = VecDeque::from(data);
+                data.rotate_left(idx);
+                data
+            }
+        }
+        .into_iter()
+    }
+
     /// Collect all iterator elements into one of two
     /// partitions. Unlike [`Iterator::partition`], each partition may
     /// have a distinct type.
diff --git a/tests/quick.rs b/tests/quick.rs
index bb08e21c6..aa61e3c1e 100644
--- a/tests/quick.rs
+++ b/tests/quick.rs
@@ -1949,4 +1949,11 @@ quickcheck! {
             result_set.is_empty()
         }
     }
+
+    fn tail(v: Vec<i32>, n: u8) -> bool {
+        let n = n as usize;
+        let result = &v[v.len().saturating_sub(n)..]; // expected: up to `n` trailing items
+        itertools::equal(v.iter().tail(n), result) // slice iterator: exact size hint
+            && itertools::equal(v.iter().filter(|_| true).tail(n), result) // lower size hint: 0
+    }
 }