diff --git a/crates/circuit/Cargo.toml b/crates/circuit/Cargo.toml
index 457d64108254..373295408be1 100644
--- a/crates/circuit/Cargo.toml
+++ b/crates/circuit/Cargo.toml
@@ -38,7 +38,7 @@ features = ["rayon"]
 
 [dependencies.smallvec]
 workspace = true
-features = ["union"]
+features = ["union", "const_generics"]
 
 [features]
 cache_pygates = []
diff --git a/crates/circuit/src/dag_circuit.rs b/crates/circuit/src/dag_circuit.rs
index ca1a9f59aa1d..d891dfe2af12 100644
--- a/crates/circuit/src/dag_circuit.rs
+++ b/crates/circuit/src/dag_circuit.rs
@@ -25,7 +25,7 @@ use crate::dag_node::{DAGInNode, DAGNode, DAGOpNode, DAGOutNode};
 use crate::dot_utils::build_dot;
 use crate::error::DAGCircuitError;
 use crate::imports;
-use crate::interner::{Interned, Interner};
+use crate::interner::{Interned, InternedMap, Interner};
 use crate::operations::{Operation, OperationRef, Param, PyInstruction, StandardGate};
 use crate::packed_instruction::{PackedInstruction, PackedOperation};
 use crate::rustworkx_core_vnext::isomorphism;
@@ -4810,6 +4810,76 @@ impl DAGCircuit {
         &self.vars
     }
 
+    /// Fold the `qargs` held by a different [Interner] into this DAG, remapping the qubits.
+    ///
+    /// The resulting `map` allows [PackedInstruction]s to be carried directly from one DAG to
+    /// another, which is handy in substitution methods, or when rebuilding a new DAG out of a lot
+    /// of smaller ones.  See [Interner::merge_map_slice] for the details of the mapping function.
+    ///
+    /// Any entries that `map` held before the call are cleared by this method, so that its
+    /// allocation can be re-used.
+    pub fn merge_qargs_using(
+        &mut self,
+        other: &Interner<[Qubit]>,
+        map_fn: impl FnMut(&Qubit) -> Option<Qubit>,
+        map: &mut InternedMap<[Qubit]>,
+    ) {
+        // Stack space set aside for remapping one `qargs` slice.  The value is an arbitrary guess,
+        // but it doesn't matter if it's too short, because longer slices safely spill to the heap.
+        const INLINE_QARGS: usize = 4;
+        self.qargs_interner.merge_map_slice_using::<INLINE_QARGS>(other, map_fn, map);
+    }
+
+    /// Merge the `qargs` held by a different [Interner] into this DAG, remapping the qubits.
+    ///
+    /// This is the convenience form of [Self::merge_qargs_using] that allocates and returns a new
+    /// [InternedMap], for carrying [PackedInstruction]s from one DAG to another.  See
+    /// [Interner::merge_map_slice] for more information on the mapping function.
+    pub fn merge_qargs(
+        &mut self,
+        other: &Interner<[Qubit]>,
+        map_fn: impl FnMut(&Qubit) -> Option<Qubit>,
+    ) -> InternedMap<[Qubit]> {
+        let mut key_map = InternedMap::new();
+        self.merge_qargs_using(other, map_fn, &mut key_map);
+        key_map
+    }
+
+    /// Fold the `cargs` held by a different [Interner] into this DAG, remapping the clbits.
+    ///
+    /// The resulting `map` allows [PackedInstruction]s to be carried directly from one DAG to
+    /// another, which is handy in substitution methods, or when rebuilding a new DAG out of a lot
+    /// of smaller ones.  See [Interner::merge_map_slice] for the details of the mapping function.
+    ///
+    /// Any entries that `map` held before the call are cleared by this method, so that its
+    /// allocation can be re-used.
+    pub fn merge_cargs_using(
+        &mut self,
+        other: &Interner<[Clbit]>,
+        map_fn: impl FnMut(&Clbit) -> Option<Clbit>,
+        map: &mut InternedMap<[Clbit]>,
+    ) {
+        // Stack space set aside for remapping one `cargs` slice.  The value is an arbitrary guess,
+        // but it doesn't matter if it's too short, because longer slices safely spill to the heap.
+        const INLINE_CARGS: usize = 4;
+        self.cargs_interner.merge_map_slice_using::<INLINE_CARGS>(other, map_fn, map);
+    }
+
+    /// Merge the `cargs` held by a different [Interner] into this DAG, remapping the clbits.
+    ///
+    /// This is the convenience form of [Self::merge_cargs_using] that allocates and returns a new
+    /// [InternedMap], for carrying [PackedInstruction]s from one DAG to another.  See
+    /// [Interner::merge_map_slice] for more information on the mapping function.
+    pub fn merge_cargs(
+        &mut self,
+        other: &Interner<[Clbit]>,
+        map_fn: impl FnMut(&Clbit) -> Option<Clbit>,
+    ) -> InternedMap<[Clbit]> {
+        let mut key_map = InternedMap::new();
+        self.merge_cargs_using(other, map_fn, &mut key_map);
+        key_map
+    }
+
     /// Return an iterator of gate runs with non-conditional op nodes of given names
     pub fn collect_runs(
         &self,
@@ -6383,10 +6453,24 @@ impl DAGCircuit {
         &self.op_names
     }
 
-    /// Extends the DAG with valid instances of [PackedInstruction]
+    /// Extends the DAG with valid instances of [PackedInstruction].
+    ///
+    /// This is the infallible convenience form of [Self::try_extend].
     pub fn extend<I>(&mut self, py: Python, iter: I) -> PyResult<Vec<NodeIndex>>
     where
         I: IntoIterator<Item = PackedInstruction>,
+    {
+        // Wrap every instruction in an `Ok` whose error type can never be constructed.
+        let always_ok = iter.into_iter().map(Ok::<PackedInstruction, Infallible>);
+        self.try_extend(py, always_ok)
+    }
+
+    /// Extends the DAG with valid instances of [PackedInstruction], where the iterator produces the
+    /// results in a fallible manner.
+    pub fn try_extend<I, E>(&mut self, py: Python, iter: I) -> PyResult<Vec<NodeIndex>>
+    where
+        I: IntoIterator<Item = Result<PackedInstruction, E>>,
+        PyErr: From<E>,
     {
         // Create HashSets to keep track of each bit/var's last node
         let mut qubit_last_nodes: HashMap<Qubit, NodeIndex> = HashMap::default();
@@ -6400,6 +6484,7 @@ impl DAGCircuit {
         // Store new nodes to return
         let mut new_nodes = Vec::with_capacity(iter.size_hint().1.unwrap_or_default());
         for instr in iter {
+            let instr = instr?;
             let op_name = instr.op.name();
             let (all_cbits, vars): (Vec<Clbit>, Option<Vec<PyObject>>) = {
                 if self.may_have_additional_wires(py, &instr) {
@@ -6571,8 +6656,8 @@ impl DAGCircuit {
 
         new_dag.metadata = qc.metadata.map(|meta| meta.unbind());
 
-        // Add the qubits depending on order.
-        let qubit_map: Option<Vec<Qubit>> = if let Some(qubit_ordering) = qubit_order {
+        // Add the qubits depending on order, and produce the qargs map.
+        let qarg_map = if let Some(qubit_ordering) = qubit_order {
             let mut ordered_vec = Vec::from_iter((0..num_qubits as u32).map(Qubit));
             qubit_ordering
                 .into_iter()
@@ -6587,7 +6672,11 @@ impl DAGCircuit {
                     ordered_vec[qubit_index.index()] = new_dag.add_qubit_unchecked(py, &qubit)?;
                     Ok(())
                 })?;
-            Some(ordered_vec)
+            // The `Vec::get` use is because an arbitrary interner might contain old references to
+            // bit instances beyond `num_qubits`, such as if it's from a DAG that had wires removed.
+            new_dag.merge_qargs(qc_data.qargs_interner(), |bit| {
+                ordered_vec.get(bit.index()).copied()
+            })
         } else {
             qc_data
                 .qubits()
@@ -6597,11 +6686,11 @@ impl DAGCircuit {
                     new_dag.add_qubit_unchecked(py, qubit.bind(py))?;
                     Ok(())
                 })?;
-            None
+            new_dag.merge_qargs(qc_data.qargs_interner(), |bit| Some(*bit))
         };
 
-        // Add the clbits depending on order.
-        let clbit_map: Option<Vec<Clbit>> = if let Some(clbit_ordering) = clbit_order {
+        // Add the clbits depending on order, and produce the cargs map.
+        let carg_map = if let Some(clbit_ordering) = clbit_order {
             let mut ordered_vec = Vec::from_iter((0..num_clbits as u32).map(Clbit));
             clbit_ordering
                 .into_iter()
@@ -6616,7 +6705,11 @@ impl DAGCircuit {
                     ordered_vec[clbit_index.index()] = new_dag.add_clbit_unchecked(py, &clbit)?;
                     Ok(())
                 })?;
-            Some(ordered_vec)
+            // The `Vec::get` use is because an arbitrary interner might contain old references to
+            // bit instances beyond `num_clbits`, such as if it's from a DAG that had wires removed.
+            new_dag.merge_cargs(qc_data.cargs_interner(), |bit| {
+                ordered_vec.get(bit.index()).copied()
+            })
         } else {
             qc_data
                 .clbits()
@@ -6626,7 +6719,7 @@ impl DAGCircuit {
                     new_dag.add_clbit_unchecked(py, clbit.bind(py))?;
                     Ok(())
                 })?;
-            None
+            new_dag.merge_cargs(qc_data.cargs_interner(), |bit| Some(*bit))
         };
 
         // Add all of the new vars.
@@ -6655,57 +6748,24 @@ impl DAGCircuit {
             }
         }
 
-        // Pre-process and re-intern all indices again.
-        let instructions: Vec<PackedInstruction> = qc_data
-            .iter()
-            .map(|instr| -> PyResult<PackedInstruction> {
-                // Re-map the qubits
-                let new_qargs = if let Some(qubit_mapping) = &qubit_map {
-                    let qargs = qc_data
-                        .get_qargs(instr.qubits)
-                        .iter()
-                        .map(|bit| qubit_mapping[bit.index()])
-                        .collect();
-                    new_dag.qargs_interner.insert_owned(qargs)
-                } else {
-                    new_dag
-                        .qargs_interner
-                        .insert(qc_data.get_qargs(instr.qubits))
-                };
-                // Remap the clbits
-                let new_cargs = if let Some(clbit_mapping) = &clbit_map {
-                    let qargs = qc_data
-                        .get_cargs(instr.clbits)
-                        .iter()
-                        .map(|bit| clbit_mapping[bit.index()])
-                        .collect();
-                    new_dag.cargs_interner.insert_owned(qargs)
-                } else {
-                    new_dag
-                        .cargs_interner
-                        .insert(qc_data.get_cargs(instr.clbits))
-                };
-                // Copy the operations
-
+        new_dag.try_extend(
+            py,
+            qc_data.iter().map(|instr| -> PyResult<PackedInstruction> {
                 Ok(PackedInstruction {
                     op: if copy_op {
                         instr.op.py_deepcopy(py, None)?
                     } else {
                         instr.op.clone()
                     },
-                    qubits: new_qargs,
-                    clbits: new_cargs,
+                    qubits: qarg_map[instr.qubits],
+                    clbits: carg_map[instr.clbits],
                     params: instr.params.clone(),
                     extra_attrs: instr.extra_attrs.clone(),
                     #[cfg(feature = "cache_pygates")]
                     py_op: OnceLock::new(),
                 })
-            })
-            .collect::<PyResult<Vec<_>>>()?;
-
-        // Finally add all the instructions back
-        new_dag.extend(py, instructions)?;
-
+            }),
+        )?;
         Ok(new_dag)
     }
 
diff --git a/crates/circuit/src/interner.rs b/crates/circuit/src/interner.rs
index b77ecb51fa98..17065c5c92e1 100644
--- a/crates/circuit/src/interner.rs
+++ b/crates/circuit/src/interner.rs
@@ -10,18 +10,19 @@
 // copyright notice, and modified files need to carry a notice indicating
 // that they have been altered from the originals.
 
-use std::borrow::Borrow;
+use std::borrow::{Borrow, Cow};
 use std::fmt;
 use std::hash::Hash;
 use std::marker::PhantomData;
 
 use indexmap::IndexSet;
+use smallvec::SmallVec;
 
 /// A key to retrieve a value (by reference) from an interner of the same type.  This is narrower
 /// than a true reference, at the cost that it is explicitly not lifetime bound to the interner it
 /// came from; it is up to the user to ensure that they never attempt to query an interner with a
 /// key from a different interner.
-#[derive(Debug, Eq, PartialEq)]
+#[derive(Debug, Eq, PartialEq, Hash)]
 pub struct Interned<T: ?Sized> {
     index: u32,
     // Storing the type of the interned value adds a small amount more type safety to the interner
@@ -43,6 +44,128 @@ impl<T: ?Sized> Copy for Interned<T> {}
 unsafe impl<T: ?Sized> Send for Interned<T> {}
 unsafe impl<T: ?Sized> Sync for Interned<T> {}
 
+/// A map of the indices from one interner to another, as created by the interner-merging
+/// functions like [Interner::merge_map] and [Interner::merge_map_slice].
+///
+/// This map can be indexed by the [Interned] keys of the smaller [Interner], and returns [Interned]
+/// keys that work on the larger [Interner] (the one that expanded itself).  Indexing panics if
+/// asked for the new key of an object that was filtered out during the merge.
+//
+// NOTE(review): the derived `Clone` and `Default` put `Clone`/`Default` bounds on `S` and `T`, so
+// they look unavailable for unsized keys such as `InternedMap<[T]>` -- confirm whether manual
+// impls are wanted here (`Interned` already implements `Copy` by hand for `T: ?Sized`).
+#[derive(Clone, Debug, Default)]
+pub struct InternedMap<S: ?Sized, T: ?Sized = S> {
+    // A [Vec] is enough here, because [Interner] keys are guaranteed to be consecutive integers
+    // counting from zero; it's effectively how an [Interner] does lookups from [Interned] already.
+    // The [Option] is what supports filtering in the map; we don't use a hash-map because we
+    // expect filtering to only infrequently remove values.
+    map: Vec<Option<Interned<T>>>,
+    // Marker only: we're pretending to be a mapping type from [Interned<S>] to [Interned<T>].
+    _other: PhantomData<Interned<S>>,
+}
+impl<S: ?Sized, T: ?Sized> InternedMap<S, T> {
+    /// Create a new empty [InternedMap], without allocating.
+    ///
+    /// The result can serve as a persistent allocation across repeated calls to
+    /// [Interner::merge_map] or related functions.
+    pub fn new() -> Self {
+        Self {
+            map: Vec::new(),
+            _other: PhantomData,
+        }
+    }
+
+    /// Create a new empty [InternedMap] whose storage is pre-allocated for `cap` entries.
+    ///
+    /// The result can serve as a persistent allocation across repeated calls to
+    /// [Interner::merge_map] or related functions.
+    pub fn with_capacity(cap: usize) -> Self {
+        Self {
+            map: Vec::with_capacity(cap),
+            _other: PhantomData,
+        }
+    }
+
+    /// An iterator over the pairs of keys in the map, skipping any filtered-out entries.
+    ///
+    /// The first item of each tuple is a key that can be used to index the map, and the second is
+    /// the key that it maps to.
+    pub fn iter(&self) -> impl Iterator<Item = (Interned<S>, Interned<T>)> + '_ {
+        let slots = self.map.iter().copied().enumerate();
+        slots.filter_map(|(position, mapped)| {
+            let original = Interned {
+                index: position as u32,
+                _type: PhantomData,
+            };
+            mapped.map(|mapped| (original, mapped))
+        })
+    }
+}
+impl<S: ?Sized, T: ?Sized> ::std::ops::Index<Interned<S>> for InternedMap<S, T> {
+    type Output = Interned<T>;
+
+    /// Look up the re-mapped key, panicking if `index` was filtered out during the merge.
+    fn index(&self, index: Interned<S>) -> &Self::Output {
+        // A fallible `get` could handle filtered-out keys gracefully, but no use-case for one has
+        // come up, so a filtered key is treated as a caller bug.
+        let slot = &self.map[index.index as usize];
+        slot.as_ref().expect("lookup keys should not have been filtered out")
+    }
+}
+impl<S: ?Sized, T: ?Sized> IntoIterator for InternedMap<S, T> {
+    type Item = (Interned<S>, Interned<T>);
+    type IntoIter = interned_map::IntoIter<S, T>;
+
+    fn into_iter(self) -> Self::IntoIter {
+        interned_map::IntoIter::from(self)
+    }
+}
+// Private namespace to hide the types of the iterator in.
+mod interned_map {
+    use super::*;
+    use std::{iter, vec};
+
+    /// Owning iterator over the `(old key, new key)` pairs of an [InternedMap], which skips the
+    /// entries that were filtered out during the merge.
+    pub struct IntoIter<S: ?Sized, T: ?Sized = S> {
+        // We re-use as much of the built-in [Iterator]-adaptor structure as we can name; the
+        // filtering is done by hand in `next` because a closure's type can't be named.
+        iter: iter::Enumerate<vec::IntoIter<Option<Interned<T>>>>,
+        // Number of `Some` entries left to yield; `iter` by itself also counts filtered entries.
+        remaining: usize,
+        _type: PhantomData<S>,
+    }
+    impl<S: ?Sized, T: ?Sized> From<InternedMap<S, T>> for IntoIter<S, T> {
+        fn from(val: InternedMap<S, T>) -> Self {
+            Self {
+                remaining: val.map.iter().filter(|slot| slot.is_some()).count(),
+                iter: val.map.into_iter().enumerate(),
+                _type: PhantomData,
+            }
+        }
+    }
+    impl<S: ?Sized, T: ?Sized> Iterator for IntoIter<S, T> {
+        type Item = (Interned<S>, Interned<T>);
+        fn next(&mut self) -> Option<Self::Item> {
+            // Advance past filtered-out (`None`) slots; the outer `?` ends the iteration.
+            let (key, value) = self.iter.find_map(|(key, slot)| Some((key, slot?)))?;
+            self.remaining -= 1;
+            let key = Interned {
+                index: key as u32,
+                _type: PhantomData,
+            };
+            Some((key, value))
+        }
+        // The bounds must count the surviving entries only, or `len()` would over-report.
+        fn size_hint(&self) -> (usize, Option<usize>) {
+            (self.remaining, Some(self.remaining))
+        }
+    }
+    impl<S: ?Sized, T: ?Sized> ExactSizeIterator for IntoIter<S, T> {}
+    impl<S: ?Sized, T: ?Sized> iter::FusedIterator for IntoIter<S, T> {}
+}
+
 /// An append-only data structure for interning generic Rust types.
 ///
 /// The interner can lookup keys using a reference type, and will create the corresponding owned
@@ -95,6 +218,29 @@ where
         f.debug_tuple("Interner").field(&self.0).finish()
     }
 }
+// Only one of [FromIterator<T>] and `FromIterator<<T as ToOwned>::Owned>` can be implemented for
+// [Interner<T>]; having both would overlap, because of the blanket implementation of [ToOwned]
+// for `T: Clone`.  We pick the owned form: if somebody's constructing a new [Interner] from an
+// iterator, chances are that they've either already got owned values, or there aren't going to
+// be too many duplicates.
+impl<T> ::std::iter::FromIterator<<T as ToOwned>::Owned> for Interner<T>
+where
+    T: ?Sized + ToOwned,
+    <T as ToOwned>::Owned: Hash + Eq + Default,
+{
+    fn from_iter<I>(iter: I) -> Self
+    where
+        I: IntoIterator<Item = <T as ToOwned>::Owned>,
+    {
+        let values = iter.into_iter();
+        // The `+ 1` presumably leaves room for the default key that an [Interner] always holds.
+        let mut interner = Self::with_capacity(values.size_hint().0 + 1);
+        values.for_each(|value| {
+            interner.insert_owned(value);
+        });
+        interner
+    }
+}
 
 impl<T> Interner<T>
 where
@@ -138,7 +284,6 @@ where
 impl<T> Interner<T>
 where
     T: ?Sized + ToOwned,
-    <T as ToOwned>::Owned: Hash + Eq,
 {
     /// Retrieve a reference to the stored value for this key.
     pub fn get(&self, index: Interned<T>) -> &T {
@@ -150,6 +295,75 @@ where
             .borrow()
     }
 
+    /// The number of values stored in the interner, which is also the number of valid keys.
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Whether there are zero stored keys.
+    ///
+    /// This is expected to always be false, because an interner always contains a default key; the
+    /// method exists only because clippy complains when `len` has no `is_empty` counterpart.
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+
+    /// An iterator over every [Interned] key of this interner, in ascending index order.
+    pub fn keys(&self) -> impl ExactSizeIterator<Item = Interned<T>> + '_ {
+        (0..self.0.len() as u32).map(|position| Interned {
+            index: position,
+            _type: PhantomData,
+        })
+    }
+
+    /// An iterator over the stored values, borrowed from the interner's storage.
+    pub fn values(&self) -> impl ExactSizeIterator<Item = &'_ T> + '_ {
+        self.0.iter().map(<T::Owned as Borrow<T>>::borrow)
+    }
+
+    /// An iterator over pairs of the [Interned] keys and their associated values.
+    ///
+    /// Each key is built from the position of its value in the underlying storage.
+    pub fn items(&self) -> impl ExactSizeIterator<Item = (Interned<T>, &'_ T)> + '_ {
+        self.0.iter().enumerate().map(|(position, stored)| {
+            let key = Interned {
+                index: position as u32,
+                _type: PhantomData,
+            };
+            (key, stored.borrow())
+        })
+    }
+}
+
+impl<T> Interner<T>
+where
+    T: ?Sized + ToOwned + Hash + Eq,
+{
+    /// Look up the [Interned] key of a borrowed value, if that value has already been stored.
+    ///
+    /// This is a pure query: `value` is not stored if it is not present.
+    pub fn try_key(&self, value: &T) -> Option<Interned<T>> {
+        let index = self.0.get_index_of(value)? as u32;
+        Some(Interned {
+            index,
+            _type: PhantomData,
+        })
+    }
+
+    /// Return whether this value is already in the [Interner].
+    ///
+    /// Typically you want to use [Self::try_key], which returns the key if present, or
+    /// [Self::insert], which stores the value if it wasn't already present.
+    pub fn contains(&self, value: &T) -> bool {
+        self.try_key(value).is_some()
+    }
+}
+
+impl<T> Interner<T>
+where
+    T: ?Sized + ToOwned,
+    <T as ToOwned>::Owned: Hash + Eq,
+{
     /// Internal worker function that inserts an owned value assuming that the value didn't
     /// previously exist in the map.
     fn insert_new(&mut self, value: <T as ToOwned>::Owned) -> u32 {
@@ -162,15 +376,34 @@ where
         index as u32
     }
 
+    /// Get an interner key corresponding to a value that the caller already owns.
+    ///
+    /// If the value is not already stored, it is itself used as the key; otherwise it is dropped.
+    /// If you don't already have the owned value, use `insert`, which only allocates on a miss.
+    pub fn insert_owned(&mut self, value: <T as ToOwned>::Owned) -> Interned<T> {
+        let index = if let Some(existing) = self.0.get_index_of(&value) {
+            existing as u32
+        } else {
+            self.insert_new(value)
+        };
+        Interned {
+            index,
+            _type: PhantomData,
+        }
+    }
+}
+
+impl<T> Interner<T>
+where
+    T: ?Sized + ToOwned + Hash + Eq,
+    <T as ToOwned>::Owned: Hash + Eq,
+{
     /// Get an interner key corresponding to the given referenced type.  If not already stored, this
     /// function will allocate a new owned value to use as the storage.
     ///
     /// If you already have an owned value, use `insert_owned`, but in general this function will be
     /// more efficient *unless* you already had the value for other reasons.
-    pub fn insert(&mut self, value: &T) -> Interned<T>
-    where
-        T: Hash + Eq,
-    {
+    pub fn insert(&mut self, value: &T) -> Interned<T> {
         let index = match self.0.get_index_of(value) {
             Some(index) => index as u32,
             None => self.insert_new(value.to_owned()),
@@ -181,26 +414,138 @@ where
         }
     }
 
-    /// Get an interner key corresponding to the given owned type.  If not already stored, the value
-    /// will be used as the key, otherwise it will be dropped.
+    /// Get an interner key corresponding to the given [Cow].
     ///
-    /// If you don't already have the owned value, use `insert`; this will only allocate if the
-    /// lookup fails.
-    pub fn insert_owned(&mut self, value: <T as ToOwned>::Owned) -> Interned<T> {
-        let index = match self.0.get_index_of(&value) {
-            Some(index) => index as u32,
-            None => self.insert_new(value),
-        };
-        Interned {
-            index,
-            _type: PhantomData,
+    /// A value that is not already stored is used as the key, being cloned only if it was
+    /// borrowed.  A value that is already stored is dropped.
+    #[inline]
+    pub fn insert_cow(&mut self, value: Cow<T>) -> Interned<T> {
+        match value {
+            Cow::Owned(owned) => self.insert_owned(owned),
+            Cow::Borrowed(borrowed) => self.insert(borrowed),
+        }
+    }
+
+    /// Merge another interner into this one, writing the key mapping into existing storage.
+    ///
+    /// On return, `target` converts [Interned] indices that are valid for `other` into their new
+    /// representations in `self`; anything it previously held is discarded.  Strictly, the
+    /// interners can be for different types, but in practice they'll most likely be the same.
+    pub fn merge_map_using<S>(
+        &mut self,
+        other: &Interner<S>,
+        mut map_fn: impl FnMut(&S) -> Option<Cow<T>>,
+        target: &mut InternedMap<S, T>,
+    ) where
+        S: ?Sized + ToOwned + Hash + Eq,
+    {
+        let new_keys = other
+            .0
+            .iter()
+            .map(|stored| map_fn(stored.borrow()).map(|cow| self.insert_cow(cow)));
+        target.map.clear();
+        target.map.reserve(other.0.len());
+        target.map.extend(new_keys);
+    }
+
+    /// Merge another interner into this one, returning a newly allocated key mapping.
+    ///
+    /// The output mapping converts [Interned] indices from `other` to their new representations in
+    /// `self`.  Strictly, the interners can be for different types, but in practice it likely makes
+    /// most sense for them to be the same.  See [Self::merge_map_using] to re-use an allocation.
+    pub fn merge_map<S>(
+        &mut self,
+        other: &Interner<S>,
+        map_fn: impl FnMut(&S) -> Option<Cow<T>>,
+    ) -> InternedMap<S, T>
+    where
+        S: ?Sized + ToOwned + Hash + Eq,
+    {
+        let mut key_map = InternedMap::new();
+        self.merge_map_using(other, map_fn, &mut key_map);
+        key_map
+    }
+}
+
+impl<T> Interner<[T]>
+where
+    T: Hash + Eq + Clone,
+{
+    /// Merge another interner into this one, re-using the output storage for the key mapping.
+    ///
+    /// The mapping function is applied to each scalar element of a slice, as opposed to in
+    /// [Interner::merge_map] where it is applied to the entire key at once.  This makes it easier
+    /// to avoid allocations when mapping slice-based conversions (though if `T` is not [Copy] and
+    /// a lot of true insertions are expected, there is a potential clone inefficiency).
+    ///
+    /// If `scalar_map_fn` returns `None` for any element of a slice, that entire slice is filtered
+    /// out of the merge, and the resulting [InternedMap] panics if indexed with the corresponding
+    /// [Interned] key.
+    pub fn merge_map_slice_using<const N: usize>(
+        &mut self,
+        // `other` could in principle be an [Interner<[S]>], but then `S` would need specifying
+        // whenever `N` is set, which is an API annoyance for a generalisation that we'll probably
+        // never need.
+        other: &Self,
+        mut scalar_map_fn: impl FnMut(&T) -> Option<T>,
+        target: &mut InternedMap<[T]>,
+    ) {
+        // Scratch space for one mapped slice.  The aim is to work on the stack, so that the
+        // mapping itself doesn't need to make heap allocations.  The amount of stack space could
+        // either be guessed (which the higher-level `merge_map_slice` does) or chosen by the
+        // caller.  This method is lower level, so the caller chooses; if they're optimizing to the
+        // point of re-using the output allocation, they likely know the maximum slice size.
+        let mut scratch = SmallVec::<[T; N]>::with_capacity(N);
+        target.map.clear();
+        target.map.reserve(other.0.len());
+        for stored in other.0.iter() {
+            scratch.clear();
+            scratch.reserve(stored.len());
+            // Stop at the first element that fails to map; that filters out the whole slice.
+            let mut complete = true;
+            for element in stored {
+                let Some(mapped) = scalar_map_fn(element) else {
+                    complete = false;
+                    break;
+                };
+                scratch.push(mapped);
+            }
+            let new_key = complete.then(|| self.insert(scratch.as_slice()));
+            target.map.push(new_key);
         }
     }
+
+    /// Merge another interner into this one, returning a newly allocated key mapping.
+    ///
+    /// If you need to call this many times in a row, see [Self::merge_map_slice_using] for a
+    /// version that can re-use the allocations of the output mapping.
+    ///
+    /// The mapping function is for scalar elements of the slice, as opposed to in
+    /// [Interner::merge_map] where it is for the entire key at once.  This function makes it easier
+    /// to avoid allocations when mapping slice-based conversions (though if `T` is not [Copy] and
+    /// you're expecting there to be a lot of true insertions during the merge, there is a potential
+    /// clone inefficiency).
+    ///
+    /// If the `scalar_map_fn` returns `None` for any element of a slice, that entire slice is
+    /// filtered out from the merge, and the [InternedMap] panics if indexed by the matching key.
+    pub fn merge_map_slice(
+        &mut self,
+        other: &Self,
+        scalar_map_fn: impl FnMut(&T) -> Option<T>,
+    ) -> InternedMap<[T]> {
+        // The stack space is just a guess, which isn't hugely important: we safely spill to the
+        // heap if needed, and this function trades allocation performance for convenience anyway.
+        const INLINE_GUESS: usize = 4;
+        let mut key_map = InternedMap::new();
+        self.merge_map_slice_using::<INLINE_GUESS>(other, scalar_map_fn, &mut key_map);
+        key_map
+    }
 }
 
 #[cfg(test)]
 mod test {
     use super::*;
+    use hashbrown::{HashMap, HashSet};
 
     #[test]
     fn default_key_exists() {
@@ -215,4 +560,122 @@ mod test {
         assert_eq!(capacity.get_default(), capacity.get_default());
         assert_eq!(capacity.get(capacity.get_default()), "");
     }
+
+    #[test]
+    fn can_merge_two_interners() {
+        let mut base = Interner::<str>::from_iter(["hello", "world"].map(String::from));
+        let other = Interner::<str>::from_iter(["a", "world", "b"].map(String::from));
+
+        fn to_hashmap<T: ?Sized + Hash + Eq + ToOwned>(
+            interner: &Interner<T>,
+        ) -> HashMap<Interned<T>, <T as ToOwned>::Owned> {
+            interner
+                .items()
+                .map(|(key, value)| (key, value.to_owned()))
+                .collect()
+        }
+
+        let initial = to_hashmap(&base);
+        // Sanity check that we start off with the values we expect, including the default `""`.
+        let expected = ["", "hello", "world"]
+            .map(String::from)
+            .into_iter()
+            .collect::<HashSet<_>>();
+        assert_eq!(
+            expected,
+            HashSet::from_iter(base.values().map(String::from))
+        );
+
+        let other_items = to_hashmap(&other);
+        let other_map = base.merge_map(&other, |x| Some(x.into()));
+        // Merging must not disturb the keys of the values that `base` already held.
+        assert_eq!(
+            initial,
+            initial
+                .iter()
+                .map(|(key, value)| (base.try_key(value).unwrap(), base.get(*key).to_owned()))
+                .collect::<HashMap<_, _>>(),
+        );
+        // Every key of `other` must map to a key of `base` that retrieves the same value.
+        assert_eq!(
+            other_items,
+            other
+                .keys()
+                .map(|key| (key, base.get(other_map[key]).to_owned()))
+                .collect::<HashMap<_, _>>(),
+        );
+
+        // This interner is of a different type and will produce duplicate keys during the mapping.
+        let nums = Interner::<[u8]>::from_iter([vec![4], vec![1, 5], vec![2, 4], vec![3]]);
+        let map_fn = |x: &[u8]| x.iter().sum::<u8>().to_string();
+        let num_map = base.merge_map(&nums, |x| Some(map_fn(x).into()));
+        // Again, merging must not disturb the keys of the values that `base` held initially.
+        assert_eq!(
+            initial,
+            initial
+                .iter()
+                .map(|(key, value)| (base.try_key(value).unwrap(), base.get(*key).to_owned()))
+                .collect::<HashMap<_, _>>(),
+        );
+        // Every key of `nums` must map to a key of `base` that retrieves the mapped value.
+        assert_eq!(
+            nums.items()
+                .map(|(key, value)| (key, map_fn(value)))
+                .collect::<HashMap<_, _>>(),
+            nums.keys()
+                .map(|key| (key, base.get(num_map[key]).to_owned()))
+                .collect(),
+        );
+    }
+
+    #[test]
+    fn can_merge_two_sliced_interners() {
+        let mut map = InternedMap::<[u8]>::new();
+        let mut base = Interner::<[u8]>::from_iter([
+            vec![],
+            vec![0],
+            vec![1],
+            vec![2],
+            vec![0, 1],
+            vec![1, 2],
+        ]);
+        let only_2q = Interner::<[u8]>::from_iter([vec![0], vec![1], vec![0, 1]]);
+
+        // This is the identity map, so every mapped key should retrieve an identical value.
+        base.merge_map_slice_using::<2>(&only_2q, |x| Some(*x), &mut map);
+        let expected = [vec![], vec![0], vec![1], vec![0, 1]];
+        let (small, big): (Vec<_>, Vec<_>) = expected
+            .iter()
+            .map(|x| {
+                let key = only_2q.try_key(x).unwrap();
+                (only_2q.get(key).to_owned(), base.get(map[key]).to_owned())
+            })
+            .unzip();
+        assert_eq!(small, big);
+
+        // Map qubits [0, 1] to [2, 1], re-using `map`.  This involves an insertion into `base`.
+        base.merge_map_slice_using::<2>(&only_2q, |x| [2u8, 1].get(*x as usize).copied(), &mut map);
+        let expected = HashSet::<(Vec<u8>, Vec<u8>)>::from([
+            (vec![], vec![]),
+            (vec![0], vec![2]),
+            (vec![1], vec![1]),
+            (vec![0, 1], vec![2, 1]),
+        ]);
+        let actual = map
+            .iter()
+            .map(|(small, big)| (only_2q.get(small).to_owned(), base.get(big).to_owned()))
+            .collect::<HashSet<_>>();
+        assert_eq!(expected, actual);
+        assert_eq!(&[2, 1], base.get(map[only_2q.try_key(&[0, 1]).unwrap()]));
+
+        // Map qubit [0] to [3], and filter out every slice that involves qubit 1.
+        base.merge_map_slice_using::<2>(&only_2q, |x| [3u8].get(*x as usize).copied(), &mut map);
+        let expected = HashSet::<(Vec<u8>, Vec<u8>)>::from([(vec![], vec![]), (vec![0], vec![3])]);
+        // For the last check we consume the map, to exercise the `into_iter` method as well.
+        let actual = map
+            .into_iter()
+            .map(|(small, big)| (only_2q.get(small).to_owned(), base.get(big).to_owned()))
+            .collect::<HashSet<_>>();
+        assert_eq!(expected, actual);
+    }
 }