diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index 53e2c419ea..2e14f6dc02 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -98,6 +98,9 @@
 
 ### Bug fixes
 
+* Bug fix for `applyMatrix` in `lightning.tensor`. Matrix operator data is not stored in the `cuGateCache` object to support `TensorProd` obs with multiple `Hermitian` obs.
+  [(#932)](https://github.com/PennyLaneAI/pennylane-lightning/pull/932)
+
 * Bug fix for `_pauli_word` of `QuantumScriptSerializer`. `_pauli_word` can process `PauliWord` object: `I`.
   [(#919)](https://github.com/PennyLaneAI/pennylane-lightning/pull/919)
 
diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
index 9b3cc2f16a..eb1de07d4e 100644
--- a/pennylane_lightning/core/_version.py
+++ b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
    Version number (major.minor.patch[-label])
 """
 
-__version__ = "0.39.0-dev36"
+__version__ = "0.39.0-dev37"
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
index a98b51df60..538dd868c4 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
@@ -440,6 +440,19 @@ class StateVectorCudaMPI final
                     cuGates::getRot<CFP_t>(params[0], params[1], params[2]);
                 applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, false);
             }
+        } else if (opName == "Matrix") {
+            DataBuffer<CFP_t, int> d_matrix{
+                gate_matrix.size(), BaseType::getDataBuffer().getDevTag(),
+                true};
+            d_matrix.CopyHostDataToGpu(gate_matrix.data(), d_matrix.getLength(),
+                                       false);
+            // ensure wire indexing correctly preserved for tensor-observables
+            const std::vector<std::size_t> ctrls_local{ctrls.rbegin(),
+                                                       ctrls.rend()};
+            const std::vector<std::size_t> tgts_local{tgts.rbegin(),
+                                                      tgts.rend()};
+            applyDeviceMatrixGate(d_matrix.getData(), ctrls_local, tgts_local,
+                                  adjoint);
         } else if (par_gates_.find(opName) != par_gates_.end()) {
             par_gates_.at(opName)(wires, adjoint, params);
         } else { // No offloadable function call; defer to matrix passing
@@ -519,7 +532,7 @@ class StateVectorCudaMPI final
                      const std::vector<std::size_t> &wires,
                      bool adjoint = false) {
         PL_ABORT_IF(wires.empty(), "Number of wires must be larger than 0");
-        const std::string opName = {};
+        const std::string opName = "Matrix";
         std::size_t n = std::size_t{1} << wires.size();
         const std::vector<std::complex<PrecisionT>> matrix(gate_matrix,
                                                            gate_matrix + n * n);
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
index b33c16471e..f1efc77130 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
@@ -360,6 +360,19 @@ class StateVectorCudaManaged
                     cuGates::getRot<CFP_t>(params[0], params[1], params[2]);
                 applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, false);
             }
+        } else if (opName == "Matrix") {
+            DataBuffer<CFP_t, int> d_matrix{
+                gate_matrix.size(), BaseType::getDataBuffer().getDevTag(),
+                true};
+            d_matrix.CopyHostDataToGpu(gate_matrix.data(), d_matrix.getLength(),
+                                       false);
+            // ensure wire indexing correctly preserved for tensor-observables
+            const std::vector<std::size_t> ctrls_local{ctrls.rbegin(),
+                                                       ctrls.rend()};
+            const std::vector<std::size_t> tgts_local{tgts.rbegin(),
+                                                      tgts.rend()};
+            applyDeviceMatrixGate(d_matrix.getData(), ctrls_local, tgts_local,
+                                  adjoint);
         } else if (par_gates_.find(opName) != par_gates_.end()) {
             par_gates_.at(opName)(wires, adjoint, params);
         } else { // No offloadable function call; defer to matrix passing
@@ -439,7 +452,7 @@ class StateVectorCudaManaged
                      const std::vector<std::size_t> &wires,
                      bool adjoint = false) {
         PL_ABORT_IF(wires.empty(), "Number of wires must be larger than 0");
-        const std::string opName = {};
+        const std::string opName = "Matrix";
         std::size_t n = std::size_t{1} << wires.size();
         const std::vector<std::complex<PrecisionT>> matrix(gate_matrix,
                                                            gate_matrix + n * n);