From 23fb07d4a2fc3ac0ede27f78cb7895c35bfcbf6e Mon Sep 17 00:00:00 2001
From: Cameron Gutman <aicommander@gmail.com>
Date: Sun, 14 Jan 2024 18:12:40 -0600
Subject: [PATCH] Implement control stream v2 encryption

---
 src/rtsp.cpp   | 18 ++++++++++++-----
 src/stream.cpp | 52 ++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 57 insertions(+), 13 deletions(-)

diff --git a/src/rtsp.cpp b/src/rtsp.cpp
index 35300e6e3fc..14123b3df9f 100644
--- a/src/rtsp.cpp
+++ b/src/rtsp.cpp
@@ -518,14 +518,22 @@ namespace rtsp_stream {
     std::stringstream ss;
 
     // Tell the client about our supported features
-    ss << "a=x-ss-general.featureFlags: " << (uint32_t) platf::get_capabilities() << std::endl;
+    ss << "a=x-ss-general.featureFlags:" << (uint32_t) platf::get_capabilities() << std::endl;
 
-    if (video::active_hevc_mode != 1) {
-      ss << "sprop-parameter-sets=AAAAAU"sv << std::endl;
-    }
+    // Always request new control stream encryption if the client supports it
+    uint32_t encryption_flags_supported = SS_ENC_CONTROL_V2 | SS_ENC_AUDIO;
+    uint32_t encryption_flags_requested = SS_ENC_CONTROL_V2;
+
+    // Report supported and requested encryption flags
+    ss << "a=x-ss-general.encryptionSupported:" << encryption_flags_supported << std::endl;
+    ss << "a=x-ss-general.encryptionRequested:" << encryption_flags_requested << std::endl;
 
     if (video::last_encoder_probe_supported_ref_frames_invalidation) {
-      ss << "x-nv-video[0].refPicInvalidation=1"sv << std::endl;
+      ss << "a=x-nv-video[0].refPicInvalidation:1"sv << std::endl;
+    }
+
+    if (video::active_hevc_mode != 1) {
+      ss << "sprop-parameter-sets=AAAAAU"sv << std::endl;
     }
 
     if (video::active_av1_mode != 1) {
diff --git a/src/stream.cpp b/src/stream.cpp
index ce5a25a112f..e57d5996456 100644
--- a/src/stream.cpp
+++ b/src/stream.cpp
@@ -381,11 +381,11 @@ namespace stream {
       crypto::cipher::gcm_t cipher;
       crypto::aes_t legacy_input_enc_iv;  // Only used when the client doesn't support full control stream encryption
 
-      uint32_t connect_data;  // Used for new clients with ML_FF_SESSION_ID_V1
+      std::uint32_t connect_data;  // Used for new clients with ML_FF_SESSION_ID_V1
       std::string expected_peer_address;  // Only used for legacy clients without ML_FF_SESSION_ID_V1
 
       net::peer_t peer;
-      std::uint8_t seq;
+      std::uint32_t seq;
 
       platf::feedback_queue_t feedback_queue;
       safe::mail_raw_t::event_t<video::hdr_info_t> hdr_queue;
@@ -414,9 +414,29 @@ namespace stream {
       return plaintext;
     }
 
-    crypto::aes_t iv(16);
     auto seq = session->control.seq++;
-    iv[0] = seq;
+
+    crypto::aes_t iv;
+    if (session->config.encryptionFlagsEnabled & SS_ENC_CONTROL_V2) {
+      // We use the deterministic IV construction algorithm specified in NIST SP 800-38D
+      // Section 8.2.1. The sequence number is our "invocation" field and the 'CH' in the
+      // high bytes is the "fixed" field. Because each client provides their own unique
+      // key, our values in the fixed field need only uniquely identify each independent
+      // use of the client's key with AES-GCM in our code.
+      //
+      // The sequence number is 32 bits long which allows for 2^32 control stream messages
+      // to be sent to each client before the IV repeats.
+      iv.resize(12);
+      std::copy_n((uint8_t *) &seq, sizeof(seq), std::begin(iv));
+      iv[10] = 'H';  // Host originated
+      iv[11] = 'C';  // Control stream
+    }
+    else {
+      // Nvidia's old style encryption uses a 16-byte IV
+      iv.resize(16);
+
+      iv[0] = (std::uint8_t) seq;
+    }
 
     auto packet = (control_encrypted_p) tagged_cipher.data();
 
@@ -915,11 +935,27 @@ namespace stream {
       std::string_view tagged_cipher { (char *) header->payload(), (size_t) tagged_cipher_length };
 
       auto &cipher = session->control.cipher;
-      crypto::aes_t iv(16);
-      iv[0] = (std::uint8_t) seq;
+      crypto::aes_t iv;
+      if (session->config.encryptionFlagsEnabled & SS_ENC_CONTROL_V2) {
+        // We use the deterministic IV construction algorithm specified in NIST SP 800-38D
+        // Section 8.2.1. The sequence number is our "invocation" field and the 'CC' in the
+        // high bytes is the "fixed" field. Because each client provides their own unique
+        // key, our values in the fixed field need only uniquely identify each independent
+        // use of the client's key with AES-GCM in our code.
+        //
+        // The sequence number is 32 bits long which allows for 2^32 control stream messages
+        // to be received from each client before the IV repeats.
+        iv.resize(12);
+        std::copy_n((uint8_t *) &seq, sizeof(seq), std::begin(iv));
+        iv[10] = 'C';  // Client originated
+        iv[11] = 'C';  // Control stream
+      }
+      else {
+        // Nvidia's old style encryption uses a 16-byte IV
+        iv.resize(16);
 
-      // update control sequence
-      ++session->control.seq;
+        iv[0] = (std::uint8_t) seq;
+      }
 
       std::vector<uint8_t> plaintext;
       if (cipher.decrypt(tagged_cipher, plaintext, &iv)) {