diff --git a/stuffer/s2n_stuffer.h b/stuffer/s2n_stuffer.h index 8faff14ef13..abf294cce80 100644 --- a/stuffer/s2n_stuffer.h +++ b/stuffer/s2n_stuffer.h @@ -164,6 +164,7 @@ int s2n_stuffer_copy(struct s2n_stuffer *from, struct s2n_stuffer *to, uint32_t * "FF" == 255 or [ 0xff ] * "0001" == 1 or [ 0x00, 0x01 ] */ +S2N_RESULT s2n_hex_digit(uint8_t half_byte, uint8_t *hex_digit); S2N_RESULT s2n_stuffer_read_hex(struct s2n_stuffer *hex_in, const struct s2n_blob *bytes_out); S2N_RESULT s2n_stuffer_write_hex(struct s2n_stuffer *hex_out, const struct s2n_blob *bytes_in); S2N_RESULT s2n_stuffer_read_uint8_hex(struct s2n_stuffer *stuffer, uint8_t *u); diff --git a/stuffer/s2n_stuffer_hex.c b/stuffer/s2n_stuffer_hex.c index 8faf1245abf..c5ef562e943 100644 --- a/stuffer/s2n_stuffer_hex.c +++ b/stuffer/s2n_stuffer_hex.c @@ -30,6 +30,14 @@ static const uint8_t hex_to_value[] = { /* clang-format on */ }; +S2N_RESULT s2n_hex_digit(uint8_t half_byte, uint8_t *hex_digit) +{ + RESULT_ENSURE_REF(hex_digit); + RESULT_ENSURE(half_byte < s2n_array_len(value_to_hex), S2N_ERR_BAD_HEX); + *hex_digit = value_to_hex[half_byte]; + return S2N_RESULT_OK; +} + static S2N_RESULT s2n_stuffer_hex_digit_from_char(uint8_t c, uint8_t *i) { RESULT_ENSURE(c < s2n_array_len(hex_to_value), S2N_ERR_BAD_HEX); diff --git a/tests/pcap/build.rs b/tests/pcap/build.rs index 58fbefeaa61..a849d6770f2 100644 --- a/tests/pcap/build.rs +++ b/tests/pcap/build.rs @@ -28,10 +28,7 @@ fn get_download_urls() -> HashMap { "latest.pcapng", "macos_tcp_flags.pcap", "tls-alpn-h2.pcap", - // TODO: non-ascii alpn handling currently differs between implementations, - // with no official consensus. Wireshark chose different handling than s2n-tls did. - // See https://github.com/FoxIO-LLC/ja4/pull/147 - // "tls-non-ascii-alpn.pcapng", + "tls-non-ascii-alpn.pcapng", "tls12.pcap", "tls3.pcapng", ]; diff --git a/tests/pcap/data/alpns.pcap b/tests/pcap/data/alpns.pcap new file mode 100644 index 00000000000..dd9245d7fed Binary files /dev/null and b/tests/pcap/data/alpns.pcap differ diff --git a/tests/pcap/tests/s2n_client_hellos.rs b/tests/pcap/tests/s2n_client_hellos.rs index 8e96c575c6b..d814ffb4ba0 100644 --- a/tests/pcap/tests/s2n_client_hellos.rs +++ b/tests/pcap/tests/s2n_client_hellos.rs @@ -71,18 +71,33 @@ fn ja4_fingerprints() -> Result<()> { test_all_client_hellos(|pcap_hello, s2n_hello| { let mut fingerprint = builder.build(&s2n_hello)?; - let s2n_ja4_hash = fingerprint + let s2n_hash = fingerprint .hash() .context("s2n failed to calculate ja4 hash")? .to_owned(); - - let s2n_ja4_str = fingerprint + let s2n_str = fingerprint .raw() .context("s2n failed to calculate ja4 string")? .to_owned(); - assert_eq!(pcap_hello.ja4_hash(), Some(s2n_ja4_hash)); - assert_eq!(pcap_hello.ja4_string(), Some(s2n_ja4_str)); + let mut tshark_hash = pcap_hello + .ja4_hash() + .expect("pcap did not contain ja4 hash"); + let mut tshark_str = pcap_hello + .ja4_string() + .expect("pcap did not contain ja4 string"); + + // Handle known issues: + // tshark currently doesn't handle special alpn characters correctly + // TODO: remove this when tshark updates + let exceptions = [("-+", "2b"), ("__", "5f")]; + for (a, b) in exceptions { + tshark_hash = tshark_hash.replace(a, b); + tshark_str = tshark_str.replace(a, b); + } + + assert_eq!(tshark_str, s2n_str); + assert_eq!(tshark_hash, s2n_hash); Ok(()) }) } diff --git a/tests/unit/s2n_fingerprint_ja4_test.c b/tests/unit/s2n_fingerprint_ja4_test.c index ce7b064f333..cdac483c9bf 100644 --- a/tests/unit/s2n_fingerprint_ja4_test.c +++ b/tests/unit/s2n_fingerprint_ja4_test.c @@ -731,32 +731,62 @@ int main(int argc, char **argv) EXPECT_TRUE(output_size > S2N_JA4_A_ALPN_LAST); EXPECT_EQUAL(output[S2N_JA4_A_ALPN_FIRST], 'q'); - EXPECT_EQUAL(output[S2N_JA4_A_ALPN_LAST], '0'); + EXPECT_EQUAL(output[S2N_JA4_A_ALPN_LAST], 'q'); }; - /* Test non-ascii alpn value */ + /* Test non-ascii alpn values + * + * The spec does not currently define this case, but will be updated in the + * future according to https://github.com/FoxIO-LLC/ja4/issues/148 + */ { + struct { + const uint8_t bytes[4]; + const size_t bytes_size; + const char *str; + } test_cases[] = { + { .bytes = { 0xAB }, .str = "ab", .bytes_size = 1 }, + { .bytes = { 0xAB, 0xCD }, .str = "ad", .bytes_size = 2 }, + { .bytes = { 0x30, 0xAB }, .str = "3b", .bytes_size = 2 }, + { .bytes = { 0x30, 0x31, 0xAB, 0xCD }, .str = "3d", .bytes_size = 4 }, + { .bytes = { 0x30, 0xAB, 0xCD, 0x31 }, .str = "01", .bytes_size = 4 }, + }; + S2N_INIT_CLIENT_HELLO(client_hello_bytes, S2N_TEST_CLIENT_HELLO_VERSION, S2N_TEST_CLIENT_HELLO_AFTER_VERSION, S2N_TEST_CLIENT_HELLO_CIPHERS, S2N_TEST_CLIENT_HELLO_AFTER_CIPHERS, /* extensions size */ - 0x00, 9, + 0x00, 11, /* extension: alpn */ - 0x00, TLS_EXTENSION_ALPN, 0x00, 5, - 0x00, 3, - 2, UINT8_MAX, 128); + 0x00, TLS_EXTENSION_ALPN, 0x00, 7, + 0x00, 5, + 0, 0, 0, 0, 0); + + /* We allocated enough space in the above client hello for + * a 1-byte length and a 4-byte alpn */ + EXPECT_EQUAL(sizeof(test_cases[0].bytes), 4); + const size_t alpn_mem_size = sizeof(test_cases[0].bytes); + const size_t offset = sizeof(client_hello_bytes) - alpn_mem_size; + uint8_t *const bytes_ptr = client_hello_bytes + offset; + uint8_t *const length_ptr = bytes_ptr - 1; + + for (size_t i = 0; i < s2n_array_len(test_cases); i++) { + *length_ptr = test_cases[i].bytes_size; + EXPECT_MEMCPY_SUCCESS(bytes_ptr, + test_cases[i].bytes, sizeof(test_cases[i].bytes)); - uint8_t output[S2N_TEST_OUTPUT_SIZE] = { 0 }; - uint32_t output_size = 0; - EXPECT_OK(s2n_test_ja4_hash_from_bytes( - client_hello_bytes, sizeof(client_hello_bytes), - sizeof(output), output, &output_size)); + uint8_t output[S2N_TEST_OUTPUT_SIZE] = { 0 }; + uint32_t output_size = 0; + EXPECT_OK(s2n_test_ja4_hash_from_bytes( + client_hello_bytes, sizeof(client_hello_bytes), + sizeof(output), output, &output_size)); - EXPECT_TRUE(output_size > S2N_JA4_A_ALPN_LAST); - EXPECT_EQUAL(output[S2N_JA4_A_ALPN_FIRST], '9'); - EXPECT_EQUAL(output[S2N_JA4_A_ALPN_LAST], '9'); + EXPECT_TRUE(output_size > S2N_JA4_A_ALPN_LAST); + EXPECT_EQUAL(output[S2N_JA4_A_ALPN_FIRST], test_cases[i].str[0]); + EXPECT_EQUAL(output[S2N_JA4_A_ALPN_LAST], test_cases[i].str[1]); + } }; /* Test no ALPN diff --git a/tls/s2n_fingerprint_ja4.c b/tls/s2n_fingerprint_ja4.c index 606caffe6a0..38bc03cc2c5 100644 --- a/tls/s2n_fingerprint_ja4.c +++ b/tls/s2n_fingerprint_ja4.c @@ -13,6 +13,8 @@ * permissions and limitations under the License. */ +#include + #include "crypto/s2n_hash.h" #include "stuffer/s2n_stuffer.h" #include "tls/extensions/s2n_client_supported_versions.h" @@ -49,8 +51,6 @@ #define S2N_JA4_IANA_ENTRY_SIZE (S2N_JA4_IANA_HEX_SIZE + 1) #define S2N_JA4_WORKSPACE_SIZE ((S2N_JA4_LIST_LIMIT * (S2N_JA4_IANA_ENTRY_SIZE))) -#define S2N_ASCII_MAX 127 - const char *s2n_ja4_version_strings[] = { /** *= https://raw.githubusercontent.com/FoxIO-LLC/ja4/v0.18.2/technical_details/JA4.md#tls-version @@ -240,22 +240,20 @@ static S2N_RESULT s2n_fingerprint_ja4_alpn(struct s2n_stuffer *output, *= https://raw.githubusercontent.com/FoxIO-LLC/ja4/v0.18.2/technical_details/JA4.md#alpn-extension-value *# If there are no ALPN values or no ALPN extension then we print “00” *# as the value in the fingerprint. - * - * The spec doesn't define what to do with an 1-byte ALPNs. There also currently - * aren't any valid 1-byte ALPNs, and it seems unlikely one will be added in - * the future. But just in case, we match the behavior of the reference - * implementations and write a single '0' for any missing characters: - * - https://github.com/FoxIO-LLC/ja4/blob/main/rust/ja4/src/tls.rs#L455-L459 - * - https://github.com/FoxIO-LLC/ja4/blob/main/python/ja4.py#L187-L194 */ - uint8_t first_char = (protocol.size > 0) ? protocol.data[0] : '0'; - uint8_t last_char = (protocol.size > 1) ? protocol.data[protocol.size - 1] : '0'; + uint8_t first_char = '0', last_char = '0'; + if (protocol.size > 0) { + first_char = protocol.data[0]; + last_char = protocol.data[protocol.size - 1]; + } - /* The reference implementations also replaces non-ascii characters with '9', - * although the spec does not document this behavior either. + /* The spec does not currently define this case, but will be updated in the + * future according to https://github.com/FoxIO-LLC/ja4/issues/148 */ - first_char = (first_char > S2N_ASCII_MAX) ? '9' : first_char; - last_char = (last_char > S2N_ASCII_MAX) ? '9' : last_char; + if (!isalnum(first_char) || !isalnum(last_char)) { + RESULT_GUARD(s2n_hex_digit((first_char >> 4), &first_char)); + RESULT_GUARD(s2n_hex_digit((last_char & 0x0F), &last_char)); + } RESULT_GUARD_POSIX(s2n_stuffer_write_char(output, first_char)); RESULT_GUARD_POSIX(s2n_stuffer_write_char(output, last_char));