diff --git a/yara-x/src/modules/dotnet/parser.rs b/yara-x/src/modules/dotnet/parser.rs index 70362d7f9..2796d22b1 100644 --- a/yara-x/src/modules/dotnet/parser.rs +++ b/yara-x/src/modules/dotnet/parser.rs @@ -9,7 +9,7 @@ use nom::branch::alt; use nom::bytes::complete::{take, take_till}; use nom::combinator::{cond, map, map_opt, map_parser, map_res, verify}; use nom::error::ErrorKind; -use nom::multi::{count, length_count, length_data, many_m_n}; +use nom::multi::{count, length_count, length_data, many0, many_m_n}; use nom::number::complete::{le_u16, le_u32, le_u64, u8}; use nom::sequence::tuple; use nom::{bits, IResult, Parser}; @@ -79,6 +79,8 @@ pub struct Dotnet<'a> { guids: OnceCell>>, /// User types. user_types: OnceCell>>, + /// All strings in the `#US` stream. + user_strings: OnceCell>, /// Modules table. modules: Vec>, /// TypeRef table. @@ -214,6 +216,13 @@ impl<'a> Dotnet<'a> { .iter() } + pub fn get_user_strings(&self) -> impl Iterator { + self.user_strings + .get_or_init(|| self.parse_user_strings()) + .as_slice() + .iter() + } + pub fn get_string_constants(&self) -> impl Iterator { self.constants.iter().filter_map(|c| { if c.type_ == Type::String { @@ -718,6 +727,41 @@ impl<'a> Dotnet<'a> { Ok((remainder, ())) } + /// Parses the `#US` stream, and returns all the strings contained + /// in it. + fn parse_user_strings(&self) -> Vec<&'a [u8]> { + let strings = if let Some(us_stream) = + self.us_stream.and_then(|index| self.get_stream(index)) + { + // The `#US` stream is composed of a series of varints followed by + // the number of bytes indicated by the varint. + many0(length_data(varint))(us_stream) + .map(|(_, strings)| strings) + .ok() + } else { + None + }; + + let mut strings = match strings { + Some(strings) => strings, + None => return vec![], + }; + + // Retain only the strings with length >= 3. 
All non-empty strings have + // at least 3 bytes because strings are UTF-16 (2 bytes per character) + // plus an extra byte that can be 0x00 or 0x01, and indicates whether + // any of the UTF-16 characters have a non-zero bit in the top byte. + // This is described in ECMA-335 II.24.2.4. + strings.retain(|s| s.len() >= 3); + + // Discard the extra byte from all strings. + for string in strings.iter_mut() { + *string = &string[0..string.len() - 1]; + } + + strings + } + fn parse_user_types(&self) -> Vec> { let mut classes = Vec::new(); for (idx, type_def) in self.type_defs.iter().enumerate() { @@ -2540,9 +2584,16 @@ impl From> for protos::dotnet::Dotnet { .constants .extend(dotnet.get_string_constants().map(|c| c.to_vec())); + result + .user_strings + .extend(dotnet.get_user_strings().map(|c| c.to_vec())); + result.set_number_of_streams(result.streams.len().try_into().unwrap()); result.set_number_of_guids(result.guids.len().try_into().unwrap()); result.set_number_of_classes(result.classes.len().try_into().unwrap()); + result.set_number_of_user_strings( + result.user_strings.len().try_into().unwrap(), + ); result.set_number_of_assembly_refs( result.assembly_refs.len().try_into().unwrap(), diff --git a/yara-x/src/modules/dotnet/tests/testdata/605ebe5b5f4b94e1a73e0ad1162bd542e5cb948d1a4ea5a575a14d6b9d6ee849.out b/yara-x/src/modules/dotnet/tests/testdata/605ebe5b5f4b94e1a73e0ad1162bd542e5cb948d1a4ea5a575a14d6b9d6ee849.out index 90dad4490..f5a3d82f1 100644 --- a/yara-x/src/modules/dotnet/tests/testdata/605ebe5b5f4b94e1a73e0ad1162bd542e5cb948d1a4ea5a575a14d6b9d6ee849.out +++ b/yara-x/src/modules/dotnet/tests/testdata/605ebe5b5f4b94e1a73e0ad1162bd542e5cb948d1a4ea5a575a14d6b9d6ee849.out @@ -6,6 +6,7 @@ number_of_guids: 1 number_of_resources: 0 number_of_classes: 24 number_of_assembly_refs: 1 +number_of_user_strings: 1 number_of_constants: 0 streams: - name: "#~" @@ -4796,4 +4797,6 @@ classes: virtual: true final: false return_type: "void" - number_of_parameters: 0 \ No 
newline at end of file + number_of_parameters: 0 +user_strings: + - " \000" \ No newline at end of file diff --git a/yara-x/src/modules/dotnet/tests/testdata/86a1e48cfc843eabfe1b468ef9358c1068950f849c612ab808225b359db0bb8c.out b/yara-x/src/modules/dotnet/tests/testdata/86a1e48cfc843eabfe1b468ef9358c1068950f849c612ab808225b359db0bb8c.out index c9733e3d9..acfb48f3c 100644 --- a/yara-x/src/modules/dotnet/tests/testdata/86a1e48cfc843eabfe1b468ef9358c1068950f849c612ab808225b359db0bb8c.out +++ b/yara-x/src/modules/dotnet/tests/testdata/86a1e48cfc843eabfe1b468ef9358c1068950f849c612ab808225b359db0bb8c.out @@ -6,6 +6,7 @@ number_of_guids: 1 number_of_resources: 0 number_of_classes: 12 number_of_assembly_refs: 1 +number_of_user_strings: 0 number_of_constants: 40 streams: - name: "#~" diff --git a/yara-x/src/modules/dotnet/tests/testdata/984750efd1cb94e5ca7b366863af2092af954dad65df534bff603b9afcb49cd4.out b/yara-x/src/modules/dotnet/tests/testdata/984750efd1cb94e5ca7b366863af2092af954dad65df534bff603b9afcb49cd4.out index d7bf24265..76e5337a8 100644 --- a/yara-x/src/modules/dotnet/tests/testdata/984750efd1cb94e5ca7b366863af2092af954dad65df534bff603b9afcb49cd4.out +++ b/yara-x/src/modules/dotnet/tests/testdata/984750efd1cb94e5ca7b366863af2092af954dad65df534bff603b9afcb49cd4.out @@ -6,6 +6,7 @@ number_of_guids: 1 number_of_resources: 1 number_of_classes: 31 number_of_assembly_refs: 2 +number_of_user_strings: 45 number_of_constants: 0 streams: - name: "#~" @@ -1834,4 +1835,50 @@ classes: virtual: false final: false return_type: "System.Net.Sockets.SocketException" - number_of_parameters: 0 \ No newline at end of file + number_of_parameters: 0 +user_strings: + - "A\000n\000 \000u\000n\000i\000n\000i\000t\000i\000a\000l\000i\000z\000e\000d\000,\000 \000o\000r\000 \000\'\000d\000e\000f\000a\000u\000l\000t\000\'\000,\000 \000V\000a\000l\000u\000e\000S\000t\000o\000p\000w\000a\000t\000c\000h\000 \000c\000a\000n\000n\000o\000t\000 \000b\000e\000 \000u\000s\000e\000d\000 \000t\000o\000 
\000g\000e\000t\000 \000e\000l\000a\000p\000s\000e\000d\000 \000t\000i\000m\000e\000.\000" + - ",\000 \000" + - "n\000e\000t\000_\000t\000o\000o\000l\000o\000n\000g\000" + - "n\000e\000t\000_\000i\000n\000v\000a\000l\000i\000d\000_\000i\000p\000_\000a\000d\000d\000r\000" + - "S\000y\000s\000t\000e\000m\000.\000R\000e\000s\000o\000u\000r\000c\000e\000s\000.\000U\000s\000e\000S\000y\000s\000t\000e\000m\000R\000e\000s\000o\000u\000r\000c\000e\000K\000e\000y\000s\000" + - "G\000e\000t\000H\000o\000s\000t\000N\000a\000m\000e\000" + - "h\000o\000s\000t\000N\000a\000m\000e\000O\000r\000A\000d\000d\000r\000e\000s\000s\000" + - "I\000n\000v\000a\000l\000i\000d\000 \000a\000d\000d\000r\000e\000s\000s\000 \000\'\000{\0000\000}\000\'\000" + - "G\000e\000t\000H\000o\000s\000t\000A\000d\000d\000r\000e\000s\000s\000e\000s\000" + - "{\0000\000}\000 \000D\000N\000S\000 \000l\000o\000o\000k\000u\000p\000 \000f\000a\000i\000l\000e\000d\000 \000w\000i\000t\000h\000 \000{\0001\000}\000" + - "G\000e\000t\000H\000o\000s\000t\000E\000n\000t\000r\000y\000O\000r\000A\000d\000d\000r\000e\000s\000s\000e\000s\000C\000o\000r\000e\000" + - "f\000o\000r\000w\000a\000r\000d\000 \000l\000o\000o\000k\000u\000p\000 \000f\000o\000r\000 \000\'\000{\0000\000}\000\'\000 \000f\000a\000i\000l\000e\000d\000 \000w\000i\000t\000h\000 \000{\0001\000}\000" + - "h\000o\000s\000t\000N\000a\000m\000e\000" + - "G\000e\000t\000H\000o\000s\000t\000E\000n\000t\000r\000y\000O\000r\000A\000d\000d\000r\000e\000s\000s\000e\000s\000C\000o\000r\000e\000A\000s\000y\000n\000c\000" + - "{\0000\000}\000 \000w\000i\000t\000h\000 \000{\0001\000}\000 \000e\000n\000t\000r\000i\000e\000s\000" + - "G\000e\000t\000H\000o\000s\000t\000E\000n\000t\000r\000y\000A\000s\000y\000n\000c\000" + - "{\0000\000}\000 \000D\000N\000S\000 \000l\000o\000o\000k\000u\000p\000 \000w\000a\000s\000 \000c\000a\000n\000c\000e\000l\000e\000d\000" + - "(\000?\000)\000" + - "(\000n\000u\000l\000l\000)\000" + - "#\000" + - "[\000" + - "]\000" + - "(\000" + - 
")\000" + - ":\000" + - "(\0000\000x\000" + - "X\000" + - "0\000x\000" + - "d\000n\000s\000-\000l\000o\000o\000k\000u\000p\000s\000-\000r\000e\000q\000u\000e\000s\000t\000e\000d\000" + - "D\000N\000S\000 \000L\000o\000o\000k\000u\000p\000s\000 \000R\000e\000q\000u\000e\000s\000t\000e\000d\000" + - "c\000u\000r\000r\000e\000n\000t\000-\000d\000n\000s\000-\000l\000o\000o\000k\000u\000p\000s\000" + - "C\000u\000r\000r\000e\000n\000t\000 \000D\000N\000S\000 \000L\000o\000o\000k\000u\000p\000s\000" + - "d\000n\000s\000-\000l\000o\000o\000k\000u\000p\000s\000-\000d\000u\000r\000a\000t\000i\000o\000n\000" + - "A\000v\000e\000r\000a\000g\000e\000 \000D\000N\000S\000 \000L\000o\000o\000k\000u\000p\000 \000D\000u\000r\000a\000t\000i\000o\000n\000" + - "m\000s\000" + - "S\000y\000s\000t\000e\000m\000.\000N\000e\000t\000.\000D\000i\000s\000a\000b\000l\000e\000I\000P\000v\0006\000" + - "D\000O\000T\000N\000E\000T\000_\000S\000Y\000S\000T\000E\000M\000_\000N\000E\000T\000_\000D\000I\000S\000A\000B\000L\000E\000I\000P\000V\0006\000" + - "1\000" + - "t\000r\000u\000e\000" + - "G\000e\000t\000H\000o\000s\000t\000N\000a\000m\000e\000 \000f\000a\000i\000l\000e\000d\000 \000w\000i\000t\000h\000 \000{\0000\000}\000" + - "N\000O\000N\000N\000U\000L\000L\000S\000E\000N\000T\000I\000N\000E\000L\000" + - "w\000s\0002\000_\0003\0002\000.\000d\000l\000l\000" + - "G\000e\000t\000A\000d\000d\000r\000I\000n\000f\000o\000E\000x\000C\000a\000n\000c\000e\000l\000" + - "G\000e\000t\000A\000d\000d\000r\000I\000n\000f\000o\000E\000x\000C\000a\000n\000c\000e\000l\000 \000r\000e\000t\000u\000r\000n\000e\000d\000 \000e\000r\000r\000o\000r\000 \000{\0000\000}\000" + - "R\000e\000g\000i\000s\000t\000e\000r\000F\000o\000r\000C\000a\000n\000c\000e\000l\000l\000a\000t\000i\000o\000n\000" \ No newline at end of file diff --git a/yara-x/src/modules/protos/dotnet.proto b/yara-x/src/modules/protos/dotnet.proto index 35830d1ed..a7e18ede8 100644 --- a/yara-x/src/modules/protos/dotnet.proto +++ 
b/yara-x/src/modules/protos/dotnet.proto @@ -34,6 +34,7 @@ message Dotnet { repeated Resource resources = 21; repeated Class classes = 22; repeated uint32 field_offsets = 23; + repeated bytes user_strings = 24; } message Assembly {