Skip to content

Commit

Permalink
feat: add user_strings field to dotnet module
Browse files Browse the repository at this point in the history
  • Loading branch information
plusvic committed Dec 19, 2023
1 parent d698c3a commit ca7f6cb
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 3 deletions.
53 changes: 52 additions & 1 deletion yara-x/src/modules/dotnet/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use nom::branch::alt;
use nom::bytes::complete::{take, take_till};
use nom::combinator::{cond, map, map_opt, map_parser, map_res, verify};
use nom::error::ErrorKind;
use nom::multi::{count, length_count, length_data, many_m_n};
use nom::multi::{count, length_count, length_data, many0, many_m_n};
use nom::number::complete::{le_u16, le_u32, le_u64, u8};
use nom::sequence::tuple;
use nom::{bits, IResult, Parser};
Expand Down Expand Up @@ -79,6 +79,8 @@ pub struct Dotnet<'a> {
guids: OnceCell<Option<Vec<Uuid>>>,
/// User types.
user_types: OnceCell<Vec<Class<'a>>>,
/// All strings in the `#US` stream.
user_strings: OnceCell<Vec<&'a [u8]>>,
/// Modules table.
modules: Vec<Option<&'a str>>,
/// TypeRef table.
Expand Down Expand Up @@ -214,6 +216,13 @@ impl<'a> Dotnet<'a> {
.iter()
}

/// Returns an iterator over the strings found in the `#US` stream.
///
/// The stream is parsed the first time this function is called, and the
/// result is cached in `user_strings` for subsequent calls.
pub fn get_user_strings(&self) -> impl Iterator<Item = &&[u8]> {
    let cached = self.user_strings.get_or_init(|| self.parse_user_strings());
    cached.iter()
}

pub fn get_string_constants(&self) -> impl Iterator<Item = &[u8]> {
self.constants.iter().filter_map(|c| {
if c.type_ == Type::String {
Expand Down Expand Up @@ -718,6 +727,41 @@ impl<'a> Dotnet<'a> {
Ok((remainder, ()))
}

/// Parses the `#US` stream and returns all the non-empty strings contained
/// in it, with their trailing flag byte removed.
///
/// An empty vector is returned when there is no `#US` stream, or when the
/// stream can't be parsed.
fn parse_user_strings(&self) -> Vec<&'a [u8]> {
    let us_stream =
        match self.us_stream.and_then(|index| self.get_stream(index)) {
            Some(stream) => stream,
            None => return Vec::new(),
        };

    // The `#US` stream is a sequence of blobs, each of them composed of a
    // varint followed by the number of bytes indicated by the varint.
    let blobs = match many0(length_data(varint))(us_stream) {
        Ok((_, blobs)) => blobs,
        Err(_) => return Vec::new(),
    };

    // Every non-empty string takes at least 3 bytes: strings are UTF-16
    // (2 bytes per character) and are followed by an extra byte that is
    // either 0x00 or 0x01, indicating whether any of the UTF-16 characters
    // has a non-zero bit in its top byte (ECMA-335 II.24.2.4). Shorter
    // blobs are dropped, and the extra byte is stripped from the rest.
    blobs
        .into_iter()
        .filter(|blob| blob.len() >= 3)
        .map(|blob| &blob[..blob.len() - 1])
        .collect()
}

fn parse_user_types(&self) -> Vec<Class<'a>> {
let mut classes = Vec::new();
for (idx, type_def) in self.type_defs.iter().enumerate() {
Expand Down Expand Up @@ -2540,9 +2584,16 @@ impl From<Dotnet<'_>> for protos::dotnet::Dotnet {
.constants
.extend(dotnet.get_string_constants().map(|c| c.to_vec()));

result
.user_strings
.extend(dotnet.get_user_strings().map(|c| c.to_vec()));

result.set_number_of_streams(result.streams.len().try_into().unwrap());
result.set_number_of_guids(result.guids.len().try_into().unwrap());
result.set_number_of_classes(result.classes.len().try_into().unwrap());
result.set_number_of_user_strings(
result.user_strings.len().try_into().unwrap(),
);

result.set_number_of_assembly_refs(
result.assembly_refs.len().try_into().unwrap(),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ number_of_guids: 1
number_of_resources: 0
number_of_classes: 24
number_of_assembly_refs: 1
number_of_user_strings: 1
number_of_constants: 0
streams:
- name: "#~"
Expand Down Expand Up @@ -4796,4 +4797,6 @@ classes:
virtual: true
final: false
return_type: "void"
number_of_parameters: 0
number_of_parameters: 0
user_strings:
- " \000"
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ number_of_guids: 1
number_of_resources: 0
number_of_classes: 12
number_of_assembly_refs: 1
number_of_user_strings: 0
number_of_constants: 40
streams:
- name: "#~"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ number_of_guids: 1
number_of_resources: 1
number_of_classes: 31
number_of_assembly_refs: 2
number_of_user_strings: 45
number_of_constants: 0
streams:
- name: "#~"
Expand Down Expand Up @@ -1834,4 +1835,50 @@ classes:
virtual: false
final: false
return_type: "System.Net.Sockets.SocketException"
number_of_parameters: 0
number_of_parameters: 0
user_strings:
- "A\000n\000 \000u\000n\000i\000n\000i\000t\000i\000a\000l\000i\000z\000e\000d\000,\000 \000o\000r\000 \000\'\000d\000e\000f\000a\000u\000l\000t\000\'\000,\000 \000V\000a\000l\000u\000e\000S\000t\000o\000p\000w\000a\000t\000c\000h\000 \000c\000a\000n\000n\000o\000t\000 \000b\000e\000 \000u\000s\000e\000d\000 \000t\000o\000 \000g\000e\000t\000 \000e\000l\000a\000p\000s\000e\000d\000 \000t\000i\000m\000e\000.\000"
- ",\000 \000"
- "n\000e\000t\000_\000t\000o\000o\000l\000o\000n\000g\000"
- "n\000e\000t\000_\000i\000n\000v\000a\000l\000i\000d\000_\000i\000p\000_\000a\000d\000d\000r\000"
- "S\000y\000s\000t\000e\000m\000.\000R\000e\000s\000o\000u\000r\000c\000e\000s\000.\000U\000s\000e\000S\000y\000s\000t\000e\000m\000R\000e\000s\000o\000u\000r\000c\000e\000K\000e\000y\000s\000"
- "G\000e\000t\000H\000o\000s\000t\000N\000a\000m\000e\000"
- "h\000o\000s\000t\000N\000a\000m\000e\000O\000r\000A\000d\000d\000r\000e\000s\000s\000"
- "I\000n\000v\000a\000l\000i\000d\000 \000a\000d\000d\000r\000e\000s\000s\000 \000\'\000{\0000\000}\000\'\000"
- "G\000e\000t\000H\000o\000s\000t\000A\000d\000d\000r\000e\000s\000s\000e\000s\000"
- "{\0000\000}\000 \000D\000N\000S\000 \000l\000o\000o\000k\000u\000p\000 \000f\000a\000i\000l\000e\000d\000 \000w\000i\000t\000h\000 \000{\0001\000}\000"
- "G\000e\000t\000H\000o\000s\000t\000E\000n\000t\000r\000y\000O\000r\000A\000d\000d\000r\000e\000s\000s\000e\000s\000C\000o\000r\000e\000"
- "f\000o\000r\000w\000a\000r\000d\000 \000l\000o\000o\000k\000u\000p\000 \000f\000o\000r\000 \000\'\000{\0000\000}\000\'\000 \000f\000a\000i\000l\000e\000d\000 \000w\000i\000t\000h\000 \000{\0001\000}\000"
- "h\000o\000s\000t\000N\000a\000m\000e\000"
- "G\000e\000t\000H\000o\000s\000t\000E\000n\000t\000r\000y\000O\000r\000A\000d\000d\000r\000e\000s\000s\000e\000s\000C\000o\000r\000e\000A\000s\000y\000n\000c\000"
- "{\0000\000}\000 \000w\000i\000t\000h\000 \000{\0001\000}\000 \000e\000n\000t\000r\000i\000e\000s\000"
- "G\000e\000t\000H\000o\000s\000t\000E\000n\000t\000r\000y\000A\000s\000y\000n\000c\000"
- "{\0000\000}\000 \000D\000N\000S\000 \000l\000o\000o\000k\000u\000p\000 \000w\000a\000s\000 \000c\000a\000n\000c\000e\000l\000e\000d\000"
- "(\000?\000)\000"
- "(\000n\000u\000l\000l\000)\000"
- "#\000"
- "[\000"
- "]\000"
- "(\000"
- ")\000"
- ":\000"
- "(\0000\000x\000"
- "X\000"
- "0\000x\000"
- "d\000n\000s\000-\000l\000o\000o\000k\000u\000p\000s\000-\000r\000e\000q\000u\000e\000s\000t\000e\000d\000"
- "D\000N\000S\000 \000L\000o\000o\000k\000u\000p\000s\000 \000R\000e\000q\000u\000e\000s\000t\000e\000d\000"
- "c\000u\000r\000r\000e\000n\000t\000-\000d\000n\000s\000-\000l\000o\000o\000k\000u\000p\000s\000"
- "C\000u\000r\000r\000e\000n\000t\000 \000D\000N\000S\000 \000L\000o\000o\000k\000u\000p\000s\000"
- "d\000n\000s\000-\000l\000o\000o\000k\000u\000p\000s\000-\000d\000u\000r\000a\000t\000i\000o\000n\000"
- "A\000v\000e\000r\000a\000g\000e\000 \000D\000N\000S\000 \000L\000o\000o\000k\000u\000p\000 \000D\000u\000r\000a\000t\000i\000o\000n\000"
- "m\000s\000"
- "S\000y\000s\000t\000e\000m\000.\000N\000e\000t\000.\000D\000i\000s\000a\000b\000l\000e\000I\000P\000v\0006\000"
- "D\000O\000T\000N\000E\000T\000_\000S\000Y\000S\000T\000E\000M\000_\000N\000E\000T\000_\000D\000I\000S\000A\000B\000L\000E\000I\000P\000V\0006\000"
- "1\000"
- "t\000r\000u\000e\000"
- "G\000e\000t\000H\000o\000s\000t\000N\000a\000m\000e\000 \000f\000a\000i\000l\000e\000d\000 \000w\000i\000t\000h\000 \000{\0000\000}\000"
- "N\000O\000N\000N\000U\000L\000L\000S\000E\000N\000T\000I\000N\000E\000L\000"
- "w\000s\0002\000_\0003\0002\000.\000d\000l\000l\000"
- "G\000e\000t\000A\000d\000d\000r\000I\000n\000f\000o\000E\000x\000C\000a\000n\000c\000e\000l\000"
- "G\000e\000t\000A\000d\000d\000r\000I\000n\000f\000o\000E\000x\000C\000a\000n\000c\000e\000l\000 \000r\000e\000t\000u\000r\000n\000e\000d\000 \000e\000r\000r\000o\000r\000 \000{\0000\000}\000"
- "R\000e\000g\000i\000s\000t\000e\000r\000F\000o\000r\000C\000a\000n\000c\000e\000l\000l\000a\000t\000i\000o\000n\000"
1 change: 1 addition & 0 deletions yara-x/src/modules/protos/dotnet.proto
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ message Dotnet {
repeated Resource resources = 21;
repeated Class classes = 22;
repeated uint32 field_offsets = 23;
repeated bytes user_strings = 24;
}

message Assembly {
Expand Down

0 comments on commit ca7f6cb

Please sign in to comment.