diff --git a/rust/src/mime/mod.rs b/rust/src/mime/mod.rs index 44c34dedaf88..5899da415fea 100644 --- a/rust/src/mime/mod.rs +++ b/rust/src/mime/mod.rs @@ -19,9 +19,9 @@ use crate::common::nom7::take_until_and_consume; use nom7::branch::alt; -use nom7::bytes::complete::{take_till, take_until, take_while}; +use nom7::bytes::complete::{tag, take, take_till, take_until, take_while}; use nom7::character::complete::char; -use nom7::combinator::{complete, opt, rest}; +use nom7::combinator::{complete, opt, rest, value}; use nom7::error::{make_error, ErrorKind}; use nom7::{Err, IResult}; use std; @@ -42,7 +42,7 @@ fn mime_parse_value_delimited(input: &[u8]) -> IResult<&[u8], &[u8]> { if input[i] == b'"' && !escaping { return Ok((&input[i + 1..], &input[..i])); } - //TODOmime unescape later + // unescape can be processed later escaping = false; } } @@ -55,8 +55,8 @@ fn mime_parse_value_delimited(input: &[u8]) -> IResult<&[u8], &[u8]> { fn mime_parse_value_until(input: &[u8]) -> IResult<&[u8], &[u8]> { let (input, value) = alt((take_till(|ch: u8| ch == b';'), rest))(input)?; for i in 0..value.len() { - if !is_mime_space(value[value.len()-i-1]) { - return Ok((input, &value[..value.len()-i])); + if !is_mime_space(value[value.len() - i - 1]) { + return Ok((input, &value[..value.len() - i])); } } return Ok((input, value)); @@ -176,6 +176,360 @@ pub unsafe extern "C" fn rs_mime_find_header_token( return false; } +#[derive(Debug)] +enum MimeParserState { + MimeStart = 0, + MimeHeader = 1, + MimeHeaderEnd = 2, + MimeChunk = 3, + MimeBoundaryWaitingForEol = 4, +} + +impl Default for MimeParserState { + fn default() -> Self { + MimeParserState::MimeStart + } +} + +#[derive(Debug, Default)] +pub struct MimeStateHTTP { + boundary: Vec, + filename: Vec, + state: MimeParserState, +} + +#[repr(u8)] +#[derive(Copy, Clone, PartialOrd, PartialEq)] +pub enum MimeParserResult { + MimeNeedsMore = 0, + MimeFileOpen = 1, + MimeFileChunk = 2, + MimeFileClose = 3, +} + +fn mime_parse_skip_line(input: &[u8]) -> IResult<&[u8], MimeParserState> { + let (input, _) = take_till(|ch: u8| ch == b'\n')(input)?; + let (input, _) = char('\n')(input)?; + return Ok((input, MimeParserState::MimeStart)); +} + +fn mime_parse_boundary_regular<'a, 'b>( + boundary: &'b [u8], input: &'a [u8], +) -> IResult<&'a [u8], MimeParserState> { + let (input, _) = tag(boundary)(input)?; + let (input, _) = take_till(|ch: u8| ch == b'\n')(input)?; + let (input, _) = char('\n')(input)?; + return Ok((input, MimeParserState::MimeHeader)); +} + +// Number of characters after boundary, without end of line, before changing state to streaming +const MIME_BOUNDARY_MAX_BEFORE_EOL: usize = 128; +const MIME_HEADER_MAX_LINE: usize = 4096; + +fn mime_parse_boundary_missing_eol<'a, 'b>( + boundary: &'b [u8], input: &'a [u8], +) -> IResult<&'a [u8], MimeParserState> { + let (input, _) = tag(boundary)(input)?; + let (input, _) = take(MIME_BOUNDARY_MAX_BEFORE_EOL)(input)?; + return Ok((input, MimeParserState::MimeBoundaryWaitingForEol)); +} + +fn mime_parse_boundary<'a, 'b>( + boundary: &'b [u8], input: &'a [u8], +) -> IResult<&'a [u8], MimeParserState> { + let r = mime_parse_boundary_regular(boundary, input); + if r.is_ok() { + return r; + } + let r2 = mime_parse_skip_line(input); + if r2.is_ok() { + return r2; + } + return mime_parse_boundary_missing_eol(boundary, input); +} + +fn mime_consume_until_eol(input: &[u8]) -> IResult<&[u8], bool> { + return alt((value(true, mime_parse_skip_line), value(false, rest)))(input); +} + +fn mime_parse_header_line(input: &[u8]) -> IResult<&[u8], &[u8]> { + let (input, name) = take_till(|ch: u8| ch == b':')(input)?; + let (input, _) = char(':')(input)?; + return Ok((input, name)); +} + +// s2 is already lower case +fn rs_equals_lowercase(s1: &[u8], s2: &[u8]) -> bool { + if s1.len() == s2.len() { + for i in 0..s1.len() { + if s1[i].to_ascii_lowercase() != s2[i] { + return false; + } + } + return true; + } + return false; +} + +fn mime_parse_headers<'a, 'b>( + ctx: &'b mut MimeStateHTTP, i: &'a [u8], +) -> IResult<&'a [u8], (MimeParserState, bool, bool)> { + let mut fileopen = false; + let mut errored = false; + let mut input = i; + while input.len() > 0 { + match take_until::<_, &[u8], nom7::error::Error<&[u8]>>("\r\n")(input) { + Ok((input2, line)) => { + match mime_parse_header_line(line) { + Ok((value, name)) => { + if rs_equals_lowercase(name, "content-disposition".as_bytes()) { + let mut sections_values = Vec::new(); + if let Ok(filename) = mime_find_header_token( + value, + "filename".as_bytes(), + &mut sections_values, + ) { + if filename.len() > 0 { + ctx.filename = Vec::with_capacity(filename.len()); + fileopen = true; + for c in filename { + // unescape + if *c != b'\\' { + ctx.filename.push(*c); + } + } + } + } + } + if value.len() == 0 { + errored = true; + } + } + _ => { + if line.len() > 0 { + errored = true; + } + } + } + let (input3, _) = tag("\r\n")(input2)?; + input = input3; + if line.len() == 0 || (line.len() == 1 && line[0] == b'\r') { + return Ok((input, (MimeParserState::MimeHeaderEnd, fileopen, errored))); + } + } + _ => { + // guard against too long header lines + if input.len() > MIME_HEADER_MAX_LINE { + return Ok(( + input, + ( + MimeParserState::MimeBoundaryWaitingForEol, + fileopen, + errored, + ), + )); + } + if input.len() < i.len() { + return Ok((input, (MimeParserState::MimeHeader, fileopen, errored))); + } // else only an incomplete line, ask for more + return Err(Err::Error(make_error(input, ErrorKind::Eof))); + } + } + } + return Ok((input, (MimeParserState::MimeHeader, fileopen, errored))); +} + +fn mime_consume_chunk<'a, 'b>(boundary: &'b [u8], input: &'a [u8]) -> IResult<&'a [u8], bool> { + let r: Result<(&[u8], &[u8]), Err>> = take_until("\r\n")(input); + match r { + Ok((input, line)) => { + let (input2, _) = tag("\r\n")(input)?; + if input2.len() < boundary.len() { + if input2 == &boundary[..input2.len()] { + if line.len() > 0 { + // consume as chunk up to eol (not consuming eol) + return Ok((input, false)); + } + // new line beignning like boundary, with nothin to consume as chunk : request more + return Err(Err::Error(make_error(input, ErrorKind::Eof))); + } + // not like boundary : consume everything as chunk + return Ok((&input[input.len()..], false)); + } // else + if &input2[..boundary.len()] == boundary { + // end of file with boundary, consume eol but do not consume boundary + return Ok((input2, true)); + } + // not like boundary : consume everything as chunk + return Ok((input2, false)); + } + _ => { + return Ok((&input[input.len()..], false)); + } + } +} + +pub const MIME_EVENT_FLAG_INVALID_HEADER: u32 = 0x01; +pub const MIME_EVENT_FLAG_NO_FILEDATA: u32 = 0x02; + +fn mime_process(ctx: &mut MimeStateHTTP, i: &[u8]) -> (MimeParserResult, u32, u32) { + let mut input = i; + let mut consumed = 0; + let mut warnings = 0; + while input.len() > 0 { + match ctx.state { + MimeParserState::MimeStart => { + if let Ok((rem, next)) = mime_parse_boundary(&ctx.boundary, input) { + ctx.state = next; + consumed += (input.len() - rem.len()) as u32; + input = rem; + } else { + return (MimeParserResult::MimeNeedsMore, consumed, warnings); + } + } + MimeParserState::MimeBoundaryWaitingForEol => { + if let Ok((rem, found)) = mime_consume_until_eol(input) { + if found { + ctx.state = MimeParserState::MimeHeader; + } + consumed += (input.len() - rem.len()) as u32; + input = rem; + } else { + // should never happen + return (MimeParserResult::MimeNeedsMore, consumed, warnings); + } + } + MimeParserState::MimeHeader => { + if let Ok((rem, (next, fileopen, err))) = mime_parse_headers(ctx, input) { + ctx.state = next; + consumed += (input.len() - rem.len()) as u32; + input = rem; + if err { + warnings |= MIME_EVENT_FLAG_INVALID_HEADER; + } + if fileopen { + return (MimeParserResult::MimeFileOpen, consumed, warnings); + } + } else { + return (MimeParserResult::MimeNeedsMore, consumed, warnings); + } + } + MimeParserState::MimeHeaderEnd => { + // check if we start with the boundary + // and transition to chunk, or empty file and back to start + if input.len() < ctx.boundary.len() { + if input == &ctx.boundary[..input.len()] { + return (MimeParserResult::MimeNeedsMore, consumed, warnings); + } + ctx.state = MimeParserState::MimeChunk; + } else { + if &input[..ctx.boundary.len()] == ctx.boundary { + ctx.state = MimeParserState::MimeStart; + if ctx.filename.len() > 0 { + warnings |= MIME_EVENT_FLAG_NO_FILEDATA; + } + ctx.filename.clear(); + return (MimeParserResult::MimeFileClose, consumed, warnings); + } else { + ctx.state = MimeParserState::MimeChunk; + } + } + } + MimeParserState::MimeChunk => { + if let Ok((rem, eof)) = mime_consume_chunk(&ctx.boundary, input) { + consumed += (input.len() - rem.len()) as u32; + if eof { + ctx.state = MimeParserState::MimeStart; + ctx.filename.clear(); + return (MimeParserResult::MimeFileClose, consumed, warnings); + } else { + // + 2 for \r\n + if rem.len() < ctx.boundary.len() + 2 { + return (MimeParserResult::MimeFileChunk, consumed, warnings); + } + input = rem; + } + } else { + return (MimeParserResult::MimeNeedsMore, consumed, warnings); + } + } + } + } + return (MimeParserResult::MimeNeedsMore, consumed, warnings); +} + +pub fn mime_state_init(i: &[u8]) -> Option { + let mut sections_values = Vec::new(); + match mime_find_header_token(i, "boundary".as_bytes(), &mut sections_values) { + Ok(value) => { + if value.len() <= RS_MIME_MAX_TOKEN_LEN { + let mut r = MimeStateHTTP::default(); + r.boundary = Vec::with_capacity(2 + value.len()); + // start wih 2 additional hyphens + r.boundary.push(b'-'); + r.boundary.push(b'-'); + for c in value { + // unescape + if *c != b'\\' { + r.boundary.push(*c); + } + } + return Some(r); + } + } + _ => {} + } + return None; +} + +#[no_mangle] +pub unsafe extern "C" fn rs_mime_state_init( + input: *const u8, input_len: u32, +) -> *mut MimeStateHTTP { + let slice = build_slice!(input, input_len as usize); + + if let Some(ctx) = mime_state_init(slice) { + let boxed = Box::new(ctx); + return Box::into_raw(boxed) as *mut _; + } + return std::ptr::null_mut(); +} + +#[no_mangle] +pub unsafe extern "C" fn rs_mime_parse( + ctx: &mut MimeStateHTTP, input: *const u8, input_len: u32, consumed: *mut u32, + warnings: *mut u32, +) -> MimeParserResult { + let slice = build_slice!(input, input_len as usize); + let (r, c, w) = mime_process(ctx, slice); + *consumed = c; + *warnings = w; + return r; +} + +#[no_mangle] +pub unsafe extern "C" fn rs_mime_state_get_filename( + ctx: &mut MimeStateHTTP, buffer: *mut *const u8, filename_len: *mut u16, +) { + if ctx.filename.len() > 0 { + *buffer = ctx.filename.as_ptr(); + if ctx.filename.len() < u16::MAX.into() { + *filename_len = ctx.filename.len() as u16; + } else { + *filename_len = u16::MAX; + } + } else { + *buffer = std::ptr::null_mut(); + *filename_len = 0; + } +} + +#[no_mangle] +pub unsafe extern "C" fn rs_mime_state_free(ctx: &mut MimeStateHTTP) { + // Just unbox... + std::mem::drop(Box::from_raw(ctx)); +} + #[cfg(test)] mod test { use super::*; diff --git a/src/app-layer-htp.c b/src/app-layer-htp.c index 786ec2f4d82b..acdb8739d923 100644 --- a/src/app-layer-htp.c +++ b/src/app-layer-htp.c @@ -99,6 +99,10 @@ StreamingBufferConfig htp_sbcfg = STREAMING_BUFFER_CONFIG_INITIALIZER; /** Limit to the number of libhtp messages that can be handled */ #define HTP_MAX_MESSAGES 512 +/** a boundary should be smaller in size */ +// RFC 2046 states that max boundary size is 70 +#define HTP_BOUNDARY_MAX 200U + SC_ATOMIC_DECLARE(uint32_t, htp_config_flags); #ifdef DEBUG @@ -368,8 +372,8 @@ static void HtpTxUserDataFree(HtpState *state, HtpTxUserData *htud) if (htud->response_headers_raw) HTPFree(htud->response_headers_raw, htud->response_headers_raw_len); AppLayerDecoderEventsFreeEvents(&htud->tx_data.events); - if (htud->boundary) - HTPFree(htud->boundary, htud->boundary_len); + if (htud->mime_state) + rs_mime_state_free(htud->mime_state); if (htud->tx_data.de_state != NULL) { DetectEngineStateFree(htud->tx_data.de_state); } @@ -1114,92 +1118,6 @@ static int HTTPParseContentDispositionHeader(uint8_t *name, size_t name_len, return 0; } -/** - * \param name /Lowercase/ version of the variable name - */ -static int HTTPParseContentTypeHeader(uint8_t *name, size_t name_len, - uint8_t *data, size_t len, uint8_t **retptr, size_t *retlen) -{ - SCEnter(); -#ifdef PRINT - printf("DATA START: \n"); - PrintRawDataFp(stdout, data, len); - printf("DATA END: \n"); -#endif - size_t x; - int quote = 0; - - for (x = 0; x < len; x++) { - if (!(isspace(data[x]))) - break; - } - - if (x >= len) { - SCReturnInt(0); - } - - uint8_t *line = data+x; - size_t line_len = len-x; - size_t offset = 0; -#ifdef PRINT - printf("LINE START: \n"); - PrintRawDataFp(stdout, line, line_len); - printf("LINE END: \n"); -#endif - for (x = 0 ; x < line_len; x++) { - if (x > 0) { - if (line[x - 1] != '\\' && line[x] == '\"') { - quote++; - } - - if (((line[x - 1] != '\\' && line[x] == ';') || ((x + 1) == line_len)) && (quote == 0 || quote % 2 == 0)) { - uint8_t *token = line + offset; - size_t token_len = x - offset; - - if ((x + 1) == line_len) { - token_len++; - } - - offset = x + 1; - - while (offset < line_len && isspace(line[offset])) { - x++; - offset++; - } -#ifdef PRINT - printf("TOKEN START: \n"); - PrintRawDataFp(stdout, token, token_len); - printf("TOKEN END: \n"); -#endif - if (token_len > name_len) { - if (name == NULL || SCMemcmpLowercase(name, token, name_len) == 0) { - uint8_t *value = token + name_len; - size_t value_len = token_len - name_len; - - if (value[0] == '\"') { - value++; - value_len--; - } - if (value[value_len-1] == '\"') { - value_len--; - } -#ifdef PRINT - printf("VALUE START: \n"); - PrintRawDataFp(stdout, value, value_len); - printf("VALUE END: \n"); -#endif - *retptr = value; - *retlen = value_len; - SCReturnInt(1); - } - } - } - } - } - - SCReturnInt(0); -} - /** * \brief setup multipart parsing: extract boundary and store it * @@ -1218,123 +1136,15 @@ static int HtpRequestBodySetupMultipart(htp_tx_t *tx, HtpTxUserData *htud) htp_header_t *h = (htp_header_t *)htp_table_get_c(tx->request_headers, "Content-Type"); if (h != NULL && bstr_len(h->value) > 0) { - uint8_t *boundary = NULL; - size_t boundary_len = 0; - - int r = HTTPParseContentTypeHeader((uint8_t *)"boundary=", 9, - (uint8_t *) bstr_ptr(h->value), bstr_len(h->value), - &boundary, &boundary_len); - if (r == 1) { -#ifdef PRINT - printf("BOUNDARY START: \n"); - PrintRawDataFp(stdout, boundary, boundary_len); - printf("BOUNDARY END: \n"); -#endif - if (boundary_len < HTP_BOUNDARY_MAX) { - htud->boundary = HTPMalloc(boundary_len); - if (htud->boundary == NULL) { - return -1; - } - htud->boundary_len = (uint8_t)boundary_len; - memcpy(htud->boundary, boundary, boundary_len); - - htud->tsflags |= HTP_BOUNDARY_SET; - } else { - SCLogDebug("invalid boundary"); - return -1; - } + htud->mime_state = rs_mime_state_init(bstr_ptr(h->value), bstr_len(h->value)); + if (htud->mime_state) { + htud->tsflags |= HTP_BOUNDARY_SET; SCReturnInt(1); } - //SCReturnInt(1); } SCReturnInt(0); } -#define C_D_HDR "content-disposition:" -#define C_D_HDR_LEN 20 -#define C_T_HDR "content-type:" -#define C_T_HDR_LEN 13 - -static void HtpRequestBodyMultipartParseHeader(HtpState *hstate, - HtpTxUserData *htud, - uint8_t *header, uint32_t header_len, - uint8_t **filename, uint16_t *filename_len, - uint8_t **filetype, uint16_t *filetype_len) -{ - uint8_t *fn = NULL; - size_t fn_len = 0; - uint8_t *ft = NULL; - size_t ft_len = 0; - -#ifdef PRINT - printf("HEADER START: \n"); - PrintRawDataFp(stdout, header, header_len); - printf("HEADER END: \n"); -#endif - - while (header_len > 0) { - uint8_t *next_line = Bs2bmSearch(header, header_len, (uint8_t *)"\r\n", 2); - uint8_t *line = header; - uint32_t line_len; - - if (next_line == NULL) { - line_len = header_len; - } else { - line_len = next_line - header; - } - uint8_t *sc = (uint8_t *)memchr(line, ':', line_len); - if (sc == NULL) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER); - /* if the : we found is the final char, it means we have - * no value */ - } else if (line_len > 0 && sc == &line[line_len - 1]) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER); - } else { -#ifdef PRINT - printf("LINE START: \n"); - PrintRawDataFp(stdout, line, line_len); - printf("LINE END: \n"); -#endif - if (line_len >= C_D_HDR_LEN && - SCMemcmpLowercase(C_D_HDR, line, C_D_HDR_LEN) == 0) { - uint8_t *value = line + C_D_HDR_LEN; - uint32_t value_len = line_len - C_D_HDR_LEN; - - /* parse content-disposition */ - (void)HTTPParseContentDispositionHeader((uint8_t *)"filename=", 9, - value, value_len, &fn, &fn_len); - } else if (line_len >= C_T_HDR_LEN && - SCMemcmpLowercase(C_T_HDR, line, C_T_HDR_LEN) == 0) { - SCLogDebug("content-type line"); - uint8_t *value = line + C_T_HDR_LEN; - uint32_t value_len = line_len - C_T_HDR_LEN; - - (void)HTTPParseContentTypeHeader(NULL, 0, - value, value_len, &ft, &ft_len); - } - } - - if (next_line == NULL) { - SCLogDebug("no next_line"); - break; - } - header_len -= ((next_line + 2) - header); - header = next_line + 2; - } /* while (header_len > 0) */ - - if (fn_len > USHRT_MAX) - fn_len = USHRT_MAX; - if (ft_len > USHRT_MAX) - ft_len = USHRT_MAX; - - *filename = fn; - *filename_len = (uint16_t)fn_len; - *filetype = ft; - *filetype_len = (uint16_t)ft_len; -} - /** * \brief Create a single buffer from the HtpBodyChunks in our list * @@ -1364,336 +1174,104 @@ static void FlagDetectStateNewFile(HtpTxUserData *tx, int dir) } } -/** - * \brief Setup boundary buffers - */ -static void HtpRequestBodySetupBoundary(HtpTxUserData *htud, - uint8_t *boundary, uint32_t boundary_len) -{ - memset(boundary, '-', boundary_len); - memcpy(boundary + 2, htud->boundary, htud->boundary_len); -} - static int HtpRequestBodyHandleMultipart(HtpState *hstate, HtpTxUserData *htud, void *tx, - const uint8_t *chunks_buffer, uint32_t chunks_buffer_len) + const uint8_t *chunks_buffer, uint32_t chunks_buffer_len, bool eof) { - int result = 0; - uint8_t boundary[htud->boundary_len + 4]; /**< size limited to HTP_BOUNDARY_MAX + 4 */ - uint16_t expected_boundary_len = htud->boundary_len + 2; - uint16_t expected_boundary_end_len = htud->boundary_len + 4; - int tx_progress = 0; - #ifdef PRINT printf("CHUNK START: \n"); PrintRawDataFp(stdout, chunks_buffer, chunks_buffer_len); printf("CHUNK END: \n"); #endif - HtpRequestBodySetupBoundary(htud, boundary, htud->boundary_len + 4); - - /* search for the header start, header end and form end */ - const uint8_t *header_start = Bs2bmSearch(chunks_buffer, chunks_buffer_len, - boundary, expected_boundary_len); - /* end of the multipart form */ - const uint8_t *form_end = NULL; - /* end marker belonging to header_start */ - const uint8_t *header_end = NULL; - if (header_start != NULL) { - header_end = Bs2bmSearch(header_start, chunks_buffer_len - (header_start - chunks_buffer), - (uint8_t *)"\r\n\r\n", 4); - form_end = Bs2bmSearch(header_start, chunks_buffer_len - (header_start - chunks_buffer), - boundary, expected_boundary_end_len); - } - - SCLogDebug("header_start %p, header_end %p, form_end %p", header_start, - header_end, form_end); - - /* we currently only handle multipart for ts. When we support it for tc, - * we will need to supply right direction */ - tx_progress = AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, STREAM_TOSERVER); - /* if we're in the file storage process, deal with that now */ - if (htud->tsflags & HTP_FILENAME_SET) { - if (header_start != NULL || (tx_progress > HTP_REQUEST_BODY)) { - SCLogDebug("reached the end of the file"); - - const uint8_t *filedata = chunks_buffer; - uint32_t filedata_len = 0; - uint8_t flags = 0; - - if (header_start != NULL) { - if (header_start == filedata + 2) { - /* last chunk had all data, but not the boundary */ - SCLogDebug("last chunk had all data, but not the boundary"); - filedata_len = 0; - } else if (header_start > filedata + 2) { - SCLogDebug("some data from last file before the boundary"); - /* some data from last file before the boundary */ - filedata_len = header_start - filedata - 2; - } - } - /* body parsing done, we did not get our form end. Use all data - * we still have and signal to files API we have an issue. */ - if (tx_progress > HTP_REQUEST_BODY) { - filedata_len = chunks_buffer_len; - flags = FILE_TRUNCATED; - } - - if (filedata_len > chunks_buffer_len) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR); - goto end; - } -#ifdef PRINT - printf("FILEDATA (final chunk) START: \n"); - PrintRawDataFp(stdout, filedata, filedata_len); - printf("FILEDATA (final chunk) END: \n"); -#endif - if (!(htud->tsflags & HTP_DONTSTORE)) { - if (HTPFileClose(htud, filedata, filedata_len, flags, STREAM_TOSERVER) == -1) { - goto end; - } - } - - htud->tsflags &=~ HTP_FILENAME_SET; - - /* fall through */ - } else { - SCLogDebug("not yet at the end of the file"); - - if (chunks_buffer_len > expected_boundary_end_len) { - const uint8_t *filedata = chunks_buffer; - uint32_t filedata_len = chunks_buffer_len - expected_boundary_len; - for (; filedata_len < chunks_buffer_len; filedata_len++) { - // take as much as we can until the beginning of a new line - if (chunks_buffer[filedata_len] == '\r') { - if (filedata_len + 1 == chunks_buffer_len || - chunks_buffer[filedata_len + 1] == '\n') { - break; - } - } - } - -#ifdef PRINT - printf("FILEDATA (part) START: \n"); - PrintRawDataFp(stdout, filedata, filedata_len); - printf("FILEDATA (part) END: \n"); -#endif - - if (!(htud->tsflags & HTP_DONTSTORE)) { - result = HTPFileStoreChunk(htud, filedata, filedata_len, STREAM_TOSERVER); - if (result == -1) { - goto end; - } else if (result == -2) { - /* we know for sure we're not storing the file */ - htud->tsflags |= HTP_DONTSTORE; - } - } + // libhtp will not call us back too late + // should libhtp send a callback eof for 0 chunked ? + DEBUG_VALIDATE_BUG_ON(AppLayerParserGetStateProgress(IPPROTO_TCP, ALPROTO_HTTP1, tx, + STREAM_TOSERVER) >= HTP_REQUEST_COMPLETE); - htud->request_body.body_parsed += filedata_len; - } else { - SCLogDebug("chunk too small to already process in part"); - } + const uint8_t *cur_buf = chunks_buffer; + uint32_t cur_buf_len = chunks_buffer_len; - goto end; + if (eof) { + // abrupt end of connection + if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) { + /* we currently only handle multipart for ts. When we support it for tc, + * we will need to supply right direction */ + HTPFileClose(htud, cur_buf, cur_buf_len, FILE_TRUNCATED, STREAM_TOSERVER); } + htud->tsflags &= ~HTP_FILENAME_SET; + goto end; } - while (header_start != NULL && header_end != NULL && - header_end != form_end && - header_start < (chunks_buffer + chunks_buffer_len) && - header_end < (chunks_buffer + chunks_buffer_len) && - header_start < header_end) - { - uint8_t *filename = NULL; - uint16_t filename_len = 0; - uint8_t *filetype = NULL; - uint16_t filetype_len = 0; - - uint32_t header_len = header_end - header_start; - SCLogDebug("header_len %u", header_len); - uint8_t *header = (uint8_t *)header_start; - - /* skip empty records */ - if (expected_boundary_len == header_len) { - goto next; - } else if ((uint32_t)(expected_boundary_len + 2) <= header_len) { - header_len -= (expected_boundary_len + 2); - header = (uint8_t *)header_start + (expected_boundary_len + 2); // + for 0d 0a + uint32_t consumed; + uint32_t warnings; + int result = 0; + const uint8_t *filename = NULL; + uint16_t filename_len = 0; + + // keep parsing mime and use callbacks when needed + while (cur_buf_len > 0) { + MimeParserResult r = + rs_mime_parse(htud->mime_state, cur_buf, cur_buf_len, &consumed, &warnings); + DEBUG_VALIDATE_BUG_ON(consumed > cur_buf_len); + htud->request_body.body_parsed += consumed; + if (warnings) { + if (warnings & MIME_EVENT_FLAG_INVALID_HEADER) { + HTPSetEvent( + hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_MULTIPART_INVALID_HEADER); + } + if (warnings & MIME_EVENT_FLAG_NO_FILEDATA) { + HTPSetEvent( + hstate, htud, STREAM_TOSERVER, HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA); + } } - - HtpRequestBodyMultipartParseHeader(hstate, htud, header, header_len, - &filename, &filename_len, &filetype, &filetype_len); - - if (filename != NULL) { - const uint8_t *filedata = NULL; - uint32_t filedata_len = 0; - - SCLogDebug("we have a filename"); - - htud->tsflags |= HTP_FILENAME_SET; - htud->tsflags &= ~HTP_DONTSTORE; - - SCLogDebug("header_end %p", header_end); - SCLogDebug("form_end %p", form_end); - - /* everything until the final boundary is the file */ - if (form_end != NULL) { - SCLogDebug("have form_end"); - - filedata = header_end + 4; - if (form_end == filedata) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_NO_FILEDATA); - goto end; - } else if (form_end < filedata) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR); - goto end; - } - - filedata_len = form_end - (header_end + 4 + 2); - SCLogDebug("filedata_len %"PRIuMAX, (uintmax_t)filedata_len); - - /* or is it? */ - uint8_t *header_next = Bs2bmSearch(filedata, filedata_len, - boundary, expected_boundary_len); - if (header_next != NULL) { - filedata_len -= (form_end - header_next); - } - - if (filedata_len > chunks_buffer_len) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR); - goto end; - } - SCLogDebug("filedata_len %"PRIuMAX, (uintmax_t)filedata_len); -#ifdef PRINT - printf("FILEDATA START: \n"); - PrintRawDataFp(stdout, filedata, filedata_len); - printf("FILEDATA END: \n"); -#endif - - result = HTPFileOpen(hstate, htud, filename, filename_len, filedata, filedata_len, - STREAM_TOSERVER); - if (result == -1) { - goto end; - } else if (result == -2) { - htud->tsflags |= HTP_DONTSTORE; - } else { - if (HTPFileClose(htud, NULL, 0, 0, STREAM_TOSERVER) == -1) { - goto end; - } - } - FlagDetectStateNewFile(htud, STREAM_TOSERVER); - - htud->request_body.body_parsed += (header_end - chunks_buffer); - htud->tsflags &= ~HTP_FILENAME_SET; - } else { - SCLogDebug("chunk doesn't contain form end"); - - filedata = header_end + 4; - filedata_len = chunks_buffer_len - (filedata - chunks_buffer); - SCLogDebug("filedata_len %u (chunks_buffer_len %u)", filedata_len, chunks_buffer_len); - - if (filedata_len > chunks_buffer_len) { - HTPSetEvent(hstate, htud, STREAM_TOSERVER, - HTTP_DECODER_EVENT_MULTIPART_GENERIC_ERROR); - goto end; - } - -#ifdef PRINT - printf("FILEDATA START: \n"); - PrintRawDataFp(stdout, filedata, filedata_len); - printf("FILEDATA END: \n"); -#endif - /* form doesn't end in this chunk, but the part might. Lets - * see if have another coming up */ - uint8_t *header_next = Bs2bmSearch(filedata, filedata_len, - boundary, expected_boundary_len); - SCLogDebug("header_next %p", header_next); - if (header_next == NULL) { - SCLogDebug("more file data to come"); - - uint32_t offset = (header_end + 4) - chunks_buffer; - SCLogDebug("offset %u", offset); - htud->request_body.body_parsed += offset; - - if (filedata_len >= (uint32_t)(expected_boundary_len + 2)) { - filedata_len -= (expected_boundary_len + 2 - 1); - // take as much as we can until start of boundary - for (size_t nb = 0; nb < (size_t)expected_boundary_len + 1; nb++) { - if (filedata[filedata_len] == '\r') { - if (nb == expected_boundary_len || - filedata[filedata_len + 1] == '\n') { - break; - } - } - filedata_len++; - } - SCLogDebug("opening file with partial data"); - } else { - filedata = NULL; - filedata_len = 0; - } - result = HTPFileOpen(hstate, htud, filename, filename_len, filedata, - filedata_len, STREAM_TOSERVER); + switch (r) { + case MimeNeedsMore: + // there is not enough data, wait for more next time + goto end; + case MimeFileOpen: + // get filename owned by mime state + rs_mime_state_get_filename(htud->mime_state, &filename, &filename_len); + if (filename_len > 0) { + htud->tsflags |= HTP_FILENAME_SET; + htud->tsflags &= ~HTP_DONTSTORE; + result = HTPFileOpen( + hstate, htud, filename, filename_len, NULL, 0, STREAM_TOSERVER); if (result == -1) { goto end; } else if (result == -2) { htud->tsflags |= HTP_DONTSTORE; } FlagDetectStateNewFile(htud, STREAM_TOSERVER); - htud->request_body.body_parsed += filedata_len; - SCLogDebug("htud->request_body.body_parsed %"PRIu64, htud->request_body.body_parsed); - - } else if (header_next - filedata > 2) { - filedata_len = header_next - filedata - 2; - SCLogDebug("filedata_len %u", filedata_len); - - result = HTPFileOpen(hstate, htud, filename, filename_len, filedata, - filedata_len, STREAM_TOSERVER); + } + break; + case MimeFileChunk: + if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) { + result = HTPFileStoreChunk(htud, cur_buf, consumed, STREAM_TOSERVER); if (result == -1) { goto end; } else if (result == -2) { + /* we know for sure we're not storing the file */ htud->tsflags |= HTP_DONTSTORE; - } else { - if (HTPFileClose(htud, NULL, 0, 0, STREAM_TOSERVER) == -1) { - goto end; + } + } + break; + case MimeFileClose: + if (htud->tsflags & HTP_FILENAME_SET && !(htud->tsflags & HTP_DONTSTORE)) { + uint32_t lastsize = consumed; + if (lastsize > 0 && cur_buf[lastsize - 1] == '\n') { + lastsize--; + if (lastsize > 0 && cur_buf[lastsize - 1] == '\r') { + lastsize--; } } - FlagDetectStateNewFile(htud, STREAM_TOSERVER); - - htud->tsflags &= ~HTP_FILENAME_SET; - htud->request_body.body_parsed += (header_end - chunks_buffer); + HTPFileClose(htud, cur_buf, lastsize, 0, STREAM_TOSERVER); } - } - } -next: - SCLogDebug("header_start %p, header_end %p, form_end %p", - header_start, header_end, form_end); - - /* Search next boundary entry after the start of body */ - uint32_t cursizeread = header_end - chunks_buffer; - header_start = Bs2bmSearch(header_end + 4, - chunks_buffer_len - (cursizeread + 4), - boundary, expected_boundary_len); - if (header_start != NULL) { - header_end = Bs2bmSearch(header_end + 4, - chunks_buffer_len - (cursizeread + 4), - (uint8_t *) "\r\n\r\n", 4); - } - } - - /* if we're parsing the multipart and we're not currently processing a - * file, we move the body pointer forward. */ - if (form_end == NULL && !(htud->tsflags & HTP_FILENAME_SET) && header_start == NULL) { - if (chunks_buffer_len > expected_boundary_end_len) { - uint32_t move = chunks_buffer_len - expected_boundary_end_len + 1; - - htud->request_body.body_parsed += move; - SCLogDebug("form not ready, file not set, parsing non-file " - "record: moved %u", move); + htud->tsflags &= ~HTP_FILENAME_SET; + break; + // TODO event on parsing error ? } + cur_buf += consumed; + cur_buf_len -= consumed; } end: @@ -1931,7 +1509,8 @@ static int HTPCallbackRequestBodyData(htp_tx_data_t *d) printf("REASSCHUNK END: \n"); #endif - HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len); + HtpRequestBodyHandleMultipart(hstate, tx_ud, d->tx, chunks_buffer, chunks_buffer_len, + (d->data == NULL && d->len == 0)); } else if (tx_ud->request_body_type == HTP_BODY_REQUEST_POST || tx_ud->request_body_type == HTP_BODY_REQUEST_PUT) { @@ -6030,7 +5609,11 @@ static int HTPBodyReassemblyTest01(void) printf("REASSCHUNK END: \n"); #endif - HtpRequestBodyHandleMultipart(&hstate, &htud, &tx, chunks_buffer, chunks_buffer_len); + htud.mime_state = rs_mime_state_init((const uint8_t *)"multipart/form-data; boundary=toto", + strlen("multipart/form-data; boundary=toto")); + FAIL_IF_NULL(htud.mime_state); + htud.tsflags |= HTP_BOUNDARY_SET; + HtpRequestBodyHandleMultipart(&hstate, &htud, &tx, chunks_buffer, chunks_buffer_len, false); if (htud.request_body.content_len_so_far != 669) { printf("htud.request_body.content_len_so_far %"PRIu64": ", htud.request_body.content_len_so_far); diff --git a/src/app-layer-htp.h b/src/app-layer-htp.h index 6b376285434d..9c39ba393ffd 100644 --- a/src/app-layer-htp.h +++ b/src/app-layer-htp.h @@ -58,9 +58,6 @@ #define HTP_CONFIG_DEFAULT_RANDOMIZE 1 #define HTP_CONFIG_DEFAULT_RANDOMIZE_RANGE 10 -/** a boundary should be smaller in size */ -#define HTP_BOUNDARY_MAX 200U - // 0x0001 not used #define HTP_FLAG_STATE_CLOSED_TS 0x0002 /**< Flag to indicate that HTTP connection is closed */ @@ -212,8 +209,6 @@ typedef struct HtpTxUserData_ { uint8_t request_has_trailers; uint8_t response_has_trailers; - uint8_t boundary_len; - uint8_t tsflags; uint8_t tcflags; @@ -229,10 +224,7 @@ typedef struct HtpTxUserData_ { uint32_t request_headers_raw_len; uint32_t response_headers_raw_len; - /** Holds the boundary identification string if any (used on - * multipart/form-data only) - */ - uint8_t *boundary; + MimeStateHTTP *mime_state; HttpRangeContainerBlock *file_range; /**< used to assign track ids to range file */