suricata
util-decode-mime.h
Go to the documentation of this file.
1 /* Copyright (C) 2012 BAE Systems
2  * Copyright (C) 2021 Open Information Security Foundation
3  *
4  * You can copy, redistribute or modify this Program under the terms of
5  * the GNU General Public License version 2 as published by the Free
6  * Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11  * GNU General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public License
14  * version 2 along with this program; if not, write to the Free Software
15  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
16  * 02110-1301, USA.
17  */
18 
19 /**
20  * \file
21  *
22  * \author David Abarbanel <david.abarbanel@baesystems.com>
23  *
24  */
25 
26 #ifndef MIME_DECODE_H_
27 #define MIME_DECODE_H_
28 
29 #include "conf.h"
30 #include "util-base64.h"
31 #include "util-file.h"
32 
33 /* Content Flags */
/* Every value below is a distinct single bit (1 through 512), so several
 * flags can be OR-ed into one word and tested with a bitwise AND.
 * NOTE(review): presumably stored in MimeDecEntity::ctnt_flags -- confirm. */
34 #define CTNT_IS_MSG 1
35 #define CTNT_IS_ENV 2
36 #define CTNT_IS_ENCAP 4
37 #define CTNT_IS_BODYPART 8
38 #define CTNT_IS_MULTIPART 16
39 #define CTNT_IS_ATTACHMENT 32
40 #define CTNT_IS_BASE64 64
41 #define CTNT_IS_QP 128
42 #define CTNT_IS_TEXT 256
43 #define CTNT_IS_HTML 512
44 
45 /* URL Flags */
/* Distinct single-bit values (1, 2, 4), so they can be combined with OR.
 * NOTE(review): presumably stored in MimeDecUrl::url_flags -- confirm. */
46 #define URL_IS_IP4 1
47 #define URL_IS_IP6 2
48 #define URL_IS_EXE 4
49 
50 /* Anomaly Flags */
/* Distinct single-bit values (1 through 256), so several anomalies can be
 * recorded in one word.
 * NOTE(review): presumably stored in MimeDecEntity::anomaly_flags -- confirm. */
51 #define ANOM_INVALID_BASE64 1 /* invalid base64 chars */
52 #define ANOM_INVALID_QP 2 /* invalid quoted-printable chars */
53 #define ANOM_LONG_HEADER_NAME 4 /* header name is abnormally long */
54 #define ANOM_LONG_HEADER_VALUE 8 /* header value is abnormally long
55  * (includes multi-line) */
56 #define ANOM_LONG_LINE 16 /* Lines that exceed 998 octets */
57 #define ANOM_LONG_ENC_LINE 32 /* Lines that exceed 76 octets */
58 #define ANOM_MALFORMED_MSG 64 /* Misc msg format errors found */
59 #define ANOM_LONG_BOUNDARY 128 /* Boundary too long */
60 #define ANOM_LONG_FILENAME 256 /* filename truncated */
61 
62 /* Publicly exposed size constants */
/* Size in bytes of MimeDecParseState::data_chunk.
 * NOTE(review): the divisible-by-3 requirement presumably relates to base64
 * decoding producing 3 output bytes per group -- confirm in the .c file. */
63 #define DATA_CHUNK_SIZE 3072 /* Should be divisible by 3 */
64 
65 /* Mime Parser Constants */
/* Sequential values 0x01 through 0x08, not bit masks: HEADER_DONE (0x03)
 * equals HEADER_READY | HEADER_STARTED, so compare these with ==, never with
 * a bitwise AND.
 * NOTE(review): presumably the values held in MimeDecParseState::state_flag
 * -- confirm. */
66 #define HEADER_READY 0x01
67 #define HEADER_STARTED 0x02
68 #define HEADER_DONE 0x03
69 #define BODY_STARTED 0x04
70 #define BODY_DONE 0x05
71 #define BODY_END_BOUND 0x06
72 #define PARSE_DONE 0x07
73 #define PARSE_ERROR 0x08
74 
75 /**
76  * \brief Mime Decoder Error Codes
77  */
78 typedef enum MimeDecRetCode {
84  MIME_DEC_ERR_STATE = -4, /**< parser in error state */
87 
88 /**
89  * \brief Structure for containing configuration options
90  *
91  */
92 typedef struct MimeDecConfig {
93  bool decode_base64; /**< Decode base64 bodies */
94  bool decode_quoted_printable; /**< Decode quoted-printable bodies */
95  bool extract_urls; /**< Extract and store URLs in data structure */
96  ConfNode *extract_urls_schemes; /**< List of schemes for which to
97  extract URLs */
98  bool log_url_scheme; /**< Log the scheme of extracted URLs */
99  bool body_md5; /**< Compute md5 sum of body */
100  uint32_t header_value_depth; /**< Depth to which header values are stored
101  (Default is 2000) */
102 } MimeDecConfig;
103 
104 /**
105  * \brief This represents a header field name and associated value
106  */
107 typedef struct MimeDecField {
/* NOTE(review): name and value are byte buffers paired with explicit
 * lengths; whether they are also NUL-terminated is not visible from this
 * header -- confirm before treating them as C strings. */
108  uint8_t *name; /**< Name of the header field */
109  uint32_t name_len; /**< Length of the name */
110  uint32_t value_len; /**< Length of the value */
111  uint8_t *value; /**< Value of the header field */
112  struct MimeDecField *next; /**< Pointer to next field in the list */
113 } MimeDecField;
114 
115 /**
116  * \brief This represents a URL value node in a linked list
117  *
118  * Since HTML can sometimes contain a high number of URLs, this
119  * structure only features the URL host name/IP or those that are
120  * pointing to an executable file (see url_flags to determine which).
121  */
122 typedef struct MimeDecUrl {
123  uint8_t *url; /**< String representation of full or partial URL (lowercase) */
124  uint32_t url_len; /**< Length of the URL string */
125  uint32_t url_flags; /**< Flags indicating type of URL
                          (NOTE(review): presumably URL_IS_* bits -- confirm) */
126  struct MimeDecUrl *next; /**< Pointer to next URL in the list */
127 } MimeDecUrl;
128 
129 /**
130  * \brief This represents the MIME Entity (or also top level message) in a
131  * child-sibling tree
132  */
133 typedef struct MimeDecEntity {
134  MimeDecField *field_list; /**< Pointer to list of header fields */
135  MimeDecUrl *url_list; /**< Pointer to list of URLs */
136  uint32_t header_flags; /**< Flags indicating header characteristics */
137  uint32_t ctnt_flags; /**< Flags indicating type of content */
138  uint32_t anomaly_flags; /**< Flags indicating an anomaly in the message */
139  uint32_t filename_len; /**< Length of file attachment name */
140  uint8_t *filename; /**< Name of file attachment */
141  uint8_t *ctnt_type; /**< Quick access pointer to short-hand content type field */
142  uint32_t ctnt_type_len; /**< Length of content type field value */
143  uint32_t msg_id_len; /**< Length of the message Id */
144  uint8_t *msg_id; /**< Quick access pointer to message Id */
145  struct MimeDecEntity *next; /**< Pointer to list of sibling entities */
146  struct MimeDecEntity *child; /**< Pointer to list of child entities */
147  struct MimeDecEntity *last_child; /**< Pointer to tail of the list of child entities */
148 } MimeDecEntity;
149 
150 /**
151  * \brief Structure contains boundary and entity for the current node (entity)
152  * in the stack
153  *
154  */
155 typedef struct MimeDecStackNode {
156  MimeDecEntity *data; /**< Pointer to the entity data structure */
157  uint8_t *bdef; /**< Copy of boundary definition for child entity */
158  uint16_t bdef_len; /**< Boundary length for child entity */
159  bool is_encap; /**< Flag indicating entity is encapsulated in message */
160  struct MimeDecStackNode *next; /**< Pointer to next item on the stack */
162 
163 /**
164  * \brief Structure holds the top of the stack along with some free reusable nodes
165  *
166  */
167 typedef struct MimeDecStack {
168  MimeDecStackNode *top; /**< Pointer to the top of the stack */
169  MimeDecStackNode *free_nodes; /**< Pointer to the list of free (reusable) nodes */
170  uint32_t free_nodes_cnt; /**< Count of free nodes in the list */
171 } MimeDecStack;
172 
173 /**
174  * \brief Node in a singly linked list of data values and their lengths, used for robust data processing
175  *
 * MimeDecParseState::hvalue points at such a list while a header value is
 * still incomplete.
176  */
177 typedef struct DataValue {
178  uint8_t *value; /**< Copy of data value */
179  uint32_t value_len; /**< Length of data value */
180  struct DataValue *next; /**< Pointer to next value in the list */
181 } DataValue;
182 
183 /**
184  * \brief Structure contains the current state of the MIME parser
185  *
186  */
187 typedef struct MimeDecParseState {
188  MimeDecEntity *msg; /**< Pointer to the top-level message entity */
189  MimeDecStack *stack; /**< Pointer to the top of the entity stack */
190  uint8_t *hname; /**< Copy of the last known header name */
191  uint32_t hlen; /**< Length of the last known header name */
192  uint32_t hvlen; /**< Total length of value list */
193  DataValue *hvalue; /**< Pointer to the incomplete header value list */
194  uint8_t bvremain[B64_BLOCK]; /**< Remainder from base64-decoded line */
195  uint8_t bvr_len; /**< Length of remainder from base64-decoded line */
196  uint8_t data_chunk[DATA_CHUNK_SIZE]; /**< Buffer holding data chunk */
198  uint8_t md5[SC_MD5_LEN];
199  bool has_md5;
200  uint8_t state_flag; /**< Flag representing current state of parser */
201  uint32_t data_chunk_len; /**< Length of data chunk */
202  int found_child; /**< Flag indicating a child entity was found */
203  int body_begin; /**< Currently at beginning of body */
204  int body_end; /**< Currently at end of body */
205  uint8_t current_line_delimiter_len; /**< Length of line delimiter */
206  void *data; /**< Pointer to data specific to the caller */
207  int (*DataChunkProcessorFunc) (const uint8_t *chunk, uint32_t len,
208  struct MimeDecParseState *state); /**< Data chunk processing function callback */
210 
211 /* Config functions */
/** Set global config policy. */
212 void MimeDecSetConfig(MimeDecConfig *config);
214 
215 /* Memory functions */
/** Frees a mime entity tree. */
216 void MimeDecFreeEntity(MimeDecEntity *entity);
/** Iteratively frees a header field entry list. */
217 void MimeDecFreeField(MimeDecField *field);
/** Iteratively frees a URL entry list. */
218 void MimeDecFreeUrl(MimeDecUrl *url);
219 
220 /* List functions */
/** Searches for a header field with the specified name. */
222 MimeDecField * MimeDecFindField(const MimeDecEntity *entity, const char *name);
/** Searches for header fields with the specified name; takes a DataCallback
 *  and an opaque data pointer.
 *  NOTE(review): presumably the callback is invoked per matching field with
 *  that data pointer -- confirm in the .c file. */
223 int MimeDecFindFieldsForEach(const MimeDecEntity *entity, const char *name, int (*DataCallback)(const uint8_t *val, const size_t, void *data), void *data);
225 
226 /* Helper functions */
227 //MimeDecField * MimeDecFillField(MimeDecEntity *entity, const char *name,
228 // uint32_t nlen, const char *value, uint32_t vlen, int copy_name_value);
229 
230 /* Parser functions */
/** Init the parser by allocating memory for the state and top-level entity.
 *  dcpfunc is the data chunk processing callback. */
231 MimeDecParseState * MimeDecInitParser(void *data, int (*dcpfunc)(const uint8_t *chunk,
232  uint32_t len, MimeDecParseState *state));
/** Parse a line of a MIME message and update the parser state. */
235 int MimeDecParseLine(const uint8_t *line, const uint32_t len, const uint8_t delim_len, MimeDecParseState *state);
/** Parses an entire message when available in its entirety (wraps the
 *  line-based parsing functions). */
236 MimeDecEntity * MimeDecParseFullMsg(const uint8_t *buf, uint32_t blen, void *data,
237  int (*DataChunkProcessorFunc)(const uint8_t *chunk, uint32_t len, MimeDecParseState *state));
239 
240 /* Test functions */
241 void MimeDecRegisterTests(void);
242 
243 #endif
MIME_DEC_MORE
@ MIME_DEC_MORE
Definition: util-decode-mime.h:79
MimeDecAddEntity
MimeDecEntity * MimeDecAddEntity(MimeDecEntity *parent)
Creates and adds a child entity to the specified parent entity.
Definition: util-decode-mime.c:383
MimeDecParseState::data_chunk
uint8_t data_chunk[DATA_CHUNK_SIZE]
Definition: util-decode-mime.h:195
MimeDecEntity::ctnt_flags
uint32_t ctnt_flags
Definition: util-decode-mime.h:136
MimeDecEntity::child
struct MimeDecEntity * child
Definition: util-decode-mime.h:145
MimeDecInitParser
MimeDecParseState * MimeDecInitParser(void *data, int(*dcpfunc)(const uint8_t *chunk, uint32_t len, MimeDecParseState *state))
Init the parser by allocating memory for the state and top-level entity.
Definition: util-decode-mime.c:2404
len
uint8_t len
Definition: app-layer-dnp3.h:2
MIME_DEC_OK
@ MIME_DEC_OK
Definition: util-decode-mime.h:78
MIME_DEC_ERR_MEM
@ MIME_DEC_ERR_MEM
Definition: util-decode-mime.h:81
MimeDecFreeField
void MimeDecFreeField(MimeDecField *field)
Iteratively frees a header field entry list.
Definition: util-decode-mime.c:209
DataValue::next
struct DataValue * next
Definition: util-decode-mime.h:179
MimeDecEntity::ctnt_type_len
uint32_t ctnt_type_len
Definition: util-decode-mime.h:141
MimeDecParseState::md5_ctx
SCMd5 * md5_ctx
Definition: util-decode-mime.h:196
MimeDecConfig::header_value_depth
uint32_t header_value_depth
Definition: util-decode-mime.h:99
MimeDecField::value
uint8_t * value
Definition: util-decode-mime.h:110
MimeDecEntity::last_child
struct MimeDecEntity * last_child
Definition: util-decode-mime.h:146
MimeDecStack
Structure holds the top of the stack along with some free reusable nodes.
Definition: util-decode-mime.h:166
MimeDecParseState::data
void * data
Definition: util-decode-mime.h:205
MimeDecEntity::filename
uint8_t * filename
Definition: util-decode-mime.h:139
MimeDecParseState::bvremain
uint8_t bvremain[B64_BLOCK]
Definition: util-decode-mime.h:193
MimeDecStackNode::bdef_len
uint16_t bdef_len
Definition: util-decode-mime.h:157
MimeDecParseComplete
int MimeDecParseComplete(MimeDecParseState *state)
Called to indicate that the last message line has been processed and the parsing operation is complete.
Definition: util-decode-mime.c:2490
MimeDecParseState::current_line_delimiter_len
uint8_t current_line_delimiter_len
Definition: util-decode-mime.h:204
MimeDecEntity::field_list
MimeDecField * field_list
Definition: util-decode-mime.h:133
MimeDecStack::top
MimeDecStackNode * top
Definition: util-decode-mime.h:167
MimeDecEntity::filename_len
uint32_t filename_len
Definition: util-decode-mime.h:138
DataValue::value_len
uint32_t value_len
Definition: util-decode-mime.h:178
util-base64.h
MimeDecRetCode
MimeDecRetCode
Mime Decoder Error Codes.
Definition: util-decode-mime.h:77
MimeDecParseState::found_child
int found_child
Definition: util-decode-mime.h:201
MimeDecUrl::next
struct MimeDecUrl * next
Definition: util-decode-mime.h:125
MimeDecField
This represents a header field name and associated value.
Definition: util-decode-mime.h:106
MimeDecParseState::hvlen
uint32_t hvlen
Definition: util-decode-mime.h:191
MimeDecEntity::url_list
MimeDecUrl * url_list
Definition: util-decode-mime.h:134
MIME_DEC_ERR_OVERFLOW
@ MIME_DEC_ERR_OVERFLOW
Definition: util-decode-mime.h:84
MimeDecAddField
MimeDecField * MimeDecAddField(MimeDecEntity *entity)
Creates and adds a header field entry to an entity.
Definition: util-decode-mime.c:267
MimeDecUrl::url_len
uint32_t url_len
Definition: util-decode-mime.h:123
MimeDecConfig
struct MimeDecConfig MimeDecConfig
Structure for containing configuration options.
MimeDecField::name
uint8_t * name
Definition: util-decode-mime.h:107
MimeDecParseState::body_begin
int body_begin
Definition: util-decode-mime.h:202
MimeDecParseState
struct MimeDecParseState MimeDecParseState
Structure contains the current state of the MIME parser.
MimeDecStackNode::bdef
uint8_t * bdef
Definition: util-decode-mime.h:156
MimeDecConfig::extract_urls_schemes
ConfNode * extract_urls_schemes
Definition: util-decode-mime.h:95
MimeDecEntity::ctnt_type
uint8_t * ctnt_type
Definition: util-decode-mime.h:140
MimeDecFindFieldsForEach
int MimeDecFindFieldsForEach(const MimeDecEntity *entity, const char *name, int(*DataCallback)(const uint8_t *val, const size_t, void *data), void *data)
Searches for header fields with the specified name.
Definition: util-decode-mime.c:297
MimeDecParseState::stack
MimeDecStack * stack
Definition: util-decode-mime.h:188
DataValue
struct DataValue DataValue
Structure contains a list of value and lengths for robust data processing.
MIME_DEC_ERR_PARSE
@ MIME_DEC_ERR_PARSE
Definition: util-decode-mime.h:82
MimeDecSetConfig
void MimeDecSetConfig(MimeDecConfig *config)
Set global config policy.
Definition: util-decode-mime.c:127
MimeDecParseState::hvalue
DataValue * hvalue
Definition: util-decode-mime.h:192
MimeDecStackNode::is_encap
bool is_encap
Definition: util-decode-mime.h:158
MimeDecField::next
struct MimeDecField * next
Definition: util-decode-mime.h:111
conf.h
MimeDecStackNode
Structure contains boundary and entity for the current node (entity) in the stack.
Definition: util-decode-mime.h:154
MimeDecRegisterTests
void MimeDecRegisterTests(void)
Definition: util-decode-mime.c:3562
MimeDecConfig::decode_base64
bool decode_base64
Definition: util-decode-mime.h:92
MimeDecParseState::hlen
uint32_t hlen
Definition: util-decode-mime.h:190
MimeDecParseState::data_chunk_len
uint32_t data_chunk_len
Definition: util-decode-mime.h:200
MimeDecParseState::hname
uint8_t * hname
Definition: util-decode-mime.h:189
MimeDecStackNode::next
struct MimeDecStackNode * next
Definition: util-decode-mime.h:159
MimeDecUrl::url_flags
uint32_t url_flags
Definition: util-decode-mime.h:124
MimeDecConfig::log_url_scheme
bool log_url_scheme
Definition: util-decode-mime.h:97
MimeDecConfig
Structure for containing configuration options.
Definition: util-decode-mime.h:91
MimeDecParseStateGetStatus
const char * MimeDecParseStateGetStatus(MimeDecParseState *state)
Definition: util-decode-mime.c:2319
MimeDecEntity::msg_id
uint8_t * msg_id
Definition: util-decode-mime.h:143
MimeDecStackNode
struct MimeDecStackNode MimeDecStackNode
Structure contains boundary and entity for the current node (entity) in the stack.
MimeDecParseState::msg
MimeDecEntity * msg
Definition: util-decode-mime.h:187
util-file.h
MimeDecFreeUrl
void MimeDecFreeUrl(MimeDecUrl *url)
Iteratively frees a URL entry list.
Definition: util-decode-mime.c:238
MimeDecField::value_len
uint32_t value_len
Definition: util-decode-mime.h:109
MimeDecParseState::has_md5
bool has_md5
Definition: util-decode-mime.h:198
MimeDecParseFullMsg
MimeDecEntity * MimeDecParseFullMsg(const uint8_t *buf, uint32_t blen, void *data, int(*DataChunkProcessorFunc)(const uint8_t *chunk, uint32_t len, MimeDecParseState *state))
Parses an entire message when available in its entirety (wraps the line-based parsing functions)
Definition: util-decode-mime.c:2592
MimeDecEntity::anomaly_flags
uint32_t anomaly_flags
Definition: util-decode-mime.h:137
MimeDecField
struct MimeDecField MimeDecField
This represents a header field name and associated value.
MIME_DEC_ERR_STATE
@ MIME_DEC_ERR_STATE
Definition: util-decode-mime.h:83
MimeDecConfig::decode_quoted_printable
bool decode_quoted_printable
Definition: util-decode-mime.h:93
MimeDecEntity
struct MimeDecEntity MimeDecEntity
This represents the MIME Entity (or also top level message) in a child-sibling tree.
MimeDecConfig::extract_urls
bool extract_urls
Definition: util-decode-mime.h:94
MimeDecParseState::body_end
int body_end
Definition: util-decode-mime.h:203
MimeDecParseState
Structure contains the current state of the MIME parser.
Definition: util-decode-mime.h:186
MimeDecGetConfig
MimeDecConfig * MimeDecGetConfig(void)
Get global config policy.
Definition: util-decode-mime.c:146
MimeDecDeInitParser
void MimeDecDeInitParser(MimeDecParseState *state)
De-Init parser by freeing up any residual memory.
Definition: util-decode-mime.c:2454
DATA_CHUNK_SIZE
#define DATA_CHUNK_SIZE
Definition: util-decode-mime.h:62
ConfNode_
Definition: conf.h:32
MimeDecUrl
This represents a URL value node in a linked list.
Definition: util-decode-mime.h:121
MimeDecStack::free_nodes_cnt
uint32_t free_nodes_cnt
Definition: util-decode-mime.h:169
MimeDecFindField
MimeDecField * MimeDecFindField(const MimeDecEntity *entity, const char *name)
Searches for a header field with the specified name.
Definition: util-decode-mime.c:325
MimeDecParseState::bvr_len
uint8_t bvr_len
Definition: util-decode-mime.h:194
DataValue
Structure contains a list of value and lengths for robust data processing.
Definition: util-decode-mime.h:176
B64_BLOCK
#define B64_BLOCK
Definition: util-base64.h:32
MimeDecEntity::msg_id_len
uint32_t msg_id_len
Definition: util-decode-mime.h:142
MimeDecEntity::next
struct MimeDecEntity * next
Definition: util-decode-mime.h:144
MimeDecField::name_len
uint32_t name_len
Definition: util-decode-mime.h:108
SC_MD5_LEN
#define SC_MD5_LEN
Definition: util-file.h:43
MimeDecStack::free_nodes
MimeDecStackNode * free_nodes
Definition: util-decode-mime.h:168
MimeDecStack
struct MimeDecStack MimeDecStack
Structure holds the top of the stack along with some free reusable nodes.
MimeDecEntity::header_flags
uint32_t header_flags
Definition: util-decode-mime.h:135
MimeDecFreeEntity
void MimeDecFreeEntity(MimeDecEntity *entity)
Frees a mime entity tree.
Definition: util-decode-mime.c:176
MimeDecEntity
This represents the MIME Entity (or also top level message) in a child-sibling tree.
Definition: util-decode-mime.h:132
MimeDecParseState::state_flag
uint8_t state_flag
Definition: util-decode-mime.h:199
MimeDecConfig::body_md5
bool body_md5
Definition: util-decode-mime.h:98
SCMd5
struct SCMd5 SCMd5
Definition: util-file.h:42
MIME_DEC_ERR_DATA
@ MIME_DEC_ERR_DATA
Definition: util-decode-mime.h:80
MimeDecStackNode::data
MimeDecEntity * data
Definition: util-decode-mime.h:155
MimeDecParseState::md5
uint8_t md5[SC_MD5_LEN]
Definition: util-decode-mime.h:197
MimeDecParseState::DataChunkProcessorFunc
int(* DataChunkProcessorFunc)(const uint8_t *chunk, uint32_t len, struct MimeDecParseState *state)
Definition: util-decode-mime.h:206
MimeDecParseLine
int MimeDecParseLine(const uint8_t *line, const uint32_t len, const uint8_t delim_len, MimeDecParseState *state)
Parse a line of a MIME message and update the parser state.
Definition: util-decode-mime.c:2558
MimeDecUrl::url
uint8_t * url
Definition: util-decode-mime.h:122
MimeDecUrl
struct MimeDecUrl MimeDecUrl
This represents a URL value node in a linked list.
DataValue::value
uint8_t * value
Definition: util-decode-mime.h:177