suricata
util-decode-mime.h
Go to the documentation of this file.
1 /* Copyright (C) 2012 BAE Systems
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author David Abarbanel <david.abarbanel@baesystems.com>
22  *
23  */
24 
25 #ifndef MIME_DECODE_H_
26 #define MIME_DECODE_H_
27 
28 #include <stdio.h>
29 #include <stdlib.h>
30 #include <stdint.h>
31 
32 #include "suricata.h"
33 #include "util-base64.h"
34 #include "util-debug.h"
35 
36 /* Content Flags */
37 #define CTNT_IS_MSG 1
38 #define CTNT_IS_ENV 2
39 #define CTNT_IS_ENCAP 4
40 #define CTNT_IS_BODYPART 8
41 #define CTNT_IS_MULTIPART 16
42 #define CTNT_IS_ATTACHMENT 32
43 #define CTNT_IS_BASE64 64
44 #define CTNT_IS_QP 128
45 #define CTNT_IS_TEXT 256
46 #define CTNT_IS_HTML 512
47 
48 /* URL Flags */
49 #define URL_IS_IP4 1
50 #define URL_IS_IP6 2
51 #define URL_IS_EXE 4
52 
53 /* Anomaly Flags */
54 #define ANOM_INVALID_BASE64 1 /* invalid base64 chars */
55 #define ANOM_INVALID_QP 2 /* invalid quoted-printable chars */
56 #define ANOM_LONG_HEADER_NAME 4 /* header name is abnormally long */
57 #define ANOM_LONG_HEADER_VALUE 8 /* header value is abnormally long
58  * (includes multi-line) */
59 #define ANOM_LONG_LINE 16 /* Lines that exceed 998 octets */
60 #define ANOM_LONG_ENC_LINE 32 /* Lines that exceed 76 octets */
61 #define ANOM_MALFORMED_MSG 64 /* Misc msg format errors found */
62 #define ANOM_LONG_BOUNDARY 128 /* Boundary too long */
63 
64 /* Publicly exposed size constants */
65 #define DATA_CHUNK_SIZE 3072 /* Should be divisible by 3 */
66 #define LINEREM_SIZE 256
67 
68 /* Mime Parser Constants */
69 #define HEADER_READY 0x01
70 #define HEADER_STARTED 0x02
71 #define HEADER_DONE 0x03
72 #define BODY_STARTED 0x04
73 #define BODY_DONE 0x05
74 #define BODY_END_BOUND 0x06
75 #define PARSE_DONE 0x07
76 #define PARSE_ERROR 0x08
77 
78 /**
79  * \brief Mime Decoder Error Codes
80  */
81 typedef enum MimeDecRetCode {
87  MIME_DEC_ERR_STATE = -4, /**< parser in error state */
89 
90 /**
91  * \brief Structure containing the decoder configuration options
92  *
93  */
94 typedef struct MimeDecConfig {
95  int decode_base64; /**< Decode base64 bodies */
96  int decode_quoted_printable; /**< Decode quoted-printable bodies */
97  int extract_urls; /**< Extract and store URLs in data structure */
98  int body_md5; /**< Compute md5 sum of body */
99  uint32_t header_value_depth; /**< Depth to which header values are stored
100  (Default is 2000) */
101 } MimeDecConfig;
102 
103 /**
104  * \brief This represents a header field name and associated value
105  */
106 typedef struct MimeDecField {
107  uint8_t *name; /**< Name of the header field */
108  uint32_t name_len; /**< Length of the name */
109  uint32_t value_len; /**< Length of the value */
110  uint8_t *value; /**< Value of the header field */
111  struct MimeDecField *next; /**< Pointer to next field */
112 } MimeDecField;
113 
114 /**
115  * \brief This represents a URL value node in a linked list
116  *
117  * Since HTML can sometimes contain a high number of URLs, this
118  * structure only features the URL host name/IP or those that are
119  * pointing to an executable file (see url_flags to determine which).
120  */
121 typedef struct MimeDecUrl {
122  uint8_t *url; /**< String representation of full or partial URL (lowercase) */
123  uint32_t url_len; /**< Length of the URL string */
124  uint32_t url_flags; /**< Flags indicating type of URL */
125  struct MimeDecUrl *next; /**< Pointer to next URL */
126 } MimeDecUrl;
127 
128 /**
129  * \brief This represents the MIME Entity (or also top level message) in a
130  * child-sibling tree
131  */
132 typedef struct MimeDecEntity {
133  MimeDecField *field_list; /**< Pointer to list of header fields */
134  MimeDecUrl *url_list; /**< Pointer to list of URLs */
135  uint32_t body_len; /**< Length of body (prior to any decoding) */
136  uint32_t decoded_body_len; /**< Length of body after decoding */
137  uint32_t header_flags; /**< Flags indicating header characteristics */
138  uint32_t ctnt_flags; /**< Flags indicating type of content */
139  uint32_t anomaly_flags; /**< Flags indicating an anomaly in the message */
140  uint32_t filename_len; /**< Length of file attachment name */
141  uint8_t *filename; /**< Name of file attachment */
142  uint8_t *ctnt_type; /**< Quick access pointer to short-hand content type field */
143  uint32_t ctnt_type_len; /**< Length of content type field value */
144  uint32_t msg_id_len; /**< Length of message Id */
145  uint8_t *msg_id; /**< Quick access pointer to message Id */
146  struct MimeDecEntity *next; /**< Pointer to list of sibling entities */
147  struct MimeDecEntity *child; /**< Pointer to list of child entities */
148 } MimeDecEntity;
149 
150 /**
151  * \brief Structure contains boundary and entity for the current node (entity)
152  * in the stack
153  *
154  */
155 typedef struct MimeDecStackNode {
156  MimeDecEntity *data; /**< Pointer to the entity data structure */
157  uint8_t *bdef; /**< Copy of boundary definition for child entity */
158  uint32_t bdef_len; /**< Boundary length for child entity */
159  int is_encap; /**< Flag indicating entity is encapsulated in message */
160  struct MimeDecStackNode *next; /**< Pointer to next item on the stack */
162 
163 /**
164  * \brief Structure holds the top of the stack along with some free reusable nodes
165  *
166  */
167 typedef struct MimeDecStack {
168  MimeDecStackNode *top; /**< Pointer to the top of the stack */
169  MimeDecStackNode *free_nodes; /**< Pointer to the list of free nodes */
170  uint32_t free_nodes_cnt; /**< Count of free nodes in the list */
171 } MimeDecStack;
172 
173 /**
174  * \brief Structure contains a list of values and lengths for robust data processing
175  *
176  */
177 typedef struct DataValue {
178  uint8_t *value; /**< Copy of data value */
179  uint32_t value_len; /**< Length of data value */
180  struct DataValue *next; /**< Pointer to next value in the list */
181 } DataValue;
182 
183 /**
184  * \brief Structure contains the current state of the MIME parser
185  *
186  */
187 typedef struct MimeDecParseState {
188  MimeDecEntity *msg; /**< Pointer to the top-level message entity */
189  MimeDecStack *stack; /**< Pointer to the top of the entity stack */
190  uint8_t *hname; /**< Copy of the last known header name */
191  uint32_t hlen; /**< Length of the last known header name */
192  uint32_t hvlen; /**< Total length of value list */
193  DataValue *hvalue; /**< Pointer to the incomplete header value list */
194  uint8_t linerem[LINEREM_SIZE]; /**< Remainder from previous line (for URL extraction) */
195  uint16_t linerem_len; /**< Length of remainder from previous line */
196  uint8_t bvremain[B64_BLOCK]; /**< Remainder from base64-decoded line */
197  uint8_t bvr_len; /**< Length of remainder from base64-decoded line */
198  uint8_t data_chunk[DATA_CHUNK_SIZE]; /**< Buffer holding data chunk */
199 #ifdef HAVE_NSS
200  HASHContext *md5_ctx;
201  uint8_t md5[MD5_LENGTH];
202 #endif
203  uint8_t state_flag; /**< Flag representing current state of parser */
204  uint32_t data_chunk_len; /**< Length of data chunk */
205  int found_child; /**< Flag indicating a child entity was found */
206  int body_begin; /**< Currently at beginning of body */
207  int body_end; /**< Currently at end of body */
208  uint8_t current_line_delimiter_len; /**< Length of line delimiter */
209  void *data; /**< Pointer to data specific to the caller */
210  int (*DataChunkProcessorFunc) (const uint8_t *chunk, uint32_t len,
211  struct MimeDecParseState *state); /**< Data chunk processing function callback */
213 
214 /* Config functions */
215 void MimeDecSetConfig(MimeDecConfig *config);
217 
218 /* Memory functions */
219 void MimeDecFreeEntity(MimeDecEntity *entity);
220 void MimeDecFreeField(MimeDecField *field);
221 void MimeDecFreeUrl(MimeDecUrl *url);
222 
223 /* List functions */
225 MimeDecField * MimeDecFindField(const MimeDecEntity *entity, const char *name);
226 int MimeDecFindFieldsForEach(const MimeDecEntity *entity, const char *name, int (*DataCallback)(const uint8_t *val, const size_t, void *data), void *data);
228 
229 /* Helper functions */
230 //MimeDecField * MimeDecFillField(MimeDecEntity *entity, const char *name,
231 // uint32_t nlen, const char *value, uint32_t vlen, int copy_name_value);
232 
233 /* Parser functions */
234 MimeDecParseState * MimeDecInitParser(void *data, int (*dcpfunc)(const uint8_t *chunk,
235  uint32_t len, MimeDecParseState *state));
238 int MimeDecParseLine(const uint8_t *line, const uint32_t len, const uint8_t delim_len, MimeDecParseState *state);
239 MimeDecEntity * MimeDecParseFullMsg(const uint8_t *buf, uint32_t blen, void *data,
240  int (*DataChunkProcessorFunc)(const uint8_t *chunk, uint32_t len, MimeDecParseState *state));
242 
243 #ifdef AFLFUZZ_MIME
244 int MimeParserDataFromFile(char *filename);
245 #endif
246 
247 /* Test functions */
248 void MimeDecRegisterTests(void);
249 
250 #endif
uint32_t value_len
struct MimeDecField MimeDecField
This represents a header field name and associated value.
struct MimeDecUrl * next
MimeDecField * field_list
Structure for containing configuration options.
int MimeDecFindFieldsForEach(const MimeDecEntity *entity, const char *name, int(*DataCallback)(const uint8_t *val, const size_t, void *data), void *data)
Searches for header fields with the specified name.
struct DataValue DataValue
Structure contains a list of value and lengths for robust data processing.
struct MimeDecField * next
#define DATA_CHUNK_SIZE
uint8_t * value
uint32_t decoded_body_len
void MimeDecFreeUrl(MimeDecUrl *url)
Iteratively frees a URL entry list.
const char * MimeDecParseStateGetStatus(MimeDecParseState *state)
uint8_t * url
uint8_t current_line_delimiter_len
MimeDecStack * stack
void MimeDecFreeEntity(MimeDecEntity *entity)
Frees a mime entity tree.
struct MimeDecUrl MimeDecUrl
This represents a URL value node in a linked list.
struct MimeDecStackNode MimeDecStackNode
Structure contains boundary and entity for the current node (entity) in the stack.
uint32_t url_len
uint32_t ctnt_type_len
MimeDecField * MimeDecFindField(const MimeDecEntity *entity, const char *name)
Searches for a header field with the specified name.
void MimeDecDeInitParser(MimeDecParseState *state)
De-Init parser by freeing up any residual memory.
uint8_t * ctnt_type
MimeDecConfig * MimeDecGetConfig(void)
Get global config policy.
MimeDecEntity * MimeDecParseFullMsg(const uint8_t *buf, uint32_t blen, void *data, int(*DataChunkProcessorFunc)(const uint8_t *chunk, uint32_t len, MimeDecParseState *state))
Parses an entire message when available in its entirety (wraps the line-based parsing functions) ...
This represents the MIME Entity (or also top level message) in a child-sibling tree.
Structure contains a list of value and lengths for robust data processing.
uint32_t value_len
MimeDecField * MimeDecAddField(MimeDecEntity *entity)
Creates and adds a header field entry to an entity.
void MimeDecSetConfig(MimeDecConfig *config)
Set global config policy.
struct MimeDecEntity * child
struct MimeDecStackNode * next
uint32_t free_nodes_cnt
void MimeDecRegisterTests(void)
int MimeDecParseLine(const uint8_t *line, const uint32_t len, const uint8_t delim_len, MimeDecParseState *state)
Parse a line of a MIME message and update the parser state.
uint32_t url_flags
MimeDecUrl * url_list
MimeDecEntity * MimeDecAddEntity(MimeDecEntity *parent)
Creates and adds a child entity to the specified parent entity.
void MimeDecFreeField(MimeDecField *field)
Iteratively frees a header field entry list.
MimeDecStackNode * free_nodes
Structure contains boundary and entity for the current node (entity) in the stack.
MimeDecRetCode
Mime Decoder Error Codes.
uint32_t header_flags
int MimeDecParseComplete(MimeDecParseState *state)
Called to indicate that the last message line has been processed and the parsing operation is complete.
struct MimeDecConfig MimeDecConfig
Structure for containing configuration options.
struct MimeDecEntity MimeDecEntity
This represents the MIME Entity (or also top level message) in a child-sibling tree.
MimeDecStackNode * top
#define LINEREM_SIZE
MimeDecEntity * msg
struct MimeDecParseState MimeDecParseState
Structure contains the current state of the MIME parser.
struct DataValue * next
uint32_t anomaly_flags
uint32_t header_value_depth
Structure holds the top of the stack along with some free reusable nodes.
MimeDecParseState * MimeDecInitParser(void *data, int(*dcpfunc)(const uint8_t *chunk, uint32_t len, MimeDecParseState *state))
Init the parser by allocating memory for the state and top-level entity.
struct MimeDecStack MimeDecStack
Structure holds the top of the stack along with some free reusable nodes.
Structure contains the current state of the MIME parser.
uint8_t * filename
uint8_t len
uint32_t filename_len
int(* DataChunkProcessorFunc)(const uint8_t *chunk, uint32_t len, struct MimeDecParseState *state)
This represents a header field name and associated value.
This represents a URL value node in a linked list.
#define B64_BLOCK
Definition: util-base64.h:49
struct MimeDecEntity * next
MimeDecEntity * data