suricata
util-mpm-hs-cache.c
Go to the documentation of this file.
1 /* Copyright (C) 2007-2024 Open Information Security Foundation
2  *
3  * You can copy, redistribute or modify this Program under the terms of
4  * the GNU General Public License version 2 as published by the Free
5  * Software Foundation.
6  *
7  * This program is distributed in the hope that it will be useful,
8  * but WITHOUT ANY WARRANTY; without even the implied warranty of
9  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10  * GNU General Public License for more details.
11  *
12  * You should have received a copy of the GNU General Public License
13  * version 2 along with this program; if not, write to the Free Software
14  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
15  * 02110-1301, USA.
16  */
17 
18 /**
19  * \file
20  *
21  * \author Lukas Sismis <lsismis@oisf.net>
22  *
23  * Hyperscan cache helper utilities for MPM cache files.
24  */
25 
26 #include "suricata-common.h"
27 #include "suricata.h"
28 #include "detect-engine.h"
29 #include "util-debug.h"
30 #include "util-hash-lookup3.h"
31 #include "util-mpm-hs-core.h"
32 #include "util-mpm-hs-cache.h"
33 #include "util-path.h"
34 
35 #ifdef BUILD_HYPERSCAN
36 
37 #include "rust.h"
38 #include <hs.h>
39 
40 #define HS_CACHE_FILE_VERSION "2"
41 #define HS_CACHE_FILE_SUFFIX "_v" HS_CACHE_FILE_VERSION ".hs"
42 
43 static int16_t HSCacheConstructFPath(
44  const char *dir_path, const char *db_hash, char *out_path, uint16_t out_path_size)
45 {
46  char filename[NAME_MAX];
47  uint64_t r = snprintf(filename, sizeof(filename), "%s" HS_CACHE_FILE_SUFFIX, db_hash);
48  if (r != (uint64_t)(strlen(db_hash) + strlen(HS_CACHE_FILE_SUFFIX)))
49  return -1;
50 
51  r = PathMerge(out_path, out_path_size, dir_path, filename);
52  if (r)
53  return -1;
54 
55  return 0;
56 }
57 
/**
 * \brief Read a whole file into a newly allocated buffer.
 *
 * \param file_path Path of the file to read.
 * \param buffer_sz Set to the number of bytes read on success; not written
 *                  on failure.
 *
 * \retval buffer allocated with SCCalloc(); the caller releases it with
 *         SCFree().
 * \retval NULL if the file cannot be opened, sized, or read completely, if
 *         it is empty, or if the allocation fails.
 */
static char *HSReadStream(const char *file_path, size_t *buffer_sz)
{
    char *buffer = NULL;
    long file_sz = 0;

    FILE *file = fopen(file_path, "rb");
    if (file == NULL) {
        SCLogDebug("Failed to open file %s: %s", file_path, strerror(errno));
        return NULL;
    }

    // Seek to the end of the file to determine its size
    if (fseek(file, 0, SEEK_END) != 0) {
        SCLogDebug("Failed to seek in file %s: %s", file_path, strerror(errno));
        goto error;
    }
    file_sz = ftell(file);
    if (file_sz < 0) {
        SCLogDebug("Failed to determine file size of %s: %s", file_path, strerror(errno));
        goto error;
    }
    if (file_sz == 0) {
        // A zero-sized allocation has an implementation-defined result and an
        // empty file cannot hold a serialized database anyway.
        SCLogDebug("File %s is empty", file_path);
        goto error;
    }

    buffer = (char *)SCCalloc((size_t)file_sz, sizeof(char));
    if (buffer == NULL) {
        SCLogWarning("Failed to allocate memory");
        goto error;
    }

    // Go back to the start; unlike rewind(), fseek() reports failure directly
    if (fseek(file, 0, SEEK_SET) != 0) {
        SCLogDebug("Failed to rewind file %s: %s", file_path, strerror(errno));
        goto error;
    }
    if (fread(buffer, 1, (size_t)file_sz, file) != (size_t)file_sz) {
        SCLogDebug("Failed to read the entire file %s: %s", file_path, strerror(errno));
        goto error;
    }

    *buffer_sz = (size_t)file_sz;
    fclose(file);
    return buffer;

error:
    if (buffer != NULL)
        SCFree(buffer);
    fclose(file);
    return NULL;
}
103 
104 /**
105  * Function to hash the searched pattern, only things relevant to Hyperscan
106  * compilation are hashed.
107  */
108 static void SCHSCachePatternHash(const SCHSPattern *p, SCSha256 *sha256)
109 {
110  BUG_ON(p->original_pat == NULL);
111  BUG_ON(p->sids == NULL);
112 
113  SCSha256Update(sha256, (const uint8_t *)&p->len, sizeof(p->len));
114  SCSha256Update(sha256, (const uint8_t *)&p->flags, sizeof(p->flags));
115  SCSha256Update(sha256, (const uint8_t *)p->original_pat, p->len);
116  SCSha256Update(sha256, (const uint8_t *)&p->id, sizeof(p->id));
117  SCSha256Update(sha256, (const uint8_t *)&p->offset, sizeof(p->offset));
118  SCSha256Update(sha256, (const uint8_t *)&p->depth, sizeof(p->depth));
119  SCSha256Update(sha256, (const uint8_t *)&p->sids_size, sizeof(p->sids_size));
120  SCSha256Update(sha256, (const uint8_t *)p->sids, p->sids_size * sizeof(SigIntId));
121 }
122 
123 int HSLoadCache(hs_database_t **hs_db, const char *hs_db_hash, const char *dirpath)
124 {
125  char hash_file_static[PATH_MAX];
126  int ret = (int)HSCacheConstructFPath(
127  dirpath, hs_db_hash, hash_file_static, sizeof(hash_file_static));
128 
129  if (ret != 0)
130  return -1;
131 
132  SCLogDebug("Loading the cached HS DB from %s", hash_file_static);
133  if (!SCPathExists(hash_file_static))
134  return -1;
135 
136  FILE *db_cache = fopen(hash_file_static, "r");
137  char *buffer = NULL;
138  if (db_cache) {
139  size_t buffer_size;
140  buffer = HSReadStream(hash_file_static, &buffer_size);
141  if (!buffer) {
142  SCLogWarning("Hyperscan cached DB file %s cannot be read", hash_file_static);
143  ret = -1;
144  goto freeup;
145  }
146 
147  hs_error_t error = hs_deserialize_database(buffer, buffer_size, hs_db);
148  if (error != HS_SUCCESS) {
149  SCLogWarning("Failed to deserialize Hyperscan database of %s: %s", hash_file_static,
150  HSErrorToStr(error));
151  ret = -1;
152  goto freeup;
153  }
154 
155  ret = 0;
156  /* Touch file to update modification time so active caches are retained. */
157  if (SCTouchFile(hash_file_static) != 0) {
158  SCLogDebug("Failed to update mtime for %s", hash_file_static);
159  }
160  goto freeup;
161  }
162 
163 freeup:
164  if (db_cache)
165  fclose(db_cache);
166  if (buffer)
167  SCFree(buffer);
168  return ret;
169 }
170 
171 static int HSSaveCache(hs_database_t *hs_db, const char *hs_db_hash, const char *dstpath)
172 {
173  static bool notified = false;
174  char *db_stream = NULL;
175  size_t db_size;
176  int ret;
177 
178  hs_error_t err = hs_serialize_database(hs_db, &db_stream, &db_size);
179  if (err != HS_SUCCESS) {
180  SCLogWarning("Failed to serialize Hyperscan database: %s", HSErrorToStr(err));
181  ret = -1;
182  goto cleanup;
183  }
184 
185  char hash_file_static[PATH_MAX];
186  ret = (int)HSCacheConstructFPath(
187  dstpath, hs_db_hash, hash_file_static, sizeof(hash_file_static));
188  if (ret != 0)
189  goto cleanup;
190  SCLogDebug("Caching the compiled HS at %s", hash_file_static);
191  if (SCPathExists(hash_file_static)) {
192  // potentially signs that it might not work as expected as we got into
193  // hash collision. If this happens with older and not used caches it is
194  // fine.
195  // It is problematic when one ruleset yields two colliding MPM groups.
196  SCLogWarning("Overwriting cache file %s. If the problem persists consider switching off "
197  "the caching",
198  hash_file_static);
199  }
200 
201  FILE *db_cache_out = fopen(hash_file_static, "w");
202  if (!db_cache_out) {
203  if (!notified) {
204  SCLogWarning("Failed to create Hyperscan cache file, make sure the folder exist and is "
205  "writable or adjust sgh-mpm-caching-path setting (%s)",
206  hash_file_static);
207  notified = true;
208  }
209  ret = -1;
210  goto cleanup;
211  }
212  size_t r = fwrite(db_stream, sizeof(db_stream[0]), db_size, db_cache_out);
213  if (r > 0 && (size_t)r != db_size) {
214  SCLogWarning("Failed to write to file: %s", hash_file_static);
215  if (r != db_size) {
216  // possibly a corrupted DB cache was created
217  r = remove(hash_file_static);
218  if (r != 0) {
219  SCLogWarning("Failed to remove corrupted cache file: %s", hash_file_static);
220  }
221  }
222  }
223  ret = fclose(db_cache_out);
224  if (ret != 0) {
225  SCLogWarning("Failed to close file: %s", hash_file_static);
226  goto cleanup;
227  }
228 
229 cleanup:
230  if (db_stream)
231  SCFree(db_stream);
232  return ret;
233 }
234 
235 int HSHashDb(const PatternDatabase *pd, char *hash, size_t hash_len)
236 {
237  SCSha256 *hasher = SCSha256New();
238  if (hasher == NULL) {
239  SCLogDebug("sha256 hashing failed");
240  return -1;
241  }
242  SCSha256Update(hasher, (const uint8_t *)&pd->pattern_cnt, sizeof(pd->pattern_cnt));
243  for (uint32_t i = 0; i < pd->pattern_cnt; i++) {
244  SCHSCachePatternHash(pd->parray[i], hasher);
245  }
246 
247  if (!SCSha256FinalizeToHex(hasher, hash, hash_len)) {
248  hasher = NULL;
249  SCLogDebug("sha256 hashing failed");
250  return -1;
251  }
252 
253  hasher = NULL;
254  return 0;
255 }
256 
257 void HSSaveCacheIterator(void *data, void *aux)
258 {
259  PatternDatabase *pd = (PatternDatabase *)data;
260  struct HsIteratorData *iter_data = (struct HsIteratorData *)aux;
261  if (pd->no_cache)
262  return;
263 
264  // count only cacheable DBs
265  iter_data->pd_stats->hs_cacheable_dbs_cnt++;
266  if (pd->cached) {
267  iter_data->pd_stats->hs_dbs_cache_loaded_cnt++;
268  return;
269  }
270 
271  char hs_db_hash[SC_SHA256_LEN * 2 + 1]; // * 2 for hex +1 for nul terminator
272  if (HSHashDb(pd, hs_db_hash, ARRAY_SIZE(hs_db_hash)) != 0) {
273  return;
274  }
275  if (HSSaveCache(pd->hs_db, hs_db_hash, iter_data->cache_path) == 0) {
276  pd->cached = true; // for rule reloads
277  iter_data->pd_stats->hs_dbs_cache_saved_cnt++;
278  }
279 }
280 
281 void HSCacheFilenameUsedIterator(void *data, void *aux)
282 {
283  PatternDatabase *pd = (PatternDatabase *)data;
284  struct HsInUseCacheFilesIteratorData *iter_data = (struct HsInUseCacheFilesIteratorData *)aux;
285  if (pd->no_cache || !pd->cached)
286  return;
287 
288  char hs_db_hash[SC_SHA256_LEN * 2 + 1]; // * 2 for hex +1 for nul terminator
289  if (HSHashDb(pd, hs_db_hash, ARRAY_SIZE(hs_db_hash)) != 0) {
290  return;
291  }
292 
293  char *fpath = SCCalloc(PATH_MAX, sizeof(char));
294  if (fpath == NULL) {
295  SCLogWarning("Failed to allocate memory for cache file path");
296  return;
297  }
298  if (HSCacheConstructFPath(iter_data->cache_path, hs_db_hash, fpath, PATH_MAX)) {
299  SCFree(fpath);
300  return;
301  }
302 
303  int r = HashTableAdd(iter_data->tbl, (void *)fpath, (uint16_t)strlen(fpath));
304  if (r < 0) {
305  SCLogWarning("Failed to add used cache file path %s to hash table", fpath);
306  SCFree(fpath);
307  }
308 }
309 
/**
 * \brief Decide whether a cache file is too old to keep.
 *
 * \param mtime Modification time of the file.
 * \param cutoff Oldest modification time still accepted; a file modified
 *               exactly at the cutoff is kept.
 *
 * \retval true if the file was modified before \p cutoff and should be pruned.
 * \retval false otherwise.
 */
static bool HSPruneFileByAge(time_t mtime, time_t cutoff)
{
    const bool is_stale = (cutoff > mtime);
    return is_stale;
}
322 
323 /**
324  * \brief Check if HS cache file is version-compatible.
325  *
326  * \param filename Cache file name.
327  *
328  * \retval true if file should be pruned, false otherwise.
329  */
330 static bool HSPruneFileByVersion(const char *filename)
331 {
332  if (strlen(filename) < strlen(HS_CACHE_FILE_SUFFIX)) {
333  return true;
334  }
335 
336  const char *underscore = strrchr(filename, '_');
337  if (underscore == NULL || strcmp(underscore, HS_CACHE_FILE_SUFFIX) != 0) {
338  return true;
339  }
340 
341  return false;
342 }
343 
344 int SCHSCachePruneEvaluate(MpmConfig *mpm_conf, HashTable *inuse_caches)
345 {
346  if (mpm_conf == NULL || mpm_conf->cache_dir_path == NULL)
347  return -1;
348  if (mpm_conf->cache_max_age_seconds == 0)
349  return 0; // disabled
350 
351  const time_t now = time(NULL);
352  if (now == (time_t)-1) {
353  return -1;
354  } else if (mpm_conf->cache_max_age_seconds >= (uint64_t)now) {
355  return 0;
356  }
357 
358  DIR *dir = opendir(mpm_conf->cache_dir_path);
359  if (dir == NULL) {
360  return -1;
361  }
362 
363  struct dirent *ent;
364  char path[PATH_MAX];
365  uint32_t considered = 0, removed = 0;
366  const time_t cutoff = now - (time_t)mpm_conf->cache_max_age_seconds;
367  while ((ent = readdir(dir)) != NULL) {
368  const char *name = ent->d_name;
369  size_t namelen = strlen(name);
370  if (namelen < 3 || strcmp(name + namelen - 3, ".hs") != 0)
371  continue;
372 
373  if (PathMerge(path, ARRAY_SIZE(path), mpm_conf->cache_dir_path, name) != 0)
374  continue;
375 
376  struct stat st;
377  /* TOCTOU: race window between stat and unlink is acceptable here.
378  * On Linux somebody can still modify (use the cache file) between the
379  * fstat and unlink, on Windows (HS not supported there but still relevant)
380  * TOCTOU happens when closing the file descriptor and unlinking the file.
381  * Cache mechanism is best-effort and e.g. not pruning or pruning an extra
382  * cache file is not problematic.
383  * Stat is used here to ease file handling as fstat doesn't bring any benefit */
384  /* coverity[toctou] */
385  if (SCStatFn(path, &st) != 0 || !S_ISREG(st.st_mode))
386  continue;
387 
388  considered++;
389 
390  const bool prune_by_age = HSPruneFileByAge(st.st_mtime, cutoff);
391  const bool prune_by_version = HSPruneFileByVersion(name);
392  if (!prune_by_age && !prune_by_version)
393  continue;
394 
395  void *cache_inuse = HashTableLookup(inuse_caches, path, (uint16_t)strlen(path));
396  if (cache_inuse != NULL)
397  continue; // in use
398 
399  /* coverity[toctou] */
400  int ret = unlink(path);
401  if (ret == 0 || (ret == -1 && errno == ENOENT)) {
402  removed++;
403  SCLogDebug("File %s removed because of %s%s%s", path, prune_by_age ? "age" : "",
404  prune_by_age && prune_by_version ? " and " : "",
405  prune_by_version ? "incompatible version" : "");
406  } else {
407  SCLogWarning("Failed to prune \"%s\": %s", path, strerror(errno));
408  }
409  }
410  closedir(dir);
411 
412  PatternDatabaseCache *pd_cache_stats = mpm_conf->cache_stats;
413  if (pd_cache_stats) {
414  pd_cache_stats->hs_dbs_cache_pruned_cnt = removed;
415  pd_cache_stats->hs_dbs_cache_pruned_considered_cnt = considered;
416  pd_cache_stats->hs_dbs_cache_pruned_cutoff = cutoff;
417  pd_cache_stats->cache_max_age_seconds = mpm_conf->cache_max_age_seconds;
418  }
419  return 0;
420 }
421 
422 void *SCHSCacheStatsInit(void)
423 {
424  PatternDatabaseCache *pd_cache_stats = SCCalloc(1, sizeof(PatternDatabaseCache));
425  if (pd_cache_stats == NULL) {
426  SCLogError("Failed to allocate memory for Hyperscan cache stats");
427  return NULL;
428  }
429  return pd_cache_stats;
430 }
431 
432 void SCHSCacheStatsPrint(void *data)
433 {
434  if (data == NULL) {
435  return;
436  }
437 
438  PatternDatabaseCache *pd_cache_stats = (PatternDatabaseCache *)data;
439 
440  char time_str[64];
441  struct tm tm_s;
442  struct tm *tm_info = SCLocalTime(pd_cache_stats->hs_dbs_cache_pruned_cutoff, &tm_s);
443  if (tm_info != NULL) {
444  strftime(time_str, ARRAY_SIZE(time_str), "%Y-%m-%d %H:%M:%S", tm_info);
445  } else {
446  snprintf(time_str, ARRAY_SIZE(time_str), "%" PRIu64 " seconds",
447  pd_cache_stats->cache_max_age_seconds);
448  }
449 
450  if (pd_cache_stats->hs_cacheable_dbs_cnt) {
451  SCLogInfo("Rule group caching - loaded: %u newly cached: %u total cacheable: %u",
452  pd_cache_stats->hs_dbs_cache_loaded_cnt, pd_cache_stats->hs_dbs_cache_saved_cnt,
453  pd_cache_stats->hs_cacheable_dbs_cnt);
454  }
455  if (pd_cache_stats->hs_dbs_cache_pruned_considered_cnt) {
456  SCLogInfo("Rule group cache pruning removed %u/%u of HS caches due to "
457  "version-incompatibility (not v%s) or "
458  "age (older than %s)",
459  pd_cache_stats->hs_dbs_cache_pruned_cnt,
460  pd_cache_stats->hs_dbs_cache_pruned_considered_cnt, HS_CACHE_FILE_VERSION,
461  time_str);
462  }
463 }
464 
465 void SCHSCacheStatsDeinit(void *data)
466 {
467  if (data == NULL) {
468  return;
469  }
470  PatternDatabaseCache *pd_cache_stats = (PatternDatabaseCache *)data;
471  SCFree(pd_cache_stats);
472 }
473 
474 #endif /* BUILD_HYPERSCAN */
detect-engine.h
PathMerge
int PathMerge(char *out_buf, size_t buf_size, const char *const dir, const char *const fname)
Definition: util-path.c:74
SCLogDebug
#define SCLogDebug(...)
Definition: util-debug.h:282
name
const char * name
Definition: detect-engine-proto.c:48
SCTouchFile
int SCTouchFile(const char *path)
Update access and modification time of an existing file to 'now'.
Definition: util-path.c:286
SC_SHA256_LEN
#define SC_SHA256_LEN
Definition: util-file.h:104
HashTable_
Definition: util-hash.h:35
rust.h
SCSha256
struct SCSha256 SCSha256
Definition: util-file.h:103
MpmConfig_::cache_dir_path
const char * cache_dir_path
Definition: util-mpm.h:92
util-debug.h
HashTableLookup
void * HashTableLookup(HashTable *ht, void *data, uint16_t datalen)
Definition: util-hash.c:182
SCLogWarning
#define SCLogWarning(...)
Macro used to log WARNING messages.
Definition: util-debug.h:262
HashTableAdd
int HashTableAdd(HashTable *ht, void *data, uint16_t datalen)
Definition: util-hash.c:120
BUG_ON
#define BUG_ON(x)
Definition: suricata-common.h:317
SigIntId
#define SigIntId
Definition: detect-engine-state.h:38
SCLocalTime
struct tm * SCLocalTime(time_t timep, struct tm *result)
Definition: util-time.c:267
SCLogInfo
#define SCLogInfo(...)
Macro used to log INFORMATIONAL messages.
Definition: util-debug.h:232
MpmConfig_::cache_max_age_seconds
uint64_t cache_max_age_seconds
Definition: util-mpm.h:93
SCPathExists
bool SCPathExists(const char *path)
Check if a path exists.
Definition: util-path.c:183
ARRAY_SIZE
#define ARRAY_SIZE(arr)
Definition: suricata-common.h:562
suricata-common.h
util-path.h
util-mpm-hs-core.h
MpmConfig_::cache_stats
void * cache_stats
Definition: util-mpm.h:94
util-hash-lookup3.h
SCLogError
#define SCLogError(...)
Macro used to log ERROR messages.
Definition: util-debug.h:274
SCFree
#define SCFree(p)
Definition: util-mem.h:61
MpmConfig_
Definition: util-mpm.h:91
suricata.h
SCStatFn
#define SCStatFn(pathname, statbuf)
Definition: util-path.h:35
util-mpm-hs-cache.h
SCCalloc
#define SCCalloc(nm, sz)
Definition: util-mem.h:53