filter: reduce memory usage, fix OOM crashes

* check return values of memory allocation and abort gracefully
  in out-of-memory situations

* use sblist (linear dynamic array) instead of linked list
  - this removes one pointer per filter rule
  - removes need to manually allocate/free every single list item
    (instead block allocation is used)
  - simplifies code

* remove storage of (unused) input rule
  - removes one char* pointer per filter rule
  - removes storage of the raw bytes of each filter rule

* display the line number when an out-of-memory or invalid-regex error occurs

* replace duplicate filter_domain()/filter_host() code with a single
  function filter_run()
  - reduces code size and management effort

with these improvements, more than 1 million regex rules can be loaded with
4 GB of RAM, whereas previously loading crashed at about 950K rules.

the list for testing was assembled from
http://www.shallalist.de/Downloads/shallalist.tar.gz

closes 
This commit is contained in:
rofl0r 2020-09-05 19:32:21 +01:00
parent c4dc3ba007
commit 233ce6de3b
3 changed files with 35 additions and 65 deletions

@ -29,18 +29,17 @@
#include "log.h"
#include "reqs.h"
#include "conf.h"
#include "sblist.h"
#define FILTER_BUFFER_LEN (512)
static int err;
struct filter_list {
struct filter_list *next;
char *pat;
regex_t *cpat;
regex_t cpatb;
};
static struct filter_list *fl = NULL;
static sblist *fl = NULL;
static int already_init = 0;
static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
@ -50,10 +49,10 @@ static filter_policy_t default_policy = FILTER_DEFAULT_ALLOW;
void filter_init (void)
{
FILE *fd;
struct filter_list *p;
struct filter_list fe;
char buf[FILTER_BUFFER_LEN];
char *s, *start;
int cflags;
int cflags, lineno = 0;
if (fl || already_init) {
return;
@ -64,8 +63,6 @@ void filter_init (void)
return;
}
p = NULL;
cflags = REG_NEWLINE | REG_NOSUB;
if (config->filter_extended)
cflags |= REG_EXTENDED;
@ -73,6 +70,7 @@ void filter_init (void)
cflags |= REG_ICASE;
while (fgets (buf, FILTER_BUFFER_LEN, fd)) {
++lineno;
/* skip leading whitespace */
s = buf;
while (*s && isspace ((unsigned char) *s))
@ -104,24 +102,22 @@ void filter_init (void)
if (*s == '\0')
continue;
if (!p) /* head of list */
fl = p =
(struct filter_list *)
safecalloc (1, sizeof (struct filter_list));
else { /* next entry */
p->next =
(struct filter_list *)
safecalloc (1, sizeof (struct filter_list));
p = p->next;
}
if (!fl) fl = sblist_new(sizeof(struct filter_list),
4096/sizeof(struct filter_list));
p->pat = safestrdup (s);
p->cpat = (regex_t *) safemalloc (sizeof (regex_t));
err = regcomp (p->cpat, p->pat, cflags);
err = regcomp (&fe.cpatb, s, cflags);
if (err != 0) {
if (err == REG_ESPACE) goto oom;
fprintf (stderr,
"Bad regex in %s: %s\n",
config->filter, p->pat);
"Bad regex in %s: line %d - %s\n",
config->filter, lineno, s);
exit (EX_DATAERR);
}
if (!sblist_add(fl, &fe)) {
oom:;
fprintf (stderr,
"out of memory parsing filter file %s: line %d\n",
config->filter, lineno);
exit (EX_DATAERR);
}
}
@ -137,15 +133,16 @@ void filter_init (void)
/* unlink the list */
void filter_destroy (void)
{
struct filter_list *p, *q;
struct filter_list *p;
size_t i;
if (already_init) {
for (p = q = fl; p; p = q) {
regfree (p->cpat);
safefree (p->cpat);
safefree (p->pat);
q = p->next;
safefree (p);
if (fl) {
for (i = 0; i < sblist_getsize(fl); ++i) {
p = sblist_get(fl, i);
regfree (&p->cpatb);
}
sblist_free(fl);
}
fl = NULL;
already_init = 0;
@ -165,45 +162,19 @@ void filter_reload (void)
}
/* Return 0 to allow, non-zero to block */
int filter_domain (const char *host)
int filter_run (const char *str)
{
struct filter_list *p;
size_t i;
int result;
if (!fl || !already_init)
goto COMMON_EXIT;
for (p = fl; p; p = p->next) {
for (i = 0; i < sblist_getsize(fl); ++i) {
p = sblist_get(fl, i);
result =
regexec (p->cpat, host, (size_t) 0, (regmatch_t *) 0, 0);
if (result == 0) {
if (default_policy == FILTER_DEFAULT_ALLOW)
return 1;
else
return 0;
}
}
COMMON_EXIT:
if (default_policy == FILTER_DEFAULT_ALLOW)
return 0;
else
return 1;
}
/* returns 0 to allow, non-zero to block */
int filter_url (const char *url)
{
struct filter_list *p;
int result;
if (!fl || !already_init)
goto COMMON_EXIT;
for (p = fl; p; p = p->next) {
result =
regexec (p->cpat, url, (size_t) 0, (regmatch_t *) 0, 0);
regexec (&p->cpatb, str, (size_t) 0, (regmatch_t *) 0, 0);
if (result == 0) {
if (default_policy == FILTER_DEFAULT_ALLOW)

@ -29,8 +29,7 @@ typedef enum {
extern void filter_init (void);
extern void filter_destroy (void);
extern void filter_reload (void);
extern int filter_domain (const char *host);
extern int filter_url (const char *url);
extern int filter_run (const char *str);
extern void filter_set_default_policy (filter_policy_t policy);

@ -457,9 +457,9 @@ BAD_REQUEST_ERROR:
*/
if (config->filter) {
if (config->filter_url)
ret = filter_url (url);
ret = filter_run (url);
else
ret = filter_domain (request->host);
ret = filter_run (request->host);
if (ret) {
update_stats (STAT_DENIED);