postscreen_dnsbl.c revision 1.1
1/*	$NetBSD: postscreen_dnsbl.c,v 1.1 2011/03/02 19:32:26 tron Exp $	*/
2
3/*++
4/* NAME
5/*	postscreen_dnsbl 3
6/* SUMMARY
7/*	postscreen DNSBL support
8/* SYNOPSIS
9/*	#include <postscreen.h>
10/*
11/*	void	psc_dnsbl_init(void)
12/*
13/*	int	psc_dnsbl_request(client_addr, callback, context)
14/*	const char *client_addr;
15/*	void	(*callback)(int, char *);
16/*	char	*context;
17/*
18/*	int	psc_dnsbl_retrieve(client_addr, dnsbl_name, dnsbl_index)
19/*	const char *client_addr;
20/*	const char **dnsbl_name;
21/*	int	dnsbl_index;
22/* DESCRIPTION
23/*	This module implements preliminary support for DNSBL lookups.
24/*	Multiple requests for the same information are handled with
25/*	reference counts.
26/*
27/*	psc_dnsbl_init() initializes this module, and must be called
28/*	once before any of the other functions in this module.
29/*
30/*	psc_dnsbl_request() requests a blocklist score for the
31/*	specified client IP address and increments the reference
32/*	count.  The request completes in the background. The client
33/*	IP address must be in inet_ntop(3) output format.  The
34/*	callback argument specifies a function that is called when
35/*	the requested result is available. The context is passed
36/*	on to the callback function. The callback should ignore its
37/*	first argument (it exists for compatibility with Postfix
38/*	generic event infrastructure).
39/*	The result value is the index for the psc_dnsbl_retrieve()
40/*	call.
41/*
42/*	psc_dnsbl_retrieve() retrieves the result score requested with
43/*	psc_dnsbl_request() and decrements the reference count. It
44/*	is an error to retrieve a score without requesting it first.
45/* LICENSE
46/* .ad
47/* .fi
48/*	The Secure Mailer license must be distributed with this software.
49/* AUTHOR(S)
50/*	Wietse Venema
51/*	IBM T.J. Watson Research
52/*	P.O. Box 704
53/*	Yorktown Heights, NY 10598, USA
54/*--*/
55
56/* System library. */
57
58#include <sys_defs.h>
59#include <sys/socket.h>			/* AF_INET */
60#include <netinet/in.h>			/* inet_pton() */
61#include <arpa/inet.h>			/* inet_pton() */
62#include <stdio.h>			/* sscanf */
63
64/* Utility library. */
65
66#include <msg.h>
67#include <mymalloc.h>
68#include <argv.h>
69#include <htable.h>
70#include <events.h>
71#include <vstream.h>
72#include <connect.h>
73#include <split_at.h>
74#include <valid_hostname.h>
75#include <ip_match.h>
76#include <myaddrinfo.h>
77#include <stringops.h>
78
79/* Global library. */
80
81#include <mail_params.h>
82#include <mail_proto.h>
83
84/* Application-specific. */
85
86#include <postscreen.h>
87
88 /*
89  * Talking to the DNSBLOG service.
90  */
91#define DNSBLOG_TIMEOUT			10	/* read-event time limit per query; presumably seconds */
92static char *psc_dnsbl_service;	/* DNSBLOG socket name, built once by psc_dnsbl_init() */
93
94 /*
95  * Per-DNSBL filters and weights.
96  *
97  * The postscreen_dnsbl_sites parameter specifies zero or more DNSBL domains.
98  * We provide multiple access methods, one for quick iteration when sending
99  * queries to all DNSBL servers, and one for quick location when receiving a
100  * reply from one DNSBL server.
101  *
102  * Each DNSBL domain can be specified more than once, each time with a
103  * different (filter, weight) pair. We group (filter, weight) pairs in a
104  * linked list under their DNSBL domain name. The list head has a reference
105  * to a "safe name" for the DNSBL, in case the name includes a password.
106  */
107static HTABLE *dnsbl_site_cache;	/* PSC_DNSBL_HEAD entries, indexed by DNSBL domain */
108static HTABLE_INFO **dnsbl_site_list;	/* htable_list() of the cache, walked until a null entry */
109
/*
 * PSC_DNSBL_HEAD - the value stored in dnsbl_site_cache under one DNSBL
 * domain name. It anchors the list of (filter, weight) nodes for that
 * domain, and holds the name that is reported for the domain.
 */
110typedef struct {
111    const char *safe_dnsbl;		/* from postscreen_dnsbl_reply_map, else the hash key */
112    struct PSC_DNSBL_SITE *first;	/* list of (filter, weight) tuples, newest first */
113} PSC_DNSBL_HEAD;
114
/*
 * PSC_DNSBL_SITE - one (filter, weight) pair for a DNSBL domain. A node
 * without byte_codes matches every non-empty reply from its domain.
 */
115typedef struct PSC_DNSBL_SITE {
116    char   *filter;			/* printable filter for logging (default: null) */
117    char   *byte_codes;			/* filter compiled by ip_match_parse() (default: null) */
118    int     weight;			/* added to the score on a match (default: 1) */
119    struct PSC_DNSBL_SITE *next;	/* linked list */
120} PSC_DNSBL_SITE;
121
122 /*
123  * Per-client DNSBL scores.
124  *
125  * Some SMTP clients make parallel connections. This can trigger parallel
126  * blocklist score requests when the pre-handshake delays of the connections
127  * overlap.
128  *
129  * We combine requests for the same score under the client IP address in a
130  * single reference-counted entry. The reference count goes up with each
131  * request for a score, and it goes down with each score retrieval. Each
132  * score has one or more requestors that need to be notified when the result
133  * is ready, so that postscreen can terminate a pre-handshake delay when all
134  * pre-handshake tests are completed.
135  */
136static HTABLE *dnsbl_score_cache;	/* PSC_DNSBL_SCORE entries, indexed by client address */
137
/*
 * PSC_CALL_BACK_ENTRY - one requestor that wants to be notified when the
 * score is ready. A null callback marks an entry that was cancelled by
 * psc_dnsbl_retrieve().
 */
138typedef struct {
139    void    (*callback) (int, char *);	/* generic call-back routine */
140    char   *context;			/* generic call-back argument */
141} PSC_CALL_BACK_ENTRY;
142
/*
 * PSC_DNSBL_SCORE - the reference-counted score for one client address,
 * followed by a call-back table that PSC_CALL_BACK_EXTEND grows in place
 * with myrealloc(). The table[1] member supplies the first slot, which is
 * why a fresh entry can be allocated with sizeof(PSC_DNSBL_SCORE).
 */
143typedef struct {
144    const char *dnsbl;			/* first DNSBL that contributed, or null */
145    int     total;			/* combined blocklist score */
146    int     refcount;			/* requests minus retrievals */
147    int     pending_lookups;		/* nr of DNS requests in flight */
148    /* Call-back table support. */
149    int     index;			/* next table index */
150    int     limit;			/* last valid index */
151    PSC_CALL_BACK_ENTRY table[1];	/* actually a bunch */
152} PSC_DNSBL_SCORE;
153
/* PSC_CALL_BACK_INIT - start with an empty call-back table */

#define PSC_CALL_BACK_INIT(sp) do { \
	(sp)->index = 0; \
	(sp)->limit = 0; \
    } while (0)

/* PSC_CALL_BACK_INDEX_OF_LAST - index of the most recently entered call-back */

#define PSC_CALL_BACK_INDEX_OF_LAST(sp) ((sp)->index - 1)

/*
 * PSC_CALL_BACK_CANCEL - cancel any pending timer for one call-back and
 * blank its table slot. Panics on an out-of-range index. The caller must
 * have a local variable "myname" in scope.
 */

#define PSC_CALL_BACK_CANCEL(sp, idx) do { \
	PSC_CALL_BACK_ENTRY *_entry_; \
	if ((idx) < 0 || (idx) >= (sp)->index) \
	    msg_panic("%s: index %d must be >= 0 and < %d", \
		      myname, (idx), (sp)->index); \
	_entry_ = &(sp)->table[(idx)]; \
	event_cancel_timer(_entry_->callback, _entry_->context); \
	_entry_->callback = 0; \
	_entry_->context = 0; \
    } while (0)

/*
 * PSC_CALL_BACK_EXTEND - make room for one more call-back. The score may
 * move in memory; both the hash table node (hp) and the caller's pointer
 * (sp) are updated to the new location.
 */

#define PSC_CALL_BACK_EXTEND(hp, sp) do { \
	if ((sp)->index >= (sp)->limit) { \
	    int _new_limit_ = ((sp)->limit == 0 ? 5 : 2 * (sp)->limit); \
	    (hp)->value = myrealloc((char *) (sp), sizeof(*(sp)) + \
				    _new_limit_ * sizeof((sp)->table)); \
	    (sp) = (PSC_DNSBL_SCORE *) (hp)->value; \
	    (sp)->limit = _new_limit_; \
	} \
    } while (0)

/* PSC_CALL_BACK_ENTER - store a call-back in the next free table slot */

#define PSC_CALL_BACK_ENTER(sp, fn, ctx) do { \
	PSC_CALL_BACK_ENTRY *_entry_ = &(sp)->table[(sp)->index]; \
	(sp)->index += 1; \
	_entry_->callback = (fn); \
	_entry_->context = (ctx); \
    } while (0)

/* PSC_CALL_BACK_NOTIFY - invoke every call-back that was not cancelled */

#define PSC_CALL_BACK_NOTIFY(sp, ev) do { \
	PSC_CALL_BACK_ENTRY *_entry_; \
	for (_entry_ = (sp)->table; _entry_ < (sp)->table + (sp)->index; _entry_++) { \
	    if (_entry_->callback == 0) \
		continue; \
	    _entry_->callback((ev), _entry_->context); \
	} \
    } while (0)

/* PSC_NULL_EVENT - event argument for call-backs made by PSC_CALL_BACK_NOTIFY */

#define PSC_NULL_EVENT	(0)
196
197 /*
198  * Per-request state.
199  *
200  * This implementation stores the client IP address and DNSBL domain in the
201  * DNSBLOG query/reply stream. This simplifies code, and allows the DNSBLOG
202  * server to produce more informative logging.
203  */
204static VSTRING *reply_client;		/* client address in DNSBLOG reply */
205static VSTRING *reply_dnsbl;		/* domain in DNSBLOG reply */
206static VSTRING *reply_addr;		/* space-separated address list in DNSBLOG reply */
207
208/* psc_dnsbl_add_site - add DNSBL site information */
209
210static void psc_dnsbl_add_site(const char *site)
211{
212    const char *myname = "psc_dnsbl_add_site";
213    char   *saved_site = mystrdup(site);
214    VSTRING *byte_codes = 0;
215    PSC_DNSBL_HEAD *head;
216    PSC_DNSBL_SITE *new_site;
217    char    junk;
218    const char *weight_text;
219    char   *pattern_text;
220    int     weight;
221    HTABLE_INFO *ht;
222    char   *parse_err;
223
224    /*
225     * Parse the required DNSBL domain name, the optional reply filter and
226     * the optional reply weight factor.
227     */
228#define DO_GRIPE	1
229
230    /* Negative weight means whitelist. */
231    if ((weight_text = split_at(saved_site, '*')) != 0) {
232	if (sscanf(weight_text, "%d%c", &weight, &junk) != 1)
233	    msg_fatal("bad DNSBL weight factor \"%s\" in \"%s\"",
234		      weight_text, site);
235    } else {
236	weight = 1;
237    }
238    /* Reply filter. */
239    if ((pattern_text = split_at(saved_site, '=')) != 0) {
240	byte_codes = vstring_alloc(100);
241	if ((parse_err = ip_match_parse(byte_codes, pattern_text)) != 0)
242	    msg_fatal("bad DNSBL filter syntax: %s", parse_err);
243    }
244    if (valid_hostname(saved_site, DO_GRIPE) == 0)
245	msg_fatal("bad DNSBL domain name \"%s\" in \"%s\"",
246		  saved_site, site);
247
248    if (msg_verbose > 1)
249	msg_info("%s: \"%s\" -> domain=\"%s\" pattern=\"%s\" weight=%d",
250		 myname, site, saved_site, pattern_text ? pattern_text :
251		 "null", weight);
252
253    /*
254     * Look up or create the (filter, weight) list head for this DNSBL domain
255     * name.
256     */
257    if ((head = (PSC_DNSBL_HEAD *)
258	 htable_find(dnsbl_site_cache, saved_site)) == 0) {
259	head = (PSC_DNSBL_HEAD *) mymalloc(sizeof(*head));
260	ht = htable_enter(dnsbl_site_cache, saved_site, (char *) head);
261	/* Translate the DNSBL name into a safe name if available. */
262	if (psc_dnsbl_reply == 0
263	 || (head->safe_dnsbl = dict_get(psc_dnsbl_reply, saved_site)) == 0)
264	    head->safe_dnsbl = ht->key;
265	head->first = 0;
266    }
267
268    /*
269     * Append the new (filter, weight) node to the list for this DNSBL domain
270     * name.
271     */
272    new_site = (PSC_DNSBL_SITE *) mymalloc(sizeof(*new_site));
273    new_site->filter = (pattern_text ? mystrdup(pattern_text) : 0);
274    new_site->byte_codes = (byte_codes ? ip_match_save(byte_codes) : 0);
275    new_site->weight = weight;
276    new_site->next = head->first;
277    head->first = new_site;
278
279    myfree(saved_site);
280    if (byte_codes)
281	vstring_free(byte_codes);
282}
283
284/* psc_dnsbl_match - match DNSBL reply filter */
285
286static int psc_dnsbl_match(const char *filter, ARGV *reply)
287{
288    char    addr_buf[MAI_HOSTADDR_STRSIZE];
289    char  **cpp;
290
291    /*
292     * Run the replies through the pattern-matching engine.
293     */
294    for (cpp = reply->argv; *cpp != 0; cpp++) {
295	if (inet_pton(AF_INET, *cpp, addr_buf) != 1)
296	    msg_warn("address conversion error for %s -- ignoring this reply",
297		     *cpp);
298	if (ip_match_execute(filter, addr_buf))
299	    return (1);
300    }
301    return (0);
302}
303
304/* psc_dnsbl_retrieve - retrieve blocklist score, decrement reference count */
305
306int     psc_dnsbl_retrieve(const char *client_addr, const char **dnsbl_name,
307			           int dnsbl_index)
308{
309    const char *myname = "psc_dnsbl_retrieve";
310    PSC_DNSBL_SCORE *score;
311    int     result_score;
312
313    /*
314     * Sanity check.
315     */
316    if ((score = (PSC_DNSBL_SCORE *)
317	 htable_find(dnsbl_score_cache, client_addr)) == 0)
318	msg_panic("%s: no blocklist score for %s", myname, client_addr);
319
320    /*
321     * Disable callbacks.
322     */
323    PSC_CALL_BACK_CANCEL(score, dnsbl_index);
324
325    /*
326     * Reads are destructive.
327     */
328    result_score = score->total;
329    *dnsbl_name = score->dnsbl;
330    score->refcount -= 1;
331    if (score->refcount < 1) {
332	if (msg_verbose > 1)
333	    msg_info("%s: delete blocklist score for %s", myname, client_addr);
334	htable_delete(dnsbl_score_cache, client_addr, myfree);
335    }
336    return (result_score);
337}
338
339/* psc_dnsbl_receive - receive DNSBL reply, update blocklist score */
340
341static void psc_dnsbl_receive(int event, char *context)
342{
343    const char *myname = "psc_dnsbl_receive";
344    VSTREAM *stream = (VSTREAM *) context;
345    PSC_DNSBL_SCORE *score;
346    PSC_DNSBL_HEAD *head;
347    PSC_DNSBL_SITE *site;
348    ARGV   *reply_argv;
349
350    PSC_CLEAR_EVENT_REQUEST(vstream_fileno(stream), psc_dnsbl_receive, context);
351
352    /*
353     * Receive the DNSBL lookup result.
354     *
355     * This is preliminary code to explore the field. Later, DNSBL lookup will
356     * be handled by an UDP-based DNS client that is built directly into some
357     * Postfix daemon.
358     *
359     * Don't bother looking up the blocklist score when the client IP address is
360     * not listed at the DNSBL.
361     *
362     * Don't panic when the blocklist score no longer exists. It may be deleted
363     * when the client triggers a "drop" action after pregreet, when the
364     * client does not pregreet and the DNSBL reply arrives late, or when the
365     * client triggers a "drop" action after hanging up.
366     */
367    if (event == EVENT_READ
368	&& attr_scan(stream,
369		     ATTR_FLAG_STRICT,
370		     ATTR_TYPE_STR, MAIL_ATTR_RBL_DOMAIN, reply_dnsbl,
371		     ATTR_TYPE_STR, MAIL_ATTR_ACT_CLIENT_ADDR, reply_client,
372		     ATTR_TYPE_STR, MAIL_ATTR_RBL_ADDR, reply_addr,
373		     ATTR_TYPE_END) == 3
374	&& (score = (PSC_DNSBL_SCORE *)
375	    htable_find(dnsbl_score_cache, STR(reply_client))) != 0) {
376
377	/*
378	 * Run this response past all applicable DNSBL filters and update the
379	 * blocklist score for this client IP address.
380	 *
381	 * Don't panic when the DNSBL domain name is not found. The DNSBLOG
382	 * server may be messed up.
383	 */
384	if (msg_verbose > 1)
385	    msg_info("%s: client=\"%s\" score=%d domain=\"%s\" reply=\"%s\"",
386		     myname, STR(reply_client), score->total,
387		     STR(reply_dnsbl), STR(reply_addr));
388	if (*STR(reply_addr) != 0) {
389	    head = (PSC_DNSBL_HEAD *)
390		htable_find(dnsbl_site_cache, STR(reply_dnsbl));
391	    site = (head ? head->first : (PSC_DNSBL_SITE *) 0);
392	    for (reply_argv = 0; site != 0; site = site->next) {
393		if (site->byte_codes == 0
394		    || psc_dnsbl_match(site->byte_codes, reply_argv ? reply_argv :
395			 (reply_argv = argv_split(STR(reply_addr), " ")))) {
396		    if (score->dnsbl == 0)
397			score->dnsbl = head->safe_dnsbl;
398		    score->total += site->weight;
399		    if (msg_verbose > 1)
400			msg_info("%s: filter=\"%s\" weight=%d score=%d",
401			       myname, site->filter ? site->filter : "null",
402				 site->weight, score->total);
403		}
404	    }
405	    if (reply_argv != 0)
406		argv_free(reply_argv);
407	}
408
409	/*
410	 * Notify the requestor(s) that the result is ready to be picked up.
411	 * If this call isn't made, clients have to sit out the entire
412	 * pre-handshake delay.
413	 */
414	score->pending_lookups -= 1;
415	if (score->pending_lookups == 0)
416	    PSC_CALL_BACK_NOTIFY(score, PSC_NULL_EVENT);
417    }
418    /* Here, score may be a null pointer. */
419    vstream_fclose(stream);
420}
421
422/* psc_dnsbl_request  - send dnsbl query, increment reference count */
423
424int     psc_dnsbl_request(const char *client_addr,
425			          void (*callback) (int, char *),
426			          char *context)
427{
428    const char *myname = "psc_dnsbl_request";
429    int     fd;
430    VSTREAM *stream;
431    HTABLE_INFO **ht;
432    PSC_DNSBL_SCORE *score;
433    HTABLE_INFO *hash_node;
434
435    /*
436     * Some spambots make several connections at nearly the same time,
437     * causing their pregreet delays to overlap. Such connections can share
438     * the efforts of DNSBL lookup.
439     *
440     * We store a reference-counted DNSBL score under its client IP address. We
441     * increment the reference count with each score request, and decrement
442     * the reference count with each score retrieval.
443     *
444     * Do not notify the requestor NOW when the DNS replies are already in.
445     * Reason: we must not make a backwards call while we are still in the
446     * middle of executing the corresponding forward call. Instead we create
447     * a zero-delay timer request and call the notification function from
448     * there.
449     *
450     * psc_dnsbl_request() could instead return a result value to indicate that
451     * the DNSBL score is already available, but that would complicate the
452     * caller with two different notification code paths: one asynchronous
453     * code path via the callback invocation, and one synchronous code path
454     * via the psc_dnsbl_request() result value. That would be a source of
455     * future bugs.
456     */
457    if ((hash_node = htable_locate(dnsbl_score_cache, client_addr)) != 0) {
458	score = (PSC_DNSBL_SCORE *) hash_node->value;
459	score->refcount += 1;
460	PSC_CALL_BACK_EXTEND(hash_node, score);
461	PSC_CALL_BACK_ENTER(score, callback, context);
462	if (msg_verbose > 1)
463	    msg_info("%s: reuse blocklist score for %s refcount=%d pending=%d",
464		     myname, client_addr, score->refcount,
465		     score->pending_lookups);
466	if (score->pending_lookups == 0)
467	    event_request_timer(callback, context, EVENT_NULL_DELAY);
468	return (PSC_CALL_BACK_INDEX_OF_LAST(score));
469    }
470    if (msg_verbose > 1)
471	msg_info("%s: create blocklist score for %s", myname, client_addr);
472    score = (PSC_DNSBL_SCORE *) mymalloc(sizeof(*score));
473    score->dnsbl = 0;
474    score->total = 0;
475    score->refcount = 1;
476    score->pending_lookups = 0;
477    PSC_CALL_BACK_INIT(score);
478    PSC_CALL_BACK_ENTER(score, callback, context);
479    (void) htable_enter(dnsbl_score_cache, client_addr, (char *) score);
480
481    /*
482     * Send a query to all DNSBL servers. Later, DNSBL lookup will be done
483     * with an UDP-based DNS client that is built directly into Postfix code.
484     * We therefore do not optimize the maximum out of this temporary
485     * implementation.
486     */
487    for (ht = dnsbl_site_list; *ht; ht++) {
488	if ((fd = LOCAL_CONNECT(psc_dnsbl_service, NON_BLOCKING, 1)) < 0) {
489	    msg_warn("%s: connect to %s service: %m",
490		     myname, psc_dnsbl_service);
491	    continue;
492	}
493	stream = vstream_fdopen(fd, O_RDWR);
494	attr_print(stream, ATTR_FLAG_NONE,
495		   ATTR_TYPE_STR, MAIL_ATTR_RBL_DOMAIN, ht[0]->key,
496		   ATTR_TYPE_STR, MAIL_ATTR_ACT_CLIENT_ADDR, client_addr,
497		   ATTR_TYPE_END);
498	if (vstream_fflush(stream) != 0) {
499	    msg_warn("%s: error sending to %s service: %m",
500		     myname, psc_dnsbl_service);
501	    vstream_fclose(stream);
502	    continue;
503	}
504	PSC_READ_EVENT_REQUEST(vstream_fileno(stream), psc_dnsbl_receive,
505			       (char *) stream, DNSBLOG_TIMEOUT);
506	score->pending_lookups += 1;
507    }
508    return (PSC_CALL_BACK_INDEX_OF_LAST(score));
509}
510
511/* psc_dnsbl_init - initialize */
512
513void    psc_dnsbl_init(void)
514{
515    const char *myname = "psc_dnsbl_init";
516    ARGV   *dnsbl_site = argv_split(var_psc_dnsbl_sites, ", \t\r\n");
517    char  **cpp;
518
519    /*
520     * Sanity check.
521     */
522    if (dnsbl_site_cache != 0)
523	msg_panic("%s: called more than once", myname);
524
525    /*
526     * pre-compute the DNSBLOG socket name.
527     */
528    psc_dnsbl_service = concatenate(MAIL_CLASS_PRIVATE, "/",
529				    var_dnsblog_service, (char *) 0);
530
531    /*
532     * Prepare for quick iteration when sending out queries to all DNSBL
533     * servers, and for quick lookup when a reply arrives from a specific
534     * DNSBL server.
535     */
536    dnsbl_site_cache = htable_create(13);
537    for (cpp = dnsbl_site->argv; *cpp; cpp++)
538	psc_dnsbl_add_site(*cpp);
539    argv_free(dnsbl_site);
540    dnsbl_site_list = htable_list(dnsbl_site_cache);
541
542    /*
543     * The per-client blocklist score.
544     */
545    dnsbl_score_cache = htable_create(13);
546
547    /*
548     * Space for ad-hoc DNSBLOG server request/reply parameters.
549     */
550    reply_client = vstring_alloc(100);
551    reply_dnsbl = vstring_alloc(100);
552    reply_addr = vstring_alloc(100);
553}
554