dispatch.c 98.2 KB
Newer Older
Michael Graff's avatar
Michael Graff committed
1
/*
 * Copyright (C) 1999-2009, 2011-2017  Internet Systems Consortium, Inc. ("ISC")
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */

9
/*! \file */
David Lawrence's avatar
David Lawrence committed
10

Michael Graff's avatar
Michael Graff committed
11
12
13
#include <config.h>

#include <stdlib.h>
14
15
#include <sys/types.h>
#include <unistd.h>
16
#include <stdlib.h>
Michael Graff's avatar
Michael Graff committed
17

18
#include <isc/entropy.h>
Michael Graff's avatar
Michael Graff committed
19
#include <isc/mem.h>
20
#include <isc/mutex.h>
21
#include <isc/portset.h>
22
#include <isc/print.h>
23
#include <isc/random.h>
24
#include <isc/socket.h>
25
#include <isc/stats.h>
26
#include <isc/string.h>
27
#include <isc/task.h>
28
#include <isc/time.h>
Michael Graff's avatar
Michael Graff committed
29
#include <isc/util.h>
Michael Graff's avatar
Michael Graff committed
30

31
#include <dns/acl.h>
Michael Graff's avatar
Michael Graff committed
32
#include <dns/dispatch.h>
33
34
#include <dns/events.h>
#include <dns/log.h>
35
#include <dns/message.h>
36
#include <dns/portlist.h>
37
#include <dns/stats.h>
38
#include <dns/tcpmsg.h>
39
40
#include <dns/types.h>

41
42
typedef ISC_LIST(dns_dispentry_t)	dns_displist_t;

43
typedef struct dispsocket		dispsocket_t;
44
45
typedef ISC_LIST(dispsocket_t)		dispsocketlist_t;

46
47
48
typedef struct dispportentry		dispportentry_t;
typedef ISC_LIST(dispportentry_t)	dispportlist_t;

49
50
typedef struct dns_qid {
	unsigned int	magic;
51
52
	unsigned int	qid_nbuckets;	/*%< hash table size */
	unsigned int	qid_increment;	/*%< id increment on collision */
53
	isc_mutex_t	lock;
54
	dns_displist_t	*qid_table;	/*%< the table itself */
55
	dispsocketlist_t *sock_table;	/*%< socket table */
56
57
} dns_qid_t;

58
59
60
61
struct dns_dispatchmgr {
	/* Unlocked. */
	unsigned int			magic;
	isc_mem_t		       *mctx;
62
	dns_acl_t		       *blackhole;
63
	dns_portlist_t		       *portlist;
64
	isc_stats_t		       *stats;
65
	isc_entropy_t		       *entropy; /*%< entropy source */
66
67
68
69
70

	/* Locked by "lock". */
	isc_mutex_t			lock;
	unsigned int			state;
	ISC_LIST(dns_dispatch_t)	list;
71

72
73
74
	/* Locked by rng_lock. */
	isc_mutex_t			rng_lock;
	isc_rng_t		       *rngctx; /*%< RNG context for QID */
75

76
	/* locked by buffer_lock */
77
	dns_qid_t			*qid;
78
	isc_mutex_t			buffer_lock;
79
80
81
	unsigned int			buffers;    /*%< allocated buffers */
	unsigned int			buffersize; /*%< size of each buffer */
	unsigned int			maxbuffers; /*%< max buffers */
82
83

	/* Locked internally. */
84
85
86
87
88
	isc_mutex_t			depool_lock;
	isc_mempool_t		       *depool;	/*%< pool for dispatch events */
	isc_mutex_t			rpool_lock;
	isc_mempool_t		       *rpool;	/*%< pool for replies */
	isc_mutex_t			dpool_lock;
89
	isc_mempool_t		       *dpool;  /*%< dispatch allocations */
90
91
92
93
	isc_mutex_t			bpool_lock;
	isc_mempool_t		       *bpool;	/*%< pool for buffers */
	isc_mutex_t			spool_lock;
	isc_mempool_t		       *spool;	/*%< pool for dispsocks */
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113

	/*%
	 * Locked by qid->lock if qid exists; otherwise, can be used without
	 * being locked.
	 * Memory footprint considerations: this is a simple implementation of
	 * available ports, i.e., an ordered array of the actual port numbers.
	 * This will require about 256KB of memory in the worst case (128KB for
	 * each of IPv4 and IPv6).  We could reduce it by representing it as a
	 * more sophisticated way such as a list (or array) of ranges that are
	 * searched to identify a specific port.  Our decision here is the saved
	 * memory isn't worth the implementation complexity, considering the
	 * fact that the whole BIND9 process (which is mainly named) already
	 * requires a pretty large memory footprint.  We may, however, have to
	 * revisit the decision when we want to use it as a separate module for
	 * an environment where memory requirement is severer.
	 */
	in_port_t	*v4ports;	/*%< available ports for IPv4 */
	unsigned int	nv4ports;	/*%< # of available ports for IPv4 */
	in_port_t	*v6ports;	/*%< available ports for IPv4 */
	unsigned int	nv6ports;	/*%< # of available ports for IPv4 */
114
115
116
117
118
119
};

/* Bit in dns_dispatchmgr 'state' set once the manager is shutting down. */
#define MGR_SHUTTINGDOWN		0x00000001U
/* True when the MGR_SHUTTINGDOWN bit is set in (mgr)->state. */
#define MGR_IS_SHUTTINGDOWN(mgr) \
	(((mgr)->state & MGR_SHUTTINGDOWN) != 0)

/* True when DNS_DISPATCHATTR_PRIVATE is set in (disp)->attributes. */
#define IS_PRIVATE(disp) \
	(((disp)->attributes & DNS_DISPATCHATTR_PRIVATE) != 0)
Michael Graff's avatar
Michael Graff committed
120

Michael Graff's avatar
Michael Graff committed
121
struct dns_dispentry {
Michael Graff's avatar
Michael Graff committed
122
	unsigned int			magic;
123
	dns_dispatch_t		       *disp;
124
	dns_messageid_t			id;
125
	in_port_t			port;
Michael Graff's avatar
Michael Graff committed
126
	unsigned int			bucket;
Michael Graff's avatar
Michael Graff committed
127
	isc_sockaddr_t			host;
Michael Graff's avatar
Michael Graff committed
128
129
130
	isc_task_t		       *task;
	isc_taskaction_t		action;
	void			       *arg;
Michael Graff's avatar
Michael Graff committed
131
	isc_boolean_t			item_out;
132
	dispsocket_t			*dispsocket;
Michael Graff's avatar
Michael Graff committed
133
	ISC_LIST(dns_dispatchevent_t)	items;
Michael Graff's avatar
Michael Graff committed
134
	ISC_LINK(dns_dispentry_t)	link;
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
};

/*%
 * Maximum number of dispatch sockets that can be pooled for reuse.  The
 * appropriate value may vary, but experiments have shown a busy caching server
 * may need more than 1000 sockets concurrently opened.  The maximum allowable
 * number of dispatch sockets (per manager) will be set to the double of this
 * value.
 */
#if !defined(DNS_DISPATCH_POOLSOCKS)
#define DNS_DISPATCH_POOLSOCKS			2048
#endif /* !defined(DNS_DISPATCH_POOLSOCKS) */

/*%
 * Quota to control the number of dispatch sockets.  If a dispatch has more
 * than the quota of sockets, new queries will purge the oldest ones, so that
 * a massive number of outstanding queries won't prevent subsequent queries
 * (especially if the older ones take a longer time and result in timeout).
 */
#if !defined(DNS_DISPATCH_SOCKSQUOTA)
#define DNS_DISPATCH_SOCKSQUOTA			3072
#endif /* !defined(DNS_DISPATCH_SOCKSQUOTA) */

struct dispsocket {
	unsigned int			magic;
	isc_socket_t			*socket;
	dns_dispatch_t			*disp;
162
	isc_sockaddr_t			host;
163
164
	in_port_t			localport; /* XXX: should be removed later */
	dispportentry_t			*portentry;
165
166
167
	dns_dispentry_t			*resp;
	isc_task_t			*task;
	ISC_LINK(dispsocket_t)		link;
168
169
	unsigned int			bucket;
	ISC_LINK(dispsocket_t)		blink;
Michael Graff's avatar
Michael Graff committed
170
};
Michael Graff's avatar
Michael Graff committed
171

172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
/*%
 * A port table entry.  We remember every port we first open in a table with a
 * reference counter so that we can 'reuse' the same port (with different
 * destination addresses) using the SO_REUSEADDR socket option.
 */
struct dispportentry {
	in_port_t			port;	/*%< local port number */
	unsigned int			refs;	/*%< sockets using 'port' */
	ISC_LINK(dispportentry_t)	link;	/*%< port_table bucket chain */
};

/*% Number of buckets in each dispatch's port table (indexed by port % size). */
#ifndef DNS_DISPATCH_PORTTABLESIZE
#define DNS_DISPATCH_PORTTABLESIZE	1024
#endif

/*% Sentinel bucket number. */
#define INVALID_BUCKET		(0xffffdead)
Michael Graff's avatar
Michael Graff committed
188

189
190
191
192
193
194
195
/*%
 * Number of tasks for each dispatch that uses separate sockets for different
 * transactions.  This must be a power of 2 as it will divide 32 bit numbers
 * to get a uniformly random task selection.  See get_dispsocket().
 */
#define MAX_INTERNAL_TASKS	64

Michael Graff's avatar
Michael Graff committed
196
197
struct dns_dispatch {
	/* Unlocked. */
198
199
	unsigned int		magic;		/*%< magic */
	dns_dispatchmgr_t      *mgr;		/*%< dispatch manager */
200
201
202
203
204
205
206
	int			ntasks;
	/*%
	 * internal task buckets.  We use multiple tasks to distribute various
	 * socket events well when using separate dispatch sockets.  We use the
	 * 1st task (task[0]) for internal control events.
	 */
	isc_task_t	       *task[MAX_INTERNAL_TASKS];
207
208
	isc_socket_t	       *socket;		/*%< isc socket attached to */
	isc_sockaddr_t		local;		/*%< local address */
209
	in_port_t		localport;	/*%< local UDP port */
210
	isc_sockaddr_t		peer;		/*%< peer address (TCP) */
Evan Hunt's avatar
Evan Hunt committed
211
	isc_dscp_t		dscp;		/*%< "listen-on" DSCP value */
212
	unsigned int		maxrequests;	/*%< max requests */
213
	isc_event_t	       *ctlevent;
Michael Graff's avatar
Michael Graff committed
214

215
216
217
	isc_mutex_t		sepool_lock;
	isc_mempool_t	       *sepool;		/*%< pool for socket events */

218
	/*% Locked by mgr->lock. */
219
220
221
	ISC_LINK(dns_dispatch_t) link;

	/* Locked by "lock". */
222
	isc_mutex_t		lock;		/*%< locks all below */
223
	isc_sockettype_t	socktype;
224
	unsigned int		attributes;
225
226
	unsigned int		refcount;	/*%< number of users */
	dns_dispatchevent_t    *failsafe_ev;	/*%< failsafe cancel event */
Michael Graff's avatar
Michael Graff committed
227
	unsigned int		shutting_down : 1,
228
229
				shutdown_out : 1,
				connected : 1,
230
				tcpmsg_valid : 1,
231
				recv_pending : 1; /*%< is a recv() pending? */
Michael Graff's avatar
Michael Graff committed
232
	isc_result_t		shutdown_why;
233
234
235
	ISC_LIST(dispsocket_t)	activesockets;
	ISC_LIST(dispsocket_t)	inactivesockets;
	unsigned int		nsockets;
236
237
238
	unsigned int		requests;	/*%< how many requests we have */
	unsigned int		tcpbuffers;	/*%< allocated buffers */
	dns_tcpmsg_t		tcpmsg;		/*%< for tcp streams */
239
	dns_qid_t		*qid;
240
	isc_rng_t		*rngctx;	/*%< for QID/UDP port num */
241
242
	dispportlist_t		*port_table;	/*%< hold ports 'owned' by us */
	isc_mempool_t		*portpool;	/*%< port table entries  */
Michael Graff's avatar
Michael Graff committed
243
244
};

245
246
247
/*
 * Magic numbers and validity checks for each object type defined in this
 * file.  Every VALID_*() macro defers to ISC_MAGIC_VALID().
 */
#define QID_MAGIC		ISC_MAGIC('Q', 'i', 'd', ' ')
#define VALID_QID(e)		ISC_MAGIC_VALID((e), QID_MAGIC)

#define RESPONSE_MAGIC		ISC_MAGIC('D', 'r', 's', 'p')
#define VALID_RESPONSE(e)	ISC_MAGIC_VALID((e), RESPONSE_MAGIC)

#define DISPSOCK_MAGIC		ISC_MAGIC('D', 's', 'o', 'c')
#define VALID_DISPSOCK(e)	ISC_MAGIC_VALID((e), DISPSOCK_MAGIC)

#define DISPATCH_MAGIC		ISC_MAGIC('D', 'i', 's', 'p')
#define VALID_DISPATCH(e)	ISC_MAGIC_VALID((e), DISPATCH_MAGIC)

#define DNS_DISPATCHMGR_MAGIC	ISC_MAGIC('D', 'M', 'g', 'r')
#define VALID_DISPATCHMGR(e)	ISC_MAGIC_VALID((e), DNS_DISPATCHMGR_MAGIC)
Michael Graff's avatar
Michael Graff committed
259

260
261
/*
 * Select the QID table / RNG context that applies to 'disp'.  TCP dispatches
 * use their own qid, everything else uses the manager's; UDP dispatches use
 * their own rngctx, everything else uses the manager's.
 *
 * The whole conditional is parenthesized so that the macros can be embedded
 * in larger expressions without the surrounding operators binding to only
 * one arm of the ?: operator.
 */
#define DNS_QID(disp) \
	(((disp)->socktype == isc_sockettype_tcp) ? \
	 (disp)->qid : (disp)->mgr->qid)
#define DISP_RNGCTX(disp) \
	(((disp)->socktype == isc_sockettype_udp) ? \
	 ((disp)->rngctx) : ((disp)->mgr->rngctx))
264
265
266
267
268
269
270
271
272
273
274

/*%
 * Locking a query port buffer is a bit tricky.  We access the buffer without
 * locking until qid is created.  Technically, there is a possibility of race
 * between the creation of qid and access to the port buffer; in practice,
 * however, this should be safe because qid isn't created until the first
 * dispatch is created and there should be no contending situation until then.
 */
/*
 * Take/release the qid lock guarding the port buffers, but only once the
 * manager's qid exists.  Wrapped in do { } while (0) so that each macro is a
 * single statement and cannot capture an 'else' that follows its use.
 */
#define PORTBUFLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			LOCK(&((mgr)->qid->lock)); \
	} while (0)
#define PORTBUFUNLOCK(mgr) \
	do { \
		if ((mgr)->qid != NULL) \
			UNLOCK((&(mgr)->qid->lock)); \
	} while (0)

275
/*
276
 * Statics.
277
 */
278
static dns_dispentry_t *entry_search(dns_qid_t *, const isc_sockaddr_t *,
279
				     dns_messageid_t, in_port_t, unsigned int);
280
static isc_boolean_t destroy_disp_ok(dns_dispatch_t *);
281
static void destroy_disp(isc_task_t *task, isc_event_t *event);
282
283
284
285
286
static void destroy_dispsocket(dns_dispatch_t *, dispsocket_t **);
static void deactivate_dispsocket(dns_dispatch_t *, dispsocket_t *);
static void udp_exrecv(isc_task_t *, isc_event_t *);
static void udp_shrecv(isc_task_t *, isc_event_t *);
static void udp_recv(isc_event_t *, dns_dispatch_t *, dispsocket_t *);
287
static void tcp_recv(isc_task_t *, isc_event_t *);
288
static isc_result_t startrecv(dns_dispatch_t *, dispsocket_t *);
289
290
static isc_uint32_t dns_hash(dns_qid_t *, const isc_sockaddr_t *,
			     dns_messageid_t, in_port_t);
291
static void free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len);
292
static void *allocate_udp_buffer(dns_dispatch_t *disp);
293
294
static inline void free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev);
static inline dns_dispatchevent_t *allocate_devent(dns_dispatch_t *disp);
295
static void do_cancel(dns_dispatch_t *disp);
296
297
static dns_dispentry_t *linear_first(dns_qid_t *disp);
static dns_dispentry_t *linear_next(dns_qid_t *disp,
298
				    dns_dispentry_t *resp);
299
static void dispatch_free(dns_dispatch_t **dispp);
300
301
302
static isc_result_t get_udpsocket(dns_dispatchmgr_t *mgr,
				  dns_dispatch_t *disp,
				  isc_socketmgr_t *sockmgr,
303
				  const isc_sockaddr_t *localaddr,
304
305
				  isc_socket_t **sockp,
				  isc_socket_t *dup_socket);
306
307
308
static isc_result_t dispatch_createudp(dns_dispatchmgr_t *mgr,
				       isc_socketmgr_t *sockmgr,
				       isc_taskmgr_t *taskmgr,
309
				       const isc_sockaddr_t *localaddr,
310
311
				       unsigned int maxrequests,
				       unsigned int attributes,
312
313
				       dns_dispatch_t **dispp,
				       isc_socket_t *dup_socket);
314
315
static isc_boolean_t destroy_mgr_ok(dns_dispatchmgr_t *mgr);
static void destroy_mgr(dns_dispatchmgr_t **mgrp);
316
static isc_result_t qid_allocate(dns_dispatchmgr_t *mgr, unsigned int buckets,
317
318
				 unsigned int increment, dns_qid_t **qidp,
				 isc_boolean_t needaddrtable);
319
static void qid_destroy(isc_mem_t *mctx, dns_qid_t **qidp);
320
321
static isc_result_t open_socket(isc_socketmgr_t *mgr,
				const isc_sockaddr_t *local,
322
323
				unsigned int options, isc_socket_t **sockp,
				isc_socket_t *dup_socket);
324
325
static isc_boolean_t portavailable(dns_dispatchmgr_t *mgr, isc_socket_t *sock,
				   isc_sockaddr_t *sockaddrp);
326
327

#define LVL(x) ISC_LOG_DEBUG(x)
Michael Graff's avatar
Michael Graff committed
328

329
330
331
332
static void
mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...)
     ISC_FORMAT_PRINTF(3, 4);

/*%
 * Log a printf-style message concerning dispatch manager 'mgr' at 'level'.
 *
 * The message is formatted into a fixed 2048-byte buffer (vsnprintf()
 * truncates anything longer) and written to the dispatch log category and
 * module, prefixed with the manager's address.  Returns without formatting
 * when isc_log_wouldlog() reports that 'level' is not being logged.
 */
static void
mgr_log(dns_dispatchmgr_t *mgr, int level, const char *fmt, ...) {
	char msgbuf[2048];
	va_list ap;

	if (! isc_log_wouldlog(dns_lctx, level))
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	isc_log_write(dns_lctx,
		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
		      level, "dispatchmgr %p: %s", mgr, msgbuf);
}

350
static inline void
Mark Andrews's avatar
Mark Andrews committed
351
inc_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
352
353
354
355
	if (mgr->stats != NULL)
		isc_stats_increment(mgr->stats, counter);
}

356
357
358
359
360
361
/*%
 * Decrement statistics counter 'counter' of 'mgr'.  Does nothing when the
 * manager has no statistics set attached (mgr->stats == NULL).
 */
static inline void
dec_stats(dns_dispatchmgr_t *mgr, isc_statscounter_t counter) {
	isc_stats_t *stats = mgr->stats;

	if (stats == NULL) {
		return;
	}

	isc_stats_decrement(stats, counter);
}

362
363
364
365
static void
dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...)
     ISC_FORMAT_PRINTF(3, 4);

/*%
 * Log a printf-style message concerning dispatch 'disp' at 'level'.
 *
 * The message is formatted into a fixed 2048-byte buffer (vsnprintf()
 * truncates anything longer) and written to the dispatch log category and
 * module, prefixed with the dispatch's address.  Returns without formatting
 * when isc_log_wouldlog() reports that 'level' is not being logged.
 */
static void
dispatch_log(dns_dispatch_t *disp, int level, const char *fmt, ...) {
	char msgbuf[2048];
	va_list ap;

	if (! isc_log_wouldlog(dns_lctx, level))
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	isc_log_write(dns_lctx,
		      DNS_LOGCATEGORY_DISPATCH, DNS_LOGMODULE_DISPATCH,
		      level, "dispatch %p: %s", disp, msgbuf);
}

383
384
385
386
387
static void
request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
	    int level, const char *fmt, ...)
     ISC_FORMAT_PRINTF(4, 5);

/*%
 * Log a printf-style message concerning response entry 'resp' of dispatch
 * 'disp' at 'level'.
 *
 * When 'resp' passes VALID_RESPONSE() the peer address stored in resp->host
 * is included in the log line; otherwise only the pointer values are logged
 * and 'resp' is not dereferenced here.  The message text is truncated to
 * 2048 bytes.
 */
static void
request_log(dns_dispatch_t *disp, dns_dispentry_t *resp,
	    int level, const char *fmt, ...)
{
	char msgbuf[2048];
	char peerbuf[256];
	va_list ap;

	if (! isc_log_wouldlog(dns_lctx, level))
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	if (VALID_RESPONSE(resp)) {
		isc_sockaddr_format(&resp->host, peerbuf, sizeof(peerbuf));
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
			      DNS_LOGMODULE_DISPATCH, level,
			      "dispatch %p response %p %s: %s", disp, resp,
			      peerbuf, msgbuf);
	} else {
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DISPATCH,
			      DNS_LOGMODULE_DISPATCH, level,
			      "dispatch %p req/resp %p: %s", disp, resp,
			      msgbuf);
	}
}

417
418
419
420
/*
 * Return a hash of the destination and message id.
 */
static isc_uint32_t
421
dns_hash(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id,
422
423
	 in_port_t port)
{
424
425
426
	unsigned int ret;

	ret = isc_sockaddr_hash(dest, ISC_TRUE);
427
	ret ^= (id << 16) | port;
428
	ret %= qid->qid_nbuckets;
429

430
	INSIST(ret < qid->qid_nbuckets);
431
432
433
434

	return (ret);
}

435
436
437
/*
 * Find the first entry in 'qid'.  Returns NULL if there are no entries.
 */
Michael Graff's avatar
Michael Graff committed
438
static dns_dispentry_t *
439
linear_first(dns_qid_t *qid) {
Michael Graff's avatar
Michael Graff committed
440
441
442
443
444
	dns_dispentry_t *ret;
	unsigned int bucket;

	bucket = 0;

445
446
	while (bucket < qid->qid_nbuckets) {
		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
Michael Graff's avatar
Michael Graff committed
447
448
449
450
451
452
453
454
		if (ret != NULL)
			return (ret);
		bucket++;
	}

	return (NULL);
}

455
456
457
458
/*
 * Find the next entry after 'resp' in 'qid'.  Return NULL if there are
 * no more entries.
 */
Michael Graff's avatar
Michael Graff committed
459
static dns_dispentry_t *
460
linear_next(dns_qid_t *qid, dns_dispentry_t *resp) {
Michael Graff's avatar
Michael Graff committed
461
462
463
464
465
466
467
468
	dns_dispentry_t *ret;
	unsigned int bucket;

	ret = ISC_LIST_NEXT(resp, link);
	if (ret != NULL)
		return (ret);

	bucket = resp->bucket;
469
	bucket++;
470
471
	while (bucket < qid->qid_nbuckets) {
		ret = ISC_LIST_HEAD(qid->qid_table[bucket]);
Michael Graff's avatar
Michael Graff committed
472
473
474
475
476
477
478
		if (ret != NULL)
			return (ret);
		bucket++;
	}

	return (NULL);
}
479

480
481
482
483
484
485
486
487
488
/*
 * The dispatch must be locked.
 *
 * Return ISC_TRUE only when 'disp' has been told to shut down and nothing
 * still depends on it: no references, no pending receive, and no active
 * dedicated sockets.
 */
static isc_boolean_t
destroy_disp_ok(dns_dispatch_t *disp)
{
	if (disp->refcount != 0)
		return (ISC_FALSE);

	if (disp->recv_pending != 0)
		return (ISC_FALSE);

	if (!ISC_LIST_EMPTY(disp->activesockets))
		return (ISC_FALSE);

	if (disp->shutting_down == 0)
		return (ISC_FALSE);

	return (ISC_TRUE);
}

501
/*
502
503
 * Called when refcount reaches 0 (and safe to destroy).
 *
Evan Hunt's avatar
Evan Hunt committed
504
505
 * The dispatcher must be locked.
 * The manager must not be locked.
506
507
 */
static void
508
destroy_disp(isc_task_t *task, isc_event_t *event) {
509
	dns_dispatch_t *disp;
510
511
	dns_dispatchmgr_t *mgr;
	isc_boolean_t killmgr;
512
513
	dispsocket_t *dispsocket;
	int i;
514

515
516
517
518
519
	INSIST(event->ev_type == DNS_EVENT_DISPATCHCONTROL);

	UNUSED(task);

	disp = event->ev_arg;
520
521
	mgr = disp->mgr;

522
	LOCK(&mgr->lock);
523
	ISC_LIST_UNLINK(mgr->list, disp, link);
Michael Graff's avatar
Michael Graff committed
524

525
526
	dispatch_log(disp, LVL(90),
		     "shutting down; detaching from sock %p, task %p",
527
		     disp->socket, disp->task[0]); /* XXXX */
528

529
530
	if (disp->sepool != NULL) {
		isc_mempool_destroy(&disp->sepool);
531
		(void)isc_mutex_destroy(&disp->sepool_lock);
532
533
	}

534
535
536
537
538
539
540
541
	if (disp->socket != NULL)
		isc_socket_detach(&disp->socket);
	while ((dispsocket = ISC_LIST_HEAD(disp->inactivesockets)) != NULL) {
		ISC_LIST_UNLINK(disp->inactivesockets, dispsocket, link);
		destroy_dispsocket(disp, &dispsocket);
	}
	for (i = 0; i < disp->ntasks; i++)
		isc_task_detach(&disp->task[i]);
542
	isc_event_free(&event);
543

544
	dispatch_free(&disp);
545
546
547
548
549

	killmgr = destroy_mgr_ok(mgr);
	UNLOCK(&mgr->lock);
	if (killmgr)
		destroy_mgr(&mgr);
550
551
}

552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
/*%
 * Manipulate port table per dispatch: find an entry for a given port number,
 * create a new entry, and decrement a given entry with possible clean-up.
 */
/*%
 * Look up the port table entry of 'disp' for local port number 'port'.
 * Returns the matching entry, or NULL if the port is not in the table.
 * Requires that 'disp' has a port table.
 */
static dispportentry_t *
port_search(dns_dispatch_t *disp, in_port_t port) {
	dispportentry_t *cur;
	unsigned int slot;

	REQUIRE(disp->port_table != NULL);

	slot = port % DNS_DISPATCH_PORTTABLESIZE;
	for (cur = ISC_LIST_HEAD(disp->port_table[slot]);
	     cur != NULL;
	     cur = ISC_LIST_NEXT(cur, link))
	{
		if (cur->port == port) {
			return (cur);
		}
	}

	return (NULL);
}

/*%
 * Create a port table entry for 'port' in 'disp' with an initial reference
 * count of 1 and append it to the matching port_table bucket while holding
 * the lock of the qid selected by DNS_QID().
 *
 * Returns the new entry, or NULL if the entry could not be allocated from
 * disp->portpool.  Requires that 'disp' has a port table.
 */
static dispportentry_t *
new_portentry(dns_dispatch_t *disp, in_port_t port) {
	dispportentry_t *portentry;
	dns_qid_t *qid;

	REQUIRE(disp->port_table != NULL);

	portentry = isc_mempool_get(disp->portpool);
	if (portentry == NULL)
		return (portentry);

	portentry->port = port;
	portentry->refs = 1;
	ISC_LINK_INIT(portentry, link);
	qid = DNS_QID(disp);
	LOCK(&qid->lock);
	ISC_LIST_APPEND(disp->port_table[port % DNS_DISPATCH_PORTTABLESIZE],
			portentry, link);
	UNLOCK(&qid->lock);

	return (portentry);
}

596
597
598
/*%
 * The caller must not hold the qid->lock.
 */
599
600
601
static void
deref_portentry(dns_dispatch_t *disp, dispportentry_t **portentryp) {
	dispportentry_t *portentry = *portentryp;
Mark Andrews's avatar
Mark Andrews committed
602
	dns_qid_t *qid;
603
604
605
606

	REQUIRE(disp->port_table != NULL);
	REQUIRE(portentry != NULL && portentry->refs > 0);

607
608
	qid = DNS_QID(disp);
	LOCK(&qid->lock);
609
	portentry->refs--;
610

611
	if (portentry->refs == 0) {
612
613
614
615
616
617
		ISC_LIST_UNLINK(disp->port_table[portentry->port %
						 DNS_DISPATCH_PORTTABLESIZE],
				portentry, link);
		isc_mempool_put(disp->portpool, portentry);
	}

Evan Hunt's avatar
Evan Hunt committed
618
619
620
621
	/*
	 * Set '*portentryp' to NULL inside the lock so that
	 * dispsock->portentry does not change in socket_search.
	 */
622
	*portentryp = NULL;
Evan Hunt's avatar
Evan Hunt committed
623
624

	UNLOCK(&qid->lock);
625
626
}

627
628
/*%
 * Find a dispsocket for socket address 'dest', and port number 'port'.
Evan Hunt's avatar
Evan Hunt committed
629
 * Return NULL if no such entry exists.  Requires qid->lock to be held.
630
631
 */
static dispsocket_t *
632
socket_search(dns_qid_t *qid, const isc_sockaddr_t *dest, in_port_t port,
633
634
635
636
	      unsigned int bucket)
{
	dispsocket_t *dispsock;

Evan Hunt's avatar
Evan Hunt committed
637
	REQUIRE(VALID_QID(qid));
638
639
640
641
642
	REQUIRE(bucket < qid->qid_nbuckets);

	dispsock = ISC_LIST_HEAD(qid->sock_table[bucket]);

	while (dispsock != NULL) {
Automatic Updater's avatar
Automatic Updater committed
643
		if (dispsock->portentry != NULL &&
644
645
		    dispsock->portentry->port == port &&
		    isc_sockaddr_equal(dest, &dispsock->host))
646
647
648
649
650
651
652
			return (dispsock);
		dispsock = ISC_LIST_NEXT(dispsock, blink);
	}

	return (NULL);
}

653
654
/*%
 * Make a new socket for a single dispatch with a random port number.
655
 * The caller must hold the disp->lock
656
657
 */
static isc_result_t
658
get_dispsocket(dns_dispatch_t *disp, const isc_sockaddr_t *dest,
659
660
	       isc_socketmgr_t *sockmgr, dispsocket_t **dispsockp,
	       in_port_t *portp)
661
662
663
664
665
666
667
668
{
	int i;
	isc_uint32_t r;
	dns_dispatchmgr_t *mgr = disp->mgr;
	isc_socket_t *sock = NULL;
	isc_result_t result = ISC_R_FAILURE;
	in_port_t port;
	isc_sockaddr_t localaddr;
669
	unsigned int bucket = 0;
670
671
672
	dispsocket_t *dispsock;
	unsigned int nports;
	in_port_t *ports;
673
	unsigned int bindoptions;
674
	dispportentry_t *portentry = NULL;
675
	dns_qid_t *qid;
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700

	if (isc_sockaddr_pf(&disp->local) == AF_INET) {
		nports = disp->mgr->nv4ports;
		ports = disp->mgr->v4ports;
	} else {
		nports = disp->mgr->nv6ports;
		ports = disp->mgr->v6ports;
	}
	if (nports == 0)
		return (ISC_R_ADDRNOTAVAIL);

	dispsock = ISC_LIST_HEAD(disp->inactivesockets);
	if (dispsock != NULL) {
		ISC_LIST_UNLINK(disp->inactivesockets, dispsock, link);
		sock = dispsock->socket;
		dispsock->socket = NULL;
	} else {
		dispsock = isc_mempool_get(mgr->spool);
		if (dispsock == NULL)
			return (ISC_R_NOMEMORY);

		disp->nsockets++;
		dispsock->socket = NULL;
		dispsock->disp = disp;
		dispsock->resp = NULL;
701
		dispsock->portentry = NULL;
702
703
704
705
		isc_random_get(&r);
		dispsock->task = NULL;
		isc_task_attach(disp->task[r % disp->ntasks], &dispsock->task);
		ISC_LINK_INIT(dispsock, link);
706
		ISC_LINK_INIT(dispsock, blink);
707
708
709
710
711
712
713
714
715
		dispsock->magic = DISPSOCK_MAGIC;
	}

	/*
	 * Pick up a random UDP port and open a new socket with it.  Avoid
	 * choosing ports that share the same destination because it will be
	 * very likely to fail in bind(2) or connect(2).
	 */
	localaddr = disp->local;
716
717
	qid = DNS_QID(disp);

718
	for (i = 0; i < 64; i++) {
719
		port = ports[isc_rng_uniformrandom(DISP_RNGCTX(disp), nports)];
720
721
		isc_sockaddr_setport(&localaddr, port);

722
		LOCK(&qid->lock);
723
		bucket = dns_hash(qid, dest, 0, port);
724
725
		if (socket_search(qid, dest, port, bucket) != NULL) {
			UNLOCK(&qid->lock);
726
			continue;
727
728
		}
		UNLOCK(&qid->lock);
729
		bindoptions = 0;
730
		portentry = port_search(disp, port);
731

732
733
		if (portentry != NULL)
			bindoptions |= ISC_SOCKET_REUSEADDRESS;
734
		result = open_socket(sockmgr, &localaddr, bindoptions, &sock,
Automatic Updater's avatar
Automatic Updater committed
735
				     NULL);
736
737
738
739
740
741
742
		if (result == ISC_R_SUCCESS) {
			if (portentry == NULL) {
				portentry = new_portentry(disp, port);
				if (portentry == NULL) {
					result = ISC_R_NOMEMORY;
					break;
				}
743
744
745
746
			} else {
				LOCK(&qid->lock);
				portentry->refs++;
				UNLOCK(&qid->lock);
747
748
			}
			break;
749
750
751
752
753
754
		} else if (result == ISC_R_NOPERM) {
			char buf[ISC_SOCKADDR_FORMATSIZE];
			isc_sockaddr_format(&localaddr, buf, sizeof(buf));
			dispatch_log(disp, ISC_LOG_WARNING,
				     "open_socket(%s) -> %s: continuing",
				     buf, isc_result_totext(result));
755
		} else if (result != ISC_R_ADDRINUSE)
756
757
758
759
760
			break;
	}

	if (result == ISC_R_SUCCESS) {
		dispsock->socket = sock;
761
		dispsock->host = *dest;
762
		dispsock->portentry = portentry;
763
		dispsock->bucket = bucket;
764
		LOCK(&qid->lock);
765
		ISC_LIST_APPEND(qid->sock_table[bucket], dispsock, blink);
766
		UNLOCK(&qid->lock);
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
		*dispsockp = dispsock;
		*portp = port;
	} else {
		/*
		 * We could keep it in the inactive list, but since this should
		 * be an exceptional case and might be resource shortage, we'd
		 * rather destroy it.
		 */
		if (sock != NULL)
			isc_socket_detach(&sock);
		destroy_dispsocket(disp, &dispsock);
	}

	return (result);
}

/*%
 * Destroy a dedicated dispatch socket.
 *
 * Releases everything '*dispsockp' still holds (port entry reference,
 * socket, sock_table linkage, task), returns the record to the manager's
 * spool, decrements disp->nsockets and sets '*dispsockp' to NULL.
 *
 * The record must already be off the dispatch's active/inactive list.
 */
static void
destroy_dispsocket(dns_dispatch_t *disp, dispsocket_t **dispsockp) {
	dispsocket_t *dispsock;
	dns_qid_t *qid;

	/*
	 * The dispatch must be locked.
	 */

	REQUIRE(dispsockp != NULL && *dispsockp != NULL);
	dispsock = *dispsockp;
	REQUIRE(!ISC_LINK_LINKED(dispsock, link));

	disp->nsockets--;
	dispsock->magic = 0;
	if (dispsock->portentry != NULL)
		deref_portentry(disp, &dispsock->portentry);
	if (dispsock->socket != NULL)
		isc_socket_detach(&dispsock->socket);
	if (ISC_LINK_LINKED(dispsock, blink)) {
		qid = DNS_QID(disp);
		LOCK(&qid->lock);
		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
				blink);
		UNLOCK(&qid->lock);
	}
	if (dispsock->task != NULL)
		isc_task_detach(&dispsock->task);
	isc_mempool_put(disp->mgr->spool, dispsock);

	*dispsockp = NULL;
}

/*%
 * Deactivate a dedicated dispatch socket.  Move it to the inactive list for
 * future reuse unless the total number of sockets are exceeding the maximum.
 *
 * The socket is detached from its response entry (if any) and its port entry
 * reference is dropped.  If the dispatch holds more than
 * DNS_DISPATCH_POOLSOCKS sockets, or isc_socket_close() is not implemented,
 * the record is destroyed instead of pooled.
 */
static void
deactivate_dispsocket(dns_dispatch_t *disp, dispsocket_t *dispsock) {
	isc_result_t result;
	dns_qid_t *qid;

	/*
	 * The dispatch must be locked.
	 */
	ISC_LIST_UNLINK(disp->activesockets, dispsock, link);
	if (dispsock->resp != NULL) {
		INSIST(dispsock->resp->dispsocket == dispsock);
		dispsock->resp->dispsocket = NULL;
	}

	INSIST(dispsock->portentry != NULL);
	deref_portentry(disp, &dispsock->portentry);

	if (disp->nsockets > DNS_DISPATCH_POOLSOCKS)
		destroy_dispsocket(disp, &dispsock);
	else {
		result = isc_socket_close(dispsock->socket);

		qid = DNS_QID(disp);
		LOCK(&qid->lock);
		ISC_LIST_UNLINK(qid->sock_table[dispsock->bucket], dispsock,
				blink);
		UNLOCK(&qid->lock);

		if (result == ISC_R_SUCCESS)
			ISC_LIST_APPEND(disp->inactivesockets, dispsock, link);
		else {
			/*
			 * If the underlying system does not allow this
			 * optimization, destroy this temporary structure (and
			 * create a new one for a new transaction).
			 */
			INSIST(result == ISC_R_NOTIMPLEMENTED);
			destroy_dispsocket(disp, &dispsock);
		}
	}
}
864

865
/*
866
 * Find an entry for query ID 'id', socket address 'dest', and port number
867
 * 'port'.
868
869
 * Return NULL if no such entry exists.
 */
Michael Graff's avatar
Michael Graff committed
870
static dns_dispentry_t *
871
entry_search(dns_qid_t *qid, const isc_sockaddr_t *dest, dns_messageid_t id,
872
	     in_port_t port, unsigned int bucket)
873
{
Michael Graff's avatar
Michael Graff committed
874
	dns_dispentry_t *res;
875

Evan Hunt's avatar
Evan Hunt committed
876
	REQUIRE(VALID_QID(qid));
877
	REQUIRE(bucket < qid->qid_nbuckets);
878

879
	res = ISC_LIST_HEAD(qid->qid_table[bucket]);
880
881

	while (res != NULL) {
882
		if (res->id == id && isc_sockaddr_equal(dest, &res->host) &&
883
		    res->port == port) {
884
			return (res);
885
		}
886
887
888
889
890
891
		res = ISC_LIST_NEXT(res, link);
	}

	return (NULL);
}

Michael Graff's avatar
Michael Graff committed
892
static void
893
free_buffer(dns_dispatch_t *disp, void *buf, unsigned int len) {
894
	isc_mempool_t *bpool;
Michael Graff's avatar
Michael Graff committed
895
	INSIST(buf != NULL && len != 0);
896

Michael Graff's avatar
Michael Graff committed
897

898
	switch (disp->socktype) {
899
	case isc_sockettype_tcp:
900
901
		INSIST(disp->tcpbuffers > 0);
		disp->tcpbuffers--;
902
		isc_mem_put(disp->mgr->mctx, buf, len);
903
		break;
904
	case isc_sockettype_udp:
905
906
907
908
		LOCK(&disp->mgr->buffer_lock);
		INSIST(disp->mgr->buffers > 0);
		INSIST(len == disp->mgr->buffersize);
		disp->mgr->buffers--;
909
		bpool = disp->mgr->bpool;
910
		UNLOCK(&disp->mgr->buffer_lock);
911
		isc_mempool_put(bpool, buf);
912
913
		break;
	default:
Michael Graff's avatar
Michael Graff committed
914
		INSIST(0);
915
916
		break;
	}
Michael Graff's avatar
Michael Graff committed
917
918
919
}

static void *
920
allocate_udp_buffer(dns_dispatch_t *disp) {
921
	isc_mempool_t *bpool;
Michael Graff's avatar
Michael Graff committed
922
923
	void *temp;

924
	LOCK(&disp->mgr->buffer_lock);
925
926
	bpool = disp->mgr->bpool;
	disp->mgr->buffers++;
927
	UNLOCK(&disp->mgr->buffer_lock);
Michael Graff's avatar
Michael Graff committed
928

929
930
931
932
933
934
935
936
	temp = isc_mempool_get(bpool);

	if (temp == NULL) {
		LOCK(&disp->mgr->buffer_lock);
		disp->mgr->buffers--;
		UNLOCK(&disp->mgr->buffer_lock);
	}

Michael Graff's avatar
Michael Graff committed
937
938
939
940
	return (temp);
}

static inline void
941
942
943
944
945
946
947
free_sevent(isc_event_t *ev) {
	isc_mempool_t *pool = ev->ev_destroy_arg;
	isc_socketevent_t *sev = (isc_socketevent_t *) ev;
	isc_mempool_put(pool, sev);
}

static inline isc_socketevent_t *
948
allocate_sevent(dns_dispatch_t *disp, isc_socket_t *sock,
Tinderbox User's avatar
Tinderbox User committed
949
		isc_eventtype_t type, isc_taskaction_t action, const void *arg)
950
951
952
953
954
955
956
957
958
{
	isc_socketevent_t *ev;
	void *deconst_arg;

	ev = isc_mempool_get(disp->sepool);
	if (ev == NULL)
		return (NULL);
	DE_CONST(arg, deconst_arg);
	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, type,
959
		       action, deconst_arg, sock,
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
		       free_sevent, disp->sepool);
	ev->result = ISC_R_UNSET;
	ISC_LINK_INIT(ev, ev_link);
	ISC_LIST_INIT(ev->bufferlist);
	ev->region.base = NULL;
	ev->n = 0;
	ev->offset = 0;
	ev->attributes = 0;

	return (ev);
}


/*
 * Dispose of the dispatch event 'ev'.
 *
 * An ordinary event is returned to the manager's 'depool'.  The dispatch's
 * 'failsafe_ev' is never returned to the pool; instead 'shutdown_out',
 * which must be 1 at this point, is reset to 0.
 */
static inline void
free_devent(dns_dispatch_t *disp, dns_dispatchevent_t *ev) {
	if (disp->failsafe_ev != ev) {
		isc_mempool_put(disp->mgr->depool, ev);
		return;
	}

	INSIST(disp->shutdown_out == 1);
	disp->shutdown_out = 0;
}

static inline dns_dispatchevent_t *
986
allocate_devent(dns_dispatch_t *disp) {
Michael Graff's avatar
Michael Graff committed
987
988
	dns_dispatchevent_t *ev;

989
	ev = isc_mempool_get(disp->mgr->depool);
990
991
	if (ev == NULL)
		return (NULL);
992
993
	ISC_EVENT_INIT(ev, sizeof(*ev), 0, NULL, 0,
		       NULL, NULL, NULL, NULL, NULL);
Michael Graff's avatar
Michael Graff committed
994
995
996
997

	return (ev);
}

998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
/*
 * Task action for a receive event whose 'ev_arg' is a dispsocket_t:
 * validates the dispatch socket and hands the event to udp_recv() together
 * with the socket and the dispatch it belongs to.  'task' is unused.
 */
static void
udp_exrecv(isc_task_t *task, isc_event_t *ev) {
	dispsocket_t *dispsock;

	UNUSED(task);

	dispsock = ev->ev_arg;
	REQUIRE(VALID_DISPSOCK(dispsock));

	udp_recv(ev, dispsock->disp, dispsock);
}

/*
 * Task action for a receive event whose 'ev_arg' is the dispatch itself:
 * validates the dispatch and hands the event to udp_recv() with no
 * dispatch socket (NULL).  'task' is unused.
 */
static void
udp_shrecv(isc_task_t *task, isc_event_t *ev) {
	dns_dispatch_t *disp;

	UNUSED(task);

	disp = ev->ev_arg;
	REQUIRE(VALID_DISPATCH(disp));

	udp_recv(ev, disp, NULL);
}

Michael Graff's avatar
Michael Graff committed
1018
1019
1020
/*
 * General flow:
 *
 * If the I/O result is CANCELED or an error, free the buffer.
 *
 * If the packet is a query, free the buffer and restart.
 *
 * If the packet is a response:
 *	Allocate an event and fill in the details.
 *		If it cannot be allocated, free the buffer and restart.
 *	Find the target.  If not found, free the buffer and restart.
 *	If the event queue is not empty, queue the event; else, send it.
 *	Restart.
 */
1032
static void
1033
udp_recv(isc_event_t *ev_in, dns_dispatch_t *disp, dispsocket_t *dispsock) {
1034
1035
	isc_socketevent_t *ev = (isc_socketevent_t *)ev_in;
	dns_messageid_t id;
Michael Graff's avatar
Michael Graff committed
1036
	isc_result_t dres;
1037
1038
	isc_buffer_t source;
	unsigned int flags;
1039
	dns_dispentry_t *resp = NULL;
Michael Graff's avatar
Michael Graff committed
1040
1041
	dns_dispatchevent_t *rev;
	unsigned int bucket;
Michael Graff's avatar
Michael Graff committed
1042
	isc_boolean_t killit;
Michael Graff's avatar
Michael Graff committed
1043
	isc_boolean_t queue_response;
1044
	dns_dispatchmgr_t *mgr;
1045
	dns_qid_t *qid;
1046
1047
	isc_netaddr_t netaddr;
	int match;
1048
1049
	int result;
	isc_boolean_t qidlocked = ISC_FALSE;
1050
1051
1052

	LOCK(&disp->lock);

1053
	mgr = disp->mgr;
1054
	qid = mgr->qid;
1055

1056
	dispatch_log(disp, LVL(90),
1057
		     "got packet: requests %d, buffers %d, recvs %d",
1058
		     disp->requests, disp->mgr->buffers, disp->recv_pending);
Michael Graff's avatar
Michael Graff committed
1059

1060
	if (dispsock == NULL && ev->ev_type == ISC_SOCKEVENT_RECVDONE) {
1061
1062
1063
1064
1065
		/*
		 * Unless the receive event was imported from a listening
		 * interface, in which case the event type is
		 * DNS_EVENT_IMPORTRECVDONE, receive operation must be pending.
		 */
1066
1067
1068
		INSIST(disp->recv_pending != 0);
		disp->recv_pending = 0;
	}
Michael Graff's avatar
Michael Graff committed
1069

1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
	if (dispsock != NULL &&
	    (ev->result == ISC_R_CANCELED || dispsock->resp == NULL)) {
		/*
		 * dispsock->resp can be NULL if this transaction was canceled
		 * just after receiving a response.  Since this socket is
		 * exclusively used and there should be at most one receive
		 * event the canceled event should have been no effect.  So
		 * we can (and should) deactivate the socket right now.
		 */
		deactivate_dispsocket(disp, dispsock);
		dispsock = NULL;
	}

1083
	if (disp->shutting_down) {
1084
1085
1086
1087
1088
		/*
		 * This dispatcher is shutting down.
		 */
		free_buffer(disp, ev->region.base, ev->region.length);

1089
1090
		isc_event_free(&ev_in);
		ev = NULL;
1091

1092
		killit = destroy_disp_ok(disp);
1093
		UNLOCK(&disp->lock);
1094
		if (killit)
1095
			isc_task_send(disp->task[0], &disp->ctlevent);
1096
1097
1098
1099

		return;
	}

1100
1101
	if ((disp->attributes & DNS_DISPATCHATTR_EXCLUSIVE) != 0) {
		if (dispsock != NULL) {
Automatic Updater's avatar
Automatic Updater committed
1102
			resp = dispsock->resp;
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
			id = resp->id;
			if (ev->result != ISC_R_SUCCESS) {
				/*
				 * This is most likely a network error on a
				 * connected socket.  It makes no sense to
				 * check the address or parse the packet, but it
				 * will help to return the error to the caller.
				 */
				goto sendresponse;
			}
		} else {
Mark Andrews's avatar
Mark Andrews committed
1114
1115
			free_buffer(disp, ev->region.base, ev->region.length);

1116
			isc_event_free(&ev_in);
1117
			UNLOCK(&disp->lock);
1118
			return;
1119
1120
		}
	} else if (ev->result != ISC_R_SUCCESS) {
1121
1122
		free_buffer(disp, ev->region.base, ev->region.length);

1123
1124
1125
1126
		if (ev->result != ISC_R_CANCELED)
			dispatch_log(disp, ISC_LOG_ERROR,
				     "odd socket result in udp_recv(): %s",
				     isc_result_totext(ev->result));
1127

1128
		isc_event_free(&ev_in);
1129
		UNLOCK(&disp->lock);
1130
		return;
1131
1132
	}

1133
1134
1135
1136
	/*
	 * If this is from a blackholed address, drop it.
	 */
	isc_netaddr_fromsockaddr(&netaddr, &ev->address);
1137
1138
	if (disp->mgr->blackhole != NULL &&
	    dns_acl_match(&netaddr, NULL, disp->mgr->blackhole,
1139
			  NULL, &match, NULL) == ISC_R_SUCCESS &&
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
	    match > 0)
	{
		if (isc_log_wouldlog(dns_lctx, LVL(10))) {
			char netaddrstr[ISC_NETADDR_FORMATSIZE];
			isc_netaddr_format(&netaddr, netaddrstr,
					   sizeof(netaddrstr));
			dispatch_log(disp, LVL(10),
				     "blackholed packet from %s",
				     netaddrstr);
		}
		free_buffer(disp, ev->region.base, ev->region.length);
		goto restart;
	}

1154
1155
1156
	/*
	 * Peek into the buffer to see what we can see.
	 */
1157
	isc_buffer_init(&source, ev->region.base, ev->region.length);
Michael Graff's avatar
Michael Graff committed
1158
	isc_buffer_add(&source, ev->n);
1159
	dres = dns_message_peekheader(&source, &id, &flags);
1160
	if (dres != ISC_R_SUCCESS) {
Michael Graff's avatar
Michael Graff committed
1161
		free_buffer(disp, ev->region.base, ev->region.length);
1162
		dispatch_log(disp, LVL(10), "got garbage packet");
1163
1164
1165
		goto restart;
	}

1166
	dispatch_log(disp, LVL(92),
1167
		     "got valid DNS message header, /QR %c, id %u",
1168
		     ((flags & DNS_MESSAGEFLAG_QR) ? '1' : '0'), id);
Michael Graff's avatar
Michael Graff committed
1169

1170
	/*
1171
1172
	 * Look at flags.  If query, drop it. If response,
	 * look to see where it goes.
1173
	 */
Michael Graff's avatar
Michael Graff committed
1174
	if ((flags & DNS_MESSAGEFLAG_QR) == 0) {
Michael Graff's avatar
Michael Graff committed
1175
		/* query */
1176
1177
		free_buffer(disp, ev->region.base, ev->region.length);
		goto restart;
1178
1179
	}

1180
1181
1182
1183
1184
	/*
	 * Search for the corresponding response.  If we are using an exclusive
	 * socket, we've already identified it and we can skip the search; but
	 * the ID and the address must match the expected ones.
	 */
1185
	if (resp == NULL) {
1186
1187
1188
		bucket = dns_hash(qid, &ev->address, id, disp->localport);
		LOCK(&qid->lock);
		qidlocked = ISC_TRUE;
1189
1190
		resp = entry_search(qid, &ev->address, id, disp->localport,
				    bucket);
1191
1192
1193
1194
1195
		dispatch_log(disp, LVL(90),
			     "search for response in bucket %d: %s",
			     bucket, (resp == NULL ? "not found" : "found"));

		if (resp == NULL) {
1196
			inc_stats(mgr, dns_resstatscounter_mismatch);
1197
1198
1199
1200
1201
1202
1203
			free_buffer(disp, ev->region.base, ev->region.length);
			goto unlock;
		}
	} else if (resp->id != id || !isc_sockaddr_equal(&ev->address,
							 &resp->host)) {
		dispatch_log(disp, LVL(90),
			     "response to an exclusive socket doesn't match");
1204
		inc_stats(mgr, dns_resstatscounter_mismatch);
1205
1206
		free_buffer(disp, ev->region.base, ev->region.length);
		goto unlock;
1207
	}
1208