/*
 * Copyright (C) 2004-2014  Internet Systems Consortium, Inc. ("ISC")
 * Copyright (C) 1999-2003  Internet Software Consortium.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

18
/*! \file */
David Lawrence's avatar
David Lawrence committed
19

20
21
22
23
/*
 * Principal Author: Bob Halley
 */

24
25
#include <config.h>

Mark Andrews's avatar
Mark Andrews committed
26
/* #define inline */
27

Mark Andrews's avatar
Mark Andrews committed
28
29
30
31
#ifdef HAVE_INTTYPES_H
#include <inttypes.h> /* uintptr_t */
#endif

32
#include <isc/crc64.h>
33
#include <isc/event.h>
34
#include <isc/heap.h>
35
#include <isc/file.h>
Evan Hunt's avatar
Evan Hunt committed
36
#include <isc/hex.h>
37
#include <isc/mem.h>
38
#include <isc/mutex.h>
39
#include <isc/platform.h>
40
#include <isc/print.h>
41
#include <isc/random.h>
42
#include <isc/refcount.h>
Bob Halley's avatar
Bob Halley committed
43
#include <isc/rwlock.h>
44
#include <isc/serial.h>
45
46
#include <isc/socket.h>
#include <isc/stdio.h>
47
#include <isc/string.h>
48
#include <isc/task.h>
49
#include <isc/time.h>
Michael Graff's avatar
Michael Graff committed
50
#include <isc/util.h>
Bob Halley's avatar
Bob Halley committed
51

52
#include <dns/acache.h>
53
#include <dns/callbacks.h>
54
55
#include <dns/db.h>
#include <dns/dbiterator.h>
56
#include <dns/events.h>
57
#include <dns/fixedname.h>
58
#include <dns/lib.h>
59
#include <dns/log.h>
60
#include <dns/masterdump.h>
61
#include <dns/nsec.h>
62
#include <dns/nsec3.h>
63
#include <dns/rbt.h>
64
#include <dns/rpz.h>
65
#include <dns/rdata.h>
Bob Halley's avatar
Bob Halley committed
66
67
#include <dns/rdataset.h>
#include <dns/rdatasetiter.h>
68
#include <dns/rdataslab.h>
69
#include <dns/rdatastruct.h>
70
#include <dns/result.h>
71
#include <dns/stats.h>
72
#include <dns/version.h>
73
74
#include <dns/view.h>
#include <dns/zone.h>
75
#include <dns/zonekey.h>
Bob Halley's avatar
Bob Halley committed
76

Evan Hunt's avatar
Evan Hunt committed
77
#ifndef WIN32
78
#include <sys/mman.h>
Evan Hunt's avatar
Evan Hunt committed
79
80
81
82
83
84
#else
#define PROT_READ	0x01
#define PROT_WRITE	0x02
#define MAP_PRIVATE	0x0002
#define MAP_FAILED	((void *)-1)
#endif
85

86
87
88
#ifdef DNS_RBTDB_VERSION64
#include "rbtdb64.h"
#else
Bob Halley's avatar
Bob Halley committed
89
#include "rbtdb.h"
90
#endif
Bob Halley's avatar
Bob Halley committed
91

92
/*
 * Implementation magic number; the last character differs between the
 * 64 bit ('8') and the default ('4') builds.
 */
#ifdef DNS_RBTDB_VERSION64
#define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
#else
#define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
#endif

98
99
100
101
102
103
/*
 * Evaluate 'op', store its value in the enclosing function's 'result'
 * variable, and jump to the enclosing function's 'failure' label when
 * that value is not ISC_R_SUCCESS.
 */
#define CHECK(op) \
	do { \
		result = (op); \
		if (result != ISC_R_SUCCESS) { \
			goto failure; \
		} \
	} while (0)

/*
Evan Hunt's avatar
Evan Hunt committed
104
 * This is the map file header for RBTDB images.  It is populated, and then
105
106
107
108
109
110
111
112
113
114
 * written, as the LAST thing done to the file.  Writing this last (with
 * zeros in the header area initially) will ensure that the header is only
 * valid when the RBTDB image is also valid.
 */
typedef struct rbtdb_file_header rbtdb_file_header_t;

/* Pad to 32 bytes */
static char FILE_VERSION[32] = "\0";

/* Header length, always the same size regardless of structure size */
115
#define RBTDB_HEADER_LENGTH	1024
116
117
118
119
120
121
122
123
124
125
126
127
128

struct rbtdb_file_header {
	char version1[32];
	isc_uint32_t ptrsize;
	unsigned int bigendian:1;
	isc_uint64_t tree;
	isc_uint64_t nsec;
	isc_uint64_t nsec3;

	char version2[32];  		/* repeated; must match version1 */
};


129
/*%
 * True when 'rbtdb' is non-NULL and carries this implementation's magic.
 *
 * Note that "impmagic" is not the first four bytes of the struct, so
 * ISC_MAGIC_VALID cannot be used.
 */
#define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
				 (rbtdb)->common.impmagic == RBTDB_MAGIC)
Bob Halley's avatar
Bob Halley committed
135

136
#ifdef DNS_RBTDB_VERSION64
137
typedef isc_uint64_t                    rbtdb_serial_t;
138
/*%
139
140
141
142
143
144
 * Make casting easier in symbolic debuggers by using different names
 * for the 64 bit version.
 */
#define dns_rbtdb_t dns_rbtdb64_t
#define rdatasetheader_t rdatasetheader64_t
#define rbtdb_version_t rbtdb_version64_t
145
#else
146
typedef isc_uint32_t                    rbtdb_serial_t;
147
148
#endif

149
typedef isc_uint32_t                    rbtdb_rdatatype_t;
Bob Halley's avatar
Bob Halley committed
150

151
152
/*
 * An rbtdb_rdatatype_t holds two 16 bit values: the "base" part in the
 * low 16 bits and the "ext" part in the high 16 bits.
 *
 * RBTDB_RDATATYPE_BASE(type)       extracts the low 16 bits.
 * RBTDB_RDATATYPE_EXT(type)        extracts the high 16 bits.
 * RBTDB_RDATATYPE_VALUE(base, ext) combines the two parts.
 *
 * The combination is done in isc_uint32_t: shifting a 16 bit value that
 * has been promoted to (signed) int left by 16 overflows int whenever the
 * value is 0x8000 or greater, which is undefined behavior.
 */
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
#define RBTDB_RDATATYPE_VALUE(base, ext) \
	((rbtdb_rdatatype_t)((((isc_uint32_t)(ext)) << 16) | \
			     (((isc_uint32_t)(base)) & 0xFFFF)))
Bob Halley's avatar
Bob Halley committed
154

155
/*
 * Pre-built combined type values.  Each RBTDB_RDATATYPE_SIG* value has
 * dns_rdatatype_rrsig as its base part and the named type as its ext
 * part; RBTDB_RDATATYPE_NCACHEANY has base 0 and ext dns_rdatatype_any.
 */
#define RBTDB_RDATATYPE_SIGNSEC \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
#define RBTDB_RDATATYPE_SIGNSEC3 \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
#define RBTDB_RDATATYPE_SIGNS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
#define RBTDB_RDATATYPE_SIGCNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
#define RBTDB_RDATATYPE_SIGDNAME \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
#define RBTDB_RDATATYPE_SIGDDS \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ds)
#define RBTDB_RDATATYPE_NCACHEANY \
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
Bob Halley's avatar
Bob Halley committed
169

170
/*
 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is defined.
 * Using rwlock is effective with regard to lookup performance only when
 * it is implemented in an efficient way.
 * Otherwise, it is generally wise to stick to the simple locking since rwlock
 * would require more memory or can even make lookups slower due to its own
 * overhead (when it internally calls mutex locks).
 */
#ifdef ISC_RWLOCK_USEATOMIC
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif

/*
 * DB lock wrappers.  In the mutex variant the lock-type argument 't' is
 * accepted but ignored.
 */
#if DNS_RBTDB_USERWLOCK
#define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
#else
#define RBTDB_INITLOCK(l)       isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)        LOCK(l)
#define RBTDB_UNLOCK(l, t)      UNLOCK(l)
#endif

/*
 * Since node locking is sensitive to both performance and memory footprint,
 * we need some trick here.  If we have both high-performance rwlock and
 * high performance and small-memory reference counters, we use rwlock for
 * node lock and isc_refcount for node references.  In this case, we don't have
 * to protect the access to the counters by locks.
 * Otherwise, we simply use ordinary mutex lock for node locking, and use
 * simple integers as reference counters which is protected by the lock.
 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
 * NODE_UNLOCK.  In some other cases, however, we need to protect reference
 * counters first and then protect other parts of a node as read-only data.
 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
 * provided for these special cases.  When we can use the efficient backend
 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
 * section including the access to the reference counter.
 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
 * section is also protected by NODE_STRONGLOCK().
 */
#if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
typedef isc_rwlock_t nodelock_t;

#define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
#define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
#define NODE_LOCK(l, t)         RWLOCK((l), (t))
#define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
#define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)

/* The strong variants are no-ops here; the weak variants take the rwlock. */
#define NODE_STRONGLOCK(l)      ((void)0)
#define NODE_STRONGUNLOCK(l)    ((void)0)
#define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
#define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
#define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
#else
typedef isc_mutex_t nodelock_t;

#define NODE_INITLOCK(l)        isc_mutex_init(l)
#define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
#define NODE_LOCK(l, t)         LOCK(l)
#define NODE_UNLOCK(l, t)       UNLOCK(l)
#define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS

/* The strong variants take the mutex here; the weak variants are no-ops. */
#define NODE_STRONGLOCK(l)      LOCK(l)
#define NODE_STRONGUNLOCK(l)    UNLOCK(l)
#define NODE_WEAKLOCK(l, t)     ((void)0)
#define NODE_WEAKUNLOCK(l, t)   ((void)0)
#define NODE_WEAKDOWNGRADE(l)   ((void)0)
#endif

245
246
247
248
249
250
251
252
253
/*%
 * Whether to rate-limit updating the LRU to avoid possible thread contention.
 * Our performance measurement has shown the cost is marginal, so it's defined
 * to be 0 by default either with or without threads.
 */
#ifndef DNS_RBTDB_LIMITLRUUPDATE
#define DNS_RBTDB_LIMITLRUUPDATE 0
#endif

/*
 * Allow clients with a virtual time of up to 5 minutes in the past to see
 * records that would otherwise have expired.
 */
#define RBTDB_VIRTUAL 300

260
struct noqname {
261
262
263
264
	dns_name_t 	name;
	void *     	neg;
	void *     	negsig;
	dns_rdatatype_t	type;
265
266
};

267
typedef struct acachectl acachectl_t;
268

Bob Halley's avatar
Bob Halley committed
269
typedef struct rdatasetheader {
Automatic Updater's avatar
Automatic Updater committed
270
271
272
273
274
275
276
277
278
	/*%
	 * Locked by the owning node's lock.
	 */
	rbtdb_serial_t                  serial;
	dns_ttl_t                       rdh_ttl;
	rbtdb_rdatatype_t               type;
	isc_uint16_t                    attributes;
	dns_trust_t                     trust;
	struct noqname                  *noqname;
279
	struct noqname                  *closest;
280
281
282
	unsigned int 			is_mmapped : 1;
	unsigned int 			next_is_relative : 1;
	unsigned int 			node_is_relative : 1;
Automatic Updater's avatar
Automatic Updater committed
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
	/*%<
	 * We don't use the LIST macros, because the LIST structure has
	 * both head and tail pointers, and is doubly linked.
	 */

	struct rdatasetheader           *next;
	/*%<
	 * If this is the top header for an rdataset, 'next' points
	 * to the top header for the next rdataset (i.e., the next type).
	 * Otherwise, it points up to the header whose down pointer points
	 * at this header.
	 */

	struct rdatasetheader           *down;
	/*%<
	 * Points to the header for the next older version of
	 * this rdataset.
	 */

	isc_uint32_t                    count;
	/*%<
	 * Monotonously increased every time this rdataset is bound so that
	 * it is used as the base of the starting point in DNS responses
	 * when the "cyclic" rrset-order is required.  Since the ordering
	 * should not be so crucial, no lock is set for the counter for
	 * performance reasons.
	 */

	acachectl_t                     *additional_auth;
	acachectl_t                     *additional_glue;

	dns_rbtnode_t                   *node;
	isc_stdtime_t                   last_used;
316
	ISC_LINK(struct rdatasetheader) link;
Automatic Updater's avatar
Automatic Updater committed
317
318
319
320
321

	unsigned int                    heap_index;
	/*%<
	 * Used for TTL-based cache cleaning.
	 */
Automatic Updater's avatar
Automatic Updater committed
322
	isc_stdtime_t                   resign;
Bob Halley's avatar
Bob Halley committed
323
324
} rdatasetheader_t;

325
326
327
328
329
330
331
332
typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;

#define RDATASET_ATTR_NONEXISTENT       0x0001
#define RDATASET_ATTR_STALE             0x0002
#define RDATASET_ATTR_IGNORE            0x0004
#define RDATASET_ATTR_RETAIN            0x0008
#define RDATASET_ATTR_NXDOMAIN          0x0010
333
#define RDATASET_ATTR_RESIGN            0x0020
334
#define RDATASET_ATTR_STATCOUNT         0x0040
335
#define RDATASET_ATTR_OPTOUT            0x0080
336
#define RDATASET_ATTR_NEGATIVE          0x0100
337
#define RDATASET_ATTR_PREFETCH          0x0200
Michael Graff's avatar
Michael Graff committed
338

339
typedef struct acache_cbarg {
Automatic Updater's avatar
Automatic Updater committed
340
341
342
343
344
	dns_rdatasetadditional_t        type;
	unsigned int                    count;
	dns_db_t                        *db;
	dns_dbnode_t                    *node;
	rdatasetheader_t                *header;
345
346
347
} acache_cbarg_t;

struct acachectl {
Automatic Updater's avatar
Automatic Updater committed
348
349
	dns_acacheentry_t               *entry;
	acache_cbarg_t                  *cbarg;
350
351
};

Michael Graff's avatar
Michael Graff committed
352
353
354
355
356
357
358
/*
 * XXX
 * When the cache will pre-expire data (due to memory low or other
 * situations) before the rdataset's TTL has expired, it MUST
 * respect the RETAIN bit and not expire the data until its TTL is
 * expired.
 */
359

360
#undef IGNORE                   /* WIN32 winbase.h defines this. */

/*
 * Predicates over a header's 'attributes' bits.  EXISTS() is true when
 * RDATASET_ATTR_NONEXISTENT is clear; every other macro is true when the
 * correspondingly named RDATASET_ATTR_* bit is set.
 */
#define EXISTS(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
#define NONEXISTENT(header) \
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
#define IGNORE(header) \
	(((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
#define RETAIN(header) \
	(((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
#define NXDOMAIN(header) \
	(((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
#define RESIGN(header) \
	(((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
#define OPTOUT(header) \
	(((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
#define NEGATIVE(header) \
	(((header)->attributes & RDATASET_ATTR_NEGATIVE) != 0)
#define PREFETCH(header) \
	(((header)->attributes & RDATASET_ATTR_PREFETCH) != 0)
380

381
#define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */

/*%
 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
 * There is a tradeoff issue about configuring this value: if this is too
 * small, it may cause heavier contention between threads; if this is too large,
 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
 * The default value should work well for most environments, but this can
 * also be configurable at compilation time via the
 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
 * 1 due to the assumption of overmem_purge().
 */
#ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger than 1"
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT   16
#endif	/* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
Bob Halley's avatar
Bob Halley committed
402
403

typedef struct {
Automatic Updater's avatar
Automatic Updater committed
404
405
406
407
408
	nodelock_t                      lock;
	/* Protected in the refcount routines. */
	isc_refcount_t                  references;
	/* Locked by lock. */
	isc_boolean_t                   exiting;
409
410
411
} rbtdb_nodelock_t;

typedef struct rbtdb_changed {
Automatic Updater's avatar
Automatic Updater committed
412
413
414
	dns_rbtnode_t *                 node;
	isc_boolean_t                   dirty;
	ISC_LINK(struct rbtdb_changed)  link;
415
416
} rbtdb_changed_t;

417
typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
418

419
420
421
422
423
424
/* Values for the 'secure' member of rbtdb_version_t. */
typedef enum {
	dns_db_insecure,
	dns_db_partial,
	dns_db_secure
} dns_db_secure_t;

typedef struct dns_rbtdb dns_rbtdb_t;

/* Reason for expiring a record from cache */
typedef enum {
	expire_lru,
	expire_ttl,
	expire_flush
} expire_t;

434
typedef struct rbtdb_version {
Automatic Updater's avatar
Automatic Updater committed
435
436
	/* Not locked */
	rbtdb_serial_t                  serial;
437
	dns_rbtdb_t *			rbtdb;
Automatic Updater's avatar
Automatic Updater committed
438
439
440
441
442
443
444
445
446
447
	/*
	 * Protected in the refcount routines.
	 * XXXJT: should we change the lock policy based on the refcount
	 * performance?
	 */
	isc_refcount_t                  references;
	/* Locked by database lock. */
	isc_boolean_t                   writer;
	isc_boolean_t                   commit_ok;
	rbtdb_changedlist_t             changed_list;
448
	rdatasetheaderlist_t		resigned_list;
Automatic Updater's avatar
Automatic Updater committed
449
	ISC_LINK(struct rbtdb_version)  link;
450
	dns_db_secure_t			secure;
451
452
453
454
455
456
	isc_boolean_t			havensec3;
	/* NSEC3 parameters */
	dns_hash_t			hash;
	isc_uint8_t			flags;
	isc_uint16_t			iterations;
	isc_uint8_t			salt_length;
457
	unsigned char			salt[DNS_NSEC3_SALTSIZE];
458
459
} rbtdb_version_t;

460
461
typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;

462
struct dns_rbtdb {
Automatic Updater's avatar
Automatic Updater committed
463
464
	/* Unlocked. */
	dns_db_t                        common;
465
	/* Locks the data in this struct */
466
#if DNS_RBTDB_USERWLOCK
Automatic Updater's avatar
Automatic Updater committed
467
	isc_rwlock_t                    lock;
468
#else
Automatic Updater's avatar
Automatic Updater committed
469
	isc_mutex_t                     lock;
470
#endif
471
	/* Locks the tree structure (prevents nodes appearing/disappearing) */
Automatic Updater's avatar
Automatic Updater committed
472
	isc_rwlock_t                    tree_lock;
473
	/* Locks for individual tree nodes */
Automatic Updater's avatar
Automatic Updater committed
474
475
476
	unsigned int                    node_lock_count;
	rbtdb_nodelock_t *              node_locks;
	dns_rbtnode_t *                 origin_node;
477
	dns_stats_t *			rrsetstats; /* cache DB only */
478
	isc_stats_t *			cachestats; /* cache DB only */
Automatic Updater's avatar
Automatic Updater committed
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
	/* Locked by lock. */
	unsigned int                    active;
	isc_refcount_t                  references;
	unsigned int                    attributes;
	rbtdb_serial_t                  current_serial;
	rbtdb_serial_t                  least_serial;
	rbtdb_serial_t                  next_serial;
	rbtdb_version_t *               current_version;
	rbtdb_version_t *               future_version;
	rbtdb_versionlist_t             open_versions;
	isc_task_t *                    task;
	dns_dbnode_t                    *soanode;
	dns_dbnode_t                    *nsnode;

	/*
	 * This is a linked list used to implement the LRU cache.  There will
	 * be node_lock_count linked lists here.  Nodes in bucket 1 will be
	 * placed on the linked list rdatasets[1].
	 */
	rdatasetheaderlist_t            *rdatasets;
499

Automatic Updater's avatar
Automatic Updater committed
500
501
	/*%
	 * Temporary storage for stale cache nodes and dynamically deleted
502
	 * nodes that await being cleaned up.
Automatic Updater's avatar
Automatic Updater committed
503
	 */
Automatic Updater's avatar
Automatic Updater committed
504
505
506
	rbtnodelist_t                   *deadnodes;

	/*
507
508
509
510
	 * Heaps.  These are used for TTL based expiry in a cache,
	 * or for zone resigning in a zone DB.  hmctx is the memory
	 * context to use for the heap (which differs from the main
	 * database memory context in the case of a cache).
Automatic Updater's avatar
Automatic Updater committed
511
	 */
512
	isc_mem_t *			hmctx;
Automatic Updater's avatar
Automatic Updater committed
513
	isc_heap_t                      **heaps;
Tinderbox User's avatar
Tinderbox User committed
514

515
516
517
518
	/*
	 * Base values for the mmap() code.
	 */
	void *				mmap_location;
519
	size_t				mmap_size;
Automatic Updater's avatar
Automatic Updater committed
520
521
522

	/* Locked by tree_lock. */
	dns_rbt_t *                     tree;
523
	dns_rbt_t *			nsec;
524
	dns_rbt_t *			nsec3;
525
526
527
	dns_rpz_zones_t			*rpzs;
	dns_rpz_num_t			rpz_num;
	dns_rpz_zones_t			*load_rpzs;
Automatic Updater's avatar
Automatic Updater committed
528
529
530

	/* Unlocked */
	unsigned int                    quantum;
531
};
Bob Halley's avatar
Bob Halley committed
532

533
534
#define RBTDB_ATTR_LOADED               0x01
#define RBTDB_ATTR_LOADING              0x02
535

536
/*%
537
538
539
 * Search Context
 */
typedef struct {
Automatic Updater's avatar
Automatic Updater committed
540
541
542
543
544
545
546
547
548
549
550
551
552
	dns_rbtdb_t *           rbtdb;
	rbtdb_version_t *       rbtversion;
	rbtdb_serial_t          serial;
	unsigned int            options;
	dns_rbtnodechain_t      chain;
	isc_boolean_t           copy_name;
	isc_boolean_t           need_cleanup;
	isc_boolean_t           wild;
	dns_rbtnode_t *         zonecut;
	rdatasetheader_t *      zonecut_rdataset;
	rdatasetheader_t *      zonecut_sigrdataset;
	dns_fixedname_t         zonecut_name;
	isc_stdtime_t           now;
553
554
} rbtdb_search_t;

555
/*%
556
557
558
 * Load Context
 */
typedef struct {
Automatic Updater's avatar
Automatic Updater committed
559
560
	dns_rbtdb_t *           rbtdb;
	isc_stdtime_t           now;
561
562
} rbtdb_load_t;

563
static void delete_callback(void *data, void *arg);
Bob Halley's avatar
Bob Halley committed
564
static void rdataset_disassociate(dns_rdataset_t *rdataset);
565
566
static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
Bob Halley's avatar
Bob Halley committed
567
static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
Bob Halley's avatar
Bob Halley committed
568
static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
Bob Halley's avatar
Bob Halley committed
569
static unsigned int rdataset_count(dns_rdataset_t *rdataset);
570
static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
Automatic Updater's avatar
Automatic Updater committed
571
					dns_name_t *name,
572
573
574
575
576
577
					dns_rdataset_t *neg,
					dns_rdataset_t *negsig);
static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
					dns_name_t *name,
					dns_rdataset_t *neg,
					dns_rdataset_t *negsig);
578
static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
Automatic Updater's avatar
Automatic Updater committed
579
580
581
582
583
584
585
586
587
588
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype,
					   dns_acache_t *acache,
					   dns_zone_t **zonep,
					   dns_db_t **dbp,
					   dns_dbversion_t **versionp,
					   dns_dbnode_t **nodep,
					   dns_name_t *fname,
					   dns_message_t *msg,
					   isc_stdtime_t now);
589
static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
Automatic Updater's avatar
Automatic Updater committed
590
591
592
593
594
595
596
597
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype,
					   dns_acache_t *acache,
					   dns_zone_t *zone,
					   dns_db_t *db,
					   dns_dbversion_t *version,
					   dns_dbnode_t *node,
					   dns_name_t *fname);
598
static isc_result_t rdataset_putadditional(dns_acache_t *acache,
Automatic Updater's avatar
Automatic Updater committed
599
600
601
					   dns_rdataset_t *rdataset,
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype);
602
static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
Automatic Updater's avatar
Automatic Updater committed
603
					      isc_stdtime_t now);
604
static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
Automatic Updater's avatar
Automatic Updater committed
605
			  isc_stdtime_t now);
606
static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
607
			  isc_boolean_t tree_locked, expire_t reason);
608
609
static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
			  isc_stdtime_t now, isc_boolean_t tree_locked);
610
611
static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
				  rdatasetheader_t *newheader);
612
static void prune_tree(isc_task_t *task, isc_event_t *event);
613
614
static void rdataset_settrust(dns_rdataset_t *rdataset, dns_trust_t trust);
static void rdataset_expire(dns_rdataset_t *rdataset);
615
616

static dns_rdatasetmethods_t rdataset_methods = {
Automatic Updater's avatar
Automatic Updater committed
617
618
619
620
621
622
623
624
	rdataset_disassociate,
	rdataset_first,
	rdataset_next,
	rdataset_current,
	rdataset_clone,
	rdataset_count,
	NULL,
	rdataset_getnoqname,
625
626
	NULL,
	rdataset_getclosest,
Automatic Updater's avatar
Automatic Updater committed
627
628
	rdataset_getadditional,
	rdataset_setadditional,
629
630
631
	rdataset_putadditional,
	rdataset_settrust,
	rdataset_expire
Bob Halley's avatar
Bob Halley committed
632
633
634
};

static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
635
636
static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
Bob Halley's avatar
Bob Halley committed
637
static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
638
				 dns_rdataset_t *rdataset);
Bob Halley's avatar
Bob Halley committed
639
640

static dns_rdatasetitermethods_t rdatasetiter_methods = {
Automatic Updater's avatar
Automatic Updater committed
641
642
643
644
	rdatasetiter_destroy,
	rdatasetiter_first,
	rdatasetiter_next,
	rdatasetiter_current
645
646
};

Bob Halley's avatar
Bob Halley committed
647
typedef struct rbtdb_rdatasetiter {
Automatic Updater's avatar
Automatic Updater committed
648
649
	dns_rdatasetiter_t              common;
	rdatasetheader_t *              current;
Bob Halley's avatar
Bob Halley committed
650
651
} rbtdb_rdatasetiter_t;

652
653
654
655
static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
656
					dns_name_t *name);
657
658
659
static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
660
661
					   dns_dbnode_t **nodep,
					   dns_name_t *name);
662
663
static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
664
					  dns_name_t *name);
665
666

static dns_dbiteratormethods_t dbiterator_methods = {
Automatic Updater's avatar
Automatic Updater committed
667
668
669
670
671
672
673
674
675
	dbiterator_destroy,
	dbiterator_first,
	dbiterator_last,
	dbiterator_seek,
	dbiterator_prev,
	dbiterator_next,
	dbiterator_current,
	dbiterator_pause,
	dbiterator_origin
676
677
};

678
679
#define DELETION_BATCH_MAX 64

680
/*
681
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
682
 */
683
typedef struct rbtdb_dbiterator {
Automatic Updater's avatar
Automatic Updater committed
684
685
686
687
688
689
690
691
	dns_dbiterator_t                common;
	isc_boolean_t                   paused;
	isc_boolean_t                   new_origin;
	isc_rwlocktype_t                tree_locked;
	isc_result_t                    result;
	dns_fixedname_t                 name;
	dns_fixedname_t                 origin;
	dns_rbtnodechain_t              chain;
692
693
	dns_rbtnodechain_t		nsec3chain;
	dns_rbtnodechain_t		*current;
Automatic Updater's avatar
Automatic Updater committed
694
695
696
	dns_rbtnode_t                   *node;
	dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
	int                             delete;
697
698
	isc_boolean_t			nsec3only;
	isc_boolean_t			nonsec3;
699
700
701
} rbtdb_dbiterator_t;


702
703
/*
 * Database kind predicates: true when the DNS_DBATTR_STUB / DNS_DBATTR_CACHE
 * bit is set in the database's common attributes.
 */
#define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
704

705
static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
Automatic Updater's avatar
Automatic Updater committed
706
		       isc_event_t *event);
707
static void overmem(dns_db_t *db, isc_boolean_t overmem);
708
static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version);
709

710
711
712
/*%
 * 'init_count' is used to initialize 'newheader->count' which inturn
 * is used to determine where in the cycle rrset-order cyclic starts.
Francis Dupont's avatar
Francis Dupont committed
713
 * We don't lock this as we don't care about simultaneous updates.
714
715
 *
 * Note:
716
 *      Both init_count and header->count can be ISC_UINT32_MAX.
717
 *      The count on the returned rdataset however can't be as
718
719
 *      that indicates that the database does not implement cyclic
 *      processing.
720
721
722
 */
static unsigned int init_count;

723
724
725
726
727
728
/*
 * Locking
 *
 * If a routine is going to lock more than one lock in this module, then
 * the locking must be done in the following order:
 *
 *      Tree Lock
 *
 *      Node Lock       (Only one from the set may be locked at one time by
 *                       any caller)
 *
 *      Database Lock
 *
 * Failure to follow this hierarchy can result in deadlock.
 */

739
740
741
/*
 * Deleting Nodes
 *
 * For zone databases the node for the origin of the zone MUST NOT be deleted.
 */

Evan Hunt's avatar
Evan Hunt committed
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
/*
 * Debugging routines
 */
#ifdef DEBUG
/*
 * Debugging aid: print 'desc', a colon, and then the 'size' bytes at
 * 'data' rendered as hexadecimal text, followed by a newline, to stderr.
 *
 * The input is converted in chunks of at most BUFSIZ bytes.  Each input
 * byte becomes two text characters, so the scratch buffer is sized to
 * hold one full chunk plus the terminating NUL; this keeps the
 * conversion from running out of space no matter how large 'size' is.
 */
static void
hexdump(const char *desc, unsigned char *data, size_t size) {
	char text[BUFSIZ * 2 + 1];
	isc_buffer_t b;
	isc_region_t r;
	isc_result_t result;
	size_t bytes;

	fprintf(stderr, "%s: ", desc);
	do {
		/* Start each chunk with an empty text buffer. */
		isc_buffer_init(&b, text, sizeof(text));
		bytes = (size > BUFSIZ) ? BUFSIZ : size;
		r.base = data;
		r.length = (unsigned int)bytes;
		result = isc_hex_totext(&r, 0, "", &b);
		/* The buffer is sized for a whole chunk, so this must fit. */
		INSIST(result == ISC_R_SUCCESS);
		/* NUL-terminate so the chunk can be printed with %s. */
		isc_buffer_putuint8(&b, 0);
		fprintf(stderr, "%s", text);
		data += bytes;
		size -= bytes;
	} while (size > 0);
	fprintf(stderr, "\n");
}
#endif

764

765
766
767
768
/*
 * DB Routines
 */

Bob Halley's avatar
Bob Halley committed
769
770
/*
 * Attach '*targetp' to the database 'source' by adding one to the
 * database's reference count.
 */
static void
attach(dns_db_t *source, dns_db_t **targetp) {
	dns_rbtdb_t *db = (dns_rbtdb_t *)source;

	REQUIRE(VALID_RBTDB(db));

	/* Account for the new reference before handing out the pointer. */
	isc_refcount_increment(&db->references, NULL);
	*targetp = source;
}

static void
781
free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
Automatic Updater's avatar
Automatic Updater committed
782
	dns_rbtdb_t *rbtdb = event->ev_arg;
783

Automatic Updater's avatar
Automatic Updater committed
784
	UNUSED(task);
785

Automatic Updater's avatar
Automatic Updater committed
786
	free_rbtdb(rbtdb, ISC_TRUE, event);
787
788
}

789
790
791
/*
 * Record the outcome of a cache lookup in the cache statistics, if the
 * database has any: 'result' codes that represent an answer found in
 * the cache (positive or negative) count as hits, everything else as a
 * miss.  Must only be called for cache databases.
 */
static void
update_cachestats(dns_rbtdb_t *rbtdb, isc_result_t result) {
	isc_boolean_t hit;

	INSIST(IS_CACHE(rbtdb));

	if (rbtdb->cachestats == NULL)
		return;

	hit = (result == ISC_R_SUCCESS ||
	       result == DNS_R_CNAME ||
	       result == DNS_R_DNAME ||
	       result == DNS_R_DELEGATION ||
	       result == DNS_R_NCACHENXDOMAIN ||
	       result == DNS_R_NCACHENXRRSET) ? ISC_TRUE : ISC_FALSE;

	if (hit) {
		isc_stats_increment(rbtdb->cachestats,
				    dns_cachestatscounter_hits);
	} else {
		isc_stats_increment(rbtdb->cachestats,
				    dns_cachestatscounter_misses);
	}
}

812
813
814
815
816
817
818
819
820
821
822
/*
 * Increment (when 'increment' is true) or decrement the rrset statistics
 * counter that corresponds to 'header'.
 *
 * The counter is selected from the header's type: a negative NXDOMAIN
 * entry uses only the NXDOMAIN attribute, any other negative entry uses
 * the NXRRSET attribute together with the extended part of the type,
 * and a positive entry uses the base part of the type.
 */
static void
update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
		  isc_boolean_t increment)
{
	dns_rdatastatstype_t attrs = 0;
	dns_rdatastatstype_t basetype = 0;
	dns_rdatastatstype_t statstype;

	/* At the moment we count statistics only for cache DB */
	INSIST(IS_CACHE(rbtdb));

	if (!NEGATIVE(header)) {
		basetype = RBTDB_RDATATYPE_BASE(header->type);
	} else if (NXDOMAIN(header)) {
		attrs = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
	} else {
		attrs = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
		basetype = RBTDB_RDATATYPE_EXT(header->type);
	}

	statstype = DNS_RDATASTATSTYPE_VALUE(basetype, attrs);
	if (!increment) {
		dns_rdatasetstats_decrement(rbtdb->rrsetstats, statstype);
		return;
	}
	dns_rdatasetstats_increment(rbtdb->rrsetstats, statstype);
}

840
841
/*
 * Store 'newttl' in 'header' and, for cache databases, tell the heap
 * that holds the header that its position may need to change.
 */
static void
set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
	dns_ttl_t prevttl = header->rdh_ttl;
	isc_heap_t *ttlheap;
	int locknum;

	header->rdh_ttl = newttl;

	/*
	 * It's possible the rbtdb is not a cache.  If this is the case,
	 * we will not have a heap, and we are done.
	 */
	if (!IS_CACHE(rbtdb))
		return;

	/*
	 * Nothing to adjust if the header has heap index 0 (presumably
	 * meaning it is not in a heap) or if the TTL did not change.
	 */
	if (header->heap_index == 0 || newttl == prevttl)
		return;

	/* The heap to use is selected by the lock number of the node. */
	locknum = header->node->locknum;
	if (rbtdb->heaps == NULL)
		return;
	ttlheap = rbtdb->heaps[locknum];
	if (ttlheap == NULL)
		return;

	/* A smaller TTL expires sooner, i.e. its priority has increased. */
	if (newttl < prevttl)
		isc_heap_increased(ttlheap, header->heap_index);
	else
		isc_heap_decreased(ttlheap, header->heap_index);
}

/*%
871
 * These functions allow the heap code to rank the priority of each
872
873
874
875
 * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
 */
static isc_boolean_t
ttl_sooner(void *v1, void *v2) {
Automatic Updater's avatar
Automatic Updater committed
876
877
	rdatasetheader_t *h1 = v1;
	rdatasetheader_t *h2 = v2;
878

Automatic Updater's avatar
Automatic Updater committed
879
880
881
	if (h1->rdh_ttl < h2->rdh_ttl)
		return (ISC_TRUE);
	return (ISC_FALSE);
882
883
}

884
885
static isc_boolean_t
resign_sooner(void *v1, void *v2) {
Automatic Updater's avatar
Automatic Updater committed
886
887
	rdatasetheader_t *h1 = v1;
	rdatasetheader_t *h2 = v2;
888

Automatic Updater's avatar
Automatic Updater committed
889
890
891
	if (h1->resign < h2->resign)
		return (ISC_TRUE);
	return (ISC_FALSE);
892
893
}

894
895
896
897
/*%
 * This function sets the heap index into the header.
 */
static void
898
set_index(void *what, unsigned int index) {
Automatic Updater's avatar
Automatic Updater committed
899
	rdatasetheader_t *h = what;
900

Automatic Updater's avatar
Automatic Updater committed
901
	h->heap_index = index;
902
903
}

904
905
906
907
908
909
/*%
 * Work out how many nodes can be deleted in the time between two
 * requests to the nameserver.  Smooth the resulting number and use it
 * as a estimate for the number of nodes to be deleted in the next
 * iteration.
 */
910
911
static unsigned int
adjust_quantum(unsigned int old, isc_time_t *start) {
Automatic Updater's avatar
Automatic Updater committed
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
	unsigned int pps = dns_pps;     /* packets per second */
	unsigned int interval;
	isc_uint64_t usecs;
	isc_time_t end;
	unsigned int new;

	if (pps < 100)
		pps = 100;
	isc_time_now(&end);

	interval = 1000000 / pps;       /* interval in usec */
	if (interval == 0)
		interval = 1;
	usecs = isc_time_microdiff(&end, start);
	if (usecs == 0) {
		/*
		 * We were unable to measure the amount of time taken.
		 * Double the nodes deleted next time.
		 */
		old *= 2;
		if (old > 1000)
			old = 1000;
		return (old);
	}
	new = old * interval;
	new /= (unsigned int)usecs;
	if (new == 0)
		new = 1;
	else if (new > 1000)
		new = 1000;

	/* Smooth */
	new = (new + old * 3) / 4;

	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
		      ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);

	return (new);
950
}
951

952
953
/*
 * Tear down 'rbtdb' and release everything it owns.
 *
 * The trees are destroyed in bounded steps of 'rbtdb->quantum'.  When a
 * step reports ISC_R_QUOTA the remaining work is rescheduled by sending
 * an event (actioned by free_rbtdb_callback()) to the database's task,
 * and this call returns early.  'event' is NULL on the first call and is
 * the rescheduling event on continuation calls.  If 'log' is true, a
 * debug message is written once the trees have been destroyed.
 */
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
	unsigned int n;
	isc_ondestroy_t ondest;
	isc_result_t status;
	char namebuf[DNS_NAME_FORMATSIZE];
	dns_rbt_t **victim;
	isc_time_t began;

	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
		overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);

	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
	REQUIRE(rbtdb->future_version == NULL);

	/*
	 * Drop the final reference to the current version, if there is
	 * one, and release it.
	 */
	if (rbtdb->current_version != NULL) {
		unsigned int refs;

		isc_refcount_decrement(&rbtdb->current_version->references,
				       &refs);
		INSIST(refs == 0);
		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
		isc_refcount_destroy(&rbtdb->current_version->references);
		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
			    sizeof(rbtdb_version_t));
	}

	/*
	 * We assume the number of remaining dead nodes is reasonably small;
	 * the overhead of unlinking all nodes here should be negligible.
	 */
	for (n = 0; n < rbtdb->node_lock_count; n++) {
		dns_rbtnode_t *dead;

		while ((dead = ISC_LIST_HEAD(rbtdb->deadnodes[n])) != NULL)
			ISC_LIST_UNLINK(rbtdb->deadnodes[n], dead, deadlink);
	}

	/*
	 * First call (no event yet): choose the initial quantum.  It is
	 * 100 when a task is available to continue the work and 0
	 * otherwise (presumably meaning "no limit").
	 */
	if (event == NULL)
		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;

	for (;;) {
		/*
		 * pick the next tree to (start to) destroy; we're finished
		 * after clear cutting all three
		 */
		if (rbtdb->tree != NULL)
			victim = &rbtdb->tree;
		else if (rbtdb->nsec != NULL)
			victim = &rbtdb->nsec;
		else if (rbtdb->nsec3 != NULL)
			victim = &rbtdb->nsec3;
		else
			break;

		isc_time_now(&began);
		status = dns_rbt_destroy2(victim, rbtdb->quantum);
		if (status != ISC_R_QUOTA) {
			/* The whole tree went away; move on to the next. */
			INSIST(status == ISC_R_SUCCESS && *victim == NULL);
			continue;
		}

		/*
		 * The quantum ran out before the tree was gone: retune the
		 * quantum and hand the rest of the job to the task.
		 */
		INSIST(rbtdb->task != NULL);
		if (rbtdb->quantum != 0)
			rbtdb->quantum = adjust_quantum(rbtdb->quantum,
							&began);
		if (event == NULL)
			event = isc_event_allocate(rbtdb->common.mctx,
						   NULL,
						   DNS_EVENT_FREESTORAGE,
						   free_rbtdb_callback,
						   rbtdb,
						   sizeof(isc_event_t));
		/*
		 * Without an event the work cannot be rescheduled, so keep
		 * going within this call.
		 */
		if (event == NULL)
			continue;
		isc_task_send(rbtdb->task, &event);
		return;
	}

	if (event != NULL)
		isc_event_free(&event);
	if (log) {
		if (dns_name_dynamic(&rbtdb->common.origin))
			dns_name_format(&rbtdb->common.origin, namebuf,
					sizeof(namebuf));
		else
			strcpy(namebuf, "<UNKNOWN>");
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
			      "done free_rbtdb(%s)", namebuf);
	}
	if (dns_name_dynamic(&rbtdb->common.origin))
		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
	for (n = 0; n < rbtdb->node_lock_count; n++) {
		isc_refcount_destroy(&rbtdb->node_locks[n].references);
		NODE_DESTROYLOCK(&rbtdb->node_locks[n].lock);
	}

	/*
	 * Clean up LRU / re-signing order lists.
	 */
	if (rbtdb->rdatasets != NULL) {
		for (n = 0; n < rbtdb->node_lock_count; n++)
			INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[n]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
			    rbtdb->node_lock_count *
			    sizeof(rdatasetheaderlist_t));
	}

	/*
	 * Clean up dead node buckets.
	 */
	if (rbtdb->deadnodes != NULL) {
		for (n = 0; n < rbtdb->node_lock_count; n++)
			INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[n]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
			    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
	}

	/*
	 * Clean up heap objects.
	 */
	if (rbtdb->heaps != NULL) {
		for (n = 0; n < rbtdb->node_lock_count; n++)
			isc_heap_destroy(&rbtdb->heaps[n]);
		isc_mem_put(rbtdb->hmctx, rbtdb->heaps,
			    rbtdb->node_lock_count * sizeof(isc_heap_t *));
	}

	/*
	 * Release the statistics objects.
	 */
	if (rbtdb->rrsetstats != NULL)
		dns_stats_detach(&rbtdb->rrsetstats);
	if (rbtdb->cachestats != NULL)
		isc_stats_detach(&rbtdb->cachestats);

	if (rbtdb->load_rpzs != NULL) {
		/*
		 * We must be cleaning up after a failed zone loading.
		 */
		REQUIRE(rbtdb->rpzs != NULL &&
			rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
		dns_rpz_detach_rpzs(&rbtdb->load_rpzs);
	}
	if (rbtdb->rpzs != NULL) {
		REQUIRE(rbtdb->rpz_num < rbtdb->rpzs->p.num_zones);
		dns_rpz_detach_rpzs(&rbtdb->rpzs);
	}

	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
	isc_rwlock_destroy(&rbtdb->tree_lock);
	isc_refcount_destroy(&rbtdb->references);
	if (rbtdb->task != NULL)
		isc_task_detach(&rbtdb->task);

	RBTDB_DESTROYLOCK(&rbtdb->lock);
	rbtdb->common.magic = 0;
	rbtdb->common.impmagic = 0;
	/* Copy the ondestroy state out before the structure is released. */
	ondest = rbtdb->common.ondest;
	isc_mem_detach(&rbtdb->hmctx);

	if (rbtdb->mmap_location != NULL)
		isc_file_munmap(rbtdb->mmap_location,
				(size_t) rbtdb->mmap_size);

	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
	isc_ondestroy_notify(&ondest, rbtdb);
}

1122
static inline void
1123
maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
Automatic Updater's avatar
Automatic Updater committed
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
	isc_boolean_t want_free = ISC_FALSE;
	unsigned int i;
	unsigned int inactive = 0;

	/* XXX check for open versions here */

	if (rbtdb->soanode != NULL)
		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
	if (rbtdb->nsnode != NULL)
		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);

	/*
	 * Even though there are no external direct references, there still
	 * may be nodes in use.
	 */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
		rbtdb->node_locks[i].exiting = ISC_TRUE;
		NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
		if (isc_refcount_current(&rbtdb->node_locks[i].references)
		    == 0) {
			inactive++;
		}
	}

	if (inactive != 0) {
		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
		rbtdb->active -= inactive;
		if (rbtdb->active == 0)
			want_free = ISC_TRUE;
		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
		if (want_free) {
			char buf[DNS_NAME_FORMATSIZE];
			if (dns_name_dynamic(&rbtdb->common.origin))
				dns_name_format(&rbtdb->common.origin, buf,
						sizeof(buf));
			else
				strcpy(buf, "<UNKNOWN>");
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
				      "calling free_rbtdb(%s)", buf);
			free_rbtdb(rbtdb, ISC_TRUE, NULL);
		}
	}
Bob Halley's avatar
Bob Halley committed
1168
1169
1170
1171
}

static void
detach(dns_db_t **dbp) {
Automatic Updater's avatar
Automatic Updater committed
1172
1173
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
	unsigned int refs;
Bob Halley's avatar
Bob Halley committed