rbtdb.c 227 KB
Newer Older
Bob Halley's avatar
Bob Halley committed
1
/*
Automatic Updater's avatar
Automatic Updater committed
2
 * Copyright (C) 2004-2008  Internet Systems Consortium, Inc. ("ISC")
Mark Andrews's avatar
Mark Andrews committed
3
 * Copyright (C) 1999-2003  Internet Software Consortium.
4
 *
Automatic Updater's avatar
Automatic Updater committed
5
 * Permission to use, copy, modify, and/or distribute this software for any
Bob Halley's avatar
Bob Halley committed
6 7
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
8
 *
Mark Andrews's avatar
Mark Andrews committed
9 10 11 12 13 14 15
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
Bob Halley's avatar
Bob Halley committed
16 17
 */

Francis Dupont's avatar
Francis Dupont committed
18
/* $Id: rbtdb.c,v 1.271 2009/01/17 14:50:20 fdupont Exp $ */
19 20

/*! \file */
David Lawrence's avatar
David Lawrence committed
21

22 23 24 25
/*
 * Principal Author: Bob Halley
 */

26 27
#include <config.h>

Mark Andrews's avatar
Mark Andrews committed
28
/* #define inline */
29

30
#include <isc/event.h>
31
#include <isc/heap.h>
32
#include <isc/mem.h>
33
#include <isc/mutex.h>
34
#include <isc/platform.h>
35
#include <isc/print.h>
36
#include <isc/random.h>
37
#include <isc/refcount.h>
Bob Halley's avatar
Bob Halley committed
38
#include <isc/rwlock.h>
39
#include <isc/serial.h>
40
#include <isc/string.h>
41
#include <isc/task.h>
42
#include <isc/time.h>
Michael Graff's avatar
Michael Graff committed
43
#include <isc/util.h>
Bob Halley's avatar
Bob Halley committed
44

45
#include <dns/acache.h>
46 47
#include <dns/db.h>
#include <dns/dbiterator.h>
48
#include <dns/events.h>
49
#include <dns/fixedname.h>
50
#include <dns/lib.h>
51
#include <dns/log.h>
52
#include <dns/masterdump.h>
53
#include <dns/nsec.h>
54
#include <dns/nsec3.h>
55
#include <dns/rbt.h>
56
#include <dns/rdata.h>
Bob Halley's avatar
Bob Halley committed
57 58
#include <dns/rdataset.h>
#include <dns/rdatasetiter.h>
59
#include <dns/rdataslab.h>
60
#include <dns/rdatastruct.h>
61
#include <dns/result.h>
62
#include <dns/stats.h>
63 64
#include <dns/view.h>
#include <dns/zone.h>
65
#include <dns/zonekey.h>
Bob Halley's avatar
Bob Halley committed
66

67 68 69
#ifdef DNS_RBTDB_VERSION64
#include "rbtdb64.h"
#else
Bob Halley's avatar
Bob Halley committed
70
#include "rbtdb.h"
71
#endif
Bob Halley's avatar
Bob Halley committed
72

73
#ifdef DNS_RBTDB_VERSION64
74
#define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '8')
75
#else
76
#define RBTDB_MAGIC                     ISC_MAGIC('R', 'B', 'D', '4')
77 78
#endif

79
/*%
80 81 82
 * Note that "impmagic" is not the first four bytes of the struct, so
 * ISC_MAGIC_VALID cannot be used.
 */
83
#define VALID_RBTDB(rbtdb)      ((rbtdb) != NULL && \
Automatic Updater's avatar
Automatic Updater committed
84
				 (rbtdb)->common.impmagic == RBTDB_MAGIC)
Bob Halley's avatar
Bob Halley committed
85

86
#ifdef DNS_RBTDB_VERSION64
87
typedef isc_uint64_t                    rbtdb_serial_t;
88
/*%
89 90 91 92 93 94
 * Make casting easier in symbolic debuggers by using different names
 * for the 64 bit version.
 */
#define dns_rbtdb_t dns_rbtdb64_t
#define rdatasetheader_t rdatasetheader64_t
#define rbtdb_version_t rbtdb_version64_t
95
#else
96
typedef isc_uint32_t                    rbtdb_serial_t;
97 98
#endif

99
typedef isc_uint32_t                    rbtdb_rdatatype_t;
Bob Halley's avatar
Bob Halley committed
100

101 102
#define RBTDB_RDATATYPE_BASE(type)      ((dns_rdatatype_t)((type) & 0xFFFF))
#define RBTDB_RDATATYPE_EXT(type)       ((dns_rdatatype_t)((type) >> 16))
103
#define RBTDB_RDATATYPE_VALUE(b, e)     ((rbtdb_rdatatype_t)((e) << 16) | (b))
Bob Halley's avatar
Bob Halley committed
104

105
#define RBTDB_RDATATYPE_SIGNSEC \
Automatic Updater's avatar
Automatic Updater committed
106
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec)
107 108
#define RBTDB_RDATATYPE_SIGNSEC3 \
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_nsec3)
Bob Halley's avatar
Bob Halley committed
109
#define RBTDB_RDATATYPE_SIGNS \
Automatic Updater's avatar
Automatic Updater committed
110
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_ns)
Bob Halley's avatar
Bob Halley committed
111
#define RBTDB_RDATATYPE_SIGCNAME \
Automatic Updater's avatar
Automatic Updater committed
112
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_cname)
113
#define RBTDB_RDATATYPE_SIGDNAME \
Automatic Updater's avatar
Automatic Updater committed
114
		RBTDB_RDATATYPE_VALUE(dns_rdatatype_rrsig, dns_rdatatype_dname)
115
#define RBTDB_RDATATYPE_NCACHEANY \
Automatic Updater's avatar
Automatic Updater committed
116
		RBTDB_RDATATYPE_VALUE(0, dns_rdatatype_any)
Bob Halley's avatar
Bob Halley committed
117

118
/*
119
 * We use rwlock for DB lock only when ISC_RWLOCK_USEATOMIC is non 0.
120
 * Using rwlock is effective with regard to lookup performance only when
121
 * it is implemented in an efficient way.
122 123 124 125
 * Otherwise, it is generally wise to stick to the simple locking since rwlock
 * would require more memory or can even make lookups slower due to its own
 * overhead (when it internally calls mutex locks).
 */
126
#ifdef ISC_RWLOCK_USEATOMIC
127 128 129 130 131 132
#define DNS_RBTDB_USERWLOCK 1
#else
#define DNS_RBTDB_USERWLOCK 0
#endif

#if DNS_RBTDB_USERWLOCK
133 134 135 136
#define RBTDB_INITLOCK(l)       isc_rwlock_init((l), 0, 0)
#define RBTDB_DESTROYLOCK(l)    isc_rwlock_destroy(l)
#define RBTDB_LOCK(l, t)        RWLOCK((l), (t))
#define RBTDB_UNLOCK(l, t)      RWUNLOCK((l), (t))
137
#else
138 139 140 141
#define RBTDB_INITLOCK(l)       isc_mutex_init(l)
#define RBTDB_DESTROYLOCK(l)    DESTROYLOCK(l)
#define RBTDB_LOCK(l, t)        LOCK(l)
#define RBTDB_UNLOCK(l, t)      UNLOCK(l)
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165
#endif

/*
 * Since node locking is sensitive to both performance and memory footprint,
 * we need some trick here.  If we have both high-performance rwlock and
 * high performance and small-memory reference counters, we use rwlock for
 * node lock and isc_refcount for node references.  In this case, we don't have
 * to protect the access to the counters by locks.
 * Otherwise, we simply use ordinary mutex lock for node locking, and use
 * simple integers as reference counters which is protected by the lock.
 * In most cases, we can simply use wrapper macros such as NODE_LOCK and
 * NODE_UNLOCK.  In some other cases, however, we need to protect reference
 * counters first and then protect other parts of a node as read-only data.
 * Special additional macros, NODE_STRONGLOCK(), NODE_WEAKLOCK(), etc, are also
 * provided for these special cases.  When we can use the efficient backend
 * routines, we should only protect the "other members" by NODE_WEAKLOCK(read).
 * Otherwise, we should use NODE_STRONGLOCK() to protect the entire critical
 * section including the access to the reference counter.
 * Note that we cannot use NODE_LOCK()/NODE_UNLOCK() wherever the protected
 * section is also protected by NODE_STRONGLOCK().
 */
#if defined(ISC_RWLOCK_USEATOMIC) && defined(DNS_RBT_USEISCREFCOUNT)
typedef isc_rwlock_t nodelock_t;

166 167 168 169 170 171 172 173 174 175 176
#define NODE_INITLOCK(l)        isc_rwlock_init((l), 0, 0)
#define NODE_DESTROYLOCK(l)     isc_rwlock_destroy(l)
#define NODE_LOCK(l, t)         RWLOCK((l), (t))
#define NODE_UNLOCK(l, t)       RWUNLOCK((l), (t))
#define NODE_TRYUPGRADE(l)      isc_rwlock_tryupgrade(l)

#define NODE_STRONGLOCK(l)      ((void)0)
#define NODE_STRONGUNLOCK(l)    ((void)0)
#define NODE_WEAKLOCK(l, t)     NODE_LOCK(l, t)
#define NODE_WEAKUNLOCK(l, t)   NODE_UNLOCK(l, t)
#define NODE_WEAKDOWNGRADE(l)   isc_rwlock_downgrade(l)
177 178 179
#else
typedef isc_mutex_t nodelock_t;

180 181 182 183 184 185 186 187 188 189 190
#define NODE_INITLOCK(l)        isc_mutex_init(l)
#define NODE_DESTROYLOCK(l)     DESTROYLOCK(l)
#define NODE_LOCK(l, t)         LOCK(l)
#define NODE_UNLOCK(l, t)       UNLOCK(l)
#define NODE_TRYUPGRADE(l)      ISC_R_SUCCESS

#define NODE_STRONGLOCK(l)      LOCK(l)
#define NODE_STRONGUNLOCK(l)    UNLOCK(l)
#define NODE_WEAKLOCK(l, t)     ((void)0)
#define NODE_WEAKUNLOCK(l, t)   ((void)0)
#define NODE_WEAKDOWNGRADE(l)   ((void)0)
191 192
#endif

193
/*
194
 * Allow clients with a virtual time of up to 5 minutes in the past to see
195 196 197 198
 * records that would have otherwise have expired.
 */
#define RBTDB_VIRTUAL 300

199
struct noqname {
200 201 202 203
	dns_name_t 	name;
	void *     	neg;
	void *     	negsig;
	dns_rdatatype_t	type;
204 205
};

206
typedef struct acachectl acachectl_t;
207

Bob Halley's avatar
Bob Halley committed
208
typedef struct rdatasetheader {
Automatic Updater's avatar
Automatic Updater committed
209 210 211 212 213 214 215 216 217
	/*%
	 * Locked by the owning node's lock.
	 */
	rbtdb_serial_t                  serial;
	dns_ttl_t                       rdh_ttl;
	rbtdb_rdatatype_t               type;
	isc_uint16_t                    attributes;
	dns_trust_t                     trust;
	struct noqname                  *noqname;
218
	struct noqname                  *closest;
Automatic Updater's avatar
Automatic Updater committed
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
	/*%<
	 * We don't use the LIST macros, because the LIST structure has
	 * both head and tail pointers, and is doubly linked.
	 */

	struct rdatasetheader           *next;
	/*%<
	 * If this is the top header for an rdataset, 'next' points
	 * to the top header for the next rdataset (i.e., the next type).
	 * Otherwise, it points up to the header whose down pointer points
	 * at this header.
	 */

	struct rdatasetheader           *down;
	/*%<
	 * Points to the header for the next older version of
	 * this rdataset.
	 */

	isc_uint32_t                    count;
	/*%<
	 * Monotonously increased every time this rdataset is bound so that
	 * it is used as the base of the starting point in DNS responses
	 * when the "cyclic" rrset-order is required.  Since the ordering
	 * should not be so crucial, no lock is set for the counter for
	 * performance reasons.
	 */

	acachectl_t                     *additional_auth;
	acachectl_t                     *additional_glue;

	dns_rbtnode_t                   *node;
	isc_stdtime_t                   last_used;
	ISC_LINK(struct rdatasetheader) lru_link;
	/*%<
	 * Used for LRU-based cache management.  We should probably make
	 * these cache-DB specific.  We might also make it a pointer and
	 * ensure only the top header has a valid link to save memory.
	 * The linked-list is locked by the rbtdb->lrulock.
	 */

	/*
	 * It's possible this should not be here anymore, but instead
	 * referenced from the bucket's heap directly.
	 */
264
#if 0
Automatic Updater's avatar
Automatic Updater committed
265
	isc_heap_t                      *heap;
266
#endif
Automatic Updater's avatar
Automatic Updater committed
267 268 269 270
	unsigned int                    heap_index;
	/*%<
	 * Used for TTL-based cache cleaning.
	 */
Automatic Updater's avatar
Automatic Updater committed
271
	isc_stdtime_t                   resign;
Bob Halley's avatar
Bob Halley committed
272 273
} rdatasetheader_t;

274 275 276 277 278 279 280 281
typedef ISC_LIST(rdatasetheader_t)      rdatasetheaderlist_t;
typedef ISC_LIST(dns_rbtnode_t)         rbtnodelist_t;

#define RDATASET_ATTR_NONEXISTENT       0x0001
#define RDATASET_ATTR_STALE             0x0002
#define RDATASET_ATTR_IGNORE            0x0004
#define RDATASET_ATTR_RETAIN            0x0008
#define RDATASET_ATTR_NXDOMAIN          0x0010
282
#define RDATASET_ATTR_RESIGN            0x0020
283
#define RDATASET_ATTR_STATCOUNT         0x0040
284
#define RDATASET_ATTR_OPTOUT		0x0080
Michael Graff's avatar
Michael Graff committed
285

286
typedef struct acache_cbarg {
Automatic Updater's avatar
Automatic Updater committed
287 288 289 290 291
	dns_rdatasetadditional_t        type;
	unsigned int                    count;
	dns_db_t                        *db;
	dns_dbnode_t                    *node;
	rdatasetheader_t                *header;
292 293 294
} acache_cbarg_t;

struct acachectl {
Automatic Updater's avatar
Automatic Updater committed
295 296
	dns_acacheentry_t               *entry;
	acache_cbarg_t                  *cbarg;
297 298
};

Michael Graff's avatar
Michael Graff committed
299 300 301 302 303 304 305
/*
 * XXX
 * When the cache will pre-expire data (due to memory low or other
 * situations) before the rdataset's TTL has expired, it MUST
 * respect the RETAIN bit and not expire the data until its TTL is
 * expired.
 */
306

307
#undef IGNORE                   /* WIN32 winbase.h defines this. */
308

309
#define EXISTS(header) \
Automatic Updater's avatar
Automatic Updater committed
310
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) == 0)
311
#define NONEXISTENT(header) \
Automatic Updater's avatar
Automatic Updater committed
312
	(((header)->attributes & RDATASET_ATTR_NONEXISTENT) != 0)
313
#define IGNORE(header) \
Automatic Updater's avatar
Automatic Updater committed
314
	(((header)->attributes & RDATASET_ATTR_IGNORE) != 0)
Michael Graff's avatar
Michael Graff committed
315
#define RETAIN(header) \
Automatic Updater's avatar
Automatic Updater committed
316
	(((header)->attributes & RDATASET_ATTR_RETAIN) != 0)
317
#define NXDOMAIN(header) \
Automatic Updater's avatar
Automatic Updater committed
318
	(((header)->attributes & RDATASET_ATTR_NXDOMAIN) != 0)
319
#define RESIGN(header) \
Automatic Updater's avatar
Automatic Updater committed
320
	(((header)->attributes & RDATASET_ATTR_RESIGN) != 0)
321 322
#define OPTOUT(header) \
	(((header)->attributes & RDATASET_ATTR_OPTOUT) != 0)
323

324
#define DEFAULT_NODE_LOCK_COUNT         7       /*%< Should be prime. */
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344

/*%
 * Number of buckets for cache DB entries (locks, LRU lists, TTL heaps).
 * There is a tradeoff issue about configuring this value: if this is too
 * small, it may cause heavier contention between threads; if this is too large,
 * LRU purge algorithm won't work well (entries tend to be purged prematurely).
 * The default value should work well for most environments, but this can
 * also be configurable at compilation time via the
 * DNS_RBTDB_CACHE_NODE_LOCK_COUNT variable.  This value must be larger than
 * 1 due to the assumption of overmem_purge().
 */
#ifdef DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#if DNS_RBTDB_CACHE_NODE_LOCK_COUNT <= 1
#error "DNS_RBTDB_CACHE_NODE_LOCK_COUNT must be larger 1"
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT DNS_RBTDB_CACHE_NODE_LOCK_COUNT
#endif
#else
#define DEFAULT_CACHE_NODE_LOCK_COUNT   16
#endif	/* DNS_RBTDB_CACHE_NODE_LOCK_COUNT */
Bob Halley's avatar
Bob Halley committed
345 346

typedef struct {
Automatic Updater's avatar
Automatic Updater committed
347 348 349 350 351
	nodelock_t                      lock;
	/* Protected in the refcount routines. */
	isc_refcount_t                  references;
	/* Locked by lock. */
	isc_boolean_t                   exiting;
352 353 354
} rbtdb_nodelock_t;

typedef struct rbtdb_changed {
Automatic Updater's avatar
Automatic Updater committed
355 356 357
	dns_rbtnode_t *                 node;
	isc_boolean_t                   dirty;
	ISC_LINK(struct rbtdb_changed)  link;
358 359
} rbtdb_changed_t;

360
typedef ISC_LIST(rbtdb_changed_t)       rbtdb_changedlist_t;
361

362 363 364 365 366 367
typedef enum {
	dns_db_insecure,
	dns_db_partial,
	dns_db_secure
} dns_db_secure_t;

368
typedef struct rbtdb_version {
Automatic Updater's avatar
Automatic Updater committed
369 370 371 372 373 374 375 376 377 378 379 380
	/* Not locked */
	rbtdb_serial_t                  serial;
	/*
	 * Protected in the refcount routines.
	 * XXXJT: should we change the lock policy based on the refcount
	 * performance?
	 */
	isc_refcount_t                  references;
	/* Locked by database lock. */
	isc_boolean_t                   writer;
	isc_boolean_t                   commit_ok;
	rbtdb_changedlist_t             changed_list;
381
	rdatasetheaderlist_t		resigned_list;
Automatic Updater's avatar
Automatic Updater committed
382
	ISC_LINK(struct rbtdb_version)  link;
383
	dns_db_secure_t			secure;
384 385 386 387 388 389 390
	isc_boolean_t			havensec3;
	/* NSEC3 parameters */
	dns_hash_t			hash;
	isc_uint8_t			flags;
	isc_uint16_t			iterations;
	isc_uint8_t			salt_length;
	unsigned char			salt[NSEC3_MAX_HASH_LENGTH];
391 392
} rbtdb_version_t;

393 394
typedef ISC_LIST(rbtdb_version_t)       rbtdb_versionlist_t;

Bob Halley's avatar
Bob Halley committed
395
typedef struct {
Automatic Updater's avatar
Automatic Updater committed
396 397
	/* Unlocked. */
	dns_db_t                        common;
398
#if DNS_RBTDB_USERWLOCK
Automatic Updater's avatar
Automatic Updater committed
399
	isc_rwlock_t                    lock;
400
#else
Automatic Updater's avatar
Automatic Updater committed
401
	isc_mutex_t                     lock;
402
#endif
Automatic Updater's avatar
Automatic Updater committed
403 404 405 406
	isc_rwlock_t                    tree_lock;
	unsigned int                    node_lock_count;
	rbtdb_nodelock_t *              node_locks;
	dns_rbtnode_t *                 origin_node;
407
	dns_stats_t *			rrsetstats; /* cache DB only */
Automatic Updater's avatar
Automatic Updater committed
408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428
	/* Locked by lock. */
	unsigned int                    active;
	isc_refcount_t                  references;
	unsigned int                    attributes;
	rbtdb_serial_t                  current_serial;
	rbtdb_serial_t                  least_serial;
	rbtdb_serial_t                  next_serial;
	rbtdb_version_t *               current_version;
	rbtdb_version_t *               future_version;
	rbtdb_versionlist_t             open_versions;
	isc_boolean_t                   overmem;
	isc_task_t *                    task;
	dns_dbnode_t                    *soanode;
	dns_dbnode_t                    *nsnode;

	/*
	 * This is a linked list used to implement the LRU cache.  There will
	 * be node_lock_count linked lists here.  Nodes in bucket 1 will be
	 * placed on the linked list rdatasets[1].
	 */
	rdatasetheaderlist_t            *rdatasets;
429

Automatic Updater's avatar
Automatic Updater committed
430 431
	/*%
	 * Temporary storage for stale cache nodes and dynamically deleted
432
	 * nodes that await being cleaned up.
Automatic Updater's avatar
Automatic Updater committed
433
	 */
Automatic Updater's avatar
Automatic Updater committed
434 435 436 437 438 439 440 441 442
	rbtnodelist_t                   *deadnodes;

	/*
	 * Heaps.  Each of these is used for TTL based expiry.
	 */
	isc_heap_t                      **heaps;

	/* Locked by tree_lock. */
	dns_rbt_t *                     tree;
443
	dns_rbt_t *			nsec3;
Automatic Updater's avatar
Automatic Updater committed
444 445 446

	/* Unlocked */
	unsigned int                    quantum;
Bob Halley's avatar
Bob Halley committed
447 448
} dns_rbtdb_t;

449 450
#define RBTDB_ATTR_LOADED               0x01
#define RBTDB_ATTR_LOADING              0x02
451

452
/*%
453 454 455
 * Search Context
 */
typedef struct {
Automatic Updater's avatar
Automatic Updater committed
456 457 458 459 460 461 462 463 464 465 466 467 468
	dns_rbtdb_t *           rbtdb;
	rbtdb_version_t *       rbtversion;
	rbtdb_serial_t          serial;
	unsigned int            options;
	dns_rbtnodechain_t      chain;
	isc_boolean_t           copy_name;
	isc_boolean_t           need_cleanup;
	isc_boolean_t           wild;
	dns_rbtnode_t *         zonecut;
	rdatasetheader_t *      zonecut_rdataset;
	rdatasetheader_t *      zonecut_sigrdataset;
	dns_fixedname_t         zonecut_name;
	isc_stdtime_t           now;
469 470
} rbtdb_search_t;

471
/*%
472 473 474
 * Load Context
 */
typedef struct {
Automatic Updater's avatar
Automatic Updater committed
475 476
	dns_rbtdb_t *           rbtdb;
	isc_stdtime_t           now;
477 478
} rbtdb_load_t;

Bob Halley's avatar
Bob Halley committed
479
static void rdataset_disassociate(dns_rdataset_t *rdataset);
480 481
static isc_result_t rdataset_first(dns_rdataset_t *rdataset);
static isc_result_t rdataset_next(dns_rdataset_t *rdataset);
Bob Halley's avatar
Bob Halley committed
482
static void rdataset_current(dns_rdataset_t *rdataset, dns_rdata_t *rdata);
Bob Halley's avatar
Bob Halley committed
483
static void rdataset_clone(dns_rdataset_t *source, dns_rdataset_t *target);
Bob Halley's avatar
Bob Halley committed
484
static unsigned int rdataset_count(dns_rdataset_t *rdataset);
485
static isc_result_t rdataset_getnoqname(dns_rdataset_t *rdataset,
Automatic Updater's avatar
Automatic Updater committed
486
					dns_name_t *name,
487 488 489 490 491 492
					dns_rdataset_t *neg,
					dns_rdataset_t *negsig);
static isc_result_t rdataset_getclosest(dns_rdataset_t *rdataset,
					dns_name_t *name,
					dns_rdataset_t *neg,
					dns_rdataset_t *negsig);
493
static isc_result_t rdataset_getadditional(dns_rdataset_t *rdataset,
Automatic Updater's avatar
Automatic Updater committed
494 495 496 497 498 499 500 501 502 503
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype,
					   dns_acache_t *acache,
					   dns_zone_t **zonep,
					   dns_db_t **dbp,
					   dns_dbversion_t **versionp,
					   dns_dbnode_t **nodep,
					   dns_name_t *fname,
					   dns_message_t *msg,
					   isc_stdtime_t now);
504
static isc_result_t rdataset_setadditional(dns_rdataset_t *rdataset,
Automatic Updater's avatar
Automatic Updater committed
505 506 507 508 509 510 511 512
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype,
					   dns_acache_t *acache,
					   dns_zone_t *zone,
					   dns_db_t *db,
					   dns_dbversion_t *version,
					   dns_dbnode_t *node,
					   dns_name_t *fname);
513
static isc_result_t rdataset_putadditional(dns_acache_t *acache,
Automatic Updater's avatar
Automatic Updater committed
514 515 516
					   dns_rdataset_t *rdataset,
					   dns_rdatasetadditional_t type,
					   dns_rdatatype_t qtype);
517
static inline isc_boolean_t need_headerupdate(rdatasetheader_t *header,
Automatic Updater's avatar
Automatic Updater committed
518
					      isc_stdtime_t now);
519
static void update_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
Automatic Updater's avatar
Automatic Updater committed
520
			  isc_stdtime_t now);
521 522 523 524
static void expire_header(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
			  isc_boolean_t tree_locked);
static void overmem_purge(dns_rbtdb_t *rbtdb, unsigned int locknum_start,
			  isc_stdtime_t now, isc_boolean_t tree_locked);
525 526
static isc_result_t resign_insert(dns_rbtdb_t *rbtdb, int idx,
				  rdatasetheader_t *newheader);
527 528

static dns_rdatasetmethods_t rdataset_methods = {
Automatic Updater's avatar
Automatic Updater committed
529 530 531 532 533 534 535 536
	rdataset_disassociate,
	rdataset_first,
	rdataset_next,
	rdataset_current,
	rdataset_clone,
	rdataset_count,
	NULL,
	rdataset_getnoqname,
537 538
	NULL,
	rdataset_getclosest,
Automatic Updater's avatar
Automatic Updater committed
539 540 541
	rdataset_getadditional,
	rdataset_setadditional,
	rdataset_putadditional
Bob Halley's avatar
Bob Halley committed
542 543 544
};

static void rdatasetiter_destroy(dns_rdatasetiter_t **iteratorp);
545 546
static isc_result_t rdatasetiter_first(dns_rdatasetiter_t *iterator);
static isc_result_t rdatasetiter_next(dns_rdatasetiter_t *iterator);
Bob Halley's avatar
Bob Halley committed
547
static void rdatasetiter_current(dns_rdatasetiter_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
548
				 dns_rdataset_t *rdataset);
Bob Halley's avatar
Bob Halley committed
549 550

static dns_rdatasetitermethods_t rdatasetiter_methods = {
Automatic Updater's avatar
Automatic Updater committed
551 552 553 554
	rdatasetiter_destroy,
	rdatasetiter_first,
	rdatasetiter_next,
	rdatasetiter_current
555 556
};

Bob Halley's avatar
Bob Halley committed
557
typedef struct rbtdb_rdatasetiter {
Automatic Updater's avatar
Automatic Updater committed
558 559
	dns_rdatasetiter_t              common;
	rdatasetheader_t *              current;
Bob Halley's avatar
Bob Halley committed
560 561
} rbtdb_rdatasetiter_t;

562 563 564 565
static void             dbiterator_destroy(dns_dbiterator_t **iteratorp);
static isc_result_t     dbiterator_first(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_last(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_seek(dns_dbiterator_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
566
					dns_name_t *name);
567 568 569
static isc_result_t     dbiterator_prev(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_next(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_current(dns_dbiterator_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
570 571
					   dns_dbnode_t **nodep,
					   dns_name_t *name);
572 573
static isc_result_t     dbiterator_pause(dns_dbiterator_t *iterator);
static isc_result_t     dbiterator_origin(dns_dbiterator_t *iterator,
Automatic Updater's avatar
Automatic Updater committed
574
					  dns_name_t *name);
575 576

static dns_dbiteratormethods_t dbiterator_methods = {
Automatic Updater's avatar
Automatic Updater committed
577 578 579 580 581 582 583 584 585
	dbiterator_destroy,
	dbiterator_first,
	dbiterator_last,
	dbiterator_seek,
	dbiterator_prev,
	dbiterator_next,
	dbiterator_current,
	dbiterator_pause,
	dbiterator_origin
586 587
};

588 589
#define DELETION_BATCH_MAX 64

590
/*
591
 * If 'paused' is ISC_TRUE, then the tree lock is not being held.
592
 */
593
typedef struct rbtdb_dbiterator {
Automatic Updater's avatar
Automatic Updater committed
594 595 596 597 598 599 600 601
	dns_dbiterator_t                common;
	isc_boolean_t                   paused;
	isc_boolean_t                   new_origin;
	isc_rwlocktype_t                tree_locked;
	isc_result_t                    result;
	dns_fixedname_t                 name;
	dns_fixedname_t                 origin;
	dns_rbtnodechain_t              chain;
602 603
	dns_rbtnodechain_t		nsec3chain;
	dns_rbtnodechain_t		*current;
Automatic Updater's avatar
Automatic Updater committed
604 605 606
	dns_rbtnode_t                   *node;
	dns_rbtnode_t                   *deletions[DELETION_BATCH_MAX];
	int                             delete;
607 608
	isc_boolean_t			nsec3only;
	isc_boolean_t			nonsec3;
609 610 611
} rbtdb_dbiterator_t;


612 613
#define IS_STUB(rbtdb)  (((rbtdb)->common.attributes & DNS_DBATTR_STUB)  != 0)
#define IS_CACHE(rbtdb) (((rbtdb)->common.attributes & DNS_DBATTR_CACHE) != 0)
614

615
static void free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log,
Automatic Updater's avatar
Automatic Updater committed
616
		       isc_event_t *event);
617
static void overmem(dns_db_t *db, isc_boolean_t overmem);
618 619
static void setnsec3parameters(dns_db_t *db, rbtdb_version_t *version,
			       isc_boolean_t *nsec3createflag);
620

621 622 623
/*%
 * 'init_count' is used to initialize 'newheader->count' which inturn
 * is used to determine where in the cycle rrset-order cyclic starts.
Francis Dupont's avatar
Francis Dupont committed
624
 * We don't lock this as we don't care about simultaneous updates.
625 626
 *
 * Note:
627
 *      Both init_count and header->count can be ISC_UINT32_MAX.
628
 *      The count on the returned rdataset however can't be as
629 630
 *      that indicates that the database does not implement cyclic
 *      processing.
631 632 633
 */
static unsigned int init_count;

634 635 636 637 638 639
/*
 * Locking
 *
 * If a routine is going to lock more than one lock in this module, then
 * the locking must be done in the following order:
 *
640
 *      Tree Lock
641
 *
642 643
 *      Node Lock       (Only one from the set may be locked at one time by
 *                       any caller)
644
 *
645
 *      Database Lock
646 647 648 649
 *
 * Failure to follow this hierarchy can result in deadlock.
 */

650 651 652
/*
 * Deleting Nodes
 *
653
 * For zone databases the node for the origin of the zone MUST NOT be deleted.
654 655 656
 */


657 658 659 660
/*
 * DB Routines
 */

Bob Halley's avatar
Bob Halley committed
661 662
static void
attach(dns_db_t *source, dns_db_t **targetp) {
Automatic Updater's avatar
Automatic Updater committed
663
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)source;
Bob Halley's avatar
Bob Halley committed
664

Automatic Updater's avatar
Automatic Updater committed
665
	REQUIRE(VALID_RBTDB(rbtdb));
Bob Halley's avatar
Bob Halley committed
666

Automatic Updater's avatar
Automatic Updater committed
667
	isc_refcount_increment(&rbtdb->references, NULL);
Bob Halley's avatar
Bob Halley committed
668

Automatic Updater's avatar
Automatic Updater committed
669
	*targetp = source;
Bob Halley's avatar
Bob Halley committed
670 671 672
}

static void
673
free_rbtdb_callback(isc_task_t *task, isc_event_t *event) {
Automatic Updater's avatar
Automatic Updater committed
674
	dns_rbtdb_t *rbtdb = event->ev_arg;
675

Automatic Updater's avatar
Automatic Updater committed
676
	UNUSED(task);
677

Automatic Updater's avatar
Automatic Updater committed
678
	free_rbtdb(rbtdb, ISC_TRUE, event);
679 680
}

681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706
static void
update_rrsetstats(dns_rbtdb_t *rbtdb, rdatasetheader_t *header,
		  isc_boolean_t increment)
{
	dns_rdatastatstype_t statattributes = 0;
	dns_rdatastatstype_t base = 0;
	dns_rdatastatstype_t type;

	/* At the moment we count statistics only for cache DB */
	INSIST(IS_CACHE(rbtdb));

	if (NXDOMAIN(header))
		statattributes = DNS_RDATASTATSTYPE_ATTR_NXDOMAIN;
	else if (RBTDB_RDATATYPE_BASE(header->type) == 0) {
		statattributes = DNS_RDATASTATSTYPE_ATTR_NXRRSET;
		base = RBTDB_RDATATYPE_EXT(header->type);
	} else
		base = RBTDB_RDATATYPE_BASE(header->type);

	type = DNS_RDATASTATSTYPE_VALUE(base, statattributes);
	if (increment)
		dns_rdatasetstats_increment(rbtdb->rrsetstats, type);
	else
		dns_rdatasetstats_decrement(rbtdb->rrsetstats, type);
}

707 708
static void
set_ttl(dns_rbtdb_t *rbtdb, rdatasetheader_t *header, dns_ttl_t newttl) {
Automatic Updater's avatar
Automatic Updater committed
709 710 711 712 713 714 715
	int idx;
	isc_heap_t *heap;
	dns_ttl_t oldttl;

	oldttl = header->rdh_ttl;
	header->rdh_ttl = newttl;

716 717 718
	if (!IS_CACHE(rbtdb))
		return;

Automatic Updater's avatar
Automatic Updater committed
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734
	/*
	 * It's possible the rbtdb is not a cache.  If this is the case,
	 * we will not have a heap, and we move on.  If we do, though,
	 * we might need to adjust things.
	 */
	if (header->heap_index == 0 || newttl == oldttl)
		return;
	idx = header->node->locknum;
	if (rbtdb->heaps == NULL || rbtdb->heaps[idx] == NULL)
	    return;
	heap = rbtdb->heaps[idx];

	if (newttl < oldttl)
		isc_heap_increased(heap, header->heap_index);
	else
		isc_heap_decreased(heap, header->heap_index);
735 736 737
}

/*%
738
 * These functions allow the heap code to rank the priority of each
739 740 741 742
 * element.  It returns ISC_TRUE if v1 happens "sooner" than v2.
 */
static isc_boolean_t
ttl_sooner(void *v1, void *v2) {
Automatic Updater's avatar
Automatic Updater committed
743 744
	rdatasetheader_t *h1 = v1;
	rdatasetheader_t *h2 = v2;
745

Automatic Updater's avatar
Automatic Updater committed
746 747 748
	if (h1->rdh_ttl < h2->rdh_ttl)
		return (ISC_TRUE);
	return (ISC_FALSE);
749 750
}

751 752
static isc_boolean_t
resign_sooner(void *v1, void *v2) {
Automatic Updater's avatar
Automatic Updater committed
753 754
	rdatasetheader_t *h1 = v1;
	rdatasetheader_t *h2 = v2;
755

Automatic Updater's avatar
Automatic Updater committed
756 757 758
	if (h1->resign < h2->resign)
		return (ISC_TRUE);
	return (ISC_FALSE);
759 760
}

761 762 763 764
/*%
 * This function sets the heap index into the header.
 */
static void
765
set_index(void *what, unsigned int index) {
Automatic Updater's avatar
Automatic Updater committed
766
	rdatasetheader_t *h = what;
767

Automatic Updater's avatar
Automatic Updater committed
768
	h->heap_index = index;
769 770
}

771 772 773 774 775 776
/*%
 * Work out how many nodes can be deleted in the time between two
 * requests to the nameserver.  Smooth the resulting number and use it
 * as a estimate for the number of nodes to be deleted in the next
 * iteration.
 */
777 778
static unsigned int
adjust_quantum(unsigned int old, isc_time_t *start) {
Automatic Updater's avatar
Automatic Updater committed
779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816
	unsigned int pps = dns_pps;     /* packets per second */
	unsigned int interval;
	isc_uint64_t usecs;
	isc_time_t end;
	unsigned int new;

	if (pps < 100)
		pps = 100;
	isc_time_now(&end);

	interval = 1000000 / pps;       /* interval in usec */
	if (interval == 0)
		interval = 1;
	usecs = isc_time_microdiff(&end, start);
	if (usecs == 0) {
		/*
		 * We were unable to measure the amount of time taken.
		 * Double the nodes deleted next time.
		 */
		old *= 2;
		if (old > 1000)
			old = 1000;
		return (old);
	}
	new = old * interval;
	new /= (unsigned int)usecs;
	if (new == 0)
		new = 1;
	else if (new > 1000)
		new = 1000;

	/* Smooth */
	new = (new + old * 3) / 4;

	isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE, DNS_LOGMODULE_CACHE,
		      ISC_LOG_DEBUG(1), "adjust_quantum -> %d", new);

	return (new);
817
}
818

819 820
static void
free_rbtdb(dns_rbtdb_t *rbtdb, isc_boolean_t log, isc_event_t *event) {
Automatic Updater's avatar
Automatic Updater committed
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844
	unsigned int i;
	isc_ondestroy_t ondest;
	isc_result_t result;
	char buf[DNS_NAME_FORMATSIZE];
	isc_time_t start;

	if (IS_CACHE(rbtdb) && rbtdb->common.rdclass == dns_rdataclass_in)
		overmem((dns_db_t *)rbtdb, (isc_boolean_t)-1);

	REQUIRE(rbtdb->current_version != NULL || EMPTY(rbtdb->open_versions));
	REQUIRE(rbtdb->future_version == NULL);

	if (rbtdb->current_version != NULL) {
		unsigned int refs;

		isc_refcount_decrement(&rbtdb->current_version->references,
				       &refs);
		INSIST(refs == 0);
		UNLINK(rbtdb->open_versions, rbtdb->current_version, link);
		isc_refcount_destroy(&rbtdb->current_version->references);
		isc_mem_put(rbtdb->common.mctx, rbtdb->current_version,
			    sizeof(rbtdb_version_t));
	}

845 846 847 848 849 850 851 852 853 854
	/*
	 * We assume the number of remaining dead nodes is reasonably small;
	 * the overhead of unlinking all nodes here should be negligible.
	 */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		dns_rbtnode_t *node;

		node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		while (node != NULL) {
			ISC_LIST_UNLINK(rbtdb->deadnodes[i], node, deadlink);
Automatic Updater's avatar
Automatic Updater committed
855 856 857
			node = ISC_LIST_HEAD(rbtdb->deadnodes[i]);
		}
	}
858

Automatic Updater's avatar
Automatic Updater committed
859 860
	if (event == NULL)
		rbtdb->quantum = (rbtdb->task != NULL) ? 100 : 0;
861
 again:
Automatic Updater's avatar
Automatic Updater committed
862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883
	if (rbtdb->tree != NULL) {
		isc_time_now(&start);
		result = dns_rbt_destroy2(&rbtdb->tree, rbtdb->quantum);
		if (result == ISC_R_QUOTA) {
			INSIST(rbtdb->task != NULL);
			if (rbtdb->quantum != 0)
				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
								&start);
			if (event == NULL)
				event = isc_event_allocate(rbtdb->common.mctx,
							   NULL,
							 DNS_EVENT_FREESTORAGE,
							   free_rbtdb_callback,
							   rbtdb,
							   sizeof(isc_event_t));
			if (event == NULL)
				goto again;
			isc_task_send(rbtdb->task, &event);
			return;
		}
		INSIST(result == ISC_R_SUCCESS && rbtdb->tree == NULL);
	}
884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907

	if (rbtdb->nsec3 != NULL) {
		isc_time_now(&start);
		result = dns_rbt_destroy2(&rbtdb->nsec3, rbtdb->quantum);
		if (result == ISC_R_QUOTA) {
			INSIST(rbtdb->task != NULL);
			if (rbtdb->quantum != 0)
				rbtdb->quantum = adjust_quantum(rbtdb->quantum,
								&start);
			if (event == NULL)
				event = isc_event_allocate(rbtdb->common.mctx,
							   NULL,
							 DNS_EVENT_FREESTORAGE,
							   free_rbtdb_callback,
							   rbtdb,
							   sizeof(isc_event_t));
			if (event == NULL)
				goto again;
			isc_task_send(rbtdb->task, &event);
			return;
		}
		INSIST(result == ISC_R_SUCCESS && rbtdb->nsec3 == NULL);
	}

Automatic Updater's avatar
Automatic Updater committed
908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
	if (event != NULL)
		isc_event_free(&event);
	if (log) {
		if (dns_name_dynamic(&rbtdb->common.origin))
			dns_name_format(&rbtdb->common.origin, buf,
					sizeof(buf));
		else
			strcpy(buf, "<UNKNOWN>");
		isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
			      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
			      "done free_rbtdb(%s)", buf);
	}
	if (dns_name_dynamic(&rbtdb->common.origin))
		dns_name_free(&rbtdb->common.origin, rbtdb->common.mctx);
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		isc_refcount_destroy(&rbtdb->node_locks[i].references);
		NODE_DESTROYLOCK(&rbtdb->node_locks[i].lock);
	}

	/*
Automatic Updater's avatar
Automatic Updater committed
928
	 * Clean up LRU / re-signing order lists.
Automatic Updater's avatar
Automatic Updater committed
929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946
	 */
	if (rbtdb->rdatasets != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			INSIST(ISC_LIST_EMPTY(rbtdb->rdatasets[i]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->rdatasets,
			    rbtdb->node_lock_count *
			    sizeof(rdatasetheaderlist_t));
	}
	/*
	 * Clean up dead node buckets.
	 */
	if (rbtdb->deadnodes != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			INSIST(ISC_LIST_EMPTY(rbtdb->deadnodes[i]));
		isc_mem_put(rbtdb->common.mctx, rbtdb->deadnodes,
		    rbtdb->node_lock_count * sizeof(rbtnodelist_t));
	}
	/*
Automatic Updater's avatar
Automatic Updater committed
947
	 * Clean up heap objects.
Automatic Updater's avatar
Automatic Updater committed
948 949 950 951 952 953 954 955 956
	 */
	if (rbtdb->heaps != NULL) {
		for (i = 0; i < rbtdb->node_lock_count; i++)
			isc_heap_destroy(&rbtdb->heaps[i]);
		isc_mem_put(rbtdb->common.mctx, rbtdb->heaps,
			    rbtdb->node_lock_count *
			    sizeof(isc_heap_t *));
	}

957 958 959
	if (rbtdb->rrsetstats != NULL)
		dns_stats_detach(&rbtdb->rrsetstats);

Automatic Updater's avatar
Automatic Updater committed
960 961 962 963 964 965
	isc_mem_put(rbtdb->common.mctx, rbtdb->node_locks,
		    rbtdb->node_lock_count * sizeof(rbtdb_nodelock_t));
	isc_rwlock_destroy(&rbtdb->tree_lock);
	isc_refcount_destroy(&rbtdb->references);
	if (rbtdb->task != NULL)
		isc_task_detach(&rbtdb->task);
966

Automatic Updater's avatar
Automatic Updater committed
967 968 969 970 971 972
	RBTDB_DESTROYLOCK(&rbtdb->lock);
	rbtdb->common.magic = 0;
	rbtdb->common.impmagic = 0;
	ondest = rbtdb->common.ondest;
	isc_mem_putanddetach(&rbtdb->common.mctx, rbtdb, sizeof(*rbtdb));
	isc_ondestroy_notify(&ondest, rbtdb);
Bob Halley's avatar
Bob Halley committed
973 974
}

975
static inline void
976
maybe_free_rbtdb(dns_rbtdb_t *rbtdb) {
Automatic Updater's avatar
Automatic Updater committed
977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
	isc_boolean_t want_free = ISC_FALSE;
	unsigned int i;
	unsigned int inactive = 0;

	/* XXX check for open versions here */

	if (rbtdb->soanode != NULL)
		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->soanode);
	if (rbtdb->nsnode != NULL)
		dns_db_detachnode((dns_db_t *)rbtdb, &rbtdb->nsnode);

	/*
	 * Even though there are no external direct references, there still
	 * may be nodes in use.
	 */
	for (i = 0; i < rbtdb->node_lock_count; i++) {
		NODE_LOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
		rbtdb->node_locks[i].exiting = ISC_TRUE;
		NODE_UNLOCK(&rbtdb->node_locks[i].lock, isc_rwlocktype_write);
		if (isc_refcount_current(&rbtdb->node_locks[i].references)
		    == 0) {
			inactive++;
		}
	}

	if (inactive != 0) {
		RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
		rbtdb->active -= inactive;
		if (rbtdb->active == 0)
			want_free = ISC_TRUE;
		RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);
		if (want_free) {
			char buf[DNS_NAME_FORMATSIZE];
			if (dns_name_dynamic(&rbtdb->common.origin))
				dns_name_format(&rbtdb->common.origin, buf,
						sizeof(buf));
			else
				strcpy(buf, "<UNKNOWN>");
			isc_log_write(dns_lctx, DNS_LOGCATEGORY_DATABASE,
				      DNS_LOGMODULE_CACHE, ISC_LOG_DEBUG(1),
				      "calling free_rbtdb(%s)", buf);
			free_rbtdb(rbtdb, ISC_TRUE, NULL);
		}
	}
Bob Halley's avatar
Bob Halley committed
1021 1022 1023 1024
}

static void
detach(dns_db_t **dbp) {
Automatic Updater's avatar
Automatic Updater committed
1025 1026
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)(*dbp);
	unsigned int refs;
Bob Halley's avatar
Bob Halley committed
1027

Automatic Updater's avatar
Automatic Updater committed
1028
	REQUIRE(VALID_RBTDB(rbtdb));
Bob Halley's avatar
Bob Halley committed
1029

Automatic Updater's avatar
Automatic Updater committed
1030
	isc_refcount_decrement(&rbtdb->references, &refs);
Bob Halley's avatar
Bob Halley committed
1031

Automatic Updater's avatar
Automatic Updater committed
1032 1033
	if (refs == 0)
		maybe_free_rbtdb(rbtdb);
Bob Halley's avatar
Bob Halley committed
1034

Automatic Updater's avatar
Automatic Updater committed
1035
	*dbp = NULL;
Bob Halley's avatar
Bob Halley committed
1036 1037 1038 1039
}

static void
currentversion(dns_db_t *db, dns_dbversion_t **versionp) {
Automatic Updater's avatar
Automatic Updater committed
1040 1041 1042
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
	rbtdb_version_t *version;
	unsigned int refs;
Bob Halley's avatar
Bob Halley committed
1043

Automatic Updater's avatar
Automatic Updater committed
1044
	REQUIRE(VALID_RBTDB(rbtdb));
Bob Halley's avatar
Bob Halley committed
1045

Automatic Updater's avatar
Automatic Updater committed
1046 1047 1048 1049
	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_read);
	version = rbtdb->current_version;
	isc_refcount_increment(&version->references, &refs);
	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_read);
1050

Automatic Updater's avatar
Automatic Updater committed
1051
	*versionp = (dns_dbversion_t *)version;
1052 1053 1054
}

static inline rbtdb_version_t *
1055
allocate_version(isc_mem_t *mctx, rbtdb_serial_t serial,
Automatic Updater's avatar
Automatic Updater committed
1056
		 unsigned int references, isc_boolean_t writer)
1057
{
Automatic Updater's avatar
Automatic Updater committed
1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072
	isc_result_t result;
	rbtdb_version_t *version;

	version = isc_mem_get(mctx, sizeof(*version));
	if (version == NULL)
		return (NULL);
	version->serial = serial;
	result = isc_refcount_init(&version->references, references);
	if (result != ISC_R_SUCCESS) {
		isc_mem_put(mctx, version, sizeof(*version));
		return (NULL);
	}
	version->writer = writer;
	version->commit_ok = ISC_FALSE;
	ISC_LIST_INIT(version->changed_list);
Automatic Updater's avatar
Automatic Updater committed
1073
	ISC_LIST_INIT(version->resigned_list);
Automatic Updater's avatar
Automatic Updater committed
1074 1075 1076
	ISC_LINK_INIT(version, link);

	return (version);
Bob Halley's avatar
Bob Halley committed
1077 1078
}

1079
static isc_result_t
Bob Halley's avatar
Bob Halley committed
1080
newversion(dns_db_t *db, dns_dbversion_t **versionp) {
Automatic Updater's avatar
Automatic Updater committed
1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
	rbtdb_version_t *version;

	REQUIRE(VALID_RBTDB(rbtdb));
	REQUIRE(versionp != NULL && *versionp == NULL);
	REQUIRE(rbtdb->future_version == NULL);

	RBTDB_LOCK(&rbtdb->lock, isc_rwlocktype_write);
	RUNTIME_CHECK(rbtdb->next_serial != 0);         /* XXX Error? */
	version = allocate_version(rbtdb->common.mctx, rbtdb->next_serial, 1,
				   ISC_TRUE);
	if (version != NULL) {
		version->commit_ok = ISC_TRUE;
1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111
		version->secure = rbtdb->current_version->secure;
		version->havensec3 = rbtdb->current_version->havensec3;
		if (version->havensec3) {
			version->flags = rbtdb->current_version->flags;
			version->iterations =
				rbtdb->current_version->iterations;
			version->hash = rbtdb->current_version->hash;
			version->salt_length =
				rbtdb->current_version->salt_length;
			memcpy(version->salt, rbtdb->current_version->salt,
			       version->salt_length);
		} else {
			version->flags = 0;
			version->iterations = 0;
			version->hash = 0;
			version->salt_length = 0;
			memset(version->salt, 0, sizeof(version->salt));
		}
Automatic Updater's avatar
Automatic Updater committed
1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122
		rbtdb->next_serial++;
		rbtdb->future_version = version;
	}
	RBTDB_UNLOCK(&rbtdb->lock, isc_rwlocktype_write);

	if (version == NULL)
		return (ISC_R_NOMEMORY);

	*versionp = version;

	return (ISC_R_SUCCESS);
1123 1124
}

1125 1126
static void
attachversion(dns_db_t *db, dns_dbversion_t *source,
Automatic Updater's avatar
Automatic Updater committed
1127
	      dns_dbversion_t **targetp)
1128
{
Automatic Updater's avatar
Automatic Updater committed
1129 1130 1131
	dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
	rbtdb_version_t *rbtversion = source;
	unsigned int refs;
1132

Automatic Updater's avatar
Automatic Updater committed
1133
	REQUIRE(VALID_RBTDB(rbtdb));
1134