Commit 27fe1966 authored by Tatuya JINMEI 神明達哉's avatar Tatuya JINMEI 神明達哉
Browse files

2937. [bug] Worked around an apparent race condition in over

			memory conditions.  Without this fix a DNS cache DB or
			ADB could incorrectly stay in an over memory state,
			effectively refusing further caching, which
			subsequently made a BIND 9 caching server unworkable.
			This fix prevents this problem from happening by
			polling the state of the memory context, rather than
			making a copy of the state, which appeared to cause
			a race.  This is a "workaround" in that it doesn't
			solve the possible race per se, but several experiments
			proved this change solves the symptom.  Also, the
			polling overhead hasn't been reported to be an issue.
			This bug should only affect a caching server that
			specifies a finite max-cache-size.  It's also quite
			likely that the bug happens only when enabling threads,
			but it's not confirmed yet. [RT #21818]
parent fc7bf6dc
2937. [bug] Worked around an apparent race condition in over
memory conditions. Without this fix a DNS cache DB or
ADB could incorrectly stay in an over memory state,
effectively refusing further caching, which
subsequently made a BIND 9 caching server unworkable.
This fix prevents this problem from happening by
polling the state of the memory context, rather than
making a copy of the state, which appeared to cause
a race. This is a "workaround" in that it doesn't
solve the possible race per se, but several experiments
proved this change solves the symptom. Also, the
polling overhead hasn't been reported to be an issue.
This bug should only affect a caching server that
specifies a finite max-cache-size. It's also quite
likely that the bug happens only when enabling threads,
but it's not confirmed yet. [RT #21818]
2936. [func] Improved configuration syntax and multiple-view
support for addzone/delzone feature (see change
#2930). Removed "new-zone-file" option, replaced
......
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: adb.c,v 1.247 2009/02/03 22:33:13 jinmei Exp $ */
/* $Id: adb.c,v 1.248 2010/08/11 22:54:58 jinmei Exp $ */
/*! \file
*
......@@ -118,7 +118,6 @@ struct dns_adb {
isc_taskmgr_t *taskmgr;
isc_task_t *task;
isc_boolean_t overmem;
isc_interval_t tick_interval;
int next_cleanbucket;
......@@ -294,8 +293,8 @@ static inline void inc_adb_irefcnt(dns_adb_t *);
static inline void inc_adb_erefcnt(dns_adb_t *);
static inline void inc_entry_refcnt(dns_adb_t *, dns_adbentry_t *,
isc_boolean_t);
static inline isc_boolean_t dec_entry_refcnt(dns_adb_t *, dns_adbentry_t *,
isc_boolean_t);
static inline isc_boolean_t dec_entry_refcnt(dns_adb_t *, isc_boolean_t,
dns_adbentry_t *, isc_boolean_t);
static inline void violate_locking_hierarchy(isc_mutex_t *, isc_mutex_t *);
static isc_boolean_t clean_namehooks(dns_adb_t *, dns_adbnamehooklist_t *);
static void clean_target(dns_adb_t *, dns_name_t *);
......@@ -777,7 +776,7 @@ link_entry(dns_adb_t *adb, int bucket, dns_adbentry_t *entry) {
int i;
dns_adbentry_t *e;
if (adb->overmem) {
if (isc_mem_isovermem(adb->mctx)) {
for (i = 0; i < 2; i++) {
e = ISC_LIST_TAIL(adb->entries[bucket]);
if (e == NULL)
......@@ -943,6 +942,7 @@ clean_namehooks(dns_adb_t *adb, dns_adbnamehooklist_t *namehooks) {
dns_adbnamehook_t *namehook;
int addr_bucket;
isc_boolean_t result = ISC_FALSE;
isc_boolean_t overmem = isc_mem_isovermem(adb->mctx);
addr_bucket = DNS_ADB_INVALIDBUCKET;
namehook = ISC_LIST_HEAD(*namehooks);
......@@ -963,7 +963,8 @@ clean_namehooks(dns_adb_t *adb, dns_adbnamehooklist_t *namehooks) {
LOCK(&adb->entrylocks[addr_bucket]);
}
result = dec_entry_refcnt(adb, entry, ISC_FALSE);
result = dec_entry_refcnt(adb, overmem, entry,
ISC_FALSE);
}
/*
......@@ -1235,7 +1236,9 @@ inc_entry_refcnt(dns_adb_t *adb, dns_adbentry_t *entry, isc_boolean_t lock) {
}
static inline isc_boolean_t
dec_entry_refcnt(dns_adb_t *adb, dns_adbentry_t *entry, isc_boolean_t lock) {
dec_entry_refcnt(dns_adb_t *adb, isc_boolean_t overmem, dns_adbentry_t *entry,
isc_boolean_t lock)
{
int bucket;
isc_boolean_t destroy_entry;
isc_boolean_t result = ISC_FALSE;
......@@ -1250,7 +1253,7 @@ dec_entry_refcnt(dns_adb_t *adb, dns_adbentry_t *entry, isc_boolean_t lock) {
destroy_entry = ISC_FALSE;
if (entry->refcnt == 0 &&
(adb->entry_sd[bucket] || entry->expires == 0 || adb->overmem ||
(adb->entry_sd[bucket] || entry->expires == 0 || overmem ||
(entry->flags & ENTRY_IS_DEAD) != 0)) {
destroy_entry = ISC_TRUE;
result = unlink_entry(adb, entry);
......@@ -1852,7 +1855,7 @@ check_stale_name(dns_adb_t *adb, int bucket, isc_stdtime_t now) {
int victims, max_victims;
isc_boolean_t result;
dns_adbname_t *victim, *next_victim;
isc_boolean_t overmem = adb->overmem;
isc_boolean_t overmem = isc_mem_isovermem(adb->mctx);
int scans = 0;
INSIST(bucket != DNS_ADB_INVALIDBUCKET);
......@@ -2049,7 +2052,6 @@ dns_adb_create(isc_mem_t *mem, dns_view_t *view, isc_timermgr_t *timermgr,
adb, NULL, NULL);
adb->cevent_sent = ISC_FALSE;
adb->shutting_down = ISC_FALSE;
adb->overmem = ISC_FALSE;
ISC_LIST_INIT(adb->whenshutdown);
isc_mem_attach(mem, &adb->mctx);
......@@ -2616,6 +2618,7 @@ dns_adb_destroyfind(dns_adbfind_t **findp) {
dns_adbaddrinfo_t *ai;
int bucket;
dns_adb_t *adb;
isc_boolean_t overmem;
REQUIRE(findp != NULL && DNS_ADBFIND_VALID(*findp));
find = *findp;
......@@ -2640,13 +2643,14 @@ dns_adb_destroyfind(dns_adbfind_t **findp) {
* Return the find to the memory pool, and decrement the adb's
* reference count.
*/
overmem = isc_mem_isovermem(adb->mctx);
ai = ISC_LIST_HEAD(find->list);
while (ai != NULL) {
ISC_LIST_UNLINK(find->list, ai, publink);
entry = ai->entry;
ai->entry = NULL;
INSIST(DNS_ADBENTRY_VALID(entry));
RUNTIME_CHECK(dec_entry_refcnt(adb, entry, ISC_TRUE) ==
RUNTIME_CHECK(dec_entry_refcnt(adb, overmem, entry, ISC_TRUE) ==
ISC_FALSE);
free_adbaddrinfo(adb, &ai);
ai = ISC_LIST_HEAD(find->list);
......@@ -3509,6 +3513,7 @@ dns_adb_freeaddrinfo(dns_adb_t *adb, dns_adbaddrinfo_t **addrp) {
int bucket;
isc_stdtime_t now;
isc_boolean_t want_check_exit = ISC_FALSE;
isc_boolean_t overmem;
REQUIRE(DNS_ADB_VALID(adb));
REQUIRE(addrp != NULL);
......@@ -3520,13 +3525,14 @@ dns_adb_freeaddrinfo(dns_adb_t *adb, dns_adbaddrinfo_t **addrp) {
isc_stdtime_get(&now);
*addrp = NULL;
overmem = isc_mem_isovermem(adb->mctx);
bucket = addr->entry->lock_bucket;
LOCK(&adb->entrylocks[bucket]);
entry->expires = now + ADB_ENTRY_WINDOW;
want_check_exit = dec_entry_refcnt(adb, entry, ISC_FALSE);
want_check_exit = dec_entry_refcnt(adb, overmem, entry, ISC_FALSE);
UNLOCK(&adb->entrylocks[bucket]);
......@@ -3591,6 +3597,14 @@ dns_adb_flushname(dns_adb_t *adb, dns_name_t *name) {
static void
water(void *arg, int mark) {
/*
* We're going to change the way to handle overmem condition: use
* isc_mem_isovermem() instead of storing the state via this callback,
* since the latter way tends to cause race conditions.
* To minimize the change, and in case we re-enable the callback
* approach, however, keep this function at the moment.
*/
dns_adb_t *adb = arg;
isc_boolean_t overmem = ISC_TF(mark == ISC_MEM_HIWATER);
......@@ -3598,17 +3612,6 @@ water(void *arg, int mark) {
DP(ISC_LOG_DEBUG(1),
"adb reached %s water mark", overmem ? "high" : "low");
/*
* We can't use adb->lock as there is potential for water
* to be called when adb->lock is held.
*/
LOCK(&adb->overmemlock);
if (adb->overmem != overmem) {
adb->overmem = overmem;
isc_mem_waterack(adb->mctx, mark);
}
UNLOCK(&adb->overmemlock);
}
void
......
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: rbtdb.c,v 1.301 2010/05/10 01:39:03 marka Exp $ */
/* $Id: rbtdb.c,v 1.302 2010/08/11 22:54:58 jinmei Exp $ */
/*! \file */
......@@ -411,7 +411,6 @@ typedef struct {
rbtdb_version_t * current_version;
rbtdb_version_t * future_version;
rbtdb_versionlist_t open_versions;
isc_boolean_t overmem;
isc_task_t * task;
dns_dbnode_t *soanode;
dns_dbnode_t *nsnode;
......@@ -5117,7 +5116,7 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
if (now == 0)
isc_stdtime_get(&now);
if (rbtdb->overmem) {
if (isc_mem_isovermem(rbtdb->common.mctx)) {
isc_uint32_t val;
isc_random_get(&val);
......@@ -5127,8 +5126,8 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
force_expire = ISC_TF(rbtnode->down == NULL && val % 4 == 0);
/*
* Note that 'log' can be true IFF rbtdb->overmem is also true.
* rbtdb->overmem can currently only be true for cache
* Note that 'log' can be true IFF overmem is also true.
* overmem can currently only be true for cache
* databases -- hence all of the "overmem cache" log strings.
*/
log = ISC_TF(isc_log_wouldlog(dns_lctx, level));
......@@ -5173,7 +5172,7 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
"reprieve by RETAIN() %s",
printname);
}
} else if (rbtdb->overmem && log)
} else if (isc_mem_isovermem(rbtdb->common.mctx) && log)
isc_log_write(dns_lctx, category, module, level,
"overmem cache: saved %s", printname);
......@@ -5185,10 +5184,12 @@ expirenode(dns_db_t *db, dns_dbnode_t *node, isc_stdtime_t now) {
static void
overmem(dns_db_t *db, isc_boolean_t overmem) {
dns_rbtdb_t *rbtdb = (dns_rbtdb_t *)db;
/* This is an empty callback. See adb.c:water() */
if (IS_CACHE(rbtdb))
rbtdb->overmem = overmem;
UNUSED(db);
UNUSED(overmem);
return;
}
static void
......@@ -6134,6 +6135,7 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
isc_boolean_t delegating;
isc_boolean_t newnsec;
isc_boolean_t tree_locked = ISC_FALSE;
isc_boolean_t cache_is_overmem = ISC_FALSE;
REQUIRE(VALID_RBTDB(rbtdb));
......@@ -6230,12 +6232,14 @@ addrdataset(dns_db_t *db, dns_dbnode_t *node, dns_dbversion_t *version,
* the tree. In the latter case the lock does not necessarily have to
* be acquired but it will help purge stale entries more effectively.
*/
if (delegating || newnsec || (IS_CACHE(rbtdb) && rbtdb->overmem)) {
if (IS_CACHE(rbtdb) && isc_mem_isovermem(rbtdb->common.mctx))
cache_is_overmem = ISC_TRUE;
if (delegating || newnsec || cache_is_overmem) {
tree_locked = ISC_TRUE;
RWLOCK(&rbtdb->tree_lock, isc_rwlocktype_write);
}
if (IS_CACHE(rbtdb) && rbtdb->overmem)
if (cache_is_overmem)
overmem_purge(rbtdb, rbtnode->locknum, now, tree_locked);
NODE_LOCK(&rbtdb->node_locks[rbtnode->locknum].lock,
......@@ -7399,7 +7403,6 @@ dns_rbtdb_create
return (result);
}
rbtdb->attributes = 0;
rbtdb->overmem = ISC_FALSE;
rbtdb->task = NULL;
/*
......
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: mem.h,v 1.88 2010/03/04 23:50:34 tbox Exp $ */
/* $Id: mem.h,v 1.89 2010/08/11 22:54:58 jinmei Exp $ */
#ifndef ISC_MEM_H
#define ISC_MEM_H 1
......@@ -224,6 +224,7 @@ typedef struct isc_memmethods {
void *water_arg, size_t hiwater, size_t lowater);
void (*waterack)(isc_mem_t *ctx, int flag);
size_t (*inuse)(isc_mem_t *mctx);
isc_boolean_t (*isovermem)(isc_mem_t *mctx);
isc_result_t (*mpcreate)(isc_mem_t *mctx, size_t size,
isc_mempool_t **mpctxp);
} isc_memmethods_t;
......@@ -420,6 +421,14 @@ isc_mem_inuse(isc_mem_t *mctx);
* allocated from the system but not yet used.
*/
isc_boolean_t
isc_mem_isovermem(isc_mem_t *mctx);
/*%<
* Return true iff the memory context is in "over memory" state, i.e.,
* a hiwater mark has been set and the used amount of memory has exceeds
* the mark.
*/
void
isc_mem_setwater(isc_mem_t *mctx, isc_mem_water_t water, void *water_arg,
size_t hiwater, size_t lowater);
......
......@@ -14,7 +14,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: namespace.h,v 1.5 2009/10/01 01:30:01 sar Exp $ */
/* $Id: namespace.h,v 1.6 2010/08/11 22:54:58 jinmei Exp $ */
#ifndef ISCAPI_NAMESPACE_H
#define ISCAPI_NAMESPACE_H 1
......@@ -67,6 +67,7 @@
#define isc_mem_getquota isc__mem_getquota
#define isc_mem_gettag isc__mem_gettag
#define isc_mem_inuse isc__mem_inuse
#define isc_mem_isovermem isc__mem_isovermem
#define isc_mem_setname isc__mem_setname
#define isc_mem_setwater isc__mem_setwater
#define isc_mem_printallactive isc__mem_printallactive
......
......@@ -15,7 +15,7 @@
* PERFORMANCE OF THIS SOFTWARE.
*/
/* $Id: mem.c,v 1.156 2010/05/12 00:46:55 marka Exp $ */
/* $Id: mem.c,v 1.157 2010/08/11 22:54:58 jinmei Exp $ */
/*! \file */
......@@ -144,6 +144,7 @@ struct isc__mem {
size_t hi_water;
size_t lo_water;
isc_boolean_t hi_called;
isc_boolean_t is_overmem;
isc_mem_water_t water;
void * water_arg;
ISC_LIST(isc__mempool_t) pools;
......@@ -269,6 +270,8 @@ ISC_MEMFUNC_SCOPE size_t
isc__mem_getquota(isc_mem_t *ctx);
ISC_MEMFUNC_SCOPE size_t
isc__mem_inuse(isc_mem_t *ctx);
ISC_MEMFUNC_SCOPE isc_boolean_t
isc__mem_isovermem(isc_mem_t *ctx);
ISC_MEMFUNC_SCOPE void
isc__mem_setwater(isc_mem_t *ctx, isc_mem_water_t water, void *water_arg,
size_t hiwater, size_t lowater);
......@@ -345,6 +348,7 @@ static struct isc__memmethods {
isc__mem_setwater,
isc__mem_waterack,
isc__mem_inuse,
isc__mem_isovermem,
isc__mempool_create
}
#ifndef BIND9
......@@ -939,6 +943,7 @@ isc__mem_createx2(size_t init_max_size, size_t target_size,
ctx->hi_water = 0;
ctx->lo_water = 0;
ctx->hi_called = ISC_FALSE;
ctx->is_overmem = ISC_FALSE;
ctx->water = NULL;
ctx->water_arg = NULL;
ctx->common.impmagic = MEM_MAGIC;
......@@ -1281,6 +1286,10 @@ isc___mem_get(isc_mem_t *ctx0, size_t size FLARG) {
}
ADD_TRACE(ctx, ptr, size, file, line);
if (ctx->hi_water != 0U && ctx->inuse > ctx->hi_water &&
!ctx->is_overmem) {
ctx->is_overmem = ISC_TRUE;
}
if (ctx->hi_water != 0U && !ctx->hi_called &&
ctx->inuse > ctx->hi_water) {
call_water = ISC_TRUE;
......@@ -1338,6 +1347,10 @@ isc___mem_put(isc_mem_t *ctx0, void *ptr, size_t size FLARG) {
* when the context was pushed over hi_water but then had
* isc_mem_setwater() called with 0 for hi_water and lo_water.
*/
if (ctx->is_overmem &&
(ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
ctx->is_overmem = ISC_FALSE;
}
if (ctx->hi_called &&
(ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
if (ctx->water != NULL)
......@@ -1529,6 +1542,11 @@ isc___mem_allocate(isc_mem_t *ctx0, size_t size FLARG) {
#if ISC_MEM_TRACKLINES
ADD_TRACE(ctx, si, si[-1].u.size, file, line);
#endif
if (ctx->hi_water != 0U && ctx->inuse > ctx->hi_water &&
!ctx->is_overmem) {
ctx->is_overmem = ISC_TRUE;
}
if (ctx->hi_water != 0U && !ctx->hi_called &&
ctx->inuse > ctx->hi_water) {
ctx->hi_called = ISC_TRUE;
......@@ -1619,6 +1637,11 @@ isc___mem_free(isc_mem_t *ctx0, void *ptr FLARG) {
* when the context was pushed over hi_water but then had
* isc_mem_setwater() called with 0 for hi_water and lo_water.
*/
if (ctx->is_overmem &&
(ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
ctx->is_overmem = ISC_FALSE;
}
if (ctx->hi_called &&
(ctx->inuse < ctx->lo_water || ctx->lo_water == 0U)) {
ctx->hi_called = ISC_FALSE;
......@@ -1753,6 +1776,18 @@ isc__mem_setwater(isc_mem_t *ctx0, isc_mem_water_t water, void *water_arg,
(oldwater)(oldwater_arg, ISC_MEM_LOWATER);
}
ISC_MEMFUNC_SCOPE isc_boolean_t
isc__mem_isovermem(isc_mem_t *ctx0) {
isc__mem_t *ctx = (isc__mem_t *)ctx0;
/*
* We don't bother to lock the context because 100% accuracy isn't
* necessary (and even if we locked the context the returned value
* could be different from the actual state when it's used anyway)
*/
return (ctx->is_overmem);
}
ISC_MEMFUNC_SCOPE void
isc__mem_setname(isc_mem_t *ctx0, const char *name, void *tag) {
isc__mem_t *ctx = (isc__mem_t *)ctx0;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment