Commit 821ff5e8 authored by Evan Hunt

[master] isc_atomic_storeq()/stats improvements

4248.	[func]		Add an isc_atomic_storeq() function, use it in
			stats counters to improve performance.
			[RT #39972] [RT #39979]
parent 1ff2118d
+4248.	[func]		Add an isc_atomic_storeq() function, use it in
+			stats counters to improve performance.
+			[RT #39972] [RT #39979]
+
 4247.	[port]		Require both HAVE_JSON and JSON_C_VERSION to be
 			defined to report json library version. [RT #41045]
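For orientation before the diffs: isc_atomic_storeq() is a store-only primitive, the 64-bit counterpart of the existing isc_atomic_store(). A minimal usage sketch (hypothetical names; the assumption here is that isc_int64_t and the ISC_PLATFORM_* guards come from BIND's platform headers, as in the hunks below):

	#include <isc/atomic.h>

	static isc_int64_t shared_counter;

	static void
	set_counter(isc_int64_t val) {
	#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ
		/* One indivisible 64-bit write: readers never see a torn value. */
		isc_atomic_storeq(&shared_counter, val);
	#else
		/* Fallback, as stats.c does when no 64-bit atomics exist. */
		shared_counter = val;
	#endif
	}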
......
@@ -65,14 +65,10 @@ typedef struct {
 counter_t counters[TASKS];
 
-void do_xadd(isc_task_t *task, isc_event_t *ev);
-void do_xaddq(isc_task_t *task, isc_event_t *ev);
-
 #if defined(ISC_PLATFORM_HAVEXADD)
-isc_int32_t counter_32;
+static isc_int32_t counter_32;
 
-void
+static void
 do_xadd(isc_task_t *task, isc_event_t *ev) {
 	counter_t *state = (counter_t *)ev->ev_arg;
 	int i;
@@ -128,11 +124,9 @@ test_atomic_xadd() {
 #endif
 
 #if defined(ISC_PLATFORM_HAVEXADDQ)
-isc_int64_t counter_64;
-void do_xaddq(isc_task_t *task, isc_event_t *ev);
+static isc_int64_t counter_64;
 
-void
+static void
 do_xaddq(isc_task_t *task, isc_event_t *ev) {
 	counter_t *state = (counter_t *)ev->ev_arg;
 	int i;
@@ -188,6 +182,150 @@ test_atomic_xaddq() {
 }
 #endif
 
+static isc_int32_t store_32;
+
+static void
+do_store(isc_task_t *task, isc_event_t *ev) {
+	counter_t *state = (counter_t *)ev->ev_arg;
+	int i;
+	isc_uint8_t r;
+	isc_uint32_t val;
+
+	r = random() % 256;
+	val = (r << 24) | (r << 16) | (r << 8) | r;
+
+	for (i = 0 ; i < COUNTS_PER_ITERATION ; i++) {
+		isc_atomic_store(&store_32, val);
+	}
+
+	state->iteration++;
+	if (state->iteration < ITERATIONS) {
+		isc_task_send(task, &ev);
+	} else {
+		isc_event_free(&ev);
+	}
+}
+static void
+test_atomic_store() {
+	int test_result;
+	isc_task_t *tasks[TASKS];
+	isc_event_t *event;
+	int i;
+	isc_uint8_t r;
+	isc_uint32_t val;
+
+	t_assert("test_atomic_store", 1, T_REQUIRED, "%s",
+		 "ensure that isc_atomic_store() works.");
+
+	setup();
+
+	memset(counters, 0, sizeof(counters));
+	store_32 = 0;
+
+	/*
+	 * Create our tasks, and allocate an event to get the counters
+	 * going.
+	 */
+	for (i = 0 ; i < TASKS ; i++) {
+		tasks[i] = NULL;
+		CHECK(isc_task_create(task_manager, 0, &tasks[i]));
+		event = isc_event_allocate(mctx, NULL, 1000, do_store,
+					   &counters[i],
+					   sizeof(struct isc_event));
+		isc_task_sendanddetach(&tasks[i], &event);
+	}
+
+	teardown();
+
+	test_result = T_PASS;
+	r = store_32 & 0xff;
+	val = (r << 24) | (r << 16) | (r << 8) | r;
+	t_info("32-bit store 0x%x, expected 0x%x\n",
+	       (isc_uint32_t) store_32, val);
+	if ((isc_uint32_t) store_32 != val)
+		test_result = T_FAIL;
+	t_result(test_result);
+
+	store_32 = 0;
+}
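Note the test's trick: every task stores a value whose four bytes are identical, so the expected final value can be recomputed from the low byte alone. If a store could tear, the final value could mix bytes written by different tasks, and the comparison above would fail.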
+#if defined(ISC_PLATFORM_HAVEATOMICSTOREQ)
+static isc_int64_t store_64;
+
+static void
+do_storeq(isc_task_t *task, isc_event_t *ev) {
+	counter_t *state = (counter_t *)ev->ev_arg;
+	int i;
+	isc_uint8_t r;
+	isc_uint64_t val;
+
+	r = random() % 256;
+	val = (((isc_uint64_t) r << 24) |
+	       ((isc_uint64_t) r << 16) |
+	       ((isc_uint64_t) r << 8) |
+	       (isc_uint64_t) r);
+	val |= ((isc_uint64_t) val << 32);
+
+	for (i = 0 ; i < COUNTS_PER_ITERATION ; i++) {
+		isc_atomic_storeq(&store_64, val);
+	}
+
+	state->iteration++;
+	if (state->iteration < ITERATIONS) {
+		isc_task_send(task, &ev);
+	} else {
+		isc_event_free(&ev);
+	}
+}
+static void
+test_atomic_storeq() {
+	int test_result;
+	isc_task_t *tasks[TASKS];
+	isc_event_t *event;
+	int i;
+	isc_uint8_t r;
+	isc_uint64_t val;
+
+	t_assert("test_atomic_storeq", 1, T_REQUIRED, "%s",
+		 "ensure that isc_atomic_storeq() works.");
+
+	setup();
+
+	memset(counters, 0, sizeof(counters));
+	store_64 = 0;
+
+	/*
+	 * Create our tasks, and allocate an event to get the counters going.
+	 */
+	for (i = 0 ; i < TASKS ; i++) {
+		tasks[i] = NULL;
+		CHECK(isc_task_create(task_manager, 0, &tasks[i]));
+		event = isc_event_allocate(mctx, NULL, 1000, do_storeq,
+					   &counters[i],
+					   sizeof(struct isc_event));
+		isc_task_sendanddetach(&tasks[i], &event);
+	}
+
+	teardown();
+
+	test_result = T_PASS;
+	r = store_64 & 0xff;
+	val = (((isc_uint64_t) r << 24) |
+	       ((isc_uint64_t) r << 16) |
+	       ((isc_uint64_t) r << 8) |
+	       (isc_uint64_t) r);
+	val |= ((isc_uint64_t) val << 32);
+	t_info("64-bit store 0x%" ISC_PRINT_QUADFORMAT "x, "
+	       "expected 0x%" ISC_PRINT_QUADFORMAT "x\n",
+	       (isc_uint64_t) store_64, val);
+	if ((isc_uint64_t) store_64 != val)
+		test_result = T_FAIL;
+	t_result(test_result);
+
+	store_64 = 0;
+}
+#endif /* ISC_PLATFORM_HAVEATOMICSTOREQ */
@@ -195,6 +333,10 @@ testspec_t T_testlist[] = {
 #endif
 #if defined(ISC_PLATFORM_HAVEXADDQ)
 	{ (PFV) test_atomic_xaddq,	"test_atomic_xaddq" },
 #endif
+	{ (PFV) test_atomic_store,	"test_atomic_store" },
+#if defined(ISC_PLATFORM_HAVEATOMICSTOREQ)
+	{ (PFV) test_atomic_storeq,	"test_atomic_storeq" },
+#endif
 	{ (PFV) 0,			NULL }
 };
......
@@ -717,6 +717,7 @@ ISC_PLATFORM_USEMACASM
 ISC_PLATFORM_USESTDASM
 ISC_PLATFORM_USEOSFASM
 ISC_PLATFORM_USEGCCASM
+ISC_PLATFORM_HAVEATOMICSTOREQ
 ISC_PLATFORM_HAVEATOMICSTORE
 ISC_PLATFORM_HAVECMPXCHG
 ISC_PLATFORM_HAVEXADDQ
@@ -19854,14 +19855,17 @@ if test "$have_atomic" = "yes"; then
 	ISC_PLATFORM_HAVEATOMICSTORE="#define ISC_PLATFORM_HAVEATOMICSTORE 1"
 	if test "$have_xaddq" = "yes"; then
 		ISC_PLATFORM_HAVEXADDQ="#define ISC_PLATFORM_HAVEXADDQ 1"
+		ISC_PLATFORM_HAVEATOMICSTOREQ="#define ISC_PLATFORM_HAVEATOMICSTOREQ 1"
 	else
 		ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ"
+		ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ"
 	fi
 else
 	ISC_PLATFORM_HAVEXADD="#undef ISC_PLATFORM_HAVEXADD"
 	ISC_PLATFORM_HAVECMPXCHG="#undef ISC_PLATFORM_HAVECMPXCHG"
 	ISC_PLATFORM_HAVEATOMICSTORE="#undef ISC_PLATFORM_HAVEATOMICSTORE"
 	ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ"
+	ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ"
 fi
@@ -19874,6 +19878,7 @@ fi
 ISC_ARCH_DIR=$arch
......
@@ -3841,20 +3841,24 @@ if test "$have_atomic" = "yes"; then
 	ISC_PLATFORM_HAVEATOMICSTORE="#define ISC_PLATFORM_HAVEATOMICSTORE 1"
 	if test "$have_xaddq" = "yes"; then
 		ISC_PLATFORM_HAVEXADDQ="#define ISC_PLATFORM_HAVEXADDQ 1"
+		ISC_PLATFORM_HAVEATOMICSTOREQ="#define ISC_PLATFORM_HAVEATOMICSTOREQ 1"
 	else
 		ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ"
+		ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ"
 	fi
 else
 	ISC_PLATFORM_HAVEXADD="#undef ISC_PLATFORM_HAVEXADD"
 	ISC_PLATFORM_HAVECMPXCHG="#undef ISC_PLATFORM_HAVECMPXCHG"
 	ISC_PLATFORM_HAVEATOMICSTORE="#undef ISC_PLATFORM_HAVEATOMICSTORE"
 	ISC_PLATFORM_HAVEXADDQ="#undef ISC_PLATFORM_HAVEXADDQ"
+	ISC_PLATFORM_HAVEATOMICSTOREQ="#undef ISC_PLATFORM_HAVEATOMICSTOREQ"
 fi
 AC_SUBST(ISC_PLATFORM_HAVEXADD)
 AC_SUBST(ISC_PLATFORM_HAVEXADDQ)
 AC_SUBST(ISC_PLATFORM_HAVECMPXCHG)
 AC_SUBST(ISC_PLATFORM_HAVEATOMICSTORE)
+AC_SUBST(ISC_PLATFORM_HAVEATOMICSTOREQ)
 AC_SUBST(ISC_PLATFORM_USEGCCASM)
 AC_SUBST(ISC_PLATFORM_USEOSFASM)
......
@@ -288,11 +288,17 @@
 @ISC_PLATFORM_HAVEXADDQ@
 
 /*
- * If the "atomic swap" operation is available on this architecture,
- * ISC_PLATFORM_HAVEATOMICSTORE" will be defined.
+ * If the 32-bit "atomic swap" operation is available on this
+ * architecture, ISC_PLATFORM_HAVEATOMICSTORE will be defined.
  */
 @ISC_PLATFORM_HAVEATOMICSTORE@
 
+/*
+ * If the 64-bit "atomic swap" operation is available on this
+ * architecture, ISC_PLATFORM_HAVEATOMICSTOREQ will be defined.
+ */
+@ISC_PLATFORM_HAVEATOMICSTOREQ@
+
 /*
  * If the "compare-and-exchange" operation is available on this architecture,
  * ISC_PLATFORM_HAVECMPXCHG will be defined.
......
@@ -35,13 +35,45 @@
 #define ISC_STATS_MAGIC			ISC_MAGIC('S', 't', 'a', 't')
 #define ISC_STATS_VALID(x)		ISC_MAGIC_VALID(x, ISC_STATS_MAGIC)
 
 #ifndef ISC_STATS_USEMULTIFIELDS
-#if defined(ISC_RWLOCK_USEATOMIC) && defined(ISC_PLATFORM_HAVEXADD) && !defined(ISC_PLATFORM_HAVEXADDQ)
+
+/*%
+ * Local macro confirming presence of 64-bit
+ * increment and store operations, just to make
+ * the later macros simpler.
+ */
+#if defined(ISC_PLATFORM_HAVEXADDQ) && defined(ISC_PLATFORM_HAVEATOMICSTOREQ)
+#define ISC_STATS_HAVEATOMICQ 1
+#else
+#define ISC_STATS_HAVEATOMICQ 0
+#endif
+
+/*%
+ * Only lock the counters if 64-bit atomic operations are
+ * not available but cheap atomic lock operations are.
+ * On a modern 64-bit system this should never be the case.
+ *
+ * Normal locks are too expensive to be used whenever a counter
+ * is updated.
+ */
+#if !ISC_STATS_HAVEATOMICQ && defined(ISC_RWLOCK_USEATOMIC)
+#define ISC_STATS_LOCKCOUNTERS 1
+#else
+#define ISC_STATS_LOCKCOUNTERS 0
+#endif
+
+/*%
+ * If 64-bit atomic operations are not available but
+ * 32-bit operations are, then split the counter into two,
+ * using the atomic operations to try to ensure that any carry
+ * from the low word is correctly carried into the high word.
+ *
+ * Otherwise, just rely on standard 64-bit data types
+ * and operations.
+ */
+#if !ISC_STATS_HAVEATOMICQ && defined(ISC_PLATFORM_HAVEXADD)
+#define ISC_STATS_USEMULTIFIELDS 1
+#else
+#define ISC_STATS_USEMULTIFIELDS 0
+#endif
 #endif /* ISC_STATS_USEMULTIFIELDS */
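Where ISC_STATS_USEMULTIFIELDS is selected, each 64-bit counter is kept as the two-word {hi, lo} isc_stat_t defined just below. A minimal sketch of the increment-with-carry idea (increment_split is a hypothetical name; the real code is in incrementcounter() further down):

	static void
	increment_split(isc_stat_t *c) {
		/* isc_atomic_xadd() returns the previous value of the word. */
		isc_int32_t prev = isc_atomic_xadd((isc_int32_t *)&c->lo, 1);

		/* The low word just wrapped 0xffffffff -> 0: carry into hi. */
		if (prev == (isc_int32_t)0xffffffff)
			isc_atomic_xadd((isc_int32_t *)&c->hi, 1);
	}

The carry is not atomic with respect to the low-word update, which is one reason copy_counters() below takes an exclusive lock before reading.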
#if ISC_STATS_USEMULTIFIELDS
typedef struct {
@@ -65,7 +97,7 @@ struct isc_stats {
 	 * Locked by counterlock or unlocked if efficient rwlock is not
 	 * available.
 	 */
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_t counterlock;
 #endif
 	isc_stat_t *counters;
@@ -111,7 +143,7 @@ create_stats(isc_mem_t *mctx, int ncounters, isc_stats_t **statsp) {
 		goto clean_counters;
 	}
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	result = isc_rwlock_init(&stats->counterlock, 0, 0);
 	if (result != ISC_R_SUCCESS)
 		goto clean_copiedcounters;
@@ -131,7 +163,7 @@ create_stats(isc_mem_t *mctx, int ncounters, isc_stats_t **statsp) {
 clean_counters:
 	isc_mem_put(mctx, stats->counters, sizeof(isc_stat_t) * ncounters);
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 clean_copiedcounters:
 	isc_mem_put(mctx, stats->copiedcounters,
 		    sizeof(isc_stat_t) * ncounters);
@@ -177,7 +209,7 @@ isc_stats_detach(isc_stats_t **statsp) {
 		    sizeof(isc_stat_t) * stats->ncounters);
 	UNLOCK(&stats->lock);
 	DESTROYLOCK(&stats->lock);
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_destroy(&stats->counterlock);
 #endif
 
 	isc_mem_putanddetach(&stats->mctx, stats, sizeof(*stats));
@@ -198,7 +230,7 @@ static inline void
 incrementcounter(isc_stats_t *stats, int counter) {
 	isc_int32_t prev;
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	/*
 	 * We use a "read" lock to prevent other threads from reading the
 	 * counter while we are "writing" a counter field.  The write access itself
@@ -219,7 +251,7 @@ incrementcounter(isc_stats_t *stats, int counter) {
 	 */
 	if (prev == (isc_int32_t)0xffffffff)
 		isc_atomic_xadd((isc_int32_t *)&stats->counters[counter].hi, 1);
-#elif defined(ISC_PLATFORM_HAVEXADDQ)
+#elif ISC_STATS_HAVEATOMICQ
 	UNUSED(prev);
 	isc_atomic_xaddq((isc_int64_t *)&stats->counters[counter], 1);
 #else
@@ -227,7 +259,7 @@ incrementcounter(isc_stats_t *stats, int counter) {
 	stats->counters[counter]++;
 #endif
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_read);
 #endif
 }
@@ -236,7 +268,7 @@ static inline void
 decrementcounter(isc_stats_t *stats, int counter) {
 	isc_int32_t prev;
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_lock(&stats->counterlock, isc_rwlocktype_read);
 #endif
@@ -245,7 +277,7 @@ decrementcounter(isc_stats_t *stats, int counter) {
 	if (prev == 0)
 		isc_atomic_xadd((isc_int32_t *)&stats->counters[counter].hi,
 				-1);
-#elif defined(ISC_PLATFORM_HAVEXADDQ)
+#elif ISC_STATS_HAVEATOMICQ
 	UNUSED(prev);
 	isc_atomic_xaddq((isc_int64_t *)&stats->counters[counter], -1);
 #else
@@ -253,7 +285,7 @@ decrementcounter(isc_stats_t *stats, int counter) {
 	stats->counters[counter]--;
 #endif
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_read);
 #endif
 }
@@ -262,7 +294,7 @@ static void
 copy_counters(isc_stats_t *stats) {
 	int i;
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	/*
 	 * We use a "write" lock before "reading" the statistics counters so
 	 * that we hold it as an exclusive lock.
@@ -270,19 +302,21 @@ copy_counters(isc_stats_t *stats) {
 	isc_rwlock_lock(&stats->counterlock, isc_rwlocktype_write);
 #endif
 
-#if ISC_STATS_USEMULTIFIELDS
 	for (i = 0; i < stats->ncounters; i++) {
+#if ISC_STATS_USEMULTIFIELDS
 		stats->copiedcounters[i] =
 			(isc_uint64_t)(stats->counters[i].hi) << 32 |
 			stats->counters[i].lo;
-	}
+#elif ISC_STATS_HAVEATOMICQ
+		/* use xaddq(..., 0) as an atomic load */
+		stats->copiedcounters[i] =
+			(isc_uint64_t)isc_atomic_xaddq((isc_int64_t *)&stats->counters[i], 0);
 #else
-	UNUSED(i);
-	memmove(stats->copiedcounters, stats->counters,
-		stats->ncounters * sizeof(isc_stat_t));
+		stats->copiedcounters[i] = stats->counters[i];
 #endif
+	}
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_write);
 #endif
 }
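The xaddq(..., 0) idiom above deserves a note: a fetch-and-add of zero leaves the counter unchanged but returns the whole 64-bit value in one atomic operation, so the reader can never observe a torn value. For comparison only (standard C11 atomics, not BIND's API), the same load could be written as:

	#include <stdatomic.h>
	#include <stdint.h>

	/* Atomic 64-bit read expressed as a fetch-and-add of zero. */
	static inline uint64_t
	load_via_xadd(_Atomic int64_t *p) {
		/* fetch_add returns the previous value; adding 0 changes nothing. */
		return ((uint64_t)atomic_fetch_add_explicit(p, 0,
							    memory_order_relaxed));
	}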
@@ -335,7 +369,7 @@ isc_stats_set(isc_stats_t *stats, isc_uint64_t val,
 	REQUIRE(ISC_STATS_VALID(stats));
 	REQUIRE(counter < stats->ncounters);
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	/*
 	 * We use a "write" lock before updating the statistics counters so
 	 * that we hold it as an exclusive lock.
@@ -346,11 +380,13 @@ isc_stats_set(isc_stats_t *stats, isc_uint64_t val,
 #if ISC_STATS_USEMULTIFIELDS
 	stats->counters[counter].hi = (isc_uint32_t)((val >> 32) & 0xffffffff);
 	stats->counters[counter].lo = (isc_uint32_t)(val & 0xffffffff);
+#elif ISC_STATS_HAVEATOMICQ
+	isc_atomic_storeq((isc_int64_t *)&stats->counters[counter], val);
 #else
 	stats->counters[counter] = val;
 #endif
 
-#ifdef ISC_RWLOCK_USEATOMIC
+#if ISC_STATS_LOCKCOUNTERS
 	isc_rwlock_unlock(&stats->counterlock, isc_rwlocktype_write);
 #endif
 }
......
@@ -117,6 +117,12 @@ ATF_TC_BODY(snprintf, tc) {
 	n = isc_print_snprintf(buf, sizeof(buf), "%zo", size);
 	ATF_CHECK_EQ(n, 4);
 	ATF_CHECK_STREQ(buf, "1750");
+
+	zz = 0xf5f5f5f5f5f5f5f5ULL;
+	memset(buf, 0xff, sizeof(buf));
+	n = isc_print_snprintf(buf, sizeof(buf), "0x%" ISC_PRINT_QUADFORMAT "x", zz);
+	ATF_CHECK_EQ(n, 18);
+	ATF_CHECK_STREQ(buf, "0xf5f5f5f5f5f5f5f5");
 }
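(The expected length of 18 is the two-character "0x" prefix plus the sixteen hex digits of the 64-bit pattern.)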
ATF_TC(fprintf);
......
/*
- * Copyright (C) 2013 Internet Systems Consortium, Inc. ("ISC")
+ * Copyright (C) 2013, 2015 Internet Systems Consortium, Inc. ("ISC")
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
@@ -43,7 +43,7 @@ isc_atomic_xaddq(isc_int64_t *p, isc_int64_t val) {
 #endif
 
 /*
- * This routine atomically stores the value 'val' in 'p'.
+ * This routine atomically stores the value 'val' in 'p' (32-bit version).
  */
 #ifdef ISC_PLATFORM_HAVEATOMICSTORE
 static __inline void
@@ -52,6 +52,16 @@ isc_atomic_store(isc_int32_t *p, isc_int32_t val) {
 }
 #endif
 
+/*
+ * This routine atomically stores the value 'val' in 'p' (64-bit version).
+ */
+#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ
+static __inline void
+isc_atomic_storeq(isc_int64_t *p, isc_int64_t val) {
+	(void) _InterlockedExchange64((__int64 *)p, (__int64)val);
+}
+#endif
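_InterlockedExchange64() returns the previous value; the (void) cast makes the deliberate discard explicit. Using an interlocked exchange rather than a plain assignment both guarantees an indivisible 64-bit write and acts as a full memory barrier, matching the locked xchgq used in the gcc implementation below.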
/*
* This routine atomically replaces the value in 'p' with 'val', if the
* original value is equal to 'cmpval'. The original value is returned in any
......
@@ -62,7 +62,7 @@ isc_atomic_xaddq(isc_int64_t *p, isc_int64_t val) {
 #endif /* ISC_PLATFORM_HAVEXADDQ */
 
 /*
- * This routine atomically stores the value 'val' in 'p'.
+ * This routine atomically stores the value 'val' in 'p' (32-bit version).
  */
 static __inline__ void
 isc_atomic_store(isc_int32_t *p, isc_int32_t val) {
@@ -81,6 +81,28 @@ isc_atomic_store(isc_int32_t *p, isc_int32_t val) {
 		: "memory");
 }
 
+#ifdef ISC_PLATFORM_HAVEATOMICSTOREQ
+/*
+ * This routine atomically stores the value 'val' in 'p' (64-bit version).
+ */
+static __inline__ void
+isc_atomic_storeq(isc_int64_t *p, isc_int64_t val) {
+	__asm__ volatile(
+#ifdef ISC_PLATFORM_USETHREADS
+		/*
+		 * xchg with a memory operand implies the lock prefix,
+		 * but we add it explicitly just in case (it does no harm).
+		 */
+		"lock;"
+#endif
+		"xchgq %1, %0"
+		:
+		: "r"(val), "m"(*p)
+		: "memory");
+}
+#endif /* ISC_PLATFORM_HAVEATOMICSTOREQ */
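For comparison only (standard C11 atomics, not part of this commit), the same store with full ordering could be written without inline assembly as:

	#include <stdatomic.h>
	#include <stdint.h>

	/* Hypothetical C11 equivalent of isc_atomic_storeq(). */
	static inline void
	storeq_c11(_Atomic int64_t *p, int64_t val) {
		/* A seq_cst store is at least as strong as the locked xchgq. */
		atomic_store_explicit(p, val, memory_order_seq_cst);
	}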
/*
* This routine atomically replaces the value in 'p' with 'val', if the
* original value is equal to 'cmpval'. The original value is returned in any
......
@@ -100,6 +100,9 @@ isc_atomic_cmpxchg(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) {
 	UNUSED(val);
 
 	__asm (
+		/*
+		 * p is %rdi, cmpval is %esi, val is %edx.
+		 */
 		"movl %edx, %ecx\n"
 		"movl %esi, %eax\n"
 		"movq %rdi, %rdx\n"
@@ -108,8 +111,12 @@ isc_atomic_cmpxchg(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) {
 		"lock;"
 #endif
 		/*
-		 * If (%rdi) == %eax then (%rdi) := %edx.
-		 * %eax is set to old (%ecx), which will be the return value.
+		 * If [%rdi] == %eax then [%rdi] := %ecx (equal to %edx
+		 * from above), and %eax is untouched (equal to %esi
+		 * from above).
+		 *
+		 * Else if [%rdi] != %eax then [%rdi] := [%rdi]
+		 * (rewritten in the write cycle) and %eax := [%rdi].
 		 */
 		"cmpxchgl %ecx, (%rdx)"
 	);
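In C terms the routine is a compare-and-swap. A sketch of the contract (the body below is not itself atomic; the hardware cmpxchgl performs all of it in one indivisible step):

	static isc_int32_t
	cmpxchg_contract(isc_int32_t *p, isc_int32_t cmpval, isc_int32_t val) {
		isc_int32_t old = *p;

		if (old == cmpval)
			*p = val;
		return (old);	/* previous value, whether or not it swapped */
	}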
......
@@ -383,6 +383,7 @@ my @substdefh = ("AES_SIT",
 my %configdefp;
 
 my @substdefp = ("ISC_PLATFORM_HAVEATOMICSTORE",
+		 "ISC_PLATFORM_HAVEATOMICSTOREQ",
		 "ISC_PLATFORM_HAVECMPXCHG",
		 "ISC_PLATFORM_HAVEXADD",
		 "ISC_PLATFORM_HAVEXADDQ",
@@ -1305,6 +1306,7 @@ if ($enable_intrinsics eq "yes") {
 	$configdefp{"ISC_PLATFORM_HAVEXADD"} = 1;
 	if ($want_x64 eq "yes") {
 		$configdefp{"ISC_PLATFORM_HAVEXADDQ"} = 1;
+		$configdefp{"ISC_PLATFORM_HAVEATOMICSTOREQ"} = 1;
 	}
 	$configdefp{"ISC_PLATFORM_HAVEATOMICSTORE"} = 1;
 	$configdefp{"ISC_PLATFORM_HAVECMPXCHG"} = 1;
......