Commit d51f09a8 authored by Ondřej Surý

Merge branch 'ondrej/refactor-setsockopt' into 'main'

Refactor the setsockopt() code in network manager into helper functions

See merge request isc-projects/bind9!4227
parents 1567524a fd975a55
......@@ -834,7 +834,31 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid);
*/
isc_result_t
isc__nm_socket_freebind(const uv_handle_t *handle);
isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
* Set the IP_FREEBIND (or equivalent) socket option on the fd
*/
isc_result_t
isc__nm_socket_reuse(uv_os_sock_t fd);
/*%<
* Set the SO_REUSEADDR or SO_REUSEPORT (or equivalent) socket option on the fd
*/
isc_result_t
isc__nm_socket_reuse_lb(uv_os_sock_t fd);
/*%<
* Set the SO_REUSEPORT_LB (or equivalent) socket option on the fd
*/
isc_result_t
isc__nm_socket_incoming_cpu(uv_os_sock_t fd);
/*%<
* Set the SO_INCOMING_CPU socket option on the fd if available
*/
isc_result_t
isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
* Set the IP_DONTFRAG/IPV6_DONTFRAG (or equivalent) socket option on the fd if available
*/
......@@ -1584,51 +1584,169 @@ isc__nm_decstats(isc_nm_t *mgr, isc_statscounter_t counterid) {
setsockopt(socket, level, name, &(int){ 1 }, sizeof(int))
isc_result_t
isc__nm_socket_freebind(const uv_handle_t *handle) {
isc__nm_socket_freebind(uv_os_sock_t fd, sa_family_t sa_family) {
/*
* Set the IP_FREEBIND (or equivalent option) on the socket 'fd'.
*
* NOTE(review): this span shows the removed and the added lines of a
* diff interleaved (two signatures, the old uv_fileno()/getsockname()
* lookups next to the new sa_family checks), so it is not compilable
* exactly as shown.
*
* In the (fd, sa_family) form the visible return paths are:
*   ISC_R_SUCCESS        - the option was set
*   ISC_R_FAILURE        - setsockopt() returned -1
*   ISC_R_NOTIMPLEMENTED - no matching option is defined for the
*                          platform / address family
*/
isc_result_t result = ISC_R_SUCCESS;
uv_os_fd_t fd;
if (uv_fileno(handle, &fd) != 0) {
return (ISC_R_FAILURE);
}
#ifdef IP_FREEBIND
/* IP_FREEBIND is applied at IPPROTO_IP level regardless of family. */
UNUSED(sa_family);
if (setsockopt_on(fd, IPPROTO_IP, IP_FREEBIND) == -1) {
return (ISC_R_FAILURE);
}
return (ISC_R_SUCCESS);
#elif defined(IP_BINDANY) || defined(IPV6_BINDANY)
struct sockaddr_in sockfd;
if (getsockname(fd, (struct sockaddr *)&sockfd,
&(socklen_t){ sizeof(sockfd) }) == -1)
{
return (ISC_R_FAILURE);
}
/* Pick the per-family BINDANY option, if that one is defined. */
if (sa_family == AF_INET) {
#if defined(IP_BINDANY)
if (sockfd.sin_family == AF_INET) {
if (setsockopt_on(fd, IPPROTO_IP, IP_BINDANY) == -1) {
return (ISC_R_FAILURE);
}
}
return (ISC_R_SUCCESS);
#endif
} else if (sa_family == AF_INET6) {
#if defined(IPV6_BINDANY)
if (sockfd.sin_family == AF_INET6) {
if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_BINDANY) == -1) {
return (ISC_R_FAILURE);
}
}
return (ISC_R_SUCCESS);
#endif
}
/* Reached when the option for the requested family is not defined. */
return (ISC_R_NOTIMPLEMENTED);
#elif defined(SO_BINDANY)
UNUSED(sa_family);
if (setsockopt_on(fd, SOL_SOCKET, SO_BINDANY) == -1) {
return (ISC_R_FAILURE);
}
return (ISC_R_SUCCESS);
#else
UNUSED(fd);
UNUSED(sa_family);
return (ISC_R_NOTIMPLEMENTED);
#endif
}
isc_result_t
isc__nm_socket_reuse(uv_os_sock_t fd) {
/*
* Enable local address/port reuse on the socket 'fd'.
*
* Generally, the SO_REUSEADDR socket option allows reuse of
* local addresses.
*
* On the BSDs, SO_REUSEPORT implies SO_REUSEADDR but with some
* additional refinements for programs that use multicast.
*
* On Linux, SO_REUSEPORT has different semantics: it _shares_ the port
* rather than steal it from the current listener, so we don't use it
* here, but rather in isc__nm_socket_reuse_lb().
*
* On Windows, SO_REUSEADDR also allows a socket to forcibly bind to a
* port in use by another socket.
*
* Returns ISC_R_SUCCESS if the option was set, ISC_R_FAILURE if
* setsockopt() returned -1, and ISC_R_NOTIMPLEMENTED if neither
* SO_REUSEPORT (on non-Linux) nor SO_REUSEADDR is defined.
*/
#if defined(SO_REUSEPORT) && !defined(__linux__)
if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT) == -1) {
return (ISC_R_FAILURE);
}
return (ISC_R_SUCCESS);
#elif defined(SO_REUSEADDR)
if (setsockopt_on(fd, SOL_SOCKET, SO_REUSEADDR) == -1) {
return (ISC_R_FAILURE);
}
return (ISC_R_SUCCESS);
#else
/*
* NOTE(review): 'handle' and 'result' are not declared anywhere in this
* function; the lines using them (here and the trailing return) look
* like removed-diff residue - confirm before building.
*/
UNUSED(handle);
UNUSED(fd);
result = ISC_R_NOTIMPLEMENTED;
return (ISC_R_NOTIMPLEMENTED);
#endif
return (result);
}
isc_result_t
isc__nm_socket_reuse_lb(uv_os_sock_t fd) {
	/*
	 * Let several sockets be bound to an identical socket address.
	 *
	 * FreeBSD 12+ offers SO_REUSEPORT_LB for this.  For UDP sockets the
	 * option can give a better distribution of incoming datagrams across
	 * multiple processes (or threads) than the traditional approach of
	 * having them all compete to receive on one shared socket.
	 *
	 * Linux achieves the same thing with plain SO_REUSEPORT.
	 *
	 * Returns ISC_R_SUCCESS if the option was set, ISC_R_FAILURE if
	 * setsockopt() returned -1, and ISC_R_NOTIMPLEMENTED if the platform
	 * defines neither variant.
	 */
#if defined(SO_REUSEPORT_LB)
	int rv = setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT_LB);

	return ((rv == -1) ? ISC_R_FAILURE : ISC_R_SUCCESS);
#elif defined(SO_REUSEPORT) && defined(__linux__)
	int rv = setsockopt_on(fd, SOL_SOCKET, SO_REUSEPORT);

	return ((rv == -1) ? ISC_R_FAILURE : ISC_R_SUCCESS);
#else
	UNUSED(fd);
	return (ISC_R_NOTIMPLEMENTED);
#endif
}
isc_result_t
isc__nm_socket_incoming_cpu(uv_os_sock_t fd) {
	/*
	 * Set the SO_INCOMING_CPU socket option on 'fd' when the platform
	 * defines it.
	 *
	 * Returns ISC_R_SUCCESS if the option was set, ISC_R_FAILURE if
	 * setsockopt() returned -1, and ISC_R_NOTIMPLEMENTED if
	 * SO_INCOMING_CPU is not defined.
	 *
	 * NOTE(review): setsockopt_on() appears to pass the constant 1 as
	 * the option value, while socket(7) describes the argument as a CPU
	 * number - confirm that this is the intent.
	 */
#ifdef SO_INCOMING_CPU
	int rv = setsockopt_on(fd, SOL_SOCKET, SO_INCOMING_CPU);

	return ((rv == -1) ? ISC_R_FAILURE : ISC_R_SUCCESS);
#else
	UNUSED(fd);
	return (ISC_R_NOTIMPLEMENTED);
#endif
}
isc_result_t
isc__nm_socket_dontfrag(uv_os_sock_t fd, sa_family_t sa_family) {
	/*
	 * Set the Don't Fragment flag on IP packets sent from 'fd'.
	 *
	 * For each address family the dedicated *_DONTFRAG option is
	 * preferred; when it is missing, *_MTU_DISCOVER is set to
	 * IP_PMTUDISC_DO instead.
	 *
	 * Returns:
	 *   ISC_R_SUCCESS         - the option was set
	 *   ISC_R_FAILURE         - setsockopt() returned -1
	 *   ISC_R_NOTIMPLEMENTED  - no usable option is defined for the family
	 *   ISC_R_FAMILYNOSUPPORT - 'sa_family' is neither AF_INET nor
	 *                           AF_INET6
	 */
	switch (sa_family) {
	case AF_INET6:
#if defined(IPV6_DONTFRAG)
		if (setsockopt_on(fd, IPPROTO_IPV6, IPV6_DONTFRAG) == -1) {
			return (ISC_R_FAILURE);
		}
		return (ISC_R_SUCCESS);
#elif defined(IPV6_MTU_DISCOVER)
		if (setsockopt(fd, IPPROTO_IPV6, IPV6_MTU_DISCOVER,
			       &(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1)
		{
			return (ISC_R_FAILURE);
		}
		return (ISC_R_SUCCESS);
#else
		UNUSED(fd);
		break;
#endif
	case AF_INET:
#if defined(IP_DONTFRAG)
		if (setsockopt_on(fd, IPPROTO_IP, IP_DONTFRAG) == -1) {
			return (ISC_R_FAILURE);
		}
		return (ISC_R_SUCCESS);
#elif defined(IP_MTU_DISCOVER)
		if (setsockopt(fd, IPPROTO_IP, IP_MTU_DISCOVER,
			       &(int){ IP_PMTUDISC_DO }, sizeof(int)) == -1)
		{
			return (ISC_R_FAILURE);
		}
		return (ISC_R_SUCCESS);
#else
		UNUSED(fd);
		break;
#endif
	default:
		return (ISC_R_FAMILYNOSUPPORT);
	}

	/* Known family, but no don't-fragment option on this platform. */
	return (ISC_R_NOTIMPLEMENTED);
}
#ifdef NETMGR_TRACE
......
......@@ -318,6 +318,8 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
isc_nmsocket_t *sock = ievent->sock;
struct sockaddr_storage sname;
int r, flags = 0, snamelen = sizeof(sname);
sa_family_t sa_family;
uv_os_sock_t fd;
REQUIRE(isc__nm_in_netthread());
REQUIRE(sock->type == isc_nm_tcplistener);
......@@ -334,14 +336,16 @@ isc__nm_async_tcplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPEN]);
if (sock->iface->addr.type.sa.sa_family == AF_INET6) {
sa_family = sock->iface->addr.type.sa.sa_family;
if (sa_family == AF_INET6) {
flags = UV_TCP_IPV6ONLY;
}
r = uv_tcp_bind(&sock->uv_handle.tcp, &sock->iface->addr.type.sa,
flags);
if (r == UV_EADDRNOTAVAIL &&
isc__nm_socket_freebind(&sock->uv_handle.handle) == ISC_R_SUCCESS)
uv_fileno(&sock->uv_handle.handle, (uv_os_fd_t *)&fd) == 0 &&
isc__nm_socket_freebind(fd, sa_family) == ISC_R_SUCCESS)
{
/*
* Retry binding with IP_FREEBIND (or equivalent option) if the
......
......@@ -65,8 +65,8 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb,
nsock->extrahandlesize = extrahandlesize;
for (size_t i = 0; i < mgr->nworkers; i++) {
isc_result_t result;
uint16_t family = iface->addr.type.sa.sa_family;
int res = 0;
isc__netievent_udplisten_t *ievent = NULL;
isc_nmsocket_t *csock = &nsock->children[i];
......@@ -82,46 +82,20 @@ isc_nm_listenudp(isc_nm_t *mgr, isc_nmiface_t *iface, isc_nm_recv_cb_t cb,
csock->fd = socket(family, SOCK_DGRAM, 0);
RUNTIME_CHECK(csock->fd >= 0);
/*
* This is SO_REUSE**** hell:
*
* Generally, the SO_REUSEADDR socket option allows reuse of
* local addresses. On Windows, it also allows a socket to
* forcibly bind to a port in use by another socket.
*
* On Linux, SO_REUSEPORT socket option allows sockets to be
* bound to an identical socket address. For UDP sockets, the
* use of this option can provide better distribution of
* incoming datagrams to multiple processes (or threads) as
* compared to the traditional technique of having multiple
* processes compete to receive datagrams on the same socket.
*
* On FreeBSD, the same thing is achieved with SO_REUSEPORT_LB.
*
*/
#if defined(SO_REUSEADDR)
res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEADDR,
&(int){ 1 }, sizeof(int));
RUNTIME_CHECK(res == 0);
#endif
#if defined(SO_REUSEPORT_LB)
res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT_LB,
&(int){ 1 }, sizeof(int));
RUNTIME_CHECK(res == 0);
#elif defined(SO_REUSEPORT)
res = setsockopt(csock->fd, SOL_SOCKET, SO_REUSEPORT,
&(int){ 1 }, sizeof(int));
RUNTIME_CHECK(res == 0);
#endif
result = isc__nm_socket_reuse(csock->fd);
RUNTIME_CHECK(result == ISC_R_SUCCESS ||
result == ISC_R_NOTIMPLEMENTED);
result = isc__nm_socket_reuse_lb(csock->fd);
RUNTIME_CHECK(result == ISC_R_SUCCESS ||
result == ISC_R_NOTIMPLEMENTED);
#ifdef SO_INCOMING_CPU
/* We don't check for the result, because SO_INCOMING_CPU can be
* available without the setter on Linux kernel version 4.4, and
* setting SO_INCOMING_CPU is just an optimization.
*/
(void)setsockopt(csock->fd, SOL_SOCKET, SO_INCOMING_CPU,
&(int){ 1 }, sizeof(int));
#endif
(void)isc__nm_socket_incoming_cpu(csock->fd);
ievent = isc__nm_get_ievent(mgr, netievent_udplisten);
ievent->sock = csock;
isc__nm_enqueue_ievent(&mgr->workers[i],
......@@ -167,6 +141,7 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
isc_nmsocket_t *sock = ievent->sock;
int r, uv_bind_flags = 0;
int uv_init_flags = 0;
sa_family_t sa_family;
REQUIRE(sock->type == isc_nm_udpsocket);
REQUIRE(sock->iface != NULL);
......@@ -188,14 +163,15 @@ isc__nm_async_udplisten(isc__networker_t *worker, isc__netievent_t *ev0) {
isc__nm_incstats(sock->mgr, sock->statsindex[STATID_OPENFAIL]);
}
if (sock->iface->addr.type.sa.sa_family == AF_INET6) {
sa_family = sock->iface->addr.type.sa.sa_family;
if (sa_family == AF_INET6) {
uv_bind_flags |= UV_UDP_IPV6ONLY;
}
r = uv_udp_bind(&sock->uv_handle.udp,
&sock->parent->iface->addr.type.sa, uv_bind_flags);
if (r == UV_EADDRNOTAVAIL &&
isc__nm_socket_freebind(&sock->uv_handle.handle) == ISC_R_SUCCESS)
isc__nm_socket_freebind(sock->fd, sa_family) == ISC_R_SUCCESS)
{
/*
* Retry binding with IP_FREEBIND (or equivalent option) if the
......
......@@ -56,6 +56,8 @@ typedef uint32_t socklen_t;
#undef MSG_TRUNC
typedef uint16_t sa_family_t;
/*
* Set up a macro for importing and exporting from the DLL
*/
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment