Commit 06bd8da9 authored by Ondřej Surý's avatar Ondřej Surý Committed by Ondřej Surý
Browse files

Use IP_RECVERR / IPV6_RECVERR to learn about ICMP destination unreachable

BIND 9 uses a connect()ed UDP socket when sending recursive queries.
The connect()ed socket provides feedback on a variety of ICMP
errors (e.g.  port unreachable) which BIND 9 can then use to decide what
to do with errors (report them to the client, try again with a different
nameserver etc).

However, Linux's implementation does not report what it considers
"transient" conditions, which are defined as Destination Host
Unreachable, Destination Network Unreachable, Source Route Failed and
Message Too Big.

BIND 9 now calls setsockopt(fd, SOL_IP(V6), IP(V6)_RECVERR, &one,
sizeof(one)); so that it receives an EPOLLERR event and an error return
value from recvmsg(), and therefore notices that the send has failed
immediately rather than waiting for a timeout.
parent e4557f42
Pipeline #59053 failed with stages
in 23 minutes and 42 seconds
......@@ -1463,6 +1463,13 @@ isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms);
* the minimum value must be at least 1000 (1 second).
*/
isc_result_t
isc__nm_socket_recverr(uv_os_sock_t fd, sa_family_t sa_family);
/*%<
 * Set IP_RECVERR/IPV6_RECVERR on the socket to enable full ICMP error reporting
 * on Linux, for faster failovers to working DNS servers.
 *
 * 'sa_family' selects which option is set: IPV6_RECVERR for AF_INET6,
 * IP_RECVERR for AF_INET.
 *
 * Returns:
 *\li	ISC_R_SUCCESS		the option was set on 'fd'
 *\li	ISC_R_FAILURE		the setsockopt() call failed
 *\li	ISC_R_NOTIMPLEMENTED	the option for this family is not defined
 *				at build time
 *\li	ISC_R_FAMILYNOSUPPORT	'sa_family' is neither AF_INET nor AF_INET6
 */
void
isc__nm_tls_initialize(void);
/*%<
......
......@@ -2313,6 +2313,35 @@ isc__nm_socket_connectiontimeout(uv_os_sock_t fd, int timeout_ms) {
#endif
}
isc_result_t
isc__nm_socket_recverr(uv_os_sock_t fd, sa_family_t sa_family) {
	/*
	 * Turn on the per-family "receive error" socket option.  The option
	 * is only attempted when the platform headers define it; otherwise
	 * the caller is told that it is not implemented.
	 */
	switch (sa_family) {
	case AF_INET6:
#if defined(IPV6_RECVERR)
		return (setsockopt_on(fd, IPPROTO_IPV6, IPV6_RECVERR) == -1
				? ISC_R_FAILURE
				: ISC_R_SUCCESS);
#else
		/* Nothing to set on this platform; 'fd' is not touched. */
		UNUSED(fd);
		return (ISC_R_NOTIMPLEMENTED);
#endif

	case AF_INET:
#if defined(IP_RECVERR)
		return (setsockopt_on(fd, IPPROTO_IP, IP_RECVERR) == -1
				? ISC_R_FAILURE
				: ISC_R_SUCCESS);
#else
		/* Nothing to set on this platform; 'fd' is not touched. */
		UNUSED(fd);
		return (ISC_R_NOTIMPLEMENTED);
#endif

	default:
		/* Only IPv4 and IPv6 sockets are handled here. */
		return (ISC_R_FAMILYNOSUPPORT);
	}
}
#ifdef NETMGR_TRACE
/*
* Dump all active sockets in netmgr. We output to stderr
......
......@@ -336,7 +336,10 @@ isc_nm_tcpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer,
sock->fd = fd;
atomic_init(&sock->client, true);
result = isc__nm_socket_connectiontimeout(fd, timeout);
(void)isc__nm_socket_incoming_cpu(sock->fd);
(void)isc__nm_socket_recverr(sock->fd, sa_family);
result = isc__nm_socket_connectiontimeout(sock->fd, timeout);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
req = isc__nm_uvreq_get(mgr, sock);
......@@ -383,6 +386,7 @@ isc__nm_tcp_lb_socket(sa_family_t sa_family) {
RUNTIME_CHECK(result == ISC_R_SUCCESS);
(void)isc__nm_socket_incoming_cpu(sock);
(void)isc__nm_socket_recverr(sock, sa_family);
/* FIXME: set mss */
......
......@@ -379,7 +379,10 @@ isc_nm_tcpdnsconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer,
sock->fd = fd;
atomic_init(&sock->client, true);
result = isc__nm_socket_connectiontimeout(fd, timeout);
(void)isc__nm_socket_incoming_cpu(sock->fd);
(void)isc__nm_socket_recverr(sock->fd, sa_family);
result = isc__nm_socket_connectiontimeout(sock->fd, timeout);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
req = isc__nm_uvreq_get(mgr, sock);
......
......@@ -87,6 +87,7 @@ isc__nm_udp_lb_socket(sa_family_t sa_family) {
(void)isc__nm_socket_incoming_cpu(sock);
(void)isc__nm_socket_dontfrag(sock, sa_family);
(void)isc__nm_socket_recverr(sock, sa_family);
result = isc__nm_socket_reuse(sock);
RUNTIME_CHECK(result == ISC_R_SUCCESS);
......@@ -802,8 +803,8 @@ isc_nm_udpconnect(isc_nm_t *mgr, isc_nmiface_t *local, isc_nmiface_t *peer,
result == ISC_R_NOTIMPLEMENTED);
(void)isc__nm_socket_incoming_cpu(sock->fd);
(void)isc__nm_socket_dontfrag(sock->fd, sa_family);
(void)isc__nm_socket_recverr(sock->fd, sa_family);
req = isc__nm_uvreq_get(mgr, sock);
req->cb.connect = cb;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment