socket.c 91.5 KB
Newer Older
Danny Mayer's avatar
Danny Mayer committed
1
/*
Automatic Updater's avatar
Automatic Updater committed
2
 * Copyright (C) 2004-2008  Internet Systems Consortium, Inc. ("ISC")
Mark Andrews's avatar
Mark Andrews committed
3
 * Copyright (C) 2000-2003  Internet Software Consortium.
Danny Mayer's avatar
Danny Mayer committed
4
 *
Automatic Updater's avatar
Automatic Updater committed
5
 * Permission to use, copy, modify, and/or distribute this software for any
Danny Mayer's avatar
Danny Mayer committed
6 7 8
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
Mark Andrews's avatar
Mark Andrews committed
9 10 11 12 13 14 15
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
Danny Mayer's avatar
Danny Mayer committed
16 17
 */

18
/* $Id: socket.c,v 1.70 2008/09/16 17:19:01 explorer Exp $ */
19

20 21
/* This code uses functions which are only available on Server 2003 and
 * higher, and Windows XP and higher.
22 23 24
 *
 * This code is by nature multithreaded and takes advantage of various
 * features to pass on information through the completion port for
25 26
 * when I/O is completed.  All sends, receives, accepts, and connects are
 * completed through the completion port.
27
 *
28 29 30
 * The number of Completion Port Worker threads used is the total number
 * of CPU's + 1. This increases the likelihood that a Worker Thread is
 * available for processing a completed request.
31 32 33
 *
 * XXXPDM 5 August, 2002
 */
Danny Mayer's avatar
Danny Mayer committed
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50

#define MAKE_EXTERNAL 1
#include <config.h>

#include <sys/types.h>

#ifndef _WINSOCKAPI_
#define _WINSOCKAPI_   /* Prevent inclusion of winsock.h in windows.h */
#endif

#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <io.h>
#include <fcntl.h>
51
#include <process.h>
Danny Mayer's avatar
Danny Mayer committed
52 53 54 55 56 57 58 59 60 61

#include <isc/buffer.h>
#include <isc/bufferlist.h>
#include <isc/condition.h>
#include <isc/list.h>
#include <isc/log.h>
#include <isc/mem.h>
#include <isc/msgs.h>
#include <isc/mutex.h>
#include <isc/net.h>
62
#include <isc/once.h>
63
#include <isc/os.h>
Danny Mayer's avatar
Danny Mayer committed
64 65 66 67
#include <isc/platform.h>
#include <isc/print.h>
#include <isc/region.h>
#include <isc/socket.h>
68 69
#include <isc/strerror.h>
#include <isc/syslog.h>
Danny Mayer's avatar
Danny Mayer committed
70 71 72
#include <isc/task.h>
#include <isc/thread.h>
#include <isc/util.h>
73
#include <isc/win32os.h>
Danny Mayer's avatar
Danny Mayer committed
74

75 76
#include <mswsock.h>

77
#include "errno2result.h"
78

79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98
/*
 * How in the world can Microsoft exist with APIs like this?
 * We can't actually call this directly, because it turns out
 * no library exports this function.  Instead, we need to
 * issue a runtime call to get the address.
 */
LPFN_CONNECTEX ISCConnectEx;
LPFN_ACCEPTEX ISCAcceptEx;
LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;

/*
 * Run expensive internal consistency checks.
 */
#ifdef ISC_SOCKET_CONSISTENCY_CHECKS
#define CONSISTENT(sock) consistent(sock)
#else
#define CONSISTENT(sock) do {} while (0)
#endif
static void consistent(isc_socket_t *sock);

99 100 101 102 103 104 105
/*
 * Define this macro to control the behavior of connection
 * resets on UDP sockets.  See Microsoft KnowledgeBase Article Q263823
 * for details.
 * NOTE: This requires that Windows 2000 systems install Service Pack 2
 * or later.
 */
106 107
#ifndef SIO_UDP_CONNRESET
#define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
108 109
#endif

Danny Mayer's avatar
Danny Mayer committed
110 111 112 113 114 115 116 117 118 119 120 121
/*
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 */
#ifndef ISC_SOCKADDR_LEN_T
#define ISC_SOCKADDR_LEN_T unsigned int
#endif

/*
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 */
122
#define SOFT_ERROR(e)	((e) == WSAEINTR || \
123
			 (e) == WSAEWOULDBLOCK || \
124 125 126
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == EAGAIN || \
Danny Mayer's avatar
Danny Mayer committed
127 128
			 (e) == 0)

129 130 131 132 133 134 135
/*
 * Pending errors are not really errors and should be
 * kept separate
 */
#define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0)

#define DOIO_SUCCESS	  0       /* i/o ok, event sent */
136 137 138
#define DOIO_SOFT	  1       /* i/o ok, soft error, no event sent */
#define DOIO_HARD	  2       /* i/o error, event sent */
#define DOIO_EOF	  3       /* EOF, no event sent */
139
#define DOIO_PENDING	  4       /* status when i/o is in process */
140
#define DOIO_NEEDMORE	  5       /* IO was processed, but we need more due to minimum */
141

Danny Mayer's avatar
Danny Mayer committed
142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*
 * DLVL(90)  --  Function entry/exit and other tracing.
 * DLVL(70)  --  Socket "correctness" -- including returning of events, etc.
 * DLVL(60)  --  Socket data send/receive
 * DLVL(50)  --  Event tracing, including receiving/sending completion events.
 * DLVL(20)  --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)

typedef isc_event_t intev_t;

165 166 167 168 169 170 171 172 173 174 175 176 177
/*
 * Socket State
 */
enum {
  /*
   * Values stored in the 'state' member of struct isc_socket (set through
   * the _set_state() macro, which also records the source line).
   */
  SOCK_INITIALIZED,	/* Socket Initialized */
  SOCK_OPEN,		/* Socket opened but nothing yet to do */
  SOCK_DATA,		/* Socket sending or receiving data */
  SOCK_LISTEN,		/* TCP Socket listening for connects */
  SOCK_ACCEPT,		/* TCP socket is waiting to accept */
  SOCK_CONNECT,		/* TCP Socket connecting */
  SOCK_CLOSED,		/* Socket has been closed */
};

178 179
#define SOCKET_MAGIC		ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(t)		ISC_MAGIC_VALID(t, SOCKET_MAGIC)
Danny Mayer's avatar
Danny Mayer committed
180 181 182 183 184 185 186 187 188 189 190 191 192 193

/*
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */
#ifdef ISC_PLATFORM_HAVEIPV6
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * We really don't want to try to use these control messages, because
 * Win32 doesn't have this mechanism before XP.
Danny Mayer's avatar
Danny Mayer committed
195 196 197 198 199 200 201 202
 */
#undef USE_CMSG

/*
 * Message header for recvmsg and sendmsg calls.
 * Used value-result for recvmsg, value only for sendmsg.
 */
struct msghdr {
203 204
	SOCKADDR_STORAGE to_addr;	/* UDP send/recv address */
	int      to_addr_len;		/* length of the address */
Automatic Updater's avatar
Automatic Updater committed
205 206 207 208
	WSABUF  *msg_iov;		/* scatter/gather array */
	u_int   msg_iovlen;             /* # elements in msg_iov */
	void	*msg_control;           /* ancillary data, see below */
	u_int   msg_controllen;         /* ancillary data buffer len */
209
	int	msg_totallen;		/* total length of this message */
Danny Mayer's avatar
Danny Mayer committed
210
} msghdr;
Automatic Updater's avatar
Automatic Updater committed
211

212 213
/*
 * The size to raise the receive buffer to.
214 215 216
 */
#define RCVBUFSIZE (32*1024)

217
/*
218 219
 * The number of times a send operation is repeated if the result
 * is WSAEINTR.
220 221 222
 */
#define NRETRIES 10

Danny Mayer's avatar
Danny Mayer committed
223 224 225 226 227 228
struct isc_socket {
	/* Not locked. */
	unsigned int		magic;
	isc_socketmgr_t	       *manager;
	isc_mutex_t		lock;
	isc_sockettype_t	type;
229

230 231
	/* Pointers to scatter/gather buffers */
	WSABUF			iov[ISC_SOCKET_MAXSCATTERGATHER];
232

Danny Mayer's avatar
Danny Mayer committed
233 234
	/* Locked by socket lock. */
	ISC_LINK(isc_socket_t)	link;
235 236 237
	unsigned int		references; /* EXTERNAL references */
	SOCKET			fd;	/* file handle */
	int			pf;	/* protocol family */
238 239 240
	char			name[16];
	void *			tag;

241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
	/*
	 * Each recv() call uses this buffer.  It is a per-socket receive
	 * buffer that allows us to decouple the system recv() from the
	 * recv_list done events.  This means the items on the recv_list
	 * can be removed without having to cancel pending system recv()
	 * calls.  It also allows us to read-ahead in some cases.
	 */
	struct {
		SOCKADDR_STORAGE	from_addr;	   // UDP send/recv address
		int		from_addr_len;	   // length of the address
		char		*base;		   // the base of the buffer
		char		*consume_position; // where to start copying data from next
		unsigned int	len;		   // the actual size of this buffer
		unsigned int	remaining;	   // the number of bytes remaining
	} recvbuf;

Danny Mayer's avatar
Danny Mayer committed
257 258 259 260 261 262 263
	ISC_LIST(isc_socketevent_t)		send_list;
	ISC_LIST(isc_socketevent_t)		recv_list;
	ISC_LIST(isc_socket_newconnev_t)	accept_list;
	isc_socket_connev_t		       *connect_ev;

	isc_sockaddr_t		address;  /* remote address */

264
	unsigned int		listener : 1,	/* listener socket */
Danny Mayer's avatar
Danny Mayer committed
265
				connected : 1,
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
				pending_connect : 1, /* connect pending */
				bound : 1;	/* bound to local addr */
	unsigned int		pending_iocp;	/* Should equal the counters below. Debug. */
	unsigned int		pending_recv;  /* Number of outstanding recv() calls. */
	unsigned int		pending_send;  /* Number of outstanding send() calls. */
	unsigned int		pending_accept; /* Number of outstanding accept() calls. */
	unsigned int		state; /* Socket state. Debugging and consistency checking. */
	int			state_lineno;  /* line which last touched state */
};

#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)

/*
 * One node in a list of raw buffers (see the 'bufferlist' member of
 * IoCompletionInfo).
 */
typedef struct buflist buflist_t;

struct buflist {
	void			*buf;		/* the buffer itself */
	unsigned int		buflen;		/* size of 'buf' in bytes */
	ISC_LINK(buflist_t)	link;		/* list linkage */
};

289 290 291 292 293 294
/*
 * I/O Completion ports Info structures
 */

static HANDLE hHeapHandle = NULL;
typedef struct IoCompletionInfo {
295 296 297 298 299 300 301 302 303
	OVERLAPPED		overlapped;
	isc_socketevent_t	*dev;  /* send()/recv() done event */
	isc_socket_connev_t	*cdev; /* connect() done event */
	isc_socket_newconnev_t	*adev; /* accept() done event */
	void			*acceptbuffer;
	DWORD			received_bytes;
	int			request_type;
	struct msghdr		messagehdr;
	ISC_LIST(buflist_t)	bufferlist;	/*%< list of buffers */
304 305 306 307
} IoCompletionInfo;

/*
 * Define a maximum number of I/O Completion Port worker threads
308 309
 * to handle the load on the Completion Port. The actual number
 * used is the number of CPU's + 1.
310 311 312
 */
#define MAX_IOCPTHREADS 20

313 314 315
#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)

Danny Mayer's avatar
Danny Mayer committed
316 317
struct isc_socketmgr {
	/* Not locked. */
318 319 320
	unsigned int			magic;
	isc_mem_t		       *mctx;
	isc_mutex_t			lock;
Danny Mayer's avatar
Danny Mayer committed
321
	/* Locked by manager lock. */
322 323 324 325 326 327 328
	ISC_LIST(isc_socket_t)		socklist;
	isc_boolean_t			bShutdown;
	isc_condition_t			shutdown_ok;
	HANDLE				hIoCompletionPort;
	int				maxIOCPThreads;
	HANDLE				hIOCPThreads[MAX_IOCPTHREADS];
	DWORD				dwIOCPThreadIds[MAX_IOCPTHREADS];
329 330 331 332 333 334 335 336 337 338 339 340 341 342

	/*
	 * Debugging.
	 * Modified by InterlockedIncrement() and InterlockedDecrement()
	 */
	LONG				totalSockets;
	LONG				iocp_total;
};

/* Request codes stored in IoCompletionInfo.request_type. */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
349
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
Danny Mayer's avatar
Danny Mayer committed
350

351
static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
352 353 354 355 356 357 358 359 360 361 362 363
static void maybe_free_socket(isc_socket_t **, int);
static void free_socket(isc_socket_t **, int);
static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
static void queue_receive_request(isc_socket_t *sock);
364 365

/*
 * Dump the contents of the sock structure to stdout.
 *
 * You should make sure that the sock is locked before dumping it.
 * Since the code uses simple printf() statements it should only be
 * used interactively.
 */
void
sock_dump(isc_socket_t *sock) {
	isc_socketevent_t *ldev;
	isc_socket_newconnev_t *ndev;

#if 0
	isc_sockaddr_t addr;
	char socktext[256];

	isc_socket_getpeername(sock, &addr);
	isc_sockaddr_format(&addr, socktext, sizeof(socktext));
	printf("Remote Socket: %s\n", socktext);
	isc_socket_getsockname(sock, &addr);
	isc_sockaddr_format(&addr, socktext, sizeof(socktext));
	printf("This Socket: %s\n", socktext);
#endif

	printf("\n\t\tSock Dump\n");
	/* SOCKET is pointer-sized; cast so the argument matches %u. */
	printf("\t\tfd: %u\n", (unsigned int)sock->fd);
	printf("\t\treferences: %d\n", sock->references);
	printf("\t\tpending_accept: %d\n", sock->pending_accept);
	printf("\t\tconnecting: %d\n", sock->pending_connect);
	printf("\t\tconnected: %d\n", sock->connected);
	printf("\t\tbound: %d\n", sock->bound);
	printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
	printf("\t\tsocket type: %d\n", sock->type);

	printf("\n\t\tSock Recv List\n");
	ldev = ISC_LIST_HEAD(sock->recv_list);
	while (ldev != NULL) {
		printf("\t\tdev: %p\n", (void *)ldev);
		ldev = ISC_LIST_NEXT(ldev, ev_link);
	}

	printf("\n\t\tSock Send List\n");
	ldev = ISC_LIST_HEAD(sock->send_list);
	while (ldev != NULL) {
		printf("\t\tdev: %p\n", (void *)ldev);
		ldev = ISC_LIST_NEXT(ldev, ev_link);
	}

	printf("\n\t\tSock Accept List\n");
	ndev = ISC_LIST_HEAD(sock->accept_list);
	while (ndev != NULL) {
		/* Print the accept event being visited, not the stale send-list cursor. */
		printf("\t\tdev: %p\n", (void *)ndev);
		ndev = ISC_LIST_NEXT(ndev, ev_link);
	}
}
419 420 421 422 423 424

static void
socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
	   isc_msgcat_t *msgcat, int msgset, int message,
	   const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
425 426 427 428 429 430 431 432 433 434

/*
 * Post one empty entry per worker thread to the I/O completion port,
 * which signals the I/O threads to exit (gracefully).
 */
static void
signal_iocompletionport_exit(isc_socketmgr_t *manager) {
	char errtext[ISC_STRERRORSIZE];
	int oserr;
	int posted;

	REQUIRE(VALID_MANAGER(manager));

	for (posted = 0; posted < manager->maxIOCPThreads; posted++) {
		if (PostQueuedCompletionStatus(manager->hIoCompletionPort,
					       0, 0, 0))
			continue;

		oserr = GetLastError();
		isc__strerror(oserr, errtext, sizeof(errtext));
		FATAL_ERROR(__FILE__, __LINE__,
			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
					   ISC_MSG_FAILED,
					   "Can't request service thread to exit: %s"),
			    errtext);
	}
}

/*
 * Start 'total_threads' worker threads running SocketIoThread() with the
 * manager as their context, recording each handle and thread id in the
 * manager.  A thread-creation failure is fatal.
 */
void
iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
	char errtext[ISC_STRERRORSIZE];
	int oserr;
	int slot;

	/* At least one worker is needed. */
	INSIST(total_threads > 0);
	REQUIRE(VALID_MANAGER(manager));

	for (slot = 0; slot < total_threads; slot++) {
		HANDLE worker;

		worker = CreateThread(NULL, 0, SocketIoThread, manager, 0,
				      &manager->dwIOCPThreadIds[slot]);
		manager->hIOCPThreads[slot] = worker;
		if (worker != NULL)
			continue;

		oserr = GetLastError();
		isc__strerror(oserr, errtext, sizeof(errtext));
		FATAL_ERROR(__FILE__, __LINE__,
			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
					   ISC_MSG_FAILED,
					   "Can't create IOCP thread: %s"),
			    errtext);
		exit(1);
	}
}

481
/*
482
 *  Create/initialise the I/O completion port
483
 */
484 485 486 487
void
iocompletionport_init(isc_socketmgr_t *manager) {
	int errval;
	char strbuf[ISC_STRERRORSIZE];
488 489

	REQUIRE(VALID_MANAGER(manager));
490 491 492 493
	/*
	 * Create a private heap to handle the socket overlapped structure
	 * The miniumum number of structures is 10, there is no maximum
	 */
494 495 496 497 498 499 500 501 502 503 504 505 506 507
	hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
	if (hHeapHandle == NULL) {
		errval = GetLastError();
		isc__strerror(errval, strbuf, sizeof(strbuf));
		FATAL_ERROR(__FILE__, __LINE__,
			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
					   ISC_MSG_FAILED,
					   "HeapCreate() failed during "
					   "initialization: %s"),
			    strbuf);
		exit(1);
	}

	manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
508 509 510

	/* Now Create the Completion Port */
	manager->hIoCompletionPort = CreateIoCompletionPort(
511 512
			INVALID_HANDLE_VALUE, NULL,
			0, manager->maxIOCPThreads);
513 514 515 516 517 518 519 520 521 522 523
	if (manager->hIoCompletionPort == NULL) {
		errval = GetLastError();
		isc__strerror(errval, strbuf, sizeof(strbuf));
		FATAL_ERROR(__FILE__, __LINE__,
				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
				ISC_MSG_FAILED,
				"CreateIoCompletionPort() failed "
				"during initialization: %s"),
				strbuf);
		exit(1);
	}
524

525 526
	/*
	 * Worker threads for servicing the I/O
Automatic Updater's avatar
Automatic Updater committed
527
	 */
528 529
	iocompletionport_createthreads(manager->maxIOCPThreads, manager);
}
530

531
/*
532 533
 * Associate a socket with an IO Completion Port.  This allows us to queue events for it
 * and have our worker pool of threads process them.
534
 */
535
void
536 537
iocompletionport_update(isc_socket_t *sock) {
	HANDLE hiocp;
538
	char strbuf[ISC_STRERRORSIZE];
539

540
	REQUIRE(VALID_SOCKET(sock));
541

542 543 544 545 546 547
	hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
		sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);

	if (hiocp == NULL) {
		DWORD errval = GetLastError();
		isc__strerror(errval, strbuf, sizeof(strbuf));
548 549 550 551
		isc_log_iwrite(isc_lctx,
				ISC_LOGCATEGORY_GENERAL,
				ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
				isc_msgcat, ISC_MSGSET_SOCKET,
Automatic Updater's avatar
Automatic Updater committed
552
				ISC_MSG_TOOMANYHANDLES,
553 554 555 556 557 558 559 560 561 562 563 564 565 566 567
				"iocompletionport_update: failed to open"
				" io completion port: %s",
				strbuf);

		/* XXXMLG temporary hack to make failures detected.
		 * This function should return errors to the caller, not
		 * exit here.
		 */
		FATAL_ERROR(__FILE__, __LINE__,
				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
				ISC_MSG_FAILED,
				"CreateIoCompletionPort() failed "
				"during initialization: %s"),
				strbuf);
		exit(1);
568 569
	}

570
	InterlockedIncrement(&sock->manager->iocp_total);
571
}
572

573
/*
574 575 576 577 578 579
 * Routine to cleanup and then close the socket.
 * Only close the socket here if it is NOT associated
 * with an event, otherwise the WSAWaitForMultipleEvents
 * may fail due to the fact that the the Wait should not
 * be running while closing an event or a socket.
 * The socket is locked before calling this function
580
 */
581 582
void
socket_close(isc_socket_t *sock) {
583

584
	REQUIRE(sock != NULL);
585

586 587 588 589 590
	if (sock->fd != INVALID_SOCKET) {
		closesocket(sock->fd);
		sock->fd = INVALID_SOCKET;
		_set_state(sock, SOCK_CLOSED);
		InterlockedDecrement(&sock->manager->totalSockets);
591
	}
592
}
593

594 595
static isc_once_t initialise_once = ISC_ONCE_INIT;
static isc_boolean_t initialised = ISC_FALSE;
596

597 598 599 600 601 602 603 604 605 606
static void
initialise(void) {
	WORD wVersionRequested;
	WSADATA wsaData;
	int err;
	SOCKET sock;
	GUID GUIDConnectEx = WSAID_CONNECTEX;
	GUID GUIDAcceptEx = WSAID_ACCEPTEX;
	GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
	DWORD dwBytes;
607

608 609 610 611 612 613 614 615 616 617 618 619
	/* Need Winsock 2.2 or better */
	wVersionRequested = MAKEWORD(2, 2);

	err = WSAStartup(wVersionRequested, &wsaData);
	if (err != 0) {
		char strbuf[ISC_STRERRORSIZE];
		isc__strerror(err, strbuf, sizeof(strbuf));
		FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
					   ISC_MSG_FAILED, "failed"),
			    strbuf);
		exit(1);
620 621
	}
	/*
622 623 624 625
	 * The following APIs do not exist as functions in a library, but we must
	 * ask winsock for them.  They are "extensions" -- but why they cannot be
	 * actual functions is beyond me.  So, ask winsock for the pointers to the
	 * functions we need.
626
	 */
627 628 629 630 631 632 633
	sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
	INSIST(sock != INVALID_SOCKET);
	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
		 &GUIDConnectEx, sizeof(GUIDConnectEx),
		 &ISCConnectEx, sizeof(ISCConnectEx),
		 &dwBytes, NULL, NULL);
	INSIST(err == 0);
634

635 636 637 638 639
	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
		 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
		 &ISCAcceptEx, sizeof(ISCAcceptEx),
		 &dwBytes, NULL, NULL);
	INSIST(err == 0);
640

641 642 643 644 645
	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
		 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
		 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
		 &dwBytes, NULL, NULL);
	INSIST(err == 0);
646

647
	closesocket(sock);
648

649
	initialised = ISC_TRUE;
650
}
651

652
/*
653
 * Initialize socket services
654
 */
655 656 657 658 659 660 661 662 663 664 665
void
InitSockets(void) {
	RUNTIME_CHECK(isc_once_do(&initialise_once,
				  initialise) == ISC_R_SUCCESS);
	if (!initialised)
		exit(1);
}

/*
 * Start a send on 'sock' with WSASendTo(), using the scatter/gather array
 * and destination address held in 'messagehdr'.  'lpo' is passed as the
 * overlapped structure.
 *
 * '*Error' is cleared on entry and, when WSASendTo() reports SOCKET_ERROR,
 * set to the value of WSAGetLastError().
 *
 * Unless a hard error occurred, the socket's pending_iocp and pending_send
 * counters are incremented.
 *
 * Returns:
 *	-1	WSASendTo() failed with an error other than the
 *		"in progress" codes handled below; counters are untouched.
 *	0	'lpo' was non-NULL.
 *	other	'lpo' was NULL: the byte count reported by WSASendTo()
 *		(0 if none was reported).
 */
int
internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
		 struct msghdr *messagehdr, int flags, int *Error)
{
	int Result;
	/*
	 * Initialised because WSASendTo() is not relied upon to store a
	 * count when it fails or the operation is still pending; the value
	 * is read below either way.
	 */
	DWORD BytesSent = 0;
	DWORD Flags = flags;

	*Error = 0;
	Result = WSASendTo(sock->fd, messagehdr->msg_iov,
			   messagehdr->msg_iovlen, &BytesSent,
			   Flags, (SOCKADDR *)&messagehdr->to_addr,
			   messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
			   NULL);

	/* Check for errors. */
	if (Result == SOCKET_ERROR) {
		*Error = WSAGetLastError();

		switch (*Error) {
		case WSA_IO_INCOMPLETE:
		case WSA_WAIT_IO_COMPLETION:
		case WSA_IO_PENDING:
		case NO_ERROR:		/* Strange, but okay */
			/* Treated as an operation that is in progress. */
			break;

		default:
			return (-1);
		}
	}

	/* Immediate success or in progress: one more outstanding send. */
	sock->pending_iocp++;
	sock->pending_send++;

	if (lpo != NULL)
		return (0);

	return ((int)BytesSent);
}

/*
 * Post an overlapped WSARecvFrom() into the socket's recvbuf.
 *
 * Nothing is done if a receive is already outstanding or if recv_list is
 * empty.  Otherwise an IoCompletionInfo is allocated from the private
 * heap and handed to WSARecvFrom() as the overlapped structure.  When the
 * receive is queued (or completes immediately) pending_iocp and
 * pending_recv are incremented.  On any other error the receive is
 * aborted via send_recvdone_abort() and the IoCompletionInfo is released.
 */
static void
queue_receive_request(isc_socket_t *sock) {
	DWORD Flags = 0;
	DWORD NumBytes = 0;
	int Result;
	int Error;
	WSABUF iov[1];
	IoCompletionInfo *lpo;
	isc_result_t isc_result;

	/*
	 * If we already have a receive pending, do nothing.
	 */
	if (sock->pending_recv > 0)
		return;

	/*
	 * If no one is waiting, do nothing.
	 */
	if (ISC_LIST_EMPTY(sock->recv_list))
		return;

	INSIST(sock->recvbuf.remaining == 0);
	INSIST(sock->fd != INVALID_SOCKET);

	iov[0].len = sock->recvbuf.len;
	iov[0].buf = sock->recvbuf.base;

	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
					    HEAP_ZERO_MEMORY,
					    sizeof(IoCompletionInfo));
	RUNTIME_CHECK(lpo != NULL);
	lpo->request_type = SOCKET_RECV;

	sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);

	Error = 0;
	Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
			     &NumBytes, &Flags,
			     (SOCKADDR *)&sock->recvbuf.from_addr,
			     &sock->recvbuf.from_addr_len,
			     (LPWSAOVERLAPPED)lpo, NULL);

	/* Check for errors. */
	if (Result == SOCKET_ERROR) {
		Error = WSAGetLastError();

		switch (Error) {
		case WSA_IO_PENDING:
			sock->pending_iocp++;
			sock->pending_recv++;
			break;

		default:
			isc_result = isc__errno2result(Error);
			if (isc_result == ISC_R_UNEXPECTED)
				UNEXPECTED_ERROR(__FILE__, __LINE__,
					"WSARecvFrom: Windows error code: %d, isc result %d",
					Error, isc_result);
			send_recvdone_abort(sock, isc_result);
			/*
			 * The overlapped operation was never started, so no
			 * completion will be delivered for it; release the
			 * structure here or it is leaked.
			 */
			HeapFree(hHeapHandle, 0, lpo);
			lpo = NULL;
			break;
		}
	} else {
		/*
		 * The recv() finished immediately, but we will still get
		 * a completion event.  Rather than duplicate code, let
		 * that thread handle sending the data along its way.
		 */
		sock->pending_iocp++;
		sock->pending_recv++;
	}

	socket_log(__LINE__, sock, NULL, IOEVENT,
		   isc_msgcat, ISC_MSGSET_SOCKET,
		   ISC_MSG_DOIORECV,
		   "queue_io_request: fd %d result %d error %d",
		   sock->fd, Result, Error);

	CONSISTENT(sock);
}
Danny Mayer's avatar
Danny Mayer committed
790 791

static void
792 793 794
manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
	    isc_logmodule_t *module, int level, const char *fmt, ...)
{
Danny Mayer's avatar
Danny Mayer committed
795 796 797
	char msgbuf[2048];
	va_list ap;

798
	if (!isc_log_wouldlog(isc_lctx, level))
Danny Mayer's avatar
Danny Mayer committed
799 800 801 802 803 804 805 806 807 808
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	isc_log_write(isc_lctx, category, module, level,
		      "sockmgr %p: %s", sockmgr, msgbuf);
}

809
static void
810
socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
Danny Mayer's avatar
Danny Mayer committed
811 812
	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
	   isc_msgcat_t *msgcat, int msgset, int message,
813 814
	   const char *fmt, ...)
{
Danny Mayer's avatar
Danny Mayer committed
815 816 817 818
	char msgbuf[2048];
	char peerbuf[256];
	va_list ap;

819 820

	if (!isc_log_wouldlog(isc_lctx, level))
Danny Mayer's avatar
Danny Mayer committed
821 822 823 824 825 826 827 828 829
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	if (address == NULL) {
		isc_log_iwrite(isc_lctx, category, module, level,
			       msgcat, msgset, message,
830
			       "socket %p line %d: %s", sock, lineno, msgbuf);
831
	} else {
Mark Andrews's avatar
Mark Andrews committed
832
		isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
833 834
		isc_log_iwrite(isc_lctx, category, module, level,
			       msgcat, msgset, message,
835 836
				   "socket %p line %d peer %s: %s", sock, lineno,
				   peerbuf, msgbuf);
Danny Mayer's avatar
Danny Mayer committed
837
	}
838

Danny Mayer's avatar
Danny Mayer committed
839
}
840

Danny Mayer's avatar
Danny Mayer committed
841
/*
842
 * Make an fd SOCKET non-blocking.
Danny Mayer's avatar
Danny Mayer committed
843 844
 */
static isc_result_t
845
make_nonblock(SOCKET fd) {
Danny Mayer's avatar
Danny Mayer committed
846 847
	int ret;
	unsigned long flags = 1;
848
	char strbuf[ISC_STRERRORSIZE];
Danny Mayer's avatar
Danny Mayer committed
849 850

	/* Set the socket to non-blocking */
851
	ret = ioctlsocket(fd, FIONBIO, &flags);
Danny Mayer's avatar
Danny Mayer committed
852 853

	if (ret == -1) {
854
		isc__strerror(errno, strbuf, sizeof(strbuf));
Danny Mayer's avatar
Danny Mayer committed
855 856
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "ioctlsocket(%d, FIOBIO, %d): %s",
857
				 fd, flags, strbuf);
Danny Mayer's avatar
Danny Mayer committed
858 859 860 861 862 863

		return (ISC_R_UNEXPECTED);
	}

	return (ISC_R_SUCCESS);
}
864

Danny Mayer's avatar
Danny Mayer committed
865
/*
866 867 868 869 870 871 872 873
 * Windows 2000 systems incorrectly cause UDP sockets using WASRecvFrom
 * to not work correctly, returning a WSACONNRESET error when a WSASendTo
 * fails with an "ICMP port unreachable" response and preventing the
 * socket from using the WSARecvFrom in subsequent operations.
 * The function below fixes this, but requires that Windows 2000
 * Service Pack 2 or later be installed on the system.  NT 4.0
 * systems are not affected by this and work correctly.
 * See Microsoft Knowledge Base Article Q263823 for details of this.
Danny Mayer's avatar
Danny Mayer committed
874
 */
875 876 877 878 879 880
isc_result_t
connection_reset_fix(SOCKET fd) {
	DWORD dwBytesReturned = 0;
	BOOL  bNewBehavior = FALSE;
	DWORD status;

881
	if (isc_win32os_majorversion() < 5)
882 883
		return (ISC_R_SUCCESS); /*  NT 4.0 has no problem */

884
	/* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
885 886 887 888 889
	status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
			  sizeof(bNewBehavior), NULL, 0,
			  &dwBytesReturned, NULL, NULL);
	if (status != SOCKET_ERROR)
		return (ISC_R_SUCCESS);
890 891 892 893 894
	else {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
						ISC_MSG_FAILED, "failed"));
895
		return (ISC_R_UNEXPECTED);
896
	}
Danny Mayer's avatar
Danny Mayer committed
897 898 899 900 901 902 903 904 905 906 907 908 909
}

/*
 * Construct an iov array and attach it to the msghdr passed in.  This is
 * the SEND constructor, which will use the used region of each buffer
 * (if using a buffer list) or will use the internal region (if a single
 * buffer I/O is requested).
 *
 * The data to be sent is not referenced in place: every iov entry points
 * at a private heap copy, described by a buflist_t that is queued on
 * lpo->bufferlist.  NOTE(review): the copies are presumably released by
 * whoever drains lpo->bufferlist once the I/O completes -- confirm against
 * the completion handling code.
 *
 * On return msg->msg_iov, msg->msg_iovlen and msg->msg_totallen describe
 * the data still to be written (everything after the first dev->n bytes),
 * and msg->to_addr / msg->to_addr_len hold the destination address taken
 * from 'dev'.
 *
 * 'sock' is used only for logging and 'cmsg' is not referenced.  None of
 * the other arguments may be NULL, and 'iov' must have room for
 * MAXSCATTERGATHER_SEND entries.
 */
static void
build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
		  struct msghdr *msg, char *cmsg, WSABUF *iov,
		  IoCompletionInfo  *lpo)
{
	unsigned int iovcount;
	isc_buffer_t *buffer;
	buflist_t  *cpbuffer;
	isc_region_t used;
	size_t write_count;
	size_t skip_count;

	memset(msg, 0, sizeof(*msg));

	memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
	msg->to_addr_len = dev->address.length;

	buffer = ISC_LIST_HEAD(dev->bufferlist);
	write_count = 0;
	iovcount = 0;

	/*
	 * Single buffer I/O?  Skip what we've done so far in this region.
	 */
	if (buffer == NULL) {
		write_count = dev->region.length - dev->n;
		cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
				     sizeof(buflist_t));
		RUNTIME_CHECK(cpbuffer != NULL);
		cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
					  write_count);
		RUNTIME_CHECK(cpbuffer->buf != NULL);

		/* The format uses %d, so hand it ints rather than size_t. */
		socket_log(__LINE__, sock, NULL, TRACE,
		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
		   "alloc_buffer %p %d %p %d", cpbuffer,
		   (int)sizeof(buflist_t), cpbuffer->buf, (int)write_count);

		memcpy(cpbuffer->buf, (dev->region.base + dev->n),
		       write_count);
		cpbuffer->buflen = write_count;
		ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
		iov[0].buf = cpbuffer->buf;
		iov[0].len = write_count;
		iovcount = 1;

		goto config;
	}

	/*
	 * Multibuffer I/O.
	 * Skip the data in the buffer list that we have already written.
	 */
	skip_count = dev->n;
	while (buffer != NULL) {
		REQUIRE(ISC_BUFFER_VALID(buffer));
		if (skip_count < isc_buffer_usedlength(buffer))
			break;
		skip_count -= isc_buffer_usedlength(buffer);
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	/*
	 * Copy what remains.  Only the first buffer visited here can be
	 * partially written; skip_count is zeroed once it has been applied.
	 */
	while (buffer != NULL) {
		INSIST(iovcount < MAXSCATTERGATHER_SEND);

		isc_buffer_usedregion(buffer, &used);

		if (used.length > 0) {
			int uselen = used.length - skip_count;

			cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
					     sizeof(buflist_t));
			RUNTIME_CHECK(cpbuffer != NULL);
			cpbuffer->buf = HeapAlloc(hHeapHandle,
						  HEAP_ZERO_MEMORY, uselen);
			RUNTIME_CHECK(cpbuffer->buf != NULL);

			/* Log the size of this allocation, not the total. */
			socket_log(__LINE__, sock, NULL, TRACE,
			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
			   "alloc_buffer %p %d %p %d", cpbuffer,
			   (int)sizeof(buflist_t), cpbuffer->buf, uselen);

			memcpy(cpbuffer->buf, (used.base + skip_count),
			       uselen);
			cpbuffer->buflen = uselen;
			/*
			 * Track the copy on the completion info exactly as
			 * the single buffer path does; otherwise nothing
			 * refers to cpbuffer after this function returns
			 * and both allocations are leaked.
			 */
			ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
			iov[iovcount].buf = cpbuffer->buf;
			iov[iovcount].len = uselen;
			write_count += uselen;
			skip_count = 0;
			iovcount++;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	INSIST(skip_count == 0);

 config:
	msg->msg_iov = iov;
	msg->msg_iovlen = iovcount;
	msg->msg_totallen = write_count;
}

static void
set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1005 1006
		isc_socketevent_t *dev)
{
Danny Mayer's avatar
Danny Mayer committed
1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017
	if (sock->type == isc_sockettype_udp) {
		if (address != NULL)
			dev->address = *address;
		else
			dev->address = sock->address;
	} else if (sock->type == isc_sockettype_tcp) {
		INSIST(address == NULL);
		dev->address = sock->address;
	}
}

1018 1019 1020 1021 1022 1023 1024 1025 1026
/*
 * Destructor installed on socket events by allocate_socketevent().
 * Insists that the event no longer has any buffers queued on it and
 * then hands the event to the destructor that was saved in ->destroy.
 */
static void
destroy_socketevent(isc_event_t *event) {
	isc_socketevent_t *sockev;

	sockev = (isc_socketevent_t *)event;
	INSIST(ISC_LIST_EMPTY(sockev->bufferlist));

	(*sockev->destroy)(event);
}

Danny Mayer's avatar
Danny Mayer committed
1027 1028
static isc_socketevent_t *
allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
1029 1030
		     isc_taskaction_t action, const void *arg)
{
Danny Mayer's avatar
Danny Mayer committed
1031 1032 1033 1034 1035
	isc_socketevent_t *ev;

	ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
						     sock, eventtype,
						     action, arg,
Mark Andrews's avatar
Mark Andrews committed
1036
						     sizeof(*ev));
Danny Mayer's avatar
Danny Mayer committed
1037 1038 1039
	if (ev == NULL)
		return (NULL);

1040
	ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
Danny Mayer's avatar
Danny Mayer committed
1041 1042 1043 1044 1045 1046
	ISC_LINK_INIT(ev, ev_link);
	ISC_LIST_INIT(ev->bufferlist);
	ev->region.base = NULL;
	ev->n = 0;
	ev->offset = 0;
	ev->attributes = 0;
1047 1048
	ev->destroy = ev->ev_destroy;
	ev->ev_destroy = destroy_socketevent;
Danny Mayer's avatar
Danny Mayer committed
1049 1050 1051 1052 1053 1054 1055 1056 1057

	return (ev);
}

#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid, only built when ISC_SOCKET_DEBUG is defined: print the
 * contents of 'msg' (name, iov array and every iov entry) together with
 * the descriptor of 'sock' on stdout.
 *
 * Every argument is cast to the exact type its conversion specifier
 * expects, so the output does not depend on how wide the underlying
 * Windows types happen to be.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i;
	unsigned int iovcount = (unsigned int)msg->msg_iovlen;

	printf("MSGHDR %p, Socket #: %u\n", (void *)msg,
	       (unsigned int)sock->fd);
	printf("\tname %p, namelen %d\n", (void *)msg->msg_name,
	       (int)msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", (void *)msg->msg_iov,
	       (int)msg->msg_iovlen);
	for (i = 0; i < iovcount; i++) {
		/* 'i' is unsigned and WSABUF.len is an unsigned long. */
		printf("\t\t%u\tbase %p, len %lu\n", i,
		       (void *)msg->msg_iov[i].buf,
		       (unsigned long)msg->msg_iov[i].len);
	}
}
#endif

1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168
/*
 * map the error code
 */
int
map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
		 char *errorstring, size_t bufsize) {

	int doreturn;
	switch (windows_errno) {
	case WSAECONNREFUSED:
		*isc_errno = ISC_R_CONNREFUSED;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case WSAENETUNREACH:
	case ERROR_NETWORK_UNREACHABLE:
		*isc_errno = ISC_R_NETUNREACH;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case ERROR_PORT_UNREACHABLE:
	case ERROR_HOST_UNREACHABLE:
	case WSAEHOSTUNREACH:
		*isc_errno = ISC_R_HOSTUNREACH;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case WSAENETDOWN:
		*isc_errno = ISC_R_NETDOWN;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case WSAEHOSTDOWN:
		*isc_errno = ISC_R_HOSTDOWN;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case WSAEACCES:
		*isc_errno = ISC_R_NOPERM;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case WSAECONNRESET:
	case WSAENETRESET:
	case WSAECONNABORTED:
	case WSAEDISCON:
		*isc_errno = ISC_R_CONNECTIONRESET;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case WSAENOTCONN:
		*isc_errno = ISC_R_NOTCONNECTED;
		if (sock->connected)
			doreturn = DOIO_HARD;
		else
			doreturn = DOIO_SOFT;
		break;
	case ERROR_OPERATION_ABORTED:
	case ERROR_CONNECTION_ABORTED:
	case ERROR_REQUEST_ABORTED:
		*isc_errno = ISC_R_CONNECTIONRESET;
		doreturn = DOIO_HARD;
		break;
	case WSAENOBUFS:
		*isc_errno = ISC_R_NORESOURCES;
		doreturn = DOIO_HARD;
		break;
	case WSAEAFNOSUPPORT:
		*isc_errno = ISC_R_FAMILYNOSUPPORT;
		doreturn = DOIO_HARD;
		break;
	case WSAEADDRNOTAVAIL:
		*isc_errno = ISC_R_ADDRNOTAVAIL;
		doreturn = DOIO_HARD;
		break;
	case WSAEDESTADDRREQ:
		*isc_errno = ISC_R_BADADDRESSFORM;
		doreturn = DOIO_HARD;
		break;
	case ERROR_NETNAME_DELETED:
		*isc_errno = ISC_R_NETDOWN;
		doreturn = DOIO_HARD;
		break;
	default:
		*isc_errno = ISC_R_IOERROR;
		doreturn = DOIO_HARD;
		break;
Danny Mayer's avatar
Danny Mayer committed
1169
	}
1170 1171
	if (doreturn == DOIO_HARD) {
		isc__strerror(windows_errno, errorstring, bufsize);
Danny Mayer's avatar
Danny Mayer committed
1172
	}