socket.c 76.5 KB
Newer Older
Bob Halley's avatar
Bob Halley committed
1
/*
David Lawrence's avatar
David Lawrence committed
2
 * Copyright (C) 1998-2000  Internet Software Consortium.
3
 *
Bob Halley's avatar
Bob Halley committed
4
5
6
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
7
 *
8
9
10
11
12
13
14
15
 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM
 * DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL
 * INTERNET SOFTWARE CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING
 * FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
 * NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
 * WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
Bob Halley's avatar
Bob Halley committed
16
 */
Bob Halley's avatar
Bob Halley committed
17

18
/* $Id: socket.c,v 1.172 2000/11/22 23:48:14 gson Exp $ */
David Lawrence's avatar
David Lawrence committed
19

Bob Halley's avatar
Bob Halley committed
20
#include <config.h>
21

22
#include <sys/param.h>
Michael Graff's avatar
Michael Graff committed
23
#include <sys/types.h>
Michael Graff's avatar
Michael Graff committed
24
25
#include <sys/socket.h>
#include <sys/time.h>
Michael Graff's avatar
Michael Graff committed
26
27
#include <sys/uio.h>

28
29
30
31
32
#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
Michael Graff's avatar
Michael Graff committed
33
#include <fcntl.h>
34

35
#include <isc/buffer.h>
36
#include <isc/bufferlist.h>
37
38
#include <isc/condition.h>
#include <isc/list.h>
Michael Graff's avatar
Michael Graff committed
39
#include <isc/log.h>
40
#include <isc/mem.h>
41
#include <isc/mutex.h>
42
#include <isc/net.h>
43
#include <isc/platform.h>
Michael Graff's avatar
Michael Graff committed
44
#include <isc/print.h>
45
#include <isc/region.h>
46
#include <isc/socket.h>
47
#include <isc/task.h>
48
#include <isc/thread.h>
Michael Graff's avatar
Michael Graff committed
49
#include <isc/util.h>
Bob Halley's avatar
Bob Halley committed
50

51
52
#ifndef ISC_PLATFORM_USETHREADS
#include "socket_p.h"
53
#endif /* ISC_PLATFORM_USETHREADS */
54

55
56
/*
 * Some systems define the socket length argument as an int, some as size_t,
 * some as socklen_t.  This is here so it can be easily changed if needed.
 */
#ifndef ISC_SOCKADDR_LEN_T
#define ISC_SOCKADDR_LEN_T unsigned int
#endif

/*
 * Define what the possible "soft" errors can be.  These are non-fatal returns
 * of various network related functions, like recv() and so on.
 *
 * For some reason, BSDI (and perhaps others) will sometimes return <0
 * from recv() but will have errno==0.  This is broken, but we have to
 * work around it here.
 *
 * Note that the argument is evaluated up to four times.
 */
#define SOFT_ERROR(e)	((e) == EAGAIN || \
			 (e) == EWOULDBLOCK || \
			 (e) == EINTR || \
			 (e) == 0)

/*
 * Expands to the three (category, module, level) arguments expected by
 * manager_log() and socket_log() for debug level 'x'.
 */
#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)

/*
 * DLVL(90)  --  Function entry/exit and other tracing.
 * DLVL(70)  --  Socket "correctness" -- including returning of events, etc.
 * DLVL(60)  --  Socket data send/receive
 * DLVL(50)  --  Event tracing, including receiving/sending completion events.
 * DLVL(20)  --  Socket creation/destruction.
 */
#define TRACE_LEVEL		90
#define CORRECTNESS_LEVEL	70
#define IOEVENT_LEVEL		60
#define EVENT_LEVEL		50
#define CREATION_LEVEL		20

#define TRACE		DLVL(TRACE_LEVEL)
#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
#define IOEVENT		DLVL(IOEVENT_LEVEL)
#define EVENT		DLVL(EVENT_LEVEL)
#define CREATION	DLVL(CREATION_LEVEL)
96

97
/* Type of the internal readable/writable events embedded in each socket. */
typedef isc_event_t intev_t;

/* Magic number stored in a socket's 'magic' field and checked by VALID_SOCKET(). */
#define SOCKET_MAGIC		0x494f696fU	/* IOio */
#define VALID_SOCKET(t)		((t) != NULL && (t)->magic == SOCKET_MAGIC)
Michael Graff's avatar
Michael Graff committed
101

Michael Graff's avatar
Michael Graff committed
102
103
104
105
106
107
108
109
110
111
112
113
/*
 * IPv6 control information.  If the socket is an IPv6 socket we want
 * to collect the destination address and interface so the client can
 * set them on outgoing packets.
 */
#ifdef ISC_PLATFORM_HAVEIPV6
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * NetBSD and FreeBSD can timestamp packets.  XXXMLG Should we have
 * a setsockopt() like interface to request timestamps, and if the OS
 * doesn't do it for us, call gettimeofday() on every UDP receive?
 */
#ifdef SO_TIMESTAMP
#ifndef USE_CMSG
#define USE_CMSG	1
#endif
#endif

/*
 * Check to see if we have even basic support for cracking messages from
 * the control data returned from/sent via recvmsg()/sendmsg().  Without
 * both CMSG_LEN and CMSG_SPACE, control message support is turned off.
 */
#if defined(USE_CMSG) && (!defined(CMSG_LEN) || !defined(CMSG_SPACE))
#undef USE_CMSG
#endif

132
133
struct isc_socket {
	/* Not locked. */
134
135
136
137
	unsigned int		magic;
	isc_socketmgr_t	       *manager;
	isc_mutex_t		lock;
	isc_sockettype_t	type;
Michael Graff's avatar
Michael Graff committed
138

139
	/* Locked by socket lock. */
140
	ISC_LINK(isc_socket_t)	link;
141
142
	unsigned int		references;
	int			fd;
143
	int			pf;
144

145
	ISC_LIST(isc_socketevent_t)		send_list;
146
	ISC_LIST(isc_socketevent_t)		recv_list;
147
	ISC_LIST(isc_socket_newconnev_t)	accept_list;
148
149
150
151
152
153
154
	isc_socket_connev_t		       *connect_ev;

	/*
	 * Internal events.  Posted when a descriptor is readable or
	 * writable.  These are statically allocated and never freed.
	 * They will be set to non-purgable before use.
	 */
155
156
	intev_t			readable_ev;
	intev_t			writable_ev;
157

158
	isc_sockaddr_t		address;  /* remote address */
159

160
161
162
163
164
165
166
	unsigned int		pending_recv : 1,
				pending_send : 1,
				pending_accept : 1,
				listener : 1, /* listener socket */
				connected : 1,
				connecting : 1, /* connect pending */
				bound : 1; /* bound to local addr */
167

168
#ifdef ISC_NET_RECVOVERFLOW
169
	unsigned char		overflow; /* used for MSG_TRUNC fake */
170
#endif
Michael Graff's avatar
Michael Graff committed
171
#ifdef USE_CMSG
172
173
	unsigned char	       *cmsg;
	unsigned int		cmsglen;
174
#endif
175
176
};

177
178
179
#define SOCKET_MANAGER_MAGIC	0x494f6d67U	/* IOmg */
#define VALID_MANAGER(m)	((m) != NULL && \
				 (m)->magic == SOCKET_MANAGER_MAGIC)
180
181
struct isc_socketmgr {
	/* Not locked. */
182
183
184
	unsigned int		magic;
	isc_mem_t	       *mctx;
	isc_mutex_t		lock;
185
	/* Locked by manager lock. */
186
	ISC_LIST(isc_socket_t)	socklist;
187
188
189
190
191
	fd_set			read_fds;
	fd_set			write_fds;
	isc_socket_t	       *fds[FD_SETSIZE];
	int			fdstate[FD_SETSIZE];
	int			maxfd;
192
193
194
#ifdef ISC_PLATFORM_USETHREADS
	isc_thread_t		watcher;
	isc_condition_t		shutdown_ok;
195
	int			pipe_fds[2];
196
#else /* ISC_PLATFORM_USETHREADS */
197
	unsigned int		refs;
198
#endif /* ISC_PLATFORM_USETHREADS */
199
200
};

201
202
#ifndef ISC_PLATFORM_USETHREADS
/* File-scope socket manager pointer used by the non-threaded build. */
static isc_socketmgr_t *socketmgr = NULL;
#endif /* ISC_PLATFORM_USETHREADS */
204

Michael Graff's avatar
Michael Graff committed
205
206
207
208
/*
 * Values stored in the manager's fdstate[] array.
 */
#define CLOSED		0	/* this one must be zero */
#define MANAGED		1
#define CLOSE_PENDING	2

/*
 * send() and recv() iovec counts.  When ISC_NET_RECVOVERFLOW is defined
 * the receive side gets one extra iovec, used for overflow detection.
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#ifdef ISC_NET_RECVOVERFLOW
# define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER + 1)
#else
# define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
#endif

219
220
/*
 * Forward declarations of file-local functions.
 */
static void send_recvdone_event(isc_socket_t *, isc_socketevent_t **,
				isc_result_t);
static void send_senddone_event(isc_socket_t *, isc_socketevent_t **,
				isc_result_t);
static void free_socket(isc_socket_t **);
static isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t,
				    isc_socket_t **);
static void destroy(isc_socket_t **);
static void internal_accept(isc_task_t *, isc_event_t *);
static void internal_connect(isc_task_t *, isc_event_t *);
static void internal_recv(isc_task_t *, isc_event_t *);
static void internal_send(isc_task_t *, isc_event_t *);
static void process_cmsg(isc_socket_t *, struct msghdr *, isc_socketevent_t *);
static void build_msghdr_send(isc_socket_t *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);
static void build_msghdr_recv(isc_socket_t *, isc_socketevent_t *,
			      struct msghdr *, struct iovec *, size_t *);

/*
 * Special (negative) messages for select_poke(); non-negative messages
 * are file descriptors.
 */
#define SELECT_POKE_SHUTDOWN		(-1)
#define SELECT_POKE_NOTHING		(-2)

/* True when the socket has no remaining references. */
#define SOCK_DEAD(s)			((s)->references == 0)

Michael Graff's avatar
Michael Graff committed
242
243
244
245
246
247
248
249
/*
 * Log a printf-style message about a socket manager.
 *
 * The message is formatted only if isc_log_wouldlog() says 'level' would
 * be logged; it is then written prefixed with the manager's address.
 * The formatted text is truncated to fit a 2048-byte local buffer.
 */
static void
manager_log(isc_socketmgr_t *sockmgr,
	    isc_logcategory_t *category, isc_logmodule_t *module, int level,
	    const char *fmt, ...)
{
	char msgbuf[2048];
	va_list ap;

	if (! isc_log_wouldlog(isc_lctx, level))
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	isc_log_write(isc_lctx, category, module, level,
		      "sockmgr %p: %s", sockmgr, msgbuf);
}

/*
 * Log a printf-style message about a socket.
 *
 * The message is formatted only if isc_log_wouldlog() says 'level' would
 * be logged.  When 'address' is non-NULL it is formatted with
 * isc_sockaddr_format() and included in the log line after the socket's
 * address; otherwise only the socket pointer prefixes the message.
 */
static void
socket_log(isc_socket_t *sock, isc_sockaddr_t *address,
	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
	   const char *fmt, ...)
{
	char msgbuf[2048];
	char peerbuf[256];
	va_list ap;

	if (! isc_log_wouldlog(isc_lctx, level))
		return;

	va_start(ap, fmt);
	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
	va_end(ap);

	if (address == NULL) {
		isc_log_write(isc_lctx, category, module, level,
			      "socket %p: %s", sock, msgbuf);
	} else {
		isc_sockaddr_format(address, peerbuf, sizeof peerbuf);
		isc_log_write(isc_lctx, category, module, level,
			      "socket %p %s: %s", sock, peerbuf, msgbuf);
	}
}

287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
/*
 * Handle a wakeup for descriptor 'fd': finish a pending close, or
 * recompute whether the manager's select sets should watch the
 * descriptor for reading and for writing.
 */
static void
wakeup_socket(isc_socketmgr_t *manager, int fd) {
	isc_socket_t *sock;
	int watch_read;
	int watch_write;

	INSIST(fd < FD_SETSIZE);

	switch (manager->fdstate[fd]) {
	case CLOSE_PENDING:
		/* The deferred close happens here. */
		manager->fdstate[fd] = CLOSED;
		FD_CLR(fd, &manager->read_fds);
		FD_CLR(fd, &manager->write_fds);
		close(fd);
		return;
	case MANAGED:
		break;
	default:
		/* Not a descriptor we are managing. */
		return;
	}

	sock = manager->fds[fd];

	/*
	 * Watch for readability only when a receive or accept request is
	 * queued and no internal recv/accept event is already pending.
	 */
	watch_read = (ISC_LIST_HEAD(sock->recv_list) != NULL ||
		      ISC_LIST_HEAD(sock->accept_list) != NULL) &&
		     !sock->pending_recv && !sock->pending_accept;
	if (watch_read)
		FD_SET(sock->fd, &manager->read_fds);
	else
		FD_CLR(sock->fd, &manager->read_fds);

	/*
	 * Watch for writability when a connect is in progress, or when a
	 * send request is queued and no internal send event is pending.
	 */
	watch_write = sock->connecting ||
		      (ISC_LIST_HEAD(sock->send_list) != NULL &&
		       !sock->pending_send);
	if (watch_write)
		FD_SET(sock->fd, &manager->write_fds);
	else
		FD_CLR(sock->fd, &manager->write_fds);
}

#ifdef ISC_PLATFORM_USETHREADS
332
/*
Michael Graff's avatar
Michael Graff committed
333
334
335
 * Poke the select loop when there is something for us to do.
 * We assume that if a write completes here, it will be inserted into the
 * queue fully.  That is, we will not get partial writes.
336
337
 */
static void
338
select_poke(isc_socketmgr_t *mgr, int msg) {
Michael Graff's avatar
Michael Graff committed
339
340
	int cc;

341
342
343
	do {
		cc = write(mgr->pipe_fds[1], &msg, sizeof(int));
	} while (cc < 0 && SOFT_ERROR(errno));
344
			        
345
	if (cc < 0)
Michael Graff's avatar
Michael Graff committed
346
347
348
		FATAL_ERROR(__FILE__, __LINE__,
			    "write() failed during watcher poke: %s",
			    strerror(errno));
349
350

	INSIST(cc == sizeof(int));
351
352
353
}

/*
Andreas Gustafsson's avatar
Andreas Gustafsson committed
354
 * Read a message on the internal fd.
355
 */
Michael Graff's avatar
Michael Graff committed
356
static int
357
select_readmsg(isc_socketmgr_t *mgr) {
Michael Graff's avatar
Michael Graff committed
358
	int msg;
Michael Graff's avatar
Michael Graff committed
359
360
	int cc;

Michael Graff's avatar
Michael Graff committed
361
	cc = read(mgr->pipe_fds[0], &msg, sizeof(int));
Michael Graff's avatar
Michael Graff committed
362
	if (cc < 0) {
Michael Graff's avatar
Michael Graff committed
363
		if (SOFT_ERROR(errno))
Michael Graff's avatar
Michael Graff committed
364
			return (SELECT_POKE_NOTHING);
Michael Graff's avatar
Michael Graff committed
365

Michael Graff's avatar
Michael Graff committed
366
367
368
		FATAL_ERROR(__FILE__, __LINE__,
			    "read() failed during watcher poke: %s",
			    strerror(errno));
369
		
Michael Graff's avatar
Michael Graff committed
370
		return (SELECT_POKE_NOTHING);
Michael Graff's avatar
Michael Graff committed
371
	}
372

Michael Graff's avatar
Michael Graff committed
373
	return (msg);
374
}
375
#else /* ISC_PLATFORM_USETHREADS */
376
377
378
379
380
381
382
383
384
385
386
/*
 * Update the state of the socketmgr when something changes.
 *
 * A non-negative 'msg' is a file descriptor and is handed to
 * wakeup_socket(); negative messages (such as SELECT_POKE_SHUTDOWN)
 * need no action here.
 */
static void
select_poke(isc_socketmgr_t *manager, int msg) {
	if (msg >= 0)
		wakeup_socket(manager, msg);
}
387
#endif /* ISC_PLATFORM_USETHREADS */
388
389

/*
Andreas Gustafsson's avatar
Andreas Gustafsson committed
390
 * Make a fd non-blocking.
391
 */
Michael Graff's avatar
Michael Graff committed
392
static isc_result_t
393
make_nonblock(int fd) {
Michael Graff's avatar
Michael Graff committed
394
395
	int ret;
	int flags;
396

Michael Graff's avatar
Michael Graff committed
397
398
399
	flags = fcntl(fd, F_GETFL, 0);
	flags |= O_NONBLOCK;
	ret = fcntl(fd, F_SETFL, flags);
400

Michael Graff's avatar
Michael Graff committed
401
402
403
404
	if (ret == -1) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "fcntl(%d, F_SETFL, %d): %s",
				 fd, flags, strerror(errno));
Michael Graff's avatar
Michael Graff committed
405

Michael Graff's avatar
Michael Graff committed
406
		return (ISC_R_UNEXPECTED);
Michael Graff's avatar
Michael Graff committed
407
408
	}

Michael Graff's avatar
Michael Graff committed
409
	return (ISC_R_SUCCESS);
410
411
}

412
413
414
415
/*
 * Process control messages received on a socket.
 */
static void
416
process_cmsg(isc_socket_t *sock, struct msghdr *msg, isc_socketevent_t *dev) {
Michael Graff's avatar
Michael Graff committed
417
#ifdef USE_CMSG
418
	struct cmsghdr *cmsgp;
Michael Graff's avatar
Michael Graff committed
419
420
421
422
423
424
425
426
#ifdef ISC_PLATFORM_HAVEIPV6
	struct in6_pktinfo *pktinfop;
#endif
#ifdef SO_TIMESTAMP
	struct timeval *timevalp;
#endif
#endif

427
428
429
430
431
432
	/*
	 * sock is used only when ISC_NET_BSD44MSGHDR and USE_CMSG are defined.
	 * msg and dev are used only when ISC_NET_BSD44MSGHDR is defined.
	 * They are all here, outside of the CPP tests, because it is
	 * more consistent with the usual ISC coding style.
	 */
433
	UNUSED(sock);
434
435
436
437
438
439
440
	UNUSED(msg);
	UNUSED(dev);

#ifndef ISC_NET_BSD44MSGHDR
	return;

#else  /* defined ISC_NET_BSD44MSGHDR */
441

Bob Halley's avatar
Bob Halley committed
442
#ifdef MSG_TRUNC
443
444
	if ((msg->msg_flags & MSG_TRUNC) == MSG_TRUNC)
		dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
Bob Halley's avatar
Bob Halley committed
445
#endif
446

Bob Halley's avatar
Bob Halley committed
447
#ifdef MSG_CTRUNC
448
449
	if ((msg->msg_flags & MSG_CTRUNC) == MSG_CTRUNC)
		dev->attributes |= ISC_SOCKEVENTATTR_CTRUNC;
Bob Halley's avatar
Bob Halley committed
450
#endif
451

452
453
454
455
456
457
	/*
	 * Check for multicast.
	 */
	if (isc_sockaddr_ismulticast(&dev->address))
		dev->attributes |= ISC_SOCKEVENTATTR_MULTICAST;

Michael Graff's avatar
Michael Graff committed
458
459
460
#ifndef USE_CMSG
	return;
#else
461
462
	if (msg->msg_controllen == 0 || msg->msg_control == NULL)
		return;
Michael Graff's avatar
Michael Graff committed
463
464
465
466
467
468
469
470
471
472

#ifdef SO_TIMESTAMP
	timevalp = NULL;
#endif
#ifdef ISC_PLATFORM_HAVEIPV6
	pktinfop = NULL;
#endif

	cmsgp = CMSG_FIRSTHDR(msg);
	while (cmsgp != NULL) {
473
		socket_log(sock, NULL, TRACE, "processing cmsg %p", cmsgp);
Michael Graff's avatar
Michael Graff committed
474
475

#ifdef ISC_PLATFORM_HAVEIPV6
476
477
		if (cmsgp->cmsg_level == IPPROTO_IPV6
		    && cmsgp->cmsg_type == IPV6_PKTINFO) {
478

Michael Graff's avatar
Michael Graff committed
479
			pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
480
481
			memcpy(&dev->pktinfo, pktinfop,
			       sizeof(struct in6_pktinfo));
Michael Graff's avatar
Michael Graff committed
482
			dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
483
			socket_log(sock, NULL, TRACE,
David Lawrence's avatar
David Lawrence committed
484
485
				   "interface received on ifindex %u",
				   dev->pktinfo.ipi6_ifindex);
Michael Graff's avatar
Michael Graff committed
486
487
488
489
490
			goto next;
		}
#endif

#ifdef SO_TIMESTAMP
491
492
		if (cmsgp->cmsg_level == SOL_SOCKET
		    && cmsgp->cmsg_type == SCM_TIMESTAMP) {
Michael Graff's avatar
Michael Graff committed
493
494
495
496
497
498
499
500
501
502
503
504
505
			timevalp = (struct timeval *)CMSG_DATA(cmsgp);
			dev->timestamp.seconds = timevalp->tv_sec;
			dev->timestamp.nanoseconds = timevalp->tv_usec * 1000;
			dev->attributes |= ISC_SOCKEVENTATTR_TIMESTAMP;
			goto next;
		}
#endif

	next:
		cmsgp = CMSG_NXTHDR(msg, cmsgp);
	}
#endif /* USE_CMSG */

506
#endif /* ISC_NET_BSD44MSGHDR */
507
508
509

}

510
511
512
/*
 * Construct an iov array and attach it to the msghdr passed in.  Return
 * 0 on success, non-zero on failure.  This is the SEND constructor, which
513
514
 * will used the used region of the buffer (if using a buffer list) or
 * will use the internal region (if a single buffer I/O is requested).
515
516
517
 *
 * Nothing can be NULL, and the done event must list at least one buffer
 * on the buffer linked list for this function to be meaningful.
Michael Graff's avatar
fix    
Michael Graff committed
518
519
520
 *
 * If write_countp != NULL, *write_countp will hold the number of bytes
 * this transaction can send.
521
 */
Michael Graff's avatar
Michael Graff committed
522
static void
523
build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
524
		  struct msghdr *msg, struct iovec *iov, size_t *write_countp)
525
526
527
528
{
	unsigned int iovcount;
	isc_buffer_t *buffer;
	isc_region_t used;
529
530
531
	size_t write_count;
	size_t skip_count;

Michael Graff's avatar
fix    
Michael Graff committed
532
	memset(msg, 0, sizeof (*msg));
533
534
535
536

	if (sock->type == isc_sockettype_udp) {
		msg->msg_name = (void *)&dev->address.type.sa;
		msg->msg_namelen = dev->address.length;
Michael Graff's avatar
fix    
Michael Graff committed
537
538
539
	} else {
		msg->msg_name = NULL;
		msg->msg_namelen = 0;
540
	}
541
542

	buffer = ISC_LIST_HEAD(dev->bufferlist);
543
	write_count = 0;
Michael Graff's avatar
fix    
Michael Graff committed
544
	iovcount = 0;
545

546
	/*
547
	 * Single buffer I/O?  Skip what we've done so far in this region.
548
549
	 */
	if (buffer == NULL) {
550
551
552
		write_count = dev->region.length - dev->n;
		iov[0].iov_base = (void *)(dev->region.base + dev->n);
		iov[0].iov_len = write_count;
Michael Graff's avatar
fix    
Michael Graff committed
553
		iovcount = 1;
554

555
556
557
558
559
560
561
		goto config;
	}

	/*
	 * Multibuffer I/O.
	 * Skip the data in the buffer list that we have already written.
	 */
Michael Graff's avatar
fix    
Michael Graff committed
562
	skip_count = dev->n;
563
	while (buffer != NULL) {
564
		REQUIRE(ISC_BUFFER_VALID(buffer));
565
		if (skip_count < isc_buffer_usedlength(buffer))
566
			break;
567
		skip_count -= isc_buffer_usedlength(buffer);
568
		buffer = ISC_LIST_NEXT(buffer, link);
569
570
571
	}

	while (buffer != NULL) {
572
		INSIST(iovcount < MAXSCATTERGATHER_SEND);
573

574
		isc_buffer_usedregion(buffer, &used);
575

576
		if (used.length > 0) {
577
578
579
580
581
			iov[iovcount].iov_base = (void *)(used.base
							  + skip_count);
			iov[iovcount].iov_len = used.length - skip_count;
			write_count += (used.length - skip_count);
			skip_count = 0;
582
583
584
585
586
			iovcount++;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
	}

Michael Graff's avatar
fix    
Michael Graff committed
587
588
589
	INSIST(skip_count == 0);

 config:
590
591
592
	msg->msg_iov = iov;
	msg->msg_iovlen = iovcount;

593
594
595
596
#ifdef ISC_NET_BSD44MSGHDR
	msg->msg_control = NULL;
	msg->msg_controllen = 0;
	msg->msg_flags = 0;
597
598
599
600
#if defined(USE_CMSG)
	if ((sock->type == isc_sockettype_udp)
	    && ((dev->attributes & ISC_SOCKEVENTATTR_PKTINFO) != 0)) {
		struct cmsghdr *cmsgp;
601
602
		struct in6_pktinfo *pktinfop;

603
		socket_log(sock, NULL, TRACE,
David Lawrence's avatar
David Lawrence committed
604
605
			   "sendto pktinfo data, ifindex %u",
			   dev->pktinfo.ipi6_ifindex);
606

607
608
609
610
611
612
613
614
		msg->msg_controllen = CMSG_SPACE(sizeof(struct in6_pktinfo));
		msg->msg_control = (void *)sock->cmsg;

		cmsgp = (struct cmsghdr *)sock->cmsg;
		cmsgp->cmsg_level = IPPROTO_IPV6;
		cmsgp->cmsg_type = IPV6_PKTINFO;
		cmsgp->cmsg_len = CMSG_LEN(sizeof(struct in6_pktinfo));
		pktinfop = (struct in6_pktinfo *)CMSG_DATA(cmsgp);
615
		memcpy(pktinfop, &dev->pktinfo, sizeof(struct in6_pktinfo));
616
617
618
	}
#endif /* USE_CMSG */
#else /* ISC_NET_BSD44MSGHDR */
619
620
	msg->msg_accrights = NULL;
	msg->msg_accrightslen = 0;
621
#endif /* ISC_NET_BSD44MSGHDR */
622
623
624

	if (write_countp != NULL)
		*write_countp = write_count;
625
626
}

Michael Graff's avatar
fix    
Michael Graff committed
627
628
629
630
631
632
633
634
635
636
637
638
/*
 * Construct an iov array and attach it to the msghdr passed in.  Return
 * 0 on success, non-zero on failure.  This is the RECV constructor, which
 * will use the avialable region of the buffer (if using a buffer list) or
 * will use the internal region (if a single buffer I/O is requested).
 *
 * Nothing can be NULL, and the done event must list at least one buffer
 * on the buffer linked list for this function to be meaningful.
 *
 * If read_countp != NULL, *read_countp will hold the number of bytes
 * this transaction can receive.
 */
Michael Graff's avatar
Michael Graff committed
639
static void
Michael Graff's avatar
fix    
Michael Graff committed
640
build_msghdr_recv(isc_socket_t *sock, isc_socketevent_t *dev,
641
		  struct msghdr *msg, struct iovec *iov, size_t *read_countp)
Michael Graff's avatar
fix    
Michael Graff committed
642
643
644
645
646
647
648
649
650
651
652
{
	unsigned int iovcount;
	isc_buffer_t *buffer;
	isc_region_t available;
	size_t read_count;

	memset(msg, 0, sizeof (struct msghdr));

	if (sock->type == isc_sockettype_udp) {
		memset(&dev->address, 0, sizeof(dev->address));
		msg->msg_name = (void *)&dev->address.type.sa;
Bob Halley's avatar
Bob Halley committed
653
		msg->msg_namelen = sizeof(dev->address.type);
654
655
656
657
658
#ifdef ISC_NET_RECVOVERFLOW
		/* If needed, steal one iovec for overflow detection. */
		maxiov--;
#endif
	} else { /* TCP */
Michael Graff's avatar
fix    
Michael Graff committed
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
		msg->msg_name = NULL;
		msg->msg_namelen = 0;
		dev->address = sock->address;
	}

	buffer = ISC_LIST_HEAD(dev->bufferlist);
	read_count = 0;

	/*
	 * Single buffer I/O?  Skip what we've done so far in this region.
	 */
	if (buffer == NULL) {
		read_count = dev->region.length - dev->n;
		iov[0].iov_base = (void *)(dev->region.base + dev->n);
		iov[0].iov_len = read_count;
674
		iovcount = 1;
Michael Graff's avatar
fix    
Michael Graff committed
675
676
677
678
679
680
681
682
683

		goto config;
	}

	/*
	 * Multibuffer I/O.
	 * Skip empty buffers.
	 */
	while (buffer != NULL) {
684
		REQUIRE(ISC_BUFFER_VALID(buffer));
685
		if (isc_buffer_availablelength(buffer) != 0)
Michael Graff's avatar
fix    
Michael Graff committed
686
687
688
689
690
691
			break;
		buffer = ISC_LIST_NEXT(buffer, link);
	}

	iovcount = 0;
	while (buffer != NULL) {
692
		INSIST(iovcount < MAXSCATTERGATHER_RECV);
Michael Graff's avatar
fix    
Michael Graff committed
693

694
		isc_buffer_availableregion(buffer, &available);
Michael Graff's avatar
fix    
Michael Graff committed
695
696
697
698
699
700
701
702
703
704

		if (available.length > 0) {
			iov[iovcount].iov_base = (void *)(available.base);
			iov[iovcount].iov_len = available.length;
			read_count += available.length;
			iovcount++;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
	}

705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
 config:

	/*
	 * If needed, set up to receive that one extra byte.  Note that
	 * we know there is at least one iov left, since we stole it
	 * at the top of this function.
	 */
#ifdef ISC_NET_RECVOVERFLOW
	if (sock->type == isc_sockettype_udp) {
		iov[iovcount].iov_base = (void *)(&sock->overflow);
		iov[iovcount].iov_len = 1;
		iovcount++;
	}
#endif

Michael Graff's avatar
fix    
Michael Graff committed
720
721
722
723
724
725
	msg->msg_iov = iov;
	msg->msg_iovlen = iovcount;

#ifdef ISC_NET_BSD44MSGHDR
	msg->msg_control = NULL;
	msg->msg_controllen = 0;
726
727
	msg->msg_flags = 0;
#if defined(USE_CMSG)
Michael Graff's avatar
Michael Graff committed
728
	if (sock->type == isc_sockettype_udp) {
729
730
		msg->msg_control = (void *)sock->cmsg;
		msg->msg_controllen = sock->cmsglen;
Michael Graff's avatar
Michael Graff committed
731
	}
732
733
#endif /* USE_CMSG */
#else /* ISC_NET_BSD44MSGHDR */
Michael Graff's avatar
fix    
Michael Graff committed
734
735
	msg->msg_accrights = NULL;
	msg->msg_accrightslen = 0;
736
#endif /* ISC_NET_BSD44MSGHDR */
Michael Graff's avatar
fix    
Michael Graff committed
737
738
739
740
741

	if (read_countp != NULL)
		*read_countp = read_count;
}

742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
/*
 * Fill in dev->address for an outgoing request.
 *
 * UDP sockets use the caller-supplied 'address' when there is one and
 * fall back to the socket's own address otherwise.  TCP sockets always
 * use the socket's address and require 'address' to be NULL.  Sockets of
 * any other type leave dev->address untouched.
 */
static void
set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
		isc_socketevent_t *dev)
{
	if (sock->type == isc_sockettype_udp) {
		dev->address = (address != NULL) ? *address : sock->address;
		return;
	}

	if (sock->type == isc_sockettype_tcp) {
		INSIST(address == NULL);
		dev->address = sock->address;
	}
}

757
758
static isc_socketevent_t *
allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
David Lawrence's avatar
David Lawrence committed
759
		     isc_taskaction_t action, const void *arg)
760
761
762
763
764
765
766
767
768
769
770
771
{
	isc_socketevent_t *ev;

	ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
						     sock, eventtype,
						     action, arg,
						     sizeof (*ev));

	if (ev == NULL)
		return (NULL);

	ev->result = ISC_R_UNEXPECTED;
772
	ISC_LINK_INIT(ev, ev_link);
773
774
	ISC_LIST_INIT(ev->bufferlist);
	ev->region.base = NULL;
Michael Graff's avatar
fix    
Michael Graff committed
775
776
	ev->n = 0;
	ev->offset = 0;
777
	ev->attributes = 0;
778
779
780
781

	return (ev);
}

782
783
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid: print the contents of a msghdr and its iovec array to
 * stdout.
 *
 * The length fields have platform-dependent integer types, so they are
 * cast to unsigned long to match their conversion specifiers, and
 * pointers are passed to %p as void *.
 */
static void
dump_msg(struct msghdr *msg) {
	unsigned int i;

	printf("MSGHDR %p\n", (void *)msg);
	printf("\tname %p, namelen %lu\n", (void *)msg->msg_name,
	       (unsigned long)msg->msg_namelen);
	printf("\tiov %p, iovlen %lu\n", (void *)msg->msg_iov,
	       (unsigned long)msg->msg_iovlen);
	for (i = 0 ; i < (unsigned int)msg->msg_iovlen ; i++)
		printf("\t\t%u\tbase %p, len %lu\n", i,
		       (void *)msg->msg_iov[i].iov_base,
		       (unsigned long)msg->msg_iov[i].iov_len);
#ifdef ISC_NET_BSD44MSGHDR
	printf("\tcontrol %p, controllen %lu\n", (void *)msg->msg_control,
	       (unsigned long)msg->msg_controllen);
#endif
}
#endif

Michael Graff's avatar
Michael Graff committed
801
802
803
804
805
#define DOIO_SUCCESS		0	/* i/o ok, event sent */
#define DOIO_SOFT		1	/* i/o ok, soft error, no event sent */
#define DOIO_HARD		2	/* i/o error, event sent */
#define DOIO_EOF		3	/* EOF, no event sent */

806
static int
807
doio_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
808
	int cc;
809
	struct iovec iov[MAXSCATTERGATHER_RECV];
810
	size_t read_count;
811
	size_t actual_count;
812
	struct msghdr msghdr;
813
	isc_buffer_t *buffer;
814

815
	build_msghdr_recv(sock, dev, &msghdr, iov, &read_count);
816
817
818
819

#if defined(ISC_SOCKET_DEBUG)
	dump_msg(&msghdr);
#endif
820
821
822
823
824
825
826

	cc = recvmsg(sock->fd, &msghdr, 0);

	if (cc < 0) {
		if (SOFT_ERROR(errno))
			return (DOIO_SOFT);

827
828
829
830
		if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL))
			socket_log(sock, NULL, IOEVENT,
				   "doio_recv: recvmsg(%d) %d bytes, err %d/%s",
				   sock->fd, cc, errno, strerror(errno));
831

832
833
834
835
#define SOFT_OR_HARD(_system, _isc) \
	if (errno == _system) { \
		if (sock->connected) { \
			send_recvdone_event(sock, &dev, _isc); \
Michael Graff's avatar
Michael Graff committed
836
			return (DOIO_HARD); \
837
		} \
Michael Graff's avatar
Michael Graff committed
838
		return (DOIO_SOFT); \
839
	}
Michael Graff's avatar
Michael Graff committed
840
841
842
843
844
#define ALWAYS_HARD(_system, _isc) \
	if (errno == _system) { \
		send_recvdone_event(sock, &dev, _isc); \
		return (DOIO_HARD); \
	}
845
846

		SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
847
848
		SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
		SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
Michael Graff's avatar
Michael Graff committed
849
		ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
850

Michael Graff's avatar
Michael Graff committed
851
852
#undef SOFT_OR_HARD
#undef ALWAYS_HARD
853
854
855
856
857
858
859
860
861
862

		send_recvdone_event(sock, &dev, ISC_R_UNEXPECTED);
		return (DOIO_SUCCESS);
	}

	/*
	 * On TCP, zero length reads indicate EOF, while on
	 * UDP, zero length reads are perfectly valid, although
	 * strange.
	 */
863
	if ((sock->type == isc_sockettype_tcp) && (cc == 0))
864
865
		return (DOIO_EOF);

Michael Graff's avatar
Michael Graff committed
866
867
868
	if (sock->type == isc_sockettype_udp)
		dev->address.length = msghdr.msg_namelen;

Michael Graff's avatar
Michael Graff committed
869
870
	socket_log(sock, &dev->address, IOEVENT, "packet received correctly");

871
872
873
874
875
876
877
878
879
880
881
882
	/*
	 * Overflow bit detection.  If we received MORE bytes than we should,
	 * this indicates an overflow situation.  Set the flag in the
	 * dev entry and adjust how much we read by one.
	 */
#ifdef ISC_NET_RECVOVERFLOW
	if ((sock->type == isc_sockettype_udp) && ((size_t)cc > read_count)) {
		dev->attributes |= ISC_SOCKEVENTATTR_TRUNC;
		cc--;
	}
#endif

883
884
885
886
	/*
	 * If there are control messages attached, run through them and pull
	 * out the interesting bits.
	 */
Michael Graff's avatar
Michael Graff committed
887
888
	if (sock->type == isc_sockettype_udp)
		process_cmsg(sock, &msghdr, dev);
889

890
891
892
893
894
895
896
	/*
	 * update the buffers (if any) and the i/o count
	 */
	dev->n += cc;
	actual_count = cc;
	buffer = ISC_LIST_HEAD(dev->bufferlist);
	while (buffer != NULL && actual_count > 0) {
897
		REQUIRE(ISC_BUFFER_VALID(buffer));
898
899
		if (isc_buffer_availablelength(buffer) <= actual_count) {
			actual_count -= isc_buffer_availablelength(buffer);
900
			isc_buffer_add(buffer,
901
				       isc_buffer_availablelength(buffer));
902
903
904
905
906
907
908
909
910
911
912
		} else {
			isc_buffer_add(buffer, actual_count);
			actual_count = 0;
			break;
		}
		buffer = ISC_LIST_NEXT(buffer, link);
		if (buffer == NULL) {
			INSIST(actual_count == 0);
		}
	}

913
914
915
916
917
918
919
920
	/*
	 * If we read less than we expected, update counters,
	 * and let the upper layer poke the descriptor.
	 */
	if (((size_t)cc != read_count) && (dev->n < dev->minimum))
		return (DOIO_SOFT);

	/*
Andreas Gustafsson's avatar
Andreas Gustafsson committed
921
	 * Full reads are posted, or partials if partials are ok.
922
923
	 */
	send_recvdone_event(sock, &dev, ISC_R_SUCCESS);
Michael Graff's avatar
Michael Graff committed
924
925
926
	return (DOIO_SUCCESS);
}

Andreas Gustafsson's avatar
Andreas Gustafsson committed
927
928
929
930
931
932
933
934
935
936
937
938
939
/*
 * Returns:
 *	DOIO_SUCCESS	The operation succeeded.  The senddone event
 *			was sent.
 *
 *	DOIO_HARD	A hard or unexpected I/O error was encountered.
 *			The senddone event was sent.
 *
 *	DOIO_SOFT	A soft I/O error was encountered.  No senddone
 *			event was sent.  The operation should be retried.
 *
 *	No other return values are possible.
 */
Michael Graff's avatar
Michael Graff committed
940
static int
941
doio_send(isc_socket_t *sock, isc_socketevent_t *dev) {
Michael Graff's avatar
Michael Graff committed
942
	int cc;
943
	struct iovec iov[MAXSCATTERGATHER_SEND];
Michael Graff's avatar
Michael Graff committed
944
945
	size_t write_count;
	struct msghdr msghdr;
946
	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
Michael Graff's avatar
Michael Graff committed
947

948
	build_msghdr_send(sock, dev, &msghdr, iov, &write_count);
Michael Graff's avatar
Michael Graff committed
949
950
951
952

	cc = sendmsg(sock->fd, &msghdr, 0);

	/*
Andreas Gustafsson's avatar
Andreas Gustafsson committed
953
	 * Check for error or block condition.
Michael Graff's avatar
Michael Graff committed
954
955
956
957
958
959
960
961
962
963
964
965
966
	 */
	if (cc < 0) {
		if (SOFT_ERROR(errno))
			return (DOIO_SOFT);

#define SOFT_OR_HARD(_system, _isc) \
	if (errno == _system) { \
		if (sock->connected) { \
			send_senddone_event(sock, &dev, _isc); \
			return (DOIO_HARD); \
		} \
		return (DOIO_SOFT); \
	}
Michael Graff's avatar
Michael Graff committed
967
968
969
970
971
#define ALWAYS_HARD(_system, _isc) \
	if (errno == _system) { \
		send_senddone_event(sock, &dev, _isc); \
		return (DOIO_HARD); \
	}
Michael Graff's avatar
Michael Graff committed
972

973
974
		SOFT_OR_HARD(EACCES, ISC_R_NOPERM);
		SOFT_OR_HARD(EAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
975
976
		SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
		ALWAYS_HARD(EADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
977
		ALWAYS_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
978
979
980
#ifdef EHOSTDOWN
		ALWAYS_HARD(EHOSTDOWN, ISC_R_HOSTUNREACH);
#endif
981
		ALWAYS_HARD(ENETUNREACH, ISC_R_NETUNREACH);
Michael Graff's avatar
Michael Graff committed
982
		ALWAYS_HARD(ENOBUFS, ISC_R_NORESOURCES);
983
		ALWAYS_HARD(EPERM, ISC_R_HOSTUNREACH);
984
		ALWAYS_HARD(EPIPE, ISC_R_NOTCONNECTED);
Michael Graff's avatar
Michael Graff committed
985

Michael Graff's avatar
Michael Graff committed
986
987
#undef SOFT_OR_HARD
#undef ALWAYS_HARD
Michael Graff's avatar
Michael Graff committed
988
989
990
991
992
993
994
995
996
997

		/*
		 * The other error types depend on whether or not the
		 * socket is UDP or TCP.  If it is UDP, some errors
		 * that we expect to be fatal under TCP are merely
		 * annoying, and are really soft errors.
		 *
		 * However, these soft errors are still returned as
		 * a status.
		 */
998
		isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
Michael Graff's avatar
Michael Graff committed
999
		UNEXPECTED_ERROR(__FILE__, __LINE__,
1000
1001
				 "internal_send: %s: %s",
				 addrbuf, strerror(errno));
Michael Graff's avatar
Michael Graff committed
1002
1003
1004
1005
1006
1007
1008
1009
1010
		send_senddone_event(sock, &dev, ISC_R_UNEXPECTED);
		return (DOIO_HARD);
	}

	if (cc == 0)
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "internal_send: send() returned 0");

	/*
Andreas Gustafsson's avatar
Andreas Gustafsson committed
1011
	 * If we write less than we expected, update counters, poke.
Michael Graff's avatar
Michael Graff committed
1012
1013
1014
1015
	 */
	dev->n += cc;
	if ((size_t)cc != write_count)
		return (DOIO_SOFT);
1016

Michael Graff's avatar
Michael Graff committed
1017
1018
1019
1020
1021
	/*
	 * Exactly what we wanted to write.  We're done with this
	 * entry.  Post its completion event.
	 */
	send_senddone_event(sock, &dev, ISC_R_SUCCESS);
1022
1023
1024
	return (DOIO_SUCCESS);
}

1025
1026
1027
/*
 * Kill.
 *
1028
1029
 * Caller must ensure that the socket is not locked and no external
 * references exist.
1030
1031
 */
static void
1032
destroy(isc_socket_t **sockp) {
Bob Halley's avatar
Bob Halley committed
1033
1034
	isc_socket_t *sock = *sockp;
	isc_socketmgr_t *manager = sock->manager;
1035

Michael Graff's avatar
Michael Graff committed
1036
	socket_log(sock, NULL, CREATION, "destroying");
Michael Graff's avatar
Michael Graff committed
1037

1038
1039
1040
1041
1042
	INSIST(ISC_LIST_EMPTY(sock->accept_list));
	INSIST(ISC_LIST_EMPTY(sock->recv_list));
	INSIST(ISC_LIST_EMPTY(sock->send_list));
	INSIST(sock->connect_ev == NULL);

1043
1044
1045
	LOCK(&manager->lock);

	/*
Bob Halley's avatar
Bob Halley committed
1046
	 * No one has this socket open, so the watcher doesn't have to be
Michael Graff's avatar
Michael Graff committed
1047
	 * poked, and the socket doesn't have to be locked.
1048
	 */
Michael Graff's avatar
Michael Graff committed
1049
	manager->fds[sock->fd] = NULL;
Michael Graff's avatar
Michael Graff committed
1050
	manager->fdstate[sock->fd] = CLOSE_PENDING;
1051
1052
	select_poke(manager, sock->fd);
	ISC_LIST_UNLINK(manager->socklist, sock, link);
Michael Graff's avatar
Michael Graff committed
1053

1054
#ifdef ISC_PLATFORM_USETHREADS
1055
	if (ISC_LIST_EMPTY(manager->socklist))
1056
		SIGNAL(&manager->shutdown_ok);
1057
#endif /* ISC_PLATFORM_USETHREADS */
1058

1059
1060
1061
1062
	/*
	 * XXX should reset manager->maxfd here
	 */

1063
1064
	UNLOCK(&manager->lock);

Michael Graff's avatar
Michael Graff committed
1065
	free_socket(sockp);
Michael Graff's avatar
Michael Graff committed
1066
1067
1068
}

static isc_result_t
Bob Halley's avatar
Bob Halley committed
1069
1070
allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
		isc_socket_t **socketp)
Michael Graff's avatar
Michael Graff committed
1071
{
Bob Halley's avatar
Bob Halley committed
1072
	isc_socket_t *sock;
1073
	isc_result_t ret;
Michael Graff's avatar
Michael Graff committed
1074
1075
1076
1077

	sock = isc_mem_get(manager->mctx, sizeof *sock);

	if (sock == NULL)
1078
		return (ISC_R_NOMEMORY);
Michael Graff's avatar
Michael Graff committed
1079

1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
#if USE_CMSG  /* Let's hope the OSs are sane, and pad correctly XXXMLG */
	sock->cmsglen = 0;
#ifdef ISC_PLATFORM_HAVEIPV6
	sock->cmsglen += CMSG_SPACE(sizeof(struct in6_pktinfo));
#endif
#ifdef SO_TIMESTAMP
	sock->cmsglen += CMSG_SPACE(sizeof(struct timeval));
#endif
	sock->cmsg = isc_mem_get(manager->mctx, sock->cmsglen);
	if (sock->cmsg == NULL) {
		ret = ISC_R_NOMEMORY;
		goto err1;
	}
#endif

1095
1096
1097
	ret = ISC_R_UNEXPECTED;

	sock->magic = 0;
Michael Graff's avatar
Michael Graff committed
1098
	sock->references = 0;
Michael Graff's avatar
Michael Graff committed
1099
1100
1101

	sock->manager = manager;
	sock->type = type;
1102
	sock->fd = -1;
Michael Graff's avatar
Michael Graff committed
1103

1104
1105
	ISC_LINK_INIT(sock, link);

Michael Graff's avatar
Michael Graff committed
1106
1107
1108
	/*
	 * set up list of readers and writers to be initially empty
	 */
1109
1110
	ISC_LIST_INIT(sock->recv_list);
	ISC_LIST_INIT(sock->send_list);
1111
	ISC_LIST_INIT(sock->accept_list);
Michael Graff's avatar
Michael Graff committed
1112
	sock->connect_ev = NULL;
Michael Graff's avatar
Michael Graff committed
1113
1114
1115
1116
1117
1118
	sock->pending_recv = 0;
	sock->pending_send = 0;
	sock->pending_accept = 0;
	sock->listener = 0;
	sock->connected = 0;
	sock->connecting = 0;
1119
	sock->bound = 0;
1120

Michael Graff's avatar
Michael Graff committed
1121
1122
1123
1124
1125
1126
1127
	/*