socket.c 59.7 KB
Newer Older
Bob Halley's avatar
Bob Halley committed
1
/*
2
 * Copyright (C) 1998, 1999  Internet Software Consortium.
Bob Halley's avatar
Bob Halley committed
3
4
5
6
7
8
9
10
11
12
13
14
15
16
 * 
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS
 * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE
 * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL
 * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR
 * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
 * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
 * SOFTWARE.
 */
Bob Halley's avatar
Bob Halley committed
17
18

#include <config.h>
19
20
21
22
23
24

#include <errno.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
Michael Graff's avatar
Michael Graff committed
25
#include <fcntl.h>
26
27
28
29
30

#include <sys/types.h>
#include <sys/socket.h>

#include <isc/assertions.h>
31
#include <isc/error.h>
32
33
34
35
36
#include <isc/thread.h>
#include <isc/mutex.h>
#include <isc/condition.h>
#include <isc/socket.h>

37
#include "util.h"
Bob Halley's avatar
Bob Halley committed
38

39
40
41
42
/* WINAPI expands to nothing when _WIN32 is not defined. */
#if !defined(_WIN32)
#define WINAPI
#endif

43
/*
 * Send event 'b' to task 'a' and check with RUNTIME_CHECK that
 * isc_task_send() returned ISC_R_SUCCESS.
 *
 * There is deliberately no semicolon after "while (0)": the caller
 * supplies it, so the expansion is exactly one statement and can be used
 * between "if" and "else".
 */
#define ISC_TASK_SEND(a, b) do { \
	RUNTIME_CHECK(isc_task_send(a, b) == ISC_R_SUCCESS); \
} while (0)

Michael Graff's avatar
Michael Graff committed
47
/* Non-zero when 'e' is one of EINTR, EWOULDBLOCK or EAGAIN. */
#define SOFT_ERROR(e)	((e) == EINTR || (e) == EWOULDBLOCK || (e) == EAGAIN)
48

Michael Graff's avatar
Michael Graff committed
49
/*
 * Debug tracing.  The "#if 1" below turns ISC_SOCKET_DEBUG on
 * unconditionally; change it to "#if 0" to compile the tracing out.
 */
#if 1
#define ISC_SOCKET_DEBUG
#endif

#if defined(ISC_SOCKET_DEBUG)
/* Trace categories: one bit each, tested against trace_level. */
#define TRACE_WATCHER	0x0001
#define TRACE_LISTEN	0x0002
#define TRACE_CONNECT	0x0004
#define TRACE_RECV	0x0008
#define TRACE_SEND    	0x0010
#define TRACE_MANAGER	0x0020

/* Mask of enabled categories; initialized with every bit set. */
int trace_level = 0xffffffff;

/*
 * XTRACE(l, a):  if category 'l' is enabled, call printf with 'a', which
 *                must be a complete parenthesized argument list.
 * XENTER/XEXIT:  if category 'l' is enabled, print "ENTER"/"EXIT" and the
 *                string 'a'.
 *
 * Each expansion is wrapped in do { } while (0) so that it is a single
 * statement; a bare "if" would capture an "else" that follows the macro.
 */
#define XTRACE(l, a)	do { \
	if ((l) & trace_level) \
		printf a; \
} while (0)
#define XENTER(l, a)	do { \
	if ((l) & trace_level) \
		printf("ENTER %s\n", (a)); \
} while (0)
#define XEXIT(l, a)	do { \
	if ((l) & trace_level) \
		printf("EXIT %s\n", (a)); \
} while (0)
#else
#define XTRACE(l, a)	do { } while (0)
#define XENTER(l, a)	do { } while (0)
#define XEXIT(l, a)	do { } while (0)
#endif

/*
Michael Graff's avatar
Michael Graff committed
72
73
 * internal event used to send readable/writable events to our internal
 * functions.
74
 */
Michael Graff's avatar
Michael Graff committed
75
typedef struct rwintev {
Bob Halley's avatar
Bob Halley committed
76
77
78
	isc_event_t			common;	   /* Sender is the socket. */
	isc_task_t *			task;	   /* task to send these to */
	isc_socketevent_t *		done_ev;   /* the done event to post */
Michael Graff's avatar
Michael Graff committed
79
	isc_boolean_t			partial;   /* partial i/o ok */
Michael Graff's avatar
Michael Graff committed
80
	isc_boolean_t			canceled;  /* I/O was canceled */
Michael Graff's avatar
Michael Graff committed
81
	isc_boolean_t			posted;	   /* event posted to task */
Michael Graff's avatar
Michael Graff committed
82
	LINK(struct rwintev)		link;	   /* next event */
Bob Halley's avatar
Bob Halley committed
83
} rwintev_t;
Michael Graff's avatar
Michael Graff committed
84
85

typedef struct ncintev {
Bob Halley's avatar
Bob Halley committed
86
87
88
	isc_event_t			common;	   /* Sender is the socket */
	isc_task_t *			task;	   /* task to send these to */
	isc_socket_newconnev_t *	done_ev;   /* the done event */
Michael Graff's avatar
Michael Graff committed
89
	isc_boolean_t			canceled;  /* accept was canceled */
Michael Graff's avatar
Michael Graff committed
90
	isc_boolean_t			posted;	   /* event posted to task */
Michael Graff's avatar
Michael Graff committed
91
	LINK(struct ncintev)		link;	   /* next event */
Bob Halley's avatar
Bob Halley committed
92
} ncintev_t;
Michael Graff's avatar
Michael Graff committed
93
94

typedef struct cnintev {
Bob Halley's avatar
Bob Halley committed
95
96
97
	isc_event_t			common;	   /* Sender is the socket */
	isc_task_t *			task;	   /* task to send these to */
	isc_socket_connev_t *		done_ev;   /* the done event */
Michael Graff's avatar
Michael Graff committed
98
	isc_boolean_t			canceled;  /* connect was canceled */
Michael Graff's avatar
Michael Graff committed
99
	isc_boolean_t			posted;	   /* event posted to task */
Bob Halley's avatar
Bob Halley committed
100
} cnintev_t;
Michael Graff's avatar
Michael Graff committed
101
102
103

#define SOCKET_MAGIC		0x494f696fU	/* IOio */
#define VALID_SOCKET(t)		((t) != NULL && (t)->magic == SOCKET_MAGIC)
Michael Graff's avatar
Michael Graff committed
104

105
106
107
struct isc_socket {
	/* Not locked. */
	unsigned int			magic;
Bob Halley's avatar
Bob Halley committed
108
	isc_socketmgr_t *		manager;
109
	isc_mutex_t			lock;
Michael Graff's avatar
Michael Graff committed
110
111
	isc_sockettype_t		type;

112
113
114
	/* Locked by socket lock. */
	unsigned int			references;
	int				fd;
115
116
	isc_result_t			recv_result;
	isc_result_t			send_result;
Bob Halley's avatar
Bob Halley committed
117
118
119
120
	LIST(rwintev_t)			recv_list;
	LIST(rwintev_t)			send_list;
	LIST(ncintev_t)			accept_list;
	cnintev_t *			connect_ev;
Michael Graff's avatar
Michael Graff committed
121
122
	isc_boolean_t			pending_recv;
	isc_boolean_t			pending_send;
Michael Graff's avatar
Michael Graff committed
123
	isc_boolean_t			pending_accept;
Michael Graff's avatar
Michael Graff committed
124
	isc_boolean_t			listener;  /* is a listener socket */
Michael Graff's avatar
Michael Graff committed
125
	isc_boolean_t			connected;
Michael Graff's avatar
Michael Graff committed
126
	isc_boolean_t			connecting; /* connect pending */
Bob Halley's avatar
Bob Halley committed
127
128
129
130
	rwintev_t *			riev; /* allocated recv intev */
	rwintev_t *			wiev; /* allocated send intev */
	cnintev_t *			ciev; /* allocated accept intev */
	isc_sockaddr_t			address;  /* remote address */
Michael Graff's avatar
Michael Graff committed
131
	int				addrlength; /* remote addrlen */
132
133
134
135
136
137
138
139
};

#define SOCKET_MANAGER_MAGIC		0x494f6d67U	/* IOmg */
#define VALID_MANAGER(m)		((m) != NULL && \
					 (m)->magic == SOCKET_MANAGER_MAGIC)
struct isc_socketmgr {
	/* Not locked. */
	unsigned int			magic;
Bob Halley's avatar
Bob Halley committed
140
	isc_mem_t *			mctx;
141
142
	isc_mutex_t			lock;
	/* Locked by manager lock. */
Michael Graff's avatar
Michael Graff committed
143
	unsigned int			nsockets;  /* sockets managed */
Michael Graff's avatar
Michael Graff committed
144
	isc_thread_t			watcher;
Michael Graff's avatar
Michael Graff committed
145
	fd_set				read_fds;
146
	fd_set				write_fds;
Bob Halley's avatar
Bob Halley committed
147
	isc_socket_t *			fds[FD_SETSIZE];
Michael Graff's avatar
Michael Graff committed
148
	int				fdstate[FD_SETSIZE];
149
	int				maxfd;
Michael Graff's avatar
Michael Graff committed
150
	int				pipe_fds[2];
151
152
};

Michael Graff's avatar
Michael Graff committed
153
154
155
156
/*
 * Values stored in the manager's fdstate[] array.
 * CLOSED has to remain zero.
 */
#define CLOSED		0
#define MANAGED		1
#define CLOSE_PENDING	2

Bob Halley's avatar
Bob Halley committed
157
158
159
160
161
162
163
164
165
static void send_recvdone_event(isc_socket_t *, rwintev_t **,
				isc_socketevent_t **, isc_result_t);
static void send_senddone_event(isc_socket_t *, rwintev_t **,
				isc_socketevent_t **, isc_result_t);
static void done_event_destroy(isc_event_t *);
static void free_socket(isc_socket_t **);
static isc_result_t allocate_socket(isc_socketmgr_t *, isc_sockettype_t,
				    isc_socket_t **);
static void destroy(isc_socket_t **);
166
167
168
169
static void internal_accept(isc_task_t *, isc_event_t *);
static void internal_connect(isc_task_t *, isc_event_t *);
static void internal_recv(isc_task_t *, isc_event_t *);
static void internal_send(isc_task_t *, isc_event_t *);
Michael Graff's avatar
Michael Graff committed
170
171
172
173

/*
 * Negative message codes for the poke pipe (see select_poke() and
 * select_readmsg()).
 */
#define SELECT_POKE_SHUTDOWN		(-1)
#define SELECT_POKE_NOTHING		(-2)
#define SELECT_POKE_RESCAN		(-3)	/* XXX implement */
174
175

/*
Michael Graff's avatar
Michael Graff committed
176
177
178
 * Poke the select loop when there is something for us to do.
 * We assume that if a write completes here, it will be inserted into the
 * queue fully.  That is, we will not get partial writes.
179
180
 */
static void
Bob Halley's avatar
Bob Halley committed
181
select_poke(isc_socketmgr_t *mgr, int msg)
182
{
Michael Graff's avatar
Michael Graff committed
183
184
	int cc;

Michael Graff's avatar
Michael Graff committed
185
	cc = write(mgr->pipe_fds[1], &msg, sizeof(int));
Michael Graff's avatar
Michael Graff committed
186
187
188
189
	if (cc < 0) /* XXX need to handle EAGAIN, EINTR here */
		FATAL_ERROR(__FILE__, __LINE__,
			    "write() failed during watcher poke: %s",
			    strerror(errno));
190
191
192
193
194
}

/*
 * read a message on the internal fd.
 */
Michael Graff's avatar
Michael Graff committed
195
static int
Bob Halley's avatar
Bob Halley committed
196
select_readmsg(isc_socketmgr_t *mgr)
197
{
Michael Graff's avatar
Michael Graff committed
198
	int msg;
Michael Graff's avatar
Michael Graff committed
199
200
	int cc;

Michael Graff's avatar
Michael Graff committed
201
	cc = read(mgr->pipe_fds[0], &msg, sizeof(int));
Michael Graff's avatar
Michael Graff committed
202
	if (cc < 0) {
Michael Graff's avatar
Michael Graff committed
203
		if (SOFT_ERROR(errno))
Michael Graff's avatar
Michael Graff committed
204
			return (SELECT_POKE_NOTHING);
Michael Graff's avatar
Michael Graff committed
205

Michael Graff's avatar
Michael Graff committed
206
207
208
209
		FATAL_ERROR(__FILE__, __LINE__,
			    "read() failed during watcher poke: %s",
			    strerror(errno));

Michael Graff's avatar
Michael Graff committed
210
		return (SELECT_POKE_NOTHING);
Michael Graff's avatar
Michael Graff committed
211
	}
212

Michael Graff's avatar
Michael Graff committed
213
	return (msg);
214
215
216
}

/*
Michael Graff's avatar
Michael Graff committed
217
 * Make a fd non-blocking
218
 */
Michael Graff's avatar
Michael Graff committed
219
220
static isc_result_t
make_nonblock(int fd)
221
{
Michael Graff's avatar
Michael Graff committed
222
223
	int ret;
	int flags;
224

Michael Graff's avatar
Michael Graff committed
225
226
227
	flags = fcntl(fd, F_GETFL, 0);
	flags |= O_NONBLOCK;
	ret = fcntl(fd, F_SETFL, flags);
228

Michael Graff's avatar
Michael Graff committed
229
230
231
232
	if (ret == -1) {
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "fcntl(%d, F_SETFL, %d): %s",
				 fd, flags, strerror(errno));
Michael Graff's avatar
Michael Graff committed
233

Michael Graff's avatar
Michael Graff committed
234
		return (ISC_R_UNEXPECTED);
Michael Graff's avatar
Michael Graff committed
235
236
	}

Michael Graff's avatar
Michael Graff committed
237
	return (ISC_R_SUCCESS);
238
239
}

240
#ifdef ISC_SOCKET_DEBUG
/*
 * Debugging aid: print the socket's fd and reference count, followed by
 * one line for every internal event on its recv, send and accept queues.
 *
 * This function does not take the socket lock itself.
 */
static void
socket_dump(isc_socket_t *sock)
{
	rwintev_t *	rwiev;
	ncintev_t *	aiev;

	/* %p requires a void * argument, hence the casts below. */
	printf("--------\nDump of socket %p\n", (void *)sock);
	printf("fd: %d, references %u\n", sock->fd, sock->references);

	printf("recv queue:\n");
	for (rwiev = HEAD(sock->recv_list);
	     rwiev != NULL;
	     rwiev = NEXT(rwiev, link)) {
		printf("\tintev %p, done_ev %p, task %p, "
		       "canceled %d, posted %d\n",
		       (void *)rwiev, (void *)rwiev->done_ev,
		       (void *)rwiev->task, rwiev->canceled,
		       rwiev->posted);
	}

	printf("send queue:\n");
	for (rwiev = HEAD(sock->send_list);
	     rwiev != NULL;
	     rwiev = NEXT(rwiev, link)) {
		printf("\tintev %p, done_ev %p, task %p, "
		       "canceled %d, posted %d\n",
		       (void *)rwiev, (void *)rwiev->done_ev,
		       (void *)rwiev->task, rwiev->canceled,
		       rwiev->posted);
	}

	printf("accept queue:\n");
	for (aiev = HEAD(sock->accept_list);
	     aiev != NULL;
	     aiev = NEXT(aiev, link)) {
		printf("\tintev %p, done_ev %p, task %p, "
		       "canceled %d, posted %d\n",
		       (void *)aiev, (void *)aiev->done_ev,
		       (void *)aiev->task, aiev->canceled,
		       aiev->posted);
	}

	printf("--------\n");
}
#endif
Michael Graff's avatar
Michael Graff committed
283

284
/*
Michael Graff's avatar
Michael Graff committed
285
 * Handle freeing a done event when needed.
286
 */
Michael Graff's avatar
Michael Graff committed
287
static void
Bob Halley's avatar
Bob Halley committed
288
done_event_destroy(isc_event_t *ev)
289
{
Bob Halley's avatar
Bob Halley committed
290
	isc_socket_t *sock = ev->sender;
Michael Graff's avatar
Michael Graff committed
291
	isc_boolean_t kill_socket = ISC_FALSE;
292
293
294
295
296
297

	/*
	 * detach from the socket.  We would have already detached from the
	 * task when we actually queue this event up.
	 */
	LOCK(&sock->lock);
Michael Graff's avatar
Michael Graff committed
298
		
299
300
	REQUIRE(sock->references > 0);
	sock->references--;
Michael Graff's avatar
Michael Graff committed
301
302
	XTRACE(TRACE_MANAGER, ("done_event_destroy: sock %p, ref cnt == %d\n",
			       sock, sock->references));
303

Michael Graff's avatar
Michael Graff committed
304
305
	if (sock->references == 0)
		kill_socket = ISC_TRUE;
306
	UNLOCK(&sock->lock);
Michael Graff's avatar
Michael Graff committed
307
308
309
	
	if (kill_socket)
		destroy(&sock);
310
311
}

312
313
314
315
316
317
/*
 * Kill.
 *
 * Caller must ensure locking.
 */
static void
Bob Halley's avatar
Bob Halley committed
318
destroy(isc_socket_t **sockp)
319
{
Bob Halley's avatar
Bob Halley committed
320
321
	isc_socket_t *sock = *sockp;
	isc_socketmgr_t *manager = sock->manager;
322

Michael Graff's avatar
Michael Graff committed
323
324
	XTRACE(TRACE_MANAGER,
	       ("destroy sockp = %p, sock = %p\n", sockp, sock));
Michael Graff's avatar
Michael Graff committed
325

Michael Graff's avatar
Michael Graff committed
326
327
328
329
330
331
332
333
334
	if (sock->riev)
		isc_event_free((isc_event_t **)&sock->riev);
	if (sock->wiev)
		isc_event_free((isc_event_t **)&sock->wiev);
	if (sock->ciev)
		isc_event_free((isc_event_t **)&sock->ciev);
	if (sock->connect_ev)
		isc_event_free((isc_event_t **)&sock->connect_ev);

335
336
337
	LOCK(&manager->lock);

	/*
Michael Graff's avatar
Michael Graff committed
338
	 * Noone has this socket open, so the watcher doesn't have to be
Michael Graff's avatar
Michael Graff committed
339
	 * poked, and the socket doesn't have to be locked.
340
	 */
Michael Graff's avatar
Michael Graff committed
341
	manager->fds[sock->fd] = NULL;
Michael Graff's avatar
Michael Graff committed
342
343
	manager->fdstate[sock->fd] = CLOSE_PENDING;
	select_poke(sock->manager, sock->fd);
Michael Graff's avatar
Michael Graff committed
344
	manager->nsockets--;
Michael Graff's avatar
Michael Graff committed
345
	XTRACE(TRACE_MANAGER, ("nsockets == %d\n", manager->nsockets));
346

347
348
349
350
	/*
	 * XXX should reset manager->maxfd here
	 */

351
352
	UNLOCK(&manager->lock);

Michael Graff's avatar
Michael Graff committed
353
	free_socket(sockp);
Michael Graff's avatar
Michael Graff committed
354
355
356
}

static isc_result_t
Bob Halley's avatar
Bob Halley committed
357
358
allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
		isc_socket_t **socketp)
Michael Graff's avatar
Michael Graff committed
359
{
Bob Halley's avatar
Bob Halley committed
360
	isc_socket_t *sock;
Michael Graff's avatar
Michael Graff committed
361
362
363
364

	sock = isc_mem_get(manager->mctx, sizeof *sock);

	if (sock == NULL)
365
		return (ISC_R_NOMEMORY);
Michael Graff's avatar
Michael Graff committed
366
367

	sock->magic = SOCKET_MAGIC;
Michael Graff's avatar
Michael Graff committed
368
	sock->references = 0;
Michael Graff's avatar
Michael Graff committed
369
370
371

	sock->manager = manager;
	sock->type = type;
372
	sock->fd = -1;
Michael Graff's avatar
Michael Graff committed
373
374
375
376

	/*
	 * set up list of readers and writers to be initially empty
	 */
Michael Graff's avatar
Michael Graff committed
377
378
379
	INIT_LIST(sock->recv_list);
	INIT_LIST(sock->send_list);
	INIT_LIST(sock->accept_list);
Michael Graff's avatar
Michael Graff committed
380
	sock->connect_ev = NULL;
Michael Graff's avatar
Michael Graff committed
381
382
	sock->pending_recv = ISC_FALSE;
	sock->pending_send = ISC_FALSE;
Michael Graff's avatar
Michael Graff committed
383
	sock->pending_accept = ISC_FALSE;
Michael Graff's avatar
Michael Graff committed
384
	sock->listener = ISC_FALSE;
Michael Graff's avatar
Michael Graff committed
385
	sock->connected = ISC_FALSE;
Michael Graff's avatar
Michael Graff committed
386
387
388
389
390
391
	sock->connecting = ISC_FALSE;
	sock->riev = NULL;
	sock->wiev = NULL;
	sock->ciev = NULL;

	sock->addrlength = 0;
Michael Graff's avatar
Michael Graff committed
392

393
394
395
	sock->recv_result = ISC_R_SUCCESS;
	sock->send_result = ISC_R_SUCCESS;

Michael Graff's avatar
Michael Graff committed
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
	/*
	 * initialize the lock
	 */
	if (isc_mutex_init(&sock->lock) != ISC_R_SUCCESS) {
		sock->magic = 0;
		isc_mem_put(manager->mctx, sock, sizeof *sock);
		UNEXPECTED_ERROR(__FILE__, __LINE__,
				 "isc_mutex_init() failed");
		return (ISC_R_UNEXPECTED);
	}

	*socketp = sock;

	return (ISC_R_SUCCESS);
}

/*
 * This event requires that the various lists be empty, that the reference
 * count be 1, and that the magic number is valid.  The other socket bits,
 * like the lock, must be initialized as well.  The fd associated must be
 * marked as closed, by setting it to -1 on close, or this routine will
 * also close the socket.
 */
static void
Bob Halley's avatar
Bob Halley committed
420
free_socket(isc_socket_t **socketp)
Michael Graff's avatar
Michael Graff committed
421
{
Bob Halley's avatar
Bob Halley committed
422
	isc_socket_t *sock = *socketp;
Michael Graff's avatar
Michael Graff committed
423

Michael Graff's avatar
Michael Graff committed
424
	REQUIRE(sock->references == 0);
Michael Graff's avatar
Michael Graff committed
425
	REQUIRE(VALID_SOCKET(sock));
Michael Graff's avatar
Michael Graff committed
426
	REQUIRE(!sock->connecting);
Michael Graff's avatar
Michael Graff committed
427
428
	REQUIRE(!sock->pending_recv);
	REQUIRE(!sock->pending_send);
Michael Graff's avatar
Michael Graff committed
429
	REQUIRE(!sock->pending_accept);
Michael Graff's avatar
Michael Graff committed
430
431
432
	REQUIRE(EMPTY(sock->recv_list));
	REQUIRE(EMPTY(sock->send_list));
	REQUIRE(EMPTY(sock->accept_list));
Michael Graff's avatar
Michael Graff committed
433

434
	sock->magic = 0;
Michael Graff's avatar
Michael Graff committed
435
436
437
438

	(void)isc_mutex_destroy(&sock->lock);

	isc_mem_put(sock->manager->mctx, sock, sizeof *sock);
Michael Graff's avatar
Michael Graff committed
439
440

	*socketp = NULL;
441
442
443
444
445
446
447
448
449
450
}

/*
 * Create a new 'type' socket managed by 'manager'.  The sockets
 * parameters are specified by 'expires' and 'interval'.  Events
 * will be posted to 'task' and when dispatched 'action' will be
 * called with 'arg' as the arg value.  The new socket is returned
 * in 'socketp'.
 */
isc_result_t
Bob Halley's avatar
Bob Halley committed
451
452
isc_socket_create(isc_socketmgr_t *manager, isc_sockettype_t type,
		  isc_socket_t **socketp)
453
{
Bob Halley's avatar
Bob Halley committed
454
	isc_socket_t *sock = NULL;
Michael Graff's avatar
Michael Graff committed
455
	isc_result_t ret;
456
457
458
459

	REQUIRE(VALID_MANAGER(manager));
	REQUIRE(socketp != NULL && *socketp == NULL);

Michael Graff's avatar
Michael Graff committed
460
	XENTER(TRACE_MANAGER, "isc_socket_create");
Michael Graff's avatar
Michael Graff committed
461
462
463
464
	
	ret = allocate_socket(manager, type, &sock);
	if (ret != ISC_R_SUCCESS)
		return (ret);
465
466
467
468
469
470
471
472
473
474

	switch (type) {
	case isc_socket_udp:
		sock->fd = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP);
		break;
	case isc_socket_tcp:
		sock->fd = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
		break;
	}
	if (sock->fd < 0) {
Michael Graff's avatar
Michael Graff committed
475
		free_socket(&sock);
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491

		switch (errno) {
		case EMFILE:
		case ENFILE:
		case ENOBUFS:
			return (ISC_R_NORESOURCES);
			break;
		default:
			UNEXPECTED_ERROR(__FILE__, __LINE__,
					 "socket() failed: %s",
					 strerror(errno));
			return (ISC_R_UNEXPECTED);
			break;
		}
	}

Michael Graff's avatar
Michael Graff committed
492
	if (make_nonblock(sock->fd) != ISC_R_SUCCESS) {
Michael Graff's avatar
Michael Graff committed
493
		free_socket(&sock);
Michael Graff's avatar
Michael Graff committed
494
495
496
		return (ISC_R_UNEXPECTED);
	}

497
498
499
	sock->references = 1;
	*socketp = sock;

500
501
502
503
504
505
506
	LOCK(&manager->lock);

	/*
	 * Note we don't have to lock the socket like we normally would because
	 * there are no external references to it yet.
	 */

Michael Graff's avatar
Michael Graff committed
507
	manager->fds[sock->fd] = sock;
Michael Graff's avatar
Michael Graff committed
508
	manager->fdstate[sock->fd] = MANAGED;
Michael Graff's avatar
Michael Graff committed
509
	manager->nsockets++;
Michael Graff's avatar
Michael Graff committed
510
	XTRACE(TRACE_MANAGER, ("nsockets == %d\n", manager->nsockets));
511
512
	if (manager->maxfd < sock->fd)
		manager->maxfd = sock->fd;
513
514
515

	UNLOCK(&manager->lock);

Michael Graff's avatar
Michael Graff committed
516
	XEXIT(TRACE_MANAGER, "isc_socket_create");
517

Michael Graff's avatar
Michael Graff committed
518
	return (ISC_R_SUCCESS);
519
520
521
522
523
524
}

/*
 * Attach to a socket.  Caller must explicitly detach when it is done.
 */
void
Bob Halley's avatar
Bob Halley committed
525
isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp)
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
{
	REQUIRE(VALID_SOCKET(sock));
	REQUIRE(socketp != NULL && *socketp == NULL);

	LOCK(&sock->lock);
	sock->references++;
	UNLOCK(&sock->lock);
	
	*socketp = sock;
}

/*
 * Dereference a socket.  If this is the last reference to it, clean things
 * up by destroying the socket.
 */
void 
Bob Halley's avatar
Bob Halley committed
542
isc_socket_detach(isc_socket_t **socketp)
543
{
Bob Halley's avatar
Bob Halley committed
544
	isc_socket_t *sock;
Michael Graff's avatar
Michael Graff committed
545
	isc_boolean_t kill_socket = ISC_FALSE;
546
547
548
549
550

	REQUIRE(socketp != NULL);
	sock = *socketp;
	REQUIRE(VALID_SOCKET(sock));

Michael Graff's avatar
Michael Graff committed
551
	XENTER(TRACE_MANAGER, "isc_socket_detach");
552
553
554
555
556

	LOCK(&sock->lock);
	REQUIRE(sock->references > 0);
	sock->references--;
	if (sock->references == 0)
Michael Graff's avatar
Michael Graff committed
557
		kill_socket = ISC_TRUE;
558
559
	UNLOCK(&sock->lock);
	
Michael Graff's avatar
Michael Graff committed
560
561
	if (kill_socket)
		destroy(&sock);
562

Michael Graff's avatar
Michael Graff committed
563
	XEXIT(TRACE_MANAGER, "isc_socket_detach");
564
565
566
567

	*socketp = NULL;
}

Michael Graff's avatar
Michael Graff committed
568
569
570
571
572
573
574
575
576
/*
 * I/O is possible on a given socket.  Schedule an event to this task that
 * will call an internal function to do the I/O.  This will charge the
 * task with the I/O operation and let our select loop handler get back
 * to doing something real as fast as possible.
 *
 * The socket and manager must be locked before calling this function.
 */
static void
Bob Halley's avatar
Bob Halley committed
577
dispatch_read(isc_socket_t *sock)
Michael Graff's avatar
Michael Graff committed
578
{
Bob Halley's avatar
Bob Halley committed
579
580
	rwintev_t *iev;
	isc_event_t *ev;
Michael Graff's avatar
Michael Graff committed
581

Michael Graff's avatar
Michael Graff committed
582
	iev = HEAD(sock->recv_list);
Bob Halley's avatar
Bob Halley committed
583
	ev = (isc_event_t *)iev;
Michael Graff's avatar
Michael Graff committed
584

Michael Graff's avatar
Michael Graff committed
585
	INSIST(!sock->pending_recv);
Michael Graff's avatar
Michael Graff committed
586

Michael Graff's avatar
Michael Graff committed
587
	sock->pending_recv = ISC_TRUE;
Michael Graff's avatar
Michael Graff committed
588

Michael Graff's avatar
Michael Graff committed
589
590
	XTRACE(TRACE_WATCHER, ("dispatch_read:  posted event %p to task %p\n",
			       ev, iev->task));
591

Michael Graff's avatar
Michael Graff committed
592
593
	iev->posted = ISC_TRUE;

594
	ISC_TASK_SEND(iev->task, &ev);
Michael Graff's avatar
Michael Graff committed
595
596
}

597
static void
Bob Halley's avatar
Bob Halley committed
598
dispatch_write(isc_socket_t *sock)
Michael Graff's avatar
Michael Graff committed
599
{
Bob Halley's avatar
Bob Halley committed
600
601
	rwintev_t *iev;
	isc_event_t *ev;
Michael Graff's avatar
Michael Graff committed
602

Michael Graff's avatar
Michael Graff committed
603
	iev = HEAD(sock->send_list);
Bob Halley's avatar
Bob Halley committed
604
	ev = (isc_event_t *)iev;
Michael Graff's avatar
Michael Graff committed
605

Michael Graff's avatar
Michael Graff committed
606
607
	INSIST(!sock->pending_send);
	sock->pending_send = ISC_TRUE;
Michael Graff's avatar
Michael Graff committed
608

Michael Graff's avatar
Michael Graff committed
609
610
	iev->posted = ISC_TRUE;

611
	ISC_TASK_SEND(iev->task, &ev);
Michael Graff's avatar
Michael Graff committed
612
613
}

614
static void
Bob Halley's avatar
Bob Halley committed
615
dispatch_listen(isc_socket_t *sock)
616
{
Bob Halley's avatar
Bob Halley committed
617
618
	ncintev_t *iev;
	isc_event_t *ev;
619

Michael Graff's avatar
Michael Graff committed
620
	iev = HEAD(sock->accept_list);
Bob Halley's avatar
Bob Halley committed
621
	ev = (isc_event_t *)iev;
622

Michael Graff's avatar
Michael Graff committed
623
	INSIST(!sock->pending_accept);
624

Michael Graff's avatar
Michael Graff committed
625
626
627
	sock->pending_accept = ISC_TRUE;

	iev->posted = ISC_TRUE;
628

629
	ISC_TASK_SEND(iev->task, &ev);
630
631
}

Michael Graff's avatar
Michael Graff committed
632
static void
Bob Halley's avatar
Bob Halley committed
633
dispatch_connect(isc_socket_t *sock)
Michael Graff's avatar
Michael Graff committed
634
{
Bob Halley's avatar
Bob Halley committed
635
	cnintev_t *iev;
Michael Graff's avatar
Michael Graff committed
636
637
638
639
640

	INSIST(sock->connecting);

	iev = sock->connect_ev;

Michael Graff's avatar
Michael Graff committed
641
642
	iev->posted = ISC_TRUE;

Bob Halley's avatar
Bob Halley committed
643
	ISC_TASK_SEND(iev->task, (isc_event_t **)&iev);
Michael Graff's avatar
Michael Graff committed
644
645
}

Michael Graff's avatar
Michael Graff committed
646
647
648
649
650
651
652
/*
 * Dequeue an item off the given socket's read queue, set the result code
 * in the done event to the one provided, and send it to the task it was
 * destined for.
 *
 * Caller must have the socket locked.
 */
Michael Graff's avatar
Michael Graff committed
653
static void
Bob Halley's avatar
Bob Halley committed
654
655
send_recvdone_event(isc_socket_t *sock, rwintev_t **iev,
		    isc_socketevent_t **dev, isc_result_t resultcode)
Michael Graff's avatar
Michael Graff committed
656
{
Michael Graff's avatar
Michael Graff committed
657
	REQUIRE(!EMPTY(sock->recv_list));
Michael Graff's avatar
Michael Graff committed
658
659
660
661
662
	REQUIRE(iev != NULL);
	REQUIRE(*iev != NULL);
	REQUIRE(dev != NULL);
	REQUIRE(*dev != NULL);

Michael Graff's avatar
Michael Graff committed
663
	DEQUEUE(sock->recv_list, *iev, link);
Michael Graff's avatar
Michael Graff committed
664
	(*dev)->result = resultcode;
Bob Halley's avatar
Bob Halley committed
665
	ISC_TASK_SEND((*iev)->task, (isc_event_t **)dev);
Michael Graff's avatar
Michael Graff committed
666
	isc_task_detach(&(*iev)->task);
Michael Graff's avatar
Michael Graff committed
667
	(*iev)->done_ev = NULL;
Bob Halley's avatar
Bob Halley committed
668
	isc_event_free((isc_event_t **)iev);
Michael Graff's avatar
Michael Graff committed
669
}
670
static void
Bob Halley's avatar
Bob Halley committed
671
672
send_senddone_event(isc_socket_t *sock, rwintev_t **iev,
		    isc_socketevent_t **dev, isc_result_t resultcode)
673
{
Michael Graff's avatar
Michael Graff committed
674
	REQUIRE(!EMPTY(sock->send_list));
675
676
677
678
679
	REQUIRE(iev != NULL);
	REQUIRE(*iev != NULL);
	REQUIRE(dev != NULL);
	REQUIRE(*dev != NULL);

Michael Graff's avatar
Michael Graff committed
680
	DEQUEUE(sock->send_list, *iev, link);
681
	(*dev)->result = resultcode;
Bob Halley's avatar
Bob Halley committed
682
	ISC_TASK_SEND((*iev)->task, (isc_event_t **)dev);
Michael Graff's avatar
Michael Graff committed
683
	isc_task_detach(&(*iev)->task);
684
	(*iev)->done_ev = NULL;
Bob Halley's avatar
Bob Halley committed
685
	isc_event_free((isc_event_t **)iev);
686
}
Michael Graff's avatar
Michael Graff committed
687

688
static void
Bob Halley's avatar
Bob Halley committed
689
690
send_ncdone_event(ncintev_t **iev,
		  isc_socket_newconnev_t **dev, isc_result_t resultcode)
691
692
693
694
695
696
697
{
	REQUIRE(iev != NULL);
	REQUIRE(*iev != NULL);
	REQUIRE(dev != NULL);
	REQUIRE(*dev != NULL);

	(*dev)->result = resultcode;
Michael Graff's avatar
Michael Graff committed
698
	(*dev)->common.destroy = done_event_destroy;
Bob Halley's avatar
Bob Halley committed
699
	ISC_TASK_SEND((*iev)->task, (isc_event_t **)dev);
Michael Graff's avatar
Michael Graff committed
700
	isc_task_detach(&(*iev)->task);
Michael Graff's avatar
Michael Graff committed
701
	(*iev)->done_ev = NULL;
702

Bob Halley's avatar
Bob Halley committed
703
	isc_event_free((isc_event_t **)iev);
704
705
}

Michael Graff's avatar
Michael Graff committed
706
707
708
709
710
711
712
/*
 * Call accept() on a socket, to get the new file descriptor.  The listen
 * socket is used as a prototype to create a new isc_socket_t.  The new
 * socket is referenced twice (one for the task which is receiving this
 * message, and once for the message itself) so the task does not need to
 * attach to the socket again.  The task is not attached at all.
 */
713
static void
Bob Halley's avatar
Bob Halley committed
714
internal_accept(isc_task_t *task, isc_event_t *ev)
Michael Graff's avatar
Michael Graff committed
715
{
Bob Halley's avatar
Bob Halley committed
716
717
718
719
	isc_socket_t *sock;
	isc_socketmgr_t *manager;
	isc_socket_newconnev_t *dev;
	ncintev_t *iev;
Michael Graff's avatar
Michael Graff committed
720
	struct sockaddr addr;
721
	u_int addrlen;
Michael Graff's avatar
Michael Graff committed
722
	int fd;
723
	isc_result_t result = ISC_R_SUCCESS;
Michael Graff's avatar
Michael Graff committed
724
725

	sock = ev->sender;
726
727
	REQUIRE(VALID_SOCKET(sock));

Bob Halley's avatar
Bob Halley committed
728
	iev = (ncintev_t *)ev;
729
730
	manager = sock->manager;
	REQUIRE(VALID_MANAGER(manager));
Michael Graff's avatar
Michael Graff committed
731
732

	LOCK(&sock->lock);
Michael Graff's avatar
Michael Graff committed
733
734
	XTRACE(TRACE_LISTEN,
	       ("internal_accept called, locked parent sock %p\n", sock));
Michael Graff's avatar
Michael Graff committed
735

Michael Graff's avatar
Michael Graff committed
736
	REQUIRE(sock->pending_accept);
Michael Graff's avatar
Michael Graff committed
737
	REQUIRE(sock->listener);
Michael Graff's avatar
Michael Graff committed
738
	REQUIRE(!EMPTY(sock->accept_list));
Michael Graff's avatar
Michael Graff committed
739
740
	REQUIRE(iev->task == task);

Michael Graff's avatar
Michael Graff committed
741
	sock->pending_accept = ISC_FALSE;
Michael Graff's avatar
Michael Graff committed
742

743
744
745
746
	/*
	 * Has this event been canceled?
	 */
	if (iev->canceled) {
Michael Graff's avatar
Michael Graff committed
747
		DEQUEUE(sock->accept_list, iev, link);
Bob Halley's avatar
Bob Halley committed
748
		isc_event_free((isc_event_t **)iev);
Michael Graff's avatar
Michael Graff committed
749
		if (!EMPTY(sock->accept_list))
750
751
752
753
			select_poke(sock->manager, sock->fd);

		UNLOCK(&sock->lock);

754
		return;
755
756
	}

Michael Graff's avatar
Michael Graff committed
757
758
	/*
	 * Try to accept the new connection.  If the accept fails with
Michael Graff's avatar
Michael Graff committed
759
	 * EAGAIN or EINTR, simply poke the watcher to watch this socket
Michael Graff's avatar
Michael Graff committed
760
761
	 * again.
	 */
762
	addrlen = sizeof(addr);
Michael Graff's avatar
Michael Graff committed
763
764
	fd = accept(sock->fd, &addr, &addrlen);
	if (fd < 0) {
Michael Graff's avatar
Michael Graff committed
765
		if (SOFT_ERROR(errno)) {
Michael Graff's avatar
Michael Graff committed
766
767
			select_poke(sock->manager, sock->fd);
			UNLOCK(&sock->lock);
768
			return;
Michael Graff's avatar
Michael Graff committed
769
770
771
772
		}

		/*
		 * If some other error, ignore it as well and hope
Michael Graff's avatar
Michael Graff committed
773
		 * for the best, but log it.
Michael Graff's avatar
Michael Graff committed
774
		 */
Michael Graff's avatar
Michael Graff committed
775
776
		XTRACE(TRACE_LISTEN, ("internal_accept: accept returned %s\n",
				      strerror(errno)));
777
778
779

		fd = -1;
		result = ISC_R_UNEXPECTED;
Michael Graff's avatar
Michael Graff committed
780
	}
781
782

	if (fd != -1 && (make_nonblock(fd) != ISC_R_SUCCESS)) {
783
		close(fd);
784
785
786
787
788
		fd = -1;

		result = ISC_R_UNEXPECTED;

		free_socket(&dev->newsocket);
789
	}
Michael Graff's avatar
Michael Graff committed
790

Michael Graff's avatar
Michael Graff committed
791
792
793
794
795
	DEQUEUE(sock->accept_list, iev, link);

	if (!EMPTY(sock->accept_list))
		select_poke(sock->manager, sock->fd);

Michael Graff's avatar
Michael Graff committed
796
797
	UNLOCK(&sock->lock);

Michael Graff's avatar
Michael Graff committed
798
799
800
801
802
	/*
	 * The accept succeeded.  Pull off the done event and set the
	 * fd and other information in the socket descriptor here.  These
	 * were preallocated for us.
	 */
Michael Graff's avatar
Michael Graff committed
803
804
	dev = iev->done_ev;
	iev->done_ev = NULL;
Michael Graff's avatar
Michael Graff committed
805

806
	/*
807
	 * -1 means the new socket didn't happen.
808
	 */
809
810
	if (fd != -1) {
		dev->newsocket->fd = fd;
811

812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
		/*
		 * Save away the remote address
		 */
		dev->newsocket->addrlength = addrlen;
		memcpy(&dev->newsocket->address, &addr, addrlen);
		dev->addrlength = addrlen;
		memcpy(&dev->address, &addr, addrlen);

		LOCK(&manager->lock);
		manager->fds[fd] = dev->newsocket;
		manager->fdstate[fd] = MANAGED;
		if (manager->maxfd < fd)
			manager->maxfd = fd;
		manager->nsockets++;
		UNLOCK(&manager->lock);

		XTRACE(TRACE_LISTEN, ("internal_accept: newsock %p, fd %d\n",
				      dev->newsocket, fd));
	}
Michael Graff's avatar
Michael Graff committed
831

832
	send_ncdone_event(&iev, &dev, result);
Michael Graff's avatar
Michael Graff committed
833
834
}

835
static void
Bob Halley's avatar
Bob Halley committed
836
internal_recv(isc_task_t *task, isc_event_t *ev)
837
{
Bob Halley's avatar
Bob Halley committed
838
839
840
	rwintev_t *iev;
	isc_socketevent_t *dev;
	isc_socket_t *sock;
Michael Graff's avatar
Michael Graff committed
841
842
	int cc;
	size_t read_count;
843
844
	struct sockaddr addr;
	u_int addrlen;
Michael Graff's avatar
Michael Graff committed
845
846
847
848

	/*
	 * Find out what socket this is and lock it.
	 */
Bob Halley's avatar
Bob Halley committed
849
	sock = (isc_socket_t *)ev->sender;
Michael Graff's avatar
Michael Graff committed
850
851
	LOCK(&sock->lock);

Michael Graff's avatar
Michael Graff committed
852
853
	INSIST(sock->pending_recv == ISC_TRUE);
	sock->pending_recv = ISC_FALSE;
Michael Graff's avatar
Michael Graff committed
854

Michael Graff's avatar
Michael Graff committed
855
	XTRACE(TRACE_RECV,
Michael Graff's avatar
Michael Graff committed
856
	       ("internal_recv: sock %p, fd %d\n", sock, sock->fd));
857

Michael Graff's avatar
Michael Graff committed
858
859
860
861
	/*
	 * Pull the first entry off the list, and look at it.  If it is
	 * NULL, or not ours, something bad happened.
	 */
Michael Graff's avatar
Michael Graff committed
862
	iev = HEAD(sock->recv_list);
Michael Graff's avatar
Michael Graff committed
863
864
865
	INSIST(iev != NULL);
	INSIST(iev->task == task);

Michael Graff's avatar
Michael Graff committed
866
867
868
869
870
871
872
	/*
	 * Try to do as much I/O as possible on this socket.  There are no
	 * limits here, currently.  If some sort of quantum read count is
	 * desired before giving up control, make certain to process markers
	 * regardless of quantum.
	 */
	do {
Michael Graff's avatar
Michael Graff committed
873
		iev = HEAD(sock->recv_list);
Michael Graff's avatar
Michael Graff committed
874
875
		dev = iev->done_ev;

Michael Graff's avatar
Michael Graff committed
876
		/*
877
		 * check for canceled I/O
Michael Graff's avatar
Michael Graff committed
878
		 */
879
		if (iev->canceled) {
Michael Graff's avatar
Michael Graff committed
880
			DEQUEUE(sock->recv_list, iev, link);
Bob Halley's avatar
Bob Halley committed
881
			isc_event_free((isc_event_t **)&iev);
Michael Graff's avatar
Michael Graff committed
882
			goto next;
Michael Graff's avatar
Michael Graff committed
883
884
		}

Michael Graff's avatar
Michael Graff committed
885
886
887
888
889
		/*
		 * If this is a marker event, post its completion and
		 * continue the loop.
		 */
		if (dev->common.type == ISC_SOCKEVENT_RECVMARK) {
890
891
			send_recvdone_event(sock, &iev, &dev,
					    sock->recv_result);
Michael Graff's avatar
Michael Graff committed
892
			goto next;
Michael Graff's avatar
Michael Graff committed
893
894
		}

Michael Graff's avatar
Michael Graff committed
895
896
897
898
		/*
		 * It must be a read request.  Try to satisfy it as best
		 * we can.
		 */
Michael Graff's avatar
Michael Graff committed
899
		read_count = dev->region.length - dev->n;
Michael Graff's avatar
Michael Graff committed
900
		if (sock->type == isc_socket_udp) {
901
902
903
904
905
906
907
			addrlen = sizeof(addr);
			cc = recvfrom(sock->fd, dev->region.base + dev->n,
				      read_count, 0,
				      (struct sockaddr *)&addr,
				      &addrlen);
			memcpy(&dev->address, &addr, addrlen);
			dev->addrlength = addrlen;
Michael Graff's avatar
Michael Graff committed
908
909
910
911
		} else {
			cc = recv(sock->fd, dev->region.base + dev->n,
				  read_count, 0);
			memcpy(&dev->address, &sock->address,
912
			       (size_t)sock->addrlength);
Michael Graff's avatar
Michael Graff committed
913
			dev->addrlength = sock->addrlength;
914
		}			
915

Michael Graff's avatar
Michael Graff committed
916
		XTRACE(TRACE_RECV,
Michael Graff's avatar
Michael Graff committed
917
		       ("internal_recv:  read(%d) %d\n", sock->fd, cc));
Michael Graff's avatar
Michael Graff committed
918
919
920
921
922

		/*
		 * check for error or block condition
		 */
		if (cc < 0) {
Michael Graff's avatar
Michael Graff committed
923
			if (SOFT_ERROR(errno))
Michael Graff's avatar
Michael Graff committed
924
				goto poke;
Michael Graff's avatar
Michael Graff committed
925

Michael Graff's avatar
Michael Graff committed
926
#if 0
Michael Graff's avatar
Michael Graff committed
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
#define SOFT_OR_HARD(_system, _isc) \
	if (errno == _system) { \
		if (sock->connected) { \
			if (sock->type == isc_socket_tcp) \
				sock->recv_result = _isc; \
			send_recvdone_event(sock, &iev, &dev, _isc); \
		} \
		goto next; \
	}

			SOFT_OR_HARD(ECONNREFUSED, ISC_R_CONNREFUSED);
			SOFT_OR_HARD(ENETUNREACH, ISC_R_NETUNREACH);
			SOFT_OR_HARD(EHOSTUNREACH, ISC_R_HOSTUNREACH);
#undef SOFT_OR_HARD

			/*
			 * This might not be a permanent error.
			 */
			if (errno == ENOBUFS) {
				send_recvdone_event(sock, &iev, &dev,
						    ISC_R_NORESOURCES);

				goto next;
			}
Michael Graff's avatar
Michael Graff committed
951
#endif
Michael Graff's avatar
Michael Graff committed
952

Michael Graff's avatar
Michael Graff committed
953
			UNEXPECTED_ERROR(__FILE__, __LINE__,
954
955
					 "internal read: %s", strerror(errno));

Michael Graff's avatar
Michael Graff committed
956
			sock->recv_result = ISC_R_UNEXPECTED;
957
			send_recvdone_event(sock, &iev, &dev,
Michael Graff's avatar
Michael Graff committed
958
					    ISC_R_UNEXPECTED);
959

Michael Graff's avatar
Michael Graff committed
960
			goto next;
Michael Graff's avatar
Michael Graff committed
961
		}
Michael Graff's avatar
Michael Graff committed
962

Michael Graff's avatar
Michael Graff committed
963
964
965
		/*
		 * read of 0 means the remote end was closed.  Run through
		 * the event queue and dispatch all the events with an EOF
Michael Graff's avatar
Michael Graff committed
966
967
		 * result code.  This will set the EOF flag in markers as
		 * well, but that's really ok.
Michael Graff's avatar
Michael Graff committed
968
969
970
		 */
		if (cc == 0) {
			do {
971
972
				send_recvdone_event(sock, &iev, &dev,
						    ISC_R_EOF);
Michael Graff's avatar
Michael Graff committed
973
				iev = HEAD(sock->recv_list);
Michael Graff's avatar
Michael Graff committed
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
			} while (iev != NULL);

			goto poke;
		}

		/*
		 * if we read less than we expected, update counters,
		 * poke.
		 */
		if ((size_t)cc < read_count) {
			dev->n += cc;

			/*
			 * If partial reads are allowed, we return whatever
			 * was read with a success result, and continue
			 * the loop.
			 */
			if (iev->partial) {
992
993
				send_recvdone_event(sock, &iev, &dev,
						    ISC_R_SUCCESS);
Michael Graff's avatar
Michael Graff committed
994
				goto next;
Michael Graff's avatar
Michael Graff committed
995
996
997
998
999
1000
1001
1002
1003
			}

			/*
			 * Partials not ok.  Exit the loop and notify the
			 * watcher to wait for more reads
			 */
			goto poke;
		}

1004
		/*
Michael Graff's avatar
Michael Graff committed
1005
1006
		 * Exactly what we wanted to read.  We're done with this
		 * entry.  Post its completion event.
1007
		 */
1008
1009
		if ((size_t)cc == read_count) {
			dev->n += read_count;
1010
			send_recvdone_event(sock, &iev, &dev, ISC_R_SUCCESS);
1011
		}
Michael Graff's avatar
Michael Graff committed
1012

Michael Graff's avatar
Michael Graff committed
1013
	next:
Michael Graff's avatar
Michael Graff committed
1014
		; /* some compilers need this here... */
Michael Graff's avatar
Michael Graff committed
1015
	} while (!EMPTY(sock->recv_list));
Michael Graff's avatar
Michael Graff committed
1016
1017

 poke:
Michael Graff's avatar
Michael Graff committed
1018
	if (!EMPTY(sock->recv_list))
Michael Graff's avatar
Michael Graff committed
1019
1020
1021
		select_poke(sock->manager, sock->fd);

	UNLOCK(&sock->lock);
Michael Graff's avatar