1290001Sglebius/*
2290001Sglebius * Copyright (C) 2004-2012  Internet Systems Consortium, Inc. ("ISC")
3290001Sglebius * Copyright (C) 2000-2003  Internet Software Consortium.
4290001Sglebius *
5290001Sglebius * Permission to use, copy, modify, and/or distribute this software for any
6290001Sglebius * purpose with or without fee is hereby granted, provided that the above
7290001Sglebius * copyright notice and this permission notice appear in all copies.
8290001Sglebius *
9290001Sglebius * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
10290001Sglebius * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11290001Sglebius * AND FITNESS.  IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
12290001Sglebius * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13290001Sglebius * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
14290001Sglebius * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15290001Sglebius * PERFORMANCE OF THIS SOFTWARE.
16290001Sglebius */
17290001Sglebius
18290001Sglebius/* $Id$ */
19290001Sglebius
20290001Sglebius/* This code uses functions which are only available on Server 2003 and
21290001Sglebius * higher, and Windows XP and higher.
22290001Sglebius *
23290001Sglebius * This code is by nature multithreaded and takes advantage of various
24290001Sglebius * features to pass on information through the completion port for
25290001Sglebius * when I/O is completed.  All sends, receives, accepts, and connects are
26290001Sglebius * completed through the completion port.
27290001Sglebius *
28290001Sglebius * The number of Completion Port Worker threads used is the total number
29290001Sglebius * of CPU's + 1. This increases the likelihood that a Worker Thread is
30290001Sglebius * available for processing a completed request.
31290001Sglebius *
32290001Sglebius * XXXPDM 5 August, 2002
33290001Sglebius */
34290001Sglebius
35290001Sglebius#define MAKE_EXTERNAL 1
36290001Sglebius#include <config.h>
37290001Sglebius
38290001Sglebius#include <sys/types.h>
39290001Sglebius
40290001Sglebius#ifndef _WINSOCKAPI_
41290001Sglebius#define _WINSOCKAPI_   /* Prevent inclusion of winsock.h in windows.h */
42290001Sglebius#endif
43290001Sglebius
44290001Sglebius#include <errno.h>
45290001Sglebius#include <stddef.h>
46290001Sglebius#include <stdlib.h>
47290001Sglebius#include <string.h>
48290001Sglebius#include <unistd.h>
49290001Sglebius#include <io.h>
50290001Sglebius#include <fcntl.h>
51290001Sglebius#include <process.h>
52290001Sglebius
53290001Sglebius#include <isc/buffer.h>
54290001Sglebius#include <isc/bufferlist.h>
55290001Sglebius#include <isc/condition.h>
56290001Sglebius#include <isc/list.h>
57290001Sglebius#include <isc/log.h>
58290001Sglebius#include <isc/mem.h>
59290001Sglebius#include <isc/msgs.h>
60290001Sglebius#include <isc/mutex.h>
61290001Sglebius#include <isc/net.h>
62290001Sglebius#include <isc/once.h>
63290001Sglebius#include <isc/os.h>
64290001Sglebius#include <isc/platform.h>
65290001Sglebius#include <isc/print.h>
66290001Sglebius#include <isc/region.h>
67290001Sglebius#include <isc/socket.h>
68290001Sglebius#include <isc/stats.h>
69290001Sglebius#include <isc/strerror.h>
70290001Sglebius#include <isc/syslog.h>
71290001Sglebius#include <isc/task.h>
72290001Sglebius#include <isc/thread.h>
73290001Sglebius#include <isc/util.h>
74290001Sglebius#include <isc/win32os.h>
75290001Sglebius
76290001Sglebius#include <mswsock.h>
77290001Sglebius
78290001Sglebius#include "errno2result.h"
79290001Sglebius
80290001Sglebius/*
81290001Sglebius * How in the world can Microsoft exist with APIs like this?
82290001Sglebius * We can't actually call this directly, because it turns out
83290001Sglebius * no library exports this function.  Instead, we need to
84290001Sglebius * issue a runtime call to get the address.
85290001Sglebius */
86290001SglebiusLPFN_CONNECTEX ISCConnectEx;
87290001SglebiusLPFN_ACCEPTEX ISCAcceptEx;
88290001SglebiusLPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs;
89290001Sglebius
90290001Sglebius/*
91290001Sglebius * Run expensive internal consistency checks.
92290001Sglebius */
93290001Sglebius#ifdef ISC_SOCKET_CONSISTENCY_CHECKS
94290001Sglebius#define CONSISTENT(sock) consistent(sock)
95290001Sglebius#else
96290001Sglebius#define CONSISTENT(sock) do {} while (0)
97290001Sglebius#endif
98290001Sglebiusstatic void consistent(isc_socket_t *sock);
99290001Sglebius
/*
 * Control code used to control the behavior of connection resets on UDP
 * sockets; see Microsoft KnowledgeBase Article Q263823 for details.  It is
 * defined here only when the system headers have not already done so.
 * NOTE: Windows 2000 systems must have Service Pack 2 or later installed.
 */
#if !defined(SIO_UDP_CONNRESET)
#define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12)
#endif

/*
 * The socket length argument is an int on some systems, a size_t on
 * others and a socklen_t on others still.  The type is named here so it
 * can be changed in one place if needed.
 */
#if !defined(ISC_SOCKADDR_LEN_T)
#define ISC_SOCKADDR_LEN_T unsigned int
#endif
118290001Sglebius
/*
 * "Soft" errors: non-fatal returns of various network related functions,
 * like recv() and so on.  Note that the argument is evaluated more than
 * once.
 */
#define SOFT_ERROR(e) \
	((e) == WSAEINTR || (e) == WSAEWOULDBLOCK || \
	 (e) == EWOULDBLOCK || (e) == EINTR || \
	 (e) == EAGAIN || (e) == 0)

/*
 * Pending errors are not really errors and are kept separate from the
 * soft errors above.
 */
#define PENDING_ERROR(e) \
	((e) == WSA_IO_PENDING || (e) == 0)

/*
 * I/O status codes.
 */
#define DOIO_SUCCESS	0	/* i/o ok, event sent */
#define DOIO_SOFT	1	/* i/o ok, soft error, no event sent */
#define DOIO_HARD	2	/* i/o error, event sent */
#define DOIO_EOF	3	/* EOF, no event sent */
#define DOIO_PENDING	4	/* status when i/o is in process */
#define DOIO_NEEDMORE	5	/* IO was processed, but we need more due to minimum */
142290001Sglebius
143290001Sglebius#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x)
144290001Sglebius
145290001Sglebius/*
146290001Sglebius * DLVL(90)  --  Function entry/exit and other tracing.
147290001Sglebius * DLVL(70)  --  Socket "correctness" -- including returning of events, etc.
148290001Sglebius * DLVL(60)  --  Socket data send/receive
149290001Sglebius * DLVL(50)  --  Event tracing, including receiving/sending completion events.
150290001Sglebius * DLVL(20)  --  Socket creation/destruction.
151290001Sglebius */
152290001Sglebius#define TRACE_LEVEL		90
153290001Sglebius#define CORRECTNESS_LEVEL	70
154290001Sglebius#define IOEVENT_LEVEL		60
155290001Sglebius#define EVENT_LEVEL		50
156290001Sglebius#define CREATION_LEVEL		20
157290001Sglebius
158290001Sglebius#define TRACE		DLVL(TRACE_LEVEL)
159290001Sglebius#define CORRECTNESS	DLVL(CORRECTNESS_LEVEL)
160290001Sglebius#define IOEVENT		DLVL(IOEVENT_LEVEL)
161290001Sglebius#define EVENT		DLVL(EVENT_LEVEL)
162290001Sglebius#define CREATION	DLVL(CREATION_LEVEL)
163290001Sglebius
164290001Sglebiustypedef isc_event_t intev_t;
165290001Sglebius
/*
 * Socket state, as stored in the 'state' member of a socket.
 */
enum {
	SOCK_INITIALIZED,	/* Socket Initialized */
	SOCK_OPEN,		/* Socket opened but nothing yet to do */
	SOCK_DATA,		/* Socket sending or receiving data */
	SOCK_LISTEN,		/* TCP Socket listening for connects */
	SOCK_ACCEPT,		/* TCP socket is waiting to accept */
	SOCK_CONNECT,		/* TCP Socket connecting */
	SOCK_CLOSED,		/* Socket has been closed */
};

/* Magic number identifying a socket, and the matching validity test. */
#define SOCKET_MAGIC	ISC_MAGIC('I', 'O', 'i', 'o')
#define VALID_SOCKET(t)	ISC_MAGIC_VALID(t, SOCKET_MAGIC)
181290001Sglebius
/*
 * IPv6 control information.  On an IPv6 socket we would want to collect
 * the destination address and interface so the client can set them on
 * outgoing packets; USE_CMSG is the switch for that.
 */
#if defined(ISC_PLATFORM_HAVEIPV6) && !defined(USE_CMSG)
#define USE_CMSG	1
#endif

/*
 * ...but we really don't want to try and use these control messages:
 * Win32 doesn't have this mechanism before XP.  USE_CMSG is therefore
 * always left undefined from this point on.
 */
#undef USE_CMSG
198290001Sglebius
199290001Sglebius/*
200290001Sglebius * Message header for recvmsg and sendmsg calls.
201290001Sglebius * Used value-result for recvmsg, value only for sendmsg.
202290001Sglebius */
203290001Sglebiusstruct msghdr {
204290001Sglebius	SOCKADDR_STORAGE to_addr;	/* UDP send/recv address */
205290001Sglebius	int      to_addr_len;		/* length of the address */
206290001Sglebius	WSABUF  *msg_iov;		/* scatter/gather array */
207290001Sglebius	u_int   msg_iovlen;             /* # elements in msg_iov */
208290001Sglebius	void	*msg_control;           /* ancillary data, see below */
209290001Sglebius	u_int   msg_controllen;         /* ancillary data buffer len */
210290001Sglebius	int	msg_totallen;		/* total length of this message */
211290001Sglebius} msghdr;
212290001Sglebius
/*
 * The size, in bytes, to raise the receive buffer to.
 */
#define RCVBUFSIZE	(32*1024)

/*
 * How many times a send operation is repeated when the result is
 * WSAEINTR.
 */
#define NRETRIES	10
223290001Sglebius
224290001Sglebiusstruct isc_socket {
225290001Sglebius	/* Not locked. */
226290001Sglebius	unsigned int		magic;
227290001Sglebius	isc_socketmgr_t	       *manager;
228290001Sglebius	isc_mutex_t		lock;
229290001Sglebius	isc_sockettype_t	type;
230290001Sglebius
231290001Sglebius	/* Pointers to scatter/gather buffers */
232290001Sglebius	WSABUF			iov[ISC_SOCKET_MAXSCATTERGATHER];
233290001Sglebius
234290001Sglebius	/* Locked by socket lock. */
235290001Sglebius	ISC_LINK(isc_socket_t)	link;
236290001Sglebius	unsigned int		references; /* EXTERNAL references */
237290001Sglebius	SOCKET			fd;	/* file handle */
238290001Sglebius	int			pf;	/* protocol family */
239290001Sglebius	char			name[16];
240290001Sglebius	void *			tag;
241290001Sglebius
242290001Sglebius	/*
243290001Sglebius	 * Each recv() call uses this buffer.  It is a per-socket receive
244290001Sglebius	 * buffer that allows us to decouple the system recv() from the
245290001Sglebius	 * recv_list done events.  This means the items on the recv_list
246290001Sglebius	 * can be removed without having to cancel pending system recv()
247290001Sglebius	 * calls.  It also allows us to read-ahead in some cases.
248290001Sglebius	 */
249290001Sglebius	struct {
250290001Sglebius		SOCKADDR_STORAGE	from_addr;	   // UDP send/recv address
251290001Sglebius		int		from_addr_len;	   // length of the address
252290001Sglebius		char		*base;		   // the base of the buffer
253290001Sglebius		char		*consume_position; // where to start copying data from next
254290001Sglebius		unsigned int	len;		   // the actual size of this buffer
255290001Sglebius		unsigned int	remaining;	   // the number of bytes remaining
256290001Sglebius	} recvbuf;
257290001Sglebius
258290001Sglebius	ISC_LIST(isc_socketevent_t)		send_list;
259290001Sglebius	ISC_LIST(isc_socketevent_t)		recv_list;
260290001Sglebius	ISC_LIST(isc_socket_newconnev_t)	accept_list;
261290001Sglebius	isc_socket_connev_t		       *connect_ev;
262290001Sglebius
263290001Sglebius	isc_sockaddr_t		address;  /* remote address */
264290001Sglebius
265290001Sglebius	unsigned int		listener : 1,	/* listener socket */
266290001Sglebius				connected : 1,
267290001Sglebius				pending_connect : 1, /* connect pending */
268290001Sglebius				bound : 1,	/* bound to local addr */
269290001Sglebius				dupped : 1;     /* created by isc_socket_dup() */
270290001Sglebius	unsigned int		pending_iocp;	/* Should equal the counters below. Debug. */
271290001Sglebius	unsigned int		pending_recv;  /* Number of outstanding recv() calls. */
272290001Sglebius	unsigned int		pending_send;  /* Number of outstanding send() calls. */
273290001Sglebius	unsigned int		pending_accept; /* Number of outstanding accept() calls. */
274290001Sglebius	unsigned int		state; /* Socket state. Debugging and consistency checking. */
275290001Sglebius	int			state_lineno;  /* line which last touched state */
276290001Sglebius};
277290001Sglebius
278290001Sglebius#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0)
279290001Sglebius
280290001Sglebius/*
281290001Sglebius * Buffer structure
282290001Sglebius */
283290001Sglebiustypedef struct buflist buflist_t;
284290001Sglebius
285290001Sglebiusstruct buflist {
286290001Sglebius	void			*buf;
287290001Sglebius	unsigned int		buflen;
288290001Sglebius	ISC_LINK(buflist_t)	link;
289290001Sglebius};
290290001Sglebius
291290001Sglebius/*
292290001Sglebius * I/O Completion ports Info structures
293290001Sglebius */
294290001Sglebius
295290001Sglebiusstatic HANDLE hHeapHandle = NULL;
296290001Sglebiustypedef struct IoCompletionInfo {
297290001Sglebius	OVERLAPPED		overlapped;
298290001Sglebius	isc_socketevent_t	*dev;  /* send()/recv() done event */
299290001Sglebius	isc_socket_connev_t	*cdev; /* connect() done event */
300290001Sglebius	isc_socket_newconnev_t	*adev; /* accept() done event */
301290001Sglebius	void			*acceptbuffer;
302290001Sglebius	DWORD			received_bytes;
303290001Sglebius	int			request_type;
304290001Sglebius	struct msghdr		messagehdr;
305290001Sglebius	ISC_LIST(buflist_t)	bufferlist;	/*%< list of buffers */
306290001Sglebius} IoCompletionInfo;
307290001Sglebius
/*
 * Upper bound on the number of I/O Completion Port worker threads that
 * handle the load on the Completion Port.  The number actually used is
 * the number of CPU's + 1, capped at this value.
 */
#define MAX_IOCPTHREADS		20

/* Magic number identifying a socket manager, and the validity test. */
#define SOCKET_MANAGER_MAGIC	ISC_MAGIC('I', 'O', 'm', 'g')
#define VALID_MANAGER(m)	ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC)
317290001Sglebius
318290001Sglebiusstruct isc_socketmgr {
319290001Sglebius	/* Not locked. */
320290001Sglebius	unsigned int			magic;
321290001Sglebius	isc_mem_t		       *mctx;
322290001Sglebius	isc_mutex_t			lock;
323290001Sglebius	isc_stats_t		       *stats;
324290001Sglebius
325290001Sglebius	/* Locked by manager lock. */
326290001Sglebius	ISC_LIST(isc_socket_t)		socklist;
327290001Sglebius	isc_boolean_t			bShutdown;
328290001Sglebius	isc_condition_t			shutdown_ok;
329290001Sglebius	HANDLE				hIoCompletionPort;
330290001Sglebius	int				maxIOCPThreads;
331290001Sglebius	HANDLE				hIOCPThreads[MAX_IOCPTHREADS];
332290001Sglebius	DWORD				dwIOCPThreadIds[MAX_IOCPTHREADS];
333290001Sglebius
334290001Sglebius	/*
335290001Sglebius	 * Debugging.
336290001Sglebius	 * Modified by InterlockedIncrement() and InterlockedDecrement()
337290001Sglebius	 */
338290001Sglebius	LONG				totalSockets;
339290001Sglebius	LONG				iocp_total;
340290001Sglebius};
341290001Sglebius
/*
 * Request types.
 */
enum {
	SOCKET_RECV,
	SOCKET_SEND,
	SOCKET_ACCEPT,
	SOCKET_CONNECT
};

/*
 * send() and recv() iovec counts
 */
#define MAXSCATTERGATHER_SEND	(ISC_SOCKET_MAXSCATTERGATHER)
#define MAXSCATTERGATHER_RECV	(ISC_SOCKET_MAXSCATTERGATHER)
354290001Sglebius
355290001Sglebiusstatic isc_result_t socket_create(isc_socketmgr_t *manager0, int pf,
356290001Sglebius				  isc_sockettype_t type,
357290001Sglebius				  isc_socket_t **socketp,
358290001Sglebius				  isc_socket_t *dup_socket);
359290001Sglebiusstatic isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext);
360290001Sglebiusstatic void maybe_free_socket(isc_socket_t **, int);
361290001Sglebiusstatic void free_socket(isc_socket_t **, int);
362290001Sglebiusstatic isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev);
363290001Sglebiusstatic isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev);
364290001Sglebiusstatic isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev);
365290001Sglebiusstatic void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev);
366290001Sglebiusstatic void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev);
367290001Sglebiusstatic void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev);
368290001Sglebiusstatic void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev);
369290001Sglebiusstatic void send_recvdone_abort(isc_socket_t *sock, isc_result_t result);
370290001Sglebiusstatic void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev);
371290001Sglebiusstatic void queue_receive_request(isc_socket_t *sock);
372290001Sglebius
373290001Sglebius/*
374290001Sglebius * This is used to dump the contents of the sock structure
375290001Sglebius * You should make sure that the sock is locked before
376290001Sglebius * dumping it. Since the code uses simple printf() statements
377290001Sglebius * it should only be used interactively.
378290001Sglebius */
379290001Sglebiusvoid
380290001Sglebiussock_dump(isc_socket_t *sock) {
381290001Sglebius	isc_socketevent_t *ldev;
382290001Sglebius	isc_socket_newconnev_t *ndev;
383290001Sglebius
384290001Sglebius#if 0
385290001Sglebius	isc_sockaddr_t addr;
386290001Sglebius	char socktext[256];
387290001Sglebius
388290001Sglebius	isc_socket_getpeername(sock, &addr);
389290001Sglebius	isc_sockaddr_format(&addr, socktext, sizeof(socktext));
390290001Sglebius	printf("Remote Socket: %s\n", socktext);
391290001Sglebius	isc_socket_getsockname(sock, &addr);
392290001Sglebius	isc_sockaddr_format(&addr, socktext, sizeof(socktext));
393290001Sglebius	printf("This Socket: %s\n", socktext);
394290001Sglebius#endif
395290001Sglebius
396290001Sglebius	printf("\n\t\tSock Dump\n");
397290001Sglebius	printf("\t\tfd: %u\n", sock->fd);
398290001Sglebius	printf("\t\treferences: %d\n", sock->references);
399290001Sglebius	printf("\t\tpending_accept: %d\n", sock->pending_accept);
400290001Sglebius	printf("\t\tconnecting: %d\n", sock->pending_connect);
401290001Sglebius	printf("\t\tconnected: %d\n", sock->connected);
402290001Sglebius	printf("\t\tbound: %d\n", sock->bound);
403290001Sglebius	printf("\t\tpending_iocp: %d\n", sock->pending_iocp);
404290001Sglebius	printf("\t\tsocket type: %d\n", sock->type);
405290001Sglebius
406290001Sglebius	printf("\n\t\tSock Recv List\n");
407290001Sglebius	ldev = ISC_LIST_HEAD(sock->recv_list);
408290001Sglebius	while (ldev != NULL) {
409290001Sglebius		printf("\t\tdev: %p\n", ldev);
410290001Sglebius		ldev = ISC_LIST_NEXT(ldev, ev_link);
411290001Sglebius	}
412290001Sglebius
413290001Sglebius	printf("\n\t\tSock Send List\n");
414290001Sglebius	ldev = ISC_LIST_HEAD(sock->send_list);
415290001Sglebius	while (ldev != NULL) {
416290001Sglebius		printf("\t\tdev: %p\n", ldev);
417290001Sglebius		ldev = ISC_LIST_NEXT(ldev, ev_link);
418290001Sglebius	}
419290001Sglebius
420290001Sglebius	printf("\n\t\tSock Accept List\n");
421290001Sglebius	ndev = ISC_LIST_HEAD(sock->accept_list);
422290001Sglebius	while (ndev != NULL) {
423290001Sglebius		printf("\t\tdev: %p\n", ldev);
424290001Sglebius		ndev = ISC_LIST_NEXT(ndev, ev_link);
425290001Sglebius	}
426290001Sglebius}
427290001Sglebius
428290001Sglebiusstatic void
429290001Sglebiussocket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
430290001Sglebius	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
431290001Sglebius	   isc_msgcat_t *msgcat, int msgset, int message,
432290001Sglebius	   const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10);
433290001Sglebius
434290001Sglebius/*  This function will add an entry to the I/O completion port
435290001Sglebius *  that will signal the I/O thread to exit (gracefully)
436290001Sglebius */
437290001Sglebiusstatic void
438290001Sglebiussignal_iocompletionport_exit(isc_socketmgr_t *manager) {
439290001Sglebius	int i;
440290001Sglebius	int errval;
441290001Sglebius	char strbuf[ISC_STRERRORSIZE];
442290001Sglebius
443290001Sglebius	REQUIRE(VALID_MANAGER(manager));
444290001Sglebius	for (i = 0; i < manager->maxIOCPThreads; i++) {
445290001Sglebius		if (!PostQueuedCompletionStatus(manager->hIoCompletionPort,
446290001Sglebius						0, 0, 0)) {
447290001Sglebius			errval = GetLastError();
448290001Sglebius			isc__strerror(errval, strbuf, sizeof(strbuf));
449290001Sglebius			FATAL_ERROR(__FILE__, __LINE__,
450290001Sglebius				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
451290001Sglebius				ISC_MSG_FAILED,
452290001Sglebius				"Can't request service thread to exit: %s"),
453290001Sglebius				strbuf);
454290001Sglebius		}
455290001Sglebius	}
456290001Sglebius}
457290001Sglebius
458290001Sglebius/*
459290001Sglebius * Create the worker threads for the I/O Completion Port
460290001Sglebius */
461290001Sglebiusvoid
462290001Sglebiusiocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) {
463290001Sglebius	int errval;
464290001Sglebius	char strbuf[ISC_STRERRORSIZE];
465290001Sglebius	int i;
466290001Sglebius
467290001Sglebius	INSIST(total_threads > 0);
468290001Sglebius	REQUIRE(VALID_MANAGER(manager));
469290001Sglebius	/*
470290001Sglebius	 * We need at least one
471290001Sglebius	 */
472290001Sglebius	for (i = 0; i < total_threads; i++) {
473290001Sglebius		manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread,
474290001Sglebius						manager, 0,
475290001Sglebius						&manager->dwIOCPThreadIds[i]);
476290001Sglebius		if (manager->hIOCPThreads[i] == NULL) {
477290001Sglebius			errval = GetLastError();
478290001Sglebius			isc__strerror(errval, strbuf, sizeof(strbuf));
479290001Sglebius			FATAL_ERROR(__FILE__, __LINE__,
480290001Sglebius				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
481290001Sglebius				ISC_MSG_FAILED,
482290001Sglebius				"Can't create IOCP thread: %s"),
483290001Sglebius				strbuf);
484290001Sglebius			exit(1);
485290001Sglebius		}
486290001Sglebius	}
487290001Sglebius}
488290001Sglebius
489290001Sglebius/*
490290001Sglebius *  Create/initialise the I/O completion port
491290001Sglebius */
492290001Sglebiusvoid
493290001Sglebiusiocompletionport_init(isc_socketmgr_t *manager) {
494290001Sglebius	int errval;
495290001Sglebius	char strbuf[ISC_STRERRORSIZE];
496290001Sglebius
497290001Sglebius	REQUIRE(VALID_MANAGER(manager));
498290001Sglebius	/*
499290001Sglebius	 * Create a private heap to handle the socket overlapped structure
500290001Sglebius	 * The minimum number of structures is 10, there is no maximum
501290001Sglebius	 */
502290001Sglebius	hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0);
503290001Sglebius	if (hHeapHandle == NULL) {
504290001Sglebius		errval = GetLastError();
505290001Sglebius		isc__strerror(errval, strbuf, sizeof(strbuf));
506290001Sglebius		FATAL_ERROR(__FILE__, __LINE__,
507290001Sglebius			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
508290001Sglebius					   ISC_MSG_FAILED,
509290001Sglebius					   "HeapCreate() failed during "
510290001Sglebius					   "initialization: %s"),
511290001Sglebius			    strbuf);
512290001Sglebius		exit(1);
513290001Sglebius	}
514290001Sglebius
515290001Sglebius	manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS);
516290001Sglebius
517290001Sglebius	/* Now Create the Completion Port */
518290001Sglebius	manager->hIoCompletionPort = CreateIoCompletionPort(
519290001Sglebius			INVALID_HANDLE_VALUE, NULL,
520290001Sglebius			0, manager->maxIOCPThreads);
521290001Sglebius	if (manager->hIoCompletionPort == NULL) {
522290001Sglebius		errval = GetLastError();
523290001Sglebius		isc__strerror(errval, strbuf, sizeof(strbuf));
524290001Sglebius		FATAL_ERROR(__FILE__, __LINE__,
525290001Sglebius				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
526290001Sglebius				ISC_MSG_FAILED,
527290001Sglebius				"CreateIoCompletionPort() failed "
528290001Sglebius				"during initialization: %s"),
529290001Sglebius				strbuf);
530290001Sglebius		exit(1);
531290001Sglebius	}
532290001Sglebius
533290001Sglebius	/*
534290001Sglebius	 * Worker threads for servicing the I/O
535290001Sglebius	 */
536290001Sglebius	iocompletionport_createthreads(manager->maxIOCPThreads, manager);
537290001Sglebius}
538290001Sglebius
539290001Sglebius/*
540290001Sglebius * Associate a socket with an IO Completion Port.  This allows us to queue events for it
541290001Sglebius * and have our worker pool of threads process them.
542290001Sglebius */
543290001Sglebiusvoid
544290001Sglebiusiocompletionport_update(isc_socket_t *sock) {
545290001Sglebius	HANDLE hiocp;
546290001Sglebius	char strbuf[ISC_STRERRORSIZE];
547290001Sglebius
548290001Sglebius	REQUIRE(VALID_SOCKET(sock));
549290001Sglebius
550290001Sglebius	hiocp = CreateIoCompletionPort((HANDLE)sock->fd,
551290001Sglebius		sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0);
552290001Sglebius
553290001Sglebius	if (hiocp == NULL) {
554290001Sglebius		DWORD errval = GetLastError();
555290001Sglebius		isc__strerror(errval, strbuf, sizeof(strbuf));
556290001Sglebius		isc_log_iwrite(isc_lctx,
557290001Sglebius				ISC_LOGCATEGORY_GENERAL,
558290001Sglebius				ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR,
559290001Sglebius				isc_msgcat, ISC_MSGSET_SOCKET,
560290001Sglebius				ISC_MSG_TOOMANYHANDLES,
561290001Sglebius				"iocompletionport_update: failed to open"
562290001Sglebius				" io completion port: %s",
563290001Sglebius				strbuf);
564290001Sglebius
565290001Sglebius		/* XXXMLG temporary hack to make failures detected.
566290001Sglebius		 * This function should return errors to the caller, not
567290001Sglebius		 * exit here.
568290001Sglebius		 */
569290001Sglebius		FATAL_ERROR(__FILE__, __LINE__,
570290001Sglebius				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
571290001Sglebius				ISC_MSG_FAILED,
572290001Sglebius				"CreateIoCompletionPort() failed "
573290001Sglebius				"during initialization: %s"),
574290001Sglebius				strbuf);
575290001Sglebius		exit(1);
576290001Sglebius	}
577290001Sglebius
578290001Sglebius	InterlockedIncrement(&sock->manager->iocp_total);
579290001Sglebius}
580290001Sglebius
581290001Sglebius/*
582290001Sglebius * Routine to cleanup and then close the socket.
583290001Sglebius * Only close the socket here if it is NOT associated
584290001Sglebius * with an event, otherwise the WSAWaitForMultipleEvents
585290001Sglebius * may fail due to the fact that the Wait should not
586290001Sglebius * be running while closing an event or a socket.
587290001Sglebius * The socket is locked before calling this function
588290001Sglebius */
589290001Sglebiusvoid
590290001Sglebiussocket_close(isc_socket_t *sock) {
591290001Sglebius
592290001Sglebius	REQUIRE(sock != NULL);
593290001Sglebius
594290001Sglebius	if (sock->fd != INVALID_SOCKET) {
595290001Sglebius		closesocket(sock->fd);
596290001Sglebius		sock->fd = INVALID_SOCKET;
597290001Sglebius		_set_state(sock, SOCK_CLOSED);
598290001Sglebius		InterlockedDecrement(&sock->manager->totalSockets);
599290001Sglebius	}
600290001Sglebius}
601290001Sglebius
602290001Sglebiusstatic isc_once_t initialise_once = ISC_ONCE_INIT;
603290001Sglebiusstatic isc_boolean_t initialised = ISC_FALSE;
604290001Sglebius
605290001Sglebiusstatic void
606290001Sglebiusinitialise(void) {
607290001Sglebius	WORD wVersionRequested;
608290001Sglebius	WSADATA wsaData;
609290001Sglebius	int err;
610290001Sglebius	SOCKET sock;
611290001Sglebius	GUID GUIDConnectEx = WSAID_CONNECTEX;
612290001Sglebius	GUID GUIDAcceptEx = WSAID_ACCEPTEX;
613290001Sglebius	GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS;
614290001Sglebius	DWORD dwBytes;
615290001Sglebius
616290001Sglebius	/* Need Winsock 2.2 or better */
617290001Sglebius	wVersionRequested = MAKEWORD(2, 2);
618290001Sglebius
619290001Sglebius	err = WSAStartup(wVersionRequested, &wsaData);
620290001Sglebius	if (err != 0) {
621290001Sglebius		char strbuf[ISC_STRERRORSIZE];
622290001Sglebius		isc__strerror(err, strbuf, sizeof(strbuf));
623290001Sglebius		FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s",
624290001Sglebius			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
625290001Sglebius					   ISC_MSG_FAILED, "failed"),
626290001Sglebius			    strbuf);
627290001Sglebius		exit(1);
628290001Sglebius	}
629290001Sglebius	/*
630290001Sglebius	 * The following APIs do not exist as functions in a library, but we must
631290001Sglebius	 * ask winsock for them.  They are "extensions" -- but why they cannot be
632290001Sglebius	 * actual functions is beyond me.  So, ask winsock for the pointers to the
633290001Sglebius	 * functions we need.
634290001Sglebius	 */
635290001Sglebius	sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
636290001Sglebius	INSIST(sock != INVALID_SOCKET);
637290001Sglebius	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
638290001Sglebius		 &GUIDConnectEx, sizeof(GUIDConnectEx),
639290001Sglebius		 &ISCConnectEx, sizeof(ISCConnectEx),
640290001Sglebius		 &dwBytes, NULL, NULL);
641290001Sglebius	INSIST(err == 0);
642290001Sglebius
643290001Sglebius	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
644290001Sglebius		 &GUIDAcceptEx, sizeof(GUIDAcceptEx),
645290001Sglebius		 &ISCAcceptEx, sizeof(ISCAcceptEx),
646290001Sglebius		 &dwBytes, NULL, NULL);
647290001Sglebius	INSIST(err == 0);
648290001Sglebius
649290001Sglebius	err = WSAIoctl(sock,  SIO_GET_EXTENSION_FUNCTION_POINTER,
650290001Sglebius		 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs),
651290001Sglebius		 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs),
652290001Sglebius		 &dwBytes, NULL, NULL);
653290001Sglebius	INSIST(err == 0);
654290001Sglebius
655290001Sglebius	closesocket(sock);
656290001Sglebius
657290001Sglebius	initialised = ISC_TRUE;
658290001Sglebius}
659290001Sglebius
660290001Sglebius/*
661290001Sglebius * Initialize socket services
662290001Sglebius */
663290001Sglebiusvoid
664290001SglebiusInitSockets(void) {
665290001Sglebius	RUNTIME_CHECK(isc_once_do(&initialise_once,
666290001Sglebius				  initialise) == ISC_R_SUCCESS);
667290001Sglebius	if (!initialised)
668290001Sglebius		exit(1);
669290001Sglebius}
670290001Sglebius
671290001Sglebiusint
672290001Sglebiusinternal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo,
673290001Sglebius		 struct msghdr *messagehdr, int flags, int *Error)
674290001Sglebius{
675290001Sglebius	int Result;
676290001Sglebius	DWORD BytesSent;
677290001Sglebius	DWORD Flags = flags;
678290001Sglebius	int total_sent;
679290001Sglebius
680290001Sglebius	*Error = 0;
681290001Sglebius	Result = WSASendTo(sock->fd, messagehdr->msg_iov,
682290001Sglebius			   messagehdr->msg_iovlen, &BytesSent,
683290001Sglebius			   Flags, (SOCKADDR *)&messagehdr->to_addr,
684290001Sglebius			   messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo,
685290001Sglebius			   NULL);
686290001Sglebius
687290001Sglebius	total_sent = (int)BytesSent;
688290001Sglebius
689290001Sglebius	/* Check for errors.*/
690290001Sglebius	if (Result == SOCKET_ERROR) {
691290001Sglebius		*Error = WSAGetLastError();
692290001Sglebius
693290001Sglebius		switch (*Error) {
694290001Sglebius		case WSA_IO_INCOMPLETE:
695290001Sglebius		case WSA_WAIT_IO_COMPLETION:
696290001Sglebius		case WSA_IO_PENDING:
697290001Sglebius		case NO_ERROR:		/* Strange, but okay */
698290001Sglebius			sock->pending_iocp++;
699290001Sglebius			sock->pending_send++;
700290001Sglebius			break;
701290001Sglebius
702290001Sglebius		default:
703290001Sglebius			return (-1);
704290001Sglebius			break;
705290001Sglebius		}
706290001Sglebius	} else {
707290001Sglebius		sock->pending_iocp++;
708290001Sglebius		sock->pending_send++;
709290001Sglebius	}
710290001Sglebius
711290001Sglebius	if (lpo != NULL)
712290001Sglebius		return (0);
713290001Sglebius	else
714290001Sglebius		return (total_sent);
715290001Sglebius}
716290001Sglebius
717290001Sglebiusstatic void
718290001Sglebiusqueue_receive_request(isc_socket_t *sock) {
719290001Sglebius	DWORD Flags = 0;
720290001Sglebius	DWORD NumBytes = 0;
721290001Sglebius	int total_bytes = 0;
722290001Sglebius	int Result;
723290001Sglebius	int Error;
724290001Sglebius	int need_retry;
725290001Sglebius	WSABUF iov[1];
726290001Sglebius	IoCompletionInfo *lpo = NULL;
727290001Sglebius	isc_result_t isc_result;
728290001Sglebius
729290001Sglebius retry:
730290001Sglebius	need_retry = ISC_FALSE;
731290001Sglebius
732290001Sglebius	/*
733290001Sglebius	 * If we already have a receive pending, do nothing.
734290001Sglebius	 */
735290001Sglebius	if (sock->pending_recv > 0) {
736290001Sglebius		if (lpo != NULL)
737290001Sglebius			HeapFree(hHeapHandle, 0, lpo);
738290001Sglebius		return;
739290001Sglebius	}
740290001Sglebius
741290001Sglebius	/*
742290001Sglebius	 * If no one is waiting, do nothing.
743290001Sglebius	 */
744290001Sglebius	if (ISC_LIST_EMPTY(sock->recv_list)) {
745290001Sglebius		if (lpo != NULL)
746290001Sglebius			HeapFree(hHeapHandle, 0, lpo);
747290001Sglebius		return;
748290001Sglebius	}
749290001Sglebius
750290001Sglebius	INSIST(sock->recvbuf.remaining == 0);
751290001Sglebius	INSIST(sock->fd != INVALID_SOCKET);
752290001Sglebius
753290001Sglebius	iov[0].len = sock->recvbuf.len;
754290001Sglebius	iov[0].buf = sock->recvbuf.base;
755290001Sglebius
756290001Sglebius	if (lpo == NULL) {
757290001Sglebius		lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
758290001Sglebius						    HEAP_ZERO_MEMORY,
759290001Sglebius						    sizeof(IoCompletionInfo));
760290001Sglebius		RUNTIME_CHECK(lpo != NULL);
761290001Sglebius	} else
762290001Sglebius		ZeroMemory(lpo, sizeof(IoCompletionInfo));
763290001Sglebius	lpo->request_type = SOCKET_RECV;
764290001Sglebius
765290001Sglebius	sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr);
766290001Sglebius
767290001Sglebius	Error = 0;
768290001Sglebius	Result = WSARecvFrom((SOCKET)sock->fd, iov, 1,
769290001Sglebius			     &NumBytes, &Flags,
770290001Sglebius			     (SOCKADDR *)&sock->recvbuf.from_addr,
771290001Sglebius			     &sock->recvbuf.from_addr_len,
772290001Sglebius			     (LPWSAOVERLAPPED)lpo, NULL);
773290001Sglebius
774290001Sglebius	/* Check for errors. */
775290001Sglebius	if (Result == SOCKET_ERROR) {
776290001Sglebius		Error = WSAGetLastError();
777290001Sglebius
778290001Sglebius		switch (Error) {
779290001Sglebius		case WSA_IO_PENDING:
780290001Sglebius			sock->pending_iocp++;
781290001Sglebius			sock->pending_recv++;
782290001Sglebius			break;
783290001Sglebius
784290001Sglebius		/* direct error: no completion event */
785290001Sglebius		case ERROR_HOST_UNREACHABLE:
786290001Sglebius		case WSAENETRESET:
787290001Sglebius		case WSAECONNRESET:
788290001Sglebius			if (!sock->connected) {
789290001Sglebius				/* soft error */
790290001Sglebius				need_retry = ISC_TRUE;
791290001Sglebius				break;
792290001Sglebius			}
793290001Sglebius			/* FALLTHROUGH */
794290001Sglebius
795290001Sglebius		default:
796290001Sglebius			isc_result = isc__errno2result(Error);
797290001Sglebius			if (isc_result == ISC_R_UNEXPECTED)
798290001Sglebius				UNEXPECTED_ERROR(__FILE__, __LINE__,
799290001Sglebius					"WSARecvFrom: Windows error code: %d, isc result %d",
800290001Sglebius					Error, isc_result);
801290001Sglebius			send_recvdone_abort(sock, isc_result);
802290001Sglebius			HeapFree(hHeapHandle, 0, lpo);
803290001Sglebius			lpo = NULL;
804290001Sglebius			break;
805290001Sglebius		}
806290001Sglebius	} else {
807290001Sglebius		/*
808290001Sglebius		 * The recv() finished immediately, but we will still get
809290001Sglebius		 * a completion event.  Rather than duplicate code, let
810290001Sglebius		 * that thread handle sending the data along its way.
811290001Sglebius		 */
812290001Sglebius		sock->pending_iocp++;
813290001Sglebius		sock->pending_recv++;
814290001Sglebius	}
815290001Sglebius
816290001Sglebius	socket_log(__LINE__, sock, NULL, IOEVENT,
817290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET,
818290001Sglebius		   ISC_MSG_DOIORECV,
819290001Sglebius		   "queue_io_request: fd %d result %d error %d",
820290001Sglebius		   sock->fd, Result, Error);
821290001Sglebius
822290001Sglebius	CONSISTENT(sock);
823290001Sglebius
824290001Sglebius	if (need_retry)
825290001Sglebius		goto retry;
826290001Sglebius}
827290001Sglebius
828290001Sglebiusstatic void
829290001Sglebiusmanager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category,
830290001Sglebius	    isc_logmodule_t *module, int level, const char *fmt, ...)
831290001Sglebius{
832290001Sglebius	char msgbuf[2048];
833290001Sglebius	va_list ap;
834290001Sglebius
835290001Sglebius	if (!isc_log_wouldlog(isc_lctx, level))
836290001Sglebius		return;
837290001Sglebius
838290001Sglebius	va_start(ap, fmt);
839290001Sglebius	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
840290001Sglebius	va_end(ap);
841290001Sglebius
842290001Sglebius	isc_log_write(isc_lctx, category, module, level,
843290001Sglebius		      "sockmgr %p: %s", sockmgr, msgbuf);
844290001Sglebius}
845290001Sglebius
846290001Sglebiusstatic void
847290001Sglebiussocket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address,
848290001Sglebius	   isc_logcategory_t *category, isc_logmodule_t *module, int level,
849290001Sglebius	   isc_msgcat_t *msgcat, int msgset, int message,
850290001Sglebius	   const char *fmt, ...)
851290001Sglebius{
852290001Sglebius	char msgbuf[2048];
853290001Sglebius	char peerbuf[256];
854290001Sglebius	va_list ap;
855290001Sglebius
856290001Sglebius
857290001Sglebius	if (!isc_log_wouldlog(isc_lctx, level))
858290001Sglebius		return;
859290001Sglebius
860290001Sglebius	va_start(ap, fmt);
861290001Sglebius	vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap);
862290001Sglebius	va_end(ap);
863290001Sglebius
864290001Sglebius	if (address == NULL) {
865290001Sglebius		isc_log_iwrite(isc_lctx, category, module, level,
866290001Sglebius			       msgcat, msgset, message,
867290001Sglebius			       "socket %p line %d: %s", sock, lineno, msgbuf);
868290001Sglebius	} else {
869290001Sglebius		isc_sockaddr_format(address, peerbuf, sizeof(peerbuf));
870290001Sglebius		isc_log_iwrite(isc_lctx, category, module, level,
871290001Sglebius			       msgcat, msgset, message,
872290001Sglebius				   "socket %p line %d peer %s: %s", sock, lineno,
873290001Sglebius				   peerbuf, msgbuf);
874290001Sglebius	}
875290001Sglebius
876290001Sglebius}
877290001Sglebius
878290001Sglebius/*
879290001Sglebius * Make an fd SOCKET non-blocking.
880290001Sglebius */
881290001Sglebiusstatic isc_result_t
882290001Sglebiusmake_nonblock(SOCKET fd) {
883290001Sglebius	int ret;
884290001Sglebius	unsigned long flags = 1;
885290001Sglebius	char strbuf[ISC_STRERRORSIZE];
886290001Sglebius
887290001Sglebius	/* Set the socket to non-blocking */
888290001Sglebius	ret = ioctlsocket(fd, FIONBIO, &flags);
889290001Sglebius
890290001Sglebius	if (ret == -1) {
891290001Sglebius		isc__strerror(errno, strbuf, sizeof(strbuf));
892290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__,
893290001Sglebius				 "ioctlsocket(%d, FIOBIO, %d): %s",
894290001Sglebius				 fd, flags, strbuf);
895290001Sglebius
896290001Sglebius		return (ISC_R_UNEXPECTED);
897290001Sglebius	}
898290001Sglebius
899290001Sglebius	return (ISC_R_SUCCESS);
900290001Sglebius}
901290001Sglebius
902290001Sglebius/*
903290001Sglebius * Windows 2000 systems incorrectly cause UDP sockets using WSARecvFrom
904290001Sglebius * to not work correctly, returning a WSACONNRESET error when a WSASendTo
905290001Sglebius * fails with an "ICMP port unreachable" response and preventing the
906290001Sglebius * socket from using the WSARecvFrom in subsequent operations.
907290001Sglebius * The function below fixes this, but requires that Windows 2000
908290001Sglebius * Service Pack 2 or later be installed on the system.  NT 4.0
909290001Sglebius * systems are not affected by this and work correctly.
910290001Sglebius * See Microsoft Knowledge Base Article Q263823 for details of this.
911290001Sglebius */
912290001Sglebiusisc_result_t
913290001Sglebiusconnection_reset_fix(SOCKET fd) {
914290001Sglebius	DWORD dwBytesReturned = 0;
915290001Sglebius	BOOL  bNewBehavior = FALSE;
916290001Sglebius	DWORD status;
917290001Sglebius
918290001Sglebius	if (isc_win32os_majorversion() < 5)
919290001Sglebius		return (ISC_R_SUCCESS); /*  NT 4.0 has no problem */
920290001Sglebius
921290001Sglebius	/* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */
922290001Sglebius	status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior,
923290001Sglebius			  sizeof(bNewBehavior), NULL, 0,
924290001Sglebius			  &dwBytesReturned, NULL, NULL);
925290001Sglebius	if (status != SOCKET_ERROR)
926290001Sglebius		return (ISC_R_SUCCESS);
927290001Sglebius	else {
928290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__,
929290001Sglebius				 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s",
930290001Sglebius				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
931290001Sglebius						ISC_MSG_FAILED, "failed"));
932290001Sglebius		return (ISC_R_UNEXPECTED);
933290001Sglebius	}
934290001Sglebius}
935290001Sglebius
936290001Sglebius/*
937290001Sglebius * Construct an iov array and attach it to the msghdr passed in.  This is
938290001Sglebius * the SEND constructor, which will use the used region of the buffer
939290001Sglebius * (if using a buffer list) or will use the internal region (if a single
940290001Sglebius * buffer I/O is requested).
941290001Sglebius *
942290001Sglebius * Nothing can be NULL, and the done event must list at least one buffer
943290001Sglebius * on the buffer linked list for this function to be meaningful.
944290001Sglebius */
945290001Sglebiusstatic void
946290001Sglebiusbuild_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev,
947290001Sglebius		  struct msghdr *msg, char *cmsg, WSABUF *iov,
948290001Sglebius		  IoCompletionInfo  *lpo)
949290001Sglebius{
950290001Sglebius	unsigned int iovcount;
951290001Sglebius	isc_buffer_t *buffer;
952290001Sglebius	buflist_t  *cpbuffer;
953290001Sglebius	isc_region_t used;
954290001Sglebius	size_t write_count;
955290001Sglebius	size_t skip_count;
956290001Sglebius
957290001Sglebius	memset(msg, 0, sizeof(*msg));
958290001Sglebius
959290001Sglebius	memcpy(&msg->to_addr, &dev->address.type, dev->address.length);
960290001Sglebius	msg->to_addr_len = dev->address.length;
961290001Sglebius
962290001Sglebius	buffer = ISC_LIST_HEAD(dev->bufferlist);
963290001Sglebius	write_count = 0;
964290001Sglebius	iovcount = 0;
965290001Sglebius
966290001Sglebius	/*
967290001Sglebius	 * Single buffer I/O?  Skip what we've done so far in this region.
968290001Sglebius	 */
969290001Sglebius	if (buffer == NULL) {
970290001Sglebius		write_count = dev->region.length - dev->n;
971290001Sglebius		cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
972290001Sglebius		RUNTIME_CHECK(cpbuffer != NULL);
973290001Sglebius		cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count);
974290001Sglebius		RUNTIME_CHECK(cpbuffer->buf != NULL);
975290001Sglebius
976290001Sglebius		socket_log(__LINE__, sock, NULL, TRACE,
977290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
978290001Sglebius		   "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
979290001Sglebius		   cpbuffer->buf, write_count);
980290001Sglebius
981290001Sglebius		memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count);
982290001Sglebius		cpbuffer->buflen = write_count;
983290001Sglebius		ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link);
984290001Sglebius		iov[0].buf = cpbuffer->buf;
985290001Sglebius		iov[0].len = write_count;
986290001Sglebius		iovcount = 1;
987290001Sglebius
988290001Sglebius		goto config;
989290001Sglebius	}
990290001Sglebius
991290001Sglebius	/*
992290001Sglebius	 * Multibuffer I/O.
993290001Sglebius	 * Skip the data in the buffer list that we have already written.
994290001Sglebius	 */
995290001Sglebius	skip_count = dev->n;
996290001Sglebius	while (buffer != NULL) {
997290001Sglebius		REQUIRE(ISC_BUFFER_VALID(buffer));
998290001Sglebius		if (skip_count < isc_buffer_usedlength(buffer))
999290001Sglebius			break;
1000290001Sglebius		skip_count -= isc_buffer_usedlength(buffer);
1001290001Sglebius		buffer = ISC_LIST_NEXT(buffer, link);
1002290001Sglebius	}
1003290001Sglebius
1004290001Sglebius	while (buffer != NULL) {
1005290001Sglebius		INSIST(iovcount < MAXSCATTERGATHER_SEND);
1006290001Sglebius
1007290001Sglebius		isc_buffer_usedregion(buffer, &used);
1008290001Sglebius
1009290001Sglebius		if (used.length > 0) {
1010290001Sglebius			int uselen = used.length - skip_count;
1011290001Sglebius			cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t));
1012290001Sglebius			RUNTIME_CHECK(cpbuffer != NULL);
1013290001Sglebius			cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen);
1014290001Sglebius			RUNTIME_CHECK(cpbuffer->buf != NULL);
1015290001Sglebius
1016290001Sglebius			socket_log(__LINE__, sock, NULL, TRACE,
1017290001Sglebius			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
1018290001Sglebius			   "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t),
1019290001Sglebius			   cpbuffer->buf, write_count);
1020290001Sglebius
1021290001Sglebius			memcpy(cpbuffer->buf,(used.base + skip_count), uselen);
1022290001Sglebius			cpbuffer->buflen = uselen;
1023290001Sglebius			iov[iovcount].buf = cpbuffer->buf;
1024290001Sglebius			iov[iovcount].len = used.length - skip_count;
1025290001Sglebius			write_count += uselen;
1026290001Sglebius			skip_count = 0;
1027290001Sglebius			iovcount++;
1028290001Sglebius		}
1029290001Sglebius		buffer = ISC_LIST_NEXT(buffer, link);
1030290001Sglebius	}
1031290001Sglebius
1032290001Sglebius	INSIST(skip_count == 0);
1033290001Sglebius
1034290001Sglebius config:
1035290001Sglebius	msg->msg_iov = iov;
1036290001Sglebius	msg->msg_iovlen = iovcount;
1037290001Sglebius	msg->msg_totallen = write_count;
1038290001Sglebius}
1039290001Sglebius
1040290001Sglebiusstatic void
1041290001Sglebiusset_dev_address(isc_sockaddr_t *address, isc_socket_t *sock,
1042290001Sglebius		isc_socketevent_t *dev)
1043290001Sglebius{
1044290001Sglebius	if (sock->type == isc_sockettype_udp) {
1045290001Sglebius		if (address != NULL)
1046290001Sglebius			dev->address = *address;
1047290001Sglebius		else
1048290001Sglebius			dev->address = sock->address;
1049290001Sglebius	} else if (sock->type == isc_sockettype_tcp) {
1050290001Sglebius		INSIST(address == NULL);
1051290001Sglebius		dev->address = sock->address;
1052290001Sglebius	}
1053290001Sglebius}
1054290001Sglebius
1055290001Sglebiusstatic void
1056290001Sglebiusdestroy_socketevent(isc_event_t *event) {
1057290001Sglebius	isc_socketevent_t *ev = (isc_socketevent_t *)event;
1058290001Sglebius
1059290001Sglebius	INSIST(ISC_LIST_EMPTY(ev->bufferlist));
1060290001Sglebius
1061290001Sglebius	(ev->destroy)(event);
1062290001Sglebius}
1063290001Sglebius
1064290001Sglebiusstatic isc_socketevent_t *
1065290001Sglebiusallocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype,
1066290001Sglebius		     isc_taskaction_t action, const void *arg)
1067290001Sglebius{
1068290001Sglebius	isc_socketevent_t *ev;
1069290001Sglebius
1070290001Sglebius	ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx,
1071290001Sglebius						     sock, eventtype,
1072290001Sglebius						     action, arg,
1073290001Sglebius						     sizeof(*ev));
1074290001Sglebius	if (ev == NULL)
1075290001Sglebius		return (NULL);
1076290001Sglebius
1077290001Sglebius	ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set
1078290001Sglebius	ISC_LINK_INIT(ev, ev_link);
1079290001Sglebius	ISC_LIST_INIT(ev->bufferlist);
1080290001Sglebius	ev->region.base = NULL;
1081290001Sglebius	ev->n = 0;
1082290001Sglebius	ev->offset = 0;
1083290001Sglebius	ev->attributes = 0;
1084290001Sglebius	ev->destroy = ev->ev_destroy;
1085290001Sglebius	ev->ev_destroy = destroy_socketevent;
1086290001Sglebius
1087290001Sglebius	return (ev);
1088290001Sglebius}
1089290001Sglebius
#if defined(ISC_SOCKET_DEBUG)
/*
 * Debugging aid, only compiled when ISC_SOCKET_DEBUG is defined: print
 * the msghdr and each of its iov entries to stdout.
 */
static void
dump_msg(struct msghdr *msg, isc_socket_t *sock) {
	unsigned int i;

	/*
	 * Every argument is cast to the exact type its conversion
	 * specifier expects (%p takes a void pointer, %d an int, %u an
	 * unsigned int); the format strings themselves are unchanged.
	 */
	printf("MSGHDR %p, Socket #: %u\n", (void *)msg,
	       (unsigned int)sock->fd);
	printf("\tname %p, namelen %d\n", (void *)msg->msg_name,
	       (int)msg->msg_namelen);
	printf("\tiov %p, iovlen %d\n", (void *)msg->msg_iov,
	       (int)msg->msg_iovlen);
	for (i = 0; i < (unsigned int)msg->msg_iovlen; i++)
		printf("\t\t%d\tbase %p, len %d\n", (int)i,
		       (void *)msg->msg_iov[i].buf,
		       (int)msg->msg_iov[i].len);
}
#endif
1104290001Sglebius
1105290001Sglebius/*
1106290001Sglebius * map the error code
1107290001Sglebius */
1108290001Sglebiusint
1109290001Sglebiusmap_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno,
1110290001Sglebius		 char *errorstring, size_t bufsize) {
1111290001Sglebius
1112290001Sglebius	int doreturn;
1113290001Sglebius	switch (windows_errno) {
1114290001Sglebius	case WSAECONNREFUSED:
1115290001Sglebius		*isc_errno = ISC_R_CONNREFUSED;
1116290001Sglebius		if (sock->connected)
1117290001Sglebius			doreturn = DOIO_HARD;
1118290001Sglebius		else
1119290001Sglebius			doreturn = DOIO_SOFT;
1120290001Sglebius		break;
1121290001Sglebius	case WSAENETUNREACH:
1122290001Sglebius	case ERROR_NETWORK_UNREACHABLE:
1123290001Sglebius		*isc_errno = ISC_R_NETUNREACH;
1124290001Sglebius		if (sock->connected)
1125290001Sglebius			doreturn = DOIO_HARD;
1126290001Sglebius		else
1127290001Sglebius			doreturn = DOIO_SOFT;
1128290001Sglebius		break;
1129290001Sglebius	case ERROR_PORT_UNREACHABLE:
1130290001Sglebius	case ERROR_HOST_UNREACHABLE:
1131290001Sglebius	case WSAEHOSTUNREACH:
1132290001Sglebius		*isc_errno = ISC_R_HOSTUNREACH;
1133290001Sglebius		if (sock->connected)
1134290001Sglebius			doreturn = DOIO_HARD;
1135290001Sglebius		else
1136290001Sglebius			doreturn = DOIO_SOFT;
1137290001Sglebius		break;
1138290001Sglebius	case WSAENETDOWN:
1139290001Sglebius		*isc_errno = ISC_R_NETDOWN;
1140290001Sglebius		if (sock->connected)
1141290001Sglebius			doreturn = DOIO_HARD;
1142290001Sglebius		else
1143290001Sglebius			doreturn = DOIO_SOFT;
1144290001Sglebius		break;
1145290001Sglebius	case WSAEHOSTDOWN:
1146290001Sglebius		*isc_errno = ISC_R_HOSTDOWN;
1147290001Sglebius		if (sock->connected)
1148290001Sglebius			doreturn = DOIO_HARD;
1149290001Sglebius		else
1150290001Sglebius			doreturn = DOIO_SOFT;
1151290001Sglebius		break;
1152290001Sglebius	case WSAEACCES:
1153290001Sglebius		*isc_errno = ISC_R_NOPERM;
1154290001Sglebius		if (sock->connected)
1155290001Sglebius			doreturn = DOIO_HARD;
1156290001Sglebius		else
1157290001Sglebius			doreturn = DOIO_SOFT;
1158290001Sglebius		break;
1159290001Sglebius	case WSAECONNRESET:
1160290001Sglebius	case WSAENETRESET:
1161290001Sglebius	case WSAECONNABORTED:
1162290001Sglebius	case WSAEDISCON:
1163290001Sglebius		*isc_errno = ISC_R_CONNECTIONRESET;
1164290001Sglebius		if (sock->connected)
1165290001Sglebius			doreturn = DOIO_HARD;
1166290001Sglebius		else
1167290001Sglebius			doreturn = DOIO_SOFT;
1168290001Sglebius		break;
1169290001Sglebius	case WSAENOTCONN:
1170290001Sglebius		*isc_errno = ISC_R_NOTCONNECTED;
1171290001Sglebius		if (sock->connected)
1172290001Sglebius			doreturn = DOIO_HARD;
1173290001Sglebius		else
1174290001Sglebius			doreturn = DOIO_SOFT;
1175290001Sglebius		break;
1176290001Sglebius	case ERROR_OPERATION_ABORTED:
1177290001Sglebius	case ERROR_CONNECTION_ABORTED:
1178290001Sglebius	case ERROR_REQUEST_ABORTED:
1179290001Sglebius		*isc_errno = ISC_R_CONNECTIONRESET;
1180290001Sglebius		doreturn = DOIO_HARD;
1181290001Sglebius		break;
1182290001Sglebius	case WSAENOBUFS:
1183290001Sglebius		*isc_errno = ISC_R_NORESOURCES;
1184290001Sglebius		doreturn = DOIO_HARD;
1185290001Sglebius		break;
1186290001Sglebius	case WSAEAFNOSUPPORT:
1187290001Sglebius		*isc_errno = ISC_R_FAMILYNOSUPPORT;
1188290001Sglebius		doreturn = DOIO_HARD;
1189290001Sglebius		break;
1190290001Sglebius	case WSAEADDRNOTAVAIL:
1191290001Sglebius		*isc_errno = ISC_R_ADDRNOTAVAIL;
1192290001Sglebius		doreturn = DOIO_HARD;
1193290001Sglebius		break;
1194290001Sglebius	case WSAEDESTADDRREQ:
1195290001Sglebius		*isc_errno = ISC_R_BADADDRESSFORM;
1196290001Sglebius		doreturn = DOIO_HARD;
1197290001Sglebius		break;
1198290001Sglebius	case ERROR_NETNAME_DELETED:
1199290001Sglebius		*isc_errno = ISC_R_NETDOWN;
1200290001Sglebius		doreturn = DOIO_HARD;
1201290001Sglebius		break;
1202290001Sglebius	default:
1203290001Sglebius		*isc_errno = ISC_R_IOERROR;
1204290001Sglebius		doreturn = DOIO_HARD;
1205290001Sglebius		break;
1206290001Sglebius	}
1207290001Sglebius	if (doreturn == DOIO_HARD) {
1208290001Sglebius		isc__strerror(windows_errno, errorstring, bufsize);
1209290001Sglebius	}
1210290001Sglebius	return (doreturn);
1211290001Sglebius}
1212290001Sglebius
1213290001Sglebiusstatic void
1214290001Sglebiusfill_recv(isc_socket_t *sock, isc_socketevent_t *dev) {
1215290001Sglebius	isc_region_t r;
1216290001Sglebius	int copylen;
1217290001Sglebius	isc_buffer_t *buffer;
1218290001Sglebius
1219290001Sglebius	INSIST(dev->n < dev->minimum);
1220290001Sglebius	INSIST(sock->recvbuf.remaining > 0);
1221290001Sglebius	INSIST(sock->pending_recv == 0);
1222290001Sglebius
1223290001Sglebius	if (sock->type == isc_sockettype_udp) {
1224290001Sglebius		dev->address.length = sock->recvbuf.from_addr_len;
1225290001Sglebius		memcpy(&dev->address.type, &sock->recvbuf.from_addr,
1226290001Sglebius		    sock->recvbuf.from_addr_len);
1227290001Sglebius		if (isc_sockaddr_getport(&dev->address) == 0) {
1228290001Sglebius			if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1229290001Sglebius				socket_log(__LINE__, sock, &dev->address, IOEVENT,
1230290001Sglebius					   isc_msgcat, ISC_MSGSET_SOCKET,
1231290001Sglebius					   ISC_MSG_ZEROPORT,
1232290001Sglebius					   "dropping source port zero packet");
1233290001Sglebius			}
1234290001Sglebius			sock->recvbuf.remaining = 0;
1235290001Sglebius			return;
1236290001Sglebius		}
1237290001Sglebius	} else if (sock->type == isc_sockettype_tcp) {
1238290001Sglebius		dev->address = sock->address;
1239290001Sglebius	}
1240290001Sglebius
1241290001Sglebius	/*
1242290001Sglebius	 * Run through the list of buffers we were given, and find the
1243290001Sglebius	 * first one with space.  Once it is found, loop through, filling
1244290001Sglebius	 * the buffers as much as possible.
1245290001Sglebius	 */
1246290001Sglebius	buffer = ISC_LIST_HEAD(dev->bufferlist);
1247290001Sglebius	if (buffer != NULL) { // Multi-buffer receive
1248290001Sglebius		while (buffer != NULL && sock->recvbuf.remaining > 0) {
1249290001Sglebius			REQUIRE(ISC_BUFFER_VALID(buffer));
1250290001Sglebius			if (isc_buffer_availablelength(buffer) > 0) {
1251290001Sglebius				isc_buffer_availableregion(buffer, &r);
1252290001Sglebius				copylen = min(r.length, sock->recvbuf.remaining);
1253290001Sglebius				memcpy(r.base, sock->recvbuf.consume_position, copylen);
1254290001Sglebius				sock->recvbuf.consume_position += copylen;
1255290001Sglebius				sock->recvbuf.remaining -= copylen;
1256290001Sglebius				isc_buffer_add(buffer, copylen);
1257290001Sglebius				dev->n += copylen;
1258290001Sglebius			}
1259290001Sglebius			buffer = ISC_LIST_NEXT(buffer, link);
1260290001Sglebius		}
1261290001Sglebius	} else { // Single-buffer receive
1262290001Sglebius		copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining);
1263290001Sglebius		memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen);
1264290001Sglebius		sock->recvbuf.consume_position += copylen;
1265290001Sglebius		sock->recvbuf.remaining -= copylen;
1266290001Sglebius		dev->n += copylen;
1267290001Sglebius	}
1268290001Sglebius
1269290001Sglebius	/*
1270290001Sglebius	 * UDP receives are all-consuming.  That is, if we have 4k worth of
1271290001Sglebius	 * data in our receive buffer, and the caller only gave us
1272290001Sglebius	 * 1k of space, we will toss the remaining 3k of data.  TCP
1273290001Sglebius	 * will keep the extra data around and use it for later requests.
1274290001Sglebius	 */
1275290001Sglebius	if (sock->type == isc_sockettype_udp)
1276290001Sglebius		sock->recvbuf.remaining = 0;
1277290001Sglebius}
1278290001Sglebius
1279290001Sglebius/*
1280290001Sglebius * Copy out as much data from the internal buffer to done events.
1281290001Sglebius * As each done event is filled, send it along its way.
1282290001Sglebius */
1283290001Sglebiusstatic void
1284290001Sglebiuscompleteio_recv(isc_socket_t *sock)
1285290001Sglebius{
1286290001Sglebius	isc_socketevent_t *dev;
1287290001Sglebius
1288290001Sglebius	/*
1289290001Sglebius	 * If we are in the process of filling our buffer, we cannot
1290290001Sglebius	 * touch it yet, so don't.
1291290001Sglebius	 */
1292290001Sglebius	if (sock->pending_recv > 0)
1293290001Sglebius		return;
1294290001Sglebius
1295290001Sglebius	while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) {
1296290001Sglebius		dev = ISC_LIST_HEAD(sock->recv_list);
1297290001Sglebius
1298290001Sglebius		/*
1299290001Sglebius		 * See if we have sufficient data in our receive buffer
1300290001Sglebius		 * to handle this.  If we do, copy out the data.
1301290001Sglebius		 */
1302290001Sglebius		fill_recv(sock, dev);
1303290001Sglebius
1304290001Sglebius		/*
1305290001Sglebius		 * Did we satisfy it?
1306290001Sglebius		 */
1307290001Sglebius		if (dev->n >= dev->minimum) {
1308290001Sglebius			dev->result = ISC_R_SUCCESS;
1309290001Sglebius			send_recvdone_event(sock, &dev);
1310290001Sglebius		}
1311290001Sglebius	}
1312290001Sglebius}
1313290001Sglebius
1314290001Sglebius/*
1315290001Sglebius * Returns:
1316290001Sglebius *	DOIO_SUCCESS	The operation succeeded.  dev->result contains
1317290001Sglebius *			ISC_R_SUCCESS.
1318290001Sglebius *
1319290001Sglebius *	DOIO_HARD	A hard or unexpected I/O error was encountered.
1320290001Sglebius *			dev->result contains the appropriate error.
1321290001Sglebius *
1322290001Sglebius *	DOIO_SOFT	A soft I/O error was encountered.  No senddone
1323290001Sglebius *			event was sent.  The operation should be retried.
1324290001Sglebius *
1325290001Sglebius *	No other return values are possible.
1326290001Sglebius */
1327290001Sglebiusstatic int
1328290001Sglebiuscompleteio_send(isc_socket_t *sock, isc_socketevent_t *dev,
1329290001Sglebius		struct msghdr *messagehdr, int cc, int send_errno)
1330290001Sglebius{
1331290001Sglebius	char addrbuf[ISC_SOCKADDR_FORMATSIZE];
1332290001Sglebius	char strbuf[ISC_STRERRORSIZE];
1333290001Sglebius
1334290001Sglebius	if (send_errno != 0) {
1335290001Sglebius		if (SOFT_ERROR(send_errno))
1336290001Sglebius			return (DOIO_SOFT);
1337290001Sglebius
1338290001Sglebius		return (map_socket_error(sock, send_errno, &dev->result,
1339290001Sglebius			strbuf, sizeof(strbuf)));
1340290001Sglebius
1341290001Sglebius		/*
1342290001Sglebius		 * The other error types depend on whether or not the
1343290001Sglebius		 * socket is UDP or TCP.  If it is UDP, some errors
1344290001Sglebius		 * that we expect to be fatal under TCP are merely
1345290001Sglebius		 * annoying, and are really soft errors.
1346290001Sglebius		 *
1347290001Sglebius		 * However, these soft errors are still returned as
1348290001Sglebius		 * a status.
1349290001Sglebius		 */
1350290001Sglebius		isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf));
1351290001Sglebius		isc__strerror(send_errno, strbuf, sizeof(strbuf));
1352290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s",
1353290001Sglebius				 addrbuf, strbuf);
1354290001Sglebius		dev->result = isc__errno2result(send_errno);
1355290001Sglebius		return (DOIO_HARD);
1356290001Sglebius	}
1357290001Sglebius
1358290001Sglebius	/*
1359290001Sglebius	 * If we write less than we expected, update counters, poke.
1360290001Sglebius	 */
1361290001Sglebius	dev->n += cc;
1362290001Sglebius	if (cc != messagehdr->msg_totallen)
1363290001Sglebius		return (DOIO_SOFT);
1364290001Sglebius
1365290001Sglebius	/*
1366290001Sglebius	 * Exactly what we wanted to write.  We're done with this
1367290001Sglebius	 * entry.  Post its completion event.
1368290001Sglebius	 */
1369290001Sglebius	dev->result = ISC_R_SUCCESS;
1370290001Sglebius	return (DOIO_SUCCESS);
1371290001Sglebius}
1372290001Sglebius
1373290001Sglebiusstatic int
1374290001Sglebiusstartio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes,
1375290001Sglebius	     int *send_errno)
1376290001Sglebius{
1377290001Sglebius	char *cmsg = NULL;
1378290001Sglebius	char strbuf[ISC_STRERRORSIZE];
1379290001Sglebius	IoCompletionInfo *lpo;
1380290001Sglebius	int status;
1381290001Sglebius	struct msghdr *msghdr;
1382290001Sglebius
1383290001Sglebius	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
1384290001Sglebius					    HEAP_ZERO_MEMORY,
1385290001Sglebius					    sizeof(IoCompletionInfo));
1386290001Sglebius	RUNTIME_CHECK(lpo != NULL);
1387290001Sglebius	lpo->request_type = SOCKET_SEND;
1388290001Sglebius	lpo->dev = dev;
1389290001Sglebius	msghdr = &lpo->messagehdr;
1390290001Sglebius	memset(msghdr, 0, sizeof(struct msghdr));
1391290001Sglebius	ISC_LIST_INIT(lpo->bufferlist);
1392290001Sglebius
1393290001Sglebius	build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo);
1394290001Sglebius
1395290001Sglebius	*nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno);
1396290001Sglebius
1397290001Sglebius	if (*nbytes < 0) {
1398290001Sglebius		/*
1399290001Sglebius		 * I/O has been initiated
1400290001Sglebius		 * completion will be through the completion port
1401290001Sglebius		 */
1402290001Sglebius		if (PENDING_ERROR(*send_errno)) {
1403290001Sglebius			status = DOIO_PENDING;
1404290001Sglebius			goto done;
1405290001Sglebius		}
1406290001Sglebius
1407290001Sglebius		if (SOFT_ERROR(*send_errno)) {
1408290001Sglebius			status = DOIO_SOFT;
1409290001Sglebius			goto done;
1410290001Sglebius		}
1411290001Sglebius
1412290001Sglebius		/*
1413290001Sglebius		 * If we got this far then something is wrong
1414290001Sglebius		 */
1415290001Sglebius		if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) {
1416290001Sglebius			isc__strerror(*send_errno, strbuf, sizeof(strbuf));
1417290001Sglebius			socket_log(__LINE__, sock, NULL, IOEVENT,
1418290001Sglebius				   isc_msgcat, ISC_MSGSET_SOCKET,
1419290001Sglebius				   ISC_MSG_INTERNALSEND,
1420290001Sglebius				   "startio_send: internal_sendmsg(%d) %d "
1421290001Sglebius				   "bytes, err %d/%s",
1422290001Sglebius				   sock->fd, *nbytes, *send_errno, strbuf);
1423290001Sglebius		}
1424290001Sglebius		status = DOIO_HARD;
1425290001Sglebius		goto done;
1426290001Sglebius	}
1427290001Sglebius	dev->result = ISC_R_SUCCESS;
1428290001Sglebius	status = DOIO_SOFT;
1429290001Sglebius done:
1430290001Sglebius	_set_state(sock, SOCK_DATA);
1431290001Sglebius	return (status);
1432290001Sglebius}
1433290001Sglebius
1434290001Sglebiusstatic isc_result_t
1435290001Sglebiusallocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type,
1436290001Sglebius		isc_socket_t **socketp) {
1437290001Sglebius	isc_socket_t *sock;
1438290001Sglebius	isc_result_t result;
1439290001Sglebius
1440290001Sglebius	sock = isc_mem_get(manager->mctx, sizeof(*sock));
1441290001Sglebius
1442290001Sglebius	if (sock == NULL)
1443290001Sglebius		return (ISC_R_NOMEMORY);
1444290001Sglebius
1445290001Sglebius	sock->magic = 0;
1446290001Sglebius	sock->references = 0;
1447290001Sglebius
1448290001Sglebius	sock->manager = manager;
1449290001Sglebius	sock->type = type;
1450290001Sglebius	sock->fd = INVALID_SOCKET;
1451290001Sglebius
1452290001Sglebius	ISC_LINK_INIT(sock, link);
1453290001Sglebius
1454290001Sglebius	/*
1455290001Sglebius	 * set up list of readers and writers to be initially empty
1456290001Sglebius	 */
1457290001Sglebius	ISC_LIST_INIT(sock->recv_list);
1458290001Sglebius	ISC_LIST_INIT(sock->send_list);
1459290001Sglebius	ISC_LIST_INIT(sock->accept_list);
1460290001Sglebius	sock->connect_ev = NULL;
1461290001Sglebius	sock->pending_accept = 0;
1462290001Sglebius	sock->pending_recv = 0;
1463290001Sglebius	sock->pending_send = 0;
1464290001Sglebius	sock->pending_iocp = 0;
1465290001Sglebius	sock->listener = 0;
1466290001Sglebius	sock->connected = 0;
1467290001Sglebius	sock->pending_connect = 0;
1468290001Sglebius	sock->bound = 0;
1469290001Sglebius	sock->dupped = 0;
1470290001Sglebius	memset(sock->name, 0, sizeof(sock->name));	// zero the name field
1471290001Sglebius	_set_state(sock, SOCK_INITIALIZED);
1472290001Sglebius
1473290001Sglebius	sock->recvbuf.len = 65536;
1474290001Sglebius	sock->recvbuf.consume_position = sock->recvbuf.base;
1475290001Sglebius	sock->recvbuf.remaining = 0;
1476290001Sglebius	sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size
1477290001Sglebius	if (sock->recvbuf.base == NULL) {
1478290001Sglebius		sock->magic = 0;
1479290001Sglebius		goto error;
1480290001Sglebius	}
1481290001Sglebius
1482290001Sglebius	/*
1483290001Sglebius	 * initialize the lock
1484290001Sglebius	 */
1485290001Sglebius	result = isc_mutex_init(&sock->lock);
1486290001Sglebius	if (result != ISC_R_SUCCESS) {
1487290001Sglebius		sock->magic = 0;
1488290001Sglebius		isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1489290001Sglebius		sock->recvbuf.base = NULL;
1490290001Sglebius		goto error;
1491290001Sglebius	}
1492290001Sglebius
1493290001Sglebius	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1494290001Sglebius		   "allocated");
1495290001Sglebius
1496290001Sglebius	sock->magic = SOCKET_MAGIC;
1497290001Sglebius	*socketp = sock;
1498290001Sglebius
1499290001Sglebius	return (ISC_R_SUCCESS);
1500290001Sglebius
1501290001Sglebius error:
1502290001Sglebius	isc_mem_put(manager->mctx, sock, sizeof(*sock));
1503290001Sglebius
1504290001Sglebius	return (result);
1505290001Sglebius}
1506290001Sglebius
1507290001Sglebius/*
1508290001Sglebius * Verify that the socket state is consistent.
1509290001Sglebius */
1510290001Sglebiusstatic void
1511290001Sglebiusconsistent(isc_socket_t *sock) {
1512290001Sglebius
1513290001Sglebius	isc_socketevent_t *dev;
1514290001Sglebius	isc_socket_newconnev_t *nev;
1515290001Sglebius	unsigned int count;
1516290001Sglebius	char *crash_reason;
1517290001Sglebius	isc_boolean_t crash = ISC_FALSE;
1518290001Sglebius
1519290001Sglebius	REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send
1520290001Sglebius		+ sock->pending_accept + sock->pending_connect);
1521290001Sglebius
1522290001Sglebius	dev = ISC_LIST_HEAD(sock->send_list);
1523290001Sglebius	count = 0;
1524290001Sglebius	while (dev != NULL) {
1525290001Sglebius		count++;
1526290001Sglebius		dev = ISC_LIST_NEXT(dev, ev_link);
1527290001Sglebius	}
1528290001Sglebius	if (count > sock->pending_send) {
1529290001Sglebius		crash = ISC_TRUE;
1530290001Sglebius		crash_reason = "send_list > sock->pending_send";
1531290001Sglebius	}
1532290001Sglebius
1533290001Sglebius	nev = ISC_LIST_HEAD(sock->accept_list);
1534290001Sglebius	count = 0;
1535290001Sglebius	while (nev != NULL) {
1536290001Sglebius		count++;
1537290001Sglebius		nev = ISC_LIST_NEXT(nev, ev_link);
1538290001Sglebius	}
1539290001Sglebius	if (count > sock->pending_accept) {
1540290001Sglebius		crash = ISC_TRUE;
1541290001Sglebius		crash_reason = "send_list > sock->pending_send";
1542290001Sglebius	}
1543290001Sglebius
1544290001Sglebius	if (crash) {
1545290001Sglebius		socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1546290001Sglebius			   ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s",
1547290001Sglebius			   crash_reason);
1548290001Sglebius		sock_dump(sock);
1549290001Sglebius		INSIST(crash == ISC_FALSE);
1550290001Sglebius	}
1551290001Sglebius}
1552290001Sglebius
1553290001Sglebius/*
1554290001Sglebius * Maybe free the socket.
1555290001Sglebius *
1556290001Sglebius * This function will verify tht the socket is no longer in use in any way,
1557290001Sglebius * either internally or externally.  This is the only place where this
1558290001Sglebius * check is to be made; if some bit of code believes that IT is done with
1559290001Sglebius * the socket (e.g., some reference counter reaches zero), it should call
1560290001Sglebius * this function.
1561290001Sglebius *
1562290001Sglebius * When calling this function, the socket must be locked, and the manager
1563290001Sglebius * must be unlocked.
1564290001Sglebius *
1565290001Sglebius * When this function returns, *socketp will be NULL.  No tricks to try
1566290001Sglebius * to hold on to this pointer are allowed.
1567290001Sglebius */
1568290001Sglebiusstatic void
1569290001Sglebiusmaybe_free_socket(isc_socket_t **socketp, int lineno) {
1570290001Sglebius	isc_socket_t *sock = *socketp;
1571290001Sglebius	*socketp = NULL;
1572290001Sglebius
1573290001Sglebius	INSIST(VALID_SOCKET(sock));
1574290001Sglebius	CONSISTENT(sock);
1575290001Sglebius
1576290001Sglebius	if (sock->pending_iocp > 0
1577290001Sglebius	    || sock->pending_recv > 0
1578290001Sglebius	    || sock->pending_send > 0
1579290001Sglebius	    || sock->pending_accept > 0
1580290001Sglebius	    || sock->references > 0
1581290001Sglebius	    || sock->pending_connect == 1
1582290001Sglebius	    || !ISC_LIST_EMPTY(sock->recv_list)
1583290001Sglebius	    || !ISC_LIST_EMPTY(sock->send_list)
1584290001Sglebius	    || !ISC_LIST_EMPTY(sock->accept_list)
1585290001Sglebius	    || sock->fd != INVALID_SOCKET) {
1586290001Sglebius		UNLOCK(&sock->lock);
1587290001Sglebius		return;
1588290001Sglebius	}
1589290001Sglebius	UNLOCK(&sock->lock);
1590290001Sglebius
1591290001Sglebius	free_socket(&sock, lineno);
1592290001Sglebius}
1593290001Sglebius
1594290001Sglebiusvoid
1595290001Sglebiusfree_socket(isc_socket_t **sockp, int lineno) {
1596290001Sglebius	isc_socketmgr_t *manager;
1597290001Sglebius	isc_socket_t *sock = *sockp;
1598290001Sglebius	*sockp = NULL;
1599290001Sglebius
1600290001Sglebius	manager = sock->manager;
1601290001Sglebius
1602290001Sglebius	/*
1603290001Sglebius	 * Seems we can free the socket after all.
1604290001Sglebius	 */
1605290001Sglebius	manager = sock->manager;
1606290001Sglebius	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET,
1607290001Sglebius		   ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p",
1608290001Sglebius		   lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore);
1609290001Sglebius
1610290001Sglebius	sock->magic = 0;
1611290001Sglebius	DESTROYLOCK(&sock->lock);
1612290001Sglebius
1613290001Sglebius	if (sock->recvbuf.base != NULL)
1614290001Sglebius		isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len);
1615290001Sglebius
1616290001Sglebius	LOCK(&manager->lock);
1617290001Sglebius	if (ISC_LINK_LINKED(sock, link))
1618290001Sglebius		ISC_LIST_UNLINK(manager->socklist, sock, link);
1619290001Sglebius	isc_mem_put(manager->mctx, sock, sizeof(*sock));
1620290001Sglebius
1621290001Sglebius	if (ISC_LIST_EMPTY(manager->socklist))
1622290001Sglebius		SIGNAL(&manager->shutdown_ok);
1623290001Sglebius	UNLOCK(&manager->lock);
1624290001Sglebius}
1625290001Sglebius
1626290001Sglebius/*
1627290001Sglebius * Create a new 'type' socket managed by 'manager'.  Events
1628290001Sglebius * will be posted to 'task' and when dispatched 'action' will be
1629290001Sglebius * called with 'arg' as the arg value.  The new socket is returned
1630290001Sglebius * in 'socketp'.
1631290001Sglebius */
1632290001Sglebiusstatic isc_result_t
1633290001Sglebiussocket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1634290001Sglebius	      isc_socket_t **socketp, isc_socket_t *dup_socket)
1635290001Sglebius{
1636290001Sglebius	isc_socket_t *sock = NULL;
1637290001Sglebius	isc_result_t result;
1638290001Sglebius#if defined(USE_CMSG)
1639290001Sglebius	int on = 1;
1640290001Sglebius#endif
1641290001Sglebius#if defined(SO_RCVBUF)
1642290001Sglebius	ISC_SOCKADDR_LEN_T optlen;
1643290001Sglebius	int size;
1644290001Sglebius#endif
1645290001Sglebius	int socket_errno;
1646290001Sglebius	char strbuf[ISC_STRERRORSIZE];
1647290001Sglebius
1648290001Sglebius	REQUIRE(VALID_MANAGER(manager));
1649290001Sglebius	REQUIRE(socketp != NULL && *socketp == NULL);
1650290001Sglebius	REQUIRE(type != isc_sockettype_fdwatch);
1651290001Sglebius
1652290001Sglebius	if (dup_socket != NULL)
1653290001Sglebius		return (ISC_R_NOTIMPLEMENTED);
1654290001Sglebius
1655290001Sglebius	result = allocate_socket(manager, type, &sock);
1656290001Sglebius	if (result != ISC_R_SUCCESS)
1657290001Sglebius		return (result);
1658290001Sglebius
1659290001Sglebius	sock->pf = pf;
1660290001Sglebius#if 0
1661290001Sglebius	if (dup_socket == NULL) {
1662290001Sglebius#endif
1663290001Sglebius		switch (type) {
1664290001Sglebius		case isc_sockettype_udp:
1665290001Sglebius			sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP);
1666290001Sglebius			if (sock->fd != INVALID_SOCKET) {
1667290001Sglebius				result = connection_reset_fix(sock->fd);
1668290001Sglebius				if (result != ISC_R_SUCCESS) {
1669290001Sglebius					socket_log(__LINE__, sock,
1670290001Sglebius						NULL, EVENT, NULL, 0, 0,
1671290001Sglebius						"closed %d %d %d "
1672290001Sglebius						"con_reset_fix_failed",
1673290001Sglebius						sock->pending_recv,
1674290001Sglebius						sock->pending_send,
1675290001Sglebius						sock->references);
1676290001Sglebius					closesocket(sock->fd);
1677290001Sglebius					_set_state(sock, SOCK_CLOSED);
1678290001Sglebius					sock->fd = INVALID_SOCKET;
1679290001Sglebius					free_socket(&sock, __LINE__);
1680290001Sglebius					return (result);
1681290001Sglebius				}
1682290001Sglebius			}
1683290001Sglebius			break;
1684290001Sglebius		case isc_sockettype_tcp:
1685290001Sglebius			sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP);
1686290001Sglebius			break;
1687290001Sglebius		}
1688290001Sglebius#if 0
1689290001Sglebius	} else {
1690290001Sglebius		/*
1691290001Sglebius		 * XXX: dup() is deprecated in windows, use _dup()
1692290001Sglebius		 * instead.  In future we may want to investigate
1693290001Sglebius		 * WSADuplicateSocket().
1694290001Sglebius		 */
1695290001Sglebius		sock->fd = _dup(dup_socket->fd);
1696290001Sglebius		sock->dupped = 1;
1697290001Sglebius		sock->bound = dup_socket->bound;
1698290001Sglebius	}
1699290001Sglebius#endif
1700290001Sglebius
1701290001Sglebius	if (sock->fd == INVALID_SOCKET) {
1702290001Sglebius		socket_errno = WSAGetLastError();
1703290001Sglebius		free_socket(&sock, __LINE__);
1704290001Sglebius
1705290001Sglebius		switch (socket_errno) {
1706290001Sglebius		case WSAEMFILE:
1707290001Sglebius		case WSAENOBUFS:
1708290001Sglebius			return (ISC_R_NORESOURCES);
1709290001Sglebius
1710290001Sglebius		case WSAEPROTONOSUPPORT:
1711290001Sglebius		case WSAEPFNOSUPPORT:
1712290001Sglebius		case WSAEAFNOSUPPORT:
1713290001Sglebius			return (ISC_R_FAMILYNOSUPPORT);
1714290001Sglebius
1715290001Sglebius		default:
1716290001Sglebius			isc__strerror(socket_errno, strbuf, sizeof(strbuf));
1717290001Sglebius			UNEXPECTED_ERROR(__FILE__, __LINE__,
1718290001Sglebius					 "socket() %s: %s",
1719290001Sglebius					 isc_msgcat_get(isc_msgcat,
1720290001Sglebius							ISC_MSGSET_GENERAL,
1721290001Sglebius							ISC_MSG_FAILED,
1722290001Sglebius							"failed"),
1723290001Sglebius					 strbuf);
1724290001Sglebius			return (ISC_R_UNEXPECTED);
1725290001Sglebius		}
1726290001Sglebius	}
1727290001Sglebius
1728290001Sglebius	result = make_nonblock(sock->fd);
1729290001Sglebius	if (result != ISC_R_SUCCESS) {
1730290001Sglebius		socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1731290001Sglebius			"closed %d %d %d make_nonblock_failed",
1732290001Sglebius			sock->pending_recv, sock->pending_send,
1733290001Sglebius			sock->references);
1734290001Sglebius		closesocket(sock->fd);
1735290001Sglebius		sock->fd = INVALID_SOCKET;
1736290001Sglebius		free_socket(&sock, __LINE__);
1737290001Sglebius		return (result);
1738290001Sglebius	}
1739290001Sglebius
1740290001Sglebius
1741290001Sglebius#if defined(USE_CMSG) || defined(SO_RCVBUF)
1742290001Sglebius	if (type == isc_sockettype_udp) {
1743290001Sglebius
1744290001Sglebius#if defined(USE_CMSG)
1745290001Sglebius#if defined(ISC_PLATFORM_HAVEIPV6)
1746290001Sglebius#ifdef IPV6_RECVPKTINFO
1747290001Sglebius		/* 2292bis */
1748290001Sglebius		if ((pf == AF_INET6)
1749290001Sglebius		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO,
1750290001Sglebius				   (char *)&on, sizeof(on)) < 0)) {
1751290001Sglebius			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1752290001Sglebius			UNEXPECTED_ERROR(__FILE__, __LINE__,
1753290001Sglebius					 "setsockopt(%d, IPV6_RECVPKTINFO) "
1754290001Sglebius					 "%s: %s", sock->fd,
1755290001Sglebius					 isc_msgcat_get(isc_msgcat,
1756290001Sglebius							ISC_MSGSET_GENERAL,
1757290001Sglebius							ISC_MSG_FAILED,
1758290001Sglebius							"failed"),
1759290001Sglebius					 strbuf);
1760290001Sglebius		}
1761290001Sglebius#else
1762290001Sglebius		/* 2292 */
1763290001Sglebius		if ((pf == AF_INET6)
1764290001Sglebius		    && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO,
1765290001Sglebius				   (char *)&on, sizeof(on)) < 0)) {
1766290001Sglebius			isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
1767290001Sglebius			UNEXPECTED_ERROR(__FILE__, __LINE__,
1768290001Sglebius					 "setsockopt(%d, IPV6_PKTINFO) %s: %s",
1769290001Sglebius					 sock->fd,
1770290001Sglebius					 isc_msgcat_get(isc_msgcat,
1771290001Sglebius							ISC_MSGSET_GENERAL,
1772290001Sglebius							ISC_MSG_FAILED,
1773290001Sglebius							"failed"),
1774290001Sglebius					 strbuf);
1775290001Sglebius		}
1776290001Sglebius#endif /* IPV6_RECVPKTINFO */
1777290001Sglebius#ifdef IPV6_USE_MIN_MTU	/*2292bis, not too common yet*/
1778290001Sglebius		/* use minimum MTU */
1779290001Sglebius		if (pf == AF_INET6) {
1780290001Sglebius			(void)setsockopt(sock->fd, IPPROTO_IPV6,
1781290001Sglebius					 IPV6_USE_MIN_MTU,
1782290001Sglebius					 (char *)&on, sizeof(on));
1783290001Sglebius		}
1784290001Sglebius#endif
1785290001Sglebius#endif /* ISC_PLATFORM_HAVEIPV6 */
1786290001Sglebius#endif /* defined(USE_CMSG) */
1787290001Sglebius
1788290001Sglebius#if defined(SO_RCVBUF)
1789290001Sglebius	       optlen = sizeof(size);
1790290001Sglebius	       if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1791290001Sglebius			      (char *)&size, &optlen) >= 0 &&
1792290001Sglebius		    size < RCVBUFSIZE) {
1793290001Sglebius		       size = RCVBUFSIZE;
1794290001Sglebius		       (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF,
1795290001Sglebius					(char *)&size, sizeof(size));
1796290001Sglebius	       }
1797290001Sglebius#endif
1798290001Sglebius
1799290001Sglebius	}
1800290001Sglebius#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */
1801290001Sglebius
1802290001Sglebius	_set_state(sock, SOCK_OPEN);
1803290001Sglebius	sock->references = 1;
1804290001Sglebius	*socketp = sock;
1805290001Sglebius
1806290001Sglebius	iocompletionport_update(sock);
1807290001Sglebius
1808290001Sglebius	/*
1809290001Sglebius	 * Note we don't have to lock the socket like we normally would because
1810290001Sglebius	 * there are no external references to it yet.
1811290001Sglebius	 */
1812290001Sglebius	LOCK(&manager->lock);
1813290001Sglebius	ISC_LIST_APPEND(manager->socklist, sock, link);
1814290001Sglebius	InterlockedIncrement(&manager->totalSockets);
1815290001Sglebius	UNLOCK(&manager->lock);
1816290001Sglebius
1817290001Sglebius	socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat,
1818290001Sglebius		   ISC_MSGSET_SOCKET, ISC_MSG_CREATED,
1819290001Sglebius		   "created %u type %u", sock->fd, type);
1820290001Sglebius
1821290001Sglebius	return (ISC_R_SUCCESS);
1822290001Sglebius}
1823290001Sglebius
1824290001Sglebiusisc_result_t
1825290001Sglebiusisc__socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type,
1826290001Sglebius		   isc_socket_t **socketp)
1827290001Sglebius{
1828290001Sglebius	return (socket_create(manager, pf, type, socketp, NULL));
1829290001Sglebius}
1830290001Sglebius
1831290001Sglebiusisc_result_t
1832290001Sglebiusisc__socket_dup(isc_socket_t *sock, isc_socket_t **socketp) {
1833290001Sglebius	REQUIRE(VALID_SOCKET(sock));
1834290001Sglebius	REQUIRE(socketp != NULL && *socketp == NULL);
1835290001Sglebius
1836290001Sglebius#if 1
1837290001Sglebius	return (ISC_R_NOTIMPLEMENTED);
1838290001Sglebius#else
1839290001Sglebius	return (socket_create(sock->manager, sock->pf, sock->type,
1840290001Sglebius			      socketp, sock));
1841290001Sglebius#endif
1842290001Sglebius}
1843290001Sglebius
1844290001Sglebiusisc_result_t
1845290001Sglebiusisc_socket_open(isc_socket_t *sock) {
1846290001Sglebius	REQUIRE(VALID_SOCKET(sock));
1847290001Sglebius	REQUIRE(sock->type != isc_sockettype_fdwatch);
1848290001Sglebius
1849290001Sglebius	return (ISC_R_NOTIMPLEMENTED);
1850290001Sglebius}
1851290001Sglebius
1852290001Sglebius/*
1853290001Sglebius * Attach to a socket.  Caller must explicitly detach when it is done.
1854290001Sglebius */
1855290001Sglebiusvoid
1856290001Sglebiusisc__socket_attach(isc_socket_t *sock, isc_socket_t **socketp) {
1857290001Sglebius	REQUIRE(VALID_SOCKET(sock));
1858290001Sglebius	REQUIRE(socketp != NULL && *socketp == NULL);
1859290001Sglebius
1860290001Sglebius	LOCK(&sock->lock);
1861290001Sglebius	CONSISTENT(sock);
1862290001Sglebius	sock->references++;
1863290001Sglebius	UNLOCK(&sock->lock);
1864290001Sglebius
1865290001Sglebius	*socketp = sock;
1866290001Sglebius}
1867290001Sglebius
1868290001Sglebius/*
1869290001Sglebius * Dereference a socket.  If this is the last reference to it, clean things
1870290001Sglebius * up by destroying the socket.
1871290001Sglebius */
1872290001Sglebiusvoid
1873290001Sglebiusisc__socket_detach(isc_socket_t **socketp) {
1874290001Sglebius	isc_socket_t *sock;
1875290001Sglebius	isc_boolean_t kill_socket = ISC_FALSE;
1876290001Sglebius
1877290001Sglebius	REQUIRE(socketp != NULL);
1878290001Sglebius	sock = *socketp;
1879290001Sglebius	REQUIRE(VALID_SOCKET(sock));
1880290001Sglebius	REQUIRE(sock->type != isc_sockettype_fdwatch);
1881290001Sglebius
1882290001Sglebius	LOCK(&sock->lock);
1883290001Sglebius	CONSISTENT(sock);
1884290001Sglebius	REQUIRE(sock->references > 0);
1885290001Sglebius	sock->references--;
1886290001Sglebius
1887290001Sglebius	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
1888290001Sglebius		"detach_socket %d %d %d",
1889290001Sglebius		sock->pending_recv, sock->pending_send,
1890290001Sglebius		sock->references);
1891290001Sglebius
1892290001Sglebius	if (sock->references == 0 && sock->fd != INVALID_SOCKET) {
1893290001Sglebius		closesocket(sock->fd);
1894290001Sglebius		sock->fd = INVALID_SOCKET;
1895290001Sglebius		_set_state(sock, SOCK_CLOSED);
1896290001Sglebius	}
1897290001Sglebius
1898290001Sglebius	maybe_free_socket(&sock, __LINE__);
1899290001Sglebius
1900290001Sglebius	*socketp = NULL;
1901290001Sglebius}
1902290001Sglebius
1903290001Sglebiusisc_result_t
1904290001Sglebiusisc_socket_close(isc_socket_t *sock) {
1905290001Sglebius	REQUIRE(VALID_SOCKET(sock));
1906290001Sglebius	REQUIRE(sock->type != isc_sockettype_fdwatch);
1907290001Sglebius
1908290001Sglebius	return (ISC_R_NOTIMPLEMENTED);
1909290001Sglebius}
1910290001Sglebius
1911290001Sglebius/*
1912290001Sglebius * Dequeue an item off the given socket's read queue, set the result code
1913290001Sglebius * in the done event to the one provided, and send it to the task it was
1914290001Sglebius * destined for.
1915290001Sglebius *
1916290001Sglebius * If the event to be sent is on a list, remove it before sending.  If
1917290001Sglebius * asked to, send and detach from the task as well.
1918290001Sglebius *
1919290001Sglebius * Caller must have the socket locked if the event is attached to the socket.
1920290001Sglebius */
1921290001Sglebiusstatic void
1922290001Sglebiussend_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1923290001Sglebius	isc_task_t *task;
1924290001Sglebius
1925290001Sglebius	task = (*dev)->ev_sender;
1926290001Sglebius	(*dev)->ev_sender = sock;
1927290001Sglebius
1928290001Sglebius	if (ISC_LINK_LINKED(*dev, ev_link))
1929290001Sglebius		ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link);
1930290001Sglebius
1931290001Sglebius	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1932290001Sglebius	    == ISC_SOCKEVENTATTR_ATTACHED)
1933290001Sglebius		isc_task_sendanddetach(&task, (isc_event_t **)dev);
1934290001Sglebius	else
1935290001Sglebius		isc_task_send(task, (isc_event_t **)dev);
1936290001Sglebius
1937290001Sglebius	CONSISTENT(sock);
1938290001Sglebius}
1939290001Sglebius
1940290001Sglebius/*
1941290001Sglebius * See comments for send_recvdone_event() above.
1942290001Sglebius */
1943290001Sglebiusstatic void
1944290001Sglebiussend_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) {
1945290001Sglebius	isc_task_t *task;
1946290001Sglebius
1947290001Sglebius	INSIST(dev != NULL && *dev != NULL);
1948290001Sglebius
1949290001Sglebius	task = (*dev)->ev_sender;
1950290001Sglebius	(*dev)->ev_sender = sock;
1951290001Sglebius
1952290001Sglebius	if (ISC_LINK_LINKED(*dev, ev_link))
1953290001Sglebius		ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link);
1954290001Sglebius
1955290001Sglebius	if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED)
1956290001Sglebius	    == ISC_SOCKEVENTATTR_ATTACHED)
1957290001Sglebius		isc_task_sendanddetach(&task, (isc_event_t **)dev);
1958290001Sglebius	else
1959290001Sglebius		isc_task_send(task, (isc_event_t **)dev);
1960290001Sglebius
1961290001Sglebius	CONSISTENT(sock);
1962290001Sglebius}
1963290001Sglebius
1964290001Sglebius/*
1965290001Sglebius * See comments for send_recvdone_event() above.
1966290001Sglebius */
1967290001Sglebiusstatic void
1968290001Sglebiussend_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) {
1969290001Sglebius	isc_task_t *task;
1970290001Sglebius
1971290001Sglebius	INSIST(adev != NULL && *adev != NULL);
1972290001Sglebius
1973290001Sglebius	task = (*adev)->ev_sender;
1974290001Sglebius	(*adev)->ev_sender = sock;
1975290001Sglebius
1976290001Sglebius	if (ISC_LINK_LINKED(*adev, ev_link))
1977290001Sglebius		ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link);
1978290001Sglebius
1979290001Sglebius	isc_task_sendanddetach(&task, (isc_event_t **)adev);
1980290001Sglebius
1981290001Sglebius	CONSISTENT(sock);
1982290001Sglebius}
1983290001Sglebius
1984290001Sglebius/*
1985290001Sglebius * See comments for send_recvdone_event() above.
1986290001Sglebius */
1987290001Sglebiusstatic void
1988290001Sglebiussend_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) {
1989290001Sglebius	isc_task_t *task;
1990290001Sglebius
1991290001Sglebius	INSIST(cdev != NULL && *cdev != NULL);
1992290001Sglebius
1993290001Sglebius	task = (*cdev)->ev_sender;
1994290001Sglebius	(*cdev)->ev_sender = sock;
1995290001Sglebius
1996290001Sglebius	sock->connect_ev = NULL;
1997290001Sglebius
1998290001Sglebius	isc_task_sendanddetach(&task, (isc_event_t **)cdev);
1999290001Sglebius
2000290001Sglebius	CONSISTENT(sock);
2001290001Sglebius}
2002290001Sglebius
2003290001Sglebius/*
2004290001Sglebius * On entry to this function, the event delivered is the internal
2005290001Sglebius * readable event, and the first item on the accept_list should be
2006290001Sglebius * the done event we want to send.  If the list is empty, this is a no-op,
2007290001Sglebius * so just close the new connection, unlock, and return.
2008290001Sglebius *
2009290001Sglebius * Note the socket is locked before entering here
2010290001Sglebius */
2011290001Sglebiusstatic void
2012290001Sglebiusinternal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) {
2013290001Sglebius	isc_socket_newconnev_t *adev;
2014290001Sglebius	isc_result_t result = ISC_R_SUCCESS;
2015290001Sglebius	isc_socket_t *nsock;
2016290001Sglebius	struct sockaddr *localaddr;
2017290001Sglebius	int localaddr_len = sizeof(*localaddr);
2018290001Sglebius	struct sockaddr *remoteaddr;
2019290001Sglebius	int remoteaddr_len = sizeof(*remoteaddr);
2020290001Sglebius
2021290001Sglebius	INSIST(VALID_SOCKET(sock));
2022290001Sglebius	LOCK(&sock->lock);
2023290001Sglebius	CONSISTENT(sock);
2024290001Sglebius
2025290001Sglebius	socket_log(__LINE__, sock, NULL, TRACE,
2026290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2027290001Sglebius		   "internal_accept called");
2028290001Sglebius
2029290001Sglebius	INSIST(sock->listener);
2030290001Sglebius
2031290001Sglebius	INSIST(sock->pending_iocp > 0);
2032290001Sglebius	sock->pending_iocp--;
2033290001Sglebius	INSIST(sock->pending_accept > 0);
2034290001Sglebius	sock->pending_accept--;
2035290001Sglebius
2036290001Sglebius	adev = lpo->adev;
2037290001Sglebius
2038290001Sglebius	/*
2039290001Sglebius	 * If the event is no longer in the list we can just return.
2040290001Sglebius	 */
2041290001Sglebius	if (!acceptdone_is_active(sock, adev))
2042290001Sglebius		goto done;
2043290001Sglebius
2044290001Sglebius	nsock = adev->newsocket;
2045290001Sglebius
2046290001Sglebius	/*
2047290001Sglebius	 * Pull off the done event.
2048290001Sglebius	 */
2049290001Sglebius	ISC_LIST_UNLINK(sock->accept_list, adev, ev_link);
2050290001Sglebius
2051290001Sglebius	/*
2052290001Sglebius	 * Extract the addresses from the socket, copy them into the structure,
2053290001Sglebius	 * and return the new socket.
2054290001Sglebius	 */
2055290001Sglebius	ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0,
2056290001Sglebius		sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16,
2057290001Sglebius		(LPSOCKADDR *)&localaddr, &localaddr_len,
2058290001Sglebius		(LPSOCKADDR *)&remoteaddr, &remoteaddr_len);
2059290001Sglebius	memcpy(&adev->address.type, remoteaddr, remoteaddr_len);
2060290001Sglebius	adev->address.length = remoteaddr_len;
2061290001Sglebius	nsock->address = adev->address;
2062290001Sglebius	nsock->pf = adev->address.type.sa.sa_family;
2063290001Sglebius
2064290001Sglebius	socket_log(__LINE__, nsock, &nsock->address, TRACE,
2065290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2066290001Sglebius		   "internal_accept parent %p", sock);
2067290001Sglebius
2068290001Sglebius	result = make_nonblock(adev->newsocket->fd);
2069290001Sglebius	INSIST(result == ISC_R_SUCCESS);
2070290001Sglebius
2071290001Sglebius	INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT,
2072290001Sglebius			  (char *)&sock->fd, sizeof(sock->fd)) == 0);
2073290001Sglebius
2074290001Sglebius	/*
2075290001Sglebius	 * Hook it up into the manager.
2076290001Sglebius	 */
2077290001Sglebius	nsock->bound = 1;
2078290001Sglebius	nsock->connected = 1;
2079290001Sglebius	_set_state(nsock, SOCK_OPEN);
2080290001Sglebius
2081290001Sglebius	LOCK(&nsock->manager->lock);
2082290001Sglebius	ISC_LIST_APPEND(nsock->manager->socklist, nsock, link);
2083290001Sglebius	InterlockedIncrement(&nsock->manager->totalSockets);
2084290001Sglebius	UNLOCK(&nsock->manager->lock);
2085290001Sglebius
2086290001Sglebius	socket_log(__LINE__, sock, &nsock->address, CREATION,
2087290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2088290001Sglebius		   "accepted_connection new_socket %p fd %d",
2089290001Sglebius		   nsock, nsock->fd);
2090290001Sglebius
2091290001Sglebius	adev->result = result;
2092290001Sglebius	send_acceptdone_event(sock, &adev);
2093290001Sglebius
2094290001Sglebiusdone:
2095290001Sglebius	CONSISTENT(sock);
2096290001Sglebius	UNLOCK(&sock->lock);
2097290001Sglebius
2098290001Sglebius	HeapFree(hHeapHandle, 0, lpo->acceptbuffer);
2099290001Sglebius	lpo->acceptbuffer = NULL;
2100290001Sglebius}
2101290001Sglebius
2102290001Sglebius/*
2103290001Sglebius * Called when a socket with a pending connect() finishes.
2104290001Sglebius * Note that the socket is locked before entering.
2105290001Sglebius */
2106290001Sglebiusstatic void
2107290001Sglebiusinternal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) {
2108290001Sglebius	isc_socket_connev_t *cdev;
2109290001Sglebius	char strbuf[ISC_STRERRORSIZE];
2110290001Sglebius
2111290001Sglebius	INSIST(VALID_SOCKET(sock));
2112290001Sglebius
2113290001Sglebius	LOCK(&sock->lock);
2114290001Sglebius
2115290001Sglebius	INSIST(sock->pending_iocp > 0);
2116290001Sglebius	sock->pending_iocp--;
2117290001Sglebius	INSIST(sock->pending_connect == 1);
2118290001Sglebius	sock->pending_connect = 0;
2119290001Sglebius
2120290001Sglebius	/*
2121290001Sglebius	 * Has this event been canceled?
2122290001Sglebius	 */
2123290001Sglebius	cdev = lpo->cdev;
2124290001Sglebius	if (!connectdone_is_active(sock, cdev)) {
2125290001Sglebius		sock->pending_connect = 0;
2126290001Sglebius		if (sock->fd != INVALID_SOCKET) {
2127290001Sglebius			closesocket(sock->fd);
2128290001Sglebius			sock->fd = INVALID_SOCKET;
2129290001Sglebius			_set_state(sock, SOCK_CLOSED);
2130290001Sglebius		}
2131290001Sglebius		CONSISTENT(sock);
2132290001Sglebius		UNLOCK(&sock->lock);
2133290001Sglebius		return;
2134290001Sglebius	}
2135290001Sglebius
2136290001Sglebius	/*
2137290001Sglebius	 * Check possible Windows network event error status here.
2138290001Sglebius	 */
2139290001Sglebius	if (connect_errno != 0) {
2140290001Sglebius		/*
2141290001Sglebius		 * If the error is SOFT, just try again on this
2142290001Sglebius		 * fd and pretend nothing strange happened.
2143290001Sglebius		 */
2144290001Sglebius		if (SOFT_ERROR(connect_errno) ||
2145290001Sglebius		    connect_errno == WSAEINPROGRESS) {
2146290001Sglebius			sock->pending_connect = 1;
2147290001Sglebius			CONSISTENT(sock);
2148290001Sglebius			UNLOCK(&sock->lock);
2149290001Sglebius			return;
2150290001Sglebius		}
2151290001Sglebius
2152290001Sglebius		/*
2153290001Sglebius		 * Translate other errors into ISC_R_* flavors.
2154290001Sglebius		 */
2155290001Sglebius		switch (connect_errno) {
2156290001Sglebius#define ERROR_MATCH(a, b) case a: cdev->result = b; break;
2157290001Sglebius			ERROR_MATCH(WSAEACCES, ISC_R_NOPERM);
2158290001Sglebius			ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL);
2159290001Sglebius			ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL);
2160290001Sglebius			ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED);
2161290001Sglebius			ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH);
2162290001Sglebius			ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN);
2163290001Sglebius			ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH);
2164290001Sglebius			ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN);
2165290001Sglebius			ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES);
2166290001Sglebius			ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET);
2167290001Sglebius			ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET);
2168290001Sglebius			ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT);
2169290001Sglebius#undef ERROR_MATCH
2170290001Sglebius		default:
2171290001Sglebius			cdev->result = ISC_R_UNEXPECTED;
2172290001Sglebius			isc__strerror(connect_errno, strbuf, sizeof(strbuf));
2173290001Sglebius			UNEXPECTED_ERROR(__FILE__, __LINE__,
2174290001Sglebius					 "internal_connect: connect() %s",
2175290001Sglebius					 strbuf);
2176290001Sglebius		}
2177290001Sglebius	} else {
2178290001Sglebius		INSIST(setsockopt(sock->fd, SOL_SOCKET,
2179290001Sglebius				  SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0);
2180290001Sglebius		cdev->result = ISC_R_SUCCESS;
2181290001Sglebius		sock->connected = 1;
2182290001Sglebius		socket_log(__LINE__, sock, &sock->address, IOEVENT,
2183290001Sglebius			   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN,
2184290001Sglebius			   "internal_connect: success");
2185290001Sglebius	}
2186290001Sglebius
2187290001Sglebius	send_connectdone_event(sock, &cdev);
2188290001Sglebius
2189290001Sglebius	UNLOCK(&sock->lock);
2190290001Sglebius}
2191290001Sglebius
2192290001Sglebius/*
2193290001Sglebius * Loop through the socket, returning ISC_R_EOF for each done event pending.
2194290001Sglebius */
2195290001Sglebiusstatic void
2196290001Sglebiussend_recvdone_abort(isc_socket_t *sock, isc_result_t result) {
2197290001Sglebius	isc_socketevent_t *dev;
2198290001Sglebius
2199290001Sglebius	while (!ISC_LIST_EMPTY(sock->recv_list)) {
2200290001Sglebius		dev = ISC_LIST_HEAD(sock->recv_list);
2201290001Sglebius		dev->result = result;
2202290001Sglebius		send_recvdone_event(sock, &dev);
2203290001Sglebius	}
2204290001Sglebius}
2205290001Sglebius
2206290001Sglebius/*
2207290001Sglebius * Take the data we received in our private buffer, and if any recv() calls on
2208290001Sglebius * our list are satisfied, send the corresponding done event.
2209290001Sglebius *
2210290001Sglebius * If we need more data (there are still items on the recv_list after we consume all
2211290001Sglebius * our data) then arrange for another system recv() call to fill our buffers.
2212290001Sglebius */
2213290001Sglebiusstatic void
2214290001Sglebiusinternal_recv(isc_socket_t *sock, int nbytes)
2215290001Sglebius{
2216290001Sglebius	INSIST(VALID_SOCKET(sock));
2217290001Sglebius
2218290001Sglebius	LOCK(&sock->lock);
2219290001Sglebius	CONSISTENT(sock);
2220290001Sglebius
2221290001Sglebius	socket_log(__LINE__, sock, NULL, IOEVENT,
2222290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV,
2223290001Sglebius		   "internal_recv: %d bytes received", nbytes);
2224290001Sglebius
2225290001Sglebius	/*
2226290001Sglebius	 * If we got here, the I/O operation succeeded.  However, we might still have removed this
2227290001Sglebius	 * event from our notification list (or never placed it on it due to immediate completion.)
2228290001Sglebius	 * Handle the reference counting here, and handle the cancellation event just after.
2229290001Sglebius	 */
2230290001Sglebius	INSIST(sock->pending_iocp > 0);
2231290001Sglebius	sock->pending_iocp--;
2232290001Sglebius	INSIST(sock->pending_recv > 0);
2233290001Sglebius	sock->pending_recv--;
2234290001Sglebius
2235290001Sglebius	/*
2236290001Sglebius	 * The only way we could have gotten here is that our I/O has successfully completed.
2237290001Sglebius	 * Update our pointers, and move on.  The only odd case here is that we might not
2238290001Sglebius	 * have received enough data on a TCP stream to satisfy the minimum requirements.  If
2239290001Sglebius	 * this is the case, we will re-issue the recv() call for what we need.
2240290001Sglebius	 *
2241290001Sglebius	 * We do check for a recv() of 0 bytes on a TCP stream.  This means the remote end
2242290001Sglebius	 * has closed.
2243290001Sglebius	 */
2244290001Sglebius	if (nbytes == 0 && sock->type == isc_sockettype_tcp) {
2245290001Sglebius		send_recvdone_abort(sock, ISC_R_EOF);
2246290001Sglebius		maybe_free_socket(&sock, __LINE__);
2247290001Sglebius		return;
2248290001Sglebius	}
2249290001Sglebius	sock->recvbuf.remaining = nbytes;
2250290001Sglebius	sock->recvbuf.consume_position = sock->recvbuf.base;
2251290001Sglebius	completeio_recv(sock);
2252290001Sglebius
2253290001Sglebius	/*
2254290001Sglebius	 * If there are more receivers waiting for data, queue another receive
2255290001Sglebius	 * here.
2256290001Sglebius	 */
2257290001Sglebius	queue_receive_request(sock);
2258290001Sglebius
2259290001Sglebius	/*
2260290001Sglebius	 * Unlock and/or destroy if we are the last thing this socket has left to do.
2261290001Sglebius	 */
2262290001Sglebius	maybe_free_socket(&sock, __LINE__);
2263290001Sglebius}
2264290001Sglebius
2265290001Sglebiusstatic void
2266290001Sglebiusinternal_send(isc_socket_t *sock, isc_socketevent_t *dev,
2267290001Sglebius	      struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo)
2268290001Sglebius{
2269290001Sglebius	buflist_t *buffer;
2270290001Sglebius
2271290001Sglebius	/*
2272290001Sglebius	 * Find out what socket this is and lock it.
2273290001Sglebius	 */
2274290001Sglebius	INSIST(VALID_SOCKET(sock));
2275290001Sglebius
2276290001Sglebius	LOCK(&sock->lock);
2277290001Sglebius	CONSISTENT(sock);
2278290001Sglebius
2279290001Sglebius	socket_log(__LINE__, sock, NULL, IOEVENT,
2280290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND,
2281290001Sglebius		   "internal_send: task got socket event %p", dev);
2282290001Sglebius
2283290001Sglebius	buffer = ISC_LIST_HEAD(lpo->bufferlist);
2284290001Sglebius	while (buffer != NULL) {
2285290001Sglebius		ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link);
2286290001Sglebius
2287290001Sglebius		socket_log(__LINE__, sock, NULL, TRACE,
2288290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK,
2289290001Sglebius		   "free_buffer %p %p", buffer, buffer->buf);
2290290001Sglebius
2291290001Sglebius		HeapFree(hHeapHandle, 0, buffer->buf);
2292290001Sglebius		HeapFree(hHeapHandle, 0, buffer);
2293290001Sglebius		buffer = ISC_LIST_HEAD(lpo->bufferlist);
2294290001Sglebius	}
2295290001Sglebius
2296290001Sglebius	INSIST(sock->pending_iocp > 0);
2297290001Sglebius	sock->pending_iocp--;
2298290001Sglebius	INSIST(sock->pending_send > 0);
2299290001Sglebius	sock->pending_send--;
2300290001Sglebius
2301290001Sglebius	/* If the event is no longer in the list we can just return */
2302290001Sglebius	if (!senddone_is_active(sock, dev))
2303290001Sglebius		goto done;
2304290001Sglebius
2305290001Sglebius	/*
2306290001Sglebius	 * Set the error code and send things on its way.
2307290001Sglebius	 */
2308290001Sglebius	switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) {
2309290001Sglebius	case DOIO_SOFT:
2310290001Sglebius		break;
2311290001Sglebius	case DOIO_HARD:
2312290001Sglebius	case DOIO_SUCCESS:
2313290001Sglebius		send_senddone_event(sock, &dev);
2314290001Sglebius		break;
2315290001Sglebius	}
2316290001Sglebius
2317290001Sglebius done:
2318290001Sglebius	maybe_free_socket(&sock, __LINE__);
2319290001Sglebius}
2320290001Sglebius
2321290001Sglebius/*
2322290001Sglebius * These return if the done event passed in is on the list (or for connect, is
2323290001Sglebius * the one we're waiting for.  Using these ensures we will not double-send an
2324290001Sglebius * event.
2325290001Sglebius */
2326290001Sglebiusstatic isc_boolean_t
2327290001Sglebiussenddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev)
2328290001Sglebius{
2329290001Sglebius	isc_socketevent_t *ldev;
2330290001Sglebius
2331290001Sglebius	ldev = ISC_LIST_HEAD(sock->send_list);
2332290001Sglebius	while (ldev != NULL && ldev != dev)
2333290001Sglebius		ldev = ISC_LIST_NEXT(ldev, ev_link);
2334290001Sglebius
2335290001Sglebius	return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2336290001Sglebius}
2337290001Sglebius
2338290001Sglebiusstatic isc_boolean_t
2339290001Sglebiusacceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev)
2340290001Sglebius{
2341290001Sglebius	isc_socket_newconnev_t *ldev;
2342290001Sglebius
2343290001Sglebius	ldev = ISC_LIST_HEAD(sock->accept_list);
2344290001Sglebius	while (ldev != NULL && ldev != dev)
2345290001Sglebius		ldev = ISC_LIST_NEXT(ldev, ev_link);
2346290001Sglebius
2347290001Sglebius	return (ldev == NULL ? ISC_FALSE : ISC_TRUE);
2348290001Sglebius}
2349290001Sglebius
2350290001Sglebiusstatic isc_boolean_t
2351290001Sglebiusconnectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev)
2352290001Sglebius{
2353290001Sglebius	return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE);
2354290001Sglebius}
2355290001Sglebius
2356290001Sglebius//
2357290001Sglebius// The Windows network stack seems to have two very distinct paths depending
2358290001Sglebius// on what is installed.  Specifically, if something is looking at network
2359290001Sglebius// connections (like an anti-virus or anti-malware application, such as
2360290001Sglebius// McAfee products) Windows may return additional error conditions which
2361290001Sglebius// were not previously returned.
2362290001Sglebius//
2363290001Sglebius// One specific one is when a TCP SYN scan is used.  In this situation,
2364290001Sglebius// Windows responds with the SYN-ACK, but the scanner never responds with
2365290001Sglebius// the 3rd packet, the ACK.  Windows consiers this a partially open connection.
2366290001Sglebius// Most Unix networking stacks, and Windows without McAfee installed, will
2367290001Sglebius// not return this to the caller.  However, with this product installed,
2368290001Sglebius// Windows returns this as a failed status on the Accept() call.  Here, we
2369290001Sglebius// will just re-issue the ISCAcceptEx() call as if nothing had happened.
2370290001Sglebius//
2371290001Sglebius// This code should only be called when the listening socket has received
2372290001Sglebius// such an error.  Additionally, the "parent" socket must be locked.
2373290001Sglebius// Additionally, the lpo argument is re-used here, and must not be freed
2374290001Sglebius// by the caller.
2375290001Sglebius//
2376290001Sglebiusstatic isc_result_t
2377290001Sglebiusrestart_accept(isc_socket_t *parent, IoCompletionInfo *lpo)
2378290001Sglebius{
2379290001Sglebius	isc_socket_t *nsock = lpo->adev->newsocket;
2380290001Sglebius	SOCKET new_fd;
2381290001Sglebius
2382290001Sglebius	/*
2383290001Sglebius	 * AcceptEx() requires we pass in a socket.  Note that we carefully
2384290001Sglebius	 * do not close the previous socket in case of an error message returned by
2385290001Sglebius	 * our new socket() call.  If we return an error here, our caller will
2386290001Sglebius	 * clean up.
2387290001Sglebius	 */
2388290001Sglebius	new_fd = socket(parent->pf, SOCK_STREAM, IPPROTO_TCP);
2389290001Sglebius	if (nsock->fd == INVALID_SOCKET) {
2390290001Sglebius		return (ISC_R_FAILURE); // parent will ask windows for error message
2391290001Sglebius	}
2392290001Sglebius	closesocket(nsock->fd);
2393290001Sglebius	nsock->fd = new_fd;
2394290001Sglebius
2395290001Sglebius	memset(&lpo->overlapped, 0, sizeof(lpo->overlapped));
2396290001Sglebius
2397290001Sglebius	ISCAcceptEx(parent->fd,
2398290001Sglebius		    nsock->fd,				/* Accepted Socket */
2399290001Sglebius		    lpo->acceptbuffer,			/* Buffer for initial Recv */
2400290001Sglebius		    0,					/* Length of Buffer */
2401290001Sglebius		    sizeof(SOCKADDR_STORAGE) + 16,	/* Local address length + 16 */
2402290001Sglebius		    sizeof(SOCKADDR_STORAGE) + 16,	/* Remote address lengh + 16 */
2403290001Sglebius		    (LPDWORD)&lpo->received_bytes,	/* Bytes Recved */
2404290001Sglebius		    (LPOVERLAPPED)lpo			/* Overlapped structure */
2405290001Sglebius		    );
2406290001Sglebius
2407290001Sglebius	InterlockedDecrement(&nsock->manager->iocp_total);
2408290001Sglebius	iocompletionport_update(nsock);
2409290001Sglebius
2410290001Sglebius	return (ISC_R_SUCCESS);
2411290001Sglebius}
2412290001Sglebius
2413290001Sglebius/*
2414290001Sglebius * This is the I/O Completion Port Worker Function. It loops forever
2415290001Sglebius * waiting for I/O to complete and then forwards them for further
2416290001Sglebius * processing. There are a number of these in separate threads.
2417290001Sglebius */
2418290001Sglebiusstatic isc_threadresult_t WINAPI
2419290001SglebiusSocketIoThread(LPVOID ThreadContext) {
2420290001Sglebius	isc_socketmgr_t *manager = ThreadContext;
2421290001Sglebius	BOOL bSuccess = FALSE;
2422290001Sglebius	DWORD nbytes;
2423290001Sglebius	IoCompletionInfo *lpo = NULL;
2424290001Sglebius	isc_socket_t *sock = NULL;
2425290001Sglebius	int request;
2426290001Sglebius	struct msghdr *messagehdr = NULL;
2427290001Sglebius	int errval;
2428290001Sglebius	char strbuf[ISC_STRERRORSIZE];
2429290001Sglebius	int errstatus;
2430290001Sglebius
2431290001Sglebius	REQUIRE(VALID_MANAGER(manager));
2432290001Sglebius
2433290001Sglebius	/*
2434290001Sglebius	 * Set the thread priority high enough so I/O will
2435290001Sglebius	 * preempt normal recv packet processing, but not
2436290001Sglebius	 * higher than the timer sync thread.
2437290001Sglebius	 */
2438290001Sglebius	if (!SetThreadPriority(GetCurrentThread(),
2439290001Sglebius			       THREAD_PRIORITY_ABOVE_NORMAL)) {
2440290001Sglebius		errval = GetLastError();
2441290001Sglebius		isc__strerror(errval, strbuf, sizeof(strbuf));
2442290001Sglebius		FATAL_ERROR(__FILE__, __LINE__,
2443290001Sglebius				isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2444290001Sglebius				ISC_MSG_FAILED,
2445290001Sglebius				"Can't set thread priority: %s"),
2446290001Sglebius				strbuf);
2447290001Sglebius	}
2448290001Sglebius
2449290001Sglebius	/*
2450290001Sglebius	 * Loop forever waiting on I/O Completions and then processing them
2451290001Sglebius	 */
2452290001Sglebius	while (TRUE) {
2453290001Sglebius		wait_again:
2454290001Sglebius		bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort,
2455290001Sglebius						     &nbytes, (LPDWORD)&sock,
2456290001Sglebius						     (LPWSAOVERLAPPED *)&lpo,
2457290001Sglebius						     INFINITE);
2458290001Sglebius		if (lpo == NULL) /* Received request to exit */
2459290001Sglebius			break;
2460290001Sglebius
2461290001Sglebius		REQUIRE(VALID_SOCKET(sock));
2462290001Sglebius
2463290001Sglebius		request = lpo->request_type;
2464290001Sglebius
2465290001Sglebius		errstatus = 0;
2466290001Sglebius		if (!bSuccess) {
2467290001Sglebius			isc_result_t isc_result;
2468290001Sglebius
2469290001Sglebius			/*
2470290001Sglebius			 * Did the I/O operation complete?
2471290001Sglebius			 */
2472290001Sglebius			errstatus = GetLastError();
2473290001Sglebius			isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2474290001Sglebius
2475290001Sglebius			LOCK(&sock->lock);
2476290001Sglebius			CONSISTENT(sock);
2477290001Sglebius			switch (request) {
2478290001Sglebius			case SOCKET_RECV:
2479290001Sglebius				INSIST(sock->pending_iocp > 0);
2480290001Sglebius				sock->pending_iocp--;
2481290001Sglebius				INSIST(sock->pending_recv > 0);
2482290001Sglebius				sock->pending_recv--;
2483290001Sglebius				if (!sock->connected &&
2484290001Sglebius				    ((errstatus == ERROR_HOST_UNREACHABLE) ||
2485290001Sglebius				     (errstatus == WSAENETRESET) ||
2486290001Sglebius				     (errstatus == WSAECONNRESET))) {
2487290001Sglebius					/* ignore soft errors */
2488290001Sglebius					queue_receive_request(sock);
2489290001Sglebius					break;
2490290001Sglebius				}
2491290001Sglebius				send_recvdone_abort(sock, isc_result);
2492290001Sglebius				if (isc_result == ISC_R_UNEXPECTED) {
2493290001Sglebius					UNEXPECTED_ERROR(__FILE__, __LINE__,
2494290001Sglebius						"SOCKET_RECV: Windows error code: %d, returning ISC error %d",
2495290001Sglebius						errstatus, isc_result);
2496290001Sglebius				}
2497290001Sglebius				break;
2498290001Sglebius
2499290001Sglebius			case SOCKET_SEND:
2500290001Sglebius				INSIST(sock->pending_iocp > 0);
2501290001Sglebius				sock->pending_iocp--;
2502290001Sglebius				INSIST(sock->pending_send > 0);
2503290001Sglebius				sock->pending_send--;
2504290001Sglebius				if (senddone_is_active(sock, lpo->dev)) {
2505290001Sglebius					lpo->dev->result = isc_result;
2506290001Sglebius					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2507290001Sglebius						"canceled_send");
2508290001Sglebius					send_senddone_event(sock, &lpo->dev);
2509290001Sglebius				}
2510290001Sglebius				break;
2511290001Sglebius
2512290001Sglebius			case SOCKET_ACCEPT:
2513290001Sglebius				INSIST(sock->pending_iocp > 0);
2514290001Sglebius				INSIST(sock->pending_accept > 0);
2515290001Sglebius
2516290001Sglebius				socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2517290001Sglebius					"Accept: errstatus=%d isc_result=%d", errstatus, isc_result);
2518290001Sglebius
2519290001Sglebius				if (acceptdone_is_active(sock, lpo->adev)) {
2520290001Sglebius					if (restart_accept(sock, lpo) == ISC_R_SUCCESS) {
2521290001Sglebius						UNLOCK(&sock->lock);
2522290001Sglebius						goto wait_again;
2523290001Sglebius					} else {
2524290001Sglebius						errstatus = GetLastError();
2525290001Sglebius						isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__);
2526290001Sglebius						socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2527290001Sglebius							"restart_accept() failed: errstatus=%d isc_result=%d",
2528290001Sglebius							errstatus, isc_result);
2529290001Sglebius					}
2530290001Sglebius				}
2531290001Sglebius
2532290001Sglebius				sock->pending_iocp--;
2533290001Sglebius				sock->pending_accept--;
2534290001Sglebius				if (acceptdone_is_active(sock, lpo->adev)) {
2535290001Sglebius					closesocket(lpo->adev->newsocket->fd);
2536290001Sglebius					lpo->adev->newsocket->fd = INVALID_SOCKET;
2537290001Sglebius					lpo->adev->newsocket->references--;
2538290001Sglebius					free_socket(&lpo->adev->newsocket, __LINE__);
2539290001Sglebius					lpo->adev->result = isc_result;
2540290001Sglebius					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2541290001Sglebius						"canceled_accept");
2542290001Sglebius					send_acceptdone_event(sock, &lpo->adev);
2543290001Sglebius				}
2544290001Sglebius				break;
2545290001Sglebius
2546290001Sglebius			case SOCKET_CONNECT:
2547290001Sglebius				INSIST(sock->pending_iocp > 0);
2548290001Sglebius				sock->pending_iocp--;
2549290001Sglebius				INSIST(sock->pending_connect == 1);
2550290001Sglebius				sock->pending_connect = 0;
2551290001Sglebius				if (connectdone_is_active(sock, lpo->cdev)) {
2552290001Sglebius					lpo->cdev->result = isc_result;
2553290001Sglebius					socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2554290001Sglebius						"canceled_connect");
2555290001Sglebius					send_connectdone_event(sock, &lpo->cdev);
2556290001Sglebius				}
2557290001Sglebius				break;
2558290001Sglebius			}
2559290001Sglebius			maybe_free_socket(&sock, __LINE__);
2560290001Sglebius
2561290001Sglebius			if (lpo != NULL)
2562290001Sglebius				HeapFree(hHeapHandle, 0, lpo);
2563290001Sglebius			continue;
2564290001Sglebius		}
2565290001Sglebius
2566290001Sglebius		messagehdr = &lpo->messagehdr;
2567290001Sglebius
2568290001Sglebius		switch (request) {
2569290001Sglebius		case SOCKET_RECV:
2570290001Sglebius			internal_recv(sock, nbytes);
2571290001Sglebius			break;
2572290001Sglebius		case SOCKET_SEND:
2573290001Sglebius			internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo);
2574290001Sglebius			break;
2575290001Sglebius		case SOCKET_ACCEPT:
2576290001Sglebius			internal_accept(sock, lpo, errstatus);
2577290001Sglebius			break;
2578290001Sglebius		case SOCKET_CONNECT:
2579290001Sglebius			internal_connect(sock, lpo, errstatus);
2580290001Sglebius			break;
2581290001Sglebius		}
2582290001Sglebius
2583290001Sglebius		if (lpo != NULL)
2584290001Sglebius			HeapFree(hHeapHandle, 0, lpo);
2585290001Sglebius	}
2586290001Sglebius
2587290001Sglebius	/*
2588290001Sglebius	 * Exit Completion Port Thread
2589290001Sglebius	 */
2590290001Sglebius	manager_log(manager, TRACE,
2591290001Sglebius		    isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2592290001Sglebius				   ISC_MSG_EXITING, "SocketIoThread exiting"));
2593290001Sglebius	return ((isc_threadresult_t)0);
2594290001Sglebius}
2595290001Sglebius
2596290001Sglebius/*
2597290001Sglebius * Create a new socket manager.
2598290001Sglebius */
2599290001Sglebiusisc_result_t
2600290001Sglebiusisc__socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) {
2601290001Sglebius	return (isc_socketmgr_create2(mctx, managerp, 0));
2602290001Sglebius}
2603290001Sglebius
2604290001Sglebiusisc_result_t
2605290001Sglebiusisc__socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp,
2606290001Sglebius		       unsigned int maxsocks)
2607290001Sglebius{
2608290001Sglebius	isc_socketmgr_t *manager;
2609290001Sglebius	isc_result_t result;
2610290001Sglebius
2611290001Sglebius	REQUIRE(managerp != NULL && *managerp == NULL);
2612290001Sglebius
2613290001Sglebius	if (maxsocks != 0)
2614290001Sglebius		return (ISC_R_NOTIMPLEMENTED);
2615290001Sglebius
2616290001Sglebius	manager = isc_mem_get(mctx, sizeof(*manager));
2617290001Sglebius	if (manager == NULL)
2618290001Sglebius		return (ISC_R_NOMEMORY);
2619290001Sglebius
2620290001Sglebius	InitSockets();
2621290001Sglebius
2622290001Sglebius	manager->magic = SOCKET_MANAGER_MAGIC;
2623290001Sglebius	manager->mctx = NULL;
2624290001Sglebius	manager->stats = NULL;
2625290001Sglebius	ISC_LIST_INIT(manager->socklist);
2626290001Sglebius	result = isc_mutex_init(&manager->lock);
2627290001Sglebius	if (result != ISC_R_SUCCESS) {
2628290001Sglebius		isc_mem_put(mctx, manager, sizeof(*manager));
2629290001Sglebius		return (result);
2630290001Sglebius	}
2631290001Sglebius	if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) {
2632290001Sglebius		DESTROYLOCK(&manager->lock);
2633290001Sglebius		isc_mem_put(mctx, manager, sizeof(*manager));
2634290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__,
2635290001Sglebius				 "isc_condition_init() %s",
2636290001Sglebius				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2637290001Sglebius						ISC_MSG_FAILED, "failed"));
2638290001Sglebius		return (ISC_R_UNEXPECTED);
2639290001Sglebius	}
2640290001Sglebius
2641290001Sglebius	isc_mem_attach(mctx, &manager->mctx);
2642290001Sglebius
2643290001Sglebius	iocompletionport_init(manager);	/* Create the Completion Ports */
2644290001Sglebius
2645290001Sglebius	manager->bShutdown = ISC_FALSE;
2646290001Sglebius	manager->totalSockets = 0;
2647290001Sglebius	manager->iocp_total = 0;
2648290001Sglebius
2649290001Sglebius	*managerp = manager;
2650290001Sglebius
2651290001Sglebius	return (ISC_R_SUCCESS);
2652290001Sglebius}
2653290001Sglebius
2654290001Sglebiusisc_result_t
2655290001Sglebiusisc__socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) {
2656290001Sglebius	REQUIRE(VALID_MANAGER(manager));
2657290001Sglebius	REQUIRE(nsockp != NULL);
2658290001Sglebius
2659290001Sglebius	return (ISC_R_NOTIMPLEMENTED);
2660290001Sglebius}
2661290001Sglebius
2662290001Sglebiusvoid
2663290001Sglebiusisc__socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) {
2664290001Sglebius	REQUIRE(VALID_MANAGER(manager));
2665290001Sglebius	REQUIRE(ISC_LIST_EMPTY(manager->socklist));
2666290001Sglebius	REQUIRE(manager->stats == NULL);
2667290001Sglebius	REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max);
2668290001Sglebius
2669290001Sglebius	isc_stats_attach(stats, &manager->stats);
2670290001Sglebius}
2671290001Sglebius
2672290001Sglebiusvoid
2673290001Sglebiusisc__socketmgr_destroy(isc_socketmgr_t **managerp) {
2674290001Sglebius	isc_socketmgr_t *manager;
2675290001Sglebius	int i;
2676290001Sglebius	isc_mem_t *mctx;
2677290001Sglebius
2678290001Sglebius	/*
2679290001Sglebius	 * Destroy a socket manager.
2680290001Sglebius	 */
2681290001Sglebius
2682290001Sglebius	REQUIRE(managerp != NULL);
2683290001Sglebius	manager = *managerp;
2684290001Sglebius	REQUIRE(VALID_MANAGER(manager));
2685290001Sglebius
2686290001Sglebius	LOCK(&manager->lock);
2687290001Sglebius
2688290001Sglebius	/*
2689290001Sglebius	 * Wait for all sockets to be destroyed.
2690290001Sglebius	 */
2691290001Sglebius	while (!ISC_LIST_EMPTY(manager->socklist)) {
2692290001Sglebius		manager_log(manager, CREATION,
2693290001Sglebius			    isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET,
2694290001Sglebius					   ISC_MSG_SOCKETSREMAIN,
2695290001Sglebius					   "sockets exist"));
2696290001Sglebius		WAIT(&manager->shutdown_ok, &manager->lock);
2697290001Sglebius	}
2698290001Sglebius
2699290001Sglebius	UNLOCK(&manager->lock);
2700290001Sglebius
2701290001Sglebius	/*
2702290001Sglebius	 * Here, we need to had some wait code for the completion port
2703290001Sglebius	 * thread.
2704290001Sglebius	 */
2705290001Sglebius	signal_iocompletionport_exit(manager);
2706290001Sglebius	manager->bShutdown = ISC_TRUE;
2707290001Sglebius
2708290001Sglebius	/*
2709290001Sglebius	 * Wait for threads to exit.
2710290001Sglebius	 */
2711290001Sglebius	for (i = 0; i < manager->maxIOCPThreads; i++) {
2712290001Sglebius		if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i],
2713290001Sglebius			NULL) != ISC_R_SUCCESS)
2714290001Sglebius			UNEXPECTED_ERROR(__FILE__, __LINE__,
2715290001Sglebius				 "isc_thread_join() for Completion Port %s",
2716290001Sglebius				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
2717290001Sglebius						ISC_MSG_FAILED, "failed"));
2718290001Sglebius	}
2719290001Sglebius	/*
2720290001Sglebius	 * Clean up.
2721290001Sglebius	 */
2722290001Sglebius
2723290001Sglebius	CloseHandle(manager->hIoCompletionPort);
2724290001Sglebius
2725290001Sglebius	(void)isc_condition_destroy(&manager->shutdown_ok);
2726290001Sglebius
2727290001Sglebius	DESTROYLOCK(&manager->lock);
2728290001Sglebius	if (manager->stats != NULL)
2729290001Sglebius		isc_stats_detach(&manager->stats);
2730290001Sglebius	manager->magic = 0;
2731290001Sglebius	mctx= manager->mctx;
2732290001Sglebius	isc_mem_put(mctx, manager, sizeof(*manager));
2733290001Sglebius
2734290001Sglebius	isc_mem_detach(&mctx);
2735290001Sglebius
2736290001Sglebius	*managerp = NULL;
2737290001Sglebius}
2738290001Sglebius
2739290001Sglebiusstatic void
2740290001Sglebiusqueue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev)
2741290001Sglebius{
2742290001Sglebius	isc_task_t *ntask = NULL;
2743290001Sglebius
2744290001Sglebius	isc_task_attach(task, &ntask);
2745290001Sglebius	dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2746290001Sglebius
2747290001Sglebius	/*
2748290001Sglebius	 * Enqueue the request.
2749290001Sglebius	 */
2750290001Sglebius	INSIST(!ISC_LINK_LINKED(dev, ev_link));
2751290001Sglebius	ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link);
2752290001Sglebius
2753290001Sglebius	socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
2754290001Sglebius		   "queue_receive_event: event %p -> task %p",
2755290001Sglebius		   dev, ntask);
2756290001Sglebius}
2757290001Sglebius
2758290001Sglebius/*
2759290001Sglebius * Check the pending receive queue, and if we have data pending, give it to this
2760290001Sglebius * caller.  If we have none, queue an I/O request.  If this caller is not the first
2761290001Sglebius * on the list, then we will just queue this event and return.
2762290001Sglebius *
2763290001Sglebius * Caller must have the socket locked.
2764290001Sglebius */
2765290001Sglebiusstatic isc_result_t
2766290001Sglebiussocket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2767290001Sglebius	    unsigned int flags)
2768290001Sglebius{
2769290001Sglebius	int cc = 0;
2770290001Sglebius	isc_task_t *ntask = NULL;
2771290001Sglebius	isc_result_t result = ISC_R_SUCCESS;
2772290001Sglebius	int recv_errno = 0;
2773290001Sglebius
2774290001Sglebius	dev->ev_sender = task;
2775290001Sglebius
2776290001Sglebius	if (sock->fd == INVALID_SOCKET)
2777290001Sglebius		return (ISC_R_EOF);
2778290001Sglebius
2779290001Sglebius	/*
2780290001Sglebius	 * Queue our event on the list of things to do.  Call our function to
2781290001Sglebius	 * attempt to fill buffers as much as possible, and return done events.
2782290001Sglebius	 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE
2783290001Sglebius	 * here and tell our caller that we could not satisfy it immediately.
2784290001Sglebius	 */
2785290001Sglebius	queue_receive_event(sock, task, dev);
2786290001Sglebius	if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
2787290001Sglebius		result = ISC_R_INPROGRESS;
2788290001Sglebius
2789290001Sglebius	completeio_recv(sock);
2790290001Sglebius
2791290001Sglebius	/*
2792290001Sglebius	 * If there are more receivers waiting for data, queue another receive
2793290001Sglebius	 * here.  If the
2794290001Sglebius	 */
2795290001Sglebius	queue_receive_request(sock);
2796290001Sglebius
2797290001Sglebius	return (result);
2798290001Sglebius}
2799290001Sglebius
2800290001Sglebiusisc_result_t
2801290001Sglebiusisc__socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist,
2802290001Sglebius		 unsigned int minimum, isc_task_t *task,
2803290001Sglebius		 isc_taskaction_t action, const void *arg)
2804290001Sglebius{
2805290001Sglebius	isc_socketevent_t *dev;
2806290001Sglebius	isc_socketmgr_t *manager;
2807290001Sglebius	unsigned int iocount;
2808290001Sglebius	isc_buffer_t *buffer;
2809290001Sglebius	isc_result_t ret;
2810290001Sglebius
2811290001Sglebius	REQUIRE(VALID_SOCKET(sock));
2812290001Sglebius	LOCK(&sock->lock);
2813290001Sglebius	CONSISTENT(sock);
2814290001Sglebius
2815290001Sglebius	/*
2816290001Sglebius	 * Make sure that the socket is not closed.  XXXMLG change error here?
2817290001Sglebius	 */
2818290001Sglebius	if (sock->fd == INVALID_SOCKET) {
2819290001Sglebius		UNLOCK(&sock->lock);
2820290001Sglebius		return (ISC_R_CONNREFUSED);
2821290001Sglebius	}
2822290001Sglebius
2823290001Sglebius	REQUIRE(buflist != NULL);
2824290001Sglebius	REQUIRE(!ISC_LIST_EMPTY(*buflist));
2825290001Sglebius	REQUIRE(task != NULL);
2826290001Sglebius	REQUIRE(action != NULL);
2827290001Sglebius
2828290001Sglebius	manager = sock->manager;
2829290001Sglebius	REQUIRE(VALID_MANAGER(manager));
2830290001Sglebius
2831290001Sglebius	iocount = isc_bufferlist_availablecount(buflist);
2832290001Sglebius	REQUIRE(iocount > 0);
2833290001Sglebius
2834290001Sglebius	INSIST(sock->bound);
2835290001Sglebius
2836290001Sglebius	dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2837290001Sglebius	if (dev == NULL) {
2838290001Sglebius		UNLOCK(&sock->lock);
2839290001Sglebius		return (ISC_R_NOMEMORY);
2840290001Sglebius	}
2841290001Sglebius
2842290001Sglebius	/*
2843290001Sglebius	 * UDP sockets are always partial read
2844290001Sglebius	 */
2845290001Sglebius	if (sock->type == isc_sockettype_udp)
2846290001Sglebius		dev->minimum = 1;
2847290001Sglebius	else {
2848290001Sglebius		if (minimum == 0)
2849290001Sglebius			dev->minimum = iocount;
2850290001Sglebius		else
2851290001Sglebius			dev->minimum = minimum;
2852290001Sglebius	}
2853290001Sglebius
2854290001Sglebius	/*
2855290001Sglebius	 * Move each buffer from the passed in list to our internal one.
2856290001Sglebius	 */
2857290001Sglebius	buffer = ISC_LIST_HEAD(*buflist);
2858290001Sglebius	while (buffer != NULL) {
2859290001Sglebius		ISC_LIST_DEQUEUE(*buflist, buffer, link);
2860290001Sglebius		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
2861290001Sglebius		buffer = ISC_LIST_HEAD(*buflist);
2862290001Sglebius	}
2863290001Sglebius
2864290001Sglebius	ret = socket_recv(sock, dev, task, 0);
2865290001Sglebius
2866290001Sglebius	UNLOCK(&sock->lock);
2867290001Sglebius	return (ret);
2868290001Sglebius}
2869290001Sglebius
2870290001Sglebiusisc_result_t
2871290001Sglebiusisc__socket_recv(isc_socket_t *sock, isc_region_t *region,
2872290001Sglebius		 unsigned int minimum, isc_task_t *task,
2873290001Sglebius		 isc_taskaction_t action, const void *arg)
2874290001Sglebius{
2875290001Sglebius	isc_socketevent_t *dev;
2876290001Sglebius	isc_socketmgr_t *manager;
2877290001Sglebius	isc_result_t ret;
2878290001Sglebius
2879290001Sglebius	REQUIRE(VALID_SOCKET(sock));
2880290001Sglebius	LOCK(&sock->lock);
2881290001Sglebius	CONSISTENT(sock);
2882290001Sglebius
2883290001Sglebius	/*
2884290001Sglebius	 * make sure that the socket's not closed
2885290001Sglebius	 */
2886290001Sglebius	if (sock->fd == INVALID_SOCKET) {
2887290001Sglebius		UNLOCK(&sock->lock);
2888290001Sglebius		return (ISC_R_CONNREFUSED);
2889290001Sglebius	}
2890290001Sglebius	REQUIRE(action != NULL);
2891290001Sglebius
2892290001Sglebius	manager = sock->manager;
2893290001Sglebius	REQUIRE(VALID_MANAGER(manager));
2894290001Sglebius
2895290001Sglebius	INSIST(sock->bound);
2896290001Sglebius
2897290001Sglebius	dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg);
2898290001Sglebius	if (dev == NULL) {
2899290001Sglebius		UNLOCK(&sock->lock);
2900290001Sglebius		return (ISC_R_NOMEMORY);
2901290001Sglebius	}
2902290001Sglebius
2903290001Sglebius	ret = isc_socket_recv2(sock, region, minimum, task, dev, 0);
2904290001Sglebius	UNLOCK(&sock->lock);
2905290001Sglebius	return (ret);
2906290001Sglebius}
2907290001Sglebius
2908290001Sglebiusisc_result_t
2909290001Sglebiusisc__socket_recv2(isc_socket_t *sock, isc_region_t *region,
2910290001Sglebius		  unsigned int minimum, isc_task_t *task,
2911290001Sglebius		  isc_socketevent_t *event, unsigned int flags)
2912290001Sglebius{
2913290001Sglebius	isc_result_t ret;
2914290001Sglebius
2915290001Sglebius	REQUIRE(VALID_SOCKET(sock));
2916290001Sglebius	LOCK(&sock->lock);
2917290001Sglebius	CONSISTENT(sock);
2918290001Sglebius
2919290001Sglebius	event->result = ISC_R_UNEXPECTED;
2920290001Sglebius	event->ev_sender = sock;
2921290001Sglebius	/*
2922290001Sglebius	 * make sure that the socket's not closed
2923290001Sglebius	 */
2924290001Sglebius	if (sock->fd == INVALID_SOCKET) {
2925290001Sglebius		UNLOCK(&sock->lock);
2926290001Sglebius		return (ISC_R_CONNREFUSED);
2927290001Sglebius	}
2928290001Sglebius
2929290001Sglebius	ISC_LIST_INIT(event->bufferlist);
2930290001Sglebius	event->region = *region;
2931290001Sglebius	event->n = 0;
2932290001Sglebius	event->offset = 0;
2933290001Sglebius	event->attributes = 0;
2934290001Sglebius
2935290001Sglebius	/*
2936290001Sglebius	 * UDP sockets are always partial read.
2937290001Sglebius	 */
2938290001Sglebius	if (sock->type == isc_sockettype_udp)
2939290001Sglebius		event->minimum = 1;
2940290001Sglebius	else {
2941290001Sglebius		if (minimum == 0)
2942290001Sglebius			event->minimum = region->length;
2943290001Sglebius		else
2944290001Sglebius			event->minimum = minimum;
2945290001Sglebius	}
2946290001Sglebius
2947290001Sglebius	ret = socket_recv(sock, event, task, flags);
2948290001Sglebius	UNLOCK(&sock->lock);
2949290001Sglebius	return (ret);
2950290001Sglebius}
2951290001Sglebius
2952290001Sglebius/*
2953290001Sglebius * Caller must have the socket locked.
2954290001Sglebius */
2955290001Sglebiusstatic isc_result_t
2956290001Sglebiussocket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task,
2957290001Sglebius	    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
2958290001Sglebius	    unsigned int flags)
2959290001Sglebius{
2960290001Sglebius	int io_state;
2961290001Sglebius	int send_errno = 0;
2962290001Sglebius	int cc = 0;
2963290001Sglebius	isc_task_t *ntask = NULL;
2964290001Sglebius	isc_result_t result = ISC_R_SUCCESS;
2965290001Sglebius
2966290001Sglebius	dev->ev_sender = task;
2967290001Sglebius
2968290001Sglebius	set_dev_address(address, sock, dev);
2969290001Sglebius	if (pktinfo != NULL) {
2970290001Sglebius		socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET,
2971290001Sglebius			   ISC_MSG_PKTINFOPROVIDED,
2972290001Sglebius			   "pktinfo structure provided, ifindex %u (set to 0)",
2973290001Sglebius			   pktinfo->ipi6_ifindex);
2974290001Sglebius
2975290001Sglebius		dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO;
2976290001Sglebius		dev->pktinfo = *pktinfo;
2977290001Sglebius		/*
2978290001Sglebius		 * Set the pktinfo index to 0 here, to let the kernel decide
2979290001Sglebius		 * what interface it should send on.
2980290001Sglebius		 */
2981290001Sglebius		dev->pktinfo.ipi6_ifindex = 0;
2982290001Sglebius	}
2983290001Sglebius
2984290001Sglebius	io_state = startio_send(sock, dev, &cc, &send_errno);
2985290001Sglebius	switch (io_state) {
2986290001Sglebius	case DOIO_PENDING:	/* I/O started. Nothing more to do */
2987290001Sglebius	case DOIO_SOFT:
2988290001Sglebius		/*
2989290001Sglebius		 * We couldn't send all or part of the request right now, so
2990290001Sglebius		 * queue it unless ISC_SOCKFLAG_NORETRY is set.
2991290001Sglebius		 */
2992290001Sglebius		if ((flags & ISC_SOCKFLAG_NORETRY) == 0) {
2993290001Sglebius			isc_task_attach(task, &ntask);
2994290001Sglebius			dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED;
2995290001Sglebius
2996290001Sglebius			/*
2997290001Sglebius			 * Enqueue the request.
2998290001Sglebius			 */
2999290001Sglebius			INSIST(!ISC_LINK_LINKED(dev, ev_link));
3000290001Sglebius			ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link);
3001290001Sglebius
3002290001Sglebius			socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0,
3003290001Sglebius				   "socket_send: event %p -> task %p",
3004290001Sglebius				   dev, ntask);
3005290001Sglebius
3006290001Sglebius			if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0)
3007290001Sglebius				result = ISC_R_INPROGRESS;
3008290001Sglebius			break;
3009290001Sglebius		}
3010290001Sglebius
3011290001Sglebius	case DOIO_SUCCESS:
3012290001Sglebius		break;
3013290001Sglebius	}
3014290001Sglebius
3015290001Sglebius	return (result);
3016290001Sglebius}
3017290001Sglebius
3018290001Sglebiusisc_result_t
3019290001Sglebiusisc__socket_send(isc_socket_t *sock, isc_region_t *region,
3020290001Sglebius		 isc_task_t *task, isc_taskaction_t action, const void *arg)
3021290001Sglebius{
3022290001Sglebius	/*
3023290001Sglebius	 * REQUIRE() checking is performed in isc_socket_sendto().
3024290001Sglebius	 */
3025290001Sglebius	return (isc_socket_sendto(sock, region, task, action, arg, NULL,
3026290001Sglebius				  NULL));
3027290001Sglebius}
3028290001Sglebius
3029290001Sglebiusisc_result_t
3030290001Sglebiusisc__socket_sendto(isc_socket_t *sock, isc_region_t *region,
3031290001Sglebius		   isc_task_t *task, isc_taskaction_t action, const void *arg,
3032290001Sglebius		   isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3033290001Sglebius{
3034290001Sglebius	isc_socketevent_t *dev;
3035290001Sglebius	isc_socketmgr_t *manager;
3036290001Sglebius	isc_result_t ret;
3037290001Sglebius
3038290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3039290001Sglebius	REQUIRE(sock->type != isc_sockettype_fdwatch);
3040290001Sglebius
3041290001Sglebius	LOCK(&sock->lock);
3042290001Sglebius	CONSISTENT(sock);
3043290001Sglebius
3044290001Sglebius	/*
3045290001Sglebius	 * make sure that the socket's not closed
3046290001Sglebius	 */
3047290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3048290001Sglebius		UNLOCK(&sock->lock);
3049290001Sglebius		return (ISC_R_CONNREFUSED);
3050290001Sglebius	}
3051290001Sglebius	REQUIRE(region != NULL);
3052290001Sglebius	REQUIRE(task != NULL);
3053290001Sglebius	REQUIRE(action != NULL);
3054290001Sglebius
3055290001Sglebius	manager = sock->manager;
3056290001Sglebius	REQUIRE(VALID_MANAGER(manager));
3057290001Sglebius
3058290001Sglebius	INSIST(sock->bound);
3059290001Sglebius
3060290001Sglebius	dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
3061290001Sglebius	if (dev == NULL) {
3062290001Sglebius		UNLOCK(&sock->lock);
3063290001Sglebius		return (ISC_R_NOMEMORY);
3064290001Sglebius	}
3065290001Sglebius	dev->region = *region;
3066290001Sglebius
3067290001Sglebius	ret = socket_send(sock, dev, task, address, pktinfo, 0);
3068290001Sglebius	UNLOCK(&sock->lock);
3069290001Sglebius	return (ret);
3070290001Sglebius}
3071290001Sglebius
3072290001Sglebiusisc_result_t
3073290001Sglebiusisc__socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist,
3074290001Sglebius		  isc_task_t *task, isc_taskaction_t action, const void *arg)
3075290001Sglebius{
3076290001Sglebius	return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL,
3077290001Sglebius				   NULL));
3078290001Sglebius}
3079290001Sglebius
3080290001Sglebiusisc_result_t
3081290001Sglebiusisc__socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist,
3082290001Sglebius		    isc_task_t *task, isc_taskaction_t action, const void *arg,
3083290001Sglebius		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo)
3084290001Sglebius{
3085290001Sglebius	isc_socketevent_t *dev;
3086290001Sglebius	isc_socketmgr_t *manager;
3087290001Sglebius	unsigned int iocount;
3088290001Sglebius	isc_buffer_t *buffer;
3089290001Sglebius	isc_result_t ret;
3090290001Sglebius
3091290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3092290001Sglebius
3093290001Sglebius	LOCK(&sock->lock);
3094290001Sglebius	CONSISTENT(sock);
3095290001Sglebius
3096290001Sglebius	/*
3097290001Sglebius	 * make sure that the socket's not closed
3098290001Sglebius	 */
3099290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3100290001Sglebius		UNLOCK(&sock->lock);
3101290001Sglebius		return (ISC_R_CONNREFUSED);
3102290001Sglebius	}
3103290001Sglebius	REQUIRE(buflist != NULL);
3104290001Sglebius	REQUIRE(!ISC_LIST_EMPTY(*buflist));
3105290001Sglebius	REQUIRE(task != NULL);
3106290001Sglebius	REQUIRE(action != NULL);
3107290001Sglebius
3108290001Sglebius	manager = sock->manager;
3109290001Sglebius	REQUIRE(VALID_MANAGER(manager));
3110290001Sglebius
3111290001Sglebius	iocount = isc_bufferlist_usedcount(buflist);
3112290001Sglebius	REQUIRE(iocount > 0);
3113290001Sglebius
3114290001Sglebius	dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg);
3115290001Sglebius	if (dev == NULL) {
3116290001Sglebius		UNLOCK(&sock->lock);
3117290001Sglebius		return (ISC_R_NOMEMORY);
3118290001Sglebius	}
3119290001Sglebius
3120290001Sglebius	/*
3121290001Sglebius	 * Move each buffer from the passed in list to our internal one.
3122290001Sglebius	 */
3123290001Sglebius	buffer = ISC_LIST_HEAD(*buflist);
3124290001Sglebius	while (buffer != NULL) {
3125290001Sglebius		ISC_LIST_DEQUEUE(*buflist, buffer, link);
3126290001Sglebius		ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link);
3127290001Sglebius		buffer = ISC_LIST_HEAD(*buflist);
3128290001Sglebius	}
3129290001Sglebius
3130290001Sglebius	ret = socket_send(sock, dev, task, address, pktinfo, 0);
3131290001Sglebius	UNLOCK(&sock->lock);
3132290001Sglebius	return (ret);
3133290001Sglebius}
3134290001Sglebius
3135290001Sglebiusisc_result_t
3136290001Sglebiusisc__socket_sendto2(isc_socket_t *sock, isc_region_t *region,
3137290001Sglebius		    isc_task_t *task,
3138290001Sglebius		    isc_sockaddr_t *address, struct in6_pktinfo *pktinfo,
3139290001Sglebius		    isc_socketevent_t *event, unsigned int flags)
3140290001Sglebius{
3141290001Sglebius	isc_result_t ret;
3142290001Sglebius
3143290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3144290001Sglebius	LOCK(&sock->lock);
3145290001Sglebius	CONSISTENT(sock);
3146290001Sglebius
3147290001Sglebius	REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0);
3148290001Sglebius	if ((flags & ISC_SOCKFLAG_NORETRY) != 0)
3149290001Sglebius		REQUIRE(sock->type == isc_sockettype_udp);
3150290001Sglebius	event->ev_sender = sock;
3151290001Sglebius	event->result = ISC_R_UNEXPECTED;
3152290001Sglebius	/*
3153290001Sglebius	 * make sure that the socket's not closed
3154290001Sglebius	 */
3155290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3156290001Sglebius		UNLOCK(&sock->lock);
3157290001Sglebius		return (ISC_R_CONNREFUSED);
3158290001Sglebius	}
3159290001Sglebius	ISC_LIST_INIT(event->bufferlist);
3160290001Sglebius	event->region = *region;
3161290001Sglebius	event->n = 0;
3162290001Sglebius	event->offset = 0;
3163290001Sglebius	event->attributes = 0;
3164290001Sglebius
3165290001Sglebius	ret = socket_send(sock, event, task, address, pktinfo, flags);
3166290001Sglebius	UNLOCK(&sock->lock);
3167290001Sglebius	return (ret);
3168290001Sglebius}
3169290001Sglebius
3170290001Sglebiusisc_result_t
3171290001Sglebiusisc__socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr,
3172290001Sglebius		 unsigned int options) {
3173290001Sglebius	int bind_errno;
3174290001Sglebius	char strbuf[ISC_STRERRORSIZE];
3175290001Sglebius	int on = 1;
3176290001Sglebius
3177290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3178290001Sglebius	LOCK(&sock->lock);
3179290001Sglebius	CONSISTENT(sock);
3180290001Sglebius
3181290001Sglebius	/*
3182290001Sglebius	 * make sure that the socket's not closed
3183290001Sglebius	 */
3184290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3185290001Sglebius		UNLOCK(&sock->lock);
3186290001Sglebius		return (ISC_R_CONNREFUSED);
3187290001Sglebius	}
3188290001Sglebius
3189290001Sglebius	INSIST(!sock->bound);
3190290001Sglebius	INSIST(!sock->dupped);
3191290001Sglebius
3192290001Sglebius	if (sock->pf != sockaddr->type.sa.sa_family) {
3193290001Sglebius		UNLOCK(&sock->lock);
3194290001Sglebius		return (ISC_R_FAMILYMISMATCH);
3195290001Sglebius	}
3196290001Sglebius	/*
3197290001Sglebius	 * Only set SO_REUSEADDR when we want a specific port.
3198290001Sglebius	 */
3199290001Sglebius	if ((options & ISC_SOCKET_REUSEADDRESS) != 0 &&
3200290001Sglebius	    isc_sockaddr_getport(sockaddr) != (in_port_t)0 &&
3201290001Sglebius	    setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (char *)&on,
3202290001Sglebius		       sizeof(on)) < 0) {
3203290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__,
3204290001Sglebius				 "setsockopt(%d) %s", sock->fd,
3205290001Sglebius				 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL,
3206290001Sglebius						ISC_MSG_FAILED, "failed"));
3207290001Sglebius		/* Press on... */
3208290001Sglebius	}
3209290001Sglebius	if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) {
3210290001Sglebius		bind_errno = WSAGetLastError();
3211290001Sglebius		UNLOCK(&sock->lock);
3212290001Sglebius		switch (bind_errno) {
3213290001Sglebius		case WSAEACCES:
3214290001Sglebius			return (ISC_R_NOPERM);
3215290001Sglebius		case WSAEADDRNOTAVAIL:
3216290001Sglebius			return (ISC_R_ADDRNOTAVAIL);
3217290001Sglebius		case WSAEADDRINUSE:
3218290001Sglebius			return (ISC_R_ADDRINUSE);
3219290001Sglebius		case WSAEINVAL:
3220290001Sglebius			return (ISC_R_BOUND);
3221290001Sglebius		default:
3222290001Sglebius			isc__strerror(bind_errno, strbuf, sizeof(strbuf));
3223290001Sglebius			UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s",
3224290001Sglebius					 strbuf);
3225290001Sglebius			return (ISC_R_UNEXPECTED);
3226290001Sglebius		}
3227290001Sglebius	}
3228290001Sglebius
3229290001Sglebius	socket_log(__LINE__, sock, sockaddr, TRACE,
3230290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound");
3231290001Sglebius	sock->bound = 1;
3232290001Sglebius
3233290001Sglebius	UNLOCK(&sock->lock);
3234290001Sglebius	return (ISC_R_SUCCESS);
3235290001Sglebius}
3236290001Sglebius
3237290001Sglebiusisc_result_t
3238290001Sglebiusisc__socket_filter(isc_socket_t *sock, const char *filter) {
3239290001Sglebius	UNUSED(sock);
3240290001Sglebius	UNUSED(filter);
3241290001Sglebius
3242290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3243290001Sglebius	return (ISC_R_NOTIMPLEMENTED);
3244290001Sglebius}
3245290001Sglebius
3246290001Sglebius/*
3247290001Sglebius * Set up to listen on a given socket.  We do this by creating an internal
3248290001Sglebius * event that will be dispatched when the socket has read activity.  The
3249290001Sglebius * watcher will send the internal event to the task when there is a new
3250290001Sglebius * connection.
3251290001Sglebius *
3252290001Sglebius * Unlike in read, we don't preallocate a done event here.  Every time there
3253290001Sglebius * is a new connection we'll have to allocate a new one anyway, so we might
3254290001Sglebius * as well keep things simple rather than having to track them.
3255290001Sglebius */
3256290001Sglebiusisc_result_t
3257290001Sglebiusisc__socket_listen(isc_socket_t *sock, unsigned int backlog) {
3258290001Sglebius	char strbuf[ISC_STRERRORSIZE];
3259290001Sglebius
3260290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3261290001Sglebius
3262290001Sglebius	LOCK(&sock->lock);
3263290001Sglebius	CONSISTENT(sock);
3264290001Sglebius
3265290001Sglebius	/*
3266290001Sglebius	 * make sure that the socket's not closed
3267290001Sglebius	 */
3268290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3269290001Sglebius		UNLOCK(&sock->lock);
3270290001Sglebius		return (ISC_R_CONNREFUSED);
3271290001Sglebius	}
3272290001Sglebius
3273290001Sglebius	REQUIRE(!sock->listener);
3274290001Sglebius	REQUIRE(sock->bound);
3275290001Sglebius	REQUIRE(sock->type == isc_sockettype_tcp);
3276290001Sglebius
3277290001Sglebius	if (backlog == 0)
3278290001Sglebius		backlog = SOMAXCONN;
3279290001Sglebius
3280290001Sglebius	if (listen(sock->fd, (int)backlog) < 0) {
3281290001Sglebius		UNLOCK(&sock->lock);
3282290001Sglebius		isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3283290001Sglebius
3284290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf);
3285290001Sglebius
3286290001Sglebius		return (ISC_R_UNEXPECTED);
3287290001Sglebius	}
3288290001Sglebius
3289290001Sglebius	socket_log(__LINE__, sock, NULL, TRACE,
3290290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening");
3291290001Sglebius	sock->listener = 1;
3292290001Sglebius	_set_state(sock, SOCK_LISTEN);
3293290001Sglebius
3294290001Sglebius	UNLOCK(&sock->lock);
3295290001Sglebius	return (ISC_R_SUCCESS);
3296290001Sglebius}
3297290001Sglebius
3298290001Sglebius/*
3299290001Sglebius * This should try to do aggressive accept() XXXMLG
3300290001Sglebius */
3301290001Sglebiusisc_result_t
3302290001Sglebiusisc__socket_accept(isc_socket_t *sock,
3303290001Sglebius		   isc_task_t *task, isc_taskaction_t action, const void *arg)
3304290001Sglebius{
3305290001Sglebius	isc_socket_newconnev_t *adev;
3306290001Sglebius	isc_socketmgr_t *manager;
3307290001Sglebius	isc_task_t *ntask = NULL;
3308290001Sglebius	isc_socket_t *nsock;
3309290001Sglebius	isc_result_t result;
3310290001Sglebius	IoCompletionInfo *lpo;
3311290001Sglebius
3312290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3313290001Sglebius
3314290001Sglebius	manager = sock->manager;
3315290001Sglebius	REQUIRE(VALID_MANAGER(manager));
3316290001Sglebius
3317290001Sglebius	LOCK(&sock->lock);
3318290001Sglebius	CONSISTENT(sock);
3319290001Sglebius
3320290001Sglebius	/*
3321290001Sglebius	 * make sure that the socket's not closed
3322290001Sglebius	 */
3323290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3324290001Sglebius		UNLOCK(&sock->lock);
3325290001Sglebius		return (ISC_R_CONNREFUSED);
3326290001Sglebius	}
3327290001Sglebius
3328290001Sglebius	REQUIRE(sock->listener);
3329290001Sglebius
3330290001Sglebius	/*
3331290001Sglebius	 * Sender field is overloaded here with the task we will be sending
3332290001Sglebius	 * this event to.  Just before the actual event is delivered the
3333290001Sglebius	 * actual ev_sender will be touched up to be the socket.
3334290001Sglebius	 */
3335290001Sglebius	adev = (isc_socket_newconnev_t *)
3336290001Sglebius		isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN,
3337290001Sglebius				   action, arg, sizeof(*adev));
3338290001Sglebius	if (adev == NULL) {
3339290001Sglebius		UNLOCK(&sock->lock);
3340290001Sglebius		return (ISC_R_NOMEMORY);
3341290001Sglebius	}
3342290001Sglebius	ISC_LINK_INIT(adev, ev_link);
3343290001Sglebius
3344290001Sglebius	result = allocate_socket(manager, sock->type, &nsock);
3345290001Sglebius	if (result != ISC_R_SUCCESS) {
3346290001Sglebius		isc_event_free((isc_event_t **)&adev);
3347290001Sglebius		UNLOCK(&sock->lock);
3348290001Sglebius		return (result);
3349290001Sglebius	}
3350290001Sglebius
3351290001Sglebius	/*
3352290001Sglebius	 * AcceptEx() requires we pass in a socket.
3353290001Sglebius	 */
3354290001Sglebius	nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP);
3355290001Sglebius	if (nsock->fd == INVALID_SOCKET) {
3356290001Sglebius		free_socket(&nsock, __LINE__);
3357290001Sglebius		isc_event_free((isc_event_t **)&adev);
3358290001Sglebius		UNLOCK(&sock->lock);
3359290001Sglebius		return (ISC_R_FAILURE); // XXXMLG need real error message
3360290001Sglebius	}
3361290001Sglebius
3362290001Sglebius	/*
3363290001Sglebius	 * Attach to socket and to task.
3364290001Sglebius	 */
3365290001Sglebius	isc_task_attach(task, &ntask);
3366290001Sglebius	if (isc_task_exiting(ntask)) {
3367290001Sglebius		free_socket(&nsock, __LINE__);
3368290001Sglebius		isc_task_detach(&ntask);
3369290001Sglebius		isc_event_free(ISC_EVENT_PTR(&adev));
3370290001Sglebius		UNLOCK(&sock->lock);
3371290001Sglebius		return (ISC_R_SHUTTINGDOWN);
3372290001Sglebius	}
3373290001Sglebius	nsock->references++;
3374290001Sglebius
3375290001Sglebius	adev->ev_sender = ntask;
3376290001Sglebius	adev->newsocket = nsock;
3377290001Sglebius	_set_state(nsock, SOCK_ACCEPT);
3378290001Sglebius
3379290001Sglebius	/*
3380290001Sglebius	 * Queue io completion for an accept().
3381290001Sglebius	 */
3382290001Sglebius	lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3383290001Sglebius					    HEAP_ZERO_MEMORY,
3384290001Sglebius					    sizeof(IoCompletionInfo));
3385290001Sglebius	RUNTIME_CHECK(lpo != NULL);
3386290001Sglebius	lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY,
3387290001Sglebius		(sizeof(SOCKADDR_STORAGE) + 16) * 2);
3388290001Sglebius	RUNTIME_CHECK(lpo->acceptbuffer != NULL);
3389290001Sglebius
3390290001Sglebius	lpo->adev = adev;
3391290001Sglebius	lpo->request_type = SOCKET_ACCEPT;
3392290001Sglebius
3393290001Sglebius	ISCAcceptEx(sock->fd,
3394290001Sglebius		    nsock->fd,				/* Accepted Socket */
3395290001Sglebius		    lpo->acceptbuffer,			/* Buffer for initial Recv */
3396290001Sglebius		    0,					/* Length of Buffer */
3397290001Sglebius		    sizeof(SOCKADDR_STORAGE) + 16,		/* Local address length + 16 */
3398290001Sglebius		    sizeof(SOCKADDR_STORAGE) + 16,		/* Remote address lengh + 16 */
3399290001Sglebius		    (LPDWORD)&lpo->received_bytes,	/* Bytes Recved */
3400290001Sglebius		    (LPOVERLAPPED)lpo			/* Overlapped structure */
3401290001Sglebius		    );
3402290001Sglebius	iocompletionport_update(nsock);
3403290001Sglebius
3404290001Sglebius	socket_log(__LINE__, sock, NULL, TRACE,
3405290001Sglebius		   isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND,
3406290001Sglebius		   "accepting for nsock %p fd %d", nsock, nsock->fd);
3407290001Sglebius
3408290001Sglebius	/*
3409290001Sglebius	 * Enqueue the event
3410290001Sglebius	 */
3411290001Sglebius	ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link);
3412290001Sglebius	sock->pending_accept++;
3413290001Sglebius	sock->pending_iocp++;
3414290001Sglebius
3415290001Sglebius	UNLOCK(&sock->lock);
3416290001Sglebius	return (ISC_R_SUCCESS);
3417290001Sglebius}
3418290001Sglebius
3419290001Sglebiusisc_result_t
3420290001Sglebiusisc__socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr,
3421290001Sglebius		    isc_task_t *task, isc_taskaction_t action, const void *arg)
3422290001Sglebius{
3423290001Sglebius	char strbuf[ISC_STRERRORSIZE];
3424290001Sglebius	isc_socket_connev_t *cdev;
3425290001Sglebius	isc_task_t *ntask = NULL;
3426290001Sglebius	isc_socketmgr_t *manager;
3427290001Sglebius	IoCompletionInfo *lpo;
3428290001Sglebius	int bind_errno;
3429290001Sglebius
3430290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3431290001Sglebius	REQUIRE(addr != NULL);
3432290001Sglebius	REQUIRE(task != NULL);
3433290001Sglebius	REQUIRE(action != NULL);
3434290001Sglebius
3435290001Sglebius	manager = sock->manager;
3436290001Sglebius	REQUIRE(VALID_MANAGER(manager));
3437290001Sglebius	REQUIRE(addr != NULL);
3438290001Sglebius
3439290001Sglebius	if (isc_sockaddr_ismulticast(addr))
3440290001Sglebius		return (ISC_R_MULTICAST);
3441290001Sglebius
3442290001Sglebius	LOCK(&sock->lock);
3443290001Sglebius	CONSISTENT(sock);
3444290001Sglebius
3445290001Sglebius	/*
3446290001Sglebius	 * make sure that the socket's not closed
3447290001Sglebius	 */
3448290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3449290001Sglebius		UNLOCK(&sock->lock);
3450290001Sglebius		return (ISC_R_CONNREFUSED);
3451290001Sglebius	}
3452290001Sglebius
3453290001Sglebius	/*
3454290001Sglebius	 * Windows sockets won't connect unless the socket is bound.
3455290001Sglebius	 */
3456290001Sglebius	if (!sock->bound) {
3457290001Sglebius		isc_sockaddr_t any;
3458290001Sglebius
3459290001Sglebius		isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr));
3460290001Sglebius		if (bind(sock->fd, &any.type.sa, any.length) < 0) {
3461290001Sglebius			bind_errno = WSAGetLastError();
3462290001Sglebius			UNLOCK(&sock->lock);
3463290001Sglebius			switch (bind_errno) {
3464290001Sglebius			case WSAEACCES:
3465290001Sglebius				return (ISC_R_NOPERM);
3466290001Sglebius			case WSAEADDRNOTAVAIL:
3467290001Sglebius				return (ISC_R_ADDRNOTAVAIL);
3468290001Sglebius			case WSAEADDRINUSE:
3469290001Sglebius				return (ISC_R_ADDRINUSE);
3470290001Sglebius			case WSAEINVAL:
3471290001Sglebius				return (ISC_R_BOUND);
3472290001Sglebius			default:
3473290001Sglebius				isc__strerror(bind_errno, strbuf,
3474290001Sglebius					      sizeof(strbuf));
3475290001Sglebius				UNEXPECTED_ERROR(__FILE__, __LINE__,
3476290001Sglebius						 "bind: %s", strbuf);
3477290001Sglebius				return (ISC_R_UNEXPECTED);
3478290001Sglebius			}
3479290001Sglebius		}
3480290001Sglebius		sock->bound = 1;
3481290001Sglebius	}
3482290001Sglebius
3483290001Sglebius	REQUIRE(!sock->pending_connect);
3484290001Sglebius
3485290001Sglebius	cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock,
3486290001Sglebius							ISC_SOCKEVENT_CONNECT,
3487290001Sglebius							action,	arg,
3488290001Sglebius							sizeof(*cdev));
3489290001Sglebius	if (cdev == NULL) {
3490290001Sglebius		UNLOCK(&sock->lock);
3491290001Sglebius		return (ISC_R_NOMEMORY);
3492290001Sglebius	}
3493290001Sglebius	ISC_LINK_INIT(cdev, ev_link);
3494290001Sglebius
3495290001Sglebius	if (sock->type == isc_sockettype_tcp) {
3496290001Sglebius		/*
3497290001Sglebius		 * Queue io completion for an accept().
3498290001Sglebius		 */
3499290001Sglebius		lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle,
3500290001Sglebius						    HEAP_ZERO_MEMORY,
3501290001Sglebius						    sizeof(IoCompletionInfo));
3502290001Sglebius		lpo->cdev = cdev;
3503290001Sglebius		lpo->request_type = SOCKET_CONNECT;
3504290001Sglebius
3505290001Sglebius		sock->address = *addr;
3506290001Sglebius		ISCConnectEx(sock->fd, &addr->type.sa, addr->length,
3507290001Sglebius			NULL, 0, NULL, (LPOVERLAPPED)lpo);
3508290001Sglebius
3509290001Sglebius		/*
3510290001Sglebius		 * Attach to task.
3511290001Sglebius		 */
3512290001Sglebius		isc_task_attach(task, &ntask);
3513290001Sglebius		cdev->ev_sender = ntask;
3514290001Sglebius
3515290001Sglebius		sock->pending_connect = 1;
3516290001Sglebius		_set_state(sock, SOCK_CONNECT);
3517290001Sglebius
3518290001Sglebius		/*
3519290001Sglebius		 * Enqueue the request.
3520290001Sglebius		 */
3521290001Sglebius		sock->connect_ev = cdev;
3522290001Sglebius		sock->pending_iocp++;
3523290001Sglebius	} else {
3524290001Sglebius		WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL);
3525290001Sglebius		cdev->result = ISC_R_SUCCESS;
3526290001Sglebius		isc_task_send(task, (isc_event_t **)&cdev);
3527290001Sglebius	}
3528290001Sglebius	CONSISTENT(sock);
3529290001Sglebius	UNLOCK(&sock->lock);
3530290001Sglebius
3531290001Sglebius	return (ISC_R_SUCCESS);
3532290001Sglebius}
3533290001Sglebius
3534290001Sglebiusisc_result_t
3535290001Sglebiusisc__socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3536290001Sglebius	isc_result_t result;
3537290001Sglebius
3538290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3539290001Sglebius	REQUIRE(addressp != NULL);
3540290001Sglebius
3541290001Sglebius	LOCK(&sock->lock);
3542290001Sglebius	CONSISTENT(sock);
3543290001Sglebius
3544290001Sglebius	/*
3545290001Sglebius	 * make sure that the socket's not closed
3546290001Sglebius	 */
3547290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3548290001Sglebius		UNLOCK(&sock->lock);
3549290001Sglebius		return (ISC_R_CONNREFUSED);
3550290001Sglebius	}
3551290001Sglebius
3552290001Sglebius	if (sock->connected) {
3553290001Sglebius		*addressp = sock->address;
3554290001Sglebius		result = ISC_R_SUCCESS;
3555290001Sglebius	} else {
3556290001Sglebius		result = ISC_R_NOTCONNECTED;
3557290001Sglebius	}
3558290001Sglebius
3559290001Sglebius	UNLOCK(&sock->lock);
3560290001Sglebius
3561290001Sglebius	return (result);
3562290001Sglebius}
3563290001Sglebius
3564290001Sglebiusisc_result_t
3565290001Sglebiusisc__socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) {
3566290001Sglebius	ISC_SOCKADDR_LEN_T len;
3567290001Sglebius	isc_result_t result;
3568290001Sglebius	char strbuf[ISC_STRERRORSIZE];
3569290001Sglebius
3570290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3571290001Sglebius	REQUIRE(addressp != NULL);
3572290001Sglebius
3573290001Sglebius	LOCK(&sock->lock);
3574290001Sglebius	CONSISTENT(sock);
3575290001Sglebius
3576290001Sglebius	/*
3577290001Sglebius	 * make sure that the socket's not closed
3578290001Sglebius	 */
3579290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3580290001Sglebius		UNLOCK(&sock->lock);
3581290001Sglebius		return (ISC_R_CONNREFUSED);
3582290001Sglebius	}
3583290001Sglebius
3584290001Sglebius	if (!sock->bound) {
3585290001Sglebius		result = ISC_R_NOTBOUND;
3586290001Sglebius		goto out;
3587290001Sglebius	}
3588290001Sglebius
3589290001Sglebius	result = ISC_R_SUCCESS;
3590290001Sglebius
3591290001Sglebius	len = sizeof(addressp->type);
3592290001Sglebius	if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) {
3593290001Sglebius		isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf));
3594290001Sglebius		UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s",
3595290001Sglebius				 strbuf);
3596290001Sglebius		result = ISC_R_UNEXPECTED;
3597290001Sglebius		goto out;
3598290001Sglebius	}
3599290001Sglebius	addressp->length = (unsigned int)len;
3600290001Sglebius
3601290001Sglebius out:
3602290001Sglebius	UNLOCK(&sock->lock);
3603290001Sglebius
3604290001Sglebius	return (result);
3605290001Sglebius}
3606290001Sglebius
3607290001Sglebius/*
3608290001Sglebius * Run through the list of events on this socket, and cancel the ones
3609290001Sglebius * queued for task "task" of type "how".  "how" is a bitmask.
3610290001Sglebius */
3611290001Sglebiusvoid
3612290001Sglebiusisc__socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) {
3613290001Sglebius
3614290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3615290001Sglebius
3616290001Sglebius	/*
3617290001Sglebius	 * Quick exit if there is nothing to do.  Don't even bother locking
3618290001Sglebius	 * in this case.
3619290001Sglebius	 */
3620290001Sglebius	if (how == 0)
3621290001Sglebius		return;
3622290001Sglebius
3623290001Sglebius	LOCK(&sock->lock);
3624290001Sglebius	CONSISTENT(sock);
3625290001Sglebius
3626290001Sglebius	/*
3627290001Sglebius	 * make sure that the socket's not closed
3628290001Sglebius	 */
3629290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3630290001Sglebius		UNLOCK(&sock->lock);
3631290001Sglebius		return;
3632290001Sglebius	}
3633290001Sglebius
3634290001Sglebius	/*
3635290001Sglebius	 * All of these do the same thing, more or less.
3636290001Sglebius	 * Each will:
3637290001Sglebius	 *	o If the internal event is marked as "posted" try to
3638290001Sglebius	 *	  remove it from the task's queue.  If this fails, mark it
3639290001Sglebius	 *	  as canceled instead, and let the task clean it up later.
3640290001Sglebius	 *	o For each I/O request for that task of that type, post
3641290001Sglebius	 *	  its done event with status of "ISC_R_CANCELED".
3642290001Sglebius	 *	o Reset any state needed.
3643290001Sglebius	 */
3644290001Sglebius
3645290001Sglebius	if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) {
3646290001Sglebius		isc_socketevent_t      *dev;
3647290001Sglebius		isc_socketevent_t      *next;
3648290001Sglebius		isc_task_t	       *current_task;
3649290001Sglebius
3650290001Sglebius		dev = ISC_LIST_HEAD(sock->recv_list);
3651290001Sglebius		while (dev != NULL) {
3652290001Sglebius			current_task = dev->ev_sender;
3653290001Sglebius			next = ISC_LIST_NEXT(dev, ev_link);
3654290001Sglebius			if ((task == NULL) || (task == current_task)) {
3655290001Sglebius				dev->result = ISC_R_CANCELED;
3656290001Sglebius				send_recvdone_event(sock, &dev);
3657290001Sglebius			}
3658290001Sglebius			dev = next;
3659290001Sglebius		}
3660290001Sglebius	}
3661290001Sglebius	how &= ~ISC_SOCKCANCEL_RECV;
3662290001Sglebius
3663290001Sglebius	if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) {
3664290001Sglebius		isc_socketevent_t      *dev;
3665290001Sglebius		isc_socketevent_t      *next;
3666290001Sglebius		isc_task_t	       *current_task;
3667290001Sglebius
3668290001Sglebius		dev = ISC_LIST_HEAD(sock->send_list);
3669290001Sglebius
3670290001Sglebius		while (dev != NULL) {
3671290001Sglebius			current_task = dev->ev_sender;
3672290001Sglebius			next = ISC_LIST_NEXT(dev, ev_link);
3673290001Sglebius			if ((task == NULL) || (task == current_task)) {
3674290001Sglebius				dev->result = ISC_R_CANCELED;
3675290001Sglebius				send_senddone_event(sock, &dev);
3676290001Sglebius			}
3677290001Sglebius			dev = next;
3678290001Sglebius		}
3679290001Sglebius	}
3680290001Sglebius	how &= ~ISC_SOCKCANCEL_SEND;
3681290001Sglebius
3682290001Sglebius	if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT)
3683290001Sglebius	    && !ISC_LIST_EMPTY(sock->accept_list)) {
3684290001Sglebius		isc_socket_newconnev_t *dev;
3685290001Sglebius		isc_socket_newconnev_t *next;
3686290001Sglebius		isc_task_t	       *current_task;
3687290001Sglebius
3688290001Sglebius		dev = ISC_LIST_HEAD(sock->accept_list);
3689290001Sglebius		while (dev != NULL) {
3690290001Sglebius			current_task = dev->ev_sender;
3691290001Sglebius			next = ISC_LIST_NEXT(dev, ev_link);
3692290001Sglebius
3693290001Sglebius			if ((task == NULL) || (task == current_task)) {
3694290001Sglebius
3695290001Sglebius				dev->newsocket->references--;
3696290001Sglebius				closesocket(dev->newsocket->fd);
3697290001Sglebius				dev->newsocket->fd = INVALID_SOCKET;
3698290001Sglebius				free_socket(&dev->newsocket, __LINE__);
3699290001Sglebius
3700290001Sglebius				dev->result = ISC_R_CANCELED;
3701290001Sglebius				send_acceptdone_event(sock, &dev);
3702290001Sglebius			}
3703290001Sglebius
3704290001Sglebius			dev = next;
3705290001Sglebius		}
3706290001Sglebius	}
3707290001Sglebius	how &= ~ISC_SOCKCANCEL_ACCEPT;
3708290001Sglebius
3709290001Sglebius	/*
3710290001Sglebius	 * Connecting is not a list.
3711290001Sglebius	 */
3712290001Sglebius	if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT)
3713290001Sglebius	    && sock->connect_ev != NULL) {
3714290001Sglebius		isc_socket_connev_t    *dev;
3715290001Sglebius		isc_task_t	       *current_task;
3716290001Sglebius
3717290001Sglebius		INSIST(sock->pending_connect);
3718290001Sglebius
3719290001Sglebius		dev = sock->connect_ev;
3720290001Sglebius		current_task = dev->ev_sender;
3721290001Sglebius
3722290001Sglebius		if ((task == NULL) || (task == current_task)) {
3723290001Sglebius			closesocket(sock->fd);
3724290001Sglebius			sock->fd = INVALID_SOCKET;
3725290001Sglebius			_set_state(sock, SOCK_CLOSED);
3726290001Sglebius
3727290001Sglebius			sock->connect_ev = NULL;
3728290001Sglebius			dev->result = ISC_R_CANCELED;
3729290001Sglebius			send_connectdone_event(sock, &dev);
3730290001Sglebius		}
3731290001Sglebius	}
3732290001Sglebius	how &= ~ISC_SOCKCANCEL_CONNECT;
3733290001Sglebius
3734290001Sglebius	maybe_free_socket(&sock, __LINE__);
3735290001Sglebius}
3736290001Sglebius
3737290001Sglebiusisc_sockettype_t
3738290001Sglebiusisc__socket_gettype(isc_socket_t *sock) {
3739290001Sglebius	isc_sockettype_t type;
3740290001Sglebius
3741290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3742290001Sglebius
3743290001Sglebius	LOCK(&sock->lock);
3744290001Sglebius
3745290001Sglebius	/*
3746290001Sglebius	 * make sure that the socket's not closed
3747290001Sglebius	 */
3748290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3749290001Sglebius		UNLOCK(&sock->lock);
3750290001Sglebius		return (ISC_R_CONNREFUSED);
3751290001Sglebius	}
3752290001Sglebius
3753290001Sglebius	type = sock->type;
3754290001Sglebius	UNLOCK(&sock->lock);
3755290001Sglebius	return (type);
3756290001Sglebius}
3757290001Sglebius
3758290001Sglebiusisc_boolean_t
3759290001Sglebiusisc__socket_isbound(isc_socket_t *sock) {
3760290001Sglebius	isc_boolean_t val;
3761290001Sglebius
3762290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3763290001Sglebius
3764290001Sglebius	LOCK(&sock->lock);
3765290001Sglebius	CONSISTENT(sock);
3766290001Sglebius
3767290001Sglebius	/*
3768290001Sglebius	 * make sure that the socket's not closed
3769290001Sglebius	 */
3770290001Sglebius	if (sock->fd == INVALID_SOCKET) {
3771290001Sglebius		UNLOCK(&sock->lock);
3772290001Sglebius		return (ISC_FALSE);
3773290001Sglebius	}
3774290001Sglebius
3775290001Sglebius	val = ((sock->bound) ? ISC_TRUE : ISC_FALSE);
3776290001Sglebius	UNLOCK(&sock->lock);
3777290001Sglebius
3778290001Sglebius	return (val);
3779290001Sglebius}
3780290001Sglebius
3781290001Sglebiusvoid
3782290001Sglebiusisc__socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) {
3783290001Sglebius#if defined(IPV6_V6ONLY)
3784290001Sglebius	int onoff = yes ? 1 : 0;
3785290001Sglebius#else
3786290001Sglebius	UNUSED(yes);
3787290001Sglebius#endif
3788290001Sglebius
3789290001Sglebius	REQUIRE(VALID_SOCKET(sock));
3790290001Sglebius
3791290001Sglebius#ifdef IPV6_V6ONLY
3792290001Sglebius	if (sock->pf == AF_INET6) {
3793290001Sglebius		(void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY,
3794290001Sglebius				 (char *)&onoff, sizeof(onoff));
3795290001Sglebius	}
3796290001Sglebius#endif
3797290001Sglebius}
3798290001Sglebius
3799290001Sglebiusvoid
3800290001Sglebiusisc__socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) {
3801290001Sglebius	UNUSED(addr);
3802290001Sglebius	UNUSED(active);
3803290001Sglebius}
3804290001Sglebius
3805290001Sglebiusisc_result_t
3806290001Sglebiusisc__socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm,
3807290001Sglebius		     isc_uint32_t owner,	isc_uint32_t group)
3808290001Sglebius{
3809290001Sglebius	UNUSED(addr);
3810290001Sglebius	UNUSED(perm);
3811290001Sglebius	UNUSED(owner);
3812290001Sglebius	UNUSED(group);
3813290001Sglebius	return (ISC_R_NOTIMPLEMENTED);
3814290001Sglebius}
3815290001Sglebius
3816290001Sglebiusvoid
3817290001Sglebiusisc__socket_setname(isc_socket_t *socket, const char *name, void *tag) {
3818290001Sglebius
3819290001Sglebius	/*
3820290001Sglebius	 * Name 'socket'.
3821290001Sglebius	 */
3822290001Sglebius
3823290001Sglebius	REQUIRE(VALID_SOCKET(socket));
3824290001Sglebius
3825290001Sglebius	LOCK(&socket->lock);
3826290001Sglebius	memset(socket->name, 0, sizeof(socket->name));
3827290001Sglebius	strncpy(socket->name, name, sizeof(socket->name) - 1);
3828290001Sglebius	socket->tag = tag;
3829290001Sglebius	UNLOCK(&socket->lock);
3830290001Sglebius}
3831290001Sglebius
3832290001Sglebiusconst char *
3833290001Sglebiusisc__socket_getname(isc_socket_t *socket) {
3834290001Sglebius	return (socket->name);
3835290001Sglebius}
3836290001Sglebius
3837290001Sglebiusvoid *
3838290001Sglebiusisc__socket_gettag(isc_socket_t *socket) {
3839290001Sglebius	return (socket->tag);
3840290001Sglebius}
3841290001Sglebius
3842290001Sglebiusint
3843290001Sglebiusisc__socket_getfd(isc_socket_t *socket) {
3844290001Sglebius	return ((short) socket->fd);
3845290001Sglebius}
3846290001Sglebius
3847290001Sglebiusvoid
3848290001Sglebiusisc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) {
3849290001Sglebius	UNUSED(manager);
3850290001Sglebius	UNUSED(reserved);
3851290001Sglebius}
3852290001Sglebius
3853290001Sglebiusvoid
3854290001Sglebiusisc___socketmgr_maxudp(isc_socketmgr_t *manager, int maxudp) {
3855290001Sglebius
3856290001Sglebius	UNUSED(manager);
3857290001Sglebius	UNUSED(maxudp);
3858290001Sglebius}
3859290001Sglebius
3860290001Sglebius#ifdef HAVE_LIBXML2
3861290001Sglebius
3862290001Sglebiusstatic const char *
3863290001Sglebius_socktype(isc_sockettype_t type)
3864290001Sglebius{
3865290001Sglebius	if (type == isc_sockettype_udp)
3866290001Sglebius		return ("udp");
3867290001Sglebius	else if (type == isc_sockettype_tcp)
3868290001Sglebius		return ("tcp");
3869290001Sglebius	else if (type == isc_sockettype_unix)
3870290001Sglebius		return ("unix");
3871290001Sglebius	else if (type == isc_sockettype_fdwatch)
3872290001Sglebius		return ("fdwatch");
3873290001Sglebius	else
3874290001Sglebius		return ("not-initialized");
3875290001Sglebius}
3876290001Sglebius
3877290001Sglebiusvoid
3878290001Sglebiusisc_socketmgr_renderxml(isc_socketmgr_t *mgr, xmlTextWriterPtr writer)
3879290001Sglebius{
3880290001Sglebius	isc_socket_t *sock;
3881290001Sglebius	char peerbuf[ISC_SOCKADDR_FORMATSIZE];
3882290001Sglebius	isc_sockaddr_t addr;
3883290001Sglebius	ISC_SOCKADDR_LEN_T len;
3884290001Sglebius
3885290001Sglebius	LOCK(&mgr->lock);
3886290001Sglebius
3887290001Sglebius#ifndef ISC_PLATFORM_USETHREADS
3888290001Sglebius	xmlTextWriterStartElement(writer, ISC_XMLCHAR "references");
3889290001Sglebius	xmlTextWriterWriteFormatString(writer, "%d", mgr->refs);
3890290001Sglebius	xmlTextWriterEndElement(writer);
3891290001Sglebius#endif
3892290001Sglebius
3893290001Sglebius	xmlTextWriterStartElement(writer, ISC_XMLCHAR "sockets");
3894290001Sglebius	sock = ISC_LIST_HEAD(mgr->socklist);
3895290001Sglebius	while (sock != NULL) {
3896290001Sglebius		LOCK(&sock->lock);
3897290001Sglebius		xmlTextWriterStartElement(writer, ISC_XMLCHAR "socket");
3898290001Sglebius
3899290001Sglebius		xmlTextWriterStartElement(writer, ISC_XMLCHAR "id");
3900290001Sglebius		xmlTextWriterWriteFormatString(writer, "%p", sock);
3901290001Sglebius		xmlTextWriterEndElement(writer);
3902290001Sglebius
3903290001Sglebius		if (sock->name[0] != 0) {
3904290001Sglebius			xmlTextWriterStartElement(writer, ISC_XMLCHAR "name");
3905290001Sglebius			xmlTextWriterWriteFormatString(writer, "%s",
3906290001Sglebius						       sock->name);
3907290001Sglebius			xmlTextWriterEndElement(writer); /* name */
3908290001Sglebius		}
3909290001Sglebius
3910290001Sglebius		xmlTextWriterStartElement(writer, ISC_XMLCHAR "references");
3911290001Sglebius		xmlTextWriterWriteFormatString(writer, "%d", sock->references);
3912290001Sglebius		xmlTextWriterEndElement(writer);
3913290001Sglebius
3914290001Sglebius		xmlTextWriterWriteElement(writer, ISC_XMLCHAR "type",
3915290001Sglebius					  ISC_XMLCHAR _socktype(sock->type));
3916290001Sglebius
3917290001Sglebius		if (sock->connected) {
3918290001Sglebius			isc_sockaddr_format(&sock->address, peerbuf,
3919290001Sglebius					    sizeof(peerbuf));
3920290001Sglebius			xmlTextWriterWriteElement(writer,
3921290001Sglebius						  ISC_XMLCHAR "peer-address",
3922290001Sglebius						  ISC_XMLCHAR peerbuf);
3923290001Sglebius		}
3924290001Sglebius
3925290001Sglebius		len = sizeof(addr);
3926290001Sglebius		if (getsockname(sock->fd, &addr.type.sa, (void *)&len) == 0) {
3927290001Sglebius			isc_sockaddr_format(&addr, peerbuf, sizeof(peerbuf));
3928290001Sglebius			xmlTextWriterWriteElement(writer,
3929290001Sglebius						  ISC_XMLCHAR "local-address",
3930290001Sglebius						  ISC_XMLCHAR peerbuf);
3931290001Sglebius		}
3932290001Sglebius
3933290001Sglebius		xmlTextWriterStartElement(writer, ISC_XMLCHAR "states");
3934290001Sglebius		if (sock->pending_recv)
3935290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3936290001Sglebius						ISC_XMLCHAR "pending-receive");
3937290001Sglebius		if (sock->pending_send)
3938290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3939290001Sglebius						  ISC_XMLCHAR "pending-send");
3940290001Sglebius		if (sock->pending_accept)
3941290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3942290001Sglebius						 ISC_XMLCHAR "pending_accept");
3943290001Sglebius		if (sock->listener)
3944290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3945290001Sglebius						  ISC_XMLCHAR "listener");
3946290001Sglebius		if (sock->connected)
3947290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3948290001Sglebius						  ISC_XMLCHAR "connected");
3949290001Sglebius		if (sock->pending_connect)
3950290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3951290001Sglebius						  ISC_XMLCHAR "connecting");
3952290001Sglebius		if (sock->bound)
3953290001Sglebius			xmlTextWriterWriteElement(writer, ISC_XMLCHAR "state",
3954290001Sglebius						  ISC_XMLCHAR "bound");
3955290001Sglebius
3956290001Sglebius		xmlTextWriterEndElement(writer); /* states */
3957290001Sglebius
3958290001Sglebius		xmlTextWriterEndElement(writer); /* socket */
3959290001Sglebius
3960290001Sglebius		UNLOCK(&sock->lock);
3961290001Sglebius		sock = ISC_LIST_NEXT(sock, link);
3962290001Sglebius	}
3963290001Sglebius	xmlTextWriterEndElement(writer); /* sockets */
3964290001Sglebius
3965290001Sglebius	UNLOCK(&mgr->lock);
3966290001Sglebius}
3967290001Sglebius#endif /* HAVE_LIBXML2 */
3968