/*
 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC")
 * Copyright (C) 2000-2003 Internet Software Consortium.
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 */

/* $Id: socket.c,v 1.70.54.4 2009/01/29 22:40:36 jinmei Exp $ */

/* This code uses functions which are only available on Server 2003 and
 * higher, and Windows XP and higher.
 *
 * This code is by nature multithreaded and takes advantage of various
 * features to pass on information through the completion port for
 * when I/O is completed. All sends, receives, accepts, and connects are
 * completed through the completion port.
 *
 * The number of Completion Port Worker threads used is the total number
 * of CPU's + 1. This increases the likelihood that a Worker Thread is
 * available for processing a completed request.
31 * 32 * XXXPDM 5 August, 2002 33 */ 34 35#define MAKE_EXTERNAL 1 36#include <config.h> 37 38#include <sys/types.h> 39 40#ifndef _WINSOCKAPI_ 41#define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */ 42#endif 43 44#include <errno.h> 45#include <stddef.h> 46#include <stdlib.h> 47#include <string.h> 48#include <unistd.h> 49#include <io.h> 50#include <fcntl.h> 51#include <process.h> 52 53#include <isc/buffer.h> 54#include <isc/bufferlist.h> 55#include <isc/condition.h> 56#include <isc/list.h> 57#include <isc/log.h> 58#include <isc/mem.h> 59#include <isc/msgs.h> 60#include <isc/mutex.h> 61#include <isc/net.h> 62#include <isc/once.h> 63#include <isc/os.h> 64#include <isc/platform.h> 65#include <isc/print.h> 66#include <isc/region.h> 67#include <isc/socket.h> 68#include <isc/stats.h> 69#include <isc/strerror.h> 70#include <isc/syslog.h> 71#include <isc/task.h> 72#include <isc/thread.h> 73#include <isc/util.h> 74#include <isc/win32os.h> 75 76#include <mswsock.h> 77 78#include "errno2result.h" 79 80/* 81 * How in the world can Microsoft exist with APIs like this? 82 * We can't actually call this directly, because it turns out 83 * no library exports this function. Instead, we need to 84 * issue a runtime call to get the address. 85 */ 86LPFN_CONNECTEX ISCConnectEx; 87LPFN_ACCEPTEX ISCAcceptEx; 88LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs; 89 90/* 91 * Run expensive internal consistency checks. 92 */ 93#ifdef ISC_SOCKET_CONSISTENCY_CHECKS 94#define CONSISTENT(sock) consistent(sock) 95#else 96#define CONSISTENT(sock) do {} while (0) 97#endif 98static void consistent(isc_socket_t *sock); 99 100/* 101 * Define this macro to control the behavior of connection 102 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823 103 * for details. 104 * NOTE: This requires that Windows 2000 systems install Service Pack 2 105 * or later. 
106 */ 107#ifndef SIO_UDP_CONNRESET 108#define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12) 109#endif 110 111/* 112 * Some systems define the socket length argument as an int, some as size_t, 113 * some as socklen_t. This is here so it can be easily changed if needed. 114 */ 115#ifndef ISC_SOCKADDR_LEN_T 116#define ISC_SOCKADDR_LEN_T unsigned int 117#endif 118 119/* 120 * Define what the possible "soft" errors can be. These are non-fatal returns 121 * of various network related functions, like recv() and so on. 122 */ 123#define SOFT_ERROR(e) ((e) == WSAEINTR || \ 124 (e) == WSAEWOULDBLOCK || \ 125 (e) == EWOULDBLOCK || \ 126 (e) == EINTR || \ 127 (e) == EAGAIN || \ 128 (e) == 0) 129 130/* 131 * Pending errors are not really errors and should be 132 * kept separate 133 */ 134#define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0) 135 136#define DOIO_SUCCESS 0 /* i/o ok, event sent */ 137#define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ 138#define DOIO_HARD 2 /* i/o error, event sent */ 139#define DOIO_EOF 3 /* EOF, no event sent */ 140#define DOIO_PENDING 4 /* status when i/o is in process */ 141#define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */ 142 143#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) 144 145/* 146 * DLVL(90) -- Function entry/exit and other tracing. 147 * DLVL(70) -- Socket "correctness" -- including returning of events, etc. 148 * DLVL(60) -- Socket data send/receive 149 * DLVL(50) -- Event tracing, including receiving/sending completion events. 150 * DLVL(20) -- Socket creation/destruction. 
151 */ 152#define TRACE_LEVEL 90 153#define CORRECTNESS_LEVEL 70 154#define IOEVENT_LEVEL 60 155#define EVENT_LEVEL 50 156#define CREATION_LEVEL 20 157 158#define TRACE DLVL(TRACE_LEVEL) 159#define CORRECTNESS DLVL(CORRECTNESS_LEVEL) 160#define IOEVENT DLVL(IOEVENT_LEVEL) 161#define EVENT DLVL(EVENT_LEVEL) 162#define CREATION DLVL(CREATION_LEVEL) 163 164typedef isc_event_t intev_t; 165 166/* 167 * Socket State 168 */ 169enum { 170 SOCK_INITIALIZED, /* Socket Initialized */ 171 SOCK_OPEN, /* Socket opened but nothing yet to do */ 172 SOCK_DATA, /* Socket sending or receiving data */ 173 SOCK_LISTEN, /* TCP Socket listening for connects */ 174 SOCK_ACCEPT, /* TCP socket is waiting to accept */ 175 SOCK_CONNECT, /* TCP Socket connecting */ 176 SOCK_CLOSED, /* Socket has been closed */ 177}; 178 179#define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') 180#define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC) 181 182/* 183 * IPv6 control information. If the socket is an IPv6 socket we want 184 * to collect the destination address and interface so the client can 185 * set them on outgoing packets. 186 */ 187#ifdef ISC_PLATFORM_HAVEIPV6 188#ifndef USE_CMSG 189#define USE_CMSG 1 190#endif 191#endif 192 193/* 194 * We really don't want to try and use these control messages. Win32 195 * doesn't have this mechanism before XP. 196 */ 197#undef USE_CMSG 198 199/* 200 * Message header for recvmsg and sendmsg calls. 201 * Used value-result for recvmsg, value only for sendmsg. 202 */ 203struct msghdr { 204 SOCKADDR_STORAGE to_addr; /* UDP send/recv address */ 205 int to_addr_len; /* length of the address */ 206 WSABUF *msg_iov; /* scatter/gather array */ 207 u_int msg_iovlen; /* # elements in msg_iov */ 208 void *msg_control; /* ancillary data, see below */ 209 u_int msg_controllen; /* ancillary data buffer len */ 210 int msg_totallen; /* total length of this message */ 211} msghdr; 212 213/* 214 * The size to raise the receive buffer to. 
215 */ 216#define RCVBUFSIZE (32*1024) 217 218/* 219 * The number of times a send operation is repeated if the result 220 * is WSAEINTR. 221 */ 222#define NRETRIES 10 223 224struct isc_socket { 225 /* Not locked. */ 226 unsigned int magic; 227 isc_socketmgr_t *manager; 228 isc_mutex_t lock; 229 isc_sockettype_t type; 230 231 /* Pointers to scatter/gather buffers */ 232 WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER]; 233 234 /* Locked by socket lock. */ 235 ISC_LINK(isc_socket_t) link; 236 unsigned int references; /* EXTERNAL references */ 237 SOCKET fd; /* file handle */ 238 int pf; /* protocol family */ 239 char name[16]; 240 void * tag; 241 242 /* 243 * Each recv() call uses this buffer. It is a per-socket receive 244 * buffer that allows us to decouple the system recv() from the 245 * recv_list done events. This means the items on the recv_list 246 * can be removed without having to cancel pending system recv() 247 * calls. It also allows us to read-ahead in some cases. 248 */ 249 struct { 250 SOCKADDR_STORAGE from_addr; // UDP send/recv address 251 int from_addr_len; // length of the address 252 char *base; // the base of the buffer 253 char *consume_position; // where to start copying data from next 254 unsigned int len; // the actual size of this buffer 255 unsigned int remaining; // the number of bytes remaining 256 } recvbuf; 257 258 ISC_LIST(isc_socketevent_t) send_list; 259 ISC_LIST(isc_socketevent_t) recv_list; 260 ISC_LIST(isc_socket_newconnev_t) accept_list; 261 isc_socket_connev_t *connect_ev; 262 263 isc_sockaddr_t address; /* remote address */ 264 265 unsigned int listener : 1, /* listener socket */ 266 connected : 1, 267 pending_connect : 1, /* connect pending */ 268 bound : 1; /* bound to local addr */ 269 unsigned int pending_iocp; /* Should equal the counters below. Debug. */ 270 unsigned int pending_recv; /* Number of outstanding recv() calls. */ 271 unsigned int pending_send; /* Number of outstanding send() calls. 
*/ 272 unsigned int pending_accept; /* Number of outstanding accept() calls. */ 273 unsigned int state; /* Socket state. Debugging and consistency checking. */ 274 int state_lineno; /* line which last touched state */ 275}; 276 277#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0) 278 279/* 280 * Buffer structure 281 */ 282typedef struct buflist buflist_t; 283 284struct buflist { 285 void *buf; 286 unsigned int buflen; 287 ISC_LINK(buflist_t) link; 288}; 289 290/* 291 * I/O Completion ports Info structures 292 */ 293 294static HANDLE hHeapHandle = NULL; 295typedef struct IoCompletionInfo { 296 OVERLAPPED overlapped; 297 isc_socketevent_t *dev; /* send()/recv() done event */ 298 isc_socket_connev_t *cdev; /* connect() done event */ 299 isc_socket_newconnev_t *adev; /* accept() done event */ 300 void *acceptbuffer; 301 DWORD received_bytes; 302 int request_type; 303 struct msghdr messagehdr; 304 ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */ 305} IoCompletionInfo; 306 307/* 308 * Define a maximum number of I/O Completion Port worker threads 309 * to handle the load on the Completion Port. The actual number 310 * used is the number of CPU's + 1. 311 */ 312#define MAX_IOCPTHREADS 20 313 314#define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') 315#define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) 316 317struct isc_socketmgr { 318 /* Not locked. */ 319 unsigned int magic; 320 isc_mem_t *mctx; 321 isc_mutex_t lock; 322 isc_stats_t *stats; 323 324 /* Locked by manager lock. */ 325 ISC_LIST(isc_socket_t) socklist; 326 isc_boolean_t bShutdown; 327 isc_condition_t shutdown_ok; 328 HANDLE hIoCompletionPort; 329 int maxIOCPThreads; 330 HANDLE hIOCPThreads[MAX_IOCPTHREADS]; 331 DWORD dwIOCPThreadIds[MAX_IOCPTHREADS]; 332 333 /* 334 * Debugging. 
335 * Modified by InterlockedIncrement() and InterlockedDecrement() 336 */ 337 LONG totalSockets; 338 LONG iocp_total; 339}; 340 341enum { 342 SOCKET_RECV, 343 SOCKET_SEND, 344 SOCKET_ACCEPT, 345 SOCKET_CONNECT 346}; 347 348/* 349 * send() and recv() iovec counts 350 */ 351#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) 352#define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) 353 354static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext); 355static void maybe_free_socket(isc_socket_t **, int); 356static void free_socket(isc_socket_t **, int); 357static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev); 358static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev); 359static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev); 360static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev); 361static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev); 362static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev); 363static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev); 364static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result); 365static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev); 366static void queue_receive_request(isc_socket_t *sock); 367 368/* 369 * This is used to dump the contents of the sock structure 370 * You should make sure that the sock is locked before 371 * dumping it. Since the code uses simple printf() statements 372 * it should only be used interactively. 
373 */ 374void 375sock_dump(isc_socket_t *sock) { 376 isc_socketevent_t *ldev; 377 isc_socket_newconnev_t *ndev; 378 379#if 0 380 isc_sockaddr_t addr; 381 char socktext[256]; 382 383 isc_socket_getpeername(sock, &addr); 384 isc_sockaddr_format(&addr, socktext, sizeof(socktext)); 385 printf("Remote Socket: %s\n", socktext); 386 isc_socket_getsockname(sock, &addr); 387 isc_sockaddr_format(&addr, socktext, sizeof(socktext)); 388 printf("This Socket: %s\n", socktext); 389#endif 390 391 printf("\n\t\tSock Dump\n"); 392 printf("\t\tfd: %u\n", sock->fd); 393 printf("\t\treferences: %d\n", sock->references); 394 printf("\t\tpending_accept: %d\n", sock->pending_accept); 395 printf("\t\tconnecting: %d\n", sock->pending_connect); 396 printf("\t\tconnected: %d\n", sock->connected); 397 printf("\t\tbound: %d\n", sock->bound); 398 printf("\t\tpending_iocp: %d\n", sock->pending_iocp); 399 printf("\t\tsocket type: %d\n", sock->type); 400 401 printf("\n\t\tSock Recv List\n"); 402 ldev = ISC_LIST_HEAD(sock->recv_list); 403 while (ldev != NULL) { 404 printf("\t\tdev: %p\n", ldev); 405 ldev = ISC_LIST_NEXT(ldev, ev_link); 406 } 407 408 printf("\n\t\tSock Send List\n"); 409 ldev = ISC_LIST_HEAD(sock->send_list); 410 while (ldev != NULL) { 411 printf("\t\tdev: %p\n", ldev); 412 ldev = ISC_LIST_NEXT(ldev, ev_link); 413 } 414 415 printf("\n\t\tSock Accept List\n"); 416 ndev = ISC_LIST_HEAD(sock->accept_list); 417 while (ndev != NULL) { 418 printf("\t\tdev: %p\n", ldev); 419 ndev = ISC_LIST_NEXT(ndev, ev_link); 420 } 421} 422 423static void 424socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, 425 isc_logcategory_t *category, isc_logmodule_t *module, int level, 426 isc_msgcat_t *msgcat, int msgset, int message, 427 const char *fmt, ...) 
ISC_FORMAT_PRINTF(9, 10); 428 429/* This function will add an entry to the I/O completion port 430 * that will signal the I/O thread to exit (gracefully) 431 */ 432static void 433signal_iocompletionport_exit(isc_socketmgr_t *manager) { 434 int i; 435 int errval; 436 char strbuf[ISC_STRERRORSIZE]; 437 438 REQUIRE(VALID_MANAGER(manager)); 439 for (i = 0; i < manager->maxIOCPThreads; i++) { 440 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort, 441 0, 0, 0)) { 442 errval = GetLastError(); 443 isc__strerror(errval, strbuf, sizeof(strbuf)); 444 FATAL_ERROR(__FILE__, __LINE__, 445 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 446 ISC_MSG_FAILED, 447 "Can't request service thread to exit: %s"), 448 strbuf); 449 } 450 } 451} 452 453/* 454 * Create the worker threads for the I/O Completion Port 455 */ 456void 457iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) { 458 int errval; 459 char strbuf[ISC_STRERRORSIZE]; 460 int i; 461 462 INSIST(total_threads > 0); 463 REQUIRE(VALID_MANAGER(manager)); 464 /* 465 * We need at least one 466 */ 467 for (i = 0; i < total_threads; i++) { 468 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread, 469 manager, 0, 470 &manager->dwIOCPThreadIds[i]); 471 if (manager->hIOCPThreads[i] == NULL) { 472 errval = GetLastError(); 473 isc__strerror(errval, strbuf, sizeof(strbuf)); 474 FATAL_ERROR(__FILE__, __LINE__, 475 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 476 ISC_MSG_FAILED, 477 "Can't create IOCP thread: %s"), 478 strbuf); 479 exit(1); 480 } 481 } 482} 483 484/* 485 * Create/initialise the I/O completion port 486 */ 487void 488iocompletionport_init(isc_socketmgr_t *manager) { 489 int errval; 490 char strbuf[ISC_STRERRORSIZE]; 491 492 REQUIRE(VALID_MANAGER(manager)); 493 /* 494 * Create a private heap to handle the socket overlapped structure 495 * The minimum number of structures is 10, there is no maximum 496 */ 497 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0); 498 if 
(hHeapHandle == NULL) { 499 errval = GetLastError(); 500 isc__strerror(errval, strbuf, sizeof(strbuf)); 501 FATAL_ERROR(__FILE__, __LINE__, 502 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 503 ISC_MSG_FAILED, 504 "HeapCreate() failed during " 505 "initialization: %s"), 506 strbuf); 507 exit(1); 508 } 509 510 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS); 511 512 /* Now Create the Completion Port */ 513 manager->hIoCompletionPort = CreateIoCompletionPort( 514 INVALID_HANDLE_VALUE, NULL, 515 0, manager->maxIOCPThreads); 516 if (manager->hIoCompletionPort == NULL) { 517 errval = GetLastError(); 518 isc__strerror(errval, strbuf, sizeof(strbuf)); 519 FATAL_ERROR(__FILE__, __LINE__, 520 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 521 ISC_MSG_FAILED, 522 "CreateIoCompletionPort() failed " 523 "during initialization: %s"), 524 strbuf); 525 exit(1); 526 } 527 528 /* 529 * Worker threads for servicing the I/O 530 */ 531 iocompletionport_createthreads(manager->maxIOCPThreads, manager); 532} 533 534/* 535 * Associate a socket with an IO Completion Port. This allows us to queue events for it 536 * and have our worker pool of threads process them. 537 */ 538void 539iocompletionport_update(isc_socket_t *sock) { 540 HANDLE hiocp; 541 char strbuf[ISC_STRERRORSIZE]; 542 543 REQUIRE(VALID_SOCKET(sock)); 544 545 hiocp = CreateIoCompletionPort((HANDLE)sock->fd, 546 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0); 547 548 if (hiocp == NULL) { 549 DWORD errval = GetLastError(); 550 isc__strerror(errval, strbuf, sizeof(strbuf)); 551 isc_log_iwrite(isc_lctx, 552 ISC_LOGCATEGORY_GENERAL, 553 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 554 isc_msgcat, ISC_MSGSET_SOCKET, 555 ISC_MSG_TOOMANYHANDLES, 556 "iocompletionport_update: failed to open" 557 " io completion port: %s", 558 strbuf); 559 560 /* XXXMLG temporary hack to make failures detected. 561 * This function should return errors to the caller, not 562 * exit here. 
563 */ 564 FATAL_ERROR(__FILE__, __LINE__, 565 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 566 ISC_MSG_FAILED, 567 "CreateIoCompletionPort() failed " 568 "during initialization: %s"), 569 strbuf); 570 exit(1); 571 } 572 573 InterlockedIncrement(&sock->manager->iocp_total); 574} 575 576/* 577 * Routine to cleanup and then close the socket. 578 * Only close the socket here if it is NOT associated 579 * with an event, otherwise the WSAWaitForMultipleEvents 580 * may fail due to the fact that the Wait should not 581 * be running while closing an event or a socket. 582 * The socket is locked before calling this function 583 */ 584void 585socket_close(isc_socket_t *sock) { 586 587 REQUIRE(sock != NULL); 588 589 if (sock->fd != INVALID_SOCKET) { 590 closesocket(sock->fd); 591 sock->fd = INVALID_SOCKET; 592 _set_state(sock, SOCK_CLOSED); 593 InterlockedDecrement(&sock->manager->totalSockets); 594 } 595} 596 597static isc_once_t initialise_once = ISC_ONCE_INIT; 598static isc_boolean_t initialised = ISC_FALSE; 599 600static void 601initialise(void) { 602 WORD wVersionRequested; 603 WSADATA wsaData; 604 int err; 605 SOCKET sock; 606 GUID GUIDConnectEx = WSAID_CONNECTEX; 607 GUID GUIDAcceptEx = WSAID_ACCEPTEX; 608 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS; 609 DWORD dwBytes; 610 611 /* Need Winsock 2.2 or better */ 612 wVersionRequested = MAKEWORD(2, 2); 613 614 err = WSAStartup(wVersionRequested, &wsaData); 615 if (err != 0) { 616 char strbuf[ISC_STRERRORSIZE]; 617 isc__strerror(err, strbuf, sizeof(strbuf)); 618 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s", 619 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 620 ISC_MSG_FAILED, "failed"), 621 strbuf); 622 exit(1); 623 } 624 /* 625 * The following APIs do not exist as functions in a library, but we must 626 * ask winsock for them. They are "extensions" -- but why they cannot be 627 * actual functions is beyond me. So, ask winsock for the pointers to the 628 * functions we need. 
629 */ 630 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 631 INSIST(sock != INVALID_SOCKET); 632 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 633 &GUIDConnectEx, sizeof(GUIDConnectEx), 634 &ISCConnectEx, sizeof(ISCConnectEx), 635 &dwBytes, NULL, NULL); 636 INSIST(err == 0); 637 638 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 639 &GUIDAcceptEx, sizeof(GUIDAcceptEx), 640 &ISCAcceptEx, sizeof(ISCAcceptEx), 641 &dwBytes, NULL, NULL); 642 INSIST(err == 0); 643 644 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 645 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs), 646 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs), 647 &dwBytes, NULL, NULL); 648 INSIST(err == 0); 649 650 closesocket(sock); 651 652 initialised = ISC_TRUE; 653} 654 655/* 656 * Initialize socket services 657 */ 658void 659InitSockets(void) { 660 RUNTIME_CHECK(isc_once_do(&initialise_once, 661 initialise) == ISC_R_SUCCESS); 662 if (!initialised) 663 exit(1); 664} 665 666int 667internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo, 668 struct msghdr *messagehdr, int flags, int *Error) 669{ 670 int Result; 671 DWORD BytesSent; 672 DWORD Flags = flags; 673 int total_sent; 674 675 *Error = 0; 676 Result = WSASendTo(sock->fd, messagehdr->msg_iov, 677 messagehdr->msg_iovlen, &BytesSent, 678 Flags, (SOCKADDR *)&messagehdr->to_addr, 679 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo, 680 NULL); 681 682 total_sent = (int)BytesSent; 683 684 /* Check for errors.*/ 685 if (Result == SOCKET_ERROR) { 686 *Error = WSAGetLastError(); 687 688 switch (*Error) { 689 case WSA_IO_INCOMPLETE: 690 case WSA_WAIT_IO_COMPLETION: 691 case WSA_IO_PENDING: 692 case NO_ERROR: /* Strange, but okay */ 693 sock->pending_iocp++; 694 sock->pending_send++; 695 break; 696 697 default: 698 return (-1); 699 break; 700 } 701 } else { 702 sock->pending_iocp++; 703 sock->pending_send++; 704 } 705 706 if (lpo != NULL) 707 return (0); 708 else 709 return (total_sent); 710} 711 712static 
void 713queue_receive_request(isc_socket_t *sock) { 714 DWORD Flags = 0; 715 DWORD NumBytes = 0; 716 int total_bytes = 0; 717 int Result; 718 int Error; 719 WSABUF iov[1]; 720 IoCompletionInfo *lpo; 721 isc_result_t isc_result; 722 723 /* 724 * If we already have a receive pending, do nothing. 725 */ 726 if (sock->pending_recv > 0) 727 return; 728 729 /* 730 * If no one is waiting, do nothing. 731 */ 732 if (ISC_LIST_EMPTY(sock->recv_list)) 733 return; 734 735 INSIST(sock->recvbuf.remaining == 0); 736 INSIST(sock->fd != INVALID_SOCKET); 737 738 iov[0].len = sock->recvbuf.len; 739 iov[0].buf = sock->recvbuf.base; 740 741 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 742 HEAP_ZERO_MEMORY, 743 sizeof(IoCompletionInfo)); 744 RUNTIME_CHECK(lpo != NULL); 745 lpo->request_type = SOCKET_RECV; 746 747 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr); 748 749 Error = 0; 750 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1, 751 &NumBytes, &Flags, 752 (SOCKADDR *)&sock->recvbuf.from_addr, 753 &sock->recvbuf.from_addr_len, 754 (LPWSAOVERLAPPED)lpo, NULL); 755 756 /* Check for errors. */ 757 if (Result == SOCKET_ERROR) { 758 Error = WSAGetLastError(); 759 760 switch (Error) { 761 case WSA_IO_PENDING: 762 sock->pending_iocp++; 763 sock->pending_recv++; 764 break; 765 766 default: 767 isc_result = isc__errno2result(Error); 768 if (isc_result == ISC_R_UNEXPECTED) 769 UNEXPECTED_ERROR(__FILE__, __LINE__, 770 "WSARecvFrom: Windows error code: %d, isc result %d", 771 Error, isc_result); 772 send_recvdone_abort(sock, isc_result); 773 break; 774 } 775 } else { 776 /* 777 * The recv() finished immediately, but we will still get 778 * a completion event. Rather than duplicate code, let 779 * that thread handle sending the data along its way. 
780 */ 781 sock->pending_iocp++; 782 sock->pending_recv++; 783 } 784 785 socket_log(__LINE__, sock, NULL, IOEVENT, 786 isc_msgcat, ISC_MSGSET_SOCKET, 787 ISC_MSG_DOIORECV, 788 "queue_io_request: fd %d result %d error %d", 789 sock->fd, Result, Error); 790 791 CONSISTENT(sock); 792} 793 794static void 795manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, 796 isc_logmodule_t *module, int level, const char *fmt, ...) 797{ 798 char msgbuf[2048]; 799 va_list ap; 800 801 if (!isc_log_wouldlog(isc_lctx, level)) 802 return; 803 804 va_start(ap, fmt); 805 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 806 va_end(ap); 807 808 isc_log_write(isc_lctx, category, module, level, 809 "sockmgr %p: %s", sockmgr, msgbuf); 810} 811 812static void 813socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, 814 isc_logcategory_t *category, isc_logmodule_t *module, int level, 815 isc_msgcat_t *msgcat, int msgset, int message, 816 const char *fmt, ...) 817{ 818 char msgbuf[2048]; 819 char peerbuf[256]; 820 va_list ap; 821 822 823 if (!isc_log_wouldlog(isc_lctx, level)) 824 return; 825 826 va_start(ap, fmt); 827 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 828 va_end(ap); 829 830 if (address == NULL) { 831 isc_log_iwrite(isc_lctx, category, module, level, 832 msgcat, msgset, message, 833 "socket %p line %d: %s", sock, lineno, msgbuf); 834 } else { 835 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); 836 isc_log_iwrite(isc_lctx, category, module, level, 837 msgcat, msgset, message, 838 "socket %p line %d peer %s: %s", sock, lineno, 839 peerbuf, msgbuf); 840 } 841 842} 843 844/* 845 * Make an fd SOCKET non-blocking. 
846 */ 847static isc_result_t 848make_nonblock(SOCKET fd) { 849 int ret; 850 unsigned long flags = 1; 851 char strbuf[ISC_STRERRORSIZE]; 852 853 /* Set the socket to non-blocking */ 854 ret = ioctlsocket(fd, FIONBIO, &flags); 855 856 if (ret == -1) { 857 isc__strerror(errno, strbuf, sizeof(strbuf)); 858 UNEXPECTED_ERROR(__FILE__, __LINE__, 859 "ioctlsocket(%d, FIOBIO, %d): %s", 860 fd, flags, strbuf); 861 862 return (ISC_R_UNEXPECTED); 863 } 864 865 return (ISC_R_SUCCESS); 866} 867 868/* 869 * Windows 2000 systems incorrectly cause UDP sockets using WASRecvFrom 870 * to not work correctly, returning a WSACONNRESET error when a WSASendTo 871 * fails with an "ICMP port unreachable" response and preventing the 872 * socket from using the WSARecvFrom in subsequent operations. 873 * The function below fixes this, but requires that Windows 2000 874 * Service Pack 2 or later be installed on the system. NT 4.0 875 * systems are not affected by this and work correctly. 876 * See Microsoft Knowledge Base Article Q263823 for details of this. 877 */ 878isc_result_t 879connection_reset_fix(SOCKET fd) { 880 DWORD dwBytesReturned = 0; 881 BOOL bNewBehavior = FALSE; 882 DWORD status; 883 884 if (isc_win32os_majorversion() < 5) 885 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */ 886 887 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */ 888 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior, 889 sizeof(bNewBehavior), NULL, 0, 890 &dwBytesReturned, NULL, NULL); 891 if (status != SOCKET_ERROR) 892 return (ISC_R_SUCCESS); 893 else { 894 UNEXPECTED_ERROR(__FILE__, __LINE__, 895 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s", 896 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 897 ISC_MSG_FAILED, "failed")); 898 return (ISC_R_UNEXPECTED); 899 } 900} 901 902/* 903 * Construct an iov array and attach it to the msghdr passed in. 
This is 904 * the SEND constructor, which will use the used region of the buffer 905 * (if using a buffer list) or will use the internal region (if a single 906 * buffer I/O is requested). 907 * 908 * Nothing can be NULL, and the done event must list at least one buffer 909 * on the buffer linked list for this function to be meaningful. 910 */ 911static void 912build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, 913 struct msghdr *msg, char *cmsg, WSABUF *iov, 914 IoCompletionInfo *lpo) 915{ 916 unsigned int iovcount; 917 isc_buffer_t *buffer; 918 buflist_t *cpbuffer; 919 isc_region_t used; 920 size_t write_count; 921 size_t skip_count; 922 923 memset(msg, 0, sizeof(*msg)); 924 925 memcpy(&msg->to_addr, &dev->address.type, dev->address.length); 926 msg->to_addr_len = dev->address.length; 927 928 buffer = ISC_LIST_HEAD(dev->bufferlist); 929 write_count = 0; 930 iovcount = 0; 931 932 /* 933 * Single buffer I/O? Skip what we've done so far in this region. 934 */ 935 if (buffer == NULL) { 936 write_count = dev->region.length - dev->n; 937 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); 938 RUNTIME_CHECK(cpbuffer != NULL); 939 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count); 940 RUNTIME_CHECK(cpbuffer->buf != NULL); 941 942 socket_log(__LINE__, sock, NULL, TRACE, 943 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 944 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), 945 cpbuffer->buf, write_count); 946 947 memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count); 948 cpbuffer->buflen = write_count; 949 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link); 950 iov[0].buf = cpbuffer->buf; 951 iov[0].len = write_count; 952 iovcount = 1; 953 954 goto config; 955 } 956 957 /* 958 * Multibuffer I/O. 959 * Skip the data in the buffer list that we have already written. 
960 */ 961 skip_count = dev->n; 962 while (buffer != NULL) { 963 REQUIRE(ISC_BUFFER_VALID(buffer)); 964 if (skip_count < isc_buffer_usedlength(buffer)) 965 break; 966 skip_count -= isc_buffer_usedlength(buffer); 967 buffer = ISC_LIST_NEXT(buffer, link); 968 } 969 970 while (buffer != NULL) { 971 INSIST(iovcount < MAXSCATTERGATHER_SEND); 972 973 isc_buffer_usedregion(buffer, &used); 974 975 if (used.length > 0) { 976 int uselen = used.length - skip_count; 977 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); 978 RUNTIME_CHECK(cpbuffer != NULL); 979 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen); 980 RUNTIME_CHECK(cpbuffer->buf != NULL); 981 982 socket_log(__LINE__, sock, NULL, TRACE, 983 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 984 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), 985 cpbuffer->buf, write_count); 986 987 memcpy(cpbuffer->buf,(used.base + skip_count), uselen); 988 cpbuffer->buflen = uselen; 989 iov[iovcount].buf = cpbuffer->buf; 990 iov[iovcount].len = used.length - skip_count; 991 write_count += uselen; 992 skip_count = 0; 993 iovcount++; 994 } 995 buffer = ISC_LIST_NEXT(buffer, link); 996 } 997 998 INSIST(skip_count == 0); 999 1000 config: 1001 msg->msg_iov = iov; 1002 msg->msg_iovlen = iovcount; 1003 msg->msg_totallen = write_count; 1004} 1005 1006static void 1007set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock, 1008 isc_socketevent_t *dev) 1009{ 1010 if (sock->type == isc_sockettype_udp) { 1011 if (address != NULL) 1012 dev->address = *address; 1013 else 1014 dev->address = sock->address; 1015 } else if (sock->type == isc_sockettype_tcp) { 1016 INSIST(address == NULL); 1017 dev->address = sock->address; 1018 } 1019} 1020 1021static void 1022destroy_socketevent(isc_event_t *event) { 1023 isc_socketevent_t *ev = (isc_socketevent_t *)event; 1024 1025 INSIST(ISC_LIST_EMPTY(ev->bufferlist)); 1026 1027 (ev->destroy)(event); 1028} 1029 1030static isc_socketevent_t * 
1031allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype, 1032 isc_taskaction_t action, const void *arg) 1033{ 1034 isc_socketevent_t *ev; 1035 1036 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx, 1037 sock, eventtype, 1038 action, arg, 1039 sizeof(*ev)); 1040 if (ev == NULL) 1041 return (NULL); 1042 1043 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set 1044 ISC_LINK_INIT(ev, ev_link); 1045 ISC_LIST_INIT(ev->bufferlist); 1046 ev->region.base = NULL; 1047 ev->n = 0; 1048 ev->offset = 0; 1049 ev->attributes = 0; 1050 ev->destroy = ev->ev_destroy; 1051 ev->ev_destroy = destroy_socketevent; 1052 1053 return (ev); 1054} 1055 1056#if defined(ISC_SOCKET_DEBUG) 1057static void 1058dump_msg(struct msghdr *msg, isc_socket_t *sock) { 1059 unsigned int i; 1060 1061 printf("MSGHDR %p, Socket #: %u\n", msg, sock->fd); 1062 printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen); 1063 printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen); 1064 for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) 1065 printf("\t\t%d\tbase %p, len %d\n", i, 1066 msg->msg_iov[i].buf, 1067 msg->msg_iov[i].len); 1068} 1069#endif 1070 1071/* 1072 * map the error code 1073 */ 1074int 1075map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno, 1076 char *errorstring, size_t bufsize) { 1077 1078 int doreturn; 1079 switch (windows_errno) { 1080 case WSAECONNREFUSED: 1081 *isc_errno = ISC_R_CONNREFUSED; 1082 if (sock->connected) 1083 doreturn = DOIO_HARD; 1084 else 1085 doreturn = DOIO_SOFT; 1086 break; 1087 case WSAENETUNREACH: 1088 case ERROR_NETWORK_UNREACHABLE: 1089 *isc_errno = ISC_R_NETUNREACH; 1090 if (sock->connected) 1091 doreturn = DOIO_HARD; 1092 else 1093 doreturn = DOIO_SOFT; 1094 break; 1095 case ERROR_PORT_UNREACHABLE: 1096 case ERROR_HOST_UNREACHABLE: 1097 case WSAEHOSTUNREACH: 1098 *isc_errno = ISC_R_HOSTUNREACH; 1099 if (sock->connected) 1100 doreturn = DOIO_HARD; 1101 else 1102 doreturn = 
DOIO_SOFT; 1103 break; 1104 case WSAENETDOWN: 1105 *isc_errno = ISC_R_NETDOWN; 1106 if (sock->connected) 1107 doreturn = DOIO_HARD; 1108 else 1109 doreturn = DOIO_SOFT; 1110 break; 1111 case WSAEHOSTDOWN: 1112 *isc_errno = ISC_R_HOSTDOWN; 1113 if (sock->connected) 1114 doreturn = DOIO_HARD; 1115 else 1116 doreturn = DOIO_SOFT; 1117 break; 1118 case WSAEACCES: 1119 *isc_errno = ISC_R_NOPERM; 1120 if (sock->connected) 1121 doreturn = DOIO_HARD; 1122 else 1123 doreturn = DOIO_SOFT; 1124 break; 1125 case WSAECONNRESET: 1126 case WSAENETRESET: 1127 case WSAECONNABORTED: 1128 case WSAEDISCON: 1129 *isc_errno = ISC_R_CONNECTIONRESET; 1130 if (sock->connected) 1131 doreturn = DOIO_HARD; 1132 else 1133 doreturn = DOIO_SOFT; 1134 break; 1135 case WSAENOTCONN: 1136 *isc_errno = ISC_R_NOTCONNECTED; 1137 if (sock->connected) 1138 doreturn = DOIO_HARD; 1139 else 1140 doreturn = DOIO_SOFT; 1141 break; 1142 case ERROR_OPERATION_ABORTED: 1143 case ERROR_CONNECTION_ABORTED: 1144 case ERROR_REQUEST_ABORTED: 1145 *isc_errno = ISC_R_CONNECTIONRESET; 1146 doreturn = DOIO_HARD; 1147 break; 1148 case WSAENOBUFS: 1149 *isc_errno = ISC_R_NORESOURCES; 1150 doreturn = DOIO_HARD; 1151 break; 1152 case WSAEAFNOSUPPORT: 1153 *isc_errno = ISC_R_FAMILYNOSUPPORT; 1154 doreturn = DOIO_HARD; 1155 break; 1156 case WSAEADDRNOTAVAIL: 1157 *isc_errno = ISC_R_ADDRNOTAVAIL; 1158 doreturn = DOIO_HARD; 1159 break; 1160 case WSAEDESTADDRREQ: 1161 *isc_errno = ISC_R_BADADDRESSFORM; 1162 doreturn = DOIO_HARD; 1163 break; 1164 case ERROR_NETNAME_DELETED: 1165 *isc_errno = ISC_R_NETDOWN; 1166 doreturn = DOIO_HARD; 1167 break; 1168 default: 1169 *isc_errno = ISC_R_IOERROR; 1170 doreturn = DOIO_HARD; 1171 break; 1172 } 1173 if (doreturn == DOIO_HARD) { 1174 isc__strerror(windows_errno, errorstring, bufsize); 1175 } 1176 return (doreturn); 1177} 1178 1179static void 1180fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) { 1181 isc_region_t r; 1182 int copylen; 1183 isc_buffer_t *buffer; 1184 1185 INSIST(dev->n < 
dev->minimum); 1186 INSIST(sock->recvbuf.remaining > 0); 1187 INSIST(sock->pending_recv == 0); 1188 1189 if (sock->type == isc_sockettype_udp) { 1190 dev->address.length = sock->recvbuf.from_addr_len; 1191 memcpy(&dev->address.type, &sock->recvbuf.from_addr, 1192 sock->recvbuf.from_addr_len); 1193 if (isc_sockaddr_getport(&dev->address) == 0) { 1194 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1195 socket_log(__LINE__, sock, &dev->address, IOEVENT, 1196 isc_msgcat, ISC_MSGSET_SOCKET, 1197 ISC_MSG_ZEROPORT, 1198 "dropping source port zero packet"); 1199 } 1200 sock->recvbuf.remaining = 0; 1201 return; 1202 } 1203 } else if (sock->type == isc_sockettype_tcp) { 1204 dev->address = sock->address; 1205 } 1206 1207 /* 1208 * Run through the list of buffers we were given, and find the 1209 * first one with space. Once it is found, loop through, filling 1210 * the buffers as much as possible. 1211 */ 1212 buffer = ISC_LIST_HEAD(dev->bufferlist); 1213 if (buffer != NULL) { // Multi-buffer receive 1214 while (buffer != NULL && sock->recvbuf.remaining > 0) { 1215 REQUIRE(ISC_BUFFER_VALID(buffer)); 1216 if (isc_buffer_availablelength(buffer) > 0) { 1217 isc_buffer_availableregion(buffer, &r); 1218 copylen = min(r.length, sock->recvbuf.remaining); 1219 memcpy(r.base, sock->recvbuf.consume_position, copylen); 1220 sock->recvbuf.consume_position += copylen; 1221 sock->recvbuf.remaining -= copylen; 1222 isc_buffer_add(buffer, copylen); 1223 dev->n += copylen; 1224 } 1225 buffer = ISC_LIST_NEXT(buffer, link); 1226 } 1227 } else { // Single-buffer receive 1228 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining); 1229 memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen); 1230 sock->recvbuf.consume_position += copylen; 1231 sock->recvbuf.remaining -= copylen; 1232 dev->n += copylen; 1233 } 1234 1235 /* 1236 * UDP receives are all-consuming. 
That is, if we have 4k worth of 1237 * data in our receive buffer, and the caller only gave us 1238 * 1k of space, we will toss the remaining 3k of data. TCP 1239 * will keep the extra data around and use it for later requests. 1240 */ 1241 if (sock->type == isc_sockettype_udp) 1242 sock->recvbuf.remaining = 0; 1243} 1244 1245/* 1246 * Copy out as much data from the internal buffer to done events. 1247 * As each done event is filled, send it along its way. 1248 */ 1249static void 1250completeio_recv(isc_socket_t *sock) 1251{ 1252 isc_socketevent_t *dev; 1253 1254 /* 1255 * If we are in the process of filling our buffer, we cannot 1256 * touch it yet, so don't. 1257 */ 1258 if (sock->pending_recv > 0) 1259 return; 1260 1261 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) { 1262 dev = ISC_LIST_HEAD(sock->recv_list); 1263 1264 /* 1265 * See if we have sufficient data in our receive buffer 1266 * to handle this. If we do, copy out the data. 1267 */ 1268 fill_recv(sock, dev); 1269 1270 /* 1271 * Did we satisfy it? 1272 */ 1273 if (dev->n >= dev->minimum) { 1274 dev->result = ISC_R_SUCCESS; 1275 send_recvdone_event(sock, &dev); 1276 } 1277 } 1278} 1279 1280/* 1281 * Returns: 1282 * DOIO_SUCCESS The operation succeeded. dev->result contains 1283 * ISC_R_SUCCESS. 1284 * 1285 * DOIO_HARD A hard or unexpected I/O error was encountered. 1286 * dev->result contains the appropriate error. 1287 * 1288 * DOIO_SOFT A soft I/O error was encountered. No senddone 1289 * event was sent. The operation should be retried. 1290 * 1291 * No other return values are possible. 
1292 */ 1293static int 1294completeio_send(isc_socket_t *sock, isc_socketevent_t *dev, 1295 struct msghdr *messagehdr, int cc, int send_errno) 1296{ 1297 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 1298 char strbuf[ISC_STRERRORSIZE]; 1299 1300 if (send_errno != 0) { 1301 if (SOFT_ERROR(send_errno)) 1302 return (DOIO_SOFT); 1303 1304 return (map_socket_error(sock, send_errno, &dev->result, 1305 strbuf, sizeof(strbuf))); 1306 1307 /* 1308 * The other error types depend on whether or not the 1309 * socket is UDP or TCP. If it is UDP, some errors 1310 * that we expect to be fatal under TCP are merely 1311 * annoying, and are really soft errors. 1312 * 1313 * However, these soft errors are still returned as 1314 * a status. 1315 */ 1316 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1317 isc__strerror(send_errno, strbuf, sizeof(strbuf)); 1318 UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s", 1319 addrbuf, strbuf); 1320 dev->result = isc__errno2result(send_errno); 1321 return (DOIO_HARD); 1322 } 1323 1324 /* 1325 * If we write less than we expected, update counters, poke. 1326 */ 1327 dev->n += cc; 1328 if (cc != messagehdr->msg_totallen) 1329 return (DOIO_SOFT); 1330 1331 /* 1332 * Exactly what we wanted to write. We're done with this 1333 * entry. Post its completion event. 
1334 */ 1335 dev->result = ISC_R_SUCCESS; 1336 return (DOIO_SUCCESS); 1337} 1338 1339static int 1340startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes, 1341 int *send_errno) 1342{ 1343 char *cmsg = NULL; 1344 char strbuf[ISC_STRERRORSIZE]; 1345 IoCompletionInfo *lpo; 1346 int status; 1347 struct msghdr *msghdr; 1348 1349 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 1350 HEAP_ZERO_MEMORY, 1351 sizeof(IoCompletionInfo)); 1352 RUNTIME_CHECK(lpo != NULL); 1353 lpo->request_type = SOCKET_SEND; 1354 lpo->dev = dev; 1355 msghdr = &lpo->messagehdr; 1356 memset(msghdr, 0, sizeof(struct msghdr)); 1357 ISC_LIST_INIT(lpo->bufferlist); 1358 1359 build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo); 1360 1361 *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno); 1362 1363 if (*nbytes < 0) { 1364 /* 1365 * I/O has been initiated 1366 * completion will be through the completion port 1367 */ 1368 if (PENDING_ERROR(*send_errno)) { 1369 status = DOIO_PENDING; 1370 goto done; 1371 } 1372 1373 if (SOFT_ERROR(*send_errno)) { 1374 status = DOIO_SOFT; 1375 goto done; 1376 } 1377 1378 /* 1379 * If we got this far then something is wrong 1380 */ 1381 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1382 isc__strerror(*send_errno, strbuf, sizeof(strbuf)); 1383 socket_log(__LINE__, sock, NULL, IOEVENT, 1384 isc_msgcat, ISC_MSGSET_SOCKET, 1385 ISC_MSG_INTERNALSEND, 1386 "startio_send: internal_sendmsg(%d) %d " 1387 "bytes, err %d/%s", 1388 sock->fd, *nbytes, *send_errno, strbuf); 1389 } 1390 goto done; 1391 } 1392 dev->result = ISC_R_SUCCESS; 1393 status = DOIO_SOFT; 1394 done: 1395 _set_state(sock, SOCK_DATA); 1396 return (status); 1397} 1398 1399static isc_result_t 1400allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, 1401 isc_socket_t **socketp) { 1402 isc_socket_t *sock; 1403 isc_result_t result; 1404 1405 sock = isc_mem_get(manager->mctx, sizeof(*sock)); 1406 1407 if (sock == NULL) 1408 return (ISC_R_NOMEMORY); 1409 1410 sock->magic = 0; 
1411 sock->references = 0; 1412 1413 sock->manager = manager; 1414 sock->type = type; 1415 sock->fd = INVALID_SOCKET; 1416 1417 ISC_LINK_INIT(sock, link); 1418 1419 /* 1420 * set up list of readers and writers to be initially empty 1421 */ 1422 ISC_LIST_INIT(sock->recv_list); 1423 ISC_LIST_INIT(sock->send_list); 1424 ISC_LIST_INIT(sock->accept_list); 1425 sock->connect_ev = NULL; 1426 sock->pending_accept = 0; 1427 sock->pending_recv = 0; 1428 sock->pending_send = 0; 1429 sock->pending_iocp = 0; 1430 sock->listener = 0; 1431 sock->connected = 0; 1432 sock->pending_connect = 0; 1433 sock->bound = 0; 1434 memset(sock->name, 0, sizeof(sock->name)); // zero the name field 1435 _set_state(sock, SOCK_INITIALIZED); 1436 1437 sock->recvbuf.len = 65536; 1438 sock->recvbuf.consume_position = sock->recvbuf.base; 1439 sock->recvbuf.remaining = 0; 1440 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size 1441 if (sock->recvbuf.base == NULL) { 1442 sock->magic = 0; 1443 goto error; 1444 } 1445 1446 /* 1447 * initialize the lock 1448 */ 1449 result = isc_mutex_init(&sock->lock); 1450 if (result != ISC_R_SUCCESS) { 1451 sock->magic = 0; 1452 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); 1453 sock->recvbuf.base = NULL; 1454 goto error; 1455 } 1456 1457 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1458 "allocated"); 1459 1460 sock->magic = SOCKET_MAGIC; 1461 *socketp = sock; 1462 1463 return (ISC_R_SUCCESS); 1464 1465 error: 1466 isc_mem_put(manager->mctx, sock, sizeof(*sock)); 1467 1468 return (result); 1469} 1470 1471/* 1472 * Verify that the socket state is consistent. 
1473 */ 1474static void 1475consistent(isc_socket_t *sock) { 1476 1477 isc_socketevent_t *dev; 1478 isc_socket_newconnev_t *nev; 1479 unsigned int count; 1480 char *crash_reason; 1481 isc_boolean_t crash = ISC_FALSE; 1482 1483 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send 1484 + sock->pending_accept + sock->pending_connect); 1485 1486 dev = ISC_LIST_HEAD(sock->send_list); 1487 count = 0; 1488 while (dev != NULL) { 1489 count++; 1490 dev = ISC_LIST_NEXT(dev, ev_link); 1491 } 1492 if (count > sock->pending_send) { 1493 crash = ISC_TRUE; 1494 crash_reason = "send_list > sock->pending_send"; 1495 } 1496 1497 nev = ISC_LIST_HEAD(sock->accept_list); 1498 count = 0; 1499 while (nev != NULL) { 1500 count++; 1501 nev = ISC_LIST_NEXT(nev, ev_link); 1502 } 1503 if (count > sock->pending_accept) { 1504 crash = ISC_TRUE; 1505 crash_reason = "send_list > sock->pending_send"; 1506 } 1507 1508 if (crash) { 1509 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1510 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s", 1511 crash_reason); 1512 sock_dump(sock); 1513 INSIST(crash == ISC_FALSE); 1514 } 1515} 1516 1517/* 1518 * Maybe free the socket. 1519 * 1520 * This function will verify tht the socket is no longer in use in any way, 1521 * either internally or externally. This is the only place where this 1522 * check is to be made; if some bit of code believes that IT is done with 1523 * the socket (e.g., some reference counter reaches zero), it should call 1524 * this function. 1525 * 1526 * When calling this function, the socket must be locked, and the manager 1527 * must be unlocked. 1528 * 1529 * When this function returns, *socketp will be NULL. No tricks to try 1530 * to hold on to this pointer are allowed. 
1531 */ 1532static void 1533maybe_free_socket(isc_socket_t **socketp, int lineno) { 1534 isc_socket_t *sock = *socketp; 1535 *socketp = NULL; 1536 1537 INSIST(VALID_SOCKET(sock)); 1538 CONSISTENT(sock); 1539 1540 if (sock->pending_iocp > 0 1541 || sock->pending_recv > 0 1542 || sock->pending_send > 0 1543 || sock->pending_accept > 0 1544 || sock->references > 0 1545 || sock->pending_connect == 1 1546 || !ISC_LIST_EMPTY(sock->recv_list) 1547 || !ISC_LIST_EMPTY(sock->send_list) 1548 || !ISC_LIST_EMPTY(sock->accept_list) 1549 || sock->fd != INVALID_SOCKET) { 1550 UNLOCK(&sock->lock); 1551 return; 1552 } 1553 UNLOCK(&sock->lock); 1554 1555 free_socket(&sock, lineno); 1556} 1557 1558void 1559free_socket(isc_socket_t **sockp, int lineno) { 1560 isc_socketmgr_t *manager; 1561 isc_socket_t *sock = *sockp; 1562 *sockp = NULL; 1563 1564 manager = sock->manager; 1565 1566 /* 1567 * Seems we can free the socket after all. 1568 */ 1569 manager = sock->manager; 1570 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1571 ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p", 1572 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore); 1573 1574 sock->magic = 0; 1575 DESTROYLOCK(&sock->lock); 1576 1577 if (sock->recvbuf.base != NULL) 1578 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); 1579 1580 LOCK(&manager->lock); 1581 if (ISC_LINK_LINKED(sock, link)) 1582 ISC_LIST_UNLINK(manager->socklist, sock, link); 1583 isc_mem_put(manager->mctx, sock, sizeof(*sock)); 1584 1585 if (ISC_LIST_EMPTY(manager->socklist)) 1586 SIGNAL(&manager->shutdown_ok); 1587 UNLOCK(&manager->lock); 1588} 1589 1590/* 1591 * Create a new 'type' socket managed by 'manager'. Events 1592 * will be posted to 'task' and when dispatched 'action' will be 1593 * called with 'arg' as the arg value. The new socket is returned 1594 * in 'socketp'. 
1595 */ 1596isc_result_t 1597isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, 1598 isc_socket_t **socketp) { 1599 isc_socket_t *sock = NULL; 1600 isc_result_t result; 1601#if defined(USE_CMSG) 1602 int on = 1; 1603#endif 1604#if defined(SO_RCVBUF) 1605 ISC_SOCKADDR_LEN_T optlen; 1606 int size; 1607#endif 1608 int socket_errno; 1609 char strbuf[ISC_STRERRORSIZE]; 1610 1611 REQUIRE(VALID_MANAGER(manager)); 1612 REQUIRE(socketp != NULL && *socketp == NULL); 1613 REQUIRE(type != isc_sockettype_fdwatch); 1614 1615 result = allocate_socket(manager, type, &sock); 1616 if (result != ISC_R_SUCCESS) 1617 return (result); 1618 1619 sock->pf = pf; 1620 switch (type) { 1621 case isc_sockettype_udp: 1622 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); 1623 if (sock->fd != INVALID_SOCKET) { 1624 result = connection_reset_fix(sock->fd); 1625 if (result != ISC_R_SUCCESS) { 1626 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1627 "closed %d %d %d con_reset_fix_failed", 1628 sock->pending_recv, sock->pending_send, 1629 sock->references); 1630 closesocket(sock->fd); 1631 _set_state(sock, SOCK_CLOSED); 1632 sock->fd = INVALID_SOCKET; 1633 free_socket(&sock, __LINE__); 1634 return (result); 1635 } 1636 } 1637 break; 1638 case isc_sockettype_tcp: 1639 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); 1640 break; 1641 } 1642 1643 if (sock->fd == INVALID_SOCKET) { 1644 socket_errno = WSAGetLastError(); 1645 free_socket(&sock, __LINE__); 1646 1647 switch (socket_errno) { 1648 case WSAEMFILE: 1649 case WSAENOBUFS: 1650 return (ISC_R_NORESOURCES); 1651 1652 case WSAEPROTONOSUPPORT: 1653 case WSAEPFNOSUPPORT: 1654 case WSAEAFNOSUPPORT: 1655 return (ISC_R_FAMILYNOSUPPORT); 1656 1657 default: 1658 isc__strerror(socket_errno, strbuf, sizeof(strbuf)); 1659 UNEXPECTED_ERROR(__FILE__, __LINE__, 1660 "socket() %s: %s", 1661 isc_msgcat_get(isc_msgcat, 1662 ISC_MSGSET_GENERAL, 1663 ISC_MSG_FAILED, 1664 "failed"), 1665 strbuf); 1666 return (ISC_R_UNEXPECTED); 1667 } 1668 } 
1669 1670 result = make_nonblock(sock->fd); 1671 if (result != ISC_R_SUCCESS) { 1672 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1673 "closed %d %d %d make_nonblock_failed", 1674 sock->pending_recv, sock->pending_send, 1675 sock->references); 1676 closesocket(sock->fd); 1677 sock->fd = INVALID_SOCKET; 1678 free_socket(&sock, __LINE__); 1679 return (result); 1680 } 1681 1682 1683#if defined(USE_CMSG) || defined(SO_RCVBUF) 1684 if (type == isc_sockettype_udp) { 1685 1686#if defined(USE_CMSG) 1687#if defined(ISC_PLATFORM_HAVEIPV6) 1688#ifdef IPV6_RECVPKTINFO 1689 /* 2292bis */ 1690 if ((pf == AF_INET6) 1691 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1692 (void *)&on, sizeof(on)) < 0)) { 1693 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 1694 UNEXPECTED_ERROR(__FILE__, __LINE__, 1695 "setsockopt(%d, IPV6_RECVPKTINFO) " 1696 "%s: %s", sock->fd, 1697 isc_msgcat_get(isc_msgcat, 1698 ISC_MSGSET_GENERAL, 1699 ISC_MSG_FAILED, 1700 "failed"), 1701 strbuf); 1702 } 1703#else 1704 /* 2292 */ 1705 if ((pf == AF_INET6) 1706 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, 1707 (void *)&on, sizeof(on)) < 0)) { 1708 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 1709 UNEXPECTED_ERROR(__FILE__, __LINE__, 1710 "setsockopt(%d, IPV6_PKTINFO) %s: %s", 1711 sock->fd, 1712 isc_msgcat_get(isc_msgcat, 1713 ISC_MSGSET_GENERAL, 1714 ISC_MSG_FAILED, 1715 "failed"), 1716 strbuf); 1717 } 1718#endif /* IPV6_RECVPKTINFO */ 1719#ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/ 1720 /* use minimum MTU */ 1721 if (pf == AF_INET6) { 1722 (void)setsockopt(sock->fd, IPPROTO_IPV6, 1723 IPV6_USE_MIN_MTU, 1724 (void *)&on, sizeof(on)); 1725 } 1726#endif 1727#endif /* ISC_PLATFORM_HAVEIPV6 */ 1728#endif /* defined(USE_CMSG) */ 1729 1730#if defined(SO_RCVBUF) 1731 optlen = sizeof(size); 1732 if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 1733 (void *)&size, &optlen) >= 0 && 1734 size < RCVBUFSIZE) { 1735 size = RCVBUFSIZE; 1736 (void)setsockopt(sock->fd, 
SOL_SOCKET, SO_RCVBUF, 1737 (void *)&size, sizeof(size)); 1738 } 1739#endif 1740 1741 } 1742#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ 1743 1744 _set_state(sock, SOCK_OPEN); 1745 sock->references = 1; 1746 *socketp = sock; 1747 1748 iocompletionport_update(sock); 1749 1750 /* 1751 * Note we don't have to lock the socket like we normally would because 1752 * there are no external references to it yet. 1753 */ 1754 LOCK(&manager->lock); 1755 ISC_LIST_APPEND(manager->socklist, sock, link); 1756 InterlockedIncrement(&manager->totalSockets); 1757 UNLOCK(&manager->lock); 1758 1759 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1760 ISC_MSG_CREATED, "created %u type %u", sock->fd, type); 1761 1762 return (ISC_R_SUCCESS); 1763} 1764 1765isc_result_t 1766isc_socket_open(isc_socket_t *sock) { 1767 REQUIRE(VALID_SOCKET(sock)); 1768 REQUIRE(sock->type != isc_sockettype_fdwatch); 1769 1770 return (ISC_R_NOTIMPLEMENTED); 1771} 1772 1773/* 1774 * Attach to a socket. Caller must explicitly detach when it is done. 1775 */ 1776void 1777isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) { 1778 REQUIRE(VALID_SOCKET(sock)); 1779 REQUIRE(socketp != NULL && *socketp == NULL); 1780 1781 LOCK(&sock->lock); 1782 CONSISTENT(sock); 1783 sock->references++; 1784 UNLOCK(&sock->lock); 1785 1786 *socketp = sock; 1787} 1788 1789/* 1790 * Dereference a socket. If this is the last reference to it, clean things 1791 * up by destroying the socket. 
1792 */ 1793void 1794isc_socket_detach(isc_socket_t **socketp) { 1795 isc_socket_t *sock; 1796 isc_boolean_t kill_socket = ISC_FALSE; 1797 1798 REQUIRE(socketp != NULL); 1799 sock = *socketp; 1800 REQUIRE(VALID_SOCKET(sock)); 1801 REQUIRE(sock->type != isc_sockettype_fdwatch); 1802 1803 LOCK(&sock->lock); 1804 CONSISTENT(sock); 1805 REQUIRE(sock->references > 0); 1806 sock->references--; 1807 1808 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1809 "detach_socket %d %d %d", 1810 sock->pending_recv, sock->pending_send, 1811 sock->references); 1812 1813 if (sock->references == 0 && sock->fd != INVALID_SOCKET) { 1814 closesocket(sock->fd); 1815 sock->fd = INVALID_SOCKET; 1816 _set_state(sock, SOCK_CLOSED); 1817 } 1818 1819 maybe_free_socket(&sock, __LINE__); 1820 1821 *socketp = NULL; 1822} 1823 1824isc_result_t 1825isc_socket_close(isc_socket_t *sock) { 1826 REQUIRE(VALID_SOCKET(sock)); 1827 REQUIRE(sock->type != isc_sockettype_fdwatch); 1828 1829 return (ISC_R_NOTIMPLEMENTED); 1830} 1831 1832/* 1833 * Dequeue an item off the given socket's read queue, set the result code 1834 * in the done event to the one provided, and send it to the task it was 1835 * destined for. 1836 * 1837 * If the event to be sent is on a list, remove it before sending. If 1838 * asked to, send and detach from the task as well. 1839 * 1840 * Caller must have the socket locked if the event is attached to the socket. 
1841 */ 1842static void 1843send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) { 1844 isc_task_t *task; 1845 1846 task = (*dev)->ev_sender; 1847 (*dev)->ev_sender = sock; 1848 1849 if (ISC_LINK_LINKED(*dev, ev_link)) 1850 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); 1851 1852 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1853 == ISC_SOCKEVENTATTR_ATTACHED) 1854 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1855 else 1856 isc_task_send(task, (isc_event_t **)dev); 1857 1858 CONSISTENT(sock); 1859} 1860 1861/* 1862 * See comments for send_recvdone_event() above. 1863 */ 1864static void 1865send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { 1866 isc_task_t *task; 1867 1868 INSIST(dev != NULL && *dev != NULL); 1869 1870 task = (*dev)->ev_sender; 1871 (*dev)->ev_sender = sock; 1872 1873 if (ISC_LINK_LINKED(*dev, ev_link)) 1874 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); 1875 1876 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1877 == ISC_SOCKEVENTATTR_ATTACHED) 1878 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1879 else 1880 isc_task_send(task, (isc_event_t **)dev); 1881 1882 CONSISTENT(sock); 1883} 1884 1885/* 1886 * See comments for send_recvdone_event() above. 1887 */ 1888static void 1889send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) { 1890 isc_task_t *task; 1891 1892 INSIST(adev != NULL && *adev != NULL); 1893 1894 task = (*adev)->ev_sender; 1895 (*adev)->ev_sender = sock; 1896 1897 if (ISC_LINK_LINKED(*adev, ev_link)) 1898 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link); 1899 1900 isc_task_sendanddetach(&task, (isc_event_t **)adev); 1901 1902 CONSISTENT(sock); 1903} 1904 1905/* 1906 * See comments for send_recvdone_event() above. 
1907 */ 1908static void 1909send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) { 1910 isc_task_t *task; 1911 1912 INSIST(cdev != NULL && *cdev != NULL); 1913 1914 task = (*cdev)->ev_sender; 1915 (*cdev)->ev_sender = sock; 1916 1917 sock->connect_ev = NULL; 1918 1919 isc_task_sendanddetach(&task, (isc_event_t **)cdev); 1920 1921 CONSISTENT(sock); 1922} 1923 1924/* 1925 * On entry to this function, the event delivered is the internal 1926 * readable event, and the first item on the accept_list should be 1927 * the done event we want to send. If the list is empty, this is a no-op, 1928 * so just close the new connection, unlock, and return. 1929 * 1930 * Note the socket is locked before entering here 1931 */ 1932static void 1933internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) { 1934 isc_socket_newconnev_t *adev; 1935 isc_result_t result = ISC_R_SUCCESS; 1936 isc_socket_t *nsock; 1937 struct sockaddr *localaddr; 1938 int localaddr_len = sizeof(*localaddr); 1939 struct sockaddr *remoteaddr; 1940 int remoteaddr_len = sizeof(*remoteaddr); 1941 1942 INSIST(VALID_SOCKET(sock)); 1943 LOCK(&sock->lock); 1944 CONSISTENT(sock); 1945 1946 socket_log(__LINE__, sock, NULL, TRACE, 1947 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 1948 "internal_accept called"); 1949 1950 INSIST(sock->listener); 1951 1952 INSIST(sock->pending_iocp > 0); 1953 sock->pending_iocp--; 1954 INSIST(sock->pending_accept > 0); 1955 sock->pending_accept--; 1956 1957 adev = lpo->adev; 1958 1959 /* 1960 * If the event is no longer in the list we can just return. 1961 */ 1962 if (!acceptdone_is_active(sock, adev)) 1963 goto done; 1964 1965 nsock = adev->newsocket; 1966 1967 /* 1968 * Pull off the done event. 1969 */ 1970 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link); 1971 1972 /* 1973 * Extract the addresses from the socket, copy them into the structure, 1974 * and return the new socket. 
1975 */ 1976 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0, 1977 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16, 1978 (LPSOCKADDR *)&localaddr, &localaddr_len, 1979 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len); 1980 memcpy(&adev->address.type, remoteaddr, remoteaddr_len); 1981 adev->address.length = remoteaddr_len; 1982 nsock->address = adev->address; 1983 nsock->pf = adev->address.type.sa.sa_family; 1984 1985 socket_log(__LINE__, nsock, &nsock->address, TRACE, 1986 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 1987 "internal_accept parent %p", sock); 1988 1989 result = make_nonblock(adev->newsocket->fd); 1990 INSIST(result == ISC_R_SUCCESS); 1991 1992 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, 1993 (char *)&sock->fd, sizeof(sock->fd)) == 0); 1994 1995 /* 1996 * Hook it up into the manager. 1997 */ 1998 nsock->bound = 1; 1999 nsock->connected = 1; 2000 _set_state(nsock, SOCK_OPEN); 2001 2002 LOCK(&nsock->manager->lock); 2003 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link); 2004 InterlockedIncrement(&nsock->manager->totalSockets); 2005 UNLOCK(&nsock->manager->lock); 2006 2007 socket_log(__LINE__, sock, &nsock->address, CREATION, 2008 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 2009 "accepted_connection new_socket %p fd %d", 2010 nsock, nsock->fd); 2011 2012 adev->result = result; 2013 send_acceptdone_event(sock, &adev); 2014 2015done: 2016 CONSISTENT(sock); 2017 UNLOCK(&sock->lock); 2018 2019 HeapFree(hHeapHandle, 0, lpo->acceptbuffer); 2020 lpo->acceptbuffer = NULL; 2021} 2022 2023/* 2024 * Called when a socket with a pending connect() finishes. 2025 * Note that the socket is locked before entering. 
2026 */ 2027static void 2028internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) { 2029 isc_socket_connev_t *cdev; 2030 char strbuf[ISC_STRERRORSIZE]; 2031 2032 INSIST(VALID_SOCKET(sock)); 2033 2034 LOCK(&sock->lock); 2035 2036 INSIST(sock->pending_iocp > 0); 2037 sock->pending_iocp--; 2038 INSIST(sock->pending_connect == 1); 2039 sock->pending_connect = 0; 2040 2041 /* 2042 * Has this event been canceled? 2043 */ 2044 cdev = lpo->cdev; 2045 if (!connectdone_is_active(sock, cdev)) { 2046 sock->pending_connect = 0; 2047 if (sock->fd != INVALID_SOCKET) { 2048 closesocket(sock->fd); 2049 sock->fd = INVALID_SOCKET; 2050 _set_state(sock, SOCK_CLOSED); 2051 } 2052 CONSISTENT(sock); 2053 UNLOCK(&sock->lock); 2054 return; 2055 } 2056 2057 /* 2058 * Check possible Windows network event error status here. 2059 */ 2060 if (connect_errno != 0) { 2061 /* 2062 * If the error is SOFT, just try again on this 2063 * fd and pretend nothing strange happened. 2064 */ 2065 if (SOFT_ERROR(connect_errno) || 2066 connect_errno == WSAEINPROGRESS) { 2067 sock->pending_connect = 1; 2068 CONSISTENT(sock); 2069 UNLOCK(&sock->lock); 2070 return; 2071 } 2072 2073 /* 2074 * Translate other errors into ISC_R_* flavors. 
2075 */ 2076 switch (connect_errno) { 2077#define ERROR_MATCH(a, b) case a: cdev->result = b; break; 2078 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM); 2079 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 2080 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 2081 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED); 2082 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH); 2083 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN); 2084 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH); 2085 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN); 2086 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES); 2087 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET); 2088 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET); 2089 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT); 2090#undef ERROR_MATCH 2091 default: 2092 cdev->result = ISC_R_UNEXPECTED; 2093 isc__strerror(connect_errno, strbuf, sizeof(strbuf)); 2094 UNEXPECTED_ERROR(__FILE__, __LINE__, 2095 "internal_connect: connect() %s", 2096 strbuf); 2097 } 2098 } else { 2099 INSIST(setsockopt(sock->fd, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0); 2100 cdev->result = ISC_R_SUCCESS; 2101 sock->connected = 1; 2102 socket_log(__LINE__, sock, &sock->address, IOEVENT, 2103 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 2104 "internal_connect: success"); 2105 } 2106 2107 send_connectdone_event(sock, &cdev); 2108 2109 UNLOCK(&sock->lock); 2110} 2111 2112/* 2113 * Loop through the socket, returning ISC_R_EOF for each done event pending. 2114 */ 2115static void 2116send_recvdone_abort(isc_socket_t *sock, isc_result_t result) { 2117 isc_socketevent_t *dev; 2118 2119 while (!ISC_LIST_EMPTY(sock->recv_list)) { 2120 dev = ISC_LIST_HEAD(sock->recv_list); 2121 dev->result = result; 2122 send_recvdone_event(sock, &dev); 2123 } 2124} 2125 2126/* 2127 * Take the data we received in our private buffer, and if any recv() calls on 2128 * our list are satisfied, send the corresponding done event. 
2129 * 2130 * If we need more data (there are still items on the recv_list after we consume all 2131 * our data) then arrange for another system recv() call to fill our buffers. 2132 */ 2133static void 2134internal_recv(isc_socket_t *sock, int nbytes) 2135{ 2136 INSIST(VALID_SOCKET(sock)); 2137 2138 LOCK(&sock->lock); 2139 CONSISTENT(sock); 2140 2141 socket_log(__LINE__, sock, NULL, IOEVENT, 2142 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV, 2143 "internal_recv: %d bytes received", nbytes); 2144 2145 /* 2146 * If we got here, the I/O operation succeeded. However, we might still have removed this 2147 * event from our notification list (or never placed it on it due to immediate completion.) 2148 * Handle the reference counting here, and handle the cancellation event just after. 2149 */ 2150 INSIST(sock->pending_iocp > 0); 2151 sock->pending_iocp--; 2152 INSIST(sock->pending_recv > 0); 2153 sock->pending_recv--; 2154 2155 /* 2156 * The only way we could have gotten here is that our I/O has successfully completed. 2157 * Update our pointers, and move on. The only odd case here is that we might not 2158 * have received enough data on a TCP stream to satisfy the minimum requirements. If 2159 * this is the case, we will re-issue the recv() call for what we need. 2160 * 2161 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end 2162 * has closed. 2163 */ 2164 if (nbytes == 0 && sock->type == isc_sockettype_tcp) { 2165 send_recvdone_abort(sock, ISC_R_EOF); 2166 maybe_free_socket(&sock, __LINE__); 2167 return; 2168 } 2169 sock->recvbuf.remaining = nbytes; 2170 sock->recvbuf.consume_position = sock->recvbuf.base; 2171 completeio_recv(sock); 2172 2173 /* 2174 * If there are more receivers waiting for data, queue another receive 2175 * here. 2176 */ 2177 queue_receive_request(sock); 2178 2179 /* 2180 * Unlock and/or destroy if we are the last thing this socket has left to do. 
2181 */ 2182 maybe_free_socket(&sock, __LINE__); 2183} 2184 2185static void 2186internal_send(isc_socket_t *sock, isc_socketevent_t *dev, 2187 struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo) 2188{ 2189 buflist_t *buffer; 2190 2191 /* 2192 * Find out what socket this is and lock it. 2193 */ 2194 INSIST(VALID_SOCKET(sock)); 2195 2196 LOCK(&sock->lock); 2197 CONSISTENT(sock); 2198 2199 socket_log(__LINE__, sock, NULL, IOEVENT, 2200 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND, 2201 "internal_send: task got socket event %p", dev); 2202 2203 buffer = ISC_LIST_HEAD(lpo->bufferlist); 2204 while (buffer != NULL) { 2205 ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link); 2206 2207 socket_log(__LINE__, sock, NULL, TRACE, 2208 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 2209 "free_buffer %p %p", buffer, buffer->buf); 2210 2211 HeapFree(hHeapHandle, 0, buffer->buf); 2212 HeapFree(hHeapHandle, 0, buffer); 2213 buffer = ISC_LIST_HEAD(lpo->bufferlist); 2214 } 2215 2216 INSIST(sock->pending_iocp > 0); 2217 sock->pending_iocp--; 2218 INSIST(sock->pending_send > 0); 2219 sock->pending_send--; 2220 2221 /* If the event is no longer in the list we can just return */ 2222 if (!senddone_is_active(sock, dev)) 2223 goto done; 2224 2225 /* 2226 * Set the error code and send things on its way. 2227 */ 2228 switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) { 2229 case DOIO_SOFT: 2230 break; 2231 case DOIO_HARD: 2232 case DOIO_SUCCESS: 2233 send_senddone_event(sock, &dev); 2234 break; 2235 } 2236 2237 done: 2238 maybe_free_socket(&sock, __LINE__); 2239} 2240 2241/* 2242 * These return if the done event passed in is on the list (or for connect, is 2243 * the one we're waiting for. Using these ensures we will not double-send an 2244 * event. 
2245 */ 2246static isc_boolean_t 2247senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev) 2248{ 2249 isc_socketevent_t *ldev; 2250 2251 ldev = ISC_LIST_HEAD(sock->send_list); 2252 while (ldev != NULL && ldev != dev) 2253 ldev = ISC_LIST_NEXT(ldev, ev_link); 2254 2255 return (ldev == NULL ? ISC_FALSE : ISC_TRUE); 2256} 2257 2258static isc_boolean_t 2259acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev) 2260{ 2261 isc_socket_newconnev_t *ldev; 2262 2263 ldev = ISC_LIST_HEAD(sock->accept_list); 2264 while (ldev != NULL && ldev != dev) 2265 ldev = ISC_LIST_NEXT(ldev, ev_link); 2266 2267 return (ldev == NULL ? ISC_FALSE : ISC_TRUE); 2268} 2269 2270static isc_boolean_t 2271connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev) 2272{ 2273 return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE); 2274} 2275 2276/* 2277 * This is the I/O Completion Port Worker Function. It loops forever 2278 * waiting for I/O to complete and then forwards them for further 2279 * processing. There are a number of these in separate threads. 2280 */ 2281static isc_threadresult_t WINAPI 2282SocketIoThread(LPVOID ThreadContext) { 2283 isc_socketmgr_t *manager = ThreadContext; 2284 BOOL bSuccess = FALSE; 2285 DWORD nbytes; 2286 IoCompletionInfo *lpo = NULL; 2287 isc_socket_t *sock = NULL; 2288 int request; 2289 struct msghdr *messagehdr = NULL; 2290 int errval; 2291 char strbuf[ISC_STRERRORSIZE]; 2292 int errstatus; 2293 2294 REQUIRE(VALID_MANAGER(manager)); 2295 2296 /* 2297 * Set the thread priority high enough so I/O will 2298 * preempt normal recv packet processing, but not 2299 * higher than the timer sync thread. 
2300 */ 2301 if (!SetThreadPriority(GetCurrentThread(), 2302 THREAD_PRIORITY_ABOVE_NORMAL)) { 2303 errval = GetLastError(); 2304 isc__strerror(errval, strbuf, sizeof(strbuf)); 2305 FATAL_ERROR(__FILE__, __LINE__, 2306 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 2307 ISC_MSG_FAILED, 2308 "Can't set thread priority: %s"), 2309 strbuf); 2310 } 2311 2312 /* 2313 * Loop forever waiting on I/O Completions and then processing them 2314 */ 2315 while (TRUE) { 2316 bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort, 2317 &nbytes, (LPDWORD)&sock, 2318 (LPWSAOVERLAPPED *)&lpo, 2319 INFINITE); 2320 if (lpo == NULL) /* Received request to exit */ 2321 break; 2322 2323 REQUIRE(VALID_SOCKET(sock)); 2324 2325 request = lpo->request_type; 2326 2327 errstatus = 0; 2328 if (!bSuccess) { 2329 isc_result_t isc_result; 2330 2331 /* 2332 * Did the I/O operation complete? 2333 */ 2334 errstatus = WSAGetLastError(); 2335 isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__); 2336 2337 LOCK(&sock->lock); 2338 CONSISTENT(sock); 2339 switch (request) { 2340 case SOCKET_RECV: 2341 INSIST(sock->pending_iocp > 0); 2342 sock->pending_iocp--; 2343 INSIST(sock->pending_recv > 0); 2344 sock->pending_recv--; 2345 send_recvdone_abort(sock, isc_result); 2346 if (isc_result == ISC_R_UNEXPECTED) { 2347 UNEXPECTED_ERROR(__FILE__, __LINE__, 2348 "SOCKET_RECV: Windows error code: %d, returning ISC error %d", 2349 errstatus, isc_result); 2350 } 2351 break; 2352 2353 case SOCKET_SEND: 2354 INSIST(sock->pending_iocp > 0); 2355 sock->pending_iocp--; 2356 INSIST(sock->pending_send > 0); 2357 sock->pending_send--; 2358 if (senddone_is_active(sock, lpo->dev)) { 2359 lpo->dev->result = isc_result; 2360 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2361 "canceled_send"); 2362 send_senddone_event(sock, &lpo->dev); 2363 } 2364 break; 2365 2366 case SOCKET_ACCEPT: 2367 INSIST(sock->pending_iocp > 0); 2368 sock->pending_iocp--; 2369 INSIST(sock->pending_accept > 0); 2370 
sock->pending_accept--; 2371 if (acceptdone_is_active(sock, lpo->adev)) { 2372 closesocket(lpo->adev->newsocket->fd); 2373 lpo->adev->newsocket->fd = INVALID_SOCKET; 2374 lpo->adev->newsocket->references--; 2375 free_socket(&lpo->adev->newsocket, __LINE__); 2376 lpo->adev->result = isc_result; 2377 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2378 "canceled_accept"); 2379 send_acceptdone_event(sock, &lpo->adev); 2380 } 2381 break; 2382 2383 case SOCKET_CONNECT: 2384 INSIST(sock->pending_iocp > 0); 2385 sock->pending_iocp--; 2386 INSIST(sock->pending_connect == 1); 2387 sock->pending_connect = 0; 2388 if (connectdone_is_active(sock, lpo->cdev)) { 2389 lpo->cdev->result = isc_result; 2390 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2391 "canceled_connect"); 2392 send_connectdone_event(sock, &lpo->cdev); 2393 } 2394 break; 2395 } 2396 maybe_free_socket(&sock, __LINE__); 2397 2398 if (lpo != NULL) 2399 HeapFree(hHeapHandle, 0, lpo); 2400 continue; 2401 } 2402 2403 messagehdr = &lpo->messagehdr; 2404 2405 switch (request) { 2406 case SOCKET_RECV: 2407 internal_recv(sock, nbytes); 2408 break; 2409 case SOCKET_SEND: 2410 internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo); 2411 break; 2412 case SOCKET_ACCEPT: 2413 internal_accept(sock, lpo, errstatus); 2414 break; 2415 case SOCKET_CONNECT: 2416 internal_connect(sock, lpo, errstatus); 2417 break; 2418 } 2419 2420 if (lpo != NULL) 2421 HeapFree(hHeapHandle, 0, lpo); 2422 } 2423 2424 /* 2425 * Exit Completion Port Thread 2426 */ 2427 manager_log(manager, TRACE, 2428 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2429 ISC_MSG_EXITING, "SocketIoThread exiting")); 2430 return ((isc_threadresult_t)0); 2431} 2432 2433/* 2434 * Create a new socket manager. 
2435 */ 2436isc_result_t 2437isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { 2438 return (isc_socketmgr_create2(mctx, managerp, 0)); 2439} 2440 2441isc_result_t 2442isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, 2443 unsigned int maxsocks) 2444{ 2445 isc_socketmgr_t *manager; 2446 isc_result_t result; 2447 2448 REQUIRE(managerp != NULL && *managerp == NULL); 2449 2450 if (maxsocks != 0) 2451 return (ISC_R_NOTIMPLEMENTED); 2452 2453 manager = isc_mem_get(mctx, sizeof(*manager)); 2454 if (manager == NULL) 2455 return (ISC_R_NOMEMORY); 2456 2457 InitSockets(); 2458 2459 manager->magic = SOCKET_MANAGER_MAGIC; 2460 manager->mctx = NULL; 2461 manager->stats = NULL; 2462 ISC_LIST_INIT(manager->socklist); 2463 result = isc_mutex_init(&manager->lock); 2464 if (result != ISC_R_SUCCESS) { 2465 isc_mem_put(mctx, manager, sizeof(*manager)); 2466 return (result); 2467 } 2468 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) { 2469 DESTROYLOCK(&manager->lock); 2470 isc_mem_put(mctx, manager, sizeof(*manager)); 2471 UNEXPECTED_ERROR(__FILE__, __LINE__, 2472 "isc_condition_init() %s", 2473 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2474 ISC_MSG_FAILED, "failed")); 2475 return (ISC_R_UNEXPECTED); 2476 } 2477 2478 isc_mem_attach(mctx, &manager->mctx); 2479 2480 iocompletionport_init(manager); /* Create the Completion Ports */ 2481 2482 manager->bShutdown = ISC_FALSE; 2483 manager->totalSockets = 0; 2484 manager->iocp_total = 0; 2485 2486 *managerp = manager; 2487 2488 return (ISC_R_SUCCESS); 2489} 2490 2491isc_result_t 2492isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) { 2493 REQUIRE(VALID_MANAGER(manager)); 2494 REQUIRE(nsockp != NULL); 2495 2496 return (ISC_R_NOTIMPLEMENTED); 2497} 2498 2499void 2500isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) { 2501 REQUIRE(VALID_MANAGER(manager)); 2502 REQUIRE(ISC_LIST_EMPTY(manager->socklist)); 2503 REQUIRE(manager->stats == NULL); 2504 
REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max); 2505 2506 isc_stats_attach(stats, &manager->stats); 2507} 2508 2509void 2510isc_socketmgr_destroy(isc_socketmgr_t **managerp) { 2511 isc_socketmgr_t *manager; 2512 int i; 2513 isc_mem_t *mctx; 2514 2515 /* 2516 * Destroy a socket manager. 2517 */ 2518 2519 REQUIRE(managerp != NULL); 2520 manager = *managerp; 2521 REQUIRE(VALID_MANAGER(manager)); 2522 2523 LOCK(&manager->lock); 2524 2525 /* 2526 * Wait for all sockets to be destroyed. 2527 */ 2528 while (!ISC_LIST_EMPTY(manager->socklist)) { 2529 manager_log(manager, CREATION, 2530 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 2531 ISC_MSG_SOCKETSREMAIN, 2532 "sockets exist")); 2533 WAIT(&manager->shutdown_ok, &manager->lock); 2534 } 2535 2536 UNLOCK(&manager->lock); 2537 2538 /* 2539 * Here, we need to had some wait code for the completion port 2540 * thread. 2541 */ 2542 signal_iocompletionport_exit(manager); 2543 manager->bShutdown = ISC_TRUE; 2544 2545 /* 2546 * Wait for threads to exit. 2547 */ 2548 for (i = 0; i < manager->maxIOCPThreads; i++) { 2549 if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i], 2550 NULL) != ISC_R_SUCCESS) 2551 UNEXPECTED_ERROR(__FILE__, __LINE__, 2552 "isc_thread_join() for Completion Port %s", 2553 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2554 ISC_MSG_FAILED, "failed")); 2555 } 2556 /* 2557 * Clean up. 
2558 */ 2559 2560 CloseHandle(manager->hIoCompletionPort); 2561 2562 (void)isc_condition_destroy(&manager->shutdown_ok); 2563 2564 DESTROYLOCK(&manager->lock); 2565 if (manager->stats != NULL) 2566 isc_stats_detach(&manager->stats); 2567 manager->magic = 0; 2568 mctx= manager->mctx; 2569 isc_mem_put(mctx, manager, sizeof(*manager)); 2570 2571 isc_mem_detach(&mctx); 2572 2573 *managerp = NULL; 2574} 2575 2576static void 2577queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev) 2578{ 2579 isc_task_t *ntask = NULL; 2580 2581 isc_task_attach(task, &ntask); 2582 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; 2583 2584 /* 2585 * Enqueue the request. 2586 */ 2587 INSIST(!ISC_LINK_LINKED(dev, ev_link)); 2588 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); 2589 2590 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2591 "queue_receive_event: event %p -> task %p", 2592 dev, ntask); 2593} 2594 2595/* 2596 * Check the pending receive queue, and if we have data pending, give it to this 2597 * caller. If we have none, queue an I/O request. If this caller is not the first 2598 * on the list, then we will just queue this event and return. 2599 * 2600 * Caller must have the socket locked. 2601 */ 2602static isc_result_t 2603socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, 2604 unsigned int flags) 2605{ 2606 int cc = 0; 2607 isc_task_t *ntask = NULL; 2608 isc_result_t result = ISC_R_SUCCESS; 2609 int recv_errno = 0; 2610 2611 dev->ev_sender = task; 2612 2613 if (sock->fd == INVALID_SOCKET) 2614 return (ISC_R_EOF); 2615 2616 /* 2617 * Queue our event on the list of things to do. Call our function to 2618 * attempt to fill buffers as much as possible, and return done events. 2619 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE 2620 * here and tell our caller that we could not satisfy it immediately. 
2621 */ 2622 queue_receive_event(sock, task, dev); 2623 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) 2624 result = ISC_R_INPROGRESS; 2625 2626 completeio_recv(sock); 2627 2628 /* 2629 * If there are more receivers waiting for data, queue another receive 2630 * here. If the 2631 */ 2632 queue_receive_request(sock); 2633 2634 return (result); 2635} 2636 2637isc_result_t 2638isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, 2639 unsigned int minimum, isc_task_t *task, 2640 isc_taskaction_t action, const void *arg) 2641{ 2642 isc_socketevent_t *dev; 2643 isc_socketmgr_t *manager; 2644 unsigned int iocount; 2645 isc_buffer_t *buffer; 2646 isc_result_t ret; 2647 2648 REQUIRE(VALID_SOCKET(sock)); 2649 LOCK(&sock->lock); 2650 CONSISTENT(sock); 2651 2652 /* 2653 * Make sure that the socket is not closed. XXXMLG change error here? 2654 */ 2655 if (sock->fd == INVALID_SOCKET) { 2656 UNLOCK(&sock->lock); 2657 return (ISC_R_CONNREFUSED); 2658 } 2659 2660 REQUIRE(buflist != NULL); 2661 REQUIRE(!ISC_LIST_EMPTY(*buflist)); 2662 REQUIRE(task != NULL); 2663 REQUIRE(action != NULL); 2664 2665 manager = sock->manager; 2666 REQUIRE(VALID_MANAGER(manager)); 2667 2668 iocount = isc_bufferlist_availablecount(buflist); 2669 REQUIRE(iocount > 0); 2670 2671 INSIST(sock->bound); 2672 2673 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); 2674 if (dev == NULL) { 2675 UNLOCK(&sock->lock); 2676 return (ISC_R_NOMEMORY); 2677 } 2678 2679 /* 2680 * UDP sockets are always partial read 2681 */ 2682 if (sock->type == isc_sockettype_udp) 2683 dev->minimum = 1; 2684 else { 2685 if (minimum == 0) 2686 dev->minimum = iocount; 2687 else 2688 dev->minimum = minimum; 2689 } 2690 2691 /* 2692 * Move each buffer from the passed in list to our internal one. 
2693 */ 2694 buffer = ISC_LIST_HEAD(*buflist); 2695 while (buffer != NULL) { 2696 ISC_LIST_DEQUEUE(*buflist, buffer, link); 2697 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 2698 buffer = ISC_LIST_HEAD(*buflist); 2699 } 2700 2701 ret = socket_recv(sock, dev, task, 0); 2702 2703 UNLOCK(&sock->lock); 2704 return (ret); 2705} 2706 2707isc_result_t 2708isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, 2709 isc_task_t *task, isc_taskaction_t action, const void *arg) 2710{ 2711 isc_socketevent_t *dev; 2712 isc_socketmgr_t *manager; 2713 isc_result_t ret; 2714 2715 REQUIRE(VALID_SOCKET(sock)); 2716 LOCK(&sock->lock); 2717 CONSISTENT(sock); 2718 2719 /* 2720 * make sure that the socket's not closed 2721 */ 2722 if (sock->fd == INVALID_SOCKET) { 2723 UNLOCK(&sock->lock); 2724 return (ISC_R_CONNREFUSED); 2725 } 2726 REQUIRE(action != NULL); 2727 2728 manager = sock->manager; 2729 REQUIRE(VALID_MANAGER(manager)); 2730 2731 INSIST(sock->bound); 2732 2733 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); 2734 if (dev == NULL) { 2735 UNLOCK(&sock->lock); 2736 return (ISC_R_NOMEMORY); 2737 } 2738 2739 ret = isc_socket_recv2(sock, region, minimum, task, dev, 0); 2740 UNLOCK(&sock->lock); 2741 return (ret); 2742} 2743 2744isc_result_t 2745isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, 2746 unsigned int minimum, isc_task_t *task, 2747 isc_socketevent_t *event, unsigned int flags) 2748{ 2749 isc_result_t ret; 2750 2751 REQUIRE(VALID_SOCKET(sock)); 2752 LOCK(&sock->lock); 2753 CONSISTENT(sock); 2754 2755 event->result = ISC_R_UNEXPECTED; 2756 event->ev_sender = sock; 2757 /* 2758 * make sure that the socket's not closed 2759 */ 2760 if (sock->fd == INVALID_SOCKET) { 2761 UNLOCK(&sock->lock); 2762 return (ISC_R_CONNREFUSED); 2763 } 2764 2765 ISC_LIST_INIT(event->bufferlist); 2766 event->region = *region; 2767 event->n = 0; 2768 event->offset = 0; 2769 event->attributes = 0; 2770 2771 /* 2772 * UDP sockets are always 
partial read. 2773 */ 2774 if (sock->type == isc_sockettype_udp) 2775 event->minimum = 1; 2776 else { 2777 if (minimum == 0) 2778 event->minimum = region->length; 2779 else 2780 event->minimum = minimum; 2781 } 2782 2783 ret = socket_recv(sock, event, task, flags); 2784 UNLOCK(&sock->lock); 2785 return (ret); 2786} 2787 2788/* 2789 * Caller must have the socket locked. 2790 */ 2791static isc_result_t 2792socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, 2793 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 2794 unsigned int flags) 2795{ 2796 int io_state; 2797 int send_errno = 0; 2798 int cc = 0; 2799 isc_task_t *ntask = NULL; 2800 isc_result_t result = ISC_R_SUCCESS; 2801 2802 dev->ev_sender = task; 2803 2804 set_dev_address(address, sock, dev); 2805 if (pktinfo != NULL) { 2806 socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET, 2807 ISC_MSG_PKTINFOPROVIDED, 2808 "pktinfo structure provided, ifindex %u (set to 0)", 2809 pktinfo->ipi6_ifindex); 2810 2811 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; 2812 dev->pktinfo = *pktinfo; 2813 /* 2814 * Set the pktinfo index to 0 here, to let the kernel decide 2815 * what interface it should send on. 2816 */ 2817 dev->pktinfo.ipi6_ifindex = 0; 2818 } 2819 2820 io_state = startio_send(sock, dev, &cc, &send_errno); 2821 switch (io_state) { 2822 case DOIO_PENDING: /* I/O started. Nothing more to do */ 2823 case DOIO_SOFT: 2824 /* 2825 * We couldn't send all or part of the request right now, so 2826 * queue it unless ISC_SOCKFLAG_NORETRY is set. 2827 */ 2828 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) { 2829 isc_task_attach(task, &ntask); 2830 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; 2831 2832 /* 2833 * Enqueue the request. 
2834 */ 2835 INSIST(!ISC_LINK_LINKED(dev, ev_link)); 2836 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); 2837 2838 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2839 "socket_send: event %p -> task %p", 2840 dev, ntask); 2841 2842 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) 2843 result = ISC_R_INPROGRESS; 2844 break; 2845 } 2846 2847 case DOIO_SUCCESS: 2848 break; 2849 } 2850 2851 return (result); 2852} 2853 2854isc_result_t 2855isc_socket_send(isc_socket_t *sock, isc_region_t *region, 2856 isc_task_t *task, isc_taskaction_t action, const void *arg) 2857{ 2858 /* 2859 * REQUIRE() checking is performed in isc_socket_sendto(). 2860 */ 2861 return (isc_socket_sendto(sock, region, task, action, arg, NULL, 2862 NULL)); 2863} 2864 2865isc_result_t 2866isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, 2867 isc_task_t *task, isc_taskaction_t action, const void *arg, 2868 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) 2869{ 2870 isc_socketevent_t *dev; 2871 isc_socketmgr_t *manager; 2872 isc_result_t ret; 2873 2874 REQUIRE(VALID_SOCKET(sock)); 2875 REQUIRE(sock->type != isc_sockettype_fdwatch); 2876 2877 LOCK(&sock->lock); 2878 CONSISTENT(sock); 2879 2880 /* 2881 * make sure that the socket's not closed 2882 */ 2883 if (sock->fd == INVALID_SOCKET) { 2884 UNLOCK(&sock->lock); 2885 return (ISC_R_CONNREFUSED); 2886 } 2887 REQUIRE(region != NULL); 2888 REQUIRE(task != NULL); 2889 REQUIRE(action != NULL); 2890 2891 manager = sock->manager; 2892 REQUIRE(VALID_MANAGER(manager)); 2893 2894 INSIST(sock->bound); 2895 2896 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); 2897 if (dev == NULL) { 2898 UNLOCK(&sock->lock); 2899 return (ISC_R_NOMEMORY); 2900 } 2901 dev->region = *region; 2902 2903 ret = socket_send(sock, dev, task, address, pktinfo, 0); 2904 UNLOCK(&sock->lock); 2905 return (ret); 2906} 2907 2908isc_result_t 2909isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, 2910 isc_task_t *task, isc_taskaction_t action, const 
void *arg) 2911{ 2912 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL, 2913 NULL)); 2914} 2915 2916isc_result_t 2917isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, 2918 isc_task_t *task, isc_taskaction_t action, const void *arg, 2919 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) 2920{ 2921 isc_socketevent_t *dev; 2922 isc_socketmgr_t *manager; 2923 unsigned int iocount; 2924 isc_buffer_t *buffer; 2925 isc_result_t ret; 2926 2927 REQUIRE(VALID_SOCKET(sock)); 2928 2929 LOCK(&sock->lock); 2930 CONSISTENT(sock); 2931 2932 /* 2933 * make sure that the socket's not closed 2934 */ 2935 if (sock->fd == INVALID_SOCKET) { 2936 UNLOCK(&sock->lock); 2937 return (ISC_R_CONNREFUSED); 2938 } 2939 REQUIRE(buflist != NULL); 2940 REQUIRE(!ISC_LIST_EMPTY(*buflist)); 2941 REQUIRE(task != NULL); 2942 REQUIRE(action != NULL); 2943 2944 manager = sock->manager; 2945 REQUIRE(VALID_MANAGER(manager)); 2946 2947 iocount = isc_bufferlist_usedcount(buflist); 2948 REQUIRE(iocount > 0); 2949 2950 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); 2951 if (dev == NULL) { 2952 UNLOCK(&sock->lock); 2953 return (ISC_R_NOMEMORY); 2954 } 2955 2956 /* 2957 * Move each buffer from the passed in list to our internal one. 
2958 */ 2959 buffer = ISC_LIST_HEAD(*buflist); 2960 while (buffer != NULL) { 2961 ISC_LIST_DEQUEUE(*buflist, buffer, link); 2962 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 2963 buffer = ISC_LIST_HEAD(*buflist); 2964 } 2965 2966 ret = socket_send(sock, dev, task, address, pktinfo, 0); 2967 UNLOCK(&sock->lock); 2968 return (ret); 2969} 2970 2971isc_result_t 2972isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, 2973 isc_task_t *task, 2974 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 2975 isc_socketevent_t *event, unsigned int flags) 2976{ 2977 isc_result_t ret; 2978 2979 REQUIRE(VALID_SOCKET(sock)); 2980 LOCK(&sock->lock); 2981 CONSISTENT(sock); 2982 2983 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0); 2984 if ((flags & ISC_SOCKFLAG_NORETRY) != 0) 2985 REQUIRE(sock->type == isc_sockettype_udp); 2986 event->ev_sender = sock; 2987 event->result = ISC_R_UNEXPECTED; 2988 /* 2989 * make sure that the socket's not closed 2990 */ 2991 if (sock->fd == INVALID_SOCKET) { 2992 UNLOCK(&sock->lock); 2993 return (ISC_R_CONNREFUSED); 2994 } 2995 ISC_LIST_INIT(event->bufferlist); 2996 event->region = *region; 2997 event->n = 0; 2998 event->offset = 0; 2999 event->attributes = 0; 3000 3001 ret = socket_send(sock, event, task, address, pktinfo, flags); 3002 UNLOCK(&sock->lock); 3003 return (ret); 3004} 3005 3006isc_result_t 3007isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, 3008 unsigned int options) { 3009 int bind_errno; 3010 char strbuf[ISC_STRERRORSIZE]; 3011 int on = 1; 3012 3013 REQUIRE(VALID_SOCKET(sock)); 3014 LOCK(&sock->lock); 3015 CONSISTENT(sock); 3016 3017 /* 3018 * make sure that the socket's not closed 3019 */ 3020 if (sock->fd == INVALID_SOCKET) { 3021 UNLOCK(&sock->lock); 3022 return (ISC_R_CONNREFUSED); 3023 } 3024 3025 INSIST(!sock->bound); 3026 3027 if (sock->pf != sockaddr->type.sa.sa_family) { 3028 UNLOCK(&sock->lock); 3029 return (ISC_R_FAMILYMISMATCH); 3030 } 3031 /* 3032 * Only set SO_REUSEADDR 
when we want a specific port. 3033 */ 3034 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && 3035 isc_sockaddr_getport(sockaddr) != (in_port_t)0 && 3036 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, 3037 sizeof(on)) < 0) { 3038 UNEXPECTED_ERROR(__FILE__, __LINE__, 3039 "setsockopt(%d) %s", sock->fd, 3040 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 3041 ISC_MSG_FAILED, "failed")); 3042 /* Press on... */ 3043 } 3044 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { 3045 bind_errno = WSAGetLastError(); 3046 UNLOCK(&sock->lock); 3047 switch (bind_errno) { 3048 case WSAEACCES: 3049 return (ISC_R_NOPERM); 3050 case WSAEADDRNOTAVAIL: 3051 return (ISC_R_ADDRNOTAVAIL); 3052 case WSAEADDRINUSE: 3053 return (ISC_R_ADDRINUSE); 3054 case WSAEINVAL: 3055 return (ISC_R_BOUND); 3056 default: 3057 isc__strerror(bind_errno, strbuf, sizeof(strbuf)); 3058 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", 3059 strbuf); 3060 return (ISC_R_UNEXPECTED); 3061 } 3062 } 3063 3064 socket_log(__LINE__, sock, sockaddr, TRACE, 3065 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound"); 3066 sock->bound = 1; 3067 3068 UNLOCK(&sock->lock); 3069 return (ISC_R_SUCCESS); 3070} 3071 3072isc_result_t 3073isc_socket_filter(isc_socket_t *sock, const char *filter) { 3074 UNUSED(sock); 3075 UNUSED(filter); 3076 3077 REQUIRE(VALID_SOCKET(sock)); 3078 return (ISC_R_NOTIMPLEMENTED); 3079} 3080 3081/* 3082 * Set up to listen on a given socket. We do this by creating an internal 3083 * event that will be dispatched when the socket has read activity. The 3084 * watcher will send the internal event to the task when there is a new 3085 * connection. 3086 * 3087 * Unlike in read, we don't preallocate a done event here. Every time there 3088 * is a new connection we'll have to allocate a new one anyway, so we might 3089 * as well keep things simple rather than having to track them. 
3090 */ 3091isc_result_t 3092isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { 3093 char strbuf[ISC_STRERRORSIZE]; 3094 3095 REQUIRE(VALID_SOCKET(sock)); 3096 3097 LOCK(&sock->lock); 3098 CONSISTENT(sock); 3099 3100 /* 3101 * make sure that the socket's not closed 3102 */ 3103 if (sock->fd == INVALID_SOCKET) { 3104 UNLOCK(&sock->lock); 3105 return (ISC_R_CONNREFUSED); 3106 } 3107 3108 REQUIRE(!sock->listener); 3109 REQUIRE(sock->bound); 3110 REQUIRE(sock->type == isc_sockettype_tcp); 3111 3112 if (backlog == 0) 3113 backlog = SOMAXCONN; 3114 3115 if (listen(sock->fd, (int)backlog) < 0) { 3116 UNLOCK(&sock->lock); 3117 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 3118 3119 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf); 3120 3121 return (ISC_R_UNEXPECTED); 3122 } 3123 3124 socket_log(__LINE__, sock, NULL, TRACE, 3125 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening"); 3126 sock->listener = 1; 3127 _set_state(sock, SOCK_LISTEN); 3128 3129 UNLOCK(&sock->lock); 3130 return (ISC_R_SUCCESS); 3131} 3132 3133/* 3134 * This should try to do aggressive accept() XXXMLG 3135 */ 3136isc_result_t 3137isc_socket_accept(isc_socket_t *sock, 3138 isc_task_t *task, isc_taskaction_t action, const void *arg) 3139{ 3140 isc_socket_newconnev_t *adev; 3141 isc_socketmgr_t *manager; 3142 isc_task_t *ntask = NULL; 3143 isc_socket_t *nsock; 3144 isc_result_t result; 3145 IoCompletionInfo *lpo; 3146 3147 REQUIRE(VALID_SOCKET(sock)); 3148 3149 manager = sock->manager; 3150 REQUIRE(VALID_MANAGER(manager)); 3151 3152 LOCK(&sock->lock); 3153 CONSISTENT(sock); 3154 3155 /* 3156 * make sure that the socket's not closed 3157 */ 3158 if (sock->fd == INVALID_SOCKET) { 3159 UNLOCK(&sock->lock); 3160 return (ISC_R_CONNREFUSED); 3161 } 3162 3163 REQUIRE(sock->listener); 3164 3165 /* 3166 * Sender field is overloaded here with the task we will be sending 3167 * this event to. 
Just before the actual event is delivered the 3168 * actual ev_sender will be touched up to be the socket. 3169 */ 3170 adev = (isc_socket_newconnev_t *) 3171 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN, 3172 action, arg, sizeof(*adev)); 3173 if (adev == NULL) { 3174 UNLOCK(&sock->lock); 3175 return (ISC_R_NOMEMORY); 3176 } 3177 ISC_LINK_INIT(adev, ev_link); 3178 3179 result = allocate_socket(manager, sock->type, &nsock); 3180 if (result != ISC_R_SUCCESS) { 3181 isc_event_free((isc_event_t **)&adev); 3182 UNLOCK(&sock->lock); 3183 return (result); 3184 } 3185 3186 /* 3187 * AcceptEx() requires we pass in a socket. 3188 */ 3189 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); 3190 if (nsock->fd == INVALID_SOCKET) { 3191 free_socket(&nsock, __LINE__); 3192 isc_event_free((isc_event_t **)&adev); 3193 UNLOCK(&sock->lock); 3194 return (ISC_R_FAILURE); // XXXMLG need real error message 3195 } 3196 3197 /* 3198 * Attach to socket and to task. 3199 */ 3200 isc_task_attach(task, &ntask); 3201 nsock->references++; 3202 3203 adev->ev_sender = ntask; 3204 adev->newsocket = nsock; 3205 _set_state(nsock, SOCK_ACCEPT); 3206 3207 /* 3208 * Queue io completion for an accept(). 
3209 */ 3210 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 3211 HEAP_ZERO_MEMORY, 3212 sizeof(IoCompletionInfo)); 3213 RUNTIME_CHECK(lpo != NULL); 3214 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, 3215 (sizeof(SOCKADDR_STORAGE) + 16) * 2); 3216 RUNTIME_CHECK(lpo->acceptbuffer != NULL); 3217 3218 lpo->adev = adev; 3219 lpo->request_type = SOCKET_ACCEPT; 3220 3221 ISCAcceptEx(sock->fd, 3222 nsock->fd, /* Accepted Socket */ 3223 lpo->acceptbuffer, /* Buffer for initial Recv */ 3224 0, /* Length of Buffer */ 3225 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */ 3226 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */ 3227 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */ 3228 (LPOVERLAPPED)lpo /* Overlapped structure */ 3229 ); 3230 iocompletionport_update(nsock); 3231 3232 socket_log(__LINE__, sock, NULL, TRACE, 3233 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, 3234 "accepting for nsock %p fd %d", nsock, nsock->fd); 3235 3236 /* 3237 * Enqueue the event 3238 */ 3239 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link); 3240 sock->pending_accept++; 3241 sock->pending_iocp++; 3242 3243 UNLOCK(&sock->lock); 3244 return (ISC_R_SUCCESS); 3245} 3246 3247isc_result_t 3248isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, 3249 isc_task_t *task, isc_taskaction_t action, const void *arg) 3250{ 3251 char strbuf[ISC_STRERRORSIZE]; 3252 isc_socket_connev_t *cdev; 3253 isc_task_t *ntask = NULL; 3254 isc_socketmgr_t *manager; 3255 IoCompletionInfo *lpo; 3256 int bind_errno; 3257 3258 REQUIRE(VALID_SOCKET(sock)); 3259 REQUIRE(addr != NULL); 3260 REQUIRE(task != NULL); 3261 REQUIRE(action != NULL); 3262 3263 manager = sock->manager; 3264 REQUIRE(VALID_MANAGER(manager)); 3265 REQUIRE(addr != NULL); 3266 3267 if (isc_sockaddr_ismulticast(addr)) 3268 return (ISC_R_MULTICAST); 3269 3270 LOCK(&sock->lock); 3271 CONSISTENT(sock); 3272 3273 /* 3274 * make sure that the socket's not closed 3275 */ 3276 if (sock->fd == 
INVALID_SOCKET) { 3277 UNLOCK(&sock->lock); 3278 return (ISC_R_CONNREFUSED); 3279 } 3280 3281 /* 3282 * Windows sockets won't connect unless the socket is bound. 3283 */ 3284 if (!sock->bound) { 3285 isc_sockaddr_t any; 3286 3287 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr)); 3288 if (bind(sock->fd, &any.type.sa, any.length) < 0) { 3289 bind_errno = WSAGetLastError(); 3290 UNLOCK(&sock->lock); 3291 switch (bind_errno) { 3292 case WSAEACCES: 3293 return (ISC_R_NOPERM); 3294 case WSAEADDRNOTAVAIL: 3295 return (ISC_R_ADDRNOTAVAIL); 3296 case WSAEADDRINUSE: 3297 return (ISC_R_ADDRINUSE); 3298 case WSAEINVAL: 3299 return (ISC_R_BOUND); 3300 default: 3301 isc__strerror(bind_errno, strbuf, 3302 sizeof(strbuf)); 3303 UNEXPECTED_ERROR(__FILE__, __LINE__, 3304 "bind: %s", strbuf); 3305 return (ISC_R_UNEXPECTED); 3306 } 3307 } 3308 sock->bound = 1; 3309 } 3310 3311 REQUIRE(!sock->pending_connect); 3312 3313 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock, 3314 ISC_SOCKEVENT_CONNECT, 3315 action, arg, 3316 sizeof(*cdev)); 3317 if (cdev == NULL) { 3318 UNLOCK(&sock->lock); 3319 return (ISC_R_NOMEMORY); 3320 } 3321 ISC_LINK_INIT(cdev, ev_link); 3322 3323 if (sock->type == isc_sockettype_tcp) { 3324 /* 3325 * Queue io completion for an accept(). 3326 */ 3327 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 3328 HEAP_ZERO_MEMORY, 3329 sizeof(IoCompletionInfo)); 3330 lpo->cdev = cdev; 3331 lpo->request_type = SOCKET_CONNECT; 3332 3333 sock->address = *addr; 3334 ISCConnectEx(sock->fd, &addr->type.sa, addr->length, 3335 NULL, 0, NULL, (LPOVERLAPPED)lpo); 3336 3337 /* 3338 * Attach to task. 3339 */ 3340 isc_task_attach(task, &ntask); 3341 cdev->ev_sender = ntask; 3342 3343 sock->pending_connect = 1; 3344 _set_state(sock, SOCK_CONNECT); 3345 3346 /* 3347 * Enqueue the request. 
3348 */ 3349 sock->connect_ev = cdev; 3350 sock->pending_iocp++; 3351 } else { 3352 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL); 3353 cdev->result = ISC_R_SUCCESS; 3354 isc_task_send(task, (isc_event_t **)&cdev); 3355 } 3356 CONSISTENT(sock); 3357 UNLOCK(&sock->lock); 3358 3359 return (ISC_R_SUCCESS); 3360} 3361 3362isc_result_t 3363isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) { 3364 isc_result_t result; 3365 3366 REQUIRE(VALID_SOCKET(sock)); 3367 REQUIRE(addressp != NULL); 3368 3369 LOCK(&sock->lock); 3370 CONSISTENT(sock); 3371 3372 /* 3373 * make sure that the socket's not closed 3374 */ 3375 if (sock->fd == INVALID_SOCKET) { 3376 UNLOCK(&sock->lock); 3377 return (ISC_R_CONNREFUSED); 3378 } 3379 3380 if (sock->connected) { 3381 *addressp = sock->address; 3382 result = ISC_R_SUCCESS; 3383 } else { 3384 result = ISC_R_NOTCONNECTED; 3385 } 3386 3387 UNLOCK(&sock->lock); 3388 3389 return (result); 3390} 3391 3392isc_result_t 3393isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) { 3394 ISC_SOCKADDR_LEN_T len; 3395 isc_result_t result; 3396 char strbuf[ISC_STRERRORSIZE]; 3397 3398 REQUIRE(VALID_SOCKET(sock)); 3399 REQUIRE(addressp != NULL); 3400 3401 LOCK(&sock->lock); 3402 CONSISTENT(sock); 3403 3404 /* 3405 * make sure that the socket's not closed 3406 */ 3407 if (sock->fd == INVALID_SOCKET) { 3408 UNLOCK(&sock->lock); 3409 return (ISC_R_CONNREFUSED); 3410 } 3411 3412 if (!sock->bound) { 3413 result = ISC_R_NOTBOUND; 3414 goto out; 3415 } 3416 3417 result = ISC_R_SUCCESS; 3418 3419 len = sizeof(addressp->type); 3420 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) { 3421 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 3422 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s", 3423 strbuf); 3424 result = ISC_R_UNEXPECTED; 3425 goto out; 3426 } 3427 addressp->length = (unsigned int)len; 3428 3429 out: 3430 UNLOCK(&sock->lock); 3431 3432 return (result); 3433} 3434 
3435/* 3436 * Run through the list of events on this socket, and cancel the ones 3437 * queued for task "task" of type "how". "how" is a bitmask. 3438 */ 3439void 3440isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { 3441 3442 REQUIRE(VALID_SOCKET(sock)); 3443 3444 /* 3445 * Quick exit if there is nothing to do. Don't even bother locking 3446 * in this case. 3447 */ 3448 if (how == 0) 3449 return; 3450 3451 LOCK(&sock->lock); 3452 CONSISTENT(sock); 3453 3454 /* 3455 * make sure that the socket's not closed 3456 */ 3457 if (sock->fd == INVALID_SOCKET) { 3458 UNLOCK(&sock->lock); 3459 return; 3460 } 3461 3462 /* 3463 * All of these do the same thing, more or less. 3464 * Each will: 3465 * o If the internal event is marked as "posted" try to 3466 * remove it from the task's queue. If this fails, mark it 3467 * as canceled instead, and let the task clean it up later. 3468 * o For each I/O request for that task of that type, post 3469 * its done event with status of "ISC_R_CANCELED". 3470 * o Reset any state needed. 
3471 */ 3472 3473 if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) { 3474 isc_socketevent_t *dev; 3475 isc_socketevent_t *next; 3476 isc_task_t *current_task; 3477 3478 dev = ISC_LIST_HEAD(sock->recv_list); 3479 while (dev != NULL) { 3480 current_task = dev->ev_sender; 3481 next = ISC_LIST_NEXT(dev, ev_link); 3482 if ((task == NULL) || (task == current_task)) { 3483 dev->result = ISC_R_CANCELED; 3484 send_recvdone_event(sock, &dev); 3485 } 3486 dev = next; 3487 } 3488 } 3489 how &= ~ISC_SOCKCANCEL_RECV; 3490 3491 if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) { 3492 isc_socketevent_t *dev; 3493 isc_socketevent_t *next; 3494 isc_task_t *current_task; 3495 3496 dev = ISC_LIST_HEAD(sock->send_list); 3497 3498 while (dev != NULL) { 3499 current_task = dev->ev_sender; 3500 next = ISC_LIST_NEXT(dev, ev_link); 3501 if ((task == NULL) || (task == current_task)) { 3502 dev->result = ISC_R_CANCELED; 3503 send_senddone_event(sock, &dev); 3504 } 3505 dev = next; 3506 } 3507 } 3508 how &= ~ISC_SOCKCANCEL_SEND; 3509 3510 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT) 3511 && !ISC_LIST_EMPTY(sock->accept_list)) { 3512 isc_socket_newconnev_t *dev; 3513 isc_socket_newconnev_t *next; 3514 isc_task_t *current_task; 3515 3516 dev = ISC_LIST_HEAD(sock->accept_list); 3517 while (dev != NULL) { 3518 current_task = dev->ev_sender; 3519 next = ISC_LIST_NEXT(dev, ev_link); 3520 3521 if ((task == NULL) || (task == current_task)) { 3522 3523 dev->newsocket->references--; 3524 closesocket(dev->newsocket->fd); 3525 dev->newsocket->fd = INVALID_SOCKET; 3526 free_socket(&dev->newsocket, __LINE__); 3527 3528 dev->result = ISC_R_CANCELED; 3529 send_acceptdone_event(sock, &dev); 3530 } 3531 3532 dev = next; 3533 } 3534 } 3535 how &= ~ISC_SOCKCANCEL_ACCEPT; 3536 3537 /* 3538 * Connecting is not a list. 
3539 */ 3540 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT) 3541 && sock->connect_ev != NULL) { 3542 isc_socket_connev_t *dev; 3543 isc_task_t *current_task; 3544 3545 INSIST(sock->pending_connect); 3546 3547 dev = sock->connect_ev; 3548 current_task = dev->ev_sender; 3549 3550 if ((task == NULL) || (task == current_task)) { 3551 closesocket(sock->fd); 3552 sock->fd = INVALID_SOCKET; 3553 _set_state(sock, SOCK_CLOSED); 3554 3555 sock->connect_ev = NULL; 3556 dev->result = ISC_R_CANCELED; 3557 send_connectdone_event(sock, &dev); 3558 } 3559 } 3560 how &= ~ISC_SOCKCANCEL_CONNECT; 3561 3562 maybe_free_socket(&sock, __LINE__); 3563} 3564 3565isc_sockettype_t 3566isc_socket_gettype(isc_socket_t *sock) { 3567 isc_sockettype_t type; 3568 3569 REQUIRE(VALID_SOCKET(sock)); 3570 3571 LOCK(&sock->lock); 3572 3573 /* 3574 * make sure that the socket's not closed 3575 */ 3576 if (sock->fd == INVALID_SOCKET) { 3577 UNLOCK(&sock->lock); 3578 return (ISC_R_CONNREFUSED); 3579 } 3580 3581 type = sock->type; 3582 UNLOCK(&sock->lock); 3583 return (type); 3584} 3585 3586isc_boolean_t 3587isc_socket_isbound(isc_socket_t *sock) { 3588 isc_boolean_t val; 3589 3590 REQUIRE(VALID_SOCKET(sock)); 3591 3592 LOCK(&sock->lock); 3593 CONSISTENT(sock); 3594 3595 /* 3596 * make sure that the socket's not closed 3597 */ 3598 if (sock->fd == INVALID_SOCKET) { 3599 UNLOCK(&sock->lock); 3600 return (ISC_FALSE); 3601 } 3602 3603 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE); 3604 UNLOCK(&sock->lock); 3605 3606 return (val); 3607} 3608 3609void 3610isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) { 3611#if defined(IPV6_V6ONLY) 3612 int onoff = yes ? 
1 : 0; 3613#else 3614 UNUSED(yes); 3615#endif 3616 3617 REQUIRE(VALID_SOCKET(sock)); 3618 3619#ifdef IPV6_V6ONLY 3620 if (sock->pf == AF_INET6) { 3621 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, 3622 (void *)&onoff, sizeof(onoff)); 3623 } 3624#endif 3625} 3626 3627void 3628isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) { 3629 UNUSED(addr); 3630 UNUSED(active); 3631} 3632 3633isc_result_t 3634isc_socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm, 3635 isc_uint32_t owner, isc_uint32_t group) 3636{ 3637 UNUSED(addr); 3638 UNUSED(perm); 3639 UNUSED(owner); 3640 UNUSED(group); 3641 return (ISC_R_NOTIMPLEMENTED); 3642} 3643 3644void 3645isc_socket_setname(isc_socket_t *socket, const char *name, void *tag) { 3646 3647 /* 3648 * Name 'socket'. 3649 */ 3650 3651 REQUIRE(VALID_SOCKET(socket)); 3652 3653 LOCK(&socket->lock); 3654 memset(socket->name, 0, sizeof(socket->name)); 3655 strncpy(socket->name, name, sizeof(socket->name) - 1); 3656 socket->tag = tag; 3657 UNLOCK(&socket->lock); 3658} 3659 3660const char * 3661isc_socket_getname(isc_socket_t *socket) { 3662 return (socket->name); 3663} 3664 3665void * 3666isc_socket_gettag(isc_socket_t *socket) { 3667 return (socket->tag); 3668} 3669 3670void 3671isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) { 3672 UNUSED(manager); 3673 UNUSED(reserved); 3674} 3675