1/* $NetBSD$ */ 2 3/* 4 * Copyright (C) 2004-2009 Internet Systems Consortium, Inc. ("ISC") 5 * Copyright (C) 2000-2003 Internet Software Consortium. 6 * 7 * Permission to use, copy, modify, and/or distribute this software for any 8 * purpose with or without fee is hereby granted, provided that the above 9 * copyright notice and this permission notice appear in all copies. 10 * 11 * THE SOFTWARE IS PROVIDED "AS IS" AND ISC DISCLAIMS ALL WARRANTIES WITH 12 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY 13 * AND FITNESS. IN NO EVENT SHALL ISC BE LIABLE FOR ANY SPECIAL, DIRECT, 14 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM 15 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE 16 * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR 17 * PERFORMANCE OF THIS SOFTWARE. 18 */ 19 20/* Id: socket.c,v 1.70.54.4 2009/01/29 22:40:36 jinmei Exp */ 21 22/* This code uses functions which are only available on Server 2003 and 23 * higher, and Windows XP and higher. 24 * 25 * This code is by nature multithreaded and takes advantage of various 26 * features to pass on information through the completion port for 27 * when I/O is completed. All sends, receives, accepts, and connects are 28 * completed through the completion port. 29 * 30 * The number of Completion Port Worker threads used is the total number 31 * of CPU's + 1. This increases the likelihood that a Worker Thread is 32 * available for processing a completed request. 33 * 34 * XXXPDM 5 August, 2002 35 */ 36 37#define MAKE_EXTERNAL 1 38#include <config.h> 39 40#include <sys/types.h> 41 42#ifndef _WINSOCKAPI_ 43#define _WINSOCKAPI_ /* Prevent inclusion of winsock.h in windows.h */ 44#endif 45 46#include <errno.h> 47#include <stddef.h> 48#include <stdlib.h> 49#include <string.h> 50#include <unistd.h> 51#include <io.h> 52#include <fcntl.h> 53#include <process.h> 54 55#include <isc/buffer.h> 56#include <isc/bufferlist.h> 57#include <isc/condition.h> 58#include <isc/list.h> 59#include <isc/log.h> 60#include <isc/mem.h> 61#include <isc/msgs.h> 62#include <isc/mutex.h> 63#include <isc/net.h> 64#include <isc/once.h> 65#include <isc/os.h> 66#include <isc/platform.h> 67#include <isc/print.h> 68#include <isc/region.h> 69#include <isc/socket.h> 70#include <isc/stats.h> 71#include <isc/strerror.h> 72#include <isc/syslog.h> 73#include <isc/task.h> 74#include <isc/thread.h> 75#include <isc/util.h> 76#include <isc/win32os.h> 77 78#include <mswsock.h> 79 80#include "errno2result.h" 81 82/* 83 * How in the world can Microsoft exist with APIs like this? 84 * We can't actually call this directly, because it turns out 85 * no library exports this function. Instead, we need to 86 * issue a runtime call to get the address. 87 */ 88LPFN_CONNECTEX ISCConnectEx; 89LPFN_ACCEPTEX ISCAcceptEx; 90LPFN_GETACCEPTEXSOCKADDRS ISCGetAcceptExSockaddrs; 91 92/* 93 * Run expensive internal consistency checks. 94 */ 95#ifdef ISC_SOCKET_CONSISTENCY_CHECKS 96#define CONSISTENT(sock) consistent(sock) 97#else 98#define CONSISTENT(sock) do {} while (0) 99#endif 100static void consistent(isc_socket_t *sock); 101 102/* 103 * Define this macro to control the behavior of connection 104 * resets on UDP sockets. See Microsoft KnowledgeBase Article Q263823 105 * for details. 106 * NOTE: This requires that Windows 2000 systems install Service Pack 2 107 * or later. 108 */ 109#ifndef SIO_UDP_CONNRESET 110#define SIO_UDP_CONNRESET _WSAIOW(IOC_VENDOR,12) 111#endif 112 113/* 114 * Some systems define the socket length argument as an int, some as size_t, 115 * some as socklen_t. This is here so it can be easily changed if needed. 116 */ 117#ifndef ISC_SOCKADDR_LEN_T 118#define ISC_SOCKADDR_LEN_T unsigned int 119#endif 120 121/* 122 * Define what the possible "soft" errors can be. These are non-fatal returns 123 * of various network related functions, like recv() and so on. 124 */ 125#define SOFT_ERROR(e) ((e) == WSAEINTR || \ 126 (e) == WSAEWOULDBLOCK || \ 127 (e) == EWOULDBLOCK || \ 128 (e) == EINTR || \ 129 (e) == EAGAIN || \ 130 (e) == 0) 131 132/* 133 * Pending errors are not really errors and should be 134 * kept separate 135 */ 136#define PENDING_ERROR(e) ((e) == WSA_IO_PENDING || (e) == 0) 137 138#define DOIO_SUCCESS 0 /* i/o ok, event sent */ 139#define DOIO_SOFT 1 /* i/o ok, soft error, no event sent */ 140#define DOIO_HARD 2 /* i/o error, event sent */ 141#define DOIO_EOF 3 /* EOF, no event sent */ 142#define DOIO_PENDING 4 /* status when i/o is in process */ 143#define DOIO_NEEDMORE 5 /* IO was processed, but we need more due to minimum */ 144 145#define DLVL(x) ISC_LOGCATEGORY_GENERAL, ISC_LOGMODULE_SOCKET, ISC_LOG_DEBUG(x) 146 147/* 148 * DLVL(90) -- Function entry/exit and other tracing. 149 * DLVL(70) -- Socket "correctness" -- including returning of events, etc. 150 * DLVL(60) -- Socket data send/receive 151 * DLVL(50) -- Event tracing, including receiving/sending completion events. 152 * DLVL(20) -- Socket creation/destruction. 153 */ 154#define TRACE_LEVEL 90 155#define CORRECTNESS_LEVEL 70 156#define IOEVENT_LEVEL 60 157#define EVENT_LEVEL 50 158#define CREATION_LEVEL 20 159 160#define TRACE DLVL(TRACE_LEVEL) 161#define CORRECTNESS DLVL(CORRECTNESS_LEVEL) 162#define IOEVENT DLVL(IOEVENT_LEVEL) 163#define EVENT DLVL(EVENT_LEVEL) 164#define CREATION DLVL(CREATION_LEVEL) 165 166typedef isc_event_t intev_t; 167 168/* 169 * Socket State 170 */ 171enum { 172 SOCK_INITIALIZED, /* Socket Initialized */ 173 SOCK_OPEN, /* Socket opened but nothing yet to do */ 174 SOCK_DATA, /* Socket sending or receiving data */ 175 SOCK_LISTEN, /* TCP Socket listening for connects */ 176 SOCK_ACCEPT, /* TCP socket is waiting to accept */ 177 SOCK_CONNECT, /* TCP Socket connecting */ 178 SOCK_CLOSED, /* Socket has been closed */ 179}; 180 181#define SOCKET_MAGIC ISC_MAGIC('I', 'O', 'i', 'o') 182#define VALID_SOCKET(t) ISC_MAGIC_VALID(t, SOCKET_MAGIC) 183 184/* 185 * IPv6 control information. If the socket is an IPv6 socket we want 186 * to collect the destination address and interface so the client can 187 * set them on outgoing packets. 188 */ 189#ifdef ISC_PLATFORM_HAVEIPV6 190#ifndef USE_CMSG 191#define USE_CMSG 1 192#endif 193#endif 194 195/* 196 * We really don't want to try and use these control messages. Win32 197 * doesn't have this mechanism before XP. 198 */ 199#undef USE_CMSG 200 201/* 202 * Message header for recvmsg and sendmsg calls. 203 * Used value-result for recvmsg, value only for sendmsg. 204 */ 205struct msghdr { 206 SOCKADDR_STORAGE to_addr; /* UDP send/recv address */ 207 int to_addr_len; /* length of the address */ 208 WSABUF *msg_iov; /* scatter/gather array */ 209 u_int msg_iovlen; /* # elements in msg_iov */ 210 void *msg_control; /* ancillary data, see below */ 211 u_int msg_controllen; /* ancillary data buffer len */ 212 int msg_totallen; /* total length of this message */ 213} msghdr; 214 215/* 216 * The size to raise the receive buffer to. 217 */ 218#define RCVBUFSIZE (32*1024) 219 220/* 221 * The number of times a send operation is repeated if the result 222 * is WSAEINTR. 223 */ 224#define NRETRIES 10 225 226struct isc_socket { 227 /* Not locked. */ 228 unsigned int magic; 229 isc_socketmgr_t *manager; 230 isc_mutex_t lock; 231 isc_sockettype_t type; 232 233 /* Pointers to scatter/gather buffers */ 234 WSABUF iov[ISC_SOCKET_MAXSCATTERGATHER]; 235 236 /* Locked by socket lock. */ 237 ISC_LINK(isc_socket_t) link; 238 unsigned int references; /* EXTERNAL references */ 239 SOCKET fd; /* file handle */ 240 int pf; /* protocol family */ 241 char name[16]; 242 void * tag; 243 244 /* 245 * Each recv() call uses this buffer. It is a per-socket receive 246 * buffer that allows us to decouple the system recv() from the 247 * recv_list done events. This means the items on the recv_list 248 * can be removed without having to cancel pending system recv() 249 * calls. It also allows us to read-ahead in some cases. 250 */ 251 struct { 252 SOCKADDR_STORAGE from_addr; // UDP send/recv address 253 int from_addr_len; // length of the address 254 char *base; // the base of the buffer 255 char *consume_position; // where to start copying data from next 256 unsigned int len; // the actual size of this buffer 257 unsigned int remaining; // the number of bytes remaining 258 } recvbuf; 259 260 ISC_LIST(isc_socketevent_t) send_list; 261 ISC_LIST(isc_socketevent_t) recv_list; 262 ISC_LIST(isc_socket_newconnev_t) accept_list; 263 isc_socket_connev_t *connect_ev; 264 265 isc_sockaddr_t address; /* remote address */ 266 267 unsigned int listener : 1, /* listener socket */ 268 connected : 1, 269 pending_connect : 1, /* connect pending */ 270 bound : 1; /* bound to local addr */ 271 unsigned int pending_iocp; /* Should equal the counters below. Debug. */ 272 unsigned int pending_recv; /* Number of outstanding recv() calls. */ 273 unsigned int pending_send; /* Number of outstanding send() calls. */ 274 unsigned int pending_accept; /* Number of outstanding accept() calls. */ 275 unsigned int state; /* Socket state. Debugging and consistency checking. */ 276 int state_lineno; /* line which last touched state */ 277}; 278 279#define _set_state(sock, _state) do { (sock)->state = (_state); (sock)->state_lineno = __LINE__; } while (0) 280 281/* 282 * Buffer structure 283 */ 284typedef struct buflist buflist_t; 285 286struct buflist { 287 void *buf; 288 unsigned int buflen; 289 ISC_LINK(buflist_t) link; 290}; 291 292/* 293 * I/O Completion ports Info structures 294 */ 295 296static HANDLE hHeapHandle = NULL; 297typedef struct IoCompletionInfo { 298 OVERLAPPED overlapped; 299 isc_socketevent_t *dev; /* send()/recv() done event */ 300 isc_socket_connev_t *cdev; /* connect() done event */ 301 isc_socket_newconnev_t *adev; /* accept() done event */ 302 void *acceptbuffer; 303 DWORD received_bytes; 304 int request_type; 305 struct msghdr messagehdr; 306 ISC_LIST(buflist_t) bufferlist; /*%< list of buffers */ 307} IoCompletionInfo; 308 309/* 310 * Define a maximum number of I/O Completion Port worker threads 311 * to handle the load on the Completion Port. The actual number 312 * used is the number of CPU's + 1. 313 */ 314#define MAX_IOCPTHREADS 20 315 316#define SOCKET_MANAGER_MAGIC ISC_MAGIC('I', 'O', 'm', 'g') 317#define VALID_MANAGER(m) ISC_MAGIC_VALID(m, SOCKET_MANAGER_MAGIC) 318 319struct isc_socketmgr { 320 /* Not locked. */ 321 unsigned int magic; 322 isc_mem_t *mctx; 323 isc_mutex_t lock; 324 isc_stats_t *stats; 325 326 /* Locked by manager lock. */ 327 ISC_LIST(isc_socket_t) socklist; 328 isc_boolean_t bShutdown; 329 isc_condition_t shutdown_ok; 330 HANDLE hIoCompletionPort; 331 int maxIOCPThreads; 332 HANDLE hIOCPThreads[MAX_IOCPTHREADS]; 333 DWORD dwIOCPThreadIds[MAX_IOCPTHREADS]; 334 335 /* 336 * Debugging. 337 * Modified by InterlockedIncrement() and InterlockedDecrement() 338 */ 339 LONG totalSockets; 340 LONG iocp_total; 341}; 342 343enum { 344 SOCKET_RECV, 345 SOCKET_SEND, 346 SOCKET_ACCEPT, 347 SOCKET_CONNECT 348}; 349 350/* 351 * send() and recv() iovec counts 352 */ 353#define MAXSCATTERGATHER_SEND (ISC_SOCKET_MAXSCATTERGATHER) 354#define MAXSCATTERGATHER_RECV (ISC_SOCKET_MAXSCATTERGATHER) 355 356static isc_threadresult_t WINAPI SocketIoThread(LPVOID ThreadContext); 357static void maybe_free_socket(isc_socket_t **, int); 358static void free_socket(isc_socket_t **, int); 359static isc_boolean_t senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev); 360static isc_boolean_t acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev); 361static isc_boolean_t connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev); 362static void send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev); 363static void send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev); 364static void send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev); 365static void send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev); 366static void send_recvdone_abort(isc_socket_t *sock, isc_result_t result); 367static void queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev); 368static void queue_receive_request(isc_socket_t *sock); 369 370/* 371 * This is used to dump the contents of the sock structure 372 * You should make sure that the sock is locked before 373 * dumping it. Since the code uses simple printf() statements 374 * it should only be used interactively. 375 */ 376void 377sock_dump(isc_socket_t *sock) { 378 isc_socketevent_t *ldev; 379 isc_socket_newconnev_t *ndev; 380 381#if 0 382 isc_sockaddr_t addr; 383 char socktext[256]; 384 385 isc_socket_getpeername(sock, &addr); 386 isc_sockaddr_format(&addr, socktext, sizeof(socktext)); 387 printf("Remote Socket: %s\n", socktext); 388 isc_socket_getsockname(sock, &addr); 389 isc_sockaddr_format(&addr, socktext, sizeof(socktext)); 390 printf("This Socket: %s\n", socktext); 391#endif 392 393 printf("\n\t\tSock Dump\n"); 394 printf("\t\tfd: %u\n", sock->fd); 395 printf("\t\treferences: %d\n", sock->references); 396 printf("\t\tpending_accept: %d\n", sock->pending_accept); 397 printf("\t\tconnecting: %d\n", sock->pending_connect); 398 printf("\t\tconnected: %d\n", sock->connected); 399 printf("\t\tbound: %d\n", sock->bound); 400 printf("\t\tpending_iocp: %d\n", sock->pending_iocp); 401 printf("\t\tsocket type: %d\n", sock->type); 402 403 printf("\n\t\tSock Recv List\n"); 404 ldev = ISC_LIST_HEAD(sock->recv_list); 405 while (ldev != NULL) { 406 printf("\t\tdev: %p\n", ldev); 407 ldev = ISC_LIST_NEXT(ldev, ev_link); 408 } 409 410 printf("\n\t\tSock Send List\n"); 411 ldev = ISC_LIST_HEAD(sock->send_list); 412 while (ldev != NULL) { 413 printf("\t\tdev: %p\n", ldev); 414 ldev = ISC_LIST_NEXT(ldev, ev_link); 415 } 416 417 printf("\n\t\tSock Accept List\n"); 418 ndev = ISC_LIST_HEAD(sock->accept_list); 419 while (ndev != NULL) { 420 printf("\t\tdev: %p\n", ldev); 421 ndev = ISC_LIST_NEXT(ndev, ev_link); 422 } 423} 424 425static void 426socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, 427 isc_logcategory_t *category, isc_logmodule_t *module, int level, 428 isc_msgcat_t *msgcat, int msgset, int message, 429 const char *fmt, ...) ISC_FORMAT_PRINTF(9, 10); 430 431/* This function will add an entry to the I/O completion port 432 * that will signal the I/O thread to exit (gracefully) 433 */ 434static void 435signal_iocompletionport_exit(isc_socketmgr_t *manager) { 436 int i; 437 int errval; 438 char strbuf[ISC_STRERRORSIZE]; 439 440 REQUIRE(VALID_MANAGER(manager)); 441 for (i = 0; i < manager->maxIOCPThreads; i++) { 442 if (!PostQueuedCompletionStatus(manager->hIoCompletionPort, 443 0, 0, 0)) { 444 errval = GetLastError(); 445 isc__strerror(errval, strbuf, sizeof(strbuf)); 446 FATAL_ERROR(__FILE__, __LINE__, 447 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 448 ISC_MSG_FAILED, 449 "Can't request service thread to exit: %s"), 450 strbuf); 451 } 452 } 453} 454 455/* 456 * Create the worker threads for the I/O Completion Port 457 */ 458void 459iocompletionport_createthreads(int total_threads, isc_socketmgr_t *manager) { 460 int errval; 461 char strbuf[ISC_STRERRORSIZE]; 462 int i; 463 464 INSIST(total_threads > 0); 465 REQUIRE(VALID_MANAGER(manager)); 466 /* 467 * We need at least one 468 */ 469 for (i = 0; i < total_threads; i++) { 470 manager->hIOCPThreads[i] = CreateThread(NULL, 0, SocketIoThread, 471 manager, 0, 472 &manager->dwIOCPThreadIds[i]); 473 if (manager->hIOCPThreads[i] == NULL) { 474 errval = GetLastError(); 475 isc__strerror(errval, strbuf, sizeof(strbuf)); 476 FATAL_ERROR(__FILE__, __LINE__, 477 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 478 ISC_MSG_FAILED, 479 "Can't create IOCP thread: %s"), 480 strbuf); 481 exit(1); 482 } 483 } 484} 485 486/* 487 * Create/initialise the I/O completion port 488 */ 489void 490iocompletionport_init(isc_socketmgr_t *manager) { 491 int errval; 492 char strbuf[ISC_STRERRORSIZE]; 493 494 REQUIRE(VALID_MANAGER(manager)); 495 /* 496 * Create a private heap to handle the socket overlapped structure 497 * The minimum number of structures is 10, there is no maximum 498 */ 499 hHeapHandle = HeapCreate(0, 10 * sizeof(IoCompletionInfo), 0); 500 if (hHeapHandle == NULL) { 501 errval = GetLastError(); 502 isc__strerror(errval, strbuf, sizeof(strbuf)); 503 FATAL_ERROR(__FILE__, __LINE__, 504 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 505 ISC_MSG_FAILED, 506 "HeapCreate() failed during " 507 "initialization: %s"), 508 strbuf); 509 exit(1); 510 } 511 512 manager->maxIOCPThreads = min(isc_os_ncpus() + 1, MAX_IOCPTHREADS); 513 514 /* Now Create the Completion Port */ 515 manager->hIoCompletionPort = CreateIoCompletionPort( 516 INVALID_HANDLE_VALUE, NULL, 517 0, manager->maxIOCPThreads); 518 if (manager->hIoCompletionPort == NULL) { 519 errval = GetLastError(); 520 isc__strerror(errval, strbuf, sizeof(strbuf)); 521 FATAL_ERROR(__FILE__, __LINE__, 522 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 523 ISC_MSG_FAILED, 524 "CreateIoCompletionPort() failed " 525 "during initialization: %s"), 526 strbuf); 527 exit(1); 528 } 529 530 /* 531 * Worker threads for servicing the I/O 532 */ 533 iocompletionport_createthreads(manager->maxIOCPThreads, manager); 534} 535 536/* 537 * Associate a socket with an IO Completion Port. This allows us to queue events for it 538 * and have our worker pool of threads process them. 539 */ 540void 541iocompletionport_update(isc_socket_t *sock) { 542 HANDLE hiocp; 543 char strbuf[ISC_STRERRORSIZE]; 544 545 REQUIRE(VALID_SOCKET(sock)); 546 547 hiocp = CreateIoCompletionPort((HANDLE)sock->fd, 548 sock->manager->hIoCompletionPort, (ULONG_PTR)sock, 0); 549 550 if (hiocp == NULL) { 551 DWORD errval = GetLastError(); 552 isc__strerror(errval, strbuf, sizeof(strbuf)); 553 isc_log_iwrite(isc_lctx, 554 ISC_LOGCATEGORY_GENERAL, 555 ISC_LOGMODULE_SOCKET, ISC_LOG_ERROR, 556 isc_msgcat, ISC_MSGSET_SOCKET, 557 ISC_MSG_TOOMANYHANDLES, 558 "iocompletionport_update: failed to open" 559 " io completion port: %s", 560 strbuf); 561 562 /* XXXMLG temporary hack to make failures detected. 563 * This function should return errors to the caller, not 564 * exit here. 565 */ 566 FATAL_ERROR(__FILE__, __LINE__, 567 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 568 ISC_MSG_FAILED, 569 "CreateIoCompletionPort() failed " 570 "during initialization: %s"), 571 strbuf); 572 exit(1); 573 } 574 575 InterlockedIncrement(&sock->manager->iocp_total); 576} 577 578/* 579 * Routine to cleanup and then close the socket. 580 * Only close the socket here if it is NOT associated 581 * with an event, otherwise the WSAWaitForMultipleEvents 582 * may fail due to the fact that the Wait should not 583 * be running while closing an event or a socket. 584 * The socket is locked before calling this function 585 */ 586void 587socket_close(isc_socket_t *sock) { 588 589 REQUIRE(sock != NULL); 590 591 if (sock->fd != INVALID_SOCKET) { 592 closesocket(sock->fd); 593 sock->fd = INVALID_SOCKET; 594 _set_state(sock, SOCK_CLOSED); 595 InterlockedDecrement(&sock->manager->totalSockets); 596 } 597} 598 599static isc_once_t initialise_once = ISC_ONCE_INIT; 600static isc_boolean_t initialised = ISC_FALSE; 601 602static void 603initialise(void) { 604 WORD wVersionRequested; 605 WSADATA wsaData; 606 int err; 607 SOCKET sock; 608 GUID GUIDConnectEx = WSAID_CONNECTEX; 609 GUID GUIDAcceptEx = WSAID_ACCEPTEX; 610 GUID GUIDGetAcceptExSockaddrs = WSAID_GETACCEPTEXSOCKADDRS; 611 DWORD dwBytes; 612 613 /* Need Winsock 2.2 or better */ 614 wVersionRequested = MAKEWORD(2, 2); 615 616 err = WSAStartup(wVersionRequested, &wsaData); 617 if (err != 0) { 618 char strbuf[ISC_STRERRORSIZE]; 619 isc__strerror(err, strbuf, sizeof(strbuf)); 620 FATAL_ERROR(__FILE__, __LINE__, "WSAStartup() %s: %s", 621 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 622 ISC_MSG_FAILED, "failed"), 623 strbuf); 624 exit(1); 625 } 626 /* 627 * The following APIs do not exist as functions in a library, but we must 628 * ask winsock for them. They are "extensions" -- but why they cannot be 629 * actual functions is beyond me. So, ask winsock for the pointers to the 630 * functions we need. 631 */ 632 sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); 633 INSIST(sock != INVALID_SOCKET); 634 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 635 &GUIDConnectEx, sizeof(GUIDConnectEx), 636 &ISCConnectEx, sizeof(ISCConnectEx), 637 &dwBytes, NULL, NULL); 638 INSIST(err == 0); 639 640 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 641 &GUIDAcceptEx, sizeof(GUIDAcceptEx), 642 &ISCAcceptEx, sizeof(ISCAcceptEx), 643 &dwBytes, NULL, NULL); 644 INSIST(err == 0); 645 646 err = WSAIoctl(sock, SIO_GET_EXTENSION_FUNCTION_POINTER, 647 &GUIDGetAcceptExSockaddrs, sizeof(GUIDGetAcceptExSockaddrs), 648 &ISCGetAcceptExSockaddrs, sizeof(ISCGetAcceptExSockaddrs), 649 &dwBytes, NULL, NULL); 650 INSIST(err == 0); 651 652 closesocket(sock); 653 654 initialised = ISC_TRUE; 655} 656 657/* 658 * Initialize socket services 659 */ 660void 661InitSockets(void) { 662 RUNTIME_CHECK(isc_once_do(&initialise_once, 663 initialise) == ISC_R_SUCCESS); 664 if (!initialised) 665 exit(1); 666} 667 668int 669internal_sendmsg(isc_socket_t *sock, IoCompletionInfo *lpo, 670 struct msghdr *messagehdr, int flags, int *Error) 671{ 672 int Result; 673 DWORD BytesSent; 674 DWORD Flags = flags; 675 int total_sent; 676 677 *Error = 0; 678 Result = WSASendTo(sock->fd, messagehdr->msg_iov, 679 messagehdr->msg_iovlen, &BytesSent, 680 Flags, (SOCKADDR *)&messagehdr->to_addr, 681 messagehdr->to_addr_len, (LPWSAOVERLAPPED)lpo, 682 NULL); 683 684 total_sent = (int)BytesSent; 685 686 /* Check for errors.*/ 687 if (Result == SOCKET_ERROR) { 688 *Error = WSAGetLastError(); 689 690 switch (*Error) { 691 case WSA_IO_INCOMPLETE: 692 case WSA_WAIT_IO_COMPLETION: 693 case WSA_IO_PENDING: 694 case NO_ERROR: /* Strange, but okay */ 695 sock->pending_iocp++; 696 sock->pending_send++; 697 break; 698 699 default: 700 return (-1); 701 break; 702 } 703 } else { 704 sock->pending_iocp++; 705 sock->pending_send++; 706 } 707 708 if (lpo != NULL) 709 return (0); 710 else 711 return (total_sent); 712} 713 714static void 715queue_receive_request(isc_socket_t *sock) { 716 DWORD Flags = 0; 717 DWORD NumBytes = 0; 718 int total_bytes = 0; 719 int Result; 720 int Error; 721 WSABUF iov[1]; 722 IoCompletionInfo *lpo; 723 isc_result_t isc_result; 724 725 /* 726 * If we already have a receive pending, do nothing. 727 */ 728 if (sock->pending_recv > 0) 729 return; 730 731 /* 732 * If no one is waiting, do nothing. 733 */ 734 if (ISC_LIST_EMPTY(sock->recv_list)) 735 return; 736 737 INSIST(sock->recvbuf.remaining == 0); 738 INSIST(sock->fd != INVALID_SOCKET); 739 740 iov[0].len = sock->recvbuf.len; 741 iov[0].buf = sock->recvbuf.base; 742 743 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 744 HEAP_ZERO_MEMORY, 745 sizeof(IoCompletionInfo)); 746 RUNTIME_CHECK(lpo != NULL); 747 lpo->request_type = SOCKET_RECV; 748 749 sock->recvbuf.from_addr_len = sizeof(sock->recvbuf.from_addr); 750 751 Error = 0; 752 Result = WSARecvFrom((SOCKET)sock->fd, iov, 1, 753 &NumBytes, &Flags, 754 (SOCKADDR *)&sock->recvbuf.from_addr, 755 &sock->recvbuf.from_addr_len, 756 (LPWSAOVERLAPPED)lpo, NULL); 757 758 /* Check for errors. */ 759 if (Result == SOCKET_ERROR) { 760 Error = WSAGetLastError(); 761 762 switch (Error) { 763 case WSA_IO_PENDING: 764 sock->pending_iocp++; 765 sock->pending_recv++; 766 break; 767 768 default: 769 isc_result = isc__errno2result(Error); 770 if (isc_result == ISC_R_UNEXPECTED) 771 UNEXPECTED_ERROR(__FILE__, __LINE__, 772 "WSARecvFrom: Windows error code: %d, isc result %d", 773 Error, isc_result); 774 send_recvdone_abort(sock, isc_result); 775 break; 776 } 777 } else { 778 /* 779 * The recv() finished immediately, but we will still get 780 * a completion event. Rather than duplicate code, let 781 * that thread handle sending the data along its way. 782 */ 783 sock->pending_iocp++; 784 sock->pending_recv++; 785 } 786 787 socket_log(__LINE__, sock, NULL, IOEVENT, 788 isc_msgcat, ISC_MSGSET_SOCKET, 789 ISC_MSG_DOIORECV, 790 "queue_io_request: fd %d result %d error %d", 791 sock->fd, Result, Error); 792 793 CONSISTENT(sock); 794} 795 796static void 797manager_log(isc_socketmgr_t *sockmgr, isc_logcategory_t *category, 798 isc_logmodule_t *module, int level, const char *fmt, ...) 799{ 800 char msgbuf[2048]; 801 va_list ap; 802 803 if (!isc_log_wouldlog(isc_lctx, level)) 804 return; 805 806 va_start(ap, fmt); 807 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 808 va_end(ap); 809 810 isc_log_write(isc_lctx, category, module, level, 811 "sockmgr %p: %s", sockmgr, msgbuf); 812} 813 814static void 815socket_log(int lineno, isc_socket_t *sock, isc_sockaddr_t *address, 816 isc_logcategory_t *category, isc_logmodule_t *module, int level, 817 isc_msgcat_t *msgcat, int msgset, int message, 818 const char *fmt, ...) 819{ 820 char msgbuf[2048]; 821 char peerbuf[256]; 822 va_list ap; 823 824 825 if (!isc_log_wouldlog(isc_lctx, level)) 826 return; 827 828 va_start(ap, fmt); 829 vsnprintf(msgbuf, sizeof(msgbuf), fmt, ap); 830 va_end(ap); 831 832 if (address == NULL) { 833 isc_log_iwrite(isc_lctx, category, module, level, 834 msgcat, msgset, message, 835 "socket %p line %d: %s", sock, lineno, msgbuf); 836 } else { 837 isc_sockaddr_format(address, peerbuf, sizeof(peerbuf)); 838 isc_log_iwrite(isc_lctx, category, module, level, 839 msgcat, msgset, message, 840 "socket %p line %d peer %s: %s", sock, lineno, 841 peerbuf, msgbuf); 842 } 843 844} 845 846/* 847 * Make an fd SOCKET non-blocking. 848 */ 849static isc_result_t 850make_nonblock(SOCKET fd) { 851 int ret; 852 unsigned long flags = 1; 853 char strbuf[ISC_STRERRORSIZE]; 854 855 /* Set the socket to non-blocking */ 856 ret = ioctlsocket(fd, FIONBIO, &flags); 857 858 if (ret == -1) { 859 isc__strerror(errno, strbuf, sizeof(strbuf)); 860 UNEXPECTED_ERROR(__FILE__, __LINE__, 861 "ioctlsocket(%d, FIOBIO, %d): %s", 862 fd, flags, strbuf); 863 864 return (ISC_R_UNEXPECTED); 865 } 866 867 return (ISC_R_SUCCESS); 868} 869 870/* 871 * Windows 2000 systems incorrectly cause UDP sockets using WASRecvFrom 872 * to not work correctly, returning a WSACONNRESET error when a WSASendTo 873 * fails with an "ICMP port unreachable" response and preventing the 874 * socket from using the WSARecvFrom in subsequent operations. 875 * The function below fixes this, but requires that Windows 2000 876 * Service Pack 2 or later be installed on the system. NT 4.0 877 * systems are not affected by this and work correctly. 878 * See Microsoft Knowledge Base Article Q263823 for details of this. 879 */ 880isc_result_t 881connection_reset_fix(SOCKET fd) { 882 DWORD dwBytesReturned = 0; 883 BOOL bNewBehavior = FALSE; 884 DWORD status; 885 886 if (isc_win32os_majorversion() < 5) 887 return (ISC_R_SUCCESS); /* NT 4.0 has no problem */ 888 889 /* disable bad behavior using IOCTL: SIO_UDP_CONNRESET */ 890 status = WSAIoctl(fd, SIO_UDP_CONNRESET, &bNewBehavior, 891 sizeof(bNewBehavior), NULL, 0, 892 &dwBytesReturned, NULL, NULL); 893 if (status != SOCKET_ERROR) 894 return (ISC_R_SUCCESS); 895 else { 896 UNEXPECTED_ERROR(__FILE__, __LINE__, 897 "WSAIoctl(SIO_UDP_CONNRESET, oldBehaviour) %s", 898 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 899 ISC_MSG_FAILED, "failed")); 900 return (ISC_R_UNEXPECTED); 901 } 902} 903 904/* 905 * Construct an iov array and attach it to the msghdr passed in. This is 906 * the SEND constructor, which will use the used region of the buffer 907 * (if using a buffer list) or will use the internal region (if a single 908 * buffer I/O is requested). 909 * 910 * Nothing can be NULL, and the done event must list at least one buffer 911 * on the buffer linked list for this function to be meaningful. 912 */ 913static void 914build_msghdr_send(isc_socket_t *sock, isc_socketevent_t *dev, 915 struct msghdr *msg, char *cmsg, WSABUF *iov, 916 IoCompletionInfo *lpo) 917{ 918 unsigned int iovcount; 919 isc_buffer_t *buffer; 920 buflist_t *cpbuffer; 921 isc_region_t used; 922 size_t write_count; 923 size_t skip_count; 924 925 memset(msg, 0, sizeof(*msg)); 926 927 memcpy(&msg->to_addr, &dev->address.type, dev->address.length); 928 msg->to_addr_len = dev->address.length; 929 930 buffer = ISC_LIST_HEAD(dev->bufferlist); 931 write_count = 0; 932 iovcount = 0; 933 934 /* 935 * Single buffer I/O? Skip what we've done so far in this region. 936 */ 937 if (buffer == NULL) { 938 write_count = dev->region.length - dev->n; 939 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); 940 RUNTIME_CHECK(cpbuffer != NULL); 941 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, write_count); 942 RUNTIME_CHECK(cpbuffer->buf != NULL); 943 944 socket_log(__LINE__, sock, NULL, TRACE, 945 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 946 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), 947 cpbuffer->buf, write_count); 948 949 memcpy(cpbuffer->buf,(dev->region.base + dev->n), write_count); 950 cpbuffer->buflen = write_count; 951 ISC_LIST_ENQUEUE(lpo->bufferlist, cpbuffer, link); 952 iov[0].buf = cpbuffer->buf; 953 iov[0].len = write_count; 954 iovcount = 1; 955 956 goto config; 957 } 958 959 /* 960 * Multibuffer I/O. 961 * Skip the data in the buffer list that we have already written. 962 */ 963 skip_count = dev->n; 964 while (buffer != NULL) { 965 REQUIRE(ISC_BUFFER_VALID(buffer)); 966 if (skip_count < isc_buffer_usedlength(buffer)) 967 break; 968 skip_count -= isc_buffer_usedlength(buffer); 969 buffer = ISC_LIST_NEXT(buffer, link); 970 } 971 972 while (buffer != NULL) { 973 INSIST(iovcount < MAXSCATTERGATHER_SEND); 974 975 isc_buffer_usedregion(buffer, &used); 976 977 if (used.length > 0) { 978 int uselen = used.length - skip_count; 979 cpbuffer = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, sizeof(buflist_t)); 980 RUNTIME_CHECK(cpbuffer != NULL); 981 cpbuffer->buf = HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, uselen); 982 RUNTIME_CHECK(cpbuffer->buf != NULL); 983 984 socket_log(__LINE__, sock, NULL, TRACE, 985 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 986 "alloc_buffer %p %d %p %d", cpbuffer, sizeof(buflist_t), 987 cpbuffer->buf, write_count); 988 989 memcpy(cpbuffer->buf,(used.base + skip_count), uselen); 990 cpbuffer->buflen = uselen; 991 iov[iovcount].buf = cpbuffer->buf; 992 iov[iovcount].len = used.length - skip_count; 993 write_count += uselen; 994 skip_count = 0; 995 iovcount++; 996 } 997 buffer = ISC_LIST_NEXT(buffer, link); 998 } 999 1000 INSIST(skip_count == 0); 1001 1002 config: 1003 msg->msg_iov = iov; 1004 msg->msg_iovlen = iovcount; 1005 msg->msg_totallen = write_count; 1006} 1007 1008static void 1009set_dev_address(isc_sockaddr_t *address, isc_socket_t *sock, 1010 isc_socketevent_t *dev) 1011{ 1012 if (sock->type == isc_sockettype_udp) { 1013 if (address != NULL) 1014 dev->address = *address; 1015 else 1016 dev->address = sock->address; 1017 } else if (sock->type == isc_sockettype_tcp) { 1018 INSIST(address == NULL); 1019 dev->address = sock->address; 1020 } 1021} 1022 1023static void 1024destroy_socketevent(isc_event_t *event) { 1025 isc_socketevent_t *ev = (isc_socketevent_t *)event; 1026 1027 INSIST(ISC_LIST_EMPTY(ev->bufferlist)); 1028 1029 (ev->destroy)(event); 1030} 1031 1032static isc_socketevent_t * 1033allocate_socketevent(isc_socket_t *sock, isc_eventtype_t eventtype, 1034 isc_taskaction_t action, const void *arg) 1035{ 1036 isc_socketevent_t *ev; 1037 1038 ev = (isc_socketevent_t *)isc_event_allocate(sock->manager->mctx, 1039 sock, eventtype, 1040 action, arg, 1041 sizeof(*ev)); 1042 if (ev == NULL) 1043 return (NULL); 1044 1045 ev->result = ISC_R_IOERROR; // XXXMLG temporary change to detect failure to set 1046 ISC_LINK_INIT(ev, ev_link); 1047 ISC_LIST_INIT(ev->bufferlist); 1048 ev->region.base = NULL; 1049 ev->n = 0; 1050 ev->offset = 0; 1051 ev->attributes = 0; 1052 ev->destroy = ev->ev_destroy; 1053 ev->ev_destroy = destroy_socketevent; 1054 1055 return (ev); 1056} 1057 1058#if defined(ISC_SOCKET_DEBUG) 1059static void 1060dump_msg(struct msghdr *msg, isc_socket_t *sock) { 1061 unsigned int i; 1062 1063 printf("MSGHDR %p, Socket #: %u\n", msg, sock->fd); 1064 printf("\tname %p, namelen %d\n", msg->msg_name, msg->msg_namelen); 1065 printf("\tiov %p, iovlen %d\n", msg->msg_iov, msg->msg_iovlen); 1066 for (i = 0; i < (unsigned int)msg->msg_iovlen; i++) 1067 printf("\t\t%d\tbase %p, len %d\n", i, 1068 msg->msg_iov[i].buf, 1069 msg->msg_iov[i].len); 1070} 1071#endif 1072 1073/* 1074 * map the error code 1075 */ 1076int 1077map_socket_error(isc_socket_t *sock, int windows_errno, int *isc_errno, 1078 char *errorstring, size_t bufsize) { 1079 1080 int doreturn; 1081 switch (windows_errno) { 1082 case WSAECONNREFUSED: 1083 *isc_errno = ISC_R_CONNREFUSED; 1084 if (sock->connected) 1085 doreturn = DOIO_HARD; 1086 else 1087 doreturn = DOIO_SOFT; 1088 break; 1089 case WSAENETUNREACH: 1090 case ERROR_NETWORK_UNREACHABLE: 1091 *isc_errno = ISC_R_NETUNREACH; 1092 if (sock->connected) 1093 doreturn = DOIO_HARD; 1094 else 1095 doreturn = DOIO_SOFT; 1096 break; 1097 case ERROR_PORT_UNREACHABLE: 1098 case ERROR_HOST_UNREACHABLE: 1099 case WSAEHOSTUNREACH: 1100 *isc_errno = ISC_R_HOSTUNREACH; 1101 if (sock->connected) 1102 doreturn = DOIO_HARD; 1103 else 1104 doreturn = DOIO_SOFT; 1105 break; 1106 case WSAENETDOWN: 1107 *isc_errno = ISC_R_NETDOWN; 1108 if (sock->connected) 1109 doreturn = DOIO_HARD; 1110 else 1111 doreturn = DOIO_SOFT; 1112 break; 1113 case WSAEHOSTDOWN: 1114 *isc_errno = ISC_R_HOSTDOWN; 1115 if (sock->connected) 1116 doreturn = DOIO_HARD; 1117 else 1118 doreturn = DOIO_SOFT; 1119 break; 1120 case WSAEACCES: 1121 *isc_errno = ISC_R_NOPERM; 1122 if (sock->connected) 1123 doreturn = DOIO_HARD; 1124 else 1125 doreturn = DOIO_SOFT; 1126 break; 1127 case WSAECONNRESET: 1128 case WSAENETRESET: 1129 case WSAECONNABORTED: 1130 case WSAEDISCON: 1131 *isc_errno = ISC_R_CONNECTIONRESET; 1132 if (sock->connected) 1133 doreturn = DOIO_HARD; 1134 else 1135 doreturn = DOIO_SOFT; 1136 break; 1137 case WSAENOTCONN: 1138 *isc_errno = ISC_R_NOTCONNECTED; 1139 if (sock->connected) 1140 doreturn = DOIO_HARD; 1141 else 1142 doreturn = DOIO_SOFT; 1143 break; 1144 case ERROR_OPERATION_ABORTED: 1145 case ERROR_CONNECTION_ABORTED: 1146 case ERROR_REQUEST_ABORTED: 1147 *isc_errno = ISC_R_CONNECTIONRESET; 1148 doreturn = DOIO_HARD; 1149 break; 1150 case WSAENOBUFS: 1151 *isc_errno = ISC_R_NORESOURCES; 1152 doreturn = DOIO_HARD; 1153 break; 1154 case WSAEAFNOSUPPORT: 1155 *isc_errno = ISC_R_FAMILYNOSUPPORT; 1156 doreturn = DOIO_HARD; 1157 break; 1158 case WSAEADDRNOTAVAIL: 1159 *isc_errno = ISC_R_ADDRNOTAVAIL; 1160 doreturn = DOIO_HARD; 1161 break; 1162 case WSAEDESTADDRREQ: 1163 *isc_errno = ISC_R_BADADDRESSFORM; 1164 doreturn = DOIO_HARD; 1165 break; 1166 case ERROR_NETNAME_DELETED: 1167 *isc_errno = ISC_R_NETDOWN; 1168 doreturn = DOIO_HARD; 1169 break; 1170 default: 1171 *isc_errno = ISC_R_IOERROR; 1172 doreturn = DOIO_HARD; 1173 break; 1174 } 1175 if (doreturn == DOIO_HARD) { 1176 isc__strerror(windows_errno, errorstring, bufsize); 1177 } 1178 return (doreturn); 1179} 1180 1181static void 1182fill_recv(isc_socket_t *sock, isc_socketevent_t *dev) { 1183 isc_region_t r; 1184 int copylen; 1185 isc_buffer_t *buffer; 1186 1187 INSIST(dev->n < dev->minimum); 1188 INSIST(sock->recvbuf.remaining > 0); 1189 INSIST(sock->pending_recv == 0); 1190 1191 if (sock->type == isc_sockettype_udp) { 1192 dev->address.length = sock->recvbuf.from_addr_len; 1193 memcpy(&dev->address.type, &sock->recvbuf.from_addr, 1194 sock->recvbuf.from_addr_len); 1195 if (isc_sockaddr_getport(&dev->address) == 0) { 1196 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1197 socket_log(__LINE__, sock, &dev->address, IOEVENT, 1198 isc_msgcat, ISC_MSGSET_SOCKET, 1199 ISC_MSG_ZEROPORT, 1200 "dropping source port zero packet"); 1201 } 1202 sock->recvbuf.remaining = 0; 1203 return; 1204 } 1205 } else if (sock->type == isc_sockettype_tcp) { 1206 dev->address = sock->address; 1207 } 1208 1209 /* 1210 * Run through the list of buffers we were given, and find the 1211 * first one with space. Once it is found, loop through, filling 1212 * the buffers as much as possible. 1213 */ 1214 buffer = ISC_LIST_HEAD(dev->bufferlist); 1215 if (buffer != NULL) { // Multi-buffer receive 1216 while (buffer != NULL && sock->recvbuf.remaining > 0) { 1217 REQUIRE(ISC_BUFFER_VALID(buffer)); 1218 if (isc_buffer_availablelength(buffer) > 0) { 1219 isc_buffer_availableregion(buffer, &r); 1220 copylen = min(r.length, sock->recvbuf.remaining); 1221 memcpy(r.base, sock->recvbuf.consume_position, copylen); 1222 sock->recvbuf.consume_position += copylen; 1223 sock->recvbuf.remaining -= copylen; 1224 isc_buffer_add(buffer, copylen); 1225 dev->n += copylen; 1226 } 1227 buffer = ISC_LIST_NEXT(buffer, link); 1228 } 1229 } else { // Single-buffer receive 1230 copylen = min(dev->region.length - dev->n, sock->recvbuf.remaining); 1231 memcpy(dev->region.base + dev->n, sock->recvbuf.consume_position, copylen); 1232 sock->recvbuf.consume_position += copylen; 1233 sock->recvbuf.remaining -= copylen; 1234 dev->n += copylen; 1235 } 1236 1237 /* 1238 * UDP receives are all-consuming. That is, if we have 4k worth of 1239 * data in our receive buffer, and the caller only gave us 1240 * 1k of space, we will toss the remaining 3k of data. TCP 1241 * will keep the extra data around and use it for later requests. 1242 */ 1243 if (sock->type == isc_sockettype_udp) 1244 sock->recvbuf.remaining = 0; 1245} 1246 1247/* 1248 * Copy out as much data from the internal buffer to done events. 1249 * As each done event is filled, send it along its way. 1250 */ 1251static void 1252completeio_recv(isc_socket_t *sock) 1253{ 1254 isc_socketevent_t *dev; 1255 1256 /* 1257 * If we are in the process of filling our buffer, we cannot 1258 * touch it yet, so don't. 1259 */ 1260 if (sock->pending_recv > 0) 1261 return; 1262 1263 while (sock->recvbuf.remaining > 0 && !ISC_LIST_EMPTY(sock->recv_list)) { 1264 dev = ISC_LIST_HEAD(sock->recv_list); 1265 1266 /* 1267 * See if we have sufficient data in our receive buffer 1268 * to handle this. If we do, copy out the data. 1269 */ 1270 fill_recv(sock, dev); 1271 1272 /* 1273 * Did we satisfy it? 1274 */ 1275 if (dev->n >= dev->minimum) { 1276 dev->result = ISC_R_SUCCESS; 1277 send_recvdone_event(sock, &dev); 1278 } 1279 } 1280} 1281 1282/* 1283 * Returns: 1284 * DOIO_SUCCESS The operation succeeded. dev->result contains 1285 * ISC_R_SUCCESS. 1286 * 1287 * DOIO_HARD A hard or unexpected I/O error was encountered. 1288 * dev->result contains the appropriate error. 1289 * 1290 * DOIO_SOFT A soft I/O error was encountered. No senddone 1291 * event was sent. The operation should be retried. 1292 * 1293 * No other return values are possible. 1294 */ 1295static int 1296completeio_send(isc_socket_t *sock, isc_socketevent_t *dev, 1297 struct msghdr *messagehdr, int cc, int send_errno) 1298{ 1299 char addrbuf[ISC_SOCKADDR_FORMATSIZE]; 1300 char strbuf[ISC_STRERRORSIZE]; 1301 1302 if (send_errno != 0) { 1303 if (SOFT_ERROR(send_errno)) 1304 return (DOIO_SOFT); 1305 1306 return (map_socket_error(sock, send_errno, &dev->result, 1307 strbuf, sizeof(strbuf))); 1308 1309 /* 1310 * The other error types depend on whether or not the 1311 * socket is UDP or TCP. If it is UDP, some errors 1312 * that we expect to be fatal under TCP are merely 1313 * annoying, and are really soft errors. 1314 * 1315 * However, these soft errors are still returned as 1316 * a status. 1317 */ 1318 isc_sockaddr_format(&dev->address, addrbuf, sizeof(addrbuf)); 1319 isc__strerror(send_errno, strbuf, sizeof(strbuf)); 1320 UNEXPECTED_ERROR(__FILE__, __LINE__, "completeio_send: %s: %s", 1321 addrbuf, strbuf); 1322 dev->result = isc__errno2result(send_errno); 1323 return (DOIO_HARD); 1324 } 1325 1326 /* 1327 * If we write less than we expected, update counters, poke. 1328 */ 1329 dev->n += cc; 1330 if (cc != messagehdr->msg_totallen) 1331 return (DOIO_SOFT); 1332 1333 /* 1334 * Exactly what we wanted to write. We're done with this 1335 * entry. Post its completion event. 1336 */ 1337 dev->result = ISC_R_SUCCESS; 1338 return (DOIO_SUCCESS); 1339} 1340 1341static int 1342startio_send(isc_socket_t *sock, isc_socketevent_t *dev, int *nbytes, 1343 int *send_errno) 1344{ 1345 char *cmsg = NULL; 1346 char strbuf[ISC_STRERRORSIZE]; 1347 IoCompletionInfo *lpo; 1348 int status; 1349 struct msghdr *msghdr; 1350 1351 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 1352 HEAP_ZERO_MEMORY, 1353 sizeof(IoCompletionInfo)); 1354 RUNTIME_CHECK(lpo != NULL); 1355 lpo->request_type = SOCKET_SEND; 1356 lpo->dev = dev; 1357 msghdr = &lpo->messagehdr; 1358 memset(msghdr, 0, sizeof(struct msghdr)); 1359 ISC_LIST_INIT(lpo->bufferlist); 1360 1361 build_msghdr_send(sock, dev, msghdr, cmsg, sock->iov, lpo); 1362 1363 *nbytes = internal_sendmsg(sock, lpo, msghdr, 0, send_errno); 1364 1365 if (*nbytes < 0) { 1366 /* 1367 * I/O has been initiated 1368 * completion will be through the completion port 1369 */ 1370 if (PENDING_ERROR(*send_errno)) { 1371 status = DOIO_PENDING; 1372 goto done; 1373 } 1374 1375 if (SOFT_ERROR(*send_errno)) { 1376 status = DOIO_SOFT; 1377 goto done; 1378 } 1379 1380 /* 1381 * If we got this far then something is wrong 1382 */ 1383 if (isc_log_wouldlog(isc_lctx, IOEVENT_LEVEL)) { 1384 isc__strerror(*send_errno, strbuf, sizeof(strbuf)); 1385 socket_log(__LINE__, sock, NULL, IOEVENT, 1386 isc_msgcat, ISC_MSGSET_SOCKET, 1387 ISC_MSG_INTERNALSEND, 1388 "startio_send: internal_sendmsg(%d) %d " 1389 "bytes, err %d/%s", 1390 sock->fd, *nbytes, *send_errno, strbuf); 1391 } 1392 goto done; 1393 } 1394 dev->result = ISC_R_SUCCESS; 1395 status = DOIO_SOFT; 1396 done: 1397 _set_state(sock, SOCK_DATA); 1398 return (status); 1399} 1400 1401static isc_result_t 1402allocate_socket(isc_socketmgr_t *manager, isc_sockettype_t type, 1403 isc_socket_t **socketp) { 1404 isc_socket_t *sock; 1405 isc_result_t result; 1406 1407 sock = isc_mem_get(manager->mctx, sizeof(*sock)); 1408 1409 if (sock == NULL) 1410 return (ISC_R_NOMEMORY); 1411 1412 sock->magic = 0; 1413 sock->references = 0; 1414 1415 sock->manager = manager; 1416 sock->type = type; 1417 sock->fd = INVALID_SOCKET; 1418 1419 ISC_LINK_INIT(sock, link); 1420 1421 /* 1422 * set up list of readers and writers to be initially empty 1423 */ 1424 ISC_LIST_INIT(sock->recv_list); 1425 ISC_LIST_INIT(sock->send_list); 1426 ISC_LIST_INIT(sock->accept_list); 1427 sock->connect_ev = NULL; 1428 sock->pending_accept = 0; 1429 sock->pending_recv = 0; 1430 sock->pending_send = 0; 1431 sock->pending_iocp = 0; 1432 sock->listener = 0; 1433 sock->connected = 0; 1434 sock->pending_connect = 0; 1435 sock->bound = 0; 1436 memset(sock->name, 0, sizeof(sock->name)); // zero the name field 1437 _set_state(sock, SOCK_INITIALIZED); 1438 1439 sock->recvbuf.len = 65536; 1440 sock->recvbuf.consume_position = sock->recvbuf.base; 1441 sock->recvbuf.remaining = 0; 1442 sock->recvbuf.base = isc_mem_get(manager->mctx, sock->recvbuf.len); // max buffer size 1443 if (sock->recvbuf.base == NULL) { 1444 sock->magic = 0; 1445 goto error; 1446 } 1447 1448 /* 1449 * initialize the lock 1450 */ 1451 result = isc_mutex_init(&sock->lock); 1452 if (result != ISC_R_SUCCESS) { 1453 sock->magic = 0; 1454 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); 1455 sock->recvbuf.base = NULL; 1456 goto error; 1457 } 1458 1459 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1460 "allocated"); 1461 1462 sock->magic = SOCKET_MAGIC; 1463 *socketp = sock; 1464 1465 return (ISC_R_SUCCESS); 1466 1467 error: 1468 isc_mem_put(manager->mctx, sock, sizeof(*sock)); 1469 1470 return (result); 1471} 1472 1473/* 1474 * Verify that the socket state is consistent. 1475 */ 1476static void 1477consistent(isc_socket_t *sock) { 1478 1479 isc_socketevent_t *dev; 1480 isc_socket_newconnev_t *nev; 1481 unsigned int count; 1482 char *crash_reason; 1483 isc_boolean_t crash = ISC_FALSE; 1484 1485 REQUIRE(sock->pending_iocp == sock->pending_recv + sock->pending_send 1486 + sock->pending_accept + sock->pending_connect); 1487 1488 dev = ISC_LIST_HEAD(sock->send_list); 1489 count = 0; 1490 while (dev != NULL) { 1491 count++; 1492 dev = ISC_LIST_NEXT(dev, ev_link); 1493 } 1494 if (count > sock->pending_send) { 1495 crash = ISC_TRUE; 1496 crash_reason = "send_list > sock->pending_send"; 1497 } 1498 1499 nev = ISC_LIST_HEAD(sock->accept_list); 1500 count = 0; 1501 while (nev != NULL) { 1502 count++; 1503 nev = ISC_LIST_NEXT(nev, ev_link); 1504 } 1505 if (count > sock->pending_accept) { 1506 crash = ISC_TRUE; 1507 crash_reason = "send_list > sock->pending_send"; 1508 } 1509 1510 if (crash) { 1511 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1512 ISC_MSG_DESTROYING, "SOCKET INCONSISTENT: %s", 1513 crash_reason); 1514 sock_dump(sock); 1515 INSIST(crash == ISC_FALSE); 1516 } 1517} 1518 1519/* 1520 * Maybe free the socket. 1521 * 1522 * This function will verify tht the socket is no longer in use in any way, 1523 * either internally or externally. This is the only place where this 1524 * check is to be made; if some bit of code believes that IT is done with 1525 * the socket (e.g., some reference counter reaches zero), it should call 1526 * this function. 1527 * 1528 * When calling this function, the socket must be locked, and the manager 1529 * must be unlocked. 1530 * 1531 * When this function returns, *socketp will be NULL. No tricks to try 1532 * to hold on to this pointer are allowed. 1533 */ 1534static void 1535maybe_free_socket(isc_socket_t **socketp, int lineno) { 1536 isc_socket_t *sock = *socketp; 1537 *socketp = NULL; 1538 1539 INSIST(VALID_SOCKET(sock)); 1540 CONSISTENT(sock); 1541 1542 if (sock->pending_iocp > 0 1543 || sock->pending_recv > 0 1544 || sock->pending_send > 0 1545 || sock->pending_accept > 0 1546 || sock->references > 0 1547 || sock->pending_connect == 1 1548 || !ISC_LIST_EMPTY(sock->recv_list) 1549 || !ISC_LIST_EMPTY(sock->send_list) 1550 || !ISC_LIST_EMPTY(sock->accept_list) 1551 || sock->fd != INVALID_SOCKET) { 1552 UNLOCK(&sock->lock); 1553 return; 1554 } 1555 UNLOCK(&sock->lock); 1556 1557 free_socket(&sock, lineno); 1558} 1559 1560void 1561free_socket(isc_socket_t **sockp, int lineno) { 1562 isc_socketmgr_t *manager; 1563 isc_socket_t *sock = *sockp; 1564 *sockp = NULL; 1565 1566 manager = sock->manager; 1567 1568 /* 1569 * Seems we can free the socket after all. 1570 */ 1571 manager = sock->manager; 1572 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1573 ISC_MSG_DESTROYING, "freeing socket line %d fd %d lock %p semaphore %p", 1574 lineno, sock->fd, &sock->lock, sock->lock.LockSemaphore); 1575 1576 sock->magic = 0; 1577 DESTROYLOCK(&sock->lock); 1578 1579 if (sock->recvbuf.base != NULL) 1580 isc_mem_put(manager->mctx, sock->recvbuf.base, sock->recvbuf.len); 1581 1582 LOCK(&manager->lock); 1583 if (ISC_LINK_LINKED(sock, link)) 1584 ISC_LIST_UNLINK(manager->socklist, sock, link); 1585 isc_mem_put(manager->mctx, sock, sizeof(*sock)); 1586 1587 if (ISC_LIST_EMPTY(manager->socklist)) 1588 SIGNAL(&manager->shutdown_ok); 1589 UNLOCK(&manager->lock); 1590} 1591 1592/* 1593 * Create a new 'type' socket managed by 'manager'. Events 1594 * will be posted to 'task' and when dispatched 'action' will be 1595 * called with 'arg' as the arg value. The new socket is returned 1596 * in 'socketp'. 1597 */ 1598isc_result_t 1599isc_socket_create(isc_socketmgr_t *manager, int pf, isc_sockettype_t type, 1600 isc_socket_t **socketp) { 1601 isc_socket_t *sock = NULL; 1602 isc_result_t result; 1603#if defined(USE_CMSG) 1604 int on = 1; 1605#endif 1606#if defined(SO_RCVBUF) 1607 ISC_SOCKADDR_LEN_T optlen; 1608 int size; 1609#endif 1610 int socket_errno; 1611 char strbuf[ISC_STRERRORSIZE]; 1612 1613 REQUIRE(VALID_MANAGER(manager)); 1614 REQUIRE(socketp != NULL && *socketp == NULL); 1615 REQUIRE(type != isc_sockettype_fdwatch); 1616 1617 result = allocate_socket(manager, type, &sock); 1618 if (result != ISC_R_SUCCESS) 1619 return (result); 1620 1621 sock->pf = pf; 1622 switch (type) { 1623 case isc_sockettype_udp: 1624 sock->fd = socket(pf, SOCK_DGRAM, IPPROTO_UDP); 1625 if (sock->fd != INVALID_SOCKET) { 1626 result = connection_reset_fix(sock->fd); 1627 if (result != ISC_R_SUCCESS) { 1628 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1629 "closed %d %d %d con_reset_fix_failed", 1630 sock->pending_recv, sock->pending_send, 1631 sock->references); 1632 closesocket(sock->fd); 1633 _set_state(sock, SOCK_CLOSED); 1634 sock->fd = INVALID_SOCKET; 1635 free_socket(&sock, __LINE__); 1636 return (result); 1637 } 1638 } 1639 break; 1640 case isc_sockettype_tcp: 1641 sock->fd = socket(pf, SOCK_STREAM, IPPROTO_TCP); 1642 break; 1643 } 1644 1645 if (sock->fd == INVALID_SOCKET) { 1646 socket_errno = WSAGetLastError(); 1647 free_socket(&sock, __LINE__); 1648 1649 switch (socket_errno) { 1650 case WSAEMFILE: 1651 case WSAENOBUFS: 1652 return (ISC_R_NORESOURCES); 1653 1654 case WSAEPROTONOSUPPORT: 1655 case WSAEPFNOSUPPORT: 1656 case WSAEAFNOSUPPORT: 1657 return (ISC_R_FAMILYNOSUPPORT); 1658 1659 default: 1660 isc__strerror(socket_errno, strbuf, sizeof(strbuf)); 1661 UNEXPECTED_ERROR(__FILE__, __LINE__, 1662 "socket() %s: %s", 1663 isc_msgcat_get(isc_msgcat, 1664 ISC_MSGSET_GENERAL, 1665 ISC_MSG_FAILED, 1666 "failed"), 1667 strbuf); 1668 return (ISC_R_UNEXPECTED); 1669 } 1670 } 1671 1672 result = make_nonblock(sock->fd); 1673 if (result != ISC_R_SUCCESS) { 1674 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1675 "closed %d %d %d make_nonblock_failed", 1676 sock->pending_recv, sock->pending_send, 1677 sock->references); 1678 closesocket(sock->fd); 1679 sock->fd = INVALID_SOCKET; 1680 free_socket(&sock, __LINE__); 1681 return (result); 1682 } 1683 1684 1685#if defined(USE_CMSG) || defined(SO_RCVBUF) 1686 if (type == isc_sockettype_udp) { 1687 1688#if defined(USE_CMSG) 1689#if defined(ISC_PLATFORM_HAVEIPV6) 1690#ifdef IPV6_RECVPKTINFO 1691 /* 2292bis */ 1692 if ((pf == AF_INET6) 1693 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_RECVPKTINFO, 1694 (void *)&on, sizeof(on)) < 0)) { 1695 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 1696 UNEXPECTED_ERROR(__FILE__, __LINE__, 1697 "setsockopt(%d, IPV6_RECVPKTINFO) " 1698 "%s: %s", sock->fd, 1699 isc_msgcat_get(isc_msgcat, 1700 ISC_MSGSET_GENERAL, 1701 ISC_MSG_FAILED, 1702 "failed"), 1703 strbuf); 1704 } 1705#else 1706 /* 2292 */ 1707 if ((pf == AF_INET6) 1708 && (setsockopt(sock->fd, IPPROTO_IPV6, IPV6_PKTINFO, 1709 (void *)&on, sizeof(on)) < 0)) { 1710 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 1711 UNEXPECTED_ERROR(__FILE__, __LINE__, 1712 "setsockopt(%d, IPV6_PKTINFO) %s: %s", 1713 sock->fd, 1714 isc_msgcat_get(isc_msgcat, 1715 ISC_MSGSET_GENERAL, 1716 ISC_MSG_FAILED, 1717 "failed"), 1718 strbuf); 1719 } 1720#endif /* IPV6_RECVPKTINFO */ 1721#ifdef IPV6_USE_MIN_MTU /*2292bis, not too common yet*/ 1722 /* use minimum MTU */ 1723 if (pf == AF_INET6) { 1724 (void)setsockopt(sock->fd, IPPROTO_IPV6, 1725 IPV6_USE_MIN_MTU, 1726 (void *)&on, sizeof(on)); 1727 } 1728#endif 1729#endif /* ISC_PLATFORM_HAVEIPV6 */ 1730#endif /* defined(USE_CMSG) */ 1731 1732#if defined(SO_RCVBUF) 1733 optlen = sizeof(size); 1734 if (getsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 1735 (void *)&size, &optlen) >= 0 && 1736 size < RCVBUFSIZE) { 1737 size = RCVBUFSIZE; 1738 (void)setsockopt(sock->fd, SOL_SOCKET, SO_RCVBUF, 1739 (void *)&size, sizeof(size)); 1740 } 1741#endif 1742 1743 } 1744#endif /* defined(USE_CMSG) || defined(SO_RCVBUF) */ 1745 1746 _set_state(sock, SOCK_OPEN); 1747 sock->references = 1; 1748 *socketp = sock; 1749 1750 iocompletionport_update(sock); 1751 1752 /* 1753 * Note we don't have to lock the socket like we normally would because 1754 * there are no external references to it yet. 1755 */ 1756 LOCK(&manager->lock); 1757 ISC_LIST_APPEND(manager->socklist, sock, link); 1758 InterlockedIncrement(&manager->totalSockets); 1759 UNLOCK(&manager->lock); 1760 1761 socket_log(__LINE__, sock, NULL, CREATION, isc_msgcat, ISC_MSGSET_SOCKET, 1762 ISC_MSG_CREATED, "created %u type %u", sock->fd, type); 1763 1764 return (ISC_R_SUCCESS); 1765} 1766 1767isc_result_t 1768isc_socket_open(isc_socket_t *sock) { 1769 REQUIRE(VALID_SOCKET(sock)); 1770 REQUIRE(sock->type != isc_sockettype_fdwatch); 1771 1772 return (ISC_R_NOTIMPLEMENTED); 1773} 1774 1775/* 1776 * Attach to a socket. Caller must explicitly detach when it is done. 1777 */ 1778void 1779isc_socket_attach(isc_socket_t *sock, isc_socket_t **socketp) { 1780 REQUIRE(VALID_SOCKET(sock)); 1781 REQUIRE(socketp != NULL && *socketp == NULL); 1782 1783 LOCK(&sock->lock); 1784 CONSISTENT(sock); 1785 sock->references++; 1786 UNLOCK(&sock->lock); 1787 1788 *socketp = sock; 1789} 1790 1791/* 1792 * Dereference a socket. If this is the last reference to it, clean things 1793 * up by destroying the socket. 1794 */ 1795void 1796isc_socket_detach(isc_socket_t **socketp) { 1797 isc_socket_t *sock; 1798 isc_boolean_t kill_socket = ISC_FALSE; 1799 1800 REQUIRE(socketp != NULL); 1801 sock = *socketp; 1802 REQUIRE(VALID_SOCKET(sock)); 1803 REQUIRE(sock->type != isc_sockettype_fdwatch); 1804 1805 LOCK(&sock->lock); 1806 CONSISTENT(sock); 1807 REQUIRE(sock->references > 0); 1808 sock->references--; 1809 1810 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 1811 "detach_socket %d %d %d", 1812 sock->pending_recv, sock->pending_send, 1813 sock->references); 1814 1815 if (sock->references == 0 && sock->fd != INVALID_SOCKET) { 1816 closesocket(sock->fd); 1817 sock->fd = INVALID_SOCKET; 1818 _set_state(sock, SOCK_CLOSED); 1819 } 1820 1821 maybe_free_socket(&sock, __LINE__); 1822 1823 *socketp = NULL; 1824} 1825 1826isc_result_t 1827isc_socket_close(isc_socket_t *sock) { 1828 REQUIRE(VALID_SOCKET(sock)); 1829 REQUIRE(sock->type != isc_sockettype_fdwatch); 1830 1831 return (ISC_R_NOTIMPLEMENTED); 1832} 1833 1834/* 1835 * Dequeue an item off the given socket's read queue, set the result code 1836 * in the done event to the one provided, and send it to the task it was 1837 * destined for. 1838 * 1839 * If the event to be sent is on a list, remove it before sending. If 1840 * asked to, send and detach from the task as well. 1841 * 1842 * Caller must have the socket locked if the event is attached to the socket. 1843 */ 1844static void 1845send_recvdone_event(isc_socket_t *sock, isc_socketevent_t **dev) { 1846 isc_task_t *task; 1847 1848 task = (*dev)->ev_sender; 1849 (*dev)->ev_sender = sock; 1850 1851 if (ISC_LINK_LINKED(*dev, ev_link)) 1852 ISC_LIST_DEQUEUE(sock->recv_list, *dev, ev_link); 1853 1854 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1855 == ISC_SOCKEVENTATTR_ATTACHED) 1856 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1857 else 1858 isc_task_send(task, (isc_event_t **)dev); 1859 1860 CONSISTENT(sock); 1861} 1862 1863/* 1864 * See comments for send_recvdone_event() above. 1865 */ 1866static void 1867send_senddone_event(isc_socket_t *sock, isc_socketevent_t **dev) { 1868 isc_task_t *task; 1869 1870 INSIST(dev != NULL && *dev != NULL); 1871 1872 task = (*dev)->ev_sender; 1873 (*dev)->ev_sender = sock; 1874 1875 if (ISC_LINK_LINKED(*dev, ev_link)) 1876 ISC_LIST_DEQUEUE(sock->send_list, *dev, ev_link); 1877 1878 if (((*dev)->attributes & ISC_SOCKEVENTATTR_ATTACHED) 1879 == ISC_SOCKEVENTATTR_ATTACHED) 1880 isc_task_sendanddetach(&task, (isc_event_t **)dev); 1881 else 1882 isc_task_send(task, (isc_event_t **)dev); 1883 1884 CONSISTENT(sock); 1885} 1886 1887/* 1888 * See comments for send_recvdone_event() above. 1889 */ 1890static void 1891send_acceptdone_event(isc_socket_t *sock, isc_socket_newconnev_t **adev) { 1892 isc_task_t *task; 1893 1894 INSIST(adev != NULL && *adev != NULL); 1895 1896 task = (*adev)->ev_sender; 1897 (*adev)->ev_sender = sock; 1898 1899 if (ISC_LINK_LINKED(*adev, ev_link)) 1900 ISC_LIST_DEQUEUE(sock->accept_list, *adev, ev_link); 1901 1902 isc_task_sendanddetach(&task, (isc_event_t **)adev); 1903 1904 CONSISTENT(sock); 1905} 1906 1907/* 1908 * See comments for send_recvdone_event() above. 1909 */ 1910static void 1911send_connectdone_event(isc_socket_t *sock, isc_socket_connev_t **cdev) { 1912 isc_task_t *task; 1913 1914 INSIST(cdev != NULL && *cdev != NULL); 1915 1916 task = (*cdev)->ev_sender; 1917 (*cdev)->ev_sender = sock; 1918 1919 sock->connect_ev = NULL; 1920 1921 isc_task_sendanddetach(&task, (isc_event_t **)cdev); 1922 1923 CONSISTENT(sock); 1924} 1925 1926/* 1927 * On entry to this function, the event delivered is the internal 1928 * readable event, and the first item on the accept_list should be 1929 * the done event we want to send. If the list is empty, this is a no-op, 1930 * so just close the new connection, unlock, and return. 1931 * 1932 * Note the socket is locked before entering here 1933 */ 1934static void 1935internal_accept(isc_socket_t *sock, IoCompletionInfo *lpo, int accept_errno) { 1936 isc_socket_newconnev_t *adev; 1937 isc_result_t result = ISC_R_SUCCESS; 1938 isc_socket_t *nsock; 1939 struct sockaddr *localaddr; 1940 int localaddr_len = sizeof(*localaddr); 1941 struct sockaddr *remoteaddr; 1942 int remoteaddr_len = sizeof(*remoteaddr); 1943 1944 INSIST(VALID_SOCKET(sock)); 1945 LOCK(&sock->lock); 1946 CONSISTENT(sock); 1947 1948 socket_log(__LINE__, sock, NULL, TRACE, 1949 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 1950 "internal_accept called"); 1951 1952 INSIST(sock->listener); 1953 1954 INSIST(sock->pending_iocp > 0); 1955 sock->pending_iocp--; 1956 INSIST(sock->pending_accept > 0); 1957 sock->pending_accept--; 1958 1959 adev = lpo->adev; 1960 1961 /* 1962 * If the event is no longer in the list we can just return. 1963 */ 1964 if (!acceptdone_is_active(sock, adev)) 1965 goto done; 1966 1967 nsock = adev->newsocket; 1968 1969 /* 1970 * Pull off the done event. 1971 */ 1972 ISC_LIST_UNLINK(sock->accept_list, adev, ev_link); 1973 1974 /* 1975 * Extract the addresses from the socket, copy them into the structure, 1976 * and return the new socket. 1977 */ 1978 ISCGetAcceptExSockaddrs(lpo->acceptbuffer, 0, 1979 sizeof(SOCKADDR_STORAGE) + 16, sizeof(SOCKADDR_STORAGE) + 16, 1980 (LPSOCKADDR *)&localaddr, &localaddr_len, 1981 (LPSOCKADDR *)&remoteaddr, &remoteaddr_len); 1982 memcpy(&adev->address.type, remoteaddr, remoteaddr_len); 1983 adev->address.length = remoteaddr_len; 1984 nsock->address = adev->address; 1985 nsock->pf = adev->address.type.sa.sa_family; 1986 1987 socket_log(__LINE__, nsock, &nsock->address, TRACE, 1988 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 1989 "internal_accept parent %p", sock); 1990 1991 result = make_nonblock(adev->newsocket->fd); 1992 INSIST(result == ISC_R_SUCCESS); 1993 1994 INSIST(setsockopt(nsock->fd, SOL_SOCKET, SO_UPDATE_ACCEPT_CONTEXT, 1995 (char *)&sock->fd, sizeof(sock->fd)) == 0); 1996 1997 /* 1998 * Hook it up into the manager. 1999 */ 2000 nsock->bound = 1; 2001 nsock->connected = 1; 2002 _set_state(nsock, SOCK_OPEN); 2003 2004 LOCK(&nsock->manager->lock); 2005 ISC_LIST_APPEND(nsock->manager->socklist, nsock, link); 2006 InterlockedIncrement(&nsock->manager->totalSockets); 2007 UNLOCK(&nsock->manager->lock); 2008 2009 socket_log(__LINE__, sock, &nsock->address, CREATION, 2010 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 2011 "accepted_connection new_socket %p fd %d", 2012 nsock, nsock->fd); 2013 2014 adev->result = result; 2015 send_acceptdone_event(sock, &adev); 2016 2017done: 2018 CONSISTENT(sock); 2019 UNLOCK(&sock->lock); 2020 2021 HeapFree(hHeapHandle, 0, lpo->acceptbuffer); 2022 lpo->acceptbuffer = NULL; 2023} 2024 2025/* 2026 * Called when a socket with a pending connect() finishes. 2027 * Note that the socket is locked before entering. 2028 */ 2029static void 2030internal_connect(isc_socket_t *sock, IoCompletionInfo *lpo, int connect_errno) { 2031 isc_socket_connev_t *cdev; 2032 char strbuf[ISC_STRERRORSIZE]; 2033 2034 INSIST(VALID_SOCKET(sock)); 2035 2036 LOCK(&sock->lock); 2037 2038 INSIST(sock->pending_iocp > 0); 2039 sock->pending_iocp--; 2040 INSIST(sock->pending_connect == 1); 2041 sock->pending_connect = 0; 2042 2043 /* 2044 * Has this event been canceled? 2045 */ 2046 cdev = lpo->cdev; 2047 if (!connectdone_is_active(sock, cdev)) { 2048 sock->pending_connect = 0; 2049 if (sock->fd != INVALID_SOCKET) { 2050 closesocket(sock->fd); 2051 sock->fd = INVALID_SOCKET; 2052 _set_state(sock, SOCK_CLOSED); 2053 } 2054 CONSISTENT(sock); 2055 UNLOCK(&sock->lock); 2056 return; 2057 } 2058 2059 /* 2060 * Check possible Windows network event error status here. 2061 */ 2062 if (connect_errno != 0) { 2063 /* 2064 * If the error is SOFT, just try again on this 2065 * fd and pretend nothing strange happened. 2066 */ 2067 if (SOFT_ERROR(connect_errno) || 2068 connect_errno == WSAEINPROGRESS) { 2069 sock->pending_connect = 1; 2070 CONSISTENT(sock); 2071 UNLOCK(&sock->lock); 2072 return; 2073 } 2074 2075 /* 2076 * Translate other errors into ISC_R_* flavors. 2077 */ 2078 switch (connect_errno) { 2079#define ERROR_MATCH(a, b) case a: cdev->result = b; break; 2080 ERROR_MATCH(WSAEACCES, ISC_R_NOPERM); 2081 ERROR_MATCH(WSAEADDRNOTAVAIL, ISC_R_ADDRNOTAVAIL); 2082 ERROR_MATCH(WSAEAFNOSUPPORT, ISC_R_ADDRNOTAVAIL); 2083 ERROR_MATCH(WSAECONNREFUSED, ISC_R_CONNREFUSED); 2084 ERROR_MATCH(WSAEHOSTUNREACH, ISC_R_HOSTUNREACH); 2085 ERROR_MATCH(WSAEHOSTDOWN, ISC_R_HOSTDOWN); 2086 ERROR_MATCH(WSAENETUNREACH, ISC_R_NETUNREACH); 2087 ERROR_MATCH(WSAENETDOWN, ISC_R_NETDOWN); 2088 ERROR_MATCH(WSAENOBUFS, ISC_R_NORESOURCES); 2089 ERROR_MATCH(WSAECONNRESET, ISC_R_CONNECTIONRESET); 2090 ERROR_MATCH(WSAECONNABORTED, ISC_R_CONNECTIONRESET); 2091 ERROR_MATCH(WSAETIMEDOUT, ISC_R_TIMEDOUT); 2092#undef ERROR_MATCH 2093 default: 2094 cdev->result = ISC_R_UNEXPECTED; 2095 isc__strerror(connect_errno, strbuf, sizeof(strbuf)); 2096 UNEXPECTED_ERROR(__FILE__, __LINE__, 2097 "internal_connect: connect() %s", 2098 strbuf); 2099 } 2100 } else { 2101 INSIST(setsockopt(sock->fd, SOL_SOCKET, SO_UPDATE_CONNECT_CONTEXT, NULL, 0) == 0); 2102 cdev->result = ISC_R_SUCCESS; 2103 sock->connected = 1; 2104 socket_log(__LINE__, sock, &sock->address, IOEVENT, 2105 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTEDCXN, 2106 "internal_connect: success"); 2107 } 2108 2109 send_connectdone_event(sock, &cdev); 2110 2111 UNLOCK(&sock->lock); 2112} 2113 2114/* 2115 * Loop through the socket, returning ISC_R_EOF for each done event pending. 2116 */ 2117static void 2118send_recvdone_abort(isc_socket_t *sock, isc_result_t result) { 2119 isc_socketevent_t *dev; 2120 2121 while (!ISC_LIST_EMPTY(sock->recv_list)) { 2122 dev = ISC_LIST_HEAD(sock->recv_list); 2123 dev->result = result; 2124 send_recvdone_event(sock, &dev); 2125 } 2126} 2127 2128/* 2129 * Take the data we received in our private buffer, and if any recv() calls on 2130 * our list are satisfied, send the corresponding done event. 2131 * 2132 * If we need more data (there are still items on the recv_list after we consume all 2133 * our data) then arrange for another system recv() call to fill our buffers. 2134 */ 2135static void 2136internal_recv(isc_socket_t *sock, int nbytes) 2137{ 2138 INSIST(VALID_SOCKET(sock)); 2139 2140 LOCK(&sock->lock); 2141 CONSISTENT(sock); 2142 2143 socket_log(__LINE__, sock, NULL, IOEVENT, 2144 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALRECV, 2145 "internal_recv: %d bytes received", nbytes); 2146 2147 /* 2148 * If we got here, the I/O operation succeeded. However, we might still have removed this 2149 * event from our notification list (or never placed it on it due to immediate completion.) 2150 * Handle the reference counting here, and handle the cancellation event just after. 2151 */ 2152 INSIST(sock->pending_iocp > 0); 2153 sock->pending_iocp--; 2154 INSIST(sock->pending_recv > 0); 2155 sock->pending_recv--; 2156 2157 /* 2158 * The only way we could have gotten here is that our I/O has successfully completed. 2159 * Update our pointers, and move on. The only odd case here is that we might not 2160 * have received enough data on a TCP stream to satisfy the minimum requirements. If 2161 * this is the case, we will re-issue the recv() call for what we need. 2162 * 2163 * We do check for a recv() of 0 bytes on a TCP stream. This means the remote end 2164 * has closed. 2165 */ 2166 if (nbytes == 0 && sock->type == isc_sockettype_tcp) { 2167 send_recvdone_abort(sock, ISC_R_EOF); 2168 maybe_free_socket(&sock, __LINE__); 2169 return; 2170 } 2171 sock->recvbuf.remaining = nbytes; 2172 sock->recvbuf.consume_position = sock->recvbuf.base; 2173 completeio_recv(sock); 2174 2175 /* 2176 * If there are more receivers waiting for data, queue another receive 2177 * here. 2178 */ 2179 queue_receive_request(sock); 2180 2181 /* 2182 * Unlock and/or destroy if we are the last thing this socket has left to do. 2183 */ 2184 maybe_free_socket(&sock, __LINE__); 2185} 2186 2187static void 2188internal_send(isc_socket_t *sock, isc_socketevent_t *dev, 2189 struct msghdr *messagehdr, int nbytes, int send_errno, IoCompletionInfo *lpo) 2190{ 2191 buflist_t *buffer; 2192 2193 /* 2194 * Find out what socket this is and lock it. 2195 */ 2196 INSIST(VALID_SOCKET(sock)); 2197 2198 LOCK(&sock->lock); 2199 CONSISTENT(sock); 2200 2201 socket_log(__LINE__, sock, NULL, IOEVENT, 2202 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_INTERNALSEND, 2203 "internal_send: task got socket event %p", dev); 2204 2205 buffer = ISC_LIST_HEAD(lpo->bufferlist); 2206 while (buffer != NULL) { 2207 ISC_LIST_DEQUEUE(lpo->bufferlist, buffer, link); 2208 2209 socket_log(__LINE__, sock, NULL, TRACE, 2210 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_ACCEPTLOCK, 2211 "free_buffer %p %p", buffer, buffer->buf); 2212 2213 HeapFree(hHeapHandle, 0, buffer->buf); 2214 HeapFree(hHeapHandle, 0, buffer); 2215 buffer = ISC_LIST_HEAD(lpo->bufferlist); 2216 } 2217 2218 INSIST(sock->pending_iocp > 0); 2219 sock->pending_iocp--; 2220 INSIST(sock->pending_send > 0); 2221 sock->pending_send--; 2222 2223 /* If the event is no longer in the list we can just return */ 2224 if (!senddone_is_active(sock, dev)) 2225 goto done; 2226 2227 /* 2228 * Set the error code and send things on its way. 2229 */ 2230 switch (completeio_send(sock, dev, messagehdr, nbytes, send_errno)) { 2231 case DOIO_SOFT: 2232 break; 2233 case DOIO_HARD: 2234 case DOIO_SUCCESS: 2235 send_senddone_event(sock, &dev); 2236 break; 2237 } 2238 2239 done: 2240 maybe_free_socket(&sock, __LINE__); 2241} 2242 2243/* 2244 * These return if the done event passed in is on the list (or for connect, is 2245 * the one we're waiting for. Using these ensures we will not double-send an 2246 * event. 2247 */ 2248static isc_boolean_t 2249senddone_is_active(isc_socket_t *sock, isc_socketevent_t *dev) 2250{ 2251 isc_socketevent_t *ldev; 2252 2253 ldev = ISC_LIST_HEAD(sock->send_list); 2254 while (ldev != NULL && ldev != dev) 2255 ldev = ISC_LIST_NEXT(ldev, ev_link); 2256 2257 return (ldev == NULL ? ISC_FALSE : ISC_TRUE); 2258} 2259 2260static isc_boolean_t 2261acceptdone_is_active(isc_socket_t *sock, isc_socket_newconnev_t *dev) 2262{ 2263 isc_socket_newconnev_t *ldev; 2264 2265 ldev = ISC_LIST_HEAD(sock->accept_list); 2266 while (ldev != NULL && ldev != dev) 2267 ldev = ISC_LIST_NEXT(ldev, ev_link); 2268 2269 return (ldev == NULL ? ISC_FALSE : ISC_TRUE); 2270} 2271 2272static isc_boolean_t 2273connectdone_is_active(isc_socket_t *sock, isc_socket_connev_t *dev) 2274{ 2275 return (sock->connect_ev == dev ? ISC_TRUE : ISC_FALSE); 2276} 2277 2278/* 2279 * This is the I/O Completion Port Worker Function. It loops forever 2280 * waiting for I/O to complete and then forwards them for further 2281 * processing. There are a number of these in separate threads. 2282 */ 2283static isc_threadresult_t WINAPI 2284SocketIoThread(LPVOID ThreadContext) { 2285 isc_socketmgr_t *manager = ThreadContext; 2286 BOOL bSuccess = FALSE; 2287 DWORD nbytes; 2288 IoCompletionInfo *lpo = NULL; 2289 isc_socket_t *sock = NULL; 2290 int request; 2291 struct msghdr *messagehdr = NULL; 2292 int errval; 2293 char strbuf[ISC_STRERRORSIZE]; 2294 int errstatus; 2295 2296 REQUIRE(VALID_MANAGER(manager)); 2297 2298 /* 2299 * Set the thread priority high enough so I/O will 2300 * preempt normal recv packet processing, but not 2301 * higher than the timer sync thread. 2302 */ 2303 if (!SetThreadPriority(GetCurrentThread(), 2304 THREAD_PRIORITY_ABOVE_NORMAL)) { 2305 errval = GetLastError(); 2306 isc__strerror(errval, strbuf, sizeof(strbuf)); 2307 FATAL_ERROR(__FILE__, __LINE__, 2308 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 2309 ISC_MSG_FAILED, 2310 "Can't set thread priority: %s"), 2311 strbuf); 2312 } 2313 2314 /* 2315 * Loop forever waiting on I/O Completions and then processing them 2316 */ 2317 while (TRUE) { 2318 bSuccess = GetQueuedCompletionStatus(manager->hIoCompletionPort, 2319 &nbytes, (LPDWORD)&sock, 2320 (LPWSAOVERLAPPED *)&lpo, 2321 INFINITE); 2322 if (lpo == NULL) /* Received request to exit */ 2323 break; 2324 2325 REQUIRE(VALID_SOCKET(sock)); 2326 2327 request = lpo->request_type; 2328 2329 errstatus = 0; 2330 if (!bSuccess) { 2331 isc_result_t isc_result; 2332 2333 /* 2334 * Did the I/O operation complete? 2335 */ 2336 errstatus = WSAGetLastError(); 2337 isc_result = isc__errno2resultx(errstatus, __FILE__, __LINE__); 2338 2339 LOCK(&sock->lock); 2340 CONSISTENT(sock); 2341 switch (request) { 2342 case SOCKET_RECV: 2343 INSIST(sock->pending_iocp > 0); 2344 sock->pending_iocp--; 2345 INSIST(sock->pending_recv > 0); 2346 sock->pending_recv--; 2347 send_recvdone_abort(sock, isc_result); 2348 if (isc_result == ISC_R_UNEXPECTED) { 2349 UNEXPECTED_ERROR(__FILE__, __LINE__, 2350 "SOCKET_RECV: Windows error code: %d, returning ISC error %d", 2351 errstatus, isc_result); 2352 } 2353 break; 2354 2355 case SOCKET_SEND: 2356 INSIST(sock->pending_iocp > 0); 2357 sock->pending_iocp--; 2358 INSIST(sock->pending_send > 0); 2359 sock->pending_send--; 2360 if (senddone_is_active(sock, lpo->dev)) { 2361 lpo->dev->result = isc_result; 2362 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2363 "canceled_send"); 2364 send_senddone_event(sock, &lpo->dev); 2365 } 2366 break; 2367 2368 case SOCKET_ACCEPT: 2369 INSIST(sock->pending_iocp > 0); 2370 sock->pending_iocp--; 2371 INSIST(sock->pending_accept > 0); 2372 sock->pending_accept--; 2373 if (acceptdone_is_active(sock, lpo->adev)) { 2374 closesocket(lpo->adev->newsocket->fd); 2375 lpo->adev->newsocket->fd = INVALID_SOCKET; 2376 lpo->adev->newsocket->references--; 2377 free_socket(&lpo->adev->newsocket, __LINE__); 2378 lpo->adev->result = isc_result; 2379 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2380 "canceled_accept"); 2381 send_acceptdone_event(sock, &lpo->adev); 2382 } 2383 break; 2384 2385 case SOCKET_CONNECT: 2386 INSIST(sock->pending_iocp > 0); 2387 sock->pending_iocp--; 2388 INSIST(sock->pending_connect == 1); 2389 sock->pending_connect = 0; 2390 if (connectdone_is_active(sock, lpo->cdev)) { 2391 lpo->cdev->result = isc_result; 2392 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2393 "canceled_connect"); 2394 send_connectdone_event(sock, &lpo->cdev); 2395 } 2396 break; 2397 } 2398 maybe_free_socket(&sock, __LINE__); 2399 2400 if (lpo != NULL) 2401 HeapFree(hHeapHandle, 0, lpo); 2402 continue; 2403 } 2404 2405 messagehdr = &lpo->messagehdr; 2406 2407 switch (request) { 2408 case SOCKET_RECV: 2409 internal_recv(sock, nbytes); 2410 break; 2411 case SOCKET_SEND: 2412 internal_send(sock, lpo->dev, messagehdr, nbytes, errstatus, lpo); 2413 break; 2414 case SOCKET_ACCEPT: 2415 internal_accept(sock, lpo, errstatus); 2416 break; 2417 case SOCKET_CONNECT: 2418 internal_connect(sock, lpo, errstatus); 2419 break; 2420 } 2421 2422 if (lpo != NULL) 2423 HeapFree(hHeapHandle, 0, lpo); 2424 } 2425 2426 /* 2427 * Exit Completion Port Thread 2428 */ 2429 manager_log(manager, TRACE, 2430 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2431 ISC_MSG_EXITING, "SocketIoThread exiting")); 2432 return ((isc_threadresult_t)0); 2433} 2434 2435/* 2436 * Create a new socket manager. 2437 */ 2438isc_result_t 2439isc_socketmgr_create(isc_mem_t *mctx, isc_socketmgr_t **managerp) { 2440 return (isc_socketmgr_create2(mctx, managerp, 0)); 2441} 2442 2443isc_result_t 2444isc_socketmgr_create2(isc_mem_t *mctx, isc_socketmgr_t **managerp, 2445 unsigned int maxsocks) 2446{ 2447 isc_socketmgr_t *manager; 2448 isc_result_t result; 2449 2450 REQUIRE(managerp != NULL && *managerp == NULL); 2451 2452 if (maxsocks != 0) 2453 return (ISC_R_NOTIMPLEMENTED); 2454 2455 manager = isc_mem_get(mctx, sizeof(*manager)); 2456 if (manager == NULL) 2457 return (ISC_R_NOMEMORY); 2458 2459 InitSockets(); 2460 2461 manager->magic = SOCKET_MANAGER_MAGIC; 2462 manager->mctx = NULL; 2463 manager->stats = NULL; 2464 ISC_LIST_INIT(manager->socklist); 2465 result = isc_mutex_init(&manager->lock); 2466 if (result != ISC_R_SUCCESS) { 2467 isc_mem_put(mctx, manager, sizeof(*manager)); 2468 return (result); 2469 } 2470 if (isc_condition_init(&manager->shutdown_ok) != ISC_R_SUCCESS) { 2471 DESTROYLOCK(&manager->lock); 2472 isc_mem_put(mctx, manager, sizeof(*manager)); 2473 UNEXPECTED_ERROR(__FILE__, __LINE__, 2474 "isc_condition_init() %s", 2475 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2476 ISC_MSG_FAILED, "failed")); 2477 return (ISC_R_UNEXPECTED); 2478 } 2479 2480 isc_mem_attach(mctx, &manager->mctx); 2481 2482 iocompletionport_init(manager); /* Create the Completion Ports */ 2483 2484 manager->bShutdown = ISC_FALSE; 2485 manager->totalSockets = 0; 2486 manager->iocp_total = 0; 2487 2488 *managerp = manager; 2489 2490 return (ISC_R_SUCCESS); 2491} 2492 2493isc_result_t 2494isc_socketmgr_getmaxsockets(isc_socketmgr_t *manager, unsigned int *nsockp) { 2495 REQUIRE(VALID_MANAGER(manager)); 2496 REQUIRE(nsockp != NULL); 2497 2498 return (ISC_R_NOTIMPLEMENTED); 2499} 2500 2501void 2502isc_socketmgr_setstats(isc_socketmgr_t *manager, isc_stats_t *stats) { 2503 REQUIRE(VALID_MANAGER(manager)); 2504 REQUIRE(ISC_LIST_EMPTY(manager->socklist)); 2505 REQUIRE(manager->stats == NULL); 2506 REQUIRE(isc_stats_ncounters(stats) == isc_sockstatscounter_max); 2507 2508 isc_stats_attach(stats, &manager->stats); 2509} 2510 2511void 2512isc_socketmgr_destroy(isc_socketmgr_t **managerp) { 2513 isc_socketmgr_t *manager; 2514 int i; 2515 isc_mem_t *mctx; 2516 2517 /* 2518 * Destroy a socket manager. 2519 */ 2520 2521 REQUIRE(managerp != NULL); 2522 manager = *managerp; 2523 REQUIRE(VALID_MANAGER(manager)); 2524 2525 LOCK(&manager->lock); 2526 2527 /* 2528 * Wait for all sockets to be destroyed. 2529 */ 2530 while (!ISC_LIST_EMPTY(manager->socklist)) { 2531 manager_log(manager, CREATION, 2532 isc_msgcat_get(isc_msgcat, ISC_MSGSET_SOCKET, 2533 ISC_MSG_SOCKETSREMAIN, 2534 "sockets exist")); 2535 WAIT(&manager->shutdown_ok, &manager->lock); 2536 } 2537 2538 UNLOCK(&manager->lock); 2539 2540 /* 2541 * Here, we need to had some wait code for the completion port 2542 * thread. 2543 */ 2544 signal_iocompletionport_exit(manager); 2545 manager->bShutdown = ISC_TRUE; 2546 2547 /* 2548 * Wait for threads to exit. 2549 */ 2550 for (i = 0; i < manager->maxIOCPThreads; i++) { 2551 if (isc_thread_join((isc_thread_t) manager->hIOCPThreads[i], 2552 NULL) != ISC_R_SUCCESS) 2553 UNEXPECTED_ERROR(__FILE__, __LINE__, 2554 "isc_thread_join() for Completion Port %s", 2555 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 2556 ISC_MSG_FAILED, "failed")); 2557 } 2558 /* 2559 * Clean up. 2560 */ 2561 2562 CloseHandle(manager->hIoCompletionPort); 2563 2564 (void)isc_condition_destroy(&manager->shutdown_ok); 2565 2566 DESTROYLOCK(&manager->lock); 2567 if (manager->stats != NULL) 2568 isc_stats_detach(&manager->stats); 2569 manager->magic = 0; 2570 mctx= manager->mctx; 2571 isc_mem_put(mctx, manager, sizeof(*manager)); 2572 2573 isc_mem_detach(&mctx); 2574 2575 *managerp = NULL; 2576} 2577 2578static void 2579queue_receive_event(isc_socket_t *sock, isc_task_t *task, isc_socketevent_t *dev) 2580{ 2581 isc_task_t *ntask = NULL; 2582 2583 isc_task_attach(task, &ntask); 2584 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; 2585 2586 /* 2587 * Enqueue the request. 2588 */ 2589 INSIST(!ISC_LINK_LINKED(dev, ev_link)); 2590 ISC_LIST_ENQUEUE(sock->recv_list, dev, ev_link); 2591 2592 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2593 "queue_receive_event: event %p -> task %p", 2594 dev, ntask); 2595} 2596 2597/* 2598 * Check the pending receive queue, and if we have data pending, give it to this 2599 * caller. If we have none, queue an I/O request. If this caller is not the first 2600 * on the list, then we will just queue this event and return. 2601 * 2602 * Caller must have the socket locked. 2603 */ 2604static isc_result_t 2605socket_recv(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, 2606 unsigned int flags) 2607{ 2608 int cc = 0; 2609 isc_task_t *ntask = NULL; 2610 isc_result_t result = ISC_R_SUCCESS; 2611 int recv_errno = 0; 2612 2613 dev->ev_sender = task; 2614 2615 if (sock->fd == INVALID_SOCKET) 2616 return (ISC_R_EOF); 2617 2618 /* 2619 * Queue our event on the list of things to do. Call our function to 2620 * attempt to fill buffers as much as possible, and return done events. 2621 * We are going to lie about our handling of the ISC_SOCKFLAG_IMMEDIATE 2622 * here and tell our caller that we could not satisfy it immediately. 2623 */ 2624 queue_receive_event(sock, task, dev); 2625 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) 2626 result = ISC_R_INPROGRESS; 2627 2628 completeio_recv(sock); 2629 2630 /* 2631 * If there are more receivers waiting for data, queue another receive 2632 * here. If the 2633 */ 2634 queue_receive_request(sock); 2635 2636 return (result); 2637} 2638 2639isc_result_t 2640isc_socket_recvv(isc_socket_t *sock, isc_bufferlist_t *buflist, 2641 unsigned int minimum, isc_task_t *task, 2642 isc_taskaction_t action, const void *arg) 2643{ 2644 isc_socketevent_t *dev; 2645 isc_socketmgr_t *manager; 2646 unsigned int iocount; 2647 isc_buffer_t *buffer; 2648 isc_result_t ret; 2649 2650 REQUIRE(VALID_SOCKET(sock)); 2651 LOCK(&sock->lock); 2652 CONSISTENT(sock); 2653 2654 /* 2655 * Make sure that the socket is not closed. XXXMLG change error here? 2656 */ 2657 if (sock->fd == INVALID_SOCKET) { 2658 UNLOCK(&sock->lock); 2659 return (ISC_R_CONNREFUSED); 2660 } 2661 2662 REQUIRE(buflist != NULL); 2663 REQUIRE(!ISC_LIST_EMPTY(*buflist)); 2664 REQUIRE(task != NULL); 2665 REQUIRE(action != NULL); 2666 2667 manager = sock->manager; 2668 REQUIRE(VALID_MANAGER(manager)); 2669 2670 iocount = isc_bufferlist_availablecount(buflist); 2671 REQUIRE(iocount > 0); 2672 2673 INSIST(sock->bound); 2674 2675 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); 2676 if (dev == NULL) { 2677 UNLOCK(&sock->lock); 2678 return (ISC_R_NOMEMORY); 2679 } 2680 2681 /* 2682 * UDP sockets are always partial read 2683 */ 2684 if (sock->type == isc_sockettype_udp) 2685 dev->minimum = 1; 2686 else { 2687 if (minimum == 0) 2688 dev->minimum = iocount; 2689 else 2690 dev->minimum = minimum; 2691 } 2692 2693 /* 2694 * Move each buffer from the passed in list to our internal one. 2695 */ 2696 buffer = ISC_LIST_HEAD(*buflist); 2697 while (buffer != NULL) { 2698 ISC_LIST_DEQUEUE(*buflist, buffer, link); 2699 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 2700 buffer = ISC_LIST_HEAD(*buflist); 2701 } 2702 2703 ret = socket_recv(sock, dev, task, 0); 2704 2705 UNLOCK(&sock->lock); 2706 return (ret); 2707} 2708 2709isc_result_t 2710isc_socket_recv(isc_socket_t *sock, isc_region_t *region, unsigned int minimum, 2711 isc_task_t *task, isc_taskaction_t action, const void *arg) 2712{ 2713 isc_socketevent_t *dev; 2714 isc_socketmgr_t *manager; 2715 isc_result_t ret; 2716 2717 REQUIRE(VALID_SOCKET(sock)); 2718 LOCK(&sock->lock); 2719 CONSISTENT(sock); 2720 2721 /* 2722 * make sure that the socket's not closed 2723 */ 2724 if (sock->fd == INVALID_SOCKET) { 2725 UNLOCK(&sock->lock); 2726 return (ISC_R_CONNREFUSED); 2727 } 2728 REQUIRE(action != NULL); 2729 2730 manager = sock->manager; 2731 REQUIRE(VALID_MANAGER(manager)); 2732 2733 INSIST(sock->bound); 2734 2735 dev = allocate_socketevent(sock, ISC_SOCKEVENT_RECVDONE, action, arg); 2736 if (dev == NULL) { 2737 UNLOCK(&sock->lock); 2738 return (ISC_R_NOMEMORY); 2739 } 2740 2741 ret = isc_socket_recv2(sock, region, minimum, task, dev, 0); 2742 UNLOCK(&sock->lock); 2743 return (ret); 2744} 2745 2746isc_result_t 2747isc_socket_recv2(isc_socket_t *sock, isc_region_t *region, 2748 unsigned int minimum, isc_task_t *task, 2749 isc_socketevent_t *event, unsigned int flags) 2750{ 2751 isc_result_t ret; 2752 2753 REQUIRE(VALID_SOCKET(sock)); 2754 LOCK(&sock->lock); 2755 CONSISTENT(sock); 2756 2757 event->result = ISC_R_UNEXPECTED; 2758 event->ev_sender = sock; 2759 /* 2760 * make sure that the socket's not closed 2761 */ 2762 if (sock->fd == INVALID_SOCKET) { 2763 UNLOCK(&sock->lock); 2764 return (ISC_R_CONNREFUSED); 2765 } 2766 2767 ISC_LIST_INIT(event->bufferlist); 2768 event->region = *region; 2769 event->n = 0; 2770 event->offset = 0; 2771 event->attributes = 0; 2772 2773 /* 2774 * UDP sockets are always partial read. 2775 */ 2776 if (sock->type == isc_sockettype_udp) 2777 event->minimum = 1; 2778 else { 2779 if (minimum == 0) 2780 event->minimum = region->length; 2781 else 2782 event->minimum = minimum; 2783 } 2784 2785 ret = socket_recv(sock, event, task, flags); 2786 UNLOCK(&sock->lock); 2787 return (ret); 2788} 2789 2790/* 2791 * Caller must have the socket locked. 2792 */ 2793static isc_result_t 2794socket_send(isc_socket_t *sock, isc_socketevent_t *dev, isc_task_t *task, 2795 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 2796 unsigned int flags) 2797{ 2798 int io_state; 2799 int send_errno = 0; 2800 int cc = 0; 2801 isc_task_t *ntask = NULL; 2802 isc_result_t result = ISC_R_SUCCESS; 2803 2804 dev->ev_sender = task; 2805 2806 set_dev_address(address, sock, dev); 2807 if (pktinfo != NULL) { 2808 socket_log(__LINE__, sock, NULL, TRACE, isc_msgcat, ISC_MSGSET_SOCKET, 2809 ISC_MSG_PKTINFOPROVIDED, 2810 "pktinfo structure provided, ifindex %u (set to 0)", 2811 pktinfo->ipi6_ifindex); 2812 2813 dev->attributes |= ISC_SOCKEVENTATTR_PKTINFO; 2814 dev->pktinfo = *pktinfo; 2815 /* 2816 * Set the pktinfo index to 0 here, to let the kernel decide 2817 * what interface it should send on. 2818 */ 2819 dev->pktinfo.ipi6_ifindex = 0; 2820 } 2821 2822 io_state = startio_send(sock, dev, &cc, &send_errno); 2823 switch (io_state) { 2824 case DOIO_PENDING: /* I/O started. Nothing more to do */ 2825 case DOIO_SOFT: 2826 /* 2827 * We couldn't send all or part of the request right now, so 2828 * queue it unless ISC_SOCKFLAG_NORETRY is set. 2829 */ 2830 if ((flags & ISC_SOCKFLAG_NORETRY) == 0) { 2831 isc_task_attach(task, &ntask); 2832 dev->attributes |= ISC_SOCKEVENTATTR_ATTACHED; 2833 2834 /* 2835 * Enqueue the request. 2836 */ 2837 INSIST(!ISC_LINK_LINKED(dev, ev_link)); 2838 ISC_LIST_ENQUEUE(sock->send_list, dev, ev_link); 2839 2840 socket_log(__LINE__, sock, NULL, EVENT, NULL, 0, 0, 2841 "socket_send: event %p -> task %p", 2842 dev, ntask); 2843 2844 if ((flags & ISC_SOCKFLAG_IMMEDIATE) != 0) 2845 result = ISC_R_INPROGRESS; 2846 break; 2847 } 2848 2849 case DOIO_SUCCESS: 2850 break; 2851 } 2852 2853 return (result); 2854} 2855 2856isc_result_t 2857isc_socket_send(isc_socket_t *sock, isc_region_t *region, 2858 isc_task_t *task, isc_taskaction_t action, const void *arg) 2859{ 2860 /* 2861 * REQUIRE() checking is performed in isc_socket_sendto(). 2862 */ 2863 return (isc_socket_sendto(sock, region, task, action, arg, NULL, 2864 NULL)); 2865} 2866 2867isc_result_t 2868isc_socket_sendto(isc_socket_t *sock, isc_region_t *region, 2869 isc_task_t *task, isc_taskaction_t action, const void *arg, 2870 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) 2871{ 2872 isc_socketevent_t *dev; 2873 isc_socketmgr_t *manager; 2874 isc_result_t ret; 2875 2876 REQUIRE(VALID_SOCKET(sock)); 2877 REQUIRE(sock->type != isc_sockettype_fdwatch); 2878 2879 LOCK(&sock->lock); 2880 CONSISTENT(sock); 2881 2882 /* 2883 * make sure that the socket's not closed 2884 */ 2885 if (sock->fd == INVALID_SOCKET) { 2886 UNLOCK(&sock->lock); 2887 return (ISC_R_CONNREFUSED); 2888 } 2889 REQUIRE(region != NULL); 2890 REQUIRE(task != NULL); 2891 REQUIRE(action != NULL); 2892 2893 manager = sock->manager; 2894 REQUIRE(VALID_MANAGER(manager)); 2895 2896 INSIST(sock->bound); 2897 2898 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); 2899 if (dev == NULL) { 2900 UNLOCK(&sock->lock); 2901 return (ISC_R_NOMEMORY); 2902 } 2903 dev->region = *region; 2904 2905 ret = socket_send(sock, dev, task, address, pktinfo, 0); 2906 UNLOCK(&sock->lock); 2907 return (ret); 2908} 2909 2910isc_result_t 2911isc_socket_sendv(isc_socket_t *sock, isc_bufferlist_t *buflist, 2912 isc_task_t *task, isc_taskaction_t action, const void *arg) 2913{ 2914 return (isc_socket_sendtov(sock, buflist, task, action, arg, NULL, 2915 NULL)); 2916} 2917 2918isc_result_t 2919isc_socket_sendtov(isc_socket_t *sock, isc_bufferlist_t *buflist, 2920 isc_task_t *task, isc_taskaction_t action, const void *arg, 2921 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo) 2922{ 2923 isc_socketevent_t *dev; 2924 isc_socketmgr_t *manager; 2925 unsigned int iocount; 2926 isc_buffer_t *buffer; 2927 isc_result_t ret; 2928 2929 REQUIRE(VALID_SOCKET(sock)); 2930 2931 LOCK(&sock->lock); 2932 CONSISTENT(sock); 2933 2934 /* 2935 * make sure that the socket's not closed 2936 */ 2937 if (sock->fd == INVALID_SOCKET) { 2938 UNLOCK(&sock->lock); 2939 return (ISC_R_CONNREFUSED); 2940 } 2941 REQUIRE(buflist != NULL); 2942 REQUIRE(!ISC_LIST_EMPTY(*buflist)); 2943 REQUIRE(task != NULL); 2944 REQUIRE(action != NULL); 2945 2946 manager = sock->manager; 2947 REQUIRE(VALID_MANAGER(manager)); 2948 2949 iocount = isc_bufferlist_usedcount(buflist); 2950 REQUIRE(iocount > 0); 2951 2952 dev = allocate_socketevent(sock, ISC_SOCKEVENT_SENDDONE, action, arg); 2953 if (dev == NULL) { 2954 UNLOCK(&sock->lock); 2955 return (ISC_R_NOMEMORY); 2956 } 2957 2958 /* 2959 * Move each buffer from the passed in list to our internal one. 2960 */ 2961 buffer = ISC_LIST_HEAD(*buflist); 2962 while (buffer != NULL) { 2963 ISC_LIST_DEQUEUE(*buflist, buffer, link); 2964 ISC_LIST_ENQUEUE(dev->bufferlist, buffer, link); 2965 buffer = ISC_LIST_HEAD(*buflist); 2966 } 2967 2968 ret = socket_send(sock, dev, task, address, pktinfo, 0); 2969 UNLOCK(&sock->lock); 2970 return (ret); 2971} 2972 2973isc_result_t 2974isc_socket_sendto2(isc_socket_t *sock, isc_region_t *region, 2975 isc_task_t *task, 2976 isc_sockaddr_t *address, struct in6_pktinfo *pktinfo, 2977 isc_socketevent_t *event, unsigned int flags) 2978{ 2979 isc_result_t ret; 2980 2981 REQUIRE(VALID_SOCKET(sock)); 2982 LOCK(&sock->lock); 2983 CONSISTENT(sock); 2984 2985 REQUIRE((flags & ~(ISC_SOCKFLAG_IMMEDIATE|ISC_SOCKFLAG_NORETRY)) == 0); 2986 if ((flags & ISC_SOCKFLAG_NORETRY) != 0) 2987 REQUIRE(sock->type == isc_sockettype_udp); 2988 event->ev_sender = sock; 2989 event->result = ISC_R_UNEXPECTED; 2990 /* 2991 * make sure that the socket's not closed 2992 */ 2993 if (sock->fd == INVALID_SOCKET) { 2994 UNLOCK(&sock->lock); 2995 return (ISC_R_CONNREFUSED); 2996 } 2997 ISC_LIST_INIT(event->bufferlist); 2998 event->region = *region; 2999 event->n = 0; 3000 event->offset = 0; 3001 event->attributes = 0; 3002 3003 ret = socket_send(sock, event, task, address, pktinfo, flags); 3004 UNLOCK(&sock->lock); 3005 return (ret); 3006} 3007 3008isc_result_t 3009isc_socket_bind(isc_socket_t *sock, isc_sockaddr_t *sockaddr, 3010 unsigned int options) { 3011 int bind_errno; 3012 char strbuf[ISC_STRERRORSIZE]; 3013 int on = 1; 3014 3015 REQUIRE(VALID_SOCKET(sock)); 3016 LOCK(&sock->lock); 3017 CONSISTENT(sock); 3018 3019 /* 3020 * make sure that the socket's not closed 3021 */ 3022 if (sock->fd == INVALID_SOCKET) { 3023 UNLOCK(&sock->lock); 3024 return (ISC_R_CONNREFUSED); 3025 } 3026 3027 INSIST(!sock->bound); 3028 3029 if (sock->pf != sockaddr->type.sa.sa_family) { 3030 UNLOCK(&sock->lock); 3031 return (ISC_R_FAMILYMISMATCH); 3032 } 3033 /* 3034 * Only set SO_REUSEADDR when we want a specific port. 3035 */ 3036 if ((options & ISC_SOCKET_REUSEADDRESS) != 0 && 3037 isc_sockaddr_getport(sockaddr) != (in_port_t)0 && 3038 setsockopt(sock->fd, SOL_SOCKET, SO_REUSEADDR, (void *)&on, 3039 sizeof(on)) < 0) { 3040 UNEXPECTED_ERROR(__FILE__, __LINE__, 3041 "setsockopt(%d) %s", sock->fd, 3042 isc_msgcat_get(isc_msgcat, ISC_MSGSET_GENERAL, 3043 ISC_MSG_FAILED, "failed")); 3044 /* Press on... */ 3045 } 3046 if (bind(sock->fd, &sockaddr->type.sa, sockaddr->length) < 0) { 3047 bind_errno = WSAGetLastError(); 3048 UNLOCK(&sock->lock); 3049 switch (bind_errno) { 3050 case WSAEACCES: 3051 return (ISC_R_NOPERM); 3052 case WSAEADDRNOTAVAIL: 3053 return (ISC_R_ADDRNOTAVAIL); 3054 case WSAEADDRINUSE: 3055 return (ISC_R_ADDRINUSE); 3056 case WSAEINVAL: 3057 return (ISC_R_BOUND); 3058 default: 3059 isc__strerror(bind_errno, strbuf, sizeof(strbuf)); 3060 UNEXPECTED_ERROR(__FILE__, __LINE__, "bind: %s", 3061 strbuf); 3062 return (ISC_R_UNEXPECTED); 3063 } 3064 } 3065 3066 socket_log(__LINE__, sock, sockaddr, TRACE, 3067 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "bound"); 3068 sock->bound = 1; 3069 3070 UNLOCK(&sock->lock); 3071 return (ISC_R_SUCCESS); 3072} 3073 3074isc_result_t 3075isc_socket_filter(isc_socket_t *sock, const char *filter) { 3076 UNUSED(sock); 3077 UNUSED(filter); 3078 3079 REQUIRE(VALID_SOCKET(sock)); 3080 return (ISC_R_NOTIMPLEMENTED); 3081} 3082 3083/* 3084 * Set up to listen on a given socket. We do this by creating an internal 3085 * event that will be dispatched when the socket has read activity. The 3086 * watcher will send the internal event to the task when there is a new 3087 * connection. 3088 * 3089 * Unlike in read, we don't preallocate a done event here. Every time there 3090 * is a new connection we'll have to allocate a new one anyway, so we might 3091 * as well keep things simple rather than having to track them. 3092 */ 3093isc_result_t 3094isc_socket_listen(isc_socket_t *sock, unsigned int backlog) { 3095 char strbuf[ISC_STRERRORSIZE]; 3096 3097 REQUIRE(VALID_SOCKET(sock)); 3098 3099 LOCK(&sock->lock); 3100 CONSISTENT(sock); 3101 3102 /* 3103 * make sure that the socket's not closed 3104 */ 3105 if (sock->fd == INVALID_SOCKET) { 3106 UNLOCK(&sock->lock); 3107 return (ISC_R_CONNREFUSED); 3108 } 3109 3110 REQUIRE(!sock->listener); 3111 REQUIRE(sock->bound); 3112 REQUIRE(sock->type == isc_sockettype_tcp); 3113 3114 if (backlog == 0) 3115 backlog = SOMAXCONN; 3116 3117 if (listen(sock->fd, (int)backlog) < 0) { 3118 UNLOCK(&sock->lock); 3119 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 3120 3121 UNEXPECTED_ERROR(__FILE__, __LINE__, "listen: %s", strbuf); 3122 3123 return (ISC_R_UNEXPECTED); 3124 } 3125 3126 socket_log(__LINE__, sock, NULL, TRACE, 3127 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, "listening"); 3128 sock->listener = 1; 3129 _set_state(sock, SOCK_LISTEN); 3130 3131 UNLOCK(&sock->lock); 3132 return (ISC_R_SUCCESS); 3133} 3134 3135/* 3136 * This should try to do aggressive accept() XXXMLG 3137 */ 3138isc_result_t 3139isc_socket_accept(isc_socket_t *sock, 3140 isc_task_t *task, isc_taskaction_t action, const void *arg) 3141{ 3142 isc_socket_newconnev_t *adev; 3143 isc_socketmgr_t *manager; 3144 isc_task_t *ntask = NULL; 3145 isc_socket_t *nsock; 3146 isc_result_t result; 3147 IoCompletionInfo *lpo; 3148 3149 REQUIRE(VALID_SOCKET(sock)); 3150 3151 manager = sock->manager; 3152 REQUIRE(VALID_MANAGER(manager)); 3153 3154 LOCK(&sock->lock); 3155 CONSISTENT(sock); 3156 3157 /* 3158 * make sure that the socket's not closed 3159 */ 3160 if (sock->fd == INVALID_SOCKET) { 3161 UNLOCK(&sock->lock); 3162 return (ISC_R_CONNREFUSED); 3163 } 3164 3165 REQUIRE(sock->listener); 3166 3167 /* 3168 * Sender field is overloaded here with the task we will be sending 3169 * this event to. Just before the actual event is delivered the 3170 * actual ev_sender will be touched up to be the socket. 3171 */ 3172 adev = (isc_socket_newconnev_t *) 3173 isc_event_allocate(manager->mctx, task, ISC_SOCKEVENT_NEWCONN, 3174 action, arg, sizeof(*adev)); 3175 if (adev == NULL) { 3176 UNLOCK(&sock->lock); 3177 return (ISC_R_NOMEMORY); 3178 } 3179 ISC_LINK_INIT(adev, ev_link); 3180 3181 result = allocate_socket(manager, sock->type, &nsock); 3182 if (result != ISC_R_SUCCESS) { 3183 isc_event_free((isc_event_t **)&adev); 3184 UNLOCK(&sock->lock); 3185 return (result); 3186 } 3187 3188 /* 3189 * AcceptEx() requires we pass in a socket. 3190 */ 3191 nsock->fd = socket(sock->pf, SOCK_STREAM, IPPROTO_TCP); 3192 if (nsock->fd == INVALID_SOCKET) { 3193 free_socket(&nsock, __LINE__); 3194 isc_event_free((isc_event_t **)&adev); 3195 UNLOCK(&sock->lock); 3196 return (ISC_R_FAILURE); // XXXMLG need real error message 3197 } 3198 3199 /* 3200 * Attach to socket and to task. 3201 */ 3202 isc_task_attach(task, &ntask); 3203 nsock->references++; 3204 3205 adev->ev_sender = ntask; 3206 adev->newsocket = nsock; 3207 _set_state(nsock, SOCK_ACCEPT); 3208 3209 /* 3210 * Queue io completion for an accept(). 3211 */ 3212 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 3213 HEAP_ZERO_MEMORY, 3214 sizeof(IoCompletionInfo)); 3215 RUNTIME_CHECK(lpo != NULL); 3216 lpo->acceptbuffer = (void *)HeapAlloc(hHeapHandle, HEAP_ZERO_MEMORY, 3217 (sizeof(SOCKADDR_STORAGE) + 16) * 2); 3218 RUNTIME_CHECK(lpo->acceptbuffer != NULL); 3219 3220 lpo->adev = adev; 3221 lpo->request_type = SOCKET_ACCEPT; 3222 3223 ISCAcceptEx(sock->fd, 3224 nsock->fd, /* Accepted Socket */ 3225 lpo->acceptbuffer, /* Buffer for initial Recv */ 3226 0, /* Length of Buffer */ 3227 sizeof(SOCKADDR_STORAGE) + 16, /* Local address length + 16 */ 3228 sizeof(SOCKADDR_STORAGE) + 16, /* Remote address lengh + 16 */ 3229 (LPDWORD)&lpo->received_bytes, /* Bytes Recved */ 3230 (LPOVERLAPPED)lpo /* Overlapped structure */ 3231 ); 3232 iocompletionport_update(nsock); 3233 3234 socket_log(__LINE__, sock, NULL, TRACE, 3235 isc_msgcat, ISC_MSGSET_SOCKET, ISC_MSG_BOUND, 3236 "accepting for nsock %p fd %d", nsock, nsock->fd); 3237 3238 /* 3239 * Enqueue the event 3240 */ 3241 ISC_LIST_ENQUEUE(sock->accept_list, adev, ev_link); 3242 sock->pending_accept++; 3243 sock->pending_iocp++; 3244 3245 UNLOCK(&sock->lock); 3246 return (ISC_R_SUCCESS); 3247} 3248 3249isc_result_t 3250isc_socket_connect(isc_socket_t *sock, isc_sockaddr_t *addr, 3251 isc_task_t *task, isc_taskaction_t action, const void *arg) 3252{ 3253 char strbuf[ISC_STRERRORSIZE]; 3254 isc_socket_connev_t *cdev; 3255 isc_task_t *ntask = NULL; 3256 isc_socketmgr_t *manager; 3257 IoCompletionInfo *lpo; 3258 int bind_errno; 3259 3260 REQUIRE(VALID_SOCKET(sock)); 3261 REQUIRE(addr != NULL); 3262 REQUIRE(task != NULL); 3263 REQUIRE(action != NULL); 3264 3265 manager = sock->manager; 3266 REQUIRE(VALID_MANAGER(manager)); 3267 REQUIRE(addr != NULL); 3268 3269 if (isc_sockaddr_ismulticast(addr)) 3270 return (ISC_R_MULTICAST); 3271 3272 LOCK(&sock->lock); 3273 CONSISTENT(sock); 3274 3275 /* 3276 * make sure that the socket's not closed 3277 */ 3278 if (sock->fd == INVALID_SOCKET) { 3279 UNLOCK(&sock->lock); 3280 return (ISC_R_CONNREFUSED); 3281 } 3282 3283 /* 3284 * Windows sockets won't connect unless the socket is bound. 3285 */ 3286 if (!sock->bound) { 3287 isc_sockaddr_t any; 3288 3289 isc_sockaddr_anyofpf(&any, isc_sockaddr_pf(addr)); 3290 if (bind(sock->fd, &any.type.sa, any.length) < 0) { 3291 bind_errno = WSAGetLastError(); 3292 UNLOCK(&sock->lock); 3293 switch (bind_errno) { 3294 case WSAEACCES: 3295 return (ISC_R_NOPERM); 3296 case WSAEADDRNOTAVAIL: 3297 return (ISC_R_ADDRNOTAVAIL); 3298 case WSAEADDRINUSE: 3299 return (ISC_R_ADDRINUSE); 3300 case WSAEINVAL: 3301 return (ISC_R_BOUND); 3302 default: 3303 isc__strerror(bind_errno, strbuf, 3304 sizeof(strbuf)); 3305 UNEXPECTED_ERROR(__FILE__, __LINE__, 3306 "bind: %s", strbuf); 3307 return (ISC_R_UNEXPECTED); 3308 } 3309 } 3310 sock->bound = 1; 3311 } 3312 3313 REQUIRE(!sock->pending_connect); 3314 3315 cdev = (isc_socket_connev_t *)isc_event_allocate(manager->mctx, sock, 3316 ISC_SOCKEVENT_CONNECT, 3317 action, arg, 3318 sizeof(*cdev)); 3319 if (cdev == NULL) { 3320 UNLOCK(&sock->lock); 3321 return (ISC_R_NOMEMORY); 3322 } 3323 ISC_LINK_INIT(cdev, ev_link); 3324 3325 if (sock->type == isc_sockettype_tcp) { 3326 /* 3327 * Queue io completion for an accept(). 3328 */ 3329 lpo = (IoCompletionInfo *)HeapAlloc(hHeapHandle, 3330 HEAP_ZERO_MEMORY, 3331 sizeof(IoCompletionInfo)); 3332 lpo->cdev = cdev; 3333 lpo->request_type = SOCKET_CONNECT; 3334 3335 sock->address = *addr; 3336 ISCConnectEx(sock->fd, &addr->type.sa, addr->length, 3337 NULL, 0, NULL, (LPOVERLAPPED)lpo); 3338 3339 /* 3340 * Attach to task. 3341 */ 3342 isc_task_attach(task, &ntask); 3343 cdev->ev_sender = ntask; 3344 3345 sock->pending_connect = 1; 3346 _set_state(sock, SOCK_CONNECT); 3347 3348 /* 3349 * Enqueue the request. 3350 */ 3351 sock->connect_ev = cdev; 3352 sock->pending_iocp++; 3353 } else { 3354 WSAConnect(sock->fd, &addr->type.sa, addr->length, NULL, NULL, NULL, NULL); 3355 cdev->result = ISC_R_SUCCESS; 3356 isc_task_send(task, (isc_event_t **)&cdev); 3357 } 3358 CONSISTENT(sock); 3359 UNLOCK(&sock->lock); 3360 3361 return (ISC_R_SUCCESS); 3362} 3363 3364isc_result_t 3365isc_socket_getpeername(isc_socket_t *sock, isc_sockaddr_t *addressp) { 3366 isc_result_t result; 3367 3368 REQUIRE(VALID_SOCKET(sock)); 3369 REQUIRE(addressp != NULL); 3370 3371 LOCK(&sock->lock); 3372 CONSISTENT(sock); 3373 3374 /* 3375 * make sure that the socket's not closed 3376 */ 3377 if (sock->fd == INVALID_SOCKET) { 3378 UNLOCK(&sock->lock); 3379 return (ISC_R_CONNREFUSED); 3380 } 3381 3382 if (sock->connected) { 3383 *addressp = sock->address; 3384 result = ISC_R_SUCCESS; 3385 } else { 3386 result = ISC_R_NOTCONNECTED; 3387 } 3388 3389 UNLOCK(&sock->lock); 3390 3391 return (result); 3392} 3393 3394isc_result_t 3395isc_socket_getsockname(isc_socket_t *sock, isc_sockaddr_t *addressp) { 3396 ISC_SOCKADDR_LEN_T len; 3397 isc_result_t result; 3398 char strbuf[ISC_STRERRORSIZE]; 3399 3400 REQUIRE(VALID_SOCKET(sock)); 3401 REQUIRE(addressp != NULL); 3402 3403 LOCK(&sock->lock); 3404 CONSISTENT(sock); 3405 3406 /* 3407 * make sure that the socket's not closed 3408 */ 3409 if (sock->fd == INVALID_SOCKET) { 3410 UNLOCK(&sock->lock); 3411 return (ISC_R_CONNREFUSED); 3412 } 3413 3414 if (!sock->bound) { 3415 result = ISC_R_NOTBOUND; 3416 goto out; 3417 } 3418 3419 result = ISC_R_SUCCESS; 3420 3421 len = sizeof(addressp->type); 3422 if (getsockname(sock->fd, &addressp->type.sa, (void *)&len) < 0) { 3423 isc__strerror(WSAGetLastError(), strbuf, sizeof(strbuf)); 3424 UNEXPECTED_ERROR(__FILE__, __LINE__, "getsockname: %s", 3425 strbuf); 3426 result = ISC_R_UNEXPECTED; 3427 goto out; 3428 } 3429 addressp->length = (unsigned int)len; 3430 3431 out: 3432 UNLOCK(&sock->lock); 3433 3434 return (result); 3435} 3436 3437/* 3438 * Run through the list of events on this socket, and cancel the ones 3439 * queued for task "task" of type "how". "how" is a bitmask. 3440 */ 3441void 3442isc_socket_cancel(isc_socket_t *sock, isc_task_t *task, unsigned int how) { 3443 3444 REQUIRE(VALID_SOCKET(sock)); 3445 3446 /* 3447 * Quick exit if there is nothing to do. Don't even bother locking 3448 * in this case. 3449 */ 3450 if (how == 0) 3451 return; 3452 3453 LOCK(&sock->lock); 3454 CONSISTENT(sock); 3455 3456 /* 3457 * make sure that the socket's not closed 3458 */ 3459 if (sock->fd == INVALID_SOCKET) { 3460 UNLOCK(&sock->lock); 3461 return; 3462 } 3463 3464 /* 3465 * All of these do the same thing, more or less. 3466 * Each will: 3467 * o If the internal event is marked as "posted" try to 3468 * remove it from the task's queue. If this fails, mark it 3469 * as canceled instead, and let the task clean it up later. 3470 * o For each I/O request for that task of that type, post 3471 * its done event with status of "ISC_R_CANCELED". 3472 * o Reset any state needed. 3473 */ 3474 3475 if ((how & ISC_SOCKCANCEL_RECV) == ISC_SOCKCANCEL_RECV) { 3476 isc_socketevent_t *dev; 3477 isc_socketevent_t *next; 3478 isc_task_t *current_task; 3479 3480 dev = ISC_LIST_HEAD(sock->recv_list); 3481 while (dev != NULL) { 3482 current_task = dev->ev_sender; 3483 next = ISC_LIST_NEXT(dev, ev_link); 3484 if ((task == NULL) || (task == current_task)) { 3485 dev->result = ISC_R_CANCELED; 3486 send_recvdone_event(sock, &dev); 3487 } 3488 dev = next; 3489 } 3490 } 3491 how &= ~ISC_SOCKCANCEL_RECV; 3492 3493 if ((how & ISC_SOCKCANCEL_SEND) == ISC_SOCKCANCEL_SEND) { 3494 isc_socketevent_t *dev; 3495 isc_socketevent_t *next; 3496 isc_task_t *current_task; 3497 3498 dev = ISC_LIST_HEAD(sock->send_list); 3499 3500 while (dev != NULL) { 3501 current_task = dev->ev_sender; 3502 next = ISC_LIST_NEXT(dev, ev_link); 3503 if ((task == NULL) || (task == current_task)) { 3504 dev->result = ISC_R_CANCELED; 3505 send_senddone_event(sock, &dev); 3506 } 3507 dev = next; 3508 } 3509 } 3510 how &= ~ISC_SOCKCANCEL_SEND; 3511 3512 if (((how & ISC_SOCKCANCEL_ACCEPT) == ISC_SOCKCANCEL_ACCEPT) 3513 && !ISC_LIST_EMPTY(sock->accept_list)) { 3514 isc_socket_newconnev_t *dev; 3515 isc_socket_newconnev_t *next; 3516 isc_task_t *current_task; 3517 3518 dev = ISC_LIST_HEAD(sock->accept_list); 3519 while (dev != NULL) { 3520 current_task = dev->ev_sender; 3521 next = ISC_LIST_NEXT(dev, ev_link); 3522 3523 if ((task == NULL) || (task == current_task)) { 3524 3525 dev->newsocket->references--; 3526 closesocket(dev->newsocket->fd); 3527 dev->newsocket->fd = INVALID_SOCKET; 3528 free_socket(&dev->newsocket, __LINE__); 3529 3530 dev->result = ISC_R_CANCELED; 3531 send_acceptdone_event(sock, &dev); 3532 } 3533 3534 dev = next; 3535 } 3536 } 3537 how &= ~ISC_SOCKCANCEL_ACCEPT; 3538 3539 /* 3540 * Connecting is not a list. 3541 */ 3542 if (((how & ISC_SOCKCANCEL_CONNECT) == ISC_SOCKCANCEL_CONNECT) 3543 && sock->connect_ev != NULL) { 3544 isc_socket_connev_t *dev; 3545 isc_task_t *current_task; 3546 3547 INSIST(sock->pending_connect); 3548 3549 dev = sock->connect_ev; 3550 current_task = dev->ev_sender; 3551 3552 if ((task == NULL) || (task == current_task)) { 3553 closesocket(sock->fd); 3554 sock->fd = INVALID_SOCKET; 3555 _set_state(sock, SOCK_CLOSED); 3556 3557 sock->connect_ev = NULL; 3558 dev->result = ISC_R_CANCELED; 3559 send_connectdone_event(sock, &dev); 3560 } 3561 } 3562 how &= ~ISC_SOCKCANCEL_CONNECT; 3563 3564 maybe_free_socket(&sock, __LINE__); 3565} 3566 3567isc_sockettype_t 3568isc_socket_gettype(isc_socket_t *sock) { 3569 isc_sockettype_t type; 3570 3571 REQUIRE(VALID_SOCKET(sock)); 3572 3573 LOCK(&sock->lock); 3574 3575 /* 3576 * make sure that the socket's not closed 3577 */ 3578 if (sock->fd == INVALID_SOCKET) { 3579 UNLOCK(&sock->lock); 3580 return (ISC_R_CONNREFUSED); 3581 } 3582 3583 type = sock->type; 3584 UNLOCK(&sock->lock); 3585 return (type); 3586} 3587 3588isc_boolean_t 3589isc_socket_isbound(isc_socket_t *sock) { 3590 isc_boolean_t val; 3591 3592 REQUIRE(VALID_SOCKET(sock)); 3593 3594 LOCK(&sock->lock); 3595 CONSISTENT(sock); 3596 3597 /* 3598 * make sure that the socket's not closed 3599 */ 3600 if (sock->fd == INVALID_SOCKET) { 3601 UNLOCK(&sock->lock); 3602 return (ISC_FALSE); 3603 } 3604 3605 val = ((sock->bound) ? ISC_TRUE : ISC_FALSE); 3606 UNLOCK(&sock->lock); 3607 3608 return (val); 3609} 3610 3611void 3612isc_socket_ipv6only(isc_socket_t *sock, isc_boolean_t yes) { 3613#if defined(IPV6_V6ONLY) 3614 int onoff = yes ? 1 : 0; 3615#else 3616 UNUSED(yes); 3617#endif 3618 3619 REQUIRE(VALID_SOCKET(sock)); 3620 3621#ifdef IPV6_V6ONLY 3622 if (sock->pf == AF_INET6) { 3623 (void)setsockopt(sock->fd, IPPROTO_IPV6, IPV6_V6ONLY, 3624 (void *)&onoff, sizeof(onoff)); 3625 } 3626#endif 3627} 3628 3629void 3630isc_socket_cleanunix(isc_sockaddr_t *addr, isc_boolean_t active) { 3631 UNUSED(addr); 3632 UNUSED(active); 3633} 3634 3635isc_result_t 3636isc_socket_permunix(isc_sockaddr_t *addr, isc_uint32_t perm, 3637 isc_uint32_t owner, isc_uint32_t group) 3638{ 3639 UNUSED(addr); 3640 UNUSED(perm); 3641 UNUSED(owner); 3642 UNUSED(group); 3643 return (ISC_R_NOTIMPLEMENTED); 3644} 3645 3646void 3647isc_socket_setname(isc_socket_t *socket, const char *name, void *tag) { 3648 3649 /* 3650 * Name 'socket'. 3651 */ 3652 3653 REQUIRE(VALID_SOCKET(socket)); 3654 3655 LOCK(&socket->lock); 3656 memset(socket->name, 0, sizeof(socket->name)); 3657 strncpy(socket->name, name, sizeof(socket->name) - 1); 3658 socket->tag = tag; 3659 UNLOCK(&socket->lock); 3660} 3661 3662const char * 3663isc_socket_getname(isc_socket_t *socket) { 3664 return (socket->name); 3665} 3666 3667void * 3668isc_socket_gettag(isc_socket_t *socket) { 3669 return (socket->tag); 3670} 3671 3672void 3673isc__socketmgr_setreserved(isc_socketmgr_t *manager, isc_uint32_t reserved) { 3674 UNUSED(manager); 3675 UNUSED(reserved); 3676} 3677