/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2010 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * The original header names were lost in extraction; the list below is
 * reconstructed from the facilities this file actually uses (mblks, kmem,
 * mutexes/cvs, DTrace SDT probes, CPR callbacks, LDC and VIO interfaces).
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stream.h>
#include <sys/strsun.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/ksynch.h>
#include <sys/debug.h>
#include <sys/ethernet.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/sdt.h>
#include <sys/note.h>
#include <sys/callb.h>
#include <sys/cmn_err.h>
#include <sys/ldc.h>
#include <sys/vio_mailbox.h>
#include <sys/vio_util.h>
#include <sys/vnet_common.h>
#include <sys/vnet_gen.h>

/*
 * This file contains the implementation of TxDring data transfer mode of VIO
 * Protocol in vnet. The functions in this file are invoked from vnet_gen.c
 * after TxDring mode is negotiated with the peer during the attribute phase
 * of the handshake. This file contains functions that set up the transmit
 * and receive descriptor rings, and associated resources in TxDring mode. It
 * also contains the transmit and receive data processing functions that are
 * invoked in TxDring mode.
 */

/* Functions exported to vnet_gen.c */
int vgen_create_tx_dring(vgen_ldc_t *ldcp);
void vgen_destroy_tx_dring(vgen_ldc_t *ldcp);
int vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt);
void vgen_unmap_rx_dring(vgen_ldc_t *ldcp);
int vgen_dringsend(void *arg, mblk_t *mp);
void vgen_ldc_msg_worker(void *arg);
void vgen_stop_msg_thread(vgen_ldc_t *ldcp);
int vgen_handle_dringdata(void *arg1, void *arg2);
mblk_t *vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup);
int vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
int vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
    boolean_t caller_holds_lock);

/* Internal functions */
static int vgen_init_multipools(vgen_ldc_t *ldcp);
static int vgen_handle_dringdata_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_process_dringdata(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static int vgen_handle_dringdata_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp);
static void vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt);
static int vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end);
static int vgen_send_dringack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp,
    uint32_t start, int32_t end, uint8_t pstate);
static void vgen_reclaim(vgen_ldc_t *ldcp);
static void vgen_reclaim_dring(vgen_ldc_t *ldcp);

/* Functions imported from vnet_gen.c */
extern int vgen_handle_evt_read(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern int vgen_handle_evt_reset(vgen_ldc_t *ldcp, vgen_caller_t caller);
extern void vgen_handle_pkt_data(void *arg1, void *arg2, uint32_t msglen);
extern void vgen_destroy_rxpools(void *arg);

/* Tunables */
extern int vgen_rxpool_cleanup_delay;
extern boolean_t vnet_jumbo_rxpools;
extern uint32_t vnet_num_descriptors;
extern uint32_t vgen_chain_len;
extern uint32_t vgen_ldcwr_retries;
extern uint32_t vgen_recv_delay;
extern uint32_t vgen_recv_retries;
extern uint32_t vgen_rbufsz1;
extern uint32_t vgen_rbufsz2;
extern uint32_t vgen_rbufsz3;
extern uint32_t vgen_rbufsz4;
extern uint32_t vgen_nrbufs1;
extern uint32_t vgen_nrbufs2;
extern uint32_t vgen_nrbufs3;
extern uint32_t vgen_nrbufs4;
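/*
 * Lock ordering observed in the call paths below: the transmit path takes
 * txlock before tclock; the ack handler takes wrlock before tclock; and the
 * dring data (INFO) handler drops cblock and takes rxlock while processing
 * received descriptors. pollq_lock is a leaf lock taken around the poll
 * queue in vgen_rx() and vgen_poll_rcv().
 */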
#ifdef DEBUG
#define	DEBUG_PRINTF	vgen_debug_printf
extern int vnet_dbglevel;
extern int vgen_inject_err_flag;
extern void vgen_debug_printf(const char *fname, vgen_t *vgenp,
	vgen_ldc_t *ldcp, const char *fmt, ...);
extern boolean_t vgen_inject_error(vgen_ldc_t *ldcp, int error);
#endif

/*
 * Allocate transmit resources for the channel. The resources consist of a
 * transmit descriptor ring and an associated transmit buffer area.
 */
int
vgen_create_tx_dring(vgen_ldc_t *ldcp)
{
	int			i;
	int			rv;
	ldc_mem_info_t		minfo;
	uint32_t		txdsize;
	uint32_t		tbufsize;
	vgen_private_desc_t	*tbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t	*hdrp;
	caddr_t			datap = NULL;
	int			ci;
	uint32_t		ncookies;
	size_t			data_sz;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);

	ldcp->num_txds = vnet_num_descriptors;
	txdsize = sizeof (vnet_public_desc_t);
	tbufsize = sizeof (vgen_private_desc_t);

	/* allocate transmit buffer ring */
	tbufp = kmem_zalloc(ldcp->num_txds * tbufsize, KM_NOSLEEP);
	if (tbufp == NULL) {
		return (DDI_FAILURE);
	}
	ldcp->tbufp = tbufp;
	ldcp->tbufendp = &((ldcp->tbufp)[ldcp->num_txds]);

	/* create transmit descriptor ring */
	rv = ldc_mem_dring_create(ldcp->num_txds, txdsize,
	    &ldcp->tx_dring_handle);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_create() failed\n");
		goto fail;
	}

	/* get the addr of the descriptor ring */
	rv = ldc_mem_dring_info(ldcp->tx_dring_handle, &minfo);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_info() failed\n");
		goto fail;
	}
	ldcp->txdp = (vnet_public_desc_t *)(minfo.vaddr);

	/*
	 * In order to ensure that the number of ldc cookies per descriptor is
	 * limited to be within the default MAX_COOKIES (2), we take the steps
	 * outlined below:
	 *
	 * Align the entire data buffer area to 8K and carve out per descriptor
	 * data buffers starting from this 8K aligned base address.
	 *
	 * We round up the mtu specified to be a multiple of 2K or 4K.
	 * For sizes up to 12K we round up the size to the next 2K.
	 * For sizes > 12K we round up to the next 4K (otherwise sizes such as
	 * 14K could end up needing 3 cookies, with the buffer spread across
	 * 3 8K pages: 8K+6K, 2K+8K+4K, 6K+8K, ...).
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	if (data_sz <= VNET_12K) {
		data_sz = VNET_ROUNDUP_2K(data_sz);
	} else {
		data_sz = VNET_ROUNDUP_4K(data_sz);
	}

	/* allocate extra 8K bytes for alignment */
	ldcp->tx_data_sz = (data_sz * ldcp->num_txds) + VNET_8K;
	datap = kmem_zalloc(ldcp->tx_data_sz, KM_SLEEP);
	ldcp->tx_datap = datap;

	/* align the starting address of the data area to 8K */
	datap = (caddr_t)VNET_ROUNDUP_8K((uintptr_t)datap);

	/*
	 * For each private descriptor, allocate an ldc mem_handle which is
	 * required to map the data during transmit, and set the flags
	 * to free (available for use by the transmit routine).
	 */
	for (i = 0; i < ldcp->num_txds; i++) {

		tbufp = &(ldcp->tbufp[i]);
		rv = ldc_mem_alloc_handle(ldcp->ldc_handle,
		    &(tbufp->memhandle));
		if (rv) {
			tbufp->memhandle = 0;
			goto fail;
		}

		/*
		 * bind ldc memhandle to the corresponding transmit buffer.
		 */
		ci = ncookies = 0;
		rv = ldc_mem_bind_handle(tbufp->memhandle,
		    (caddr_t)datap, data_sz, LDC_SHADOW_MAP,
		    LDC_MEM_R, &(tbufp->memcookie[ci]), &ncookies);
		if (rv != 0) {
			goto fail;
		}
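		/*
		 * Given the 8K-aligned base and the 2K/4K rounding of
		 * data_sz above, ldc_mem_bind_handle() should return at
		 * most MAX_COOKIES (2) cookies per buffer; the ncookies
		 * check below guards this invariant.
		 */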
		/*
		 * successful in binding the handle to tx data buffer.
		 * set datap in the private descr to this buffer.
		 */
		tbufp->datap = datap;

		if ((ncookies == 0) ||
		    (ncookies > MAX_COOKIES)) {
			goto fail;
		}

		for (ci = 1; ci < ncookies; ci++) {
			rv = ldc_mem_nextcookie(tbufp->memhandle,
			    &(tbufp->memcookie[ci]));
			if (rv != 0) {
				goto fail;
			}
		}

		tbufp->ncookies = ncookies;
		datap += data_sz;

		tbufp->flags = VGEN_PRIV_DESC_FREE;
		txdp = &(ldcp->txdp[i]);
		hdrp = &txdp->hdr;
		hdrp->dstate = VIO_DESC_FREE;
		hdrp->ack = B_FALSE;
		tbufp->descp = txdp;
	}

	/*
	 * The descriptors and the associated buffers are all ready;
	 * now bind descriptor ring to the channel.
	 */
	rv = ldc_mem_dring_bind(ldcp->ldc_handle, ldcp->tx_dring_handle,
	    LDC_DIRECT_MAP | LDC_SHADOW_MAP, LDC_MEM_RW,
	    &ldcp->tx_dring_cookie, &ncookies);
	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_mem_dring_bind failed "
		    "rv(%x)\n", rv);
		goto fail;
	}
	ASSERT(ncookies == 1);
	ldcp->tx_dring_ncookies = ncookies;

	/* reset tbuf walking pointers */
	ldcp->next_tbufp = ldcp->tbufp;
	ldcp->cur_tbufp = ldcp->tbufp;

	/* initialize tx seqnum and index */
	ldcp->next_txseq = VNET_ISS;
	ldcp->next_txi = 0;
	ldcp->resched_peer = B_TRUE;
	ldcp->resched_peer_txi = 0;

	return (VGEN_SUCCESS);

fail:
	vgen_destroy_tx_dring(ldcp);
	return (VGEN_FAILURE);
}

/*
 * Free transmit resources for the channel.
 */
void
vgen_destroy_tx_dring(vgen_ldc_t *ldcp)
{
	int			i;
	int			tbufsize = sizeof (vgen_private_desc_t);
	vgen_private_desc_t	*tbufp = ldcp->tbufp;

	/* We first unbind the descriptor ring */
	if (ldcp->tx_dring_ncookies != 0) {
		(void) ldc_mem_dring_unbind(ldcp->tx_dring_handle);
		ldcp->tx_dring_ncookies = 0;
	}

	/* Unbind transmit buffers */
	if (ldcp->tbufp != NULL) {
		/* for each tbuf (priv_desc), free ldc mem_handle */
		for (i = 0; i < ldcp->num_txds; i++) {

			tbufp = &(ldcp->tbufp[i]);

			if (tbufp->datap) { /* if bound to an ldc memhandle */
				(void) ldc_mem_unbind_handle(tbufp->memhandle);
				tbufp->datap = NULL;
			}
			if (tbufp->memhandle) {
				(void) ldc_mem_free_handle(tbufp->memhandle);
				tbufp->memhandle = 0;
			}
		}
	}

	/* Free tx data buffer area */
	if (ldcp->tx_datap != NULL) {
		kmem_free(ldcp->tx_datap, ldcp->tx_data_sz);
		ldcp->tx_datap = NULL;
		ldcp->tx_data_sz = 0;
	}

	/* Free transmit descriptor ring */
	if (ldcp->tx_dring_handle != 0) {
		(void) ldc_mem_dring_destroy(ldcp->tx_dring_handle);
		ldcp->tx_dring_handle = 0;
		ldcp->txdp = NULL;
	}

	/* Free transmit buffer ring */
	if (ldcp->tbufp != NULL) {
		kmem_free(ldcp->tbufp, ldcp->num_txds * tbufsize);
		ldcp->tbufp = ldcp->tbufendp = NULL;
	}
}

/*
 * Map the transmit descriptor ring exported
 * by the peer, as our receive descriptor ring.
 */
int
vgen_map_rx_dring(vgen_ldc_t *ldcp, void *pkt)
{
	int			rv;
	ldc_mem_info_t		minfo;
	ldc_mem_cookie_t	dcookie;
	uint32_t		ncookies;
	uint32_t		num_desc;
	uint32_t		desc_size;
	vio_dring_reg_msg_t	*msg = pkt;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);

	ncookies = msg->ncookies;
	num_desc = msg->num_descriptors;
	desc_size = msg->descriptor_size;
	bcopy(&msg->cookie[0], &dcookie, sizeof (ldc_mem_cookie_t));

	/*
	 * Sanity check.
	 */
	if (num_desc < VGEN_NUM_DESCRIPTORS_MIN ||
	    desc_size < sizeof (vnet_public_desc_t)) {
		goto fail;
	}

	/* Map the remote dring */
	rv = ldc_mem_dring_map(ldcp->ldc_handle, &dcookie, ncookies, num_desc,
	    desc_size, LDC_DIRECT_MAP, &(ldcp->rx_dring_handle));
	if (rv != 0) {
		goto fail;
	}

	/*
	 * Successfully mapped; now try to get info about the mapped dring.
	 */
	rv = ldc_mem_dring_info(ldcp->rx_dring_handle, &minfo);
	if (rv != 0) {
		goto fail;
	}
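	/*
	 * minfo.mtype indicates how the ring was actually mapped (direct or
	 * shadow); it is saved in dring_mtype below and later passed to
	 * vnet_dring_entry_copy()/vnet_dring_entry_set_dstate() so that
	 * descriptor accesses use the matching method.
	 */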
	/*
	 * Save ring address, number of descriptors.
	 */
	ldcp->mrxdp = (vnet_public_desc_t *)(minfo.vaddr);
	bcopy(&dcookie, &(ldcp->rx_dring_cookie), sizeof (dcookie));
	ldcp->rx_dring_ncookies = ncookies;
	ldcp->num_rxds = num_desc;

	/* Initialize rx dring indexes and seqnum */
	ldcp->next_rxi = 0;
	ldcp->next_rxseq = VNET_ISS;
	ldcp->dring_mtype = minfo.mtype;

	/* Save peer's dring_info values */
	bcopy(&dcookie, &(ldcp->peer_hparams.dring_cookie),
	    sizeof (ldc_mem_cookie_t));
	ldcp->peer_hparams.num_desc = num_desc;
	ldcp->peer_hparams.desc_size = desc_size;
	ldcp->peer_hparams.dring_ncookies = ncookies;

	/* Set dring_ident for the peer */
	ldcp->peer_hparams.dring_ident = (uint64_t)ldcp->txdp;

	/* Return the dring_ident in ack msg */
	msg->dring_ident = (uint64_t)ldcp->txdp;

	/* alloc rx mblk pools */
	rv = vgen_init_multipools(ldcp);
	if (rv != 0) {
		/*
		 * We do not return failure if receive mblk pools can't
		 * be allocated; instead allocb(9F) will be used to
		 * dynamically allocate buffers during receive.
		 */
		DWARN(vgenp, ldcp,
		    "vnet%d: failed to allocate rx mblk "
		    "pools for channel(0x%lx)\n",
		    vgenp->instance, ldcp->ldc_id);
	}

	return (VGEN_SUCCESS);

fail:
	if (ldcp->rx_dring_handle != 0) {
		(void) ldc_mem_dring_unmap(ldcp->rx_dring_handle);
		ldcp->rx_dring_handle = 0;
	}
	return (VGEN_FAILURE);
}

/*
 * Unmap the receive descriptor ring.
 */
void
vgen_unmap_rx_dring(vgen_ldc_t *ldcp)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_mblk_pool_t	*vmp = NULL;

	/* Destroy receive mblk pools */
	vio_destroy_multipools(&ldcp->vmp, &vmp);
	if (vmp != NULL) {
		/*
		 * If we can't destroy the rx pool for this channel,
		 * dispatch a task to retry and clean up. Note that we
		 * don't need to wait for the task to complete. If the
		 * vnet device itself gets detached, it will wait for
		 * the task to complete implicitly in
		 * ddi_taskq_destroy().
		 */
		(void) ddi_taskq_dispatch(vgenp->rxp_taskq,
		    vgen_destroy_rxpools, vmp, DDI_SLEEP);
	}

	/* Unmap peer's dring */
	if (ldcp->rx_dring_handle != 0) {
		(void) ldc_mem_dring_unmap(ldcp->rx_dring_handle);
		ldcp->rx_dring_handle = 0;
	}

	/* clobber rx ring members */
	bzero(&ldcp->rx_dring_cookie, sizeof (ldcp->rx_dring_cookie));
	ldcp->mrxdp = NULL;
	ldcp->next_rxi = 0;
	ldcp->num_rxds = 0;
	ldcp->next_rxseq = VNET_ISS;
}

/* Allocate receive resources */
static int
vgen_init_multipools(vgen_ldc_t *ldcp)
{
	size_t		data_sz;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		status;
	uint32_t	sz1 = 0;
	uint32_t	sz2 = 0;
	uint32_t	sz3 = 0;
	uint32_t	sz4 = 0;

	/*
	 * We round up the mtu specified to be a multiple of 2K.
	 * We then create rx pools based on the rounded up size.
	 */
	data_sz = vgenp->max_frame_size + VNET_IPALIGN + VNET_LDCALIGN;
	data_sz = VNET_ROUNDUP_2K(data_sz);

	/*
	 * If pool sizes are specified, use them. Note that the presence of
	 * the first tunable will be used as a hint.
	 */
	if (vgen_rbufsz1 != 0) {

		sz1 = vgen_rbufsz1;
		sz2 = vgen_rbufsz2;
		sz3 = vgen_rbufsz3;
		sz4 = vgen_rbufsz4;

		if (sz4 == 0) { /* need 3 pools */

			ldcp->max_rxpool_size = sz3;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS, sz1, sz2, sz3,
			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);

		} else {

			ldcp->max_rxpool_size = sz4;
			status = vio_init_multipools(&ldcp->vmp,
			    VGEN_NUM_VMPOOLS + 1, sz1, sz2, sz3, sz4,
			    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3,
			    vgen_nrbufs4);

		}

		return (status);
	}

	/*
	 * Pool sizes are not specified. We select the pool sizes based on the
	 * mtu if vnet_jumbo_rxpools is enabled.
	 */
	if (vnet_jumbo_rxpools == B_FALSE || data_sz == VNET_2K) {
		/*
		 * Receive buffer pool allocation based on mtu is disabled.
		 * Use the default mechanism of standard size pool allocation.
		 */
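		/*
		 * The standard pools are the VGEN_DBLK_SZ_128,
		 * VGEN_DBLK_SZ_256 and VGEN_DBLK_SZ_2048 byte pools set up
		 * just below; frames that do not fit the largest pool buffer
		 * fall back to allocb(9F) in vgen_process_dringdata().
		 */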
		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		ldcp->max_rxpool_size = sz3;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS,
		    sz1, sz2, sz3,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3);

		return (status);
	}

	switch (data_sz) {

	case VNET_4K:

		sz1 = VGEN_DBLK_SZ_128;
		sz2 = VGEN_DBLK_SZ_256;
		sz3 = VGEN_DBLK_SZ_2048;
		sz4 = sz3 << 1;			/* 4K */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	default:	/* data_sz: 4K+ to 16K */

		sz1 = VGEN_DBLK_SZ_256;
		sz2 = VGEN_DBLK_SZ_2048;
		sz3 = data_sz >> 1;		/* Jumbo-size/2 */
		sz4 = data_sz;			/* Jumbo-size */
		ldcp->max_rxpool_size = sz4;

		status = vio_init_multipools(&ldcp->vmp, VGEN_NUM_VMPOOLS + 1,
		    sz1, sz2, sz3, sz4,
		    vgen_nrbufs1, vgen_nrbufs2, vgen_nrbufs3, vgen_nrbufs4);
		break;

	}

	return (status);
}

/*
 * This function transmits normal data frames (non-priority) over the channel.
 * It queues the frame into the transmit descriptor ring and sends a
 * VIO_DRING_DATA message if needed, to wake up the peer to (re)start
 * processing.
 */
int
vgen_dringsend(void *arg, mblk_t *mp)
{
	vgen_ldc_t		*ldcp = (vgen_ldc_t *)arg;
	vgen_private_desc_t	*tbufp;
	vgen_private_desc_t	*rtbufp;
	vnet_public_desc_t	*rtxdp;
	vgen_private_desc_t	*ntbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t	*hdrp;
	vgen_stats_t		*statsp;
	struct ether_header	*ehp;
	boolean_t		is_bcast = B_FALSE;
	boolean_t		is_mcast = B_FALSE;
	size_t			mblksz;
	caddr_t			dst;
	mblk_t			*bp;
	size_t			size;
	int			rv = 0;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t		*lp = &ldcp->local_hparams;

	statsp = &ldcp->stats;
	size = msgsize(mp);

	DBG1(vgenp, ldcp, "enter\n");

	/* drop the packet if ldc is not up or handshake is not done */
	if (ldcp->ldc_status != LDC_UP) {
		DWARN(vgenp, ldcp, "status(%d), dropping packet\n",
		    ldcp->ldc_status);
		goto dringsend_exit;
	}

	if (ldcp->hphase != VH_DONE) {
		DWARN(vgenp, ldcp, "hphase(%x), dropping packet\n",
		    ldcp->hphase);
		goto dringsend_exit;
	}

	if (size > (size_t)lp->mtu) {
		DWARN(vgenp, ldcp, "invalid size(%d)\n", size);
		goto dringsend_exit;
	}
	if (size < ETHERMIN)
		size = ETHERMIN;

	ehp = (struct ether_header *)mp->b_rptr;
	is_bcast = IS_BROADCAST(ehp);
	is_mcast = IS_MULTICAST(ehp);

	mutex_enter(&ldcp->txlock);

	/*
	 * allocate a descriptor
	 */
	tbufp = ldcp->next_tbufp;
	ntbufp = NEXTTBUF(ldcp, tbufp);
	if (ntbufp == ldcp->cur_tbufp) { /* out of tbufs/txds */

		mutex_enter(&ldcp->tclock);
		/* Try reclaiming now */
		vgen_reclaim_dring(ldcp);
		ldcp->reclaim_lbolt = ddi_get_lbolt();

		if (ntbufp == ldcp->cur_tbufp) {
			/* Now we are really out of tbuf/txds */
			ldcp->tx_blocked_lbolt = ddi_get_lbolt();
			ldcp->tx_blocked = B_TRUE;
			mutex_exit(&ldcp->tclock);

			statsp->tx_no_desc++;
			mutex_exit(&ldcp->txlock);
			return (VGEN_TX_NORESOURCES);
		}
		mutex_exit(&ldcp->tclock);
	}

	/* update next available tbuf in the ring and update tx index */
	ldcp->next_tbufp = ntbufp;
	INCR_TXI(ldcp->next_txi, ldcp);

	/* Mark the buffer busy before releasing the lock */
	tbufp->flags = VGEN_PRIV_DESC_BUSY;
	mutex_exit(&ldcp->txlock);

	/* copy data into pre-allocated transmit buffer */
	dst = tbufp->datap + VNET_IPALIGN;
	for (bp = mp; bp != NULL; bp = bp->b_cont) {
		mblksz = MBLKL(bp);
		bcopy(bp->b_rptr, dst, mblksz);
		dst += mblksz;
	}

	tbufp->datalen = size;

	/* initialize the corresponding public descriptor (txd) */
	txdp = tbufp->descp;
	hdrp = &txdp->hdr;
	txdp->nbytes = size;
	txdp->ncookies = tbufp->ncookies;
	bcopy((tbufp->memcookie), (txdp->memcookie),
	    tbufp->ncookies * sizeof (ldc_mem_cookie_t));
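	/*
	 * The descriptor is now fully populated; it is marked
	 * VIO_DESC_READY below only while wrlock is held, so the peer is
	 * never notified of a descriptor whose contents are still being
	 * written.
	 */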
	mutex_enter(&ldcp->wrlock);
	/*
	 * If the flags are not set to BUSY, it implies that the clobber
	 * was done while we were copying the data. In that case,
	 * discard the packet and return.
	 */
	if (tbufp->flags != VGEN_PRIV_DESC_BUSY) {
		statsp->oerrors++;
		mutex_exit(&ldcp->wrlock);
		goto dringsend_exit;
	}

	hdrp->dstate = VIO_DESC_READY;

	/* update stats */
	statsp->opackets++;
	statsp->obytes += size;
	if (is_bcast)
		statsp->brdcstxmt++;
	else if (is_mcast)
		statsp->multixmt++;

	/* send dring datamsg to the peer */
	if (ldcp->resched_peer) {

		rtbufp = &ldcp->tbufp[ldcp->resched_peer_txi];
		rtxdp = rtbufp->descp;

		if (rtxdp->hdr.dstate == VIO_DESC_READY) {

			rv = vgen_send_dringdata(ldcp,
			    (uint32_t)ldcp->resched_peer_txi, -1);
			if (rv != 0) {
				/* error: drop the packet */
				DWARN(vgenp, ldcp,
				    "failed sending dringdata msg "
				    "rv(%d) len(%d)\n", rv, size);
				statsp->oerrors++;
			} else {
				ldcp->resched_peer = B_FALSE;
			}

		}

	}

	mutex_exit(&ldcp->wrlock);

dringsend_exit:
	if (rv == ECONNRESET) {
		(void) vgen_handle_evt_reset(ldcp, VGEN_OTHER);
	}
	freemsg(mp);
	DBG1(vgenp, ldcp, "exit\n");
	return (VGEN_TX_SUCCESS);
}

mblk_t *
vgen_poll_rcv(vgen_ldc_t *ldcp, int bytes_to_pickup)
{
	mblk_t	*bp = NULL;
	mblk_t	*bpt = NULL;
	mblk_t	*mp = NULL;
	size_t	mblk_sz = 0;
	size_t	sz = 0;
	uint_t	count = 0;

	mutex_enter(&ldcp->pollq_lock);

	bp = ldcp->pollq_headp;
	while (bp != NULL) {
		/* get the size of this packet */
		mblk_sz = msgdsize(bp);

		/* if adding this pkt exceeds the size limit, we are done */
		if (sz + mblk_sz > bytes_to_pickup) {
			break;
		}

		/* we have room for this packet */
		sz += mblk_sz;

		/* increment the # of packets being sent up */
		count++;

		/* track the last processed pkt */
		bpt = bp;

		/* get the next pkt */
		bp = bp->b_next;
	}

	if (count != 0) {
		/*
		 * picked up some packets; save the head of pkts to be sent up.
		 */
		mp = ldcp->pollq_headp;

		/* move the pollq_headp to skip over the pkts being sent up */
		ldcp->pollq_headp = bp;

		/* picked up all pending pkts in the queue; reset tail also */
		if (ldcp->pollq_headp == NULL) {
			ldcp->pollq_tailp = NULL;
		}

		/* terminate the tail of pkts to be sent up */
		bpt->b_next = NULL;
	}

	/*
	 * We prepend any high priority packets to the chain of packets; note
	 * that if we are already at the bytes_to_pickup limit, we might
	 * slightly exceed that in such cases. That should be ok, as these pkts
	 * are expected to be small in size and arrive at an interval in the
	 * order of a few seconds.
	 */
	if (ldcp->rx_pktdata == vgen_handle_pkt_data &&
	    ldcp->rx_pri_head != NULL) {
		ldcp->rx_pri_tail->b_next = mp;
		mp = ldcp->rx_pri_head;
		ldcp->rx_pri_head = ldcp->rx_pri_tail = NULL;
	}

	mutex_exit(&ldcp->pollq_lock);

	return (mp);
}

/*
 * Process dring data messages (info/ack/nack)
 */
int
vgen_handle_dringdata(void *arg1, void *arg2)
{
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg1;
	vio_msg_tag_t	*tagp = (vio_msg_tag_t *)arg2;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		rv = 0;

	DBG1(vgenp, ldcp, "enter\n");
	switch (tagp->vio_subtype) {

	case VIO_SUBTYPE_INFO:
		/*
		 * To reduce the locking contention, release the
		 * cblock here and re-acquire it once we are done
		 * receiving packets.
		 */
		mutex_exit(&ldcp->cblock);

		mutex_enter(&ldcp->rxlock);
		rv = vgen_handle_dringdata_info(ldcp, tagp);
		mutex_exit(&ldcp->rxlock);

		mutex_enter(&ldcp->cblock);
		break;

	case VIO_SUBTYPE_ACK:
		rv = vgen_handle_dringdata_ack(ldcp, tagp);
		break;

	case VIO_SUBTYPE_NACK:
		rv = vgen_handle_dringdata_nack(ldcp, tagp);
		break;
	}
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}

static int
vgen_handle_dringdata_info(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	uint32_t	start;
	int32_t		end;
	int		rv = 0;
	vio_dring_msg_t	*dringmsg = (vio_dring_msg_t *)tagp;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vgen_stats_t	*statsp = &ldcp->stats;
#ifdef VGEN_HANDLE_LOST_PKTS
	uint32_t	rxi;
	int		n;
#endif

	DBG1(vgenp, ldcp, "enter\n");

	start = dringmsg->start_idx;
	end = dringmsg->end_idx;
	/*
	 * received a data msg, which contains the start and end
	 * indices of the descriptors within the rx ring holding data,
	 * the seq_num of data packet corresponding to the start index,
	 * and the dring_ident.
	 * We can now read the contents of each of these descriptors
	 * and gather data from it.
	 */
	DBG1(vgenp, ldcp, "INFO: start(%d), end(%d)\n",
	    start, end);

	/* validate rx start and end indexes */
	if (!(CHECK_RXI(start, ldcp)) || ((end != -1) &&
	    !(CHECK_RXI(end, ldcp)))) {
		DWARN(vgenp, ldcp, "Invalid Rx start(%d) or end(%d)\n",
		    start, end);
		/* drop the message if invalid index */
		return (rv);
	}

	/* validate dring_ident */
	if (dringmsg->dring_ident != ldcp->peer_hparams.dring_ident) {
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
		    dringmsg->dring_ident);
		/* invalid dring_ident, drop the msg */
		return (rv);
	}

#ifdef DEBUG
	if (vgen_inject_error(ldcp, VGEN_ERR_RXLOST)) {
		/* drop this msg to simulate lost pkts for debugging */
		vgen_inject_err_flag &= ~(VGEN_ERR_RXLOST);
		return (rv);
	}
#endif

	statsp->dring_data_msgs_rcvd++;

#ifdef VGEN_HANDLE_LOST_PKTS

	/* receive start index doesn't match expected index */
	if (ldcp->next_rxi != start) {
		DWARN(vgenp, ldcp, "next_rxi(%d) != start(%d)\n",
		    ldcp->next_rxi, start);

		/* calculate the number of pkts lost */
		if (start >= ldcp->next_rxi) {
			n = start - ldcp->next_rxi;
		} else  {
			n = ldcp->num_rxds - (ldcp->next_rxi - start);
		}

		statsp->rx_lost_pkts += n;
		tagp->vio_subtype = VIO_SUBTYPE_NACK;
		tagp->vio_sid = ldcp->local_sid;

		/* indicate the range of lost descriptors */
		dringmsg->start_idx = ldcp->next_rxi;
		rxi = start;
		DECR_RXI(rxi, ldcp);
		dringmsg->end_idx = rxi;

		/* dring ident is left unchanged */
		rv = vgen_sendmsg(ldcp, (caddr_t)tagp,
		    sizeof (*dringmsg), B_FALSE);
		if (rv != VGEN_SUCCESS) {
			DWARN(vgenp, ldcp,
			    "vgen_sendmsg failed, stype:NACK\n");
			return (rv);
		}

		/*
		 * treat this range of descrs/pkts as dropped
		 * and set the new expected value of next_rxi
		 * and continue(below) to process from the new
		 * start index.
		 */
		ldcp->next_rxi = start;
	}

#endif	/* VGEN_HANDLE_LOST_PKTS */

	/* Now receive messages */
	rv = vgen_process_dringdata(ldcp, tagp);

	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}

static int
vgen_process_dringdata(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	boolean_t		set_ack_start = B_FALSE;
	uint32_t		start;
	uint32_t		ack_end;
	uint32_t		next_rxi;
	uint32_t		rxi;
	int			count = 0;
	int			rv = 0;
	uint32_t		retries = 0;
	vgen_stats_t		*statsp;
	vnet_public_desc_t	rxd;
	vio_dring_entry_hdr_t	*hdrp;
	mblk_t			*bp = NULL;
	mblk_t			*bpt = NULL;
	uint32_t		ack_start;
	boolean_t		rxd_err = B_FALSE;
	mblk_t			*mp = NULL;
	vio_mblk_t		*vmp = NULL;
	size_t			nbytes;
	boolean_t		ack_needed = B_FALSE;
	size_t			nread;
	uint64_t		off = 0;
	struct ether_header	*ehp;
	vio_dring_msg_t		*dringmsg = (vio_dring_msg_t *)tagp;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vgen_hparams_t		*lp = &ldcp->local_hparams;

	DBG1(vgenp, ldcp, "enter\n");

	statsp = &ldcp->stats;
	start = dringmsg->start_idx;

	/*
	 * Start processing the descriptors from the specified start index,
	 * until we find a descriptor that is not ready to be processed, or
	 * until we process the entire descriptor ring and wrap around up to
	 * the start index.
	 */

	/* need to set the start index of descriptors to be ack'd */
	set_ack_start = B_TRUE;

	/* index up to which we have ack'd */
	ack_end = start;
	DECR_RXI(ack_end, ldcp);

	next_rxi = rxi = start;
	do {
vgen_recv_retry:
		rv = vnet_dring_entry_copy(&(ldcp->mrxdp[rxi]), &rxd,
		    ldcp->dring_mtype, ldcp->rx_dring_handle, rxi, rxi);
		if (rv != 0) {
			DWARN(vgenp, ldcp,
			    "ldc_mem_dring_acquire() failed rv(%d)\n", rv);
			statsp->ierrors++;
			return (rv);
		}

		hdrp = &rxd.hdr;

		if (hdrp->dstate != VIO_DESC_READY) {
			/*
			 * Before waiting and retrying here, send up
			 * the packets that are received already
			 */
			if (bp != NULL) {
				DTRACE_PROBE1(vgen_rcv_msgs, int, count);
				vgen_rx(ldcp, bp, bpt);
				count = 0;
				bp = bpt = NULL;
			}
			/*
			 * descriptor is not ready.
			 * retry descriptor acquire, stop processing
			 * after max # retries.
			 */
			if (retries == vgen_recv_retries)
				break;
			retries++;
			drv_usecwait(vgen_recv_delay);
			goto vgen_recv_retry;
		}
		retries = 0;

		if (set_ack_start) {
			/*
			 * initialize the start index of the range
			 * of descriptors to be ack'd.
			 */
			ack_start = rxi;
			set_ack_start = B_FALSE;
		}

		if ((rxd.nbytes < ETHERMIN) ||
		    (rxd.nbytes > lp->mtu) ||
		    (rxd.ncookies == 0) ||
		    (rxd.ncookies > MAX_COOKIES)) {
			rxd_err = B_TRUE;
		} else {
			/*
			 * Try to allocate an mblk from the free pool
			 * of recv mblks for the channel.
			 * If this fails, use allocb().
			 */
			nbytes = (VNET_IPALIGN + rxd.nbytes + 7) & ~7;
			if (nbytes > ldcp->max_rxpool_size) {
				mp = allocb(VNET_IPALIGN + rxd.nbytes + 8,
				    BPRI_MED);
				vmp = NULL;
			} else {
				vmp = vio_multipool_allocb(&ldcp->vmp, nbytes);
				if (vmp == NULL) {
					statsp->rx_vio_allocb_fail++;
					/*
					 * Data buffer returned by allocb(9F)
					 * is 8byte aligned. We allocate extra
					 * 8 bytes to ensure size is multiple
					 * of 8 bytes for ldc_mem_copy().
					 */
					mp = allocb(
					    VNET_IPALIGN + rxd.nbytes + 8,
					    BPRI_MED);
				} else {
					mp = vmp->mp;
				}
			}
		}

		if ((rxd_err) || (mp == NULL)) {
			/*
			 * rxd_err or allocb() failure,
			 * drop this packet, get next.
			 */
			if (rxd_err) {
				statsp->ierrors++;
				rxd_err = B_FALSE;
			} else {
				statsp->rx_allocb_fail++;
			}

			ack_needed = hdrp->ack;

			/* set descriptor done bit */
			rv = vnet_dring_entry_set_dstate(&(ldcp->mrxdp[rxi]),
			    ldcp->dring_mtype, ldcp->rx_dring_handle,
			    rxi, rxi, VIO_DESC_DONE);
			if (rv != 0) {
				DWARN(vgenp, ldcp,
				    "vnet_dring_entry_set_dstate err rv(%d)\n",
				    rv);
				return (rv);
			}

			if (ack_needed) {
				ack_needed = B_FALSE;
				/*
				 * sender needs ack for this packet,
				 * ack pkts up to this index.
				 */
				ack_end = rxi;

				rv = vgen_send_dringack(ldcp, tagp,
				    ack_start, ack_end, VIO_DP_ACTIVE);
				if (rv != VGEN_SUCCESS) {
					goto error_ret;
				}

				/* need to set new ack start index */
				set_ack_start = B_TRUE;
			}
			goto vgen_next_rxi;
		}
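		/*
		 * Copy the frame out of the mapped descriptor. nbytes was
		 * rounded up to a multiple of 8 above, as required by
		 * ldc_mem_copy(); on return, nread holds the number of
		 * bytes actually copied.
		 */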
		nread = nbytes;
		rv = ldc_mem_copy(ldcp->ldc_handle, (caddr_t)mp->b_rptr,
		    off, &nread, rxd.memcookie, rxd.ncookies, LDC_COPY_IN);

		/* if ldc_mem_copy() failed */
		if (rv) {
			DWARN(vgenp, ldcp, "ldc_mem_copy err rv(%d)\n", rv);
			statsp->ierrors++;
			freemsg(mp);
			goto error_ret;
		}

		ack_needed = hdrp->ack;

		rv = vnet_dring_entry_set_dstate(&(ldcp->mrxdp[rxi]),
		    ldcp->dring_mtype, ldcp->rx_dring_handle,
		    rxi, rxi, VIO_DESC_DONE);
		if (rv != 0) {
			DWARN(vgenp, ldcp,
			    "vnet_dring_entry_set_dstate err rv(%d)\n", rv);
			freemsg(mp);
			goto error_ret;
		}

		mp->b_rptr += VNET_IPALIGN;

		if (ack_needed) {
			ack_needed = B_FALSE;
			/*
			 * sender needs ack for this packet,
			 * ack pkts up to this index.
			 */
			ack_end = rxi;

			rv = vgen_send_dringack(ldcp, tagp,
			    ack_start, ack_end, VIO_DP_ACTIVE);
			if (rv != VGEN_SUCCESS) {
				freemsg(mp);
				goto error_ret;
			}

			/* need to set new ack start index */
			set_ack_start = B_TRUE;
		}

		if (nread != nbytes) {
			DWARN(vgenp, ldcp,
			    "ldc_mem_copy nread(%lx), nbytes(%lx)\n",
			    nread, nbytes);
			statsp->ierrors++;
			freemsg(mp);
			goto vgen_next_rxi;
		}

		/* point to the actual end of data */
		mp->b_wptr = mp->b_rptr + rxd.nbytes;

		if (vmp != NULL) {
			vmp->state = VIO_MBLK_HAS_DATA;
		}

		/* update stats */
		statsp->ipackets++;
		statsp->rbytes += rxd.nbytes;
		ehp = (struct ether_header *)mp->b_rptr;
		if (IS_BROADCAST(ehp))
			statsp->brdcstrcv++;
		else if (IS_MULTICAST(ehp))
			statsp->multircv++;

		/* build a chain of received packets */
		if (bp == NULL) {	/* first pkt */
			bp = mp;
			bpt = bp;
			bpt->b_next = NULL;
		} else {
			mp->b_next = NULL;
			bpt->b_next = mp;
			bpt = mp;
		}

		if (count++ > vgen_chain_len) {
			DTRACE_PROBE1(vgen_rcv_msgs, int, count);
			vgen_rx(ldcp, bp, bpt);
			count = 0;
			bp = bpt = NULL;
		}

vgen_next_rxi:
		/* update end index of range of descrs to be ack'd */
		ack_end = rxi;

		/* update the next index to be processed */
		INCR_RXI(next_rxi, ldcp);
		if (next_rxi == start) {
			/*
			 * processed the entire descriptor ring up to
			 * the index at which we started.
			 */
			break;
		}

		rxi = next_rxi;
	_NOTE(CONSTCOND)
	} while (1);
	/*
	 * send an ack message to peer indicating that we have stopped
	 * processing descriptors.
	 */
	if (set_ack_start) {
		/*
		 * We have ack'd up to some index and we have not
		 * processed any descriptors beyond that index.
		 * Use the last ack'd index as both the start and
		 * end of range of descrs being ack'd.
		 * Note: This results in acking the last index twice
		 * and should be harmless.
		 */
		ack_start = ack_end;
	}

	rv = vgen_send_dringack(ldcp, tagp, ack_start, ack_end,
	    VIO_DP_STOPPED);
	if (rv != VGEN_SUCCESS) {
		goto error_ret;
	}

	/* save new recv index of next dring msg */
	ldcp->next_rxi = next_rxi;

error_ret:
	/* send up packets received so far */
	if (bp != NULL) {
		DTRACE_PROBE1(vgen_rcv_msgs, int, count);
		vgen_rx(ldcp, bp, bpt);
		bp = bpt = NULL;
	}
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}

static int
vgen_handle_dringdata_ack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	int			rv = 0;
	uint32_t		start;
	int32_t			end;
	uint32_t		txi;
	boolean_t		ready_txd = B_FALSE;
	vgen_stats_t		*statsp;
	vgen_private_desc_t	*tbufp;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t	*hdrp;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t		*dringmsg = (vio_dring_msg_t *)tagp;

	DBG1(vgenp, ldcp, "enter\n");
	start = dringmsg->start_idx;
	end = dringmsg->end_idx;
	statsp = &ldcp->stats;

	/*
	 * received an ack corresponding to a specific descriptor for
	 * which we had set the ACK bit in the descriptor (during
	 * transmit). This enables us to reclaim descriptors.
	 */
	DBG2(vgenp, ldcp, "ACK: start(%d), end(%d)\n", start, end);

	/* validate start and end indexes in the tx ack msg */
	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
		/* drop the message if invalid index */
		DWARN(vgenp, ldcp, "Invalid Tx ack start(%d) or end(%d)\n",
		    start, end);
		return (rv);
	}

	/* validate dring_ident */
	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
		/* invalid dring_ident, drop the msg */
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
		    dringmsg->dring_ident);
		return (rv);
	}
	statsp->dring_data_acks_rcvd++;

	/* reclaim descriptors that are done */
	vgen_reclaim(ldcp);

	if (dringmsg->dring_process_state != VIO_DP_STOPPED) {
		/*
		 * receiver continued processing descriptors after
		 * sending us the ack.
		 */
		return (rv);
	}

	statsp->dring_stopped_acks_rcvd++;
	/* receiver stopped processing descriptors */
	mutex_enter(&ldcp->wrlock);
	mutex_enter(&ldcp->tclock);

	/*
	 * determine if there are any pending tx descriptors
	 * ready to be processed by the receiver(peer) and if so,
	 * send a message to the peer to restart receiving.
	 */
	ready_txd = B_FALSE;

	/*
	 * using the end index of the descriptor range for which
	 * we received the ack, check if the next descriptor is
	 * ready.
	 */
	txi = end;
	INCR_TXI(txi, ldcp);
	tbufp = &ldcp->tbufp[txi];
	txdp = tbufp->descp;
	hdrp = &txdp->hdr;
	if (hdrp->dstate == VIO_DESC_READY) {
		ready_txd = B_TRUE;
	} else {
		/*
		 * descr next to the end of ack'd descr range is not
		 * ready.
		 * starting from the current reclaim index, check
		 * if any descriptor is ready.
		 */
		txi = ldcp->cur_tbufp - ldcp->tbufp;
		tbufp = &ldcp->tbufp[txi];
		txdp = tbufp->descp;
		hdrp = &txdp->hdr;
		if (hdrp->dstate == VIO_DESC_READY) {
			ready_txd = B_TRUE;
		}
	}

	if (ready_txd) {
		/*
		 * we have tx descriptor(s) ready to be
		 * processed by the receiver.
		 * send a message to the peer with the start index
		 * of ready descriptors.
		 */
		rv = vgen_send_dringdata(ldcp, txi, -1);
		if (rv != VGEN_SUCCESS) {
			ldcp->resched_peer = B_TRUE;
			ldcp->resched_peer_txi = txi;
			mutex_exit(&ldcp->tclock);
			mutex_exit(&ldcp->wrlock);
			return (rv);
		}
	} else {
		/*
		 * no ready tx descriptors. set the flag to send a
		 * message to peer when tx descriptors are ready in
		 * transmit routine.
		 */
		ldcp->resched_peer = B_TRUE;
		ldcp->resched_peer_txi = ldcp->cur_tbufp - ldcp->tbufp;
	}

	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->wrlock);
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}

static int
vgen_handle_dringdata_nack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	int			rv = 0;
	uint32_t		start;
	int32_t			end;
	uint32_t		txi;
	vnet_public_desc_t	*txdp;
	vio_dring_entry_hdr_t	*hdrp;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t		*dringmsg = (vio_dring_msg_t *)tagp;

	DBG1(vgenp, ldcp, "enter\n");
	start = dringmsg->start_idx;
	end = dringmsg->end_idx;

	/*
	 * peer sent a NACK msg to indicate lost packets.
	 * The start and end correspond to the range of descriptors
	 * for which the peer didn't receive a dring data msg and so
	 * didn't receive the corresponding data.
	 */
	DWARN(vgenp, ldcp, "NACK: start(%d), end(%d)\n", start, end);

	/* validate start and end indexes in the tx nack msg */
	if (!(CHECK_TXI(start, ldcp)) || !(CHECK_TXI(end, ldcp))) {
		/* drop the message if invalid index */
		DWARN(vgenp, ldcp, "Invalid Tx nack start(%d) or end(%d)\n",
		    start, end);
		return (rv);
	}

	/* validate dring_ident */
	if (dringmsg->dring_ident != ldcp->local_hparams.dring_ident) {
		/* invalid dring_ident, drop the msg */
		DWARN(vgenp, ldcp, "Invalid dring ident 0x%lx\n",
		    dringmsg->dring_ident);
		return (rv);
	}
	mutex_enter(&ldcp->txlock);
	mutex_enter(&ldcp->tclock);

	if (ldcp->next_tbufp == ldcp->cur_tbufp) {
		/* no busy descriptors, bogus nack? */
		mutex_exit(&ldcp->tclock);
		mutex_exit(&ldcp->txlock);
		return (rv);
	}

	/* we just mark the descrs as done so they can be reclaimed */
	for (txi = start; txi <= end; ) {
		txdp = &(ldcp->txdp[txi]);
		hdrp = &txdp->hdr;
		if (hdrp->dstate == VIO_DESC_READY)
			hdrp->dstate = VIO_DESC_DONE;
		INCR_TXI(txi, ldcp);
	}
	mutex_exit(&ldcp->tclock);
	mutex_exit(&ldcp->txlock);
	DBG1(vgenp, ldcp, "exit rv(%d)\n", rv);
	return (rv);
}

/*
 * Send received packets up the stack.
 */
static void
vgen_rx(vgen_ldc_t *ldcp, mblk_t *bp, mblk_t *bpt)
{
	vio_net_rx_cb_t	vrx_cb = ldcp->portp->vcb.vio_net_rx_cb;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	if (ldcp->msg_thread != NULL) {
		ASSERT(MUTEX_HELD(&ldcp->rxlock));
	} else {
		ASSERT(MUTEX_HELD(&ldcp->cblock));
	}

	mutex_enter(&ldcp->pollq_lock);

	if (ldcp->polling_on == B_TRUE) {
		/*
		 * If we are in polling mode, simply queue
		 * the packets onto the poll queue and return.
		 */
		if (ldcp->pollq_headp == NULL) {
			ldcp->pollq_headp = bp;
			ldcp->pollq_tailp = bpt;
		} else {
			ldcp->pollq_tailp->b_next = bp;
			ldcp->pollq_tailp = bpt;
		}

		mutex_exit(&ldcp->pollq_lock);
		return;
	}

	/*
	 * Prepend any pending mblks in the poll queue, now that we
	 * are in interrupt mode, before sending up the chain of pkts.
	 */
	if (ldcp->pollq_headp != NULL) {
		DBG2(vgenp, ldcp, "vgen_rx(%lx), pending pollq_headp\n",
		    (uintptr_t)ldcp);
		ldcp->pollq_tailp->b_next = bp;
		bp = ldcp->pollq_headp;
		ldcp->pollq_headp = ldcp->pollq_tailp = NULL;
	}

	mutex_exit(&ldcp->pollq_lock);

	if (ldcp->msg_thread != NULL) {
		mutex_exit(&ldcp->rxlock);
	} else {
		mutex_exit(&ldcp->cblock);
	}

	/* Send up the packets */
	vrx_cb(ldcp->portp->vhp, bp);

	if (ldcp->msg_thread != NULL) {
		mutex_enter(&ldcp->rxlock);
	} else {
		mutex_enter(&ldcp->cblock);
	}
}

static void
vgen_reclaim(vgen_ldc_t *ldcp)
{
	mutex_enter(&ldcp->tclock);

	vgen_reclaim_dring(ldcp);
	ldcp->reclaim_lbolt = ddi_get_lbolt();

	mutex_exit(&ldcp->tclock);
}

/*
 * Transmit reclaim function. Starting from the current reclaim index,
 * look for descriptors marked DONE and reclaim the descriptor.
 */
static void
vgen_reclaim_dring(vgen_ldc_t *ldcp)
{
	int			count = 0;
	vnet_public_desc_t	*txdp;
	vgen_private_desc_t	*tbufp;
	vio_dring_entry_hdr_t	*hdrp;

	tbufp = ldcp->cur_tbufp;
	txdp = tbufp->descp;
	hdrp = &txdp->hdr;

	while ((hdrp->dstate == VIO_DESC_DONE) &&
	    (tbufp != ldcp->next_tbufp)) {
		tbufp->flags = VGEN_PRIV_DESC_FREE;
		hdrp->dstate = VIO_DESC_FREE;
		hdrp->ack = B_FALSE;

		tbufp = NEXTTBUF(ldcp, tbufp);
		txdp = tbufp->descp;
		hdrp = &txdp->hdr;
		count++;
	}

	ldcp->cur_tbufp = tbufp;

	/*
	 * Check if mac layer should be notified to restart transmissions
	 */
	if ((ldcp->tx_blocked) && (count > 0)) {
		vio_net_tx_update_t vtx_update =
		    ldcp->portp->vcb.vio_net_tx_update;

		ldcp->tx_blocked = B_FALSE;
		vtx_update(ldcp->portp->vhp);
	}
}

/*
 * Send descriptor ring data message to the peer over ldc.
 */
static int
vgen_send_dringdata(vgen_ldc_t *ldcp, uint32_t start, int32_t end)
{
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	dringmsg, *msgp = &dringmsg;
	vio_msg_tag_t	*tagp = &msgp->tag;
	vgen_stats_t	*statsp = &ldcp->stats;
	int		rv;

#ifdef DEBUG
	if (vgen_inject_error(ldcp, VGEN_ERR_TXTIMEOUT)) {
		return (VGEN_SUCCESS);
	}
#endif
	bzero(msgp, sizeof (*msgp));

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_INFO;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;

	msgp->dring_ident = ldcp->local_hparams.dring_ident;
	msgp->start_idx = start;
	msgp->end_idx = end;

	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (dringmsg), B_TRUE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg failed\n");
		return (rv);
	}

	statsp->dring_data_msgs_sent++;

	DBG2(vgenp, ldcp, "DRING_DATA_SENT \n");

	return (VGEN_SUCCESS);
}

/*
 * Send dring data ack message.
 */
static int
vgen_send_dringack(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp, uint32_t start,
    int32_t end, uint8_t pstate)
{
	int		rv = 0;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	vio_dring_msg_t	*msgp = (vio_dring_msg_t *)tagp;
	vgen_stats_t	*statsp = &ldcp->stats;

	tagp->vio_msgtype = VIO_TYPE_DATA;
	tagp->vio_subtype = VIO_SUBTYPE_ACK;
	tagp->vio_subtype_env = VIO_DRING_DATA;
	tagp->vio_sid = ldcp->local_sid;
	msgp->start_idx = start;
	msgp->end_idx = end;
	msgp->dring_process_state = pstate;

	rv = vgen_sendmsg(ldcp, (caddr_t)tagp, sizeof (*msgp), B_FALSE);
	if (rv != VGEN_SUCCESS) {
		DWARN(vgenp, ldcp, "vgen_sendmsg() failed\n");
	}

	statsp->dring_data_acks_sent++;
	if (pstate == VIO_DP_STOPPED) {
		statsp->dring_stopped_acks_sent++;
	}

	return (rv);
}

/*
 * Wrapper routine to send the given message over ldc using ldc_write().
 */
int
vgen_sendmsg(vgen_ldc_t *ldcp, caddr_t msg, size_t msglen,
    boolean_t caller_holds_lock)
{
	int			rv;
	size_t			len;
	uint32_t		retries = 0;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);
	vio_msg_tag_t		*tagp = (vio_msg_tag_t *)msg;
	vio_dring_msg_t		*dmsg;
	vio_raw_data_msg_t	*rmsg;
	boolean_t		data_msg = B_FALSE;

	len = msglen;
	if ((len == 0) || (msg == NULL))
		return (VGEN_FAILURE);

	if (!caller_holds_lock) {
		mutex_enter(&ldcp->wrlock);
	}

	if (tagp->vio_subtype == VIO_SUBTYPE_INFO) {
		if (tagp->vio_subtype_env == VIO_DRING_DATA) {
			dmsg = (vio_dring_msg_t *)tagp;
			dmsg->seq_num = ldcp->next_txseq;
			data_msg = B_TRUE;
		} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
			rmsg = (vio_raw_data_msg_t *)tagp;
			rmsg->seq_num = ldcp->next_txseq;
			data_msg = B_TRUE;
		}
	}

	do {
		len = msglen;
		rv = ldc_write(ldcp->ldc_handle, (caddr_t)msg, &len);
		if (retries++ >= vgen_ldcwr_retries)
			break;
	} while (rv == EWOULDBLOCK);

	if (rv == 0 && data_msg == B_TRUE) {
		ldcp->next_txseq++;
	}

	if (!caller_holds_lock) {
		mutex_exit(&ldcp->wrlock);
	}

	if (rv != 0) {
		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
		    rv, msglen);
		return (rv);
	}

	if (len != msglen) {
		DWARN(vgenp, ldcp, "ldc_write failed: rv(%d) msglen(%d)\n",
		    rv, msglen);
		return (VGEN_FAILURE);
	}

	return (VGEN_SUCCESS);
}

int
vgen_check_datamsg_seq(vgen_ldc_t *ldcp, vio_msg_tag_t *tagp)
{
	vio_raw_data_msg_t	*rmsg;
	vio_dring_msg_t		*dmsg;
	uint64_t		seq_num;
	vgen_t			*vgenp = LDC_TO_VGEN(ldcp);

	if (tagp->vio_subtype_env == VIO_DRING_DATA) {
		dmsg = (vio_dring_msg_t *)tagp;
		seq_num = dmsg->seq_num;
	} else if (tagp->vio_subtype_env == VIO_PKT_DATA) {
		rmsg = (vio_raw_data_msg_t *)tagp;
		seq_num = rmsg->seq_num;
	} else {
		return (EINVAL);
	}

	if (seq_num != ldcp->next_rxseq) {

		/* seqnums don't match */
		DWARN(vgenp, ldcp,
		    "next_rxseq(0x%lx) != seq_num(0x%lx)\n",
		    ldcp->next_rxseq, seq_num);
		return (EINVAL);

	}

	ldcp->next_rxseq++;

	return (0);
}

/*
 * vgen_ldc_msg_worker -- A per LDC worker thread. This thread is woken up by
 * the LDC interrupt handler to process LDC packets and receive data.
 */
void
vgen_ldc_msg_worker(void *arg)
{
	callb_cpr_t	cprinfo;
	vgen_ldc_t	*ldcp = (vgen_ldc_t *)arg;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);
	int		rv;

	DBG1(vgenp, ldcp, "enter\n");
	CALLB_CPR_INIT(&cprinfo, &ldcp->msg_thr_lock, callb_generic_cpr,
	    "vnet_rcv_thread");
	mutex_enter(&ldcp->msg_thr_lock);
	while (!(ldcp->msg_thr_flags & VGEN_WTHR_STOP)) {

		CALLB_CPR_SAFE_BEGIN(&cprinfo);
		/*
		 * Wait until the data is received or a stop
		 * request is received.
		 */
		while (!(ldcp->msg_thr_flags &
		    (VGEN_WTHR_DATARCVD | VGEN_WTHR_STOP))) {
			cv_wait(&ldcp->msg_thr_cv, &ldcp->msg_thr_lock);
		}
		CALLB_CPR_SAFE_END(&cprinfo, &ldcp->msg_thr_lock)

		/*
		 * First process the stop request.
		 */
		if (ldcp->msg_thr_flags & VGEN_WTHR_STOP) {
			DBG2(vgenp, ldcp, "stopped\n");
			break;
		}
		ldcp->msg_thr_flags &= ~VGEN_WTHR_DATARCVD;
		ldcp->msg_thr_flags |= VGEN_WTHR_PROCESSING;
		mutex_exit(&ldcp->msg_thr_lock);
		DBG2(vgenp, ldcp, "calling vgen_handle_evt_read\n");
		rv = vgen_handle_evt_read(ldcp, VGEN_MSG_THR);
		mutex_enter(&ldcp->msg_thr_lock);
		ldcp->msg_thr_flags &= ~VGEN_WTHR_PROCESSING;
		if (rv != 0) {
			/*
			 * Channel has been reset. The thread should now exit.
			 * The thread may be recreated if TxDring is negotiated
			 * on this channel after the channel comes back up
			 * again.
			 */
			ldcp->msg_thr_flags |= VGEN_WTHR_STOP;
			break;
		}
	}

	/*
	 * Update the run status and wakeup the thread that
	 * has sent the stop request.
	 */
	ldcp->msg_thr_flags &= ~VGEN_WTHR_STOP;
	ldcp->msg_thread = NULL;
	CALLB_CPR_EXIT(&cprinfo);
	DBG1(vgenp, ldcp, "exit\n");
	thread_exit();
}

/* vgen_stop_msg_thread -- Coordinate with the receive thread to stop it */
void
vgen_stop_msg_thread(vgen_ldc_t *ldcp)
{
	kt_did_t	tid = 0;
	vgen_t		*vgenp = LDC_TO_VGEN(ldcp);

	DBG1(vgenp, ldcp, "enter\n");
	/*
	 * Send a stop request by setting the stop flag and
	 * wait until the receive thread stops.
	 */
	mutex_enter(&ldcp->msg_thr_lock);
	if (ldcp->msg_thread != NULL) {
		tid = ldcp->msg_thread->t_did;
		ldcp->msg_thr_flags |= VGEN_WTHR_STOP;
		cv_signal(&ldcp->msg_thr_cv);
	}
	mutex_exit(&ldcp->msg_thr_lock);

	if (tid != 0) {
		thread_join(tid);
	}
	DBG1(vgenp, ldcp, "exit\n");
}