cxgb_cpl_io.c revision 183059
1129198Scognet/************************************************************************** 2129198Scognet 3139735SimpCopyright (c) 2007-2008, Chelsio Inc. 4129198ScognetAll rights reserved. 5129198Scognet 6129198ScognetRedistribution and use in source and binary forms, with or without 7129198Scognetmodification, are permitted provided that the following conditions are met: 8129198Scognet 9129198Scognet 1. Redistributions of source code must retain the above copyright notice, 10129198Scognet this list of conditions and the following disclaimer. 11129198Scognet 12129198Scognet 2. Neither the name of the Chelsio Corporation nor the names of its 13129198Scognet contributors may be used to endorse or promote products derived from 14129198Scognet this software without specific prior written permission. 15129198Scognet 16129198ScognetTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 17129198ScognetAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18129198ScognetIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19129198ScognetARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 20129198ScognetLIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21129198ScognetCONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22129198ScognetSUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23129198ScognetINTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24129198ScognetCONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25129198ScognetARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26129198ScognetPOSSIBILITY OF SUCH DAMAGE. 
27129198Scognet 28129198Scognet***************************************************************************/ 29129198Scognet 30129198Scognet#include <sys/cdefs.h> 31129198Scognet__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_cpl_io.c 183059 2008-09-16 01:02:17Z kmacy $"); 32129198Scognet 33129198Scognet#include <sys/param.h> 34129198Scognet#include <sys/systm.h> 35129198Scognet#include <sys/fcntl.h> 36129198Scognet#include <sys/kernel.h> 37129198Scognet#include <sys/limits.h> 38129198Scognet#include <sys/ktr.h> 39129198Scognet#include <sys/lock.h> 40129198Scognet#include <sys/mbuf.h> 41129198Scognet#include <sys/mutex.h> 42129198Scognet#include <sys/sockstate.h> 43129198Scognet#include <sys/sockopt.h> 44129198Scognet#include <sys/socket.h> 45129198Scognet#include <sys/sockbuf.h> 46129198Scognet#include <sys/sysctl.h> 47129198Scognet#include <sys/syslog.h> 48129198Scognet#include <sys/protosw.h> 49129198Scognet#include <sys/priv.h> 50129198Scognet#include <sys/vimage.h> 51129198Scognet 52129198Scognet#include <net/if.h> 53129198Scognet#include <net/route.h> 54129198Scognet 55129198Scognet#include <netinet/in.h> 56129198Scognet#include <netinet/in_pcb.h> 57129198Scognet#include <netinet/in_systm.h> 58273827Sandrew#include <netinet/in_var.h> 59129198Scognet 60129198Scognet 61129198Scognet#include <dev/cxgb/cxgb_osdep.h> 62129198Scognet#include <dev/cxgb/sys/mbufq.h> 63129198Scognet 64129198Scognet#include <netinet/ip.h> 65129198Scognet#include <netinet/tcp_var.h> 66129198Scognet#include <netinet/tcp_fsm.h> 67129198Scognet#include <netinet/tcp_offload.h> 68129198Scognet#include <netinet/tcp_seq.h> 69129198Scognet#include <netinet/tcp_syncache.h> 70129198Scognet#include <netinet/tcp_timer.h> 71129198Scognet#include <net/route.h> 72129198Scognet 73129198Scognet#include <dev/cxgb/t3cdev.h> 74129198Scognet#include <dev/cxgb/common/cxgb_firmware_exports.h> 75129198Scognet#include <dev/cxgb/common/cxgb_t3_cpl.h> 76129198Scognet#include <dev/cxgb/common/cxgb_tcb.h> 
77129198Scognet#include <dev/cxgb/common/cxgb_ctl_defs.h> 78129198Scognet#include <dev/cxgb/cxgb_offload.h> 79129198Scognet#include <vm/vm.h> 80129198Scognet#include <vm/pmap.h> 81129198Scognet#include <machine/bus.h> 82129198Scognet#include <dev/cxgb/sys/mvec.h> 83129198Scognet#include <dev/cxgb/ulp/toecore/cxgb_toedev.h> 84129198Scognet#include <dev/cxgb/ulp/tom/cxgb_defs.h> 85129198Scognet#include <dev/cxgb/ulp/tom/cxgb_tom.h> 86129198Scognet#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h> 87129198Scognet#include <dev/cxgb/ulp/tom/cxgb_toepcb.h> 88129198Scognet#include <dev/cxgb/ulp/tom/cxgb_tcp.h> 89129198Scognet 90129198Scognet#include <dev/cxgb/ulp/tom/cxgb_tcp_offload.h> 91129198Scognet 92129198Scognet/* 93129198Scognet * For ULP connections HW may add headers, e.g., for digests, that aren't part 94129198Scognet * of the messages sent by the host but that are part of the TCP payload and 95129198Scognet * therefore consume TCP sequence space. Tx connection parameters that 96129198Scognet * operate in TCP sequence space are affected by the HW additions and need to 97129198Scognet * compensate for them to accurately track TCP sequence numbers. This array 98129198Scognet * contains the compensating extra lengths for ULP packets. It is indexed by 99129198Scognet * a packet's ULP submode. 100129198Scognet */ 101129198Scognetconst unsigned int t3_ulp_extra_len[] = {0, 4, 4, 8}; 102129198Scognet 103129198Scognet#ifdef notyet 104129198Scognet/* 105129198Scognet * This sk_buff holds a fake header-only TCP segment that we use whenever we 106129198Scognet * need to exploit SW TCP functionality that expects TCP headers, such as 107129198Scognet * tcp_create_openreq_child(). It's a RO buffer that may be used by multiple 108129198Scognet * CPUs without locking. 109129198Scognet */ 110129198Scognetstatic struct mbuf *tcphdr_mbuf __read_mostly; 111129198Scognet#endif 112129198Scognet 113129198Scognet/* 114129198Scognet * Size of WRs in bytes. 
Note that we assume all devices we are handling have 115129198Scognet * the same WR size. 116129198Scognet */ 117129198Scognetstatic unsigned int wrlen __read_mostly; 118129198Scognet 119129198Scognet/* 120129198Scognet * The number of WRs needed for an skb depends on the number of page fragments 121129198Scognet * in the skb and whether it has any payload in its main body. This maps the 122129198Scognet * length of the gather list represented by an skb into the # of necessary WRs. 123129198Scognet */ 124129198Scognetstatic unsigned int mbuf_wrs[TX_MAX_SEGS + 1] __read_mostly; 125129198Scognet 126129198Scognet/* 127129198Scognet * Max receive window supported by HW in bytes. Only a small part of it can 128129198Scognet * be set through option0, the rest needs to be set through RX_DATA_ACK. 129129198Scognet */ 130129198Scognet#define MAX_RCV_WND ((1U << 27) - 1) 131129198Scognet 132129198Scognet/* 133129198Scognet * Min receive window. We want it to be large enough to accommodate receive 134273827Sandrew * coalescing, handle jumbo frames, and not trigger sender SWS avoidance. 
135239687Sgonzo */ 136239687Sgonzo#define MIN_RCV_WND (24 * 1024U) 137239687Sgonzo#define INP_TOS(inp) ((inp_ip_tos_get(inp) >> 2) & M_TOS) 138239687Sgonzo 139239687Sgonzo#define VALIDATE_SEQ 0 140239687Sgonzo#define VALIDATE_SOCK(so) 141239687Sgonzo#define DEBUG_WR 0 142239687Sgonzo 143239687Sgonzo#define TCP_TIMEWAIT 1 144239687Sgonzo#define TCP_CLOSE 2 145129198Scognet#define TCP_DROP 3 146236991Simp 147129198Scognetextern int tcp_do_autorcvbuf; 148236991Simpextern int tcp_do_autosndbuf; 149129198Scognetextern int tcp_autorcvbuf_max; 150129198Scognetextern int tcp_autosndbuf_max; 151129198Scognet 152129198Scognetstatic void t3_send_reset(struct toepcb *toep); 153129198Scognetstatic void send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status); 154129198Scognetstatic inline void free_atid(struct t3cdev *cdev, unsigned int tid); 155129198Scognetstatic void handle_syncache_event(int event, void *arg); 156129198Scognet 157129198Scognetstatic inline void 158129198ScognetSBAPPEND(struct sockbuf *sb, struct mbuf *n) 159129198Scognet{ 160129198Scognet struct mbuf *m; 161129198Scognet 162129198Scognet m = sb->sb_mb; 163129198Scognet while (m) { 164129198Scognet KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 165129198Scognet !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 166129198Scognet !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 167129198Scognet KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 168129198Scognet m->m_next, m->m_nextpkt, m->m_flags)); 169129198Scognet m = m->m_next; 170129198Scognet } 171129198Scognet m = n; 172129198Scognet while (m) { 173129198Scognet KASSERT(((m->m_flags & M_EXT) && (m->m_ext.ext_type == EXT_EXTREF)) || 174129198Scognet !(m->m_flags & M_EXT), ("unexpected type M_EXT=%d ext_type=%d m_len=%d\n", 175129198Scognet !!(m->m_flags & M_EXT), m->m_ext.ext_type, m->m_len)); 176129198Scognet KASSERT(m->m_next != (struct 
mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 177129198Scognet m->m_next, m->m_nextpkt, m->m_flags)); 178129198Scognet m = m->m_next; 179129198Scognet } 180129198Scognet KASSERT(sb->sb_flags & SB_NOCOALESCE, ("NOCOALESCE not set")); 181129198Scognet sbappendstream_locked(sb, n); 182129198Scognet m = sb->sb_mb; 183129198Scognet 184129198Scognet while (m) { 185129198Scognet KASSERT(m->m_next != (struct mbuf *)0xffffffff, ("bad next value m_next=%p m_nextpkt=%p m_flags=0x%x", 186129198Scognet m->m_next, m->m_nextpkt, m->m_flags)); 187129198Scognet m = m->m_next; 188129198Scognet } 189129198Scognet} 190129198Scognet 191129198Scognetstatic inline int 192129198Scognetis_t3a(const struct toedev *dev) 193129198Scognet{ 194129198Scognet return (dev->tod_ttid == TOE_ID_CHELSIO_T3); 195129198Scognet} 196129198Scognet 197129198Scognetstatic void 198129198Scognetdump_toepcb(struct toepcb *toep) 199129198Scognet{ 200129198Scognet DPRINTF("qset_idx=%d qset=%d ulp_mode=%d mtu_idx=%d tid=%d\n", 201129198Scognet toep->tp_qset_idx, toep->tp_qset, toep->tp_ulp_mode, 202129198Scognet toep->tp_mtu_idx, toep->tp_tid); 203129198Scognet 204129198Scognet DPRINTF("wr_max=%d wr_avail=%d wr_unacked=%d mss_clamp=%d flags=0x%x\n", 205129198Scognet toep->tp_wr_max, toep->tp_wr_avail, toep->tp_wr_unacked, 206129198Scognet toep->tp_mss_clamp, toep->tp_flags); 207129198Scognet} 208129198Scognet 209129198Scognet#ifndef RTALLOC2_DEFINED 210129198Scognetstatic struct rtentry * 211129198Scognetrtalloc2(struct sockaddr *dst, int report, u_long ignflags) 212129198Scognet{ 213129198Scognet struct rtentry *rt = NULL; 214129198Scognet 215129198Scognet if ((rt = rtalloc1(dst, report, ignflags)) != NULL) 216129198Scognet RT_UNLOCK(rt); 217129198Scognet 218129198Scognet return (rt); 219129198Scognet} 220129198Scognet#endif 221129198Scognet 222129198Scognet/* 223129198Scognet * Determine whether to send a CPL message now or defer it. 
A message is 224129198Scognet * deferred if the connection is in SYN_SENT since we don't know the TID yet. 225129198Scognet * For connections in other states the message is sent immediately. 226129198Scognet * If through_l2t is set the message is subject to ARP processing, otherwise 227129198Scognet * it is sent directly. 228129198Scognet */ 229129198Scognetstatic inline void 230129198Scognetsend_or_defer(struct toepcb *toep, struct mbuf *m, int through_l2t) 231129198Scognet{ 232129198Scognet struct tcpcb *tp = toep->tp_tp; 233129198Scognet 234129198Scognet if (__predict_false(tp->t_state == TCPS_SYN_SENT)) { 235129198Scognet inp_wlock(tp->t_inpcb); 236129198Scognet mbufq_tail(&toep->out_of_order_queue, m); // defer 237129198Scognet inp_wunlock(tp->t_inpcb); 238129198Scognet } else if (through_l2t) 239129198Scognet l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); // send through L2T 240129198Scognet else 241129198Scognet cxgb_ofld_send(TOEP_T3C_DEV(toep), m); // send directly 242129198Scognet} 243129198Scognet 244129198Scognetstatic inline unsigned int 245129198Scognetmkprio(unsigned int cntrl, const struct toepcb *toep) 246129198Scognet{ 247129198Scognet return (cntrl); 248129198Scognet} 249129198Scognet 250129198Scognet/* 251129198Scognet * Populate a TID_RELEASE WR. The skb must be already propely sized. 
 */
static inline void
mk_tid_release(struct mbuf *m, const struct toepcb *toep, unsigned int tid)
{
	struct cpl_tid_release *req;

	/* TID_RELEASE is a header-only, setup-priority CPL. */
	m_set_priority(m, mkprio(CPL_PRIORITY_SETUP, toep));
	m->m_pkthdr.len = m->m_len = sizeof(*req);
	req = mtod(m, struct cpl_tid_release *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_TID_RELEASE, tid));
}

/*
 * Build a TX_DATA work request header in the first sizeof(*req) bytes of m.
 * len is the payload length in TCP sequence space (including any HW ULP
 * additions).  tail is the first unsent mbuf left in the send buffer; a
 * NULL tail means everything queued is going out now, which permits SHOVE.
 * Caller holds the inpcb lock.
 */
static inline void
make_tx_data_wr(struct socket *so, struct mbuf *m, int len, struct mbuf *tail)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct tx_data_wr *req;
	struct sockbuf *snd;

	inp_lock_assert(tp->t_inpcb);
	snd = so_sockbuf_snd(so);

	req = mtod(m, struct tx_data_wr *);
	m->m_len = sizeof(*req);
	req->wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_TX_DATA));
	req->wr_lo = htonl(V_WR_TID(toep->tp_tid));
	/* len includes the length of any HW ULP additions */
	req->len = htonl(len);
	req->param = htonl(V_TX_PORT(toep->tp_l2t->smt_idx));
	/* V_TX_ULP_SUBMODE sets both the mode and submode */
	req->flags = htonl(V_TX_ULP_SUBMODE(/*skb_ulp_mode(skb)*/ 0) |
	    V_TX_URG(/* skb_urgent(skb) */ 0 ) |
	    V_TX_SHOVE((!(tp->t_flags & TF_MORETOCOME) &&
	    (tail ? 0 : 1))));
	req->sndseq = htonl(tp->snd_nxt);
	/* First WR on this connection carries the one-time init flags. */
	if (__predict_false((toep->tp_flags & TP_DATASENT) == 0)) {
		req->flags |= htonl(V_TX_ACK_PAGES(2) | F_TX_INIT |
		    V_TX_CPU_IDX(toep->tp_qset));

		/* Sendbuffer is in units of 32KB.
		 */
		if (V_tcp_do_autosndbuf && snd->sb_flags & SB_AUTOSIZE)
			req->param |= htonl(V_TX_SNDBUF(V_tcp_autosndbuf_max >> 15));
		else {
			req->param |= htonl(V_TX_SNDBUF(snd->sb_hiwat >> 15));
		}

		toep->tp_flags |= TP_DATASENT;
	}
}

#define IMM_LEN 64 /* XXX - see WR_LEN in the cxgb driver */

/*
 * Push queued send-buffer data to the hardware as TX_DATA work requests,
 * starting at the sockbuf's send pointer.  Stops when WR credits run out or
 * the buffer is drained.  req_completion requests a completion notification
 * on the first WR issued.  Returns the total number of payload bytes pushed.
 * Caller holds the inpcb lock; the sockbuf lock is taken here.
 */
int
t3_push_frames(struct socket *so, int req_completion)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	struct mbuf *tail, *m0, *last;
	struct t3cdev *cdev;
	struct tom_data *d;
	int state, bytes, count, total_bytes;
	bus_dma_segment_t segs[TX_MAX_SEGS], *segp;
	struct sockbuf *snd;

	/* No TID yet in SYN_SENT, nothing to do once closed. */
	if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_CLOSED) {
		DPRINTF("tcp state=%d\n", tp->t_state);
		return (0);
	}

	state = so_state_get(so);

	if (state & (SS_ISDISCONNECTING|SS_ISDISCONNECTED)) {
		DPRINTF("disconnecting\n");

		return (0);
	}

	inp_lock_assert(tp->t_inpcb);

	snd = so_sockbuf_snd(so);
	sockbuf_lock(snd);

	d = TOM_DATA(toep->tp_toedev);
	cdev = d->cdev;

	/* Resume from the send pointer, or the head of the buffer. */
	last = tail = snd->sb_sndptr ? snd->sb_sndptr : snd->sb_mb;

	total_bytes = 0;
	DPRINTF("wr_avail=%d tail=%p snd.cc=%d tp_last=%p\n",
	    toep->tp_wr_avail, tail, snd->sb_cc, toep->tp_m_last);

	/* tp_m_last was fully sent; skip past it to the next unsent mbuf. */
	if (last && toep->tp_m_last == last && snd->sb_sndptroff != 0) {
		KASSERT(tail, ("sbdrop error"));
		last = tail = tail->m_next;
	}

	if ((toep->tp_wr_avail == 0 ) || (tail == NULL)) {
		DPRINTF("wr_avail=%d tail=%p\n", toep->tp_wr_avail, tail);
		sockbuf_unlock(snd);

		return (0);
	}

	toep->tp_m_last = NULL;
	while (toep->tp_wr_avail && (tail != NULL)) {
		count = bytes = 0;
		segp = segs;
		if ((m0 = m_gethdr(M_NOWAIT, MT_DATA)) == NULL) {
			sockbuf_unlock(snd);
			return (0);
		}
		/*
		 * If the data in tail fits as in-line, then
		 * make an immediate data wr.
		 */
		if (tail->m_len <= IMM_LEN) {
			count = 1;
			bytes = tail->m_len;
			last = tail;
			tail = tail->m_next;
			m_set_sgl(m0, NULL);
			m_set_sgllen(m0, 0);
			make_tx_data_wr(so, m0, bytes, tail);
			m_append(m0, bytes, mtod(last, caddr_t));
			KASSERT(!m0->m_next, ("bad append"));
		} else {
			/* Gather as many mbufs as the remaining credits allow. */
			while ((mbuf_wrs[count + 1] <= toep->tp_wr_avail)
			    && (tail != NULL) && (count < TX_MAX_SEGS-1)) {
				bytes += tail->m_len;
				last = tail;
				count++;
				/*
				 * technically an abuse to be using this for a VA
				 * but less gross than defining my own structure
				 * or calling pmap_kextract from here :-|
				 */
				segp->ds_addr = (bus_addr_t)tail->m_data;
				segp->ds_len = tail->m_len;
				DPRINTF("count=%d wr_needed=%d ds_addr=%p ds_len=%d\n",
				    count, mbuf_wrs[count], tail->m_data, tail->m_len);
				segp++;
				tail = tail->m_next;
			}
			DPRINTF("wr_avail=%d mbuf_wrs[%d]=%d tail=%p\n",
			    toep->tp_wr_avail, count, mbuf_wrs[count], tail);

			m_set_sgl(m0, segs);
			m_set_sgllen(m0, count);
			make_tx_data_wr(so, m0, bytes, tail);
		}
		m_set_priority(m0, mkprio(CPL_PRIORITY_DATA, toep));

		/* Advance the send pointer; record last if buffer drained. */
		if (tail) {
			snd->sb_sndptr = tail;
			toep->tp_m_last = NULL;
		} else
			toep->tp_m_last = snd->sb_sndptr = last;


		DPRINTF("toep->tp_m_last=%p\n", toep->tp_m_last);

		snd->sb_sndptroff += bytes;
		total_bytes += bytes;
		toep->tp_write_seq += bytes;
		CTR6(KTR_TOM, "t3_push_frames: wr_avail=%d mbuf_wrs[%d]=%d"
		    " tail=%p sndptr=%p sndptroff=%d",
		    toep->tp_wr_avail, count, mbuf_wrs[count],
		    tail, snd->sb_sndptr, snd->sb_sndptroff);
		if (tail)
			CTR4(KTR_TOM, "t3_push_frames: total_bytes=%d"
			    " tp_m_last=%p tailbuf=%p snd_una=0x%08x",
			    total_bytes, toep->tp_m_last, tail->m_data,
			    tp->snd_una);
		else
			CTR3(KTR_TOM, "t3_push_frames: total_bytes=%d"
			    " tp_m_last=%p snd_una=0x%08x",
			    total_bytes, toep->tp_m_last, tp->snd_una);


#ifdef KTR
{
	/* Trace the gather list, three segments per record. */
	int i;

	i = 0;
	while (i < count && m_get_sgllen(m0)) {
		if ((count - i) >= 3) {
			CTR6(KTR_TOM,
			    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
			    " len=%d pa=0x%zx len=%d",
			    segs[i].ds_addr, segs[i].ds_len,
			    segs[i + 1].ds_addr, segs[i + 1].ds_len,
			    segs[i + 2].ds_addr, segs[i + 2].ds_len);
			i += 3;
		} else if ((count - i) == 2) {
			CTR4(KTR_TOM,
			    "t3_push_frames: pa=0x%zx len=%d pa=0x%zx"
			    " len=%d",
			    segs[i].ds_addr, segs[i].ds_len,
			    segs[i + 1].ds_addr, segs[i + 1].ds_len);
			i += 2;
		} else {
			CTR2(KTR_TOM, "t3_push_frames: pa=0x%zx len=%d",
			    segs[i].ds_addr, segs[i].ds_len);
			i++;
		}

	}
}
#endif
		/*
		 * remember credits used
		 */
		m0->m_pkthdr.csum_data = mbuf_wrs[count];
		m0->m_pkthdr.len = bytes;
		toep->tp_wr_avail -= mbuf_wrs[count];
		toep->tp_wr_unacked += mbuf_wrs[count];

		/* Request a completion when asked, or at half the WR budget. */
		if ((req_completion && toep->tp_wr_unacked == mbuf_wrs[count]) ||
		    toep->tp_wr_unacked >= toep->tp_wr_max / 2) {
			struct work_request_hdr *wr = cplhdr(m0);

			wr->wr_hi |= htonl(F_WR_COMPL);
			toep->tp_wr_unacked = 0;
		}
		KASSERT((m0->m_pkthdr.csum_data > 0) &&
		    (m0->m_pkthdr.csum_data <= 4), ("bad credit count %d",
		    m0->m_pkthdr.csum_data));
		m0->m_type = MT_DONTFREE;
		enqueue_wr(toep, m0);
		DPRINTF("sending offload tx with %d bytes in %d segments\n",
		    bytes, count);
		l2t_send(cdev, m0, toep->tp_l2t);
	}
	sockbuf_unlock(snd);
	return (total_bytes);
}

/*
 * Close a connection by sending a CPL_CLOSE_CON_REQ message.  Cannot fail
 * under any circumstances.  We take the easy way out and always queue the
 * message to the write_queue.  We can optimize the case where the queue is
 * already empty though the optimization is probably not worth it.
 */
static void
close_conn(struct socket *so)
{
	struct mbuf *m;
	struct cpl_close_con_req *req;
	struct tom_data *d;
	struct inpcb *inp = so_sotoinpcb(so);
	struct tcpcb *tp;
	struct toepcb *toep;
	unsigned int tid;


	inp_wlock(inp);
	tp = so_sototcpcb(so);
	toep = tp->t_toe;

	/* Flush queued payload first so the FIN sequences after the data. */
	if (tp->t_state != TCPS_SYN_SENT)
		t3_push_frames(so, 1);

	/* Only ever issue one FIN per connection. */
	if (toep->tp_flags & TP_FIN_SENT) {
		inp_wunlock(inp);
		return;
	}

	tid = toep->tp_tid;

	d = TOM_DATA(toep->tp_toedev);

	m = m_gethdr_nofail(sizeof(*req));
	m_set_priority(m, CPL_PRIORITY_DATA);
	m_set_sgl(m, NULL);
	m_set_sgllen(m, 0);

	toep->tp_flags |= TP_FIN_SENT;
	req = mtod(m, struct cpl_close_con_req *);

	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_CLOSE_CON));
	req->wr.wr_lo = htonl(V_WR_TID(tid));
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_CON_REQ, tid));
	req->rsvd = 0;
	inp_wunlock(inp);
	/*
	 * XXX - need to defer shutdown while there is still data in the queue
	 *
	 */
	CTR4(KTR_TOM, "%s CLOSE_CON_REQ so %p tp %p tid=%u", __FUNCTION__, so, tp, tid);
	cxgb_ofld_send(d->cdev, m);

}

/*
 * Handle an ARP failure for a CPL_ABORT_REQ.
 * Change it into a no RST variant
 * and send it along.
 */
static void
abort_arp_failure(struct t3cdev *cdev, struct mbuf *m)
{
	struct cpl_abort_req *req = cplhdr(m);

	/* Downgrade so no RST is generated, then send it anyway. */
	req->cmd = CPL_ABORT_NO_RST;
	cxgb_ofld_send(cdev, m);
}

/*
 * Send RX credits through an RX_DATA_ACK CPL message.  If nofail is 0 we are
 * permitted to return without sending the message in case we cannot allocate
 * an sk_buff.  Returns the number of credits sent.
 */
uint32_t
t3_send_rx_credits(struct tcpcb *tp, uint32_t credits, uint32_t dack, int nofail)
{
	struct mbuf *m;
	struct cpl_rx_data_ack *req;
	struct toepcb *toep = tp->t_toe;
	struct toedev *tdev = toep->tp_toedev;

	/*
	 * NOTE(review): nofail is unused here — m_gethdr_nofail() cannot
	 * fail — presumably retained for interface compatibility.
	 */
	m = m_gethdr_nofail(sizeof(*req));

	DPRINTF("returning %u credits to HW\n", credits);

	req = mtod(m, struct cpl_rx_data_ack *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	/*
	 * NOTE(review): unlike t3_send_rx_modulate() this never sets
	 * m->m_pkthdr.len/m_len; presumably m_gethdr_nofail() already sizes
	 * the mbuf — confirm, otherwise the CPL goes out with length 0.
	 */
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
	req->credit_dack = htonl(dack | V_RX_CREDITS(credits));
	m_set_priority(m, mkprio(CPL_PRIORITY_ACK, toep));
	cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	return (credits);
}

/*
 * Send RX_DATA_ACK CPL message to request a modulation timer to be scheduled.
 * This is only used in DDP mode, so we take the opportunity to also set the
 * DACK mode and flush any Rx credits.
 */
void
t3_send_rx_modulate(struct toepcb *toep)
{
	struct mbuf *m;
	struct cpl_rx_data_ack *req;

	m = m_gethdr_nofail(sizeof(*req));

	req = mtod(m, struct cpl_rx_data_ack *);
	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
	req->wr.wr_lo = 0;
	m->m_pkthdr.len = m->m_len = sizeof(*req);

	/* Return all pending credits and switch to DACK mode 1. */
	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, toep->tp_tid));
	req->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE |
	    V_RX_DACK_MODE(1) |
	    V_RX_CREDITS(toep->tp_copied_seq - toep->tp_rcv_wup));
	m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep));
	cxgb_ofld_send(TOEP_T3C_DEV(toep), m);
	toep->tp_rcv_wup = toep->tp_copied_seq;
}

/*
 * Handle receipt of an urgent pointer.
 */
static void
handle_urg_ptr(struct socket *so, uint32_t urg_seq)
{
#ifdef URGENT_DATA_SUPPORTED
	/*
	 * NOTE(review): this block is compiled out and still written in
	 * Linux idiom (sk, skb_peek, sock_flag, tom_eat_skb); it will not
	 * build if URGENT_DATA_SUPPORTED is ever defined without a rewrite.
	 */
	struct tcpcb *tp = so_sototcpcb(so);

	urg_seq--;	/* initially points past the urgent data, per BSD */

	if (tp->urg_data && !after(urg_seq, tp->urg_seq))
		return;	/* duplicate pointer */
	sk_send_sigurg(sk);
	if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
	    !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		tp->copied_seq++;
		if (skb && tp->copied_seq - TCP_SKB_CB(skb)->seq >= skb->len)
			tom_eat_skb(sk, skb, 0);
	}
	tp->urg_data = TCP_URG_NOTYET;
	tp->urg_seq = urg_seq;
#endif
}

/*
 * Returns true if a socket cannot accept new Rx data.
 */
static inline int
so_no_receive(const struct socket *so)
{
	return (so_state_get(so) & (SS_ISDISCONNECTED|SS_ISDISCONNECTING));
}

/*
 * Process an urgent data notification.
655129198Scognet */ 656129198Scognetstatic void 657129198Scognetrx_urg_notify(struct toepcb *toep, struct mbuf *m) 658129198Scognet{ 659129198Scognet struct cpl_rx_urg_notify *hdr = cplhdr(m); 660129198Scognet struct socket *so = inp_inpcbtosocket(toep->tp_tp->t_inpcb); 661129198Scognet 662129198Scognet VALIDATE_SOCK(so); 663129198Scognet 664129198Scognet if (!so_no_receive(so)) 665129198Scognet handle_urg_ptr(so, ntohl(hdr->seq)); 666129198Scognet 667129198Scognet m_freem(m); 668129198Scognet} 669129198Scognet 670129198Scognet/* 671129198Scognet * Handler for RX_URG_NOTIFY CPL messages. 672129198Scognet */ 673129198Scognetstatic int 674129198Scognetdo_rx_urg_notify(struct t3cdev *cdev, struct mbuf *m, void *ctx) 675129198Scognet{ 676129198Scognet struct toepcb *toep = (struct toepcb *)ctx; 677129198Scognet 678129198Scognet rx_urg_notify(toep, m); 679129198Scognet return (0); 680129198Scognet} 681129198Scognet 682129198Scognetstatic __inline int 683129198Scognetis_delack_mode_valid(struct toedev *dev, struct toepcb *toep) 684129198Scognet{ 685129198Scognet return (toep->tp_ulp_mode || 686129198Scognet (toep->tp_ulp_mode == ULP_MODE_TCPDDP && 687129198Scognet dev->tod_ttid >= TOE_ID_CHELSIO_T3)); 688129198Scognet} 689129198Scognet 690129198Scognet/* 691129198Scognet * Set of states for which we should return RX credits. 692129198Scognet */ 693129198Scognet#define CREDIT_RETURN_STATE (TCPF_ESTABLISHED | TCPF_FIN_WAIT1 | TCPF_FIN_WAIT2) 694 695/* 696 * Called after some received data has been read. It returns RX credits 697 * to the HW for the amount of data processed. 
698 */ 699void 700t3_cleanup_rbuf(struct tcpcb *tp, int copied) 701{ 702 struct toepcb *toep = tp->t_toe; 703 struct socket *so; 704 struct toedev *dev; 705 int dack_mode, must_send, read; 706 u32 thres, credits, dack = 0; 707 struct sockbuf *rcv; 708 709 so = inp_inpcbtosocket(tp->t_inpcb); 710 rcv = so_sockbuf_rcv(so); 711 712 if (!((tp->t_state == TCPS_ESTABLISHED) || (tp->t_state == TCPS_FIN_WAIT_1) || 713 (tp->t_state == TCPS_FIN_WAIT_2))) { 714 if (copied) { 715 sockbuf_lock(rcv); 716 toep->tp_copied_seq += copied; 717 sockbuf_unlock(rcv); 718 } 719 720 return; 721 } 722 723 inp_lock_assert(tp->t_inpcb); 724 725 sockbuf_lock(rcv); 726 if (copied) 727 toep->tp_copied_seq += copied; 728 else { 729 read = toep->tp_enqueued_bytes - rcv->sb_cc; 730 toep->tp_copied_seq += read; 731 } 732 credits = toep->tp_copied_seq - toep->tp_rcv_wup; 733 toep->tp_enqueued_bytes = rcv->sb_cc; 734 sockbuf_unlock(rcv); 735 736 if (credits > rcv->sb_mbmax) { 737 log(LOG_ERR, "copied_seq=%u rcv_wup=%u credits=%u\n", 738 toep->tp_copied_seq, toep->tp_rcv_wup, credits); 739 credits = rcv->sb_mbmax; 740 } 741 742 743 /* 744 * XXX this won't accurately reflect credit return - we need 745 * to look at the difference between the amount that has been 746 * put in the recv sockbuf and what is there now 747 */ 748 749 if (__predict_false(!credits)) 750 return; 751 752 dev = toep->tp_toedev; 753 thres = TOM_TUNABLE(dev, rx_credit_thres); 754 755 if (__predict_false(thres == 0)) 756 return; 757 758 if (is_delack_mode_valid(dev, toep)) { 759 dack_mode = TOM_TUNABLE(dev, delack); 760 if (__predict_false(dack_mode != toep->tp_delack_mode)) { 761 u32 r = tp->rcv_nxt - toep->tp_delack_seq; 762 763 if (r >= tp->rcv_wnd || r >= 16 * toep->tp_mss_clamp) 764 dack = F_RX_DACK_CHANGE | 765 V_RX_DACK_MODE(dack_mode); 766 } 767 } else 768 dack = F_RX_DACK_CHANGE | V_RX_DACK_MODE(1); 769 770 /* 771 * For coalescing to work effectively ensure the receive window has 772 * at least 16KB left. 
773 */ 774 must_send = credits + 16384 >= tp->rcv_wnd; 775 776 if (must_send || credits >= thres) 777 toep->tp_rcv_wup += t3_send_rx_credits(tp, credits, dack, must_send); 778} 779 780static int 781cxgb_toe_disconnect(struct tcpcb *tp) 782{ 783 struct socket *so; 784 785 DPRINTF("cxgb_toe_disconnect\n"); 786 787 so = inp_inpcbtosocket(tp->t_inpcb); 788 close_conn(so); 789 return (0); 790} 791 792static int 793cxgb_toe_reset(struct tcpcb *tp) 794{ 795 struct toepcb *toep = tp->t_toe; 796 797 t3_send_reset(toep); 798 799 /* 800 * unhook from socket 801 */ 802 tp->t_flags &= ~TF_TOE; 803 toep->tp_tp = NULL; 804 tp->t_toe = NULL; 805 return (0); 806} 807 808static int 809cxgb_toe_send(struct tcpcb *tp) 810{ 811 struct socket *so; 812 813 DPRINTF("cxgb_toe_send\n"); 814 dump_toepcb(tp->t_toe); 815 816 so = inp_inpcbtosocket(tp->t_inpcb); 817 t3_push_frames(so, 1); 818 return (0); 819} 820 821static int 822cxgb_toe_rcvd(struct tcpcb *tp) 823{ 824 825 inp_lock_assert(tp->t_inpcb); 826 827 t3_cleanup_rbuf(tp, 0); 828 829 return (0); 830} 831 832static void 833cxgb_toe_detach(struct tcpcb *tp) 834{ 835 struct toepcb *toep; 836 837 /* 838 * XXX how do we handle teardown in the SYN_SENT state? 
839 * 840 */ 841 inp_lock_assert(tp->t_inpcb); 842 toep = tp->t_toe; 843 toep->tp_tp = NULL; 844 845 /* 846 * unhook from socket 847 */ 848 tp->t_flags &= ~TF_TOE; 849 tp->t_toe = NULL; 850} 851 852 853static struct toe_usrreqs cxgb_toe_usrreqs = { 854 .tu_disconnect = cxgb_toe_disconnect, 855 .tu_reset = cxgb_toe_reset, 856 .tu_send = cxgb_toe_send, 857 .tu_rcvd = cxgb_toe_rcvd, 858 .tu_detach = cxgb_toe_detach, 859 .tu_detach = cxgb_toe_detach, 860 .tu_syncache_event = handle_syncache_event, 861}; 862 863 864static void 865__set_tcb_field(struct toepcb *toep, struct mbuf *m, uint16_t word, 866 uint64_t mask, uint64_t val, int no_reply) 867{ 868 struct cpl_set_tcb_field *req; 869 870 CTR4(KTR_TCB, "__set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 871 toep->tp_tid, word, mask, val); 872 873 req = mtod(m, struct cpl_set_tcb_field *); 874 m->m_pkthdr.len = m->m_len = sizeof(*req); 875 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 876 req->wr.wr_lo = 0; 877 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, toep->tp_tid)); 878 req->reply = V_NO_REPLY(no_reply); 879 req->cpu_idx = 0; 880 req->word = htons(word); 881 req->mask = htobe64(mask); 882 req->val = htobe64(val); 883 884 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 885 send_or_defer(toep, m, 0); 886} 887 888static void 889t3_set_tcb_field(struct toepcb *toep, uint16_t word, uint64_t mask, uint64_t val) 890{ 891 struct mbuf *m; 892 struct tcpcb *tp = toep->tp_tp; 893 894 if (toep == NULL) 895 return; 896 897 if (tp->t_state == TCPS_CLOSED || (toep->tp_flags & TP_ABORT_SHUTDOWN)) { 898 printf("not seting field\n"); 899 return; 900 } 901 902 m = m_gethdr_nofail(sizeof(struct cpl_set_tcb_field)); 903 904 __set_tcb_field(toep, m, word, mask, val, 1); 905} 906 907/* 908 * Set one of the t_flags bits in the TCB. 
909 */ 910static void 911set_tcb_tflag(struct toepcb *toep, unsigned int bit_pos, int val) 912{ 913 914 t3_set_tcb_field(toep, W_TCB_T_FLAGS1, 1ULL << bit_pos, val << bit_pos); 915} 916 917/* 918 * Send a SET_TCB_FIELD CPL message to change a connection's Nagle setting. 919 */ 920static void 921t3_set_nagle(struct toepcb *toep) 922{ 923 struct tcpcb *tp = toep->tp_tp; 924 925 set_tcb_tflag(toep, S_TF_NAGLE, !(tp->t_flags & TF_NODELAY)); 926} 927 928/* 929 * Send a SET_TCB_FIELD CPL message to change a connection's keepalive setting. 930 */ 931void 932t3_set_keepalive(struct toepcb *toep, int on_off) 933{ 934 935 set_tcb_tflag(toep, S_TF_KEEPALIVE, on_off); 936} 937 938void 939t3_set_rcv_coalesce_enable(struct toepcb *toep, int on_off) 940{ 941 set_tcb_tflag(toep, S_TF_RCV_COALESCE_ENABLE, on_off); 942} 943 944void 945t3_set_dack_mss(struct toepcb *toep, int on_off) 946{ 947 948 set_tcb_tflag(toep, S_TF_DACK_MSS, on_off); 949} 950 951/* 952 * Send a SET_TCB_FIELD CPL message to change a connection's TOS setting. 953 */ 954static void 955t3_set_tos(struct toepcb *toep) 956{ 957 int tos = inp_ip_tos_get(toep->tp_tp->t_inpcb); 958 959 t3_set_tcb_field(toep, W_TCB_TOS, V_TCB_TOS(M_TCB_TOS), 960 V_TCB_TOS(tos)); 961} 962 963 964/* 965 * In DDP mode, TP fails to schedule a timer to push RX data to the host when 966 * DDP is disabled (data is delivered to freelist). [Note that, the peer should 967 * set the PSH bit in the last segment, which would trigger delivery.] 968 * We work around the issue by setting a DDP buffer in a partial placed state, 969 * which guarantees that TP will schedule a timer. 
970 */ 971#define TP_DDP_TIMER_WORKAROUND_MASK\ 972 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1) |\ 973 ((V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) |\ 974 V_TCB_RX_DDP_BUF0_LEN(3)) << 32)) 975#define TP_DDP_TIMER_WORKAROUND_VAL\ 976 (V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(0) |\ 977 ((V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)1) | V_TCB_RX_DDP_BUF0_LEN((uint64_t)2)) <<\ 978 32)) 979 980static void 981t3_enable_ddp(struct toepcb *toep, int on) 982{ 983 if (on) { 984 985 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, V_TF_DDP_OFF(1), 986 V_TF_DDP_OFF(0)); 987 } else 988 t3_set_tcb_field(toep, W_TCB_RX_DDP_FLAGS, 989 V_TF_DDP_OFF(1) | 990 TP_DDP_TIMER_WORKAROUND_MASK, 991 V_TF_DDP_OFF(1) | 992 TP_DDP_TIMER_WORKAROUND_VAL); 993 994} 995 996void 997t3_set_ddp_tag(struct toepcb *toep, int buf_idx, unsigned int tag_color) 998{ 999 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_TAG + buf_idx, 1000 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG), 1001 tag_color); 1002} 1003 1004void 1005t3_set_ddp_buf(struct toepcb *toep, int buf_idx, unsigned int offset, 1006 unsigned int len) 1007{ 1008 if (buf_idx == 0) 1009 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF0_OFFSET, 1010 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 1011 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 1012 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset) | 1013 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 1014 else 1015 t3_set_tcb_field(toep, W_TCB_RX_DDP_BUF1_OFFSET, 1016 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 1017 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN << 32), 1018 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset) | 1019 V_TCB_RX_DDP_BUF1_LEN(((uint64_t)len) << 32)); 1020} 1021 1022static int 1023t3_set_cong_control(struct socket *so, const char *name) 1024{ 1025#ifdef CONGESTION_CONTROL_SUPPORTED 1026 int cong_algo; 1027 1028 for (cong_algo = 0; cong_algo < ARRAY_SIZE(t3_cong_ops); cong_algo++) 1029 if (!strcmp(name, t3_cong_ops[cong_algo].name)) 1030 break; 1031 1032 if (cong_algo >= 
ARRAY_SIZE(t3_cong_ops)) 1033 return -EINVAL; 1034#endif 1035 return 0; 1036} 1037 1038int 1039t3_get_tcb(struct toepcb *toep) 1040{ 1041 struct cpl_get_tcb *req; 1042 struct tcpcb *tp = toep->tp_tp; 1043 struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA); 1044 1045 if (!m) 1046 return (ENOMEM); 1047 1048 inp_lock_assert(tp->t_inpcb); 1049 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 1050 req = mtod(m, struct cpl_get_tcb *); 1051 m->m_pkthdr.len = m->m_len = sizeof(*req); 1052 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1053 req->wr.wr_lo = 0; 1054 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, toep->tp_tid)); 1055 req->cpuno = htons(toep->tp_qset); 1056 req->rsvd = 0; 1057 if (tp->t_state == TCPS_SYN_SENT) 1058 mbufq_tail(&toep->out_of_order_queue, m); // defer 1059 else 1060 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 1061 return 0; 1062} 1063 1064static inline void 1065so_insert_tid(struct tom_data *d, struct toepcb *toep, unsigned int tid) 1066{ 1067 1068 toepcb_hold(toep); 1069 1070 cxgb_insert_tid(d->cdev, d->client, toep, tid); 1071} 1072 1073/** 1074 * find_best_mtu - find the entry in the MTU table closest to an MTU 1075 * @d: TOM state 1076 * @mtu: the target MTU 1077 * 1078 * Returns the index of the value in the MTU table that is closest to but 1079 * does not exceed the target MTU. 
1080 */ 1081static unsigned int 1082find_best_mtu(const struct t3c_data *d, unsigned short mtu) 1083{ 1084 int i = 0; 1085 1086 while (i < d->nmtus - 1 && d->mtus[i + 1] <= mtu) 1087 ++i; 1088 return (i); 1089} 1090 1091static unsigned int 1092select_mss(struct t3c_data *td, struct tcpcb *tp, unsigned int pmtu) 1093{ 1094 unsigned int idx; 1095 1096#ifdef notyet 1097 struct rtentry *dst = so_sotoinpcb(so)->inp_route.ro_rt; 1098#endif 1099 if (tp) { 1100 tp->t_maxseg = pmtu - 40; 1101 if (tp->t_maxseg < td->mtus[0] - 40) 1102 tp->t_maxseg = td->mtus[0] - 40; 1103 idx = find_best_mtu(td, tp->t_maxseg + 40); 1104 1105 tp->t_maxseg = td->mtus[idx] - 40; 1106 } else 1107 idx = find_best_mtu(td, pmtu); 1108 1109 return (idx); 1110} 1111 1112static inline void 1113free_atid(struct t3cdev *cdev, unsigned int tid) 1114{ 1115 struct toepcb *toep = cxgb_free_atid(cdev, tid); 1116 1117 if (toep) 1118 toepcb_release(toep); 1119} 1120 1121/* 1122 * Release resources held by an offload connection (TID, L2T entry, etc.) 
1123 */ 1124static void 1125t3_release_offload_resources(struct toepcb *toep) 1126{ 1127 struct tcpcb *tp = toep->tp_tp; 1128 struct toedev *tdev = toep->tp_toedev; 1129 struct t3cdev *cdev; 1130 struct socket *so; 1131 unsigned int tid = toep->tp_tid; 1132 struct sockbuf *rcv; 1133 1134 CTR0(KTR_TOM, "t3_release_offload_resources"); 1135 1136 if (!tdev) 1137 return; 1138 1139 cdev = TOEP_T3C_DEV(toep); 1140 if (!cdev) 1141 return; 1142 1143 toep->tp_qset = 0; 1144 t3_release_ddp_resources(toep); 1145 1146#ifdef CTRL_SKB_CACHE 1147 kfree_skb(CTRL_SKB_CACHE(tp)); 1148 CTRL_SKB_CACHE(tp) = NULL; 1149#endif 1150 1151 if (toep->tp_wr_avail != toep->tp_wr_max) { 1152 purge_wr_queue(toep); 1153 reset_wr_list(toep); 1154 } 1155 1156 if (toep->tp_l2t) { 1157 l2t_release(L2DATA(cdev), toep->tp_l2t); 1158 toep->tp_l2t = NULL; 1159 } 1160 toep->tp_tp = NULL; 1161 if (tp) { 1162 inp_lock_assert(tp->t_inpcb); 1163 so = inp_inpcbtosocket(tp->t_inpcb); 1164 rcv = so_sockbuf_rcv(so); 1165 /* 1166 * cancel any offloaded reads 1167 * 1168 */ 1169 sockbuf_lock(rcv); 1170 tp->t_toe = NULL; 1171 tp->t_flags &= ~TF_TOE; 1172 if (toep->tp_ddp_state.user_ddp_pending) { 1173 t3_cancel_ubuf(toep, rcv); 1174 toep->tp_ddp_state.user_ddp_pending = 0; 1175 } 1176 so_sorwakeup_locked(so); 1177 1178 } 1179 1180 if (toep->tp_state == TCPS_SYN_SENT) { 1181 free_atid(cdev, tid); 1182#ifdef notyet 1183 __skb_queue_purge(&tp->out_of_order_queue); 1184#endif 1185 } else { // we have TID 1186 cxgb_remove_tid(cdev, toep, tid); 1187 toepcb_release(toep); 1188 } 1189#if 0 1190 log(LOG_INFO, "closing TID %u, state %u\n", tid, tp->t_state); 1191#endif 1192} 1193 1194static void 1195install_offload_ops(struct socket *so) 1196{ 1197 struct tcpcb *tp = so_sototcpcb(so); 1198 1199 KASSERT(tp->t_toe != NULL, ("toepcb not set")); 1200 1201 t3_install_socket_ops(so); 1202 tp->t_flags |= TF_TOE; 1203 tp->t_tu = &cxgb_toe_usrreqs; 1204} 1205 1206/* 1207 * Determine the receive window scaling factor given a target max 
1208 * receive window. 1209 */ 1210static __inline int 1211select_rcv_wscale(int space) 1212{ 1213 int wscale = 0; 1214 1215 if (space > MAX_RCV_WND) 1216 space = MAX_RCV_WND; 1217 1218 if (V_tcp_do_rfc1323) 1219 for (; space > 65535 && wscale < 14; space >>= 1, ++wscale) ; 1220 1221 return (wscale); 1222} 1223 1224/* 1225 * Determine the receive window size for a socket. 1226 */ 1227static unsigned long 1228select_rcv_wnd(struct toedev *dev, struct socket *so) 1229{ 1230 struct tom_data *d = TOM_DATA(dev); 1231 unsigned int wnd; 1232 unsigned int max_rcv_wnd; 1233 struct sockbuf *rcv; 1234 1235 rcv = so_sockbuf_rcv(so); 1236 1237 if (V_tcp_do_autorcvbuf) 1238 wnd = V_tcp_autorcvbuf_max; 1239 else 1240 wnd = rcv->sb_hiwat; 1241 1242 1243 1244 /* XXX 1245 * For receive coalescing to work effectively we need a receive window 1246 * that can accomodate a coalesced segment. 1247 */ 1248 if (wnd < MIN_RCV_WND) 1249 wnd = MIN_RCV_WND; 1250 1251 /* PR 5138 */ 1252 max_rcv_wnd = (dev->tod_ttid < TOE_ID_CHELSIO_T3C ? 1253 (uint32_t)d->rx_page_size * 23 : 1254 MAX_RCV_WND); 1255 1256 return min(wnd, max_rcv_wnd); 1257} 1258 1259/* 1260 * Assign offload parameters to some socket fields. This code is used by 1261 * both active and passive opens. 
1262 */ 1263static inline void 1264init_offload_socket(struct socket *so, struct toedev *dev, unsigned int tid, 1265 struct l2t_entry *e, struct rtentry *dst, struct toepcb *toep) 1266{ 1267 struct tcpcb *tp = so_sototcpcb(so); 1268 struct t3c_data *td = T3C_DATA(TOM_DATA(dev)->cdev); 1269 struct sockbuf *snd, *rcv; 1270 1271#ifdef notyet 1272 SOCK_LOCK_ASSERT(so); 1273#endif 1274 1275 snd = so_sockbuf_snd(so); 1276 rcv = so_sockbuf_rcv(so); 1277 1278 log(LOG_INFO, "initializing offload socket\n"); 1279 /* 1280 * We either need to fix push frames to work with sbcompress 1281 * or we need to add this 1282 */ 1283 snd->sb_flags |= SB_NOCOALESCE; 1284 rcv->sb_flags |= SB_NOCOALESCE; 1285 1286 tp->t_toe = toep; 1287 toep->tp_tp = tp; 1288 toep->tp_toedev = dev; 1289 1290 toep->tp_tid = tid; 1291 toep->tp_l2t = e; 1292 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(dev, max_wrs); 1293 toep->tp_wr_unacked = 0; 1294 toep->tp_delack_mode = 0; 1295 1296 toep->tp_mtu_idx = select_mss(td, tp, dst->rt_ifp->if_mtu); 1297 /* 1298 * XXX broken 1299 * 1300 */ 1301 tp->rcv_wnd = select_rcv_wnd(dev, so); 1302 1303 toep->tp_ulp_mode = TOM_TUNABLE(dev, ddp) && !(so_options_get(so) & SO_NO_DDP) && 1304 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? ULP_MODE_TCPDDP : 0; 1305 toep->tp_qset_idx = 0; 1306 1307 reset_wr_list(toep); 1308 DPRINTF("initialization done\n"); 1309} 1310 1311/* 1312 * The next two functions calculate the option 0 value for a socket. 
1313 */ 1314static inline unsigned int 1315calc_opt0h(struct socket *so, int mtu_idx) 1316{ 1317 struct tcpcb *tp = so_sototcpcb(so); 1318 int wscale = select_rcv_wscale(tp->rcv_wnd); 1319 1320 return V_NAGLE((tp->t_flags & TF_NODELAY) == 0) | 1321 V_KEEP_ALIVE((so_options_get(so) & SO_KEEPALIVE) != 0) | F_TCAM_BYPASS | 1322 V_WND_SCALE(wscale) | V_MSS_IDX(mtu_idx); 1323} 1324 1325static inline unsigned int 1326calc_opt0l(struct socket *so, int ulp_mode) 1327{ 1328 struct tcpcb *tp = so_sototcpcb(so); 1329 unsigned int val; 1330 1331 val = V_TOS(INP_TOS(tp->t_inpcb)) | V_ULP_MODE(ulp_mode) | 1332 V_RCV_BUFSIZ(min(tp->rcv_wnd >> 10, (u32)M_RCV_BUFSIZ)); 1333 1334 DPRINTF("opt0l tos=%08x rcv_wnd=%ld opt0l=%08x\n", INP_TOS(tp->t_inpcb), tp->rcv_wnd, val); 1335 return (val); 1336} 1337 1338static inline unsigned int 1339calc_opt2(const struct socket *so, struct toedev *dev) 1340{ 1341 int flv_valid; 1342 1343 flv_valid = (TOM_TUNABLE(dev, cong_alg) != -1); 1344 1345 return (V_FLAVORS_VALID(flv_valid) | 1346 V_CONG_CONTROL_FLAVOR(flv_valid ? 
TOM_TUNABLE(dev, cong_alg) : 0)); 1347} 1348 1349#if DEBUG_WR > 1 1350static int 1351count_pending_wrs(const struct toepcb *toep) 1352{ 1353 const struct mbuf *m; 1354 int n = 0; 1355 1356 wr_queue_walk(toep, m) 1357 n += m->m_pkthdr.csum_data; 1358 return (n); 1359} 1360#endif 1361 1362#if 0 1363(((*(struct tom_data **)&(dev)->l4opt)->conf.cong_alg) != -1) 1364#endif 1365 1366static void 1367mk_act_open_req(struct socket *so, struct mbuf *m, 1368 unsigned int atid, const struct l2t_entry *e) 1369{ 1370 struct cpl_act_open_req *req; 1371 struct inpcb *inp = so_sotoinpcb(so); 1372 struct tcpcb *tp = inp_inpcbtotcpcb(inp); 1373 struct toepcb *toep = tp->t_toe; 1374 struct toedev *tdev = toep->tp_toedev; 1375 1376 m_set_priority((struct mbuf *)m, mkprio(CPL_PRIORITY_SETUP, toep)); 1377 1378 req = mtod(m, struct cpl_act_open_req *); 1379 m->m_pkthdr.len = m->m_len = sizeof(*req); 1380 1381 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 1382 req->wr.wr_lo = 0; 1383 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ACT_OPEN_REQ, atid)); 1384 inp_4tuple_get(inp, &req->local_ip, &req->local_port, &req->peer_ip, &req->peer_port); 1385#if 0 1386 req->local_port = inp->inp_lport; 1387 req->peer_port = inp->inp_fport; 1388 memcpy(&req->local_ip, &inp->inp_laddr, 4); 1389 memcpy(&req->peer_ip, &inp->inp_faddr, 4); 1390#endif 1391 req->opt0h = htonl(calc_opt0h(so, toep->tp_mtu_idx) | V_L2T_IDX(e->idx) | 1392 V_TX_CHANNEL(e->smt_idx)); 1393 req->opt0l = htonl(calc_opt0l(so, toep->tp_ulp_mode)); 1394 req->params = 0; 1395 req->opt2 = htonl(calc_opt2(so, tdev)); 1396} 1397 1398 1399/* 1400 * Convert an ACT_OPEN_RPL status to an errno. 
1401 */ 1402static int 1403act_open_rpl_status_to_errno(int status) 1404{ 1405 switch (status) { 1406 case CPL_ERR_CONN_RESET: 1407 return (ECONNREFUSED); 1408 case CPL_ERR_ARP_MISS: 1409 return (EHOSTUNREACH); 1410 case CPL_ERR_CONN_TIMEDOUT: 1411 return (ETIMEDOUT); 1412 case CPL_ERR_TCAM_FULL: 1413 return (ENOMEM); 1414 case CPL_ERR_CONN_EXIST: 1415 log(LOG_ERR, "ACTIVE_OPEN_RPL: 4-tuple in use\n"); 1416 return (EADDRINUSE); 1417 default: 1418 return (EIO); 1419 } 1420} 1421 1422static void 1423fail_act_open(struct toepcb *toep, int errno) 1424{ 1425 struct tcpcb *tp = toep->tp_tp; 1426 1427 t3_release_offload_resources(toep); 1428 if (tp) { 1429 inp_wunlock(tp->t_inpcb); 1430 tcp_offload_drop(tp, errno); 1431 } 1432 1433#ifdef notyet 1434 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 1435#endif 1436} 1437 1438/* 1439 * Handle active open failures. 1440 */ 1441static void 1442active_open_failed(struct toepcb *toep, struct mbuf *m) 1443{ 1444 struct cpl_act_open_rpl *rpl = cplhdr(m); 1445 struct inpcb *inp; 1446 1447 if (toep->tp_tp == NULL) 1448 goto done; 1449 1450 inp = toep->tp_tp->t_inpcb; 1451 1452/* 1453 * Don't handle connection retry for now 1454 */ 1455#ifdef notyet 1456 struct inet_connection_sock *icsk = inet_csk(sk); 1457 1458 if (rpl->status == CPL_ERR_CONN_EXIST && 1459 icsk->icsk_retransmit_timer.function != act_open_retry_timer) { 1460 icsk->icsk_retransmit_timer.function = act_open_retry_timer; 1461 sk_reset_timer(so, &icsk->icsk_retransmit_timer, 1462 jiffies + HZ / 2); 1463 } else 1464#endif 1465 { 1466 inp_wlock(inp); 1467 /* 1468 * drops the inpcb lock 1469 */ 1470 fail_act_open(toep, act_open_rpl_status_to_errno(rpl->status)); 1471 } 1472 1473 done: 1474 m_free(m); 1475} 1476 1477/* 1478 * Return whether a failed active open has allocated a TID 1479 */ 1480static inline int 1481act_open_has_tid(int status) 1482{ 1483 return status != CPL_ERR_TCAM_FULL && status != CPL_ERR_CONN_EXIST && 1484 status != CPL_ERR_ARP_MISS; 1485} 1486 1487/* 1488 * 
Process an ACT_OPEN_RPL CPL message. 1489 */ 1490static int 1491do_act_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 1492{ 1493 struct toepcb *toep = (struct toepcb *)ctx; 1494 struct cpl_act_open_rpl *rpl = cplhdr(m); 1495 1496 if (cdev->type != T3A && act_open_has_tid(rpl->status)) 1497 cxgb_queue_tid_release(cdev, GET_TID(rpl)); 1498 1499 active_open_failed(toep, m); 1500 return (0); 1501} 1502 1503/* 1504 * Handle an ARP failure for an active open. XXX purge ofo queue 1505 * 1506 * XXX badly broken for crossed SYNs as the ATID is no longer valid. 1507 * XXX crossed SYN errors should be generated by PASS_ACCEPT_RPL which should 1508 * check SOCK_DEAD or sk->sk_sock. Or maybe generate the error here but don't 1509 * free the atid. Hmm. 1510 */ 1511#ifdef notyet 1512static void 1513act_open_req_arp_failure(struct t3cdev *dev, struct mbuf *m) 1514{ 1515 struct toepcb *toep = m_get_toep(m); 1516 struct tcpcb *tp = toep->tp_tp; 1517 struct inpcb *inp = tp->t_inpcb; 1518 struct socket *so; 1519 1520 inp_wlock(inp); 1521 if (tp->t_state == TCPS_SYN_SENT || tp->t_state == TCPS_SYN_RECEIVED) { 1522 /* 1523 * drops the inpcb lock 1524 */ 1525 fail_act_open(so, EHOSTUNREACH); 1526 printf("freeing %p\n", m); 1527 1528 m_free(m); 1529 } else 1530 inp_wunlock(inp); 1531} 1532#endif 1533/* 1534 * Send an active open request. 
1535 */ 1536int 1537t3_connect(struct toedev *tdev, struct socket *so, 1538 struct rtentry *rt, struct sockaddr *nam) 1539{ 1540 struct mbuf *m; 1541 struct l2t_entry *e; 1542 struct tom_data *d = TOM_DATA(tdev); 1543 struct inpcb *inp = so_sotoinpcb(so); 1544 struct tcpcb *tp = intotcpcb(inp); 1545 struct toepcb *toep; /* allocated by init_offload_socket */ 1546 1547 int atid; 1548 1549 toep = toepcb_alloc(); 1550 if (toep == NULL) 1551 goto out_err; 1552 1553 if ((atid = cxgb_alloc_atid(d->cdev, d->client, toep)) < 0) 1554 goto out_err; 1555 1556 e = t3_l2t_get(d->cdev, rt, rt->rt_ifp, nam); 1557 if (!e) 1558 goto free_tid; 1559 1560 inp_lock_assert(inp); 1561 m = m_gethdr(MT_DATA, M_WAITOK); 1562 1563#if 0 1564 m->m_toe.mt_toepcb = tp->t_toe; 1565 set_arp_failure_handler((struct mbuf *)m, act_open_req_arp_failure); 1566#endif 1567 so_lock(so); 1568 1569 init_offload_socket(so, tdev, atid, e, rt, toep); 1570 1571 install_offload_ops(so); 1572 1573 mk_act_open_req(so, m, atid, e); 1574 so_unlock(so); 1575 1576 soisconnecting(so); 1577 toep = tp->t_toe; 1578 m_set_toep(m, tp->t_toe); 1579 1580 toep->tp_state = TCPS_SYN_SENT; 1581 l2t_send(d->cdev, (struct mbuf *)m, e); 1582 1583 if (toep->tp_ulp_mode) 1584 t3_enable_ddp(toep, 0); 1585 return (0); 1586 1587free_tid: 1588 printf("failing connect - free atid\n"); 1589 1590 free_atid(d->cdev, atid); 1591out_err: 1592 printf("return ENOMEM\n"); 1593 return (ENOMEM); 1594} 1595 1596/* 1597 * Send an ABORT_REQ message. Cannot fail. This routine makes sure we do 1598 * not send multiple ABORT_REQs for the same connection and also that we do 1599 * not try to send a message after the connection has closed. Returns 1 if 1600 * an ABORT_REQ wasn't generated after all, 0 otherwise. 
1601 */ 1602static void 1603t3_send_reset(struct toepcb *toep) 1604{ 1605 1606 struct cpl_abort_req *req; 1607 unsigned int tid = toep->tp_tid; 1608 int mode = CPL_ABORT_SEND_RST; 1609 struct tcpcb *tp = toep->tp_tp; 1610 struct toedev *tdev = toep->tp_toedev; 1611 struct socket *so = NULL; 1612 struct mbuf *m; 1613 struct sockbuf *snd; 1614 1615 if (tp) { 1616 inp_lock_assert(tp->t_inpcb); 1617 so = inp_inpcbtosocket(tp->t_inpcb); 1618 } 1619 1620 if (__predict_false((toep->tp_flags & TP_ABORT_SHUTDOWN) || 1621 tdev == NULL)) 1622 return; 1623 toep->tp_flags |= (TP_ABORT_RPL_PENDING|TP_ABORT_SHUTDOWN); 1624 1625 snd = so_sockbuf_snd(so); 1626 /* Purge the send queue so we don't send anything after an abort. */ 1627 if (so) 1628 sbflush(snd); 1629 if ((toep->tp_flags & TP_CLOSE_CON_REQUESTED) && is_t3a(tdev)) 1630 mode |= CPL_ABORT_POST_CLOSE_REQ; 1631 1632 m = m_gethdr_nofail(sizeof(*req)); 1633 m_set_priority(m, mkprio(CPL_PRIORITY_DATA, toep)); 1634 set_arp_failure_handler(m, abort_arp_failure); 1635 1636 req = mtod(m, struct cpl_abort_req *); 1637 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_REQ)); 1638 req->wr.wr_lo = htonl(V_WR_TID(tid)); 1639 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_ABORT_REQ, tid)); 1640 req->rsvd0 = tp ? 
htonl(tp->snd_nxt) : 0; 1641 req->rsvd1 = !(toep->tp_flags & TP_DATASENT); 1642 req->cmd = mode; 1643 if (tp && (tp->t_state == TCPS_SYN_SENT)) 1644 mbufq_tail(&toep->out_of_order_queue, m); // defer 1645 else 1646 l2t_send(TOEP_T3C_DEV(toep), m, toep->tp_l2t); 1647} 1648 1649static int 1650t3_ip_ctloutput(struct socket *so, struct sockopt *sopt) 1651{ 1652 struct inpcb *inp; 1653 int error, optval; 1654 1655 if (sopt->sopt_name == IP_OPTIONS) 1656 return (ENOPROTOOPT); 1657 1658 if (sopt->sopt_name != IP_TOS) 1659 return (EOPNOTSUPP); 1660 1661 error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); 1662 1663 if (error) 1664 return (error); 1665 1666 if (optval > IPTOS_PREC_CRITIC_ECP && !suser(curthread)) 1667 return (EPERM); 1668 1669 inp = so_sotoinpcb(so); 1670 inp_wlock(inp); 1671 inp_ip_tos_set(inp, optval); 1672#if 0 1673 inp->inp_ip_tos = optval; 1674#endif 1675 t3_set_tos(inp_inpcbtotcpcb(inp)->t_toe); 1676 inp_wunlock(inp); 1677 1678 return (0); 1679} 1680 1681static int 1682t3_tcp_ctloutput(struct socket *so, struct sockopt *sopt) 1683{ 1684 int err = 0; 1685 size_t copied; 1686 1687 if (sopt->sopt_name != TCP_CONGESTION && 1688 sopt->sopt_name != TCP_NODELAY) 1689 return (EOPNOTSUPP); 1690 1691 if (sopt->sopt_name == TCP_CONGESTION) { 1692 char name[TCP_CA_NAME_MAX]; 1693 int optlen = sopt->sopt_valsize; 1694 struct tcpcb *tp; 1695 1696 if (sopt->sopt_dir == SOPT_GET) { 1697 KASSERT(0, ("unimplemented")); 1698 return (EOPNOTSUPP); 1699 } 1700 1701 if (optlen < 1) 1702 return (EINVAL); 1703 1704 err = copyinstr(sopt->sopt_val, name, 1705 min(TCP_CA_NAME_MAX - 1, optlen), &copied); 1706 if (err) 1707 return (err); 1708 if (copied < 1) 1709 return (EINVAL); 1710 1711 tp = so_sototcpcb(so); 1712 /* 1713 * XXX I need to revisit this 1714 */ 1715 if ((err = t3_set_cong_control(so, name)) == 0) { 1716#ifdef CONGESTION_CONTROL_SUPPORTED 1717 tp->t_cong_control = strdup(name, M_CXGB); 1718#endif 1719 } else 1720 return (err); 1721 } else { 1722 int 
optval, oldval; 1723 struct inpcb *inp; 1724 struct tcpcb *tp; 1725 1726 if (sopt->sopt_dir == SOPT_GET) 1727 return (EOPNOTSUPP); 1728 1729 err = sooptcopyin(sopt, &optval, sizeof optval, 1730 sizeof optval); 1731 1732 if (err) 1733 return (err); 1734 1735 inp = so_sotoinpcb(so); 1736 tp = inp_inpcbtotcpcb(inp); 1737 1738 inp_wlock(inp); 1739 1740 oldval = tp->t_flags; 1741 if (optval) 1742 tp->t_flags |= TF_NODELAY; 1743 else 1744 tp->t_flags &= ~TF_NODELAY; 1745 inp_wunlock(inp); 1746 1747 1748 if (oldval != tp->t_flags && (tp->t_toe != NULL)) 1749 t3_set_nagle(tp->t_toe); 1750 1751 } 1752 1753 return (0); 1754} 1755 1756int 1757t3_ctloutput(struct socket *so, struct sockopt *sopt) 1758{ 1759 int err; 1760 1761 if (sopt->sopt_level != IPPROTO_TCP) 1762 err = t3_ip_ctloutput(so, sopt); 1763 else 1764 err = t3_tcp_ctloutput(so, sopt); 1765 1766 if (err != EOPNOTSUPP) 1767 return (err); 1768 1769 return (tcp_ctloutput(so, sopt)); 1770} 1771 1772/* 1773 * Returns true if we need to explicitly request RST when we receive new data 1774 * on an RX-closed connection. 1775 */ 1776static inline int 1777need_rst_on_excess_rx(const struct toepcb *toep) 1778{ 1779 return (1); 1780} 1781 1782/* 1783 * Handles Rx data that arrives in a state where the socket isn't accepting 1784 * new data. 1785 */ 1786static void 1787handle_excess_rx(struct toepcb *toep, struct mbuf *m) 1788{ 1789 1790 if (need_rst_on_excess_rx(toep) && 1791 !(toep->tp_flags & TP_ABORT_SHUTDOWN)) 1792 t3_send_reset(toep); 1793 m_freem(m); 1794} 1795 1796/* 1797 * Process a get_tcb_rpl as a DDP completion (similar to RX_DDP_COMPLETE) 1798 * by getting the DDP offset from the TCB. 
1799 */ 1800static void 1801tcb_rpl_as_ddp_complete(struct toepcb *toep, struct mbuf *m) 1802{ 1803 struct ddp_state *q = &toep->tp_ddp_state; 1804 struct ddp_buf_state *bsp; 1805 struct cpl_get_tcb_rpl *hdr; 1806 unsigned int ddp_offset; 1807 struct socket *so; 1808 struct tcpcb *tp; 1809 struct sockbuf *rcv; 1810 int state; 1811 1812 uint64_t t; 1813 __be64 *tcb; 1814 1815 tp = toep->tp_tp; 1816 so = inp_inpcbtosocket(tp->t_inpcb); 1817 1818 inp_lock_assert(tp->t_inpcb); 1819 rcv = so_sockbuf_rcv(so); 1820 sockbuf_lock(rcv); 1821 1822 /* Note that we only accout for CPL_GET_TCB issued by the DDP code. 1823 * We really need a cookie in order to dispatch the RPLs. 1824 */ 1825 q->get_tcb_count--; 1826 1827 /* It is a possible that a previous CPL already invalidated UBUF DDP 1828 * and moved the cur_buf idx and hence no further processing of this 1829 * skb is required. However, the app might be sleeping on 1830 * !q->get_tcb_count and we need to wake it up. 1831 */ 1832 if (q->cancel_ubuf && !t3_ddp_ubuf_pending(toep)) { 1833 int state = so_state_get(so); 1834 1835 m_freem(m); 1836 if (__predict_true((state & SS_NOFDREF) == 0)) 1837 so_sorwakeup_locked(so); 1838 else 1839 sockbuf_unlock(rcv); 1840 1841 return; 1842 } 1843 1844 bsp = &q->buf_state[q->cur_buf]; 1845 hdr = cplhdr(m); 1846 tcb = (__be64 *)(hdr + 1); 1847 if (q->cur_buf == 0) { 1848 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF0_OFFSET) / 2]); 1849 ddp_offset = t >> (32 + S_TCB_RX_DDP_BUF0_OFFSET); 1850 } else { 1851 t = be64toh(tcb[(31 - W_TCB_RX_DDP_BUF1_OFFSET) / 2]); 1852 ddp_offset = t >> S_TCB_RX_DDP_BUF1_OFFSET; 1853 } 1854 ddp_offset &= M_TCB_RX_DDP_BUF0_OFFSET; 1855 m->m_cur_offset = bsp->cur_offset; 1856 bsp->cur_offset = ddp_offset; 1857 m->m_len = m->m_pkthdr.len = ddp_offset - m->m_cur_offset; 1858 1859 CTR5(KTR_TOM, 1860 "tcb_rpl_as_ddp_complete: idx=%d seq=0x%x hwbuf=%u ddp_offset=%u cur_offset=%u", 1861 q->cur_buf, tp->rcv_nxt, q->cur_buf, ddp_offset, m->m_cur_offset); 1862 KASSERT(ddp_offset 
>= m->m_cur_offset, 1863 ("ddp_offset=%u less than cur_offset=%u", 1864 ddp_offset, m->m_cur_offset)); 1865 1866#if 0 1867{ 1868 unsigned int ddp_flags, rcv_nxt, rx_hdr_offset, buf_idx; 1869 1870 t = be64toh(tcb[(31 - W_TCB_RX_DDP_FLAGS) / 2]); 1871 ddp_flags = (t >> S_TCB_RX_DDP_FLAGS) & M_TCB_RX_DDP_FLAGS; 1872 1873 t = be64toh(tcb[(31 - W_TCB_RCV_NXT) / 2]); 1874 rcv_nxt = t >> S_TCB_RCV_NXT; 1875 rcv_nxt &= M_TCB_RCV_NXT; 1876 1877 t = be64toh(tcb[(31 - W_TCB_RX_HDR_OFFSET) / 2]); 1878 rx_hdr_offset = t >> (32 + S_TCB_RX_HDR_OFFSET); 1879 rx_hdr_offset &= M_TCB_RX_HDR_OFFSET; 1880 1881 T3_TRACE2(TIDTB(sk), 1882 "tcb_rpl_as_ddp_complete: DDP FLAGS 0x%x dma up to 0x%x", 1883 ddp_flags, rcv_nxt - rx_hdr_offset); 1884 T3_TRACE4(TB(q), 1885 "tcb_rpl_as_ddp_complete: rcvnxt 0x%x hwbuf %u cur_offset %u cancel %u", 1886 tp->rcv_nxt, q->cur_buf, bsp->cur_offset, q->cancel_ubuf); 1887 T3_TRACE3(TB(q), 1888 "tcb_rpl_as_ddp_complete: TCB rcvnxt 0x%x hwbuf 0x%x ddp_offset %u", 1889 rcv_nxt - rx_hdr_offset, ddp_flags, ddp_offset); 1890 T3_TRACE2(TB(q), 1891 "tcb_rpl_as_ddp_complete: flags0 0x%x flags1 0x%x", 1892 q->buf_state[0].flags, q->buf_state[1].flags); 1893 1894} 1895#endif 1896 if (__predict_false(so_no_receive(so) && m->m_pkthdr.len)) { 1897 handle_excess_rx(toep, m); 1898 return; 1899 } 1900 1901#ifdef T3_TRACE 1902 if ((int)m->m_pkthdr.len < 0) { 1903 t3_ddp_error(so, "tcb_rpl_as_ddp_complete: neg len"); 1904 } 1905#endif 1906 if (bsp->flags & DDP_BF_NOCOPY) { 1907#ifdef T3_TRACE 1908 T3_TRACE0(TB(q), 1909 "tcb_rpl_as_ddp_complete: CANCEL UBUF"); 1910 1911 if (!q->cancel_ubuf && !(sk->sk_shutdown & RCV_SHUTDOWN)) { 1912 printk("!cancel_ubuf"); 1913 t3_ddp_error(sk, "tcb_rpl_as_ddp_complete: !cancel_ubuf"); 1914 } 1915#endif 1916 m->m_ddp_flags = DDP_BF_PSH | DDP_BF_NOCOPY | 1; 1917 bsp->flags &= ~(DDP_BF_NOCOPY|DDP_BF_NODATA); 1918 q->cur_buf ^= 1; 1919 } else if (bsp->flags & DDP_BF_NOFLIP) { 1920 1921 m->m_ddp_flags = 1; /* always a kernel buffer */ 1922 1923 /* 
now HW buffer carries a user buffer */ 1924 bsp->flags &= ~DDP_BF_NOFLIP; 1925 bsp->flags |= DDP_BF_NOCOPY; 1926 1927 /* It is possible that the CPL_GET_TCB_RPL doesn't indicate 1928 * any new data in which case we're done. If in addition the 1929 * offset is 0, then there wasn't a completion for the kbuf 1930 * and we need to decrement the posted count. 1931 */ 1932 if (m->m_pkthdr.len == 0) { 1933 if (ddp_offset == 0) { 1934 q->kbuf_posted--; 1935 bsp->flags |= DDP_BF_NODATA; 1936 } 1937 sockbuf_unlock(rcv); 1938 m_free(m); 1939 return; 1940 } 1941 } else { 1942 sockbuf_unlock(rcv); 1943 1944 /* This reply is for a CPL_GET_TCB_RPL to cancel the UBUF DDP, 1945 * but it got here way late and nobody cares anymore. 1946 */ 1947 m_free(m); 1948 return; 1949 } 1950 1951 m->m_ddp_gl = (unsigned char *)bsp->gl; 1952 m->m_flags |= M_DDP; 1953 m->m_seq = tp->rcv_nxt; 1954 tp->rcv_nxt += m->m_pkthdr.len; 1955 tp->t_rcvtime = ticks; 1956 CTR3(KTR_TOM, "tcb_rpl_as_ddp_complete: seq 0x%x hwbuf %u m->m_pktlen %u", 1957 m->m_seq, q->cur_buf, m->m_pkthdr.len); 1958 if (m->m_pkthdr.len == 0) { 1959 q->user_ddp_pending = 0; 1960 m_free(m); 1961 } else 1962 SBAPPEND(rcv, m); 1963 1964 state = so_state_get(so); 1965 if (__predict_true((state & SS_NOFDREF) == 0)) 1966 so_sorwakeup_locked(so); 1967 else 1968 sockbuf_unlock(rcv); 1969} 1970 1971/* 1972 * Process a CPL_GET_TCB_RPL. These can also be generated by the DDP code, 1973 * in that case they are similar to DDP completions. 
 */
static int
do_get_tcb_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	/* OK if socket doesn't exist */
	if (toep == NULL) {
		printf("null toep in do_get_tcb_rpl\n");
		return (CPL_RET_BUF_DONE);
	}

	inp_wlock(toep->tp_tp->t_inpcb);
	tcb_rpl_as_ddp_complete(toep, m);
	inp_wunlock(toep->tp_tp->t_inpcb);

	return (0);
}

/*
 * Handle DDP data arriving on a connection that fell back to a plain
 * CPL_RX_DATA.  The amount of in-flight DDP data is derived from the
 * difference between the CPL's sequence number and tp->rcv_nxt; the mbuf is
 * retagged as a DDP buffer reference (M_DDP + tp_ddp_state bookkeeping)
 * before the caller appends it to the receive socket buffer.
 * Called with the inpcb lock held (asserted below).
 */
static void
handle_ddp_data(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data *hdr = cplhdr(m);
	unsigned int rcv_nxt = ntohl(hdr->seq);
	struct sockbuf *rcv;

	/* No DDP data outstanding ahead of this CPL - nothing to do. */
	if (tp->rcv_nxt == rcv_nxt)
		return;

	inp_lock_assert(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	q = &toep->tp_ddp_state;
	bsp = &q->buf_state[q->cur_buf];
	KASSERT(SEQ_GT(rcv_nxt, tp->rcv_nxt), ("tp->rcv_nxt=0x%08x decreased rcv_nxt=0x08%x",
		rcv_nxt, tp->rcv_nxt));
	/* Length of the DDP-placed bytes this mbuf now represents. */
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR3(KTR_TOM, "rcv_nxt=0x%x tp->rcv_nxt=0x%x len=%d",
	    rcv_nxt, tp->rcv_nxt, m->m_pkthdr.len);

#ifdef T3_TRACE
	if ((int)m->m_pkthdr.len < 0) {
		t3_ddp_error(so, "handle_ddp_data: neg len");
	}
#endif
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	/* Bit 0 marks buffer-complete; see comment in new_rx_data_ddp(). */
	m->m_ddp_flags = DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
	/*
	 * For now, don't re-enable DDP after a connection fell out of DDP
	 * mode.
	 */
	q->ubuf_ddp_ready = 0;
	sockbuf_unlock(rcv);
}

/*
 * Process new data received for a connection.
 */
static void
new_rx_data(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_rx_data *hdr = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct sockbuf *rcv;
	int state;
	int len = be16toh(hdr->len);

	inp_wlock(tp->t_inpcb);

	so = inp_inpcbtosocket(tp->t_inpcb);

	/* Socket can no longer receive - return credits and drop. */
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		TRACE_EXIT;
		return;
	}

	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP)
		handle_ddp_data(toep, m);

	m->m_seq = ntohl(hdr->seq);
	m->m_ulp_mode = 0;	/* for iSCSI */

#if VALIDATE_SEQ
	if (__predict_false(m->m_seq != tp->rcv_nxt)) {
		log(LOG_ERR,
		    "%s: TID %u: Bad sequence number %u, expected %u\n",
		    toep->tp_toedev->name, toep->tp_tid, m->m_seq,
		    tp->rcv_nxt);
		m_freem(m);
		inp_wunlock(tp->t_inpcb);
		return;
	}
#endif
	/* Strip the CPL header; only payload goes to the socket buffer. */
	m_adj(m, sizeof(*hdr));

#ifdef URGENT_DATA_SUPPORTED
	/*
	 * We don't handle urgent data yet
	 */
	if (__predict_false(hdr->urg))
		handle_urg_ptr(so, tp->rcv_nxt + ntohs(hdr->urg));
	if (__predict_false(tp->urg_data == TCP_URG_NOTYET &&
	    tp->urg_seq - tp->rcv_nxt < skb->len))
		tp->urg_data = TCP_URG_VALID | skb->data[tp->urg_seq -
		    tp->rcv_nxt];
#endif
	/* Track delayed-ACK mode changes reported by the hardware. */
	if (__predict_false(hdr->dack_mode != toep->tp_delack_mode)) {
		toep->tp_delack_mode = hdr->dack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
	CTR6(KTR_TOM, "appending mbuf=%p pktlen=%d m_len=%d len=%d rcv_nxt=0x%x enqueued_bytes=%d",
	    m, m->m_pkthdr.len, m->m_len, len, tp->rcv_nxt, toep->tp_enqueued_bytes);

	/* Trust the CPL's length field over the mbuf's if it is smaller. */
	if (len < m->m_pkthdr.len)
		m->m_pkthdr.len = m->m_len = len;

	tp->rcv_nxt += m->m_pkthdr.len;
	tp->t_rcvtime = ticks;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;
	CTR2(KTR_TOM,
	    "new_rx_data: seq 0x%x len %u",
	    m->m_seq, m->m_pkthdr.len);
	inp_wunlock(tp->t_inpcb);
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);
#if 0
	if (sb_notify(rcv))
		DPRINTF("rx_data so=%p flags=0x%x len=%d\n", so, rcv->sb_flags, m->m_pkthdr.len);
#endif
	SBAPPEND(rcv, m);

#ifdef notyet
	/*
	 * We're giving too many credits to the card - but disable this check
	 * so we can keep on moving :-|
	 */
	KASSERT(rcv->sb_cc < (rcv->sb_mbmax << 1),
	    ("so=%p, data contents exceed mbmax, sb_cc=%d sb_mbmax=%d",
		so, rcv->sb_cc, rcv->sb_mbmax));
#endif


	CTR2(KTR_TOM, "sb_cc=%d sb_mbcnt=%d",
	    rcv->sb_cc, rcv->sb_mbcnt);

	/* so_sorwakeup_locked() drops the sockbuf lock on our behalf. */
	state = so_state_get(so);
	if (__predict_true((state & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DATA CPL messages.
 */
static int
do_rx_data(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	DPRINTF("rx_data len=%d\n", m->m_pkthdr.len);

	new_rx_data(toep, m);

	return (0);
}

/*
 * Process an RX_DATA_DDP message: data was placed directly into a posted
 * DDP buffer.  The mbuf is converted into a zero-length-payload carrier of
 * DDP metadata (M_DDP, m_ddp_gl, m_cur_offset, m_ddp_flags) and appended to
 * the receive socket buffer.
 */
static void
new_rx_data_ddp(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_data_ddp *hdr;
	struct socket *so;
	unsigned int ddp_len, rcv_nxt, ddp_report, end_offset, buf_idx;
	int nomoredata = 0;
	unsigned int delack_mode;
	struct sockbuf *rcv;

	tp = toep->tp_tp;
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {

		handle_excess_rx(toep, m);
		inp_wunlock(tp->t_inpcb);
		return;
	}

	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->u.ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	bsp = &q->buf_state[buf_idx];

	CTR4(KTR_TOM,
	    "new_rx_data_ddp: tp->rcv_nxt 0x%x cur_offset %u "
	    "hdr seq 0x%x len %u",
	    tp->rcv_nxt, bsp->cur_offset, ntohl(hdr->seq),
	    ntohs(hdr->len));
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: offset %u ddp_report 0x%x buf_idx=%d",
	    G_DDP_OFFSET(ddp_report), ddp_report, buf_idx);

	ddp_len = ntohs(hdr->len);
	rcv_nxt = ntohl(hdr->seq) + ddp_len;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}

	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;

	tp->t_rcvtime = ticks;
	/*
	 * Store the length in m->m_len.  We are changing the meaning of
	 * m->m_len here, we need to be very careful that nothing from now on
	 * interprets ->len of this packet the usual way.
	 */
	m->m_len = m->m_pkthdr.len = rcv_nxt - m->m_seq;
	inp_wunlock(tp->t_inpcb);
	CTR3(KTR_TOM,
	    "new_rx_data_ddp: m_len=%u rcv_next 0x%08x rcv_nxt_prev=0x%08x ",
	    m->m_len, rcv_nxt, m->m_seq);
	/*
	 * Figure out where the new data was placed in the buffer and store it
	 * in when.  Assumes the buffer offset starts at 0, consumer needs to
	 * account for page pod's pg_offset.
	 */
	end_offset = G_DDP_OFFSET(ddp_report) + ddp_len;
	m->m_cur_offset = end_offset - m->m_pkthdr.len;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	bsp->cur_offset = end_offset;
	toep->tp_enqueued_bytes += m->m_pkthdr.len;

	/*
	 * Length is only meaningful for kbuf
	 */
	if (!(bsp->flags & DDP_BF_NOCOPY))
		KASSERT(m->m_len <= bsp->gl->dgl_length,
		    ("length received exceeds ddp pages: len=%d dgl_length=%d",
			m->m_len, bsp->gl->dgl_length));

	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	KASSERT(m->m_next == NULL, ("m_len=%p", m->m_next));
	/*
	 * Bit 0 of flags stores whether the DDP buffer is completed.
	 * Note that other parts of the code depend on this being in bit 0.
	 */
	if ((bsp->flags & DDP_BF_NOINVAL) && end_offset != bsp->gl->dgl_length) {
		panic("spurious ddp completion");
	} else {
		m->m_ddp_flags = !!(ddp_report & F_DDP_BUF_COMPLETE);
		if (m->m_ddp_flags && !(bsp->flags & DDP_BF_NOFLIP))
			q->cur_buf ^= 1;	/* flip buffers */
	}

	if (bsp->flags & DDP_BF_NOCOPY) {
		m->m_ddp_flags |= (bsp->flags & DDP_BF_NOCOPY);
		bsp->flags &= ~DDP_BF_NOCOPY;
	}

	if (ddp_report & F_DDP_PSH)
		m->m_ddp_flags |= DDP_BF_PSH;
	/*
	 * NOTE(review): nomoredata is never set in this function, so the
	 * DDP_BF_NODATA branch below is currently dead - confirm intent.
	 */
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes original hdr->ddp_report */
#endif
	SBAPPEND(rcv, m);

	/* Wake the reader for PSH, completed user buffers, or kbuf data. */
	if ((so_state_get(so) & SS_NOFDREF) == 0 && ((ddp_report & F_DDP_PSH) ||
	    (((m->m_ddp_flags & (DDP_BF_NOCOPY|1)) == (DDP_BF_NOCOPY|1))
		|| !(m->m_ddp_flags & DDP_BF_NOCOPY))))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

#define DDP_ERR (F_DDP_PPOD_MISMATCH | F_DDP_LLIMIT_ERR | F_DDP_ULIMIT_ERR |\
		 F_DDP_PPOD_PARITY_ERR | F_DDP_PADDING_ERR | F_DDP_OFFSET_ERR |\
		 F_DDP_INVALID_TAG | F_DDP_COLOR_ERR | F_DDP_TID_MISMATCH |\
		 F_DDP_INVALID_PPOD)

/*
 * Handler for RX_DATA_DDP CPL messages.
 */
static int
do_rx_data_ddp(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;
	const struct cpl_rx_data_ddp *hdr = cplhdr(m);

	VALIDATE_SOCK(so);

	if (__predict_false(ntohl(hdr->ddpvld_status) & DDP_ERR)) {
		log(LOG_ERR, "RX_DATA_DDP for TID %u reported error 0x%x\n",
		    GET_TID(hdr), G_DDP_VALID(ntohl(hdr->ddpvld_status)));
		return (CPL_RET_BUF_DONE);
	}
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	new_rx_data_ddp(toep, m);
	return (0);
}

/*
 * Process an RX_DDP_COMPLETE: a posted DDP buffer filled up (or was flushed).
 * The mbuf becomes a metadata carrier for the bytes placed since the last
 * report (offset delta against bsp->cur_offset).
 */
static void
process_ddp_complete(struct toepcb *toep, struct mbuf *m)
{
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_rx_ddp_complete *hdr;
	unsigned int ddp_report, buf_idx, when, delack_mode;
	int nomoredata = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	if (__predict_false(so_no_receive(so))) {
		struct inpcb *inp = so_sotoinpcb(so);

		handle_excess_rx(toep, m);
		inp_wunlock(inp);
		return;
	}
	q = &toep->tp_ddp_state;
	hdr = cplhdr(m);
	ddp_report = ntohl(hdr->ddp_report);
	buf_idx = (ddp_report >> S_DDP_BUF_IDX) & 1;
	m->m_pkthdr.csum_data = tp->rcv_nxt;

	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[buf_idx];
	when = bsp->cur_offset;
	/* Bytes newly placed = reported offset minus last known offset. */
	m->m_len = m->m_pkthdr.len = G_DDP_OFFSET(ddp_report) - when;
	tp->rcv_nxt += m->m_len;
	tp->t_rcvtime = ticks;

	delack_mode = G_DDP_DACK_MODE(ddp_report);
	if (__predict_false(G_DDP_DACK_MODE(ddp_report) != toep->tp_delack_mode)) {
		toep->tp_delack_mode = delack_mode;
		toep->tp_delack_seq = tp->rcv_nxt;
	}
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	inp_wunlock(tp->t_inpcb);

	KASSERT(m->m_len >= 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	CTR5(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report 0x%x offset %u, len %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report), m->m_len);

	m->m_cur_offset = bsp->cur_offset;
	bsp->cur_offset += m->m_len;

	if (!(bsp->flags & DDP_BF_NOFLIP)) {
		q->cur_buf ^= 1;                     /* flip buffers */
		/* A short completion implies no more data is coming. */
		if (G_DDP_OFFSET(ddp_report) < q->kbuf[0]->dgl_length)
			nomoredata=1;
	}

	CTR4(KTR_TOM,
	    "process_ddp_complete: tp->rcv_nxt 0x%x cur_offset %u "
	    "ddp_report %u offset %u",
	    tp->rcv_nxt, bsp->cur_offset, ddp_report,
	    G_DDP_OFFSET(ddp_report));

	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_ddp_flags = (bsp->flags & DDP_BF_NOCOPY) | 1;
	if (bsp->flags & DDP_BF_NOCOPY)
		bsp->flags &= ~DDP_BF_NOCOPY;
	if (nomoredata)
		m->m_ddp_flags |= DDP_BF_NODATA;

	SBAPPEND(rcv, m);
	if ((so_state_get(so) & SS_NOFDREF) == 0)
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);
}

/*
 * Handler for RX_DDP_COMPLETE CPL messages.
 */
static int
do_rx_ddp_complete(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = ctx;

	VALIDATE_SOCK(so);
#if 0
	skb->h.th = tcphdr_skb->h.th;
#endif
	process_ddp_complete(toep, m);
	return (0);
}

/*
 * Move a socket to TIME_WAIT state.  We need to make some adjustments to the
 * socket state before calling tcp_time_wait to comply with its expectations.
 */
static void
enter_timewait(struct tcpcb *tp)
{
	/*
	 * Bump rcv_nxt for the peer FIN.  We don't do this at the time we
	 * process peer_close because we don't want to carry the peer FIN in
	 * the socket's receive queue and if we increment rcv_nxt without
	 * having the FIN in the receive queue we'll confuse facilities such
	 * as SIOCINQ.
	 */
	inp_wlock(tp->t_inpcb);
	tp->rcv_nxt++;

	tp->ts_recent_age = 0;	/* defeat recycling */
	tp->t_srtt = 0;		/* defeat tcp_update_metrics */
	inp_wunlock(tp->t_inpcb);
	tcp_offload_twstart(tp);
}

/*
 * For TCP DDP a PEER_CLOSE may also be an implicit RX_DDP_COMPLETE.  This
 * function deals with the data that may be reported along with the FIN.
 * Returns -1 if no further processing of the PEER_CLOSE is needed, >= 0 to
 * perform normal FIN-related processing.  In the latter case 1 indicates that
 * there was an implicit RX_DDP_COMPLETE and the skb should not be freed, 0 the
 * skb can be freed.
 */
static int
handle_peer_close_data(struct socket *so, struct mbuf *m)
{
	struct tcpcb *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;
	struct ddp_state *q;
	struct ddp_buf_state *bsp;
	struct cpl_peer_close *req = cplhdr(m);
	unsigned int rcv_nxt = ntohl(req->rcv_nxt) - 1; /* exclude FIN */
	struct sockbuf *rcv;

	if (tp->rcv_nxt == rcv_nxt)			/* no data */
		return (0);

	CTR0(KTR_TOM, "handle_peer_close_data");
	if (__predict_false(so_no_receive(so))) {
		handle_excess_rx(toep, m);

		/*
		 * Although we discard the data we want to process the FIN so
		 * that PEER_CLOSE + data behaves the same as RX_DATA_DDP +
		 * PEER_CLOSE without data.  In particular this PEER_CLOSE
		 * may be what will close the connection.  We return 1 because
		 * handle_excess_rx() already freed the packet.
		 */
		return (1);
	}

	inp_lock_assert(tp->t_inpcb);
	q = &toep->tp_ddp_state;
	rcv = so_sockbuf_rcv(so);
	sockbuf_lock(rcv);

	bsp = &q->buf_state[q->cur_buf];
	m->m_len = m->m_pkthdr.len = rcv_nxt - tp->rcv_nxt;
	KASSERT(m->m_len > 0, ("%s m_len=%d", __FUNCTION__, m->m_len));
	m->m_ddp_gl = (unsigned char *)bsp->gl;
	m->m_flags |= M_DDP;
	m->m_cur_offset = bsp->cur_offset;
	m->m_ddp_flags =
	    DDP_BF_PSH | (bsp->flags & DDP_BF_NOCOPY) | 1;
	m->m_seq = tp->rcv_nxt;
	tp->rcv_nxt = rcv_nxt;
	bsp->cur_offset += m->m_pkthdr.len;
	if (!(bsp->flags & DDP_BF_NOFLIP))
		q->cur_buf ^= 1;
#ifdef notyet
	skb_reset_transport_header(skb);
	tcp_hdr(skb)->fin = 0;          /* changes valid memory past CPL */
#endif
	tp->t_rcvtime = ticks;
	SBAPPEND(rcv, m);
	if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
		so_sorwakeup_locked(so);
	else
		sockbuf_unlock(rcv);

	return (1);
}

/*
 * Handle a peer FIN.
 */
static void
do_peer_fin(struct toepcb *toep, struct mbuf *m)
{
	struct socket *so;
	struct tcpcb *tp = toep->tp_tp;
	int keep, action;

	action = keep = 0;
	CTR1(KTR_TOM, "do_peer_fin state=%d", tp->t_state);
	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		printf("abort_pending set\n");

		goto out;
	}
	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	if (toep->tp_ulp_mode == ULP_MODE_TCPDDP) {
		/*
		 * NOTE(review): handle_peer_close_data() as written returns
		 * only 0 or 1, so the keep < 0 early-return below appears
		 * unreachable - confirm against its header comment.
		 */
		keep = handle_peer_close_data(so, m);
		if (keep < 0) {
			inp_wunlock(tp->t_inpcb);
			return;
		}
	}
	if (TCPS_HAVERCVDFIN(tp->t_state) == 0) {
		CTR1(KTR_TOM,
		    "waking up waiters for cantrcvmore on %p ", so);
		socantrcvmore(so);

		/*
		 * If connection is half-synchronized
		 * (ie NEEDSYN flag on) then delay ACK,
		 * so it may be piggybacked when SYN is sent.
		 * Otherwise, since we received a FIN then no
		 * more input can be expected, send ACK now.
		 */
		if (tp->t_flags & TF_NEEDSYN)
			tp->t_flags |= TF_DELACK;
		else
			tp->t_flags |= TF_ACKNOW;
		tp->rcv_nxt++;
	}

	switch (tp->t_state) {
	case TCPS_SYN_RECEIVED:
		tp->t_starttime = ticks;
		/* FALLTHROUGH */
	case TCPS_ESTABLISHED:
		tp->t_state = TCPS_CLOSE_WAIT;
		break;
	case TCPS_FIN_WAIT_1:
		tp->t_state = TCPS_CLOSING;
		break;
	case TCPS_FIN_WAIT_2:
		/*
		 * If we've sent an abort_req we must have sent it too late,
		 * HW will send us a reply telling us so, and this peer_close
		 * is really the last message for this connection and needs to
		 * be treated as an abort_rpl, i.e., transition the connection
		 * to TCP_CLOSE (note that the host stack does this at the
		 * time of generating the RST but we must wait for HW).
		 * Otherwise we enter TIME_WAIT.
		 */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;
		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received PEER_CLOSE in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid, tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);

	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}

#ifdef notyet
	/* Do not send POLL_HUP for half duplex close. */
	if ((sk->sk_shutdown & SEND_SHUTDOWN) ||
	    sk->sk_state == TCP_CLOSE)
		sk_wake_async(so, 1, POLL_HUP);
	else
		sk_wake_async(so, 1, POLL_IN);
#endif

out:
	if (!keep)
		m_free(m);
}

/*
 * Handler for PEER_CLOSE CPL messages.
 */
static int
do_peer_close(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	VALIDATE_SOCK(so);

	do_peer_fin(toep, m);
	return (0);
}

/*
 * Process a CLOSE_CON_RPL: the hardware acknowledged our FIN.  snd_una is
 * advanced from the reply and the connection is moved to the next state of
 * the close sequence, possibly entering TIME_WAIT or closing outright.
 */
static void
process_close_con_rpl(struct toepcb *toep, struct mbuf *m)
{
	struct cpl_close_con_rpl *rpl = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int action = 0;
	struct sockbuf *rcv;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(tp->t_inpcb);

	tp->snd_una = ntohl(rpl->snd_nxt) - 1;	/* exclude FIN */

	/* A pending abort supersedes the close on non-T3A hardware. */
	if (!is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_ABORT_RPL_PENDING)) {
		inp_wunlock(tp->t_inpcb);
		goto out;
	}

	CTR3(KTR_TOM, "process_close_con_rpl(%p) state=%d dead=%d", toep,
	    tp->t_state, !!(so_state_get(so) & SS_NOFDREF));

	switch (tp->t_state) {
	case TCPS_CLOSING:	/* see FIN_WAIT2 case in do_peer_fin */
		t3_release_offload_resources(toep);
		if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
			action = TCP_CLOSE;

		} else {
			action = TCP_TIMEWAIT;
		}
		break;
	case TCPS_LAST_ACK:
		/*
		 * In this state we don't care about pending abort_rpl.
		 * If we've sent abort_req it was post-close and was sent too
		 * late, this close_con_rpl is the actual last message.
		 */
		t3_release_offload_resources(toep);
		action = TCP_CLOSE;
		break;
	case TCPS_FIN_WAIT_1:
		/*
		 * If we can't receive any more
		 * data, then closing user can proceed.
		 * Starting the timer is contrary to the
		 * specification, but if we don't get a FIN
		 * we'll hang forever.
		 *
		 * XXXjl:
		 * we should release the tp also, and use a
		 * compressed state.
		 */
		if (so)
			rcv = so_sockbuf_rcv(so);
		else
			break;

		if (rcv->sb_state & SBS_CANTRCVMORE) {
			int timeout;

			if (so)
				soisdisconnected(so);
			timeout = (tcp_fast_finwait2_recycle) ?
			    tcp_finwait2_timeout : tcp_maxidle;
			tcp_timer_activate(tp, TT_2MSL, timeout);
		}
		tp->t_state = TCPS_FIN_WAIT_2;
		if ((so_options_get(so) & SO_LINGER) && so_linger_get(so) == 0 &&
		    (toep->tp_flags & TP_ABORT_SHUTDOWN) == 0) {
			action = TCP_DROP;
		}

		break;
	default:
		log(LOG_ERR,
		    "%s: TID %u received CLOSE_CON_RPL in bad state %d\n",
		    toep->tp_toedev->tod_name, toep->tp_tid,
		    tp->t_state);
	}
	inp_wunlock(tp->t_inpcb);


	/* Deferred actions run only after the inpcb lock is dropped. */
	if (action == TCP_TIMEWAIT) {
		enter_timewait(tp);
	} else if (action == TCP_DROP) {
		tcp_offload_drop(tp, 0);
	} else if (action == TCP_CLOSE) {
		tcp_offload_close(tp);
	}
out:
	m_freem(m);
}

/*
 * Handler for CLOSE_CON_RPL CPL messages.
 */
static int
do_close_con_rpl(struct t3cdev *cdev, struct mbuf *m,
			    void *ctx)
{
	struct toepcb *toep = (struct toepcb *)ctx;

	process_close_con_rpl(toep, m);
	return (0);
}

/*
 * Process abort replies.  We only process these messages if we anticipate
 * them as the coordination between SW and HW in this area is somewhat lacking
 * and sometimes we get ABORT_RPLs after we are done with the connection that
 * originated the ABORT_REQ.
2733 */ 2734static void 2735process_abort_rpl(struct toepcb *toep, struct mbuf *m) 2736{ 2737 struct tcpcb *tp = toep->tp_tp; 2738 struct socket *so; 2739 int needclose = 0; 2740 2741#ifdef T3_TRACE 2742 T3_TRACE1(TIDTB(sk), 2743 "process_abort_rpl: GTS rpl pending %d", 2744 sock_flag(sk, ABORT_RPL_PENDING)); 2745#endif 2746 2747 inp_wlock(tp->t_inpcb); 2748 so = inp_inpcbtosocket(tp->t_inpcb); 2749 2750 if (toep->tp_flags & TP_ABORT_RPL_PENDING) { 2751 /* 2752 * XXX panic on tcpdrop 2753 */ 2754 if (!(toep->tp_flags & TP_ABORT_RPL_RCVD) && !is_t3a(toep->tp_toedev)) 2755 toep->tp_flags |= TP_ABORT_RPL_RCVD; 2756 else { 2757 toep->tp_flags &= ~(TP_ABORT_RPL_RCVD|TP_ABORT_RPL_PENDING); 2758 if (!(toep->tp_flags & TP_ABORT_REQ_RCVD) || 2759 !is_t3a(toep->tp_toedev)) { 2760 if (toep->tp_flags & TP_ABORT_REQ_RCVD) 2761 panic("TP_ABORT_REQ_RCVD set"); 2762 t3_release_offload_resources(toep); 2763 needclose = 1; 2764 } 2765 } 2766 } 2767 inp_wunlock(tp->t_inpcb); 2768 2769 if (needclose) 2770 tcp_offload_close(tp); 2771 2772 m_free(m); 2773} 2774 2775/* 2776 * Handle an ABORT_RPL_RSS CPL message. 2777 */ 2778static int 2779do_abort_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx) 2780{ 2781 struct cpl_abort_rpl_rss *rpl = cplhdr(m); 2782 struct toepcb *toep; 2783 2784 /* 2785 * Ignore replies to post-close aborts indicating that the abort was 2786 * requested too late. These connections are terminated when we get 2787 * PEER_CLOSE or CLOSE_CON_RPL and by the time the abort_rpl_rss 2788 * arrives the TID is either no longer used or it has been recycled. 
2789 */ 2790 if (rpl->status == CPL_ERR_ABORT_FAILED) { 2791discard: 2792 m_free(m); 2793 return (0); 2794 } 2795 2796 toep = (struct toepcb *)ctx; 2797 2798 /* 2799 * Sometimes we've already closed the socket, e.g., a post-close 2800 * abort races with ABORT_REQ_RSS, the latter frees the socket 2801 * expecting the ABORT_REQ will fail with CPL_ERR_ABORT_FAILED, 2802 * but FW turns the ABORT_REQ into a regular one and so we get 2803 * ABORT_RPL_RSS with status 0 and no socket. Only on T3A. 2804 */ 2805 if (!toep) 2806 goto discard; 2807 2808 if (toep->tp_tp == NULL) { 2809 log(LOG_NOTICE, "removing tid for abort\n"); 2810 cxgb_remove_tid(cdev, toep, toep->tp_tid); 2811 if (toep->tp_l2t) 2812 l2t_release(L2DATA(cdev), toep->tp_l2t); 2813 2814 toepcb_release(toep); 2815 goto discard; 2816 } 2817 2818 log(LOG_NOTICE, "toep=%p\n", toep); 2819 log(LOG_NOTICE, "tp=%p\n", toep->tp_tp); 2820 2821 toepcb_hold(toep); 2822 process_abort_rpl(toep, m); 2823 toepcb_release(toep); 2824 return (0); 2825} 2826 2827/* 2828 * Convert the status code of an ABORT_REQ into a FreeBSD error code. Also 2829 * indicate whether RST should be sent in response. 2830 */ 2831static int 2832abort_status_to_errno(struct socket *so, int abort_reason, int *need_rst) 2833{ 2834 struct tcpcb *tp = so_sototcpcb(so); 2835 2836 switch (abort_reason) { 2837 case CPL_ERR_BAD_SYN: 2838#if 0 2839 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONSYN); // fall through 2840#endif 2841 case CPL_ERR_CONN_RESET: 2842 // XXX need to handle SYN_RECV due to crossed SYNs 2843 return (tp->t_state == TCPS_CLOSE_WAIT ? 
EPIPE : ECONNRESET); 2844 case CPL_ERR_XMIT_TIMEDOUT: 2845 case CPL_ERR_PERSIST_TIMEDOUT: 2846 case CPL_ERR_FINWAIT2_TIMEDOUT: 2847 case CPL_ERR_KEEPALIVE_TIMEDOUT: 2848#if 0 2849 NET_INC_STATS_BH(LINUX_MIB_TCPABORTONTIMEOUT); 2850#endif 2851 return (ETIMEDOUT); 2852 default: 2853 return (EIO); 2854 } 2855} 2856 2857static inline void 2858set_abort_rpl_wr(struct mbuf *m, unsigned int tid, int cmd) 2859{ 2860 struct cpl_abort_rpl *rpl = cplhdr(m); 2861 2862 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_OFLD_HOST_ABORT_CON_RPL)); 2863 rpl->wr.wr_lo = htonl(V_WR_TID(tid)); 2864 m->m_len = m->m_pkthdr.len = sizeof(*rpl); 2865 2866 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_ABORT_RPL, tid)); 2867 rpl->cmd = cmd; 2868} 2869 2870static void 2871send_deferred_abort_rpl(struct toedev *tdev, struct mbuf *m) 2872{ 2873 struct mbuf *reply_mbuf; 2874 struct cpl_abort_req_rss *req = cplhdr(m); 2875 2876 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_abort_rpl)); 2877 m_set_priority(m, CPL_PRIORITY_DATA); 2878 m->m_len = m->m_pkthdr.len = sizeof(struct cpl_abort_rpl); 2879 set_abort_rpl_wr(reply_mbuf, GET_TID(req), req->status); 2880 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2881 m_free(m); 2882} 2883 2884/* 2885 * Returns whether an ABORT_REQ_RSS message is a negative advice. 2886 */ 2887static inline int 2888is_neg_adv_abort(unsigned int status) 2889{ 2890 return status == CPL_ERR_RTX_NEG_ADVICE || 2891 status == CPL_ERR_PERSIST_NEG_ADVICE; 2892} 2893 2894static void 2895send_abort_rpl(struct mbuf *m, struct toedev *tdev, int rst_status) 2896{ 2897 struct mbuf *reply_mbuf; 2898 struct cpl_abort_req_rss *req = cplhdr(m); 2899 2900 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 2901 2902 if (!reply_mbuf) { 2903 /* Defer the reply. Stick rst_status into req->cmd. 
*/ 2904 req->status = rst_status; 2905 t3_defer_reply(m, tdev, send_deferred_abort_rpl); 2906 return; 2907 } 2908 2909 m_set_priority(reply_mbuf, CPL_PRIORITY_DATA); 2910 set_abort_rpl_wr(reply_mbuf, GET_TID(req), rst_status); 2911 m_free(m); 2912 2913 /* 2914 * XXX need to sync with ARP as for SYN_RECV connections we can send 2915 * these messages while ARP is pending. For other connection states 2916 * it's not a problem. 2917 */ 2918 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 2919} 2920 2921#ifdef notyet 2922static void 2923cleanup_syn_rcv_conn(struct socket *child, struct socket *parent) 2924{ 2925 CXGB_UNIMPLEMENTED(); 2926#ifdef notyet 2927 struct request_sock *req = child->sk_user_data; 2928 2929 inet_csk_reqsk_queue_removed(parent, req); 2930 synq_remove(tcp_sk(child)); 2931 __reqsk_free(req); 2932 child->sk_user_data = NULL; 2933#endif 2934} 2935 2936 2937/* 2938 * Performs the actual work to abort a SYN_RECV connection. 2939 */ 2940static void 2941do_abort_syn_rcv(struct socket *child, struct socket *parent) 2942{ 2943 struct tcpcb *parenttp = so_sototcpcb(parent); 2944 struct tcpcb *childtp = so_sototcpcb(child); 2945 2946 /* 2947 * If the server is still open we clean up the child connection, 2948 * otherwise the server already did the clean up as it was purging 2949 * its SYN queue and the skb was just sitting in its backlog. 2950 */ 2951 if (__predict_false(parenttp->t_state == TCPS_LISTEN)) { 2952 cleanup_syn_rcv_conn(child, parent); 2953 inp_wlock(childtp->t_inpcb); 2954 t3_release_offload_resources(childtp->t_toe); 2955 inp_wunlock(childtp->t_inpcb); 2956 tcp_offload_close(childtp); 2957 } 2958} 2959#endif 2960 2961/* 2962 * Handle abort requests for a SYN_RECV connection. These need extra work 2963 * because the socket is on its parent's SYN queue. 
 */
static int
abort_syn_rcv(struct socket *so, struct mbuf *m)
{
	/* Not implemented yet; the dead code below sketches the Linux flow. */
	CXGB_UNIMPLEMENTED();
#ifdef notyet
	struct socket *parent;
	struct toedev *tdev = toep->tp_toedev;
	struct t3cdev *cdev = TOM_DATA(tdev)->cdev;
	struct socket *oreq = so->so_incomp;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;

	if (!oreq)
		return -1;        /* somehow we are not on the SYN queue */

	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, oreq->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	do_abort_syn_rcv(so, parent);
	send_abort_rpl(m, tdev, CPL_ABORT_NO_RST);
	so_unlock(parent);
#endif
	return (0);
}

/*
 * Process abort requests.  If we are waiting for an ABORT_RPL we ignore this
 * request except that we need to reply to it.
 */
static void
process_abort_req(struct toepcb *toep, struct mbuf *m, struct toedev *tdev)
{
	int rst_status = CPL_ABORT_NO_RST;
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct tcpcb *tp = toep->tp_tp;
	struct socket *so;
	int needclose = 0;

	inp_wlock(tp->t_inpcb);
	so = inp_inpcbtosocket(toep->tp_tp->t_inpcb);
	/*
	 * NOTE(review): the first ABORT_REQ seen is only recorded and
	 * dropped here; teardown happens on a subsequent one.  That matches
	 * T3A's duplicated abort requests - confirm the non-T3A path.
	 */
	if ((toep->tp_flags & TP_ABORT_REQ_RCVD) == 0) {
		toep->tp_flags |= (TP_ABORT_REQ_RCVD|TP_ABORT_SHUTDOWN);
		m_free(m);
		goto skip;
	}

	toep->tp_flags &= ~TP_ABORT_REQ_RCVD;
	/*
	 * Three cases to consider:
	 * a) We haven't sent an abort_req; close the connection.
	 * b) We have sent a post-close abort_req that will get to TP too late
	 *    and will generate a CPL_ERR_ABORT_FAILED reply.  The reply will
	 *    be ignored and the connection should be closed now.
	 * c) We have sent a regular abort_req that will get to TP too late.
	 *    That will generate an abort_rpl with status 0, wait for it.
	 */
	if (((toep->tp_flags & TP_ABORT_RPL_PENDING) == 0) ||
	    (is_t3a(toep->tp_toedev) && (toep->tp_flags & TP_CLOSE_CON_REQUESTED))) {
		int error;

		error = abort_status_to_errno(so, req->status,
		    &rst_status);
		so_error_set(so, error);

		if (__predict_true((so_state_get(so) & SS_NOFDREF) == 0))
			so_sorwakeup(so);
		/*
		 * SYN_RECV needs special processing.  If abort_syn_rcv()
		 * returns 0 it has taken care of the abort.
		 */
		if ((tp->t_state == TCPS_SYN_RECEIVED) && !abort_syn_rcv(so, m))
			goto skip;

		t3_release_offload_resources(toep);
		needclose = 1;
	}
	inp_wunlock(tp->t_inpcb);

	if (needclose)
		tcp_offload_close(tp);

	/* Always acknowledge the abort request; consumes m. */
	send_abort_rpl(m, tdev, rst_status);
	return;
skip:
	inp_wunlock(tp->t_inpcb);
}

/*
 * Handle an ABORT_REQ_RSS CPL message.
 */
static int
do_abort_req(struct t3cdev *cdev, struct mbuf *m, void *ctx)
{
	const struct cpl_abort_req_rss *req = cplhdr(m);
	struct toepcb *toep = (struct toepcb *)ctx;

	/* Negative advice (retransmit/persist) is not a real abort. */
	if (is_neg_adv_abort(req->status)) {
		m_free(m);
		return (0);
	}

	log(LOG_NOTICE, "aborting tid=%d\n", toep->tp_tid);

	if ((toep->tp_flags & (TP_SYN_RCVD|TP_ABORT_REQ_RCVD)) == TP_SYN_RCVD) {
		cxgb_remove_tid(cdev, toep, toep->tp_tid);
		toep->tp_flags |= TP_ABORT_REQ_RCVD;

		send_abort_rpl(m, toep->tp_toedev, CPL_ABORT_NO_RST);
		if (toep->tp_l2t)
			l2t_release(L2DATA(cdev), toep->tp_l2t);

		/*
		 * Unhook
		 */
		toep->tp_tp->t_toe = NULL;
		toep->tp_tp->t_flags &= ~TF_TOE;
		toep->tp_tp = NULL;
		/*
		 * XXX need to call syncache_chkrst - but we don't
		 * have a way of doing that yet
		 */
		toepcb_release(toep);
		log(LOG_ERR, "abort for unestablished connection :-(\n");
		return (0);
	}
	if (toep->tp_tp == NULL) {
		log(LOG_NOTICE, "disconnected toepcb\n");
		/* should be freed momentarily */
		return (0);
	}


	/* Keep the toepcb alive across processing. */
	toepcb_hold(toep);
	process_abort_req(toep, m, toep->tp_toedev);
	toepcb_release(toep);
	return (0);
}
#ifdef notyet
static void
pass_open_abort(struct socket *child, struct socket *parent, struct mbuf *m)
{
	struct toedev *tdev = TOE_DEV(parent);

	do_abort_syn_rcv(child, parent);
	if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) {
		struct cpl_pass_accept_rpl *rpl = cplhdr(m);

		rpl->opt0h = htonl(F_TCAM_BYPASS);
		rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT);
		cxgb_ofld_send(TOM_DATA(tdev)->cdev, m);
	} else
		m_free(m);
}
#endif
static void
handle_pass_open_arp_failure(struct socket *so, struct mbuf *m)
{
	/* Not implemented yet; the dead code below sketches the Linux flow. */
	CXGB_UNIMPLEMENTED();

#ifdef notyet
	struct t3cdev *cdev;
	struct socket *parent;
	struct socket *oreq;
	struct t3c_tid_entry *t3c_stid;
	struct tid_info *t;
	struct tcpcb *otp, *tp = so_sototcpcb(so);
	struct toepcb *toep = tp->t_toe;

	/*
	 * If the connection is being aborted due to the parent listening
	 * socket going away there's nothing to do, the ABORT_REQ will close
	 * the connection.
	 */
	if (toep->tp_flags & TP_ABORT_RPL_PENDING) {
		m_free(m);
		return;
	}

	oreq = so->so_incomp;
	otp = so_sototcpcb(oreq);

	cdev = T3C_DEV(so);
	t = &(T3C_DATA(cdev))->tid_maps;
	t3c_stid = lookup_stid(t, otp->ts_recent);
	parent = ((struct listen_ctx *)t3c_stid->ctx)->lso;

	so_lock(parent);
	pass_open_abort(so, parent, m);
	so_unlock(parent);
#endif
}

/*
 * Handle an ARP failure for a CPL_PASS_ACCEPT_RPL.  This is treated similarly
 * to an ABORT_REQ_RSS in SYN_RECV as both events need to tear down a SYN_RECV
 * connection.
3163 */ 3164static void 3165pass_accept_rpl_arp_failure(struct t3cdev *cdev, struct mbuf *m) 3166{ 3167 3168#ifdef notyet 3169 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3170 BLOG_SKB_CB(skb)->dev = TOE_DEV(skb->sk); 3171#endif 3172 handle_pass_open_arp_failure(m_get_socket(m), m); 3173} 3174 3175/* 3176 * Populate a reject CPL_PASS_ACCEPT_RPL WR. 3177 */ 3178static void 3179mk_pass_accept_rpl(struct mbuf *reply_mbuf, struct mbuf *req_mbuf) 3180{ 3181 struct cpl_pass_accept_req *req = cplhdr(req_mbuf); 3182 struct cpl_pass_accept_rpl *rpl = cplhdr(reply_mbuf); 3183 unsigned int tid = GET_TID(req); 3184 3185 m_set_priority(reply_mbuf, CPL_PRIORITY_SETUP); 3186 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3187 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3188 rpl->peer_ip = req->peer_ip; // req->peer_ip not overwritten yet 3189 rpl->opt0h = htonl(F_TCAM_BYPASS); 3190 rpl->opt0l_status = htonl(CPL_PASS_OPEN_REJECT); 3191 rpl->opt2 = 0; 3192 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3193} 3194 3195/* 3196 * Send a deferred reject to an accept request. 
3197 */ 3198static void 3199reject_pass_request(struct toedev *tdev, struct mbuf *m) 3200{ 3201 struct mbuf *reply_mbuf; 3202 3203 reply_mbuf = m_gethdr_nofail(sizeof(struct cpl_pass_accept_rpl)); 3204 mk_pass_accept_rpl(reply_mbuf, m); 3205 cxgb_ofld_send(TOM_DATA(tdev)->cdev, reply_mbuf); 3206 m_free(m); 3207} 3208 3209static void 3210handle_syncache_event(int event, void *arg) 3211{ 3212 struct toepcb *toep = arg; 3213 3214 switch (event) { 3215 case TOE_SC_ENTRY_PRESENT: 3216 /* 3217 * entry already exists - free toepcb 3218 * and l2t 3219 */ 3220 printf("syncache entry present\n"); 3221 toepcb_release(toep); 3222 break; 3223 case TOE_SC_DROP: 3224 /* 3225 * The syncache has given up on this entry 3226 * either it timed out, or it was evicted 3227 * we need to explicitly release the tid 3228 */ 3229 printf("syncache entry dropped\n"); 3230 toepcb_release(toep); 3231 break; 3232 default: 3233 log(LOG_ERR, "unknown syncache event %d\n", event); 3234 break; 3235 } 3236} 3237 3238static void 3239syncache_add_accept_req(struct cpl_pass_accept_req *req, struct socket *lso, struct toepcb *toep) 3240{ 3241 struct in_conninfo inc; 3242 struct tcpopt to; 3243 struct tcphdr th; 3244 struct inpcb *inp; 3245 int mss, wsf, sack, ts; 3246 uint32_t rcv_isn = ntohl(req->rcv_isn); 3247 3248 bzero(&to, sizeof(struct tcpopt)); 3249 inp = so_sotoinpcb(lso); 3250 3251 /* 3252 * Fill out information for entering us into the syncache 3253 */ 3254 inc.inc_fport = th.th_sport = req->peer_port; 3255 inc.inc_lport = th.th_dport = req->local_port; 3256 th.th_seq = req->rcv_isn; 3257 th.th_flags = TH_SYN; 3258 3259 toep->tp_iss = toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = rcv_isn + 1; 3260 3261 3262 inc.inc_isipv6 = 0; 3263 inc.inc_len = 0; 3264 inc.inc_faddr.s_addr = req->peer_ip; 3265 inc.inc_laddr.s_addr = req->local_ip; 3266 3267 DPRINTF("syncache add of %d:%d %d:%d\n", 3268 ntohl(req->local_ip), ntohs(req->local_port), 3269 ntohl(req->peer_ip), 
ntohs(req->peer_port)); 3270 3271 mss = req->tcp_options.mss; 3272 wsf = req->tcp_options.wsf; 3273 ts = req->tcp_options.tstamp; 3274 sack = req->tcp_options.sack; 3275 to.to_mss = mss; 3276 to.to_wscale = wsf; 3277 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3278 tcp_offload_syncache_add(&inc, &to, &th, inp, &lso, &cxgb_toe_usrreqs, toep); 3279} 3280 3281 3282/* 3283 * Process a CPL_PASS_ACCEPT_REQ message. Does the part that needs the socket 3284 * lock held. Note that the sock here is a listening socket that is not owned 3285 * by the TOE. 3286 */ 3287static void 3288process_pass_accept_req(struct socket *so, struct mbuf *m, struct toedev *tdev, 3289 struct listen_ctx *lctx) 3290{ 3291 int rt_flags; 3292 struct l2t_entry *e; 3293 struct iff_mac tim; 3294 struct mbuf *reply_mbuf, *ddp_mbuf = NULL; 3295 struct cpl_pass_accept_rpl *rpl; 3296 struct cpl_pass_accept_req *req = cplhdr(m); 3297 unsigned int tid = GET_TID(req); 3298 struct tom_data *d = TOM_DATA(tdev); 3299 struct t3cdev *cdev = d->cdev; 3300 struct tcpcb *tp = so_sototcpcb(so); 3301 struct toepcb *newtoep; 3302 struct rtentry *dst; 3303 struct sockaddr_in nam; 3304 struct t3c_data *td = T3C_DATA(cdev); 3305 3306 reply_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3307 if (__predict_false(reply_mbuf == NULL)) { 3308 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3309 t3_defer_reply(m, tdev, reject_pass_request); 3310 else { 3311 cxgb_queue_tid_release(cdev, tid); 3312 m_free(m); 3313 } 3314 DPRINTF("failed to get reply_mbuf\n"); 3315 3316 goto out; 3317 } 3318 3319 if (tp->t_state != TCPS_LISTEN) { 3320 DPRINTF("socket not in listen state\n"); 3321 3322 goto reject; 3323 } 3324 3325 tim.mac_addr = req->dst_mac; 3326 tim.vlan_tag = ntohs(req->vlan_tag); 3327 if (cdev->ctl(cdev, GET_IFF_FROM_MAC, &tim) < 0 || !tim.dev) { 3328 DPRINTF("rejecting from failed GET_IFF_FROM_MAC\n"); 3329 goto reject; 3330 } 3331 3332#ifdef notyet 3333 /* 3334 * XXX do route 
lookup to confirm that we're still listening on this 3335 * address 3336 */ 3337 if (ip_route_input(skb, req->local_ip, req->peer_ip, 3338 G_PASS_OPEN_TOS(ntohl(req->tos_tid)), tim.dev)) 3339 goto reject; 3340 rt_flags = ((struct rtable *)skb->dst)->rt_flags & 3341 (RTCF_BROADCAST | RTCF_MULTICAST | RTCF_LOCAL); 3342 dst_release(skb->dst); // done with the input route, release it 3343 skb->dst = NULL; 3344 3345 if ((rt_flags & RTF_LOCAL) == 0) 3346 goto reject; 3347#endif 3348 /* 3349 * XXX 3350 */ 3351 rt_flags = RTF_LOCAL; 3352 if ((rt_flags & RTF_LOCAL) == 0) 3353 goto reject; 3354 3355 /* 3356 * Calculate values and add to syncache 3357 */ 3358 3359 newtoep = toepcb_alloc(); 3360 if (newtoep == NULL) 3361 goto reject; 3362 3363 bzero(&nam, sizeof(struct sockaddr_in)); 3364 3365 nam.sin_len = sizeof(struct sockaddr_in); 3366 nam.sin_family = AF_INET; 3367 nam.sin_addr.s_addr =req->peer_ip; 3368 dst = rtalloc2((struct sockaddr *)&nam, 1, 0); 3369 3370 if (dst == NULL) { 3371 printf("failed to find route\n"); 3372 goto reject; 3373 } 3374 e = newtoep->tp_l2t = t3_l2t_get(d->cdev, dst, tim.dev, 3375 (struct sockaddr *)&nam); 3376 if (e == NULL) { 3377 DPRINTF("failed to get l2t\n"); 3378 } 3379 /* 3380 * Point to our listen socket until accept 3381 */ 3382 newtoep->tp_tp = tp; 3383 newtoep->tp_flags = TP_SYN_RCVD; 3384 newtoep->tp_tid = tid; 3385 newtoep->tp_toedev = tdev; 3386 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3387 3388 cxgb_insert_tid(cdev, d->client, newtoep, tid); 3389 so_lock(so); 3390 LIST_INSERT_HEAD(&lctx->synq_head, newtoep, synq_entry); 3391 so_unlock(so); 3392 3393 newtoep->tp_ulp_mode = TOM_TUNABLE(tdev, ddp) && !(so_options_get(so) & SO_NO_DDP) && 3394 tp->rcv_wnd >= MIN_DDP_RCV_WIN ? 
ULP_MODE_TCPDDP : 0; 3395 3396 if (newtoep->tp_ulp_mode) { 3397 ddp_mbuf = m_gethdr(M_NOWAIT, MT_DATA); 3398 3399 if (ddp_mbuf == NULL) 3400 newtoep->tp_ulp_mode = 0; 3401 } 3402 3403 CTR4(KTR_TOM, "ddp=%d rcv_wnd=%ld min_win=%d ulp_mode=%d", 3404 TOM_TUNABLE(tdev, ddp), tp->rcv_wnd, MIN_DDP_RCV_WIN, newtoep->tp_ulp_mode); 3405 set_arp_failure_handler(reply_mbuf, pass_accept_rpl_arp_failure); 3406 /* 3407 * XXX workaround for lack of syncache drop 3408 */ 3409 toepcb_hold(newtoep); 3410 syncache_add_accept_req(req, so, newtoep); 3411 3412 rpl = cplhdr(reply_mbuf); 3413 reply_mbuf->m_pkthdr.len = reply_mbuf->m_len = sizeof(*rpl); 3414 rpl->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD)); 3415 rpl->wr.wr_lo = 0; 3416 OPCODE_TID(rpl) = htonl(MK_OPCODE_TID(CPL_PASS_ACCEPT_RPL, tid)); 3417 rpl->opt2 = htonl(calc_opt2(so, tdev)); 3418 rpl->rsvd = rpl->opt2; /* workaround for HW bug */ 3419 rpl->peer_ip = req->peer_ip; // req->peer_ip is not overwritten 3420 3421 rpl->opt0h = htonl(calc_opt0h(so, select_mss(td, NULL, dst->rt_ifp->if_mtu)) | 3422 V_L2T_IDX(e->idx) | V_TX_CHANNEL(e->smt_idx)); 3423 rpl->opt0l_status = htonl(calc_opt0l(so, newtoep->tp_ulp_mode) | 3424 CPL_PASS_OPEN_ACCEPT); 3425 3426 DPRINTF("opt0l_status=%08x\n", rpl->opt0l_status); 3427 3428 m_set_priority(reply_mbuf, mkprio(CPL_PRIORITY_SETUP, newtoep)); 3429 3430 l2t_send(cdev, reply_mbuf, e); 3431 m_free(m); 3432 if (newtoep->tp_ulp_mode) { 3433 __set_tcb_field(newtoep, ddp_mbuf, W_TCB_RX_DDP_FLAGS, 3434 V_TF_DDP_OFF(1) | 3435 TP_DDP_TIMER_WORKAROUND_MASK, 3436 V_TF_DDP_OFF(1) | 3437 TP_DDP_TIMER_WORKAROUND_VAL, 1); 3438 } else 3439 printf("not offloading\n"); 3440 3441 3442 3443 return; 3444reject: 3445 if (tdev->tod_ttid == TOE_ID_CHELSIO_T3) 3446 mk_pass_accept_rpl(reply_mbuf, m); 3447 else 3448 mk_tid_release(reply_mbuf, newtoep, tid); 3449 cxgb_ofld_send(cdev, reply_mbuf); 3450 m_free(m); 3451out: 3452#if 0 3453 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS); 3454#else 3455 return; 3456#endif 3457} 3458 
3459/* 3460 * Handle a CPL_PASS_ACCEPT_REQ message. 3461 */ 3462static int 3463do_pass_accept_req(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3464{ 3465 struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx; 3466 struct socket *lso = listen_ctx->lso; /* XXX need an interlock against the listen socket going away */ 3467 struct tom_data *d = listen_ctx->tom_data; 3468 3469#if VALIDATE_TID 3470 struct cpl_pass_accept_req *req = cplhdr(m); 3471 unsigned int tid = GET_TID(req); 3472 struct tid_info *t = &(T3C_DATA(cdev))->tid_maps; 3473 3474 if (unlikely(!lsk)) { 3475 printk(KERN_ERR "%s: PASS_ACCEPT_REQ had unknown STID %lu\n", 3476 cdev->name, 3477 (unsigned long)((union listen_entry *)ctx - 3478 t->stid_tab)); 3479 return CPL_RET_BUF_DONE; 3480 } 3481 if (unlikely(tid >= t->ntids)) { 3482 printk(KERN_ERR "%s: passive open TID %u too large\n", 3483 cdev->name, tid); 3484 return CPL_RET_BUF_DONE; 3485 } 3486 /* 3487 * For T3A the current user of the TID may have closed but its last 3488 * message(s) may have been backlogged so the TID appears to be still 3489 * in use. Just take the TID away, the connection can close at its 3490 * own leisure. For T3B this situation is a bug. 3491 */ 3492 if (!valid_new_tid(t, tid) && 3493 cdev->type != T3A) { 3494 printk(KERN_ERR "%s: passive open uses existing TID %u\n", 3495 cdev->name, tid); 3496 return CPL_RET_BUF_DONE; 3497 } 3498#endif 3499 3500 process_pass_accept_req(lso, m, &d->tdev, listen_ctx); 3501 return (0); 3502} 3503 3504/* 3505 * Called when a connection is established to translate the TCP options 3506 * reported by HW to FreeBSD's native format. 
3507 */ 3508static void 3509assign_rxopt(struct socket *so, unsigned int opt) 3510{ 3511 struct tcpcb *tp = so_sototcpcb(so); 3512 struct toepcb *toep = tp->t_toe; 3513 const struct t3c_data *td = T3C_DATA(TOEP_T3C_DEV(toep)); 3514 3515 inp_lock_assert(tp->t_inpcb); 3516 3517 toep->tp_mss_clamp = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3518 tp->t_flags |= G_TCPOPT_TSTAMP(opt) ? TF_RCVD_TSTMP : 0; 3519 tp->t_flags |= G_TCPOPT_SACK(opt) ? TF_SACK_PERMIT : 0; 3520 tp->t_flags |= G_TCPOPT_WSCALE_OK(opt) ? TF_RCVD_SCALE : 0; 3521 if ((tp->t_flags & (TF_RCVD_SCALE|TF_REQ_SCALE)) == 3522 (TF_RCVD_SCALE|TF_REQ_SCALE)) 3523 tp->rcv_scale = tp->request_r_scale; 3524} 3525 3526/* 3527 * Completes some final bits of initialization for just established connections 3528 * and changes their state to TCP_ESTABLISHED. 3529 * 3530 * snd_isn here is the ISN after the SYN, i.e., the true ISN + 1. 3531 */ 3532static void 3533make_established(struct socket *so, u32 snd_isn, unsigned int opt) 3534{ 3535 struct tcpcb *tp = so_sototcpcb(so); 3536 struct toepcb *toep = tp->t_toe; 3537 3538 toep->tp_write_seq = tp->iss = tp->snd_max = tp->snd_nxt = tp->snd_una = snd_isn; 3539 assign_rxopt(so, opt); 3540 3541 /* 3542 *XXXXXXXXXXX 3543 * 3544 */ 3545#ifdef notyet 3546 so->so_proto->pr_ctloutput = t3_ctloutput; 3547#endif 3548 3549#if 0 3550 inet_sk(sk)->id = tp->write_seq ^ jiffies; 3551#endif 3552 /* 3553 * XXX not clear what rcv_wup maps to 3554 */ 3555 /* 3556 * Causes the first RX_DATA_ACK to supply any Rx credits we couldn't 3557 * pass through opt0. 
3558 */ 3559 if (tp->rcv_wnd > (M_RCV_BUFSIZ << 10)) 3560 toep->tp_rcv_wup -= tp->rcv_wnd - (M_RCV_BUFSIZ << 10); 3561 3562 dump_toepcb(toep); 3563 3564#ifdef notyet 3565/* 3566 * no clean interface for marking ARP up to date 3567 */ 3568 dst_confirm(sk->sk_dst_cache); 3569#endif 3570 tp->t_starttime = ticks; 3571 tp->t_state = TCPS_ESTABLISHED; 3572 soisconnected(so); 3573} 3574 3575static int 3576syncache_expand_establish_req(struct cpl_pass_establish *req, struct socket **so, struct toepcb *toep) 3577{ 3578 3579 struct in_conninfo inc; 3580 struct tcpopt to; 3581 struct tcphdr th; 3582 int mss, wsf, sack, ts; 3583 struct mbuf *m = NULL; 3584 const struct t3c_data *td = T3C_DATA(TOM_DATA(toep->tp_toedev)->cdev); 3585 unsigned int opt; 3586 3587#ifdef MAC 3588#error "no MAC support" 3589#endif 3590 3591 opt = ntohs(req->tcp_opt); 3592 3593 bzero(&to, sizeof(struct tcpopt)); 3594 3595 /* 3596 * Fill out information for entering us into the syncache 3597 */ 3598 inc.inc_fport = th.th_sport = req->peer_port; 3599 inc.inc_lport = th.th_dport = req->local_port; 3600 th.th_seq = req->rcv_isn; 3601 th.th_flags = TH_ACK; 3602 3603 inc.inc_isipv6 = 0; 3604 inc.inc_len = 0; 3605 inc.inc_faddr.s_addr = req->peer_ip; 3606 inc.inc_laddr.s_addr = req->local_ip; 3607 3608 mss = td->mtus[G_TCPOPT_MSS(opt)] - 40; 3609 wsf = G_TCPOPT_WSCALE_OK(opt); 3610 ts = G_TCPOPT_TSTAMP(opt); 3611 sack = G_TCPOPT_SACK(opt); 3612 3613 to.to_mss = mss; 3614 to.to_wscale = G_TCPOPT_SND_WSCALE(opt); 3615 to.to_flags = (mss ? TOF_MSS : 0) | (wsf ? TOF_SCALE : 0) | (ts ? TOF_TS : 0) | (sack ? TOF_SACKPERM : 0); 3616 3617 DPRINTF("syncache expand of %d:%d %d:%d mss:%d wsf:%d ts:%d sack:%d\n", 3618 ntohl(req->local_ip), ntohs(req->local_port), 3619 ntohl(req->peer_ip), ntohs(req->peer_port), 3620 mss, wsf, ts, sack); 3621 return tcp_offload_syncache_expand(&inc, &to, &th, so, m); 3622} 3623 3624 3625/* 3626 * Process a CPL_PASS_ESTABLISH message. 
XXX a lot of the locking doesn't work 3627 * if we are in TCP_SYN_RECV due to crossed SYNs 3628 */ 3629static int 3630do_pass_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3631{ 3632 struct cpl_pass_establish *req = cplhdr(m); 3633 struct toepcb *toep = (struct toepcb *)ctx; 3634 struct tcpcb *tp = toep->tp_tp; 3635 struct socket *so, *lso; 3636 struct t3c_data *td = T3C_DATA(cdev); 3637 struct sockbuf *snd, *rcv; 3638 3639 // Complete socket initialization now that we have the SND_ISN 3640 3641 struct toedev *tdev; 3642 3643 3644 tdev = toep->tp_toedev; 3645 3646 inp_wlock(tp->t_inpcb); 3647 3648 /* 3649 * 3650 * XXX need to add reference while we're manipulating 3651 */ 3652 so = lso = inp_inpcbtosocket(tp->t_inpcb); 3653 3654 inp_wunlock(tp->t_inpcb); 3655 3656 so_lock(so); 3657 LIST_REMOVE(toep, synq_entry); 3658 so_unlock(so); 3659 3660 if (!syncache_expand_establish_req(req, &so, toep)) { 3661 /* 3662 * No entry 3663 */ 3664 CXGB_UNIMPLEMENTED(); 3665 } 3666 if (so == NULL) { 3667 /* 3668 * Couldn't create the socket 3669 */ 3670 CXGB_UNIMPLEMENTED(); 3671 } 3672 3673 tp = so_sototcpcb(so); 3674 inp_wlock(tp->t_inpcb); 3675 3676 snd = so_sockbuf_snd(so); 3677 rcv = so_sockbuf_rcv(so); 3678 3679 snd->sb_flags |= SB_NOCOALESCE; 3680 rcv->sb_flags |= SB_NOCOALESCE; 3681 3682 toep->tp_tp = tp; 3683 toep->tp_flags = 0; 3684 tp->t_toe = toep; 3685 reset_wr_list(toep); 3686 tp->rcv_wnd = select_rcv_wnd(tdev, so); 3687 tp->rcv_nxt = toep->tp_copied_seq; 3688 install_offload_ops(so); 3689 3690 toep->tp_wr_max = toep->tp_wr_avail = TOM_TUNABLE(tdev, max_wrs); 3691 toep->tp_wr_unacked = 0; 3692 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3693 toep->tp_qset_idx = 0; 3694 toep->tp_mtu_idx = select_mss(td, tp, toep->tp_l2t->neigh->rt_ifp->if_mtu); 3695 3696 /* 3697 * XXX Cancel any keep alive timer 3698 */ 3699 3700 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3701 3702 /* 3703 * XXX workaround for lack of syncache drop 3704 */ 3705 
toepcb_release(toep); 3706 inp_wunlock(tp->t_inpcb); 3707 3708 CTR1(KTR_TOM, "do_pass_establish tid=%u", toep->tp_tid); 3709 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3710#ifdef notyet 3711 /* 3712 * XXX not sure how these checks map to us 3713 */ 3714 if (unlikely(sk->sk_socket)) { // simultaneous opens only 3715 sk->sk_state_change(sk); 3716 sk_wake_async(so, 0, POLL_OUT); 3717 } 3718 /* 3719 * The state for the new connection is now up to date. 3720 * Next check if we should add the connection to the parent's 3721 * accept queue. When the parent closes it resets connections 3722 * on its SYN queue, so check if we are being reset. If so we 3723 * don't need to do anything more, the coming ABORT_RPL will 3724 * destroy this socket. Otherwise move the connection to the 3725 * accept queue. 3726 * 3727 * Note that we reset the synq before closing the server so if 3728 * we are not being reset the stid is still open. 3729 */ 3730 if (unlikely(!tp->forward_skb_hint)) { // removed from synq 3731 __kfree_skb(skb); 3732 goto unlock; 3733 } 3734#endif 3735 m_free(m); 3736 3737 return (0); 3738} 3739 3740/* 3741 * Fill in the right TID for CPL messages waiting in the out-of-order queue 3742 * and send them to the TOE. 3743 */ 3744static void 3745fixup_and_send_ofo(struct toepcb *toep) 3746{ 3747 struct mbuf *m; 3748 struct toedev *tdev = toep->tp_toedev; 3749 struct tcpcb *tp = toep->tp_tp; 3750 unsigned int tid = toep->tp_tid; 3751 3752 log(LOG_NOTICE, "fixup_and_send_ofo\n"); 3753 3754 inp_lock_assert(tp->t_inpcb); 3755 while ((m = mbufq_dequeue(&toep->out_of_order_queue)) != NULL) { 3756 /* 3757 * A variety of messages can be waiting but the fields we'll 3758 * be touching are common to all so any message type will do. 
3759 */ 3760 struct cpl_close_con_req *p = cplhdr(m); 3761 3762 p->wr.wr_lo = htonl(V_WR_TID(tid)); 3763 OPCODE_TID(p) = htonl(MK_OPCODE_TID(p->ot.opcode, tid)); 3764 cxgb_ofld_send(TOM_DATA(tdev)->cdev, m); 3765 } 3766} 3767 3768/* 3769 * Updates socket state from an active establish CPL message. Runs with the 3770 * socket lock held. 3771 */ 3772static void 3773socket_act_establish(struct socket *so, struct mbuf *m) 3774{ 3775 struct cpl_act_establish *req = cplhdr(m); 3776 u32 rcv_isn = ntohl(req->rcv_isn); /* real RCV_ISN + 1 */ 3777 struct tcpcb *tp = so_sototcpcb(so); 3778 struct toepcb *toep = tp->t_toe; 3779 3780 if (__predict_false(tp->t_state != TCPS_SYN_SENT)) 3781 log(LOG_ERR, "TID %u expected SYN_SENT, found %d\n", 3782 toep->tp_tid, tp->t_state); 3783 3784 tp->ts_recent_age = ticks; 3785 tp->irs = tp->rcv_wnd = tp->rcv_nxt = rcv_isn; 3786 toep->tp_delack_seq = toep->tp_rcv_wup = toep->tp_copied_seq = tp->irs; 3787 3788 make_established(so, ntohl(req->snd_isn), ntohs(req->tcp_opt)); 3789 3790 /* 3791 * Now that we finally have a TID send any CPL messages that we had to 3792 * defer for lack of a TID. 3793 */ 3794 if (mbufq_len(&toep->out_of_order_queue)) 3795 fixup_and_send_ofo(toep); 3796 3797 if (__predict_false(so_state_get(so) & SS_NOFDREF)) { 3798 /* 3799 * XXX does this even make sense? 3800 */ 3801 so_sorwakeup(so); 3802 } 3803 m_free(m); 3804#ifdef notyet 3805/* 3806 * XXX assume no write requests permitted while socket connection is 3807 * incomplete 3808 */ 3809 /* 3810 * Currently the send queue must be empty at this point because the 3811 * socket layer does not send anything before a connection is 3812 * established. To be future proof though we handle the possibility 3813 * that there are pending buffers to send (either TX_DATA or 3814 * CLOSE_CON_REQ). First we need to adjust the sequence number of the 3815 * buffers according to the just learned write_seq, and then we send 3816 * them on their way. 
3817 */ 3818 fixup_pending_writeq_buffers(sk); 3819 if (t3_push_frames(so, 1)) 3820 sk->sk_write_space(sk); 3821#endif 3822 3823 toep->tp_state = tp->t_state; 3824 V_tcpstat.tcps_connects++; 3825 3826} 3827 3828/* 3829 * Process a CPL_ACT_ESTABLISH message. 3830 */ 3831static int 3832do_act_establish(struct t3cdev *cdev, struct mbuf *m, void *ctx) 3833{ 3834 struct cpl_act_establish *req = cplhdr(m); 3835 unsigned int tid = GET_TID(req); 3836 unsigned int atid = G_PASS_OPEN_TID(ntohl(req->tos_tid)); 3837 struct toepcb *toep = (struct toepcb *)ctx; 3838 struct tcpcb *tp = toep->tp_tp; 3839 struct socket *so; 3840 struct toedev *tdev; 3841 struct tom_data *d; 3842 3843 if (tp == NULL) { 3844 free_atid(cdev, atid); 3845 return (0); 3846 } 3847 inp_wlock(tp->t_inpcb); 3848 3849 /* 3850 * XXX 3851 */ 3852 so = inp_inpcbtosocket(tp->t_inpcb); 3853 tdev = toep->tp_toedev; /* blow up here if link was down */ 3854 d = TOM_DATA(tdev); 3855 3856 /* 3857 * It's OK if the TID is currently in use, the owning socket may have 3858 * backlogged its last CPL message(s). Just take it away. 3859 */ 3860 toep->tp_tid = tid; 3861 toep->tp_tp = tp; 3862 so_insert_tid(d, toep, tid); 3863 free_atid(cdev, atid); 3864 toep->tp_qset = G_QNUM(ntohl(m->m_pkthdr.csum_data)); 3865 3866 socket_act_establish(so, m); 3867 inp_wunlock(tp->t_inpcb); 3868 CTR1(KTR_TOM, "do_act_establish tid=%u", toep->tp_tid); 3869 cxgb_log_tcb(cdev->adapter, toep->tp_tid); 3870 3871 return (0); 3872} 3873 3874/* 3875 * Process an acknowledgment of WR completion. Advance snd_una and send the 3876 * next batch of work requests from the write queue. 
3877 */ 3878static void 3879wr_ack(struct toepcb *toep, struct mbuf *m) 3880{ 3881 struct tcpcb *tp = toep->tp_tp; 3882 struct cpl_wr_ack *hdr = cplhdr(m); 3883 struct socket *so; 3884 unsigned int credits = ntohs(hdr->credits); 3885 u32 snd_una = ntohl(hdr->snd_una); 3886 int bytes = 0; 3887 struct sockbuf *snd; 3888 3889 CTR2(KTR_SPARE2, "wr_ack: snd_una=%u credits=%d", snd_una, credits); 3890 3891 inp_wlock(tp->t_inpcb); 3892 so = inp_inpcbtosocket(tp->t_inpcb); 3893 toep->tp_wr_avail += credits; 3894 if (toep->tp_wr_unacked > toep->tp_wr_max - toep->tp_wr_avail) 3895 toep->tp_wr_unacked = toep->tp_wr_max - toep->tp_wr_avail; 3896 3897 while (credits) { 3898 struct mbuf *p = peek_wr(toep); 3899 3900 if (__predict_false(!p)) { 3901 log(LOG_ERR, "%u WR_ACK credits for TID %u with " 3902 "nothing pending, state %u wr_avail=%u\n", 3903 credits, toep->tp_tid, tp->t_state, toep->tp_wr_avail); 3904 break; 3905 } 3906 CTR2(KTR_TOM, 3907 "wr_ack: p->credits=%d p->bytes=%d", 3908 p->m_pkthdr.csum_data, p->m_pkthdr.len); 3909 KASSERT(p->m_pkthdr.csum_data != 0, 3910 ("empty request still on list")); 3911 3912 if (__predict_false(credits < p->m_pkthdr.csum_data)) { 3913 3914#if DEBUG_WR > 1 3915 struct tx_data_wr *w = cplhdr(p); 3916 log(LOG_ERR, 3917 "TID %u got %u WR credits, need %u, len %u, " 3918 "main body %u, frags %u, seq # %u, ACK una %u," 3919 " ACK nxt %u, WR_AVAIL %u, WRs pending %u\n", 3920 toep->tp_tid, credits, p->csum, p->len, 3921 p->len - p->data_len, skb_shinfo(p)->nr_frags, 3922 ntohl(w->sndseq), snd_una, ntohl(hdr->snd_nxt), 3923 toep->tp_wr_avail, count_pending_wrs(tp) - credits); 3924#endif 3925 p->m_pkthdr.csum_data -= credits; 3926 break; 3927 } else { 3928 dequeue_wr(toep); 3929 credits -= p->m_pkthdr.csum_data; 3930 bytes += p->m_pkthdr.len; 3931 CTR3(KTR_TOM, 3932 "wr_ack: done with wr of %d bytes remain credits=%d wr credits=%d", 3933 p->m_pkthdr.len, credits, p->m_pkthdr.csum_data); 3934 3935 m_free(p); 3936 } 3937 } 3938 3939#if DEBUG_WR 3940 
check_wr_invariants(tp); 3941#endif 3942 3943 if (__predict_false(SEQ_LT(snd_una, tp->snd_una))) { 3944#if VALIDATE_SEQ 3945 struct tom_data *d = TOM_DATA(TOE_DEV(so)); 3946 3947 log(LOG_ERR "%s: unexpected sequence # %u in WR_ACK " 3948 "for TID %u, snd_una %u\n", (&d->tdev)->name, snd_una, 3949 toep->tp_tid, tp->snd_una); 3950#endif 3951 goto out_free; 3952 } 3953 3954 if (tp->snd_una != snd_una) { 3955 tp->snd_una = snd_una; 3956 tp->ts_recent_age = ticks; 3957#ifdef notyet 3958 /* 3959 * Keep ARP entry "minty fresh" 3960 */ 3961 dst_confirm(sk->sk_dst_cache); 3962#endif 3963 if (tp->snd_una == tp->snd_nxt) 3964 toep->tp_flags &= ~TP_TX_WAIT_IDLE; 3965 } 3966 3967 snd = so_sockbuf_snd(so); 3968 if (bytes) { 3969 CTR1(KTR_SPARE2, "wr_ack: sbdrop(%d)", bytes); 3970 snd = so_sockbuf_snd(so); 3971 sockbuf_lock(snd); 3972 sbdrop_locked(snd, bytes); 3973 so_sowwakeup_locked(so); 3974 } 3975 3976 if (snd->sb_sndptroff < snd->sb_cc) 3977 t3_push_frames(so, 0); 3978 3979out_free: 3980 inp_wunlock(tp->t_inpcb); 3981 m_free(m); 3982} 3983 3984/* 3985 * Handler for TX_DATA_ACK CPL messages. 3986 */ 3987static int 3988do_wr_ack(struct t3cdev *dev, struct mbuf *m, void *ctx) 3989{ 3990 struct toepcb *toep = (struct toepcb *)ctx; 3991 3992 VALIDATE_SOCK(so); 3993 3994 wr_ack(toep, m); 3995 return 0; 3996} 3997 3998/* 3999 * Handler for TRACE_PKT CPL messages. Just sink these packets. 4000 */ 4001static int 4002do_trace_pkt(struct t3cdev *dev, struct mbuf *m, void *ctx) 4003{ 4004 m_freem(m); 4005 return 0; 4006} 4007 4008/* 4009 * Reset a connection that is on a listener's SYN queue or accept queue, 4010 * i.e., one that has not had a struct socket associated with it. 4011 * Must be called from process context. 4012 * 4013 * Modeled after code in inet_csk_listen_stop(). 
4014 */ 4015static void 4016t3_reset_listen_child(struct socket *child) 4017{ 4018 struct tcpcb *tp = so_sototcpcb(child); 4019 4020 t3_send_reset(tp->t_toe); 4021} 4022 4023 4024static void 4025t3_child_disconnect(struct socket *so, void *arg) 4026{ 4027 struct tcpcb *tp = so_sototcpcb(so); 4028 4029 if (tp->t_flags & TF_TOE) { 4030 inp_wlock(tp->t_inpcb); 4031 t3_reset_listen_child(so); 4032 inp_wunlock(tp->t_inpcb); 4033 } 4034} 4035 4036/* 4037 * Disconnect offloaded established but not yet accepted connections sitting 4038 * on a server's accept_queue. We just send an ABORT_REQ at this point and 4039 * finish off the disconnect later as we may need to wait for the ABORT_RPL. 4040 */ 4041void 4042t3_disconnect_acceptq(struct socket *listen_so) 4043{ 4044 4045 so_lock(listen_so); 4046 so_listeners_apply_all(listen_so, t3_child_disconnect, NULL); 4047 so_unlock(listen_so); 4048} 4049 4050/* 4051 * Reset offloaded connections sitting on a server's syn queue. As above 4052 * we send ABORT_REQ and finish off when we get ABORT_RPL. 
4053 */ 4054 4055void 4056t3_reset_synq(struct listen_ctx *lctx) 4057{ 4058 struct toepcb *toep; 4059 4060 so_lock(lctx->lso); 4061 while (!LIST_EMPTY(&lctx->synq_head)) { 4062 toep = LIST_FIRST(&lctx->synq_head); 4063 LIST_REMOVE(toep, synq_entry); 4064 toep->tp_tp = NULL; 4065 t3_send_reset(toep); 4066 cxgb_remove_tid(TOEP_T3C_DEV(toep), toep, toep->tp_tid); 4067 toepcb_release(toep); 4068 } 4069 so_unlock(lctx->lso); 4070} 4071 4072 4073int 4074t3_setup_ppods(struct toepcb *toep, const struct ddp_gather_list *gl, 4075 unsigned int nppods, unsigned int tag, unsigned int maxoff, 4076 unsigned int pg_off, unsigned int color) 4077{ 4078 unsigned int i, j, pidx; 4079 struct pagepod *p; 4080 struct mbuf *m; 4081 struct ulp_mem_io *req; 4082 unsigned int tid = toep->tp_tid; 4083 const struct tom_data *td = TOM_DATA(toep->tp_toedev); 4084 unsigned int ppod_addr = tag * PPOD_SIZE + td->ddp_llimit; 4085 4086 CTR6(KTR_TOM, "t3_setup_ppods(gl=%p nppods=%u tag=%u maxoff=%u pg_off=%u color=%u)", 4087 gl, nppods, tag, maxoff, pg_off, color); 4088 4089 for (i = 0; i < nppods; ++i) { 4090 m = m_gethdr_nofail(sizeof(*req) + PPOD_SIZE); 4091 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4092 req = mtod(m, struct ulp_mem_io *); 4093 m->m_pkthdr.len = m->m_len = sizeof(*req) + PPOD_SIZE; 4094 req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4095 req->wr.wr_lo = 0; 4096 req->cmd_lock_addr = htonl(V_ULP_MEMIO_ADDR(ppod_addr >> 5) | 4097 V_ULPTX_CMD(ULP_MEM_WRITE)); 4098 req->len = htonl(V_ULP_MEMIO_DATA_LEN(PPOD_SIZE / 32) | 4099 V_ULPTX_NFLITS(PPOD_SIZE / 8 + 1)); 4100 4101 p = (struct pagepod *)(req + 1); 4102 if (__predict_false(i < nppods - NUM_SENTINEL_PPODS)) { 4103 p->pp_vld_tid = htonl(F_PPOD_VALID | V_PPOD_TID(tid)); 4104 p->pp_pgsz_tag_color = htonl(V_PPOD_TAG(tag) | 4105 V_PPOD_COLOR(color)); 4106 p->pp_max_offset = htonl(maxoff); 4107 p->pp_page_offset = htonl(pg_off); 4108 p->pp_rsvd = 0; 4109 for (pidx = 4 * i, j = 0; j < 5; ++j, ++pidx) 4110 p->pp_addr[j] = 
pidx < gl->dgl_nelem ? 4111 htobe64(VM_PAGE_TO_PHYS(gl->dgl_pages[pidx])) : 0; 4112 } else 4113 p->pp_vld_tid = 0; /* mark sentinel page pods invalid */ 4114 send_or_defer(toep, m, 0); 4115 ppod_addr += PPOD_SIZE; 4116 } 4117 return (0); 4118} 4119 4120/* 4121 * Build a CPL_BARRIER message as payload of a ULP_TX_PKT command. 4122 */ 4123static inline void 4124mk_cpl_barrier_ulp(struct cpl_barrier *b) 4125{ 4126 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)b; 4127 4128 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4129 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*b) / 8)); 4130 b->opcode = CPL_BARRIER; 4131} 4132 4133/* 4134 * Build a CPL_GET_TCB message as payload of a ULP_TX_PKT command. 4135 */ 4136static inline void 4137mk_get_tcb_ulp(struct cpl_get_tcb *req, unsigned int tid, unsigned int cpuno) 4138{ 4139 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 4140 4141 txpkt = (struct ulp_txpkt *)req; 4142 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4143 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 4144 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_GET_TCB, tid)); 4145 req->cpuno = htons(cpuno); 4146} 4147 4148/* 4149 * Build a CPL_SET_TCB_FIELD message as payload of a ULP_TX_PKT command. 4150 */ 4151static inline void 4152mk_set_tcb_field_ulp(struct cpl_set_tcb_field *req, unsigned int tid, 4153 unsigned int word, uint64_t mask, uint64_t val) 4154{ 4155 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)req; 4156 4157 CTR4(KTR_TCB, "mk_set_tcb_field_ulp(tid=%u word=0x%x mask=%jx val=%jx", 4158 tid, word, mask, val); 4159 4160 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4161 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*req) / 8)); 4162 OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_SET_TCB_FIELD, tid)); 4163 req->reply = V_NO_REPLY(1); 4164 req->cpu_idx = 0; 4165 req->word = htons(word); 4166 req->mask = htobe64(mask); 4167 req->val = htobe64(val); 4168} 4169 4170/* 4171 * Build a CPL_RX_DATA_ACK message as payload of a ULP_TX_PKT command. 
4172 */ 4173static void 4174mk_rx_data_ack_ulp(struct toepcb *toep, struct cpl_rx_data_ack *ack, 4175 unsigned int tid, unsigned int credits) 4176{ 4177 struct ulp_txpkt *txpkt = (struct ulp_txpkt *)ack; 4178 4179 txpkt->cmd_dest = htonl(V_ULPTX_CMD(ULP_TXPKT)); 4180 txpkt->len = htonl(V_ULPTX_NFLITS(sizeof(*ack) / 8)); 4181 OPCODE_TID(ack) = htonl(MK_OPCODE_TID(CPL_RX_DATA_ACK, tid)); 4182 ack->credit_dack = htonl(F_RX_MODULATE | F_RX_DACK_CHANGE | 4183 V_RX_DACK_MODE(TOM_TUNABLE(toep->tp_toedev, delack)) | 4184 V_RX_CREDITS(credits)); 4185} 4186 4187void 4188t3_cancel_ddpbuf(struct toepcb *toep, unsigned int bufidx) 4189{ 4190 unsigned int wrlen; 4191 struct mbuf *m; 4192 struct work_request_hdr *wr; 4193 struct cpl_barrier *lock; 4194 struct cpl_set_tcb_field *req; 4195 struct cpl_get_tcb *getreq; 4196 struct ddp_state *p = &toep->tp_ddp_state; 4197 4198#if 0 4199 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4200#endif 4201 wrlen = sizeof(*wr) + sizeof(*req) + 2 * sizeof(*lock) + 4202 sizeof(*getreq); 4203 m = m_gethdr_nofail(wrlen); 4204 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4205 wr = mtod(m, struct work_request_hdr *); 4206 bzero(wr, wrlen); 4207 4208 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4209 m->m_pkthdr.len = m->m_len = wrlen; 4210 4211 lock = (struct cpl_barrier *)(wr + 1); 4212 mk_cpl_barrier_ulp(lock); 4213 4214 req = (struct cpl_set_tcb_field *)(lock + 1); 4215 4216 CTR1(KTR_TCB, "t3_cancel_ddpbuf(bufidx=%u)", bufidx); 4217 4218 /* Hmmm, not sure if this actually a good thing: reactivating 4219 * the other buffer might be an issue if it has been completed 4220 * already. However, that is unlikely, since the fact that the UBUF 4221 * is not completed indicates that there is no oustanding data. 
4222 */ 4223 if (bufidx == 0) 4224 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4225 V_TF_DDP_ACTIVE_BUF(1) | 4226 V_TF_DDP_BUF0_VALID(1), 4227 V_TF_DDP_ACTIVE_BUF(1)); 4228 else 4229 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4230 V_TF_DDP_ACTIVE_BUF(1) | 4231 V_TF_DDP_BUF1_VALID(1), 0); 4232 4233 getreq = (struct cpl_get_tcb *)(req + 1); 4234 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4235 4236 mk_cpl_barrier_ulp((struct cpl_barrier *)(getreq + 1)); 4237 4238 /* Keep track of the number of oustanding CPL_GET_TCB requests 4239 */ 4240 p->get_tcb_count++; 4241 4242#ifdef T3_TRACE 4243 T3_TRACE1(TIDTB(so), 4244 "t3_cancel_ddpbuf: bufidx %u", bufidx); 4245#endif 4246 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4247} 4248 4249/** 4250 * t3_overlay_ddpbuf - overlay an existing DDP buffer with a new one 4251 * @sk: the socket associated with the buffers 4252 * @bufidx: index of HW DDP buffer (0 or 1) 4253 * @tag0: new tag for HW buffer 0 4254 * @tag1: new tag for HW buffer 1 4255 * @len: new length for HW buf @bufidx 4256 * 4257 * Sends a compound WR to overlay a new DDP buffer on top of an existing 4258 * buffer by changing the buffer tag and length and setting the valid and 4259 * active flag accordingly. The caller must ensure the new buffer is at 4260 * least as big as the existing one. Since we typically reprogram both HW 4261 * buffers this function sets both tags for convenience. Read the TCB to 4262 * determine how made data was written into the buffer before the overlay 4263 * took place. 
4264 */ 4265void 4266t3_overlay_ddpbuf(struct toepcb *toep, unsigned int bufidx, unsigned int tag0, 4267 unsigned int tag1, unsigned int len) 4268{ 4269 unsigned int wrlen; 4270 struct mbuf *m; 4271 struct work_request_hdr *wr; 4272 struct cpl_get_tcb *getreq; 4273 struct cpl_set_tcb_field *req; 4274 struct ddp_state *p = &toep->tp_ddp_state; 4275 4276 CTR4(KTR_TCB, "t3_setup_ppods(bufidx=%u tag0=%u tag1=%u len=%u)", 4277 bufidx, tag0, tag1, len); 4278#if 0 4279 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4280#endif 4281 wrlen = sizeof(*wr) + 3 * sizeof(*req) + sizeof(*getreq); 4282 m = m_gethdr_nofail(wrlen); 4283 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4284 wr = mtod(m, struct work_request_hdr *); 4285 m->m_pkthdr.len = m->m_len = wrlen; 4286 bzero(wr, wrlen); 4287 4288 4289 /* Set the ATOMIC flag to make sure that TP processes the following 4290 * CPLs in an atomic manner and no wire segments can be interleaved. 4291 */ 4292 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS) | F_WR_ATOMIC); 4293 req = (struct cpl_set_tcb_field *)(wr + 1); 4294 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_TAG, 4295 V_TCB_RX_DDP_BUF0_TAG(M_TCB_RX_DDP_BUF0_TAG) | 4296 V_TCB_RX_DDP_BUF1_TAG(M_TCB_RX_DDP_BUF1_TAG) << 32, 4297 V_TCB_RX_DDP_BUF0_TAG(tag0) | 4298 V_TCB_RX_DDP_BUF1_TAG((uint64_t)tag1) << 32); 4299 req++; 4300 if (bufidx == 0) { 4301 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_LEN, 4302 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4303 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len)); 4304 req++; 4305 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, 4306 V_TF_DDP_PUSH_DISABLE_0(1) | 4307 V_TF_DDP_BUF0_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4308 V_TF_DDP_PUSH_DISABLE_0(0) | 4309 V_TF_DDP_BUF0_VALID(1)); 4310 } else { 4311 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_LEN, 4312 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN), 4313 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len)); 4314 req++; 4315 mk_set_tcb_field_ulp(req, toep->tp_tid, 
W_TCB_RX_DDP_FLAGS, 4316 V_TF_DDP_PUSH_DISABLE_1(1) | 4317 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1), 4318 V_TF_DDP_PUSH_DISABLE_1(0) | 4319 V_TF_DDP_BUF1_VALID(1) | V_TF_DDP_ACTIVE_BUF(1)); 4320 } 4321 4322 getreq = (struct cpl_get_tcb *)(req + 1); 4323 mk_get_tcb_ulp(getreq, toep->tp_tid, toep->tp_qset); 4324 4325 /* Keep track of the number of oustanding CPL_GET_TCB requests 4326 */ 4327 p->get_tcb_count++; 4328 4329#ifdef T3_TRACE 4330 T3_TRACE4(TIDTB(sk), 4331 "t3_overlay_ddpbuf: bufidx %u tag0 %u tag1 %u " 4332 "len %d", 4333 bufidx, tag0, tag1, len); 4334#endif 4335 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4336} 4337 4338/* 4339 * Sends a compound WR containing all the CPL messages needed to program the 4340 * two HW DDP buffers, namely optionally setting up the length and offset of 4341 * each buffer, programming the DDP flags, and optionally sending RX_DATA_ACK. 4342 */ 4343void 4344t3_setup_ddpbufs(struct toepcb *toep, unsigned int len0, unsigned int offset0, 4345 unsigned int len1, unsigned int offset1, 4346 uint64_t ddp_flags, uint64_t flag_mask, int modulate) 4347{ 4348 unsigned int wrlen; 4349 struct mbuf *m; 4350 struct work_request_hdr *wr; 4351 struct cpl_set_tcb_field *req; 4352 4353 CTR6(KTR_TCB, "t3_setup_ddpbufs(len0=%u offset0=%u len1=%u offset1=%u ddp_flags=0x%08x%08x ", 4354 len0, offset0, len1, offset1, ddp_flags >> 32, ddp_flags & 0xffffffff); 4355 4356#if 0 4357 SOCKBUF_LOCK_ASSERT(&toeptoso(toep)->so_rcv); 4358#endif 4359 wrlen = sizeof(*wr) + sizeof(*req) + (len0 ? sizeof(*req) : 0) + 4360 (len1 ? sizeof(*req) : 0) + 4361 (modulate ? 
sizeof(struct cpl_rx_data_ack) : 0); 4362 m = m_gethdr_nofail(wrlen); 4363 m_set_priority(m, mkprio(CPL_PRIORITY_CONTROL, toep)); 4364 wr = mtod(m, struct work_request_hdr *); 4365 bzero(wr, wrlen); 4366 4367 wr->wr_hi = htonl(V_WR_OP(FW_WROPCODE_BYPASS)); 4368 m->m_pkthdr.len = m->m_len = wrlen; 4369 4370 req = (struct cpl_set_tcb_field *)(wr + 1); 4371 if (len0) { /* program buffer 0 offset and length */ 4372 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF0_OFFSET, 4373 V_TCB_RX_DDP_BUF0_OFFSET(M_TCB_RX_DDP_BUF0_OFFSET) | 4374 V_TCB_RX_DDP_BUF0_LEN(M_TCB_RX_DDP_BUF0_LEN), 4375 V_TCB_RX_DDP_BUF0_OFFSET((uint64_t)offset0) | 4376 V_TCB_RX_DDP_BUF0_LEN((uint64_t)len0)); 4377 req++; 4378 } 4379 if (len1) { /* program buffer 1 offset and length */ 4380 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_BUF1_OFFSET, 4381 V_TCB_RX_DDP_BUF1_OFFSET(M_TCB_RX_DDP_BUF1_OFFSET) | 4382 V_TCB_RX_DDP_BUF1_LEN(M_TCB_RX_DDP_BUF1_LEN) << 32, 4383 V_TCB_RX_DDP_BUF1_OFFSET((uint64_t)offset1) | 4384 V_TCB_RX_DDP_BUF1_LEN((uint64_t)len1) << 32); 4385 req++; 4386 } 4387 4388 mk_set_tcb_field_ulp(req, toep->tp_tid, W_TCB_RX_DDP_FLAGS, flag_mask, 4389 ddp_flags); 4390 4391 if (modulate) { 4392 mk_rx_data_ack_ulp(toep, 4393 (struct cpl_rx_data_ack *)(req + 1), toep->tp_tid, 4394 toep->tp_copied_seq - toep->tp_rcv_wup); 4395 toep->tp_rcv_wup = toep->tp_copied_seq; 4396 } 4397 4398#ifdef T3_TRACE 4399 T3_TRACE5(TIDTB(sk), 4400 "t3_setup_ddpbufs: len0 %u len1 %u ddp_flags 0x%08x%08x " 4401 "modulate %d", 4402 len0, len1, ddp_flags >> 32, ddp_flags & 0xffffffff, 4403 modulate); 4404#endif 4405 4406 cxgb_ofld_send(TOEP_T3C_DEV(toep), m); 4407} 4408 4409void 4410t3_init_wr_tab(unsigned int wr_len) 4411{ 4412 int i; 4413 4414 if (mbuf_wrs[1]) /* already initialized */ 4415 return; 4416 4417 for (i = 1; i < ARRAY_SIZE(mbuf_wrs); i++) { 4418 int sgl_len = (3 * i) / 2 + (i & 1); 4419 4420 sgl_len += 3; 4421 mbuf_wrs[i] = sgl_len <= wr_len ? 
4422 1 : 1 + (sgl_len - 2) / (wr_len - 1); 4423 } 4424 4425 wrlen = wr_len * 8; 4426} 4427 4428int 4429t3_init_cpl_io(void) 4430{ 4431#ifdef notyet 4432 tcphdr_skb = alloc_skb(sizeof(struct tcphdr), GFP_KERNEL); 4433 if (!tcphdr_skb) { 4434 log(LOG_ERR, 4435 "Chelsio TCP offload: can't allocate sk_buff\n"); 4436 return -1; 4437 } 4438 skb_put(tcphdr_skb, sizeof(struct tcphdr)); 4439 tcphdr_skb->h.raw = tcphdr_skb->data; 4440 memset(tcphdr_skb->data, 0, tcphdr_skb->len); 4441#endif 4442 4443 t3tom_register_cpl_handler(CPL_ACT_ESTABLISH, do_act_establish); 4444 t3tom_register_cpl_handler(CPL_ACT_OPEN_RPL, do_act_open_rpl); 4445 t3tom_register_cpl_handler(CPL_TX_DMA_ACK, do_wr_ack); 4446 t3tom_register_cpl_handler(CPL_RX_DATA, do_rx_data); 4447 t3tom_register_cpl_handler(CPL_CLOSE_CON_RPL, do_close_con_rpl); 4448 t3tom_register_cpl_handler(CPL_PEER_CLOSE, do_peer_close); 4449 t3tom_register_cpl_handler(CPL_PASS_ESTABLISH, do_pass_establish); 4450 t3tom_register_cpl_handler(CPL_PASS_ACCEPT_REQ, do_pass_accept_req); 4451 t3tom_register_cpl_handler(CPL_ABORT_REQ_RSS, do_abort_req); 4452 t3tom_register_cpl_handler(CPL_ABORT_RPL_RSS, do_abort_rpl); 4453 t3tom_register_cpl_handler(CPL_RX_DATA_DDP, do_rx_data_ddp); 4454 t3tom_register_cpl_handler(CPL_RX_DDP_COMPLETE, do_rx_ddp_complete); 4455 t3tom_register_cpl_handler(CPL_RX_URG_NOTIFY, do_rx_urg_notify); 4456 t3tom_register_cpl_handler(CPL_TRACE_PKT, do_trace_pkt); 4457 t3tom_register_cpl_handler(CPL_GET_TCB_RPL, do_get_tcb_rpl); 4458 return (0); 4459} 4460 4461