cxgb_listen.c revision 178302
1/**************************************************************************
2
3Copyright (c) 2007, Chelsio Inc.
4All rights reserved.
5
6Redistribution and use in source and binary forms, with or without
7modification, are permitted provided that the following conditions are met:
8
9 1. Redistributions of source code must retain the above copyright notice,
10    this list of conditions and the following disclaimer.
11
12 2. Neither the name of the Chelsio Corporation nor the names of its
13    contributors may be used to endorse or promote products derived from
14    this software without specific prior written permission.
15
16THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26POSSIBILITY OF SUCH DAMAGE.
27
28***************************************************************************/
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD: head/sys/dev/cxgb/ulp/tom/cxgb_listen.c 178302 2008-04-19 03:22:43Z kmacy $");
32
33#include <sys/param.h>
34#include <sys/systm.h>
35#include <sys/fcntl.h>
36#include <sys/limits.h>
37#include <sys/lock.h>
38#include <sys/mbuf.h>
39#include <sys/mutex.h>
40#include <sys/socket.h>
41#include <sys/socketvar.h>
42#include <sys/syslog.h>
43
44#include <net/if.h>
45#include <net/route.h>
46
47#include <netinet/in.h>
48#include <netinet/in_pcb.h>
49#include <netinet/in_systm.h>
50#include <netinet/in_var.h>
51
52
53#include <dev/cxgb/cxgb_osdep.h>
54#include <dev/cxgb/sys/mbufq.h>
55
56#include <netinet/tcp.h>
57#include <netinet/tcp_var.h>
58#include <netinet/tcp_fsm.h>
59
60#include <netinet/tcp_offload.h>
61#include <net/route.h>
62
63#include <dev/cxgb/t3cdev.h>
64#include <dev/cxgb/common/cxgb_firmware_exports.h>
65#include <dev/cxgb/common/cxgb_t3_cpl.h>
66#include <dev/cxgb/common/cxgb_tcb.h>
67#include <dev/cxgb/common/cxgb_ctl_defs.h>
68#include <dev/cxgb/cxgb_offload.h>
69#include <dev/cxgb/ulp/toecore/cxgb_toedev.h>
70#include <dev/cxgb/ulp/tom/cxgb_defs.h>
71#include <dev/cxgb/ulp/tom/cxgb_tom.h>
72#include <dev/cxgb/ulp/tom/cxgb_t3_ddp.h>
73#include <dev/cxgb/ulp/tom/cxgb_toepcb.h>
74
75
/* Forward declarations for the listen hash table helpers defined later in this file. */
static struct listen_info *listen_hash_add(struct tom_data *d,
    struct socket *so, unsigned int stid);
static int listen_hash_del(struct tom_data *d, struct socket *so);
78
79/*
80 * Process a CPL_CLOSE_LISTSRV_RPL message.  If the status is good we release
81 * the STID.
82 */
83static int
84do_close_server_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
85{
86	struct cpl_close_listserv_rpl *rpl = cplhdr(m);
87	unsigned int stid = GET_TID(rpl);
88
89	if (rpl->status != CPL_ERR_NONE)
90		log(LOG_ERR, "Unexpected CLOSE_LISTSRV_RPL status %u for "
91		       "STID %u\n", rpl->status, stid);
92	else {
93		struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
94
95		cxgb_free_stid(cdev, stid);
96		free(listen_ctx, M_CXGB);
97	}
98
99	return (CPL_RET_BUF_DONE);
100}
101
102/*
103 * Process a CPL_PASS_OPEN_RPL message.  Remove the socket from the listen hash
104 * table and free the STID if there was any error, otherwise nothing to do.
105 */
106static int
107do_pass_open_rpl(struct t3cdev *cdev, struct mbuf *m, void *ctx)
108{
109       	struct cpl_pass_open_rpl *rpl = cplhdr(m);
110
111	if (rpl->status != CPL_ERR_NONE) {
112		int stid = GET_TID(rpl);
113		struct listen_ctx *listen_ctx = (struct listen_ctx *)ctx;
114		struct tom_data *d = listen_ctx->tom_data;
115		struct socket *lso = listen_ctx->lso;
116
117#if VALIDATE_TID
118		if (!lso)
119			return (CPL_RET_UNKNOWN_TID | CPL_RET_BUF_DONE);
120#endif
121		/*
122		 * Note: It is safe to unconditionally call listen_hash_del()
123		 * at this point without risking unhashing a reincarnation of
124		 * an already closed socket (i.e., there is no listen, close,
125		 * listen, free the sock for the second listen while processing
126		 * a message for the first race) because we are still holding
127		 * a reference on the socket.  It is possible that the unhash
128		 * will fail because the socket is already closed, but we can't
129		 * unhash the wrong socket because it is impossible for the
130		 * socket to which this message refers to have reincarnated.
131		 */
132		listen_hash_del(d, lso);
133		cxgb_free_stid(cdev, stid);
134#ifdef notyet
135		/*
136		 * XXX need to unreference the inpcb
137		 * but we have no way of knowing that other TOMs aren't referencing it
138		 */
139		sock_put(lso);
140#endif
141		free(listen_ctx, M_CXGB);
142	}
143	return CPL_RET_BUF_DONE;
144}
145
146void
147t3_init_listen_cpl_handlers(void)
148{
149	t3tom_register_cpl_handler(CPL_PASS_OPEN_RPL, do_pass_open_rpl);
150	t3tom_register_cpl_handler(CPL_CLOSE_LISTSRV_RPL, do_close_server_rpl);
151}
152
153static inline int
154listen_hashfn(const struct socket *so)
155{
156	return ((unsigned long)so >> 10) & (LISTEN_INFO_HASH_SIZE - 1);
157}
158
159/*
160 * Create and add a listen_info entry to the listen hash table.  This and the
161 * listen hash table functions below cannot be called from softirqs.
162 */
163static struct listen_info *
164listen_hash_add(struct tom_data *d, struct socket *so, unsigned int stid)
165{
166	struct listen_info *p;
167
168	p = malloc(sizeof(*p), M_CXGB, M_NOWAIT|M_ZERO);
169	if (p) {
170		int bucket = listen_hashfn(so);
171
172		p->so = so;	/* just a key, no need to take a reference */
173		p->stid = stid;
174		mtx_lock(&d->listen_lock);
175		p->next = d->listen_hash_tab[bucket];
176		d->listen_hash_tab[bucket] = p;
177		mtx_unlock(&d->listen_lock);
178	}
179	return p;
180}
181
182/*
183 * Given a pointer to a listening socket return its server TID by consulting
184 * the socket->stid map.  Returns -1 if the socket is not in the map.
185 */
186static int
187listen_hash_find(struct tom_data *d, struct socket *so)
188{
189	int stid = -1, bucket = listen_hashfn(so);
190	struct listen_info *p;
191
192	mtx_lock(&d->listen_lock);
193	for (p = d->listen_hash_tab[bucket]; p; p = p->next)
194		if (p->so == so) {
195			stid = p->stid;
196			break;
197		}
198	mtx_unlock(&d->listen_lock);
199	return stid;
200}
201
202/*
203 * Delete the listen_info structure for a listening socket.  Returns the server
204 * TID for the socket if it is present in the socket->stid map, or -1.
205 */
206static int
207listen_hash_del(struct tom_data *d, struct socket *so)
208{
209	int bucket, stid = -1;
210	struct listen_info *p, **prev;
211
212	bucket = listen_hashfn(so);
213	prev  = &d->listen_hash_tab[bucket];
214
215	mtx_lock(&d->listen_lock);
216	for (p = *prev; p; prev = &p->next, p = p->next)
217		if (p->so == so) {
218			stid = p->stid;
219			*prev = p->next;
220			free(p, M_CXGB);
221			break;
222		}
223	mtx_unlock(&d->listen_lock);
224
225	return (stid);
226}
227
228/*
229 * Start a listening server by sending a passive open request to HW.
230 */
231void
232t3_listen_start(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
233{
234	int stid;
235	struct mbuf *m;
236	struct cpl_pass_open_req *req;
237	struct tom_data *d = TOM_DATA(dev);
238	struct inpcb *inp = sotoinpcb(so);
239	struct listen_ctx *ctx;
240
241	if (!TOM_TUNABLE(dev, activated))
242		return;
243
244	if (listen_hash_find(d, so) != -1)
245		return;
246
247	CTR1(KTR_TOM, "start listen on port %u", ntohs(inp->inp_lport));
248	ctx = malloc(sizeof(*ctx), M_CXGB, M_NOWAIT|M_ZERO);
249
250	if (!ctx)
251		return;
252
253	ctx->tom_data = d;
254	ctx->lso = so;
255	ctx->ulp_mode = TOM_TUNABLE(dev, ddp) && !(so->so_options & SO_NO_DDP) ? ULP_MODE_TCPDDP : 0;
256	LIST_INIT(&ctx->synq_head);
257
258	stid = cxgb_alloc_stid(d->cdev, d->client, ctx);
259	if (stid < 0)
260		goto free_ctx;
261
262	m = m_gethdr(M_NOWAIT, MT_DATA);
263	if (m == NULL)
264		goto free_stid;
265	m->m_pkthdr.len = m->m_len = sizeof(*req);
266
267	if (!listen_hash_add(d, so, stid))
268		goto free_all;
269
270	req = mtod(m, struct cpl_pass_open_req *);
271	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
272	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_PASS_OPEN_REQ, stid));
273	req->local_port = inp->inp_lport;
274	memcpy(&req->local_ip, &inp->inp_laddr, 4);
275	req->peer_port = 0;
276	req->peer_ip = 0;
277	req->peer_netmask = 0;
278	req->opt0h = htonl(F_DELACK | F_TCAM_BYPASS);
279	req->opt0l = htonl(V_RCV_BUFSIZ(16));
280	req->opt1 = htonl(V_CONN_POLICY(CPL_CONN_POLICY_ASK));
281
282	m_set_priority(m, CPL_PRIORITY_LISTEN);
283	cxgb_ofld_send(cdev, m);
284	return;
285
286free_all:
287	m_free(m);
288free_stid:
289	cxgb_free_stid(cdev, stid);
290#if 0
291	sock_put(sk);
292#endif
293free_ctx:
294	free(ctx, M_CXGB);
295}
296
297/*
298 * Stop a listening server by sending a close_listsvr request to HW.
299 * The server TID is freed when we get the reply.
300 */
301void
302t3_listen_stop(struct toedev *dev, struct socket *so, struct t3cdev *cdev)
303{
304	struct mbuf *m;
305	struct cpl_close_listserv_req *req;
306	struct listen_ctx *lctx;
307	int stid = listen_hash_del(TOM_DATA(dev), so);
308
309	if (stid < 0)
310		return;
311
312	lctx = cxgb_get_lctx(cdev, stid);
313	/*
314	 * Do this early so embryonic connections are marked as being aborted
315	 * while the stid is still open.  This ensures pass_establish messages
316	 * that arrive while we are closing the server will be able to locate
317	 * the listening socket.
318	 */
319	t3_reset_synq(lctx);
320
321	/* Send the close ASAP to stop further passive opens */
322	m = m_gethdr(M_NOWAIT, MT_DATA);
323	if (m == NULL) {
324		/*
325		 * XXX allocate from lowmem cache
326		 */
327	}
328	m->m_pkthdr.len = m->m_len = sizeof(*req);
329
330	req = mtod(m, struct cpl_close_listserv_req *);
331	req->wr.wr_hi = htonl(V_WR_OP(FW_WROPCODE_FORWARD));
332	OPCODE_TID(req) = htonl(MK_OPCODE_TID(CPL_CLOSE_LISTSRV_REQ, stid));
333	req->cpu_idx = 0;
334	m_set_priority(m, CPL_PRIORITY_LISTEN);
335	cxgb_ofld_send(cdev, m);
336
337	t3_disconnect_acceptq(so);
338}
339