1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21/*
22 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
23 * Use is subject to license terms.
24 */
25
26#include <sys/ib/clients/rds/rds.h>
27#include <sys/ib/clients/rds/rds_kstat.h>
28
29#include <inet/ipclassifier.h>
30
/*
 * Named kstat counters published by rds_init() as the data of the
 * "rds_kstat" kstat.  Every counter is a KSTAT_DATA_ULONG.
 *
 * The initialiser is positional, so the entries must stay in the same
 * order as the members of struct rds_kstat_s (declared outside this file).
 */
struct rds_kstat_s rds_kstat = {
	{"rds_nports",			KSTAT_DATA_ULONG},
	{"rds_nsessions",		KSTAT_DATA_ULONG},
	{"rds_tx_bytes",		KSTAT_DATA_ULONG},
	{"rds_tx_pkts",			KSTAT_DATA_ULONG},
	{"rds_tx_errors",		KSTAT_DATA_ULONG},
	{"rds_rx_bytes",		KSTAT_DATA_ULONG},
	{"rds_rx_pkts",			KSTAT_DATA_ULONG},
	{"rds_rx_pkts_pending",		KSTAT_DATA_ULONG},
	{"rds_rx_errors",		KSTAT_DATA_ULONG},
	{"rds_tx_acks",			KSTAT_DATA_ULONG},
	{"rds_post_recv_buf_called",	KSTAT_DATA_ULONG},
	{"rds_stalls_triggered",	KSTAT_DATA_ULONG},
	{"rds_stalls_sent",		KSTAT_DATA_ULONG},
	{"rds_unstalls_triggered",	KSTAT_DATA_ULONG},
	{"rds_unstalls_sent",		KSTAT_DATA_ULONG},
	{"rds_stalls_recvd",		KSTAT_DATA_ULONG},
	{"rds_unstalls_recvd",		KSTAT_DATA_ULONG},
	{"rds_stalls_ignored",		KSTAT_DATA_ULONG},
	{"rds_enobufs",			KSTAT_DATA_ULONG},
	{"rds_ewouldblocks",		KSTAT_DATA_ULONG},
	{"rds_failovers",		KSTAT_DATA_ULONG},
	{"rds_port_quota",		KSTAT_DATA_ULONG},
	{"rds_port_quota_adjusted",	KSTAT_DATA_ULONG},
};
56
/* kstat handle set by rds_init(); stays NULL if kstat_create() fails. */
kstat_t *rds_kstatsp;
/* Installed as ks_lock of rds_kstatsp and taken by the rds_*_kstat() helpers. */
static kmutex_t rds_kstat_mutex;


/* kmem cache from which rds_create() allocates rds_t structures. */
struct	kmem_cache	*rds_alloc_cache;

/* Bucket count of rds_bind_fanout; rds_hash_init() forces a power of two. */
uint_t	rds_bind_fanout_size = RDS_BIND_FANOUT_SIZE;
/* Bind hash table, allocated by rds_hash_init() and freed by rds_fini(). */
rds_bf_t *rds_bind_fanout;
65
66void
67rds_increment_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
68{
69	if (lock)
70		mutex_enter(&rds_kstat_mutex);
71	ksnp->value.ul += num;
72	if (lock)
73		mutex_exit(&rds_kstat_mutex);
74}
75
76void
77rds_decrement_kstat(kstat_named_t *ksnp, boolean_t lock, uint_t num)
78{
79	if (lock)
80		mutex_enter(&rds_kstat_mutex);
81	ksnp->value.ul -= num;
82	if (lock)
83		mutex_exit(&rds_kstat_mutex);
84}
85
86void
87rds_set_kstat(kstat_named_t *ksnp, boolean_t lock, ulong_t num)
88{
89	if (lock)
90		mutex_enter(&rds_kstat_mutex);
91	ksnp->value.ul = num;
92	if (lock)
93		mutex_exit(&rds_kstat_mutex);
94}
95
96ulong_t
97rds_get_kstat(kstat_named_t *ksnp, boolean_t lock)
98{
99	ulong_t	value;
100
101	if (lock)
102		mutex_enter(&rds_kstat_mutex);
103	value = ksnp->value.ul;
104	if (lock)
105		mutex_exit(&rds_kstat_mutex);
106
107	return (value);
108}
109
110
111void
112rds_fini()
113{
114	int	i;
115
116	for (i = 0; i < rds_bind_fanout_size; i++) {
117		mutex_destroy(&rds_bind_fanout[i].rds_bf_lock);
118	}
119	kmem_free(rds_bind_fanout, rds_bind_fanout_size * sizeof (rds_bf_t));
120
121	kmem_cache_destroy(rds_alloc_cache);
122	kstat_delete(rds_kstatsp);
123}
124
125
126void
127rds_init()
128{
129	rds_alloc_cache = kmem_cache_create("rds_alloc_cache",
130	    sizeof (rds_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
131	rds_hash_init();
132	/*
133	 * kstats
134	 */
135	rds_kstatsp = kstat_create("rds", 0,
136	    "rds_kstat", "misc", KSTAT_TYPE_NAMED,
137	    sizeof (rds_kstat) / sizeof (kstat_named_t),
138	    KSTAT_FLAG_VIRTUAL | KSTAT_FLAG_WRITABLE);
139	if (rds_kstatsp != NULL) {
140		rds_kstatsp->ks_lock = &rds_kstat_mutex;
141		rds_kstatsp->ks_data = (void *)&rds_kstat;
142		kstat_install(rds_kstatsp);
143	}
144}
145
146#define	UINT_32_BITS 31
147void
148rds_hash_init()
149{
150	int i;
151
152	if (rds_bind_fanout_size & (rds_bind_fanout_size - 1)) {
153		/* Not a power of two. Round up to nearest power of two */
154		for (i = 0; i < UINT_32_BITS; i++) {
155			if (rds_bind_fanout_size < (1 << i))
156				break;
157		}
158		rds_bind_fanout_size = 1 << i;
159	}
160	rds_bind_fanout = kmem_zalloc(rds_bind_fanout_size *
161	    sizeof (rds_bf_t), KM_SLEEP);
162	for (i = 0; i < rds_bind_fanout_size; i++) {
163		mutex_init(&rds_bind_fanout[i].rds_bf_lock, NULL, MUTEX_DEFAULT,
164		    NULL);
165	}
166}
167
168void
169rds_free(rds_t *rds)
170{
171	ASSERT(rds->rds_refcnt == 0);
172	ASSERT(MUTEX_HELD(&rds->rds_lock));
173	crfree(rds->rds_cred);
174	kmem_cache_free(rds_alloc_cache, rds);
175}
176
177rds_t *
178rds_create(void *rds_ulpd, cred_t *credp)
179{
180	rds_t	*rds;
181
182	/* User must supply a credential. */
183	if (credp == NULL)
184		return (NULL);
185	rds = kmem_cache_alloc(rds_alloc_cache, KM_SLEEP);
186	if (rds == NULL) {
187		return (NULL);
188	}
189
190	bzero(rds, sizeof (rds_t));
191	mutex_init(&rds->rds_lock, NULL, MUTEX_DEFAULT, NULL);
192	cv_init(&rds->rds_refcv, NULL, CV_DEFAULT, NULL);
193	rds->rds_cred = credp;
194	rds->rds_ulpd = rds_ulpd;
195	rds->rds_zoneid = getzoneid();
196	crhold(credp);
197	rds->rds_refcnt++;
198	return (rds);
199}
200
201
202/*
203 * Hash list removal routine for rds_t structures.
204 */
205void
206rds_bind_hash_remove(rds_t *rds, boolean_t caller_holds_lock)
207{
208	rds_t   *rdsnext;
209	kmutex_t *lockp;
210
211	if (rds->rds_ptpbhn == NULL)
212		return;
213
214	/*
215	 * Extract the lock pointer in case there are concurrent
216	 * hash_remove's for this instance.
217	 */
218	ASSERT(rds->rds_port != 0);
219	if (!caller_holds_lock) {
220		lockp = &rds_bind_fanout[RDS_BIND_HASH(rds->rds_port)].
221		    rds_bf_lock;
222		ASSERT(lockp != NULL);
223		mutex_enter(lockp);
224	}
225
226	if (rds->rds_ptpbhn != NULL) {
227		rdsnext = rds->rds_bind_hash;
228		if (rdsnext != NULL) {
229			rdsnext->rds_ptpbhn = rds->rds_ptpbhn;
230			rds->rds_bind_hash = NULL;
231		}
232		*rds->rds_ptpbhn = rdsnext;
233		rds->rds_ptpbhn = NULL;
234	}
235
236	RDS_DEC_REF_CNT(rds);
237
238	if (!caller_holds_lock) {
239		mutex_exit(lockp);
240	}
241}
242
243void
244rds_bind_hash_insert(rds_bf_t *rdsbf, rds_t *rds)
245{
246	rds_t   **rdsp;
247	rds_t   *rdsnext;
248
249	ASSERT(MUTEX_HELD(&rdsbf->rds_bf_lock));
250	if (rds->rds_ptpbhn != NULL) {
251		rds_bind_hash_remove(rds, B_TRUE);
252	}
253
254	rdsp = &rdsbf->rds_bf_rds;
255	rdsnext = rdsp[0];
256
257	if (rdsnext != NULL) {
258		rdsnext->rds_ptpbhn = &rds->rds_bind_hash;
259	}
260	rds->rds_bind_hash = rdsnext;
261	rds->rds_ptpbhn = rdsp;
262	rdsp[0] = rds;
263	RDS_INCR_REF_CNT(rds);
264
265}
266
267/*
268 * Everything is in network byte order
269 */
270/* ARGSUSED */
271rds_t *
272rds_fanout(ipaddr_t local_addr, ipaddr_t rem_addr,
273    in_port_t local_port, in_port_t rem_port, zoneid_t zoneid)
274{
275	rds_t	*rds;
276	rds_bf_t *rdsbf;
277
278	rdsbf = &rds_bind_fanout[RDS_BIND_HASH(local_port)];
279	mutex_enter(&rdsbf->rds_bf_lock);
280	rds = rdsbf->rds_bf_rds;
281	while (rds != NULL) {
282		if (!(rds->rds_flags & RDS_CLOSING)) {
283			if ((RDS_MATCH(rds, local_port, local_addr)) &&
284			    ((local_addr != INADDR_LOOPBACK) ||
285			    (rds->rds_zoneid == zoneid))) {
286				RDS_INCR_REF_CNT(rds);
287				break;
288			}
289		}
290		rds = rds->rds_bind_hash;
291	}
292	mutex_exit(&rdsbf->rds_bf_lock);
293	return (rds);
294}
295
296boolean_t
297rds_islocal(ipaddr_t addr)
298{
299	ip_stack_t *ipst;
300
301	ipst = netstack_find_by_zoneid(GLOBAL_ZONEID)->netstack_ip;
302	ASSERT(ipst != NULL);
303	if (ip_laddr_verify_v4(addr, ALL_ZONES, ipst, B_FALSE) == IPVL_BAD) {
304		netstack_rele(ipst->ips_netstack);
305		return (B_FALSE);
306	}
307	netstack_rele(ipst->ips_netstack);
308	return (B_TRUE);
309}
310