ilb_alg_hash.c revision 10946:324bab2b3370
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27#include <sys/types.h>
28#include <sys/cmn_err.h>
29#include <netinet/in.h>
30#include <inet/ip.h>
31#include <inet/ip6.h>
32#include <sys/crc32.h>
33
34#include <inet/ilb.h>
35#include "ilb_impl.h"
36#include "ilb_alg.h"
37
/*
 * HASH_IP_V4(hash, addr, size)
 *
 * Run CRC32() over the in_addr_t sized object `addr' and reduce the result
 * modulo `size', leaving the table index in `hash'.
 *
 *	hash	modifiable 32-bit lvalue that receives the index
 *	addr	lvalue holding the address (its address is taken)
 *	size	number of table slots; must not be zero (used as a divisor)
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly
 * one statement and can be used safely in an unbraced if/else.
 */
#define	HASH_IP_V4(hash, addr, size) 					\
do {									\
	CRC32((hash), &(addr), sizeof (in_addr_t), -1U, crc32_table);	\
	(hash) %= (size);						\
} while (0)

/*
 * HASH_IP_V6(hash, addr, size)
 *
 * Same as HASH_IP_V4(), with `addr' being a pointer to an in6_addr_t.
 * Only the last 32-bit word of the address (s6_addr32[3]) is hashed.
 */
#define	HASH_IP_V6(hash, addr, size)					\
	HASH_IP_V4((hash), (addr)->s6_addr32[3], (size))
45
/*
 * HASH_IP_PORT_V4(hash, addr, port, size)
 *
 * Mix a 32-bit address with a 16-bit port (the port is folded into both
 * halves of the word), run CRC32() over the mixed value and reduce the
 * result modulo `size', leaving the table index in `hash'.
 *
 *	hash	modifiable 32-bit lvalue that receives the index
 *	addr	32-bit address value
 *	port	port value
 *	size	number of table slots; must not be zero (used as a divisor)
 *
 * The port is converted to uint32_t before shifting.  Without the cast a
 * 16-bit port is promoted to (signed) int and a port >= 0x8000 shifted
 * left by 16 overflows int, which is undefined behaviour.
 *
 * The temporary has a macro-specific name so that an argument expression
 * mentioning a caller variable called `val' is not captured, and the body
 * is a do { } while (0) so it expands to exactly one statement.
 */
#define	HASH_IP_PORT_V4(hash, addr, port, size) 			\
do {									\
	uint32_t hash_ip_port_val_ = (addr) ^				\
	    ((uint32_t)(port) << 16) ^ (uint32_t)(port);		\
	CRC32((hash), &hash_ip_port_val_, sizeof (uint32_t), -1U,	\
	    crc32_table);						\
	(hash) %= (size);						\
} while (0)

/*
 * HASH_IP_PORT_V6(hash, addr, port, size)
 *
 * Same as HASH_IP_PORT_V4(), with `addr' being a pointer to an in6_addr_t.
 * Only the last 32-bit word of the address (s6_addr32[3]) is hashed.
 */
#define	HASH_IP_PORT_V6(hash, addr, port, size)				\
	HASH_IP_PORT_V4((hash), (addr)->s6_addr32[3], (port), (size))
54
/*
 * HASH_IP_VIP_V4(hash, saddr, daddr, size)
 *
 * XOR the 32-bit source and destination addresses together, run CRC32()
 * over the result and reduce it modulo `size', leaving the table index in
 * `hash'.
 *
 *	hash	modifiable 32-bit lvalue that receives the index
 *	saddr	32-bit source address value
 *	daddr	32-bit destination address value
 *	size	number of table slots; must not be zero (used as a divisor)
 *
 * The temporary has a macro-specific name so that an argument expression
 * mentioning a caller variable called `val' is not captured, and the body
 * is a do { } while (0) so it expands to exactly one statement.
 */
#define	HASH_IP_VIP_V4(hash, saddr, daddr, size)			\
do {									\
	uint32_t hash_ip_vip_val_ = (saddr) ^ (daddr);			\
	CRC32((hash), &hash_ip_vip_val_, sizeof (uint32_t), -1U,	\
	    crc32_table);						\
	(hash) %= (size);						\
} while (0)

/*
 * HASH_IP_VIP_V6(hash, saddr, daddr, size)
 *
 * Same as HASH_IP_VIP_V4(), with `saddr' and `daddr' being pointers to
 * in6_addr_t.  Only the last 32-bit word of each address (s6_addr32[3])
 * is hashed.
 */
#define	HASH_IP_VIP_V6(hash, saddr, daddr, size) 			\
	HASH_IP_VIP_V4((hash), (saddr)->s6_addr32[3], (daddr)->s6_addr32[3], \
	(size))
64
/*
 * Number of slots each server table starts with.  grow_tbl() also enlarges
 * a table by this many slots whenever it is full.
 */
#define	INIT_HASH_TBL_SIZE	10
66
67typedef struct {
68	ilb_server_t	*server;
69	boolean_t	enabled;
70} hash_server_t;
71
/*
 * There are two hash tables.  The hash_tbl holds all servers, both enabled
 * and disabled.  The hash_enabled_tbl only holds enabled servers.  Having
 * two tables allows the hash of a client request to remain the same even
 * when some servers are disabled.  If a server is disabled and a client's
 * request hashes to it, we will do another hash.  This time the hash is on
 * the enabled server table.
 */
80typedef struct hash_s {
81	kmutex_t	hash_lock;
82	size_t		hash_servers;		/* Total # of servers */
83	size_t		hash_tbl_size;		/* All server table size */
84	size_t		hash_enabled_servers;	/* # of enabled servers */
85	size_t		hash_enabled_tbl_size;	/* Enabled server table size */
86	hash_server_t	*hash_tbl;
87	hash_server_t	*hash_enabled_tbl;
88	ilb_algo_impl_t	hash_type;
89} hash_t;
90
91static void hash_fini(ilb_alg_data_t **);
92
93/* ARGSUSED */
94static boolean_t
95hash_lb(in6_addr_t *saddr, in_port_t sport, in6_addr_t *daddr,
96    in_port_t dport, void *alg_data, ilb_server_t **ret_server)
97{
98	hash_t *hash_alg = (hash_t *)alg_data;
99	uint32_t i;
100
101	ASSERT(ret_server != NULL);
102	*ret_server = NULL;
103
104	mutex_enter(&hash_alg->hash_lock);
105
106	if (hash_alg->hash_servers == 0) {
107		mutex_exit(&hash_alg->hash_lock);
108		return (B_FALSE);
109	}
110
111	switch (hash_alg->hash_type) {
112	case ILB_ALG_IMPL_HASH_IP:
113		HASH_IP_V6(i, saddr, hash_alg->hash_servers);
114		break;
115	case ILB_ALG_IMPL_HASH_IP_SPORT:
116		HASH_IP_PORT_V6(i, saddr, sport, hash_alg->hash_servers);
117		break;
118	case ILB_ALG_IMPL_HASH_IP_VIP:
119		HASH_IP_VIP_V6(i, saddr, daddr, hash_alg->hash_servers);
120		break;
121	default:
122		mutex_exit(&hash_alg->hash_lock);
123		return (B_FALSE);
124	}
125	if (hash_alg->hash_tbl[i].enabled) {
126		*ret_server = hash_alg->hash_tbl[i].server;
127		mutex_exit(&hash_alg->hash_lock);
128		return (B_TRUE);
129	}
130
131	if (hash_alg->hash_enabled_servers == 0) {
132		mutex_exit(&hash_alg->hash_lock);
133		return (B_FALSE);
134	}
135
136	switch (hash_alg->hash_type) {
137	case ILB_ALG_IMPL_HASH_IP:
138		HASH_IP_V6(i, saddr, hash_alg->hash_enabled_servers);
139		break;
140	case ILB_ALG_IMPL_HASH_IP_SPORT:
141		HASH_IP_PORT_V6(i, saddr, sport,
142		    hash_alg->hash_enabled_servers);
143		break;
144	case ILB_ALG_IMPL_HASH_IP_VIP:
145		HASH_IP_VIP_V6(i, saddr, daddr,
146		    hash_alg->hash_enabled_servers);
147		break;
148	default:
149		ASSERT(0);
150		break;
151	}
152	*ret_server = hash_alg->hash_enabled_tbl[i].server;
153	mutex_exit(&hash_alg->hash_lock);
154	return (B_TRUE);
155}
156
157static boolean_t
158del_server(hash_server_t *tbl, size_t hash_size, ilb_server_t *host)
159{
160	size_t i, j;
161
162	for (i = 0; i < hash_size; i++) {
163		if (tbl[i].server == host) {
164			if (i == hash_size - 1)
165				break;
166			for (j = i; j < hash_size - 1; j++)
167				tbl[j] = tbl[j + 1];
168			break;
169		}
170	}
171	/* Not found... */
172	if (i == hash_size)
173		return (B_FALSE);
174	tbl[hash_size - 1].server = NULL;
175	tbl[hash_size - 1].enabled = B_FALSE;
176	return (B_TRUE);
177}
178
179static int
180hash_server_del(ilb_server_t *host, void *alg_data)
181{
182	hash_t *hash_alg = (hash_t *)alg_data;
183	boolean_t ret;
184
185	mutex_enter(&hash_alg->hash_lock);
186
187	ret = del_server(hash_alg->hash_tbl, hash_alg->hash_servers, host);
188	if (!ret) {
189		mutex_exit(&hash_alg->hash_lock);
190		return (EINVAL);
191	}
192	hash_alg->hash_servers--;
193
194	/* The server may not be enabled. */
195	ret = del_server(hash_alg->hash_enabled_tbl,
196	    hash_alg->hash_enabled_servers, host);
197	if (ret)
198		hash_alg->hash_enabled_servers--;
199
200	mutex_exit(&hash_alg->hash_lock);
201	ILB_SERVER_REFRELE(host);
202	return (0);
203}
204
205static int
206grow_tbl(hash_server_t **hash_tbl, size_t *tbl_size)
207{
208	size_t mem_size;
209	hash_server_t *new_tbl;
210
211	if ((new_tbl = kmem_zalloc(sizeof (hash_server_t) *
212	    (*tbl_size + INIT_HASH_TBL_SIZE), KM_NOSLEEP)) == NULL) {
213		return (ENOMEM);
214	}
215	mem_size = *tbl_size * sizeof (hash_server_t);
216	bcopy(*hash_tbl, new_tbl, mem_size);
217	kmem_free(*hash_tbl, mem_size);
218	*hash_tbl = new_tbl;
219	*tbl_size += INIT_HASH_TBL_SIZE;
220	return (0);
221}
222
223static int
224hash_server_add(ilb_server_t *host, void *alg_data)
225{
226	hash_t *hash_alg = (hash_t *)alg_data;
227	size_t new_size;
228
229	mutex_enter(&hash_alg->hash_lock);
230
231	/* First add the server to the hash_tbl. */
232	new_size = hash_alg->hash_servers + 1;
233	if (new_size > hash_alg->hash_tbl_size) {
234		if (grow_tbl(&hash_alg->hash_tbl, &hash_alg->hash_tbl_size) !=
235		    0) {
236			mutex_exit(&hash_alg->hash_lock);
237			return (ENOMEM);
238		}
239	}
240
241	hash_alg->hash_tbl[hash_alg->hash_servers].server = host;
242	hash_alg->hash_tbl[hash_alg->hash_servers].enabled = host->iser_enabled;
243	hash_alg->hash_servers++;
244
245	if (!host->iser_enabled) {
246		mutex_exit(&hash_alg->hash_lock);
247		ILB_SERVER_REFHOLD(host);
248		return (0);
249	}
250
251	/* If the server is enabled, add it to the hasn_enabled_tbl. */
252	new_size = hash_alg->hash_enabled_servers + 1;
253	if (new_size > hash_alg->hash_enabled_tbl_size) {
254		if (grow_tbl(&hash_alg->hash_enabled_tbl,
255		    &hash_alg->hash_enabled_tbl_size) != 0) {
256			mutex_exit(&hash_alg->hash_lock);
257			return (ENOMEM);
258		}
259	}
260	hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].server =
261	    host;
262	hash_alg->hash_enabled_tbl[hash_alg->hash_enabled_servers].enabled =
263	    B_TRUE;
264	hash_alg->hash_enabled_servers++;
265
266	mutex_exit(&hash_alg->hash_lock);
267	ILB_SERVER_REFHOLD(host);
268	return (0);
269}
270
271static int
272hash_server_enable(ilb_server_t *host, void *alg_data)
273{
274	hash_t *alg = (hash_t *)alg_data;
275	size_t new_size, i;
276
277	mutex_enter(&alg->hash_lock);
278
279	for (i = 0; i < alg->hash_servers; i++) {
280		if (alg->hash_tbl[i].server == host) {
281			if (alg->hash_tbl[i].enabled) {
282				mutex_exit(&alg->hash_lock);
283				return (0);
284			} else {
285				break;
286			}
287		}
288	}
289	if (i == alg->hash_servers) {
290		mutex_exit(&alg->hash_lock);
291		return (EINVAL);
292	}
293
294#if DEBUG
295	/* The server should not be in the enabled tabled. */
296	{
297		size_t j;
298
299		for (j = 0; j < alg->hash_enabled_servers; j++) {
300			if (alg->hash_enabled_tbl[j].server == host) {
301				cmn_err(CE_PANIC, "Corrupted ILB enabled hash "
302				    "table");
303			}
304		}
305	}
306#endif
307
308	new_size = alg->hash_enabled_servers + 1;
309	if (new_size > alg->hash_enabled_tbl_size) {
310		if (grow_tbl(&alg->hash_enabled_tbl,
311		    &alg->hash_enabled_tbl_size) != 0) {
312			mutex_exit(&alg->hash_lock);
313			return (ENOMEM);
314		}
315	}
316	alg->hash_tbl[i].enabled = B_TRUE;
317	alg->hash_enabled_tbl[alg->hash_enabled_servers].server = host;
318	alg->hash_enabled_tbl[alg->hash_enabled_servers].enabled = B_TRUE;
319	alg->hash_enabled_servers++;
320
321	mutex_exit(&alg->hash_lock);
322	return (0);
323}
324
325static int
326hash_server_disable(ilb_server_t *host, void *alg_data)
327{
328	hash_t *alg = (hash_t *)alg_data;
329	size_t i;
330
331	mutex_enter(&alg->hash_lock);
332
333	for (i = 0; i < alg->hash_servers; i++) {
334		if (alg->hash_tbl[i].server == host) {
335			if (!alg->hash_tbl[i].enabled) {
336				mutex_exit(&alg->hash_lock);
337				return (0);
338			} else {
339				break;
340			}
341		}
342	}
343	if (i == alg->hash_servers) {
344		mutex_exit(&alg->hash_lock);
345		return (EINVAL);
346	}
347
348	alg->hash_tbl[i].enabled = B_FALSE;
349#if DEBUG
350	ASSERT(del_server(alg->hash_enabled_tbl, alg->hash_enabled_servers,
351	    host));
352#else
353	(void) del_server(alg->hash_enabled_tbl, alg->hash_enabled_servers,
354	    host);
355#endif
356	alg->hash_enabled_servers--;
357
358	mutex_exit(&alg->hash_lock);
359	return (0);
360}
361
362/* ARGSUSED */
363ilb_alg_data_t *
364ilb_alg_hash_init(ilb_rule_t *rule, const void *arg)
365{
366	ilb_alg_data_t	*alg;
367	hash_t		*hash_alg;
368	int		flags = *(int *)arg;
369
370	if ((alg = kmem_alloc(sizeof (ilb_alg_data_t), KM_NOSLEEP)) == NULL)
371		return (NULL);
372	if ((hash_alg = kmem_alloc(sizeof (hash_t), KM_NOSLEEP)) == NULL) {
373		kmem_free(alg, sizeof (ilb_alg_data_t));
374		return (NULL);
375	}
376	alg->ilb_alg_lb = hash_lb;
377	alg->ilb_alg_server_del = hash_server_del;
378	alg->ilb_alg_server_add = hash_server_add;
379	alg->ilb_alg_server_enable = hash_server_enable;
380	alg->ilb_alg_server_disable = hash_server_disable;
381	alg->ilb_alg_fini = hash_fini;
382	alg->ilb_alg_data = hash_alg;
383
384	mutex_init(&hash_alg->hash_lock, NULL, MUTEX_DEFAULT, NULL);
385	hash_alg->hash_type = flags;
386
387	/* Table of all servers */
388	hash_alg->hash_servers = 0;
389	hash_alg->hash_tbl_size = INIT_HASH_TBL_SIZE;
390	hash_alg->hash_tbl = kmem_zalloc(sizeof (hash_server_t) *
391	    INIT_HASH_TBL_SIZE, KM_NOSLEEP);
392	if (hash_alg->hash_tbl == NULL) {
393		kmem_free(hash_alg, sizeof (hash_t));
394		kmem_free(alg, sizeof (ilb_alg_data_t));
395		return (NULL);
396	}
397
398	/* Table of only enabled servers */
399	hash_alg->hash_enabled_servers = 0;
400	hash_alg->hash_enabled_tbl_size = INIT_HASH_TBL_SIZE;
401	hash_alg->hash_enabled_tbl = kmem_zalloc(sizeof (hash_server_t) *
402	    INIT_HASH_TBL_SIZE, KM_NOSLEEP);
403	if (hash_alg->hash_tbl == NULL) {
404		kmem_free(hash_alg->hash_tbl, INIT_HASH_TBL_SIZE *
405		    sizeof (ilb_server_t *));
406		kmem_free(hash_alg, sizeof (hash_t));
407		kmem_free(alg, sizeof (ilb_alg_data_t));
408		return (NULL);
409	}
410
411	return (alg);
412}
413
414static void
415hash_fini(ilb_alg_data_t **alg)
416{
417	hash_t		*hash_alg;
418	int		i;
419
420	hash_alg = (*alg)->ilb_alg_data;
421	for (i = 0; i < hash_alg->hash_servers; i++)
422		ILB_SERVER_REFRELE(hash_alg->hash_tbl[i].server);
423
424	kmem_free(hash_alg->hash_tbl, sizeof (hash_server_t) *
425	    hash_alg->hash_tbl_size);
426	kmem_free(hash_alg->hash_enabled_tbl, sizeof (hash_server_t) *
427	    hash_alg->hash_enabled_tbl_size);
428	kmem_free(hash_alg, sizeof (hash_t));
429	kmem_free(*alg, sizeof (ilb_alg_data_t));
430	*alg = NULL;
431}
432