1/*
2 * xfrm_state.c
3 *
4 * Changes:
5 *	Mitsuru KANDA @USAGI
6 * 	Kazunori MIYAZAWA @USAGI
7 * 	Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8 * 		IPv6 support
9 * 	YOSHIFUJI Hideaki @USAGI
10 * 		Split up af-specific functions
11 *	Derek Atkins <derek@ihtfp.com>
12 *		Add UDP Encapsulation
13 *
14 */
15
16#include <linux/workqueue.h>
17#include <net/xfrm.h>
18#include <linux/pfkeyv2.h>
19#include <linux/ipsec.h>
20#include <linux/module.h>
21#include <linux/cache.h>
22#include <asm/uaccess.h>
23#include <linux/audit.h>
24#include <linux/cache.h>
25
26#include "xfrm_hash.h"
27
28struct sock *xfrm_nl;
29EXPORT_SYMBOL(xfrm_nl);
30
31u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
32EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
33
34u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
35EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
36
37u32 sysctl_xfrm_acq_expires __read_mostly = 30;
38
39/* Each xfrm_state may be linked to two tables:
40
41   1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
42   2. Hash table by (daddr,family,reqid) to find what SAs exist for given
43      destination/tunnel endpoint. (output)
44 */
45
46static DEFINE_SPINLOCK(xfrm_state_lock);
47
48/* Hash table to find appropriate SA towards given target (endpoint
49 * of tunnel or destination of transport mode) allowed by selector.
50 *
51 * Main use is finding SA after policy selected tunnel or transport mode.
52 * Also, it can be used by ah/esp icmp error handler to find offending SA.
53 */
54static struct hlist_head *xfrm_state_bydst __read_mostly;
55static struct hlist_head *xfrm_state_bysrc __read_mostly;
56static struct hlist_head *xfrm_state_byspi __read_mostly;
57static unsigned int xfrm_state_hmask __read_mostly;
58static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
59static unsigned int xfrm_state_num;
60static unsigned int xfrm_state_genid;
61
62static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
63					 xfrm_address_t *saddr,
64					 u32 reqid,
65					 unsigned short family)
66{
67	return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
68}
69
70static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
71					 xfrm_address_t *saddr,
72					 unsigned short family)
73{
74	return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
75}
76
77static inline unsigned int
78xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
79{
80	return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
81}
82
/* Move every state hanging off the old bydst chain @list into the new
 * tables sized by @nhashmask.  The _safe iterator is required because
 * hlist_add_head() rewrites the node's links when inserting it into
 * its new bucket.  Called with xfrm_state_lock held during a resize.
 */
static void xfrm_hash_transfer(struct hlist_head *list,
			       struct hlist_head *ndsttable,
			       struct hlist_head *nsrctable,
			       struct hlist_head *nspitable,
			       unsigned int nhashmask)
{
	struct hlist_node *entry, *tmp;
	struct xfrm_state *x;

	hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
		unsigned int h;

		/* Rehash into the new by-destination table. */
		h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
				    x->props.reqid, x->props.family,
				    nhashmask);
		hlist_add_head(&x->bydst, ndsttable+h);

		/* Rehash into the new by-source table. */
		h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
				    x->props.family,
				    nhashmask);
		hlist_add_head(&x->bysrc, nsrctable+h);

		/* Only states that already own a SPI are kept in the
		 * by-SPI table.
		 */
		if (x->id.spi) {
			h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
					    x->id.proto, x->props.family,
					    nhashmask);
			hlist_add_head(&x->byspi, nspitable+h);
		}
	}
}
113
114static unsigned long xfrm_hash_new_size(void)
115{
116	return ((xfrm_state_hmask + 1) << 1) *
117		sizeof(struct hlist_head);
118}
119
120static DEFINE_MUTEX(hash_resize_mutex);
121
/* Workqueue handler that doubles all three state hash tables.
 * Serialized against concurrent resize requests by hash_resize_mutex.
 */
static void xfrm_hash_resize(struct work_struct *__unused)
{
	struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
	unsigned long nsize, osize;
	unsigned int nhashmask, ohashmask;
	int i;

	mutex_lock(&hash_resize_mutex);

	/* Allocate all three replacement tables up front so we can back
	 * out without having touched the live tables on failure.
	 */
	nsize = xfrm_hash_new_size();
	ndst = xfrm_hash_alloc(nsize);
	if (!ndst)
		goto out_unlock;
	nsrc = xfrm_hash_alloc(nsize);
	if (!nsrc) {
		xfrm_hash_free(ndst, nsize);
		goto out_unlock;
	}
	nspi = xfrm_hash_alloc(nsize);
	if (!nspi) {
		xfrm_hash_free(ndst, nsize);
		xfrm_hash_free(nsrc, nsize);
		goto out_unlock;
	}

	spin_lock_bh(&xfrm_state_lock);

	/* One pass over the old by-destination chains repopulates all
	 * three new tables (see xfrm_hash_transfer()).
	 */
	nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
	for (i = xfrm_state_hmask; i >= 0; i--)
		xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
				   nhashmask);

	odst = xfrm_state_bydst;
	osrc = xfrm_state_bysrc;
	ospi = xfrm_state_byspi;
	ohashmask = xfrm_state_hmask;

	/* Publish the new tables while still holding the lock. */
	xfrm_state_bydst = ndst;
	xfrm_state_bysrc = nsrc;
	xfrm_state_byspi = nspi;
	xfrm_state_hmask = nhashmask;

	spin_unlock_bh(&xfrm_state_lock);

	/* The old tables are unreachable now; free them outside the
	 * spinlock.
	 */
	osize = (ohashmask + 1) * sizeof(struct hlist_head);
	xfrm_hash_free(odst, osize);
	xfrm_hash_free(osrc, osize);
	xfrm_hash_free(ospi, osize);

out_unlock:
	mutex_unlock(&hash_resize_mutex);
}
174
175static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
176
177DECLARE_WAIT_QUEUE_HEAD(km_waitq);
178EXPORT_SYMBOL(km_waitq);
179
180static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
181static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
182
183static struct work_struct xfrm_state_gc_work;
184static HLIST_HEAD(xfrm_state_gc_list);
185static DEFINE_SPINLOCK(xfrm_state_gc_lock);
186
187int __xfrm_state_delete(struct xfrm_state *x);
188
189int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
190void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
/* Final teardown of a dead state, run from the GC work item (process
 * context, which del_timer_sync() requires).  Timers are stopped
 * first so no handler can touch the memory being freed below.
 */
static void xfrm_state_gc_destroy(struct xfrm_state *x)
{
	del_timer_sync(&x->timer);
	del_timer_sync(&x->rtimer);
	/* kfree(NULL) is a no-op, so unset algorithms are harmless. */
	kfree(x->aalg);
	kfree(x->ealg);
	kfree(x->calg);
	kfree(x->encap);
	kfree(x->coaddr);
	if (x->mode)
		xfrm_put_mode(x->mode);
	if (x->type) {
		/* Let the protocol (ah/esp/ipcomp/...) release its
		 * private data before dropping the type reference.
		 */
		x->type->destructor(x);
		xfrm_put_type(x->type);
	}
	security_xfrm_state_free(x);
	kfree(x);
}
210
/* GC work item: splice the pending dead-state list away under the GC
 * lock, then destroy the entries with no lock held.  Waiters on
 * km_waitq are woken so they can observe the freed capacity.
 */
static void xfrm_state_gc_task(struct work_struct *data)
{
	struct xfrm_state *x;
	struct hlist_node *entry, *tmp;
	struct hlist_head gc_list;

	/* Steal the whole list; new victims accumulate on the (now
	 * empty) global list without blocking on us.
	 */
	spin_lock_bh(&xfrm_state_gc_lock);
	gc_list.first = xfrm_state_gc_list.first;
	INIT_HLIST_HEAD(&xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);

	/* Dead states are chained via their (unhashed) bydst node. */
	hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
		xfrm_state_gc_destroy(x);

	wake_up(&km_waitq);
}
227
228static inline unsigned long make_jiffies(long secs)
229{
230	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231		return MAX_SCHEDULE_TIMEOUT-1;
232	else
233		return secs*HZ;
234}
235
/* Per-state lifetime timer.  Enforces hard expiry (the state is
 * deleted) and soft expiry (the key manager is notified once via
 * km.dying), then re-arms itself for the nearest future deadline.
 * Runs with x->lock held against data-path updates.
 */
static void xfrm_timer_handler(unsigned long data)
{
	struct xfrm_state *x = (struct xfrm_state*)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;	/* seconds until the nearest deadline */
	int warn = 0;		/* set when a soft limit was crossed */
	int err = 0;

	spin_lock(&x->lock);
	if (x->km.state == XFRM_STATE_DEAD)
		goto out;
	if (x->km.state == XFRM_STATE_EXPIRED)
		goto expired;
	/* Hard lifetimes: crossing either one kills the state. */
	if (x->lft.hard_add_expires_seconds) {
		long tmo = x->lft.hard_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (x->lft.hard_use_expires_seconds) {
		/* A never-used state counts as first used "now". */
		long tmo = x->lft.hard_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	/* Soft expiry was already reported; don't report it again. */
	if (x->km.dying)
		goto resched;
	/* Soft lifetimes: crossing one only warns the key manager. */
	if (x->lft.soft_add_expires_seconds) {
		long tmo = x->lft.soft_add_expires_seconds +
			x->curlft.add_time - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}
	if (x->lft.soft_use_expires_seconds) {
		long tmo = x->lft.soft_use_expires_seconds +
			(x->curlft.use_time ? : now) - now;
		if (tmo <= 0)
			warn = 1;
		else if (tmo < next)
			next = tmo;
	}

	x->km.dying = warn;
	if (warn)
		km_state_expired(x, 0, 0);
resched:
	if (next != LONG_MAX)
		mod_timer(&x->timer, jiffies + make_jiffies(next));

	goto out;

expired:
	/* Larval (ACQ, SPI-less) states flip to EXPIRED and poll again
	 * in two seconds so km_waitq sleepers can observe the change.
	 */
	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
		x->km.state = XFRM_STATE_EXPIRED;
		wake_up(&km_waitq);
		next = 2;
		goto resched;
	}

	err = __xfrm_state_delete(x);
	if (!err && x->id.spi)
		km_state_expired(x, 1, 0);

	/* Record the (attempted) hard-expiry deletion in the audit log. */
	xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
		       AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);

out:
	spin_unlock(&x->lock);
}
311
312static void xfrm_replay_timer_handler(unsigned long data);
313
/* Allocate and minimally initialize a new xfrm_state.  The state is
 * returned with one reference held and its lifetime/replay timers
 * initialized but not armed.  Uses GFP_ATOMIC, so it is callable from
 * softirq context; returns NULL on allocation failure.
 */
struct xfrm_state *xfrm_state_alloc(void)
{
	struct xfrm_state *x;

	x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);

	if (x) {
		atomic_set(&x->refcnt, 1);
		atomic_set(&x->tunnel_users, 0);
		INIT_HLIST_NODE(&x->bydst);
		INIT_HLIST_NODE(&x->bysrc);
		INIT_HLIST_NODE(&x->byspi);
		init_timer(&x->timer);
		x->timer.function = xfrm_timer_handler;
		x->timer.data	  = (unsigned long)x;
		init_timer(&x->rtimer);
		x->rtimer.function = xfrm_replay_timer_handler;
		x->rtimer.data     = (unsigned long)x;
		x->curlft.add_time = get_seconds();
		/* Byte/packet limits default to "unlimited". */
		x->lft.soft_byte_limit = XFRM_INF;
		x->lft.soft_packet_limit = XFRM_INF;
		x->lft.hard_byte_limit = XFRM_INF;
		x->lft.hard_packet_limit = XFRM_INF;
		x->replay_maxage = 0;
		x->replay_maxdiff = 0;
		spin_lock_init(&x->lock);
	}
	return x;
}
343EXPORT_SYMBOL(xfrm_state_alloc);
344
/* Called when the last reference is dropped: queue the dead state for
 * deferred destruction.  The bydst node is reused to chain it onto the
 * GC list (it was unhashed by __xfrm_state_delete()); the actual
 * freeing happens in xfrm_state_gc_task(), which may sleep in
 * del_timer_sync().
 */
void __xfrm_state_destroy(struct xfrm_state *x)
{
	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);

	spin_lock_bh(&xfrm_state_gc_lock);
	hlist_add_head(&x->bydst, &xfrm_state_gc_list);
	spin_unlock_bh(&xfrm_state_gc_lock);
	schedule_work(&xfrm_state_gc_work);
}
354EXPORT_SYMBOL(__xfrm_state_destroy);
355
/* Mark @x dead and unhash it from all three tables.  Caller must hold
 * x->lock.  Returns 0 on success, -ESRCH if the state was already
 * dead.  Drops the creation reference; any reference the caller holds
 * keeps @x valid until it is put.
 */
int __xfrm_state_delete(struct xfrm_state *x)
{
	int err = -ESRCH;

	if (x->km.state != XFRM_STATE_DEAD) {
		x->km.state = XFRM_STATE_DEAD;
		spin_lock(&xfrm_state_lock);
		hlist_del(&x->bydst);
		hlist_del(&x->bysrc);
		if (x->id.spi)
			hlist_del(&x->byspi);
		xfrm_state_num--;
		spin_unlock(&xfrm_state_lock);

		/* All xfrm_state objects are created by xfrm_state_alloc.
		 * The xfrm_state_alloc call gives a reference, and that
		 * is what we are dropping here.
		 */
		__xfrm_state_put(x);
		err = 0;
	}

	return err;
}
380EXPORT_SYMBOL(__xfrm_state_delete);
381
382int xfrm_state_delete(struct xfrm_state *x)
383{
384	int err;
385
386	spin_lock_bh(&x->lock);
387	err = __xfrm_state_delete(x);
388	spin_unlock_bh(&x->lock);
389
390	return err;
391}
392EXPORT_SYMBOL(xfrm_state_delete);
393
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* Pre-flight check for xfrm_state_flush(): ask the LSM whether every
 * SA the flush would remove may be deleted.  Walks the whole
 * by-destination table; returns 0 if all deletions are permitted,
 * otherwise the security error after logging an audit record for the
 * refused SA.  Called with xfrm_state_lock held.
 */
static inline int
xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
{
	int i, err = 0;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;

		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (xfrm_id_proto_match(x->id.proto, proto) &&
			   (err = security_xfrm_state_delete(x)) != 0) {
				xfrm_audit_log(audit_info->loginuid,
					       audit_info->secid,
					       AUDIT_MAC_IPSEC_DELSA,
                                               0, NULL, x);

				return err;
			}
		}
	}

	return err;
}
#else
/* No LSM support compiled in: every flush is permitted. */
static inline int
xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif
426
/* Delete every non-kernel-owned SA matching @proto.  The table lock is
 * dropped around each deletion (which takes the per-state lock and
 * writes an audit record), so the bucket scan restarts from the top of
 * the chain afterwards.  Returns 0, or the LSM error if the security
 * pre-check vetoed the flush.
 */
int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
{
	int i, err = 0;

	spin_lock_bh(&xfrm_state_lock);
	err = xfrm_state_flush_secctx_check(proto, audit_info);
	if (err)
		goto out;

	for (i = 0; i <= xfrm_state_hmask; i++) {
		struct hlist_node *entry;
		struct xfrm_state *x;
restart:
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_state_kern(x) &&
			    xfrm_id_proto_match(x->id.proto, proto)) {
				/* Hold the state so it survives the
				 * unlocked window below.
				 */
				xfrm_state_hold(x);
				spin_unlock_bh(&xfrm_state_lock);

				err = xfrm_state_delete(x);
				xfrm_audit_log(audit_info->loginuid,
					       audit_info->secid,
					       AUDIT_MAC_IPSEC_DELSA,
					       err ? 0 : 1, NULL, x);
				xfrm_state_put(x);

				/* The chain may have changed while we
				 * were unlocked; rescan this bucket.
				 */
				spin_lock_bh(&xfrm_state_lock);
				goto restart;
			}
		}
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);
	wake_up(&km_waitq);
	return err;
}
465EXPORT_SYMBOL(xfrm_state_flush);
466
/* Report SAD statistics (SA count, current hash mask, maximum table
 * size) as one consistent snapshot taken under xfrm_state_lock.
 */
void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
{
	spin_lock_bh(&xfrm_state_lock);
	si->sadcnt = xfrm_state_num;
	si->sadhcnt = xfrm_state_hmask;
	si->sadhmcnt = xfrm_state_hashmax;
	spin_unlock_bh(&xfrm_state_lock);
}
475EXPORT_SYMBOL(xfrm_sad_getinfo);
476
477static int
478xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
479		  struct xfrm_tmpl *tmpl,
480		  xfrm_address_t *daddr, xfrm_address_t *saddr,
481		  unsigned short family)
482{
483	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
484	if (!afinfo)
485		return -1;
486	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
487	xfrm_state_put_afinfo(afinfo);
488	return 0;
489}
490
/* Find an SA by (daddr, SPI, proto, family) in the by-SPI table.
 * Returns the state with a reference held, or NULL.  Caller must hold
 * xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
		if (x->props.family != family ||
		    x->id.spi       != spi ||
		    x->id.proto     != proto)
			continue;

		/* Cheap scalar checks passed; now compare the
		 * family-specific destination address.
		 */
		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
522
/* Find an SA by (daddr, saddr, proto, family) in the by-source table —
 * used for protocols without a SPI key.  Returns the state with a
 * reference held, or NULL.  Caller must hold xfrm_state_lock.
 */
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
	unsigned int h = xfrm_src_hash(daddr, saddr, family);
	struct xfrm_state *x;
	struct hlist_node *entry;

	hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
		if (x->props.family != family ||
		    x->id.proto     != proto)
			continue;

		/* Compare both endpoint addresses per family. */
		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4 != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)daddr,
					     (struct in6_addr *)
					     x->id.daddr.a6) ||
			    !ipv6_addr_equal((struct in6_addr *)saddr,
					     (struct in6_addr *)
					     x->props.saddr.a6))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	return NULL;
}
557
558static inline struct xfrm_state *
559__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
560{
561	if (use_spi)
562		return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
563					   x->id.proto, family);
564	else
565		return __xfrm_state_lookup_byaddr(&x->id.daddr,
566						  &x->props.saddr,
567						  x->id.proto, family);
568}
569
570static void xfrm_hash_grow_check(int have_hash_collision)
571{
572	if (have_hash_collision &&
573	    (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
574	    xfrm_state_num > xfrm_state_hmask)
575		schedule_work(&xfrm_hash_work);
576}
577
/* Main output-path SA resolution: find the best SA matching template
 * @tmpl and flow @fl towards @daddr, or create a larval (ACQ) state
 * and ask the key manager to negotiate one.  Returns a referenced
 * state, or NULL with *err set to:
 *   -EAGAIN  an ACQUIRE for this template is already in flight,
 *   -EEXIST  the template's exact SPI exists but did not match,
 *   -ESRCH   only error/expired candidates matched, or km_query failed,
 *   -ENOMEM  larval state allocation failed.
 */
struct xfrm_state *
xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
		struct flowi *fl, struct xfrm_tmpl *tmpl,
		struct xfrm_policy *pol, int *err,
		unsigned short family)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x, *x0;
	int acquire_in_progress = 0;
	int error = 0;
	struct xfrm_state *best = NULL;

	spin_lock_bh(&xfrm_state_lock);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family == family &&
		    x->props.reqid == tmpl->reqid &&
		    !(x->props.flags & XFRM_STATE_WILDRECV) &&
		    xfrm_state_addr_check(x, daddr, saddr, family) &&
		    tmpl->mode == x->props.mode &&
		    tmpl->id.proto == x->id.proto &&
		    (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
			/* Resolution logic:
			   1. There is a valid state with matching selector.
			      Done.
			   2. Valid state with inappropriate selector. Skip.

			   Entering area of "sysdeps".

			   3. If state is not valid, selector is temporary,
			      it selects only session which triggered
			      previous resolution. Key manager will do
			      something to install a state with proper
			      selector.
			 */
			if (x->km.state == XFRM_STATE_VALID) {
				if (!xfrm_selector_match(&x->sel, fl, family) ||
				    !security_xfrm_state_pol_flow_match(x, pol, fl))
					continue;
				/* Prefer non-dying states; among equals,
				 * prefer the most recently added one.
				 */
				if (!best ||
				    best->km.dying > x->km.dying ||
				    (best->km.dying == x->km.dying &&
				     best->curlft.add_time < x->curlft.add_time))
					best = x;
			} else if (x->km.state == XFRM_STATE_ACQ) {
				acquire_in_progress = 1;
			} else if (x->km.state == XFRM_STATE_ERROR ||
				   x->km.state == XFRM_STATE_EXPIRED) {
				if (xfrm_selector_match(&x->sel, fl, family) &&
				    security_xfrm_state_pol_flow_match(x, pol, fl))
					error = -ESRCH;
			}
		}
	}

	x = best;
	if (!x && !error && !acquire_in_progress) {
		/* If the template names a concrete SPI that already
		 * exists (but was not matched above), do not shadow it.
		 */
		if (tmpl->id.spi &&
		    (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
					      tmpl->id.proto, family)) != NULL) {
			xfrm_state_put(x0);
			error = -EEXIST;
			goto out;
		}
		x = xfrm_state_alloc();
		if (x == NULL) {
			error = -ENOMEM;
			goto out;
		}
		/* Initialize temporary selector matching only
		 * to current session. */
		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);

		error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
		if (error) {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			goto out;
		}

		/* Ask the key manager to negotiate a real SA; on success
		 * hash in the larval state with an acquire timeout.
		 */
		if (km_query(x, tmpl, pol) == 0) {
			x->km.state = XFRM_STATE_ACQ;
			hlist_add_head(&x->bydst, xfrm_state_bydst+h);
			h = xfrm_src_hash(daddr, saddr, family);
			hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
			if (x->id.spi) {
				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
				hlist_add_head(&x->byspi, xfrm_state_byspi+h);
			}
			x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
			x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
			add_timer(&x->timer);
			xfrm_state_num++;
			xfrm_hash_grow_check(x->bydst.next != NULL);
		} else {
			x->km.state = XFRM_STATE_DEAD;
			xfrm_state_put(x);
			x = NULL;
			error = -ESRCH;
		}
	}
out:
	if (x)
		xfrm_state_hold(x);
	else
		*err = acquire_in_progress ? -EAGAIN : error;
	spin_unlock_bh(&xfrm_state_lock);
	return x;
}
688
/* Hash @x into all applicable tables, stamp it with a fresh generation
 * id and arm its timers.  Caller must hold xfrm_state_lock.  The
 * lifetime timer is kicked one second out so xfrm_timer_handler()
 * computes the real deadline shortly after insertion.
 */
static void __xfrm_state_insert(struct xfrm_state *x)
{
	unsigned int h;

	x->genid = ++xfrm_state_genid;

	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
			  x->props.reqid, x->props.family);
	hlist_add_head(&x->bydst, xfrm_state_bydst+h);

	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
	hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);

	/* Only SPI-carrying states enter the by-SPI table. */
	if (x->id.spi) {
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
				  x->props.family);

		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
	}

	mod_timer(&x->timer, jiffies + HZ);
	if (x->replay_maxage)
		mod_timer(&x->rtimer, jiffies + x->replay_maxage);

	wake_up(&km_waitq);

	xfrm_state_num++;

	/* If we landed on a non-empty chain, consider growing the table. */
	xfrm_hash_grow_check(x->bydst.next != NULL);
}
719
720/* xfrm_state_lock is held */
/* Stamp every existing state with the same (daddr, saddr, reqid,
 * family) as @xnew with the current generation counter.  The caller
 * then inserts @xnew via __xfrm_state_insert(), which assigns it
 * ++xfrm_state_genid, so the older twins become distinguishable as
 * stale by their smaller genid.
 */
static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
{
	unsigned short family = xnew->props.family;
	u32 reqid = xnew->props.reqid;
	struct xfrm_state *x;
	struct hlist_node *entry;
	unsigned int h;

	h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.family	== family &&
		    x->props.reqid	== reqid &&
		    !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
		    !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
			x->genid = xfrm_state_genid;
	}
}
738
/* Insert a new SA into the SAD, marking any states it shadows (same
 * addresses and reqid) as belonging to an older generation.
 */
void xfrm_state_insert(struct xfrm_state *x)
{
	spin_lock_bh(&xfrm_state_lock);
	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	spin_unlock_bh(&xfrm_state_lock);
}
746EXPORT_SYMBOL(xfrm_state_insert);
747
748/* xfrm_state_lock is held */
/* Find an existing larval (ACQ, SPI-less) state for the given key, or,
 * if @create is set, allocate and hash a new one with the acquire
 * timeout armed.  Returns a referenced state or NULL.
 */
static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
{
	unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
	struct hlist_node *entry;
	struct xfrm_state *x;

	hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
		if (x->props.reqid  != reqid ||
		    x->props.mode   != mode ||
		    x->props.family != family ||
		    x->km.state     != XFRM_STATE_ACQ ||
		    x->id.spi       != 0 ||
		    x->id.proto	    != proto)
			continue;

		/* Scalars matched; compare the address pair per family. */
		switch (family) {
		case AF_INET:
			if (x->id.daddr.a4    != daddr->a4 ||
			    x->props.saddr.a4 != saddr->a4)
				continue;
			break;
		case AF_INET6:
			if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
					     (struct in6_addr *)daddr) ||
			    !ipv6_addr_equal((struct in6_addr *)
					     x->props.saddr.a6,
					     (struct in6_addr *)saddr))
				continue;
			break;
		}

		xfrm_state_hold(x);
		return x;
	}

	if (!create)
		return NULL;

	x = xfrm_state_alloc();
	if (likely(x)) {
		/* The temporary selector pins exactly this host pair. */
		switch (family) {
		case AF_INET:
			x->sel.daddr.a4 = daddr->a4;
			x->sel.saddr.a4 = saddr->a4;
			x->sel.prefixlen_d = 32;
			x->sel.prefixlen_s = 32;
			x->props.saddr.a4 = saddr->a4;
			x->id.daddr.a4 = daddr->a4;
			break;

		case AF_INET6:
			ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
				       (struct in6_addr *)daddr);
			ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
				       (struct in6_addr *)saddr);
			x->sel.prefixlen_d = 128;
			x->sel.prefixlen_s = 128;
			ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
				       (struct in6_addr *)saddr);
			ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
				       (struct in6_addr *)daddr);
			break;
		}

		x->km.state = XFRM_STATE_ACQ;
		x->id.proto = proto;
		x->props.family = family;
		x->props.mode = mode;
		x->props.reqid = reqid;
		/* Larval states self-destruct after the acquire timeout. */
		x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
		xfrm_state_hold(x);
		x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
		add_timer(&x->timer);
		/* No SPI yet, so the state only enters bydst and bysrc. */
		hlist_add_head(&x->bydst, xfrm_state_bydst+h);
		h = xfrm_src_hash(daddr, saddr, family);
		hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
		wake_up(&km_waitq);

		xfrm_state_num++;

		xfrm_hash_grow_check(x->bydst.next != NULL);
	}

	return x;
}
834
835static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
836
/* Add a fully negotiated SA to the SAD.  Fails with -EEXIST if an
 * identical SA is already present.  Any matching larval (ACQ) state —
 * found either by the key manager sequence number or by the acquire
 * key — is deleted after the new state is inserted, outside the table
 * lock.
 */
int xfrm_state_add(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int family;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	family = x->props.family;

	spin_lock_bh(&xfrm_state_lock);

	x1 = __xfrm_state_locate(x, use_spi, family);
	if (x1) {
		xfrm_state_put(x1);
		x1 = NULL;
		err = -EEXIST;
		goto out;
	}

	/* Prefer the larval state this SA was negotiated for (matched
	 * by km sequence number), provided identity agrees.
	 */
	if (use_spi && x->km.seq) {
		x1 = __xfrm_find_acq_byseq(x->km.seq);
		if (x1 && ((x1->id.proto != x->id.proto) ||
		    xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
			xfrm_state_put(x1);
			x1 = NULL;
		}
	}

	/* Otherwise look up a larval twin by acquire key (no create). */
	if (use_spi && !x1)
		x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
				     x->id.proto,
				     &x->id.daddr, &x->props.saddr, 0);

	__xfrm_state_bump_genids(x);
	__xfrm_state_insert(x);
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	/* Retire the superseded larval state outside the table lock. */
	if (x1) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
	}

	return err;
}
884EXPORT_SYMBOL(xfrm_state_add);
885
886#ifdef CONFIG_XFRM_MIGRATE
/* Duplicate @orig into a freshly allocated state for migration:
 * identity, selector, lifetimes, properties and deep copies of the
 * algorithms, encapsulation template and care-of address.  On failure
 * stores -ENOMEM (or the xfrm_init_state() error) in *errp and returns
 * NULL.
 *
 * NOTE(review): the LSM security context of @orig is not duplicated
 * here — confirm callers do not rely on it being carried over.
 */
struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
{
	int err = -ENOMEM;
	struct xfrm_state *x = xfrm_state_alloc();
	if (!x)
		goto error;

	memcpy(&x->id, &orig->id, sizeof(x->id));
	memcpy(&x->sel, &orig->sel, sizeof(x->sel));
	memcpy(&x->lft, &orig->lft, sizeof(x->lft));
	x->props.mode = orig->props.mode;
	x->props.replay_window = orig->props.replay_window;
	x->props.reqid = orig->props.reqid;
	x->props.family = orig->props.family;
	x->props.saddr = orig->props.saddr;

	/* Deep-copy the optional algorithm descriptors (keys included). */
	if (orig->aalg) {
		x->aalg = xfrm_algo_clone(orig->aalg);
		if (!x->aalg)
			goto error;
	}
	x->props.aalgo = orig->props.aalgo;

	if (orig->ealg) {
		x->ealg = xfrm_algo_clone(orig->ealg);
		if (!x->ealg)
			goto error;
	}
	x->props.ealgo = orig->props.ealgo;

	if (orig->calg) {
		x->calg = xfrm_algo_clone(orig->calg);
		if (!x->calg)
			goto error;
	}
	x->props.calgo = orig->props.calgo;

	if (orig->encap) {
		x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
		if (!x->encap)
			goto error;
	}

	if (orig->coaddr) {
		x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
				    GFP_KERNEL);
		if (!x->coaddr)
			goto error;
	}

	/* Resolve type/mode handlers for the copied identity. */
	err = xfrm_init_state(x);
	if (err)
		goto error;

	x->props.flags = orig->props.flags;

	x->curlft.add_time = orig->curlft.add_time;
	x->km.state = orig->km.state;
	x->km.seq = orig->km.seq;

	return x;

 error:
	if (errp)
		*errp = err;
	if (x) {
		/* Free whatever was duplicated before the failure. */
		kfree(x->aalg);
		kfree(x->ealg);
		kfree(x->calg);
		kfree(x->encap);
		kfree(x->coaddr);
	}
	kfree(x);
	return NULL;
}
962EXPORT_SYMBOL(xfrm_state_clone);
963
964/* xfrm_state_lock is held */
965struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
966{
967	unsigned int h;
968	struct xfrm_state *x;
969	struct hlist_node *entry;
970
971	if (m->reqid) {
972		h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
973				  m->reqid, m->old_family);
974		hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
975			if (x->props.mode != m->mode ||
976			    x->id.proto != m->proto)
977				continue;
978			if (m->reqid && x->props.reqid != m->reqid)
979				continue;
980			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
981					  m->old_family) ||
982			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
983					  m->old_family))
984				continue;
985			xfrm_state_hold(x);
986			return x;
987		}
988	} else {
989		h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
990				  m->old_family);
991		hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
992			if (x->props.mode != m->mode ||
993			    x->id.proto != m->proto)
994				continue;
995			if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
996					  m->old_family) ||
997			    xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
998					  m->old_family))
999				continue;
1000			xfrm_state_hold(x);
1001			return x;
1002		}
1003	}
1004
1005	return NULL;
1006}
1007EXPORT_SYMBOL(xfrm_migrate_state_find);
1008
1009struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1010				       struct xfrm_migrate *m)
1011{
1012	struct xfrm_state *xc;
1013	int err;
1014
1015	xc = xfrm_state_clone(x, &err);
1016	if (!xc)
1017		return NULL;
1018
1019	memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1020	memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1021
1022	/* add state */
1023	if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1024		/* a care is needed when the destination address of the
1025		   state is to be updated as it is a part of triplet */
1026		xfrm_state_insert(xc);
1027	} else {
1028		if ((err = xfrm_state_add(xc)) < 0)
1029			goto error;
1030	}
1031
1032	return xc;
1033error:
1034	kfree(xc);
1035	return NULL;
1036}
1037EXPORT_SYMBOL(xfrm_state_migrate);
1038#endif
1039
/* Update an existing SA in place from @x.  If the located twin is
 * still larval (ACQ), @x simply replaces it; otherwise the twin's
 * encap/coaddr/selector/lifetimes are refreshed under its lock.
 * Returns 0 on success, -ESRCH if no matching SA exists, -EEXIST if
 * the SA is kernel-owned, -EINVAL if the twin is no longer valid.
 */
int xfrm_state_update(struct xfrm_state *x)
{
	struct xfrm_state *x1;
	int err;
	int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);

	spin_lock_bh(&xfrm_state_lock);
	x1 = __xfrm_state_locate(x, use_spi, x->props.family);

	err = -ESRCH;
	if (!x1)
		goto out;

	/* Kernel-internal (tunnel-owned) states may not be replaced. */
	if (xfrm_state_kern(x1)) {
		xfrm_state_put(x1);
		err = -EEXIST;
		goto out;
	}

	/* A larval twin is superseded outright: insert @x and signal
	 * (x == NULL below) that the twin must be deleted.
	 */
	if (x1->km.state == XFRM_STATE_ACQ) {
		__xfrm_state_insert(x);
		x = NULL;
	}
	err = 0;

out:
	spin_unlock_bh(&xfrm_state_lock);

	if (err)
		return err;

	if (!x) {
		xfrm_state_delete(x1);
		xfrm_state_put(x1);
		return 0;
	}

	/* In-place refresh of the live twin under its own lock. */
	err = -EINVAL;
	spin_lock_bh(&x1->lock);
	if (likely(x1->km.state == XFRM_STATE_VALID)) {
		if (x->encap && x1->encap)
			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
		if (x->coaddr && x1->coaddr) {
			memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
		}
		if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
			memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
		x1->km.dying = 0;

		/* Re-arm the lifetime timer for the new limits. */
		mod_timer(&x1->timer, jiffies + HZ);
		if (x1->curlft.use_time)
			xfrm_state_check_expire(x1);

		err = 0;
	}
	spin_unlock_bh(&x1->lock);

	xfrm_state_put(x1);

	return err;
}
1102EXPORT_SYMBOL(xfrm_state_update);
1103
/* Byte/packet based expiry check.  Stamps the first-use time, hard
 * expires the state once a hard limit is crossed (returning -EINVAL
 * and firing the lifetime timer to delete it), and notifies the key
 * manager once when a soft limit is crossed.  Returns 0 while the
 * state remains usable.
 */
int xfrm_state_check_expire(struct xfrm_state *x)
{
	if (!x->curlft.use_time)
		x->curlft.use_time = get_seconds();

	if (x->km.state != XFRM_STATE_VALID)
		return -EINVAL;

	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
	    x->curlft.packets >= x->lft.hard_packet_limit) {
		x->km.state = XFRM_STATE_EXPIRED;
		/* Fire the lifetime timer immediately for deletion. */
		mod_timer(&x->timer, jiffies);
		return -EINVAL;
	}

	/* Soft expiry is reported only once (km.dying latches it). */
	if (!x->km.dying &&
	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
	     x->curlft.packets >= x->lft.soft_packet_limit)) {
		x->km.dying = 1;
		km_state_expired(x, 0, 0);
	}
	return 0;
}
1127EXPORT_SYMBOL(xfrm_state_check_expire);
1128
1129static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1130{
1131	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1132		- skb_headroom(skb);
1133
1134	if (nhead > 0)
1135		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1136
1137	/* Check tail too... */
1138	return 0;
1139}
1140
/* Output-path sanity check: verify the state has not expired, then
 * make sure the skb has room for the transform header.  Returns 0 or
 * a negative error.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int ret = xfrm_state_check_expire(x);

	if (ret < 0)
		return ret;

	return xfrm_state_check_space(x, skb);
}
1150EXPORT_SYMBOL(xfrm_state_check);
1151
1152struct xfrm_state *
1153xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1154		  unsigned short family)
1155{
1156	struct xfrm_state *x;
1157
1158	spin_lock_bh(&xfrm_state_lock);
1159	x = __xfrm_state_lookup(daddr, spi, proto, family);
1160	spin_unlock_bh(&xfrm_state_lock);
1161	return x;
1162}
1163EXPORT_SYMBOL(xfrm_state_lookup);
1164
1165struct xfrm_state *
1166xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1167			 u8 proto, unsigned short family)
1168{
1169	struct xfrm_state *x;
1170
1171	spin_lock_bh(&xfrm_state_lock);
1172	x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1173	spin_unlock_bh(&xfrm_state_lock);
1174	return x;
1175}
1176EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1177
1178struct xfrm_state *
1179xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1180	      xfrm_address_t *daddr, xfrm_address_t *saddr,
1181	      int create, unsigned short family)
1182{
1183	struct xfrm_state *x;
1184
1185	spin_lock_bh(&xfrm_state_lock);
1186	x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1187	spin_unlock_bh(&xfrm_state_lock);
1188
1189	return x;
1190}
1191EXPORT_SYMBOL(xfrm_find_acq);
1192
1193#ifdef CONFIG_XFRM_SUB_POLICY
1194int
1195xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1196	       unsigned short family)
1197{
1198	int err = 0;
1199	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1200	if (!afinfo)
1201		return -EAFNOSUPPORT;
1202
1203	spin_lock_bh(&xfrm_state_lock);
1204	if (afinfo->tmpl_sort)
1205		err = afinfo->tmpl_sort(dst, src, n);
1206	spin_unlock_bh(&xfrm_state_lock);
1207	xfrm_state_put_afinfo(afinfo);
1208	return err;
1209}
1210EXPORT_SYMBOL(xfrm_tmpl_sort);
1211
1212int
1213xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1214		unsigned short family)
1215{
1216	int err = 0;
1217	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1218	if (!afinfo)
1219		return -EAFNOSUPPORT;
1220
1221	spin_lock_bh(&xfrm_state_lock);
1222	if (afinfo->state_sort)
1223		err = afinfo->state_sort(dst, src, n);
1224	spin_unlock_bh(&xfrm_state_lock);
1225	xfrm_state_put_afinfo(afinfo);
1226	return err;
1227}
1228EXPORT_SYMBOL(xfrm_state_sort);
1229#endif
1230
1231/* Silly enough, but I'm lazy to build resolution list */
1232
1233static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1234{
1235	int i;
1236
1237	for (i = 0; i <= xfrm_state_hmask; i++) {
1238		struct hlist_node *entry;
1239		struct xfrm_state *x;
1240
1241		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1242			if (x->km.seq == seq &&
1243			    x->km.state == XFRM_STATE_ACQ) {
1244				xfrm_state_hold(x);
1245				return x;
1246			}
1247		}
1248	}
1249	return NULL;
1250}
1251
1252struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1253{
1254	struct xfrm_state *x;
1255
1256	spin_lock_bh(&xfrm_state_lock);
1257	x = __xfrm_find_acq_byseq(seq);
1258	spin_unlock_bh(&xfrm_state_lock);
1259	return x;
1260}
1261EXPORT_SYMBOL(xfrm_find_acq_byseq);
1262
1263u32 xfrm_get_acqseq(void)
1264{
1265	u32 res;
1266	static u32 acqseq;
1267	static DEFINE_SPINLOCK(acqseq_lock);
1268
1269	spin_lock_bh(&acqseq_lock);
1270	res = (++acqseq ? : ++acqseq);
1271	spin_unlock_bh(&acqseq_lock);
1272	return res;
1273}
1274EXPORT_SYMBOL(xfrm_get_acqseq);
1275
/* Assign an SPI to state @x and hash it into the byspi table.
 *
 * No-op when @x already has an SPI.  With minspi == maxspi the single
 * candidate is used iff no existing state owns it; otherwise up to
 * (maxspi - minspi + 1) random probes are made inside the range.
 * On failure x->id.spi is simply left zero — callers must check.
 */
void
xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
{
	unsigned int h;
	struct xfrm_state *x0;

	if (x->id.spi)
		return;

	if (minspi == maxspi) {
		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
		if (x0) {
			/* The one requested SPI is already taken. */
			xfrm_state_put(x0);
			return;
		}
		x->id.spi = minspi;
	} else {
		u32 spi = 0;
		u32 low = ntohl(minspi);
		u32 high = ntohl(maxspi);
		/* Random probing: probes may repeat values, so a free SPI
		 * can be missed even when one exists in the range. */
		for (h=0; h<high-low+1; h++) {
			spi = low + net_random()%(high-low+1);
			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
			if (x0 == NULL) {
				x->id.spi = htonl(spi);
				break;
			}
			xfrm_state_put(x0);
		}
	}
	if (x->id.spi) {
		/* Publish the state in the byspi hash and wake any km
		 * waiters blocked on SPI allocation. */
		spin_lock_bh(&xfrm_state_lock);
		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
		hlist_add_head(&x->byspi, xfrm_state_byspi+h);
		spin_unlock_bh(&xfrm_state_lock);
		wake_up(&km_waitq);
	}
}
EXPORT_SYMBOL(xfrm_alloc_spi);
1315
/* Walk every state whose protocol matches @proto and invoke @func on
 * it under xfrm_state_lock.
 *
 * Callbacks run one step behind the scan so the final matching state
 * can be delivered with a count of 0, letting @func detect the end of
 * the walk; earlier states receive the running 1-based count.  A
 * non-zero return from @func aborts the walk and is propagated;
 * -ENOENT is returned when nothing matched at all.
 */
int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
		    void *data)
{
	int i;
	struct xfrm_state *x, *last = NULL;
	struct hlist_node *entry;
	int count = 0;
	int err = 0;

	spin_lock_bh(&xfrm_state_lock);
	for (i = 0; i <= xfrm_state_hmask; i++) {
		hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
			if (!xfrm_id_proto_match(x->id.proto, proto))
				continue;
			if (last) {
				/* Deliver the previous match; x is held
				 * back until we know it isn't the last. */
				err = func(last, count, data);
				if (err)
					goto out;
			}
			last = x;
			count++;
		}
	}
	if (count == 0) {
		err = -ENOENT;
		goto out;
	}
	/* Final match: count 0 flags the end of the walk. */
	err = func(last, 0, data);
out:
	spin_unlock_bh(&xfrm_state_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_state_walk);
1349
1350
/* Decide whether a replay-state (aevent) notification should go to
 * the key managers for @x, and send it when warranted. */
void xfrm_replay_notify(struct xfrm_state *x, int event)
{
	struct km_event c;
	/* we send notify messages in case
	 *  1. we updated on of the sequence numbers, and the seqno difference
	 *     is at least x->replay_maxdiff, in this case we also update the
	 *     timeout of our timer function
	 *  2. if x->replay_maxage has elapsed since last update,
	 *     and there were changes
	 *
	 *  The state structure must be locked!
	 */

	switch (event) {
	case XFRM_REPLAY_UPDATE:
		if (x->replay_maxdiff &&
		    (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
		    (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
			/* Not enough progress since the last snapshot; if a
			 * timer event was deferred, treat this as its
			 * timeout, otherwise stay quiet. */
			if (x->xflags & XFRM_TIME_DEFER)
				event = XFRM_REPLAY_TIMEOUT;
			else
				return;
		}

		break;

	case XFRM_REPLAY_TIMEOUT:
		/* No change at all since the snapshot: defer notification
		 * until the next update arrives. */
		if ((x->replay.seq == x->preplay.seq) &&
		    (x->replay.bitmap == x->preplay.bitmap) &&
		    (x->replay.oseq == x->preplay.oseq)) {
			x->xflags |= XFRM_TIME_DEFER;
			return;
		}

		break;
	}

	/* Snapshot the replay counters and notify the key managers. */
	memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
	c.event = XFRM_MSG_NEWAE;
	c.data.aevent = event;
	km_state_notify(x, &c);

	/* Re-arm the aging timer; clear the defer flag only when the
	 * timer was not already pending. */
	if (x->replay_maxage &&
	    !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
		x->xflags &= ~XFRM_TIME_DEFER;
}
EXPORT_SYMBOL(xfrm_replay_notify);
1398
1399static void xfrm_replay_timer_handler(unsigned long data)
1400{
1401	struct xfrm_state *x = (struct xfrm_state*)data;
1402
1403	spin_lock(&x->lock);
1404
1405	if (x->km.state == XFRM_STATE_VALID) {
1406		if (xfrm_aevent_is_on())
1407			xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1408		else
1409			x->xflags |= XFRM_TIME_DEFER;
1410	}
1411
1412	spin_unlock(&x->lock);
1413}
1414
1415int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1416{
1417	u32 diff;
1418	u32 seq = ntohl(net_seq);
1419
1420	if (unlikely(seq == 0))
1421		return -EINVAL;
1422
1423	if (likely(seq > x->replay.seq))
1424		return 0;
1425
1426	diff = x->replay.seq - seq;
1427	if (diff >= min_t(unsigned int, x->props.replay_window,
1428			  sizeof(x->replay.bitmap) * 8)) {
1429		x->stats.replay_window++;
1430		return -EINVAL;
1431	}
1432
1433	if (x->replay.bitmap & (1U << diff)) {
1434		x->stats.replay++;
1435		return -EINVAL;
1436	}
1437	return 0;
1438}
1439EXPORT_SYMBOL(xfrm_replay_check);
1440
/* Record acceptance of sequence number @net_seq: slide the anti-replay
 * window forward for a new high sequence number, or set the matching
 * bit for an in-window packet.
 *
 * NOTE(review): the shifts assume diff stays below the bit width of
 * x->replay.bitmap.  xfrm_replay_check() clamps its window test with
 * min_t, but no equivalent clamp is visible here — confirm callers
 * always run xfrm_replay_check() first and that replay_window never
 * exceeds the bitmap width.
 */
void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
{
	u32 diff;
	u32 seq = ntohl(net_seq);

	if (seq > x->replay.seq) {
		/* New right edge: shift the window up to it. */
		diff = seq - x->replay.seq;
		if (diff < x->props.replay_window)
			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
		else
			x->replay.bitmap = 1;
		x->replay.seq = seq;
	} else {
		/* Inside the window: mark this sequence as seen. */
		diff = x->replay.seq - seq;
		x->replay.bitmap |= (1U << diff);
	}

	if (xfrm_aevent_is_on())
		xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
}
EXPORT_SYMBOL(xfrm_replay_advance);
1462
/* List of registered key managers (see xfrm_register_km) and the
 * rwlock protecting it: readers broadcast events, writers register
 * and unregister managers. */
static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1465
1466void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1467{
1468	struct xfrm_mgr *km;
1469
1470	read_lock(&xfrm_km_lock);
1471	list_for_each_entry(km, &xfrm_km_list, list)
1472		if (km->notify_policy)
1473			km->notify_policy(xp, dir, c);
1474	read_unlock(&xfrm_km_lock);
1475}
1476
1477void km_state_notify(struct xfrm_state *x, struct km_event *c)
1478{
1479	struct xfrm_mgr *km;
1480	read_lock(&xfrm_km_lock);
1481	list_for_each_entry(km, &xfrm_km_list, list)
1482		if (km->notify)
1483			km->notify(x, c);
1484	read_unlock(&xfrm_km_lock);
1485}
1486
1487EXPORT_SYMBOL(km_policy_notify);
1488EXPORT_SYMBOL(km_state_notify);
1489
1490void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1491{
1492	struct km_event c;
1493
1494	c.data.hard = hard;
1495	c.pid = pid;
1496	c.event = XFRM_MSG_EXPIRE;
1497	km_state_notify(x, &c);
1498
1499	if (hard)
1500		wake_up(&km_waitq);
1501}
1502
1503EXPORT_SYMBOL(km_state_expired);
1504/*
1505 * We send to all registered managers regardless of failure
1506 * We are happy with one success
1507*/
1508int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1509{
1510	int err = -EINVAL, acqret;
1511	struct xfrm_mgr *km;
1512
1513	read_lock(&xfrm_km_lock);
1514	list_for_each_entry(km, &xfrm_km_list, list) {
1515		acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1516		if (!acqret)
1517			err = acqret;
1518	}
1519	read_unlock(&xfrm_km_lock);
1520	return err;
1521}
1522EXPORT_SYMBOL(km_query);
1523
1524int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1525{
1526	int err = -EINVAL;
1527	struct xfrm_mgr *km;
1528
1529	read_lock(&xfrm_km_lock);
1530	list_for_each_entry(km, &xfrm_km_list, list) {
1531		if (km->new_mapping)
1532			err = km->new_mapping(x, ipaddr, sport);
1533		if (!err)
1534			break;
1535	}
1536	read_unlock(&xfrm_km_lock);
1537	return err;
1538}
1539EXPORT_SYMBOL(km_new_mapping);
1540
1541void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1542{
1543	struct km_event c;
1544
1545	c.data.hard = hard;
1546	c.pid = pid;
1547	c.event = XFRM_MSG_POLEXPIRE;
1548	km_policy_notify(pol, dir, &c);
1549
1550	if (hard)
1551		wake_up(&km_waitq);
1552}
1553EXPORT_SYMBOL(km_policy_expired);
1554
1555int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1556	       struct xfrm_migrate *m, int num_migrate)
1557{
1558	int err = -EINVAL;
1559	int ret;
1560	struct xfrm_mgr *km;
1561
1562	read_lock(&xfrm_km_lock);
1563	list_for_each_entry(km, &xfrm_km_list, list) {
1564		if (km->migrate) {
1565			ret = km->migrate(sel, dir, type, m, num_migrate);
1566			if (!ret)
1567				err = ret;
1568		}
1569	}
1570	read_unlock(&xfrm_km_lock);
1571	return err;
1572}
1573EXPORT_SYMBOL(km_migrate);
1574
1575int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1576{
1577	int err = -EINVAL;
1578	int ret;
1579	struct xfrm_mgr *km;
1580
1581	read_lock(&xfrm_km_lock);
1582	list_for_each_entry(km, &xfrm_km_list, list) {
1583		if (km->report) {
1584			ret = km->report(proto, sel, addr);
1585			if (!ret)
1586				err = ret;
1587		}
1588	}
1589	read_unlock(&xfrm_km_lock);
1590	return err;
1591}
1592EXPORT_SYMBOL(km_report);
1593
/* setsockopt() helper: build a per-socket policy from a key-manager
 * specific blob in @optval and attach it to @sk.
 *
 * The blob is copied in and offered to each registered key manager's
 * compile_policy; the first manager that produces a policy (signalled
 * by err >= 0) wins.  Returns 0 on success or a negative errno.
 */
int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
{
	int err;
	u8 *data;
	struct xfrm_mgr *km;
	struct xfrm_policy *pol = NULL;

	if (optlen <= 0 || optlen > PAGE_SIZE)
		return -EMSGSIZE;

	data = kmalloc(optlen, GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	err = -EFAULT;
	if (copy_from_user(data, optval, optlen))
		goto out;

	err = -EINVAL;
	read_lock(&xfrm_km_lock);
	list_for_each_entry(km, &xfrm_km_list, list) {
		/* On success compile_policy sets err to a value >= 0
		 * (used below as the direction for insertion — presumably;
		 * verify against the key managers' implementations). */
		pol = km->compile_policy(sk, optname, data,
					 optlen, &err);
		if (err >= 0)
			break;
	}
	read_unlock(&xfrm_km_lock);

	if (err >= 0) {
		xfrm_sk_policy_insert(sk, err, pol);
		xfrm_pol_put(pol);
		err = 0;
	}

out:
	kfree(data);
	return err;
}
EXPORT_SYMBOL(xfrm_user_policy);
1633
1634int xfrm_register_km(struct xfrm_mgr *km)
1635{
1636	write_lock_bh(&xfrm_km_lock);
1637	list_add_tail(&km->list, &xfrm_km_list);
1638	write_unlock_bh(&xfrm_km_lock);
1639	return 0;
1640}
1641EXPORT_SYMBOL(xfrm_register_km);
1642
1643int xfrm_unregister_km(struct xfrm_mgr *km)
1644{
1645	write_lock_bh(&xfrm_km_lock);
1646	list_del(&km->list);
1647	write_unlock_bh(&xfrm_km_lock);
1648	return 0;
1649}
1650EXPORT_SYMBOL(xfrm_unregister_km);
1651
1652int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1653{
1654	int err = 0;
1655	if (unlikely(afinfo == NULL))
1656		return -EINVAL;
1657	if (unlikely(afinfo->family >= NPROTO))
1658		return -EAFNOSUPPORT;
1659	write_lock_bh(&xfrm_state_afinfo_lock);
1660	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1661		err = -ENOBUFS;
1662	else
1663		xfrm_state_afinfo[afinfo->family] = afinfo;
1664	write_unlock_bh(&xfrm_state_afinfo_lock);
1665	return err;
1666}
1667EXPORT_SYMBOL(xfrm_state_register_afinfo);
1668
1669int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1670{
1671	int err = 0;
1672	if (unlikely(afinfo == NULL))
1673		return -EINVAL;
1674	if (unlikely(afinfo->family >= NPROTO))
1675		return -EAFNOSUPPORT;
1676	write_lock_bh(&xfrm_state_afinfo_lock);
1677	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1678		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1679			err = -EINVAL;
1680		else
1681			xfrm_state_afinfo[afinfo->family] = NULL;
1682	}
1683	write_unlock_bh(&xfrm_state_afinfo_lock);
1684	return err;
1685}
1686EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1687
/* Look up the state AF info entry for @family.
 *
 * LOCKING: on success the entry is returned with
 * xfrm_state_afinfo_lock held for reading; the caller must release it
 * via xfrm_state_put_afinfo().  On failure (family out of range or no
 * entry registered) the lock is dropped here and NULL is returned.
 */
struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
{
	struct xfrm_state_afinfo *afinfo;
	if (unlikely(family >= NPROTO))
		return NULL;
	read_lock(&xfrm_state_afinfo_lock);
	afinfo = xfrm_state_afinfo[family];
	if (unlikely(!afinfo))
		read_unlock(&xfrm_state_afinfo_lock);
	return afinfo;
}
1699
/* Release the read lock taken by a successful xfrm_state_get_afinfo().
 * Must only be called after a non-NULL return from that function;
 * @afinfo itself is unused. */
void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
{
	read_unlock(&xfrm_state_afinfo_lock);
}

EXPORT_SYMBOL(xfrm_state_get_afinfo);
EXPORT_SYMBOL(xfrm_state_put_afinfo);
1707
1708/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1709void xfrm_state_delete_tunnel(struct xfrm_state *x)
1710{
1711	if (x->tunnel) {
1712		struct xfrm_state *t = x->tunnel;
1713
1714		if (atomic_read(&t->tunnel_users) == 2)
1715			xfrm_state_delete(t);
1716		atomic_dec(&t->tunnel_users);
1717		xfrm_state_put(t);
1718		x->tunnel = NULL;
1719	}
1720}
1721EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1722
1723int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1724{
1725	int res;
1726
1727	spin_lock_bh(&x->lock);
1728	if (x->km.state == XFRM_STATE_VALID &&
1729	    x->type && x->type->get_mtu)
1730		res = x->type->get_mtu(x, mtu);
1731	else
1732		res = mtu - x->props.header_len;
1733	spin_unlock_bh(&x->lock);
1734	return res;
1735}
1736
1737int xfrm_init_state(struct xfrm_state *x)
1738{
1739	struct xfrm_state_afinfo *afinfo;
1740	int family = x->props.family;
1741	int err;
1742
1743	err = -EAFNOSUPPORT;
1744	afinfo = xfrm_state_get_afinfo(family);
1745	if (!afinfo)
1746		goto error;
1747
1748	err = 0;
1749	if (afinfo->init_flags)
1750		err = afinfo->init_flags(x);
1751
1752	xfrm_state_put_afinfo(afinfo);
1753
1754	if (err)
1755		goto error;
1756
1757	err = -EPROTONOSUPPORT;
1758	x->type = xfrm_get_type(x->id.proto, family);
1759	if (x->type == NULL)
1760		goto error;
1761
1762	err = x->type->init_state(x);
1763	if (err)
1764		goto error;
1765
1766	x->mode = xfrm_get_mode(x->props.mode, family);
1767	if (x->mode == NULL)
1768		goto error;
1769
1770	x->km.state = XFRM_STATE_VALID;
1771
1772error:
1773	return err;
1774}
1775
1776EXPORT_SYMBOL(xfrm_init_state);
1777
/* Boot-time setup: allocate the bydst/bysrc/byspi hash tables and
 * prepare the state garbage-collection work item.  Panics on
 * allocation failure, since xfrm cannot operate without the tables. */
void __init xfrm_state_init(void)
{
	unsigned int sz;

	/* Eight hlist_head buckets per table to start with. */
	sz = sizeof(struct hlist_head) * 8;

	xfrm_state_bydst = xfrm_hash_alloc(sz);
	xfrm_state_bysrc = xfrm_hash_alloc(sz);
	xfrm_state_byspi = xfrm_hash_alloc(sz);
	if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
		panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
	/* hmask = bucket count - 1, used to mask hash values. */
	xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);

	INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
}
1793