// SPDX-License-Identifier: GPL-2.0

#include <linux/types.h>
#include <linux/atomic.h>
#include <linux/inetdevice.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv6.h>

#include <net/netfilter/nf_nat_masquerade.h>

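/* Deferred conntrack table cleanup: one work item per address/interface
 * removal event, queued by the notifiers below and run from the system
 * workqueue via iterate_cleanup_work().
 */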
struct masq_dev_work {
	struct work_struct work;
	struct net *net;
	netns_tracker ns_tracker;
	union nf_inet_addr addr;
	int ifindex;
	int (*iter)(struct nf_conn *i, void *data);
};

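/* Upper bound on pending cleanup work items, enforced (approximately) in
 * nf_nat_masq_schedule().  masq_refcnt counts registered users of the
 * notifiers and is protected by masq_mutex.
 */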
#define MAX_MASQ_WORKER_COUNT	16

static DEFINE_MUTEX(masq_mutex);
static unsigned int masq_refcnt __read_mostly;
static atomic_t masq_worker_count __read_mostly;

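/* Rewrite the source address to one owned by the output device, as chosen
 * by inet_select_addr() for the route's next hop.  A sketch of a typical
 * caller, the xt_MASQUERADE target running at NF_INET_POST_ROUTING (not
 * part of this file):
 *
 *	return nf_nat_masquerade_ipv4(skb, xt_hooknum(par), &range,
 *				      xt_out(par));
 */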
unsigned int
nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum,
		       const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	struct nf_conn *ct;
	struct nf_conn_nat *nat;
	enum ip_conntrack_info ctinfo;
	struct nf_nat_range2 newrange;
	const struct rtable *rt;
	__be32 newsrc, nh;

	WARN_ON(hooknum != NF_INET_POST_ROUTING);

	ct = nf_ct_get(skb, &ctinfo);

	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	/* Source address is 0.0.0.0 - locally generated packet that is
	 * probably not supposed to be masqueraded.
	 */
	if (ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3.ip == 0)
		return NF_ACCEPT;

	rt = skb_rtable(skb);
	nh = rt_nexthop(rt, ip_hdr(skb)->daddr);
	newsrc = inet_select_addr(out, nh, RT_SCOPE_UNIVERSE);
	if (!newsrc) {
		pr_info("%s ate my IP address\n", out->name);
		return NF_DROP;
	}

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	/* Transfer from original range. */
	memset(&newrange.min_addr, 0, sizeof(newrange.min_addr));
	memset(&newrange.max_addr, 0, sizeof(newrange.max_addr));
	newrange.flags       = range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.ip = newsrc;
	newrange.max_addr.ip = newsrc;
	newrange.min_proto   = range->min_proto;
	newrange.max_proto   = range->max_proto;

	/* Hand modified range to generic setup. */
	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv4);

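/* Work handler: walk the conntrack table of the recorded netns with the
 * comparison callback stored in the work item, then release the module,
 * netns and worker-count references taken in nf_nat_masq_schedule().
 */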
static void iterate_cleanup_work(struct work_struct *work)
{
	struct nf_ct_iter_data iter_data = {};
	struct masq_dev_work *w;

	w = container_of(work, struct masq_dev_work, work);

	iter_data.net = w->net;
	iter_data.data = (void *)w;
	nf_ct_iterate_cleanup_net(w->iter, &iter_data);

	put_net_track(w->net, &w->ns_tracker);
	kfree(w);
	atomic_dec(&masq_worker_count);
	module_put(THIS_MODULE);
}

/* Iterate conntrack table in the background and remove conntrack entries
 * that use the device/address being removed.
 *
 * If too many work items are already queued or memory allocation fails,
 * the iteration is skipped; affected conntrack entries will time out
 * eventually.
 */
static void nf_nat_masq_schedule(struct net *net, union nf_inet_addr *addr,
				 int ifindex,
				 int (*iter)(struct nf_conn *i, void *data),
				 gfp_t gfp_flags)
{
	struct masq_dev_work *w;

	if (atomic_read(&masq_worker_count) > MAX_MASQ_WORKER_COUNT)
		return;

	net = maybe_get_net(net);
	if (!net)
		return;

	if (!try_module_get(THIS_MODULE))
		goto err_module;

	w = kzalloc(sizeof(*w), gfp_flags);
	if (w) {
		/* We can overshoot MAX_MASQ_WORKER_COUNT, no big deal */
		atomic_inc(&masq_worker_count);

		INIT_WORK(&w->work, iterate_cleanup_work);
		w->ifindex = ifindex;
		w->net = net;
		netns_tracker_alloc(net, &w->ns_tracker, gfp_flags);
		w->iter = iter;
		if (addr)
			w->addr = *addr;
		schedule_work(&w->work);
		return;
	}

	module_put(THIS_MODULE);
 err_module:
	put_net(net);
}

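/* nf_ct_iterate_cleanup_net() callback: drop entries that were masqueraded
 * via the interface recorded in the work item.
 */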
static int device_cmp(struct nf_conn *i, void *arg)
{
	const struct nf_conn_nat *nat = nfct_nat(i);
	const struct masq_dev_work *w = arg;

	if (!nat)
		return 0;
	return nat->masq_index == w->ifindex;
}

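/* Netdevice notifier: runs in process context (under RTNL), so scheduling
 * with GFP_KERNEL is safe here, unlike in masq_inet6_event() below.
 */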
static int masq_device_event(struct notifier_block *this,
			     unsigned long event,
			     void *ptr)
{
	const struct net_device *dev = netdev_notifier_info_to_dev(ptr);
	struct net *net = dev_net(dev);

	if (event == NETDEV_DOWN) {
		/* Device was downed.  Search entire table for
		 * conntracks which were associated with that device,
		 * and forget them.
		 */

		nf_nat_masq_schedule(net, NULL, dev->ifindex,
				     device_cmp, GFP_KERNEL);
	}

	return NOTIFY_DONE;
}

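/* For a masqueraded flow the reply tuple's destination holds the NAT'ed
 * source address, so compare it against the address being removed.
 */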
static int inet_cmp(struct nf_conn *ct, void *ptr)
{
	struct nf_conntrack_tuple *tuple;
	struct masq_dev_work *w = ptr;

	if (!device_cmp(ct, ptr))
		return 0;

	tuple = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;

	return nf_inet_addr_cmp(&w->addr, &tuple->dst.u3);
}

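/* inetaddr notifier: blocking chain, called in process context when an
 * IPv4 address is removed from an interface.
 */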
static int masq_inet_event(struct notifier_block *this,
			   unsigned long event,
			   void *ptr)
{
	const struct in_ifaddr *ifa = ptr;
	const struct in_device *idev;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	/* The masq_dev_notifier will catch the case of the device going
	 * down.  So if the inetdev is dead and being destroyed we have
	 * no work to do.  Otherwise this is an individual address removal
	 * and we have to perform the flush.
	 */
	idev = ifa->ifa_dev;
	if (idev->dead)
		return NOTIFY_DONE;

	memset(&addr, 0, sizeof(addr));

	addr.ip = ifa->ifa_address;

	dev = idev->dev;
	nf_nat_masq_schedule(dev_net(idev->dev), &addr, dev->ifindex,
			     inet_cmp, GFP_KERNEL);

	return NOTIFY_DONE;
}

static struct notifier_block masq_dev_notifier = {
	.notifier_call	= masq_device_event,
};

static struct notifier_block masq_inet_notifier = {
	.notifier_call	= masq_inet_event,
};

#if IS_ENABLED(CONFIG_IPV6)
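/* When IPv6 is built as a module, ipv6_dev_get_saddr() cannot be called
 * directly; go through nf_ipv6_ops, which is only available once the ipv6
 * module has been loaded.
 */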
static int
nat_ipv6_dev_get_saddr(struct net *net, const struct net_device *dev,
		       const struct in6_addr *daddr, unsigned int srcprefs,
		       struct in6_addr *saddr)
{
#ifdef CONFIG_IPV6_MODULE
	const struct nf_ipv6_ops *v6_ops = nf_get_ipv6_ops();

	if (!v6_ops)
		return -EHOSTUNREACH;

	return v6_ops->dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#else
	return ipv6_dev_get_saddr(net, dev, daddr, srcprefs, saddr);
#endif
}

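/* IPv6 counterpart of nf_nat_masquerade_ipv4(): pick a source address on
 * the output device via IPv6 source address selection.  A sketch of how
 * the nft masq expression invokes it from NF_INET_POST_ROUTING (not part
 * of this file):
 *
 *	regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range,
 *						    nft_out(pkt));
 */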
unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range2 *range,
		       const struct net_device *out)
{
	enum ip_conntrack_info ctinfo;
	struct nf_conn_nat *nat;
	struct in6_addr src;
	struct nf_conn *ct;
	struct nf_nat_range2 newrange;

	ct = nf_ct_get(skb, &ctinfo);
	WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED ||
			 ctinfo == IP_CT_RELATED_REPLY)));

	if (nat_ipv6_dev_get_saddr(nf_ct_net(ct), out,
				   &ipv6_hdr(skb)->daddr, 0, &src) < 0)
		return NF_DROP;

	nat = nf_ct_nat_ext_add(ct);
	if (nat)
		nat->masq_index = out->ifindex;

	newrange.flags		= range->flags | NF_NAT_RANGE_MAP_IPS;
	newrange.min_addr.in6	= src;
	newrange.max_addr.in6	= src;
	newrange.min_proto	= range->min_proto;
	newrange.max_proto	= range->max_proto;

	return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_ipv6);

/* Atomic notifier: we can't call nf_ct_iterate_cleanup_net() here as it
 * can sleep, so defer the table walk to the system workqueue.
 *
 * As a burst of inet6 events can arrive (depending on how many ipv6
 * addresses are being deleted), we also need to bound the number of
 * queued work items.
 */
static int masq_inet6_event(struct notifier_block *this,
			    unsigned long event, void *ptr)
{
	struct inet6_ifaddr *ifa = ptr;
	const struct net_device *dev;
	union nf_inet_addr addr;

	if (event != NETDEV_DOWN)
		return NOTIFY_DONE;

	dev = ifa->idev->dev;

	memset(&addr, 0, sizeof(addr));

	addr.in6 = ifa->addr;

	nf_nat_masq_schedule(dev_net(dev), &addr, dev->ifindex, inet_cmp,
			     GFP_ATOMIC);
	return NOTIFY_DONE;
}

static struct notifier_block masq_inet6_notifier = {
	.notifier_call	= masq_inet6_event,
};

static int nf_nat_masquerade_ipv6_register_notifier(void)
{
	return register_inet6addr_notifier(&masq_inet6_notifier);
}
#else
static inline int nf_nat_masquerade_ipv6_register_notifier(void) { return 0; }
#endif

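/* Refcounted notifier registration shared by all masquerade users.  Each
 * user pairs the register/unregister calls, typically at module init/exit;
 * a sketch of how a user might do this (hypothetical, not taken from this
 * file):
 *
 *	static int __init masq_user_init(void)
 *	{
 *		return nf_nat_masquerade_inet_register_notifiers();
 *	}
 *
 *	static void __exit masq_user_exit(void)
 *	{
 *		nf_nat_masquerade_inet_unregister_notifiers();
 *	}
 */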
int nf_nat_masquerade_inet_register_notifiers(void)
{
	int ret = 0;

	mutex_lock(&masq_mutex);
	if (WARN_ON_ONCE(masq_refcnt == UINT_MAX)) {
		ret = -EOVERFLOW;
		goto out_unlock;
	}

	/* check if the notifier was already set */
	if (++masq_refcnt > 1)
		goto out_unlock;

	/* Register for device down reports */
	ret = register_netdevice_notifier(&masq_dev_notifier);
	if (ret)
		goto err_dec;
	/* Register IP address change reports */
	ret = register_inetaddr_notifier(&masq_inet_notifier);
	if (ret)
		goto err_unregister;

	ret = nf_nat_masquerade_ipv6_register_notifier();
	if (ret)
		goto err_unreg_inet;

	mutex_unlock(&masq_mutex);
	return ret;
err_unreg_inet:
	unregister_inetaddr_notifier(&masq_inet_notifier);
err_unregister:
	unregister_netdevice_notifier(&masq_dev_notifier);
err_dec:
	masq_refcnt--;
out_unlock:
	mutex_unlock(&masq_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_register_notifiers);

void nf_nat_masquerade_inet_unregister_notifiers(void)
{
	mutex_lock(&masq_mutex);
	/* check if the notifiers still have clients */
	if (--masq_refcnt > 0)
		goto out_unlock;

	unregister_netdevice_notifier(&masq_dev_notifier);
	unregister_inetaddr_notifier(&masq_inet_notifier);
#if IS_ENABLED(CONFIG_IPV6)
	unregister_inet6addr_notifier(&masq_inet6_notifier);
#endif
out_unlock:
	mutex_unlock(&masq_mutex);
}
EXPORT_SYMBOL_GPL(nf_nat_masquerade_inet_unregister_notifiers);