1From: Alexander Duyck <alexander.h.duyck@redhat.com>
2Date: Wed, 31 Dec 2014 10:56:43 -0800
3Subject: [PATCH] fib_trie: Push assignment of child to parent down into
4 inflate/halve
5
6This change makes it so that the assignment of the tnode to the parent is
7handled directly within whatever function is currently handling the node be
8it inflate, halve, or resize.  By doing this we can avoid some of the need
9to set NULL pointers in the tree while we are resizing the subnodes.
10
11Signed-off-by: Alexander Duyck <alexander.h.duyck@redhat.com>
12Signed-off-by: David S. Miller <davem@davemloft.net>
13---
14
15--- a/net/ipv4/fib_trie.c
16+++ b/net/ipv4/fib_trie.c
17@@ -146,9 +146,7 @@ struct trie {
18 #endif
19 };
20 
21-static void tnode_put_child_reorg(struct tnode *tn, unsigned long i,
22-				  struct tnode *n, int wasfull);
23-static struct tnode *resize(struct trie *t, struct tnode *tn);
24+static void resize(struct trie *t, struct tnode *tn);
25 /* tnodes to free after resize(); protected by RTNL */
26 static struct callback_head *tnode_free_head;
27 static size_t tnode_free_size;
28@@ -396,22 +394,13 @@ static inline int tnode_full(const struc
29 	return n && ((n->pos + n->bits) == tn->pos) && IS_TNODE(n);
30 }
31 
32-static inline void put_child(struct tnode *tn, unsigned long i,
33-			     struct tnode *n)
34-{
35-	tnode_put_child_reorg(tn, i, n, -1);
36-}
37-
38- /*
39-  * Add a child at position i overwriting the old value.
40-  * Update the value of full_children and empty_children.
41-  */
42-
43-static void tnode_put_child_reorg(struct tnode *tn, unsigned long i,
44-				  struct tnode *n, int wasfull)
45+/* Add a child at position i overwriting the old value.
46+ * Update the value of full_children and empty_children.
47+ */
48+static void put_child(struct tnode *tn, unsigned long i, struct tnode *n)
49 {
50 	struct tnode *chi = rtnl_dereference(tn->child[i]);
51-	int isfull;
52+	int isfull, wasfull;
53 
54 	BUG_ON(i >= tnode_child_length(tn));
55 
56@@ -422,10 +411,9 @@ static void tnode_put_child_reorg(struct
57 		tn->empty_children--;
58 
59 	/* update fullChildren */
60-	if (wasfull == -1)
61-		wasfull = tnode_full(tn, chi);
62-
63+	wasfull = tnode_full(tn, chi);
64 	isfull = tnode_full(tn, n);
65+
66 	if (wasfull && !isfull)
67 		tn->full_children--;
68 	else if (!wasfull && isfull)
69@@ -458,9 +446,10 @@ static void tnode_clean_free(struct tnod
70 	node_free(tn);
71 }
72 
73-static struct tnode *inflate(struct trie *t, struct tnode *oldtnode)
74+static int inflate(struct trie *t, struct tnode *oldtnode)
75 {
76 	unsigned long olen = tnode_child_length(oldtnode);
77+	struct tnode *tp = node_parent(oldtnode);
78 	struct tnode *tn;
79 	unsigned long i;
80 	t_key m;
81@@ -468,9 +457,8 @@ static struct tnode *inflate(struct trie
82 	pr_debug("In inflate\n");
83 
84 	tn = tnode_new(oldtnode->key, oldtnode->pos - 1, oldtnode->bits + 1);
85-
86 	if (!tn)
87-		return ERR_PTR(-ENOMEM);
88+		return -ENOMEM;
89 
90 	/*
91 	 * Preallocate and store tnodes before the actual work so we
92@@ -564,30 +552,36 @@ static struct tnode *inflate(struct trie
93 			put_child(left, j, rtnl_dereference(inode->child[j]));
94 			put_child(right, j, rtnl_dereference(inode->child[j + size]));
95 		}
96-		put_child(tn, 2*i, resize(t, left));
97-		put_child(tn, 2*i+1, resize(t, right));
98+
99+		put_child(tn, 2 * i, left);
100+		put_child(tn, 2 * i + 1, right);
101 
102 		tnode_free_safe(inode);
103+
104+		resize(t, left);
105+		resize(t, right);
106 	}
107+
108+	put_child_root(tp, t, tn->key, tn);
109 	tnode_free_safe(oldtnode);
110-	return tn;
111+	return 0;
112 nomem:
113 	tnode_clean_free(tn);
114-	return ERR_PTR(-ENOMEM);
115+	return -ENOMEM;
116 }
117 
118-static struct tnode *halve(struct trie *t, struct tnode *oldtnode)
119+static int halve(struct trie *t, struct tnode *oldtnode)
120 {
121 	unsigned long olen = tnode_child_length(oldtnode);
122+	struct tnode *tp = node_parent(oldtnode);
123 	struct tnode *tn, *left, *right;
124 	int i;
125 
126 	pr_debug("In halve\n");
127 
128 	tn = tnode_new(oldtnode->key, oldtnode->pos + 1, oldtnode->bits - 1);
129-
130 	if (!tn)
131-		return ERR_PTR(-ENOMEM);
132+		return -ENOMEM;
133 
134 	/*
135 	 * Preallocate and store tnodes before the actual work so we
136@@ -606,8 +600,10 @@ static struct tnode *halve(struct trie *
137 
138 			newn = tnode_new(left->key, oldtnode->pos, 1);
139 
140-			if (!newn)
141-				goto nomem;
142+			if (!newn) {
143+				tnode_clean_free(tn);
144+				return -ENOMEM;
145+			}
146 
147 			put_child(tn, i/2, newn);
148 		}
149@@ -635,16 +631,18 @@ static struct tnode *halve(struct trie *
150 
151 		/* Two nonempty children */
152 		newBinNode = tnode_get_child(tn, i/2);
153-		put_child(tn, i/2, NULL);
154 		put_child(newBinNode, 0, left);
155 		put_child(newBinNode, 1, right);
156-		put_child(tn, i/2, resize(t, newBinNode));
157+
158+		put_child(tn, i / 2, newBinNode);
159+
160+		resize(t, newBinNode);
161 	}
162+
163+	put_child_root(tp, t, tn->key, tn);
164 	tnode_free_safe(oldtnode);
165-	return tn;
166-nomem:
167-	tnode_clean_free(tn);
168-	return ERR_PTR(-ENOMEM);
169+
170+	return 0;
171 }
172 
173 /* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
174@@ -704,45 +702,48 @@ nomem:
175  *    tnode_child_length(tn)
176  *
177  */
178-static bool should_inflate(const struct tnode *tn)
179+static bool should_inflate(const struct tnode *tp, const struct tnode *tn)
180 {
181 	unsigned long used = tnode_child_length(tn);
182 	unsigned long threshold = used;
183 
184 	/* Keep root node larger */
185-	threshold *= node_parent(tn) ? inflate_threshold :
186-				       inflate_threshold_root;
187+	threshold *= tp ? inflate_threshold : inflate_threshold_root;
188 	used += tn->full_children;
189 	used -= tn->empty_children;
190 
191 	return tn->pos && ((50 * used) >= threshold);
192 }
193 
194-static bool should_halve(const struct tnode *tn)
195+static bool should_halve(const struct tnode *tp, const struct tnode *tn)
196 {
197 	unsigned long used = tnode_child_length(tn);
198 	unsigned long threshold = used;
199 
200 	/* Keep root node larger */
201-	threshold *= node_parent(tn) ? halve_threshold :
202-				       halve_threshold_root;
203+	threshold *= tp ? halve_threshold : halve_threshold_root;
204 	used -= tn->empty_children;
205 
206 	return (tn->bits > 1) && ((100 * used) < threshold);
207 }
208 
209 #define MAX_WORK 10
210-static struct tnode *resize(struct trie *t, struct tnode *tn)
211+static void resize(struct trie *t, struct tnode *tn)
212 {
213-	struct tnode *old_tn, *n = NULL;
214+	struct tnode *tp = node_parent(tn), *n = NULL;
215+	struct tnode __rcu **cptr;
216 	int max_work;
217 
218-	if (!tn)
219-		return NULL;
220-
221 	pr_debug("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
222 		 tn, inflate_threshold, halve_threshold);
223 
224+	/* track the tnode via the pointer from the parent instead of
225+	 * doing it ourselves.  This way we can let RCU fully do its
226+	 * thing without us interfering
227+	 */
228+	cptr = tp ? &tp->child[get_index(tn->key, tp)] : &t->trie;
229+	BUG_ON(tn != rtnl_dereference(*cptr));
230+
231 	/* No children */
232 	if (tn->empty_children > (tnode_child_length(tn) - 1))
233 		goto no_children;
234@@ -755,39 +756,35 @@ static struct tnode *resize(struct trie
235 	 * nonempty nodes that are above the threshold.
236 	 */
237 	max_work = MAX_WORK;
238-	while (should_inflate(tn) && max_work--) {
239-		old_tn = tn;
240-		tn = inflate(t, tn);
241-
242-		if (IS_ERR(tn)) {
243-			tn = old_tn;
244+	while (should_inflate(tp, tn) && max_work--) {
245+		if (inflate(t, tn)) {
246 #ifdef CONFIG_IP_FIB_TRIE_STATS
247 			this_cpu_inc(t->stats->resize_node_skipped);
248 #endif
249 			break;
250 		}
251+
252+		tn = rtnl_dereference(*cptr);
253 	}
254 
255 	/* Return if at least one inflate is run */
256 	if (max_work != MAX_WORK)
257-		return tn;
258+		return;
259 
260 	/* Halve as long as the number of empty children in this
261 	 * node is above threshold.
262 	 */
263 	max_work = MAX_WORK;
264-	while (should_halve(tn) && max_work--) {
265-		old_tn = tn;
266-		tn = halve(t, tn);
267-		if (IS_ERR(tn)) {
268-			tn = old_tn;
269+	while (should_halve(tp, tn) && max_work--) {
270+		if (halve(t, tn)) {
271 #ifdef CONFIG_IP_FIB_TRIE_STATS
272 			this_cpu_inc(t->stats->resize_node_skipped);
273 #endif
274 			break;
275 		}
276-	}
277 
278+		tn = rtnl_dereference(*cptr);
279+	}
280 
281 	/* Only one child remains */
282 	if (tn->empty_children == (tnode_child_length(tn) - 1)) {
283@@ -797,11 +794,12 @@ one_child:
284 			n = tnode_get_child(tn, --i);
285 no_children:
286 		/* compress one level */
287-		node_set_parent(n, NULL);
288+		put_child_root(tp, t, tn->key, n);
289+		node_set_parent(n, tp);
290+
291+		/* drop dead node */
292 		tnode_free_safe(tn);
293-		return n;
294 	}
295-	return tn;
296 }
297 
298 /* readside must use rcu_read_lock currently dump routines
299@@ -882,34 +880,19 @@ static struct tnode *fib_find_node(struc
300 
301 static void trie_rebalance(struct trie *t, struct tnode *tn)
302 {
303-	int wasfull;
304-	t_key cindex, key;
305 	struct tnode *tp;
306 
307-	key = tn->key;
308-
309-	while (tn != NULL && (tp = node_parent(tn)) != NULL) {
310-		cindex = get_index(key, tp);
311-		wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
312-		tn = resize(t, tn);
313-
314-		tnode_put_child_reorg(tp, cindex, tn, wasfull);
315-
316-		tp = node_parent(tn);
317-		if (!tp)
318-			rcu_assign_pointer(t->trie, tn);
319+	while ((tp = node_parent(tn)) != NULL) {
320+		resize(t, tn);
321 
322 		tnode_free_flush();
323-		if (!tp)
324-			break;
325 		tn = tp;
326 	}
327 
328 	/* Handle last (top) tnode */
329 	if (IS_TNODE(tn))
330-		tn = resize(t, tn);
331+		resize(t, tn);
332 
333-	rcu_assign_pointer(t->trie, tn);
334 	tnode_free_flush();
335 }
336 
337