1// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0-or-later
2/*
3 * Copyright 2008 - 2016 Freescale Semiconductor Inc.
4 * Copyright 2020 NXP
5 */
6
7#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8
9#include <linux/init.h>
10#include <linux/mod_devicetable.h>
11#include <linux/module.h>
12#include <linux/of_mdio.h>
13#include <linux/of_net.h>
14#include <linux/io.h>
15#include <linux/if_arp.h>
16#include <linux/if_vlan.h>
17#include <linux/icmp.h>
18#include <linux/ip.h>
19#include <linux/ipv6.h>
20#include <linux/platform_device.h>
21#include <linux/udp.h>
22#include <linux/tcp.h>
23#include <linux/net.h>
24#include <linux/skbuff.h>
25#include <linux/etherdevice.h>
26#include <linux/if_ether.h>
27#include <linux/highmem.h>
28#include <linux/percpu.h>
29#include <linux/dma-mapping.h>
30#include <linux/sort.h>
31#include <linux/phy_fixed.h>
32#include <linux/bpf.h>
33#include <linux/bpf_trace.h>
34#include <soc/fsl/bman.h>
35#include <soc/fsl/qman.h>
36#include "fman.h"
37#include "fman_port.h"
38#include "mac.h"
39#include "dpaa_eth.h"
40
/* CREATE_TRACE_POINTS only needs to be defined once. Other dpaa files
 * using these trace events only need to #include "dpaa_eth_trace.h".
 */
44#define CREATE_TRACE_POINTS
45#include "dpaa_eth_trace.h"
46
47static int debug = -1;
48module_param(debug, int, 0444);
49MODULE_PARM_DESC(debug, "Module/Driver verbosity level (0=none,...,16=all)");
50
51static u16 tx_timeout = 1000;
52module_param(tx_timeout, ushort, 0444);
53MODULE_PARM_DESC(tx_timeout, "The Tx timeout in ms");
54
55#define FM_FD_STAT_RX_ERRORS						\
56	(FM_FD_ERR_DMA | FM_FD_ERR_PHYSICAL	| \
57	 FM_FD_ERR_SIZE | FM_FD_ERR_CLS_DISCARD | \
58	 FM_FD_ERR_EXTRACTION | FM_FD_ERR_NO_SCHEME	| \
59	 FM_FD_ERR_PRS_TIMEOUT | FM_FD_ERR_PRS_ILL_INSTRUCT | \
60	 FM_FD_ERR_PRS_HDR_ERR)
61
62#define FM_FD_STAT_TX_ERRORS \
63	(FM_FD_ERR_UNSUPPORTED_FORMAT | \
64	 FM_FD_ERR_LENGTH | FM_FD_ERR_DMA)
65
66#define DPAA_MSG_DEFAULT (NETIF_MSG_DRV | NETIF_MSG_PROBE | \
67			  NETIF_MSG_LINK | NETIF_MSG_IFUP | \
68			  NETIF_MSG_IFDOWN | NETIF_MSG_HW)
69
70#define DPAA_INGRESS_CS_THRESHOLD 0x10000000
71/* Ingress congestion threshold on FMan ports
72 * The size in bytes of the ingress tail-drop threshold on FMan ports.
73 * Traffic piling up above this value will be rejected by QMan and discarded
74 * by FMan.
75 */
76
77/* Size in bytes of the FQ taildrop threshold */
78#define DPAA_FQ_TD 0x200000
79
80#define DPAA_CS_THRESHOLD_1G 0x06000000
81/* Egress congestion threshold on 1G ports, range 0x1000 .. 0x10000000
82 * The size in bytes of the egress Congestion State notification threshold on
83 * 1G ports. The 1G dTSECs can quite easily be flooded by cores doing Tx in a
84 * tight loop (e.g. by sending UDP datagrams at "while(1) speed"),
85 * and the larger the frame size, the more acute the problem.
86 * So we have to find a balance between these factors:
87 * - avoiding the device staying congested for a prolonged time (risking
88 *   the netdev watchdog to fire - see also the tx_timeout module param);
89 * - affecting performance of protocols such as TCP, which otherwise
90 *   behave well under the congestion notification mechanism;
91 * - preventing the Tx cores from tightly-looping (as if the congestion
92 *   threshold was too low to be effective);
93 * - running out of memory if the CS threshold is set too high.
94 */
95
96#define DPAA_CS_THRESHOLD_10G 0x10000000
97/* The size in bytes of the egress Congestion State notification threshold on
98 * 10G ports, range 0x1000 .. 0x10000000
99 */
100
101/* Largest value that the FQD's OAL field can hold */
102#define FSL_QMAN_MAX_OAL	127
103
104/* Default alignment for start of data in an Rx FD */
105#ifdef CONFIG_DPAA_ERRATUM_A050385
106/* aligning data start to 64 avoids DMA transaction splits, unless the buffer
107 * is crossing a 4k page boundary
108 */
109#define DPAA_FD_DATA_ALIGNMENT  (fman_has_errata_a050385() ? 64 : 16)
110/* aligning to 256 avoids DMA transaction splits caused by 4k page boundary
111 * crossings; also, all SG fragments except the last must have a size multiple
112 * of 256 to avoid DMA transaction splits
113 */
114#define DPAA_A050385_ALIGN 256
115#define DPAA_FD_RX_DATA_ALIGNMENT (fman_has_errata_a050385() ? \
116				   DPAA_A050385_ALIGN : 16)
117#else
118#define DPAA_FD_DATA_ALIGNMENT  16
119#define DPAA_FD_RX_DATA_ALIGNMENT DPAA_FD_DATA_ALIGNMENT
120#endif
121
122/* The DPAA requires 256 bytes reserved and mapped for the SGT */
123#define DPAA_SGT_SIZE 256
124
125/* Values for the L3R field of the FM Parse Results
126 */
127/* L3 Type field: First IP Present IPv4 */
128#define FM_L3_PARSE_RESULT_IPV4	0x8000
129/* L3 Type field: First IP Present IPv6 */
130#define FM_L3_PARSE_RESULT_IPV6	0x4000
131/* Values for the L4R field of the FM Parse Results */
132/* L4 Type field: UDP */
133#define FM_L4_PARSE_RESULT_UDP	0x40
134/* L4 Type field: TCP */
135#define FM_L4_PARSE_RESULT_TCP	0x20
136
137/* FD status field indicating whether the FM Parser has attempted to validate
138 * the L4 csum of the frame.
139 * Note that having this bit set doesn't necessarily imply that the checksum
140 * is valid. One would have to check the parse results to find that out.
141 */
142#define FM_FD_STAT_L4CV         0x00000004
143
144#define DPAA_SGT_MAX_ENTRIES 16 /* maximum number of entries in SG Table */
145#define DPAA_BUFF_RELEASE_MAX 8 /* maximum number of buffers released at once */
146
147#define FSL_DPAA_BPID_INV		0xff
148#define FSL_DPAA_ETH_MAX_BUF_COUNT	128
149#define FSL_DPAA_ETH_REFILL_THRESHOLD	80
150
151#define DPAA_TX_PRIV_DATA_SIZE	16
152#define DPAA_PARSE_RESULTS_SIZE sizeof(struct fman_prs_result)
153#define DPAA_TIME_STAMP_SIZE 8
154#define DPAA_HASH_RESULTS_SIZE 8
155#define DPAA_HWA_SIZE (DPAA_PARSE_RESULTS_SIZE + DPAA_TIME_STAMP_SIZE \
156		       + DPAA_HASH_RESULTS_SIZE)
157#define DPAA_RX_PRIV_DATA_DEFAULT_SIZE (DPAA_TX_PRIV_DATA_SIZE + \
158					XDP_PACKET_HEADROOM - DPAA_HWA_SIZE)
159#ifdef CONFIG_DPAA_ERRATUM_A050385
160#define DPAA_RX_PRIV_DATA_A050385_SIZE (DPAA_A050385_ALIGN - DPAA_HWA_SIZE)
161#define DPAA_RX_PRIV_DATA_SIZE (fman_has_errata_a050385() ? \
162				DPAA_RX_PRIV_DATA_A050385_SIZE : \
163				DPAA_RX_PRIV_DATA_DEFAULT_SIZE)
164#else
165#define DPAA_RX_PRIV_DATA_SIZE DPAA_RX_PRIV_DATA_DEFAULT_SIZE
166#endif
167
168#define DPAA_ETH_PCD_RXQ_NUM	128
169
170#define DPAA_ENQUEUE_RETRIES	100000
171
172enum port_type {RX, TX};
173
174struct fm_port_fqs {
175	struct dpaa_fq *tx_defq;
176	struct dpaa_fq *tx_errq;
177	struct dpaa_fq *rx_defq;
178	struct dpaa_fq *rx_errq;
179	struct dpaa_fq *rx_pcdq;
180};
181
182/* All the dpa bps in use at any moment */
183static struct dpaa_bp *dpaa_bp_array[BM_MAX_NUM_OF_POOLS];
184
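/* Raw size of the buffers seeded into the Rx pools: one order-0 page each */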
185#define DPAA_BP_RAW_SIZE 4096
186
187#ifdef CONFIG_DPAA_ERRATUM_A050385
188#define dpaa_bp_size(raw_size) (SKB_WITH_OVERHEAD(raw_size) & \
189				~(DPAA_A050385_ALIGN - 1))
190#else
191#define dpaa_bp_size(raw_size) SKB_WITH_OVERHEAD(raw_size)
192#endif
193
194static int dpaa_max_frm;
195
196static int dpaa_rx_extra_headroom;
197
198#define dpaa_get_max_mtu()	\
199	(dpaa_max_frm - (VLAN_ETH_HLEN + ETH_FCS_LEN))
200
201static void dpaa_eth_cgr_set_speed(struct mac_device *mac_dev, int speed);
202
203static int dpaa_netdev_init(struct net_device *net_dev,
204			    const struct net_device_ops *dpaa_ops,
205			    u16 tx_timeout)
206{
207	struct dpaa_priv *priv = netdev_priv(net_dev);
208	struct device *dev = net_dev->dev.parent;
209	struct mac_device *mac_dev = priv->mac_dev;
210	struct dpaa_percpu_priv *percpu_priv;
211	const u8 *mac_addr;
212	int i, err;
213
214	/* Although we access another CPU's private data here
215	 * we do it at initialization so it is safe
216	 */
217	for_each_possible_cpu(i) {
218		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
219		percpu_priv->net_dev = net_dev;
220	}
221
222	net_dev->netdev_ops = dpaa_ops;
223	mac_addr = mac_dev->addr;
224
225	net_dev->mem_start = (unsigned long)priv->mac_dev->res->start;
226	net_dev->mem_end = (unsigned long)priv->mac_dev->res->end;
227
228	net_dev->min_mtu = ETH_MIN_MTU;
229	net_dev->max_mtu = dpaa_get_max_mtu();
230
231	net_dev->hw_features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM |
232				 NETIF_F_LLTX | NETIF_F_RXHASH);
233
234	net_dev->hw_features |= NETIF_F_SG | NETIF_F_HIGHDMA;
	/* The kernel enables GSO automatically if we declare NETIF_F_SG.
	 * For conformity, we'll still declare GSO explicitly.
	 */
238	net_dev->features |= NETIF_F_GSO;
239	net_dev->features |= NETIF_F_RXCSUM;
240
241	net_dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
242	/* we do not want shared skbs on TX */
243	net_dev->priv_flags &= ~IFF_TX_SKB_SHARING;
244
245	net_dev->features |= net_dev->hw_features;
246	net_dev->vlan_features = net_dev->features;
247
248	net_dev->xdp_features = NETDEV_XDP_ACT_BASIC |
249				NETDEV_XDP_ACT_REDIRECT |
250				NETDEV_XDP_ACT_NDO_XMIT;
251
252	if (is_valid_ether_addr(mac_addr)) {
253		memcpy(net_dev->perm_addr, mac_addr, net_dev->addr_len);
254		eth_hw_addr_set(net_dev, mac_addr);
255	} else {
256		eth_hw_addr_random(net_dev);
257		err = mac_dev->change_addr(mac_dev->fman_mac,
258			(const enet_addr_t *)net_dev->dev_addr);
259		if (err) {
260			dev_err(dev, "Failed to set random MAC address\n");
261			return -EINVAL;
262		}
263		dev_info(dev, "Using random MAC address: %pM\n",
264			 net_dev->dev_addr);
265	}
266
267	net_dev->ethtool_ops = &dpaa_ethtool_ops;
268
269	net_dev->needed_headroom = priv->tx_headroom;
270	net_dev->watchdog_timeo = msecs_to_jiffies(tx_timeout);
271
272	/* The rest of the config is filled in by the mac device already */
273	mac_dev->phylink_config.dev = &net_dev->dev;
274	mac_dev->phylink_config.type = PHYLINK_NETDEV;
275	mac_dev->update_speed = dpaa_eth_cgr_set_speed;
276	mac_dev->phylink = phylink_create(&mac_dev->phylink_config,
277					  dev_fwnode(mac_dev->dev),
278					  mac_dev->phy_if,
279					  mac_dev->phylink_ops);
280	if (IS_ERR(mac_dev->phylink)) {
281		err = PTR_ERR(mac_dev->phylink);
282		dev_err_probe(dev, err, "Could not create phylink\n");
283		return err;
284	}
285
	/* start without the RUNNING flag; phylink controls it later */
287	netif_carrier_off(net_dev);
288
289	err = register_netdev(net_dev);
290	if (err < 0) {
291		dev_err(dev, "register_netdev() = %d\n", err);
292		phylink_destroy(mac_dev->phylink);
293		return err;
294	}
295
296	return 0;
297}
298
299static int dpaa_stop(struct net_device *net_dev)
300{
301	struct mac_device *mac_dev;
302	struct dpaa_priv *priv;
303	int i, error;
304	int err = 0;
305
306	priv = netdev_priv(net_dev);
307	mac_dev = priv->mac_dev;
308
309	netif_tx_stop_all_queues(net_dev);
310	/* Allow the Fman (Tx) port to process in-flight frames before we
311	 * try switching it off.
312	 */
313	msleep(200);
314
315	phylink_stop(mac_dev->phylink);
316	mac_dev->disable(mac_dev->fman_mac);
317
318	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
319		error = fman_port_disable(mac_dev->port[i]);
320		if (error)
321			err = error;
322	}
323
324	phylink_disconnect_phy(mac_dev->phylink);
325	net_dev->phydev = NULL;
326
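	/* give the hardware a little more time to drain in-flight frames */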
327	msleep(200);
328
329	return err;
330}
331
332static void dpaa_tx_timeout(struct net_device *net_dev, unsigned int txqueue)
333{
334	struct dpaa_percpu_priv *percpu_priv;
335	const struct dpaa_priv	*priv;
336
337	priv = netdev_priv(net_dev);
338	percpu_priv = this_cpu_ptr(priv->percpu_priv);
339
340	netif_crit(priv, timer, net_dev, "Transmit timeout latency: %u ms\n",
341		   jiffies_to_msecs(jiffies - dev_trans_start(net_dev)));
342
343	percpu_priv->stats.tx_errors++;
344}
345
346/* Calculates the statistics for the given device by adding the statistics
347 * collected by each CPU.
348 */
349static void dpaa_get_stats64(struct net_device *net_dev,
350			     struct rtnl_link_stats64 *s)
351{
352	int numstats = sizeof(struct rtnl_link_stats64) / sizeof(u64);
353	struct dpaa_priv *priv = netdev_priv(net_dev);
354	struct dpaa_percpu_priv *percpu_priv;
355	u64 *netstats = (u64 *)s;
356	u64 *cpustats;
357	int i, j;
358
359	for_each_possible_cpu(i) {
360		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
361
362		cpustats = (u64 *)&percpu_priv->stats;
363
364		/* add stats from all CPUs */
365		for (j = 0; j < numstats; j++)
366			netstats[j] += cpustats[j];
367	}
368}
369
370static int dpaa_setup_tc(struct net_device *net_dev, enum tc_setup_type type,
371			 void *type_data)
372{
373	struct dpaa_priv *priv = netdev_priv(net_dev);
374	struct tc_mqprio_qopt *mqprio = type_data;
375	u8 num_tc;
376	int i;
377
378	if (type != TC_SETUP_QDISC_MQPRIO)
379		return -EOPNOTSUPP;
380
381	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
382	num_tc = mqprio->num_tc;
383
384	if (num_tc == priv->num_tc)
385		return 0;
386
387	if (!num_tc) {
388		netdev_reset_tc(net_dev);
389		goto out;
390	}
391
392	if (num_tc > DPAA_TC_NUM) {
393		netdev_err(net_dev, "Too many traffic classes: max %d supported.\n",
394			   DPAA_TC_NUM);
395		return -EINVAL;
396	}
397
398	netdev_set_num_tc(net_dev, num_tc);
399
400	for (i = 0; i < num_tc; i++)
401		netdev_set_tc_queue(net_dev, i, DPAA_TC_TXQ_NUM,
402				    i * DPAA_TC_TXQ_NUM);
403
404out:
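	/* num_tc == 0 (mqprio removed) means a single traffic class */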
405	priv->num_tc = num_tc ? : 1;
406	netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
407	return 0;
408}
409
410static struct mac_device *dpaa_mac_dev_get(struct platform_device *pdev)
411{
412	struct dpaa_eth_data *eth_data;
413	struct device *dpaa_dev;
414	struct mac_device *mac_dev;
415
416	dpaa_dev = &pdev->dev;
417	eth_data = dpaa_dev->platform_data;
418	if (!eth_data) {
419		dev_err(dpaa_dev, "eth_data missing\n");
420		return ERR_PTR(-ENODEV);
421	}
422	mac_dev = eth_data->mac_dev;
423	if (!mac_dev) {
424		dev_err(dpaa_dev, "mac_dev missing\n");
425		return ERR_PTR(-EINVAL);
426	}
427
428	return mac_dev;
429}
430
431static int dpaa_set_mac_address(struct net_device *net_dev, void *addr)
432{
433	const struct dpaa_priv *priv;
434	struct mac_device *mac_dev;
435	struct sockaddr old_addr;
436	int err;
437
438	priv = netdev_priv(net_dev);
439
440	memcpy(old_addr.sa_data, net_dev->dev_addr,  ETH_ALEN);
441
442	err = eth_mac_addr(net_dev, addr);
443	if (err < 0) {
444		netif_err(priv, drv, net_dev, "eth_mac_addr() = %d\n", err);
445		return err;
446	}
447
448	mac_dev = priv->mac_dev;
449
450	err = mac_dev->change_addr(mac_dev->fman_mac,
451				   (const enet_addr_t *)net_dev->dev_addr);
452	if (err < 0) {
453		netif_err(priv, drv, net_dev, "mac_dev->change_addr() = %d\n",
454			  err);
455		/* reverting to previous address */
456		eth_mac_addr(net_dev, &old_addr);
457
458		return err;
459	}
460
461	return 0;
462}
463
464static void dpaa_set_rx_mode(struct net_device *net_dev)
465{
466	const struct dpaa_priv	*priv;
467	int err;
468
469	priv = netdev_priv(net_dev);
470
471	if (!!(net_dev->flags & IFF_PROMISC) != priv->mac_dev->promisc) {
472		priv->mac_dev->promisc = !priv->mac_dev->promisc;
473		err = priv->mac_dev->set_promisc(priv->mac_dev->fman_mac,
474						 priv->mac_dev->promisc);
475		if (err < 0)
476			netif_err(priv, drv, net_dev,
477				  "mac_dev->set_promisc() = %d\n",
478				  err);
479	}
480
481	if (!!(net_dev->flags & IFF_ALLMULTI) != priv->mac_dev->allmulti) {
482		priv->mac_dev->allmulti = !priv->mac_dev->allmulti;
483		err = priv->mac_dev->set_allmulti(priv->mac_dev->fman_mac,
484						  priv->mac_dev->allmulti);
485		if (err < 0)
486			netif_err(priv, drv, net_dev,
487				  "mac_dev->set_allmulti() = %d\n",
488				  err);
489	}
490
491	err = priv->mac_dev->set_multi(net_dev, priv->mac_dev);
492	if (err < 0)
493		netif_err(priv, drv, net_dev, "mac_dev->set_multi() = %d\n",
494			  err);
495}
496
497static struct dpaa_bp *dpaa_bpid2pool(int bpid)
498{
499	if (WARN_ON(bpid < 0 || bpid >= BM_MAX_NUM_OF_POOLS))
500		return NULL;
501
502	return dpaa_bp_array[bpid];
503}
504
505/* checks if this bpool is already allocated */
506static bool dpaa_bpid2pool_use(int bpid)
507{
508	if (dpaa_bpid2pool(bpid)) {
509		refcount_inc(&dpaa_bp_array[bpid]->refs);
510		return true;
511	}
512
513	return false;
514}
515
516/* called only once per bpid by dpaa_bp_alloc_pool() */
517static void dpaa_bpid2pool_map(int bpid, struct dpaa_bp *dpaa_bp)
518{
519	dpaa_bp_array[bpid] = dpaa_bp;
520	refcount_set(&dpaa_bp->refs, 1);
521}
522
523static int dpaa_bp_alloc_pool(struct dpaa_bp *dpaa_bp)
524{
525	int err;
526
527	if (dpaa_bp->size == 0 || dpaa_bp->config_count == 0) {
528		pr_err("%s: Buffer pool is not properly initialized! Missing size or initial number of buffers\n",
529		       __func__);
530		return -EINVAL;
531	}
532
533	/* If the pool is already specified, we only create one per bpid */
534	if (dpaa_bp->bpid != FSL_DPAA_BPID_INV &&
535	    dpaa_bpid2pool_use(dpaa_bp->bpid))
536		return 0;
537
538	if (dpaa_bp->bpid == FSL_DPAA_BPID_INV) {
539		dpaa_bp->pool = bman_new_pool();
540		if (!dpaa_bp->pool) {
541			pr_err("%s: bman_new_pool() failed\n",
542			       __func__);
543			return -ENODEV;
544		}
545
546		dpaa_bp->bpid = (u8)bman_get_bpid(dpaa_bp->pool);
547	}
548
549	if (dpaa_bp->seed_cb) {
550		err = dpaa_bp->seed_cb(dpaa_bp);
551		if (err)
552			goto pool_seed_failed;
553	}
554
555	dpaa_bpid2pool_map(dpaa_bp->bpid, dpaa_bp);
556
557	return 0;
558
559pool_seed_failed:
560	pr_err("%s: pool seeding failed\n", __func__);
561	bman_free_pool(dpaa_bp->pool);
562
563	return err;
564}
565
566/* remove and free all the buffers from the given buffer pool */
567static void dpaa_bp_drain(struct dpaa_bp *bp)
568{
569	u8 num = 8;
570	int ret;
571
572	do {
573		struct bm_buffer bmb[8];
574		int i;
575
576		ret = bman_acquire(bp->pool, bmb, num);
577		if (ret < 0) {
578			if (num == 8) {
				/* fewer than 8 buffers are left;
				 * drain them one by one
				 */
582				num = 1;
583				ret = 1;
584				continue;
585			} else {
586				/* Pool is fully drained */
587				break;
588			}
589		}
590
591		if (bp->free_buf_cb)
592			for (i = 0; i < num; i++)
593				bp->free_buf_cb(bp, &bmb[i]);
594	} while (ret > 0);
595}
596
597static void dpaa_bp_free(struct dpaa_bp *dpaa_bp)
598{
599	struct dpaa_bp *bp = dpaa_bpid2pool(dpaa_bp->bpid);
600
	/* The mapping between bpid and dpaa_bp is done very late in the
	 * allocation procedure; if something failed before the mapping, the
	 * pool was never configured, so there is nothing to clean up here.
	 */
605	if (!bp)
606		return;
607
608	if (!refcount_dec_and_test(&bp->refs))
609		return;
610
611	if (bp->free_buf_cb)
612		dpaa_bp_drain(bp);
613
614	dpaa_bp_array[bp->bpid] = NULL;
615	bman_free_pool(bp->pool);
616}
617
618static void dpaa_bps_free(struct dpaa_priv *priv)
619{
620	dpaa_bp_free(priv->dpaa_bp);
621}
622
623/* Use multiple WQs for FQ assignment:
624 *	- Tx Confirmation queues go to WQ1.
625 *	- Rx Error and Tx Error queues go to WQ5 (giving them a better chance
626 *	  to be scheduled, in case there are many more FQs in WQ6).
627 *	- Rx Default goes to WQ6.
628 *	- Tx queues go to different WQs depending on their priority. Equal
629 *	  chunks of NR_CPUS queues go to WQ6 (lowest priority), WQ2, WQ1 and
630 *	  WQ0 (highest priority).
 * This ensures that Tx-confirmed buffers are released in a timely manner. In
 * particular, it avoids congestion on the Tx Confirm FQs, which can pile up
 * PFDRs if they are greatly outnumbered by other FQs in the system, given
 * that dequeue scheduling is round-robin.
635 */
636static inline void dpaa_assign_wq(struct dpaa_fq *fq, int idx)
637{
638	switch (fq->fq_type) {
639	case FQ_TYPE_TX_CONFIRM:
640	case FQ_TYPE_TX_CONF_MQ:
641		fq->wq = 1;
642		break;
643	case FQ_TYPE_RX_ERROR:
644	case FQ_TYPE_TX_ERROR:
645		fq->wq = 5;
646		break;
647	case FQ_TYPE_RX_DEFAULT:
648	case FQ_TYPE_RX_PCD:
649		fq->wq = 6;
650		break;
651	case FQ_TYPE_TX:
652		switch (idx / DPAA_TC_TXQ_NUM) {
653		case 0:
654			/* Low priority (best effort) */
655			fq->wq = 6;
656			break;
657		case 1:
658			/* Medium priority */
659			fq->wq = 2;
660			break;
661		case 2:
662			/* High priority */
663			fq->wq = 1;
664			break;
665		case 3:
666			/* Very high priority */
667			fq->wq = 0;
668			break;
669		default:
670			WARN(1, "Too many TX FQs: more than %d!\n",
671			     DPAA_ETH_TXQ_NUM);
672		}
673		break;
674	default:
675		WARN(1, "Invalid FQ type %d for FQID %d!\n",
676		     fq->fq_type, fq->fqid);
677	}
678}
679
680static struct dpaa_fq *dpaa_fq_alloc(struct device *dev,
681				     u32 start, u32 count,
682				     struct list_head *list,
683				     enum dpaa_fq_type fq_type)
684{
685	struct dpaa_fq *dpaa_fq;
686	int i;
687
688	dpaa_fq = devm_kcalloc(dev, count, sizeof(*dpaa_fq),
689			       GFP_KERNEL);
690	if (!dpaa_fq)
691		return NULL;
692
693	for (i = 0; i < count; i++) {
694		dpaa_fq[i].fq_type = fq_type;
695		dpaa_fq[i].fqid = start ? start + i : 0;
696		list_add_tail(&dpaa_fq[i].list, list);
697	}
698
699	for (i = 0; i < count; i++)
700		dpaa_assign_wq(dpaa_fq + i, i);
701
702	return dpaa_fq;
703}
704
705static int dpaa_alloc_all_fqs(struct device *dev, struct list_head *list,
706			      struct fm_port_fqs *port_fqs)
707{
708	struct dpaa_fq *dpaa_fq;
709	u32 fq_base, fq_base_aligned, i;
710
711	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_ERROR);
712	if (!dpaa_fq)
713		goto fq_alloc_failed;
714
715	port_fqs->rx_errq = &dpaa_fq[0];
716
717	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_RX_DEFAULT);
718	if (!dpaa_fq)
719		goto fq_alloc_failed;
720
721	port_fqs->rx_defq = &dpaa_fq[0];
722
	/* The PCD FQID range needs to be aligned for correct operation.
	 * Allocate twice the required count, then release the FQIDs that fall
	 * outside the aligned block we actually use.
	 */
724	if (qman_alloc_fqid_range(&fq_base, 2 * DPAA_ETH_PCD_RXQ_NUM))
725		goto fq_alloc_failed;
726
727	fq_base_aligned = ALIGN(fq_base, DPAA_ETH_PCD_RXQ_NUM);
728
729	for (i = fq_base; i < fq_base_aligned; i++)
730		qman_release_fqid(i);
731
732	for (i = fq_base_aligned + DPAA_ETH_PCD_RXQ_NUM;
733	     i < (fq_base + 2 * DPAA_ETH_PCD_RXQ_NUM); i++)
734		qman_release_fqid(i);
735
736	dpaa_fq = dpaa_fq_alloc(dev, fq_base_aligned, DPAA_ETH_PCD_RXQ_NUM,
737				list, FQ_TYPE_RX_PCD);
738	if (!dpaa_fq)
739		goto fq_alloc_failed;
740
741	port_fqs->rx_pcdq = &dpaa_fq[0];
742
743	if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX_CONF_MQ))
744		goto fq_alloc_failed;
745
746	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_ERROR);
747	if (!dpaa_fq)
748		goto fq_alloc_failed;
749
750	port_fqs->tx_errq = &dpaa_fq[0];
751
752	dpaa_fq = dpaa_fq_alloc(dev, 0, 1, list, FQ_TYPE_TX_CONFIRM);
753	if (!dpaa_fq)
754		goto fq_alloc_failed;
755
756	port_fqs->tx_defq = &dpaa_fq[0];
757
758	if (!dpaa_fq_alloc(dev, 0, DPAA_ETH_TXQ_NUM, list, FQ_TYPE_TX))
759		goto fq_alloc_failed;
760
761	return 0;
762
763fq_alloc_failed:
764	dev_err(dev, "dpaa_fq_alloc() failed\n");
765	return -ENOMEM;
766}
767
768static u32 rx_pool_channel;
769static DEFINE_SPINLOCK(rx_pool_channel_init);
770
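/* A single QMan pool channel is allocated on first use and then shared by
 * all DPAA interfaces; the spinlock only serializes the initial allocation.
 */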
771static int dpaa_get_channel(void)
772{
773	spin_lock(&rx_pool_channel_init);
774	if (!rx_pool_channel) {
775		u32 pool;
776		int ret;
777
778		ret = qman_alloc_pool(&pool);
779
780		if (!ret)
781			rx_pool_channel = pool;
782	}
783	spin_unlock(&rx_pool_channel_init);
784	if (!rx_pool_channel)
785		return -ENOMEM;
786	return rx_pool_channel;
787}
788
789static void dpaa_release_channel(void)
790{
791	qman_release_pool(rx_pool_channel);
792}
793
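/* Make the given pool channel dequeueable from every affine (and online)
 * portal, so that ingress FQs placed on it can be serviced by any of those
 * cores.
 */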
794static void dpaa_eth_add_channel(u16 channel, struct device *dev)
795{
796	u32 pool = QM_SDQCR_CHANNELS_POOL_CONV(channel);
797	const cpumask_t *cpus = qman_affine_cpus();
798	struct qman_portal *portal;
799	int cpu;
800
801	for_each_cpu_and(cpu, cpus, cpu_online_mask) {
802		portal = qman_get_affine_portal(cpu);
803		qman_p_static_dequeue_add(portal, pool);
804		qman_start_using_portal(portal, dev);
805	}
806}
807
808/* Congestion group state change notification callback.
809 * Stops the device's egress queues while they are congested and
810 * wakes them upon exiting congested state.
811 * Also updates some CGR-related stats.
812 */
813static void dpaa_eth_cgscn(struct qman_portal *qm, struct qman_cgr *cgr,
814			   int congested)
815{
816	struct dpaa_priv *priv = (struct dpaa_priv *)container_of(cgr,
817		struct dpaa_priv, cgr_data.cgr);
818
819	if (congested) {
820		priv->cgr_data.congestion_start_jiffies = jiffies;
821		netif_tx_stop_all_queues(priv->net_dev);
822		priv->cgr_data.cgr_congested_count++;
823	} else {
824		priv->cgr_data.congested_jiffies +=
825			(jiffies - priv->cgr_data.congestion_start_jiffies);
826		netif_tx_wake_all_queues(priv->net_dev);
827	}
828}
829
830static int dpaa_eth_cgr_init(struct dpaa_priv *priv)
831{
832	struct qm_mcc_initcgr initcgr;
833	u32 cs_th;
834	int err;
835
836	err = qman_alloc_cgrid(&priv->cgr_data.cgr.cgrid);
837	if (err < 0) {
838		if (netif_msg_drv(priv))
839			pr_err("%s: Error %d allocating CGR ID\n",
840			       __func__, err);
841		goto out_error;
842	}
843	priv->cgr_data.cgr.cb = dpaa_eth_cgscn;
844
845	/* Enable Congestion State Change Notifications and CS taildrop */
846	memset(&initcgr, 0, sizeof(initcgr));
847	initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CSCN_EN | QM_CGR_WE_CS_THRES);
848	initcgr.cgr.cscn_en = QM_CGR_EN;
849
	/* Set different thresholds based on the configured MAC speed.
	 * This may become suboptimal if the MAC is later reconfigured at a
	 * different speed, so MACs must call dpaa_eth_cgr_set_speed in their
	 * link_up callback.
	 */
855	if (priv->mac_dev->phylink_config.mac_capabilities & MAC_10000FD)
856		cs_th = DPAA_CS_THRESHOLD_10G;
857	else
858		cs_th = DPAA_CS_THRESHOLD_1G;
859	qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
860
861	initcgr.we_mask |= cpu_to_be16(QM_CGR_WE_CSTD_EN);
862	initcgr.cgr.cstd_en = QM_CGR_EN;
863
864	err = qman_create_cgr(&priv->cgr_data.cgr, QMAN_CGR_FLAG_USE_INIT,
865			      &initcgr);
866	if (err < 0) {
867		if (netif_msg_drv(priv))
868			pr_err("%s: Error %d creating CGR with ID %d\n",
869			       __func__, err, priv->cgr_data.cgr.cgrid);
870		qman_release_cgrid(priv->cgr_data.cgr.cgrid);
871		goto out_error;
872	}
873	if (netif_msg_drv(priv))
874		pr_debug("Created CGR %d for netdev with hwaddr %pM on QMan channel %d\n",
875			 priv->cgr_data.cgr.cgrid, priv->mac_dev->addr,
876			 priv->cgr_data.cgr.chan);
877
878out_error:
879	return err;
880}
881
882static void dpaa_eth_cgr_set_speed(struct mac_device *mac_dev, int speed)
883{
884	struct net_device *net_dev = to_net_dev(mac_dev->phylink_config.dev);
885	struct dpaa_priv *priv = netdev_priv(net_dev);
886	struct qm_mcc_initcgr opts = { };
887	u32 cs_th;
888	int err;
889
890	opts.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES);
891	switch (speed) {
892	case SPEED_10000:
893		cs_th = DPAA_CS_THRESHOLD_10G;
894		break;
895	case SPEED_1000:
896	default:
897		cs_th = DPAA_CS_THRESHOLD_1G;
898		break;
899	}
900	qm_cgr_cs_thres_set64(&opts.cgr.cs_thres, cs_th, 1);
901
902	err = qman_update_cgr_safe(&priv->cgr_data.cgr, &opts);
903	if (err)
904		netdev_err(net_dev, "could not update speed: %d\n", err);
905}
906
907static inline void dpaa_setup_ingress(const struct dpaa_priv *priv,
908				      struct dpaa_fq *fq,
909				      const struct qman_fq *template)
910{
911	fq->fq_base = *template;
912	fq->net_dev = priv->net_dev;
913
914	fq->flags = QMAN_FQ_FLAG_NO_ENQUEUE;
915	fq->channel = priv->channel;
916}
917
918static inline void dpaa_setup_egress(const struct dpaa_priv *priv,
919				     struct dpaa_fq *fq,
920				     struct fman_port *port,
921				     const struct qman_fq *template)
922{
923	fq->fq_base = *template;
924	fq->net_dev = priv->net_dev;
925
926	if (port) {
927		fq->flags = QMAN_FQ_FLAG_TO_DCPORTAL;
928		fq->channel = (u16)fman_port_get_qman_channel_id(port);
929	} else {
930		fq->flags = QMAN_FQ_FLAG_NO_MODIFY;
931	}
932}
933
934static void dpaa_fq_setup(struct dpaa_priv *priv,
935			  const struct dpaa_fq_cbs *fq_cbs,
936			  struct fman_port *tx_port)
937{
938	int egress_cnt = 0, conf_cnt = 0, num_portals = 0, portal_cnt = 0, cpu;
939	const cpumask_t *affine_cpus = qman_affine_cpus();
940	u16 channels[NR_CPUS];
941	struct dpaa_fq *fq;
942
943	for_each_cpu_and(cpu, affine_cpus, cpu_online_mask)
944		channels[num_portals++] = qman_affine_channel(cpu);
945
946	if (num_portals == 0)
947		dev_err(priv->net_dev->dev.parent,
948			"No Qman software (affine) channels found\n");
949
950	/* Initialize each FQ in the list */
951	list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
952		switch (fq->fq_type) {
953		case FQ_TYPE_RX_DEFAULT:
954			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
955			break;
956		case FQ_TYPE_RX_ERROR:
957			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_errq);
958			break;
959		case FQ_TYPE_RX_PCD:
960			if (!num_portals)
961				continue;
962			dpaa_setup_ingress(priv, fq, &fq_cbs->rx_defq);
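			/* Spread the PCD FQs round-robin across the affine
			 * portals' channels.
			 */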
963			fq->channel = channels[portal_cnt++ % num_portals];
964			break;
965		case FQ_TYPE_TX:
966			dpaa_setup_egress(priv, fq, tx_port,
967					  &fq_cbs->egress_ern);
968			/* If we have more Tx queues than the number of cores,
969			 * just ignore the extra ones.
970			 */
971			if (egress_cnt < DPAA_ETH_TXQ_NUM)
972				priv->egress_fqs[egress_cnt++] = &fq->fq_base;
973			break;
974		case FQ_TYPE_TX_CONF_MQ:
975			priv->conf_fqs[conf_cnt++] = &fq->fq_base;
976			fallthrough;
977		case FQ_TYPE_TX_CONFIRM:
978			dpaa_setup_ingress(priv, fq, &fq_cbs->tx_defq);
979			break;
980		case FQ_TYPE_TX_ERROR:
981			dpaa_setup_ingress(priv, fq, &fq_cbs->tx_errq);
982			break;
983		default:
984			dev_warn(priv->net_dev->dev.parent,
985				 "Unknown FQ type detected!\n");
986			break;
987		}
988	}
989
990	 /* Make sure all CPUs receive a corresponding Tx queue. */
991	while (egress_cnt < DPAA_ETH_TXQ_NUM) {
992		list_for_each_entry(fq, &priv->dpaa_fq_list, list) {
993			if (fq->fq_type != FQ_TYPE_TX)
994				continue;
995			priv->egress_fqs[egress_cnt++] = &fq->fq_base;
996			if (egress_cnt == DPAA_ETH_TXQ_NUM)
997				break;
998		}
999	}
1000}
1001
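/* Reverse lookup: find the Tx queue index a given egress FQ is mapped to */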
1002static inline int dpaa_tx_fq_to_id(const struct dpaa_priv *priv,
1003				   struct qman_fq *tx_fq)
1004{
1005	int i;
1006
1007	for (i = 0; i < DPAA_ETH_TXQ_NUM; i++)
1008		if (priv->egress_fqs[i] == tx_fq)
1009			return i;
1010
1011	return -EINVAL;
1012}
1013
1014static int dpaa_fq_init(struct dpaa_fq *dpaa_fq, bool td_enable)
1015{
1016	const struct dpaa_priv	*priv;
1017	struct qman_fq *confq = NULL;
1018	struct qm_mcc_initfq initfq;
1019	struct device *dev;
1020	struct qman_fq *fq;
1021	int queue_id;
1022	int err;
1023
1024	priv = netdev_priv(dpaa_fq->net_dev);
1025	dev = dpaa_fq->net_dev->dev.parent;
1026
1027	if (dpaa_fq->fqid == 0)
1028		dpaa_fq->flags |= QMAN_FQ_FLAG_DYNAMIC_FQID;
1029
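	/* FQs flagged NO_MODIFY are only created, not initialized or
	 * scheduled below.
	 */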
1030	dpaa_fq->init = !(dpaa_fq->flags & QMAN_FQ_FLAG_NO_MODIFY);
1031
1032	err = qman_create_fq(dpaa_fq->fqid, dpaa_fq->flags, &dpaa_fq->fq_base);
1033	if (err) {
1034		dev_err(dev, "qman_create_fq() failed\n");
1035		return err;
1036	}
1037	fq = &dpaa_fq->fq_base;
1038
1039	if (dpaa_fq->init) {
1040		memset(&initfq, 0, sizeof(initfq));
1041
1042		initfq.we_mask = cpu_to_be16(QM_INITFQ_WE_FQCTRL);
1043		/* Note: we may get to keep an empty FQ in cache */
1044		initfq.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_PREFERINCACHE);
1045
1046		/* Try to reduce the number of portal interrupts for
1047		 * Tx Confirmation FQs.
1048		 */
1049		if (dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM)
1050			initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_AVOIDBLOCK);
1051
1052		/* FQ placement */
1053		initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_DESTWQ);
1054
1055		qm_fqd_set_destwq(&initfq.fqd, dpaa_fq->channel, dpaa_fq->wq);
1056
1057		/* Put all egress queues in a congestion group of their own.
		 * Strictly speaking, the Tx confirmation queues are Rx FQs
		 * rather than Tx ones, but they nonetheless account for the
		 * memory footprint on behalf of egress traffic. We therefore
1061		 * place them in the netdev's CGR, along with the Tx FQs.
1062		 */
1063		if (dpaa_fq->fq_type == FQ_TYPE_TX ||
1064		    dpaa_fq->fq_type == FQ_TYPE_TX_CONFIRM ||
1065		    dpaa_fq->fq_type == FQ_TYPE_TX_CONF_MQ) {
1066			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID);
1067			initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_CGE);
1068			initfq.fqd.cgid = (u8)priv->cgr_data.cgr.cgrid;
1069			/* Set a fixed overhead accounting, in an attempt to
1070			 * reduce the impact of fixed-size skb shells and the
1071			 * driver's needed headroom on system memory. This is
1072			 * especially the case when the egress traffic is
1073			 * composed of small datagrams.
1074			 * Unfortunately, QMan's OAL value is capped to an
1075			 * insufficient value, but even that is better than
1076			 * no overhead accounting at all.
1077			 */
1078			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_OAC);
1079			qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
1080			qm_fqd_set_oal(&initfq.fqd,
1081				       min(sizeof(struct sk_buff) +
1082				       priv->tx_headroom,
1083				       (size_t)FSL_QMAN_MAX_OAL));
1084		}
1085
1086		if (td_enable) {
1087			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_TDTHRESH);
1088			qm_fqd_set_taildrop(&initfq.fqd, DPAA_FQ_TD, 1);
1089			initfq.fqd.fq_ctrl = cpu_to_be16(QM_FQCTRL_TDE);
1090		}
1091
1092		if (dpaa_fq->fq_type == FQ_TYPE_TX) {
1093			queue_id = dpaa_tx_fq_to_id(priv, &dpaa_fq->fq_base);
1094			if (queue_id >= 0)
1095				confq = priv->conf_fqs[queue_id];
1096			if (confq) {
1097				initfq.we_mask |=
1098					cpu_to_be16(QM_INITFQ_WE_CONTEXTA);
1099			/* ContextA: OVOM=1(use contextA2 bits instead of ICAD)
1100			 *	     A2V=1 (contextA A2 field is valid)
1101			 *	     A0V=1 (contextA A0 field is valid)
1102			 *	     B0V=1 (contextB field is valid)
1103			 * ContextA A2: EBD=1 (deallocate buffers inside FMan)
1104			 * ContextB B0(ASPID): 0 (absolute Virtual Storage ID)
1105			 */
1106				qm_fqd_context_a_set64(&initfq.fqd,
1107						       0x1e00000080000000ULL);
1108			}
1109		}
1110
1111		/* Put all the ingress queues in our "ingress CGR". */
1112		if (priv->use_ingress_cgr &&
1113		    (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
1114		     dpaa_fq->fq_type == FQ_TYPE_RX_ERROR ||
1115		     dpaa_fq->fq_type == FQ_TYPE_RX_PCD)) {
1116			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CGID);
1117			initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_CGE);
1118			initfq.fqd.cgid = (u8)priv->ingress_cgr.cgrid;
1119			/* Set a fixed overhead accounting, just like for the
1120			 * egress CGR.
1121			 */
1122			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_OAC);
1123			qm_fqd_set_oac(&initfq.fqd, QM_OAC_CG);
1124			qm_fqd_set_oal(&initfq.fqd,
1125				       min(sizeof(struct sk_buff) +
1126				       priv->tx_headroom,
1127				       (size_t)FSL_QMAN_MAX_OAL));
1128		}
1129
1130		/* Initialization common to all ingress queues */
1131		if (dpaa_fq->flags & QMAN_FQ_FLAG_NO_ENQUEUE) {
1132			initfq.we_mask |= cpu_to_be16(QM_INITFQ_WE_CONTEXTA);
1133			initfq.fqd.fq_ctrl |= cpu_to_be16(QM_FQCTRL_HOLDACTIVE |
1134						QM_FQCTRL_CTXASTASHING);
1135			initfq.fqd.context_a.stashing.exclusive =
1136				QM_STASHING_EXCL_DATA | QM_STASHING_EXCL_CTX |
1137				QM_STASHING_EXCL_ANNOTATION;
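			/* Stash annotation, frame data and FQ context on
			 * dequeue; the context stash is sized to cover the
			 * whole qman_fq structure, in 64-byte cachelines.
			 */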
1138			qm_fqd_set_stashing(&initfq.fqd, 1, 2,
1139					    DIV_ROUND_UP(sizeof(struct qman_fq),
1140							 64));
1141		}
1142
1143		err = qman_init_fq(fq, QMAN_INITFQ_FLAG_SCHED, &initfq);
1144		if (err < 0) {
1145			dev_err(dev, "qman_init_fq(%u) = %d\n",
1146				qman_fq_fqid(fq), err);
1147			qman_destroy_fq(fq);
1148			return err;
1149		}
1150	}
1151
1152	dpaa_fq->fqid = qman_fq_fqid(fq);
1153
1154	if (dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
1155	    dpaa_fq->fq_type == FQ_TYPE_RX_PCD) {
1156		err = xdp_rxq_info_reg(&dpaa_fq->xdp_rxq, dpaa_fq->net_dev,
1157				       dpaa_fq->fqid, 0);
1158		if (err) {
1159			dev_err(dev, "xdp_rxq_info_reg() = %d\n", err);
1160			return err;
1161		}
1162
1163		err = xdp_rxq_info_reg_mem_model(&dpaa_fq->xdp_rxq,
1164						 MEM_TYPE_PAGE_ORDER0, NULL);
1165		if (err) {
1166			dev_err(dev, "xdp_rxq_info_reg_mem_model() = %d\n",
1167				err);
1168			xdp_rxq_info_unreg(&dpaa_fq->xdp_rxq);
1169			return err;
1170		}
1171	}
1172
1173	return 0;
1174}
1175
1176static int dpaa_fq_free_entry(struct device *dev, struct qman_fq *fq)
1177{
1178	const struct dpaa_priv  *priv;
1179	struct dpaa_fq *dpaa_fq;
1180	int err, error;
1181
1182	err = 0;
1183
1184	dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
1185	priv = netdev_priv(dpaa_fq->net_dev);
1186
1187	if (dpaa_fq->init) {
1188		err = qman_retire_fq(fq, NULL);
1189		if (err < 0 && netif_msg_drv(priv))
1190			dev_err(dev, "qman_retire_fq(%u) = %d\n",
1191				qman_fq_fqid(fq), err);
1192
1193		error = qman_oos_fq(fq);
1194		if (error < 0 && netif_msg_drv(priv)) {
1195			dev_err(dev, "qman_oos_fq(%u) = %d\n",
1196				qman_fq_fqid(fq), error);
1197			if (err >= 0)
1198				err = error;
1199		}
1200	}
1201
1202	if ((dpaa_fq->fq_type == FQ_TYPE_RX_DEFAULT ||
1203	     dpaa_fq->fq_type == FQ_TYPE_RX_PCD) &&
1204	    xdp_rxq_info_is_reg(&dpaa_fq->xdp_rxq))
1205		xdp_rxq_info_unreg(&dpaa_fq->xdp_rxq);
1206
1207	qman_destroy_fq(fq);
1208	list_del(&dpaa_fq->list);
1209
1210	return err;
1211}
1212
1213static int dpaa_fq_free(struct device *dev, struct list_head *list)
1214{
1215	struct dpaa_fq *dpaa_fq, *tmp;
1216	int err, error;
1217
1218	err = 0;
1219	list_for_each_entry_safe(dpaa_fq, tmp, list, list) {
1220		error = dpaa_fq_free_entry(dev, (struct qman_fq *)dpaa_fq);
1221		if (error < 0 && err >= 0)
1222			err = error;
1223	}
1224
1225	return err;
1226}
1227
1228static int dpaa_eth_init_tx_port(struct fman_port *port, struct dpaa_fq *errq,
1229				 struct dpaa_fq *defq,
1230				 struct dpaa_buffer_layout *buf_layout)
1231{
1232	struct fman_buffer_prefix_content buf_prefix_content;
1233	struct fman_port_params params;
1234	int err;
1235
1236	memset(&params, 0, sizeof(params));
1237	memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
1238
1239	buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
1240	buf_prefix_content.pass_prs_result = true;
1241	buf_prefix_content.pass_hash_result = true;
1242	buf_prefix_content.pass_time_stamp = true;
1243	buf_prefix_content.data_align = DPAA_FD_DATA_ALIGNMENT;
1244
1245	params.specific_params.non_rx_params.err_fqid = errq->fqid;
1246	params.specific_params.non_rx_params.dflt_fqid = defq->fqid;
1247
1248	err = fman_port_config(port, &params);
1249	if (err) {
1250		pr_err("%s: fman_port_config failed\n", __func__);
1251		return err;
1252	}
1253
1254	err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
1255	if (err) {
1256		pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
1257		       __func__);
1258		return err;
1259	}
1260
1261	err = fman_port_init(port);
1262	if (err)
1263		pr_err("%s: fm_port_init failed\n", __func__);
1264
1265	return err;
1266}
1267
1268static int dpaa_eth_init_rx_port(struct fman_port *port, struct dpaa_bp *bp,
1269				 struct dpaa_fq *errq,
1270				 struct dpaa_fq *defq, struct dpaa_fq *pcdq,
1271				 struct dpaa_buffer_layout *buf_layout)
1272{
1273	struct fman_buffer_prefix_content buf_prefix_content;
1274	struct fman_port_rx_params *rx_p;
1275	struct fman_port_params params;
1276	int err;
1277
1278	memset(&params, 0, sizeof(params));
1279	memset(&buf_prefix_content, 0, sizeof(buf_prefix_content));
1280
1281	buf_prefix_content.priv_data_size = buf_layout->priv_data_size;
1282	buf_prefix_content.pass_prs_result = true;
1283	buf_prefix_content.pass_hash_result = true;
1284	buf_prefix_content.pass_time_stamp = true;
1285	buf_prefix_content.data_align = DPAA_FD_RX_DATA_ALIGNMENT;
1286
1287	rx_p = &params.specific_params.rx_params;
1288	rx_p->err_fqid = errq->fqid;
1289	rx_p->dflt_fqid = defq->fqid;
1290	if (pcdq) {
1291		rx_p->pcd_base_fqid = pcdq->fqid;
1292		rx_p->pcd_fqs_count = DPAA_ETH_PCD_RXQ_NUM;
1293	}
1294
1295	rx_p->ext_buf_pools.num_of_pools_used = 1;
1296	rx_p->ext_buf_pools.ext_buf_pool[0].id =  bp->bpid;
1297	rx_p->ext_buf_pools.ext_buf_pool[0].size = (u16)bp->size;
1298
1299	err = fman_port_config(port, &params);
1300	if (err) {
1301		pr_err("%s: fman_port_config failed\n", __func__);
1302		return err;
1303	}
1304
1305	err = fman_port_cfg_buf_prefix_content(port, &buf_prefix_content);
1306	if (err) {
1307		pr_err("%s: fman_port_cfg_buf_prefix_content failed\n",
1308		       __func__);
1309		return err;
1310	}
1311
1312	err = fman_port_init(port);
1313	if (err)
1314		pr_err("%s: fm_port_init failed\n", __func__);
1315
1316	return err;
1317}
1318
1319static int dpaa_eth_init_ports(struct mac_device *mac_dev,
1320			       struct dpaa_bp *bp,
1321			       struct fm_port_fqs *port_fqs,
1322			       struct dpaa_buffer_layout *buf_layout,
1323			       struct device *dev)
1324{
1325	struct fman_port *rxport = mac_dev->port[RX];
1326	struct fman_port *txport = mac_dev->port[TX];
1327	int err;
1328
1329	err = dpaa_eth_init_tx_port(txport, port_fqs->tx_errq,
1330				    port_fqs->tx_defq, &buf_layout[TX]);
1331	if (err)
1332		return err;
1333
1334	err = dpaa_eth_init_rx_port(rxport, bp, port_fqs->rx_errq,
1335				    port_fqs->rx_defq, port_fqs->rx_pcdq,
1336				    &buf_layout[RX]);
1337
1338	return err;
1339}
1340
1341static int dpaa_bman_release(const struct dpaa_bp *dpaa_bp,
1342			     struct bm_buffer *bmb, int cnt)
1343{
1344	int err;
1345
1346	err = bman_release(dpaa_bp->pool, bmb, cnt);
1347	/* Should never occur, address anyway to avoid leaking the buffers */
1348	if (WARN_ON(err) && dpaa_bp->free_buf_cb)
1349		while (cnt-- > 0)
1350			dpaa_bp->free_buf_cb(dpaa_bp, &bmb[cnt]);
1351
1352	return cnt;
1353}
1354
1355static void dpaa_release_sgt_members(struct qm_sg_entry *sgt)
1356{
1357	struct bm_buffer bmb[DPAA_BUFF_RELEASE_MAX];
1358	struct dpaa_bp *dpaa_bp;
1359	int i = 0, j;
1360
1361	memset(bmb, 0, sizeof(bmb));
1362
1363	do {
1364		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
1365		if (!dpaa_bp)
1366			return;
1367
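		/* Batch together consecutive entries that share a buffer
		 * pool, up to DPAA_BUFF_RELEASE_MAX at a time, then release
		 * them back to BMan in a single call.
		 */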
1368		j = 0;
1369		do {
1370			WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
1371
1372			bm_buffer_set64(&bmb[j], qm_sg_entry_get64(&sgt[i]));
1373
1374			j++; i++;
1375		} while (j < ARRAY_SIZE(bmb) &&
1376				!qm_sg_entry_is_final(&sgt[i - 1]) &&
1377				sgt[i - 1].bpid == sgt[i].bpid);
1378
1379		dpaa_bman_release(dpaa_bp, bmb, j);
1380	} while (!qm_sg_entry_is_final(&sgt[i - 1]));
1381}
1382
1383static void dpaa_fd_release(const struct net_device *net_dev,
1384			    const struct qm_fd *fd)
1385{
1386	struct qm_sg_entry *sgt;
1387	struct dpaa_bp *dpaa_bp;
1388	struct bm_buffer bmb;
1389	dma_addr_t addr;
1390	void *vaddr;
1391
1392	bmb.data = 0;
1393	bm_buffer_set64(&bmb, qm_fd_addr(fd));
1394
1395	dpaa_bp = dpaa_bpid2pool(fd->bpid);
1396	if (!dpaa_bp)
1397		return;
1398
1399	if (qm_fd_get_format(fd) == qm_fd_sg) {
1400		vaddr = phys_to_virt(qm_fd_addr(fd));
1401		sgt = vaddr + qm_fd_get_offset(fd);
1402
1403		dma_unmap_page(dpaa_bp->priv->rx_dma_dev, qm_fd_addr(fd),
1404			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
1405
1406		dpaa_release_sgt_members(sgt);
1407
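		/* Re-map the SGT page before releasing it back to the pool;
		 * buffers seeded into the Rx pools must stay DMA mapped.
		 */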
1408		addr = dma_map_page(dpaa_bp->priv->rx_dma_dev,
1409				    virt_to_page(vaddr), 0, DPAA_BP_RAW_SIZE,
1410				    DMA_FROM_DEVICE);
1411		if (dma_mapping_error(dpaa_bp->priv->rx_dma_dev, addr)) {
1412			netdev_err(net_dev, "DMA mapping failed\n");
1413			return;
1414		}
1415		bm_buffer_set64(&bmb, addr);
1416	}
1417
1418	dpaa_bman_release(dpaa_bp, &bmb, 1);
1419}
1420
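/* Account enqueue rejection notifications (ERNs) by their QMan reject code */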
1421static void count_ern(struct dpaa_percpu_priv *percpu_priv,
1422		      const union qm_mr_entry *msg)
1423{
1424	switch (msg->ern.rc & QM_MR_RC_MASK) {
1425	case QM_MR_RC_CGR_TAILDROP:
1426		percpu_priv->ern_cnt.cg_tdrop++;
1427		break;
1428	case QM_MR_RC_WRED:
1429		percpu_priv->ern_cnt.wred++;
1430		break;
1431	case QM_MR_RC_ERROR:
1432		percpu_priv->ern_cnt.err_cond++;
1433		break;
1434	case QM_MR_RC_ORPWINDOW_EARLY:
1435		percpu_priv->ern_cnt.early_window++;
1436		break;
1437	case QM_MR_RC_ORPWINDOW_LATE:
1438		percpu_priv->ern_cnt.late_window++;
1439		break;
1440	case QM_MR_RC_FQ_TAILDROP:
1441		percpu_priv->ern_cnt.fq_tdrop++;
1442		break;
1443	case QM_MR_RC_ORPWINDOW_RETIRED:
1444		percpu_priv->ern_cnt.fq_retired++;
1445		break;
1446	case QM_MR_RC_ORP_ZERO:
1447		percpu_priv->ern_cnt.orp_zero++;
1448		break;
1449	}
1450}
1451
1452/* Turn on HW checksum computation for this outgoing frame.
1453 * If the current protocol is not something we support in this regard
1454 * (or if the stack has already computed the SW checksum), we do nothing.
1455 *
1456 * Returns 0 if all goes well (or HW csum doesn't apply), and a negative value
1457 * otherwise.
1458 *
1459 * Note that this function may modify the fd->cmd field and the skb data buffer
1460 * (the Parse Results area).
1461 */
1462static int dpaa_enable_tx_csum(struct dpaa_priv *priv,
1463			       struct sk_buff *skb,
1464			       struct qm_fd *fd,
1465			       void *parse_results)
1466{
1467	struct fman_prs_result *parse_result;
1468	u16 ethertype = ntohs(skb->protocol);
1469	struct ipv6hdr *ipv6h = NULL;
1470	struct iphdr *iph;
1471	int retval = 0;
1472	u8 l4_proto;
1473
1474	if (skb->ip_summed != CHECKSUM_PARTIAL)
1475		return 0;
1476
	/* Note: the L3 csum seems to already be computed in software, but the
	 * FMan configuration does not let us request L4 checksumming alone
	 * anyway.
	 */
1480
1481	/* Fill in some fields of the Parse Results array, so the FMan
1482	 * can find them as if they came from the FMan Parser.
1483	 */
1484	parse_result = (struct fman_prs_result *)parse_results;
1485
1486	/* If we're dealing with VLAN, get the real Ethernet type */
1487	if (ethertype == ETH_P_8021Q)
1488		ethertype = ntohs(skb_vlan_eth_hdr(skb)->h_vlan_encapsulated_proto);
1489
1490	/* Fill in the relevant L3 parse result fields
1491	 * and read the L4 protocol type
1492	 */
1493	switch (ethertype) {
1494	case ETH_P_IP:
1495		parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV4);
1496		iph = ip_hdr(skb);
1497		WARN_ON(!iph);
1498		l4_proto = iph->protocol;
1499		break;
1500	case ETH_P_IPV6:
1501		parse_result->l3r = cpu_to_be16(FM_L3_PARSE_RESULT_IPV6);
1502		ipv6h = ipv6_hdr(skb);
1503		WARN_ON(!ipv6h);
1504		l4_proto = ipv6h->nexthdr;
1505		break;
1506	default:
1507		/* We shouldn't even be here */
1508		if (net_ratelimit())
1509			netif_alert(priv, tx_err, priv->net_dev,
1510				    "Can't compute HW csum for L3 proto 0x%x\n",
1511				    ntohs(skb->protocol));
1512		retval = -EIO;
1513		goto return_error;
1514	}
1515
1516	/* Fill in the relevant L4 parse result fields */
1517	switch (l4_proto) {
1518	case IPPROTO_UDP:
1519		parse_result->l4r = FM_L4_PARSE_RESULT_UDP;
1520		break;
1521	case IPPROTO_TCP:
1522		parse_result->l4r = FM_L4_PARSE_RESULT_TCP;
1523		break;
1524	default:
1525		if (net_ratelimit())
1526			netif_alert(priv, tx_err, priv->net_dev,
1527				    "Can't compute HW csum for L4 proto 0x%x\n",
1528				    l4_proto);
1529		retval = -EIO;
1530		goto return_error;
1531	}
1532
1533	/* At index 0 is IPOffset_1 as defined in the Parse Results */
1534	parse_result->ip_off[0] = (u8)skb_network_offset(skb);
1535	parse_result->l4_off = (u8)skb_transport_offset(skb);
1536
1537	/* Enable L3 (and L4, if TCP or UDP) HW checksum. */
1538	fd->cmd |= cpu_to_be32(FM_FD_CMD_RPD | FM_FD_CMD_DTC);
1539
	/* On P1023 and similar platforms, fd->cmd interpretation could be
	 * disabled by setting the CONTEXT_A ICMD bit. That bit is currently
	 * not set, so there is nothing to check here; if/when context_a is
	 * used in the future, this bit will have to be checked.
	 */
1545
1546return_error:
1547	return retval;
1548}
1549
1550static int dpaa_bp_add_8_bufs(const struct dpaa_bp *dpaa_bp)
1551{
1552	struct net_device *net_dev = dpaa_bp->priv->net_dev;
1553	struct bm_buffer bmb[8];
1554	dma_addr_t addr;
1555	struct page *p;
1556	u8 i;
1557
1558	for (i = 0; i < 8; i++) {
1559		p = dev_alloc_pages(0);
1560		if (unlikely(!p)) {
1561			netdev_err(net_dev, "dev_alloc_pages() failed\n");
1562			goto release_previous_buffs;
1563		}
1564
1565		addr = dma_map_page(dpaa_bp->priv->rx_dma_dev, p, 0,
1566				    DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
1567		if (unlikely(dma_mapping_error(dpaa_bp->priv->rx_dma_dev,
1568					       addr))) {
1569			netdev_err(net_dev, "DMA map failed\n");
1570			goto release_previous_buffs;
1571		}
1572
1573		bmb[i].data = 0;
1574		bm_buffer_set64(&bmb[i], addr);
1575	}
1576
1577release_bufs:
1578	return dpaa_bman_release(dpaa_bp, bmb, i);
1579
1580release_previous_buffs:
1581	WARN_ONCE(1, "dpaa_eth: failed to add buffers on Rx\n");
1582
1583	bm_buffer_set64(&bmb[i], 0);
1584	/* Avoid releasing a completely null buffer; bman_release() requires
1585	 * at least one buffer.
1586	 */
1587	if (likely(i))
1588		goto release_bufs;
1589
1590	return 0;
1591}
1592
1593static int dpaa_bp_seed(struct dpaa_bp *dpaa_bp)
1594{
1595	int i;
1596
1597	/* Give each CPU an allotment of "config_count" buffers */
1598	for_each_possible_cpu(i) {
1599		int *count_ptr = per_cpu_ptr(dpaa_bp->percpu_count, i);
1600		int j;
1601
1602		/* Although we access another CPU's counters here
1603		 * we do it at boot time so it is safe
1604		 */
1605		for (j = 0; j < dpaa_bp->config_count; j += 8)
1606			*count_ptr += dpaa_bp_add_8_bufs(dpaa_bp);
1607	}
1608	return 0;
1609}
1610
/* Add buffers (pages) for Rx processing whenever the bpool count falls below
 * REFILL_THRESHOLD.
 */
1614static int dpaa_eth_refill_bpool(struct dpaa_bp *dpaa_bp, int *countptr)
1615{
1616	int count = *countptr;
1617	int new_bufs;
1618
1619	if (unlikely(count < FSL_DPAA_ETH_REFILL_THRESHOLD)) {
1620		do {
1621			new_bufs = dpaa_bp_add_8_bufs(dpaa_bp);
1622			if (unlikely(!new_bufs)) {
1623				/* Avoid looping forever if we've temporarily
1624				 * run out of memory. We'll try again at the
1625				 * next NAPI cycle.
1626				 */
1627				break;
1628			}
1629			count += new_bufs;
1630		} while (count < FSL_DPAA_ETH_MAX_BUF_COUNT);
1631
1632		*countptr = count;
1633		if (unlikely(count < FSL_DPAA_ETH_MAX_BUF_COUNT))
1634			return -ENOMEM;
1635	}
1636
1637	return 0;
1638}
1639
1640static int dpaa_eth_refill_bpools(struct dpaa_priv *priv)
1641{
1642	struct dpaa_bp *dpaa_bp;
1643	int *countptr;
1644
1645	dpaa_bp = priv->dpaa_bp;
1646	if (!dpaa_bp)
1647		return -EINVAL;
1648	countptr = this_cpu_ptr(dpaa_bp->percpu_count);
1649
1650	return dpaa_eth_refill_bpool(dpaa_bp, countptr);
1651}
1652
1653/* Cleanup function for outgoing frame descriptors that were built on Tx path,
1654 * either contiguous frames or scatter/gather ones.
1655 * Skb freeing is not handled here.
1656 *
 * This function may be called on error paths in the Tx function, so guard
 * against cases where not all relevant fd fields were filled in. To avoid
 * reading an invalid transmission timestamp on the error paths, set ts to
 * false.
1661 *
1662 * Return the skb backpointer, since for S/G frames the buffer containing it
1663 * gets freed here.
1664 *
1665 * No skb backpointer is set when transmitting XDP frames. Cleanup the buffer
1666 * and return NULL in this case.
1667 */
1668static struct sk_buff *dpaa_cleanup_tx_fd(const struct dpaa_priv *priv,
1669					  const struct qm_fd *fd, bool ts)
1670{
1671	const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
1672	struct device *dev = priv->net_dev->dev.parent;
1673	struct skb_shared_hwtstamps shhwtstamps;
1674	dma_addr_t addr = qm_fd_addr(fd);
1675	void *vaddr = phys_to_virt(addr);
1676	const struct qm_sg_entry *sgt;
1677	struct dpaa_eth_swbp *swbp;
1678	struct sk_buff *skb;
1679	u64 ns;
1680	int i;
1681
1682	if (unlikely(qm_fd_get_format(fd) == qm_fd_sg)) {
1683		dma_unmap_page(priv->tx_dma_dev, addr,
1684			       qm_fd_get_offset(fd) + DPAA_SGT_SIZE,
1685			       dma_dir);
1686
		/* The SGT buffer was allocated as a page on the Tx path, so
		 * it comes from lowmem.
		 */
1690		sgt = vaddr + qm_fd_get_offset(fd);
1691
1692		/* sgt[0] is from lowmem, was dma_map_single()-ed */
1693		dma_unmap_single(priv->tx_dma_dev, qm_sg_addr(&sgt[0]),
1694				 qm_sg_entry_get_len(&sgt[0]), dma_dir);
1695
1696		/* remaining pages were mapped with skb_frag_dma_map() */
1697		for (i = 1; (i < DPAA_SGT_MAX_ENTRIES) &&
1698		     !qm_sg_entry_is_final(&sgt[i - 1]); i++) {
1699			WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
1700
1701			dma_unmap_page(priv->tx_dma_dev, qm_sg_addr(&sgt[i]),
1702				       qm_sg_entry_get_len(&sgt[i]), dma_dir);
1703		}
1704	} else {
1705		dma_unmap_single(priv->tx_dma_dev, addr,
1706				 qm_fd_get_offset(fd) + qm_fd_get_length(fd),
1707				 dma_dir);
1708	}
1709
1710	swbp = (struct dpaa_eth_swbp *)vaddr;
1711	skb = swbp->skb;
1712
1713	/* No skb backpointer is set when running XDP. An xdp_frame
1714	 * backpointer is saved instead.
1715	 */
1716	if (!skb) {
1717		xdp_return_frame(swbp->xdpf);
1718		return NULL;
1719	}
1720
1721	/* DMA unmapping is required before accessing the HW provided info */
1722	if (ts && priv->tx_tstamp &&
1723	    skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
1724		memset(&shhwtstamps, 0, sizeof(shhwtstamps));
1725
1726		if (!fman_port_get_tstamp(priv->mac_dev->port[TX], vaddr,
1727					  &ns)) {
1728			shhwtstamps.hwtstamp = ns_to_ktime(ns);
1729			skb_tstamp_tx(skb, &shhwtstamps);
1730		} else {
1731			dev_warn(dev, "fman_port_get_tstamp failed!\n");
1732		}
1733	}
1734
1735	if (qm_fd_get_format(fd) == qm_fd_sg)
1736		/* Free the page that we allocated on Tx for the SGT */
1737		free_pages((unsigned long)vaddr, 0);
1738
1739	return skb;
1740}
1741
1742static u8 rx_csum_offload(const struct dpaa_priv *priv, const struct qm_fd *fd)
1743{
1744	/* The parser has run and performed L4 checksum validation.
1745	 * We know there were no parser errors (and implicitly no
1746	 * L4 csum error), otherwise we wouldn't be here.
1747	 */
1748	if ((priv->net_dev->features & NETIF_F_RXCSUM) &&
1749	    (be32_to_cpu(fd->status) & FM_FD_STAT_L4CV))
1750		return CHECKSUM_UNNECESSARY;
1751
1752	/* We're here because either the parser didn't run or the L4 checksum
1753	 * was not verified. This may include the case of a UDP frame with
1754	 * checksum zero or an L4 proto other than TCP/UDP
1755	 */
1756	return CHECKSUM_NONE;
1757}
1758
1759#define PTR_IS_ALIGNED(x, a) (IS_ALIGNED((unsigned long)(x), (a)))
1760
1761/* Build a linear skb around the received buffer.
1762 * We are guaranteed there is enough room at the end of the data buffer to
1763 * accommodate the shared info area of the skb.
1764 */
1765static struct sk_buff *contig_fd_to_skb(const struct dpaa_priv *priv,
1766					const struct qm_fd *fd)
1767{
1768	ssize_t fd_off = qm_fd_get_offset(fd);
1769	dma_addr_t addr = qm_fd_addr(fd);
1770	struct dpaa_bp *dpaa_bp;
1771	struct sk_buff *skb;
1772	void *vaddr;
1773
1774	vaddr = phys_to_virt(addr);
1775	WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES));
1776
1777	dpaa_bp = dpaa_bpid2pool(fd->bpid);
1778	if (!dpaa_bp)
1779		goto free_buffer;
1780
1781	skb = build_skb(vaddr, dpaa_bp->size +
1782			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)));
1783	if (WARN_ONCE(!skb, "Build skb failure on Rx\n"))
1784		goto free_buffer;
1785	skb_reserve(skb, fd_off);
1786	skb_put(skb, qm_fd_get_length(fd));
1787
1788	skb->ip_summed = rx_csum_offload(priv, fd);
1789
1790	return skb;
1791
1792free_buffer:
1793	free_pages((unsigned long)vaddr, 0);
1794	return NULL;
1795}
1796
1797/* Build an skb with the data of the first S/G entry in the linear portion and
1798 * the rest of the frame as skb fragments.
1799 *
1800 * The page fragment holding the S/G Table is recycled here.
1801 */
1802static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv,
1803				    const struct qm_fd *fd)
1804{
1805	ssize_t fd_off = qm_fd_get_offset(fd);
1806	dma_addr_t addr = qm_fd_addr(fd);
1807	const struct qm_sg_entry *sgt;
1808	struct page *page, *head_page;
1809	struct dpaa_bp *dpaa_bp;
1810	void *vaddr, *sg_vaddr;
1811	int frag_off, frag_len;
1812	struct sk_buff *skb;
1813	dma_addr_t sg_addr;
1814	int page_offset;
1815	unsigned int sz;
1816	int *count_ptr;
1817	int i, j;
1818
1819	vaddr = phys_to_virt(addr);
1820	WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES));
1821
1822	/* Iterate through the SGT entries and add data buffers to the skb */
1823	sgt = vaddr + fd_off;
1824	skb = NULL;
1825	for (i = 0; i < DPAA_SGT_MAX_ENTRIES; i++) {
1826		/* Extension bit is not supported */
1827		WARN_ON(qm_sg_entry_is_ext(&sgt[i]));
1828
1829		sg_addr = qm_sg_addr(&sgt[i]);
1830		sg_vaddr = phys_to_virt(sg_addr);
1831		WARN_ON(!PTR_IS_ALIGNED(sg_vaddr, SMP_CACHE_BYTES));
1832
1833		dma_unmap_page(priv->rx_dma_dev, sg_addr,
1834			       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
1835
1836		/* We may use multiple Rx pools */
1837		dpaa_bp = dpaa_bpid2pool(sgt[i].bpid);
1838		if (!dpaa_bp)
1839			goto free_buffers;
1840
1841		if (!skb) {
1842			sz = dpaa_bp->size +
1843				SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
1844			skb = build_skb(sg_vaddr, sz);
1845			if (WARN_ON(!skb))
1846				goto free_buffers;
1847
1848			skb->ip_summed = rx_csum_offload(priv, fd);
1849
1850			/* Make sure forwarded skbs will have enough space
1851			 * on Tx, if extra headers are added.
1852			 */
1853			WARN_ON(fd_off != priv->rx_headroom);
1854			skb_reserve(skb, fd_off);
1855			skb_put(skb, qm_sg_entry_get_len(&sgt[i]));
1856		} else {
1857			/* Not the first S/G entry; all data from the buffer will
1858			 * be added as an skb fragment; the fragment index is offset
1859			 * by one since the first S/G entry was incorporated in the
1860			 * linear part of the skb.
1861			 *
1862			 * Caution: 'page' may be a tail page.
1863			 */
1864			page = virt_to_page(sg_vaddr);
1865			head_page = virt_to_head_page(sg_vaddr);
1866
1867			/* Compute offset in (possibly tail) page */
1868			page_offset = ((unsigned long)sg_vaddr &
1869					(PAGE_SIZE - 1)) +
1870				(page_address(page) - page_address(head_page));
1871			/* page_offset only refers to the beginning of sgt[i];
1872			 * but the buffer itself may have an internal offset.
1873			 */
1874			frag_off = qm_sg_entry_get_off(&sgt[i]) + page_offset;
1875			frag_len = qm_sg_entry_get_len(&sgt[i]);
1876			/* skb_add_rx_frag() does no checking on the page; if
1877			 * we pass it a tail page, we'll end up with
1878			 * bad page accounting and eventually with segfaults.
1879			 */
1880			skb_add_rx_frag(skb, i - 1, head_page, frag_off,
1881					frag_len, dpaa_bp->size);
1882		}
1883
1884		/* Update the pool count for the current {cpu x bpool} */
1885		count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
1886		(*count_ptr)--;
1887
1888		if (qm_sg_entry_is_final(&sgt[i]))
1889			break;
1890	}
1891	WARN_ONCE(i == DPAA_SGT_MAX_ENTRIES, "No final bit on SGT\n");
1892
1893	/* free the SG table buffer */
1894	free_pages((unsigned long)vaddr, 0);
1895
1896	return skb;
1897
1898free_buffers:
1899	/* free all the SG entries */
1900	for (j = 0; j < DPAA_SGT_MAX_ENTRIES; j++) {
1901		sg_addr = qm_sg_addr(&sgt[j]);
1902		sg_vaddr = phys_to_virt(sg_addr);
1903		/* all pages 0..i were unmapped */
1904		if (j > i)
1905			dma_unmap_page(priv->rx_dma_dev, qm_sg_addr(&sgt[j]),
1906				       DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE);
1907		free_pages((unsigned long)sg_vaddr, 0);
1908		/* counters 0..i-1 were decremented */
1909		if (j >= i) {
1910			dpaa_bp = dpaa_bpid2pool(sgt[j].bpid);
1911			if (dpaa_bp) {
1912				count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
1913				(*count_ptr)--;
1914			}
1915		}
1916
1917		if (qm_sg_entry_is_final(&sgt[j]))
1918			break;
1919	}
1920	/* free the SGT fragment */
1921	free_pages((unsigned long)vaddr, 0);
1922
1923	return NULL;
1924}
1925
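/* Build a contiguous Tx frame descriptor around a linear skb. The skb
 * backpointer is stored in the buffer headroom (struct dpaa_eth_swbp)
 * so it can be recovered on Tx confirmation.
 */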
1926static int skb_to_contig_fd(struct dpaa_priv *priv,
1927			    struct sk_buff *skb, struct qm_fd *fd,
1928			    int *offset)
1929{
1930	struct net_device *net_dev = priv->net_dev;
1931	enum dma_data_direction dma_dir;
1932	struct dpaa_eth_swbp *swbp;
1933	unsigned char *buff_start;
1934	dma_addr_t addr;
1935	int err;
1936
1937	/* We are guaranteed to have at least tx_headroom bytes
1938	 * available, so just use that for offset.
1939	 */
1940	fd->bpid = FSL_DPAA_BPID_INV;
1941	buff_start = skb->data - priv->tx_headroom;
1942	dma_dir = DMA_TO_DEVICE;
1943
1944	swbp = (struct dpaa_eth_swbp *)buff_start;
1945	swbp->skb = skb;
1946
1947	/* Enable L3/L4 hardware checksum computation.
1948	 *
1949	 * We must do this before dma_map_single(DMA_TO_DEVICE), because we may
1950	 * need to write into the skb.
1951	 */
1952	err = dpaa_enable_tx_csum(priv, skb, fd,
1953				  buff_start + DPAA_TX_PRIV_DATA_SIZE);
1954	if (unlikely(err < 0)) {
1955		if (net_ratelimit())
1956			netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
1957				  err);
1958		return err;
1959	}
1960
1961	/* Fill in the rest of the FD fields */
1962	qm_fd_set_contig(fd, priv->tx_headroom, skb->len);
1963	fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO);
1964
1965	/* Map the entire buffer size that may be seen by FMan, but no more */
1966	addr = dma_map_single(priv->tx_dma_dev, buff_start,
1967			      priv->tx_headroom + skb->len, dma_dir);
1968	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
1969		if (net_ratelimit())
1970			netif_err(priv, tx_err, net_dev, "dma_map_single() failed\n");
1971		return -EINVAL;
1972	}
1973	qm_fd_addr_set64(fd, addr);
1974
1975	return 0;
1976}
1977
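/* Build a scatter/gather Tx frame descriptor from a nonlinear skb. A
 * separately allocated page holds the driver private area followed by
 * the SG table: SGT[0] covers the skb linear part, the remaining
 * entries cover the page fragments.
 */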
1978static int skb_to_sg_fd(struct dpaa_priv *priv,
1979			struct sk_buff *skb, struct qm_fd *fd)
1980{
1981	const enum dma_data_direction dma_dir = DMA_TO_DEVICE;
1982	const int nr_frags = skb_shinfo(skb)->nr_frags;
1983	struct net_device *net_dev = priv->net_dev;
1984	struct dpaa_eth_swbp *swbp;
1985	struct qm_sg_entry *sgt;
1986	void *buff_start;
1987	skb_frag_t *frag;
1988	dma_addr_t addr;
1989	size_t frag_len;
1990	struct page *p;
1991	int i, j, err;
1992
1993	/* get a page to store the SGTable */
1994	p = dev_alloc_pages(0);
1995	if (unlikely(!p)) {
1996		netdev_err(net_dev, "dev_alloc_pages() failed\n");
1997		return -ENOMEM;
1998	}
1999	buff_start = page_address(p);
2000
2001	/* Enable L3/L4 hardware checksum computation.
2002	 *
2003	 * We must do this before dma_map_single(DMA_TO_DEVICE), because we may
2004	 * need to write into the skb.
2005	 */
2006	err = dpaa_enable_tx_csum(priv, skb, fd,
2007				  buff_start + DPAA_TX_PRIV_DATA_SIZE);
2008	if (unlikely(err < 0)) {
2009		if (net_ratelimit())
2010			netif_err(priv, tx_err, net_dev, "HW csum error: %d\n",
2011				  err);
2012		goto csum_failed;
2013	}
2014
2015	/* SGT[0] is used by the linear part */
2016	sgt = (struct qm_sg_entry *)(buff_start + priv->tx_headroom);
2017	frag_len = skb_headlen(skb);
2018	qm_sg_entry_set_len(&sgt[0], frag_len);
2019	sgt[0].bpid = FSL_DPAA_BPID_INV;
2020	sgt[0].offset = 0;
2021	addr = dma_map_single(priv->tx_dma_dev, skb->data,
2022			      skb_headlen(skb), dma_dir);
2023	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
2024		netdev_err(priv->net_dev, "DMA mapping failed\n");
2025		err = -EINVAL;
2026		goto sg0_map_failed;
2027	}
2028	qm_sg_entry_set64(&sgt[0], addr);
2029
2030	/* populate the rest of the SGT entries */
2031	for (i = 0; i < nr_frags; i++) {
2032		frag = &skb_shinfo(skb)->frags[i];
2033		frag_len = skb_frag_size(frag);
2034		WARN_ON(!skb_frag_page(frag));
2035		addr = skb_frag_dma_map(priv->tx_dma_dev, frag, 0,
2036					frag_len, dma_dir);
2037		if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
2038			netdev_err(priv->net_dev, "DMA mapping failed\n");
2039			err = -EINVAL;
2040			goto sg_map_failed;
2041		}
2042
2043		qm_sg_entry_set_len(&sgt[i + 1], frag_len);
2044		sgt[i + 1].bpid = FSL_DPAA_BPID_INV;
2045		sgt[i + 1].offset = 0;
2046
2047		/* keep the offset in the address */
2048		qm_sg_entry_set64(&sgt[i + 1], addr);
2049	}
2050
2051	/* Set the final bit in the last used entry of the SGT */
2052	qm_sg_entry_set_f(&sgt[nr_frags], frag_len);
2053
2054	/* set fd offset to priv->tx_headroom */
2055	qm_fd_set_sg(fd, priv->tx_headroom, skb->len);
2056
2057	/* DMA map the SGT page */
2058	swbp = (struct dpaa_eth_swbp *)buff_start;
2059	swbp->skb = skb;
2060
2061	addr = dma_map_page(priv->tx_dma_dev, p, 0,
2062			    priv->tx_headroom + DPAA_SGT_SIZE, dma_dir);
2063	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
2064		netdev_err(priv->net_dev, "DMA mapping failed\n");
2065		err = -EINVAL;
2066		goto sgt_map_failed;
2067	}
2068
2069	fd->bpid = FSL_DPAA_BPID_INV;
2070	fd->cmd |= cpu_to_be32(FM_FD_CMD_FCO);
2071	qm_fd_addr_set64(fd, addr);
2072
2073	return 0;
2074
2075sgt_map_failed:
2076sg_map_failed:
2077	for (j = 0; j <= i; j++)
2078		dma_unmap_page(priv->tx_dma_dev, qm_sg_addr(&sgt[j]),
2079			       qm_sg_entry_get_len(&sgt[j]), dma_dir);
2080sg0_map_failed:
2081csum_failed:
2082	free_pages((unsigned long)buff_start, 0);
2083
2084	return err;
2085}
2086
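/* Enqueue a frame descriptor on the egress FQ selected by 'queue',
 * retrying a bounded number of times while the portal is busy.
 */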
2087static inline int dpaa_xmit(struct dpaa_priv *priv,
2088			    struct rtnl_link_stats64 *percpu_stats,
2089			    int queue,
2090			    struct qm_fd *fd)
2091{
2092	struct qman_fq *egress_fq;
2093	int err, i;
2094
2095	egress_fq = priv->egress_fqs[queue];
2096	if (fd->bpid == FSL_DPAA_BPID_INV)
2097		fd->cmd |= cpu_to_be32(qman_fq_fqid(priv->conf_fqs[queue]));
2098
2099	/* Trace this Tx fd */
2100	trace_dpaa_tx_fd(priv->net_dev, egress_fq, fd);
2101
2102	for (i = 0; i < DPAA_ENQUEUE_RETRIES; i++) {
2103		err = qman_enqueue(egress_fq, fd);
2104		if (err != -EBUSY)
2105			break;
2106	}
2107
2108	if (unlikely(err < 0)) {
2109		percpu_stats->tx_fifo_errors++;
2110		return err;
2111	}
2112
2113	percpu_stats->tx_packets++;
2114	percpu_stats->tx_bytes += qm_fd_get_length(fd);
2115
2116	return 0;
2117}
2118
2119#ifdef CONFIG_DPAA_ERRATUM_A050385
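/* Erratum A050385 workaround for skbs: the data start, the linear area
 * length and the fragment offsets/sizes must satisfy the alignment
 * constraints checked below. If they don't, copy the skb contents into
 * a newly allocated, properly aligned linear buffer.
 */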
2120static int dpaa_a050385_wa_skb(struct net_device *net_dev, struct sk_buff **s)
2121{
2122	struct dpaa_priv *priv = netdev_priv(net_dev);
2123	struct sk_buff *new_skb, *skb = *s;
2124	unsigned char *start, i;
2125
2126	/* check linear buffer alignment */
2127	if (!PTR_IS_ALIGNED(skb->data, DPAA_A050385_ALIGN))
2128		goto workaround;
2129
2130	/* linear buffers just need to have an aligned start */
2131	if (!skb_is_nonlinear(skb))
2132		return 0;
2133
2134	/* linear data size for nonlinear skbs needs to be aligned */
2135	if (!IS_ALIGNED(skb_headlen(skb), DPAA_A050385_ALIGN))
2136		goto workaround;
2137
2138	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
2139		skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
2140
2141		/* all fragments need to have aligned start addresses */
2142		if (!IS_ALIGNED(skb_frag_off(frag), DPAA_A050385_ALIGN))
2143			goto workaround;
2144
2145		/* all but the last fragment need to have aligned sizes */
2146		if (!IS_ALIGNED(skb_frag_size(frag), DPAA_A050385_ALIGN) &&
2147		    (i < skb_shinfo(skb)->nr_frags - 1))
2148			goto workaround;
2149	}
2150
2151	return 0;
2152
2153workaround:
2154	/* copy all the skb content into a new linear buffer */
2155	new_skb = netdev_alloc_skb(net_dev, skb->len + DPAA_A050385_ALIGN - 1 +
2156						priv->tx_headroom);
2157	if (!new_skb)
2158		return -ENOMEM;
2159
2160	/* NET_SKB_PAD bytes already reserved, adding up to tx_headroom */
2161	skb_reserve(new_skb, priv->tx_headroom - NET_SKB_PAD);
2162
2163	/* Workaround for DPAA_A050385 requires data start to be aligned */
2164	start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN);
2165	if (start - new_skb->data)
2166		skb_reserve(new_skb, start - new_skb->data);
2167
2168	skb_put(new_skb, skb->len);
2169	skb_copy_bits(skb, 0, new_skb->data, skb->len);
2170	skb_copy_header(new_skb, skb);
2171	new_skb->dev = skb->dev;
2172
2173	/* Copy relevant timestamp info from the old skb to the new */
2174	if (priv->tx_tstamp) {
2175		skb_shinfo(new_skb)->tx_flags = skb_shinfo(skb)->tx_flags;
2176		skb_shinfo(new_skb)->hwtstamps = skb_shinfo(skb)->hwtstamps;
2177		skb_shinfo(new_skb)->tskey = skb_shinfo(skb)->tskey;
2178		if (skb->sk)
2179			skb_set_owner_w(new_skb, skb->sk);
2180	}
2181
2182	/* We move the headroom when we align it so we have to reset the
2183	 * network and transport header offsets relative to the new data
2184	 * pointer. The checksum offload relies on these offsets.
2185	 */
2186	skb_set_network_header(new_skb, skb_network_offset(skb));
2187	skb_set_transport_header(new_skb, skb_transport_offset(skb));
2188
2189	dev_kfree_skb(skb);
2190	*s = new_skb;
2191
2192	return 0;
2193}
2194
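/* Erratum A050385 workaround for XDP frames: realign the data in place
 * when the existing headroom allows it, otherwise copy the frame into
 * a new page-backed buffer with an aligned headroom.
 */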
2195static int dpaa_a050385_wa_xdpf(struct dpaa_priv *priv,
2196				struct xdp_frame **init_xdpf)
2197{
2198	struct xdp_frame *new_xdpf, *xdpf = *init_xdpf;
2199	void *new_buff, *aligned_data;
2200	struct page *p;
2201	u32 data_shift;
2202	int headroom;
2203
2204	/* Check the data alignment and make sure the headroom is large
2205	 * enough to store the xdpf backpointer. Use an aligned headroom
2206	 * value.
2207	 *
2208	 * Due to alignment constraints, we give XDP access to the full 256
2209	 * byte frame headroom. If the XDP program uses all of it, copy the
2210	 * data to a new buffer and make room for storing the backpointer.
2211	 */
2212	if (PTR_IS_ALIGNED(xdpf->data, DPAA_FD_DATA_ALIGNMENT) &&
2213	    xdpf->headroom >= priv->tx_headroom) {
2214		xdpf->headroom = priv->tx_headroom;
2215		return 0;
2216	}
2217
2218	/* Try to move the data inside the buffer just enough to align it and
2219	 * store the xdpf backpointer. If the available headroom isn't large
2220	 * enough, resort to allocating a new buffer and copying the data.
2221	 */
2222	aligned_data = PTR_ALIGN_DOWN(xdpf->data, DPAA_FD_DATA_ALIGNMENT);
2223	data_shift = xdpf->data - aligned_data;
2224
2225	/* The XDP frame's headroom needs to be large enough to accommodate
2226	 * shifting the data as well as storing the xdpf backpointer.
2227	 */
2228	if (xdpf->headroom >= data_shift + priv->tx_headroom) {
2229		memmove(aligned_data, xdpf->data, xdpf->len);
2230		xdpf->data = aligned_data;
2231		xdpf->headroom = priv->tx_headroom;
2232		return 0;
2233	}
2234
2235	/* The new xdp_frame is stored in the new buffer. Reserve enough space
2236	 * in the headroom for storing it along with the driver's private
2237	 * info. The headroom needs to be aligned to DPAA_FD_DATA_ALIGNMENT to
2238	 * guarantee the data's alignment in the buffer.
2239	 */
2240	headroom = ALIGN(sizeof(*new_xdpf) + priv->tx_headroom,
2241			 DPAA_FD_DATA_ALIGNMENT);
2242
2243	/* Ensure the extended headroom and data don't overflow the buffer,
2244	 * while maintaining the mandatory tailroom.
2245	 */
2246	if (headroom + xdpf->len > DPAA_BP_RAW_SIZE -
2247			SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
2248		return -ENOMEM;
2249
2250	p = dev_alloc_pages(0);
2251	if (unlikely(!p))
2252		return -ENOMEM;
2253
2254	/* Copy the data to the new buffer at a properly aligned offset */
2255	new_buff = page_address(p);
2256	memcpy(new_buff + headroom, xdpf->data, xdpf->len);
2257
2258	/* Create an XDP frame around the new buffer in a similar fashion
2259	 * to xdp_convert_buff_to_frame.
2260	 */
2261	new_xdpf = new_buff;
2262	new_xdpf->data = new_buff + headroom;
2263	new_xdpf->len = xdpf->len;
2264	new_xdpf->headroom = priv->tx_headroom;
2265	new_xdpf->frame_sz = DPAA_BP_RAW_SIZE;
2266	new_xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
2267
2268	/* Release the initial buffer */
2269	xdp_return_frame_rx_napi(xdpf);
2270
2271	*init_xdpf = new_xdpf;
2272	return 0;
2273}
2274#endif
2275
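/* ndo_start_xmit: turn the skb into a contiguous or S/G frame
 * descriptor and enqueue it towards the FMan Tx port.
 */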
2276static netdev_tx_t
2277dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
2278{
2279	const int queue_mapping = skb_get_queue_mapping(skb);
2280	bool nonlinear = skb_is_nonlinear(skb);
2281	struct rtnl_link_stats64 *percpu_stats;
2282	struct dpaa_percpu_priv *percpu_priv;
2283	struct netdev_queue *txq;
2284	struct dpaa_priv *priv;
2285	struct qm_fd fd;
2286	int offset = 0;
2287	int err = 0;
2288
2289	priv = netdev_priv(net_dev);
2290	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2291	percpu_stats = &percpu_priv->stats;
2292
2293	qm_fd_clear_fd(&fd);
2294
2295	if (!nonlinear) {
2296		/* We're going to store the skb backpointer at the beginning
2297		 * of the data buffer, so we need a privately owned skb.
2298		 *
2299		 * We've made sure the skb is not shared in dev->priv_flags,
2300		 * but we still need to verify that the skb head is not cloned.
2301		 */
2302		if (skb_cow_head(skb, priv->tx_headroom))
2303			goto enomem;
2304
2305		WARN_ON(skb_is_nonlinear(skb));
2306	}
2307
2308	/* MAX_SKB_FRAGS is equal to or larger than our DPAA_SGT_MAX_ENTRIES;
2309	 * make sure we don't feed FMan with more fragments than it supports.
2310	 */
2311	if (unlikely(nonlinear &&
2312		     (skb_shinfo(skb)->nr_frags >= DPAA_SGT_MAX_ENTRIES))) {
2313		/* If the egress skb contains more fragments than we support
2314		 * we have no choice but to linearize it ourselves.
2315		 */
2316		if (__skb_linearize(skb))
2317			goto enomem;
2318
2319		nonlinear = skb_is_nonlinear(skb);
2320	}
2321
2322#ifdef CONFIG_DPAA_ERRATUM_A050385
2323	if (unlikely(fman_has_errata_a050385())) {
2324		if (dpaa_a050385_wa_skb(net_dev, &skb))
2325			goto enomem;
2326		nonlinear = skb_is_nonlinear(skb);
2327	}
2328#endif
2329
2330	if (nonlinear) {
2331		/* Just create a S/G fd based on the skb */
2332		err = skb_to_sg_fd(priv, skb, &fd);
2333		percpu_priv->tx_frag_skbuffs++;
2334	} else {
2335		/* Create a contig FD from this skb */
2336		err = skb_to_contig_fd(priv, skb, &fd, &offset);
2337	}
2338	if (unlikely(err < 0))
2339		goto skb_to_fd_failed;
2340
2341	txq = netdev_get_tx_queue(net_dev, queue_mapping);
2342
2343	/* LLTX requires us to do our own update of trans_start */
2344	txq_trans_cond_update(txq);
2345
2346	if (priv->tx_tstamp && skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) {
2347		fd.cmd |= cpu_to_be32(FM_FD_CMD_UPD);
2348		skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
2349	}
2350
2351	if (likely(dpaa_xmit(priv, percpu_stats, queue_mapping, &fd) == 0))
2352		return NETDEV_TX_OK;
2353
2354	dpaa_cleanup_tx_fd(priv, &fd, false);
2355skb_to_fd_failed:
2356enomem:
2357	percpu_stats->tx_errors++;
2358	dev_kfree_skb(skb);
2359	return NETDEV_TX_OK;
2360}
2361
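/* Count frames received on the Rx error FQ and release their buffers
 * back to the pool.
 */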
2362static void dpaa_rx_error(struct net_device *net_dev,
2363			  const struct dpaa_priv *priv,
2364			  struct dpaa_percpu_priv *percpu_priv,
2365			  const struct qm_fd *fd,
2366			  u32 fqid)
2367{
2368	if (net_ratelimit())
2369		netif_err(priv, hw, net_dev, "Err FD status = 0x%08x\n",
2370			  be32_to_cpu(fd->status) & FM_FD_STAT_RX_ERRORS);
2371
2372	percpu_priv->stats.rx_errors++;
2373
2374	if (be32_to_cpu(fd->status) & FM_FD_ERR_DMA)
2375		percpu_priv->rx_errors.dme++;
2376	if (be32_to_cpu(fd->status) & FM_FD_ERR_PHYSICAL)
2377		percpu_priv->rx_errors.fpe++;
2378	if (be32_to_cpu(fd->status) & FM_FD_ERR_SIZE)
2379		percpu_priv->rx_errors.fse++;
2380	if (be32_to_cpu(fd->status) & FM_FD_ERR_PRS_HDR_ERR)
2381		percpu_priv->rx_errors.phe++;
2382
2383	dpaa_fd_release(net_dev, fd);
2384}
2385
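/* Count frames returned on the Tx error FQ and free the associated Tx
 * resources.
 */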
2386static void dpaa_tx_error(struct net_device *net_dev,
2387			  const struct dpaa_priv *priv,
2388			  struct dpaa_percpu_priv *percpu_priv,
2389			  const struct qm_fd *fd,
2390			  u32 fqid)
2391{
2392	struct sk_buff *skb;
2393
2394	if (net_ratelimit())
2395		netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
2396			   be32_to_cpu(fd->status) & FM_FD_STAT_TX_ERRORS);
2397
2398	percpu_priv->stats.tx_errors++;
2399
2400	skb = dpaa_cleanup_tx_fd(priv, fd, false);
2401	dev_kfree_skb(skb);
2402}
2403
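/* NAPI poll: service the affine QMan portal's DQRR ring, flush any XDP
 * redirects and re-arm the dequeue interrupt when the budget was not
 * exhausted (or the interface is going down).
 */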
2404static int dpaa_eth_poll(struct napi_struct *napi, int budget)
2405{
2406	struct dpaa_napi_portal *np =
2407			container_of(napi, struct dpaa_napi_portal, napi);
2408	int cleaned;
2409
2410	np->xdp_act = 0;
2411
2412	cleaned = qman_p_poll_dqrr(np->p, budget);
2413
2414	if (np->xdp_act & XDP_REDIRECT)
2415		xdp_do_flush();
2416
2417	if (cleaned < budget) {
2418		napi_complete_done(napi, cleaned);
2419		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
2420	} else if (np->down) {
2421		qman_p_irqsource_add(np->p, QM_PIRQ_DQRI);
2422	}
2423
2424	return cleaned;
2425}
2426
2427static void dpaa_tx_conf(struct net_device *net_dev,
2428			 const struct dpaa_priv *priv,
2429			 struct dpaa_percpu_priv *percpu_priv,
2430			 const struct qm_fd *fd,
2431			 u32 fqid)
2432{
2433	struct sk_buff	*skb;
2434
2435	if (unlikely(be32_to_cpu(fd->status) & FM_FD_STAT_TX_ERRORS)) {
2436		if (net_ratelimit())
2437			netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
2438				   be32_to_cpu(fd->status) &
2439				   FM_FD_STAT_TX_ERRORS);
2440
2441		percpu_priv->stats.tx_errors++;
2442	}
2443
2444	percpu_priv->tx_confirm++;
2445
2446	skb = dpaa_cleanup_tx_fd(priv, fd, true);
2447
2448	consume_skb(skb);
2449}
2450
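/* If requested (sched_napi), mask the portal's dequeue interrupt and
 * defer frame processing to NAPI. Returns nonzero when NAPI was
 * scheduled.
 */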
2451static inline int dpaa_eth_napi_schedule(struct dpaa_percpu_priv *percpu_priv,
2452					 struct qman_portal *portal, bool sched_napi)
2453{
2454	if (sched_napi) {
2455		/* Disable QMan IRQ and invoke NAPI */
2456		qman_p_irqsource_remove(portal, QM_PIRQ_DQRI);
2457
2458		percpu_priv->np.p = portal;
2459		napi_schedule(&percpu_priv->np.napi);
2460		percpu_priv->in_interrupt++;
2461		return 1;
2462	}
2463	return 0;
2464}
2465
2466static enum qman_cb_dqrr_result rx_error_dqrr(struct qman_portal *portal,
2467					      struct qman_fq *fq,
2468					      const struct qm_dqrr_entry *dq,
2469					      bool sched_napi)
2470{
2471	struct dpaa_fq *dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
2472	struct dpaa_percpu_priv *percpu_priv;
2473	struct net_device *net_dev;
2474	struct dpaa_bp *dpaa_bp;
2475	struct dpaa_priv *priv;
2476
2477	net_dev = dpaa_fq->net_dev;
2478	priv = netdev_priv(net_dev);
2479	dpaa_bp = dpaa_bpid2pool(dq->fd.bpid);
2480	if (!dpaa_bp)
2481		return qman_cb_dqrr_consume;
2482
2483	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2484
2485	if (dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi))
2486		return qman_cb_dqrr_stop;
2487
2488	dpaa_eth_refill_bpools(priv);
2489	dpaa_rx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
2490
2491	return qman_cb_dqrr_consume;
2492}
2493
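/* Transmit a single XDP frame: store the xdpf backpointer in the
 * buffer headroom, build a contiguous FD around the data and enqueue
 * it on the current CPU's egress queue.
 */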
2494static int dpaa_xdp_xmit_frame(struct net_device *net_dev,
2495			       struct xdp_frame *xdpf)
2496{
2497	struct dpaa_priv *priv = netdev_priv(net_dev);
2498	struct rtnl_link_stats64 *percpu_stats;
2499	struct dpaa_percpu_priv *percpu_priv;
2500	struct dpaa_eth_swbp *swbp;
2501	struct netdev_queue *txq;
2502	void *buff_start;
2503	struct qm_fd fd;
2504	dma_addr_t addr;
2505	int err;
2506
2507	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2508	percpu_stats = &percpu_priv->stats;
2509
2510#ifdef CONFIG_DPAA_ERRATUM_A050385
2511	if (unlikely(fman_has_errata_a050385())) {
2512		if (dpaa_a050385_wa_xdpf(priv, &xdpf)) {
2513			err = -ENOMEM;
2514			goto out_error;
2515		}
2516	}
2517#endif
2518
2519	if (xdpf->headroom < DPAA_TX_PRIV_DATA_SIZE) {
2520		err = -EINVAL;
2521		goto out_error;
2522	}
2523
2524	buff_start = xdpf->data - xdpf->headroom;
2525
2526	/* Leave the skb backpointer at the start of the buffer empty and
2527	 * save the XDP frame instead, for easy cleanup on confirmation.
2528	 */
2529	swbp = (struct dpaa_eth_swbp *)buff_start;
2530	swbp->skb = NULL;
2531	swbp->xdpf = xdpf;
2532
2533	qm_fd_clear_fd(&fd);
2534	fd.bpid = FSL_DPAA_BPID_INV;
2535	fd.cmd |= cpu_to_be32(FM_FD_CMD_FCO);
2536	qm_fd_set_contig(&fd, xdpf->headroom, xdpf->len);
2537
2538	addr = dma_map_single(priv->tx_dma_dev, buff_start,
2539			      xdpf->headroom + xdpf->len,
2540			      DMA_TO_DEVICE);
2541	if (unlikely(dma_mapping_error(priv->tx_dma_dev, addr))) {
2542		err = -EINVAL;
2543		goto out_error;
2544	}
2545
2546	qm_fd_addr_set64(&fd, addr);
2547
2548	/* Bump the trans_start */
2549	txq = netdev_get_tx_queue(net_dev, smp_processor_id());
2550	txq_trans_cond_update(txq);
2551
2552	err = dpaa_xmit(priv, percpu_stats, smp_processor_id(), &fd);
2553	if (err) {
2554		dma_unmap_single(priv->tx_dma_dev, addr,
2555				 qm_fd_get_offset(&fd) + qm_fd_get_length(&fd),
2556				 DMA_TO_DEVICE);
2557		goto out_error;
2558	}
2559
2560	return 0;
2561
2562out_error:
2563	percpu_stats->tx_errors++;
2564	return err;
2565}
2566
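/* Run the attached XDP program on a received contiguous frame and act
 * on its verdict. Returns the XDP action so the caller can skip the
 * skb path when the frame was already consumed.
 */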
2567static u32 dpaa_run_xdp(struct dpaa_priv *priv, struct qm_fd *fd, void *vaddr,
2568			struct dpaa_fq *dpaa_fq, unsigned int *xdp_meta_len)
2569{
2570	ssize_t fd_off = qm_fd_get_offset(fd);
2571	struct bpf_prog *xdp_prog;
2572	struct xdp_frame *xdpf;
2573	struct xdp_buff xdp;
2574	u32 xdp_act;
2575	int err;
2576
2577	xdp_prog = READ_ONCE(priv->xdp_prog);
2578	if (!xdp_prog)
2579		return XDP_PASS;
2580
2581	xdp_init_buff(&xdp, DPAA_BP_RAW_SIZE - DPAA_TX_PRIV_DATA_SIZE,
2582		      &dpaa_fq->xdp_rxq);
2583	xdp_prepare_buff(&xdp, vaddr + fd_off - XDP_PACKET_HEADROOM,
2584			 XDP_PACKET_HEADROOM, qm_fd_get_length(fd), true);
2585
2586	/* We reserve a fixed headroom of 256 bytes under the erratum and we
2587	 * offer it all to XDP programs to use. If no room is left for the
2588	 * xdpf backpointer on TX, we will need to copy the data.
2589	 * Disable metadata support since data realignments might be required
2590	 * and the information can be lost.
2591	 */
2592#ifdef CONFIG_DPAA_ERRATUM_A050385
2593	if (unlikely(fman_has_errata_a050385())) {
2594		xdp_set_data_meta_invalid(&xdp);
2595		xdp.data_hard_start = vaddr;
2596		xdp.frame_sz = DPAA_BP_RAW_SIZE;
2597	}
2598#endif
2599
2600	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
2601
2602	/* Update the length and the offset of the FD */
2603	qm_fd_set_contig(fd, xdp.data - vaddr, xdp.data_end - xdp.data);
2604
2605	switch (xdp_act) {
2606	case XDP_PASS:
2607#ifdef CONFIG_DPAA_ERRATUM_A050385
2608		*xdp_meta_len = xdp_data_meta_unsupported(&xdp) ? 0 :
2609				xdp.data - xdp.data_meta;
2610#else
2611		*xdp_meta_len = xdp.data - xdp.data_meta;
2612#endif
2613		break;
2614	case XDP_TX:
2615		/* We can access the full headroom when sending the frame
2616		 * back out
2617		 */
2618		xdp.data_hard_start = vaddr;
2619		xdp.frame_sz = DPAA_BP_RAW_SIZE;
2620		xdpf = xdp_convert_buff_to_frame(&xdp);
2621		if (unlikely(!xdpf)) {
2622			free_pages((unsigned long)vaddr, 0);
2623			break;
2624		}
2625
2626		if (dpaa_xdp_xmit_frame(priv->net_dev, xdpf))
2627			xdp_return_frame_rx_napi(xdpf);
2628
2629		break;
2630	case XDP_REDIRECT:
2631		/* Allow redirect to use the full headroom */
2632		xdp.data_hard_start = vaddr;
2633		xdp.frame_sz = DPAA_BP_RAW_SIZE;
2634
2635		err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
2636		if (err) {
2637			trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
2638			free_pages((unsigned long)vaddr, 0);
2639		}
2640		break;
2641	default:
2642		bpf_warn_invalid_xdp_action(priv->net_dev, xdp_prog, xdp_act);
2643		fallthrough;
2644	case XDP_ABORTED:
2645		trace_xdp_exception(priv->net_dev, xdp_prog, xdp_act);
2646		fallthrough;
2647	case XDP_DROP:
2648		/* Free the buffer */
2649		free_pages((unsigned long)vaddr, 0);
2650		break;
2651	}
2652
2653	return xdp_act;
2654}
2655
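/* Default Rx callback, invoked from the QMan portal in NAPI context:
 * optionally runs XDP on contiguous frames, then builds an skb and
 * hands it to the network stack.
 */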
2656static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
2657						struct qman_fq *fq,
2658						const struct qm_dqrr_entry *dq,
2659						bool sched_napi)
2660{
2661	bool ts_valid = false, hash_valid = false;
2662	struct skb_shared_hwtstamps *shhwtstamps;
2663	unsigned int skb_len, xdp_meta_len = 0;
2664	struct rtnl_link_stats64 *percpu_stats;
2665	struct dpaa_percpu_priv *percpu_priv;
2666	const struct qm_fd *fd = &dq->fd;
2667	dma_addr_t addr = qm_fd_addr(fd);
2668	struct dpaa_napi_portal *np;
2669	enum qm_fd_format fd_format;
2670	struct net_device *net_dev;
2671	u32 fd_status, hash_offset;
2672	struct qm_sg_entry *sgt;
2673	struct dpaa_bp *dpaa_bp;
2674	struct dpaa_fq *dpaa_fq;
2675	struct dpaa_priv *priv;
2676	struct sk_buff *skb;
2677	int *count_ptr;
2678	u32 xdp_act;
2679	void *vaddr;
2680	u32 hash;
2681	u64 ns;
2682
2683	dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
2684	fd_status = be32_to_cpu(fd->status);
2685	fd_format = qm_fd_get_format(fd);
2686	net_dev = dpaa_fq->net_dev;
2687	priv = netdev_priv(net_dev);
2688	dpaa_bp = dpaa_bpid2pool(dq->fd.bpid);
2689	if (!dpaa_bp)
2690		return qman_cb_dqrr_consume;
2691
2692	/* Trace the Rx fd */
2693	trace_dpaa_rx_fd(net_dev, fq, &dq->fd);
2694
2695	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2696	percpu_stats = &percpu_priv->stats;
2697	np = &percpu_priv->np;
2698
2699	if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi)))
2700		return qman_cb_dqrr_stop;
2701
2702	/* Make sure we didn't run out of buffers */
2703	if (unlikely(dpaa_eth_refill_bpools(priv))) {
2704		/* Unable to refill the buffer pool due to insufficient
2705		 * system memory. Just release the frame back into the pool,
2706		 * otherwise we'll soon end up with an empty buffer pool.
2707		 */
2708		dpaa_fd_release(net_dev, &dq->fd);
2709		return qman_cb_dqrr_consume;
2710	}
2711
2712	if (unlikely(fd_status & FM_FD_STAT_RX_ERRORS)) {
2713		if (net_ratelimit())
2714			netif_warn(priv, hw, net_dev, "FD status = 0x%08x\n",
2715				   fd_status & FM_FD_STAT_RX_ERRORS);
2716
2717		percpu_stats->rx_errors++;
2718		dpaa_fd_release(net_dev, fd);
2719		return qman_cb_dqrr_consume;
2720	}
2721
2722	dma_unmap_page(dpaa_bp->priv->rx_dma_dev, addr, DPAA_BP_RAW_SIZE,
2723		       DMA_FROM_DEVICE);
2724
2725	/* prefetch the first 64 bytes of the frame or the SGT start */
2726	vaddr = phys_to_virt(addr);
2727	prefetch(vaddr + qm_fd_get_offset(fd));
2728
2729	/* The only FD types that we may receive are contig and S/G */
2730	WARN_ON((fd_format != qm_fd_contig) && (fd_format != qm_fd_sg));
2731
2732	/* Account for either the contig buffer or the SGT buffer (depending on
2733	 * which case we were in) having been removed from the pool.
2734	 */
2735	count_ptr = this_cpu_ptr(dpaa_bp->percpu_count);
2736	(*count_ptr)--;
2737
2738	/* Extract the timestamp stored in the headroom before running XDP */
2739	if (priv->rx_tstamp) {
2740		if (!fman_port_get_tstamp(priv->mac_dev->port[RX], vaddr, &ns))
2741			ts_valid = true;
2742		else
2743			WARN_ONCE(1, "fman_port_get_tstamp failed!\n");
2744	}
2745
2746	/* Extract the hash stored in the headroom before running XDP */
2747	if (net_dev->features & NETIF_F_RXHASH && priv->keygen_in_use &&
2748	    !fman_port_get_hash_result_offset(priv->mac_dev->port[RX],
2749					      &hash_offset)) {
2750		hash = be32_to_cpu(*(u32 *)(vaddr + hash_offset));
2751		hash_valid = true;
2752	}
2753
2754	if (likely(fd_format == qm_fd_contig)) {
2755		xdp_act = dpaa_run_xdp(priv, (struct qm_fd *)fd, vaddr,
2756				       dpaa_fq, &xdp_meta_len);
2757		np->xdp_act |= xdp_act;
2758		if (xdp_act != XDP_PASS) {
2759			percpu_stats->rx_packets++;
2760			percpu_stats->rx_bytes += qm_fd_get_length(fd);
2761			return qman_cb_dqrr_consume;
2762		}
2763		skb = contig_fd_to_skb(priv, fd);
2764	} else {
2765		/* XDP doesn't support S/G frames. Return the fragments to the
2766		 * buffer pool and release the SGT.
2767		 */
2768		if (READ_ONCE(priv->xdp_prog)) {
2769			WARN_ONCE(1, "S/G frames not supported under XDP\n");
2770			sgt = vaddr + qm_fd_get_offset(fd);
2771			dpaa_release_sgt_members(sgt);
2772			free_pages((unsigned long)vaddr, 0);
2773			return qman_cb_dqrr_consume;
2774		}
2775		skb = sg_fd_to_skb(priv, fd);
2776	}
2777	if (!skb)
2778		return qman_cb_dqrr_consume;
2779
2780	if (xdp_meta_len)
2781		skb_metadata_set(skb, xdp_meta_len);
2782
2783	/* Set the previously extracted timestamp */
2784	if (ts_valid) {
2785		shhwtstamps = skb_hwtstamps(skb);
2786		memset(shhwtstamps, 0, sizeof(*shhwtstamps));
2787		shhwtstamps->hwtstamp = ns_to_ktime(ns);
2788	}
2789
2790	skb->protocol = eth_type_trans(skb, net_dev);
2791
2792	/* Set the previously extracted hash */
2793	if (hash_valid) {
2794		enum pkt_hash_types type;
2795
2796		/* if L4 exists, it was used in the hash generation */
2797		type = be32_to_cpu(fd->status) & FM_FD_STAT_L4CV ?
2798			PKT_HASH_TYPE_L4 : PKT_HASH_TYPE_L3;
2799		skb_set_hash(skb, hash, type);
2800	}
2801
2802	skb_len = skb->len;
2803
2804	if (unlikely(netif_receive_skb(skb) == NET_RX_DROP)) {
2805		percpu_stats->rx_dropped++;
2806		return qman_cb_dqrr_consume;
2807	}
2808
2809	percpu_stats->rx_packets++;
2810	percpu_stats->rx_bytes += skb_len;
2811
2812	return qman_cb_dqrr_consume;
2813}
2814
2815static enum qman_cb_dqrr_result conf_error_dqrr(struct qman_portal *portal,
2816						struct qman_fq *fq,
2817						const struct qm_dqrr_entry *dq,
2818						bool sched_napi)
2819{
2820	struct dpaa_percpu_priv *percpu_priv;
2821	struct net_device *net_dev;
2822	struct dpaa_priv *priv;
2823
2824	net_dev = ((struct dpaa_fq *)fq)->net_dev;
2825	priv = netdev_priv(net_dev);
2826
2827	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2828
2829	if (dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi))
2830		return qman_cb_dqrr_stop;
2831
2832	dpaa_tx_error(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
2833
2834	return qman_cb_dqrr_consume;
2835}
2836
2837static enum qman_cb_dqrr_result conf_dflt_dqrr(struct qman_portal *portal,
2838					       struct qman_fq *fq,
2839					       const struct qm_dqrr_entry *dq,
2840					       bool sched_napi)
2841{
2842	struct dpaa_percpu_priv *percpu_priv;
2843	struct net_device *net_dev;
2844	struct dpaa_priv *priv;
2845
2846	net_dev = ((struct dpaa_fq *)fq)->net_dev;
2847	priv = netdev_priv(net_dev);
2848
2849	/* Trace the fd */
2850	trace_dpaa_tx_conf_fd(net_dev, fq, &dq->fd);
2851
2852	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2853
2854	if (dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi))
2855		return qman_cb_dqrr_stop;
2856
2857	dpaa_tx_conf(net_dev, priv, percpu_priv, &dq->fd, fq->fqid);
2858
2859	return qman_cb_dqrr_consume;
2860}
2861
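/* Enqueue rejection (ERN) callback for egress FQs: count the drop and
 * release the Tx resources held by the rejected frame.
 */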
2862static void egress_ern(struct qman_portal *portal,
2863		       struct qman_fq *fq,
2864		       const union qm_mr_entry *msg)
2865{
2866	const struct qm_fd *fd = &msg->ern.fd;
2867	struct dpaa_percpu_priv *percpu_priv;
2868	const struct dpaa_priv *priv;
2869	struct net_device *net_dev;
2870	struct sk_buff *skb;
2871
2872	net_dev = ((struct dpaa_fq *)fq)->net_dev;
2873	priv = netdev_priv(net_dev);
2874	percpu_priv = this_cpu_ptr(priv->percpu_priv);
2875
2876	percpu_priv->stats.tx_dropped++;
2877	percpu_priv->stats.tx_fifo_errors++;
2878	count_ern(percpu_priv, msg);
2879
2880	skb = dpaa_cleanup_tx_fd(priv, fd, false);
2881	dev_kfree_skb_any(skb);
2882}
2883
2884static const struct dpaa_fq_cbs dpaa_fq_cbs = {
2885	.rx_defq = { .cb = { .dqrr = rx_default_dqrr } },
2886	.tx_defq = { .cb = { .dqrr = conf_dflt_dqrr } },
2887	.rx_errq = { .cb = { .dqrr = rx_error_dqrr } },
2888	.tx_errq = { .cb = { .dqrr = conf_error_dqrr } },
2889	.egress_ern = { .cb = { .ern = egress_ern } }
2890};
2891
2892static void dpaa_eth_napi_enable(struct dpaa_priv *priv)
2893{
2894	struct dpaa_percpu_priv *percpu_priv;
2895	int i;
2896
2897	for_each_online_cpu(i) {
2898		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
2899
2900		percpu_priv->np.down = false;
2901		napi_enable(&percpu_priv->np.napi);
2902	}
2903}
2904
2905static void dpaa_eth_napi_disable(struct dpaa_priv *priv)
2906{
2907	struct dpaa_percpu_priv *percpu_priv;
2908	int i;
2909
2910	for_each_online_cpu(i) {
2911		percpu_priv = per_cpu_ptr(priv->percpu_priv, i);
2912
2913		percpu_priv->np.down = true;
2914		napi_disable(&percpu_priv->np.napi);
2915	}
2916}
2917
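/* ndo_open: enable NAPI, connect the PHY, then enable the FMan ports
 * and the MAC before starting the Tx queues.
 */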
2918static int dpaa_open(struct net_device *net_dev)
2919{
2920	struct mac_device *mac_dev;
2921	struct dpaa_priv *priv;
2922	int err, i;
2923
2924	priv = netdev_priv(net_dev);
2925	mac_dev = priv->mac_dev;
2926	dpaa_eth_napi_enable(priv);
2927
2928	err = phylink_of_phy_connect(mac_dev->phylink,
2929				     mac_dev->dev->of_node, 0);
2930	if (err)
2931		goto phy_init_failed;
2932
2933	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++) {
2934		err = fman_port_enable(mac_dev->port[i]);
2935		if (err)
2936			goto mac_start_failed;
2937	}
2938
2939	err = priv->mac_dev->enable(mac_dev->fman_mac);
2940	if (err < 0) {
2941		netif_err(priv, ifup, net_dev, "mac_dev->enable() = %d\n", err);
2942		goto mac_start_failed;
2943	}
2944	phylink_start(mac_dev->phylink);
2945
2946	netif_tx_start_all_queues(net_dev);
2947
2948	return 0;
2949
2950mac_start_failed:
2951	for (i = 0; i < ARRAY_SIZE(mac_dev->port); i++)
2952		fman_port_disable(mac_dev->port[i]);
2953	phylink_disconnect_phy(mac_dev->phylink);
2954
2955phy_init_failed:
2956	dpaa_eth_napi_disable(priv);
2957
2958	return err;
2959}
2960
2961static int dpaa_eth_stop(struct net_device *net_dev)
2962{
2963	struct dpaa_priv *priv;
2964	int err;
2965
2966	err = dpaa_stop(net_dev);
2967
2968	priv = netdev_priv(net_dev);
2969	dpaa_eth_napi_disable(priv);
2970
2971	return err;
2972}
2973
2974static bool xdp_validate_mtu(struct dpaa_priv *priv, int mtu)
2975{
2976	int max_contig_data = priv->dpaa_bp->size - priv->rx_headroom;
2977
2978	/* We do not support S/G fragments when XDP is enabled.
2979	 * Limit the MTU in relation to the buffer size.
2980	 */
2981	if (mtu + VLAN_ETH_HLEN + ETH_FCS_LEN > max_contig_data) {
2982		dev_warn(priv->net_dev->dev.parent,
2983			 "The maximum MTU for XDP is %d\n",
2984			 max_contig_data - VLAN_ETH_HLEN - ETH_FCS_LEN);
2985		return false;
2986	}
2987
2988	return true;
2989}
2990
2991static int dpaa_change_mtu(struct net_device *net_dev, int new_mtu)
2992{
2993	struct dpaa_priv *priv = netdev_priv(net_dev);
2994
2995	if (priv->xdp_prog && !xdp_validate_mtu(priv, new_mtu))
2996		return -EINVAL;
2997
2998	net_dev->mtu = new_mtu;
2999	return 0;
3000}
3001
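/* Install or remove an XDP program. The interface is temporarily
 * brought down while the program pointer is swapped.
 */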
3002static int dpaa_setup_xdp(struct net_device *net_dev, struct netdev_bpf *bpf)
3003{
3004	struct dpaa_priv *priv = netdev_priv(net_dev);
3005	struct bpf_prog *old_prog;
3006	int err;
3007	bool up;
3008
3009	/* S/G fragments are not supported in XDP-mode */
3010	if (bpf->prog && !xdp_validate_mtu(priv, net_dev->mtu)) {
3011		NL_SET_ERR_MSG_MOD(bpf->extack, "MTU too large for XDP");
3012		return -EINVAL;
3013	}
3014
3015	up = netif_running(net_dev);
3016
3017	if (up)
3018		dpaa_eth_stop(net_dev);
3019
3020	old_prog = xchg(&priv->xdp_prog, bpf->prog);
3021	if (old_prog)
3022		bpf_prog_put(old_prog);
3023
3024	if (up) {
3025		err = dpaa_open(net_dev);
3026		if (err) {
3027			NL_SET_ERR_MSG_MOD(bpf->extack, "dpaa_open() failed");
3028			return err;
3029		}
3030	}
3031
3032	return 0;
3033}
3034
3035static int dpaa_xdp(struct net_device *net_dev, struct netdev_bpf *xdp)
3036{
3037	switch (xdp->command) {
3038	case XDP_SETUP_PROG:
3039		return dpaa_setup_xdp(net_dev, xdp);
3040	default:
3041		return -EINVAL;
3042	}
3043}
3044
3045static int dpaa_xdp_xmit(struct net_device *net_dev, int n,
3046			 struct xdp_frame **frames, u32 flags)
3047{
3048	struct xdp_frame *xdpf;
3049	int i, nxmit = 0;
3050
3051	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
3052		return -EINVAL;
3053
3054	if (!netif_running(net_dev))
3055		return -ENETDOWN;
3056
3057	for (i = 0; i < n; i++) {
3058		xdpf = frames[i];
3059		if (dpaa_xdp_xmit_frame(net_dev, xdpf))
3060			break;
3061		nxmit++;
3062	}
3063
3064	return nxmit;
3065}
3066
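/* SIOCSHWTSTAMP handler: enable hardware timestamping on the FMan MAC
 * according to the requested hwtstamp_config.
 */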
3067static int dpaa_ts_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
3068{
3069	struct dpaa_priv *priv = netdev_priv(dev);
3070	struct hwtstamp_config config;
3071
3072	if (copy_from_user(&config, rq->ifr_data, sizeof(config)))
3073		return -EFAULT;
3074
3075	switch (config.tx_type) {
3076	case HWTSTAMP_TX_OFF:
3077		/* Rx and Tx timestamping cannot be disabled separately,
3078		 * so do nothing here.
3079		 */
3080		priv->tx_tstamp = false;
3081		break;
3082	case HWTSTAMP_TX_ON:
3083		priv->mac_dev->set_tstamp(priv->mac_dev->fman_mac, true);
3084		priv->tx_tstamp = true;
3085		break;
3086	default:
3087		return -ERANGE;
3088	}
3089
3090	if (config.rx_filter == HWTSTAMP_FILTER_NONE) {
3091		/* Rx and Tx timestamping cannot be disabled separately,
3092		 * so do nothing here.
3093		 */
3094		priv->rx_tstamp = false;
3095	} else {
3096		priv->mac_dev->set_tstamp(priv->mac_dev->fman_mac, true);
3097		priv->rx_tstamp = true;
3098		/* TS is set for all frame types, not only those requested */
3099		config.rx_filter = HWTSTAMP_FILTER_ALL;
3100	}
3101
3102	return copy_to_user(rq->ifr_data, &config, sizeof(config)) ?
3103			-EFAULT : 0;
3104}
3105
3106static int dpaa_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd)
3107{
3108	int ret = -EINVAL;
3109	struct dpaa_priv *priv = netdev_priv(net_dev);
3110
3111	if (cmd == SIOCGMIIREG) {
3112		if (net_dev->phydev)
3113			return phylink_mii_ioctl(priv->mac_dev->phylink, rq,
3114						 cmd);
3115	}
3116
3117	if (cmd == SIOCSHWTSTAMP)
3118		return dpaa_ts_ioctl(net_dev, rq, cmd);
3119
3120	return ret;
3121}
3122
3123static const struct net_device_ops dpaa_ops = {
3124	.ndo_open = dpaa_open,
3125	.ndo_start_xmit = dpaa_start_xmit,
3126	.ndo_stop = dpaa_eth_stop,
3127	.ndo_tx_timeout = dpaa_tx_timeout,
3128	.ndo_get_stats64 = dpaa_get_stats64,
3129	.ndo_change_carrier = fixed_phy_change_carrier,
3130	.ndo_set_mac_address = dpaa_set_mac_address,
3131	.ndo_validate_addr = eth_validate_addr,
3132	.ndo_set_rx_mode = dpaa_set_rx_mode,
3133	.ndo_eth_ioctl = dpaa_ioctl,
3134	.ndo_setup_tc = dpaa_setup_tc,
3135	.ndo_change_mtu = dpaa_change_mtu,
3136	.ndo_bpf = dpaa_xdp,
3137	.ndo_xdp_xmit = dpaa_xdp_xmit,
3138};
3139
3140static int dpaa_napi_add(struct net_device *net_dev)
3141{
3142	struct dpaa_priv *priv = netdev_priv(net_dev);
3143	struct dpaa_percpu_priv *percpu_priv;
3144	int cpu;
3145
3146	for_each_possible_cpu(cpu) {
3147		percpu_priv = per_cpu_ptr(priv->percpu_priv, cpu);
3148
3149		netif_napi_add(net_dev, &percpu_priv->np.napi, dpaa_eth_poll);
3150	}
3151
3152	return 0;
3153}
3154
3155static void dpaa_napi_del(struct net_device *net_dev)
3156{
3157	struct dpaa_priv *priv = netdev_priv(net_dev);
3158	struct dpaa_percpu_priv *percpu_priv;
3159	int cpu;
3160
3161	for_each_possible_cpu(cpu) {
3162		percpu_priv = per_cpu_ptr(priv->percpu_priv, cpu);
3163
3164		netif_napi_del(&percpu_priv->np.napi);
3165	}
3166}
3167
3168static inline void dpaa_bp_free_pf(const struct dpaa_bp *bp,
3169				   struct bm_buffer *bmb)
3170{
3171	dma_addr_t addr = bm_buf_addr(bmb);
3172
3173	dma_unmap_page(bp->priv->rx_dma_dev, addr, DPAA_BP_RAW_SIZE,
3174		       DMA_FROM_DEVICE);
3175
3176	skb_free_frag(phys_to_virt(addr));
3177}
3178
3179/* Alloc the dpaa_bp struct and configure default values */
3180static struct dpaa_bp *dpaa_bp_alloc(struct device *dev)
3181{
3182	struct dpaa_bp *dpaa_bp;
3183
3184	dpaa_bp = devm_kzalloc(dev, sizeof(*dpaa_bp), GFP_KERNEL);
3185	if (!dpaa_bp)
3186		return ERR_PTR(-ENOMEM);
3187
3188	dpaa_bp->bpid = FSL_DPAA_BPID_INV;
3189	dpaa_bp->percpu_count = devm_alloc_percpu(dev, *dpaa_bp->percpu_count);
3190	if (!dpaa_bp->percpu_count)
3191		return ERR_PTR(-ENOMEM);
3192
3193	dpaa_bp->config_count = FSL_DPAA_ETH_MAX_BUF_COUNT;
3194
3195	dpaa_bp->seed_cb = dpaa_bp_seed;
3196	dpaa_bp->free_buf_cb = dpaa_bp_free_pf;
3197
3198	return dpaa_bp;
3199}
3200
3201/* Place all ingress FQs (Rx Default, Rx Error) in a dedicated CGR.
3202 * We won't be sending congestion notifications to FMan; for now, we just use
3203 * this CGR to generate enqueue rejections to FMan in order to drop the frames
3204 * before they reach our ingress queues and eat up memory.
3205 */
3206static int dpaa_ingress_cgr_init(struct dpaa_priv *priv)
3207{
3208	struct qm_mcc_initcgr initcgr;
3209	u32 cs_th;
3210	int err;
3211
3212	err = qman_alloc_cgrid(&priv->ingress_cgr.cgrid);
3213	if (err < 0) {
3214		if (netif_msg_drv(priv))
3215			pr_err("Error %d allocating CGR ID\n", err);
3216		goto out_error;
3217	}
3218
3219	/* Enable CS TD, but disable Congestion State Change Notifications. */
3220	memset(&initcgr, 0, sizeof(initcgr));
3221	initcgr.we_mask = cpu_to_be16(QM_CGR_WE_CS_THRES);
3222	initcgr.cgr.cscn_en = QM_CGR_EN;
3223	cs_th = DPAA_INGRESS_CS_THRESHOLD;
3224	qm_cgr_cs_thres_set64(&initcgr.cgr.cs_thres, cs_th, 1);
3225
3226	initcgr.we_mask |= cpu_to_be16(QM_CGR_WE_CSTD_EN);
3227	initcgr.cgr.cstd_en = QM_CGR_EN;
3228
3229	/* This CGR will be associated with the SWP affined to the current CPU.
3230	 * However, we'll place all our ingress FQs in it.
3231	 */
3232	err = qman_create_cgr(&priv->ingress_cgr, QMAN_CGR_FLAG_USE_INIT,
3233			      &initcgr);
3234	if (err < 0) {
3235		if (netif_msg_drv(priv))
3236			pr_err("Error %d creating ingress CGR with ID %d\n",
3237			       err, priv->ingress_cgr.cgrid);
3238		qman_release_cgrid(priv->ingress_cgr.cgrid);
3239		goto out_error;
3240	}
3241	if (netif_msg_drv(priv))
3242		pr_debug("Created ingress CGR %d for netdev with hwaddr %pM\n",
3243			 priv->ingress_cgr.cgrid, priv->mac_dev->addr);
3244
3245	priv->use_ingress_cgr = true;
3246
3247out_error:
3248	return err;
3249}
3250
3251static u16 dpaa_get_headroom(struct dpaa_buffer_layout *bl,
3252			     enum port_type port)
3253{
3254	u16 headroom;
3255
3256	/* The frame headroom must accommodate:
3257	 * - the driver private data area
3258	 * - parse results, hash results, timestamp if selected
3259	 * If either hash results or the timestamp is selected, both will
3260	 * be copied to/from the frame headroom, as the TS is located between
3261	 * the PR and the HR in the IC, and the IC copy size has a granularity
3262	 * of 16 bytes (see the FMBM_RICP and FMBM_TICP registers in the DPAARM).
3263	 *
3264	 * Also make sure the headroom is a multiple of data_align bytes
3265	 */
3266	headroom = (u16)(bl[port].priv_data_size + DPAA_HWA_SIZE);
3267
3268	if (port == RX) {
3269#ifdef CONFIG_DPAA_ERRATUM_A050385
3270		if (unlikely(fman_has_errata_a050385()))
3271			headroom = XDP_PACKET_HEADROOM;
3272#endif
3273
3274		return ALIGN(headroom, DPAA_FD_RX_DATA_ALIGNMENT);
3275	} else {
3276		return ALIGN(headroom, DPAA_FD_DATA_ALIGNMENT);
3277	}
3278}
3279
3280static int dpaa_eth_probe(struct platform_device *pdev)
3281{
3282	struct net_device *net_dev = NULL;
3283	struct dpaa_bp *dpaa_bp = NULL;
3284	struct dpaa_fq *dpaa_fq, *tmp;
3285	struct dpaa_priv *priv = NULL;
3286	struct fm_port_fqs port_fqs;
3287	struct mac_device *mac_dev;
3288	int err = 0, channel;
3289	struct device *dev;
3290
3291	dev = &pdev->dev;
3292
3293	err = bman_is_probed();
3294	if (!err)
3295		return -EPROBE_DEFER;
3296	if (err < 0) {
3297		dev_err(dev, "failing probe due to bman probe error\n");
3298		return -ENODEV;
3299	}
3300	err = qman_is_probed();
3301	if (!err)
3302		return -EPROBE_DEFER;
3303	if (err < 0) {
3304		dev_err(dev, "failing probe due to qman probe error\n");
3305		return -ENODEV;
3306	}
3307	err = bman_portals_probed();
3308	if (!err)
3309		return -EPROBE_DEFER;
3310	if (err < 0) {
3311		dev_err(dev,
3312			"failing probe due to bman portals probe error\n");
3313		return -ENODEV;
3314	}
3315	err = qman_portals_probed();
3316	if (!err)
3317		return -EPROBE_DEFER;
3318	if (err < 0) {
3319		dev_err(dev,
3320			"failing probe due to qman portals probe error\n");
3321		return -ENODEV;
3322	}
3323
3324	/* Allocate this early, so we can store relevant information in
3325	 * the private area
3326	 */
3327	net_dev = alloc_etherdev_mq(sizeof(*priv), DPAA_ETH_TXQ_NUM);
3328	if (!net_dev) {
3329		dev_err(dev, "alloc_etherdev_mq() failed\n");
3330		return -ENOMEM;
3331	}
3332
3333	/* Do this here, so we can be verbose early */
3334	SET_NETDEV_DEV(net_dev, dev->parent);
3335	dev_set_drvdata(dev, net_dev);
3336
3337	priv = netdev_priv(net_dev);
3338	priv->net_dev = net_dev;
3339
3340	priv->msg_enable = netif_msg_init(debug, DPAA_MSG_DEFAULT);
3341
3342	mac_dev = dpaa_mac_dev_get(pdev);
3343	if (IS_ERR(mac_dev)) {
3344		netdev_err(net_dev, "dpaa_mac_dev_get() failed\n");
3345		err = PTR_ERR(mac_dev);
3346		goto free_netdev;
3347	}
3348
3349	/* Devices used for DMA mapping */
3350	priv->rx_dma_dev = fman_port_get_device(mac_dev->port[RX]);
3351	priv->tx_dma_dev = fman_port_get_device(mac_dev->port[TX]);
3352	err = dma_coerce_mask_and_coherent(priv->rx_dma_dev, DMA_BIT_MASK(40));
3353	if (!err)
3354		err = dma_coerce_mask_and_coherent(priv->tx_dma_dev,
3355						   DMA_BIT_MASK(40));
3356	if (err) {
3357		netdev_err(net_dev, "dma_coerce_mask_and_coherent() failed\n");
3358		goto free_netdev;
3359	}
3360
3361	/* If fsl_fm_max_frm is set to a higher value than the common 1500,
3362	 * we choose conservatively and let the user explicitly set a higher
3363	 * MTU via ifconfig. Otherwise, the user may end up with different MTUs
3364	 * in the same LAN.
3365	 * If on the other hand fsl_fm_max_frm has been chosen below 1500,
3366	 * start with the maximum allowed.
3367	 */
3368	net_dev->mtu = min(dpaa_get_max_mtu(), ETH_DATA_LEN);
3369
3370	netdev_dbg(net_dev, "Setting initial MTU on net device: %d\n",
3371		   net_dev->mtu);
3372
3373	priv->buf_layout[RX].priv_data_size = DPAA_RX_PRIV_DATA_SIZE; /* Rx */
3374	priv->buf_layout[TX].priv_data_size = DPAA_TX_PRIV_DATA_SIZE; /* Tx */
3375
3376	/* bp init */
3377	dpaa_bp = dpaa_bp_alloc(dev);
3378	if (IS_ERR(dpaa_bp)) {
3379		err = PTR_ERR(dpaa_bp);
3380		goto free_dpaa_bps;
3381	}
3382	/* the raw size of the buffers used for reception */
3383	dpaa_bp->raw_size = DPAA_BP_RAW_SIZE;
3384	/* avoid runtime computations by keeping the usable size here */
3385	dpaa_bp->size = dpaa_bp_size(dpaa_bp->raw_size);
3386	dpaa_bp->priv = priv;
3387
3388	err = dpaa_bp_alloc_pool(dpaa_bp);
3389	if (err < 0)
3390		goto free_dpaa_bps;
3391	priv->dpaa_bp = dpaa_bp;
3392
3393	INIT_LIST_HEAD(&priv->dpaa_fq_list);
3394
3395	memset(&port_fqs, 0, sizeof(port_fqs));
3396
3397	err = dpaa_alloc_all_fqs(dev, &priv->dpaa_fq_list, &port_fqs);
3398	if (err < 0) {
3399		dev_err(dev, "dpaa_alloc_all_fqs() failed\n");
3400		goto free_dpaa_bps;
3401	}
3402
3403	priv->mac_dev = mac_dev;
3404
3405	channel = dpaa_get_channel();
3406	if (channel < 0) {
3407		dev_err(dev, "dpaa_get_channel() failed\n");
3408		err = channel;
3409		goto free_dpaa_bps;
3410	}
3411
3412	priv->channel = (u16)channel;
3413
3414	/* Walk the CPUs with affine portals
3415	 * and add this pool channel to each's dequeue mask.
3416	 */
3417	dpaa_eth_add_channel(priv->channel, &pdev->dev);
3418
3419	dpaa_fq_setup(priv, &dpaa_fq_cbs, priv->mac_dev->port[TX]);
3420
3421	/* Create a congestion group for this netdev, with
3422	 * dynamically-allocated CGR ID.
3423	 * Must be executed after probing the MAC, but before
3424	 * assigning the egress FQs to the CGRs.
3425	 */
3426	err = dpaa_eth_cgr_init(priv);
3427	if (err < 0) {
3428		dev_err(dev, "Error initializing CGR\n");
3429		goto free_dpaa_bps;
3430	}
3431
3432	err = dpaa_ingress_cgr_init(priv);
3433	if (err < 0) {
3434		dev_err(dev, "Error initializing ingress CGR\n");
3435		goto delete_egress_cgr;
3436	}
3437
3438	/* Add the FQs to the interface, and make them active */
3439	list_for_each_entry_safe(dpaa_fq, tmp, &priv->dpaa_fq_list, list) {
3440		err = dpaa_fq_init(dpaa_fq, false);
3441		if (err < 0)
3442			goto free_dpaa_fqs;
3443	}
3444
3445	priv->tx_headroom = dpaa_get_headroom(priv->buf_layout, TX);
3446	priv->rx_headroom = dpaa_get_headroom(priv->buf_layout, RX);
3447
3448	/* All real interfaces need their ports initialized */
3449	err = dpaa_eth_init_ports(mac_dev, dpaa_bp, &port_fqs,
3450				  &priv->buf_layout[0], dev);
3451	if (err)
3452		goto free_dpaa_fqs;
3453
3454	/* Rx traffic distribution based on keygen hashing defaults to on */
3455	priv->keygen_in_use = true;
3456
3457	priv->percpu_priv = devm_alloc_percpu(dev, *priv->percpu_priv);
3458	if (!priv->percpu_priv) {
3459		dev_err(dev, "devm_alloc_percpu() failed\n");
3460		err = -ENOMEM;
3461		goto free_dpaa_fqs;
3462	}
3463
3464	priv->num_tc = 1;
3465	netif_set_real_num_tx_queues(net_dev, priv->num_tc * DPAA_TC_TXQ_NUM);
3466
3467	/* Initialize NAPI */
3468	err = dpaa_napi_add(net_dev);
3469	if (err < 0)
3470		goto delete_dpaa_napi;
3471
3472	err = dpaa_netdev_init(net_dev, &dpaa_ops, tx_timeout);
3473	if (err < 0)
3474		goto delete_dpaa_napi;
3475
3476	dpaa_eth_sysfs_init(&net_dev->dev);
3477
3478	netif_info(priv, probe, net_dev, "Probed interface %s\n",
3479		   net_dev->name);
3480
3481	return 0;
3482
3483delete_dpaa_napi:
3484	dpaa_napi_del(net_dev);
3485free_dpaa_fqs:
3486	dpaa_fq_free(dev, &priv->dpaa_fq_list);
3487	qman_delete_cgr_safe(&priv->ingress_cgr);
3488	qman_release_cgrid(priv->ingress_cgr.cgrid);
3489delete_egress_cgr:
3490	qman_delete_cgr_safe(&priv->cgr_data.cgr);
3491	qman_release_cgrid(priv->cgr_data.cgr.cgrid);
3492free_dpaa_bps:
3493	dpaa_bps_free(priv);
3494free_netdev:
3495	dev_set_drvdata(dev, NULL);
3496	free_netdev(net_dev);
3497
3498	return err;
3499}
3500
3501static void dpaa_remove(struct platform_device *pdev)
3502{
3503	struct net_device *net_dev;
3504	struct dpaa_priv *priv;
3505	struct device *dev;
3506	int err;
3507
3508	dev = &pdev->dev;
3509	net_dev = dev_get_drvdata(dev);
3510
3511	priv = netdev_priv(net_dev);
3512
3513	dpaa_eth_sysfs_remove(dev);
3514
3515	dev_set_drvdata(dev, NULL);
3516	unregister_netdev(net_dev);
3517	phylink_destroy(priv->mac_dev->phylink);
3518
3519	err = dpaa_fq_free(dev, &priv->dpaa_fq_list);
3520	if (err)
3521		dev_err(dev, "Failed to free FQs on remove (%pE)\n",
3522			ERR_PTR(err));
3523
3524	qman_delete_cgr_safe(&priv->ingress_cgr);
3525	qman_release_cgrid(priv->ingress_cgr.cgrid);
3526	qman_delete_cgr_safe(&priv->cgr_data.cgr);
3527	qman_release_cgrid(priv->cgr_data.cgr.cgrid);
3528
3529	dpaa_napi_del(net_dev);
3530
3531	dpaa_bps_free(priv);
3532
3533	free_netdev(net_dev);
3534}
3535
3536static const struct platform_device_id dpaa_devtype[] = {
3537	{
3538		.name = "dpaa-ethernet",
3539		.driver_data = 0,
3540	}, {
3541	}
3542};
3543MODULE_DEVICE_TABLE(platform, dpaa_devtype);
3544
3545static struct platform_driver dpaa_driver = {
3546	.driver = {
3547		.name = KBUILD_MODNAME,
3548	},
3549	.id_table = dpaa_devtype,
3550	.probe = dpaa_eth_probe,
3551	.remove_new = dpaa_remove
3552};
3553
3554static int __init dpaa_load(void)
3555{
3556	int err;
3557
3558	pr_debug("FSL DPAA Ethernet driver\n");
3559
3560	/* initialize dpaa_eth mirror values */
3561	dpaa_rx_extra_headroom = fman_get_rx_extra_headroom();
3562	dpaa_max_frm = fman_get_max_frm();
3563
3564	err = platform_driver_register(&dpaa_driver);
3565	if (err < 0)
3566		pr_err("Error, platform_driver_register() = %d\n", err);
3567
3568	return err;
3569}
3570module_init(dpaa_load);
3571
3572static void __exit dpaa_unload(void)
3573{
3574	platform_driver_unregister(&dpaa_driver);
3575
3576	/* Only one channel is used and needs to be released after all
3577	 * interfaces are removed
3578	 */
3579	dpaa_release_channel();
3580}
3581module_exit(dpaa_unload);
3582
3583MODULE_LICENSE("Dual BSD/GPL");
3584MODULE_DESCRIPTION("FSL DPAA Ethernet driver");
3585