/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (C) 2013 Emulex
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the Emulex Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Contact Information:
 * freebsd-drivers@emulex.com
 *
 * Emulex
 * 3333 Susan Street
 * Costa Mesa, CA 92626
 */


#include "opt_inet6.h"
#include "opt_inet.h"

#include "oce_if.h"
#include "oce_user.h"

#define is_tso_pkt(m) ((m)->m_pkthdr.csum_flags & CSUM_TSO)

/* UE Status Low CSR */
static char *ue_status_low_desc[] = {
	"CEV",
	"CTX",
	"DBUF",
	"ERX",
	"Host",
	"MPU",
	"NDMA",
	"PTC ",
	"RDMA ",
	"RXF ",
	"RXIPS ",
	"RXULP0 ",
	"RXULP1 ",
	"RXULP2 ",
	"TIM ",
	"TPOST ",
	"TPRE ",
	"TXIPS ",
	"TXULP0 ",
	"TXULP1 ",
	"UC ",
	"WDMA ",
	"TXULP2 ",
	"HOST1 ",
	"P0_OB_LINK ",
	"P1_OB_LINK ",
	"HOST_GPIO ",
	"MBOX ",
	"AXGMAC0",
	"AXGMAC1",
	"JTAG",
	"MPU_INTPEND"
};

/* UE Status High CSR */
static char *ue_status_hi_desc[] = {
	"LPCMEMHOST",
	"MGMT_MAC",
	"PCS0ONLINE",
	"MPU_IRAM",
	"PCS1ONLINE",
	"PCTL0",
	"PCTL1",
	"PMEM",
	"RR",
	"TXPB",
	"RXPP",
	"XAUI",
	"TXP",
	"ARM",
	"IPC",
	"HOST2",
	"HOST3",
	"HOST4",
	"HOST5",
	"HOST6",
	"HOST7",
	"HOST8",
	"HOST9",
	"NETC",
	"Unknown",
	"Unknown",
	"Unknown",
	"Unknown",
	"Unknown",
	"Unknown",
	"Unknown",
	"Unknown"
};

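/*
 * Normalized per-packet completion information.  oce_rx() and
 * oce_rx_lro() translate either the regular RX CQE or the HWLRO
 * CQE pair into this form before calling oce_rx_mbuf_chain().
 */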
struct oce_common_cqe_info {
	uint8_t vtp:1;
	uint8_t l4_cksum_pass:1;
	uint8_t ip_cksum_pass:1;
	uint8_t ipv6_frame:1;
	uint8_t qnq:1;
	uint8_t rsvd:3;
	uint8_t num_frags;
	uint16_t pkt_size;
	uint16_t vtag;
};

/* Driver entry points prototypes */
static int  oce_probe(device_t dev);
static int  oce_attach(device_t dev);
static int  oce_detach(device_t dev);
static int  oce_shutdown(device_t dev);
static int  oce_ioctl(if_t ifp, u_long command, caddr_t data);
static void oce_init(void *xsc);
static int  oce_multiq_start(if_t ifp, struct mbuf *m);
static void oce_multiq_flush(if_t ifp);

/* Driver interrupt routines prototypes */
static void oce_intr(void *arg, int pending);
static int  oce_setup_intr(POCE_SOFTC sc);
static int  oce_fast_isr(void *arg);
static int  oce_alloc_intr(POCE_SOFTC sc, int vector,
			  void (*isr) (void *arg, int pending));

/* Media callbacks prototypes */
static void oce_media_status(if_t ifp, struct ifmediareq *req);
static int  oce_media_change(if_t ifp);

/* Transmit routines prototypes */
static int  oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index);
static void oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq);
static void oce_process_tx_completion(struct oce_wq *wq);
static int  oce_multiq_transmit(if_t ifp, struct mbuf *m,
				 struct oce_wq *wq);

/* Receive routines prototypes */
static int  oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe);
static int  oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe);
static void oce_rx(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe);
static void oce_check_rx_bufs(POCE_SOFTC sc, uint32_t num_cqes,
		 struct oce_rq *rq);
static uint16_t oce_rq_handler_lro(void *arg);
static void oce_correct_header(struct mbuf *m, struct nic_hwlro_cqe_part1 *cqe1,
		 struct nic_hwlro_cqe_part2 *cqe2);
static void oce_rx_lro(struct oce_rq *rq, struct nic_hwlro_singleton_cqe *cqe,
		 struct nic_hwlro_cqe_part2 *cqe2);
static void oce_rx_mbuf_chain(struct oce_rq *rq,
		 struct oce_common_cqe_info *cqe_info, struct mbuf **m);

/* Helper function prototypes in this file */
static int  oce_attach_ifp(POCE_SOFTC sc);
static void oce_add_vlan(void *arg, if_t ifp, uint16_t vtag);
static void oce_del_vlan(void *arg, if_t ifp, uint16_t vtag);
static int  oce_vid_config(POCE_SOFTC sc);
static void oce_mac_addr_set(POCE_SOFTC sc);
static int  oce_handle_passthrough(if_t ifp, caddr_t data);
static void oce_local_timer(void *arg);
static void oce_if_deactivate(POCE_SOFTC sc);
static void oce_if_activate(POCE_SOFTC sc);
static void setup_max_queues_want(POCE_SOFTC sc);
static void update_queues_got(POCE_SOFTC sc);
static void process_link_state(POCE_SOFTC sc,
		 struct oce_async_cqe_link_state *acqe);
static int oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m);
static void oce_get_config(POCE_SOFTC sc);
static struct mbuf *oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m,
		 boolean_t *complete);
static void oce_read_env_variables(POCE_SOFTC sc);

/* IP specific */
#if defined(INET6) || defined(INET)
static int  oce_init_lro(POCE_SOFTC sc);
static struct mbuf * oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp);
#endif

static device_method_t oce_dispatch[] = {
	DEVMETHOD(device_probe, oce_probe),
	DEVMETHOD(device_attach, oce_attach),
	DEVMETHOD(device_detach, oce_detach),
	DEVMETHOD(device_shutdown, oce_shutdown),

	DEVMETHOD_END
};

static driver_t oce_driver = {
	"oce",
	oce_dispatch,
	sizeof(OCE_SOFTC)
};

/* global vars */
const char component_revision[32] = {"///" COMPONENT_REVISION "///"};

/* Module capabilities and parameters */
uint32_t oce_max_rsp_handled = OCE_MAX_RSP_HANDLED;
uint32_t oce_enable_rss = OCE_MODCAP_RSS;
uint32_t oce_rq_buf_size = 2048;

TUNABLE_INT("hw.oce.max_rsp_handled", &oce_max_rsp_handled);
TUNABLE_INT("hw.oce.enable_rss", &oce_enable_rss);
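/*
 * Both tunables may be overridden at boot time; for example (illustrative
 * values, not recommendations):
 *
 *   # /boot/loader.conf
 *   hw.oce.max_rsp_handled="64"
 *   hw.oce.enable_rss="0"
 */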

/* Supported devices table */
static uint32_t supportedDevices[] =  {
	(PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE2,
	(PCI_VENDOR_SERVERENGINES << 16) | PCI_PRODUCT_BE3,
	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_BE3,
	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201,
	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_XE201_VF,
	(PCI_VENDOR_EMULEX << 16) | PCI_PRODUCT_SH
};
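
/*
 * Each entry packs the PCI vendor ID into the upper 16 bits and the device
 * ID into the lower 16 bits.  oce_probe() matches both halves, and
 * MODULE_PNP_INFO below exports the same table for device autoloading.
 */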

DRIVER_MODULE(oce, pci, oce_driver, 0, 0);
MODULE_PNP_INFO("W32:vendor/device", pci, oce, supportedDevices,
    nitems(supportedDevices));
MODULE_DEPEND(oce, pci, 1, 1, 1);
MODULE_DEPEND(oce, ether, 1, 1, 1);
MODULE_VERSION(oce, 1);

POCE_SOFTC softc_head = NULL;
POCE_SOFTC softc_tail = NULL;

struct oce_rdma_if *oce_rdma_if = NULL;

/*****************************************************************************
 *			Driver entry points functions                        *
 *****************************************************************************/

static int
oce_probe(device_t dev)
{
	uint16_t vendor = 0;
	uint16_t device = 0;
	int i = 0;
	char str[256] = {0};
	POCE_SOFTC sc;

	sc = device_get_softc(dev);
	bzero(sc, sizeof(OCE_SOFTC));
	sc->dev = dev;

	vendor = pci_get_vendor(dev);
	device = pci_get_device(dev);

	for (i = 0; i < (sizeof(supportedDevices) / sizeof(uint32_t)); i++) {
		if (vendor == ((supportedDevices[i] >> 16) & 0xffff)) {
			if (device == (supportedDevices[i] & 0xffff)) {
				sprintf(str, "%s:%s", "Emulex CNA NIC function",
					component_revision);
				device_set_desc_copy(dev, str);

				switch (device) {
				case PCI_PRODUCT_BE2:
					sc->flags |= OCE_FLAGS_BE2;
					break;
				case PCI_PRODUCT_BE3:
					sc->flags |= OCE_FLAGS_BE3;
					break;
				case PCI_PRODUCT_XE201:
				case PCI_PRODUCT_XE201_VF:
					sc->flags |= OCE_FLAGS_XE201;
					break;
				case PCI_PRODUCT_SH:
					sc->flags |= OCE_FLAGS_SH;
					break;
				default:
					return ENXIO;
				}
				return BUS_PROBE_DEFAULT;
			}
		}
	}

	return ENXIO;
}

static int
oce_attach(device_t dev)
{
	POCE_SOFTC sc;
	int rc = 0;

	sc = device_get_softc(dev);

	rc = oce_hw_pci_alloc(sc);
	if (rc)
		return rc;

	sc->tx_ring_size = OCE_TX_RING_SIZE;
	sc->rx_ring_size = OCE_RX_RING_SIZE;
	/* receive fragment size should be a multiple of 2K */
	sc->rq_frag_size = ((oce_rq_buf_size / 2048) * 2048);
	sc->flow_control = OCE_DEFAULT_FLOW_CONTROL;
	sc->promisc	 = OCE_DEFAULT_PROMISCUOUS;

	LOCK_CREATE(&sc->bmbx_lock, "Mailbox_lock");
	LOCK_CREATE(&sc->dev_lock,  "Device_lock");

	/* initialise the hardware */
	rc = oce_hw_init(sc);
	if (rc)
		goto pci_res_free;

	oce_read_env_variables(sc);

	oce_get_config(sc);

	setup_max_queues_want(sc);

	rc = oce_setup_intr(sc);
	if (rc)
		goto mbox_free;

	rc = oce_queue_init_all(sc);
	if (rc)
		goto intr_free;

	rc = oce_attach_ifp(sc);
	if (rc)
		goto queues_free;

#if defined(INET6) || defined(INET)
	rc = oce_init_lro(sc);
	if (rc)
		goto ifp_free;
#endif

	rc = oce_hw_start(sc);
	if (rc)
		goto lro_free;

	sc->vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
				oce_add_vlan, sc, EVENTHANDLER_PRI_FIRST);
	sc->vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
				oce_del_vlan, sc, EVENTHANDLER_PRI_FIRST);

	rc = oce_stats_init(sc);
	if (rc)
		goto vlan_free;

	oce_add_sysctls(sc);

	callout_init(&sc->timer, CALLOUT_MPSAFE);
	rc = callout_reset(&sc->timer, 2 * hz, oce_local_timer, sc);
	if (rc)
		goto stats_free;

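	/*
	 * Link the new softc onto a global list (softc_head/softc_tail)
	 * so that code which needs to visit every adapter, such as the
	 * optional RDMA hook behind oce_rdma_if, can walk all instances;
	 * oce_detach() unlinks it again.
	 */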
	sc->next = NULL;
	if (softc_tail != NULL) {
		softc_tail->next = sc;
	} else {
		softc_head = sc;
	}
	softc_tail = sc;

	gone_in_dev(dev, 15, "relatively uncommon 10GbE NIC");

	return 0;

stats_free:
	callout_drain(&sc->timer);
	oce_stats_free(sc);
vlan_free:
	if (sc->vlan_attach)
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
	if (sc->vlan_detach)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);
	oce_hw_intr_disable(sc);
lro_free:
#if defined(INET6) || defined(INET)
	oce_free_lro(sc);
ifp_free:
#endif
	ether_ifdetach(sc->ifp);
	if_free(sc->ifp);
queues_free:
	oce_queue_release_all(sc);
intr_free:
	oce_intr_free(sc);
mbox_free:
	oce_dma_free(sc, &sc->bsmbx);
pci_res_free:
	oce_hw_pci_free(sc);
	LOCK_DESTROY(&sc->dev_lock);
	LOCK_DESTROY(&sc->bmbx_lock);
	return rc;

}

static int
oce_detach(device_t dev)
{
	POCE_SOFTC sc = device_get_softc(dev);
	POCE_SOFTC poce_sc_tmp, *ppoce_sc_tmp1, poce_sc_tmp2 = NULL;

	poce_sc_tmp = softc_head;
	ppoce_sc_tmp1 = &softc_head;
	while (poce_sc_tmp != NULL) {
		if (poce_sc_tmp == sc) {
			*ppoce_sc_tmp1 = sc->next;
			if (sc->next == NULL) {
				softc_tail = poce_sc_tmp2;
			}
			break;
		}
		poce_sc_tmp2 = poce_sc_tmp;
		ppoce_sc_tmp1 = &poce_sc_tmp->next;
		poce_sc_tmp = poce_sc_tmp->next;
	}

	LOCK(&sc->dev_lock);
	oce_if_deactivate(sc);
	UNLOCK(&sc->dev_lock);

	callout_drain(&sc->timer);

	if (sc->vlan_attach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vlan_attach);
	if (sc->vlan_detach != NULL)
		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vlan_detach);

	ether_ifdetach(sc->ifp);

	if_free(sc->ifp);

	oce_hw_shutdown(sc);

	bus_generic_detach(dev);

	return 0;
}

static int
oce_shutdown(device_t dev)
{
	int rc;

	rc = oce_detach(dev);

	return rc;
}

static int
oce_ioctl(if_t ifp, u_long command, caddr_t data)
{
	struct ifreq *ifr = (struct ifreq *)data;
	POCE_SOFTC sc = if_getsoftc(ifp);
	struct ifi2creq i2c;
	uint8_t	offset = 0;
	int rc = 0;
	uint32_t u;

	switch (command) {
	case SIOCGIFMEDIA:
		rc = ifmedia_ioctl(ifp, ifr, &sc->media, command);
		break;

	case SIOCSIFMTU:
		if (ifr->ifr_mtu > OCE_MAX_MTU)
			rc = EINVAL;
		else
			if_setmtu(ifp, ifr->ifr_mtu);
		break;

	case SIOCSIFFLAGS:
		if (if_getflags(ifp) & IFF_UP) {
			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING)) {
				if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
				oce_init(sc);
			}
			device_printf(sc->dev, "Interface Up\n");
		} else {
			LOCK(&sc->dev_lock);

			if_setdrvflagbits(sc->ifp, 0,
			    IFF_DRV_RUNNING | IFF_DRV_OACTIVE);
			oce_if_deactivate(sc);

			UNLOCK(&sc->dev_lock);

			device_printf(sc->dev, "Interface Down\n");
		}

		if ((if_getflags(ifp) & IFF_PROMISC) && !sc->promisc) {
			if (!oce_rxf_set_promiscuous(sc, (1 | (1 << 1))))
				sc->promisc = TRUE;
		} else if (!(if_getflags(ifp) & IFF_PROMISC) && sc->promisc) {
			if (!oce_rxf_set_promiscuous(sc, 0))
				sc->promisc = FALSE;
		}

		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		rc = oce_hw_update_multicast(sc);
		if (rc)
			device_printf(sc->dev,
				"Update multicast address failed\n");
		break;

	case SIOCSIFCAP:
		u = ifr->ifr_reqcap ^ if_getcapenable(ifp);

		if (u & IFCAP_TXCSUM) {
			if_togglecapenable(ifp, IFCAP_TXCSUM);
			if_togglehwassist(ifp, (CSUM_TCP | CSUM_UDP | CSUM_IP));

			if (IFCAP_TSO & if_getcapenable(ifp) &&
			    !(IFCAP_TXCSUM & if_getcapenable(ifp))) {
				u &= ~IFCAP_TSO;
				if_setcapenablebit(ifp, 0, IFCAP_TSO);
				if_sethwassistbits(ifp, 0, CSUM_TSO);
				if_printf(ifp,
					 "TSO disabled due to -txcsum.\n");
			}
		}

		if (u & IFCAP_RXCSUM)
			if_togglecapenable(ifp, IFCAP_RXCSUM);

		if (u & IFCAP_TSO4) {
			if_togglecapenable(ifp, IFCAP_TSO4);

			if (IFCAP_TSO & if_getcapenable(ifp)) {
				if (IFCAP_TXCSUM & if_getcapenable(ifp))
					if_sethwassistbits(ifp, CSUM_TSO, 0);
				else {
					if_setcapenablebit(ifp, 0, IFCAP_TSO);
					if_sethwassistbits(ifp, 0, CSUM_TSO);
					if_printf(ifp,
					    "Enable txcsum first.\n");
					rc = EAGAIN;
				}
			} else
				if_sethwassistbits(ifp, 0, CSUM_TSO);
		}

		if (u & IFCAP_VLAN_HWTAGGING)
			if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);

		if (u & IFCAP_VLAN_HWFILTER) {
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
			oce_vid_config(sc);
		}
#if defined(INET6) || defined(INET)
		if (u & IFCAP_LRO) {
			if_togglecapenable(ifp, IFCAP_LRO);
			if (sc->enable_hwlro) {
				if (if_getcapenable(ifp) & IFCAP_LRO) {
					rc = oce_mbox_nic_set_iface_lro_config(sc, 1);
				} else {
					rc = oce_mbox_nic_set_iface_lro_config(sc, 0);
				}
			}
		}
#endif

		break;

	case SIOCGI2C:
		rc = copyin(ifr_data_get_ptr(ifr), &i2c, sizeof(i2c));
		if (rc)
			break;

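		/*
		 * Transceiver EEPROM pages A0 and A2 are dumped back to
		 * back into sfp_vpd_dump_buffer, so an A2 request is
		 * addressed at TRANSCEIVER_A0_SIZE plus the page offset.
		 */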
		if (i2c.dev_addr == PAGE_NUM_A0) {
			offset = i2c.offset;
		} else if (i2c.dev_addr == PAGE_NUM_A2) {
			offset = TRANSCEIVER_A0_SIZE + i2c.offset;
		} else {
			rc = EINVAL;
			break;
		}

		if (i2c.len > sizeof(i2c.data) ||
		    i2c.len + offset > sizeof(sfp_vpd_dump_buffer)) {
			rc = EINVAL;
			break;
		}

		rc = oce_mbox_read_transrecv_data(sc, i2c.dev_addr);
		if (rc) {
			rc = -rc;
			break;
		}

		memcpy(&i2c.data[0], &sfp_vpd_dump_buffer[offset], i2c.len);

		rc = copyout(&i2c, ifr_data_get_ptr(ifr), sizeof(i2c));
		break;

	case SIOCGPRIVATE_0:
		rc = priv_check(curthread, PRIV_DRIVER);
		if (rc != 0)
			break;
		rc = oce_handle_passthrough(ifp, data);
		break;
	default:
		rc = ether_ioctl(ifp, command, data);
		break;
	}

	return rc;
}

static void
oce_init(void *arg)
{
	POCE_SOFTC sc = arg;

	LOCK(&sc->dev_lock);

	if (if_getflags(sc->ifp) & IFF_UP) {
		oce_if_deactivate(sc);
		oce_if_activate(sc);
	}

	UNLOCK(&sc->dev_lock);

}

static int
oce_multiq_start(if_t ifp, struct mbuf *m)
{
	POCE_SOFTC sc = if_getsoftc(ifp);
	struct oce_wq *wq = NULL;
	int queue_index = 0;
	int status = 0;

	if (!sc->link_status)
		return ENXIO;

	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
		queue_index = m->m_pkthdr.flowid % sc->nwqs;

	wq = sc->wq[queue_index];

	LOCK(&wq->tx_lock);
	status = oce_multiq_transmit(ifp, m, wq);
	UNLOCK(&wq->tx_lock);

	return status;

}

static void
oce_multiq_flush(if_t ifp)
{
	POCE_SOFTC sc = if_getsoftc(ifp);
	struct mbuf     *m;
	int i = 0;

	for (i = 0; i < sc->nwqs; i++) {
		while ((m = buf_ring_dequeue_sc(sc->wq[i]->br)) != NULL)
			m_freem(m);
	}
	if_qflush(ifp);
}

/*****************************************************************************
 *                   Driver interrupt routines functions                     *
 *****************************************************************************/

static void
oce_intr(void *arg, int pending)
{

	POCE_INTR_INFO ii = (POCE_INTR_INFO) arg;
	POCE_SOFTC sc = ii->sc;
	struct oce_eq *eq = ii->eq;
	struct oce_eqe *eqe;
	struct oce_cq *cq = NULL;
	int i, num_eqes = 0;

	bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map,
				 BUS_DMASYNC_POSTWRITE);
	do {
		eqe = RING_GET_CONSUMER_ITEM_VA(eq->ring, struct oce_eqe);
		if (eqe->evnt == 0)
			break;
		eqe->evnt = 0;
		bus_dmamap_sync(eq->ring->dma.tag, eq->ring->dma.map,
					BUS_DMASYNC_POSTWRITE);
		RING_GET(eq->ring, 1);
		num_eqes++;

	} while (TRUE);

	if (!num_eqes)
		goto eq_arm; /* Spurious */

	/* Clear EQ entries, but don't arm */
	oce_arm_eq(sc, eq->eq_id, num_eqes, FALSE, FALSE);

	/* Process TX, RX and MCC, but don't arm the CQ */
	for (i = 0; i < eq->cq_valid; i++) {
		cq = eq->cq[i];
		(*cq->cq_handler)(cq->cb_arg);
	}

	/* Arm all cqs connected to this EQ */
	for (i = 0; i < eq->cq_valid; i++) {
		cq = eq->cq[i];
		oce_arm_cq(sc, cq->cq_id, 0, TRUE);
	}

eq_arm:
	oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE);

	return;
}

static int
oce_setup_intr(POCE_SOFTC sc)
{
	int rc = 0, use_intx = 0;
	int vector = 0, req_vectors = 0;
	int tot_req_vectors, tot_vectors;

	if (is_rss_enabled(sc))
		req_vectors = MAX((sc->nrqs - 1), sc->nwqs);
	else
		req_vectors = 1;

	tot_req_vectors = req_vectors;
	if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
		if (req_vectors > 1) {
			tot_req_vectors += OCE_RDMA_VECTORS;
			sc->roce_intr_count = OCE_RDMA_VECTORS;
		}
	}

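	/*
	 * When the RDMA companion is present, extra MSI-X vectors are
	 * requested on top of the NIC vectors.  If the allocation below
	 * comes up short, the granted vectors are split between the NIC
	 * (intr_count) and RoCE (roce_intr_count).
	 */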
	if (sc->flags & OCE_FLAGS_MSIX_CAPABLE) {
		sc->intr_count = req_vectors;
		tot_vectors = tot_req_vectors;
		rc = pci_alloc_msix(sc->dev, &tot_vectors);
		if (rc != 0) {
			use_intx = 1;
			pci_release_msi(sc->dev);
		} else {
			if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
				if (tot_vectors < tot_req_vectors) {
					if (sc->intr_count < (2 * OCE_RDMA_VECTORS)) {
						sc->roce_intr_count = (tot_vectors / 2);
					}
					sc->intr_count = tot_vectors - sc->roce_intr_count;
				}
			} else {
				sc->intr_count = tot_vectors;
			}
			sc->flags |= OCE_FLAGS_USING_MSIX;
		}
	} else
		use_intx = 1;

	if (use_intx)
		sc->intr_count = 1;

	/* Scale number of queues based on intr we got */
	update_queues_got(sc);

	if (use_intx) {
		device_printf(sc->dev, "Using legacy interrupt\n");
		rc = oce_alloc_intr(sc, vector, oce_intr);
		if (rc)
			goto error;
	} else {
		for (; vector < sc->intr_count; vector++) {
			rc = oce_alloc_intr(sc, vector, oce_intr);
			if (rc)
				goto error;
		}
	}

	return 0;
error:
	oce_intr_free(sc);
	return rc;
}

static int
oce_fast_isr(void *arg)
{
	POCE_INTR_INFO ii = (POCE_INTR_INFO) arg;
	POCE_SOFTC sc = ii->sc;

	if (ii->eq == NULL)
		return FILTER_STRAY;

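	/*
	 * Filter routine: note the event and hand the real completion
	 * processing to the taskqueue; the EQ is not re-armed here (that
	 * happens in oce_intr() once the CQs have been drained).
	 */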
	oce_arm_eq(sc, ii->eq->eq_id, 0, FALSE, TRUE);

	taskqueue_enqueue(ii->tq, &ii->task);

	ii->eq->intr++;

	return FILTER_HANDLED;
}

static int
oce_alloc_intr(POCE_SOFTC sc, int vector, void (*isr) (void *arg, int pending))
{
	POCE_INTR_INFO ii;
	int rc = 0, rr;

	if (vector >= OCE_MAX_EQ)
		return (EINVAL);

	ii = &sc->intrs[vector];

	/* Set the resource id for the interrupt.
	 * MSIx is vector + 1 for the resource id,
	 * INTx is 0 for the resource id.
	 */
	if (sc->flags & OCE_FLAGS_USING_MSIX)
		rr = vector + 1;
	else
		rr = 0;
	ii->intr_res = bus_alloc_resource_any(sc->dev,
					      SYS_RES_IRQ,
					      &rr, RF_ACTIVE|RF_SHAREABLE);
	ii->irq_rr = rr;
	if (ii->intr_res == NULL) {
		device_printf(sc->dev,
			  "Could not allocate interrupt\n");
		rc = ENXIO;
		return rc;
	}

	TASK_INIT(&ii->task, 0, isr, ii);
	ii->vector = vector;
	sprintf(ii->task_name, "oce_task[%d]", ii->vector);
	ii->tq = taskqueue_create_fast(ii->task_name,
			M_NOWAIT,
			taskqueue_thread_enqueue,
			&ii->tq);
	taskqueue_start_threads(&ii->tq, 1, PI_NET, "%s taskq",
			device_get_nameunit(sc->dev));

	ii->sc = sc;
	rc = bus_setup_intr(sc->dev,
			ii->intr_res,
			INTR_TYPE_NET,
			oce_fast_isr, NULL, ii, &ii->tag);
	return rc;

}

void
oce_intr_free(POCE_SOFTC sc)
{
	int i = 0;

	for (i = 0; i < sc->intr_count; i++) {

		if (sc->intrs[i].tag != NULL)
			bus_teardown_intr(sc->dev, sc->intrs[i].intr_res,
						sc->intrs[i].tag);
		if (sc->intrs[i].tq != NULL)
			taskqueue_free(sc->intrs[i].tq);

		if (sc->intrs[i].intr_res != NULL)
			bus_release_resource(sc->dev, SYS_RES_IRQ,
						sc->intrs[i].irq_rr,
						sc->intrs[i].intr_res);
		sc->intrs[i].tag = NULL;
		sc->intrs[i].intr_res = NULL;
	}

	if (sc->flags & OCE_FLAGS_USING_MSIX)
		pci_release_msi(sc->dev);

}

/******************************************************************************
*			  Media callbacks functions 			      *
******************************************************************************/

static void
oce_media_status(if_t ifp, struct ifmediareq *req)
{
	POCE_SOFTC sc = (POCE_SOFTC) if_getsoftc(ifp);

	req->ifm_status = IFM_AVALID;
	req->ifm_active = IFM_ETHER;

	if (sc->link_status == 1)
		req->ifm_status |= IFM_ACTIVE;
	else
		return;

	switch (sc->link_speed) {
	case 1: /* 10 Mbps */
		req->ifm_active |= IFM_10_T | IFM_FDX;
		sc->speed = 10;
		break;
	case 2: /* 100 Mbps */
		req->ifm_active |= IFM_100_TX | IFM_FDX;
		sc->speed = 100;
		break;
	case 3: /* 1 Gbps */
		req->ifm_active |= IFM_1000_T | IFM_FDX;
		sc->speed = 1000;
		break;
	case 4: /* 10 Gbps */
		req->ifm_active |= IFM_10G_SR | IFM_FDX;
		sc->speed = 10000;
		break;
	case 5: /* 20 Gbps */
		req->ifm_active |= IFM_10G_SR | IFM_FDX;
		sc->speed = 20000;
		break;
	case 6: /* 25 Gbps */
		req->ifm_active |= IFM_10G_SR | IFM_FDX;
		sc->speed = 25000;
		break;
	case 7: /* 40 Gbps */
		req->ifm_active |= IFM_40G_SR4 | IFM_FDX;
		sc->speed = 40000;
		break;
	default:
		sc->speed = 0;
		break;
	}

	return;
}

int
oce_media_change(if_t ifp)
{
	return 0;
}

static void
oce_is_pkt_dest_bmc(POCE_SOFTC sc, struct mbuf *m, boolean_t *os2bmc,
    struct mbuf **m_new)
{
	struct ether_header *eh = NULL;

	eh = mtod(m, struct ether_header *);

	if (!is_os2bmc_enabled(sc) || *os2bmc) {
		*os2bmc = FALSE;
		goto done;
	}
	if (!ETHER_IS_MULTICAST(eh->ether_dhost))
		goto done;

	if (is_mc_allowed_on_bmc(sc, eh) ||
	    is_bc_allowed_on_bmc(sc, eh) ||
	    is_arp_allowed_on_bmc(sc, ntohs(eh->ether_type))) {
		*os2bmc = TRUE;
		goto done;
	}

	if (mtod(m, struct ip *)->ip_p == IPPROTO_IPV6) {
		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);
		uint8_t nexthdr = ip6->ip6_nxt;
		if (nexthdr == IPPROTO_ICMPV6) {
			struct icmp6_hdr *icmp6 = (struct icmp6_hdr *)(ip6 + 1);
			switch (icmp6->icmp6_type) {
			case ND_ROUTER_ADVERT:
				*os2bmc = is_ipv6_ra_filt_enabled(sc);
				goto done;
			case ND_NEIGHBOR_ADVERT:
				*os2bmc = is_ipv6_na_filt_enabled(sc);
				goto done;
			default:
				break;
			}
		}
	}

	if (mtod(m, struct ip *)->ip_p == IPPROTO_UDP) {
		struct ip *ip = mtod(m, struct ip *);
		int iphlen = ip->ip_hl << 2;
		struct udphdr *uh = (struct udphdr *)((caddr_t)ip + iphlen);
		switch (uh->uh_dport) {
		case DHCP_CLIENT_PORT:
			*os2bmc = is_dhcp_client_filt_enabled(sc);
			goto done;
		case DHCP_SERVER_PORT:
			*os2bmc = is_dhcp_srvr_filt_enabled(sc);
			goto done;
		case NET_BIOS_PORT1:
		case NET_BIOS_PORT2:
			*os2bmc = is_nbios_filt_enabled(sc);
			goto done;
		case DHCPV6_RAS_PORT:
			*os2bmc = is_ipv6_ras_filt_enabled(sc);
			goto done;
		default:
			break;
		}
	}
done:
	if (*os2bmc) {
		*m_new = m_dup(m, M_NOWAIT);
		if (!*m_new) {
			*os2bmc = FALSE;
			return;
		}
		*m_new = oce_insert_vlan_tag(sc, *m_new, NULL);
	}
}

/*****************************************************************************
 *			  Transmit routines functions			     *
 *****************************************************************************/

static int
oce_tx(POCE_SOFTC sc, struct mbuf **mpp, int wq_index)
{
	int rc = 0, i, retry_cnt = 0;
	bus_dma_segment_t segs[OCE_MAX_TX_ELEMENTS];
	struct mbuf *m, *m_temp, *m_new = NULL;
	struct oce_wq *wq = sc->wq[wq_index];
	struct oce_packet_desc *pd;
	struct oce_nic_hdr_wqe *nichdr;
	struct oce_nic_frag_wqe *nicfrag;
	struct ether_header *eh = NULL;
	int num_wqes;
	uint32_t reg_value;
	boolean_t complete = TRUE;
	boolean_t os2bmc = FALSE;

	m = *mpp;
	if (!m)
		return EINVAL;

	if (!(m->m_flags & M_PKTHDR)) {
		rc = ENXIO;
		goto free_ret;
	}

	/* Don't allow non-TSO packets longer than MTU */
	if (!is_tso_pkt(m)) {
		eh = mtod(m, struct ether_header *);
		if (m->m_pkthdr.len > ETHER_MAX_FRAME(sc->ifp, eh->ether_type, FALSE))
			goto free_ret;
	}

	if (oce_tx_asic_stall_verify(sc, m)) {
		m = oce_insert_vlan_tag(sc, m, &complete);
		if (!m) {
			device_printf(sc->dev, "Insertion unsuccessful\n");
			return 0;
		}
	}

	/* The Lancer and SH ASICs have a bug whereby packets of 32 bytes
	 * or less may cause a transmit stall on that port. The workaround
	 * is to pad such packets to a 36-byte length.
	 */
	if (IS_SH(sc) || IS_XE201(sc)) {
		if (m->m_pkthdr.len <= 32) {
			char buf[36];
			bzero((void *)buf, 36);
			m_append(m, (36 - m->m_pkthdr.len), buf);
		}
	}

tx_start:
	if (m->m_pkthdr.csum_flags & CSUM_TSO) {
		/* consolidate packet buffers for TSO/LSO segment offload */
#if defined(INET6) || defined(INET)
		m = oce_tso_setup(sc, mpp);
#else
		m = NULL;
#endif
		if (m == NULL) {
			rc = ENXIO;
			goto free_ret;
		}
	}

	pd = &wq->pckts[wq->pkt_desc_head];

retry:
	rc = bus_dmamap_load_mbuf_sg(wq->tag,
				     pd->map,
				     m, segs, &pd->nsegs, BUS_DMA_NOWAIT);
	if (rc == 0) {
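		/*
		 * The descriptor is one NIC header WQE plus one fragment
		 * WQE per DMA segment; BE and SH parts additionally round
		 * the count up to an even number of WQEs, padded with the
		 * dummy fragment built below.
		 */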
		num_wqes = pd->nsegs + 1;
		if (IS_BE(sc) || IS_SH(sc)) {
			/* Dummy required only for BE3. */
			if (num_wqes & 1)
				num_wqes++;
		}
		if (num_wqes >= RING_NUM_FREE(wq->ring)) {
			bus_dmamap_unload(wq->tag, pd->map);
			return EBUSY;
		}
		atomic_store_rel_int(&wq->pkt_desc_head,
				     (wq->pkt_desc_head + 1) %
				      OCE_WQ_PACKET_ARRAY_SIZE);
		bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_PREWRITE);
		pd->mbuf = m;

		nichdr =
		    RING_GET_PRODUCER_ITEM_VA(wq->ring, struct oce_nic_hdr_wqe);
		nichdr->u0.dw[0] = 0;
		nichdr->u0.dw[1] = 0;
		nichdr->u0.dw[2] = 0;
		nichdr->u0.dw[3] = 0;

		nichdr->u0.s.complete = complete;
		nichdr->u0.s.mgmt = os2bmc;
		nichdr->u0.s.event = 1;
		nichdr->u0.s.crc = 1;
		nichdr->u0.s.forward = 0;
		nichdr->u0.s.ipcs = (m->m_pkthdr.csum_flags & CSUM_IP) ? 1 : 0;
		nichdr->u0.s.udpcs =
			(m->m_pkthdr.csum_flags & CSUM_UDP) ? 1 : 0;
		nichdr->u0.s.tcpcs =
			(m->m_pkthdr.csum_flags & CSUM_TCP) ? 1 : 0;
		nichdr->u0.s.num_wqe = num_wqes;
		nichdr->u0.s.total_length = m->m_pkthdr.len;

		if (m->m_flags & M_VLANTAG) {
			nichdr->u0.s.vlan = 1; /* VLAN present */
			nichdr->u0.s.vlan_tag = m->m_pkthdr.ether_vtag;
		}

		if (m->m_pkthdr.csum_flags & CSUM_TSO) {
			if (m->m_pkthdr.tso_segsz) {
				nichdr->u0.s.lso = 1;
				nichdr->u0.s.lso_mss  = m->m_pkthdr.tso_segsz;
			}
			if (!IS_BE(sc) || !IS_SH(sc))
				nichdr->u0.s.ipcs = 1;
		}

		RING_PUT(wq->ring, 1);
		atomic_add_int(&wq->ring->num_used, 1);

		for (i = 0; i < pd->nsegs; i++) {
			nicfrag =
			    RING_GET_PRODUCER_ITEM_VA(wq->ring,
						      struct oce_nic_frag_wqe);
			nicfrag->u0.s.rsvd0 = 0;
			nicfrag->u0.s.frag_pa_hi = ADDR_HI(segs[i].ds_addr);
			nicfrag->u0.s.frag_pa_lo = ADDR_LO(segs[i].ds_addr);
			nicfrag->u0.s.frag_len = segs[i].ds_len;
			pd->wqe_idx = wq->ring->pidx;
			RING_PUT(wq->ring, 1);
			atomic_add_int(&wq->ring->num_used, 1);
		}
		if (num_wqes > (pd->nsegs + 1)) {
			nicfrag =
			    RING_GET_PRODUCER_ITEM_VA(wq->ring,
						      struct oce_nic_frag_wqe);
			nicfrag->u0.dw[0] = 0;
			nicfrag->u0.dw[1] = 0;
			nicfrag->u0.dw[2] = 0;
			nicfrag->u0.dw[3] = 0;
			pd->wqe_idx = wq->ring->pidx;
			RING_PUT(wq->ring, 1);
			atomic_add_int(&wq->ring->num_used, 1);
			pd->nsegs++;
		}

		if_inc_counter(sc->ifp, IFCOUNTER_OPACKETS, 1);
		wq->tx_stats.tx_reqs++;
		wq->tx_stats.tx_wrbs += num_wqes;
		wq->tx_stats.tx_bytes += m->m_pkthdr.len;
		wq->tx_stats.tx_pkts++;

		bus_dmamap_sync(wq->ring->dma.tag, wq->ring->dma.map,
				BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		reg_value = (num_wqes << 16) | wq->wq_id;

		/* oce_is_pkt_dest_bmc() does nothing if os2bmc is not
		 * enabled or the packet is already tagged for the BMC.
		 */
		oce_is_pkt_dest_bmc(sc, m, &os2bmc, &m_new);

		if_inc_counter(sc->ifp, IFCOUNTER_OBYTES, m->m_pkthdr.len);
		if (m->m_flags & M_MCAST)
			if_inc_counter(sc->ifp, IFCOUNTER_OMCASTS, 1);
		ETHER_BPF_MTAP(sc->ifp, m);

		OCE_WRITE_REG32(sc, db, wq->db_offset, reg_value);

	} else if (rc == EFBIG) {
		if (retry_cnt == 0) {
			m_temp = m_defrag(m, M_NOWAIT);
			if (m_temp == NULL)
				goto free_ret;
			m = m_temp;
			*mpp = m_temp;
			retry_cnt = retry_cnt + 1;
			goto retry;
		} else
			goto free_ret;
	} else if (rc == ENOMEM)
		return rc;
	else
		goto free_ret;

	if (os2bmc) {
		m = m_new;
		goto tx_start;
	}

	return 0;

free_ret:
	m_freem(*mpp);
	*mpp = NULL;
	return rc;
}

static void
oce_process_tx_completion(struct oce_wq *wq)
{
	struct oce_packet_desc *pd;
	POCE_SOFTC sc = (POCE_SOFTC) wq->parent;
	struct mbuf *m;

	pd = &wq->pckts[wq->pkt_desc_tail];
	atomic_store_rel_int(&wq->pkt_desc_tail,
			     (wq->pkt_desc_tail + 1) % OCE_WQ_PACKET_ARRAY_SIZE);
	atomic_subtract_int(&wq->ring->num_used, pd->nsegs + 1);
	bus_dmamap_sync(wq->tag, pd->map, BUS_DMASYNC_POSTWRITE);
	bus_dmamap_unload(wq->tag, pd->map);

	m = pd->mbuf;
	m_freem(m);
	pd->mbuf = NULL;

	if (if_getdrvflags(sc->ifp) & IFF_DRV_OACTIVE) {
		if (wq->ring->num_used < (wq->ring->num_items / 2)) {
			if_setdrvflagbits(sc->ifp, 0, (IFF_DRV_OACTIVE));
			oce_tx_restart(sc, wq);
		}
	}
}

static void
oce_tx_restart(POCE_SOFTC sc, struct oce_wq *wq)
{

	if ((if_getdrvflags(sc->ifp) & IFF_DRV_RUNNING) != IFF_DRV_RUNNING)
		return;

	if (!drbr_empty(sc->ifp, wq->br))
		taskqueue_enqueue(taskqueue_swi, &wq->txtask);

}

#if defined(INET6) || defined(INET)
static struct mbuf *
oce_tso_setup(POCE_SOFTC sc, struct mbuf **mpp)
{
	struct mbuf *m;
#ifdef INET
	struct ip *ip;
#endif
#ifdef INET6
	struct ip6_hdr *ip6;
#endif
	struct ether_vlan_header *eh;
	struct tcphdr *th;
	uint16_t etype;
	int total_len = 0, ehdrlen = 0;

	m = *mpp;

	if (M_WRITABLE(m) == 0) {
		m = m_dup(*mpp, M_NOWAIT);
		if (!m)
			return NULL;
		m_freem(*mpp);
		*mpp = m;
	}

	eh = mtod(m, struct ether_vlan_header *);
	if (eh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		etype = ntohs(eh->evl_proto);
		ehdrlen = ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN;
	} else {
		etype = ntohs(eh->evl_encap_proto);
		ehdrlen = ETHER_HDR_LEN;
	}

	switch (etype) {
#ifdef INET
	case ETHERTYPE_IP:
		ip = (struct ip *)(m->m_data + ehdrlen);
		if (ip->ip_p != IPPROTO_TCP)
			return NULL;
		th = (struct tcphdr *)((caddr_t)ip + (ip->ip_hl << 2));

		total_len = ehdrlen + (ip->ip_hl << 2) + (th->th_off << 2);
		break;
#endif
#ifdef INET6
	case ETHERTYPE_IPV6:
		ip6 = (struct ip6_hdr *)(m->m_data + ehdrlen);
		if (ip6->ip6_nxt != IPPROTO_TCP)
			return NULL;
		th = (struct tcphdr *)((caddr_t)ip6 + sizeof(struct ip6_hdr));

		total_len = ehdrlen + sizeof(struct ip6_hdr) + (th->th_off << 2);
		break;
#endif
	default:
		return NULL;
	}

	m = m_pullup(m, total_len);
	*mpp = m;
	return m;
}
#endif /* INET6 || INET */

void
oce_tx_task(void *arg, int npending)
{
	struct oce_wq *wq = arg;
	POCE_SOFTC sc = wq->parent;
	if_t ifp = sc->ifp;
	int rc = 0;

	LOCK(&wq->tx_lock);
	rc = oce_multiq_transmit(ifp, NULL, wq);
	if (rc) {
		device_printf(sc->dev,
				"TX[%d] restart failed\n", wq->queue_index);
	}
	UNLOCK(&wq->tx_lock);
}

void
oce_start(if_t ifp)
{
	POCE_SOFTC sc = if_getsoftc(ifp);
	struct mbuf *m;
	int rc = 0;
	int def_q = 0; /* Default tx queue is 0 */

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
			IFF_DRV_RUNNING)
		return;

	if (!sc->link_status)
		return;

	while (true) {
		m = if_dequeue(sc->ifp);
		if (m == NULL)
			break;

		LOCK(&sc->wq[def_q]->tx_lock);
		rc = oce_tx(sc, &m, def_q);
		UNLOCK(&sc->wq[def_q]->tx_lock);
		if (rc) {
			if (m != NULL) {
				sc->wq[def_q]->tx_stats.tx_stops++;
				if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
				if_sendq_prepend(ifp, m);
				m = NULL;
			}
			break;
		}
	}
}

/* Handle the Completion Queue for transmit */
uint16_t
oce_wq_handler(void *arg)
{
	struct oce_wq *wq = (struct oce_wq *)arg;
	POCE_SOFTC sc = wq->parent;
	struct oce_cq *cq = wq->cq;
	struct oce_nic_tx_cqe *cqe;
	int num_cqes = 0;

	LOCK(&wq->tx_compl_lock);
	bus_dmamap_sync(cq->ring->dma.tag,
			cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe);
	while (cqe->u0.dw[3]) {
		DW_SWAP((uint32_t *) cqe, sizeof(oce_wq_cqe));

		wq->ring->cidx = cqe->u0.s.wqe_index + 1;
		if (wq->ring->cidx >= wq->ring->num_items)
			wq->ring->cidx -= wq->ring->num_items;

		oce_process_tx_completion(wq);
		wq->tx_stats.tx_compl++;
		cqe->u0.dw[3] = 0;
		RING_GET(cq->ring, 1);
		bus_dmamap_sync(cq->ring->dma.tag,
				cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
		cqe =
		    RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_tx_cqe);
		num_cqes++;
	}

	if (num_cqes)
		oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE);

	UNLOCK(&wq->tx_compl_lock);
	return num_cqes;
}

static int
oce_multiq_transmit(if_t ifp, struct mbuf *m, struct oce_wq *wq)
{
	POCE_SOFTC sc = if_getsoftc(ifp);
	int status = 0, queue_index = 0;
	struct mbuf *next = NULL;
	struct buf_ring *br = NULL;

	br  = wq->br;
	queue_index = wq->queue_index;

	if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
		IFF_DRV_RUNNING) {
		if (m != NULL)
			status = drbr_enqueue(ifp, br, m);
		return status;
	}

	if (m != NULL) {
		if ((status = drbr_enqueue(ifp, br, m)) != 0)
			return status;
	}
	while ((next = drbr_peek(ifp, br)) != NULL) {
		if (oce_tx(sc, &next, queue_index)) {
			if (next == NULL) {
				drbr_advance(ifp, br);
			} else {
				drbr_putback(ifp, br, next);
				wq->tx_stats.tx_stops++;
				if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, 0);
			}
			break;
		}
		drbr_advance(ifp, br);
	}

	return 0;
}

/*****************************************************************************
 *			    Receive  routines functions 		     *
 *****************************************************************************/

static void
oce_correct_header(struct mbuf *m, struct nic_hwlro_cqe_part1 *cqe1,
    struct nic_hwlro_cqe_part2 *cqe2)
{
	uint32_t *p;
	struct ether_header *eh = NULL;
	struct tcphdr *tcp_hdr = NULL;
	struct ip *ip4_hdr = NULL;
	struct ip6_hdr *ip6 = NULL;
	uint32_t payload_len = 0;

	eh = mtod(m, struct ether_header *);
	/* correct IP header */
	if (!cqe2->ipv6_frame) {
		ip4_hdr = (struct ip *)((char *)eh + sizeof(struct ether_header));
		ip4_hdr->ip_ttl = cqe2->frame_lifespan;
		ip4_hdr->ip_len = htons(cqe2->coalesced_size - sizeof(struct ether_header));
		tcp_hdr = (struct tcphdr *)((char *)ip4_hdr + sizeof(struct ip));
	} else {
		ip6 = (struct ip6_hdr *)((char *)eh + sizeof(struct ether_header));
		ip6->ip6_ctlun.ip6_un1.ip6_un1_hlim = cqe2->frame_lifespan;
		payload_len = cqe2->coalesced_size - sizeof(struct ether_header)
						- sizeof(struct ip6_hdr);
		ip6->ip6_ctlun.ip6_un1.ip6_un1_plen = htons(payload_len);
		tcp_hdr = (struct tcphdr *)((char *)ip6 + sizeof(struct ip6_hdr));
	}

	/* correct tcp header */
	tcp_hdr->th_ack = htonl(cqe2->tcp_ack_num);
	if (cqe2->push) {
		tcp_hdr->th_flags |= TH_PUSH;
	}
	tcp_hdr->th_win = htons(cqe2->tcp_window);
	tcp_hdr->th_sum = 0xffff;
	if (cqe2->ts_opt) {
		p = (uint32_t *)((char *)tcp_hdr + sizeof(struct tcphdr) + 2);
		*p = cqe1->tcp_timestamp_val;
		*(p + 1) = cqe1->tcp_timestamp_ecr;
	}

	return;
}

static void
oce_rx_mbuf_chain(struct oce_rq *rq, struct oce_common_cqe_info *cqe_info,
    struct mbuf **m)
{
	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
	uint32_t i = 0, frag_len = 0;
	uint32_t len = cqe_info->pkt_size;
	struct oce_packet_desc *pd;
	struct mbuf *tail = NULL;

	for (i = 0; i < cqe_info->num_frags; i++) {
		if (rq->ring->cidx == rq->ring->pidx) {
			device_printf(sc->dev,
				  "oce_rx_mbuf_chain: Invalid RX completion - Queue is empty\n");
			return;
		}
		pd = &rq->pckts[rq->ring->cidx];

		bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(rq->tag, pd->map);
		RING_GET(rq->ring, 1);
		rq->pending--;

		frag_len = (len > rq->cfg.frag_size) ? rq->cfg.frag_size : len;
		pd->mbuf->m_len = frag_len;

		if (tail != NULL) {
			/* additional fragments */
			pd->mbuf->m_flags &= ~M_PKTHDR;
			tail->m_next = pd->mbuf;
			if (rq->islro)
				tail->m_nextpkt = NULL;
			tail = pd->mbuf;
		} else {
			/* first fragment, fill out much of the packet header */
			pd->mbuf->m_pkthdr.len = len;
			if (rq->islro)
				pd->mbuf->m_nextpkt = NULL;
			pd->mbuf->m_pkthdr.csum_flags = 0;
			if (IF_CSUM_ENABLED(sc)) {
				if (cqe_info->l4_cksum_pass) {
					if (!cqe_info->ipv6_frame) { /* IPV4 */
						pd->mbuf->m_pkthdr.csum_flags |=
							(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
					} else { /* IPV6 frame */
						if (rq->islro) {
							pd->mbuf->m_pkthdr.csum_flags |=
							(CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
						}
					}
					pd->mbuf->m_pkthdr.csum_data = 0xffff;
				}
				if (cqe_info->ip_cksum_pass) {
					pd->mbuf->m_pkthdr.csum_flags |=
					       (CSUM_IP_CHECKED|CSUM_IP_VALID);
				}
			}
			*m = tail = pd->mbuf;
		}
		pd->mbuf = NULL;
		len -= frag_len;
	}

	return;
}

static void
oce_rx_lro(struct oce_rq *rq, struct nic_hwlro_singleton_cqe *cqe,
    struct nic_hwlro_cqe_part2 *cqe2)
{
	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
	struct nic_hwlro_cqe_part1 *cqe1 = NULL;
	struct mbuf *m = NULL;
	struct oce_common_cqe_info cq_info;

	/* parse cqe */
	if (cqe2 == NULL) {
		cq_info.pkt_size =  cqe->pkt_size;
		cq_info.vtag = cqe->vlan_tag;
		cq_info.l4_cksum_pass = cqe->l4_cksum_pass;
		cq_info.ip_cksum_pass = cqe->ip_cksum_pass;
		cq_info.ipv6_frame = cqe->ipv6_frame;
		cq_info.vtp = cqe->vtp;
		cq_info.qnq = cqe->qnq;
	} else {
		cqe1 = (struct nic_hwlro_cqe_part1 *)cqe;
		cq_info.pkt_size =  cqe2->coalesced_size;
		cq_info.vtag = cqe2->vlan_tag;
		cq_info.l4_cksum_pass = cqe2->l4_cksum_pass;
		cq_info.ip_cksum_pass = cqe2->ip_cksum_pass;
		cq_info.ipv6_frame = cqe2->ipv6_frame;
		cq_info.vtp = cqe2->vtp;
		cq_info.qnq = cqe1->qnq;
	}

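	/* The VLAN tag in the CQE is byte-swapped relative to host order. */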
	cq_info.vtag = BSWAP_16(cq_info.vtag);

	cq_info.num_frags = cq_info.pkt_size / rq->cfg.frag_size;
	if (cq_info.pkt_size % rq->cfg.frag_size)
		cq_info.num_frags++;

	oce_rx_mbuf_chain(rq, &cq_info, &m);

	if (m) {
		if (cqe2) {
			//assert(cqe2->valid != 0);

			//assert(cqe2->cqe_type != 2);
			oce_correct_header(m, cqe1, cqe2);
		}

		m->m_pkthdr.rcvif = sc->ifp;
		if (rq->queue_index)
			m->m_pkthdr.flowid = (rq->queue_index - 1);
		else
			m->m_pkthdr.flowid = rq->queue_index;
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

		/* This determines if the vlan tag is valid */
		if (cq_info.vtp) {
			if (sc->function_mode & FNM_FLEX10_MODE) {
				/* FLEX10. If QnQ is not set, neglect VLAN */
				if (cq_info.qnq) {
					m->m_pkthdr.ether_vtag = cq_info.vtag;
					m->m_flags |= M_VLANTAG;
				}
			} else if (sc->pvid != (cq_info.vtag & VLAN_VID_MASK)) {
				/* In UMC mode the pvid is generally stripped
				   by hw, but in some cases we have seen it
				   arrive with the pvid. So if pvid == vlan,
				   neglect the vlan.
				 */
				m->m_pkthdr.ether_vtag = cq_info.vtag;
				m->m_flags |= M_VLANTAG;
			}
		}
		if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1);

		if_input(sc->ifp, m);

		/* Update rx stats per queue */
		rq->rx_stats.rx_pkts++;
		rq->rx_stats.rx_bytes += cq_info.pkt_size;
		rq->rx_stats.rx_frags += cq_info.num_frags;
		rq->rx_stats.rx_ucast_pkts++;
	}
	return;
}

static void
oce_rx(struct oce_rq *rq, struct oce_nic_rx_cqe *cqe)
{
	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
	int len;
	struct mbuf *m = NULL;
	struct oce_common_cqe_info cq_info;
	uint16_t vtag = 0;

	/* Is it a flush compl that has no data */
	if (!cqe->u0.s.num_fragments)
		goto exit;

	len = cqe->u0.s.pkt_size;
	if (!len) {
		/* Partial DMA workaround for Lancer */
		oce_discard_rx_comp(rq, cqe->u0.s.num_fragments);
		goto exit;
	}

	if (!oce_cqe_portid_valid(sc, cqe)) {
		oce_discard_rx_comp(rq, cqe->u0.s.num_fragments);
		goto exit;
	}

	/* Get vlan_tag value */
	if (IS_BE(sc) || IS_SH(sc))
		vtag = BSWAP_16(cqe->u0.s.vlan_tag);
	else
		vtag = cqe->u0.s.vlan_tag;

	cq_info.l4_cksum_pass = cqe->u0.s.l4_cksum_pass;
	cq_info.ip_cksum_pass = cqe->u0.s.ip_cksum_pass;
	cq_info.ipv6_frame = cqe->u0.s.ip_ver;
	cq_info.num_frags = cqe->u0.s.num_fragments;
	cq_info.pkt_size = cqe->u0.s.pkt_size;

	oce_rx_mbuf_chain(rq, &cq_info, &m);

	if (m) {
		m->m_pkthdr.rcvif = sc->ifp;
		if (rq->queue_index)
			m->m_pkthdr.flowid = (rq->queue_index - 1);
		else
			m->m_pkthdr.flowid = rq->queue_index;
		M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

		/* This determines if the vlan tag is valid */
		if (oce_cqe_vtp_valid(sc, cqe)) {
			if (sc->function_mode & FNM_FLEX10_MODE) {
				/* FLEX10. If QnQ is not set, neglect VLAN */
				if (cqe->u0.s.qnq) {
					m->m_pkthdr.ether_vtag = vtag;
					m->m_flags |= M_VLANTAG;
				}
			} else if (sc->pvid != (vtag & VLAN_VID_MASK)) {
				/* In UMC mode the pvid is generally stripped
				   by hw, but in some cases we have seen it
				   arrive with the pvid. So if pvid == vlan,
				   neglect the vlan.
				*/
				m->m_pkthdr.ether_vtag = vtag;
				m->m_flags |= M_VLANTAG;
			}
		}

		if_inc_counter(sc->ifp, IFCOUNTER_IPACKETS, 1);
#if defined(INET6) || defined(INET)
		/* Try to queue to LRO */
		if (IF_LRO_ENABLED(sc) &&
		    (cqe->u0.s.ip_cksum_pass) &&
		    (cqe->u0.s.l4_cksum_pass) &&
		    (!cqe->u0.s.ip_ver)       &&
		    (rq->lro.lro_cnt != 0)) {
			if (tcp_lro_rx(&rq->lro, m, 0) == 0) {
				rq->lro_pkts_queued++;
				goto post_done;
			}
			/* If LRO posting fails then try to post to STACK */
		}
#endif

		if_input(sc->ifp, m);
#if defined(INET6) || defined(INET)
post_done:
#endif
		/* Update rx stats per queue */
		rq->rx_stats.rx_pkts++;
		rq->rx_stats.rx_bytes += cqe->u0.s.pkt_size;
		rq->rx_stats.rx_frags += cqe->u0.s.num_fragments;
		if (cqe->u0.s.pkt_type == OCE_MULTICAST_PACKET)
			rq->rx_stats.rx_mcast_pkts++;
		if (cqe->u0.s.pkt_type == OCE_UNICAST_PACKET)
			rq->rx_stats.rx_ucast_pkts++;
	}
exit:
	return;
}

void
oce_discard_rx_comp(struct oce_rq *rq, int num_frags)
{
	uint32_t i = 0;
	struct oce_packet_desc *pd;
	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;

	for (i = 0; i < num_frags; i++) {
		if (rq->ring->cidx == rq->ring->pidx) {
			device_printf(sc->dev,
				"oce_discard_rx_comp: Invalid RX completion - Queue is empty\n");
			return;
		}
		pd = &rq->pckts[rq->ring->cidx];
		bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_POSTWRITE);
		bus_dmamap_unload(rq->tag, pd->map);
		if (pd->mbuf != NULL) {
			m_freem(pd->mbuf);
			pd->mbuf = NULL;
		}

		RING_GET(rq->ring, 1);
		rq->pending--;
	}
}

static int
oce_cqe_vtp_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe)
{
	struct oce_nic_rx_cqe_v1 *cqe_v1;
	int vtp = 0;

	if (sc->be3_native) {
		cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe;
		vtp =  cqe_v1->u0.s.vlan_tag_present;
	} else
		vtp = cqe->u0.s.vlan_tag_present;

	return vtp;

}

static int
oce_cqe_portid_valid(POCE_SOFTC sc, struct oce_nic_rx_cqe *cqe)
{
	struct oce_nic_rx_cqe_v1 *cqe_v1;
	int port_id = 0;

	if (sc->be3_native && (IS_BE(sc) || IS_SH(sc))) {
		cqe_v1 = (struct oce_nic_rx_cqe_v1 *)cqe;
		port_id =  cqe_v1->u0.s.port;
		if (sc->port_id != port_id)
			return 0;
	} else
		; /* For legacy BE3 and Lancer this check is a no-op */

	return 1;

}

#if defined(INET6) || defined(INET)
void
oce_rx_flush_lro(struct oce_rq *rq)
{
	struct lro_ctrl *lro = &rq->lro;
	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;

	if (!IF_LRO_ENABLED(sc))
		return;

	tcp_lro_flush_all(lro);
	rq->lro_pkts_queued = 0;

	return;
}

static int
oce_init_lro(POCE_SOFTC sc)
{
	struct lro_ctrl *lro = NULL;
	int i = 0, rc = 0;

	for (i = 0; i < sc->nrqs; i++) {
		lro = &sc->rq[i]->lro;
		rc = tcp_lro_init(lro);
		if (rc != 0) {
			device_printf(sc->dev, "LRO init failed\n");
			return rc;
		}
		lro->ifp = sc->ifp;
	}

	return rc;
}

void
oce_free_lro(POCE_SOFTC sc)
{
	struct lro_ctrl *lro = NULL;
	int i = 0;

	for (i = 0; i < sc->nrqs; i++) {
		lro = &sc->rq[i]->lro;
		if (lro)
			tcp_lro_free(lro);
	}
}
#endif

int
oce_alloc_rx_bufs(struct oce_rq *rq, int count)
{
	POCE_SOFTC sc = (POCE_SOFTC) rq->parent;
	int i, rc;
	struct oce_packet_desc *pd;
	bus_dma_segment_t segs[6];
	int nsegs, added = 0;
	struct oce_nic_rqe *rqe;
	pd_rxulp_db_t rxdb_reg;
	uint32_t val = 0;
	uint32_t oce_max_rq_posts = 64;

	bzero(&rxdb_reg, sizeof(pd_rxulp_db_t));
	for (i = 0; i < count; i++) {
		pd = &rq->pckts[rq->ring->pidx];
		pd->mbuf = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, oce_rq_buf_size);
		if (pd->mbuf == NULL) {
			device_printf(sc->dev, "mbuf allocation failed, size = %d\n", oce_rq_buf_size);
			break;
		}
		pd->mbuf->m_nextpkt = NULL;

		pd->mbuf->m_len = pd->mbuf->m_pkthdr.len = rq->cfg.frag_size;

		rc = bus_dmamap_load_mbuf_sg(rq->tag,
					     pd->map,
					     pd->mbuf,
					     segs, &nsegs, BUS_DMA_NOWAIT);
		if (rc) {
			m_free(pd->mbuf);
			device_printf(sc->dev, "bus_dmamap_load_mbuf_sg failed rc = %d\n", rc);
			break;
		}

		if (nsegs != 1) {
			i--;
			continue;
		}

		bus_dmamap_sync(rq->tag, pd->map, BUS_DMASYNC_PREREAD);

		rqe = RING_GET_PRODUCER_ITEM_VA(rq->ring, struct oce_nic_rqe);
		rqe->u0.s.frag_pa_hi = ADDR_HI(segs[0].ds_addr);
		rqe->u0.s.frag_pa_lo = ADDR_LO(segs[0].ds_addr);
		DW_SWAP(u32ptr(rqe), sizeof(struct oce_nic_rqe));
		RING_PUT(rq->ring, 1);
		added++;
		rq->pending++;
	}
	oce_max_rq_posts = sc->enable_hwlro ? OCE_HWLRO_MAX_RQ_POSTS : OCE_MAX_RQ_POSTS;
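	/*
	 * The RQ doorbell can only advertise a limited number of newly
	 * posted buffers per write, so larger batches are rung up in
	 * chunks of at most oce_max_rq_posts.
	 */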
1918	if (added != 0) {
1919		for (i = added / oce_max_rq_posts; i > 0; i--) {
1920			rxdb_reg.bits.num_posted = oce_max_rq_posts;
1921			rxdb_reg.bits.qid = rq->rq_id;
1922			if(rq->islro) {
1923                                val |= rq->rq_id & DB_LRO_RQ_ID_MASK;
1924                                val |= oce_max_rq_posts << 16;
1925                                OCE_WRITE_REG32(sc, db, DB_OFFSET, val);
1926			}else {
1927				OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0);
1928			}
1929			added -= oce_max_rq_posts;
1930		}
1931		if (added > 0) {
1932			rxdb_reg.bits.qid = rq->rq_id;
1933			rxdb_reg.bits.num_posted = added;
1934			if(rq->islro) {
1935                                val |= rq->rq_id & DB_LRO_RQ_ID_MASK;
1936                                val |= added << 16;
1937                                OCE_WRITE_REG32(sc, db, DB_OFFSET, val);
1938			}else {
1939				OCE_WRITE_REG32(sc, db, PD_RXULP_DB, rxdb_reg.dw0);
1940			}
1941		}
1942	}
1943
1944	return 0;
1945}
1946
1947static void
1948oce_check_rx_bufs(POCE_SOFTC sc, uint32_t num_cqes, struct oce_rq *rq)
1949{
1950        if (num_cqes) {
1951                oce_arm_cq(sc, rq->cq->cq_id, num_cqes, FALSE);
1952		if(!sc->enable_hwlro) {
1953			if((OCE_RQ_PACKET_ARRAY_SIZE - rq->pending) > 1)
1954				oce_alloc_rx_bufs(rq, ((OCE_RQ_PACKET_ARRAY_SIZE - rq->pending) - 1));
1955		}else {
1956                	if ((OCE_RQ_PACKET_ARRAY_SIZE -1 - rq->pending) > 64)
1957                        	oce_alloc_rx_bufs(rq, 64);
1958        	}
1959	}
1960
1961        return;
1962}
1963
uint16_t
oce_rq_handler_lro(void *arg)
{
	struct oce_rq *rq = (struct oce_rq *)arg;
	struct oce_cq *cq = rq->cq;
	POCE_SOFTC sc = rq->parent;
	struct nic_hwlro_singleton_cqe *cqe;
	struct nic_hwlro_cqe_part2 *cqe2;
	int num_cqes = 0;

	LOCK(&rq->rx_lock);
	bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct nic_hwlro_singleton_cqe);
	while (cqe->valid) {
		if (cqe->cqe_type == 0) { /* singleton cqe */
			/* we should not get singleton cqe after cqe1 on same rq */
			if (rq->cqe_firstpart != NULL) {
				device_printf(sc->dev, "Got singleton cqe after cqe1\n");
				goto exit_rq_handler_lro;
			}
			if (cqe->error != 0) {
				rq->rx_stats.rxcp_err++;
				if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
			}
			oce_rx_lro(rq, cqe, NULL);
			rq->rx_stats.rx_compl++;
			cqe->valid = 0;
			RING_GET(cq->ring, 1);
			num_cqes++;
			if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled))
				break;
		} else if (cqe->cqe_type == 0x1) { /* first part */
			/* we should not get cqe1 after cqe1 on same rq */
			if (rq->cqe_firstpart != NULL) {
				device_printf(sc->dev, "Got cqe1 after cqe1\n");
				goto exit_rq_handler_lro;
			}
			rq->cqe_firstpart = (struct nic_hwlro_cqe_part1 *)cqe;
			RING_GET(cq->ring, 1);
		} else if (cqe->cqe_type == 0x2) { /* second part */
			cqe2 = (struct nic_hwlro_cqe_part2 *)cqe;
			if (cqe2->error != 0) {
				rq->rx_stats.rxcp_err++;
				if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
			}
			/* we should not get cqe2 without cqe1 */
			if (rq->cqe_firstpart == NULL) {
				device_printf(sc->dev, "Got cqe2 without cqe1\n");
				goto exit_rq_handler_lro;
			}
			oce_rx_lro(rq, (struct nic_hwlro_singleton_cqe *)rq->cqe_firstpart, cqe2);

			rq->rx_stats.rx_compl++;
			rq->cqe_firstpart->valid = 0;
			cqe2->valid = 0;
			rq->cqe_firstpart = NULL;

			RING_GET(cq->ring, 1);
			num_cqes += 2;
			if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled))
				break;
		}

		bus_dmamap_sync(cq->ring->dma.tag, cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
		cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct nic_hwlro_singleton_cqe);
	}
	oce_check_rx_bufs(sc, num_cqes, rq);
exit_rq_handler_lro:
	UNLOCK(&rq->rx_lock);
	return 0;
}

/* Handle the Completion Queue for receive */
uint16_t
oce_rq_handler(void *arg)
{
	struct epoch_tracker et;
	struct oce_rq *rq = (struct oce_rq *)arg;
	struct oce_cq *cq = rq->cq;
	POCE_SOFTC sc = rq->parent;
	struct oce_nic_rx_cqe *cqe;
	int num_cqes = 0;

	NET_EPOCH_ENTER(et);
	if (rq->islro) {
		oce_rq_handler_lro(arg);
		NET_EPOCH_EXIT(et);
		return 0;
	}
	LOCK(&rq->rx_lock);
	bus_dmamap_sync(cq->ring->dma.tag,
			cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe);
	while (cqe->u0.dw[2]) {
		DW_SWAP((uint32_t *) cqe, sizeof(oce_rq_cqe));

		if (cqe->u0.s.error == 0) {
			oce_rx(rq, cqe);
		} else {
			rq->rx_stats.rxcp_err++;
			if_inc_counter(sc->ifp, IFCOUNTER_IERRORS, 1);
			/* Post L3/L4 errors to stack. */
			oce_rx(rq, cqe);
		}
		rq->rx_stats.rx_compl++;
		cqe->u0.dw[2] = 0;

#if defined(INET6) || defined(INET)
		if (IF_LRO_ENABLED(sc) && rq->lro_pkts_queued >= 16) {
			oce_rx_flush_lro(rq);
		}
#endif

		RING_GET(cq->ring, 1);
		bus_dmamap_sync(cq->ring->dma.tag,
				cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
		cqe =
		    RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_nic_rx_cqe);
		num_cqes++;
		if (num_cqes >= (IS_XE201(sc) ? 8 : oce_max_rsp_handled))
			break;
	}

#if defined(INET6) || defined(INET)
	if (IF_LRO_ENABLED(sc))
		oce_rx_flush_lro(rq);
#endif

	oce_check_rx_bufs(sc, num_cqes, rq);
	UNLOCK(&rq->rx_lock);
	NET_EPOCH_EXIT(et);
	return 0;
}

/*****************************************************************************
 *			Helper functions in this file			     *
 *****************************************************************************/

static int
oce_attach_ifp(POCE_SOFTC sc)
{
	sc->ifp = if_alloc(IFT_ETHER);
	if (!sc->ifp)
		return ENOMEM;

	ifmedia_init(&sc->media, IFM_IMASK, oce_media_change, oce_media_status);
	ifmedia_add(&sc->media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->media, IFM_ETHER | IFM_AUTO);

	if_setflags(sc->ifp, IFF_BROADCAST | IFF_MULTICAST);
	if_setioctlfn(sc->ifp, oce_ioctl);
	if_setstartfn(sc->ifp, oce_start);
	if_setinitfn(sc->ifp, oce_init);
	if_setmtu(sc->ifp, ETHERMTU);
	if_setsoftc(sc->ifp, sc);
	if_settransmitfn(sc->ifp, oce_multiq_start);
	if_setqflushfn(sc->ifp, oce_multiq_flush);

	if_initname(sc->ifp,
		    device_get_name(sc->dev), device_get_unit(sc->dev));

	if_setsendqlen(sc->ifp, OCE_MAX_TX_DESC - 1);
	if_setsendqready(sc->ifp);

	if_sethwassist(sc->ifp, OCE_IF_HWASSIST);
	if_sethwassistbits(sc->ifp, CSUM_TSO, 0);
	if_sethwassistbits(sc->ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP), 0);

	if_setcapabilities(sc->ifp, OCE_IF_CAPABILITIES);
	if_setcapabilitiesbit(sc->ifp, IFCAP_HWCSUM, 0);
	if_setcapabilitiesbit(sc->ifp, IFCAP_VLAN_HWFILTER, 0);

#if defined(INET6) || defined(INET)
	if_setcapabilitiesbit(sc->ifp, IFCAP_TSO, 0);
	if_setcapabilitiesbit(sc->ifp, IFCAP_LRO, 0);
	if_setcapabilitiesbit(sc->ifp, IFCAP_VLAN_HWTSO, 0);
#endif

	if_setcapenable(sc->ifp, if_getcapabilities(sc->ifp));
	if_setbaudrate(sc->ifp, IF_Gbps(10));

	if_sethwtsomax(sc->ifp, 65536 - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
	if_sethwtsomaxsegcount(sc->ifp, OCE_MAX_TX_ELEMENTS);
	if_sethwtsomaxsegsize(sc->ifp, 4096);

	ether_ifattach(sc->ifp, sc->macaddr.mac_addr);

	return 0;
}

static void
oce_add_vlan(void *arg, if_t ifp, uint16_t vtag)
{
	POCE_SOFTC sc = if_getsoftc(ifp);

	if (if_getsoftc(ifp) != arg)
		return;
	if ((vtag == 0) || (vtag > 4095))
		return;

	sc->vlan_tag[vtag] = 1;
	sc->vlans_added++;
	if (sc->vlans_added <= (sc->max_vlans + 1))
		oce_vid_config(sc);
}

static void
oce_del_vlan(void *arg, if_t ifp, uint16_t vtag)
{
	POCE_SOFTC sc = if_getsoftc(ifp);

	if (if_getsoftc(ifp) != arg)
		return;
	if ((vtag == 0) || (vtag > 4095))
		return;

	sc->vlan_tag[vtag] = 0;
	sc->vlans_added--;
	oce_vid_config(sc);
}

/*
 * A max of 64 VLANs can be configured in BE. If the user configures
 * more, place the card in vlan promiscuous mode.
 */
static int
oce_vid_config(POCE_SOFTC sc)
{
	struct normal_vlan vtags[MAX_VLANFILTER_SIZE];
	uint16_t ntags = 0, i;
	int status = 0;

	if ((sc->vlans_added <= MAX_VLANFILTER_SIZE) &&
			(if_getcapenable(sc->ifp) & IFCAP_VLAN_HWFILTER)) {
		for (i = 0; i < MAX_VLANS; i++) {
			if (sc->vlan_tag[i]) {
				vtags[ntags].vtag = i;
				ntags++;
			}
		}
		if (ntags)
			status = oce_config_vlan(sc, (uint8_t) sc->if_id,
						vtags, ntags, 1, 0);
	} else
		status = oce_config_vlan(sc, (uint8_t) sc->if_id,
					 NULL, 0, 1, 1);
	return status;
}

static void
oce_mac_addr_set(POCE_SOFTC sc)
{
	uint32_t old_pmac_id = sc->pmac_id;
	int status = 0;

	status = bcmp((if_getlladdr(sc->ifp)), sc->macaddr.mac_addr,
			 sc->macaddr.size_of_struct);
	if (!status)
		return;

	status = oce_mbox_macaddr_add(sc, (uint8_t *)(if_getlladdr(sc->ifp)),
					sc->if_id, &sc->pmac_id);
	if (!status) {
		status = oce_mbox_macaddr_del(sc, sc->if_id, old_pmac_id);
		bcopy((if_getlladdr(sc->ifp)), sc->macaddr.mac_addr,
				 sc->macaddr.size_of_struct);
	}
	if (status)
		device_printf(sc->dev, "Failed to update MAC address\n");
}

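/*
 * Firmware pass-through ioctl.  The user buffer begins with the
 * IOCTL_COOKIE string, followed by a struct mbx_hdr and its payload;
 * the request is bounced through DMA-able memory, executed by the
 * firmware and copied back out in place.
 */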
static int
oce_handle_passthrough(if_t ifp, caddr_t data)
{
	POCE_SOFTC sc = if_getsoftc(ifp);
	struct ifreq *ifr = (struct ifreq *)data;
	int rc = ENXIO;
	char cookie[32] = {0};
	void *priv_data = ifr_data_get_ptr(ifr);
	void *ioctl_ptr;
	uint32_t req_size;
	struct mbx_hdr req;
	OCE_DMA_MEM dma_mem;

	if (copyin(priv_data, cookie, strlen(IOCTL_COOKIE)))
		return EFAULT;

	if (memcmp(cookie, IOCTL_COOKIE, strlen(IOCTL_COOKIE)))
		return EINVAL;

	ioctl_ptr = (char *)priv_data + strlen(IOCTL_COOKIE);
	if (copyin(ioctl_ptr, &req, sizeof(struct mbx_hdr)))
		return EFAULT;

	req_size = le32toh(req.u0.req.request_length);
	if (req_size > 65536)
		return EINVAL;

	req_size += sizeof(struct mbx_hdr);
	rc = oce_dma_alloc(sc, req_size, &dma_mem, 0);
	if (rc)
		return ENOMEM;

	if (copyin(ioctl_ptr, OCE_DMAPTR(&dma_mem, char), req_size)) {
		rc = EFAULT;
		goto dma_free;
	}

	rc = oce_pass_through_mbox(sc, &dma_mem, req_size);
	if (rc) {
		rc = EIO;
		goto dma_free;
	}

	if (copyout(OCE_DMAPTR(&dma_mem, char), ioctl_ptr, req_size)) {
		rc = EFAULT;
		goto dma_free;
	}

	/*
	 * The firmware fills in every attribute for this ioctl except
	 * the driver version, so fill that in here.
	 */
	if (req.u0.rsp.opcode == OPCODE_COMMON_GET_CNTL_ATTRIBUTES) {
		struct mbx_common_get_cntl_attr *fw_cmd =
		    (struct mbx_common_get_cntl_attr *)ioctl_ptr;
		_Static_assert(sizeof(COMPONENT_REVISION) <=
		     sizeof(fw_cmd->params.rsp.cntl_attr_info.hba_attr.drv_ver_str),
		     "driver version string too long");

		rc = copyout(COMPONENT_REVISION,
		    fw_cmd->params.rsp.cntl_attr_info.hba_attr.drv_ver_str,
		    sizeof(COMPONENT_REVISION));
	}

dma_free:
	oce_dma_free(sc, &dma_mem);
	return rc;
}

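/*
 * Adaptive interrupt coalescing, run from the periodic timer: estimate
 * the combined RX/TX packet rate seen by each EQ since the last sample
 * and derive a new EQ delay from it, clamped to [min_eqd, max_eqd].
 * When AIC is disabled the statically configured et_eqd is applied
 * instead, and all changed delays are pushed to the firmware in one or
 * more mailbox commands.
 */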
static void
oce_eqd_set_periodic(POCE_SOFTC sc)
{
	struct oce_set_eqd set_eqd[OCE_MAX_EQ];
	struct oce_aic_obj *aic;
	struct oce_eq *eqo;
	uint64_t now = 0, delta;
	int eqd, i, num = 0;
	uint32_t tx_reqs = 0, rxpkts = 0, pps;
	struct oce_wq *wq;
	struct oce_rq *rq;

#define ticks_to_msecs(t)	(1000 * (t) / hz)

	for (i = 0; i < sc->neqs; i++) {
		eqo = sc->eq[i];
		aic = &sc->aic_obj[i];
		/* When setting the static eq delay from the user space */
		if (!aic->enable) {
			if (aic->ticks)
				aic->ticks = 0;
			eqd = aic->et_eqd;
			goto modify_eqd;
		}

		if (i == 0) {
			rq = sc->rq[0];
			rxpkts = rq->rx_stats.rx_pkts;
		} else
			rxpkts = 0;
		if (i + 1 < sc->nrqs) {
			rq = sc->rq[i + 1];
			rxpkts += rq->rx_stats.rx_pkts;
		}
		if (i < sc->nwqs) {
			wq = sc->wq[i];
			tx_reqs = wq->tx_stats.tx_reqs;
		} else
			tx_reqs = 0;
		now = ticks;

		if (!aic->ticks || now < aic->ticks ||
		    rxpkts < aic->prev_rxpkts || tx_reqs < aic->prev_txreqs) {
			aic->prev_rxpkts = rxpkts;
			aic->prev_txreqs = tx_reqs;
			aic->ticks = now;
			continue;
		}

		delta = ticks_to_msecs(now - aic->ticks);

		pps = (((uint32_t)(rxpkts - aic->prev_rxpkts) * 1000) / delta) +
		      (((uint32_t)(tx_reqs - aic->prev_txreqs) * 1000) / delta);
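		/*
		 * Map the packet rate to a delay multiplier: each
		 * 15000 pps adds 4 units, so e.g. 60000 pps yields
		 * eqd = (60000 / 15000) << 2 = 16.  Rates that produce
		 * a value under 8 turn coalescing off entirely.
		 */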
		eqd = (pps / 15000) << 2;
		if (eqd < 8)
			eqd = 0;

		/* Make sure that the eq delay is in the known range */
		eqd = min(eqd, aic->max_eqd);
		eqd = max(eqd, aic->min_eqd);

		aic->prev_rxpkts = rxpkts;
		aic->prev_txreqs = tx_reqs;
		aic->ticks = now;

modify_eqd:
		if (eqd != aic->cur_eqd) {
			set_eqd[num].delay_multiplier = (eqd * 65) / 100;
			set_eqd[num].eq_id = eqo->eq_id;
			aic->cur_eqd = eqd;
			num++;
		}
	}

	/* Push the modified EQ delays to the firmware, at most eight at a time */
	for (i = 0; i < num; i += 8) {
		if ((num - i) >= 8)
			oce_mbox_eqd_modify_periodic(sc, &set_eqd[i], 8);
		else
			oce_mbox_eqd_modify_periodic(sc, &set_eqd[i], (num - i));
	}
}

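/*
 * Poll for unrecoverable hardware errors: the SLIPORT status registers
 * on XE201 adapters, or the masked UE status CSRs elsewhere.  Errors
 * are decoded and logged; only SLIPORT errors latch sc->hw_error.
 */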
static void
oce_detect_hw_error(POCE_SOFTC sc)
{
	uint32_t ue_low = 0, ue_high = 0, ue_low_mask = 0, ue_high_mask = 0;
	uint32_t sliport_status = 0, sliport_err1 = 0, sliport_err2 = 0;
	uint32_t i;

	if (sc->hw_error)
		return;

	if (IS_XE201(sc)) {
		sliport_status = OCE_READ_REG32(sc, db, SLIPORT_STATUS_OFFSET);
		if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
			sliport_err1 = OCE_READ_REG32(sc, db, SLIPORT_ERROR1_OFFSET);
			sliport_err2 = OCE_READ_REG32(sc, db, SLIPORT_ERROR2_OFFSET);
		}
	} else {
		ue_low = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW);
		ue_high = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HIGH);
		ue_low_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_LOW_MASK);
		ue_high_mask = OCE_READ_REG32(sc, devcfg, PCICFG_UE_STATUS_HI_MASK);

		ue_low = (ue_low & ~ue_low_mask);
		ue_high = (ue_high & ~ue_high_mask);
	}

	/* On certain platforms BE hardware can indicate spurious UEs.
	 * A real UE will make the hardware stop working on its own, so
	 * hw_error is deliberately not set on UE detection.
	 */
	if (sliport_status & SLIPORT_STATUS_ERR_MASK) {
		sc->hw_error = TRUE;
		device_printf(sc->dev, "Error detected in the card\n");
		device_printf(sc->dev,
				"ERR: sliport status 0x%x\n", sliport_status);
		device_printf(sc->dev,
				"ERR: sliport error1 0x%x\n", sliport_err1);
		device_printf(sc->dev,
				"ERR: sliport error2 0x%x\n", sliport_err2);
	}

	if (ue_low) {
		for (i = 0; ue_low; ue_low >>= 1, i++) {
			if (ue_low & 1)
				device_printf(sc->dev, "UE: %s bit set\n",
							ue_status_low_desc[i]);
		}
	}

	if (ue_high) {
		for (i = 0; ue_high; ue_high >>= 1, i++) {
			if (ue_high & 1)
				device_printf(sc->dev, "UE: %s bit set\n",
							ue_status_hi_desc[i]);
		}
	}
}

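/*
 * Periodic housekeeping, rescheduled every second: check for hardware
 * errors, refresh NIC and queue statistics, sync the MAC address, kick
 * any stalled TX queues and retune the EQ interrupt delays.
 */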
static void
oce_local_timer(void *arg)
{
	POCE_SOFTC sc = arg;
	int i = 0;

	oce_detect_hw_error(sc);
	oce_refresh_nic_stats(sc);
	oce_refresh_queue_stats(sc);
	oce_mac_addr_set(sc);

	/* TX watchdog */
	for (i = 0; i < sc->nwqs; i++)
		oce_tx_restart(sc, sc->wq[i]);

	/* calculate and set the eq delay for optimal interrupt rate */
	if (IS_BE(sc) || IS_SH(sc))
		oce_eqd_set_periodic(sc);

	callout_reset(&sc->timer, hz, oce_local_timer, sc);
}

static void
oce_tx_compl_clean(POCE_SOFTC sc)
{
	struct oce_wq *wq;
	int i = 0, timeo = 0, num_wqes = 0;
	int pending_txqs = sc->nwqs;

	/* Stop polling for completions when the hardware has been silent
	 * for 10ms, a hardware error has occurred, or no outstanding
	 * completions are expected.
	 */
	do {
		pending_txqs = sc->nwqs;

		for_all_wq_queues(sc, wq, i) {
			num_wqes = oce_wq_handler(wq);

			if (num_wqes)
				timeo = 0;

			if (!wq->ring->num_used)
				pending_txqs--;
		}

		if (pending_txqs == 0 || ++timeo > 10 || sc->hw_error)
			break;

		DELAY(1000);
	} while (TRUE);

	for_all_wq_queues(sc, wq, i) {
		while (wq->ring->num_used) {
			LOCK(&wq->tx_compl_lock);
			oce_process_tx_completion(wq);
			UNLOCK(&wq->tx_compl_lock);
		}
	}
}

/* NOTE : This should only be called holding
 *        DEVICE_LOCK.
 */
static void
oce_if_deactivate(POCE_SOFTC sc)
{
	int i;
	struct oce_rq *rq;
	struct oce_wq *wq;
	struct oce_eq *eq;

	if_setdrvflagbits(sc->ifp, 0, (IFF_DRV_RUNNING | IFF_DRV_OACTIVE));

	oce_tx_compl_clean(sc);

	/* Stop intrs and finish any bottom halves pending */
	oce_hw_intr_disable(sc);

	/* Since taskqueue_drain takes the Giant lock, we should not
	   acquire any other lock. So unlock the device lock and
	   reacquire it after taskqueue_drain completes.
	*/
	UNLOCK(&sc->dev_lock);
	for (i = 0; i < sc->intr_count; i++) {
		if (sc->intrs[i].tq != NULL) {
			taskqueue_drain(sc->intrs[i].tq, &sc->intrs[i].task);
		}
	}
	LOCK(&sc->dev_lock);

	/* Delete RX queue in card with flush param */
	oce_stop_rx(sc);

	/* Invalidate any pending cq and eq entries */
	for_all_evnt_queues(sc, eq, i)
		oce_drain_eq(eq);
	for_all_rq_queues(sc, rq, i)
		oce_drain_rq_cq(rq);
	for_all_wq_queues(sc, wq, i)
		oce_drain_wq_cq(wq);

	/* We still need to receive MCC async events, so re-enable
	   interrupts and arm the first EQ.
	*/
	oce_hw_intr_enable(sc);
	oce_arm_eq(sc, sc->eq[0]->eq_id, 0, TRUE, FALSE);

	DELAY(10);
}

static void
oce_if_activate(POCE_SOFTC sc)
{
	struct oce_eq *eq;
	struct oce_rq *rq;
	struct oce_wq *wq;
	int i, rc = 0;

	if_setdrvflagbits(sc->ifp, IFF_DRV_RUNNING, 0);

	oce_hw_intr_disable(sc);

	oce_start_rx(sc);

	for_all_rq_queues(sc, rq, i) {
		rc = oce_start_rq(rq);
		if (rc)
			device_printf(sc->dev, "Unable to start RX\n");
	}

	for_all_wq_queues(sc, wq, i) {
		rc = oce_start_wq(wq);
		if (rc)
			device_printf(sc->dev, "Unable to start TX\n");
	}

	for_all_evnt_queues(sc, eq, i)
		oce_arm_eq(sc, eq->eq_id, 0, TRUE, FALSE);

	oce_hw_intr_enable(sc);
}

static void
process_link_state(POCE_SOFTC sc, struct oce_async_cqe_link_state *acqe)
{
	/* Update Link status */
	if ((acqe->u0.s.link_status & ~ASYNC_EVENT_LOGICAL) ==
	     ASYNC_EVENT_LINK_UP) {
		sc->link_status = ASYNC_EVENT_LINK_UP;
		if_link_state_change(sc->ifp, LINK_STATE_UP);
	} else {
		sc->link_status = ASYNC_EVENT_LINK_DOWN;
		if_link_state_change(sc->ifp, LINK_STATE_DOWN);
	}
}

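/*
 * Record the OS-to-BMC filters advertised in a GRP5 async event.  The
 * per-protocol enable bits from the event are packed into
 * sc->bmc_filt_mask, ARP in bit 0 through multicast in bit 8.
 */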
static void
oce_async_grp5_osbmc_process(POCE_SOFTC sc,
			     struct oce_async_evt_grp5_os2bmc *evt)
{
	DW_SWAP(evt, sizeof(struct oce_async_evt_grp5_os2bmc));
	if (!evt->u.s.mgmt_enable)
		return;

	sc->flags |= OCE_FLAGS_OS2BMC;
	sc->bmc_filt_mask = evt->u.s.arp_filter;
	sc->bmc_filt_mask |= (evt->u.s.dhcp_client_filt << 1);
	sc->bmc_filt_mask |= (evt->u.s.dhcp_server_filt << 2);
	sc->bmc_filt_mask |= (evt->u.s.net_bios_filt << 3);
	sc->bmc_filt_mask |= (evt->u.s.bcast_filt << 4);
	sc->bmc_filt_mask |= (evt->u.s.ipv6_nbr_filt << 5);
	sc->bmc_filt_mask |= (evt->u.s.ipv6_ra_filt << 6);
	sc->bmc_filt_mask |= (evt->u.s.ipv6_ras_filt << 7);
	sc->bmc_filt_mask |= (evt->u.s.mcast_filt << 8);
}

static void
oce_process_grp5_events(POCE_SOFTC sc, struct oce_mq_cqe *cqe)
{
	struct oce_async_event_grp5_pvid_state *gcqe;
	struct oce_async_evt_grp5_os2bmc *bmccqe;

	switch (cqe->u0.s.async_type) {
	case ASYNC_EVENT_PVID_STATE:
		/* GRP5 PVID */
		gcqe = (struct oce_async_event_grp5_pvid_state *)cqe;
		if (gcqe->enabled)
			sc->pvid = gcqe->tag & VLAN_VID_MASK;
		else
			sc->pvid = 0;
		break;
	case ASYNC_EVENT_OS2BMC:
		bmccqe = (struct oce_async_evt_grp5_os2bmc *)cqe;
		oce_async_grp5_osbmc_process(sc, bmccqe);
		break;
	default:
		break;
	}
}

/* Handle the Completion Queue for the Mailbox/Async notifications */
uint16_t
oce_mq_handler(void *arg)
{
	struct oce_mq *mq = (struct oce_mq *)arg;
	POCE_SOFTC sc = mq->parent;
	struct oce_cq *cq = mq->cq;
	int num_cqes = 0, evt_type = 0, optype = 0;
	struct oce_mq_cqe *cqe;
	struct oce_async_cqe_link_state *acqe;
	struct oce_async_event_qnq *dbgcqe;

	bus_dmamap_sync(cq->ring->dma.tag,
			cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
	cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe);

	while (cqe->u0.dw[3]) {
		DW_SWAP((uint32_t *) cqe, sizeof(oce_mq_cqe));
		if (cqe->u0.s.async_event) {
			evt_type = cqe->u0.s.event_type;
			optype = cqe->u0.s.async_type;
			if (evt_type == ASYNC_EVENT_CODE_LINK_STATE) {
				/* Link status evt */
				acqe = (struct oce_async_cqe_link_state *)cqe;
				process_link_state(sc, acqe);
			} else if (evt_type == ASYNC_EVENT_GRP5) {
				oce_process_grp5_events(sc, cqe);
			} else if (evt_type == ASYNC_EVENT_CODE_DEBUG &&
					optype == ASYNC_EVENT_DEBUG_QNQ) {
				dbgcqe = (struct oce_async_event_qnq *)cqe;
				if (dbgcqe->valid)
					sc->qnqid = dbgcqe->vlan_tag;
				sc->qnq_debug_event = TRUE;
			}
		}
		cqe->u0.dw[3] = 0;
		RING_GET(cq->ring, 1);
		bus_dmamap_sync(cq->ring->dma.tag,
				cq->ring->dma.map, BUS_DMASYNC_POSTWRITE);
		cqe = RING_GET_CONSUMER_ITEM_VA(cq->ring, struct oce_mq_cqe);
		num_cqes++;
	}

	if (num_cqes)
		oce_arm_cq(sc, cq->cq_id, num_cqes, FALSE);

	return 0;
}

static void
setup_max_queues_want(POCE_SOFTC sc)
{
	/* Check if it is a FLEX machine. If so, don't use RSS */
	if ((sc->function_mode & FNM_FLEX10_MODE) ||
	    (sc->function_mode & FNM_UMC_MODE)    ||
	    (sc->function_mode & FNM_VNIC_MODE)	  ||
	    (!is_rss_enabled(sc))		  ||
	    IS_BE2(sc)) {
		sc->nrqs = 1;
		sc->nwqs = 1;
	} else {
		sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1;
		sc->nwqs = MIN(OCE_NCPUS, sc->nrssqs);
	}

	if (IS_BE2(sc) && is_rss_enabled(sc))
		sc->nrqs = MIN(OCE_NCPUS, sc->nrssqs) + 1;
}

static void
update_queues_got(POCE_SOFTC sc)
{
	if (is_rss_enabled(sc)) {
		sc->nrqs = sc->intr_count + 1;
		sc->nwqs = sc->intr_count;
	} else {
		sc->nrqs = 1;
		sc->nwqs = 1;
	}

	if (IS_BE2(sc))
		sc->nwqs = 1;
}

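/*
 * Return TRUE for IPv6 frames whose next header is neither TCP nor UDP
 * and whose first extension header reports a length of 0xff; m_data is
 * restored before returning.
 */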
static int
oce_check_ipv6_ext_hdr(struct mbuf *m)
{
	struct ether_header *eh = mtod(m, struct ether_header *);
	caddr_t m_datatemp = m->m_data;

	if (eh->ether_type == htons(ETHERTYPE_IPV6)) {
		m->m_data += sizeof(struct ether_header);
		struct ip6_hdr *ip6 = mtod(m, struct ip6_hdr *);

		if ((ip6->ip6_nxt != IPPROTO_TCP) &&
				(ip6->ip6_nxt != IPPROTO_UDP)) {
			struct ip6_ext *ip6e = NULL;
			m->m_data += sizeof(struct ip6_hdr);

			ip6e = mtod(m, struct ip6_ext *);
			if (ip6e->ip6e_len == 0xff) {
				m->m_data = m_datatemp;
				return TRUE;
			}
		}
		m->m_data = m_datatemp;
	}
	return FALSE;
}

static int
is_be3_a1(POCE_SOFTC sc)
{
	if ((sc->flags & OCE_FLAGS_BE3) && ((sc->asic_revision & 0xFF) < 2)) {
		return TRUE;
	}
	return FALSE;
}

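/*
 * Insert VLAN tags into the frame in software.  Any tag carried in the
 * mbuf packet header is encapsulated into the payload; when a port
 * VLAN ID (pvid) is configured it is used for untagged frames, and on
 * QnQ setups the outer qnqid tag is prepended as well.  *complete is
 * cleared whenever the hardware must not insert a tag of its own.
 */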
static struct mbuf *
oce_insert_vlan_tag(POCE_SOFTC sc, struct mbuf *m, boolean_t *complete)
{
	uint16_t vlan_tag = 0;

	if (!M_WRITABLE(m))
		return NULL;

	/* Embed vlan tag in the packet if it is not part of it */
	if (m->m_flags & M_VLANTAG) {
		vlan_tag = EVL_VLANOFTAG(m->m_pkthdr.ether_vtag);
		m->m_flags &= ~M_VLANTAG;
	}

	/* if UMC, ignore vlan tag insertion and instead insert pvid */
	if (sc->pvid) {
		if (!vlan_tag)
			vlan_tag = sc->pvid;
		if (complete)
			*complete = FALSE;
	}

	if (vlan_tag) {
		m = ether_vlanencap(m, vlan_tag);
		if (m == NULL)
			return NULL;
	}

	if (sc->qnqid) {
		m = ether_vlanencap(m, sc->qnqid);

		if (complete)
			*complete = FALSE;
	}
	return m;
}

static int
oce_tx_asic_stall_verify(POCE_SOFTC sc, struct mbuf *m)
{
	if (is_be3_a1(sc) && IS_QNQ_OR_UMC(sc) &&
			oce_check_ipv6_ext_hdr(m)) {
		return TRUE;
	}
	return FALSE;
}


static void
oce_get_config(POCE_SOFTC sc)
{
	int rc = 0;
	uint32_t max_rss = 0;

	if ((IS_BE(sc) || IS_SH(sc)) && (!sc->be3_native))
		max_rss = OCE_LEGACY_MODE_RSS;
	else
		max_rss = OCE_MAX_RSS;

	if (!IS_BE(sc)) {
		rc = oce_get_profile_config(sc, max_rss);
		if (rc) {
			sc->nwqs = OCE_MAX_WQ;
			sc->nrssqs = max_rss;
			sc->nrqs = sc->nrssqs + 1;
		}
	} else { /* For BE3 don't rely on fw for determining the resources */
		sc->nrssqs = max_rss;
		sc->nrqs = sc->nrssqs + 1;
		sc->nwqs = OCE_MAX_WQ;
		sc->max_vlans = MAX_VLANFILTER_SIZE;
	}
}

static void
oce_rdma_close(void)
{
	oce_rdma_if = NULL;
}

static void
oce_get_mac_addr(POCE_SOFTC sc, uint8_t *macaddr)
{
	memcpy(macaddr, sc->macaddr.mac_addr, 6);
}

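/*
 * Registration hook for the companion RDMA (RoCE) driver.  The caller
 * supplies its interface table and receives the NIC callbacks in
 * return; each adapter on the softc list is then announced to it with
 * its doorbell mapping, interrupt vectors and MAC address.
 */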
int
oce_register_rdma(POCE_RDMA_INFO rdma_info, POCE_RDMA_IF rdma_if)
{
	POCE_SOFTC sc;
	struct oce_dev_info di;
	int i;

	if ((rdma_info == NULL) || (rdma_if == NULL)) {
		return -EINVAL;
	}

	if ((rdma_info->size != OCE_RDMA_INFO_SIZE) ||
	    (rdma_if->size != OCE_RDMA_IF_SIZE)) {
		return -ENXIO;
	}

	rdma_info->close = oce_rdma_close;
	rdma_info->mbox_post = oce_mbox_post;
	rdma_info->common_req_hdr_init = mbx_common_req_hdr_init;
	rdma_info->get_mac_addr = oce_get_mac_addr;

	oce_rdma_if = rdma_if;

	sc = softc_head;
	while (sc != NULL) {
		if (oce_rdma_if->announce != NULL) {
			memset(&di, 0, sizeof(di));
			di.dev = sc->dev;
			di.softc = sc;
			di.ifp = sc->ifp;
			di.db_bhandle = sc->db_bhandle;
			di.db_btag = sc->db_btag;
			di.db_page_size = 4096;
			if (sc->flags & OCE_FLAGS_USING_MSIX) {
				di.intr_mode = OCE_INTERRUPT_MODE_MSIX;
			} else if (sc->flags & OCE_FLAGS_USING_MSI) {
				di.intr_mode = OCE_INTERRUPT_MODE_MSI;
			} else {
				di.intr_mode = OCE_INTERRUPT_MODE_INTX;
			}
			di.dev_family = OCE_GEN2_FAMILY; /* XXX: must detect skyhawk */
			if (di.intr_mode != OCE_INTERRUPT_MODE_INTX) {
				di.msix.num_vectors = sc->intr_count + sc->roce_intr_count;
				di.msix.start_vector = sc->intr_count;
				for (i = 0; i < di.msix.num_vectors; i++) {
					di.msix.vector_list[i] = sc->intrs[i].vector;
				}
			}
			memcpy(di.mac_addr, sc->macaddr.mac_addr, 6);
			di.vendor_id = pci_get_vendor(sc->dev);
			di.dev_id = pci_get_device(sc->dev);

			if (sc->rdma_flags & OCE_RDMA_FLAG_SUPPORTED) {
				di.flags |= OCE_RDMA_INFO_RDMA_SUPPORTED;
			}

			rdma_if->announce(&di);
		}
		/* Advance unconditionally so a NULL announce cannot loop forever */
		sc = sc->next;
	}

	return 0;
}


static void
oce_read_env_variables(POCE_SOFTC sc)
{
	char *value = NULL;
	int rc = 0;

	/* read if user wants to enable hwlro or swlro */
	/* value = getenv("oce_enable_hwlro"); */
	if (value && IS_SH(sc)) {
		sc->enable_hwlro = strtol(value, NULL, 10);
		if (sc->enable_hwlro) {
			rc = oce_mbox_nic_query_lro_capabilities(sc, NULL, NULL);
			if (rc) {
				device_printf(sc->dev, "no hardware lro support\n");
				device_printf(sc->dev, "software lro enabled\n");
				sc->enable_hwlro = 0;
			} else {
				device_printf(sc->dev, "hardware lro enabled\n");
				oce_max_rsp_handled = 32;
			}
		} else {
			device_printf(sc->dev, "software lro enabled\n");
		}
	} else {
		sc->enable_hwlro = 0;
	}

	/* read mbuf size */
	/* value = getenv("oce_rq_buf_size"); */
	if (value && IS_SH(sc)) {
		oce_rq_buf_size = strtol(value, NULL, 10);
		switch (oce_rq_buf_size) {
		case 2048:
		case 4096:
		case 9216:
		case 16384:
			break;

		default:
			device_printf(sc->dev, "Supported oce_rq_buf_size values are 2K, 4K, 9K, 16K\n");
			oce_rq_buf_size = 2048;
		}
	}

	return;
}
