1/*-
2 * Copyright 2003-2011 Netlogic Microsystems (Netlogic). All rights
3 * reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are
7 * met:
8 *
9 * 1. Redistributions of source code must retain the above copyright
10 *    notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 *    notice, this list of conditions and the following disclaimer in
13 *    the documentation and/or other materials provided with the
14 *    distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY Netlogic Microsystems ``AS IS'' AND
17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL NETLOGIC OR CONTRIBUTORS BE
20 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
26 * THE POSSIBILITY OF SUCH DAMAGE.
27 *
28 * NETLOGIC_BSD */
29
30#include <sys/cdefs.h>
31__FBSDID("$FreeBSD$");
32#include <sys/types.h>
33#include <sys/systm.h>
34#include <sys/param.h>
35#include <sys/lock.h>
36#include <sys/mutex.h>
37#include <sys/proc.h>
38#include <sys/limits.h>
39#include <sys/bus.h>
40
41#include <sys/ktr.h>
42#include <sys/kernel.h>
43#include <sys/kthread.h>
44#include <sys/proc.h>
45#include <sys/resourcevar.h>
46#include <sys/sched.h>
47#include <sys/unistd.h>
48#include <sys/sysctl.h>
49#include <sys/malloc.h>
50
51#include <machine/reg.h>
52#include <machine/cpu.h>
53#include <machine/hwfunc.h>
54#include <machine/mips_opcode.h>
55#include <machine/param.h>
56#include <machine/intr_machdep.h>
57
58#include <mips/nlm/hal/mips-extns.h>
59#include <mips/nlm/hal/haldefs.h>
60#include <mips/nlm/hal/iomap.h>
61#include <mips/nlm/hal/cop2.h>
62#include <mips/nlm/hal/fmn.h>
63#include <mips/nlm/hal/pic.h>
64
65#include <mips/nlm/msgring.h>
66#include <mips/nlm/interrupt.h>
67#include <mips/nlm/xlp.h>
68#include <mips/nlm/board.h>
69
/* Number of entries in the per-source-station handler table (msgmap). */
#define MSGRNG_NSTATIONS 1024
/*
 * Keep track of our message ring handler threads; each core has a
 * different message station. Ideally we would start a few message
 * handling threads on every core and wake them up depending on load.
 */
struct msgring_thread {
	struct thread	*thread; /* msgring handler thread for this slot */
	int	needed;		/* set by the interrupt filter: thread needs to wake up */
};
/* One slot per hardware thread, indexed by hardware thread id. */
static struct msgring_thread msgring_threads[XLP_MAX_CORES * XLP_MAX_THREADS];
static struct proc *msgring_proc;	/* all threads are under a proc */
83
/*
 * The device drivers can register a handler for the messages sent
 * from a station (corresponding to the device).
 */
struct tx_stn_handler {
	msgring_handler action;	/* callback; NULL when nothing is registered */
	void *arg;		/* opaque argument handed back to action */
};
/* Handler table indexed by source station id; filled by register_msgring_handler(). */
static struct tx_stn_handler msgmap[MSGRNG_NSTATIONS];
/* Spin mutex held while register_msgring_handler() updates msgmap. */
static struct mtx	msgmap_lock;
/* CMS register base, set in xlp_msgring_config() from nlm_get_cms_regbase(0). */
uint64_t xlp_cms_base;
/* Hardware threads that get a message handler thread; bit N is hw thread N. */
uint32_t xlp_msg_thread_mask;
/* Handler threads per core; clamped to xlp_threads_per_core at config time. */
static int xlp_msg_threads_per_core = 3; /* Make tunable */

/* Forward declarations for helpers defined later in this file. */
static void create_msgring_thread(int hwtid);
static int msgring_process_fast_intr(void *arg);
100/*
101 * Boot time init, called only once
102 */
103void
104xlp_msgring_config(void)
105{
106	unsigned int thrmask, mask;
107	int i;
108
109	/* TODO: Add other nodes */
110	xlp_cms_base = nlm_get_cms_regbase(0);
111
112	mtx_init(&msgmap_lock, "msgring", NULL, MTX_SPIN);
113	if (xlp_threads_per_core < xlp_msg_threads_per_core)
114		xlp_msg_threads_per_core = xlp_threads_per_core;
115	thrmask = ((1 << xlp_msg_threads_per_core) - 1);
116	/*thrmask <<= xlp_threads_per_core - xlp_msg_threads_per_core;*/
117	mask = 0;
118	for (i = 0; i < XLP_MAX_CORES; i++) {
119		mask <<= XLP_MAX_THREADS;
120		mask |= thrmask;
121	}
122	xlp_msg_thread_mask = xlp_hw_thread_mask & mask;
123	printf("Initializing CMS...@%jx, Message handler thread mask %#jx\n",
124	    (uintmax_t)xlp_cms_base, (uintmax_t)xlp_msg_thread_mask);
125}
126
127/*
128 * Initialize the messaging subsystem.
129 *
130 * Message Stations are shared among all threads in a cpu core, this
131 * has to be called once from every core which is online.
132 */
133void
134xlp_msgring_iodi_config(void)
135{
136	void *cookie;
137
138	xlp_msgring_config();
139/*	nlm_cms_default_setup(0,0,0,0); */
140	nlm_cms_credit_setup(50);
141	create_msgring_thread(0);
142	cpu_establish_hardintr("msgring", msgring_process_fast_intr, NULL,
143	    NULL, IRQ_MSGRING, INTR_TYPE_NET, &cookie);
144}
145
146void
147nlm_cms_credit_setup(int credit)
148{
149	int src, qid, i;
150
151#if 0
152	/* there are a total of 18 src stations on XLP. */
153	printf("Setting up CMS credits!\n");
154	for (src=0; src<18; src++) {
155		for(qid=0; qid<1024; qid++) {
156			nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
157		}
158	}
159#endif
160	printf("Setting up CMS credits!\n");
161	/* CPU Credits */
162	for (i = 1; i < 8; i++) {
163		src = (i << 4);
164		for (qid = 0; qid < 1024; qid++)
165			nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
166	}
167	/* PCIE Credits */
168	for(i = 0; i < 4; i++) {
169		src = (256 + (i * 2));
170		for(qid = 0; qid < 1024; qid++)
171			nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
172	}
173	/* DTE Credits */
174	src = 264;
175	for (qid = 0; qid < 1024; qid++)
176		nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
177	/* RSA Credits */
178	src = 272;
179	for (qid = 0; qid < 1024; qid++)
180		nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
181
182	/* Crypto Credits */
183	src = 281;
184	for (qid = 0; qid < 1024; qid++)
185		nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
186
187	/* CMP Credits */
188	src = 298;
189	for (qid = 0; qid < 1024; qid++)
190		nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
191
192	/* POE Credits */
193	src = 384;
194	for(qid = 0; qid < 1024; qid++)
195		nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
196
197	/* NAE Credits */
198	src = 476;
199	for(qid = 0; qid < 1024; qid++)
200		nlm_cms_setup_credits(xlp_cms_base, qid, src, credit);
201}
202
203void
204xlp_msgring_cpu_init(uint32_t cpuid)
205{
206	int queue,i;
207
208	queue = CMS_CPU_PUSHQ(0, ((cpuid >> 2) & 0x7), (cpuid & 0x3), 0);
209	/* temp allocate 4 segments to each output queue */
210	nlm_cms_alloc_onchip_q(xlp_cms_base, queue, 4);
211	/* Enable high watermark and non empty interrupt */
212	nlm_cms_per_queue_level_intr(xlp_cms_base, queue,2,0);
213	for(i=0;i<8;i++) {
214		/* temp distribute the credits to all CPU stations */
215		nlm_cms_setup_credits(xlp_cms_base, queue, i * 16, 8);
216	}
217}
218
219void
220xlp_cpu_msgring_handler(int bucket, int size, int code, int stid,
221		    struct nlm_fmn_msg *msg, void *data)
222{
223	int i;
224
225	printf("vc:%d srcid:%d size:%d\n",bucket,stid,size);
226	for(i=0;i<size;i++) {
227		printf("msg->msg[%d]:0x%jx ", i, (uintmax_t)msg->msg[i]);
228	}
229	printf("\n");
230}
231
232/*
233 * Drain out max_messages for the buckets set in the bucket mask.
234 * Use max_msgs = 0 to drain out all messages.
235 */
236int
237xlp_handle_msg_vc(int vc, int max_msgs)
238{
239	struct nlm_fmn_msg msg;
240	int i, srcid = 0, size = 0, code = 0;
241	struct tx_stn_handler *he;
242	uint32_t mflags, status;
243
244	for (i = 0; i < max_msgs; i++) {
245		mflags = nlm_save_flags_cop2();
246		status = nlm_fmn_msgrcv(vc, &srcid, &size, &code, &msg);
247		nlm_restore_flags(mflags);
248		if (status != 0) /* If there is no msg or error */
249			break;
250		if (srcid < 0 && srcid >= 1024) {
251			printf("[%s]: bad src id %d\n", __func__, srcid);
252			continue;
253		}
254		he = &msgmap[srcid];
255		if(he->action != NULL)
256			(he->action)(vc, size, code, srcid, &msg, he->arg);
257#if 0 /* debug */
258		else
259			printf("[%s]: No Handler for message from stn_id=%d,"
260			    " vc=%d, size=%d, msg0=%jx, dropping message\n",
261			    __func__, srcid, vc, size, (uintmax_t)msg.msg[0]);
262#endif
263	}
264
265	return (i);
266}
267
268static int
269msgring_process_fast_intr(void *arg)
270{
271	struct msgring_thread *mthd;
272	struct thread *td;
273	int	cpu;
274
275	cpu = nlm_cpuid();
276	mthd = &msgring_threads[cpu];
277	td = mthd->thread;
278
279	/* clear pending interrupts */
280	nlm_write_c0_eirr(1ULL << IRQ_MSGRING);
281
282	/* wake up the target thread */
283	mthd->needed = 1;
284	thread_lock(td);
285	if (TD_AWAITING_INTR(td)) {
286		TD_CLR_IWAIT(td);
287		sched_add(td, SRQ_INTR);
288	}
289
290	thread_unlock(td);
291	return (FILTER_HANDLED);
292}
293
/*
 * Debug counters, indexed by hardware thread id (and virtual channel):
 * updated by msgring_process() and reported by the debug sysctl.
 */
u_int fmn_msgcount[32][4];	/* messages handled per thread and vc */
u_int fmn_loops[32];		/* poll/sleep rounds completed per thread */
296
297static void
298msgring_process(void * arg)
299{
300	volatile struct msgring_thread *mthd;
301	struct thread *td;
302	uint32_t mflags;
303	int hwtid, vc, handled, nmsgs;
304
305	hwtid = (intptr_t)arg;
306	mthd = &msgring_threads[hwtid];
307	td = mthd->thread;
308	KASSERT(curthread == td,
309	    ("%s:msg_ithread and proc linkage out of sync", __func__));
310
311	/* First bind this thread to the right CPU */
312	thread_lock(td);
313	sched_bind(td, xlp_hwtid_to_cpuid[hwtid]);
314	thread_unlock(td);
315
316	if (hwtid != nlm_cpuid())
317		printf("Misscheduled hwtid %d != cpuid %d\n", hwtid, nlm_cpuid());
318	mflags = nlm_save_flags_cop2();
319	nlm_fmn_cpu_init(IRQ_MSGRING, 0, 0, 0, 0, 0);
320	nlm_restore_flags(mflags);
321
322	/* start processing messages */
323	for( ; ; ) {
324		/*atomic_store_rel_int(&mthd->needed, 0);*/
325
326	        /* enable cop2 access */
327		do {
328			handled = 0;
329			for (vc = 0; vc < 4; vc++) {
330				nmsgs = xlp_handle_msg_vc(vc, 1);
331				fmn_msgcount[hwtid][vc] += nmsgs;
332				handled += nmsgs;
333			}
334		} while (handled);
335
336		/* sleep */
337#if 0
338		thread_lock(td);
339		if (mthd->needed) {
340			thread_unlock(td);
341			continue;
342		}
343		sched_class(td, PRI_ITHD);
344		TD_SET_IWAIT(td);
345		mi_switch(SW_VOL, NULL);
346		thread_unlock(td);
347#else
348		pause("wmsg", 1);
349#endif
350		fmn_loops[hwtid]++;
351	}
352}
353
354static void
355create_msgring_thread(int hwtid)
356{
357	struct msgring_thread *mthd;
358	struct thread *td;
359	int	error;
360
361	mthd = &msgring_threads[hwtid];
362	error = kproc_kthread_add(msgring_process, (void *)(uintptr_t)hwtid,
363	    &msgring_proc, &td, RFSTOPPED, 2, "msgrngproc",
364	    "msgthr%d", hwtid);
365	if (error)
366		panic("kproc_kthread_add() failed with %d", error);
367	mthd->thread = td;
368
369	thread_lock(td);
370	sched_class(td, PRI_ITHD);
371	sched_add(td, SRQ_INTR);
372	thread_unlock(td);
373	CTR2(KTR_INTR, "%s: created %s", __func__, td->td_name);
374}
375
376int
377register_msgring_handler(int startb, int endb, msgring_handler action,
378    void *arg)
379{
380	int	i;
381
382	printf("Register handler %d-%d %p(%p)\n", startb, endb, action, arg);
383	KASSERT(startb >= 0 && startb <= endb && endb < MSGRNG_NSTATIONS,
384	    ("Invalid value for for bucket range %d,%d", startb, endb));
385
386	mtx_lock_spin(&msgmap_lock);
387	for (i = startb; i <= endb; i++) {
388		KASSERT(msgmap[i].action == NULL,
389		   ("Bucket %d already used [action %p]", i, msgmap[i].action));
390		msgmap[i].action = action;
391		msgmap[i].arg = arg;
392	}
393	mtx_unlock_spin(&msgmap_lock);
394	return (0);
395}
396
397/*
398 * Start message ring processing threads on other CPUs, after SMP start
399 */
400static void
401start_msgring_threads(void *arg)
402{
403	int	hwt;
404
405	for (hwt = 1; hwt < XLP_MAX_CORES * XLP_MAX_THREADS; hwt++) {
406		if ((xlp_msg_thread_mask & (1 << hwt)) == 0)
407			continue;
408		create_msgring_thread(hwt);
409	}
410}
411
412SYSINIT(start_msgring_threads, SI_SUB_SMP, SI_ORDER_MIDDLE,
413    start_msgring_threads, NULL);
414
415/*
416 * DEBUG support, XXX: static buffer, not locked
417 */
418static int
419sys_print_debug(SYSCTL_HANDLER_ARGS)
420{
421	int error, nb, i, fs;
422	static char xprintb[4096], *buf;
423
424	buf = xprintb;
425	fs = sizeof(xprintb);
426	nb = snprintf(buf, fs,
427	    "\nID     vc0       vc1       vc2     vc3     loops\n");
428	buf += nb;
429	fs -= nb;
430	for (i = 0; i < 32; i++) {
431		if ((xlp_hw_thread_mask & (1 << i)) == 0)
432			continue;
433		nb = snprintf(buf, fs,
434		    "%2d: %8d %8d %8d %8d %8d\n", i,
435		    fmn_msgcount[i][0], fmn_msgcount[i][1],
436		    fmn_msgcount[i][2], fmn_msgcount[i][3],
437		    fmn_loops[i]);
438		buf += nb;
439		fs -= nb;
440	}
441	error = SYSCTL_OUT(req, xprintb, buf - xprintb);
442	return (error);
443}
444
445SYSCTL_PROC(_debug, OID_AUTO, msgring, CTLTYPE_STRING | CTLFLAG_RD, 0, 0,
446    sys_print_debug, "A", "msgring debug info");
447