• Home
  • History
  • Annotate
  • Line#
  • Navigate
  • Raw
  • Download
  • only in /asuswrt-rt-n18u-9.0.0.4.380.2695/release/src-rt-6.x.4708/linux/linux-2.6.36/arch/sparc/kernel/
1/* ldc.c: Logical Domain Channel link-layer protocol driver.
2 *
3 * Copyright (C) 2007, 2008 David S. Miller <davem@davemloft.net>
4 */
5
6#include <linux/kernel.h>
7#include <linux/module.h>
8#include <linux/slab.h>
9#include <linux/spinlock.h>
10#include <linux/delay.h>
11#include <linux/errno.h>
12#include <linux/string.h>
13#include <linux/scatterlist.h>
14#include <linux/interrupt.h>
15#include <linux/list.h>
16#include <linux/init.h>
17#include <linux/bitmap.h>
18
19#include <asm/hypervisor.h>
20#include <asm/iommu.h>
21#include <asm/page.h>
22#include <asm/ldc.h>
23#include <asm/mdesc.h>
24
/* Driver identification strings; PFX is the prefix put in front of
 * every printk() message emitted by this file.
 */
#define DRV_MODULE_NAME		"ldc"
#define PFX DRV_MODULE_NAME	": "
#define DRV_MODULE_VERSION	"1.1"
#define DRV_MODULE_RELDATE	"July 22, 2008"

/* Version banner assembled from the identification macros above. */
static char version[] __devinitdata =
	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
/* Size in bytes of a single LDC queue entry; all queue offsets in this
 * file are multiples of this value.
 */
#define LDC_PACKET_SIZE		64
33
/* Packet header layout for unreliable and reliable mode frames.
 * When in RAW mode, packets are simply straight 64-byte payloads
 * with no headers.
 *
 * The first 8 bytes (type, stype, ctrl, env, seqid) form the header;
 * the union holds the remaining LDC_PACKET_SIZE - 8 bytes.
 */
struct ldc_packet {
	/* Frame class: control, data or error. */
	u8			type;
#define LDC_CTRL		0x01
#define LDC_DATA		0x02
#define LDC_ERR			0x10

	/* Frame sub-type: informational, positive or negative ack. */
	u8			stype;
#define LDC_INFO		0x01
#define LDC_ACK			0x02
#define LDC_NACK		0x04

	/* Control operation carried by an LDC_CTRL frame. */
	u8			ctrl;
#define LDC_VERS		0x01 /* Link Version		*/
#define LDC_RTS			0x02 /* Request To Send		*/
#define LDC_RTR			0x03 /* Ready To Receive	*/
#define LDC_RDX			0x04 /* Ready for Data eXchange	*/
#define LDC_CTRL_MSK		0x0f

	/* Envelope byte.  The RTS/RTR handshake frames built in this file
	 * store the channel mode here; the masks below split it into a
	 * length field and fragment start/stop bits.
	 */
	u8			env;
#define LDC_LEN			0x3f
#define LDC_FRAG_MASK		0xc0
#define LDC_START		0x40
#define LDC_STOP		0x80

	/* Sequence number of this frame. */
	u32			seqid;

	union {
		/* Payload when no ack id is carried (also used for the
		 * version payload of LDC_VERS control frames).
		 */
		u8		u_data[LDC_PACKET_SIZE - 8];
		/* Layout carrying an ack id ahead of a shorter payload. */
		struct {
			u32	pad;
			u32	ackid;
			u8	r_data[LDC_PACKET_SIZE - 8 - 8];
		} r;
	} u;
};
73
/* Protocol version pair exchanged in LDC_VERS control frames. */
struct ldc_version {
	u16 major;
	u16 minor;
};
78
/* Protocol versions this driver supports.
 * Ordered from largest major to lowest; find_by_major() relies on that
 * ordering when it returns the first entry whose major fits.
 */
static struct ldc_version ver_arr[] = {
	{ .major = 1, .minor = 0 },
};
83
/* MTU applied by ldc_alloc() when the supplied config leaves mtu at 0. */
#define LDC_DEFAULT_MTU			(4 * LDC_PACKET_SIZE)
/* Number of entries in each TX/RX queue: one page worth of packets. */
#define LDC_DEFAULT_NUM_ENTRIES		(PAGE_SIZE / LDC_PACKET_SIZE)

struct ldc_channel;
88
/* Per-mode write/read entry points.  ldc_alloc() selects one of the
 * instances declared below according to the configured channel mode.
 */
struct ldc_mode_ops {
	int (*write)(struct ldc_channel *, const void *, unsigned int);
	int (*read)(struct ldc_channel *, void *, unsigned int);
};
93
/* Forward declarations of the mode operation tables; ldc_alloc() maps
 * LDC_MODE_RAW, LDC_MODE_UNRELIABLE and LDC_MODE_STREAM onto these.
 */
static const struct ldc_mode_ops raw_ops;
static const struct ldc_mode_ops nonraw_ops;
static const struct ldc_mode_ops stream_ops;

/* ldc_alloc() fails with -ENODEV while this is zero. */
int ldom_domaining_enabled;
99
/* Per-channel map table state, set up by ldc_iommu_init(). */
struct ldc_iommu {
	/* Protects arena alloc/free.  */
	spinlock_t			lock;
	/* Allocation bitmap (arena.map) and entry count (arena.limit). */
	struct iommu_arena		arena;
	/* Map table whose physical address is handed to the hypervisor. */
	struct ldc_mtable_entry		*page_table;
};
106
/* Software state for one logical domain channel. */
struct ldc_channel {
	/* Protects all operations that depend upon channel state.  */
	spinlock_t			lock;

	/* Channel id passed to every sun4v_ldc_*() hypervisor call. */
	unsigned long			id;

	/* Buffer of cfg.mtu bytes, allocated only for LDC_MODE_STREAM. */
	u8				*mssbuf;
	u32				mssbuf_len;
	u32				mssbuf_off;

	/* TX queue: kernel virtual base, head/tail as byte offsets into
	 * the queue, entry count and physical address (tx_ra).
	 */
	struct ldc_packet		*tx_base;
	unsigned long			tx_head;
	unsigned long			tx_tail;
	unsigned long			tx_num_entries;
	unsigned long			tx_ra;

	/* Byte offset just past the last acknowledged TX packet; used as
	 * the queue limit in stream mode (see head_for_data()).
	 */
	unsigned long			tx_acked;

	/* RX queue, same layout as the TX fields above. */
	struct ldc_packet		*rx_base;
	unsigned long			rx_head;
	unsigned long			rx_tail;
	unsigned long			rx_num_entries;
	unsigned long			rx_ra;

	/* Sequence ids: last received and last sent. */
	u32				rcv_nxt;
	u32				snd_nxt;

	/* Channel state as last reported by the hypervisor. */
	unsigned long			chan_state;

	/* Copy of the caller's configuration and its event cookie. */
	struct ldc_channel_config	cfg;
	void				*event_arg;

	const struct ldc_mode_ops	*mops;

	struct ldc_iommu		iommu;

	/* Version recorded during the handshake. */
	struct ldc_version		ver;

	/* Handshake progress. */
	u8				hs_state;
#define LDC_HS_CLOSED			0x00
#define LDC_HS_OPEN			0x01
#define LDC_HS_GOTVERS			0x02
#define LDC_HS_SENTRTR			0x03
#define LDC_HS_GOTRTR			0x04
#define LDC_HS_COMPLETE			0x10

	/* Resource ownership / reset bookkeeping bits. */
	u8				flags;
#define LDC_FLAG_ALLOCED_QUEUES		0x01
#define LDC_FLAG_REGISTERED_QUEUES	0x02
#define LDC_FLAG_REGISTERED_IRQS	0x04
#define LDC_FLAG_RESET			0x10

	/* Payload bytes per packet for the configured mode. */
	u8				mss;
	/* LDC_STATE_* value, changed through ldc_set_state(). */
	u8				state;

#define LDC_IRQ_NAME_MAX		32
	char				rx_irq_name[LDC_IRQ_NAME_MAX];
	char				tx_irq_name[LDC_IRQ_NAME_MAX];

	struct hlist_head		mh_list;

	/* Linkage on ldc_channel_list. */
	struct hlist_node		list;
};
170
/* Conditional debug print.  Emits the message only when the
 * LDC_DEBUG_<TYPE> bit is set in lp->cfg.debug.  The macro references a
 * variable named "lp" (struct ldc_channel *) that must be in scope at
 * the point of use.
 */
#define ldcdbg(TYPE, f, a...) \
do {	if (lp->cfg.debug & LDC_DEBUG_##TYPE) \
		printk(KERN_INFO PFX "ID[%lu] " f, lp->id, ## a); \
} while (0)
175
176static const char *state_to_str(u8 state)
177{
178	switch (state) {
179	case LDC_STATE_INVALID:
180		return "INVALID";
181	case LDC_STATE_INIT:
182		return "INIT";
183	case LDC_STATE_BOUND:
184		return "BOUND";
185	case LDC_STATE_READY:
186		return "READY";
187	case LDC_STATE_CONNECTED:
188		return "CONNECTED";
189	default:
190		return "<UNKNOWN>";
191	}
192}
193
194static void ldc_set_state(struct ldc_channel *lp, u8 state)
195{
196	ldcdbg(STATE, "STATE (%s) --> (%s)\n",
197	       state_to_str(lp->state),
198	       state_to_str(state));
199
200	lp->state = state;
201}
202
203static unsigned long __advance(unsigned long off, unsigned long num_entries)
204{
205	off += LDC_PACKET_SIZE;
206	if (off == (num_entries * LDC_PACKET_SIZE))
207		off = 0;
208
209	return off;
210}
211
212static unsigned long rx_advance(struct ldc_channel *lp, unsigned long off)
213{
214	return __advance(off, lp->rx_num_entries);
215}
216
217static unsigned long tx_advance(struct ldc_channel *lp, unsigned long off)
218{
219	return __advance(off, lp->tx_num_entries);
220}
221
222static struct ldc_packet *handshake_get_tx_packet(struct ldc_channel *lp,
223						  unsigned long *new_tail)
224{
225	struct ldc_packet *p;
226	unsigned long t;
227
228	t = tx_advance(lp, lp->tx_tail);
229	if (t == lp->tx_head)
230		return NULL;
231
232	*new_tail = t;
233
234	p = lp->tx_base;
235	return p + (lp->tx_tail / LDC_PACKET_SIZE);
236}
237
238/* When we are in reliable or stream mode, have to track the next packet
239 * we haven't gotten an ACK for in the TX queue using tx_acked.  We have
240 * to be careful not to stomp over the queue past that point.  During
241 * the handshake, we don't have TX data packets pending in the queue
242 * and that's why handshake_get_tx_packet() need not be mindful of
243 * lp->tx_acked.
244 */
245static unsigned long head_for_data(struct ldc_channel *lp)
246{
247	if (lp->cfg.mode == LDC_MODE_STREAM)
248		return lp->tx_acked;
249	return lp->tx_head;
250}
251
252static int tx_has_space_for(struct ldc_channel *lp, unsigned int size)
253{
254	unsigned long limit, tail, new_tail, diff;
255	unsigned int mss;
256
257	limit = head_for_data(lp);
258	tail = lp->tx_tail;
259	new_tail = tx_advance(lp, tail);
260	if (new_tail == limit)
261		return 0;
262
263	if (limit > new_tail)
264		diff = limit - new_tail;
265	else
266		diff = (limit +
267			((lp->tx_num_entries * LDC_PACKET_SIZE) - new_tail));
268	diff /= LDC_PACKET_SIZE;
269	mss = lp->mss;
270
271	if (diff * mss < size)
272		return 0;
273
274	return 1;
275}
276
277static struct ldc_packet *data_get_tx_packet(struct ldc_channel *lp,
278					     unsigned long *new_tail)
279{
280	struct ldc_packet *p;
281	unsigned long h, t;
282
283	h = head_for_data(lp);
284	t = tx_advance(lp, lp->tx_tail);
285	if (t == h)
286		return NULL;
287
288	*new_tail = t;
289
290	p = lp->tx_base;
291	return p + (lp->tx_tail / LDC_PACKET_SIZE);
292}
293
294static int set_tx_tail(struct ldc_channel *lp, unsigned long tail)
295{
296	unsigned long orig_tail = lp->tx_tail;
297	int limit = 1000;
298
299	lp->tx_tail = tail;
300	while (limit-- > 0) {
301		unsigned long err;
302
303		err = sun4v_ldc_tx_set_qtail(lp->id, tail);
304		if (!err)
305			return 0;
306
307		if (err != HV_EWOULDBLOCK) {
308			lp->tx_tail = orig_tail;
309			return -EINVAL;
310		}
311		udelay(1);
312	}
313
314	lp->tx_tail = orig_tail;
315	return -EBUSY;
316}
317
318/* This just updates the head value in the hypervisor using
319 * a polling loop with a timeout.  The caller takes care of
320 * upating software state representing the head change, if any.
321 */
322static int __set_rx_head(struct ldc_channel *lp, unsigned long head)
323{
324	int limit = 1000;
325
326	while (limit-- > 0) {
327		unsigned long err;
328
329		err = sun4v_ldc_rx_set_qhead(lp->id, head);
330		if (!err)
331			return 0;
332
333		if (err != HV_EWOULDBLOCK)
334			return -EINVAL;
335
336		udelay(1);
337	}
338
339	return -EBUSY;
340}
341
342static int send_tx_packet(struct ldc_channel *lp,
343			  struct ldc_packet *p,
344			  unsigned long new_tail)
345{
346	BUG_ON(p != (lp->tx_base + (lp->tx_tail / LDC_PACKET_SIZE)));
347
348	return set_tx_tail(lp, new_tail);
349}
350
351static struct ldc_packet *handshake_compose_ctrl(struct ldc_channel *lp,
352						 u8 stype, u8 ctrl,
353						 void *data, int dlen,
354						 unsigned long *new_tail)
355{
356	struct ldc_packet *p = handshake_get_tx_packet(lp, new_tail);
357
358	if (p) {
359		memset(p, 0, sizeof(*p));
360		p->type = LDC_CTRL;
361		p->stype = stype;
362		p->ctrl = ctrl;
363		if (data)
364			memcpy(p->u.u_data, data, dlen);
365	}
366	return p;
367}
368
369static int start_handshake(struct ldc_channel *lp)
370{
371	struct ldc_packet *p;
372	struct ldc_version *ver;
373	unsigned long new_tail;
374
375	ver = &ver_arr[0];
376
377	ldcdbg(HS, "SEND VER INFO maj[%u] min[%u]\n",
378	       ver->major, ver->minor);
379
380	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
381				   ver, sizeof(*ver), &new_tail);
382	if (p) {
383		int err = send_tx_packet(lp, p, new_tail);
384		if (!err)
385			lp->flags &= ~LDC_FLAG_RESET;
386		return err;
387	}
388	return -EBUSY;
389}
390
391static int send_version_nack(struct ldc_channel *lp,
392			     u16 major, u16 minor)
393{
394	struct ldc_packet *p;
395	struct ldc_version ver;
396	unsigned long new_tail;
397
398	ver.major = major;
399	ver.minor = minor;
400
401	p = handshake_compose_ctrl(lp, LDC_NACK, LDC_VERS,
402				   &ver, sizeof(ver), &new_tail);
403	if (p) {
404		ldcdbg(HS, "SEND VER NACK maj[%u] min[%u]\n",
405		       ver.major, ver.minor);
406
407		return send_tx_packet(lp, p, new_tail);
408	}
409	return -EBUSY;
410}
411
412static int send_version_ack(struct ldc_channel *lp,
413			    struct ldc_version *vp)
414{
415	struct ldc_packet *p;
416	unsigned long new_tail;
417
418	p = handshake_compose_ctrl(lp, LDC_ACK, LDC_VERS,
419				   vp, sizeof(*vp), &new_tail);
420	if (p) {
421		ldcdbg(HS, "SEND VER ACK maj[%u] min[%u]\n",
422		       vp->major, vp->minor);
423
424		return send_tx_packet(lp, p, new_tail);
425	}
426	return -EBUSY;
427}
428
429static int send_rts(struct ldc_channel *lp)
430{
431	struct ldc_packet *p;
432	unsigned long new_tail;
433
434	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTS, NULL, 0,
435				   &new_tail);
436	if (p) {
437		p->env = lp->cfg.mode;
438		p->seqid = 0;
439		lp->rcv_nxt = 0;
440
441		ldcdbg(HS, "SEND RTS env[0x%x] seqid[0x%x]\n",
442		       p->env, p->seqid);
443
444		return send_tx_packet(lp, p, new_tail);
445	}
446	return -EBUSY;
447}
448
449static int send_rtr(struct ldc_channel *lp)
450{
451	struct ldc_packet *p;
452	unsigned long new_tail;
453
454	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RTR, NULL, 0,
455				   &new_tail);
456	if (p) {
457		p->env = lp->cfg.mode;
458		p->seqid = 0;
459
460		ldcdbg(HS, "SEND RTR env[0x%x] seqid[0x%x]\n",
461		       p->env, p->seqid);
462
463		return send_tx_packet(lp, p, new_tail);
464	}
465	return -EBUSY;
466}
467
468static int send_rdx(struct ldc_channel *lp)
469{
470	struct ldc_packet *p;
471	unsigned long new_tail;
472
473	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_RDX, NULL, 0,
474				   &new_tail);
475	if (p) {
476		p->env = 0;
477		p->seqid = ++lp->snd_nxt;
478		p->u.r.ackid = lp->rcv_nxt;
479
480		ldcdbg(HS, "SEND RDX env[0x%x] seqid[0x%x] ackid[0x%x]\n",
481		       p->env, p->seqid, p->u.r.ackid);
482
483		return send_tx_packet(lp, p, new_tail);
484	}
485	return -EBUSY;
486}
487
488static int send_data_nack(struct ldc_channel *lp, struct ldc_packet *data_pkt)
489{
490	struct ldc_packet *p;
491	unsigned long new_tail;
492	int err;
493
494	p = data_get_tx_packet(lp, &new_tail);
495	if (!p)
496		return -EBUSY;
497	memset(p, 0, sizeof(*p));
498	p->type = data_pkt->type;
499	p->stype = LDC_NACK;
500	p->ctrl = data_pkt->ctrl & LDC_CTRL_MSK;
501	p->seqid = lp->snd_nxt + 1;
502	p->u.r.ackid = lp->rcv_nxt;
503
504	ldcdbg(HS, "SEND DATA NACK type[0x%x] ctl[0x%x] seq[0x%x] ack[0x%x]\n",
505	       p->type, p->ctrl, p->seqid, p->u.r.ackid);
506
507	err = send_tx_packet(lp, p, new_tail);
508	if (!err)
509		lp->snd_nxt++;
510
511	return err;
512}
513
514static int ldc_abort(struct ldc_channel *lp)
515{
516	unsigned long hv_err;
517
518	ldcdbg(STATE, "ABORT\n");
519
520	/* We report but do not act upon the hypervisor errors because
521	 * there really isn't much we can do if they fail at this point.
522	 */
523	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
524	if (hv_err)
525		printk(KERN_ERR PFX "ldc_abort: "
526		       "sun4v_ldc_tx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
527		       lp->id, lp->tx_ra, lp->tx_num_entries, hv_err);
528
529	hv_err = sun4v_ldc_tx_get_state(lp->id,
530					&lp->tx_head,
531					&lp->tx_tail,
532					&lp->chan_state);
533	if (hv_err)
534		printk(KERN_ERR PFX "ldc_abort: "
535		       "sun4v_ldc_tx_get_state(%lx,...) failed, err=%lu\n",
536		       lp->id, hv_err);
537
538	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
539	if (hv_err)
540		printk(KERN_ERR PFX "ldc_abort: "
541		       "sun4v_ldc_rx_qconf(%lx,%lx,%lx) failed, err=%lu\n",
542		       lp->id, lp->rx_ra, lp->rx_num_entries, hv_err);
543
544	/* Refetch the RX queue state as well, because we could be invoked
545	 * here in the queue processing context.
546	 */
547	hv_err = sun4v_ldc_rx_get_state(lp->id,
548					&lp->rx_head,
549					&lp->rx_tail,
550					&lp->chan_state);
551	if (hv_err)
552		printk(KERN_ERR PFX "ldc_abort: "
553		       "sun4v_ldc_rx_get_state(%lx,...) failed, err=%lu\n",
554		       lp->id, hv_err);
555
556	return -ECONNRESET;
557}
558
559static struct ldc_version *find_by_major(u16 major)
560{
561	struct ldc_version *ret = NULL;
562	int i;
563
564	for (i = 0; i < ARRAY_SIZE(ver_arr); i++) {
565		struct ldc_version *v = &ver_arr[i];
566		if (v->major <= major) {
567			ret = v;
568			break;
569		}
570	}
571	return ret;
572}
573
574static int process_ver_info(struct ldc_channel *lp, struct ldc_version *vp)
575{
576	struct ldc_version *vap;
577	int err;
578
579	ldcdbg(HS, "GOT VERSION INFO major[%x] minor[%x]\n",
580	       vp->major, vp->minor);
581
582	if (lp->hs_state == LDC_HS_GOTVERS) {
583		lp->hs_state = LDC_HS_OPEN;
584		memset(&lp->ver, 0, sizeof(lp->ver));
585	}
586
587	vap = find_by_major(vp->major);
588	if (!vap) {
589		err = send_version_nack(lp, 0, 0);
590	} else if (vap->major != vp->major) {
591		err = send_version_nack(lp, vap->major, vap->minor);
592	} else {
593		struct ldc_version ver = *vp;
594		if (ver.minor > vap->minor)
595			ver.minor = vap->minor;
596		err = send_version_ack(lp, &ver);
597		if (!err) {
598			lp->ver = ver;
599			lp->hs_state = LDC_HS_GOTVERS;
600		}
601	}
602	if (err)
603		return ldc_abort(lp);
604
605	return 0;
606}
607
608static int process_ver_ack(struct ldc_channel *lp, struct ldc_version *vp)
609{
610	ldcdbg(HS, "GOT VERSION ACK major[%x] minor[%x]\n",
611	       vp->major, vp->minor);
612
613	if (lp->hs_state == LDC_HS_GOTVERS) {
614		if (lp->ver.major != vp->major ||
615		    lp->ver.minor != vp->minor)
616			return ldc_abort(lp);
617	} else {
618		lp->ver = *vp;
619		lp->hs_state = LDC_HS_GOTVERS;
620	}
621	if (send_rts(lp))
622		return ldc_abort(lp);
623	return 0;
624}
625
626static int process_ver_nack(struct ldc_channel *lp, struct ldc_version *vp)
627{
628	struct ldc_version *vap;
629	struct ldc_packet *p;
630	unsigned long new_tail;
631
632	if (vp->major == 0 && vp->minor == 0)
633		return ldc_abort(lp);
634
635	vap = find_by_major(vp->major);
636	if (!vap)
637		return ldc_abort(lp);
638
639	p = handshake_compose_ctrl(lp, LDC_INFO, LDC_VERS,
640					   vap, sizeof(*vap),
641					   &new_tail);
642	if (!p)
643		return ldc_abort(lp);
644
645	return send_tx_packet(lp, p, new_tail);
646}
647
648static int process_version(struct ldc_channel *lp,
649			   struct ldc_packet *p)
650{
651	struct ldc_version *vp;
652
653	vp = (struct ldc_version *) p->u.u_data;
654
655	switch (p->stype) {
656	case LDC_INFO:
657		return process_ver_info(lp, vp);
658
659	case LDC_ACK:
660		return process_ver_ack(lp, vp);
661
662	case LDC_NACK:
663		return process_ver_nack(lp, vp);
664
665	default:
666		return ldc_abort(lp);
667	}
668}
669
670static int process_rts(struct ldc_channel *lp,
671		       struct ldc_packet *p)
672{
673	ldcdbg(HS, "GOT RTS stype[%x] seqid[%x] env[%x]\n",
674	       p->stype, p->seqid, p->env);
675
676	if (p->stype     != LDC_INFO	   ||
677	    lp->hs_state != LDC_HS_GOTVERS ||
678	    p->env       != lp->cfg.mode)
679		return ldc_abort(lp);
680
681	lp->snd_nxt = p->seqid;
682	lp->rcv_nxt = p->seqid;
683	lp->hs_state = LDC_HS_SENTRTR;
684	if (send_rtr(lp))
685		return ldc_abort(lp);
686
687	return 0;
688}
689
690static int process_rtr(struct ldc_channel *lp,
691		       struct ldc_packet *p)
692{
693	ldcdbg(HS, "GOT RTR stype[%x] seqid[%x] env[%x]\n",
694	       p->stype, p->seqid, p->env);
695
696	if (p->stype     != LDC_INFO ||
697	    p->env       != lp->cfg.mode)
698		return ldc_abort(lp);
699
700	lp->snd_nxt = p->seqid;
701	lp->hs_state = LDC_HS_COMPLETE;
702	ldc_set_state(lp, LDC_STATE_CONNECTED);
703	send_rdx(lp);
704
705	return LDC_EVENT_UP;
706}
707
708static int rx_seq_ok(struct ldc_channel *lp, u32 seqid)
709{
710	return lp->rcv_nxt + 1 == seqid;
711}
712
713static int process_rdx(struct ldc_channel *lp,
714		       struct ldc_packet *p)
715{
716	ldcdbg(HS, "GOT RDX stype[%x] seqid[%x] env[%x] ackid[%x]\n",
717	       p->stype, p->seqid, p->env, p->u.r.ackid);
718
719	if (p->stype != LDC_INFO ||
720	    !(rx_seq_ok(lp, p->seqid)))
721		return ldc_abort(lp);
722
723	lp->rcv_nxt = p->seqid;
724
725	lp->hs_state = LDC_HS_COMPLETE;
726	ldc_set_state(lp, LDC_STATE_CONNECTED);
727
728	return LDC_EVENT_UP;
729}
730
731static int process_control_frame(struct ldc_channel *lp,
732				 struct ldc_packet *p)
733{
734	switch (p->ctrl) {
735	case LDC_VERS:
736		return process_version(lp, p);
737
738	case LDC_RTS:
739		return process_rts(lp, p);
740
741	case LDC_RTR:
742		return process_rtr(lp, p);
743
744	case LDC_RDX:
745		return process_rdx(lp, p);
746
747	default:
748		return ldc_abort(lp);
749	}
750}
751
/* An error frame always aborts the connection; the frame contents are
 * not inspected.
 */
static int process_error_frame(struct ldc_channel *lp,
			       struct ldc_packet *p)
{
	int err = ldc_abort(lp);

	return err;
}
757
758static int process_data_ack(struct ldc_channel *lp,
759			    struct ldc_packet *ack)
760{
761	unsigned long head = lp->tx_acked;
762	u32 ackid = ack->u.r.ackid;
763
764	while (1) {
765		struct ldc_packet *p = lp->tx_base + (head / LDC_PACKET_SIZE);
766
767		head = tx_advance(lp, head);
768
769		if (p->seqid == ackid) {
770			lp->tx_acked = head;
771			return 0;
772		}
773		if (head == lp->tx_tail)
774			return ldc_abort(lp);
775	}
776
777	return 0;
778}
779
780static void send_events(struct ldc_channel *lp, unsigned int event_mask)
781{
782	if (event_mask & LDC_EVENT_RESET)
783		lp->cfg.event(lp->event_arg, LDC_EVENT_RESET);
784	if (event_mask & LDC_EVENT_UP)
785		lp->cfg.event(lp->event_arg, LDC_EVENT_UP);
786	if (event_mask & LDC_EVENT_DATA_READY)
787		lp->cfg.event(lp->event_arg, LDC_EVENT_DATA_READY);
788}
789
/* RX interrupt handler.
 *
 * Refreshes the RX queue state from the hypervisor and then, under
 * lp->lock:
 *  - in RAW mode, treats a channel that is UP as connected;
 *  - while LDC_FLAG_RESET is set, discards everything queued;
 *  - once the handshake is complete, only works out which events to
 *    raise;
 *  - otherwise consumes queued frames to drive the handshake.
 * The collected events are delivered through send_events() after the
 * lock has been dropped.  Always returns IRQ_HANDLED.
 */
static irqreturn_t ldc_rx(int irq, void *dev_id)
{
	struct ldc_channel *lp = dev_id;
	unsigned long orig_state, hv_err, flags;
	unsigned int event_mask;

	spin_lock_irqsave(&lp->lock, flags);

	/* Remember the previous channel state so changes can be detected.
	 * NOTE(review): hv_err is assigned but never examined.
	 */
	orig_state = lp->chan_state;
	hv_err = sun4v_ldc_rx_get_state(lp->id,
					&lp->rx_head,
					&lp->rx_tail,
					&lp->chan_state);

	ldcdbg(RX, "RX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
	       orig_state, lp->chan_state, lp->rx_head, lp->rx_tail);

	event_mask = 0;

	/* RAW mode has no handshake: channel UP means connected. */
	if (lp->cfg.mode == LDC_MODE_RAW &&
	    lp->chan_state == LDC_CHANNEL_UP) {
		lp->hs_state = LDC_HS_COMPLETE;
		ldc_set_state(lp, LDC_STATE_CONNECTED);

		event_mask |= LDC_EVENT_UP;

		/* Prevents the state-change check below from firing. */
		orig_state = lp->chan_state;
	}

	/* If we are in reset state, flush the RX queue and ignore
	 * everything.
	 */
	if (lp->flags & LDC_FLAG_RESET) {
		(void) __set_rx_head(lp, lp->rx_tail);
		goto out;
	}

	/* Once we finish the handshake, we let the ldc_read()
	 * paths do all of the control frame and state management.
	 * Just trigger the callback.
	 */
	if (lp->hs_state == LDC_HS_COMPLETE) {
		/* Also entered by goto from the frame loop below as soon as
		 * a processed frame completes the handshake.
		 */
handshake_complete:
		if (lp->chan_state != orig_state) {
			unsigned int event = LDC_EVENT_RESET;

			if (lp->chan_state == LDC_CHANNEL_UP)
				event = LDC_EVENT_UP;

			event_mask |= event;
		}
		if (lp->rx_head != lp->rx_tail)
			event_mask |= LDC_EVENT_DATA_READY;

		goto out;
	}

	/* Handshake still in progress: do nothing on a state change. */
	if (lp->chan_state != orig_state)
		goto out;

	/* Consume queued frames one at a time. */
	while (lp->rx_head != lp->rx_tail) {
		struct ldc_packet *p;
		unsigned long new;
		int err;

		p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);

		switch (p->type) {
		case LDC_CTRL:
			/* Positive return values are event bits. */
			err = process_control_frame(lp, p);
			if (err > 0)
				event_mask |= err;
			break;

		case LDC_DATA:
			event_mask |= LDC_EVENT_DATA_READY;
			err = 0;
			break;

		case LDC_ERR:
			err = process_error_frame(lp, p);
			break;

		default:
			err = ldc_abort(lp);
			break;
		}

		if (err < 0)
			break;

		/* Advance the head past this frame, wrapping at the end of
		 * the queue, and tell the hypervisor.
		 */
		new = lp->rx_head;
		new += LDC_PACKET_SIZE;
		if (new == (lp->rx_num_entries * LDC_PACKET_SIZE))
			new = 0;
		lp->rx_head = new;

		err = __set_rx_head(lp, new);
		if (err < 0) {
			(void) ldc_abort(lp);
			break;
		}
		if (lp->hs_state == LDC_HS_COMPLETE)
			goto handshake_complete;
	}

out:
	spin_unlock_irqrestore(&lp->lock, flags);

	/* Callbacks run without the channel lock held. */
	send_events(lp, event_mask);

	return IRQ_HANDLED;
}
903
904static irqreturn_t ldc_tx(int irq, void *dev_id)
905{
906	struct ldc_channel *lp = dev_id;
907	unsigned long flags, hv_err, orig_state;
908	unsigned int event_mask = 0;
909
910	spin_lock_irqsave(&lp->lock, flags);
911
912	orig_state = lp->chan_state;
913	hv_err = sun4v_ldc_tx_get_state(lp->id,
914					&lp->tx_head,
915					&lp->tx_tail,
916					&lp->chan_state);
917
918	ldcdbg(TX, " TX state[0x%02lx:0x%02lx] head[0x%04lx] tail[0x%04lx]\n",
919	       orig_state, lp->chan_state, lp->tx_head, lp->tx_tail);
920
921	if (lp->cfg.mode == LDC_MODE_RAW &&
922	    lp->chan_state == LDC_CHANNEL_UP) {
923		lp->hs_state = LDC_HS_COMPLETE;
924		ldc_set_state(lp, LDC_STATE_CONNECTED);
925
926		event_mask |= LDC_EVENT_UP;
927	}
928
929	spin_unlock_irqrestore(&lp->lock, flags);
930
931	send_events(lp, event_mask);
932
933	return IRQ_HANDLED;
934}
935
/* All channels created by ldc_alloc(), linked through ldc_channel.list. */
static HLIST_HEAD(ldc_channel_list);
937
938static int __ldc_channel_exists(unsigned long id)
939{
940	struct ldc_channel *lp;
941	struct hlist_node *n;
942
943	hlist_for_each_entry(lp, n, &ldc_channel_list, list) {
944		if (lp->id == id)
945			return 1;
946	}
947	return 0;
948}
949
950static int alloc_queue(const char *name, unsigned long num_entries,
951		       struct ldc_packet **base, unsigned long *ra)
952{
953	unsigned long size, order;
954	void *q;
955
956	size = num_entries * LDC_PACKET_SIZE;
957	order = get_order(size);
958
959	q = (void *) __get_free_pages(GFP_KERNEL, order);
960	if (!q) {
961		printk(KERN_ERR PFX "Alloc of %s queue failed with "
962		       "size=%lu order=%lu\n", name, size, order);
963		return -ENOMEM;
964	}
965
966	memset(q, 0, PAGE_SIZE << order);
967
968	*base = q;
969	*ra = __pa(q);
970
971	return 0;
972}
973
974static void free_queue(unsigned long num_entries, struct ldc_packet *q)
975{
976	unsigned long size, order;
977
978	if (!q)
979		return;
980
981	size = num_entries * LDC_PACKET_SIZE;
982	order = get_order(size);
983
984	free_pages((unsigned long)q, order);
985}
986
987#define LDC_IOTABLE_SIZE	(8 * 1024)
988
989static int ldc_iommu_init(struct ldc_channel *lp)
990{
991	unsigned long sz, num_tsb_entries, tsbsize, order;
992	struct ldc_iommu *iommu = &lp->iommu;
993	struct ldc_mtable_entry *table;
994	unsigned long hv_err;
995	int err;
996
997	num_tsb_entries = LDC_IOTABLE_SIZE;
998	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
999
1000	spin_lock_init(&iommu->lock);
1001
1002	sz = num_tsb_entries / 8;
1003	sz = (sz + 7UL) & ~7UL;
1004	iommu->arena.map = kzalloc(sz, GFP_KERNEL);
1005	if (!iommu->arena.map) {
1006		printk(KERN_ERR PFX "Alloc of arena map failed, sz=%lu\n", sz);
1007		return -ENOMEM;
1008	}
1009
1010	iommu->arena.limit = num_tsb_entries;
1011
1012	order = get_order(tsbsize);
1013
1014	table = (struct ldc_mtable_entry *)
1015		__get_free_pages(GFP_KERNEL, order);
1016	err = -ENOMEM;
1017	if (!table) {
1018		printk(KERN_ERR PFX "Alloc of MTE table failed, "
1019		       "size=%lu order=%lu\n", tsbsize, order);
1020		goto out_free_map;
1021	}
1022
1023	memset(table, 0, PAGE_SIZE << order);
1024
1025	iommu->page_table = table;
1026
1027	hv_err = sun4v_ldc_set_map_table(lp->id, __pa(table),
1028					 num_tsb_entries);
1029	err = -EINVAL;
1030	if (hv_err)
1031		goto out_free_table;
1032
1033	return 0;
1034
1035out_free_table:
1036	free_pages((unsigned long) table, order);
1037	iommu->page_table = NULL;
1038
1039out_free_map:
1040	kfree(iommu->arena.map);
1041	iommu->arena.map = NULL;
1042
1043	return err;
1044}
1045
1046static void ldc_iommu_release(struct ldc_channel *lp)
1047{
1048	struct ldc_iommu *iommu = &lp->iommu;
1049	unsigned long num_tsb_entries, tsbsize, order;
1050
1051	(void) sun4v_ldc_set_map_table(lp->id, 0, 0);
1052
1053	num_tsb_entries = iommu->arena.limit;
1054	tsbsize = num_tsb_entries * sizeof(struct ldc_mtable_entry);
1055	order = get_order(tsbsize);
1056
1057	free_pages((unsigned long) iommu->page_table, order);
1058	iommu->page_table = NULL;
1059
1060	kfree(iommu->arena.map);
1061	iommu->arena.map = NULL;
1062}
1063
1064struct ldc_channel *ldc_alloc(unsigned long id,
1065			      const struct ldc_channel_config *cfgp,
1066			      void *event_arg)
1067{
1068	struct ldc_channel *lp;
1069	const struct ldc_mode_ops *mops;
1070	unsigned long dummy1, dummy2, hv_err;
1071	u8 mss, *mssbuf;
1072	int err;
1073
1074	err = -ENODEV;
1075	if (!ldom_domaining_enabled)
1076		goto out_err;
1077
1078	err = -EINVAL;
1079	if (!cfgp)
1080		goto out_err;
1081
1082	switch (cfgp->mode) {
1083	case LDC_MODE_RAW:
1084		mops = &raw_ops;
1085		mss = LDC_PACKET_SIZE;
1086		break;
1087
1088	case LDC_MODE_UNRELIABLE:
1089		mops = &nonraw_ops;
1090		mss = LDC_PACKET_SIZE - 8;
1091		break;
1092
1093	case LDC_MODE_STREAM:
1094		mops = &stream_ops;
1095		mss = LDC_PACKET_SIZE - 8 - 8;
1096		break;
1097
1098	default:
1099		goto out_err;
1100	}
1101
1102	if (!cfgp->event || !event_arg || !cfgp->rx_irq || !cfgp->tx_irq)
1103		goto out_err;
1104
1105	hv_err = sun4v_ldc_tx_qinfo(id, &dummy1, &dummy2);
1106	err = -ENODEV;
1107	if (hv_err == HV_ECHANNEL)
1108		goto out_err;
1109
1110	err = -EEXIST;
1111	if (__ldc_channel_exists(id))
1112		goto out_err;
1113
1114	mssbuf = NULL;
1115
1116	lp = kzalloc(sizeof(*lp), GFP_KERNEL);
1117	err = -ENOMEM;
1118	if (!lp)
1119		goto out_err;
1120
1121	spin_lock_init(&lp->lock);
1122
1123	lp->id = id;
1124
1125	err = ldc_iommu_init(lp);
1126	if (err)
1127		goto out_free_ldc;
1128
1129	lp->mops = mops;
1130	lp->mss = mss;
1131
1132	lp->cfg = *cfgp;
1133	if (!lp->cfg.mtu)
1134		lp->cfg.mtu = LDC_DEFAULT_MTU;
1135
1136	if (lp->cfg.mode == LDC_MODE_STREAM) {
1137		mssbuf = kzalloc(lp->cfg.mtu, GFP_KERNEL);
1138		if (!mssbuf) {
1139			err = -ENOMEM;
1140			goto out_free_iommu;
1141		}
1142		lp->mssbuf = mssbuf;
1143	}
1144
1145	lp->event_arg = event_arg;
1146
1147	lp->tx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1148	lp->rx_num_entries = LDC_DEFAULT_NUM_ENTRIES;
1149
1150	err = alloc_queue("TX", lp->tx_num_entries,
1151			  &lp->tx_base, &lp->tx_ra);
1152	if (err)
1153		goto out_free_mssbuf;
1154
1155	err = alloc_queue("RX", lp->rx_num_entries,
1156			  &lp->rx_base, &lp->rx_ra);
1157	if (err)
1158		goto out_free_txq;
1159
1160	lp->flags |= LDC_FLAG_ALLOCED_QUEUES;
1161
1162	lp->hs_state = LDC_HS_CLOSED;
1163	ldc_set_state(lp, LDC_STATE_INIT);
1164
1165	INIT_HLIST_NODE(&lp->list);
1166	hlist_add_head(&lp->list, &ldc_channel_list);
1167
1168	INIT_HLIST_HEAD(&lp->mh_list);
1169
1170	return lp;
1171
1172out_free_txq:
1173	free_queue(lp->tx_num_entries, lp->tx_base);
1174
1175out_free_mssbuf:
1176	kfree(mssbuf);
1177
1178out_free_iommu:
1179	ldc_iommu_release(lp);
1180
1181out_free_ldc:
1182	kfree(lp);
1183
1184out_err:
1185	return ERR_PTR(err);
1186}
1187EXPORT_SYMBOL(ldc_alloc);
1188
1189void ldc_free(struct ldc_channel *lp)
1190{
1191	if (lp->flags & LDC_FLAG_REGISTERED_IRQS) {
1192		free_irq(lp->cfg.rx_irq, lp);
1193		free_irq(lp->cfg.tx_irq, lp);
1194	}
1195
1196	if (lp->flags & LDC_FLAG_REGISTERED_QUEUES) {
1197		sun4v_ldc_tx_qconf(lp->id, 0, 0);
1198		sun4v_ldc_rx_qconf(lp->id, 0, 0);
1199		lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1200	}
1201	if (lp->flags & LDC_FLAG_ALLOCED_QUEUES) {
1202		free_queue(lp->tx_num_entries, lp->tx_base);
1203		free_queue(lp->rx_num_entries, lp->rx_base);
1204		lp->flags &= ~LDC_FLAG_ALLOCED_QUEUES;
1205	}
1206
1207	hlist_del(&lp->list);
1208
1209	kfree(lp->mssbuf);
1210
1211	ldc_iommu_release(lp);
1212
1213	kfree(lp);
1214}
1215EXPORT_SYMBOL(ldc_free);
1216
1217/* Bind the channel.  This registers the LDC queues with
1218 * the hypervisor and puts the channel into a pseudo-listening
1219 * state.  This does not initiate a handshake, ldc_connect() does
1220 * that.
1221 */
1222int ldc_bind(struct ldc_channel *lp, const char *name)
1223{
1224	unsigned long hv_err, flags;
1225	int err = -EINVAL;
1226
1227	if (!name ||
1228	    (lp->state != LDC_STATE_INIT))
1229		return -EINVAL;
1230
1231	snprintf(lp->rx_irq_name, LDC_IRQ_NAME_MAX, "%s RX", name);
1232	snprintf(lp->tx_irq_name, LDC_IRQ_NAME_MAX, "%s TX", name);
1233
1234	err = request_irq(lp->cfg.rx_irq, ldc_rx,
1235			  IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1236			  lp->rx_irq_name, lp);
1237	if (err)
1238		return err;
1239
1240	err = request_irq(lp->cfg.tx_irq, ldc_tx,
1241			  IRQF_SAMPLE_RANDOM | IRQF_DISABLED,
1242			  lp->tx_irq_name, lp);
1243	if (err) {
1244		free_irq(lp->cfg.rx_irq, lp);
1245		return err;
1246	}
1247
1248
1249	spin_lock_irqsave(&lp->lock, flags);
1250
1251	enable_irq(lp->cfg.rx_irq);
1252	enable_irq(lp->cfg.tx_irq);
1253
1254	lp->flags |= LDC_FLAG_REGISTERED_IRQS;
1255
1256	err = -ENODEV;
1257	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1258	if (hv_err)
1259		goto out_free_irqs;
1260
1261	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1262	if (hv_err)
1263		goto out_free_irqs;
1264
1265	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1266	if (hv_err)
1267		goto out_unmap_tx;
1268
1269	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1270	if (hv_err)
1271		goto out_unmap_tx;
1272
1273	lp->flags |= LDC_FLAG_REGISTERED_QUEUES;
1274
1275	hv_err = sun4v_ldc_tx_get_state(lp->id,
1276					&lp->tx_head,
1277					&lp->tx_tail,
1278					&lp->chan_state);
1279	err = -EBUSY;
1280	if (hv_err)
1281		goto out_unmap_rx;
1282
1283	lp->tx_acked = lp->tx_head;
1284
1285	lp->hs_state = LDC_HS_OPEN;
1286	ldc_set_state(lp, LDC_STATE_BOUND);
1287
1288	spin_unlock_irqrestore(&lp->lock, flags);
1289
1290	return 0;
1291
1292out_unmap_rx:
1293	lp->flags &= ~LDC_FLAG_REGISTERED_QUEUES;
1294	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1295
1296out_unmap_tx:
1297	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1298
1299out_free_irqs:
1300	lp->flags &= ~LDC_FLAG_REGISTERED_IRQS;
1301	free_irq(lp->cfg.tx_irq, lp);
1302	free_irq(lp->cfg.rx_irq, lp);
1303
1304	spin_unlock_irqrestore(&lp->lock, flags);
1305
1306	return err;
1307}
1308EXPORT_SYMBOL(ldc_bind);
1309
1310int ldc_connect(struct ldc_channel *lp)
1311{
1312	unsigned long flags;
1313	int err;
1314
1315	if (lp->cfg.mode == LDC_MODE_RAW)
1316		return -EINVAL;
1317
1318	spin_lock_irqsave(&lp->lock, flags);
1319
1320	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1321	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES) ||
1322	    lp->hs_state != LDC_HS_OPEN)
1323		err = -EINVAL;
1324	else
1325		err = start_handshake(lp);
1326
1327	spin_unlock_irqrestore(&lp->lock, flags);
1328
1329	return err;
1330}
1331EXPORT_SYMBOL(ldc_connect);
1332
1333int ldc_disconnect(struct ldc_channel *lp)
1334{
1335	unsigned long hv_err, flags;
1336	int err;
1337
1338	if (lp->cfg.mode == LDC_MODE_RAW)
1339		return -EINVAL;
1340
1341	if (!(lp->flags & LDC_FLAG_ALLOCED_QUEUES) ||
1342	    !(lp->flags & LDC_FLAG_REGISTERED_QUEUES))
1343		return -EINVAL;
1344
1345	spin_lock_irqsave(&lp->lock, flags);
1346
1347	err = -ENODEV;
1348	hv_err = sun4v_ldc_tx_qconf(lp->id, 0, 0);
1349	if (hv_err)
1350		goto out_err;
1351
1352	hv_err = sun4v_ldc_tx_qconf(lp->id, lp->tx_ra, lp->tx_num_entries);
1353	if (hv_err)
1354		goto out_err;
1355
1356	hv_err = sun4v_ldc_rx_qconf(lp->id, 0, 0);
1357	if (hv_err)
1358		goto out_err;
1359
1360	hv_err = sun4v_ldc_rx_qconf(lp->id, lp->rx_ra, lp->rx_num_entries);
1361	if (hv_err)
1362		goto out_err;
1363
1364	ldc_set_state(lp, LDC_STATE_BOUND);
1365	lp->hs_state = LDC_HS_OPEN;
1366	lp->flags |= LDC_FLAG_RESET;
1367
1368	spin_unlock_irqrestore(&lp->lock, flags);
1369
1370	return 0;
1371
1372out_err:
1373	sun4v_ldc_tx_qconf(lp->id, 0, 0);
1374	sun4v_ldc_rx_qconf(lp->id, 0, 0);
1375	free_irq(lp->cfg.tx_irq, lp);
1376	free_irq(lp->cfg.rx_irq, lp);
1377	lp->flags &= ~(LDC_FLAG_REGISTERED_IRQS |
1378		       LDC_FLAG_REGISTERED_QUEUES);
1379	ldc_set_state(lp, LDC_STATE_INIT);
1380
1381	spin_unlock_irqrestore(&lp->lock, flags);
1382
1383	return err;
1384}
1385EXPORT_SYMBOL(ldc_disconnect);
1386
1387int ldc_state(struct ldc_channel *lp)
1388{
1389	return lp->state;
1390}
1391EXPORT_SYMBOL(ldc_state);
1392
1393static int write_raw(struct ldc_channel *lp, const void *buf, unsigned int size)
1394{
1395	struct ldc_packet *p;
1396	unsigned long new_tail;
1397	int err;
1398
1399	if (size > LDC_PACKET_SIZE)
1400		return -EMSGSIZE;
1401
1402	p = data_get_tx_packet(lp, &new_tail);
1403	if (!p)
1404		return -EAGAIN;
1405
1406	memcpy(p, buf, size);
1407
1408	err = send_tx_packet(lp, p, new_tail);
1409	if (!err)
1410		err = size;
1411
1412	return err;
1413}
1414
1415static int read_raw(struct ldc_channel *lp, void *buf, unsigned int size)
1416{
1417	struct ldc_packet *p;
1418	unsigned long hv_err, new;
1419	int err;
1420
1421	if (size < LDC_PACKET_SIZE)
1422		return -EINVAL;
1423
1424	hv_err = sun4v_ldc_rx_get_state(lp->id,
1425					&lp->rx_head,
1426					&lp->rx_tail,
1427					&lp->chan_state);
1428	if (hv_err)
1429		return ldc_abort(lp);
1430
1431	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1432	    lp->chan_state == LDC_CHANNEL_RESETTING)
1433		return -ECONNRESET;
1434
1435	if (lp->rx_head == lp->rx_tail)
1436		return 0;
1437
1438	p = lp->rx_base + (lp->rx_head / LDC_PACKET_SIZE);
1439	memcpy(buf, p, LDC_PACKET_SIZE);
1440
1441	new = rx_advance(lp, lp->rx_head);
1442	lp->rx_head = new;
1443
1444	err = __set_rx_head(lp, new);
1445	if (err < 0)
1446		err = -ECONNRESET;
1447	else
1448		err = LDC_PACKET_SIZE;
1449
1450	return err;
1451}
1452
1453static const struct ldc_mode_ops raw_ops = {
1454	.write		=	write_raw,
1455	.read		=	read_raw,
1456};
1457
1458static int write_nonraw(struct ldc_channel *lp, const void *buf,
1459			unsigned int size)
1460{
1461	unsigned long hv_err, tail;
1462	unsigned int copied;
1463	u32 seq;
1464	int err;
1465
1466	hv_err = sun4v_ldc_tx_get_state(lp->id, &lp->tx_head, &lp->tx_tail,
1467					&lp->chan_state);
1468	if (unlikely(hv_err))
1469		return -EBUSY;
1470
1471	if (unlikely(lp->chan_state != LDC_CHANNEL_UP))
1472		return ldc_abort(lp);
1473
1474	if (!tx_has_space_for(lp, size))
1475		return -EAGAIN;
1476
1477	seq = lp->snd_nxt;
1478	copied = 0;
1479	tail = lp->tx_tail;
1480	while (copied < size) {
1481		struct ldc_packet *p = lp->tx_base + (tail / LDC_PACKET_SIZE);
1482		u8 *data = ((lp->cfg.mode == LDC_MODE_UNRELIABLE) ?
1483			    p->u.u_data :
1484			    p->u.r.r_data);
1485		int data_len;
1486
1487		p->type = LDC_DATA;
1488		p->stype = LDC_INFO;
1489		p->ctrl = 0;
1490
1491		data_len = size - copied;
1492		if (data_len > lp->mss)
1493			data_len = lp->mss;
1494
1495		BUG_ON(data_len > LDC_LEN);
1496
1497		p->env = (data_len |
1498			  (copied == 0 ? LDC_START : 0) |
1499			  (data_len == size - copied ? LDC_STOP : 0));
1500
1501		p->seqid = ++seq;
1502
1503		ldcdbg(DATA, "SENT DATA [%02x:%02x:%02x:%02x:%08x]\n",
1504		       p->type,
1505		       p->stype,
1506		       p->ctrl,
1507		       p->env,
1508		       p->seqid);
1509
1510		memcpy(data, buf, data_len);
1511		buf += data_len;
1512		copied += data_len;
1513
1514		tail = tx_advance(lp, tail);
1515	}
1516
1517	err = set_tx_tail(lp, tail);
1518	if (!err) {
1519		lp->snd_nxt = seq;
1520		err = size;
1521	}
1522
1523	return err;
1524}
1525
1526static int rx_bad_seq(struct ldc_channel *lp, struct ldc_packet *p,
1527		      struct ldc_packet *first_frag)
1528{
1529	int err;
1530
1531	if (first_frag)
1532		lp->rcv_nxt = first_frag->seqid - 1;
1533
1534	err = send_data_nack(lp, p);
1535	if (err)
1536		return err;
1537
1538	err = __set_rx_head(lp, lp->rx_tail);
1539	if (err < 0)
1540		return ldc_abort(lp);
1541
1542	return 0;
1543}
1544
1545static int data_ack_nack(struct ldc_channel *lp, struct ldc_packet *p)
1546{
1547	if (p->stype & LDC_ACK) {
1548		int err = process_data_ack(lp, p);
1549		if (err)
1550			return err;
1551	}
1552	if (p->stype & LDC_NACK)
1553		return ldc_abort(lp);
1554
1555	return 0;
1556}
1557
1558static int rx_data_wait(struct ldc_channel *lp, unsigned long cur_head)
1559{
1560	unsigned long dummy;
1561	int limit = 1000;
1562
1563	ldcdbg(DATA, "DATA WAIT cur_head[%lx] rx_head[%lx] rx_tail[%lx]\n",
1564	       cur_head, lp->rx_head, lp->rx_tail);
1565	while (limit-- > 0) {
1566		unsigned long hv_err;
1567
1568		hv_err = sun4v_ldc_rx_get_state(lp->id,
1569						&dummy,
1570						&lp->rx_tail,
1571						&lp->chan_state);
1572		if (hv_err)
1573			return ldc_abort(lp);
1574
1575		if (lp->chan_state == LDC_CHANNEL_DOWN ||
1576		    lp->chan_state == LDC_CHANNEL_RESETTING)
1577			return -ECONNRESET;
1578
1579		if (cur_head != lp->rx_tail) {
1580			ldcdbg(DATA, "DATA WAIT DONE "
1581			       "head[%lx] tail[%lx] chan_state[%lx]\n",
1582			       dummy, lp->rx_tail, lp->chan_state);
1583			return 0;
1584		}
1585
1586		udelay(1);
1587	}
1588	return -EAGAIN;
1589}
1590
1591static int rx_set_head(struct ldc_channel *lp, unsigned long head)
1592{
1593	int err = __set_rx_head(lp, head);
1594
1595	if (err < 0)
1596		return ldc_abort(lp);
1597
1598	lp->rx_head = head;
1599	return 0;
1600}
1601
1602static void send_data_ack(struct ldc_channel *lp)
1603{
1604	unsigned long new_tail;
1605	struct ldc_packet *p;
1606
1607	p = data_get_tx_packet(lp, &new_tail);
1608	if (likely(p)) {
1609		int err;
1610
1611		memset(p, 0, sizeof(*p));
1612		p->type = LDC_DATA;
1613		p->stype = LDC_ACK;
1614		p->ctrl = 0;
1615		p->seqid = lp->snd_nxt + 1;
1616		p->u.r.ackid = lp->rcv_nxt;
1617
1618		err = send_tx_packet(lp, p, new_tail);
1619		if (!err)
1620			lp->snd_nxt++;
1621	}
1622}
1623
1624static int read_nonraw(struct ldc_channel *lp, void *buf, unsigned int size)
1625{
1626	struct ldc_packet *first_frag;
1627	unsigned long hv_err, new;
1628	int err, copied;
1629
1630	hv_err = sun4v_ldc_rx_get_state(lp->id,
1631					&lp->rx_head,
1632					&lp->rx_tail,
1633					&lp->chan_state);
1634	if (hv_err)
1635		return ldc_abort(lp);
1636
1637	if (lp->chan_state == LDC_CHANNEL_DOWN ||
1638	    lp->chan_state == LDC_CHANNEL_RESETTING)
1639		return -ECONNRESET;
1640
1641	if (lp->rx_head == lp->rx_tail)
1642		return 0;
1643
1644	first_frag = NULL;
1645	copied = err = 0;
1646	new = lp->rx_head;
1647	while (1) {
1648		struct ldc_packet *p;
1649		int pkt_len;
1650
1651		BUG_ON(new == lp->rx_tail);
1652		p = lp->rx_base + (new / LDC_PACKET_SIZE);
1653
1654		ldcdbg(RX, "RX read pkt[%02x:%02x:%02x:%02x:%08x:%08x] "
1655		       "rcv_nxt[%08x]\n",
1656		       p->type,
1657		       p->stype,
1658		       p->ctrl,
1659		       p->env,
1660		       p->seqid,
1661		       p->u.r.ackid,
1662		       lp->rcv_nxt);
1663
1664		if (unlikely(!rx_seq_ok(lp, p->seqid))) {
1665			err = rx_bad_seq(lp, p, first_frag);
1666			copied = 0;
1667			break;
1668		}
1669
1670		if (p->type & LDC_CTRL) {
1671			err = process_control_frame(lp, p);
1672			if (err < 0)
1673				break;
1674			err = 0;
1675		}
1676
1677		lp->rcv_nxt = p->seqid;
1678
1679		if (!(p->type & LDC_DATA)) {
1680			new = rx_advance(lp, new);
1681			goto no_data;
1682		}
1683		if (p->stype & (LDC_ACK | LDC_NACK)) {
1684			err = data_ack_nack(lp, p);
1685			if (err)
1686				break;
1687		}
1688		if (!(p->stype & LDC_INFO)) {
1689			new = rx_advance(lp, new);
1690			err = rx_set_head(lp, new);
1691			if (err)
1692				break;
1693			goto no_data;
1694		}
1695
1696		pkt_len = p->env & LDC_LEN;
1697
1698		/* Every initial packet starts with the START bit set.
1699		 *
1700		 * Singleton packets will have both START+STOP set.
1701		 *
1702		 * Fragments will have START set in the first frame, STOP
1703		 * set in the last frame, and neither bit set in middle
1704		 * frames of the packet.
1705		 *
1706		 * Therefore if we are at the beginning of a packet and
1707		 * we don't see START, or we are in the middle of a fragmented
1708		 * packet and do see START, we are unsynchronized and should
1709		 * flush the RX queue.
1710		 */
1711		if ((first_frag == NULL && !(p->env & LDC_START)) ||
1712		    (first_frag != NULL &&  (p->env & LDC_START))) {
1713			if (!first_frag)
1714				new = rx_advance(lp, new);
1715
1716			err = rx_set_head(lp, new);
1717			if (err)
1718				break;
1719
1720			if (!first_frag)
1721				goto no_data;
1722		}
1723		if (!first_frag)
1724			first_frag = p;
1725
1726		if (pkt_len > size - copied) {
1727			/* User didn't give us a big enough buffer,
1728			 * what to do?  This is a pretty serious error.
1729			 *
1730			 * Since we haven't updated the RX ring head to
1731			 * consume any of the packets, signal the error
1732			 * to the user and just leave the RX ring alone.
1733			 *
1734			 * This seems the best behavior because this allows
1735			 * a user of the LDC layer to start with a small
1736			 * RX buffer for ldc_read() calls and use -EMSGSIZE
1737			 * as a cue to enlarge it's read buffer.
1738			 */
1739			err = -EMSGSIZE;
1740			break;
1741		}
1742
1743		/* Ok, we are gonna eat this one.  */
1744		new = rx_advance(lp, new);
1745
1746		memcpy(buf,
1747		       (lp->cfg.mode == LDC_MODE_UNRELIABLE ?
1748			p->u.u_data : p->u.r.r_data), pkt_len);
1749		buf += pkt_len;
1750		copied += pkt_len;
1751
1752		if (p->env & LDC_STOP)
1753			break;
1754
1755no_data:
1756		if (new == lp->rx_tail) {
1757			err = rx_data_wait(lp, new);
1758			if (err)
1759				break;
1760		}
1761	}
1762
1763	if (!err)
1764		err = rx_set_head(lp, new);
1765
1766	if (err && first_frag)
1767		lp->rcv_nxt = first_frag->seqid - 1;
1768
1769	if (!err) {
1770		err = copied;
1771		if (err > 0 && lp->cfg.mode != LDC_MODE_UNRELIABLE)
1772			send_data_ack(lp);
1773	}
1774
1775	return err;
1776}
1777
1778static const struct ldc_mode_ops nonraw_ops = {
1779	.write		=	write_nonraw,
1780	.read		=	read_nonraw,
1781};
1782
1783static int write_stream(struct ldc_channel *lp, const void *buf,
1784			unsigned int size)
1785{
1786	if (size > lp->cfg.mtu)
1787		size = lp->cfg.mtu;
1788	return write_nonraw(lp, buf, size);
1789}
1790
1791static int read_stream(struct ldc_channel *lp, void *buf, unsigned int size)
1792{
1793	if (!lp->mssbuf_len) {
1794		int err = read_nonraw(lp, lp->mssbuf, lp->cfg.mtu);
1795		if (err < 0)
1796			return err;
1797
1798		lp->mssbuf_len = err;
1799		lp->mssbuf_off = 0;
1800	}
1801
1802	if (size > lp->mssbuf_len)
1803		size = lp->mssbuf_len;
1804	memcpy(buf, lp->mssbuf + lp->mssbuf_off, size);
1805
1806	lp->mssbuf_off += size;
1807	lp->mssbuf_len -= size;
1808
1809	return size;
1810}
1811
1812static const struct ldc_mode_ops stream_ops = {
1813	.write		=	write_stream,
1814	.read		=	read_stream,
1815};
1816
1817int ldc_write(struct ldc_channel *lp, const void *buf, unsigned int size)
1818{
1819	unsigned long flags;
1820	int err;
1821
1822	if (!buf)
1823		return -EINVAL;
1824
1825	if (!size)
1826		return 0;
1827
1828	spin_lock_irqsave(&lp->lock, flags);
1829
1830	if (lp->hs_state != LDC_HS_COMPLETE)
1831		err = -ENOTCONN;
1832	else
1833		err = lp->mops->write(lp, buf, size);
1834
1835	spin_unlock_irqrestore(&lp->lock, flags);
1836
1837	return err;
1838}
1839EXPORT_SYMBOL(ldc_write);
1840
1841int ldc_read(struct ldc_channel *lp, void *buf, unsigned int size)
1842{
1843	unsigned long flags;
1844	int err;
1845
1846	if (!buf)
1847		return -EINVAL;
1848
1849	if (!size)
1850		return 0;
1851
1852	spin_lock_irqsave(&lp->lock, flags);
1853
1854	if (lp->hs_state != LDC_HS_COMPLETE)
1855		err = -ENOTCONN;
1856	else
1857		err = lp->mops->read(lp, buf, size);
1858
1859	spin_unlock_irqrestore(&lp->lock, flags);
1860
1861	return err;
1862}
1863EXPORT_SYMBOL(ldc_read);
1864
1865static long arena_alloc(struct ldc_iommu *iommu, unsigned long npages)
1866{
1867	struct iommu_arena *arena = &iommu->arena;
1868	unsigned long n, start, end, limit;
1869	int pass;
1870
1871	limit = arena->limit;
1872	start = arena->hint;
1873	pass = 0;
1874
1875again:
1876	n = bitmap_find_next_zero_area(arena->map, limit, start, npages, 0);
1877	end = n + npages;
1878	if (unlikely(end >= limit)) {
1879		if (likely(pass < 1)) {
1880			limit = start;
1881			start = 0;
1882			pass++;
1883			goto again;
1884		} else {
1885			/* Scanned the whole thing, give up. */
1886			return -1;
1887		}
1888	}
1889	bitmap_set(arena->map, n, npages);
1890
1891	arena->hint = end;
1892
1893	return n;
1894}
1895
1896#define COOKIE_PGSZ_CODE	0xf000000000000000ULL
1897#define COOKIE_PGSZ_CODE_SHIFT	60ULL
1898
1899static u64 pagesize_code(void)
1900{
1901	switch (PAGE_SIZE) {
1902	default:
1903	case (8ULL * 1024ULL):
1904		return 0;
1905	case (64ULL * 1024ULL):
1906		return 1;
1907	case (512ULL * 1024ULL):
1908		return 2;
1909	case (4ULL * 1024ULL * 1024ULL):
1910		return 3;
1911	case (32ULL * 1024ULL * 1024ULL):
1912		return 4;
1913	case (256ULL * 1024ULL * 1024ULL):
1914		return 5;
1915	}
1916}
1917
1918static u64 make_cookie(u64 index, u64 pgsz_code, u64 page_offset)
1919{
1920	return ((pgsz_code << COOKIE_PGSZ_CODE_SHIFT) |
1921		(index << PAGE_SHIFT) |
1922		page_offset);
1923}
1924
1925static u64 cookie_to_index(u64 cookie, unsigned long *shift)
1926{
1927	u64 szcode = cookie >> COOKIE_PGSZ_CODE_SHIFT;
1928
1929	cookie &= ~COOKIE_PGSZ_CODE;
1930
1931	*shift = szcode * 3;
1932
1933	return (cookie >> (13ULL + (szcode * 3ULL)));
1934}
1935
1936static struct ldc_mtable_entry *alloc_npages(struct ldc_iommu *iommu,
1937					     unsigned long npages)
1938{
1939	long entry;
1940
1941	entry = arena_alloc(iommu, npages);
1942	if (unlikely(entry < 0))
1943		return NULL;
1944
1945	return iommu->page_table + entry;
1946}
1947
1948static u64 perm_to_mte(unsigned int map_perm)
1949{
1950	u64 mte_base;
1951
1952	mte_base = pagesize_code();
1953
1954	if (map_perm & LDC_MAP_SHADOW) {
1955		if (map_perm & LDC_MAP_R)
1956			mte_base |= LDC_MTE_COPY_R;
1957		if (map_perm & LDC_MAP_W)
1958			mte_base |= LDC_MTE_COPY_W;
1959	}
1960	if (map_perm & LDC_MAP_DIRECT) {
1961		if (map_perm & LDC_MAP_R)
1962			mte_base |= LDC_MTE_READ;
1963		if (map_perm & LDC_MAP_W)
1964			mte_base |= LDC_MTE_WRITE;
1965		if (map_perm & LDC_MAP_X)
1966			mte_base |= LDC_MTE_EXEC;
1967	}
1968	if (map_perm & LDC_MAP_IO) {
1969		if (map_perm & LDC_MAP_R)
1970			mte_base |= LDC_MTE_IOMMU_R;
1971		if (map_perm & LDC_MAP_W)
1972			mte_base |= LDC_MTE_IOMMU_W;
1973	}
1974
1975	return mte_base;
1976}
1977
1978static int pages_in_region(unsigned long base, long len)
1979{
1980	int count = 0;
1981
1982	do {
1983		unsigned long new = (base + PAGE_SIZE) & PAGE_MASK;
1984
1985		len -= (new - base);
1986		base = new;
1987		count++;
1988	} while (len > 0);
1989
1990	return count;
1991}
1992
1993struct cookie_state {
1994	struct ldc_mtable_entry		*page_table;
1995	struct ldc_trans_cookie		*cookies;
1996	u64				mte_base;
1997	u64				prev_cookie;
1998	u32				pte_idx;
1999	u32				nc;
2000};
2001
2002static void fill_cookies(struct cookie_state *sp, unsigned long pa,
2003			 unsigned long off, unsigned long len)
2004{
2005	do {
2006		unsigned long tlen, new = pa + PAGE_SIZE;
2007		u64 this_cookie;
2008
2009		sp->page_table[sp->pte_idx].mte = sp->mte_base | pa;
2010
2011		tlen = PAGE_SIZE;
2012		if (off)
2013			tlen = PAGE_SIZE - off;
2014		if (tlen > len)
2015			tlen = len;
2016
2017		this_cookie = make_cookie(sp->pte_idx,
2018					  pagesize_code(), off);
2019
2020		off = 0;
2021
2022		if (this_cookie == sp->prev_cookie) {
2023			sp->cookies[sp->nc - 1].cookie_size += tlen;
2024		} else {
2025			sp->cookies[sp->nc].cookie_addr = this_cookie;
2026			sp->cookies[sp->nc].cookie_size = tlen;
2027			sp->nc++;
2028		}
2029		sp->prev_cookie = this_cookie + tlen;
2030
2031		sp->pte_idx++;
2032
2033		len -= tlen;
2034		pa = new;
2035	} while (len > 0);
2036}
2037
2038static int sg_count_one(struct scatterlist *sg)
2039{
2040	unsigned long base = page_to_pfn(sg_page(sg)) << PAGE_SHIFT;
2041	long len = sg->length;
2042
2043	if ((sg->offset | len) & (8UL - 1))
2044		return -EFAULT;
2045
2046	return pages_in_region(base + sg->offset, len);
2047}
2048
2049static int sg_count_pages(struct scatterlist *sg, int num_sg)
2050{
2051	int count;
2052	int i;
2053
2054	count = 0;
2055	for (i = 0; i < num_sg; i++) {
2056		int err = sg_count_one(sg + i);
2057		if (err < 0)
2058			return err;
2059		count += err;
2060	}
2061
2062	return count;
2063}
2064
2065int ldc_map_sg(struct ldc_channel *lp,
2066	       struct scatterlist *sg, int num_sg,
2067	       struct ldc_trans_cookie *cookies, int ncookies,
2068	       unsigned int map_perm)
2069{
2070	unsigned long i, npages, flags;
2071	struct ldc_mtable_entry *base;
2072	struct cookie_state state;
2073	struct ldc_iommu *iommu;
2074	int err;
2075
2076	if (map_perm & ~LDC_MAP_ALL)
2077		return -EINVAL;
2078
2079	err = sg_count_pages(sg, num_sg);
2080	if (err < 0)
2081		return err;
2082
2083	npages = err;
2084	if (err > ncookies)
2085		return -EMSGSIZE;
2086
2087	iommu = &lp->iommu;
2088
2089	spin_lock_irqsave(&iommu->lock, flags);
2090	base = alloc_npages(iommu, npages);
2091	spin_unlock_irqrestore(&iommu->lock, flags);
2092
2093	if (!base)
2094		return -ENOMEM;
2095
2096	state.page_table = iommu->page_table;
2097	state.cookies = cookies;
2098	state.mte_base = perm_to_mte(map_perm);
2099	state.prev_cookie = ~(u64)0;
2100	state.pte_idx = (base - iommu->page_table);
2101	state.nc = 0;
2102
2103	for (i = 0; i < num_sg; i++)
2104		fill_cookies(&state, page_to_pfn(sg_page(&sg[i])) << PAGE_SHIFT,
2105			     sg[i].offset, sg[i].length);
2106
2107	return state.nc;
2108}
2109EXPORT_SYMBOL(ldc_map_sg);
2110
2111int ldc_map_single(struct ldc_channel *lp,
2112		   void *buf, unsigned int len,
2113		   struct ldc_trans_cookie *cookies, int ncookies,
2114		   unsigned int map_perm)
2115{
2116	unsigned long npages, pa, flags;
2117	struct ldc_mtable_entry *base;
2118	struct cookie_state state;
2119	struct ldc_iommu *iommu;
2120
2121	if ((map_perm & ~LDC_MAP_ALL) || (ncookies < 1))
2122		return -EINVAL;
2123
2124	pa = __pa(buf);
2125	if ((pa | len) & (8UL - 1))
2126		return -EFAULT;
2127
2128	npages = pages_in_region(pa, len);
2129
2130	iommu = &lp->iommu;
2131
2132	spin_lock_irqsave(&iommu->lock, flags);
2133	base = alloc_npages(iommu, npages);
2134	spin_unlock_irqrestore(&iommu->lock, flags);
2135
2136	if (!base)
2137		return -ENOMEM;
2138
2139	state.page_table = iommu->page_table;
2140	state.cookies = cookies;
2141	state.mte_base = perm_to_mte(map_perm);
2142	state.prev_cookie = ~(u64)0;
2143	state.pte_idx = (base - iommu->page_table);
2144	state.nc = 0;
2145	fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len);
2146	BUG_ON(state.nc != 1);
2147
2148	return state.nc;
2149}
2150EXPORT_SYMBOL(ldc_map_single);
2151
2152static void free_npages(unsigned long id, struct ldc_iommu *iommu,
2153			u64 cookie, u64 size)
2154{
2155	struct iommu_arena *arena = &iommu->arena;
2156	unsigned long i, shift, index, npages;
2157	struct ldc_mtable_entry *base;
2158
2159	npages = PAGE_ALIGN(((cookie & ~PAGE_MASK) + size)) >> PAGE_SHIFT;
2160	index = cookie_to_index(cookie, &shift);
2161	base = iommu->page_table + index;
2162
2163	BUG_ON(index > arena->limit ||
2164	       (index + npages) > arena->limit);
2165
2166	for (i = 0; i < npages; i++) {
2167		if (base->cookie)
2168			sun4v_ldc_revoke(id, cookie + (i << shift),
2169					 base->cookie);
2170		base->mte = 0;
2171		__clear_bit(index + i, arena->map);
2172	}
2173}
2174
2175void ldc_unmap(struct ldc_channel *lp, struct ldc_trans_cookie *cookies,
2176	       int ncookies)
2177{
2178	struct ldc_iommu *iommu = &lp->iommu;
2179	unsigned long flags;
2180	int i;
2181
2182	spin_lock_irqsave(&iommu->lock, flags);
2183	for (i = 0; i < ncookies; i++) {
2184		u64 addr = cookies[i].cookie_addr;
2185		u64 size = cookies[i].cookie_size;
2186
2187		free_npages(lp->id, iommu, addr, size);
2188	}
2189	spin_unlock_irqrestore(&iommu->lock, flags);
2190}
2191EXPORT_SYMBOL(ldc_unmap);
2192
2193int ldc_copy(struct ldc_channel *lp, int copy_dir,
2194	     void *buf, unsigned int len, unsigned long offset,
2195	     struct ldc_trans_cookie *cookies, int ncookies)
2196{
2197	unsigned int orig_len;
2198	unsigned long ra;
2199	int i;
2200
2201	if (copy_dir != LDC_COPY_IN && copy_dir != LDC_COPY_OUT) {
2202		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Bad copy_dir[%d]\n",
2203		       lp->id, copy_dir);
2204		return -EINVAL;
2205	}
2206
2207	ra = __pa(buf);
2208	if ((ra | len | offset) & (8UL - 1)) {
2209		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Unaligned buffer "
2210		       "ra[%lx] len[%x] offset[%lx]\n",
2211		       lp->id, ra, len, offset);
2212		return -EFAULT;
2213	}
2214
2215	if (lp->hs_state != LDC_HS_COMPLETE ||
2216	    (lp->flags & LDC_FLAG_RESET)) {
2217		printk(KERN_ERR PFX "ldc_copy: ID[%lu] Link down hs_state[%x] "
2218		       "flags[%x]\n", lp->id, lp->hs_state, lp->flags);
2219		return -ECONNRESET;
2220	}
2221
2222	orig_len = len;
2223	for (i = 0; i < ncookies; i++) {
2224		unsigned long cookie_raddr = cookies[i].cookie_addr;
2225		unsigned long this_len = cookies[i].cookie_size;
2226		unsigned long actual_len;
2227
2228		if (unlikely(offset)) {
2229			unsigned long this_off = offset;
2230
2231			if (this_off > this_len)
2232				this_off = this_len;
2233
2234			offset -= this_off;
2235			this_len -= this_off;
2236			if (!this_len)
2237				continue;
2238			cookie_raddr += this_off;
2239		}
2240
2241		if (this_len > len)
2242			this_len = len;
2243
2244		while (1) {
2245			unsigned long hv_err;
2246
2247			hv_err = sun4v_ldc_copy(lp->id, copy_dir,
2248						cookie_raddr, ra,
2249						this_len, &actual_len);
2250			if (unlikely(hv_err)) {
2251				printk(KERN_ERR PFX "ldc_copy: ID[%lu] "
2252				       "HV error %lu\n",
2253				       lp->id, hv_err);
2254				if (lp->hs_state != LDC_HS_COMPLETE ||
2255				    (lp->flags & LDC_FLAG_RESET))
2256					return -ECONNRESET;
2257				else
2258					return -EFAULT;
2259			}
2260
2261			cookie_raddr += actual_len;
2262			ra += actual_len;
2263			len -= actual_len;
2264			if (actual_len == this_len)
2265				break;
2266
2267			this_len -= actual_len;
2268		}
2269
2270		if (!len)
2271			break;
2272	}
2273
2274	/* It is caller policy what to do about short copies.
2275	 * For example, a networking driver can declare the
2276	 * packet a runt and drop it.
2277	 */
2278
2279	return orig_len - len;
2280}
2281EXPORT_SYMBOL(ldc_copy);
2282
2283void *ldc_alloc_exp_dring(struct ldc_channel *lp, unsigned int len,
2284			  struct ldc_trans_cookie *cookies, int *ncookies,
2285			  unsigned int map_perm)
2286{
2287	void *buf;
2288	int err;
2289
2290	if (len & (8UL - 1))
2291		return ERR_PTR(-EINVAL);
2292
2293	buf = kzalloc(len, GFP_KERNEL);
2294	if (!buf)
2295		return ERR_PTR(-ENOMEM);
2296
2297	err = ldc_map_single(lp, buf, len, cookies, *ncookies, map_perm);
2298	if (err < 0) {
2299		kfree(buf);
2300		return ERR_PTR(err);
2301	}
2302	*ncookies = err;
2303
2304	return buf;
2305}
2306EXPORT_SYMBOL(ldc_alloc_exp_dring);
2307
/* Undo ldc_alloc_exp_dring(): release the cookies' mappings first,
 * then free the buffer itself.  The len argument is not used.
 */
void ldc_free_exp_dring(struct ldc_channel *lp, void *buf, unsigned int len,
			struct ldc_trans_cookie *cookies, int ncookies)
{
	/* Unmap before freeing the memory behind the mappings. */
	ldc_unmap(lp, cookies, ncookies);

	kfree(buf);
}
EXPORT_SYMBOL(ldc_free_exp_dring);
2315
2316static int __init ldc_init(void)
2317{
2318	unsigned long major, minor;
2319	struct mdesc_handle *hp;
2320	const u64 *v;
2321	int err;
2322	u64 mp;
2323
2324	hp = mdesc_grab();
2325	if (!hp)
2326		return -ENODEV;
2327
2328	mp = mdesc_node_by_name(hp, MDESC_NODE_NULL, "platform");
2329	err = -ENODEV;
2330	if (mp == MDESC_NODE_NULL)
2331		goto out;
2332
2333	v = mdesc_get_property(hp, mp, "domaining-enabled", NULL);
2334	if (!v)
2335		goto out;
2336
2337	major = 1;
2338	minor = 0;
2339	if (sun4v_hvapi_register(HV_GRP_LDOM, major, &minor)) {
2340		printk(KERN_INFO PFX "Could not register LDOM hvapi.\n");
2341		goto out;
2342	}
2343
2344	printk(KERN_INFO "%s", version);
2345
2346	if (!*v) {
2347		printk(KERN_INFO PFX "Domaining disabled.\n");
2348		goto out;
2349	}
2350	ldom_domaining_enabled = 1;
2351	err = 0;
2352
2353out:
2354	mdesc_release(hp);
2355	return err;
2356}
2357
2358core_initcall(ldc_init);
2359