/*
 * IBM PowerPC Virtual I/O Infrastructure Support.
 *
 *    Copyright (c) 2003,2008 IBM Corp.
 *     Dave Engebretsen engebret@us.ibm.com
 *     Santiago Leon santil@us.ibm.com
 *     Hollis Blanchard <hollisb@us.ibm.com>
 *     Stephen Rothwell
 *     Robert Jennings <rcjenn@us.ibm.com>
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 */

#include <linux/types.h>
#include <linux/device.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/kobject.h>

#include <asm/iommu.h>
#include <asm/dma.h>
#include <asm/vio.h>
#include <asm/prom.h>
#include <asm/firmware.h>
#include <asm/tce.h>
#include <asm/abs_addr.h>
#include <asm/page.h>
#include <asm/hvcall.h>
#include <asm/iseries/vio.h>
#include <asm/iseries/hv_types.h>
#include <asm/iseries/hv_lp_config.h>
#include <asm/iseries/hv_call_xm.h>
#include <asm/iseries/iommu.h>

static struct bus_type vio_bus_type;

static struct vio_dev vio_bus_device  = { /* fake "parent" device */
	.name = "vio",
	.type = "",
	.dev.init_name = "vio",
	.dev.bus = &vio_bus_type,
};

#ifdef CONFIG_PPC_SMLPAR
/**
 * vio_cmo_pool - A pool of IO memory for CMO use
 *
 * @size: The size of the pool in bytes
 * @free: The amount of free memory in the pool
 */
struct vio_cmo_pool {
	size_t size;
	size_t free;
};

/* How many ms to delay queued balance work */
#define VIO_CMO_BALANCE_DELAY 100

/* Portion out IO memory to CMO devices by this chunk size */
#define VIO_CMO_BALANCE_CHUNK 131072

/**
 * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
 *
 * @viodev: struct vio_dev pointer
 * @list: pointer to other devices on bus that are being tracked
 */
struct vio_cmo_dev_entry {
	struct vio_dev *viodev;
	struct list_head list;
};

/**
 * vio_cmo - VIO bus accounting structure for CMO entitlement
 *
 * @lock: spinlock for entire structure
 * @balance_q: work queue for balancing system entitlement
 * @device_list: list of CMO-enabled devices requiring entitlement
 * @entitled: total system entitlement in bytes
 * @reserve: pool of memory from which devices reserve entitlement, incl. spare
 * @excess: pool of excess entitlement not needed for device reserves or spare
 * @spare: IO memory for device hotplug functionality
 * @min: minimum necessary for system operation
 * @desired: desired memory for system operation
 * @curr: bytes currently allocated
 * @high: high water mark for IO data usage
 */
struct vio_cmo {
	spinlock_t lock;
	struct delayed_work balance_q;
	struct list_head device_list;
	size_t entitled;
	struct vio_cmo_pool reserve;
	struct vio_cmo_pool excess;
	size_t spare;
	size_t min;
	size_t desired;
	size_t curr;
	size_t high;
} vio_cmo;

/**
 * vio_cmo_num_OF_devs - Count the number of OF devices that have DMA windows
 */
static int vio_cmo_num_OF_devs(void)
{
	struct device_node *node_vroot;
	int count = 0;

	/*
	 * Count the number of vdevice entries with an
	 * ibm,my-dma-window OF property
	 */
	node_vroot = of_find_node_by_name(NULL, "vdevice");
	if (node_vroot) {
		struct device_node *of_node;
		struct property *prop;

		for_each_child_of_node(node_vroot, of_node) {
			prop = of_find_property(of_node, "ibm,my-dma-window",
			                        NULL);
			if (prop)
				count++;
		}
	}
	of_node_put(node_vroot);
	return count;
}

/**
 * vio_cmo_alloc - allocate IO memory for CMO-enabled devices
 *
 * @viodev: VIO device requesting IO memory
 * @size: size of allocation requested
 *
 * Allocations come from memory reserved for the devices and any excess
 * IO memory available to all devices.  The spare pool used to service
 * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
 * made available.
 *
 * Return codes:
 *  0 for successful allocation and -ENOMEM for a failure
 */
static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
{
	unsigned long flags;
	size_t reserve_free = 0;
	size_t excess_free = 0;
	int ret = -ENOMEM;

	spin_lock_irqsave(&vio_cmo.lock, flags);

	/* Determine the amount of free entitlement available in reserve */
	if (viodev->cmo.entitled > viodev->cmo.allocated)
		reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;

	/* If spare is not fulfilled, the excess pool cannot be used. */
	if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
		excess_free = vio_cmo.excess.free;

	/* The request can be satisfied */
	if ((reserve_free + excess_free) >= size) {
		vio_cmo.curr += size;
		if (vio_cmo.curr > vio_cmo.high)
			vio_cmo.high = vio_cmo.curr;
		viodev->cmo.allocated += size;
		size -= min(reserve_free, size);
		vio_cmo.excess.free -= size;
		ret = 0;
	}

	spin_unlock_irqrestore(&vio_cmo.lock, flags);
	return ret;
}
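
/*
 * Illustrative arithmetic for vio_cmo_alloc() above, with made-up numbers:
 * a device with cmo.entitled = 64K and cmo.allocated = 48K has
 * reserve_free = 16K.  A 20K request is granted by charging the full 20K
 * to cmo.allocated; the 16K of reserve headroom is consumed implicitly and
 * the remaining 4K is taken from vio_cmo.excess.free (provided the spare
 * pool is fully funded), while vio_cmo.curr grows by 20K.
 */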

/**
 * vio_cmo_dealloc - deallocate IO memory from CMO-enabled devices
 * @viodev: VIO device freeing IO memory
 * @size: size of deallocation
 *
 * IO memory is freed by the device back to the correct memory pools.
 * The spare pool is replenished first from either memory pool, then
 * the reserve pool is used to reduce device entitlement, the excess
 * pool is used to increase the reserve pool toward the desired entitlement
 * target, and then the remaining memory is returned to the pools.
 *
 */
static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
{
	unsigned long flags;
	size_t spare_needed = 0;
	size_t excess_freed = 0;
	size_t reserve_freed = size;
	size_t tmp;
	int balance = 0;

	spin_lock_irqsave(&vio_cmo.lock, flags);
	vio_cmo.curr -= size;

	/* Amount of memory freed from the excess pool */
	if (viodev->cmo.allocated > viodev->cmo.entitled) {
		excess_freed = min(reserve_freed, (viodev->cmo.allocated -
		                                   viodev->cmo.entitled));
		reserve_freed -= excess_freed;
	}

	/* Remove allocation from device */
	viodev->cmo.allocated -= (reserve_freed + excess_freed);

	/* Spare is a subset of the reserve pool, replenish it first. */
	spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;

	/*
	 * Replenish the spare in the reserve pool from the excess pool.
	 * This moves entitlement into the reserve pool.
	 */
	if (spare_needed && excess_freed) {
		tmp = min(excess_freed, spare_needed);
		vio_cmo.excess.size -= tmp;
		vio_cmo.reserve.size += tmp;
		vio_cmo.spare += tmp;
		excess_freed -= tmp;
		spare_needed -= tmp;
		balance = 1;
	}

	/*
	 * Replenish the spare in the reserve pool from the reserve pool.
	 * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
	 * if needed, and gives it to the spare pool. The amount of used
	 * memory in this pool does not change.
	 */
	if (spare_needed && reserve_freed) {
		tmp = min(spare_needed, min(reserve_freed,
		                            (viodev->cmo.entitled -
		                             VIO_CMO_MIN_ENT)));

		vio_cmo.spare += tmp;
		viodev->cmo.entitled -= tmp;
		reserve_freed -= tmp;
		spare_needed -= tmp;
		balance = 1;
	}

	/*
	 * Increase the reserve pool until the desired allocation is met.
	 * Move an allocation freed from the excess pool into the reserve
	 * pool and schedule a balance operation.
	 */
	if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
		tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));

		vio_cmo.excess.size -= tmp;
		vio_cmo.reserve.size += tmp;
		excess_freed -= tmp;
		balance = 1;
	}

	/* Return memory from the excess pool to that pool */
	if (excess_freed)
		vio_cmo.excess.free += excess_freed;

	if (balance)
		schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
	spin_unlock_irqrestore(&vio_cmo.lock, flags);
}

/**
 * vio_cmo_entitlement_update - Manage system entitlement changes
 *
 * @new_entitlement: new system entitlement to attempt to accommodate
 *
 * Increases in entitlement will be used to fulfill the spare entitlement
 * and the rest is given to the excess pool.  Decreases, if they are
 * possible, come from the excess pool and from unused device entitlement.
 *
 * Returns: 0 on success, -ENOMEM when change cannot be made
 */
int vio_cmo_entitlement_update(size_t new_entitlement)
{
	struct vio_dev *viodev;
	struct vio_cmo_dev_entry *dev_ent;
	unsigned long flags;
	size_t avail, delta, tmp;

	spin_lock_irqsave(&vio_cmo.lock, flags);

	/* Entitlement increases */
	if (new_entitlement > vio_cmo.entitled) {
		delta = new_entitlement - vio_cmo.entitled;

		/* Fulfill spare allocation */
		if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
			tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
			vio_cmo.spare += tmp;
			vio_cmo.reserve.size += tmp;
			delta -= tmp;
		}

		/* Remaining new allocation goes to the excess pool */
		vio_cmo.entitled += delta;
		vio_cmo.excess.size += delta;
		vio_cmo.excess.free += delta;

		goto out;
	}

	/* Entitlement decreases */
	delta = vio_cmo.entitled - new_entitlement;
	avail = vio_cmo.excess.free;

	/*
	 * Need to check how much unused entitlement each device can
	 * sacrifice to fulfill entitlement change.
	 */
	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
		if (avail >= delta)
			break;

		viodev = dev_ent->viodev;
		if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
		    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
				avail += viodev->cmo.entitled -
				         max_t(size_t, viodev->cmo.allocated,
				               VIO_CMO_MIN_ENT);
	}

	if (delta <= avail) {
		vio_cmo.entitled -= delta;

		/* Take entitlement from the excess pool first */
		tmp = min(vio_cmo.excess.free, delta);
		vio_cmo.excess.size -= tmp;
		vio_cmo.excess.free -= tmp;
		delta -= tmp;

		/*
		 * Remove all but VIO_CMO_MIN_ENT bytes from devices
		 * until entitlement change is served
		 */
		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
			if (!delta)
				break;

			viodev = dev_ent->viodev;
			tmp = 0;
			if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
			    (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
				tmp = viodev->cmo.entitled -
				      max_t(size_t, viodev->cmo.allocated,
				            VIO_CMO_MIN_ENT);
			viodev->cmo.entitled -= min(tmp, delta);
			delta -= min(tmp, delta);
		}
	} else {
		spin_unlock_irqrestore(&vio_cmo.lock, flags);
		return -ENOMEM;
	}

out:
	schedule_delayed_work(&vio_cmo.balance_q, 0);
	spin_unlock_irqrestore(&vio_cmo.lock, flags);
	return 0;
}

/**
 * vio_cmo_balance - Balance entitlement among devices
 *
 * @work: work queue structure for this operation
 *
 * Any system entitlement above the minimum needed for devices, or
 * already allocated to devices, can be distributed to the devices.
 * The list of devices is iterated through to recalculate the desired
 * entitlement level and to determine how much entitlement above the
 * minimum entitlement is allocated to devices.
 *
 * Small chunks of the available entitlement are given to devices until
 * their requirements are fulfilled or there is no entitlement left to give.
 * Upon completion, the sizes of the reserve and excess pools are calculated.
 *
 * The system minimum entitlement level is also recalculated here.
 * Entitlement will be reserved for devices even after vio_bus_remove to
 * accommodate reloading the driver.  The OF tree is walked to count the
 * number of devices present and this will remove entitlement for devices
 * that have actually left the system after having vio_bus_remove called.
 */
static void vio_cmo_balance(struct work_struct *work)
{
	struct vio_cmo *cmo;
	struct vio_dev *viodev;
	struct vio_cmo_dev_entry *dev_ent;
	unsigned long flags;
	size_t avail = 0, level, chunk, need;
	int devcount = 0, fulfilled;

	cmo = container_of(work, struct vio_cmo, balance_q.work);

	spin_lock_irqsave(&vio_cmo.lock, flags);

	/* Calculate minimum entitlement and fulfill spare */
	cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
	BUG_ON(cmo->min > cmo->entitled);
	cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
	cmo->min += cmo->spare;
	cmo->desired = cmo->min;

	/*
	 * Determine how much entitlement is available and reset device
	 * entitlements
	 */
	avail = cmo->entitled - cmo->spare;
	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
		viodev = dev_ent->viodev;
		devcount++;
		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
		cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
		avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
	}

	/*
	 * Having provided each device with the minimum entitlement, loop
	 * over the devices portioning out the remaining entitlement
	 * until there is nothing left.
	 */
	level = VIO_CMO_MIN_ENT;
	while (avail) {
		fulfilled = 0;
		list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
			viodev = dev_ent->viodev;

			if (viodev->cmo.desired <= level) {
				fulfilled++;
				continue;
			}

			/*
			 * Give the device up to VIO_CMO_BALANCE_CHUNK
			 * bytes of entitlement, but do not exceed the
			 * desired level of entitlement for the device.
			 */
			chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
			chunk = min(chunk, (viodev->cmo.desired -
			                    viodev->cmo.entitled));
			viodev->cmo.entitled += chunk;

			/*
			 * If the memory for this entitlement increase was
			 * already allocated to the device it does not come
			 * from the available pool being portioned out.
			 */
			need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
			       max(viodev->cmo.allocated, level);
			avail -= need;

		}
		if (fulfilled == devcount)
			break;
		level += VIO_CMO_BALANCE_CHUNK;
	}

	/* Calculate new reserve and excess pool sizes */
	cmo->reserve.size = cmo->min;
	cmo->excess.free = 0;
	cmo->excess.size = 0;
	need = 0;
	list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
		viodev = dev_ent->viodev;
		/* Calculated reserve size above the minimum entitlement */
		if (viodev->cmo.entitled)
			cmo->reserve.size += (viodev->cmo.entitled -
			                      VIO_CMO_MIN_ENT);
		/* Calculated used excess entitlement */
		if (viodev->cmo.allocated > viodev->cmo.entitled)
			need += viodev->cmo.allocated - viodev->cmo.entitled;
	}
	cmo->excess.size = cmo->entitled - cmo->reserve.size;
	cmo->excess.free = cmo->excess.size - need;

	cancel_delayed_work(to_delayed_work(work));
	spin_unlock_irqrestore(&vio_cmo.lock, flags);
}

static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
                                          dma_addr_t *dma_handle, gfp_t flag)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	void *ret;

	if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
		atomic_inc(&viodev->cmo.allocs_failed);
		return NULL;
	}

	ret = dma_iommu_ops.alloc_coherent(dev, size, dma_handle, flag);
	if (unlikely(ret == NULL)) {
		vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
		atomic_inc(&viodev->cmo.allocs_failed);
	}

	return ret;
}

static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
                                        void *vaddr, dma_addr_t dma_handle)
{
	struct vio_dev *viodev = to_vio_dev(dev);

	dma_iommu_ops.free_coherent(dev, size, vaddr, dma_handle);

	vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
}

static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
                                         unsigned long offset, size_t size,
                                         enum dma_data_direction direction,
                                         struct dma_attrs *attrs)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	dma_addr_t ret = DMA_ERROR_CODE;

	if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE))) {
		atomic_inc(&viodev->cmo.allocs_failed);
		return ret;
	}

	ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
	if (unlikely(dma_mapping_error(dev, ret))) {
		vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
		atomic_inc(&viodev->cmo.allocs_failed);
	}

	return ret;
}

static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
				     size_t size,
				     enum dma_data_direction direction,
				     struct dma_attrs *attrs)
{
	struct vio_dev *viodev = to_vio_dev(dev);

	dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);

	vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE));
}

static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
                                int nelems, enum dma_data_direction direction,
                                struct dma_attrs *attrs)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	struct scatterlist *sgl;
	int ret, count = 0;
	size_t alloc_size = 0;

	for (sgl = sglist; count < nelems; count++, sgl++)
		alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE);

	if (vio_cmo_alloc(viodev, alloc_size)) {
		atomic_inc(&viodev->cmo.allocs_failed);
		return 0;
	}

	ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);

	if (unlikely(!ret)) {
		vio_cmo_dealloc(viodev, alloc_size);
		atomic_inc(&viodev->cmo.allocs_failed);
		return ret;
	}

	for (sgl = sglist, count = 0; count < ret; count++, sgl++)
		alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE);
	if (alloc_size)
		vio_cmo_dealloc(viodev, alloc_size);

	return ret;
}

static void vio_dma_iommu_unmap_sg(struct device *dev,
		struct scatterlist *sglist, int nelems,
		enum dma_data_direction direction,
		struct dma_attrs *attrs)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	struct scatterlist *sgl;
	size_t alloc_size = 0;
	int count = 0;

	for (sgl = sglist; count < nelems; count++, sgl++)
		alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE);

	dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);

	vio_cmo_dealloc(viodev, alloc_size);
}

struct dma_map_ops vio_dma_mapping_ops = {
	.alloc_coherent = vio_dma_iommu_alloc_coherent,
	.free_coherent  = vio_dma_iommu_free_coherent,
	.map_sg         = vio_dma_iommu_map_sg,
	.unmap_sg       = vio_dma_iommu_unmap_sg,
	.map_page       = vio_dma_iommu_map_page,
	.unmap_page     = vio_dma_iommu_unmap_page,

};

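/*
 * A hypothetical sketch (not part of this file) of how the wrappers above
 * are exercised: a VIO driver simply uses the generic DMA API against its
 * vio_dev's embedded struct device, and the calls are dispatched through
 * vio_dma_mapping_ops, which charges and credits CMO entitlement around
 * the underlying dma_iommu_ops.  The buffer and length names are made up.
 *
 *	dma_addr_t handle = dma_map_single(&vdev->dev, buf, len,
 *					   DMA_TO_DEVICE);
 *	if (dma_mapping_error(&vdev->dev, handle))
 *		return -EIO;
 *	...
 *	dma_unmap_single(&vdev->dev, handle, len, DMA_TO_DEVICE);
 *
 * On a mapping failure the per-device allocs_failed counter has already
 * been incremented by vio_dma_iommu_map_page().
 */
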
/**
 * vio_cmo_set_dev_desired - Set desired entitlement for a device
 *
 * @viodev: struct vio_dev for device to alter
 * @desired: new desired entitlement level in bytes
 *
 * For use by devices to request a change to their entitlement at runtime or
 * through sysfs.  The desired entitlement level is changed and a balancing
 * of system resources is scheduled to run in the future.
 */
void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
{
	unsigned long flags;
	struct vio_cmo_dev_entry *dev_ent;
	int found = 0;

	if (!firmware_has_feature(FW_FEATURE_CMO))
		return;

	spin_lock_irqsave(&vio_cmo.lock, flags);
	if (desired < VIO_CMO_MIN_ENT)
		desired = VIO_CMO_MIN_ENT;

	/*
	 * Changes will not be made for devices not in the device list.
	 * If it is not in the device list, then no driver is loaded
	 * for the device and it cannot receive entitlement.
	 */
	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
		if (viodev == dev_ent->viodev) {
			found = 1;
			break;
		}
	if (!found) {
		spin_unlock_irqrestore(&vio_cmo.lock, flags);
		return;
	}

	/* Increase/decrease in desired device entitlement */
	if (desired >= viodev->cmo.desired) {
		/* Just bump the bus and device values prior to a balance */
		vio_cmo.desired += desired - viodev->cmo.desired;
		viodev->cmo.desired = desired;
	} else {
		/* Decrease bus and device values for desired entitlement */
		vio_cmo.desired -= viodev->cmo.desired - desired;
		viodev->cmo.desired = desired;
		/*
		 * If less entitlement is desired than current entitlement, move
		 * any reserve memory in the change region to the excess pool.
		 */
		if (viodev->cmo.entitled > desired) {
			vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
			vio_cmo.excess.size += viodev->cmo.entitled - desired;
			/*
			 * If entitlement moving from the reserve pool to the
			 * excess pool is currently unused, add to the excess
			 * free counter.
			 */
			if (viodev->cmo.allocated < viodev->cmo.entitled)
				vio_cmo.excess.free += viodev->cmo.entitled -
				                       max(viodev->cmo.allocated, desired);
			viodev->cmo.entitled = desired;
		}
	}
	schedule_delayed_work(&vio_cmo.balance_q, 0);
	spin_unlock_irqrestore(&vio_cmo.lock, flags);
}
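
/*
 * Illustrative only: a CMO-aware driver that finds it needs more IO
 * entitlement at runtime could bump its desired level, for example
 *
 *	vio_cmo_set_dev_desired(viodev, viodev->cmo.desired + 16 * PAGE_SIZE);
 *
 * The same knob is exposed to user space through the per-device
 * cmo_desired sysfs attribute defined later in this file; the value is
 * clamped to at least VIO_CMO_MIN_ENT and a balance pass is scheduled.
 */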

/**
 * vio_cmo_bus_probe - Handle CMO specific bus probe activities
 *
 * @viodev - Pointer to struct vio_dev for device
 *
 * Determine the device's IO memory entitlement needs, attempting
 * to satisfy the system minimum entitlement at first and scheduling
 * a balance operation to take care of the rest at a later time.
 *
 * Returns: 0 on success, -EINVAL when device doesn't support CMO, and
 *          -ENOMEM when entitlement is not available for device or
 *          device entry.
 *
 */
static int vio_cmo_bus_probe(struct vio_dev *viodev)
{
	struct vio_cmo_dev_entry *dev_ent;
	struct device *dev = &viodev->dev;
	struct vio_driver *viodrv = to_vio_driver(dev->driver);
	unsigned long flags;
	size_t size;

	/*
	 * Check to see that device has a DMA window and configure
	 * entitlement for the device.
	 */
	if (of_get_property(viodev->dev.of_node,
	                    "ibm,my-dma-window", NULL)) {
		/* Check that the driver is CMO enabled and get desired DMA */
		if (!viodrv->get_desired_dma) {
			dev_err(dev, "%s: device driver does not support CMO\n",
			        __func__);
			return -EINVAL;
		}

		viodev->cmo.desired = IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev));
		if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
			viodev->cmo.desired = VIO_CMO_MIN_ENT;
		size = VIO_CMO_MIN_ENT;

		dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
		                  GFP_KERNEL);
		if (!dev_ent)
			return -ENOMEM;

		dev_ent->viodev = viodev;
		spin_lock_irqsave(&vio_cmo.lock, flags);
		list_add(&dev_ent->list, &vio_cmo.device_list);
	} else {
		viodev->cmo.desired = 0;
		size = 0;
		spin_lock_irqsave(&vio_cmo.lock, flags);
	}

	/*
	 * If the needs for vio_cmo.min have not changed since they
	 * were last set, the number of devices in the OF tree has
	 * been constant and the IO memory for this is already in
	 * the reserve pool.
	 */
	if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
	                    VIO_CMO_MIN_ENT)) {
		/* Update desired entitlement if device requires it */
		if (size)
			vio_cmo.desired += (viodev->cmo.desired -
			                    VIO_CMO_MIN_ENT);
	} else {
		size_t tmp;

		tmp = vio_cmo.spare + vio_cmo.excess.free;
		if (tmp < size) {
			dev_err(dev, "%s: insufficient free "
			        "entitlement to add device. "
			        "Need %lu, have %lu\n", __func__,
				size, tmp);
			spin_unlock_irqrestore(&vio_cmo.lock, flags);
			return -ENOMEM;
		}

		/* Use excess pool first to fulfill request */
		tmp = min(size, vio_cmo.excess.free);
		vio_cmo.excess.free -= tmp;
		vio_cmo.excess.size -= tmp;
		vio_cmo.reserve.size += tmp;

		/* Use spare if excess pool was insufficient */
		vio_cmo.spare -= size - tmp;

		/* Update bus accounting */
		vio_cmo.min += size;
		vio_cmo.desired += viodev->cmo.desired;
	}
	spin_unlock_irqrestore(&vio_cmo.lock, flags);
	return 0;
}

/**
 * vio_cmo_bus_remove - Handle CMO specific bus removal activities
 *
 * @viodev - Pointer to struct vio_dev for device
 *
 * Remove the device from the cmo device list.  The minimum entitlement
 * will be reserved for the device as long as it is in the system.  The
 * rest of the entitlement the device had been allocated will be returned
 * to the system.
 */
static void vio_cmo_bus_remove(struct vio_dev *viodev)
{
	struct vio_cmo_dev_entry *dev_ent;
	unsigned long flags;
	size_t tmp;

	spin_lock_irqsave(&vio_cmo.lock, flags);
	if (viodev->cmo.allocated) {
		dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
		        "allocated after remove operation.\n",
		        __func__, viodev->cmo.allocated);
		BUG();
	}

	/*
	 * Remove the device from the device list being maintained for
	 * CMO enabled devices.
	 */
	list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
		if (viodev == dev_ent->viodev) {
			list_del(&dev_ent->list);
			kfree(dev_ent);
			break;
		}

	/*
	 * Devices may not require any entitlement and they do not need
	 * to be processed.  Otherwise, return the device's entitlement
	 * back to the pools.
	 */
	if (viodev->cmo.entitled) {
		/*
		 * This device has not yet left the OF tree, its
		 * minimum entitlement remains in vio_cmo.min and
		 * vio_cmo.desired
		 */
		vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);

		/*
		 * Save min allocation for device in reserve as long
		 * as it exists in OF tree as determined by later
		 * balance operation
		 */
		viodev->cmo.entitled -= VIO_CMO_MIN_ENT;

		/* Replenish spare from freed reserve pool */
		if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
			tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
			                                 vio_cmo.spare));
			vio_cmo.spare += tmp;
			viodev->cmo.entitled -= tmp;
		}

		/* Remaining reserve goes to excess pool */
		vio_cmo.excess.size += viodev->cmo.entitled;
		vio_cmo.excess.free += viodev->cmo.entitled;
		vio_cmo.reserve.size -= viodev->cmo.entitled;

		/*
		 * Until the device is removed it will keep a
		 * minimum entitlement; this will guarantee that
		 * a module unload/load will result in a success.
		 */
		viodev->cmo.entitled = VIO_CMO_MIN_ENT;
		viodev->cmo.desired = VIO_CMO_MIN_ENT;
		atomic_set(&viodev->cmo.allocs_failed, 0);
	}

	spin_unlock_irqrestore(&vio_cmo.lock, flags);
}

static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
{
	vio_dma_mapping_ops.dma_supported = dma_iommu_ops.dma_supported;
	viodev->dev.archdata.dma_ops = &vio_dma_mapping_ops;
}

/**
 * vio_cmo_bus_init - CMO entitlement initialization at bus init time
 *
 * Set up the reserve and excess entitlement pools based on available
 * system entitlement and the number of devices in the OF tree that
 * require entitlement in the reserve pool.
 */
static void vio_cmo_bus_init(void)
{
	struct hvcall_mpp_data mpp_data;
	int err;

	memset(&vio_cmo, 0, sizeof(struct vio_cmo));
	spin_lock_init(&vio_cmo.lock);
	INIT_LIST_HEAD(&vio_cmo.device_list);
	INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);

	/* Get current system entitlement */
	err = h_get_mpp(&mpp_data);

	/*
	 * On failure, continue with entitlement set to 0, will panic()
	 * later when spare is reserved.
	 */
	if (err != H_SUCCESS) {
		printk(KERN_ERR "%s: unable to determine system IO "
		       "entitlement. (%d)\n", __func__, err);
		vio_cmo.entitled = 0;
	} else {
		vio_cmo.entitled = mpp_data.entitled_mem;
	}

	/* Set reservation and check against entitlement */
	vio_cmo.spare = VIO_CMO_MIN_ENT;
	vio_cmo.reserve.size = vio_cmo.spare;
	vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
	                         VIO_CMO_MIN_ENT);
	if (vio_cmo.reserve.size > vio_cmo.entitled) {
		printk(KERN_ERR "%s: insufficient system entitlement\n",
		       __func__);
		panic("%s: Insufficient system entitlement", __func__);
	}

	/* Set the remaining accounting variables */
	vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
	vio_cmo.excess.free = vio_cmo.excess.size;
	vio_cmo.min = vio_cmo.reserve.size;
	vio_cmo.desired = vio_cmo.reserve.size;
}

/* sysfs device functions and data structures for CMO */

#define viodev_cmo_rd_attr(name)                                        \
static ssize_t viodev_cmo_##name##_show(struct device *dev,             \
                                        struct device_attribute *attr,  \
                                         char *buf)                     \
{                                                                       \
	return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name);        \
}

static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
}

static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	atomic_set(&viodev->cmo.allocs_failed, 0);
	return count;
}

static ssize_t viodev_cmo_desired_set(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t count)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	size_t new_desired;
	int ret;

	ret = strict_strtoul(buf, 10, &new_desired);
	if (ret)
		return ret;

	vio_cmo_set_dev_desired(viodev, new_desired);
	return count;
}

viodev_cmo_rd_attr(desired);
viodev_cmo_rd_attr(entitled);
viodev_cmo_rd_attr(allocated);

static ssize_t name_show(struct device *, struct device_attribute *, char *);
static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
			     char *buf);
static struct device_attribute vio_cmo_dev_attrs[] = {
	__ATTR_RO(name),
	__ATTR_RO(devspec),
	__ATTR_RO(modalias),
	__ATTR(cmo_desired,       S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
	       viodev_cmo_desired_show, viodev_cmo_desired_set),
	__ATTR(cmo_entitled,      S_IRUGO, viodev_cmo_entitled_show,      NULL),
	__ATTR(cmo_allocated,     S_IRUGO, viodev_cmo_allocated_show,     NULL),
	__ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
	       viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
	__ATTR_NULL
};

/* sysfs bus functions and data structures for CMO */

#define viobus_cmo_rd_attr(name)                                        \
static ssize_t                                                          \
viobus_cmo_##name##_show(struct bus_type *bt, char *buf)                \
{                                                                       \
	return sprintf(buf, "%lu\n", vio_cmo.name);                     \
}

#define viobus_cmo_pool_rd_attr(name, var)                              \
static ssize_t                                                          \
viobus_cmo_##name##_pool_show_##var(struct bus_type *bt, char *buf)     \
{                                                                       \
	return sprintf(buf, "%lu\n", vio_cmo.name.var);                 \
}

static ssize_t viobus_cmo_high_reset(struct bus_type *bt, const char *buf,
                                     size_t count)
{
	unsigned long flags;

	spin_lock_irqsave(&vio_cmo.lock, flags);
	vio_cmo.high = vio_cmo.curr;
	spin_unlock_irqrestore(&vio_cmo.lock, flags);

	return count;
}

viobus_cmo_rd_attr(entitled);
viobus_cmo_pool_rd_attr(reserve, size);
viobus_cmo_pool_rd_attr(excess, size);
viobus_cmo_pool_rd_attr(excess, free);
viobus_cmo_rd_attr(spare);
viobus_cmo_rd_attr(min);
viobus_cmo_rd_attr(desired);
viobus_cmo_rd_attr(curr);
viobus_cmo_rd_attr(high);

static struct bus_attribute vio_cmo_bus_attrs[] = {
	__ATTR(cmo_entitled, S_IRUGO, viobus_cmo_entitled_show, NULL),
	__ATTR(cmo_reserve_size, S_IRUGO, viobus_cmo_reserve_pool_show_size, NULL),
	__ATTR(cmo_excess_size, S_IRUGO, viobus_cmo_excess_pool_show_size, NULL),
	__ATTR(cmo_excess_free, S_IRUGO, viobus_cmo_excess_pool_show_free, NULL),
	__ATTR(cmo_spare,   S_IRUGO, viobus_cmo_spare_show,   NULL),
	__ATTR(cmo_min,     S_IRUGO, viobus_cmo_min_show,     NULL),
	__ATTR(cmo_desired, S_IRUGO, viobus_cmo_desired_show, NULL),
	__ATTR(cmo_curr,    S_IRUGO, viobus_cmo_curr_show,    NULL),
	__ATTR(cmo_high,    S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
	       viobus_cmo_high_show, viobus_cmo_high_reset),
	__ATTR_NULL
};

static void vio_cmo_sysfs_init(void)
{
	vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
	vio_bus_type.bus_attrs = vio_cmo_bus_attrs;
}
#else /* CONFIG_PPC_SMLPAR */
/* Dummy functions for iSeries platform */
int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
static void vio_cmo_bus_init(void) {}
static void vio_cmo_sysfs_init(void) { }
#endif /* CONFIG_PPC_SMLPAR */
EXPORT_SYMBOL(vio_cmo_entitlement_update);
EXPORT_SYMBOL(vio_cmo_set_dev_desired);

static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
{
	const unsigned char *dma_window;
	struct iommu_table *tbl;
	unsigned long offset, size;

	if (firmware_has_feature(FW_FEATURE_ISERIES))
		return vio_build_iommu_table_iseries(dev);

	dma_window = of_get_property(dev->dev.of_node,
				  "ibm,my-dma-window", NULL);
	if (!dma_window)
		return NULL;

	tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
	if (tbl == NULL)
		return NULL;

	of_parse_dma_window(dev->dev.of_node, dma_window,
			    &tbl->it_index, &offset, &size);

	/* TCE table size - measured in tce entries */
	tbl->it_size = size >> IOMMU_PAGE_SHIFT;
	/* offset for VIO should always be 0 */
	tbl->it_offset = offset >> IOMMU_PAGE_SHIFT;
	tbl->it_busno = 0;
	tbl->it_type = TCE_VB;
	tbl->it_blocksize = 16;

	return iommu_init_table(tbl, -1);
}

/**
 * vio_match_device: - Tell if a VIO device has a matching
 *			VIO device id structure.
 * @ids:	array of VIO device id structures to search in
 * @dev:	the VIO device structure to match against
 *
 * Used by a driver to check whether a VIO device present in the
 * system is in its list of supported devices. Returns the matching
 * vio_device_id structure or NULL if there is no match.
 */
static const struct vio_device_id *vio_match_device(
		const struct vio_device_id *ids, const struct vio_dev *dev)
{
	while (ids->type[0] != '\0') {
		if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
		    of_device_is_compatible(dev->dev.of_node,
					 ids->compat))
			return ids;
		ids++;
	}
	return NULL;
}

/*
 * Convert from struct device to struct vio_dev and pass to driver.
 * dev->driver has already been set by generic code because vio_bus_match
 * succeeded.
 */
static int vio_bus_probe(struct device *dev)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	struct vio_driver *viodrv = to_vio_driver(dev->driver);
	const struct vio_device_id *id;
	int error = -ENODEV;

	if (!viodrv->probe)
		return error;

	id = vio_match_device(viodrv->id_table, viodev);
	if (id) {
		memset(&viodev->cmo, 0, sizeof(viodev->cmo));
		if (firmware_has_feature(FW_FEATURE_CMO)) {
			error = vio_cmo_bus_probe(viodev);
			if (error)
				return error;
		}
		error = viodrv->probe(viodev, id);
		if (error && firmware_has_feature(FW_FEATURE_CMO))
			vio_cmo_bus_remove(viodev);
	}

	return error;
}

/* convert from struct device to struct vio_dev and pass to driver. */
static int vio_bus_remove(struct device *dev)
{
	struct vio_dev *viodev = to_vio_dev(dev);
	struct vio_driver *viodrv = to_vio_driver(dev->driver);
	struct device *devptr;
	int ret = 1;

	/*
	 * Hold a reference to the device after the remove function is called
	 * to allow for CMO accounting cleanup for the device.
	 */
	devptr = get_device(dev);

	if (viodrv->remove)
		ret = viodrv->remove(viodev);

	if (!ret && firmware_has_feature(FW_FEATURE_CMO))
		vio_cmo_bus_remove(viodev);

	put_device(devptr);
	return ret;
}

/**
 * vio_register_driver: - Register a new vio driver
 * @viodrv:	The vio_driver structure to be registered.
 */
int vio_register_driver(struct vio_driver *viodrv)
{
	printk(KERN_DEBUG "%s: driver %s registering\n", __func__,
		viodrv->driver.name);

	/* fill in 'struct driver' fields */
	viodrv->driver.bus = &vio_bus_type;

	return driver_register(&viodrv->driver);
}
EXPORT_SYMBOL(vio_register_driver);

/**
 * vio_unregister_driver - Remove registration of vio driver.
 * @viodrv:	The vio_driver struct to be removed from registration
 */
void vio_unregister_driver(struct vio_driver *viodrv)
{
	driver_unregister(&viodrv->driver);
}
EXPORT_SYMBOL(vio_unregister_driver);
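
/*
 * A hypothetical sketch of a minimal VIO driver using the interfaces above.
 * The "example" type/compat strings, the empty probe/remove bodies and the
 * 1MB desired-DMA figure are invented for illustration; only the structure
 * mirrors what vio_bus_match()/vio_bus_probe() and the CMO code expect: a
 * vio_device_id table terminated by an empty entry, probe/remove callbacks,
 * and (for CMO platforms) a get_desired_dma() callback returning the IO
 * memory the driver expects to have mapped concurrently.
 *
 *	static struct vio_device_id example_ids[] = {
 *		{ "example", "ibm,example" },
 *		{ "", "" }
 *	};
 *
 *	static int example_probe(struct vio_dev *vdev,
 *				 const struct vio_device_id *id)
 *	{
 *		return 0;
 *	}
 *
 *	static int example_remove(struct vio_dev *vdev)
 *	{
 *		return 0;
 *	}
 *
 *	static unsigned long example_get_desired_dma(struct vio_dev *vdev)
 *	{
 *		return 1024 * 1024;
 *	}
 *
 *	static struct vio_driver example_driver = {
 *		.id_table        = example_ids,
 *		.probe           = example_probe,
 *		.remove          = example_remove,
 *		.get_desired_dma = example_get_desired_dma,
 *		.driver          = {
 *			.name  = "example",
 *			.owner = THIS_MODULE,
 *		},
 *	};
 *
 * The module init and exit functions would then call
 * vio_register_driver(&example_driver) and
 * vio_unregister_driver(&example_driver) respectively.
 */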

/* vio_dev refcount hit 0 */
static void __devinit vio_dev_release(struct device *dev)
{
	of_node_put(dev->of_node);
	kfree(to_vio_dev(dev));
}

/**
 * vio_register_device_node: - Register a new vio device.
 * @of_node:	The OF node for this device.
 *
 * Creates and initializes a vio_dev structure from the data in
 * of_node and adds it to the list of virtual devices.
 * Returns a pointer to the created vio_dev or NULL if the node lacks
 * a 'device_type' or 'reg' property.
 */
struct vio_dev *vio_register_device_node(struct device_node *of_node)
{
	struct vio_dev *viodev;
	const unsigned int *unit_address;

	/* we need the 'device_type' property in order to match with drivers */
	if (of_node->type == NULL) {
		printk(KERN_WARNING "%s: node %s missing 'device_type'\n",
				__func__,
				of_node->name ? of_node->name : "<unknown>");
		return NULL;
	}

	unit_address = of_get_property(of_node, "reg", NULL);
	if (unit_address == NULL) {
		printk(KERN_WARNING "%s: node %s missing 'reg'\n",
				__func__,
				of_node->name ? of_node->name : "<unknown>");
		return NULL;
	}

	/* allocate a vio_dev for this node */
	viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
	if (viodev == NULL)
		return NULL;

	viodev->irq = irq_of_parse_and_map(of_node, 0);

	dev_set_name(&viodev->dev, "%x", *unit_address);
	viodev->name = of_node->name;
	viodev->type = of_node->type;
	viodev->unit_address = *unit_address;
	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
		unit_address = of_get_property(of_node,
				"linux,unit_address", NULL);
		if (unit_address != NULL)
			viodev->unit_address = *unit_address;
	}
	viodev->dev.of_node = of_node_get(of_node);

	if (firmware_has_feature(FW_FEATURE_CMO))
		vio_cmo_set_dma_ops(viodev);
	else
		viodev->dev.archdata.dma_ops = &dma_iommu_ops;
	set_iommu_table_base(&viodev->dev, vio_build_iommu_table(viodev));
	set_dev_node(&viodev->dev, of_node_to_nid(of_node));

	/* init generic 'struct device' fields: */
	viodev->dev.parent = &vio_bus_device.dev;
	viodev->dev.bus = &vio_bus_type;
	viodev->dev.release = vio_dev_release;

	/* register with generic device framework */
	if (device_register(&viodev->dev)) {
		printk(KERN_ERR "%s: failed to register device %s\n",
				__func__, dev_name(&viodev->dev));
		kfree(viodev);
		return NULL;
	}

	return viodev;
}
EXPORT_SYMBOL(vio_register_device_node);

/**
 * vio_bus_init: - Initialize the virtual IO bus
 */
static int __init vio_bus_init(void)
{
	int err;
	struct device_node *node_vroot;

	if (firmware_has_feature(FW_FEATURE_CMO))
		vio_cmo_sysfs_init();

	err = bus_register(&vio_bus_type);
	if (err) {
		printk(KERN_ERR "failed to register VIO bus\n");
		return err;
	}

	/*
	 * The fake parent of all vio devices, just to give us
	 * a nice directory
	 */
	err = device_register(&vio_bus_device.dev);
	if (err) {
		printk(KERN_WARNING "%s: device_register returned %i\n",
				__func__, err);
		return err;
	}

	if (firmware_has_feature(FW_FEATURE_CMO))
		vio_cmo_bus_init();

	node_vroot = of_find_node_by_name(NULL, "vdevice");
	if (node_vroot) {
		struct device_node *of_node;

		/*
		 * Create struct vio_devices for each virtual device in
		 * the device tree. Drivers will associate with them later.
		 */
		for (of_node = node_vroot->child; of_node != NULL;
				of_node = of_node->sibling)
			vio_register_device_node(of_node);
		of_node_put(node_vroot);
	}

	return 0;
}
__initcall(vio_bus_init);

static ssize_t name_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
}

static ssize_t devspec_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct device_node *of_node = dev->of_node;

	return sprintf(buf, "%s\n", of_node ? of_node->full_name : "none");
}

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	const struct vio_dev *vio_dev = to_vio_dev(dev);
	struct device_node *dn;
	const char *cp;

	dn = dev->of_node;
	if (!dn)
		return -ENODEV;
	cp = of_get_property(dn, "compatible", NULL);
	if (!cp)
		return -ENODEV;

	return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
}

static struct device_attribute vio_dev_attrs[] = {
	__ATTR_RO(name),
	__ATTR_RO(devspec),
	__ATTR_RO(modalias),
	__ATTR_NULL
};

void __devinit vio_unregister_device(struct vio_dev *viodev)
{
	device_unregister(&viodev->dev);
}
EXPORT_SYMBOL(vio_unregister_device);

static int vio_bus_match(struct device *dev, struct device_driver *drv)
{
	const struct vio_dev *vio_dev = to_vio_dev(dev);
	struct vio_driver *vio_drv = to_vio_driver(drv);
	const struct vio_device_id *ids = vio_drv->id_table;

	return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
}

static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
{
	const struct vio_dev *vio_dev = to_vio_dev(dev);
	struct device_node *dn;
	const char *cp;

	dn = dev->of_node;
	if (!dn)
		return -ENODEV;
	cp = of_get_property(dn, "compatible", NULL);
	if (!cp)
		return -ENODEV;

	add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
	return 0;
}

static struct bus_type vio_bus_type = {
	.name = "vio",
	.dev_attrs = vio_dev_attrs,
	.uevent = vio_hotplug,
	.match = vio_bus_match,
	.probe = vio_bus_probe,
	.remove = vio_bus_remove,
	.pm = GENERIC_SUBSYS_PM_OPS,
};

/**
 * vio_get_attribute: - get attribute for virtual device
 * @vdev:	The vio device to get property.
 * @which:	The property/attribute to be extracted.
 * @length:	Pointer to length of returned data size (unused if NULL).
 *
 * Calls prom.c's of_get_property() to return the value of the
 * attribute specified by @which
 */
const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
{
	return of_get_property(vdev->dev.of_node, which, length);
}
EXPORT_SYMBOL(vio_get_attribute);
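
/*
 * Illustrative only: a driver that wants an arbitrary property of its
 * device node (here the "ibm,my-dma-window" property used elsewhere in
 * this file) could fetch it with something like:
 *
 *	int len;
 *	const void *window = vio_get_attribute(vdev, "ibm,my-dma-window",
 *						&len);
 *	if (!window)
 *		the property is not present on this node
 */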

#ifdef CONFIG_PPC_PSERIES
/* vio_find_name() - internal because only vio.c knows how we formatted the
 * kobject name
 */
static struct vio_dev *vio_find_name(const char *name)
{
	struct device *found;

	found = bus_find_device_by_name(&vio_bus_type, NULL, name);
	if (!found)
		return NULL;

	return to_vio_dev(found);
}

/**
 * vio_find_node - find an already-registered vio_dev
 * @vnode: device_node of the virtual device we're looking for
 */
struct vio_dev *vio_find_node(struct device_node *vnode)
{
	const uint32_t *unit_address;
	char kobj_name[20];

	/* construct the kobject name from the device node */
	unit_address = of_get_property(vnode, "reg", NULL);
	if (!unit_address)
		return NULL;
	snprintf(kobj_name, sizeof(kobj_name), "%x", *unit_address);

	return vio_find_name(kobj_name);
}
EXPORT_SYMBOL(vio_find_node);
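
/*
 * Illustrative only: code that starts from an OF node (for example one
 * returned by of_find_node_by_name(NULL, "vty")) can look up the vio_dev
 * that vio_bus_init() registered for it:
 *
 *	struct vio_dev *vdev = vio_find_node(np);
 *	if (vdev)
 *		use vdev, e.g. vdev->irq or vdev->unit_address
 */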

int vio_enable_interrupts(struct vio_dev *dev)
{
	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
	if (rc != H_SUCCESS)
		printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
	return rc;
}
EXPORT_SYMBOL(vio_enable_interrupts);

int vio_disable_interrupts(struct vio_dev *dev)
{
	int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
	if (rc != H_SUCCESS)
		printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
	return rc;
}
EXPORT_SYMBOL(vio_disable_interrupts);
#endif /* CONFIG_PPC_PSERIES */