/*
 * Copyright (c) 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/dma-mapping.h>
#include <linux/sched.h>
#ifdef __linux__
#include <linux/hugetlb.h>
#endif
#include <linux/dma-attrs.h>

#include <sys/priv.h>
#include <sys/resource.h>
#include <sys/resourcevar.h>

#include <vm/vm.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_pageout.h>

#include "uverbs.h"

static int allow_weak_ordering;
module_param(allow_weak_ordering, bool, 0444);
MODULE_PARM_DESC(allow_weak_ordering,  "Allow weak ordering for data registered memory");

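/*
 * Maximum number of scatterlist entries that fit in a single
 * page-sized ib_umem_chunk: the space left in one page after the
 * chunk header, divided by the size of one page_list entry.
 */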
#define IB_UMEM_MAX_PAGE_CHUNK						\
	((PAGE_SIZE - offsetof(struct ib_umem_chunk, page_list)) /	\
	 ((void *) &((struct ib_umem_chunk *) 0)->page_list[1] -	\
	  (void *) &((struct ib_umem_chunk *) 0)->page_list[0]))

#ifdef __ia64__
extern int dma_map_sg_hp_wa;

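/*
 * When the dma_map_sg_hp_wa workaround flag is set, map and unmap the
 * scatterlist entries one at a time instead of passing the whole list
 * to dma_map_sg() in a single call.
 */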
static int dma_map_sg_ia64(struct ib_device *ibdev,
			   struct scatterlist *sg,
			   int nents,
			   enum dma_data_direction dir)
{
	int i, rc, j, lents = 0;
	struct device *dev;

	if (!dma_map_sg_hp_wa)
		return ib_dma_map_sg(ibdev, sg, nents, dir);

	dev = ibdev->dma_device;
	for (i = 0; i < nents; ++i) {
		rc = dma_map_sg(dev, sg + i, 1, dir);
		if (rc <= 0) {
			for (j = 0; j < i; ++j)
				dma_unmap_sg(dev, sg + j, 1, dir);

			return 0;
		}
		lents += rc;
	}

	return lents;
}

static void dma_unmap_sg_ia64(struct ib_device *ibdev,
			      struct scatterlist *sg,
			      int nents,
			      enum dma_data_direction dir)
{
	int i;
	struct device *dev;

	if (!dma_map_sg_hp_wa)
		return ib_dma_unmap_sg(ibdev, sg, nents, dir);

	dev = ibdev->dma_device;
	for (i = 0; i < nents; ++i)
		dma_unmap_sg(dev, sg + i, 1, dir);
}

#define ib_dma_map_sg(dev, sg, nents, dir) dma_map_sg_ia64(dev, sg, nents, dir)
#define ib_dma_unmap_sg(dev, sg, nents, dir) dma_unmap_sg_ia64(dev, sg, nents, dir)

#endif

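/*
 * Unpin and DMA-unmap every chunk of a umem.  If the region was
 * registered writable and @dirty is set, the underlying pages are
 * marked dirty: via set_page_dirty_lock() on Linux, or vm_page_dirty()
 * under the owning VM object's write lock on FreeBSD.
 */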
static void __ib_umem_release(struct ib_device *dev, struct ib_umem *umem, int dirty)
{
#ifdef __linux__
	struct ib_umem_chunk *chunk, *tmp;
	int i;

	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
		ib_dma_unmap_sg_attrs(dev, chunk->page_list,
				      chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
		for (i = 0; i < chunk->nents; ++i) {
			struct page *page = sg_page(&chunk->page_list[i]);
			if (umem->writable && dirty)
				set_page_dirty_lock(page);
			put_page(page);
		}
		kfree(chunk);
	}
#else
	struct ib_umem_chunk *chunk, *tmp;
	vm_object_t object;
	int i;

	object = NULL;
	list_for_each_entry_safe(chunk, tmp, &umem->chunk_list, list) {
		ib_dma_unmap_sg_attrs(dev, chunk->page_list,
				      chunk->nents, DMA_BIDIRECTIONAL, &chunk->attrs);
		for (i = 0; i < chunk->nents; ++i) {
			struct page *page = sg_page(&chunk->page_list[i]);
			if (umem->writable && dirty) {
				if (object && object != page->object)
					VM_OBJECT_WUNLOCK(object);
				if (object != page->object) {
					object = page->object;
					VM_OBJECT_WLOCK(object);
				}
				vm_page_dirty(page);
			}
		}
		kfree(chunk);
	}
	if (object)
		VM_OBJECT_WUNLOCK(object);

#endif
}

/**
 * ib_umem_get - Pin and DMA map userspace memory.
 * @context: userspace context to pin memory for
 * @addr: userspace virtual address to start at
 * @size: length of region to pin
 * @access: IB_ACCESS_xxx flags for memory being pinned
 * @dmasync: flush in-flight DMA when the memory region is written
 */
struct ib_umem *ib_umem_get(struct ib_ucontext *context, unsigned long addr,
			    size_t size, int access, int dmasync)
{
#ifdef __linux__
	struct ib_umem *umem;
	struct page **page_list;
	struct vm_area_struct **vma_list;
	struct ib_umem_chunk *chunk;
	unsigned long locked;
	unsigned long lock_limit;
	unsigned long cur_base;
	unsigned long npages;
	int ret;
	int off;
	int i;
	DEFINE_DMA_ATTRS(attrs);

	if (dmasync)
		dma_set_attr(DMA_ATTR_WRITE_BARRIER, &attrs);
	else if (allow_weak_ordering)
		dma_set_attr(DMA_ATTR_WEAK_ORDERING, &attrs);

	if (!can_do_mlock())
		return ERR_PTR(-EPERM);

	umem = kmalloc(sizeof *umem, GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->context   = context;
	umem->length    = size;
	umem->offset    = addr & ~PAGE_MASK;
	umem->page_size = PAGE_SIZE;
	/*
	 * We ask for writable memory if any access flags other than
	 * "remote read" are set.  "Local write" and "remote write"
	 * obviously require write access.  "Remote atomic" can do
	 * things like fetch and add, which will modify memory, and
	 * "MW bind" can change permissions by binding a window.
	 */
	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);

	/* We assume the memory is from hugetlb until proved otherwise */
	umem->hugetlb   = 1;

	INIT_LIST_HEAD(&umem->chunk_list);

	page_list = (struct page **) __get_free_page(GFP_KERNEL);
	if (!page_list) {
		kfree(umem);
		return ERR_PTR(-ENOMEM);
	}

	/*
	 * if we can't alloc the vma_list, it's not so bad;
	 * just assume the memory is not hugetlb memory
	 */
	vma_list = (struct vm_area_struct **) __get_free_page(GFP_KERNEL);
	if (!vma_list)
		umem->hugetlb = 0;

	npages = PAGE_ALIGN(size + umem->offset) >> PAGE_SHIFT;

	down_write(&current->mm->mmap_sem);

	locked     = npages + current->mm->locked_vm;
	lock_limit = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT;

	if ((locked > lock_limit) && !capable(CAP_IPC_LOCK)) {
		ret = -ENOMEM;
		goto out;
	}

	cur_base = addr & PAGE_MASK;

	ret = 0;

	while (npages) {
		ret = get_user_pages(current, current->mm, cur_base,
				     min_t(unsigned long, npages,
					   PAGE_SIZE / sizeof (struct page *)),
				     1, !umem->writable, page_list, vma_list);

		if (ret < 0)
			goto out;

		cur_base += ret * PAGE_SIZE;
		npages   -= ret;

		off = 0;

		while (ret) {
			chunk = kmalloc(sizeof *chunk + sizeof (struct scatterlist) *
					min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK),
					GFP_KERNEL);
			if (!chunk) {
				ret = -ENOMEM;
				goto out;
			}

			chunk->attrs = attrs;
			chunk->nents = min_t(int, ret, IB_UMEM_MAX_PAGE_CHUNK);
			sg_init_table(chunk->page_list, chunk->nents);
			for (i = 0; i < chunk->nents; ++i) {
				if (vma_list &&
				    !is_vm_hugetlb_page(vma_list[i + off]))
					umem->hugetlb = 0;
				sg_set_page(&chunk->page_list[i], page_list[i + off], PAGE_SIZE, 0);
			}

			chunk->nmap = ib_dma_map_sg_attrs(context->device,
							  &chunk->page_list[0],
							  chunk->nents,
							  DMA_BIDIRECTIONAL,
							  &attrs);
			if (chunk->nmap <= 0) {
				for (i = 0; i < chunk->nents; ++i)
					put_page(sg_page(&chunk->page_list[i]));
				kfree(chunk);

				ret = -ENOMEM;
				goto out;
			}

			ret -= chunk->nents;
			off += chunk->nents;
			list_add_tail(&chunk->list, &umem->chunk_list);
		}

		ret = 0;
	}

out:
	if (ret < 0) {
		__ib_umem_release(context->device, umem, 0);
		kfree(umem);
	} else
		current->mm->locked_vm = locked;

	up_write(&current->mm->mmap_sem);
	if (vma_list)
		free_page((unsigned long) vma_list);
	free_page((unsigned long) page_list);

	return ret < 0 ? ERR_PTR(ret) : umem;
#else
	struct ib_umem *umem;
	struct ib_umem_chunk *chunk;
	struct proc *proc;
	pmap_t pmap;
	vm_offset_t end, last, start;
	vm_size_t npages;
	int error;
	int ents;
	int ret;
	int i;
	DEFINE_DMA_ATTRS(attrs);

	error = priv_check(curthread, PRIV_VM_MLOCK);
	if (error)
		return ERR_PTR(-error);

	last = addr + size;
	start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
	end = roundup2(last, PAGE_SIZE); /* Round up without relying on PAGE_MASK. */
	if (last < addr || end < addr)
		return ERR_PTR(-EINVAL);
	npages = atop(end - start);
	if (npages > vm_page_max_wired)
		return ERR_PTR(-ENOMEM);
	umem = kzalloc(sizeof *umem, GFP_KERNEL);
	if (!umem)
		return ERR_PTR(-ENOMEM);

	umem->context   = context;
	umem->length    = size;
	umem->offset    = addr & ~PAGE_MASK;
	umem->page_size = PAGE_SIZE;
	umem->start	= addr;
	/*
	 * We ask for writable memory if any access flags other than
	 * "remote read" are set.  "Local write" and "remote write"
	 * obviously require write access.  "Remote atomic" can do
	 * things like fetch and add, which will modify memory, and
	 * "MW bind" can change permissions by binding a window.
	 *
	 * Set writable before vm_map_wire() below so that
	 * VM_MAP_WIRE_WRITE is requested when write access is needed.
	 */
	umem->writable  = !!(access & ~IB_ACCESS_REMOTE_READ);
	umem->hugetlb = 0;
	INIT_LIST_HEAD(&umem->chunk_list);

	proc = curthread->td_proc;
	PROC_LOCK(proc);
	if (ptoa(npages +
	    pmap_wired_count(vm_map_pmap(&proc->p_vmspace->vm_map))) >
	    lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		kfree(umem);
		return ERR_PTR(-ENOMEM);
	}
	PROC_UNLOCK(proc);
	if (npages + cnt.v_wire_count > vm_page_max_wired) {
		kfree(umem);
		return ERR_PTR(-EAGAIN);
	}
	error = vm_map_wire(&proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES |
	    (umem->writable ? VM_MAP_WIRE_WRITE : 0));
	if (error != KERN_SUCCESS) {
		kfree(umem);
		return ERR_PTR(-ENOMEM);
	}

	pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
	ret = 0;
	while (npages) {
		ents = min_t(int, npages, IB_UMEM_MAX_PAGE_CHUNK);
		chunk = kmalloc(sizeof(*chunk) +
				(sizeof(struct scatterlist) * ents),
				GFP_KERNEL);
		if (!chunk) {
			ret = -ENOMEM;
			goto out;
		}

		chunk->attrs = attrs;
		chunk->nents = ents;
		sg_init_table(&chunk->page_list[0], ents);
		for (i = 0; i < chunk->nents; ++i) {
			vm_paddr_t pa;

			pa = pmap_extract(pmap, start);
			if (pa == 0) {
				ret = -ENOMEM;
				kfree(chunk);
				goto out;
			}
			sg_set_page(&chunk->page_list[i], PHYS_TO_VM_PAGE(pa),
			    PAGE_SIZE, 0);
			npages--;
			start += PAGE_SIZE;
		}

		chunk->nmap = ib_dma_map_sg_attrs(context->device,
						  &chunk->page_list[0],
						  chunk->nents,
						  DMA_BIDIRECTIONAL,
						  &attrs);
		if (chunk->nmap != chunk->nents) {
			kfree(chunk);
			ret = -ENOMEM;
			goto out;
		}

		list_add_tail(&chunk->list, &umem->chunk_list);
	}

out:
	if (ret < 0) {
		__ib_umem_release(context->device, umem, 0);
		kfree(umem);
	}

	return ret < 0 ? ERR_PTR(ret) : umem;
#endif
}
EXPORT_SYMBOL(ib_umem_get);
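
/*
 * Usage sketch (illustrative only, not part of this file): a verbs
 * driver's reg_user_mr path typically pins the user buffer with
 * ib_umem_get(), checks the result with IS_ERR(), and undoes the pin
 * with ib_umem_release() on failure or at deregistration time.  The
 * names my_mr and my_reg_user_mr below are hypothetical.
 *
 *	struct my_mr {
 *		struct ib_mr	ibmr;
 *		struct ib_umem *umem;
 *	};
 *
 *	static struct ib_mr *my_reg_user_mr(struct ib_pd *pd, u64 start,
 *	    u64 length, u64 virt_addr, int access, struct ib_udata *udata)
 *	{
 *		struct my_mr *mr;
 *		int err;
 *
 *		mr = kzalloc(sizeof *mr, GFP_KERNEL);
 *		if (!mr)
 *			return ERR_PTR(-ENOMEM);
 *		mr->umem = ib_umem_get(pd->uobject->context, start, length,
 *		    access, 0);
 *		if (IS_ERR(mr->umem)) {
 *			err = PTR_ERR(mr->umem);
 *			kfree(mr);
 *			return ERR_PTR(err);
 *		}
 *		(program the HCA translation table from mr->umem here)
 *		return &mr->ibmr;
 *	}
 */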

#ifdef __linux__
static void ib_umem_account(struct work_struct *work)
{
	struct ib_umem *umem = container_of(work, struct ib_umem, work);

	down_write(&umem->mm->mmap_sem);
	umem->mm->locked_vm -= umem->diff;
	up_write(&umem->mm->mmap_sem);
	mmput(umem->mm);
	kfree(umem);
}
#endif

/**
 * ib_umem_release - release memory pinned with ib_umem_get
 * @umem: umem struct to release
 */
void ib_umem_release(struct ib_umem *umem)
{
#ifdef __linux__
	struct ib_ucontext *context = umem->context;
	struct mm_struct *mm;
	unsigned long diff;

	__ib_umem_release(umem->context->device, umem, 1);

	mm = get_task_mm(current);
	if (!mm) {
		kfree(umem);
		return;
	}

	diff = PAGE_ALIGN(umem->length + umem->offset) >> PAGE_SHIFT;

	/*
	 * We may be called with the mm's mmap_sem already held.  This
	 * can happen when a userspace munmap() is the call that drops
	 * the last reference to our file and calls our release
	 * method.  If there are memory regions to destroy, we'll end
	 * up here and not be able to take the mmap_sem.  In that case
	 * we defer the vm_locked accounting to the system workqueue.
	 */
	if (context->closing) {
		if (!down_write_trylock(&mm->mmap_sem)) {
			INIT_WORK(&umem->work, ib_umem_account);
			umem->mm   = mm;
			umem->diff = diff;

			schedule_work(&umem->work);
			return;
		}
	} else
		down_write(&mm->mmap_sem);

	current->mm->locked_vm -= diff;
	up_write(&mm->mmap_sem);
	mmput(mm);
#else
	vm_offset_t addr, end, last, start;
	vm_size_t size;
	int error;

	__ib_umem_release(umem->context->device, umem, 1);
	if (umem->context->closing) {
		kfree(umem);
		return;
	}
	error = priv_check(curthread, PRIV_VM_MUNLOCK);
	if (error) {
		kfree(umem);
		return;
	}
	addr = umem->start;
	size = umem->length;
	last = addr + size;
	start = addr & PAGE_MASK; /* Use the linux PAGE_MASK definition. */
	end = roundup2(last, PAGE_SIZE); /* Round up without relying on PAGE_MASK. */
	vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map, start, end,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);

#endif
	kfree(umem);
}
EXPORT_SYMBOL(ib_umem_release);

int ib_umem_page_count(struct ib_umem *umem)
{
	struct ib_umem_chunk *chunk;
	int shift;
	int i;
	int n;

	shift = ilog2(umem->page_size);

	n = 0;
	list_for_each_entry(chunk, &umem->chunk_list, list)
		for (i = 0; i < chunk->nmap; ++i)
			n += sg_dma_len(&chunk->page_list[i]) >> shift;

	return n;
}
EXPORT_SYMBOL(ib_umem_page_count);
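
/*
 * Usage sketch (illustrative only, not part of this file): drivers
 * usually size their translation tables with ib_umem_page_count() and
 * then walk the chunk list to collect the DMA address of every page.
 * The local names pages, npages and n below are hypothetical.
 *
 *	struct ib_umem_chunk *chunk;
 *	u64 *pages;
 *	int npages, shift, i, j, n = 0;
 *
 *	shift = ilog2(umem->page_size);
 *	npages = ib_umem_page_count(umem);
 *	pages = kmalloc(npages * sizeof *pages, GFP_KERNEL);
 *	if (!pages)
 *		return -ENOMEM;
 *	list_for_each_entry(chunk, &umem->chunk_list, list)
 *		for (i = 0; i < chunk->nmap; ++i) {
 *			int len = sg_dma_len(&chunk->page_list[i]) >> shift;
 *
 *			for (j = 0; j < len; ++j)
 *				pages[n++] = sg_dma_address(&chunk->page_list[i]) +
 *				    ((u64) j << shift);
 *		}
 */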

/**********************************************/
/*
 * Stub functions for contiguous pages;
 * this feature is not currently supported.
 */
/**********************************************/

/**
 * ib_cmem_release_contiguous_pages - release memory allocated by
 *                                    ib_cmem_alloc_contiguous_pages.
 * @cmem: cmem struct to release
 */
void ib_cmem_release_contiguous_pages(struct ib_cmem *cmem)
{
}
EXPORT_SYMBOL(ib_cmem_release_contiguous_pages);

/**
 * ib_cmem_alloc_contiguous_pages - allocate contiguous pages
 * @context: userspace context to allocate memory for
 * @total_size: total required size for that allocation.
 * @page_size_order: order of one contiguous page.
 */
struct ib_cmem *ib_cmem_alloc_contiguous_pages(struct ib_ucontext *context,
					       unsigned long total_size,
					       unsigned long page_size_order)
{
	return NULL;
}
EXPORT_SYMBOL(ib_cmem_alloc_contiguous_pages);

/**
 * ib_cmem_map_contiguous_pages_to_vma - map contiguous pages into VMA
 * @ib_cmem: cmem structure returned by ib_cmem_alloc_contiguous_pages
 * @vma: VMA to inject pages into.
 */
int ib_cmem_map_contiguous_pages_to_vma(struct ib_cmem *ib_cmem,
					struct vm_area_struct *vma)
{
	return 0;
}
EXPORT_SYMBOL(ib_cmem_map_contiguous_pages_to_vma);