/*
 * Copyright (c) 2004, 2005 Topspin Communications.  All rights reserved.
 * Copyright (c) 2005 Cisco Systems.  All rights reserved.
 * Copyright (c) 2005 Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/mm.h>
#include <linux/scatterlist.h>
#include <linux/sched.h>

#include <asm/page.h>

#include "mthca_memfree.h"
#include "mthca_dev.h"
#include "mthca_cmd.h"

/*
 * We allocate in as big chunks as we can, up to a maximum of 256 KB
 * per chunk.
 */
enum {
	MTHCA_ICM_ALLOC_SIZE   = 1 << 18,
	MTHCA_TABLE_CHUNK_SIZE = 1 << 18
};
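
/*
 * For scale: assuming 4 KB pages (PAGE_SIZE is platform-defined),
 * get_order(MTHCA_ICM_ALLOC_SIZE) is 6, so mthca_alloc_icm() below
 * starts out requesting 64-page (256 KB) blocks and halves the order
 * whenever an allocation fails.
 */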

struct mthca_user_db_table {
	struct mutex mutex;
	struct {
		u64                uvirt;
		struct scatterlist mem;
		int                refcount;
	}                page[0];
};
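
/*
 * page[0] is a variable-length trailing array: mthca_init_user_db_tab()
 * allocates sizeof *db_tab + npages * sizeof *db_tab->page in one
 * kmalloc(), so entry i describes the i-th MTHCA_ICM_PAGE_SIZE-sized
 * doorbell page of a UAR context.
 */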

static void mthca_free_icm_pages(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
	int i;

	if (chunk->nsg > 0)
		pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages,
			     PCI_DMA_BIDIRECTIONAL);

	for (i = 0; i < chunk->npages; ++i)
		__free_pages(sg_page(&chunk->mem[i]),
			     get_order(chunk->mem[i].length));
}

static void mthca_free_icm_coherent(struct mthca_dev *dev, struct mthca_icm_chunk *chunk)
{
	int i;

	for (i = 0; i < chunk->npages; ++i) {
		dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length,
				  lowmem_page_address(sg_page(&chunk->mem[i])),
				  sg_dma_address(&chunk->mem[i]));
	}
}

void mthca_free_icm(struct mthca_dev *dev, struct mthca_icm *icm, int coherent)
{
	struct mthca_icm_chunk *chunk, *tmp;

	if (!icm)
		return;

	list_for_each_entry_safe(chunk, tmp, &icm->chunk_list, list) {
		if (coherent)
			mthca_free_icm_coherent(dev, chunk);
		else
			mthca_free_icm_pages(dev, chunk);

		kfree(chunk);
	}

	kfree(icm);
}

static int mthca_alloc_icm_pages(struct scatterlist *mem, int order, gfp_t gfp_mask)
{
	struct page *page;

	/*
	 * Use __GFP_ZERO because buggy firmware assumes ICM pages are
	 * cleared, and subtle failures are seen if they aren't.
	 */
	page = alloc_pages(gfp_mask | __GFP_ZERO, order);
	if (!page)
		return -ENOMEM;

	sg_set_page(mem, page, PAGE_SIZE << order, 0);
	return 0;
}

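/*
 * Coherent chunks come from dma_alloc_coherent(), so each scatterlist
 * entry already carries a valid bus address and never goes through
 * pci_map_sg().  dma_alloc_coherent() returns page-aligned memory,
 * hence the BUG_ON() below if sg_set_buf() computed a nonzero offset.
 */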
static int mthca_alloc_icm_coherent(struct device *dev, struct scatterlist *mem,
				    int order, gfp_t gfp_mask)
{
	void *buf = dma_alloc_coherent(dev, PAGE_SIZE << order, &sg_dma_address(mem),
				       gfp_mask);
	if (!buf)
		return -ENOMEM;

	sg_set_buf(mem, buf, PAGE_SIZE << order);
	BUG_ON(mem->offset);
	sg_dma_len(mem) = PAGE_SIZE << order;
	return 0;
}

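/*
 * Allocate npages worth of ICM.  The strategy: try the largest block
 * size first (MTHCA_ICM_ALLOC_SIZE) and halve cur_order on each
 * allocation failure, giving up only once even order-0 allocations
 * fail.  A typical call, sketched after the callers in this file:
 *
 *	icm = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
 *			      GFP_KERNEL | __GFP_NOWARN, 0);
 */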
struct mthca_icm *mthca_alloc_icm(struct mthca_dev *dev, int npages,
				  gfp_t gfp_mask, int coherent)
{
	struct mthca_icm *icm;
	struct mthca_icm_chunk *chunk = NULL;
	int cur_order;
	int ret;

	/* We use sg_set_buf for coherent allocs, which assumes low memory */
	BUG_ON(coherent && (gfp_mask & __GFP_HIGHMEM));

	icm = kmalloc(sizeof *icm, gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
	if (!icm)
		return icm;

	icm->refcount = 0;
	INIT_LIST_HEAD(&icm->chunk_list);

	cur_order = get_order(MTHCA_ICM_ALLOC_SIZE);

	while (npages > 0) {
		if (!chunk) {
			chunk = kmalloc(sizeof *chunk,
					gfp_mask & ~(__GFP_HIGHMEM | __GFP_NOWARN));
			if (!chunk)
				goto fail;

			sg_init_table(chunk->mem, MTHCA_ICM_CHUNK_LEN);
			chunk->npages = 0;
			chunk->nsg    = 0;
			list_add_tail(&chunk->list, &icm->chunk_list);
		}

		while (1 << cur_order > npages)
			--cur_order;

		if (coherent)
			ret = mthca_alloc_icm_coherent(&dev->pdev->dev,
						       &chunk->mem[chunk->npages],
						       cur_order, gfp_mask);
		else
			ret = mthca_alloc_icm_pages(&chunk->mem[chunk->npages],
						    cur_order, gfp_mask);

		if (!ret) {
			++chunk->npages;

			if (coherent)
				++chunk->nsg;
			else if (chunk->npages == MTHCA_ICM_CHUNK_LEN) {
				chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
							chunk->npages,
							PCI_DMA_BIDIRECTIONAL);

				if (chunk->nsg <= 0)
					goto fail;
			}

			if (chunk->npages == MTHCA_ICM_CHUNK_LEN)
				chunk = NULL;

			npages -= 1 << cur_order;
		} else {
			--cur_order;
			if (cur_order < 0)
				goto fail;
		}
	}

	if (!coherent && chunk) {
		chunk->nsg = pci_map_sg(dev->pdev, chunk->mem,
					chunk->npages,
					PCI_DMA_BIDIRECTIONAL);

		if (chunk->nsg <= 0)
			goto fail;
	}

	return icm;

fail:
	mthca_free_icm(dev, icm, coherent);
	return NULL;
}

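/*
 * Map in the ICM chunk backing object number obj, allocating it on
 * first use.  The chunk index is (obj * obj_size) / MTHCA_TABLE_CHUNK_SIZE;
 * for example, with a 64-byte obj_size one 256 KB chunk covers 4096
 * objects, so obj 5000 lands in chunk 1.  (The 64-byte size is only
 * for illustration.)
 */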
int mthca_table_get(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
	int i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;
	int ret = 0;
	u8 status;

	mutex_lock(&table->mutex);

	if (table->icm[i]) {
		++table->icm[i]->refcount;
		goto out;
	}

	table->icm[i] = mthca_alloc_icm(dev, MTHCA_TABLE_CHUNK_SIZE >> PAGE_SHIFT,
					(table->lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
					__GFP_NOWARN, table->coherent);
	if (!table->icm[i]) {
		ret = -ENOMEM;
		goto out;
	}

	if (mthca_MAP_ICM(dev, table->icm[i], table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
			  &status) || status) {
		mthca_free_icm(dev, table->icm[i], table->coherent);
		table->icm[i] = NULL;
		ret = -ENOMEM;
		goto out;
	}

	++table->icm[i]->refcount;

out:
	mutex_unlock(&table->mutex);
	return ret;
}

void mthca_table_put(struct mthca_dev *dev, struct mthca_icm_table *table, int obj)
{
	int i;
	u8 status;

	if (!mthca_is_memfree(dev))
		return;

	i = (obj & (table->num_obj - 1)) * table->obj_size / MTHCA_TABLE_CHUNK_SIZE;

	mutex_lock(&table->mutex);

	if (--table->icm[i]->refcount == 0) {
		mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
				MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
				&status);
		mthca_free_icm(dev, table->icm[i], table->coherent);
		table->icm[i] = NULL;
	}

	mutex_unlock(&table->mutex);
}

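/*
 * Return the kernel virtual address of object obj, walking the sg
 * entries of its ICM chunk until the running offset falls inside one.
 * If dma_handle is non-NULL it is also set to the bus address of the
 * sg entry containing the object.  Only tables allocated with
 * use_lowmem can be looked up this way; others return NULL.
 */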
void *mthca_table_find(struct mthca_icm_table *table, int obj, dma_addr_t *dma_handle)
{
	int idx, offset, dma_offset, i;
	struct mthca_icm_chunk *chunk;
	struct mthca_icm *icm;
	struct page *page = NULL;

	if (!table->lowmem)
		return NULL;

	mutex_lock(&table->mutex);

	idx = (obj & (table->num_obj - 1)) * table->obj_size;
	icm = table->icm[idx / MTHCA_TABLE_CHUNK_SIZE];
	dma_offset = offset = idx % MTHCA_TABLE_CHUNK_SIZE;

	if (!icm)
		goto out;

	list_for_each_entry(chunk, &icm->chunk_list, list) {
		for (i = 0; i < chunk->npages; ++i) {
			if (dma_handle && dma_offset >= 0) {
				if (sg_dma_len(&chunk->mem[i]) > dma_offset)
					*dma_handle = sg_dma_address(&chunk->mem[i]) +
						dma_offset;
				dma_offset -= sg_dma_len(&chunk->mem[i]);
			}
			/*
			 * DMA mapping can merge pages but not split them,
			 * so if we found the page, dma_handle has already
			 * been assigned.
			 */
			if (chunk->mem[i].length > offset) {
				page = sg_page(&chunk->mem[i]);
				goto out;
			}
			offset -= chunk->mem[i].length;
		}
	}

out:
	mutex_unlock(&table->mutex);
	return page ? lowmem_page_address(page) + offset : NULL;
}

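/*
 * Take a reference on every chunk covering objects start..end,
 * stepping by one chunk's worth of objects.  On failure, drop the
 * references taken so far before returning the error.
 */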
int mthca_table_get_range(struct mthca_dev *dev, struct mthca_icm_table *table,
			  int start, int end)
{
	int inc = MTHCA_TABLE_CHUNK_SIZE / table->obj_size;
	int i, err;

	for (i = start; i <= end; i += inc) {
		err = mthca_table_get(dev, table, i);
		if (err)
			goto fail;
	}

	return 0;

fail:
	while (i > start) {
		i -= inc;
		mthca_table_put(dev, table, i);
	}

	return err;
}

void mthca_table_put_range(struct mthca_dev *dev, struct mthca_icm_table *table,
			   int start, int end)
{
	int i;

	if (!mthca_is_memfree(dev))
		return;

	for (i = start; i <= end; i += MTHCA_TABLE_CHUNK_SIZE / table->obj_size)
		mthca_table_put(dev, table, i);
}

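/*
 * Sizing example (illustrative numbers only): with obj_size 64 and
 * nobj 1 << 20, obj_per_chunk is 4096 and num_icm is 256 chunk
 * pointers.  Chunks covering the first `reserved' objects are mapped
 * here and pinned with an extra refcount so they outlive any user.
 */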
struct mthca_icm_table *mthca_alloc_icm_table(struct mthca_dev *dev,
					      u64 virt, int obj_size,
					      int nobj, int reserved,
					      int use_lowmem, int use_coherent)
{
	struct mthca_icm_table *table;
	int obj_per_chunk;
	int num_icm;
	unsigned chunk_size;
	int i;
	u8 status;

	obj_per_chunk = MTHCA_TABLE_CHUNK_SIZE / obj_size;
	num_icm = DIV_ROUND_UP(nobj, obj_per_chunk);

	table = kmalloc(sizeof *table + num_icm * sizeof *table->icm, GFP_KERNEL);
	if (!table)
		return NULL;

	table->virt     = virt;
	table->num_icm  = num_icm;
	table->num_obj  = nobj;
	table->obj_size = obj_size;
	table->lowmem   = use_lowmem;
	table->coherent = use_coherent;
	mutex_init(&table->mutex);

	for (i = 0; i < num_icm; ++i)
		table->icm[i] = NULL;

	for (i = 0; i * MTHCA_TABLE_CHUNK_SIZE < reserved * obj_size; ++i) {
		chunk_size = MTHCA_TABLE_CHUNK_SIZE;
		if ((i + 1) * MTHCA_TABLE_CHUNK_SIZE > nobj * obj_size)
			chunk_size = nobj * obj_size - i * MTHCA_TABLE_CHUNK_SIZE;

		table->icm[i] = mthca_alloc_icm(dev, chunk_size >> PAGE_SHIFT,
						(use_lowmem ? GFP_KERNEL : GFP_HIGHUSER) |
						__GFP_NOWARN, use_coherent);
		if (!table->icm[i])
			goto err;
		if (mthca_MAP_ICM(dev, table->icm[i], virt + i * MTHCA_TABLE_CHUNK_SIZE,
				  &status) || status) {
			mthca_free_icm(dev, table->icm[i], table->coherent);
			table->icm[i] = NULL;
			goto err;
		}

		/*
		 * Add a reference to this ICM chunk so that it never
		 * gets freed (since it contains reserved firmware objects).
		 */
		++table->icm[i]->refcount;
	}

	return table;

err:
	for (i = 0; i < num_icm; ++i)
		if (table->icm[i]) {
			mthca_UNMAP_ICM(dev, virt + i * MTHCA_TABLE_CHUNK_SIZE,
					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
					&status);
			mthca_free_icm(dev, table->icm[i], table->coherent);
		}

	kfree(table);

	return NULL;
}

void mthca_free_icm_table(struct mthca_dev *dev, struct mthca_icm_table *table)
{
	int i;
	u8 status;

	for (i = 0; i < table->num_icm; ++i)
		if (table->icm[i]) {
			mthca_UNMAP_ICM(dev, table->virt + i * MTHCA_TABLE_CHUNK_SIZE,
					MTHCA_TABLE_CHUNK_SIZE / MTHCA_ICM_PAGE_SIZE,
					&status);
			mthca_free_icm(dev, table->icm[i], table->coherent);
		}

	kfree(table);
}

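/*
 * Virtual ICM address of doorbell page `page' in the UAR context of
 * `uar'.  Example with hypothetical values: if uarc_base were
 * 0x1000000 and uarc_size 1 << 15, page 2 of UAR index 3 would map at
 * 0x1000000 + 3 * 0x8000 + 2 * MTHCA_ICM_PAGE_SIZE.
 */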
static u64 mthca_uarc_virt(struct mthca_dev *dev, struct mthca_uar *uar, int page)
{
	return dev->uar_table.uarc_base +
		uar->index * dev->uar_table.uarc_size +
		page * MTHCA_ICM_PAGE_SIZE;
}

#include <vm/vm_map.h>
#include <vm/vm_pageout.h>
#include <vm/pmap.h>

#include <sys/resource.h>
#include <sys/resourcevar.h>

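/*
 * Pin the userspace doorbell page at uaddr and map it into the UAR
 * context at index.  The Linux path pins with get_user_pages(); the
 * FreeBSD path charges RLIMIT_MEMLOCK, wires the page with
 * vm_map_wire() and recovers the vm_page_t via pmap_extract().
 * Either way the page is then handed to the HCA with
 * mthca_MAP_ICM_page().
 */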
int mthca_map_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
		      struct mthca_user_db_table *db_tab, int index, u64 uaddr)
{
#ifdef __linux__
	struct page *pages[1];
	int ret = 0;
	u8 status;
	int i;

	if (!mthca_is_memfree(dev))
		return 0;

	if (index < 0 || index > dev->uar_table.uarc_size / 8)
		return -EINVAL;

	mutex_lock(&db_tab->mutex);

	i = index / MTHCA_DB_REC_PER_PAGE;

	if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE)       ||
	    (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
	    (uaddr & 4095)) {
		ret = -EINVAL;
		goto out;
	}

	if (db_tab->page[i].refcount) {
		++db_tab->page[i].refcount;
		goto out;
	}

	ret = get_user_pages(current, current->mm, uaddr & PAGE_MASK, 1, 1, 0,
			     pages, NULL);
	if (ret < 0)
		goto out;

	sg_set_page(&db_tab->page[i].mem, pages[0], MTHCA_ICM_PAGE_SIZE,
			uaddr & ~PAGE_MASK);

	ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
	if (ret < 0) {
		put_page(pages[0]);
		goto out;
	}

	ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
				 mthca_uarc_virt(dev, uar, i), &status);
	if (!ret && status)
		ret = -EINVAL;
	if (ret) {
		pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
		put_page(sg_page(&db_tab->page[i].mem));
		goto out;
	}

	db_tab->page[i].uvirt    = uaddr;
	db_tab->page[i].refcount = 1;

out:
	mutex_unlock(&db_tab->mutex);
	return ret;
#else
	struct proc *proc;
	vm_offset_t start;
	vm_paddr_t paddr;
	pmap_t pmap;
	vm_page_t m;
	int ret = 0;
	u8 status;
	int i;

	if (!mthca_is_memfree(dev))
		return 0;

	if (index < 0 || index > dev->uar_table.uarc_size / 8)
		return -EINVAL;

	mutex_lock(&db_tab->mutex);

	i = index / MTHCA_DB_REC_PER_PAGE;
	start = 0;

	if ((db_tab->page[i].refcount >= MTHCA_DB_REC_PER_PAGE)       ||
	    (db_tab->page[i].uvirt && db_tab->page[i].uvirt != uaddr) ||
	    (uaddr & 4095)) {
		ret = -EINVAL;
		goto out;
	}

	if (db_tab->page[i].refcount) {
		++db_tab->page[i].refcount;
		goto out;
	}

	proc = curproc;
	pmap = vm_map_pmap(&proc->p_vmspace->vm_map);
	PROC_LOCK(proc);
	if (ptoa(pmap_wired_count(pmap) + 1) > lim_cur(proc, RLIMIT_MEMLOCK)) {
		PROC_UNLOCK(proc);
		ret = -ENOMEM;
		goto out;
	}
	PROC_UNLOCK(proc);
	if (cnt.v_wire_count + 1 > vm_page_max_wired) {
		ret = -EAGAIN;
		goto out;
	}
	start = uaddr & PAGE_MASK;
	ret = vm_map_wire(&proc->p_vmspace->vm_map, start, start + PAGE_SIZE,
	    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES | VM_MAP_WIRE_WRITE);
	if (ret != KERN_SUCCESS) {
		start = 0;
		ret = -ENOMEM;
		goto out;
	}
	paddr = pmap_extract(pmap, uaddr);
	if (paddr == 0) {
		ret = -EFAULT;
		goto out;
	}
	m = PHYS_TO_VM_PAGE(paddr);

	sg_set_page(&db_tab->page[i].mem, m, MTHCA_ICM_PAGE_SIZE,
			uaddr & ~PAGE_MASK);

	ret = pci_map_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
	if (ret < 0)
		goto out;

	ret = mthca_MAP_ICM_page(dev, sg_dma_address(&db_tab->page[i].mem),
				 mthca_uarc_virt(dev, uar, i), &status);
	if (!ret && status)
		ret = -EINVAL;
	if (ret) {
		pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
		goto out;
	}

	db_tab->page[i].uvirt    = uaddr;
	db_tab->page[i].refcount = 1;

out:
	if (ret < 0 && start)
		vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map,
		    start, start + PAGE_SIZE,
		    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
	mutex_unlock(&db_tab->mutex);
	return ret;
#endif
}

void mthca_unmap_user_db(struct mthca_dev *dev, struct mthca_uar *uar,
			 struct mthca_user_db_table *db_tab, int index)
{
	if (!mthca_is_memfree(dev))
		return;

	/*
	 * To make our bookkeeping simpler, we don't unmap DB
	 * pages until we clean up the whole db table.
	 */

	mutex_lock(&db_tab->mutex);

	--db_tab->page[index / MTHCA_DB_REC_PER_PAGE].refcount;

	mutex_unlock(&db_tab->mutex);
}

struct mthca_user_db_table *mthca_init_user_db_tab(struct mthca_dev *dev)
{
	struct mthca_user_db_table *db_tab;
	int npages;
	int i;

	if (!mthca_is_memfree(dev))
		return NULL;

	npages = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
	db_tab = kmalloc(sizeof *db_tab + npages * sizeof *db_tab->page, GFP_KERNEL);
	if (!db_tab)
		return ERR_PTR(-ENOMEM);

	mutex_init(&db_tab->mutex);
	for (i = 0; i < npages; ++i) {
		db_tab->page[i].refcount = 0;
		db_tab->page[i].uvirt    = 0;
		sg_init_table(&db_tab->page[i].mem, 1);
	}

	return db_tab;
}

void mthca_cleanup_user_db_tab(struct mthca_dev *dev, struct mthca_uar *uar,
			       struct mthca_user_db_table *db_tab)
{
	int i;
	u8 status;

	if (!mthca_is_memfree(dev))
		return;

	for (i = 0; i < dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE; ++i) {
		if (db_tab->page[i].uvirt) {
			mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, uar, i), 1, &status);
			pci_unmap_sg(dev->pdev, &db_tab->page[i].mem, 1, PCI_DMA_TODEVICE);
#ifdef __linux__
			put_page(sg_page(&db_tab->page[i].mem));
#else
			vm_offset_t start;

			start = db_tab->page[i].uvirt & PAGE_MASK;
			vm_map_unwire(&curthread->td_proc->p_vmspace->vm_map,
			    start, start + PAGE_SIZE,
			    VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
#endif
		}
	}

	kfree(db_tab);
}

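/*
 * Doorbell records are allocated from the two ends of the UARC range:
 * group 1 (CQ arm and SQ doorbells) grows up from page 0, group 2
 * (CQ set_ci, RQ and SRQ doorbells) grows down from the last page.
 * When no existing page of the requested group has a free slot and
 * max_group1 would meet min_group2, allocation fails with -ENOMEM.
 */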
int mthca_alloc_db(struct mthca_dev *dev, enum mthca_db_type type,
		   u32 qn, __be32 **db)
{
	int group;
	int start, end, dir;
	int i, j;
	struct mthca_db_page *page;
	int ret = 0;
	u8 status;

	mutex_lock(&dev->db_tab->mutex);

	switch (type) {
	case MTHCA_DB_TYPE_CQ_ARM:
	case MTHCA_DB_TYPE_SQ:
		group = 0;
		start = 0;
		end   = dev->db_tab->max_group1;
		dir   = 1;
		break;

	case MTHCA_DB_TYPE_CQ_SET_CI:
	case MTHCA_DB_TYPE_RQ:
	case MTHCA_DB_TYPE_SRQ:
		group = 1;
		start = dev->db_tab->npages - 1;
		end   = dev->db_tab->min_group2;
		dir   = -1;
		break;

	default:
		ret = -EINVAL;
		goto out;
	}

	for (i = start; i != end; i += dir)
		if (dev->db_tab->page[i].db_rec &&
		    !bitmap_full(dev->db_tab->page[i].used,
				 MTHCA_DB_REC_PER_PAGE)) {
			page = dev->db_tab->page + i;
			goto found;
		}

	for (i = start; i != end; i += dir)
		if (!dev->db_tab->page[i].db_rec) {
			page = dev->db_tab->page + i;
			goto alloc;
		}

	if (dev->db_tab->max_group1 >= dev->db_tab->min_group2 - 1) {
		ret = -ENOMEM;
		goto out;
	}

	if (group == 0)
		++dev->db_tab->max_group1;
	else
		--dev->db_tab->min_group2;

	page = dev->db_tab->page + end;

alloc:
	page->db_rec = dma_alloc_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
					  &page->mapping, GFP_KERNEL);
	if (!page->db_rec) {
		ret = -ENOMEM;
		goto out;
	}
	memset(page->db_rec, 0, MTHCA_ICM_PAGE_SIZE);

	ret = mthca_MAP_ICM_page(dev, page->mapping,
				 mthca_uarc_virt(dev, &dev->driver_uar, i), &status);
	if (!ret && status)
		ret = -EINVAL;
	if (ret) {
		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
				  page->db_rec, page->mapping);
		goto out;
	}

	bitmap_zero(page->used, MTHCA_DB_REC_PER_PAGE);

found:
	j = find_first_zero_bit(page->used, MTHCA_DB_REC_PER_PAGE);
	set_bit(j, page->used);

	if (group == 1)
		j = MTHCA_DB_REC_PER_PAGE - 1 - j;

	ret = i * MTHCA_DB_REC_PER_PAGE + j;

	page->db_rec[j] = cpu_to_be64((qn << 8) | (type << 5));

	*db = (__be32 *) &page->db_rec[j];

out:
	mutex_unlock(&dev->db_tab->mutex);

	return ret;
}

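/*
 * Undo mthca_alloc_db(): db_index encodes page i and slot j as
 * i * MTHCA_DB_REC_PER_PAGE + j (with j mirrored on group 2 pages).
 * Pages at the edges of the two groups that become empty are unmapped
 * and freed so the groups can shrink back toward their starting
 * points.
 */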
void mthca_free_db(struct mthca_dev *dev, int type, int db_index)
{
	int i, j;
	struct mthca_db_page *page;
	u8 status;

	i = db_index / MTHCA_DB_REC_PER_PAGE;
	j = db_index % MTHCA_DB_REC_PER_PAGE;

	page = dev->db_tab->page + i;

	mutex_lock(&dev->db_tab->mutex);

	page->db_rec[j] = 0;
	if (i >= dev->db_tab->min_group2)
		j = MTHCA_DB_REC_PER_PAGE - 1 - j;
	clear_bit(j, page->used);

	if (bitmap_empty(page->used, MTHCA_DB_REC_PER_PAGE) &&
	    i >= dev->db_tab->max_group1 - 1) {
		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);

		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
				  page->db_rec, page->mapping);
		page->db_rec = NULL;

		if (i == dev->db_tab->max_group1) {
			--dev->db_tab->max_group1;
			/* XXX may be able to unmap more pages now */
		}
		if (i == dev->db_tab->min_group2)
			++dev->db_tab->min_group2;
	}

	mutex_unlock(&dev->db_tab->mutex);
}

int mthca_init_db_tab(struct mthca_dev *dev)
{
	int i;

	if (!mthca_is_memfree(dev))
		return 0;

	dev->db_tab = kmalloc(sizeof *dev->db_tab, GFP_KERNEL);
	if (!dev->db_tab)
		return -ENOMEM;

	mutex_init(&dev->db_tab->mutex);

	dev->db_tab->npages     = dev->uar_table.uarc_size / MTHCA_ICM_PAGE_SIZE;
	dev->db_tab->max_group1 = 0;
	dev->db_tab->min_group2 = dev->db_tab->npages - 1;

	dev->db_tab->page = kmalloc(dev->db_tab->npages *
				    sizeof *dev->db_tab->page,
				    GFP_KERNEL);
	if (!dev->db_tab->page) {
		kfree(dev->db_tab);
		return -ENOMEM;
	}

	for (i = 0; i < dev->db_tab->npages; ++i)
		dev->db_tab->page[i].db_rec = NULL;

	return 0;
}

void mthca_cleanup_db_tab(struct mthca_dev *dev)
{
	int i;
	u8 status;

	if (!mthca_is_memfree(dev))
		return;

	/*
	 * Because, to keep mthca_free_db() simple, we don't always
	 * free our UARC pages when they become empty, we need to make
	 * a sweep through the doorbell pages and free any leftover
	 * pages now.
	 */
	for (i = 0; i < dev->db_tab->npages; ++i) {
		if (!dev->db_tab->page[i].db_rec)
			continue;

		if (!bitmap_empty(dev->db_tab->page[i].used, MTHCA_DB_REC_PER_PAGE))
			mthca_warn(dev, "Kernel UARC page %d not empty\n", i);

		mthca_UNMAP_ICM(dev, mthca_uarc_virt(dev, &dev->driver_uar, i), 1, &status);

		dma_free_coherent(&dev->pdev->dev, MTHCA_ICM_PAGE_SIZE,
				  dev->db_tab->page[i].db_rec,
				  dev->db_tab->page[i].mapping);
	}

	kfree(dev->db_tab->page);
	kfree(dev->db_tab);
}