/*
 * Linux driver for VMware's para-virtualized SCSI HBA.
 *
 * Copyright (C) 2008-2014, VMware, Inc. All Rights Reserved.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License as published by the
 * Free Software Foundation; version 2 of the License and no later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
 * NON INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/slab.h>
#include <linux/workqueue.h>
#include <linux/pci.h>

#include <scsi/scsi.h>
#include <scsi/scsi_host.h>
#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_device.h>
#include <scsi/scsi_tcq.h>

#include "vmw_pvscsi.h"

#define PVSCSI_LINUX_DRIVER_DESC "VMware PVSCSI driver"

MODULE_DESCRIPTION(PVSCSI_LINUX_DRIVER_DESC);
MODULE_AUTHOR("VMware, Inc.");
MODULE_LICENSE("GPL");
MODULE_VERSION(PVSCSI_DRIVER_VERSION_STRING);

#define PVSCSI_DEFAULT_NUM_PAGES_PER_RING	8
#define PVSCSI_DEFAULT_NUM_PAGES_MSG_RING	1
#define PVSCSI_DEFAULT_QUEUE_DEPTH		254
#define SGL_SIZE				PAGE_SIZE

struct pvscsi_sg_list {
	struct PVSCSISGElement sge[PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT];
};
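
/*
 * Note: a pvscsi_sg_list must fit in a single SGL_SIZE (one page)
 * allocation; pvscsi_allocate_sg() below hands out exactly one page per
 * context, and a BUILD_BUG_ON there enforces the fit at compile time.
 */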

struct pvscsi_ctx {
	/*
	 * The index of the context in cmd_map serves as the context ID for a
	 * 1-to-1 mapping of completions back to requests.
	 */
	struct scsi_cmnd	*cmd;
	struct pvscsi_sg_list	*sgl;
	struct list_head	list;
	dma_addr_t		dataPA;
	dma_addr_t		sensePA;
	dma_addr_t		sglPA;
	struct completion	*abort_cmp;
};

struct pvscsi_adapter {
	char				*mmioBase;
	u8				rev;
	bool				use_msg;
	bool				use_req_threshold;

	spinlock_t			hw_lock;

	struct workqueue_struct		*workqueue;
	struct work_struct		work;

	struct PVSCSIRingReqDesc	*req_ring;
	unsigned			req_pages;
	unsigned			req_depth;
	dma_addr_t			reqRingPA;

	struct PVSCSIRingCmpDesc	*cmp_ring;
	unsigned			cmp_pages;
	dma_addr_t			cmpRingPA;

	struct PVSCSIRingMsgDesc	*msg_ring;
	unsigned			msg_pages;
	dma_addr_t			msgRingPA;

	struct PVSCSIRingsState		*rings_state;
	dma_addr_t			ringStatePA;

	struct pci_dev			*dev;
	struct Scsi_Host		*host;

	struct list_head		cmd_pool;
	struct pvscsi_ctx		*cmd_map;
};


/* Command line parameters */
static int pvscsi_ring_pages;
static int pvscsi_msg_ring_pages = PVSCSI_DEFAULT_NUM_PAGES_MSG_RING;
static int pvscsi_cmd_per_lun    = PVSCSI_DEFAULT_QUEUE_DEPTH;
static bool pvscsi_disable_msi;
static bool pvscsi_disable_msix;
static bool pvscsi_use_msg       = true;
static bool pvscsi_use_req_threshold = true;

#define PVSCSI_RW (S_IRUSR | S_IWUSR)

module_param_named(ring_pages, pvscsi_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(ring_pages, "Number of pages per req/cmp ring - (default="
		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_PER_RING)
		 "[up to 16 targets],"
		 __stringify(PVSCSI_SETUP_RINGS_MAX_NUM_PAGES)
		 "[for 16+ targets])");

module_param_named(msg_ring_pages, pvscsi_msg_ring_pages, int, PVSCSI_RW);
MODULE_PARM_DESC(msg_ring_pages, "Number of pages for the msg ring - (default="
		 __stringify(PVSCSI_DEFAULT_NUM_PAGES_MSG_RING) ")");

module_param_named(cmd_per_lun, pvscsi_cmd_per_lun, int, PVSCSI_RW);
MODULE_PARM_DESC(cmd_per_lun, "Maximum commands per lun - (default="
		 __stringify(PVSCSI_DEFAULT_QUEUE_DEPTH) ")");

module_param_named(disable_msi, pvscsi_disable_msi, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msi, "Disable MSI use in driver - (default=0)");

module_param_named(disable_msix, pvscsi_disable_msix, bool, PVSCSI_RW);
MODULE_PARM_DESC(disable_msix, "Disable MSI-X use in driver - (default=0)");

module_param_named(use_msg, pvscsi_use_msg, bool, PVSCSI_RW);
MODULE_PARM_DESC(use_msg, "Use msg ring when available - (default=1)");

module_param_named(use_req_threshold, pvscsi_use_req_threshold,
		   bool, PVSCSI_RW);
MODULE_PARM_DESC(use_req_threshold, "Use driver-based request coalescing if configured - (default=1)");
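
/*
 * Example (for illustration): all of the above can be set at load time,
 * e.g.:
 *
 *   modprobe vmw_pvscsi ring_pages=32 cmd_per_lun=64 use_msg=0
 *
 * The current values are also visible (and, per the PVSCSI_RW permissions
 * above, root-writable) under /sys/module/vmw_pvscsi/parameters/.
 */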

static const struct pci_device_id pvscsi_pci_tbl[] = {
	{ PCI_VDEVICE(VMWARE, PCI_DEVICE_ID_VMWARE_PVSCSI) },
	{ 0 }
};

MODULE_DEVICE_TABLE(pci, pvscsi_pci_tbl);

static struct device *
pvscsi_dev(const struct pvscsi_adapter *adapter)
{
	return &(adapter->dev->dev);
}

static struct pvscsi_ctx *
pvscsi_find_context(const struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
	struct pvscsi_ctx *ctx, *end;

	end = &adapter->cmd_map[adapter->req_depth];
	for (ctx = adapter->cmd_map; ctx < end; ctx++)
		if (ctx->cmd == cmd)
			return ctx;

	return NULL;
}

static struct pvscsi_ctx *
pvscsi_acquire_context(struct pvscsi_adapter *adapter, struct scsi_cmnd *cmd)
{
	struct pvscsi_ctx *ctx;

	if (list_empty(&adapter->cmd_pool))
		return NULL;

	ctx = list_first_entry(&adapter->cmd_pool, struct pvscsi_ctx, list);
	ctx->cmd = cmd;
	list_del(&ctx->list);

	return ctx;
}

static void pvscsi_release_context(struct pvscsi_adapter *adapter,
				   struct pvscsi_ctx *ctx)
{
	ctx->cmd = NULL;
	ctx->abort_cmp = NULL;
	list_add(&ctx->list, &adapter->cmd_pool);
}

/*
 * Map a pvscsi_ctx struct to a context ID field value; we map to a simple
 * non-zero integer. ctx always points to an entry in the cmd_map array,
 * hence the return value is always >= 1.
 */
static u64 pvscsi_map_context(const struct pvscsi_adapter *adapter,
			      const struct pvscsi_ctx *ctx)
{
	return ctx - adapter->cmd_map + 1;
}

static struct pvscsi_ctx *
pvscsi_get_context(const struct pvscsi_adapter *adapter, u64 context)
{
	return &adapter->cmd_map[context - 1];
}
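
/*
 * For illustration: the two helpers above are inverses of each other.
 * For the entry at cmd_map[i], pvscsi_map_context() yields i + 1 (so 0
 * can never appear as a context ID on the ring), and
 * pvscsi_get_context(adapter, i + 1) returns &cmd_map[i] again when the
 * completion comes back.
 */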

static void pvscsi_reg_write(const struct pvscsi_adapter *adapter,
			     u32 offset, u32 val)
{
	writel(val, adapter->mmioBase + offset);
}

static u32 pvscsi_reg_read(const struct pvscsi_adapter *adapter, u32 offset)
{
	return readl(adapter->mmioBase + offset);
}

static u32 pvscsi_read_intr_status(const struct pvscsi_adapter *adapter)
{
	return pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_INTR_STATUS);
}

static void pvscsi_write_intr_status(const struct pvscsi_adapter *adapter,
				     u32 val)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_STATUS, val);
}

static void pvscsi_unmask_intr(const struct pvscsi_adapter *adapter)
{
	u32 intr_bits;

	intr_bits = PVSCSI_INTR_CMPL_MASK;
	if (adapter->use_msg)
		intr_bits |= PVSCSI_INTR_MSG_MASK;

	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, intr_bits);
}

static void pvscsi_mask_intr(const struct pvscsi_adapter *adapter)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_INTR_MASK, 0);
}

static void pvscsi_write_cmd_desc(const struct pvscsi_adapter *adapter,
				  u32 cmd, const void *desc, size_t len)
{
	const u32 *ptr = desc;
	size_t i;

	len /= sizeof(*ptr);
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND, cmd);
	for (i = 0; i < len; i++)
		pvscsi_reg_write(adapter,
				 PVSCSI_REG_OFFSET_COMMAND_DATA, ptr[i]);
}
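
/*
 * For illustration: a descriptor is streamed to the device as
 * sizeof(desc) / sizeof(u32) consecutive 32-bit writes to the
 * COMMAND_DATA register, after the command code itself has been written
 * to COMMAND. E.g. an 8-byte descriptor results in one COMMAND write
 * followed by two COMMAND_DATA writes.
 */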

static void pvscsi_abort_cmd(const struct pvscsi_adapter *adapter,
			     const struct pvscsi_ctx *ctx)
{
	struct PVSCSICmdDescAbortCmd cmd = { 0 };

	cmd.target = ctx->cmd->device->id;
	cmd.context = pvscsi_map_context(adapter, ctx);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ABORT_CMD, &cmd, sizeof(cmd));
}

static void pvscsi_kick_rw_io(const struct pvscsi_adapter *adapter)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_RW_IO, 0);
}

static void pvscsi_process_request_ring(const struct pvscsi_adapter *adapter)
{
	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_KICK_NON_RW_IO, 0);
}

static int scsi_is_rw(unsigned char op)
{
	return op == READ_6  || op == WRITE_6 ||
	       op == READ_10 || op == WRITE_10 ||
	       op == READ_12 || op == WRITE_12 ||
	       op == READ_16 || op == WRITE_16;
}

static void pvscsi_kick_io(const struct pvscsi_adapter *adapter,
			   unsigned char op)
{
	if (scsi_is_rw(op)) {
		struct PVSCSIRingsState *s = adapter->rings_state;

		if (!adapter->use_req_threshold ||
		    s->reqProdIdx - s->reqConsIdx >= s->reqCallThreshold)
			pvscsi_kick_rw_io(adapter);
	} else {
		pvscsi_process_request_ring(adapter);
	}
}
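
/*
 * Worked example (illustrative numbers): with use_req_threshold enabled
 * and reqCallThreshold == 8, a READ/WRITE only rings the doorbell once
 * at least 8 requests are outstanding (reqProdIdx - reqConsIdx >= 8);
 * the device picks up the earlier requests on the same kick, which is
 * the "request coalescing" the use_req_threshold parameter refers to.
 */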

static void ll_adapter_reset(const struct pvscsi_adapter *adapter)
{
	dev_dbg(pvscsi_dev(adapter), "Adapter Reset on %p\n", adapter);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_ADAPTER_RESET, NULL, 0);
}

static void ll_bus_reset(const struct pvscsi_adapter *adapter)
{
	dev_dbg(pvscsi_dev(adapter), "Resetting bus on %p\n", adapter);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_BUS, NULL, 0);
}

static void ll_device_reset(const struct pvscsi_adapter *adapter, u32 target)
{
	struct PVSCSICmdDescResetDevice cmd = { 0 };

	dev_dbg(pvscsi_dev(adapter), "Resetting device: target=%u\n", target);

	cmd.target = target;

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_RESET_DEVICE,
			      &cmd, sizeof(cmd));
}

static void pvscsi_create_sg(struct pvscsi_ctx *ctx,
			     struct scatterlist *sg, unsigned count)
{
	unsigned i;
	struct PVSCSISGElement *sge;

	BUG_ON(count > PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT);

	sge = &ctx->sgl->sge[0];
	for (i = 0; i < count; i++, sg = sg_next(sg)) {
		sge[i].addr   = sg_dma_address(sg);
		sge[i].length = sg_dma_len(sg);
		sge[i].flags  = 0;
	}
}

/*
 * Map all data buffers for a command into PCI space and
 * set up the scatter/gather list if needed.
 */
static int pvscsi_map_buffers(struct pvscsi_adapter *adapter,
			      struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd,
			      struct PVSCSIRingReqDesc *e)
{
	unsigned count;
	unsigned bufflen = scsi_bufflen(cmd);
	struct scatterlist *sg;

	e->dataLen = bufflen;
	e->dataAddr = 0;
	if (bufflen == 0)
		return 0;

	sg = scsi_sglist(cmd);
	count = scsi_sg_count(cmd);
	if (count != 0) {
		int segs = scsi_dma_map(cmd);

		if (segs == -ENOMEM) {
			scmd_printk(KERN_DEBUG, cmd,
				    "vmw_pvscsi: Failed to map cmd sglist for DMA.\n");
			return -ENOMEM;
		} else if (segs > 1) {
			pvscsi_create_sg(ctx, sg, segs);

			e->flags |= PVSCSI_FLAG_CMD_WITH_SG_LIST;
			ctx->sglPA = dma_map_single(&adapter->dev->dev,
					ctx->sgl, SGL_SIZE, DMA_TO_DEVICE);
			if (dma_mapping_error(&adapter->dev->dev, ctx->sglPA)) {
				scmd_printk(KERN_ERR, cmd,
					    "vmw_pvscsi: Failed to map ctx sglist for DMA.\n");
				scsi_dma_unmap(cmd);
				ctx->sglPA = 0;
				return -ENOMEM;
			}
			e->dataAddr = ctx->sglPA;
		} else
			e->dataAddr = sg_dma_address(sg);
	} else {
		/*
		 * In case there is no S/G list, scsi_sglist points
		 * directly to the buffer.
		 */
		ctx->dataPA = dma_map_single(&adapter->dev->dev, sg, bufflen,
					     cmd->sc_data_direction);
		if (dma_mapping_error(&adapter->dev->dev, ctx->dataPA)) {
			scmd_printk(KERN_DEBUG, cmd,
				    "vmw_pvscsi: Failed to map direct data buffer for DMA.\n");
			return -ENOMEM;
		}
		e->dataAddr = ctx->dataPA;
	}

	return 0;
}

/*
 * The device incorrectly doesn't clear the first byte of the sense
 * buffer in some cases. We have to do it ourselves.
 * Otherwise we run into trouble when SWIOTLB is forced.
 */
static void pvscsi_patch_sense(struct scsi_cmnd *cmd)
{
	if (cmd->sense_buffer)
		cmd->sense_buffer[0] = 0;
}

static void pvscsi_unmap_buffers(const struct pvscsi_adapter *adapter,
				 struct pvscsi_ctx *ctx)
{
	struct scsi_cmnd *cmd;
	unsigned bufflen;

	cmd = ctx->cmd;
	bufflen = scsi_bufflen(cmd);

	if (bufflen != 0) {
		unsigned count = scsi_sg_count(cmd);

		if (count != 0) {
			scsi_dma_unmap(cmd);
			if (ctx->sglPA) {
				dma_unmap_single(&adapter->dev->dev, ctx->sglPA,
						 SGL_SIZE, DMA_TO_DEVICE);
				ctx->sglPA = 0;
			}
		} else
			dma_unmap_single(&adapter->dev->dev, ctx->dataPA,
					 bufflen, cmd->sc_data_direction);
	}
	if (cmd->sense_buffer)
		dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
				 SCSI_SENSE_BUFFERSIZE, DMA_FROM_DEVICE);
}

static int pvscsi_allocate_rings(struct pvscsi_adapter *adapter)
{
	adapter->rings_state = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
			&adapter->ringStatePA, GFP_KERNEL);
	if (!adapter->rings_state)
		return -ENOMEM;

	adapter->req_pages = min(PVSCSI_MAX_NUM_PAGES_REQ_RING,
				 pvscsi_ring_pages);
	adapter->req_depth = adapter->req_pages
					* PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
	adapter->req_ring = dma_alloc_coherent(&adapter->dev->dev,
			adapter->req_pages * PAGE_SIZE, &adapter->reqRingPA,
			GFP_KERNEL);
	if (!adapter->req_ring)
		return -ENOMEM;

	adapter->cmp_pages = min(PVSCSI_MAX_NUM_PAGES_CMP_RING,
				 pvscsi_ring_pages);
	adapter->cmp_ring = dma_alloc_coherent(&adapter->dev->dev,
			adapter->cmp_pages * PAGE_SIZE, &adapter->cmpRingPA,
			GFP_KERNEL);
	if (!adapter->cmp_ring)
		return -ENOMEM;

	BUG_ON(!IS_ALIGNED(adapter->ringStatePA, PAGE_SIZE));
	BUG_ON(!IS_ALIGNED(adapter->reqRingPA, PAGE_SIZE));
	BUG_ON(!IS_ALIGNED(adapter->cmpRingPA, PAGE_SIZE));

	if (!adapter->use_msg)
		return 0;

	adapter->msg_pages = min(PVSCSI_MAX_NUM_PAGES_MSG_RING,
				 pvscsi_msg_ring_pages);
	adapter->msg_ring = dma_alloc_coherent(&adapter->dev->dev,
			adapter->msg_pages * PAGE_SIZE, &adapter->msgRingPA,
			GFP_KERNEL);
	if (!adapter->msg_ring)
		return -ENOMEM;
	BUG_ON(!IS_ALIGNED(adapter->msgRingPA, PAGE_SIZE));

	return 0;
}
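
/*
 * Note: on any -ENOMEM above we return with earlier rings still
 * allocated; that is intentional, since the probe error path always
 * calls pvscsi_release_resources(), which frees whichever rings are
 * non-NULL.
 */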

static void pvscsi_setup_all_rings(const struct pvscsi_adapter *adapter)
{
	struct PVSCSICmdDescSetupRings cmd = { 0 };
	dma_addr_t base;
	unsigned i;

	cmd.ringsStatePPN   = adapter->ringStatePA >> PAGE_SHIFT;
	cmd.reqRingNumPages = adapter->req_pages;
	cmd.cmpRingNumPages = adapter->cmp_pages;

	base = adapter->reqRingPA;
	for (i = 0; i < adapter->req_pages; i++) {
		cmd.reqRingPPNs[i] = base >> PAGE_SHIFT;
		base += PAGE_SIZE;
	}

	base = adapter->cmpRingPA;
	for (i = 0; i < adapter->cmp_pages; i++) {
		cmd.cmpRingPPNs[i] = base >> PAGE_SHIFT;
		base += PAGE_SIZE;
	}

	memset(adapter->rings_state, 0, PAGE_SIZE);
	memset(adapter->req_ring, 0, adapter->req_pages * PAGE_SIZE);
	memset(adapter->cmp_ring, 0, adapter->cmp_pages * PAGE_SIZE);

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_RINGS,
			      &cmd, sizeof(cmd));

	if (adapter->use_msg) {
		struct PVSCSICmdDescSetupMsgRing cmd_msg = { 0 };

		cmd_msg.numPages = adapter->msg_pages;

		base = adapter->msgRingPA;
		for (i = 0; i < adapter->msg_pages; i++) {
			cmd_msg.ringPPNs[i] = base >> PAGE_SHIFT;
			base += PAGE_SIZE;
		}
		memset(adapter->msg_ring, 0, adapter->msg_pages * PAGE_SIZE);

		pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_SETUP_MSG_RING,
				      &cmd_msg, sizeof(cmd_msg));
	}
}
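
/*
 * For illustration: a PPN ("physical page number") is just the page
 * frame of a DMA address, i.e. PA >> PAGE_SHIFT. With 4 KiB pages, a
 * ring page at bus address 0x12345000 is handed to the device as PPN
 * 0x12345.
 */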

static int pvscsi_change_queue_depth(struct scsi_device *sdev, int qdepth)
{
	if (!sdev->tagged_supported)
		qdepth = 1;
	return scsi_change_queue_depth(sdev, qdepth);
}

/*
 * Pull a completion descriptor off and pass the completion back
 * to the SCSI mid layer.
 */
static void pvscsi_complete_request(struct pvscsi_adapter *adapter,
				    const struct PVSCSIRingCmpDesc *e)
{
	struct pvscsi_ctx *ctx;
	struct scsi_cmnd *cmd;
	struct completion *abort_cmp;
	u32 btstat = e->hostStatus;
	u32 sdstat = e->scsiStatus;

	ctx = pvscsi_get_context(adapter, e->context);
	cmd = ctx->cmd;
	abort_cmp = ctx->abort_cmp;
	pvscsi_unmap_buffers(adapter, ctx);
	if (sdstat != SAM_STAT_CHECK_CONDITION)
		pvscsi_patch_sense(cmd);
	pvscsi_release_context(adapter, ctx);
	if (abort_cmp) {
		/*
		 * The command was requested to be aborted. Just signal that
		 * the request completed and swallow the actual cmd completion
		 * here. The abort handler will post a completion for this
		 * command indicating that it got successfully aborted.
		 */
		complete(abort_cmp);
		return;
	}

	cmd->result = 0;
	if (sdstat != SAM_STAT_GOOD &&
	    (btstat == BTSTAT_SUCCESS ||
	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED ||
	     btstat == BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG)) {
		if (sdstat == SAM_STAT_COMMAND_TERMINATED) {
			cmd->result = (DID_RESET << 16);
		} else {
			cmd->result = (DID_OK << 16) | sdstat;
		}
	} else
		switch (btstat) {
		case BTSTAT_SUCCESS:
		case BTSTAT_LINKED_COMMAND_COMPLETED:
		case BTSTAT_LINKED_COMMAND_COMPLETED_WITH_FLAG:
			/*
			 * Commands like INQUIRY may transfer less data than
			 * requested by the initiator via bufflen. Set the
			 * residual count to make the upper layer aware of the
			 * actual amount of data returned. There are cases when
			 * the controller returns zero dataLen with non-zero
			 * data - do not set the residual count in that case.
			 */
			if (e->dataLen && (e->dataLen < scsi_bufflen(cmd)))
				scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
			cmd->result = (DID_OK << 16);
			break;

		case BTSTAT_DATARUN:
		case BTSTAT_DATA_UNDERRUN:
			/* Report residual data in underruns */
			scsi_set_resid(cmd, scsi_bufflen(cmd) - e->dataLen);
			cmd->result = (DID_ERROR << 16);
			break;

		case BTSTAT_SELTIMEO:
			/* Our emulation returns this for non-connected devs */
			cmd->result = (DID_BAD_TARGET << 16);
			break;

		case BTSTAT_LUNMISMATCH:
		case BTSTAT_TAGREJECT:
		case BTSTAT_BADMSG:
		case BTSTAT_HAHARDWARE:
		case BTSTAT_INVPHASE:
		case BTSTAT_HATIMEOUT:
		case BTSTAT_NORESPONSE:
		case BTSTAT_DISCONNECT:
		case BTSTAT_HASOFTWARE:
		case BTSTAT_BUSFREE:
		case BTSTAT_SENSFAILED:
			cmd->result |= (DID_ERROR << 16);
			break;

		case BTSTAT_SENTRST:
		case BTSTAT_RECVRST:
		case BTSTAT_BUSRESET:
			cmd->result = (DID_RESET << 16);
			break;

		case BTSTAT_ABORTQUEUE:
			cmd->result = (DID_BUS_BUSY << 16);
			break;

		case BTSTAT_SCSIPARITY:
			cmd->result = (DID_PARITY << 16);
			break;

		default:
			cmd->result = (DID_ERROR << 16);
			scmd_printk(KERN_DEBUG, cmd,
				    "Unknown completion status: 0x%x\n",
				    btstat);
	}

	dev_dbg(&cmd->device->sdev_gendev,
		"cmd=%p %x ctx=%p result=0x%x status=0x%x,%x\n",
		cmd, cmd->cmnd[0], ctx, cmd->result, btstat, sdstat);

	scsi_done(cmd);
}

/*
 * Barrier usage: since the PVSCSI device is emulated, there could be cases
 * where we may want to serialize some accesses between the driver and the
 * emulation layer. We use compiler barriers instead of the more expensive
 * memory barriers because PVSCSI is only supported on x86, which has strong
 * memory access ordering.
 */
static void pvscsi_process_completion_ring(struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct PVSCSIRingCmpDesc *ring = adapter->cmp_ring;
	u32 cmp_entries = s->cmpNumEntriesLog2;

	while (s->cmpConsIdx != s->cmpProdIdx) {
		struct PVSCSIRingCmpDesc *e = ring + (s->cmpConsIdx &
						      MASK(cmp_entries));
		/*
		 * This barrier() ensures that *e is not dereferenced while
		 * the device emulation still writes data into the slot.
		 * Since the device emulation advances s->cmpProdIdx only after
		 * updating the slot, we want to check it first.
		 */
		barrier();
		pvscsi_complete_request(adapter, e);
		/*
		 * This barrier() ensures that the compiler doesn't reorder the
		 * write to s->cmpConsIdx before the read of (*e) inside
		 * pvscsi_complete_request. Otherwise, the device emulation may
		 * overwrite *e before we had a chance to read it.
		 */
		barrier();
		s->cmpConsIdx++;
	}
}
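
/*
 * Worked example (assuming MASK(n) == (1 << n) - 1, as defined in
 * vmw_pvscsi.h): with cmpNumEntriesLog2 == 7 the ring holds 128
 * entries. The producer/consumer indices free-run and are reduced
 * modulo the ring size only when indexing, so cmpConsIdx == 130 reads
 * slot 130 & 127 == 2, and the ring is empty exactly when
 * cmpConsIdx == cmpProdIdx.
 */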

/*
 * Translate a Linux SCSI request into a request ring entry.
 */
static int pvscsi_queue_ring(struct pvscsi_adapter *adapter,
			     struct pvscsi_ctx *ctx, struct scsi_cmnd *cmd)
{
	struct PVSCSIRingsState *s;
	struct PVSCSIRingReqDesc *e;
	struct scsi_device *sdev;
	u32 req_entries;

	s = adapter->rings_state;
	sdev = cmd->device;
	req_entries = s->reqNumEntriesLog2;

	/*
	 * If this condition holds, we might have room on the request ring, but
	 * we might not have room on the completion ring for the response.
	 * However, we have already ruled out this possibility - we would not
	 * have successfully allocated a context if it were true, since we only
	 * have one context per request entry.  Check for it anyway, since it
	 * would be a serious bug.
	 */
	if (s->reqProdIdx - s->cmpConsIdx >= 1 << req_entries) {
		scmd_printk(KERN_ERR, cmd, "vmw_pvscsi: "
			    "ring full: reqProdIdx=%d cmpConsIdx=%d\n",
			    s->reqProdIdx, s->cmpConsIdx);
		return -1;
	}

	e = adapter->req_ring + (s->reqProdIdx & MASK(req_entries));

	e->bus    = sdev->channel;
	e->target = sdev->id;
	memset(e->lun, 0, sizeof(e->lun));
	e->lun[1] = sdev->lun;

	if (cmd->sense_buffer) {
		ctx->sensePA = dma_map_single(&adapter->dev->dev,
				cmd->sense_buffer, SCSI_SENSE_BUFFERSIZE,
				DMA_FROM_DEVICE);
		if (dma_mapping_error(&adapter->dev->dev, ctx->sensePA)) {
			scmd_printk(KERN_DEBUG, cmd,
				    "vmw_pvscsi: Failed to map sense buffer for DMA.\n");
			ctx->sensePA = 0;
			return -ENOMEM;
		}
		e->senseAddr = ctx->sensePA;
		e->senseLen = SCSI_SENSE_BUFFERSIZE;
	} else {
		e->senseLen  = 0;
		e->senseAddr = 0;
	}
	e->cdbLen   = cmd->cmd_len;
	e->vcpuHint = smp_processor_id();
	memcpy(e->cdb, cmd->cmnd, e->cdbLen);

	e->tag = SIMPLE_QUEUE_TAG;

	if (cmd->sc_data_direction == DMA_FROM_DEVICE)
		e->flags = PVSCSI_FLAG_CMD_DIR_TOHOST;
	else if (cmd->sc_data_direction == DMA_TO_DEVICE)
		e->flags = PVSCSI_FLAG_CMD_DIR_TODEVICE;
	else if (cmd->sc_data_direction == DMA_NONE)
		e->flags = PVSCSI_FLAG_CMD_DIR_NONE;
	else
		e->flags = 0;

	if (pvscsi_map_buffers(adapter, ctx, cmd, e) != 0) {
		if (cmd->sense_buffer) {
			dma_unmap_single(&adapter->dev->dev, ctx->sensePA,
					 SCSI_SENSE_BUFFERSIZE,
					 DMA_FROM_DEVICE);
			ctx->sensePA = 0;
		}
		return -ENOMEM;
	}

	e->context = pvscsi_map_context(adapter, ctx);

	barrier();

	s->reqProdIdx++;

	return 0;
}

static int pvscsi_queue_lck(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	struct pvscsi_ctx *ctx;
	unsigned long flags;
	unsigned char op;

	spin_lock_irqsave(&adapter->hw_lock, flags);

	ctx = pvscsi_acquire_context(adapter, cmd);
	if (!ctx || pvscsi_queue_ring(adapter, ctx, cmd) != 0) {
		if (ctx)
			pvscsi_release_context(adapter, ctx);
		spin_unlock_irqrestore(&adapter->hw_lock, flags);
		return SCSI_MLQUEUE_HOST_BUSY;
	}

	op = cmd->cmnd[0];

	dev_dbg(&cmd->device->sdev_gendev,
		"queued cmd %p, ctx %p, op=%x\n", cmd, ctx, op);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	pvscsi_kick_io(adapter, op);

	return 0;
}

static DEF_SCSI_QCMD(pvscsi_queue)

static int pvscsi_abort(struct scsi_cmnd *cmd)
{
	struct pvscsi_adapter *adapter = shost_priv(cmd->device->host);
	struct pvscsi_ctx *ctx;
	unsigned long flags;
	int result = SUCCESS;
	DECLARE_COMPLETION_ONSTACK(abort_cmp);
	int done;

	scmd_printk(KERN_DEBUG, cmd, "task abort on host %u, %p\n",
		    adapter->host->host_no, cmd);

	spin_lock_irqsave(&adapter->hw_lock, flags);

	/*
	 * Poll the completion ring first - we might be trying to abort
	 * a command that is waiting to be dispatched in the completion ring.
	 */
	pvscsi_process_completion_ring(adapter);

	/*
	 * If there is no context for the command, it either already succeeded
	 * or else was never properly issued.  Not our problem.
	 */
	ctx = pvscsi_find_context(adapter, cmd);
	if (!ctx) {
		scmd_printk(KERN_DEBUG, cmd, "Failed to abort cmd %p\n", cmd);
		goto out;
	}

	/*
	 * Mark that the command has been requested to be aborted and issue
	 * the abort.
	 */
	ctx->abort_cmp = &abort_cmp;

	pvscsi_abort_cmd(adapter, ctx);
	spin_unlock_irqrestore(&adapter->hw_lock, flags);
	/* Wait for 2 secs for the completion. */
	done = wait_for_completion_timeout(&abort_cmp, msecs_to_jiffies(2000));
	spin_lock_irqsave(&adapter->hw_lock, flags);

	if (!done) {
		/*
		 * Failed to abort the command, unmark the fact that it
		 * was requested to be aborted.
		 */
		ctx->abort_cmp = NULL;
		result = FAILED;
		scmd_printk(KERN_DEBUG, cmd,
			    "Failed to get completion for aborted cmd %p\n",
			    cmd);
		goto out;
	}

	/*
	 * Successfully aborted the command.
	 */
	cmd->result = (DID_ABORT << 16);
	scsi_done(cmd);

out:
	spin_unlock_irqrestore(&adapter->hw_lock, flags);
	return result;
}

/*
 * Abort all outstanding requests.  This is only safe to use if the completion
 * ring will never be walked again or the device has been reset, because it
 * destroys the 1-1 mapping between the context field passed to the emulation
 * and our request structure.
 */
static void pvscsi_reset_all(struct pvscsi_adapter *adapter)
{
	unsigned i;

	for (i = 0; i < adapter->req_depth; i++) {
		struct pvscsi_ctx *ctx = &adapter->cmd_map[i];
		struct scsi_cmnd *cmd = ctx->cmd;
		if (cmd) {
			scmd_printk(KERN_ERR, cmd,
				    "Forced reset on cmd %p\n", cmd);
			pvscsi_unmap_buffers(adapter, ctx);
			pvscsi_patch_sense(cmd);
			pvscsi_release_context(adapter, ctx);
			cmd->result = (DID_RESET << 16);
			scsi_done(cmd);
		}
	}
}

static int pvscsi_host_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;
	bool use_msg;

	scmd_printk(KERN_INFO, cmd, "SCSI Host reset\n");

	spin_lock_irqsave(&adapter->hw_lock, flags);

	use_msg = adapter->use_msg;

	if (use_msg) {
		adapter->use_msg = false;
		spin_unlock_irqrestore(&adapter->hw_lock, flags);

		/*
		 * Now that we know that the ISR won't add more work on the
		 * workqueue we can safely flush any outstanding work.
		 */
		flush_workqueue(adapter->workqueue);
		spin_lock_irqsave(&adapter->hw_lock, flags);
	}

	/*
	 * We're going to tear down the entire ring structure and set it back
	 * up, so we stall new requests until all completions are flushed and
	 * the rings are back in place.
	 */

	pvscsi_process_request_ring(adapter);

	ll_adapter_reset(adapter);

	/*
	 * Now process any completions.  Note we do this AFTER adapter reset,
	 * which is strange, but stops races where completions get posted
	 * between processing the ring and issuing the reset.  The backend will
	 * not touch the ring memory after reset, so the immediately pre-reset
	 * completion ring state is still valid.
	 */
	pvscsi_process_completion_ring(adapter);

	pvscsi_reset_all(adapter);
	adapter->use_msg = use_msg;
	pvscsi_setup_all_rings(adapter);
	pvscsi_unmask_intr(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

static int pvscsi_bus_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;

	scmd_printk(KERN_INFO, cmd, "SCSI Bus reset\n");

	/*
	 * We don't want to queue new requests for this bus after
	 * flushing all pending requests to emulation, since new
	 * requests could then sneak in during this bus reset phase,
	 * so take the lock now.
	 */
	spin_lock_irqsave(&adapter->hw_lock, flags);

	pvscsi_process_request_ring(adapter);
	ll_bus_reset(adapter);
	pvscsi_process_completion_ring(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

static int pvscsi_device_reset(struct scsi_cmnd *cmd)
{
	struct Scsi_Host *host = cmd->device->host;
	struct pvscsi_adapter *adapter = shost_priv(host);
	unsigned long flags;

	scmd_printk(KERN_INFO, cmd, "SCSI device reset on scsi%u:%u\n",
		    host->host_no, cmd->device->id);

	/*
	 * We don't want to queue new requests for this device after flushing
	 * all pending requests to emulation, since new requests could then
	 * sneak in during this device reset phase, so take the lock now.
	 */
	spin_lock_irqsave(&adapter->hw_lock, flags);

	pvscsi_process_request_ring(adapter);
	ll_device_reset(adapter, cmd->device->id);
	pvscsi_process_completion_ring(adapter);

	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return SUCCESS;
}

static struct scsi_host_template pvscsi_template;

static const char *pvscsi_info(struct Scsi_Host *host)
{
	struct pvscsi_adapter *adapter = shost_priv(host);
	static char buf[256];

	sprintf(buf, "VMware PVSCSI storage adapter rev %d, req/cmp/msg rings: "
		"%u/%u/%u pages, cmd_per_lun=%u", adapter->rev,
		adapter->req_pages, adapter->cmp_pages, adapter->msg_pages,
		pvscsi_template.cmd_per_lun);

	return buf;
}

static struct scsi_host_template pvscsi_template = {
	.module				= THIS_MODULE,
	.name				= "VMware PVSCSI Host Adapter",
	.proc_name			= "vmw_pvscsi",
	.info				= pvscsi_info,
	.queuecommand			= pvscsi_queue,
	.this_id			= -1,
	.sg_tablesize			= PVSCSI_MAX_NUM_SG_ENTRIES_PER_SEGMENT,
	.dma_boundary			= UINT_MAX,
	.max_sectors			= 0xffff,
	.change_queue_depth		= pvscsi_change_queue_depth,
	.eh_abort_handler		= pvscsi_abort,
	.eh_device_reset_handler	= pvscsi_device_reset,
	.eh_bus_reset_handler		= pvscsi_bus_reset,
	.eh_host_reset_handler		= pvscsi_host_reset,
};

static void pvscsi_process_msg(const struct pvscsi_adapter *adapter,
			       const struct PVSCSIRingMsgDesc *e)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct Scsi_Host *host = adapter->host;
	struct scsi_device *sdev;

	printk(KERN_INFO "vmw_pvscsi: msg type: 0x%x - MSG RING: %u/%u (%u)\n",
	       e->type, s->msgProdIdx, s->msgConsIdx, s->msgNumEntriesLog2);

	BUILD_BUG_ON(PVSCSI_MSG_LAST != 2);

	if (e->type == PVSCSI_MSG_DEV_ADDED) {
		struct PVSCSIMsgDescDevStatusChanged *desc;
		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

		printk(KERN_INFO
		       "vmw_pvscsi: msg: device added at scsi%u:%u:%u\n",
		       desc->bus, desc->target, desc->lun[1]);

		if (!scsi_host_get(host))
			return;

		sdev = scsi_device_lookup(host, desc->bus, desc->target,
					  desc->lun[1]);
		if (sdev) {
			printk(KERN_INFO "vmw_pvscsi: device already exists\n");
			scsi_device_put(sdev);
		} else
			scsi_add_device(adapter->host, desc->bus,
					desc->target, desc->lun[1]);

		scsi_host_put(host);
	} else if (e->type == PVSCSI_MSG_DEV_REMOVED) {
		struct PVSCSIMsgDescDevStatusChanged *desc;
		desc = (struct PVSCSIMsgDescDevStatusChanged *)e;

		printk(KERN_INFO
		       "vmw_pvscsi: msg: device removed at scsi%u:%u:%u\n",
		       desc->bus, desc->target, desc->lun[1]);

		if (!scsi_host_get(host))
			return;

		sdev = scsi_device_lookup(host, desc->bus, desc->target,
					  desc->lun[1]);
		if (sdev) {
			scsi_remove_device(sdev);
			scsi_device_put(sdev);
		} else
			printk(KERN_INFO
			       "vmw_pvscsi: failed to lookup scsi%u:%u:%u\n",
			       desc->bus, desc->target, desc->lun[1]);

		scsi_host_put(host);
	}
}

static int pvscsi_msg_pending(const struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;

	return s->msgProdIdx != s->msgConsIdx;
}

static void pvscsi_process_msg_ring(const struct pvscsi_adapter *adapter)
{
	struct PVSCSIRingsState *s = adapter->rings_state;
	struct PVSCSIRingMsgDesc *ring = adapter->msg_ring;
	u32 msg_entries = s->msgNumEntriesLog2;

	while (pvscsi_msg_pending(adapter)) {
		struct PVSCSIRingMsgDesc *e = ring + (s->msgConsIdx &
						      MASK(msg_entries));

		barrier();
		pvscsi_process_msg(adapter, e);
		barrier();
		s->msgConsIdx++;
	}
}

static void pvscsi_msg_workqueue_handler(struct work_struct *data)
{
	struct pvscsi_adapter *adapter;

	adapter = container_of(data, struct pvscsi_adapter, work);

	pvscsi_process_msg_ring(adapter);
}

static int pvscsi_setup_msg_workqueue(struct pvscsi_adapter *adapter)
{
	char name[32];

	if (!pvscsi_use_msg)
		return 0;

	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
			 PVSCSI_CMD_SETUP_MSG_RING);

	if (pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS) == -1)
		return 0;

	snprintf(name, sizeof(name),
		 "vmw_pvscsi_wq_%u", adapter->host->host_no);

	adapter->workqueue = create_singlethread_workqueue(name);
	if (!adapter->workqueue) {
		printk(KERN_ERR "vmw_pvscsi: failed to create work queue\n");
		return 0;
	}
	INIT_WORK(&adapter->work, pvscsi_msg_workqueue_handler);

	return 1;
}

static bool pvscsi_setup_req_threshold(struct pvscsi_adapter *adapter,
				      bool enable)
{
	u32 val;

	if (!pvscsi_use_req_threshold)
		return false;

	pvscsi_reg_write(adapter, PVSCSI_REG_OFFSET_COMMAND,
			 PVSCSI_CMD_SETUP_REQCALLTHRESHOLD);
	val = pvscsi_reg_read(adapter, PVSCSI_REG_OFFSET_COMMAND_STATUS);
	if (val == -1) {
		printk(KERN_INFO "vmw_pvscsi: device does not support req_threshold\n");
		return false;
	} else {
		struct PVSCSICmdDescSetupReqCall cmd_msg = { 0 };
		cmd_msg.enable = enable;
		printk(KERN_INFO
		       "vmw_pvscsi: %sabling reqCallThreshold\n",
			enable ? "en" : "dis");
		pvscsi_write_cmd_desc(adapter,
				      PVSCSI_CMD_SETUP_REQCALLTHRESHOLD,
				      &cmd_msg, sizeof(cmd_msg));
		return pvscsi_reg_read(adapter,
				       PVSCSI_REG_OFFSET_COMMAND_STATUS) != 0;
	}
}
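
/*
 * Note: both pvscsi_setup_msg_workqueue() and pvscsi_setup_req_threshold()
 * use the same probing convention: write the command code to the COMMAND
 * register, then read COMMAND_STATUS. A status of -1 (all ones) means the
 * emulation does not recognize the command, so the optional feature is
 * quietly left disabled.
 */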

static irqreturn_t pvscsi_isr(int irq, void *devp)
{
	struct pvscsi_adapter *adapter = devp;
	unsigned long flags;

	spin_lock_irqsave(&adapter->hw_lock, flags);
	pvscsi_process_completion_ring(adapter);
	if (adapter->use_msg && pvscsi_msg_pending(adapter))
		queue_work(adapter->workqueue, &adapter->work);
	spin_unlock_irqrestore(&adapter->hw_lock, flags);

	return IRQ_HANDLED;
}

static irqreturn_t pvscsi_shared_isr(int irq, void *devp)
{
	struct pvscsi_adapter *adapter = devp;
	u32 val = pvscsi_read_intr_status(adapter);

	if (!(val & PVSCSI_INTR_ALL_SUPPORTED))
		return IRQ_NONE;
	pvscsi_write_intr_status(devp, val);
	return pvscsi_isr(irq, devp);
}

static void pvscsi_free_sgls(const struct pvscsi_adapter *adapter)
{
	struct pvscsi_ctx *ctx = adapter->cmd_map;
	unsigned i;

	for (i = 0; i < adapter->req_depth; ++i, ++ctx)
		free_pages((unsigned long)ctx->sgl, get_order(SGL_SIZE));
}

static void pvscsi_shutdown_intr(struct pvscsi_adapter *adapter)
{
	free_irq(pci_irq_vector(adapter->dev, 0), adapter);
	pci_free_irq_vectors(adapter->dev);
}

static void pvscsi_release_resources(struct pvscsi_adapter *adapter)
{
	if (adapter->workqueue)
		destroy_workqueue(adapter->workqueue);

	if (adapter->mmioBase)
		pci_iounmap(adapter->dev, adapter->mmioBase);

	pci_release_regions(adapter->dev);

	if (adapter->cmd_map) {
		pvscsi_free_sgls(adapter);
		kfree(adapter->cmd_map);
	}

	if (adapter->rings_state)
		dma_free_coherent(&adapter->dev->dev, PAGE_SIZE,
				    adapter->rings_state, adapter->ringStatePA);

	if (adapter->req_ring)
		dma_free_coherent(&adapter->dev->dev,
				    adapter->req_pages * PAGE_SIZE,
				    adapter->req_ring, adapter->reqRingPA);

	if (adapter->cmp_ring)
		dma_free_coherent(&adapter->dev->dev,
				    adapter->cmp_pages * PAGE_SIZE,
				    adapter->cmp_ring, adapter->cmpRingPA);

	if (adapter->msg_ring)
		dma_free_coherent(&adapter->dev->dev,
				    adapter->msg_pages * PAGE_SIZE,
				    adapter->msg_ring, adapter->msgRingPA);
}

/*
 * Allocate scatter gather lists.
 *
 * These are statically allocated.  Trying to be clever was not worth it.
 *
 * Dynamic allocation can fail, and we can't go deep into the memory
 * allocator, since we're a SCSI driver, and trying too hard to allocate
 * memory might generate disk I/O.  We also don't want to fail disk I/O
 * in that case because we can't get an allocation - the I/O could be
 * trying to swap out data to free memory.  Since that is pathological,
 * just use a statically allocated scatter list.
 *
 */
static int pvscsi_allocate_sg(struct pvscsi_adapter *adapter)
{
	struct pvscsi_ctx *ctx;
	int i;

	ctx = adapter->cmd_map;
	BUILD_BUG_ON(sizeof(struct pvscsi_sg_list) > SGL_SIZE);

	for (i = 0; i < adapter->req_depth; ++i, ++ctx) {
		ctx->sgl = (void *)__get_free_pages(GFP_KERNEL,
						    get_order(SGL_SIZE));
		ctx->sglPA = 0;
		BUG_ON(!IS_ALIGNED(((unsigned long)ctx->sgl), PAGE_SIZE));
		if (!ctx->sgl) {
			for (; i >= 0; --i, --ctx) {
				free_pages((unsigned long)ctx->sgl,
					   get_order(SGL_SIZE));
				ctx->sgl = NULL;
			}
			return -ENOMEM;
		}
	}

	return 0;
}
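
/*
 * Note: SGL_SIZE is PAGE_SIZE, so get_order(SGL_SIZE) is 0 and each
 * context gets exactly one page here; the BUILD_BUG_ON above guarantees
 * that a pvscsi_sg_list fits in that page.
 */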

/*
 * Query the device, fetch the config info and return the
 * maximum number of targets on the adapter. On any failure,
 * return the default of 16.
 */
static u32 pvscsi_get_max_targets(struct pvscsi_adapter *adapter)
{
	struct PVSCSICmdDescConfigCmd cmd;
	struct PVSCSIConfigPageHeader *header;
	struct device *dev;
	dma_addr_t configPagePA;
	void *config_page;
	u32 numPhys = 16;

	dev = pvscsi_dev(adapter);
	config_page = dma_alloc_coherent(&adapter->dev->dev, PAGE_SIZE,
			&configPagePA, GFP_KERNEL);
	if (!config_page) {
		dev_warn(dev, "vmw_pvscsi: failed to allocate memory for config page\n");
		goto exit;
	}
	BUG_ON(configPagePA & ~PAGE_MASK);

	/* Fetch config info from the device. */
	cmd.configPageAddress = ((u64)PVSCSI_CONFIG_CONTROLLER_ADDRESS) << 32;
	cmd.configPageNum = PVSCSI_CONFIG_PAGE_CONTROLLER;
	cmd.cmpAddr = configPagePA;
	cmd._pad = 0;

	/*
	 * Mark the completion page header with error values. If the device
	 * completes the command successfully, it sets the status values to
	 * indicate success.
	 */
	header = config_page;
	header->hostStatus = BTSTAT_INVPARAM;
	header->scsiStatus = SDSTAT_CHECK;

	pvscsi_write_cmd_desc(adapter, PVSCSI_CMD_CONFIG, &cmd, sizeof(cmd));

	if (header->hostStatus == BTSTAT_SUCCESS &&
	    header->scsiStatus == SDSTAT_GOOD) {
		struct PVSCSIConfigPageController *config;

		config = config_page;
		numPhys = config->numPhys;
	} else
		dev_warn(dev, "vmw_pvscsi: PVSCSI_CMD_CONFIG failed. hostStatus = 0x%x, scsiStatus = 0x%x\n",
			 header->hostStatus, header->scsiStatus);
	dma_free_coherent(&adapter->dev->dev, PAGE_SIZE, config_page,
			  configPagePA);
exit:
	return numPhys;
}

static int pvscsi_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	unsigned int irq_flag = PCI_IRQ_MSIX | PCI_IRQ_MSI | PCI_IRQ_LEGACY;
	struct pvscsi_adapter *adapter;
	struct pvscsi_adapter adapter_temp;
	struct Scsi_Host *host = NULL;
	unsigned int i;
	int error;
	u32 max_id;

	error = -ENODEV;

	if (pci_enable_device(pdev))
		return error;

	if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64))) {
		printk(KERN_INFO "vmw_pvscsi: using 64bit dma\n");
	} else if (!dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32))) {
		printk(KERN_INFO "vmw_pvscsi: using 32bit dma\n");
	} else {
		printk(KERN_ERR "vmw_pvscsi: failed to set DMA mask\n");
		goto out_disable_device;
	}

	/*
	 * Use a temporary pvscsi_adapter struct until we know the number of
	 * targets on the adapter; after that we switch to the real,
	 * allocated struct.
	 */
	adapter = &adapter_temp;
	memset(adapter, 0, sizeof(*adapter));
	adapter->dev  = pdev;
	adapter->rev = pdev->revision;

	if (pci_request_regions(pdev, "vmw_pvscsi")) {
		printk(KERN_ERR "vmw_pvscsi: pci memory selection failed\n");
		goto out_disable_device;
	}

	for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
		if ((pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE_IO))
			continue;

		if (pci_resource_len(pdev, i) < PVSCSI_MEM_SPACE_SIZE)
			continue;

		break;
	}

	if (i == DEVICE_COUNT_RESOURCE) {
		printk(KERN_ERR
		       "vmw_pvscsi: adapter has no suitable MMIO region\n");
		goto out_release_resources_and_disable;
	}

	adapter->mmioBase = pci_iomap(pdev, i, PVSCSI_MEM_SPACE_SIZE);

	if (!adapter->mmioBase) {
		printk(KERN_ERR
		       "vmw_pvscsi: can't iomap for BAR %d memsize %lu\n",
		       i, PVSCSI_MEM_SPACE_SIZE);
		goto out_release_resources_and_disable;
	}

	pci_set_master(pdev);

	/*
	 * Ask the device for max number of targets before deciding the
	 * default pvscsi_ring_pages value.
	 */
	max_id = pvscsi_get_max_targets(adapter);
	printk(KERN_INFO "vmw_pvscsi: max_id: %u\n", max_id);

	if (pvscsi_ring_pages == 0)
		/*
		 * Set the default ring size: 8 pages for up to 16 targets,
		 * the maximum above that.
		 */
		pvscsi_ring_pages = (max_id > 16) ?
			PVSCSI_SETUP_RINGS_MAX_NUM_PAGES :
			PVSCSI_DEFAULT_NUM_PAGES_PER_RING;
	printk(KERN_INFO
	       "vmw_pvscsi: setting ring_pages to %d\n",
	       pvscsi_ring_pages);

	pvscsi_template.can_queue =
		min(PVSCSI_MAX_NUM_PAGES_REQ_RING, pvscsi_ring_pages) *
		PVSCSI_MAX_NUM_REQ_ENTRIES_PER_PAGE;
	pvscsi_template.cmd_per_lun =
		min(pvscsi_template.can_queue, pvscsi_cmd_per_lun);
	host = scsi_host_alloc(&pvscsi_template, sizeof(struct pvscsi_adapter));
	if (!host) {
		printk(KERN_ERR "vmw_pvscsi: failed to allocate host\n");
		goto out_release_resources_and_disable;
	}

	/*
	 * Use the real pvscsi_adapter struct from here onwards.
	 */
	adapter = shost_priv(host);
	memset(adapter, 0, sizeof(*adapter));
	adapter->dev  = pdev;
	adapter->host = host;
	/*
	 * Copy back what we already have to the allocated adapter struct.
	 */
	adapter->rev = adapter_temp.rev;
	adapter->mmioBase = adapter_temp.mmioBase;

	spin_lock_init(&adapter->hw_lock);
	host->max_channel = 0;
	host->max_lun     = 1;
	host->max_cmd_len = 16;
	host->max_id      = max_id;

	pci_set_drvdata(pdev, host);

	ll_adapter_reset(adapter);

	adapter->use_msg = pvscsi_setup_msg_workqueue(adapter);

	error = pvscsi_allocate_rings(adapter);
	if (error) {
		printk(KERN_ERR "vmw_pvscsi: unable to allocate ring memory\n");
		goto out_release_resources;
	}

	/*
	 * From this point on we should reset the adapter if anything goes
	 * wrong.
	 */
	pvscsi_setup_all_rings(adapter);

	adapter->cmd_map = kcalloc(adapter->req_depth,
				   sizeof(struct pvscsi_ctx), GFP_KERNEL);
	if (!adapter->cmd_map) {
		printk(KERN_ERR "vmw_pvscsi: failed to allocate memory.\n");
		error = -ENOMEM;
		goto out_reset_adapter;
	}

	INIT_LIST_HEAD(&adapter->cmd_pool);
	for (i = 0; i < adapter->req_depth; i++) {
		struct pvscsi_ctx *ctx = adapter->cmd_map + i;
		list_add(&ctx->list, &adapter->cmd_pool);
	}

	error = pvscsi_allocate_sg(adapter);
	if (error) {
		printk(KERN_ERR "vmw_pvscsi: unable to allocate s/g table\n");
		goto out_reset_adapter;
	}

	if (pvscsi_disable_msix)
		irq_flag &= ~PCI_IRQ_MSIX;
	if (pvscsi_disable_msi)
		irq_flag &= ~PCI_IRQ_MSI;

	error = pci_alloc_irq_vectors(adapter->dev, 1, 1, irq_flag);
	if (error < 0)
		goto out_reset_adapter;

	adapter->use_req_threshold = pvscsi_setup_req_threshold(adapter, true);
	printk(KERN_DEBUG "vmw_pvscsi: driver-based request coalescing %sabled\n",
	       adapter->use_req_threshold ? "en" : "dis");

	if (adapter->dev->msix_enabled || adapter->dev->msi_enabled) {
		printk(KERN_INFO "vmw_pvscsi: using MSI%s\n",
			adapter->dev->msix_enabled ? "-X" : "");
		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_isr,
				0, "vmw_pvscsi", adapter);
	} else {
		printk(KERN_INFO "vmw_pvscsi: using INTx\n");
		error = request_irq(pci_irq_vector(pdev, 0), pvscsi_shared_isr,
				IRQF_SHARED, "vmw_pvscsi", adapter);
	}

	if (error) {
		printk(KERN_ERR
		       "vmw_pvscsi: unable to request IRQ: %d\n", error);
		goto out_reset_adapter;
	}

	error = scsi_add_host(host, &pdev->dev);
	if (error) {
		printk(KERN_ERR
		       "vmw_pvscsi: scsi_add_host failed: %d\n", error);
		goto out_reset_adapter;
	}

	dev_info(&pdev->dev, "VMware PVSCSI rev %d host #%u\n",
		 adapter->rev, host->host_no);

	pvscsi_unmask_intr(adapter);

	scsi_scan_host(host);

	return 0;

out_reset_adapter:
	ll_adapter_reset(adapter);
out_release_resources:
	pvscsi_shutdown_intr(adapter);
	pvscsi_release_resources(adapter);
	scsi_host_put(host);
out_disable_device:
	pci_disable_device(pdev);

	return error;

out_release_resources_and_disable:
	pvscsi_shutdown_intr(adapter);
	pvscsi_release_resources(adapter);
	goto out_disable_device;
}

static void __pvscsi_shutdown(struct pvscsi_adapter *adapter)
{
	pvscsi_mask_intr(adapter);

	if (adapter->workqueue)
		flush_workqueue(adapter->workqueue);

	pvscsi_shutdown_intr(adapter);

	pvscsi_process_request_ring(adapter);
	pvscsi_process_completion_ring(adapter);
	ll_adapter_reset(adapter);
}

static void pvscsi_shutdown(struct pci_dev *dev)
{
	struct Scsi_Host *host = pci_get_drvdata(dev);
	struct pvscsi_adapter *adapter = shost_priv(host);

	__pvscsi_shutdown(adapter);
}

static void pvscsi_remove(struct pci_dev *pdev)
{
	struct Scsi_Host *host = pci_get_drvdata(pdev);
	struct pvscsi_adapter *adapter = shost_priv(host);

	scsi_remove_host(host);

	__pvscsi_shutdown(adapter);
	pvscsi_release_resources(adapter);

	scsi_host_put(host);

	pci_disable_device(pdev);
}

static struct pci_driver pvscsi_pci_driver = {
	.name		= "vmw_pvscsi",
	.id_table	= pvscsi_pci_tbl,
	.probe		= pvscsi_probe,
	.remove		= pvscsi_remove,
	.shutdown       = pvscsi_shutdown,
};

static int __init pvscsi_init(void)
{
	pr_info("%s - version %s\n",
		PVSCSI_LINUX_DRIVER_DESC, PVSCSI_DRIVER_VERSION_STRING);
	return pci_register_driver(&pvscsi_pci_driver);
}

static void __exit pvscsi_exit(void)
{
	pci_unregister_driver(&pvscsi_pci_driver);
}

module_init(pvscsi_init);
module_exit(pvscsi_exit);