block: implement and enforce request peek/start/fetch
[safe/jmp/linux-2.6] / drivers / block / cciss.c
1 /*
2  *    Disk Array driver for HP Smart Array controllers.
3  *    (C) Copyright 2000, 2007 Hewlett-Packard Development Company, L.P.
4  *
5  *    This program is free software; you can redistribute it and/or modify
6  *    it under the terms of the GNU General Public License as published by
7  *    the Free Software Foundation; version 2 of the License.
8  *
9  *    This program is distributed in the hope that it will be useful,
10  *    but WITHOUT ANY WARRANTY; without even the implied warranty of
11  *    MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12  *    General Public License for more details.
13  *
14  *    You should have received a copy of the GNU General Public License
15  *    along with this program; if not, write to the Free Software
16  *    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
17  *    02111-1307, USA.
18  *
19  *    Questions/Comments/Bugfixes to iss_storagedev@hp.com
20  *
21  */
22
23 #include <linux/module.h>
24 #include <linux/interrupt.h>
25 #include <linux/types.h>
26 #include <linux/pci.h>
27 #include <linux/kernel.h>
28 #include <linux/slab.h>
29 #include <linux/delay.h>
30 #include <linux/major.h>
31 #include <linux/fs.h>
32 #include <linux/bio.h>
33 #include <linux/blkpg.h>
34 #include <linux/timer.h>
35 #include <linux/proc_fs.h>
36 #include <linux/seq_file.h>
37 #include <linux/init.h>
38 #include <linux/hdreg.h>
39 #include <linux/spinlock.h>
40 #include <linux/compat.h>
41 #include <linux/blktrace_api.h>
42 #include <asm/uaccess.h>
43 #include <asm/io.h>
44
45 #include <linux/dma-mapping.h>
46 #include <linux/blkdev.h>
47 #include <linux/genhd.h>
48 #include <linux/completion.h>
49 #include <scsi/scsi.h>
50 #include <scsi/sg.h>
51 #include <scsi/scsi_ioctl.h>
52 #include <linux/cdrom.h>
53 #include <linux/scatterlist.h>
54 #include <linux/kthread.h>
55
/*
 * Pack a three-part driver version into a single integer:
 * (maj << 16) | (min << 8) | submin.
 *
 * Every argument is parenthesized so that an expression argument
 * (for example "a | b") is evaluated as a unit before it is shifted.
 */
#define CCISS_DRIVER_VERSION(maj, min, submin) \
	(((maj) << 16) | ((min) << 8) | (submin))
#define DRIVER_NAME "HP CISS Driver (v 3.6.20)"
#define DRIVER_VERSION CCISS_DRIVER_VERSION(3, 6, 20)
59
60 /* Embedded module documentation macros - see modules.h */
61 MODULE_AUTHOR("Hewlett-Packard Company");
62 MODULE_DESCRIPTION("Driver for HP Smart Array Controllers");
63 MODULE_SUPPORTED_DEVICE("HP SA5i SA5i+ SA532 SA5300 SA5312 SA641 SA642 SA6400"
64                         " SA6i P600 P800 P400 P400i E200 E200i E500 P700m"
65                         " Smart Array G2 Series SAS/SATA Controllers");
66 MODULE_VERSION("3.6.20");
67 MODULE_LICENSE("GPL");
68
69 #include "cciss_cmd.h"
70 #include "cciss.h"
71 #include <linux/cciss_ioctl.h>
72
73 /* define the PCI info for the cards we can control */
/*
 * PCI match table, exported to the module loader through
 * MODULE_DEVICE_TABLE() below.  Each explicit row is written positionally as
 * { vendor, device, subsystem vendor, subsystem device }.  The
 * subsystem-device/subsystem-vendor pair of a row (e.g. 0x4070 / 0x0E11)
 * corresponds to a board_id in the products[] table (0x40700E11).
 */
74 static const struct pci_device_id cciss_pci_device_id[] = {
75         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISS,  0x0E11, 0x4070},
76         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4080},
77         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4082},
78         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSB, 0x0E11, 0x4083},
79         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x4091},
80         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409A},
81         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409B},
82         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409C},
83         {PCI_VENDOR_ID_COMPAQ, PCI_DEVICE_ID_COMPAQ_CISSC, 0x0E11, 0x409D},
84         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSA,     0x103C, 0x3225},
85         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3223},
86         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3234},
87         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3235},
88         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3211},
89         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3212},
90         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3213},
91         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3214},
92         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSD,     0x103C, 0x3215},
93         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x3237},
94         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSC,     0x103C, 0x323D},
95         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3241},
96         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3243},
97         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3245},
98         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3247},
99         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x3249},
100         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324A},
101         {PCI_VENDOR_ID_HP,     PCI_DEVICE_ID_HP_CISSE,     0x103C, 0x324B},
/*
 * Catch-all row: any HP device/subsystem whose class code matches
 * PCI_CLASS_STORAGE_RAID under the 0xffff << 8 mask.
 */
102         {PCI_VENDOR_ID_HP,     PCI_ANY_ID,      PCI_ANY_ID, PCI_ANY_ID,
103                 PCI_CLASS_STORAGE_RAID << 8, 0xffff << 8, 0},
/* all-zero terminator row */
104         {0,}
105 };
106
107 MODULE_DEVICE_TABLE(pci, cciss_pci_device_id);
108
109 /*  board_id = Subsystem Device ID & Vendor ID
110  *  product = Marketing Name for the board
111  *  access = Address of the struct of function pointers
112  */
/*
 * Known boards, written positionally as { board_id, product name, access }.
 * Comparing with cciss_pci_device_id[], board_id carries the subsystem device
 * ID in its upper 16 bits and the subsystem vendor ID in its lower 16 bits
 * (0x4070 / 0x0E11 -> 0x40700E11).
 */
113 static struct board_type products[] = {
114         {0x40700E11, "Smart Array 5300", &SA5_access},
115         {0x40800E11, "Smart Array 5i", &SA5B_access},
116         {0x40820E11, "Smart Array 532", &SA5B_access},
117         {0x40830E11, "Smart Array 5312", &SA5B_access},
118         {0x409A0E11, "Smart Array 641", &SA5_access},
119         {0x409B0E11, "Smart Array 642", &SA5_access},
120         {0x409C0E11, "Smart Array 6400", &SA5_access},
121         {0x409D0E11, "Smart Array 6400 EM", &SA5_access},
122         {0x40910E11, "Smart Array 6i", &SA5_access},
123         {0x3225103C, "Smart Array P600", &SA5_access},
124         {0x3223103C, "Smart Array P800", &SA5_access},
125         {0x3234103C, "Smart Array P400", &SA5_access},
126         {0x3235103C, "Smart Array P400i", &SA5_access},
127         {0x3211103C, "Smart Array E200i", &SA5_access},
128         {0x3212103C, "Smart Array E200", &SA5_access},
129         {0x3213103C, "Smart Array E200i", &SA5_access},
130         {0x3214103C, "Smart Array E200i", &SA5_access},
131         {0x3215103C, "Smart Array E200i", &SA5_access},
132         {0x3237103C, "Smart Array E500", &SA5_access},
133         {0x323D103C, "Smart Array P700m", &SA5_access},
134         {0x3241103C, "Smart Array P212", &SA5_access},
135         {0x3243103C, "Smart Array P410", &SA5_access},
136         {0x3245103C, "Smart Array P410i", &SA5_access},
137         {0x3247103C, "Smart Array P411", &SA5_access},
138         {0x3249103C, "Smart Array P812", &SA5_access},
139         {0x324A103C, "Smart Array P712m", &SA5_access},
140         {0x324B103C, "Smart Array P711m", &SA5_access},
/*
 * NOTE(review): presumably the fallback entry for HP boards matched only by
 * the class-based catch-all PCI row; the lookup code is not in this part of
 * the file, so confirm.
 */
141         {0xFFFF103C, "Unknown Smart Array", &SA5_access},
142 };
143
144 /* How long to wait (in milliseconds) for board to go into simple mode */
145 #define MAX_CONFIG_WAIT 30000
146 #define MAX_IOCTL_CONFIG_WAIT 1000
147
148 /*define how many times we will try a command because of bus resets */
149 #define MAX_CMD_RETRIES 3
150
151 #define MAX_CTLR        32
152
153 /* Originally cciss driver only supports 8 major numbers */
154 #define MAX_CTLR_ORIG   8
155
156 static ctlr_info_t *hba[MAX_CTLR];
157
158 static void do_cciss_request(struct request_queue *q);
159 static irqreturn_t do_cciss_intr(int irq, void *dev_id);
160 static int cciss_open(struct block_device *bdev, fmode_t mode);
161 static int cciss_release(struct gendisk *disk, fmode_t mode);
162 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
163                        unsigned int cmd, unsigned long arg);
164 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo);
165
166 static int cciss_revalidate(struct gendisk *disk);
167 static int rebuild_lun_table(ctlr_info_t *h, int first_time);
168 static int deregister_disk(ctlr_info_t *h, int drv_index,
169                            int clear_all);
170
171 static void cciss_read_capacity(int ctlr, int logvol, int withirq,
172                         sector_t *total_size, unsigned int *block_size);
173 static void cciss_read_capacity_16(int ctlr, int logvol, int withirq,
174                         sector_t *total_size, unsigned int *block_size);
175 static void cciss_geometry_inquiry(int ctlr, int logvol,
176                         int withirq, sector_t total_size,
177                         unsigned int block_size, InquiryData_struct *inq_buff,
178                                    drive_info_struct *drv);
179 static void __devinit cciss_interrupt_mode(ctlr_info_t *, struct pci_dev *,
180                                            __u32);
181 static void start_io(ctlr_info_t *h);
182 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size,
183                    unsigned int use_unit_num, unsigned int log_unit,
184                    __u8 page_code, unsigned char *scsi3addr, int cmd_type);
185 static int sendcmd_withirq(__u8 cmd, int ctlr, void *buff, size_t size,
186                            unsigned int use_unit_num, unsigned int log_unit,
187                            __u8 page_code, int cmd_type);
188
189 static void fail_all_cmds(unsigned long ctlr);
190 static int scan_thread(void *data);
191 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c);
192
193 #ifdef CONFIG_PROC_FS
194 static void cciss_procinit(int i);
195 #else
196 static void cciss_procinit(int i)
197 {
198 }
199 #endif                          /* CONFIG_PROC_FS */
200
201 #ifdef CONFIG_COMPAT
202 static int cciss_compat_ioctl(struct block_device *, fmode_t,
203                               unsigned, unsigned long);
204 #endif
205
/*
 * Block-device operations table: wires up this driver's open, release,
 * ioctl, geometry and revalidate handlers.  The 32-bit compat ioctl handler
 * is only hooked in when CONFIG_COMPAT is enabled.
 */
206 static struct block_device_operations cciss_fops = {
207         .owner = THIS_MODULE,
208         .open = cciss_open,
209         .release = cciss_release,
210         .locked_ioctl = cciss_ioctl,
211         .getgeo = cciss_getgeo,
212 #ifdef CONFIG_COMPAT
213         .compat_ioctl = cciss_compat_ioctl,
214 #endif
215         .revalidate_disk = cciss_revalidate,
216 };
217
218 /*
219  * Enqueuing and dequeuing functions for cmdlists.
220  */
221 static inline void addQ(struct hlist_head *list, CommandList_struct *c)
222 {
223         hlist_add_head(&c->list, list);
224 }
225
226 static inline void removeQ(CommandList_struct *c)
227 {
228         if (WARN_ON(hlist_unhashed(&c->list)))
229                 return;
230
231         hlist_del_init(&c->list);
232 }
233
234 #include "cciss_scsi.c"         /* For SCSI tape support */
235
236 #define RAID_UNKNOWN 6
237
238 #ifdef CONFIG_PROC_FS
239
240 /*
241  * Report information about this controller.
242  */
243 #define ENG_GIG 1000000000
244 #define ENG_GIG_FACTOR (ENG_GIG/512)
245 #define ENGAGE_SCSI     "engage scsi"
/*
 * Printable RAID level names, indexed by a drive's raid_level.  The last
 * slot (index RAID_UNKNOWN == 6) is the "UNKNOWN" label that
 * cciss_seq_show() selects for any raid_level above 5.
 */
246 static const char *raid_label[] = { "0", "4", "1(1+0)", "5", "5+1", "ADG",
247         "UNKNOWN"
248 };
249
250 static struct proc_dir_entry *proc_cciss;
251
/*
 * Emit the per-controller summary at the top of the /proc file: name,
 * product, board ID, firmware version, IRQ, logical drive count and the
 * queue-depth / outstanding-command / scatter-gather statistics kept in the
 * controller structure (seq->private).  With CONFIG_CISS_SCSI_TAPE the tape
 * report for the controller is appended.
 */
252 static void cciss_seq_show_header(struct seq_file *seq)
253 {
254         ctlr_info_t *h = seq->private;
255
256         seq_printf(seq, "%s: HP %s Controller\n"
257                 "Board ID: 0x%08lx\n"
258                 "Firmware Version: %c%c%c%c\n"
259                 "IRQ: %d\n"
260                 "Logical drives: %d\n"
261                 "Current Q depth: %d\n"
262                 "Current # commands on controller: %d\n"
263                 "Max Q depth since init: %d\n"
264                 "Max # commands on controller since init: %d\n"
265                 "Max SG entries since init: %d\n",
266                 h->devname,
267                 h->product_name,
268                 (unsigned long)h->board_id,
269                 h->firm_ver[0], h->firm_ver[1], h->firm_ver[2],
270                 h->firm_ver[3], (unsigned int)h->intr[SIMPLE_MODE_INT],
271                 h->num_luns,
272                 h->Qdepth, h->commands_outstanding,
273                 h->maxQsinceinit, h->max_outstanding, h->maxSG);
274
275 #ifdef CONFIG_CISS_SCSI_TAPE
276         cciss_seq_tape_report(seq, h->ctlr);
277 #endif /* CONFIG_CISS_SCSI_TAPE */
278 }
279
280 static void *cciss_seq_start(struct seq_file *seq, loff_t *pos)
281 {
282         ctlr_info_t *h = seq->private;
283         unsigned ctlr = h->ctlr;
284         unsigned long flags;
285
286         /* prevent displaying bogus info during configuration
287          * or deconfiguration of a logical volume
288          */
289         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
290         if (h->busy_configuring) {
291                 spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
292                 return ERR_PTR(-EBUSY);
293         }
294         h->busy_configuring = 1;
295         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
296
297         if (*pos == 0)
298                 cciss_seq_show_header(seq);
299
300         return pos;
301 }
302
303 static int cciss_seq_show(struct seq_file *seq, void *v)
304 {
305         sector_t vol_sz, vol_sz_frac;
306         ctlr_info_t *h = seq->private;
307         unsigned ctlr = h->ctlr;
308         loff_t *pos = v;
309         drive_info_struct *drv = &h->drv[*pos];
310
311         if (*pos > h->highest_lun)
312                 return 0;
313
314         if (drv->heads == 0)
315                 return 0;
316
317         vol_sz = drv->nr_blocks;
318         vol_sz_frac = sector_div(vol_sz, ENG_GIG_FACTOR);
319         vol_sz_frac *= 100;
320         sector_div(vol_sz_frac, ENG_GIG_FACTOR);
321
322         if (drv->raid_level > 5)
323                 drv->raid_level = RAID_UNKNOWN;
324         seq_printf(seq, "cciss/c%dd%d:"
325                         "\t%4u.%02uGB\tRAID %s\n",
326                         ctlr, (int) *pos, (int)vol_sz, (int)vol_sz_frac,
327                         raid_label[drv->raid_level]);
328         return 0;
329 }
330
331 static void *cciss_seq_next(struct seq_file *seq, void *v, loff_t *pos)
332 {
333         ctlr_info_t *h = seq->private;
334
335         if (*pos > h->highest_lun)
336                 return NULL;
337         *pos += 1;
338
339         return pos;
340 }
341
342 static void cciss_seq_stop(struct seq_file *seq, void *v)
343 {
344         ctlr_info_t *h = seq->private;
345
346         /* Only reset h->busy_configuring if we succeeded in setting
347          * it during cciss_seq_start. */
348         if (v == ERR_PTR(-EBUSY))
349                 return;
350
351         h->busy_configuring = 0;
352 }
353
/* seq_file iterator callbacks, handed to seq_open() by cciss_seq_open(). */
354 static struct seq_operations cciss_seq_ops = {
355         .start = cciss_seq_start,
356         .show  = cciss_seq_show,
357         .next  = cciss_seq_next,
358         .stop  = cciss_seq_stop,
359 };
360
361 static int cciss_seq_open(struct inode *inode, struct file *file)
362 {
363         int ret = seq_open(file, &cciss_seq_ops);
364         struct seq_file *seq = file->private_data;
365
366         if (!ret)
367                 seq->private = PDE(inode)->data;
368
369         return ret;
370 }
371
372 static ssize_t
373 cciss_proc_write(struct file *file, const char __user *buf,
374                  size_t length, loff_t *ppos)
375 {
376         int err;
377         char *buffer;
378
379 #ifndef CONFIG_CISS_SCSI_TAPE
380         return -EINVAL;
381 #endif
382
383         if (!buf || length > PAGE_SIZE - 1)
384                 return -EINVAL;
385
386         buffer = (char *)__get_free_page(GFP_KERNEL);
387         if (!buffer)
388                 return -ENOMEM;
389
390         err = -EFAULT;
391         if (copy_from_user(buffer, buf, length))
392                 goto out;
393         buffer[length] = '\0';
394
395 #ifdef CONFIG_CISS_SCSI_TAPE
396         if (strncmp(ENGAGE_SCSI, buffer, sizeof ENGAGE_SCSI - 1) == 0) {
397                 struct seq_file *seq = file->private_data;
398                 ctlr_info_t *h = seq->private;
399                 int rc;
400
401                 rc = cciss_engage_scsi(h->ctlr);
402                 if (rc != 0)
403                         err = -rc;
404                 else
405                         err = length;
406         } else
407 #endif /* CONFIG_CISS_SCSI_TAPE */
408                 err = -EINVAL;
409         /* might be nice to have "disengage" too, but it's not
410            safely possible. (only 1 module use count, lock issues.) */
411
412 out:
413         free_page((unsigned long)buffer);
414         return err;
415 }
416
/*
 * File operations for the per-controller /proc entry: reads go through the
 * generic seq_file helpers, writes are handled by cciss_proc_write().
 */
417 static struct file_operations cciss_proc_fops = {
418         .owner   = THIS_MODULE,
419         .open    = cciss_seq_open,
420         .read    = seq_read,
421         .llseek  = seq_lseek,
422         .release = seq_release,
423         .write   = cciss_proc_write,
424 };
425
426 static void __devinit cciss_procinit(int i)
427 {
428         struct proc_dir_entry *pde;
429
430         if (proc_cciss == NULL)
431                 proc_cciss = proc_mkdir("driver/cciss", NULL);
432         if (!proc_cciss)
433                 return;
434         pde = proc_create_data(hba[i]->devname, S_IWUSR | S_IRUSR | S_IRGRP |
435                                         S_IROTH, proc_cciss,
436                                         &cciss_proc_fops, hba[i]);
437 }
438 #endif                          /* CONFIG_PROC_FS */
439
440 /*
441  * For operations that cannot sleep, a command block is allocated at init,
442  * and managed by cmd_alloc() and cmd_free() using a simple bitmap to track
443  * which ones are free or in use.  For operations that can wait for kmalloc
444  * to possible sleep, this routine can be called with get_from_pool set to 0.
445  * cmd_free() MUST be called with a got_from_pool set to 0 if cmd_alloc was.
446  */
447 static CommandList_struct *cmd_alloc(ctlr_info_t *h, int get_from_pool)
448 {
449         CommandList_struct *c;
450         int i;
451         u64bit temp64;
452         dma_addr_t cmd_dma_handle, err_dma_handle;
453
454         if (!get_from_pool) {
455                 c = (CommandList_struct *) pci_alloc_consistent(h->pdev,
456                         sizeof(CommandList_struct), &cmd_dma_handle);
457                 if (c == NULL)
458                         return NULL;
459                 memset(c, 0, sizeof(CommandList_struct));
460
461                 c->cmdindex = -1;
462
463                 c->err_info = (ErrorInfo_struct *)
464                     pci_alloc_consistent(h->pdev, sizeof(ErrorInfo_struct),
465                             &err_dma_handle);
466
467                 if (c->err_info == NULL) {
468                         pci_free_consistent(h->pdev,
469                                 sizeof(CommandList_struct), c, cmd_dma_handle);
470                         return NULL;
471                 }
472                 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
473         } else {                /* get it out of the controllers pool */
474
475                 do {
476                         i = find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds);
477                         if (i == h->nr_cmds)
478                                 return NULL;
479                 } while (test_and_set_bit
480                          (i & (BITS_PER_LONG - 1),
481                           h->cmd_pool_bits + (i / BITS_PER_LONG)) != 0);
482 #ifdef CCISS_DEBUG
483                 printk(KERN_DEBUG "cciss: using command buffer %d\n", i);
484 #endif
485                 c = h->cmd_pool + i;
486                 memset(c, 0, sizeof(CommandList_struct));
487                 cmd_dma_handle = h->cmd_pool_dhandle
488                     + i * sizeof(CommandList_struct);
489                 c->err_info = h->errinfo_pool + i;
490                 memset(c->err_info, 0, sizeof(ErrorInfo_struct));
491                 err_dma_handle = h->errinfo_pool_dhandle
492                     + i * sizeof(ErrorInfo_struct);
493                 h->nr_allocs++;
494
495                 c->cmdindex = i;
496         }
497
498         INIT_HLIST_NODE(&c->list);
499         c->busaddr = (__u32) cmd_dma_handle;
500         temp64.val = (__u64) err_dma_handle;
501         c->ErrDesc.Addr.lower = temp64.val32.lower;
502         c->ErrDesc.Addr.upper = temp64.val32.upper;
503         c->ErrDesc.Len = sizeof(ErrorInfo_struct);
504
505         c->ctlr = h->ctlr;
506         return c;
507 }
508
509 /*
510  * Frees a command block that was previously allocated with cmd_alloc().
511  */
512 static void cmd_free(ctlr_info_t *h, CommandList_struct *c, int got_from_pool)
513 {
514         int i;
515         u64bit temp64;
516
517         if (!got_from_pool) {
518                 temp64.val32.lower = c->ErrDesc.Addr.lower;
519                 temp64.val32.upper = c->ErrDesc.Addr.upper;
520                 pci_free_consistent(h->pdev, sizeof(ErrorInfo_struct),
521                                     c->err_info, (dma_addr_t) temp64.val);
522                 pci_free_consistent(h->pdev, sizeof(CommandList_struct),
523                                     c, (dma_addr_t) c->busaddr);
524         } else {
525                 i = c - h->cmd_pool;
526                 clear_bit(i & (BITS_PER_LONG - 1),
527                           h->cmd_pool_bits + (i / BITS_PER_LONG));
528                 h->nr_frees++;
529         }
530 }
531
532 static inline ctlr_info_t *get_host(struct gendisk *disk)
533 {
534         return disk->queue->queuedata;
535 }
536
537 static inline drive_info_struct *get_drv(struct gendisk *disk)
538 {
539         return disk->private_data;
540 }
541
542 /*
543  * Open.  Make sure the device is really there.
544  */
545 static int cciss_open(struct block_device *bdev, fmode_t mode)
546 {
547         ctlr_info_t *host = get_host(bdev->bd_disk);
548         drive_info_struct *drv = get_drv(bdev->bd_disk);
549
550 #ifdef CCISS_DEBUG
551         printk(KERN_DEBUG "cciss_open %s\n", bdev->bd_disk->disk_name);
552 #endif                          /* CCISS_DEBUG */
553
554         if (host->busy_initializing || drv->busy_configuring)
555                 return -EBUSY;
556         /*
557          * Root is allowed to open raw volume zero even if it's not configured
558          * so array config can still work. Root is also allowed to open any
559          * volume that has a LUN ID, so it can issue IOCTL to reread the
560          * disk information.  I don't think I really like this
561          * but I'm already using way to many device nodes to claim another one
562          * for "raw controller".
563          */
564         if (drv->heads == 0) {
565                 if (MINOR(bdev->bd_dev) != 0) { /* not node 0? */
566                         /* if not node 0 make sure it is a partition = 0 */
567                         if (MINOR(bdev->bd_dev) & 0x0f) {
568                                 return -ENXIO;
569                                 /* if it is, make sure we have a LUN ID */
570                         } else if (drv->LunID == 0) {
571                                 return -ENXIO;
572                         }
573                 }
574                 if (!capable(CAP_SYS_ADMIN))
575                         return -EPERM;
576         }
577         drv->usage_count++;
578         host->usage_count++;
579         return 0;
580 }
581
582 /*
583  * Close.  Sync first.
584  */
585 static int cciss_release(struct gendisk *disk, fmode_t mode)
586 {
587         ctlr_info_t *host = get_host(disk);
588         drive_info_struct *drv = get_drv(disk);
589
590 #ifdef CCISS_DEBUG
591         printk(KERN_DEBUG "cciss_release %s\n", disk->disk_name);
592 #endif                          /* CCISS_DEBUG */
593
594         drv->usage_count--;
595         host->usage_count--;
596         return 0;
597 }
598
599 #ifdef CONFIG_COMPAT
600
601 static int do_ioctl(struct block_device *bdev, fmode_t mode,
602                     unsigned cmd, unsigned long arg)
603 {
604         int ret;
605         lock_kernel();
606         ret = cciss_ioctl(bdev, mode, cmd, arg);
607         unlock_kernel();
608         return ret;
609 }
610
611 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
612                                   unsigned cmd, unsigned long arg);
613 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
614                                       unsigned cmd, unsigned long arg);
615
616 static int cciss_compat_ioctl(struct block_device *bdev, fmode_t mode,
617                               unsigned cmd, unsigned long arg)
618 {
619         switch (cmd) {
620         case CCISS_GETPCIINFO:
621         case CCISS_GETINTINFO:
622         case CCISS_SETINTINFO:
623         case CCISS_GETNODENAME:
624         case CCISS_SETNODENAME:
625         case CCISS_GETHEARTBEAT:
626         case CCISS_GETBUSTYPES:
627         case CCISS_GETFIRMVER:
628         case CCISS_GETDRIVVER:
629         case CCISS_REVALIDVOLS:
630         case CCISS_DEREGDISK:
631         case CCISS_REGNEWDISK:
632         case CCISS_REGNEWD:
633         case CCISS_RESCANDISK:
634         case CCISS_GETLUNINFO:
635                 return do_ioctl(bdev, mode, cmd, arg);
636
637         case CCISS_PASSTHRU32:
638                 return cciss_ioctl32_passthru(bdev, mode, cmd, arg);
639         case CCISS_BIG_PASSTHRU32:
640                 return cciss_ioctl32_big_passthru(bdev, mode, cmd, arg);
641
642         default:
643                 return -ENOIOCTLCMD;
644         }
645 }
646
647 static int cciss_ioctl32_passthru(struct block_device *bdev, fmode_t mode,
648                                   unsigned cmd, unsigned long arg)
649 {
650         IOCTL32_Command_struct __user *arg32 =
651             (IOCTL32_Command_struct __user *) arg;
652         IOCTL_Command_struct arg64;
653         IOCTL_Command_struct __user *p = compat_alloc_user_space(sizeof(arg64));
654         int err;
655         u32 cp;
656
657         err = 0;
658         err |=
659             copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
660                            sizeof(arg64.LUN_info));
661         err |=
662             copy_from_user(&arg64.Request, &arg32->Request,
663                            sizeof(arg64.Request));
664         err |=
665             copy_from_user(&arg64.error_info, &arg32->error_info,
666                            sizeof(arg64.error_info));
667         err |= get_user(arg64.buf_size, &arg32->buf_size);
668         err |= get_user(cp, &arg32->buf);
669         arg64.buf = compat_ptr(cp);
670         err |= copy_to_user(p, &arg64, sizeof(arg64));
671
672         if (err)
673                 return -EFAULT;
674
675         err = do_ioctl(bdev, mode, CCISS_PASSTHRU, (unsigned long)p);
676         if (err)
677                 return err;
678         err |=
679             copy_in_user(&arg32->error_info, &p->error_info,
680                          sizeof(arg32->error_info));
681         if (err)
682                 return -EFAULT;
683         return err;
684 }
685
686 static int cciss_ioctl32_big_passthru(struct block_device *bdev, fmode_t mode,
687                                       unsigned cmd, unsigned long arg)
688 {
689         BIG_IOCTL32_Command_struct __user *arg32 =
690             (BIG_IOCTL32_Command_struct __user *) arg;
691         BIG_IOCTL_Command_struct arg64;
692         BIG_IOCTL_Command_struct __user *p =
693             compat_alloc_user_space(sizeof(arg64));
694         int err;
695         u32 cp;
696
697         err = 0;
698         err |=
699             copy_from_user(&arg64.LUN_info, &arg32->LUN_info,
700                            sizeof(arg64.LUN_info));
701         err |=
702             copy_from_user(&arg64.Request, &arg32->Request,
703                            sizeof(arg64.Request));
704         err |=
705             copy_from_user(&arg64.error_info, &arg32->error_info,
706                            sizeof(arg64.error_info));
707         err |= get_user(arg64.buf_size, &arg32->buf_size);
708         err |= get_user(arg64.malloc_size, &arg32->malloc_size);
709         err |= get_user(cp, &arg32->buf);
710         arg64.buf = compat_ptr(cp);
711         err |= copy_to_user(p, &arg64, sizeof(arg64));
712
713         if (err)
714                 return -EFAULT;
715
716         err = do_ioctl(bdev, mode, CCISS_BIG_PASSTHRU, (unsigned long)p);
717         if (err)
718                 return err;
719         err |=
720             copy_in_user(&arg32->error_info, &p->error_info,
721                          sizeof(arg32->error_info));
722         if (err)
723                 return -EFAULT;
724         return err;
725 }
726 #endif
727
728 static int cciss_getgeo(struct block_device *bdev, struct hd_geometry *geo)
729 {
730         drive_info_struct *drv = get_drv(bdev->bd_disk);
731
732         if (!drv->cylinders)
733                 return -ENXIO;
734
735         geo->heads = drv->heads;
736         geo->sectors = drv->sectors;
737         geo->cylinders = drv->cylinders;
738         return 0;
739 }
740
741 static void check_ioctl_unit_attention(ctlr_info_t *host, CommandList_struct *c)
742 {
743         if (c->err_info->CommandStatus == CMD_TARGET_STATUS &&
744                         c->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION)
745                 (void)check_for_unit_attention(host, c);
746 }
747 /*
748  * ioctl
749  */
750 static int cciss_ioctl(struct block_device *bdev, fmode_t mode,
751                        unsigned int cmd, unsigned long arg)
752 {
753         struct gendisk *disk = bdev->bd_disk;
754         ctlr_info_t *host = get_host(disk);
755         drive_info_struct *drv = get_drv(disk);
756         int ctlr = host->ctlr;
757         void __user *argp = (void __user *)arg;
758
759 #ifdef CCISS_DEBUG
760         printk(KERN_DEBUG "cciss_ioctl: Called with cmd=%x %lx\n", cmd, arg);
761 #endif                          /* CCISS_DEBUG */
762
763         switch (cmd) {
764         case CCISS_GETPCIINFO:
765                 {
766                         cciss_pci_info_struct pciinfo;
767
768                         if (!arg)
769                                 return -EINVAL;
770                         pciinfo.domain = pci_domain_nr(host->pdev->bus);
771                         pciinfo.bus = host->pdev->bus->number;
772                         pciinfo.dev_fn = host->pdev->devfn;
773                         pciinfo.board_id = host->board_id;
774                         if (copy_to_user
775                             (argp, &pciinfo, sizeof(cciss_pci_info_struct)))
776                                 return -EFAULT;
777                         return 0;
778                 }
779         case CCISS_GETINTINFO:
780                 {
781                         cciss_coalint_struct intinfo;
782                         if (!arg)
783                                 return -EINVAL;
784                         intinfo.delay =
785                             readl(&host->cfgtable->HostWrite.CoalIntDelay);
786                         intinfo.count =
787                             readl(&host->cfgtable->HostWrite.CoalIntCount);
788                         if (copy_to_user
789                             (argp, &intinfo, sizeof(cciss_coalint_struct)))
790                                 return -EFAULT;
791                         return 0;
792                 }
793         case CCISS_SETINTINFO:
794                 {
795                         cciss_coalint_struct intinfo;
796                         unsigned long flags;
797                         int i;
798
799                         if (!arg)
800                                 return -EINVAL;
801                         if (!capable(CAP_SYS_ADMIN))
802                                 return -EPERM;
803                         if (copy_from_user
804                             (&intinfo, argp, sizeof(cciss_coalint_struct)))
805                                 return -EFAULT;
806                         if ((intinfo.delay == 0) && (intinfo.count == 0))
807                         {
808 //                      printk("cciss_ioctl: delay and count cannot be 0\n");
809                                 return -EINVAL;
810                         }
811                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
812                         /* Update the field, and then ring the doorbell */
813                         writel(intinfo.delay,
814                                &(host->cfgtable->HostWrite.CoalIntDelay));
815                         writel(intinfo.count,
816                                &(host->cfgtable->HostWrite.CoalIntCount));
817                         writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
818
819                         for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
820                                 if (!(readl(host->vaddr + SA5_DOORBELL)
821                                       & CFGTBL_ChangeReq))
822                                         break;
823                                 /* delay and try again */
824                                 udelay(1000);
825                         }
826                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
827                         if (i >= MAX_IOCTL_CONFIG_WAIT)
828                                 return -EAGAIN;
829                         return 0;
830                 }
831         case CCISS_GETNODENAME:
832                 {
833                         NodeName_type NodeName;
834                         int i;
835
836                         if (!arg)
837                                 return -EINVAL;
838                         for (i = 0; i < 16; i++)
839                                 NodeName[i] =
840                                     readb(&host->cfgtable->ServerName[i]);
841                         if (copy_to_user(argp, NodeName, sizeof(NodeName_type)))
842                                 return -EFAULT;
843                         return 0;
844                 }
845         case CCISS_SETNODENAME:
846                 {
847                         NodeName_type NodeName;
848                         unsigned long flags;
849                         int i;
850
851                         if (!arg)
852                                 return -EINVAL;
853                         if (!capable(CAP_SYS_ADMIN))
854                                 return -EPERM;
855
856                         if (copy_from_user
857                             (NodeName, argp, sizeof(NodeName_type)))
858                                 return -EFAULT;
859
860                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
861
862                         /* Update the field, and then ring the doorbell */
863                         for (i = 0; i < 16; i++)
864                                 writeb(NodeName[i],
865                                        &host->cfgtable->ServerName[i]);
866
867                         writel(CFGTBL_ChangeReq, host->vaddr + SA5_DOORBELL);
868
869                         for (i = 0; i < MAX_IOCTL_CONFIG_WAIT; i++) {
870                                 if (!(readl(host->vaddr + SA5_DOORBELL)
871                                       & CFGTBL_ChangeReq))
872                                         break;
873                                 /* delay and try again */
874                                 udelay(1000);
875                         }
876                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
877                         if (i >= MAX_IOCTL_CONFIG_WAIT)
878                                 return -EAGAIN;
879                         return 0;
880                 }
881
882         case CCISS_GETHEARTBEAT:
883                 {
884                         Heartbeat_type heartbeat;
885
886                         if (!arg)
887                                 return -EINVAL;
888                         heartbeat = readl(&host->cfgtable->HeartBeat);
889                         if (copy_to_user
890                             (argp, &heartbeat, sizeof(Heartbeat_type)))
891                                 return -EFAULT;
892                         return 0;
893                 }
894         case CCISS_GETBUSTYPES:
895                 {
896                         BusTypes_type BusTypes;
897
898                         if (!arg)
899                                 return -EINVAL;
900                         BusTypes = readl(&host->cfgtable->BusTypes);
901                         if (copy_to_user
902                             (argp, &BusTypes, sizeof(BusTypes_type)))
903                                 return -EFAULT;
904                         return 0;
905                 }
906         case CCISS_GETFIRMVER:
907                 {
908                         FirmwareVer_type firmware;
909
910                         if (!arg)
911                                 return -EINVAL;
912                         memcpy(firmware, host->firm_ver, 4);
913
914                         if (copy_to_user
915                             (argp, firmware, sizeof(FirmwareVer_type)))
916                                 return -EFAULT;
917                         return 0;
918                 }
919         case CCISS_GETDRIVVER:
920                 {
921                         DriverVer_type DriverVer = DRIVER_VERSION;
922
923                         if (!arg)
924                                 return -EINVAL;
925
926                         if (copy_to_user
927                             (argp, &DriverVer, sizeof(DriverVer_type)))
928                                 return -EFAULT;
929                         return 0;
930                 }
931
932         case CCISS_DEREGDISK:
933         case CCISS_REGNEWD:
934         case CCISS_REVALIDVOLS:
935                 return rebuild_lun_table(host, 0);
936
937         case CCISS_GETLUNINFO:{
938                         LogvolInfo_struct luninfo;
939
940                         luninfo.LunID = drv->LunID;
941                         luninfo.num_opens = drv->usage_count;
942                         luninfo.num_parts = 0;
943                         if (copy_to_user(argp, &luninfo,
944                                          sizeof(LogvolInfo_struct)))
945                                 return -EFAULT;
946                         return 0;
947                 }
948         case CCISS_PASSTHRU:
949                 {
950                         IOCTL_Command_struct iocommand;
951                         CommandList_struct *c;
952                         char *buff = NULL;
953                         u64bit temp64;
954                         unsigned long flags;
955                         DECLARE_COMPLETION_ONSTACK(wait);
956
957                         if (!arg)
958                                 return -EINVAL;
959
960                         if (!capable(CAP_SYS_RAWIO))
961                                 return -EPERM;
962
963                         if (copy_from_user
964                             (&iocommand, argp, sizeof(IOCTL_Command_struct)))
965                                 return -EFAULT;
966                         if ((iocommand.buf_size < 1) &&
967                             (iocommand.Request.Type.Direction != XFER_NONE)) {
968                                 return -EINVAL;
969                         }
970 #if 0                           /* 'buf_size' member is 16-bits, and always smaller than kmalloc limit */
971                         /* Check kmalloc limits */
972                         if (iocommand.buf_size > 128000)
973                                 return -EINVAL;
974 #endif
975                         if (iocommand.buf_size > 0) {
976                                 buff = kmalloc(iocommand.buf_size, GFP_KERNEL);
977                                 if (buff == NULL)
978                                         return -EFAULT;
979                         }
980                         if (iocommand.Request.Type.Direction == XFER_WRITE) {
981                                 /* Copy the data into the buffer we created */
982                                 if (copy_from_user
983                                     (buff, iocommand.buf, iocommand.buf_size)) {
984                                         kfree(buff);
985                                         return -EFAULT;
986                                 }
987                         } else {
988                                 memset(buff, 0, iocommand.buf_size);
989                         }
990                         if ((c = cmd_alloc(host, 0)) == NULL) {
991                                 kfree(buff);
992                                 return -ENOMEM;
993                         }
994                         // Fill in the command type
995                         c->cmd_type = CMD_IOCTL_PEND;
996                         // Fill in Command Header
997                         c->Header.ReplyQueue = 0;       // unused in simple mode
998                         if (iocommand.buf_size > 0)     // buffer to fill
999                         {
1000                                 c->Header.SGList = 1;
1001                                 c->Header.SGTotal = 1;
1002                         } else  // no buffers to fill
1003                         {
1004                                 c->Header.SGList = 0;
1005                                 c->Header.SGTotal = 0;
1006                         }
1007                         c->Header.LUN = iocommand.LUN_info;
1008                         c->Header.Tag.lower = c->busaddr;       // use the kernel address the cmd block for tag
1009
1010                         // Fill in Request block
1011                         c->Request = iocommand.Request;
1012
1013                         // Fill in the scatter gather information
1014                         if (iocommand.buf_size > 0) {
1015                                 temp64.val = pci_map_single(host->pdev, buff,
1016                                         iocommand.buf_size,
1017                                         PCI_DMA_BIDIRECTIONAL);
1018                                 c->SG[0].Addr.lower = temp64.val32.lower;
1019                                 c->SG[0].Addr.upper = temp64.val32.upper;
1020                                 c->SG[0].Len = iocommand.buf_size;
1021                                 c->SG[0].Ext = 0;       // we are not chaining
1022                         }
1023                         c->waiting = &wait;
1024
1025                         /* Put the request on the tail of the request queue */
1026                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1027                         addQ(&host->reqQ, c);
1028                         host->Qdepth++;
1029                         start_io(host);
1030                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1031
1032                         wait_for_completion(&wait);
1033
1034                         /* unlock the buffers from DMA */
1035                         temp64.val32.lower = c->SG[0].Addr.lower;
1036                         temp64.val32.upper = c->SG[0].Addr.upper;
1037                         pci_unmap_single(host->pdev, (dma_addr_t) temp64.val,
1038                                          iocommand.buf_size,
1039                                          PCI_DMA_BIDIRECTIONAL);
1040
1041                         check_ioctl_unit_attention(host, c);
1042
1043                         /* Copy the error information out */
1044                         iocommand.error_info = *(c->err_info);
1045                         if (copy_to_user
1046                             (argp, &iocommand, sizeof(IOCTL_Command_struct))) {
1047                                 kfree(buff);
1048                                 cmd_free(host, c, 0);
1049                                 return -EFAULT;
1050                         }
1051
1052                         if (iocommand.Request.Type.Direction == XFER_READ) {
1053                                 /* Copy the data out of the buffer we created */
1054                                 if (copy_to_user
1055                                     (iocommand.buf, buff, iocommand.buf_size)) {
1056                                         kfree(buff);
1057                                         cmd_free(host, c, 0);
1058                                         return -EFAULT;
1059                                 }
1060                         }
1061                         kfree(buff);
1062                         cmd_free(host, c, 0);
1063                         return 0;
1064                 }
1065         case CCISS_BIG_PASSTHRU:{
1066                         BIG_IOCTL_Command_struct *ioc;
1067                         CommandList_struct *c;
1068                         unsigned char **buff = NULL;
1069                         int *buff_size = NULL;
1070                         u64bit temp64;
1071                         unsigned long flags;
1072                         BYTE sg_used = 0;
1073                         int status = 0;
1074                         int i;
1075                         DECLARE_COMPLETION_ONSTACK(wait);
1076                         __u32 left;
1077                         __u32 sz;
1078                         BYTE __user *data_ptr;
1079
1080                         if (!arg)
1081                                 return -EINVAL;
1082                         if (!capable(CAP_SYS_RAWIO))
1083                                 return -EPERM;
1084                         ioc = (BIG_IOCTL_Command_struct *)
1085                             kmalloc(sizeof(*ioc), GFP_KERNEL);
1086                         if (!ioc) {
1087                                 status = -ENOMEM;
1088                                 goto cleanup1;
1089                         }
1090                         if (copy_from_user(ioc, argp, sizeof(*ioc))) {
1091                                 status = -EFAULT;
1092                                 goto cleanup1;
1093                         }
1094                         if ((ioc->buf_size < 1) &&
1095                             (ioc->Request.Type.Direction != XFER_NONE)) {
1096                                 status = -EINVAL;
1097                                 goto cleanup1;
1098                         }
1099                         /* Check kmalloc limits  using all SGs */
1100                         if (ioc->malloc_size > MAX_KMALLOC_SIZE) {
1101                                 status = -EINVAL;
1102                                 goto cleanup1;
1103                         }
1104                         if (ioc->buf_size > ioc->malloc_size * MAXSGENTRIES) {
1105                                 status = -EINVAL;
1106                                 goto cleanup1;
1107                         }
1108                         buff =
1109                             kzalloc(MAXSGENTRIES * sizeof(char *), GFP_KERNEL);
1110                         if (!buff) {
1111                                 status = -ENOMEM;
1112                                 goto cleanup1;
1113                         }
1114                         buff_size = kmalloc(MAXSGENTRIES * sizeof(int),
1115                                                    GFP_KERNEL);
1116                         if (!buff_size) {
1117                                 status = -ENOMEM;
1118                                 goto cleanup1;
1119                         }
1120                         left = ioc->buf_size;
1121                         data_ptr = ioc->buf;
1122                         while (left) {
1123                                 sz = (left >
1124                                       ioc->malloc_size) ? ioc->
1125                                     malloc_size : left;
1126                                 buff_size[sg_used] = sz;
1127                                 buff[sg_used] = kmalloc(sz, GFP_KERNEL);
1128                                 if (buff[sg_used] == NULL) {
1129                                         status = -ENOMEM;
1130                                         goto cleanup1;
1131                                 }
1132                                 if (ioc->Request.Type.Direction == XFER_WRITE) {
1133                                         if (copy_from_user
1134                                             (buff[sg_used], data_ptr, sz)) {
1135                                                 status = -EFAULT;
1136                                                 goto cleanup1;
1137                                         }
1138                                 } else {
1139                                         memset(buff[sg_used], 0, sz);
1140                                 }
1141                                 left -= sz;
1142                                 data_ptr += sz;
1143                                 sg_used++;
1144                         }
1145                         if ((c = cmd_alloc(host, 0)) == NULL) {
1146                                 status = -ENOMEM;
1147                                 goto cleanup1;
1148                         }
1149                         c->cmd_type = CMD_IOCTL_PEND;
1150                         c->Header.ReplyQueue = 0;
1151
1152                         if (ioc->buf_size > 0) {
1153                                 c->Header.SGList = sg_used;
1154                                 c->Header.SGTotal = sg_used;
1155                         } else {
1156                                 c->Header.SGList = 0;
1157                                 c->Header.SGTotal = 0;
1158                         }
1159                         c->Header.LUN = ioc->LUN_info;
1160                         c->Header.Tag.lower = c->busaddr;
1161
1162                         c->Request = ioc->Request;
1163                         if (ioc->buf_size > 0) {
1164                                 int i;
1165                                 for (i = 0; i < sg_used; i++) {
1166                                         temp64.val =
1167                                             pci_map_single(host->pdev, buff[i],
1168                                                     buff_size[i],
1169                                                     PCI_DMA_BIDIRECTIONAL);
1170                                         c->SG[i].Addr.lower =
1171                                             temp64.val32.lower;
1172                                         c->SG[i].Addr.upper =
1173                                             temp64.val32.upper;
1174                                         c->SG[i].Len = buff_size[i];
1175                                         c->SG[i].Ext = 0;       /* we are not chaining */
1176                                 }
1177                         }
1178                         c->waiting = &wait;
1179                         /* Put the request on the tail of the request queue */
1180                         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
1181                         addQ(&host->reqQ, c);
1182                         host->Qdepth++;
1183                         start_io(host);
1184                         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
1185                         wait_for_completion(&wait);
1186                         /* unlock the buffers from DMA */
1187                         for (i = 0; i < sg_used; i++) {
1188                                 temp64.val32.lower = c->SG[i].Addr.lower;
1189                                 temp64.val32.upper = c->SG[i].Addr.upper;
1190                                 pci_unmap_single(host->pdev,
1191                                         (dma_addr_t) temp64.val, buff_size[i],
1192                                         PCI_DMA_BIDIRECTIONAL);
1193                         }
1194                         check_ioctl_unit_attention(host, c);
1195                         /* Copy the error information out */
1196                         ioc->error_info = *(c->err_info);
1197                         if (copy_to_user(argp, ioc, sizeof(*ioc))) {
1198                                 cmd_free(host, c, 0);
1199                                 status = -EFAULT;
1200                                 goto cleanup1;
1201                         }
1202                         if (ioc->Request.Type.Direction == XFER_READ) {
1203                                 /* Copy the data out of the buffer we created */
1204                                 BYTE __user *ptr = ioc->buf;
1205                                 for (i = 0; i < sg_used; i++) {
1206                                         if (copy_to_user
1207                                             (ptr, buff[i], buff_size[i])) {
1208                                                 cmd_free(host, c, 0);
1209                                                 status = -EFAULT;
1210                                                 goto cleanup1;
1211                                         }
1212                                         ptr += buff_size[i];
1213                                 }
1214                         }
1215                         cmd_free(host, c, 0);
1216                         status = 0;
1217                       cleanup1:
1218                         if (buff) {
1219                                 for (i = 0; i < sg_used; i++)
1220                                         kfree(buff[i]);
1221                                 kfree(buff);
1222                         }
1223                         kfree(buff_size);
1224                         kfree(ioc);
1225                         return status;
1226                 }
1227
1228         /* scsi_cmd_ioctl handles these, below, though some are not */
1229         /* very meaningful for cciss.  SG_IO is the main one people want. */
1230
1231         case SG_GET_VERSION_NUM:
1232         case SG_SET_TIMEOUT:
1233         case SG_GET_TIMEOUT:
1234         case SG_GET_RESERVED_SIZE:
1235         case SG_SET_RESERVED_SIZE:
1236         case SG_EMULATED_HOST:
1237         case SG_IO:
1238         case SCSI_IOCTL_SEND_COMMAND:
1239                 return scsi_cmd_ioctl(disk->queue, disk, mode, cmd, argp);
1240
1241         /* scsi_cmd_ioctl would normally handle these, below, but */
1242         /* they aren't a good fit for cciss, as CD-ROMs are */
1243         /* not supported, and we don't have any bus/target/lun */
1244         /* which we present to the kernel. */
1245
1246         case CDROM_SEND_PACKET:
1247         case CDROMCLOSETRAY:
1248         case CDROMEJECT:
1249         case SCSI_IOCTL_GET_IDLUN:
1250         case SCSI_IOCTL_GET_BUS_NUMBER:
1251         default:
1252                 return -ENOTTY;
1253         }
1254 }
1255
1256 static void cciss_check_queues(ctlr_info_t *h)
1257 {
1258         int start_queue = h->next_to_run;
1259         int i;
1260
1261         /* check to see if we have maxed out the number of commands that can
1262          * be placed on the queue.  If so then exit.  We do this check here
1263          * in case the interrupt we serviced was from an ioctl and did not
1264          * free any new commands.
1265          */
1266         if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds)
1267                 return;
1268
1269         /* We have room on the queue for more commands.  Now we need to queue
1270          * them up.  We will also keep track of the next queue to run so
1271          * that every queue gets a chance to be started first.
1272          */
1273         for (i = 0; i < h->highest_lun + 1; i++) {
1274                 int curr_queue = (start_queue + i) % (h->highest_lun + 1);
1275                 /* make sure the disk has been added and the drive is real
1276                  * because this can be called from the middle of init_one.
1277                  */
1278                 if (!(h->drv[curr_queue].queue) || !(h->drv[curr_queue].heads))
1279                         continue;
1280                 blk_start_queue(h->gendisk[curr_queue]->queue);
1281
1282                 /* check to see if we have maxed out the number of commands
1283                  * that can be placed on the queue.
1284                  */
1285                 if ((find_first_zero_bit(h->cmd_pool_bits, h->nr_cmds)) == h->nr_cmds) {
1286                         if (curr_queue == start_queue) {
1287                                 h->next_to_run =
1288                                     (start_queue + 1) % (h->highest_lun + 1);
1289                                 break;
1290                         } else {
1291                                 h->next_to_run = curr_queue;
1292                                 break;
1293                         }
1294                 }
1295         }
1296 }
1297
1298 static void cciss_softirq_done(struct request *rq)
1299 {
1300         CommandList_struct *cmd = rq->completion_data;
1301         ctlr_info_t *h = hba[cmd->ctlr];
1302         unsigned long flags;
1303         u64bit temp64;
1304         int i, ddir;
1305
1306         if (cmd->Request.Type.Direction == XFER_READ)
1307                 ddir = PCI_DMA_FROMDEVICE;
1308         else
1309                 ddir = PCI_DMA_TODEVICE;
1310
1311         /* command did not need to be retried */
1312         /* unmap the DMA mapping for all the scatter gather elements */
1313         for (i = 0; i < cmd->Header.SGList; i++) {
1314                 temp64.val32.lower = cmd->SG[i].Addr.lower;
1315                 temp64.val32.upper = cmd->SG[i].Addr.upper;
1316                 pci_unmap_page(h->pdev, temp64.val, cmd->SG[i].Len, ddir);
1317         }
1318
1319 #ifdef CCISS_DEBUG
1320         printk("Done with %p\n", rq);
1321 #endif                          /* CCISS_DEBUG */
1322
1323         /* set the residual count for pc requests */
1324         if (blk_pc_request(rq))
1325                 rq->resid_len = cmd->err_info->ResidualCnt;
1326
1327         blk_end_request_all(rq, (rq->errors == 0) ? 0 : -EIO);
1328
1329         spin_lock_irqsave(&h->lock, flags);
1330         cmd_free(h, cmd, 1);
1331         cciss_check_queues(h);
1332         spin_unlock_irqrestore(&h->lock, flags);
1333 }
1334
1335 /* This function gets the serial number of a logical drive via
1336  * inquiry page 0x83.  Serial no. is 16 bytes.  If the serial
1337  * number cannot be had, for whatever reason, 16 bytes of 0xff
1338  * are returned instead.
1339  */
1340 static void cciss_get_serial_no(int ctlr, int logvol, int withirq,
1341                                 unsigned char *serial_no, int buflen)
1342 {
1343 #define PAGE_83_INQ_BYTES 64
1344         int rc;
1345         unsigned char *buf;
1346
1347         if (buflen > 16)
1348                 buflen = 16;
1349         memset(serial_no, 0xff, buflen);
1350         buf = kzalloc(PAGE_83_INQ_BYTES, GFP_KERNEL);
1351         if (!buf)
1352                 return;
1353         memset(serial_no, 0, buflen);
1354         if (withirq)
1355                 rc = sendcmd_withirq(CISS_INQUIRY, ctlr, buf,
1356                         PAGE_83_INQ_BYTES, 1, logvol, 0x83, TYPE_CMD);
1357         else
1358                 rc = sendcmd(CISS_INQUIRY, ctlr, buf,
1359                         PAGE_83_INQ_BYTES, 1, logvol, 0x83, NULL, TYPE_CMD);
1360         if (rc == IO_OK)
1361                 memcpy(serial_no, &buf[8], buflen);
1362         kfree(buf);
1363         return;
1364 }
1365
1366 static void cciss_add_disk(ctlr_info_t *h, struct gendisk *disk,
1367                                 int drv_index)
1368 {
1369         disk->queue = blk_init_queue(do_cciss_request, &h->lock);
1370         sprintf(disk->disk_name, "cciss/c%dd%d", h->ctlr, drv_index);
1371         disk->major = h->major;
1372         disk->first_minor = drv_index << NWD_SHIFT;
1373         disk->fops = &cciss_fops;
1374         disk->private_data = &h->drv[drv_index];
1375         disk->driverfs_dev = &h->pdev->dev;
1376
1377         /* Set up queue information */
1378         blk_queue_bounce_limit(disk->queue, h->pdev->dma_mask);
1379
1380         /* This is a hardware imposed limit. */
1381         blk_queue_max_hw_segments(disk->queue, MAXSGENTRIES);
1382
1383         /* This is a limit in the driver and could be eliminated. */
1384         blk_queue_max_phys_segments(disk->queue, MAXSGENTRIES);
1385
1386         blk_queue_max_sectors(disk->queue, h->cciss_max_sectors);
1387
1388         blk_queue_softirq_done(disk->queue, cciss_softirq_done);
1389
1390         disk->queue->queuedata = h;
1391
1392         blk_queue_hardsect_size(disk->queue,
1393                                 h->drv[drv_index].block_size);
1394
1395         /* Make sure all queue data is written out before */
1396         /* setting h->drv[drv_index].queue, as setting this */
1397         /* allows the interrupt handler to start the queue */
1398         wmb();
1399         h->drv[drv_index].queue = disk->queue;
1400         add_disk(disk);
1401 }
1402
1403 /* This function will check the usage_count of the drive to be updated/added.
1404  * If the usage_count is zero and it is a heretofore unknown drive, or,
1405  * the drive's capacity, geometry, or serial number has changed,
1406  * then the drive information will be updated and the disk will be
1407  * re-registered with the kernel.  If these conditions don't hold,
1408  * then it will be left alone for the next reboot.  The exception to this
1409  * is disk 0 which will always be left registered with the kernel since it
1410  * is also the controller node.  Any changes to disk 0 will show up on
1411  * the next reboot.
1412  */
1413 static void cciss_update_drive_info(int ctlr, int drv_index, int first_time)
1414 {
1415         ctlr_info_t *h = hba[ctlr];
1416         struct gendisk *disk;
1417         InquiryData_struct *inq_buff = NULL;
1418         unsigned int block_size;
1419         sector_t total_size;
1420         unsigned long flags = 0;
1421         int ret = 0;
1422         drive_info_struct *drvinfo;
1423         int was_only_controller_node;
1424
1425         /* Get information about the disk and modify the driver structure */
1426         inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
1427         drvinfo = kmalloc(sizeof(*drvinfo), GFP_KERNEL);
1428         if (inq_buff == NULL || drvinfo == NULL)
1429                 goto mem_msg;
1430
1431         /* See if we're trying to update the "controller node";
1432          * this will happen when the first logical drive gets
1433          * created by ACU.
1434          */
1435         was_only_controller_node = (drv_index == 0 &&
1436                                 h->drv[0].raid_level == -1);
1437
1438         /* testing to see if 16-byte CDBs are already being used */
1439         if (h->cciss_read == CCISS_READ_16) {
1440                 cciss_read_capacity_16(h->ctlr, drv_index, 1,
1441                         &total_size, &block_size);
1442
1443         } else {
1444                 cciss_read_capacity(ctlr, drv_index, 1,
1445                                     &total_size, &block_size);
1446
1447                 /* if read_capacity returns all F's this volume is >2TB */
1448                 /* in size so we switch to 16-byte CDB's for all */
1449                 /* read/write ops */
1450                 if (total_size == 0xFFFFFFFFULL) {
1451                         cciss_read_capacity_16(ctlr, drv_index, 1,
1452                         &total_size, &block_size);
1453                         h->cciss_read = CCISS_READ_16;
1454                         h->cciss_write = CCISS_WRITE_16;
1455                 } else {
1456                         h->cciss_read = CCISS_READ_10;
1457                         h->cciss_write = CCISS_WRITE_10;
1458                 }
1459         }
1460
1461         cciss_geometry_inquiry(ctlr, drv_index, 1, total_size, block_size,
1462                                inq_buff, drvinfo);
1463         drvinfo->block_size = block_size;
1464         drvinfo->nr_blocks = total_size + 1;
1465
1466         cciss_get_serial_no(ctlr, drv_index, 1, drvinfo->serial_no,
1467                         sizeof(drvinfo->serial_no));
1468
1469         /* Is it the same disk we already know, and nothing's changed? */
1470         if (h->drv[drv_index].raid_level != -1 &&
1471                 ((memcmp(drvinfo->serial_no,
1472                                 h->drv[drv_index].serial_no, 16) == 0) &&
1473                 drvinfo->block_size == h->drv[drv_index].block_size &&
1474                 drvinfo->nr_blocks == h->drv[drv_index].nr_blocks &&
1475                 drvinfo->heads == h->drv[drv_index].heads &&
1476                 drvinfo->sectors == h->drv[drv_index].sectors &&
1477                 drvinfo->cylinders == h->drv[drv_index].cylinders))
1478                         /* The disk is unchanged, nothing to update */
1479                         goto freeret;
1480
1481         /* If we get here it's not the same disk, or something's changed,
1482          * so we need to * deregister it, and re-register it, if it's not
1483          * in use.
1484          * If the disk already exists then deregister it before proceeding
1485          * (unless it's the first disk (for the controller node).
1486          */
1487         if (h->drv[drv_index].raid_level != -1 && drv_index != 0) {
1488                 printk(KERN_WARNING "disk %d has changed.\n", drv_index);
1489                 spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1490                 h->drv[drv_index].busy_configuring = 1;
1491                 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1492
1493                 /* deregister_disk sets h->drv[drv_index].queue = NULL
1494                  * which keeps the interrupt handler from starting
1495                  * the queue.
1496                  */
1497                 ret = deregister_disk(h, drv_index, 0);
1498                 h->drv[drv_index].busy_configuring = 0;
1499         }
1500
1501         /* If the disk is in use return */
1502         if (ret)
1503                 goto freeret;
1504
1505         /* Save the new information from cciss_geometry_inquiry
1506          * and serial number inquiry.
1507          */
1508         h->drv[drv_index].block_size = drvinfo->block_size;
1509         h->drv[drv_index].nr_blocks = drvinfo->nr_blocks;
1510         h->drv[drv_index].heads = drvinfo->heads;
1511         h->drv[drv_index].sectors = drvinfo->sectors;
1512         h->drv[drv_index].cylinders = drvinfo->cylinders;
1513         h->drv[drv_index].raid_level = drvinfo->raid_level;
1514         memcpy(h->drv[drv_index].serial_no, drvinfo->serial_no, 16);
1515
1516         ++h->num_luns;
1517         disk = h->gendisk[drv_index];
1518         set_capacity(disk, h->drv[drv_index].nr_blocks);
1519
1520         /* If it's not disk 0 (drv_index != 0)
1521          * or if it was disk 0, but there was previously
1522          * no actual corresponding configured logical drive
1523          * (raid_leve == -1) then we want to update the
1524          * logical drive's information.
1525          */
1526         if (drv_index || first_time)
1527                 cciss_add_disk(h, disk, drv_index);
1528
1529 freeret:
1530         kfree(inq_buff);
1531         kfree(drvinfo);
1532         return;
1533 mem_msg:
1534         printk(KERN_ERR "cciss: out of memory\n");
1535         goto freeret;
1536 }
1537
1538 /* This function will find the first index of the controllers drive array
1539  * that has a -1 for the raid_level and will return that index.  This is
1540  * where new drives will be added.  If the index to be returned is greater
1541  * than the highest_lun index for the controller then highest_lun is set
1542  * to this new index.  If there are no available indexes then -1 is returned.
1543  * "controller_node" is used to know if this is a real logical drive, or just
1544  * the controller node, which determines if this counts towards highest_lun.
1545  */
1546 static int cciss_find_free_drive_index(int ctlr, int controller_node)
1547 {
1548         int i;
1549
1550         for (i = 0; i < CISS_MAX_LUN; i++) {
1551                 if (hba[ctlr]->drv[i].raid_level == -1) {
1552                         if (i > hba[ctlr]->highest_lun)
1553                                 if (!controller_node)
1554                                         hba[ctlr]->highest_lun = i;
1555                         return i;
1556                 }
1557         }
1558         return -1;
1559 }
1560
1561 /* cciss_add_gendisk finds a free hba[]->drv structure
1562  * and allocates a gendisk if needed, and sets the lunid
1563  * in the drvinfo structure.   It returns the index into
1564  * the ->drv[] array, or -1 if none are free.
1565  * is_controller_node indicates whether highest_lun should
1566  * count this disk, or if it's only being added to provide
1567  * a means to talk to the controller in case no logical
1568  * drives have yet been configured.
1569  */
1570 static int cciss_add_gendisk(ctlr_info_t *h, __u32 lunid, int controller_node)
1571 {
1572         int drv_index;
1573
1574         drv_index = cciss_find_free_drive_index(h->ctlr, controller_node);
1575         if (drv_index == -1)
1576                 return -1;
1577         /*Check if the gendisk needs to be allocated */
1578         if (!h->gendisk[drv_index]) {
1579                 h->gendisk[drv_index] =
1580                         alloc_disk(1 << NWD_SHIFT);
1581                 if (!h->gendisk[drv_index]) {
1582                         printk(KERN_ERR "cciss%d: could not "
1583                                 "allocate a new disk %d\n",
1584                                 h->ctlr, drv_index);
1585                         return -1;
1586                 }
1587         }
1588         h->drv[drv_index].LunID = lunid;
1589
1590         /* Don't need to mark this busy because nobody */
1591         /* else knows about this disk yet to contend */
1592         /* for access to it. */
1593         h->drv[drv_index].busy_configuring = 0;
1594         wmb();
1595         return drv_index;
1596 }
1597
1598 /* This is for the special case of a controller which
1599  * has no logical drives.  In this case, we still need
1600  * to register a disk so the controller can be accessed
1601  * by the Array Config Utility.
1602  */
1603 static void cciss_add_controller_node(ctlr_info_t *h)
1604 {
1605         struct gendisk *disk;
1606         int drv_index;
1607
1608         if (h->gendisk[0] != NULL) /* already did this? Then bail. */
1609                 return;
1610
1611         drv_index = cciss_add_gendisk(h, 0, 1);
1612         if (drv_index == -1) {
1613                 printk(KERN_WARNING "cciss%d: could not "
1614                         "add disk 0.\n", h->ctlr);
1615                 return;
1616         }
1617         h->drv[drv_index].block_size = 512;
1618         h->drv[drv_index].nr_blocks = 0;
1619         h->drv[drv_index].heads = 0;
1620         h->drv[drv_index].sectors = 0;
1621         h->drv[drv_index].cylinders = 0;
1622         h->drv[drv_index].raid_level = -1;
1623         memset(h->drv[drv_index].serial_no, 0, 16);
1624         disk = h->gendisk[drv_index];
1625         cciss_add_disk(h, disk, drv_index);
1626 }
1627
1628 /* This function will add and remove logical drives from the Logical
1629  * drive array of the controller and maintain persistency of ordering
1630  * so that mount points are preserved until the next reboot.  This allows
1631  * for the removal of logical drives in the middle of the drive array
1632  * without a re-ordering of those drives.
1633  * INPUT
1634  * h            = The controller to perform the operations on
1635  */
1636 static int rebuild_lun_table(ctlr_info_t *h, int first_time)
1637 {
1638         int ctlr = h->ctlr;
1639         int num_luns;
1640         ReportLunData_struct *ld_buff = NULL;
1641         int return_code;
1642         int listlength = 0;
1643         int i;
1644         int drv_found;
1645         int drv_index = 0;
1646         __u32 lunid = 0;
1647         unsigned long flags;
1648
1649         if (!capable(CAP_SYS_RAWIO))
1650                 return -EPERM;
1651
1652         /* Set busy_configuring flag for this operation */
1653         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1654         if (h->busy_configuring) {
1655                 spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1656                 return -EBUSY;
1657         }
1658         h->busy_configuring = 1;
1659         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1660
1661         ld_buff = kzalloc(sizeof(ReportLunData_struct), GFP_KERNEL);
1662         if (ld_buff == NULL)
1663                 goto mem_msg;
1664
1665         return_code = sendcmd_withirq(CISS_REPORT_LOG, ctlr, ld_buff,
1666                                       sizeof(ReportLunData_struct), 0,
1667                                       0, 0, TYPE_CMD);
1668
1669         if (return_code == IO_OK)
1670                 listlength = be32_to_cpu(*(__be32 *) ld_buff->LUNListLength);
1671         else {  /* reading number of logical volumes failed */
1672                 printk(KERN_WARNING "cciss: report logical volume"
1673                        " command failed\n");
1674                 listlength = 0;
1675                 goto freeret;
1676         }
1677
1678         num_luns = listlength / 8;      /* 8 bytes per entry */
1679         if (num_luns > CISS_MAX_LUN) {
1680                 num_luns = CISS_MAX_LUN;
1681                 printk(KERN_WARNING "cciss: more luns configured"
1682                        " on controller than can be handled by"
1683                        " this driver.\n");
1684         }
1685
1686         if (num_luns == 0)
1687                 cciss_add_controller_node(h);
1688
1689         /* Compare controller drive array to driver's drive array
1690          * to see if any drives are missing on the controller due
1691          * to action of Array Config Utility (user deletes drive)
1692          * and deregister logical drives which have disappeared.
1693          */
1694         for (i = 0; i <= h->highest_lun; i++) {
1695                 int j;
1696                 drv_found = 0;
1697
1698                 /* skip holes in the array from already deleted drives */
1699                 if (h->drv[i].raid_level == -1)
1700                         continue;
1701
1702                 for (j = 0; j < num_luns; j++) {
1703                         memcpy(&lunid, &ld_buff->LUN[j][0], 4);
1704                         lunid = le32_to_cpu(lunid);
1705                         if (h->drv[i].LunID == lunid) {
1706                                 drv_found = 1;
1707                                 break;
1708                         }
1709                 }
1710                 if (!drv_found) {
1711                         /* Deregister it from the OS, it's gone. */
1712                         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
1713                         h->drv[i].busy_configuring = 1;
1714                         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
1715                         return_code = deregister_disk(h, i, 1);
1716                         h->drv[i].busy_configuring = 0;
1717                 }
1718         }
1719
1720         /* Compare controller drive array to driver's drive array.
1721          * Check for updates in the drive information and any new drives
1722          * on the controller due to ACU adding logical drives, or changing
1723          * a logical drive's size, etc.  Reregister any new/changed drives
1724          */
1725         for (i = 0; i < num_luns; i++) {
1726                 int j;
1727
1728                 drv_found = 0;
1729
1730                 memcpy(&lunid, &ld_buff->LUN[i][0], 4);
1731                 lunid = le32_to_cpu(lunid);
1732
1733                 /* Find if the LUN is already in the drive array
1734                  * of the driver.  If so then update its info
1735                  * if not in use.  If it does not exist then find
1736                  * the first free index and add it.
1737                  */
1738                 for (j = 0; j <= h->highest_lun; j++) {
1739                         if (h->drv[j].raid_level != -1 &&
1740                                 h->drv[j].LunID == lunid) {
1741                                 drv_index = j;
1742                                 drv_found = 1;
1743                                 break;
1744                         }
1745                 }
1746
1747                 /* check if the drive was found already in the array */
1748                 if (!drv_found) {
1749                         drv_index = cciss_add_gendisk(h, lunid, 0);
1750                         if (drv_index == -1)
1751                                 goto freeret;
1752                 }
1753                 cciss_update_drive_info(ctlr, drv_index, first_time);
1754         }               /* end for */
1755
1756 freeret:
1757         kfree(ld_buff);
1758         h->busy_configuring = 0;
1759         /* We return -1 here to tell the ACU that we have registered/updated
1760          * all of the drives that we can and to keep it from calling us
1761          * additional times.
1762          */
1763         return -1;
1764 mem_msg:
1765         printk(KERN_ERR "cciss: out of memory\n");
1766         h->busy_configuring = 0;
1767         goto freeret;
1768 }
1769
1770 /* This function will deregister the disk and it's queue from the
1771  * kernel.  It must be called with the controller lock held and the
1772  * drv structures busy_configuring flag set.  It's parameters are:
1773  *
1774  * disk = This is the disk to be deregistered
1775  * drv  = This is the drive_info_struct associated with the disk to be
1776  *        deregistered.  It contains information about the disk used
1777  *        by the driver.
1778  * clear_all = This flag determines whether or not the disk information
1779  *             is going to be completely cleared out and the highest_lun
1780  *             reset.  Sometimes we want to clear out information about
1781  *             the disk in preparation for re-adding it.  In this case
1782  *             the highest_lun should be left unchanged and the LunID
1783  *             should not be cleared.
1784 */
1785 static int deregister_disk(ctlr_info_t *h, int drv_index,
1786                            int clear_all)
1787 {
1788         int i;
1789         struct gendisk *disk;
1790         drive_info_struct *drv;
1791
1792         if (!capable(CAP_SYS_RAWIO))
1793                 return -EPERM;
1794
1795         drv = &h->drv[drv_index];
1796         disk = h->gendisk[drv_index];
1797
1798         /* make sure logical volume is NOT is use */
1799         if (clear_all || (h->gendisk[0] == disk)) {
1800                 if (drv->usage_count > 1)
1801                         return -EBUSY;
1802         } else if (drv->usage_count > 0)
1803                 return -EBUSY;
1804
1805         /* invalidate the devices and deregister the disk.  If it is disk
1806          * zero do not deregister it but just zero out it's values.  This
1807          * allows us to delete disk zero but keep the controller registered.
1808          */
1809         if (h->gendisk[0] != disk) {
1810                 struct request_queue *q = disk->queue;
1811                 if (disk->flags & GENHD_FL_UP)
1812                         del_gendisk(disk);
1813                 if (q) {
1814                         blk_cleanup_queue(q);
1815                         /* Set drv->queue to NULL so that we do not try
1816                          * to call blk_start_queue on this queue in the
1817                          * interrupt handler
1818                          */
1819                         drv->queue = NULL;
1820                 }
1821                 /* If clear_all is set then we are deleting the logical
1822                  * drive, not just refreshing its info.  For drives
1823                  * other than disk 0 we will call put_disk.  We do not
1824                  * do this for disk 0 as we need it to be able to
1825                  * configure the controller.
1826                  */
1827                 if (clear_all){
1828                         /* This isn't pretty, but we need to find the
1829                          * disk in our array and NULL our the pointer.
1830                          * This is so that we will call alloc_disk if
1831                          * this index is used again later.
1832                          */
1833                         for (i=0; i < CISS_MAX_LUN; i++){
1834                                 if (h->gendisk[i] == disk) {
1835                                         h->gendisk[i] = NULL;
1836                                         break;
1837                                 }
1838                         }
1839                         put_disk(disk);
1840                 }
1841         } else {
1842                 set_capacity(disk, 0);
1843         }
1844
1845         --h->num_luns;
1846         /* zero out the disk size info */
1847         drv->nr_blocks = 0;
1848         drv->block_size = 0;
1849         drv->heads = 0;
1850         drv->sectors = 0;
1851         drv->cylinders = 0;
1852         drv->raid_level = -1;   /* This can be used as a flag variable to
1853                                  * indicate that this element of the drive
1854                                  * array is free.
1855                                  */
1856
1857         if (clear_all) {
1858                 /* check to see if it was the last disk */
1859                 if (drv == h->drv + h->highest_lun) {
1860                         /* if so, find the new hightest lun */
1861                         int i, newhighest = -1;
1862                         for (i = 0; i <= h->highest_lun; i++) {
1863                                 /* if the disk has size > 0, it is available */
1864                                 if (h->drv[i].heads)
1865                                         newhighest = i;
1866                         }
1867                         h->highest_lun = newhighest;
1868                 }
1869
1870                 drv->LunID = 0;
1871         }
1872         return 0;
1873 }
1874
1875 static int fill_cmd(CommandList_struct *c, __u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,      /* 0: address the controller,
1876                                                                                                                            1: address logical volume log_unit,
1877                                                                                                                            2: periph device address is scsi3addr */
1878                     unsigned int log_unit, __u8 page_code,
1879                     unsigned char *scsi3addr, int cmd_type)
1880 {
1881         ctlr_info_t *h = hba[ctlr];
1882         u64bit buff_dma_handle;
1883         int status = IO_OK;
1884
1885         c->cmd_type = CMD_IOCTL_PEND;
1886         c->Header.ReplyQueue = 0;
1887         if (buff != NULL) {
1888                 c->Header.SGList = 1;
1889                 c->Header.SGTotal = 1;
1890         } else {
1891                 c->Header.SGList = 0;
1892                 c->Header.SGTotal = 0;
1893         }
1894         c->Header.Tag.lower = c->busaddr;
1895
1896         c->Request.Type.Type = cmd_type;
1897         if (cmd_type == TYPE_CMD) {
1898                 switch (cmd) {
1899                 case CISS_INQUIRY:
1900                         /* If the logical unit number is 0 then, this is going
1901                            to controller so It's a physical command
1902                            mode = 0 target = 0.  So we have nothing to write.
1903                            otherwise, if use_unit_num == 1,
1904                            mode = 1(volume set addressing) target = LUNID
1905                            otherwise, if use_unit_num == 2,
1906                            mode = 0(periph dev addr) target = scsi3addr */
1907                         if (use_unit_num == 1) {
1908                                 c->Header.LUN.LogDev.VolId =
1909                                     h->drv[log_unit].LunID;
1910                                 c->Header.LUN.LogDev.Mode = 1;
1911                         } else if (use_unit_num == 2) {
1912                                 memcpy(c->Header.LUN.LunAddrBytes, scsi3addr,
1913                                        8);
1914                                 c->Header.LUN.LogDev.Mode = 0;
1915                         }
1916                         /* are we trying to read a vital product page */
1917                         if (page_code != 0) {
1918                                 c->Request.CDB[1] = 0x01;
1919                                 c->Request.CDB[2] = page_code;
1920                         }
1921                         c->Request.CDBLen = 6;
1922                         c->Request.Type.Attribute = ATTR_SIMPLE;
1923                         c->Request.Type.Direction = XFER_READ;
1924                         c->Request.Timeout = 0;
1925                         c->Request.CDB[0] = CISS_INQUIRY;
1926                         c->Request.CDB[4] = size & 0xFF;
1927                         break;
1928                 case CISS_REPORT_LOG:
1929                 case CISS_REPORT_PHYS:
1930                         /* Talking to controller so It's a physical command
1931                            mode = 00 target = 0.  Nothing to write.
1932                          */
1933                         c->Request.CDBLen = 12;
1934                         c->Request.Type.Attribute = ATTR_SIMPLE;
1935                         c->Request.Type.Direction = XFER_READ;
1936                         c->Request.Timeout = 0;
1937                         c->Request.CDB[0] = cmd;
1938                         c->Request.CDB[6] = (size >> 24) & 0xFF;        //MSB
1939                         c->Request.CDB[7] = (size >> 16) & 0xFF;
1940                         c->Request.CDB[8] = (size >> 8) & 0xFF;
1941                         c->Request.CDB[9] = size & 0xFF;
1942                         break;
1943
1944                 case CCISS_READ_CAPACITY:
1945                         c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1946                         c->Header.LUN.LogDev.Mode = 1;
1947                         c->Request.CDBLen = 10;
1948                         c->Request.Type.Attribute = ATTR_SIMPLE;
1949                         c->Request.Type.Direction = XFER_READ;
1950                         c->Request.Timeout = 0;
1951                         c->Request.CDB[0] = cmd;
1952                         break;
1953                 case CCISS_READ_CAPACITY_16:
1954                         c->Header.LUN.LogDev.VolId = h->drv[log_unit].LunID;
1955                         c->Header.LUN.LogDev.Mode = 1;
1956                         c->Request.CDBLen = 16;
1957                         c->Request.Type.Attribute = ATTR_SIMPLE;
1958                         c->Request.Type.Direction = XFER_READ;
1959                         c->Request.Timeout = 0;
1960                         c->Request.CDB[0] = cmd;
1961                         c->Request.CDB[1] = 0x10;
1962                         c->Request.CDB[10] = (size >> 24) & 0xFF;
1963                         c->Request.CDB[11] = (size >> 16) & 0xFF;
1964                         c->Request.CDB[12] = (size >> 8) & 0xFF;
1965                         c->Request.CDB[13] = size & 0xFF;
1966                         c->Request.Timeout = 0;
1967                         c->Request.CDB[0] = cmd;
1968                         break;
1969                 case CCISS_CACHE_FLUSH:
1970                         c->Request.CDBLen = 12;
1971                         c->Request.Type.Attribute = ATTR_SIMPLE;
1972                         c->Request.Type.Direction = XFER_WRITE;
1973                         c->Request.Timeout = 0;
1974                         c->Request.CDB[0] = BMIC_WRITE;
1975                         c->Request.CDB[6] = BMIC_CACHE_FLUSH;
1976                         break;
1977                 default:
1978                         printk(KERN_WARNING
1979                                "cciss%d:  Unknown Command 0x%c\n", ctlr, cmd);
1980                         return IO_ERROR;
1981                 }
1982         } else if (cmd_type == TYPE_MSG) {
1983                 switch (cmd) {
1984                 case 0: /* ABORT message */
1985                         c->Request.CDBLen = 12;
1986                         c->Request.Type.Attribute = ATTR_SIMPLE;
1987                         c->Request.Type.Direction = XFER_WRITE;
1988                         c->Request.Timeout = 0;
1989                         c->Request.CDB[0] = cmd;        /* abort */
1990                         c->Request.CDB[1] = 0;  /* abort a command */
1991                         /* buff contains the tag of the command to abort */
1992                         memcpy(&c->Request.CDB[4], buff, 8);
1993                         break;
1994                 case 1: /* RESET message */
1995                         c->Request.CDBLen = 12;
1996                         c->Request.Type.Attribute = ATTR_SIMPLE;
1997                         c->Request.Type.Direction = XFER_WRITE;
1998                         c->Request.Timeout = 0;
1999                         memset(&c->Request.CDB[0], 0, sizeof(c->Request.CDB));
2000                         c->Request.CDB[0] = cmd;        /* reset */
2001                         c->Request.CDB[1] = 0x04;       /* reset a LUN */
2002                         break;
2003                 case 3: /* No-Op message */
2004                         c->Request.CDBLen = 1;
2005                         c->Request.Type.Attribute = ATTR_SIMPLE;
2006                         c->Request.Type.Direction = XFER_WRITE;
2007                         c->Request.Timeout = 0;
2008                         c->Request.CDB[0] = cmd;
2009                         break;
2010                 default:
2011                         printk(KERN_WARNING
2012                                "cciss%d: unknown message type %d\n", ctlr, cmd);
2013                         return IO_ERROR;
2014                 }
2015         } else {
2016                 printk(KERN_WARNING
2017                        "cciss%d: unknown command type %d\n", ctlr, cmd_type);
2018                 return IO_ERROR;
2019         }
2020         /* Fill in the scatter gather information */
2021         if (size > 0) {
2022                 buff_dma_handle.val = (__u64) pci_map_single(h->pdev,
2023                                                              buff, size,
2024                                                              PCI_DMA_BIDIRECTIONAL);
2025                 c->SG[0].Addr.lower = buff_dma_handle.val32.lower;
2026                 c->SG[0].Addr.upper = buff_dma_handle.val32.upper;
2027                 c->SG[0].Len = size;
2028                 c->SG[0].Ext = 0;       /* we are not chaining */
2029         }
2030         return status;
2031 }
2032
2033 static int sendcmd_withirq(__u8 cmd,
2034                            int ctlr,
2035                            void *buff,
2036                            size_t size,
2037                            unsigned int use_unit_num,
2038                            unsigned int log_unit, __u8 page_code, int cmd_type)
2039 {
2040         ctlr_info_t *h = hba[ctlr];
2041         CommandList_struct *c;
2042         u64bit buff_dma_handle;
2043         unsigned long flags;
2044         int return_status;
2045         DECLARE_COMPLETION_ONSTACK(wait);
2046
2047         if ((c = cmd_alloc(h, 0)) == NULL)
2048                 return -ENOMEM;
2049         return_status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2050                                  log_unit, page_code, NULL, cmd_type);
2051         if (return_status != IO_OK) {
2052                 cmd_free(h, c, 0);
2053                 return return_status;
2054         }
2055       resend_cmd2:
2056         c->waiting = &wait;
2057
2058         /* Put the request on the tail of the queue and send it */
2059         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
2060         addQ(&h->reqQ, c);
2061         h->Qdepth++;
2062         start_io(h);
2063         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
2064
2065         wait_for_completion(&wait);
2066
2067         if (c->err_info->CommandStatus != 0) {  /* an error has occurred */
2068                 switch (c->err_info->CommandStatus) {
2069                 case CMD_TARGET_STATUS:
2070                         printk(KERN_WARNING "cciss: cmd %p has "
2071                                " completed with errors\n", c);
2072                         if (c->err_info->ScsiStatus) {
2073                                 printk(KERN_WARNING "cciss: cmd %p "
2074                                        "has SCSI Status = %x\n",
2075                                        c, c->err_info->ScsiStatus);
2076                         }
2077
2078                         break;
2079                 case CMD_DATA_UNDERRUN:
2080                 case CMD_DATA_OVERRUN:
2081                         /* expected for inquire and report lun commands */
2082                         break;
2083                 case CMD_INVALID:
2084                         printk(KERN_WARNING "cciss: Cmd %p is "
2085                                "reported invalid\n", c);
2086                         return_status = IO_ERROR;
2087                         break;
2088                 case CMD_PROTOCOL_ERR:
2089                         printk(KERN_WARNING "cciss: cmd %p has "
2090                                "protocol error \n", c);
2091                         return_status = IO_ERROR;
2092                         break;
2093                 case CMD_HARDWARE_ERR:
2094                         printk(KERN_WARNING "cciss: cmd %p had "
2095                                " hardware error\n", c);
2096                         return_status = IO_ERROR;
2097                         break;
2098                 case CMD_CONNECTION_LOST:
2099                         printk(KERN_WARNING "cciss: cmd %p had "
2100                                "connection lost\n", c);
2101                         return_status = IO_ERROR;
2102                         break;
2103                 case CMD_ABORTED:
2104                         printk(KERN_WARNING "cciss: cmd %p was "
2105                                "aborted\n", c);
2106                         return_status = IO_ERROR;
2107                         break;
2108                 case CMD_ABORT_FAILED:
2109                         printk(KERN_WARNING "cciss: cmd %p reports "
2110                                "abort failed\n", c);
2111                         return_status = IO_ERROR;
2112                         break;
2113                 case CMD_UNSOLICITED_ABORT:
2114                         printk(KERN_WARNING
2115                                "cciss%d: unsolicited abort %p\n", ctlr, c);
2116                         if (c->retry_count < MAX_CMD_RETRIES) {
2117                                 printk(KERN_WARNING
2118                                        "cciss%d: retrying %p\n", ctlr, c);
2119                                 c->retry_count++;
2120                                 /* erase the old error information */
2121                                 memset(c->err_info, 0,
2122                                        sizeof(ErrorInfo_struct));
2123                                 return_status = IO_OK;
2124                                 INIT_COMPLETION(wait);
2125                                 goto resend_cmd2;
2126                         }
2127                         return_status = IO_ERROR;
2128                         break;
2129                 default:
2130                         printk(KERN_WARNING "cciss: cmd %p returned "
2131                                "unknown status %x\n", c,
2132                                c->err_info->CommandStatus);
2133                         return_status = IO_ERROR;
2134                 }
2135         }
2136         /* unlock the buffers from DMA */
2137         buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2138         buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2139         pci_unmap_single(h->pdev, (dma_addr_t) buff_dma_handle.val,
2140                          c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2141         cmd_free(h, c, 0);
2142         return return_status;
2143 }
2144
2145 static void cciss_geometry_inquiry(int ctlr, int logvol,
2146                                    int withirq, sector_t total_size,
2147                                    unsigned int block_size,
2148                                    InquiryData_struct *inq_buff,
2149                                    drive_info_struct *drv)
2150 {
2151         int return_code;
2152         unsigned long t;
2153
2154         memset(inq_buff, 0, sizeof(InquiryData_struct));
2155         if (withirq)
2156                 return_code = sendcmd_withirq(CISS_INQUIRY, ctlr,
2157                                               inq_buff, sizeof(*inq_buff), 1,
2158                                               logvol, 0xC1, TYPE_CMD);
2159         else
2160                 return_code = sendcmd(CISS_INQUIRY, ctlr, inq_buff,
2161                                       sizeof(*inq_buff), 1, logvol, 0xC1, NULL,
2162                                       TYPE_CMD);
2163         if (return_code == IO_OK) {
2164                 if (inq_buff->data_byte[8] == 0xFF) {
2165                         printk(KERN_WARNING
2166                                "cciss: reading geometry failed, volume "
2167                                "does not support reading geometry\n");
2168                         drv->heads = 255;
2169                         drv->sectors = 32;      // Sectors per track
2170                         drv->cylinders = total_size + 1;
2171                         drv->raid_level = RAID_UNKNOWN;
2172                 } else {
2173                         drv->heads = inq_buff->data_byte[6];
2174                         drv->sectors = inq_buff->data_byte[7];
2175                         drv->cylinders = (inq_buff->data_byte[4] & 0xff) << 8;
2176                         drv->cylinders += inq_buff->data_byte[5];
2177                         drv->raid_level = inq_buff->data_byte[8];
2178                 }
2179                 drv->block_size = block_size;
2180                 drv->nr_blocks = total_size + 1;
2181                 t = drv->heads * drv->sectors;
2182                 if (t > 1) {
2183                         sector_t real_size = total_size + 1;
2184                         unsigned long rem = sector_div(real_size, t);
2185                         if (rem)
2186                                 real_size++;
2187                         drv->cylinders = real_size;
2188                 }
2189         } else {                /* Get geometry failed */
2190                 printk(KERN_WARNING "cciss: reading geometry failed\n");
2191         }
2192         printk(KERN_INFO "      heads=%d, sectors=%d, cylinders=%d\n\n",
2193                drv->heads, drv->sectors, drv->cylinders);
2194 }
2195
2196 static void
2197 cciss_read_capacity(int ctlr, int logvol, int withirq, sector_t *total_size,
2198                     unsigned int *block_size)
2199 {
2200         ReadCapdata_struct *buf;
2201         int return_code;
2202
2203         buf = kzalloc(sizeof(ReadCapdata_struct), GFP_KERNEL);
2204         if (!buf) {
2205                 printk(KERN_WARNING "cciss: out of memory\n");
2206                 return;
2207         }
2208
2209         if (withirq)
2210                 return_code = sendcmd_withirq(CCISS_READ_CAPACITY,
2211                                 ctlr, buf, sizeof(ReadCapdata_struct),
2212                                         1, logvol, 0, TYPE_CMD);
2213         else
2214                 return_code = sendcmd(CCISS_READ_CAPACITY,
2215                                 ctlr, buf, sizeof(ReadCapdata_struct),
2216                                         1, logvol, 0, NULL, TYPE_CMD);
2217         if (return_code == IO_OK) {
2218                 *total_size = be32_to_cpu(*(__be32 *) buf->total_size);
2219                 *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2220         } else {                /* read capacity command failed */
2221                 printk(KERN_WARNING "cciss: read capacity failed\n");
2222                 *total_size = 0;
2223                 *block_size = BLOCK_SIZE;
2224         }
2225         if (*total_size != 0)
2226                 printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2227                 (unsigned long long)*total_size+1, *block_size);
2228         kfree(buf);
2229 }
2230
2231 static void
2232 cciss_read_capacity_16(int ctlr, int logvol, int withirq, sector_t *total_size,                                 unsigned int *block_size)
2233 {
2234         ReadCapdata_struct_16 *buf;
2235         int return_code;
2236
2237         buf = kzalloc(sizeof(ReadCapdata_struct_16), GFP_KERNEL);
2238         if (!buf) {
2239                 printk(KERN_WARNING "cciss: out of memory\n");
2240                 return;
2241         }
2242
2243         if (withirq) {
2244                 return_code = sendcmd_withirq(CCISS_READ_CAPACITY_16,
2245                         ctlr, buf, sizeof(ReadCapdata_struct_16),
2246                                 1, logvol, 0, TYPE_CMD);
2247         }
2248         else {
2249                 return_code = sendcmd(CCISS_READ_CAPACITY_16,
2250                         ctlr, buf, sizeof(ReadCapdata_struct_16),
2251                                 1, logvol, 0, NULL, TYPE_CMD);
2252         }
2253         if (return_code == IO_OK) {
2254                 *total_size = be64_to_cpu(*(__be64 *) buf->total_size);
2255                 *block_size = be32_to_cpu(*(__be32 *) buf->block_size);
2256         } else {                /* read capacity command failed */
2257                 printk(KERN_WARNING "cciss: read capacity failed\n");
2258                 *total_size = 0;
2259                 *block_size = BLOCK_SIZE;
2260         }
2261         printk(KERN_INFO "      blocks= %llu block_size= %d\n",
2262                (unsigned long long)*total_size+1, *block_size);
2263         kfree(buf);
2264 }
2265
2266 static int cciss_revalidate(struct gendisk *disk)
2267 {
2268         ctlr_info_t *h = get_host(disk);
2269         drive_info_struct *drv = get_drv(disk);
2270         int logvol;
2271         int FOUND = 0;
2272         unsigned int block_size;
2273         sector_t total_size;
2274         InquiryData_struct *inq_buff = NULL;
2275
2276         for (logvol = 0; logvol < CISS_MAX_LUN; logvol++) {
2277                 if (h->drv[logvol].LunID == drv->LunID) {
2278                         FOUND = 1;
2279                         break;
2280                 }
2281         }
2282
2283         if (!FOUND)
2284                 return 1;
2285
2286         inq_buff = kmalloc(sizeof(InquiryData_struct), GFP_KERNEL);
2287         if (inq_buff == NULL) {
2288                 printk(KERN_WARNING "cciss: out of memory\n");
2289                 return 1;
2290         }
2291         if (h->cciss_read == CCISS_READ_10) {
2292                 cciss_read_capacity(h->ctlr, logvol, 1,
2293                                         &total_size, &block_size);
2294         } else {
2295                 cciss_read_capacity_16(h->ctlr, logvol, 1,
2296                                         &total_size, &block_size);
2297         }
2298         cciss_geometry_inquiry(h->ctlr, logvol, 1, total_size, block_size,
2299                                inq_buff, drv);
2300
2301         blk_queue_hardsect_size(drv->queue, drv->block_size);
2302         set_capacity(disk, drv->nr_blocks);
2303
2304         kfree(inq_buff);
2305         return 0;
2306 }
2307
2308 /*
2309  *   Wait polling for a command to complete.
2310  *   The memory mapped FIFO is polled for the completion.
2311  *   Used only at init time, interrupts from the HBA are disabled.
2312  */
2313 static unsigned long pollcomplete(int ctlr)
2314 {
2315         unsigned long done;
2316         int i;
2317
2318         /* Wait (up to 20 seconds) for a command to complete */
2319
2320         for (i = 20 * HZ; i > 0; i--) {
2321                 done = hba[ctlr]->access.command_completed(hba[ctlr]);
2322                 if (done == FIFO_EMPTY)
2323                         schedule_timeout_uninterruptible(1);
2324                 else
2325                         return done;
2326         }
2327         /* Invalid address to tell caller we ran out of time */
2328         return 1;
2329 }
2330
2331 static int add_sendcmd_reject(__u8 cmd, int ctlr, unsigned long complete)
2332 {
2333         /* We get in here if sendcmd() is polling for completions
2334            and gets some command back that it wasn't expecting --
2335            something other than that which it just sent down.
2336            Ordinarily, that shouldn't happen, but it can happen when
2337            the scsi tape stuff gets into error handling mode, and
2338            starts using sendcmd() to try to abort commands and
2339            reset tape drives.  In that case, sendcmd may pick up
2340            completions of commands that were sent to logical drives
2341            through the block i/o system, or cciss ioctls completing, etc.
2342            In that case, we need to save those completions for later
2343            processing by the interrupt handler.
2344          */
2345
2346 #ifdef CONFIG_CISS_SCSI_TAPE
2347         struct sendcmd_reject_list *srl = &hba[ctlr]->scsi_rejects;
2348
2349         /* If it's not the scsi tape stuff doing error handling, (abort */
2350         /* or reset) then we don't expect anything weird. */
2351         if (cmd != CCISS_RESET_MSG && cmd != CCISS_ABORT_MSG) {
2352 #endif
2353                 printk(KERN_WARNING "cciss cciss%d: SendCmd "
2354                        "Invalid command list address returned! (%lx)\n",
2355                        ctlr, complete);
2356                 /* not much we can do. */
2357 #ifdef CONFIG_CISS_SCSI_TAPE
2358                 return 1;
2359         }
2360
2361         /* We've sent down an abort or reset, but something else
2362            has completed */
2363         if (srl->ncompletions >= (hba[ctlr]->nr_cmds + 2)) {
2364                 /* Uh oh.  No room to save it for later... */
2365                 printk(KERN_WARNING "cciss%d: Sendcmd: Invalid command addr, "
2366                        "reject list overflow, command lost!\n", ctlr);
2367                 return 1;
2368         }
2369         /* Save it for later */
2370         srl->complete[srl->ncompletions] = complete;
2371         srl->ncompletions++;
2372 #endif
2373         return 0;
2374 }
2375
2376 /*
2377  * Send a command to the controller, and wait for it to complete.
2378  * Only used at init time.
2379  */
2380 static int sendcmd(__u8 cmd, int ctlr, void *buff, size_t size, unsigned int use_unit_num,      /* 0: address the controller,
2381                                                                                                    1: address logical volume log_unit,
2382                                                                                                    2: periph device address is scsi3addr */
2383                    unsigned int log_unit,
2384                    __u8 page_code, unsigned char *scsi3addr, int cmd_type)
2385 {
2386         CommandList_struct *c;
2387         int i;
2388         unsigned long complete;
2389         ctlr_info_t *info_p = hba[ctlr];
2390         u64bit buff_dma_handle;
2391         int status, done = 0;
2392
2393         if ((c = cmd_alloc(info_p, 1)) == NULL) {
2394                 printk(KERN_WARNING "cciss: unable to get memory");
2395                 return IO_ERROR;
2396         }
2397         status = fill_cmd(c, cmd, ctlr, buff, size, use_unit_num,
2398                           log_unit, page_code, scsi3addr, cmd_type);
2399         if (status != IO_OK) {
2400                 cmd_free(info_p, c, 1);
2401                 return status;
2402         }
2403       resend_cmd1:
2404         /*
2405          * Disable interrupt
2406          */
2407 #ifdef CCISS_DEBUG
2408         printk(KERN_DEBUG "cciss: turning intr off\n");
2409 #endif                          /* CCISS_DEBUG */
2410         info_p->access.set_intr_mask(info_p, CCISS_INTR_OFF);
2411
2412         /* Make sure there is room in the command FIFO */
2413         /* Actually it should be completely empty at this time */
2414         /* unless we are in here doing error handling for the scsi */
2415         /* tape side of the driver. */
2416         for (i = 200000; i > 0; i--) {
2417                 /* if fifo isn't full go */
2418                 if (!(info_p->access.fifo_full(info_p))) {
2419
2420                         break;
2421                 }
2422                 udelay(10);
2423                 printk(KERN_WARNING "cciss cciss%d: SendCmd FIFO full,"
2424                        " waiting!\n", ctlr);
2425         }
2426         /*
2427          * Send the cmd
2428          */
2429         info_p->access.submit_command(info_p, c);
2430         done = 0;
2431         do {
2432                 complete = pollcomplete(ctlr);
2433
2434 #ifdef CCISS_DEBUG
2435                 printk(KERN_DEBUG "cciss: command completed\n");
2436 #endif                          /* CCISS_DEBUG */
2437
2438                 if (complete == 1) {
2439                         printk(KERN_WARNING
2440                                "cciss cciss%d: SendCmd Timeout out, "
2441                                "No command list address returned!\n", ctlr);
2442                         status = IO_ERROR;
2443                         done = 1;
2444                         break;
2445                 }
2446
2447                 /* This will need to change for direct lookup completions */
2448                 if ((complete & CISS_ERROR_BIT)
2449                     && (complete & ~CISS_ERROR_BIT) == c->busaddr) {
2450                         /* if data overrun or underun on Report command
2451                            ignore it
2452                          */
2453                         if (((c->Request.CDB[0] == CISS_REPORT_LOG) ||
2454                              (c->Request.CDB[0] == CISS_REPORT_PHYS) ||
2455                              (c->Request.CDB[0] == CISS_INQUIRY)) &&
2456                             ((c->err_info->CommandStatus ==
2457                               CMD_DATA_OVERRUN) ||
2458                              (c->err_info->CommandStatus == CMD_DATA_UNDERRUN)
2459                             )) {
2460                                 complete = c->busaddr;
2461                         } else {
2462                                 if (c->err_info->CommandStatus ==
2463                                     CMD_UNSOLICITED_ABORT) {
2464                                         printk(KERN_WARNING "cciss%d: "
2465                                                "unsolicited abort %p\n",
2466                                                ctlr, c);
2467                                         if (c->retry_count < MAX_CMD_RETRIES) {
2468                                                 printk(KERN_WARNING
2469                                                        "cciss%d: retrying %p\n",
2470                                                        ctlr, c);
2471                                                 c->retry_count++;
2472                                                 /* erase the old error */
2473                                                 /* information */
2474                                                 memset(c->err_info, 0,
2475                                                        sizeof
2476                                                        (ErrorInfo_struct));
2477                                                 goto resend_cmd1;
2478                                         } else {
2479                                                 printk(KERN_WARNING
2480                                                        "cciss%d: retried %p too "
2481                                                        "many times\n", ctlr, c);
2482                                                 status = IO_ERROR;
2483                                                 goto cleanup1;
2484                                         }
2485                                 } else if (c->err_info->CommandStatus ==
2486                                            CMD_UNABORTABLE) {
2487                                         printk(KERN_WARNING
2488                                                "cciss%d: command could not be aborted.\n",
2489                                                ctlr);
2490                                         status = IO_ERROR;
2491                                         goto cleanup1;
2492                                 }
2493                                 printk(KERN_WARNING "ciss ciss%d: sendcmd"
2494                                        " Error %x \n", ctlr,
2495                                        c->err_info->CommandStatus);
2496                                 printk(KERN_WARNING "ciss ciss%d: sendcmd"
2497                                        " offensive info\n"
2498                                        "  size %x\n   num %x   value %x\n",
2499                                        ctlr,
2500                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2501                                        offense_size,
2502                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2503                                        offense_num,
2504                                        c->err_info->MoreErrInfo.Invalid_Cmd.
2505                                        offense_value);
2506                                 status = IO_ERROR;
2507                                 goto cleanup1;
2508                         }
2509                 }
2510                 /* This will need changing for direct lookup completions */
2511                 if (complete != c->busaddr) {
2512                         if (add_sendcmd_reject(cmd, ctlr, complete) != 0) {
2513                                 BUG();  /* we are pretty much hosed if we get here. */
2514                         }
2515                         continue;
2516                 } else
2517                         done = 1;
2518         } while (!done);
2519
2520       cleanup1:
2521         /* unlock the data buffer from DMA */
2522         buff_dma_handle.val32.lower = c->SG[0].Addr.lower;
2523         buff_dma_handle.val32.upper = c->SG[0].Addr.upper;
2524         pci_unmap_single(info_p->pdev, (dma_addr_t) buff_dma_handle.val,
2525                          c->SG[0].Len, PCI_DMA_BIDIRECTIONAL);
2526 #ifdef CONFIG_CISS_SCSI_TAPE
2527         /* if we saved some commands for later, process them now. */
2528         if (info_p->scsi_rejects.ncompletions > 0)
2529                 do_cciss_intr(0, info_p);
2530 #endif
2531         cmd_free(info_p, c, 1);
2532         return status;
2533 }
2534
2535 /*
2536  * Map (physical) PCI mem into (virtual) kernel space
2537  */
2538 static void __iomem *remap_pci_mem(ulong base, ulong size)
2539 {
2540         ulong page_base = ((ulong) base) & PAGE_MASK;
2541         ulong page_offs = ((ulong) base) - page_base;
2542         void __iomem *page_remapped = ioremap(page_base, page_offs + size);
2543
2544         return page_remapped ? (page_remapped + page_offs) : NULL;
2545 }
2546
2547 /*
2548  * Takes jobs of the Q and sends them to the hardware, then puts it on
2549  * the Q to wait for completion.
2550  */
2551 static void start_io(ctlr_info_t *h)
2552 {
2553         CommandList_struct *c;
2554
2555         while (!hlist_empty(&h->reqQ)) {
2556                 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
2557                 /* can't do anything if fifo is full */
2558                 if ((h->access.fifo_full(h))) {
2559                         printk(KERN_WARNING "cciss: fifo full\n");
2560                         break;
2561                 }
2562
2563                 /* Get the first entry from the Request Q */
2564                 removeQ(c);
2565                 h->Qdepth--;
2566
2567                 /* Tell the controller execute command */
2568                 h->access.submit_command(h, c);
2569
2570                 /* Put job onto the completed Q */
2571                 addQ(&h->cmpQ, c);
2572         }
2573 }
2574
2575 /* Assumes that CCISS_LOCK(h->ctlr) is held. */
2576 /* Zeros out the error record and then resends the command back */
2577 /* to the controller */
2578 static inline void resend_cciss_cmd(ctlr_info_t *h, CommandList_struct *c)
2579 {
2580         /* erase the old error information */
2581         memset(c->err_info, 0, sizeof(ErrorInfo_struct));
2582
2583         /* add it to software queue and then send it to the controller */
2584         addQ(&h->reqQ, c);
2585         h->Qdepth++;
2586         if (h->Qdepth > h->maxQsinceinit)
2587                 h->maxQsinceinit = h->Qdepth;
2588
2589         start_io(h);
2590 }
2591
/*
 * Pack four status bytes into one word (inverse of the macros in scsi.h):
 * bits 0-7 scsi status, 8-15 message byte, 16-23 host byte, 24-31 driver
 * byte.  Only the low 8 bits of each argument are used.
 */
static inline unsigned int make_status_bytes(unsigned int scsi_status_byte,
	unsigned int msg_byte, unsigned int host_byte,
	unsigned int driver_byte)
{
	unsigned int packed;

	/* build from the most significant byte downwards */
	packed = driver_byte & 0xff;
	packed = (packed << 8) | (host_byte & 0xff);
	packed = (packed << 8) | (msg_byte & 0xff);
	packed = (packed << 8) | (scsi_status_byte & 0xff);

	return packed;
}
2602
2603 static inline int evaluate_target_status(ctlr_info_t *h,
2604                         CommandList_struct *cmd, int *retry_cmd)
2605 {
2606         unsigned char sense_key;
2607         unsigned char status_byte, msg_byte, host_byte, driver_byte;
2608         int error_value;
2609
2610         *retry_cmd = 0;
2611         /* If we get in here, it means we got "target status", that is, scsi status */
2612         status_byte = cmd->err_info->ScsiStatus;
2613         driver_byte = DRIVER_OK;
2614         msg_byte = cmd->err_info->CommandStatus; /* correct?  seems too device specific */
2615
2616         if (blk_pc_request(cmd->rq))
2617                 host_byte = DID_PASSTHROUGH;
2618         else
2619                 host_byte = DID_OK;
2620
2621         error_value = make_status_bytes(status_byte, msg_byte,
2622                 host_byte, driver_byte);
2623
2624         if (cmd->err_info->ScsiStatus != SAM_STAT_CHECK_CONDITION) {
2625                 if (!blk_pc_request(cmd->rq))
2626                         printk(KERN_WARNING "cciss: cmd %p "
2627                                "has SCSI Status 0x%x\n",
2628                                cmd, cmd->err_info->ScsiStatus);
2629                 return error_value;
2630         }
2631
2632         /* check the sense key */
2633         sense_key = 0xf & cmd->err_info->SenseInfo[2];
2634         /* no status or recovered error */
2635         if (((sense_key == 0x0) || (sense_key == 0x1)) && !blk_pc_request(cmd->rq))
2636                 error_value = 0;
2637
2638         if (check_for_unit_attention(h, cmd)) {
2639                 *retry_cmd = !blk_pc_request(cmd->rq);
2640                 return 0;
2641         }
2642
2643         if (!blk_pc_request(cmd->rq)) { /* Not SG_IO or similar? */
2644                 if (error_value != 0)
2645                         printk(KERN_WARNING "cciss: cmd %p has CHECK CONDITION"
2646                                " sense key = 0x%x\n", cmd, sense_key);
2647                 return error_value;
2648         }
2649
2650         /* SG_IO or similar, copy sense data back */
2651         if (cmd->rq->sense) {
2652                 if (cmd->rq->sense_len > cmd->err_info->SenseLen)
2653                         cmd->rq->sense_len = cmd->err_info->SenseLen;
2654                 memcpy(cmd->rq->sense, cmd->err_info->SenseInfo,
2655                         cmd->rq->sense_len);
2656         } else
2657                 cmd->rq->sense_len = 0;
2658
2659         return error_value;
2660 }
2661
2662 /* checks the status of the job and calls complete buffers to mark all
2663  * buffers for the completed job. Note that this function does not need
2664  * to hold the hba/queue lock.
2665  */
2666 static inline void complete_command(ctlr_info_t *h, CommandList_struct *cmd,
2667                                     int timeout)
2668 {
2669         int retry_cmd = 0;
2670         struct request *rq = cmd->rq;
2671
2672         rq->errors = 0;
2673
2674         if (timeout)
2675                 rq->errors = make_status_bytes(0, 0, 0, DRIVER_TIMEOUT);
2676
2677         if (cmd->err_info->CommandStatus == 0)  /* no error has occurred */
2678                 goto after_error_processing;
2679
2680         switch (cmd->err_info->CommandStatus) {
2681         case CMD_TARGET_STATUS:
2682                 rq->errors = evaluate_target_status(h, cmd, &retry_cmd);
2683                 break;
2684         case CMD_DATA_UNDERRUN:
2685                 if (blk_fs_request(cmd->rq)) {
2686                         printk(KERN_WARNING "cciss: cmd %p has"
2687                                " completed with data underrun "
2688                                "reported\n", cmd);
2689                         cmd->rq->resid_len = cmd->err_info->ResidualCnt;
2690                 }
2691                 break;
2692         case CMD_DATA_OVERRUN:
2693                 if (blk_fs_request(cmd->rq))
2694                         printk(KERN_WARNING "cciss: cmd %p has"
2695                                " completed with data overrun "
2696                                "reported\n", cmd);
2697                 break;
2698         case CMD_INVALID:
2699                 printk(KERN_WARNING "cciss: cmd %p is "
2700                        "reported invalid\n", cmd);
2701                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2702                         cmd->err_info->CommandStatus, DRIVER_OK,
2703                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2704                 break;
2705         case CMD_PROTOCOL_ERR:
2706                 printk(KERN_WARNING "cciss: cmd %p has "
2707                        "protocol error \n", cmd);
2708                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2709                         cmd->err_info->CommandStatus, DRIVER_OK,
2710                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2711                 break;
2712         case CMD_HARDWARE_ERR:
2713                 printk(KERN_WARNING "cciss: cmd %p had "
2714                        " hardware error\n", cmd);
2715                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2716                         cmd->err_info->CommandStatus, DRIVER_OK,
2717                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2718                 break;
2719         case CMD_CONNECTION_LOST:
2720                 printk(KERN_WARNING "cciss: cmd %p had "
2721                        "connection lost\n", cmd);
2722                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2723                         cmd->err_info->CommandStatus, DRIVER_OK,
2724                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2725                 break;
2726         case CMD_ABORTED:
2727                 printk(KERN_WARNING "cciss: cmd %p was "
2728                        "aborted\n", cmd);
2729                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2730                         cmd->err_info->CommandStatus, DRIVER_OK,
2731                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2732                 break;
2733         case CMD_ABORT_FAILED:
2734                 printk(KERN_WARNING "cciss: cmd %p reports "
2735                        "abort failed\n", cmd);
2736                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2737                         cmd->err_info->CommandStatus, DRIVER_OK,
2738                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2739                 break;
2740         case CMD_UNSOLICITED_ABORT:
2741                 printk(KERN_WARNING "cciss%d: unsolicited "
2742                        "abort %p\n", h->ctlr, cmd);
2743                 if (cmd->retry_count < MAX_CMD_RETRIES) {
2744                         retry_cmd = 1;
2745                         printk(KERN_WARNING
2746                                "cciss%d: retrying %p\n", h->ctlr, cmd);
2747                         cmd->retry_count++;
2748                 } else
2749                         printk(KERN_WARNING
2750                                "cciss%d: %p retried too "
2751                                "many times\n", h->ctlr, cmd);
2752                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2753                         cmd->err_info->CommandStatus, DRIVER_OK,
2754                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ABORT);
2755                 break;
2756         case CMD_TIMEOUT:
2757                 printk(KERN_WARNING "cciss: cmd %p timedout\n", cmd);
2758                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2759                         cmd->err_info->CommandStatus, DRIVER_OK,
2760                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2761                 break;
2762         default:
2763                 printk(KERN_WARNING "cciss: cmd %p returned "
2764                        "unknown status %x\n", cmd,
2765                        cmd->err_info->CommandStatus);
2766                 rq->errors = make_status_bytes(SAM_STAT_GOOD,
2767                         cmd->err_info->CommandStatus, DRIVER_OK,
2768                         blk_pc_request(cmd->rq) ? DID_PASSTHROUGH : DID_ERROR);
2769         }
2770
2771 after_error_processing:
2772
2773         /* We need to return this command */
2774         if (retry_cmd) {
2775                 resend_cciss_cmd(h, cmd);
2776                 return;
2777         }
2778         cmd->rq->completion_data = cmd;
2779         blk_complete_request(cmd->rq);
2780 }
2781
2782 /*
2783  * Get a request and submit it to the controller.
2784  */
2785 static void do_cciss_request(struct request_queue *q)
2786 {
2787         ctlr_info_t *h = q->queuedata;
2788         CommandList_struct *c;
2789         sector_t start_blk;
2790         int seg;
2791         struct request *creq;
2792         u64bit temp64;
2793         struct scatterlist tmp_sg[MAXSGENTRIES];
2794         drive_info_struct *drv;
2795         int i, dir;
2796
2797         /* We call start_io here in case there is a command waiting on the
2798          * queue that has not been sent.
2799          */
2800         if (blk_queue_plugged(q))
2801                 goto startio;
2802
2803       queue:
2804         creq = blk_peek_request(q);
2805         if (!creq)
2806                 goto startio;
2807
2808         BUG_ON(creq->nr_phys_segments > MAXSGENTRIES);
2809
2810         if ((c = cmd_alloc(h, 1)) == NULL)
2811                 goto full;
2812
2813         blk_start_request(creq);
2814
2815         spin_unlock_irq(q->queue_lock);
2816
2817         c->cmd_type = CMD_RWREQ;
2818         c->rq = creq;
2819
2820         /* fill in the request */
2821         drv = creq->rq_disk->private_data;
2822         c->Header.ReplyQueue = 0;       // unused in simple mode
2823         /* got command from pool, so use the command block index instead */
2824         /* for direct lookups. */
2825         /* The first 2 bits are reserved for controller error reporting. */
2826         c->Header.Tag.lower = (c->cmdindex << 3);
2827         c->Header.Tag.lower |= 0x04;    /* flag for direct lookup. */
2828         c->Header.LUN.LogDev.VolId = drv->LunID;
2829         c->Header.LUN.LogDev.Mode = 1;
2830         c->Request.CDBLen = 10; // 12 byte commands not in FW yet;
2831         c->Request.Type.Type = TYPE_CMD;        // It is a command.
2832         c->Request.Type.Attribute = ATTR_SIMPLE;
2833         c->Request.Type.Direction =
2834             (rq_data_dir(creq) == READ) ? XFER_READ : XFER_WRITE;
2835         c->Request.Timeout = 0; // Don't time out
2836         c->Request.CDB[0] =
2837             (rq_data_dir(creq) == READ) ? h->cciss_read : h->cciss_write;
2838         start_blk = blk_rq_pos(creq);
2839 #ifdef CCISS_DEBUG
2840         printk(KERN_DEBUG "ciss: sector =%d nr_sectors=%d\n",
2841                (int)blk_rq_pos(creq), (int)blk_rq_sectors(creq));
2842 #endif                          /* CCISS_DEBUG */
2843
2844         sg_init_table(tmp_sg, MAXSGENTRIES);
2845         seg = blk_rq_map_sg(q, creq, tmp_sg);
2846
2847         /* get the DMA records for the setup */
2848         if (c->Request.Type.Direction == XFER_READ)
2849                 dir = PCI_DMA_FROMDEVICE;
2850         else
2851                 dir = PCI_DMA_TODEVICE;
2852
2853         for (i = 0; i < seg; i++) {
2854                 c->SG[i].Len = tmp_sg[i].length;
2855                 temp64.val = (__u64) pci_map_page(h->pdev, sg_page(&tmp_sg[i]),
2856                                                   tmp_sg[i].offset,
2857                                                   tmp_sg[i].length, dir);
2858                 c->SG[i].Addr.lower = temp64.val32.lower;
2859                 c->SG[i].Addr.upper = temp64.val32.upper;
2860                 c->SG[i].Ext = 0;       // we are not chaining
2861         }
2862         /* track how many SG entries we are using */
2863         if (seg > h->maxSG)
2864                 h->maxSG = seg;
2865
2866 #ifdef CCISS_DEBUG
2867         printk(KERN_DEBUG "cciss: Submitting %u sectors in %d segments\n",
2868                blk_rq_sectors(creq), seg);
2869 #endif                          /* CCISS_DEBUG */
2870
2871         c->Header.SGList = c->Header.SGTotal = seg;
2872         if (likely(blk_fs_request(creq))) {
2873                 if(h->cciss_read == CCISS_READ_10) {
2874                         c->Request.CDB[1] = 0;
2875                         c->Request.CDB[2] = (start_blk >> 24) & 0xff;   //MSB
2876                         c->Request.CDB[3] = (start_blk >> 16) & 0xff;
2877                         c->Request.CDB[4] = (start_blk >> 8) & 0xff;
2878                         c->Request.CDB[5] = start_blk & 0xff;
2879                         c->Request.CDB[6] = 0;  // (sect >> 24) & 0xff; MSB
2880                         c->Request.CDB[7] = (blk_rq_sectors(creq) >> 8) & 0xff;
2881                         c->Request.CDB[8] = blk_rq_sectors(creq) & 0xff;
2882                         c->Request.CDB[9] = c->Request.CDB[11] = c->Request.CDB[12] = 0;
2883                 } else {
2884                         u32 upper32 = upper_32_bits(start_blk);
2885
2886                         c->Request.CDBLen = 16;
2887                         c->Request.CDB[1]= 0;
2888                         c->Request.CDB[2]= (upper32 >> 24) & 0xff;      //MSB
2889                         c->Request.CDB[3]= (upper32 >> 16) & 0xff;
2890                         c->Request.CDB[4]= (upper32 >>  8) & 0xff;
2891                         c->Request.CDB[5]= upper32 & 0xff;
2892                         c->Request.CDB[6]= (start_blk >> 24) & 0xff;
2893                         c->Request.CDB[7]= (start_blk >> 16) & 0xff;
2894                         c->Request.CDB[8]= (start_blk >>  8) & 0xff;
2895                         c->Request.CDB[9]= start_blk & 0xff;
2896                         c->Request.CDB[10]= (blk_rq_sectors(creq) >> 24) & 0xff;
2897                         c->Request.CDB[11]= (blk_rq_sectors(creq) >> 16) & 0xff;
2898                         c->Request.CDB[12]= (blk_rq_sectors(creq) >>  8) & 0xff;
2899                         c->Request.CDB[13]= blk_rq_sectors(creq) & 0xff;
2900                         c->Request.CDB[14] = c->Request.CDB[15] = 0;
2901                 }
2902         } else if (blk_pc_request(creq)) {
2903                 c->Request.CDBLen = creq->cmd_len;
2904                 memcpy(c->Request.CDB, creq->cmd, BLK_MAX_CDB);
2905         } else {
2906                 printk(KERN_WARNING "cciss%d: bad request type %d\n", h->ctlr, creq->cmd_type);
2907                 BUG();
2908         }
2909
2910         spin_lock_irq(q->queue_lock);
2911
2912         addQ(&h->reqQ, c);
2913         h->Qdepth++;
2914         if (h->Qdepth > h->maxQsinceinit)
2915                 h->maxQsinceinit = h->Qdepth;
2916
2917         goto queue;
2918 full:
2919         blk_stop_queue(q);
2920 startio:
2921         /* We will already have the driver lock here so not need
2922          * to lock it.
2923          */
2924         start_io(h);
2925 }
2926
2927 static inline unsigned long get_next_completion(ctlr_info_t *h)
2928 {
2929 #ifdef CONFIG_CISS_SCSI_TAPE
2930         /* Any rejects from sendcmd() lying around? Process them first */
2931         if (h->scsi_rejects.ncompletions == 0)
2932                 return h->access.command_completed(h);
2933         else {
2934                 struct sendcmd_reject_list *srl;
2935                 int n;
2936                 srl = &h->scsi_rejects;
2937                 n = --srl->ncompletions;
2938                 /* printk("cciss%d: processing saved reject\n", h->ctlr); */
2939                 printk("p");
2940                 return srl->complete[n];
2941         }
2942 #else
2943         return h->access.command_completed(h);
2944 #endif
2945 }
2946
2947 static inline int interrupt_pending(ctlr_info_t *h)
2948 {
2949 #ifdef CONFIG_CISS_SCSI_TAPE
2950         return (h->access.intr_pending(h)
2951                 || (h->scsi_rejects.ncompletions > 0));
2952 #else
2953         return h->access.intr_pending(h);
2954 #endif
2955 }
2956
2957 static inline long interrupt_not_for_us(ctlr_info_t *h)
2958 {
2959 #ifdef CONFIG_CISS_SCSI_TAPE
2960         return (((h->access.intr_pending(h) == 0) ||
2961                  (h->interrupts_enabled == 0))
2962                 && (h->scsi_rejects.ncompletions == 0));
2963 #else
2964         return (((h->access.intr_pending(h) == 0) ||
2965                  (h->interrupts_enabled == 0)));
2966 #endif
2967 }
2968
2969 static irqreturn_t do_cciss_intr(int irq, void *dev_id)
2970 {
2971         ctlr_info_t *h = dev_id;
2972         CommandList_struct *c;
2973         unsigned long flags;
2974         __u32 a, a1, a2;
2975
2976         if (interrupt_not_for_us(h))
2977                 return IRQ_NONE;
2978         /*
2979          * If there are completed commands in the completion queue,
2980          * we had better do something about it.
2981          */
2982         spin_lock_irqsave(CCISS_LOCK(h->ctlr), flags);
2983         while (interrupt_pending(h)) {
2984                 while ((a = get_next_completion(h)) != FIFO_EMPTY) {
2985                         a1 = a;
2986                         if ((a & 0x04)) {
2987                                 a2 = (a >> 3);
2988                                 if (a2 >= h->nr_cmds) {
2989                                         printk(KERN_WARNING
2990                                                "cciss: controller cciss%d failed, stopping.\n",
2991                                                h->ctlr);
2992                                         fail_all_cmds(h->ctlr);
2993                                         return IRQ_HANDLED;
2994                                 }
2995
2996                                 c = h->cmd_pool + a2;
2997                                 a = c->busaddr;
2998
2999                         } else {
3000                                 struct hlist_node *tmp;
3001
3002                                 a &= ~3;
3003                                 c = NULL;
3004                                 hlist_for_each_entry(c, tmp, &h->cmpQ, list) {
3005                                         if (c->busaddr == a)
3006                                                 break;
3007                                 }
3008                         }
3009                         /*
3010                          * If we've found the command, take it off the
3011                          * completion Q and free it
3012                          */
3013                         if (c && c->busaddr == a) {
3014                                 removeQ(c);
3015                                 if (c->cmd_type == CMD_RWREQ) {
3016                                         complete_command(h, c, 0);
3017                                 } else if (c->cmd_type == CMD_IOCTL_PEND) {
3018                                         complete(c->waiting);
3019                                 }
3020 #                               ifdef CONFIG_CISS_SCSI_TAPE
3021                                 else if (c->cmd_type == CMD_SCSI)
3022                                         complete_scsi_command(c, 0, a1);
3023 #                               endif
3024                                 continue;
3025                         }
3026                 }
3027         }
3028
3029         spin_unlock_irqrestore(CCISS_LOCK(h->ctlr), flags);
3030         return IRQ_HANDLED;
3031 }
3032
3033 static int scan_thread(void *data)
3034 {
3035         ctlr_info_t *h = data;
3036         int rc;
3037         DECLARE_COMPLETION_ONSTACK(wait);
3038         h->rescan_wait = &wait;
3039
3040         for (;;) {
3041                 rc = wait_for_completion_interruptible(&wait);
3042                 if (kthread_should_stop())
3043                         break;
3044                 if (!rc)
3045                         rebuild_lun_table(h, 0);
3046         }
3047         return 0;
3048 }
3049
3050 static int check_for_unit_attention(ctlr_info_t *h, CommandList_struct *c)
3051 {
3052         if (c->err_info->SenseInfo[2] != UNIT_ATTENTION)
3053                 return 0;
3054
3055         switch (c->err_info->SenseInfo[12]) {
3056         case STATE_CHANGED:
3057                 printk(KERN_WARNING "cciss%d: a state change "
3058                         "detected, command retried\n", h->ctlr);
3059                 return 1;
3060         break;
3061         case LUN_FAILED:
3062                 printk(KERN_WARNING "cciss%d: LUN failure "
3063                         "detected, action required\n", h->ctlr);
3064                 return 1;
3065         break;
3066         case REPORT_LUNS_CHANGED:
3067                 printk(KERN_WARNING "cciss%d: report LUN data "
3068                         "changed\n", h->ctlr);
3069                 if (h->rescan_wait)
3070                         complete(h->rescan_wait);
3071                 return 1;
3072         break;
3073         case POWER_OR_RESET:
3074                 printk(KERN_WARNING "cciss%d: a power on "
3075                         "or device reset detected\n", h->ctlr);
3076                 return 1;
3077         break;
3078         case UNIT_ATTENTION_CLEARED:
3079                 printk(KERN_WARNING "cciss%d: unit attention "
3080                     "cleared by another initiator\n", h->ctlr);
3081                 return 1;
3082         break;
3083         default:
3084                 printk(KERN_WARNING "cciss%d: unknown "
3085                         "unit attention detected\n", h->ctlr);
3086                                 return 1;
3087         }
3088 }
3089
/*
 * Dump the controller configuration table (debug builds only).
 *
 * The table is read through readb()/readl() rather than dereferenced
 * directly, for portability.
 */
#ifdef CCISS_DEBUG
static void print_cfg_table(CfgTable_struct *tb)
{
        int i;
        char temp_name[17];     /* 16 name bytes plus the terminator */

        printk("Controller Configuration information\n");
        printk("------------------------------------\n");
        for (i = 0; i < 4; i++)
                temp_name[i] = readb(&(tb->Signature[i]));
        temp_name[4] = '\0';
        printk("   Signature = %s\n", temp_name);
        printk("   Spec Number = %d\n", readl(&(tb->SpecValence)));
        printk("   Transport methods supported = 0x%x\n",
               readl(&(tb->TransportSupport)));
        printk("   Transport methods active = 0x%x\n",
               readl(&(tb->TransportActive)));
        printk("   Requested transport Method = 0x%x\n",
               readl(&(tb->HostWrite.TransportRequest)));
        printk("   Coalesce Interrupt Delay = 0x%x\n",
               readl(&(tb->HostWrite.CoalIntDelay)));
        printk("   Coalesce Interrupt Count = 0x%x\n",
               readl(&(tb->HostWrite.CoalIntCount)));
        /* a count, printed in decimal without a misleading 0x prefix */
        printk("   Max outstanding commands = %d\n",
               readl(&(tb->CmdsOutMax)));
        printk("   Bus Types = 0x%x\n", readl(&(tb->BusTypes)));
        for (i = 0; i < 16; i++)
                temp_name[i] = readb(&(tb->ServerName[i]));
        temp_name[16] = '\0';
        printk("   Server Name = %s\n", temp_name);
        printk("   Heartbeat Counter = 0x%x\n\n\n", readl(&(tb->HeartBeat)));
}
#endif                          /* CCISS_DEBUG */
3128
3129 static int find_PCI_BAR_index(struct pci_dev *pdev, unsigned long pci_bar_addr)
3130 {
3131         int i, offset, mem_type, bar_type;
3132         if (pci_bar_addr == PCI_BASE_ADDRESS_0) /* looking for BAR zero? */
3133                 return 0;
3134         offset = 0;
3135         for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3136                 bar_type = pci_resource_flags(pdev, i) & PCI_BASE_ADDRESS_SPACE;
3137                 if (bar_type == PCI_BASE_ADDRESS_SPACE_IO)
3138                         offset += 4;
3139                 else {
3140                         mem_type = pci_resource_flags(pdev, i) &
3141                             PCI_BASE_ADDRESS_MEM_TYPE_MASK;
3142                         switch (mem_type) {
3143                         case PCI_BASE_ADDRESS_MEM_TYPE_32:
3144                         case PCI_BASE_ADDRESS_MEM_TYPE_1M:
3145                                 offset += 4;    /* 32 bit */
3146                                 break;
3147                         case PCI_BASE_ADDRESS_MEM_TYPE_64:
3148                                 offset += 8;
3149                                 break;
3150                         default:        /* reserved in PCI 2.2 */
3151                                 printk(KERN_WARNING
3152                                        "Base address is invalid\n");
3153                                 return -1;
3154                                 break;
3155                         }
3156                 }
3157                 if (offset == pci_bar_addr - PCI_BASE_ADDRESS_0)
3158                         return i + 1;
3159         }
3160         return -1;
3161 }
3162
3163 /* If MSI/MSI-X is supported by the kernel we will try to enable it on
3164  * controllers that are capable. If not, we use IO-APIC mode.
3165  */
3166
3167 static void __devinit cciss_interrupt_mode(ctlr_info_t *c,
3168                                            struct pci_dev *pdev, __u32 board_id)
3169 {
3170 #ifdef CONFIG_PCI_MSI
3171         int err;
3172         struct msix_entry cciss_msix_entries[4] = { {0, 0}, {0, 1},
3173         {0, 2}, {0, 3}
3174         };
3175
3176         /* Some boards advertise MSI but don't really support it */
3177         if ((board_id == 0x40700E11) ||
3178             (board_id == 0x40800E11) ||
3179             (board_id == 0x40820E11) || (board_id == 0x40830E11))
3180                 goto default_int_mode;
3181
3182         if (pci_find_capability(pdev, PCI_CAP_ID_MSIX)) {
3183                 err = pci_enable_msix(pdev, cciss_msix_entries, 4);
3184                 if (!err) {
3185                         c->intr[0] = cciss_msix_entries[0].vector;
3186                         c->intr[1] = cciss_msix_entries[1].vector;
3187                         c->intr[2] = cciss_msix_entries[2].vector;
3188                         c->intr[3] = cciss_msix_entries[3].vector;
3189                         c->msix_vector = 1;
3190                         return;
3191                 }
3192                 if (err > 0) {
3193                         printk(KERN_WARNING "cciss: only %d MSI-X vectors "
3194                                "available\n", err);
3195                         goto default_int_mode;
3196                 } else {
3197                         printk(KERN_WARNING "cciss: MSI-X init failed %d\n",
3198                                err);
3199                         goto default_int_mode;
3200                 }
3201         }
3202         if (pci_find_capability(pdev, PCI_CAP_ID_MSI)) {
3203                 if (!pci_enable_msi(pdev)) {
3204                         c->msi_vector = 1;
3205                 } else {
3206                         printk(KERN_WARNING "cciss: MSI init failed\n");
3207                 }
3208         }
3209 default_int_mode:
3210 #endif                          /* CONFIG_PCI_MSI */
3211         /* if we get here we're going to use the default interrupt mode */
3212         c->intr[SIMPLE_MODE_INT] = pdev->irq;
3213         return;
3214 }
3215
3216 static int __devinit cciss_pci_init(ctlr_info_t *c, struct pci_dev *pdev)
3217 {
3218         ushort subsystem_vendor_id, subsystem_device_id, command;
3219         __u32 board_id, scratchpad = 0;
3220         __u64 cfg_offset;
3221         __u32 cfg_base_addr;
3222         __u64 cfg_base_addr_index;
3223         int i, err;
3224
3225         /* check to see if controller has been disabled */
3226         /* BEFORE trying to enable it */
3227         (void)pci_read_config_word(pdev, PCI_COMMAND, &command);
3228         if (!(command & 0x02)) {
3229                 printk(KERN_WARNING
3230                        "cciss: controller appears to be disabled\n");
3231                 return -ENODEV;
3232         }
3233
3234         err = pci_enable_device(pdev);
3235         if (err) {
3236                 printk(KERN_ERR "cciss: Unable to Enable PCI device\n");
3237                 return err;
3238         }
3239
3240         err = pci_request_regions(pdev, "cciss");
3241         if (err) {
3242                 printk(KERN_ERR "cciss: Cannot obtain PCI resources, "
3243                        "aborting\n");
3244                 return err;
3245         }
3246
3247         subsystem_vendor_id = pdev->subsystem_vendor;
3248         subsystem_device_id = pdev->subsystem_device;
3249         board_id = (((__u32) (subsystem_device_id << 16) & 0xffff0000) |
3250                     subsystem_vendor_id);
3251
3252 #ifdef CCISS_DEBUG
3253         printk("command = %x\n", command);
3254         printk("irq = %x\n", pdev->irq);
3255         printk("board_id = %x\n", board_id);
3256 #endif                          /* CCISS_DEBUG */
3257
3258 /* If the kernel supports MSI/MSI-X we will try to enable that functionality,
3259  * else we use the IO-APIC interrupt assigned to us by system ROM.
3260  */
3261         cciss_interrupt_mode(c, pdev, board_id);
3262
3263         /* find the memory BAR */
3264         for (i = 0; i < DEVICE_COUNT_RESOURCE; i++) {
3265                 if (pci_resource_flags(pdev, i) & IORESOURCE_MEM)
3266                         break;
3267         }
3268         if (i == DEVICE_COUNT_RESOURCE) {
3269                 printk(KERN_WARNING "cciss: No memory BAR found\n");
3270                 err = -ENODEV;
3271                 goto err_out_free_res;
3272         }
3273
3274         c->paddr = pci_resource_start(pdev, i); /* addressing mode bits
3275                                                  * already removed
3276                                                  */
3277
3278 #ifdef CCISS_DEBUG
3279         printk("address 0 = %lx\n", c->paddr);
3280 #endif                          /* CCISS_DEBUG */
3281         c->vaddr = remap_pci_mem(c->paddr, 0x250);
3282
3283         /* Wait for the board to become ready.  (PCI hotplug needs this.)
3284          * We poll for up to 120 secs, once per 100ms. */
3285         for (i = 0; i < 1200; i++) {
3286                 scratchpad = readl(c->vaddr + SA5_SCRATCHPAD_OFFSET);
3287                 if (scratchpad == CCISS_FIRMWARE_READY)
3288                         break;
3289                 set_current_state(TASK_INTERRUPTIBLE);
3290                 schedule_timeout(HZ / 10);      /* wait 100ms */
3291         }
3292         if (scratchpad != CCISS_FIRMWARE_READY) {
3293                 printk(KERN_WARNING "cciss: Board not ready.  Timed out.\n");
3294                 err = -ENODEV;
3295                 goto err_out_free_res;
3296         }
3297
3298         /* get the address index number */
3299         cfg_base_addr = readl(c->vaddr + SA5_CTCFG_OFFSET);
3300         cfg_base_addr &= (__u32) 0x0000ffff;
3301 #ifdef CCISS_DEBUG
3302         printk("cfg base address = %x\n", cfg_base_addr);
3303 #endif                          /* CCISS_DEBUG */
3304         cfg_base_addr_index = find_PCI_BAR_index(pdev, cfg_base_addr);
3305 #ifdef CCISS_DEBUG
3306         printk("cfg base address index = %llx\n",
3307                 (unsigned long long)cfg_base_addr_index);
3308 #endif                          /* CCISS_DEBUG */
3309         if (cfg_base_addr_index == -1) {
3310                 printk(KERN_WARNING "cciss: Cannot find cfg_base_addr_index\n");
3311                 err = -ENODEV;
3312                 goto err_out_free_res;
3313         }
3314
3315         cfg_offset = readl(c->vaddr + SA5_CTMEM_OFFSET);
3316 #ifdef CCISS_DEBUG
3317         printk("cfg offset = %llx\n", (unsigned long long)cfg_offset);
3318 #endif                          /* CCISS_DEBUG */
3319         c->cfgtable = remap_pci_mem(pci_resource_start(pdev,
3320                                                        cfg_base_addr_index) +
3321                                     cfg_offset, sizeof(CfgTable_struct));
3322         c->board_id = board_id;
3323
3324 #ifdef CCISS_DEBUG
3325         print_cfg_table(c->cfgtable);
3326 #endif                          /* CCISS_DEBUG */
3327
3328         /* Some controllers support Zero Memory Raid (ZMR).
3329          * When configured in ZMR mode the number of supported
3330          * commands drops to 64. So instead of just setting an
3331          * arbitrary value we make the driver a little smarter.
3332          * We read the config table to tell us how many commands
3333          * are supported on the controller then subtract 4 to
3334          * leave a little room for ioctl calls.
3335          */
3336         c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3337         for (i = 0; i < ARRAY_SIZE(products); i++) {
3338                 if (board_id == products[i].board_id) {
3339                         c->product_name = products[i].product_name;
3340                         c->access = *(products[i].access);
3341                         c->nr_cmds = c->max_commands - 4;
3342                         break;
3343                 }
3344         }
3345         if ((readb(&c->cfgtable->Signature[0]) != 'C') ||
3346             (readb(&c->cfgtable->Signature[1]) != 'I') ||
3347             (readb(&c->cfgtable->Signature[2]) != 'S') ||
3348             (readb(&c->cfgtable->Signature[3]) != 'S')) {
3349                 printk("Does not appear to be a valid CISS config table\n");
3350                 err = -ENODEV;
3351                 goto err_out_free_res;
3352         }
3353         /* We didn't find the controller in our list. We know the
3354          * signature is valid. If it's an HP device let's try to
3355          * bind to the device and fire it up. Otherwise we bail.
3356          */
3357         if (i == ARRAY_SIZE(products)) {
3358                 if (subsystem_vendor_id == PCI_VENDOR_ID_HP) {
3359                         c->product_name = products[i-1].product_name;
3360                         c->access = *(products[i-1].access);
3361                         c->nr_cmds = c->max_commands - 4;
3362                         printk(KERN_WARNING "cciss: This is an unknown "
3363                                 "Smart Array controller.\n"
3364                                 "cciss: Please update to the latest driver "
3365                                 "available from www.hp.com.\n");
3366                 } else {
3367                         printk(KERN_WARNING "cciss: Sorry, I don't know how"
3368                                 " to access the Smart Array controller %08lx\n"
3369                                         , (unsigned long)board_id);
3370                         err = -ENODEV;
3371                         goto err_out_free_res;
3372                 }
3373         }
3374 #ifdef CONFIG_X86
3375         {
3376                 /* Need to enable prefetch in the SCSI core for 6400 in x86 */
3377                 __u32 prefetch;
3378                 prefetch = readl(&(c->cfgtable->SCSI_Prefetch));
3379                 prefetch |= 0x100;
3380                 writel(prefetch, &(c->cfgtable->SCSI_Prefetch));
3381         }
3382 #endif
3383
3384         /* Disabling DMA prefetch and refetch for the P600.
3385          * An ASIC bug may result in accesses to invalid memory addresses.
3386          * We've disabled prefetch for some time now. Testing with XEN
3387          * kernels revealed a bug in the refetch if dom0 resides on a P600.
3388          */
3389         if(board_id == 0x3225103C) {
3390                 __u32 dma_prefetch;
3391                 __u32 dma_refetch;
3392                 dma_prefetch = readl(c->vaddr + I2O_DMA1_CFG);
3393                 dma_prefetch |= 0x8000;
3394                 writel(dma_prefetch, c->vaddr + I2O_DMA1_CFG);
3395                 pci_read_config_dword(pdev, PCI_COMMAND_PARITY, &dma_refetch);
3396                 dma_refetch |= 0x1;
3397                 pci_write_config_dword(pdev, PCI_COMMAND_PARITY, dma_refetch);
3398         }
3399
3400 #ifdef CCISS_DEBUG
3401         printk("Trying to put board into Simple mode\n");
3402 #endif                          /* CCISS_DEBUG */
3403         c->max_commands = readl(&(c->cfgtable->CmdsOutMax));
3404         /* Update the field, and then ring the doorbell */
3405         writel(CFGTBL_Trans_Simple, &(c->cfgtable->HostWrite.TransportRequest));
3406         writel(CFGTBL_ChangeReq, c->vaddr + SA5_DOORBELL);
3407
3408         /* under certain very rare conditions, this can take awhile.
3409          * (e.g.: hot replace a failed 144GB drive in a RAID 5 set right
3410          * as we enter this code.) */
3411         for (i = 0; i < MAX_CONFIG_WAIT; i++) {
3412                 if (!(readl(c->vaddr + SA5_DOORBELL) & CFGTBL_ChangeReq))
3413                         break;
3414                 /* delay and try again */
3415                 set_current_state(TASK_INTERRUPTIBLE);
3416                 schedule_timeout(10);
3417         }
3418
3419 #ifdef CCISS_DEBUG
3420         printk(KERN_DEBUG "I counter got to %d %x\n", i,
3421                readl(c->vaddr + SA5_DOORBELL));
3422 #endif                          /* CCISS_DEBUG */
3423 #ifdef CCISS_DEBUG
3424         print_cfg_table(c->cfgtable);
3425 #endif                          /* CCISS_DEBUG */
3426
3427         if (!(readl(&(c->cfgtable->TransportActive)) & CFGTBL_Trans_Simple)) {
3428                 printk(KERN_WARNING "cciss: unable to get board into"
3429                        " simple mode\n");
3430                 err = -ENODEV;
3431                 goto err_out_free_res;
3432         }
3433         return 0;
3434
3435 err_out_free_res:
3436         /*
3437          * Deliberately omit pci_disable_device(): it does something nasty to
3438          * Smart Array controllers that pci_enable_device does not undo
3439          */
3440         pci_release_regions(pdev);
3441         return err;
3442 }
3443
3444 /* Function to find the first free pointer into our hba[] array
3445  * Returns -1 if no free entries are left.
3446  */
3447 static int alloc_cciss_hba(void)
3448 {
3449         int i;
3450
3451         for (i = 0; i < MAX_CTLR; i++) {
3452                 if (!hba[i]) {
3453                         ctlr_info_t *p;
3454
3455                         p = kzalloc(sizeof(ctlr_info_t), GFP_KERNEL);
3456                         if (!p)
3457                                 goto Enomem;
3458                         hba[i] = p;
3459                         return i;
3460                 }
3461         }
3462         printk(KERN_WARNING "cciss: This driver supports a maximum"
3463                " of %d controllers.\n", MAX_CTLR);
3464         return -1;
3465 Enomem:
3466         printk(KERN_ERR "cciss: out of memory.\n");
3467         return -1;
3468 }
3469
3470 static void free_hba(int i)
3471 {
3472         ctlr_info_t *p = hba[i];
3473         int n;
3474
3475         hba[i] = NULL;
3476         for (n = 0; n < CISS_MAX_LUN; n++)
3477                 put_disk(p->gendisk[n]);
3478         kfree(p);
3479 }
3480
3481 /* Send a message CDB to the firmware. */
3482 static __devinit int cciss_message(struct pci_dev *pdev, unsigned char opcode, unsigned char type)
3483 {
3484         typedef struct {
3485                 CommandListHeader_struct CommandHeader;
3486                 RequestBlock_struct Request;
3487                 ErrDescriptor_struct ErrorDescriptor;
3488         } Command;
3489         static const size_t cmd_sz = sizeof(Command) + sizeof(ErrorInfo_struct);
3490         Command *cmd;
3491         dma_addr_t paddr64;
3492         uint32_t paddr32, tag;
3493         void __iomem *vaddr;
3494         int i, err;
3495
3496         vaddr = ioremap_nocache(pci_resource_start(pdev, 0), pci_resource_len(pdev, 0));
3497         if (vaddr == NULL)
3498                 return -ENOMEM;
3499
3500         /* The Inbound Post Queue only accepts 32-bit physical addresses for the
3501            CCISS commands, so they must be allocated from the lower 4GiB of
3502            memory. */
3503         err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(32));
3504         if (err) {
3505                 iounmap(vaddr);
3506                 return -ENOMEM;
3507         }
3508
3509         cmd = pci_alloc_consistent(pdev, cmd_sz, &paddr64);
3510         if (cmd == NULL) {
3511                 iounmap(vaddr);
3512                 return -ENOMEM;
3513         }
3514
3515         /* This must fit, because of the 32-bit consistent DMA mask.  Also,
3516            although there's no guarantee, we assume that the address is at
3517            least 4-byte aligned (most likely, it's page-aligned). */
3518         paddr32 = paddr64;
3519
3520         cmd->CommandHeader.ReplyQueue = 0;
3521         cmd->CommandHeader.SGList = 0;
3522         cmd->CommandHeader.SGTotal = 0;
3523         cmd->CommandHeader.Tag.lower = paddr32;
3524         cmd->CommandHeader.Tag.upper = 0;
3525         memset(&cmd->CommandHeader.LUN.LunAddrBytes, 0, 8);
3526
3527         cmd->Request.CDBLen = 16;
3528         cmd->Request.Type.Type = TYPE_MSG;
3529         cmd->Request.Type.Attribute = ATTR_HEADOFQUEUE;
3530         cmd->Request.Type.Direction = XFER_NONE;
3531         cmd->Request.Timeout = 0; /* Don't time out */
3532         cmd->Request.CDB[0] = opcode;
3533         cmd->Request.CDB[1] = type;
3534         memset(&cmd->Request.CDB[2], 0, 14); /* the rest of the CDB is reserved */
3535
3536         cmd->ErrorDescriptor.Addr.lower = paddr32 + sizeof(Command);
3537         cmd->ErrorDescriptor.Addr.upper = 0;
3538         cmd->ErrorDescriptor.Len = sizeof(ErrorInfo_struct);
3539
3540         writel(paddr32, vaddr + SA5_REQUEST_PORT_OFFSET);
3541
3542         for (i = 0; i < 10; i++) {
3543                 tag = readl(vaddr + SA5_REPLY_PORT_OFFSET);
3544                 if ((tag & ~3) == paddr32)
3545                         break;
3546                 schedule_timeout_uninterruptible(HZ);
3547         }
3548
3549         iounmap(vaddr);
3550
3551         /* we leak the DMA buffer here ... no choice since the controller could
3552            still complete the command. */
3553         if (i == 10) {
3554                 printk(KERN_ERR "cciss: controller message %02x:%02x timed out\n",
3555                         opcode, type);
3556                 return -ETIMEDOUT;
3557         }
3558
3559         pci_free_consistent(pdev, cmd_sz, cmd, paddr64);
3560
3561         if (tag & 2) {
3562                 printk(KERN_ERR "cciss: controller message %02x:%02x failed\n",
3563                         opcode, type);
3564                 return -EIO;
3565         }
3566
3567         printk(KERN_INFO "cciss: controller message %02x:%02x succeeded\n",
3568                 opcode, type);
3569         return 0;
3570 }
3571
/* Convenience wrappers around cciss_message(): the soft-reset request passes
 * 1, 0 and the no-op (used to see whether the controller responds) passes
 * 3, 0 as the message arguments. */
#define cciss_soft_reset_controller(p)	cciss_message((p), 1, 0)
#define cciss_noop(p)			cciss_message((p), 3, 0)
3574
3575 static __devinit int cciss_reset_msi(struct pci_dev *pdev)
3576 {
3577 /* the #defines are stolen from drivers/pci/msi.h. */
3578 #define msi_control_reg(base)           (base + PCI_MSI_FLAGS)
3579 #define PCI_MSIX_FLAGS_ENABLE           (1 << 15)
3580
3581         int pos;
3582         u16 control = 0;
3583
3584         pos = pci_find_capability(pdev, PCI_CAP_ID_MSI);
3585         if (pos) {
3586                 pci_read_config_word(pdev, msi_control_reg(pos), &control);
3587                 if (control & PCI_MSI_FLAGS_ENABLE) {
3588                         printk(KERN_INFO "cciss: resetting MSI\n");
3589                         pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSI_FLAGS_ENABLE);
3590                 }
3591         }
3592
3593         pos = pci_find_capability(pdev, PCI_CAP_ID_MSIX);
3594         if (pos) {
3595                 pci_read_config_word(pdev, msi_control_reg(pos), &control);
3596                 if (control & PCI_MSIX_FLAGS_ENABLE) {
3597                         printk(KERN_INFO "cciss: resetting MSI-X\n");
3598                         pci_write_config_word(pdev, msi_control_reg(pos), control & ~PCI_MSIX_FLAGS_ENABLE);
3599                 }
3600         }
3601
3602         return 0;
3603 }
3604
3605 /* This does a hard reset of the controller using PCI power management
3606  * states. */
3607 static __devinit int cciss_hard_reset_controller(struct pci_dev *pdev)
3608 {
3609         u16 pmcsr, saved_config_space[32];
3610         int i, pos;
3611
3612         printk(KERN_INFO "cciss: using PCI PM to reset controller\n");
3613
3614         /* This is very nearly the same thing as
3615
3616            pci_save_state(pci_dev);
3617            pci_set_power_state(pci_dev, PCI_D3hot);
3618            pci_set_power_state(pci_dev, PCI_D0);
3619            pci_restore_state(pci_dev);
3620
3621            but we can't use these nice canned kernel routines on
3622            kexec, because they also check the MSI/MSI-X state in PCI
3623            configuration space and do the wrong thing when it is
3624            set/cleared.  Also, the pci_save/restore_state functions
3625            violate the ordering requirements for restoring the
3626            configuration space from the CCISS document (see the
3627            comment below).  So we roll our own .... */
3628
3629         for (i = 0; i < 32; i++)
3630                 pci_read_config_word(pdev, 2*i, &saved_config_space[i]);
3631
3632         pos = pci_find_capability(pdev, PCI_CAP_ID_PM);
3633         if (pos == 0) {
3634                 printk(KERN_ERR "cciss_reset_controller: PCI PM not supported\n");
3635                 return -ENODEV;
3636         }
3637
3638         /* Quoting from the Open CISS Specification: "The Power
3639          * Management Control/Status Register (CSR) controls the power
3640          * state of the device.  The normal operating state is D0,
3641          * CSR=00h.  The software off state is D3, CSR=03h.  To reset
3642          * the controller, place the interface device in D3 then to
3643          * D0, this causes a secondary PCI reset which will reset the
3644          * controller." */
3645
3646         /* enter the D3hot power management state */
3647         pci_read_config_word(pdev, pos + PCI_PM_CTRL, &pmcsr);
3648         pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3649         pmcsr |= PCI_D3hot;
3650         pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3651
3652         schedule_timeout_uninterruptible(HZ >> 1);
3653
3654         /* enter the D0 power management state */
3655         pmcsr &= ~PCI_PM_CTRL_STATE_MASK;
3656         pmcsr |= PCI_D0;
3657         pci_write_config_word(pdev, pos + PCI_PM_CTRL, pmcsr);
3658
3659         schedule_timeout_uninterruptible(HZ >> 1);
3660
3661         /* Restore the PCI configuration space.  The Open CISS
3662          * Specification says, "Restore the PCI Configuration
3663          * Registers, offsets 00h through 60h. It is important to
3664          * restore the command register, 16-bits at offset 04h,
3665          * last. Do not restore the configuration status register,
3666          * 16-bits at offset 06h."  Note that the offset is 2*i. */
3667         for (i = 0; i < 32; i++) {
3668                 if (i == 2 || i == 3)
3669                         continue;
3670                 pci_write_config_word(pdev, 2*i, saved_config_space[i]);
3671         }
3672         wmb();
3673         pci_write_config_word(pdev, 4, saved_config_space[2]);
3674
3675         return 0;
3676 }
3677
3678 /*
3679  *  This is it.  Find all the controllers and register them.  I really hate
3680  *  stealing all these major device numbers.
3681  *  returns the number of block devices registered.
3682  */
3683 static int __devinit cciss_init_one(struct pci_dev *pdev,
3684                                     const struct pci_device_id *ent)
3685 {
3686         int i;
3687         int j = 0;
3688         int rc;
3689         int dac, return_code;
3690         InquiryData_struct *inq_buff = NULL;
3691
3692         if (reset_devices) {
3693                 /* Reset the controller with a PCI power-cycle */
3694                 if (cciss_hard_reset_controller(pdev) || cciss_reset_msi(pdev))
3695                         return -ENODEV;
3696
3697                 /* Now try to get the controller to respond to a no-op. Some
3698                    devices (notably the HP Smart Array 5i Controller) need
3699                    up to 30 seconds to respond. */
3700                 for (i=0; i<30; i++) {
3701                         if (cciss_noop(pdev) == 0)
3702                                 break;
3703
3704                         schedule_timeout_uninterruptible(HZ);
3705                 }
3706                 if (i == 30) {
3707                         printk(KERN_ERR "cciss: controller seems dead\n");
3708                         return -EBUSY;
3709                 }
3710         }
3711
3712         i = alloc_cciss_hba();
3713         if (i < 0)
3714                 return -1;
3715
3716         hba[i]->busy_initializing = 1;
3717         INIT_HLIST_HEAD(&hba[i]->cmpQ);
3718         INIT_HLIST_HEAD(&hba[i]->reqQ);
3719
3720         if (cciss_pci_init(hba[i], pdev) != 0)
3721                 goto clean1;
3722
3723         sprintf(hba[i]->devname, "cciss%d", i);
3724         hba[i]->ctlr = i;
3725         hba[i]->pdev = pdev;
3726
3727         /* configure PCI DMA stuff */
3728         if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(64)))
3729                 dac = 1;
3730         else if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(32)))
3731                 dac = 0;
3732         else {
3733                 printk(KERN_ERR "cciss: no suitable DMA available\n");
3734                 goto clean1;
3735         }
3736
3737         /*
3738          * register with the major number, or get a dynamic major number
3739          * by passing 0 as argument.  This is done for greater than
3740          * 8 controller support.
3741          */
3742         if (i < MAX_CTLR_ORIG)
3743                 hba[i]->major = COMPAQ_CISS_MAJOR + i;
3744         rc = register_blkdev(hba[i]->major, hba[i]->devname);
3745         if (rc == -EBUSY || rc == -EINVAL) {
3746                 printk(KERN_ERR
3747                        "cciss:  Unable to get major number %d for %s "
3748                        "on hba %d\n", hba[i]->major, hba[i]->devname, i);
3749                 goto clean1;
3750         } else {
3751                 if (i >= MAX_CTLR_ORIG)
3752                         hba[i]->major = rc;
3753         }
3754
3755         /* make sure the board interrupts are off */
3756         hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_OFF);
3757         if (request_irq(hba[i]->intr[SIMPLE_MODE_INT], do_cciss_intr,
3758                         IRQF_DISABLED | IRQF_SHARED, hba[i]->devname, hba[i])) {
3759                 printk(KERN_ERR "cciss: Unable to get irq %d for %s\n",
3760                        hba[i]->intr[SIMPLE_MODE_INT], hba[i]->devname);
3761                 goto clean2;
3762         }
3763
3764         printk(KERN_INFO "%s: <0x%x> at PCI %s IRQ %d%s using DAC\n",
3765                hba[i]->devname, pdev->device, pci_name(pdev),
3766                hba[i]->intr[SIMPLE_MODE_INT], dac ? "" : " not");
3767
3768         hba[i]->cmd_pool_bits =
3769             kmalloc(DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3770                         * sizeof(unsigned long), GFP_KERNEL);
3771         hba[i]->cmd_pool = (CommandList_struct *)
3772             pci_alloc_consistent(hba[i]->pdev,
3773                     hba[i]->nr_cmds * sizeof(CommandList_struct),
3774                     &(hba[i]->cmd_pool_dhandle));
3775         hba[i]->errinfo_pool = (ErrorInfo_struct *)
3776             pci_alloc_consistent(hba[i]->pdev,
3777                     hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3778                     &(hba[i]->errinfo_pool_dhandle));
3779         if ((hba[i]->cmd_pool_bits == NULL)
3780             || (hba[i]->cmd_pool == NULL)
3781             || (hba[i]->errinfo_pool == NULL)) {
3782                 printk(KERN_ERR "cciss: out of memory");
3783                 goto clean4;
3784         }
3785 #ifdef CONFIG_CISS_SCSI_TAPE
3786         hba[i]->scsi_rejects.complete =
3787             kmalloc(sizeof(hba[i]->scsi_rejects.complete[0]) *
3788                     (hba[i]->nr_cmds + 5), GFP_KERNEL);
3789         if (hba[i]->scsi_rejects.complete == NULL) {
3790                 printk(KERN_ERR "cciss: out of memory");
3791                 goto clean4;
3792         }
3793 #endif
3794         spin_lock_init(&hba[i]->lock);
3795
3796         /* Initialize the pdev driver private data.
3797            have it point to hba[i].  */
3798         pci_set_drvdata(pdev, hba[i]);
3799         /* command and error info recs zeroed out before
3800            they are used */
3801         memset(hba[i]->cmd_pool_bits, 0,
3802                DIV_ROUND_UP(hba[i]->nr_cmds, BITS_PER_LONG)
3803                         * sizeof(unsigned long));
3804
3805         hba[i]->num_luns = 0;
3806         hba[i]->highest_lun = -1;
3807         for (j = 0; j < CISS_MAX_LUN; j++) {
3808                 hba[i]->drv[j].raid_level = -1;
3809                 hba[i]->drv[j].queue = NULL;
3810                 hba[i]->gendisk[j] = NULL;
3811         }
3812
3813         cciss_scsi_setup(i);
3814
3815         /* Turn the interrupts on so we can service requests */
3816         hba[i]->access.set_intr_mask(hba[i], CCISS_INTR_ON);
3817
3818         /* Get the firmware version */
3819         inq_buff = kzalloc(sizeof(InquiryData_struct), GFP_KERNEL);
3820         if (inq_buff == NULL) {
3821                 printk(KERN_ERR "cciss: out of memory\n");
3822                 goto clean4;
3823         }
3824
3825         return_code = sendcmd_withirq(CISS_INQUIRY, i, inq_buff,
3826                 sizeof(InquiryData_struct), 0, 0 , 0, TYPE_CMD);
3827         if (return_code == IO_OK) {
3828                 hba[i]->firm_ver[0] = inq_buff->data_byte[32];
3829                 hba[i]->firm_ver[1] = inq_buff->data_byte[33];
3830                 hba[i]->firm_ver[2] = inq_buff->data_byte[34];
3831                 hba[i]->firm_ver[3] = inq_buff->data_byte[35];
3832         } else {         /* send command failed */
3833                 printk(KERN_WARNING "cciss: unable to determine firmware"
3834                         " version of controller\n");
3835         }
3836
3837         cciss_procinit(i);
3838
3839         hba[i]->cciss_max_sectors = 2048;
3840
3841         hba[i]->busy_initializing = 0;
3842
3843         rebuild_lun_table(hba[i], 1);
3844         hba[i]->cciss_scan_thread = kthread_run(scan_thread, hba[i],
3845                                 "cciss_scan%02d", i);
3846         if (IS_ERR(hba[i]->cciss_scan_thread))
3847                 return PTR_ERR(hba[i]->cciss_scan_thread);
3848
3849         return 1;
3850
3851 clean4:
3852         kfree(inq_buff);
3853 #ifdef CONFIG_CISS_SCSI_TAPE
3854         kfree(hba[i]->scsi_rejects.complete);
3855 #endif
3856         kfree(hba[i]->cmd_pool_bits);
3857         if (hba[i]->cmd_pool)
3858                 pci_free_consistent(hba[i]->pdev,
3859                                     hba[i]->nr_cmds * sizeof(CommandList_struct),
3860                                     hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3861         if (hba[i]->errinfo_pool)
3862                 pci_free_consistent(hba[i]->pdev,
3863                                     hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3864                                     hba[i]->errinfo_pool,
3865                                     hba[i]->errinfo_pool_dhandle);
3866         free_irq(hba[i]->intr[SIMPLE_MODE_INT], hba[i]);
3867 clean2:
3868         unregister_blkdev(hba[i]->major, hba[i]->devname);
3869 clean1:
3870         hba[i]->busy_initializing = 0;
3871         /* cleanup any queues that may have been initialized */
3872         for (j=0; j <= hba[i]->highest_lun; j++){
3873                 drive_info_struct *drv = &(hba[i]->drv[j]);
3874                 if (drv->queue)
3875                         blk_cleanup_queue(drv->queue);
3876         }
3877         /*
3878          * Deliberately omit pci_disable_device(): it does something nasty to
3879          * Smart Array controllers that pci_enable_device does not undo
3880          */
3881         pci_release_regions(pdev);
3882         pci_set_drvdata(pdev, NULL);
3883         free_hba(i);
3884         return -1;
3885 }
3886
3887 static void cciss_shutdown(struct pci_dev *pdev)
3888 {
3889         ctlr_info_t *tmp_ptr;
3890         int i;
3891         char flush_buf[4];
3892         int return_code;
3893
3894         tmp_ptr = pci_get_drvdata(pdev);
3895         if (tmp_ptr == NULL)
3896                 return;
3897         i = tmp_ptr->ctlr;
3898         if (hba[i] == NULL)
3899                 return;
3900
3901         /* Turn board interrupts off  and send the flush cache command */
3902         /* sendcmd will turn off interrupt, and send the flush...
3903          * To write all data in the battery backed cache to disks */
3904         memset(flush_buf, 0, 4);
3905         return_code = sendcmd(CCISS_CACHE_FLUSH, i, flush_buf, 4, 0, 0, 0, NULL,
3906                               TYPE_CMD);
3907         if (return_code == IO_OK) {
3908                 printk(KERN_INFO "Completed flushing cache on controller %d\n", i);
3909         } else {
3910                 printk(KERN_WARNING "Error flushing cache on controller %d\n", i);
3911         }
3912         free_irq(hba[i]->intr[2], hba[i]);
3913 }
3914
3915 static void __devexit cciss_remove_one(struct pci_dev *pdev)
3916 {
3917         ctlr_info_t *tmp_ptr;
3918         int i, j;
3919
3920         if (pci_get_drvdata(pdev) == NULL) {
3921                 printk(KERN_ERR "cciss: Unable to remove device \n");
3922                 return;
3923         }
3924
3925         tmp_ptr = pci_get_drvdata(pdev);
3926         i = tmp_ptr->ctlr;
3927         if (hba[i] == NULL) {
3928                 printk(KERN_ERR "cciss: device appears to "
3929                        "already be removed \n");
3930                 return;
3931         }
3932
3933         kthread_stop(hba[i]->cciss_scan_thread);
3934
3935         remove_proc_entry(hba[i]->devname, proc_cciss);
3936         unregister_blkdev(hba[i]->major, hba[i]->devname);
3937
3938         /* remove it from the disk list */
3939         for (j = 0; j < CISS_MAX_LUN; j++) {
3940                 struct gendisk *disk = hba[i]->gendisk[j];
3941                 if (disk) {
3942                         struct request_queue *q = disk->queue;
3943
3944                         if (disk->flags & GENHD_FL_UP)
3945                                 del_gendisk(disk);
3946                         if (q)
3947                                 blk_cleanup_queue(q);
3948                 }
3949         }
3950
3951 #ifdef CONFIG_CISS_SCSI_TAPE
3952         cciss_unregister_scsi(i);       /* unhook from SCSI subsystem */
3953 #endif
3954
3955         cciss_shutdown(pdev);
3956
3957 #ifdef CONFIG_PCI_MSI
3958         if (hba[i]->msix_vector)
3959                 pci_disable_msix(hba[i]->pdev);
3960         else if (hba[i]->msi_vector)
3961                 pci_disable_msi(hba[i]->pdev);
3962 #endif                          /* CONFIG_PCI_MSI */
3963
3964         iounmap(hba[i]->vaddr);
3965
3966         pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(CommandList_struct),
3967                             hba[i]->cmd_pool, hba[i]->cmd_pool_dhandle);
3968         pci_free_consistent(hba[i]->pdev, hba[i]->nr_cmds * sizeof(ErrorInfo_struct),
3969                             hba[i]->errinfo_pool, hba[i]->errinfo_pool_dhandle);
3970         kfree(hba[i]->cmd_pool_bits);
3971 #ifdef CONFIG_CISS_SCSI_TAPE
3972         kfree(hba[i]->scsi_rejects.complete);
3973 #endif
3974         /*
3975          * Deliberately omit pci_disable_device(): it does something nasty to
3976          * Smart Array controllers that pci_enable_device does not undo
3977          */
3978         pci_release_regions(pdev);
3979         pci_set_drvdata(pdev, NULL);
3980         free_hba(i);
3981 }
3982
3983 static struct pci_driver cciss_pci_driver = {
3984         .name = "cciss",
3985         .probe = cciss_init_one,
3986         .remove = __devexit_p(cciss_remove_one),
3987         .id_table = cciss_pci_device_id,        /* id_table */
3988         .shutdown = cciss_shutdown,
3989 };
3990
3991 /*
3992  *  This is it.  Register the PCI driver information for the cards we control
3993  *  the OS will call our registered routines when it finds one of our cards.
3994  */
3995 static int __init cciss_init(void)
3996 {
3997         /*
3998          * The hardware requires that commands are aligned on a 64-bit
3999          * boundary. Given that we use pci_alloc_consistent() to allocate an
4000          * array of them, the size must be a multiple of 8 bytes.
4001          */
4002         BUILD_BUG_ON(sizeof(CommandList_struct) % 8);
4003
4004         printk(KERN_INFO DRIVER_NAME "\n");
4005
4006         /* Register for our PCI devices */
4007         return pci_register_driver(&cciss_pci_driver);
4008 }
4009
4010 static void __exit cciss_cleanup(void)
4011 {
4012         int i;
4013
4014         pci_unregister_driver(&cciss_pci_driver);
4015         /* double check that all controller entrys have been removed */
4016         for (i = 0; i < MAX_CTLR; i++) {
4017                 if (hba[i] != NULL) {
4018                         printk(KERN_WARNING "cciss: had to remove"
4019                                " controller %d\n", i);
4020                         cciss_remove_one(hba[i]->pdev);
4021                 }
4022         }
4023         remove_proc_entry("driver/cciss", NULL);
4024 }
4025
4026 static void fail_all_cmds(unsigned long ctlr)
4027 {
4028         /* If we get here, the board is apparently dead. */
4029         ctlr_info_t *h = hba[ctlr];
4030         CommandList_struct *c;
4031         unsigned long flags;
4032
4033         printk(KERN_WARNING "cciss%d: controller not responding.\n", h->ctlr);
4034         h->alive = 0;           /* the controller apparently died... */
4035
4036         spin_lock_irqsave(CCISS_LOCK(ctlr), flags);
4037
4038         pci_disable_device(h->pdev);    /* Make sure it is really dead. */
4039
4040         /* move everything off the request queue onto the completed queue */
4041         while (!hlist_empty(&h->reqQ)) {
4042                 c = hlist_entry(h->reqQ.first, CommandList_struct, list);
4043                 removeQ(c);
4044                 h->Qdepth--;
4045                 addQ(&h->cmpQ, c);
4046         }
4047
4048         /* Now, fail everything on the completed queue with a HW error */
4049         while (!hlist_empty(&h->cmpQ)) {
4050                 c = hlist_entry(h->cmpQ.first, CommandList_struct, list);
4051                 removeQ(c);
4052                 c->err_info->CommandStatus = CMD_HARDWARE_ERR;
4053                 if (c->cmd_type == CMD_RWREQ) {
4054                         complete_command(h, c, 0);
4055                 } else if (c->cmd_type == CMD_IOCTL_PEND)
4056                         complete(c->waiting);
4057 #ifdef CONFIG_CISS_SCSI_TAPE
4058                 else if (c->cmd_type == CMD_SCSI)
4059                         complete_scsi_command(c, 0, 0);
4060 #endif
4061         }
4062         spin_unlock_irqrestore(CCISS_LOCK(ctlr), flags);
4063         return;
4064 }
4065
/* Register the module's unload and load entry points. */
module_exit(cciss_cleanup);
module_init(cciss_init);