X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=drivers%2Fscsi%2Fscsi_lib.c;h=7c4c889c5221e09ed9cb00380c5c3053256c8545;hb=7ec54588210df29ea637e6054489bc942c0ef371;hp=9a05076f9cf4f8d7a3cc6bffffc4c03996f3a799;hpb=f33b5d783b4f56be5ace6a1c98fb5f76b2d2d07d;p=safe%2Fjmp%2Flinux-2.6 diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 9a05076..7c4c889 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -17,51 +17,51 @@ #include #include #include +#include #include +#include #include #include #include #include #include -#include #include "scsi_priv.h" #include "scsi_logging.h" -#define SG_MEMPOOL_NR (sizeof(scsi_sg_pools)/sizeof(struct scsi_host_sg_pool)) -#define SG_MEMPOOL_SIZE 32 +#define SG_MEMPOOL_NR ARRAY_SIZE(scsi_sg_pools) +#define SG_MEMPOOL_SIZE 2 + +/* + * The maximum number of SG segments that we will put inside a scatterlist + * (unless chaining is used). Should ideally fit inside a single page, to + * avoid a higher order allocation. + */ +#define SCSI_MAX_SG_SEGMENTS 128 struct scsi_host_sg_pool { size_t size; - char *name; - kmem_cache_t *slab; + char *name; + struct kmem_cache *slab; mempool_t *pool; }; -#if (SCSI_MAX_PHYS_SEGMENTS < 32) -#error SCSI_MAX_PHYS_SEGMENTS is too small -#endif - -#define SP(x) { x, "sgpool-" #x } +#define SP(x) { x, "sgpool-" #x } static struct scsi_host_sg_pool scsi_sg_pools[] = { SP(8), SP(16), +#if (SCSI_MAX_SG_SEGMENTS > 16) SP(32), -#if (SCSI_MAX_PHYS_SEGMENTS > 32) +#if (SCSI_MAX_SG_SEGMENTS > 32) SP(64), -#if (SCSI_MAX_PHYS_SEGMENTS > 64) +#if (SCSI_MAX_SG_SEGMENTS > 64) SP(128), -#if (SCSI_MAX_PHYS_SEGMENTS > 128) - SP(256), -#if (SCSI_MAX_PHYS_SEGMENTS > 256) -#error SCSI_MAX_PHYS_SEGMENTS is too large -#endif #endif #endif #endif -}; +}; #undef SP static void scsi_run_queue(struct request_queue *q); @@ -82,8 +82,8 @@ static void scsi_unprep_request(struct request *req) { struct scsi_cmnd *cmd = req->special; - req->flags &= ~REQ_DONTPREP; - req->special = (req->flags & REQ_SPECIAL) ? cmd->sc_request : NULL; + req->cmd_flags &= ~REQ_DONTPREP; + req->special = NULL; scsi_put_command(cmd); } @@ -161,72 +161,6 @@ int scsi_queue_insert(struct scsi_cmnd *cmd, int reason) return 0; } -/* - * Function: scsi_do_req - * - * Purpose: Queue a SCSI request - * - * Arguments: sreq - command descriptor. - * cmnd - actual SCSI command to be performed. - * buffer - data buffer. - * bufflen - size of data buffer. - * done - completion function to be run. - * timeout - how long to let it run before timeout. - * retries - number of retries we allow. - * - * Lock status: No locks held upon entry. - * - * Returns: Nothing. - * - * Notes: This function is only used for queueing requests for things - * like ioctls and character device requests - this is because - * we essentially just inject a request into the queue for the - * device. - * - * In order to support the scsi_device_quiesce function, we - * now inject requests on the *head* of the device queue - * rather than the tail. - */ -void scsi_do_req(struct scsi_request *sreq, const void *cmnd, - void *buffer, unsigned bufflen, - void (*done)(struct scsi_cmnd *), - int timeout, int retries) -{ - /* - * If the upper level driver is reusing these things, then - * we should release the low-level block now. Another one will - * be allocated later when this request is getting queued. - */ - __scsi_release_request(sreq); - - /* - * Our own function scsi_done (which marks the host as not busy, - * disables the timeout counter, etc) will be called by us or by the - * scsi_hosts[host].queuecommand() function needs to also call - * the completion function for the high level driver. - */ - memcpy(sreq->sr_cmnd, cmnd, sizeof(sreq->sr_cmnd)); - sreq->sr_bufflen = bufflen; - sreq->sr_buffer = buffer; - sreq->sr_allowed = retries; - sreq->sr_done = done; - sreq->sr_timeout_per_command = timeout; - - if (sreq->sr_cmd_len == 0) - sreq->sr_cmd_len = COMMAND_SIZE(sreq->sr_cmnd[0]); - - /* - * head injection *required* here otherwise quiesce won't work - * - * Because users of this function are apt to reuse requests with no - * modification, we have to sanitise the request flags here - */ - sreq->sr_request->flags &= ~REQ_DONTPREP; - blk_insert_request(sreq->sr_device->request_queue, sreq->sr_request, - 1, sreq); -} -EXPORT_SYMBOL(scsi_do_req); - /** * scsi_execute - insert request and wait for the result * @sdev: scsi device @@ -239,9 +173,9 @@ EXPORT_SYMBOL(scsi_do_req); * @retries: number of times to retry request * @flags: or into request flags; * - * returns the req->errors value which is the the scsi_cmnd result + * returns the req->errors value which is the scsi_cmnd result * field. - **/ + */ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, int data_direction, void *buffer, unsigned bufflen, unsigned char *sense, int timeout, int retries, int flags) @@ -262,7 +196,8 @@ int scsi_execute(struct scsi_device *sdev, const unsigned char *cmd, req->sense_len = 0; req->retries = retries; req->timeout = timeout; - req->flags |= flags | REQ_BLOCK_PC | REQ_SPECIAL | REQ_QUIET; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= flags | REQ_QUIET | REQ_PREEMPT; /* * head injection *required* here otherwise quiesce won't work @@ -306,7 +241,7 @@ struct scsi_io_context { char sense[SCSI_SENSE_BUFFERSIZE]; }; -static kmem_cache_t *scsi_io_context_cache; +static struct kmem_cache *scsi_io_context_cache; static void scsi_end_async(struct request *req, int uptodate) { @@ -328,33 +263,18 @@ static int scsi_merge_bio(struct request *rq, struct bio *bio) bio->bi_rw |= (1 << BIO_RW); blk_queue_bounce(q, &bio); - if (!rq->bio) - blk_rq_bio_prep(q, rq, bio); - else if (!q->back_merge_fn(q, rq, bio)) - return -EINVAL; - else { - rq->biotail->bi_next = bio; - rq->biotail = bio; - rq->hard_nr_sectors += bio_sectors(bio); - rq->nr_sectors = rq->hard_nr_sectors; - } - - return 0; + return blk_rq_append_bio(q, rq, bio); } -static int scsi_bi_endio(struct bio *bio, unsigned int bytes_done, int error) +static void scsi_bi_endio(struct bio *bio, int error) { - if (bio->bi_size) - return 1; - bio_put(bio); - return 0; } /** * scsi_req_map_sg - map a scatterlist into a request * @rq: request to fill - * @sg: scatterlist + * @sgl: scatterlist * @nsegs: number of elements * @bufflen: len of buffer * @gfp: memory allocation flags @@ -367,20 +287,27 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, int nsegs, unsigned bufflen, gfp_t gfp) { struct request_queue *q = rq->q; - int nr_pages = (bufflen + PAGE_SIZE - 1) >> PAGE_SHIFT; - unsigned int data_len = 0, len, bytes, off; + int nr_pages = (bufflen + sgl[0].offset + PAGE_SIZE - 1) >> PAGE_SHIFT; + unsigned int data_len = bufflen, len, bytes, off; + struct scatterlist *sg; struct page *page; struct bio *bio = NULL; int i, err, nr_vecs = 0; - for (i = 0; i < nsegs; i++) { - page = sgl[i].page; - off = sgl[i].offset; - len = sgl[i].length; - data_len += len; + for_each_sg(sgl, sg, nsegs, i) { + page = sg_page(sg); + off = sg->offset; + len = sg->length; + data_len += len; - while (len > 0) { + while (len > 0 && data_len > 0) { + /* + * sg sends a scatterlist that is larger than + * the data_len it wants transferred for certain + * IO sizes + */ bytes = min_t(unsigned int, len, PAGE_SIZE - off); + bytes = min(bytes, data_len); if (!bio) { nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages); @@ -404,7 +331,7 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, if (bio->bi_vcnt >= nr_vecs) { err = scsi_merge_bio(rq, bio); if (err) { - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); goto free_bios; } bio = NULL; @@ -412,12 +339,13 @@ static int scsi_req_map_sg(struct request *rq, struct scatterlist *sgl, page++; len -= bytes; + data_len -=bytes; off = 0; } } rq->buffer = rq->data = NULL; - rq->data_len = data_len; + rq->data_len = bufflen; return 0; free_bios: @@ -426,7 +354,7 @@ free_bios: /* * call endio instead of bio_put incase it was bounced */ - bio_endio(bio, bio->bi_size, 0); + bio_endio(bio, 0); } return err; @@ -437,14 +365,16 @@ free_bios: * @sdev: scsi device * @cmd: scsi command * @cmd_len: length of scsi cdb - * @data_direction: data direction + * @data_direction: DMA_TO_DEVICE, DMA_FROM_DEVICE, or DMA_NONE * @buffer: data buffer (this can be a kernel buffer or scatterlist) * @bufflen: len of buffer * @use_sg: if buffer is a scatterlist this is the number of elements * @timeout: request timeout in seconds * @retries: number of times to retry request - * @flags: or into request flags - **/ + * @privdata: data passed to done() + * @done: callback function when done + * @gfp: memory allocation flags + */ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, int cmd_len, int data_direction, void *buffer, unsigned bufflen, int use_sg, int timeout, int retries, void *privdata, @@ -455,15 +385,15 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, int err = 0; int write = (data_direction == DMA_TO_DEVICE); - sioc = kmem_cache_alloc(scsi_io_context_cache, gfp); + sioc = kmem_cache_zalloc(scsi_io_context_cache, gfp); if (!sioc) return DRIVER_ERROR << 24; - memset(sioc, 0, sizeof(*sioc)); req = blk_get_request(sdev->request_queue, write, gfp); if (!req) goto free_sense; - req->flags |= REQ_BLOCK_PC | REQ_QUIET; + req->cmd_type = REQ_TYPE_BLOCK_PC; + req->cmd_flags |= REQ_QUIET; if (use_sg) err = scsi_req_map_sg(req, buffer, use_sg, bufflen, gfp); @@ -474,6 +404,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, goto free_req; req->cmd_len = cmd_len; + memset(req->cmd, 0, BLK_MAX_CDB); /* ATAPI hates garbage after CDB */ memcpy(req->cmd, cmd, req->cmd_len); req->sense = sioc->sense; req->sense_len = 0; @@ -490,7 +421,7 @@ int scsi_execute_async(struct scsi_device *sdev, const unsigned char *cmd, free_req: blk_put_request(req); free_sense: - kfree(sioc); + kmem_cache_free(scsi_io_context_cache, sioc); return DRIVER_ERROR << 24; } EXPORT_SYMBOL_GPL(scsi_execute_async); @@ -502,60 +433,17 @@ EXPORT_SYMBOL_GPL(scsi_execute_async); * * Arguments: cmd - command that is ready to be queued. * - * Returns: Nothing - * * Notes: This function has the job of initializing a number of * fields related to error handling. Typically this will * be called once for each command, as required. */ -static int scsi_init_cmd_errh(struct scsi_cmnd *cmd) +static void scsi_init_cmd_errh(struct scsi_cmnd *cmd) { cmd->serial_number = 0; - - memset(cmd->sense_buffer, 0, sizeof cmd->sense_buffer); - + cmd->resid = 0; + memset(cmd->sense_buffer, 0, SCSI_SENSE_BUFFERSIZE); if (cmd->cmd_len == 0) cmd->cmd_len = COMMAND_SIZE(cmd->cmnd[0]); - - /* - * We need saved copies of a number of fields - this is because - * error handling may need to overwrite these with different values - * to run different commands, and once error handling is complete, - * we will need to restore these values prior to running the actual - * command. - */ - cmd->old_use_sg = cmd->use_sg; - cmd->old_cmd_len = cmd->cmd_len; - cmd->sc_old_data_direction = cmd->sc_data_direction; - cmd->old_underflow = cmd->underflow; - memcpy(cmd->data_cmnd, cmd->cmnd, sizeof(cmd->cmnd)); - cmd->buffer = cmd->request_buffer; - cmd->bufflen = cmd->request_bufflen; - - return 1; -} - -/* - * Function: scsi_setup_cmd_retry() - * - * Purpose: Restore the command state for a retry - * - * Arguments: cmd - command to be restored - * - * Returns: Nothing - * - * Notes: Immediately prior to retrying a command, we need - * to restore certain fields that we saved above. - */ -void scsi_setup_cmd_retry(struct scsi_cmnd *cmd) -{ - memcpy(cmd->cmnd, cmd->data_cmnd, sizeof(cmd->data_cmnd)); - cmd->request_buffer = cmd->buffer; - cmd->request_bufflen = cmd->bufflen; - cmd->use_sg = cmd->old_use_sg; - cmd->cmd_len = cmd->old_cmd_len; - cmd->sc_data_direction = cmd->sc_old_data_direction; - cmd->underflow = cmd->old_underflow; } void scsi_device_unbusy(struct scsi_device *sdev) @@ -566,7 +454,7 @@ void scsi_device_unbusy(struct scsi_device *sdev) spin_lock_irqsave(shost->host_lock, flags); shost->host_busy--; if (unlikely(scsi_host_in_recovery(shost) && - shost->host_failed)) + (shost->host_failed || shost->host_eh_scheduled))) scsi_eh_wakeup(shost); spin_unlock(shost->host_lock); spin_lock(sdev->request_queue->queue_lock); @@ -638,7 +526,7 @@ static void scsi_run_queue(struct request_queue *q) struct Scsi_Host *shost = sdev->host; unsigned long flags; - if (sdev->single_lun) + if (scsi_target(sdev)->single_lun) scsi_single_lun_run(sdev); spin_lock_irqsave(shost->host_lock, flags); @@ -661,7 +549,15 @@ static void scsi_run_queue(struct request_queue *q) list_del_init(&sdev->starved_entry); spin_unlock_irqrestore(shost->host_lock, flags); - blk_run_queue(sdev->request_queue); + + if (test_bit(QUEUE_FLAG_REENTER, &q->queue_flags) && + !test_and_set_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags)) { + blk_run_queue(sdev->request_queue); + clear_bit(QUEUE_FLAG_REENTER, + &sdev->request_queue->queue_flags); + } else + blk_run_queue(sdev->request_queue); spin_lock_irqsave(shost->host_lock, flags); if (unlikely(!list_empty(&sdev->starved_entry))) @@ -738,7 +634,7 @@ void scsi_run_host_queues(struct Scsi_Host *shost) * of upper level post-processing and scsi_io_completion). * * Arguments: cmd - command that is complete. - * uptodate - 1 if I/O indicates success, <= 0 for I/O error. + * error - 0 if I/O indicates success, < 0 for I/O error. * bytes - number of bytes of completed I/O * requeue - indicates whether we should requeue leftovers. * @@ -753,26 +649,25 @@ void scsi_run_host_queues(struct Scsi_Host *shost) * at some point during this call. * Notes: If cmd was requeued, upon return it will be a stale pointer. */ -static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, +static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int error, int bytes, int requeue) { - request_queue_t *q = cmd->device->request_queue; + struct request_queue *q = cmd->device->request_queue; struct request *req = cmd->request; - unsigned long flags; /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ - if (end_that_request_chunk(req, uptodate, bytes)) { + if (blk_end_request(req, error, bytes)) { int leftover = (req->hard_nr_sectors << 9); if (blk_pc_request(req)) leftover = req->data_len; /* kill remainder if no retrys */ - if (!uptodate && blk_noretry_request(req)) - end_that_request_chunk(req, 0, leftover); + if (error && blk_noretry_request(req)) + blk_end_request(req, error, leftover); else { if (requeue) { /* @@ -787,14 +682,6 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, } } - add_disk_randomness(req->rq_disk); - - spin_lock_irqsave(q->queue_lock, flags); - if (blk_rq_tagged(req)) - blk_queue_end_tag(q, req); - end_that_request_last(req, uptodate); - spin_unlock_irqrestore(q->queue_lock, flags); - /* * This will goose the queue request function at the end, so we don't * need to worry about launching another command. @@ -803,57 +690,87 @@ static struct scsi_cmnd *scsi_end_request(struct scsi_cmnd *cmd, int uptodate, return NULL; } -static struct scatterlist *scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) -{ - struct scsi_host_sg_pool *sgp; - struct scatterlist *sgl; +/* + * Like SCSI_MAX_SG_SEGMENTS, but for archs that have sg chaining. This limit + * is totally arbitrary, a setting of 2048 will get you at least 8mb ios. + */ +#define SCSI_MAX_SG_CHAIN_SEGMENTS 2048 - BUG_ON(!cmd->use_sg); +static inline unsigned int scsi_sgtable_index(unsigned short nents) +{ + unsigned int index; - switch (cmd->use_sg) { + switch (nents) { case 1 ... 8: - cmd->sglist_len = 0; + index = 0; break; case 9 ... 16: - cmd->sglist_len = 1; + index = 1; break; +#if (SCSI_MAX_SG_SEGMENTS > 16) case 17 ... 32: - cmd->sglist_len = 2; + index = 2; break; -#if (SCSI_MAX_PHYS_SEGMENTS > 32) +#if (SCSI_MAX_SG_SEGMENTS > 32) case 33 ... 64: - cmd->sglist_len = 3; + index = 3; break; -#if (SCSI_MAX_PHYS_SEGMENTS > 64) +#if (SCSI_MAX_SG_SEGMENTS > 64) case 65 ... 128: - cmd->sglist_len = 4; - break; -#if (SCSI_MAX_PHYS_SEGMENTS > 128) - case 129 ... 256: - cmd->sglist_len = 5; + index = 4; break; #endif #endif #endif default: - return NULL; + printk(KERN_ERR "scsi: bad segment count=%d\n", nents); + BUG(); } - sgp = scsi_sg_pools + cmd->sglist_len; - sgl = mempool_alloc(sgp->pool, gfp_mask); - return sgl; + return index; } -static void scsi_free_sgtable(struct scatterlist *sgl, int index) +static void scsi_sg_free(struct scatterlist *sgl, unsigned int nents) { struct scsi_host_sg_pool *sgp; - BUG_ON(index >= SG_MEMPOOL_NR); - - sgp = scsi_sg_pools + index; + sgp = scsi_sg_pools + scsi_sgtable_index(nents); mempool_free(sgl, sgp->pool); } +static struct scatterlist *scsi_sg_alloc(unsigned int nents, gfp_t gfp_mask) +{ + struct scsi_host_sg_pool *sgp; + + sgp = scsi_sg_pools + scsi_sgtable_index(nents); + return mempool_alloc(sgp->pool, gfp_mask); +} + +int scsi_alloc_sgtable(struct scsi_cmnd *cmd, gfp_t gfp_mask) +{ + int ret; + + BUG_ON(!cmd->use_sg); + + ret = __sg_alloc_table(&cmd->sg_table, cmd->use_sg, + SCSI_MAX_SG_SEGMENTS, gfp_mask, scsi_sg_alloc); + if (unlikely(ret)) + __sg_free_table(&cmd->sg_table, SCSI_MAX_SG_SEGMENTS, + scsi_sg_free); + + cmd->request_buffer = cmd->sg_table.sgl; + return ret; +} + +EXPORT_SYMBOL(scsi_alloc_sgtable); + +void scsi_free_sgtable(struct scsi_cmnd *cmd) +{ + __sg_free_table(&cmd->sg_table, SCSI_MAX_SG_SEGMENTS, scsi_sg_free); +} + +EXPORT_SYMBOL(scsi_free_sgtable); + /* * Function: scsi_release_buffers() * @@ -873,22 +790,13 @@ static void scsi_free_sgtable(struct scatterlist *sgl, int index) */ static void scsi_release_buffers(struct scsi_cmnd *cmd) { - struct request *req = cmd->request; - - /* - * Free up any indirection buffers we allocated for DMA purposes. - */ if (cmd->use_sg) - scsi_free_sgtable(cmd->request_buffer, cmd->sglist_len); - else if (cmd->request_buffer != req->buffer) - kfree(cmd->request_buffer); + scsi_free_sgtable(cmd); /* * Zero these out. They now point to freed memory, and it is * dangerous to hang onto the pointers. */ - cmd->buffer = NULL; - cmd->bufflen = 0; cmd->request_buffer = NULL; cmd->request_bufflen = 0; } @@ -921,40 +829,25 @@ static void scsi_release_buffers(struct scsi_cmnd *cmd) * b) We can just use scsi_requeue_command() here. This would * be used if we just wanted to retry, for example. */ -void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes, - unsigned int block_bytes) +void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) { int result = cmd->result; - int this_count = cmd->bufflen; - request_queue_t *q = cmd->device->request_queue; + int this_count = cmd->request_bufflen; + struct request_queue *q = cmd->device->request_queue; struct request *req = cmd->request; int clear_errors = 1; struct scsi_sense_hdr sshdr; int sense_valid = 0; int sense_deferred = 0; - /* - * Free up any indirection buffers we allocated for DMA purposes. - * For the case of a READ, we need to copy the data out of the - * bounce buffer and into the real buffer. - */ - if (cmd->use_sg) - scsi_free_sgtable(cmd->buffer, cmd->sglist_len); - else if (cmd->buffer != req->buffer) { - if (rq_data_dir(req) == READ) { - unsigned long flags; - char *to = bio_kmap_irq(req->bio, &flags); - memcpy(to, cmd->buffer, cmd->bufflen); - bio_kunmap_irq(to, &flags); - } - kfree(cmd->buffer); - } + scsi_release_buffers(cmd); if (result) { sense_valid = scsi_command_normalize_sense(cmd, &sshdr); if (sense_valid) sense_deferred = scsi_sense_is_deferred(&sshdr); } + if (blk_pc_request(req)) { /* SG_IO ioctl from block level */ req->errors = result; if (result) { @@ -970,157 +863,133 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes, memcpy(req->sense, cmd->sense_buffer, len); req->sense_len = len; } - } else - req->data_len = cmd->resid; + } + req->data_len = cmd->resid; } /* - * Zero these out. They now point to freed memory, and it is - * dangerous to hang onto the pointers. - */ - cmd->buffer = NULL; - cmd->bufflen = 0; - cmd->request_buffer = NULL; - cmd->request_bufflen = 0; - - /* * Next deal with any sectors which we were able to correctly * handle. */ - if (good_bytes >= 0) { - SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, %d bytes done.\n", - req->nr_sectors, good_bytes)); - SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg)); + SCSI_LOG_HLCOMPLETE(1, printk("%ld sectors total, " + "%d bytes done.\n", + req->nr_sectors, good_bytes)); + SCSI_LOG_HLCOMPLETE(1, printk("use_sg is %d\n", cmd->use_sg)); - if (clear_errors) - req->errors = 0; - /* - * If multiple sectors are requested in one buffer, then - * they will have been finished off by the first command. - * If not, then we have a multi-buffer command. - * - * If block_bytes != 0, it means we had a medium error - * of some sort, and that we want to mark some number of - * sectors as not uptodate. Thus we want to inhibit - * requeueing right here - we will requeue down below - * when we handle the bad sectors. - */ + if (clear_errors) + req->errors = 0; - /* - * If the command completed without error, then either - * finish off the rest of the command, or start a new one. - */ - if (scsi_end_request(cmd, 1, good_bytes, result == 0) == NULL) - return; - } - /* - * Now, if we were good little boys and girls, Santa left us a request - * sense buffer. We can extract information from this, so we - * can choose a block to remap, etc. + /* A number of bytes were successfully read. If there + * are leftovers and there is some kind of error + * (result != 0), retry the rest. + */ + if (scsi_end_request(cmd, 0, good_bytes, result == 0) == NULL) + return; + + /* good_bytes = 0, or (inclusive) there were leftovers and + * result = 0, so scsi_end_request couldn't retry. */ if (sense_valid && !sense_deferred) { switch (sshdr.sense_key) { case UNIT_ATTENTION: if (cmd->device->removable) { - /* detected disc change. set a bit + /* Detected disc change. Set a bit * and quietly refuse further access. */ cmd->device->changed = 1; - scsi_end_request(cmd, 0, - this_count, 1); + scsi_end_request(cmd, -EIO, this_count, 1); return; } else { - /* - * Must have been a power glitch, or a - * bus reset. Could not have been a - * media change, so we just retry the - * request and see what happens. - */ + /* Must have been a power glitch, or a + * bus reset. Could not have been a + * media change, so we just retry the + * request and see what happens. + */ scsi_requeue_command(q, cmd); return; } break; case ILLEGAL_REQUEST: - /* - * If we had an ILLEGAL REQUEST returned, then we may - * have performed an unsupported command. The only - * thing this should be would be a ten byte read where - * only a six byte read was supported. Also, on a - * system where READ CAPACITY failed, we may have read - * past the end of the disk. - */ + /* If we had an ILLEGAL REQUEST returned, then + * we may have performed an unsupported + * command. The only thing this should be + * would be a ten byte read where only a six + * byte read was supported. Also, on a system + * where READ CAPACITY failed, we may have + * read past the end of the disk. + */ if ((cmd->device->use_10_for_rw && sshdr.asc == 0x20 && sshdr.ascq == 0x00) && (cmd->cmnd[0] == READ_10 || cmd->cmnd[0] == WRITE_10)) { cmd->device->use_10_for_rw = 0; - /* - * This will cause a retry with a 6-byte - * command. + /* This will cause a retry with a + * 6-byte command. */ scsi_requeue_command(q, cmd); - result = 0; + return; } else { - scsi_end_request(cmd, 0, this_count, 1); + scsi_end_request(cmd, -EIO, this_count, 1); return; } break; case NOT_READY: - /* - * If the device is in the process of becoming ready, - * retry. + /* If the device is in the process of becoming + * ready, or has a temporary blockage, retry. */ - if (sshdr.asc == 0x04 && sshdr.ascq == 0x01) { - scsi_requeue_command(q, cmd); - return; + if (sshdr.asc == 0x04) { + switch (sshdr.ascq) { + case 0x01: /* becoming ready */ + case 0x04: /* format in progress */ + case 0x05: /* rebuild in progress */ + case 0x06: /* recalculation in progress */ + case 0x07: /* operation in progress */ + case 0x08: /* Long write in progress */ + case 0x09: /* self test in progress */ + scsi_requeue_command(q, cmd); + return; + default: + break; + } } - if (!(req->flags & REQ_QUIET)) - scmd_printk(KERN_INFO, cmd, - "Device not ready.\n"); - scsi_end_request(cmd, 0, this_count, 1); + if (!(req->cmd_flags & REQ_QUIET)) + scsi_cmd_print_sense_hdr(cmd, + "Device not ready", + &sshdr); + + scsi_end_request(cmd, -EIO, this_count, 1); return; case VOLUME_OVERFLOW: - if (!(req->flags & REQ_QUIET)) { + if (!(req->cmd_flags & REQ_QUIET)) { scmd_printk(KERN_INFO, cmd, - "Volume overflow, CDB: "); - __scsi_print_command(cmd->data_cmnd); + "Volume overflow, CDB: "); + __scsi_print_command(cmd->cmnd); scsi_print_sense("", cmd); } - scsi_end_request(cmd, 0, block_bytes, 1); + /* See SSC3rXX or current. */ + scsi_end_request(cmd, -EIO, this_count, 1); return; default: break; } - } /* driver byte != 0 */ + } if (host_byte(result) == DID_RESET) { - /* - * Third party bus reset or reset for error - * recovery reasons. Just retry the request - * and see what happens. + /* Third party bus reset or reset for error recovery + * reasons. Just retry the request and see what + * happens. */ scsi_requeue_command(q, cmd); return; } if (result) { - if (!(req->flags & REQ_QUIET)) { - scmd_printk(KERN_INFO, cmd, - "SCSI error: return code = 0x%x\n", result); - + if (!(req->cmd_flags & REQ_QUIET)) { + scsi_print_result(cmd); if (driver_byte(result) & DRIVER_SENSE) scsi_print_sense("", cmd); } - /* - * Mark a single buffer as not uptodate. Queue the remainder. - * We sometimes get this cruft in the event that a medium error - * isn't properly reported. - */ - block_bytes = req->hard_cur_sectors << 9; - if (!block_bytes) - block_bytes = req->data_len; - scsi_end_request(cmd, 0, block_bytes, 1); } + scsi_end_request(cmd, -EIO, this_count, !result); } -EXPORT_SYMBOL(scsi_io_completion); /* * Function: scsi_init_io() @@ -1131,105 +1000,101 @@ EXPORT_SYMBOL(scsi_io_completion); * * Returns: 0 on success * BLKPREP_DEFER if the failure is retryable - * BLKPREP_KILL if the failure is fatal */ static int scsi_init_io(struct scsi_cmnd *cmd) { struct request *req = cmd->request; - struct scatterlist *sgpnt; int count; /* - * if this is a rq->data based REQ_BLOCK_PC, setup for a non-sg xfer - */ - if ((req->flags & REQ_BLOCK_PC) && !req->bio) { - cmd->request_bufflen = req->data_len; - cmd->request_buffer = req->data; - req->buffer = req->data; - cmd->use_sg = 0; - return 0; - } - - /* - * we used to not use scatter-gather for single segment request, + * We used to not use scatter-gather for single segment request, * but now we do (it makes highmem I/O easier to support without * kmapping pages) */ cmd->use_sg = req->nr_phys_segments; /* - * if sg table allocation fails, requeue request later. + * If sg table allocation fails, requeue request later. */ - sgpnt = scsi_alloc_sgtable(cmd, GFP_ATOMIC); - if (unlikely(!sgpnt)) { + if (unlikely(scsi_alloc_sgtable(cmd, GFP_ATOMIC))) { scsi_unprep_request(req); return BLKPREP_DEFER; } - cmd->request_buffer = (char *) sgpnt; - cmd->request_bufflen = req->nr_sectors << 9; + req->buffer = NULL; if (blk_pc_request(req)) cmd->request_bufflen = req->data_len; - req->buffer = NULL; + else + cmd->request_bufflen = req->nr_sectors << 9; /* * Next, walk the list, and fill in the addresses and sizes of * each segment. */ count = blk_rq_map_sg(req->q, req, cmd->request_buffer); + BUG_ON(count > cmd->use_sg); + cmd->use_sg = count; + return BLKPREP_OK; +} - /* - * mapped well, send it off - */ - if (likely(count <= cmd->use_sg)) { - cmd->use_sg = count; - return 0; +static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, + struct request *req) +{ + struct scsi_cmnd *cmd; + + if (!req->special) { + cmd = scsi_get_command(sdev, GFP_ATOMIC); + if (unlikely(!cmd)) + return NULL; + req->special = cmd; + } else { + cmd = req->special; } - printk(KERN_ERR "Incorrect number of segments after building list\n"); - printk(KERN_ERR "counted %d, received %d\n", count, cmd->use_sg); - printk(KERN_ERR "req nr_sec %lu, cur_nr_sec %u\n", req->nr_sectors, - req->current_nr_sectors); + /* pull a tag out of the request if we have one */ + cmd->tag = req->tag; + cmd->request = req; - /* release the command and kill it */ - scsi_release_buffers(cmd); - scsi_put_command(cmd); - return BLKPREP_KILL; + return cmd; } -static int scsi_issue_flush_fn(request_queue_t *q, struct gendisk *disk, - sector_t *error_sector) +int scsi_setup_blk_pc_cmnd(struct scsi_device *sdev, struct request *req) { - struct scsi_device *sdev = q->queuedata; - struct scsi_driver *drv; - - if (sdev->sdev_state != SDEV_RUNNING) - return -ENXIO; + struct scsi_cmnd *cmd; + int ret = scsi_prep_state_check(sdev, req); - drv = *(struct scsi_driver **) disk->private_data; - if (drv->issue_flush) - return drv->issue_flush(&sdev->sdev_gendev, error_sector); + if (ret != BLKPREP_OK) + return ret; - return -EOPNOTSUPP; -} + cmd = scsi_get_cmd_from_req(sdev, req); + if (unlikely(!cmd)) + return BLKPREP_DEFER; -static void scsi_blk_pc_done(struct scsi_cmnd *cmd) -{ - BUG_ON(!blk_pc_request(cmd->request)); /* - * This will complete the whole command with uptodate=1 so - * as far as the block layer is concerned the command completed - * successfully. Since this is a REQ_BLOCK_PC command the - * caller should check the request's errors value + * BLOCK_PC requests may transfer data, in which case they must + * a bio attached to them. Or they might contain a SCSI command + * that does not transfer data, in which case they may optionally + * submit a request without an attached bio. */ - scsi_io_completion(cmd, cmd->bufflen, 0); -} + if (req->bio) { + int ret; -static void scsi_setup_blk_pc_cmnd(struct scsi_cmnd *cmd) -{ - struct request *req = cmd->request; + BUG_ON(!req->nr_phys_segments); + + ret = scsi_init_io(cmd); + if (unlikely(ret)) + return ret; + } else { + BUG_ON(req->data_len); + BUG_ON(req->data); - BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd)); + cmd->request_bufflen = 0; + cmd->request_buffer = NULL; + cmd->use_sg = 0; + req->buffer = NULL; + } + + BUILD_BUG_ON(sizeof(req->cmd) > sizeof(cmd->cmnd)); memcpy(cmd->cmnd, req->cmd, sizeof(cmd->cmnd)); cmd->cmd_len = req->cmd_len; if (!req->data_len) @@ -1242,165 +1107,127 @@ static void scsi_setup_blk_pc_cmnd(struct scsi_cmnd *cmd) cmd->transfersize = req->data_len; cmd->allowed = req->retries; cmd->timeout_per_command = req->timeout; - cmd->done = scsi_blk_pc_done; + return BLKPREP_OK; } +EXPORT_SYMBOL(scsi_setup_blk_pc_cmnd); -static int scsi_prep_fn(struct request_queue *q, struct request *req) +/* + * Setup a REQ_TYPE_FS command. These are simple read/write request + * from filesystems that still need to be translated to SCSI CDBs from + * the ULD. + */ +int scsi_setup_fs_cmnd(struct scsi_device *sdev, struct request *req) { - struct scsi_device *sdev = q->queuedata; struct scsi_cmnd *cmd; - int specials_only = 0; + int ret = scsi_prep_state_check(sdev, req); + if (ret != BLKPREP_OK) + return ret; /* - * Just check to see if the device is online. If it isn't, we - * refuse to process any commands. The device must be brought - * online before trying any recovery commands + * Filesystem requests must transfer data. */ - if (unlikely(!scsi_device_online(sdev))) { - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to offline device\n"); - goto kill; - } - if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { - /* OK, we're not in a running state don't prep - * user commands */ - if (sdev->sdev_state == SDEV_DEL) { - /* Device is fully deleted, no commands - * at all allowed down */ - sdev_printk(KERN_ERR, sdev, - "rejecting I/O to dead device\n"); - goto kill; - } - /* OK, we only allow special commands (i.e. not - * user initiated ones */ - specials_only = sdev->sdev_state; - } + BUG_ON(!req->nr_phys_segments); + + cmd = scsi_get_cmd_from_req(sdev, req); + if (unlikely(!cmd)) + return BLKPREP_DEFER; + + return scsi_init_io(cmd); +} +EXPORT_SYMBOL(scsi_setup_fs_cmnd); + +int scsi_prep_state_check(struct scsi_device *sdev, struct request *req) +{ + int ret = BLKPREP_OK; /* - * Find the actual device driver associated with this command. - * The SPECIAL requests are things like character device or - * ioctls, which did not originate from ll_rw_blk. Note that - * the special field is also used to indicate the cmd for - * the remainder of a partially fulfilled request that can - * come up when there is a medium error. We have to treat - * these two cases differently. We differentiate by looking - * at request->cmd, as this tells us the real story. + * If the device is not in running state we will reject some + * or all commands. */ - if (req->flags & REQ_SPECIAL && req->special) { - struct scsi_request *sreq = req->special; - - if (sreq->sr_magic == SCSI_REQ_MAGIC) { - cmd = scsi_get_command(sreq->sr_device, GFP_ATOMIC); - if (unlikely(!cmd)) - goto defer; - scsi_init_cmd_from_req(cmd, sreq); - } else - cmd = req->special; - } else if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { - - if(unlikely(specials_only) && !(req->flags & REQ_SPECIAL)) { - if(specials_only == SDEV_QUIESCE || - specials_only == SDEV_BLOCK) - goto defer; - + if (unlikely(sdev->sdev_state != SDEV_RUNNING)) { + switch (sdev->sdev_state) { + case SDEV_OFFLINE: + /* + * If the device is offline we refuse to process any + * commands. The device must be brought online + * before trying any recovery commands. + */ sdev_printk(KERN_ERR, sdev, - "rejecting I/O to device being removed\n"); - goto kill; + "rejecting I/O to offline device\n"); + ret = BLKPREP_KILL; + break; + case SDEV_DEL: + /* + * If the device is fully deleted, we refuse to + * process any commands as well. + */ + sdev_printk(KERN_ERR, sdev, + "rejecting I/O to dead device\n"); + ret = BLKPREP_KILL; + break; + case SDEV_QUIESCE: + case SDEV_BLOCK: + /* + * If the devices is blocked we defer normal commands. + */ + if (!(req->cmd_flags & REQ_PREEMPT)) + ret = BLKPREP_DEFER; + break; + default: + /* + * For any other not fully online state we only allow + * special commands. In particular any user initiated + * command is not allowed. + */ + if (!(req->cmd_flags & REQ_PREEMPT)) + ret = BLKPREP_KILL; + break; } - - - /* - * Now try and find a command block that we can use. - */ - if (!req->special) { - cmd = scsi_get_command(sdev, GFP_ATOMIC); - if (unlikely(!cmd)) - goto defer; - } else - cmd = req->special; - - /* pull a tag out of the request if we have one */ - cmd->tag = req->tag; - } else { - blk_dump_rq_flags(req, "SCSI bad req"); - goto kill; } - - /* note the overloading of req->special. When the tag - * is active it always means cmd. If the tag goes - * back for re-queueing, it may be reset */ - req->special = cmd; - cmd->request = req; - - /* - * FIXME: drop the lock here because the functions below - * expect to be called without the queue lock held. Also, - * previously, we dequeued the request before dropping the - * lock. We hope REQ_STARTED prevents anything untoward from - * happening now. - */ - if (req->flags & (REQ_CMD | REQ_BLOCK_PC)) { - int ret; + return ret; +} +EXPORT_SYMBOL(scsi_prep_state_check); - /* - * This will do a couple of things: - * 1) Fill in the actual SCSI command. - * 2) Fill in any other upper-level specific fields - * (timeout). - * - * If this returns 0, it means that the request failed - * (reading past end of disk, reading offline device, - * etc). This won't actually talk to the device, but - * some kinds of consistency checking may cause the - * request to be rejected immediately. - */ +int scsi_prep_return(struct request_queue *q, struct request *req, int ret) +{ + struct scsi_device *sdev = q->queuedata; - /* - * This sets up the scatter-gather table (allocating if - * required). - */ - ret = scsi_init_io(cmd); - switch(ret) { - /* For BLKPREP_KILL/DEFER the cmd was released */ - case BLKPREP_KILL: - goto kill; - case BLKPREP_DEFER: - goto defer; + switch (ret) { + case BLKPREP_KILL: + req->errors = DID_NO_CONNECT << 16; + /* release the command and kill it */ + if (req->special) { + struct scsi_cmnd *cmd = req->special; + scsi_release_buffers(cmd); + scsi_put_command(cmd); + req->special = NULL; } - + break; + case BLKPREP_DEFER: /* - * Initialize the actual SCSI command for this request. + * If we defer, the elv_next_request() returns NULL, but the + * queue must be restarted, so we plug here if no returning + * command will automatically do that. */ - if (req->flags & REQ_BLOCK_PC) { - scsi_setup_blk_pc_cmnd(cmd); - } else if (req->rq_disk) { - struct scsi_driver *drv; - - drv = *(struct scsi_driver **)req->rq_disk->private_data; - if (unlikely(!drv->init_command(cmd))) { - scsi_release_buffers(cmd); - scsi_put_command(cmd); - goto kill; - } - } + if (sdev->device_busy == 0) + blk_plug_device(q); + break; + default: + req->cmd_flags |= REQ_DONTPREP; } - /* - * The request is now prepped, no need to come back here - */ - req->flags |= REQ_DONTPREP; - return BLKPREP_OK; + return ret; +} +EXPORT_SYMBOL(scsi_prep_return); - defer: - /* If we defer, the elv_next_request() returns NULL, but the - * queue must be restarted, so we plug here if no returning - * command will automatically do that. */ - if (sdev->device_busy == 0) - blk_plug_device(q); - return BLKPREP_DEFER; - kill: - req->errors = DID_NO_CONNECT << 16; - return BLKPREP_KILL; +int scsi_prep_fn(struct request_queue *q, struct request *req) +{ + struct scsi_device *sdev = q->queuedata; + int ret = BLKPREP_KILL; + + if (req->cmd_type == REQ_TYPE_BLOCK_PC) + ret = scsi_setup_blk_pc_cmnd(sdev, req); + return scsi_prep_return(q, req, ret); } /* @@ -1476,9 +1303,11 @@ static inline int scsi_host_queue_ready(struct request_queue *q, /* * Kill a request for a dead device */ -static void scsi_kill_request(struct request *req, request_queue_t *q) +static void scsi_kill_request(struct request *req, struct request_queue *q) { struct scsi_cmnd *cmd = req->special; + struct scsi_device *sdev = cmd->device; + struct Scsi_Host *shost = sdev->host; blkdev_dequeue_request(req); @@ -1491,6 +1320,19 @@ static void scsi_kill_request(struct request *req, request_queue_t *q) scsi_init_cmd_errh(cmd); cmd->result = DID_NO_CONNECT << 16; atomic_inc(&cmd->device->iorequest_cnt); + + /* + * SCSI request completion path will do scsi_device_unbusy(), + * bump busy counts. To bump the counters, we need to dance + * with the locks as normal issue path does. + */ + sdev->device_busy++; + spin_unlock(sdev->request_queue->queue_lock); + spin_lock(shost->host_lock); + shost->host_busy++; + spin_unlock(shost->host_lock); + spin_lock(sdev->request_queue->queue_lock); + __scsi_done(cmd); } @@ -1518,7 +1360,7 @@ static void scsi_softirq_done(struct request *rq) scsi_finish_command(cmd); break; case NEEDS_RETRY: - scsi_retry_command(cmd); + scsi_queue_insert(cmd, SCSI_MLQUEUE_EH_RETRY); break; case ADD_TO_MLQUEUE: scsi_queue_insert(cmd, SCSI_MLQUEUE_DEVICE_BUSY); @@ -1594,15 +1436,16 @@ static void scsi_request_fn(struct request_queue *q) if (unlikely(cmd == NULL)) { printk(KERN_CRIT "impossible request in %s.\n" "please mail a stack trace to " - "linux-scsi@vger.kernel.org", + "linux-scsi@vger.kernel.org\n", __FUNCTION__); + blk_dump_rq_flags(req, "foo"); BUG(); } spin_lock(shost->host_lock); if (!scsi_host_queue_ready(q, shost, sdev)) goto not_ready; - if (sdev->single_lun) { + if (scsi_target(sdev)->single_lun) { if (scsi_target(sdev)->starget_sdev_user && scsi_target(sdev)->starget_sdev_user != sdev) goto not_ready; @@ -1686,27 +1529,62 @@ u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost) } EXPORT_SYMBOL(scsi_calculate_bounce_limit); -struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) +struct request_queue *__scsi_alloc_queue(struct Scsi_Host *shost, + request_fn_proc *request_fn) { - struct Scsi_Host *shost = sdev->host; struct request_queue *q; - q = blk_init_queue(scsi_request_fn, NULL); + q = blk_init_queue(request_fn, NULL); if (!q) return NULL; - blk_queue_prep_rq(q, scsi_prep_fn); - + /* + * this limit is imposed by hardware restrictions + */ blk_queue_max_hw_segments(q, shost->sg_tablesize); - blk_queue_max_phys_segments(q, SCSI_MAX_PHYS_SEGMENTS); + + /* + * In the future, sg chaining support will be mandatory and this + * ifdef can then go away. Right now we don't have all archs + * converted, so better keep it safe. + */ +#ifdef ARCH_HAS_SG_CHAIN + if (shost->use_sg_chaining) + blk_queue_max_phys_segments(q, SCSI_MAX_SG_CHAIN_SEGMENTS); + else + blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#else + blk_queue_max_phys_segments(q, SCSI_MAX_SG_SEGMENTS); +#endif + blk_queue_max_sectors(q, shost->max_sectors); blk_queue_bounce_limit(q, scsi_calculate_bounce_limit(shost)); blk_queue_segment_boundary(q, shost->dma_boundary); - blk_queue_issue_flush_fn(q, scsi_issue_flush_fn); - blk_queue_softirq_done(q, scsi_softirq_done); if (!shost->use_clustering) clear_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); + + /* + * set a reasonable default alignment on word boundaries: the + * host and device may alter it using + * blk_queue_update_dma_alignment() later. + */ + blk_queue_dma_alignment(q, 0x03); + + return q; +} +EXPORT_SYMBOL(__scsi_alloc_queue); + +struct request_queue *scsi_alloc_queue(struct scsi_device *sdev) +{ + struct request_queue *q; + + q = __scsi_alloc_queue(sdev->host, scsi_request_fn); + if (!q) + return NULL; + + blk_queue_prep_rq(q, scsi_prep_fn); + blk_queue_softirq_done(q, scsi_softirq_done); return q; } @@ -1770,7 +1648,7 @@ int __init scsi_init_queue(void) scsi_io_context_cache = kmem_cache_create("scsi_io_context", sizeof(struct scsi_io_context), - 0, 0, NULL, NULL); + 0, 0, NULL); if (!scsi_io_context_cache) { printk(KERN_ERR "SCSI: can't init scsi io context cache\n"); return -ENOMEM; @@ -1781,15 +1659,14 @@ int __init scsi_init_queue(void) int size = sgp->size * sizeof(struct scatterlist); sgp->slab = kmem_cache_create(sgp->name, size, 0, - SLAB_HWCACHE_ALIGN, NULL, NULL); + SLAB_HWCACHE_ALIGN, NULL); if (!sgp->slab) { printk(KERN_ERR "SCSI: can't init sg slab %s\n", sgp->name); } - sgp->pool = mempool_create(SG_MEMPOOL_SIZE, - mempool_alloc_slab, mempool_free_slab, - sgp->slab); + sgp->pool = mempool_create_slab_pool(SG_MEMPOOL_SIZE, + sgp->slab); if (!sgp->pool) { printk(KERN_ERR "SCSI: can't init sg mempool %s\n", sgp->name); @@ -1811,9 +1688,86 @@ void scsi_exit_queue(void) kmem_cache_destroy(sgp->slab); } } + /** - * scsi_mode_sense - issue a mode sense, falling back from 10 to - * six bytes if necessary. + * scsi_mode_select - issue a mode select + * @sdev: SCSI device to be queried + * @pf: Page format bit (1 == standard, 0 == vendor specific) + * @sp: Save page bit (0 == don't save, 1 == save) + * @modepage: mode page being requested + * @buffer: request buffer (may not be smaller than eight bytes) + * @len: length of request buffer. + * @timeout: command timeout + * @retries: number of retries before failing + * @data: returns a structure abstracting the mode header data + * @sshdr: place to put sense data (or NULL if no sense to be collected). + * must be SCSI_SENSE_BUFFERSIZE big. + * + * Returns zero if successful; negative error number or scsi + * status on error + * + */ +int +scsi_mode_select(struct scsi_device *sdev, int pf, int sp, int modepage, + unsigned char *buffer, int len, int timeout, int retries, + struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) +{ + unsigned char cmd[10]; + unsigned char *real_buffer; + int ret; + + memset(cmd, 0, sizeof(cmd)); + cmd[1] = (pf ? 0x10 : 0) | (sp ? 0x01 : 0); + + if (sdev->use_10_for_ms) { + if (len > 65535) + return -EINVAL; + real_buffer = kmalloc(8 + len, GFP_KERNEL); + if (!real_buffer) + return -ENOMEM; + memcpy(real_buffer + 8, buffer, len); + len += 8; + real_buffer[0] = 0; + real_buffer[1] = 0; + real_buffer[2] = data->medium_type; + real_buffer[3] = data->device_specific; + real_buffer[4] = data->longlba ? 0x01 : 0; + real_buffer[5] = 0; + real_buffer[6] = data->block_descriptor_length >> 8; + real_buffer[7] = data->block_descriptor_length; + + cmd[0] = MODE_SELECT_10; + cmd[7] = len >> 8; + cmd[8] = len; + } else { + if (len > 255 || data->block_descriptor_length > 255 || + data->longlba) + return -EINVAL; + + real_buffer = kmalloc(4 + len, GFP_KERNEL); + if (!real_buffer) + return -ENOMEM; + memcpy(real_buffer + 4, buffer, len); + len += 4; + real_buffer[0] = 0; + real_buffer[1] = data->medium_type; + real_buffer[2] = data->device_specific; + real_buffer[3] = data->block_descriptor_length; + + + cmd[0] = MODE_SELECT; + cmd[4] = len; + } + + ret = scsi_execute_req(sdev, cmd, DMA_TO_DEVICE, real_buffer, len, + sshdr, timeout, retries); + kfree(real_buffer); + return ret; +} +EXPORT_SYMBOL_GPL(scsi_mode_select); + +/** + * scsi_mode_sense - issue a mode sense, falling back from 10 to six bytes if necessary. * @sdev: SCSI device to be queried * @dbd: set if mode sense will allow block descriptors to be returned * @modepage: mode page being requested @@ -1822,17 +1776,18 @@ void scsi_exit_queue(void) * @timeout: command timeout * @retries: number of retries before failing * @data: returns a structure abstracting the mode header data - * @sense: place to put sense data (or NULL if no sense to be collected). + * @sshdr: place to put sense data (or NULL if no sense to be collected). * must be SCSI_SENSE_BUFFERSIZE big. * * Returns zero if unsuccessful, or the header offset (either 4 * or 8 depending on whether a six or ten byte command was * issued) if successful. - **/ + */ int scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, unsigned char *buffer, int len, int timeout, int retries, - struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) { + struct scsi_mode_data *data, struct scsi_sense_hdr *sshdr) +{ unsigned char cmd[12]; int use_10_for_ms; int header_length; @@ -1921,40 +1876,69 @@ scsi_mode_sense(struct scsi_device *sdev, int dbd, int modepage, } EXPORT_SYMBOL(scsi_mode_sense); +/** + * scsi_test_unit_ready - test if unit is ready + * @sdev: scsi device to change the state of. + * @timeout: command timeout + * @retries: number of retries before failing + * @sshdr_external: Optional pointer to struct scsi_sense_hdr for + * returning sense. Make sure that this is cleared before passing + * in. + * + * Returns zero if unsuccessful or an error if TUR failed. For + * removable media, a return of NOT_READY or UNIT_ATTENTION is + * translated to success, with the ->changed flag updated. + **/ int -scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries) +scsi_test_unit_ready(struct scsi_device *sdev, int timeout, int retries, + struct scsi_sense_hdr *sshdr_external) { char cmd[] = { TEST_UNIT_READY, 0, 0, 0, 0, 0, }; - struct scsi_sense_hdr sshdr; + struct scsi_sense_hdr *sshdr; int result; - - result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, &sshdr, - timeout, retries); + + if (!sshdr_external) + sshdr = kzalloc(sizeof(*sshdr), GFP_KERNEL); + else + sshdr = sshdr_external; + + /* try to eat the UNIT_ATTENTION if there are enough retries */ + do { + result = scsi_execute_req(sdev, cmd, DMA_NONE, NULL, 0, sshdr, + timeout, retries); + } while ((driver_byte(result) & DRIVER_SENSE) && + sshdr && sshdr->sense_key == UNIT_ATTENTION && + --retries); + + if (!sshdr) + /* could not allocate sense buffer, so can't process it */ + return result; if ((driver_byte(result) & DRIVER_SENSE) && sdev->removable) { - if ((scsi_sense_valid(&sshdr)) && - ((sshdr.sense_key == UNIT_ATTENTION) || - (sshdr.sense_key == NOT_READY))) { + if ((scsi_sense_valid(sshdr)) && + ((sshdr->sense_key == UNIT_ATTENTION) || + (sshdr->sense_key == NOT_READY))) { sdev->changed = 1; result = 0; } } + if (!sshdr_external) + kfree(sshdr); return result; } EXPORT_SYMBOL(scsi_test_unit_ready); /** - * scsi_device_set_state - Take the given device through the device - * state model. + * scsi_device_set_state - Take the given device through the device state model. * @sdev: scsi device to change the state of. * @state: state to change to. * * Returns zero if unsuccessful or an error if the requested * transition is illegal. - **/ + */ int scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) { @@ -2018,6 +2002,7 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) switch (oldstate) { case SDEV_CREATED: case SDEV_RUNNING: + case SDEV_QUIESCE: case SDEV_OFFLINE: case SDEV_BLOCK: break; @@ -2028,6 +2013,9 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) case SDEV_DEL: switch (oldstate) { + case SDEV_CREATED: + case SDEV_RUNNING: + case SDEV_OFFLINE: case SDEV_CANCEL: break; default: @@ -2051,6 +2039,142 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state) EXPORT_SYMBOL(scsi_device_set_state); /** + * sdev_evt_emit - emit a single SCSI device uevent + * @sdev: associated SCSI device + * @evt: event to emit + * + * Send a single uevent (scsi_event) to the associated scsi_device. + */ +static void scsi_evt_emit(struct scsi_device *sdev, struct scsi_event *evt) +{ + int idx = 0; + char *envp[3]; + + switch (evt->evt_type) { + case SDEV_EVT_MEDIA_CHANGE: + envp[idx++] = "SDEV_MEDIA_CHANGE=1"; + break; + + default: + /* do nothing */ + break; + } + + envp[idx++] = NULL; + + kobject_uevent_env(&sdev->sdev_gendev.kobj, KOBJ_CHANGE, envp); +} + +/** + * sdev_evt_thread - send a uevent for each scsi event + * @work: work struct for scsi_device + * + * Dispatch queued events to their associated scsi_device kobjects + * as uevents. + */ +void scsi_evt_thread(struct work_struct *work) +{ + struct scsi_device *sdev; + LIST_HEAD(event_list); + + sdev = container_of(work, struct scsi_device, event_work); + + while (1) { + struct scsi_event *evt; + struct list_head *this, *tmp; + unsigned long flags; + + spin_lock_irqsave(&sdev->list_lock, flags); + list_splice_init(&sdev->event_list, &event_list); + spin_unlock_irqrestore(&sdev->list_lock, flags); + + if (list_empty(&event_list)) + break; + + list_for_each_safe(this, tmp, &event_list) { + evt = list_entry(this, struct scsi_event, node); + list_del(&evt->node); + scsi_evt_emit(sdev, evt); + kfree(evt); + } + } +} + +/** + * sdev_evt_send - send asserted event to uevent thread + * @sdev: scsi_device event occurred on + * @evt: event to send + * + * Assert scsi device event asynchronously. + */ +void sdev_evt_send(struct scsi_device *sdev, struct scsi_event *evt) +{ + unsigned long flags; + + if (!test_bit(evt->evt_type, sdev->supported_events)) { + kfree(evt); + return; + } + + spin_lock_irqsave(&sdev->list_lock, flags); + list_add_tail(&evt->node, &sdev->event_list); + schedule_work(&sdev->event_work); + spin_unlock_irqrestore(&sdev->list_lock, flags); +} +EXPORT_SYMBOL_GPL(sdev_evt_send); + +/** + * sdev_evt_alloc - allocate a new scsi event + * @evt_type: type of event to allocate + * @gfpflags: GFP flags for allocation + * + * Allocates and returns a new scsi_event. + */ +struct scsi_event *sdev_evt_alloc(enum scsi_device_event evt_type, + gfp_t gfpflags) +{ + struct scsi_event *evt = kzalloc(sizeof(struct scsi_event), gfpflags); + if (!evt) + return NULL; + + evt->evt_type = evt_type; + INIT_LIST_HEAD(&evt->node); + + /* evt_type-specific initialization, if any */ + switch (evt_type) { + case SDEV_EVT_MEDIA_CHANGE: + default: + /* do nothing */ + break; + } + + return evt; +} +EXPORT_SYMBOL_GPL(sdev_evt_alloc); + +/** + * sdev_evt_send_simple - send asserted event to uevent thread + * @sdev: scsi_device event occurred on + * @evt_type: type of event to send + * @gfpflags: GFP flags for allocation + * + * Assert scsi device event asynchronously, given an event type. + */ +void sdev_evt_send_simple(struct scsi_device *sdev, + enum scsi_device_event evt_type, gfp_t gfpflags) +{ + struct scsi_event *evt = sdev_evt_alloc(evt_type, gfpflags); + if (!evt) { + sdev_printk(KERN_ERR, sdev, "event %d eaten due to OOM\n", + evt_type); + return; + } + + sdev_evt_send(sdev, evt); +} +EXPORT_SYMBOL_GPL(sdev_evt_send_simple); + +/** * scsi_device_quiesce - Block user issued commands. * @sdev: scsi device to quiesce. * @@ -2064,7 +2188,7 @@ EXPORT_SYMBOL(scsi_device_set_state); * Must be called with user context, may sleep. * * Returns zero if unsuccessful or an error if not. - **/ + */ int scsi_device_quiesce(struct scsi_device *sdev) { @@ -2089,7 +2213,7 @@ EXPORT_SYMBOL(scsi_device_quiesce); * queues. * * Must be called with user context, may sleep. - **/ + */ void scsi_device_resume(struct scsi_device *sdev) { @@ -2126,8 +2250,7 @@ scsi_target_resume(struct scsi_target *starget) EXPORT_SYMBOL(scsi_target_resume); /** - * scsi_internal_device_block - internal function to put a device - * temporarily into the SDEV_BLOCK state + * scsi_internal_device_block - internal function to put a device temporarily into the SDEV_BLOCK state * @sdev: device to block * * Block request made by scsi lld's to temporarily stop all @@ -2142,11 +2265,11 @@ EXPORT_SYMBOL(scsi_target_resume); * state, all commands are deferred until the scsi lld reenables * the device with scsi_device_unblock or device_block_tmo fires. * This routine assumes the host_lock is held on entry. - **/ + */ int scsi_internal_device_block(struct scsi_device *sdev) { - request_queue_t *q = sdev->request_queue; + struct request_queue *q = sdev->request_queue; unsigned long flags; int err = 0; @@ -2182,11 +2305,11 @@ EXPORT_SYMBOL_GPL(scsi_internal_device_block); * (which must be a legal transition) allowing the midlayer to * goose the queue for this device. This routine assumes the * host_lock is held upon entry. - **/ + */ int scsi_internal_device_unblock(struct scsi_device *sdev) { - request_queue_t *q = sdev->request_queue; + struct request_queue *q = sdev->request_queue; int err; unsigned long flags; @@ -2257,3 +2380,63 @@ scsi_target_unblock(struct device *dev) device_for_each_child(dev, NULL, target_unblock); } EXPORT_SYMBOL_GPL(scsi_target_unblock); + +/** + * scsi_kmap_atomic_sg - find and atomically map an sg-elemnt + * @sgl: scatter-gather list + * @sg_count: number of segments in sg + * @offset: offset in bytes into sg, on return offset into the mapped area + * @len: bytes to map, on return number of bytes mapped + * + * Returns virtual address of the start of the mapped page + */ +void *scsi_kmap_atomic_sg(struct scatterlist *sgl, int sg_count, + size_t *offset, size_t *len) +{ + int i; + size_t sg_len = 0, len_complete = 0; + struct scatterlist *sg; + struct page *page; + + WARN_ON(!irqs_disabled()); + + for_each_sg(sgl, sg, sg_count, i) { + len_complete = sg_len; /* Complete sg-entries */ + sg_len += sg->length; + if (sg_len > *offset) + break; + } + + if (unlikely(i == sg_count)) { + printk(KERN_ERR "%s: Bytes in sg: %zu, requested offset %zu, " + "elements %d\n", + __FUNCTION__, sg_len, *offset, sg_count); + WARN_ON(1); + return NULL; + } + + /* Offset starting from the beginning of first page in this sg-entry */ + *offset = *offset - len_complete + sg->offset; + + /* Assumption: contiguous pages can be accessed as "page + i" */ + page = nth_page(sg_page(sg), (*offset >> PAGE_SHIFT)); + *offset &= ~PAGE_MASK; + + /* Bytes in this sg-entry from *offset to the end of the page */ + sg_len = PAGE_SIZE - *offset; + if (*len > sg_len) + *len = sg_len; + + return kmap_atomic(page, KM_BIO_SRC_IRQ); +} +EXPORT_SYMBOL(scsi_kmap_atomic_sg); + +/** + * scsi_kunmap_atomic_sg - atomically unmap a virtual address, previously mapped with scsi_kmap_atomic_sg + * @virt: virtual address to be unmapped + */ +void scsi_kunmap_atomic_sg(void *virt) +{ + kunmap_atomic(virt, KM_BIO_SRC_IRQ); +} +EXPORT_SYMBOL(scsi_kunmap_atomic_sg);