i2c: Document the message size limit
[safe/jmp/linux-2.6] / drivers / scsi / libiscsi.c
index 9c267b4..703eb6a 100644 (file)
 #include <scsi/scsi_transport_iscsi.h>
 #include <scsi/libiscsi.h>
 
+/*
+ * Runtime debug switches for libiscsi. Each flag is exported as a module
+ * parameter and is tested by the ISCSI_DBG_* macro of the same area
+ * before a message is printed.
+ */
+
+/* connection debugging: checked by ISCSI_DBG_CONN() */
+static int iscsi_dbg_lib_conn;
+module_param_named(debug_libiscsi_conn, iscsi_dbg_lib_conn, int,
+                  S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug_libiscsi_conn,
+                "Turn on debugging for connections in libiscsi module. "
+                "Set to 1 to turn on, and zero to turn off. Default is off.");
+
+/* session debugging: checked by ISCSI_DBG_SESSION() */
+static int iscsi_dbg_lib_session;
+module_param_named(debug_libiscsi_session, iscsi_dbg_lib_session, int,
+                  S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug_libiscsi_session,
+                "Turn on debugging for sessions in libiscsi module. "
+                "Set to 1 to turn on, and zero to turn off. Default is off.");
+
+/* error handling debugging: checked by ISCSI_DBG_EH() */
+static int iscsi_dbg_lib_eh;
+module_param_named(debug_libiscsi_eh, iscsi_dbg_lib_eh, int,
+                  S_IRUGO | S_IWUSR);
+MODULE_PARM_DESC(debug_libiscsi_eh,
+                "Turn on debugging for error handling in libiscsi module. "
+                "Set to 1 to turn on, and zero to turn off. Default is off.");
+
+/*
+ * Connection debug message, printed only while iscsi_dbg_lib_conn
+ * (module parameter debug_libiscsi_conn) is non-zero. The name of the
+ * calling function is prefixed to the message.
+ *
+ * The definition deliberately does not end in a semicolon: the caller
+ * supplies it, so "ISCSI_DBG_CONN(...);" is exactly one statement and
+ * stays valid inside an unbraced if/else.
+ */
+#define ISCSI_DBG_CONN(_conn, dbg_fmt, arg...)                 \
+       do {                                                    \
+               if (iscsi_dbg_lib_conn)                         \
+                       iscsi_conn_printk(KERN_INFO, _conn,     \
+                                            "%s " dbg_fmt,     \
+                                            __func__, ##arg);  \
+       } while (0)
+
+/*
+ * Session debug message, printed only while iscsi_dbg_lib_session
+ * (module parameter debug_libiscsi_session) is non-zero. The name of
+ * the calling function is prefixed to the message.
+ *
+ * The definition deliberately does not end in a semicolon: the caller
+ * supplies it, so "ISCSI_DBG_SESSION(...);" is exactly one statement
+ * and stays valid inside an unbraced if/else.
+ */
+#define ISCSI_DBG_SESSION(_session, dbg_fmt, arg...)                   \
+       do {                                                            \
+               if (iscsi_dbg_lib_session)                              \
+                       iscsi_session_printk(KERN_INFO, _session,       \
+                                            "%s " dbg_fmt,             \
+                                            __func__, ##arg);          \
+       } while (0)
+
+/*
+ * Error handling debug message, printed only while iscsi_dbg_lib_eh
+ * (module parameter debug_libiscsi_eh) is non-zero. The name of the
+ * calling function is prefixed to the message.
+ *
+ * The definition deliberately does not end in a semicolon: the caller
+ * supplies it, so "ISCSI_DBG_EH(...);" is exactly one statement and
+ * stays valid inside an unbraced if/else.
+ */
+#define ISCSI_DBG_EH(_session, dbg_fmt, arg...)                                \
+       do {                                                            \
+               if (iscsi_dbg_lib_eh)                                   \
+                       iscsi_session_printk(KERN_INFO, _session,       \
+                                            "%s " dbg_fmt,             \
+                                            __func__, ##arg);          \
+       } while (0)
+
 /* Serial Number Arithmetic, 32 bits, less than, RFC1982 */
 #define SNA32_CHECK 2147483648UL
 
@@ -54,12 +99,19 @@ static int iscsi_sna_lte(u32 n1, u32 n2)
                            (n1 > n2 && (n2 - n1 < SNA32_CHECK)));
 }
 
-void
-iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
+inline void iscsi_conn_queue_work(struct iscsi_conn *conn)
 {
-       uint32_t max_cmdsn = be32_to_cpu(hdr->max_cmdsn);
-       uint32_t exp_cmdsn = be32_to_cpu(hdr->exp_cmdsn);
+       struct Scsi_Host *shost = conn->session->host;
+       struct iscsi_host *ihost = shost_priv(shost);
+
+       if (ihost->workq)
+               queue_work(ihost->workq, &conn->xmitwork);
+}
+EXPORT_SYMBOL_GPL(iscsi_conn_queue_work);
 
+static void __iscsi_update_cmdsn(struct iscsi_session *session,
+                                uint32_t exp_cmdsn, uint32_t max_cmdsn)
+{
        /*
         * standard specifies this check for when to update expected and
         * max sequence numbers
@@ -78,44 +130,60 @@ iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
                 * if the window closed with IO queued, then kick the
                 * xmit thread
                 */
-               if (!list_empty(&session->leadconn->xmitqueue) ||
-                   !list_empty(&session->leadconn->mgmtqueue)) {
-                       if (!(session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-                               scsi_queue_work(session->host,
-                                               &session->leadconn->xmitwork);
-               }
+               if (!list_empty(&session->leadconn->cmdqueue) ||
+                   !list_empty(&session->leadconn->mgmtqueue))
+                       iscsi_conn_queue_work(session->leadconn);
        }
 }
+
+/**
+ * iscsi_update_cmdsn - update the session cmdsn window from a pdu header
+ * @session: iscsi session
+ * @hdr: pdu header holding big-endian exp_cmdsn and max_cmdsn
+ *
+ * Converts both sequence numbers to cpu byte order and hands them to
+ * __iscsi_update_cmdsn().
+ */
+void iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
+{
+       uint32_t exp_sn = be32_to_cpu(hdr->exp_cmdsn);
+       uint32_t max_sn = be32_to_cpu(hdr->max_cmdsn);
+
+       __iscsi_update_cmdsn(session, exp_sn, max_sn);
+}
 EXPORT_SYMBOL_GPL(iscsi_update_cmdsn);
 
-void iscsi_prep_unsolicit_data_pdu(struct iscsi_task *task,
-                                  struct iscsi_data *hdr)
+/**
+ * iscsi_prep_data_out_pdu - initialize Data-Out
+ * @task: scsi command task
+ * @r2t: R2T info
+ * @hdr: iscsi data in pdu
+ *
+ * Notes:
+ *     Initialize Data-Out within this R2T sequence and finds
+ *     proper data_offset within this SCSI command.
+ *
+ *     This function is called with connection lock taken.
+ **/
+void iscsi_prep_data_out_pdu(struct iscsi_task *task, struct iscsi_r2t_info *r2t,
+                          struct iscsi_data *hdr)
 {
        struct iscsi_conn *conn = task->conn;
+       unsigned int left = r2t->data_length - r2t->sent;
+
+       task->hdr_len = sizeof(struct iscsi_data);
 
        memset(hdr, 0, sizeof(struct iscsi_data));
-       hdr->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
-       hdr->datasn = cpu_to_be32(task->unsol_datasn);
-       task->unsol_datasn++;
+       hdr->ttt = r2t->ttt;
+       hdr->datasn = cpu_to_be32(r2t->datasn);
+       r2t->datasn++;
        hdr->opcode = ISCSI_OP_SCSI_DATA_OUT;
-       memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
-
-       hdr->itt = task->hdr->itt;
-       hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
-       hdr->offset = cpu_to_be32(task->unsol_offset);
-
-       if (task->unsol_count > conn->max_xmit_dlength) {
+       memcpy(hdr->lun, task->lun, sizeof(hdr->lun));
+       hdr->itt = task->hdr_itt;
+       hdr->exp_statsn = r2t->exp_statsn;
+       hdr->offset = cpu_to_be32(r2t->data_offset + r2t->sent);
+       if (left > conn->max_xmit_dlength) {
                hton24(hdr->dlength, conn->max_xmit_dlength);
-               task->data_count = conn->max_xmit_dlength;
-               task->unsol_offset += task->data_count;
+               r2t->data_count = conn->max_xmit_dlength;
                hdr->flags = 0;
        } else {
-               hton24(hdr->dlength, task->unsol_count);
-               task->data_count = task->unsol_count;
+               hton24(hdr->dlength, left);
+               r2t->data_count = left;
                hdr->flags = ISCSI_FLAG_CMD_FINAL;
        }
+       conn->dataout_pdus_cnt++;
 }
-EXPORT_SYMBOL_GPL(iscsi_prep_unsolicit_data_pdu);
+EXPORT_SYMBOL_GPL(iscsi_prep_data_out_pdu);
 
 static int iscsi_add_hdr(struct iscsi_task *task, unsigned len)
 {
@@ -163,10 +231,11 @@ static int iscsi_prep_ecdb_ahs(struct iscsi_task *task)
        ecdb_ahdr->reserved = 0;
        memcpy(ecdb_ahdr->ecdb, cmd->cmnd + ISCSI_CDB_SIZE, rlen);
 
-       debug_scsi("iscsi_prep_ecdb_ahs: varlen_cdb_len %d "
-                  "rlen %d pad_len %d ahs_length %d iscsi_headers_size %u\n",
-                  cmd->cmd_len, rlen, pad_len, ahslength, task->hdr_len);
-
+       ISCSI_DBG_SESSION(task->conn->session,
+                         "iscsi_prep_ecdb_ahs: varlen_cdb_len %d "
+                         "rlen %d pad_len %d ahs_length %d iscsi_headers_size "
+                         "%u\n", cmd->cmd_len, rlen, pad_len, ahslength,
+                         task->hdr_len);
        return 0;
 }
 
@@ -188,10 +257,93 @@ static int iscsi_prep_bidi_ahs(struct iscsi_task *task)
        rlen_ahdr->reserved = 0;
        rlen_ahdr->read_length = cpu_to_be32(scsi_in(sc)->length);
 
-       debug_scsi("bidi-in rlen_ahdr->read_length(%d) "
-                  "rlen_ahdr->ahslength(%d)\n",
-                  be32_to_cpu(rlen_ahdr->read_length),
-                  be16_to_cpu(rlen_ahdr->ahslength));
+       ISCSI_DBG_SESSION(task->conn->session,
+                         "bidi-in rlen_ahdr->read_length(%d) "
+                         "rlen_ahdr->ahslength(%d)\n",
+                         be32_to_cpu(rlen_ahdr->read_length),
+                         be16_to_cpu(rlen_ahdr->ahslength));
+       return 0;
+}
+
+/**
+ * iscsi_check_tmf_restrictions - check if a task is affected by TMF
+ * @task: iscsi task
+ * @opcode: opcode to check for
+ *
+ * During TMF a task has to be checked if it's affected.
+ * All unrelated I/O can be passed through, but I/O to the
+ * affected LUN should be restricted.
+ * If 'fast_abort' is set we won't be sending any I/O to the
+ * affected LUN.
+ * Otherwise the target is waiting for all TTTs to be completed,
+ * so we have to send all outstanding Data-Out PDUs to the target.
+ *
+ * Returns 0 if the PDU may be sent, or -EACCES if it has to be
+ * held back.
+ */
+static int iscsi_check_tmf_restrictions(struct iscsi_task *task, int opcode)
+{
+       struct iscsi_conn *conn = task->conn;
+       struct iscsi_tm *tmf = &conn->tmhdr;
+       unsigned int hdr_lun;
+
+       /* tmf_state is still TMF_INITIAL: nothing to restrict */
+       if (conn->tmf_state == TMF_INITIAL)
+               return 0;
+
+       /* the header kept in conn->tmhdr is not a TMF request */
+       if ((tmf->opcode & ISCSI_OPCODE_MASK) != ISCSI_OP_SCSI_TMFUNC)
+               return 0;
+
+       switch (ISCSI_TM_FUNC_VALUE(tmf)) {
+       case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET:
+               /*
+                * Allow PDUs for unrelated LUNs
+                */
+               hdr_lun = scsilun_to_int((struct scsi_lun *)tmf->lun);
+               if (hdr_lun != task->sc->device->lun)
+                       return 0;
+               /* fall through */
+       case ISCSI_TM_FUNC_TARGET_WARM_RESET:
+               /*
+                * Fail all SCSI cmd PDUs
+                */
+               if (opcode != ISCSI_OP_SCSI_DATA_OUT) {
+                       iscsi_conn_printk(KERN_INFO, conn,
+                                         "task [op %x/%x itt "
+                                         "0x%x/0x%x] "
+                                         "rejected.\n",
+                                         task->hdr->opcode, opcode,
+                                         task->itt, task->hdr_itt);
+                       return -EACCES;
+               }
+               /*
+                * And also all data-out PDUs in response to R2T
+                * if fast_abort is set.
+                */
+               if (conn->session->fast_abort) {
+                       iscsi_conn_printk(KERN_INFO, conn,
+                                         "task [op %x/%x itt "
+                                         "0x%x/0x%x] fast abort.\n",
+                                         task->hdr->opcode, opcode,
+                                         task->itt, task->hdr_itt);
+                       return -EACCES;
+               }
+               break;
+       case ISCSI_TM_FUNC_ABORT_TASK:
+               /*
+                * the caller has already checked if the task
+                * they want to abort was in the pending queue so if
+                * we are here the cmd pdu has gone out already, and
+                * we will only hit this for data-outs
+                */
+               if (opcode == ISCSI_OP_SCSI_DATA_OUT &&
+                   task->hdr_itt == tmf->rtt) {
+                       ISCSI_DBG_SESSION(conn->session,
+                                         "Preventing task %x/%x from sending "
+                                         "data-out due to abort task in "
+                                         "progress\n", task->itt,
+                                         task->hdr_itt);
+                       return -EACCES;
+               }
+               break;
+       }
+
+       /* any other TMF function, or a PDU the TMF does not cover */
        return 0;
 }
 
@@ -206,11 +358,30 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
 {
        struct iscsi_conn *conn = task->conn;
        struct iscsi_session *session = conn->session;
-       struct iscsi_cmd *hdr = task->hdr;
        struct scsi_cmnd *sc = task->sc;
+       struct iscsi_cmd *hdr;
        unsigned hdrlength, cmd_len;
+       itt_t itt;
        int rc;
 
+       rc = iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_CMD);
+       if (rc)
+               return rc;
+
+       if (conn->session->tt->alloc_pdu) {
+               rc = conn->session->tt->alloc_pdu(task, ISCSI_OP_SCSI_CMD);
+               if (rc)
+                       return rc;
+       }
+       hdr = (struct iscsi_cmd *) task->hdr;
+       itt = hdr->itt;
+       memset(hdr, 0, sizeof(*hdr));
+
+       if (session->tt->parse_pdu_itt)
+               hdr->itt = task->hdr_itt = itt;
+       else
+               hdr->itt = task->hdr_itt = build_itt(task->itt,
+                                                    task->conn->session->age);
        task->hdr_len = 0;
        rc = iscsi_add_hdr(task, sizeof(*hdr));
        if (rc)
@@ -218,9 +389,7 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
        hdr->opcode = ISCSI_OP_SCSI_CMD;
        hdr->flags = ISCSI_ATTR_SIMPLE;
        int_to_scsilun(sc->device->lun, (struct scsi_lun *)hdr->lun);
-       hdr->itt = build_itt(task->itt, session->age);
-       hdr->cmdsn = cpu_to_be32(session->cmdsn);
-       session->cmdsn++;
+       memcpy(task->lun, hdr->lun, sizeof(task->lun));
        hdr->exp_statsn = cpu_to_be32(conn->exp_statsn);
        cmd_len = sc->cmd_len;
        if (cmd_len < ISCSI_CDB_SIZE)
@@ -242,6 +411,8 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
        }
        if (sc->sc_data_direction == DMA_TO_DEVICE) {
                unsigned out_len = scsi_out(sc)->length;
+               struct iscsi_r2t_info *r2t = &task->unsol_r2t;
+
                hdr->data_length = cpu_to_be32(out_len);
                hdr->flags |= ISCSI_FLAG_CMD_WRITE;
                /*
@@ -254,13 +425,11 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
                 *                      without R2T ack right after
                 *                      immediate data
                 *
-                *      r2t_data_count  bytes to be sent via R2T ack's
+                *      r2t data_length bytes to be sent via R2T ack's
                 *
                 *      pad_count       bytes to be sent as zero-padding
                 */
-               task->unsol_count = 0;
-               task->unsol_offset = 0;
-               task->unsol_datasn = 0;
+               memset(r2t, 0, sizeof(*r2t));
 
                if (session->imm_data_en) {
                        if (out_len >= session->first_burst)
@@ -274,12 +443,14 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
                        zero_data(hdr->dlength);
 
                if (!session->initial_r2t_en) {
-                       task->unsol_count = min(session->first_burst, out_len)
-                                                            - task->imm_count;
-                       task->unsol_offset = task->imm_count;
+                       r2t->data_length = min(session->first_burst, out_len) -
+                                              task->imm_count;
+                       r2t->data_offset = task->imm_count;
+                       r2t->ttt = cpu_to_be32(ISCSI_RESERVED_TAG);
+                       r2t->exp_statsn = cpu_to_be32(conn->exp_statsn);
                }
 
-               if (!task->unsol_count)
+               if (!task->unsol_r2t.data_length)
                        /* No unsolicit Data-Out's */
                        hdr->flags |= ISCSI_FLAG_CMD_FINAL;
        } else {
@@ -300,54 +471,53 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task)
        WARN_ON(hdrlength >= 256);
        hdr->hlength = hdrlength & 0xFF;
 
-       if (conn->session->tt->init_task &&
-           conn->session->tt->init_task(task))
+       if (session->tt->init_task && session->tt->init_task(task))
                return -EIO;
 
        task->state = ISCSI_TASK_RUNNING;
-       list_move_tail(&task->running, &conn->run_list);
+       hdr->cmdsn = task->cmdsn = cpu_to_be32(session->cmdsn);
+       session->cmdsn++;
 
        conn->scsicmd_pdus_cnt++;
-       debug_scsi("iscsi prep [%s cid %d sc %p cdb 0x%x itt 0x%x len %d "
-                  "bidi_len %d cmdsn %d win %d]\n", scsi_bidi_cmnd(sc) ?
-                  "bidirectional" : sc->sc_data_direction == DMA_TO_DEVICE ?
-                  "write" : "read", conn->id, sc, sc->cmnd[0], task->itt,
-                  scsi_bufflen(sc),
-                  scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0,
-                  session->cmdsn, session->max_cmdsn - session->exp_cmdsn + 1);
+       ISCSI_DBG_SESSION(session, "iscsi prep [%s cid %d sc %p cdb 0x%x "
+                         "itt 0x%x len %d bidi_len %d cmdsn %d win %d]\n",
+                         scsi_bidi_cmnd(sc) ? "bidirectional" :
+                         sc->sc_data_direction == DMA_TO_DEVICE ?
+                         "write" : "read", conn->id, sc, sc->cmnd[0],
+                         task->itt, scsi_bufflen(sc),
+                         scsi_bidi_cmnd(sc) ? scsi_in(sc)->length : 0,
+                         session->cmdsn,
+                         session->max_cmdsn - session->exp_cmdsn + 1);
        return 0;
 }
 
 /**
- * iscsi_complete_command - finish a task
+ * iscsi_free_task - free a task
  * @task: iscsi cmd task
  *
  * Must be called with session lock.
  * This function returns the scsi command to scsi-ml or cleans
  * up mgmt tasks then returns the task to the pool.
  */
-static void iscsi_complete_command(struct iscsi_task *task)
+static void iscsi_free_task(struct iscsi_task *task)
 {
        struct iscsi_conn *conn = task->conn;
        struct iscsi_session *session = conn->session;
        struct scsi_cmnd *sc = task->sc;
 
-       list_del_init(&task->running);
-       task->state = ISCSI_TASK_COMPLETED;
-       task->sc = NULL;
+       ISCSI_DBG_SESSION(session, "freeing task itt 0x%x state %d sc %p\n",
+                         task->itt, task->state, task->sc);
 
-       if (conn->task == task)
-               conn->task = NULL;
+       session->tt->cleanup_task(task);
+       task->state = ISCSI_TASK_FREE;
+       task->sc = NULL;
        /*
         * login task is preallocated so do not free
         */
        if (conn->login_task == task)
                return;
 
-       __kfifo_put(session->cmdpool.queue, (void*)&task, sizeof(void*));
-
-       if (conn->ping_task == task)
-               conn->ping_task = NULL;
+       kfifo_in(&session->cmdpool.queue, (void*)&task, sizeof(void*));
 
        if (sc) {
                task->sc = NULL;
@@ -371,7 +541,7 @@ EXPORT_SYMBOL_GPL(__iscsi_get_task);
 static void __iscsi_put_task(struct iscsi_task *task)
 {
        if (atomic_dec_and_test(&task->refcount))
-               iscsi_complete_command(task);
+               iscsi_free_task(task);
 }
 
 void iscsi_put_task(struct iscsi_task *task)
@@ -384,33 +554,99 @@ void iscsi_put_task(struct iscsi_task *task)
 }
 EXPORT_SYMBOL_GPL(iscsi_put_task);
 
+/**
+ * iscsi_complete_task - move a task to its final state and drop its ref
+ * @task: iscsi cmd task
+ * @state: state to complete task with
+ *
+ * A task that is already completed or aborted is left alone. Otherwise
+ * the new state is recorded, the task is unlinked from the list it is
+ * on, the connection's task and ping_task pointers are cleared if they
+ * point at it, and the reference taken when it was queued is released.
+ *
+ * Must be called with session lock.
+ */
+static void iscsi_complete_task(struct iscsi_task *task, int state)
+{
+       struct iscsi_conn *conn = task->conn;
+
+       ISCSI_DBG_SESSION(conn->session,
+                         "complete task itt 0x%x state %d sc %p\n",
+                         task->itt, task->state, task->sc);
+
+       /* already in a final state: do not complete it twice */
+       if (task->state == ISCSI_TASK_ABRT_SESS_RECOV ||
+           task->state == ISCSI_TASK_ABRT_TMF ||
+           task->state == ISCSI_TASK_COMPLETED)
+               return;
+
+       WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
+       task->state = state;
+
+       /* forget the task wherever the connection still refers to it */
+       if (conn->ping_task == task)
+               conn->ping_task = NULL;
+
+       if (conn->task == task)
+               conn->task = NULL;
+
+       if (!list_empty(&task->running))
+               list_del_init(&task->running);
+
+       /* release get from queueing */
+       __iscsi_put_task(task);
+}
+
+/**
+ * iscsi_complete_scsi_task - finish scsi task normally
+ * @task: iscsi task for scsi cmd
+ * @exp_cmdsn: expected cmd sn in cpu format
+ * @max_cmdsn: max cmd sn in cpu format
+ *
+ * For drivers that do not need or cannot perform lower level pdu
+ * processing: records the receive time on the connection, updates the
+ * session cmdsn window and completes the task as ISCSI_TASK_COMPLETED.
+ *
+ * Called with session lock
+ */
+void iscsi_complete_scsi_task(struct iscsi_task *task,
+                             uint32_t exp_cmdsn, uint32_t max_cmdsn)
+{
+       struct iscsi_conn *conn = task->conn;
+       struct iscsi_session *session = conn->session;
+
+       ISCSI_DBG_SESSION(session, "[itt 0x%x]\n", task->itt);
+
+       conn->last_recv = jiffies;
+       __iscsi_update_cmdsn(session, exp_cmdsn, max_cmdsn);
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+}
+EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task);
+
+
 /*
- * session lock must be held
+ * session lock must be held and if not called for a task that is
+ * still pending or from the xmit thread, then xmit thread must
+ * be suspended.
  */
-static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
-                        int err)
+static void fail_scsi_task(struct iscsi_task *task, int err)
 {
+       struct iscsi_conn *conn = task->conn;
        struct scsi_cmnd *sc;
+       int state;
 
+       /*
+        * if a command completes and we get a successful tmf response
+        * we will hit this because the scsi eh abort code does not take
+        * a ref to the task.
+        */
        sc = task->sc;
        if (!sc)
                return;
 
-       if (task->state == ISCSI_TASK_PENDING)
+       if (task->state == ISCSI_TASK_PENDING) {
                /*
                 * cmd never made it to the xmit thread, so we should not count
                 * the cmd in the sequencing
                 */
                conn->session->queued_cmdsn--;
+               /* it was never sent so just complete like normal */
+               state = ISCSI_TASK_COMPLETED;
+       } else if (err == DID_TRANSPORT_DISRUPTED)
+               state = ISCSI_TASK_ABRT_SESS_RECOV;
        else
-               conn->session->tt->cleanup_task(conn, task);
-       /*
-        * Check if cleanup_task dropped the lock and the command completed,
-        */
-       if (!task->sc)
-               return;
+               state = ISCSI_TASK_ABRT_TMF;
 
-       sc->result = err;
+       sc->result = err << 16;
        if (!scsi_bidi_cmnd(sc))
                scsi_set_resid(sc, scsi_bufflen(sc));
        else {
@@ -418,35 +654,34 @@ static void fail_command(struct iscsi_conn *conn, struct iscsi_task *task,
                scsi_in(sc)->resid = scsi_in(sc)->length;
        }
 
-       if (conn->task == task)
-               conn->task = NULL;
-       /* release ref from queuecommand */
-       __iscsi_put_task(task);
+       iscsi_complete_task(task, state);
 }
 
 static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
                                struct iscsi_task *task)
 {
        struct iscsi_session *session = conn->session;
-       struct iscsi_hdr *hdr = (struct iscsi_hdr *)task->hdr;
+       struct iscsi_hdr *hdr = task->hdr;
        struct iscsi_nopout *nop = (struct iscsi_nopout *)hdr;
+       uint8_t opcode = hdr->opcode & ISCSI_OPCODE_MASK;
 
        if (conn->session->state == ISCSI_STATE_LOGGING_OUT)
                return -ENOTCONN;
 
-       if (hdr->opcode != (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) &&
-           hdr->opcode != (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
+       if (opcode != ISCSI_OP_LOGIN && opcode != ISCSI_OP_TEXT)
                nop->exp_statsn = cpu_to_be32(conn->exp_statsn);
        /*
         * pre-format CmdSN for outgoing PDU.
         */
        nop->cmdsn = cpu_to_be32(session->cmdsn);
        if (hdr->itt != RESERVED_ITT) {
-               hdr->itt = build_itt(task->itt, session->age);
                /*
-                * TODO: We always use immediate, so we never hit this.
+                * TODO: We always use immediate for normal session pdus.
                 * If we start to send tmfs or nops as non-immediate then
                 * we should start checking the cmdsn numbers for mgmt tasks.
+                *
+                * During discovery sessions iscsid sends TEXT as non immediate,
+                * but we always only send one PDU at a time.
                 */
                if (conn->c_stage == ISCSI_CONN_STARTED &&
                    !(hdr->opcode & ISCSI_OP_IMMEDIATE)) {
@@ -455,16 +690,16 @@ static int iscsi_prep_mgmt_task(struct iscsi_conn *conn,
                }
        }
 
-       if (session->tt->init_task)
-               session->tt->init_task(task);
+       if (session->tt->init_task && session->tt->init_task(task))
+               return -EIO;
 
        if ((hdr->opcode & ISCSI_OPCODE_MASK) == ISCSI_OP_LOGOUT)
                session->state = ISCSI_STATE_LOGGING_OUT;
 
-       list_move_tail(&task->running, &conn->mgmt_run_list);
-       debug_scsi("mgmtpdu [op 0x%x hdr->itt 0x%x datalen %d]\n",
-                  hdr->opcode & ISCSI_OPCODE_MASK, hdr->itt,
-                  task->data_count);
+       task->state = ISCSI_TASK_RUNNING;
+       ISCSI_DBG_SESSION(session, "mgmtpdu [op 0x%x hdr->itt 0x%x "
+                         "datalen %d]\n", hdr->opcode & ISCSI_OPCODE_MASK,
+                         hdr->itt, task->data_count);
        return 0;
 }
 
@@ -473,33 +708,38 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                      char *data, uint32_t data_size)
 {
        struct iscsi_session *session = conn->session;
+       struct iscsi_host *ihost = shost_priv(session->host);
+       uint8_t opcode = hdr->opcode & ISCSI_OPCODE_MASK;
        struct iscsi_task *task;
+       itt_t itt;
 
        if (session->state == ISCSI_STATE_TERMINATE)
                return NULL;
 
-       if (hdr->opcode == (ISCSI_OP_LOGIN | ISCSI_OP_IMMEDIATE) ||
-           hdr->opcode == (ISCSI_OP_TEXT | ISCSI_OP_IMMEDIATE))
+       if (opcode == ISCSI_OP_LOGIN || opcode == ISCSI_OP_TEXT) {
                /*
                 * Login and Text are sent serially, in
                 * request-followed-by-response sequence.
                 * Same task can be used. Same ITT must be used.
                 * Note that login_task is preallocated at conn_create().
                 */
+               if (conn->login_task->state != ISCSI_TASK_FREE) {
+                       iscsi_conn_printk(KERN_ERR, conn, "Login/Text in "
+                                         "progress. Cannot start new task.\n");
+                       return NULL;
+               }
+
                task = conn->login_task;
-       else {
+       } else {
+               if (session->state != ISCSI_STATE_LOGGED_IN)
+                       return NULL;
+
                BUG_ON(conn->c_stage == ISCSI_CONN_INITIAL_STAGE);
                BUG_ON(conn->c_stage == ISCSI_CONN_STOPPED);
 
-               if (!__kfifo_get(session->cmdpool.queue,
+               if (!kfifo_out(&session->cmdpool.queue,
                                 (void*)&task, sizeof(void*)))
                        return NULL;
-
-               if ((hdr->opcode == (ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE)) &&
-                    hdr->ttt == RESERVED_ITT) {
-                       conn->ping_task = task;
-                       conn->last_ping = jiffies;
-               }
        }
        /*
         * released in complete pdu for task we expect a response for, and
@@ -509,6 +749,8 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
        atomic_set(&task->refcount, 1);
        task->conn = conn;
        task->sc = NULL;
+       INIT_LIST_HEAD(&task->running);
+       task->state = ISCSI_TASK_PENDING;
 
        if (data_size) {
                memcpy(task->data, data, data_size);
@@ -516,23 +758,42 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
        } else
                task->data_count = 0;
 
+       if (conn->session->tt->alloc_pdu) {
+               if (conn->session->tt->alloc_pdu(task, hdr->opcode)) {
+                       iscsi_conn_printk(KERN_ERR, conn, "Could not allocate "
+                                        "pdu for mgmt task.\n");
+                       goto free_task;
+               }
+       }
+
+       itt = task->hdr->itt;
+       task->hdr_len = sizeof(struct iscsi_hdr);
        memcpy(task->hdr, hdr, sizeof(struct iscsi_hdr));
-       INIT_LIST_HEAD(&task->running);
-       list_add_tail(&task->running, &conn->mgmtqueue);
 
-       if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
-               if (iscsi_prep_mgmt_task(conn, task)) {
-                       __iscsi_put_task(task);
-                       return NULL;
-               }
+       if (hdr->itt != RESERVED_ITT) {
+               if (session->tt->parse_pdu_itt)
+                       task->hdr->itt = itt;
+               else
+                       task->hdr->itt = build_itt(task->itt,
+                                                  task->conn->session->age);
+       }
 
-               if (session->tt->xmit_task(task))
-                       task = NULL;
+       if (!ihost->workq) {
+               if (iscsi_prep_mgmt_task(conn, task))
+                       goto free_task;
 
-       } else
-               scsi_queue_work(conn->session->host, &conn->xmitwork);
+               if (session->tt->xmit_task(task))
+                       goto free_task;
+       } else {
+               list_add_tail(&task->running, &conn->mgmtqueue);
+               iscsi_conn_queue_work(conn);
+       }
 
        return task;
+
+free_task:
+       __iscsi_put_task(task);
+       return NULL;
 }
 
 int iscsi_conn_send_pdu(struct iscsi_cls_conn *cls_conn, struct iscsi_hdr *hdr,
@@ -591,14 +852,15 @@ invalid_datalen:
                        goto out;
                }
 
-               senselen = be16_to_cpu(get_unaligned((__be16 *) data));
+               senselen = get_unaligned_be16(data);
                if (datalen < senselen)
                        goto invalid_datalen;
 
                memcpy(sc->sense_buffer, data + 2,
                       min_t(uint16_t, senselen, SCSI_SENSE_BUFFERSIZE));
-               debug_scsi("copied %d bytes of sense\n",
-                          min_t(uint16_t, senselen, SCSI_SENSE_BUFFERSIZE));
+               ISCSI_DBG_SESSION(session, "copied %d bytes of sense\n",
+                                 min_t(uint16_t, senselen,
+                                 SCSI_SENSE_BUFFERSIZE));
        }
 
        if (rhdr->flags & (ISCSI_FLAG_CMD_BIDI_UNDERFLOW |
@@ -626,11 +888,48 @@ invalid_datalen:
                        sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
        }
 out:
-       debug_scsi("done [sc %lx res %d itt 0x%x]\n",
-                  (long)sc, sc->result, task->itt);
+       ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
+                         sc, sc->result, task->itt);
        conn->scsirsp_pdus_cnt++;
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+}
 
-       __iscsi_put_task(task);
+/**
+ * iscsi_data_in_rsp - SCSI Data-In Response processing
+ * @conn: iscsi connection
+ * @hdr:  iscsi pdu
+ * @task: scsi command task
+ *
+ * Only a Data-In PDU with ISCSI_FLAG_DATA_STATUS set is handled here:
+ * the cmdsn window and exp_statsn are updated, the result and any
+ * residual are stored in the scsi command, and the task is completed.
+ * PDUs without the status flag are ignored.
+ **/
+static void
+iscsi_data_in_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
+                 struct iscsi_task *task)
+{
+       struct iscsi_data_rsp *rhdr = (struct iscsi_data_rsp *)hdr;
+       struct scsi_cmnd *sc = task->sc;
+
+       if (!(rhdr->flags & ISCSI_FLAG_DATA_STATUS))
+               return;
+
+       iscsi_update_cmdsn(conn->session, (struct iscsi_nopin *)hdr);
+       sc->result = (DID_OK << 16) | rhdr->cmd_status;
+       conn->exp_statsn = be32_to_cpu(rhdr->statsn) + 1;
+       if (rhdr->flags & (ISCSI_FLAG_DATA_UNDERFLOW |
+                          ISCSI_FLAG_DATA_OVERFLOW)) {
+               int res_count = be32_to_cpu(rhdr->residual_count);
+
+               /*
+                * This is a Data-In header, so test the Data-In overflow
+                * flag, matching the outer check. An underflow residual
+                * is accepted only if it fits in the read buffer.
+                */
+               if (res_count > 0 &&
+                   (rhdr->flags & ISCSI_FLAG_DATA_OVERFLOW ||
+                    res_count <= scsi_in(sc)->length))
+                       scsi_in(sc)->resid = res_count;
+               else
+                       sc->result = (DID_BAD_TARGET << 16) | rhdr->cmd_status;
+       }
+
+       ISCSI_DBG_SESSION(conn->session, "data in with status done "
+                         "[sc %p res %d itt 0x%x]\n",
+                         sc, sc->result, task->itt);
+       conn->scsirsp_pdus_cnt++;
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -674,6 +973,31 @@ static void iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr)
        task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0);
        if (!task)
                iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n");
+       else if (!rhdr) {
+               /* only track our nops */
+               conn->ping_task = task;
+               conn->last_ping = jiffies;
+       }
+}
+
+static int iscsi_nop_out_rsp(struct iscsi_task *task,
+                            struct iscsi_nopin *nop, char *data, int datalen)
+{
+       struct iscsi_conn *conn = task->conn;
+       int rc = 0;
+
+       if (conn->ping_task != task) {
+               /*
+                * If this is not in response to one of our
+                * nops then it must be from userspace.
+                */
+               if (iscsi_recv_pdu(conn->cls_conn, (struct iscsi_hdr *)nop,
+                                  data, datalen))
+                       rc = ISCSI_ERR_CONN_FAILED;
+       } else
+               mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+       return rc;
 }
 
 static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
@@ -681,24 +1005,77 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 {
        struct iscsi_reject *reject = (struct iscsi_reject *)hdr;
        struct iscsi_hdr rejected_pdu;
-       uint32_t itt;
+       int opcode, rc = 0;
 
        conn->exp_statsn = be32_to_cpu(reject->statsn) + 1;
 
-       if (reject->reason == ISCSI_REASON_DATA_DIGEST_ERROR) {
-               if (ntoh24(reject->dlength) > datalen)
-                       return ISCSI_ERR_PROTO;
-
-               if (ntoh24(reject->dlength) >= sizeof(struct iscsi_hdr)) {
-                       memcpy(&rejected_pdu, data, sizeof(struct iscsi_hdr));
-                       itt = get_itt(rejected_pdu.itt);
-                       iscsi_conn_printk(KERN_ERR, conn,
-                                         "itt 0x%x had pdu (op 0x%x) rejected "
-                                         "due to DataDigest error.\n", itt,
-                                         rejected_pdu.opcode);
+       if (ntoh24(reject->dlength) > datalen ||
+           ntoh24(reject->dlength) < sizeof(struct iscsi_hdr)) {
+               iscsi_conn_printk(KERN_ERR, conn, "Cannot handle rejected "
+                                 "pdu. Invalid data length (pdu dlength "
+                                 "%u, datalen %d\n", ntoh24(reject->dlength),
+                                 datalen);
+               return ISCSI_ERR_PROTO;
+       }
+       memcpy(&rejected_pdu, data, sizeof(struct iscsi_hdr));
+       opcode = rejected_pdu.opcode & ISCSI_OPCODE_MASK;
+
+       switch (reject->reason) {
+       case ISCSI_REASON_DATA_DIGEST_ERROR:
+               iscsi_conn_printk(KERN_ERR, conn,
+                                 "pdu (op 0x%x itt 0x%x) rejected "
+                                 "due to DataDigest error.\n",
+                                 rejected_pdu.itt, opcode);
+               break;
+       case ISCSI_REASON_IMM_CMD_REJECT:
+               iscsi_conn_printk(KERN_ERR, conn,
+                                 "pdu (op 0x%x itt 0x%x) rejected. Too many "
+                                 "immediate commands.\n",
+                                 rejected_pdu.itt, opcode);
+               /*
+                * We only send one TMF at a time so if the target could not
+                * handle it, then it should get fixed (RFC mandates that
+                * a target can handle one immediate TMF per conn).
+                *
+                * For nops-outs, we could have sent more than one if
+                * the target is sending us lots of nop-ins
+                */
+               if (opcode != ISCSI_OP_NOOP_OUT)
+                       return 0;
+
+                if (rejected_pdu.itt == cpu_to_be32(ISCSI_RESERVED_TAG))
+                       /*
+                        * nop-out in response to target's nop-out rejected.
+                        * Just resend.
+                        */
+                       iscsi_send_nopout(conn,
+                                         (struct iscsi_nopin*)&rejected_pdu);
+               else {
+                       struct iscsi_task *task;
+                       /*
+                        * Our nop as ping got dropped. We know the target
+                        * and transport are ok so just clean up
+                        */
+                       task = iscsi_itt_to_task(conn, rejected_pdu.itt);
+                       if (!task) {
+                               iscsi_conn_printk(KERN_ERR, conn,
+                                                "Invalid pdu reject. Could "
+                                                "not lookup rejected task.\n");
+                               rc = ISCSI_ERR_BAD_ITT;
+                       } else
+                               rc = iscsi_nop_out_rsp(task,
+                                       (struct iscsi_nopin*)&rejected_pdu,
+                                       NULL, 0);
                }
+               break;
+       default:
+               iscsi_conn_printk(KERN_ERR, conn,
+                                 "pdu (op 0x%x itt 0x%x) rejected. Reason "
+                                 "code 0x%x\n", rejected_pdu.itt,
+                                 rejected_pdu.opcode, reject->reason);
+               break;
        }
-       return 0;
+       return rc;
 }
 
 /**
@@ -711,20 +1088,24 @@ static int iscsi_handle_reject(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
  *
  * The session lock must be held.
  */
-static struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
+struct iscsi_task *iscsi_itt_to_task(struct iscsi_conn *conn, itt_t itt)
 {
        struct iscsi_session *session = conn->session;
-       uint32_t i;
+       int i;
 
        if (itt == RESERVED_ITT)
                return NULL;
 
-       i = get_itt(itt);
+       if (session->tt->parse_pdu_itt)
+               session->tt->parse_pdu_itt(conn, itt, &i, NULL);
+       else
+               i = get_itt(itt);
        if (i >= session->cmds_max)
                return NULL;
 
        return session->cmds[i];
 }
+EXPORT_SYMBOL_GPL(iscsi_itt_to_task);
 
 /**
  * __iscsi_complete_pdu - complete pdu
@@ -755,8 +1136,8 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
        else
                itt = ~0U;
 
-       debug_scsi("[op 0x%x cid %d itt 0x%x len %d]\n",
-                  opcode, conn->id, itt, datalen);
+       ISCSI_DBG_SESSION(session, "[op 0x%x cid %d itt 0x%x len %d]\n",
+                         opcode, conn->id, itt, datalen);
 
        if (itt == ~0U) {
                iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -794,6 +1175,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                task = iscsi_itt_to_ctask(conn, hdr->itt);
                if (!task)
                        return ISCSI_ERR_BAD_ITT;
+               task->last_xfer = jiffies;
                break;
        case ISCSI_OP_R2T:
                /*
@@ -818,12 +1200,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                iscsi_scsi_cmd_rsp(conn, hdr, task, data, datalen);
                break;
        case ISCSI_OP_SCSI_DATA_IN:
-               if (hdr->flags & ISCSI_FLAG_DATA_STATUS) {
-                       conn->scsirsp_pdus_cnt++;
-                       iscsi_update_cmdsn(session,
-                                          (struct iscsi_nopin*) hdr);
-                       __iscsi_put_task(task);
-               }
+               iscsi_data_in_rsp(conn, hdr, task);
                break;
        case ISCSI_OP_LOGOUT_RSP:
                iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -849,7 +1226,7 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                }
 
                iscsi_tmf_rsp(conn, hdr);
-               __iscsi_put_task(task);
+               iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
                break;
        case ISCSI_OP_NOOP_IN:
                iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -859,15 +1236,8 @@ int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                }
                conn->exp_statsn = be32_to_cpu(hdr->statsn) + 1;
 
-               if (conn->ping_task != task)
-                       /*
-                        * If this is not in response to one of our
-                        * nops then it must be from userspace.
-                        */
-                       goto recv_pdu;
-
-               mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
-               __iscsi_put_task(task);
+               rc = iscsi_nop_out_rsp(task, (struct iscsi_nopin*)hdr,
+                                      data, datalen);
                break;
        default:
                rc = ISCSI_ERR_BAD_OPCODE;
@@ -879,7 +1249,7 @@ out:
 recv_pdu:
        if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
                rc = ISCSI_ERR_CONN_FAILED;
-       __iscsi_put_task(task);
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
        return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);
@@ -899,20 +1269,25 @@ EXPORT_SYMBOL_GPL(iscsi_complete_pdu);
 int iscsi_verify_itt(struct iscsi_conn *conn, itt_t itt)
 {
        struct iscsi_session *session = conn->session;
-       uint32_t i;
+       int age = 0, i = 0;
 
        if (itt == RESERVED_ITT)
                return 0;
 
-       if (((__force u32)itt & ISCSI_AGE_MASK) !=
-           (session->age << ISCSI_AGE_SHIFT)) {
+       if (session->tt->parse_pdu_itt)
+               session->tt->parse_pdu_itt(conn, itt, &i, &age);
+       else {
+               i = get_itt(itt);
+               age = ((__force u32)itt >> ISCSI_AGE_SHIFT) & ISCSI_AGE_MASK;
+       }
+
+       if (age != session->age) {
                iscsi_conn_printk(KERN_ERR, conn,
                                  "received itt %x expected session age (%x)\n",
                                  (__force u32)itt, session->age);
                return ISCSI_ERR_BAD_ITT;
        }
 
-       i = get_itt(itt);
        if (i >= session->cmds_max) {
                iscsi_conn_printk(KERN_ERR, conn,
                                  "received invalid itt index %u (max cmds "
@@ -954,6 +1329,37 @@ struct iscsi_task *iscsi_itt_to_ctask(struct iscsi_conn *conn, itt_t itt)
 }
 EXPORT_SYMBOL_GPL(iscsi_itt_to_ctask);
 
+void iscsi_session_failure(struct iscsi_session *session,
+                          enum iscsi_err err)
+{
+       struct iscsi_conn *conn;
+       struct device *dev;
+       unsigned long flags;
+
+       spin_lock_irqsave(&session->lock, flags);
+       conn = session->leadconn;
+       if (session->state == ISCSI_STATE_TERMINATE || !conn) {
+               spin_unlock_irqrestore(&session->lock, flags);
+               return;
+       }
+
+       dev = get_device(&conn->cls_conn->dev);
+       spin_unlock_irqrestore(&session->lock, flags);
+       if (!dev)
+               return;
+       /*
+        * if the host is being removed bypass the connection
+        * recovery initialization because we are going to kill
+        * the session.
+        */
+       if (err == ISCSI_ERR_INVALID_HOST)
+               iscsi_conn_error_event(conn->cls_conn, err);
+       else
+               iscsi_conn_failure(conn, err);
+       put_device(dev);
+}
+EXPORT_SYMBOL_GPL(iscsi_session_failure);
+
 void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
 {
        struct iscsi_session *session = conn->session;
@@ -968,9 +1374,10 @@ void iscsi_conn_failure(struct iscsi_conn *conn, enum iscsi_err err)
        if (conn->stop_stage == 0)
                session->state = ISCSI_STATE_FAILED;
        spin_unlock_irqrestore(&session->lock, flags);
+
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
-       iscsi_conn_error(conn->cls_conn, err);
+       iscsi_conn_error_event(conn->cls_conn, err);
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_failure);
 
@@ -982,10 +1389,10 @@ static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
         * Check for iSCSI window and take care of CmdSN wrap-around
         */
        if (!iscsi_sna_lte(session->queued_cmdsn, session->max_cmdsn)) {
-               debug_scsi("iSCSI CmdSN closed. ExpCmdSn %u MaxCmdSN %u "
-                          "CmdSN %u/%u\n", session->exp_cmdsn,
-                          session->max_cmdsn, session->cmdsn,
-                          session->queued_cmdsn);
+               ISCSI_DBG_SESSION(session, "iSCSI CmdSN closed. ExpCmdSn "
+                                 "%u MaxCmdSN %u CmdSN %u/%u\n",
+                                 session->exp_cmdsn, session->max_cmdsn,
+                                 session->cmdsn, session->queued_cmdsn);
                return -ENOSPC;
        }
        return 0;
@@ -996,14 +1403,19 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
        struct iscsi_task *task = conn->task;
        int rc;
 
+       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
+               return -ENODATA;
+
        __iscsi_get_task(task);
        spin_unlock_bh(&conn->session->lock);
        rc = conn->session->tt->xmit_task(task);
        spin_lock_bh(&conn->session->lock);
-       __iscsi_put_task(task);
-       if (!rc)
+       if (!rc) {
                /* done with this task */
+               task->last_xfer = jiffies;
                conn->task = NULL;
+       }
+       __iscsi_put_task(task);
        return rc;
 }
 
@@ -1019,8 +1431,13 @@ void iscsi_requeue_task(struct iscsi_task *task)
 {
        struct iscsi_conn *conn = task->conn;
 
-       list_move_tail(&task->running, &conn->requeue);
-       scsi_queue_work(conn->session->host, &conn->xmitwork);
+       /*
+        * this may be on the requeue list already if the xmit_task callout
+        * is handling the r2ts while we are adding new ones
+        */
+       if (list_empty(&task->running))
+               list_add_tail(&task->running, &conn->requeue);
+       iscsi_conn_queue_work(conn);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
 
@@ -1035,11 +1452,12 @@ EXPORT_SYMBOL_GPL(iscsi_requeue_task);
  **/
 static int iscsi_data_xmit(struct iscsi_conn *conn)
 {
+       struct iscsi_task *task;
        int rc = 0;
 
        spin_lock_bh(&conn->session->lock);
-       if (unlikely(conn->suspend_tx)) {
-               debug_scsi("conn %d Tx suspended!\n", conn->id);
+       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+               ISCSI_DBG_SESSION(conn->session, "Tx suspended!\n");
                spin_unlock_bh(&conn->session->lock);
                return -ENODATA;
        }
@@ -1047,7 +1465,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
        if (conn->task) {
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
        }
 
        /*
@@ -1059,6 +1477,7 @@ check_mgmt:
        while (!list_empty(&conn->mgmtqueue)) {
                conn->task = list_entry(conn->mgmtqueue.next,
                                         struct iscsi_task, running);
+               list_del_init(&conn->task->running);
                if (iscsi_prep_mgmt_task(conn, conn->task)) {
                        __iscsi_put_task(conn->task);
                        conn->task = NULL;
@@ -1066,27 +1485,32 @@ check_mgmt:
                }
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
        }
 
        /* process pending command queue */
-       while (!list_empty(&conn->xmitqueue)) {
-               if (conn->tmf_state == TMF_QUEUED)
-                       break;
-
-               conn->task = list_entry(conn->xmitqueue.next,
-                                        struct iscsi_task, running);
+       while (!list_empty(&conn->cmdqueue)) {
+               conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
+                                       running);
+               list_del_init(&conn->task->running);
                if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-                       fail_command(conn, conn->task, DID_IMM_RETRY << 16);
+                       fail_scsi_task(conn->task, DID_IMM_RETRY);
                        continue;
                }
-               if (iscsi_prep_scsi_cmd_pdu(conn->task)) {
-                       fail_command(conn, conn->task, DID_ABORT << 16);
+               rc = iscsi_prep_scsi_cmd_pdu(conn->task);
+               if (rc) {
+                       if (rc == -ENOMEM || rc == -EACCES) {
+                               list_add_tail(&conn->task->running,
+                                             &conn->cmdqueue);
+                               conn->task = NULL;
+                               goto done;
+                       } else
+                               fail_scsi_task(conn->task, DID_ABORT);
                        continue;
                }
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
                /*
                 * we could continuously get new task requests so
                 * we need to check the mgmt queue for nops that need to
@@ -1097,31 +1521,30 @@ check_mgmt:
        }
 
        while (!list_empty(&conn->requeue)) {
-               if (conn->session->fast_abort && conn->tmf_state != TMF_INITIAL)
-                       break;
-
                /*
                 * we always do fastlogout - conn stop code will clean up.
                 */
                if (conn->session->state == ISCSI_STATE_LOGGING_OUT)
                        break;
 
-               conn->task = list_entry(conn->requeue.next,
-                                        struct iscsi_task, running);
+               task = list_entry(conn->requeue.next, struct iscsi_task,
+                                 running);
+               if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT))
+                       break;
+
+               conn->task = task;
+               list_del_init(&conn->task->running);
                conn->task->state = ISCSI_TASK_RUNNING;
-               list_move_tail(conn->requeue.next, &conn->run_list);
                rc = iscsi_xmit_task(conn);
                if (rc)
-                       goto again;
+                       goto done;
                if (!list_empty(&conn->mgmtqueue))
                        goto check_mgmt;
        }
        spin_unlock_bh(&conn->session->lock);
        return -ENODATA;
 
-again:
-       if (unlikely(conn->suspend_tx))
-               rc = -ENODATA;
+done:
        spin_unlock_bh(&conn->session->lock);
        return rc;
 }
@@ -1139,6 +1562,29 @@ static void iscsi_xmitworker(struct work_struct *work)
        } while (rc >= 0 || rc == -EAGAIN);
 }
 
+static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn,
+                                                 struct scsi_cmnd *sc)
+{
+       struct iscsi_task *task;
+
+       if (!kfifo_out(&conn->session->cmdpool.queue,
+                        (void *) &task, sizeof(void *)))
+               return NULL;
+
+       sc->SCp.phase = conn->session->age;
+       sc->SCp.ptr = (char *) task;
+
+       atomic_set(&task->refcount, 1);
+       task->state = ISCSI_TASK_PENDING;
+       task->conn = conn;
+       task->sc = sc;
+       task->have_checked_conn = false;
+       task->last_timeout = jiffies;
+       task->last_xfer = jiffies;
+       INIT_LIST_HEAD(&task->running);
+       return task;
+}
+
 enum {
        FAILURE_BAD_HOST = 1,
        FAILURE_SESSION_FAILED,
@@ -1156,6 +1602,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
 {
        struct iscsi_cls_session *cls_session;
        struct Scsi_Host *host;
+       struct iscsi_host *ihost;
        int reason = 0;
        struct iscsi_session *session;
        struct iscsi_conn *conn;
@@ -1166,6 +1613,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
        sc->SCp.ptr = NULL;
 
        host = sc->device->host;
+       ihost = shost_priv(host);
        spin_unlock(host->host_lock);
 
        cls_session = starget_to_session(scsi_target(sc->device));
@@ -1178,13 +1626,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
                goto fault;
        }
 
-       /*
-        * ISCSI_STATE_FAILED is a temp. state. The recovery
-        * code will decide what is best to do with command queued
-        * during this time
-        */
-       if (session->state != ISCSI_STATE_LOGGED_IN &&
-           session->state != ISCSI_STATE_FAILED) {
+       if (session->state != ISCSI_STATE_LOGGED_IN) {
                /*
                 * to handle the race between when we set the recovery state
                 * and block the session we requeue here (commands could
@@ -1192,6 +1634,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
                 * up because the block code is not locked)
                 */
                switch (session->state) {
+               case ISCSI_STATE_FAILED:
                case ISCSI_STATE_IN_RECOVERY:
                        reason = FAILURE_SESSION_IN_RECOVERY;
                        sc->result = DID_IMM_RETRY << 16;
@@ -1202,7 +1645,7 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
                        break;
                case ISCSI_STATE_RECOVERY_FAILED:
                        reason = FAILURE_SESSION_RECOVERY_TIMEOUT;
-                       sc->result = DID_NO_CONNECT << 16;
+                       sc->result = DID_TRANSPORT_FAILFAST << 16;
                        break;
                case ISCSI_STATE_TERMINATE:
                        reason = FAILURE_SESSION_TERMINATE;
@@ -1222,56 +1665,66 @@ int iscsi_queuecommand(struct scsi_cmnd *sc, void (*done)(struct scsi_cmnd *))
                goto fault;
        }
 
+       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+               reason = FAILURE_SESSION_IN_RECOVERY;
+               sc->result = DID_REQUEUE;
+               goto fault;
+       }
+
        if (iscsi_check_cmdsn_window_closed(conn)) {
                reason = FAILURE_WINDOW_CLOSED;
                goto reject;
        }
 
-       if (!__kfifo_get(session->cmdpool.queue, (void*)&task,
-                        sizeof(void*))) {
+       task = iscsi_alloc_task(conn, sc);
+       if (!task) {
                reason = FAILURE_OOM;
                goto reject;
        }
-       sc->SCp.phase = session->age;
-       sc->SCp.ptr = (char *)task;
 
-       atomic_set(&task->refcount, 1);
-       task->state = ISCSI_TASK_PENDING;
-       task->conn = conn;
-       task->sc = sc;
-       INIT_LIST_HEAD(&task->running);
-       list_add_tail(&task->running, &conn->xmitqueue);
-
-       if (session->tt->caps & CAP_DATA_PATH_OFFLOAD) {
-               if (iscsi_prep_scsi_cmd_pdu(task)) {
-                       sc->result = DID_ABORT << 16;
-                       sc->scsi_done = NULL;
-                       iscsi_complete_command(task);
-                       goto fault;
+       if (!ihost->workq) {
+               reason = iscsi_prep_scsi_cmd_pdu(task);
+               if (reason) {
+                       if (reason == -ENOMEM ||  reason == -EACCES) {
+                               reason = FAILURE_OOM;
+                               goto prepd_reject;
+                       } else {
+                               sc->result = DID_ABORT << 16;
+                               goto prepd_fault;
+                       }
                }
                if (session->tt->xmit_task(task)) {
-                       sc->scsi_done = NULL;
-                       iscsi_complete_command(task);
+                       session->cmdsn--;
                        reason = FAILURE_SESSION_NOT_READY;
-                       goto reject;
+                       goto prepd_reject;
                }
-       } else
-               scsi_queue_work(session->host, &conn->xmitwork);
+       } else {
+               list_add_tail(&task->running, &conn->cmdqueue);
+               iscsi_conn_queue_work(conn);
+       }
 
        session->queued_cmdsn++;
        spin_unlock(&session->lock);
        spin_lock(host->host_lock);
        return 0;
 
+prepd_reject:
+       sc->scsi_done = NULL;
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 reject:
        spin_unlock(&session->lock);
-       debug_scsi("cmd 0x%x rejected (%d)\n", sc->cmnd[0], reason);
+       ISCSI_DBG_SESSION(session, "cmd 0x%x rejected (%d)\n",
+                         sc->cmnd[0], reason);
        spin_lock(host->host_lock);
-       return SCSI_MLQUEUE_HOST_BUSY;
+       return SCSI_MLQUEUE_TARGET_BUSY;
 
+prepd_fault:
+       sc->scsi_done = NULL;
+       iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
 fault:
        spin_unlock(&session->lock);
-       debug_scsi("iscsi: cmd 0x%x is not queued (%d)\n", sc->cmnd[0], reason);
+       ISCSI_DBG_SESSION(session, "iscsi: cmd 0x%x is not queued (%d)\n",
+                         sc->cmnd[0], reason);
        if (!scsi_bidi_cmnd(sc))
                scsi_set_resid(sc, scsi_bufflen(sc));
        else {
@@ -1284,78 +1737,34 @@ fault:
 }
 EXPORT_SYMBOL_GPL(iscsi_queuecommand);
 
-int iscsi_change_queue_depth(struct scsi_device *sdev, int depth)
+int iscsi_change_queue_depth(struct scsi_device *sdev, int depth, int reason)
 {
-       if (depth > ISCSI_MAX_CMD_PER_LUN)
-               depth = ISCSI_MAX_CMD_PER_LUN;
-       scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+       switch (reason) {
+       case SCSI_QDEPTH_DEFAULT:
+               scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+               break;
+       case SCSI_QDEPTH_QFULL:
+               scsi_track_queue_full(sdev, depth);
+               break;
+       case SCSI_QDEPTH_RAMP_UP:
+               scsi_adjust_queue_depth(sdev, scsi_get_tag_type(sdev), depth);
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
        return sdev->queue_depth;
 }
 EXPORT_SYMBOL_GPL(iscsi_change_queue_depth);
 
-void iscsi_session_recovery_timedout(struct iscsi_cls_session *cls_session)
+int iscsi_target_alloc(struct scsi_target *starget)
 {
+       struct iscsi_cls_session *cls_session = starget_to_session(starget);
        struct iscsi_session *session = cls_session->dd_data;
 
-       spin_lock_bh(&session->lock);
-       if (session->state != ISCSI_STATE_LOGGED_IN) {
-               session->state = ISCSI_STATE_RECOVERY_FAILED;
-               if (session->leadconn)
-                       wake_up(&session->leadconn->ehwait);
-       }
-       spin_unlock_bh(&session->lock);
-}
-EXPORT_SYMBOL_GPL(iscsi_session_recovery_timedout);
-
-int iscsi_eh_host_reset(struct scsi_cmnd *sc)
-{
-       struct iscsi_cls_session *cls_session;
-       struct iscsi_session *session;
-       struct iscsi_conn *conn;
-
-       cls_session = starget_to_session(scsi_target(sc->device));
-       session = cls_session->dd_data;
-       conn = session->leadconn;
-
-       mutex_lock(&session->eh_mutex);
-       spin_lock_bh(&session->lock);
-       if (session->state == ISCSI_STATE_TERMINATE) {
-failed:
-               debug_scsi("failing host reset: session terminated "
-                          "[CID %d age %d]\n", conn->id, session->age);
-               spin_unlock_bh(&session->lock);
-               mutex_unlock(&session->eh_mutex);
-               return FAILED;
-       }
-
-       spin_unlock_bh(&session->lock);
-       mutex_unlock(&session->eh_mutex);
-       /*
-        * we drop the lock here but the leadconn cannot be destoyed while
-        * we are in the scsi eh
-        */
-       iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
-
-       debug_scsi("iscsi_eh_host_reset wait for relogin\n");
-       wait_event_interruptible(conn->ehwait,
-                                session->state == ISCSI_STATE_TERMINATE ||
-                                session->state == ISCSI_STATE_LOGGED_IN ||
-                                session->state == ISCSI_STATE_RECOVERY_FAILED);
-       if (signal_pending(current))
-               flush_signals(current);
-
-       mutex_lock(&session->eh_mutex);
-       spin_lock_bh(&session->lock);
-       if (session->state == ISCSI_STATE_LOGGED_IN)
-               iscsi_session_printk(KERN_INFO, session,
-                                    "host reset succeeded\n");
-       else
-               goto failed;
-       spin_unlock_bh(&session->lock);
-       mutex_unlock(&session->eh_mutex);
-       return SUCCESS;
+       starget->can_queue = session->scsi_cmds_max;
+       return 0;
 }
-EXPORT_SYMBOL_GPL(iscsi_eh_host_reset);
+EXPORT_SYMBOL_GPL(iscsi_target_alloc);
 
 static void iscsi_tmf_timedout(unsigned long data)
 {
@@ -1365,7 +1774,7 @@ static void iscsi_tmf_timedout(unsigned long data)
        spin_lock(&session->lock);
        if (conn->tmf_state == TMF_QUEUED) {
                conn->tmf_state = TMF_TIMEDOUT;
-               debug_scsi("tmf timedout\n");
+               ISCSI_DBG_EH(session, "tmf timedout\n");
                /* unblock eh_abort() */
                wake_up(&conn->ehwait);
        }
@@ -1385,7 +1794,7 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
                spin_unlock_bh(&session->lock);
                iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
                spin_lock_bh(&session->lock);
-               debug_scsi("tmf exec failure\n");
+               ISCSI_DBG_EH(session, "tmf exec failure\n");
                return -EPERM;
        }
        conn->tmfcmd_pdus_cnt++;
@@ -1393,7 +1802,7 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
        conn->tmf_timer.function = iscsi_tmf_timedout;
        conn->tmf_timer.data = (unsigned long)conn;
        add_timer(&conn->tmf_timer);
-       debug_scsi("tmf set timeout\n");
+       ISCSI_DBG_EH(session, "tmf set timeout\n");
 
        spin_unlock_bh(&session->lock);
        mutex_unlock(&session->eh_mutex);
@@ -1426,82 +1835,142 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
  * Fail commands. session lock held and recv side suspended and xmit
  * thread flushed
  */
-static void fail_all_commands(struct iscsi_conn *conn, unsigned lun,
-                             int error)
+static void fail_scsi_tasks(struct iscsi_conn *conn, unsigned lun,
+                           int error)
 {
-       struct iscsi_task *task, *tmp;
+       struct iscsi_task *task;
+       int i;
 
-       if (conn->task && (conn->task->sc->device->lun == lun || lun == -1))
-               conn->task = NULL;
+       for (i = 0; i < conn->session->cmds_max; i++) {
+               task = conn->session->cmds[i];
+               if (!task->sc || task->state == ISCSI_TASK_FREE)
+                       continue;
 
-       /* flush pending */
-       list_for_each_entry_safe(task, tmp, &conn->xmitqueue, running) {
-               if (lun == task->sc->device->lun || lun == -1) {
-                       debug_scsi("failing pending sc %p itt 0x%x\n",
-                                  task->sc, task->itt);
-                       fail_command(conn, task, error << 16);
-               }
-       }
+               if (lun != -1 && lun != task->sc->device->lun)
+                       continue;
 
-       list_for_each_entry_safe(task, tmp, &conn->requeue, running) {
-               if (lun == task->sc->device->lun || lun == -1) {
-                       debug_scsi("failing requeued sc %p itt 0x%x\n",
-                                  task->sc, task->itt);
-                       fail_command(conn, task, error << 16);
-               }
+               ISCSI_DBG_SESSION(conn->session,
+                                 "failing sc %p itt 0x%x state %d\n",
+                                 task->sc, task->itt, task->state);
+               fail_scsi_task(task, error);
        }
+}
 
-       /* fail all other running */
-       list_for_each_entry_safe(task, tmp, &conn->run_list, running) {
-               if (lun == task->sc->device->lun || lun == -1) {
-                       debug_scsi("failing in progress sc %p itt 0x%x\n",
-                                  task->sc, task->itt);
-                       fail_command(conn, task, DID_BUS_BUSY << 16);
-               }
-       }
+/**
+ * iscsi_suspend_queue - suspend iscsi_queuecommand
+ * @conn: iscsi conn to stop queueing IO on
+ *
+ * The suspend bit is set while holding the session lock, so nobody can
+ * be inside xmit_task/queuecommand at that moment, and no new commands
+ * get queued afterwards. Only offload drivers that must synchronize a
+ * path such as ep disconnect with iscsi_queuecommand/xmit_task need to
+ * call this. libiscsi restarts IO through iscsi_start_tx and
+ * iscsi_unblock_session when in FFP.
+ */
+void iscsi_suspend_queue(struct iscsi_conn *conn)
+{
+       struct iscsi_session *session = conn->session;
+
+       spin_lock_bh(&session->lock);
+       set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
+       spin_unlock_bh(&session->lock);
 }
+EXPORT_SYMBOL_GPL(iscsi_suspend_queue);
 
+/**
+ * iscsi_suspend_tx - suspend iscsi_data_xmit
+ * @conn: iscsi conn to stop processing IO on.
+ *
+ * This function sets the suspend bit to prevent iscsi_data_xmit
+ * from sending new IO, and if work is queued on the xmit thread
+ * it will wait for it to be completed.
+ */
 void iscsi_suspend_tx(struct iscsi_conn *conn)
 {
+       struct Scsi_Host *shost = conn->session->host;
+       struct iscsi_host *ihost = shost_priv(shost);
+
        set_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       if (!(conn->session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-               scsi_flush_work(conn->session->host);
+       if (ihost->workq)
+               flush_workqueue(ihost->workq);
 }
 EXPORT_SYMBOL_GPL(iscsi_suspend_tx);
 
 static void iscsi_start_tx(struct iscsi_conn *conn)
 {
        clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx);
-       if (!(conn->session->tt->caps & CAP_DATA_PATH_OFFLOAD))
-               scsi_queue_work(conn->session->host, &conn->xmitwork);
+       iscsi_conn_queue_work(conn);
 }
 
-static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
+/*
+ * We want to make sure a ping is in flight. It has timed out.
+ * And we are not busy processing a pdu that is making
+ * progress but got started before the ping and is taking a while
+ * to complete so the ping is just stuck behind it in a queue.
+ */
+static int iscsi_has_ping_timed_out(struct iscsi_conn *conn)
+{
+       unsigned long ping_deadline;
+
+       /* Without a ping in flight there is nothing that could time out. */
+       if (!conn->ping_task)
+               return 0;
+
+       /* Last receive time plus the recv and ping timeouts, in jiffies. */
+       ping_deadline = conn->last_recv + (conn->recv_timeout * HZ) +
+                       (conn->ping_timeout * HZ);
+       return time_before_eq(ping_deadline, jiffies) ? 1 : 0;
+}
+
+static enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
 {
+       enum blk_eh_timer_return rc = BLK_EH_NOT_HANDLED;
+       struct iscsi_task *task = NULL, *running_task;
        struct iscsi_cls_session *cls_session;
        struct iscsi_session *session;
        struct iscsi_conn *conn;
-       enum scsi_eh_timer_return rc = EH_NOT_HANDLED;
+       int i;
 
-       cls_session = starget_to_session(scsi_target(scmd->device));
+       cls_session = starget_to_session(scsi_target(sc->device));
        session = cls_session->dd_data;
 
-       debug_scsi("scsi cmd %p timedout\n", scmd);
+       ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc);
 
        spin_lock(&session->lock);
        if (session->state != ISCSI_STATE_LOGGED_IN) {
                /*
-                * We are probably in the middle of iscsi recovery so let
-                * that complete and handle the error.
+                * We are probably in the middle of iscsi recovery so let
+                * that complete and handle the error.
+                */
+               rc = BLK_EH_RESET_TIMER;
+               goto done;
+       }
+
+       conn = session->leadconn;
+       if (!conn) {
+               /* In the middle of shutting down */
+               rc = BLK_EH_RESET_TIMER;
+               goto done;
+       }
+
+       task = (struct iscsi_task *)sc->SCp.ptr;
+       if (!task) {
+               /*
+                * Raced with completion. Just reset timer, and let it
+                * complete normally
                 */
-               rc = EH_RESET_TIMER;
+               rc = BLK_EH_RESET_TIMER;
                goto done;
        }
 
-       conn = session->leadconn;
-       if (!conn) {
-               /* In the middle of shuting down */
-               rc = EH_RESET_TIMER;
+       /*
+        * If we have sent (at least queued to the network layer) a pdu or
+        * recvd one for the task since the last timeout ask for
+        * more time. If on the next timeout we have not made progress
+        * we can check if it is the task or connection when we send the
+        * nop as a ping.
+        */
+       if (time_after(task->last_xfer, task->last_timeout)) {
+               ISCSI_DBG_EH(session, "Command making progress. Asking "
+                            "scsi-ml for more time to complete. "
+                            "Last data xfer at %lu. Last timeout was at "
+                            "%lu\n.", task->last_xfer, task->last_timeout);
+               task->have_checked_conn = false;
+               rc = BLK_EH_RESET_TIMER;
                goto done;
        }
 
@@ -1511,22 +1980,73 @@ static enum scsi_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *scmd)
         * if the ping timedout then we are in the middle of cleaning up
         * and can let the iscsi eh handle it
         */
-       if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) +
-                           (conn->ping_timeout * HZ), jiffies))
-               rc = EH_RESET_TIMER;
+       if (iscsi_has_ping_timed_out(conn)) {
+               rc = BLK_EH_RESET_TIMER;
+               goto done;
+       }
+
+       for (i = 0; i < conn->session->cmds_max; i++) {
+               running_task = conn->session->cmds[i];
+               if (!running_task->sc || running_task == task ||
+                    running_task->state != ISCSI_TASK_RUNNING)
+                       continue;
+
+               /*
+                * Only check if cmds started before this one have made
+                * progress, or this could never fail
+                */
+               if (time_after(running_task->sc->jiffies_at_alloc,
+                              task->sc->jiffies_at_alloc))
+                       continue;
+
+               if (time_after(running_task->last_xfer, task->last_timeout)) {
+                       /*
+                        * This task has not made progress, but a task
+                        * started before us has transferred data since
+                        * we started/last-checked. We could be queueing
+                        * too many tasks or the LU is bad.
+                        *
+                        * If the device is bad the cmds ahead of us on
+                        * other devs will complete, and this loop will
+                        * eventually fail starting the scsi eh.
+                        */
+                       ISCSI_DBG_EH(session, "Command has not made progress "
+                                    "but commands ahead of it have. "
+                                    "Asking scsi-ml for more time to "
+                                    "complete. Our last xfer vs running task "
+                                    "last xfer %lu/%lu. Last check %lu.\n",
+                                    task->last_xfer, running_task->last_xfer,
+                                    task->last_timeout);
+                       rc = BLK_EH_RESET_TIMER;
+                       goto done;
+               }
+       }
+
+       /* Assumes nop timeout is shorter than scsi cmd timeout */
+       if (task->have_checked_conn)
+               goto done;
+
        /*
-        * if we are about to check the transport then give the command
-        * more time
+        * Checking the transport already or nop from a cmd timeout still
+        * running
         */
-       if (time_before_eq(conn->last_recv + (conn->recv_timeout * HZ),
-                          jiffies))
-               rc = EH_RESET_TIMER;
-       /* if in the middle of checking the transport then give us more time */
-       if (conn->ping_task)
-               rc = EH_RESET_TIMER;
+       if (conn->ping_task) {
+               task->have_checked_conn = true;
+               rc = BLK_EH_RESET_TIMER;
+               goto done;
+       }
+
+       /* Make sure there is a transport check done */
+       iscsi_send_nopout(conn, NULL);
+       task->have_checked_conn = true;
+       rc = BLK_EH_RESET_TIMER;
+
 done:
+       if (task)
+               task->last_timeout = jiffies;
        spin_unlock(&session->lock);
-       debug_scsi("return %s\n", rc == EH_RESET_TIMER ? "timer reset" : "nh");
+       ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
+                    "timer reset" : "nh");
        return rc;
 }
 
@@ -1546,13 +2066,13 @@ static void iscsi_check_transport_timeouts(unsigned long data)
 
        recv_timeout *= HZ;
        last_recv = conn->last_recv;
-       if (conn->ping_task &&
-           time_before_eq(conn->last_ping + (conn->ping_timeout * HZ),
-                          jiffies)) {
+
+       if (iscsi_has_ping_timed_out(conn)) {
                iscsi_conn_printk(KERN_ERR, conn, "ping timeout of %d secs "
-                                 "expired, last rx %lu, last ping %lu, "
-                                 "now %lu\n", conn->ping_timeout, last_recv,
-                                 conn->last_ping, jiffies);
+                                 "expired, recv timeout %d, last rx %lu, "
+                                 "last ping %lu, now %lu\n",
+                                 conn->ping_timeout, conn->recv_timeout,
+                                 last_recv, conn->last_ping, jiffies);
                spin_unlock(&session->lock);
                iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
                return;
@@ -1560,13 +2080,13 @@ static void iscsi_check_transport_timeouts(unsigned long data)
 
        if (time_before_eq(last_recv + recv_timeout, jiffies)) {
                /* send a ping to try to provoke some traffic */
-               debug_scsi("Sending nopout as ping on conn %p\n", conn);
+               ISCSI_DBG_CONN(conn, "Sending nopout as ping\n");
                iscsi_send_nopout(conn, NULL);
                next_timeout = conn->last_ping + (conn->ping_timeout * HZ);
        } else
                next_timeout = last_recv + recv_timeout;
 
-       debug_scsi("Setting next tmo %lu\n", next_timeout);
+       ISCSI_DBG_CONN(conn, "Setting next tmo %lu\n", next_timeout);
        mod_timer(&conn->transport_timer, next_timeout);
 done:
        spin_unlock(&session->lock);
@@ -1579,9 +2099,9 @@ static void iscsi_prep_abort_task_pdu(struct iscsi_task *task,
        hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE;
        hdr->flags = ISCSI_TM_FUNC_ABORT_TASK & ISCSI_FLAG_TM_FUNC_MASK;
        hdr->flags |= ISCSI_FLAG_CMD_FINAL;
-       memcpy(hdr->lun, task->hdr->lun, sizeof(hdr->lun));
-       hdr->rtt = task->hdr->itt;
-       hdr->refcmdsn = task->hdr->cmdsn;
+       memcpy(hdr->lun, task->lun, sizeof(hdr->lun));
+       hdr->rtt = task->hdr_itt;
+       hdr->refcmdsn = task->cmdsn;
 }
 
 int iscsi_eh_abort(struct scsi_cmnd *sc)
@@ -1596,6 +2116,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
        cls_session = starget_to_session(scsi_target(sc->device));
        session = cls_session->dd_data;
 
+       ISCSI_DBG_EH(session, "aborting sc %p\n", sc);
+
        mutex_lock(&session->eh_mutex);
        spin_lock_bh(&session->lock);
        /*
@@ -1603,7 +2125,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
         * got the command.
         */
        if (!sc->SCp.ptr) {
-               debug_scsi("sc never reached iscsi layer or it completed.\n");
+               ISCSI_DBG_EH(session, "sc never reached iscsi layer or "
+                                     "it completed.\n");
                spin_unlock_bh(&session->lock);
                mutex_unlock(&session->eh_mutex);
                return SUCCESS;
@@ -1617,6 +2140,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
            sc->SCp.phase != session->age) {
                spin_unlock_bh(&session->lock);
                mutex_unlock(&session->eh_mutex);
+               ISCSI_DBG_EH(session, "failing abort due to dropped "
+                                 "session.\n");
                return FAILED;
        }
 
@@ -1625,16 +2150,17 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
        age = session->age;
 
        task = (struct iscsi_task *)sc->SCp.ptr;
-       debug_scsi("aborting [sc %p itt 0x%x]\n", sc, task->itt);
+       ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n",
+                    sc, task->itt);
 
        /* task completed before time out */
        if (!task->sc) {
-               debug_scsi("sc completed while abort in progress\n");
+               ISCSI_DBG_EH(session, "sc completed while abort in progress\n");
                goto success;
        }
 
        if (task->state == ISCSI_TASK_PENDING) {
-               fail_command(conn, task, DID_ABORT << 16);
+               fail_scsi_task(task, DID_ABORT);
                goto success;
        }
 
@@ -1664,10 +2190,11 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
                 * good and have never sent us a successful tmf response
                 * then sent more data for the cmd.
                 */
-               spin_lock(&session->lock);
-               fail_command(conn, task, DID_ABORT << 16);
+               spin_lock_bh(&session->lock);
+               fail_scsi_task(task, DID_ABORT);
                conn->tmf_state = TMF_INITIAL;
-               spin_unlock(&session->lock);
+               memset(hdr, 0, sizeof(*hdr));
+               spin_unlock_bh(&session->lock);
                iscsi_start_tx(conn);
                goto success_unlocked;
        case TMF_TIMEDOUT:
@@ -1677,8 +2204,10 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
        case TMF_NOT_FOUND:
                if (!sc->SCp.ptr) {
                        conn->tmf_state = TMF_INITIAL;
+                       memset(hdr, 0, sizeof(*hdr));
                        /* task completed before tmf abort response */
-                       debug_scsi("sc completed while abort in progress\n");
+                       ISCSI_DBG_EH(session, "sc completed while abort in "
+                                             "progress\n");
                        goto success;
                }
                /* fall through */
@@ -1690,15 +2219,16 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
 success:
        spin_unlock_bh(&session->lock);
 success_unlocked:
-       debug_scsi("abort success [sc %lx itt 0x%x]\n", (long)sc, task->itt);
+       ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n",
+                    sc, task->itt);
        mutex_unlock(&session->eh_mutex);
        return SUCCESS;
 
 failed:
        spin_unlock_bh(&session->lock);
 failed_unlocked:
-       debug_scsi("abort failed [sc %p itt 0x%x]\n", sc,
-                   task ? task->itt : 0);
+       ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc,
+                    task ? task->itt : 0);
        mutex_unlock(&session->eh_mutex);
        return FAILED;
 }
@@ -1725,7 +2255,7 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc)
        cls_session = starget_to_session(scsi_target(sc->device));
        session = cls_session->dd_data;
 
-       debug_scsi("LU Reset [sc %p lun %u]\n", sc, sc->device->lun);
+       ISCSI_DBG_EH(session, "LU Reset [sc %p lun %u]\n", sc, sc->device->lun);
 
        mutex_lock(&session->eh_mutex);
        spin_lock_bh(&session->lock);
@@ -1768,10 +2298,11 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc)
 
        iscsi_suspend_tx(conn);
 
-       spin_lock(&session->lock);
-       fail_all_commands(conn, sc->device->lun, DID_ERROR);
+       spin_lock_bh(&session->lock);
+       memset(hdr, 0, sizeof(*hdr));
+       fail_scsi_tasks(conn, sc->device->lun, DID_ERROR);
        conn->tmf_state = TMF_INITIAL;
-       spin_unlock(&session->lock);
+       spin_unlock_bh(&session->lock);
 
        iscsi_start_tx(conn);
        goto done;
@@ -1779,13 +2310,179 @@ int iscsi_eh_device_reset(struct scsi_cmnd *sc)
 unlock:
        spin_unlock_bh(&session->lock);
 done:
-       debug_scsi("iscsi_eh_device_reset %s\n",
-                 rc == SUCCESS ? "SUCCESS" : "FAILED");
+       ISCSI_DBG_EH(session, "dev reset result = %s\n",
+                    rc == SUCCESS ? "SUCCESS" : "FAILED");
        mutex_unlock(&session->eh_mutex);
        return rc;
 }
 EXPORT_SYMBOL_GPL(iscsi_eh_device_reset);
 
+/*
+ * If the session is not logged in, mark its recovery as failed and wake
+ * anyone sleeping on the lead connection's ehwait queue. A session that is
+ * already ISCSI_STATE_LOGGED_IN is left untouched.
+ */
+void iscsi_session_recovery_timedout(struct iscsi_cls_session *cls_session)
+{
+       struct iscsi_session *sess = cls_session->dd_data;
+       struct iscsi_conn *lead;
+
+       spin_lock_bh(&sess->lock);
+       if (sess->state == ISCSI_STATE_LOGGED_IN)
+               goto unlock;
+
+       sess->state = ISCSI_STATE_RECOVERY_FAILED;
+       lead = sess->leadconn;
+       if (lead)
+               wake_up(&lead->ehwait);
+unlock:
+       spin_unlock_bh(&sess->lock);
+}
+EXPORT_SYMBOL_GPL(iscsi_session_recovery_timedout);
+
+/**
+ * iscsi_eh_session_reset - drop session and attempt relogin
+ * @sc: scsi command
+ *
+ * This function will wait for a relogin, session termination from
+ * userspace, or a recovery/replacement timeout.
+ *
+ * The lead connection is failed with ISCSI_ERR_CONN_FAILED, then the
+ * caller sleeps (interruptibly) on conn->ehwait until the session state
+ * is TERMINATE, LOGGED_IN or RECOVERY_FAILED. The state is re-checked
+ * under eh_mutex and the session lock after the wait.
+ *
+ * Returns SUCCESS if the session is LOGGED_IN after the wait. Returns
+ * FAILED if the session was already in TERMINATE on entry, or if it is
+ * in any state other than LOGGED_IN after the wait.
+ *
+ * NOTE(review): conn is read from session->leadconn before any lock is
+ * taken and is dereferenced without a NULL check - confirm that callers
+ * only get here with a lead connection set.
+ */
+static int iscsi_eh_session_reset(struct scsi_cmnd *sc)
+{
+       struct iscsi_cls_session *cls_session;
+       struct iscsi_session *session;
+       struct iscsi_conn *conn;
+
+       cls_session = starget_to_session(scsi_target(sc->device));
+       session = cls_session->dd_data;
+       conn = session->leadconn;
+
+       mutex_lock(&session->eh_mutex);
+       spin_lock_bh(&session->lock);
+       if (session->state == ISCSI_STATE_TERMINATE) {
+failed: /* also reached by the goto below, with both locks held again */
+               ISCSI_DBG_EH(session,
+                            "failing session reset: Could not log back into "
+                            "%s, %s [age %d]\n", session->targetname,
+                            conn->persistent_address, session->age);
+               spin_unlock_bh(&session->lock);
+               mutex_unlock(&session->eh_mutex);
+               return FAILED;
+       }
+
+       spin_unlock_bh(&session->lock);
+       mutex_unlock(&session->eh_mutex);
+       /*
+        * we drop the lock here but the leadconn cannot be destroyed while
+        * we are in the scsi eh
+        */
+       iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+
+       ISCSI_DBG_EH(session, "wait for relogin\n");
+       wait_event_interruptible(conn->ehwait,
+                                session->state == ISCSI_STATE_TERMINATE ||
+                                session->state == ISCSI_STATE_LOGGED_IN ||
+                                session->state == ISCSI_STATE_RECOVERY_FAILED);
+       if (signal_pending(current))
+               flush_signals(current); /* discard a signal that may have cut the wait short */
+
+       mutex_lock(&session->eh_mutex);
+       spin_lock_bh(&session->lock);
+       if (session->state == ISCSI_STATE_LOGGED_IN) {
+               ISCSI_DBG_EH(session,
+                            "session reset succeeded for %s,%s\n",
+                            session->targetname, conn->persistent_address);
+       } else
+               goto failed; /* TERMINATE, RECOVERY_FAILED, or woken early */
+       spin_unlock_bh(&session->lock);
+       mutex_unlock(&session->eh_mutex);
+       return SUCCESS;
+}
+
+/*
+ * Fill @hdr with an immediate, final task management request whose function
+ * is a warm target reset. @hdr is zeroed first; @sc is not used.
+ */
+static void iscsi_prep_tgt_reset_pdu(struct scsi_cmnd *sc, struct iscsi_tm *hdr)
+{
+       memset(hdr, 0, sizeof(*hdr));
+
+       hdr->rtt = RESERVED_ITT;
+       hdr->opcode = ISCSI_OP_SCSI_TMFUNC | ISCSI_OP_IMMEDIATE;
+       hdr->flags = (ISCSI_TM_FUNC_TARGET_WARM_RESET &
+                     ISCSI_FLAG_TM_FUNC_MASK) | ISCSI_FLAG_CMD_FINAL;
+}
+
+/**
+ * iscsi_eh_target_reset - reset target
+ * @sc: scsi command
+ *
+ * This will attempt to send a warm target reset. If that fails
+ * then we will drop the session and attempt ERL0 recovery.
+ *
+ * Only one TMF may be outstanding per connection: the request is sent
+ * only when conn->tmf_state is TMF_INITIAL and the session is logged in.
+ * On TMF_SUCCESS the xmit side is suspended, every scsi task on the
+ * connection is failed with DID_ERROR, the tmf state and header are
+ * reset, and xmit is restarted. On TMF_TIMEDOUT the connection is
+ * failed with ISCSI_ERR_CONN_FAILED.
+ *
+ * Returns SUCCESS when the target reset TMF succeeded, otherwise the
+ * result of iscsi_eh_session_reset().
+ */
+int iscsi_eh_target_reset(struct scsi_cmnd *sc)
+{
+       struct iscsi_cls_session *cls_session;
+       struct iscsi_session *session;
+       struct iscsi_conn *conn;
+       struct iscsi_tm *hdr;
+       int rc = FAILED; /* every early exit falls back to a session reset */
+
+       cls_session = starget_to_session(scsi_target(sc->device));
+       session = cls_session->dd_data;
+
+       ISCSI_DBG_EH(session, "tgt Reset [sc %p tgt %s]\n", sc,
+                    session->targetname);
+
+       mutex_lock(&session->eh_mutex);
+       spin_lock_bh(&session->lock);
+       /*
+        * Just check if we are not logged in. We cannot check for
+        * the phase because the reset could come from a ioctl.
+        */
+       if (!session->leadconn || session->state != ISCSI_STATE_LOGGED_IN)
+               goto unlock;
+       conn = session->leadconn;
+
+       /* only have one tmf outstanding at a time */
+       if (conn->tmf_state != TMF_INITIAL)
+               goto unlock;
+       conn->tmf_state = TMF_QUEUED;
+
+       hdr = &conn->tmhdr;
+       iscsi_prep_tgt_reset_pdu(sc, hdr);
+
+       if (iscsi_exec_task_mgmt_fn(conn, hdr, session->age,
+                                   session->tgt_reset_timeout)) {
+               rc = FAILED;
+               goto unlock;
+       }
+
+       switch (conn->tmf_state) {
+       case TMF_SUCCESS:
+               break;
+       case TMF_TIMEDOUT:
+               spin_unlock_bh(&session->lock);
+               iscsi_conn_failure(conn, ISCSI_ERR_CONN_FAILED);
+               goto done; /* session lock already dropped above */
+       default:
+               conn->tmf_state = TMF_INITIAL;
+               goto unlock;
+       }
+
+       rc = SUCCESS;
+       spin_unlock_bh(&session->lock);
+
+       iscsi_suspend_tx(conn);
+
+       spin_lock_bh(&session->lock);
+       memset(hdr, 0, sizeof(*hdr)); /* clears conn->tmhdr */
+       fail_scsi_tasks(conn, -1, DID_ERROR); /* lun -1 matches every LUN */
+       conn->tmf_state = TMF_INITIAL;
+       spin_unlock_bh(&session->lock);
+
+       iscsi_start_tx(conn);
+       goto done;
+
+unlock:
+       spin_unlock_bh(&session->lock);
+done:
+       ISCSI_DBG_EH(session, "tgt %s reset result = %s\n", session->targetname,
+                    rc == SUCCESS ? "SUCCESS" : "FAILED");
+       mutex_unlock(&session->eh_mutex);
+
+       if (rc == FAILED) /* escalate: drop the session and wait for relogin */
+               rc = iscsi_eh_session_reset(sc);
+       return rc;
+}
+EXPORT_SYMBOL_GPL(iscsi_eh_target_reset);
+
 /*
  * Pre-allocate a pool of @max items of @item_size. By default, the pool
  * should be accessed via kfifo_{get,put} on q->queue.
@@ -1807,12 +2504,9 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
                num_arrays++;
        q->pool = kzalloc(num_arrays * max * sizeof(void*), GFP_KERNEL);
        if (q->pool == NULL)
-               goto enomem;
+               return -ENOMEM;
 
-       q->queue = kfifo_init((void*)q->pool, max * sizeof(void*),
-                             GFP_KERNEL, NULL);
-       if (q->queue == ERR_PTR(-ENOMEM))
-               goto enomem;
+       kfifo_init(&q->queue, (void*)q->pool, max * sizeof(void*));
 
        for (i = 0; i < max; i++) {
                q->pool[i] = kzalloc(item_size, GFP_KERNEL);
@@ -1820,7 +2514,7 @@ iscsi_pool_init(struct iscsi_pool *q, int max, void ***items, int item_size)
                        q->max = i;
                        goto enomem;
                }
-               __kfifo_put(q->queue, (void*)&q->pool[i], sizeof(void*));
+               kfifo_in(&q->queue, (void*)&q->pool[i], sizeof(void*));
        }
 
        if (items) {
@@ -1842,8 +2536,7 @@ void iscsi_pool_free(struct iscsi_pool *q)
 
        for (i = 0; i < q->max; i++)
                kfree(q->pool[i]);
-       if (q->pool)
-               kfree(q->pool);
+       kfree(q->pool);
 }
 EXPORT_SYMBOL_GPL(iscsi_pool_free);
 
@@ -1857,6 +2550,14 @@ EXPORT_SYMBOL_GPL(iscsi_pool_free);
  */
 int iscsi_host_add(struct Scsi_Host *shost, struct device *pdev)
 {
+       if (!shost->can_queue) /* left at zero: apply the default */
+               shost->can_queue = ISCSI_DEF_XMIT_CMDS_MAX;
+
+       if (!shost->cmd_per_lun) /* left at zero: apply the default */
+               shost->cmd_per_lun = ISCSI_DEF_CMD_PER_LUN;
+
+       if (!shost->transportt->eh_timed_out) /* keep a handler that is already set */
+               shost->transportt->eh_timed_out = iscsi_eh_cmd_timed_out;
        return scsi_add_host(shost, pdev);
 }
 EXPORT_SYMBOL_GPL(iscsi_host_add);
@@ -1865,46 +2566,72 @@ EXPORT_SYMBOL_GPL(iscsi_host_add);
  * iscsi_host_alloc - allocate a host and driver data
  * @sht: scsi host template
  * @dd_data_size: driver host data size
- * @qdepth: default device queue depth
+ * @xmit_can_sleep: bool indicating if LLD will queue IO from a work queue
  *
  * This should be called by partial offload and software iscsi drivers.
  * To access the driver specific memory use the iscsi_host_priv() macro.
  */
 struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht,
-                                  int dd_data_size, uint16_t qdepth)
+                                  int dd_data_size, bool xmit_can_sleep)
 {
        struct Scsi_Host *shost;
+       struct iscsi_host *ihost;
 
        shost = scsi_host_alloc(sht, sizeof(struct iscsi_host) + dd_data_size);
        if (!shost)
                return NULL;
-       shost->transportt->eh_timed_out = iscsi_eh_cmd_timed_out;
+       ihost = shost_priv(shost);
 
-       if (qdepth > ISCSI_MAX_CMD_PER_LUN || qdepth < 1) {
-               if (qdepth != 0)
-                       printk(KERN_ERR "iscsi: invalid queue depth of %d. "
-                              "Queue depth must be between 1 and %d.\n",
-                              qdepth, ISCSI_MAX_CMD_PER_LUN);
-               qdepth = ISCSI_DEF_CMD_PER_LUN;
+       if (xmit_can_sleep) {
+               snprintf(ihost->workq_name, sizeof(ihost->workq_name),
+                       "iscsi_q_%d", shost->host_no);
+               ihost->workq = create_singlethread_workqueue(ihost->workq_name);
+               if (!ihost->workq)
+                       goto free_host;
        }
-       shost->cmd_per_lun = qdepth;
+
+       spin_lock_init(&ihost->lock);
+       ihost->state = ISCSI_HOST_SETUP;
+       ihost->num_sessions = 0;
+       init_waitqueue_head(&ihost->session_removal_wq);
        return shost;
+
+free_host:
+       scsi_host_put(shost);
+       return NULL;
 }
 EXPORT_SYMBOL_GPL(iscsi_host_alloc);
 
+/*
+ * Per-session callback used by iscsi_host_remove(): report
+ * ISCSI_ERR_INVALID_HOST against the session's driver data.
+ */
+static void iscsi_notify_host_removed(struct iscsi_cls_session *cls_session)
+{
+       void *session_data = cls_session->dd_data;
+
+       iscsi_session_failure(session_data, ISCSI_ERR_INVALID_HOST);
+}
+
 /**
  * iscsi_host_remove - remove host and sessions
  * @shost: scsi host
  *
- * This will also remove any sessions attached to the host, but if userspace
- * is managing the session at the same time this will break. TODO: add
- * refcounting to the netlink iscsi interface so a rmmod or host hot unplug
- * does not remove the memory from under us.
+ * If there are any sessions left, this will initiate the removal and wait
+ * for the completion.
  */
 void iscsi_host_remove(struct Scsi_Host *shost)
 {
-       iscsi_host_for_each_session(shost, iscsi_session_teardown);
+       struct iscsi_host *ihost = shost_priv(shost);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ihost->lock, flags);
+       ihost->state = ISCSI_HOST_REMOVED;
+       spin_unlock_irqrestore(&ihost->lock, flags);
+
+       iscsi_host_for_each_session(shost, iscsi_notify_host_removed);
+       wait_event_interruptible(ihost->session_removal_wq,
+                                ihost->num_sessions == 0);
+       if (signal_pending(current))
+               flush_signals(current);
+
        scsi_remove_host(shost);
+       if (ihost->workq)
+               destroy_workqueue(ihost->workq);
 }
 EXPORT_SYMBOL_GPL(iscsi_host_remove);
 
@@ -1919,6 +2646,27 @@ void iscsi_host_free(struct Scsi_Host *shost)
 }
 EXPORT_SYMBOL_GPL(iscsi_host_free);
 
+/*
+ * Drop one session from @shost's iscsi_host count and wake the
+ * session_removal_wq waiter once the count reaches zero. A host reference
+ * is held across the update; if none can be taken, an error is logged and
+ * the count is left untouched.
+ */
+static void iscsi_host_dec_session_cnt(struct Scsi_Host *shost)
+{
+       struct iscsi_host *ihost = shost_priv(shost);
+       struct Scsi_Host *pinned = scsi_host_get(shost);
+       unsigned long irq_flags;
+
+       if (!pinned) {
+               printk(KERN_ERR "Invalid state. Cannot notify host removal "
+                     "of session teardown event because host already "
+                     "removed.\n");
+               return;
+       }
+
+       spin_lock_irqsave(&ihost->lock, irq_flags);
+       if (--ihost->num_sessions == 0)
+               wake_up(&ihost->session_removal_wq);
+       spin_unlock_irqrestore(&ihost->lock, irq_flags);
+
+       scsi_host_put(pinned);
+}
+
 /**
  * iscsi_session_setup - create iscsi cls session and host and session
  * @iscsit: iscsi transport template
@@ -1936,12 +2684,25 @@ EXPORT_SYMBOL_GPL(iscsi_host_free);
  */
 struct iscsi_cls_session *
 iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
-                   uint16_t cmds_max, int cmd_task_size,
+                   uint16_t cmds_max, int dd_size, int cmd_task_size,
                    uint32_t initial_cmdsn, unsigned int id)
 {
+       struct iscsi_host *ihost = shost_priv(shost);
        struct iscsi_session *session;
        struct iscsi_cls_session *cls_session;
        int cmd_i, scsi_cmds, total_cmds = cmds_max;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ihost->lock, flags);
+       if (ihost->state == ISCSI_HOST_REMOVED) {
+               spin_unlock_irqrestore(&ihost->lock, flags);
+               return NULL;
+       }
+       ihost->num_sessions++;
+       spin_unlock_irqrestore(&ihost->lock, flags);
+
+       if (!total_cmds)
+               total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
        /*
         * The iscsi layer needs some tasks for nop handling and tmfs,
         * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX
@@ -1951,7 +2712,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
                printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
                       "must be a power of two that is at least %d.\n",
                       total_cmds, ISCSI_TOTAL_CMDS_MIN);
-               return NULL;
+               goto dec_session_count;
        }
 
        if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
@@ -1973,14 +2734,16 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
        scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
 
        cls_session = iscsi_alloc_session(shost, iscsit,
-                                         sizeof(struct iscsi_session));
+                                         sizeof(struct iscsi_session) +
+                                         dd_size);
        if (!cls_session)
-               return NULL;
+               goto dec_session_count;
        session = cls_session->dd_data;
        session->cls_session = cls_session;
        session->host = shost;
        session->state = ISCSI_STATE_FREE;
        session->fast_abort = 1;
+       session->tgt_reset_timeout = 30;
        session->lu_reset_timeout = 15;
        session->abort_timeout = 10;
        session->scsi_cmds_max = scsi_cmds;
@@ -1990,6 +2753,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
        session->max_cmdsn = initial_cmdsn + 1;
        session->max_r2t = 1;
        session->tt = iscsit;
+       session->dd_data = cls_session->dd_data + sizeof(*session);
        mutex_init(&session->eh_mutex);
        spin_lock_init(&session->lock);
 
@@ -2006,6 +2770,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
                if (cmd_task_size)
                        task->dd_data = &task[1];
                task->itt = cmd_i;
+               task->state = ISCSI_TASK_FREE;
                INIT_LIST_HEAD(&task->running);
        }
 
@@ -2014,6 +2779,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
 
        if (iscsi_add_session(cls_session, id))
                goto cls_session_fail;
+
        return cls_session;
 
 cls_session_fail:
@@ -2022,6 +2788,8 @@ module_get_fail:
        iscsi_pool_free(&session->cmdpool);
 cmdpool_alloc_fail:
        iscsi_free_session(cls_session);
+dec_session_count:
+       iscsi_host_dec_session_cnt(shost);
        return NULL;
 }
 EXPORT_SYMBOL_GPL(iscsi_session_setup);
@@ -2037,6 +2805,7 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
 {
        struct iscsi_session *session = cls_session->dd_data;
        struct module *owner = cls_session->transport->owner;
+       struct Scsi_Host *shost = session->host;
 
        iscsi_pool_free(&session->cmdpool);
 
@@ -2049,6 +2818,7 @@ void iscsi_session_teardown(struct iscsi_cls_session *cls_session)
        kfree(session->ifacename);
 
        iscsi_destroy_session(cls_session);
+       iscsi_host_dec_session_cnt(shost);
        module_put(owner);
 }
 EXPORT_SYMBOL_GPL(iscsi_session_teardown);
@@ -2087,16 +2857,14 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
        conn->transport_timer.data = (unsigned long)conn;
        conn->transport_timer.function = iscsi_check_transport_timeouts;
 
-       INIT_LIST_HEAD(&conn->run_list);
-       INIT_LIST_HEAD(&conn->mgmt_run_list);
        INIT_LIST_HEAD(&conn->mgmtqueue);
-       INIT_LIST_HEAD(&conn->xmitqueue);
+       INIT_LIST_HEAD(&conn->cmdqueue);
        INIT_LIST_HEAD(&conn->requeue);
        INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
        /* allocate login_task used for the login/text sequences */
        spin_lock_bh(&session->lock);
-       if (!__kfifo_get(session->cmdpool.queue,
+       if (!kfifo_out(&session->cmdpool.queue,
                          (void*)&conn->login_task,
                         sizeof(void*))) {
                spin_unlock_bh(&session->lock);
@@ -2104,7 +2872,8 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
        }
        spin_unlock_bh(&session->lock);
 
-       data = kmalloc(ISCSI_DEF_MAX_RECV_SEG_LEN, GFP_KERNEL);
+       data = (char *) __get_free_pages(GFP_KERNEL,
+                                        get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
        if (!data)
                goto login_task_data_alloc_fail;
        conn->login_task->data = conn->data = data;
@@ -2115,7 +2884,7 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
        return cls_conn;
 
 login_task_data_alloc_fail:
-       __kfifo_put(session->cmdpool.queue, (void*)&conn->login_task,
+       kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
                    sizeof(void*));
 login_task_alloc_fail:
        iscsi_destroy_conn(cls_conn);
@@ -2175,9 +2944,10 @@ void iscsi_conn_teardown(struct iscsi_cls_conn *cls_conn)
        iscsi_suspend_tx(conn);
 
        spin_lock_bh(&session->lock);
-       kfree(conn->data);
+       free_pages((unsigned long) conn->data,
+                  get_order(ISCSI_DEF_MAX_RECV_SEG_LEN));
        kfree(conn->persistent_address);
-       __kfifo_put(session->cmdpool.queue, (void*)&conn->login_task,
+       kfifo_in(&session->cmdpool.queue, (void*)&conn->login_task,
                    sizeof(void*));
        if (session->leadconn == conn)
                session->leadconn = NULL;
@@ -2256,25 +3026,28 @@ int iscsi_conn_start(struct iscsi_cls_conn *cls_conn)
 EXPORT_SYMBOL_GPL(iscsi_conn_start);
 
 static void
-flush_control_queues(struct iscsi_session *session, struct iscsi_conn *conn)
+fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
 {
-       struct iscsi_task *task, *tmp;
+       struct iscsi_task *task;
+       int i, state;   /* state: value handed to iscsi_complete_task() */
 
-       /* handle pending */
-       list_for_each_entry_safe(task, tmp, &conn->mgmtqueue, running) {
-               debug_scsi("flushing pending mgmt task itt 0x%x\n", task->itt);
-               /* release ref from prep task */
-               __iscsi_put_task(task);
-       }
+       for (i = 0; i < conn->session->cmds_max; i++) {
+               task = conn->session->cmds[i];
+               if (task->sc)   /* presumably a SCSI cmd task, not mgmt */
+                       continue;
 
-       /* handle running */
-       list_for_each_entry_safe(task, tmp, &conn->mgmt_run_list, running) {
-               debug_scsi("flushing running mgmt task itt 0x%x\n", task->itt);
-               /* release ref from prep task */
-               __iscsi_put_task(task);
-       }
+               if (task->state == ISCSI_TASK_FREE)     /* nothing to fail */
+                       continue;
+               /* Remaining tasks are logged and completed as mgmt tasks. */
+               ISCSI_DBG_SESSION(conn->session,
+                                 "failing mgmt itt 0x%x state %d\n",
+                                 task->itt, task->state);
+               state = ISCSI_TASK_ABRT_SESS_RECOV;     /* default */
+               if (task->state == ISCSI_TASK_PENDING)
+                       state = ISCSI_TASK_COMPLETED;   /* pending override */
+               iscsi_complete_task(task, state);
 
-       conn->task = NULL;
+       }
 }
 
 static void iscsi_start_session_recovery(struct iscsi_session *session,
@@ -2282,8 +3055,6 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
 {
        int old_stop_stage;
 
-       del_timer_sync(&conn->transport_timer);
-
        mutex_lock(&session->eh_mutex);
        spin_lock_bh(&session->lock);
        if (conn->stop_stage == STOP_CONN_TERM) {
@@ -2301,13 +3072,17 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
                session->state = ISCSI_STATE_TERMINATE;
        else if (conn->stop_stage != STOP_CONN_RECOVER)
                session->state = ISCSI_STATE_IN_RECOVERY;
+       spin_unlock_bh(&session->lock);
 
+       del_timer_sync(&conn->transport_timer);
+       iscsi_suspend_tx(conn);
+
+       spin_lock_bh(&session->lock);
        old_stop_stage = conn->stop_stage;
        conn->stop_stage = flag;
        conn->c_stage = ISCSI_CONN_STOPPED;
        spin_unlock_bh(&session->lock);
 
-       iscsi_suspend_tx(conn);
        /*
         * for connection level recovery we should not calculate
         * header digest. conn->hdr_size used for optimization
@@ -2319,7 +3094,7 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
                conn->datadgst_en = 0;
                if (session->state == ISCSI_STATE_IN_RECOVERY &&
                    old_stop_stage != STOP_CONN_RECOVER) {
-                       debug_scsi("blocking session\n");
+                       ISCSI_DBG_SESSION(session, "blocking session\n");
                        iscsi_block_session(session->cls_session);
                }
        }
@@ -2328,9 +3103,9 @@ static void iscsi_start_session_recovery(struct iscsi_session *session,
         * flush queues.
         */
        spin_lock_bh(&session->lock);
-       fail_all_commands(conn, -1,
-                       STOP_CONN_RECOVER ? DID_BUS_BUSY : DID_ERROR);
-       flush_control_queues(session, conn);
+       fail_scsi_tasks(conn, -1, DID_TRANSPORT_DISRUPTED);
+       fail_mgmt_tasks(session, conn);
+       memset(&conn->tmhdr, 0, sizeof(conn->tmhdr));
        spin_unlock_bh(&session->lock);
        mutex_unlock(&session->eh_mutex);
 }
@@ -2372,6 +3147,23 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
 }
 EXPORT_SYMBOL_GPL(iscsi_conn_bind);
 
+/* Point *param at a copy of new_val_buf (no-op if equal); 0 or -ENOMEM. */
+static int iscsi_switch_str_param(char **param, char *new_val_buf)
+{
+       char *copy;
+
+       if (*param && strcmp(*param, new_val_buf) == 0)
+               return 0;
+
+       copy = kstrdup(new_val_buf, GFP_NOIO);
+       if (copy == NULL)
+               return -ENOMEM;
+
+       /* Free the old value only after the duplicate succeeded. */
+       kfree(*param);
+       *param = copy;
+       return 0;
+}
 
 int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
                    enum iscsi_param param, char *buf, int buflen)
@@ -2390,6 +3182,9 @@ int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
        case ISCSI_PARAM_LU_RESET_TMO:
                sscanf(buf, "%d", &session->lu_reset_timeout);
                break;
+       case ISCSI_PARAM_TGT_RESET_TMO:
+               sscanf(buf, "%d", &session->tgt_reset_timeout);
+               break;
        case ISCSI_PARAM_PING_TMO:
                sscanf(buf, "%d", &conn->ping_timeout);
                break;
@@ -2444,38 +3239,15 @@ int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
                sscanf(buf, "%u", &conn->exp_statsn);
                break;
        case ISCSI_PARAM_USERNAME:
-               kfree(session->username);
-               session->username = kstrdup(buf, GFP_KERNEL);
-               if (!session->username)
-                       return -ENOMEM;
-               break;
+               return iscsi_switch_str_param(&session->username, buf);
        case ISCSI_PARAM_USERNAME_IN:
-               kfree(session->username_in);
-               session->username_in = kstrdup(buf, GFP_KERNEL);
-               if (!session->username_in)
-                       return -ENOMEM;
-               break;
+               return iscsi_switch_str_param(&session->username_in, buf);
        case ISCSI_PARAM_PASSWORD:
-               kfree(session->password);
-               session->password = kstrdup(buf, GFP_KERNEL);
-               if (!session->password)
-                       return -ENOMEM;
-               break;
+               return iscsi_switch_str_param(&session->password, buf);
        case ISCSI_PARAM_PASSWORD_IN:
-               kfree(session->password_in);
-               session->password_in = kstrdup(buf, GFP_KERNEL);
-               if (!session->password_in)
-                       return -ENOMEM;
-               break;
+               return iscsi_switch_str_param(&session->password_in, buf);
        case ISCSI_PARAM_TARGET_NAME:
-               /* this should not change between logins */
-               if (session->targetname)
-                       break;
-
-               session->targetname = kstrdup(buf, GFP_KERNEL);
-               if (!session->targetname)
-                       return -ENOMEM;
-               break;
+               return iscsi_switch_str_param(&session->targetname, buf);
        case ISCSI_PARAM_TPGT:
                sscanf(buf, "%d", &session->tpgt);
                break;
@@ -2483,25 +3255,11 @@ int iscsi_set_param(struct iscsi_cls_conn *cls_conn,
                sscanf(buf, "%d", &conn->persistent_port);
                break;
        case ISCSI_PARAM_PERSISTENT_ADDRESS:
-               /*
-                * this is the address returned in discovery so it should
-                * not change between logins.
-                */
-               if (conn->persistent_address)
-                       break;
-
-               conn->persistent_address = kstrdup(buf, GFP_KERNEL);
-               if (!conn->persistent_address)
-                       return -ENOMEM;
-               break;
+               return iscsi_switch_str_param(&conn->persistent_address, buf);
        case ISCSI_PARAM_IFACE_NAME:
-               if (!session->ifacename)
-                       session->ifacename = kstrdup(buf, GFP_KERNEL);
-               break;
+               return iscsi_switch_str_param(&session->ifacename, buf);
        case ISCSI_PARAM_INITIATOR_NAME:
-               if (!session->initiatorname)
-                       session->initiatorname = kstrdup(buf, GFP_KERNEL);
-               break;
+               return iscsi_switch_str_param(&session->initiatorname, buf);
        default:
                return -ENOSYS;
        }
@@ -2526,6 +3284,9 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
        case ISCSI_PARAM_LU_RESET_TMO:
                len = sprintf(buf, "%d\n", session->lu_reset_timeout);
                break;
+       case ISCSI_PARAM_TGT_RESET_TMO:
+               len = sprintf(buf, "%d\n", session->tgt_reset_timeout);
+               break;
        case ISCSI_PARAM_INITIAL_R2T_EN:
                len = sprintf(buf, "%d\n", session->initial_r2t_en);
                break;
@@ -2572,10 +3333,7 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
                len = sprintf(buf, "%s\n", session->ifacename);
                break;
        case ISCSI_PARAM_INITIATOR_NAME:
-               if (!session->initiatorname)
-                       len = sprintf(buf, "%s\n", "unknown");
-               else
-                       len = sprintf(buf, "%s\n", session->initiatorname);
+               len = sprintf(buf, "%s\n", session->initiatorname);
                break;
        default:
                return -ENOSYS;
@@ -2641,29 +3399,16 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
 
        switch (param) {
        case ISCSI_HOST_PARAM_NETDEV_NAME:
-               if (!ihost->netdev)
-                       len = sprintf(buf, "%s\n", "default");
-               else
-                       len = sprintf(buf, "%s\n", ihost->netdev);
+               len = sprintf(buf, "%s\n", ihost->netdev);
                break;
        case ISCSI_HOST_PARAM_HWADDRESS:
-               if (!ihost->hwaddress)
-                       len = sprintf(buf, "%s\n", "default");
-               else
-                       len = sprintf(buf, "%s\n", ihost->hwaddress);
+               len = sprintf(buf, "%s\n", ihost->hwaddress);
                break;
        case ISCSI_HOST_PARAM_INITIATOR_NAME:
-               if (!ihost->initiatorname)
-                       len = sprintf(buf, "%s\n", "unknown");
-               else
-                       len = sprintf(buf, "%s\n", ihost->initiatorname);
+               len = sprintf(buf, "%s\n", ihost->initiatorname);
                break;
        case ISCSI_HOST_PARAM_IPADDRESS:
-               if (!strlen(ihost->local_address))
-                       len = sprintf(buf, "%s\n", "unknown");
-               else
-                       len = sprintf(buf, "%s\n",
-                                     ihost->local_address);
+               len = sprintf(buf, "%s\n", ihost->local_address);
                break;
        default:
                return -ENOSYS;
@@ -2680,17 +3425,11 @@ int iscsi_host_set_param(struct Scsi_Host *shost, enum iscsi_host_param param,
 
        switch (param) {
        case ISCSI_HOST_PARAM_NETDEV_NAME:
-               if (!ihost->netdev)
-                       ihost->netdev = kstrdup(buf, GFP_KERNEL);
-               break;
+               return iscsi_switch_str_param(&ihost->netdev, buf);
        case ISCSI_HOST_PARAM_HWADDRESS:
-               if (!ihost->hwaddress)
-                       ihost->hwaddress = kstrdup(buf, GFP_KERNEL);
-               break;
+               return iscsi_switch_str_param(&ihost->hwaddress, buf);
        case ISCSI_HOST_PARAM_INITIATOR_NAME:
-               if (!ihost->initiatorname)
-                       ihost->initiatorname = kstrdup(buf, GFP_KERNEL);
-               break;
+               return iscsi_switch_str_param(&ihost->initiatorname, buf);
        default:
                return -ENOSYS;
        }