powerpc/pseries/dlpar: Use kasprintf
[safe/jmp/linux-2.6] / fs / ocfs2 / cluster / heartbeat.c
index e331f4c..41d5f1f 100644 (file)
@@ -33,6 +33,8 @@
 #include <linux/random.h>
 #include <linux/crc32.h>
 #include <linux/time.h>
+#include <linux/debugfs.h>
+#include <linux/slab.h>
 
 #include "heartbeat.h"
 #include "tcp.h"
@@ -60,6 +62,11 @@ static unsigned long o2hb_live_node_bitmap[BITS_TO_LONGS(O2NM_MAX_NODES)];
 static LIST_HEAD(o2hb_node_events);
 static DECLARE_WAIT_QUEUE_HEAD(o2hb_steady_queue);
 
+#define O2HB_DEBUG_DIR                 "o2hb"
+#define O2HB_DEBUG_LIVENODES           "livenodes"
+static struct dentry *o2hb_debug_dir;
+static struct dentry *o2hb_debug_livenodes;
+
 static LIST_HEAD(o2hb_all_regions);
 
 static struct o2hb_callback {
@@ -72,7 +79,7 @@ static struct o2hb_callback *hbcall_from_type(enum o2hb_callback_type type);
 
 unsigned int o2hb_dead_threshold = O2HB_DEFAULT_DEAD_THRESHOLD;
 
-/* Only sets a new threshold if there are no active regions. 
+/* Only sets a new threshold if there are no active regions.
  *
  * No locking or otherwise interesting code is required for reading
  * o2hb_dead_threshold as it can't change once regions are active and
@@ -164,13 +171,14 @@ static void o2hb_write_timeout(struct work_struct *work)
 
        mlog(ML_ERROR, "Heartbeat write timeout to device %s after %u "
             "milliseconds\n", reg->hr_dev_name,
-            jiffies_to_msecs(jiffies - reg->hr_last_timeout_start)); 
+            jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
        o2quo_disk_timeout();
 }
 
 static void o2hb_arm_write_timeout(struct o2hb_region *reg)
 {
-       mlog(0, "Queue write timeout for %u ms\n", O2HB_MAX_WRITE_TIMEOUT_MS);
+       mlog(ML_HEARTBEAT, "Queue write timeout for %u ms\n",
+            O2HB_MAX_WRITE_TIMEOUT_MS);
 
        cancel_delayed_work(&reg->hr_write_timeout_work);
        reg->hr_last_timeout_start = jiffies;
@@ -216,8 +224,7 @@ static void o2hb_wait_on_io(struct o2hb_region *reg,
        wait_for_completion(&wc->wc_io_complete);
 }
 
-static int o2hb_bio_end_io(struct bio *bio,
-                          unsigned int bytes_done,
+static void o2hb_bio_end_io(struct bio *bio,
                           int error)
 {
        struct o2hb_bio_wait_ctxt *wc = bio->bi_private;
@@ -227,12 +234,8 @@ static int o2hb_bio_end_io(struct bio *bio,
                wc->wc_error = error;
        }
 
-       if (bio->bi_size)
-               return 1;
-
        o2hb_bio_wait_dec(wc, 1);
        bio_put(bio);
-       return 0;
 }
 
 /* Setup a Bio to cover I/O against num_slots slots starting at
@@ -272,7 +275,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
                current_page = cs / spp;
                page = reg->hr_slot_data[current_page];
 
-               vec_len = min(PAGE_CACHE_SIZE,
+               vec_len = min(PAGE_CACHE_SIZE - vec_start,
                              (max_slots-cs) * (PAGE_CACHE_SIZE/spp) );
 
                mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
@@ -622,7 +625,7 @@ static int o2hb_check_slot(struct o2hb_region *reg,
             "seq %llu last %llu changed %u equal %u\n",
             slot->ds_node_num, (long long)slot->ds_last_generation,
             le32_to_cpu(hb_block->hb_cksum),
-            (unsigned long long)le64_to_cpu(hb_block->hb_seq), 
+            (unsigned long long)le64_to_cpu(hb_block->hb_seq),
             (unsigned long long)slot->ds_last_time, slot->ds_changed_samples,
             slot->ds_equal_samples);
 
@@ -859,7 +862,7 @@ static int o2hb_thread(void *data)
 
        while (!kthread_should_stop() && !reg->hr_unclean_stop) {
                /* We track the time spent inside
-                * o2hb_do_disk_heartbeat so that we avoid more then
+                * o2hb_do_disk_heartbeat so that we avoid more than
                 * hr_timeout_ms between disk writes. On busy systems
                 * this should result in a heartbeat which is less
                 * likely to time itself out. */
@@ -873,7 +876,8 @@ static int o2hb_thread(void *data)
                do_gettimeofday(&after_hb);
                elapsed_msec = o2hb_elapsed_msecs(&before_hb, &after_hb);
 
-               mlog(0, "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
+               mlog(ML_HEARTBEAT,
+                    "start = %lu.%lu, end = %lu.%lu, msec = %u\n",
                     before_hb.tv_sec, (unsigned long) before_hb.tv_usec,
                     after_hb.tv_sec, (unsigned long) after_hb.tv_usec,
                     elapsed_msec);
@@ -910,7 +914,77 @@ static int o2hb_thread(void *data)
        return 0;
 }
 
-void o2hb_init(void)
+#ifdef CONFIG_DEBUG_FS
+/* Format the node map from o2hb_fill_node_map() as "<n> <n> ...\n" and
+ * park it in file->private_data for the read and release handlers. */
+static int o2hb_debug_open(struct inode *inode, struct file *file)
+{
+       unsigned long map[BITS_TO_LONGS(O2NM_MAX_NODES)];
+       char *buf;
+       int i = -1;
+       int out = 0;
+
+       buf = kmalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       o2hb_fill_node_map(map, sizeof(map));
+
+       /* scnprintf() returns what was stored, keeping out < PAGE_SIZE */
+       while ((i = find_next_bit(map, O2NM_MAX_NODES, i + 1)) < O2NM_MAX_NODES)
+               out += scnprintf(buf + out, PAGE_SIZE - out, "%d ", i);
+       out += scnprintf(buf + out, PAGE_SIZE - out, "\n");
+
+       /* the read handler uses the inode size as the text length */
+       i_size_write(inode, out);
+       file->private_data = buf;
+       return 0;
+}
+
+static int o2hb_debug_release(struct inode *inode, struct file *file)
+{
+       kfree(file->private_data);      /* text buffer attached at open */
+       return 0;
+}
+
+static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
+                                size_t nbytes, loff_t *ppos)
+{      /* serve the text built at open; its length is the inode size */
+       return simple_read_from_buffer(buf, nbytes, ppos, file->private_data,
+                                      i_size_read(file->f_mapping->host));
+}
+#else
+static int o2hb_debug_open(struct inode *inode, struct file *file)
+{
+       return 0;       /* !CONFIG_DEBUG_FS stub: nothing to prepare */
+}
+static int o2hb_debug_release(struct inode *inode, struct file *file)
+{
+       return 0;       /* !CONFIG_DEBUG_FS stub: nothing was allocated */
+}
+static ssize_t o2hb_debug_read(struct file *file, char __user *buf,
+                              size_t nbytes, loff_t *ppos)
+{
+       return 0;       /* !CONFIG_DEBUG_FS stub: reports zero bytes read */
+}
+#endif  /* CONFIG_DEBUG_FS */
+
+static const struct file_operations o2hb_debug_fops = { /* "livenodes" */
+       .open =         o2hb_debug_open,        /* format the node list */
+       .release =      o2hb_debug_release,     /* free that text */
+       .read =         o2hb_debug_read,        /* copy the text to user */
+       .llseek =       generic_file_llseek,
+};
+
+/* Remove the debugfs file, then its directory.  debugfs_remove()
+ * accepts NULL, so entries that were never created need no guard. */
+void o2hb_exit(void)
+{
+       debugfs_remove(o2hb_debug_livenodes);
+       debugfs_remove(o2hb_debug_dir);
+}
+
+int o2hb_init(void)
 {
        int i;
 
@@ -923,6 +997,24 @@ void o2hb_init(void)
        INIT_LIST_HEAD(&o2hb_node_events);
 
        memset(o2hb_live_node_bitmap, 0, sizeof(o2hb_live_node_bitmap));
+
+       o2hb_debug_dir = debugfs_create_dir(O2HB_DEBUG_DIR, NULL);
+       if (!o2hb_debug_dir) {
+               mlog_errno(-ENOMEM);
+               return -ENOMEM;
+       }
+
+       o2hb_debug_livenodes = debugfs_create_file(O2HB_DEBUG_LIVENODES,
+                                                  S_IFREG|S_IRUSR,
+                                                  o2hb_debug_dir, NULL,
+                                                  &o2hb_debug_fops);
+       if (!o2hb_debug_livenodes) {
+               mlog_errno(-ENOMEM);
+               debugfs_remove(o2hb_debug_dir);
+               return -ENOMEM;
+       }
+
+       return 0;
 }
 
 /* if we're already in a callback then we're already serialized by the sem */
@@ -981,7 +1073,7 @@ static void o2hb_region_release(struct config_item *item)
        }
 
        if (reg->hr_bdev)
-               blkdev_put(reg->hr_bdev);
+               blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
 
        if (reg->hr_slots)
                kfree(reg->hr_slots);
@@ -1273,7 +1365,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
                goto out;
 
        reg->hr_bdev = I_BDEV(filp->f_mapping->host);
-       ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ, 0);
+       ret = blkdev_get(reg->hr_bdev, FMODE_WRITE | FMODE_READ);
        if (ret) {
                reg->hr_bdev = NULL;
                goto out;
@@ -1282,7 +1374,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
 
        bdevname(reg->hr_bdev, reg->hr_dev_name);
 
-       sectsize = bdev_hardsect_size(reg->hr_bdev);
+       sectsize = bdev_logical_block_size(reg->hr_bdev);
        if (sectsize != reg->hr_block_bytes) {
                mlog(ML_ERROR,
                     "blocksize %u incorrect for device, expected %d",
@@ -1335,6 +1427,7 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
        ret = wait_event_interruptible(o2hb_steady_queue,
                                atomic_read(&reg->hr_steady_iterations) == 0);
        if (ret) {
+               /* We got interrupted (hello ptrace!).  Clean up */
                spin_lock(&o2hb_live_lock);
                hb_task = reg->hr_task;
                reg->hr_task = NULL;
@@ -1345,7 +1438,16 @@ static ssize_t o2hb_region_dev_write(struct o2hb_region *reg,
                goto out;
        }
 
-       ret = count;
+       /* Ok, we were woken.  Make sure it wasn't by drop_item() */
+       spin_lock(&o2hb_live_lock);
+       hb_task = reg->hr_task;
+       spin_unlock(&o2hb_live_lock);
+
+       if (hb_task)
+               ret = count;
+       else
+               ret = -EIO;
+
 out:
        if (filp)
                fput(filp);
@@ -1353,7 +1455,7 @@ out:
                iput(inode);
        if (ret < 0) {
                if (reg->hr_bdev) {
-                       blkdev_put(reg->hr_bdev);
+                       blkdev_put(reg->hr_bdev, FMODE_READ|FMODE_WRITE);
                        reg->hr_bdev = NULL;
                }
        }
@@ -1367,7 +1469,7 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg,
 
        spin_lock(&o2hb_live_lock);
        if (reg->hr_task)
-               pid = reg->hr_task->pid;
+               pid = task_pid_nr(reg->hr_task);
        spin_unlock(&o2hb_live_lock);
 
        if (!pid)
@@ -1488,24 +1590,18 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
                                                          const char *name)
 {
        struct o2hb_region *reg = NULL;
-       struct config_item *ret = NULL;
 
        reg = kzalloc(sizeof(struct o2hb_region), GFP_KERNEL);
        if (reg == NULL)
-               goto out; /* ENOMEM */
+               return ERR_PTR(-ENOMEM);
 
        config_item_init_type_name(&reg->hr_item, name, &o2hb_region_type);
 
-       ret = &reg->hr_item;
-
        spin_lock(&o2hb_live_lock);
        list_add_tail(&reg->hr_all_item, &o2hb_all_regions);
        spin_unlock(&o2hb_live_lock);
-out:
-       if (ret == NULL)
-               kfree(reg);
 
-       return ret;
+       return &reg->hr_item;
 }
 
 static void o2hb_heartbeat_group_drop_item(struct config_group *group,
@@ -1523,6 +1619,15 @@ static void o2hb_heartbeat_group_drop_item(struct config_group *group,
        if (hb_task)
                kthread_stop(hb_task);
 
+       /*
+        * If we're racing a dev_write(), we need to wake them.  They will
+        * check reg->hr_task
+        */
+       if (atomic_read(&reg->hr_steady_iterations) != 0) {
+               atomic_set(&reg->hr_steady_iterations, 0);
+               wake_up(&o2hb_steady_queue);
+       }
+
        config_item_put(item);
 }
 
@@ -1693,9 +1798,18 @@ static int o2hb_region_get(const char *region_uuid)
                ret = -ENOENT;
        spin_unlock(&o2hb_live_lock);
 
-       if (!ret)
-               ret = o2nm_depend_item(&reg->hr_item);
+       if (ret)
+               goto out;
+
+       ret = o2nm_depend_this_node();
+       if (ret)
+               goto out;
+
+       ret = o2nm_depend_item(&reg->hr_item);
+       if (ret)
+               o2nm_undepend_this_node();
 
+out:
        return ret;
 }
 
@@ -1709,8 +1823,10 @@ static void o2hb_region_put(const char *region_uuid)
 
        spin_unlock(&o2hb_live_lock);
 
-       if (reg)
+       if (reg) {
                o2nm_undepend_item(&reg->hr_item);
+               o2nm_undepend_this_node();
+       }
 }
 
 int o2hb_register_callback(const char *region_uuid,