+static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
+ int slot_num,
+ struct ocfs2_dinode *la_dinode,
+ struct ocfs2_dinode *tl_dinode,
+ struct ocfs2_quota_recovery *qrec); /* Forward declaration only: the
+ * function is static, so its definition must appear elsewhere in this
+ * translation unit. ocfs2_queue_replay_slots() calls it once per slot
+ * flagged for replay, passing NULL for la_dinode, tl_dinode and qrec. */
+
+/*
+ * Convenience wrapper around __ocfs2_wait_on_mount() that passes 0 as the
+ * second argument and hands the result back unchanged.
+ * NOTE(review): the second argument presumably selects whether to also wait
+ * for quotas (compare ocfs2_wait_on_quotas()) - confirm against the helper.
+ */
+static inline int ocfs2_wait_on_mount(struct ocfs2_super *osb)
+{
+	int status = __ocfs2_wait_on_mount(osb, 0);
+
+	return status;
+}
+
+/*
+ * Convenience wrapper around __ocfs2_wait_on_mount() that passes 1 as the
+ * second argument and hands the result back unchanged.
+ * NOTE(review): judging by this wrapper's name, 1 presumably asks the helper
+ * to wait for quotas as well - confirm against the helper.
+ */
+static inline int ocfs2_wait_on_quotas(struct ocfs2_super *osb)
+{
+	int status = __ocfs2_wait_on_mount(osb, 1);
+
+	return status;
+}
+
+/*
+ * The replay map tracks which slots are online and which are offline, so
+ * that the offline slots can be recovered during recovery and mount.
+ */
+
+/* State of a replay map; stored in the map's rm_state field. */
+enum ocfs2_replay_state {
+	/* Replay is not needed, so ignore this map */
+	REPLAY_UNNEEDED = 0,
+	/* Replay slots marked in rm_replay_slots */
+	REPLAY_NEEDED = 1,
+	/* Replay was already queued */
+	REPLAY_DONE = 2
+};
+
+/*
+ * Per-superblock replay bookkeeping. It is allocated as one chunk: this
+ * header followed by one byte per slot.
+ */
+struct ocfs2_replay_map {
+	/* Number of entries in rm_replay_slots (taken from osb->max_slots). */
+	unsigned int rm_slots;
+	/* Whether the flagged slots still need to be queued for replay. */
+	enum ocfs2_replay_state rm_state;
+	/*
+	 * One flag per slot; non-zero means the slot must be replayed.
+	 * Declared as a C99 flexible array member rather than a GNU
+	 * zero-length array, so sizeof(struct ocfs2_replay_map) still
+	 * covers only the header.
+	 */
+	unsigned char rm_replay_slots[];
+};
+
+/*
+ * Record a new replay state in osb's replay map.
+ *
+ * Does nothing when no replay map exists, or when the map has already
+ * reached REPLAY_DONE (the replay was queued, so there is nothing left
+ * to do).
+ */
+void ocfs2_replay_map_set_state(struct ocfs2_super *osb, int state)
+{
+	struct ocfs2_replay_map *map = osb->replay_map;
+
+	if (map && map->rm_state != REPLAY_DONE)
+		map->rm_state = state;
+}
+
+/*
+ * Allocate osb->replay_map and flag every offline slot in it.
+ *
+ * The map starts in REPLAY_UNNEEDED state with one flag byte per slot
+ * (osb->max_slots of them). A slot is flagged when
+ * ocfs2_slot_to_node_num_locked() reports -ENOENT for it.
+ *
+ * Returns 0 on success, or when a map already exists, and -ENOMEM when the
+ * allocation fails.
+ */
+int ocfs2_compute_replay_slots(struct ocfs2_super *osb)
+{
+	struct ocfs2_replay_map *map;
+	int slot, node_num;
+
+	/* A previously computed map is kept as-is. */
+	if (osb->replay_map)
+		return 0;
+
+	map = kzalloc(sizeof(*map) + (osb->max_slots * sizeof(char)),
+		      GFP_KERNEL);
+	if (!map) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	spin_lock(&osb->osb_lock);
+
+	map->rm_slots = osb->max_slots;
+	map->rm_state = REPLAY_UNNEEDED;
+
+	/* kzalloc() cleared every flag; only offline slot(s) get set. */
+	slot = 0;
+	while (slot < map->rm_slots) {
+		if (ocfs2_slot_to_node_num_locked(osb, slot, &node_num) == -ENOENT)
+			map->rm_replay_slots[slot] = 1;
+		slot++;
+	}
+
+	/* Publish the finished map while still holding osb_lock. */
+	osb->replay_map = map;
+	spin_unlock(&osb->osb_lock);
+
+	return 0;
+}
+
+/*
+ * Queue recovery completion for every slot flagged in the replay map.
+ *
+ * Only acts when a map exists and its state is REPLAY_NEEDED. Afterwards
+ * the map is moved to REPLAY_DONE so the slots are not queued again.
+ */
+void ocfs2_queue_replay_slots(struct ocfs2_super *osb)
+{
+	struct ocfs2_replay_map *map = osb->replay_map;
+	int slot;
+
+	if (!map || map->rm_state != REPLAY_NEEDED)
+		return;
+
+	for (slot = 0; slot < map->rm_slots; slot++) {
+		if (!map->rm_replay_slots[slot])
+			continue;
+
+		ocfs2_queue_recovery_completion(osb->journal, slot, NULL,
+						NULL, NULL);
+	}
+
+	map->rm_state = REPLAY_DONE;
+}
+
+/*
+ * Release osb's replay map, if there is one, and clear the pointer so a
+ * later ocfs2_compute_replay_slots() call builds a fresh map.
+ */
+void ocfs2_free_replay_slots(struct ocfs2_super *osb)
+{
+	if (osb->replay_map) {
+		kfree(osb->replay_map);
+		osb->replay_map = NULL;
+	}
+}
+
+/*
+ * Set up the recovery state in osb: the recovery lock, the wait queue, the
+ * "no thread running / recovery enabled" flags, and a zeroed recovery map
+ * with room for osb->max_slots entries.
+ *
+ * Returns 0 on success or -ENOMEM when the map cannot be allocated. The
+ * lock, flags and wait queue are initialized even on failure.
+ */
+int ocfs2_recovery_init(struct ocfs2_super *osb)
+{
+	struct ocfs2_recovery_map *map;
+
+	mutex_init(&osb->recovery_lock);
+	osb->disable_recovery = 0;
+	osb->recovery_thread_task = NULL;
+	init_waitqueue_head(&osb->recovery_event);
+
+	/* Header and entry array share a single allocation. */
+	map = kzalloc(sizeof(*map) + osb->max_slots * sizeof(unsigned int),
+		      GFP_KERNEL);
+	if (!map) {
+		mlog_errno(-ENOMEM);
+		return -ENOMEM;
+	}
+
+	/* The entries start directly after the header. */
+	map->rm_entries = (unsigned int *)(map + 1);
+	osb->recovery_map = map;
+
+	return 0;
+}
+
+/*
+ * Report whether a recovery thread is currently registered in osb.
+ *
+ * This is evaluated as a wait_event() condition, where the lock cannot be
+ * taken, so a full memory barrier is issued before reading
+ * recovery_thread_task to make sure the NULL task is seen before the waiter
+ * is woken up.
+ *
+ * Returns non-zero while osb->recovery_thread_task is set, 0 otherwise.
+ */
+static int ocfs2_recovery_thread_running(struct ocfs2_super *osb)
+{
+	int running;
+
+	mb();
+	running = (osb->recovery_thread_task != NULL);
+
+	return running;
+}
+
+/*
+ * Shut recovery down for osb and free its recovery map.
+ *
+ * The steps run in a fixed order: forbid new recovery threads, wait for a
+ * running one to finish, flush the pending recovery completion work, then
+ * release the map.
+ */
+void ocfs2_recovery_exit(struct ocfs2_super *osb)
+{
+	/*
+	 * Block any new recovery threads first, then wait for a currently
+	 * running one to exit. This must happen before the vol_state is
+	 * set.
+	 */
+	mutex_lock(&osb->recovery_lock);
+	osb->disable_recovery = 1;
+	mutex_unlock(&osb->recovery_lock);
+	wait_event(osb->recovery_event, !ocfs2_recovery_thread_running(osb));
+
+	/*
+	 * No further recovery thread can be launched from here on, so
+	 * drain whatever recovery completion work is still queued.
+	 */
+	flush_workqueue(ocfs2_wq);
+
+	/*
+	 * Recovery is shut down and the osb is about to be freed, so the
+	 * osb_lock is not taken for this last access.
+	 */
+	/* XXX: Should we bug if there are dirty entries? */
+	kfree(osb->recovery_map);
+}
+
+/*
+ * Check whether node_num is present in osb's recovery map.
+ *
+ * The caller must hold osb->osb_lock (asserted).
+ * Returns 1 when the node is found among the used entries, 0 otherwise.
+ */
+static int __ocfs2_recovery_map_test(struct ocfs2_super *osb,
+				     unsigned int node_num)
+{
+	struct ocfs2_recovery_map *map = osb->recovery_map;
+	int found = 0;
+	int idx;
+
+	assert_spin_locked(&osb->osb_lock);
+
+	for (idx = 0; idx < map->rm_used; idx++) {
+		if (map->rm_entries[idx] == node_num) {
+			found = 1;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/*
+ * Add node_num to osb's recovery map, test-and-set style.
+ *
+ * Takes osb->osb_lock for the whole operation. Returns the previous state:
+ * 1 when the node was already in the map (nothing is changed), 0 when it
+ * was appended by this call. Hits BUG_ON() if the map is already full.
+ */
+static int ocfs2_recovery_map_set(struct ocfs2_super *osb,
+				  unsigned int node_num)
+{
+	struct ocfs2_recovery_map *map = osb->recovery_map;
+	int was_set;
+
+	spin_lock(&osb->osb_lock);
+
+	was_set = __ocfs2_recovery_map_test(osb, node_num);
+	if (!was_set) {
+		/* XXX: Can this be exploited? Not from o2dlm... */
+		BUG_ON(map->rm_used >= osb->max_slots);
+
+		map->rm_entries[map->rm_used++] = node_num;
+	}
+
+	spin_unlock(&osb->osb_lock);
+
+	return was_set;
+}
+
+/*
+ * Remove the first entry equal to node_num from osb's recovery map, under
+ * osb->osb_lock. The entries after it are shifted down one position so the
+ * used part of the array stays contiguous. Nothing changes when the node is
+ * not in the map.
+ */
+static void ocfs2_recovery_map_clear(struct ocfs2_super *osb,
+				     unsigned int node_num)
+{
+	struct ocfs2_recovery_map *map = osb->recovery_map;
+	int idx;
+
+	spin_lock(&osb->osb_lock);
+
+	for (idx = 0; idx < map->rm_used; idx++) {
+		if (map->rm_entries[idx] != node_num)
+			continue;
+
+		/*
+		 * XXX: be careful with the pointer math. The count is the
+		 * number of entries that follow idx, which is zero when
+		 * idx is the last used entry.
+		 */
+		memmove(&map->rm_entries[idx], &map->rm_entries[idx + 1],
+			(map->rm_used - idx - 1) * sizeof(unsigned int));
+		map->rm_used--;
+		break;
+	}
+
+	spin_unlock(&osb->osb_lock);
+}