SAFE public projects git trees. - safe/jmp/linux-2.6/blob - fs/ceph/super.c

   1
   2 #include "ceph_debug.h"
   3
   4 #include <linux/backing-dev.h>
   5 #include <linux/fs.h>
   6 #include <linux/inet.h>
   7 #include <linux/in6.h>
   8 #include <linux/module.h>
   9 #include <linux/mount.h>
  10 #include <linux/parser.h>
  11 #include <linux/rwsem.h>
  12 #include <linux/sched.h>
  13 #include <linux/seq_file.h>
  14 #include <linux/statfs.h>
  15 #include <linux/string.h>
  16 #include <linux/version.h>
  17 #include <linux/vmalloc.h>
  18
  19 #include "decode.h"
  20 #include "super.h"
  21 #include "mon_client.h"
  22
  23 /*
  24  * Ceph superblock operations
  25  *
  26  * Handle the basics of mounting, unmounting.
  27  */
  28
  29
  30 /*
  31  * find filename portion of a path (/foo/bar/baz -> baz)
  32  */
  33 const char *ceph_file_part(const char *s, int len)
  34 {
  35         const char *e = s + len;
  36
  37         while (e != s && *(e-1) != '/')
  38                 e--;
  39         return e;
  40 }
  41
  42
  43 /*
  44  * super ops
  45  */
  46 static void ceph_put_super(struct super_block *s)
  47 {
  48         struct ceph_client *cl = ceph_client(s);
  49
  50         dout("put_super\n");
  51         ceph_mdsc_close_sessions(&cl->mdsc);
  52         return;
  53 }
  54
  55 static int ceph_statfs(struct dentry *dentry, struct kstatfs *buf)
  56 {
  57         struct ceph_client *client = ceph_inode_to_client(dentry->d_inode);
  58         struct ceph_monmap *monmap = client->monc.monmap;
  59         struct ceph_statfs st;
  60         u64 fsid;
  61         int err;
  62
  63         dout("statfs\n");
  64         err = ceph_monc_do_statfs(&client->monc, &st);
  65         if (err < 0)
  66                 return err;
  67
  68         /* fill in kstatfs */
  69         buf->f_type = CEPH_SUPER_MAGIC;  /* ?? */
  70
  71         /*
  72          * express utilization in terms of large blocks to avoid
  73          * overflow on 32-bit machines.
  74          */
  75         buf->f_bsize = 1 << CEPH_BLOCK_SHIFT;
  76         buf->f_blocks = le64_to_cpu(st.kb) >> (CEPH_BLOCK_SHIFT-10);
  77         buf->f_bfree = (le64_to_cpu(st.kb) - le64_to_cpu(st.kb_used)) >>
  78                 (CEPH_BLOCK_SHIFT-10);
  79         buf->f_bavail = le64_to_cpu(st.kb_avail) >> (CEPH_BLOCK_SHIFT-10);
  80
  81         buf->f_files = le64_to_cpu(st.num_objects);
  82         buf->f_ffree = -1;
  83         buf->f_namelen = PATH_MAX;
  84         buf->f_frsize = PAGE_CACHE_SIZE;
  85
  86         /* leave fsid little-endian, regardless of host endianness */
  87         fsid = *(u64 *)(&monmap->fsid) ^ *((u64 *)&monmap->fsid + 1);
  88         buf->f_fsid.val[0] = fsid & 0xffffffff;
  89         buf->f_fsid.val[1] = fsid >> 32;
  90
  91         return 0;
  92 }
  93
  94
  95 static int ceph_syncfs(struct super_block *sb, int wait)
  96 {
  97         dout("sync_fs %d\n", wait);
  98         ceph_osdc_sync(&ceph_client(sb)->osdc);
  99         ceph_mdsc_sync(&ceph_client(sb)->mdsc);
 100         return 0;
 101 }
 102
 103
 104 /**
 105  * ceph_show_options - Show mount options in /proc/mounts
 106  * @m: seq_file to write to
 107  * @mnt: mount descriptor
 108  */
 109 static int ceph_show_options(struct seq_file *m, struct vfsmount *mnt)
 110 {
 111         struct ceph_client *client = ceph_sb_to_client(mnt->mnt_sb);
 112         struct ceph_mount_args *args = &client->mount_args;
 113
 114         if (args->flags & CEPH_OPT_FSID)
 115                 seq_printf(m, ",fsidmajor=%llu,fsidminor%llu",
 116                            le64_to_cpu(*(__le64 *)&args->fsid.fsid[0]),
 117                            le64_to_cpu(*(__le64 *)&args->fsid.fsid[8]));
 118         if (args->flags & CEPH_OPT_NOSHARE)
 119                 seq_puts(m, ",noshare");
 120         if (args->flags & CEPH_OPT_DIRSTAT)
 121                 seq_puts(m, ",dirstat");
 122         if ((args->flags & CEPH_OPT_RBYTES) == 0)
 123                 seq_puts(m, ",norbytes");
 124         if (args->flags & CEPH_OPT_NOCRC)
 125                 seq_puts(m, ",nocrc");
 126         if (args->flags & CEPH_OPT_NOASYNCREADDIR)
 127                 seq_puts(m, ",noasyncreaddir");
 128         if (strcmp(args->snapdir_name, CEPH_SNAPDIRNAME_DEFAULT))
 129                 seq_printf(m, ",snapdirname=%s", args->snapdir_name);
 130         if (args->secret)
 131                 seq_puts(m, ",secret=<hidden>");
 132         return 0;
 133 }
 134
 135 /*
 136  * caches
 137  */
/* created by init_caches(), destroyed by destroy_caches() */
struct kmem_cache *ceph_inode_cachep;	/* struct ceph_inode_info */
struct kmem_cache *ceph_cap_cachep;	/* struct ceph_cap */
struct kmem_cache *ceph_dentry_cachep;	/* struct ceph_dentry_info */
struct kmem_cache *ceph_file_cachep;	/* struct ceph_file_info */
 142
 143 static void ceph_inode_init_once(void *foo)
 144 {
 145         struct ceph_inode_info *ci = foo;
 146         inode_init_once(&ci->vfs_inode);
 147 }
 148
 149 static int __init init_caches(void)
 150 {
 151         ceph_inode_cachep = kmem_cache_create("ceph_inode_info",
 152                                       sizeof(struct ceph_inode_info),
 153                                       __alignof__(struct ceph_inode_info),
 154                                       (SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD),
 155                                       ceph_inode_init_once);
 156         if (ceph_inode_cachep == NULL)
 157                 return -ENOMEM;
 158
 159         ceph_cap_cachep = KMEM_CACHE(ceph_cap,
 160                                      SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 161         if (ceph_cap_cachep == NULL)
 162                 goto bad_cap;
 163
 164         ceph_dentry_cachep = KMEM_CACHE(ceph_dentry_info,
 165                                         SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 166         if (ceph_dentry_cachep == NULL)
 167                 goto bad_dentry;
 168
 169         ceph_file_cachep = KMEM_CACHE(ceph_file_info,
 170                                       SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD);
 171         if (ceph_file_cachep == NULL)
 172                 goto bad_file;
 173
 174         return 0;
 175
 176 bad_file:
 177         kmem_cache_destroy(ceph_dentry_cachep);
 178 bad_dentry:
 179         kmem_cache_destroy(ceph_cap_cachep);
 180 bad_cap:
 181         kmem_cache_destroy(ceph_inode_cachep);
 182         return -ENOMEM;
 183 }
 184
 185 static void destroy_caches(void)
 186 {
 187         kmem_cache_destroy(ceph_inode_cachep);
 188         kmem_cache_destroy(ceph_cap_cachep);
 189         kmem_cache_destroy(ceph_dentry_cachep);
 190         kmem_cache_destroy(ceph_file_cachep);
 191 }
 192
 193
 194 /*
 195  * ceph_umount_begin - initiate forced umount.  Tear down down the
 196  * mount, skipping steps that may hang while waiting for server(s).
 197  */
 198 static void ceph_umount_begin(struct super_block *sb)
 199 {
 200         struct ceph_client *client = ceph_sb_to_client(sb);
 201
 202         dout("ceph_umount_begin - starting forced umount\n");
 203         if (!client)
 204                 return;
 205         client->mount_state = CEPH_MOUNT_SHUTDOWN;
 206         return;
 207 }
 208
 209 static const struct super_operations ceph_super_ops = {
 210         .alloc_inode    = ceph_alloc_inode,
 211         .destroy_inode  = ceph_destroy_inode,
 212         .write_inode    = ceph_write_inode,
 213         .sync_fs        = ceph_syncfs,
 214         .put_super      = ceph_put_super,
 215         .show_options   = ceph_show_options,
 216         .statfs         = ceph_statfs,
 217         .umount_begin   = ceph_umount_begin,
 218 };
 219
 220
 221 const char *ceph_msg_type_name(int type)
 222 {
 223         switch (type) {
 224         case CEPH_MSG_SHUTDOWN: return "shutdown";
 225         case CEPH_MSG_PING: return "ping";
 226         case CEPH_MSG_MON_MAP: return "mon_map";
 227         case CEPH_MSG_MON_GET_MAP: return "mon_get_map";
 228         case CEPH_MSG_MON_SUBSCRIBE: return "mon_subscribe";
 229         case CEPH_MSG_MON_SUBSCRIBE_ACK: return "mon_subscribe_ack";
 230         case CEPH_MSG_CLIENT_MOUNT: return "client_mount";
 231         case CEPH_MSG_CLIENT_MOUNT_ACK: return "client_mount_ack";
 232         case CEPH_MSG_STATFS: return "statfs";
 233         case CEPH_MSG_STATFS_REPLY: return "statfs_reply";
 234         case CEPH_MSG_MDS_MAP: return "mds_map";
 235         case CEPH_MSG_CLIENT_SESSION: return "client_session";
 236         case CEPH_MSG_CLIENT_RECONNECT: return "client_reconnect";
 237         case CEPH_MSG_CLIENT_REQUEST: return "client_request";
 238         case CEPH_MSG_CLIENT_REQUEST_FORWARD: return "client_request_forward";
 239         case CEPH_MSG_CLIENT_REPLY: return "client_reply";
 240         case CEPH_MSG_CLIENT_CAPS: return "client_caps";
 241         case CEPH_MSG_CLIENT_CAPRELEASE: return "client_cap_release";
 242         case CEPH_MSG_CLIENT_SNAP: return "client_snap";
 243         case CEPH_MSG_CLIENT_LEASE: return "client_lease";
 244         case CEPH_MSG_OSD_MAP: return "osd_map";
 245         case CEPH_MSG_OSD_OP: return "osd_op";
 246         case CEPH_MSG_OSD_OPREPLY: return "osd_opreply";
 247         default: return "unknown";
 248         }
 249 }
 250
 251
 252 /*
 253  * mount options
 254  */
/*
 * Token values returned by match_token() for the entries in arg_tokens.
 *
 * The ORDER of these enumerators matters: parse_mount_args() uses a
 * relational comparison on the token value to decide whether the
 * option's argument must be parsed as an integer.  Keep each option in
 * the group that matches its argument type.
 */
enum {
	Opt_fsidmajor,
	Opt_fsidminor,
	Opt_monport,
	Opt_wsize,
	Opt_rsize,
	Opt_osdtimeout,
	Opt_mount_timeout,
	Opt_caps_wanted_delay_min,
	Opt_caps_wanted_delay_max,
	Opt_readdir_max_entries,
	/* int args above */
	Opt_snapdirname,
	Opt_secret,
	/* string args above */
	Opt_ip,		/* takes a string, handed to ceph_parse_ips() */
	/* the remaining options are bare flags with no argument */
	Opt_noshare,
	Opt_dirstat,
	Opt_nodirstat,
	Opt_rbytes,
	Opt_norbytes,
	Opt_nocrc,
	Opt_noasyncreaddir,
};
 279
 280 static match_table_t arg_tokens = {
 281         {Opt_fsidmajor, "fsidmajor=%ld"},
 282         {Opt_fsidminor, "fsidminor=%ld"},
 283         {Opt_monport, "monport=%d"},
 284         {Opt_wsize, "wsize=%d"},
 285         {Opt_rsize, "rsize=%d"},
 286         {Opt_osdtimeout, "osdtimeout=%d"},
 287         {Opt_mount_timeout, "mount_timeout=%d"},
 288         {Opt_caps_wanted_delay_min, "caps_wanted_delay_min=%d"},
 289         {Opt_caps_wanted_delay_max, "caps_wanted_delay_max=%d"},
 290         {Opt_readdir_max_entries, "readdir_max_entries=%d"},
 291         /* int args above */
 292         {Opt_snapdirname, "snapdirname=%s"},
 293         {Opt_secret, "secret=%s"},
 294         /* string args above */
 295         {Opt_ip, "ip=%s"},
 296         {Opt_noshare, "noshare"},
 297         {Opt_dirstat, "dirstat"},
 298         {Opt_nodirstat, "nodirstat"},
 299         {Opt_rbytes, "rbytes"},
 300         {Opt_norbytes, "norbytes"},
 301         {Opt_nocrc, "nocrc"},
 302         {Opt_noasyncreaddir, "noasyncreaddir"},
 303         {-1, NULL}
 304 };
 305
 306
 307 static int parse_mount_args(struct ceph_client *client,
 308                             int flags, char *options, const char *dev_name,
 309                             const char **path)
 310 {
 311         struct ceph_mount_args *args = &client->mount_args;
 312         const char *c;
 313         int err;
 314         substring_t argstr[MAX_OPT_ARGS];
 315         int num_mon;
 316         struct ceph_entity_addr mon_addr[CEPH_MAX_MON_MOUNT_ADDR];
 317         int i;
 318
 319         dout("parse_mount_args dev_name '%s'\n", dev_name);
 320         memset(args, 0, sizeof(*args));
 321
 322         /* start with defaults */
 323         args->sb_flags = flags;
 324         args->flags = CEPH_OPT_DEFAULT;
 325         args->osd_timeout = 5;    /* seconds */
 326         args->mount_timeout = CEPH_MOUNT_TIMEOUT_DEFAULT; /* seconds */
 327         args->caps_wanted_delay_min = CEPH_CAPS_WANTED_DELAY_MIN_DEFAULT;
 328         args->caps_wanted_delay_max = CEPH_CAPS_WANTED_DELAY_MAX_DEFAULT;
 329         args->snapdir_name = kstrdup(CEPH_SNAPDIRNAME_DEFAULT, GFP_KERNEL);
 330         args->cap_release_safety = CEPH_CAPS_PER_RELEASE * 4;
 331         args->max_readdir = 1024;
 332
 333         /* ip1[:port1][,ip2[:port2]...]:/subdir/in/fs */
 334         if (!dev_name)
 335                 return -EINVAL;
 336         *path = strstr(dev_name, ":/");
 337         if (*path == NULL) {
 338                 pr_err("device name is missing path (no :/ in %s)\n",
 339                        dev_name);
 340                 return -EINVAL;
 341         }
 342
 343         /* get mon ip(s) */
 344         err = ceph_parse_ips(dev_name, *path, mon_addr,
 345                              CEPH_MAX_MON_MOUNT_ADDR, &num_mon);
 346         if (err < 0)
 347                 return err;
 348
 349         /* build initial monmap */
 350         client->monc.monmap = kzalloc(sizeof(*client->monc.monmap) +
 351                                num_mon*sizeof(client->monc.monmap->mon_inst[0]),
 352                                GFP_KERNEL);
 353         if (!client->monc.monmap)
 354                 return -ENOMEM;
 355         for (i = 0; i < num_mon; i++) {
 356                 client->monc.monmap->mon_inst[i].addr = mon_addr[i];
 357                 client->monc.monmap->mon_inst[i].addr.erank = 0;
 358                 client->monc.monmap->mon_inst[i].addr.nonce = 0;
 359                 client->monc.monmap->mon_inst[i].name.type =
 360                         CEPH_ENTITY_TYPE_MON;
 361                 client->monc.monmap->mon_inst[i].name.num = cpu_to_le64(i);
 362         }
 363         client->monc.monmap->num_mon = num_mon;
 364         memset(&args->my_addr.in_addr, 0, sizeof(args->my_addr.in_addr));
 365
 366         /* path on server */
 367         *path += 2;
 368         dout("server path '%s'\n", *path);
 369
 370         /* parse mount options */
 371         while ((c = strsep(&options, ",")) != NULL) {
 372                 int token, intval, ret;
 373                 if (!*c)
 374                         continue;
 375                 token = match_token((char *)c, arg_tokens, argstr);
 376                 if (token < 0) {
 377                         pr_err("bad mount option at '%s'\n", c);
 378                         return -EINVAL;
 379
 380                 }
 381                 if (token < Opt_ip) {
 382                         ret = match_int(&argstr[0], &intval);
 383                         if (ret < 0) {
 384                                 pr_err("bad mount option arg (not int) "
 385                                        "at '%s'\n", c);
 386                                 continue;
 387                         }
 388                         dout("got token %d intval %d\n", token, intval);
 389                 }
 390                 switch (token) {
 391                 case Opt_fsidmajor:
 392                         *(__le64 *)&args->fsid.fsid[0] = cpu_to_le64(intval);
 393                         break;
 394                 case Opt_fsidminor:
 395                         *(__le64 *)&args->fsid.fsid[8] = cpu_to_le64(intval);
 396                         break;
 397                 case Opt_ip:
 398                         err = ceph_parse_ips(argstr[0].from,
 399                                              argstr[0].to,
 400                                              &args->my_addr,
 401                                              1, NULL);
 402                         if (err < 0)
 403                                 return err;
 404                         args->flags |= CEPH_OPT_MYIP;
 405                         break;
 406
 407                 case Opt_snapdirname:
 408                         kfree(args->snapdir_name);
 409                         args->snapdir_name = kstrndup(argstr[0].from,
 410                                               argstr[0].to-argstr[0].from,
 411                                               GFP_KERNEL);
 412                         break;
 413                 case Opt_secret:
 414                         args->secret = kstrndup(argstr[0].from,
 415                                                 argstr[0].to-argstr[0].from,
 416                                                 GFP_KERNEL);
 417                         break;
 418
 419                         /* misc */
 420                 case Opt_wsize:
 421                         args->wsize = intval;
 422                         break;
 423                 case Opt_rsize:
 424                         args->rsize = intval;
 425                         break;
 426                 case Opt_osdtimeout:
 427                         args->osd_timeout = intval;
 428                         break;
 429                 case Opt_mount_timeout:
 430                         args->mount_timeout = intval;
 431                         break;
 432                 case Opt_caps_wanted_delay_min:
 433                         args->caps_wanted_delay_min = intval;
 434                         break;
 435                 case Opt_caps_wanted_delay_max:
 436                         args->caps_wanted_delay_max = intval;
 437                         break;
 438                 case Opt_readdir_max_entries:
 439                         args->max_readdir = intval;
 440                         break;
 441
 442                 case Opt_noshare:
 443                         args->flags |= CEPH_OPT_NOSHARE;
 444                         break;
 445
 446                 case Opt_dirstat:
 447                         args->flags |= CEPH_OPT_DIRSTAT;
 448                         break;
 449                 case Opt_nodirstat:
 450                         args->flags &= ~CEPH_OPT_DIRSTAT;
 451                         break;
 452                 case Opt_rbytes:
 453                         args->flags |= CEPH_OPT_RBYTES;
 454                         break;
 455                 case Opt_norbytes:
 456                         args->flags &= ~CEPH_OPT_RBYTES;
 457                         break;
 458                 case Opt_nocrc:
 459                         args->flags |= CEPH_OPT_NOCRC;
 460                         break;
 461                 case Opt_noasyncreaddir:
 462                         args->flags |= CEPH_OPT_NOASYNCREADDIR;
 463                         break;
 464
 465                 default:
 466                         BUG_ON(token);
 467                 }
 468         }
 469
 470         return 0;
 471 }
 472
 473 static void release_mount_args(struct ceph_mount_args *args)
 474 {
 475         kfree(args->snapdir_name);
 476         args->snapdir_name = NULL;
 477         kfree(args->secret);
 478         args->secret = NULL;
 479 }
 480
 481 /*
 482  * create a fresh client instance
 483  */
 484 static struct ceph_client *ceph_create_client(void)
 485 {
 486         struct ceph_client *client;
 487         int err = -ENOMEM;
 488
 489         client = kzalloc(sizeof(*client), GFP_KERNEL);
 490         if (client == NULL)
 491                 return ERR_PTR(-ENOMEM);
 492
 493         mutex_init(&client->mount_mutex);
 494
 495         init_waitqueue_head(&client->mount_wq);
 496
 497         client->sb = NULL;
 498         client->mount_state = CEPH_MOUNT_MOUNTING;
 499         client->whoami = -1;
 500
 501         client->msgr = NULL;
 502
 503         client->mount_err = 0;
 504         client->signed_ticket = NULL;
 505         client->signed_ticket_len = 0;
 506
 507         err = -ENOMEM;
 508         client->wb_wq = create_workqueue("ceph-writeback");
 509         if (client->wb_wq == NULL)
 510                 goto fail;
 511         client->pg_inv_wq = create_singlethread_workqueue("ceph-pg-invalid");
 512         if (client->pg_inv_wq == NULL)
 513                 goto fail_wb_wq;
 514         client->trunc_wq = create_singlethread_workqueue("ceph-trunc");
 515         if (client->trunc_wq == NULL)
 516                 goto fail_pg_inv_wq;
 517
 518         /* subsystems */
 519         err = ceph_monc_init(&client->monc, client);
 520         if (err < 0)
 521                 goto fail_trunc_wq;
 522         err = ceph_osdc_init(&client->osdc, client);
 523         if (err < 0)
 524                 goto fail_monc;
 525         ceph_mdsc_init(&client->mdsc, client);
 526         return client;
 527
 528 fail_monc:
 529         ceph_monc_stop(&client->monc);
 530 fail_trunc_wq:
 531         destroy_workqueue(client->trunc_wq);
 532 fail_pg_inv_wq:
 533         destroy_workqueue(client->pg_inv_wq);
 534 fail_wb_wq:
 535         destroy_workqueue(client->wb_wq);
 536 fail:
 537         kfree(client);
 538         return ERR_PTR(err);
 539 }
 540
 541 static void ceph_destroy_client(struct ceph_client *client)
 542 {
 543         dout("destroy_client %p\n", client);
 544
 545         /* unmount */
 546         ceph_mdsc_stop(&client->mdsc);
 547         ceph_monc_stop(&client->monc);
 548         ceph_osdc_stop(&client->osdc);
 549
 550         kfree(client->signed_ticket);
 551
 552         ceph_debugfs_client_cleanup(client);
 553         destroy_workqueue(client->wb_wq);
 554         destroy_workqueue(client->pg_inv_wq);
 555         destroy_workqueue(client->trunc_wq);
 556
 557         if (client->msgr)
 558                 ceph_messenger_destroy(client->msgr);
 559         if (client->wb_pagevec_pool)
 560                 mempool_destroy(client->wb_pagevec_pool);
 561
 562         release_mount_args(&client->mount_args);
 563
 564         kfree(client);
 565         dout("destroy_client %p done\n", client);
 566 }
 567
 568 /*
 569  * true if we have the mon map (and have thus joined the cluster)
 570  */
 571 static int have_mon_map(struct ceph_client *client)
 572 {
 573         return client->monc.monmap && client->monc.monmap->epoch;
 574 }
 575
 576 /*
 577  * Bootstrap mount by opening the root directory.  Note the mount
 578  * @started time from caller, and time out if this takes too long.
 579  */
 580 static struct dentry *open_root_dentry(struct ceph_client *client,
 581                                        const char *path,
 582                                        unsigned long started)
 583 {
 584         struct ceph_mds_client *mdsc = &client->mdsc;
 585         struct ceph_mds_request *req = NULL;
 586         int err;
 587         struct dentry *root;
 588
 589         /* open dir */
 590         dout("open_root_inode opening '%s'\n", path);
 591         req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_GETATTR, USE_ANY_MDS);
 592         if (IS_ERR(req))
 593                 return ERR_PTR(PTR_ERR(req));
 594         req->r_path1 = kstrdup(path, GFP_NOFS);
 595         req->r_ino1.ino = CEPH_INO_ROOT;
 596         req->r_ino1.snap = CEPH_NOSNAP;
 597         req->r_started = started;
 598         req->r_timeout = client->mount_args.mount_timeout * HZ;
 599         req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
 600         req->r_num_caps = 2;
 601         err = ceph_mdsc_do_request(mdsc, NULL, req);
 602         if (err == 0) {
 603                 dout("open_root_inode success\n");
 604                 if (ceph_ino(req->r_target_inode) == CEPH_INO_ROOT &&
 605                     client->sb->s_root == NULL)
 606                         root = d_alloc_root(req->r_target_inode);
 607                 else
 608                         root = d_obtain_alias(req->r_target_inode);
 609                 req->r_target_inode = NULL;
 610                 dout("open_root_inode success, root dentry is %p\n", root);
 611         } else {
 612                 root = ERR_PTR(err);
 613         }
 614         ceph_mdsc_put_request(req);
 615         return root;
 616 }
 617
 618 /*
 619  * mount: join the ceph cluster, and open root directory.
 620  */
 621 static int ceph_mount(struct ceph_client *client, struct vfsmount *mnt,
 622                       const char *path)
 623 {
 624         struct ceph_entity_addr *myaddr = NULL;
 625         int err;
 626         unsigned long timeout = client->mount_args.mount_timeout * HZ;
 627         unsigned long started = jiffies;  /* note the start time */
 628         struct dentry *root;
 629
 630         dout("mount start\n");
 631         mutex_lock(&client->mount_mutex);
 632
 633         /* initialize the messenger */
 634         if (client->msgr == NULL) {
 635                 if (ceph_test_opt(client, MYIP))
 636                         myaddr = &client->mount_args.my_addr;
 637                 client->msgr = ceph_messenger_create(myaddr);
 638                 if (IS_ERR(client->msgr)) {
 639                         err = PTR_ERR(client->msgr);
 640                         client->msgr = NULL;
 641                         goto out;
 642                 }
 643                 client->msgr->nocrc = ceph_test_opt(client, NOCRC);
 644         }
 645
 646         /* send mount request, and wait for mon, mds, and osd maps */
 647         err = ceph_monc_request_mount(&client->monc);
 648         if (err < 0)
 649                 goto out;
 650
 651         while (!have_mon_map(client) && !client->mount_err) {
 652                 err = -EIO;
 653                 if (timeout && time_after_eq(jiffies, started + timeout))
 654                         goto out;
 655
 656                 /* wait */
 657                 dout("mount waiting for mount\n");
 658                 err = wait_event_interruptible_timeout(client->mount_wq,
 659                                client->mount_err || have_mon_map(client),
 660                                timeout);
 661                 if (err == -EINTR || err == -ERESTARTSYS)
 662                         goto out;
 663                 if (client->mount_err) {
 664                         err = client->mount_err;
 665                         goto out;
 666                 }
 667         }
 668
 669         dout("mount opening root\n");
 670         root = open_root_dentry(client, "", started);
 671         if (IS_ERR(root)) {
 672                 err = PTR_ERR(root);
 673                 goto out;
 674         }
 675         if (client->sb->s_root)
 676                 dput(root);
 677         else
 678                 client->sb->s_root = root;
 679
 680         if (path[0] == 0) {
 681                 dget(root);
 682         } else {
 683                 dout("mount opening base mountpoint\n");
 684                 root = open_root_dentry(client, path, started);
 685                 if (IS_ERR(root)) {
 686                         err = PTR_ERR(root);
 687                         dput(client->sb->s_root);
 688                         client->sb->s_root = NULL;
 689                         goto out;
 690                 }
 691         }
 692
 693         mnt->mnt_root = root;
 694         mnt->mnt_sb = client->sb;
 695
 696         client->mount_state = CEPH_MOUNT_MOUNTED;
 697         dout("mount success\n");
 698         err = 0;
 699
 700 out:
 701         mutex_unlock(&client->mount_mutex);
 702         return err;
 703 }
 704
 705 static int ceph_set_super(struct super_block *s, void *data)
 706 {
 707         struct ceph_client *client = data;
 708         int ret;
 709
 710         dout("set_super %p data %p\n", s, data);
 711
 712         s->s_flags = client->mount_args.sb_flags;
 713         s->s_maxbytes = 1ULL << 40;  /* temp value until we get mdsmap */
 714
 715         s->s_fs_info = client;
 716         client->sb = s;
 717
 718         s->s_op = &ceph_super_ops;
 719         s->s_export_op = &ceph_export_ops;
 720
 721         s->s_time_gran = 1000;  /* 1000 ns == 1 us */
 722
 723         ret = set_anon_super(s, NULL);  /* what is that second arg for? */
 724         if (ret != 0)
 725                 goto fail;
 726
 727         return ret;
 728
 729 fail:
 730         s->s_fs_info = NULL;
 731         client->sb = NULL;
 732         return ret;
 733 }
 734
 735 /*
 736  * share superblock if same fs AND options
 737  */
 738 static int ceph_compare_super(struct super_block *sb, void *data)
 739 {
 740         struct ceph_client *new = data;
 741         struct ceph_mount_args *args = &new->mount_args;
 742         struct ceph_client *other = ceph_sb_to_client(sb);
 743         int i;
 744
 745         dout("ceph_compare_super %p\n", sb);
 746         if (args->flags & CEPH_OPT_FSID) {
 747                 if (ceph_fsid_compare(&args->fsid, &other->fsid)) {
 748                         dout("fsid doesn't match\n");
 749                         return 0;
 750                 }
 751         } else {
 752                 /* do we share (a) monitor? */
 753                 for (i = 0; i < new->monc.monmap->num_mon; i++)
 754                         if (ceph_monmap_contains(other->monc.monmap,
 755                                          &new->monc.monmap->mon_inst[i].addr))
 756                                 break;
 757                 if (i == new->monc.monmap->num_mon) {
 758                         dout("mon ip not part of monmap\n");
 759                         return 0;
 760                 }
 761                 dout("mon ip matches existing sb %p\n", sb);
 762         }
 763         if (args->sb_flags != other->mount_args.sb_flags) {
 764                 dout("flags differ\n");
 765                 return 0;
 766         }
 767         return 1;
 768 }
 769
 770 /*
 771  * construct our own bdi so we can control readahead, etc.
 772  */
 773 static int ceph_init_bdi(struct super_block *sb, struct ceph_client *client)
 774 {
 775         int err;
 776
 777         err = bdi_init(&client->backing_dev_info);
 778         if (err < 0)
 779                 return err;
 780
 781         /* set ra_pages based on rsize mount option? */
 782         if (client->mount_args.rsize >= PAGE_CACHE_SIZE)
 783                 client->backing_dev_info.ra_pages =
 784                         (client->mount_args.rsize + PAGE_CACHE_SIZE - 1)
 785                         >> PAGE_SHIFT;
 786
 787         err = bdi_register_dev(&client->backing_dev_info, sb->s_dev);
 788         return err;
 789 }
 790
 791 static int ceph_get_sb(struct file_system_type *fs_type,
 792                        int flags, const char *dev_name, void *data,
 793                        struct vfsmount *mnt)
 794 {
 795         struct super_block *sb;
 796         struct ceph_client *client;
 797         int err;
 798         int (*compare_super)(struct super_block *, void *) = ceph_compare_super;
 799         const char *path;
 800
 801         dout("ceph_get_sb\n");
 802
 803         /* create client (which we may/may not use) */
 804         client = ceph_create_client();
 805         if (IS_ERR(client))
 806                 return PTR_ERR(client);
 807
 808         err = parse_mount_args(client, flags, data, dev_name, &path);
 809         if (err < 0)
 810                 goto out;
 811
 812         if (client->mount_args.flags & CEPH_OPT_NOSHARE)
 813                 compare_super = NULL;
 814         sb = sget(fs_type, compare_super, ceph_set_super, client);
 815         if (IS_ERR(sb)) {
 816                 err = PTR_ERR(sb);
 817                 goto out;
 818         }
 819
 820         if (ceph_client(sb) != client) {
 821                 ceph_destroy_client(client);
 822                 client = ceph_client(sb);
 823                 dout("get_sb got existing client %p\n", client);
 824         } else {
 825                 dout("get_sb using new client %p\n", client);
 826
 827                 /* set up mempools */
 828                 err = -ENOMEM;
 829                 client->wb_pagevec_pool = mempool_create_kmalloc_pool(10,
 830                               client->mount_args.wsize >> PAGE_CACHE_SHIFT);
 831                 if (!client->wb_pagevec_pool)
 832                         goto out_splat;
 833
 834                 err = ceph_init_bdi(sb, client);
 835                 if (err < 0)
 836                         goto out_splat;
 837         }
 838
 839         err = ceph_mount(client, mnt, path);
 840         if (err < 0)
 841                 goto out_splat;
 842         dout("root %p inode %p ino %llx.%llx\n", mnt->mnt_root,
 843              mnt->mnt_root->d_inode, ceph_vinop(mnt->mnt_root->d_inode));
 844         return 0;
 845
 846 out_splat:
 847         ceph_mdsc_close_sessions(&client->mdsc);
 848         up_write(&sb->s_umount);
 849         deactivate_super(sb);
 850         goto out_final;
 851
 852 out:
 853         ceph_destroy_client(client);
 854 out_final:
 855         dout("ceph_get_sb fail %d\n", err);
 856         return err;
 857 }
 858
 859 static void ceph_kill_sb(struct super_block *s)
 860 {
 861         struct ceph_client *client = ceph_sb_to_client(s);
 862         dout("kill_sb %p\n", s);
 863         ceph_mdsc_pre_umount(&client->mdsc);
 864         bdi_unregister(&client->backing_dev_info);
 865         kill_anon_super(s);    /* will call put_super after sb is r/o */
 866         bdi_destroy(&client->backing_dev_info);
 867         ceph_destroy_client(client);
 868 }
 869
 870 static struct file_system_type ceph_fs_type = {
 871         .owner          = THIS_MODULE,
 872         .name           = "ceph",
 873         .get_sb         = ceph_get_sb,
 874         .kill_sb        = ceph_kill_sb,
 875         .fs_flags       = FS_RENAME_DOES_D_MOVE,
 876 };
 877
/*
 * STRINGIFY(x) expands macro x first and then turns the result into a
 * string literal; _STRINGIFY is the helper that does the actual '#'.
 */
#define _STRINGIFY(x) #x
#define STRINGIFY(x) _STRINGIFY(x)
 880
 881 static int __init init_ceph(void)
 882 {
 883         int ret = 0;
 884
 885         ret = ceph_debugfs_init();
 886         if (ret < 0)
 887                 goto out;
 888
 889         ret = ceph_msgr_init();
 890         if (ret < 0)
 891                 goto out_debugfs;
 892
 893         ret = init_caches();
 894         if (ret)
 895                 goto out_msgr;
 896
 897         ceph_caps_init();
 898
 899         ret = register_filesystem(&ceph_fs_type);
 900         if (ret)
 901                 goto out_icache;
 902
 903         pr_info("loaded %d.%d.%d (mon/mds/osd proto %d/%d/%d)\n",
 904                 CEPH_VERSION_MAJOR, CEPH_VERSION_MINOR, CEPH_VERSION_PATCH,
 905                 CEPH_MONC_PROTOCOL, CEPH_MDSC_PROTOCOL, CEPH_OSDC_PROTOCOL);
 906         return 0;
 907
 908 out_icache:
 909         destroy_caches();
 910 out_msgr:
 911         ceph_msgr_exit();
 912 out_debugfs:
 913         ceph_debugfs_cleanup();
 914 out:
 915         return ret;
 916 }
 917
 918 static void __exit exit_ceph(void)
 919 {
 920         dout("exit_ceph\n");
 921         unregister_filesystem(&ceph_fs_type);
 922         ceph_caps_finalize();
 923         destroy_caches();
 924         ceph_msgr_exit();
 925         ceph_debugfs_cleanup();
 926 }
 927
/* module entry and exit points */
module_init(init_ceph);
module_exit(exit_ceph);

/* module metadata */
MODULE_AUTHOR("Sage Weil <sage@newdream.net>");
MODULE_AUTHOR("Yehuda Sadeh <yehuda@hq.newdream.net>");
MODULE_AUTHOR("Patience Warnick <patience@newdream.net>");
MODULE_DESCRIPTION("Ceph filesystem for Linux");
MODULE_LICENSE("GPL");