[PATCH] uml: add host AIO support to block driver
[safe/jmp/linux-2.6] / arch / um / drivers / ubd_kern.c
1 /* 
2  * Copyright (C) 2000 Jeff Dike (jdike@karaya.com)
3  * Licensed under the GPL
4  */
5
6 /* 2001-09-28...2002-04-17
7  * Partition stuff by James_McMechan@hotmail.com
8  * old style ubd by setting UBD_SHIFT to 0
9  * 2002-09-27...2002-10-18 massive tinkering for 2.5
10  * partitions have changed in 2.5
11  * 2003-01-29 more tinkering for 2.5.59-1
12  * This should now address the sysfs problems and has
13  * the symlink for devfs to allow for booting with
14  * the common /dev/ubd/discX/... names rather than
15  * only /dev/ubdN/discN this version also has lots of
16  * clean ups preparing for ubd-many.
17  * James McMechan
18  */
19
20 #define MAJOR_NR UBD_MAJOR
21 #define UBD_SHIFT 4
22
23 #include "linux/config.h"
24 #include "linux/module.h"
25 #include "linux/blkdev.h"
26 #include "linux/hdreg.h"
27 #include "linux/init.h"
28 #include "linux/devfs_fs_kernel.h"
29 #include "linux/cdrom.h"
30 #include "linux/proc_fs.h"
31 #include "linux/ctype.h"
32 #include "linux/capability.h"
33 #include "linux/mm.h"
34 #include "linux/vmalloc.h"
35 #include "linux/blkpg.h"
36 #include "linux/genhd.h"
37 #include "linux/spinlock.h"
38 #include "asm/atomic.h"
39 #include "asm/segment.h"
40 #include "asm/uaccess.h"
41 #include "asm/irq.h"
42 #include "asm/types.h"
43 #include "asm/tlbflush.h"
44 #include "user_util.h"
45 #include "mem_user.h"
46 #include "kern_util.h"
47 #include "kern.h"
48 #include "mconsole_kern.h"
49 #include "init.h"
50 #include "irq_user.h"
51 #include "irq_kern.h"
52 #include "ubd_user.h"
53 #include "os.h"
54 #include "mem.h"
55 #include "mem_kern.h"
56 #include "cow.h"
57 #include "aio.h"
58
/* Read/write direction codes for ubd requests.
 * NOTE(review): io_thread_req.op is declared as enum aio_type, not as this
 * enum - confirm which of the two is meant before adding new users.
 */
59 enum ubd_req { UBD_READ, UBD_WRITE };
60
/* One unit of I/O handed to do_io().  prepare_request() fills one of these
 * in for every scatterlist segment of a block request.
 */
61 struct io_thread_req {
        /* AIO_READ or AIO_WRITE, chosen from the request's data direction */
62         enum aio_type op;
        /* [0] is the backing-file descriptor when a backing file is
         * configured, otherwise the device descriptor; [1] is always the
         * device descriptor.
         */
63         int fds[2];
        /* [0] is set to 0 and [1] to the COW data offset; presumably these
         * are the per-descriptor base offsets matching fds[] - confirm
         * against do_io().
         */
64         unsigned long offsets[2];
        /* Byte offset of the segment on the device (sector << 9) */
65         unsigned long long offset;
        /* Segment length in bytes */
66         unsigned long length;
        /* Kernel address of the segment's data */
67         char *buffer;
        /* Set to 512 by prepare_request() */
68         int sectorsize;
        /* Copied from the device's cow.bitmap_offset */
69         int bitmap_offset;
        /* Both start at -1; cowify_bitmap() records here the first sector
         * whose bit it set and one past the last such sector.
         */
70         long bitmap_start;
71         long bitmap_end;
        /* Initialised to 0; the consumer is not in this file */
72         int error;
73 };
74
/* Helpers declared extern here; their definitions are not in this file.
 *
 * open_ubd_file() - callers in this file treat the result as a file
 * descriptor, or a negative errno value on failure (-ENOENT triggers COW
 * file creation in ubd_open_dev()).
 */
75 extern int open_ubd_file(char *file, struct openflags *openflags,
76                          char **backing_file_out, int *bitmap_offset_out,
77                          unsigned long *bitmap_len_out, int *data_offset_out,
78                          int *create_cow_out);
/* create_cow_file() - result is used as a descriptor when >= 0. */
79 extern int create_cow_file(char *cow_file, char *backing_file,
80                            struct openflags flags, int sectorsize,
81                            int alignment, int *bitmap_offset_out,
82                            unsigned long *bitmap_len_out,
83                            int *data_offset_out);
/* read_cow_bitmap() - a negative result is treated as an error. */
84 extern int read_cow_bitmap(int fd, void *buf, int offset, int len);
/* do_io() - called once per prepared segment by do_ubd_request(). */
85 extern void do_io(struct io_thread_req *req, struct request *r,
86                   unsigned long *bitmap);
87
/* Test bit number 'bit' in the byte-addressed bitmap at 'data'.
 * Bit 0 is the least significant bit of byte 0.  Returns 1 when the bit is
 * set and 0 otherwise.
 */
static inline int ubd_test_bit(__u64 bit, void *data)
{
        unsigned char *bytes = data;
        int width = sizeof(bytes[0]) * 8;
        __u64 index = bit / width;
        int shift = bit % width;

        return (bytes[index] & (1 << shift)) != 0;
}
99
/* Set bit number 'bit' in the byte-addressed bitmap at 'data'.
 * Bit 0 is the least significant bit of byte 0; other bits are untouched.
 */
static inline void ubd_set_bit(__u64 bit, void *data)
{
        unsigned char *bytes = data;
        int width = sizeof(bytes[0]) * 8;
        __u64 index = bit / width;
        int shift = bit % width;

        bytes[index] |= (1 << shift);
}
111 /*End stuff from ubd_user.h*/
112
/* Name shared by the platform devices and the device_driver registered
 * by this file.
 */
113 #define DRIVER_NAME "uml-blkdev"
114
/* ubd_io_lock is handed to blk_init_queue() as the request queue lock and
 * is taken by ubd_finish().  ubd_lock is taken around changes to the
 * per-unit configuration in ubd_dev[] and to fake_major.
 */
115 static DEFINE_SPINLOCK(ubd_io_lock);
116 static DEFINE_SPINLOCK(ubd_lock);
117
118 static int ubd_open(struct inode * inode, struct file * filp);
119 static int ubd_release(struct inode * inode, struct file * file);
120 static int ubd_ioctl(struct inode * inode, struct file * file,
121                      unsigned int cmd, unsigned long arg);
122
/* Number of ubd units; sizes ubd_dev[], ubd_gendisk[] and fake_gendisk[] */
123 #define MAX_DEV (8)
124
/* Block device entry points installed on every gendisk by ubd_new_disk() */
125 static struct block_device_operations ubd_blops = {
126         .owner          = THIS_MODULE,
127         .open           = ubd_open,
128         .release        = ubd_release,
129         .ioctl          = ubd_ioctl,
130 };
131
132 /* Protected by the queue_lock */
133 static request_queue_t *ubd_queue;
134
135 /* Protected by ubd_lock */
136 static int fake_major = MAJOR_NR;
137
/* Per-unit disks: one on MAJOR_NR and, when a fake major was requested,
 * a second one on fake_major.
 */
138 static struct gendisk *ubd_gendisk[MAX_DEV];
139 static struct gendisk *fake_gendisk[MAX_DEV];
140  
/* Default open flags: read and write enabled; .s is set only when
 * CONFIG_BLK_DEV_UBD_SYNC is defined.  The meaning of .c and .cl is
 * defined by struct openflags, which is not in this file.
 */
141 #ifdef CONFIG_BLK_DEV_UBD_SYNC
142 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 1, .c = 0, \
143                                          .cl = 1 })
144 #else
145 #define OPEN_FLAGS ((struct openflags) { .r = 1, .w = 1, .s = 0, .c = 0, \
146                                          .cl = 1 })
147 #endif
148
149 /* Not protected - changed only in ubd_setup_common and then only
150  * to enable O_SYNC.
151  */
152 static struct openflags global_openflags = OPEN_FLAGS;
153
/* Copy-on-write state of one ubd unit; only meaningful when 'file' is
 * non-NULL.
 */
154 struct cow {
155         /* This is the backing file, actually */
156         char *file;
        /* Descriptor of the backing file, opened without write access by
         * ubd_open_dev()
         */
157         int fd;
        /* vmalloc'ed copy of the COW bitmap, bitmap_len bytes long, read
         * from the COW file at bitmap_offset; freed by ubd_close()
         */
158         unsigned long *bitmap;
159         unsigned long bitmap_len;
160         int bitmap_offset;
        /* Passed to do_io() as io_thread_req.offsets[1] */
161         int data_offset;
162 };
163
/* Size of the per-device scatterlist; also the segment limit given to
 * blk_queue_max_hw_segments() in ubd_init().
 */
164 #define MAX_SG 64
165
/* Per-unit driver state; one entry of ubd_dev[] */
166 struct ubd {
        /* Host file behind the device; NULL means the unit is unconfigured */
167         char *file;
        /* Number of current opens, maintained by ubd_open()/ubd_release() */
168         int count;
        /* Host descriptor for 'file', valid while the device is open */
169         int fd;
        /* Device size in bytes, rounded up to a 512-byte multiple by ubd_add() */
170         __u64 size;
        /* Flags parsed from the configuration string */
171         struct openflags boot_openflags;
        /* Working copy of boot_openflags, reloaded on each ubd_open_dev() */
172         struct openflags openflags;
        /* Set by the 'd' configuration flag */
173         int no_cow;
174         struct cow cow;
        /* Registered only for the disk on MAJOR_NR */
175         struct platform_device pdev;
        /* Scratch scatterlist that do_ubd_request() maps each request into */
176         struct scatterlist sg[MAX_SG];
177 };
178
/* Initialiser for an unconfigured struct cow.  bitmap_len is not listed and
 * is therefore zero-initialised.
 */
179 #define DEFAULT_COW { \
180         .file =                 NULL, \
181         .fd =                   -1, \
182         .bitmap =               NULL, \
183         .bitmap_offset =        0, \
184         .data_offset =          0, \
185 }
186
/* Initialiser for an unconfigured struct ubd.  size is an unsigned 64-bit
 * field, so -1 stores the all-ones value.  pdev and sg are not listed and
 * are zero-initialised.
 */
187 #define DEFAULT_UBD { \
188         .file =                 NULL, \
189         .count =                0, \
190         .fd =                   -1, \
191         .size =                 -1, \
192         .boot_openflags =       OPEN_FLAGS, \
193         .openflags =            OPEN_FLAGS, \
194         .no_cow =               0, \
195         .cow =                  DEFAULT_COW, \
196 }
197
/* State for every unit, all starting out unconfigured */
198 struct ubd ubd_dev[MAX_DEV] = { [ 0 ... MAX_DEV - 1 ] = DEFAULT_UBD };
199
200 static int ubd0_init(void)
201 {
202         struct ubd *dev = &ubd_dev[0];
203
204         if(dev->file == NULL)
205                 dev->file = "root_fs";
206         return(0);
207 }
208
209 __initcall(ubd0_init);
210
211 /* Only changed by fake_ide_setup which is a setup */
212 static int fake_ide = 0;
213 static struct proc_dir_entry *proc_ide_root = NULL;
214 static struct proc_dir_entry *proc_ide = NULL;
215
216 static void make_proc_ide(void)
217 {
218         proc_ide_root = proc_mkdir("ide", NULL);
219         proc_ide = proc_mkdir("ide0", proc_ide_root);
220 }
221
/* read_proc handler for the "media" entry: the content is always "disk\n".
 *
 * The text is copied to 'page'.  When fewer than 'count' bytes remain past
 * 'off', *eof is set and the remaining byte count is returned (0, with
 * *start untouched, when nothing remains).  Otherwise 'count' is returned.
 * Whenever data is returned, *start points at page + off.
 */
static int proc_ide_read_media(char *page, char **start, off_t off, int count,
                               int *eof, void *data)
{
        int remaining;

        strcpy(page, "disk\n");
        remaining = strlen("disk\n");
        remaining -= off;

        if (remaining >= count) {
                *start = page + off;
                return count;
        }

        *eof = 1;
        if (remaining <= 0)
                return 0;

        *start = page + off;
        return remaining;
}
238
239 static void make_ide_entries(char *dev_name)
240 {
241         struct proc_dir_entry *dir, *ent;
242         char name[64];
243
244         if(proc_ide_root == NULL) make_proc_ide();
245
246         dir = proc_mkdir(dev_name, proc_ide);
247         if(!dir) return;
248
249         ent = create_proc_entry("media", S_IFREG|S_IRUGO, dir);
250         if(!ent) return;
251         ent->nlink = 1;
252         ent->data = NULL;
253         ent->read_proc = proc_ide_read_media;
254         ent->write_proc = NULL;
255         sprintf(name,"ide0/%s", dev_name);
256         proc_symlink(dev_name, proc_ide_root, name);
257 }
258
259 static int fake_ide_setup(char *str)
260 {
261         fake_ide = 1;
262         return(1);
263 }
264
265 __setup("fake_ide", fake_ide_setup);
266
267 __uml_help(fake_ide_setup,
268 "fake_ide\n"
269 "    Create ide0 entries that map onto ubd devices.\n\n"
270 );
271
/* Parse a unit designator at *ptr: either a number (any base accepted by
 * simple_strtoul with base 0) or a single letter 'a'..'h' meaning 0..7.
 *
 * On success *ptr is advanced past the designator and the unit number is
 * returned.  Returns -1, leaving *ptr alone, when neither form matches.
 */
static int parse_unit(char **ptr)
{
        char *cursor = *ptr;
        char *stop;
        int unit;

        if(isdigit(*cursor)){
                unit = simple_strtoul(cursor, &stop, 0);
                if(stop == cursor)
                        return -1;
                *ptr = stop;
                return unit;
        }

        if((*cursor < 'a') || (*cursor > 'h'))
                return -1;

        *ptr = cursor + 1;
        return *cursor - 'a';
}
290
291 static int ubd_setup_common(char *str, int *index_out)
292 {
293         struct ubd *dev;
294         struct openflags flags = global_openflags;
295         char *backing_file;
296         int n, err, i;
297
298         if(index_out) *index_out = -1;
299         n = *str;
300         if(n == '='){
301                 char *end;
302                 int major;
303
304                 str++;
305                 if(!strcmp(str, "sync")){
306                         global_openflags = of_sync(global_openflags);
307                         return(0);
308                 }
309                 major = simple_strtoul(str, &end, 0);
310                 if((*end != '\0') || (end == str)){
311                         printk(KERN_ERR 
312                                "ubd_setup : didn't parse major number\n");
313                         return(1);
314                 }
315
316                 err = 1;
317                 spin_lock(&ubd_lock);
318                 if(fake_major != MAJOR_NR){
319                         printk(KERN_ERR "Can't assign a fake major twice\n");
320                         goto out1;
321                 }
322  
323                 fake_major = major;
324
325                 printk(KERN_INFO "Setting extra ubd major number to %d\n",
326                        major);
327                 err = 0;
328         out1:
329                 spin_unlock(&ubd_lock);
330                 return(err);
331         }
332
333         n = parse_unit(&str);
334         if(n < 0){
335                 printk(KERN_ERR "ubd_setup : couldn't parse unit number "
336                        "'%s'\n", str);
337                 return(1);
338         }
339         if(n >= MAX_DEV){
340                 printk(KERN_ERR "ubd_setup : index %d out of range "
341                        "(%d devices, from 0 to %d)\n", n, MAX_DEV, MAX_DEV - 1);
342                 return(1);
343         }
344
345         err = 1;
346         spin_lock(&ubd_lock);
347
348         dev = &ubd_dev[n];
349         if(dev->file != NULL){
350                 printk(KERN_ERR "ubd_setup : device already configured\n");
351                 goto out;
352         }
353
354         if (index_out)
355                 *index_out = n;
356
357         for (i = 0; i < 4; i++) {
358                 switch (*str) {
359                 case 'r':
360                         flags.w = 0;
361                         break;
362                 case 's':
363                         flags.s = 1;
364                         break;
365                 case 'd':
366                         dev->no_cow = 1;
367                         break;
368                 case '=':
369                         str++;
370                         goto break_loop;
371                 default:
372                         printk(KERN_ERR "ubd_setup : Expected '=' or flag letter (r,s or d)\n");
373                         goto out;
374                 }
375                 str++;
376         }
377
378         if (*str == '=')
379                 printk(KERN_ERR "ubd_setup : Too many flags specified\n");
380         else
381                 printk(KERN_ERR "ubd_setup : Expected '='\n");
382         goto out;
383
384 break_loop:
385         err = 0;
386         backing_file = strchr(str, ',');
387
388         if (!backing_file) {
389                 backing_file = strchr(str, ':');
390         }
391
392         if(backing_file){
393                 if(dev->no_cow)
394                         printk(KERN_ERR "Can't specify both 'd' and a "
395                                "cow file\n");
396                 else {
397                         *backing_file = '\0';
398                         backing_file++;
399                 }
400         }
401         dev->file = str;
402         dev->cow.file = backing_file;
403         dev->boot_openflags = flags;
404 out:
405         spin_unlock(&ubd_lock);
406         return(err);
407 }
408
409 static int ubd_setup(char *str)
410 {
411         ubd_setup_common(str, NULL);
412         return(1);
413 }
414
415 __setup("ubd", ubd_setup);
416 __uml_help(ubd_setup,
417 "ubd<n><flags>=<filename>[(:|,)<filename2>]\n"
418 "    This is used to associate a device with a file in the underlying\n"
419 "    filesystem. When specifying two filenames, the first one is the\n"
420 "    COW name and the second is the backing file name. As separator you can\n"
421 "    use either a ':' or a ',': the first one allows writing things like;\n"
422 "       ubd0=~/Uml/root_cow:~/Uml/root_backing_file\n"
423 "    while with a ',' the shell would not expand the 2nd '~'.\n"
424 "    When using only one filename, UML will detect whether to thread it like\n"
425 "    a COW file or a backing file. To override this detection, add the 'd'\n"
426 "    flag:\n"
427 "       ubd0d=BackingFile\n"
428 "    Usually, there is a filesystem in the file, but \n"
429 "    that's not required. Swap devices containing swap files can be\n"
430 "    specified like this. Also, a file which doesn't contain a\n"
431 "    filesystem can have its contents read in the virtual \n"
432 "    machine by running 'dd' on the device. <n> must be in the range\n"
433 "    0 to 7. Appending an 'r' to the number will cause that device\n"
434 "    to be mounted read-only. For example ubd1r=./ext_fs. Appending\n"
435 "    an 's' will cause data to be written to disk on the host immediately.\n\n"
436 );
437
438 static int udb_setup(char *str)
439 {
440         printk("udb%s specified on command line is almost certainly a ubd -> "
441                "udb TYPO\n", str);
442         return(1);
443 }
444
445 __setup("udb", udb_setup);
446 __uml_help(udb_setup,
447 "udb\n"
448 "    This option is here solely to catch ubd -> udb typos, which can be\n"
449 "    to impossible to catch visually unless you specifically look for\n"
450 "    them.  The only result of any option starting with 'udb' is an error\n"
451 "    in the boot output.\n\n"
452 );
453
454 static int fakehd_set = 0;
455 static int fakehd(char *str)
456 {
457         printk(KERN_INFO "fakehd : Changing ubd name to \"hd\".\n");
458         fakehd_set = 1;
459         return 1;
460 }
461
462 __setup("fakehd", fakehd);
463 __uml_help(fakehd,
464 "fakehd\n"
465 "    Change the ubd device name to \"hd\".\n\n"
466 );
467
468 static void do_ubd_request(request_queue_t * q);
/* Re-entrancy guard: non-zero while do_ubd_request() is draining the queue */
469 static int in_ubd;
470
471 /* Changed by ubd_handler, which is serialized because interrupts only
472  * happen on CPU 0.
473  * NOTE(review): neither ubd_handler nor any use of intr_count is in the
474  * part of the file reviewed here - confirm that it is still needed.
475  */
474 int intr_count = 0;
475
476 static void ubd_end_request(struct request *req, int bytes, int uptodate)
477 {
478         if (!end_that_request_first(req, uptodate, bytes >> 9)) {
479                 add_disk_randomness(req->rq_disk);
480                 end_that_request_last(req);
481         }
482 }
483
/* Finish a request without taking ubd_io_lock; call ubd_finish if you need
 * to serialize.  A negative 'bytes' marks the request as failed, anything
 * else completes that many bytes successfully.
 */
static void __ubd_finish(struct request *req, int bytes)
{
        if(bytes >= 0)
                ubd_end_request(req, bytes, 1);
        else
                ubd_end_request(req, 0, 0);
}
494
495 static inline void ubd_finish(struct request *req, int bytes)
496 {
497         spin_lock(&ubd_io_lock);
498         __ubd_finish(req, bytes);
499         spin_unlock(&ubd_io_lock);
500 }
501
/* Deferred COW bitmap write shared by several data writes.  ubd_intr()
 * decrements 'count' as each data write completes and submits 'aio' when
 * it reaches zero.
 */
502 struct bitmap_io {
503         atomic_t count;
504         struct aio_context aio;
505 };
506
/* Per-segment AIO bookkeeping; ubd_intr() recovers it from the completed
 * aio_context with container_of().
 */
507 struct ubd_aio {
508         struct aio_context aio;
        /* Block request this segment belongs to */
509         struct request *req;
        /* Bytes covered; subtracted (as sectors) from req->nr_sectors on
         * completion
         */
510         int len;
        /* Pending bitmap write, or NULL */
511         struct bitmap_io *bitmap;
        /* Buffer kfree'd together with this structure; may be NULL */
512         void *bitmap_buf;
513 };
514
/* Result of init_aio_irq() in ubd_init(); negative when AIO setup failed */
515 static int ubd_reply_fd = -1;
516
/* Interrupt handler for AIO completions.
 *
 * Reads struct aio_thread_reply records from the descriptor passed in
 * through 'dev' until the read reports -EAGAIN or another error.  Each
 * record is processed as described in the loop below.  Afterwards the
 * descriptor is re-armed and the request queue is run again.  Always
 * returns IRQ_HANDLED.
 */
517 static irqreturn_t ubd_intr(int irq, void *dev, struct pt_regs *unused)
518 {
519         struct aio_thread_reply reply;
520         struct ubd_aio *aio;
521         struct request *req;
        /* The reply descriptor arrives encoded in the 'dev' pointer */
522         int err, n, fd = (int) (long) dev;
523
524         while(1){
                /* NOTE(review): a short read (0 <= err < sizeof(reply)) is
                 * not distinguished from a complete reply - confirm that
                 * os_read_file() cannot return one here.
                 */
525                 err = os_read_file(fd, &reply, sizeof(reply));
526                 if(err == -EAGAIN)
527                         break;
528                 if(err < 0){
529                         printk("ubd_aio_handler - read returned err %d\n",
530                                -err);
531                         break;
532                 }
533
                /* reply.data points at the aio_context embedded in our
                 * bookkeeping structure
                 */
534                 aio = container_of(reply.data, struct ubd_aio, aio);
535                 n = reply.err;
536
537                 if(n == 0){
538                         req = aio->req;
539                         req->nr_sectors -= aio->len >> 9;
540
                        /* Last data write sharing this bitmap update:
                         * reuse this ubd_aio to issue the bitmap write.
                         * len is zeroed so its completion subtracts no
                         * more sectors.
                         */
541                         if((aio->bitmap != NULL) &&
542                            (atomic_dec_and_test(&aio->bitmap->count))){
543                                 aio->aio = aio->bitmap->aio;
544                                 aio->len = 0;
545                                 kfree(aio->bitmap);
546                                 aio->bitmap = NULL;
547                                 submit_aio(&aio->aio);
548                         }
549                         else {
                                /* The request is finished only when no
                                 * sectors remain and this completion has
                                 * no bitmap write attached
                                 */
550                                 if((req->nr_sectors == 0) &&
551                                    (aio->bitmap == NULL)){
552                                         int len = req->hard_nr_sectors << 9;
553                                         ubd_finish(req, len);
554                                 }
555
556                                 if(aio->bitmap_buf != NULL)
557                                         kfree(aio->bitmap_buf);
558                                 kfree(aio);
559                         }
560                 }
                /* Failure: fail the request and release everything that is
                 * attached to this completion.
                 * NOTE(review): a positive reply.err matches neither
                 * branch and would leave 'aio' unfreed - confirm that it
                 * cannot occur.
                 */
561                 else if(n < 0){
562                         ubd_finish(aio->req, n);
563                         if(aio->bitmap != NULL)
564                                 kfree(aio->bitmap);
565                         if(aio->bitmap_buf != NULL)
566                                 kfree(aio->bitmap_buf);
567                         kfree(aio);
568                 }
569         }
570         reactivate_fd(fd, UBD_IRQ);
571
        /* Completions may have made room for more requests */
572         do_ubd_request(ubd_queue);
573
574         return(IRQ_HANDLED);
575 }
576
577 static int ubd_file_size(struct ubd *dev, __u64 *size_out)
578 {
579         char *file;
580
581         file = dev->cow.file ? dev->cow.file : dev->file;
582         return(os_file_size(file, size_out));
583 }
584
585 static void ubd_close(struct ubd *dev)
586 {
587         os_close_file(dev->fd);
588         if(dev->cow.file == NULL)
589                 return;
590
591         os_close_file(dev->cow.fd);
592         vfree(dev->cow.bitmap);
593         dev->cow.bitmap = NULL;
594 }
595
596 static int ubd_open_dev(struct ubd *dev)
597 {
598         struct openflags flags;
599         char **back_ptr;
600         int err, create_cow, *create_ptr;
601
602         dev->openflags = dev->boot_openflags;
603         create_cow = 0;
604         create_ptr = (dev->cow.file != NULL) ? &create_cow : NULL;
605         back_ptr = dev->no_cow ? NULL : &dev->cow.file;
606         dev->fd = open_ubd_file(dev->file, &dev->openflags, back_ptr,
607                                 &dev->cow.bitmap_offset, &dev->cow.bitmap_len, 
608                                 &dev->cow.data_offset, create_ptr);
609
610         if((dev->fd == -ENOENT) && create_cow){
611                 dev->fd = create_cow_file(dev->file, dev->cow.file,
612                                           dev->openflags, 1 << 9, PAGE_SIZE,
613                                           &dev->cow.bitmap_offset, 
614                                           &dev->cow.bitmap_len,
615                                           &dev->cow.data_offset);
616                 if(dev->fd >= 0){
617                         printk(KERN_INFO "Creating \"%s\" as COW file for "
618                                "\"%s\"\n", dev->file, dev->cow.file);
619                 }
620         }
621
622         if(dev->fd < 0){
623                 printk("Failed to open '%s', errno = %d\n", dev->file,
624                        -dev->fd);
625                 return(dev->fd);
626         }
627
628         if(dev->cow.file != NULL){
629                 err = -ENOMEM;
630                 dev->cow.bitmap = (void *) vmalloc(dev->cow.bitmap_len);
631                 if(dev->cow.bitmap == NULL){
632                         printk(KERN_ERR "Failed to vmalloc COW bitmap\n");
633                         goto error;
634                 }
635                 flush_tlb_kernel_vm();
636
637                 err = read_cow_bitmap(dev->fd, dev->cow.bitmap, 
638                                       dev->cow.bitmap_offset, 
639                                       dev->cow.bitmap_len);
640                 if(err < 0)
641                         goto error;
642
643                 flags = dev->openflags;
644                 flags.w = 0;
645                 err = open_ubd_file(dev->cow.file, &flags, NULL, NULL, NULL, 
646                                     NULL, NULL);
647                 if(err < 0) goto error;
648                 dev->cow.fd = err;
649         }
650         return(0);
651  error:
652         os_close_file(dev->fd);
653         return(err);
654 }
655
656 static int ubd_new_disk(int major, u64 size, int unit,
657                         struct gendisk **disk_out)
658                         
659 {
660         struct gendisk *disk;
661         char from[sizeof("ubd/nnnnn\0")], to[sizeof("discnnnnn/disc\0")];
662         int err;
663
664         disk = alloc_disk(1 << UBD_SHIFT);
665         if(disk == NULL)
666                 return(-ENOMEM);
667
668         disk->major = major;
669         disk->first_minor = unit << UBD_SHIFT;
670         disk->fops = &ubd_blops;
671         set_capacity(disk, size / 512);
672         if(major == MAJOR_NR){
673                 sprintf(disk->disk_name, "ubd%c", 'a' + unit);
674                 sprintf(disk->devfs_name, "ubd/disc%d", unit);
675                 sprintf(from, "ubd/%d", unit);
676                 sprintf(to, "disc%d/disc", unit);
677                 err = devfs_mk_symlink(from, to);
678                 if(err)
679                         printk("ubd_new_disk failed to make link from %s to "
680                                "%s, error = %d\n", from, to, err);
681         }
682         else {
683                 sprintf(disk->disk_name, "ubd_fake%d", unit);
684                 sprintf(disk->devfs_name, "ubd_fake/disc%d", unit);
685         }
686
687         /* sysfs register (not for ide fake devices) */
688         if (major == MAJOR_NR) {
689                 ubd_dev[unit].pdev.id   = unit;
690                 ubd_dev[unit].pdev.name = DRIVER_NAME;
691                 platform_device_register(&ubd_dev[unit].pdev);
692                 disk->driverfs_dev = &ubd_dev[unit].pdev.dev;
693         }
694
695         disk->private_data = &ubd_dev[unit];
696         disk->queue = ubd_queue;
697         add_disk(disk);
698
699         *disk_out = disk;
700         return 0;
701 }
702
/* Round n up to the next multiple of 512 bytes.  The argument is
 * parenthesised so any expression can be passed.  The mask is built from an
 * unsigned 64-bit constant, so no negative value is shifted and offsets
 * above 4GB keep their high bits.  The result is unsigned long long.
 */
#define ROUND_BLOCK(n) (((n) + ((1ULL << 9) - 1)) & ~((1ULL << 9) - 1))
704
705 static int ubd_add(int n)
706 {
707         struct ubd *dev = &ubd_dev[n];
708         int err;
709
710         err = -ENODEV;
711         if(dev->file == NULL)
712                 goto out;
713
714         if (ubd_open_dev(dev))
715                 goto out;
716
717         err = ubd_file_size(dev, &dev->size);
718         if(err < 0)
719                 goto out_close;
720
721         dev->size = ROUND_BLOCK(dev->size);
722
723         err = ubd_new_disk(MAJOR_NR, dev->size, n, &ubd_gendisk[n]);
724         if(err) 
725                 goto out_close;
726  
727         if(fake_major != MAJOR_NR)
728                 ubd_new_disk(fake_major, dev->size, n, 
729                              &fake_gendisk[n]);
730
731         /* perhaps this should also be under the "if (fake_major)" above */
732         /* using the fake_disk->disk_name and also the fakehd_set name */
733         if (fake_ide)
734                 make_ide_entries(ubd_gendisk[n]->disk_name);
735
736         err = 0;
737 out_close:
738         ubd_close(dev);
739 out:
740         return err;
741 }
742
743 static int ubd_config(char *str)
744 {
745         int n, err;
746
747         str = uml_strdup(str);
748         if(str == NULL){
749                 printk(KERN_ERR "ubd_config failed to strdup string\n");
750                 return(1);
751         }
752         err = ubd_setup_common(str, &n);
753         if(err){
754                 kfree(str);
755                 return(-1);
756         }
757         if(n == -1) return(0);
758
759         spin_lock(&ubd_lock);
760         err = ubd_add(n);
761         if(err)
762                 ubd_dev[n].file = NULL;
763         spin_unlock(&ubd_lock);
764
765         return(err);
766 }
767
768 static int ubd_get_config(char *name, char *str, int size, char **error_out)
769 {
770         struct ubd *dev;
771         int n, len = 0;
772
773         n = parse_unit(&name);
774         if((n >= MAX_DEV) || (n < 0)){
775                 *error_out = "ubd_get_config : device number out of range";
776                 return(-1);
777         }
778
779         dev = &ubd_dev[n];
780         spin_lock(&ubd_lock);
781
782         if(dev->file == NULL){
783                 CONFIG_CHUNK(str, size, len, "", 1);
784                 goto out;
785         }
786
787         CONFIG_CHUNK(str, size, len, dev->file, 0);
788
789         if(dev->cow.file != NULL){
790                 CONFIG_CHUNK(str, size, len, ",", 0);
791                 CONFIG_CHUNK(str, size, len, dev->cow.file, 1);
792         }
793         else CONFIG_CHUNK(str, size, len, "", 1);
794
795  out:
796         spin_unlock(&ubd_lock);
797         return(len);
798 }
799
800 static int ubd_id(char **str, int *start_out, int *end_out)
801 {
802         int n;
803
804         n = parse_unit(str);
805         *start_out = 0;
806         *end_out = MAX_DEV - 1;
807         return n;
808 }
809
810 static int ubd_remove(int n)
811 {
812         struct ubd *dev;
813         int err = -ENODEV;
814
815         spin_lock(&ubd_lock);
816
817         if(ubd_gendisk[n] == NULL)
818                 goto out;
819
820         dev = &ubd_dev[n];
821
822         if(dev->file == NULL)
823                 goto out;
824
825         /* you cannot remove a open disk */
826         err = -EBUSY;
827         if(dev->count > 0)
828                 goto out;
829
830         del_gendisk(ubd_gendisk[n]);
831         put_disk(ubd_gendisk[n]);
832         ubd_gendisk[n] = NULL;
833
834         if(fake_gendisk[n] != NULL){
835                 del_gendisk(fake_gendisk[n]);
836                 put_disk(fake_gendisk[n]);
837                 fake_gendisk[n] = NULL;
838         }
839
840         platform_device_unregister(&dev->pdev);
841         *dev = ((struct ubd) DEFAULT_UBD);
842         err = 0;
843 out:
844         spin_unlock(&ubd_lock);
845         return err;
846 }
847
/* mconsole device descriptor: routes "ubd" management commands to the
 * callbacks in this file.  Registered by ubd_mc_init().
 */
848 static struct mc_device ubd_mc = {
849         .name           = "ubd",
850         .config         = ubd_config,
851         .get_config     = ubd_get_config,
852         .id             = ubd_id,
853         .remove         = ubd_remove,
854 };
855
856 static int ubd_mc_init(void)
857 {
858         mconsole_register_dev(&ubd_mc);
859         return 0;
860 }
861
862 __initcall(ubd_mc_init);
863
/* Platform-bus driver registered by ubd_init(); it carries the same name
 * (DRIVER_NAME) that ubd_new_disk() gives to the platform devices.
 */
864 static struct device_driver ubd_driver = {
865         .name  = DRIVER_NAME,
866         .bus   = &platform_bus_type,
867 };
868
869 int ubd_init(void)
870 {
871         int i;
872
873         ubd_reply_fd = init_aio_irq(UBD_IRQ, "ubd", ubd_intr);
874         if(ubd_reply_fd < 0)
875                 printk("Setting up ubd AIO failed, err = %d\n", ubd_reply_fd);
876
877         devfs_mk_dir("ubd");
878         if (register_blkdev(MAJOR_NR, "ubd"))
879                 return -1;
880
881         ubd_queue = blk_init_queue(do_ubd_request, &ubd_io_lock);
882         if (!ubd_queue) {
883                 unregister_blkdev(MAJOR_NR, "ubd");
884                 return -1;
885         }
886                 
887         blk_queue_max_hw_segments(ubd_queue, MAX_SG);
888         if (fake_major != MAJOR_NR) {
889                 char name[sizeof("ubd_nnn\0")];
890
891                 snprintf(name, sizeof(name), "ubd_%d", fake_major);
892                 devfs_mk_dir(name);
893                 if (register_blkdev(fake_major, "ubd"))
894                         return -1;
895         }
896         driver_register(&ubd_driver);
897         for (i = 0; i < MAX_DEV; i++) 
898                 ubd_add(i);
899
900         return 0;
901 }
902
903 late_initcall(ubd_init);
904
905 static int ubd_open(struct inode *inode, struct file *filp)
906 {
907         struct gendisk *disk = inode->i_bdev->bd_disk;
908         struct ubd *dev = disk->private_data;
909         int err = 0;
910
911         if(dev->count == 0){
912                 err = ubd_open_dev(dev);
913                 if(err){
914                         printk(KERN_ERR "%s: Can't open \"%s\": errno = %d\n",
915                                disk->disk_name, dev->file, -err);
916                         goto out;
917                 }
918         }
919         dev->count++;
920         set_disk_ro(disk, !dev->openflags.w);
921
922         /* This should no more be needed. And it didn't work anyway to exclude
923          * read-write remounting of filesystems.*/
924         /*if((filp->f_mode & FMODE_WRITE) && !dev->openflags.w){
925                 if(--dev->count == 0) ubd_close(dev);
926                 err = -EROFS;
927         }*/
928  out:
929         return(err);
930 }
931
932 static int ubd_release(struct inode * inode, struct file * file)
933 {
934         struct gendisk *disk = inode->i_bdev->bd_disk;
935         struct ubd *dev = disk->private_data;
936
937         if(--dev->count == 0)
938                 ubd_close(dev);
939         return(0);
940 }
941
942 static void cowify_bitmap(struct io_thread_req *req, unsigned long *bitmap)
943 {
944         __u64 sector = req->offset / req->sectorsize;
945         int i;
946
947         for(i = 0; i < req->length / req->sectorsize; i++){
948                 if(ubd_test_bit(sector + i, bitmap))
949                         continue;
950
951                 if(req->bitmap_start == -1)
952                         req->bitmap_start = sector + i;
953                 req->bitmap_end = sector + i + 1;
954
955                 ubd_set_bit(sector + i, bitmap);
956         }
957 }
958
959 /* Called with ubd_io_lock held */
960 static int prepare_request(struct request *req, struct io_thread_req *io_req,
961                            unsigned long long offset, int page_offset,
962                            int len, struct page *page)
963 {
964         struct gendisk *disk = req->rq_disk;
965         struct ubd *dev = disk->private_data;
966
967         /* This should be impossible now */
968         if((rq_data_dir(req) == WRITE) && !dev->openflags.w){
969                 printk("Write attempted on readonly ubd device %s\n", 
970                        disk->disk_name);
971                 ubd_end_request(req, 0, 0);
972                 return(1);
973         }
974
975         io_req->fds[0] = (dev->cow.file != NULL) ? dev->cow.fd : dev->fd;
976         io_req->fds[1] = dev->fd;
977         io_req->offset = offset;
978         io_req->length = len;
979         io_req->error = 0;
980         io_req->op = (rq_data_dir(req) == READ) ? AIO_READ : AIO_WRITE;
981         io_req->offsets[0] = 0;
982         io_req->offsets[1] = dev->cow.data_offset;
983         io_req->buffer = page_address(page) + page_offset;
984         io_req->sectorsize = 1 << 9;
985         io_req->bitmap_offset = dev->cow.bitmap_offset;
986         io_req->bitmap_start = -1;
987         io_req->bitmap_end = -1;
988
989         if((dev->cow.file != NULL) && (io_req->op == UBD_WRITE))
990                 cowify_bitmap(io_req, dev->cow.bitmap);
991         return(0);
992 }
993
994 /* Called with ubd_io_lock held */
995 static void do_ubd_request(request_queue_t *q)
996 {
997         struct io_thread_req io_req;
998         struct request *req;
999         __u64 sector;
1000         int err;
1001
1002         if(in_ubd)
1003                 return;
1004         in_ubd = 1;
1005         while((req = elv_next_request(q)) != NULL){
1006                 struct gendisk *disk = req->rq_disk;
1007                 struct ubd *dev = disk->private_data;
1008                 int n, i;
1009
1010                 blkdev_dequeue_request(req);
1011
1012                 sector = req->sector;
1013                 n = blk_rq_map_sg(q, req, dev->sg);
1014
1015                 for(i = 0; i < n; i++){
1016                         struct scatterlist *sg = &dev->sg[i];
1017
1018                         err = prepare_request(req, &io_req, sector << 9,
1019                                               sg->offset, sg->length,
1020                                               sg->page);
1021                         if(err)
1022                                 continue;
1023
1024                         sector += sg->length >> 9;
1025                         do_io(&io_req, req, dev->cow.bitmap);
1026                 }
1027         }
1028         in_ubd = 0;
1029 }
1030
1031 static int ubd_ioctl(struct inode * inode, struct file * file,
1032                      unsigned int cmd, unsigned long arg)
1033 {
1034         struct hd_geometry __user *loc = (struct hd_geometry __user *) arg;
1035         struct ubd *dev = inode->i_bdev->bd_disk->private_data;
1036         struct hd_driveid ubd_id = {
1037                 .cyls           = 0,
1038                 .heads          = 128,
1039                 .sectors        = 32,
1040         };
1041
1042         switch (cmd) {
1043                 struct hd_geometry g;
1044                 struct cdrom_volctrl volume;
1045         case HDIO_GETGEO:
1046                 if(!loc) return(-EINVAL);
1047                 g.heads = 128;
1048                 g.sectors = 32;
1049                 g.cylinders = dev->size / (128 * 32 * 512);
1050                 g.start = get_start_sect(inode->i_bdev);
1051                 return(copy_to_user(loc, &g, sizeof(g)) ? -EFAULT : 0);
1052
1053         case HDIO_GET_IDENTITY:
1054                 ubd_id.cyls = dev->size / (128 * 32 * 512);
1055                 if(copy_to_user((char __user *) arg, (char *) &ubd_id,
1056                                  sizeof(ubd_id)))
1057                         return(-EFAULT);
1058                 return(0);
1059                 
1060         case CDROMVOLREAD:
1061                 if(copy_from_user(&volume, (char __user *) arg, sizeof(volume)))
1062                         return(-EFAULT);
1063                 volume.channel0 = 255;
1064                 volume.channel1 = 255;
1065                 volume.channel2 = 255;
1066                 volume.channel3 = 255;
1067                 if(copy_to_user((char __user *) arg, &volume, sizeof(volume)))
1068                         return(-EFAULT);
1069                 return(0);
1070         }
1071         return(-EINVAL);
1072 }
1073
1074 static int same_backing_files(char *from_cmdline, char *from_cow, char *cow)
1075 {
1076         struct uml_stat buf1, buf2;
1077         int err;
1078
1079         if(from_cmdline == NULL) return(1);
1080         if(!strcmp(from_cmdline, from_cow)) return(1);
1081
1082         err = os_stat_file(from_cmdline, &buf1);
1083         if(err < 0){
1084                 printk("Couldn't stat '%s', err = %d\n", from_cmdline, -err);
1085                 return(1);
1086         }
1087         err = os_stat_file(from_cow, &buf2);
1088         if(err < 0){
1089                 printk("Couldn't stat '%s', err = %d\n", from_cow, -err);
1090                 return(1);
1091         }
1092         if((buf1.ust_dev == buf2.ust_dev) && (buf1.ust_ino == buf2.ust_ino))
1093                 return(1);
1094
1095         printk("Backing file mismatch - \"%s\" requested,\n"
1096                "\"%s\" specified in COW header of \"%s\"\n",
1097                from_cmdline, from_cow, cow);
1098         return(0);
1099 }
1100
1101 static int backing_file_mismatch(char *file, __u64 size, time_t mtime)
1102 {
1103         unsigned long modtime;
1104         long long actual;
1105         int err;
1106
1107         err = os_file_modtime(file, &modtime);
1108         if(err < 0){
1109                 printk("Failed to get modification time of backing file "
1110                        "\"%s\", err = %d\n", file, -err);
1111                 return(err);
1112         }
1113
1114         err = os_file_size(file, &actual);
1115         if(err < 0){
1116                 printk("Failed to get size of backing file \"%s\", "
1117                        "err = %d\n", file, -err);
1118                 return(err);
1119         }
1120
1121         if(actual != size){
1122                 /*__u64 can be a long on AMD64 and with %lu GCC complains; so
1123                  * the typecast.*/
1124                 printk("Size mismatch (%llu vs %llu) of COW header vs backing "
1125                        "file\n", (unsigned long long) size, actual);
1126                 return(-EINVAL);
1127         }
1128         if(modtime != mtime){
1129                 printk("mtime mismatch (%ld vs %ld) of COW header vs backing "
1130                        "file\n", mtime, modtime);
1131                 return(-EINVAL);
1132         }
1133         return(0);
1134 }
1135
/*
 * Seek fd to offset and read len bytes into buf.
 *
 * Returns 0 when neither os_seek_file() nor os_read_file() reports a
 * negative value; otherwise returns that negative value.  The number of
 * bytes actually read is not checked.
 */
int read_cow_bitmap(int fd, void *buf, int offset, int len)
{
	int ret = os_seek_file(fd, offset);

	if (ret >= 0)
		ret = os_read_file(fd, buf, len);

	return (ret < 0) ? ret : 0;
}
1150
1151 int open_ubd_file(char *file, struct openflags *openflags,
1152                   char **backing_file_out, int *bitmap_offset_out,
1153                   unsigned long *bitmap_len_out, int *data_offset_out,
1154                   int *create_cow_out)
1155 {
1156         time_t mtime;
1157         unsigned long long size;
1158         __u32 version, align;
1159         char *backing_file;
1160         int fd, err, sectorsize, same, mode = 0644;
1161
1162         fd = os_open_file(file, *openflags, mode);
1163         if(fd < 0){
1164                 if((fd == -ENOENT) && (create_cow_out != NULL))
1165                         *create_cow_out = 1;
1166                 if(!openflags->w ||
1167                    ((fd != -EROFS) && (fd != -EACCES))) return(fd);
1168                 openflags->w = 0;
1169                 fd = os_open_file(file, *openflags, mode);
1170                 if(fd < 0)
1171                         return(fd);
1172         }
1173
1174         err = os_lock_file(fd, openflags->w);
1175         if(err < 0){
1176                 printk("Failed to lock '%s', err = %d\n", file, -err);
1177                 goto out_close;
1178         }
1179
1180         if(backing_file_out == NULL) return(fd);
1181
1182         err = read_cow_header(file_reader, &fd, &version, &backing_file, &mtime,
1183                               &size, &sectorsize, &align, bitmap_offset_out);
1184         if(err && (*backing_file_out != NULL)){
1185                 printk("Failed to read COW header from COW file \"%s\", "
1186                        "errno = %d\n", file, -err);
1187                 goto out_close;
1188         }
1189         if(err) return(fd);
1190
1191         if(backing_file_out == NULL) return(fd);
1192
1193         same = same_backing_files(*backing_file_out, backing_file, file);
1194
1195         if(!same && !backing_file_mismatch(*backing_file_out, size, mtime)){
1196                 printk("Switching backing file to '%s'\n", *backing_file_out);
1197                 err = write_cow_header(file, fd, *backing_file_out,
1198                                        sectorsize, align, &size);
1199                 if(err){
1200                         printk("Switch failed, errno = %d\n", -err);
1201                         return(err);
1202                 }
1203         }
1204         else {
1205                 *backing_file_out = backing_file;
1206                 err = backing_file_mismatch(*backing_file_out, size, mtime);
1207                 if(err) goto out_close;
1208         }
1209
1210         cow_sizes(version, size, sectorsize, align, *bitmap_offset_out,
1211                   bitmap_len_out, data_offset_out);
1212
1213         return(fd);
1214  out_close:
1215         os_close_file(fd);
1216         return(err);
1217 }
1218
1219 int create_cow_file(char *cow_file, char *backing_file, struct openflags flags,
1220                     int sectorsize, int alignment, int *bitmap_offset_out,
1221                     unsigned long *bitmap_len_out, int *data_offset_out)
1222 {
1223         int err, fd;
1224
1225         flags.c = 1;
1226         fd = open_ubd_file(cow_file, &flags, NULL, NULL, NULL, NULL, NULL);
1227         if(fd < 0){
1228                 err = fd;
1229                 printk("Open of COW file '%s' failed, errno = %d\n", cow_file,
1230                        -err);
1231                 goto out;
1232         }
1233
1234         err = init_cow_file(fd, cow_file, backing_file, sectorsize, alignment,
1235                             bitmap_offset_out, bitmap_len_out,
1236                             data_offset_out);
1237         if(!err)
1238                 return(fd);
1239         os_close_file(fd);
1240  out:
1241         return(err);
1242 }
1243
1244 void do_io(struct io_thread_req *req, struct request *r, unsigned long *bitmap)
1245 {
1246         struct ubd_aio *aio;
1247         struct bitmap_io *bitmap_io = NULL;
1248         char *buf;
1249         void *bitmap_buf = NULL;
1250         unsigned long len, sector;
1251         int nsectors, start, end, bit, err;
1252         __u64 off;
1253
1254         if(req->bitmap_start != -1){
1255                 /* Round up to the nearest word */
1256                 int round = sizeof(unsigned long);
1257                 len = (req->bitmap_end - req->bitmap_start +
1258                        round * 8 - 1) / (round * 8);
1259                 len *= round;
1260
1261                 off = req->bitmap_start / (8 * round);
1262                 off *= round;
1263
1264                 bitmap_io = kmalloc(sizeof(*bitmap_io), GFP_KERNEL);
1265                 if(bitmap_io == NULL){
1266                         printk("Failed to kmalloc bitmap IO\n");
1267                         req->error = 1;
1268                         return;
1269                 }
1270
1271                 bitmap_buf = kmalloc(len, GFP_KERNEL);
1272                 if(bitmap_buf == NULL){
1273                         printk("do_io : kmalloc of bitmap chunk "
1274                                "failed\n");
1275                         kfree(bitmap_io);
1276                         req->error = 1;
1277                         return;
1278                 }
1279                 memcpy(bitmap_buf, &bitmap[off / sizeof(bitmap[0])], len);
1280
1281                 *bitmap_io = ((struct bitmap_io)
1282                         { .count        = ATOMIC_INIT(0),
1283                           .aio          = INIT_AIO(AIO_WRITE, req->fds[1],
1284                                                    bitmap_buf, len,
1285                                                    req->bitmap_offset + off,
1286                                                    ubd_reply_fd) } );
1287         }
1288
1289         nsectors = req->length / req->sectorsize;
1290         start = 0;
1291         end = nsectors;
1292         bit = 0;
1293         do {
1294                 if(bitmap != NULL){
1295                         sector = req->offset / req->sectorsize;
1296                         bit = ubd_test_bit(sector + start, bitmap);
1297                         end = start;
1298                         while((end < nsectors) &&
1299                               (ubd_test_bit(sector + end, bitmap) == bit))
1300                                 end++;
1301                 }
1302
1303                 off = req->offsets[bit] + req->offset +
1304                         start * req->sectorsize;
1305                 len = (end - start) * req->sectorsize;
1306                 buf = &req->buffer[start * req->sectorsize];
1307
1308                 aio = kmalloc(sizeof(*aio), GFP_KERNEL);
1309                 if(aio == NULL){
1310                         req->error = 1;
1311                         return;
1312                 }
1313
1314                 *aio = ((struct ubd_aio)
1315                         { .aio          = INIT_AIO(req->op, req->fds[bit], buf,
1316                                                    len, off, ubd_reply_fd),
1317                           .len          = len,
1318                           .req          = r,
1319                           .bitmap       = bitmap_io,
1320                           .bitmap_buf   = bitmap_buf });
1321
1322                 if(aio->bitmap != NULL)
1323                         atomic_inc(&aio->bitmap->count);
1324
1325                 err = submit_aio(&aio->aio);
1326                 if(err){
1327                         printk("do_io - submit_aio failed, "
1328                                "err = %d\n", err);
1329                         req->error = 1;
1330                         return;
1331                 }
1332
1333                 start = end;
1334         } while(start < nsectors);
1335 }