[GFS2] Fix locking for Direct I/O reads
fs/gfs2/ops_address.c
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License v.2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
#include <linux/pagemap.h>
#include <linux/pagevec.h>
#include <linux/mpage.h>
#include <linux/fs.h>
#include <linux/gfs2_ondisk.h>

#include "gfs2.h"
#include "lm_interface.h"
#include "incore.h"
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "ops_address.h"
#include "page.h"
#include "quota.h"
#include "trans.h"
#include "rgrp.h"
#include "ops_file.h"
#include "util.h"

/**
 * gfs2_get_block - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add a block to the file
 *
 * Returns: errno
 */

int gfs2_get_block(struct inode *inode, sector_t lblock,
                   struct buffer_head *bh_result, int create)
{
        int new = create;
        uint64_t dblock;
        int error;
        int boundary;

        error = gfs2_block_map(inode, lblock, &new, &dblock, &boundary);
        if (error)
                return error;

        if (!dblock)
                return 0;

        map_bh(bh_result, inode->i_sb, dblock);
        if (new)
                set_buffer_new(bh_result);
        if (boundary)
                set_buffer_boundary(bh_result);

        return 0;
}
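
/*
 * Illustrative sketch (ours, not part of the original file): the
 * get_block contract as the generic code consumes it. A caller can
 * probe the mapping of a single logical block with a throwaway
 * buffer_head, exactly as generic_block_bmap() (used by gfs2_bmap()
 * below) does internally; buffer_mapped() then distinguishes a real
 * extent from a hole. The helper name is ours and it assumes the
 * caller already holds whatever glock the surrounding operation needs.
 */
static inline int gfs2_probe_block_sketch(struct inode *inode,
                                          sector_t lblock, uint64_t *dblock)
{
        struct buffer_head bh;
        int error;

        memset(&bh, 0, sizeof(bh));
        error = gfs2_get_block(inode, lblock, &bh, 0); /* create == 0 */
        if (error)
                return error;
        /* An unmapped buffer head means lblock falls in a hole */
        *dblock = buffer_mapped(&bh) ? (uint64_t)bh.b_blocknr : 0;
        return 0;
}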

/**
 * get_block_noalloc - Fills in a buffer head with details about a block
 * @inode: The inode
 * @lblock: The block number to look up
 * @bh_result: The buffer head to return the result in
 * @create: Non-zero if we may add a block to the file
 *
 * Returns: errno
 */

static int get_block_noalloc(struct inode *inode, sector_t lblock,
                             struct buffer_head *bh_result, int create)
{
        int new = 0;
        uint64_t dblock;
        int error;
        int boundary;

        error = gfs2_block_map(inode, lblock, &new, &dblock, &boundary);
        if (error)
                return error;

        if (dblock)
                map_bh(bh_result, inode->i_sb, dblock);
        else if (gfs2_assert_withdraw(GFS2_SB(inode), !create))
                error = -EIO;
        if (boundary)
                set_buffer_boundary(bh_result);

        return error;
}

/**
 * gfs2_writepage - Write complete page
 * @page: Page to write
 *
 * Returns: errno
 *
 * Some of this is copied from block_write_full_page() although we still
 * call it to do most of the work.
 */

static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        loff_t i_size = i_size_read(inode);
        pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
        unsigned offset;
        int error;
        int done_trans = 0;

        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
                unlock_page(page);
                return -EIO;
        }
        if (current->journal_info)
                goto out_ignore;

        /* Is the page fully outside i_size? (truncate in progress) */
        offset = i_size & (PAGE_CACHE_SIZE-1);
        if (page->index > end_index || (page->index == end_index && !offset)) {
                page->mapping->a_ops->invalidatepage(page, 0);
                unlock_page(page);
                return 0; /* don't care */
        }

        if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
                error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
                if (error)
                        goto out_ignore;
                gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
                done_trans = 1;
        }
        error = block_write_full_page(page, get_block_noalloc, wbc);
        if (done_trans)
                gfs2_trans_end(sdp);
        gfs2_meta_cache_flush(ip);
        return error;

out_ignore:
        redirty_page_for_writepage(wbc, page);
        unlock_page(page);
        return 0;
}

static int zero_readpage(struct page *page)
{
        void *kaddr;

        kaddr = kmap_atomic(page, KM_USER0);
        memset(kaddr, 0, PAGE_CACHE_SIZE);
        kunmap_atomic(kaddr, KM_USER0);

        SetPageUptodate(page);

        return 0;
}

/**
 * stuffed_readpage - Fill in a Linux page with stuffed file data
 * @ip: the inode
 * @page: the page
 *
 * Returns: errno
 */

static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
{
        struct buffer_head *dibh;
        void *kaddr;
        int error;

        /* Only the first page of a stuffed file might contain data */
        if (unlikely(page->index))
                return zero_readpage(page);

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                return error;

        kaddr = kmap_atomic(page, KM_USER0);
        memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
               ip->i_di.di_size);
        memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
        kunmap_atomic(kaddr, KM_USER0);

        brelse(dibh);

        SetPageUptodate(page);

        return 0;
}
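
/*
 * Sketch (ours, not in the original): how much data a stuffed inode
 * can hold. Stuffed data lives in the dinode block immediately after
 * the on-disk header, so the capacity is one filesystem block minus
 * sizeof(struct gfs2_dinode) -- a little under one page for the common
 * 4096-byte block size. This is why only page index 0 of a stuffed
 * file can ever contain data, and why gfs2_prepare_write() below
 * unstuffs the dinode once a write would cross this boundary.
 */
static inline unsigned int gfs2_stuffed_capacity_sketch(struct gfs2_sbd *sdp)
{
        return sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode);
}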

/**
 * gfs2_readpage - readpage with locking
 * @file: The file to read a page for. N.B. This may point at the
 * internal-file sentinel rather than a real file if we are reading
 * an internal file.
 * @page: The page to read
 *
 * Returns: errno
 */

static int gfs2_readpage(struct file *file, struct page *page)
{
        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        struct gfs2_holder gh;
        int error;

        if (likely(file != &gfs2_internal_file_sentinal)) {
                gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
                error = gfs2_glock_nq_m_atime(1, &gh);
                if (unlikely(error))
                        goto out_unlock;
        }

        if (gfs2_is_stuffed(ip)) {
                error = stuffed_readpage(ip, page);
                unlock_page(page);
        } else
                error = mpage_readpage(page, gfs2_get_block);

        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                error = -EIO;

        if (file != &gfs2_internal_file_sentinal) {
                gfs2_glock_dq_m(1, &gh);
                gfs2_holder_uninit(&gh);
        }
out:
        return error;
out_unlock:
        unlock_page(page);
        if (file != &gfs2_internal_file_sentinal)
                gfs2_holder_uninit(&gh);
        goto out;
}

#define list_to_page(head) (list_entry((head)->prev, struct page, lru))

/**
 * gfs2_readpages - Read a bunch of pages at once
 *
 * Some notes:
 * 1. This is only for readahead, so we can simply ignore anything which
 *    is slightly inconvenient (such as locking conflicts between the
 *    page lock and the glock) and return having done no I/O. It's
 *    obviously not something we'd want to do on too regular a basis.
 *    Any I/O we ignore at this time will be done via readpage later.
 * 2. We have to handle stuffed files here too.
 * 3. mpage_readpages() does most of the heavy lifting in the common case.
 * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
 * 5. We use LM_FLAG_TRY_1CB here, so we effectively get lock-ahead as
 *    well as read-ahead (see the sketch after this function).
 */
static int gfs2_readpages(struct file *file, struct address_space *mapping,
                          struct list_head *pages, unsigned nr_pages)
{
        struct inode *inode = mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        struct gfs2_holder gh;
        unsigned page_idx;
        int ret;

        if (likely(file != &gfs2_internal_file_sentinal)) {
                gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
                                 LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
                ret = gfs2_glock_nq_m_atime(1, &gh);
                if (ret == GLR_TRYFAILED)
                        goto out_noerror;
                if (unlikely(ret))
                        goto out_unlock;
        }

        if (gfs2_is_stuffed(ip)) {
                struct pagevec lru_pvec;
                pagevec_init(&lru_pvec, 0);
                for (page_idx = 0; page_idx < nr_pages; page_idx++) {
                        struct page *page = list_to_page(pages);
                        list_del(&page->lru);
                        if (!add_to_page_cache(page, mapping,
                                               page->index, GFP_KERNEL)) {
                                ret = stuffed_readpage(ip, page);
                                unlock_page(page);
                                if (!pagevec_add(&lru_pvec, page))
                                        __pagevec_lru_add(&lru_pvec);
                        }
                        page_cache_release(page);
                }
                pagevec_lru_add(&lru_pvec);
                ret = 0;
        } else {
                /* What we really want to do .... */
                ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
        }

        if (likely(file != &gfs2_internal_file_sentinal)) {
                gfs2_glock_dq_m(1, &gh);
                gfs2_holder_uninit(&gh);
        }
out:
        if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
                ret = -EIO;
        return ret;
out_noerror:
        ret = 0;
out_unlock:
        /* unlock all pages, we can't do any I/O right now */
        for (page_idx = 0; page_idx < nr_pages; page_idx++) {
                struct page *page = list_to_page(pages);
                list_del(&page->lru);
                unlock_page(page);
                page_cache_release(page);
        }
        if (likely(file != &gfs2_internal_file_sentinal))
                gfs2_holder_uninit(&gh);
        goto out;
}
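
/*
 * Sketch (ours, not part of the original): the try-lock shape used
 * above, pulled out for clarity. LM_FLAG_TRY_1CB requests the glock
 * without blocking (our understanding is that it also issues a single
 * callback to any conflicting holder), and GLR_TRYFAILED is not an
 * error: it simply means "skip the readahead, gfs2_readpage() will do
 * the work later". The helper name is ours.
 */
static int gfs2_try_shared_glock_sketch(struct gfs2_inode *ip,
                                        struct gfs2_holder *gh)
{
        int ret;

        gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
                         LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, gh);
        ret = gfs2_glock_nq_m_atime(1, gh);
        if (ret == GLR_TRYFAILED) {
                gfs2_holder_uninit(gh);
                return 0;       /* no lock taken, but no error either */
        }
        return ret;     /* 0 on success; on failure the caller uninits */
}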

/**
 * gfs2_prepare_write - Prepare to write a page to a file
 * @file: The file to write to
 * @page: The page which is to be prepared for writing
 * @from: From (byte range within page)
 * @to: To (byte range within page)
 *
 * Returns: errno
 */

static int gfs2_prepare_write(struct file *file, struct page *page,
                              unsigned from, unsigned to)
{
        struct gfs2_inode *ip = GFS2_I(page->mapping->host);
        struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
        unsigned int data_blocks, ind_blocks, rblocks;
        int alloc_required;
        int error = 0;
        loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
        loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
        struct gfs2_alloc *al;

        gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
        error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
        if (error)
                goto out_uninit;

        gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);

        error = gfs2_write_alloc_required(ip, pos, to - from, &alloc_required);
        if (error)
                goto out_unlock;

        if (alloc_required) {
                al = gfs2_alloc_get(ip);

                error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
                if (error)
                        goto out_alloc_put;

                error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
                if (error)
                        goto out_qunlock;

                al->al_requested = data_blocks + ind_blocks;
                error = gfs2_inplace_reserve(ip);
                if (error)
                        goto out_qunlock;
        }

        rblocks = RES_DINODE + ind_blocks;
        if (gfs2_is_jdata(ip))
                rblocks += data_blocks ? data_blocks : 1;
        if (ind_blocks || data_blocks)
                rblocks += RES_STATFS + RES_QUOTA;

        error = gfs2_trans_begin(sdp, rblocks, 0);
        if (error)
                goto out_trans_fail;

        if (gfs2_is_stuffed(ip)) {
                if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
                        error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
                                                    page);
                        if (error == 0)
                                goto prepare_write;
                } else if (!PageUptodate(page))
                        error = stuffed_readpage(ip, page);
                goto out;
        }

prepare_write:
        error = block_prepare_write(page, from, to, gfs2_get_block);

out:
        if (error) {
                gfs2_trans_end(sdp);
out_trans_fail:
                if (alloc_required) {
                        gfs2_inplace_release(ip);
out_qunlock:
                        gfs2_quota_unlock(ip);
out_alloc_put:
                        gfs2_alloc_put(ip);
                }
out_unlock:
                gfs2_glock_dq_m(1, &ip->i_gh);
out_uninit:
                gfs2_holder_uninit(&ip->i_gh);
        }

        return error;
}
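
/*
 * Worked example (ours, not from the original): suppose an ordered-mode
 * write needs one new indirect block, so ind_blocks == 1 and
 * data_blocks > 0. The reservation above then comes to
 *
 *      rblocks = RES_DINODE + 1 + RES_STATFS + RES_QUOTA
 *
 * data_blocks itself is only added for journaled-data inodes, because
 * only then does the file data pass through the log; in ordered mode
 * the data is written in place and merely ordered against the log
 * flush.
 */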

/**
 * gfs2_commit_write - Commit write to a file
 * @file: The file to write to
 * @page: The page containing the data
 * @from: From (byte range within page)
 * @to: To (byte range within page)
 *
 * Returns: errno
 */

static int gfs2_commit_write(struct file *file, struct page *page,
                             unsigned from, unsigned to)
{
        struct inode *inode = page->mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int error = -EOPNOTSUPP;
        struct buffer_head *dibh;
        struct gfs2_alloc *al = &ip->i_alloc;

        if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
                goto fail_nounlock;

        error = gfs2_meta_inode_buffer(ip, &dibh);
        if (error)
                goto fail_endtrans;

        gfs2_trans_add_bh(ip->i_gl, dibh, 1);

        if (gfs2_is_stuffed(ip)) {
                uint64_t file_size;
                void *kaddr;

                file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;

                kaddr = kmap_atomic(page, KM_USER0);
                memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
                       (char *)kaddr + from, to - from);
                kunmap_atomic(kaddr, KM_USER0);

                SetPageUptodate(page);

                if (inode->i_size < file_size)
                        i_size_write(inode, file_size);
        } else {
                if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
                    gfs2_is_jdata(ip))
                        gfs2_page_add_databufs(ip, page, from, to);
                error = generic_commit_write(file, page, from, to);
                if (error)
                        goto fail;
        }

        if (ip->i_di.di_size < inode->i_size)
                ip->i_di.di_size = inode->i_size;

        gfs2_dinode_out(&ip->i_di, dibh->b_data);
        brelse(dibh);
        gfs2_trans_end(sdp);
        if (al->al_requested) {
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
                gfs2_alloc_put(ip);
        }
        gfs2_glock_dq_m(1, &ip->i_gh);
        gfs2_holder_uninit(&ip->i_gh);
        return 0;

fail:
        brelse(dibh);
fail_endtrans:
        gfs2_trans_end(sdp);
        if (al->al_requested) {
                gfs2_inplace_release(ip);
                gfs2_quota_unlock(ip);
                gfs2_alloc_put(ip);
        }
        gfs2_glock_dq_m(1, &ip->i_gh);
        gfs2_holder_uninit(&ip->i_gh);
fail_nounlock:
        ClearPageUptodate(page);
        return error;
}
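
/*
 * Worked example (ours): committing bytes [from, to) of page 0 of a
 * stuffed file. The data is copied straight into the dinode buffer at
 * sizeof(struct gfs2_dinode) + from -- there are no data blocks to
 * write -- and i_size only ever grows: rewriting bytes 100..200 of a
 * 4000-byte stuffed file leaves i_size at 4000, while appending at
 * 4000..4050 raises it to 4050.
 */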

/**
 * gfs2_bmap - Block map function
 * @mapping: Address space info
 * @lblock: The block to map
 *
 * Returns: The disk address for the block or 0 on hole or error
 */

static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
{
        struct gfs2_inode *ip = GFS2_I(mapping->host);
        struct gfs2_holder i_gh;
        sector_t dblock = 0;
        int error;

        error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
        if (error)
                return 0;

        if (!gfs2_is_stuffed(ip))
                dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);

        gfs2_glock_dq_uninit(&i_gh);

        return dblock;
}

static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
        struct gfs2_bufdata *bd;

        gfs2_log_lock(sdp);
        bd = bh->b_private;
        if (bd) {
                bd->bd_bh = NULL;
                bh->b_private = NULL;
                gfs2_log_unlock(sdp);
                brelse(bh);
        } else
                gfs2_log_unlock(sdp);

        lock_buffer(bh);
        clear_buffer_dirty(bh);
        bh->b_bdev = NULL;
        clear_buffer_mapped(bh);
        clear_buffer_req(bh);
        clear_buffer_new(bh);
        clear_buffer_delay(bh);
        unlock_buffer(bh);
}

static void gfs2_invalidatepage(struct page *page, unsigned long offset)
{
        struct gfs2_sbd *sdp = page->mapping->host->i_sb->s_fs_info;
        struct buffer_head *head, *bh, *next;
        unsigned int curr_off = 0;

        BUG_ON(!PageLocked(page));
        if (!page_has_buffers(page))
                return;

        bh = head = page_buffers(page);
        do {
                unsigned int next_off = curr_off + bh->b_size;
                next = bh->b_this_page;

                if (offset <= curr_off)
                        discard_buffer(sdp, bh);

                curr_off = next_off;
                bh = next;
        } while (bh != head);

        if (!offset)
                try_to_release_page(page, 0);
}
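
/*
 * Note (ours, summarising the loop above): a buffer is discarded only
 * if it starts at or after @offset, i.e. only the tail of the page from
 * @offset onwards is invalidated, so a partial-page truncate leaves the
 * earlier buffers alone. Only a full invalidate (offset == 0) can make
 * the whole page releasable, hence the try_to_release_page() call in
 * just that case.
 */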

static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
                                    loff_t offset, unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
        int rv;

        /*
         * Shared lock, even though it's a write, since we do no allocation
         * on this path. All we need to change is the atime.
         */
        gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
        rv = gfs2_glock_nq_m_atime(1, &gh);
        if (rv)
                goto out_uninit;

        /*
         * Should we return an error here? I can't see that O_DIRECT for
         * a journaled file makes any sense. For now we'll silently fall
         * back to buffered I/O, and likewise we do the same for stuffed
         * files since they are (a) small and (b) unaligned.
         */
        if (gfs2_is_jdata(ip))
                goto out;

        if (gfs2_is_stuffed(ip))
                goto out;

        rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
                                  iov, offset, nr_segs, gfs2_get_block,
                                  NULL, DIO_OWN_LOCKING);
out:
        gfs2_glock_dq_m(1, &gh);
out_uninit:
        gfs2_holder_uninit(&gh);

        return rv;
}
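
/*
 * Note (ours): on the jdata/stuffed paths above we return with rv == 0
 * having done nothing, and as we read the generic O_DIRECT write path,
 * a zero return makes it fall back to buffered I/O for the whole
 * request -- the "silent fallback" the comment describes. The shared
 * glock suffices because, as the comment above says, this path does no
 * allocation; all it needs to change is the atime.
 */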

/**
 * gfs2_direct_IO
 *
 * This is called with a shared lock already held for the read path.
 * Currently, no locks are held when the write path is called.
 */
static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
                              const struct iovec *iov, loff_t offset,
                              unsigned long nr_segs)
{
        struct file *file = iocb->ki_filp;
        struct inode *inode = file->f_mapping->host;
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_sbd *sdp = GFS2_SB(inode);
        int ret;

        if (rw == WRITE)
                return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);

        if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
            gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
                return -EINVAL;

        mutex_lock(&inode->i_mutex);
        ret = __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
                                   offset, nr_segs, gfs2_get_block, NULL,
                                   DIO_OWN_LOCKING);
        mutex_unlock(&inode->i_mutex);
        return ret;
}
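
/*
 * Note (ours, summarising the locking this change establishes for
 * direct reads): the read path takes the shared glock before calling
 * ->direct_IO (hence the assert above), and DIO_OWN_LOCKING tells
 * __blockdev_direct_IO() not to take i_mutex itself, so we take it
 * here around the transfer. The resulting order is
 *
 *      glock (shared)  ->  i_mutex
 *
 * rather than letting the generic direct-I/O code grab i_mutex before
 * the glock is held.
 */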

const struct address_space_operations gfs2_file_aops = {
        .writepage = gfs2_writepage,
        .readpage = gfs2_readpage,
        .readpages = gfs2_readpages,
        .sync_page = block_sync_page,
        .prepare_write = gfs2_prepare_write,
        .commit_write = gfs2_commit_write,
        .bmap = gfs2_bmap,
        .invalidatepage = gfs2_invalidatepage,
        .direct_IO = gfs2_direct_IO,
};