[GFS2] Remove semaphore.h from C files
[safe/jmp/linux-2.6] / fs / gfs2 / ops_address.c
1 /*
2  * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3  * Copyright (C) 2004-2005 Red Hat, Inc.  All rights reserved.
4  *
5  * This copyrighted material is made available to anyone wishing to use,
6  * modify, copy, or redistribute it subject to the terms and conditions
7  * of the GNU General Public License v.2.
8  */
9
10 #include <linux/sched.h>
11 #include <linux/slab.h>
12 #include <linux/spinlock.h>
13 #include <linux/completion.h>
14 #include <linux/buffer_head.h>
15 #include <linux/pagemap.h>
16 #include <linux/pagevec.h>
17 #include <linux/mpage.h>
18 #include <linux/fs.h>
19 #include <linux/gfs2_ondisk.h>
20
21 #include "gfs2.h"
22 #include "lm_interface.h"
23 #include "incore.h"
24 #include "bmap.h"
25 #include "glock.h"
26 #include "inode.h"
27 #include "log.h"
28 #include "meta_io.h"
29 #include "ops_address.h"
30 #include "page.h"
31 #include "quota.h"
32 #include "trans.h"
33 #include "rgrp.h"
34 #include "ops_file.h"
35 #include "util.h"
36
37 /**
38  * gfs2_get_block - Fills in a buffer head with details about a block
39  * @inode: The inode
40  * @lblock: The block number to look up
41  * @bh_result: The buffer head to return the result in
42  * @create: Non-zero if we may add block to the file
43  *
44  * Returns: errno
45  */
46
47 int gfs2_get_block(struct inode *inode, sector_t lblock,
48                    struct buffer_head *bh_result, int create)
49 {
50         int new = create;
51         uint64_t dblock;
52         int error;
53         int boundary;
54
55         error = gfs2_block_map(inode, lblock, &new, &dblock, &boundary);
56         if (error)
57                 return error;
58
59         if (!dblock)
60                 return 0;
61
62         map_bh(bh_result, inode->i_sb, dblock);
63         if (new)
64                 set_buffer_new(bh_result);
65         if (boundary)
66                 set_buffer_boundary(bh_result);
67
68         return 0;
69 }
70
71 /**
72  * get_block_noalloc - Fills in a buffer head with details about a block
73  * @inode: The inode
74  * @lblock: The block number to look up
75  * @bh_result: The buffer head to return the result in
76  * @create: Non-zero if we may add block to the file
77  *
78  * Returns: errno
79  */
80
81 static int get_block_noalloc(struct inode *inode, sector_t lblock,
82                              struct buffer_head *bh_result, int create)
83 {
84         struct gfs2_inode *ip = inode->u.generic_ip;
85         int new = 0;
86         uint64_t dblock;
87         int error;
88         int boundary;
89
90         error = gfs2_block_map(inode, lblock, &new, &dblock, &boundary);
91         if (error)
92                 return error;
93
94         if (dblock)
95                 map_bh(bh_result, inode->i_sb, dblock);
96         else if (gfs2_assert_withdraw(ip->i_sbd, !create))
97                 error = -EIO;
98         if (boundary)
99                 set_buffer_boundary(bh_result);
100
101         return error;
102 }
103
104 /**
105  * gfs2_writepage - Write complete page
106  * @page: Page to write
107  *
108  * Returns: errno
109  *
110  * Some of this is copied from block_write_full_page() although we still
111  * call it to do most of the work.
112  */
113
114 static int gfs2_writepage(struct page *page, struct writeback_control *wbc)
115 {
116         struct inode *inode = page->mapping->host;
117         struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
118         struct gfs2_sbd *sdp = ip->i_sbd;
119         loff_t i_size = i_size_read(inode);
120         pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
121         unsigned offset;
122         int error;
123         int done_trans = 0;
124
125         if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl))) {
126                 unlock_page(page);
127                 return -EIO;
128         }
129         if (current->journal_info)
130                 goto out_ignore;
131
132         /* Is the page fully outside i_size? (truncate in progress) */
133         offset = i_size & (PAGE_CACHE_SIZE-1);
134         if (page->index > end_index || (page->index == end_index && !offset)) {
135                 page->mapping->a_ops->invalidatepage(page, 0);
136                 unlock_page(page);
137                 return 0; /* don't care */
138         }
139
140         if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED || gfs2_is_jdata(ip)) {
141                 error = gfs2_trans_begin(sdp, RES_DINODE + 1, 0);
142                 if (error)
143                         goto out_ignore;
144                 gfs2_page_add_databufs(ip, page, 0, sdp->sd_vfs->s_blocksize-1);
145                 done_trans = 1;
146         }
147         error = block_write_full_page(page, get_block_noalloc, wbc);
148         if (done_trans)
149                 gfs2_trans_end(sdp);
150         gfs2_meta_cache_flush(ip);
151         return error;
152
153 out_ignore:
154         redirty_page_for_writepage(wbc, page);
155         unlock_page(page);
156         return 0;
157 }
158
159 static int zero_readpage(struct page *page)
160 {
161         void *kaddr;
162
163         kaddr = kmap_atomic(page, KM_USER0);
164         memset(kaddr, 0, PAGE_CACHE_SIZE);
165         kunmap_atomic(page, KM_USER0);
166
167         SetPageUptodate(page);
168
169         return 0;
170 }
171
172 /**
173  * stuffed_readpage - Fill in a Linux page with stuffed file data
174  * @ip: the inode
175  * @page: the page
176  *
177  * Returns: errno
178  */
179
180 static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
181 {
182         struct buffer_head *dibh;
183         void *kaddr;
184         int error;
185
186         /* Only the first page of a stuffed file might contain data */
187         if (unlikely(page->index))
188                 return zero_readpage(page);
189
190         error = gfs2_meta_inode_buffer(ip, &dibh);
191         if (error)
192                 return error;
193
194         kaddr = kmap_atomic(page, KM_USER0);
195         memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
196                ip->i_di.di_size);
197         memset(kaddr + ip->i_di.di_size, 0, PAGE_CACHE_SIZE - ip->i_di.di_size);
198         kunmap_atomic(page, KM_USER0);
199
200         brelse(dibh);
201
202         SetPageUptodate(page);
203
204         return 0;
205 }
206
207
208 /**
209  * gfs2_readpage - readpage with locking
210  * @file: The file to read a page for. N.B. This may be NULL if we are
211  * reading an internal file.
212  * @page: The page to read
213  *
214  * Returns: errno
215  */
216
217 static int gfs2_readpage(struct file *file, struct page *page)
218 {
219         struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
220         struct gfs2_sbd *sdp = ip->i_sbd;
221         struct gfs2_holder gh;
222         int error;
223
224         if (likely(file != &gfs2_internal_file_sentinal)) {
225                 gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME|GL_AOP, &gh);
226                 error = gfs2_glock_nq_m_atime(1, &gh);
227                 if (unlikely(error))
228                         goto out_unlock;
229         }
230
231         if (gfs2_is_stuffed(ip)) {
232                 error = stuffed_readpage(ip, page);
233                 unlock_page(page);
234         } else
235                 error = mpage_readpage(page, gfs2_get_block);
236
237         if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
238                 error = -EIO;
239
240         if (file != &gfs2_internal_file_sentinal) {
241                 gfs2_glock_dq_m(1, &gh);
242                 gfs2_holder_uninit(&gh);
243         }
244 out:
245         return error;
246 out_unlock:
247         unlock_page(page);
248         if (file != &gfs2_internal_file_sentinal)
249                 gfs2_holder_uninit(&gh);
250         goto out;
251 }
252
253 #define list_to_page(head) (list_entry((head)->prev, struct page, lru))
254
255 /**
256  * gfs2_readpages - Read a bunch of pages at once
257  *
258  * Some notes:
259  * 1. This is only for readahead, so we can simply ignore any things
260  *    which are slightly inconvenient (such as locking conflicts between
261  *    the page lock and the glock) and return having done no I/O. Its
262  *    obviously not something we'd want to do on too regular a basis.
263  *    Any I/O we ignore at this time will be done via readpage later.
264  * 2. We have to handle stuffed files here too.
265  * 3. mpage_readpages() does most of the heavy lifting in the common case.
266  * 4. gfs2_get_block() is relied upon to set BH_Boundary in the right places.
267  * 5. We use LM_FLAG_TRY_1CB here, effectively we then have lock-ahead as
268  *    well as read-ahead.
269  */
270 static int gfs2_readpages(struct file *file, struct address_space *mapping,
271                           struct list_head *pages, unsigned nr_pages)
272 {
273         struct inode *inode = mapping->host;
274         struct gfs2_inode *ip = inode->u.generic_ip;
275         struct gfs2_sbd *sdp = ip->i_sbd;
276         struct gfs2_holder gh;
277         unsigned page_idx;
278         int ret;
279
280         if (likely(file != &gfs2_internal_file_sentinal)) {
281                 gfs2_holder_init(ip->i_gl, LM_ST_SHARED,
282                                  LM_FLAG_TRY_1CB|GL_ATIME|GL_AOP, &gh);
283                 ret = gfs2_glock_nq_m_atime(1, &gh);
284                 if (ret == GLR_TRYFAILED) 
285                         goto out_noerror;
286                 if (unlikely(ret))
287                         goto out_unlock;
288         }
289
290         if (gfs2_is_stuffed(ip)) {
291                 struct pagevec lru_pvec;
292                 pagevec_init(&lru_pvec, 0);
293                 for (page_idx = 0; page_idx < nr_pages; page_idx++) {
294                         struct page *page = list_to_page(pages);
295                         list_del(&page->lru);
296                         if (!add_to_page_cache(page, mapping,
297                                                page->index, GFP_KERNEL)) {
298                                 ret = stuffed_readpage(ip, page);
299                                 unlock_page(page);
300                                 if (!pagevec_add(&lru_pvec, page))
301                                          __pagevec_lru_add(&lru_pvec);
302                         }
303                         page_cache_release(page);
304                 }
305                 pagevec_lru_add(&lru_pvec);
306                 ret = 0;
307         } else {
308                 /* What we really want to do .... */
309                 ret = mpage_readpages(mapping, pages, nr_pages, gfs2_get_block);
310         }
311
312         if (likely(file != &gfs2_internal_file_sentinal)) {
313                 gfs2_glock_dq_m(1, &gh);
314                 gfs2_holder_uninit(&gh);
315         }
316 out:
317         if (unlikely(test_bit(SDF_SHUTDOWN, &sdp->sd_flags)))
318                 ret = -EIO;
319         return ret;
320 out_noerror:
321         ret = 0;
322 out_unlock:
323         /* unlock all pages, we can't do any I/O right now */
324         for (page_idx = 0; page_idx < nr_pages; page_idx++) {
325                 struct page *page = list_to_page(pages);
326                 list_del(&page->lru);
327                 unlock_page(page);
328                 page_cache_release(page);
329         }
330         if (likely(file != &gfs2_internal_file_sentinal))
331                 gfs2_holder_uninit(&gh);
332         goto out;
333 }
334
335 /**
336  * gfs2_prepare_write - Prepare to write a page to a file
337  * @file: The file to write to
338  * @page: The page which is to be prepared for writing
339  * @from: From (byte range within page)
340  * @to: To (byte range within page)
341  *
342  * Returns: errno
343  */
344
345 static int gfs2_prepare_write(struct file *file, struct page *page,
346                               unsigned from, unsigned to)
347 {
348         struct gfs2_inode *ip = page->mapping->host->u.generic_ip;
349         struct gfs2_sbd *sdp = ip->i_sbd;
350         unsigned int data_blocks, ind_blocks, rblocks;
351         int alloc_required;
352         int error = 0;
353         loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + from;
354         loff_t end = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
355         struct gfs2_alloc *al;
356
357         gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, GL_ATIME|GL_AOP, &ip->i_gh);
358         error = gfs2_glock_nq_m_atime(1, &ip->i_gh);
359         if (error)
360                 goto out_uninit;
361
362         gfs2_write_calc_reserv(ip, to - from, &data_blocks, &ind_blocks);
363
364         error = gfs2_write_alloc_required(ip, pos, from - to, &alloc_required);
365         if (error)
366                 goto out_unlock;
367
368
369         if (alloc_required) {
370                 al = gfs2_alloc_get(ip);
371
372                 error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
373                 if (error)
374                         goto out_alloc_put;
375
376                 error = gfs2_quota_check(ip, ip->i_di.di_uid, ip->i_di.di_gid);
377                 if (error)
378                         goto out_qunlock;
379
380                 al->al_requested = data_blocks + ind_blocks;
381                 error = gfs2_inplace_reserve(ip);
382                 if (error)
383                         goto out_qunlock;
384         }
385
386         rblocks = RES_DINODE + ind_blocks;
387         if (gfs2_is_jdata(ip))
388                 rblocks += data_blocks ? data_blocks : 1;
389         if (ind_blocks || data_blocks)
390                 rblocks += RES_STATFS + RES_QUOTA;
391
392         error = gfs2_trans_begin(sdp, rblocks, 0);
393         if (error)
394                 goto out;
395
396         if (gfs2_is_stuffed(ip)) {
397                 if (end > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
398                         error = gfs2_unstuff_dinode(ip, gfs2_unstuffer_page,
399                                                     page);
400                         if (error == 0)
401                                 goto prepare_write;
402                 } else if (!PageUptodate(page))
403                         error = stuffed_readpage(ip, page);
404                 goto out;
405         }
406
407 prepare_write:
408         error = block_prepare_write(page, from, to, gfs2_get_block);
409
410 out:
411         if (error) {
412                 gfs2_trans_end(sdp);
413                 if (alloc_required) {
414                         gfs2_inplace_release(ip);
415 out_qunlock:
416                         gfs2_quota_unlock(ip);
417 out_alloc_put:
418                         gfs2_alloc_put(ip);
419                 }
420 out_unlock:
421                 gfs2_glock_dq_m(1, &ip->i_gh);
422 out_uninit:
423                 gfs2_holder_uninit(&ip->i_gh);
424         }
425
426         return error;
427 }
428
429 /**
430  * gfs2_commit_write - Commit write to a file
431  * @file: The file to write to
432  * @page: The page containing the data
433  * @from: From (byte range within page)
434  * @to: To (byte range within page)
435  *
436  * Returns: errno
437  */
438
439 static int gfs2_commit_write(struct file *file, struct page *page,
440                              unsigned from, unsigned to)
441 {
442         struct inode *inode = page->mapping->host;
443         struct gfs2_inode *ip = inode->u.generic_ip;
444         struct gfs2_sbd *sdp = ip->i_sbd;
445         int error = -EOPNOTSUPP;
446         struct buffer_head *dibh;
447         struct gfs2_alloc *al = &ip->i_alloc;;
448
449         if (gfs2_assert_withdraw(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)))
450                 goto fail_nounlock;
451
452         error = gfs2_meta_inode_buffer(ip, &dibh);
453         if (error)
454                 goto fail_endtrans;
455
456         gfs2_trans_add_bh(ip->i_gl, dibh, 1);
457
458         if (gfs2_is_stuffed(ip)) {
459                 uint64_t file_size;
460                 void *kaddr;
461
462                 file_size = ((uint64_t)page->index << PAGE_CACHE_SHIFT) + to;
463
464                 kaddr = kmap_atomic(page, KM_USER0);
465                 memcpy(dibh->b_data + sizeof(struct gfs2_dinode) + from,
466                        (char *)kaddr + from, to - from);
467                 kunmap_atomic(page, KM_USER0);
468
469                 SetPageUptodate(page);
470
471                 if (inode->i_size < file_size)
472                         i_size_write(inode, file_size);
473         } else {
474                 if (sdp->sd_args.ar_data == GFS2_DATA_ORDERED ||
475                     gfs2_is_jdata(ip))
476                         gfs2_page_add_databufs(ip, page, from, to);
477                 error = generic_commit_write(file, page, from, to);
478                 if (error)
479                         goto fail;
480         }
481
482         if (ip->i_di.di_size < inode->i_size)
483                 ip->i_di.di_size = inode->i_size;
484
485         gfs2_dinode_out(&ip->i_di, dibh->b_data);
486         brelse(dibh);
487         gfs2_trans_end(sdp);
488         if (al->al_requested) {
489                 gfs2_inplace_release(ip);
490                 gfs2_quota_unlock(ip);
491                 gfs2_alloc_put(ip);
492         }
493         gfs2_glock_dq_m(1, &ip->i_gh);
494         gfs2_holder_uninit(&ip->i_gh);
495         return 0;
496
497 fail:
498         brelse(dibh);
499 fail_endtrans:
500         gfs2_trans_end(sdp);
501         if (al->al_requested) {
502                 gfs2_inplace_release(ip);
503                 gfs2_quota_unlock(ip);
504                 gfs2_alloc_put(ip);
505         }
506         gfs2_glock_dq_m(1, &ip->i_gh);
507         gfs2_holder_uninit(&ip->i_gh);
508 fail_nounlock:
509         ClearPageUptodate(page);
510         return error;
511 }
512
513 /**
514  * gfs2_bmap - Block map function
515  * @mapping: Address space info
516  * @lblock: The block to map
517  *
518  * Returns: The disk address for the block or 0 on hole or error
519  */
520
521 static sector_t gfs2_bmap(struct address_space *mapping, sector_t lblock)
522 {
523         struct gfs2_inode *ip = mapping->host->u.generic_ip;
524         struct gfs2_holder i_gh;
525         sector_t dblock = 0;
526         int error;
527
528         error = gfs2_glock_nq_init(ip->i_gl, LM_ST_SHARED, LM_FLAG_ANY, &i_gh);
529         if (error)
530                 return 0;
531
532         if (!gfs2_is_stuffed(ip))
533                 dblock = generic_block_bmap(mapping, lblock, gfs2_get_block);
534
535         gfs2_glock_dq_uninit(&i_gh);
536
537         return dblock;
538 }
539
540 static void discard_buffer(struct gfs2_sbd *sdp, struct buffer_head *bh)
541 {
542         struct gfs2_bufdata *bd;
543
544         gfs2_log_lock(sdp);
545         bd = bh->b_private;
546         if (bd) {
547                 bd->bd_bh = NULL;
548                 bh->b_private = NULL;
549                 gfs2_log_unlock(sdp);
550                 brelse(bh);
551         } else
552                 gfs2_log_unlock(sdp);
553
554         lock_buffer(bh);
555         clear_buffer_dirty(bh);
556         bh->b_bdev = NULL;
557         clear_buffer_mapped(bh);
558         clear_buffer_req(bh);
559         clear_buffer_new(bh);
560         clear_buffer_delay(bh);
561         unlock_buffer(bh);
562 }
563
564 static void gfs2_invalidatepage(struct page *page, unsigned long offset)
565 {
566         struct gfs2_sbd *sdp = page->mapping->host->i_sb->s_fs_info;
567         struct buffer_head *head, *bh, *next;
568         unsigned int curr_off = 0;
569
570         BUG_ON(!PageLocked(page));
571         if (!page_has_buffers(page))
572                 return;
573
574         bh = head = page_buffers(page);
575         do {
576                 unsigned int next_off = curr_off + bh->b_size;
577                 next = bh->b_this_page;
578
579                 if (offset <= curr_off)
580                         discard_buffer(sdp, bh);
581
582                 curr_off = next_off;
583                 bh = next;
584         } while (bh != head);
585
586         if (!offset)
587                 try_to_release_page(page, 0);
588
589         return;
590 }
591
592 static ssize_t gfs2_direct_IO_write(struct kiocb *iocb, const struct iovec *iov,
593                                     loff_t offset, unsigned long nr_segs)
594 {
595         struct file *file = iocb->ki_filp;
596         struct inode *inode = file->f_mapping->host;
597         struct gfs2_inode *ip = inode->u.generic_ip;
598         struct gfs2_holder gh;
599         int rv;
600
601         /*
602          * Shared lock, even though its write, since we do no allocation
603          * on this path. All we need change is atime.
604          */
605         gfs2_holder_init(ip->i_gl, LM_ST_SHARED, GL_ATIME, &gh);
606         rv = gfs2_glock_nq_m_atime(1, &gh);
607         if (rv)
608                 goto out;
609
610         /*
611          * Should we return an error here? I can't see that O_DIRECT for
612          * a journaled file makes any sense. For now we'll silently fall
613          * back to buffered I/O, likewise we do the same for stuffed
614          * files since they are (a) small and (b) unaligned.
615          */
616         if (gfs2_is_jdata(ip))
617                 goto out;
618
619         if (gfs2_is_stuffed(ip))
620                 goto out;
621
622         rv = __blockdev_direct_IO(WRITE, iocb, inode, inode->i_sb->s_bdev,
623                                   iov, offset, nr_segs, gfs2_get_block,
624                                   NULL, DIO_OWN_LOCKING);
625 out:
626         gfs2_glock_dq_m(1, &gh);
627         gfs2_holder_uninit(&gh);
628
629         return rv;
630 }
631
632 /**
633  * gfs2_direct_IO
634  *
635  * This is called with a shared lock already held for the read path.
636  * Currently, no locks are held when the write path is called.
637  */
638 static ssize_t gfs2_direct_IO(int rw, struct kiocb *iocb,
639                               const struct iovec *iov, loff_t offset,
640                               unsigned long nr_segs)
641 {
642         struct file *file = iocb->ki_filp;
643         struct inode *inode = file->f_mapping->host;
644         struct gfs2_inode *ip = inode->u.generic_ip;
645         struct gfs2_sbd *sdp = ip->i_sbd;
646
647         if (rw == WRITE)
648                 return gfs2_direct_IO_write(iocb, iov, offset, nr_segs);
649
650         if (gfs2_assert_warn(sdp, gfs2_glock_is_locked_by_me(ip->i_gl)) ||
651             gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
652                 return -EINVAL;
653
654         return __blockdev_direct_IO(READ, iocb, inode, inode->i_sb->s_bdev, iov,
655                                     offset, nr_segs, gfs2_get_block, NULL,
656                                     DIO_OWN_LOCKING);
657 }
658
659 struct address_space_operations gfs2_file_aops = {
660         .writepage = gfs2_writepage,
661         .readpage = gfs2_readpage,
662         .readpages = gfs2_readpages,
663         .sync_page = block_sync_page,
664         .prepare_write = gfs2_prepare_write,
665         .commit_write = gfs2_commit_write,
666         .bmap = gfs2_bmap,
667         .invalidatepage = gfs2_invalidatepage,
668         .direct_IO = gfs2_direct_IO,
669 };
670