49b5ad22a9d8f0f7819c3315a15ecf21c4608263
[safe/jmp/linux-2.6] / fs / xfs / xfs_iomap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_attr.h"
46 #include "xfs_buf_item.h"
47 #include "xfs_trans_space.h"
48 #include "xfs_utils.h"
49 #include "xfs_iomap.h"
50 #include "xfs_trace.h"
51
52
/*
 * Round a byte offset down by clearing its low m_writeio_log bits.
 * Both arguments are parenthesized so that any pointer expression
 * (e.g. &mount or ip->i_mount) can be passed as mp.
 */
#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> (mp)->m_writeio_log) \
						<< (mp)->m_writeio_log)
/* Number of extent records used by the strategy/writeback path. */
#define XFS_STRAT_WRITE_IMAPS	2
/* Number of extent records requested per lookup on the write path. */
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
57
58 STATIC void
59 xfs_imap_to_bmap(
60         xfs_inode_t     *ip,
61         xfs_off_t       offset,
62         xfs_bmbt_irec_t *imap,
63         xfs_iomap_t     *iomapp,
64         int             imaps,                  /* Number of imap entries */
65         int             flags)
66 {
67         xfs_fsblock_t   start_block;
68
69         iomapp->iomap_offset = imap->br_startoff;
70         iomapp->iomap_bsize = imap->br_blockcount;
71         iomapp->iomap_flags = flags;
72
73         start_block = imap->br_startblock;
74         if (start_block == HOLESTARTBLOCK) {
75                 iomapp->iomap_bn = IOMAP_DADDR_NULL;
76                 iomapp->iomap_flags |= IOMAP_HOLE;
77         } else if (start_block == DELAYSTARTBLOCK) {
78                 iomapp->iomap_bn = IOMAP_DADDR_NULL;
79                 iomapp->iomap_flags |= IOMAP_DELAY;
80         } else {
81                 iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
82                 if (ISUNWRITTEN(imap))
83                         iomapp->iomap_flags |= IOMAP_UNWRITTEN;
84         }
85 }
86
87 int
88 xfs_iomap(
89         xfs_inode_t     *ip,
90         xfs_off_t       offset,
91         ssize_t         count,
92         int             flags,
93         xfs_iomap_t     *iomapp,
94         int             *niomaps)
95 {
96         xfs_mount_t     *mp = ip->i_mount;
97         xfs_fileoff_t   offset_fsb, end_fsb;
98         int             error = 0;
99         int             lockmode = 0;
100         xfs_bmbt_irec_t imap;
101         int             nimaps = 1;
102         int             bmapi_flags = 0;
103         int             iomap_flags = 0;
104
105         ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
106         ASSERT(niomaps && *niomaps == 1);
107
108         if (XFS_FORCED_SHUTDOWN(mp))
109                 return XFS_ERROR(EIO);
110
111         trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
112
113         switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
114         case BMAPI_READ:
115                 lockmode = xfs_ilock_map_shared(ip);
116                 bmapi_flags = XFS_BMAPI_ENTIRE;
117                 break;
118         case BMAPI_WRITE:
119                 lockmode = XFS_ILOCK_EXCL;
120                 if (flags & BMAPI_IGNSTATE)
121                         bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
122                 xfs_ilock(ip, lockmode);
123                 break;
124         case BMAPI_ALLOCATE:
125                 lockmode = XFS_ILOCK_SHARED;
126                 bmapi_flags = XFS_BMAPI_ENTIRE;
127
128                 /* Attempt non-blocking lock */
129                 if (flags & BMAPI_TRYLOCK) {
130                         if (!xfs_ilock_nowait(ip, lockmode))
131                                 return XFS_ERROR(EAGAIN);
132                 } else {
133                         xfs_ilock(ip, lockmode);
134                 }
135                 break;
136         default:
137                 BUG();
138         }
139
140         ASSERT(offset <= mp->m_maxioffset);
141         if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
142                 count = mp->m_maxioffset - offset;
143         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
144         offset_fsb = XFS_B_TO_FSBT(mp, offset);
145
146         error = xfs_bmapi(NULL, ip, offset_fsb,
147                         (xfs_filblks_t)(end_fsb - offset_fsb),
148                         bmapi_flags,  NULL, 0, &imap,
149                         &nimaps, NULL, NULL);
150
151         if (error)
152                 goto out;
153
154         switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
155         case BMAPI_WRITE:
156                 /* If we found an extent, return it */
157                 if (nimaps &&
158                     (imap.br_startblock != HOLESTARTBLOCK) &&
159                     (imap.br_startblock != DELAYSTARTBLOCK)) {
160                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
161                         break;
162                 }
163
164                 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
165                         error = xfs_iomap_write_direct(ip, offset, count, flags,
166                                                        &imap, &nimaps, nimaps);
167                 } else {
168                         error = xfs_iomap_write_delay(ip, offset, count, flags,
169                                                       &imap, &nimaps);
170                 }
171                 if (!error) {
172                         trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
173                 }
174                 iomap_flags = IOMAP_NEW;
175                 break;
176         case BMAPI_ALLOCATE:
177                 /* If we found an extent, return it */
178                 xfs_iunlock(ip, lockmode);
179                 lockmode = 0;
180
181                 if (nimaps && !isnullstartblock(imap.br_startblock)) {
182                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
183                         break;
184                 }
185
186                 error = xfs_iomap_write_allocate(ip, offset, count,
187                                                  &imap, &nimaps);
188                 break;
189         }
190
191         ASSERT(nimaps <= 1);
192
193         if (nimaps)
194                 xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, iomap_flags);
195         *niomaps = nimaps;
196
197 out:
198         if (lockmode)
199                 xfs_iunlock(ip, lockmode);
200         return XFS_ERROR(error);
201 }
202
203
204 STATIC int
205 xfs_iomap_eof_align_last_fsb(
206         xfs_mount_t     *mp,
207         xfs_inode_t     *ip,
208         xfs_extlen_t    extsize,
209         xfs_fileoff_t   *last_fsb)
210 {
211         xfs_fileoff_t   new_last_fsb = 0;
212         xfs_extlen_t    align;
213         int             eof, error;
214
215         if (XFS_IS_REALTIME_INODE(ip))
216                 ;
217         /*
218          * If mounted with the "-o swalloc" option, roundup the allocation
219          * request to a stripe width boundary if the file size is >=
220          * stripe width and we are allocating past the allocation eof.
221          */
222         else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
223                 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
224                 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
225         /*
226          * Roundup the allocation request to a stripe unit (m_dalign) boundary
227          * if the file size is >= stripe unit size, and we are allocating past
228          * the allocation eof.
229          */
230         else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
231                 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
232
233         /*
234          * Always round up the allocation request to an extent boundary
235          * (when file on a real-time subvolume or has di_extsize hint).
236          */
237         if (extsize) {
238                 if (new_last_fsb)
239                         align = roundup_64(new_last_fsb, extsize);
240                 else
241                         align = extsize;
242                 new_last_fsb = roundup_64(*last_fsb, align);
243         }
244
245         if (new_last_fsb) {
246                 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
247                 if (error)
248                         return error;
249                 if (eof)
250                         *last_fsb = new_last_fsb;
251         }
252         return 0;
253 }
254
255 STATIC int
256 xfs_cmn_err_fsblock_zero(
257         xfs_inode_t     *ip,
258         xfs_bmbt_irec_t *imap)
259 {
260         xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
261                         "Access to block zero in inode %llu "
262                         "start_block: %llx start_off: %llx "
263                         "blkcnt: %llx extent-state: %x\n",
264                 (unsigned long long)ip->i_ino,
265                 (unsigned long long)imap->br_startblock,
266                 (unsigned long long)imap->br_startoff,
267                 (unsigned long long)imap->br_blockcount,
268                 imap->br_state);
269         return EFSCORRUPTED;
270 }
271
272 int
273 xfs_iomap_write_direct(
274         xfs_inode_t     *ip,
275         xfs_off_t       offset,
276         size_t          count,
277         int             flags,
278         xfs_bmbt_irec_t *ret_imap,
279         int             *nmaps,
280         int             found)
281 {
282         xfs_mount_t     *mp = ip->i_mount;
283         xfs_fileoff_t   offset_fsb;
284         xfs_fileoff_t   last_fsb;
285         xfs_filblks_t   count_fsb, resaligned;
286         xfs_fsblock_t   firstfsb;
287         xfs_extlen_t    extsz, temp;
288         int             nimaps;
289         int             bmapi_flag;
290         int             quota_flag;
291         int             rt;
292         xfs_trans_t     *tp;
293         xfs_bmbt_irec_t imap;
294         xfs_bmap_free_t free_list;
295         uint            qblocks, resblks, resrtextents;
296         int             committed;
297         int             error;
298
299         /*
300          * Make sure that the dquots are there. This doesn't hold
301          * the ilock across a disk read.
302          */
303         error = xfs_qm_dqattach_locked(ip, 0);
304         if (error)
305                 return XFS_ERROR(error);
306
307         rt = XFS_IS_REALTIME_INODE(ip);
308         extsz = xfs_get_extsz_hint(ip);
309
310         offset_fsb = XFS_B_TO_FSBT(mp, offset);
311         last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
312         if ((offset + count) > ip->i_size) {
313                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
314                 if (error)
315                         goto error_out;
316         } else {
317                 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
318                         last_fsb = MIN(last_fsb, (xfs_fileoff_t)
319                                         ret_imap->br_blockcount +
320                                         ret_imap->br_startoff);
321         }
322         count_fsb = last_fsb - offset_fsb;
323         ASSERT(count_fsb > 0);
324
325         resaligned = count_fsb;
326         if (unlikely(extsz)) {
327                 if ((temp = do_mod(offset_fsb, extsz)))
328                         resaligned += temp;
329                 if ((temp = do_mod(resaligned, extsz)))
330                         resaligned += extsz - temp;
331         }
332
333         if (unlikely(rt)) {
334                 resrtextents = qblocks = resaligned;
335                 resrtextents /= mp->m_sb.sb_rextsize;
336                 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
337                 quota_flag = XFS_QMOPT_RES_RTBLKS;
338         } else {
339                 resrtextents = 0;
340                 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
341                 quota_flag = XFS_QMOPT_RES_REGBLKS;
342         }
343
344         /*
345          * Allocate and setup the transaction
346          */
347         xfs_iunlock(ip, XFS_ILOCK_EXCL);
348         tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
349         error = xfs_trans_reserve(tp, resblks,
350                         XFS_WRITE_LOG_RES(mp), resrtextents,
351                         XFS_TRANS_PERM_LOG_RES,
352                         XFS_WRITE_LOG_COUNT);
353         /*
354          * Check for running out of space, note: need lock to return
355          */
356         if (error)
357                 xfs_trans_cancel(tp, 0);
358         xfs_ilock(ip, XFS_ILOCK_EXCL);
359         if (error)
360                 goto error_out;
361
362         error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
363         if (error)
364                 goto error1;
365
366         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
367         xfs_trans_ihold(tp, ip);
368
369         bmapi_flag = XFS_BMAPI_WRITE;
370         if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
371                 bmapi_flag |= XFS_BMAPI_PREALLOC;
372
373         /*
374          * Issue the xfs_bmapi() call to allocate the blocks
375          */
376         xfs_bmap_init(&free_list, &firstfsb);
377         nimaps = 1;
378         error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
379                 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
380         if (error)
381                 goto error0;
382
383         /*
384          * Complete the transaction
385          */
386         error = xfs_bmap_finish(&tp, &free_list, &committed);
387         if (error)
388                 goto error0;
389         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
390         if (error)
391                 goto error_out;
392
393         /*
394          * Copy any maps to caller's array and return any error.
395          */
396         if (nimaps == 0) {
397                 error = ENOSPC;
398                 goto error_out;
399         }
400
401         if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
402                 error = xfs_cmn_err_fsblock_zero(ip, &imap);
403                 goto error_out;
404         }
405
406         *ret_imap = imap;
407         *nmaps = 1;
408         return 0;
409
410 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
411         xfs_bmap_cancel(&free_list);
412         xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
413
414 error1: /* Just cancel transaction */
415         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
416         *nmaps = 0;     /* nothing set-up here */
417
418 error_out:
419         return XFS_ERROR(error);
420 }
421
422 /*
423  * If the caller is doing a write at the end of the file, then extend the
424  * allocation out to the file system's write iosize.  We clean up any extra
425  * space left over when the file is closed in xfs_inactive().
426  */
427 STATIC int
428 xfs_iomap_eof_want_preallocate(
429         xfs_mount_t     *mp,
430         xfs_inode_t     *ip,
431         xfs_off_t       offset,
432         size_t          count,
433         int             ioflag,
434         xfs_bmbt_irec_t *imap,
435         int             nimaps,
436         int             *prealloc)
437 {
438         xfs_fileoff_t   start_fsb;
439         xfs_filblks_t   count_fsb;
440         xfs_fsblock_t   firstblock;
441         int             n, error, imaps;
442
443         *prealloc = 0;
444         if ((offset + count) <= ip->i_size)
445                 return 0;
446
447         /*
448          * If there are any real blocks past eof, then don't
449          * do any speculative allocation.
450          */
451         start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
452         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
453         while (count_fsb > 0) {
454                 imaps = nimaps;
455                 firstblock = NULLFSBLOCK;
456                 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
457                                   &firstblock, 0, imap, &imaps, NULL, NULL);
458                 if (error)
459                         return error;
460                 for (n = 0; n < imaps; n++) {
461                         if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
462                             (imap[n].br_startblock != DELAYSTARTBLOCK))
463                                 return 0;
464                         start_fsb += imap[n].br_blockcount;
465                         count_fsb -= imap[n].br_blockcount;
466                 }
467         }
468         *prealloc = 1;
469         return 0;
470 }
471
472 int
473 xfs_iomap_write_delay(
474         xfs_inode_t     *ip,
475         xfs_off_t       offset,
476         size_t          count,
477         int             ioflag,
478         xfs_bmbt_irec_t *ret_imap,
479         int             *nmaps)
480 {
481         xfs_mount_t     *mp = ip->i_mount;
482         xfs_fileoff_t   offset_fsb;
483         xfs_fileoff_t   last_fsb;
484         xfs_off_t       aligned_offset;
485         xfs_fileoff_t   ioalign;
486         xfs_fsblock_t   firstblock;
487         xfs_extlen_t    extsz;
488         int             nimaps;
489         xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
490         int             prealloc, flushed = 0;
491         int             error;
492
493         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
494
495         /*
496          * Make sure that the dquots are there. This doesn't hold
497          * the ilock across a disk read.
498          */
499         error = xfs_qm_dqattach_locked(ip, 0);
500         if (error)
501                 return XFS_ERROR(error);
502
503         extsz = xfs_get_extsz_hint(ip);
504         offset_fsb = XFS_B_TO_FSBT(mp, offset);
505
506         error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
507                                 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
508         if (error)
509                 return error;
510
511 retry:
512         if (prealloc) {
513                 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
514                 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
515                 last_fsb = ioalign + mp->m_writeio_blocks;
516         } else {
517                 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
518         }
519
520         if (prealloc || extsz) {
521                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
522                 if (error)
523                         return error;
524         }
525
526         nimaps = XFS_WRITE_IMAPS;
527         firstblock = NULLFSBLOCK;
528         error = xfs_bmapi(NULL, ip, offset_fsb,
529                           (xfs_filblks_t)(last_fsb - offset_fsb),
530                           XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
531                           XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
532                           &nimaps, NULL, NULL);
533         if (error && (error != ENOSPC))
534                 return XFS_ERROR(error);
535
536         /*
537          * If bmapi returned us nothing, and if we didn't get back EDQUOT,
538          * then we must have run out of space - flush all other inodes with
539          * delalloc blocks and retry without EOF preallocation.
540          */
541         if (nimaps == 0) {
542                 trace_xfs_delalloc_enospc(ip, offset, count);
543                 if (flushed)
544                         return XFS_ERROR(ENOSPC);
545
546                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
547                 xfs_flush_inodes(ip);
548                 xfs_ilock(ip, XFS_ILOCK_EXCL);
549
550                 flushed = 1;
551                 error = 0;
552                 prealloc = 0;
553                 goto retry;
554         }
555
556         if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
557                 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
558
559         *ret_imap = imap[0];
560         *nmaps = 1;
561
562         return 0;
563 }
564
565 /*
566  * Pass in a delayed allocate extent, convert it to real extents;
567  * return to the caller the extent we create which maps on top of
568  * the originating callers request.
569  *
570  * Called without a lock on the inode.
571  *
572  * We no longer bother to look at the incoming map - all we have to
573  * guarantee is that whatever we allocate fills the required range.
574  */
575 int
576 xfs_iomap_write_allocate(
577         xfs_inode_t     *ip,
578         xfs_off_t       offset,
579         size_t          count,
580         xfs_bmbt_irec_t *map,
581         int             *retmap)
582 {
583         xfs_mount_t     *mp = ip->i_mount;
584         xfs_fileoff_t   offset_fsb, last_block;
585         xfs_fileoff_t   end_fsb, map_start_fsb;
586         xfs_fsblock_t   first_block;
587         xfs_bmap_free_t free_list;
588         xfs_filblks_t   count_fsb;
589         xfs_bmbt_irec_t imap;
590         xfs_trans_t     *tp;
591         int             nimaps, committed;
592         int             error = 0;
593         int             nres;
594
595         *retmap = 0;
596
597         /*
598          * Make sure that the dquots are there.
599          */
600         error = xfs_qm_dqattach(ip, 0);
601         if (error)
602                 return XFS_ERROR(error);
603
604         offset_fsb = XFS_B_TO_FSBT(mp, offset);
605         count_fsb = map->br_blockcount;
606         map_start_fsb = map->br_startoff;
607
608         XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
609
610         while (count_fsb != 0) {
611                 /*
612                  * Set up a transaction with which to allocate the
613                  * backing store for the file.  Do allocations in a
614                  * loop until we get some space in the range we are
615                  * interested in.  The other space that might be allocated
616                  * is in the delayed allocation extent on which we sit
617                  * but before our buffer starts.
618                  */
619
620                 nimaps = 0;
621                 while (nimaps == 0) {
622                         tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
623                         tp->t_flags |= XFS_TRANS_RESERVE;
624                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
625                         error = xfs_trans_reserve(tp, nres,
626                                         XFS_WRITE_LOG_RES(mp),
627                                         0, XFS_TRANS_PERM_LOG_RES,
628                                         XFS_WRITE_LOG_COUNT);
629                         if (error) {
630                                 xfs_trans_cancel(tp, 0);
631                                 return XFS_ERROR(error);
632                         }
633                         xfs_ilock(ip, XFS_ILOCK_EXCL);
634                         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
635                         xfs_trans_ihold(tp, ip);
636
637                         xfs_bmap_init(&free_list, &first_block);
638
639                         /*
640                          * it is possible that the extents have changed since
641                          * we did the read call as we dropped the ilock for a
642                          * while. We have to be careful about truncates or hole
643                          * punchs here - we are not allowed to allocate
644                          * non-delalloc blocks here.
645                          *
646                          * The only protection against truncation is the pages
647                          * for the range we are being asked to convert are
648                          * locked and hence a truncate will block on them
649                          * first.
650                          *
651                          * As a result, if we go beyond the range we really
652                          * need and hit an delalloc extent boundary followed by
653                          * a hole while we have excess blocks in the map, we
654                          * will fill the hole incorrectly and overrun the
655                          * transaction reservation.
656                          *
657                          * Using a single map prevents this as we are forced to
658                          * check each map we look for overlap with the desired
659                          * range and abort as soon as we find it. Also, given
660                          * that we only return a single map, having one beyond
661                          * what we can return is probably a bit silly.
662                          *
663                          * We also need to check that we don't go beyond EOF;
664                          * this is a truncate optimisation as a truncate sets
665                          * the new file size before block on the pages we
666                          * currently have locked under writeback. Because they
667                          * are about to be tossed, we don't need to write them
668                          * back....
669                          */
670                         nimaps = 1;
671                         end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
672                         error = xfs_bmap_last_offset(NULL, ip, &last_block,
673                                                         XFS_DATA_FORK);
674                         if (error)
675                                 goto trans_cancel;
676
677                         last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
678                         if ((map_start_fsb + count_fsb) > last_block) {
679                                 count_fsb = last_block - map_start_fsb;
680                                 if (count_fsb == 0) {
681                                         error = EAGAIN;
682                                         goto trans_cancel;
683                                 }
684                         }
685
686                         /* Go get the actual blocks */
687                         error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
688                                         XFS_BMAPI_WRITE, &first_block, 1,
689                                         &imap, &nimaps, &free_list, NULL);
690                         if (error)
691                                 goto trans_cancel;
692
693                         error = xfs_bmap_finish(&tp, &free_list, &committed);
694                         if (error)
695                                 goto trans_cancel;
696
697                         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
698                         if (error)
699                                 goto error0;
700
701                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
702                 }
703
704                 /*
705                  * See if we were able to allocate an extent that
706                  * covers at least part of the callers request
707                  */
708                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
709                         return xfs_cmn_err_fsblock_zero(ip, &imap);
710
711                 if ((offset_fsb >= imap.br_startoff) &&
712                     (offset_fsb < (imap.br_startoff +
713                                    imap.br_blockcount))) {
714                         *map = imap;
715                         *retmap = 1;
716                         XFS_STATS_INC(xs_xstrat_quick);
717                         return 0;
718                 }
719
720                 /*
721                  * So far we have not mapped the requested part of the
722                  * file, just surrounding data, try again.
723                  */
724                 count_fsb -= imap.br_blockcount;
725                 map_start_fsb = imap.br_startoff + imap.br_blockcount;
726         }
727
728 trans_cancel:
729         xfs_bmap_cancel(&free_list);
730         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
731 error0:
732         xfs_iunlock(ip, XFS_ILOCK_EXCL);
733         return XFS_ERROR(error);
734 }
735
736 int
737 xfs_iomap_write_unwritten(
738         xfs_inode_t     *ip,
739         xfs_off_t       offset,
740         size_t          count)
741 {
742         xfs_mount_t     *mp = ip->i_mount;
743         xfs_fileoff_t   offset_fsb;
744         xfs_filblks_t   count_fsb;
745         xfs_filblks_t   numblks_fsb;
746         xfs_fsblock_t   firstfsb;
747         int             nimaps;
748         xfs_trans_t     *tp;
749         xfs_bmbt_irec_t imap;
750         xfs_bmap_free_t free_list;
751         uint            resblks;
752         int             committed;
753         int             error;
754
755         trace_xfs_unwritten_convert(ip, offset, count);
756
757         offset_fsb = XFS_B_TO_FSBT(mp, offset);
758         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
759         count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
760
761         /*
762          * Reserve enough blocks in this transaction for two complete extent
763          * btree splits.  We may be converting the middle part of an unwritten
764          * extent and in this case we will insert two new extents in the btree
765          * each of which could cause a full split.
766          *
767          * This reservation amount will be used in the first call to
768          * xfs_bmbt_split() to select an AG with enough space to satisfy the
769          * rest of the operation.
770          */
771         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
772
773         do {
774                 /*
775                  * set up a transaction to convert the range of extents
776                  * from unwritten to real. Do allocations in a loop until
777                  * we have covered the range passed in.
778                  *
779                  * Note that we open code the transaction allocation here
780                  * to pass KM_NOFS--we can't risk to recursing back into
781                  * the filesystem here as we might be asked to write out
782                  * the same inode that we complete here and might deadlock
783                  * on the iolock.
784                  */
785                 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
786                 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
787                 tp->t_flags |= XFS_TRANS_RESERVE;
788                 error = xfs_trans_reserve(tp, resblks,
789                                 XFS_WRITE_LOG_RES(mp), 0,
790                                 XFS_TRANS_PERM_LOG_RES,
791                                 XFS_WRITE_LOG_COUNT);
792                 if (error) {
793                         xfs_trans_cancel(tp, 0);
794                         return XFS_ERROR(error);
795                 }
796
797                 xfs_ilock(ip, XFS_ILOCK_EXCL);
798                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
799                 xfs_trans_ihold(tp, ip);
800
801                 /*
802                  * Modify the unwritten extent state of the buffer.
803                  */
804                 xfs_bmap_init(&free_list, &firstfsb);
805                 nimaps = 1;
806                 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
807                                   XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
808                                   1, &imap, &nimaps, &free_list, NULL);
809                 if (error)
810                         goto error_on_bmapi_transaction;
811
812                 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
813                 if (error)
814                         goto error_on_bmapi_transaction;
815
816                 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
817                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
818                 if (error)
819                         return XFS_ERROR(error);
820
821                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
822                         return xfs_cmn_err_fsblock_zero(ip, &imap);
823
824                 if ((numblks_fsb = imap.br_blockcount) == 0) {
825                         /*
826                          * The numblks_fsb value should always get
827                          * smaller, otherwise the loop is stuck.
828                          */
829                         ASSERT(imap.br_blockcount);
830                         break;
831                 }
832                 offset_fsb += numblks_fsb;
833                 count_fsb -= numblks_fsb;
834         } while (count_fsb > 0);
835
836         return 0;
837
838 error_on_bmapi_transaction:
839         xfs_bmap_cancel(&free_list);
840         xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
841         xfs_iunlock(ip, XFS_ILOCK_EXCL);
842         return XFS_ERROR(error);
843 }