xfs: report iomap_bn in block base
[safe/jmp/linux-2.6] / fs / xfs / xfs_iomap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_attr.h"
46 #include "xfs_buf_item.h"
47 #include "xfs_trans_space.h"
48 #include "xfs_utils.h"
49 #include "xfs_iomap.h"
50 #include "xfs_trace.h"
51
52
/*
 * Round a byte offset down to a multiple of 2^m_writeio_log by shifting the
 * low bits out and back in.  Both macro arguments are parenthesized so that
 * any expression (for example "&mount") can be passed as mp; note that mp is
 * evaluated twice.
 */
#define XFS_WRITEIO_ALIGN(mp,off)       (((off) >> (mp)->m_writeio_log) \
                                                << (mp)->m_writeio_log)
#define XFS_STRAT_WRITE_IMAPS   2
/* Size of the mapping array used by xfs_iomap_write_delay(). */
#define XFS_WRITE_IMAPS         XFS_BMAP_MAX_NMAP
57
58 STATIC void
59 xfs_imap_to_bmap(
60         xfs_inode_t     *ip,
61         xfs_off_t       offset,
62         xfs_bmbt_irec_t *imap,
63         xfs_iomap_t     *iomapp,
64         int             imaps,                  /* Number of imap entries */
65         int             flags)
66 {
67         iomapp->iomap_offset = imap->br_startoff;
68         iomapp->iomap_bsize = imap->br_blockcount;
69         iomapp->iomap_flags = flags;
70         iomapp->iomap_bn = imap->br_startblock;
71
72         if (imap->br_startblock != HOLESTARTBLOCK &&
73             imap->br_startblock != DELAYSTARTBLOCK &&
74             ISUNWRITTEN(imap))
75                 iomapp->iomap_flags |= IOMAP_UNWRITTEN;
76 }
77
78 int
79 xfs_iomap(
80         xfs_inode_t     *ip,
81         xfs_off_t       offset,
82         ssize_t         count,
83         int             flags,
84         xfs_iomap_t     *iomapp,
85         int             *niomaps)
86 {
87         xfs_mount_t     *mp = ip->i_mount;
88         xfs_fileoff_t   offset_fsb, end_fsb;
89         int             error = 0;
90         int             lockmode = 0;
91         xfs_bmbt_irec_t imap;
92         int             nimaps = 1;
93         int             bmapi_flags = 0;
94         int             iomap_flags = 0;
95
96         ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
97         ASSERT(niomaps && *niomaps == 1);
98
99         if (XFS_FORCED_SHUTDOWN(mp))
100                 return XFS_ERROR(EIO);
101
102         trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
103
104         switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
105         case BMAPI_READ:
106                 lockmode = xfs_ilock_map_shared(ip);
107                 bmapi_flags = XFS_BMAPI_ENTIRE;
108                 break;
109         case BMAPI_WRITE:
110                 lockmode = XFS_ILOCK_EXCL;
111                 if (flags & BMAPI_IGNSTATE)
112                         bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
113                 xfs_ilock(ip, lockmode);
114                 break;
115         case BMAPI_ALLOCATE:
116                 lockmode = XFS_ILOCK_SHARED;
117                 bmapi_flags = XFS_BMAPI_ENTIRE;
118
119                 /* Attempt non-blocking lock */
120                 if (flags & BMAPI_TRYLOCK) {
121                         if (!xfs_ilock_nowait(ip, lockmode))
122                                 return XFS_ERROR(EAGAIN);
123                 } else {
124                         xfs_ilock(ip, lockmode);
125                 }
126                 break;
127         default:
128                 BUG();
129         }
130
131         ASSERT(offset <= mp->m_maxioffset);
132         if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
133                 count = mp->m_maxioffset - offset;
134         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
135         offset_fsb = XFS_B_TO_FSBT(mp, offset);
136
137         error = xfs_bmapi(NULL, ip, offset_fsb,
138                         (xfs_filblks_t)(end_fsb - offset_fsb),
139                         bmapi_flags,  NULL, 0, &imap,
140                         &nimaps, NULL, NULL);
141
142         if (error)
143                 goto out;
144
145         switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
146         case BMAPI_WRITE:
147                 /* If we found an extent, return it */
148                 if (nimaps &&
149                     (imap.br_startblock != HOLESTARTBLOCK) &&
150                     (imap.br_startblock != DELAYSTARTBLOCK)) {
151                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
152                         break;
153                 }
154
155                 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
156                         error = xfs_iomap_write_direct(ip, offset, count, flags,
157                                                        &imap, &nimaps, nimaps);
158                 } else {
159                         error = xfs_iomap_write_delay(ip, offset, count, flags,
160                                                       &imap, &nimaps);
161                 }
162                 if (!error) {
163                         trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
164                 }
165                 iomap_flags = IOMAP_NEW;
166                 break;
167         case BMAPI_ALLOCATE:
168                 /* If we found an extent, return it */
169                 xfs_iunlock(ip, lockmode);
170                 lockmode = 0;
171
172                 if (nimaps && !isnullstartblock(imap.br_startblock)) {
173                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
174                         break;
175                 }
176
177                 error = xfs_iomap_write_allocate(ip, offset, count,
178                                                  &imap, &nimaps);
179                 break;
180         }
181
182         ASSERT(nimaps <= 1);
183
184         if (nimaps)
185                 xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, iomap_flags);
186         *niomaps = nimaps;
187
188 out:
189         if (lockmode)
190                 xfs_iunlock(ip, lockmode);
191         return XFS_ERROR(error);
192 }
193
194
195 STATIC int
196 xfs_iomap_eof_align_last_fsb(
197         xfs_mount_t     *mp,
198         xfs_inode_t     *ip,
199         xfs_extlen_t    extsize,
200         xfs_fileoff_t   *last_fsb)
201 {
202         xfs_fileoff_t   new_last_fsb = 0;
203         xfs_extlen_t    align;
204         int             eof, error;
205
206         if (XFS_IS_REALTIME_INODE(ip))
207                 ;
208         /*
209          * If mounted with the "-o swalloc" option, roundup the allocation
210          * request to a stripe width boundary if the file size is >=
211          * stripe width and we are allocating past the allocation eof.
212          */
213         else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
214                 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
215                 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
216         /*
217          * Roundup the allocation request to a stripe unit (m_dalign) boundary
218          * if the file size is >= stripe unit size, and we are allocating past
219          * the allocation eof.
220          */
221         else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
222                 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
223
224         /*
225          * Always round up the allocation request to an extent boundary
226          * (when file on a real-time subvolume or has di_extsize hint).
227          */
228         if (extsize) {
229                 if (new_last_fsb)
230                         align = roundup_64(new_last_fsb, extsize);
231                 else
232                         align = extsize;
233                 new_last_fsb = roundup_64(*last_fsb, align);
234         }
235
236         if (new_last_fsb) {
237                 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
238                 if (error)
239                         return error;
240                 if (eof)
241                         *last_fsb = new_last_fsb;
242         }
243         return 0;
244 }
245
246 STATIC int
247 xfs_cmn_err_fsblock_zero(
248         xfs_inode_t     *ip,
249         xfs_bmbt_irec_t *imap)
250 {
251         xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
252                         "Access to block zero in inode %llu "
253                         "start_block: %llx start_off: %llx "
254                         "blkcnt: %llx extent-state: %x\n",
255                 (unsigned long long)ip->i_ino,
256                 (unsigned long long)imap->br_startblock,
257                 (unsigned long long)imap->br_startoff,
258                 (unsigned long long)imap->br_blockcount,
259                 imap->br_state);
260         return EFSCORRUPTED;
261 }
262
263 int
264 xfs_iomap_write_direct(
265         xfs_inode_t     *ip,
266         xfs_off_t       offset,
267         size_t          count,
268         int             flags,
269         xfs_bmbt_irec_t *ret_imap,
270         int             *nmaps,
271         int             found)
272 {
273         xfs_mount_t     *mp = ip->i_mount;
274         xfs_fileoff_t   offset_fsb;
275         xfs_fileoff_t   last_fsb;
276         xfs_filblks_t   count_fsb, resaligned;
277         xfs_fsblock_t   firstfsb;
278         xfs_extlen_t    extsz, temp;
279         int             nimaps;
280         int             bmapi_flag;
281         int             quota_flag;
282         int             rt;
283         xfs_trans_t     *tp;
284         xfs_bmbt_irec_t imap;
285         xfs_bmap_free_t free_list;
286         uint            qblocks, resblks, resrtextents;
287         int             committed;
288         int             error;
289
290         /*
291          * Make sure that the dquots are there. This doesn't hold
292          * the ilock across a disk read.
293          */
294         error = xfs_qm_dqattach_locked(ip, 0);
295         if (error)
296                 return XFS_ERROR(error);
297
298         rt = XFS_IS_REALTIME_INODE(ip);
299         extsz = xfs_get_extsz_hint(ip);
300
301         offset_fsb = XFS_B_TO_FSBT(mp, offset);
302         last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
303         if ((offset + count) > ip->i_size) {
304                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
305                 if (error)
306                         goto error_out;
307         } else {
308                 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
309                         last_fsb = MIN(last_fsb, (xfs_fileoff_t)
310                                         ret_imap->br_blockcount +
311                                         ret_imap->br_startoff);
312         }
313         count_fsb = last_fsb - offset_fsb;
314         ASSERT(count_fsb > 0);
315
316         resaligned = count_fsb;
317         if (unlikely(extsz)) {
318                 if ((temp = do_mod(offset_fsb, extsz)))
319                         resaligned += temp;
320                 if ((temp = do_mod(resaligned, extsz)))
321                         resaligned += extsz - temp;
322         }
323
324         if (unlikely(rt)) {
325                 resrtextents = qblocks = resaligned;
326                 resrtextents /= mp->m_sb.sb_rextsize;
327                 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
328                 quota_flag = XFS_QMOPT_RES_RTBLKS;
329         } else {
330                 resrtextents = 0;
331                 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
332                 quota_flag = XFS_QMOPT_RES_REGBLKS;
333         }
334
335         /*
336          * Allocate and setup the transaction
337          */
338         xfs_iunlock(ip, XFS_ILOCK_EXCL);
339         tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
340         error = xfs_trans_reserve(tp, resblks,
341                         XFS_WRITE_LOG_RES(mp), resrtextents,
342                         XFS_TRANS_PERM_LOG_RES,
343                         XFS_WRITE_LOG_COUNT);
344         /*
345          * Check for running out of space, note: need lock to return
346          */
347         if (error)
348                 xfs_trans_cancel(tp, 0);
349         xfs_ilock(ip, XFS_ILOCK_EXCL);
350         if (error)
351                 goto error_out;
352
353         error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
354         if (error)
355                 goto error1;
356
357         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
358         xfs_trans_ihold(tp, ip);
359
360         bmapi_flag = XFS_BMAPI_WRITE;
361         if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
362                 bmapi_flag |= XFS_BMAPI_PREALLOC;
363
364         /*
365          * Issue the xfs_bmapi() call to allocate the blocks
366          */
367         xfs_bmap_init(&free_list, &firstfsb);
368         nimaps = 1;
369         error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
370                 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
371         if (error)
372                 goto error0;
373
374         /*
375          * Complete the transaction
376          */
377         error = xfs_bmap_finish(&tp, &free_list, &committed);
378         if (error)
379                 goto error0;
380         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
381         if (error)
382                 goto error_out;
383
384         /*
385          * Copy any maps to caller's array and return any error.
386          */
387         if (nimaps == 0) {
388                 error = ENOSPC;
389                 goto error_out;
390         }
391
392         if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
393                 error = xfs_cmn_err_fsblock_zero(ip, &imap);
394                 goto error_out;
395         }
396
397         *ret_imap = imap;
398         *nmaps = 1;
399         return 0;
400
401 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
402         xfs_bmap_cancel(&free_list);
403         xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
404
405 error1: /* Just cancel transaction */
406         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
407         *nmaps = 0;     /* nothing set-up here */
408
409 error_out:
410         return XFS_ERROR(error);
411 }
412
413 /*
414  * If the caller is doing a write at the end of the file, then extend the
415  * allocation out to the file system's write iosize.  We clean up any extra
416  * space left over when the file is closed in xfs_inactive().
417  */
418 STATIC int
419 xfs_iomap_eof_want_preallocate(
420         xfs_mount_t     *mp,
421         xfs_inode_t     *ip,
422         xfs_off_t       offset,
423         size_t          count,
424         int             ioflag,
425         xfs_bmbt_irec_t *imap,
426         int             nimaps,
427         int             *prealloc)
428 {
429         xfs_fileoff_t   start_fsb;
430         xfs_filblks_t   count_fsb;
431         xfs_fsblock_t   firstblock;
432         int             n, error, imaps;
433
434         *prealloc = 0;
435         if ((offset + count) <= ip->i_size)
436                 return 0;
437
438         /*
439          * If there are any real blocks past eof, then don't
440          * do any speculative allocation.
441          */
442         start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
443         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
444         while (count_fsb > 0) {
445                 imaps = nimaps;
446                 firstblock = NULLFSBLOCK;
447                 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
448                                   &firstblock, 0, imap, &imaps, NULL, NULL);
449                 if (error)
450                         return error;
451                 for (n = 0; n < imaps; n++) {
452                         if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
453                             (imap[n].br_startblock != DELAYSTARTBLOCK))
454                                 return 0;
455                         start_fsb += imap[n].br_blockcount;
456                         count_fsb -= imap[n].br_blockcount;
457                 }
458         }
459         *prealloc = 1;
460         return 0;
461 }
462
463 int
464 xfs_iomap_write_delay(
465         xfs_inode_t     *ip,
466         xfs_off_t       offset,
467         size_t          count,
468         int             ioflag,
469         xfs_bmbt_irec_t *ret_imap,
470         int             *nmaps)
471 {
472         xfs_mount_t     *mp = ip->i_mount;
473         xfs_fileoff_t   offset_fsb;
474         xfs_fileoff_t   last_fsb;
475         xfs_off_t       aligned_offset;
476         xfs_fileoff_t   ioalign;
477         xfs_fsblock_t   firstblock;
478         xfs_extlen_t    extsz;
479         int             nimaps;
480         xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
481         int             prealloc, flushed = 0;
482         int             error;
483
484         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
485
486         /*
487          * Make sure that the dquots are there. This doesn't hold
488          * the ilock across a disk read.
489          */
490         error = xfs_qm_dqattach_locked(ip, 0);
491         if (error)
492                 return XFS_ERROR(error);
493
494         extsz = xfs_get_extsz_hint(ip);
495         offset_fsb = XFS_B_TO_FSBT(mp, offset);
496
497         error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
498                                 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
499         if (error)
500                 return error;
501
502 retry:
503         if (prealloc) {
504                 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
505                 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
506                 last_fsb = ioalign + mp->m_writeio_blocks;
507         } else {
508                 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
509         }
510
511         if (prealloc || extsz) {
512                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
513                 if (error)
514                         return error;
515         }
516
517         nimaps = XFS_WRITE_IMAPS;
518         firstblock = NULLFSBLOCK;
519         error = xfs_bmapi(NULL, ip, offset_fsb,
520                           (xfs_filblks_t)(last_fsb - offset_fsb),
521                           XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
522                           XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
523                           &nimaps, NULL, NULL);
524         if (error && (error != ENOSPC))
525                 return XFS_ERROR(error);
526
527         /*
528          * If bmapi returned us nothing, and if we didn't get back EDQUOT,
529          * then we must have run out of space - flush all other inodes with
530          * delalloc blocks and retry without EOF preallocation.
531          */
532         if (nimaps == 0) {
533                 trace_xfs_delalloc_enospc(ip, offset, count);
534                 if (flushed)
535                         return XFS_ERROR(ENOSPC);
536
537                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
538                 xfs_flush_inodes(ip);
539                 xfs_ilock(ip, XFS_ILOCK_EXCL);
540
541                 flushed = 1;
542                 error = 0;
543                 prealloc = 0;
544                 goto retry;
545         }
546
547         if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
548                 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
549
550         *ret_imap = imap[0];
551         *nmaps = 1;
552
553         return 0;
554 }
555
556 /*
557  * Pass in a delayed allocate extent, convert it to real extents;
558  * return to the caller the extent we create which maps on top of
559  * the originating callers request.
560  *
561  * Called without a lock on the inode.
562  *
563  * We no longer bother to look at the incoming map - all we have to
564  * guarantee is that whatever we allocate fills the required range.
565  */
566 int
567 xfs_iomap_write_allocate(
568         xfs_inode_t     *ip,
569         xfs_off_t       offset,
570         size_t          count,
571         xfs_bmbt_irec_t *map,
572         int             *retmap)
573 {
574         xfs_mount_t     *mp = ip->i_mount;
575         xfs_fileoff_t   offset_fsb, last_block;
576         xfs_fileoff_t   end_fsb, map_start_fsb;
577         xfs_fsblock_t   first_block;
578         xfs_bmap_free_t free_list;
579         xfs_filblks_t   count_fsb;
580         xfs_bmbt_irec_t imap;
581         xfs_trans_t     *tp;
582         int             nimaps, committed;
583         int             error = 0;
584         int             nres;
585
586         *retmap = 0;
587
588         /*
589          * Make sure that the dquots are there.
590          */
591         error = xfs_qm_dqattach(ip, 0);
592         if (error)
593                 return XFS_ERROR(error);
594
595         offset_fsb = XFS_B_TO_FSBT(mp, offset);
596         count_fsb = map->br_blockcount;
597         map_start_fsb = map->br_startoff;
598
599         XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
600
601         while (count_fsb != 0) {
602                 /*
603                  * Set up a transaction with which to allocate the
604                  * backing store for the file.  Do allocations in a
605                  * loop until we get some space in the range we are
606                  * interested in.  The other space that might be allocated
607                  * is in the delayed allocation extent on which we sit
608                  * but before our buffer starts.
609                  */
610
611                 nimaps = 0;
612                 while (nimaps == 0) {
613                         tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
614                         tp->t_flags |= XFS_TRANS_RESERVE;
615                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
616                         error = xfs_trans_reserve(tp, nres,
617                                         XFS_WRITE_LOG_RES(mp),
618                                         0, XFS_TRANS_PERM_LOG_RES,
619                                         XFS_WRITE_LOG_COUNT);
620                         if (error) {
621                                 xfs_trans_cancel(tp, 0);
622                                 return XFS_ERROR(error);
623                         }
624                         xfs_ilock(ip, XFS_ILOCK_EXCL);
625                         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
626                         xfs_trans_ihold(tp, ip);
627
628                         xfs_bmap_init(&free_list, &first_block);
629
630                         /*
631                          * it is possible that the extents have changed since
632                          * we did the read call as we dropped the ilock for a
633                          * while. We have to be careful about truncates or hole
634                          * punchs here - we are not allowed to allocate
635                          * non-delalloc blocks here.
636                          *
637                          * The only protection against truncation is the pages
638                          * for the range we are being asked to convert are
639                          * locked and hence a truncate will block on them
640                          * first.
641                          *
642                          * As a result, if we go beyond the range we really
643                          * need and hit an delalloc extent boundary followed by
644                          * a hole while we have excess blocks in the map, we
645                          * will fill the hole incorrectly and overrun the
646                          * transaction reservation.
647                          *
648                          * Using a single map prevents this as we are forced to
649                          * check each map we look for overlap with the desired
650                          * range and abort as soon as we find it. Also, given
651                          * that we only return a single map, having one beyond
652                          * what we can return is probably a bit silly.
653                          *
654                          * We also need to check that we don't go beyond EOF;
655                          * this is a truncate optimisation as a truncate sets
656                          * the new file size before block on the pages we
657                          * currently have locked under writeback. Because they
658                          * are about to be tossed, we don't need to write them
659                          * back....
660                          */
661                         nimaps = 1;
662                         end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
663                         error = xfs_bmap_last_offset(NULL, ip, &last_block,
664                                                         XFS_DATA_FORK);
665                         if (error)
666                                 goto trans_cancel;
667
668                         last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
669                         if ((map_start_fsb + count_fsb) > last_block) {
670                                 count_fsb = last_block - map_start_fsb;
671                                 if (count_fsb == 0) {
672                                         error = EAGAIN;
673                                         goto trans_cancel;
674                                 }
675                         }
676
677                         /* Go get the actual blocks */
678                         error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
679                                         XFS_BMAPI_WRITE, &first_block, 1,
680                                         &imap, &nimaps, &free_list, NULL);
681                         if (error)
682                                 goto trans_cancel;
683
684                         error = xfs_bmap_finish(&tp, &free_list, &committed);
685                         if (error)
686                                 goto trans_cancel;
687
688                         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
689                         if (error)
690                                 goto error0;
691
692                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
693                 }
694
695                 /*
696                  * See if we were able to allocate an extent that
697                  * covers at least part of the callers request
698                  */
699                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
700                         return xfs_cmn_err_fsblock_zero(ip, &imap);
701
702                 if ((offset_fsb >= imap.br_startoff) &&
703                     (offset_fsb < (imap.br_startoff +
704                                    imap.br_blockcount))) {
705                         *map = imap;
706                         *retmap = 1;
707                         XFS_STATS_INC(xs_xstrat_quick);
708                         return 0;
709                 }
710
711                 /*
712                  * So far we have not mapped the requested part of the
713                  * file, just surrounding data, try again.
714                  */
715                 count_fsb -= imap.br_blockcount;
716                 map_start_fsb = imap.br_startoff + imap.br_blockcount;
717         }
718
719 trans_cancel:
720         xfs_bmap_cancel(&free_list);
721         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
722 error0:
723         xfs_iunlock(ip, XFS_ILOCK_EXCL);
724         return XFS_ERROR(error);
725 }
726
727 int
728 xfs_iomap_write_unwritten(
729         xfs_inode_t     *ip,
730         xfs_off_t       offset,
731         size_t          count)
732 {
733         xfs_mount_t     *mp = ip->i_mount;
734         xfs_fileoff_t   offset_fsb;
735         xfs_filblks_t   count_fsb;
736         xfs_filblks_t   numblks_fsb;
737         xfs_fsblock_t   firstfsb;
738         int             nimaps;
739         xfs_trans_t     *tp;
740         xfs_bmbt_irec_t imap;
741         xfs_bmap_free_t free_list;
742         uint            resblks;
743         int             committed;
744         int             error;
745
746         trace_xfs_unwritten_convert(ip, offset, count);
747
748         offset_fsb = XFS_B_TO_FSBT(mp, offset);
749         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
750         count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
751
752         /*
753          * Reserve enough blocks in this transaction for two complete extent
754          * btree splits.  We may be converting the middle part of an unwritten
755          * extent and in this case we will insert two new extents in the btree
756          * each of which could cause a full split.
757          *
758          * This reservation amount will be used in the first call to
759          * xfs_bmbt_split() to select an AG with enough space to satisfy the
760          * rest of the operation.
761          */
762         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
763
764         do {
765                 /*
766                  * set up a transaction to convert the range of extents
767                  * from unwritten to real. Do allocations in a loop until
768                  * we have covered the range passed in.
769                  *
770                  * Note that we open code the transaction allocation here
771                  * to pass KM_NOFS--we can't risk to recursing back into
772                  * the filesystem here as we might be asked to write out
773                  * the same inode that we complete here and might deadlock
774                  * on the iolock.
775                  */
776                 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
777                 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
778                 tp->t_flags |= XFS_TRANS_RESERVE;
779                 error = xfs_trans_reserve(tp, resblks,
780                                 XFS_WRITE_LOG_RES(mp), 0,
781                                 XFS_TRANS_PERM_LOG_RES,
782                                 XFS_WRITE_LOG_COUNT);
783                 if (error) {
784                         xfs_trans_cancel(tp, 0);
785                         return XFS_ERROR(error);
786                 }
787
788                 xfs_ilock(ip, XFS_ILOCK_EXCL);
789                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
790                 xfs_trans_ihold(tp, ip);
791
792                 /*
793                  * Modify the unwritten extent state of the buffer.
794                  */
795                 xfs_bmap_init(&free_list, &firstfsb);
796                 nimaps = 1;
797                 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
798                                   XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
799                                   1, &imap, &nimaps, &free_list, NULL);
800                 if (error)
801                         goto error_on_bmapi_transaction;
802
803                 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
804                 if (error)
805                         goto error_on_bmapi_transaction;
806
807                 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
808                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
809                 if (error)
810                         return XFS_ERROR(error);
811
812                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
813                         return xfs_cmn_err_fsblock_zero(ip, &imap);
814
815                 if ((numblks_fsb = imap.br_blockcount) == 0) {
816                         /*
817                          * The numblks_fsb value should always get
818                          * smaller, otherwise the loop is stuck.
819                          */
820                         ASSERT(imap.br_blockcount);
821                         break;
822                 }
823                 offset_fsb += numblks_fsb;
824                 count_fsb -= numblks_fsb;
825         } while (count_fsb > 0);
826
827         return 0;
828
829 error_on_bmapi_transaction:
830         xfs_bmap_cancel(&free_list);
831         xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
832         xfs_iunlock(ip, XFS_ILOCK_EXCL);
833         return XFS_ERROR(error);
834 }