xfs: remove iomap_delta
[safe/jmp/linux-2.6] / fs / xfs / xfs_iomap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_attr.h"
46 #include "xfs_buf_item.h"
47 #include "xfs_trans_space.h"
48 #include "xfs_utils.h"
49 #include "xfs_iomap.h"
50 #include "xfs_trace.h"
51
52
/*
 * Round "off" down to a multiple of 2^m_writeio_log by shifting the low
 * bits out and back in.  "mp" is parenthesised so that any pointer-valued
 * expression may be passed, not just a plain identifier.
 */
#define XFS_WRITEIO_ALIGN(mp,off)       (((off) >> (mp)->m_writeio_log) \
                                                << (mp)->m_writeio_log)
#define XFS_STRAT_WRITE_IMAPS   2
/* Size of the extent record array xfs_iomap_write_delay() hands to xfs_bmapi() */
#define XFS_WRITE_IMAPS         XFS_BMAP_MAX_NMAP
57
58 STATIC void
59 xfs_imap_to_bmap(
60         xfs_inode_t     *ip,
61         xfs_off_t       offset,
62         xfs_bmbt_irec_t *imap,
63         xfs_iomap_t     *iomapp,
64         int             imaps,                  /* Number of imap entries */
65         int             flags)
66 {
67         xfs_mount_t     *mp = ip->i_mount;
68         xfs_fsblock_t   start_block;
69
70         iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
71         iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
72         iomapp->iomap_flags = flags;
73
74         start_block = imap->br_startblock;
75         if (start_block == HOLESTARTBLOCK) {
76                 iomapp->iomap_bn = IOMAP_DADDR_NULL;
77                 iomapp->iomap_flags |= IOMAP_HOLE;
78         } else if (start_block == DELAYSTARTBLOCK) {
79                 iomapp->iomap_bn = IOMAP_DADDR_NULL;
80                 iomapp->iomap_flags |= IOMAP_DELAY;
81         } else {
82                 iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
83                 if (ISUNWRITTEN(imap))
84                         iomapp->iomap_flags |= IOMAP_UNWRITTEN;
85         }
86 }
87
88 int
89 xfs_iomap(
90         xfs_inode_t     *ip,
91         xfs_off_t       offset,
92         ssize_t         count,
93         int             flags,
94         xfs_iomap_t     *iomapp,
95         int             *niomaps)
96 {
97         xfs_mount_t     *mp = ip->i_mount;
98         xfs_fileoff_t   offset_fsb, end_fsb;
99         int             error = 0;
100         int             lockmode = 0;
101         xfs_bmbt_irec_t imap;
102         int             nimaps = 1;
103         int             bmapi_flags = 0;
104         int             iomap_flags = 0;
105
106         ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
107         ASSERT(niomaps && *niomaps == 1);
108
109         if (XFS_FORCED_SHUTDOWN(mp))
110                 return XFS_ERROR(EIO);
111
112         trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
113
114         switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
115         case BMAPI_READ:
116                 lockmode = xfs_ilock_map_shared(ip);
117                 bmapi_flags = XFS_BMAPI_ENTIRE;
118                 break;
119         case BMAPI_WRITE:
120                 lockmode = XFS_ILOCK_EXCL;
121                 if (flags & BMAPI_IGNSTATE)
122                         bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
123                 xfs_ilock(ip, lockmode);
124                 break;
125         case BMAPI_ALLOCATE:
126                 lockmode = XFS_ILOCK_SHARED;
127                 bmapi_flags = XFS_BMAPI_ENTIRE;
128
129                 /* Attempt non-blocking lock */
130                 if (flags & BMAPI_TRYLOCK) {
131                         if (!xfs_ilock_nowait(ip, lockmode))
132                                 return XFS_ERROR(EAGAIN);
133                 } else {
134                         xfs_ilock(ip, lockmode);
135                 }
136                 break;
137         default:
138                 BUG();
139         }
140
141         ASSERT(offset <= mp->m_maxioffset);
142         if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
143                 count = mp->m_maxioffset - offset;
144         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
145         offset_fsb = XFS_B_TO_FSBT(mp, offset);
146
147         error = xfs_bmapi(NULL, ip, offset_fsb,
148                         (xfs_filblks_t)(end_fsb - offset_fsb),
149                         bmapi_flags,  NULL, 0, &imap,
150                         &nimaps, NULL, NULL);
151
152         if (error)
153                 goto out;
154
155         switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
156         case BMAPI_WRITE:
157                 /* If we found an extent, return it */
158                 if (nimaps &&
159                     (imap.br_startblock != HOLESTARTBLOCK) &&
160                     (imap.br_startblock != DELAYSTARTBLOCK)) {
161                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
162                         break;
163                 }
164
165                 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
166                         error = xfs_iomap_write_direct(ip, offset, count, flags,
167                                                        &imap, &nimaps, nimaps);
168                 } else {
169                         error = xfs_iomap_write_delay(ip, offset, count, flags,
170                                                       &imap, &nimaps);
171                 }
172                 if (!error) {
173                         trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
174                 }
175                 iomap_flags = IOMAP_NEW;
176                 break;
177         case BMAPI_ALLOCATE:
178                 /* If we found an extent, return it */
179                 xfs_iunlock(ip, lockmode);
180                 lockmode = 0;
181
182                 if (nimaps && !isnullstartblock(imap.br_startblock)) {
183                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
184                         break;
185                 }
186
187                 error = xfs_iomap_write_allocate(ip, offset, count,
188                                                  &imap, &nimaps);
189                 break;
190         }
191
192         ASSERT(nimaps <= 1);
193
194         if (nimaps)
195                 xfs_imap_to_bmap(ip, offset, &imap, iomapp, nimaps, iomap_flags);
196         *niomaps = nimaps;
197
198 out:
199         if (lockmode)
200                 xfs_iunlock(ip, lockmode);
201         return XFS_ERROR(error);
202 }
203
204
205 STATIC int
206 xfs_iomap_eof_align_last_fsb(
207         xfs_mount_t     *mp,
208         xfs_inode_t     *ip,
209         xfs_extlen_t    extsize,
210         xfs_fileoff_t   *last_fsb)
211 {
212         xfs_fileoff_t   new_last_fsb = 0;
213         xfs_extlen_t    align;
214         int             eof, error;
215
216         if (XFS_IS_REALTIME_INODE(ip))
217                 ;
218         /*
219          * If mounted with the "-o swalloc" option, roundup the allocation
220          * request to a stripe width boundary if the file size is >=
221          * stripe width and we are allocating past the allocation eof.
222          */
223         else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
224                 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
225                 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
226         /*
227          * Roundup the allocation request to a stripe unit (m_dalign) boundary
228          * if the file size is >= stripe unit size, and we are allocating past
229          * the allocation eof.
230          */
231         else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
232                 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
233
234         /*
235          * Always round up the allocation request to an extent boundary
236          * (when file on a real-time subvolume or has di_extsize hint).
237          */
238         if (extsize) {
239                 if (new_last_fsb)
240                         align = roundup_64(new_last_fsb, extsize);
241                 else
242                         align = extsize;
243                 new_last_fsb = roundup_64(*last_fsb, align);
244         }
245
246         if (new_last_fsb) {
247                 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
248                 if (error)
249                         return error;
250                 if (eof)
251                         *last_fsb = new_last_fsb;
252         }
253         return 0;
254 }
255
256 STATIC int
257 xfs_cmn_err_fsblock_zero(
258         xfs_inode_t     *ip,
259         xfs_bmbt_irec_t *imap)
260 {
261         xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
262                         "Access to block zero in inode %llu "
263                         "start_block: %llx start_off: %llx "
264                         "blkcnt: %llx extent-state: %x\n",
265                 (unsigned long long)ip->i_ino,
266                 (unsigned long long)imap->br_startblock,
267                 (unsigned long long)imap->br_startoff,
268                 (unsigned long long)imap->br_blockcount,
269                 imap->br_state);
270         return EFSCORRUPTED;
271 }
272
273 int
274 xfs_iomap_write_direct(
275         xfs_inode_t     *ip,
276         xfs_off_t       offset,
277         size_t          count,
278         int             flags,
279         xfs_bmbt_irec_t *ret_imap,
280         int             *nmaps,
281         int             found)
282 {
283         xfs_mount_t     *mp = ip->i_mount;
284         xfs_fileoff_t   offset_fsb;
285         xfs_fileoff_t   last_fsb;
286         xfs_filblks_t   count_fsb, resaligned;
287         xfs_fsblock_t   firstfsb;
288         xfs_extlen_t    extsz, temp;
289         int             nimaps;
290         int             bmapi_flag;
291         int             quota_flag;
292         int             rt;
293         xfs_trans_t     *tp;
294         xfs_bmbt_irec_t imap;
295         xfs_bmap_free_t free_list;
296         uint            qblocks, resblks, resrtextents;
297         int             committed;
298         int             error;
299
300         /*
301          * Make sure that the dquots are there. This doesn't hold
302          * the ilock across a disk read.
303          */
304         error = xfs_qm_dqattach_locked(ip, 0);
305         if (error)
306                 return XFS_ERROR(error);
307
308         rt = XFS_IS_REALTIME_INODE(ip);
309         extsz = xfs_get_extsz_hint(ip);
310
311         offset_fsb = XFS_B_TO_FSBT(mp, offset);
312         last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
313         if ((offset + count) > ip->i_size) {
314                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
315                 if (error)
316                         goto error_out;
317         } else {
318                 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
319                         last_fsb = MIN(last_fsb, (xfs_fileoff_t)
320                                         ret_imap->br_blockcount +
321                                         ret_imap->br_startoff);
322         }
323         count_fsb = last_fsb - offset_fsb;
324         ASSERT(count_fsb > 0);
325
326         resaligned = count_fsb;
327         if (unlikely(extsz)) {
328                 if ((temp = do_mod(offset_fsb, extsz)))
329                         resaligned += temp;
330                 if ((temp = do_mod(resaligned, extsz)))
331                         resaligned += extsz - temp;
332         }
333
334         if (unlikely(rt)) {
335                 resrtextents = qblocks = resaligned;
336                 resrtextents /= mp->m_sb.sb_rextsize;
337                 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
338                 quota_flag = XFS_QMOPT_RES_RTBLKS;
339         } else {
340                 resrtextents = 0;
341                 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
342                 quota_flag = XFS_QMOPT_RES_REGBLKS;
343         }
344
345         /*
346          * Allocate and setup the transaction
347          */
348         xfs_iunlock(ip, XFS_ILOCK_EXCL);
349         tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
350         error = xfs_trans_reserve(tp, resblks,
351                         XFS_WRITE_LOG_RES(mp), resrtextents,
352                         XFS_TRANS_PERM_LOG_RES,
353                         XFS_WRITE_LOG_COUNT);
354         /*
355          * Check for running out of space, note: need lock to return
356          */
357         if (error)
358                 xfs_trans_cancel(tp, 0);
359         xfs_ilock(ip, XFS_ILOCK_EXCL);
360         if (error)
361                 goto error_out;
362
363         error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
364         if (error)
365                 goto error1;
366
367         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
368         xfs_trans_ihold(tp, ip);
369
370         bmapi_flag = XFS_BMAPI_WRITE;
371         if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
372                 bmapi_flag |= XFS_BMAPI_PREALLOC;
373
374         /*
375          * Issue the xfs_bmapi() call to allocate the blocks
376          */
377         xfs_bmap_init(&free_list, &firstfsb);
378         nimaps = 1;
379         error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
380                 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
381         if (error)
382                 goto error0;
383
384         /*
385          * Complete the transaction
386          */
387         error = xfs_bmap_finish(&tp, &free_list, &committed);
388         if (error)
389                 goto error0;
390         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
391         if (error)
392                 goto error_out;
393
394         /*
395          * Copy any maps to caller's array and return any error.
396          */
397         if (nimaps == 0) {
398                 error = ENOSPC;
399                 goto error_out;
400         }
401
402         if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
403                 error = xfs_cmn_err_fsblock_zero(ip, &imap);
404                 goto error_out;
405         }
406
407         *ret_imap = imap;
408         *nmaps = 1;
409         return 0;
410
411 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
412         xfs_bmap_cancel(&free_list);
413         xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
414
415 error1: /* Just cancel transaction */
416         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
417         *nmaps = 0;     /* nothing set-up here */
418
419 error_out:
420         return XFS_ERROR(error);
421 }
422
423 /*
424  * If the caller is doing a write at the end of the file, then extend the
425  * allocation out to the file system's write iosize.  We clean up any extra
426  * space left over when the file is closed in xfs_inactive().
427  */
428 STATIC int
429 xfs_iomap_eof_want_preallocate(
430         xfs_mount_t     *mp,
431         xfs_inode_t     *ip,
432         xfs_off_t       offset,
433         size_t          count,
434         int             ioflag,
435         xfs_bmbt_irec_t *imap,
436         int             nimaps,
437         int             *prealloc)
438 {
439         xfs_fileoff_t   start_fsb;
440         xfs_filblks_t   count_fsb;
441         xfs_fsblock_t   firstblock;
442         int             n, error, imaps;
443
444         *prealloc = 0;
445         if ((offset + count) <= ip->i_size)
446                 return 0;
447
448         /*
449          * If there are any real blocks past eof, then don't
450          * do any speculative allocation.
451          */
452         start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
453         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
454         while (count_fsb > 0) {
455                 imaps = nimaps;
456                 firstblock = NULLFSBLOCK;
457                 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
458                                   &firstblock, 0, imap, &imaps, NULL, NULL);
459                 if (error)
460                         return error;
461                 for (n = 0; n < imaps; n++) {
462                         if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
463                             (imap[n].br_startblock != DELAYSTARTBLOCK))
464                                 return 0;
465                         start_fsb += imap[n].br_blockcount;
466                         count_fsb -= imap[n].br_blockcount;
467                 }
468         }
469         *prealloc = 1;
470         return 0;
471 }
472
473 int
474 xfs_iomap_write_delay(
475         xfs_inode_t     *ip,
476         xfs_off_t       offset,
477         size_t          count,
478         int             ioflag,
479         xfs_bmbt_irec_t *ret_imap,
480         int             *nmaps)
481 {
482         xfs_mount_t     *mp = ip->i_mount;
483         xfs_fileoff_t   offset_fsb;
484         xfs_fileoff_t   last_fsb;
485         xfs_off_t       aligned_offset;
486         xfs_fileoff_t   ioalign;
487         xfs_fsblock_t   firstblock;
488         xfs_extlen_t    extsz;
489         int             nimaps;
490         xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
491         int             prealloc, flushed = 0;
492         int             error;
493
494         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
495
496         /*
497          * Make sure that the dquots are there. This doesn't hold
498          * the ilock across a disk read.
499          */
500         error = xfs_qm_dqattach_locked(ip, 0);
501         if (error)
502                 return XFS_ERROR(error);
503
504         extsz = xfs_get_extsz_hint(ip);
505         offset_fsb = XFS_B_TO_FSBT(mp, offset);
506
507         error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
508                                 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
509         if (error)
510                 return error;
511
512 retry:
513         if (prealloc) {
514                 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
515                 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
516                 last_fsb = ioalign + mp->m_writeio_blocks;
517         } else {
518                 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
519         }
520
521         if (prealloc || extsz) {
522                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
523                 if (error)
524                         return error;
525         }
526
527         nimaps = XFS_WRITE_IMAPS;
528         firstblock = NULLFSBLOCK;
529         error = xfs_bmapi(NULL, ip, offset_fsb,
530                           (xfs_filblks_t)(last_fsb - offset_fsb),
531                           XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
532                           XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
533                           &nimaps, NULL, NULL);
534         if (error && (error != ENOSPC))
535                 return XFS_ERROR(error);
536
537         /*
538          * If bmapi returned us nothing, and if we didn't get back EDQUOT,
539          * then we must have run out of space - flush all other inodes with
540          * delalloc blocks and retry without EOF preallocation.
541          */
542         if (nimaps == 0) {
543                 trace_xfs_delalloc_enospc(ip, offset, count);
544                 if (flushed)
545                         return XFS_ERROR(ENOSPC);
546
547                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
548                 xfs_flush_inodes(ip);
549                 xfs_ilock(ip, XFS_ILOCK_EXCL);
550
551                 flushed = 1;
552                 error = 0;
553                 prealloc = 0;
554                 goto retry;
555         }
556
557         if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
558                 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
559
560         *ret_imap = imap[0];
561         *nmaps = 1;
562
563         return 0;
564 }
565
566 /*
567  * Pass in a delayed allocate extent, convert it to real extents;
568  * return to the caller the extent we create which maps on top of
569  * the originating callers request.
570  *
571  * Called without a lock on the inode.
572  *
573  * We no longer bother to look at the incoming map - all we have to
574  * guarantee is that whatever we allocate fills the required range.
575  */
576 int
577 xfs_iomap_write_allocate(
578         xfs_inode_t     *ip,
579         xfs_off_t       offset,
580         size_t          count,
581         xfs_bmbt_irec_t *map,
582         int             *retmap)
583 {
584         xfs_mount_t     *mp = ip->i_mount;
585         xfs_fileoff_t   offset_fsb, last_block;
586         xfs_fileoff_t   end_fsb, map_start_fsb;
587         xfs_fsblock_t   first_block;
588         xfs_bmap_free_t free_list;
589         xfs_filblks_t   count_fsb;
590         xfs_bmbt_irec_t imap;
591         xfs_trans_t     *tp;
592         int             nimaps, committed;
593         int             error = 0;
594         int             nres;
595
596         *retmap = 0;
597
598         /*
599          * Make sure that the dquots are there.
600          */
601         error = xfs_qm_dqattach(ip, 0);
602         if (error)
603                 return XFS_ERROR(error);
604
605         offset_fsb = XFS_B_TO_FSBT(mp, offset);
606         count_fsb = map->br_blockcount;
607         map_start_fsb = map->br_startoff;
608
609         XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
610
611         while (count_fsb != 0) {
612                 /*
613                  * Set up a transaction with which to allocate the
614                  * backing store for the file.  Do allocations in a
615                  * loop until we get some space in the range we are
616                  * interested in.  The other space that might be allocated
617                  * is in the delayed allocation extent on which we sit
618                  * but before our buffer starts.
619                  */
620
621                 nimaps = 0;
622                 while (nimaps == 0) {
623                         tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
624                         tp->t_flags |= XFS_TRANS_RESERVE;
625                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
626                         error = xfs_trans_reserve(tp, nres,
627                                         XFS_WRITE_LOG_RES(mp),
628                                         0, XFS_TRANS_PERM_LOG_RES,
629                                         XFS_WRITE_LOG_COUNT);
630                         if (error) {
631                                 xfs_trans_cancel(tp, 0);
632                                 return XFS_ERROR(error);
633                         }
634                         xfs_ilock(ip, XFS_ILOCK_EXCL);
635                         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
636                         xfs_trans_ihold(tp, ip);
637
638                         xfs_bmap_init(&free_list, &first_block);
639
640                         /*
641                          * it is possible that the extents have changed since
642                          * we did the read call as we dropped the ilock for a
643                          * while. We have to be careful about truncates or hole
644                          * punchs here - we are not allowed to allocate
645                          * non-delalloc blocks here.
646                          *
647                          * The only protection against truncation is the pages
648                          * for the range we are being asked to convert are
649                          * locked and hence a truncate will block on them
650                          * first.
651                          *
652                          * As a result, if we go beyond the range we really
653                          * need and hit an delalloc extent boundary followed by
654                          * a hole while we have excess blocks in the map, we
655                          * will fill the hole incorrectly and overrun the
656                          * transaction reservation.
657                          *
658                          * Using a single map prevents this as we are forced to
659                          * check each map we look for overlap with the desired
660                          * range and abort as soon as we find it. Also, given
661                          * that we only return a single map, having one beyond
662                          * what we can return is probably a bit silly.
663                          *
664                          * We also need to check that we don't go beyond EOF;
665                          * this is a truncate optimisation as a truncate sets
666                          * the new file size before block on the pages we
667                          * currently have locked under writeback. Because they
668                          * are about to be tossed, we don't need to write them
669                          * back....
670                          */
671                         nimaps = 1;
672                         end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
673                         error = xfs_bmap_last_offset(NULL, ip, &last_block,
674                                                         XFS_DATA_FORK);
675                         if (error)
676                                 goto trans_cancel;
677
678                         last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
679                         if ((map_start_fsb + count_fsb) > last_block) {
680                                 count_fsb = last_block - map_start_fsb;
681                                 if (count_fsb == 0) {
682                                         error = EAGAIN;
683                                         goto trans_cancel;
684                                 }
685                         }
686
687                         /* Go get the actual blocks */
688                         error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
689                                         XFS_BMAPI_WRITE, &first_block, 1,
690                                         &imap, &nimaps, &free_list, NULL);
691                         if (error)
692                                 goto trans_cancel;
693
694                         error = xfs_bmap_finish(&tp, &free_list, &committed);
695                         if (error)
696                                 goto trans_cancel;
697
698                         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
699                         if (error)
700                                 goto error0;
701
702                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
703                 }
704
705                 /*
706                  * See if we were able to allocate an extent that
707                  * covers at least part of the callers request
708                  */
709                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
710                         return xfs_cmn_err_fsblock_zero(ip, &imap);
711
712                 if ((offset_fsb >= imap.br_startoff) &&
713                     (offset_fsb < (imap.br_startoff +
714                                    imap.br_blockcount))) {
715                         *map = imap;
716                         *retmap = 1;
717                         XFS_STATS_INC(xs_xstrat_quick);
718                         return 0;
719                 }
720
721                 /*
722                  * So far we have not mapped the requested part of the
723                  * file, just surrounding data, try again.
724                  */
725                 count_fsb -= imap.br_blockcount;
726                 map_start_fsb = imap.br_startoff + imap.br_blockcount;
727         }
728
729 trans_cancel:
730         xfs_bmap_cancel(&free_list);
731         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
732 error0:
733         xfs_iunlock(ip, XFS_ILOCK_EXCL);
734         return XFS_ERROR(error);
735 }
736
737 int
738 xfs_iomap_write_unwritten(
739         xfs_inode_t     *ip,
740         xfs_off_t       offset,
741         size_t          count)
742 {
743         xfs_mount_t     *mp = ip->i_mount;
744         xfs_fileoff_t   offset_fsb;
745         xfs_filblks_t   count_fsb;
746         xfs_filblks_t   numblks_fsb;
747         xfs_fsblock_t   firstfsb;
748         int             nimaps;
749         xfs_trans_t     *tp;
750         xfs_bmbt_irec_t imap;
751         xfs_bmap_free_t free_list;
752         uint            resblks;
753         int             committed;
754         int             error;
755
756         trace_xfs_unwritten_convert(ip, offset, count);
757
758         offset_fsb = XFS_B_TO_FSBT(mp, offset);
759         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
760         count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
761
762         /*
763          * Reserve enough blocks in this transaction for two complete extent
764          * btree splits.  We may be converting the middle part of an unwritten
765          * extent and in this case we will insert two new extents in the btree
766          * each of which could cause a full split.
767          *
768          * This reservation amount will be used in the first call to
769          * xfs_bmbt_split() to select an AG with enough space to satisfy the
770          * rest of the operation.
771          */
772         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
773
774         do {
775                 /*
776                  * set up a transaction to convert the range of extents
777                  * from unwritten to real. Do allocations in a loop until
778                  * we have covered the range passed in.
779                  *
780                  * Note that we open code the transaction allocation here
781                  * to pass KM_NOFS--we can't risk to recursing back into
782                  * the filesystem here as we might be asked to write out
783                  * the same inode that we complete here and might deadlock
784                  * on the iolock.
785                  */
786                 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
787                 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
788                 tp->t_flags |= XFS_TRANS_RESERVE;
789                 error = xfs_trans_reserve(tp, resblks,
790                                 XFS_WRITE_LOG_RES(mp), 0,
791                                 XFS_TRANS_PERM_LOG_RES,
792                                 XFS_WRITE_LOG_COUNT);
793                 if (error) {
794                         xfs_trans_cancel(tp, 0);
795                         return XFS_ERROR(error);
796                 }
797
798                 xfs_ilock(ip, XFS_ILOCK_EXCL);
799                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
800                 xfs_trans_ihold(tp, ip);
801
802                 /*
803                  * Modify the unwritten extent state of the buffer.
804                  */
805                 xfs_bmap_init(&free_list, &firstfsb);
806                 nimaps = 1;
807                 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
808                                   XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
809                                   1, &imap, &nimaps, &free_list, NULL);
810                 if (error)
811                         goto error_on_bmapi_transaction;
812
813                 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
814                 if (error)
815                         goto error_on_bmapi_transaction;
816
817                 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
818                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
819                 if (error)
820                         return XFS_ERROR(error);
821
822                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
823                         return xfs_cmn_err_fsblock_zero(ip, &imap);
824
825                 if ((numblks_fsb = imap.br_blockcount) == 0) {
826                         /*
827                          * The numblks_fsb value should always get
828                          * smaller, otherwise the loop is stuck.
829                          */
830                         ASSERT(imap.br_blockcount);
831                         break;
832                 }
833                 offset_fsb += numblks_fsb;
834                 count_fsb -= numblks_fsb;
835         } while (count_fsb > 0);
836
837         return 0;
838
839 error_on_bmapi_transaction:
840         xfs_bmap_cancel(&free_list);
841         xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
842         xfs_iunlock(ip, XFS_ILOCK_EXCL);
843         return XFS_ERROR(error);
844 }