xfs: kill some warnings on i386 builds
[safe/jmp/linux-2.6] / fs / xfs / xfs_iomap.c
1 /*
2  * Copyright (c) 2000-2006 Silicon Graphics, Inc.
3  * All Rights Reserved.
4  *
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License as
7  * published by the Free Software Foundation.
8  *
9  * This program is distributed in the hope that it would be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12  * GNU General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write the Free Software Foundation,
16  * Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17  */
18 #include "xfs.h"
19 #include "xfs_fs.h"
20 #include "xfs_bit.h"
21 #include "xfs_log.h"
22 #include "xfs_inum.h"
23 #include "xfs_trans.h"
24 #include "xfs_sb.h"
25 #include "xfs_ag.h"
26 #include "xfs_dir2.h"
27 #include "xfs_alloc.h"
28 #include "xfs_dmapi.h"
29 #include "xfs_quota.h"
30 #include "xfs_mount.h"
31 #include "xfs_bmap_btree.h"
32 #include "xfs_alloc_btree.h"
33 #include "xfs_ialloc_btree.h"
34 #include "xfs_dir2_sf.h"
35 #include "xfs_attr_sf.h"
36 #include "xfs_dinode.h"
37 #include "xfs_inode.h"
38 #include "xfs_ialloc.h"
39 #include "xfs_btree.h"
40 #include "xfs_bmap.h"
41 #include "xfs_rtalloc.h"
42 #include "xfs_error.h"
43 #include "xfs_itable.h"
44 #include "xfs_rw.h"
45 #include "xfs_attr.h"
46 #include "xfs_buf_item.h"
47 #include "xfs_trans_space.h"
48 #include "xfs_utils.h"
49 #include "xfs_iomap.h"
50 #include "xfs_trace.h"
51
52
/*
 * Round a byte offset down to a multiple of (1 << mp->m_writeio_log).
 * Both macro arguments are parenthesised so that any expression yielding
 * a mount pointer (e.g. "&mount" or a conditional) can be passed as mp.
 */
#define XFS_WRITEIO_ALIGN(mp,off)	(((off) >> (mp)->m_writeio_log) \
						<< (mp)->m_writeio_log)
#define XFS_STRAT_WRITE_IMAPS	2
/* Number of extent records requested per delayed-allocation lookup. */
#define XFS_WRITE_IMAPS		XFS_BMAP_MAX_NMAP
57
58 STATIC int
59 xfs_imap_to_bmap(
60         xfs_inode_t     *ip,
61         xfs_off_t       offset,
62         xfs_bmbt_irec_t *imap,
63         xfs_iomap_t     *iomapp,
64         int             imaps,                  /* Number of imap entries */
65         int             iomaps,                 /* Number of iomap entries */
66         int             flags)
67 {
68         xfs_mount_t     *mp = ip->i_mount;
69         int             pbm;
70         xfs_fsblock_t   start_block;
71
72
73         for (pbm = 0; imaps && pbm < iomaps; imaps--, iomapp++, imap++, pbm++) {
74                 iomapp->iomap_offset = XFS_FSB_TO_B(mp, imap->br_startoff);
75                 iomapp->iomap_delta = offset - iomapp->iomap_offset;
76                 iomapp->iomap_bsize = XFS_FSB_TO_B(mp, imap->br_blockcount);
77                 iomapp->iomap_flags = flags;
78
79                 if (XFS_IS_REALTIME_INODE(ip)) {
80                         iomapp->iomap_flags |= IOMAP_REALTIME;
81                         iomapp->iomap_target = mp->m_rtdev_targp;
82                 } else {
83                         iomapp->iomap_target = mp->m_ddev_targp;
84                 }
85                 start_block = imap->br_startblock;
86                 if (start_block == HOLESTARTBLOCK) {
87                         iomapp->iomap_bn = IOMAP_DADDR_NULL;
88                         iomapp->iomap_flags |= IOMAP_HOLE;
89                 } else if (start_block == DELAYSTARTBLOCK) {
90                         iomapp->iomap_bn = IOMAP_DADDR_NULL;
91                         iomapp->iomap_flags |= IOMAP_DELAY;
92                 } else {
93                         iomapp->iomap_bn = xfs_fsb_to_db(ip, start_block);
94                         if (ISUNWRITTEN(imap))
95                                 iomapp->iomap_flags |= IOMAP_UNWRITTEN;
96                 }
97
98                 offset += iomapp->iomap_bsize - iomapp->iomap_delta;
99         }
100         return pbm;     /* Return the number filled */
101 }
102
103 int
104 xfs_iomap(
105         xfs_inode_t     *ip,
106         xfs_off_t       offset,
107         ssize_t         count,
108         int             flags,
109         xfs_iomap_t     *iomapp,
110         int             *niomaps)
111 {
112         xfs_mount_t     *mp = ip->i_mount;
113         xfs_fileoff_t   offset_fsb, end_fsb;
114         int             error = 0;
115         int             lockmode = 0;
116         xfs_bmbt_irec_t imap;
117         int             nimaps = 1;
118         int             bmapi_flags = 0;
119         int             iomap_flags = 0;
120
121         ASSERT((ip->i_d.di_mode & S_IFMT) == S_IFREG);
122
123         if (XFS_FORCED_SHUTDOWN(mp))
124                 return XFS_ERROR(EIO);
125
126         trace_xfs_iomap_enter(ip, offset, count, flags, NULL);
127
128         switch (flags & (BMAPI_READ | BMAPI_WRITE | BMAPI_ALLOCATE)) {
129         case BMAPI_READ:
130                 lockmode = xfs_ilock_map_shared(ip);
131                 bmapi_flags = XFS_BMAPI_ENTIRE;
132                 break;
133         case BMAPI_WRITE:
134                 lockmode = XFS_ILOCK_EXCL;
135                 if (flags & BMAPI_IGNSTATE)
136                         bmapi_flags |= XFS_BMAPI_IGSTATE|XFS_BMAPI_ENTIRE;
137                 xfs_ilock(ip, lockmode);
138                 break;
139         case BMAPI_ALLOCATE:
140                 lockmode = XFS_ILOCK_SHARED;
141                 bmapi_flags = XFS_BMAPI_ENTIRE;
142
143                 /* Attempt non-blocking lock */
144                 if (flags & BMAPI_TRYLOCK) {
145                         if (!xfs_ilock_nowait(ip, lockmode))
146                                 return XFS_ERROR(EAGAIN);
147                 } else {
148                         xfs_ilock(ip, lockmode);
149                 }
150                 break;
151         default:
152                 BUG();
153         }
154
155         ASSERT(offset <= mp->m_maxioffset);
156         if ((xfs_fsize_t)offset + count > mp->m_maxioffset)
157                 count = mp->m_maxioffset - offset;
158         end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
159         offset_fsb = XFS_B_TO_FSBT(mp, offset);
160
161         error = xfs_bmapi(NULL, ip, offset_fsb,
162                         (xfs_filblks_t)(end_fsb - offset_fsb),
163                         bmapi_flags,  NULL, 0, &imap,
164                         &nimaps, NULL, NULL);
165
166         if (error)
167                 goto out;
168
169         switch (flags & (BMAPI_WRITE|BMAPI_ALLOCATE)) {
170         case BMAPI_WRITE:
171                 /* If we found an extent, return it */
172                 if (nimaps &&
173                     (imap.br_startblock != HOLESTARTBLOCK) &&
174                     (imap.br_startblock != DELAYSTARTBLOCK)) {
175                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
176                         break;
177                 }
178
179                 if (flags & (BMAPI_DIRECT|BMAPI_MMAP)) {
180                         error = xfs_iomap_write_direct(ip, offset, count, flags,
181                                                        &imap, &nimaps, nimaps);
182                 } else {
183                         error = xfs_iomap_write_delay(ip, offset, count, flags,
184                                                       &imap, &nimaps);
185                 }
186                 if (!error) {
187                         trace_xfs_iomap_alloc(ip, offset, count, flags, &imap);
188                 }
189                 iomap_flags = IOMAP_NEW;
190                 break;
191         case BMAPI_ALLOCATE:
192                 /* If we found an extent, return it */
193                 xfs_iunlock(ip, lockmode);
194                 lockmode = 0;
195
196                 if (nimaps && !isnullstartblock(imap.br_startblock)) {
197                         trace_xfs_iomap_found(ip, offset, count, flags, &imap);
198                         break;
199                 }
200
201                 error = xfs_iomap_write_allocate(ip, offset, count,
202                                                  &imap, &nimaps);
203                 break;
204         }
205
206         if (nimaps) {
207                 *niomaps = xfs_imap_to_bmap(ip, offset, &imap,
208                                             iomapp, nimaps, *niomaps, iomap_flags);
209         } else if (niomaps) {
210                 *niomaps = 0;
211         }
212
213 out:
214         if (lockmode)
215                 xfs_iunlock(ip, lockmode);
216         return XFS_ERROR(error);
217 }
218
219
220 STATIC int
221 xfs_iomap_eof_align_last_fsb(
222         xfs_mount_t     *mp,
223         xfs_inode_t     *ip,
224         xfs_extlen_t    extsize,
225         xfs_fileoff_t   *last_fsb)
226 {
227         xfs_fileoff_t   new_last_fsb = 0;
228         xfs_extlen_t    align;
229         int             eof, error;
230
231         if (XFS_IS_REALTIME_INODE(ip))
232                 ;
233         /*
234          * If mounted with the "-o swalloc" option, roundup the allocation
235          * request to a stripe width boundary if the file size is >=
236          * stripe width and we are allocating past the allocation eof.
237          */
238         else if (mp->m_swidth && (mp->m_flags & XFS_MOUNT_SWALLOC) &&
239                 (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_swidth)))
240                 new_last_fsb = roundup_64(*last_fsb, mp->m_swidth);
241         /*
242          * Roundup the allocation request to a stripe unit (m_dalign) boundary
243          * if the file size is >= stripe unit size, and we are allocating past
244          * the allocation eof.
245          */
246         else if (mp->m_dalign && (ip->i_size >= XFS_FSB_TO_B(mp, mp->m_dalign)))
247                 new_last_fsb = roundup_64(*last_fsb, mp->m_dalign);
248
249         /*
250          * Always round up the allocation request to an extent boundary
251          * (when file on a real-time subvolume or has di_extsize hint).
252          */
253         if (extsize) {
254                 if (new_last_fsb)
255                         align = roundup_64(new_last_fsb, extsize);
256                 else
257                         align = extsize;
258                 new_last_fsb = roundup_64(*last_fsb, align);
259         }
260
261         if (new_last_fsb) {
262                 error = xfs_bmap_eof(ip, new_last_fsb, XFS_DATA_FORK, &eof);
263                 if (error)
264                         return error;
265                 if (eof)
266                         *last_fsb = new_last_fsb;
267         }
268         return 0;
269 }
270
271 STATIC int
272 xfs_cmn_err_fsblock_zero(
273         xfs_inode_t     *ip,
274         xfs_bmbt_irec_t *imap)
275 {
276         xfs_cmn_err(XFS_PTAG_FSBLOCK_ZERO, CE_ALERT, ip->i_mount,
277                         "Access to block zero in inode %llu "
278                         "start_block: %llx start_off: %llx "
279                         "blkcnt: %llx extent-state: %x\n",
280                 (unsigned long long)ip->i_ino,
281                 (unsigned long long)imap->br_startblock,
282                 (unsigned long long)imap->br_startoff,
283                 (unsigned long long)imap->br_blockcount,
284                 imap->br_state);
285         return EFSCORRUPTED;
286 }
287
288 int
289 xfs_iomap_write_direct(
290         xfs_inode_t     *ip,
291         xfs_off_t       offset,
292         size_t          count,
293         int             flags,
294         xfs_bmbt_irec_t *ret_imap,
295         int             *nmaps,
296         int             found)
297 {
298         xfs_mount_t     *mp = ip->i_mount;
299         xfs_fileoff_t   offset_fsb;
300         xfs_fileoff_t   last_fsb;
301         xfs_filblks_t   count_fsb, resaligned;
302         xfs_fsblock_t   firstfsb;
303         xfs_extlen_t    extsz, temp;
304         int             nimaps;
305         int             bmapi_flag;
306         int             quota_flag;
307         int             rt;
308         xfs_trans_t     *tp;
309         xfs_bmbt_irec_t imap;
310         xfs_bmap_free_t free_list;
311         uint            qblocks, resblks, resrtextents;
312         int             committed;
313         int             error;
314
315         /*
316          * Make sure that the dquots are there. This doesn't hold
317          * the ilock across a disk read.
318          */
319         error = xfs_qm_dqattach_locked(ip, 0);
320         if (error)
321                 return XFS_ERROR(error);
322
323         rt = XFS_IS_REALTIME_INODE(ip);
324         extsz = xfs_get_extsz_hint(ip);
325
326         offset_fsb = XFS_B_TO_FSBT(mp, offset);
327         last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
328         if ((offset + count) > ip->i_size) {
329                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
330                 if (error)
331                         goto error_out;
332         } else {
333                 if (found && (ret_imap->br_startblock == HOLESTARTBLOCK))
334                         last_fsb = MIN(last_fsb, (xfs_fileoff_t)
335                                         ret_imap->br_blockcount +
336                                         ret_imap->br_startoff);
337         }
338         count_fsb = last_fsb - offset_fsb;
339         ASSERT(count_fsb > 0);
340
341         resaligned = count_fsb;
342         if (unlikely(extsz)) {
343                 if ((temp = do_mod(offset_fsb, extsz)))
344                         resaligned += temp;
345                 if ((temp = do_mod(resaligned, extsz)))
346                         resaligned += extsz - temp;
347         }
348
349         if (unlikely(rt)) {
350                 resrtextents = qblocks = resaligned;
351                 resrtextents /= mp->m_sb.sb_rextsize;
352                 resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0);
353                 quota_flag = XFS_QMOPT_RES_RTBLKS;
354         } else {
355                 resrtextents = 0;
356                 resblks = qblocks = XFS_DIOSTRAT_SPACE_RES(mp, resaligned);
357                 quota_flag = XFS_QMOPT_RES_REGBLKS;
358         }
359
360         /*
361          * Allocate and setup the transaction
362          */
363         xfs_iunlock(ip, XFS_ILOCK_EXCL);
364         tp = xfs_trans_alloc(mp, XFS_TRANS_DIOSTRAT);
365         error = xfs_trans_reserve(tp, resblks,
366                         XFS_WRITE_LOG_RES(mp), resrtextents,
367                         XFS_TRANS_PERM_LOG_RES,
368                         XFS_WRITE_LOG_COUNT);
369         /*
370          * Check for running out of space, note: need lock to return
371          */
372         if (error)
373                 xfs_trans_cancel(tp, 0);
374         xfs_ilock(ip, XFS_ILOCK_EXCL);
375         if (error)
376                 goto error_out;
377
378         error = xfs_trans_reserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
379         if (error)
380                 goto error1;
381
382         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
383         xfs_trans_ihold(tp, ip);
384
385         bmapi_flag = XFS_BMAPI_WRITE;
386         if ((flags & BMAPI_DIRECT) && (offset < ip->i_size || extsz))
387                 bmapi_flag |= XFS_BMAPI_PREALLOC;
388
389         /*
390          * Issue the xfs_bmapi() call to allocate the blocks
391          */
392         xfs_bmap_init(&free_list, &firstfsb);
393         nimaps = 1;
394         error = xfs_bmapi(tp, ip, offset_fsb, count_fsb, bmapi_flag,
395                 &firstfsb, 0, &imap, &nimaps, &free_list, NULL);
396         if (error)
397                 goto error0;
398
399         /*
400          * Complete the transaction
401          */
402         error = xfs_bmap_finish(&tp, &free_list, &committed);
403         if (error)
404                 goto error0;
405         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
406         if (error)
407                 goto error_out;
408
409         /*
410          * Copy any maps to caller's array and return any error.
411          */
412         if (nimaps == 0) {
413                 error = ENOSPC;
414                 goto error_out;
415         }
416
417         if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip))) {
418                 error = xfs_cmn_err_fsblock_zero(ip, &imap);
419                 goto error_out;
420         }
421
422         *ret_imap = imap;
423         *nmaps = 1;
424         return 0;
425
426 error0: /* Cancel bmap, unlock inode, unreserve quota blocks, cancel trans */
427         xfs_bmap_cancel(&free_list);
428         xfs_trans_unreserve_quota_nblks(tp, ip, qblocks, 0, quota_flag);
429
430 error1: /* Just cancel transaction */
431         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
432         *nmaps = 0;     /* nothing set-up here */
433
434 error_out:
435         return XFS_ERROR(error);
436 }
437
438 /*
439  * If the caller is doing a write at the end of the file, then extend the
440  * allocation out to the file system's write iosize.  We clean up any extra
441  * space left over when the file is closed in xfs_inactive().
442  */
443 STATIC int
444 xfs_iomap_eof_want_preallocate(
445         xfs_mount_t     *mp,
446         xfs_inode_t     *ip,
447         xfs_off_t       offset,
448         size_t          count,
449         int             ioflag,
450         xfs_bmbt_irec_t *imap,
451         int             nimaps,
452         int             *prealloc)
453 {
454         xfs_fileoff_t   start_fsb;
455         xfs_filblks_t   count_fsb;
456         xfs_fsblock_t   firstblock;
457         int             n, error, imaps;
458
459         *prealloc = 0;
460         if ((offset + count) <= ip->i_size)
461                 return 0;
462
463         /*
464          * If there are any real blocks past eof, then don't
465          * do any speculative allocation.
466          */
467         start_fsb = XFS_B_TO_FSBT(mp, ((xfs_ufsize_t)(offset + count - 1)));
468         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)XFS_MAXIOFFSET(mp));
469         while (count_fsb > 0) {
470                 imaps = nimaps;
471                 firstblock = NULLFSBLOCK;
472                 error = xfs_bmapi(NULL, ip, start_fsb, count_fsb, 0,
473                                   &firstblock, 0, imap, &imaps, NULL, NULL);
474                 if (error)
475                         return error;
476                 for (n = 0; n < imaps; n++) {
477                         if ((imap[n].br_startblock != HOLESTARTBLOCK) &&
478                             (imap[n].br_startblock != DELAYSTARTBLOCK))
479                                 return 0;
480                         start_fsb += imap[n].br_blockcount;
481                         count_fsb -= imap[n].br_blockcount;
482                 }
483         }
484         *prealloc = 1;
485         return 0;
486 }
487
488 int
489 xfs_iomap_write_delay(
490         xfs_inode_t     *ip,
491         xfs_off_t       offset,
492         size_t          count,
493         int             ioflag,
494         xfs_bmbt_irec_t *ret_imap,
495         int             *nmaps)
496 {
497         xfs_mount_t     *mp = ip->i_mount;
498         xfs_fileoff_t   offset_fsb;
499         xfs_fileoff_t   last_fsb;
500         xfs_off_t       aligned_offset;
501         xfs_fileoff_t   ioalign;
502         xfs_fsblock_t   firstblock;
503         xfs_extlen_t    extsz;
504         int             nimaps;
505         xfs_bmbt_irec_t imap[XFS_WRITE_IMAPS];
506         int             prealloc, flushed = 0;
507         int             error;
508
509         ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
510
511         /*
512          * Make sure that the dquots are there. This doesn't hold
513          * the ilock across a disk read.
514          */
515         error = xfs_qm_dqattach_locked(ip, 0);
516         if (error)
517                 return XFS_ERROR(error);
518
519         extsz = xfs_get_extsz_hint(ip);
520         offset_fsb = XFS_B_TO_FSBT(mp, offset);
521
522         error = xfs_iomap_eof_want_preallocate(mp, ip, offset, count,
523                                 ioflag, imap, XFS_WRITE_IMAPS, &prealloc);
524         if (error)
525                 return error;
526
527 retry:
528         if (prealloc) {
529                 aligned_offset = XFS_WRITEIO_ALIGN(mp, (offset + count - 1));
530                 ioalign = XFS_B_TO_FSBT(mp, aligned_offset);
531                 last_fsb = ioalign + mp->m_writeio_blocks;
532         } else {
533                 last_fsb = XFS_B_TO_FSB(mp, ((xfs_ufsize_t)(offset + count)));
534         }
535
536         if (prealloc || extsz) {
537                 error = xfs_iomap_eof_align_last_fsb(mp, ip, extsz, &last_fsb);
538                 if (error)
539                         return error;
540         }
541
542         nimaps = XFS_WRITE_IMAPS;
543         firstblock = NULLFSBLOCK;
544         error = xfs_bmapi(NULL, ip, offset_fsb,
545                           (xfs_filblks_t)(last_fsb - offset_fsb),
546                           XFS_BMAPI_DELAY | XFS_BMAPI_WRITE |
547                           XFS_BMAPI_ENTIRE, &firstblock, 1, imap,
548                           &nimaps, NULL, NULL);
549         if (error && (error != ENOSPC))
550                 return XFS_ERROR(error);
551
552         /*
553          * If bmapi returned us nothing, and if we didn't get back EDQUOT,
554          * then we must have run out of space - flush all other inodes with
555          * delalloc blocks and retry without EOF preallocation.
556          */
557         if (nimaps == 0) {
558                 trace_xfs_delalloc_enospc(ip, offset, count);
559                 if (flushed)
560                         return XFS_ERROR(ENOSPC);
561
562                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
563                 xfs_flush_inodes(ip);
564                 xfs_ilock(ip, XFS_ILOCK_EXCL);
565
566                 flushed = 1;
567                 error = 0;
568                 prealloc = 0;
569                 goto retry;
570         }
571
572         if (!(imap[0].br_startblock || XFS_IS_REALTIME_INODE(ip)))
573                 return xfs_cmn_err_fsblock_zero(ip, &imap[0]);
574
575         *ret_imap = imap[0];
576         *nmaps = 1;
577
578         return 0;
579 }
580
581 /*
582  * Pass in a delayed allocate extent, convert it to real extents;
583  * return to the caller the extent we create which maps on top of
584  * the originating callers request.
585  *
586  * Called without a lock on the inode.
587  *
588  * We no longer bother to look at the incoming map - all we have to
589  * guarantee is that whatever we allocate fills the required range.
590  */
591 int
592 xfs_iomap_write_allocate(
593         xfs_inode_t     *ip,
594         xfs_off_t       offset,
595         size_t          count,
596         xfs_bmbt_irec_t *map,
597         int             *retmap)
598 {
599         xfs_mount_t     *mp = ip->i_mount;
600         xfs_fileoff_t   offset_fsb, last_block;
601         xfs_fileoff_t   end_fsb, map_start_fsb;
602         xfs_fsblock_t   first_block;
603         xfs_bmap_free_t free_list;
604         xfs_filblks_t   count_fsb;
605         xfs_bmbt_irec_t imap;
606         xfs_trans_t     *tp;
607         int             nimaps, committed;
608         int             error = 0;
609         int             nres;
610
611         *retmap = 0;
612
613         /*
614          * Make sure that the dquots are there.
615          */
616         error = xfs_qm_dqattach(ip, 0);
617         if (error)
618                 return XFS_ERROR(error);
619
620         offset_fsb = XFS_B_TO_FSBT(mp, offset);
621         count_fsb = map->br_blockcount;
622         map_start_fsb = map->br_startoff;
623
624         XFS_STATS_ADD(xs_xstrat_bytes, XFS_FSB_TO_B(mp, count_fsb));
625
626         while (count_fsb != 0) {
627                 /*
628                  * Set up a transaction with which to allocate the
629                  * backing store for the file.  Do allocations in a
630                  * loop until we get some space in the range we are
631                  * interested in.  The other space that might be allocated
632                  * is in the delayed allocation extent on which we sit
633                  * but before our buffer starts.
634                  */
635
636                 nimaps = 0;
637                 while (nimaps == 0) {
638                         tp = xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE);
639                         tp->t_flags |= XFS_TRANS_RESERVE;
640                         nres = XFS_EXTENTADD_SPACE_RES(mp, XFS_DATA_FORK);
641                         error = xfs_trans_reserve(tp, nres,
642                                         XFS_WRITE_LOG_RES(mp),
643                                         0, XFS_TRANS_PERM_LOG_RES,
644                                         XFS_WRITE_LOG_COUNT);
645                         if (error) {
646                                 xfs_trans_cancel(tp, 0);
647                                 return XFS_ERROR(error);
648                         }
649                         xfs_ilock(ip, XFS_ILOCK_EXCL);
650                         xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
651                         xfs_trans_ihold(tp, ip);
652
653                         xfs_bmap_init(&free_list, &first_block);
654
655                         /*
656                          * it is possible that the extents have changed since
657                          * we did the read call as we dropped the ilock for a
658                          * while. We have to be careful about truncates or hole
659                          * punchs here - we are not allowed to allocate
660                          * non-delalloc blocks here.
661                          *
662                          * The only protection against truncation is the pages
663                          * for the range we are being asked to convert are
664                          * locked and hence a truncate will block on them
665                          * first.
666                          *
667                          * As a result, if we go beyond the range we really
668                          * need and hit an delalloc extent boundary followed by
669                          * a hole while we have excess blocks in the map, we
670                          * will fill the hole incorrectly and overrun the
671                          * transaction reservation.
672                          *
673                          * Using a single map prevents this as we are forced to
674                          * check each map we look for overlap with the desired
675                          * range and abort as soon as we find it. Also, given
676                          * that we only return a single map, having one beyond
677                          * what we can return is probably a bit silly.
678                          *
679                          * We also need to check that we don't go beyond EOF;
680                          * this is a truncate optimisation as a truncate sets
681                          * the new file size before block on the pages we
682                          * currently have locked under writeback. Because they
683                          * are about to be tossed, we don't need to write them
684                          * back....
685                          */
686                         nimaps = 1;
687                         end_fsb = XFS_B_TO_FSB(mp, ip->i_size);
688                         error = xfs_bmap_last_offset(NULL, ip, &last_block,
689                                                         XFS_DATA_FORK);
690                         if (error)
691                                 goto trans_cancel;
692
693                         last_block = XFS_FILEOFF_MAX(last_block, end_fsb);
694                         if ((map_start_fsb + count_fsb) > last_block) {
695                                 count_fsb = last_block - map_start_fsb;
696                                 if (count_fsb == 0) {
697                                         error = EAGAIN;
698                                         goto trans_cancel;
699                                 }
700                         }
701
702                         /* Go get the actual blocks */
703                         error = xfs_bmapi(tp, ip, map_start_fsb, count_fsb,
704                                         XFS_BMAPI_WRITE, &first_block, 1,
705                                         &imap, &nimaps, &free_list, NULL);
706                         if (error)
707                                 goto trans_cancel;
708
709                         error = xfs_bmap_finish(&tp, &free_list, &committed);
710                         if (error)
711                                 goto trans_cancel;
712
713                         error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
714                         if (error)
715                                 goto error0;
716
717                         xfs_iunlock(ip, XFS_ILOCK_EXCL);
718                 }
719
720                 /*
721                  * See if we were able to allocate an extent that
722                  * covers at least part of the callers request
723                  */
724                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
725                         return xfs_cmn_err_fsblock_zero(ip, &imap);
726
727                 if ((offset_fsb >= imap.br_startoff) &&
728                     (offset_fsb < (imap.br_startoff +
729                                    imap.br_blockcount))) {
730                         *map = imap;
731                         *retmap = 1;
732                         XFS_STATS_INC(xs_xstrat_quick);
733                         return 0;
734                 }
735
736                 /*
737                  * So far we have not mapped the requested part of the
738                  * file, just surrounding data, try again.
739                  */
740                 count_fsb -= imap.br_blockcount;
741                 map_start_fsb = imap.br_startoff + imap.br_blockcount;
742         }
743
744 trans_cancel:
745         xfs_bmap_cancel(&free_list);
746         xfs_trans_cancel(tp, XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT);
747 error0:
748         xfs_iunlock(ip, XFS_ILOCK_EXCL);
749         return XFS_ERROR(error);
750 }
751
752 int
753 xfs_iomap_write_unwritten(
754         xfs_inode_t     *ip,
755         xfs_off_t       offset,
756         size_t          count)
757 {
758         xfs_mount_t     *mp = ip->i_mount;
759         xfs_fileoff_t   offset_fsb;
760         xfs_filblks_t   count_fsb;
761         xfs_filblks_t   numblks_fsb;
762         xfs_fsblock_t   firstfsb;
763         int             nimaps;
764         xfs_trans_t     *tp;
765         xfs_bmbt_irec_t imap;
766         xfs_bmap_free_t free_list;
767         uint            resblks;
768         int             committed;
769         int             error;
770
771         trace_xfs_unwritten_convert(ip, offset, count);
772
773         offset_fsb = XFS_B_TO_FSBT(mp, offset);
774         count_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
775         count_fsb = (xfs_filblks_t)(count_fsb - offset_fsb);
776
777         /*
778          * Reserve enough blocks in this transaction for two complete extent
779          * btree splits.  We may be converting the middle part of an unwritten
780          * extent and in this case we will insert two new extents in the btree
781          * each of which could cause a full split.
782          *
783          * This reservation amount will be used in the first call to
784          * xfs_bmbt_split() to select an AG with enough space to satisfy the
785          * rest of the operation.
786          */
787         resblks = XFS_DIOSTRAT_SPACE_RES(mp, 0) << 1;
788
789         do {
790                 /*
791                  * set up a transaction to convert the range of extents
792                  * from unwritten to real. Do allocations in a loop until
793                  * we have covered the range passed in.
794                  *
795                  * Note that we open code the transaction allocation here
796                  * to pass KM_NOFS--we can't risk to recursing back into
797                  * the filesystem here as we might be asked to write out
798                  * the same inode that we complete here and might deadlock
799                  * on the iolock.
800                  */
801                 xfs_wait_for_freeze(mp, SB_FREEZE_TRANS);
802                 tp = _xfs_trans_alloc(mp, XFS_TRANS_STRAT_WRITE, KM_NOFS);
803                 tp->t_flags |= XFS_TRANS_RESERVE;
804                 error = xfs_trans_reserve(tp, resblks,
805                                 XFS_WRITE_LOG_RES(mp), 0,
806                                 XFS_TRANS_PERM_LOG_RES,
807                                 XFS_WRITE_LOG_COUNT);
808                 if (error) {
809                         xfs_trans_cancel(tp, 0);
810                         return XFS_ERROR(error);
811                 }
812
813                 xfs_ilock(ip, XFS_ILOCK_EXCL);
814                 xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
815                 xfs_trans_ihold(tp, ip);
816
817                 /*
818                  * Modify the unwritten extent state of the buffer.
819                  */
820                 xfs_bmap_init(&free_list, &firstfsb);
821                 nimaps = 1;
822                 error = xfs_bmapi(tp, ip, offset_fsb, count_fsb,
823                                   XFS_BMAPI_WRITE|XFS_BMAPI_CONVERT, &firstfsb,
824                                   1, &imap, &nimaps, &free_list, NULL);
825                 if (error)
826                         goto error_on_bmapi_transaction;
827
828                 error = xfs_bmap_finish(&(tp), &(free_list), &committed);
829                 if (error)
830                         goto error_on_bmapi_transaction;
831
832                 error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
833                 xfs_iunlock(ip, XFS_ILOCK_EXCL);
834                 if (error)
835                         return XFS_ERROR(error);
836
837                 if (!(imap.br_startblock || XFS_IS_REALTIME_INODE(ip)))
838                         return xfs_cmn_err_fsblock_zero(ip, &imap);
839
840                 if ((numblks_fsb = imap.br_blockcount) == 0) {
841                         /*
842                          * The numblks_fsb value should always get
843                          * smaller, otherwise the loop is stuck.
844                          */
845                         ASSERT(imap.br_blockcount);
846                         break;
847                 }
848                 offset_fsb += numblks_fsb;
849                 count_fsb -= numblks_fsb;
850         } while (count_fsb > 0);
851
852         return 0;
853
854 error_on_bmapi_transaction:
855         xfs_bmap_cancel(&free_list);
856         xfs_trans_cancel(tp, (XFS_TRANS_RELEASE_LOG_RES | XFS_TRANS_ABORT));
857         xfs_iunlock(ip, XFS_ILOCK_EXCL);
858         return XFS_ERROR(error);
859 }