Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc.
4 : * Copyright (C) 2010 Red Hat, Inc.
5 : * All Rights Reserved.
6 : */
7 : #include "xfs.h"
8 : #include "xfs_fs.h"
9 : #include "xfs_shared.h"
10 : #include "xfs_format.h"
11 : #include "xfs_log_format.h"
12 : #include "xfs_trans_resv.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_extent_busy.h"
15 : #include "xfs_quota.h"
16 : #include "xfs_trans.h"
17 : #include "xfs_trans_priv.h"
18 : #include "xfs_log.h"
19 : #include "xfs_log_priv.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_error.h"
22 : #include "xfs_defer.h"
23 : #include "xfs_inode.h"
24 : #include "xfs_dquot_item.h"
25 : #include "xfs_dquot.h"
26 : #include "xfs_icache.h"
27 : #include "xfs_rtbitmap.h"
28 : #include "xfs_rtgroup.h"
29 :
30 : struct kmem_cache *xfs_trans_cache;
31 :
32 : #if defined(CONFIG_TRACEPOINTS)
 : /*
 : * Emit one trace_xfs_trans_resv_calc tracepoint per precomputed
 : * reservation entry so the values can be inspected at mount time.
 : */
33 : static void
34 24333 : xfs_trans_trace_reservations(
35 : struct xfs_mount *mp)
36 : {
37 24333 : struct xfs_trans_res *res;
38 24333 : struct xfs_trans_res *end_res;
39 24333 : int i;
40 :
 : /* M_RES(mp) is walked as a flat array of struct xfs_trans_res. */
41 24333 : res = (struct xfs_trans_res *)M_RES(mp);
42 24333 : end_res = (struct xfs_trans_res *)(M_RES(mp) + 1);
43 729990 : for (i = 0; res < end_res; i++, res++)
44 705657 : trace_xfs_trans_resv_calc(mp, i, res);
45 24333 : }
46 : #else
 : /* Tracepoints disabled: compile the call away entirely. */
47 : # define xfs_trans_trace_reservations(mp)
48 : #endif
49 :
50 : /*
51 : * Initialize the precomputed transaction reservation values
52 : * in the mount structure.
53 : */
54 : void
55 24333 : xfs_trans_init(
56 : struct xfs_mount *mp)
57 : {
 : /* Compute the reservation table once, then trace it for debugging. */
58 24333 : xfs_trans_resv_calc(mp, M_RES(mp));
59 24333 : xfs_trans_trace_reservations(mp);
60 24333 : }
61 :
62 : /*
63 : * Free the transaction structure. If there is more clean up
64 : * to do when the structure is freed, add it here.
65 : */
66 : STATIC void
67 3220692356 : xfs_trans_free(
68 : struct xfs_trans *tp)
69 : {
 : /* Busy extents must be sorted before they can be cleared. */
70 3220692356 : xfs_extent_busy_sort(&tp->t_busy);
71 3221030568 : xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
72 :
73 3221012796 : if (tp->t_dfops_finished > 0)
74 615556240 : trace_xfs_defer_stats(tp);
75 :
76 3221052067 : trace_xfs_trans_free(tp, _RET_IP_);
77 3221065262 : xfs_trans_clear_context(tp);
 : /* Drop the freeze protection taken by sb_start_intwrite() at alloc. */
78 3221129159 : if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
79 1210377771 : sb_end_intwrite(tp->t_mountp->m_super);
80 3221174981 : xfs_trans_free_dqinfo(tp);
81 3221044487 : kmem_cache_free(xfs_trans_cache, tp);
82 3221252303 : }
83 :
84 : /*
85 : * This is called to create a new transaction which will share the
86 : * permanent log reservation of the given transaction. The remaining
87 : * unused block and rt extent reservations are also inherited. This
88 : * implies that the original transaction is no longer allowed to allocate
89 : * blocks. Locks and log items, however, are not inherited. They must
90 : * be added to the new transaction explicitly.
91 : */
92 : STATIC struct xfs_trans *
93 962260171 : xfs_trans_dup(
94 : struct xfs_trans *tp)
95 : {
96 962260171 : struct xfs_trans *ntp;
97 :
98 962260171 : trace_xfs_trans_dup(tp, _RET_IP_);
99 :
100 962285924 : ntp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
101 :
102 : /*
103 : * Initialize the new transaction structure.
104 : */
105 962408176 : ntp->t_magic = XFS_TRANS_HEADER_MAGIC;
106 962408176 : ntp->t_mountp = tp->t_mountp;
107 962408176 : INIT_LIST_HEAD(&ntp->t_items);
108 962408176 : INIT_LIST_HEAD(&ntp->t_busy);
109 962408176 : INIT_LIST_HEAD(&ntp->t_dfops);
110 962408176 : ntp->t_highest_agno = NULLAGNUMBER;
111 :
 : /* Only permanent-log-res transactions with a live ticket may be rolled. */
112 962408176 : ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
113 962408176 : ASSERT(tp->t_ticket != NULL);
114 :
115 962408176 : ntp->t_flags = XFS_TRANS_PERM_LOG_RES |
116 962408176 : (tp->t_flags & XFS_TRANS_RESERVE) |
117 962408176 : (tp->t_flags & XFS_TRANS_NO_WRITECOUNT) |
118 : (tp->t_flags & XFS_TRANS_RES_FDBLKS);
119 : /* We gave our writer reference to the new transaction */
120 962408176 : tp->t_flags |= XFS_TRANS_NO_WRITECOUNT;
121 962408176 : ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket);
122 :
 : /* The new transaction inherits the unused part of the block and
 : * rt extent reservations; the old one keeps only what it consumed. */
123 962349886 : ASSERT(tp->t_blk_res >= tp->t_blk_res_used);
124 962349886 : ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used;
125 962349886 : tp->t_blk_res = tp->t_blk_res_used;
126 :
127 962349886 : ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
128 962349886 : tp->t_rtx_res = tp->t_rtx_res_used;
129 :
130 962349886 : xfs_trans_switch_context(tp, ntp);
131 :
132 : /* move deferred ops over to the new tp */
133 962354496 : xfs_defer_move(ntp, tp);
134 :
135 962319658 : xfs_trans_dup_dqinfo(tp, ntp);
136 962395545 : return ntp;
137 : }
138 :
139 : /*
140 : * Try to reserve more blocks for a transaction.
141 : *
142 : * This is for callers that need to attach resources to a transaction, scan
143 : * those resources to determine the space reservation requirements, and then
144 : * modify the attached resources. In other words, online repair. This can
145 : * fail due to ENOSPC, so the caller must be able to cancel the transaction
146 : * without shutting down the fs.
147 : */
148 : int
149 88608593 : xfs_trans_reserve_more(
150 : struct xfs_trans *tp,
151 : unsigned int blocks,
152 : unsigned int rtextents)
153 : {
154 88608593 : struct xfs_mount *mp = tp->t_mountp;
155 88608593 : bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
156 88608593 : int error = 0;
157 :
 : /* Growing the reservation is only legal before anything is dirtied. */
158 88608593 : ASSERT(!(tp->t_flags & XFS_TRANS_DIRTY));
159 :
160 : /*
161 : * Attempt to reserve the needed disk blocks by decrementing
162 : * the number needed from the number available. This will
163 : * fail if the count would go below zero.
164 : */
165 88608593 : if (blocks > 0) {
166 88596671 : error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
167 88596671 : if (error)
168 : return -ENOSPC;
169 88596541 : tp->t_blk_res += blocks;
170 : }
171 :
172 : /*
173 : * Attempt to reserve the needed realtime extents by decrementing
174 : * the number needed from the number available. This will
175 : * fail if the count would go below zero.
176 : */
177 88608463 : if (rtextents > 0) {
178 2732 : error = xfs_mod_frextents(mp, -((int64_t)rtextents));
179 2732 : if (error) {
180 0 : error = -ENOSPC;
181 0 : goto out_blocks;
182 : }
183 2732 : tp->t_rtx_res += rtextents;
184 : }
185 :
186 : return 0;
 : /* Unwind: give back the block reservation taken above on rtx failure. */
187 : out_blocks:
188 0 : if (blocks > 0) {
189 0 : xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
190 0 : tp->t_blk_res -= blocks;
191 : }
192 : return error;
193 : }
194 :
195 : /*
196 : * This is called to reserve free disk blocks and log space for the
197 : * given transaction. This must be done before allocating any resources
198 : * within the transaction.
199 : *
200 : * This will return ENOSPC if there are not enough blocks available.
201 : * It will sleep waiting for available log space.
202 : * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which
203 : * is used by long running transactions. If any one of the reservations
204 : * fails then they will all be backed out.
205 : *
206 : * This does not do quota reservations. That typically is done by the
207 : * caller afterwards.
208 : */
209 : static int
210 3221023571 : xfs_trans_reserve(
211 : struct xfs_trans *tp,
212 : struct xfs_trans_res *resp,
213 : uint blocks,
214 : uint rtextents)
215 : {
216 3221023571 : struct xfs_mount *mp = tp->t_mountp;
217 3221023571 : int error = 0;
218 3221023571 : bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
219 :
220 : /*
221 : * Attempt to reserve the needed disk blocks by decrementing
222 : * the number needed from the number available. This will
223 : * fail if the count would go below zero.
224 : */
225 3221023571 : if (blocks > 0) {
226 873260381 : error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
227 873283676 : if (error != 0)
228 : return -ENOSPC;
229 864705570 : tp->t_blk_res += blocks;
230 : }
231 :
232 : /*
233 : * Reserve the log space needed for this transaction.
234 : */
235 3212468760 : if (resp->tr_logres > 0) {
236 2164153747 : bool permanent = false;
237 :
 : /* A rolled transaction must reuse the same log reservation size. */
238 2164153747 : ASSERT(tp->t_log_res == 0 ||
239 : tp->t_log_res == resp->tr_logres);
240 2164153747 : ASSERT(tp->t_log_count == 0 ||
241 : tp->t_log_count == resp->tr_logcount);
242 :
243 2164153747 : if (resp->tr_logflags & XFS_TRANS_PERM_LOG_RES) {
244 2049831101 : tp->t_flags |= XFS_TRANS_PERM_LOG_RES;
245 2049831101 : permanent = true;
246 : } else {
247 114322646 : ASSERT(tp->t_ticket == NULL);
248 114322646 : ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
249 : }
250 :
 : /* An existing ticket means this is a roll: regrant rather than
 : * take a brand new log reservation. */
251 2164153747 : if (tp->t_ticket != NULL) {
252 962144783 : ASSERT(resp->tr_logflags & XFS_TRANS_PERM_LOG_RES);
253 962144783 : error = xfs_log_regrant(mp, tp->t_ticket);
254 : } else {
255 1202008964 : error = xfs_log_reserve(mp, resp->tr_logres,
256 : resp->tr_logcount,
257 : &tp->t_ticket, permanent);
258 : }
259 :
260 2164246083 : if (error)
261 2103 : goto undo_blocks;
262 :
263 2164243980 : tp->t_log_res = resp->tr_logres;
264 2164243980 : tp->t_log_count = resp->tr_logcount;
265 : }
266 :
267 : /*
268 : * Attempt to reserve the needed realtime extents by decrementing
269 : * the number needed from the number available. This will
270 : * fail if the count would go below zero.
271 : */
272 3212558993 : if (rtextents > 0) {
273 125564747 : error = xfs_mod_frextents(mp, -((int64_t)rtextents));
274 125562668 : if (error) {
275 4 : error = -ENOSPC;
276 4 : goto undo_log;
277 : }
278 125562664 : tp->t_rtx_res += rtextents;
279 : }
280 :
281 : return 0;
282 :
283 : /*
284 : * Error cases jump to one of these labels to undo any
285 : * reservations which have already been performed.
286 : */
287 : undo_log:
288 4 : if (resp->tr_logres > 0) {
289 4 : xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
290 4 : tp->t_ticket = NULL;
291 4 : tp->t_log_res = 0;
292 4 : tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES;
293 : }
294 :
295 0 : undo_blocks:
296 2107 : if (blocks > 0) {
297 352 : xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
298 352 : tp->t_blk_res = 0;
299 : }
300 : return error;
301 : }
302 :
303 : int
304 2254062131 : xfs_trans_alloc(
305 : struct xfs_mount *mp,
306 : struct xfs_trans_res *resp,
307 : uint blocks,
308 : uint rtextents,
309 : uint flags,
310 : struct xfs_trans **tpp)
311 : {
312 2254062131 : struct xfs_trans *tp;
313 2254062131 : bool want_retry = true;
314 2258364553 : int error;
315 :
316 : /*
317 : * Allocate the handle before we do our freeze accounting and setting up
318 : * GFP_NOFS allocation context so that we avoid lockdep false positives
319 : * by doing GFP_KERNEL allocations inside sb_start_intwrite().
320 : */
321 2258364553 : retry:
322 2258364553 : tp = kmem_cache_zalloc(xfs_trans_cache, GFP_KERNEL | __GFP_NOFAIL);
323 2258737771 : if (!(flags & XFS_TRANS_NO_WRITECOUNT))
324 1210375802 : sb_start_intwrite(mp->m_super);
325 2258554813 : xfs_trans_set_context(tp);
326 :
327 : /*
328 : * Zero-reservation ("empty") transactions can't modify anything, so
329 : * they're allowed to run while we're frozen.
330 : */
331 4517316708 : WARN_ON(resp->tr_logres > 0 &&
332 : mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE);
333 2258658354 : ASSERT(!(flags & XFS_TRANS_RES_FDBLKS) ||
334 : xfs_has_lazysbcount(mp));
335 :
336 2258658354 : tp->t_magic = XFS_TRANS_HEADER_MAGIC;
337 2258658354 : tp->t_flags = flags;
338 2258658354 : tp->t_mountp = mp;
339 2258658354 : INIT_LIST_HEAD(&tp->t_items);
340 2258658354 : INIT_LIST_HEAD(&tp->t_busy);
341 2258658354 : INIT_LIST_HEAD(&tp->t_dfops);
342 2258658354 : tp->t_highest_agno = NULLAGNUMBER;
343 :
344 2258658354 : error = xfs_trans_reserve(tp, resp, blocks, rtextents);
 : /* One retry after ENOSPC: flush speculative preallocations and try again. */
345 2258763233 : if (error == -ENOSPC && want_retry) {
346 4302593 : xfs_trans_cancel(tp);
347 :
348 : /*
349 : * We weren't able to reserve enough space for the transaction.
350 : * Flush the other speculative space allocations to free space.
351 : * Do not perform a synchronous scan because callers can hold
352 : * other locks.
353 : */
354 4302196 : error = xfs_blockgc_flush_all(mp);
355 4302422 : if (error)
356 0 : return error;
357 4302422 : want_retry = false;
358 4302422 : goto retry;
359 : }
360 2254460640 : if (error) {
361 4277782 : xfs_trans_cancel(tp);
362 4277782 : return error;
363 : }
364 :
365 2250182858 : trace_xfs_trans_alloc(tp, _RET_IP_);
366 :
367 2250252124 : *tpp = tp;
368 2250252124 : return 0;
369 : }
370 :
371 : /*
372 : * Create an empty transaction with no reservation. This is a defensive
373 : * mechanism for routines that query metadata without actually modifying them --
374 : * if the metadata being queried is somehow cross-linked (think a btree block
375 : * pointer that points higher in the tree), we risk deadlock. However, blocks
376 : * grabbed as part of a transaction can be re-grabbed. The verifiers will
377 : * notice the corrupt block and the operation will fail back to userspace
378 : * without deadlocking.
379 : *
380 : * Note the zero-length reservation; this transaction MUST be cancelled without
381 : * any dirty data.
382 : *
383 : * Callers should obtain freeze protection to avoid a conflict with fs freezing
384 : * where we can be grabbing buffers at the same time that freeze is trying to
385 : * drain the buffer LRU list.
386 : */
387 : int
388 1048217215 : xfs_trans_alloc_empty(
389 : struct xfs_mount *mp,
390 : struct xfs_trans **tpp)
391 : {
 : /* Zero reservation + NO_WRITECOUNT: usable even while frozen. */
392 1048217215 : struct xfs_trans_res resv = {0};
393 :
394 1048217215 : return xfs_trans_alloc(mp, &resv, 0, 0, XFS_TRANS_NO_WRITECOUNT, tpp);
395 : }
396 :
397 : /*
398 : * Record the indicated change to the given field for application
399 : * to the file system's superblock when the transaction commits.
400 : * For now, just store the change in the transaction structure.
401 : *
402 : * Mark the transaction structure to indicate that the superblock
403 : * needs to be updated before committing.
404 : *
405 : * Because we may not be keeping track of allocated/free inodes and
406 : * used filesystem blocks in the superblock, we do not mark the
407 : * superblock dirty in this transaction if we modify these fields.
408 : * We still need to update the transaction deltas so that they get
409 : * applied to the incore superblock, but we don't want them to
410 : * cause the superblock to get locked and logged if these are the
411 : * only fields in the superblock that the transaction modifies.
412 : */
413 : void
414 259780572 : xfs_trans_mod_sb(
415 : xfs_trans_t *tp,
416 : uint field,
417 : int64_t delta)
418 : {
 : /* Assume the change dirties the superblock; lazy-counter cases below
 : * clear XFS_TRANS_SB_DIRTY again so only the incore copy is updated. */
419 259780572 : uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY);
420 259780572 : xfs_mount_t *mp = tp->t_mountp;
421 :
422 259780572 : switch (field) {
423 709290 : case XFS_TRANS_SB_ICOUNT:
424 709290 : tp->t_icount_delta += delta;
425 709290 : if (xfs_has_lazysbcount(mp))
426 709287 : flags &= ~XFS_TRANS_SB_DIRTY;
427 : break;
428 103016902 : case XFS_TRANS_SB_IFREE:
429 103016902 : tp->t_ifree_delta += delta;
430 103016902 : if (xfs_has_lazysbcount(mp))
431 103021823 : flags &= ~XFS_TRANS_SB_DIRTY;
432 : break;
433 98931495 : case XFS_TRANS_SB_FDBLOCKS:
434 : /*
435 : * Track the number of blocks allocated in the transaction.
436 : * Make sure it does not exceed the number reserved. If so,
437 : * shutdown as this can lead to accounting inconsistency.
438 : */
439 98931495 : if (delta < 0) {
440 46354510 : tp->t_blk_res_used += (uint)-delta;
441 46354510 : if (tp->t_blk_res_used > tp->t_blk_res)
442 0 : xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
443 52576985 : } else if (delta > 0 && (tp->t_flags & XFS_TRANS_RES_FDBLKS)) {
444 84610 : int64_t blkres_delta;
445 :
446 : /*
447 : * Return freed blocks directly to the reservation
448 : * instead of the global pool, being careful not to
449 : * overflow the trans counter. This is used to preserve
450 : * reservation across chains of transaction rolls that
451 : * repeatedly free and allocate blocks.
452 : */
453 84610 : blkres_delta = min_t(int64_t, delta,
454 : UINT_MAX - tp->t_blk_res);
455 84610 : tp->t_blk_res += blkres_delta;
456 84610 : delta -= blkres_delta;
457 : }
458 98931495 : tp->t_fdblocks_delta += delta;
459 98931495 : if (xfs_has_lazysbcount(mp))
460 98927369 : flags &= ~XFS_TRANS_SB_DIRTY;
461 : break;
462 17530483 : case XFS_TRANS_SB_RES_FDBLOCKS:
463 : /*
464 : * The allocation has already been applied to the
465 : * in-core superblock's counter. This should only
466 : * be applied to the on-disk superblock.
467 : */
468 17530483 : tp->t_res_fdblocks_delta += delta;
469 17530483 : if (xfs_has_lazysbcount(mp))
470 17530485 : flags &= ~XFS_TRANS_SB_DIRTY;
471 : break;
472 39592169 : case XFS_TRANS_SB_FREXTENTS:
473 : /*
474 : * Track the number of blocks allocated in the
475 : * transaction. Make sure it does not exceed the
476 : * number reserved.
477 : */
478 39592169 : if (delta < 0) {
479 23769774 : tp->t_rtx_res_used += (uint)-delta;
480 23769774 : ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res);
481 : }
482 39592169 : tp->t_frextents_delta += delta;
 : /* frextents joined the lazy counters with the rtgroups feature. */
483 39592169 : if (xfs_has_rtgroups(mp))
484 39592169 : flags &= ~XFS_TRANS_SB_DIRTY;
485 : break;
486 0 : case XFS_TRANS_SB_RES_FREXTENTS:
487 : /*
488 : * The allocation has already been applied to the
489 : * in-core superblock's counter. This should only
490 : * be applied to the on-disk superblock.
491 : */
492 0 : ASSERT(delta < 0);
493 0 : tp->t_res_frextents_delta += delta;
494 0 : break;
 : /* Geometry fields below always dirty the ondisk superblock. */
495 127 : case XFS_TRANS_SB_DBLOCKS:
496 127 : tp->t_dblocks_delta += delta;
497 127 : break;
498 86 : case XFS_TRANS_SB_AGCOUNT:
499 86 : ASSERT(delta > 0);
500 86 : tp->t_agcount_delta += delta;
501 86 : break;
502 0 : case XFS_TRANS_SB_IMAXPCT:
503 0 : tp->t_imaxpct_delta += delta;
504 0 : break;
505 0 : case XFS_TRANS_SB_REXTSIZE:
506 0 : tp->t_rextsize_delta += delta;
507 0 : break;
508 3 : case XFS_TRANS_SB_RBMBLOCKS:
509 3 : tp->t_rbmblocks_delta += delta;
510 3 : break;
511 5 : case XFS_TRANS_SB_RBLOCKS:
512 5 : tp->t_rblocks_delta += delta;
513 5 : break;
514 5 : case XFS_TRANS_SB_REXTENTS:
515 5 : tp->t_rextents_delta += delta;
516 5 : break;
517 3 : case XFS_TRANS_SB_REXTSLOG:
518 3 : tp->t_rextslog_delta += delta;
519 3 : break;
520 4 : case XFS_TRANS_SB_RGCOUNT:
521 4 : ASSERT(delta > 0);
522 4 : tp->t_rgcount_delta += delta;
523 4 : break;
524 0 : default:
525 0 : ASSERT(0);
526 0 : return;
527 : }
528 :
529 259780572 : tp->t_flags |= flags;
530 : }
531 :
532 : /*
533 : * xfs_trans_apply_sb_deltas() is called from the commit code
534 : * to bring the superblock buffer into the current transaction
535 : * and modify it as requested by earlier calls to xfs_trans_mod_sb().
536 : *
537 : * For now we just look at each field allowed to change and change
538 : * it if necessary.
539 : */
540 : STATIC void
541 342 : xfs_trans_apply_sb_deltas(
542 : xfs_trans_t *tp)
543 : {
544 342 : struct xfs_dsb *sbp;
545 342 : struct xfs_buf *bp;
546 342 : bool update_rtsb = false;
547 342 : int whole = 0;
548 :
549 342 : bp = xfs_trans_getsb(tp);
550 342 : sbp = bp->b_addr;
551 :
552 : /*
553 : * Only update the superblock counters if we are logging them
554 : */
555 342 : if (!xfs_has_lazysbcount((tp->t_mountp))) {
556 210 : if (tp->t_icount_delta)
557 2 : be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta);
558 210 : if (tp->t_ifree_delta)
559 206 : be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta);
560 210 : if (tp->t_fdblocks_delta)
561 8 : be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta);
562 210 : if (tp->t_res_fdblocks_delta)
563 0 : be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta);
564 : }
565 :
566 : /*
567 : * Updating frextents requires careful handling because it does not
568 : * behave like the lazysb counters because we cannot rely on log
569 : * recovery in older kernels to recompute the value from the rtbitmap.
570 : * This means that the ondisk frextents must be consistent with the
571 : * rtbitmap.
572 : *
573 : * Therefore, log the frextents change to the ondisk superblock and
574 : * update the incore superblock so that future calls to xfs_log_sb
575 : * write the correct value ondisk.
576 : *
577 : * Don't touch m_frextents because it includes incore reservations,
578 : * and those are handled by the unreserve function.
579 : *
580 : * sb_frextents was added to the lazy sb counters when the rt groups
581 : * feature was introduced. This is possible because we know that all
582 : * kernels supporting rtgroups will also recompute frextents from the
583 : * realtime bitmap.
584 : */
585 342 : if ((tp->t_frextents_delta || tp->t_res_frextents_delta) &&
586 5 : !xfs_has_rtgroups(tp->t_mountp)) {
587 0 : struct xfs_mount *mp = tp->t_mountp;
588 0 : int64_t rtxdelta;
589 :
590 0 : rtxdelta = tp->t_frextents_delta + tp->t_res_frextents_delta;
591 :
592 0 : spin_lock(&mp->m_sb_lock);
593 0 : be64_add_cpu(&sbp->sb_frextents, rtxdelta);
594 0 : mp->m_sb.sb_frextents += rtxdelta;
595 0 : spin_unlock(&mp->m_sb_lock);
596 : }
597 :
 : /* Geometry changes force logging the whole superblock (noncontiguous
 : * fields); rt geometry additionally updates the rtgroup superblocks. */
598 342 : if (tp->t_dblocks_delta) {
599 127 : be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta);
600 127 : whole = 1;
601 : }
602 342 : if (tp->t_agcount_delta) {
603 86 : be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta);
604 86 : whole = 1;
605 : }
606 342 : if (tp->t_imaxpct_delta) {
607 0 : sbp->sb_imax_pct += tp->t_imaxpct_delta;
608 0 : whole = 1;
609 : }
610 342 : if (tp->t_rextsize_delta) {
611 0 : be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta);
612 0 : whole = 1;
613 0 : update_rtsb = true;
614 : }
615 342 : if (tp->t_rbmblocks_delta) {
616 3 : be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta);
617 3 : whole = 1;
618 3 : update_rtsb = true;
619 : }
620 342 : if (tp->t_rblocks_delta) {
621 5 : be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta);
622 5 : whole = 1;
623 5 : update_rtsb = true;
624 : }
625 342 : if (tp->t_rextents_delta) {
626 5 : be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta);
627 5 : whole = 1;
628 5 : update_rtsb = true;
629 : }
630 342 : if (tp->t_rextslog_delta) {
631 3 : sbp->sb_rextslog += tp->t_rextslog_delta;
632 3 : whole = 1;
633 3 : update_rtsb = true;
634 : }
635 342 : if (tp->t_rgcount_delta) {
636 4 : be32_add_cpu(&sbp->sb_rgcount, tp->t_rgcount_delta);
637 4 : whole = 1;
638 4 : update_rtsb = true;
639 : }
640 :
641 342 : xfs_trans_buf_set_type(tp, bp, XFS_BLFT_SB_BUF);
642 342 : if (whole)
643 : /*
644 : * Log the whole thing, the fields are noncontiguous.
645 : */
646 132 : xfs_trans_log_buf(tp, bp, 0, sizeof(struct xfs_dsb) - 1);
647 : else
648 : /*
649 : * Since all the modifiable fields are contiguous, we
650 : * can get away with this.
651 : */
652 210 : xfs_trans_log_buf(tp, bp, offsetof(struct xfs_dsb, sb_icount),
653 : offsetof(struct xfs_dsb, sb_frextents) +
654 : sizeof(sbp->sb_frextents) - 1);
655 :
656 342 : if (update_rtsb)
657 5 : xfs_rtgroup_log_super(tp, bp);
658 342 : }
659 :
660 : /*
661 : * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations and
662 : * apply superblock counter changes to the in-core superblock. The
663 : * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT
664 : * applied to the in-core superblock. The idea is that this has already been
665 : * done.
666 : *
667 : * If we are not logging superblock counters, then the inode allocated/free and
668 : * used block counts are not updated in the on disk superblock. In this case,
669 : * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we
670 : * still need to update the incore superblock with the changes.
671 : *
672 : * Deltas for the inode count are +/-64, hence we use a large batch size of 128
673 : * so we don't need to take the counter lock on every update.
674 : */
675 : #define XFS_ICOUNT_BATCH 128
676 :
677 : void
678 3220895710 : xfs_trans_unreserve_and_mod_sb(
679 : struct xfs_trans *tp)
680 : {
681 3220895710 : struct xfs_mount *mp = tp->t_mountp;
682 3220895710 : bool rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
683 3220895710 : int64_t blkdelta = 0;
684 3220895710 : int64_t rtxdelta = 0;
685 3220895710 : int64_t idelta = 0;
686 3220895710 : int64_t ifreedelta = 0;
687 3220895710 : int error;
688 :
689 : /* calculate deltas */
 : /* Unused reservation plus any counter delta is returned in one go. */
690 3220895710 : if (tp->t_blk_res > 0)
691 : blkdelta = tp->t_blk_res;
692 3220895710 : if ((tp->t_fdblocks_delta != 0) &&
693 8 : (xfs_has_lazysbcount(mp) ||
694 8 : (tp->t_flags & XFS_TRANS_SB_DIRTY)))
695 96641191 : blkdelta += tp->t_fdblocks_delta;
696 :
697 3220895710 : if (tp->t_rtx_res > 0)
698 : rtxdelta = tp->t_rtx_res;
699 3220895710 : if ((tp->t_frextents_delta != 0) &&
700 0 : (xfs_has_rtgroups(mp) ||
701 0 : (tp->t_flags & XFS_TRANS_SB_DIRTY)))
702 39574651 : rtxdelta += tp->t_frextents_delta;
703 :
704 3220895710 : if (xfs_has_lazysbcount(mp) ||
705 450 : (tp->t_flags & XFS_TRANS_SB_DIRTY)) {
706 3220895470 : idelta = tp->t_icount_delta;
707 3220895470 : ifreedelta = tp->t_ifree_delta;
708 : }
709 :
710 : /* apply the per-cpu counters */
711 3220895710 : if (blkdelta) {
712 991542131 : error = xfs_mod_fdblocks(mp, blkdelta, rsvd);
713 991574749 : ASSERT(!error);
714 : }
715 :
716 3220928328 : if (idelta)
717 709293 : percpu_counter_add_batch(&mp->m_icount, idelta,
718 : XFS_ICOUNT_BATCH);
719 :
720 3220928329 : if (ifreedelta)
721 103028208 : percpu_counter_add(&mp->m_ifree, ifreedelta);
722 :
723 3220928318 : if (rtxdelta) {
724 123136968 : error = xfs_mod_frextents(mp, rtxdelta);
725 123134415 : ASSERT(!error);
726 : }
727 :
 : /* Nothing further to do unless this transaction logged the superblock. */
728 3220925765 : if (!(tp->t_flags & XFS_TRANS_SB_DIRTY))
729 : return;
730 :
731 : /* apply remaining deltas */
732 342 : spin_lock(&mp->m_sb_lock);
733 342 : mp->m_sb.sb_fdblocks += tp->t_fdblocks_delta + tp->t_res_fdblocks_delta;
734 342 : mp->m_sb.sb_icount += idelta;
735 342 : mp->m_sb.sb_ifree += ifreedelta;
736 : /*
737 : * Do not touch sb_frextents here because we are dealing with incore
738 : * reservation. sb_frextents is not part of the lazy sb counters so it
739 : * must be consistent with the ondisk rtbitmap and must never include
740 : * incore reservations. sb_frextents was added to the lazy sb counters
741 : * when the realtime groups feature was introduced.
742 : */
743 342 : if (xfs_has_rtgroups(mp))
744 78 : mp->m_sb.sb_frextents += rtxdelta;
745 342 : mp->m_sb.sb_dblocks += tp->t_dblocks_delta;
746 342 : mp->m_sb.sb_agcount += tp->t_agcount_delta;
747 342 : mp->m_sb.sb_imax_pct += tp->t_imaxpct_delta;
748 342 : mp->m_sb.sb_rextsize += tp->t_rextsize_delta;
 : /* Keep the cached rt extent size log2/mask in sync with sb_rextsize. */
749 342 : if (tp->t_rextsize_delta) {
750 0 : mp->m_rtxblklog = log2_if_power2(mp->m_sb.sb_rextsize);
751 0 : mp->m_rtxblkmask = mask64_if_power2(mp->m_sb.sb_rextsize);
752 : }
753 342 : mp->m_sb.sb_rbmblocks += tp->t_rbmblocks_delta;
754 342 : mp->m_sb.sb_rblocks += tp->t_rblocks_delta;
755 342 : mp->m_sb.sb_rextents += tp->t_rextents_delta;
756 342 : mp->m_sb.sb_rextslog += tp->t_rextslog_delta;
757 342 : mp->m_sb.sb_rgcount += tp->t_rgcount_delta;
758 342 : spin_unlock(&mp->m_sb_lock);
759 :
760 : /*
761 : * Debug checks outside of the spinlock so they don't lock up the
762 : * machine if they fail.
763 : */
764 : ASSERT(mp->m_sb.sb_imax_pct >= 0);
765 : ASSERT(mp->m_sb.sb_rextslog >= 0);
766 : return;
767 : }
768 :
769 : /* Add the given log item to the transaction's list of log items. */
770 : void
771 30747817726 : xfs_trans_add_item(
772 : struct xfs_trans *tp,
773 : struct xfs_log_item *lip)
774 : {
 : /* Item must belong to this mount's log/AIL and not already be in a
 : * transaction or marked dirty. */
775 30747817726 : ASSERT(lip->li_log == tp->t_mountp->m_log);
776 30747817726 : ASSERT(lip->li_ailp == tp->t_mountp->m_ail);
777 30747817726 : ASSERT(list_empty(&lip->li_trans));
778 30747817726 : ASSERT(!test_bit(XFS_LI_DIRTY, &lip->li_flags));
779 :
780 30747817726 : list_add_tail(&lip->li_trans, &tp->t_items);
781 30758405019 : trace_xfs_trans_add_item(tp, _RET_IP_);
782 30764479052 : }
783 :
784 : /*
785 : * Unlink the log item from the transaction. the log item is no longer
786 : * considered dirty in this transaction, as the linked transaction has
787 : * finished, either by abort or commit completion.
788 : */
789 : void
790 30784371263 : xfs_trans_del_item(
791 : struct xfs_log_item *lip)
792 : {
 : /* Clear dirty state and detach; list_del_init leaves li_trans reusable. */
793 30784371263 : clear_bit(XFS_LI_DIRTY, &lip->li_flags);
794 30785890112 : list_del_init(&lip->li_trans);
795 30788808263 : }
796 :
797 : /* Detach and unlock all of the items in a transaction */
798 : static void
799 1446317690 : xfs_trans_free_items(
800 : struct xfs_trans *tp,
801 : bool abort)
802 : {
803 1446317690 : struct xfs_log_item *lip, *next;
804 :
805 1446317690 : trace_xfs_trans_free_items(tp, _RET_IP_);
806 :
 : /* Detach every item; on abort mark it so iop_release can see it. */
807 1918474068 : list_for_each_entry_safe(lip, next, &tp->t_items, li_trans) {
808 472122812 : xfs_trans_del_item(lip);
809 472100410 : if (abort)
810 22622 : set_bit(XFS_LI_ABORTED, &lip->li_flags);
811 472100410 : if (lip->li_ops->iop_release)
812 472100410 : lip->li_ops->iop_release(lip);
813 : }
814 1446351256 : }
815 :
816 : static inline void
817 34354142 : xfs_log_item_batch_insert(
818 : struct xfs_ail *ailp,
819 : struct xfs_ail_cursor *cur,
820 : struct xfs_log_item **log_items,
821 : int nr_items,
822 : xfs_lsn_t commit_lsn)
823 : {
824 34354142 : int i;
825 :
826 34354142 : spin_lock(&ailp->ail_lock);
827 : /* xfs_trans_ail_update_bulk drops ailp->ail_lock */
828 34354142 : xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn);
829 :
 : /* Unpin each inserted item now that it is tracked in the AIL. */
830 1100333871 : for (i = 0; i < nr_items; i++) {
831 1031625587 : struct xfs_log_item *lip = log_items[i];
832 :
833 1031625587 : if (lip->li_ops->iop_unpin)
834 1031625587 : lip->li_ops->iop_unpin(lip, 0);
835 : }
836 34354142 : }
837 :
838 : /*
839 : * Bulk operation version of xfs_trans_committed that takes a log vector of
840 : * items to insert into the AIL. This uses bulk AIL insertion techniques to
841 : * minimise lock traffic.
842 : *
843 : * If we are called with the aborted flag set, it is because a log write during
844 : * a CIL checkpoint commit has failed. In this case, all the items in the
845 : * checkpoint have already gone through iop_committed and iop_committing, which
846 : * means that checkpoint commit abort handling is treated exactly the same
847 : * as an iclog write error even though we haven't started any IO yet. Hence in
848 : * this case all we need to do is iop_committed processing, followed by an
849 : * iop_unpin(aborted) call.
850 : *
851 : * The AIL cursor is used to optimise the insert process. If commit_lsn is not
852 : * at the end of the AIL, the insert cursor avoids the need to walk
853 : * the AIL to find the insertion point on every xfs_log_item_batch_insert()
854 : * call. This saves a lot of needless list walking and is a net win, even
855 : * though it slightly increases that amount of AIL lock traffic to set it up
856 : * and tear it down.
857 : */
858 : void
859 3762402 : xfs_trans_committed_bulk(
860 : struct xfs_ail *ailp,
861 : struct list_head *lv_chain,
862 : xfs_lsn_t commit_lsn,
863 : bool aborted)
864 : {
865 : #define LOG_ITEM_BATCH_SIZE 32
866 3762402 : struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE];
867 3762402 : struct xfs_log_vec *lv;
868 3762402 : struct xfs_ail_cursor cur;
869 3762402 : int i = 0;
870 :
871 3762402 : spin_lock(&ailp->ail_lock);
872 3762402 : xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn);
873 3762402 : spin_unlock(&ailp->ail_lock);
874 :
875 : /* unpin all the log items */
876 1042220094 : list_for_each_entry(lv, lv_chain, lv_list) {
877 1038457692 : struct xfs_log_item *lip = lv->lv_item;
878 1038457692 : xfs_lsn_t item_lsn;
879 :
880 1038457692 : if (aborted)
881 1202062 : set_bit(XFS_LI_ABORTED, &lip->li_flags);
882 :
883 1038457687 : if (lip->li_ops->flags & XFS_ITEM_RELEASE_WHEN_COMMITTED) {
884 2759272 : lip->li_ops->iop_release(lip);
885 2759272 : continue;
886 : }
887 :
888 1035698415 : if (lip->li_ops->iop_committed)
889 978046803 : item_lsn = lip->li_ops->iop_committed(lip, commit_lsn);
890 : else
891 : item_lsn = commit_lsn;
892 :
893 : /* item_lsn of -1 means the item needs no further processing */
894 1035698412 : if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0)
895 2775642 : continue;
896 :
897 : /*
898 : * if we are aborting the operation, no point in inserting the
899 : * object into the AIL as we are in a shutdown situation.
900 : */
901 1032922770 : if (aborted) {
902 2334494 : ASSERT(xlog_is_shutdown(ailp->ail_log));
903 1167247 : if (lip->li_ops->iop_unpin)
904 1167247 : lip->li_ops->iop_unpin(lip, 1);
905 1167255 : continue;
906 : }
907 :
908 1031755523 : if (item_lsn != commit_lsn) {
909 :
910 : /*
911 : * Not a bulk update option due to unusual item_lsn.
912 : * Push into AIL immediately, rechecking the lsn once
913 : * we have the ail lock. Then unpin the item. This does
914 : * not affect the AIL cursor the bulk insert path is
915 : * using.
916 : */
917 129936 : spin_lock(&ailp->ail_lock);
918 129936 : if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0)
919 0 : xfs_trans_ail_update(ailp, lip, item_lsn);
920 : else
921 129936 : spin_unlock(&ailp->ail_lock);
922 129936 : if (lip->li_ops->iop_unpin)
923 129936 : lip->li_ops->iop_unpin(lip, 0);
924 129936 : continue;
925 : }
926 :
927 : /* Item is a candidate for bulk AIL insert. */
928 1031625587 : log_items[i++] = lv->lv_item;
929 1031625587 : if (i >= LOG_ITEM_BATCH_SIZE) {
930 30683986 : xfs_log_item_batch_insert(ailp, &cur, log_items,
931 : LOG_ITEM_BATCH_SIZE, commit_lsn);
932 30683986 : i = 0;
933 : }
934 : }
935 :
936 : /* make sure we insert the remainder! */
937 3762402 : if (i)
938 3670156 : xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn);
939 :
940 3762402 : spin_lock(&ailp->ail_lock);
941 3762402 : xfs_trans_ail_cursor_done(&cur);
942 3762402 : spin_unlock(&ailp->ail_lock);
943 3762402 : }
944 :
945 : /*
946 : * Sort transaction items prior to running precommit operations. This will
947 : * attempt to order the items such that they will always be locked in the same
948 : * order. Items that have no sort function are moved to the end of the list
949 : * and so are locked last.
950 : *
951 : * This may need refinement as different types of objects add sort functions.
952 : *
953 : * Function is more complex than it needs to be because we are comparing 64 bit
954 : * values and the function only returns 32 bit values.
955 : */
956 : static int
957 9933736078 : xfs_trans_precommit_sort(
958 : void *unused_arg,
959 : const struct list_head *a,
960 : const struct list_head *b)
961 : {
962 9933736078 : struct xfs_log_item *lia = container_of(a,
963 : struct xfs_log_item, li_trans);
964 9933736078 : struct xfs_log_item *lib = container_of(b,
965 : struct xfs_log_item, li_trans);
966 9933736078 : int64_t diff;
967 :
968 : /*
969 : * If both items are non-sortable, leave them alone. If only one is
970 : * sortable, move the non-sortable item towards the end of the list.
971 : */
972 9933736078 : if (!lia->li_ops->iop_sort && !lib->li_ops->iop_sort)
973 : return 0;
974 5398985323 : if (!lia->li_ops->iop_sort)
975 : return 1;
976 5217543229 : if (!lib->li_ops->iop_sort)
977 : return -1;
978 :
979 1221789525 : diff = lia->li_ops->iop_sort(lia) - lib->li_ops->iop_sort(lib);
980 1221949878 : if (diff < 0)
981 : return -1;
982 341779349 : if (diff > 0)
983 326392233 : return 1;
984 : return 0;
985 : }
986 :
987 : /*
988 : * Run transaction precommit functions.
989 : *
990 : * If there is an error in any of the callouts, then stop immediately and
991 : * trigger a shutdown to abort the transaction. There is no recovery possible
992 : * from errors at this point as the transaction is dirty....
993 : */
994 : static int
995 2510727925 : xfs_trans_run_precommits(
996 : struct xfs_trans *tp)
997 : {
998 2510727925 : struct xfs_mount *mp = tp->t_mountp;
999 2510727925 : struct xfs_log_item *lip, *n;
1000 2510727925 : int error = 0;
1001 :
1002 : /*
1003 : * Sort the item list to avoid ABBA deadlocks with other transactions
1004 : * running precommit operations that lock multiple shared items such as
1005 : * inode cluster buffers.
1006 : */
1007 2510727925 : list_sort(NULL, &tp->t_items, xfs_trans_precommit_sort);
1008 :
1009 : /*
1010 : * Precommit operations can remove the log item from the transaction
1011 : * if the log item exists purely to delay modifications until they
1012 : * can be ordered against other operations. Hence we have to use
1013 : * list_for_each_entry_safe() here.
1014 : */
1015 11102697488 : list_for_each_entry_safe(lip, n, &tp->t_items, li_trans) {
1016 8591482756 : if (!test_bit(XFS_LI_DIRTY, &lip->li_flags))
1017 1191019448 : continue;
1018 7400463308 : if (lip->li_ops->iop_precommit) {
1019 2726485134 : error = lip->li_ops->iop_precommit(tp, lip);
1020 2726703144 : if (error)
1021 : break;
1022 : }
1023 : }
1024 2511215037 : if (error)
1025 305 : xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1026 2511215037 : return error;
1027 : }
1028 :
1029 : /*
1030 : * Commit the given transaction to the log.
1031 : *
1032 : * XFS disk error handling mechanism is not based on a typical
1033 : * transaction abort mechanism. Logically after the filesystem
1034 : * gets marked 'SHUTDOWN', we can't let any new transactions
1035 : * be durable - ie. committed to disk - because some metadata might
1036 : * be inconsistent. In such cases, this returns an error, and the
1037 : * caller may assume that all locked objects joined to the transaction
1038 : * have already been unlocked as if the commit had succeeded.
1039 : * Do not reference the transaction structure after this call.
1040 : */
static int
__xfs_trans_commit(
	struct xfs_trans	*tp,
	bool			regrant)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xlog		*log = mp->m_log;
	xfs_csn_t		commit_seq = 0;
	int			error = 0;
	/* Capture the sync flag up front; tp is freed before it is used. */
	int			sync = tp->t_flags & XFS_TRANS_SYNC;

	trace_xfs_trans_commit(tp, _RET_IP_);

	error = xfs_trans_run_precommits(tp);
	if (error) {
		/* Permanent transactions may carry deferred ops to cancel. */
		if (tp->t_flags & XFS_TRANS_PERM_LOG_RES)
			xfs_defer_cancel(tp);
		goto out_unreserve;
	}

	/*
	 * Finish deferred items on final commit. Only permanent transactions
	 * should ever have deferred ops.
	 */
	WARN_ON_ONCE(!list_empty(&tp->t_dfops) &&
		     !(tp->t_flags & XFS_TRANS_PERM_LOG_RES));
	if (!regrant && (tp->t_flags & XFS_TRANS_PERM_LOG_RES)) {
		error = xfs_defer_finish_noroll(&tp);
		if (error)
			goto out_unreserve;

		/* Run precommits from final tx in defer chain. */
		error = xfs_trans_run_precommits(tp);
		if (error)
			goto out_unreserve;
	}

	/*
	 * If there is nothing to be logged by the transaction,
	 * then unlock all of the items associated with the
	 * transaction and free the transaction structure.
	 * Also make sure to return any reserved blocks to
	 * the free pool.
	 */
	if (!(tp->t_flags & XFS_TRANS_DIRTY))
		goto out_unreserve;

	/*
	 * We must check against log shutdown here because we cannot abort log
	 * items and leave them dirty, inconsistent and unpinned in memory while
	 * the log is active. This leaves them open to being written back to
	 * disk, and that will lead to on-disk corruption.
	 */
	if (xlog_is_shutdown(log)) {
		error = -EIO;
		goto out_unreserve;
	}

	ASSERT(tp->t_ticket != NULL);

	/*
	 * If we need to update the superblock, then do it now.
	 */
	if (tp->t_flags & XFS_TRANS_SB_DIRTY)
		xfs_trans_apply_sb_deltas(tp);
	xfs_trans_apply_dquot_deltas(tp);

	/* Hand the transaction to the CIL; do not touch tp after freeing it. */
	xlog_cil_commit(log, tp, &commit_seq, regrant);

	xfs_trans_free(tp);

	/*
	 * If the transaction needs to be synchronous, then force the
	 * log out now and wait for it.
	 */
	if (sync) {
		error = xfs_log_force_seq(mp, commit_seq, XFS_LOG_SYNC, NULL);
		XFS_STATS_INC(mp, xs_trans_sync);
	} else {
		XFS_STATS_INC(mp, xs_trans_async);
	}

	return error;

out_unreserve:
	xfs_trans_unreserve_and_mod_sb(tp);

	/*
	 * It is indeed possible for the transaction to be not dirty but
	 * the dqinfo portion to be. All that means is that we have some
	 * (non-persistent) quota reservations that need to be unreserved.
	 */
	xfs_trans_unreserve_and_mod_dquots(tp);
	if (tp->t_ticket) {
		/*
		 * Regrant keeps the log reservation alive for a following
		 * rolled transaction; otherwise release the ticket outright.
		 */
		if (regrant && !xlog_is_shutdown(log))
			xfs_log_ticket_regrant(log, tp->t_ticket);
		else
			xfs_log_ticket_ungrant(log, tp->t_ticket);
		tp->t_ticket = NULL;
	}
	xfs_trans_free_items(tp, !!error);
	xfs_trans_free(tp);

	XFS_STATS_INC(mp, xs_trans_empty);
	return error;
}
1147 :
/* Commit a transaction as the final one in its chain (no regrant). */
int
xfs_trans_commit(
	struct xfs_trans	*tp)
{
	return __xfs_trans_commit(tp, false);
}
1154 :
1155 : /*
1156 : * Unlock all of the transaction's items and free the transaction. If the
1157 : * transaction is dirty, we must shut down the filesystem because there is no
1158 : * way to restore them to their previous state.
1159 : *
1160 : * If the transaction has made a log reservation, make sure to release it as
1161 : * well.
1162 : *
1163 : * This is a high level function (equivalent to xfs_trans_commit()) and so can
1164 : * be called after the transaction has effectively been aborted due to the mount
1165 : * being shut down. However, if the mount has not been shut down and the
1166 : * transaction is dirty we will shut the mount down and, in doing so, that
1167 : * guarantees that the log is shut down, too. Hence we don't need to be as
1168 : * careful with shutdown state and dirty items here as we need to be in
1169 : * xfs_trans_commit().
1170 : */
void
xfs_trans_cancel(
	struct xfs_trans	*tp)
{
	struct xfs_mount	*mp = tp->t_mountp;
	struct xlog		*log = mp->m_log;
	/* Capture dirtiness now; pending dfops below also imply dirty. */
	bool			dirty = (tp->t_flags & XFS_TRANS_DIRTY);

	trace_xfs_trans_cancel(tp, _RET_IP_);

	/*
	 * It's never valid to cancel a transaction with deferred ops attached,
	 * because the transaction is effectively dirty. Complain about this
	 * loudly before freeing the in-memory defer items and shutting down the
	 * filesystem.
	 */
	if (!list_empty(&tp->t_dfops)) {
		ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES);
		dirty = true;
		xfs_defer_cancel(tp);
	}

	/*
	 * See if the caller is relying on us to shut down the filesystem. We
	 * only want an error report if there isn't already a shutdown in
	 * progress, so we only need to check against the mount shutdown state
	 * here.
	 */
	if (dirty && !xfs_is_shutdown(mp)) {
		XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp);
		xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
	}
#ifdef DEBUG
	/* Log items need to be consistent until the log is shut down. */
	if (!dirty && !xlog_is_shutdown(log)) {
		struct xfs_log_item *lip;

		list_for_each_entry(lip, &tp->t_items, li_trans)
			ASSERT(!xlog_item_is_intent_done(lip));
	}
#endif
	xfs_trans_unreserve_and_mod_sb(tp);
	xfs_trans_unreserve_and_mod_dquots(tp);

	/* Release the log reservation, if one was taken. */
	if (tp->t_ticket) {
		xfs_log_ticket_ungrant(log, tp->t_ticket);
		tp->t_ticket = NULL;
	}

	xfs_trans_free_items(tp, dirty);
	xfs_trans_free(tp);
}
1223 :
1224 : /*
1225 : * Roll from one trans in the sequence of PERMANENT transactions to
1226 : * the next: permanent transactions are only flushed out when
1227 : * committed with xfs_trans_commit(), but we still want as soon
1228 : * as possible to let chunks of it go to the log. So we commit the
1229 : * chunk we've been working on and get a new transaction to continue.
1230 : */
int
xfs_trans_roll(
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*trans = *tpp;
	struct xfs_trans_res	tres;
	int			error;

	trace_xfs_trans_roll(trans, _RET_IP_);

	/*
	 * Copy the critical parameters from one trans to the next.
	 */
	tres.tr_logres = trans->t_log_res;
	tres.tr_logcount = trans->t_log_count;

	/* Duplicate first so *tpp is valid for the caller even on error. */
	*tpp = xfs_trans_dup(trans);

	/*
	 * Commit the current transaction.
	 * If this commit failed, then it'd just unlock those items that
	 * are not marked ihold. That also means that a filesystem shutdown
	 * is in progress. The caller takes the responsibility to cancel
	 * the duplicate transaction that gets returned.
	 */
	error = __xfs_trans_commit(trans, true);
	if (error)
		return error;

	/*
	 * Reserve space in the log for the next transaction.
	 * This also pushes items in the "AIL", the list of logged items,
	 * out to disk if they are taking up space at the tail of the log
	 * that we want to use. This requires that either nothing be locked
	 * across this call, or that anything that is locked be logged in
	 * the prior and the next transactions.
	 */
	tres.tr_logflags = XFS_TRANS_PERM_LOG_RES;
	return xfs_trans_reserve(*tpp, &tres, 0, 0);
}
1271 :
1272 : /*
1273 : * Allocate an transaction, lock and join the inode to it, and reserve quota.
1274 : *
1275 : * The caller must ensure that the on-disk dquots attached to this inode have
1276 : * already been allocated and initialized. The caller is responsible for
1277 : * releasing ILOCK_EXCL if a new transaction is returned.
1278 : */
int
xfs_trans_alloc_inode(
	struct xfs_inode	*ip,
	struct xfs_trans_res	*resv,
	unsigned int		dblocks,
	unsigned int		rblocks,
	bool			force,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	bool			retried = false;
	int			error;

retry:
	/* rblocks is in fs blocks; the log reservation wants rt extents. */
	error = xfs_trans_alloc(mp, resv, dblocks,
			xfs_extlen_to_rtxlen(mp, rblocks),
			force ? XFS_TRANS_RESERVE : 0, &tp);
	if (error)
		return error;

	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_qm_dqattach_locked(ip, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks, force);
	if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
		/*
		 * Out of quota/space: drop everything, run blockgc against
		 * this inode's quotas to free space, then retry exactly once.
		 */
		xfs_trans_cancel(tp);
		xfs_iunlock(ip, XFS_ILOCK_EXCL);
		xfs_blockgc_free_quota(ip, 0);
		retried = true;
		goto retry;
	}
	if (error)
		goto out_cancel;

	*tpp = tp;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	xfs_iunlock(ip, XFS_ILOCK_EXCL);
	return error;
}
1329 :
1330 :
1331 : /* Try to reserve more blocks and file quota for a transaction. */
int
xfs_trans_reserve_more_inode(
	struct xfs_trans	*tp,
	struct xfs_inode	*ip,
	unsigned int		dblocks,
	unsigned int		rblocks,
	bool			force_quota)
{
	struct xfs_mount	*mp = ip->i_mount;
	unsigned int		rtx = xfs_extlen_to_rtxlen(mp, rblocks);
	int			error;

	ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));

	error = xfs_trans_reserve_more(tp, dblocks, rtx);
	if (error)
		return error;

	/* No quota reservation needed if quota is off or ip is a quota inode. */
	if (!XFS_IS_QUOTA_ON(mp) || xfs_is_quota_inode(&mp->m_sb, ip->i_ino))
		return 0;

	if (tp->t_flags & XFS_TRANS_RESERVE)
		force_quota = true;

	error = xfs_trans_reserve_quota_nblks(tp, ip, dblocks, rblocks,
			force_quota);
	if (!error)
		return 0;

	/* Quota failed, give back the new reservation. */
	xfs_mod_fdblocks(mp, dblocks, tp->t_flags & XFS_TRANS_RESERVE);
	tp->t_blk_res -= dblocks;
	xfs_mod_frextents(mp, rtx);
	tp->t_rtx_res -= rtx;
	return error;
}
1368 :
1369 : /*
1370 : * Allocate an transaction in preparation for inode creation by reserving quota
1371 : * against the given dquots. Callers are not required to hold any inode locks.
1372 : */
1373 : int
1374 65202016 : xfs_trans_alloc_icreate(
1375 : struct xfs_mount *mp,
1376 : struct xfs_trans_res *resv,
1377 : struct xfs_dquot *udqp,
1378 : struct xfs_dquot *gdqp,
1379 : struct xfs_dquot *pdqp,
1380 : unsigned int dblocks,
1381 : struct xfs_trans **tpp)
1382 : {
1383 65202016 : struct xfs_trans *tp;
1384 65202016 : bool retried = false;
1385 65204332 : int error;
1386 :
1387 65204332 : retry:
1388 65204332 : error = xfs_trans_alloc(mp, resv, dblocks, 0, 0, &tp);
1389 65206978 : if (error)
1390 704087 : return error;
1391 :
1392 64502891 : error = xfs_trans_reserve_quota_icreate(tp, udqp, gdqp, pdqp, dblocks);
1393 64504555 : if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
1394 2316 : xfs_trans_cancel(tp);
1395 2316 : xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
1396 2316 : retried = true;
1397 2316 : goto retry;
1398 : }
1399 64502239 : if (error) {
1400 2067 : xfs_trans_cancel(tp);
1401 2067 : return error;
1402 : }
1403 :
1404 64500172 : *tpp = tp;
1405 64500172 : return 0;
1406 : }
1407 :
1408 : /*
1409 : * Allocate an transaction, lock and join the inode to it, and reserve quota
1410 : * in preparation for inode attribute changes that include uid, gid, or prid
1411 : * changes.
1412 : *
1413 : * The caller must ensure that the on-disk dquots attached to this inode have
1414 : * already been allocated and initialized. The ILOCK will be dropped when the
1415 : * transaction is committed or cancelled.
1416 : */
int
xfs_trans_alloc_ichange(
	struct xfs_inode	*ip,
	struct xfs_dquot	*new_udqp,
	struct xfs_dquot	*new_gdqp,
	struct xfs_dquot	*new_pdqp,
	bool			force,
	struct xfs_trans	**tpp)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	struct xfs_dquot	*udqp;
	struct xfs_dquot	*gdqp;
	struct xfs_dquot	*pdqp;
	bool			retried = false;
	int			error;

retry:
	error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp);
	if (error)
		return error;

	/* The ILOCK is joined to the transaction; cancel/commit drops it. */
	xfs_ilock(ip, XFS_ILOCK_EXCL);
	xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);

	error = xfs_qm_dqattach_locked(ip, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	/*
	 * For each quota type, skip quota reservations if the inode's dquots
	 * now match the ones that came from the caller, or the caller didn't
	 * pass one in. The inode's dquots can change if we drop the ILOCK to
	 * perform a blockgc scan, so we must preserve the caller's arguments.
	 */
	udqp = (new_udqp != ip->i_udquot) ? new_udqp : NULL;
	gdqp = (new_gdqp != ip->i_gdquot) ? new_gdqp : NULL;
	pdqp = (new_pdqp != ip->i_pdquot) ? new_pdqp : NULL;
	if (udqp || gdqp || pdqp) {
		xfs_filblks_t	dblocks, rblocks;
		unsigned int	qflags = XFS_QMOPT_RES_REGBLKS;
		bool		isrt = XFS_IS_REALTIME_INODE(ip);

		if (force)
			qflags |= XFS_QMOPT_FORCE_RES;

		/* Realtime block counting needs the data fork extents read in. */
		if (isrt) {
			error = xfs_iread_extents(tp, ip, XFS_DATA_FORK);
			if (error)
				goto out_cancel;
		}

		xfs_inode_count_blocks(tp, ip, &dblocks, &rblocks);

		/* Delalloc blocks count against the fork they'll land in. */
		if (isrt)
			rblocks += ip->i_delayed_blks;
		else
			dblocks += ip->i_delayed_blks;

		/*
		 * Reserve enough quota to handle blocks on disk and reserved
		 * for a delayed allocation.  We'll actually transfer the
		 * delalloc reservation between dquots at chown time, even
		 * though that part is only semi-transactional.
		 */
		error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
				pdqp, dblocks, 1, qflags);
		if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
			xfs_trans_cancel(tp);
			xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
			retried = true;
			goto retry;
		}
		if (error)
			goto out_cancel;

		/* Do the same for realtime. */
		qflags = XFS_QMOPT_RES_RTBLKS | (qflags & XFS_QMOPT_FORCE_RES);
		error = xfs_trans_reserve_quota_bydquots(tp, mp, udqp, gdqp,
				pdqp, rblocks, 0, qflags);
		if ((error == -EDQUOT || error == -ENOSPC) && !retried) {
			xfs_trans_cancel(tp);
			xfs_blockgc_free_dquots(mp, udqp, gdqp, pdqp, 0);
			retried = true;
			goto retry;
		}
		if (error)
			goto out_cancel;
	}

	*tpp = tp;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	return error;
}
1517 :
1518 : /*
1519 : * Allocate an transaction, lock and join the directory and child inodes to it,
1520 : * and reserve quota for a directory update. If there isn't sufficient space,
1521 : * @dblocks will be set to zero for a reservationless directory update and
1522 : * @nospace_error will be set to a negative errno describing the space
1523 : * constraint we hit.
1524 : *
1525 : * The caller must ensure that the on-disk dquots attached to this inode have
1526 : * already been allocated and initialized. The ILOCKs will be dropped when the
1527 : * transaction is committed or cancelled.
1528 : *
1529 : * Caller is responsible for unlocking the inodes manually upon return
1530 : */
int
xfs_trans_alloc_dir(
	struct xfs_inode	*dp,
	struct xfs_trans_res	*resv,
	struct xfs_inode	*ip,
	unsigned int		*dblocks,
	struct xfs_trans	**tpp,
	int			*nospace_error)
{
	struct xfs_trans	*tp;
	struct xfs_mount	*mp = ip->i_mount;
	unsigned int		resblks;
	bool			retried = false;
	int			error;

retry:
	*nospace_error = 0;
	resblks = *dblocks;
	error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
	if (error == -ENOSPC) {
		/*
		 * Fall back to a reservationless update, remembering why so
		 * the caller can report the space constraint if it needs to.
		 */
		*nospace_error = error;
		resblks = 0;
		error = xfs_trans_alloc(mp, resv, resblks, 0, 0, &tp);
	}
	if (error)
		return error;

	xfs_lock_two_inodes(dp, XFS_ILOCK_EXCL, ip, XFS_ILOCK_EXCL);

	xfs_trans_ijoin(tp, dp, 0);
	xfs_trans_ijoin(tp, ip, 0);

	error = xfs_qm_dqattach_locked(dp, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	error = xfs_qm_dqattach_locked(ip, false);
	if (error) {
		/* Caller should have allocated the dquots! */
		ASSERT(error != -ENOENT);
		goto out_cancel;
	}

	/* Nothing reserved, so no quota to reserve either. */
	if (resblks == 0)
		goto done;

	error = xfs_trans_reserve_quota_nblks(tp, dp, resblks, 0, false);
	if (error == -EDQUOT || error == -ENOSPC) {
		if (!retried) {
			/*
			 * Drop everything, run blockgc against the directory's
			 * quotas to free space, then retry exactly once.
			 */
			xfs_trans_cancel(tp);
			xfs_iunlock(dp, XFS_ILOCK_EXCL);
			if (dp != ip)
				xfs_iunlock(ip, XFS_ILOCK_EXCL);
			xfs_blockgc_free_quota(dp, 0);
			retried = true;
			goto retry;
		}

		/* Still failing: degrade to a reservationless update. */
		*nospace_error = error;
		resblks = 0;
		error = 0;
	}
	if (error)
		goto out_cancel;

done:
	*tpp = tp;
	*dblocks = resblks;
	return 0;

out_cancel:
	xfs_trans_cancel(tp);
	return error;
}
|