Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0+
2 : /*
3 : * Copyright (C) 2016 Oracle. All Rights Reserved.
4 : * Author: Darrick J. Wong <darrick.wong@oracle.com>
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_defer.h"
14 : #include "xfs_btree.h"
15 : #include "xfs_bmap.h"
16 : #include "xfs_refcount_btree.h"
17 : #include "xfs_alloc.h"
18 : #include "xfs_errortag.h"
19 : #include "xfs_error.h"
20 : #include "xfs_trace.h"
21 : #include "xfs_trans.h"
22 : #include "xfs_bit.h"
23 : #include "xfs_refcount.h"
24 : #include "xfs_rmap.h"
25 : #include "xfs_ag.h"
26 :
/*
 * Cache pointer for refcount intents (presumably the allocator cache backing
 * refcount intent items; it is set up outside the code visible here).
 */
struct kmem_cache	*xfs_refcount_intent_cache;

/*
 * Allowable refcount adjustment amounts.
 *
 * xfs_refc_merge_refcount() adds the enumerator's numeric value to a record's
 * reference count.  Note that the DECREASE and COW_FREE operations share the
 * value -1.
 */
enum xfs_refc_adjust_op {
	XFS_REFCOUNT_ADJUST_INCREASE	= 1,	/* refcount + 1 */
	XFS_REFCOUNT_ADJUST_DECREASE	= -1,	/* refcount - 1 */
	XFS_REFCOUNT_ADJUST_COW_ALLOC	= 0,	/* refcount unchanged */
	XFS_REFCOUNT_ADJUST_COW_FREE	= -1,	/* same value as DECREASE */
};
36 :
37 : STATIC int __xfs_refcount_cow_alloc(struct xfs_btree_cur *rcur,
38 : xfs_agblock_t agbno, xfs_extlen_t aglen);
39 : STATIC int __xfs_refcount_cow_free(struct xfs_btree_cur *rcur,
40 : xfs_agblock_t agbno, xfs_extlen_t aglen);
41 :
42 : /*
43 : * Look up the first record less than or equal to [bno, len] in the btree
44 : * given by cur.
45 : */
46 : int
47 1543518766 : xfs_refcount_lookup_le(
48 : struct xfs_btree_cur *cur,
49 : enum xfs_refc_domain domain,
50 : xfs_agblock_t bno,
51 : int *stat)
52 : {
53 1604623516 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
54 : xfs_refcount_encode_startblock(bno, domain),
55 : XFS_LOOKUP_LE);
56 1542115183 : cur->bc_rec.rc.rc_startblock = bno;
57 1542115183 : cur->bc_rec.rc.rc_blockcount = 0;
58 1542115183 : cur->bc_rec.rc.rc_domain = domain;
59 1542115183 : return xfs_btree_lookup(cur, XFS_LOOKUP_LE, stat);
60 : }
61 :
62 : /*
63 : * Look up the first record greater than or equal to [bno, len] in the btree
64 : * given by cur.
65 : */
66 : int
67 412298596 : xfs_refcount_lookup_ge(
68 : struct xfs_btree_cur *cur,
69 : enum xfs_refc_domain domain,
70 : xfs_agblock_t bno,
71 : int *stat)
72 : {
73 459338543 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
74 : xfs_refcount_encode_startblock(bno, domain),
75 : XFS_LOOKUP_GE);
76 412298746 : cur->bc_rec.rc.rc_startblock = bno;
77 412298746 : cur->bc_rec.rc.rc_blockcount = 0;
78 412298746 : cur->bc_rec.rc.rc_domain = domain;
79 412298746 : return xfs_btree_lookup(cur, XFS_LOOKUP_GE, stat);
80 : }
81 :
82 : /*
83 : * Look up the first record equal to [bno, len] in the btree
84 : * given by cur.
85 : */
86 : int
87 0 : xfs_refcount_lookup_eq(
88 : struct xfs_btree_cur *cur,
89 : enum xfs_refc_domain domain,
90 : xfs_agblock_t bno,
91 : int *stat)
92 : {
93 0 : trace_xfs_refcount_lookup(cur->bc_mp, cur->bc_ag.pag->pag_agno,
94 : xfs_refcount_encode_startblock(bno, domain),
95 : XFS_LOOKUP_LE);
96 0 : cur->bc_rec.rc.rc_startblock = bno;
97 0 : cur->bc_rec.rc.rc_blockcount = 0;
98 0 : cur->bc_rec.rc.rc_domain = domain;
99 0 : return xfs_btree_lookup(cur, XFS_LOOKUP_EQ, stat);
100 : }
101 :
102 : /* Convert on-disk record to in-core format. */
103 : void
104 2596120461 : xfs_refcount_btrec_to_irec(
105 : const union xfs_btree_rec *rec,
106 : struct xfs_refcount_irec *irec)
107 : {
108 2596120461 : uint32_t start;
109 :
110 2596120461 : start = be32_to_cpu(rec->refc.rc_startblock);
111 2596120461 : if (start & XFS_REFC_COWFLAG) {
112 172363879 : start &= ~XFS_REFC_COWFLAG;
113 172363879 : irec->rc_domain = XFS_REFC_DOMAIN_COW;
114 : } else {
115 2423756582 : irec->rc_domain = XFS_REFC_DOMAIN_SHARED;
116 : }
117 :
118 2596120461 : irec->rc_startblock = start;
119 2596120461 : irec->rc_blockcount = be32_to_cpu(rec->refc.rc_blockcount);
120 2596120461 : irec->rc_refcount = be32_to_cpu(rec->refc.rc_refcount);
121 2596120461 : }
122 :
123 : /* Simple checks for refcount records. */
124 : xfs_failaddr_t
125 2596004147 : xfs_refcount_check_irec(
126 : struct xfs_btree_cur *cur,
127 : const struct xfs_refcount_irec *irec)
128 : {
129 2596004147 : struct xfs_perag *pag = cur->bc_ag.pag;
130 :
131 2596004147 : if (irec->rc_blockcount == 0 || irec->rc_blockcount > MAXREFCEXTLEN)
132 0 : return __this_address;
133 :
134 2596004147 : if (!xfs_refcount_check_domain(irec))
135 0 : return __this_address;
136 :
137 : /* check for valid extent range, including overflow */
138 2596004147 : if (!xfs_verify_agbext(pag, irec->rc_startblock, irec->rc_blockcount))
139 10 : return __this_address;
140 :
141 2596004137 : if (irec->rc_refcount == 0 || irec->rc_refcount > MAXREFCOUNT)
142 0 : return __this_address;
143 :
144 : return NULL;
145 : }
146 :
147 : static inline int
148 10 : xfs_refcount_complain_bad_rec(
149 : struct xfs_btree_cur *cur,
150 : xfs_failaddr_t fa,
151 : const struct xfs_refcount_irec *irec)
152 : {
153 10 : struct xfs_mount *mp = cur->bc_mp;
154 :
155 10 : xfs_warn(mp,
156 : "Refcount BTree record corruption in AG %d detected at %pS!",
157 : cur->bc_ag.pag->pag_agno, fa);
158 10 : xfs_warn(mp,
159 : "Start block 0x%x, block count 0x%x, references 0x%x",
160 : irec->rc_startblock, irec->rc_blockcount, irec->rc_refcount);
161 10 : return -EFSCORRUPTED;
162 : }
163 :
164 : /*
165 : * Get the data from the pointed-to record.
166 : */
167 : int
168 2562428980 : xfs_refcount_get_rec(
169 : struct xfs_btree_cur *cur,
170 : struct xfs_refcount_irec *irec,
171 : int *stat)
172 : {
173 2562428980 : union xfs_btree_rec *rec;
174 2562428980 : xfs_failaddr_t fa;
175 2562428980 : int error;
176 :
177 2562428980 : error = xfs_btree_get_rec(cur, &rec, stat);
178 2560978839 : if (error || !*stat)
179 : return error;
180 :
181 2550526621 : xfs_refcount_btrec_to_irec(rec, irec);
182 2550172636 : fa = xfs_refcount_check_irec(cur, irec);
183 2550699345 : if (fa)
184 10 : return xfs_refcount_complain_bad_rec(cur, fa, irec);
185 :
186 2550699335 : trace_xfs_refcount_get(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
187 2550699335 : return 0;
188 : }
189 :
190 : /*
191 : * Update the record referred to by cur to the value given
192 : * by [bno, len, refcount].
193 : * This either works (return 0) or gets an EFSCORRUPTED error.
194 : */
195 : STATIC int
196 129580546 : xfs_refcount_update(
197 : struct xfs_btree_cur *cur,
198 : struct xfs_refcount_irec *irec)
199 : {
200 129580546 : union xfs_btree_rec rec;
201 129580546 : uint32_t start;
202 129580546 : int error;
203 :
204 129580546 : trace_xfs_refcount_update(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
205 :
206 129580511 : start = xfs_refcount_encode_startblock(irec->rc_startblock,
207 : irec->rc_domain);
208 129580511 : rec.refc.rc_startblock = cpu_to_be32(start);
209 129580511 : rec.refc.rc_blockcount = cpu_to_be32(irec->rc_blockcount);
210 129580511 : rec.refc.rc_refcount = cpu_to_be32(irec->rc_refcount);
211 :
212 129580511 : error = xfs_btree_update(cur, &rec);
213 129580576 : if (error)
214 0 : trace_xfs_refcount_update_error(cur->bc_mp,
215 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
216 129580576 : return error;
217 : }
218 :
219 : /*
220 : * Insert the record referred to by cur to the value given
221 : * by [bno, len, refcount].
222 : * This either works (return 0) or gets an EFSCORRUPTED error.
223 : */
224 : int
225 73105519 : xfs_refcount_insert(
226 : struct xfs_btree_cur *cur,
227 : struct xfs_refcount_irec *irec,
228 : int *i)
229 : {
230 73105519 : int error;
231 :
232 73105519 : trace_xfs_refcount_insert(cur->bc_mp, cur->bc_ag.pag->pag_agno, irec);
233 :
234 73105367 : cur->bc_rec.rc.rc_startblock = irec->rc_startblock;
235 73105367 : cur->bc_rec.rc.rc_blockcount = irec->rc_blockcount;
236 73105367 : cur->bc_rec.rc.rc_refcount = irec->rc_refcount;
237 73105367 : cur->bc_rec.rc.rc_domain = irec->rc_domain;
238 :
239 73105367 : error = xfs_btree_insert(cur, i);
240 73105277 : if (error)
241 105 : goto out_error;
242 73105172 : if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
243 0 : error = -EFSCORRUPTED;
244 0 : goto out_error;
245 : }
246 :
247 73105277 : out_error:
248 73105277 : if (error)
249 210 : trace_xfs_refcount_insert_error(cur->bc_mp,
250 105 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
251 73105277 : return error;
252 : }
253 :
254 : /*
255 : * Remove the record referred to by cur, then set the pointer to the spot
256 : * where the record could be re-inserted, in case we want to increment or
257 : * decrement the cursor.
258 : * This either works (return 0) or gets an EFSCORRUPTED error.
259 : */
260 : STATIC int
261 65761286 : xfs_refcount_delete(
262 : struct xfs_btree_cur *cur,
263 : int *i)
264 : {
265 65761286 : struct xfs_refcount_irec irec;
266 65761286 : int found_rec;
267 65761286 : int error;
268 :
269 65761286 : error = xfs_refcount_get_rec(cur, &irec, &found_rec);
270 65761932 : if (error)
271 0 : goto out_error;
272 65761932 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
273 0 : error = -EFSCORRUPTED;
274 0 : goto out_error;
275 : }
276 65761932 : trace_xfs_refcount_delete(cur->bc_mp, cur->bc_ag.pag->pag_agno, &irec);
277 65761841 : error = xfs_btree_delete(cur, i);
278 65761015 : if (XFS_IS_CORRUPT(cur->bc_mp, *i != 1)) {
279 0 : error = -EFSCORRUPTED;
280 0 : goto out_error;
281 : }
282 65761015 : if (error)
283 0 : goto out_error;
284 65761015 : error = xfs_refcount_lookup_ge(cur, irec.rc_domain, irec.rc_startblock,
285 : &found_rec);
286 65762044 : out_error:
287 65762044 : if (error)
288 0 : trace_xfs_refcount_delete_error(cur->bc_mp,
289 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
290 65762044 : return error;
291 : }
292 :
293 : /*
294 : * Adjusting the Reference Count
295 : *
296 : * As stated elsewhere, the reference count btree (refcbt) stores
297 : * >1 reference counts for extents of physical blocks. In this
298 : * operation, we're either raising or lowering the reference count of
299 : * some subrange stored in the tree:
300 : *
301 : * <------ adjustment range ------>
302 : * ----+ +---+-----+ +--+--------+---------
303 : * 2 | | 3 | 2 | |17| 55 | 10
304 : * ----+ +---+-----+ +--+--------+---------
305 : * X axis is the physical block number;
306 : * reference counts are the numbers inside the rectangles
307 : *
308 : * The first thing we need to do is to ensure that there are no
309 : * refcount extents crossing either boundary of the range to be
310 : * adjusted. For any extent that does cross a boundary, split it into
311 : * two extents so that we can increment the refcount of one of the
312 : * pieces later:
313 : *
314 : * <------ adjustment range ------>
315 : * ----+ +---+-----+ +--+--------+----+----
316 : * 2 | | 3 | 2 | |17| 55 | 10 | 10
317 : * ----+ +---+-----+ +--+--------+----+----
318 : *
319 : * For this next step, let's assume that all the physical blocks in
320 : * the adjustment range are mapped to a file and are therefore in use
321 : * at least once. Therefore, we can infer that any gap in the
322 : * refcount tree within the adjustment range represents a physical
323 : * extent with refcount == 1:
324 : *
325 : * <------ adjustment range ------>
326 : * ----+---+---+-----+-+--+--------+----+----
327 : * 2 |"1"| 3 | 2 |1|17| 55 | 10 | 10
328 : * ----+---+---+-----+-+--+--------+----+----
329 : * ^
330 : *
331 : * For each extent that falls within the interval range, figure out
332 : * which extent is to the left or the right of that extent. Now we
333 : * have a left, current, and right extent. If the new reference count
334 : * of the center extent enables us to merge left, center, and right
335 : * into one record covering all three, do so. If the center extent is
336 : * at the left end of the range, abuts the left extent, and its new
337 : * reference count matches the left extent's record, then merge them.
338 : * If the center extent is at the right end of the range, abuts the
339 : * right extent, and the reference counts match, merge those. In the
340 : * example, we can left merge (assuming an increment operation):
341 : *
342 : * <------ adjustment range ------>
343 : * --------+---+-----+-+--+--------+----+----
344 : * 2 | 3 | 2 |1|17| 55 | 10 | 10
345 : * --------+---+-----+-+--+--------+----+----
346 : * ^
347 : *
348 : * For all other extents within the range, adjust the reference count
349 : * or delete it if the refcount falls below 2. If we were
350 : * incrementing, the end result looks like this:
351 : *
352 : * <------ adjustment range ------>
353 : * --------+---+-----+-+--+--------+----+----
354 : * 2 | 4 | 3 |2|18| 56 | 11 | 10
355 : * --------+---+-----+-+--+--------+----+----
356 : *
357 : * The result of a decrement operation looks as such:
358 : *
359 : * <------ adjustment range ------>
360 : * ----+ +---+ +--+--------+----+----
361 : * 2 | | 2 | |16| 54 | 9 | 10
362 : * ----+ +---+ +--+--------+----+----
363 : * DDDD 111111DD
364 : *
365 : * The blocks marked "D" are freed; the blocks marked "1" are only
366 : * referenced once and therefore the record is removed from the
367 : * refcount btree.
368 : */
369 :
370 : /* Next block after this extent. */
371 : static inline xfs_agblock_t
372 : xfs_refc_next(
373 : struct xfs_refcount_irec *rc)
374 : {
375 427424924 : return rc->rc_startblock + rc->rc_blockcount;
376 : }
377 :
378 : /*
379 : * Split a refcount extent that crosses agbno.
380 : */
381 : STATIC int
382 351444966 : xfs_refcount_split_extent(
383 : struct xfs_btree_cur *cur,
384 : enum xfs_refc_domain domain,
385 : xfs_agblock_t agbno,
386 : bool *shape_changed)
387 : {
388 351444966 : struct xfs_refcount_irec rcext, tmp;
389 351444966 : int found_rec;
390 351444966 : int error;
391 :
392 351444966 : *shape_changed = false;
393 351444966 : error = xfs_refcount_lookup_le(cur, domain, agbno, &found_rec);
394 351453095 : if (error)
395 204 : goto out_error;
396 351452891 : if (!found_rec)
397 : return 0;
398 :
399 342106038 : error = xfs_refcount_get_rec(cur, &rcext, &found_rec);
400 342100566 : if (error)
401 0 : goto out_error;
402 342100566 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
403 0 : error = -EFSCORRUPTED;
404 0 : goto out_error;
405 : }
406 342100566 : if (rcext.rc_domain != domain)
407 : return 0;
408 341509478 : if (rcext.rc_startblock == agbno || xfs_refc_next(&rcext) <= agbno)
409 : return 0;
410 :
411 42895408 : *shape_changed = true;
412 42895408 : trace_xfs_refcount_split_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
413 : &rcext, agbno);
414 :
415 : /* Establish the right extent. */
416 42895354 : tmp = rcext;
417 42895354 : tmp.rc_startblock = agbno;
418 42895354 : tmp.rc_blockcount -= (agbno - rcext.rc_startblock);
419 42895354 : error = xfs_refcount_update(cur, &tmp);
420 42895323 : if (error)
421 0 : goto out_error;
422 :
423 : /* Insert the left extent. */
424 42895323 : tmp = rcext;
425 42895323 : tmp.rc_blockcount = agbno - rcext.rc_startblock;
426 42895323 : error = xfs_refcount_insert(cur, &tmp, &found_rec);
427 42895167 : if (error)
428 9 : goto out_error;
429 42895158 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
430 0 : error = -EFSCORRUPTED;
431 0 : goto out_error;
432 : }
433 : return error;
434 :
435 213 : out_error:
436 426 : trace_xfs_refcount_split_extent_error(cur->bc_mp,
437 213 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
438 213 : return error;
439 : }
440 :
441 : /*
442 : * Merge the left, center, and right extents.
443 : */
444 : STATIC int
445 2341154 : xfs_refcount_merge_center_extents(
446 : struct xfs_btree_cur *cur,
447 : struct xfs_refcount_irec *left,
448 : struct xfs_refcount_irec *center,
449 : struct xfs_refcount_irec *right,
450 : unsigned long long extlen,
451 : xfs_extlen_t *aglen)
452 : {
453 2341154 : int error;
454 2341154 : int found_rec;
455 :
456 2341154 : trace_xfs_refcount_merge_center_extents(cur->bc_mp,
457 2341154 : cur->bc_ag.pag->pag_agno, left, center, right);
458 :
459 2341154 : ASSERT(left->rc_domain == center->rc_domain);
460 2341154 : ASSERT(right->rc_domain == center->rc_domain);
461 :
462 : /*
463 : * Make sure the center and right extents are not in the btree.
464 : * If the center extent was synthesized, the first delete call
465 : * removes the right extent and we skip the second deletion.
466 : * If center and right were in the btree, then the first delete
467 : * call removes the center and the second one removes the right
468 : * extent.
469 : */
470 2341154 : error = xfs_refcount_lookup_ge(cur, center->rc_domain,
471 : center->rc_startblock, &found_rec);
472 2341154 : if (error)
473 0 : goto out_error;
474 2341154 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
475 0 : error = -EFSCORRUPTED;
476 0 : goto out_error;
477 : }
478 :
479 2341154 : error = xfs_refcount_delete(cur, &found_rec);
480 2341154 : if (error)
481 0 : goto out_error;
482 2341154 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
483 0 : error = -EFSCORRUPTED;
484 0 : goto out_error;
485 : }
486 :
487 2341154 : if (center->rc_refcount > 1) {
488 654375 : error = xfs_refcount_delete(cur, &found_rec);
489 654375 : if (error)
490 0 : goto out_error;
491 654375 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
492 0 : error = -EFSCORRUPTED;
493 0 : goto out_error;
494 : }
495 : }
496 :
497 : /* Enlarge the left extent. */
498 2341154 : error = xfs_refcount_lookup_le(cur, left->rc_domain,
499 : left->rc_startblock, &found_rec);
500 2341154 : if (error)
501 0 : goto out_error;
502 2341154 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
503 0 : error = -EFSCORRUPTED;
504 0 : goto out_error;
505 : }
506 :
507 2341154 : left->rc_blockcount = extlen;
508 2341154 : error = xfs_refcount_update(cur, left);
509 2341153 : if (error)
510 0 : goto out_error;
511 :
512 2341153 : *aglen = 0;
513 2341153 : return error;
514 :
515 0 : out_error:
516 0 : trace_xfs_refcount_merge_center_extents_error(cur->bc_mp,
517 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
518 0 : return error;
519 : }
520 :
521 : /*
522 : * Merge with the left extent.
523 : */
524 : STATIC int
525 8051401 : xfs_refcount_merge_left_extent(
526 : struct xfs_btree_cur *cur,
527 : struct xfs_refcount_irec *left,
528 : struct xfs_refcount_irec *cleft,
529 : xfs_agblock_t *agbno,
530 : xfs_extlen_t *aglen)
531 : {
532 8051401 : int error;
533 8051401 : int found_rec;
534 :
535 8051401 : trace_xfs_refcount_merge_left_extent(cur->bc_mp,
536 8051401 : cur->bc_ag.pag->pag_agno, left, cleft);
537 :
538 8051395 : ASSERT(left->rc_domain == cleft->rc_domain);
539 :
540 : /* If the extent at agbno (cleft) wasn't synthesized, remove it. */
541 8051395 : if (cleft->rc_refcount > 1) {
542 2412008 : error = xfs_refcount_lookup_le(cur, cleft->rc_domain,
543 : cleft->rc_startblock, &found_rec);
544 2412009 : if (error)
545 0 : goto out_error;
546 2412009 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
547 0 : error = -EFSCORRUPTED;
548 0 : goto out_error;
549 : }
550 :
551 2412009 : error = xfs_refcount_delete(cur, &found_rec);
552 2412009 : if (error)
553 0 : goto out_error;
554 2412009 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
555 0 : error = -EFSCORRUPTED;
556 0 : goto out_error;
557 : }
558 : }
559 :
560 : /* Enlarge the left extent. */
561 8051396 : error = xfs_refcount_lookup_le(cur, left->rc_domain,
562 : left->rc_startblock, &found_rec);
563 8051415 : if (error)
564 0 : goto out_error;
565 8051415 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
566 0 : error = -EFSCORRUPTED;
567 0 : goto out_error;
568 : }
569 :
570 8051415 : left->rc_blockcount += cleft->rc_blockcount;
571 8051415 : error = xfs_refcount_update(cur, left);
572 8051414 : if (error)
573 0 : goto out_error;
574 :
575 8051414 : *agbno += cleft->rc_blockcount;
576 8051414 : *aglen -= cleft->rc_blockcount;
577 8051414 : return error;
578 :
579 0 : out_error:
580 0 : trace_xfs_refcount_merge_left_extent_error(cur->bc_mp,
581 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
582 0 : return error;
583 : }
584 :
585 : /*
586 : * Merge with the right extent.
587 : */
588 : STATIC int
589 3839698 : xfs_refcount_merge_right_extent(
590 : struct xfs_btree_cur *cur,
591 : struct xfs_refcount_irec *right,
592 : struct xfs_refcount_irec *cright,
593 : xfs_extlen_t *aglen)
594 : {
595 3839698 : int error;
596 3839698 : int found_rec;
597 :
598 3839698 : trace_xfs_refcount_merge_right_extent(cur->bc_mp,
599 3839698 : cur->bc_ag.pag->pag_agno, cright, right);
600 :
601 3839698 : ASSERT(right->rc_domain == cright->rc_domain);
602 :
603 : /*
604 : * If the extent ending at agbno+aglen (cright) wasn't synthesized,
605 : * remove it.
606 : */
607 3839698 : if (cright->rc_refcount > 1) {
608 1854521 : error = xfs_refcount_lookup_le(cur, cright->rc_domain,
609 : cright->rc_startblock, &found_rec);
610 1854521 : if (error)
611 0 : goto out_error;
612 1854521 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
613 0 : error = -EFSCORRUPTED;
614 0 : goto out_error;
615 : }
616 :
617 1854521 : error = xfs_refcount_delete(cur, &found_rec);
618 1854521 : if (error)
619 0 : goto out_error;
620 1854521 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
621 0 : error = -EFSCORRUPTED;
622 0 : goto out_error;
623 : }
624 : }
625 :
626 : /* Enlarge the right extent. */
627 3839698 : error = xfs_refcount_lookup_le(cur, right->rc_domain,
628 : right->rc_startblock, &found_rec);
629 3839698 : if (error)
630 0 : goto out_error;
631 3839698 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
632 0 : error = -EFSCORRUPTED;
633 0 : goto out_error;
634 : }
635 :
636 3839698 : right->rc_startblock -= cright->rc_blockcount;
637 3839698 : right->rc_blockcount += cright->rc_blockcount;
638 3839698 : error = xfs_refcount_update(cur, right);
639 3839698 : if (error)
640 0 : goto out_error;
641 :
642 3839698 : *aglen -= cright->rc_blockcount;
643 3839698 : return error;
644 :
645 0 : out_error:
646 0 : trace_xfs_refcount_merge_right_extent_error(cur->bc_mp,
647 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
648 0 : return error;
649 : }
650 :
651 : /*
652 : * Find the left extent and the one after it (cleft). This function assumes
653 : * that we've already split any extent crossing agbno.
654 : */
655 : STATIC int
656 175726035 : xfs_refcount_find_left_extents(
657 : struct xfs_btree_cur *cur,
658 : struct xfs_refcount_irec *left,
659 : struct xfs_refcount_irec *cleft,
660 : enum xfs_refc_domain domain,
661 : xfs_agblock_t agbno,
662 : xfs_extlen_t aglen)
663 : {
664 175726035 : struct xfs_refcount_irec tmp;
665 175726035 : int error;
666 175726035 : int found_rec;
667 :
668 175726035 : left->rc_startblock = cleft->rc_startblock = NULLAGBLOCK;
669 175726035 : error = xfs_refcount_lookup_le(cur, domain, agbno - 1, &found_rec);
670 175727365 : if (error)
671 0 : goto out_error;
672 175727365 : if (!found_rec)
673 : return 0;
674 :
675 149589008 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
676 149588917 : if (error)
677 0 : goto out_error;
678 149588917 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
679 0 : error = -EFSCORRUPTED;
680 0 : goto out_error;
681 : }
682 :
683 149588917 : if (tmp.rc_domain != domain)
684 : return 0;
685 147939007 : if (xfs_refc_next(&tmp) != agbno)
686 : return 0;
687 : /* We have a left extent; retrieve (or invent) the next right one */
688 34499762 : *left = tmp;
689 :
690 34499762 : error = xfs_btree_increment(cur, 0, &found_rec);
691 34499726 : if (error)
692 0 : goto out_error;
693 34499726 : if (found_rec) {
694 33209258 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
695 33209278 : if (error)
696 0 : goto out_error;
697 33209278 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
698 0 : error = -EFSCORRUPTED;
699 0 : goto out_error;
700 : }
701 :
702 33209278 : if (tmp.rc_domain != domain)
703 195352 : goto not_found;
704 :
705 : /* if tmp starts at the end of our range, just use that */
706 33013926 : if (tmp.rc_startblock == agbno)
707 26832752 : *cleft = tmp;
708 : else {
709 : /*
710 : * There's a gap in the refcntbt at the start of the
711 : * range we're interested in (refcount == 1) so
712 : * synthesize the implied extent and pass it back.
713 : * We assume here that the agbno/aglen range was
714 : * passed in from a data fork extent mapping and
715 : * therefore is allocated to exactly one owner.
716 : */
717 6181174 : cleft->rc_startblock = agbno;
718 6181174 : cleft->rc_blockcount = min(aglen,
719 : tmp.rc_startblock - agbno);
720 6181174 : cleft->rc_refcount = 1;
721 6181174 : cleft->rc_domain = domain;
722 : }
723 : } else {
724 1290468 : not_found:
725 : /*
726 : * No extents, so pretend that there's one covering the whole
727 : * range.
728 : */
729 1485820 : cleft->rc_startblock = agbno;
730 1485820 : cleft->rc_blockcount = aglen;
731 1485820 : cleft->rc_refcount = 1;
732 1485820 : cleft->rc_domain = domain;
733 : }
734 34499746 : trace_xfs_refcount_find_left_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
735 : left, cleft, agbno);
736 34499746 : return error;
737 :
738 0 : out_error:
739 0 : trace_xfs_refcount_find_left_extent_error(cur->bc_mp,
740 0 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
741 0 : return error;
742 : }
743 :
744 : /*
745 : * Find the right extent and the one before it (cright). This function
746 : * assumes that we've already split any extents crossing agbno + aglen.
747 : */
748 : STATIC int
749 175726365 : xfs_refcount_find_right_extents(
750 : struct xfs_btree_cur *cur,
751 : struct xfs_refcount_irec *right,
752 : struct xfs_refcount_irec *cright,
753 : enum xfs_refc_domain domain,
754 : xfs_agblock_t agbno,
755 : xfs_extlen_t aglen)
756 : {
757 175726365 : struct xfs_refcount_irec tmp;
758 175726365 : int error;
759 175726365 : int found_rec;
760 :
761 175726365 : right->rc_startblock = cright->rc_startblock = NULLAGBLOCK;
762 175726365 : error = xfs_refcount_lookup_ge(cur, domain, agbno + aglen, &found_rec);
763 175727133 : if (error)
764 0 : goto out_error;
765 175727133 : if (!found_rec)
766 : return 0;
767 :
768 141765516 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
769 141765582 : if (error)
770 10 : goto out_error;
771 141765572 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
772 0 : error = -EFSCORRUPTED;
773 0 : goto out_error;
774 : }
775 :
776 141765572 : if (tmp.rc_domain != domain)
777 : return 0;
778 133557374 : if (tmp.rc_startblock != agbno + aglen)
779 : return 0;
780 : /* We have a right extent; retrieve (or invent) the next left one */
781 32025314 : *right = tmp;
782 :
783 32025314 : error = xfs_btree_decrement(cur, 0, &found_rec);
784 32025283 : if (error)
785 0 : goto out_error;
786 32025283 : if (found_rec) {
787 31993045 : error = xfs_refcount_get_rec(cur, &tmp, &found_rec);
788 31993036 : if (error)
789 0 : goto out_error;
790 31993036 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
791 0 : error = -EFSCORRUPTED;
792 0 : goto out_error;
793 : }
794 :
795 31993036 : if (tmp.rc_domain != domain)
796 1079 : goto not_found;
797 :
798 : /* if tmp ends at the end of our range, just use that */
799 31991957 : if (xfs_refc_next(&tmp) == agbno + aglen)
800 28114579 : *cright = tmp;
801 : else {
802 : /*
803 : * There's a gap in the refcntbt at the end of the
804 : * range we're interested in (refcount == 1) so
805 : * create the implied extent and pass it back.
806 : * We assume here that the agbno/aglen range was
807 : * passed in from a data fork extent mapping and
808 : * therefore is allocated to exactly one owner.
809 : */
810 3877378 : cright->rc_startblock = max(agbno, xfs_refc_next(&tmp));
811 3877378 : cright->rc_blockcount = right->rc_startblock -
812 : cright->rc_startblock;
813 3877378 : cright->rc_refcount = 1;
814 3877378 : cright->rc_domain = domain;
815 : }
816 : } else {
817 32238 : not_found:
818 : /*
819 : * No extents, so pretend that there's one covering the whole
820 : * range.
821 : */
822 33317 : cright->rc_startblock = agbno;
823 33317 : cright->rc_blockcount = aglen;
824 33317 : cright->rc_refcount = 1;
825 33317 : cright->rc_domain = domain;
826 : }
827 32025274 : trace_xfs_refcount_find_right_extent(cur->bc_mp, cur->bc_ag.pag->pag_agno,
828 : cright, right, agbno + aglen);
829 32025274 : return error;
830 :
831 10 : out_error:
832 20 : trace_xfs_refcount_find_right_extent_error(cur->bc_mp,
833 10 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
834 10 : return error;
835 : }
836 :
837 : /* Is this extent valid? */
838 : static inline bool
839 : xfs_refc_valid(
840 : const struct xfs_refcount_irec *rc)
841 : {
842 587544044 : return rc->rc_startblock != NULLAGBLOCK;
843 : }
844 :
845 : static inline xfs_nlink_t
846 : xfs_refc_merge_refcount(
847 : const struct xfs_refcount_irec *irec,
848 : enum xfs_refc_adjust_op adjust)
849 : {
850 : /* Once a record hits MAXREFCOUNT, it is pinned there forever */
851 78545097 : if (irec->rc_refcount == MAXREFCOUNT)
852 : return MAXREFCOUNT;
853 78545067 : return irec->rc_refcount + adjust;
854 : }
855 :
856 : static inline bool
857 47926622 : xfs_refc_want_merge_center(
858 : const struct xfs_refcount_irec *left,
859 : const struct xfs_refcount_irec *cleft,
860 : const struct xfs_refcount_irec *cright,
861 : const struct xfs_refcount_irec *right,
862 : bool cleft_is_cright,
863 : enum xfs_refc_adjust_op adjust,
864 : unsigned long long *ulenp)
865 : {
866 47926622 : unsigned long long ulen = left->rc_blockcount;
867 47926622 : xfs_nlink_t new_refcount;
868 :
869 : /*
870 : * To merge with a center record, both shoulder records must be
871 : * adjacent to the record we want to adjust. This is only true if
872 : * find_left and find_right made all four records valid.
873 : */
874 47926622 : if (!xfs_refc_valid(left) || !xfs_refc_valid(right) ||
875 18598204 : !xfs_refc_valid(cleft) || !xfs_refc_valid(cright))
876 : return false;
877 :
878 : /* There must only be one record for the entire range. */
879 18598204 : if (!cleft_is_cright)
880 : return false;
881 :
882 : /* The shoulder record refcounts must match the new refcount. */
883 17725443 : new_refcount = xfs_refc_merge_refcount(cleft, adjust);
884 17725443 : if (left->rc_refcount != new_refcount)
885 : return false;
886 3363935 : if (right->rc_refcount != new_refcount)
887 : return false;
888 :
889 : /*
890 : * The new record cannot exceed the max length. ulen is a ULL as the
891 : * individual record block counts can be up to (u32 - 1) in length
892 : * hence we need to catch u32 addition overflows here.
893 : */
894 2341154 : ulen += cleft->rc_blockcount + right->rc_blockcount;
895 2341154 : if (ulen >= MAXREFCEXTLEN)
896 : return false;
897 :
898 2341154 : *ulenp = ulen;
899 2341154 : return true;
900 : }
901 :
902 : static inline bool
903 45585383 : xfs_refc_want_merge_left(
904 : const struct xfs_refcount_irec *left,
905 : const struct xfs_refcount_irec *cleft,
906 : enum xfs_refc_adjust_op adjust)
907 : {
908 45585383 : unsigned long long ulen = left->rc_blockcount;
909 45585383 : xfs_nlink_t new_refcount;
910 :
911 : /*
912 : * For a left merge, the left shoulder record must be adjacent to the
913 : * start of the range. If this is true, find_left made left and cleft
914 : * contain valid contents.
915 : */
916 45585383 : if (!xfs_refc_valid(left) || !xfs_refc_valid(cleft))
917 : return false;
918 :
919 : /* Left shoulder record refcount must match the new refcount. */
920 32158452 : new_refcount = xfs_refc_merge_refcount(cleft, adjust);
921 32158452 : if (left->rc_refcount != new_refcount)
922 : return false;
923 :
924 : /*
925 : * The new record cannot exceed the max length. ulen is a ULL as the
926 : * individual record block counts can be up to (u32 - 1) in length
927 : * hence we need to catch u32 addition overflows here.
928 : */
929 8051413 : ulen += cleft->rc_blockcount;
930 8051413 : if (ulen >= MAXREFCEXTLEN)
931 0 : return false;
932 :
933 : return true;
934 : }
935 :
936 : static inline bool
937 44562631 : xfs_refc_want_merge_right(
938 : const struct xfs_refcount_irec *cright,
939 : const struct xfs_refcount_irec *right,
940 : enum xfs_refc_adjust_op adjust)
941 : {
942 44562631 : unsigned long long ulen = right->rc_blockcount;
943 44562631 : xfs_nlink_t new_refcount;
944 :
945 : /*
946 : * For a right merge, the right shoulder record must be adjacent to the
947 : * end of the range. If this is true, find_right made cright and right
948 : * contain valid contents.
949 : */
950 44562631 : if (!xfs_refc_valid(right) || !xfs_refc_valid(cright))
951 : return false;
952 :
953 : /* Right shoulder record refcount must match the new refcount. */
954 28661202 : new_refcount = xfs_refc_merge_refcount(cright, adjust);
955 28661202 : if (right->rc_refcount != new_refcount)
956 : return false;
957 :
958 : /*
959 : * The new record cannot exceed the max length. ulen is a ULL as the
960 : * individual record block counts can be up to (u32 - 1) in length
961 : * hence we need to catch u32 addition overflows here.
962 : */
963 3839698 : ulen += cright->rc_blockcount;
964 3839698 : if (ulen >= MAXREFCEXTLEN)
965 0 : return false;
966 :
967 : return true;
968 : }
969 :
970 : /*
971 : * Try to merge with any extents on the boundaries of the adjustment range.
972 : */
973 : STATIC int
974 175726114 : xfs_refcount_merge_extents(
975 : struct xfs_btree_cur *cur,
976 : enum xfs_refc_domain domain,
977 : xfs_agblock_t *agbno,
978 : xfs_extlen_t *aglen,
979 : enum xfs_refc_adjust_op adjust,
980 : bool *shape_changed)
981 : {
982 175726114 : struct xfs_refcount_irec left = {0}, cleft = {0};
983 175726114 : struct xfs_refcount_irec cright = {0}, right = {0};
984 175726114 : int error;
985 175726114 : unsigned long long ulen;
986 175726114 : bool cequal;
987 :
988 175726114 : *shape_changed = false;
989 : /*
990 : * Find the extent just below agbno [left], just above agbno [cleft],
991 : * just below (agbno + aglen) [cright], and just above (agbno + aglen)
992 : * [right].
993 : */
994 175726114 : error = xfs_refcount_find_left_extents(cur, &left, &cleft, domain,
995 : *agbno, *aglen);
996 175726453 : if (error)
997 : return error;
998 175726514 : error = xfs_refcount_find_right_extents(cur, &right, &cright, domain,
999 : *agbno, *aglen);
1000 175726611 : if (error)
1001 : return error;
1002 :
1003 : /* No left or right extent to merge; exit. */
1004 175726601 : if (!xfs_refc_valid(&left) && !xfs_refc_valid(&right))
1005 : return 0;
1006 :
1007 47926642 : cequal = (cleft.rc_startblock == cright.rc_startblock) &&
1008 17725444 : (cleft.rc_blockcount == cright.rc_blockcount);
1009 :
1010 : /* Try to merge left, cleft, and right. cleft must == cright. */
1011 47926642 : if (xfs_refc_want_merge_center(&left, &cleft, &cright, &right, cequal,
1012 : adjust, &ulen)) {
1013 2341154 : *shape_changed = true;
1014 2341154 : return xfs_refcount_merge_center_extents(cur, &left, &cleft,
1015 : &right, ulen, aglen);
1016 : }
1017 :
1018 : /* Try to merge left and cleft. */
1019 45585312 : if (xfs_refc_want_merge_left(&left, &cleft, adjust)) {
1020 8051406 : *shape_changed = true;
1021 8051406 : error = xfs_refcount_merge_left_extent(cur, &left, &cleft,
1022 : agbno, aglen);
1023 8051416 : if (error)
1024 : return error;
1025 :
1026 : /*
1027 : * If we just merged left + cleft and cleft == cright,
1028 : * we no longer have a cright to merge with right. We're done.
1029 : */
1030 8051416 : if (cequal)
1031 : return 0;
1032 : }
1033 :
1034 : /* Try to merge cright and right. */
1035 44562541 : if (xfs_refc_want_merge_right(&cright, &right, adjust)) {
1036 3839698 : *shape_changed = true;
1037 3839698 : return xfs_refcount_merge_right_extent(cur, &right, &cright,
1038 : aglen);
1039 : }
1040 :
1041 : return 0;
1042 : }
1043 :
1044 : /*
1045 : * XXX: This is a pretty hand-wavy estimate. The penalty for guessing
1046 : * true incorrectly is a shutdown FS; the penalty for guessing false
1047 : * incorrectly is more transaction rolls than might be necessary.
1048 : * Be conservative here.
1049 : */
1050 : static bool
1051 183442264 : xfs_refcount_still_have_space(
1052 : struct xfs_btree_cur *cur)
1053 : {
1054 183442264 : unsigned long overhead;
1055 :
1056 : /*
1057 : * Worst case estimate: full splits of the free space and rmap btrees
1058 : * to handle each of the shape changes to the refcount btree.
1059 : */
1060 183442264 : overhead = xfs_allocfree_block_count(cur->bc_mp,
1061 : cur->bc_ag.refc.shape_changes);
1062 183440357 : overhead += cur->bc_mp->m_refc_maxlevels;
1063 183440357 : overhead *= cur->bc_mp->m_sb.sb_blocksize;
1064 :
1065 : /*
1066 : * Only allow 2 refcount extent updates per transaction if the
1067 : * refcount continue update "error" has been injected.
1068 : */
1069 211479214 : if (cur->bc_ag.refc.nr_ops > 2 &&
1070 28038859 : XFS_TEST_ERROR(false, cur->bc_mp,
1071 : XFS_ERRTAG_REFCOUNT_CONTINUE_UPDATE))
1072 : return false;
1073 :
1074 183440103 : if (cur->bc_ag.refc.nr_ops == 0)
1075 : return true;
1076 38429344 : else if (overhead > cur->bc_tp->t_log_res)
1077 : return false;
1078 38429344 : return cur->bc_tp->t_log_res - overhead >
1079 38429344 : cur->bc_ag.refc.nr_ops * XFS_REFCOUNT_ITEM_OVERHEAD;
1080 : }
1081 :
1082 : /*
1083 : * Adjust the refcounts of middle extents. At this point we should have
1084 : * split extents that crossed the adjustment range; merged with adjacent
1085 : * extents; and updated agbno/aglen to reflect the merges. Therefore,
1086 : * all we have to do is update the extents inside [agbno, agbno + aglen].
1087 : */
1088 : STATIC int
1089 158489051 : xfs_refcount_adjust_extents(
1090 : struct xfs_btree_cur *cur,
1091 : xfs_agblock_t *agbno,
1092 : xfs_extlen_t *aglen,
1093 : enum xfs_refc_adjust_op adj)
1094 : {
1095 158489051 : struct xfs_refcount_irec ext, tmp;
1096 158489051 : int error;
1097 158489051 : int found_rec, found_tmp;
1098 158489051 : xfs_fsblock_t fsbno;
1099 :
1100 : /* Merging did all the work already. */
1101 158489051 : if (*aglen == 0)
1102 : return 0;
1103 :
1104 145967351 : error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_SHARED, *agbno,
1105 : &found_rec);
1106 145968412 : if (error)
1107 0 : goto out_error;
1108 :
1109 263084537 : while (*aglen > 0 && xfs_refcount_still_have_space(cur)) {
1110 176709907 : error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1111 176711336 : if (error)
1112 0 : goto out_error;
1113 176711336 : if (!found_rec || ext.rc_domain != XFS_REFC_DOMAIN_SHARED) {
1114 16791582 : ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1115 16791582 : ext.rc_blockcount = 0;
1116 16791582 : ext.rc_refcount = 0;
1117 16791582 : ext.rc_domain = XFS_REFC_DOMAIN_SHARED;
1118 : }
1119 :
1120 : /*
1121 : * Deal with a hole in the refcount tree; if a file maps to
1122 : * these blocks and there's no refcountbt record, pretend that
1123 : * there is one with refcount == 1.
1124 : */
1125 176711336 : if (ext.rc_startblock != *agbno) {
1126 66324653 : tmp.rc_startblock = *agbno;
1127 66324653 : tmp.rc_blockcount = min(*aglen,
1128 : ext.rc_startblock - *agbno);
1129 66324653 : tmp.rc_refcount = 1 + adj;
1130 66324653 : tmp.rc_domain = XFS_REFC_DOMAIN_SHARED;
1131 :
1132 66324653 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1133 66324653 : cur->bc_ag.pag->pag_agno, &tmp);
1134 :
1135 : /*
1136 : * Either cover the hole (increment) or
1137 : * delete the range (decrement).
1138 : */
1139 66323971 : cur->bc_ag.refc.nr_ops++;
1140 66323971 : if (tmp.rc_refcount) {
1141 28264991 : error = xfs_refcount_insert(cur, &tmp,
1142 : &found_tmp);
1143 28264991 : if (error)
1144 95 : goto out_error;
1145 28264896 : if (XFS_IS_CORRUPT(cur->bc_mp,
1146 : found_tmp != 1)) {
1147 0 : error = -EFSCORRUPTED;
1148 0 : goto out_error;
1149 : }
1150 : } else {
1151 38058980 : fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1152 : cur->bc_ag.pag->pag_agno,
1153 : tmp.rc_startblock);
1154 38058980 : error = xfs_free_extent_later(cur->bc_tp, fsbno,
1155 38058980 : tmp.rc_blockcount, NULL,
1156 : XFS_AG_RESV_NONE);
1157 38058693 : if (error)
1158 0 : goto out_error;
1159 : }
1160 :
1161 66323589 : (*agbno) += tmp.rc_blockcount;
1162 66323589 : (*aglen) -= tmp.rc_blockcount;
1163 :
1164 : /* Stop if there's nothing left to modify */
1165 66323589 : if (*aglen == 0 || !xfs_refcount_still_have_space(cur))
1166 : break;
1167 :
1168 : /* Move the cursor to the start of ext. */
1169 6729427 : error = xfs_refcount_lookup_ge(cur,
1170 : XFS_REFC_DOMAIN_SHARED, *agbno,
1171 : &found_rec);
1172 6729427 : if (error)
1173 0 : goto out_error;
1174 : }
1175 :
1176 : /*
1177 : * A previous step trimmed agbno/aglen such that the end of the
1178 : * range would not be in the middle of the record. If this is
1179 : * no longer the case, something is seriously wrong with the
1180 : * btree. Make sure we never feed the synthesized record into
1181 : * the processing loop below.
1182 : */
1183 117116110 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount == 0) ||
1184 117116110 : XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount > *aglen)) {
1185 0 : error = -EFSCORRUPTED;
1186 0 : goto out_error;
1187 : }
1188 :
1189 : /*
1190 : * Adjust the reference count and either update the tree
1191 : * (incr) or free the blocks (decr).
1192 : */
1193 117116110 : if (ext.rc_refcount == MAXREFCOUNT)
1194 48 : goto skip;
1195 117116062 : ext.rc_refcount += adj;
1196 117116062 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1197 117116062 : cur->bc_ag.pag->pag_agno, &ext);
1198 117116057 : cur->bc_ag.refc.nr_ops++;
1199 117116057 : if (ext.rc_refcount > 1) {
1200 72453131 : error = xfs_refcount_update(cur, &ext);
1201 72453074 : if (error)
1202 0 : goto out_error;
1203 44662926 : } else if (ext.rc_refcount == 1) {
1204 44662926 : error = xfs_refcount_delete(cur, &found_rec);
1205 44662927 : if (error)
1206 0 : goto out_error;
1207 44662927 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1208 0 : error = -EFSCORRUPTED;
1209 0 : goto out_error;
1210 : }
1211 44662927 : goto advloop;
1212 : } else {
1213 0 : fsbno = XFS_AGB_TO_FSB(cur->bc_mp,
1214 : cur->bc_ag.pag->pag_agno,
1215 : ext.rc_startblock);
1216 0 : error = xfs_free_extent_later(cur->bc_tp, fsbno,
1217 0 : ext.rc_blockcount, NULL,
1218 : XFS_AG_RESV_NONE);
1219 0 : if (error)
1220 0 : goto out_error;
1221 : }
1222 :
1223 0 : skip:
1224 72453122 : error = xfs_btree_increment(cur, 0, &found_rec);
1225 72453198 : if (error)
1226 0 : goto out_error;
1227 :
1228 72453198 : advloop:
1229 117116125 : (*agbno) += ext.rc_blockcount;
1230 117116125 : (*aglen) -= ext.rc_blockcount;
1231 : }
1232 :
1233 : return error;
1234 95 : out_error:
1235 190 : trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1236 95 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1237 95 : return error;
1238 : }
1239 :
1240 : /* Adjust the reference count of a range of AG blocks. */
1241 : STATIC int
1242 158489857 : xfs_refcount_adjust(
1243 : struct xfs_btree_cur *cur,
1244 : xfs_agblock_t *agbno,
1245 : xfs_extlen_t *aglen,
1246 : enum xfs_refc_adjust_op adj)
1247 : {
1248 158489857 : bool shape_changed;
1249 158489857 : int shape_changes = 0;
1250 158489857 : int error;
1251 :
1252 158489857 : if (adj == XFS_REFCOUNT_ADJUST_INCREASE)
1253 74530425 : trace_xfs_refcount_increase(cur->bc_mp,
1254 74530425 : cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1255 : else
1256 83959432 : trace_xfs_refcount_decrease(cur->bc_mp,
1257 83959432 : cur->bc_ag.pag->pag_agno, *agbno, *aglen);
1258 :
1259 : /*
1260 : * Ensure that no rcextents cross the boundary of the adjustment range.
1261 : */
1262 158489267 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1263 : *agbno, &shape_changed);
1264 158489480 : if (error)
1265 205 : goto out_error;
1266 158489275 : if (shape_changed)
1267 15467242 : shape_changes++;
1268 :
1269 158489275 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_SHARED,
1270 158489275 : *agbno + *aglen, &shape_changed);
1271 158489947 : if (error)
1272 8 : goto out_error;
1273 158489939 : if (shape_changed)
1274 15417877 : shape_changes++;
1275 :
1276 : /*
1277 : * Try to merge with the left or right extents of the range.
1278 : */
1279 158489939 : error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_SHARED,
1280 : agbno, aglen, adj, &shape_changed);
1281 158489237 : if (error)
1282 10 : goto out_error;
1283 158489227 : if (shape_changed)
1284 12770570 : shape_changes++;
1285 158489227 : if (shape_changes)
1286 32604169 : cur->bc_ag.refc.shape_changes++;
1287 :
1288 : /* Now that we've taken care of the ends, adjust the middle extents */
1289 158489227 : error = xfs_refcount_adjust_extents(cur, agbno, aglen, adj);
1290 158487695 : if (error)
1291 95 : goto out_error;
1292 :
1293 : return 0;
1294 :
1295 318 : out_error:
1296 636 : trace_xfs_refcount_adjust_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1297 318 : error, _RET_IP_);
1298 318 : return error;
1299 : }
1300 :
1301 : /* Clean up after calling xfs_refcount_finish_one. */
1302 : void
1303 174015980 : xfs_refcount_finish_one_cleanup(
1304 : struct xfs_trans *tp,
1305 : struct xfs_btree_cur *rcur,
1306 : int error)
1307 : {
1308 174413544 : struct xfs_buf *agbp;
1309 :
1310 174015980 : if (rcur == NULL)
1311 : return;
1312 174413283 : agbp = rcur->bc_ag.agbp;
1313 174015719 : xfs_btree_del_cursor(rcur, error);
1314 174018378 : if (error)
1315 1723 : xfs_trans_brelse(tp, agbp);
1316 : }
1317 :
1318 : /*
1319 : * Set up a continuation a deferred refcount operation by updating the intent.
1320 : * Checks to make sure we're not going to run off the end of the AG.
1321 : */
1322 : static inline int
1323 1404 : xfs_refcount_continue_op(
1324 : struct xfs_btree_cur *cur,
1325 : struct xfs_refcount_intent *ri,
1326 : xfs_agblock_t new_agbno)
1327 : {
1328 1404 : struct xfs_mount *mp = cur->bc_mp;
1329 1404 : struct xfs_perag *pag = cur->bc_ag.pag;
1330 :
1331 1404 : if (XFS_IS_CORRUPT(mp, !xfs_verify_agbext(pag, new_agbno,
1332 : ri->ri_blockcount)))
1333 0 : return -EFSCORRUPTED;
1334 :
1335 1404 : ri->ri_startblock = XFS_AGB_TO_FSB(mp, pag->pag_agno, new_agbno);
1336 :
1337 1404 : ASSERT(xfs_verify_fsbext(mp, ri->ri_startblock, ri->ri_blockcount));
1338 1404 : ASSERT(pag->pag_agno == XFS_FSB_TO_AGNO(mp, ri->ri_startblock));
1339 :
1340 : return 0;
1341 : }
1342 :
1343 : /*
1344 : * Process one of the deferred refcount operations. We pass back the
1345 : * btree cursor to maintain our lock on the btree between calls.
1346 : * This saves time and eliminates a buffer deadlock between the
1347 : * superblock and the AGF because we'll always grab them in the same
1348 : * order.
1349 : */
1350 : int
1351 175724470 : xfs_refcount_finish_one(
1352 : struct xfs_trans *tp,
1353 : struct xfs_refcount_intent *ri,
1354 : struct xfs_btree_cur **pcur)
1355 : {
1356 175724470 : struct xfs_mount *mp = tp->t_mountp;
1357 175724470 : struct xfs_btree_cur *rcur;
1358 175724470 : struct xfs_buf *agbp = NULL;
1359 175724470 : int error = 0;
1360 175724470 : xfs_agblock_t bno;
1361 175724470 : unsigned long nr_ops = 0;
1362 175724470 : int shape_changes = 0;
1363 :
1364 175724470 : bno = XFS_FSB_TO_AGBNO(mp, ri->ri_startblock);
1365 :
1366 175720941 : trace_xfs_refcount_deferred(mp, XFS_FSB_TO_AGNO(mp, ri->ri_startblock),
1367 175721012 : ri->ri_type, XFS_FSB_TO_AGBNO(mp, ri->ri_startblock),
1368 : ri->ri_blockcount);
1369 :
1370 175721541 : if (XFS_TEST_ERROR(false, mp, XFS_ERRTAG_REFCOUNT_FINISH_ONE))
1371 : return -EIO;
1372 :
1373 : /*
1374 : * If we haven't gotten a cursor or the cursor AG doesn't match
1375 : * the startblock, get one now.
1376 : */
1377 175722794 : rcur = *pcur;
1378 175722794 : if (rcur != NULL && rcur->bc_ag.pag != ri->ri_pag) {
1379 397564 : nr_ops = rcur->bc_ag.refc.nr_ops;
1380 397564 : shape_changes = rcur->bc_ag.refc.shape_changes;
1381 397564 : xfs_refcount_finish_one_cleanup(tp, rcur, 0);
1382 397565 : rcur = NULL;
1383 397565 : *pcur = NULL;
1384 : }
1385 175722795 : if (rcur == NULL) {
1386 174411162 : error = xfs_alloc_read_agf(ri->ri_pag, tp,
1387 : XFS_ALLOC_FLAG_FREEING, &agbp);
1388 174415244 : if (error)
1389 : return error;
1390 :
1391 174414996 : rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, ri->ri_pag);
1392 174415004 : rcur->bc_ag.refc.nr_ops = nr_ops;
1393 174415004 : rcur->bc_ag.refc.shape_changes = shape_changes;
1394 : }
1395 175726637 : *pcur = rcur;
1396 :
1397 175726637 : switch (ri->ri_type) {
1398 74530434 : case XFS_REFCOUNT_INCREASE:
1399 74530434 : error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1400 : XFS_REFCOUNT_ADJUST_INCREASE);
1401 74530413 : if (error)
1402 : return error;
1403 74530273 : if (ri->ri_blockcount > 0)
1404 126 : error = xfs_refcount_continue_op(rcur, ri, bno);
1405 : break;
1406 83959696 : case XFS_REFCOUNT_DECREASE:
1407 83959696 : error = xfs_refcount_adjust(rcur, &bno, &ri->ri_blockcount,
1408 : XFS_REFCOUNT_ADJUST_DECREASE);
1409 83957764 : if (error)
1410 : return error;
1411 83957586 : if (ri->ri_blockcount > 0)
1412 1278 : error = xfs_refcount_continue_op(rcur, ri, bno);
1413 : break;
1414 3400543 : case XFS_REFCOUNT_ALLOC_COW:
1415 3400543 : error = __xfs_refcount_cow_alloc(rcur, bno, ri->ri_blockcount);
1416 3400338 : if (error)
1417 : return error;
1418 3400337 : ri->ri_blockcount = 0;
1419 3400337 : break;
1420 13835964 : case XFS_REFCOUNT_FREE_COW:
1421 13835964 : error = __xfs_refcount_cow_free(rcur, bno, ri->ri_blockcount);
1422 13836702 : if (error)
1423 : return error;
1424 13836702 : ri->ri_blockcount = 0;
1425 13836702 : break;
1426 0 : default:
1427 0 : ASSERT(0);
1428 0 : return -EFSCORRUPTED;
1429 : }
1430 175724898 : if (!error && ri->ri_blockcount > 0)
1431 1404 : trace_xfs_refcount_finish_one_leftover(mp, ri->ri_pag->pag_agno,
1432 1404 : ri->ri_type, bno, ri->ri_blockcount);
1433 : return error;
1434 : }
1435 :
1436 : /*
1437 : * Record a refcount intent for later processing.
1438 : */
1439 : static void
1440 175719421 : __xfs_refcount_add(
1441 : struct xfs_trans *tp,
1442 : enum xfs_refcount_intent_type type,
1443 : xfs_fsblock_t startblock,
1444 : xfs_extlen_t blockcount)
1445 : {
1446 175719421 : struct xfs_refcount_intent *ri;
1447 :
1448 527149971 : trace_xfs_refcount_defer(tp->t_mountp,
1449 175715275 : XFS_FSB_TO_AGNO(tp->t_mountp, startblock),
1450 175719421 : type, XFS_FSB_TO_AGBNO(tp->t_mountp, startblock),
1451 : blockcount);
1452 :
1453 175715980 : ri = kmem_cache_alloc(xfs_refcount_intent_cache,
1454 : GFP_NOFS | __GFP_NOFAIL);
1455 175723116 : INIT_LIST_HEAD(&ri->ri_list);
1456 175723116 : ri->ri_type = type;
1457 175723116 : ri->ri_startblock = startblock;
1458 175723116 : ri->ri_blockcount = blockcount;
1459 :
1460 175723116 : xfs_refcount_update_get_group(tp->t_mountp, ri);
1461 175725542 : xfs_defer_add(tp, XFS_DEFER_OPS_TYPE_REFCOUNT, &ri->ri_list);
1462 175725891 : }
1463 :
1464 : /*
1465 : * Increase the reference count of the blocks backing a file's extent.
1466 : */
1467 : void
1468 74530033 : xfs_refcount_increase_extent(
1469 : struct xfs_trans *tp,
1470 : struct xfs_bmbt_irec *PREV)
1471 : {
1472 74530033 : if (!xfs_has_reflink(tp->t_mountp))
1473 : return;
1474 :
1475 74530030 : __xfs_refcount_add(tp, XFS_REFCOUNT_INCREASE, PREV->br_startblock,
1476 74530030 : PREV->br_blockcount);
1477 : }
1478 :
1479 : /*
1480 : * Decrease the reference count of the blocks backing a file's extent.
1481 : */
1482 : void
1483 83957281 : xfs_refcount_decrease_extent(
1484 : struct xfs_trans *tp,
1485 : struct xfs_bmbt_irec *PREV)
1486 : {
1487 83957281 : if (!xfs_has_reflink(tp->t_mountp))
1488 : return;
1489 :
1490 83956907 : __xfs_refcount_add(tp, XFS_REFCOUNT_DECREASE, PREV->br_startblock,
1491 83956907 : PREV->br_blockcount);
1492 : }
1493 :
1494 : /*
1495 : * Given an AG extent, find the lowest-numbered run of shared blocks
1496 : * within that range and return the range in fbno/flen. If
1497 : * find_end_of_shared is set, return the longest contiguous extent of
1498 : * shared blocks; if not, just return the first extent we find. If no
1499 : * shared blocks are found, fbno and flen will be set to NULLAGBLOCK
1500 : * and 0, respectively.
1501 : */
1502 : int
1503 991513224 : xfs_refcount_find_shared(
1504 : struct xfs_btree_cur *cur,
1505 : xfs_agblock_t agbno,
1506 : xfs_extlen_t aglen,
1507 : xfs_agblock_t *fbno,
1508 : xfs_extlen_t *flen,
1509 : bool find_end_of_shared)
1510 : {
1511 991513224 : struct xfs_refcount_irec tmp;
1512 991513224 : int i;
1513 991513224 : int have;
1514 991513224 : int error;
1515 :
1516 991513224 : trace_xfs_refcount_find_shared(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1517 : agbno, aglen);
1518 :
1519 : /* By default, skip the whole range */
1520 988148156 : *fbno = NULLAGBLOCK;
1521 988148156 : *flen = 0;
1522 :
1523 : /* Try to find a refcount extent that crosses the start */
1524 988148156 : error = xfs_refcount_lookup_le(cur, XFS_REFC_DOMAIN_SHARED, agbno,
1525 : &have);
1526 991126980 : if (error)
1527 61 : goto out_error;
1528 991126919 : if (!have) {
1529 : /* No left extent, look at the next one */
1530 117709539 : error = xfs_btree_increment(cur, 0, &have);
1531 117691193 : if (error)
1532 0 : goto out_error;
1533 117691193 : if (!have)
1534 82445091 : goto done;
1535 : }
1536 908663483 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1537 908468640 : if (error)
1538 0 : goto out_error;
1539 908468640 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1540 0 : error = -EFSCORRUPTED;
1541 0 : goto out_error;
1542 : }
1543 908468640 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1544 3385694 : goto done;
1545 :
1546 : /* If the extent ends before the start, look at the next one */
1547 905082946 : if (tmp.rc_startblock + tmp.rc_blockcount <= agbno) {
1548 702259485 : error = xfs_btree_increment(cur, 0, &have);
1549 702735070 : if (error)
1550 0 : goto out_error;
1551 702735070 : if (!have)
1552 12789590 : goto done;
1553 689945480 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1554 689635757 : if (error)
1555 0 : goto out_error;
1556 689635757 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1557 0 : error = -EFSCORRUPTED;
1558 0 : goto out_error;
1559 : }
1560 689635757 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED)
1561 32342383 : goto done;
1562 : }
1563 :
1564 : /* If the extent starts after the range we want, bail out */
1565 860116835 : if (tmp.rc_startblock >= agbno + aglen)
1566 686082757 : goto done;
1567 :
1568 : /* We found the start of a shared extent! */
1569 174034078 : if (tmp.rc_startblock < agbno) {
1570 1622216 : tmp.rc_blockcount -= (agbno - tmp.rc_startblock);
1571 1622216 : tmp.rc_startblock = agbno;
1572 : }
1573 :
1574 174034078 : *fbno = tmp.rc_startblock;
1575 174034078 : *flen = min(tmp.rc_blockcount, agbno + aglen - *fbno);
1576 174034078 : if (!find_end_of_shared)
1577 172205953 : goto done;
1578 :
1579 : /* Otherwise, find the end of this shared extent */
1580 2378659 : while (*fbno + *flen < agbno + aglen) {
1581 601541 : error = xfs_btree_increment(cur, 0, &have);
1582 625037 : if (error)
1583 0 : goto out_error;
1584 625037 : if (!have)
1585 : break;
1586 619300 : error = xfs_refcount_get_rec(cur, &tmp, &i);
1587 619300 : if (error)
1588 0 : goto out_error;
1589 619300 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1590 0 : error = -EFSCORRUPTED;
1591 0 : goto out_error;
1592 : }
1593 619300 : if (tmp.rc_domain != XFS_REFC_DOMAIN_SHARED ||
1594 608965 : tmp.rc_startblock >= agbno + aglen ||
1595 570534 : tmp.rc_startblock != *fbno + *flen)
1596 : break;
1597 550534 : *flen = min(*flen + tmp.rc_blockcount, agbno + aglen - *fbno);
1598 : }
1599 :
1600 1851621 : done:
1601 991103089 : trace_xfs_refcount_find_shared_result(cur->bc_mp,
1602 991103089 : cur->bc_ag.pag->pag_agno, *fbno, *flen);
1603 :
1604 989566676 : out_error:
1605 989566676 : if (error)
1606 122 : trace_xfs_refcount_find_shared_error(cur->bc_mp,
1607 61 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1608 989566676 : return error;
1609 : }
1610 :
1611 : /*
1612 : * Recovering CoW Blocks After a Crash
1613 : *
1614 : * Due to the way that the copy on write mechanism works, there's a window of
1615 : * opportunity in which we can lose track of allocated blocks during a crash.
1616 : * Because CoW uses delayed allocation in the in-core CoW fork, writeback
1617 : * causes blocks to be allocated and stored in the CoW fork. The blocks are
1618 : * no longer in the free space btree but are not otherwise recorded anywhere
1619 : * until the write completes and the blocks are mapped into the file. A crash
1620 : * in between allocation and remapping results in the replacement blocks being
1621 : * lost. This situation is exacerbated by the CoW extent size hint because
1622 : * allocations can hang around for a long time.
1623 : *
1624 : * However, there is a place where we can record these allocations before they
1625 : * become mappings -- the reference count btree. The btree does not record
1626 : * extents with refcount == 1, so we can record allocations with a refcount of
1627 : * 1. Blocks being used for CoW writeout cannot be shared, so there should be
1628 : * no conflict with shared block records. These mappings should be created
1629 : * when we allocate blocks to the CoW fork and deleted when they're removed
1630 : * from the CoW fork.
1631 : *
1632 : * Minor nit: records for in-progress CoW allocations and records for shared
1633 : * extents must never be merged, to preserve the property that (except for CoW
1634 : * allocations) there are no refcount btree entries with refcount == 1. The
1635 : * only time this could potentially happen is when unsharing a block that's
1636 : * adjacent to CoW allocations, so we must be careful to avoid this.
1637 : *
1638 : * At mount time we recover lost CoW allocations by searching the refcount
1639 : * btree for these refcount == 1 mappings. These represent CoW allocations
1640 : * that were in progress at the time the filesystem went down, so we can free
1641 : * them to get the space back.
1642 : *
1643 : * This mechanism is superior to creating EFIs for unmapped CoW extents for
1644 : * several reasons -- first, EFIs pin the tail of the log and would have to be
1645 : * periodically relogged to avoid filling up the log. Second, CoW completions
1646 : * will have to file an EFD and create new EFIs for whatever remains in the
1647 : * CoW fork; this partially takes care of (1) but extent-size reservations
1648 : * will have to periodically relog even if there's no writeout in progress.
1649 : * This can happen if the CoW extent size hint is set, which you really want.
1650 : * Third, EFIs cannot currently be automatically relogged into newer
1651 : * transactions to advance the log tail. Fourth, stuffing the log full of
1652 : * EFIs places an upper bound on the number of CoW allocations that can be
1653 : * held filesystem-wide at any given time. Recording them in the refcount
1654 : * btree doesn't require us to maintain any state in memory and doesn't pin
1655 : * the log.
1656 : */
1657 : /*
1658 : * Adjust the refcounts of CoW allocations. These allocations are "magic"
1659 : * in that they're not referenced anywhere else in the filesystem, so we
1660 : * stash them in the refcount btree with a refcount of 1 until either file
1661 : * remapping (or CoW cancellation) happens.
1662 : */
1663 : STATIC int
1664 17236555 : xfs_refcount_adjust_cow_extents(
1665 : struct xfs_btree_cur *cur,
1666 : xfs_agblock_t agbno,
1667 : xfs_extlen_t aglen,
1668 : enum xfs_refc_adjust_op adj)
1669 : {
1670 17236555 : struct xfs_refcount_irec ext, tmp;
1671 17236555 : int error;
1672 17236555 : int found_rec, found_tmp;
1673 :
1674 17236555 : if (aglen == 0)
1675 : return 0;
1676 :
1677 : /* Find any overlapping refcount records */
1678 15781228 : error = xfs_refcount_lookup_ge(cur, XFS_REFC_DOMAIN_COW, agbno,
1679 : &found_rec);
1680 15782373 : if (error)
1681 0 : goto out_error;
1682 15782373 : error = xfs_refcount_get_rec(cur, &ext, &found_rec);
1683 15782298 : if (error)
1684 0 : goto out_error;
1685 15782298 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec &&
1686 : ext.rc_domain != XFS_REFC_DOMAIN_COW)) {
1687 0 : error = -EFSCORRUPTED;
1688 0 : goto out_error;
1689 : }
1690 15782298 : if (!found_rec) {
1691 1037308 : ext.rc_startblock = cur->bc_mp->m_sb.sb_agblocks;
1692 1037308 : ext.rc_blockcount = 0;
1693 1037308 : ext.rc_refcount = 0;
1694 1037308 : ext.rc_domain = XFS_REFC_DOMAIN_COW;
1695 : }
1696 :
1697 15782298 : switch (adj) {
1698 1945375 : case XFS_REFCOUNT_ADJUST_COW_ALLOC:
1699 : /* Adding a CoW reservation, there should be nothing here. */
1700 1945375 : if (XFS_IS_CORRUPT(cur->bc_mp,
1701 : agbno + aglen > ext.rc_startblock)) {
1702 0 : error = -EFSCORRUPTED;
1703 0 : goto out_error;
1704 : }
1705 :
1706 1945375 : tmp.rc_startblock = agbno;
1707 1945375 : tmp.rc_blockcount = aglen;
1708 1945375 : tmp.rc_refcount = 1;
1709 1945375 : tmp.rc_domain = XFS_REFC_DOMAIN_COW;
1710 :
1711 1945375 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1712 1945375 : cur->bc_ag.pag->pag_agno, &tmp);
1713 :
1714 1945289 : error = xfs_refcount_insert(cur, &tmp,
1715 : &found_tmp);
1716 1945132 : if (error)
1717 1 : goto out_error;
1718 1945131 : if (XFS_IS_CORRUPT(cur->bc_mp, found_tmp != 1)) {
1719 0 : error = -EFSCORRUPTED;
1720 0 : goto out_error;
1721 : }
1722 : break;
1723 13836923 : case XFS_REFCOUNT_ADJUST_COW_FREE:
1724 : /* Removing a CoW reservation, there should be one extent. */
1725 13836923 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_startblock != agbno)) {
1726 0 : error = -EFSCORRUPTED;
1727 0 : goto out_error;
1728 : }
1729 13836923 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_blockcount != aglen)) {
1730 0 : error = -EFSCORRUPTED;
1731 0 : goto out_error;
1732 : }
1733 13836923 : if (XFS_IS_CORRUPT(cur->bc_mp, ext.rc_refcount != 1)) {
1734 0 : error = -EFSCORRUPTED;
1735 0 : goto out_error;
1736 : }
1737 :
1738 13836923 : ext.rc_refcount = 0;
1739 13836923 : trace_xfs_refcount_modify_extent(cur->bc_mp,
1740 13836923 : cur->bc_ag.pag->pag_agno, &ext);
1741 13836297 : error = xfs_refcount_delete(cur, &found_rec);
1742 13837053 : if (error)
1743 0 : goto out_error;
1744 13837053 : if (XFS_IS_CORRUPT(cur->bc_mp, found_rec != 1)) {
1745 0 : error = -EFSCORRUPTED;
1746 0 : goto out_error;
1747 : }
1748 : break;
1749 0 : default:
1750 0 : ASSERT(0);
1751 : }
1752 :
1753 : return error;
1754 1 : out_error:
1755 2 : trace_xfs_refcount_modify_extent_error(cur->bc_mp,
1756 1 : cur->bc_ag.pag->pag_agno, error, _RET_IP_);
1757 1 : return error;
1758 : }
1759 :
1760 : /*
1761 : * Add or remove refcount btree entries for CoW reservations.
1762 : */
1763 : STATIC int
1764 17235320 : xfs_refcount_adjust_cow(
1765 : struct xfs_btree_cur *cur,
1766 : xfs_agblock_t agbno,
1767 : xfs_extlen_t aglen,
1768 : enum xfs_refc_adjust_op adj)
1769 : {
1770 17235320 : bool shape_changed;
1771 17235320 : int error;
1772 :
1773 : /*
1774 : * Ensure that no rcextents cross the boundary of the adjustment range.
1775 : */
1776 17235320 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1777 : agbno, &shape_changed);
1778 17234098 : if (error)
1779 0 : goto out_error;
1780 :
1781 17234098 : error = xfs_refcount_split_extent(cur, XFS_REFC_DOMAIN_COW,
1782 : agbno + aglen, &shape_changed);
1783 17237151 : if (error)
1784 0 : goto out_error;
1785 :
1786 : /*
1787 : * Try to merge with the left or right extents of the range.
1788 : */
1789 17237151 : error = xfs_refcount_merge_extents(cur, XFS_REFC_DOMAIN_COW, &agbno,
1790 : &aglen, adj, &shape_changed);
1791 17236394 : if (error)
1792 0 : goto out_error;
1793 :
1794 : /* Now that we've taken care of the ends, adjust the middle extents */
1795 17236394 : error = xfs_refcount_adjust_cow_extents(cur, agbno, aglen, adj);
1796 17237201 : if (error)
1797 1 : goto out_error;
1798 :
1799 : return 0;
1800 :
1801 1 : out_error:
1802 2 : trace_xfs_refcount_adjust_cow_error(cur->bc_mp, cur->bc_ag.pag->pag_agno,
1803 1 : error, _RET_IP_);
1804 1 : return error;
1805 : }
1806 :
1807 : /*
1808 : * Record a CoW allocation of aglen blocks at agbno in the refcount btree: emit the cow_increase tracepoint, then return whatever xfs_refcount_adjust_cow() returns for XFS_REFCOUNT_ADJUST_COW_ALLOC.
1809 : */
1810 : STATIC int
1811 3400453 : __xfs_refcount_cow_alloc(
1812 : struct xfs_btree_cur *rcur,
1813 : xfs_agblock_t agbno,
1814 : xfs_extlen_t aglen)
1815 : {
1816 3400453 : trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1817 : agbno, aglen);
1818 :
1819 : /* Add refcount btree reservation */
1820 3400302 : return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1821 : XFS_REFCOUNT_ADJUST_COW_ALLOC);
1822 : }
1823 :
1824 : /*
1825 : * Remove a CoW allocation of aglen blocks at agbno from the refcount btree: emit the cow_decrease tracepoint, then return whatever xfs_refcount_adjust_cow() returns for XFS_REFCOUNT_ADJUST_COW_FREE.
1826 : */
1827 : STATIC int
1828 13835488 : __xfs_refcount_cow_free(
1829 : struct xfs_btree_cur *rcur,
1830 : xfs_agblock_t agbno,
1831 : xfs_extlen_t aglen)
1832 : {
1833 13835488 : trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_ag.pag->pag_agno,
1834 : agbno, aglen);
1835 :
1836 : /* Remove refcount btree reservation */
1837 13834475 : return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1838 : XFS_REFCOUNT_ADJUST_COW_FREE);
1839 : }
1840 :
1841 : /* Record a CoW staging extent in the refcount btree: hand an XFS_REFCOUNT_ALLOC_COW request for [fsb, len] to __xfs_refcount_add(), then add the matching XFS_RMAP_OWN_COW rmap. Does nothing without reflink. */
1842 : void
1843 3400431 : xfs_refcount_alloc_cow_extent(
1844 : struct xfs_trans *tp,
1845 : xfs_fsblock_t fsb,
1846 : xfs_extlen_t len)
1847 : {
1848 3400431 : struct xfs_mount *mp = tp->t_mountp;
1849 :
1850 3400431 : if (!xfs_has_reflink(mp))
1851 : return;
1852 :
1853 3400409 : __xfs_refcount_add(tp, XFS_REFCOUNT_ALLOC_COW, fsb, len);
1854 :
1855 : /* Add rmap entry; fsb is split into its AG number and AG block for the rmap call */
1856 3400484 : xfs_rmap_alloc_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1857 3400490 : XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1858 : }
1859 :
1860 : /* Forget a CoW staging extent in the refcount btree: remove the XFS_RMAP_OWN_COW rmap for [fsb, len] first, then hand an XFS_REFCOUNT_FREE_COW request to __xfs_refcount_add(). Does nothing without reflink. */
1861 : void
1862 13835656 : xfs_refcount_free_cow_extent(
1863 : struct xfs_trans *tp,
1864 : xfs_fsblock_t fsb,
1865 : xfs_extlen_t len)
1866 : {
1867 13835656 : struct xfs_mount *mp = tp->t_mountp;
1868 :
1869 13835656 : if (!xfs_has_reflink(mp))
1870 : return;
1871 :
1872 : /* Remove rmap entry; note this happens before the refcount request, the reverse of the alloc path */
1873 13834302 : xfs_rmap_free_extent(tp, XFS_FSB_TO_AGNO(mp, fsb),
1874 13835449 : XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1875 13835370 : __xfs_refcount_add(tp, XFS_REFCOUNT_FREE_COW, fsb, len);
1876 : }
1877 : /* One leftover CoW staging extent, as collected by xfs_refcount_recover_extent(). */
1878 : struct xfs_refcount_recovery {
1879 : struct list_head rr_list; /* links this entry onto the caller's debris list */
1880 : struct xfs_refcount_irec rr_rrec; /* record decoded by xfs_refcount_btrec_to_irec() */
1881 : };
1882 :
1883 : /* Stuff an extent on the recovery list. Used as the xfs_btree_query_range() callback; priv is the list head to append to. Returns 0, or -EFSCORRUPTED if the record fails any check below. */
1884 : STATIC int
1885 326030 : xfs_refcount_recover_extent(
1886 : struct xfs_btree_cur *cur,
1887 : const union xfs_btree_rec *rec,
1888 : void *priv)
1889 : {
1890 326030 : struct list_head *debris = priv;
1891 326030 : struct xfs_refcount_recovery *rr;
1892 : /* The raw on-disk refcount must be exactly 1; this is checked before anything is allocated. */
1893 326030 : if (XFS_IS_CORRUPT(cur->bc_mp,
1894 : be32_to_cpu(rec->refc.rc_refcount) != 1))
1895 0 : return -EFSCORRUPTED;
1896 :
1897 326030 : rr = kmalloc(sizeof(struct xfs_refcount_recovery),
1898 : GFP_KERNEL | __GFP_NOFAIL); /* __GFP_NOFAIL is requested and the result is used without a NULL check */
1899 326030 : INIT_LIST_HEAD(&rr->rr_list);
1900 326030 : xfs_refcount_btrec_to_irec(rec, &rr->rr_rrec);
1901 : /* Validate the decoded record and insist on the CoW domain; rr is freed here on failure because it is not yet on the list. */
1902 326030 : if (xfs_refcount_check_irec(cur, &rr->rr_rrec) != NULL ||
1903 326030 : XFS_IS_CORRUPT(cur->bc_mp,
1904 : rr->rr_rrec.rc_domain != XFS_REFC_DOMAIN_COW)) {
1905 0 : kfree(rr);
1906 0 : return -EFSCORRUPTED;
1907 : }
1908 :
1909 326030 : list_add_tail(&rr->rr_list, debris);
1910 326030 : return 0;
1911 : }
1912 :
1913 : /* Find and remove leftover CoW reservations in the AG described by pag: first collect every CoW-domain refcount record, then free each one in its own transaction. Returns 0 or an error code. */
1914 : int
1915 52069 : xfs_refcount_recover_cow_leftovers(
1916 : struct xfs_mount *mp,
1917 : struct xfs_perag *pag)
1918 : {
1919 52069 : struct xfs_trans *tp;
1920 52069 : struct xfs_btree_cur *cur;
1921 52069 : struct xfs_buf *agbp;
1922 52069 : struct xfs_refcount_recovery *rr, *n;
1923 52069 : struct list_head debris;
1924 52069 : union xfs_btree_irec low = {
1925 : .rc.rc_domain = XFS_REFC_DOMAIN_COW,
1926 : };
1927 52069 : union xfs_btree_irec high = {
1928 : .rc.rc_domain = XFS_REFC_DOMAIN_COW,
1929 : .rc.rc_startblock = -1U, /* largest start block value, so low..high spans every start block in the CoW domain */
1930 : };
1931 52069 : xfs_fsblock_t fsb;
1932 52069 : int error;
1933 :
1934 : /* reflink filesystems mustn't have AGs larger than 2^31-1 blocks */
1935 52069 : BUILD_BUG_ON(XFS_MAX_CRC_AG_BLOCKS >= XFS_REFC_COWFLAG);
1936 52069 : if (mp->m_sb.sb_agblocks > XFS_MAX_CRC_AG_BLOCKS)
1937 : return -EOPNOTSUPP;
1938 :
1939 52069 : INIT_LIST_HEAD(&debris);
1940 :
1941 : /*
1942 : * In this first part, we use an empty transaction to gather up
1943 : * all the leftover CoW extents so that we can subsequently
1944 : * delete them. The empty transaction is used to avoid
1945 : * a buffer lock deadlock if there happens to be a loop in the
1946 : * refcountbt because we're allowed to re-grab a buffer that is
1947 : * already attached to our transaction. When we're done
1948 : * recording the CoW debris we cancel the (empty) transaction
1949 : * and everything goes away cleanly.
1950 : */
1951 52069 : error = xfs_trans_alloc_empty(mp, &tp);
1952 52069 : if (error)
1953 : return error;
1954 :
1955 52069 : error = xfs_alloc_read_agf(pag, tp, 0, &agbp);
1956 52069 : if (error)
1957 10 : goto out_trans;
1958 52059 : cur = xfs_refcountbt_init_cursor(mp, tp, agbp, pag);
1959 :
1960 : /* Find all the leftover CoW staging extents. The cursor, AGF buffer and empty transaction are all released below whether or not the query failed. */
1961 52059 : error = xfs_btree_query_range(cur, &low, &high,
1962 : xfs_refcount_recover_extent, &debris);
1963 52059 : xfs_btree_del_cursor(cur, error);
1964 52059 : xfs_trans_brelse(tp, agbp);
1965 52059 : xfs_trans_cancel(tp);
1966 52059 : if (error)
1967 12 : goto out_free;
1968 :
1969 : /* Now iterate the list to free the leftovers; the _safe iterator is needed because entries are unlinked inside the loop */
1970 378077 : list_for_each_entry_safe(rr, n, &debris, rr_list) {
1971 : /* Set up transaction. Each extent gets its own tr_write transaction. */
1972 326030 : error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, 0, 0, 0, &tp);
1973 326030 : if (error)
1974 0 : goto out_free;
1975 :
1976 326030 : trace_xfs_refcount_recover_extent(mp, pag->pag_agno,
1977 : &rr->rr_rrec);
1978 :
1979 : /* Free the orphan record */
1980 326030 : fsb = XFS_AGB_TO_FSB(mp, pag->pag_agno,
1981 : rr->rr_rrec.rc_startblock);
1982 326030 : xfs_refcount_free_cow_extent(tp, fsb,
1983 : rr->rr_rrec.rc_blockcount);
1984 :
1985 : /* Free the block. On failure the transaction is still open, hence out_trans rather than out_free. */
1986 326030 : error = xfs_free_extent_later(tp, fsb,
1987 326030 : rr->rr_rrec.rc_blockcount, NULL,
1988 : XFS_AG_RESV_NONE);
1989 326030 : if (error)
1990 0 : goto out_trans;
1991 : /* A failed commit skips the cancel at out_trans; presumably xfs_trans_commit() disposes of tp itself - confirm. */
1992 326030 : error = xfs_trans_commit(tp);
1993 326030 : if (error)
1994 0 : goto out_free;
1995 : /* Only entries whose commit succeeded are unlinked here; the rest are released at out_free. */
1996 326030 : list_del(&rr->rr_list);
1997 326030 : kfree(rr);
1998 : }
1999 : /* error is 0 at this point: every failure above jumps to a label. */
2000 : return error;
2001 10 : out_trans:
2002 10 : xfs_trans_cancel(tp);
2003 22 : out_free:
2004 : /* Free the leftover list */
2005 22 : list_for_each_entry_safe(rr, n, &debris, rr_list) {
2006 0 : list_del(&rr->rr_list);
2007 0 : kfree(rr);
2008 : }
2009 : return error;
2010 : }
2011 :
2012 : /*
2013 : * Scan part of the keyspace of the refcount records and tell us if the area
2014 : * has no records, is fully mapped by records, or is partially filled. The keys searched run from start block bno to bno + len - 1 within the given domain; the answer is passed back through outcome and the return value is that of xfs_btree_has_records().
2015 : */
2016 : int
2017 1534745333 : xfs_refcount_has_records(
2018 : struct xfs_btree_cur *cur,
2019 : enum xfs_refc_domain domain,
2020 : xfs_agblock_t bno,
2021 : xfs_extlen_t len,
2022 : enum xbtree_recpacking *outcome)
2023 : {
2024 1534745333 : union xfs_btree_irec low;
2025 1534745333 : union xfs_btree_irec high;
2026 : /* low starts all-zero and high all-0xFF, so fields not assigned below sit at their extremes. */
2027 1534745333 : memset(&low, 0, sizeof(low));
2028 1534745333 : low.rc.rc_startblock = bno;
2029 1534745333 : memset(&high, 0xFF, sizeof(high));
2030 1534745333 : high.rc.rc_startblock = bno + len - 1; /* inclusive last block; NOTE(review): len == 0 would yield bno - 1 - confirm callers never pass 0 */
2031 1534745333 : low.rc.rc_domain = high.rc.rc_domain = domain;
2032 :
2033 1534745333 : return xfs_btree_has_records(cur, &low, &high, NULL, outcome);
2034 : }
2035 : /* Create the "xfs_refc_intent" cache, sized for struct xfs_refcount_intent, and store it in xfs_refcount_intent_cache. Returns 0 on success or -ENOMEM if creation failed. */
2036 : int __init
2037 50 : xfs_refcount_intent_init_cache(void)
2038 : {
2039 50 : xfs_refcount_intent_cache = kmem_cache_create("xfs_refc_intent",
2040 : sizeof(struct xfs_refcount_intent),
2041 : 0, 0, NULL);
2042 :
2043 50 : return xfs_refcount_intent_cache != NULL ? 0 : -ENOMEM;
2044 : }
2045 : /* Destroy the cache held in xfs_refcount_intent_cache and reset the pointer to NULL so no stale handle remains. */
2046 : void
2047 49 : xfs_refcount_intent_destroy_cache(void)
2048 : {
2049 49 : kmem_cache_destroy(xfs_refcount_intent_cache);
2050 49 : xfs_refcount_intent_cache = NULL;
2051 49 : }
|