Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2010 Red Hat, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_shared.h"
8 : #include "xfs_format.h"
9 : #include "xfs_log_format.h"
10 : #include "xfs_trans_resv.h"
11 : #include "xfs_mount.h"
12 : #include "xfs_btree.h"
13 : #include "xfs_alloc_btree.h"
14 : #include "xfs_alloc.h"
15 : #include "xfs_discard.h"
16 : #include "xfs_error.h"
17 : #include "xfs_extent_busy.h"
18 : #include "xfs_trace.h"
19 : #include "xfs_log.h"
20 : #include "xfs_ag.h"
21 : #include "xfs_health.h"
22 : #include "xfs_rtbitmap.h"
23 :
/*
 * Upper bound, in jiffies, on how long a trim function that supports relaxing
 * keeps going before it drops and retakes the metadata lock it is holding.
 * Cycling the lock gives the rest of the filesystem a chance to make progress
 * while a long FITRIM is running.
 */
#define XFS_TRIM_RELAX_INTERVAL	(HZ)
29 :
30 : /* Trim the free space in this AG by block number. */
31 : static inline int
32 55592 : xfs_trim_ag_bybno(
33 : struct xfs_perag *pag,
34 : struct xfs_buf **agbpp,
35 : xfs_daddr_t start,
36 : xfs_daddr_t end,
37 : xfs_daddr_t minlen,
38 : uint64_t *blocks_trimmed)
39 : {
40 55592 : struct xfs_mount *mp = pag->pag_mount;
41 55592 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
42 55592 : struct xfs_btree_cur *cur;
43 55592 : struct xfs_agf *agf = (*agbpp)->b_addr;
44 55592 : xfs_daddr_t end_daddr;
45 55592 : xfs_agnumber_t agno = pag->pag_agno;
46 55592 : xfs_agblock_t start_agbno;
47 55592 : xfs_agblock_t end_agbno;
48 55592 : xfs_extlen_t minlen_fsb = XFS_BB_TO_FSB(mp, minlen);
49 55592 : unsigned long last_relax = jiffies;
50 55592 : int i;
51 55592 : int error;
52 :
53 55592 : start = max(start, XFS_AGB_TO_DADDR(mp, agno, 0));
54 55592 : start_agbno = xfs_daddr_to_agbno(mp, start);
55 :
56 55592 : end_daddr = XFS_AGB_TO_DADDR(mp, agno, be32_to_cpu(agf->agf_length));
57 55592 : end = min(end, end_daddr - 1);
58 55592 : end_agbno = xfs_daddr_to_agbno(mp, end);
59 :
60 55592 : cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag, XFS_BTNUM_BNO);
61 :
62 55592 : error = xfs_alloc_lookup_le(cur, start_agbno, 0, &i);
63 55592 : if (error)
64 0 : goto out_del_cursor;
65 :
66 : /*
67 : * If we didn't find anything at or below start_agbno, increment the
68 : * cursor to see if there's another record above it.
69 : */
70 55592 : if (!i) {
71 46128 : error = xfs_btree_increment(cur, 0, &i);
72 46128 : if (error)
73 0 : goto out_del_cursor;
74 : }
75 :
76 : /* Loop the entire range that was asked for. */
77 8621370 : while (i) {
78 8589294 : xfs_agblock_t fbno;
79 8589294 : xfs_extlen_t flen;
80 8589294 : xfs_daddr_t dbno;
81 8589294 : xfs_extlen_t dlen;
82 :
83 8589294 : error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
84 8589294 : if (error)
85 3 : goto out_del_cursor;
86 8589294 : if (XFS_IS_CORRUPT(mp, i != 1)) {
87 0 : xfs_btree_mark_sick(cur);
88 0 : error = -EFSCORRUPTED;
89 0 : goto out_del_cursor;
90 : }
91 :
92 : /* Skip extents entirely outside of the range. */
93 8589294 : if (fbno >= end_agbno)
94 : break;
95 8565781 : if (fbno + flen < start_agbno)
96 1243 : goto next_extent;
97 :
98 : /* Trim the extent returned to the range we want. */
99 8564538 : if (fbno < start_agbno) {
100 8190 : flen -= start_agbno - fbno;
101 8190 : fbno = start_agbno;
102 : }
103 8564538 : if (fbno + flen > end_agbno + 1)
104 7979 : flen = end_agbno - fbno + 1;
105 :
106 : /* Ignore too small. */
107 8564538 : if (flen < minlen_fsb) {
108 8178797 : trace_xfs_discard_toosmall(mp, agno, fbno, flen);
109 8178797 : goto next_extent;
110 : }
111 :
112 : /*
113 : * If any blocks in the range are still busy, skip the
114 : * discard and try again the next time.
115 : */
116 385741 : if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
117 77 : trace_xfs_discard_busy(mp, agno, fbno, flen);
118 77 : goto next_extent;
119 : }
120 :
121 385664 : trace_xfs_discard_extent(mp, agno, fbno, flen);
122 :
123 385664 : dbno = XFS_AGB_TO_DADDR(mp, agno, fbno);
124 385664 : dlen = XFS_FSB_TO_BB(mp, flen);
125 385664 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
126 385664 : if (error)
127 0 : goto out_del_cursor;
128 385664 : *blocks_trimmed += flen;
129 :
130 385664 : if (time_after(jiffies, last_relax + XFS_TRIM_RELAX_INTERVAL)) {
131 : /*
132 : * Cycle the AGF lock since we know how to pick up
133 : * where we left off.
134 : */
135 208 : trace_xfs_discard_relax(mp, agno, fbno, flen);
136 208 : xfs_btree_del_cursor(cur, error);
137 208 : xfs_buf_relse(*agbpp);
138 :
139 208 : error = xfs_alloc_read_agf(pag, NULL, 0, agbpp);
140 208 : if (error)
141 0 : return error;
142 :
143 208 : cur = xfs_allocbt_init_cursor(mp, NULL, *agbpp, pag,
144 : XFS_BTNUM_BNO);
145 208 : error = xfs_alloc_lookup_ge(cur, fbno + flen, 0, &i);
146 208 : last_relax = jiffies;
147 : } else {
148 385456 : next_extent:
149 8565573 : error = xfs_btree_increment(cur, 0, &i);
150 : }
151 8565781 : if (error)
152 0 : goto out_del_cursor;
153 :
154 8565781 : if (fatal_signal_pending(current)) {
155 3 : error = -ERESTARTSYS;
156 3 : goto out_del_cursor;
157 : }
158 : }
159 :
160 32076 : out_del_cursor:
161 55592 : xfs_btree_del_cursor(cur, error);
162 55592 : return error;
163 : }
164 :
165 : /* Trim the free space in this AG by length. */
166 : static inline int
167 6035 : xfs_trim_ag_bylen(
168 : struct xfs_perag *pag,
169 : struct xfs_buf *agbp,
170 : xfs_daddr_t start,
171 : xfs_daddr_t end,
172 : xfs_daddr_t minlen,
173 : uint64_t *blocks_trimmed)
174 : {
175 6035 : struct xfs_mount *mp = pag->pag_mount;
176 6035 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
177 6035 : struct xfs_btree_cur *cur;
178 6035 : struct xfs_agf *agf = agbp->b_addr;
179 6035 : int error;
180 6035 : int i;
181 :
182 6035 : cur = xfs_allocbt_init_cursor(mp, NULL, agbp, pag, XFS_BTNUM_CNT);
183 :
184 : /*
185 : * Look up the longest btree in the AGF and start with it.
186 : */
187 12070 : error = xfs_alloc_lookup_ge(cur, 0, be32_to_cpu(agf->agf_longest), &i);
188 6034 : if (error)
189 0 : goto out_del_cursor;
190 :
191 : /*
192 : * Loop until we are done with all extents that are large
193 : * enough to be worth discarding.
194 : */
195 244637 : while (i) {
196 243901 : xfs_agblock_t fbno;
197 243901 : xfs_extlen_t flen;
198 243901 : xfs_daddr_t dbno;
199 243901 : xfs_extlen_t dlen;
200 :
201 243901 : error = xfs_alloc_get_rec(cur, &fbno, &flen, &i);
202 243901 : if (error)
203 : break;
204 243901 : if (XFS_IS_CORRUPT(mp, i != 1)) {
205 0 : xfs_btree_mark_sick(cur);
206 0 : error = -EFSCORRUPTED;
207 0 : break;
208 : }
209 487802 : ASSERT(flen <= be32_to_cpu(agf->agf_longest));
210 :
211 : /*
212 : * use daddr format for all range/len calculations as that is
213 : * the format the range/len variables are supplied in by
214 : * userspace.
215 : */
216 243901 : dbno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, fbno);
217 243901 : dlen = XFS_FSB_TO_BB(mp, flen);
218 :
219 : /*
220 : * Too small? Give up.
221 : */
222 243901 : if (dlen < minlen) {
223 5297 : trace_xfs_discard_toosmall(mp, pag->pag_agno, fbno,
224 : flen);
225 5297 : break;
226 : }
227 :
228 : /*
229 : * If any blocks in the range are still busy, skip the
230 : * discard and try again the next time.
231 : */
232 238604 : if (xfs_extent_busy_search(mp, pag, fbno, flen)) {
233 2233 : trace_xfs_discard_busy(mp, pag->pag_agno, fbno, flen);
234 2233 : goto next_extent;
235 : }
236 :
237 236372 : trace_xfs_discard_extent(mp, pag->pag_agno, fbno, flen);
238 236371 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
239 236372 : if (error)
240 : break;
241 236372 : *blocks_trimmed += flen;
242 :
243 238605 : next_extent:
244 238605 : error = xfs_btree_decrement(cur, 0, &i);
245 238605 : if (error)
246 : break;
247 :
248 238605 : if (fatal_signal_pending(current)) {
249 : error = -ERESTARTSYS;
250 : break;
251 : }
252 : }
253 :
254 736 : out_del_cursor:
255 6035 : xfs_btree_del_cursor(cur, error);
256 6035 : return error;
257 : }
258 :
259 : STATIC int
260 61627 : xfs_trim_ag_extents(
261 : struct xfs_perag *pag,
262 : xfs_daddr_t start,
263 : xfs_daddr_t end,
264 : xfs_daddr_t minlen,
265 : uint64_t *blocks_trimmed)
266 : {
267 61627 : struct xfs_mount *mp = pag->pag_mount;
268 61627 : struct xfs_buf *agbp;
269 61627 : struct xfs_agf *agf;
270 61627 : int error;
271 :
272 : /*
273 : * Force out the log. This means any transactions that might have freed
274 : * space before we take the AGF buffer lock are now on disk, and the
275 : * volatile disk cache is flushed.
276 : */
277 61627 : xfs_log_force(mp, XFS_LOG_SYNC);
278 :
279 61626 : error = xfs_alloc_read_agf(pag, NULL, 0, &agbp);
280 61627 : if (error)
281 : return error;
282 61627 : agf = agbp->b_addr;
283 :
284 61627 : if (start > XFS_AGB_TO_DADDR(mp, pag->pag_agno, 0) ||
285 25160 : end < XFS_AGB_TO_DADDR(mp, pag->pag_agno,
286 25160 : be32_to_cpu(agf->agf_length)) - 1) {
287 : /* Only trimming part of this AG */
288 55592 : error = xfs_trim_ag_bybno(pag, &agbp, start, end, minlen,
289 : blocks_trimmed);
290 : } else {
291 : /* Trim this entire AG */
292 6035 : error = xfs_trim_ag_bylen(pag, agbp, start, end, minlen,
293 : blocks_trimmed);
294 : }
295 :
296 61627 : xfs_buf_relse(agbp);
297 61627 : return error;
298 : }
299 :
300 : static int
301 39453 : xfs_trim_ddev_extents(
302 : struct xfs_mount *mp,
303 : xfs_daddr_t start,
304 : xfs_daddr_t end,
305 : xfs_daddr_t minlen,
306 : uint64_t *blocks_trimmed)
307 : {
308 39453 : struct xfs_perag *pag;
309 39453 : xfs_agnumber_t agno;
310 39453 : int error, last_error = 0;
311 :
312 39453 : if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1)
313 8006 : end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks) - 1;
314 :
315 39453 : agno = xfs_daddr_to_agno(mp, start);
316 101075 : for_each_perag_range(mp, agno, xfs_daddr_to_agno(mp, end), pag) {
317 61627 : error = xfs_trim_ag_extents(pag, start, end, minlen,
318 : blocks_trimmed);
319 61627 : if (error) {
320 5 : last_error = error;
321 5 : if (error == -ERESTARTSYS) {
322 5 : xfs_perag_rele(pag);
323 5 : break;
324 : }
325 : }
326 : }
327 :
328 39453 : return last_error;
329 : }
330 :
331 : #ifdef CONFIG_XFS_RT
332 : struct xfs_trim_rtdev {
333 : uint64_t *blocks_trimmed;
334 : xfs_rtblock_t minlen_fsb;
335 : unsigned long last_relax;
336 : };
337 :
338 : static int
339 394767 : xfs_trim_rtdev_extent(
340 : struct xfs_mount *mp,
341 : struct xfs_trans *tp,
342 : const struct xfs_rtalloc_rec *rec,
343 : void *priv)
344 : {
345 394767 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_rtdev_targp);
346 394767 : struct xfs_trim_rtdev *tr = priv;
347 394767 : xfs_rtblock_t rbno, rlen;
348 394767 : xfs_daddr_t dbno, dlen;
349 394767 : int error;
350 :
351 394767 : if (fatal_signal_pending(current))
352 : return -ERESTARTSYS;
353 :
354 394767 : rbno = xfs_rtx_to_rtb(mp, rec->ar_startext);
355 394767 : rlen = xfs_rtx_to_rtb(mp, rec->ar_extcount);
356 :
357 : /* Ignore too small. */
358 394767 : if (rlen < tr->minlen_fsb) {
359 373353 : trace_xfs_discard_rttoosmall(mp, rbno, rlen);
360 373353 : goto out;
361 : }
362 :
363 21414 : trace_xfs_discard_rtextent(mp, rbno, rlen);
364 :
365 21414 : dbno = XFS_FSB_TO_BB(mp, rbno);
366 21414 : dlen = XFS_FSB_TO_BB(mp, rlen);
367 :
368 21414 : error = blkdev_issue_discard(bdev, dbno, dlen, GFP_NOFS);
369 21414 : if (error)
370 : return error;
371 :
372 21414 : *tr->blocks_trimmed += rlen;
373 :
374 394767 : out:
375 394767 : if (time_after(jiffies, tr->last_relax + XFS_TRIM_RELAX_INTERVAL)) {
376 : /*
377 : * Cycle the rtbitmap lock since we know how to pick up
378 : * where we left off.
379 : */
380 73 : trace_xfs_discard_rtrelax(mp, rbno, rlen);
381 73 : xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
382 73 : xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
383 73 : tr->last_relax = jiffies;
384 : }
385 :
386 : return 0;
387 : }
388 :
389 : static int
390 5086 : xfs_trim_rtdev_extents(
391 : struct xfs_mount *mp,
392 : xfs_daddr_t start,
393 : xfs_daddr_t end,
394 : xfs_daddr_t minlen,
395 : uint64_t *blocks_trimmed)
396 : {
397 5086 : struct xfs_rtalloc_rec low = { }, high = { };
398 5086 : struct xfs_trim_rtdev tr = {
399 : .blocks_trimmed = blocks_trimmed,
400 5086 : .minlen_fsb = XFS_BB_TO_FSB(mp, minlen),
401 : .last_relax = jiffies,
402 : };
403 5086 : xfs_daddr_t rtdev_daddr;
404 5086 : xfs_extlen_t mod;
405 5086 : int error;
406 :
407 : /* Shift the start and end downwards to match the rt device. */
408 5086 : rtdev_daddr = XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks);
409 5086 : if (start > rtdev_daddr)
410 1893 : start -= rtdev_daddr;
411 : else
412 : start = 0;
413 :
414 5086 : if (end <= rtdev_daddr)
415 : return 0;
416 3172 : end -= rtdev_daddr;
417 :
418 3172 : if (end > XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1)
419 3 : end = XFS_FSB_TO_BB(mp, mp->m_sb.sb_rblocks) - 1;
420 :
421 : /* Convert the rt blocks to rt extents */
422 3172 : low.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSB(mp, start), &mod);
423 3172 : if (mod)
424 0 : low.ar_startext++;
425 3172 : high.ar_startext = xfs_rtb_to_rtx(mp, XFS_BB_TO_FSBT(mp, end), &mod);
426 :
427 : /*
428 : * Walk the free ranges between low and high. The query_range function
429 : * trims the extents returned.
430 : */
431 3172 : xfs_rtbitmap_lock_shared(mp, XFS_RBMLOCK_BITMAP);
432 3172 : error = xfs_rtalloc_query_range(mp, NULL, &low, &high,
433 : xfs_trim_rtdev_extent, &tr);
434 3172 : xfs_rtbitmap_unlock_shared(mp, XFS_RBMLOCK_BITMAP);
435 3172 : return error;
436 : }
437 : #else
438 : # define xfs_trim_rtdev_extents(m,s,e,n,b) (-EOPNOTSUPP)
439 : #endif /* CONFIG_XFS_RT */
440 :
441 : /*
442 : * trim a range of the filesystem.
443 : *
444 : * Note: the parameters passed from userspace are byte ranges into the
445 : * filesystem which does not match to the format we use for filesystem block
446 : * addressing. FSB addressing is sparse (AGNO|AGBNO), while the incoming format
447 : * is a linear address range. Hence we need to use DADDR based conversions and
448 : * comparisons for determining the correct offset and regions to trim.
449 : *
450 : * The realtime device is mapped into the FITRIM "address space" immediately
451 : * after the data device.
452 : */
453 : int
454 39469 : xfs_ioc_trim(
455 : struct xfs_mount *mp,
456 : struct fstrim_range __user *urange)
457 : {
458 39469 : struct block_device *bdev = xfs_buftarg_bdev(mp->m_ddev_targp);
459 39469 : struct block_device *rt_bdev = NULL;
460 39469 : unsigned int granularity = bdev_discard_granularity(bdev);
461 39469 : struct fstrim_range range;
462 39469 : xfs_rfsblock_t max_blocks;
463 39469 : xfs_daddr_t start, end, minlen;
464 39469 : uint64_t blocks_trimmed = 0;
465 39469 : int error, last_error = 0;
466 :
467 39469 : if (!capable(CAP_SYS_ADMIN))
468 : return -EPERM;
469 39469 : if (!bdev_max_discard_sectors(bdev))
470 : return -EOPNOTSUPP;
471 :
472 39469 : if (mp->m_rtdev_targp) {
473 5086 : rt_bdev = xfs_buftarg_bdev(mp->m_rtdev_targp);
474 5086 : if (!bdev_max_discard_sectors(rt_bdev))
475 : return -EOPNOTSUPP;
476 5086 : granularity = max(granularity,
477 : bdev_discard_granularity(rt_bdev));
478 : }
479 :
480 : /*
481 : * We haven't recovered the log, so we cannot use our bnobt-guided
482 : * storage zapping commands.
483 : */
484 39469 : if (xfs_has_norecovery(mp))
485 : return -EROFS;
486 :
487 39467 : if (copy_from_user(&range, urange, sizeof(range)))
488 : return -EFAULT;
489 :
490 39467 : range.minlen = max_t(u64, granularity, range.minlen);
491 39467 : minlen = BTOBB(range.minlen);
492 : /*
493 : * Truncating down the len isn't actually quite correct, but using
494 : * BBTOB would mean we trivially get overflows for values
495 : * of ULLONG_MAX or slightly lower. And ULLONG_MAX is the default
496 : * used by the fstrim application. In the end it really doesn't
497 : * matter as trimming blocks is an advisory interface.
498 : */
499 39467 : max_blocks = mp->m_sb.sb_dblocks + mp->m_sb.sb_rblocks;
500 39467 : if (range.start >= XFS_FSB_TO_B(mp, max_blocks) ||
501 39457 : range.minlen > XFS_FSB_TO_B(mp, mp->m_ag_max_usable) ||
502 39457 : range.len < mp->m_sb.sb_blocksize)
503 : return -EINVAL;
504 :
505 39453 : start = BTOBB(range.start);
506 39453 : end = start + BTOBBT(range.len) - 1;
507 :
508 39453 : error = xfs_trim_ddev_extents(mp, start, end, minlen, &blocks_trimmed);
509 39453 : if (error == -ERESTARTSYS)
510 : return error;
511 39448 : if (error)
512 0 : last_error = error;
513 :
514 39448 : if (rt_bdev) {
515 5086 : error = xfs_trim_rtdev_extents(mp, start, end, minlen,
516 : &blocks_trimmed);
517 5086 : if (error == -ERESTARTSYS)
518 : return error;
519 5086 : if (error)
520 0 : last_error = error;
521 : }
522 :
523 39448 : if (last_error)
524 : return last_error;
525 :
526 39448 : range.len = XFS_FSB_TO_B(mp, blocks_trimmed);
527 39448 : if (copy_to_user(urange, &range, sizeof(range)))
528 0 : return -EFAULT;
529 : return 0;
530 : }
|