Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_bit.h"
13 : #include "xfs_mount.h"
14 : #include "xfs_inode.h"
15 : #include "xfs_btree.h"
16 : #include "xfs_ialloc.h"
17 : #include "xfs_ialloc_btree.h"
18 : #include "xfs_alloc.h"
19 : #include "xfs_errortag.h"
20 : #include "xfs_error.h"
21 : #include "xfs_bmap.h"
22 : #include "xfs_trans.h"
23 : #include "xfs_buf_item.h"
24 : #include "xfs_icreate_item.h"
25 : #include "xfs_icache.h"
26 : #include "xfs_trace.h"
27 : #include "xfs_log.h"
28 : #include "xfs_rmap.h"
29 : #include "xfs_ag.h"
30 : #include "xfs_health.h"
31 :
32 : /*
33 : * Lookup a record by ino in the btree given by cur.
34 : */
35 : int /* error */
36 3842540066 : xfs_inobt_lookup(
37 : struct xfs_btree_cur *cur, /* btree cursor */
38 : xfs_agino_t ino, /* starting inode of chunk */
39 : xfs_lookup_t dir, /* <=, >=, == */
40 : int *stat) /* success/failure */
41 : {
42 7469953814 : cur->bc_rec.i.ir_startino = ino;
43 7469953814 : cur->bc_rec.i.ir_holemask = 0;
44 7469953814 : cur->bc_rec.i.ir_count = 0;
45 7469953814 : cur->bc_rec.i.ir_freecount = 0;
46 7469953814 : cur->bc_rec.i.ir_free = 0;
47 3847511598 : return xfs_btree_lookup(cur, dir, stat);
48 : }
49 :
50 : /*
51 : * Update the record referred to by cur to the value given.
52 : * This either works (return 0) or gets an EFSCORRUPTED error.
53 : */
54 : STATIC int /* error */
55 205405223 : xfs_inobt_update(
56 : struct xfs_btree_cur *cur, /* btree cursor */
57 : xfs_inobt_rec_incore_t *irec) /* btree record */
58 : {
59 205405223 : union xfs_btree_rec rec;
60 :
61 205405223 : rec.inobt.ir_startino = cpu_to_be32(irec->ir_startino);
62 205405223 : if (xfs_has_sparseinodes(cur->bc_mp)) {
63 205404961 : rec.inobt.ir_u.sp.ir_holemask = cpu_to_be16(irec->ir_holemask);
64 205404961 : rec.inobt.ir_u.sp.ir_count = irec->ir_count;
65 205404961 : rec.inobt.ir_u.sp.ir_freecount = irec->ir_freecount;
66 : } else {
67 : /* ir_holemask/ir_count not supported on-disk */
68 262 : rec.inobt.ir_u.f.ir_freecount = cpu_to_be32(irec->ir_freecount);
69 : }
70 205405223 : rec.inobt.ir_free = cpu_to_be64(irec->ir_free);
71 205405223 : return xfs_btree_update(cur, &rec);
72 : }
73 :
74 : /* Convert on-disk btree record to incore inobt record. */
75 : void
76 19739393796 : xfs_inobt_btrec_to_irec(
77 : struct xfs_mount *mp,
78 : const union xfs_btree_rec *rec,
79 : struct xfs_inobt_rec_incore *irec)
80 : {
81 19739393796 : irec->ir_startino = be32_to_cpu(rec->inobt.ir_startino);
82 19739393796 : if (xfs_has_sparseinodes(mp)) {
83 19739392658 : irec->ir_holemask = be16_to_cpu(rec->inobt.ir_u.sp.ir_holemask);
84 19739392658 : irec->ir_count = rec->inobt.ir_u.sp.ir_count;
85 19739392658 : irec->ir_freecount = rec->inobt.ir_u.sp.ir_freecount;
86 : } else {
87 : /*
88 : * ir_holemask/ir_count not supported on-disk. Fill in hardcoded
89 : * values for full inode chunks.
90 : */
91 1138 : irec->ir_holemask = XFS_INOBT_HOLEMASK_FULL;
92 1138 : irec->ir_count = XFS_INODES_PER_CHUNK;
93 1138 : irec->ir_freecount =
94 1138 : be32_to_cpu(rec->inobt.ir_u.f.ir_freecount);
95 : }
96 19739393796 : irec->ir_free = be64_to_cpu(rec->inobt.ir_free);
97 19739393796 : }
98 :
99 : /* Compute the freecount of an incore inode record. */
100 : uint8_t
101 19813482081 : xfs_inobt_rec_freecount(
102 : const struct xfs_inobt_rec_incore *irec)
103 : {
104 19813482081 : uint64_t realfree;
105 :
106 19813482081 : if (!xfs_inobt_issparse(irec->ir_holemask))
107 12347960436 : realfree = irec->ir_free;
108 : else
109 7465521645 : realfree = irec->ir_free & xfs_inobt_irec_to_allocmask(irec);
110 39633831536 : return hweight64(realfree);
111 : }
112 :
113 : inline xfs_failaddr_t
114 19748738213 : xfs_inobt_check_perag_irec(
115 : struct xfs_perag *pag,
116 : const struct xfs_inobt_rec_incore *irec)
117 : {
118 : /* Record has to be properly aligned within the AG. */
119 19748738213 : if (!xfs_verify_agino(pag, irec->ir_startino))
120 0 : return __this_address;
121 19748738213 : if (!xfs_verify_agino(pag,
122 : irec->ir_startino + XFS_INODES_PER_CHUNK - 1))
123 0 : return __this_address;
124 19748738213 : if (irec->ir_count < XFS_INODES_PER_HOLEMASK_BIT ||
125 : irec->ir_count > XFS_INODES_PER_CHUNK)
126 0 : return __this_address;
127 19748738213 : if (irec->ir_freecount > XFS_INODES_PER_CHUNK)
128 0 : return __this_address;
129 :
130 19748738213 : if (xfs_inobt_rec_freecount(irec) != irec->ir_freecount)
131 0 : return __this_address;
132 :
133 : return NULL;
134 : }
135 :
136 : /* Simple checks for inode records. */
137 : xfs_failaddr_t
138 63814760 : xfs_inobt_check_irec(
139 : struct xfs_btree_cur *cur,
140 : const struct xfs_inobt_rec_incore *irec)
141 : {
142 63814760 : return xfs_inobt_check_perag_irec(cur->bc_ag.pag, irec);
143 : }
144 :
145 : static inline int
146 0 : xfs_inobt_complain_bad_rec(
147 : struct xfs_btree_cur *cur,
148 : xfs_failaddr_t fa,
149 : const struct xfs_inobt_rec_incore *irec)
150 : {
151 0 : struct xfs_mount *mp = cur->bc_mp;
152 :
153 0 : xfs_warn(mp,
154 : "%s Inode BTree record corruption in AG %d detected at %pS!",
155 : cur->bc_btnum == XFS_BTNUM_INO ? "Used" : "Free",
156 : cur->bc_ag.pag->pag_agno, fa);
157 0 : xfs_warn(mp,
158 : "start inode 0x%x, count 0x%x, free 0x%x freemask 0x%llx, holemask 0x%x",
159 : irec->ir_startino, irec->ir_count, irec->ir_freecount,
160 : irec->ir_free, irec->ir_holemask);
161 0 : xfs_btree_mark_sick(cur);
162 0 : return -EFSCORRUPTED;
163 : }
164 :
165 : /*
166 : * Get the data from the pointed-to record.
167 : */
168 : int
169 19535494422 : xfs_inobt_get_rec(
170 : struct xfs_btree_cur *cur,
171 : struct xfs_inobt_rec_incore *irec,
172 : int *stat)
173 : {
174 19535494422 : struct xfs_mount *mp = cur->bc_mp;
175 19535494422 : union xfs_btree_rec *rec;
176 19535494422 : xfs_failaddr_t fa;
177 19535494422 : int error;
178 :
179 19535494422 : error = xfs_btree_get_rec(cur, &rec, stat);
180 19506239266 : if (error || *stat == 0)
181 : return error;
182 :
183 19515729726 : xfs_inobt_btrec_to_irec(mp, rec, irec);
184 19523673023 : fa = xfs_inobt_check_irec(cur, irec);
185 19554691012 : if (fa)
186 0 : return xfs_inobt_complain_bad_rec(cur, fa, irec);
187 :
188 : return 0;
189 : }
190 :
191 : /*
192 : * Insert a single inobt record. Cursor must already point to desired location.
193 : */
194 : int
195 0 : xfs_inobt_insert_rec(
196 : struct xfs_btree_cur *cur,
197 : uint16_t holemask,
198 : uint8_t count,
199 : int32_t freecount,
200 : xfs_inofree_t free,
201 : int *stat)
202 : {
203 15044631 : cur->bc_rec.i.ir_holemask = holemask;
204 15044631 : cur->bc_rec.i.ir_count = count;
205 15044631 : cur->bc_rec.i.ir_freecount = freecount;
206 15044631 : cur->bc_rec.i.ir_free = free;
207 0 : return xfs_btree_insert(cur, stat);
208 : }
209 :
210 : /*
211 : * Insert records describing a newly allocated inode chunk into the inobt.
212 : */
213 : STATIC int
214 549226 : xfs_inobt_insert(
215 : struct xfs_perag *pag,
216 : struct xfs_trans *tp,
217 : struct xfs_buf *agbp,
218 : xfs_agino_t newino,
219 : xfs_agino_t newlen,
220 : xfs_btnum_t btnum)
221 : {
222 549226 : struct xfs_btree_cur *cur;
223 549226 : xfs_agino_t thisino;
224 549226 : int i;
225 549226 : int error;
226 :
227 549226 : cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);
228 :
229 549226 : for (thisino = newino;
230 1098505 : thisino < newino + newlen;
231 549279 : thisino += XFS_INODES_PER_CHUNK) {
232 549279 : error = xfs_inobt_lookup(cur, thisino, XFS_LOOKUP_EQ, &i);
233 549281 : if (error) {
234 1 : xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
235 1 : return error;
236 : }
237 549280 : ASSERT(i == 0);
238 :
239 549280 : error = xfs_inobt_insert_rec(cur, XFS_INOBT_HOLEMASK_FULL,
240 : XFS_INODES_PER_CHUNK,
241 : XFS_INODES_PER_CHUNK,
242 : XFS_INOBT_ALL_FREE, &i);
243 549279 : if (error) {
244 0 : xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
245 0 : return error;
246 : }
247 549279 : ASSERT(i == 1);
248 : }
249 :
250 549226 : xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
251 :
252 549226 : return 0;
253 : }
254 :
/*
 * Debug-only sanity check: walk every inobt record and verify that the sum
 * of the per-record free counts matches the cached per-AG free count.
 */
#ifdef DEBUG
static int
xfs_check_agi_freecount(
	struct xfs_btree_cur	*cur)
{
	xfs_inobt_rec_incore_t	rec;
	int			freecount = 0;
	int			error;
	int			i;

	/* Only cheap enough to walk when the tree is a single leaf. */
	if (cur->bc_nlevels != 1)
		return 0;

	error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
	if (error)
		return error;

	for (;;) {
		error = xfs_inobt_get_rec(cur, &rec, &i);
		if (error)
			return error;
		if (!i)
			break;

		freecount += rec.ir_freecount;
		error = xfs_btree_increment(cur, 0, &i);
		if (error)
			return error;
		if (i != 1)
			break;
	}

	/* Skip the assert if the fs is shutting down underneath us. */
	if (!xfs_is_shutdown(cur->bc_mp))
		ASSERT(freecount == cur->bc_ag.pag->pagi_freecount);
	return 0;
}
#else
#define xfs_check_agi_freecount(cur)	0
#endif
294 :
/*
 * Initialise a new set of inodes. When called without a transaction context
 * (e.g. from recovery) we initiate a delayed write of the inode buffers rather
 * than logging them (which in a transaction context puts them into the AIL
 * for writeback rather than the xfsbufd queue).
 *
 * @mp:		filesystem mount
 * @tp:		transaction, or NULL when called from log recovery
 * @buffer_list: delwri queue used only when @tp is NULL
 * @icount:	number of inodes being initialised (for the icreate log item)
 * @agno/@agbno: location of the new chunk within the filesystem
 * @length:	chunk length in filesystem blocks
 * @gen:	generation number to stamp into every new inode
 */
int
xfs_ialloc_inode_init(
	struct xfs_mount	*mp,
	struct xfs_trans	*tp,
	struct list_head	*buffer_list,
	int			icount,
	xfs_agnumber_t		agno,
	xfs_agblock_t		agbno,
	xfs_agblock_t		length,
	unsigned int		gen)
{
	struct xfs_buf		*fbuf;
	struct xfs_dinode	*free;
	int			nbufs;
	int			version;
	int			i, j;
	xfs_daddr_t		d;
	xfs_ino_t		ino = 0;
	int			error;

	/*
	 * Loop over the new block(s), filling in the inodes. For small block
	 * sizes, manipulate the inodes in buffers which are multiples of the
	 * blocks size.
	 */
	nbufs = length / M_IGEO(mp)->blocks_per_cluster;

	/*
	 * Figure out what version number to use in the inodes we create. If
	 * the superblock version has caught up to the one that supports the new
	 * inode format, then use the new inode version. Otherwise use the old
	 * version so that old kernels will continue to be able to use the file
	 * system.
	 *
	 * For v3 inodes, we also need to write the inode number into the inode,
	 * so calculate the first inode number of the chunk here as
	 * XFS_AGB_TO_AGINO() only works within a filesystem block, not
	 * across multiple filesystem blocks (such as a cluster) and so cannot
	 * be used in the cluster buffer loop below.
	 *
	 * Further, because we are writing the inode directly into the buffer
	 * and calculating a CRC on the entire inode, we have to log the entire
	 * inode so that the entire range the CRC covers is present in the log.
	 * That means for v3 inode we log the entire buffer rather than just the
	 * inode cores.
	 */
	if (xfs_has_v3inodes(mp)) {
		version = 3;
		ino = XFS_AGINO_TO_INO(mp, agno, XFS_AGB_TO_AGINO(mp, agbno));

		/*
		 * log the initialisation that is about to take place as an
		 * logical operation. This means the transaction does not
		 * need to log the physical changes to the inode buffers as log
		 * recovery will know what initialisation is actually needed.
		 * Hence we only need to log the buffers as "ordered" buffers so
		 * they track in the AIL as if they were physically logged.
		 */
		if (tp)
			xfs_icreate_log(tp, agno, agbno, icount,
					mp->m_sb.sb_inodesize, length, gen);
	} else
		version = 2;

	for (j = 0; j < nbufs; j++) {
		/*
		 * Get the block.
		 */
		d = XFS_AGB_TO_DADDR(mp, agno, agbno +
				(j * M_IGEO(mp)->blocks_per_cluster));
		error = xfs_trans_get_buf(tp, mp->m_ddev_targp, d,
				mp->m_bsize * M_IGEO(mp)->blocks_per_cluster,
				XBF_UNMAPPED, &fbuf);
		if (error)
			return error;

		/* Initialize the inode buffers and log them appropriately. */
		fbuf->b_ops = &xfs_inode_buf_ops;
		xfs_buf_zero(fbuf, 0, BBTOB(fbuf->b_length));
		for (i = 0; i < M_IGEO(mp)->inodes_per_cluster; i++) {
			int	ioffset = i << mp->m_sb.sb_inodelog;

			free = xfs_make_iptr(mp, fbuf, i);
			free->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
			free->di_version = version;
			free->di_gen = cpu_to_be32(gen);
			free->di_next_unlinked = cpu_to_be32(NULLAGINO);

			if (version == 3) {
				/* v3: stamp the inode number and CRC in place */
				free->di_ino = cpu_to_be64(ino);
				ino++;
				uuid_copy(&free->di_uuid,
					  &mp->m_sb.sb_meta_uuid);
				xfs_dinode_calc_crc(mp, free);
			} else if (tp) {
				/* just log the inode core */
				xfs_trans_log_buf(tp, fbuf, ioffset,
					  ioffset + XFS_DINODE_SIZE(mp) - 1);
			}
		}

		if (tp) {
			/*
			 * Mark the buffer as an inode allocation buffer so it
			 * sticks in AIL at the point of this allocation
			 * transaction. This ensures the they are on disk before
			 * the tail of the log can be moved past this
			 * transaction (i.e. by preventing relogging from moving
			 * it forward in the log).
			 */
			xfs_trans_inode_alloc_buf(tp, fbuf);
			if (version == 3) {
				/*
				 * Mark the buffer as ordered so that they are
				 * not physically logged in the transaction but
				 * still tracked in the AIL as part of the
				 * transaction and pin the log appropriately.
				 */
				xfs_trans_ordered_buf(tp, fbuf);
			}
		} else {
			/* no transaction: queue for delayed write instead */
			fbuf->b_flags |= XBF_DONE;
			xfs_buf_delwri_queue(fbuf, buffer_list);
			xfs_buf_relse(fbuf);
		}
	}
	return 0;
}
429 :
430 : /*
431 : * Align startino and allocmask for a recently allocated sparse chunk such that
432 : * they are fit for insertion (or merge) into the on-disk inode btrees.
433 : *
434 : * Background:
435 : *
436 : * When enabled, sparse inode support increases the inode alignment from cluster
437 : * size to inode chunk size. This means that the minimum range between two
438 : * non-adjacent inode records in the inobt is large enough for a full inode
439 : * record. This allows for cluster sized, cluster aligned block allocation
440 : * without need to worry about whether the resulting inode record overlaps with
441 : * another record in the tree. Without this basic rule, we would have to deal
442 : * with the consequences of overlap by potentially undoing recent allocations in
443 : * the inode allocation codepath.
444 : *
445 : * Because of this alignment rule (which is enforced on mount), there are two
446 : * inobt possibilities for newly allocated sparse chunks. One is that the
447 : * aligned inode record for the chunk covers a range of inodes not already
448 : * covered in the inobt (i.e., it is safe to insert a new sparse record). The
449 : * other is that a record already exists at the aligned startino that considers
450 : * the newly allocated range as sparse. In the latter case, record content is
451 : * merged in hope that sparse inode chunks fill to full chunks over time.
452 : */
453 : STATIC void
454 274744 : xfs_align_sparse_ino(
455 : struct xfs_mount *mp,
456 : xfs_agino_t *startino,
457 : uint16_t *allocmask)
458 : {
459 274744 : xfs_agblock_t agbno;
460 274744 : xfs_agblock_t mod;
461 274744 : int offset;
462 :
463 274744 : agbno = XFS_AGINO_TO_AGBNO(mp, *startino);
464 274744 : mod = agbno % mp->m_sb.sb_inoalignmt;
465 274744 : if (!mod)
466 : return;
467 :
468 : /* calculate the inode offset and align startino */
469 161334 : offset = XFS_AGB_TO_AGINO(mp, mod);
470 161334 : *startino -= offset;
471 :
472 : /*
473 : * Since startino has been aligned down, left shift allocmask such that
474 : * it continues to represent the same physical inodes relative to the
475 : * new startino.
476 : */
477 161334 : *allocmask <<= offset / XFS_INODES_PER_HOLEMASK_BIT;
478 : }
479 :
480 : /*
481 : * Determine whether the source inode record can merge into the target. Both
482 : * records must be sparse, the inode ranges must match and there must be no
483 : * allocation overlap between the records.
484 : */
485 : STATIC bool
486 27314 : __xfs_inobt_can_merge(
487 : struct xfs_inobt_rec_incore *trec, /* tgt record */
488 : struct xfs_inobt_rec_incore *srec) /* src record */
489 : {
490 27314 : uint64_t talloc;
491 27314 : uint64_t salloc;
492 :
493 : /* records must cover the same inode range */
494 27314 : if (trec->ir_startino != srec->ir_startino)
495 : return false;
496 :
497 : /* both records must be sparse */
498 27314 : if (!xfs_inobt_issparse(trec->ir_holemask) ||
499 27314 : !xfs_inobt_issparse(srec->ir_holemask))
500 : return false;
501 :
502 : /* both records must track some inodes */
503 27314 : if (!trec->ir_count || !srec->ir_count)
504 : return false;
505 :
506 : /* can't exceed capacity of a full record */
507 27314 : if (trec->ir_count + srec->ir_count > XFS_INODES_PER_CHUNK)
508 : return false;
509 :
510 : /* verify there is no allocation overlap */
511 27314 : talloc = xfs_inobt_irec_to_allocmask(trec);
512 27314 : salloc = xfs_inobt_irec_to_allocmask(srec);
513 27314 : if (talloc & salloc)
514 0 : return false;
515 :
516 : return true;
517 : }
518 :
519 : /*
520 : * Merge the source inode record into the target. The caller must call
521 : * __xfs_inobt_can_merge() to ensure the merge is valid.
522 : */
523 : STATIC void
524 27314 : __xfs_inobt_rec_merge(
525 : struct xfs_inobt_rec_incore *trec, /* target */
526 : struct xfs_inobt_rec_incore *srec) /* src */
527 : {
528 27314 : ASSERT(trec->ir_startino == srec->ir_startino);
529 :
530 : /* combine the counts */
531 27314 : trec->ir_count += srec->ir_count;
532 27314 : trec->ir_freecount += srec->ir_freecount;
533 :
534 : /*
535 : * Merge the holemask and free mask. For both fields, 0 bits refer to
536 : * allocated inodes. We combine the allocated ranges with bitwise AND.
537 : */
538 27314 : trec->ir_holemask &= srec->ir_holemask;
539 27314 : trec->ir_free &= srec->ir_free;
540 27314 : }
541 :
/*
 * Insert a new sparse inode chunk into the associated inode btree. The inode
 * record for the sparse chunk is pre-aligned to a startino that should match
 * any pre-existing sparse inode record in the tree. This allows sparse chunks
 * to fill over time.
 *
 * This function supports two modes of handling preexisting records depending on
 * the merge flag. If merge is true, the provided record is merged with the
 * existing record and updated in place. The merged record is returned in nrec.
 * If merge is false, an existing record is replaced with the provided record.
 * If no preexisting record exists, the provided record is always inserted.
 *
 * It is considered corruption if a merge is requested and not possible. Given
 * the sparse inode alignment constraints, this should never happen.
 *
 * Returns 0 on success or a negative error code; on any failure the cursor
 * is torn down with XFS_BTREE_ERROR.
 */
STATIC int
xfs_inobt_insert_sprec(
	struct xfs_perag		*pag,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	int				btnum,
	struct xfs_inobt_rec_incore	*nrec,	/* in/out: new/merged rec. */
	bool				merge)	/* merge or replace */
{
	struct xfs_mount		*mp = pag->pag_mount;
	struct xfs_btree_cur		*cur;
	int				error;
	int				i;
	struct xfs_inobt_rec_incore	rec;

	cur = xfs_inobt_init_cursor(pag, tp, agbp, btnum);

	/* the new record is pre-aligned so we know where to look */
	error = xfs_inobt_lookup(cur, nrec->ir_startino, XFS_LOOKUP_EQ, &i);
	if (error)
		goto error;
	/* if nothing there, insert a new record and return */
	if (i == 0) {
		error = xfs_inobt_insert_rec(cur, nrec->ir_holemask,
					     nrec->ir_count, nrec->ir_freecount,
					     nrec->ir_free, &i);
		if (error)
			goto error;
		/* insert must have placed exactly one record */
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error;
		}

		goto out;
	}

	/*
	 * A record exists at this startino. Merge or replace the record
	 * depending on what we've been asked to do.
	 */
	if (merge) {
		error = xfs_inobt_get_rec(cur, &rec, &i);
		if (error)
			goto error;
		if (XFS_IS_CORRUPT(mp, i != 1)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error;
		}
		/* lookup was EQ, so the startino must match exactly */
		if (XFS_IS_CORRUPT(mp, rec.ir_startino != nrec->ir_startino)) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error;
		}

		/*
		 * This should never fail. If we have coexisting records that
		 * cannot merge, something is seriously wrong.
		 */
		if (XFS_IS_CORRUPT(mp, !__xfs_inobt_can_merge(nrec, &rec))) {
			xfs_btree_mark_sick(cur);
			error = -EFSCORRUPTED;
			goto error;
		}

		trace_xfs_irec_merge_pre(mp, pag->pag_agno, rec.ir_startino,
					 rec.ir_holemask, nrec->ir_startino,
					 nrec->ir_holemask);

		/* merge to nrec to output the updated record */
		__xfs_inobt_rec_merge(nrec, &rec);

		trace_xfs_irec_merge_post(mp, pag->pag_agno, nrec->ir_startino,
					  nrec->ir_holemask);

		/* debug-only check that the merged counts are consistent */
		error = xfs_inobt_rec_check_count(mp, nrec);
		if (error)
			goto error;
	}

	/* replace (or rewrite with the merged) record in place */
	error = xfs_inobt_update(cur, nrec);
	if (error)
		goto error;

out:
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;
error:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
649 :
/*
 * Allocate new inodes in the allocation group specified by agbp. Returns 0 if
 * inodes were allocated in this AG; -EAGAIN if there was no space in this AG so
 * the caller knows it can try another AG, a hard -ENOSPC when over the maximum
 * inode count threshold, or the usual negative error code for other errors.
 *
 * Allocation strategies are tried in order: (1) exact-bno contiguous with the
 * last allocated chunk, (2) near-bno full chunk (optionally stripe-aligned),
 * (3) sparse chunk if the filesystem supports sparse inodes.  On success the
 * new chunk is initialised on disk and recorded in the inobt (and finobt when
 * present), and the AGI/superblock counters are updated.
 */
STATIC int
xfs_ialloc_ag_alloc(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agbp)
{
	struct xfs_agi		*agi;
	struct xfs_alloc_arg	args;
	int			error;
	xfs_agino_t		newino;		/* new first inode's number */
	xfs_agino_t		newlen;		/* new number of inodes */
	int			isaligned = 0;	/* inode allocation at stripe */
						/* unit boundary */
						/* init. to full chunk */
	struct xfs_inobt_rec_incore rec;
	struct xfs_ino_geometry	*igeo = M_IGEO(tp->t_mountp);
	uint16_t		allocmask = (uint16_t) -1;
	int			do_sparse = 0;

	memset(&args, 0, sizeof(args));
	args.tp = tp;
	args.mp = tp->t_mountp;
	args.fsbno = NULLFSBLOCK;
	args.oinfo = XFS_RMAP_OINFO_INODES;
	args.pag = pag;

#ifdef DEBUG
	/* randomly do sparse inode allocations */
	if (xfs_has_sparseinodes(tp->t_mountp) &&
	    igeo->ialloc_min_blks < igeo->ialloc_blks)
		do_sparse = get_random_u32_below(2);
#endif

	/*
	 * Locking will ensure that we don't have two callers in here
	 * at one time.
	 */
	newlen = igeo->ialloc_inos;
	/* hard cap: refuse to exceed the configured maximum inode count */
	if (igeo->maxicount &&
	    percpu_counter_read_positive(&args.mp->m_icount) + newlen >
							igeo->maxicount)
		return -ENOSPC;
	args.minlen = args.maxlen = igeo->ialloc_blks;
	/*
	 * First try to allocate inodes contiguous with the last-allocated
	 * chunk of inodes. If the filesystem is striped, this will fill
	 * an entire stripe unit with inodes.
	 */
	agi = agbp->b_addr;
	newino = be32_to_cpu(agi->agi_newino);
	args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
		     igeo->ialloc_blks;
	if (do_sparse)
		goto sparse_alloc;
	if (likely(newino != NULLAGINO &&
		  (args.agbno < be32_to_cpu(agi->agi_length)))) {
		args.prod = 1;

		/*
		 * We need to take into account alignment here to ensure that
		 * we don't modify the free list if we fail to have an exact
		 * block. If we don't have an exact match, and every other
		 * allocation attempt fails, we'll end up cancelling a dirty
		 * transaction and shutting down.
		 *
		 * For an exact allocation, alignment must be 1,
		 * however we need to take cluster alignment into account when
		 * fixing up the freelist. Use the minalignslop field to
		 * indicate that extra blocks might be required for alignment,
		 * but not to use them in the actual exact allocation.
		 */
		args.alignment = 1;
		args.minalignslop = igeo->cluster_align - 1;

		/* Allow space for the inode btree to split. */
		args.minleft = igeo->inobt_maxlevels;
		error = xfs_alloc_vextent_exact_bno(&args,
				XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
						args.agbno));
		if (error)
			return error;

		/*
		 * This request might have dirtied the transaction if the AG can
		 * satisfy the request, but the exact block was not available.
		 * If the allocation did fail, subsequent requests will relax
		 * the exact agbno requirement and increase the alignment
		 * instead. It is critical that the total size of the request
		 * (len + alignment + slop) does not increase from this point
		 * on, so reset minalignslop to ensure it is not included in
		 * subsequent requests.
		 */
		args.minalignslop = 0;
	}

	if (unlikely(args.fsbno == NULLFSBLOCK)) {
		/*
		 * Set the alignment for the allocation.
		 * If stripe alignment is turned on then align at stripe unit
		 * boundary.
		 * If the cluster size is smaller than a filesystem block
		 * then we're doing I/O for inodes in filesystem block size
		 * pieces, so don't need alignment anyway.
		 */
		isaligned = 0;
		if (igeo->ialloc_align) {
			ASSERT(!xfs_has_noalign(args.mp));
			args.alignment = args.mp->m_dalign;
			isaligned = 1;
		} else
			args.alignment = igeo->cluster_align;
		/*
		 * Allocate a fixed-size extent of inodes.
		 */
		args.prod = 1;
		/*
		 * Allow space for the inode btree to split.
		 */
		args.minleft = igeo->inobt_maxlevels;
		error = xfs_alloc_vextent_near_bno(&args,
				XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
						be32_to_cpu(agi->agi_root)));
		if (error)
			return error;
	}

	/*
	 * If stripe alignment is turned on, then try again with cluster
	 * alignment.
	 */
	if (isaligned && args.fsbno == NULLFSBLOCK) {
		args.alignment = igeo->cluster_align;
		error = xfs_alloc_vextent_near_bno(&args,
				XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
						be32_to_cpu(agi->agi_root)));
		if (error)
			return error;
	}

	/*
	 * Finally, try a sparse allocation if the filesystem supports it and
	 * the sparse allocation length is smaller than a full chunk.
	 */
	if (xfs_has_sparseinodes(args.mp) &&
	    igeo->ialloc_min_blks < igeo->ialloc_blks &&
	    args.fsbno == NULLFSBLOCK) {
sparse_alloc:
		args.alignment = args.mp->m_sb.sb_spino_align;
		args.prod = 1;

		args.minlen = igeo->ialloc_min_blks;
		args.maxlen = args.minlen;

		/*
		 * The inode record will be aligned to full chunk size. We must
		 * prevent sparse allocation from AG boundaries that result in
		 * invalid inode records, such as records that start at agbno 0
		 * or extend beyond the AG.
		 *
		 * Set min agbno to the first aligned, non-zero agbno and max to
		 * the last aligned agbno that is at least one full chunk from
		 * the end of the AG.
		 */
		args.min_agbno = args.mp->m_sb.sb_inoalignmt;
		args.max_agbno = round_down(args.mp->m_sb.sb_agblocks,
					    args.mp->m_sb.sb_inoalignmt) -
				 igeo->ialloc_blks;

		error = xfs_alloc_vextent_near_bno(&args,
				XFS_AGB_TO_FSB(args.mp, pag->pag_agno,
						be32_to_cpu(agi->agi_root)));
		if (error)
			return error;

		newlen = XFS_AGB_TO_AGINO(args.mp, args.len);
		ASSERT(newlen <= XFS_INODES_PER_CHUNK);
		allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1;
	}

	/* every strategy failed: let the caller try another AG */
	if (args.fsbno == NULLFSBLOCK)
		return -EAGAIN;

	ASSERT(args.len == args.minlen);

	/*
	 * Stamp and write the inode buffers.
	 *
	 * Seed the new inode cluster with a random generation number. This
	 * prevents short-term reuse of generation numbers if a chunk is
	 * freed and then immediately reallocated. We use random numbers
	 * rather than a linear progression to prevent the next generation
	 * number from being easily guessable.
	 */
	error = xfs_ialloc_inode_init(args.mp, tp, NULL, newlen, pag->pag_agno,
			args.agbno, args.len, get_random_u32());

	if (error)
		return error;
	/*
	 * Convert the results.
	 */
	newino = XFS_AGB_TO_AGINO(args.mp, args.agbno);

	if (xfs_inobt_issparse(~allocmask)) {
		/*
		 * We've allocated a sparse chunk. Align the startino and mask.
		 */
		xfs_align_sparse_ino(args.mp, &newino, &allocmask);

		rec.ir_startino = newino;
		rec.ir_holemask = ~allocmask;
		rec.ir_count = newlen;
		rec.ir_freecount = newlen;
		rec.ir_free = XFS_INOBT_ALL_FREE;

		/*
		 * Insert the sparse record into the inobt and allow for a merge
		 * if necessary. If a merge does occur, rec is updated to the
		 * merged record.
		 */
		error = xfs_inobt_insert_sprec(pag, tp, agbp,
				XFS_BTNUM_INO, &rec, true);
		if (error == -EFSCORRUPTED) {
			xfs_alert(args.mp,
	"invalid sparse inode record: ino 0x%llx holemask 0x%x count %u",
				  XFS_AGINO_TO_INO(args.mp, pag->pag_agno,
						   rec.ir_startino),
				  rec.ir_holemask, rec.ir_count);
			xfs_force_shutdown(args.mp, SHUTDOWN_CORRUPT_INCORE);
		}
		if (error)
			return error;

		/*
		 * We can't merge the part we've just allocated as for the inobt
		 * due to finobt semantics. The original record may or may not
		 * exist independent of whether physical inodes exist in this
		 * sparse chunk.
		 *
		 * We must update the finobt record based on the inobt record.
		 * rec contains the fully merged and up to date inobt record
		 * from the previous call. Set merge false to replace any
		 * existing record with this one.
		 */
		if (xfs_has_finobt(args.mp)) {
			error = xfs_inobt_insert_sprec(pag, tp, agbp,
					XFS_BTNUM_FINO, &rec, false);
			if (error)
				return error;
		}
	} else {
		/* full chunk - insert new records to both btrees */
		error = xfs_inobt_insert(pag, tp, agbp, newino, newlen,
					 XFS_BTNUM_INO);
		if (error)
			return error;

		if (xfs_has_finobt(args.mp)) {
			error = xfs_inobt_insert(pag, tp, agbp, newino,
						 newlen, XFS_BTNUM_FINO);
			if (error)
				return error;
		}
	}

	/*
	 * Update AGI counts and newino.
	 */
	be32_add_cpu(&agi->agi_count, newlen);
	be32_add_cpu(&agi->agi_freecount, newlen);
	pag->pagi_freecount += newlen;
	pag->pagi_count += newlen;
	agi->agi_newino = cpu_to_be32(newino);

	/*
	 * Log allocation group header fields
	 */
	xfs_ialloc_log_agi(tp, agbp,
		XFS_AGI_COUNT | XFS_AGI_FREECOUNT | XFS_AGI_NEWINO);
	/*
	 * Modify/log superblock values for inode count and inode free count.
	 */
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen);
	xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen);
	return 0;
}
942 :
 943 : /*
 944 : * Try to retrieve the next record to the left/right from the current one.
     : *
     : * @cur:  inobt cursor positioned at the current record
     : * @rec:  filled with the neighbouring record when one exists
     : * @done: set to 1 if there is no further record in that direction
     : * @left: nonzero steps left (btree decrement), zero steps right
     : *
     : * Returns 0 on success (including "no more records", signalled via
     : * @done) or a negative errno.
 945 : */
 946 : STATIC int
 947 522 : xfs_ialloc_next_rec(
 948 : struct xfs_btree_cur *cur,
 949 : xfs_inobt_rec_incore_t *rec,
 950 : int *done,
 951 : int left)
 952 : {
 953 522 : int error;
 954 522 : int i;
 955 :
 956 522 : if (left)
 957 257 : error = xfs_btree_decrement(cur, 0, &i);
 958 : else
 959 265 : error = xfs_btree_increment(cur, 0, &i);
 960 :
 961 522 : if (error)
 962 : return error;
 963 522 : *done = !i;
 964 522 : if (i) {
     : /* cursor moved: fetch the record it now points at */
 965 281 : error = xfs_inobt_get_rec(cur, rec, &i);
 966 281 : if (error)
 967 : return error;
 968 281 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
 969 0 : xfs_btree_mark_sick(cur);
 970 0 : return -EFSCORRUPTED;
 971 : }
 972 : }
 973 :
 974 : return 0;
 975 : }
976 :
     : /*
     : * Look up the inobt record exactly matching @agino and return it in @rec.
     : * @done is set to 1 when no record with that startino exists.  Returns 0
     : * on success or a negative errno.
     : */
 977 : STATIC int
 978 272 : xfs_ialloc_get_rec(
 979 : struct xfs_btree_cur *cur,
 980 : xfs_agino_t agino,
 981 : xfs_inobt_rec_incore_t *rec,
 982 : int *done)
 983 : {
 984 272 : int error;
 985 272 : int i;
 986 :
 987 272 : error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_EQ, &i);
 988 272 : if (error)
 989 : return error;
 990 272 : *done = !i;
 991 272 : if (i) {
 992 140 : error = xfs_inobt_get_rec(cur, rec, &i);
 993 140 : if (error)
 994 : return error;
 995 140 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
 996 0 : xfs_btree_mark_sick(cur);
 997 0 : return -EFSCORRUPTED;
 998 : }
 999 : }
L1000: 
L1001: return 0;
L1002: }
1003 :
1004 : /*
1005 : * Return the offset of the first free inode in the record. If the inode chunk
1006 : * is sparsely allocated, we convert the record holemask to inode granularity
1007 : * and mask off the unallocated regions from the inode free mask.
     : *
     : * NOTE(review): callers ASSERT the result is >= 0, i.e. the record is
     : * expected to have at least one free, physically-allocated inode.
1008 : */
1009 : STATIC int
1010 69441637 : xfs_inobt_first_free_inode(
1011 : struct xfs_inobt_rec_incore *rec)
1012 : {
1013 69441637 : xfs_inofree_t realfree;
1014 :
1015 : /* if there are no holes, return the first available offset */
1016 69441637 : if (!xfs_inobt_issparse(rec->ir_holemask))
1017 46390560 : return xfs_lowbit64(rec->ir_free);
1018 :
     : /* sparse chunk: only consider free bits backed by real inodes */
1019 23051077 : realfree = xfs_inobt_irec_to_allocmask(rec);
1020 23051313 : realfree &= rec->ir_free;
1021 :
1022 46102626 : return xfs_lowbit64(realfree);
1023 : }
1024 :
1025 : /*
1026 : * If this AG has corrupt inodes, check if allocating this inode would fail
1027 : * with corruption errors. Returns 0 if we're clear, or EAGAIN to try again
1028 : * somewhere else.
     : *
     : * The probe maps @ino to its cluster buffer and reads it; any mapping or
     : * read failure is reported as -EAGAIN so the caller moves to another AG
     : * rather than surfacing the corruption here.
1029 : */
1030 : static int
1031 0 : xfs_dialloc_check_ino(
1032 : struct xfs_perag *pag,
1033 : struct xfs_trans *tp,
1034 : xfs_ino_t ino)
1035 : {
1036 0 : struct xfs_imap imap;
1037 0 : struct xfs_buf *bp;
1038 0 : int error;
1039 :
1040 0 : error = xfs_imap(pag, tp, ino, &imap, 0);
1041 0 : if (error)
1042 : return -EAGAIN;
1043 :
1044 0 : error = xfs_imap_to_bp(pag->pag_mount, tp, &imap, &bp);
1045 0 : if (error)
1046 : return -EAGAIN;
1047 :
1048 0 : xfs_trans_brelse(tp, bp);
1049 0 : return 0;
1050 : }
1051 :
1052 : /*
1053 : * Allocate an inode using the inobt-only algorithm.
     : *
     : * Used when the filesystem has no free inode btree.  Picks a free inode
     : * near @parent when parent lives in this AG, otherwise falls back to the
     : * AG's most recently allocated chunk and finally a full AG scan.  The
     : * allocated inode number is returned in @inop.
1054 : */
1055 : STATIC int
1056 1395 : xfs_dialloc_ag_inobt(
1057 : struct xfs_perag *pag,
1058 : struct xfs_trans *tp,
1059 : struct xfs_buf *agbp,
1060 : xfs_ino_t parent,
1061 : xfs_ino_t *inop)
1062 : {
1063 1395 : struct xfs_mount *mp = tp->t_mountp;
1064 1395 : struct xfs_agi *agi = agbp->b_addr;
1065 1395 : xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
1066 1395 : xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
1067 1395 : struct xfs_btree_cur *cur, *tcur;
1068 1395 : struct xfs_inobt_rec_incore rec, trec;
1069 1395 : xfs_ino_t ino;
1070 1395 : int error;
1071 1395 : int offset;
1072 1395 : int i, j;
     : /* budget for the bidirectional near-parent scan below */
1073 1395 : int searchdistance = 10;
1074 :
1075 2790 : ASSERT(xfs_perag_initialised_agi(pag));
1076 2790 : ASSERT(xfs_perag_allows_inodes(pag));
1077 1395 : ASSERT(pag->pagi_freecount > 0);
1078 :
1079 1395 : restart_pagno:
1080 1395 : cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
1081 : /*
1082 : * If pagino is 0 (this is the root inode allocation) use newino.
1083 : * This must work because we've just allocated some.
1084 : */
1085 1395 : if (!pagino)
1086 28 : pagino = be32_to_cpu(agi->agi_newino);
1087 :
1088 1395 : error = xfs_check_agi_freecount(cur);
1089 1395 : if (error)
1090 0 : goto error0;
1091 :
1092 : /*
1093 : * If in the same AG as the parent, try to get near the parent.
1094 : */
1095 1395 : if (pagno == pag->pag_agno) {
1096 1234 : int doneleft; /* done, to the left */
1097 1234 : int doneright; /* done, to the right */
1098 :
1099 1234 : error = xfs_inobt_lookup(cur, pagino, XFS_LOOKUP_LE, &i);
1100 1234 : if (error)
1101 0 : goto error0;
1102 1234 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1103 0 : xfs_btree_mark_sick(cur);
1104 0 : error = -EFSCORRUPTED;
1105 0 : goto error0;
1106 : }
1107 :
1108 1234 : error = xfs_inobt_get_rec(cur, &rec, &j);
1109 1234 : if (error)
1110 0 : goto error0;
1111 1234 : if (XFS_IS_CORRUPT(mp, j != 1)) {
1112 0 : xfs_btree_mark_sick(cur);
1113 0 : error = -EFSCORRUPTED;
1114 0 : goto error0;
1115 : }
1116 :
1117 1234 : if (rec.ir_freecount > 0) {
1118 : /*
1119 : * Found a free inode in the same chunk
1120 : * as the parent, done.
1121 : */
1122 1234 : goto alloc_inode;
1123 : }
1124 :
1125 :
1126 : /*
1127 : * In the same AG as parent, but parent's chunk is full.
1128 : */
1129 :
1130 : /* duplicate the cursor, search left & right simultaneously */
1131 386 : error = xfs_btree_dup_cursor(cur, &tcur);
1132 386 : if (error)
1133 0 : goto error0;
1134 :
1135 : /*
1136 : * Skip to last blocks looked up if same parent inode.
     : *
     : * pag->pagl_* caches the left/right scan frontier from the
     : * previous allocation for this parent so repeated allocations
     : * resume where the last search stopped.
1137 : */
1138 386 : if (pagino != NULLAGINO &&
1139 386 : pag->pagl_pagino == pagino &&
1140 136 : pag->pagl_leftrec != NULLAGINO &&
1141 136 : pag->pagl_rightrec != NULLAGINO) {
1142 136 : error = xfs_ialloc_get_rec(tcur, pag->pagl_leftrec,
1143 : &trec, &doneleft);
1144 136 : if (error)
1145 0 : goto error1;
1146 :
1147 136 : error = xfs_ialloc_get_rec(cur, pag->pagl_rightrec,
1148 : &rec, &doneright);
1149 136 : if (error)
1150 0 : goto error1;
1151 : } else {
1152 : /* search left with tcur, back up 1 record */
1153 250 : error = xfs_ialloc_next_rec(tcur, &trec, &doneleft, 1);
1154 250 : if (error)
1155 0 : goto error1;
1156 :
1157 : /* search right with cur, go forward 1 record. */
1158 250 : error = xfs_ialloc_next_rec(cur, &rec, &doneright, 0);
1159 250 : if (error)
1160 0 : goto error1;
1161 : }
1162 :
1163 : /*
1164 : * Loop until we find an inode chunk with a free inode.
1165 : */
1166 408 : while (--searchdistance > 0 && (!doneleft || !doneright)) {
1167 408 : int useleft; /* using left inode chunk this time */
1168 :
1169 : /* figure out the closer block if both are valid. */
1170 408 : if (!doneleft && !doneright) {
1171 20 : useleft = pagino -
1172 20 : (trec.ir_startino + XFS_INODES_PER_CHUNK - 1) <
1173 20 : rec.ir_startino - pagino;
1174 : } else {
1175 388 : useleft = !doneleft;
1176 : }
1177 :
1178 : /* free inodes to the left? */
1179 408 : if (useleft && trec.ir_freecount) {
1180 17 : xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1181 17 : cur = tcur;
1182 :
1183 17 : pag->pagl_leftrec = trec.ir_startino;
1184 17 : pag->pagl_rightrec = rec.ir_startino;
1185 17 : pag->pagl_pagino = pagino;
1186 17 : rec = trec;
1187 17 : goto alloc_inode;
1188 : }
1189 :
1190 : /* free inodes to the right? */
1191 391 : if (!useleft && rec.ir_freecount) {
1192 369 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1193 :
1194 369 : pag->pagl_leftrec = trec.ir_startino;
1195 369 : pag->pagl_rightrec = rec.ir_startino;
1196 369 : pag->pagl_pagino = pagino;
1197 369 : goto alloc_inode;
1198 : }
1199 :
1200 : /* get next record to check */
1201 22 : if (useleft) {
1202 7 : error = xfs_ialloc_next_rec(tcur, &trec,
1203 : &doneleft, 1);
1204 : } else {
1205 15 : error = xfs_ialloc_next_rec(cur, &rec,
1206 : &doneright, 0);
1207 : }
1208 22 : if (error)
1209 0 : goto error1;
1210 : }
1211 :
     : /* search budget exhausted vs. both btree ends reached */
1212 0 : if (searchdistance <= 0) {
1213 : /*
1214 : * Not in range - save last search
1215 : * location and allocate a new inode
1216 : */
1217 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1218 0 : pag->pagl_leftrec = trec.ir_startino;
1219 0 : pag->pagl_rightrec = rec.ir_startino;
1220 0 : pag->pagl_pagino = pagino;
1221 :
1222 : } else {
1223 : /*
1224 : * We've reached the end of the btree. because
1225 : * we are only searching a small chunk of the
1226 : * btree each search, there is obviously free
1227 : * inodes closer to the parent inode than we
1228 : * are now. restart the search again.
1229 : */
1230 0 : pag->pagl_pagino = NULLAGINO;
1231 0 : pag->pagl_leftrec = NULLAGINO;
1232 0 : pag->pagl_rightrec = NULLAGINO;
1233 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1234 0 : xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1235 0 : goto restart_pagno;
1236 : }
1237 : }
1238 :
1239 : /*
1240 : * In a different AG from the parent.
1241 : * See if the most recently allocated block has any free.
1242 : */
1243 161 : if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
1244 161 : error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
1245 : XFS_LOOKUP_EQ, &i);
1246 161 : if (error)
1247 0 : goto error0;
1248 :
1249 161 : if (i == 1) {
1250 161 : error = xfs_inobt_get_rec(cur, &rec, &j);
1251 161 : if (error)
1252 0 : goto error0;
1253 :
1254 161 : if (j == 1 && rec.ir_freecount > 0) {
1255 : /*
1256 : * The last chunk allocated in the group
1257 : * still has a free inode.
1258 : */
1259 161 : goto alloc_inode;
1260 : }
1261 : }
1262 : }
1263 :
1264 : /*
1265 : * None left in the last group, search the whole AG
1266 : */
1267 0 : error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
1268 0 : if (error)
1269 0 : goto error0;
1270 0 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1271 0 : xfs_btree_mark_sick(cur);
1272 0 : error = -EFSCORRUPTED;
1273 0 : goto error0;
1274 : }
1275 :
1276 0 : for (;;) {
1277 0 : error = xfs_inobt_get_rec(cur, &rec, &i);
1278 0 : if (error)
1279 0 : goto error0;
1280 0 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1281 0 : xfs_btree_mark_sick(cur);
1282 0 : error = -EFSCORRUPTED;
1283 0 : goto error0;
1284 : }
1285 0 : if (rec.ir_freecount > 0)
1286 : break;
1287 0 : error = xfs_btree_increment(cur, 0, &i);
1288 0 : if (error)
1289 0 : goto error0;
1290 0 : if (XFS_IS_CORRUPT(mp, i != 1)) {
1291 0 : xfs_btree_mark_sick(cur);
1292 0 : error = -EFSCORRUPTED;
1293 0 : goto error0;
1294 : }
1295 : }
1296 :
     : /* rec now has a free inode: claim it and update counters */
1297 0 : alloc_inode:
1298 1395 : offset = xfs_inobt_first_free_inode(&rec);
1299 1395 : ASSERT(offset >= 0);
1300 1395 : ASSERT(offset < XFS_INODES_PER_CHUNK);
1301 1395 : ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
1302 : XFS_INODES_PER_CHUNK) == 0);
1303 1395 : ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
1304 :
1305 1395 : if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
1306 0 : error = xfs_dialloc_check_ino(pag, tp, ino);
1307 0 : if (error)
1308 0 : goto error0;
1309 : }
1310 :
1311 1395 : rec.ir_free &= ~XFS_INOBT_MASK(offset);
1312 1395 : rec.ir_freecount--;
1313 1395 : error = xfs_inobt_update(cur, &rec);
1314 1395 : if (error)
1315 0 : goto error0;
1316 1395 : be32_add_cpu(&agi->agi_freecount, -1);
1317 1395 : xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1318 1395 : pag->pagi_freecount--;
1319 :
1320 1395 : error = xfs_check_agi_freecount(cur);
1321 1395 : if (error)
1322 0 : goto error0;
1323 :
1324 1395 : xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1325 1395 : xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1326 1395 : *inop = ino;
1327 1395 : return 0;
1328 : error1:
1329 0 : xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1330 0 : error0:
1331 0 : xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1332 0 : return error;
1333 : }
1334 :
1335 : /*
1336 : * Use the free inode btree to allocate an inode based on distance from the
1337 : * parent. Note that the provided cursor may be deleted and replaced.
     : *
     : * Two finobt records bracket @pagino (LE lookup on *ocur, GE lookup on a
     : * duplicate cursor); the nearer one is returned in @rec and the losing
     : * cursor is torn down, so *ocur always ends up positioned at @rec.
1338 : */
1339 : STATIC int
1340 64442250 : xfs_dialloc_ag_finobt_near(
1341 : xfs_agino_t pagino,
1342 : struct xfs_btree_cur **ocur,
1343 : struct xfs_inobt_rec_incore *rec)
1344 : {
1345 64442250 : struct xfs_btree_cur *lcur = *ocur; /* left search cursor */
1346 64442250 : struct xfs_btree_cur *rcur; /* right search cursor */
1347 64442250 : struct xfs_inobt_rec_incore rrec;
1348 64442250 : int error;
1349 64442250 : int i, j;
1350 :
1351 64442250 : error = xfs_inobt_lookup(lcur, pagino, XFS_LOOKUP_LE, &i);
1352 64439916 : if (error)
1353 : return error;
1354 :
1355 64439916 : if (i == 1) {
1356 6756183 : error = xfs_inobt_get_rec(lcur, rec, &i);
1357 6756230 : if (error)
1358 : return error;
1359 6756230 : if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1)) {
1360 0 : xfs_btree_mark_sick(lcur);
1361 0 : return -EFSCORRUPTED;
1362 : }
1363 :
1364 : /*
1365 : * See if we've landed in the parent inode record. The finobt
1366 : * only tracks chunks with at least one free inode, so record
1367 : * existence is enough.
1368 : */
1369 6756230 : if (pagino >= rec->ir_startino &&
1370 6756276 : pagino < (rec->ir_startino + XFS_INODES_PER_CHUNK))
1371 : return 0;
1372 : }
1373 :
1374 62073323 : error = xfs_btree_dup_cursor(lcur, &rcur);
1375 62076329 : if (error)
1376 : return error;
1377 :
1378 62076710 : error = xfs_inobt_lookup(rcur, pagino, XFS_LOOKUP_GE, &j);
1379 62074539 : if (error)
1380 0 : goto error_rcur;
1381 62074539 : if (j == 1) {
1382 60255618 : error = xfs_inobt_get_rec(rcur, &rrec, &j);
1383 60257029 : if (error)
1384 0 : goto error_rcur;
1385 60257029 : if (XFS_IS_CORRUPT(lcur->bc_mp, j != 1)) {
1386 0 : xfs_btree_mark_sick(lcur);
1387 0 : error = -EFSCORRUPTED;
1388 0 : goto error_rcur;
1389 : }
1390 : }
1391 :
     : /* at least one side must have found a record */
1392 62075950 : if (XFS_IS_CORRUPT(lcur->bc_mp, i != 1 && j != 1)) {
1393 0 : xfs_btree_mark_sick(lcur);
1394 0 : error = -EFSCORRUPTED;
1395 0 : goto error_rcur;
1396 : }
1397 62075950 : if (i == 1 && j == 1) {
1398 : /*
1399 : * Both the left and right records are valid. Choose the closer
1400 : * inode chunk to the target.
1401 : */
1402 2570594 : if ((pagino - rec->ir_startino + XFS_INODES_PER_CHUNK - 1) >
1403 2570594 : (rrec.ir_startino - pagino)) {
1404 1095337 : *rec = rrec;
1405 1095337 : xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
1406 1095335 : *ocur = rcur;
1407 : } else {
1408 1475257 : xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
1409 : }
1410 59505356 : } else if (j == 1) {
1411 : /* only the right record is valid */
1412 57686295 : *rec = rrec;
1413 57686295 : xfs_btree_del_cursor(lcur, XFS_BTREE_NOERROR);
1414 57685962 : *ocur = rcur;
1415 1819061 : } else if (i == 1) {
1416 : /* only the left record is valid */
1417 1819060 : xfs_btree_del_cursor(rcur, XFS_BTREE_NOERROR);
1418 : }
1419 :
1420 : return 0;
1421 :
1422 0 : error_rcur:
1423 0 : xfs_btree_del_cursor(rcur, XFS_BTREE_ERROR);
1424 0 : return error;
1425 : }
1426 :
1427 : /*
1428 : * Use the free inode btree to find a free inode based on a newino hint. If
1429 : * the hint is NULL, find the first free inode in the AG.
     : *
     : * On success @rec holds a finobt record that is guaranteed to contain at
     : * least one free inode (the finobt only tracks such chunks).
1430 : */
1431 : STATIC int
1432 4995084 : xfs_dialloc_ag_finobt_newino(
1433 : struct xfs_agi *agi,
1434 : struct xfs_btree_cur *cur,
1435 : struct xfs_inobt_rec_incore *rec)
1436 : {
1437 4995084 : int error;
1438 4995084 : int i;
1439 :
     : /* fast path: the most recently allocated chunk, if still in finobt */
1440 4995084 : if (agi->agi_newino != cpu_to_be32(NULLAGINO)) {
1441 4971371 : error = xfs_inobt_lookup(cur, be32_to_cpu(agi->agi_newino),
1442 : XFS_LOOKUP_EQ, &i);
1443 4971309 : if (error)
1444 : return error;
1445 4971309 : if (i == 1) {
1446 4895145 : error = xfs_inobt_get_rec(cur, rec, &i);
1447 4895214 : if (error)
1448 : return error;
1449 4895214 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1450 0 : xfs_btree_mark_sick(cur);
1451 0 : return -EFSCORRUPTED;
1452 : }
1453 : return 0;
1454 : }
1455 : }
1456 :
1457 : /*
1458 : * Find the first inode available in the AG.
1459 : */
1460 99877 : error = xfs_inobt_lookup(cur, 0, XFS_LOOKUP_GE, &i);
1461 99891 : if (error)
1462 : return error;
1463 99891 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1464 0 : xfs_btree_mark_sick(cur);
1465 0 : return -EFSCORRUPTED;
1466 : }
1467 :
1468 99891 : error = xfs_inobt_get_rec(cur, rec, &i);
1469 99891 : if (error)
1470 : return error;
1471 99891 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1472 0 : xfs_btree_mark_sick(cur);
1473 0 : return -EFSCORRUPTED;
1474 : }
1475 :
1476 : return 0;
1477 : }
1478 :
1479 : /*
1480 : * Update the inobt based on a modification made to the finobt. Also ensure that
1481 : * the records from both trees are equivalent post-modification.
     : *
     : * @frec is the finobt record BEFORE the inode at @offset was cleared from
     : * it; the same bit is cleared from the matching inobt record here, and the
     : * two records must then agree or the filesystem is corrupt.
1482 : */
1483 : STATIC int
1484 69436966 : xfs_dialloc_ag_update_inobt(
1485 : struct xfs_btree_cur *cur, /* inobt cursor */
1486 : struct xfs_inobt_rec_incore *frec, /* finobt record */
1487 : int offset) /* inode offset */
1488 : {
1489 69436966 : struct xfs_inobt_rec_incore rec;
1490 69436966 : int error;
1491 69436966 : int i;
1492 :
1493 69436966 : error = xfs_inobt_lookup(cur, frec->ir_startino, XFS_LOOKUP_EQ, &i);
1494 69438211 : if (error)
1495 : return error;
1496 69438138 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1497 0 : xfs_btree_mark_sick(cur);
1498 0 : return -EFSCORRUPTED;
1499 : }
1500 :
1501 69438138 : error = xfs_inobt_get_rec(cur, &rec, &i);
1502 69436810 : if (error)
1503 : return error;
1504 69436810 : if (XFS_IS_CORRUPT(cur->bc_mp, i != 1)) {
1505 0 : xfs_btree_mark_sick(cur);
1506 0 : return -EFSCORRUPTED;
1507 : }
1508 69436810 : ASSERT((XFS_AGINO_TO_OFFSET(cur->bc_mp, rec.ir_startino) %
1509 : XFS_INODES_PER_CHUNK) == 0);
1510 :
1511 69436810 : rec.ir_free &= ~XFS_INOBT_MASK(offset);
1512 69436810 : rec.ir_freecount--;
1513 :
     : /* cross-check: inobt and finobt views of the chunk must now match */
1514 69436810 : if (XFS_IS_CORRUPT(cur->bc_mp,
1515 : rec.ir_free != frec->ir_free ||
1516 : rec.ir_freecount != frec->ir_freecount)) {
1517 0 : xfs_btree_mark_sick(cur);
1518 0 : return -EFSCORRUPTED;
1519 : }
1520 :
1521 69436810 : return xfs_inobt_update(cur, &rec);
1522 : }
1523 :
1524 : /*
1525 : * Allocate an inode using the free inode btree, if available. Otherwise, fall
1526 : * back to the inobt search algorithm.
1527 : *
1528 : * The caller selected an AG for us, and made sure that free inodes are
1529 : * available.
     : *
     : * Returns the allocated inode number in @inop; both the finobt and inobt
     : * records, the AGI and the superblock counters are updated in @tp.
1530 : */
1531 : static int
1532 69439433 : xfs_dialloc_ag(
1533 : struct xfs_perag *pag,
1534 : struct xfs_trans *tp,
1535 : struct xfs_buf *agbp,
1536 : xfs_ino_t parent,
1537 : xfs_ino_t *inop)
1538 : {
1539 69439433 : struct xfs_mount *mp = tp->t_mountp;
1540 69439433 : struct xfs_agi *agi = agbp->b_addr;
1541 69439433 : xfs_agnumber_t pagno = XFS_INO_TO_AGNO(mp, parent);
1542 69439433 : xfs_agino_t pagino = XFS_INO_TO_AGINO(mp, parent);
1543 69439433 : struct xfs_btree_cur *cur; /* finobt cursor */
1544 69439433 : struct xfs_btree_cur *icur; /* inobt cursor */
1545 69439433 : struct xfs_inobt_rec_incore rec;
1546 69439433 : xfs_ino_t ino;
1547 69439433 : int error;
1548 69439433 : int offset;
1549 69439433 : int i;
1550 :
1551 69439433 : if (!xfs_has_finobt(mp))
1552 1395 : return xfs_dialloc_ag_inobt(pag, tp, agbp, parent, inop);
1553 :
1554 : /*
1555 : * If pagino is 0 (this is the root inode allocation) use newino.
1556 : * This must work because we've just allocated some.
1557 : */
1558 69438038 : if (!pagino)
1559 8267 : pagino = be32_to_cpu(agi->agi_newino);
1560 :
1561 69438038 : cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
1562 :
1563 69435938 : error = xfs_check_agi_freecount(cur);
1564 69437419 : if (error)
1565 277 : goto error_cur;
1566 :
1567 : /*
1568 : * The search algorithm depends on whether we're in the same AG as the
1569 : * parent. If so, find the closest available inode to the parent. If
1570 : * not, consider the agi hint or find the first free inode in the AG.
1571 : */
1572 69437142 : if (pag->pag_agno == pagno)
1573 64442073 : error = xfs_dialloc_ag_finobt_near(pagino, &cur, &rec);
1574 : else
1575 4995069 : error = xfs_dialloc_ag_finobt_newino(agi, cur, &rec);
1576 69436157 : if (error)
1577 0 : goto error_cur;
1578 :
1579 69436157 : offset = xfs_inobt_first_free_inode(&rec);
1580 69435581 : ASSERT(offset >= 0);
1581 69435581 : ASSERT(offset < XFS_INODES_PER_CHUNK);
1582 69435581 : ASSERT((XFS_AGINO_TO_OFFSET(mp, rec.ir_startino) %
1583 : XFS_INODES_PER_CHUNK) == 0);
1584 69435581 : ino = XFS_AGINO_TO_INO(mp, pag->pag_agno, rec.ir_startino + offset);
1585 :
1586 69435581 : if (xfs_ag_has_sickness(pag, XFS_SICK_AG_INODES)) {
1587 0 : error = xfs_dialloc_check_ino(pag, tp, ino);
1588 0 : if (error)
1589 0 : goto error_cur;
1590 : }
1591 :
1592 : /*
1593 : * Modify or remove the finobt record.
     : * A chunk with no free inodes left is deleted from the finobt
     : * entirely, since the finobt only tracks chunks with free inodes.
1594 : */
1595 69432392 : rec.ir_free &= ~XFS_INOBT_MASK(offset);
1596 69432392 : rec.ir_freecount--;
1597 69432392 : if (rec.ir_freecount)
1598 54994751 : error = xfs_inobt_update(cur, &rec);
1599 : else
1600 14437641 : error = xfs_btree_delete(cur, &i);
1601 69430646 : if (error)
1602 0 : goto error_cur;
1603 :
1604 : /*
1605 : * The finobt has now been updated appropriately. We haven't updated the
1606 : * agi and superblock yet, so we can create an inobt cursor and validate
1607 : * the original freecount. If all is well, make the equivalent update to
1608 : * the inobt using the finobt record and offset information.
1609 : */
1610 69430646 : icur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
1611 :
1612 69428419 : error = xfs_check_agi_freecount(icur);
1613 69438009 : if (error)
1614 20 : goto error_icur;
1615 :
1616 69437989 : error = xfs_dialloc_ag_update_inobt(icur, &rec, offset);
1617 69437681 : if (error)
1618 73 : goto error_icur;
1619 :
1620 : /*
1621 : * Both trees have now been updated. We must update the perag and
1622 : * superblock before we can check the freecount for each btree.
1623 : */
1624 69437608 : be32_add_cpu(&agi->agi_freecount, -1);
1625 69439585 : xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
1626 69438900 : pag->pagi_freecount--;
1627 :
1628 69438900 : xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -1);
1629 :
1630 69438084 : error = xfs_check_agi_freecount(icur);
1631 69437403 : if (error)
1632 0 : goto error_icur;
1633 69437403 : error = xfs_check_agi_freecount(cur);
1634 69439070 : if (error)
1635 0 : goto error_icur;
1636 :
1637 69439070 : xfs_btree_del_cursor(icur, XFS_BTREE_NOERROR);
1638 69439541 : xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
1639 69437612 : *inop = ino;
1640 69437612 : return 0;
1641 :
1642 93 : error_icur:
1643 93 : xfs_btree_del_cursor(icur, XFS_BTREE_ERROR);
1644 370 : error_cur:
1645 370 : xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
1646 370 : return error;
1647 : }
1648 :
     : /*
     : * Roll the allocation transaction after a new inode chunk has been
     : * allocated, so the caller can allocate one of the new inodes in a fresh
     : * transaction.  The AGI buffer stays locked across the roll so no other
     : * allocator can steal the just-created free inodes, and the dquot info is
     : * carried over to the new transaction rather than being committed now.
     : */
1649 : static int
1650 549359 : xfs_dialloc_roll(
1651 : struct xfs_trans **tpp,
1652 : struct xfs_buf *agibp)
1653 : {
1654 549359 : struct xfs_trans *tp = *tpp;
1655 549359 : struct xfs_dquot_acct *dqinfo;
1656 549359 : int error;
1657 :
1658 : /*
1659 : * Hold to on to the agibp across the commit so no other allocation can
1660 : * come in and take the free inodes we just allocated for our caller.
1661 : */
1662 549359 : xfs_trans_bhold(tp, agibp);
1663 :
1664 : /*
1665 : * We want the quota changes to be associated with the next transaction,
1666 : * NOT this one. So, detach the dqinfo from this and attach it to the
1667 : * next transaction.
1668 : */
1669 549352 : dqinfo = tp->t_dqinfo;
1670 549352 : tp->t_dqinfo = NULL;
1671 :
1672 549352 : error = xfs_trans_roll(&tp);
1673 :
1674 : /* Re-attach the quota info that we detached from prev trx. */
1675 549357 : tp->t_dqinfo = dqinfo;
1676 :
1677 : /*
1678 : * Join the buffer even on commit error so that the buffer is released
1679 : * when the caller cancels the transaction and doesn't have to handle
1680 : * this error case specially.
1681 : */
1682 549357 : xfs_trans_bjoin(tp, agibp);
1683 549367 : *tpp = tp;
1684 549367 : return error;
1685 : }
1686 :
     : /*
     : * Decide whether @pag is worth trying for an inode allocation: it must
     : * allow inodes and either already have free inodes, or (when @ok_alloc)
     : * have enough contiguous free space to create a new inode chunk.  Reads
     : * in the AGI/AGF headers on demand.  Returns true if the AG looks good;
     : * a false return from a read error simply skips this AG.
     : */
1687 : static bool
1688 71019266 : xfs_dialloc_good_ag(
1689 : struct xfs_perag *pag,
1690 : struct xfs_trans *tp,
1691 : umode_t mode,
1692 : int flags,
1693 : bool ok_alloc)
1694 : {
1695 71019266 : struct xfs_mount *mp = tp->t_mountp;
1696 71019266 : xfs_extlen_t ineed;
1697 71019266 : xfs_extlen_t longest = 0;
1698 71019266 : int needspace;
1699 71019266 : int error;
1700 :
1701 71019266 : if (!pag)
1702 : return false;
1703 142038532 : if (!xfs_perag_allows_inodes(pag))
1704 : return false;
1705 :
1706 142038532 : if (!xfs_perag_initialised_agi(pag)) {
1707 130 : error = xfs_ialloc_read_agi(pag, tp, NULL);
1708 130 : if (error)
1709 : return false;
1710 : }
1711 :
1712 71019266 : if (pag->pagi_freecount)
1713 : return true;
1714 2135132 : if (!ok_alloc)
1715 : return false;
1716 :
1717 1753408 : if (!xfs_perag_initialised_agf(pag)) {
1718 23 : error = xfs_alloc_read_agf(pag, tp, flags, NULL);
1719 23 : if (error)
1720 : return false;
1721 : }
1722 :
1723 : /*
1724 : * Check that there is enough free space for the file plus a chunk of
1725 : * inodes if we need to allocate some. If this is the first pass across
1726 : * the AGs, take into account the potential space needed for alignment
1727 : * of inode chunks when checking the longest contiguous free space in
1728 : * the AG - this prevents us from getting ENOSPC because we have free
1729 : * space larger than ialloc_blks but alignment constraints prevent us
1730 : * from using it.
1731 : *
1732 : * If we can't find an AG with space for full alignment slack to be
1733 : * taken into account, we must be near ENOSPC in all AGs. Hence we
1734 : * don't include alignment for the second pass and so if we fail
1735 : * allocation due to alignment issues then it is most likely a real
1736 : * ENOSPC condition.
1737 : *
1738 : * XXX(dgc): this calculation is now bogus thanks to the per-ag
1739 : * reservations that xfs_alloc_fix_freelist() now does via
1740 : * xfs_alloc_space_available(). When the AG fills up, pagf_freeblks will
1741 : * be more than large enough for the check below to succeed, but
1742 : * xfs_alloc_space_available() will fail because of the non-zero
1743 : * metadata reservation and hence we won't actually be able to allocate
1744 : * more inodes in this AG. We do soooo much unnecessary work near ENOSPC
1745 : * because of this.
1746 : */
1747 876687 : ineed = M_IGEO(mp)->ialloc_min_blks;
1748 876687 : if (flags && ineed > 1)
1749 868345 : ineed += M_IGEO(mp)->cluster_align;
1750 876687 : longest = pag->pagf_longest;
1751 876687 : if (!longest)
1752 21 : longest = pag->pagf_flcount > 0;
1753 876687 : needspace = S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode);
1754 :
1755 876687 : if (pag->pagf_freeblks < needspace + ineed || longest < ineed)
1756 34975 : return false;
1757 : return true;
1758 : }
1759 :
     : /*
     : * Attempt to allocate an inode from @pag, allocating a new inode chunk
     : * first if the AG has no free inodes and @ok_alloc permits it.  The
     : * transaction may be rolled (see xfs_dialloc_roll), hence @tpp.  Returns
     : * 0 with the new inode number in @new_ino, -EAGAIN if the caller should
     : * try another AG, or another negative errno on failure.
     : */
1760 : static int
1761 69724032 : xfs_dialloc_try_ag(
1762 : struct xfs_perag *pag,
1763 : struct xfs_trans **tpp,
1764 : xfs_ino_t parent,
1765 : xfs_ino_t *new_ino,
1766 : bool ok_alloc)
1767 : {
1768 69724032 : struct xfs_buf *agbp;
1769 69724032 : xfs_ino_t ino;
1770 69724032 : int error;
1771 :
1772 : /*
1773 : * Then read in the AGI buffer and recheck with the AGI buffer
1774 : * lock held.
1775 : */
1776 69724032 : error = xfs_ialloc_read_agi(pag, *tpp, &agbp);
1777 69724867 : if (error)
1778 : return error;
1779 :
1780 69724822 : if (!pag->pagi_freecount) {
1781 835069 : if (!ok_alloc) {
1782 1937 : error = -EAGAIN;
1783 1937 : goto out_release;
1784 : }
1785 :
1786 833132 : error = xfs_ialloc_ag_alloc(pag, *tpp, agbp);
1787 833127 : if (error < 0)
1788 283763 : goto out_release;
1789 :
1790 : /*
1791 : * We successfully allocated space for an inode cluster in this
1792 : * AG. Roll the transaction so that we can allocate one of the
1793 : * new inodes.
1794 : */
1795 549364 : ASSERT(pag->pagi_freecount > 0);
1796 549364 : error = xfs_dialloc_roll(tpp, agbp);
1797 549351 : if (error)
1798 0 : goto out_release;
1799 : }
1800 :
1801 : /* Allocate an inode in the found AG */
1802 69439104 : error = xfs_dialloc_ag(pag, *tpp, agbp, parent, &ino);
1803 69440480 : if (!error)
1804 69441522 : *new_ino = ino;
1805 : return error;
1806 :
1807 285700 : out_release:
1808 285700 : xfs_trans_brelse(*tpp, agbp);
1809 285700 : return error;
1810 : }
1811 :
1812 : /*
1813 : * Allocate an on-disk inode.
1814 : *
1815 : * Mode is used to tell whether the new inode is a directory and hence where to
1816 : * locate it. The on-disk inode that is allocated will be returned in @new_ino
1817 : * on success, otherwise an error will be set to indicate the failure (e.g.
1818 : * -ENOSPC).
1819 : */
1820 : int
1821 69580537 : xfs_dialloc(
1822 : struct xfs_trans **tpp,
1823 : xfs_ino_t parent,
1824 : umode_t mode,
1825 : xfs_ino_t *new_ino)
1826 : {
1827 69580537 : struct xfs_mount *mp = (*tpp)->t_mountp;
1828 69580537 : xfs_agnumber_t agno;
1829 69580537 : int error = 0;
1830 69580537 : xfs_agnumber_t start_agno;
1831 69580537 : struct xfs_perag *pag;
1832 69580537 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
1833 69580537 : bool ok_alloc = true;
1834 69580537 : bool low_space = false;
1835 69580537 : int flags;
1836 69580537 : xfs_ino_t ino = NULLFSINO;
1837 :
1838 : /*
1839 : * Directories, symlinks, and regular files frequently allocate at least
1840 : * one block, so factor that potential expansion when we examine whether
1841 : * an AG has enough space for file creation.
1842 : */
1843 69580537 : if (S_ISDIR(mode))
1844 6312068 : start_agno = (atomic_inc_return(&mp->m_agirotor) - 1) %
1845 6312068 : mp->m_maxagi;
1846 : else {
1847 63268469 : start_agno = XFS_INO_TO_AGNO(mp, parent);
1848 63268469 : if (start_agno >= mp->m_maxagi)
1849 0 : start_agno = 0;
1850 : }
1851 :
1852 : /*
1853 : * If we have already hit the ceiling of inode blocks then clear
1854 : * ok_alloc so we scan all available agi structures for a free
1855 : * inode.
1856 : *
1857 : * Read rough value of mp->m_icount by percpu_counter_read_positive,
1858 : * which will sacrifice the preciseness but improve the performance.
1859 : */
1860 69580537 : if (igeo->maxicount &&
1861 69581669 : percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos
1862 : > igeo->maxicount) {
1863 146363 : ok_alloc = false;
1864 : }
1865 :
1866 : /*
1867 : * If we are near to ENOSPC, we want to prefer allocation from AGs that
1868 : * have free inodes in them rather than use up free space allocating new
1869 : * inode chunks. Hence we turn off allocation for the first non-blocking
1870 : * pass through the AGs if we are near ENOSPC to consume free inodes
1871 : * that we can immediately allocate, but then we allow allocation on the
1872 : * second pass if we fail to find an AG with free inodes in it.
1873 : */
1874 69580537 : if (percpu_counter_read_positive(&mp->m_fdblocks) <
1875 69580537 : mp->m_low_space[XFS_LOWSP_1_PCNT]) {
1876 232223 : ok_alloc = false;
1877 232223 : low_space = true;
1878 : }
1879 :
1880 : /*
1881 : * Loop until we find an allocation group that either has free inodes
1882 : * or in which we can allocate some inodes. Iterate through the
1883 : * allocation groups upward, wrapping at the end.
1884 : */
1885 69580537 : flags = XFS_ALLOC_FLAG_TRYLOCK;
1886 69731850 : retry:
1887 71310847 : for_each_perag_wrap_at(mp, start_agno, mp->m_maxagi, agno, pag) {
1888 71020224 : if (xfs_dialloc_good_ag(pag, *tpp, mode, flags, ok_alloc)) {
1889 69724263 : error = xfs_dialloc_try_ag(pag, tpp, parent,
1890 : &ino, ok_alloc);
1891 69727429 : if (error != -EAGAIN)
1892 : break;
1893 : error = 0;
1894 : }
1895 :
1896 3157994 : if (xfs_is_shutdown(mp)) {
1897 : error = -EFSCORRUPTED;
1898 : break;
1899 : }
1900 : }
1901 69739275 : if (pag)
1902 69441217 : xfs_perag_rele(pag);
1903 69736040 : if (error)
1904 460 : return error;
1905 69735580 : if (ino == NULLFSINO) {
1906 294208 : if (flags) {
1907 151313 : flags = 0;
1908 151313 : if (low_space)
1909 4594 : ok_alloc = true;
1910 151313 : goto retry;
1911 : }
1912 : return -ENOSPC;
1913 : }
1914 69441372 : *new_ino = ino;
1915 69441372 : return 0;
1916 : }
1917 :
1918 : /*
1919 : * Free the blocks of an inode chunk. We must consider that the inode chunk
1920 : * might be sparse and only free the regions that are allocated as part of the
1921 : * chunk.
1922 : */
1923 : static int
1924 78397 : xfs_difree_inode_chunk(
1925 : struct xfs_trans *tp,
1926 : xfs_agnumber_t agno,
1927 : struct xfs_inobt_rec_incore *rec)
1928 : {
1929 78397 : struct xfs_mount *mp = tp->t_mountp;
1930 78397 : xfs_agblock_t sagbno = XFS_AGINO_TO_AGBNO(mp,
1931 : rec->ir_startino);
1932 78397 : int startidx, endidx;
1933 78397 : int nextbit;
1934 78397 : xfs_agblock_t agbno;
1935 78397 : int contigblk;
1936 78397 : DECLARE_BITMAP(holemask, XFS_INOBT_HOLEMASK_BITS);
1937 :
1938 78397 : if (!xfs_inobt_issparse(rec->ir_holemask)) {
1939 : /* not sparse, calculate extent info directly */
1940 58803 : return xfs_free_extent_later(tp,
1941 58803 : XFS_AGB_TO_FSB(mp, agno, sagbno),
1942 58803 : M_IGEO(mp)->ialloc_blks, &XFS_RMAP_OINFO_INODES,
1943 : XFS_AG_RESV_NONE);
1944 : }
1945 :
1946 : /* holemask is only 16-bits (fits in an unsigned long) */
1947 19594 : ASSERT(sizeof(rec->ir_holemask) <= sizeof(holemask[0]));
1948 19594 : holemask[0] = rec->ir_holemask;
1949 :
1950 : /*
1951 : * Find contiguous ranges of zeroes (i.e., allocated regions) in the
1952 : * holemask and convert the start/end index of each range to an extent.
1953 : * We start with the start and end index both pointing at the first 0 in
1954 : * the mask.
1955 : */
1956 19594 : startidx = endidx = find_first_zero_bit(holemask,
1957 : XFS_INOBT_HOLEMASK_BITS);
1958 19594 : nextbit = startidx + 1;
1959 176317 : while (startidx < XFS_INOBT_HOLEMASK_BITS) {
1960 156729 : int error;
1961 :
1962 156729 : nextbit = find_next_zero_bit(holemask, XFS_INOBT_HOLEMASK_BITS,
1963 : nextbit);
1964 : /*
1965 : * If the next zero bit is contiguous, update the end index of
1966 : * the current range and continue.
1967 : */
1968 156721 : if (nextbit != XFS_INOBT_HOLEMASK_BITS &&
1969 137128 : nextbit == endidx + 1) {
1970 137132 : endidx = nextbit;
1971 137132 : goto next;
1972 : }
1973 :
1974 : /*
1975 : * nextbit is not contiguous with the current end index. Convert
1976 : * the current start/end to an extent and add it to the free
1977 : * list.
1978 : */
1979 19589 : agbno = sagbno + (startidx * XFS_INODES_PER_HOLEMASK_BIT) /
1980 19589 : mp->m_sb.sb_inopblock;
1981 19589 : contigblk = ((endidx - startidx + 1) *
1982 19589 : XFS_INODES_PER_HOLEMASK_BIT) /
1983 : mp->m_sb.sb_inopblock;
1984 :
1985 19589 : ASSERT(agbno % mp->m_sb.sb_spino_align == 0);
1986 19589 : ASSERT(contigblk % mp->m_sb.sb_spino_align == 0);
1987 19589 : error = xfs_free_extent_later(tp,
1988 19589 : XFS_AGB_TO_FSB(mp, agno, agbno), contigblk,
1989 : &XFS_RMAP_OINFO_INODES, XFS_AG_RESV_NONE);
1990 19591 : if (error)
1991 0 : return error;
1992 :
1993 : /* reset range to current bit and carry on... */
1994 : startidx = endidx = nextbit;
1995 :
1996 156723 : next:
1997 156723 : nextbit++;
1998 : }
1999 : return 0;
2000 : }
2001 :
/*
 * Free one inode in the inode btree: mark it free in its chunk record and,
 * if that makes the whole chunk free (and the fs isn't keeping empty chunks),
 * delete the record and schedule the chunk's disk space for freeing.
 *
 * On success *orec holds the updated incore record so the caller can apply
 * the matching change to the finobt. On the whole-chunk-free path, @xic is
 * filled in (deleted/first_ino/alloc) for the caller's inode cluster
 * invalidation.
 */
STATIC int
xfs_difree_inobt(
	struct xfs_perag		*pag,
	struct xfs_trans		*tp,
	struct xfs_buf			*agbp,
	xfs_agino_t			agino,
	struct xfs_icluster		*xic,
	struct xfs_inobt_rec_incore	*orec)
{
	struct xfs_mount		*mp = pag->pag_mount;
	struct xfs_agi			*agi = agbp->b_addr;
	struct xfs_btree_cur		*cur;
	struct xfs_inobt_rec_incore	rec;
	int				ilen;
	int				error;
	int				i;
	int				off;

	ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
	ASSERT(XFS_AGINO_TO_AGBNO(mp, agino) < be32_to_cpu(agi->agi_length));

	/*
	 * Initialize the cursor.
	 */
	cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);

	/* debug cross-check of the AGI freecount against the btree */
	error = xfs_check_agi_freecount(cur);
	if (error)
		goto error0;

	/*
	 * Look for the entry describing this inode.
	 */
	if ((error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i))) {
		xfs_warn(mp, "%s: xfs_inobt_lookup() returned error %d.",
			__func__, error);
		goto error0;
	}
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	error = xfs_inobt_get_rec(cur, &rec, &i);
	if (error) {
		xfs_warn(mp, "%s: xfs_inobt_get_rec() returned error %d.",
			__func__, error);
		goto error0;
	}
	if (XFS_IS_CORRUPT(mp, i != 1)) {
		xfs_btree_mark_sick(cur);
		error = -EFSCORRUPTED;
		goto error0;
	}
	/*
	 * Get the offset in the inode chunk.
	 */
	off = agino - rec.ir_startino;
	ASSERT(off >= 0 && off < XFS_INODES_PER_CHUNK);
	/* freeing an already-free inode would corrupt the counts */
	ASSERT(!(rec.ir_free & XFS_INOBT_MASK(off)));
	/*
	 * Mark the inode free & increment the count.
	 */
	rec.ir_free |= XFS_INOBT_MASK(off);
	rec.ir_freecount++;

	/*
	 * When an inode chunk is free, it becomes eligible for removal. Don't
	 * remove the chunk if the block size is large enough for multiple inode
	 * chunks (that might not be free).
	 */
	if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
	    mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
		xic->deleted = true;
		xic->first_ino = XFS_AGINO_TO_INO(mp, pag->pag_agno,
				rec.ir_startino);
		xic->alloc = xfs_inobt_irec_to_allocmask(&rec);

		/*
		 * Remove the inode cluster from the AGI B+Tree, adjust the
		 * AGI and Superblock inode counts, and mark the disk space
		 * to be freed when the transaction is committed.
		 */
		ilen = rec.ir_freecount;
		/*
		 * The inode being freed here was never counted in
		 * agi_freecount, hence the "ilen - 1" adjustments below.
		 */
		be32_add_cpu(&agi->agi_count, -ilen);
		be32_add_cpu(&agi->agi_freecount, -(ilen - 1));
		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_COUNT | XFS_AGI_FREECOUNT);
		pag->pagi_freecount -= ilen - 1;
		pag->pagi_count -= ilen;
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, -ilen);
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, -(ilen - 1));

		if ((error = xfs_btree_delete(cur, &i))) {
			xfs_warn(mp, "%s: xfs_btree_delete returned error %d.",
				__func__, error);
			goto error0;
		}

		error = xfs_difree_inode_chunk(tp, pag->pag_agno, &rec);
		if (error)
			goto error0;
	} else {
		xic->deleted = false;

		error = xfs_inobt_update(cur, &rec);
		if (error) {
			xfs_warn(mp, "%s: xfs_inobt_update returned error %d.",
				__func__, error);
			goto error0;
		}

		/*
		 * Change the inode free counts and log the ag/sb changes.
		 */
		be32_add_cpu(&agi->agi_freecount, 1);
		xfs_ialloc_log_agi(tp, agbp, XFS_AGI_FREECOUNT);
		pag->pagi_freecount++;
		xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, 1);
	}

	error = xfs_check_agi_freecount(cur);
	if (error)
		goto error0;

	*orec = rec;
	xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
	return 0;

error0:
	xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
	return error;
}
2134 :
2135 : /*
2136 : * Free an inode in the free inode btree.
2137 : */
2138 : STATIC int
2139 47545957 : xfs_difree_finobt(
2140 : struct xfs_perag *pag,
2141 : struct xfs_trans *tp,
2142 : struct xfs_buf *agbp,
2143 : xfs_agino_t agino,
2144 : struct xfs_inobt_rec_incore *ibtrec) /* inobt record */
2145 : {
2146 47545957 : struct xfs_mount *mp = pag->pag_mount;
2147 47545957 : struct xfs_btree_cur *cur;
2148 47545957 : struct xfs_inobt_rec_incore rec;
2149 47545957 : int offset = agino - ibtrec->ir_startino;
2150 47545957 : int error;
2151 47545957 : int i;
2152 :
2153 47545957 : cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_FINO);
2154 :
2155 47539722 : error = xfs_inobt_lookup(cur, ibtrec->ir_startino, XFS_LOOKUP_EQ, &i);
2156 47549470 : if (error)
2157 3 : goto error;
2158 47549467 : if (i == 0) {
2159 : /*
2160 : * If the record does not exist in the finobt, we must have just
2161 : * freed an inode in a previously fully allocated chunk. If not,
2162 : * something is out of sync.
2163 : */
2164 13973178 : if (XFS_IS_CORRUPT(mp, ibtrec->ir_freecount != 1)) {
2165 0 : xfs_btree_mark_sick(cur);
2166 0 : error = -EFSCORRUPTED;
2167 0 : goto error;
2168 : }
2169 :
2170 13973178 : error = xfs_inobt_insert_rec(cur, ibtrec->ir_holemask,
2171 : ibtrec->ir_count,
2172 : ibtrec->ir_freecount,
2173 : ibtrec->ir_free, &i);
2174 13972588 : if (error)
2175 0 : goto error;
2176 13972588 : ASSERT(i == 1);
2177 :
2178 13972588 : goto out;
2179 : }
2180 :
2181 : /*
2182 : * Read and update the existing record. We could just copy the ibtrec
2183 : * across here, but that would defeat the purpose of having redundant
2184 : * metadata. By making the modifications independently, we can catch
2185 : * corruptions that we wouldn't see if we just copied from one record
2186 : * to another.
2187 : */
2188 33576289 : error = xfs_inobt_get_rec(cur, &rec, &i);
2189 33576696 : if (error)
2190 0 : goto error;
2191 33576696 : if (XFS_IS_CORRUPT(mp, i != 1)) {
2192 0 : xfs_btree_mark_sick(cur);
2193 0 : error = -EFSCORRUPTED;
2194 0 : goto error;
2195 : }
2196 :
2197 33576696 : rec.ir_free |= XFS_INOBT_MASK(offset);
2198 33576696 : rec.ir_freecount++;
2199 :
2200 33576696 : if (XFS_IS_CORRUPT(mp,
2201 : rec.ir_free != ibtrec->ir_free ||
2202 : rec.ir_freecount != ibtrec->ir_freecount)) {
2203 0 : xfs_btree_mark_sick(cur);
2204 0 : error = -EFSCORRUPTED;
2205 0 : goto error;
2206 : }
2207 :
2208 : /*
2209 : * The content of inobt records should always match between the inobt
2210 : * and finobt. The lifecycle of records in the finobt is different from
2211 : * the inobt in that the finobt only tracks records with at least one
2212 : * free inode. Hence, if all of the inodes are free and we aren't
2213 : * keeping inode chunks permanently on disk, remove the record.
2214 : * Otherwise, update the record with the new information.
2215 : *
2216 : * Note that we currently can't free chunks when the block size is large
2217 : * enough for multiple chunks. Leave the finobt record to remain in sync
2218 : * with the inobt.
2219 : */
2220 33576696 : if (!xfs_has_ikeep(mp) && rec.ir_free == XFS_INOBT_ALL_FREE &&
2221 78405 : mp->m_sb.sb_inopblock <= XFS_INODES_PER_CHUNK) {
2222 78402 : error = xfs_btree_delete(cur, &i);
2223 78402 : if (error)
2224 0 : goto error;
2225 78402 : ASSERT(i == 1);
2226 : } else {
2227 33498294 : error = xfs_inobt_update(cur, &rec);
2228 33498252 : if (error)
2229 0 : goto error;
2230 : }
2231 :
2232 33498252 : out:
2233 47549242 : error = xfs_check_agi_freecount(cur);
2234 47546727 : if (error)
2235 0 : goto error;
2236 :
2237 47546727 : xfs_btree_del_cursor(cur, XFS_BTREE_NOERROR);
2238 47546727 : return 0;
2239 :
2240 3 : error:
2241 3 : xfs_btree_del_cursor(cur, XFS_BTREE_ERROR);
2242 3 : return error;
2243 : }
2244 :
2245 : /*
2246 : * Free disk inode. Carefully avoids touching the incore inode, all
2247 : * manipulations incore are the caller's responsibility.
2248 : * The on-disk inode is not changed by this operation, only the
2249 : * btree (free inode mask) is changed.
2250 : */
2251 : int
2252 47539537 : xfs_difree(
2253 : struct xfs_trans *tp,
2254 : struct xfs_perag *pag,
2255 : xfs_ino_t inode,
2256 : struct xfs_icluster *xic)
2257 : {
2258 : /* REFERENCED */
2259 47539537 : xfs_agblock_t agbno; /* block number containing inode */
2260 47539537 : struct xfs_buf *agbp; /* buffer for allocation group header */
2261 47539537 : xfs_agino_t agino; /* allocation group inode number */
2262 47539537 : int error; /* error return value */
2263 47539537 : struct xfs_mount *mp = tp->t_mountp;
2264 47539537 : struct xfs_inobt_rec_incore rec;/* btree record */
2265 :
2266 : /*
2267 : * Break up inode number into its components.
2268 : */
2269 47539537 : if (pag->pag_agno != XFS_INO_TO_AGNO(mp, inode)) {
2270 0 : xfs_warn(mp, "%s: agno != pag->pag_agno (%d != %d).",
2271 : __func__, XFS_INO_TO_AGNO(mp, inode), pag->pag_agno);
2272 0 : ASSERT(0);
2273 0 : return -EINVAL;
2274 : }
2275 47539537 : agino = XFS_INO_TO_AGINO(mp, inode);
2276 47539537 : if (inode != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
2277 0 : xfs_warn(mp, "%s: inode != XFS_AGINO_TO_INO() (%llu != %llu).",
2278 : __func__, (unsigned long long)inode,
2279 : (unsigned long long)XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
2280 0 : ASSERT(0);
2281 0 : return -EINVAL;
2282 : }
2283 47539537 : agbno = XFS_AGINO_TO_AGBNO(mp, agino);
2284 47539537 : if (agbno >= mp->m_sb.sb_agblocks) {
2285 0 : xfs_warn(mp, "%s: agbno >= mp->m_sb.sb_agblocks (%d >= %d).",
2286 : __func__, agbno, mp->m_sb.sb_agblocks);
2287 0 : ASSERT(0);
2288 0 : return -EINVAL;
2289 : }
2290 : /*
2291 : * Get the allocation group header.
2292 : */
2293 47539537 : error = xfs_ialloc_read_agi(pag, tp, &agbp);
2294 47541421 : if (error) {
2295 114 : xfs_warn(mp, "%s: xfs_ialloc_read_agi() returned error %d.",
2296 : __func__, error);
2297 114 : return error;
2298 : }
2299 :
2300 : /*
2301 : * Fix up the inode allocation btree.
2302 : */
2303 47541307 : error = xfs_difree_inobt(pag, tp, agbp, agino, xic, &rec);
2304 47544237 : if (error)
2305 5 : goto error0;
2306 :
2307 : /*
2308 : * Fix up the free inode btree.
2309 : */
2310 47544232 : if (xfs_has_finobt(mp)) {
2311 47541187 : error = xfs_difree_finobt(pag, tp, agbp, agino, &rec);
2312 47548778 : if (error)
2313 3 : goto error0;
2314 : }
2315 :
2316 : return 0;
2317 :
2318 : error0:
2319 : return error;
2320 : }
2321 :
2322 : STATIC int
2323 424301368 : xfs_imap_lookup(
2324 : struct xfs_perag *pag,
2325 : struct xfs_trans *tp,
2326 : xfs_agino_t agino,
2327 : xfs_agblock_t agbno,
2328 : xfs_agblock_t *chunk_agbno,
2329 : xfs_agblock_t *offset_agbno,
2330 : int flags)
2331 : {
2332 424301368 : struct xfs_mount *mp = pag->pag_mount;
2333 424301368 : struct xfs_inobt_rec_incore rec;
2334 424301368 : struct xfs_btree_cur *cur;
2335 424301368 : struct xfs_buf *agbp;
2336 424301368 : int error;
2337 424301368 : int i;
2338 :
2339 424301368 : error = xfs_ialloc_read_agi(pag, tp, &agbp);
2340 424392849 : if (error) {
2341 88 : xfs_alert(mp,
2342 : "%s: xfs_ialloc_read_agi() returned error %d, agno %d",
2343 : __func__, error, pag->pag_agno);
2344 88 : return error;
2345 : }
2346 :
2347 : /*
2348 : * Lookup the inode record for the given agino. If the record cannot be
2349 : * found, then it's an invalid inode number and we should abort. Once
2350 : * we have a record, we need to ensure it contains the inode number
2351 : * we are looking up.
2352 : */
2353 424392761 : cur = xfs_inobt_init_cursor(pag, tp, agbp, XFS_BTNUM_INO);
2354 424394332 : error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
2355 424399047 : if (!error) {
2356 424396436 : if (i)
2357 424385606 : error = xfs_inobt_get_rec(cur, &rec, &i);
2358 424407643 : if (!error && i == 0)
2359 1203 : error = -EINVAL;
2360 : }
2361 :
2362 424410254 : xfs_trans_brelse(tp, agbp);
2363 424396696 : xfs_btree_del_cursor(cur, error);
2364 424398960 : if (error)
2365 : return error;
2366 :
2367 : /* check that the returned record contains the required inode */
2368 424397739 : if (rec.ir_startino > agino ||
2369 424397739 : rec.ir_startino + M_IGEO(mp)->ialloc_inos <= agino)
2370 : return -EINVAL;
2371 :
2372 : /* for untrusted inodes check it is allocated first */
2373 424310870 : if ((flags & XFS_IGET_UNTRUSTED) &&
2374 424308306 : (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
2375 : return -EINVAL;
2376 :
2377 424307724 : *chunk_agbno = XFS_AGINO_TO_AGBNO(mp, rec.ir_startino);
2378 424307724 : *offset_agbno = agbno - *chunk_agbno;
2379 424307724 : return 0;
2380 : }
2381 :
2382 : /*
2383 : * Return the location of the inode in imap, for mapping it into a buffer.
2384 : */
2385 : int
2386 457003441 : xfs_imap(
2387 : struct xfs_perag *pag,
2388 : struct xfs_trans *tp,
2389 : xfs_ino_t ino, /* inode to locate */
2390 : struct xfs_imap *imap, /* location map structure */
2391 : uint flags) /* flags for inode btree lookup */
2392 : {
2393 457003441 : struct xfs_mount *mp = pag->pag_mount;
2394 457003441 : xfs_agblock_t agbno; /* block number of inode in the alloc group */
2395 457003441 : xfs_agino_t agino; /* inode number within alloc group */
2396 457003441 : xfs_agblock_t chunk_agbno; /* first block in inode chunk */
2397 457003441 : xfs_agblock_t cluster_agbno; /* first block in inode cluster */
2398 457003441 : int error; /* error code */
2399 457003441 : int offset; /* index of inode in its buffer */
2400 457003441 : xfs_agblock_t offset_agbno; /* blks from chunk start to inode */
2401 :
2402 457003441 : ASSERT(ino != NULLFSINO);
2403 :
2404 : /*
2405 : * Split up the inode number into its parts.
2406 : */
2407 457003441 : agino = XFS_INO_TO_AGINO(mp, ino);
2408 457003441 : agbno = XFS_AGINO_TO_AGBNO(mp, agino);
2409 457003441 : if (agbno >= mp->m_sb.sb_agblocks ||
2410 456985707 : ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
2411 17065 : error = -EINVAL;
2412 : #ifdef DEBUG
2413 : /*
2414 : * Don't output diagnostic information for untrusted inodes
2415 : * as they can be invalid without implying corruption.
2416 : */
2417 17065 : if (flags & XFS_IGET_UNTRUSTED)
2418 : return error;
2419 0 : if (agbno >= mp->m_sb.sb_agblocks) {
2420 0 : xfs_alert(mp,
2421 : "%s: agbno (0x%llx) >= mp->m_sb.sb_agblocks (0x%lx)",
2422 : __func__, (unsigned long long)agbno,
2423 : (unsigned long)mp->m_sb.sb_agblocks);
2424 : }
2425 0 : if (ino != XFS_AGINO_TO_INO(mp, pag->pag_agno, agino)) {
2426 0 : xfs_alert(mp,
2427 : "%s: ino (0x%llx) != XFS_AGINO_TO_INO() (0x%llx)",
2428 : __func__, ino,
2429 : XFS_AGINO_TO_INO(mp, pag->pag_agno, agino));
2430 : }
2431 0 : xfs_stack_trace();
2432 : #endif /* DEBUG */
2433 0 : return error;
2434 : }
2435 :
2436 : /*
2437 : * For bulkstat and handle lookups, we have an untrusted inode number
2438 : * that we have to verify is valid. We cannot do this just by reading
2439 : * the inode buffer as it may have been unlinked and removed leaving
2440 : * inodes in stale state on disk. Hence we have to do a btree lookup
2441 : * in all cases where an untrusted inode number is passed.
2442 : */
2443 456986376 : if (flags & XFS_IGET_UNTRUSTED) {
2444 424372220 : error = xfs_imap_lookup(pag, tp, agino, agbno,
2445 : &chunk_agbno, &offset_agbno, flags);
2446 424378968 : if (error)
2447 : return error;
2448 424308392 : goto out_map;
2449 : }
2450 :
2451 : /*
2452 : * If the inode cluster size is the same as the blocksize or
2453 : * smaller we get to the buffer by simple arithmetics.
2454 : */
2455 32614156 : if (M_IGEO(mp)->blocks_per_cluster == 1) {
2456 2446 : offset = XFS_INO_TO_OFFSET(mp, ino);
2457 2446 : ASSERT(offset < mp->m_sb.sb_inopblock);
2458 :
2459 2446 : imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, agbno);
2460 2446 : imap->im_len = XFS_FSB_TO_BB(mp, 1);
2461 2446 : imap->im_boffset = (unsigned short)(offset <<
2462 2446 : mp->m_sb.sb_inodelog);
2463 2446 : return 0;
2464 : }
2465 :
2466 : /*
2467 : * If the inode chunks are aligned then use simple maths to
2468 : * find the location. Otherwise we have to do a btree
2469 : * lookup to find the location.
2470 : */
2471 32611710 : if (M_IGEO(mp)->inoalign_mask) {
2472 32611710 : offset_agbno = agbno & M_IGEO(mp)->inoalign_mask;
2473 32611710 : chunk_agbno = agbno - offset_agbno;
2474 : } else {
2475 0 : error = xfs_imap_lookup(pag, tp, agino, agbno,
2476 : &chunk_agbno, &offset_agbno, flags);
2477 0 : if (error)
2478 : return error;
2479 : }
2480 :
2481 0 : out_map:
2482 456920102 : ASSERT(agbno >= chunk_agbno);
2483 456920102 : cluster_agbno = chunk_agbno +
2484 456920102 : ((offset_agbno / M_IGEO(mp)->blocks_per_cluster) *
2485 : M_IGEO(mp)->blocks_per_cluster);
2486 456920102 : offset = ((agbno - cluster_agbno) * mp->m_sb.sb_inopblock) +
2487 456920102 : XFS_INO_TO_OFFSET(mp, ino);
2488 :
2489 456920102 : imap->im_blkno = XFS_AGB_TO_DADDR(mp, pag->pag_agno, cluster_agbno);
2490 456920102 : imap->im_len = XFS_FSB_TO_BB(mp, M_IGEO(mp)->blocks_per_cluster);
2491 456920102 : imap->im_boffset = (unsigned short)(offset << mp->m_sb.sb_inodelog);
2492 :
2493 : /*
2494 : * If the inode number maps to a block outside the bounds
2495 : * of the file system then return NULL rather than calling
2496 : * read_buf and panicing when we get an error from the
2497 : * driver.
2498 : */
2499 456920102 : if ((imap->im_blkno + imap->im_len) >
2500 456920102 : XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks)) {
2501 0 : xfs_alert(mp,
2502 : "%s: (im_blkno (0x%llx) + im_len (0x%llx)) > sb_dblocks (0x%llx)",
2503 : __func__, (unsigned long long) imap->im_blkno,
2504 : (unsigned long long) imap->im_len,
2505 : XFS_FSB_TO_BB(mp, mp->m_sb.sb_dblocks));
2506 0 : return -EINVAL;
2507 : }
2508 : return 0;
2509 : }
2510 :
2511 : /*
2512 : * Log specified fields for the ag hdr (inode section). The growth of the agi
2513 : * structure over time requires that we interpret the buffer as two logical
2514 : * regions delineated by the end of the unlinked list. This is due to the size
2515 : * of the hash table and its location in the middle of the agi.
2516 : *
2517 : * For example, a request to log a field before agi_unlinked and a field after
2518 : * agi_unlinked could cause us to log the entire hash table and use an excessive
2519 : * amount of log space. To avoid this behavior, log the region up through
2520 : * agi_unlinked in one call and the region after agi_unlinked through the end of
2521 : * the structure in another.
2522 : */
2523 : void
2524 117905162 : xfs_ialloc_log_agi(
2525 : struct xfs_trans *tp,
2526 : struct xfs_buf *bp,
2527 : uint32_t fields)
2528 : {
2529 117905162 : int first; /* first byte number */
2530 117905162 : int last; /* last byte number */
2531 117905162 : static const short offsets[] = { /* field starting offsets */
2532 : /* keep in sync with bit definitions */
2533 : offsetof(xfs_agi_t, agi_magicnum),
2534 : offsetof(xfs_agi_t, agi_versionnum),
2535 : offsetof(xfs_agi_t, agi_seqno),
2536 : offsetof(xfs_agi_t, agi_length),
2537 : offsetof(xfs_agi_t, agi_count),
2538 : offsetof(xfs_agi_t, agi_root),
2539 : offsetof(xfs_agi_t, agi_level),
2540 : offsetof(xfs_agi_t, agi_freecount),
2541 : offsetof(xfs_agi_t, agi_newino),
2542 : offsetof(xfs_agi_t, agi_dirino),
2543 : offsetof(xfs_agi_t, agi_unlinked),
2544 : offsetof(xfs_agi_t, agi_free_root),
2545 : offsetof(xfs_agi_t, agi_free_level),
2546 : offsetof(xfs_agi_t, agi_iblocks),
2547 : sizeof(xfs_agi_t)
2548 : };
2549 : #ifdef DEBUG
2550 117905162 : struct xfs_agi *agi = bp->b_addr;
2551 :
2552 117905162 : ASSERT(agi->agi_magicnum == cpu_to_be32(XFS_AGI_MAGIC));
2553 : #endif
2554 :
2555 : /*
2556 : * Compute byte offsets for the first and last fields in the first
2557 : * region and log the agi buffer. This only logs up through
2558 : * agi_unlinked.
2559 : */
2560 117905162 : if (fields & XFS_AGI_ALL_BITS_R1) {
2561 117871913 : xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R1,
2562 : &first, &last);
2563 117865491 : xfs_trans_log_buf(tp, bp, first, last);
2564 : }
2565 :
2566 : /*
2567 : * Mask off the bits in the first region and calculate the first and
2568 : * last field offsets for any bits in the second region.
2569 : */
2570 117903863 : fields &= ~XFS_AGI_ALL_BITS_R1;
2571 117903863 : if (fields) {
2572 60983 : xfs_btree_offsets(fields, offsets, XFS_AGI_NUM_BITS_R2,
2573 : &first, &last);
2574 60985 : xfs_trans_log_buf(tp, bp, first, last);
2575 : }
2576 117903866 : }
2577 :
2578 : static xfs_failaddr_t
2579 1129772 : xfs_agi_verify(
2580 : struct xfs_buf *bp)
2581 : {
2582 1129772 : struct xfs_mount *mp = bp->b_mount;
2583 1129772 : struct xfs_agi *agi = bp->b_addr;
2584 1129772 : xfs_failaddr_t fa;
2585 1129772 : uint32_t agi_seqno = be32_to_cpu(agi->agi_seqno);
2586 1129772 : uint32_t agi_length = be32_to_cpu(agi->agi_length);
2587 1129772 : int i;
2588 :
2589 1129772 : if (xfs_has_crc(mp)) {
2590 1127356 : if (!uuid_equal(&agi->agi_uuid, &mp->m_sb.sb_meta_uuid))
2591 0 : return __this_address;
2592 1127357 : if (!xfs_log_check_lsn(mp, be64_to_cpu(agi->agi_lsn)))
2593 0 : return __this_address;
2594 : }
2595 :
2596 : /*
2597 : * Validate the magic number of the agi block.
2598 : */
2599 1129771 : if (!xfs_verify_magic(bp, agi->agi_magicnum))
2600 0 : return __this_address;
2601 1129770 : if (!XFS_AGI_GOOD_VERSION(be32_to_cpu(agi->agi_versionnum)))
2602 0 : return __this_address;
2603 :
2604 1129772 : fa = xfs_validate_ag_length(bp, agi_seqno, agi_length);
2605 1129772 : if (fa)
2606 : return fa;
2607 :
2608 1129771 : if (be32_to_cpu(agi->agi_level) < 1 ||
2609 2259542 : be32_to_cpu(agi->agi_level) > M_IGEO(mp)->inobt_maxlevels)
2610 3 : return __this_address;
2611 :
2612 1129768 : if (xfs_has_finobt(mp) &&
2613 2254634 : (be32_to_cpu(agi->agi_free_level) < 1 ||
2614 1127317 : be32_to_cpu(agi->agi_free_level) > M_IGEO(mp)->inobt_maxlevels))
2615 0 : return __this_address;
2616 :
2617 73434840 : for (i = 0; i < XFS_AGI_UNLINKED_BUCKETS; i++) {
2618 72305067 : if (agi->agi_unlinked[i] == cpu_to_be32(NULLAGINO))
2619 72004412 : continue;
2620 300655 : if (!xfs_verify_ino(mp, be32_to_cpu(agi->agi_unlinked[i])))
2621 0 : return __this_address;
2622 : }
2623 :
2624 : return NULL;
2625 : }
2626 :
2627 : static void
2628 224646 : xfs_agi_read_verify(
2629 : struct xfs_buf *bp)
2630 : {
2631 224646 : struct xfs_mount *mp = bp->b_mount;
2632 224646 : xfs_failaddr_t fa;
2633 :
2634 449176 : if (xfs_has_crc(mp) &&
2635 : !xfs_buf_verify_cksum(bp, XFS_AGI_CRC_OFF))
2636 2 : xfs_verifier_error(bp, -EFSBADCRC, __this_address);
2637 : else {
2638 224644 : fa = xfs_agi_verify(bp);
2639 224644 : if (XFS_TEST_ERROR(fa, mp, XFS_ERRTAG_IALLOC_READ_AGI))
2640 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2641 : }
2642 224646 : }
2643 :
2644 : static void
2645 516335 : xfs_agi_write_verify(
2646 : struct xfs_buf *bp)
2647 : {
2648 516335 : struct xfs_mount *mp = bp->b_mount;
2649 516335 : struct xfs_buf_log_item *bip = bp->b_log_item;
2650 516335 : struct xfs_agi *agi = bp->b_addr;
2651 516335 : xfs_failaddr_t fa;
2652 :
2653 516335 : fa = xfs_agi_verify(bp);
2654 516335 : if (fa) {
2655 0 : xfs_verifier_error(bp, -EFSCORRUPTED, fa);
2656 0 : return;
2657 : }
2658 :
2659 516335 : if (!xfs_has_crc(mp))
2660 : return;
2661 :
2662 514039 : if (bip)
2663 508631 : agi->agi_lsn = cpu_to_be64(bip->bli_item.li_lsn);
2664 514039 : xfs_buf_update_cksum(bp, XFS_AGI_CRC_OFF);
2665 : }
2666 :
/* Buffer ops wiring the AGI verifiers into the buffer cache. */
const struct xfs_buf_ops xfs_agi_buf_ops = {
	.name = "xfs_agi",
	.magic = { cpu_to_be32(XFS_AGI_MAGIC), cpu_to_be32(XFS_AGI_MAGIC) },
	.verify_read = xfs_agi_read_verify,
	.verify_write = xfs_agi_write_verify,
	.verify_struct = xfs_agi_verify,
};
2674 :
2675 : /*
2676 : * Read in the allocation group header (inode allocation section)
2677 : */
2678 : int
2679 1517272287 : xfs_read_agi(
2680 : struct xfs_perag *pag,
2681 : struct xfs_trans *tp,
2682 : struct xfs_buf **agibpp)
2683 : {
2684 1517272287 : struct xfs_mount *mp = pag->pag_mount;
2685 1517272287 : int error;
2686 :
2687 1517272287 : trace_xfs_read_agi(pag->pag_mount, pag->pag_agno);
2688 :
2689 1517325634 : error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
2690 1517325634 : XFS_AG_DADDR(mp, pag->pag_agno, XFS_AGI_DADDR(mp)),
2691 : XFS_FSS_TO_BB(mp, 1), 0, agibpp, &xfs_agi_buf_ops);
2692 1517555397 : if (xfs_metadata_is_sick(error))
2693 2 : xfs_ag_mark_sick(pag, XFS_SICK_AG_AGI);
2694 1517555397 : if (error)
2695 : return error;
2696 1517547646 : if (tp)
2697 1517130515 : xfs_trans_buf_set_type(tp, *agibpp, XFS_BLFT_AGI_BUF);
2698 :
2699 1517541637 : xfs_buf_set_ref(*agibpp, XFS_AGI_REF);
2700 1517541637 : return 0;
2701 : }
2702 :
2703 : /*
2704 : * Read in the agi and initialise the per-ag data. If the caller supplies a
2705 : * @agibpp, return the locked AGI buffer to them, otherwise release it.
2706 : */
2707 : int
2708 1416605079 : xfs_ialloc_read_agi(
2709 : struct xfs_perag *pag,
2710 : struct xfs_trans *tp,
2711 : struct xfs_buf **agibpp)
2712 : {
2713 1416605079 : struct xfs_buf *agibp;
2714 1416605079 : struct xfs_agi *agi;
2715 1416605079 : int error;
2716 :
2717 1416605079 : trace_xfs_ialloc_read_agi(pag->pag_mount, pag->pag_agno);
2718 :
2719 1416664232 : error = xfs_read_agi(pag, tp, &agibp);
2720 1416818561 : if (error)
2721 : return error;
2722 :
2723 1416810838 : agi = agibp->b_addr;
2724 2833621676 : if (!xfs_perag_initialised_agi(pag)) {
2725 160190 : pag->pagi_freecount = be32_to_cpu(agi->agi_freecount);
2726 160190 : pag->pagi_count = be32_to_cpu(agi->agi_count);
2727 160190 : set_bit(XFS_AGSTATE_AGI_INIT, &pag->pag_opstate);
2728 : }
2729 :
2730 : /*
2731 : * It's possible for these to be out of sync if
2732 : * we are in the middle of a forced shutdown.
2733 : */
2734 2833621674 : ASSERT(pag->pagi_freecount == be32_to_cpu(agi->agi_freecount) ||
2735 : xfs_is_shutdown(pag->pag_mount));
2736 1416810837 : if (agibpp)
2737 1416765529 : *agibpp = agibp;
2738 : else
2739 45308 : xfs_trans_brelse(tp, agibp);
2740 : return 0;
2741 : }
2742 :
/*
 * How many inodes are backed by inode clusters ondisk?
 *
 * Walks inobt records overlapping the agino range [@low, @high] and counts,
 * into *allocated, the inodes in that range whose holemask bit is clear
 * (i.e. not a sparse hole in the chunk).
 */
STATIC int
xfs_ialloc_count_ondisk(
	struct xfs_btree_cur	*cur,
	xfs_agino_t		low,
	xfs_agino_t		high,
	unsigned int		*allocated)
{
	struct xfs_inobt_rec_incore	irec;
	unsigned int			ret = 0;
	int				has_record;
	int				error;

	/* Position on the last record starting at or before @low. */
	error = xfs_inobt_lookup(cur, low, XFS_LOOKUP_LE, &has_record);
	if (error)
		return error;

	while (has_record) {
		unsigned int		i, hole_idx;

		error = xfs_inobt_get_rec(cur, &irec, &has_record);
		if (error)
			return error;
		if (irec.ir_startino > high)
			break;

		/* Count in-range inodes not masked out as sparse holes. */
		for (i = 0; i < XFS_INODES_PER_CHUNK; i++) {
			if (irec.ir_startino + i < low)
				continue;
			if (irec.ir_startino + i > high)
				break;

			hole_idx = i / XFS_INODES_PER_HOLEMASK_BIT;
			if (!(irec.ir_holemask & (1U << hole_idx)))
				ret++;
		}

		error = xfs_btree_increment(cur, 0, &has_record);
		if (error)
			return error;
	}

	*allocated = ret;
	return 0;
}
2788 :
2789 : /* Is there an inode record covering a given extent? */
2790 : int
2791 2541496697 : xfs_ialloc_has_inodes_at_extent(
2792 : struct xfs_btree_cur *cur,
2793 : xfs_agblock_t bno,
2794 : xfs_extlen_t len,
2795 : enum xbtree_recpacking *outcome)
2796 : {
2797 2541496697 : xfs_agino_t agino;
2798 2541496697 : xfs_agino_t last_agino;
2799 2541496697 : unsigned int allocated;
2800 2541496697 : int error;
2801 :
2802 2541496697 : agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno);
2803 2541496697 : last_agino = XFS_AGB_TO_AGINO(cur->bc_mp, bno + len) - 1;
2804 :
2805 2541496697 : error = xfs_ialloc_count_ondisk(cur, agino, last_agino, &allocated);
2806 2541362690 : if (error)
2807 : return error;
2808 :
2809 2541362690 : if (allocated == 0)
2810 2535858868 : *outcome = XBTREE_RECPACKING_EMPTY;
2811 5503822 : else if (allocated == last_agino - agino + 1)
2812 5503822 : *outcome = XBTREE_RECPACKING_FULL;
2813 : else
2814 0 : *outcome = XBTREE_RECPACKING_SPARSE;
2815 : return 0;
2816 : }
2817 :
/* Accumulator passed through xfs_btree_query_all to the per-record callback. */
struct xfs_ialloc_count_inodes {
	xfs_agino_t			count;		/* inodes in all records seen */
	xfs_agino_t			freecount;	/* free inodes in those records */
};
2822 :
2823 : /* Record inode counts across all inobt records. */
2824 : STATIC int
2825 119985785 : xfs_ialloc_count_inodes_rec(
2826 : struct xfs_btree_cur *cur,
2827 : const union xfs_btree_rec *rec,
2828 : void *priv)
2829 : {
2830 119985785 : struct xfs_inobt_rec_incore irec;
2831 119985785 : struct xfs_ialloc_count_inodes *ci = priv;
2832 119985785 : xfs_failaddr_t fa;
2833 :
2834 119985785 : xfs_inobt_btrec_to_irec(cur->bc_mp, rec, &irec);
2835 119985620 : fa = xfs_inobt_check_irec(cur, &irec);
2836 119985955 : if (fa)
2837 0 : return xfs_inobt_complain_bad_rec(cur, fa, &irec);
2838 :
2839 119985955 : ci->count += irec.ir_count;
2840 119985955 : ci->freecount += irec.ir_freecount;
2841 :
2842 119985955 : return 0;
2843 : }
2844 :
2845 : /* Count allocated and free inodes under an inobt. */
2846 : int
2847 460481 : xfs_ialloc_count_inodes(
2848 : struct xfs_btree_cur *cur,
2849 : xfs_agino_t *count,
2850 : xfs_agino_t *freecount)
2851 : {
2852 460481 : struct xfs_ialloc_count_inodes ci = {0};
2853 460481 : int error;
2854 :
2855 460481 : ASSERT(cur->bc_btnum == XFS_BTNUM_INO);
2856 460481 : error = xfs_btree_query_all(cur, xfs_ialloc_count_inodes_rec, &ci);
2857 460481 : if (error)
2858 : return error;
2859 :
2860 460481 : *count = ci.count;
2861 460481 : *freecount = ci.freecount;
2862 460481 : return 0;
2863 : }
2864 :
/*
 * Initialize inode-related geometry information.
 *
 * Compute the inode btree min and max levels and set maxicount.
 *
 * Set the inode cluster size. This may still be overridden by the file
 * system block size if it is larger than the chosen cluster size.
 *
 * For v5 filesystems, scale the cluster size with the inode size to keep a
 * constant ratio of inode per cluster buffer, but only if mkfs has set the
 * inode alignment value appropriately for larger cluster sizes.
 *
 * Then compute the inode cluster alignment information.
 *
 * Note: later fields derive from earlier ones (cluster size feeds
 * blocks_per_cluster, which feeds the alignment checks), so the statement
 * order below is significant.
 */
void
xfs_ialloc_setup_geometry(
	struct xfs_mount	*mp)
{
	struct xfs_sb		*sbp = &mp->m_sb;
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	uint64_t		icount;
	uint			inodes;

	/* Flags to stamp into the di_flags2 of newly created inodes. */
	igeo->new_diflags2 = 0;
	if (xfs_has_bigtime(mp))
		igeo->new_diflags2 |= XFS_DIFLAG2_BIGTIME;
	if (xfs_has_large_extent_counts(mp))
		igeo->new_diflags2 |= XFS_DIFLAG2_NREXT64;

	/* Compute inode btree geometry. */
	igeo->agino_log = sbp->sb_inopblog + sbp->sb_agblklog;
	igeo->inobt_mxr[0] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 1);
	igeo->inobt_mxr[1] = xfs_inobt_maxrecs(mp, sbp->sb_blocksize, 0);
	/* Minimum records per block is half the maximum. */
	igeo->inobt_mnr[0] = igeo->inobt_mxr[0] / 2;
	igeo->inobt_mnr[1] = igeo->inobt_mxr[1] / 2;

	/* Inodes are allocated in chunks of at least one fs block. */
	igeo->ialloc_inos = max_t(uint16_t, XFS_INODES_PER_CHUNK,
			sbp->sb_inopblock);
	igeo->ialloc_blks = igeo->ialloc_inos >> sbp->sb_inopblog;

	/* Sparse inode support allows smaller minimum allocations. */
	if (sbp->sb_spino_align)
		igeo->ialloc_min_blks = sbp->sb_spino_align;
	else
		igeo->ialloc_min_blks = igeo->ialloc_blks;

	/* Compute and fill in value of m_ino_geo.inobt_maxlevels. */
	inodes = (1LL << XFS_INO_AGINO_BITS(mp)) >> XFS_INODES_PER_CHUNK_LOG;
	igeo->inobt_maxlevels = xfs_btree_compute_maxlevels(igeo->inobt_mnr,
			inodes);
	ASSERT(igeo->inobt_maxlevels <= xfs_iallocbt_maxlevels_ondisk());

	/*
	 * Set the maximum inode count for this filesystem, being careful not
	 * to use obviously garbage sb_inopblog/sb_inopblock values.  Regular
	 * users should never get here due to failing sb verification, but
	 * certain users (xfs_db) need to be usable even with corrupt metadata.
	 */
	if (sbp->sb_imax_pct && igeo->ialloc_blks) {
		/*
		 * Make sure the maximum inode count is a multiple
		 * of the units we allocate inodes in.
		 */
		icount = sbp->sb_dblocks * sbp->sb_imax_pct;
		do_div(icount, 100);
		do_div(icount, igeo->ialloc_blks);
		igeo->maxicount = XFS_FSB_TO_INO(mp,
				icount * igeo->ialloc_blks);
	} else {
		igeo->maxicount = 0;
	}

	/*
	 * Compute the desired size of an inode cluster buffer size, which
	 * starts at 8K and (on v5 filesystems) scales up with larger inode
	 * sizes.
	 *
	 * Preserve the desired inode cluster size because the sparse inodes
	 * feature uses that desired size (not the actual size) to compute the
	 * sparse inode alignment.  The mount code validates this value, so we
	 * cannot change the behavior.
	 */
	igeo->inode_cluster_size_raw = XFS_INODE_BIG_CLUSTER_SIZE;
	if (xfs_has_v3inodes(mp)) {
		int	new_size = igeo->inode_cluster_size_raw;

		/* Scale with inode size, but only if mkfs aligned for it. */
		new_size *= mp->m_sb.sb_inodesize / XFS_DINODE_MIN_SIZE;
		if (mp->m_sb.sb_inoalignmt >= XFS_B_TO_FSBT(mp, new_size))
			igeo->inode_cluster_size_raw = new_size;
	}

	/* Calculate inode cluster ratios. */
	if (igeo->inode_cluster_size_raw > mp->m_sb.sb_blocksize)
		igeo->blocks_per_cluster = XFS_B_TO_FSBT(mp,
				igeo->inode_cluster_size_raw);
	else
		igeo->blocks_per_cluster = 1;
	igeo->inode_cluster_size = XFS_FSB_TO_B(mp, igeo->blocks_per_cluster);
	igeo->inodes_per_cluster = XFS_FSB_TO_INO(mp, igeo->blocks_per_cluster);

	/* Calculate inode cluster alignment. */
	if (xfs_has_align(mp) &&
	    mp->m_sb.sb_inoalignmt >= igeo->blocks_per_cluster)
		igeo->cluster_align = mp->m_sb.sb_inoalignmt;
	else
		igeo->cluster_align = 1;
	igeo->inoalign_mask = igeo->cluster_align - 1;
	igeo->cluster_align_inodes = XFS_FSB_TO_INO(mp, igeo->cluster_align);

	/*
	 * If we are using stripe alignment, check whether
	 * the stripe unit is a multiple of the inode alignment
	 */
	if (mp->m_dalign && igeo->inoalign_mask &&
	    !(mp->m_dalign & igeo->inoalign_mask))
		igeo->ialloc_align = mp->m_dalign;
	else
		igeo->ialloc_align = 0;
}
2983 :
/*
 * Compute the location of the root directory inode that is laid out by mkfs.
 *
 * @sunit is the stripe unit (in fs blocks) to round up to when the dalign
 * feature is in use.  Returns the absolute inode number of the first inode
 * mkfs could have allocated in AG 0.  NOTE(review): this mirrors mkfs's AG 0
 * layout; any metadata mkfs formats before the root inode must be accounted
 * for here in the same order.
 */
xfs_ino_t
xfs_ialloc_calc_rootino(
	struct xfs_mount	*mp,
	int			sunit)
{
	struct xfs_ino_geometry	*igeo = M_IGEO(mp);
	xfs_agblock_t		first_bno;

	/*
	 * Pre-calculate the geometry of AG 0.  We know what it looks like
	 * because libxfs knows how to create allocation groups now.
	 *
	 * first_bno is the first block in which mkfs could possibly have
	 * allocated the root directory inode, once we factor in the metadata
	 * that mkfs formats before it.  Namely, the four AG headers...
	 */
	first_bno = howmany(4 * mp->m_sb.sb_sectsize, mp->m_sb.sb_blocksize);

	/* ...the two free space btree roots... */
	first_bno += 2;

	/* ...the inode btree root... */
	first_bno += 1;

	/* ...the initial AGFL... */
	first_bno += xfs_alloc_min_freelist(mp, NULL);

	/* ...the free inode btree root... */
	if (xfs_has_finobt(mp))
		first_bno++;

	/* ...the reverse mapping btree root... */
	if (xfs_has_rmapbt(mp))
		first_bno++;

	/* ...the reference count btree... */
	if (xfs_has_reflink(mp))
		first_bno++;

	/*
	 * ...and the log, if it is allocated in the first allocation group.
	 *
	 * This can happen with filesystems that only have a single
	 * allocation group, or very odd geometries created by old mkfs
	 * versions on very small filesystems.
	 */
	if (xfs_ag_contains_log(mp, 0))
		first_bno += mp->m_sb.sb_logblocks;

	/*
	 * Now round first_bno up to whatever allocation alignment is given
	 * by the filesystem or was passed in.
	 */
	if (xfs_has_dalign(mp) && igeo->ialloc_align > 0)
		first_bno = roundup(first_bno, sunit);
	else if (xfs_has_align(mp) &&
			mp->m_sb.sb_inoalignmt > 1)
		first_bno = roundup(first_bno, mp->m_sb.sb_inoalignmt);

	/* Translate the AG 0 block number into an absolute inode number. */
	return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
}
3046 :
/*
 * Ensure there are not sparse inode clusters that cross the new EOAG.
 *
 * This is a no-op for non-spinode filesystems since clusters are always fully
 * allocated and checking the bnobt suffices.  However, a spinode filesystem
 * could have a record where the upper inodes are free blocks.  If those blocks
 * were removed from the filesystem, the inode record would extend beyond EOAG,
 * which will be flagged as corruption.
 *
 * Returns 0 if shrinking to @new_length is safe, -ENOSPC if an inode record
 * would cross the new AG boundary, or a negative errno on btree error.
 */
int
xfs_ialloc_check_shrink(
	struct xfs_perag	*pag,
	struct xfs_trans	*tp,
	struct xfs_buf		*agibp,
	xfs_agblock_t		new_length)
{
	struct xfs_inobt_rec_incore rec;
	struct xfs_btree_cur	*cur;
	xfs_agino_t		agino;
	int			has;
	int			error;

	/* Fully-allocated clusters cannot cross EOAG; nothing to check. */
	if (!xfs_has_sparseinodes(pag->pag_mount))
		return 0;

	cur = xfs_inobt_init_cursor(pag, tp, agibp, XFS_BTNUM_INO);

	/* Look up the inobt record that would correspond to the new EOFS. */
	agino = XFS_AGB_TO_AGINO(pag->pag_mount, new_length);
	error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
	if (error || !has)
		goto out;

	error = xfs_inobt_get_rec(cur, &rec, &has);
	if (error)
		goto out;

	/* A successful lookup with no record means the btree is corrupt. */
	if (!has) {
		xfs_ag_mark_sick(pag, XFS_SICK_AG_INOBT);
		error = -EFSCORRUPTED;
		goto out;
	}

	/* If the record covers inodes that would be beyond EOFS, bail out. */
	if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) {
		error = -ENOSPC;
		goto out;
	}
out:
	/* Tear down the cursor; @error is 0 on the success path. */
	xfs_btree_del_cursor(cur, error);
	return error;
}
|