Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (c) 2008-2010, 2013 Dave Chinner
4 : * All Rights Reserved.
5 : */
6 : #include "xfs.h"
7 : #include "xfs_fs.h"
8 : #include "xfs_shared.h"
9 : #include "xfs_format.h"
10 : #include "xfs_log_format.h"
11 : #include "xfs_trans_resv.h"
12 : #include "xfs_mount.h"
13 : #include "xfs_inode.h"
14 : #include "xfs_trans.h"
15 : #include "xfs_trans_priv.h"
16 : #include "xfs_icreate_item.h"
17 : #include "xfs_log.h"
18 : #include "xfs_log_priv.h"
19 : #include "xfs_log_recover.h"
20 : #include "xfs_ialloc.h"
21 : #include "xfs_trace.h"
22 :
struct kmem_cache	*xfs_icreate_cache;	/* cache that struct xfs_icreate_item objects are allocated from and freed to */
24 :
25 : static inline struct xfs_icreate_item *ICR_ITEM(struct xfs_log_item *lip)
26 : {
27 : return container_of(lip, struct xfs_icreate_item, ic_item);
28 : }
29 :
30 : /*
31 : * This returns the number of iovecs needed to log the given inode item.
32 : *
33 : * We only need one iovec for the icreate log structure.
34 : */
35 : STATIC void
36 1017083 : xfs_icreate_item_size(
37 : struct xfs_log_item *lip,
38 : int *nvecs,
39 : int *nbytes)
40 : {
41 1017083 : *nvecs += 1;
42 1017083 : *nbytes += sizeof(struct xfs_icreate_log);
43 1017083 : }
44 :
45 : /*
46 : * This is called to fill in the vector of log iovecs for the
47 : * given inode create log item.
48 : */
49 : STATIC void
50 1017157 : xfs_icreate_item_format(
51 : struct xfs_log_item *lip,
52 : struct xfs_log_vec *lv)
53 : {
54 1017157 : struct xfs_icreate_item *icp = ICR_ITEM(lip);
55 1017157 : struct xfs_log_iovec *vecp = NULL;
56 :
57 1017157 : xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICREATE,
58 1017157 : &icp->ic_format,
59 : sizeof(struct xfs_icreate_log));
60 1017061 : }
61 :
62 : STATIC void
63 1017237 : xfs_icreate_item_release(
64 : struct xfs_log_item *lip)
65 : {
66 1017237 : kmem_free(ICR_ITEM(lip)->ic_item.li_lv_shadow);
67 1017237 : kmem_cache_free(xfs_icreate_cache, ICR_ITEM(lip));
68 1017237 : }
69 :
70 : static const struct xfs_item_ops xfs_icreate_item_ops = {
71 : .flags = XFS_ITEM_RELEASE_WHEN_COMMITTED,
72 : .iop_size = xfs_icreate_item_size,
73 : .iop_format = xfs_icreate_item_format,
74 : .iop_release = xfs_icreate_item_release,
75 : };
76 :
77 :
78 : /*
79 : * Initialize the inode log item for a newly allocated (in-core) inode.
80 : *
81 : * Inode extents can only reside within an AG. Hence specify the starting
82 : * block for the inode chunk by offset within an AG as well as the
83 : * length of the allocated extent.
84 : *
85 : * This joins the item to the transaction and marks it dirty so
86 : * that we don't need a separate call to do this, nor does the
87 : * caller need to know anything about the icreate item.
88 : */
89 : void
90 1016511 : xfs_icreate_log(
91 : struct xfs_trans *tp,
92 : xfs_agnumber_t agno,
93 : xfs_agblock_t agbno,
94 : unsigned int count,
95 : unsigned int inode_size,
96 : xfs_agblock_t length,
97 : unsigned int generation)
98 : {
99 1016511 : struct xfs_icreate_item *icp;
100 :
101 1016511 : icp = kmem_cache_zalloc(xfs_icreate_cache, GFP_KERNEL | __GFP_NOFAIL);
102 :
103 1017057 : xfs_log_item_init(tp->t_mountp, &icp->ic_item, XFS_LI_ICREATE,
104 : &xfs_icreate_item_ops);
105 :
106 1016904 : icp->ic_format.icl_type = XFS_LI_ICREATE;
107 1016904 : icp->ic_format.icl_size = 1; /* single vector */
108 1016904 : icp->ic_format.icl_ag = cpu_to_be32(agno);
109 1016904 : icp->ic_format.icl_agbno = cpu_to_be32(agbno);
110 1016904 : icp->ic_format.icl_count = cpu_to_be32(count);
111 1016904 : icp->ic_format.icl_isize = cpu_to_be32(inode_size);
112 1016904 : icp->ic_format.icl_length = cpu_to_be32(length);
113 1016904 : icp->ic_format.icl_gen = cpu_to_be32(generation);
114 :
115 1016904 : xfs_trans_add_item(tp, &icp->ic_item);
116 1016796 : tp->t_flags |= XFS_TRANS_DIRTY;
117 1016796 : set_bit(XFS_LI_DIRTY, &icp->ic_item.li_flags);
118 1017105 : }
119 :
120 : static enum xlog_recover_reorder
121 57398 : xlog_recover_icreate_reorder(
122 : struct xlog_recover_item *item)
123 : {
124 : /*
125 : * Inode allocation buffers must be replayed before subsequent inode
126 : * items try to modify those buffers. ICREATE items are the logical
127 : * equivalent of logging a newly initialized inode buffer, so recover
128 : * these at the same time that we recover logged buffers.
129 : */
130 57398 : return XLOG_REORDER_BUFFER_LIST;
131 : }
132 :
133 : /*
134 : * This routine is called when an inode create format structure is found in a
135 : * committed transaction in the log. It's purpose is to initialise the inodes
136 : * being allocated on disk. This requires us to get inode cluster buffers that
137 : * match the range to be initialised, stamped with inode templates and written
138 : * by delayed write so that subsequent modifications will hit the cached buffer
139 : * and only need writing out at the end of recovery.
140 : */
141 : STATIC int
142 28699 : xlog_recover_icreate_commit_pass2(
143 : struct xlog *log,
144 : struct list_head *buffer_list,
145 : struct xlog_recover_item *item,
146 : xfs_lsn_t lsn)
147 : {
148 28699 : struct xfs_mount *mp = log->l_mp;
149 28699 : struct xfs_icreate_log *icl;
150 28699 : struct xfs_ino_geometry *igeo = M_IGEO(mp);
151 28699 : xfs_agnumber_t agno;
152 28699 : xfs_agblock_t agbno;
153 28699 : unsigned int count;
154 28699 : unsigned int isize;
155 28699 : xfs_agblock_t length;
156 28699 : int bb_per_cluster;
157 28699 : int cancel_count;
158 28699 : int nbufs;
159 28699 : int i;
160 :
161 28699 : icl = (struct xfs_icreate_log *)item->ri_buf[0].i_addr;
162 28699 : if (icl->icl_type != XFS_LI_ICREATE) {
163 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad type");
164 0 : return -EINVAL;
165 : }
166 :
167 28699 : if (icl->icl_size != 1) {
168 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad icl size");
169 0 : return -EINVAL;
170 : }
171 :
172 28699 : agno = be32_to_cpu(icl->icl_ag);
173 28699 : if (agno >= mp->m_sb.sb_agcount) {
174 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agno");
175 0 : return -EINVAL;
176 : }
177 28699 : agbno = be32_to_cpu(icl->icl_agbno);
178 28699 : if (!agbno || agbno == NULLAGBLOCK || agbno >= mp->m_sb.sb_agblocks) {
179 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad agbno");
180 0 : return -EINVAL;
181 : }
182 28699 : isize = be32_to_cpu(icl->icl_isize);
183 28699 : if (isize != mp->m_sb.sb_inodesize) {
184 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad isize");
185 0 : return -EINVAL;
186 : }
187 28699 : count = be32_to_cpu(icl->icl_count);
188 28699 : if (!count) {
189 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad count");
190 0 : return -EINVAL;
191 : }
192 28699 : length = be32_to_cpu(icl->icl_length);
193 28699 : if (!length || length >= mp->m_sb.sb_agblocks) {
194 0 : xfs_warn(log->l_mp, "xlog_recover_do_icreate_trans: bad length");
195 0 : return -EINVAL;
196 : }
197 :
198 : /*
199 : * The inode chunk is either full or sparse and we only support
200 : * m_ino_geo.ialloc_min_blks sized sparse allocations at this time.
201 : */
202 28699 : if (length != igeo->ialloc_blks &&
203 14404 : length != igeo->ialloc_min_blks) {
204 0 : xfs_warn(log->l_mp,
205 : "%s: unsupported chunk length", __func__);
206 0 : return -EINVAL;
207 : }
208 :
209 : /* verify inode count is consistent with extent length */
210 28699 : if ((count >> mp->m_sb.sb_inopblog) != length) {
211 0 : xfs_warn(log->l_mp,
212 : "%s: inconsistent inode count and chunk length",
213 : __func__);
214 0 : return -EINVAL;
215 : }
216 :
217 : /*
218 : * The icreate transaction can cover multiple cluster buffers and these
219 : * buffers could have been freed and reused. Check the individual
220 : * buffers for cancellation so we don't overwrite anything written after
221 : * a cancellation.
222 : */
223 28699 : bb_per_cluster = XFS_FSB_TO_BB(mp, igeo->blocks_per_cluster);
224 28699 : nbufs = length / igeo->blocks_per_cluster;
225 71693 : for (i = 0, cancel_count = 0; i < nbufs; i++) {
226 42994 : xfs_daddr_t daddr;
227 :
228 42994 : daddr = XFS_AGB_TO_DADDR(mp, agno,
229 : agbno + i * igeo->blocks_per_cluster);
230 42994 : if (xlog_is_buffer_cancelled(log, daddr, bb_per_cluster))
231 85 : cancel_count++;
232 : }
233 :
234 : /*
235 : * We currently only use icreate for a single allocation at a time. This
236 : * means we should expect either all or none of the buffers to be
237 : * cancelled. Be conservative and skip replay if at least one buffer is
238 : * cancelled, but warn the user that something is awry if the buffers
239 : * are not consistent.
240 : *
241 : * XXX: This must be refined to only skip cancelled clusters once we use
242 : * icreate for multiple chunk allocations.
243 : */
244 28699 : ASSERT(!cancel_count || cancel_count == nbufs);
245 28699 : if (cancel_count) {
246 58 : if (cancel_count != nbufs)
247 0 : xfs_warn(mp,
248 : "WARNING: partial inode chunk cancellation, skipped icreate.");
249 58 : trace_xfs_log_recover_icreate_cancel(log, icl);
250 58 : return 0;
251 : }
252 :
253 28641 : trace_xfs_log_recover_icreate_recover(log, icl);
254 28641 : return xfs_ialloc_inode_init(mp, NULL, buffer_list, count, agno, agbno,
255 28641 : length, be32_to_cpu(icl->icl_gen));
256 : }
257 :
258 : const struct xlog_recover_item_ops xlog_icreate_item_ops = {
259 : .item_type = XFS_LI_ICREATE,
260 : .reorder = xlog_recover_icreate_reorder,
261 : .commit_pass2 = xlog_recover_icreate_commit_pass2,
262 : };
|