Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : #include <linux/fs.h>
3 : #include <linux/random.h>
4 : #include <linux/buffer_head.h>
5 : #include <linux/utsname.h>
6 : #include <linux/kthread.h>
7 :
8 : #include "ext4.h"
9 :
10 : /* Checksumming functions */
11 0 : static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
12 : {
13 0 : struct ext4_sb_info *sbi = EXT4_SB(sb);
14 0 : int offset = offsetof(struct mmp_struct, mmp_checksum);
15 0 : __u32 csum;
16 :
17 0 : csum = ext4_chksum(sbi, sbi->s_csum_seed, (char *)mmp, offset);
18 :
19 0 : return cpu_to_le32(csum);
20 : }
21 :
22 0 : static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
23 : {
24 0 : if (!ext4_has_metadata_csum(sb))
25 : return 1;
26 :
27 0 : return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28 : }
29 :
30 0 : static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31 : {
32 0 : if (!ext4_has_metadata_csum(sb))
33 : return;
34 :
35 0 : mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
36 : }
37 :
38 : /*
39 : * Write the MMP block using REQ_SYNC to try to get the block on-disk
40 : * faster.
41 : */
42 0 : static int write_mmp_block_thawed(struct super_block *sb,
43 : struct buffer_head *bh)
44 : {
45 0 : struct mmp_struct *mmp = (struct mmp_struct *)(bh->b_data);
46 :
47 0 : ext4_mmp_csum_set(sb, mmp);
48 0 : lock_buffer(bh);
49 0 : bh->b_end_io = end_buffer_write_sync;
50 0 : get_bh(bh);
51 0 : submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);
52 0 : wait_on_buffer(bh);
53 0 : if (unlikely(!buffer_uptodate(bh)))
54 0 : return -EIO;
55 : return 0;
56 : }
57 :
/*
 * Write the MMP block while holding freeze protection on @sb.
 *
 * Returns whatever write_mmp_block_thawed() returns.
 */
static int write_mmp_block(struct super_block *sb, struct buffer_head *bh)
{
	int rc;

	/*
	 * Bracket the write with sb_start_write()/sb_end_write() so that we
	 * don't create dirty buffers on a frozen filesystem.
	 */
	sb_start_write(sb);
	rc = write_mmp_block_thawed(sb, bh);
	sb_end_write(sb);

	return rc;
}
71 :
72 : /*
73 : * Read the MMP block. It _must_ be read from disk and hence we clear the
74 : * uptodate flag on the buffer.
75 : */
76 0 : static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
77 : ext4_fsblk_t mmp_block)
78 : {
79 0 : struct mmp_struct *mmp;
80 0 : int ret;
81 :
82 0 : if (*bh)
83 0 : clear_buffer_uptodate(*bh);
84 :
85 : /* This would be sb_bread(sb, mmp_block), except we need to be sure
86 : * that the MD RAID device cache has been bypassed, and that the read
87 : * is not blocked in the elevator. */
88 0 : if (!*bh) {
89 0 : *bh = sb_getblk(sb, mmp_block);
90 0 : if (!*bh) {
91 0 : ret = -ENOMEM;
92 0 : goto warn_exit;
93 : }
94 : }
95 :
96 0 : lock_buffer(*bh);
97 0 : ret = ext4_read_bh(*bh, REQ_META | REQ_PRIO, NULL);
98 0 : if (ret)
99 0 : goto warn_exit;
100 :
101 0 : mmp = (struct mmp_struct *)((*bh)->b_data);
102 0 : if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
103 0 : ret = -EFSCORRUPTED;
104 0 : goto warn_exit;
105 : }
106 0 : if (!ext4_mmp_csum_verify(sb, mmp)) {
107 0 : ret = -EFSBADCRC;
108 0 : goto warn_exit;
109 : }
110 : return 0;
111 0 : warn_exit:
112 0 : brelse(*bh);
113 0 : *bh = NULL;
114 0 : ext4_warning(sb, "Error %d while reading MMP block %llu",
115 : ret, mmp_block);
116 0 : return ret;
117 : }
118 :
119 : /*
120 : * Dump as much information as possible to help the admin.
121 : */
122 0 : void __dump_mmp_msg(struct super_block *sb, struct mmp_struct *mmp,
123 : const char *function, unsigned int line, const char *msg)
124 : {
125 0 : __ext4_warning(sb, function, line, "%s", msg);
126 0 : __ext4_warning(sb, function, line,
127 : "MMP failure info: last update time: %llu, last update node: %.*s, last update device: %.*s",
128 0 : (unsigned long long)le64_to_cpu(mmp->mmp_time),
129 0 : (int)sizeof(mmp->mmp_nodename), mmp->mmp_nodename,
130 0 : (int)sizeof(mmp->mmp_bdevname), mmp->mmp_bdevname);
131 0 : }
132 :
133 : /*
134 : * kmmpd will update the MMP sequence every s_mmp_update_interval seconds
135 : */
136 0 : static int kmmpd(void *data)
137 : {
138 0 : struct super_block *sb = data;
139 0 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
140 0 : struct buffer_head *bh = EXT4_SB(sb)->s_mmp_bh;
141 0 : struct mmp_struct *mmp;
142 0 : ext4_fsblk_t mmp_block;
143 0 : u32 seq = 0;
144 0 : unsigned long failed_writes = 0;
145 0 : int mmp_update_interval = le16_to_cpu(es->s_mmp_update_interval);
146 0 : unsigned mmp_check_interval;
147 0 : unsigned long last_update_time;
148 0 : unsigned long diff;
149 0 : int retval = 0;
150 :
151 0 : mmp_block = le64_to_cpu(es->s_mmp_block);
152 0 : mmp = (struct mmp_struct *)(bh->b_data);
153 0 : mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
154 : /*
155 : * Start with the higher mmp_check_interval and reduce it if
156 : * the MMP block is being updated on time.
157 : */
158 0 : mmp_check_interval = max(EXT4_MMP_CHECK_MULT * mmp_update_interval,
159 : EXT4_MMP_MIN_CHECK_INTERVAL);
160 0 : mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
161 :
162 0 : memcpy(mmp->mmp_nodename, init_utsname()->nodename,
163 : sizeof(mmp->mmp_nodename));
164 :
165 0 : while (!kthread_should_stop() && !sb_rdonly(sb)) {
166 0 : if (!ext4_has_feature_mmp(sb)) {
167 0 : ext4_warning(sb, "kmmpd being stopped since MMP feature"
168 : " has been disabled.");
169 0 : goto wait_to_exit;
170 : }
171 0 : if (++seq > EXT4_MMP_SEQ_MAX)
172 0 : seq = 1;
173 :
174 0 : mmp->mmp_seq = cpu_to_le32(seq);
175 0 : mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
176 0 : last_update_time = jiffies;
177 :
178 0 : retval = write_mmp_block(sb, bh);
179 : /*
180 : * Don't spew too many error messages. Print one every
181 : * (s_mmp_update_interval * 60) seconds.
182 : */
183 0 : if (retval) {
184 0 : if ((failed_writes % 60) == 0) {
185 0 : ext4_error_err(sb, -retval,
186 : "Error writing to MMP block");
187 : }
188 0 : failed_writes++;
189 : }
190 :
191 0 : diff = jiffies - last_update_time;
192 0 : if (diff < mmp_update_interval * HZ)
193 0 : schedule_timeout_interruptible(mmp_update_interval *
194 0 : HZ - diff);
195 :
196 : /*
197 : * We need to make sure that more than mmp_check_interval
198 : * seconds have not passed since writing. If that has happened
199 : * we need to check if the MMP block is as we left it.
200 : */
201 0 : diff = jiffies - last_update_time;
202 0 : if (diff > mmp_check_interval * HZ) {
203 0 : struct buffer_head *bh_check = NULL;
204 0 : struct mmp_struct *mmp_check;
205 :
206 0 : retval = read_mmp_block(sb, &bh_check, mmp_block);
207 0 : if (retval) {
208 0 : ext4_error_err(sb, -retval,
209 : "error reading MMP data: %d",
210 : retval);
211 0 : goto wait_to_exit;
212 : }
213 :
214 0 : mmp_check = (struct mmp_struct *)(bh_check->b_data);
215 0 : if (mmp->mmp_seq != mmp_check->mmp_seq ||
216 0 : memcmp(mmp->mmp_nodename, mmp_check->mmp_nodename,
217 : sizeof(mmp->mmp_nodename))) {
218 0 : dump_mmp_msg(sb, mmp_check,
219 : "Error while updating MMP info. "
220 : "The filesystem seems to have been"
221 : " multiply mounted.");
222 0 : ext4_error_err(sb, EBUSY, "abort");
223 0 : put_bh(bh_check);
224 0 : retval = -EBUSY;
225 0 : goto wait_to_exit;
226 : }
227 0 : put_bh(bh_check);
228 : }
229 :
230 : /*
231 : * Adjust the mmp_check_interval depending on how much time
232 : * it took for the MMP block to be written.
233 : */
234 0 : mmp_check_interval = max(min(EXT4_MMP_CHECK_MULT * diff / HZ,
235 : EXT4_MMP_MAX_CHECK_INTERVAL),
236 : EXT4_MMP_MIN_CHECK_INTERVAL);
237 0 : mmp->mmp_check_interval = cpu_to_le16(mmp_check_interval);
238 : }
239 :
240 : /*
241 : * Unmount seems to be clean.
242 : */
243 0 : mmp->mmp_seq = cpu_to_le32(EXT4_MMP_SEQ_CLEAN);
244 0 : mmp->mmp_time = cpu_to_le64(ktime_get_real_seconds());
245 :
246 0 : retval = write_mmp_block(sb, bh);
247 :
248 0 : wait_to_exit:
249 0 : while (!kthread_should_stop()) {
250 0 : set_current_state(TASK_INTERRUPTIBLE);
251 0 : if (!kthread_should_stop())
252 0 : schedule();
253 : }
254 0 : set_current_state(TASK_RUNNING);
255 0 : return retval;
256 : }
257 :
258 0 : void ext4_stop_mmpd(struct ext4_sb_info *sbi)
259 : {
260 0 : if (sbi->s_mmp_tsk) {
261 0 : kthread_stop(sbi->s_mmp_tsk);
262 0 : brelse(sbi->s_mmp_bh);
263 0 : sbi->s_mmp_tsk = NULL;
264 : }
265 0 : }
266 :
267 : /*
268 : * Get a random new sequence number but make sure it is not greater than
269 : * EXT4_MMP_SEQ_MAX.
270 : */
271 : static unsigned int mmp_new_seq(void)
272 : {
273 0 : return get_random_u32_below(EXT4_MMP_SEQ_MAX + 1);
274 : }
275 :
276 : /*
277 : * Protect the filesystem from being mounted more than once.
278 : */
279 0 : int ext4_multi_mount_protect(struct super_block *sb,
280 : ext4_fsblk_t mmp_block)
281 : {
282 0 : struct ext4_super_block *es = EXT4_SB(sb)->s_es;
283 0 : struct buffer_head *bh = NULL;
284 0 : struct mmp_struct *mmp = NULL;
285 0 : u32 seq;
286 0 : unsigned int mmp_check_interval = le16_to_cpu(es->s_mmp_update_interval);
287 0 : unsigned int wait_time = 0;
288 0 : int retval;
289 :
290 0 : if (mmp_block < le32_to_cpu(es->s_first_data_block) ||
291 : mmp_block >= ext4_blocks_count(es)) {
292 0 : ext4_warning(sb, "Invalid MMP block in superblock");
293 0 : retval = -EINVAL;
294 0 : goto failed;
295 : }
296 :
297 0 : retval = read_mmp_block(sb, &bh, mmp_block);
298 0 : if (retval)
299 0 : goto failed;
300 :
301 0 : mmp = (struct mmp_struct *)(bh->b_data);
302 :
303 0 : if (mmp_check_interval < EXT4_MMP_MIN_CHECK_INTERVAL)
304 : mmp_check_interval = EXT4_MMP_MIN_CHECK_INTERVAL;
305 :
306 : /*
307 : * If check_interval in MMP block is larger, use that instead of
308 : * update_interval from the superblock.
309 : */
310 0 : if (le16_to_cpu(mmp->mmp_check_interval) > mmp_check_interval)
311 : mmp_check_interval = le16_to_cpu(mmp->mmp_check_interval);
312 :
313 0 : seq = le32_to_cpu(mmp->mmp_seq);
314 0 : if (seq == EXT4_MMP_SEQ_CLEAN)
315 0 : goto skip;
316 :
317 0 : if (seq == EXT4_MMP_SEQ_FSCK) {
318 0 : dump_mmp_msg(sb, mmp, "fsck is running on the filesystem");
319 0 : retval = -EBUSY;
320 0 : goto failed;
321 : }
322 :
323 0 : wait_time = min(mmp_check_interval * 2 + 1,
324 : mmp_check_interval + 60);
325 :
326 : /* Print MMP interval if more than 20 secs. */
327 0 : if (wait_time > EXT4_MMP_MIN_CHECK_INTERVAL * 4)
328 0 : ext4_warning(sb, "MMP interval %u higher than expected, please"
329 : " wait.\n", wait_time * 2);
330 :
331 0 : if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
332 0 : ext4_warning(sb, "MMP startup interrupted, failing mount\n");
333 0 : retval = -ETIMEDOUT;
334 0 : goto failed;
335 : }
336 :
337 0 : retval = read_mmp_block(sb, &bh, mmp_block);
338 0 : if (retval)
339 0 : goto failed;
340 0 : mmp = (struct mmp_struct *)(bh->b_data);
341 0 : if (seq != le32_to_cpu(mmp->mmp_seq)) {
342 0 : dump_mmp_msg(sb, mmp,
343 : "Device is already active on another node.");
344 0 : retval = -EBUSY;
345 0 : goto failed;
346 : }
347 :
348 0 : skip:
349 : /*
350 : * write a new random sequence number.
351 : */
352 0 : seq = mmp_new_seq();
353 0 : mmp->mmp_seq = cpu_to_le32(seq);
354 :
355 : /*
356 : * On mount / remount we are protected against fs freezing (by s_umount
357 : * semaphore) and grabbing freeze protection upsets lockdep
358 : */
359 0 : retval = write_mmp_block_thawed(sb, bh);
360 0 : if (retval)
361 0 : goto failed;
362 :
363 : /*
364 : * wait for MMP interval and check mmp_seq.
365 : */
366 0 : if (schedule_timeout_interruptible(HZ * wait_time) != 0) {
367 0 : ext4_warning(sb, "MMP startup interrupted, failing mount");
368 0 : retval = -ETIMEDOUT;
369 0 : goto failed;
370 : }
371 :
372 0 : retval = read_mmp_block(sb, &bh, mmp_block);
373 0 : if (retval)
374 0 : goto failed;
375 0 : mmp = (struct mmp_struct *)(bh->b_data);
376 0 : if (seq != le32_to_cpu(mmp->mmp_seq)) {
377 0 : dump_mmp_msg(sb, mmp,
378 : "Device is already active on another node.");
379 0 : retval = -EBUSY;
380 0 : goto failed;
381 : }
382 :
383 0 : EXT4_SB(sb)->s_mmp_bh = bh;
384 :
385 0 : BUILD_BUG_ON(sizeof(mmp->mmp_bdevname) < BDEVNAME_SIZE);
386 0 : snprintf(mmp->mmp_bdevname, sizeof(mmp->mmp_bdevname),
387 : "%pg", bh->b_bdev);
388 :
389 : /*
390 : * Start a kernel thread to update the MMP block periodically.
391 : */
392 0 : EXT4_SB(sb)->s_mmp_tsk = kthread_run(kmmpd, sb, "kmmpd-%.*s",
393 : (int)sizeof(mmp->mmp_bdevname),
394 : mmp->mmp_bdevname);
395 0 : if (IS_ERR(EXT4_SB(sb)->s_mmp_tsk)) {
396 0 : EXT4_SB(sb)->s_mmp_tsk = NULL;
397 0 : ext4_warning(sb, "Unable to create kmmpd thread for %s.",
398 : sb->s_id);
399 0 : retval = -ENOMEM;
400 0 : goto failed;
401 : }
402 :
403 : return 0;
404 :
405 0 : failed:
406 0 : brelse(bh);
407 : return retval;
408 : }
|