Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0+
2 : /*
3 : * linux/fs/jbd2/recovery.c
4 : *
5 : * Written by Stephen C. Tweedie <sct@redhat.com>, 1999
6 : *
7 : * Copyright 1999-2000 Red Hat Software --- All Rights Reserved
8 : *
9 : * Journal recovery routines for the generic filesystem journaling code;
10 : * part of the ext2fs journaling system.
11 : */
12 :
13 : #ifndef __KERNEL__
14 : #include "jfs_user.h"
15 : #else
16 : #include <linux/time.h>
17 : #include <linux/fs.h>
18 : #include <linux/jbd2.h>
19 : #include <linux/errno.h>
20 : #include <linux/crc32.h>
21 : #include <linux/blkdev.h>
22 : #endif
23 :
24 : /*
25 : * Maintain information about the progress of the recovery job, so that
26 : * the different passes can carry information between them.
27 : */
28 : struct recovery_info
29 : {
30 : tid_t start_transaction;
31 : tid_t end_transaction;
32 : unsigned long head_block;
33 :
34 : int nr_replays;
35 : int nr_revokes;
36 : int nr_revoke_hits;
37 : };
38 :
39 : static int do_one_pass(journal_t *journal,
40 : struct recovery_info *info, enum passtype pass);
41 : static int scan_revoke_records(journal_t *, struct buffer_head *,
42 : tid_t, struct recovery_info *);
43 :
44 : #ifdef __KERNEL__
45 :
46 : /* Release readahead buffers after use */
static void journal_brelse_array(struct buffer_head *b[], int n)
{
	int idx;

	/* Drop the references from the last slot down to the first. */
	for (idx = n - 1; idx >= 0; idx--)
		brelse(b[idx]);
}
52 :
53 :
54 : /*
55 : * When reading from the journal, we are going through the block device
56 : * layer directly and so there is no readahead being done for us. We
57 : * need to implement any readahead ourselves if we want it to happen at
58 : * all. Recovery is basically one long sequential read, so make sure we
59 : * do the IO in reasonably large chunks.
60 : *
61 : * This is not so critical that we need to be enormously clever about
62 : * the readahead size, though. 128K is a purely arbitrary, good-enough
63 : * fixed value.
64 : */
65 :
66 : #define MAXBUF 8
67 2089 : static int do_readahead(journal_t *journal, unsigned int start)
68 : {
69 2089 : int err;
70 2089 : unsigned int max, nbufs, next;
71 2089 : unsigned long long blocknr;
72 2089 : struct buffer_head *bh;
73 :
74 2089 : struct buffer_head * bufs[MAXBUF];
75 :
76 : /* Do up to 128K of readahead */
77 2089 : max = start + (128 * 1024 / journal->j_blocksize);
78 2089 : if (max > journal->j_total_len)
79 : max = journal->j_total_len;
80 :
81 : /* Do the readahead itself. We'll submit MAXBUF buffer_heads at
82 : * a time to the block device IO layer. */
83 :
84 2089 : nbufs = 0;
85 :
86 68923 : for (next = start; next < max; next++) {
87 66834 : err = jbd2_journal_bmap(journal, next, &blocknr);
88 :
89 66834 : if (err) {
90 0 : printk(KERN_ERR "JBD2: bad block at offset %u\n",
91 : next);
92 0 : goto failed;
93 : }
94 :
95 66834 : bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
96 66834 : if (!bh) {
97 0 : err = -ENOMEM;
98 0 : goto failed;
99 : }
100 :
101 191143 : if (!buffer_uptodate(bh) && !buffer_locked(bh)) {
102 55369 : bufs[nbufs++] = bh;
103 55369 : if (nbufs == MAXBUF) {
104 5319 : bh_readahead_batch(nbufs, bufs, 0);
105 5319 : journal_brelse_array(bufs, nbufs);
106 5319 : nbufs = 0;
107 : }
108 : } else
109 11465 : brelse(bh);
110 : }
111 :
112 2089 : if (nbufs)
113 2071 : bh_readahead_batch(nbufs, bufs, 0);
114 : err = 0;
115 :
116 2089 : failed:
117 2089 : if (nbufs)
118 2071 : journal_brelse_array(bufs, nbufs);
119 2089 : return err;
120 : }
121 :
122 : #endif /* __KERNEL__ */
123 :
124 :
125 : /*
126 : * Read a block from the journal
127 : */
128 :
129 66306 : static int jread(struct buffer_head **bhp, journal_t *journal,
130 : unsigned int offset)
131 : {
132 66306 : int err;
133 66306 : unsigned long long blocknr;
134 66306 : struct buffer_head *bh;
135 :
136 66306 : *bhp = NULL;
137 :
138 66306 : if (offset >= journal->j_total_len) {
139 0 : printk(KERN_ERR "JBD2: corrupted journal superblock\n");
140 0 : return -EFSCORRUPTED;
141 : }
142 :
143 66306 : err = jbd2_journal_bmap(journal, offset, &blocknr);
144 :
145 66306 : if (err) {
146 0 : printk(KERN_ERR "JBD2: bad block at offset %u\n",
147 : offset);
148 0 : return err;
149 : }
150 :
151 66306 : bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
152 66306 : if (!bh)
153 : return -ENOMEM;
154 :
155 132612 : if (!buffer_uptodate(bh)) {
156 : /*
157 : * If this is a brand new buffer, start readahead.
158 : * Otherwise, we assume we are already reading it.
159 : */
160 6990 : bool need_readahead = !buffer_req(bh);
161 :
162 6990 : bh_read_nowait(bh, 0);
163 6990 : if (need_readahead)
164 2089 : do_readahead(journal, offset);
165 6990 : wait_on_buffer(bh);
166 : }
167 :
168 132612 : if (!buffer_uptodate(bh)) {
169 0 : printk(KERN_ERR "JBD2: Failed to read block at offset %u\n",
170 : offset);
171 0 : brelse(bh);
172 0 : return -EIO;
173 : }
174 :
175 66306 : *bhp = bh;
176 66306 : return 0;
177 : }
178 :
179 8840 : static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
180 : {
181 8840 : struct jbd2_journal_block_tail *tail;
182 8840 : __be32 provided;
183 8840 : __u32 calculated;
184 :
185 8840 : if (!jbd2_journal_has_csum_v2or3(j))
186 : return 1;
187 :
188 8831 : tail = (struct jbd2_journal_block_tail *)((char *)buf +
189 8831 : j->j_blocksize - sizeof(struct jbd2_journal_block_tail));
190 8831 : provided = tail->t_checksum;
191 8831 : tail->t_checksum = 0;
192 8831 : calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
193 8831 : tail->t_checksum = provided;
194 :
195 8831 : return provided == cpu_to_be32(calculated);
196 : }
197 :
198 : /*
199 : * Count the number of in-use tags in a journal descriptor block.
200 : */
201 :
202 6377 : static int count_tags(journal_t *journal, struct buffer_head *bh)
203 : {
204 6377 : char * tagp;
205 6377 : journal_block_tag_t tag;
206 6377 : int nr = 0, size = journal->j_blocksize;
207 6377 : int tag_bytes = journal_tag_bytes(journal);
208 :
209 6377 : if (jbd2_journal_has_csum_v2or3(journal))
210 5513 : size -= sizeof(struct jbd2_journal_block_tail);
211 :
212 6377 : tagp = &bh->b_data[sizeof(journal_header_t)];
213 :
214 91136 : while ((tagp - bh->b_data + tag_bytes) <= size) {
215 91136 : memcpy(&tag, tagp, sizeof(tag));
216 :
217 91136 : nr++;
218 91136 : tagp += tag_bytes;
219 91136 : if (!(tag.t_flags & cpu_to_be16(JBD2_FLAG_SAME_UUID)))
220 6377 : tagp += 16;
221 :
222 91136 : if (tag.t_flags & cpu_to_be16(JBD2_FLAG_LAST_TAG))
223 : break;
224 : }
225 :
226 6377 : return nr;
227 : }
228 :
229 :
/*
 * Make sure we wrap around the log correctly!
 *
 * The regular (non fast-commit) log is circular and ends at j_last, so a
 * block number that reaches j_last is moved back by the length of the
 * log, i.e. (j_last - j_first).  j_fc_last must not be used as the wrap
 * point: it marks the end of the fast-commit area, and wrapping there
 * would let recovery walk fast-commit blocks as if they were ordinary
 * log blocks.
 *
 * @var is evaluated more than once; pass a plain lvalue.
 */
#define wrap(journal, var)						\
do {									\
	if ((var) >= (journal)->j_last)					\
		(var) -= ((journal)->j_last - (journal)->j_first);	\
} while (0)
240 :
241 0 : static int fc_do_one_pass(journal_t *journal,
242 : struct recovery_info *info, enum passtype pass)
243 : {
244 0 : unsigned int expected_commit_id = info->end_transaction;
245 0 : unsigned long next_fc_block;
246 0 : struct buffer_head *bh;
247 0 : int err = 0;
248 :
249 0 : next_fc_block = journal->j_fc_first;
250 0 : if (!journal->j_fc_replay_callback)
251 : return 0;
252 :
253 0 : while (next_fc_block <= journal->j_fc_last) {
254 0 : jbd2_debug(3, "Fast commit replay: next block %ld\n",
255 : next_fc_block);
256 0 : err = jread(&bh, journal, next_fc_block);
257 0 : if (err) {
258 : jbd2_debug(3, "Fast commit replay: read error\n");
259 : break;
260 : }
261 :
262 0 : err = journal->j_fc_replay_callback(journal, bh, pass,
263 0 : next_fc_block - journal->j_fc_first,
264 : expected_commit_id);
265 0 : brelse(bh);
266 0 : next_fc_block++;
267 0 : if (err < 0 || err == JBD2_FC_REPLAY_STOP)
268 : break;
269 : err = 0;
270 : }
271 :
272 : if (err)
273 : jbd2_debug(3, "Fast commit replay failed, err = %d\n", err);
274 :
275 : return err;
276 : }
277 :
278 : /**
279 : * jbd2_journal_recover - recovers an on-disk journal
280 : * @journal: the journal to recover
281 : *
282 : * The primary function for recovering the log contents when mounting a
283 : * journaled device.
284 : *
285 : * Recovery is done in three passes. In the first pass, we look for the
286 : * end of the log. In the second, we assemble the list of revoke
287 : * blocks. In the third and final pass, we replay any un-revoked blocks
288 : * in the log.
289 : */
290 2505 : int jbd2_journal_recover(journal_t *journal)
291 : {
292 2505 : int err, err2;
293 2505 : journal_superblock_t * sb;
294 :
295 2505 : struct recovery_info info;
296 :
297 2505 : memset(&info, 0, sizeof(info));
298 2505 : sb = journal->j_superblock;
299 :
300 : /*
301 : * The journal superblock's s_start field (the current log head)
302 : * is always zero if, and only if, the journal was cleanly
303 : * unmounted.
304 : */
305 2505 : if (!sb->s_start) {
306 2257 : jbd2_debug(1, "No recovery required, last transaction %d, head block %u\n",
307 : be32_to_cpu(sb->s_sequence), be32_to_cpu(sb->s_head));
308 2257 : journal->j_transaction_sequence = be32_to_cpu(sb->s_sequence) + 1;
309 2257 : journal->j_head = be32_to_cpu(sb->s_head);
310 2257 : return 0;
311 : }
312 :
313 248 : err = do_one_pass(journal, &info, PASS_SCAN);
314 248 : if (!err)
315 248 : err = do_one_pass(journal, &info, PASS_REVOKE);
316 248 : if (!err)
317 248 : err = do_one_pass(journal, &info, PASS_REPLAY);
318 :
319 248 : jbd2_debug(1, "JBD2: recovery, exit status %d, "
320 : "recovered transactions %u to %u\n",
321 : err, info.start_transaction, info.end_transaction);
322 248 : jbd2_debug(1, "JBD2: Replayed %d and revoked %d/%d blocks\n",
323 : info.nr_replays, info.nr_revoke_hits, info.nr_revokes);
324 :
325 : /* Restart the log at the next transaction ID, thus invalidating
326 : * any existing commit records in the log. */
327 248 : journal->j_transaction_sequence = ++info.end_transaction;
328 248 : journal->j_head = info.head_block;
329 248 : jbd2_debug(1, "JBD2: last transaction %d, head block %lu\n",
330 : journal->j_transaction_sequence, journal->j_head);
331 :
332 248 : jbd2_journal_clear_revoke(journal);
333 248 : err2 = sync_blockdev(journal->j_fs_dev);
334 248 : if (!err)
335 248 : err = err2;
336 : /* Make sure all replayed data is on permanent storage */
337 248 : if (journal->j_flags & JBD2_BARRIER) {
338 248 : err2 = blkdev_issue_flush(journal->j_fs_dev);
339 248 : if (!err)
340 248 : err = err2;
341 : }
342 : return err;
343 : }
344 :
345 : /**
346 : * jbd2_journal_skip_recovery - Start journal and wipe existing records
347 : * @journal: journal to startup
348 : *
349 : * Locate any valid recovery information from the journal and set up the
350 : * journal structures in memory to ignore it (presumably because the
351 : * caller has evidence that it is out of date).
352 : * This function doesn't appear to be exported.
353 : *
354 : * We perform one pass over the journal to allow us to tell the user how
355 : * much recovery information is being erased, and to let us initialise
356 : * the journal transaction sequence numbers to the next unused ID.
357 : */
358 0 : int jbd2_journal_skip_recovery(journal_t *journal)
359 : {
360 0 : int err;
361 :
362 0 : struct recovery_info info;
363 :
364 0 : memset (&info, 0, sizeof(info));
365 :
366 0 : err = do_one_pass(journal, &info, PASS_SCAN);
367 :
368 0 : if (err) {
369 0 : printk(KERN_ERR "JBD2: error %d scanning journal\n", err);
370 0 : ++journal->j_transaction_sequence;
371 0 : journal->j_head = journal->j_first;
372 : } else {
373 : #ifdef CONFIG_JBD2_DEBUG
374 : int dropped = info.end_transaction -
375 : be32_to_cpu(journal->j_superblock->s_sequence);
376 : jbd2_debug(1,
377 : "JBD2: ignoring %d transaction%s from the journal.\n",
378 : dropped, (dropped == 1) ? "" : "s");
379 : #endif
380 0 : journal->j_transaction_sequence = ++info.end_transaction;
381 0 : journal->j_head = info.head_block;
382 : }
383 :
384 0 : journal->j_tail = 0;
385 0 : return err;
386 : }
387 :
388 45449 : static inline unsigned long long read_tag_block(journal_t *journal,
389 : journal_block_tag_t *tag)
390 : {
391 45449 : unsigned long long block = be32_to_cpu(tag->t_blocknr);
392 90898 : if (jbd2_has_feature_64bit(journal))
393 45449 : block |= (u64)be32_to_cpu(tag->t_blocknr_high) << 32;
394 45449 : return block;
395 : }
396 :
397 : /*
398 : * calc_chksums calculates the checksums for the blocks described in the
399 : * descriptor block.
400 : */
401 0 : static int calc_chksums(journal_t *journal, struct buffer_head *bh,
402 : unsigned long *next_log_block, __u32 *crc32_sum)
403 : {
404 0 : int i, num_blks, err;
405 0 : unsigned long io_block;
406 0 : struct buffer_head *obh;
407 :
408 0 : num_blks = count_tags(journal, bh);
409 : /* Calculate checksum of the descriptor block. */
410 0 : *crc32_sum = crc32_be(*crc32_sum, (void *)bh->b_data, bh->b_size);
411 :
412 0 : for (i = 0; i < num_blks; i++) {
413 0 : io_block = (*next_log_block)++;
414 0 : wrap(journal, *next_log_block);
415 0 : err = jread(&obh, journal, io_block);
416 0 : if (err) {
417 0 : printk(KERN_ERR "JBD2: IO error %d recovering block "
418 : "%lu in log\n", err, io_block);
419 0 : return 1;
420 : } else {
421 0 : *crc32_sum = crc32_be(*crc32_sum, (void *)obh->b_data,
422 0 : obh->b_size);
423 : }
424 0 : put_bh(obh);
425 : }
426 : return 0;
427 : }
428 :
429 3111 : static int jbd2_commit_block_csum_verify(journal_t *j, void *buf)
430 : {
431 3111 : struct commit_header *h;
432 3111 : __be32 provided;
433 3111 : __u32 calculated;
434 :
435 3111 : if (!jbd2_journal_has_csum_v2or3(j))
436 : return 1;
437 :
438 2699 : h = buf;
439 2699 : provided = h->h_chksum[0];
440 2699 : h->h_chksum[0] = 0;
441 2699 : calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
442 2699 : h->h_chksum[0] = provided;
443 :
444 2699 : return provided == cpu_to_be32(calculated);
445 : }
446 :
447 45435 : static int jbd2_block_tag_csum_verify(journal_t *j, journal_block_tag_t *tag,
448 : journal_block_tag3_t *tag3,
449 : void *buf, __u32 sequence)
450 : {
451 45435 : __u32 csum32;
452 45435 : __be32 seq;
453 :
454 45435 : if (!jbd2_journal_has_csum_v2or3(j))
455 : return 1;
456 :
457 36373 : seq = cpu_to_be32(sequence);
458 36373 : csum32 = jbd2_chksum(j, j->j_csum_seed, (__u8 *)&seq, sizeof(seq));
459 36373 : csum32 = jbd2_chksum(j, csum32, buf, j->j_blocksize);
460 :
461 72746 : if (jbd2_has_feature_csum3(j))
462 36373 : return tag3->t_checksum == cpu_to_be32(csum32);
463 : else
464 0 : return tag->t_checksum == cpu_to_be16(csum32);
465 : }
466 :
/*
 * do_one_pass - walk the journal once on behalf of a single recovery pass.
 * @journal: journal being recovered
 * @info: state shared between the passes; PASS_SCAN fills in
 *        start_transaction, end_transaction and head_block, and the
 *        other passes stop once they reach end_transaction
 * @pass: PASS_SCAN, PASS_REVOKE or PASS_REPLAY
 *
 * Starting from the superblock's s_start / s_sequence, log blocks are read
 * one at a time and dispatched on their block type:
 *   - descriptor blocks: skipped over (or checksummed) outside PASS_REPLAY,
 *     and in PASS_REPLAY every tagged block that is not revoked is copied
 *     to its home location on j_fs_dev;
 *   - commit blocks: verified in PASS_SCAN, then next_commit_ID advances;
 *   - revoke blocks: fed to scan_revoke_records() in PASS_REVOKE only.
 * The walk ends at a block without the journal magic, with an unexpected
 * sequence number or block type, or at info->end_transaction.
 *
 * Errors that abort the walk jump to "failed" and return err directly.
 * Errors hit while replaying individual blocks are remembered in "success"
 * and returned after the walk (and any fast-commit pass) has finished.
 */
467 744 : static int do_one_pass(journal_t *journal,
468 : struct recovery_info *info, enum passtype pass)
469 : {
470 744 : unsigned int first_commit_ID, next_commit_ID;
471 744 : unsigned long next_log_block, head_block;
472 744 : int err, success = 0;
473 744 : journal_superblock_t * sb;
474 744 : journal_header_t * tmp;
475 744 : struct buffer_head * bh;
476 744 : unsigned int sequence;
477 744 : int blocktype;
478 744 : int tag_bytes = journal_tag_bytes(journal);
479 744 : __u32 crc32_sum = ~0; /* Transactional Checksums */
480 744 : int descr_csum_size = 0;
481 744 : int block_error = 0;
482 744 : bool need_check_commit_time = false;
483 744 : __u64 last_trans_commit_time = 0, commit_time;
484 :
485 : /*
486 : * First thing is to establish what we expect to find in the log
487 : * (in terms of transaction IDs), and where (in terms of log
488 : * block offsets): query the superblock.
489 : */
490 :
491 744 : sb = journal->j_superblock;
492 744 : next_commit_ID = be32_to_cpu(sb->s_sequence);
493 744 : next_log_block = be32_to_cpu(sb->s_start);
494 744 : head_block = next_log_block;
495 :
496 744 : first_commit_ID = next_commit_ID;
497 744 : if (pass == PASS_SCAN)
498 248 : info->start_transaction = first_commit_ID;
499 :
500 : jbd2_debug(1, "Starting recovery pass %d\n", pass);
501 :
502 : /*
503 : * Now we walk through the log, transaction by transaction,
504 : * making sure that each transaction has a commit block in the
505 : * expected place. Each complete transaction gets replayed back
506 : * into the main filesystem.
507 : */
508 :
509 21353 : while (1) {
510 21353 : int flags;
511 21353 : char * tagp;
512 21353 : journal_block_tag_t tag;
513 21353 : struct buffer_head * obh;
514 21353 : struct buffer_head * nbh;
515 :
516 21353 : cond_resched();
517 :
518 : /* If we already know where to stop the log traversal,
519 : * check right now that we haven't gone past the end of
520 : * the log. */
521 :
522 21353 : if (pass != PASS_SCAN)
523 14234 : if (tid_geq(next_commit_ID, info->end_transaction))
524 : break;
525 :
526 20857 : jbd2_debug(2, "Scanning for sequence ID %u at %lu/%lu\n",
527 : next_commit_ID, next_log_block,
528 : jbd2_has_feature_fast_commit(journal) ?
529 : journal->j_fc_last : journal->j_last);
530 :
531 : /* Skip over each chunk of the transaction looking
532 : * either the next descriptor block or the final commit
533 : * record. */
534 :
535 20857 : jbd2_debug(3, "JBD2: checking block %ld\n", next_log_block);
536 20857 : err = jread(&bh, journal, next_log_block);
537 20857 : if (err)
538 0 : goto failed;
539 :
/* From here on next_log_block names the block AFTER the one held in bh. */
540 20857 : next_log_block++;
541 41714 : wrap(journal, next_log_block);
542 :
543 : /* What kind of buffer is it?
544 : *
545 : * If it is a descriptor block, check that it has the
546 : * expected sequence number. Otherwise, we're all done
547 : * here. */
548 :
549 20857 : tmp = (journal_header_t *)bh->b_data;
550 :
551 20857 : if (tmp->h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER)) {
552 248 : brelse(bh);
553 : break;
554 : }
555 :
556 20609 : blocktype = be32_to_cpu(tmp->h_blocktype);
557 20609 : sequence = be32_to_cpu(tmp->h_sequence);
558 20609 : jbd2_debug(3, "Found magic %d, sequence %d\n",
559 : blocktype, sequence);
560 :
/* A block carrying a different transaction's sequence number ends the walk. */
561 20609 : if (sequence != next_commit_ID) {
562 0 : brelse(bh);
563 : break;
564 : }
565 :
566 : /* OK, we have a valid descriptor block which matches
567 : * all of the sequence number checks. What are we going
568 : * to do with it? That depends on the pass... */
569 :
570 20609 : switch(blocktype) {
571 9565 : case JBD2_DESCRIPTOR_BLOCK:
572 : /* Verify checksum first */
573 9565 : if (jbd2_journal_has_csum_v2or3(journal))
574 : descr_csum_size =
575 : sizeof(struct jbd2_journal_block_tail);
576 9565 : if (descr_csum_size > 0 &&
577 8269 : !jbd2_descriptor_block_csum_verify(journal,
578 : bh->b_data)) {
579 : /*
580 : * PASS_SCAN can see stale blocks due to lazy
581 : * journal init. Don't error out on those yet.
582 : */
583 0 : if (pass != PASS_SCAN) {
584 0 : pr_err("JBD2: Invalid checksum recovering block %lu in log\n",
585 : next_log_block);
586 0 : err = -EFSBADCRC;
587 0 : brelse(bh);
588 0 : goto failed;
589 : }
590 : need_check_commit_time = true;
591 : jbd2_debug(1,
592 : "invalid descriptor block found in %lu\n",
593 : next_log_block);
594 : }
595 :
596 : /* If it is a valid descriptor block, replay it
597 : * in pass REPLAY; if journal_checksums enabled, then
598 : * calculate checksums in PASS_SCAN, otherwise,
599 : * just skip over the blocks it describes. */
600 9565 : if (pass != PASS_REPLAY) {
601 9566 : if (pass == PASS_SCAN &&
602 0 : jbd2_has_feature_checksum(journal) &&
603 0 : !need_check_commit_time &&
604 0 : !info->end_transaction) {
605 0 : if (calc_chksums(journal, bh,
606 : &next_log_block,
607 : &crc32_sum)) {
608 0 : put_bh(bh);
609 : break;
610 : }
611 0 : put_bh(bh);
612 20609 : continue;
613 : }
/* Skip the data blocks this descriptor describes without reading them. */
614 6377 : next_log_block += count_tags(journal, bh);
615 12754 : wrap(journal, next_log_block);
616 6377 : put_bh(bh);
617 6377 : continue;
618 : }
619 :
620 : /* A descriptor block: we can now write all of
621 : * the data blocks. Yay, useful work is finally
622 : * getting done here! */
623 :
/* Walk the tags; each tag corresponds to the next log block (io_block). */
624 3188 : tagp = &bh->b_data[sizeof(journal_header_t)];
625 3188 : while ((tagp - bh->b_data + tag_bytes)
626 45449 : <= journal->j_blocksize - descr_csum_size) {
627 45449 : unsigned long io_block;
628 :
629 45449 : memcpy(&tag, tagp, sizeof(tag));
630 45449 : flags = be16_to_cpu(tag.t_flags);
631 :
632 45449 : io_block = next_log_block++;
633 90898 : wrap(journal, next_log_block);
634 45449 : err = jread(&obh, journal, io_block);
635 45449 : if (err) {
636 : /* Recover what we can, but
637 : * report failure at the end. */
638 0 : success = err;
639 0 : printk(KERN_ERR
640 : "JBD2: IO error %d recovering "
641 : "block %ld in log\n",
642 : err, io_block);
643 : } else {
644 45449 : unsigned long long blocknr;
645 :
646 45449 : J_ASSERT(obh != NULL);
647 45449 : blocknr = read_tag_block(journal,
648 : &tag);
649 :
650 : /* If the block has been
651 : * revoked, then we're all done
652 : * here. */
653 45449 : if (jbd2_journal_test_revoke
654 : (journal, blocknr,
655 : next_commit_ID)) {
656 14 : brelse(obh);
657 14 : ++info->nr_revoke_hits;
658 14 : goto skip_write;
659 : }
660 :
661 : /* Look for block corruption */
662 45435 : if (!jbd2_block_tag_csum_verify(
663 : journal, &tag, (journal_block_tag3_t *)tagp,
664 45435 : obh->b_data, be32_to_cpu(tmp->h_sequence))) {
665 0 : brelse(obh);
666 0 : success = -EFSBADCRC;
667 0 : printk(KERN_ERR "JBD2: Invalid "
668 : "checksum recovering "
669 : "data block %llu in "
670 : "log\n", blocknr);
671 0 : block_error = 1;
672 0 : goto skip_write;
673 : }
674 :
675 : /* Find a buffer for the new
676 : * data being restored */
677 45435 : nbh = __getblk(journal->j_fs_dev,
678 : blocknr,
679 45435 : journal->j_blocksize);
680 45435 : if (nbh == NULL) {
681 0 : printk(KERN_ERR
682 : "JBD2: Out of memory "
683 : "during recovery.\n");
684 0 : err = -ENOMEM;
685 0 : brelse(bh);
686 0 : brelse(obh);
687 0 : goto failed;
688 : }
689 :
690 45435 : lock_buffer(nbh);
691 90870 : memcpy(nbh->b_data, obh->b_data,
692 : journal->j_blocksize);
/* JBD2_FLAG_ESCAPE: store the journal magic number in the first four bytes of the restored block. */
693 45435 : if (flags & JBD2_FLAG_ESCAPE) {
694 0 : *((__be32 *)nbh->b_data) =
695 : cpu_to_be32(JBD2_MAGIC_NUMBER);
696 : }
697 :
698 45435 : BUFFER_TRACE(nbh, "marking dirty");
699 45435 : set_buffer_uptodate(nbh);
700 45435 : mark_buffer_dirty(nbh);
701 45435 : BUFFER_TRACE(nbh, "marking uptodate");
702 45435 : ++info->nr_replays;
703 45435 : unlock_buffer(nbh);
704 45435 : brelse(obh);
705 45435 : brelse(nbh);
706 : }
707 :
/* Reached for every tag, written or not: advance to the next tag. */
708 45449 : skip_write:
709 45449 : tagp += tag_bytes;
710 45449 : if (!(flags & JBD2_FLAG_SAME_UUID))
711 3188 : tagp += 16;
712 :
713 45449 : if (flags & JBD2_FLAG_LAST_TAG)
714 : break;
715 : }
716 :
717 3188 : brelse(bh);
718 3188 : continue;
719 :
720 9333 : case JBD2_COMMIT_BLOCK:
721 : /* How to differentiate between interrupted commit
722 : * and journal corruption ?
723 : *
724 : * {nth transaction}
725 : * Checksum Verification Failed
726 : * |
727 : * ____________________
728 : * | |
729 : * async_commit sync_commit
730 : * | |
731 : * | GO TO NEXT "Journal Corruption"
732 : * | TRANSACTION
733 : * |
734 : * {(n+1)th transanction}
735 : * |
736 : * _______|______________
737 : * | |
738 : * Commit block found Commit block not found
739 : * | |
740 : * "Journal Corruption" |
741 : * _____________|_________
742 : * | |
743 : * nth trans corrupt OR nth trans
744 : * and (n+1)th interrupted interrupted
745 : * before commit block
746 : * could reach the disk.
747 : * (Cannot find the difference in above
748 : * mentioned conditions. Hence assume
749 : * "Interrupted Commit".)
750 : */
751 9333 : commit_time = be64_to_cpu(
752 : ((struct commit_header *)bh->b_data)->h_commit_sec);
753 : /*
754 : * If need_check_commit_time is set, it means we are in
755 : * PASS_SCAN and csum verify failed before. If
756 : * commit_time is increasing, it's the same journal,
757 : * otherwise it is stale journal block, just end this
758 : * recovery.
759 : */
760 9333 : if (need_check_commit_time) {
761 0 : if (commit_time >= last_trans_commit_time) {
762 0 : pr_err("JBD2: Invalid checksum found in transaction %u\n",
763 : next_commit_ID);
764 0 : err = -EFSBADCRC;
765 0 : brelse(bh);
766 0 : goto failed;
767 : }
/* This label is also entered by goto from the chksum_error path further down. */
768 0 : ignore_crc_mismatch:
769 : /*
770 : * It likely does not belong to same journal,
771 : * just end this recovery with success.
772 : */
773 0 : jbd2_debug(1, "JBD2: Invalid checksum ignored in transaction %u, likely stale data\n",
774 : next_commit_ID);
775 0 : brelse(bh);
776 0 : goto done;
777 : }
778 :
779 : /*
780 : * Found an expected commit block: if checksums
781 : * are present, verify them in PASS_SCAN; else not
782 : * much to do other than move on to the next sequence
783 : * number.
784 : */
785 12444 : if (pass == PASS_SCAN &&
786 : jbd2_has_feature_checksum(journal)) {
787 0 : struct commit_header *cbh =
788 : (struct commit_header *)bh->b_data;
789 0 : unsigned found_chksum =
790 0 : be32_to_cpu(cbh->h_chksum[0]);
791 :
792 0 : if (info->end_transaction) {
793 0 : journal->j_failed_commit =
794 : info->end_transaction;
795 0 : brelse(bh);
796 : break;
797 : }
798 :
799 : /* Neither checksum match nor unused? */
800 0 : if (!((crc32_sum == found_chksum &&
801 : cbh->h_chksum_type ==
802 0 : JBD2_CRC32_CHKSUM &&
803 : cbh->h_chksum_size ==
804 : JBD2_CRC32_CHKSUM_SIZE) ||
805 0 : (cbh->h_chksum_type == 0 &&
806 0 : cbh->h_chksum_size == 0 &&
807 : found_chksum == 0)))
808 0 : goto chksum_error;
809 :
/* Restart the running transaction checksum for the next transaction. */
810 0 : crc32_sum = ~0;
811 : }
812 12444 : if (pass == PASS_SCAN &&
813 3111 : !jbd2_commit_block_csum_verify(journal,
814 : bh->b_data)) {
/* This label is also entered by goto from the transaction-checksum check above. */
815 0 : chksum_error:
816 0 : if (commit_time < last_trans_commit_time)
817 0 : goto ignore_crc_mismatch;
818 0 : info->end_transaction = next_commit_ID;
819 0 : info->head_block = head_block;
820 :
/* Without async_commit the bad commit ends the scan here; with it the walk continues with end_transaction already set. */
821 0 : if (!jbd2_has_feature_async_commit(journal)) {
822 0 : journal->j_failed_commit =
823 : next_commit_ID;
824 0 : brelse(bh);
825 : break;
826 : }
827 : }
828 9333 : if (pass == PASS_SCAN) {
829 3111 : last_trans_commit_time = commit_time;
830 3111 : head_block = next_log_block;
831 : }
832 9333 : brelse(bh);
833 9333 : next_commit_ID++;
834 9333 : continue;
835 :
836 1711 : case JBD2_REVOKE_BLOCK:
837 : /*
838 : * Check revoke block crc in pass_scan, if csum verify
839 : * failed, check commit block time later.
840 : */
841 2282 : if (pass == PASS_SCAN &&
842 571 : !jbd2_descriptor_block_csum_verify(journal,
843 : bh->b_data)) {
844 0 : jbd2_debug(1, "JBD2: invalid revoke block found in %lu\n",
845 : next_log_block);
846 0 : need_check_commit_time = true;
847 : }
848 : /* If we aren't in the REVOKE pass, then we can
849 : * just skip over this block. */
850 1711 : if (pass != PASS_REVOKE) {
851 1141 : brelse(bh);
852 1141 : continue;
853 : }
854 :
855 570 : err = scan_revoke_records(journal, bh,
856 : next_commit_ID, info);
857 570 : brelse(bh);
858 570 : if (err)
859 0 : goto failed;
860 570 : continue;
861 :
862 : default:
863 0 : jbd2_debug(3, "Unrecognised magic %d, end of scan.\n",
864 : blocktype);
865 0 : brelse(bh);
866 0 : goto done;
867 : }
868 : }
869 :
870 744 : done:
871 : /*
872 : * We broke out of the log scan loop: either we came to the
873 : * known end of the log or we found an unexpected block in the
874 : * log. If the latter happened, then we know that the "current"
875 : * transaction marks the end of the valid log.
876 : */
877 :
878 744 : if (pass == PASS_SCAN) {
879 248 : if (!info->end_transaction)
880 248 : info->end_transaction = next_commit_ID;
881 248 : if (!info->head_block)
882 248 : info->head_block = head_block;
883 : } else {
884 : /* It's really bad news if different passes end up at
885 : * different places (but possible due to IO errors). */
886 496 : if (info->end_transaction != next_commit_ID) {
887 0 : printk(KERN_ERR "JBD2: recovery pass %d ended at "
888 : "transaction %u, expected %u\n",
889 : pass, next_commit_ID, info->end_transaction);
890 0 : if (!success)
891 0 : success = -EIO;
892 : }
893 : }
894 :
/* Fast-commit blocks are processed after the regular log, in every pass except PASS_REVOKE. */
895 1488 : if (jbd2_has_feature_fast_commit(journal) && pass != PASS_REVOKE) {
896 0 : err = fc_do_one_pass(journal, info, pass);
897 0 : if (err)
898 0 : success = err;
899 : }
900 :
/* NOTE(review): block_error is only set together with success = -EFSBADCRC and success is never reset to 0 afterwards, so this branch looks unreachable - confirm. */
901 744 : if (block_error && success == 0)
902 0 : success = -EIO;
903 : return success;
904 :
905 : failed:
906 0 : return err;
907 : }
908 :
909 : /* Scan a revoke record, marking all blocks mentioned as revoked. */
910 :
911 570 : static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
912 : tid_t sequence, struct recovery_info *info)
913 : {
914 570 : jbd2_journal_revoke_header_t *header;
915 570 : int offset, max;
916 570 : unsigned csum_size = 0;
917 570 : __u32 rcount;
918 570 : int record_len = 4;
919 :
920 570 : header = (jbd2_journal_revoke_header_t *) bh->b_data;
921 570 : offset = sizeof(jbd2_journal_revoke_header_t);
922 570 : rcount = be32_to_cpu(header->r_count);
923 :
924 570 : if (jbd2_journal_has_csum_v2or3(journal))
925 561 : csum_size = sizeof(struct jbd2_journal_block_tail);
926 570 : if (rcount > journal->j_blocksize - csum_size)
927 : return -EINVAL;
928 570 : max = rcount;
929 :
930 1140 : if (jbd2_has_feature_64bit(journal))
931 570 : record_len = 8;
932 :
933 9524 : while (offset + record_len <= max) {
934 8954 : unsigned long long blocknr;
935 8954 : int err;
936 :
937 8954 : if (record_len == 4)
938 0 : blocknr = be32_to_cpu(* ((__be32 *) (bh->b_data+offset)));
939 : else
940 8954 : blocknr = be64_to_cpu(* ((__be64 *) (bh->b_data+offset)));
941 8954 : offset += record_len;
942 8954 : err = jbd2_journal_set_revoke(journal, blocknr, sequence);
943 8954 : if (err)
944 0 : return err;
945 8954 : ++info->nr_revokes;
946 : }
947 : return 0;
948 : }
|