LCOV - fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023

LCOV - code coverage report

Current view:	top level - fs/jbd2 - journal.c (source / functions)		Hit	Total	Coverage
Test:	fstests of 6.5.0-rc3-achx @ Mon Jul 31 20:08:12 PDT 2023	Lines:	961	1337	71.9 %
Date:	2023-07-31 20:08:12	Functions:	68	89	76.4 %

          Line data    Source code

       1             : // SPDX-License-Identifier: GPL-2.0+
       2             : /*
       3             :  * linux/fs/jbd2/journal.c
       4             :  *
       5             :  * Written by Stephen C. Tweedie <sct@redhat.com>, 1998
       6             :  *
       7             :  * Copyright 1998 Red Hat corp --- All Rights Reserved
       8             :  *
       9             :  * Generic filesystem journal-writing code; part of the ext2fs
      10             :  * journaling system.
      11             :  *
      12             :  * This file manages journals: areas of disk reserved for logging
      13             :  * transactional updates.  This includes the kernel journaling thread
      14             :  * which is responsible for scheduling updates to the log.
      15             :  *
      16             :  * We do not actually manage the physical storage of the journal in this
      17             :  * file: that is left to a per-journal policy function, which allows us
      18             :  * to store the journal within a filesystem-specified area for ext2
      19             :  * journaling (ext2 can use a reserved inode for storing the log).
      20             :  */
      21             : 
      22             : #include <linux/module.h>
      23             : #include <linux/time.h>
      24             : #include <linux/fs.h>
      25             : #include <linux/jbd2.h>
      26             : #include <linux/errno.h>
      27             : #include <linux/slab.h>
      28             : #include <linux/init.h>
      29             : #include <linux/mm.h>
      30             : #include <linux/freezer.h>
      31             : #include <linux/pagemap.h>
      32             : #include <linux/kthread.h>
      33             : #include <linux/poison.h>
      34             : #include <linux/proc_fs.h>
      35             : #include <linux/seq_file.h>
      36             : #include <linux/math64.h>
      37             : #include <linux/hash.h>
      38             : #include <linux/log2.h>
      39             : #include <linux/vmalloc.h>
      40             : #include <linux/backing-dev.h>
      41             : #include <linux/bitops.h>
      42             : #include <linux/ratelimit.h>
      43             : #include <linux/sched/mm.h>
      44             : 
      45             : #define CREATE_TRACE_POINTS
      46             : #include <trace/events/jbd2.h>
      47             : 
      48             : #include <linux/uaccess.h>
      49             : #include <asm/page.h>
      50             : 
      51             : #ifdef CONFIG_JBD2_DEBUG
      52             : static ushort jbd2_journal_enable_debug __read_mostly;
      53             : 
      54             : module_param_named(jbd2_debug, jbd2_journal_enable_debug, ushort, 0644);
      55             : MODULE_PARM_DESC(jbd2_debug, "Debugging level for jbd2");
      56             : #endif
      57             : 
      58             : EXPORT_SYMBOL(jbd2_journal_extend);
      59             : EXPORT_SYMBOL(jbd2_journal_stop);
      60             : EXPORT_SYMBOL(jbd2_journal_lock_updates);
      61             : EXPORT_SYMBOL(jbd2_journal_unlock_updates);
      62             : EXPORT_SYMBOL(jbd2_journal_get_write_access);
      63             : EXPORT_SYMBOL(jbd2_journal_get_create_access);
      64             : EXPORT_SYMBOL(jbd2_journal_get_undo_access);
      65             : EXPORT_SYMBOL(jbd2_journal_set_triggers);
      66             : EXPORT_SYMBOL(jbd2_journal_dirty_metadata);
      67             : EXPORT_SYMBOL(jbd2_journal_forget);
      68             : EXPORT_SYMBOL(jbd2_journal_flush);
      69             : EXPORT_SYMBOL(jbd2_journal_revoke);
      70             : 
      71             : EXPORT_SYMBOL(jbd2_journal_init_dev);
      72             : EXPORT_SYMBOL(jbd2_journal_init_inode);
      73             : EXPORT_SYMBOL(jbd2_journal_check_used_features);
      74             : EXPORT_SYMBOL(jbd2_journal_check_available_features);
      75             : EXPORT_SYMBOL(jbd2_journal_set_features);
      76             : EXPORT_SYMBOL(jbd2_journal_load);
      77             : EXPORT_SYMBOL(jbd2_journal_destroy);
      78             : EXPORT_SYMBOL(jbd2_journal_abort);
      79             : EXPORT_SYMBOL(jbd2_journal_errno);
      80             : EXPORT_SYMBOL(jbd2_journal_ack_err);
      81             : EXPORT_SYMBOL(jbd2_journal_clear_err);
      82             : EXPORT_SYMBOL(jbd2_log_wait_commit);
      83             : EXPORT_SYMBOL(jbd2_journal_start_commit);
      84             : EXPORT_SYMBOL(jbd2_journal_force_commit_nested);
      85             : EXPORT_SYMBOL(jbd2_journal_wipe);
      86             : EXPORT_SYMBOL(jbd2_journal_blocks_per_page);
      87             : EXPORT_SYMBOL(jbd2_journal_invalidate_folio);
      88             : EXPORT_SYMBOL(jbd2_journal_try_to_free_buffers);
      89             : EXPORT_SYMBOL(jbd2_journal_force_commit);
      90             : EXPORT_SYMBOL(jbd2_journal_inode_ranged_write);
      91             : EXPORT_SYMBOL(jbd2_journal_inode_ranged_wait);
      92             : EXPORT_SYMBOL(jbd2_journal_finish_inode_data_buffers);
      93             : EXPORT_SYMBOL(jbd2_journal_init_jbd_inode);
      94             : EXPORT_SYMBOL(jbd2_journal_release_jbd_inode);
      95             : EXPORT_SYMBOL(jbd2_journal_begin_ordered_truncate);
      96             : EXPORT_SYMBOL(jbd2_inode_cache);
      97             : 
      98             : static int jbd2_journal_create_slab(size_t slab_size);
      99             : 
     100             : #ifdef CONFIG_JBD2_DEBUG
     101             : void __jbd2_debug(int level, const char *file, const char *func,
     102             :                   unsigned int line, const char *fmt, ...)
     103             : {
     104             :         struct va_format vaf;
     105             :         va_list args;
     106             : 
     107             :         if (level > jbd2_journal_enable_debug)
     108             :                 return;
     109             :         va_start(args, fmt);
     110             :         vaf.fmt = fmt;
     111             :         vaf.va = &args;
     112             :         printk(KERN_DEBUG "%s: (%s, %u): %pV", file, func, line, &vaf);
     113             :         va_end(args);
     114             : }
     115             : #endif
     116             : 
     117             : /* Checksumming functions */
     118             : static int jbd2_verify_csum_type(journal_t *j, journal_superblock_t *sb)
     119             : {
     120        2501 :         if (!jbd2_journal_has_csum_v2or3_feature(j))
     121             :                 return 1;
     122             : 
     123        1327 :         return sb->s_checksum_type == JBD2_CRC32C_CHKSUM;
     124             : }
     125             : 
     126       11211 : static __be32 jbd2_superblock_csum(journal_t *j, journal_superblock_t *sb)
     127             : {
     128       11211 :         __u32 csum;
     129       11211 :         __be32 old_csum;
     130             : 
     131       11211 :         old_csum = sb->s_checksum;
     132       11211 :         sb->s_checksum = 0;
     133       11211 :         csum = jbd2_chksum(j, ~0, (char *)sb, sizeof(journal_superblock_t));
     134       11211 :         sb->s_checksum = old_csum;
     135             : 
     136       11211 :         return cpu_to_be32(csum);
     137             : }
     138             : 
     139             : /*
     140             :  * Helper function used to manage commit timeouts
     141             :  */
     142             : 
     143         204 : static void commit_timeout(struct timer_list *t)
     144             : {
     145         204 :         journal_t *journal = from_timer(journal, t, j_commit_timer);
     146             : 
     147         204 :         wake_up_process(journal->j_task);
     148         204 : }
     149             : 
     150             : /*
     151             :  * kjournald2: The main thread function used to manage a logging device
     152             :  * journal.
     153             :  *
     154             :  * This kernel thread is responsible for two things:
     155             :  *
     156             :  * 1) COMMIT:  Every so often we need to commit the current state of the
     157             :  *    filesystem to disk.  The journal thread is responsible for writing
     158             :  *    all of the metadata buffers to disk. If a fast commit is ongoing
     159             :  *    journal thread waits until it's done and then continues from
     160             :  *    there on.
     161             :  *
     162             :  * 2) CHECKPOINT: We cannot reuse a used section of the log file until all
     163             :  *    of the data in that part of the log has been rewritten elsewhere on
     164             :  *    the disk.  Flushing these old buffers to reclaim space in the log is
     165             :  *    known as checkpointing, and this thread is responsible for that job.
     166             :  */
     167             : 
     168        2501 : static int kjournald2(void *arg)
     169             : {
     170        2501 :         journal_t *journal = arg;
     171        2501 :         transaction_t *transaction;
     172             : 
     173             :         /*
     174             :          * Set up an interval timer which can be used to trigger a commit wakeup
     175             :          * after the commit interval expires
     176             :          */
     177        2501 :         timer_setup(&journal->j_commit_timer, commit_timeout, 0);
     178             : 
     179        2501 :         set_freezable();
     180             : 
     181             :         /* Record that the journal thread is running */
     182        2501 :         journal->j_task = current;
     183        2501 :         wake_up(&journal->j_wait_done_commit);
     184             : 
     185             :         /*
     186             :          * Make sure that no allocations from this kernel thread will ever
     187             :          * recurse to the fs layer because we are responsible for the
     188             :          * transaction commit and any fs involvement might get stuck waiting for
     189             :          * the transaction commit.
     190             :          */
     191        2501 :         memalloc_nofs_save();
     192             : 
     193             :         /*
     194             :          * And now, wait forever for commit wakeup events.
     195             :          */
     196        2501 :         write_lock(&journal->j_state_lock);
     197             : 
     198             : loop:
     199      399147 :         if (journal->j_flags & JBD2_UNMOUNT)
     200        2501 :                 goto end_loop;
     201             : 
     202      396646 :         jbd2_debug(1, "commit_sequence=%u, commit_request=%u\n",
     203             :                 journal->j_commit_sequence, journal->j_commit_request);
     204             : 
     205      396646 :         if (journal->j_commit_sequence != journal->j_commit_request) {
     206      201275 :                 jbd2_debug(1, "OK, requests differ\n");
     207      201275 :                 write_unlock(&journal->j_state_lock);
     208      201275 :                 del_timer_sync(&journal->j_commit_timer);
     209      201275 :                 jbd2_journal_commit_transaction(journal);
     210      201275 :                 write_lock(&journal->j_state_lock);
     211      201275 :                 goto loop;
     212             :         }
     213             : 
     214      195371 :         wake_up(&journal->j_wait_done_commit);
     215      195371 :         if (freezing(current)) {
     216             :                 /*
     217             :                  * The simpler the better. Flushing journal isn't a
     218             :                  * good idea, because that depends on threads that may
     219             :                  * be already stopped.
     220             :                  */
     221           0 :                 jbd2_debug(1, "Now suspending kjournald2\n");
     222           0 :                 write_unlock(&journal->j_state_lock);
     223           0 :                 try_to_freeze();
     224           0 :                 write_lock(&journal->j_state_lock);
     225             :         } else {
     226             :                 /*
     227             :                  * We assume on resume that commits are already there,
     228             :                  * so we don't sleep
     229             :                  */
     230      195371 :                 DEFINE_WAIT(wait);
     231      195371 :                 int should_sleep = 1;
     232             : 
     233      195371 :                 prepare_to_wait(&journal->j_wait_commit, &wait,
     234             :                                 TASK_INTERRUPTIBLE);
     235      195371 :                 if (journal->j_commit_sequence != journal->j_commit_request)
     236           0 :                         should_sleep = 0;
     237      195371 :                 transaction = journal->j_running_transaction;
     238      195371 :                 if (transaction && time_after_eq(jiffies,
     239             :                                                 transaction->t_expires))
     240           0 :                         should_sleep = 0;
     241      195371 :                 if (journal->j_flags & JBD2_UNMOUNT)
     242             :                         should_sleep = 0;
     243      195371 :                 if (should_sleep) {
     244      195371 :                         write_unlock(&journal->j_state_lock);
     245      195371 :                         schedule();
     246      195371 :                         write_lock(&journal->j_state_lock);
     247             :                 }
     248      195371 :                 finish_wait(&journal->j_wait_commit, &wait);
     249             :         }
     250             : 
     251      195371 :         jbd2_debug(1, "kjournald2 wakes\n");
     252             : 
     253             :         /*
     254             :          * Were we woken up by a commit wakeup event?
     255             :          */
     256      195371 :         transaction = journal->j_running_transaction;
     257      195371 :         if (transaction && time_after_eq(jiffies, transaction->t_expires)) {
     258         798 :                 journal->j_commit_request = transaction->t_tid;
     259         798 :                 jbd2_debug(1, "woke because of timeout\n");
     260             :         }
     261      195371 :         goto loop;
     262             : 
     263             : end_loop:
     264        2501 :         del_timer_sync(&journal->j_commit_timer);
     265        2501 :         journal->j_task = NULL;
     266        2501 :         wake_up(&journal->j_wait_done_commit);
     267        2501 :         jbd2_debug(1, "Journal thread exiting.\n");
     268        2501 :         write_unlock(&journal->j_state_lock);
     269        2501 :         return 0;
     270             : }
     271             : 
     272        2501 : static int jbd2_journal_start_thread(journal_t *journal)
     273             : {
     274        2501 :         struct task_struct *t;
     275             : 
     276        2501 :         t = kthread_run(kjournald2, journal, "jbd2/%s",
     277             :                         journal->j_devname);
     278        2501 :         if (IS_ERR(t))
     279           0 :                 return PTR_ERR(t);
     280             : 
     281        4996 :         wait_event(journal->j_wait_done_commit, journal->j_task != NULL);
     282             :         return 0;
     283             : }
     284             : 
     285        2503 : static void journal_kill_thread(journal_t *journal)
     286             : {
     287        2503 :         write_lock(&journal->j_state_lock);
     288        2503 :         journal->j_flags |= JBD2_UNMOUNT;
     289             : 
     290        5004 :         while (journal->j_task) {
     291        2501 :                 write_unlock(&journal->j_state_lock);
     292        2501 :                 wake_up(&journal->j_wait_commit);
     293        3768 :                 wait_event(journal->j_wait_done_commit, journal->j_task == NULL);
     294        2501 :                 write_lock(&journal->j_state_lock);
     295             :         }
     296        2503 :         write_unlock(&journal->j_state_lock);
     297        2503 : }
     298             : 
     299             : /*
     300             :  * jbd2_journal_write_metadata_buffer: write a metadata buffer to the journal.
     301             :  *
     302             :  * Writes a metadata buffer to a given disk block.  The actual IO is not
     303             :  * performed but a new buffer_head is constructed which labels the data
     304             :  * to be written with the correct destination disk block.
     305             :  *
     306             :  * Any magic-number escaping which needs to be done will cause a
     307             :  * copy-out here.  If the buffer happens to start with the
     308             :  * JBD2_MAGIC_NUMBER, then we can't write it to the log directly: the
     309             :  * magic number is only written to the log for descriptor blocks.  In
     310             :  * this case, we copy the data and replace the first word with 0, and we
     311             :  * return a result code which indicates that this buffer needs to be
     312             :  * marked as an escaped buffer in the corresponding log descriptor
     313             :  * block.  The missing word can then be restored when the block is read
     314             :  * during recovery.
     315             :  *
     316             :  * If the source buffer has already been modified by a new transaction
     317             :  * since we took the last commit snapshot, we use the frozen copy of
     318             :  * that data for IO. If we end up using the existing buffer_head's data
     319             :  * for the write, then we have to make sure nobody modifies it while the
     320             :  * IO is in progress. do_get_write_access() handles this.
     321             :  *
     322             :  * The buffer_head to be used for IO is returned through @bh_out.
     323             :  *
     324             :  *
     325             :  * Return value:
     326             :  *  <0: Error
     327             :  * >=0: Finished OK
     328             :  *
     329             :  * On success:
     330             :  * Bit 0 set == escape performed on the data
     331             :  * Bit 1 set == buffer copy-out performed (kfree the data after IO)
     332             :  */
     333             : 
     334     3121499 : int jbd2_journal_write_metadata_buffer(transaction_t *transaction,
     335             :                                   struct journal_head  *jh_in,
     336             :                                   struct buffer_head **bh_out,
     337             :                                   sector_t blocknr)
     338             : {
     339     3121499 :         int need_copy_out = 0;
     340     3121499 :         int done_copy_out = 0;
     341     3121499 :         int do_escape = 0;
     342     3121499 :         char *mapped_data;
     343     3121499 :         struct buffer_head *new_bh;
     344     3121499 :         struct page *new_page;
     345     3121499 :         unsigned int new_offset;
     346     3121499 :         struct buffer_head *bh_in = jh2bh(jh_in);
     347     3121499 :         journal_t *journal = transaction->t_journal;
     348             : 
     349             :         /*
     350             :          * The buffer really shouldn't be locked: only the current committing
     351             :          * transaction is allowed to write it, so nobody else is allowed
     352             :          * to do any IO.
     353             :          *
     354             :          * akpm: except if we're journalling data, and write() output is
     355             :          * also part of a shared mapping, and another thread has
     356             :          * decided to launch a writepage() against this buffer.
     357             :          */
     358     6242998 :         J_ASSERT_BH(bh_in, buffer_jbddirty(bh_in));
     359             : 
     360     3121499 :         new_bh = alloc_buffer_head(GFP_NOFS|__GFP_NOFAIL);
     361             : 
     362             :         /* keep subsequent assertions sane */
     363     3121499 :         atomic_set(&new_bh->b_count, 1);
     364             : 
     365     3121499 :         spin_lock(&jh_in->b_state_lock);
     366     3121499 : repeat:
     367             :         /*
     368             :          * If a new transaction has already done a buffer copy-out, then
     369             :          * we use that version of the data for the commit.
     370             :          */
     371     3121499 :         if (jh_in->b_frozen_data) {
     372       21377 :                 done_copy_out = 1;
     373       21377 :                 new_page = virt_to_page(jh_in->b_frozen_data);
     374       21377 :                 new_offset = offset_in_page(jh_in->b_frozen_data);
     375             :         } else {
     376     3100122 :                 new_page = jh2bh(jh_in)->b_page;
     377     3100122 :                 new_offset = offset_in_page(jh2bh(jh_in)->b_data);
     378             :         }
     379             : 
     380     3121499 :         mapped_data = kmap_atomic(new_page);
     381             :         /*
     382             :          * Fire data frozen trigger if data wasn't already frozen.  Do this
     383             :          * before checking for escaping, as the trigger may modify the magic
     384             :          * offset.  If a copy-out happens afterwards, it will have the correct
     385             :          * data in the buffer.
     386             :          */
     387     3121499 :         if (!done_copy_out)
     388     3100122 :                 jbd2_buffer_frozen_trigger(jh_in, mapped_data + new_offset,
     389             :                                            jh_in->b_triggers);
     390             : 
     391             :         /*
     392             :          * Check for escaping
     393             :          */
     394     3121499 :         if (*((__be32 *)(mapped_data + new_offset)) ==
     395             :                                 cpu_to_be32(JBD2_MAGIC_NUMBER)) {
     396           0 :                 need_copy_out = 1;
     397           0 :                 do_escape = 1;
     398             :         }
     399     3121499 :         kunmap_atomic(mapped_data);
     400             : 
     401             :         /*
     402             :          * Do we need to do a data copy?
     403             :          */
     404     3121499 :         if (need_copy_out && !done_copy_out) {
     405           0 :                 char *tmp;
     406             : 
     407           0 :                 spin_unlock(&jh_in->b_state_lock);
     408           0 :                 tmp = jbd2_alloc(bh_in->b_size, GFP_NOFS);
     409           0 :                 if (!tmp) {
     410           0 :                         brelse(new_bh);
     411           0 :                         return -ENOMEM;
     412             :                 }
     413           0 :                 spin_lock(&jh_in->b_state_lock);
     414           0 :                 if (jh_in->b_frozen_data) {
     415           0 :                         jbd2_free(tmp, bh_in->b_size);
     416           0 :                         goto repeat;
     417             :                 }
     418             : 
     419           0 :                 jh_in->b_frozen_data = tmp;
     420           0 :                 mapped_data = kmap_atomic(new_page);
     421           0 :                 memcpy(tmp, mapped_data + new_offset, bh_in->b_size);
     422           0 :                 kunmap_atomic(mapped_data);
     423             : 
     424           0 :                 new_page = virt_to_page(tmp);
     425           0 :                 new_offset = offset_in_page(tmp);
     426           0 :                 done_copy_out = 1;
     427             : 
     428             :                 /*
     429             :                  * This isn't strictly necessary, as we're using frozen
     430             :                  * data for the escaping, but it keeps consistency with
     431             :                  * b_frozen_data usage.
     432             :                  */
     433           0 :                 jh_in->b_frozen_triggers = jh_in->b_triggers;
     434             :         }
     435             : 
     436             :         /*
     437             :          * Did we need to do an escaping?  Now we've done all the
     438             :          * copying, we can finally do so.
     439             :          */
     440     3121499 :         if (do_escape) {
     441           0 :                 mapped_data = kmap_atomic(new_page);
     442           0 :                 *((unsigned int *)(mapped_data + new_offset)) = 0;
     443           0 :                 kunmap_atomic(mapped_data);
     444             :         }
     445             : 
     446     3121499 :         set_bh_page(new_bh, new_page, new_offset);
     447     3121499 :         new_bh->b_size = bh_in->b_size;
     448     3121499 :         new_bh->b_bdev = journal->j_dev;
     449     3121499 :         new_bh->b_blocknr = blocknr;
     450     3121499 :         new_bh->b_private = bh_in;
     451     3121499 :         set_buffer_mapped(new_bh);
     452     3121499 :         set_buffer_dirty(new_bh);
     453             : 
     454     3121499 :         *bh_out = new_bh;
     455             : 
     456             :         /*
     457             :          * The to-be-written buffer needs to get moved to the io queue,
     458             :          * and the original buffer whose contents we are shadowing or
     459             :          * copying is moved to the transaction's shadow queue.
     460             :          */
     461     3121499 :         JBUFFER_TRACE(jh_in, "file as BJ_Shadow");
     462     3121499 :         spin_lock(&journal->j_list_lock);
     463     3121499 :         __jbd2_journal_file_buffer(jh_in, transaction, BJ_Shadow);
     464     3121499 :         spin_unlock(&journal->j_list_lock);
     465     3121499 :         set_buffer_shadow(bh_in);
     466     3121499 :         spin_unlock(&jh_in->b_state_lock);
     467             : 
     468     3121499 :         return do_escape | (done_copy_out << 1);
     469             : }
     470             : 
     471             : /*
     472             :  * Allocation code for the journal file.  Manage the space left in the
     473             :  * journal, so that we can begin checkpointing when appropriate.
     474             :  */
     475             : 
     476             : /*
     477             :  * Called with j_state_lock locked for writing.
     478             :  * Returns true if a transaction commit was started.
     479             :  */
     480      439510 : static int __jbd2_log_start_commit(journal_t *journal, tid_t target)
     481             : {
     482             :         /* Return if a commit of this txn has already been requested */
     483      439510 :         if (journal->j_commit_request == target)
     484             :                 return 0;
     485             : 
     486             :         /*
     487             :          * The only transaction we can possibly wait upon is the
     488             :          * currently running transaction (if it exists).  Otherwise,
     489             :          * the target tid must be an old one.
     490             :          */
     491      207246 :         if (journal->j_running_transaction &&
     492      207246 :             journal->j_running_transaction->t_tid == target) {
     493             :                 /*
     494             :                  * We want a new commit: OK, mark the request and wakeup the
     495             :                  * commit thread.  We do _not_ do the commit ourselves.
     496             :                  */
     497             : 
     498      201071 :                 journal->j_commit_request = target;
     499      201071 :                 jbd2_debug(1, "JBD2: requesting commit %u/%u\n",
     500             :                           journal->j_commit_request,
     501             :                           journal->j_commit_sequence);
     502      201071 :                 journal->j_running_transaction->t_requested = jiffies;
     503      201071 :                 wake_up(&journal->j_wait_commit);
     504      201071 :                 return 1;
     505        6175 :         } else if (!tid_geq(journal->j_commit_request, target))
     506             :                 /* This should never happen, but if it does, preserve
     507             :                    the evidence before kjournald goes into a loop and
     508             :                    increments j_commit_sequence beyond all recognition. */
     509           0 :                 WARN_ONCE(1, "JBD2: bad log_start_commit: %u %u %u %u\n",
     510             :                           journal->j_commit_request,
     511             :                           journal->j_commit_sequence,
     512             :                           target, journal->j_running_transaction ?
     513             :                           journal->j_running_transaction->t_tid : 0);
     514             :         return 0;
     515             : }
     516             : 
     517      413746 : int jbd2_log_start_commit(journal_t *journal, tid_t tid)
     518             : {
     519      413746 :         int ret;
     520             : 
     521      413746 :         write_lock(&journal->j_state_lock);
     522      413880 :         ret = __jbd2_log_start_commit(journal, tid);
     523      413880 :         write_unlock(&journal->j_state_lock);
     524      413874 :         return ret;
     525             : }
     526             : 
     527             : /*
     528             :  * Force and wait for any uncommitted transactions.  We can only force the
     529             :  * running transaction if we don't have an active handle; otherwise we deadlock.
     530             :  * Returns: <0 in case of error,
     531             :  *           0 if nothing to commit,
     532             :  *           1 if transaction was successfully committed.
     533             :  */
     534      123402 : static int __jbd2_journal_force_commit(journal_t *journal)
     535             : {
     536      123402 :         transaction_t *transaction = NULL;
     537      123402 :         tid_t tid;
     538      123402 :         int need_to_start = 0, ret = 0;
     539             : 
     540      123402 :         read_lock(&journal->j_state_lock);
     541      123432 :         if (journal->j_running_transaction && !current->journal_info) {
     542      123210 :                 transaction = journal->j_running_transaction;
     543      123210 :                 if (!tid_geq(journal->j_commit_request, transaction->t_tid))
     544       10093 :                         need_to_start = 1;
     545         222 :         } else if (journal->j_committing_transaction)
     546             :                 transaction = journal->j_committing_transaction;
     547             : 
     548      123432 :         if (!transaction) {
     549             :                 /* Nothing to commit */
     550          50 :                 read_unlock(&journal->j_state_lock);
     551          50 :                 return 0;
     552             :         }
     553      123382 :         tid = transaction->t_tid;
     554      123382 :         read_unlock(&journal->j_state_lock);
     555      123354 :         if (need_to_start)
     556       10091 :                 jbd2_log_start_commit(journal, tid);
     557      123358 :         ret = jbd2_log_wait_commit(journal, tid);
     558      123175 :         if (!ret)
     559      123149 :                 ret = 1;
     560             : 
     561             :         return ret;
     562             : }
     563             : 
     564             : /**
     565             :  * jbd2_journal_force_commit_nested - Force and wait upon a commit if the
     566             :  * calling process is not within transaction.
     567             :  *
     568             :  * @journal: journal to force
     569             :  * Returns true if progress was made.
     570             :  *
     571             :  * This is used for forcing out undo-protected data which contains
     572             :  * bitmaps, when the fs is running out of space.
     573             :  */
     574      120060 : int jbd2_journal_force_commit_nested(journal_t *journal)
     575             : {
     576      120060 :         int ret;
     577             : 
     578      120060 :         ret = __jbd2_journal_force_commit(journal);
                               /*
                                * Only a positive result (a commit was waited for successfully)
                                * counts as progress; 0 (nothing to commit) and negative errors
                                * are both reported as 0.
                                */
     579      119813 :         return ret > 0;
     580             : }
     581             : 
     582             : /**
     583             :  * jbd2_journal_force_commit() - force any uncommitted transactions
     584             :  * @journal: journal to force
     585             :  *
     586             :  * Caller wants an unconditional commit. We can only force the running transaction
     587             :  * if we don't have an active handle, otherwise, we will deadlock.
                        *
                        * Return: 0 if a transaction was committed or there was nothing to
                        * commit, a negative value if __jbd2_journal_force_commit() failed.
     588             :  */
     589        3352 : int jbd2_journal_force_commit(journal_t *journal)
     590             : {
     591        3352 :         int ret;
     592             : 
                               /* The caller must not be inside a handle (see the deadlock note above). */
     593        3352 :         J_ASSERT(!current->journal_info);
     594        3352 :         ret = __jbd2_journal_force_commit(journal);
                               /* The helper's "1 == committed" is plain success for our callers. */
     595        3350 :         if (ret > 0)
     596             :                 ret = 0;
     597        3350 :         return ret;
     598             : }
     599             : 
     600             : /*
     601             :  * Start a commit of the current running transaction (if any).  Returns true
     602             :  * if a transaction is going to be committed (or is currently already
     603             :  * committing), and fills its tid in at *ptid
                        *
                        * @ptid may be NULL, in which case no tid is reported.  When 0 is
                        * returned (no running and no committing transaction) *ptid is left
                        * untouched.
     604             :  */
     605       70614 : int jbd2_journal_start_commit(journal_t *journal, tid_t *ptid)
     606             : {
     607       70614 :         int ret = 0;
     608             : 
     609       70614 :         write_lock(&journal->j_state_lock);
     610       70633 :         if (journal->j_running_transaction) {
     611       25563 :                 tid_t tid = journal->j_running_transaction->t_tid;
     612             : 
     613       25563 :                 __jbd2_log_start_commit(journal, tid);
     614             :                 /* There's a running transaction and we've just made sure
     615             :                  * its commit has been scheduled. */
     616       25563 :                 if (ptid)
     617       25563 :                         *ptid = tid;
     618             :                 ret = 1;
     619       45070 :         } else if (journal->j_committing_transaction) {
     620             :                 /*
     621             :                  * If commit has been started, then we have to wait for
     622             :                  * completion of that transaction.
     623             :                  */
     624        6575 :                 if (ptid)
     625        6575 :                         *ptid = journal->j_committing_transaction->t_tid;
     626             :                 ret = 1;
     627             :         }
     628       70633 :         write_unlock(&journal->j_state_lock);
     629       70630 :         return ret;
     630             : }
     631             : 
     632             : /*
     633             :  * Return 1 if a given transaction has not yet sent barrier request
     634             :  * connected with a transaction commit. If 0 is returned, transaction
     635             :  * may or may not have sent the barrier. Used to avoid sending barrier
     636             :  * twice in common cases.
     637             :  */
     638      269205 : int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid)
     639             : {
     640      269205 :         int ret = 0;
     641      269205 :         transaction_t *commit_trans;
     642             : 
                               /* Without JBD2_BARRIER set no answer other than 0 is given. */
     643      269205 :         if (!(journal->j_flags & JBD2_BARRIER))
     644             :                 return 0;
     645      269191 :         read_lock(&journal->j_state_lock);
     646             :         /* Transaction already committed? */
     647      269192 :         if (tid_geq(journal->j_commit_sequence, tid))
     648       67163 :                 goto out;
     649      202029 :         commit_trans = journal->j_committing_transaction;
                               /*
                                * @tid is not committed and is not the transaction currently
                                * committing, so its commit has not begun: report 1.
                                */
     650      202029 :         if (!commit_trans || commit_trans->t_tid != tid) {
     651      189840 :                 ret = 1;
     652      189840 :                 goto out;
     653             :         }
     654             :         /*
     655             :          * Transaction is being committed and we already proceeded to
     656             :          * submitting a flush to fs partition?
     657             :          */
                               /*
                                * NOTE(review): j_fs_dev != j_dev presumably means the journal
                                * lives on a separate device; in that case the T_COMMIT_DFLUSH
                                * state is checked, otherwise T_COMMIT_JFLUSH - confirm against
                                * the commit code.
                                */
     658       12189 :         if (journal->j_fs_dev != journal->j_dev) {
     659           0 :                 if (!commit_trans->t_need_data_flush ||
     660           0 :                     commit_trans->t_state >= T_COMMIT_DFLUSH)
     661           0 :                         goto out;
     662             :         } else {
     663       12189 :                 if (commit_trans->t_state >= T_COMMIT_JFLUSH)
     664         556 :                         goto out;
     665             :         }
     666             :         ret = 1;
     667      269192 : out:
     668      269192 :         read_unlock(&journal->j_state_lock);
     669      269192 :         return ret;
     670             : }
     671             : EXPORT_SYMBOL(jbd2_trans_will_send_data_barrier);
     672             : 
     673             : /*
     674             :  * Wait for a specified commit to complete.
     675             :  * The caller may not hold the journal lock.
                        *
                        * Returns 0, or -EIO if the journal is found aborted once the wait is
                        * over.
     676             :  */
     677      342138 : int jbd2_log_wait_commit(journal_t *journal, tid_t tid)
     678             : {
     679      342138 :         int err = 0;
     680             : 
     681      342138 :         read_lock(&journal->j_state_lock);
     682             : #ifdef CONFIG_PROVE_LOCKING
     683             :         /*
     684             :          * Some callers make sure transaction is already committing and in that
     685             :          * case we cannot block on open handles anymore. So don't warn in that
     686             :          * case.
     687             :          */
     688             :         if (tid_gt(tid, journal->j_commit_sequence) &&
     689             :             (!journal->j_committing_transaction ||
     690             :              journal->j_committing_transaction->t_tid != tid)) {
     691             :                 read_unlock(&journal->j_state_lock);
     692             :                 jbd2_might_wait_for_commit(journal);
     693             :                 read_lock(&journal->j_state_lock);
     694             :         }
     695             : #endif
     696             : #ifdef CONFIG_JBD2_DEBUG
                               /* Debug aid: complain if nobody has requested a commit up to @tid. */
     697             :         if (!tid_geq(journal->j_commit_request, tid)) {
     698             :                 printk(KERN_ERR
     699             :                        "%s: error: j_commit_request=%u, tid=%u\n",
     700             :                        __func__, journal->j_commit_request, tid);
     701             :         }
     702             : #endif
                               /*
                                * Loop until j_commit_sequence has reached @tid.  Each round
                                * drops the lock, wakes waiters on j_wait_commit, sleeps on
                                * j_wait_done_commit, then re-checks the condition under the
                                * lock.
                                */
     703      684238 :         while (tid_gt(tid, journal->j_commit_sequence)) {
     704      342127 :                 jbd2_debug(1, "JBD2: want %u, j_commit_sequence=%u\n",
     705             :                                   tid, journal->j_commit_sequence);
     706      342127 :                 read_unlock(&journal->j_state_lock);
     707      342135 :                 wake_up(&journal->j_wait_commit);
     708      804469 :                 wait_event(journal->j_wait_done_commit,
     709             :                                 !tid_gt(tid, journal->j_commit_sequence));
     710      342226 :                 read_lock(&journal->j_state_lock);
     711             :         }
     712      342127 :         read_unlock(&journal->j_state_lock);
     713             : 
     714      342017 :         if (unlikely(is_journal_aborted(journal)))
     715           3 :                 err = -EIO;
     716      342017 :         return err;
     717             : }
     718             : 
     719             : /*
     720             :  * Start a fast commit. If there's an ongoing fast or full commit wait for
     721             :  * it to complete. Returns 0 if a new fast commit was started. Returns -EALREADY
     722             :  * if a fast commit is not needed, either because there is already a commit
     723             :  * going on or this tid has already been committed. Returns -EINVAL if no jbd2
     724             :  * commit has yet been performed. Returns -EIO if the journal is aborted.
                        *
                        * On a 0 return JBD2_FAST_COMMIT_ONGOING is set in j_flags and
                        * jbd2_journal_lock_updates() has been called on the journal.
     725             :  */
     726           0 : int jbd2_fc_begin_commit(journal_t *journal, tid_t tid)
     727             : {
     728           0 :         if (unlikely(is_journal_aborted(journal)))
     729             :                 return -EIO;
     730             :         /*
     731             :          * Fast commits only allowed if at least one full commit has
     732             :          * been processed.
     733             :          */
     734           0 :         if (!journal->j_stats.ts_tid)
     735             :                 return -EINVAL;
     736             : 
     737           0 :         write_lock(&journal->j_state_lock);
                               /*
                                * Compare tids with tid_geq(), as every other tid comparison in
                                * this file does.  tids are sequence numbers that wrap, so a raw
                                * "<=" gives the wrong answer once the counter has wrapped.
                                */
     738           0 :         if (tid_geq(journal->j_commit_sequence, tid)) {
     739           0 :                 write_unlock(&journal->j_state_lock);
     740           0 :                 return -EALREADY;
     741             :         }
     742             : 
     743           0 :         if (journal->j_flags & JBD2_FULL_COMMIT_ONGOING ||
     744             :             (journal->j_flags & JBD2_FAST_COMMIT_ONGOING)) {
     745           0 :                 DEFINE_WAIT(wait);
     746             : 
                                       /*
                                        * Queue on j_fc_wait before dropping the lock, sleep
                                        * once, and return -EALREADY rather than starting a
                                        * fast commit ourselves.
                                        */
     747           0 :                 prepare_to_wait(&journal->j_fc_wait, &wait,
     748             :                                 TASK_UNINTERRUPTIBLE);
     749           0 :                 write_unlock(&journal->j_state_lock);
     750           0 :                 schedule();
     751           0 :                 finish_wait(&journal->j_fc_wait, &wait);
     752           0 :                 return -EALREADY;
     753             :         }
     754           0 :         journal->j_flags |= JBD2_FAST_COMMIT_ONGOING;
     755           0 :         write_unlock(&journal->j_state_lock);
     756           0 :         jbd2_journal_lock_updates(journal);
     757             : 
     758           0 :         return 0;
     759             : }
     760             : EXPORT_SYMBOL(jbd2_fc_begin_commit);
     761             : 
     762             : /*
     763             :  * Stop a fast commit. If fallback is set, this function starts commit of
     764             :  * TID tid before any other fast commit can start.
                        *
                        * Returns 0 without fallback; with fallback, the result of
                        * jbd2_complete_transaction() for @tid.
     765             :  */
     766           0 : static int __jbd2_fc_end_commit(journal_t *journal, tid_t tid, bool fallback)
     767             : {
     768           0 :         jbd2_journal_unlock_updates(journal);
                               /* The cleanup callback is optional; it is passed 0 and @tid. */
     769           0 :         if (journal->j_fc_cleanup_callback)
     770           0 :                 journal->j_fc_cleanup_callback(journal, 0, tid);
     771           0 :         write_lock(&journal->j_state_lock);
     772           0 :         journal->j_flags &= ~JBD2_FAST_COMMIT_ONGOING;
                               /*
                                * On fallback, set the full-commit flag under the same lock hold
                                * that clears the fast-commit flag, so jbd2_fc_begin_commit()
                                * never sees both flags clear in between.
                                */
     773           0 :         if (fallback)
     774           0 :                 journal->j_flags |= JBD2_FULL_COMMIT_ONGOING;
     775           0 :         write_unlock(&journal->j_state_lock);
                               /* Release anyone sleeping on j_fc_wait in jbd2_fc_begin_commit(). */
     776           0 :         wake_up(&journal->j_fc_wait);
     777           0 :         if (fallback)
     778           0 :                 return jbd2_complete_transaction(journal, tid);
     779             :         return 0;
     780             : }
     781             : 
     782           0 : int jbd2_fc_end_commit(journal_t *journal)
     783             : {
                               /*
                                * End a fast commit without falling back to a full commit: the
                                * helper is called with tid 0 and fallback == false.
                                */
     784           0 :         return __jbd2_fc_end_commit(journal, 0, false);
     785             : }
     786             : EXPORT_SYMBOL(jbd2_fc_end_commit);
     787             : 
     788           0 : int jbd2_fc_end_commit_fallback(journal_t *journal)
     789             : {
                               /*
                                * End a fast commit and fall back to a full commit of the running
                                * transaction.  Its tid is read under j_state_lock; 0 is used when
                                * there is no running transaction.
                                */
     790           0 :         tid_t tid;
     791             : 
     792           0 :         read_lock(&journal->j_state_lock);
     793           0 :         tid = journal->j_running_transaction ?
     794           0 :                 journal->j_running_transaction->t_tid : 0;
     795           0 :         read_unlock(&journal->j_state_lock);
     796           0 :         return __jbd2_fc_end_commit(journal, tid, true);
     797             : }
     798             : EXPORT_SYMBOL(jbd2_fc_end_commit_fallback);
     799             : 
     800             : /* Return 1 when transaction with given tid has already committed. */
     801     2538565 : int jbd2_transaction_committed(journal_t *journal, tid_t tid)
     802             : {
                               /*
                                * "Committed" is decided by exclusion: ret stays 1 unless @tid
                                * matches the running or the committing transaction, both of
                                * which are inspected under j_state_lock (read side).
                                */
     803     2538565 :         int ret = 1;
     804             : 
     805     2538565 :         read_lock(&journal->j_state_lock);
     806     2538783 :         if (journal->j_running_transaction &&
     807     2394118 :             journal->j_running_transaction->t_tid == tid)
     808     2080199 :                 ret = 0;
     809     2538783 :         if (journal->j_committing_transaction &&
     810       55235 :             journal->j_committing_transaction->t_tid == tid)
     811        6673 :                 ret = 0;
     812     2538783 :         read_unlock(&journal->j_state_lock);
     813     2538901 :         return ret;
     814             : }
     815             : EXPORT_SYMBOL(jbd2_transaction_committed);
     816             : 
     817             : /*
     818             :  * When this function returns the transaction corresponding to tid
     819             :  * will be completed.  If the transaction is currently running, start
     820             :  * committing that transaction before waiting for it to complete.  If
     821             :  * the transaction id is stale, it is by definition already completed,
     822             :  * so just return SUCCESS.
                        *
                        * Returns 0 when no wait is needed, otherwise the result of
                        * jbd2_log_wait_commit().
     823             :  */
     824      233827 : int jbd2_complete_transaction(journal_t *journal, tid_t tid)
     825             : {
     826      233827 :         int     need_to_wait = 1;
     827             : 
     828      233827 :         read_lock(&journal->j_state_lock);
     829      233833 :         if (journal->j_running_transaction &&
     830      225768 :             journal->j_running_transaction->t_tid == tid) {
     831      179562 :                 if (journal->j_commit_request != tid) {
     832             :                         /* transaction not yet started, so request it */
     833      175507 :                         read_unlock(&journal->j_state_lock);
     834      175507 :                         jbd2_log_start_commit(journal, tid);
     835      175519 :                         goto wait_commit;
     836             :                 }
                                       /* Running and already requested: fall through and wait. */
     837       54271 :         } else if (!(journal->j_committing_transaction &&
     838        9747 :                      journal->j_committing_transaction->t_tid == tid))
                                       /* Neither running nor committing: nothing to wait for. */
     839       47867 :                 need_to_wait = 0;
     840       58326 :         read_unlock(&journal->j_state_lock);
     841       58330 :         if (!need_to_wait)
     842             :                 return 0;
     843       10461 : wait_commit:
     844      185980 :         return jbd2_log_wait_commit(journal, tid);
     845             : }
     846             : EXPORT_SYMBOL(jbd2_complete_transaction);
     847             : 
     848             : /*
     849             :  * Log buffer allocation routines:
     850             :  */
     851             : 
     852     3563895 : int jbd2_journal_next_log_block(journal_t *journal, unsigned long long *retp)
     853             : {
                               /*
                                * Hand out the log block at j_head and map it through
                                * jbd2_journal_bmap() into *retp; that call's result is
                                * returned.  j_head/j_free are updated under j_state_lock
                                * (write side), the mapping is done after the lock is dropped.
                                */
     854     3563895 :         unsigned long blocknr;
     855             : 
     856     3563895 :         write_lock(&journal->j_state_lock);
                               /* More than one free block is required before one is handed out. */
     857     3563895 :         J_ASSERT(journal->j_free > 1);
     858             : 
     859     3563895 :         blocknr = journal->j_head;
     860     3563895 :         journal->j_head++;
     861     3563895 :         journal->j_free--;
                               /* Wrap the head back to j_first when it reaches j_last. */
     862     3563895 :         if (journal->j_head == journal->j_last)
     863         148 :                 journal->j_head = journal->j_first;
     864     3563895 :         write_unlock(&journal->j_state_lock);
     865     3563895 :         return jbd2_journal_bmap(journal, blocknr, retp);
     866             : }
     867             : 
     868             : /* Map one fast commit buffer for use by the file system */
     869           0 : int jbd2_fc_get_buf(journal_t *journal, struct buffer_head **bh_out)
     870             : {
                               /*
                                * On success returns 0, stores the buffer in *bh_out and records
                                * it in j_fc_wbuf[] at the slot that was just consumed.  On any
                                * failure *bh_out is NULL and a negative error is returned:
                                * -EINVAL when j_fc_first + j_fc_off has reached j_fc_last, the
                                * jbd2_journal_bmap() error, or -ENOMEM when __getblk() fails.
                                */
     871           0 :         unsigned long long pblock;
     872           0 :         unsigned long blocknr;
     873           0 :         int ret = 0;
     874           0 :         struct buffer_head *bh;
     875           0 :         int fc_off;
     876             : 
     877           0 :         *bh_out = NULL;
     878             : 
     879           0 :         if (journal->j_fc_off + journal->j_fc_first < journal->j_fc_last) {
     880           0 :                 fc_off = journal->j_fc_off;
     881           0 :                 blocknr = journal->j_fc_first + fc_off;
     882           0 :                 journal->j_fc_off++;
     883             :         } else {
     884             :                 ret = -EINVAL;
     885             :         }
     886             : 
     887           0 :         if (ret)
     888             :                 return ret;
     889             : 
                               /*
                                * NOTE(review): j_fc_off has already been advanced here, so if
                                * either call below fails the slot j_fc_wbuf[fc_off] is never
                                * filled in - confirm callers cope with that.
                                */
     890           0 :         ret = jbd2_journal_bmap(journal, blocknr, &pblock);
     891           0 :         if (ret)
     892             :                 return ret;
     893             : 
     894           0 :         bh = __getblk(journal->j_dev, pblock, journal->j_blocksize);
     895           0 :         if (!bh)
     896             :                 return -ENOMEM;
     897             : 
     898             : 
     899           0 :         journal->j_fc_wbuf[fc_off] = bh;
     900             : 
     901           0 :         *bh_out = bh;
     902             : 
     903           0 :         return 0;
     904             : }
     905             : EXPORT_SYMBOL(jbd2_fc_get_buf);
     906             : 
     907             : /*
     908             :  * Wait on fast commit buffers that were allocated by jbd2_fc_get_buf
     909             :  * for completion.
                        *
                        * Covers the last @num_blks slots below j_fc_off.  Returns 0 when all
                        * of them are uptodate after the wait, -EIO at the first one that is
                        * not.
     910             :  */
     911           0 : int jbd2_fc_wait_bufs(journal_t *journal, int num_blks)
     912             : {
     913           0 :         struct buffer_head *bh;
     914           0 :         int i, j_fc_off;
     915             : 
     916           0 :         j_fc_off = journal->j_fc_off;
     917             : 
     918             :         /*
     919             :          * Wait in reverse order to minimize chances of us being woken up before
     920             :          * all IOs have completed
     921             :          */
     922           0 :         for (i = j_fc_off - 1; i >= j_fc_off - num_blks; i--) {
     923           0 :                 bh = journal->j_fc_wbuf[i];
     924           0 :                 wait_on_buffer(bh);
     925             :                 /*
     926             :                  * Update j_fc_off so jbd2_fc_release_bufs can release the
     927             :                  * remaining buffer heads.
     928             :                  */
     929           0 :                 if (unlikely(!buffer_uptodate(bh))) {
     930           0 :                         journal->j_fc_off = i + 1;
     931           0 :                         return -EIO;
     932             :                 }
                                       /* Buffer completed fine: drop our reference and clear the slot. */
     933           0 :                 put_bh(bh);
     934           0 :                 journal->j_fc_wbuf[i] = NULL;
     935             :         }
     936             : 
     937             :         return 0;
     938             : }
     939             : EXPORT_SYMBOL(jbd2_fc_wait_bufs);
     940             : 
     941           0 : int jbd2_fc_release_bufs(journal_t *journal)
     942             : {
                               /*
                                * Drop the references still held in j_fc_wbuf[], walking down
                                * from slot j_fc_off - 1 and stopping at the first empty (NULL)
                                * slot.  Always returns 0.
                                */
     943           0 :         struct buffer_head *bh;
     944           0 :         int i, j_fc_off;
     945             : 
     946           0 :         j_fc_off = journal->j_fc_off;
     947             : 
     948           0 :         for (i = j_fc_off - 1; i >= 0; i--) {
     949           0 :                 bh = journal->j_fc_wbuf[i];
     950           0 :                 if (!bh)
     951             :                         break;
     952           0 :                 put_bh(bh);
     953           0 :                 journal->j_fc_wbuf[i] = NULL;
     954             :         }
     955             : 
     956           0 :         return 0;
     957             : }
     958             : EXPORT_SYMBOL(jbd2_fc_release_bufs);
     959             : 
     960             : /*
     961             :  * Conversion of logical to physical block numbers for the journal
     962             :  *
     963             :  * On external journals the journal blocks are identity-mapped, so
     964             :  * this is a no-op.  If needed, we can use j_blk_offset - everything is
     965             :  * ready.
                        *
                        * Returns 0 with the physical block stored in *retp.  An error from
                        * the j_bmap callback is returned as-is; if mapping through j_inode
                        * fails, the journal is aborted and -EIO is returned.  *retp is not
                        * written on failure.
     966             :  */
     967     3691533 : int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
     968             :                  unsigned long long *retp)
     969             : {
     970     3691533 :         int err = 0;
     971     3691533 :         unsigned long long ret;
     972     3691533 :         sector_t block = blocknr;
     973             : 
                               /* A journal-supplied j_bmap callback takes precedence. */
     974     3691533 :         if (journal->j_bmap) {
     975     3691533 :                 err = journal->j_bmap(journal, &block);
     976     3691533 :                 if (err == 0)
     977     3691533 :                         *retp = block;
     978           0 :         } else if (journal->j_inode) {
     979           0 :                 ret = bmap(journal->j_inode, &block);
     980             : 
                                       /* A bmap() error or a zero block both count as failure. */
     981           0 :                 if (ret || !block) {
     982           0 :                         printk(KERN_ALERT "%s: journal block not found "
     983             :                                         "at offset %lu on %s\n",
     984             :                                __func__, blocknr, journal->j_devname);
     985           0 :                         err = -EIO;
     986           0 :                         jbd2_journal_abort(journal, err);
     987             :                 } else {
     988           0 :                         *retp = block;
     989             :                 }
     990             : 
     991             :         } else {
     992           0 :                 *retp = blocknr; /* +journal->j_blk_offset */
     993             :         }
     994     3691533 :         return err;
     995             : }
     996             : 
     997             : /*
     998             :  * We play buffer_head aliasing tricks to write data/metadata blocks to
     999             :  * the journal without copying their contents, but for journal
    1000             :  * descriptor blocks we do need to generate bona fide buffers.
    1001             :  *
    1002             :  * After the caller of jbd2_journal_get_descriptor_buffer() has finished modifying
    1003             :  * the buffer's contents they really should run flush_dcache_page(bh->b_page).
    1004             :  * But we don't bother doing that, so there will be coherency problems with
    1005             :  * mmaps of blockdevs which hold live JBD-controlled filesystems.
                        *
                        * Returns a zeroed, uptodate buffer whose journal header (magic, block
                        * type @type, sequence = transaction tid) is already filled in, or NULL
                        * if no log block could be obtained or __getblk() failed.
    1006             :  */
    1007             : struct buffer_head *
    1008      442396 : jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
    1009             : {
    1010      442396 :         journal_t *journal = transaction->t_journal;
    1011      442396 :         struct buffer_head *bh;
    1012      442396 :         unsigned long long blocknr;
    1013      442396 :         journal_header_t *header;
    1014      442396 :         int err;
    1015             : 
    1016      442396 :         err = jbd2_journal_next_log_block(journal, &blocknr);
    1017             : 
    1018      442396 :         if (err)
    1019             :                 return NULL;
    1020             : 
    1021      442396 :         bh = __getblk(journal->j_dev, blocknr, journal->j_blocksize);
    1022      442396 :         if (!bh)
    1023             :                 return NULL;
                               /* This block uses up one of the transaction's outstanding credits. */
    1024      442396 :         atomic_dec(&transaction->t_outstanding_credits);
    1025      442396 :         lock_buffer(bh);
    1026      442396 :         memset(bh->b_data, 0, journal->j_blocksize);
                               /* Header fields are stored big-endian. */
    1027      442396 :         header = (journal_header_t *)bh->b_data;
    1028      442396 :         header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
    1029      442396 :         header->h_blocktype = cpu_to_be32(type);
    1030      442396 :         header->h_sequence = cpu_to_be32(transaction->t_tid);
    1031      442396 :         set_buffer_uptodate(bh);
    1032      442396 :         unlock_buffer(bh);
    1033      442396 :         BUFFER_TRACE(bh, "return this buffer");
    1034      442396 :         return bh;
    1035             : }
    1036             : 
    1037      241102 : void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
    1038             : {
                               /*
                                * Store the checksum of a descriptor block in its tail.  Does
                                * nothing unless the journal has v2/v3 checksums.
                                */
    1039      241102 :         struct jbd2_journal_block_tail *tail;
    1040      241102 :         __u32 csum;
    1041             : 
    1042      241102 :         if (!jbd2_journal_has_csum_v2or3(j))
    1043             :                 return;
    1044             : 
                               /* The tail structure occupies the last bytes of the block. */
    1045      240596 :         tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
    1046             :                         sizeof(struct jbd2_journal_block_tail));
                               /*
                                * The checksum covers the whole block with t_checksum zeroed;
                                * the result is then stored big-endian.
                                */
    1047      240596 :         tail->t_checksum = 0;
    1048      240596 :         csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
    1049      240596 :         tail->t_checksum = cpu_to_be32(csum);
    1050             : }
    1051             : 
    1052             : /*
    1053             :  * Return tid of the oldest transaction in the journal and block in the journal
    1054             :  * where the transaction starts.
    1055             :  *
    1056             :  * If the journal is now empty, return the next transaction ID we will write
    1057             :  * and the block where that transaction will start.
    1058             :  *
    1059             :  * The return value is 0 if journal tail cannot be pushed any further, 1 if
    1060             :  * it can.
    1061             :  */
    1062      245822 : int jbd2_journal_get_log_tail(journal_t *journal, tid_t *tid,
    1063             :                               unsigned long *block)
    1064             : {
    1065      245822 :         transaction_t *transaction;
    1066      245822 :         int ret;
    1067             : 
    1068      245822 :         read_lock(&journal->j_state_lock);
    1069      245822 :         spin_lock(&journal->j_list_lock);
                               /*
                                * Candidates are tried in order: the head of the checkpoint
                                * list, then the committing transaction, then the running one.
                                * The first two report their own t_log_start; a running
                                * transaction and the empty-journal case report j_head.
                                */
    1070      245822 :         transaction = journal->j_checkpoint_transactions;
    1071      245822 :         if (transaction) {
    1072      157674 :                 *tid = transaction->t_tid;
    1073      157674 :                 *block = transaction->t_log_start;
    1074       88148 :         } else if ((transaction = journal->j_committing_transaction) != NULL) {
    1075       86170 :                 *tid = transaction->t_tid;
    1076       86170 :                 *block = transaction->t_log_start;
    1077        1978 :         } else if ((transaction = journal->j_running_transaction) != NULL) {
    1078           1 :                 *tid = transaction->t_tid;
    1079           1 :                 *block = journal->j_head;
    1080             :         } else {
    1081        1977 :                 *tid = journal->j_transaction_sequence;
    1082        1977 :                 *block = journal->j_head;
    1083             :         }
                               /* The tail can move only if the found tid is past j_tail_sequence. */
    1084      245822 :         ret = tid_gt(*tid, journal->j_tail_sequence);
    1085      245822 :         spin_unlock(&journal->j_list_lock);
    1086      245822 :         read_unlock(&journal->j_state_lock);
    1087             : 
    1088      245822 :         return ret;
    1089             : }
    1090             : 
    1091             : /*
    1092             :  * Update information in journal structure and in on disk journal superblock
    1093             :  * about log tail. This function does not check whether information passed in
    1094             :  * really pushes log tail further. It is the responsibility of the caller to make
    1095             :  * sure provided log tail information is valid (e.g. by holding
    1096             :  * j_checkpoint_mutex all the time between computing log tail and calling this
    1097             :  * function as is the case with jbd2_cleanup_journal_tail()).
    1098             :  *
    1099             :  * Requires j_checkpoint_mutex
                        *
                        * Returns the result of jbd2_journal_update_sb_log_tail().  The
                        * in-memory tail and free-space accounting are updated only when that
                        * superblock update succeeded.
    1100             :  */
    1101        6141 : int __jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
    1102             : {
    1103        6141 :         unsigned long freed;
    1104        6141 :         int ret;
    1105             : 
    1106        6141 :         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex));
    1107             : 
    1108             :         /*
    1109             :          * We cannot afford for write to remain in drive's caches since as
    1110             :          * soon as we update j_tail, next transaction can start reusing journal
    1111             :          * space and if we lose sb update during power failure we'd replay
    1112             :          * old transaction with possibly newly overwritten data.
    1113             :          */
    1114        6141 :         ret = jbd2_journal_update_sb_log_tail(journal, tid, block,
    1115             :                                               REQ_SYNC | REQ_FUA);
    1116        6141 :         if (ret)
    1117           4 :                 goto out;
    1118             : 
    1119        6137 :         write_lock(&journal->j_state_lock);
                               /*
                                * Blocks released by moving the tail from j_tail to @block.  If
                                * the new tail is numerically below the old one, the tail has
                                * wrapped and the unsigned difference is corrected by the size
                                * of the log area (j_last - j_first).
                                */
    1120        6137 :         freed = block - journal->j_tail;
    1121        6137 :         if (block < journal->j_tail)
    1122         148 :                 freed += journal->j_last - journal->j_first;
    1123             : 
    1124        6137 :         trace_jbd2_update_log_tail(journal, tid, block, freed);
    1125        6137 :         jbd2_debug(1,
    1126             :                   "Cleaning journal tail from %u to %u (offset %lu), "
    1127             :                   "freeing %lu\n",
    1128             :                   journal->j_tail_sequence, tid, block, freed);
    1129             : 
    1130        6137 :         journal->j_free += freed;
    1131        6137 :         journal->j_tail_sequence = tid;
    1132        6137 :         journal->j_tail = block;
    1133        6137 :         write_unlock(&journal->j_state_lock);
    1134             : 
    1135        6141 : out:
    1136        6141 :         return ret;
    1137             : }
    1138             : 
    1139             : /*
    1140             :  * This is a variation of __jbd2_update_log_tail which checks for validity of
    1141             :  * provided log tail and locks j_checkpoint_mutex. So it is safe against races
    1142             :  * with other threads updating log tail.
    1143             :  */
    1144         185 : void jbd2_update_log_tail(journal_t *journal, tid_t tid, unsigned long block)
    1145             : {
    1146         185 :         mutex_lock_io(&journal->j_checkpoint_mutex);
    1147         185 :         if (tid_gt(tid, journal->j_tail_sequence))
    1148         181 :                 __jbd2_update_log_tail(journal, tid, block);
    1149         185 :         mutex_unlock(&journal->j_checkpoint_mutex);
    1150         185 : }
    1151             : 
    1152             : struct jbd2_stats_proc_session {
    1153             :         journal_t *journal;
    1154             :         struct transaction_stats_s *stats;
    1155             :         int start;
    1156             :         int max;
    1157             : };
    1158             : 
    1159           0 : static void *jbd2_seq_info_start(struct seq_file *seq, loff_t *pos)
    1160             : {
    1161           0 :         return *pos ? NULL : SEQ_START_TOKEN;
    1162             : }
    1163             : 
    1164           0 : static void *jbd2_seq_info_next(struct seq_file *seq, void *v, loff_t *pos)
    1165             : {
    1166           0 :         (*pos)++;
    1167           0 :         return NULL;
    1168             : }
    1169             : 
    1170           0 : static int jbd2_seq_info_show(struct seq_file *seq, void *v)
    1171             : {
    1172           0 :         struct jbd2_stats_proc_session *s = seq->private;
    1173             : 
    1174           0 :         if (v != SEQ_START_TOKEN)
    1175             :                 return 0;
    1176           0 :         seq_printf(seq, "%lu transactions (%lu requested), "
    1177             :                    "each up to %u blocks\n",
    1178           0 :                    s->stats->ts_tid, s->stats->ts_requested,
    1179           0 :                    s->journal->j_max_transaction_buffers);
    1180           0 :         if (s->stats->ts_tid == 0)
    1181             :                 return 0;
    1182           0 :         seq_printf(seq, "average: \n  %ums waiting for transaction\n",
    1183           0 :             jiffies_to_msecs(s->stats->run.rs_wait / s->stats->ts_tid));
    1184           0 :         seq_printf(seq, "  %ums request delay\n",
    1185           0 :             (s->stats->ts_requested == 0) ? 0 :
    1186           0 :             jiffies_to_msecs(s->stats->run.rs_request_delay /
    1187             :                              s->stats->ts_requested));
    1188           0 :         seq_printf(seq, "  %ums running transaction\n",
    1189           0 :             jiffies_to_msecs(s->stats->run.rs_running / s->stats->ts_tid));
    1190           0 :         seq_printf(seq, "  %ums transaction was being locked\n",
    1191           0 :             jiffies_to_msecs(s->stats->run.rs_locked / s->stats->ts_tid));
    1192           0 :         seq_printf(seq, "  %ums flushing data (in ordered mode)\n",
    1193           0 :             jiffies_to_msecs(s->stats->run.rs_flushing / s->stats->ts_tid));
    1194           0 :         seq_printf(seq, "  %ums logging transaction\n",
    1195           0 :             jiffies_to_msecs(s->stats->run.rs_logging / s->stats->ts_tid));
    1196           0 :         seq_printf(seq, "  %lluus average transaction commit time\n",
    1197           0 :                    div_u64(s->journal->j_average_commit_time, 1000));
    1198           0 :         seq_printf(seq, "  %lu handles per transaction\n",
    1199           0 :             s->stats->run.rs_handle_count / s->stats->ts_tid);
    1200           0 :         seq_printf(seq, "  %lu blocks per transaction\n",
    1201           0 :             s->stats->run.rs_blocks / s->stats->ts_tid);
    1202           0 :         seq_printf(seq, "  %lu logged blocks per transaction\n",
    1203           0 :             s->stats->run.rs_blocks_logged / s->stats->ts_tid);
    1204           0 :         return 0;
    1205             : }
    1206             : 
    1207           0 : static void jbd2_seq_info_stop(struct seq_file *seq, void *v)
    1208             : {
    1209           0 : }
    1210             : 
    1211             : static const struct seq_operations jbd2_seq_info_ops = {
    1212             :         .start  = jbd2_seq_info_start,
    1213             :         .next   = jbd2_seq_info_next,
    1214             :         .stop   = jbd2_seq_info_stop,
    1215             :         .show   = jbd2_seq_info_show,
    1216             : };
    1217             : 
    1218           0 : static int jbd2_seq_info_open(struct inode *inode, struct file *file)
    1219             : {
    1220           0 :         journal_t *journal = pde_data(inode);
    1221           0 :         struct jbd2_stats_proc_session *s;
    1222           0 :         int rc, size;
    1223             : 
    1224           0 :         s = kmalloc(sizeof(*s), GFP_KERNEL);
    1225           0 :         if (s == NULL)
    1226             :                 return -ENOMEM;
    1227           0 :         size = sizeof(struct transaction_stats_s);
    1228           0 :         s->stats = kmalloc(size, GFP_KERNEL);
    1229           0 :         if (s->stats == NULL) {
    1230           0 :                 kfree(s);
    1231           0 :                 return -ENOMEM;
    1232             :         }
    1233           0 :         spin_lock(&journal->j_history_lock);
    1234           0 :         memcpy(s->stats, &journal->j_stats, size);
    1235           0 :         s->journal = journal;
    1236           0 :         spin_unlock(&journal->j_history_lock);
    1237             : 
    1238           0 :         rc = seq_open(file, &jbd2_seq_info_ops);
    1239           0 :         if (rc == 0) {
    1240           0 :                 struct seq_file *m = file->private_data;
    1241           0 :                 m->private = s;
    1242             :         } else {
    1243           0 :                 kfree(s->stats);
    1244           0 :                 kfree(s);
    1245             :         }
    1246             :         return rc;
    1247             : 
    1248             : }
    1249             : 
    1250           0 : static int jbd2_seq_info_release(struct inode *inode, struct file *file)
    1251             : {
    1252           0 :         struct seq_file *seq = file->private_data;
    1253           0 :         struct jbd2_stats_proc_session *s = seq->private;
    1254           0 :         kfree(s->stats);
    1255           0 :         kfree(s);
    1256           0 :         return seq_release(inode, file);
    1257             : }
    1258             : 
    1259             : static const struct proc_ops jbd2_info_proc_ops = {
    1260             :         .proc_open      = jbd2_seq_info_open,
    1261             :         .proc_read      = seq_read,
    1262             :         .proc_lseek     = seq_lseek,
    1263             :         .proc_release   = jbd2_seq_info_release,
    1264             : };
    1265             : 
    1266             : static struct proc_dir_entry *proc_jbd2_stats;
    1267             : 
    1268        2503 : static void jbd2_stats_proc_init(journal_t *journal)
    1269             : {
    1270        2503 :         journal->j_proc_entry = proc_mkdir(journal->j_devname, proc_jbd2_stats);
    1271        2503 :         if (journal->j_proc_entry) {
    1272        2503 :                 proc_create_data("info", S_IRUGO, journal->j_proc_entry,
    1273             :                                  &jbd2_info_proc_ops, journal);
    1274             :         }
    1275        2503 : }
    1276             : 
    1277        2503 : static void jbd2_stats_proc_exit(journal_t *journal)
    1278             : {
    1279        2503 :         remove_proc_entry("info", journal->j_proc_entry);
    1280        2503 :         remove_proc_entry(journal->j_devname, proc_jbd2_stats);
    1281        2503 : }
    1282             : 
    1283             : /* Minimum size of descriptor tag */
    1284             : static int jbd2_min_tag_size(void)
    1285             : {
    1286             :         /*
    1287             :          * Tag with 32-bit block numbers does not use last four bytes of the
    1288             :          * structure
    1289             :          */
    1290             :         return sizeof(journal_block_tag_t) - 4;
    1291             : }
    1292             : 
    1293             : /**
    1294             :  * jbd2_journal_shrink_scan()
    1295             :  * @shrink: shrinker to work on
    1296             :  * @sc: reclaim request to process
    1297             :  *
    1298             :  * Scan the checkpointed buffer on the checkpoint list and release the
    1299             :  * journal_head.
    1300             :  */
    1301          52 : static unsigned long jbd2_journal_shrink_scan(struct shrinker *shrink,
    1302             :                                               struct shrink_control *sc)
    1303             : {
    1304          52 :         journal_t *journal = container_of(shrink, journal_t, j_shrinker);
    1305          52 :         unsigned long nr_to_scan = sc->nr_to_scan;
    1306          52 :         unsigned long nr_shrunk;
    1307          52 :         unsigned long count;
    1308             : 
    1309          52 :         count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
    1310          52 :         trace_jbd2_shrink_scan_enter(journal, sc->nr_to_scan, count);
    1311             : 
    1312          52 :         nr_shrunk = jbd2_journal_shrink_checkpoint_list(journal, &nr_to_scan);
    1313             : 
    1314          52 :         count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
    1315          52 :         trace_jbd2_shrink_scan_exit(journal, nr_to_scan, nr_shrunk, count);
    1316             : 
    1317          52 :         return nr_shrunk;
    1318             : }
    1319             : 
    1320             : /**
    1321             :  * jbd2_journal_shrink_count()
    1322             :  * @shrink: shrinker to work on
    1323             :  * @sc: reclaim request to process
    1324             :  *
    1325             :  * Count the number of checkpoint buffers on the checkpoint list.
    1326             :  */
    1327         523 : static unsigned long jbd2_journal_shrink_count(struct shrinker *shrink,
    1328             :                                                struct shrink_control *sc)
    1329             : {
    1330         523 :         journal_t *journal = container_of(shrink, journal_t, j_shrinker);
    1331         523 :         unsigned long count;
    1332             : 
    1333         523 :         count = percpu_counter_read_positive(&journal->j_checkpoint_jh_count);
    1334         523 :         trace_jbd2_shrink_count(journal, sc->nr_to_scan, count);
    1335             : 
    1336         523 :         return count;
    1337             : }
    1338             : 
    1339             : /*
    1340             :  * Management for journal control blocks: functions to create and
    1341             :  * destroy journal_t structures, and to initialise and read existing
    1342             :  * journal blocks from disk.  */
    1343             : 
    1344             : /* First: create and setup a journal_t object in memory.  We initialise
    1345             :  * very few fields yet: that has to wait until we have created the
    1346             :  * journal structures from scratch, or loaded them from disk. */
    1347             : 
    1348        2503 : static journal_t *journal_init_common(struct block_device *bdev,
    1349             :                         struct block_device *fs_dev,
    1350             :                         unsigned long long start, int len, int blocksize)
    1351             : {
    1352        2503 :         static struct lock_class_key jbd2_trans_commit_key;
    1353        2503 :         journal_t *journal;
    1354        2503 :         int err;
    1355        2503 :         struct buffer_head *bh;
    1356        2503 :         int n;
    1357             : 
    1358        2503 :         journal = kzalloc(sizeof(*journal), GFP_KERNEL);
    1359        2503 :         if (!journal)
    1360             :                 return NULL;
    1361             : 
    1362        2503 :         init_waitqueue_head(&journal->j_wait_transaction_locked);
    1363        2503 :         init_waitqueue_head(&journal->j_wait_done_commit);
    1364        2503 :         init_waitqueue_head(&journal->j_wait_commit);
    1365        2503 :         init_waitqueue_head(&journal->j_wait_updates);
    1366        2503 :         init_waitqueue_head(&journal->j_wait_reserved);
    1367        2503 :         init_waitqueue_head(&journal->j_fc_wait);
    1368        2503 :         mutex_init(&journal->j_abort_mutex);
    1369        2503 :         mutex_init(&journal->j_barrier);
    1370        2503 :         mutex_init(&journal->j_checkpoint_mutex);
    1371        2503 :         spin_lock_init(&journal->j_revoke_lock);
    1372        2503 :         spin_lock_init(&journal->j_list_lock);
    1373        2503 :         rwlock_init(&journal->j_state_lock);
    1374             : 
    1375        2503 :         journal->j_commit_interval = (HZ * JBD2_DEFAULT_MAX_COMMIT_AGE);
    1376        2503 :         journal->j_min_batch_time = 0;
    1377        2503 :         journal->j_max_batch_time = 15000; /* 15ms */
    1378        2503 :         atomic_set(&journal->j_reserved_credits, 0);
    1379             : 
    1380             :         /* The journal is marked for error until we succeed with recovery! */
    1381        2503 :         journal->j_flags = JBD2_ABORT;
    1382             : 
    1383             :         /* Set up a default-sized revoke table for the new mount. */
    1384        2503 :         err = jbd2_journal_init_revoke(journal, JOURNAL_REVOKE_DEFAULT_HASH);
    1385        2503 :         if (err)
    1386           0 :                 goto err_cleanup;
    1387             : 
    1388        2503 :         spin_lock_init(&journal->j_history_lock);
    1389             : 
    1390        2503 :         lockdep_init_map(&journal->j_trans_commit_map, "jbd2_handle",
    1391             :                          &jbd2_trans_commit_key, 0);
    1392             : 
    1393             :         /* journal descriptor can store up to n blocks -bzzz */
    1394        2503 :         journal->j_blocksize = blocksize;
    1395        2503 :         journal->j_dev = bdev;
    1396        2503 :         journal->j_fs_dev = fs_dev;
    1397        2503 :         journal->j_blk_offset = start;
    1398        2503 :         journal->j_total_len = len;
    1399             :         /* We need enough buffers to write out full descriptor block. */
    1400        2503 :         n = journal->j_blocksize / jbd2_min_tag_size();
    1401        2503 :         journal->j_wbufsize = n;
    1402        2503 :         journal->j_fc_wbuf = NULL;
    1403        2503 :         journal->j_wbuf = kmalloc_array(n, sizeof(struct buffer_head *),
    1404             :                                         GFP_KERNEL);
    1405        2503 :         if (!journal->j_wbuf)
    1406           0 :                 goto err_cleanup;
    1407             : 
    1408        2503 :         bh = getblk_unmovable(journal->j_dev, start, journal->j_blocksize);
    1409        2503 :         if (!bh) {
    1410           0 :                 pr_err("%s: Cannot get buffer for journal superblock\n",
    1411             :                         __func__);
    1412           0 :                 goto err_cleanup;
    1413             :         }
    1414        2503 :         journal->j_sb_buffer = bh;
    1415        2503 :         journal->j_superblock = (journal_superblock_t *)bh->b_data;
    1416             : 
    1417        2503 :         journal->j_shrink_transaction = NULL;
    1418        2503 :         journal->j_shrinker.scan_objects = jbd2_journal_shrink_scan;
    1419        2503 :         journal->j_shrinker.count_objects = jbd2_journal_shrink_count;
    1420        2503 :         journal->j_shrinker.seeks = DEFAULT_SEEKS;
    1421        2503 :         journal->j_shrinker.batch = journal->j_max_transaction_buffers;
    1422             : 
    1423        2503 :         if (percpu_counter_init(&journal->j_checkpoint_jh_count, 0, GFP_KERNEL))
    1424           0 :                 goto err_cleanup;
    1425             : 
    1426        2503 :         if (register_shrinker(&journal->j_shrinker, "jbd2-journal:(%u:%u)",
    1427        2503 :                               MAJOR(bdev->bd_dev), MINOR(bdev->bd_dev))) {
    1428           0 :                 percpu_counter_destroy(&journal->j_checkpoint_jh_count);
    1429           0 :                 goto err_cleanup;
    1430             :         }
    1431             :         return journal;
    1432             : 
    1433           0 : err_cleanup:
    1434           0 :         brelse(journal->j_sb_buffer);
    1435           0 :         kfree(journal->j_wbuf);
    1436           0 :         jbd2_journal_destroy_revoke(journal);
    1437           0 :         kfree(journal);
    1438           0 :         return NULL;
    1439             : }
    1440             : 
    1441             : /* jbd2_journal_init_dev and jbd2_journal_init_inode:
    1442             :  *
    1443             :  * Create a journal structure assigned some fixed set of disk blocks to
    1444             :  * the journal.  We don't actually touch those disk blocks yet, but we
    1445             :  * need to set up all of the mapping information to tell the journaling
    1446             :  * system where the journal blocks are.
    1447             :  *
    1448             :  */
    1449             : 
    1450             : /**
    1451             :  *  journal_t * jbd2_journal_init_dev() - creates and initialises a journal structure
    1452             :  *  @bdev: Block device on which to create the journal
    1453             :  *  @fs_dev: Device which holds the journalled filesystem for this journal.
    1454             :  *  @start: Block nr Start of journal.
    1455             :  *  @len:  Length of the journal in blocks.
    1456             :  *  @blocksize: blocksize of journalling device
    1457             :  *
    1458             :  *  Returns: a newly created journal_t *
    1459             :  *
    1460             :  *  jbd2_journal_init_dev creates a journal which maps a fixed contiguous
    1461             :  *  range of blocks on an arbitrary block device.
    1462             :  *
    1463             :  */
    1464          12 : journal_t *jbd2_journal_init_dev(struct block_device *bdev,
    1465             :                         struct block_device *fs_dev,
    1466             :                         unsigned long long start, int len, int blocksize)
    1467             : {
    1468          12 :         journal_t *journal;
    1469             : 
    1470          12 :         journal = journal_init_common(bdev, fs_dev, start, len, blocksize);
    1471          12 :         if (!journal)
    1472             :                 return NULL;
    1473             : 
    1474          12 :         snprintf(journal->j_devname, sizeof(journal->j_devname),
    1475             :                  "%pg", journal->j_dev);
    1476          12 :         strreplace(journal->j_devname, '/', '!');
    1477          12 :         jbd2_stats_proc_init(journal);
    1478             : 
    1479          12 :         return journal;
    1480             : }
    1481             : 
    1482             : /**
    1483             :  *  journal_t * jbd2_journal_init_inode () - creates a journal which maps to an inode.
    1484             :  *  @inode: An inode to create the journal in
    1485             :  *
    1486             :  * jbd2_journal_init_inode creates a journal which maps an on-disk inode as
    1487             :  * the journal.  The inode must exist already, must support bmap() and
    1488             :  * must have all data blocks preallocated.
    1489             :  */
    1490        2491 : journal_t *jbd2_journal_init_inode(struct inode *inode)
    1491             : {
    1492        2491 :         journal_t *journal;
    1493        2491 :         sector_t blocknr;
    1494        2491 :         int err = 0;
    1495             : 
    1496        2491 :         blocknr = 0;
    1497        2491 :         err = bmap(inode, &blocknr);
    1498             : 
    1499        2491 :         if (err || !blocknr) {
    1500           0 :                 pr_err("%s: Cannot locate journal superblock\n",
    1501             :                         __func__);
    1502           0 :                 return NULL;
    1503             :         }
    1504             : 
    1505        2491 :         jbd2_debug(1, "JBD2: inode %s/%ld, size %lld, bits %d, blksize %ld\n",
    1506             :                   inode->i_sb->s_id, inode->i_ino, (long long) inode->i_size,
    1507             :                   inode->i_sb->s_blocksize_bits, inode->i_sb->s_blocksize);
    1508             : 
    1509        2491 :         journal = journal_init_common(inode->i_sb->s_bdev, inode->i_sb->s_bdev,
    1510        2491 :                         blocknr, inode->i_size >> inode->i_sb->s_blocksize_bits,
    1511        2491 :                         inode->i_sb->s_blocksize);
    1512        2491 :         if (!journal)
    1513             :                 return NULL;
    1514             : 
    1515        2491 :         journal->j_inode = inode;
    1516        2491 :         snprintf(journal->j_devname, sizeof(journal->j_devname),
    1517             :                  "%pg-%lu", journal->j_dev, journal->j_inode->i_ino);
    1518        2491 :         strreplace(journal->j_devname, '/', '!');
    1519        2491 :         jbd2_stats_proc_init(journal);
    1520             : 
    1521        2491 :         return journal;
    1522             : }
    1523             : 
    1524             : /*
    1525             :  * If the journal init or create aborts, we need to mark the journal
    1526             :  * superblock as being NULL to prevent the journal destroy from writing
    1527             :  * back a bogus superblock.
    1528             :  */
    1529             : static void journal_fail_superblock(journal_t *journal)
    1530             : {
    1531           1 :         struct buffer_head *bh = journal->j_sb_buffer;
    1532           1 :         brelse(bh);
    1533           1 :         journal->j_sb_buffer = NULL;
    1534             : }
    1535             : 
    1536             : /*
    1537             :  * Given a journal_t structure, initialise the various fields for
    1538             :  * startup of a new journaling session.  We use this both when creating
    1539             :  * a journal, and after recovering an old journal to reset it for
    1540             :  * subsequent use.
    1541             :  */
    1542             : 
    1543        2501 : static int journal_reset(journal_t *journal)
    1544             : {
    1545        2501 :         journal_superblock_t *sb = journal->j_superblock;
    1546        2501 :         unsigned long long first, last;
    1547             : 
    1548        2501 :         first = be32_to_cpu(sb->s_first);
    1549        2501 :         last = be32_to_cpu(sb->s_maxlen);
    1550        2501 :         if (first + JBD2_MIN_JOURNAL_BLOCKS > last + 1) {
    1551           0 :                 printk(KERN_ERR "JBD2: Journal too short (blocks %llu-%llu).\n",
    1552             :                        first, last);
    1553           0 :                 journal_fail_superblock(journal);
    1554           0 :                 return -EINVAL;
    1555             :         }
    1556             : 
    1557        2501 :         journal->j_first = first;
    1558        2501 :         journal->j_last = last;
    1559             : 
    1560        2501 :         if (journal->j_head != 0 && journal->j_flags & JBD2_CYCLE_RECORD) {
    1561             :                 /*
    1562             :                  * Disable the cycled recording mode if the journal head block
    1563             :                  * number is not correct.
    1564             :                  */
    1565        1336 :                 if (journal->j_head < first || journal->j_head >= last) {
    1566           0 :                         printk(KERN_WARNING "JBD2: Incorrect Journal head block %lu, "
    1567             :                                "disable journal_cycle_record\n",
    1568             :                                journal->j_head);
    1569           0 :                         journal->j_head = journal->j_first;
    1570             :                 }
    1571             :         } else {
    1572        1165 :                 journal->j_head = journal->j_first;
    1573             :         }
    1574        2501 :         journal->j_tail = journal->j_head;
    1575        2501 :         journal->j_free = journal->j_last - journal->j_first;
    1576             : 
    1577        2501 :         journal->j_tail_sequence = journal->j_transaction_sequence;
    1578        2501 :         journal->j_commit_sequence = journal->j_transaction_sequence - 1;
    1579        2501 :         journal->j_commit_request = journal->j_commit_sequence;
    1580             : 
    1581        2501 :         journal->j_max_transaction_buffers = jbd2_journal_get_max_txn_bufs(journal);
    1582             : 
    1583             :         /*
    1584             :          * Now that journal recovery is done, turn fast commits off here. This
    1585             :          * way, if fast commit was enabled before the crash but the FS has now
    1586             :          * disabled it, we don't enable fast commits.
    1587             :          */
    1588        2501 :         jbd2_clear_feature_fast_commit(journal);
    1589             : 
    1590             :         /*
    1591             :          * As a special case, if the on-disk copy is already marked as needing
    1592             :          * no recovery (s_start == 0), then we can safely defer the superblock
    1593             :          * update until the next commit by setting JBD2_FLUSHED.  This avoids
    1594             :          * attempting a write to a potential-readonly device.
    1595             :          */
    1596        2501 :         if (sb->s_start == 0) {
    1597        2253 :                 jbd2_debug(1, "JBD2: Skipping superblock update on recovered sb "
    1598             :                         "(start %ld, seq %u, errno %d)\n",
    1599             :                         journal->j_tail, journal->j_tail_sequence,
    1600             :                         journal->j_errno);
    1601        2253 :                 journal->j_flags |= JBD2_FLUSHED;
    1602             :         } else {
    1603             :                 /* Lock here to make assertions happy... */
    1604         248 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    1605             :                 /*
    1606             :                  * Update log tail information. We use REQ_FUA since new
    1607             :                  * transaction will start reusing journal space and so we
    1608             :                  * must make sure information about current log tail is on
    1609             :                  * disk before that.
    1610             :                  */
    1611         248 :                 jbd2_journal_update_sb_log_tail(journal,
    1612             :                                                 journal->j_tail_sequence,
    1613             :                                                 journal->j_tail,
    1614             :                                                 REQ_SYNC | REQ_FUA);
    1615         248 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    1616             :         }
    1617        2501 :         return jbd2_journal_start_thread(journal);
    1618             : }
    1619             : 
    1620             : /*
    1621             :  * Write the journal superblock to disk and wait for the I/O.  The caller must have locked the journal
    1622             :  * buffer head; this function returns with it unlocked.  Returns 0 on success or -EIO on failure.
    1623             :  */
    1624        9953 : static int jbd2_write_superblock(journal_t *journal, blk_opf_t write_flags)
    1625             : {
    1626        9953 :         struct buffer_head *bh = journal->j_sb_buffer;
    1627        9953 :         journal_superblock_t *sb = journal->j_superblock;
    1628        9953 :         int ret = 0;
    1629             : 
    1630             :         /* Buffer got discarded which means block device got invalidated */
    1631       19906 :         if (!buffer_mapped(bh)) {
    1632           0 :                 unlock_buffer(bh);
    1633           0 :                 return -EIO;
    1634             :         }
    1635             : 
    1636        9953 :         trace_jbd2_write_superblock(journal, write_flags);
    1637        9953 :         if (!(journal->j_flags & JBD2_BARRIER))
    1638           0 :                 write_flags &= ~(REQ_FUA | REQ_PREFLUSH); /* JBD2_BARRIER unset: drop flush/FUA flags */
    1639       19906 :         if (buffer_write_io_error(bh)) {
    1640             :                 /*
    1641             :                  * Oh, dear.  A previous attempt to write the journal
    1642             :                  * superblock failed.  This could happen because the
    1643             :                  * USB device was yanked out.  Or it could happen to
    1644             :                  * be a transient write error and maybe the block will
    1645             :                  * be remapped.  Nothing we can do but to retry the
    1646             :                  * write and hope for the best.
    1647             :                  */
    1648           0 :                 printk(KERN_ERR "JBD2: previous I/O error detected "
    1649             :                        "for journal superblock update for %s.\n",
    1650             :                        journal->j_devname);
    1651           0 :                 clear_buffer_write_io_error(bh);
    1652           0 :                 set_buffer_uptodate(bh);
    1653             :         }
    1654        9953 :         if (jbd2_journal_has_csum_v2or3(journal))
    1655        9884 :                 sb->s_checksum = jbd2_superblock_csum(journal, sb); /* recompute s_checksum before writing */
    1656        9953 :         get_bh(bh); /* extra reference for the I/O; presumably dropped by the end_io handler -- confirm */
    1657        9953 :         bh->b_end_io = end_buffer_write_sync;
    1658        9953 :         submit_bh(REQ_OP_WRITE | write_flags, bh);
    1659        9953 :         wait_on_buffer(bh); /* synchronous: block until the write has completed */
    1660       19906 :         if (buffer_write_io_error(bh)) {
    1661          11 :                 clear_buffer_write_io_error(bh); /* reset the error state on the buffer and report -EIO */
    1662          11 :                 set_buffer_uptodate(bh);
    1663          11 :                 ret = -EIO;
    1664             :         }
    1665          11 :         if (ret) {
    1666          11 :                 printk(KERN_ERR "JBD2: I/O error when updating journal superblock for %s.\n",
    1667             :                                 journal->j_devname);
    1668          11 :                 if (!is_journal_aborted(journal)) /* abort only if the journal is not aborted already */
    1669           5 :                         jbd2_journal_abort(journal, ret);
    1670             :         }
    1671             : 
    1672             :         return ret;
    1673             : }
    1674             : 
    1675             : /**
    1676             :  * jbd2_journal_update_sb_log_tail() - Update log tail in journal sb on disk.
    1677             :  * @journal: The journal to update.
    1678             :  * @tail_tid: TID of the new transaction at the tail of the log
    1679             :  * @tail_block: The first block of the transaction at the tail of the log
    1680             :  * @write_flags: Flags for the journal sb write operation
    1681             :  *
    1682             :  * Update a journal's superblock information about log tail and write it to
    1683             :  * disk, waiting for the IO to complete.  Return: 0 on success; -EIO if the journal is aborted, a checkpoint I/O error was recorded, or the write fails.
    1684             :  */
    1685        8032 : int jbd2_journal_update_sb_log_tail(journal_t *journal, tid_t tail_tid,
    1686             :                                     unsigned long tail_block,
    1687             :                                     blk_opf_t write_flags)
    1688             : {
    1689        8032 :         journal_superblock_t *sb = journal->j_superblock;
    1690        8032 :         int ret;
    1691             : 
    1692        8032 :         if (is_journal_aborted(journal))
    1693             :                 return -EIO;
    1694       16062 :         if (test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags)) {
    1695           2 :                 jbd2_journal_abort(journal, -EIO); /* a recorded checkpoint I/O error aborts the journal instead of moving the tail */
    1696           2 :                 return -EIO;
    1697             :         }
    1698             : 
    1699        8029 :         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); /* caller must hold j_checkpoint_mutex */
    1700        8029 :         jbd2_debug(1, "JBD2: updating superblock (start %lu, seq %u)\n",
    1701             :                   tail_block, tail_tid);
    1702             : 
    1703        8029 :         lock_buffer(journal->j_sb_buffer); /* jbd2_write_superblock() expects the buffer locked and unlocks it */
    1704        8029 :         sb->s_sequence = cpu_to_be32(tail_tid);
    1705        8029 :         sb->s_start    = cpu_to_be32(tail_block);
    1706             : 
    1707        8029 :         ret = jbd2_write_superblock(journal, write_flags);
    1708        8029 :         if (ret)
    1709           5 :                 goto out;
    1710             : 
    1711             :         /* Log is no longer empty */
    1712        8024 :         write_lock(&journal->j_state_lock);
    1713        8024 :         WARN_ON(!sb->s_sequence);
    1714        8024 :         journal->j_flags &= ~JBD2_FLUSHED; /* cleared only after the new tail was written successfully */
    1715        8024 :         write_unlock(&journal->j_state_lock);
    1716             : 
    1717             : out:
    1718             :         return ret;
    1719             : }
    1720             : 
    1721             : /**
    1722             :  * jbd2_mark_journal_empty() - Mark on disk journal as empty.
    1723             :  * @journal: The journal to update.
    1724             :  * @write_flags: Flags for the journal sb write operation
    1725             :  *
    1726             :  * Update a journal's dynamic superblock fields to show that journal is empty.
    1727             :  * Write updated superblock to disk waiting for IO to complete.  Does nothing if s_start is already 0.
    1728             :  */
    1729        2808 : static void jbd2_mark_journal_empty(journal_t *journal, blk_opf_t write_flags)
    1730             : {
    1731        2808 :         journal_superblock_t *sb = journal->j_superblock;
    1732        2808 :         bool had_fast_commit = false;
    1733             : 
    1734        2808 :         BUG_ON(!mutex_is_locked(&journal->j_checkpoint_mutex)); /* caller must hold j_checkpoint_mutex */
    1735        2808 :         lock_buffer(journal->j_sb_buffer);
    1736        2808 :         if (sb->s_start == 0) {              /* Is it already empty? */
    1737        1039 :                 unlock_buffer(journal->j_sb_buffer);
    1738        1039 :                 return;
    1739             :         }
    1740             : 
    1741        1769 :         jbd2_debug(1, "JBD2: Marking journal as empty (seq %u)\n",
    1742             :                   journal->j_tail_sequence);
    1743             : 
    1744        1769 :         sb->s_sequence = cpu_to_be32(journal->j_tail_sequence);
    1745        1769 :         sb->s_start    = cpu_to_be32(0); /* s_start == 0 is what marks the log as empty (see the check above) */
    1746        1769 :         sb->s_head     = cpu_to_be32(journal->j_head);
    1747        3538 :         if (jbd2_has_feature_fast_commit(journal)) {
    1748             :                 /*
    1749             :                  * When journal is clean, no need to commit fast commit flag and
    1750             :                  * make file system incompatible with older kernels.
    1751             :                  */
    1752           0 :                 jbd2_clear_feature_fast_commit(journal);
    1753           0 :                 had_fast_commit = true;
    1754             :         }
    1755             : 
    1756        1769 :         jbd2_write_superblock(journal, write_flags); /* NOTE(review): result ignored; JBD2_FLUSHED is set below even if the write failed */
    1757             : 
    1758        1769 :         if (had_fast_commit)
    1759           0 :                 jbd2_set_feature_fast_commit(journal); /* restore the feature bit that was cleared only for the write */
    1760             : 
    1761             :         /* Log is now empty */
    1762        1769 :         write_lock(&journal->j_state_lock);
    1763        1769 :         journal->j_flags |= JBD2_FLUSHED;
    1764        1769 :         write_unlock(&journal->j_state_lock);
    1765             : }
    1766             : 
    1767             : /**
    1768             :  * __jbd2_journal_erase() - Discard or zeroout journal blocks (excluding superblock)
    1769             :  * @journal: The journal to erase.
    1770             :  * @flags: A discard/zeroout request is sent for each physically contiguous
    1771             :  *      region of the journal. Either JBD2_JOURNAL_FLUSH_DISCARD or
    1772             :  *      JBD2_JOURNAL_FLUSH_ZEROOUT must be set to determine which operation
    1773             :  *      to perform.
    1774             :  *
    1775             :  * Note: JBD2_JOURNAL_FLUSH_ZEROOUT attempts to use hardware offload. Zeroes
    1776             :  * will be explicitly written if no hardware offload is available, see
    1777             :  * blkdev_issue_zeroout for more details.  Return: -EINVAL for bad flags, -EOPNOTSUPP if discard is unavailable, the first bmap/discard/zeroout error, else the result of blkdev_issue_flush().
    1778             :  */
    1779           1 : static int __jbd2_journal_erase(journal_t *journal, unsigned int flags)
    1780             : {
    1781           1 :         int err = 0;
    1782           1 :         unsigned long block, log_offset; /* logical */
    1783           1 :         unsigned long long phys_block, block_start, block_stop; /* physical */
    1784           1 :         loff_t byte_start, byte_stop, byte_count;
    1785             : 
    1786             :         /* flags must be set to either discard or zeroout */
    1787           1 :         if ((flags & ~JBD2_JOURNAL_FLUSH_VALID) || !flags ||
    1788           1 :                         ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
    1789             :                         (flags & JBD2_JOURNAL_FLUSH_ZEROOUT)))
    1790             :                 return -EINVAL;
    1791             : 
    1792           1 :         if ((flags & JBD2_JOURNAL_FLUSH_DISCARD) &&
    1793           0 :             !bdev_max_discard_sectors(journal->j_dev))
    1794             :                 return -EOPNOTSUPP;
    1795             : 
    1796             :         /*
    1797             :          * lookup block mapping and issue discard/zeroout for each
    1798             :          * contiguous region
    1799             :          */
    1800           1 :         log_offset = be32_to_cpu(journal->j_superblock->s_first);
    1801           1 :         block_start =  ~0ULL; /* ~0ULL means no region is currently open */
    1802        1026 :         for (block = log_offset; block < journal->j_total_len; block++) {
    1803        1025 :                 err = jbd2_journal_bmap(journal, block, &phys_block);
    1804        1025 :                 if (err) {
    1805           0 :                         pr_err("JBD2: bad block at offset %lu", block);
    1806           0 :                         return err;
    1807             :                 }
    1808             : 
    1809        1025 :                 if (block_start == ~0ULL) {
    1810           3 :                         block_start = phys_block;
    1811           3 :                         block_stop = block_start - 1; /* so the contiguity test below accepts this first block */
    1812             :                 }
    1813             : 
    1814             :                 /*
    1815             :                  * last block not contiguous with current block,
    1816             :                  * process last contiguous region and return to this block on
    1817             :                  * next loop
    1818             :                  */
    1819        1025 :                 if (phys_block != block_stop + 1) {
    1820           2 :                         block--;
    1821             :                 } else {
    1822        1023 :                         block_stop++;
    1823             :                         /*
    1824             :                          * if this isn't the last block of journal,
    1825             :                          * no need to process now because next block may also
    1826             :                          * be part of this contiguous region
    1827             :                          */
    1828        1023 :                         if (block != journal->j_total_len - 1)
    1829        1022 :                                 continue;
    1830             :                 }
    1831             : 
    1832             :                 /*
    1833             :                  * end of contiguous region or this is last block of journal,
    1834             :                  * take care of the region; byte_stop must be the LAST byte of block_stop because truncate_inode_pages_range() takes an inclusive end offset
    1835             :                  */
    1836           3 :                 byte_start = block_start * journal->j_blocksize;
    1837           3 :                 byte_stop = (block_stop + 1) * journal->j_blocksize - 1;
    1838           3 :                 byte_count = (block_stop - block_start + 1) *
    1839             :                                 journal->j_blocksize;
    1840             : 
    1841           3 :                 truncate_inode_pages_range(journal->j_dev->bd_inode->i_mapping,
    1842             :                                 byte_start, byte_stop);
    1843             : 
    1844           3 :                 if (flags & JBD2_JOURNAL_FLUSH_DISCARD) {
    1845           0 :                         err = blkdev_issue_discard(journal->j_dev,
    1846           0 :                                         byte_start >> SECTOR_SHIFT,
    1847           0 :                                         byte_count >> SECTOR_SHIFT,
    1848             :                                         GFP_NOFS);
    1849           3 :                 } else if (flags & JBD2_JOURNAL_FLUSH_ZEROOUT) {
    1850           3 :                         err = blkdev_issue_zeroout(journal->j_dev,
    1851           3 :                                         byte_start >> SECTOR_SHIFT,
    1852           3 :                                         byte_count >> SECTOR_SHIFT,
    1853             :                                         GFP_NOFS, 0);
    1854             :                 }
    1855             : 
    1856           3 :                 if (unlikely(err != 0)) {
    1857           0 :                         pr_err("JBD2: (error %d) unable to wipe journal at physical blocks %llu - %llu",
    1858             :                                         err, block_start, block_stop);
    1859           0 :                         return err;
    1860             :                 }
    1861             : 
    1862             :                 /* reset start and stop after processing a region */
    1863             :                 block_start = ~0ULL;
    1864             :         }
    1865             : 
    1866           1 :         return blkdev_issue_flush(journal->j_dev);
    1867             : }
    1868             : 
    1869             : /**
    1870             :  * jbd2_journal_update_sb_errno() - Update error in the journal.
    1871             :  * @journal: The journal to update.
    1872             :  *
    1873             :  * Copy journal->j_errno into the superblock s_errno field (-ESHUTDOWN is stored as 0).  Write updated superblock to disk waiting for IO
    1874             :  * to complete.
    1875             :  */
    1876         155 : void jbd2_journal_update_sb_errno(journal_t *journal)
    1877             : {
    1878         155 :         journal_superblock_t *sb = journal->j_superblock;
    1879         155 :         int errcode;
    1880             : 
    1881         155 :         lock_buffer(journal->j_sb_buffer); /* jbd2_write_superblock() expects the buffer locked and unlocks it */
    1882         155 :         errcode = journal->j_errno;
    1883         155 :         if (errcode == -ESHUTDOWN)
    1884         148 :                 errcode = 0; /* -ESHUTDOWN is not recorded on disk */
    1885         155 :         jbd2_debug(1, "JBD2: updating superblock error (errno %d)\n", errcode);
    1886         155 :         sb->s_errno    = cpu_to_be32(errcode);
    1887             : 
    1888         155 :         jbd2_write_superblock(journal, REQ_SYNC | REQ_FUA); /* NOTE(review): write result is ignored (function returns void) */
    1889         155 : }
    1890             : EXPORT_SYMBOL(jbd2_journal_update_sb_errno);
    1891             : 
    1892       13158 : static int journal_revoke_records_per_block(journal_t *journal) /* Number of revoke records that fit in one journal block. */
    1893             : {
    1894       13158 :         int record_size;
    1895       13158 :         int space = journal->j_blocksize - sizeof(jbd2_journal_revoke_header_t); /* bytes left after the revoke header */
    1896             : 
    1897       26316 :         if (jbd2_has_feature_64bit(journal))
    1898             :                 record_size = 8; /* 8 bytes per record with the 64bit feature */
    1899             :         else
    1900        2705 :                 record_size = 4; /* 4 bytes per record otherwise */
    1901             : 
    1902       13158 :         if (jbd2_journal_has_csum_v2or3(journal))
    1903        8457 :                 space -= sizeof(struct jbd2_journal_block_tail); /* with csum v2/v3 the block tail also takes space */
    1904       13158 :         return space / record_size;
    1905             : }
    1906             : 
    1907             : /*
    1908             :  * Read the superblock for a given journal, performing initial
    1909             :  * validation of the format.  Returns 0 on success (or if the buffer was already verified); otherwise calls journal_fail_superblock() and returns a negative errno.
    1910             :  */
    1911      285828 : static int journal_get_superblock(journal_t *journal)
    1912             : {
    1913      285828 :         struct buffer_head *bh;
    1914      285828 :         journal_superblock_t *sb;
    1915      285828 :         int err;
    1916             : 
    1917      285828 :         bh = journal->j_sb_buffer;
    1918             : 
    1919      285828 :         J_ASSERT(bh != NULL);
    1920      571656 :         if (buffer_verified(bh)) /* already validated by an earlier call (flag is set at the end of this function) */
    1921             :                 return 0;
    1922             : 
    1923        2502 :         err = bh_read(bh, 0);
    1924        2502 :         if (err < 0) {
    1925           0 :                 printk(KERN_ERR
    1926             :                         "JBD2: IO error reading journal superblock\n");
    1927           0 :                 goto out;
    1928             :         }
    1929             : 
    1930        2502 :         sb = journal->j_superblock;
    1931             : 
    1932        2502 :         err = -EINVAL; /* default error for all of the format checks below */
    1933             : 
    1934        2502 :         if (sb->s_header.h_magic != cpu_to_be32(JBD2_MAGIC_NUMBER) ||
    1935        2501 :             sb->s_blocksize != cpu_to_be32(journal->j_blocksize)) {
    1936           1 :                 printk(KERN_WARNING "JBD2: no valid journal superblock found\n");
    1937           1 :                 goto out;
    1938             :         }
    1939             : 
    1940        2501 :         if (be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V1 &&
    1941             :             be32_to_cpu(sb->s_header.h_blocktype) != JBD2_SUPERBLOCK_V2) {
    1942           0 :                 printk(KERN_WARNING "JBD2: unrecognised superblock format ID\n");
    1943           0 :                 goto out;
    1944             :         }
    1945             : 
    1946        2501 :         if (be32_to_cpu(sb->s_maxlen) > journal->j_total_len) { /* superblock claims more blocks than j_total_len */
    1947           0 :                 printk(KERN_WARNING "JBD2: journal file too short\n");
    1948           0 :                 goto out;
    1949             :         }
    1950             : 
    1951        2501 :         if (be32_to_cpu(sb->s_first) == 0 ||
    1952             :             be32_to_cpu(sb->s_first) >= journal->j_total_len) {
    1953           0 :                 printk(KERN_WARNING
    1954             :                         "JBD2: Invalid start block of journal: %u\n",
    1955             :                         be32_to_cpu(sb->s_first));
    1956           0 :                 goto out;
    1957             :         }
    1958             : 
    1959        5002 :         if (jbd2_has_feature_csum2(journal) &&
    1960             :             jbd2_has_feature_csum3(journal)) {
    1961             :                 /* Can't have checksum v2 and v3 at the same time! */
    1962           0 :                 printk(KERN_ERR "JBD2: Can't enable checksumming v2 and v3 "
    1963             :                        "at the same time!\n");
    1964           0 :                 goto out;
    1965             :         }
    1966             : 
    1967        3828 :         if (jbd2_journal_has_csum_v2or3_feature(journal) &&
    1968             :             jbd2_has_feature_checksum(journal)) {
    1969             :                 /* Can't have checksum v1 and v2 on at the same time! */
    1970           0 :                 printk(KERN_ERR "JBD2: Can't enable checksumming v1 and v2/3 "
    1971             :                        "at the same time!\n");
    1972           0 :                 goto out;
    1973             :         }
    1974             : 
    1975        2501 :         if (!jbd2_verify_csum_type(journal, sb)) {
    1976           0 :                 printk(KERN_ERR "JBD2: Unknown checksum type\n");
    1977           0 :                 goto out;
    1978             :         }
    1979             : 
    1980             :         /* Load the checksum driver */
    1981        2501 :         if (jbd2_journal_has_csum_v2or3_feature(journal)) {
    1982        1327 :                 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
    1983        1327 :                 if (IS_ERR(journal->j_chksum_driver)) {
    1984           0 :                         printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
    1985           0 :                         err = PTR_ERR(journal->j_chksum_driver);
    1986           0 :                         journal->j_chksum_driver = NULL; /* never leave an ERR_PTR value in the journal */
    1987           0 :                         goto out;
    1988             :                 }
    1989             :                 /* Check superblock checksum; NOTE(review): on mismatch j_chksum_driver stays allocated, jbd2_journal_destroy() frees it -- confirm every failing caller gets there */
    1990        1327 :                 if (sb->s_checksum != jbd2_superblock_csum(journal, sb)) {
    1991           0 :                         printk(KERN_ERR "JBD2: journal checksum error\n");
    1992           0 :                         err = -EFSBADCRC;
    1993           0 :                         goto out;
    1994             :                 }
    1995             :         }
    1996        2501 :         set_buffer_verified(bh); /* remember success so later calls return early */
    1997             :         return 0;
    1998             : 
    1999           1 : out:
    2000           1 :         journal_fail_superblock(journal);
    2001           1 :         return err;
    2002             : }
    2003             : 
    2004             : /*
    2005             :  * Load the on-disk journal superblock and read the key fields into the
    2006             :  * journal_t.  Returns 0 on success or the error from journal_get_superblock().
    2007             :  */
    2008             : 
    2009        4753 : static int load_superblock(journal_t *journal)
    2010             : {
    2011        4753 :         int err;
    2012        4753 :         journal_superblock_t *sb;
    2013        4753 :         int num_fc_blocks;
    2014             : 
    2015        4753 :         err = journal_get_superblock(journal);
    2016        4753 :         if (err)
    2017             :                 return err;
    2018             : 
    2019        4752 :         sb = journal->j_superblock;
    2020             : 
    2021        4752 :         journal->j_tail_sequence = be32_to_cpu(sb->s_sequence); /* superblock fields are big-endian on disk */
    2022        4752 :         journal->j_tail = be32_to_cpu(sb->s_start);
    2023        4752 :         journal->j_first = be32_to_cpu(sb->s_first);
    2024        4752 :         journal->j_errno = be32_to_cpu(sb->s_errno);
    2025        4752 :         journal->j_last = be32_to_cpu(sb->s_maxlen);
    2026             : 
    2027        4752 :         if (be32_to_cpu(sb->s_maxlen) < journal->j_total_len)
    2028           0 :                 journal->j_total_len = be32_to_cpu(sb->s_maxlen); /* shrink j_total_len to the length recorded in the superblock */
    2029             :         /* Precompute checksum seed for all metadata */
    2030        4752 :         if (jbd2_journal_has_csum_v2or3(journal))
    2031        2416 :                 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
    2032             :                                                    sizeof(sb->s_uuid));
    2033        9504 :         journal->j_revoke_records_per_block =
    2034        4752 :                                 journal_revoke_records_per_block(journal);
    2035             : 
    2036        9504 :         if (jbd2_has_feature_fast_commit(journal)) {
    2037           0 :                 journal->j_fc_last = be32_to_cpu(sb->s_maxlen);
    2038           0 :                 num_fc_blocks = jbd2_journal_get_num_fc_blks(sb);
    2039           0 :                 if (journal->j_last - num_fc_blocks >= JBD2_MIN_JOURNAL_BLOCKS) /* carve the fast-commit area off the end only if enough blocks remain */
    2040           0 :                         journal->j_last = journal->j_fc_last - num_fc_blocks;
    2041           0 :                 journal->j_fc_first = journal->j_last + 1; /* fast-commit area starts right after j_last */
    2042           0 :                 journal->j_fc_off = 0;
    2043             :         }
    2044             : 
    2045             :         return 0;
    2046             : }
    2047             : 
    2048             : 
    2049             : /**
    2050             :  * jbd2_journal_load() - Read journal from disk.
    2051             :  * @journal: Journal to act on.
    2052             :  *
    2053             :  * Given a journal_t structure which tells us which disk blocks contain
    2054             :  * a journal, read the journal from disk to initialise the in-memory
    2055             :  * structures.  Return: 0 on success (JBD2_LOADED is then set) or a negative errno.
    2056             :  */
    2057        2501 : int jbd2_journal_load(journal_t *journal)
    2058             : {
    2059        2501 :         int err;
    2060        2501 :         journal_superblock_t *sb;
    2061             : 
    2062        2501 :         err = load_superblock(journal);
    2063        2501 :         if (err)
    2064             :                 return err;
    2065             : 
    2066        2501 :         sb = journal->j_superblock;
    2067             : 
    2068             :         /*
    2069             :          * If this is a V2 superblock, then we have to check the
    2070             :          * features flags on it.
    2071             :          */
    2072        2501 :         if (jbd2_format_support_feature(journal)) {
    2073        2501 :                 if ((sb->s_feature_ro_compat &
    2074        2501 :                      ~cpu_to_be32(JBD2_KNOWN_ROCOMPAT_FEATURES)) ||
    2075        2501 :                     (sb->s_feature_incompat &
    2076             :                      ~cpu_to_be32(JBD2_KNOWN_INCOMPAT_FEATURES))) {
    2077           0 :                         printk(KERN_WARNING
    2078             :                                 "JBD2: Unrecognised features on journal\n");
    2079           0 :                         return -EINVAL;
    2080             :                 }
    2081             :         }
    2082             : 
    2083             :         /*
    2084             :          * Create a slab for this blocksize
    2085             :          */
    2086        2501 :         err = jbd2_journal_create_slab(be32_to_cpu(sb->s_blocksize));
    2087        2501 :         if (err)
    2088             :                 return err;
    2089             : 
    2090             :         /* Let the recovery code check whether it needs to recover any
    2091             :          * data from the journal. */
    2092        2501 :         if (jbd2_journal_recover(journal)) /* NOTE(review): the specific error code is discarded; the caller only sees -EIO */
    2093           0 :                 goto recovery_error;
    2094             : 
    2095        2501 :         if (journal->j_failed_commit) {
    2096           0 :                 printk(KERN_ERR "JBD2: journal transaction %u on %s "
    2097             :                        "is corrupt.\n", journal->j_failed_commit,
    2098             :                        journal->j_devname);
    2099           0 :                 return -EFSCORRUPTED;
    2100             :         }
    2101             :         /*
    2102             :          * clear JBD2_ABORT flag initialized in journal_init_common
    2103             :          * here to update log tail information with the newest seq.
    2104             :          */
    2105        2501 :         journal->j_flags &= ~JBD2_ABORT;
    2106             : 
    2107             :         /* OK, we've finished with the dynamic journal bits:
    2108             :          * reinitialise the dynamic contents of the superblock in memory
    2109             :          * and reset them on disk. */
    2110        2501 :         if (journal_reset(journal)) /* as above, the journal_reset() error code is replaced by -EIO */
    2111           0 :                 goto recovery_error;
    2112             : 
    2113        2501 :         journal->j_flags |= JBD2_LOADED;
    2114        2501 :         return 0;
    2115             : 
    2116           0 : recovery_error:
    2117           0 :         printk(KERN_WARNING "JBD2: recovery failed\n");
    2118           0 :         return -EIO;
    2119             : }
    2120             : 
    2121             : /**
    2122             :  * jbd2_journal_destroy() - Release a journal_t structure.
    2123             :  * @journal: Journal to act on.
    2124             :  *
    2125             :  * Release a journal_t structure once it is no longer in use by the
    2126             :  * journaled object.  The journal is freed on every path, so it must not be used afterwards.
    2127             :  * Return <0 if we couldn't clean up the journal.
    2128             :  */
    2129        2503 : int jbd2_journal_destroy(journal_t *journal)
    2130             : {
    2131        2503 :         int err = 0;
    2132             : 
    2133             :         /* Wait for the commit thread to wake up and die. */
    2134        2503 :         journal_kill_thread(journal);
    2135             : 
    2136             :         /* Force a final log commit */
    2137        2503 :         if (journal->j_running_transaction)
    2138          32 :                 jbd2_journal_commit_transaction(journal);
    2139             : 
    2140             :         /* Force any old transactions to disk */
    2141             : 
    2142             :         /* j_list_lock is dropped around each checkpoint pass and retaken before the list is re-checked */
    2143        2503 :         spin_lock(&journal->j_list_lock);
    2144        4924 :         while (journal->j_checkpoint_transactions != NULL) {
    2145        2539 :                 spin_unlock(&journal->j_list_lock);
    2146        2539 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    2147        2539 :                 err = jbd2_log_do_checkpoint(journal);
    2148        2539 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    2149             :                 /*
    2150             :                  * If checkpointing failed, just free the buffers to avoid
    2151             :                  * looping forever
    2152             :                  */
    2153        2539 :                 if (err) {
    2154         118 :                         jbd2_journal_destroy_checkpoint(journal);
    2155         118 :                         spin_lock(&journal->j_list_lock); /* retake the lock: the code after the loop unlocks it */
    2156             :                         break;
    2157             :                 }
    2158        2421 :                 spin_lock(&journal->j_list_lock);
    2159             :         }
    2160             : 
    2161        2503 :         J_ASSERT(journal->j_running_transaction == NULL);
    2162        2503 :         J_ASSERT(journal->j_committing_transaction == NULL);
    2163        2503 :         J_ASSERT(journal->j_checkpoint_transactions == NULL);
    2164        2503 :         spin_unlock(&journal->j_list_lock);
    2165             : 
    2166             :         /*
    2167             :          * OK, all checkpoint transactions have been checked, now check the
    2168             :          * write out io error flag and abort the journal if some buffer failed
    2169             :          * to write back to the original location, otherwise the filesystem
    2170             :          * may become inconsistent.
    2171             :          */
    2172        4849 :         if (!is_journal_aborted(journal) &&
    2173        2346 :             test_bit(JBD2_CHECKPOINT_IO_ERROR, &journal->j_atomic_flags))
    2174           0 :                 jbd2_journal_abort(journal, -EIO);
    2175             : 
    2176        2503 :         if (journal->j_sb_buffer) {
    2177        2502 :                 if (!is_journal_aborted(journal)) {
    2178        2346 :                         mutex_lock_io(&journal->j_checkpoint_mutex); /* jbd2_mark_journal_empty() requires this mutex */
    2179             : 
    2180        2346 :                         write_lock(&journal->j_state_lock);
    2181        2346 :                         journal->j_tail_sequence =
    2182        2346 :                                 ++journal->j_transaction_sequence;
    2183        2346 :                         write_unlock(&journal->j_state_lock);
    2184             : 
    2185        2346 :                         jbd2_mark_journal_empty(journal,
    2186             :                                         REQ_SYNC | REQ_PREFLUSH | REQ_FUA);
    2187        2346 :                         mutex_unlock(&journal->j_checkpoint_mutex);
    2188             :                 } else
    2189             :                         err = -EIO; /* aborted journal: skip the superblock update and report failure */
    2190        2502 :                 brelse(journal->j_sb_buffer);
    2191             :         }
    2192             : 
    2193        2503 :         if (journal->j_shrinker.flags & SHRINKER_REGISTERED) {
    2194        2503 :                 percpu_counter_destroy(&journal->j_checkpoint_jh_count);
    2195        2503 :                 unregister_shrinker(&journal->j_shrinker);
    2196             :         }
    2197        2503 :         if (journal->j_proc_entry)
    2198        2503 :                 jbd2_stats_proc_exit(journal);
    2199        2503 :         iput(journal->j_inode);
    2200        2503 :         if (journal->j_revoke)
    2201        2503 :                 jbd2_journal_destroy_revoke(journal);
    2202        2503 :         if (journal->j_chksum_driver)
    2203        2288 :                 crypto_free_shash(journal->j_chksum_driver);
    2204        2503 :         kfree(journal->j_fc_wbuf);
    2205        2503 :         kfree(journal->j_wbuf);
    2206        2503 :         kfree(journal); /* the journal itself is released here, even when err is set */
    2207             : 
    2208        2503 :         return err;
    2209             : }
    2210             : 
    2211             : 
    2212             : /**
    2213             :  * jbd2_journal_check_used_features() - Check if features specified are used.
    2214             :  * @journal: Journal to check.
    2215             :  * @compat: bitmask of compatible features
    2216             :  * @ro: bitmask of features that force read-only mount
    2217             :  * @incompat: bitmask of incompatible features
    2218             :  *
    2219             :  * Check whether the journal uses all of a given set of
    2220             :  * features.  Return true (non-zero) if it does.
                     :  *
                     :  * An empty request (all three masks zero) is reported as used before the
                     :  * superblock is consulted.  Otherwise 0 is returned if
                     :  * journal_get_superblock() returns non-zero or if
                     :  * jbd2_format_support_feature() is false for this journal.
    2221             :  **/
    2222             : 
    2223      281085 : int jbd2_journal_check_used_features(journal_t *journal, unsigned long compat,
    2224             :                                  unsigned long ro, unsigned long incompat)
    2225             : {
    2226      281085 :         journal_superblock_t *sb;
    2227             : 
    2228      281085 :         if (!compat && !ro && !incompat)
    2229             :                 return 1;       /* nothing requested */
    2230      281063 :         if (journal_get_superblock(journal))
    2231             :                 return 0;
    2232      281077 :         if (!jbd2_format_support_feature(journal))
    2233             :                 return 0;
    2234             : 
    2235      281077 :         sb = journal->j_superblock;     /* feature words are converted with be32_to_cpu() below */
    2236             : 
    2237      281077 :         if (((be32_to_cpu(sb->s_feature_compat) & compat) == compat) &&
    2238      281083 :             ((be32_to_cpu(sb->s_feature_ro_compat) & ro) == ro) &&
    2239      281082 :             ((be32_to_cpu(sb->s_feature_incompat) & incompat) == incompat))
    2240      277677 :                 return 1;       /* every requested bit is set in all three words */
    2241             : 
    2242             :         return 0;
    2243             : }
    2244             : 
    2245             : /**
    2246             :  * jbd2_journal_check_available_features() - Check feature set in journalling layer
    2247             :  * @journal: Journal to check.
    2248             :  * @compat: bitmask of compatible features
    2249             :  * @ro: bitmask of features that force read-only mount
    2250             :  * @incompat: bitmask of incompatible features
    2251             :  *
    2252             :  * Check whether the journaling code supports the use of
    2253             :  * all of a given set of features on this journal.  Return true
    2254             :  * (non-zero) if it can.
                     :  *
                     :  * An empty request (all three masks zero) returns 1.  Otherwise 0 is
                     :  * returned if jbd2_format_support_feature() is false, and 1 only if
                     :  * every requested bit is contained in the corresponding JBD2_KNOWN_*
                     :  * mask.  The feature words stored in the journal superblock are not
                     :  * examined here.
                     :  */
    2255             : 
    2256        5886 : int jbd2_journal_check_available_features(journal_t *journal, unsigned long compat,
    2257             :                                       unsigned long ro, unsigned long incompat)
    2258             : {
    2259        5886 :         if (!compat && !ro && !incompat)
    2260             :                 return 1;       /* nothing requested */
    2261             : 
    2262        5886 :         if (!jbd2_format_support_feature(journal))
    2263             :                 return 0;
    2264             : 
    2265        5886 :         if ((compat   & JBD2_KNOWN_COMPAT_FEATURES) == compat &&
    2266        5886 :             (ro       & JBD2_KNOWN_ROCOMPAT_FEATURES) == ro &&
    2267        5886 :             (incompat & JBD2_KNOWN_INCOMPAT_FEATURES) == incompat)
    2268        5886 :                 return 1;       /* no requested bit falls outside the known masks */
    2269             : 
    2270             :         return 0;
    2271             : }
    2272             : 
    2273             : static int
    2274           0 : jbd2_journal_initialize_fast_commit(journal_t *journal)
    2275             : {       /*
                     :          * Split num_fc_blks blocks off the end of the journal for fast
                     :          * commits and allocate j_fc_wbuf, an array of num_fc_blks
                     :          * buffer_head pointers.
                     :          *
                     :          * Returns 0 on success, -ENOSPC if the size check below fails, or
                     :          * -ENOMEM if the array cannot be allocated.
                     :          */
    2276           0 :         journal_superblock_t *sb = journal->j_superblock;
    2277           0 :         unsigned long long num_fc_blks;
    2278             : 
    2279           0 :         num_fc_blks = jbd2_journal_get_num_fc_blks(sb);
    2280           0 :         if (journal->j_last - num_fc_blks < JBD2_MIN_JOURNAL_BLOCKS)    /* NOTE(review): unsigned arithmetic; would wrap if num_fc_blks > j_last -- confirm callers rule that out */
    2281             :                 return -ENOSPC;
    2282             : 
    2283             :         /* Are we called twice? */
    2284           0 :         WARN_ON(journal->j_fc_wbuf != NULL);
    2285           0 :         journal->j_fc_wbuf = kmalloc_array(num_fc_blks,
    2286             :                                 sizeof(struct buffer_head *), GFP_KERNEL);
    2287           0 :         if (!journal->j_fc_wbuf)
    2288             :                 return -ENOMEM;
    2289             : 
    2290           0 :         journal->j_fc_wbufsize = num_fc_blks;
    2291           0 :         journal->j_fc_last = journal->j_last;   /* fast-commit area ends where the journal used to end */
    2292           0 :         journal->j_last = journal->j_fc_last - num_fc_blks;     /* regular journal shrinks by num_fc_blks */
    2293           0 :         journal->j_fc_first = journal->j_last + 1;
    2294           0 :         journal->j_fc_off = 0;
    2295           0 :         journal->j_free = journal->j_last - journal->j_first;   /* recomputed for the reduced j_last */
    2296           0 :         journal->j_max_transaction_buffers =
    2297             :                 jbd2_journal_get_max_txn_bufs(journal);
    2298             : 
    2299           0 :         return 0;
    2300             : }
    2301             : 
    2302             : /**
    2303             :  * jbd2_journal_set_features() - Mark a given journal feature in the superblock
    2304             :  * @journal: Journal to act on.
    2305             :  * @compat: bitmask of compatible features
    2306             :  * @ro: bitmask of features that force read-only mount
    2307             :  * @incompat: bitmask of incompatible features
    2308             :  *
    2309             :  * Mark a given journal feature as present on the
    2310             :  * superblock.  Returns true if the requested features could be set.
    2311             :  *
                     :  * Returns 1 without touching anything when
                     :  * jbd2_journal_check_used_features() already reports the request as
                     :  * used.  Returns 0 when jbd2_journal_check_available_features() rejects
                     :  * the request, when fast-commit initialization fails, or when the
                     :  * "crc32c" shash cannot be allocated.
                     :  *
                     :  * A request for JBD2_FEATURE_INCOMPAT_CSUM_V2 is converted to
                     :  * JBD2_FEATURE_INCOMPAT_CSUM_V3, and JBD2_FEATURE_COMPAT_CHECKSUM is
                     :  * dropped from @compat when CSUM_V3 is being requested as well.
                     :  *
    2312             :  */
    2313             : 
    2314      281097 : int jbd2_journal_set_features(journal_t *journal, unsigned long compat,
    2315             :                           unsigned long ro, unsigned long incompat)
    2316             : {       /*
                     :          * INCOMPAT_FEATURE_ON(f) / COMPAT_FEATURE_ON(f): true when bit f is
                     :          * requested in incompat / compat and is not yet set in the
                     :          * corresponding superblock word, i.e. the feature is being newly
                     :          * turned on.  Both are #undef'd at the end of the function.
                     :          */
    2317             : #define INCOMPAT_FEATURE_ON(f) \
    2318             :                 ((incompat & (f)) && !(sb->s_feature_incompat & cpu_to_be32(f)))
    2319             : #define COMPAT_FEATURE_ON(f) \
    2320             :                 ((compat & (f)) && !(sb->s_feature_compat & cpu_to_be32(f)))
    2321      281097 :         journal_superblock_t *sb;
    2322             : 
    2323      281097 :         if (jbd2_journal_check_used_features(journal, compat, ro, incompat))
    2324             :                 return 1;
    2325             : 
    2326        3405 :         if (!jbd2_journal_check_available_features(journal, compat, ro, incompat))
    2327             :                 return 0;
    2328             : 
    2329             :         /* If enabling v2 checksums, turn on v3 instead */
    2330        3405 :         if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V2) {
    2331           0 :                 incompat &= ~JBD2_FEATURE_INCOMPAT_CSUM_V2;
    2332           0 :                 incompat |= JBD2_FEATURE_INCOMPAT_CSUM_V3;
    2333             :         }
    2334             : 
    2335             :         /* Asking for checksumming v3 and v1?  Only give them v3. */
    2336        3405 :         if (incompat & JBD2_FEATURE_INCOMPAT_CSUM_V3 &&
    2337        2288 :             compat & JBD2_FEATURE_COMPAT_CHECKSUM)
    2338           0 :                 compat &= ~JBD2_FEATURE_COMPAT_CHECKSUM;
    2339             : 
    2340        3405 :         jbd2_debug(1, "Setting new features 0x%lx/0x%lx/0x%lx\n",
    2341             :                   compat, ro, incompat);
    2342             : 
    2343        3405 :         sb = journal->j_superblock;
    2344             : 
    2345        3405 :         if (incompat & JBD2_FEATURE_INCOMPAT_FAST_COMMIT) {
    2346           0 :                 if (jbd2_journal_initialize_fast_commit(journal)) {
    2347           0 :                         pr_err("JBD2: Cannot enable fast commits.\n");
    2348           0 :                         return 0;
    2349             :                 }
    2350             :         }
    2351             : 
    2352             :         /* Load the checksum driver if necessary */
    2353        3405 :         if ((journal->j_chksum_driver == NULL) &&
    2354         961 :             INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
    2355         961 :                 journal->j_chksum_driver = crypto_alloc_shash("crc32c", 0, 0);
    2356         961 :                 if (IS_ERR(journal->j_chksum_driver)) {
    2357           0 :                         printk(KERN_ERR "JBD2: Cannot load crc32c driver.\n");
    2358           0 :                         journal->j_chksum_driver = NULL;
    2359           0 :                         return 0;       /* NOTE(review): a fast-commit setup done just above is not undone on this path -- confirm that is acceptable */
    2360             :                 }
    2361             :                 /* Precompute checksum seed for all metadata */
    2362         961 :                 journal->j_csum_seed = jbd2_chksum(journal, ~0, sb->s_uuid,
    2363             :                                                    sizeof(sb->s_uuid));
    2364             :         }
    2365             : 
    2366        3405 :         lock_buffer(journal->j_sb_buffer);      /* held across all superblock feature-word updates below */
    2367             : 
    2368             :         /* If enabling v3 checksums, update superblock */
    2369        3405 :         if (INCOMPAT_FEATURE_ON(JBD2_FEATURE_INCOMPAT_CSUM_V3)) {
    2370        2288 :                 sb->s_checksum_type = JBD2_CRC32C_CHKSUM;
    2371        2288 :                 sb->s_feature_compat &=
    2372             :                         ~cpu_to_be32(JBD2_FEATURE_COMPAT_CHECKSUM);
    2373             :         }
    2374             : 
    2375             :         /* If enabling v1 checksums, downgrade superblock */
    2376        3405 :         if (COMPAT_FEATURE_ON(JBD2_FEATURE_COMPAT_CHECKSUM))
    2377           0 :                 sb->s_feature_incompat &=
    2378             :                         ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_CSUM_V2 |
    2379             :                                      JBD2_FEATURE_INCOMPAT_CSUM_V3);
    2380             : 
    2381        3405 :         sb->s_feature_compat    |= cpu_to_be32(compat);
    2382        3405 :         sb->s_feature_ro_compat |= cpu_to_be32(ro);
    2383        3405 :         sb->s_feature_incompat  |= cpu_to_be32(incompat);
    2384        3405 :         unlock_buffer(journal->j_sb_buffer);
    2385        6810 :         journal->j_revoke_records_per_block =
    2386        3405 :                                 journal_revoke_records_per_block(journal);      /* recomputed after the feature words changed */
    2387             : 
    2388        3405 :         return 1;
    2389             : #undef COMPAT_FEATURE_ON
    2390             : #undef INCOMPAT_FEATURE_ON
    2391             : }
    2392             : 
    2393             : /*
    2394             :  * jbd2_journal_clear_features() - Clear a given journal feature in the
    2395             :  *                                  superblock
    2396             :  * @journal: Journal to act on.
    2397             :  * @compat: bitmask of compatible features
    2398             :  * @ro: bitmask of features that force read-only mount
    2399             :  * @incompat: bitmask of incompatible features
    2400             :  *
    2401             :  * Clear a given journal feature as present on the
    2402             :  * superblock.
                     :  *
                     :  * The requested bits are removed from the three feature words of the
                     :  * in-memory superblock and j_revoke_records_per_block is recomputed.
                     :  * Nothing in this function writes the superblock out.
                     :  *
                     :  * NOTE(review): unlike jbd2_journal_set_features(), the feature words
                     :  * are modified here without lock_buffer() on j_sb_buffer -- confirm
                     :  * callers provide the needed exclusion.
    2403             :  */
    2404        5001 : void jbd2_journal_clear_features(journal_t *journal, unsigned long compat,
    2405             :                                 unsigned long ro, unsigned long incompat)
    2406             : {
    2407        5001 :         journal_superblock_t *sb;
    2408             : 
    2409        5001 :         jbd2_debug(1, "Clear features 0x%lx/0x%lx/0x%lx\n",
    2410             :                   compat, ro, incompat);
    2411             : 
    2412        5001 :         sb = journal->j_superblock;
    2413             : 
    2414        5001 :         sb->s_feature_compat    &= ~cpu_to_be32(compat);
    2415        5001 :         sb->s_feature_ro_compat &= ~cpu_to_be32(ro);
    2416        5001 :         sb->s_feature_incompat  &= ~cpu_to_be32(incompat);
    2417       10002 :         journal->j_revoke_records_per_block =
    2418        5001 :                                 journal_revoke_records_per_block(journal);
    2419        5001 : }
    2420             : EXPORT_SYMBOL(jbd2_journal_clear_features);
    2421             : 
    2422             : /**
    2423             :  * jbd2_journal_flush() - Flush journal
    2424             :  * @journal: Journal to act on.
    2425             :  * @flags: optional operation on the journal blocks after the flush (see below)
    2426             :  *
    2427             :  * Flush all data for a given journal to disk and empty the journal.
    2428             :  * Filesystems can use this when remounting readonly to ensure that
    2429             :  * recovery does not need to happen on remount. Optionally, a discard or zeroout
    2430             :  * can be issued on the journal blocks after flushing.
    2431             :  *
    2432             :  * flags:
    2433             :  *      JBD2_JOURNAL_FLUSH_DISCARD: issues discards for the journal blocks
    2434             :  *      JBD2_JOURNAL_FLUSH_ZEROOUT: issues zeroouts for the journal blocks
                     :  *
                     :  * Return: 0 on success; -EIO if the journal is found aborted once
                     :  * checkpointing has finished; a negative value returned by
                     :  * jbd2_cleanup_journal_tail(); or, when @flags is non-zero, the result
                     :  * of __jbd2_journal_erase().
                     :  *
                     :  * NOTE(review): a non-zero result from jbd2_log_do_checkpoint() ends the
                     :  * checkpoint loop, skips jbd2_cleanup_journal_tail(), and is what gets
                     :  * returned unless the journal is aborted or @flags causes it to be
                     :  * overwritten by the erase result -- confirm this is intended.
    2435             :  */
    2436         462 : int jbd2_journal_flush(journal_t *journal, unsigned int flags)
    2437             : {
    2438         462 :         int err = 0;
    2439         462 :         transaction_t *transaction = NULL;
    2440             : 
    2441         462 :         write_lock(&journal->j_state_lock);
    2442             : 
    2443             :         /* Force everything buffered to the log... */
    2444         462 :         if (journal->j_running_transaction) {
    2445          54 :                 transaction = journal->j_running_transaction;
    2446          54 :                 __jbd2_log_start_commit(journal, transaction->t_tid);
    2447         408 :         } else if (journal->j_committing_transaction)
    2448             :                 transaction = journal->j_committing_transaction;
    2449             : 
    2450             :         /* Wait for the log commit to complete... */
    2451         462 :         if (transaction) {
    2452          54 :                 tid_t tid = transaction->t_tid; /* read the tid while j_state_lock is still held */
    2453             : 
    2454          54 :                 write_unlock(&journal->j_state_lock);
    2455          54 :                 jbd2_log_wait_commit(journal, tid);
    2456             :         } else {
    2457         408 :                 write_unlock(&journal->j_state_lock);
    2458             :         }
    2459             : 
    2460             :         /* ...and flush everything in the log out to disk. */
    2461         462 :         spin_lock(&journal->j_list_lock);
    2462         810 :         while (!err && journal->j_checkpoint_transactions != NULL) {
    2463         348 :                 spin_unlock(&journal->j_list_lock);     /* j_list_lock is dropped around the checkpoint call */
    2464         348 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    2465         348 :                 err = jbd2_log_do_checkpoint(journal);
    2466         348 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    2467         348 :                 spin_lock(&journal->j_list_lock);
    2468             :         }
    2469         462 :         spin_unlock(&journal->j_list_lock);
    2470             : 
    2471         462 :         if (is_journal_aborted(journal))
    2472             :                 return -EIO;
    2473             : 
    2474         462 :         mutex_lock_io(&journal->j_checkpoint_mutex);
    2475         462 :         if (!err) {
    2476         462 :                 err = jbd2_cleanup_journal_tail(journal);
    2477         462 :                 if (err < 0) {
    2478           0 :                         mutex_unlock(&journal->j_checkpoint_mutex);
    2479           0 :                         goto out;
    2480             :                 }
    2481             :                 err = 0;        /* a non-negative result is not treated as an error */
    2482             :         }
    2483             : 
    2484             :         /* Finally, mark the journal as really needing no recovery.
    2485             :          * This sets s_start==0 in the underlying superblock, which is
    2486             :          * the magic code for a fully-recovered superblock.  Any future
    2487             :          * commits of data to the journal will restore the current
    2488             :          * s_start value. */
    2489         462 :         jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
    2490             : 
    2491         462 :         if (flags)
    2492           1 :                 err = __jbd2_journal_erase(journal, flags);
    2493             : 
    2494         462 :         mutex_unlock(&journal->j_checkpoint_mutex);
    2495         462 :         write_lock(&journal->j_state_lock);
    2496         462 :         J_ASSERT(!journal->j_running_transaction);
    2497         462 :         J_ASSERT(!journal->j_committing_transaction);
    2498         462 :         J_ASSERT(!journal->j_checkpoint_transactions);
    2499         462 :         J_ASSERT(journal->j_head == journal->j_tail);
    2500         462 :         J_ASSERT(journal->j_tail_sequence == journal->j_transaction_sequence);
    2501         462 :         write_unlock(&journal->j_state_lock);
    2502             : out:
    2503             :         return err;
    2504             : }
    2505             : 
    2506             : /**
    2507             :  * jbd2_journal_wipe() - Wipe journal contents
    2508             :  * @journal: Journal to act on.
    2509             :  * @write: flag (see below)
    2510             :  *
    2511             :  * Wipe out all of the contents of a journal, safely.  This will produce
    2512             :  * a warning if the journal contains any valid recovery information.
    2513             :  * Must be called between journal_init_*() and jbd2_journal_load().
    2514             :  *
    2515             :  * If 'write' is non-zero, then we wipe out the journal on disk; otherwise
    2516             :  * we merely suppress recovery.
                     :  *
                     :  * Return: the error from load_superblock() if it fails; 0 if
                     :  * journal->j_tail is zero (nothing to wipe); otherwise the result of
                     :  * jbd2_journal_skip_recovery().  When @write is non-zero the journal is
                     :  * marked empty regardless of that result.
    2517             :  */
    2518             : 
    2519        2252 : int jbd2_journal_wipe(journal_t *journal, int write)
    2520             : {
    2521        2252 :         int err = 0;
    2522             : 
    2523        2252 :         J_ASSERT (!(journal->j_flags & JBD2_LOADED));
    2524             : 
    2525        2252 :         err = load_superblock(journal);
    2526        2252 :         if (err)
    2527             :                 return err;
    2528             : 
    2529        2251 :         if (!journal->j_tail)
    2530        2251 :                 goto no_recovery;       /* err is 0 here */
    2531             : 
    2532           0 :         printk(KERN_WARNING "JBD2: %s recovery information on journal\n",
    2533             :                 write ? "Clearing" : "Ignoring");
    2534             : 
    2535           0 :         err = jbd2_journal_skip_recovery(journal);
    2536           0 :         if (write) {
    2537             :                 /* Lock to make assertions happy... */
    2538           0 :                 mutex_lock_io(&journal->j_checkpoint_mutex);
    2539           0 :                 jbd2_mark_journal_empty(journal, REQ_SYNC | REQ_FUA);
    2540           0 :                 mutex_unlock(&journal->j_checkpoint_mutex);
    2541             :         }
    2542             : 
    2543           0 :  no_recovery:
    2544             :         return err;
    2545             : }
    2546             : 
    2547             : /**
    2548             :  * jbd2_journal_abort () - Shutdown the journal immediately.
    2549             :  * @journal: the journal to shutdown.
    2550             :  * @errno:   an error number to record in the journal indicating
    2551             :  *           the reason for the shutdown.
    2552             :  *
    2553             :  * Perform a complete, immediate shutdown of the ENTIRE
    2554             :  * journal (not of a single transaction).  This operation cannot be
    2555             :  * undone without closing and reopening the journal.
    2556             :  *
    2557             :  * The jbd2_journal_abort function is intended to support higher level error
    2558             :  * recovery mechanisms such as the ext2/ext3 remount-readonly error
    2559             :  * mode.
    2560             :  *
    2561             :  * Journal abort has very specific semantics.  Any existing dirty,
    2562             :  * unjournaled buffers in the main filesystem will still be written to
    2563             :  * disk by bdflush, but the journaling mechanism will be suspended
    2564             :  * immediately and no further transaction commits will be honoured.
    2565             :  *
    2566             :  * Any dirty, journaled buffers will be written back to disk without
    2567             :  * hitting the journal.  Atomicity cannot be guaranteed on an aborted
    2568             :  * filesystem, but we _do_ attempt to leave as much data as possible
    2569             :  * behind for fsck to use for cleanup.
    2570             :  *
    2571             :  * Any attempt to get a new transaction handle on a journal which is in
    2572             :  * ABORT state will just result in an -EROFS error return.  A
    2573             :  * jbd2_journal_stop on an existing handle will return -EIO if we have
    2574             :  * entered abort state during the update.
    2575             :  *
    2576             :  * Recursive transactions are not disturbed by journal abort until the
    2577             :  * final jbd2_journal_stop, which will receive the -EIO error.
    2578             :  *
    2579             :  * Finally, the jbd2_journal_abort call allows the caller to supply an errno
    2580             :  * which will be recorded (if possible) in the journal superblock.  This
    2581             :  * allows a client to record failure conditions in the middle of a
    2582             :  * transaction without having to complete the transaction to record the
    2583             :  * failure to disk.  ext3_error, for example, now uses this
    2584             :  * functionality.
    2585             :  *
                     :  * If the journal already has JBD2_ABORT set, the call only replaces
                     :  * the recorded errno (and updates the superblock errno) when @errno is
                     :  * -ESHUTDOWN and the previously recorded errno is not; otherwise it
                     :  * returns without changing anything.
                     :  *
    2586             :  */
    2587             : 
    2588         156 : void jbd2_journal_abort(journal_t *journal, int errno)
    2589             : {
    2590         156 :         transaction_t *transaction;
    2591             : 
    2592             :         /*
    2593             :          * Serialize the whole abort procedure.  This avoids races with the
    2594             :          * filesystem's error handling flow (e.g. ext4_abort()) and ensures
    2595             :          * that any panic happens only after the error info has been written
    2596             :          * into the journal's superblock.
    2597             :          */
    2598         156 :         mutex_lock(&journal->j_abort_mutex);
    2599             :         /*
    2600             :          * ESHUTDOWN always takes precedence because a file system check
    2601             :          * caused by any other journal abort error is not required after
    2602             :          * a shutdown has been triggered.
    2603             :          */
    2604         156 :         write_lock(&journal->j_state_lock);
    2605         156 :         if (journal->j_flags & JBD2_ABORT) {
    2606           1 :                 int old_errno = journal->j_errno;
    2607             : 
    2608           1 :                 write_unlock(&journal->j_state_lock);
    2609           1 :                 if (old_errno != -ESHUTDOWN && errno == -ESHUTDOWN) {
    2610           0 :                         journal->j_errno = errno;       /* NOTE(review): written after j_state_lock was dropped; only j_abort_mutex is held here */
    2611           0 :                         jbd2_journal_update_sb_errno(journal);
    2612             :                 }
    2613           1 :                 mutex_unlock(&journal->j_abort_mutex);
    2614           1 :                 return;
    2615             :         }
    2616             : 
    2617             :         /*
    2618             :          * Mark the journal as aborted and start committing the current
    2619             :          * running transaction so that all journaled buffers get released.
    2620             :          */
    2621         155 :         pr_err("Aborting journal on device %s.\n", journal->j_devname);
    2622             : 
    2623         155 :         journal->j_flags |= JBD2_ABORT;
    2624         155 :         journal->j_errno = errno;
    2625         155 :         transaction = journal->j_running_transaction;
    2626         155 :         if (transaction)
    2627          13 :                 __jbd2_log_start_commit(journal, transaction->t_tid);
    2628         155 :         write_unlock(&journal->j_state_lock);
    2629             : 
    2630             :         /*
    2631             :          * Record errno to the journal super block, so that fsck and jbd2
    2632             :          * layer could realise that a filesystem check is needed.
    2633             :          */
    2634         155 :         jbd2_journal_update_sb_errno(journal);
    2635         155 :         mutex_unlock(&journal->j_abort_mutex);
    2636             : }
    2637             : 
    2638             : /**
    2639             :  * jbd2_journal_errno() - returns the journal's error state.
    2640             :  * @journal: journal to examine.
    2641             :  *
    2642             :  * This is the errno number set with jbd2_journal_abort(), the last
    2643             :  * time the journal was mounted - if the journal was stopped
    2644             :  * without calling abort this will be 0.
    2645             :  *
    2646             :  * If the journal has been aborted during the current mount (JBD2_ABORT
    2647             :  * is set in j_flags), -EROFS is returned instead of j_errno.
    2648             :  */
    2649        2544 : int jbd2_journal_errno(journal_t *journal)
    2650             : {
    2651        2544 :         int err;
    2652             : 
    2653        2544 :         read_lock(&journal->j_state_lock);      /* j_flags and j_errno are read under j_state_lock */
    2654        2544 :         if (journal->j_flags & JBD2_ABORT)
    2655             :                 err = -EROFS;
    2656             :         else
    2657        2544 :                 err = journal->j_errno;
    2658        2544 :         read_unlock(&journal->j_state_lock);
    2659        2544 :         return err;
    2660             : }
    2661             : 
    2662             : /**
    2663             :  * jbd2_journal_clear_err() - clears the journal's error state
    2664             :  * @journal: journal to act on.
    2665             :  *
    2666             :  * An error must be cleared or acked to take a FS out of readonly
    2667             :  * mode.
                     :  *
                     :  * Return: -EROFS, leaving j_errno untouched, if JBD2_ABORT is set in
                     :  * j_flags; otherwise j_errno is reset to 0 and 0 is returned.
    2668             :  */
    2669           0 : int jbd2_journal_clear_err(journal_t *journal)
    2670             : {
    2671           0 :         int err = 0;
    2672             : 
    2673           0 :         write_lock(&journal->j_state_lock);
    2674           0 :         if (journal->j_flags & JBD2_ABORT)
    2675             :                 err = -EROFS;   /* an aborted journal keeps its recorded error */
    2676             :         else
    2677           0 :                 journal->j_errno = 0;
    2678           0 :         write_unlock(&journal->j_state_lock);
    2679           0 :         return err;
    2680             : }
    2681             : 
    2682             : /**
    2683             :  * jbd2_journal_ack_err() - Ack journal err.
    2684             :  * @journal: journal to act on.
    2685             :  *
    2686             :  * An error must be cleared or acked to take a FS out of readonly
    2687             :  * mode.
                     :  *
                     :  * Sets JBD2_ACK_ERR in j_flags, under j_state_lock, but only when
                     :  * j_errno is non-zero; with no recorded error this is a no-op.
    2688             :  */
    2689           0 : void jbd2_journal_ack_err(journal_t *journal)
    2690             : {
    2691           0 :         write_lock(&journal->j_state_lock);
    2692           0 :         if (journal->j_errno)
    2693           0 :                 journal->j_flags |= JBD2_ACK_ERR;
    2694           0 :         write_unlock(&journal->j_state_lock);
    2695           0 : }
    2696             : 
    2697     6556787 : int jbd2_journal_blocks_per_page(struct inode *inode)
    2698             : {       /*
                     :          * Number of blocks of the inode's filesystem that fit in one page:
                     :          * 2^(PAGE_SHIFT - s_blocksize_bits).
                     :          * NOTE(review): assumes s_blocksize_bits <= PAGE_SHIFT; the shift
                     :          * count would be negative otherwise -- confirm against callers.
                     :          */
    2699     6556787 :         return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
    2700             : }
    2701             : 
    2702             : /*
    2703             :  * helper functions to deal with 32 or 64bit block numbers.
                     :  *
                     :  * journal_tag_bytes() returns the size in bytes of one block tag for
                     :  * this journal's feature set:
                     :  *   - csum3 enabled: sizeof(journal_block_tag3_t);
                     :  *   - otherwise sizeof(journal_block_tag_t), plus sizeof(__u16) when
                     :  *     csum2 is enabled, minus sizeof(__u32) when the 64bit feature is
                     :  *     not enabled.
    2704             :  */
    2705      409691 : size_t journal_tag_bytes(journal_t *journal)
    2706             : {
    2707      409691 :         size_t sz;
    2708             : 
    2709      819382 :         if (jbd2_has_feature_csum3(journal))
    2710             :                 return sizeof(journal_block_tag3_t);    /* the 64bit feature is not consulted on this path */
    2711             : 
    2712        1826 :         sz = sizeof(journal_block_tag_t);
    2713             : 
    2714        3652 :         if (jbd2_has_feature_csum2(journal))
    2715           0 :                 sz += sizeof(__u16);
    2716             : 
    2717        3652 :         if (jbd2_has_feature_64bit(journal))
    2718             :                 return sz;
    2719             :         else
    2720         100 :                 return sz - sizeof(__u32);
    2721             : }
    2722             : 
    2723             : /*
    2724             :  * JBD memory management
    2725             :  *
    2726             :  * These functions are used to allocate block-sized chunks of memory
    2727             :  * used for making copies of buffer_head data.  Very often it will be
    2728             :  * page-sized chunks of data, but sometimes it will be in
    2729             :  * sub-page-size chunks.  (For example, 16k pages on Power systems
    2730             :  * with a 4k block file system.)  For blocks smaller than a page, we
    2731             :  * use a SLAB allocator.  There are slab caches for each block size,
    2732             :  * which are allocated at mount time, if necessary, and we only free
    2733             :  * (all of) the slab caches when/if the jbd2 module is unloaded.  For
    2734             :  * this reason we don't need a mutex to protect access to
    2735             :  * jbd2_slab[] when allocating or releasing memory; only in
    2736             :  * jbd2_journal_create_slab().
    2737             :  */
    2738             : #define JBD2_MAX_SLABS 8 /* number of entries in jbd2_slab[] and jbd2_slab_names[] */
    2739             : static struct kmem_cache *jbd2_slab[JBD2_MAX_SLABS]; /* slot i holds the cache for objects of 1 << (i+10) bytes; filled in by jbd2_journal_create_slab() */
    2740             : 
    2741             : static const char *jbd2_slab_names[JBD2_MAX_SLABS] = { /* cache name for each jbd2_slab[] slot, same index */
    2742             :         "jbd2_1k", "jbd2_2k", "jbd2_4k", "jbd2_8k",
    2743             :         "jbd2_16k", "jbd2_32k", "jbd2_64k", "jbd2_128k"
    2744             : };
    2745             : 
    2746             : 
    2747           0 : static void jbd2_journal_destroy_slabs(void) /* Destroys every cache in jbd2_slab[] and resets each slot to NULL. */
    2748             : {
    2749           0 :         int i;
    2750             : 
    2751           0 :         for (i = 0; i < JBD2_MAX_SLABS; i++) {
    2752           0 :                 kmem_cache_destroy(jbd2_slab[i]); /* called for every slot, including ones never created; NOTE(review): relies on kmem_cache_destroy() accepting NULL -- confirm */
    2753           0 :                 jbd2_slab[i] = NULL; /* jbd2_journal_create_slab() treats a NULL slot as "not created yet" */
    2754             :         }
    2755           0 : }
    2756             : 
    2757        2501 : static int jbd2_journal_create_slab(size_t size) /* Makes sure a slab cache exists for `size`-byte blocks. Returns 0 if the cache exists, was created, or is not needed; -EINVAL if size maps past the end of jbd2_slab[]; -ENOMEM if cache creation fails. */
    2758             : {
    2759        2501 :         static DEFINE_MUTEX(jbd2_slab_create_mutex); /* serializes the check-and-create of a jbd2_slab[] slot */
    2760        2501 :         int i = order_base_2(size) - 10; /* slot index: 1k -> 0, 2k -> 1, ... */
    2761        2501 :         size_t slab_size;
    2762             : 
    2763        2501 :         if (size == PAGE_SIZE) /* jbd2_alloc() takes page-sized buffers from the page allocator, so no slab is needed */
    2764             :                 return 0;
    2765             : 
    2766           7 :         if (i >= JBD2_MAX_SLABS) /* no slot (and no name) for a size this large */
    2767             :                 return -EINVAL;
    2768             : 
    2769           7 :         if (unlikely(i < 0)) /* sizes below 1k share slot 0 */
    2770           0 :                 i = 0;
    2771           7 :         mutex_lock(&jbd2_slab_create_mutex);
    2772           7 :         if (jbd2_slab[i]) {
    2773           5 :                 mutex_unlock(&jbd2_slab_create_mutex);
    2774           5 :                 return 0;       /* Already created */
    2775             :         }
    2776             : 
    2777           2 :         slab_size = 1 << (i+10); /* derived from the slot, so a sub-1k request still gets a 1k cache */
    2778           2 :         jbd2_slab[i] = kmem_cache_create(jbd2_slab_names[i], slab_size, /* slab_size is passed as both object size and alignment */
    2779             :                                          slab_size, 0, NULL);
    2780           2 :         mutex_unlock(&jbd2_slab_create_mutex);
    2781           2 :         if (!jbd2_slab[i]) { /* NOTE(review): the slot is re-read here after the mutex has been dropped */
    2782           0 :                 printk(KERN_EMERG "JBD2: no memory for jbd2_slab cache\n");
    2783           0 :                 return -ENOMEM;
    2784             :         }
    2785             :         return 0;
    2786             : }
    2787             : 
    2788           0 : static struct kmem_cache *get_slab(size_t size) /* Returns the already-created slab cache for `size`-byte blocks; BUGs if size is out of range or the cache does not exist. */
    2789             : {
    2790           0 :         int i = order_base_2(size) - 10; /* same size-to-slot mapping as jbd2_journal_create_slab() */
    2791             : 
    2792           0 :         BUG_ON(i >= JBD2_MAX_SLABS); /* would index past the end of jbd2_slab[] */
    2793           0 :         if (unlikely(i < 0)) /* sizes below 1k use slot 0 */
    2794           0 :                 i = 0;
    2795           0 :         BUG_ON(jbd2_slab[i] == NULL); /* the cache must have been created beforehand */
    2796           0 :         return jbd2_slab[i];
    2797             : }
    2798             : 
    2799       21938 : void *jbd2_alloc(size_t size, gfp_t flags) /* Allocates `size` bytes (must be a power of 2) with allocation flags `flags`: from the matching slab cache when size < PAGE_SIZE, otherwise from the page allocator. Returns whatever the underlying allocator returned. */
    2800             : {
    2801       21938 :         void *ptr;
    2802             : 
    2803       21938 :         BUG_ON(size & (size-1)); /* Must be a power of 2 */
    2804             : 
    2805       21938 :         if (size < PAGE_SIZE)
    2806           0 :                 ptr = kmem_cache_alloc(get_slab(size), flags); /* get_slab() BUGs if no cache was created for this size */
    2807             :         else
    2808       43876 :                 ptr = (void *)__get_free_pages(flags, get_order(size));
    2809             : 
    2810             :         /* Check alignment; SLUB has gotten this wrong in the past,
    2811             :          * and this can lead to user data corruption! */
    2812       21913 :         BUG_ON(((unsigned long) ptr) & (size-1)); /* a NULL ptr has no low bits set, so it passes this check and is returned as-is */
    2813             : 
    2814       21913 :         return ptr;
    2815             : }
    2816             : 
    2817       21986 : void jbd2_free(void *ptr, size_t size) /* Releases a buffer of `size` bytes; `size` selects the same slab-vs-pages path that jbd2_alloc() uses for that size. */
    2818             : {
    2819       21986 :         if (size < PAGE_SIZE)
    2820           0 :                 kmem_cache_free(get_slab(size), ptr); /* sub-page buffers go back to their per-size slab cache */
    2821             :         else
    2822       43972 :                 free_pages((unsigned long)ptr, get_order(size)); /* page-sized and larger buffers go back to the page allocator */
    2823       21986 : }; /* NOTE(review): the ';' after the closing brace is redundant */
    2824             : 
    2825             : /*
    2826             :  * Journal_head storage management
    2827             :  */
    2828             : static struct kmem_cache *jbd2_journal_head_cache; /* cache that struct journal_head objects are allocated from */
    2829             : #ifdef CONFIG_JBD2_DEBUG
    2830             : static atomic_t nr_journal_heads = ATOMIC_INIT(0); /* debug-only counter: incremented on alloc, decremented on free, reported by journal_exit() */
    2831             : #endif
    2832             : 
    2833          12 : static int __init jbd2_journal_init_journal_head_cache(void) /* Creates jbd2_journal_head_cache. Returns 0 on success, -ENOMEM if the cache cannot be created. */
    2834             : {
    2835          12 :         J_ASSERT(!jbd2_journal_head_cache); /* the cache must not exist yet */
    2836          12 :         jbd2_journal_head_cache = kmem_cache_create("jbd2_journal_head",
    2837             :                                 sizeof(struct journal_head),
    2838             :                                 0,              /* offset */
    2839             :                                 SLAB_TEMPORARY | SLAB_TYPESAFE_BY_RCU,
    2840             :                                 NULL);          /* ctor */
    2841          12 :         if (!jbd2_journal_head_cache) {
    2842           0 :                 printk(KERN_EMERG "JBD2: no memory for journal_head cache\n");
    2843           0 :                 return -ENOMEM;
    2844             :         }
    2845             :         return 0;
    2846             : }
    2847             : 
    2848             : static void jbd2_journal_destroy_journal_head_cache(void) /* Destroys jbd2_journal_head_cache and clears the pointer. */
    2849             : {
    2850           0 :         kmem_cache_destroy(jbd2_journal_head_cache);
    2851           0 :         jbd2_journal_head_cache = NULL; /* keeps the J_ASSERT in the init function valid if init runs again */
    2852             : }
    2853             : 
    2854             : /*
    2855             :  * journal_head splicing and dicing
    2856             :  */
    2857     1461167 : static struct journal_head *journal_alloc_journal_head(void) /* Allocates a zeroed journal_head with b_state_lock initialised; if the first GFP_NOFS attempt fails it retries with __GFP_NOFAIL. */
    2858             : {
    2859     1461167 :         struct journal_head *ret;
    2860             : 
    2861             : #ifdef CONFIG_JBD2_DEBUG
    2862             :         atomic_inc(&nr_journal_heads); /* counted before the allocation is attempted */
    2863             : #endif
    2864     1461167 :         ret = kmem_cache_zalloc(jbd2_journal_head_cache, GFP_NOFS);
    2865     1461780 :         if (!ret) {
    2866           0 :                 jbd2_debug(1, "out of memory for journal_head\n");
    2867           0 :                 pr_notice_ratelimited("ENOMEM in %s, retrying.\n", __func__);
    2868           0 :                 ret = kmem_cache_zalloc(jbd2_journal_head_cache, /* second attempt adds __GFP_NOFAIL */
    2869             :                                 GFP_NOFS | __GFP_NOFAIL);
    2870             :         }
    2871     1461780 :         if (ret) /* NOTE(review): presumably always true after the __GFP_NOFAIL retry -- confirm */
    2872     1461780 :                 spin_lock_init(&ret->b_state_lock);
    2873     1461756 :         return ret;
    2874             : }
    2875             : 
    2876             : static void journal_free_journal_head(struct journal_head *jh) /* Returns jh to jbd2_journal_head_cache. */
    2877             : {
    2878             : #ifdef CONFIG_JBD2_DEBUG
    2879             :         atomic_dec(&nr_journal_heads); /* undoes the increment done in journal_alloc_journal_head() */
    2880             :         memset(jh, JBD2_POISON_FREE, sizeof(*jh)); /* debug builds overwrite the whole object before it is freed */
    2881             : #endif
    2882     1462308 :         kmem_cache_free(jbd2_journal_head_cache, jh);
    2883          58 : }
    2884             : 
    2885             : /*
    2886             :  * A journal_head is attached to a buffer_head whenever JBD has an
    2887             :  * interest in the buffer.
    2888             :  *
    2889             :  * Whenever a buffer has an attached journal_head, its ->b_state:BH_JBD bit
    2890             :  * is set.  This bit is tested in core kernel code where we need to take
    2891             :  * JBD-specific actions.  Testing the zeroness of ->b_private is not reliable
    2892             :  * there.
    2893             :  *
    2894             :  * When a buffer has its BH_JBD bit set, its ->b_count is elevated by one.
    2895             :  *
    2896             :  * When a buffer has its BH_JBD bit set it is immune from being released by
    2897             :  * core kernel code, mainly via ->b_count.
    2898             :  *
    2899             :  * A journal_head is detached from its buffer_head when the journal_head's
    2900             :  * b_jcount reaches zero. Running transaction (b_transaction) and checkpoint
    2901             :  * transaction (b_cp_transaction) hold their references to b_jcount.
    2902             :  *
    2903             :  * Various places in the kernel want to attach a journal_head to a buffer_head
    2904             :  * _before_ attaching the journal_head to a transaction.  To protect the
    2905             :  * journal_head in this situation, jbd2_journal_add_journal_head elevates the
    2906             :  * journal_head's b_jcount refcount by one.  The caller must call
    2907             :  * jbd2_journal_put_journal_head() to undo this.
    2908             :  *
    2909             :  * So the typical usage would be:
    2910             :  *
    2911             :  *      (Attach a journal_head if needed.  Increments b_jcount)
    2912             :  *      struct journal_head *jh = jbd2_journal_add_journal_head(bh);
    2913             :  *      ...
    2914             :  *      (Get another reference for transaction)
    2915             :  *      jbd2_journal_grab_journal_head(bh);
    2916             :  *      jh->b_transaction = xxx;
    2917             :  *      (Put original reference)
    2918             :  *      jbd2_journal_put_journal_head(jh);
    2919             :  */
    2920             : 
    2921             : /*
    2922             :  * Give a buffer_head a journal_head.
    2923             :  *
    2924             :  * May sleep.
    2925             :  */
    2926     3321034 : struct journal_head *jbd2_journal_add_journal_head(struct buffer_head *bh) /* Attaches a journal_head to bh if it has none, takes one b_jcount reference on it, and returns it. */
    2927             : {
    2928     3321034 :         struct journal_head *jh;
    2929     3321034 :         struct journal_head *new_jh = NULL;
    2930             : 
    2931     3320634 : repeat:
    2932     6641268 :         if (!buffer_jbd(bh)) /* unlocked peek: allocate a candidate before taking the lock */
    2933     1461204 :                 new_jh = journal_alloc_journal_head();
    2934             : 
    2935     3321175 :         jbd_lock_bh_journal_head(bh);
    2936     6644050 :         if (buffer_jbd(bh)) { /* re-check under the lock; the unlocked result may be stale */
    2937     1860429 :                 jh = bh2jh(bh);
    2938             :         } else {
    2939     1461596 :                 J_ASSERT_BH(bh,
    2940             :                         (atomic_read(&bh->b_count) > 0) ||
    2941             :                         (bh->b_folio && bh->b_folio->mapping));
    2942             : 
    2943     1461596 :                 if (!new_jh) { /* bh had a journal_head at the unlocked peek but lost it since: go back and allocate one */
    2944           0 :                         jbd_unlock_bh_journal_head(bh);
    2945           0 :                         goto repeat;
    2946             :                 }
    2947             : 
    2948     1461716 :                 jh = new_jh;
    2949     1461716 :                 new_jh = NULL;          /* We consumed it */
    2950     1461716 :                 set_buffer_jbd(bh);
    2951     1461877 :                 bh->b_private = jh;
    2952     1461877 :                 jh->b_bh = bh;
    2953     1461877 :                 get_bh(bh); /* extra buffer reference held while the journal_head is attached; dropped by __brelse() in jbd2_journal_put_journal_head() */
    2954     3322353 :                 BUFFER_TRACE(bh, "added journal_head");
    2955             :         }
    2956     3322353 :         jh->b_jcount++; /* the caller's reference, released with jbd2_journal_put_journal_head() */
    2957     3322353 :         jbd_unlock_bh_journal_head(bh);
    2958     3322308 :         if (new_jh) /* a journal_head was attached by someone else after our unlocked peek: the spare is unused */
    2959          58 :                 journal_free_journal_head(new_jh);
    2960     3322308 :         return bh->b_private; /* read after unlock; on both branches above jh was set to / read from bh->b_private under the lock */
    2961             : }
    2962             : 
    2963             : /*
    2964             :  * Grab a ref against this buffer_head's journal_head.  If it ended up not
    2965             :  * having a journal_head, return NULL
    2966             :  */
    2967    40052669 : struct journal_head *jbd2_journal_grab_journal_head(struct buffer_head *bh) /* Takes a b_jcount reference on bh's journal_head and returns it, or returns NULL when bh has none. Never allocates. */
    2968             : {
    2969    40052669 :         struct journal_head *jh = NULL;
    2970             : 
    2971    40052669 :         jbd_lock_bh_journal_head(bh);
    2972    80111362 :         if (buffer_jbd(bh)) { /* tested under the lock so the journal_head cannot be detached in between */
    2973     6388254 :                 jh = bh2jh(bh);
    2974     6388254 :                 jh->b_jcount++;
    2975             :         }
    2976    40055681 :         jbd_unlock_bh_journal_head(bh);
    2977    40054810 :         return jh;
    2978             : }
    2979             : EXPORT_SYMBOL(jbd2_journal_grab_journal_head);
    2980             : 
    2981     1462253 : static void __journal_remove_journal_head(struct buffer_head *bh) /* Unlinks bh from its journal_head without freeing it; jbd2_journal_put_journal_head() calls this with the bh journal-head lock held. */
    2982             : {
    2983     1462253 :         struct journal_head *jh = bh2jh(bh);
    2984             : 
    2985     1462253 :         J_ASSERT_JH(jh, jh->b_transaction == NULL); /* the journal_head must no longer belong to any transaction or list */
    2986     1462253 :         J_ASSERT_JH(jh, jh->b_next_transaction == NULL);
    2987     1462253 :         J_ASSERT_JH(jh, jh->b_cp_transaction == NULL);
    2988     1462253 :         J_ASSERT_JH(jh, jh->b_jlist == BJ_None);
    2989     2924506 :         J_ASSERT_BH(bh, buffer_jbd(bh)); /* and bh and jh must still point at each other */
    2990     1462253 :         J_ASSERT_BH(bh, jh2bh(jh) == bh);
    2991     1462253 :         BUFFER_TRACE(bh, "remove journal_head");
    2992             : 
    2993             :         /* Unlink before dropping the lock */
    2994     1462253 :         bh->b_private = NULL;
    2995     1462253 :         jh->b_bh = NULL;     /* debug, really */
    2996     1462253 :         clear_buffer_jbd(bh); /* from here on buffer_jbd(bh) is false, so add/grab no longer see this journal_head */
    2997     1462251 : }
    2998             : 
    2999     1462250 : static void journal_release_journal_head(struct journal_head *jh, size_t b_size) /* Frees jh together with any b_frozen_data / b_committed_data still attached; b_size is the size handed to jbd2_free() for those buffers. */
    3000             : {
    3001     1462250 :         if (jh->b_frozen_data) { /* leftover copy: warn, then free it */
    3002           0 :                 printk(KERN_WARNING "%s: freeing b_frozen_data\n", __func__);
    3003           0 :                 jbd2_free(jh->b_frozen_data, b_size);
    3004             :         }
    3005     1462250 :         if (jh->b_committed_data) { /* same treatment for the committed copy */
    3006           0 :                 printk(KERN_WARNING "%s: freeing b_committed_data\n", __func__);
    3007           0 :                 jbd2_free(jh->b_committed_data, b_size);
    3008             :         }
    3009     1462250 :         journal_free_journal_head(jh);
    3010     1462249 : }
    3011             : 
    3012             : /*
    3013             :  * Drop a reference on the passed journal_head.  If it fell to zero then
    3014             :  * release the journal_head from the buffer_head.
    3015             :  */
    3016     9710530 : void jbd2_journal_put_journal_head(struct journal_head *jh) /* Drops one b_jcount reference on jh; when the count reaches zero, detaches jh from its buffer_head, frees jh, and releases the buffer reference. */
    3017             : {
    3018     9710530 :         struct buffer_head *bh = jh2bh(jh);
    3019             : 
    3020     9710530 :         jbd_lock_bh_journal_head(bh);
    3021     9710777 :         J_ASSERT_JH(jh, jh->b_jcount > 0); /* the caller must actually hold a reference */
    3022     9710777 :         --jh->b_jcount;
    3023     9710777 :         if (!jh->b_jcount) {
    3024     1462253 :                 __journal_remove_journal_head(bh); /* unlink while still holding the lock */
    3025     1462251 :                 jbd_unlock_bh_journal_head(bh);
    3026     1462250 :                 journal_release_journal_head(jh, bh->b_size); /* jh is already unlinked from bh, so it is freed outside the lock */
    3027     1462250 :                 __brelse(bh); /* pairs with get_bh() in jbd2_journal_add_journal_head() */
    3028             :         } else {
    3029     8248524 :                 jbd_unlock_bh_journal_head(bh);
    3030             :         }
    3031     9710673 : }
    3032             : EXPORT_SYMBOL(jbd2_journal_put_journal_head);
    3033             : 
    3034             : /*
    3035             :  * Initialize jbd inode head
    3036             :  */
    3037     1999264 : void jbd2_journal_init_jbd_inode(struct jbd2_inode *jinode, struct inode *inode) /* Resets every field of jinode and binds it to the VFS inode `inode`. */
    3038             : {
    3039     1999264 :         jinode->i_transaction = NULL; /* not attached to any transaction yet */
    3040     1999264 :         jinode->i_next_transaction = NULL;
    3041     1999264 :         jinode->i_vfs_inode = inode;
    3042     1999264 :         jinode->i_flags = 0;
    3043     1999264 :         jinode->i_dirty_start = 0; /* dirty range starts out as [0, 0] */
    3044     1999264 :         jinode->i_dirty_end = 0;
    3045     1999264 :         INIT_LIST_HEAD(&jinode->i_list);
    3046     1999264 : }
    3047             : 
    3048             : /*
    3049             :  * Function to be called before we start removing inode from memory (i.e.,
    3050             :  * clear_inode() is a fine place to be called from). It removes inode from
    3051             :  * transaction's lists.
    3052             :  */
    3053     2005071 : void jbd2_journal_release_jbd_inode(journal_t *journal, /* Removes jinode from its transaction's list, first waiting until JI_COMMIT_RUNNING is clear. Does nothing when journal is NULL. */
    3054             :                                     struct jbd2_inode *jinode)
    3055             : {
    3056     2005071 :         if (!journal)
    3057             :                 return;
    3058     2005071 : restart:
    3059     2005071 :         spin_lock(&journal->j_list_lock);
    3060             :         /* Is commit writing out inode - we have to wait */
    3061     2005506 :         if (jinode->i_flags & JI_COMMIT_RUNNING) {
    3062           0 :                 wait_queue_head_t *wq;
    3063           0 :                 DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
    3064           0 :                 wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
    3065           0 :                 prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); /* queue ourselves while still holding j_list_lock ... */
    3066           0 :                 spin_unlock(&journal->j_list_lock); /* ... then drop the lock before sleeping */
    3067           0 :                 schedule();
    3068           0 :                 finish_wait(wq, &wait.wq_entry);
    3069           0 :                 goto restart; /* retake the lock and test the flag again */
    3070             :         }
    3071             : 
    3072     2005506 :         if (jinode->i_transaction) { /* still on a transaction's list: unlink it */
    3073         192 :                 list_del(&jinode->i_list);
    3074         192 :                 jinode->i_transaction = NULL;
    3075             :         }
    3076     2005506 :         spin_unlock(&journal->j_list_lock);
    3077             : }
    3078             : 
    3079             : 
    3080             : #ifdef CONFIG_PROC_FS
    3081             : 
    3082             : #define JBD2_STATS_PROC_NAME "fs/jbd2"
    3083             : 
    3084          12 : static void __init jbd2_create_jbd_stats_proc_entry(void) /* Creates the JBD2_STATS_PROC_NAME proc directory and stores it in proc_jbd2_stats. */
    3085             : {
    3086          12 :         proc_jbd2_stats = proc_mkdir(JBD2_STATS_PROC_NAME, NULL); /* result is not checked here; the remove function tests proc_jbd2_stats before removing. NULL parent presumably means the proc root -- confirm */
    3087          12 : }
    3088             : 
    3089           0 : static void __exit jbd2_remove_jbd_stats_proc_entry(void) /* Removes the JBD2_STATS_PROC_NAME proc directory if it was created. */
    3090             : {
    3091           0 :         if (proc_jbd2_stats) /* non-NULL only if proc_mkdir() returned a directory */
    3092           0 :                 remove_proc_entry(JBD2_STATS_PROC_NAME, NULL);
    3093           0 : }
    3094             : 
    3095             : #else
    3096             : 
    3097             : #define jbd2_create_jbd_stats_proc_entry() do {} while (0) /* no-op stand-in when CONFIG_PROC_FS is not set */
    3098             : #define jbd2_remove_jbd_stats_proc_entry() do {} while (0) /* no-op stand-in when CONFIG_PROC_FS is not set */
    3099             : 
    3100             : #endif
    3101             : 
    3102             : struct kmem_cache *jbd2_handle_cache, *jbd2_inode_cache; /* non-static caches, created by jbd2_journal_init_handle_cache() and jbd2_journal_init_inode_cache() */
    3103             : 
    3104          12 : static int __init jbd2_journal_init_inode_cache(void) /* Creates jbd2_inode_cache. Returns 0 on success, -ENOMEM if creation fails. */
    3105             : {
    3106          12 :         J_ASSERT(!jbd2_inode_cache); /* the cache must not exist yet */
    3107          12 :         jbd2_inode_cache = KMEM_CACHE(jbd2_inode, 0);
    3108          12 :         if (!jbd2_inode_cache) {
    3109           0 :                 pr_emerg("JBD2: failed to create inode cache\n");
    3110           0 :                 return -ENOMEM;
    3111             :         }
    3112             :         return 0;
    3113             : }
    3114             : 
    3115          12 : static int __init jbd2_journal_init_handle_cache(void) /* Creates jbd2_handle_cache. Returns 0 on success, -ENOMEM if creation fails. */
    3116             : {
    3117          12 :         J_ASSERT(!jbd2_handle_cache); /* the cache must not exist yet */
    3118          12 :         jbd2_handle_cache = KMEM_CACHE(jbd2_journal_handle, SLAB_TEMPORARY);
    3119          12 :         if (!jbd2_handle_cache) {
    3120           0 :                 printk(KERN_EMERG "JBD2: failed to create handle cache\n");
    3121           0 :                 return -ENOMEM;
    3122             :         }
    3123             :         return 0;
    3124             : }
    3125             : 
    3126             : static void jbd2_journal_destroy_inode_cache(void) /* Destroys jbd2_inode_cache and clears the pointer. */
    3127             : {
    3128           0 :         kmem_cache_destroy(jbd2_inode_cache);
    3129           0 :         jbd2_inode_cache = NULL; /* keeps the J_ASSERT in the init function valid if init runs again */
    3130             : }
    3131             : 
    3132             : static void jbd2_journal_destroy_handle_cache(void) /* Destroys jbd2_handle_cache and clears the pointer. */
    3133             : {
    3134           0 :         kmem_cache_destroy(jbd2_handle_cache);
    3135           0 :         jbd2_handle_cache = NULL; /* keeps the J_ASSERT in the init function valid if init runs again */
    3136             : }
    3137             : 
    3138             : /*
    3139             :  * Module startup and shutdown
    3140             :  */
    3141             : 
    3142          12 : static int __init journal_init_caches(void) /* Runs the cache init functions in order, stopping at the first failure. Returns 0 if all succeed, else the first non-zero result. */
    3143             : {
    3144          12 :         int ret;
    3145             : 
    3146          12 :         ret = jbd2_journal_init_revoke_record_cache();
    3147          12 :         if (ret == 0) /* each later step runs only while ret is still 0 */
    3148          12 :                 ret = jbd2_journal_init_revoke_table_cache();
    3149          12 :         if (ret == 0)
    3150          12 :                 ret = jbd2_journal_init_journal_head_cache();
    3151          12 :         if (ret == 0)
    3152          12 :                 ret = jbd2_journal_init_handle_cache();
    3153          12 :         if (ret == 0)
    3154          12 :                 ret = jbd2_journal_init_inode_cache();
    3155          12 :         if (ret == 0)
    3156          12 :                 ret = jbd2_journal_init_transaction_cache();
    3157          12 :         return ret; /* nothing is torn down here on failure; journal_init() calls jbd2_journal_destroy_caches() */
    3158             : }
    3159             : 
    3160           0 : static void jbd2_journal_destroy_caches(void) /* Calls every cache destroy function, plus jbd2_journal_destroy_slabs(); used by journal_exit() and by journal_init() on failure. */
    3161             : {
    3162           0 :         jbd2_journal_destroy_revoke_record_cache(); /* NOTE(review): on the journal_init() failure path some of these caches were never created; presumably each destroy function tolerates that -- confirm */
    3163           0 :         jbd2_journal_destroy_revoke_table_cache();
    3164           0 :         jbd2_journal_destroy_journal_head_cache();
    3165           0 :         jbd2_journal_destroy_handle_cache();
    3166           0 :         jbd2_journal_destroy_inode_cache();
    3167           0 :         jbd2_journal_destroy_transaction_cache();
    3168           0 :         jbd2_journal_destroy_slabs();
    3169           0 : }
    3170             : 
    3171          12 : static int __init journal_init(void) /* Module init: creates the caches and, on success, the stats proc directory. Returns 0 or the error from journal_init_caches(). */
    3172             : {
    3173          12 :         int ret;
    3174             : 
    3175          12 :         BUILD_BUG_ON(sizeof(struct journal_superblock_s) != 1024); /* compile-time check that the journal superblock struct is exactly 1024 bytes */
    3176             : 
    3177          12 :         ret = journal_init_caches();
    3178          12 :         if (ret == 0) {
    3179          12 :                 jbd2_create_jbd_stats_proc_entry();
    3180             :         } else {
    3181           0 :                 jbd2_journal_destroy_caches(); /* undo whatever journal_init_caches() managed to create */
    3182             :         }
    3183          12 :         return ret;
    3184             : }
    3185             : 
    3186           0 : static void __exit journal_exit(void) /* Module exit: removes the stats proc directory and destroys all caches; debug builds first report any journal_heads still counted as allocated. */
    3187             : {
    3188             : #ifdef CONFIG_JBD2_DEBUG
    3189             :         int n = atomic_read(&nr_journal_heads); /* allocations minus frees so far */
    3190             :         if (n)
    3191             :                 printk(KERN_ERR "JBD2: leaked %d journal_heads!\n", n);
    3192             : #endif
    3193           0 :         jbd2_remove_jbd_stats_proc_entry();
    3194           0 :         jbd2_journal_destroy_caches();
    3195           0 : }
    3196             : 
    3197             : MODULE_LICENSE("GPL");
    3198             : module_init(journal_init); /* journal_init() is the module's init entry point */
    3199             : module_exit(journal_exit); /* journal_exit() is the module's exit entry point */
    3200             :

Generated by: LCOV version 1.14