Line data Source code
1 : // SPDX-License-Identifier: GPL-2.0
2 : /*
3 : * Copyright (C) 2012 Fusion-io All rights reserved.
4 : * Copyright (C) 2012 Intel Corp. All rights reserved.
5 : */
6 :
7 : #include <linux/sched.h>
8 : #include <linux/bio.h>
9 : #include <linux/slab.h>
10 : #include <linux/blkdev.h>
11 : #include <linux/raid/pq.h>
12 : #include <linux/hash.h>
13 : #include <linux/list_sort.h>
14 : #include <linux/raid/xor.h>
15 : #include <linux/mm.h>
16 : #include "messages.h"
17 : #include "misc.h"
18 : #include "ctree.h"
19 : #include "disk-io.h"
20 : #include "volumes.h"
21 : #include "raid56.h"
22 : #include "async-thread.h"
23 : #include "file-item.h"
24 : #include "btrfs_inode.h"
25 :
26 : /* set when additional merges to this rbio are not allowed */
27 : #define RBIO_RMW_LOCKED_BIT 1
28 :
29 : /*
30 : * set when this rbio is sitting in the hash, but it is just a cache
31 : * of past RMW
32 : */
33 : #define RBIO_CACHE_BIT 2
34 :
35 : /*
36 : * set when it is safe to trust the stripe_pages for caching
37 : */
38 : #define RBIO_CACHE_READY_BIT 3
39 :
40 : #define RBIO_CACHE_SIZE 1024
41 :
42 : #define BTRFS_STRIPE_HASH_TABLE_BITS 11
43 :
44 : /* Used by the raid56 code to lock stripes for read/modify/write */
45 : struct btrfs_stripe_hash {
46 : struct list_head hash_list;
47 : spinlock_t lock;
48 : };
49 :
50 : /* Used by the raid56 code to lock stripes for read/modify/write */
51 : struct btrfs_stripe_hash_table {
52 : struct list_head stripe_cache;
53 : spinlock_t cache_lock;
54 : int cache_size;
55 : struct btrfs_stripe_hash table[];
56 : };
57 :
58 : /*
59 : * A bvec-like structure to represent a sector inside a page.
60 : *
61 : * Unlike bvec we don't need bv_len, as it's fixed to sectorsize.
62 : */
63 : struct sector_ptr {
64 : struct page *page;
65 : unsigned int pgoff:24;
66 : unsigned int uptodate:8;
67 : };
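/*
 * Illustrative note (not from the original source): the 24-bit pgoff field
 * can hold in-page offsets up to 16MiB, far more than any supported page
 * size, and uptodate only needs to carry a boolean; packing both into one
 * 32-bit word keeps the per-sector bookkeeping small.
 */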
68 :
69 : static void rmw_rbio_work(struct work_struct *work);
70 : static void rmw_rbio_work_locked(struct work_struct *work);
71 : static void index_rbio_pages(struct btrfs_raid_bio *rbio);
72 : static int alloc_rbio_pages(struct btrfs_raid_bio *rbio);
73 :
74 : static int finish_parity_scrub(struct btrfs_raid_bio *rbio);
75 : static void scrub_rbio_work_locked(struct work_struct *work);
76 :
77 0 : static void free_raid_bio_pointers(struct btrfs_raid_bio *rbio)
78 : {
79 0 : bitmap_free(rbio->error_bitmap);
80 0 : kfree(rbio->stripe_pages);
81 0 : kfree(rbio->bio_sectors);
82 0 : kfree(rbio->stripe_sectors);
83 0 : kfree(rbio->finish_pointers);
84 0 : }
85 :
86 0 : static void free_raid_bio(struct btrfs_raid_bio *rbio)
87 : {
88 0 : int i;
89 :
90 0 : if (!refcount_dec_and_test(&rbio->refs))
91 : return;
92 :
93 0 : WARN_ON(!list_empty(&rbio->stripe_cache));
94 0 : WARN_ON(!list_empty(&rbio->hash_list));
95 0 : WARN_ON(!bio_list_empty(&rbio->bio_list));
96 :
97 0 : for (i = 0; i < rbio->nr_pages; i++) {
98 0 : if (rbio->stripe_pages[i]) {
99 0 : __free_page(rbio->stripe_pages[i]);
100 0 : rbio->stripe_pages[i] = NULL;
101 : }
102 : }
103 :
104 0 : btrfs_put_bioc(rbio->bioc);
105 0 : free_raid_bio_pointers(rbio);
106 0 : kfree(rbio);
107 : }
108 :
109 0 : static void start_async_work(struct btrfs_raid_bio *rbio, work_func_t work_func)
110 : {
111 0 : INIT_WORK(&rbio->work, work_func);
112 0 : queue_work(rbio->bioc->fs_info->rmw_workers, &rbio->work);
113 0 : }
114 :
115 : /*
116 : * the stripe hash table is used for locking, and to collect
117 : * bios in hopes of making a full stripe
118 : */
119 3242 : int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
120 : {
121 3242 : struct btrfs_stripe_hash_table *table;
122 3242 : struct btrfs_stripe_hash_table *x;
123 3242 : struct btrfs_stripe_hash *cur;
124 3242 : struct btrfs_stripe_hash *h;
125 3242 : int num_entries = 1 << BTRFS_STRIPE_HASH_TABLE_BITS;
126 3242 : int i;
127 :
128 3242 : if (info->stripe_hash_table)
129 : return 0;
130 :
131 : /*
132 : * The table is large, starting with order 4 and can go as high as
133 : * order 7 in case lock debugging is turned on.
134 : *
135 : * Try harder to allocate and fallback to vmalloc to lower the chance
136 : * of a failing mount.
137 : */
138 3242 : table = kvzalloc(struct_size(table, table, num_entries), GFP_KERNEL);
139 3242 : if (!table)
140 : return -ENOMEM;
141 :
142 3242 : spin_lock_init(&table->cache_lock);
143 3242 : INIT_LIST_HEAD(&table->stripe_cache);
144 :
145 3242 : h = table->table;
146 :
147 6642858 : for (i = 0; i < num_entries; i++) {
148 6639616 : cur = h + i;
149 6639616 : INIT_LIST_HEAD(&cur->hash_list);
150 6639616 : spin_lock_init(&cur->lock);
151 : }
152 :
153 3242 : x = cmpxchg(&info->stripe_hash_table, NULL, table);
154 3242 : kvfree(x);
155 3242 : return 0;
156 : }
157 :
158 : /*
159 : * Caching an rbio means copying everything from the
160 : * bio_sectors array into the stripe_pages array. We
161 : * use the sector uptodate bit in the stripe_sectors array
162 : * to indicate if it has valid data.
163 : *
164 : * Once the caching is done, we set the cache ready
165 : * bit.
166 : */
167 0 : static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
168 : {
169 0 : int i;
170 0 : int ret;
171 :
172 0 : ret = alloc_rbio_pages(rbio);
173 0 : if (ret)
174 : return;
175 :
176 0 : for (i = 0; i < rbio->nr_sectors; i++) {
177 : /* Some range not covered by bio (partial write), skip it */
178 0 : if (!rbio->bio_sectors[i].page) {
179 : /*
180 : * Even if the sector is not covered by bio, if it is
181 : * a data sector it should still be uptodate as it is
182 : * read from disk.
183 : */
184 0 : if (i < rbio->nr_data * rbio->stripe_nsectors)
185 : ASSERT(rbio->stripe_sectors[i].uptodate);
186 0 : continue;
187 : }
188 :
189 0 : ASSERT(rbio->stripe_sectors[i].page);
190 0 : memcpy_page(rbio->stripe_sectors[i].page,
191 0 : rbio->stripe_sectors[i].pgoff,
192 : rbio->bio_sectors[i].page,
193 0 : rbio->bio_sectors[i].pgoff,
194 0 : rbio->bioc->fs_info->sectorsize);
195 0 : rbio->stripe_sectors[i].uptodate = 1;
196 : }
197 0 : set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
198 : }
199 :
200 : /*
201 : * we hash on the first logical address of the stripe
202 : */
203 : static int rbio_bucket(struct btrfs_raid_bio *rbio)
204 : {
205 0 : u64 num = rbio->bioc->full_stripe_logical;
206 :
207 : /*
208 : * we shift down quite a bit. We're using byte
209 : * addressing, and most of the lower bits are zeros.
210 : * This tends to upset hash_64, and it consistently
211 : * returns just one or two different values.
212 : *
213 : * shifting off the lower bits fixes things.
214 : */
215 0 : return hash_64(num >> 16, BTRFS_STRIPE_HASH_TABLE_BITS);
216 : }
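/*
 * As an illustration: for a full stripe starting at logical address
 * 0x40000000 (1GiB), the bucket is hash_64(0x40000000 >> 16, 11) ==
 * hash_64(0x4000, 11), so only the higher, varying bits of the address
 * feed the hash.
 */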
217 :
218 : static bool full_page_sectors_uptodate(struct btrfs_raid_bio *rbio,
219 : unsigned int page_nr)
220 : {
221 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
222 : const u32 sectors_per_page = PAGE_SIZE / sectorsize;
223 : int i;
224 :
225 : ASSERT(page_nr < rbio->nr_pages);
226 :
227 : for (i = sectors_per_page * page_nr;
228 : i < sectors_per_page * page_nr + sectors_per_page;
229 : i++) {
230 : if (!rbio->stripe_sectors[i].uptodate)
231 : return false;
232 : }
233 : return true;
234 : }
235 :
236 : /*
237 : * Update the stripe_sectors[] array to use correct page and pgoff
238 : *
239 : * Should be called every time any page pointer in stripe_pages[] is modified.
240 : */
241 0 : static void index_stripe_sectors(struct btrfs_raid_bio *rbio)
242 : {
243 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
244 0 : u32 offset;
245 0 : int i;
246 :
247 0 : for (i = 0, offset = 0; i < rbio->nr_sectors; i++, offset += sectorsize) {
248 0 : int page_index = offset >> PAGE_SHIFT;
249 :
250 0 : ASSERT(page_index < rbio->nr_pages);
251 0 : rbio->stripe_sectors[i].page = rbio->stripe_pages[page_index];
252 0 : rbio->stripe_sectors[i].pgoff = offset_in_page(offset);
253 : }
254 0 : }
255 :
256 0 : static void steal_rbio_page(struct btrfs_raid_bio *src,
257 : struct btrfs_raid_bio *dest, int page_nr)
258 : {
259 0 : const u32 sectorsize = src->bioc->fs_info->sectorsize;
260 0 : const u32 sectors_per_page = PAGE_SIZE / sectorsize;
261 0 : int i;
262 :
263 0 : if (dest->stripe_pages[page_nr])
264 0 : __free_page(dest->stripe_pages[page_nr]);
265 0 : dest->stripe_pages[page_nr] = src->stripe_pages[page_nr];
266 0 : src->stripe_pages[page_nr] = NULL;
267 :
268 : /* Also update the sector->uptodate bits. */
269 0 : for (i = sectors_per_page * page_nr;
270 0 : i < sectors_per_page * page_nr + sectors_per_page; i++)
271 0 : dest->stripe_sectors[i].uptodate = true;
272 0 : }
273 :
274 : static bool is_data_stripe_page(struct btrfs_raid_bio *rbio, int page_nr)
275 : {
276 0 : const int sector_nr = (page_nr << PAGE_SHIFT) >>
277 0 : rbio->bioc->fs_info->sectorsize_bits;
278 :
279 : /*
280 : * We have ensured PAGE_SIZE is aligned with sectorsize, thus
281 : * we won't have a page which is half data half parity.
282 : *
283 : * Thus if the first sector of the page belongs to data stripes, then
284 : * the full page belongs to data stripes.
285 : */
286 0 : return (sector_nr < rbio->nr_data * rbio->stripe_nsectors);
287 : }
288 :
289 : /*
290 : * Stealing an rbio means taking all the uptodate pages from the stripe array
291 : * in the source rbio and putting them into the destination rbio.
292 : *
293 : * This will also update the involved stripe_sectors[] which are referring to
294 : * the old pages.
295 : */
296 0 : static void steal_rbio(struct btrfs_raid_bio *src, struct btrfs_raid_bio *dest)
297 : {
298 0 : int i;
299 :
300 0 : if (!test_bit(RBIO_CACHE_READY_BIT, &src->flags))
301 : return;
302 :
303 0 : for (i = 0; i < dest->nr_pages; i++) {
304 0 : struct page *p = src->stripe_pages[i];
305 :
306 : /*
307 : * We don't need to steal P/Q pages as they will always be
308 : * regenerated for RMW or full write anyway.
309 : */
310 0 : if (!is_data_stripe_page(src, i))
311 0 : continue;
312 :
313 : /*
314 : * If @src already has RBIO_CACHE_READY_BIT, it should have
315 : * all data stripe pages present and uptodate.
316 : */
317 0 : ASSERT(p);
318 0 : ASSERT(full_page_sectors_uptodate(src, i));
319 0 : steal_rbio_page(src, dest, i);
320 : }
321 0 : index_stripe_sectors(dest);
322 0 : index_stripe_sectors(src);
323 : }
324 :
325 : /*
326 : * merging means we take the bio_list from the victim and
327 : * splice it into the destination. The victim should
328 : * be discarded afterwards.
329 : *
330 : * Must be called with dest->bio_list_lock held.
331 : */
332 0 : static void merge_rbio(struct btrfs_raid_bio *dest,
333 : struct btrfs_raid_bio *victim)
334 : {
335 0 : bio_list_merge(&dest->bio_list, &victim->bio_list);
336 0 : dest->bio_list_bytes += victim->bio_list_bytes;
337 : /* Also inherit the bitmaps from @victim. */
338 0 : bitmap_or(&dest->dbitmap, &victim->dbitmap, &dest->dbitmap,
339 0 : dest->stripe_nsectors);
340 0 : bio_list_init(&victim->bio_list);
341 0 : }
342 :
343 : /*
344 : * used to prune items that are in the cache. The caller
345 : * must hold the hash table lock.
346 : */
347 0 : static void __remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
348 : {
349 0 : int bucket = rbio_bucket(rbio);
350 0 : struct btrfs_stripe_hash_table *table;
351 0 : struct btrfs_stripe_hash *h;
352 0 : int freeit = 0;
353 :
354 : /*
355 : * check the bit again under the hash table lock.
356 : */
357 0 : if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
358 : return;
359 :
360 0 : table = rbio->bioc->fs_info->stripe_hash_table;
361 0 : h = table->table + bucket;
362 :
363 : /* hold the lock for the bucket because we may be
364 : * removing it from the hash table
365 : */
366 0 : spin_lock(&h->lock);
367 :
368 : /*
369 : * hold the lock for the bio list because we need
370 : * to make sure the bio list is empty
371 : */
372 0 : spin_lock(&rbio->bio_list_lock);
373 :
374 0 : if (test_and_clear_bit(RBIO_CACHE_BIT, &rbio->flags)) {
375 0 : list_del_init(&rbio->stripe_cache);
376 0 : table->cache_size -= 1;
377 0 : freeit = 1;
378 :
379 : /* if the bio list isn't empty, this rbio is
380 : * still involved in an IO. We take it out
381 : * of the cache list, and drop the ref that
382 : * was held for the list.
383 : *
384 : * If the bio_list was empty, we also remove
385 : * the rbio from the hash_table, and drop
386 : * the corresponding ref
387 : */
388 0 : if (bio_list_empty(&rbio->bio_list)) {
389 0 : if (!list_empty(&rbio->hash_list)) {
390 0 : list_del_init(&rbio->hash_list);
391 0 : refcount_dec(&rbio->refs);
392 0 : BUG_ON(!list_empty(&rbio->plug_list));
393 : }
394 : }
395 : }
396 :
397 0 : spin_unlock(&rbio->bio_list_lock);
398 0 : spin_unlock(&h->lock);
399 :
400 0 : if (freeit)
401 0 : free_raid_bio(rbio);
402 : }
403 :
404 : /*
405 : * prune a given rbio from the cache
406 : */
407 0 : static void remove_rbio_from_cache(struct btrfs_raid_bio *rbio)
408 : {
409 0 : struct btrfs_stripe_hash_table *table;
410 :
411 0 : if (!test_bit(RBIO_CACHE_BIT, &rbio->flags))
412 : return;
413 :
414 0 : table = rbio->bioc->fs_info->stripe_hash_table;
415 :
416 0 : spin_lock(&table->cache_lock);
417 0 : __remove_rbio_from_cache(rbio);
418 0 : spin_unlock(&table->cache_lock);
419 : }
420 :
421 : /*
422 : * remove everything in the cache
423 : */
424 3242 : static void btrfs_clear_rbio_cache(struct btrfs_fs_info *info)
425 : {
426 3242 : struct btrfs_stripe_hash_table *table;
427 3242 : struct btrfs_raid_bio *rbio;
428 :
429 3242 : table = info->stripe_hash_table;
430 :
431 3242 : spin_lock(&table->cache_lock);
432 3242 : while (!list_empty(&table->stripe_cache)) {
433 0 : rbio = list_entry(table->stripe_cache.next,
434 : struct btrfs_raid_bio,
435 : stripe_cache);
436 0 : __remove_rbio_from_cache(rbio);
437 : }
438 3242 : spin_unlock(&table->cache_lock);
439 3242 : }
440 :
441 : /*
442 : * remove all cached entries and free the hash table
443 : * used by unmount
444 : */
445 3472 : void btrfs_free_stripe_hash_table(struct btrfs_fs_info *info)
446 : {
447 3472 : if (!info->stripe_hash_table)
448 : return;
449 3242 : btrfs_clear_rbio_cache(info);
450 3242 : kvfree(info->stripe_hash_table);
451 3242 : info->stripe_hash_table = NULL;
452 : }
453 :
454 : /*
455 : * insert an rbio into the stripe cache. It
456 : * must have already been prepared by calling
457 : * cache_rbio_pages
458 : *
459 : * If this rbio was already cached, it gets
460 : * moved to the front of the lru.
461 : *
462 : * If the size of the rbio cache is too big, we
463 : * prune an item.
464 : */
465 0 : static void cache_rbio(struct btrfs_raid_bio *rbio)
466 : {
467 0 : struct btrfs_stripe_hash_table *table;
468 :
469 0 : if (!test_bit(RBIO_CACHE_READY_BIT, &rbio->flags))
470 : return;
471 :
472 0 : table = rbio->bioc->fs_info->stripe_hash_table;
473 :
474 0 : spin_lock(&table->cache_lock);
475 0 : spin_lock(&rbio->bio_list_lock);
476 :
477 : /* bump our ref if we were not in the list before */
478 0 : if (!test_and_set_bit(RBIO_CACHE_BIT, &rbio->flags))
479 0 : refcount_inc(&rbio->refs);
480 :
481 0 : if (!list_empty(&rbio->stripe_cache)) {
482 0 : list_move(&rbio->stripe_cache, &table->stripe_cache);
483 : } else {
484 0 : list_add(&rbio->stripe_cache, &table->stripe_cache);
485 0 : table->cache_size += 1;
486 : }
487 :
488 0 : spin_unlock(&rbio->bio_list_lock);
489 :
490 0 : if (table->cache_size > RBIO_CACHE_SIZE) {
491 0 : struct btrfs_raid_bio *found;
492 :
493 0 : found = list_entry(table->stripe_cache.prev,
494 : struct btrfs_raid_bio,
495 : stripe_cache);
496 :
497 0 : if (found != rbio)
498 0 : __remove_rbio_from_cache(found);
499 : }
500 :
501 0 : spin_unlock(&table->cache_lock);
502 : }
503 :
504 : /*
505 : * helper function to run the xor_blocks api. It is only
506 : * able to do MAX_XOR_BLOCKS at a time, so we need to
507 : * loop through.
508 : */
509 0 : static void run_xor(void **pages, int src_cnt, ssize_t len)
510 : {
511 0 : int src_off = 0;
512 0 : int xor_src_cnt = 0;
513 0 : void *dest = pages[src_cnt];
514 :
515 0 : while (src_cnt > 0) {
516 0 : xor_src_cnt = min(src_cnt, MAX_XOR_BLOCKS);
517 0 : xor_blocks(xor_src_cnt, len, dest, pages + src_off);
518 :
519 0 : src_cnt -= xor_src_cnt;
520 0 : src_off += xor_src_cnt;
521 : }
522 0 : }
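/*
 * For example, if MAX_XOR_BLOCKS were 4 and src_cnt were 6, the loop above
 * would call xor_blocks() twice (first with 4 sources, then with the
 * remaining 2), both times accumulating into the same destination buffer
 * pages[src_cnt].
 */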
523 :
524 : /*
525 : * Returns true if the bio list inside this rbio covers an entire stripe (no
526 : * rmw required).
527 : */
528 0 : static int rbio_is_full(struct btrfs_raid_bio *rbio)
529 : {
530 0 : unsigned long size = rbio->bio_list_bytes;
531 0 : int ret = 1;
532 :
533 0 : spin_lock(&rbio->bio_list_lock);
534 0 : if (size != rbio->nr_data * BTRFS_STRIPE_LEN)
535 0 : ret = 0;
536 0 : BUG_ON(size > rbio->nr_data * BTRFS_STRIPE_LEN);
537 0 : spin_unlock(&rbio->bio_list_lock);
538 :
539 0 : return ret;
540 : }
541 :
542 : /*
543 : * returns 1 if it is safe to merge two rbios together.
544 : * The merging is safe if the two rbios correspond to
545 : * the same stripe and if they are both going in the same
546 : * direction (read vs write), and if neither one is
547 : * locked for final IO
548 : *
549 : * The caller is responsible for locking such that
550 : * rmw_locked is safe to test
551 : */
552 0 : static int rbio_can_merge(struct btrfs_raid_bio *last,
553 : struct btrfs_raid_bio *cur)
554 : {
555 0 : if (test_bit(RBIO_RMW_LOCKED_BIT, &last->flags) ||
556 0 : test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags))
557 : return 0;
558 :
559 : /*
560 : * we can't merge with cached rbios, since the
561 : * idea is that when we merge the destination
562 : * rbio is going to run our IO for us. We can
563 : * steal from cached rbios though, other functions
564 : * handle that.
565 : */
566 0 : if (test_bit(RBIO_CACHE_BIT, &last->flags) ||
567 0 : test_bit(RBIO_CACHE_BIT, &cur->flags))
568 : return 0;
569 :
570 0 : if (last->bioc->full_stripe_logical != cur->bioc->full_stripe_logical)
571 : return 0;
572 :
573 : /* we can't merge with different operations */
574 0 : if (last->operation != cur->operation)
575 : return 0;
576 : /*
577 : * We need to read the full stripe from the drive, check
578 : * and repair the parity and write the new results back.
579 : *
580 : * We're not allowed to add any new bios to the
581 : * bio list here, anyone else that wants to
582 : * change this stripe needs to do their own rmw.
583 : */
584 0 : if (last->operation == BTRFS_RBIO_PARITY_SCRUB)
585 : return 0;
586 :
587 0 : if (last->operation == BTRFS_RBIO_REBUILD_MISSING ||
588 : last->operation == BTRFS_RBIO_READ_REBUILD)
589 0 : return 0;
590 :
591 : return 1;
592 : }
593 :
594 : static unsigned int rbio_stripe_sector_index(const struct btrfs_raid_bio *rbio,
595 : unsigned int stripe_nr,
596 : unsigned int sector_nr)
597 : {
598 0 : ASSERT(stripe_nr < rbio->real_stripes);
599 0 : ASSERT(sector_nr < rbio->stripe_nsectors);
600 :
601 0 : return stripe_nr * rbio->stripe_nsectors + sector_nr;
602 : }
603 :
604 : /* Return a sector from rbio->stripe_sectors, not from the bio list */
605 : static struct sector_ptr *rbio_stripe_sector(const struct btrfs_raid_bio *rbio,
606 : unsigned int stripe_nr,
607 : unsigned int sector_nr)
608 : {
609 0 : return &rbio->stripe_sectors[rbio_stripe_sector_index(rbio, stripe_nr,
610 : sector_nr)];
611 : }
612 :
613 : /* Grab a sector inside P stripe */
614 : static struct sector_ptr *rbio_pstripe_sector(const struct btrfs_raid_bio *rbio,
615 : unsigned int sector_nr)
616 : {
617 0 : return rbio_stripe_sector(rbio, rbio->nr_data, sector_nr);
618 : }
619 :
620 : /* Grab a sector inside Q stripe, return NULL if not RAID6 */
621 : static struct sector_ptr *rbio_qstripe_sector(const struct btrfs_raid_bio *rbio,
622 : unsigned int sector_nr)
623 : {
624 0 : if (rbio->nr_data + 1 == rbio->real_stripes)
625 : return NULL;
626 0 : return rbio_stripe_sector(rbio, rbio->nr_data + 1, sector_nr);
627 : }
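/*
 * The helpers above rely on stripe_sectors[] being laid out stripe by
 * stripe: sector @sector_nr of stripe @stripe_nr lives at index
 * stripe_nr * stripe_nsectors + sector_nr.  With the fixed 64K stripe
 * length and a 4K sector size that is 16 sectors per stripe, so e.g. the
 * P stripe of a 3-disk RAID5 (nr_data == 2) starts at index 32.
 */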
628 :
629 : /*
630 : * The first stripe in the table for a logical address
631 : * has the lock. rbios are added in one of three ways:
632 : *
633 : * 1) Nobody has the stripe locked yet. The rbio is given
634 : * the lock and 0 is returned. The caller must start the IO
635 : * themselves.
636 : *
637 : * 2) Someone has the stripe locked, but we're able to merge
638 : * with the lock owner. The rbio is freed and the IO will
639 : * start automatically along with the existing rbio. 1 is returned.
640 : *
641 : * 3) Someone has the stripe locked, but we're not able to merge.
642 : * The rbio is added to the lock owner's plug list, or merged into
643 : * an rbio already on the plug list. When the lock owner unlocks,
644 : * the next rbio on the list is run and the IO is started automatically.
645 : * 1 is returned
646 : *
647 : * If we return 0, the caller still owns the rbio and must continue with
648 : * IO submission. If we return 1, the caller must assume the rbio has
649 : * already been freed.
650 : */
651 0 : static noinline int lock_stripe_add(struct btrfs_raid_bio *rbio)
652 : {
653 0 : struct btrfs_stripe_hash *h;
654 0 : struct btrfs_raid_bio *cur;
655 0 : struct btrfs_raid_bio *pending;
656 0 : struct btrfs_raid_bio *freeit = NULL;
657 0 : struct btrfs_raid_bio *cache_drop = NULL;
658 0 : int ret = 0;
659 :
660 0 : h = rbio->bioc->fs_info->stripe_hash_table->table + rbio_bucket(rbio);
661 :
662 0 : spin_lock(&h->lock);
663 0 : list_for_each_entry(cur, &h->hash_list, hash_list) {
664 0 : if (cur->bioc->full_stripe_logical != rbio->bioc->full_stripe_logical)
665 0 : continue;
666 :
667 0 : spin_lock(&cur->bio_list_lock);
668 :
669 : /* Can we steal this cached rbio's pages? */
670 0 : if (bio_list_empty(&cur->bio_list) &&
671 0 : list_empty(&cur->plug_list) &&
672 0 : test_bit(RBIO_CACHE_BIT, &cur->flags) &&
673 0 : !test_bit(RBIO_RMW_LOCKED_BIT, &cur->flags)) {
674 0 : list_del_init(&cur->hash_list);
675 0 : refcount_dec(&cur->refs);
676 :
677 0 : steal_rbio(cur, rbio);
678 0 : cache_drop = cur;
679 0 : spin_unlock(&cur->bio_list_lock);
680 :
681 0 : goto lockit;
682 : }
683 :
684 : /* Can we merge into the lock owner? */
685 0 : if (rbio_can_merge(cur, rbio)) {
686 0 : merge_rbio(cur, rbio);
687 0 : spin_unlock(&cur->bio_list_lock);
688 0 : freeit = rbio;
689 0 : ret = 1;
690 0 : goto out;
691 : }
692 :
693 :
694 : /*
695 : * We couldn't merge with the running rbio, see if we can merge
696 : * with the pending ones. We don't have to check for rmw_locked
697 : * because there is no way they are inside finish_rmw right now
698 : */
699 0 : list_for_each_entry(pending, &cur->plug_list, plug_list) {
700 0 : if (rbio_can_merge(pending, rbio)) {
701 0 : merge_rbio(pending, rbio);
702 0 : spin_unlock(&cur->bio_list_lock);
703 0 : freeit = rbio;
704 0 : ret = 1;
705 0 : goto out;
706 : }
707 : }
708 :
709 : /*
710 : * No merging, put us on the tail of the plug list, our rbio
711 : * will be started when the currently running rbio unlocks.
712 : */
713 0 : list_add_tail(&rbio->plug_list, &cur->plug_list);
714 0 : spin_unlock(&cur->bio_list_lock);
715 0 : ret = 1;
716 0 : goto out;
717 : }
718 0 : lockit:
719 0 : refcount_inc(&rbio->refs);
720 0 : list_add(&rbio->hash_list, &h->hash_list);
721 0 : out:
722 0 : spin_unlock(&h->lock);
723 0 : if (cache_drop)
724 0 : remove_rbio_from_cache(cache_drop);
725 0 : if (freeit)
726 0 : free_raid_bio(freeit);
727 0 : return ret;
728 : }
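/*
 * A hypothetical caller (a sketch, not taken from this file) would use
 * the return value roughly like this:
 *
 *	if (lock_stripe_add(rbio) == 0)
 *		we own the stripe lock and must submit the IO ourselves;
 *	else
 *		the rbio was merged or queued on the plug list and may
 *		already have been freed, so it must not be touched again;
 */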
729 :
730 : static void recover_rbio_work_locked(struct work_struct *work);
731 :
732 : /*
733 : * Called when an rmw or parity rebuild is completed. If the plug list has more
734 : * rbios waiting for this stripe, the next one on the list will be started
735 : */
736 0 : static noinline void unlock_stripe(struct btrfs_raid_bio *rbio)
737 : {
738 0 : int bucket;
739 0 : struct btrfs_stripe_hash *h;
740 0 : int keep_cache = 0;
741 :
742 0 : bucket = rbio_bucket(rbio);
743 0 : h = rbio->bioc->fs_info->stripe_hash_table->table + bucket;
744 :
745 0 : if (list_empty(&rbio->plug_list))
746 0 : cache_rbio(rbio);
747 :
748 0 : spin_lock(&h->lock);
749 0 : spin_lock(&rbio->bio_list_lock);
750 :
751 0 : if (!list_empty(&rbio->hash_list)) {
752 : /*
753 : * if we're still cached and there is no other IO
754 : * to perform, just leave this rbio here for others
755 : * to steal from later
756 : */
757 0 : if (list_empty(&rbio->plug_list) &&
758 0 : test_bit(RBIO_CACHE_BIT, &rbio->flags)) {
759 0 : keep_cache = 1;
760 0 : clear_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
761 0 : BUG_ON(!bio_list_empty(&rbio->bio_list));
762 0 : goto done;
763 : }
764 :
765 0 : list_del_init(&rbio->hash_list);
766 0 : refcount_dec(&rbio->refs);
767 :
768 : /*
769 : * we use the plug list to hold all the rbios
770 : * waiting for the chance to lock this stripe.
771 : * hand the lock over to one of them.
772 : */
773 0 : if (!list_empty(&rbio->plug_list)) {
774 0 : struct btrfs_raid_bio *next;
775 0 : struct list_head *head = rbio->plug_list.next;
776 :
777 0 : next = list_entry(head, struct btrfs_raid_bio,
778 : plug_list);
779 :
780 0 : list_del_init(&rbio->plug_list);
781 :
782 0 : list_add(&next->hash_list, &h->hash_list);
783 0 : refcount_inc(&next->refs);
784 0 : spin_unlock(&rbio->bio_list_lock);
785 0 : spin_unlock(&h->lock);
786 :
787 0 : if (next->operation == BTRFS_RBIO_READ_REBUILD)
788 0 : start_async_work(next, recover_rbio_work_locked);
789 0 : else if (next->operation == BTRFS_RBIO_REBUILD_MISSING) {
790 0 : steal_rbio(rbio, next);
791 0 : start_async_work(next, recover_rbio_work_locked);
792 0 : } else if (next->operation == BTRFS_RBIO_WRITE) {
793 0 : steal_rbio(rbio, next);
794 0 : start_async_work(next, rmw_rbio_work_locked);
795 0 : } else if (next->operation == BTRFS_RBIO_PARITY_SCRUB) {
796 0 : steal_rbio(rbio, next);
797 0 : start_async_work(next, scrub_rbio_work_locked);
798 : }
799 :
800 0 : goto done_nolock;
801 : }
802 : }
803 0 : done:
804 0 : spin_unlock(&rbio->bio_list_lock);
805 0 : spin_unlock(&h->lock);
806 :
807 : done_nolock:
808 0 : if (!keep_cache)
809 0 : remove_rbio_from_cache(rbio);
810 0 : }
811 :
812 0 : static void rbio_endio_bio_list(struct bio *cur, blk_status_t err)
813 : {
814 0 : struct bio *next;
815 :
816 0 : while (cur) {
817 0 : next = cur->bi_next;
818 0 : cur->bi_next = NULL;
819 0 : cur->bi_status = err;
820 0 : bio_endio(cur);
821 0 : cur = next;
822 : }
823 0 : }
824 :
825 : /*
826 : * this frees the rbio and runs through all the bios in the
827 : * bio_list and calls end_io on them
828 : */
829 0 : static void rbio_orig_end_io(struct btrfs_raid_bio *rbio, blk_status_t err)
830 : {
831 0 : struct bio *cur = bio_list_get(&rbio->bio_list);
832 0 : struct bio *extra;
833 :
834 0 : kfree(rbio->csum_buf);
835 0 : bitmap_free(rbio->csum_bitmap);
836 0 : rbio->csum_buf = NULL;
837 0 : rbio->csum_bitmap = NULL;
838 :
839 : /*
840 : * Clear the data bitmap, as the rbio may be cached for later usage.
841 : * Do this before unlock_stripe() so there will be no new bio
842 : * for this rbio.
843 : */
844 0 : bitmap_clear(&rbio->dbitmap, 0, rbio->stripe_nsectors);
845 :
846 : /*
847 : * At this moment, rbio->bio_list is empty, however since rbio does not
848 : * always have RBIO_RMW_LOCKED_BIT set and rbio is still linked on the
849 : * hash list, rbio may be merged with others so that rbio->bio_list
850 : * becomes non-empty.
851 : * Once unlock_stripe() is done, rbio->bio_list will not be updated any
852 : * more and we can call bio_endio() on all queued bios.
853 : */
854 0 : unlock_stripe(rbio);
855 0 : extra = bio_list_get(&rbio->bio_list);
856 0 : free_raid_bio(rbio);
857 :
858 0 : rbio_endio_bio_list(cur, err);
859 0 : if (extra)
860 0 : rbio_endio_bio_list(extra, err);
861 0 : }
862 :
863 : /*
864 : * Get a sector pointer specified by its @stripe_nr and @sector_nr.
865 : *
866 : * @rbio: The raid bio
867 : * @stripe_nr: Stripe number, valid range [0, real_stripes)
868 : * @sector_nr: Sector number inside the stripe,
869 : * valid range [0, stripe_nsectors)
870 : * @bio_list_only: Whether to use sectors inside the bio list only.
871 : *
872 : * The read/modify/write code wants to reuse the original bio page as much
873 : * as possible, and only use stripe_sectors as fallback.
874 : */
875 0 : static struct sector_ptr *sector_in_rbio(struct btrfs_raid_bio *rbio,
876 : int stripe_nr, int sector_nr,
877 : bool bio_list_only)
878 : {
879 0 : struct sector_ptr *sector;
880 0 : int index;
881 :
882 0 : ASSERT(stripe_nr >= 0 && stripe_nr < rbio->real_stripes);
883 0 : ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
884 :
885 0 : index = stripe_nr * rbio->stripe_nsectors + sector_nr;
886 0 : ASSERT(index >= 0 && index < rbio->nr_sectors);
887 :
888 0 : spin_lock(&rbio->bio_list_lock);
889 0 : sector = &rbio->bio_sectors[index];
890 0 : if (sector->page || bio_list_only) {
891 : /* Don't return sector without a valid page pointer */
892 0 : if (!sector->page)
893 0 : sector = NULL;
894 0 : spin_unlock(&rbio->bio_list_lock);
895 0 : return sector;
896 : }
897 0 : spin_unlock(&rbio->bio_list_lock);
898 :
899 0 : return &rbio->stripe_sectors[index];
900 : }
901 :
902 : /*
903 : * Allocation and initial setup for the btrfs_raid_bio. Note that
904 : * this does not allocate any pages for rbio->stripe_pages.
905 : */
906 0 : static struct btrfs_raid_bio *alloc_rbio(struct btrfs_fs_info *fs_info,
907 : struct btrfs_io_context *bioc)
908 : {
909 0 : const unsigned int real_stripes = bioc->num_stripes - bioc->replace_nr_stripes;
910 0 : const unsigned int stripe_npages = BTRFS_STRIPE_LEN >> PAGE_SHIFT;
911 0 : const unsigned int num_pages = stripe_npages * real_stripes;
912 0 : const unsigned int stripe_nsectors =
913 0 : BTRFS_STRIPE_LEN >> fs_info->sectorsize_bits;
914 0 : const unsigned int num_sectors = stripe_nsectors * real_stripes;
915 0 : struct btrfs_raid_bio *rbio;
916 :
917 : /* PAGE_SIZE must also be aligned to sectorsize for subpage support */
918 0 : ASSERT(IS_ALIGNED(PAGE_SIZE, fs_info->sectorsize));
919 : /*
920 : * Our current stripe len should be fixed to 64k thus stripe_nsectors
921 : * (at most 16) should be no larger than BITS_PER_LONG.
922 : */
923 0 : ASSERT(stripe_nsectors <= BITS_PER_LONG);
924 :
925 0 : rbio = kzalloc(sizeof(*rbio), GFP_NOFS);
926 0 : if (!rbio)
927 : return ERR_PTR(-ENOMEM);
928 0 : rbio->stripe_pages = kcalloc(num_pages, sizeof(struct page *),
929 : GFP_NOFS);
930 0 : rbio->bio_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
931 : GFP_NOFS);
932 0 : rbio->stripe_sectors = kcalloc(num_sectors, sizeof(struct sector_ptr),
933 : GFP_NOFS);
934 0 : rbio->finish_pointers = kcalloc(real_stripes, sizeof(void *), GFP_NOFS);
935 0 : rbio->error_bitmap = bitmap_zalloc(num_sectors, GFP_NOFS);
936 :
937 0 : if (!rbio->stripe_pages || !rbio->bio_sectors || !rbio->stripe_sectors ||
938 0 : !rbio->finish_pointers || !rbio->error_bitmap) {
939 0 : free_raid_bio_pointers(rbio);
940 0 : kfree(rbio);
941 0 : return ERR_PTR(-ENOMEM);
942 : }
943 :
944 0 : bio_list_init(&rbio->bio_list);
945 0 : init_waitqueue_head(&rbio->io_wait);
946 0 : INIT_LIST_HEAD(&rbio->plug_list);
947 0 : spin_lock_init(&rbio->bio_list_lock);
948 0 : INIT_LIST_HEAD(&rbio->stripe_cache);
949 0 : INIT_LIST_HEAD(&rbio->hash_list);
950 0 : btrfs_get_bioc(bioc);
951 0 : rbio->bioc = bioc;
952 0 : rbio->nr_pages = num_pages;
953 0 : rbio->nr_sectors = num_sectors;
954 0 : rbio->real_stripes = real_stripes;
955 0 : rbio->stripe_npages = stripe_npages;
956 0 : rbio->stripe_nsectors = stripe_nsectors;
957 0 : refcount_set(&rbio->refs, 1);
958 0 : atomic_set(&rbio->stripes_pending, 0);
959 :
960 0 : ASSERT(btrfs_nr_parity_stripes(bioc->map_type));
961 0 : rbio->nr_data = real_stripes - btrfs_nr_parity_stripes(bioc->map_type);
962 :
963 0 : return rbio;
964 : }
965 :
966 : /* allocate pages for all the stripes in the bio, including parity */
967 0 : static int alloc_rbio_pages(struct btrfs_raid_bio *rbio)
968 : {
969 0 : int ret;
970 :
971 0 : ret = btrfs_alloc_page_array(rbio->nr_pages, rbio->stripe_pages);
972 0 : if (ret < 0)
973 : return ret;
974 : /* Mapping all sectors */
975 0 : index_stripe_sectors(rbio);
976 0 : return 0;
977 : }
978 :
979 : /* only allocate pages for p/q stripes */
980 0 : static int alloc_rbio_parity_pages(struct btrfs_raid_bio *rbio)
981 : {
982 0 : const int data_pages = rbio->nr_data * rbio->stripe_npages;
983 0 : int ret;
984 :
985 0 : ret = btrfs_alloc_page_array(rbio->nr_pages - data_pages,
986 0 : rbio->stripe_pages + data_pages);
987 0 : if (ret < 0)
988 : return ret;
989 :
990 0 : index_stripe_sectors(rbio);
991 0 : return 0;
992 : }
993 :
994 : /*
995 : * Return the total number of errors found in the vertical stripe of @sector_nr.
996 : *
997 : * @faila and @failb will also be updated to the first and second stripe
998 : * number of the errors.
999 : */
1000 0 : static int get_rbio_veritical_errors(struct btrfs_raid_bio *rbio, int sector_nr,
1001 : int *faila, int *failb)
1002 : {
1003 0 : int stripe_nr;
1004 0 : int found_errors = 0;
1005 :
1006 0 : if (faila || failb) {
1007 : /*
1008 : * Both @faila and @failb should be valid pointers if any of
1009 : * them is specified.
1010 : */
1011 0 : ASSERT(faila && failb);
1012 0 : *faila = -1;
1013 0 : *failb = -1;
1014 : }
1015 :
1016 0 : for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
1017 0 : int total_sector_nr = stripe_nr * rbio->stripe_nsectors + sector_nr;
1018 :
1019 0 : if (test_bit(total_sector_nr, rbio->error_bitmap)) {
1020 0 : found_errors++;
1021 0 : if (faila) {
1022 : /* Update faila and failb. */
1023 0 : if (*faila < 0)
1024 0 : *faila = stripe_nr;
1025 0 : else if (*failb < 0)
1026 0 : *failb = stripe_nr;
1027 : }
1028 : }
1029 : }
1030 0 : return found_errors;
1031 : }
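/*
 * Callers compare the returned count against rbio->bioc->max_errors, which
 * reflects the redundancy of the profile: one tolerated failure per
 * vertical stripe for RAID5, two for RAID6.
 */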
1032 :
1033 : /*
1034 : * Add a single sector @sector into our list of bios for IO.
1035 : *
1036 : * Return 0 if everything went well.
1037 : * Return <0 for error.
1038 : */
1039 0 : static int rbio_add_io_sector(struct btrfs_raid_bio *rbio,
1040 : struct bio_list *bio_list,
1041 : struct sector_ptr *sector,
1042 : unsigned int stripe_nr,
1043 : unsigned int sector_nr,
1044 : enum req_op op)
1045 : {
1046 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1047 0 : struct bio *last = bio_list->tail;
1048 0 : int ret;
1049 0 : struct bio *bio;
1050 0 : struct btrfs_io_stripe *stripe;
1051 0 : u64 disk_start;
1052 :
1053 : /*
1054 : * Note: here stripe_nr has taken device replace into consideration,
1055 : * thus it can be larger than rbio->real_stripes.
1056 : * So here we check against bioc->num_stripes, not rbio->real_stripes.
1057 : */
1058 0 : ASSERT(stripe_nr >= 0 && stripe_nr < rbio->bioc->num_stripes);
1059 0 : ASSERT(sector_nr >= 0 && sector_nr < rbio->stripe_nsectors);
1060 0 : ASSERT(sector->page);
1061 :
1062 0 : stripe = &rbio->bioc->stripes[stripe_nr];
1063 0 : disk_start = stripe->physical + sector_nr * sectorsize;
1064 :
1065 : /* if the device is missing, just fail this stripe */
1066 0 : if (!stripe->dev->bdev) {
1067 0 : int found_errors;
1068 :
1069 0 : set_bit(stripe_nr * rbio->stripe_nsectors + sector_nr,
1070 0 : rbio->error_bitmap);
1071 :
1072 : /* Check if we have reached tolerance early. */
1073 0 : found_errors = get_rbio_veritical_errors(rbio, sector_nr,
1074 : NULL, NULL);
1075 0 : if (found_errors > rbio->bioc->max_errors)
1076 : return -EIO;
1077 0 : return 0;
1078 : }
1079 :
1080 : /* see if we can add this page onto our existing bio */
1081 0 : if (last) {
1082 0 : u64 last_end = last->bi_iter.bi_sector << SECTOR_SHIFT;
1083 0 : last_end += last->bi_iter.bi_size;
1084 :
1085 : /*
1086 : * we can't merge these if they are from different
1087 : * devices or if they are not contiguous
1088 : */
1089 0 : if (last_end == disk_start && !last->bi_status &&
1090 0 : last->bi_bdev == stripe->dev->bdev) {
1091 0 : ret = bio_add_page(last, sector->page, sectorsize,
1092 0 : sector->pgoff);
1093 0 : if (ret == sectorsize)
1094 : return 0;
1095 : }
1096 : }
1097 :
1098 : /* put a new bio on the list */
1099 0 : bio = bio_alloc(stripe->dev->bdev,
1100 : max(BTRFS_STRIPE_LEN >> PAGE_SHIFT, 1),
1101 : op, GFP_NOFS);
1102 0 : bio->bi_iter.bi_sector = disk_start >> SECTOR_SHIFT;
1103 0 : bio->bi_private = rbio;
1104 :
1105 0 : __bio_add_page(bio, sector->page, sectorsize, sector->pgoff);
1106 0 : bio_list_add(bio_list, bio);
1107 0 : return 0;
1108 : }
1109 :
1110 0 : static void index_one_bio(struct btrfs_raid_bio *rbio, struct bio *bio)
1111 : {
1112 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1113 0 : struct bio_vec bvec;
1114 0 : struct bvec_iter iter;
1115 0 : u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1116 0 : rbio->bioc->full_stripe_logical;
1117 :
1118 0 : bio_for_each_segment(bvec, bio, iter) {
1119 0 : u32 bvec_offset;
1120 :
1121 0 : for (bvec_offset = 0; bvec_offset < bvec.bv_len;
1122 0 : bvec_offset += sectorsize, offset += sectorsize) {
1123 0 : int index = offset / sectorsize;
1124 0 : struct sector_ptr *sector = &rbio->bio_sectors[index];
1125 :
1126 0 : sector->page = bvec.bv_page;
1127 0 : sector->pgoff = bvec.bv_offset + bvec_offset;
1128 0 : ASSERT(sector->pgoff < PAGE_SIZE);
1129 : }
1130 : }
1131 0 : }
1132 :
1133 : /*
1134 : * Helper function to walk our bio list and populate the bio_sectors array with
1135 : * the result. This seems expensive, but it is faster than constantly
1136 : * searching through the bio list as we set up the IO for RMW or stripe
1137 : * reconstruction.
1138 : *
1139 : * This must be called before you trust the answers from sector_in_rbio().
1140 : */
1141 0 : static void index_rbio_pages(struct btrfs_raid_bio *rbio)
1142 : {
1143 0 : struct bio *bio;
1144 :
1145 0 : spin_lock(&rbio->bio_list_lock);
1146 0 : bio_list_for_each(bio, &rbio->bio_list)
1147 0 : index_one_bio(rbio, bio);
1148 :
1149 0 : spin_unlock(&rbio->bio_list_lock);
1150 0 : }
1151 :
1152 0 : static void bio_get_trace_info(struct btrfs_raid_bio *rbio, struct bio *bio,
1153 : struct raid56_bio_trace_info *trace_info)
1154 : {
1155 0 : const struct btrfs_io_context *bioc = rbio->bioc;
1156 0 : int i;
1157 :
1158 0 : ASSERT(bioc);
1159 :
1160 : /* We rely on bio->bi_bdev to find the stripe number. */
1161 0 : if (!bio->bi_bdev)
1162 0 : goto not_found;
1163 :
1164 0 : for (i = 0; i < bioc->num_stripes; i++) {
1165 0 : if (bio->bi_bdev != bioc->stripes[i].dev->bdev)
1166 0 : continue;
1167 0 : trace_info->stripe_nr = i;
1168 0 : trace_info->devid = bioc->stripes[i].dev->devid;
1169 0 : trace_info->offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1170 0 : bioc->stripes[i].physical;
1171 0 : return;
1172 : }
1173 :
1174 0 : not_found:
1175 0 : trace_info->devid = -1;
1176 0 : trace_info->offset = -1;
1177 0 : trace_info->stripe_nr = -1;
1178 : }
1179 :
1180 0 : static inline void bio_list_put(struct bio_list *bio_list)
1181 : {
1182 0 : struct bio *bio;
1183 :
1184 0 : while ((bio = bio_list_pop(bio_list)))
1185 0 : bio_put(bio);
1186 0 : }
1187 :
1188 : /* Generate PQ for one vertical stripe. */
1189 0 : static void generate_pq_vertical(struct btrfs_raid_bio *rbio, int sectornr)
1190 : {
1191 0 : void **pointers = rbio->finish_pointers;
1192 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1193 0 : struct sector_ptr *sector;
1194 0 : int stripe;
1195 0 : const bool has_qstripe = rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6;
1196 :
1197 : /* First collect one sector from each data stripe */
1198 0 : for (stripe = 0; stripe < rbio->nr_data; stripe++) {
1199 0 : sector = sector_in_rbio(rbio, stripe, sectornr, 0);
1200 0 : pointers[stripe] = kmap_local_page(sector->page) +
1201 0 : sector->pgoff;
1202 : }
1203 :
1204 : /* Then add the parity stripe */
1205 0 : sector = rbio_pstripe_sector(rbio, sectornr);
1206 0 : sector->uptodate = 1;
1207 0 : pointers[stripe++] = kmap_local_page(sector->page) + sector->pgoff;
1208 :
1209 0 : if (has_qstripe) {
1210 : /*
1211 : * RAID6, add the qstripe and call the library function
1212 : * to fill in our p/q
1213 : */
1214 0 : sector = rbio_qstripe_sector(rbio, sectornr);
1215 0 : sector->uptodate = 1;
1216 0 : pointers[stripe++] = kmap_local_page(sector->page) +
1217 0 : sector->pgoff;
1218 :
1219 0 : raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
1220 : pointers);
1221 : } else {
1222 : /* raid5 */
1223 0 : memcpy(pointers[rbio->nr_data], pointers[0], sectorsize);
1224 0 : run_xor(pointers + 1, rbio->nr_data - 1, sectorsize);
1225 : }
1226 0 : for (stripe = stripe - 1; stripe >= 0; stripe--)
1227 : kunmap_local(pointers[stripe]);
1228 0 : }
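/*
 * For reference, the math performed per vertical stripe above is:
 *
 *	P = D0 ^ D1 ^ ... ^ D(n-1)			(RAID5 and RAID6)
 *	Q = g^0*D0 ^ g^1*D1 ^ ... ^ g^(n-1)*D(n-1)	(RAID6 only)
 *
 * where the Q terms are multiplications by powers of the generator g in
 * GF(2^8), as computed by raid6_call.gen_syndrome().
 */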
1229 :
1230 0 : static int rmw_assemble_write_bios(struct btrfs_raid_bio *rbio,
1231 : struct bio_list *bio_list)
1232 : {
1233 : /* The total sector number inside the full stripe. */
1234 0 : int total_sector_nr;
1235 0 : int sectornr;
1236 0 : int stripe;
1237 0 : int ret;
1238 :
1239 0 : ASSERT(bio_list_size(bio_list) == 0);
1240 :
1241 : /* We should have at least one data sector. */
1242 0 : ASSERT(bitmap_weight(&rbio->dbitmap, rbio->stripe_nsectors));
1243 :
1244 : /*
1245 : * Reset errors, as we may have errors inherited from a degraded
1246 : * write.
1247 : */
1248 0 : bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
1249 :
1250 : /*
1251 : * Start assembly. Make bios for everything from the higher layers (the
1252 : * bio_list in our rbio) and our P/Q. Ignore everything else.
1253 : */
1254 0 : for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
1255 0 : total_sector_nr++) {
1256 0 : struct sector_ptr *sector;
1257 :
1258 0 : stripe = total_sector_nr / rbio->stripe_nsectors;
1259 0 : sectornr = total_sector_nr % rbio->stripe_nsectors;
1260 :
1261 : /* This vertical stripe has no data, skip it. */
1262 0 : if (!test_bit(sectornr, &rbio->dbitmap))
1263 0 : continue;
1264 :
1265 0 : if (stripe < rbio->nr_data) {
1266 0 : sector = sector_in_rbio(rbio, stripe, sectornr, 1);
1267 0 : if (!sector)
1268 0 : continue;
1269 : } else {
1270 0 : sector = rbio_stripe_sector(rbio, stripe, sectornr);
1271 : }
1272 :
1273 0 : ret = rbio_add_io_sector(rbio, bio_list, sector, stripe,
1274 : sectornr, REQ_OP_WRITE);
1275 0 : if (ret)
1276 0 : goto error;
1277 : }
1278 :
1279 0 : if (likely(!rbio->bioc->replace_nr_stripes))
1280 : return 0;
1281 :
1282 : /*
1283 : * Make a copy for the replace target device.
1284 : *
1285 : * Thus the source stripe number (in replace_stripe_src) should be valid.
1286 : */
1287 : ASSERT(rbio->bioc->replace_stripe_src >= 0);
1288 :
1289 0 : for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
1290 0 : total_sector_nr++) {
1291 0 : struct sector_ptr *sector;
1292 :
1293 0 : stripe = total_sector_nr / rbio->stripe_nsectors;
1294 0 : sectornr = total_sector_nr % rbio->stripe_nsectors;
1295 :
1296 : /*
1297 : * For RAID56, there is only one device that can be replaced,
1298 : * and replace_stripe_src indicates the stripe number we
1299 : * need to copy from.
1300 : */
1301 0 : if (stripe != rbio->bioc->replace_stripe_src) {
1302 : /*
1303 : * We can skip the whole stripe completely, note
1304 : * total_sector_nr will be increased by one anyway.
1305 : */
1306 0 : ASSERT(sectornr == 0);
1307 0 : total_sector_nr += rbio->stripe_nsectors - 1;
1308 0 : continue;
1309 : }
1310 :
1311 : /* This vertical stripe has no data, skip it. */
1312 0 : if (!test_bit(sectornr, &rbio->dbitmap))
1313 0 : continue;
1314 :
1315 0 : if (stripe < rbio->nr_data) {
1316 0 : sector = sector_in_rbio(rbio, stripe, sectornr, 1);
1317 0 : if (!sector)
1318 0 : continue;
1319 : } else {
1320 0 : sector = rbio_stripe_sector(rbio, stripe, sectornr);
1321 : }
1322 :
1323 0 : ret = rbio_add_io_sector(rbio, bio_list, sector,
1324 0 : rbio->real_stripes,
1325 : sectornr, REQ_OP_WRITE);
1326 0 : if (ret)
1327 0 : goto error;
1328 : }
1329 :
1330 : return 0;
1331 0 : error:
1332 0 : bio_list_put(bio_list);
1333 0 : return -EIO;
1334 : }
1335 :
1336 0 : static void set_rbio_range_error(struct btrfs_raid_bio *rbio, struct bio *bio)
1337 : {
1338 0 : struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1339 0 : u32 offset = (bio->bi_iter.bi_sector << SECTOR_SHIFT) -
1340 0 : rbio->bioc->full_stripe_logical;
1341 0 : int total_nr_sector = offset >> fs_info->sectorsize_bits;
1342 :
1343 0 : ASSERT(total_nr_sector < rbio->nr_data * rbio->stripe_nsectors);
1344 :
1345 0 : bitmap_set(rbio->error_bitmap, total_nr_sector,
1346 0 : bio->bi_iter.bi_size >> fs_info->sectorsize_bits);
1347 :
1348 : /*
1349 : * Special handling for raid56_alloc_missing_rbio() used by
1350 : * scrub/replace. Unlike call path in raid56_parity_recover(), they
1351 : * pass an empty bio here. Thus we have to find out the missing device
1352 : * and mark the stripe error instead.
1353 : */
1354 0 : if (bio->bi_iter.bi_size == 0) {
1355 : bool found_missing = false;
1356 : int stripe_nr;
1357 :
1358 0 : for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
1359 0 : if (!rbio->bioc->stripes[stripe_nr].dev->bdev) {
1360 0 : found_missing = true;
1361 0 : bitmap_set(rbio->error_bitmap,
1362 0 : stripe_nr * rbio->stripe_nsectors,
1363 0 : rbio->stripe_nsectors);
1364 : }
1365 : }
1366 0 : ASSERT(found_missing);
1367 : }
1368 0 : }
1369 :
1370 : /*
1371 : * For the subpage case, we can no longer set a page uptodate directly for
1372 : * stripe_pages[], thus we need to locate the sector.
1373 : */
1374 0 : static struct sector_ptr *find_stripe_sector(struct btrfs_raid_bio *rbio,
1375 : struct page *page,
1376 : unsigned int pgoff)
1377 : {
1378 0 : int i;
1379 :
1380 0 : for (i = 0; i < rbio->nr_sectors; i++) {
1381 0 : struct sector_ptr *sector = &rbio->stripe_sectors[i];
1382 :
1383 0 : if (sector->page == page && sector->pgoff == pgoff)
1384 0 : return sector;
1385 : }
1386 : return NULL;
1387 : }
1388 :
1389 : /*
1390 : * this sets each page in the bio uptodate. It should only be used on private
1391 : * rbio pages, nothing that comes in from the higher layers
1392 : */
1393 0 : static void set_bio_pages_uptodate(struct btrfs_raid_bio *rbio, struct bio *bio)
1394 : {
1395 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
1396 0 : struct bio_vec *bvec;
1397 0 : struct bvec_iter_all iter_all;
1398 :
1399 0 : ASSERT(!bio_flagged(bio, BIO_CLONED));
1400 :
1401 0 : bio_for_each_segment_all(bvec, bio, iter_all) {
1402 0 : struct sector_ptr *sector;
1403 0 : int pgoff;
1404 :
1405 0 : for (pgoff = bvec->bv_offset; pgoff - bvec->bv_offset < bvec->bv_len;
1406 0 : pgoff += sectorsize) {
1407 0 : sector = find_stripe_sector(rbio, bvec->bv_page, pgoff);
1408 0 : ASSERT(sector);
1409 0 : if (sector)
1410 0 : sector->uptodate = 1;
1411 : }
1412 : }
1413 0 : }
1414 :
1415 0 : static int get_bio_sector_nr(struct btrfs_raid_bio *rbio, struct bio *bio)
1416 : {
1417 0 : struct bio_vec *bv = bio_first_bvec_all(bio);
1418 0 : int i;
1419 :
1420 0 : for (i = 0; i < rbio->nr_sectors; i++) {
1421 0 : struct sector_ptr *sector;
1422 :
1423 0 : sector = &rbio->stripe_sectors[i];
1424 0 : if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
1425 : break;
1426 0 : sector = &rbio->bio_sectors[i];
1427 0 : if (sector->page == bv->bv_page && sector->pgoff == bv->bv_offset)
1428 : break;
1429 : }
1430 0 : ASSERT(i < rbio->nr_sectors);
1431 0 : return i;
1432 : }
1433 :
1434 0 : static void rbio_update_error_bitmap(struct btrfs_raid_bio *rbio, struct bio *bio)
1435 : {
1436 0 : int total_sector_nr = get_bio_sector_nr(rbio, bio);
1437 0 : u32 bio_size = 0;
1438 0 : struct bio_vec *bvec;
1439 0 : int i;
1440 :
1441 0 : bio_for_each_bvec_all(bvec, bio, i)
1442 0 : bio_size += bvec->bv_len;
1443 :
1444 : /*
1445 : * Since we can have multiple bios touching the error_bitmap, we cannot
1446 : * call bitmap_set() without protection.
1447 : *
1448 : * Instead use set_bit() for each bit, as set_bit() itself is atomic.
1449 : */
1450 0 : for (i = total_sector_nr; i < total_sector_nr +
1451 0 : (bio_size >> rbio->bioc->fs_info->sectorsize_bits); i++)
1452 0 : set_bit(i, rbio->error_bitmap);
1453 0 : }
1454 :
1455 : /* Verify the data sectors at read time. */
1456 0 : static void verify_bio_data_sectors(struct btrfs_raid_bio *rbio,
1457 : struct bio *bio)
1458 : {
1459 0 : struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1460 0 : int total_sector_nr = get_bio_sector_nr(rbio, bio);
1461 0 : struct bio_vec *bvec;
1462 0 : struct bvec_iter_all iter_all;
1463 :
1464 : /* No data csum for the whole stripe, no need to verify. */
1465 0 : if (!rbio->csum_bitmap || !rbio->csum_buf)
1466 0 : return;
1467 :
1468 : /* P/Q stripes, they have no data csum to verify against. */
1469 0 : if (total_sector_nr >= rbio->nr_data * rbio->stripe_nsectors)
1470 : return;
1471 :
1472 0 : bio_for_each_segment_all(bvec, bio, iter_all) {
1473 0 : int bv_offset;
1474 :
1475 0 : for (bv_offset = bvec->bv_offset;
1476 0 : bv_offset < bvec->bv_offset + bvec->bv_len;
1477 0 : bv_offset += fs_info->sectorsize, total_sector_nr++) {
1478 0 : u8 csum_buf[BTRFS_CSUM_SIZE];
1479 0 : u8 *expected_csum = rbio->csum_buf +
1480 0 : total_sector_nr * fs_info->csum_size;
1481 0 : int ret;
1482 :
1483 : /* No csum for this sector, skip to the next sector. */
1484 0 : if (!test_bit(total_sector_nr, rbio->csum_bitmap))
1485 0 : continue;
1486 :
1487 0 : ret = btrfs_check_sector_csum(fs_info, bvec->bv_page,
1488 : bv_offset, csum_buf, expected_csum);
1489 0 : if (ret < 0)
1490 0 : set_bit(total_sector_nr, rbio->error_bitmap);
1491 : }
1492 : }
1493 : }
1494 :
1495 0 : static void raid_wait_read_end_io(struct bio *bio)
1496 : {
1497 0 : struct btrfs_raid_bio *rbio = bio->bi_private;
1498 :
1499 0 : if (bio->bi_status) {
1500 0 : rbio_update_error_bitmap(rbio, bio);
1501 : } else {
1502 0 : set_bio_pages_uptodate(rbio, bio);
1503 0 : verify_bio_data_sectors(rbio, bio);
1504 : }
1505 :
1506 0 : bio_put(bio);
1507 0 : if (atomic_dec_and_test(&rbio->stripes_pending))
1508 0 : wake_up(&rbio->io_wait);
1509 0 : }
1510 :
1511 0 : static void submit_read_wait_bio_list(struct btrfs_raid_bio *rbio,
1512 : struct bio_list *bio_list)
1513 : {
1514 0 : struct bio *bio;
1515 :
1516 0 : atomic_set(&rbio->stripes_pending, bio_list_size(bio_list));
1517 0 : while ((bio = bio_list_pop(bio_list))) {
1518 0 : bio->bi_end_io = raid_wait_read_end_io;
1519 :
1520 0 : if (trace_raid56_scrub_read_recover_enabled()) {
1521 0 : struct raid56_bio_trace_info trace_info = { 0 };
1522 :
1523 0 : bio_get_trace_info(rbio, bio, &trace_info);
1524 0 : trace_raid56_scrub_read_recover(rbio, bio, &trace_info);
1525 : }
1526 0 : submit_bio(bio);
1527 : }
1528 :
1529 0 : wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
1530 0 : }
1531 :
1532 0 : static int alloc_rbio_data_pages(struct btrfs_raid_bio *rbio)
1533 : {
1534 0 : const int data_pages = rbio->nr_data * rbio->stripe_npages;
1535 0 : int ret;
1536 :
1537 0 : ret = btrfs_alloc_page_array(data_pages, rbio->stripe_pages);
1538 0 : if (ret < 0)
1539 : return ret;
1540 :
1541 0 : index_stripe_sectors(rbio);
1542 0 : return 0;
1543 : }
1544 :
1545 : /*
1546 : * We use plugging callbacks to collect full stripes.
1547 : * Any time we get a partial stripe write while plugged
1548 : * we collect it into a list. When the unplug comes down,
1549 : * we sort the list by logical block number and merge
1550 : * everything we can into the same rbios
1551 : */
1552 : struct btrfs_plug_cb {
1553 : struct blk_plug_cb cb;
1554 : struct btrfs_fs_info *info;
1555 : struct list_head rbio_list;
1556 : struct work_struct work;
1557 : };
1558 :
1559 : /*
1560 : * rbios on the plug list are sorted for easier merging.
1561 : */
1562 0 : static int plug_cmp(void *priv, const struct list_head *a,
1563 : const struct list_head *b)
1564 : {
1565 0 : const struct btrfs_raid_bio *ra = container_of(a, struct btrfs_raid_bio,
1566 : plug_list);
1567 0 : const struct btrfs_raid_bio *rb = container_of(b, struct btrfs_raid_bio,
1568 : plug_list);
1569 0 : u64 a_sector = ra->bio_list.head->bi_iter.bi_sector;
1570 0 : u64 b_sector = rb->bio_list.head->bi_iter.bi_sector;
1571 :
1572 0 : if (a_sector < b_sector)
1573 : return -1;
1574 0 : if (a_sector > b_sector)
1575 0 : return 1;
1576 : return 0;
1577 : }
1578 :
1579 0 : static void raid_unplug(struct blk_plug_cb *cb, bool from_schedule)
1580 : {
1581 0 : struct btrfs_plug_cb *plug = container_of(cb, struct btrfs_plug_cb, cb);
1582 0 : struct btrfs_raid_bio *cur;
1583 0 : struct btrfs_raid_bio *last = NULL;
1584 :
1585 0 : list_sort(NULL, &plug->rbio_list, plug_cmp);
1586 :
1587 0 : while (!list_empty(&plug->rbio_list)) {
1588 0 : cur = list_entry(plug->rbio_list.next,
1589 : struct btrfs_raid_bio, plug_list);
1590 0 : list_del_init(&cur->plug_list);
1591 :
1592 0 : if (rbio_is_full(cur)) {
1593 : /* We have a full stripe, queue it down. */
1594 0 : start_async_work(cur, rmw_rbio_work);
1595 0 : continue;
1596 : }
1597 0 : if (last) {
1598 0 : if (rbio_can_merge(last, cur)) {
1599 0 : merge_rbio(last, cur);
1600 0 : free_raid_bio(cur);
1601 0 : continue;
1602 : }
1603 0 : start_async_work(last, rmw_rbio_work);
1604 : }
1605 : last = cur;
1606 : }
1607 0 : if (last)
1608 0 : start_async_work(last, rmw_rbio_work);
1609 0 : kfree(plug);
1610 0 : }
1611 :
1612 : /* Add the original bio into rbio->bio_list, and update rbio::dbitmap. */
1613 0 : static void rbio_add_bio(struct btrfs_raid_bio *rbio, struct bio *orig_bio)
1614 : {
1615 0 : const struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1616 0 : const u64 orig_logical = orig_bio->bi_iter.bi_sector << SECTOR_SHIFT;
1617 0 : const u64 full_stripe_start = rbio->bioc->full_stripe_logical;
1618 0 : const u32 orig_len = orig_bio->bi_iter.bi_size;
1619 0 : const u32 sectorsize = fs_info->sectorsize;
1620 0 : u64 cur_logical;
1621 :
1622 0 : ASSERT(orig_logical >= full_stripe_start &&
1623 : orig_logical + orig_len <= full_stripe_start +
1624 : rbio->nr_data * BTRFS_STRIPE_LEN);
1625 :
1626 0 : bio_list_add(&rbio->bio_list, orig_bio);
1627 0 : rbio->bio_list_bytes += orig_bio->bi_iter.bi_size;
1628 :
1629 : /* Update the dbitmap. */
1630 0 : for (cur_logical = orig_logical; cur_logical < orig_logical + orig_len;
1631 0 : cur_logical += sectorsize) {
1632 0 : int bit = ((u32)(cur_logical - full_stripe_start) >>
1633 0 : fs_info->sectorsize_bits) % rbio->stripe_nsectors;
1634 :
1635 0 : set_bit(bit, &rbio->dbitmap);
1636 : }
1637 0 : }
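/*
 * A worked example of the dbitmap math: with a 4K sector size and the 64K
 * stripe length (stripe_nsectors == 16), a bio starting 68K into the full
 * stripe sets bit (68K >> 12) % 16 == 17 % 16 == 1, i.e. the second
 * vertical stripe position, regardless of which data stripe it lands on.
 */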
1638 :
1639 : /*
1640 : * our main entry point for writes from the rest of the FS.
1641 : */
1642 0 : void raid56_parity_write(struct bio *bio, struct btrfs_io_context *bioc)
1643 : {
1644 0 : struct btrfs_fs_info *fs_info = bioc->fs_info;
1645 0 : struct btrfs_raid_bio *rbio;
1646 0 : struct btrfs_plug_cb *plug = NULL;
1647 0 : struct blk_plug_cb *cb;
1648 :
1649 0 : rbio = alloc_rbio(fs_info, bioc);
1650 0 : if (IS_ERR(rbio)) {
1651 0 : bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
1652 0 : bio_endio(bio);
1653 0 : return;
1654 : }
1655 0 : rbio->operation = BTRFS_RBIO_WRITE;
1656 0 : rbio_add_bio(rbio, bio);
1657 :
1658 : /*
1659 : * Don't plug on full rbios, just get them out the door
1660 : * as quickly as we can
1661 : */
1662 0 : if (!rbio_is_full(rbio)) {
1663 0 : cb = blk_check_plugged(raid_unplug, fs_info, sizeof(*plug));
1664 0 : if (cb) {
1665 0 : plug = container_of(cb, struct btrfs_plug_cb, cb);
1666 0 : if (!plug->info) {
1667 0 : plug->info = fs_info;
1668 0 : INIT_LIST_HEAD(&plug->rbio_list);
1669 : }
1670 0 : list_add_tail(&rbio->plug_list, &plug->rbio_list);
1671 0 : return;
1672 : }
1673 : }
1674 :
1675 : /*
1676 : * Either we don't have any existing plug, or we're doing a full stripe,
1677 : * queue the rmw work now.
1678 : */
1679 0 : start_async_work(rbio, rmw_rbio_work);
1680 : }
1681 :
1682 0 : static int verify_one_sector(struct btrfs_raid_bio *rbio,
1683 : int stripe_nr, int sector_nr)
1684 : {
1685 0 : struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1686 0 : struct sector_ptr *sector;
1687 0 : u8 csum_buf[BTRFS_CSUM_SIZE];
1688 0 : u8 *csum_expected;
1689 0 : int ret;
1690 :
1691 0 : if (!rbio->csum_bitmap || !rbio->csum_buf)
1692 : return 0;
1693 :
1694 : /* No way to verify P/Q as they are not covered by data csum. */
1695 0 : if (stripe_nr >= rbio->nr_data)
1696 : return 0;
1697 : /*
1698 : * If we're rebuilding a read, we have to use pages from the
1699 : * bio list if possible.
1700 : */
1701 0 : if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1702 : rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
1703 0 : sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
1704 : } else {
1705 0 : sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
1706 : }
1707 :
1708 0 : ASSERT(sector->page);
1709 :
1710 0 : csum_expected = rbio->csum_buf +
1711 0 : (stripe_nr * rbio->stripe_nsectors + sector_nr) *
1712 0 : fs_info->csum_size;
1713 0 : ret = btrfs_check_sector_csum(fs_info, sector->page, sector->pgoff,
1714 : csum_buf, csum_expected);
1715 0 : return ret;
1716 : }
1717 :
1718 : /*
1719 : * Recover a vertical stripe specified by @sector_nr.
1720 : * @*pointers are the pre-allocated pointers by the caller, so we don't
1721 : * need to allocate/free the pointers again and again.
1722 : */
1723 0 : static int recover_vertical(struct btrfs_raid_bio *rbio, int sector_nr,
1724 : void **pointers, void **unmap_array)
1725 : {
1726 0 : struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
1727 0 : struct sector_ptr *sector;
1728 0 : const u32 sectorsize = fs_info->sectorsize;
1729 0 : int found_errors;
1730 0 : int faila;
1731 0 : int failb;
1732 0 : int stripe_nr;
1733 0 : int ret = 0;
1734 :
1735 : /*
1736 : * Now we just use the bitmap to mark the horizontal stripes in
1737 : * which we have data when doing parity scrub.
1738 : */
1739 0 : if (rbio->operation == BTRFS_RBIO_PARITY_SCRUB &&
1740 0 : !test_bit(sector_nr, &rbio->dbitmap))
1741 : return 0;
1742 :
1743 0 : found_errors = get_rbio_veritical_errors(rbio, sector_nr, &faila,
1744 : &failb);
1745 : /*
1746 : * No errors in the vertical stripe, skip it. This can happen during
1747 : * recovery where only part of a stripe failed the csum check.
1748 : */
1749 0 : if (!found_errors)
1750 : return 0;
1751 :
1752 0 : if (found_errors > rbio->bioc->max_errors)
1753 : return -EIO;
1754 :
1755 : /*
1756 : * Set up our array of pointers with sectors from each stripe
1757 : *
1758 : * NOTE: store a duplicate array of pointers to preserve the
1759 : * pointer order.
1760 : */
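     : /*
     :  * The pstripe rebuild below rotates entries in @pointers, so
     :  * @unmap_array keeps the original mapping addresses for the
     :  * kunmap_local() calls in the cleanup loop.
     :  */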
1761 0 : for (stripe_nr = 0; stripe_nr < rbio->real_stripes; stripe_nr++) {
1762 : /*
1763 : * If we're rebuilding a read, we have to use pages from the
1764 : * bio list if possible.
1765 : */
1766 0 : if ((rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1767 : rbio->operation == BTRFS_RBIO_REBUILD_MISSING)) {
1768 0 : sector = sector_in_rbio(rbio, stripe_nr, sector_nr, 0);
1769 : } else {
1770 0 : sector = rbio_stripe_sector(rbio, stripe_nr, sector_nr);
1771 : }
1772 0 : ASSERT(sector->page);
1773 0 : pointers[stripe_nr] = kmap_local_page(sector->page) +
1774 0 : sector->pgoff;
1775 0 : unmap_array[stripe_nr] = pointers[stripe_nr];
1776 : }
1777 :
1778 : /* All raid6 handling here */
1779 0 : if (rbio->bioc->map_type & BTRFS_BLOCK_GROUP_RAID6) {
1780 : /* Single failure, rebuild from parity raid5 style */
1781 0 : if (failb < 0) {
1782 0 : if (faila == rbio->nr_data)
1783 : /*
1784 : * Just the P stripe has failed, without
1785 : * a bad data or Q stripe.
1786 : * We have nothing to do, just skip the
1787 : * recovery for this stripe.
1788 : */
1789 0 : goto cleanup;
1790 : /*
1791 : * a single failure in raid6 is rebuilt
1792 : * in the pstripe code below
1793 : */
1794 0 : goto pstripe;
1795 : }
1796 :
1797 : /*
1798 : * If the Q stripe has failed, do a P stripe reconstruction from
1799 : * the xors.
1800 : * If both the Q stripe and the P stripe have failed, we're
1801 : * here due to a crc mismatch and we can't give them the
1802 : * data they want.
1803 : */
1804 0 : if (failb == rbio->real_stripes - 1) {
1805 0 : if (faila == rbio->real_stripes - 2)
1806 : /*
1807 : * Only P and Q are corrupted.
1808 : * We only care about recovering the data stripes,
1809 : * so we can skip this vertical stripe.
1810 : */
1811 0 : goto cleanup;
1812 : /*
1813 : * Otherwise we have one bad data stripe and
1814 : * a good P stripe. raid5!
1815 : */
1816 0 : goto pstripe;
1817 : }
1818 :
1819 0 : if (failb == rbio->real_stripes - 2) {
1820 0 : raid6_datap_recov(rbio->real_stripes, sectorsize,
1821 : faila, pointers);
1822 : } else {
1823 0 : raid6_2data_recov(rbio->real_stripes, sectorsize,
1824 : faila, failb, pointers);
1825 : }
1826 : } else {
1827 : void *p;
1828 :
1829 : /* Rebuild from P stripe here (raid5 or raid6). */
1830 : ASSERT(failb == -1);
1831 0 : pstripe:
1832 : /* Copy parity block into failed block to start with */
1833 0 : memcpy(pointers[faila], pointers[rbio->nr_data], sectorsize);
1834 :
1835 : /* Rearrange the pointer array */
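     : /*
     :  * Move the failed block's buffer (now holding the parity copy) to
     :  * the last data slot, the destination that run_xor() xors into, so
     :  * xoring in the remaining data stripes reconstructs the missing
     :  * data in place.
     :  */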
1836 0 : p = pointers[faila];
1837 0 : for (stripe_nr = faila; stripe_nr < rbio->nr_data - 1;
1838 0 : stripe_nr++)
1839 0 : pointers[stripe_nr] = pointers[stripe_nr + 1];
1840 0 : pointers[rbio->nr_data - 1] = p;
1841 :
1842 : /* Xor in the rest */
1843 0 : run_xor(pointers, rbio->nr_data - 1, sectorsize);
1844 :
1845 : }
1846 :
1847 : /*
1848 : * No matter if this is an RMW or a recovery, we should have all the
1849 : * failed sectors in the vertical stripe repaired, thus they are now
1850 : * uptodate.
1851 : * This matters especially if we decide to cache the rbio, as we need
1852 : * at least all data sectors to be uptodate for that.
1853 : *
1854 : * If possible, also check if the repaired sector matches its data
1855 : * checksum.
1856 : */
1857 0 : if (faila >= 0) {
1858 0 : ret = verify_one_sector(rbio, faila, sector_nr);
1859 0 : if (ret < 0)
1860 0 : goto cleanup;
1861 :
1862 0 : sector = rbio_stripe_sector(rbio, faila, sector_nr);
1863 0 : sector->uptodate = 1;
1864 : }
1865 0 : if (failb >= 0) {
1866 0 : ret = verify_one_sector(rbio, failb, sector_nr);
1867 0 : if (ret < 0)
1868 0 : goto cleanup;
1869 :
1870 0 : sector = rbio_stripe_sector(rbio, failb, sector_nr);
1871 0 : sector->uptodate = 1;
1872 : }
1873 :
1874 0 : cleanup:
1875 0 : for (stripe_nr = rbio->real_stripes - 1; stripe_nr >= 0; stripe_nr--)
1876 : kunmap_local(unmap_array[stripe_nr]);
1877 : return ret;
1878 : }
1879 :
1880 0 : static int recover_sectors(struct btrfs_raid_bio *rbio)
1881 : {
1882 0 : void **pointers = NULL;
1883 0 : void **unmap_array = NULL;
1884 0 : int sectornr;
1885 0 : int ret = 0;
1886 :
1887 : /*
1888 : * The @pointers array stores the pointer for each sector.
1889 : *
1890 : * @unmap_array stores a copy of those pointers that does not get
1891 : * reordered during reconstruction, so that kunmap_local() works.
1892 : */
1893 0 : pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
1894 0 : unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
1895 0 : if (!pointers || !unmap_array) {
1896 0 : ret = -ENOMEM;
1897 0 : goto out;
1898 : }
1899 :
1900 0 : if (rbio->operation == BTRFS_RBIO_READ_REBUILD ||
1901 : rbio->operation == BTRFS_RBIO_REBUILD_MISSING) {
1902 0 : spin_lock(&rbio->bio_list_lock);
1903 0 : set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
1904 0 : spin_unlock(&rbio->bio_list_lock);
1905 : }
1906 :
1907 0 : index_rbio_pages(rbio);
1908 :
1909 0 : for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
1910 0 : ret = recover_vertical(rbio, sectornr, pointers, unmap_array);
1911 0 : if (ret < 0)
1912 : break;
1913 : }
1914 :
1915 0 : out:
1916 0 : kfree(pointers);
1917 0 : kfree(unmap_array);
1918 0 : return ret;
1919 : }
1920 :
1921 0 : static void recover_rbio(struct btrfs_raid_bio *rbio)
1922 : {
1923 0 : struct bio_list bio_list = BIO_EMPTY_LIST;
1924 0 : int total_sector_nr;
1925 0 : int ret = 0;
1926 :
1927 : /*
1928 : * Whether we're recovering from a read failure or doing a degraded
1929 : * write, the caller should have set the error bitmap correctly.
1930 : */
1931 0 : ASSERT(bitmap_weight(rbio->error_bitmap, rbio->nr_sectors));
1932 :
1933 : /* For recovery, we need to read all sectors including P/Q. */
1934 0 : ret = alloc_rbio_pages(rbio);
1935 0 : if (ret < 0)
1936 0 : goto out;
1937 :
1938 0 : index_rbio_pages(rbio);
1939 :
1940 : /*
1941 : * Read everything that hasn't failed. However this time we will
1942 : * not trust any cached sector.
1943 : * A cached sector may contain stale data in a part that the higher
1944 : * layer is not reading, so it can not be trusted for reconstruction.
1945 : *
1946 : * So here we always re-read everything in the recovery path.
1947 : */
1948 0 : for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
1949 0 : total_sector_nr++) {
1950 0 : int stripe = total_sector_nr / rbio->stripe_nsectors;
1951 0 : int sectornr = total_sector_nr % rbio->stripe_nsectors;
1952 0 : struct sector_ptr *sector;
1953 :
1954 : /*
1955 : * Skip any range which has an error. It can be a range which is
1956 : * marked as an error (e.g. for a csum mismatch), or it can be on a
1957 : * missing device.
1958 : */
1959 0 : if (!rbio->bioc->stripes[stripe].dev->bdev ||
1960 0 : test_bit(total_sector_nr, rbio->error_bitmap)) {
1961 : /*
1962 : * Also set the error bit for a missing device, which
1963 : * may not yet have its error bit set.
1964 : */
1965 0 : set_bit(total_sector_nr, rbio->error_bitmap);
1966 0 : continue;
1967 : }
1968 :
1969 0 : sector = rbio_stripe_sector(rbio, stripe, sectornr);
1970 0 : ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
1971 : sectornr, REQ_OP_READ);
1972 0 : if (ret < 0) {
1973 0 : bio_list_put(&bio_list);
1974 0 : goto out;
1975 : }
1976 : }
1977 :
1978 0 : submit_read_wait_bio_list(rbio, &bio_list);
1979 0 : ret = recover_sectors(rbio);
1980 0 : out:
1981 0 : rbio_orig_end_io(rbio, errno_to_blk_status(ret));
1982 0 : }
1983 :
1984 0 : static void recover_rbio_work(struct work_struct *work)
1985 : {
1986 0 : struct btrfs_raid_bio *rbio;
1987 :
1988 0 : rbio = container_of(work, struct btrfs_raid_bio, work);
1989 0 : if (!lock_stripe_add(rbio))
1990 0 : recover_rbio(rbio);
1991 0 : }
1992 :
1993 0 : static void recover_rbio_work_locked(struct work_struct *work)
1994 : {
1995 0 : recover_rbio(container_of(work, struct btrfs_raid_bio, work));
1996 0 : }
1997 :
1998 0 : static void set_rbio_raid6_extra_error(struct btrfs_raid_bio *rbio, int mirror_num)
1999 : {
2000 0 : bool found = false;
2001 0 : int sector_nr;
2002 :
2003 : /*
2004 : * This is for extra RAID6 recovery tries, thus the mirror number should
2005 : * be larger than 2.
2006 : * Mirror 1 means reading from the data stripes. Mirror 2 means rebuilding
2007 : * using RAID5 methods.
2008 : */
2009 0 : ASSERT(mirror_num > 2);
2010 0 : for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
2011 0 : int found_errors;
2012 0 : int faila;
2013 0 : int failb;
2014 :
2015 0 : found_errors = get_rbio_veritical_errors(rbio, sector_nr,
2016 : &faila, &failb);
2017 : /* This vertical stripe doesn't have errors. */
2018 0 : if (!found_errors)
2019 0 : continue;
2020 :
2021 : /*
2022 : * If we found errors, there should be only one error, marked
2023 : * by the previous set_rbio_range_error().
2024 : */
2025 0 : ASSERT(found_errors == 1);
2026 0 : found = true;
2027 :
2028 : /* Now select another stripe to mark as error. */
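     : /*
     :  * For mirror_num == 3 this is the P stripe (real_stripes - 2),
     :  * which forces reconstruction from the Q stripe; each higher
     :  * mirror number moves one stripe lower, and if the choice lands
     :  * on or before the already failed stripe we step one lower still.
     :  */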
2029 0 : failb = rbio->real_stripes - (mirror_num - 1);
2030 0 : if (failb <= faila)
2031 0 : failb--;
2032 :
2033 : /* Set the extra bit in error bitmap. */
2034 0 : if (failb >= 0)
2035 0 : set_bit(failb * rbio->stripe_nsectors + sector_nr,
2036 0 : rbio->error_bitmap);
2037 : }
2038 :
2039 : /* We should have found at least one vertical stripe with an error. */
2040 0 : ASSERT(found);
2041 0 : }
2042 :
2043 : /*
2044 : * the main entry point for reads from the higher layers. This
2045 : * is really only called when the normal read path had a failure,
2046 : * so we assume the bio they send down corresponds to a failed part
2047 : * of the drive.
2048 : */
2049 0 : void raid56_parity_recover(struct bio *bio, struct btrfs_io_context *bioc,
2050 : int mirror_num)
2051 : {
2052 0 : struct btrfs_fs_info *fs_info = bioc->fs_info;
2053 0 : struct btrfs_raid_bio *rbio;
2054 :
2055 0 : rbio = alloc_rbio(fs_info, bioc);
2056 0 : if (IS_ERR(rbio)) {
2057 0 : bio->bi_status = errno_to_blk_status(PTR_ERR(rbio));
2058 0 : bio_endio(bio);
2059 0 : return;
2060 : }
2061 :
2062 0 : rbio->operation = BTRFS_RBIO_READ_REBUILD;
2063 0 : rbio_add_bio(rbio, bio);
2064 :
2065 0 : set_rbio_range_error(rbio, bio);
2066 :
2067 : /*
2068 : * Loop retry:
2069 : * for 'mirror_num == 2', reconstruct from all other stripes.
2070 : * for 'mirror_num > 2', select a stripe to fail on every retry.
2071 : */
2072 0 : if (mirror_num > 2)
2073 0 : set_rbio_raid6_extra_error(rbio, mirror_num);
2074 :
2075 0 : start_async_work(rbio, recover_rbio_work);
2076 : }
2077 :
2078 0 : static void fill_data_csums(struct btrfs_raid_bio *rbio)
2079 : {
2080 0 : struct btrfs_fs_info *fs_info = rbio->bioc->fs_info;
2081 0 : struct btrfs_root *csum_root = btrfs_csum_root(fs_info,
2082 : rbio->bioc->full_stripe_logical);
2083 0 : const u64 start = rbio->bioc->full_stripe_logical;
2084 0 : const u32 len = (rbio->nr_data * rbio->stripe_nsectors) <<
2085 0 : fs_info->sectorsize_bits;
2086 0 : int ret;
2087 :
2088 : /* The rbio should not have its csum buffer initialized. */
2089 0 : ASSERT(!rbio->csum_buf && !rbio->csum_bitmap);
2090 :
2091 : /*
2092 : * Skip the csum search if:
2093 : *
2094 : * - The rbio doesn't belong to data block groups
2095 : * Then we are doing IO for tree blocks, no need to search csums.
2096 : *
2097 : * - The rbio belongs to mixed block groups
2098 : * This is to avoid a deadlock: we're already holding the full
2099 : * stripe lock, so if we trigger a metadata read that needs to do
2100 : * raid56 recovery, we will deadlock.
2101 : */
2102 0 : if (!(rbio->bioc->map_type & BTRFS_BLOCK_GROUP_DATA) ||
2103 : rbio->bioc->map_type & BTRFS_BLOCK_GROUP_METADATA)
2104 : return;
2105 :
2106 0 : rbio->csum_buf = kzalloc(rbio->nr_data * rbio->stripe_nsectors *
2107 0 : fs_info->csum_size, GFP_NOFS);
2108 0 : rbio->csum_bitmap = bitmap_zalloc(rbio->nr_data * rbio->stripe_nsectors,
2109 : GFP_NOFS);
2110 0 : if (!rbio->csum_buf || !rbio->csum_bitmap) {
2111 0 : ret = -ENOMEM;
2112 0 : goto error;
2113 : }
2114 :
2115 0 : ret = btrfs_lookup_csums_bitmap(csum_root, start, start + len - 1,
2116 : rbio->csum_buf, rbio->csum_bitmap, false);
2117 0 : if (ret < 0)
2118 0 : goto error;
2119 0 : if (bitmap_empty(rbio->csum_bitmap, len >> fs_info->sectorsize_bits))
2120 0 : goto no_csum;
2121 : return;
2122 :
2123 0 : error:
2124 : /*
2125 : * We failed to allocate memory or to grab the csums, but it's not fatal,
2126 : * we can still continue. However it's better to warn users that RMW is
2127 : * no longer safe for this particular sub-stripe write.
2128 : */
2129 0 : btrfs_warn_rl(fs_info,
2130 : "sub-stripe write for full stripe %llu is not safe, failed to get csum: %d",
2131 : rbio->bioc->full_stripe_logical, ret);
2132 0 : no_csum:
2133 0 : kfree(rbio->csum_buf);
2134 0 : bitmap_free(rbio->csum_bitmap);
2135 0 : rbio->csum_buf = NULL;
2136 0 : rbio->csum_bitmap = NULL;
2137 : }
2138 :
2139 0 : static int rmw_read_wait_recover(struct btrfs_raid_bio *rbio)
2140 : {
2141 0 : struct bio_list bio_list = BIO_EMPTY_LIST;
2142 0 : int total_sector_nr;
2143 0 : int ret = 0;
2144 :
2145 : /*
2146 : * Fill the data csums we need for data verification. We need to fill
2147 : * the csum_bitmap/csum_buf first, as our endio function will try to
2148 : * verify the data sectors.
2149 : */
2150 0 : fill_data_csums(rbio);
2151 :
2152 : /*
2153 : * Build a list of bios to read all sectors (including data and P/Q).
2154 : *
2155 : * This is needed to support the later csum verification and recovery.
2156 : */
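     : /*
     :  * Reading every sector (not just the missing data) lets the read
     :  * end io verify the data checksums against csum_buf and mark any
     :  * mismatch in the error bitmap, which recover_sectors() below can
     :  * then repair from the remaining stripes.
     :  */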
2157 0 : for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2158 0 : total_sector_nr++) {
2159 0 : struct sector_ptr *sector;
2160 0 : int stripe = total_sector_nr / rbio->stripe_nsectors;
2161 0 : int sectornr = total_sector_nr % rbio->stripe_nsectors;
2162 :
2163 0 : sector = rbio_stripe_sector(rbio, stripe, sectornr);
2164 0 : ret = rbio_add_io_sector(rbio, &bio_list, sector,
2165 : stripe, sectornr, REQ_OP_READ);
2166 0 : if (ret) {
2167 0 : bio_list_put(&bio_list);
2168 0 : return ret;
2169 : }
2170 : }
2171 :
2172 : /*
2173 : * We may or may not have corrupted sectors (from a missing device
2174 : * or a csum mismatch), just let recover_sectors() handle them all.
2175 : */
2176 0 : submit_read_wait_bio_list(rbio, &bio_list);
2177 0 : return recover_sectors(rbio);
2178 : }
2179 :
2180 0 : static void raid_wait_write_end_io(struct bio *bio)
2181 : {
2182 0 : struct btrfs_raid_bio *rbio = bio->bi_private;
2183 0 : blk_status_t err = bio->bi_status;
2184 :
2185 0 : if (err)
2186 0 : rbio_update_error_bitmap(rbio, bio);
2187 0 : bio_put(bio);
2188 0 : if (atomic_dec_and_test(&rbio->stripes_pending))
2189 0 : wake_up(&rbio->io_wait);
2190 0 : }
2191 :
2192 0 : static void submit_write_bios(struct btrfs_raid_bio *rbio,
2193 : struct bio_list *bio_list)
2194 : {
2195 0 : struct bio *bio;
2196 :
2197 0 : atomic_set(&rbio->stripes_pending, bio_list_size(bio_list));
2198 0 : while ((bio = bio_list_pop(bio_list))) {
2199 0 : bio->bi_end_io = raid_wait_write_end_io;
2200 :
2201 0 : if (trace_raid56_write_stripe_enabled()) {
2202 0 : struct raid56_bio_trace_info trace_info = { 0 };
2203 :
2204 0 : bio_get_trace_info(rbio, bio, &trace_info);
2205 0 : trace_raid56_write_stripe(rbio, bio, &trace_info);
2206 : }
2207 0 : submit_bio(bio);
2208 : }
2209 0 : }
2210 :
2211 : /*
2212 : * Determine if we need to read any sector from the disk.
2213 : * Should only be used in the RMW path, to skip a cached rbio.
2214 : */
2215 0 : static bool need_read_stripe_sectors(struct btrfs_raid_bio *rbio)
2216 : {
2217 0 : int i;
2218 :
2219 0 : for (i = 0; i < rbio->nr_data * rbio->stripe_nsectors; i++) {
2220 0 : struct sector_ptr *sector = &rbio->stripe_sectors[i];
2221 :
2222 : /*
2223 : * We have a sector which doesn't have a page and isn't uptodate,
2224 : * thus this rbio can not be a cached one, as a cached rbio must
2225 : * have all its data sectors present and uptodate.
2226 : */
2227 0 : if (!sector->page || !sector->uptodate)
2228 : return true;
2229 : }
2230 : return false;
2231 : }
2232 :
2233 0 : static void rmw_rbio(struct btrfs_raid_bio *rbio)
2234 : {
2235 0 : struct bio_list bio_list;
2236 0 : int sectornr;
2237 0 : int ret = 0;
2238 :
2239 : /*
2240 : * Allocate the pages for parity first, as P/Q pages will always be
2241 : * needed for both full-stripe and sub-stripe writes.
2242 : */
2243 0 : ret = alloc_rbio_parity_pages(rbio);
2244 0 : if (ret < 0)
2245 0 : goto out;
2246 :
2247 : /*
2248 : * For a full stripe write, or when we have every data sector already
2249 : * cached, we can go to the write path immediately.
2250 : */
2251 0 : if (!rbio_is_full(rbio) && need_read_stripe_sectors(rbio)) {
2252 : /*
2253 : * Now we're doing a sub-stripe write, we also need all the data
2254 : * stripes to do the full RMW.
2255 : */
2256 0 : ret = alloc_rbio_data_pages(rbio);
2257 0 : if (ret < 0)
2258 0 : goto out;
2259 :
2260 0 : index_rbio_pages(rbio);
2261 :
2262 0 : ret = rmw_read_wait_recover(rbio);
2263 0 : if (ret < 0)
2264 0 : goto out;
2265 : }
2266 :
2267 : /*
2268 : * At this stage we're not allowed to add any new bios to the
2269 : * bio list any more; anyone else that wants to change this stripe
2270 : * needs to do their own rmw.
2271 : */
2272 0 : spin_lock(&rbio->bio_list_lock);
2273 0 : set_bit(RBIO_RMW_LOCKED_BIT, &rbio->flags);
2274 0 : spin_unlock(&rbio->bio_list_lock);
2275 :
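     : /*
     :  * Clear the error bitmap: any read time errors have been handled by
     :  * the recovery above, so the check after the writes below only
     :  * counts write failures.
     :  */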
2276 0 : bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
2277 :
2278 0 : index_rbio_pages(rbio);
2279 :
2280 : /*
2281 : * We don't cache full rbios because we're assuming
2282 : * the higher layers are unlikely to use this area of
2283 : * the disk again soon. If they do use it again,
2284 : * hopefully they will send another full bio.
2285 : */
2286 0 : if (!rbio_is_full(rbio))
2287 0 : cache_rbio_pages(rbio);
2288 : else
2289 0 : clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2290 :
2291 0 : for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++)
2292 0 : generate_pq_vertical(rbio, sectornr);
2293 :
2294 0 : bio_list_init(&bio_list);
2295 0 : ret = rmw_assemble_write_bios(rbio, &bio_list);
2296 0 : if (ret < 0)
2297 0 : goto out;
2298 :
2299 : /* We should have at least one bio assembled. */
2300 0 : ASSERT(bio_list_size(&bio_list));
2301 0 : submit_write_bios(rbio, &bio_list);
2302 0 : wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
2303 :
2304 : /* We may have more errors than our tolerance during the writes. */
2305 0 : for (sectornr = 0; sectornr < rbio->stripe_nsectors; sectornr++) {
2306 0 : int found_errors;
2307 :
2308 0 : found_errors = get_rbio_veritical_errors(rbio, sectornr, NULL, NULL);
2309 0 : if (found_errors > rbio->bioc->max_errors) {
2310 : ret = -EIO;
2311 : break;
2312 : }
2313 : }
2314 0 : out:
2315 0 : rbio_orig_end_io(rbio, errno_to_blk_status(ret));
2316 0 : }
2317 :
2318 0 : static void rmw_rbio_work(struct work_struct *work)
2319 : {
2320 0 : struct btrfs_raid_bio *rbio;
2321 :
2322 0 : rbio = container_of(work, struct btrfs_raid_bio, work);
2323 0 : if (lock_stripe_add(rbio) == 0)
2324 0 : rmw_rbio(rbio);
2325 0 : }
2326 :
2327 0 : static void rmw_rbio_work_locked(struct work_struct *work)
2328 : {
2329 0 : rmw_rbio(container_of(work, struct btrfs_raid_bio, work));
2330 0 : }
2331 :
2332 : /*
2333 : * The following code is used to scrub/replace the parity stripe.
2334 : *
2335 : * The caller must have already increased bio_counter for getting @bioc.
2336 : *
2337 : * Note: We need to make sure all the pages added into the scrub/replace
2338 : * raid bio are correct and do not change during the scrub/replace. That
2339 : * is, those pages hold only metadata or file data covered by checksums.
2340 : */
2341 :
2342 0 : struct btrfs_raid_bio *raid56_parity_alloc_scrub_rbio(struct bio *bio,
2343 : struct btrfs_io_context *bioc,
2344 : struct btrfs_device *scrub_dev,
2345 : unsigned long *dbitmap, int stripe_nsectors)
2346 : {
2347 0 : struct btrfs_fs_info *fs_info = bioc->fs_info;
2348 0 : struct btrfs_raid_bio *rbio;
2349 0 : int i;
2350 :
2351 0 : rbio = alloc_rbio(fs_info, bioc);
2352 0 : if (IS_ERR(rbio))
2353 : return NULL;
2354 0 : bio_list_add(&rbio->bio_list, bio);
2355 : /*
2356 : * This is a special bio which is used to hold the completion handler
2357 : * and make the scrub rbio similar to the other types.
2358 : */
2359 0 : ASSERT(!bio->bi_iter.bi_size);
2360 0 : rbio->operation = BTRFS_RBIO_PARITY_SCRUB;
2361 :
2362 : /*
2363 : * After mapping bioc with BTRFS_MAP_WRITE, parities have been sorted
2364 : * to the end position, so this search can start from the first parity
2365 : * stripe.
2366 : */
2367 0 : for (i = rbio->nr_data; i < rbio->real_stripes; i++) {
2368 0 : if (bioc->stripes[i].dev == scrub_dev) {
2369 0 : rbio->scrubp = i;
2370 0 : break;
2371 : }
2372 : }
2373 0 : ASSERT(i < rbio->real_stripes);
2374 :
2375 0 : bitmap_copy(&rbio->dbitmap, dbitmap, stripe_nsectors);
2376 0 : return rbio;
2377 : }
2378 :
2379 : /*
2380 : * We only scrub the parity for which we have correct data on the same
2381 : * horizontal stripe, so we don't need to allocate pages for all the stripes.
2382 : */
2383 0 : static int alloc_rbio_essential_pages(struct btrfs_raid_bio *rbio)
2384 : {
2385 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
2386 0 : int total_sector_nr;
2387 :
2388 0 : for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2389 0 : total_sector_nr++) {
2390 0 : struct page *page;
2391 0 : int sectornr = total_sector_nr % rbio->stripe_nsectors;
2392 0 : int index = (total_sector_nr * sectorsize) >> PAGE_SHIFT;
2393 :
2394 0 : if (!test_bit(sectornr, &rbio->dbitmap))
2395 0 : continue;
2396 0 : if (rbio->stripe_pages[index])
2397 0 : continue;
2398 0 : page = alloc_page(GFP_NOFS);
2399 0 : if (!page)
2400 : return -ENOMEM;
2401 0 : rbio->stripe_pages[index] = page;
2402 : }
2403 0 : index_stripe_sectors(rbio);
2404 0 : return 0;
2405 : }
2406 :
2407 0 : static int finish_parity_scrub(struct btrfs_raid_bio *rbio)
2408 : {
2409 0 : struct btrfs_io_context *bioc = rbio->bioc;
2410 0 : const u32 sectorsize = bioc->fs_info->sectorsize;
2411 0 : void **pointers = rbio->finish_pointers;
2412 0 : unsigned long *pbitmap = &rbio->finish_pbitmap;
2413 0 : int nr_data = rbio->nr_data;
2414 0 : int stripe;
2415 0 : int sectornr;
2416 0 : bool has_qstripe;
2417 0 : struct sector_ptr p_sector = { 0 };
2418 0 : struct sector_ptr q_sector = { 0 };
2419 0 : struct bio_list bio_list;
2420 0 : int is_replace = 0;
2421 0 : int ret;
2422 :
2423 0 : bio_list_init(&bio_list);
2424 :
2425 0 : if (rbio->real_stripes - rbio->nr_data == 1)
2426 : has_qstripe = false;
2427 0 : else if (rbio->real_stripes - rbio->nr_data == 2)
2428 : has_qstripe = true;
2429 : else
2430 0 : BUG();
2431 :
2432 : /*
2433 : * If replace is running and our P/Q stripe is being replaced, then we
2434 : * need to duplicate the final write to the replace target.
2435 : */
2436 0 : if (bioc->replace_nr_stripes && bioc->replace_stripe_src == rbio->scrubp) {
2437 0 : is_replace = 1;
2438 0 : bitmap_copy(pbitmap, &rbio->dbitmap, rbio->stripe_nsectors);
2439 : }
2440 :
2441 : /*
2442 : * The higher layers (the scrubber) are unlikely to
2443 : * use this area of the disk again soon, so don't cache
2444 : * it.
2445 : */
2446 0 : clear_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
2447 :
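     : /*
     :  * p_sector and q_sector are scratch pages: the expected parity is
     :  * generated into them below and compared against the parity we
     :  * have on disk; sectors that already match are dropped from
     :  * dbitmap so they are not written back.
     :  */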
2448 0 : p_sector.page = alloc_page(GFP_NOFS);
2449 0 : if (!p_sector.page)
2450 : return -ENOMEM;
2451 0 : p_sector.pgoff = 0;
2452 0 : p_sector.uptodate = 1;
2453 :
2454 0 : if (has_qstripe) {
2455 : /* RAID6, allocate and map temp space for the Q stripe */
2456 0 : q_sector.page = alloc_page(GFP_NOFS);
2457 0 : if (!q_sector.page) {
2458 0 : __free_page(p_sector.page);
2459 0 : p_sector.page = NULL;
2460 0 : return -ENOMEM;
2461 : }
2462 0 : q_sector.pgoff = 0;
2463 0 : q_sector.uptodate = 1;
2464 0 : pointers[rbio->real_stripes - 1] = kmap_local_page(q_sector.page);
2465 : }
2466 :
2467 0 : bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
2468 :
2469 : /* Map the parity stripe just once */
2470 0 : pointers[nr_data] = kmap_local_page(p_sector.page);
2471 :
2472 0 : for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
2473 : struct sector_ptr *sector;
2474 : void *parity;
2475 :
2476 : /* first collect one page from each data stripe */
2477 0 : for (stripe = 0; stripe < nr_data; stripe++) {
2478 0 : sector = sector_in_rbio(rbio, stripe, sectornr, 0);
2479 0 : pointers[stripe] = kmap_local_page(sector->page) +
2480 0 : sector->pgoff;
2481 : }
2482 :
2483 0 : if (has_qstripe) {
2484 : /* RAID6, call the library function to fill in our P/Q */
2485 0 : raid6_call.gen_syndrome(rbio->real_stripes, sectorsize,
2486 : pointers);
2487 : } else {
2488 : /* raid5 */
2489 0 : memcpy(pointers[nr_data], pointers[0], sectorsize);
2490 0 : run_xor(pointers + 1, nr_data - 1, sectorsize);
2491 : }
2492 :
2493 : /* Check scrubbing parity and repair it */
2494 0 : sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
2495 0 : parity = kmap_local_page(sector->page) + sector->pgoff;
2496 0 : if (memcmp(parity, pointers[rbio->scrubp], sectorsize) != 0)
2497 0 : memcpy(parity, pointers[rbio->scrubp], sectorsize);
2498 : else
2499 : /* The parity is correct, no need to write it back */
2500 0 : bitmap_clear(&rbio->dbitmap, sectornr, 1);
2501 0 : kunmap_local(parity);
2502 :
2503 0 : for (stripe = nr_data - 1; stripe >= 0; stripe--)
2504 : kunmap_local(pointers[stripe]);
2505 : }
2506 :
2507 0 : kunmap_local(pointers[nr_data]);
2508 0 : __free_page(p_sector.page);
2509 0 : p_sector.page = NULL;
2510 0 : if (q_sector.page) {
2511 0 : kunmap_local(pointers[rbio->real_stripes - 1]);
2512 0 : __free_page(q_sector.page);
2513 0 : q_sector.page = NULL;
2514 : }
2515 :
2516 : /*
2517 : * time to start writing. Make bios for everything from the
2518 : * higher layers (the bio_list in our rbio) and our p/q. Ignore
2519 : * everything else.
2520 : */
2521 0 : for_each_set_bit(sectornr, &rbio->dbitmap, rbio->stripe_nsectors) {
2522 0 : struct sector_ptr *sector;
2523 :
2524 0 : sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
2525 0 : ret = rbio_add_io_sector(rbio, &bio_list, sector, rbio->scrubp,
2526 : sectornr, REQ_OP_WRITE);
2527 0 : if (ret)
2528 0 : goto cleanup;
2529 : }
2530 :
2531 0 : if (!is_replace)
2532 0 : goto submit_write;
2533 :
2534 : /*
2535 : * Replace is running and our parity stripe needs to be duplicated to
2536 : * the target device. Check we have a valid source stripe number.
2537 : */
2538 : ASSERT(rbio->bioc->replace_stripe_src >= 0);
2539 0 : for_each_set_bit(sectornr, pbitmap, rbio->stripe_nsectors) {
2540 0 : struct sector_ptr *sector;
2541 :
2542 0 : sector = rbio_stripe_sector(rbio, rbio->scrubp, sectornr);
2543 0 : ret = rbio_add_io_sector(rbio, &bio_list, sector,
2544 0 : rbio->real_stripes,
2545 : sectornr, REQ_OP_WRITE);
2546 0 : if (ret)
2547 0 : goto cleanup;
2548 : }
2549 :
2550 0 : submit_write:
2551 0 : submit_write_bios(rbio, &bio_list);
2552 0 : return 0;
2553 :
2554 0 : cleanup:
2555 0 : bio_list_put(&bio_list);
2556 0 : return ret;
2557 : }
2558 :
2559 : static inline int is_data_stripe(struct btrfs_raid_bio *rbio, int stripe)
2560 : {
2561 0 : if (stripe >= 0 && stripe < rbio->nr_data)
2562 0 : return 1;
2563 : return 0;
2564 : }
2565 :
2566 0 : static int recover_scrub_rbio(struct btrfs_raid_bio *rbio)
2567 : {
2568 0 : void **pointers = NULL;
2569 0 : void **unmap_array = NULL;
2570 0 : int sector_nr;
2571 0 : int ret = 0;
2572 :
2573 : /*
2574 : * The @pointers array stores the pointer for each sector.
2575 : *
2576 : * @unmap_array stores a copy of those pointers that does not get
2577 : * reordered during reconstruction, so that kunmap_local() works.
2578 : */
2579 0 : pointers = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
2580 0 : unmap_array = kcalloc(rbio->real_stripes, sizeof(void *), GFP_NOFS);
2581 0 : if (!pointers || !unmap_array) {
2582 0 : ret = -ENOMEM;
2583 0 : goto out;
2584 : }
2585 :
2586 0 : for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
2587 0 : int dfail = 0, failp = -1;
2588 0 : int faila;
2589 0 : int failb;
2590 0 : int found_errors;
2591 :
2592 0 : found_errors = get_rbio_veritical_errors(rbio, sector_nr,
2593 : &faila, &failb);
2594 0 : if (found_errors > rbio->bioc->max_errors) {
2595 0 : ret = -EIO;
2596 0 : goto out;
2597 : }
2598 0 : if (found_errors == 0)
2599 0 : continue;
2600 :
2601 : /* We should have at least one error here. */
2602 0 : ASSERT(faila >= 0 || failb >= 0);
2603 :
2604 0 : if (is_data_stripe(rbio, faila))
2605 : dfail++;
2606 0 : else if (is_parity_stripe(faila))
2607 : failp = faila;
2608 :
2609 0 : if (is_data_stripe(rbio, failb))
2610 0 : dfail++;
2611 0 : else if (is_parity_stripe(failb))
2612 : failp = failb;
2613 : /*
2614 : * Because we can not use the parity being scrubbed to repair the
2615 : * data, our repair capability is reduced. (In the
2616 : * case of RAID5, we can not repair anything.)
2617 : */
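     : /*
     :  * E.g. on RAID6 (max_errors == 2) at most one failed data stripe
     :  * can be tolerated while scrubbing a parity, and on RAID5
     :  * (max_errors == 1) any failed data stripe is fatal here.
     :  */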
2618 0 : if (dfail > rbio->bioc->max_errors - 1) {
2619 0 : ret = -EIO;
2620 0 : goto out;
2621 : }
2622 : /*
2623 : * If all the data is good and only the parity is corrupted, just
2624 : * repair the parity, there is no need to recover data stripes.
2625 : */
2626 0 : if (dfail == 0)
2627 0 : continue;
2628 :
2629 : /*
2630 : * Here we have one corrupted data stripe and one corrupted
2631 : * parity on RAID6. If the corrupted parity is the one being
2632 : * scrubbed, we can luckily use the other parity to repair the
2633 : * data; otherwise we can not repair the data stripe.
2634 : */
2635 0 : if (failp != rbio->scrubp) {
2636 0 : ret = -EIO;
2637 0 : goto out;
2638 : }
2639 :
2640 : ret = recover_vertical(rbio, sector_nr, pointers, unmap_array);
2641 : if (ret < 0)
2642 : goto out;
2643 : }
2644 0 : out:
2645 0 : kfree(pointers);
2646 0 : kfree(unmap_array);
2647 0 : return ret;
2648 : }
2649 :
2650 0 : static int scrub_assemble_read_bios(struct btrfs_raid_bio *rbio)
2651 : {
2652 0 : struct bio_list bio_list = BIO_EMPTY_LIST;
2653 0 : int total_sector_nr;
2654 0 : int ret = 0;
2655 :
2656 : /* Build a list of bios to read all the missing parts. */
2657 0 : for (total_sector_nr = 0; total_sector_nr < rbio->nr_sectors;
2658 0 : total_sector_nr++) {
2659 0 : int sectornr = total_sector_nr % rbio->stripe_nsectors;
2660 0 : int stripe = total_sector_nr / rbio->stripe_nsectors;
2661 0 : struct sector_ptr *sector;
2662 :
2663 : /* No data in the vertical stripe, no need to read. */
2664 0 : if (!test_bit(sectornr, &rbio->dbitmap))
2665 0 : continue;
2666 :
2667 : /*
2668 : * We want to find all the sectors missing from the rbio and
2669 : * read them from the disk. If sector_in_rbio() finds a sector
2670 : * in the bio list we don't need to read it off the stripe.
2671 : */
2672 0 : sector = sector_in_rbio(rbio, stripe, sectornr, 1);
2673 0 : if (sector)
2674 0 : continue;
2675 :
2676 0 : sector = rbio_stripe_sector(rbio, stripe, sectornr);
2677 : /*
2678 : * The bio cache may have handed us an uptodate sector. If so,
2679 : * use it.
2680 : */
2681 0 : if (sector->uptodate)
2682 0 : continue;
2683 :
2684 0 : ret = rbio_add_io_sector(rbio, &bio_list, sector, stripe,
2685 : sectornr, REQ_OP_READ);
2686 0 : if (ret) {
2687 0 : bio_list_put(&bio_list);
2688 0 : return ret;
2689 : }
2690 : }
2691 :
2692 0 : submit_read_wait_bio_list(rbio, &bio_list);
2693 0 : return 0;
2694 : }
2695 :
2696 0 : static void scrub_rbio(struct btrfs_raid_bio *rbio)
2697 : {
2698 0 : int sector_nr;
2699 0 : int ret;
2700 :
2701 0 : ret = alloc_rbio_essential_pages(rbio);
2702 0 : if (ret)
2703 0 : goto out;
2704 :
2705 0 : bitmap_clear(rbio->error_bitmap, 0, rbio->nr_sectors);
2706 :
2707 0 : ret = scrub_assemble_read_bios(rbio);
2708 0 : if (ret < 0)
2709 0 : goto out;
2710 :
2711 : /* We may have some failures, recover the failed sectors first. */
2712 0 : ret = recover_scrub_rbio(rbio);
2713 0 : if (ret < 0)
2714 0 : goto out;
2715 :
2716 : /*
2717 : * We have every sector properly prepared. We can now finish the scrub
2718 : * and write back the good content.
2719 : */
2720 0 : ret = finish_parity_scrub(rbio);
2721 0 : wait_event(rbio->io_wait, atomic_read(&rbio->stripes_pending) == 0);
2722 0 : for (sector_nr = 0; sector_nr < rbio->stripe_nsectors; sector_nr++) {
2723 0 : int found_errors;
2724 :
2725 0 : found_errors = get_rbio_veritical_errors(rbio, sector_nr, NULL, NULL);
2726 0 : if (found_errors > rbio->bioc->max_errors) {
2727 : ret = -EIO;
2728 : break;
2729 : }
2730 : }
2731 0 : out:
2732 0 : rbio_orig_end_io(rbio, errno_to_blk_status(ret));
2733 0 : }
2734 :
2735 0 : static void scrub_rbio_work_locked(struct work_struct *work)
2736 : {
2737 0 : scrub_rbio(container_of(work, struct btrfs_raid_bio, work));
2738 0 : }
2739 :
2740 0 : void raid56_parity_submit_scrub_rbio(struct btrfs_raid_bio *rbio)
2741 : {
2742 0 : if (!lock_stripe_add(rbio))
2743 0 : start_async_work(rbio, scrub_rbio_work_locked);
2744 0 : }
2745 :
2746 : /*
2747 : * This is for scrub call sites where we already have correct data contents.
2748 : * This allows us to avoid reading data stripes again.
2749 : *
2750 : * Unfortunately here we have to do a page copy, rather than reusing the pages.
2751 : * This is due to the fact that the rbio has its own page management for its cache.
2752 : */
2753 0 : void raid56_parity_cache_data_pages(struct btrfs_raid_bio *rbio,
2754 : struct page **data_pages, u64 data_logical)
2755 : {
2756 0 : const u64 offset_in_full_stripe = data_logical -
2757 0 : rbio->bioc->full_stripe_logical;
2758 0 : const int page_index = offset_in_full_stripe >> PAGE_SHIFT;
2759 0 : const u32 sectorsize = rbio->bioc->fs_info->sectorsize;
2760 0 : const u32 sectors_per_page = PAGE_SIZE / sectorsize;
2761 0 : int ret;
2762 :
2763 : /*
2764 : * If we hit ENOMEM temporarily here, but the allocation later at
2765 : * raid56_parity_submit_scrub_rbio() time succeeds, we just do
2766 : * the extra read, not a big deal.
2767 : *
2768 : * If we hit ENOMEM later at raid56_parity_submit_scrub_rbio() time,
2769 : * the bio will get a proper error number set.
2770 : */
2771 0 : ret = alloc_rbio_data_pages(rbio);
2772 0 : if (ret < 0)
2773 : return;
2774 :
2775 : /* data_logical must be at stripe boundary and inside the full stripe. */
2776 : ASSERT(IS_ALIGNED(offset_in_full_stripe, BTRFS_STRIPE_LEN));
2777 : ASSERT(offset_in_full_stripe < (rbio->nr_data << BTRFS_STRIPE_LEN_SHIFT));
2778 :
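     : /*
     :  * Copy one BTRFS_STRIPE_LEN worth of data pages into the rbio's
     :  * stripe pages starting at page_index, and mark those sectors
     :  * uptodate so later read paths (e.g. scrub_assemble_read_bios())
     :  * can skip them.
     :  */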
2779 0 : for (int page_nr = 0; page_nr < (BTRFS_STRIPE_LEN >> PAGE_SHIFT); page_nr++) {
2780 0 : struct page *dst = rbio->stripe_pages[page_nr + page_index];
2781 0 : struct page *src = data_pages[page_nr];
2782 :
2783 0 : memcpy_page(dst, 0, src, 0, PAGE_SIZE);
2784 0 : for (int sector_nr = sectors_per_page * page_index;
2785 0 : sector_nr < sectors_per_page * (page_index + 1);
2786 0 : sector_nr++)
2787 0 : rbio->stripe_sectors[sector_nr].uptodate = true;
2788 : }
2789 : }