OSDN Git Service

[PATCH] fix oops when starting md multipath 2.4 kernel
author Lars Marowsky-Bree <lmb@suse.de>
Sat, 6 Aug 2005 22:26:00 +0000 (19:26 -0300)
committer Marcelo Tosatti <marcelo@dmt.cnet>
Sat, 6 Aug 2005 22:26:00 +0000 (19:26 -0300)
The device major/minor numbers no longer match the values recorded in the
descriptor array in the md superblock. Because of the exception made in
the current code, the descriptor entries are removed and although the
real devices are present and accounted for, they are kicked out from
the array. This leaves the array with zero devices. When multipath_run()
is invoked, it blows up expecting to have had some disks.

Lars Marowsky-Bree suggested some patches for md multipath in 2002, but
they never made it into the mainline 2.4 kernel:

http://marc.theaimsgroup.com/?l=linux-kernel&m=103355467608953&w=2
That patch is large and most of it is not required for this particular
problem.  The section that reinitializes the descriptor array from the
current rdevs in the multipath case resolves this issue of shifting
device names.

drivers/md/md.c

index 931fb87..a285ece 100644 (file)
@@ -1271,148 +1271,164 @@ static int analyze_sbs(mddev_t * mddev)
        memcpy (sb, freshest->sb, sizeof(*sb));
 
        /*
-        * at this point we have picked the 'best' superblock
-        * from all available superblocks.
-        * now we validate this superblock and kick out possibly
-        * failed disks.
+        * For multipathing, lots of things are different from "true"
+        * RAIDs.
+        * All rdev's could be read, so they are no longer faulty.
+        * As there is just one sb, trying to find changed devices via the
+        * this_disk pointer is useless too.
+        *
+        * lmb@suse.de, 2002-09-12
         */
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               /*
-                * Kick all non-fresh devices
-                */
-               __u64 ev1, ev2;
-               ev1 = md_event(rdev->sb);
-               ev2 = md_event(sb);
-               ++ev1;
-               if (ev1 < ev2) {
-                       printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
-                                               partition_name(rdev->dev));
-                       kick_rdev_from_array(rdev);
-                       continue;
-               }
-       }
 
-       /*
-        * Fix up changed device names ... but only if this disk has a
-        * recent update time. Use faulty checksum ones too.
-        */
-       if (mddev->sb->level != -4)
-       ITERATE_RDEV(mddev,rdev,tmp) {
-               __u64 ev1, ev2, ev3;
-               if (rdev->faulty || rdev->alias_device) {
-                       MD_BUG();
-                       goto abort;
-               }
-               ev1 = md_event(rdev->sb);
-               ev2 = md_event(sb);
-               ev3 = ev2;
-               --ev3;
-               if ((rdev->dev != rdev->old_dev) &&
-                       ((ev1 == ev2) || (ev1 == ev3))) {
+       if (sb->level == -4) {
+               int desc_nr = 0;
+
+               /* ... and initialize from the current rdevs instead */
+               ITERATE_RDEV(mddev,rdev,tmp) {
                        mdp_disk_t *desc;
 
-                       printk(KERN_WARNING "md: device name has changed from %s to %s since last import!\n",
-                              partition_name(rdev->old_dev), partition_name(rdev->dev));
-                       if (rdev->desc_nr == -1) {
-                               MD_BUG();
-                               goto abort;
-                       }
+                       rdev->desc_nr=desc_nr;
+
                        desc = &sb->disks[rdev->desc_nr];
-                       if (rdev->old_dev != MKDEV(desc->major, desc->minor)) {
-                               MD_BUG();
-                               goto abort;
-                       }
-                       desc->major = MAJOR(rdev->dev);
-                       desc->minor = MINOR(rdev->dev);
-                       desc = &rdev->sb->this_disk;
+
+                       desc->number = desc_nr;
                        desc->major = MAJOR(rdev->dev);
                        desc->minor = MINOR(rdev->dev);
-               }
-       }
+                       desc->raid_disk = desc_nr;
 
-       /*
-        * Remove unavailable and faulty devices ...
-        *
-        * note that if an array becomes completely unrunnable due to
-        * missing devices, we do not write the superblock back, so the
-        * administrator has a chance to fix things up. The removal thus
-        * only happens if it's nonfatal to the contents of the array.
-        */
-       for (i = 0; i < MD_SB_DISKS; i++) {
-               int found;
-               mdp_disk_t *desc;
-               kdev_t dev;
+                       /* We could read from it, so it isn't faulty
+                        * any longer */
+                       if (disk_faulty(desc))
+                               mark_disk_spare(desc);
 
-               desc = sb->disks + i;
-               dev = MKDEV(desc->major, desc->minor);
+                       memcpy(&rdev->sb->this_disk,desc,sizeof(*desc));
+
+                       desc_nr++;
+               }
 
+               /* Kick out all old info about disks we used to have,
+                * if any */
+               for (i = desc_nr; i < MD_SB_DISKS; i++)
+                       memset(&(sb->disks[i]),0,sizeof(mdp_disk_t));
+       } else {
                /*
-                * We kick faulty devices/descriptors immediately.
-                *
-                * Note: multipath devices are a special case.  Since we
-                * were able to read the superblock on the path, we don't
-                * care if it was previously marked as faulty, it's up now
-                * so enable it.
+                * at this point we have picked the 'best' superblock
+                * from all available superblocks.
+                * now we validate this superblock and kick out possibly
+                * failed disks.
                 */
-               if (disk_faulty(desc) && mddev->sb->level != -4) {
-                       found = 0;
-                       ITERATE_RDEV(mddev,rdev,tmp) {
-                               if (rdev->desc_nr != desc->number)
-                                       continue;
-                               printk(KERN_WARNING "md%d: kicking faulty %s!\n",
-                                       mdidx(mddev),partition_name(rdev->dev));
-                               kick_rdev_from_array(rdev);
-                               found = 1;
-                               break;
-                       }
-                       if (!found) {
-                               if (dev == MKDEV(0,0))
-                                       continue;
-                               printk(KERN_WARNING "md%d: removing former faulty %s!\n",
-                                       mdidx(mddev), partition_name(dev));
-                       }
-                       remove_descriptor(desc, sb);
-                       continue;
-               } else if (disk_faulty(desc)) {
+               ITERATE_RDEV(mddev,rdev,tmp) {
                        /*
-                        * multipath entry marked as faulty, unfaulty it
+                        * Kick all non-fresh devices
                         */
-                       rdev = find_rdev(mddev, dev);
-                       if(rdev)
-                               mark_disk_spare(desc);
-                       else
-                               remove_descriptor(desc, sb);
+                       __u64 ev1, ev2;
+                       ev1 = md_event(rdev->sb);
+                       ev2 = md_event(sb);
+                       ++ev1;
+                       if (ev1 < ev2) {
+                               printk(KERN_WARNING "md: kicking non-fresh %s from array!\n",
+                                                       partition_name(rdev->dev));
+                               kick_rdev_from_array(rdev);
+                               continue;
+                       }
                }
 
-               if (dev == MKDEV(0,0))
-                       continue;
                /*
-                * Is this device present in the rdev ring?
+                * Fix up changed device names ... but only if this disk has a
+                * recent update time. Use faulty checksum ones too.
                 */
-               found = 0;
                ITERATE_RDEV(mddev,rdev,tmp) {
+                       __u64 ev1, ev2, ev3;
+                       if (rdev->faulty || rdev->alias_device) {
+                               MD_BUG();
+                               goto abort;
+                       }
+                       ev1 = md_event(rdev->sb);
+                       ev2 = md_event(sb);
+                       ev3 = ev2;
+                       --ev3;
+                       if ((rdev->dev != rdev->old_dev) &&
+                               ((ev1 == ev2) || (ev1 == ev3))) {
+                               mdp_disk_t *desc;
+
+                               printk(KERN_WARNING "md: device name has changed from %s to %s since last import!\n",
+                                      partition_name(rdev->old_dev), partition_name(rdev->dev));
+                               if (rdev->desc_nr == -1) {
+                                       MD_BUG();
+                                       goto abort;
+                               }
+                               desc = &sb->disks[rdev->desc_nr];
+                               if (rdev->old_dev != MKDEV(desc->major, desc->minor)) {
+                                       MD_BUG();
+                                       goto abort;
+                               }
+                               desc->major = MAJOR(rdev->dev);
+                               desc->minor = MINOR(rdev->dev);
+                               desc = &rdev->sb->this_disk;
+                               desc->major = MAJOR(rdev->dev);
+                               desc->minor = MINOR(rdev->dev);
+                       }
+               }
+
+               /*
+                * Remove unavailable and faulty devices ...
+                *
+                * note that if an array becomes completely unrunnable due to
+                * missing devices, we do not write the superblock back, so the
+                * administrator has a chance to fix things up. The removal thus
+                * only happens if it's nonfatal to the contents of the array.
+                */
+               for (i = 0; i < MD_SB_DISKS; i++) {
+                       int found;
+                       mdp_disk_t *desc;
+                       kdev_t dev;
+
+                       desc = sb->disks + i;
+                       dev = MKDEV(desc->major, desc->minor);
+
                        /*
-                        * Multi-path IO special-case: since we have no
-                        * this_disk descriptor at auto-detect time,
-                        * we cannot check rdev->number.
-                        * We can check the device though.
+                        * We kick faulty devices/descriptors immediately.
                         */
-                       if ((sb->level == -4) && (rdev->dev ==
-                                       MKDEV(desc->major,desc->minor))) {
-                               found = 1;
-                               break;
+                       if (disk_faulty(desc)) {
+                               found = 0;
+                               ITERATE_RDEV(mddev,rdev,tmp) {
+                                       if (rdev->desc_nr != desc->number)
+                                               continue;
+                                       printk(KERN_WARNING "md%d: kicking faulty %s!\n",
+                                               mdidx(mddev),partition_name(rdev->dev));
+                                       kick_rdev_from_array(rdev);
+                                       found = 1;
+                                       break;
+                               }
+                               if (!found) {
+                                       if (dev == MKDEV(0,0))
+                                               continue;
+                                       printk(KERN_WARNING "md%d: removing former faulty %s!\n",
+                                               mdidx(mddev), partition_name(dev));
+                               }
+                               remove_descriptor(desc, sb);
+                               continue;
                        }
-                       if (rdev->desc_nr == desc->number) {
-                               found = 1;
-                               break;
+
+                       if (dev == MKDEV(0,0))
+                               continue;
+                       /*
+                        * Is this device present in the rdev ring?
+                        */
+                       found = 0;
+                       ITERATE_RDEV(mddev,rdev,tmp) {
+                               if (rdev->desc_nr == desc->number) {
+                                       found = 1;
+                                       break;
+                               }
                        }
-               }
-               if (found)
-                       continue;
+                       if (found)
+                               continue;
 
-               printk(KERN_WARNING "md%d: former device %s is unavailable, removing from array!\n",
-                      mdidx(mddev), partition_name(dev));
-               remove_descriptor(desc, sb);
+                       printk(KERN_WARNING "md%d: former device %s is unavailable, removing from array!\n",
+                              mdidx(mddev), partition_name(dev));
+                       remove_descriptor(desc, sb);
+               }
        }
 
        /*