Merge git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc
author Linus Torvalds <torvalds@g5.osdl.org>
Wed, 29 Mar 2006 19:28:30 +0000 (11:28 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Wed, 29 Mar 2006 19:28:30 +0000 (11:28 -0800)
* git://git.kernel.org/pub/scm/linux/kernel/git/paulus/powerpc: (67 commits)
  [PATCH] powerpc: Remove oprofile spinlock backtrace code
  [PATCH] powerpc: Add oprofile calltrace support to all powerpc cpus
  [PATCH] powerpc: Add oprofile calltrace support
  [PATCH] for_each_possible_cpu: ppc
  [PATCH] for_each_possible_cpu: powerpc
  [PATCH] lock PTE before updating it in 440/BookE page fault handler
  [PATCH] powerpc: Kill _machine and hard-coded platform numbers
  ppc: Fix compile error in arch/ppc/lib/strcase.c
  [PATCH] git-powerpc: WARN was a dumb idea
  [PATCH] powerpc: a couple of trivial compile warning fixes
  powerpc: remove OCP references
  powerpc: Make uImage default build output for MPC8540 ADS
  powerpc: move math-emu over to arch/powerpc
  powerpc: use memparse() for mem= command line parsing
  ppc: fix strncasecmp prototype
  [PATCH] powerpc: make ISA floppies work again
  [PATCH] powerpc: Fix some initcall return values
  [PATCH] powerpc: Workaround for pSeries RTAS bug
  [PATCH] spufs: fix __init/__exit annotations
  [PATCH] powerpc: add hvc backend for rtas
  ...

71 files changed:
arch/mips/kernel/irixsig.c
arch/um/kernel/smp.c
drivers/char/tty_io.c
drivers/ieee1394/dv1394.c
drivers/ieee1394/ieee1394_core.c
drivers/ieee1394/ieee1394_core.h
drivers/ieee1394/ohci1394.c
drivers/ieee1394/raw1394.c
drivers/ieee1394/sbp2.c
drivers/ieee1394/video1394.c
fs/direct-io.c
fs/exec.c
fs/xfs/linux-2.6/mrlock.h
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_aops.h
fs/xfs/linux-2.6/xfs_export.h
fs/xfs/linux-2.6/xfs_ioctl32.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_lrw.c
fs/xfs/linux-2.6/xfs_vfs.h
fs/xfs/quota/xfs_dquot_item.c
fs/xfs/quota/xfs_qm.c
fs/xfs/quota/xfs_qm_syscalls.c
fs/xfs/quota/xfs_trans_dquot.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_ag.h
fs/xfs/xfs_alloc.c
fs/xfs/xfs_alloc.h
fs/xfs/xfs_attr.c
fs/xfs/xfs_attr_leaf.c
fs/xfs/xfs_behavior.c
fs/xfs/xfs_behavior.h
fs/xfs/xfs_bmap.c
fs/xfs/xfs_bmap.h
fs/xfs/xfs_buf_item.c
fs/xfs/xfs_cap.h
fs/xfs/xfs_da_btree.c
fs/xfs/xfs_dir2_block.c
fs/xfs/xfs_dir2_leaf.c
fs/xfs/xfs_dir2_node.c
fs/xfs/xfs_dir_leaf.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_ialloc.c
fs/xfs/xfs_iget.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_itable.c
fs/xfs/xfs_itable.h
fs/xfs/xfs_log.c
fs/xfs/xfs_log.h
fs/xfs/xfs_log_recover.c
fs/xfs/xfs_mount.c
fs/xfs/xfs_mount.h
fs/xfs/xfs_quota.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_inode.c
fs/xfs/xfs_vfsops.c
fs/xfs/xfs_vnodeops.c
include/linux/init_task.h
include/linux/pid.h
include/linux/sched.h
include/linux/signal.h
include/linux/slab.h
kernel/exit.c
kernel/fork.c
kernel/kmod.c
kernel/pid.c
kernel/ptrace.c
kernel/signal.c
kernel/sys.c

diff --git a/arch/mips/kernel/irixsig.c b/arch/mips/kernel/irixsig.c
index 08273a2..8150f07 100644
@@ -603,7 +603,7 @@ repeat:
                        /* move to end of parent's list to avoid starvation */
                        write_lock_irq(&tasklist_lock);
                        remove_parent(p);
-                       add_parent(p, p->parent);
+                       add_parent(p);
                        write_unlock_irq(&tasklist_lock);
                        retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
                        if (retval)
@@ -643,7 +643,7 @@ repeat:
                                write_lock_irq(&tasklist_lock);
                                remove_parent(p);
                                p->parent = p->real_parent;
-                               add_parent(p, p->parent);
+                               add_parent(p);
                                do_notify_parent(p, SIGCHLD);
                                write_unlock_irq(&tasklist_lock);
                        } else
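
The two hunks above are fallout from an interface change carried by this merge: add_parent() loses its second argument and now links the task to its own p->parent. A minimal sketch of the before and after, assuming the 2.6.16-era macros from include/linux/sched.h (paraphrased, not the verbatim kernel source):

	/* old: the caller passed the parent explicitly, though it was
	 * always p->parent anyway */
	#define add_parent(p, parent) \
		list_add_tail(&(p)->sibling, &(parent)->children)

	/* new: the redundant argument is gone */
	#define add_parent(p) \
		list_add_tail(&(p)->sibling, &(p)->parent->children)

Every call that previously read add_parent(p, p->parent) collapses to add_parent(p), here and again in fs/exec.c further down.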
diff --git a/arch/um/kernel/smp.c b/arch/um/kernel/smp.c
index c8d8d0a..511116a 100644
@@ -143,7 +143,6 @@ void smp_prepare_cpus(unsigned int maxcpus)
                idle = idle_thread(cpu);
 
                init_idle(idle, cpu);
-               unhash_process(idle);
 
                waittime = 200000000;
                while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
index 811dadb..0bfd1b6 100644
@@ -1094,8 +1094,8 @@ static void do_tty_hangup(void *data)
                                p->signal->tty = NULL;
                        if (!p->signal->leader)
                                continue;
-                       send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p);
-                       send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+                       group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+                       group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
                        if (tty->pgrp > 0)
                                p->signal->tty_old_pgrp = tty->pgrp;
                } while_each_task_pid(tty->session, PIDTYPE_SID, p);
@@ -2672,7 +2672,7 @@ static void __do_SAK(void *arg)
        tty_hangup(tty);
 #else
        struct tty_struct *tty = arg;
-       struct task_struct *p;
+       struct task_struct *g, *p;
        int session;
        int             i;
        struct file     *filp;
@@ -2693,8 +2693,18 @@ static void __do_SAK(void *arg)
                tty->driver->flush_buffer(tty);
        
        read_lock(&tasklist_lock);
+       /* Kill the entire session */
        do_each_task_pid(session, PIDTYPE_SID, p) {
-               if (p->signal->tty == tty || session > 0) {
+               printk(KERN_NOTICE "SAK: killed process %d"
+                       " (%s): p->signal->session==tty->session\n",
+                       p->pid, p->comm);
+               send_sig(SIGKILL, p, 1);
+       } while_each_task_pid(session, PIDTYPE_SID, p);
+       /* Now kill any processes that happen to have the
+        * tty open.
+        */
+       do_each_thread(g, p) {
+               if (p->signal->tty == tty) {
                        printk(KERN_NOTICE "SAK: killed process %d"
                            " (%s): p->signal->session==tty->session\n",
                            p->pid, p->comm);
@@ -2721,7 +2731,7 @@ static void __do_SAK(void *arg)
                        rcu_read_unlock();
                }
                task_unlock(p);
-       } while_each_task_pid(session, PIDTYPE_SID, p);
+       } while_each_thread(g, p);
        read_unlock(&tasklist_lock);
 #endif
 }
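
The rework splits SAK into two passes. The old single walk over the session had an effectively always-true condition (p->signal->tty == tty || session > 0, inside a PIDTYPE_SID walk) and still missed processes outside the session that merely held the tty open. Condensed, the new control flow is:

	read_lock(&tasklist_lock);
	/* pass 1: kill every process in the tty's session */
	do_each_task_pid(session, PIDTYPE_SID, p) {
		send_sig(SIGKILL, p, 1);
	} while_each_task_pid(session, PIDTYPE_SID, p);

	/* pass 2: kill any other task that has the tty as its
	 * controlling tty or open as a file (fd scan elided here) */
	do_each_thread(g, p) {
		if (p->signal->tty == tty)
			send_sig(SIGKILL, p, 1);
	} while_each_thread(g, p);
	read_unlock(&tasklist_lock);

do_each_thread()/while_each_thread() iterate every thread in the system, hence the extra struct task_struct *g cursor added at the top of the function.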
diff --git a/drivers/ieee1394/dv1394.c b/drivers/ieee1394/dv1394.c
index efeaa94..85c2d4c 100644
@@ -73,7 +73,7 @@
   - fix all XXX showstoppers
   - disable IR/IT DMA interrupts on shutdown
   - flush pci writes to the card by issuing a read
-  - devfs and character device dispatching (* needs testing with Linux 2.2.x)
+  - character device dispatching
   - switch over to the new kernel DMA API (pci_map_*()) (* needs testing on platforms with IOMMU!)
   - keep all video_cards in a list (for open() via chardev), set file->private_data = video
   - dv1394_poll should indicate POLLIN when receiving buffers are available
@@ -1096,7 +1096,6 @@ static int do_dv1394_init_default(struct video_card *video)
 
        init.api_version = DV1394_API_VERSION;
        init.n_frames = DV1394_MAX_FRAMES / 4;
-       /* the following are now set via devfs */
        init.channel = video->channel;
        init.format = video->pal_or_ntsc;
        init.cip_n = video->cip_n;
@@ -1791,8 +1790,6 @@ static int dv1394_open(struct inode *inode, struct file *file)
 {
        struct video_card *video = NULL;
 
-       /* if the device was opened through devfs, then file->private_data
-          has already been set to video by devfs */
        if (file->private_data) {
                video = (struct video_card*) file->private_data;
 
@@ -2211,7 +2208,7 @@ static int dv1394_init(struct ti_ohci *ohci, enum pal_or_ntsc format, enum modes
        video = kzalloc(sizeof(*video), GFP_KERNEL);
        if (!video) {
                printk(KERN_ERR "dv1394: cannot allocate video_card\n");
-               goto err;
+               return -1;
        }
 
        video->ohci = ohci;
@@ -2266,37 +2263,14 @@ static int dv1394_init(struct ti_ohci *ohci, enum pal_or_ntsc format, enum modes
        list_add_tail(&video->list, &dv1394_cards);
        spin_unlock_irqrestore(&dv1394_cards_lock, flags);
 
-       if (devfs_mk_cdev(MKDEV(IEEE1394_MAJOR,
-                               IEEE1394_MINOR_BLOCK_DV1394*16 + video->id),
-                       S_IFCHR|S_IRUGO|S_IWUGO,
-                        "ieee1394/dv/host%d/%s/%s",
-                        (video->id>>2),
-                        (video->pal_or_ntsc == DV1394_NTSC ? "NTSC" : "PAL"),
-                        (video->mode == MODE_RECEIVE ? "in" : "out")) < 0)
-                       goto err_free;
-
        debug_printk("dv1394: dv1394_init() OK on ID %d\n", video->id);
-
        return 0;
-
- err_free:
-       kfree(video);
- err:
-       return -1;
 }
 
 static void dv1394_un_init(struct video_card *video)
 {
-       char buf[32];
-
        /* obviously nobody has the driver open at this point */
        do_dv1394_shutdown(video, 1);
-       snprintf(buf, sizeof(buf), "dv/host%d/%s/%s", (video->id >> 2),
-               (video->pal_or_ntsc == DV1394_NTSC ? "NTSC" : "PAL"),
-               (video->mode == MODE_RECEIVE ? "in" : "out")
-               );
-
-       devfs_remove("ieee1394/%s", buf);
        kfree(video);
 }
 
@@ -2333,9 +2307,6 @@ static void dv1394_remove_host (struct hpsb_host *host)
 
        class_device_destroy(hpsb_protocol_class,
                MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id<<2)));
-       devfs_remove("ieee1394/dv/host%d/NTSC", id);
-       devfs_remove("ieee1394/dv/host%d/PAL", id);
-       devfs_remove("ieee1394/dv/host%d", id);
 }
 
 static void dv1394_add_host (struct hpsb_host *host)
@@ -2352,9 +2323,6 @@ static void dv1394_add_host (struct hpsb_host *host)
        class_device_create(hpsb_protocol_class, NULL, MKDEV(
                IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id<<2)), 
                NULL, "dv1394-%d", id);
-       devfs_mk_dir("ieee1394/dv/host%d", id);
-       devfs_mk_dir("ieee1394/dv/host%d/NTSC", id);
-       devfs_mk_dir("ieee1394/dv/host%d/PAL", id);
 
        dv1394_init(ohci, DV1394_NTSC, MODE_RECEIVE);
        dv1394_init(ohci, DV1394_NTSC, MODE_TRANSMIT);
@@ -2611,10 +2579,8 @@ MODULE_LICENSE("GPL");
 static void __exit dv1394_exit_module(void)
 {
        hpsb_unregister_protocol(&dv1394_driver);
-
        hpsb_unregister_highlevel(&dv1394_highlevel);
        cdev_del(&dv1394_cdev);
-       devfs_remove("ieee1394/dv");
 }
 
 static int __init dv1394_init_module(void)
@@ -2630,15 +2596,12 @@ static int __init dv1394_init_module(void)
                return ret;
        }
 
-       devfs_mk_dir("ieee1394/dv");
-
        hpsb_register_highlevel(&dv1394_highlevel);
 
        ret = hpsb_register_protocol(&dv1394_driver);
        if (ret) {
                printk(KERN_ERR "dv1394: failed to register protocol\n");
                hpsb_unregister_highlevel(&dv1394_highlevel);
-               devfs_remove("ieee1394/dv");
                cdev_del(&dv1394_cdev);
                return ret;
        }
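
A recurring theme of this merge is devfs removal: all devfs_mk_dir()/devfs_mk_cdev()/devfs_remove() calls are deleted, and the driver-model class becomes the only publication path for device nodes (udev creates them from sysfs). A minimal sketch of the registration that survives in this driver, using the 2.6.16 class_device API:

	/* the cdev supplies the file operations for the char device */
	cdev_init(&dv1394_cdev, &dv1394_fops);
	dv1394_cdev.owner = THIS_MODULE;
	ret = cdev_add(&dv1394_cdev, IEEE1394_DV1394_DEV, 16);

	/* the class device exports the dev_t through sysfs so udev can
	 * create /dev/dv1394-<n>; no devfs node is made in the kernel */
	class_device_create(hpsb_protocol_class, NULL,
			    MKDEV(IEEE1394_MAJOR,
				  IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id << 2)),
			    NULL, "dv1394-%d", id);

The same deletion pattern repeats below in ieee1394_core, raw1394, and video1394.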
diff --git a/drivers/ieee1394/ieee1394_core.c b/drivers/ieee1394/ieee1394_core.c
index 25ef5a8..be6854e 100644
@@ -58,7 +58,7 @@ MODULE_PARM_DESC(disable_nodemgr, "Disable nodemgr functionality.");
 
 /* Disable Isochronous Resource Manager functionality */
 int hpsb_disable_irm = 0;
-module_param_named(disable_irm, hpsb_disable_irm, bool, 0);
+module_param_named(disable_irm, hpsb_disable_irm, bool, 0444);
 MODULE_PARM_DESC(disable_irm,
                 "Disable Isochronous Resource Manager functionality.");
 
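
The only change in this hunk is the permission argument: the last parameter of module_param_named() is the mode of the parameter's sysfs entry. A sketch of what the new value buys:

	/* perm 0444 makes the current value world-readable at
	 * /sys/module/ieee1394/parameters/disable_irm;
	 * perm 0 creates no sysfs entry at all */
	module_param_named(disable_irm, hpsb_disable_irm, bool, 0444);

so after this change the setting can be inspected at runtime with a plain cat of that sysfs file.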
@@ -1078,17 +1078,10 @@ static int __init ieee1394_init(void)
                goto exit_release_kernel_thread;
        }
 
-       /* actually this is a non-fatal error */
-       ret = devfs_mk_dir("ieee1394");
-       if (ret < 0) {
-               HPSB_ERR("unable to make devfs dir for device major %d!\n", IEEE1394_MAJOR);
-               goto release_chrdev;
-       }
-
        ret = bus_register(&ieee1394_bus_type);
        if (ret < 0) {
                HPSB_INFO("bus register failed");
-               goto release_devfs;
+               goto release_chrdev;
        }
 
        for (i = 0; fw_bus_attrs[i]; i++) {
@@ -1099,7 +1092,7 @@ static int __init ieee1394_init(void)
                                                fw_bus_attrs[i--]);
                        }
                        bus_unregister(&ieee1394_bus_type);
-                       goto release_devfs;
+                       goto release_chrdev;
                }
        }
 
@@ -1152,8 +1145,6 @@ release_all_bus:
        for (i = 0; fw_bus_attrs[i]; i++)
                bus_remove_file(&ieee1394_bus_type, fw_bus_attrs[i]);
        bus_unregister(&ieee1394_bus_type);
-release_devfs:
-       devfs_remove("ieee1394");
 release_chrdev:
        unregister_chrdev_region(IEEE1394_CORE_DEV, 256);
 exit_release_kernel_thread:
@@ -1191,7 +1182,6 @@ static void __exit ieee1394_cleanup(void)
        hpsb_cleanup_config_roms();
 
        unregister_chrdev_region(IEEE1394_CORE_DEV, 256);
-       devfs_remove("ieee1394");
 }
 
 module_init(ieee1394_init);
diff --git a/drivers/ieee1394/ieee1394_core.h b/drivers/ieee1394/ieee1394_core.h
index b354660..e7b55e8 100644
@@ -3,7 +3,6 @@
 #define _IEEE1394_CORE_H
 
 #include <linux/slab.h>
-#include <linux/devfs_fs_kernel.h>
 #include <asm/atomic.h>
 #include <asm/semaphore.h>
 #include "hosts.h"
@@ -202,14 +201,12 @@ void hpsb_packet_received(struct hpsb_host *host, quadlet_t *data, size_t size,
 #define IEEE1394_MINOR_BLOCK_RAW1394      0
 #define IEEE1394_MINOR_BLOCK_VIDEO1394    1
 #define IEEE1394_MINOR_BLOCK_DV1394       2
-#define IEEE1394_MINOR_BLOCK_AMDTP        3
 #define IEEE1394_MINOR_BLOCK_EXPERIMENTAL 15
 
 #define IEEE1394_CORE_DEV        MKDEV(IEEE1394_MAJOR, 0)
 #define IEEE1394_RAW1394_DEV     MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16)
 #define IEEE1394_VIDEO1394_DEV   MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_VIDEO1394 * 16)
 #define IEEE1394_DV1394_DEV      MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16)
-#define IEEE1394_AMDTP_DEV       MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_AMDTP * 16)
 #define IEEE1394_EXPERIMENTAL_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_EXPERIMENTAL * 16)
 
 /* return the index (within a minor number block) of a file */
diff --git a/drivers/ieee1394/ohci1394.c b/drivers/ieee1394/ohci1394.c
index 314f355..1922287 100644
@@ -544,12 +544,19 @@ static void ohci_initialize(struct ti_ohci *ohci)
        /* Initialize IR Legacy DMA channel mask */
        ohci->ir_legacy_channels = 0;
 
-       /*
-        * Accept AT requests from all nodes. This probably
-        * will have to be controlled from the subsystem
-        * on a per node basis.
-        */
-       reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0x80000000);
+       /* Accept AR requests from all nodes */
+       reg_write(ohci, OHCI1394_AsReqFilterHiSet, 0x80000000);
+
+       /* Set the address range of the physical response unit.
+        * Most controllers do not implement it as a writable register though.
+        * They will keep a hardwired offset of 0x00010000 and show 0x0 as
+        * register content.
+        * To actually enable physical responses is the job of our interrupt
+        * handler which programs the physical request filter. */
+       reg_write(ohci, OHCI1394_PhyUpperBound, 0xffff0000);
+
+       DBGMSG("physUpperBoundOffset=%08x",
+              reg_read(ohci, OHCI1394_PhyUpperBound));
 
        /* Specify AT retries */
        reg_write(ohci, OHCI1394_ATRetries,
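
The write-then-read pair above is deliberate: as the new comment says, most controllers hard-wire the physical response range and silently ignore writes to OHCI1394_PhyUpperBound, so the DBGMSG readback shows whether the write took. A sketch of the probe pattern (reg_write()/reg_read() are this driver's own MMIO helpers):

	/* request the largest physical range ... */
	reg_write(ohci, OHCI1394_PhyUpperBound, 0xffff0000);
	/* ... and read back: hard-wired controllers return 0 here and
	 * keep their fixed 0x00010000 offset */
	if (reg_read(ohci, OHCI1394_PhyUpperBound) != 0xffff0000)
		DBGMSG("PhyUpperBound not writable on this controller");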
@@ -572,6 +579,7 @@ static void ohci_initialize(struct ti_ohci *ohci)
                  OHCI1394_reqTxComplete |
                  OHCI1394_isochRx |
                  OHCI1394_isochTx |
+                 OHCI1394_postedWriteErr |
                  OHCI1394_cycleInconsistent);
 
        /* Enable link */
@@ -2374,7 +2382,10 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id,
 
                event &= ~OHCI1394_unrecoverableError;
        }
-
+       if (event & OHCI1394_postedWriteErr) {
+               PRINT(KERN_ERR, "physical posted write error");
+               /* no recovery strategy yet, had to involve protocol drivers */
+       }
        if (event & OHCI1394_cycleInconsistent) {
                /* We subscribe to the cycleInconsistent event only to
                 * clear the corresponding event bit... otherwise,
@@ -2382,7 +2393,6 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id,
                DBGMSG("OHCI1394_cycleInconsistent");
                event &= ~OHCI1394_cycleInconsistent;
        }
-
        if (event & OHCI1394_busReset) {
                /* The busReset event bit can't be cleared during the
                 * selfID phase, so we disable busReset interrupts, to
@@ -2426,7 +2436,6 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id,
                }
                event &= ~OHCI1394_busReset;
        }
-
        if (event & OHCI1394_reqTxComplete) {
                struct dma_trm_ctx *d = &ohci->at_req_context;
                DBGMSG("Got reqTxComplete interrupt "
@@ -2514,26 +2523,20 @@ static irqreturn_t ohci_irq_handler(int irq, void *dev_id,
                        reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
                        spin_unlock_irqrestore(&ohci->event_lock, flags);
 
-                       /* Accept Physical requests from all nodes. */
-                       reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0xffffffff);
-                       reg_write(ohci,OHCI1394_AsReqFilterLoSet, 0xffffffff);
-
                        /* Turn on phys dma reception.
                         *
                         * TODO: Enable some sort of filtering management.
                         */
                        if (phys_dma) {
-                               reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0xffffffff);
-                               reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0xffffffff);
-                               reg_write(ohci,OHCI1394_PhyUpperBound, 0xffff0000);
-                       } else {
-                               reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0x00000000);
-                               reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0x00000000);
+                               reg_write(ohci, OHCI1394_PhyReqFilterHiSet,
+                                         0xffffffff);
+                               reg_write(ohci, OHCI1394_PhyReqFilterLoSet,
+                                         0xffffffff);
                        }
 
                        DBGMSG("PhyReqFilter=%08x%08x",
-                              reg_read(ohci,OHCI1394_PhyReqFilterHiSet),
-                              reg_read(ohci,OHCI1394_PhyReqFilterLoSet));
+                              reg_read(ohci, OHCI1394_PhyReqFilterHiSet),
+                              reg_read(ohci, OHCI1394_PhyReqFilterLoSet));
 
                        hpsb_selfid_complete(host, phyid, isroot);
                } else
@@ -3259,8 +3262,8 @@ static int __devinit ohci1394_pci_probe(struct pci_dev *dev,
         * fail to report the right length.  Anyway, the ohci spec
         * clearly says it's 2kb, so this shouldn't be a problem. */
        ohci_base = pci_resource_start(dev, 0);
-       if (pci_resource_len(dev, 0) != OHCI1394_REGISTER_SIZE)
-               PRINT(KERN_WARNING, "Unexpected PCI resource length of %lx!",
+       if (pci_resource_len(dev, 0) < OHCI1394_REGISTER_SIZE)
+               PRINT(KERN_WARNING, "PCI resource length of %lx too small!",
                      pci_resource_len(dev, 0));
 
        /* Seems PCMCIA handles this internally. Not sure why. Seems
diff --git a/drivers/ieee1394/raw1394.c b/drivers/ieee1394/raw1394.c
index 19f26c5..f7de546 100644
@@ -41,7 +41,6 @@
 #include <linux/cdev.h>
 #include <asm/uaccess.h>
 #include <asm/atomic.h>
-#include <linux/devfs_fs_kernel.h>
 #include <linux/compat.h>
 
 #include "csr1212.h"
@@ -2999,9 +2998,6 @@ static int __init init_raw1394(void)
                goto out_unreg;
        }
 
-       devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16),
-                     S_IFCHR | S_IRUSR | S_IWUSR, RAW1394_DEVICE_NAME);
-
        cdev_init(&raw1394_cdev, &raw1394_fops);
        raw1394_cdev.owner = THIS_MODULE;
        kobject_set_name(&raw1394_cdev.kobj, RAW1394_DEVICE_NAME);
@@ -3023,7 +3019,6 @@ static int __init init_raw1394(void)
        goto out;
 
       out_dev:
-       devfs_remove(RAW1394_DEVICE_NAME);
        class_device_destroy(hpsb_protocol_class,
                             MKDEV(IEEE1394_MAJOR,
                                   IEEE1394_MINOR_BLOCK_RAW1394 * 16));
@@ -3039,7 +3034,6 @@ static void __exit cleanup_raw1394(void)
                             MKDEV(IEEE1394_MAJOR,
                                   IEEE1394_MINOR_BLOCK_RAW1394 * 16));
        cdev_del(&raw1394_cdev);
-       devfs_remove(RAW1394_DEVICE_NAME);
        hpsb_unregister_highlevel(&raw1394_highlevel);
        hpsb_unregister_protocol(&raw1394_driver);
 }
diff --git a/drivers/ieee1394/sbp2.c b/drivers/ieee1394/sbp2.c
index eca92eb..2c765ca 100644
@@ -214,6 +214,7 @@ static u32 global_outstanding_dmas = 0;
 #endif
 
 #define SBP2_ERR(fmt, args...)         HPSB_ERR("sbp2: "fmt, ## args)
+#define SBP2_DEBUG_ENTER()             SBP2_DEBUG("%s", __FUNCTION__)
 
 /*
  * Globals
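
SBP2_DEBUG_ENTER() replaces the dozens of hand-written entry traces converted below; because it expands __FUNCTION__, the trace can no longer go stale when a function is renamed:

	/* before: the function name was duplicated by hand */
	SBP2_DEBUG("sbp2_probe");
	/* after: the compiler supplies the name */
	SBP2_DEBUG_ENTER();	/* prints "sbp2_probe" inside sbp2_probe() */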
@@ -535,7 +536,7 @@ static struct sbp2_command_info *sbp2util_allocate_command_orb(
                command->Current_SCpnt = Current_SCpnt;
                list_add_tail(&command->list, &scsi_id->sbp2_command_orb_inuse);
        } else {
-               SBP2_ERR("sbp2util_allocate_command_orb - No orbs available!");
+               SBP2_ERR("%s: no orbs available", __FUNCTION__);
        }
        spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
        return command;
@@ -549,7 +550,7 @@ static void sbp2util_free_command_dma(struct sbp2_command_info *command)
        struct hpsb_host *host;
 
        if (!scsi_id) {
-               printk(KERN_ERR "%s: scsi_id == NULL\n", __FUNCTION__);
+               SBP2_ERR("%s: scsi_id == NULL", __FUNCTION__);
                return;
        }
 
@@ -610,7 +611,7 @@ static int sbp2_probe(struct device *dev)
        struct unit_directory *ud;
        struct scsi_id_instance_data *scsi_id;
 
-       SBP2_DEBUG("sbp2_probe");
+       SBP2_DEBUG_ENTER();
 
        ud = container_of(dev, struct unit_directory, device);
 
@@ -635,7 +636,7 @@ static int sbp2_remove(struct device *dev)
        struct scsi_id_instance_data *scsi_id;
        struct scsi_device *sdev;
 
-       SBP2_DEBUG("sbp2_remove");
+       SBP2_DEBUG_ENTER();
 
        ud = container_of(dev, struct unit_directory, device);
        scsi_id = ud->device.driver_data;
@@ -667,7 +668,7 @@ static int sbp2_update(struct unit_directory *ud)
 {
        struct scsi_id_instance_data *scsi_id = ud->device.driver_data;
 
-       SBP2_DEBUG("sbp2_update");
+       SBP2_DEBUG_ENTER();
 
        if (sbp2_reconnect_device(scsi_id)) {
 
@@ -715,7 +716,7 @@ static struct scsi_id_instance_data *sbp2_alloc_device(struct unit_directory *ud
        struct Scsi_Host *scsi_host = NULL;
        struct scsi_id_instance_data *scsi_id = NULL;
 
-       SBP2_DEBUG("sbp2_alloc_device");
+       SBP2_DEBUG_ENTER();
 
        scsi_id = kzalloc(sizeof(*scsi_id), GFP_KERNEL);
        if (!scsi_id) {
@@ -749,12 +750,22 @@ static struct scsi_id_instance_data *sbp2_alloc_device(struct unit_directory *ud
 
 #ifdef CONFIG_IEEE1394_SBP2_PHYS_DMA
                /* Handle data movement if physical dma is not
-                * enabled/supportedon host controller */
-               hpsb_register_addrspace(&sbp2_highlevel, ud->ne->host, &sbp2_physdma_ops,
-                                       0x0ULL, 0xfffffffcULL);
+                * enabled or not supported on host controller */
+               if (!hpsb_register_addrspace(&sbp2_highlevel, ud->ne->host,
+                                            &sbp2_physdma_ops,
+                                            0x0ULL, 0xfffffffcULL)) {
+                       SBP2_ERR("failed to register lower 4GB address range");
+                       goto failed_alloc;
+               }
 #endif
        }
 
+       /* Prevent unloading of the 1394 host */
+       if (!try_module_get(hi->host->driver->owner)) {
+               SBP2_ERR("failed to get a reference on 1394 host driver");
+               goto failed_alloc;
+       }
+
        scsi_id->hi = hi;
 
        list_add_tail(&scsi_id->scsi_list, &hi->scsi_ids);
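
The new try_module_get() call (balanced by the module_put() added to sbp2_remove_device() below) pins the 1394 host driver's module for as long as an SBP-2 device exists, so e.g. ohci1394 cannot be unloaded out from under an attached disk. The pattern, sketched:

	/* take a reference on the module that owns the host driver;
	 * this fails if that module is already being unloaded */
	if (!try_module_get(hi->host->driver->owner))
		goto failed_alloc;

	/* ... device lifetime ... */

	module_put(hi->host->driver->owner);	/* on device removal */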
@@ -816,7 +827,7 @@ static int sbp2_start_device(struct scsi_id_instance_data *scsi_id)
        struct sbp2scsi_host_info *hi = scsi_id->hi;
        int error;
 
-       SBP2_DEBUG("sbp2_start_device");
+       SBP2_DEBUG_ENTER();
 
        /* Login FIFO DMA */
        scsi_id->login_response =
@@ -891,7 +902,6 @@ static int sbp2_start_device(struct scsi_id_instance_data *scsi_id)
         * allows someone else to login instead. One second makes sense. */
        msleep_interruptible(1000);
        if (signal_pending(current)) {
-               SBP2_WARN("aborting sbp2_start_device due to event");
                sbp2_remove_device(scsi_id);
                return -EINTR;
        }
@@ -944,7 +954,7 @@ static void sbp2_remove_device(struct scsi_id_instance_data *scsi_id)
 {
        struct sbp2scsi_host_info *hi;
 
-       SBP2_DEBUG("sbp2_remove_device");
+       SBP2_DEBUG_ENTER();
 
        if (!scsi_id)
                return;
@@ -1015,6 +1025,9 @@ static void sbp2_remove_device(struct scsi_id_instance_data *scsi_id)
 
        scsi_id->ud->device.driver_data = NULL;
 
+       if (hi)
+               module_put(hi->host->driver->owner);
+
        SBP2_DEBUG("SBP-2 device removed, SCSI ID = %d", scsi_id->ud->id);
 
        kfree(scsi_id);
@@ -1073,23 +1086,20 @@ static int sbp2_query_logins(struct scsi_id_instance_data *scsi_id)
        int max_logins;
        int active_logins;
 
-       SBP2_DEBUG("sbp2_query_logins");
+       SBP2_DEBUG_ENTER();
 
        scsi_id->query_logins_orb->reserved1 = 0x0;
        scsi_id->query_logins_orb->reserved2 = 0x0;
 
        scsi_id->query_logins_orb->query_response_lo = scsi_id->query_logins_response_dma;
        scsi_id->query_logins_orb->query_response_hi = ORB_SET_NODE_ID(hi->host->node_id);
-       SBP2_DEBUG("sbp2_query_logins: query_response_hi/lo initialized");
 
        scsi_id->query_logins_orb->lun_misc = ORB_SET_FUNCTION(SBP2_QUERY_LOGINS_REQUEST);
        scsi_id->query_logins_orb->lun_misc |= ORB_SET_NOTIFY(1);
        scsi_id->query_logins_orb->lun_misc |= ORB_SET_LUN(scsi_id->sbp2_lun);
-       SBP2_DEBUG("sbp2_query_logins: lun_misc initialized");
 
        scsi_id->query_logins_orb->reserved_resp_length =
                ORB_SET_QUERY_LOGINS_RESP_LENGTH(sizeof(struct sbp2_query_logins_response));
-       SBP2_DEBUG("sbp2_query_logins: reserved_resp_length initialized");
 
        scsi_id->query_logins_orb->status_fifo_hi =
                ORB_SET_STATUS_FIFO_HI(scsi_id->status_fifo_addr, hi->host->node_id);
@@ -1098,25 +1108,19 @@ static int sbp2_query_logins(struct scsi_id_instance_data *scsi_id)
 
        sbp2util_cpu_to_be32_buffer(scsi_id->query_logins_orb, sizeof(struct sbp2_query_logins_orb));
 
-       SBP2_DEBUG("sbp2_query_logins: orb byte-swapped");
-
        sbp2util_packet_dump(scsi_id->query_logins_orb, sizeof(struct sbp2_query_logins_orb),
                             "sbp2 query logins orb", scsi_id->query_logins_orb_dma);
 
        memset(scsi_id->query_logins_response, 0, sizeof(struct sbp2_query_logins_response));
        memset(&scsi_id->status_block, 0, sizeof(struct sbp2_status_block));
 
-       SBP2_DEBUG("sbp2_query_logins: query_logins_response/status FIFO memset");
-
        data[0] = ORB_SET_NODE_ID(hi->host->node_id);
        data[1] = scsi_id->query_logins_orb_dma;
        sbp2util_cpu_to_be32_buffer(data, 8);
 
        atomic_set(&scsi_id->sbp2_login_complete, 0);
 
-       SBP2_DEBUG("sbp2_query_logins: prepared to write");
        hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8);
-       SBP2_DEBUG("sbp2_query_logins: written");
 
        if (sbp2util_down_timeout(&scsi_id->sbp2_login_complete, 2*HZ)) {
                SBP2_INFO("Error querying logins to SBP-2 device - timed out");
@@ -1165,10 +1169,10 @@ static int sbp2_login_device(struct scsi_id_instance_data *scsi_id)
        struct sbp2scsi_host_info *hi = scsi_id->hi;
        quadlet_t data[2];
 
-       SBP2_DEBUG("sbp2_login_device");
+       SBP2_DEBUG_ENTER();
 
        if (!scsi_id->login_orb) {
-               SBP2_DEBUG("sbp2_login_device: login_orb not alloc'd!");
+               SBP2_DEBUG("%s: login_orb not alloc'd!", __FUNCTION__);
                return -EIO;
        }
 
@@ -1182,59 +1186,39 @@ static int sbp2_login_device(struct scsi_id_instance_data *scsi_id)
        /* Set-up login ORB, assume no password */
        scsi_id->login_orb->password_hi = 0;
        scsi_id->login_orb->password_lo = 0;
-       SBP2_DEBUG("sbp2_login_device: password_hi/lo initialized");
 
        scsi_id->login_orb->login_response_lo = scsi_id->login_response_dma;
        scsi_id->login_orb->login_response_hi = ORB_SET_NODE_ID(hi->host->node_id);
-       SBP2_DEBUG("sbp2_login_device: login_response_hi/lo initialized");
 
        scsi_id->login_orb->lun_misc = ORB_SET_FUNCTION(SBP2_LOGIN_REQUEST);
        scsi_id->login_orb->lun_misc |= ORB_SET_RECONNECT(0);   /* One second reconnect time */
        scsi_id->login_orb->lun_misc |= ORB_SET_EXCLUSIVE(exclusive_login);     /* Exclusive access to device */
        scsi_id->login_orb->lun_misc |= ORB_SET_NOTIFY(1);      /* Notify us of login complete */
        scsi_id->login_orb->lun_misc |= ORB_SET_LUN(scsi_id->sbp2_lun);
-       SBP2_DEBUG("sbp2_login_device: lun_misc initialized");
 
        scsi_id->login_orb->passwd_resp_lengths =
                ORB_SET_LOGIN_RESP_LENGTH(sizeof(struct sbp2_login_response));
-       SBP2_DEBUG("sbp2_login_device: passwd_resp_lengths initialized");
 
        scsi_id->login_orb->status_fifo_hi =
                ORB_SET_STATUS_FIFO_HI(scsi_id->status_fifo_addr, hi->host->node_id);
        scsi_id->login_orb->status_fifo_lo =
                ORB_SET_STATUS_FIFO_LO(scsi_id->status_fifo_addr);
 
-       /*
-        * Byte swap ORB if necessary
-        */
        sbp2util_cpu_to_be32_buffer(scsi_id->login_orb, sizeof(struct sbp2_login_orb));
 
-       SBP2_DEBUG("sbp2_login_device: orb byte-swapped");
-
        sbp2util_packet_dump(scsi_id->login_orb, sizeof(struct sbp2_login_orb),
                             "sbp2 login orb", scsi_id->login_orb_dma);
 
-       /*
-        * Initialize login response and status fifo
-        */
        memset(scsi_id->login_response, 0, sizeof(struct sbp2_login_response));
        memset(&scsi_id->status_block, 0, sizeof(struct sbp2_status_block));
 
-       SBP2_DEBUG("sbp2_login_device: login_response/status FIFO memset");
-
-       /*
-        * Ok, let's write to the target's management agent register
-        */
        data[0] = ORB_SET_NODE_ID(hi->host->node_id);
        data[1] = scsi_id->login_orb_dma;
        sbp2util_cpu_to_be32_buffer(data, 8);
 
        atomic_set(&scsi_id->sbp2_login_complete, 0);
 
-       SBP2_DEBUG("sbp2_login_device: prepared to write to %08x",
-                  (unsigned int)scsi_id->sbp2_management_agent_addr);
        hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8);
-       SBP2_DEBUG("sbp2_login_device: written");
 
        /*
         * Wait for login status (up to 20 seconds)...
@@ -1298,7 +1282,7 @@ static int sbp2_logout_device(struct scsi_id_instance_data *scsi_id)
        quadlet_t data[2];
        int error;
 
-       SBP2_DEBUG("sbp2_logout_device");
+       SBP2_DEBUG_ENTER();
 
        /*
         * Set-up logout ORB
@@ -1362,7 +1346,7 @@ static int sbp2_reconnect_device(struct scsi_id_instance_data *scsi_id)
        quadlet_t data[2];
        int error;
 
-       SBP2_DEBUG("sbp2_reconnect_device");
+       SBP2_DEBUG_ENTER();
 
        /*
         * Set-up reconnect ORB
@@ -1453,17 +1437,11 @@ static int sbp2_set_busy_timeout(struct scsi_id_instance_data *scsi_id)
 {
        quadlet_t data;
 
-       SBP2_DEBUG("sbp2_set_busy_timeout");
+       SBP2_DEBUG_ENTER();
 
-       /*
-        * Ok, let's write to the target's busy timeout register
-        */
        data = cpu_to_be32(SBP2_BUSY_TIMEOUT_VALUE);
-
-       if (hpsb_node_write(scsi_id->ne, SBP2_BUSY_TIMEOUT_ADDRESS, &data, 4)) {
-               SBP2_ERR("sbp2_set_busy_timeout error");
-       }
-
+       if (hpsb_node_write(scsi_id->ne, SBP2_BUSY_TIMEOUT_ADDRESS, &data, 4))
+               SBP2_ERR("%s error", __FUNCTION__);
        return 0;
 }
 
@@ -1482,7 +1460,7 @@ static void sbp2_parse_unit_directory(struct scsi_id_instance_data *scsi_id,
            firmware_revision, workarounds;
        int i;
 
-       SBP2_DEBUG("sbp2_parse_unit_directory");
+       SBP2_DEBUG_ENTER();
 
        management_agent_addr = 0x0;
        command_set_spec_id = 0x0;
@@ -1615,7 +1593,7 @@ static int sbp2_max_speed_and_size(struct scsi_id_instance_data *scsi_id)
 {
        struct sbp2scsi_host_info *hi = scsi_id->hi;
 
-       SBP2_DEBUG("sbp2_max_speed_and_size");
+       SBP2_DEBUG_ENTER();
 
        /* Initial setting comes from the hosts speed map */
        scsi_id->speed_code =
@@ -1652,11 +1630,8 @@ static int sbp2_agent_reset(struct scsi_id_instance_data *scsi_id, int wait)
        u64 addr;
        int retval;
 
-       SBP2_DEBUG("sbp2_agent_reset");
+       SBP2_DEBUG_ENTER();
 
-       /*
-        * Ok, let's write to the target's management agent register
-        */
        data = ntohl(SBP2_AGENT_RESET_DATA);
        addr = scsi_id->sbp2_command_block_agent_addr + SBP2_AGENT_RESET_OFFSET;
 
@@ -2004,11 +1979,7 @@ static int sbp2_send_command(struct scsi_id_instance_data *scsi_id,
        unsigned int request_bufflen = SCpnt->request_bufflen;
        struct sbp2_command_info *command;
 
-       SBP2_DEBUG("sbp2_send_command");
-#if (CONFIG_IEEE1394_SBP2_DEBUG >= 2) || defined(CONFIG_IEEE1394_SBP2_PACKET_DUMP)
-       printk("[scsi command]\n   ");
-       scsi_print_command(SCpnt);
-#endif
+       SBP2_DEBUG_ENTER();
        SBP2_DEBUG("SCSI transfer size = %x", request_bufflen);
        SBP2_DEBUG("SCSI s/g elements = %x", (unsigned int)SCpnt->use_sg);
 
@@ -2048,7 +2019,7 @@ static int sbp2_send_command(struct scsi_id_instance_data *scsi_id,
  */
 static unsigned int sbp2_status_to_sense_data(unchar *sbp2_status, unchar *sense_data)
 {
-       SBP2_DEBUG("sbp2_status_to_sense_data");
+       SBP2_DEBUG_ENTER();
 
        /*
         * Ok, it's pretty ugly...   ;-)
@@ -2082,7 +2053,7 @@ static void sbp2_check_sbp2_response(struct scsi_id_instance_data *scsi_id,
 {
        u8 *scsi_buf = SCpnt->request_buffer;
 
-       SBP2_DEBUG("sbp2_check_sbp2_response");
+       SBP2_DEBUG_ENTER();
 
        if (SCpnt->cmnd[0] == INQUIRY && (SCpnt->cmnd[1] & 3) == 0) {
                /*
@@ -2113,7 +2084,7 @@ static int sbp2_handle_status_write(struct hpsb_host *host, int nodeid, int dest
        struct sbp2_command_info *command;
        unsigned long flags;
 
-       SBP2_DEBUG("sbp2_handle_status_write");
+       SBP2_DEBUG_ENTER();
 
        sbp2util_packet_dump(data, length, "sbp2 status write by device", (u32)addr);
 
@@ -2260,7 +2231,10 @@ static int sbp2scsi_queuecommand(struct scsi_cmnd *SCpnt,
        struct sbp2scsi_host_info *hi;
        int result = DID_NO_CONNECT << 16;
 
-       SBP2_DEBUG("sbp2scsi_queuecommand");
+       SBP2_DEBUG_ENTER();
+#if (CONFIG_IEEE1394_SBP2_DEBUG >= 2) || defined(CONFIG_IEEE1394_SBP2_PACKET_DUMP)
+       scsi_print_command(SCpnt);
+#endif
 
        if (!sbp2util_node_is_available(scsi_id))
                goto done;
@@ -2338,7 +2312,7 @@ static void sbp2scsi_complete_all_commands(struct scsi_id_instance_data *scsi_id
        struct sbp2_command_info *command;
        unsigned long flags;
 
-       SBP2_DEBUG("sbp2scsi_complete_all_commands");
+       SBP2_DEBUG_ENTER();
 
        spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
        while (!list_empty(&scsi_id->sbp2_command_orb_inuse)) {
@@ -2371,7 +2345,7 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id,
                                      u32 scsi_status, struct scsi_cmnd *SCpnt,
                                      void (*done)(struct scsi_cmnd *))
 {
-       SBP2_DEBUG("sbp2scsi_complete_command");
+       SBP2_DEBUG_ENTER();
 
        /*
         * Sanity
@@ -2397,7 +2371,7 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id,
         */
        switch (scsi_status) {
        case SBP2_SCSI_STATUS_GOOD:
-               SCpnt->result = DID_OK;
+               SCpnt->result = DID_OK << 16;
                break;
 
        case SBP2_SCSI_STATUS_BUSY:
@@ -2407,16 +2381,11 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id,
 
        case SBP2_SCSI_STATUS_CHECK_CONDITION:
                SBP2_DEBUG("SBP2_SCSI_STATUS_CHECK_CONDITION");
-               SCpnt->result = CHECK_CONDITION << 1;
-
-               /*
-                * Debug stuff
-                */
+               SCpnt->result = CHECK_CONDITION << 1 | DID_OK << 16;
 #if CONFIG_IEEE1394_SBP2_DEBUG >= 1
                scsi_print_command(SCpnt);
-               scsi_print_sense("bh", SCpnt);
+               scsi_print_sense(SBP2_DEVICE_NAME, SCpnt);
 #endif
-
                break;
 
        case SBP2_SCSI_STATUS_SELECTION_TIMEOUT:
@@ -2441,7 +2410,7 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id,
        /*
         * Take care of any sbp2 response data mucking here (RBC stuff, etc.)
         */
-       if (SCpnt->result == DID_OK) {
+       if (SCpnt->result == DID_OK << 16) {
                sbp2_check_sbp2_response(scsi_id, SCpnt);
        }
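
The << 16 fixes matter because the SCSI midlayer packs several fields into SCpnt->result: the low byte holds the SCSI status and bits 16-23 hold the host byte (DID_*). A bare DID_OK (numerically 0) was only accidentally correct, and comparing the whole result against an unshifted DID_OK was simply wrong. A sketch using the standard <scsi/scsi.h> accessors:

	/* result layout: status byte 0-7, message byte 8-15,
	 * host byte 16-23, driver byte 24-31 */
	SCpnt->result = DID_OK << 16;				/* clean success */
	SCpnt->result = DID_OK << 16 | CHECK_CONDITION << 1;	/* with sense   */

	if (host_byte(SCpnt->result) == DID_OK &&
	    status_byte(SCpnt->result) == CHECK_CONDITION)
		; /* sense data should be examined */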
 
@@ -2459,6 +2428,8 @@ static void sbp2scsi_complete_command(struct scsi_id_instance_data *scsi_id,
         * If a unit attention occurs, return busy status so it gets
         * retried... it could have happened because of a 1394 bus reset
         * or hot-plug...
+        * XXX  DID_BUS_BUSY is actually a bad idea because it will defy
+        * the scsi layer's retry logic.
         */
 #if 0
        if ((scsi_status == SBP2_SCSI_STATUS_CHECK_CONDITION) &&
@@ -2624,7 +2595,7 @@ static int sbp2_module_init(void)
 {
        int ret;
 
-       SBP2_DEBUG("sbp2_module_init");
+       SBP2_DEBUG_ENTER();
 
        /* Module load debug option to force one command at a time (serializing I/O) */
        if (serialize_io) {
@@ -2652,7 +2623,7 @@ static int sbp2_module_init(void)
 
 static void __exit sbp2_module_exit(void)
 {
-       SBP2_DEBUG("sbp2_module_exit");
+       SBP2_DEBUG_ENTER();
 
        hpsb_unregister_protocol(&sbp2_driver);
 
diff --git a/drivers/ieee1394/video1394.c b/drivers/ieee1394/video1394.c
index 216dbbf..4e3bd62 100644
@@ -42,7 +42,6 @@
 #include <linux/poll.h>
 #include <linux/smp_lock.h>
 #include <linux/delay.h>
-#include <linux/devfs_fs_kernel.h>
 #include <linux/bitops.h>
 #include <linux/types.h>
 #include <linux/vmalloc.h>
@@ -1322,9 +1321,6 @@ static void video1394_add_host (struct hpsb_host *host)
        class_device_create(hpsb_protocol_class, NULL, MKDEV(
                IEEE1394_MAJOR, minor), 
                NULL, "%s-%d", VIDEO1394_DRIVER_NAME, ohci->host->id);
-       devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, minor),
-                      S_IFCHR | S_IRUSR | S_IWUSR,
-                      "%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id);
 }
 
 
@@ -1332,12 +1328,9 @@ static void video1394_remove_host (struct hpsb_host *host)
 {
        struct ti_ohci *ohci = hpsb_get_hostinfo(&video1394_highlevel, host);
 
-       if (ohci) {
+       if (ohci)
                class_device_destroy(hpsb_protocol_class, MKDEV(IEEE1394_MAJOR,
                        IEEE1394_MINOR_BLOCK_VIDEO1394 * 16 + ohci->host->id));
-               devfs_remove("%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id);
-       }
-       
        return;
 }
 
@@ -1478,12 +1471,8 @@ static long video1394_compat_ioctl(struct file *f, unsigned cmd, unsigned long a
 static void __exit video1394_exit_module (void)
 {
        hpsb_unregister_protocol(&video1394_driver);
-
        hpsb_unregister_highlevel(&video1394_highlevel);
-
-       devfs_remove(VIDEO1394_DRIVER_NAME);
        cdev_del(&video1394_cdev);
-
        PRINT_G(KERN_INFO, "Removed " VIDEO1394_DRIVER_NAME " module");
 }
 
@@ -1500,15 +1489,12 @@ static int __init video1394_init_module (void)
                return ret;
         }
 
-       devfs_mk_dir(VIDEO1394_DRIVER_NAME);
-
        hpsb_register_highlevel(&video1394_highlevel);
 
        ret = hpsb_register_protocol(&video1394_driver);
        if (ret) {
                PRINT_G(KERN_ERR, "video1394: failed to register protocol");
                hpsb_unregister_highlevel(&video1394_highlevel);
-               devfs_remove(VIDEO1394_DRIVER_NAME);
                cdev_del(&video1394_cdev);
                return ret;
        }
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 9d1d2aa..910a8ed 100644
@@ -524,8 +524,6 @@ static int get_more_blocks(struct dio *dio)
         */
        ret = dio->page_errors;
        if (ret == 0) {
-               map_bh->b_state = 0;
-               map_bh->b_size = 0;
                BUG_ON(dio->block_in_file >= dio->final_block_in_request);
                fs_startblk = dio->block_in_file >> dio->blkfactor;
                dio_count = dio->final_block_in_request - dio->block_in_file;
@@ -534,6 +532,9 @@ static int get_more_blocks(struct dio *dio)
                if (dio_count & blkmask)        
                        fs_count++;
 
+               map_bh->b_state = 0;
+               map_bh->b_size = fs_count << dio->inode->i_blkbits;
+
                create = dio->rw == WRITE;
                if (dio->lock_type == DIO_LOCKING) {
                        if (dio->block_in_file < (i_size_read(dio->inode) >>
@@ -542,13 +543,13 @@ static int get_more_blocks(struct dio *dio)
                } else if (dio->lock_type == DIO_NO_LOCKING) {
                        create = 0;
                }
+
                /*
                 * For writes inside i_size we forbid block creations: only
                 * overwrites are permitted.  We fall back to buffered writes
                 * at a higher level for inside-i_size block-instantiating
                 * writes.
                 */
-               map_bh->b_size = fs_count << dio->blkbits;
                ret = (*dio->get_block)(dio->inode, fs_startblk,
                                                map_bh, create);
        }
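
This reorder is the heart of the patch: map_bh->b_size is now an input to the filesystem's get_block callback, telling it how many bytes the direct-io code would like mapped in one call (and it is now computed with the inode's i_blkbits, where the removed line wrongly used dio->blkbits). A filesystem honouring the new contract looks roughly like this; example_get_block() and map_extent() are hypothetical names for illustration:

	/* entry:  bh->b_size = bytes the caller wants mapped
	 * return: bh->b_size = bytes actually mapped (may be less) */
	static int example_get_block(struct inode *inode, sector_t iblock,
				     struct buffer_head *bh, int create)
	{
		unsigned bytes = bh->b_size;	/* a request, not one block */

		/* map as much of the range as is contiguous on disk,
		 * then report back how much that was (hypothetical) */
		map_extent(inode, iblock, &bh->b_blocknr, &bytes, create);
		bh->b_size = bytes;
		set_buffer_mapped(bh);
		return 0;
	}

The XFS hunks later in this merge implement exactly this convention.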
diff --git a/fs/exec.c b/fs/exec.c
index c7397c4..950ebd4 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -616,6 +616,15 @@ static int de_thread(struct task_struct *tsk)
                kmem_cache_free(sighand_cachep, newsighand);
                return -EAGAIN;
        }
+
+       /*
+        * child_reaper ignores SIGKILL, change it now.
+        * Reparenting needs write_lock on tasklist_lock,
+        * so it is safe to do it under read_lock.
+        */
+       if (unlikely(current->group_leader == child_reaper))
+               child_reaper = current;
+
        zap_other_threads(current);
        read_unlock(&tasklist_lock);
 
@@ -699,22 +708,30 @@ static int de_thread(struct task_struct *tsk)
                remove_parent(current);
                remove_parent(leader);
 
-               switch_exec_pids(leader, current);
+
+               /* Become a process group leader with the old leader's pid.
+                * Note: The old leader also uses this pid until release_task
+                *       is called.  Odd but simple and correct.
+                */
+               detach_pid(current, PIDTYPE_PID);
+               current->pid = leader->pid;
+               attach_pid(current, PIDTYPE_PID,  current->pid);
+               attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
+               attach_pid(current, PIDTYPE_SID,  current->signal->session);
+               list_add_tail(&current->tasks, &init_task.tasks);
 
                current->parent = current->real_parent = leader->real_parent;
                leader->parent = leader->real_parent = child_reaper;
                current->group_leader = current;
                leader->group_leader = leader;
 
-               add_parent(current, current->parent);
-               add_parent(leader, leader->parent);
+               add_parent(current);
+               add_parent(leader);
                if (ptrace) {
                        current->ptrace = ptrace;
                        __ptrace_link(current, parent);
                }
 
-               list_del(&current->tasks);
-               list_add_tail(&current->tasks, &init_task.tasks);
                current->exit_signal = SIGCHLD;
 
                BUG_ON(leader->exit_state != EXIT_ZOMBIE);
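
switch_exec_pids() is gone; its work is open-coded so that all the pid-link surgery sits together under the write_lock_irq(&tasklist_lock) the caller already holds. Annotated, the essential steps are:

	/* all under write_lock_irq(&tasklist_lock): */
	detach_pid(current, PIDTYPE_PID);	/* 1. drop our thread pid    */
	current->pid = leader->pid;		/* 2. adopt the leader's pid */
	attach_pid(current, PIDTYPE_PID,  current->pid);
	attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
	attach_pid(current, PIDTYPE_SID,  current->signal->session);
	list_add_tail(&current->tasks, &init_task.tasks);
	/* 3. the old leader keeps the same pid value until release_task()
	 * runs, so two tasks briefly share it; odd but correct */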
@@ -751,7 +768,6 @@ no_thread_group:
                /*
                 * Move our state over to newsighand and switch it in.
                 */
-               spin_lock_init(&newsighand->siglock);
                atomic_set(&newsighand->count, 1);
                memcpy(newsighand->action, oldsighand->action,
                       sizeof(newsighand->action));
@@ -768,7 +784,7 @@ no_thread_group:
                write_unlock_irq(&tasklist_lock);
 
                if (atomic_dec_and_test(&oldsighand->count))
-                       sighand_free(oldsighand);
+                       kmem_cache_free(sighand_cachep, oldsighand);
        }
 
        BUG_ON(!thread_group_leader(current));
diff --git a/fs/xfs/linux-2.6/mrlock.h b/fs/xfs/linux-2.6/mrlock.h
index 16b44c3..1b262b7 100644
@@ -79,7 +79,7 @@ static inline void mrdemote(mrlock_t *mrp)
  * Debug-only routine, without some platform-specific asm code, we can
  * now only answer requests regarding whether we hold the lock for write
  * (reader state is outside our visibility, we only track writer state).
- * Note: means !ismrlocked would give false positivies, so don't do that.
+ * Note: means !ismrlocked would give false positives, so don't do that.
  */
 static inline int ismrlocked(mrlock_t *mrp, int type)
 {
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index c02f7c5..6cbbd16 100644
@@ -372,7 +372,7 @@ static inline int bio_add_buffer(struct bio *bio, struct buffer_head *bh)
  * assumes that all buffers on the page are started at the same time.
  *
  * The fix is two passes across the ioend list - one to start writeback on the
- * bufferheads, and then the second one submit them for I/O.
+ * buffer_heads, and then submit them for I/O on the second pass.
  */
 STATIC void
 xfs_submit_ioend(
@@ -699,7 +699,7 @@ xfs_convert_page(
 
        /*
         * page_dirty is initially a count of buffers on the page before
-        * EOF and is decrememted as we move each into a cleanable state.
+        * EOF and is decremented as we move each into a cleanable state.
         *
         * Derivation:
         *
@@ -842,7 +842,7 @@ xfs_cluster_write(
  * page if possible.
  * The bh->b_state's cannot know if any of the blocks or which block for
  * that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only vaild if the page itself isn't completely uptodate.  Some layers
+ * only valid if the page itself isn't completely uptodate.  Some layers
  * may clear the page dirty flag prior to calling write page, under the
  * assumption the entire page will be written out; by not writing out the
  * whole page the page can be reused before all valid dirty data is
@@ -892,7 +892,7 @@ xfs_page_state_convert(
 
        /*
         * page_dirty is initially a count of buffers on the page before
-        * EOF and is decrememted as we move each into a cleanable state.
+        * EOF and is decremented as we move each into a cleanable state.
         *
         * Derivation:
         *
@@ -1223,10 +1223,9 @@ free_buffers:
 }
 
 STATIC int
-__xfs_get_block(
+__xfs_get_blocks(
        struct inode            *inode,
        sector_t                iblock,
-       unsigned long           blocks,
        struct buffer_head      *bh_result,
        int                     create,
        int                     direct,
@@ -1236,22 +1235,17 @@ __xfs_get_block(
        xfs_iomap_t             iomap;
        xfs_off_t               offset;
        ssize_t                 size;
-       int                     retpbbm = 1;
+       int                     niomap = 1;
        int                     error;
 
        offset = (xfs_off_t)iblock << inode->i_blkbits;
-       if (blocks)
-               size = (ssize_t) min_t(xfs_off_t, LONG_MAX,
-                                       (xfs_off_t)blocks << inode->i_blkbits);
-       else
-               size = 1 << inode->i_blkbits;
-
+       ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+       size = bh_result->b_size;
        VOP_BMAP(vp, offset, size,
-               create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
+               create ? flags : BMAPI_READ, &iomap, &niomap, error);
        if (error)
                return -error;
-
-       if (retpbbm == 0)
+       if (niomap == 0)
                return 0;
 
        if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
@@ -1271,12 +1265,16 @@ __xfs_get_block(
                }
        }
 
-       /* If this is a realtime file, data might be on a new device */
+       /*
+        * If this is a realtime file, data may be on a different device.
+        * to that pointed to from the buffer_head b_bdev currently.
+        */
        bh_result->b_bdev = iomap.iomap_target->bt_bdev;
 
-       /* If we previously allocated a block out beyond eof and
-        * we are now coming back to use it then we will need to
-        * flag it as new even if it has a disk address.
+       /*
+        * If we previously allocated a block out beyond eof and we are
+        * now coming back to use it then we will need to flag it as new
+        * even if it has a disk address.
         */
        if (create &&
            ((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
@@ -1292,26 +1290,24 @@ __xfs_get_block(
                }
        }
 
-       if (blocks) {
+       if (direct || size > (1 << inode->i_blkbits)) {
                ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
                offset = min_t(xfs_off_t,
-                               iomap.iomap_bsize - iomap.iomap_delta,
-                               (xfs_off_t)blocks << inode->i_blkbits);
-               bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset);
+                               iomap.iomap_bsize - iomap.iomap_delta, size);
+               bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
        }
 
        return 0;
 }
 
 int
-xfs_get_block(
+xfs_get_blocks(
        struct inode            *inode,
        sector_t                iblock,
        struct buffer_head      *bh_result,
        int                     create)
 {
-       return __xfs_get_block(inode, iblock,
-                               bh_result->b_size >> inode->i_blkbits,
+       return __xfs_get_blocks(inode, iblock,
                                bh_result, create, 0, BMAPI_WRITE);
 }
 
@@ -1322,8 +1318,7 @@ xfs_get_blocks_direct(
        struct buffer_head      *bh_result,
        int                     create)
 {
-       return __xfs_get_block(inode, iblock,
-                               bh_result->b_size >> inode->i_blkbits,
+       return __xfs_get_blocks(inode, iblock,
                                bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
 }
 
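
This is the XFS side of the direct-io change above: __xfs_get_blocks() drops its separate block-count parameter because the request size now arrives in bh_result->b_size, is passed to VOP_BMAP unchanged, and is trimmed to the actually-mapped length before returning. From a caller's point of view (a sketch; the 64k figure is illustrative):

	struct buffer_head bh = { .b_size = 64 * 1024 }; /* want 64k mapped */
	int error;

	error = xfs_get_blocks(inode, iblock, &bh, 1 /* create */);
	/* on success bh.b_size holds the bytes actually mapped, and
	 * bh.b_bdev may now point at the realtime device for RT files */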
@@ -1339,9 +1334,9 @@ xfs_end_io_direct(
        /*
         * Non-NULL private data means we need to issue a transaction to
         * convert a range from unwritten to written extents.  This needs
-        * to happen from process contect but aio+dio I/O completion
+        * to happen from process context but aio+dio I/O completion
         * happens from irq context so we need to defer it to a workqueue.
-        * This is not nessecary for synchronous direct I/O, but we do
+        * This is not necessary for synchronous direct I/O, but we do
         * it anyway to keep the code uniform and simpler.
         *
         * The core direct I/O code might be changed to always call the
@@ -1358,7 +1353,7 @@ xfs_end_io_direct(
        }
 
        /*
-        * blockdev_direct_IO can return an error even afer the I/O
+        * blockdev_direct_IO can return an error even after the I/O
         * completion handler was called.  Thus we need to protect
         * against double-freeing.
         */
@@ -1405,7 +1400,7 @@ xfs_vm_prepare_write(
        unsigned int            from,
        unsigned int            to)
 {
-       return block_prepare_write(page, from, to, xfs_get_block);
+       return block_prepare_write(page, from, to, xfs_get_blocks);
 }
 
 STATIC sector_t
@@ -1422,7 +1417,7 @@ xfs_vm_bmap(
        VOP_RWLOCK(vp, VRWLOCK_READ);
        VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
        VOP_RWUNLOCK(vp, VRWLOCK_READ);
-       return generic_block_bmap(mapping, block, xfs_get_block);
+       return generic_block_bmap(mapping, block, xfs_get_blocks);
 }
 
 STATIC int
@@ -1430,7 +1425,7 @@ xfs_vm_readpage(
        struct file             *unused,
        struct page             *page)
 {
-       return mpage_readpage(page, xfs_get_block);
+       return mpage_readpage(page, xfs_get_blocks);
 }
 
 STATIC int
@@ -1440,7 +1435,7 @@ xfs_vm_readpages(
        struct list_head        *pages,
        unsigned                nr_pages)
 {
-       return mpage_readpages(mapping, pages, nr_pages, xfs_get_block);
+       return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
 }
 
 STATIC void
diff --git a/fs/xfs/linux-2.6/xfs_aops.h b/fs/xfs/linux-2.6/xfs_aops.h
index 795699f..6071654 100644
@@ -41,6 +41,6 @@ typedef struct xfs_ioend {
 } xfs_ioend_t;
 
 extern struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
 
 #endif /* __XFS_IOPS_H__ */
diff --git a/fs/xfs/linux-2.6/xfs_export.h b/fs/xfs/linux-2.6/xfs_export.h
index e5b0559..e794ca4 100644
@@ -54,7 +54,7 @@
  * Note, the NFS filehandle also includes an fsid portion which
  * may have an inode number in it.  That number is hardcoded to
  * 32bits and there is no way for XFS to intercept it.  In
- * practice this means when exporting an XFS filesytem with 64bit
+ * practice this means when exporting an XFS filesystem with 64bit
  * inodes you should either export the mountpoint (rather than
  * a subdirectory) or use the "fsid" export option.
  */
diff --git a/fs/xfs/linux-2.6/xfs_ioctl32.c b/fs/xfs/linux-2.6/xfs_ioctl32.c
index b6321ab..251bfe4 100644
@@ -72,7 +72,7 @@ xfs_ioctl32_flock(
            copy_in_user(&p->l_pid,     &p32->l_pid,    sizeof(u32)) ||
            copy_in_user(&p->l_pad,     &p32->l_pad,    4*sizeof(u32)))
                return -EFAULT;
-       
+
        return (unsigned long)p;
 }
 
@@ -107,11 +107,15 @@ xfs_ioctl32_bulkstat(
 #endif
 
 STATIC long
-xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
+xfs_compat_ioctl(
+       int             mode,
+       struct file     *file,
+       unsigned        cmd,
+       unsigned long   arg)
 {
+       struct inode    *inode = file->f_dentry->d_inode;
+       vnode_t         *vp = vn_from_inode(inode);
        int             error;
-       struct          inode *inode = f->f_dentry->d_inode;
-       vnode_t         *vp = vn_to_inode(inode);
 
        switch (cmd) {
        case XFS_IOC_DIOINFO:
@@ -189,7 +193,7 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
                return -ENOIOCTLCMD;
        }
 
-       VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
+       VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error);
        VMODIFY(vp);
 
        return error;
@@ -197,18 +201,18 @@ xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
 
 long
 xfs_file_compat_ioctl(
-       struct file             *f,
+       struct file             *file,
        unsigned                cmd,
        unsigned long           arg)
 {
-       return xfs_compat_ioctl(0, f, cmd, arg);
+       return xfs_compat_ioctl(0, file, cmd, arg);
 }
 
 long
 xfs_file_compat_invis_ioctl(
-       struct file             *f,
+       struct file             *file,
        unsigned                cmd,
        unsigned long           arg)
 {
-       return xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
+       return xfs_compat_ioctl(IO_INVIS, file, cmd, arg);
 }
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c
index af48743..1492373 100644
@@ -708,7 +708,7 @@ STATIC void
 xfs_vn_truncate(
        struct inode    *inode)
 {
-       block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block);
+       block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 0169360..84ddf18 100644
@@ -681,7 +681,7 @@ start:
                eventsent = 1;
 
                /*
-                * The iolock was dropped and reaquired in XFS_SEND_DATA
+                * The iolock was dropped and reacquired in XFS_SEND_DATA
                 * so we have to recheck the size when appending.
                 * We will only "goto start;" once, since having sent the
                 * event prevents another call to XFS_SEND_DATA, which is
index 8fed356..841200c 100644 (file)
@@ -92,7 +92,7 @@ typedef enum {
 #define SYNC_FSDATA            0x0020  /* flush fs data (e.g. superblocks) */
 #define SYNC_REFCACHE          0x0040  /* prune some of the nfs ref cache */
 #define SYNC_REMOUNT           0x0080  /* remount readonly, no dummy LRs */
-#define SYNC_QUIESCE           0x0100  /* quiesce fileystem for a snapshot */
+#define SYNC_QUIESCE           0x0100  /* quiesce filesystem for a snapshot */
 
 typedef int    (*vfs_mount_t)(bhv_desc_t *,
                                struct xfs_mount_args *, struct cred *);
index e4e5f05..546f48a 100644 (file)
@@ -221,7 +221,7 @@ xfs_qm_dqunpin_wait(
  * as possible.
  *
  * We must not be holding the AIL_LOCK at this point. Calling incore() to
- * search the buffercache can be a time consuming thing, and AIL_LOCK is a
+ * search the buffer cache can be a time consuming thing, and AIL_LOCK is a
  * spinlock.
  */
 STATIC void
index 1fb757e..73c1e5e 100644 (file)
@@ -289,7 +289,7 @@ xfs_qm_rele_quotafs_ref(
 
 /*
  * This is called at mount time from xfs_mountfs to initialize the quotainfo
- * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
+ * structure and start the global quota manager (xfs_Gqm) if it hasn't done
  * so already. Note that the superblock has not been read in yet.
  */
 void
@@ -807,7 +807,7 @@ xfs_qm_dqattach_one(
  * Given a udquot and gdquot, attach a ptr to the group dquot in the
  * udquot as a hint for future lookups. The idea sounds simple, but the
  * execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering contraints.
+ * already and getting rid of that gets us into lock ordering constraints.
  * The process is complicated more by the fact that the dquots may or may not
  * be locked on entry.
  */
@@ -1094,10 +1094,10 @@ xfs_qm_sync(
                        }
                        /*
                         * If we can't grab the flush lock then if the caller
-                        * really wanted us to give this our best shot,
+                        * really wanted us to give this our best shot, so
                         * see if we can give a push to the buffer before we wait
                         * on the flush lock. At this point, we know that
-                        * eventhough the dquot is being flushed,
+                        * even though the dquot is being flushed,
                         * it has (new) dirty data.
                         */
                        xfs_qm_dqflock_pushbuf_wait(dqp);
@@ -1491,7 +1491,7 @@ xfs_qm_reset_dqcounts(
                /*
                 * Do a sanity check, and if needed, repair the dqblk. Don't
                 * output any warnings because it's perfectly possible to
-                * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
+                * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
                 */
                (void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
                                      "xfs_quotacheck");
@@ -1580,7 +1580,7 @@ xfs_qm_dqiterate(
 
        error = 0;
        /*
-        * This looks racey, but we can't keep an inode lock across a
+        * This looks racy, but we can't keep an inode lock across a
         * trans_reserve. But, this gets called during quotacheck, and that
         * happens only at mount time which is single threaded.
         */
@@ -1824,7 +1824,7 @@ xfs_qm_dqusage_adjust(
         * we have to start from the beginning anyway.
         * Once we're done, we'll log all the dquot bufs.
         *
-        * The *QUOTA_ON checks below may look pretty racey, but quotachecks
+        * The *QUOTA_ON checks below may look pretty racy, but quotachecks
         * and quotaoffs don't race. (Quotachecks happen at mount time only).
         */
        if (XFS_IS_UQUOTA_ON(mp)) {
index 6768843..c55db46 100644 (file)
@@ -912,7 +912,7 @@ xfs_qm_export_dquot(
 
        /*
         * Internally, we don't reset all the timers when quota enforcement
-        * gets turned off. No need to confuse the userlevel code,
+        * gets turned off. No need to confuse the user level code,
         * so return zeroes in that case.
         */
        if (! XFS_IS_QUOTA_ENFORCED(mp)) {
index 3290975..d8e131e 100644 (file)
@@ -804,7 +804,7 @@ xfs_trans_reserve_quota_bydquots(
        }
 
        /*
-        * Didnt change anything critical, so, no need to log
+        * Didn't change anything critical, so, no need to log
         */
        return (0);
 }
index 4ff0f4e..2539af3 100644 (file)
@@ -395,7 +395,7 @@ xfs_acl_allow_set(
  * The access control process to determine the access permission:
  *     if uid == file owner id, use the file owner bits.
  *     if gid == file owner group id, use the file group bits.
- *     scan ACL for a maching user or group, and use matched entry
+ *     scan ACL for a matching user or group, and use matched entry
  *     permission. Use total permissions of all matching group entries,
  *     until all acl entries are exhausted. The final permission produced
  *     by matching acl entry or entries needs to be & with group permission.
index a96e2ff..dc2361d 100644 (file)
@@ -179,7 +179,7 @@ typedef struct xfs_perag
 {
        char            pagf_init;      /* this agf's entry is initialized */
        char            pagi_init;      /* this agi's entry is initialized */
-       char            pagf_metadata;  /* the agf is prefered to be metadata */
+       char            pagf_metadata;  /* the agf is preferred to be metadata */
        char            pagi_inodeok;   /* The agi is ok for inodes */
        __uint8_t       pagf_levels[XFS_BTNUM_AGF];
                                        /* # of levels in bno & cnt btree */
index f4328e1..64ee07d 100644 (file)
@@ -511,7 +511,7 @@ STATIC void
 xfs_alloc_trace_busy(
        char            *name,          /* function tag string */
        char            *str,           /* additional string */
-       xfs_mount_t     *mp,            /* file system mount poing */
+       xfs_mount_t     *mp,            /* file system mount point */
        xfs_agnumber_t  agno,           /* allocation group number */
        xfs_agblock_t   agbno,          /* a.g. relative block number */
        xfs_extlen_t    len,            /* length of extent */
@@ -1843,7 +1843,7 @@ xfs_alloc_fix_freelist(
        } else
                agbp = NULL;
 
-       /* If this is a metadata prefered pag and we are user data
+       /* If this is a metadata preferred pag and we are user data
         * then try somewhere else if we are not being asked to
         * try harder at this point
         */
@@ -2458,7 +2458,7 @@ error0:
 /*
  * AG Busy list management
  * The busy list contains block ranges that have been freed but whose
- * transacations have not yet hit disk.  If any block listed in a busy
+ * transactions have not yet hit disk.  If any block listed in a busy
  * list is reused, the transaction that freed it must be forced to disk
  * before continuing to use the block.
  *
index 3546dea..2d1f892 100644 (file)
@@ -68,7 +68,7 @@ typedef struct xfs_alloc_arg {
        xfs_alloctype_t otype;          /* original allocation type */
        char            wasdel;         /* set if allocation was prev delayed */
        char            wasfromfl;      /* set if allocation is from freelist */
-       char            isfl;           /* set if is freelist blocks - !actg */
+       char            isfl;           /* set if is freelist blocks - !acctg */
        char            userdata;       /* set if this is user data */
 } xfs_alloc_arg_t;
 
index 093fac4..b6e1e02 100644 (file)
@@ -294,7 +294,7 @@ xfs_attr_set_int(xfs_inode_t *dp, const char *name, int namelen,
        xfs_trans_ihold(args.trans, dp);
 
        /*
-        * If the attribute list is non-existant or a shortform list,
+        * If the attribute list is non-existent or a shortform list,
         * upgrade it to a single-leaf-block attribute list.
         */
        if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
@@ -1584,7 +1584,7 @@ out:
  * Fill in the disk block numbers in the state structure for the buffers
  * that are attached to the state structure.
  * This is done so that we can quickly reattach ourselves to those buffers
- * after some set of transaction commit's has released these buffers.
+ * after some set of transaction commits have released these buffers.
  */
 STATIC int
 xfs_attr_fillstate(xfs_da_state_t *state)
@@ -1631,7 +1631,7 @@ xfs_attr_fillstate(xfs_da_state_t *state)
 /*
  * Reattach the buffers to the state structure based on the disk block
  * numbers stored in the state structure.
- * This is done after some set of transaction commit's has released those
+ * This is done after some set of transaction commits have released those
  * buffers from our grip.
  */
 STATIC int
index 7176827..9462be8 100644 (file)
@@ -524,7 +524,7 @@ xfs_attr_shortform_compare(const void *a, const void *b)
 
 /*
  * Copy out entries of shortform attribute lists for attr_list().
- * Shortform atrtribute lists are not stored in hashval sorted order.
+ * Shortform attribute lists are not stored in hashval sorted order.
  * If the output buffer is not large enough to hold them all, then we
  * we have to calculate each entries' hashvalue and sort them before
  * we can begin returning them to the user.
@@ -1541,7 +1541,7 @@ xfs_attr_leaf_toosmall(xfs_da_state_t *state, int *action)
        /*
         * Check for the degenerate case of the block being empty.
         * If the block is empty, we'll simply delete it, no need to
-        * coalesce it with a sibling block.  We choose (aribtrarily)
+        * coalesce it with a sibling block.  We choose (arbitrarily)
         * to merge with the forward block unless it is NULL.
         */
        if (count == 0) {
index 9880ada..f4fe371 100644 (file)
@@ -31,7 +31,7 @@
  * The behavior chain is ordered based on the 'position' number which
  * lives in the first field of the ops vector (higher numbers first).
  *
- * Attemps to insert duplicate ops result in an EINVAL return code.
+ * Attempts to insert duplicate ops result in an EINVAL return code.
  * Otherwise, return 0 to indicate success.
  */
 int
@@ -84,7 +84,7 @@ bhv_insert(bhv_head_t *bhp, bhv_desc_t *bdp)
 
 /*
  * Remove a behavior descriptor from a position in a behavior chain;
- * the postition is guaranteed not to be the first position.
+ * the position is guaranteed not to be the first position.
  * Should only be called by the bhv_remove() macro.
  */
 void
index 2cd89bb..1d8ff10 100644 (file)
@@ -39,7 +39,7 @@
  * behaviors is synchronized with operations-in-progress (oip's) so that
  * the oip's always see a consistent view of the chain.
  *
- * The term "interpostion" is used to refer to the act of inserting
+ * The term "interposition" is used to refer to the act of inserting
  * a behavior such that it interposes on (i.e., is inserted in front
  * of) a particular other behavior.  A key example of this is when a
  * system implementing distributed single system image wishes to
@@ -51,7 +51,7 @@
  *
  * Behavior synchronization is logic which is necessary under certain
  * circumstances that there is no conflict between ongoing operations
- * traversing the behavior chain and those dunamically modifying the
+ * traversing the behavior chain and those dynamically modifying the
  * behavior chain.  Because behavior synchronization adds extra overhead
  * to virtual operation invocation, we want to restrict, as much as
  * we can, the requirement for this extra code, to those situations
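
As background for the comment fixes above: a behavior chain keeps its ops vectors sorted by their position number, highest first, and an attempt to insert a second vector at an occupied position fails with EINVAL. A minimal standalone sketch of that insert rule (simplified, hypothetical types; not the XFS bhv_* implementation):

#include <errno.h>
#include <stdio.h>

/* One behavior: an ops vector whose first field is its chain position. */
struct bhv_desc {
        int position;                   /* higher numbers sit earlier in the chain */
        struct bhv_desc *next;
};

/* Insert ordered by descending position; duplicate positions are refused. */
static int bhv_insert(struct bhv_desc **head, struct bhv_desc *bdp)
{
        struct bhv_desc **pp = head;

        while (*pp && (*pp)->position > bdp->position)
                pp = &(*pp)->next;
        if (*pp && (*pp)->position == bdp->position)
                return EINVAL;          /* duplicate ops: refuse to interpose */
        bdp->next = *pp;
        *pp = bdp;
        return 0;
}

int main(void)
{
        struct bhv_desc a = { .position = 10 }, b = { .position = 20 };
        struct bhv_desc dup = { .position = 10 };
        struct bhv_desc *chain = NULL, *p;

        bhv_insert(&chain, &a);
        bhv_insert(&chain, &b);
        printf("duplicate insert -> %d\n", bhv_insert(&chain, &dup)); /* EINVAL */
        for (p = chain; p; p = p->next)
                printf("position %d\n", p->position);   /* 20, then 10 */
        return 0;
}
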
index 2d702e4..d384e48 100644 (file)
@@ -3467,113 +3467,6 @@ done:
        return error;
 }
 
-xfs_bmbt_rec_t *                       /* pointer to found extent entry */
-xfs_bmap_do_search_extents(
-       xfs_bmbt_rec_t  *base,          /* base of extent list */
-       xfs_extnum_t    lastx,          /* last extent index used */
-       xfs_extnum_t    nextents,       /* number of file extents */
-       xfs_fileoff_t   bno,            /* block number searched for */
-       int             *eofp,          /* out: end of file found */
-       xfs_extnum_t    *lastxp,        /* out: last extent index */
-       xfs_bmbt_irec_t *gotp,          /* out: extent entry found */
-       xfs_bmbt_irec_t *prevp)         /* out: previous extent entry found */
-{
-       xfs_bmbt_rec_t  *ep;            /* extent list entry pointer */
-       xfs_bmbt_irec_t got;            /* extent list entry, decoded */
-       int             high;           /* high index of binary search */
-       int             low;            /* low index of binary search */
-
-       /*
-        * Initialize the extent entry structure to catch access to
-        * uninitialized br_startblock field.
-        */
-       got.br_startoff = 0xffa5a5a5a5a5a5a5LL;
-       got.br_blockcount = 0xa55a5a5a5a5a5a5aLL;
-       got.br_state = XFS_EXT_INVALID;
-
-#if XFS_BIG_BLKNOS
-       got.br_startblock = 0xffffa5a5a5a5a5a5LL;
-#else
-       got.br_startblock = 0xffffa5a5;
-#endif
-
-       if (lastx != NULLEXTNUM && lastx < nextents)
-               ep = base + lastx;
-       else
-               ep = NULL;
-       prevp->br_startoff = NULLFILEOFF;
-       if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
-           bno < got.br_startoff +
-                 (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
-               *eofp = 0;
-       else if (ep && lastx < nextents - 1 &&
-                bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
-                bno < got.br_startoff +
-                      (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
-               lastx++;
-               ep++;
-               *eofp = 0;
-       } else if (nextents == 0)
-               *eofp = 1;
-       else if (bno == 0 &&
-                (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
-               ep = base;
-               lastx = 0;
-               got.br_blockcount = xfs_bmbt_get_blockcount(ep);
-               *eofp = 0;
-       } else {
-               low = 0;
-               high = nextents - 1;
-               /* binary search the extents array */
-               while (low <= high) {
-                       XFS_STATS_INC(xs_cmp_exlist);
-                       lastx = (low + high) >> 1;
-                       ep = base + lastx;
-                       got.br_startoff = xfs_bmbt_get_startoff(ep);
-                       got.br_blockcount = xfs_bmbt_get_blockcount(ep);
-                       if (bno < got.br_startoff)
-                               high = lastx - 1;
-                       else if (bno >= got.br_startoff + got.br_blockcount)
-                               low = lastx + 1;
-                       else {
-                               got.br_startblock = xfs_bmbt_get_startblock(ep);
-                               got.br_state = xfs_bmbt_get_state(ep);
-                               *eofp = 0;
-                               *lastxp = lastx;
-                               *gotp = got;
-                               return ep;
-                       }
-               }
-               if (bno >= got.br_startoff + got.br_blockcount) {
-                       lastx++;
-                       if (lastx == nextents) {
-                               *eofp = 1;
-                               got.br_startblock = xfs_bmbt_get_startblock(ep);
-                               got.br_state = xfs_bmbt_get_state(ep);
-                               *prevp = got;
-                               ep = NULL;
-                       } else {
-                               *eofp = 0;
-                               xfs_bmbt_get_all(ep, prevp);
-                               ep++;
-                               got.br_startoff = xfs_bmbt_get_startoff(ep);
-                               got.br_blockcount = xfs_bmbt_get_blockcount(ep);
-                       }
-               } else {
-                       *eofp = 0;
-                       if (ep > base)
-                               xfs_bmbt_get_all(ep - 1, prevp);
-               }
-       }
-       if (ep) {
-               got.br_startblock = xfs_bmbt_get_startblock(ep);
-               got.br_state = xfs_bmbt_get_state(ep);
-       }
-       *lastxp = lastx;
-       *gotp = got;
-       return ep;
-}
-
 /*
  * Search the extent records for the entry containing block bno.
  * If bno lies in a hole, point to the next entry.  If bno lies
index 011ccaa..f83399c 100644 (file)
@@ -362,14 +362,6 @@ xfs_bmbt_rec_t *
 xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
                        xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
 
-/*
- * Search an extent list for the extent which includes block
- * bno.
- */
-xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
-                       xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
-                       xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
-
 #endif /* __KERNEL__ */
 
 #endif /* __XFS_BMAP_H__ */
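
The helper deleted above binary-searched the in-core extent list for the record covering a given file block, pointing at the following record when the block falls in a hole. A standalone sketch of the same search, with simplified types standing in for xfs_bmbt_rec_t and the xfs_bmbt_get_* accessors (illustration only, not the replacement kernel code):

#include <stdio.h>

struct extent {
        unsigned long long startoff;    /* first file block covered */
        unsigned long long blockcount;  /* number of blocks covered */
};

/*
 * Return the index of the extent containing bno; if bno lies in a hole,
 * return the index of the next extent (== n when bno is past EOF).
 */
static long find_extent(const struct extent *ext, long n,
                        unsigned long long bno)
{
        long low = 0, high = n - 1;

        while (low <= high) {
                long mid = (low + high) / 2;

                if (bno < ext[mid].startoff)
                        high = mid - 1;
                else if (bno >= ext[mid].startoff + ext[mid].blockcount)
                        low = mid + 1;
                else
                        return mid;     /* bno lies inside this extent */
        }
        return low;                     /* hole or EOF */
}

int main(void)
{
        struct extent map[] = { { 0, 4 }, { 8, 2 }, { 20, 5 } };

        printf("%ld\n", find_extent(map, 3, 9));        /* 1: inside */
        printf("%ld\n", find_extent(map, 3, 5));        /* 1: hole */
        printf("%ld\n", find_extent(map, 3, 30));       /* 3: past EOF */
        return 0;
}
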
index 07e2324..5fed156 100644 (file)
@@ -98,12 +98,12 @@ xfs_buf_item_flush_log_debug(
 }
 
 /*
- * This function is called to verify that our caller's have logged
+ * This function is called to verify that our callers have logged
  * all the bytes that they changed.
  *
  * It does this by comparing the original copy of the buffer stored in
  * the buf log item's bli_orig array to the current copy of the buffer
- * and ensuring that all bytes which miscompare are set in the bli_logged
+ * and ensuring that all bytes which mismatch are set in the bli_logged
  * array of the buf log item.
  */
 STATIC void
index 433ec53..d0035c6 100644 (file)
@@ -38,7 +38,7 @@ typedef struct xfs_cap_set {
 /*
  * For Linux, we take the bitfields directly from capability.h
  * and no longer attempt to keep this attribute ondisk compatible
- * with IRIX.  Since this attribute is only set on exectuables,
+ * with IRIX.  Since this attribute is only set on executables,
  * it just doesn't make much sense to try.  We do use a different
  * named attribute though, to avoid confusion.
  */
index 4bae3a7..8988b90 100644 (file)
@@ -840,7 +840,7 @@ xfs_da_node_toosmall(xfs_da_state_t *state, int *action)
        /*
         * Check for the degenerate case of the block being empty.
         * If the block is empty, we'll simply delete it, no need to
-        * coalesce it with a sibling block.  We choose (aribtrarily)
+        * coalesce it with a sibling block.  We choose (arbitrarily)
         * to merge with the forward block unless it is NULL.
         */
        if (count == 0) {
index bd5cee6..972ded5 100644 (file)
@@ -533,7 +533,7 @@ xfs_dir2_block_getdents(
 
        /*
         * Reached the end of the block.
-        * Set the offset to a nonexistent block 1 and return.
+        * Set the offset to a non-existent block 1 and return.
         */
        *eofp = 1;
 
index 08648b1..0f5e2f2 100644 (file)
@@ -515,7 +515,7 @@ xfs_dir2_leaf_addname(
                        ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
                               XFS_DIR2_NULL_DATAPTR);
                        /*
-                        * Copy entries down to copver the stale entry
+                        * Copy entries down to cover the stale entry
                         * and make room for the new entry.
                         */
                        if (highstale - index > 0)
index af556f1..ac511ab 100644 (file)
@@ -830,7 +830,7 @@ xfs_dir2_leafn_rebalance(
                state->inleaf = 1;
                blk2->index = 0;
                cmn_err(CE_ALERT,
-                       "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting orignal leaf: "
+                       "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: "
                        "blk1->index %d\n",
                        blk1->index);
        }
index ee88751..6d71186 100644 (file)
@@ -1341,7 +1341,7 @@ xfs_dir_leaf_toosmall(xfs_da_state_t *state, int *action)
        /*
         * Check for the degenerate case of the block being empty.
         * If the block is empty, we'll simply delete it, no need to
-        * coalesce it with a sibling block.  We choose (aribtrarily)
+        * coalesce it with a sibling block.  We choose (arbitrarily)
         * to merge with the forward block unless it is NULL.
         */
        if (count == 0) {
index 56caa88..dfa3527 100644 (file)
@@ -477,7 +477,7 @@ xfs_fs_counts(
  *
  * xfs_reserve_blocks is called to set m_resblks
  * in the in-core mount table. The number of unused reserved blocks
- * is kept in m_resbls_avail.
+ * is kept in m_resblks_avail.
  *
  * Reserve the requested number of blocks if available. Otherwise return
  * as many as possible to satisfy the request. The actual number
index 0024892..4eeb856 100644 (file)
@@ -136,7 +136,7 @@ xfs_ialloc_ag_alloc(
        int             ninodes;        /* num inodes per buf */
        xfs_agino_t     thisino;        /* current inode number, for loop */
        int             version;        /* inode version number to use */
-       int             isaligned;      /* inode allocation at stripe unit */
+       int             isaligned = 0;  /* inode allocation at stripe unit */
                                        /* boundary */
 
        args.tp = tp;
@@ -152,47 +152,75 @@ xfs_ialloc_ag_alloc(
                return XFS_ERROR(ENOSPC);
        args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
        /*
-        * Set the alignment for the allocation.
-        * If stripe alignment is turned on then align at stripe unit
-        * boundary.
-        * If the cluster size is smaller than a filesystem block
-        * then we're doing I/O for inodes in filesystem block size pieces,
-        * so don't need alignment anyway.
-        */
-       isaligned = 0;
-       if (args.mp->m_sinoalign) {
-               ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
-               args.alignment = args.mp->m_dalign;
-               isaligned = 1;
-       } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
-           args.mp->m_sb.sb_inoalignmt >=
-           XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
-               args.alignment = args.mp->m_sb.sb_inoalignmt;
-       else
-               args.alignment = 1;
+        * First try to allocate inodes contiguous with the last-allocated
+        * chunk of inodes.  If the filesystem is striped, this will fill
+        * an entire stripe unit with inodes.
+        */
        agi = XFS_BUF_TO_AGI(agbp);
-       /*
-        * Need to figure out where to allocate the inode blocks.
-        * Ideally they should be spaced out through the a.g.
-        * For now, just allocate blocks up front.
-        */
-       args.agbno = be32_to_cpu(agi->agi_root);
-       args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno),
-                                   args.agbno);
-       /*
-        * Allocate a fixed-size extent of inodes.
-        */
-       args.type = XFS_ALLOCTYPE_NEAR_BNO;
-       args.mod = args.total = args.wasdel = args.isfl = args.userdata =
-               args.minalignslop = 0;
-       args.prod = 1;
-       /*
-        * Allow space for the inode btree to split.
-        */
-       args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
-       if ((error = xfs_alloc_vextent(&args)))
-               return error;
+       newino = be32_to_cpu(agi->agi_newino);
+       if(likely(newino != NULLAGINO)) {
+               args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
+                               XFS_IALLOC_BLOCKS(args.mp);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                               be32_to_cpu(agi->agi_seqno), args.agbno);
+               args.type = XFS_ALLOCTYPE_THIS_BNO;
+               args.mod = args.total = args.wasdel = args.isfl =
+                       args.userdata = args.minalignslop = 0;
+               args.prod = 1;
+               args.alignment = 1;
+               /*
+                * Allow space for the inode btree to split.
+                */
+               args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       } else
+               args.fsbno = NULLFSBLOCK;
 
+       if (unlikely(args.fsbno == NULLFSBLOCK)) {
+               /*
+                * Set the alignment for the allocation.
+                * If stripe alignment is turned on then align at stripe unit
+                * boundary.
+                * If the cluster size is smaller than a filesystem block 
+                * then we're doing I/O for inodes in filesystem block size 
+                * pieces, so don't need alignment anyway.
+                */
+               isaligned = 0;
+               if (args.mp->m_sinoalign) {
+                       ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+                       args.alignment = args.mp->m_dalign;
+                       isaligned = 1;
+               } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+                          args.mp->m_sb.sb_inoalignmt >= 
+                          XFS_B_TO_FSBT(args.mp,
+                               XFS_INODE_CLUSTER_SIZE(args.mp)))
+                               args.alignment = args.mp->m_sb.sb_inoalignmt;
+               else
+                       args.alignment = 1;
+               /*
+                * Need to figure out where to allocate the inode blocks.
+                * Ideally they should be spaced out through the a.g.
+                * For now, just allocate blocks up front.
+                */
+               args.agbno = be32_to_cpu(agi->agi_root);
+               args.fsbno = XFS_AGB_TO_FSB(args.mp,
+                               be32_to_cpu(agi->agi_seqno), args.agbno);
+               /*
+                * Allocate a fixed-size extent of inodes.
+                */
+               args.type = XFS_ALLOCTYPE_NEAR_BNO;
+               args.mod = args.total = args.wasdel = args.isfl =
+                       args.userdata = args.minalignslop = 0;
+               args.prod = 1;
+               /*
+                * Allow space for the inode btree to split.
+                */
+               args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+               if ((error = xfs_alloc_vextent(&args)))
+                       return error;
+       }
        /*
         * If stripe alignment is turned on, then try again with cluster
         * alignment.
@@ -1023,7 +1051,7 @@ xfs_difree(
        rec.ir_freecount++;
 
        /*
-        * When an inode cluster is free, it becomes elgible for removal
+        * When an inode cluster is free, it becomes eligible for removal
         */
        if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
            (rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
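
The xfs_ialloc.c rework above first attempts an exact-placement allocation for the blocks immediately after the most recently allocated inode chunk (agi_newino), and only falls back to the old aligned near-bno path when that fails. Purely as an illustration of that two-phase policy, with a toy free-space map and hypothetical helpers in place of xfs_alloc_vextent():

#include <stdio.h>

#define NO_BLOCK   (-1LL)
#define CHUNK_LEN  8            /* blocks per inode chunk, illustrative */

/* Exact placement: succeed only if the requested range is entirely free. */
static long long alloc_exact(long long want, const char *freemap,
                             long long nblocks)
{
        long long i;

        if (want < 0 || want + CHUNK_LEN > nblocks)
                return NO_BLOCK;
        for (i = want; i < want + CHUNK_LEN; i++)
                if (!freemap[i])
                        return NO_BLOCK;
        return want;
}

/* Fallback: first free, aligned run anywhere (the "near bno" path). */
static long long alloc_aligned(const char *freemap, long long nblocks,
                               int align)
{
        long long bno, i;

        for (bno = 0; bno + CHUNK_LEN <= nblocks; bno += align) {
                for (i = 0; i < CHUNK_LEN && freemap[bno + i]; i++)
                        ;
                if (i == CHUNK_LEN)
                        return bno;
        }
        return NO_BLOCK;
}

/* The patch's strategy: contiguous with the last chunk, then fall back. */
static long long place_chunk(long long last_chunk, const char *freemap,
                             long long nblocks, int align)
{
        long long bno = NO_BLOCK;

        if (last_chunk != NO_BLOCK)
                bno = alloc_exact(last_chunk + CHUNK_LEN, freemap, nblocks);
        if (bno == NO_BLOCK)
                bno = alloc_aligned(freemap, nblocks, align);
        return bno;
}

int main(void)
{
        char freemap[64];
        long long i;

        for (i = 0; i < 64; i++)
                freemap[i] = 1;
        for (i = 16; i < 24; i++)
                freemap[i] = 0;         /* the previously allocated chunk */

        /* 24: contiguous with the chunk at 16, no fallback needed. */
        printf("placed at %lld\n", place_chunk(16, freemap, 64, 4));
        return 0;
}
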
index 3ce35a6..bb33113 100644 (file)
@@ -509,7 +509,7 @@ retry:
                } else {
                        /*
                         * If the inode is not fully constructed due to
-                        * filehandle mistmatches wait for the inode to go
+                        * filehandle mismatches wait for the inode to go
                         * away and try again.
                         *
                         * iget_locked will call __wait_on_freeing_inode
index 88a517f..48146bd 100644 (file)
@@ -160,7 +160,7 @@ xfs_inotobp(
        xfs_dinode_t    *dip;
 
        /*
-        * Call the space managment code to find the location of the
+        * Call the space management code to find the location of the
         * inode on disk.
         */
        imap.im_blkno = 0;
@@ -837,7 +837,7 @@ xfs_dic2xflags(
 
 /*
  * Given a mount structure and an inode number, return a pointer
- * to a newly allocated in-core inode coresponding to the given
+ * to a newly allocated in-core inode corresponding to the given
  * inode number.
  *
  * Initialize the inode's attributes and extent pointers if it
@@ -2723,7 +2723,7 @@ xfs_ipin(
 /*
  * Decrement the pin count of the given inode, and wake up
  * anyone in xfs_iwait_unpin() if the count goes to 0.  The
- * inode must have been previoulsy pinned with a call to xfs_ipin().
+ * inode must have been previously pinned with a call to xfs_ipin().
  */
 void
 xfs_iunpin(
@@ -3690,7 +3690,7 @@ void
 xfs_iext_add(
        xfs_ifork_t     *ifp,           /* inode fork pointer */
        xfs_extnum_t    idx,            /* index to begin adding exts */
-       int             ext_diff)       /* nubmer of extents to add */
+       int             ext_diff)       /* number of extents to add */
 {
        int             byte_diff;      /* new bytes being added */
        int             new_size;       /* size of extents after adding */
@@ -4038,7 +4038,7 @@ xfs_iext_remove_indirect(
        xfs_extnum_t    ext_diff;       /* extents to remove in current list */
        xfs_extnum_t    nex1;           /* number of extents before idx */
        xfs_extnum_t    nex2;           /* extents after idx + count */
-       int             nlists;         /* entries in indirecton array */
+       int             nlists;         /* entries in indirection array */
        int             page_idx = idx; /* index in target extent list */
 
        ASSERT(ifp->if_flags & XFS_IFEXTIREC);
@@ -4291,9 +4291,9 @@ xfs_iext_bno_to_ext(
        xfs_filblks_t   blockcount = 0; /* number of blocks in extent */
        xfs_bmbt_rec_t  *ep = NULL;     /* pointer to target extent */
        xfs_ext_irec_t  *erp = NULL;    /* indirection array pointer */
-       int             high;           /* upper boundry in search */
+       int             high;           /* upper boundary in search */
        xfs_extnum_t    idx = 0;        /* index of target extent */
-       int             low;            /* lower boundry in search */
+       int             low;            /* lower boundary in search */
        xfs_extnum_t    nextents;       /* number of file extents */
        xfs_fileoff_t   startoff = 0;   /* start offset of extent */
 
index 36aa1fc..7497a48 100644 (file)
@@ -580,7 +580,7 @@ xfs_inode_item_unpin_remove(
  * been or is in the process of being flushed, then (ideally) we'd like to
  * see if the inode's buffer is still incore, and if so give it a nudge.
  * We delay doing so until the pushbuf routine, though, to avoid holding
- * the AIL lock across a call to the blackhole which is the buffercache.
+ * the AIL lock across a call to the blackhole which is the buffer cache.
  * Also we don't want to sleep in any device strategy routines, which can happen
  * if we do the subsequent bawrite in here.
  */
index 32247b6..94068d0 100644 (file)
@@ -272,7 +272,7 @@ xfs_bulkstat(
        size_t                  statstruct_size, /* sizeof struct filling */
        char                    __user *ubuffer, /* buffer with inode stats */
        int                     flags,  /* defined in xfs_itable.h */
-       int                     *done)  /* 1 if there're more stats to get */
+       int                     *done)  /* 1 if there are more stats to get */
 {
        xfs_agblock_t           agbno=0;/* allocation group block number */
        xfs_buf_t               *agbp;  /* agi header buffer */
@@ -676,7 +676,7 @@ xfs_bulkstat_single(
        xfs_mount_t             *mp,    /* mount point for filesystem */
        xfs_ino_t               *lastinop, /* inode to return */
        char                    __user *buffer, /* buffer with inode stats */
-       int                     *done)  /* 1 if there're more stats to get */
+       int                     *done)  /* 1 if there are more stats to get */
 {
        int                     count;  /* count value for bulkstat call */
        int                     error;  /* return value */
index 047d834..11eb4e1 100644 (file)
@@ -60,7 +60,7 @@ xfs_bulkstat(
        size_t          statstruct_size,/* sizeof struct that we're filling */
        char            __user *ubuffer,/* buffer with inode stats */
        int             flags,          /* flag to control access method */
-       int             *done);         /* 1 if there're more stats to get */
+       int             *done);         /* 1 if there are more stats to get */
 
 int
 xfs_bulkstat_single(
index 9176995..32e841d 100644 (file)
@@ -59,7 +59,7 @@ STATIC xlog_t *  xlog_alloc_log(xfs_mount_t   *mp,
                                int             num_bblks);
 STATIC int      xlog_space_left(xlog_t *log, int cycle, int bytes);
 STATIC int      xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void     xlog_unalloc_log(xlog_t *log);
+STATIC void     xlog_dealloc_log(xlog_t *log);
 STATIC int      xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
                            int nentries, xfs_log_ticket_t tic,
                            xfs_lsn_t *start_lsn,
@@ -304,7 +304,7 @@ xfs_log_done(xfs_mount_t    *mp,
        if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
            (flags & XFS_LOG_REL_PERM_RESERV)) {
                /*
-                * Release ticket if not permanent reservation or a specifc
+                * Release ticket if not permanent reservation or a specific
                 * request has been made to release a permanent reservation.
                 */
                xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
@@ -511,7 +511,7 @@ xfs_log_mount(xfs_mount_t   *mp,
                        vfsp->vfs_flag |= VFS_RDONLY;
                if (error) {
                        cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
-                       xlog_unalloc_log(mp->m_log);
+                       xlog_dealloc_log(mp->m_log);
                        return error;
                }
        }
@@ -667,7 +667,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
                 *
                 * Go through the motions of sync'ing and releasing
                 * the iclog, even though no I/O will actually happen,
-                * we need to wait for other log I/O's that may already
+                * we need to wait for other log I/Os that may already
                 * be in progress.  Do this as a separate section of
                 * code so we'll know if we ever get stuck here that
                 * we're in this odd situation of trying to unmount
@@ -704,7 +704,7 @@ xfs_log_unmount_write(xfs_mount_t *mp)
 void
 xfs_log_unmount_dealloc(xfs_mount_t *mp)
 {
-       xlog_unalloc_log(mp->m_log);
+       xlog_dealloc_log(mp->m_log);
 }
 
 /*
@@ -1492,7 +1492,7 @@ xlog_sync(xlog_t          *log,
                ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
                ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
 
-               /* account for internal log which does't start at block #0 */
+               /* account for internal log which doesn't start at block #0 */
                XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
                XFS_BUF_WRITE(bp);
                if ((error = XFS_bwrite(bp))) {
@@ -1506,10 +1506,10 @@ xlog_sync(xlog_t                *log,
 
 
 /*
- * Unallocate a log structure
+ * Deallocate a log structure
  */
 void
-xlog_unalloc_log(xlog_t *log)
+xlog_dealloc_log(xlog_t *log)
 {
        xlog_in_core_t  *iclog, *next_iclog;
        xlog_ticket_t   *tic, *next_tic;
@@ -1539,7 +1539,7 @@ xlog_unalloc_log(xlog_t *log)
        if ((log->l_ticket_cnt != log->l_ticket_tcnt)  &&
            !XLOG_FORCED_SHUTDOWN(log)) {
                xfs_fs_cmn_err(CE_WARN, log->l_mp,
-                       "xlog_unalloc_log: (cnt: %d, total: %d)",
+                       "xlog_dealloc_log: (cnt: %d, total: %d)",
                        log->l_ticket_cnt, log->l_ticket_tcnt);
                /* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
 
@@ -1562,7 +1562,7 @@ xlog_unalloc_log(xlog_t *log)
 #endif
        log->l_mp->m_log = NULL;
        kmem_free(log, sizeof(xlog_t));
-}      /* xlog_unalloc_log */
+}      /* xlog_dealloc_log */
 
 /*
  * Update counters atomically now that memcpy is done.
@@ -2829,7 +2829,7 @@ xlog_state_release_iclog(xlog_t           *log,
 
        /*
         * We let the log lock go, so it's possible that we hit a log I/O
-        * error or someother SHUTDOWN condition that marks the iclog
+        * error or some other SHUTDOWN condition that marks the iclog
         * as XLOG_STATE_IOERROR before the bwrite. However, we know that
         * this iclog has consistent data, so we ignore IOERROR
         * flags after this point.
index 4b2ac88..eacb3d4 100644 (file)
@@ -27,7 +27,7 @@
 
 #ifdef __KERNEL__
 /*
- * By comparing each compnent, we don't have to worry about extra
+ * By comparing each component, we don't have to worry about extra
  * endian issues in treating two 32 bit numbers as one 64 bit number
  */
 static inline xfs_lsn_t        _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
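
The corrected comment above is about comparing a log sequence number component by component instead of as one 64-bit quantity. Assuming the usual packing of a 32-bit cycle count above a 32-bit block number (an assumption for this sketch, not shown in the hunk), the idea looks like:

#include <stdint.h>
#include <stdio.h>

typedef int64_t lsn_t;          /* cycle in the high 32 bits, block low */

#define CYCLE_OF(lsn)   ((uint32_t)((lsn) >> 32))
#define BLOCK_OF(lsn)   ((uint32_t)(lsn))

/* Component-wise compare: <0, 0, >0; never treats the pair as one value. */
static int lsn_cmp(lsn_t a, lsn_t b)
{
        if (CYCLE_OF(a) != CYCLE_OF(b))
                return CYCLE_OF(a) < CYCLE_OF(b) ? -1 : 1;
        if (BLOCK_OF(a) != BLOCK_OF(b))
                return BLOCK_OF(a) < BLOCK_OF(b) ? -1 : 1;
        return 0;
}

int main(void)
{
        lsn_t a = ((lsn_t)2 << 32) | 100;       /* cycle 2, block 100 */
        lsn_t b = ((lsn_t)3 << 32) | 5;         /* cycle 3, block 5 */

        printf("%d\n", lsn_cmp(a, b));          /* -1: cycle dominates */
        return 0;
}
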
index add13f5..1f0016b 100644 (file)
@@ -583,7 +583,7 @@ xlog_find_head(
                 *        x | x ... | x - 1 | x
                 * Another case that fits this picture would be
                 *        x | x + 1 | x ... | x
-                * In this case the head really is somwhere at the end of the
+                * In this case the head really is somewhere at the end of the
                 * log, as one of the latest writes at the beginning was
                 * incomplete.
                 * One more case is
@@ -2799,7 +2799,7 @@ xlog_recover_do_trans(
                 * we don't need to worry about the block number being
                 * truncated in > 1 TB buffers because in user-land,
                 * we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
-                * the blkno's will get through the user-mode buffer
+                * the blknos will get through the user-mode buffer
                 * cache properly.  The only bad case is o32 kernels
                 * where xfs_daddr_t is 32-bits but mount will warn us
                 * off a > 1 TB filesystem before we get here.
index 20e8abc..72e7e78 100644 (file)
@@ -393,7 +393,7 @@ xfs_initialize_perag(
                                break;
                        }
 
-                       /* This ag is prefered for inodes */
+                       /* This ag is preferred for inodes */
                        pag = &mp->m_perag[index];
                        pag->pagi_inodeok = 1;
                        if (index < max_metadata)
@@ -1728,7 +1728,7 @@ xfs_mount_log_sbunit(
  * We cannot use the hotcpu_register() function because it does
  * not allow notifier instances. We need a notifier per filesystem
  * as we need to be able to identify the filesystem to balance
- * the counters out. This is acheived by having a notifier block
+ * the counters out. This is achieved by having a notifier block
  * embedded in the xfs_mount_t and doing pointer magic to get the
  * mount pointer from the notifier block address.
  */
index ebd7396..66cbee7 100644 (file)
@@ -379,7 +379,7 @@ typedef struct xfs_mount {
 #endif
        int                     m_dalign;       /* stripe unit */
        int                     m_swidth;       /* stripe width */
-       int                     m_sinoalign;    /* stripe unit inode alignmnt */
+       int                     m_sinoalign;    /* stripe unit inode alignment */
        int                     m_attr_magicpct;/* 37% of the blocksize */
        int                     m_dir_magicpct; /* 37% of the dir blocksize */
        __uint8_t               m_mk_sharedro;  /* mark shared ro on unmount */
index 82a08ba..4f6a034 100644 (file)
@@ -31,7 +31,7 @@
 typedef __uint32_t     xfs_dqid_t;
 
 /*
- * Eventhough users may not have quota limits occupying all 64-bits,
+ * Even though users may not have quota limits occupying all 64-bits,
  * they may need 64-bit accounting. Hence, 64-bit quota-counters,
  * and quota-limits. This is a waste in the common case, but hey ...
  */
@@ -246,7 +246,7 @@ typedef struct xfs_qoff_logformat {
 #ifdef __KERNEL__
 /*
  * This check is done typically without holding the inode lock;
- * that may seem racey, but it is harmless in the context that it is used.
+ * that may seem racy, but it is harmless in the context that it is used.
  * The inode cannot go inactive as long a reference is kept, and
  * therefore if dquot(s) were attached, they'll stay consistent.
  * If, for example, the ownership of the inode changes while
index 2918956..8d056ce 100644 (file)
@@ -490,7 +490,7 @@ xfs_trans_mod_sb(
        case XFS_TRANS_SB_RES_FREXTENTS:
                /*
                 * The allocation has already been applied to the
-                * in-core superblocks's counter.  This should only
+                * in-core superblock's counter.  This should only
                 * be applied to the on-disk superblock.
                 */
                ASSERT(delta < 0);
@@ -611,7 +611,7 @@ xfs_trans_apply_sb_deltas(
 
        if (whole)
                /*
-                * Log the whole thing, the fields are discontiguous.
+                * Log the whole thing, the fields are noncontiguous.
                 */
                xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_sb_t) - 1);
        else
@@ -669,7 +669,7 @@ xfs_trans_unreserve_and_mod_sb(
        /*
         * Apply any superblock modifications to the in-core version.
         * The t_res_fdblocks_delta and t_res_frextents_delta fields are
-        * explicity NOT applied to the in-core superblock.
+        * explicitly NOT applied to the in-core superblock.
         * The idea is that that has already been done.
         */
        if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
index e48befa..100d9a4 100644 (file)
@@ -354,7 +354,7 @@ typedef struct xfs_trans {
        xfs_lsn_t               t_commit_lsn;   /* log seq num of end of
                                                 * transaction. */
        struct xfs_mount        *t_mountp;      /* ptr to fs mount struct */
-       struct xfs_dquot_acct   *t_dqinfo;      /* accting info for dquots */
+       struct xfs_dquot_acct   *t_dqinfo;      /* acctg info for dquots */
        xfs_trans_callback_t    t_callback;     /* transaction callback */
        void                    *t_callarg;     /* callback arg */
        unsigned int            t_flags;        /* misc flags */
index e341409..7c5894d 100644 (file)
@@ -272,7 +272,7 @@ xfs_trans_log_inode(
         * This is to coordinate with the xfs_iflush() and xfs_iflush_done()
         * routines in the eventual clearing of the ilf_fields bits.
         * See the big comment in xfs_iflush() for an explanation of
-        * this coorination mechanism.
+        * this coordination mechanism.
         */
        flags |= ip->i_itemp->ili_last_fields;
        ip->i_itemp->ili_format.ilf_fields |= flags;
index d4ec4df..504d2a8 100644 (file)
@@ -880,10 +880,10 @@ xfs_statvfs(
  *                    determine if they should be flushed sync, async, or
  *                    delwri.
  *      SYNC_CLOSE   - This flag is passed when the system is being
- *                    unmounted.  We should sync and invalidate everthing.
+ *                    unmounted.  We should sync and invalidate everything.
  *      SYNC_FSDATA  - This indicates that the caller would like to make
  *                    sure the superblock is safe on disk.  We can ensure
- *                    this by simply makeing sure the log gets flushed
+ *                    this by simply making sure the log gets flushed
  *                    if SYNC_BDFLUSH is set, and by actually writing it
  *                    out otherwise.
  *
@@ -908,7 +908,7 @@ xfs_sync(
  *
  * This routine supports all of the flags defined for the generic VFS_SYNC
  * interface as explained above under xfs_sync.  In the interests of not
- * changing interfaces within the 6.5 family, additional internallly-
+ * changing interfaces within the 6.5 family, additional internally-
  * required functions are specified within a separate xflags parameter,
  * only available by calling this routine.
  *
@@ -1090,7 +1090,7 @@ xfs_sync_inodes(
                 * If this is just vfs_sync() or pflushd() calling
                 * then we can skip inodes for which it looks like
                 * there is nothing to do.  Since we don't have the
-                * inode locked this is racey, but these are periodic
+                * inode locked this is racy, but these are periodic
                 * calls so it doesn't matter.  For the others we want
                 * to know for sure, so we at least try to lock them.
                 */
@@ -1429,7 +1429,7 @@ xfs_sync_inodes(
  *
  * This routine supports all of the flags defined for the generic VFS_SYNC
  * interface as explained above under xfs_sync.  In the interests of not
- * changing interfaces within the 6.5 family, additional internallly-
+ * changing interfaces within the 6.5 family, additional internally-
  * required functions are specified within a separate xflags parameter,
  * only available by calling this routine.
  *
index 0f0a64e..de49601 100644 (file)
@@ -848,7 +848,7 @@ xfs_setattr(
         * If this is a synchronous mount, make sure that the
         * transaction goes to disk before returning to the user.
         * This is slightly sub-optimal in that truncates require
-        * two sync transactions instead of one for wsync filesytems.
+        * two sync transactions instead of one for wsync filesystems.
         * One for the truncate and one for the timestamps since we
         * don't want to change the timestamps unless we're sure the
         * truncate worked.  Truncates are less than 1% of the laddis
@@ -1170,7 +1170,7 @@ xfs_fsync(
 
                /*
                 * If this inode is on the RT dev we need to flush that
-                * cache aswell.
+                * cache as well.
                 */
                if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
                        xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
@@ -1380,7 +1380,7 @@ xfs_inactive_symlink_rmt(
         */
        ntp = xfs_trans_dup(tp);
        /*
-        * Commit the transaction containing extent freeing and EFD's.
+        * Commit the transaction containing extent freeing and EFDs.
         * If we get an error on the commit here or on the reserve below,
         * we need to unlock the inode since the new transaction doesn't
         * have the inode attached.
@@ -2023,7 +2023,7 @@ xfs_create(
        XFS_QM_DQRELE(mp, gdqp);
 
        /*
-        * Propogate the fact that the vnode changed after the
+        * Propagate the fact that the vnode changed after the
         * xfs_inode locks have been released.
         */
        VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);
@@ -2370,7 +2370,7 @@ xfs_remove(
         * for a log reservation. Since we'll have to wait for the
         * inactive code to complete before returning from xfs_iget,
         * we need to make sure that we don't have log space reserved
-        * when we call xfs_iget.  Instead we get an unlocked referece
+        * when we call xfs_iget.  Instead we get an unlocked reference
         * to the inode before getting our log reservation.
         */
        error = xfs_get_dir_entry(dentry, &ip);
@@ -3020,7 +3020,7 @@ xfs_rmdir(
         * for a log reservation.  Since we'll have to wait for the
         * inactive code to complete before returning from xfs_iget,
         * we need to make sure that we don't have log space reserved
-        * when we call xfs_iget.  Instead we get an unlocked referece
+        * when we call xfs_iget.  Instead we get an unlocked reference
         * to the inode before getting our log reservation.
         */
        error = xfs_get_dir_entry(dentry, &cdp);
index 92146f3..41ecbb8 100644 (file)
@@ -62,6 +62,8 @@
        .posix_timers    = LIST_HEAD_INIT(sig.posix_timers),            \
        .cpu_timers     = INIT_CPU_TIMERS(sig.cpu_timers),              \
        .rlim           = INIT_RLIMITS,                                 \
+       .pgrp           = 1,                                            \
+       .session        = 1,                                            \
 }
 
 #define INIT_SIGHAND(sighand) {                                                \
index 5b2fcb1..5b9082c 100644 (file)
@@ -4,7 +4,6 @@
 enum pid_type
 {
        PIDTYPE_PID,
-       PIDTYPE_TGID,
        PIDTYPE_PGID,
        PIDTYPE_SID,
        PIDTYPE_MAX
@@ -38,7 +37,6 @@ extern struct pid *FASTCALL(find_pid(enum pid_type, int));
 
 extern int alloc_pidmap(void);
 extern void FASTCALL(free_pidmap(int));
-extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
 
 #define do_each_task_pid(who, type, task)                              \
        if ((task = find_task_by_pid_type(type, who))) {                \
index 20b4f03..d04186d 100644 (file)
@@ -355,16 +355,8 @@ struct sighand_struct {
        atomic_t                count;
        struct k_sigaction      action[_NSIG];
        spinlock_t              siglock;
-       struct rcu_head         rcu;
 };
 
-extern void sighand_free_cb(struct rcu_head *rhp);
-
-static inline void sighand_free(struct sighand_struct *sp)
-{
-       call_rcu(&sp->rcu, sighand_free_cb);
-}
-
 /*
  * NOTE! "signal_struct" does not have it's own
  * locking, because a shared signal_struct always
@@ -760,6 +752,7 @@ struct task_struct {
 
        /* PID/PID hash table linkage. */
        struct pid pids[PIDTYPE_MAX];
+       struct list_head thread_group;
 
        struct completion *vfork_done;          /* for vfork() */
        int __user *set_child_tid;              /* CLONE_CHILD_SETTID */
@@ -1101,7 +1094,6 @@ extern void force_sig_specific(int, struct task_struct *);
 extern int send_sig(int, struct task_struct *, int);
 extern void zap_other_threads(struct task_struct *p);
 extern int kill_pg(pid_t, int, int);
-extern int kill_sl(pid_t, int, int);
 extern int kill_proc(pid_t, int, int);
 extern struct sigqueue *sigqueue_alloc(void);
 extern void sigqueue_free(struct sigqueue *);
@@ -1158,10 +1150,8 @@ extern void flush_thread(void);
 extern void exit_thread(void);
 
 extern void exit_files(struct task_struct *);
-extern void exit_signal(struct task_struct *);
-extern void __exit_signal(struct task_struct *);
-extern void exit_sighand(struct task_struct *);
-extern void __exit_sighand(struct task_struct *);
+extern void __cleanup_signal(struct signal_struct *);
+extern void __cleanup_sighand(struct sighand_struct *);
 extern void exit_itimers(struct signal_struct *);
 
 extern NORET_TYPE void do_group_exit(int);
@@ -1185,19 +1175,7 @@ extern void wait_task_inactive(task_t * p);
 #endif
 
 #define remove_parent(p)       list_del_init(&(p)->sibling)
-#define add_parent(p, parent)  list_add_tail(&(p)->sibling,&(parent)->children)
-
-#define REMOVE_LINKS(p) do {                                   \
-       if (thread_group_leader(p))                             \
-               list_del_init(&(p)->tasks);                     \
-       remove_parent(p);                                       \
-       } while (0)
-
-#define SET_LINKS(p) do {                                      \
-       if (thread_group_leader(p))                             \
-               list_add_tail(&(p)->tasks,&init_task.tasks);    \
-       add_parent(p, (p)->parent);                             \
-       } while (0)
+#define add_parent(p)          list_add_tail(&(p)->sibling,&(p)->parent->children)
 
 #define next_task(p)   list_entry((p)->tasks.next, struct task_struct, tasks)
 #define prev_task(p)   list_entry((p)->tasks.prev, struct task_struct, tasks)
@@ -1215,20 +1193,22 @@ extern void wait_task_inactive(task_t * p);
 #define while_each_thread(g, t) \
        while ((t = next_thread(t)) != g)
 
-extern task_t * FASTCALL(next_thread(const task_t *p));
-
 #define thread_group_leader(p) (p->pid == p->tgid)
 
+static inline task_t *next_thread(task_t *p)
+{
+       return list_entry(rcu_dereference(p->thread_group.next),
+                               task_t, thread_group);
+}
+
 static inline int thread_group_empty(task_t *p)
 {
-       return list_empty(&p->pids[PIDTYPE_TGID].pid_list);
+       return list_empty(&p->thread_group);
 }
 
 #define delay_group_leader(p) \
                (thread_group_leader(p) && !thread_group_empty(p))
 
-extern void unhash_process(struct task_struct *p);
-
 /*
  * Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
  * subscriptions and synchronises with wait4().  Also used in procfs.  Also
@@ -1248,6 +1228,15 @@ static inline void task_unlock(struct task_struct *p)
        spin_unlock(&p->alloc_lock);
 }
 
+extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+                                                       unsigned long *flags);
+
+static inline void unlock_task_sighand(struct task_struct *tsk,
+                                               unsigned long *flags)
+{
+       spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
+}
+
 #ifndef __HAVE_THREAD_FUNCTIONS
 
 #define task_thread_info(task) (task)->thread_info
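
The sched.h hunks above drop the PIDTYPE_TGID hash linkage in favor of a circular thread_group list, so next_thread() becomes a plain list walk and thread_group_empty() a self-pointer test. A userspace analogue with a minimal list_head (RCU and siglock handling deliberately omitted):

#include <stddef.h>
#include <stdio.h>

struct list_head { struct list_head *next, *prev; };

static void list_init(struct list_head *h) { h->next = h->prev = h; }

static void list_add_tail(struct list_head *item, struct list_head *head)
{
        item->prev = head->prev;
        item->next = head;
        head->prev->next = item;
        head->prev = item;
}

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct task { int pid; struct list_head thread_group; };

/* Analogue of the new inline next_thread(): follow the circular list. */
static struct task *next_thread(struct task *p)
{
        return container_of(p->thread_group.next, struct task, thread_group);
}

/* Analogue of the new thread_group_empty(): a lone task points at itself. */
static int thread_group_empty(struct task *p)
{
        return p->thread_group.next == &p->thread_group;
}

int main(void)
{
        struct task leader = { .pid = 100 }, t1 = { .pid = 101 }, t2 = { .pid = 102 };
        struct task *t;

        list_init(&leader.thread_group);
        list_add_tail(&t1.thread_group, &leader.thread_group);
        list_add_tail(&t2.thread_group, &leader.thread_group);

        printf("empty? %d\n", thread_group_empty(&leader));     /* 0 */
        for (t = next_thread(&leader); t != &leader; t = next_thread(t))
                printf("thread pid %d\n", t->pid);              /* 101, 102 */
        return 0;
}
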
index b7d0935..162a8fd 100644 (file)
@@ -249,6 +249,8 @@ static inline void init_sigpending(struct sigpending *sig)
        INIT_LIST_HEAD(&sig->list);
 }
 
+extern void flush_sigqueue(struct sigpending *queue);
+
 /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
 static inline int valid_signal(unsigned long sig)
 {
index 15e1d97..3af03b1 100644 (file)
@@ -210,7 +210,6 @@ extern kmem_cache_t *names_cachep;
 extern kmem_cache_t    *files_cachep;
 extern kmem_cache_t    *filp_cachep;
 extern kmem_cache_t    *fs_cachep;
-extern kmem_cache_t    *signal_cachep;
 extern kmem_cache_t    *sighand_cachep;
 extern kmem_cache_t    *bio_cachep;
 
index a8c7efc..bc0ec67 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/cpuset.h>
 #include <linux/syscalls.h>
 #include <linux/signal.h>
+#include <linux/posix-timers.h>
 #include <linux/cn_proc.h>
 #include <linux/mutex.h>
 #include <linux/futex.h>
@@ -50,15 +51,80 @@ static void __unhash_process(struct task_struct *p)
 {
        nr_threads--;
        detach_pid(p, PIDTYPE_PID);
-       detach_pid(p, PIDTYPE_TGID);
        if (thread_group_leader(p)) {
                detach_pid(p, PIDTYPE_PGID);
                detach_pid(p, PIDTYPE_SID);
-               if (p->pid)
-                       __get_cpu_var(process_counts)--;
+
+               list_del_init(&p->tasks);
+               __get_cpu_var(process_counts)--;
+       }
+       list_del_rcu(&p->thread_group);
+       remove_parent(p);
+}
+
+/*
+ * This function expects the tasklist_lock write-locked.
+ */
+static void __exit_signal(struct task_struct *tsk)
+{
+       struct signal_struct *sig = tsk->signal;
+       struct sighand_struct *sighand;
+
+       BUG_ON(!sig);
+       BUG_ON(!atomic_read(&sig->count));
+
+       rcu_read_lock();
+       sighand = rcu_dereference(tsk->sighand);
+       spin_lock(&sighand->siglock);
+
+       posix_cpu_timers_exit(tsk);
+       if (atomic_dec_and_test(&sig->count))
+               posix_cpu_timers_exit_group(tsk);
+       else {
+               /*
+                * If there is any task waiting for the group exit
+                * then notify it:
+                */
+               if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
+                       wake_up_process(sig->group_exit_task);
+                       sig->group_exit_task = NULL;
+               }
+               if (tsk == sig->curr_target)
+                       sig->curr_target = next_thread(tsk);
+               /*
+                * Accumulate here the counters for all threads but the
+                * group leader as they die, so they can be added into
+                * the process-wide totals when those are taken.
+                * The group leader stays around as a zombie as long
+                * as there are other threads.  When it gets reaped,
+                * the exit.c code will add its counts into these totals.
+                * We won't ever get here for the group leader, since it
+                * will have been the last reference on the signal_struct.
+                */
+               sig->utime = cputime_add(sig->utime, tsk->utime);
+               sig->stime = cputime_add(sig->stime, tsk->stime);
+               sig->min_flt += tsk->min_flt;
+               sig->maj_flt += tsk->maj_flt;
+               sig->nvcsw += tsk->nvcsw;
+               sig->nivcsw += tsk->nivcsw;
+               sig->sched_time += tsk->sched_time;
+               sig = NULL; /* Marker for below. */
        }
 
-       REMOVE_LINKS(p);
+       __unhash_process(tsk);
+
+       tsk->signal = NULL;
+       tsk->sighand = NULL;
+       spin_unlock(&sighand->siglock);
+       rcu_read_unlock();
+
+       __cleanup_sighand(sighand);
+       clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
+       flush_sigqueue(&tsk->pending);
+       if (sig) {
+               flush_sigqueue(&sig->shared_pending);
+               __cleanup_signal(sig);
+       }
 }
 
 void release_task(struct task_struct * p)
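
The new __exit_signal() above folds each dying thread's CPU and fault counters into the shared signal_struct, and uses `sig = NULL` as a marker for whether the caller still owns the shared state and must free it. A minimal userspace sketch of that accumulate-or-teardown pattern, assuming invented names throughout (shared_totals, thread_exit_acct are illustrative, not kernel APIs):

#include <stdio.h>
#include <stdlib.h>

struct shared_totals {
    int count;              /* live references, like signal_struct->count */
    long utime, stime;      /* totals folded in by threads that exited */
};

struct thread_acct {
    long utime, stime;
    struct shared_totals *sig;
};

/* Returns the shared object when the caller holds the last reference and
 * should free it -- the role the "sig = NULL" marker plays above. */
static struct shared_totals *thread_exit_acct(struct thread_acct *t)
{
    struct shared_totals *sig = t->sig;

    if (--sig->count == 0)
        return sig;            /* last thread: caller tears it down */

    sig->utime += t->utime;    /* fold this thread's counters into the */
    sig->stime += t->stime;    /* process-wide totals before it vanishes */
    return NULL;
}

int main(void)
{
    struct shared_totals *sig = calloc(1, sizeof(*sig));
    struct thread_acct t1 = { 5, 3, sig }, t2 = { 7, 2, sig };

    sig->count = 2;
    if (!thread_exit_acct(&t1))
        printf("totals so far: u=%ld s=%ld\n", sig->utime, sig->stime);
    free(thread_exit_acct(&t2));   /* second exit held the last reference */
    return 0;
}
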
@@ -67,21 +133,14 @@ void release_task(struct task_struct * p)
        task_t *leader;
        struct dentry *proc_dentry;
 
-repeat: 
+repeat:
        atomic_dec(&p->user->processes);
        spin_lock(&p->proc_lock);
        proc_dentry = proc_pid_unhash(p);
        write_lock_irq(&tasklist_lock);
-       if (unlikely(p->ptrace))
-               __ptrace_unlink(p);
+       ptrace_unlink(p);
        BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
        __exit_signal(p);
-       /*
-        * Note that the fastpath in sys_times depends on __exit_signal having
-        * updated the counters before a task is removed from the tasklist of
-        * the process by __unhash_process.
-        */
-       __unhash_process(p);
 
        /*
         * If we are the last non-leader member of the thread
@@ -116,21 +175,6 @@ repeat:
                goto repeat;
 }
 
-/* we are using it only for SMP init */
-
-void unhash_process(struct task_struct *p)
-{
-       struct dentry *proc_dentry;
-
-       spin_lock(&p->proc_lock);
-       proc_dentry = proc_pid_unhash(p);
-       write_lock_irq(&tasklist_lock);
-       __unhash_process(p);
-       write_unlock_irq(&tasklist_lock);
-       spin_unlock(&p->proc_lock);
-       proc_pid_flush(proc_dentry);
-}
-
 /*
  * This checks not only the pgrp, but falls back on the pid if no
  * satisfactory pgrp is found. I dunno - gdb doesn't work correctly
@@ -238,10 +282,10 @@ static void reparent_to_init(void)
 
        ptrace_unlink(current);
        /* Reparent to init */
-       REMOVE_LINKS(current);
+       remove_parent(current);
        current->parent = child_reaper;
        current->real_parent = child_reaper;
-       SET_LINKS(current);
+       add_parent(current);
 
        /* Set the exit signal to SIGCHLD so we signal init on exit */
        current->exit_signal = SIGCHLD;
@@ -538,13 +582,13 @@ static void exit_mm(struct task_struct * tsk)
        mmput(mm);
 }
 
-static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
+static inline void choose_new_parent(task_t *p, task_t *reaper)
 {
        /*
         * Make sure we're not reparenting to ourselves and that
         * the parent is not a zombie.
         */
-       BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE);
+       BUG_ON(p == reaper || reaper->exit_state);
        p->real_parent = reaper;
 }
 
@@ -569,9 +613,9 @@ static void reparent_thread(task_t *p, task_t *father, int traced)
                 * anyway, so let go of it.
                 */
                p->ptrace = 0;
-               list_del_init(&p->sibling);
+               remove_parent(p);
                p->parent = p->real_parent;
-               list_add_tail(&p->sibling, &p->parent->children);
+               add_parent(p);
 
                /* If we'd notified the old parent about this child's death,
                 * also notify the new parent.
@@ -645,7 +689,7 @@ static void forget_original_parent(struct task_struct * father,
 
                if (father == p->real_parent) {
                        /* reparent with a reaper, the real father is us */
-                       choose_new_parent(p, reaper, child_reaper);
+                       choose_new_parent(p, reaper);
                        reparent_thread(p, father, 0);
                } else {
                        /* reparent ptraced task to its real parent */
@@ -666,7 +710,7 @@ static void forget_original_parent(struct task_struct * father,
        }
        list_for_each_safe(_p, _n, &father->ptrace_children) {
                p = list_entry(_p,struct task_struct,ptrace_list);
-               choose_new_parent(p, reaper, child_reaper);
+               choose_new_parent(p, reaper);
                reparent_thread(p, father, 1);
        }
 }
@@ -807,7 +851,7 @@ fastcall NORET_TYPE void do_exit(long code)
                panic("Aiee, killing interrupt handler!");
        if (unlikely(!tsk->pid))
                panic("Attempted to kill the idle task!");
-       if (unlikely(tsk->pid == 1))
+       if (unlikely(tsk == child_reaper))
                panic("Attempted to kill init!");
 
        if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
@@ -920,13 +964,6 @@ asmlinkage long sys_exit(int error_code)
        do_exit((error_code&0xff)<<8);
 }
 
-task_t fastcall *next_thread(const task_t *p)
-{
-       return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
-}
-
-EXPORT_SYMBOL(next_thread);
-
 /*
  * Take down every thread in the group.  This is called by fatal signals
  * as well as by sys_exit_group (below).
@@ -941,7 +978,6 @@ do_group_exit(int exit_code)
        else if (!thread_group_empty(current)) {
                struct signal_struct *const sig = current->signal;
                struct sighand_struct *const sighand = current->sighand;
-               read_lock(&tasklist_lock);
                spin_lock_irq(&sighand->siglock);
                if (sig->flags & SIGNAL_GROUP_EXIT)
                        /* Another thread got here before we took the lock.  */
@@ -951,7 +987,6 @@ do_group_exit(int exit_code)
                        zap_other_threads(current);
                }
                spin_unlock_irq(&sighand->siglock);
-               read_unlock(&tasklist_lock);
        }
 
        do_exit(exit_code);
@@ -1281,7 +1316,7 @@ bail_ref:
 
        /* move to end of parent's list to avoid starvation */
        remove_parent(p);
-       add_parent(p, p->parent);
+       add_parent(p);
 
        write_unlock_irq(&tasklist_lock);
 
index c49bd19..b3f7a1b 100644 (file)
@@ -84,7 +84,7 @@ static kmem_cache_t *task_struct_cachep;
 #endif
 
 /* SLAB cache for signal_struct structures (tsk->signal) */
-kmem_cache_t *signal_cachep;
+static kmem_cache_t *signal_cachep;
 
 /* SLAB cache for sighand_struct structures (tsk->sighand) */
 kmem_cache_t *sighand_cachep;
@@ -786,14 +786,6 @@ int unshare_files(void)
 
 EXPORT_SYMBOL(unshare_files);
 
-void sighand_free_cb(struct rcu_head *rhp)
-{
-       struct sighand_struct *sp;
-
-       sp = container_of(rhp, struct sighand_struct, rcu);
-       kmem_cache_free(sighand_cachep, sp);
-}
-
 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
 {
        struct sighand_struct *sig;
@@ -806,12 +798,17 @@ static inline int copy_sighand(unsigned long clone_flags, struct task_struct * t
        rcu_assign_pointer(tsk->sighand, sig);
        if (!sig)
                return -ENOMEM;
-       spin_lock_init(&sig->siglock);
        atomic_set(&sig->count, 1);
        memcpy(sig->action, current->sighand->action, sizeof(sig->action));
        return 0;
 }
 
+void __cleanup_sighand(struct sighand_struct *sighand)
+{
+       if (atomic_dec_and_test(&sighand->count))
+               kmem_cache_free(sighand_cachep, sighand);
+}
+
 static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
 {
        struct signal_struct *sig;
@@ -881,6 +878,22 @@ static inline int copy_signal(unsigned long clone_flags, struct task_struct * ts
        return 0;
 }
 
+void __cleanup_signal(struct signal_struct *sig)
+{
+       exit_thread_group_keys(sig);
+       kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void cleanup_signal(struct task_struct *tsk)
+{
+       struct signal_struct *sig = tsk->signal;
+
+       atomic_dec(&sig->live);
+
+       if (atomic_dec_and_test(&sig->count))
+               __cleanup_signal(sig);
+}
+
 static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
 {
        unsigned long new_flags = p->flags;
@@ -1095,6 +1108,7 @@ static task_t *copy_process(unsigned long clone_flags,
         * We dont wake it up yet.
         */
        p->group_leader = p;
+       INIT_LIST_HEAD(&p->thread_group);
        INIT_LIST_HEAD(&p->ptrace_children);
        INIT_LIST_HEAD(&p->ptrace_list);
 
@@ -1118,16 +1132,6 @@ static task_t *copy_process(unsigned long clone_flags,
                        !cpu_online(task_cpu(p))))
                set_task_cpu(p, smp_processor_id());
 
-       /*
-        * Check for pending SIGKILL! The new thread should not be allowed
-        * to slip out of an OOM kill. (or normal SIGKILL.)
-        */
-       if (sigismember(&current->pending.signal, SIGKILL)) {
-               write_unlock_irq(&tasklist_lock);
-               retval = -EINTR;
-               goto bad_fork_cleanup_namespace;
-       }
-
        /* CLONE_PARENT re-uses the old parent */
        if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
                p->real_parent = current->real_parent;
@@ -1136,6 +1140,23 @@ static task_t *copy_process(unsigned long clone_flags,
        p->parent = p->real_parent;
 
        spin_lock(&current->sighand->siglock);
+
+       /*
+        * Process group and session signals need to be delivered to just the
+        * parent before the fork or both the parent and the child after the
+        * fork. Restart if a signal comes in before we add the new process to
+        * its process group.
+        * A fatal signal pending means that current will exit, so the new
+        * thread can't slip out of an OOM kill (or normal SIGKILL).
+        */
+       recalc_sigpending();
+       if (signal_pending(current)) {
+               spin_unlock(&current->sighand->siglock);
+               write_unlock_irq(&tasklist_lock);
+               retval = -ERESTARTNOINTR;
+               goto bad_fork_cleanup_namespace;
+       }
+
        if (clone_flags & CLONE_THREAD) {
                /*
                 * Important: if an exit-all has been started then
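
The recalc_sigpending()/-ERESTARTNOINTR hunk above replaces the old SIGKILL-only check: any signal arriving before the child is linked into its process group backs the whole fork out, so the signal is handled first and the syscall restarts transparently. A hedged pthreads sketch of that back-out-and-retry shape (try_commit_child and signal_pending_flag are invented for illustration):

#include <errno.h>
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static bool signal_pending_flag;     /* stand-in for signal_pending(current) */

/* Take the lock, re-test the condition that must still hold, and back out
 * with a "please retry" code if it does not -- the shape of the
 * recalc_sigpending()/-ERESTARTNOINTR bailout above. */
static int try_commit_child(void)
{
    pthread_mutex_lock(&lock);
    if (signal_pending_flag) {        /* state changed before we committed */
        pthread_mutex_unlock(&lock);  /* nothing published, safe to restart */
        return -EAGAIN;
    }
    /* ... publish the new child to the shared lists here ... */
    pthread_mutex_unlock(&lock);
    return 0;
}

int main(void)
{
    while (try_commit_child() == -EAGAIN)
        ;                    /* a real caller would handle the signal first */
    puts("committed");
    return 0;
}
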
@@ -1148,17 +1169,9 @@ static task_t *copy_process(unsigned long clone_flags,
                        retval = -EAGAIN;
                        goto bad_fork_cleanup_namespace;
                }
-               p->group_leader = current->group_leader;
 
-               if (current->signal->group_stop_count > 0) {
-                       /*
-                        * There is an all-stop in progress for the group.
-                        * We ourselves will stop as soon as we check signals.
-                        * Make the new thread part of that group stop too.
-                        */
-                       current->signal->group_stop_count++;
-                       set_tsk_thread_flag(p, TIF_SIGPENDING);
-               }
+               p->group_leader = current->group_leader;
+               list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 
                if (!cputime_eq(current->signal->it_virt_expires,
                                cputime_zero) ||
@@ -1181,23 +1194,25 @@ static task_t *copy_process(unsigned long clone_flags,
         */
        p->ioprio = current->ioprio;
 
-       SET_LINKS(p);
-       if (unlikely(p->ptrace & PT_PTRACED))
-               __ptrace_link(p, current->parent);
-
-       if (thread_group_leader(p)) {
-               p->signal->tty = current->signal->tty;
-               p->signal->pgrp = process_group(current);
-               p->signal->session = current->signal->session;
-               attach_pid(p, PIDTYPE_PGID, process_group(p));
-               attach_pid(p, PIDTYPE_SID, p->signal->session);
-               if (p->pid)
+       if (likely(p->pid)) {
+               add_parent(p);
+               if (unlikely(p->ptrace & PT_PTRACED))
+                       __ptrace_link(p, current->parent);
+
+               if (thread_group_leader(p)) {
+                       p->signal->tty = current->signal->tty;
+                       p->signal->pgrp = process_group(current);
+                       p->signal->session = current->signal->session;
+                       attach_pid(p, PIDTYPE_PGID, process_group(p));
+                       attach_pid(p, PIDTYPE_SID, p->signal->session);
+
+                       list_add_tail(&p->tasks, &init_task.tasks);
                        __get_cpu_var(process_counts)++;
+               }
+               attach_pid(p, PIDTYPE_PID, p->pid);
+               nr_threads++;
        }
-       attach_pid(p, PIDTYPE_TGID, p->tgid);
-       attach_pid(p, PIDTYPE_PID, p->pid);
 
-       nr_threads++;
        total_forks++;
        spin_unlock(&current->sighand->siglock);
        write_unlock_irq(&tasklist_lock);
@@ -1212,9 +1227,9 @@ bad_fork_cleanup_mm:
        if (p->mm)
                mmput(p->mm);
 bad_fork_cleanup_signal:
-       exit_signal(p);
+       cleanup_signal(p);
 bad_fork_cleanup_sighand:
-       exit_sighand(p);
+       __cleanup_sighand(p->sighand);
 bad_fork_cleanup_fs:
        exit_fs(p); /* blocking */
 bad_fork_cleanup_files:
@@ -1261,7 +1276,7 @@ task_t * __devinit fork_idle(int cpu)
        if (!task)
                return ERR_PTR(-ENOMEM);
        init_idle(task, cpu);
-       unhash_process(task);
+
        return task;
 }
 
@@ -1353,11 +1368,21 @@ long do_fork(unsigned long clone_flags,
 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 #endif
 
+static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+{
+       struct sighand_struct *sighand = data;
+
+       if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+                                       SLAB_CTOR_CONSTRUCTOR)
+               spin_lock_init(&sighand->siglock);
+}
+
 void __init proc_caches_init(void)
 {
        sighand_cachep = kmem_cache_create("sighand_cache",
                        sizeof(struct sighand_struct), 0,
-                       SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+                       SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+                       sighand_ctor, NULL);
        signal_cachep = kmem_cache_create("signal_cache",
                        sizeof(struct signal_struct), 0,
                        SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
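
sighand_ctor() initializes siglock only when the slab slot is first constructed, not on every allocation; together with SLAB_DESTROY_BY_RCU this keeps the lock usable even if the object is freed and its memory recycled while an RCU reader still holds a pointer to it. A rough userspace analogue with a fixed pool, assuming invented names (pool_ctor, pool_alloc):

#include <pthread.h>

/* The lock is initialized once per memory slot (the "ctor"), never per
 * allocation, so a reader that reached the object just before it was
 * recycled still finds a usable lock to take and re-check identity with. */
struct sighand_like {
    pthread_mutex_t siglock;    /* survives "free": never re-initialized */
    int generation;             /* bumped on reuse, stands in for identity */
};

#define POOL_SIZE 16
static struct sighand_like pool[POOL_SIZE];

static void pool_ctor(void)                 /* role of sighand_ctor() */
{
    for (int i = 0; i < POOL_SIZE; i++)
        pthread_mutex_init(&pool[i].siglock, NULL);
}

static struct sighand_like *pool_alloc(int slot)  /* reuse recycles a slot */
{
    pool[slot].generation++;    /* new object, same memory, same live lock */
    return &pool[slot];
}

int main(void)
{
    pool_ctor();
    struct sighand_like *s = pool_alloc(0);
    pthread_mutex_lock(&s->siglock);    /* valid even right after recycling */
    pthread_mutex_unlock(&s->siglock);
    return 0;
}
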
index 51a8920..20a997c 100644 (file)
@@ -170,7 +170,7 @@ static int wait_for_helper(void *data)
        sa.sa.sa_handler = SIG_IGN;
        sa.sa.sa_flags = 0;
        siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
-       do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
+       do_sigaction(SIGCHLD, &sa, NULL);
        allow_signal(SIGCHLD);
 
        pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
index 1acc072..a9f2dfd 100644 (file)
@@ -218,36 +218,6 @@ task_t *find_task_by_pid_type(int type, int nr)
 EXPORT_SYMBOL(find_task_by_pid_type);
 
 /*
- * This function switches the PIDs if a non-leader thread calls
- * sys_execve() - this must be done without releasing the PID.
- * (which a detach_pid() would eventually do.)
- */
-void switch_exec_pids(task_t *leader, task_t *thread)
-{
-       __detach_pid(leader, PIDTYPE_PID);
-       __detach_pid(leader, PIDTYPE_TGID);
-       __detach_pid(leader, PIDTYPE_PGID);
-       __detach_pid(leader, PIDTYPE_SID);
-
-       __detach_pid(thread, PIDTYPE_PID);
-       __detach_pid(thread, PIDTYPE_TGID);
-
-       leader->pid = leader->tgid = thread->pid;
-       thread->pid = thread->tgid;
-
-       attach_pid(thread, PIDTYPE_PID, thread->pid);
-       attach_pid(thread, PIDTYPE_TGID, thread->tgid);
-       attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
-       attach_pid(thread, PIDTYPE_SID, thread->signal->session);
-       list_add_tail(&thread->tasks, &init_task.tasks);
-
-       attach_pid(leader, PIDTYPE_PID, leader->pid);
-       attach_pid(leader, PIDTYPE_TGID, leader->tgid);
-       attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
-       attach_pid(leader, PIDTYPE_SID, leader->signal->session);
-}
-
-/*
  * The pid hash table is scaled according to the amount of memory in the
  * machine.  From a minimum of 16 slots up to 4096 slots at one gigabyte or
  * more.
@@ -277,16 +247,8 @@ void __init pidhash_init(void)
 
 void __init pidmap_init(void)
 {
-       int i;
-
        pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
+       /* Reserve PID 0. We never call free_pidmap(0) */
        set_bit(0, pidmap_array->page);
        atomic_dec(&pidmap_array->nr_free);
-
-       /*
-        * Allocate PID 0, and hash it via all PID types:
-        */
-
-       for (i = 0; i < PIDTYPE_MAX; i++)
-               attach_pid(current, i, 0);
 }
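
pidmap_init() now only reserves bit 0 in the bitmap instead of hashing PID 0 through all PID types; as the new comment says, free_pidmap(0) can never run. A toy bitmap allocator showing why pre-setting bit 0 keeps PID 0 permanently out of circulation (a sketch only, not the kernel's pidmap layout):

#include <limits.h>
#include <string.h>

#define PID_MAX 4096
#define BITS_PER_WORD (CHAR_BIT * sizeof(unsigned long))
static unsigned long pidmap[PID_MAX / BITS_PER_WORD];

static void pidmap_init_sketch(void)
{
    memset(pidmap, 0, sizeof(pidmap));
    pidmap[0] |= 1UL;                 /* reserve PID 0: never allocated, never freed */
}

static int alloc_pid_sketch(void)
{
    for (int pid = 0; pid < PID_MAX; pid++) {
        unsigned long *w = &pidmap[pid / BITS_PER_WORD];
        unsigned long bit = 1UL << (pid % BITS_PER_WORD);
        if (!(*w & bit)) {
            *w |= bit;
            return pid;               /* first caller gets 1, not 0 */
        }
    }
    return -1;                        /* map exhausted */
}

int main(void)
{
    pidmap_init_sketch();
    return alloc_pid_sketch() == 1 ? 0 : 1;   /* PID 0 was never available */
}
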
index d95a72c..86a7f6c 100644 (file)
@@ -35,9 +35,9 @@ void __ptrace_link(task_t *child, task_t *new_parent)
        if (child->parent == new_parent)
                return;
        list_add(&child->ptrace_list, &child->parent->ptrace_children);
-       REMOVE_LINKS(child);
+       remove_parent(child);
        child->parent = new_parent;
-       SET_LINKS(child);
+       add_parent(child);
 }
  
 /*
@@ -77,9 +77,9 @@ void __ptrace_unlink(task_t *child)
        child->ptrace = 0;
        if (!list_empty(&child->ptrace_list)) {
                list_del_init(&child->ptrace_list);
-               REMOVE_LINKS(child);
+               remove_parent(child);
                child->parent = child->real_parent;
-               SET_LINKS(child);
+               add_parent(child);
        }
 
        ptrace_untrace(child);
index 75f7341..4922928 100644 (file)
@@ -22,7 +22,6 @@
 #include <linux/security.h>
 #include <linux/syscalls.h>
 #include <linux/ptrace.h>
-#include <linux/posix-timers.h>
 #include <linux/signal.h>
 #include <linux/audit.h>
 #include <linux/capability.h>
@@ -147,6 +146,8 @@ static kmem_cache_t *sigqueue_cachep;
 #define sig_kernel_stop(sig) \
                (((sig) < SIGRTMIN)  && T(sig, SIG_KERNEL_STOP_MASK))
 
+#define sig_needs_tasklist(sig)        ((sig) == SIGCONT)
+
 #define sig_user_defined(t, signr) \
        (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) &&  \
         ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN))
@@ -292,7 +293,7 @@ static void __sigqueue_free(struct sigqueue *q)
        kmem_cache_free(sigqueue_cachep, q);
 }
 
-static void flush_sigqueue(struct sigpending *queue)
+void flush_sigqueue(struct sigpending *queue)
 {
        struct sigqueue *q;
 
@@ -307,9 +308,7 @@ static void flush_sigqueue(struct sigpending *queue)
 /*
  * Flush all pending signals for a task.
  */
-
-void
-flush_signals(struct task_struct *t)
+void flush_signals(struct task_struct *t)
 {
        unsigned long flags;
 
@@ -321,109 +320,6 @@ flush_signals(struct task_struct *t)
 }
 
 /*
- * This function expects the tasklist_lock write-locked.
- */
-void __exit_sighand(struct task_struct *tsk)
-{
-       struct sighand_struct * sighand = tsk->sighand;
-
-       /* Ok, we're done with the signal handlers */
-       tsk->sighand = NULL;
-       if (atomic_dec_and_test(&sighand->count))
-               sighand_free(sighand);
-}
-
-void exit_sighand(struct task_struct *tsk)
-{
-       write_lock_irq(&tasklist_lock);
-       rcu_read_lock();
-       if (tsk->sighand != NULL) {
-               struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
-               spin_lock(&sighand->siglock);
-               __exit_sighand(tsk);
-               spin_unlock(&sighand->siglock);
-       }
-       rcu_read_unlock();
-       write_unlock_irq(&tasklist_lock);
-}
-
-/*
- * This function expects the tasklist_lock write-locked.
- */
-void __exit_signal(struct task_struct *tsk)
-{
-       struct signal_struct * sig = tsk->signal;
-       struct sighand_struct * sighand;
-
-       if (!sig)
-               BUG();
-       if (!atomic_read(&sig->count))
-               BUG();
-       rcu_read_lock();
-       sighand = rcu_dereference(tsk->sighand);
-       spin_lock(&sighand->siglock);
-       posix_cpu_timers_exit(tsk);
-       if (atomic_dec_and_test(&sig->count)) {
-               posix_cpu_timers_exit_group(tsk);
-               tsk->signal = NULL;
-               __exit_sighand(tsk);
-               spin_unlock(&sighand->siglock);
-               flush_sigqueue(&sig->shared_pending);
-       } else {
-               /*
-                * If there is any task waiting for the group exit
-                * then notify it:
-                */
-               if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
-                       wake_up_process(sig->group_exit_task);
-                       sig->group_exit_task = NULL;
-               }
-               if (tsk == sig->curr_target)
-                       sig->curr_target = next_thread(tsk);
-               tsk->signal = NULL;
-               /*
-                * Accumulate here the counters for all threads but the
-                * group leader as they die, so they can be added into
-                * the process-wide totals when those are taken.
-                * The group leader stays around as a zombie as long
-                * as there are other threads.  When it gets reaped,
-                * the exit.c code will add its counts into these totals.
-                * We won't ever get here for the group leader, since it
-                * will have been the last reference on the signal_struct.
-                */
-               sig->utime = cputime_add(sig->utime, tsk->utime);
-               sig->stime = cputime_add(sig->stime, tsk->stime);
-               sig->min_flt += tsk->min_flt;
-               sig->maj_flt += tsk->maj_flt;
-               sig->nvcsw += tsk->nvcsw;
-               sig->nivcsw += tsk->nivcsw;
-               sig->sched_time += tsk->sched_time;
-               __exit_sighand(tsk);
-               spin_unlock(&sighand->siglock);
-               sig = NULL;     /* Marker for below.  */
-       }
-       rcu_read_unlock();
-       clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
-       flush_sigqueue(&tsk->pending);
-       if (sig) {
-               /*
-                * We are cleaning up the signal_struct here.
-                */
-               exit_thread_group_keys(sig);
-               kmem_cache_free(signal_cachep, sig);
-       }
-}
-
-void exit_signal(struct task_struct *tsk)
-{
-       atomic_dec(&tsk->signal->live);
-
-       write_lock_irq(&tasklist_lock);
-       __exit_signal(tsk);
-       write_unlock_irq(&tasklist_lock);
-}
-
-/*
  * Flush all handlers for a task.
  */
 
@@ -695,9 +591,7 @@ static int check_kill_permission(int sig, struct siginfo *info,
 }
 
 /* forward decl */
-static void do_notify_parent_cldstop(struct task_struct *tsk,
-                                    int to_self,
-                                    int why);
+static void do_notify_parent_cldstop(struct task_struct *tsk, int why);
 
 /*
  * Handle magic process-wide effects of stop/continue signals.
@@ -747,7 +641,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
                        p->signal->group_stop_count = 0;
                        p->signal->flags = SIGNAL_STOP_CONTINUED;
                        spin_unlock(&p->sighand->siglock);
-                       do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED);
+                       do_notify_parent_cldstop(p, CLD_STOPPED);
                        spin_lock(&p->sighand->siglock);
                }
                rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
@@ -788,7 +682,7 @@ static void handle_stop_signal(int sig, struct task_struct *p)
                        p->signal->flags = SIGNAL_STOP_CONTINUED;
                        p->signal->group_exit_code = 0;
                        spin_unlock(&p->sighand->siglock);
-                       do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED);
+                       do_notify_parent_cldstop(p, CLD_CONTINUED);
                        spin_lock(&p->sighand->siglock);
                } else {
                        /*
@@ -1120,27 +1014,37 @@ void zap_other_threads(struct task_struct *p)
 /*
  * Must be called under rcu_read_lock() or with tasklist_lock read-held.
  */
+struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
+{
+       struct sighand_struct *sighand;
+
+       for (;;) {
+               sighand = rcu_dereference(tsk->sighand);
+               if (unlikely(sighand == NULL))
+                       break;
+
+               spin_lock_irqsave(&sighand->siglock, *flags);
+               if (likely(sighand == tsk->sighand))
+                       break;
+               spin_unlock_irqrestore(&sighand->siglock, *flags);
+       }
+
+       return sighand;
+}
+
 int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
 {
        unsigned long flags;
-       struct sighand_struct *sp;
        int ret;
 
-retry:
        ret = check_kill_permission(sig, info, p);
-       if (!ret && sig && (sp = rcu_dereference(p->sighand))) {
-               spin_lock_irqsave(&sp->siglock, flags);
-               if (p->sighand != sp) {
-                       spin_unlock_irqrestore(&sp->siglock, flags);
-                       goto retry;
-               }
-               if ((atomic_read(&sp->count) == 0) ||
-                               (atomic_read(&p->usage) == 0)) {
-                       spin_unlock_irqrestore(&sp->siglock, flags);
-                       return -ESRCH;
+
+       if (!ret && sig) {
+               ret = -ESRCH;
+               if (lock_task_sighand(p, &flags)) {
+                       ret = __group_send_sig_info(sig, info, p);
+                       unlock_task_sighand(p, &flags);
                }
-               ret = __group_send_sig_info(sig, info, p);
-               spin_unlock_irqrestore(&sp->siglock, flags);
        }
 
        return ret;
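
lock_task_sighand() stabilizes tsk->sighand by locking and then re-checking the pointer, and group_send_sig_info() above shows the intended call pattern. A userspace approximation of the retry loop follows; lock_handlers and task_like are invented names, and note that the kernel version is only safe because RCU plus SLAB_DESTROY_BY_RCU keep the old object's memory (and its lock) valid during the race window:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

struct handlers { pthread_mutex_t lock; };

struct task_like { _Atomic(struct handlers *) sighand; };

/* Load the pointer, lock it, then confirm the task still points at the
 * same object; on a race (an exec swapped it) drop the lock and retry. */
static struct handlers *lock_handlers(struct task_like *t)
{
    struct handlers *h;

    for (;;) {
        h = atomic_load(&t->sighand);
        if (h == NULL)
            return NULL;                     /* task already exited */
        pthread_mutex_lock(&h->lock);
        if (h == atomic_load(&t->sighand))
            return h;                        /* stable; caller unlocks */
        pthread_mutex_unlock(&h->lock);      /* lost the race, try again */
    }
}

int main(void)
{
    struct handlers h = { PTHREAD_MUTEX_INITIALIZER };
    struct task_like task;

    atomic_init(&task.sighand, &h);
    struct handlers *locked = lock_handlers(&task);
    if (locked) {
        puts("signal delivery would happen here");
        pthread_mutex_unlock(&locked->lock);
    }
    return 0;
}
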
@@ -1189,7 +1093,7 @@ kill_proc_info(int sig, struct siginfo *info, pid_t pid)
        struct task_struct *p;
 
        rcu_read_lock();
-       if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
+       if (unlikely(sig_needs_tasklist(sig))) {
                read_lock(&tasklist_lock);
                acquired_tasklist_lock = 1;
        }
@@ -1405,12 +1309,10 @@ void sigqueue_free(struct sigqueue *q)
        __sigqueue_free(q);
 }
 
-int
-send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
+int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
 {
        unsigned long flags;
        int ret = 0;
-       struct sighand_struct *sh;
 
        BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
 
@@ -1424,48 +1326,17 @@ send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
         */
        rcu_read_lock();
 
-       if (unlikely(p->flags & PF_EXITING)) {
+       if (!likely(lock_task_sighand(p, &flags))) {
                ret = -1;
                goto out_err;
        }
 
-retry:
-       sh = rcu_dereference(p->sighand);
-
-       spin_lock_irqsave(&sh->siglock, flags);
-       if (p->sighand != sh) {
-               /* We raced with exec() in a multithreaded process... */
-               spin_unlock_irqrestore(&sh->siglock, flags);
-               goto retry;
-       }
-
-       /*
-        * We do the check here again to handle the following scenario:
-        *
-        * CPU 0                CPU 1
-        * send_sigqueue
-        * check PF_EXITING
-        * interrupt            exit code running
-        *                      __exit_signal
-        *                      lock sighand->siglock
-        *                      unlock sighand->siglock
-        * lock sh->siglock
-        * add(tsk->pending)    flush_sigqueue(tsk->pending)
-        *
-        */
-
-       if (unlikely(p->flags & PF_EXITING)) {
-               ret = -1;
-               goto out;
-       }
-
        if (unlikely(!list_empty(&q->list))) {
                /*
                 * If an SI_TIMER entry is already queued, just increment
                 * the overrun count.
                 */
-               if (q->info.si_code != SI_TIMER)
-                       BUG();
+               BUG_ON(q->info.si_code != SI_TIMER);
                q->info.si_overrun++;
                goto out;
        }
@@ -1481,7 +1352,7 @@ retry:
                signal_wake_up(p, sig == SIGKILL);
 
 out:
-       spin_unlock_irqrestore(&sh->siglock, flags);
+       unlock_task_sighand(p, &flags);
 out_err:
        rcu_read_unlock();
 
@@ -1613,14 +1484,14 @@ void do_notify_parent(struct task_struct *tsk, int sig)
        spin_unlock_irqrestore(&psig->siglock, flags);
 }
 
-static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why)
+static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
 {
        struct siginfo info;
        unsigned long flags;
        struct task_struct *parent;
        struct sighand_struct *sighand;
 
-       if (to_self)
+       if (tsk->ptrace & PT_PTRACED)
                parent = tsk->parent;
        else {
                tsk = tsk->group_leader;
@@ -1695,7 +1566,7 @@ static void ptrace_stop(int exit_code, int nostop_code, siginfo_t *info)
                   !(current->ptrace & PT_ATTACHED)) &&
            (likely(current->parent->signal != current->signal) ||
             !unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
-               do_notify_parent_cldstop(current, 1, CLD_TRAPPED);
+               do_notify_parent_cldstop(current, CLD_TRAPPED);
                read_unlock(&tasklist_lock);
                schedule();
        } else {
@@ -1744,25 +1615,17 @@ void ptrace_notify(int exit_code)
 static void
 finish_stop(int stop_count)
 {
-       int to_self;
-
        /*
         * If there are no other threads in the group, or if there is
         * a group stop in progress and we are the last to stop,
         * report to the parent.  When ptraced, every thread reports itself.
         */
-       if (stop_count < 0 || (current->ptrace & PT_PTRACED))
-               to_self = 1;
-       else if (stop_count == 0)
-               to_self = 0;
-       else
-               goto out;
-
-       read_lock(&tasklist_lock);
-       do_notify_parent_cldstop(current, to_self, CLD_STOPPED);
-       read_unlock(&tasklist_lock);
+       if (stop_count == 0 || (current->ptrace & PT_PTRACED)) {
+               read_lock(&tasklist_lock);
+               do_notify_parent_cldstop(current, CLD_STOPPED);
+               read_unlock(&tasklist_lock);
+       }
 
-out:
        schedule();
        /*
         * Now we don't run again until continued.
@@ -1776,12 +1639,10 @@ out:
  * Returns nonzero if we've actually stopped and released the siglock.
  * Returns zero if we didn't stop and still hold the siglock.
  */
-static int
-do_signal_stop(int signr)
+static int do_signal_stop(int signr)
 {
        struct signal_struct *sig = current->signal;
-       struct sighand_struct *sighand = current->sighand;
-       int stop_count = -1;
+       int stop_count;
 
        if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED))
                return 0;
@@ -1791,86 +1652,37 @@ do_signal_stop(int signr)
                 * There is a group stop in progress.  We don't need to
                 * start another one.
                 */
-               signr = sig->group_exit_code;
                stop_count = --sig->group_stop_count;
-               current->exit_code = signr;
-               set_current_state(TASK_STOPPED);
-               if (stop_count == 0)
-                       sig->flags = SIGNAL_STOP_STOPPED;
-               spin_unlock_irq(&sighand->siglock);
-       }
-       else if (thread_group_empty(current)) {
-               /*
-                * Lock must be held through transition to stopped state.
-                */
-               current->exit_code = current->signal->group_exit_code = signr;
-               set_current_state(TASK_STOPPED);
-               sig->flags = SIGNAL_STOP_STOPPED;
-               spin_unlock_irq(&sighand->siglock);
-       }
-       else {
+       } else {
                /*
                 * There is no group stop already in progress.
-                * We must initiate one now, but that requires
-                * dropping siglock to get both the tasklist lock
-                * and siglock again in the proper order.  Note that
-                * this allows an intervening SIGCONT to be posted.
-                * We need to check for that and bail out if necessary.
+                * We must initiate one now.
                 */
                struct task_struct *t;
 
-               spin_unlock_irq(&sighand->siglock);
-
-               /* signals can be posted during this window */
+               sig->group_exit_code = signr;
 
-               read_lock(&tasklist_lock);
-               spin_lock_irq(&sighand->siglock);
-
-               if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) {
+               stop_count = 0;
+               for (t = next_thread(current); t != current; t = next_thread(t))
                        /*
-                        * Another stop or continue happened while we
-                        * didn't have the lock.  We can just swallow this
-                        * signal now.  If we raced with a SIGCONT, that
-                        * should have just cleared it now.  If we raced
-                        * with another processor delivering a stop signal,
-                        * then the SIGCONT that wakes us up should clear it.
+                        * Setting state to TASK_STOPPED for a group
+                        * stop is always done with the siglock held,
+                        * so this check has no races.
                         */
-                       read_unlock(&tasklist_lock);
-                       return 0;
-               }
-
-               if (sig->group_stop_count == 0) {
-                       sig->group_exit_code = signr;
-                       stop_count = 0;
-                       for (t = next_thread(current); t != current;
-                            t = next_thread(t))
-                               /*
-                                * Setting state to TASK_STOPPED for a group
-                                * stop is always done with the siglock held,
-                                * so this check has no races.
-                                */
-                               if (!t->exit_state &&
-                                   !(t->state & (TASK_STOPPED|TASK_TRACED))) {
-                                       stop_count++;
-                                       signal_wake_up(t, 0);
-                               }
-                       sig->group_stop_count = stop_count;
-               }
-               else {
-                       /* A race with another thread while unlocked.  */
-                       signr = sig->group_exit_code;
-                       stop_count = --sig->group_stop_count;
-               }
-
-               current->exit_code = signr;
-               set_current_state(TASK_STOPPED);
-               if (stop_count == 0)
-                       sig->flags = SIGNAL_STOP_STOPPED;
-
-               spin_unlock_irq(&sighand->siglock);
-               read_unlock(&tasklist_lock);
+                       if (!t->exit_state &&
+                           !(t->state & (TASK_STOPPED|TASK_TRACED))) {
+                               stop_count++;
+                               signal_wake_up(t, 0);
+                       }
+               sig->group_stop_count = stop_count;
        }
 
+       if (stop_count == 0)
+               sig->flags = SIGNAL_STOP_STOPPED;
+       current->exit_code = sig->group_exit_code;
+       __set_current_state(TASK_STOPPED);
+
+       spin_unlock_irq(&current->sighand->siglock);
        finish_stop(stop_count);
        return 1;
 }
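
The rewritten do_signal_stop() can now do all of its bookkeeping under the siglock alone: the initiating thread seeds group_stop_count with the number of live siblings, each stopping thread decrements it, and whoever reaches zero marks the group stopped. A small sketch of that counting scheme (thread_joins_stop is an invented name; the real code additionally wakes each sibling and sets TASK_STOPPED):

#include <stdio.h>

#define GROUP_STOPPED 1

struct group {
    int stop_count;     /* threads still on their way to stopping */
    int flags;
};

/* Called with the group's lock held, as do_signal_stop() is above. */
static void thread_joins_stop(struct group *g, int live_siblings)
{
    int stop_count;

    if (g->stop_count > 0) {
        stop_count = --g->stop_count;   /* join a stop in progress */
    } else {
        stop_count = live_siblings;     /* initiate; kernel also wakes them */
        g->stop_count = stop_count;
    }
    if (stop_count == 0)
        g->flags = GROUP_STOPPED;       /* we are the last one to park */
}

int main(void)
{
    struct group g = { 0, 0 };

    thread_joins_stop(&g, 2);   /* initiator of a 3-thread group */
    thread_joins_stop(&g, 0);   /* sibling one */
    thread_joins_stop(&g, 0);   /* sibling two: count hits zero */
    printf("stopped=%d\n", g.flags == GROUP_STOPPED);
    return 0;
}
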
@@ -1990,7 +1802,7 @@ relock:
                        continue;
 
                /* Init gets no signals it doesn't want.  */
-               if (current->pid == 1)
+               if (current == child_reaper)
                        continue;
 
                if (sig_kernel_stop(signr)) {
@@ -2430,8 +2242,7 @@ sys_rt_sigqueueinfo(int pid, int sig, siginfo_t __user *uinfo)
        return kill_proc_info(sig, &info, pid);
 }
 
-int
-do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
+int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
 {
        struct k_sigaction *k;
        sigset_t mask;
@@ -2457,6 +2268,7 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
        if (act) {
                sigdelsetmask(&act->sa.sa_mask,
                              sigmask(SIGKILL) | sigmask(SIGSTOP));
+               *k = *act;
                /*
                 * POSIX 3.3.1.3:
                 *  "Setting a signal action to SIG_IGN for a signal that is
@@ -2469,19 +2281,8 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
                 *   be discarded, whether or not it is blocked"
                 */
                if (act->sa.sa_handler == SIG_IGN ||
-                   (act->sa.sa_handler == SIG_DFL &&
-                    sig_kernel_ignore(sig))) {
-                       /*
-                        * This is a fairly rare case, so we only take the
-                        * tasklist_lock once we're sure we'll need it.
-                        * Now we must do this little unlock and relock
-                        * dance to maintain the lock hierarchy.
-                        */
+                  (act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) {
                        struct task_struct *t = current;
-                       spin_unlock_irq(&t->sighand->siglock);
-                       read_lock(&tasklist_lock);
-                       spin_lock_irq(&t->sighand->siglock);
-                       *k = *act;
                        sigemptyset(&mask);
                        sigaddset(&mask, sig);
                        rm_from_queue_full(&mask, &t->signal->shared_pending);
@@ -2490,12 +2291,7 @@ do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
                                recalc_sigpending_tsk(t);
                                t = next_thread(t);
                        } while (t != current);
-                       spin_unlock_irq(&current->sighand->siglock);
-                       read_unlock(&tasklist_lock);
-                       return 0;
                }
-
-               *k = *act;
        }
 
        spin_unlock_irq(&current->sighand->siglock);
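
The do_sigaction() hunk drops the tasklist_lock unlock/relock dance but keeps the POSIX 3.3.1.3 behaviour quoted in the comment: switching a handler to SIG_IGN discards any pending instance of the signal, even a blocked one. That rule is observable from plain userspace with standard POSIX calls:

#include <signal.h>
#include <stdio.h>

int main(void)
{
    sigset_t block, pend;

    sigemptyset(&block);
    sigaddset(&block, SIGUSR1);
    sigprocmask(SIG_BLOCK, &block, NULL);   /* block, then make it pending */
    raise(SIGUSR1);

    sigpending(&pend);
    printf("pending before SIG_IGN: %d\n", sigismember(&pend, SIGUSR1));

    signal(SIGUSR1, SIG_IGN);               /* discards the pending signal */

    sigpending(&pend);
    printf("pending after  SIG_IGN: %d\n", sigismember(&pend, SIGUSR1));
    return 0;
}
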
index c93d37f..7ef7f60 100644 (file)
@@ -1202,69 +1202,24 @@ asmlinkage long sys_times(struct tms __user * tbuf)
         */
        if (tbuf) {
                struct tms tmp;
+               struct task_struct *tsk = current;
+               struct task_struct *t;
                cputime_t utime, stime, cutime, cstime;
 
-#ifdef CONFIG_SMP
-               if (thread_group_empty(current)) {
-                       /*
-                        * Single thread case without the use of any locks.
-                        *
-                        * We may race with release_task if two threads are
-                        * executing. However, release task first adds up the
-                        * counters (__exit_signal) before  removing the task
-                        * from the process tasklist (__unhash_process).
-                        * __exit_signal also acquires and releases the
-                        * siglock which results in the proper memory ordering
-                        * so that the list modifications are always visible
-                        * after the counters have been updated.
-                        *
-                        * If the counters have been updated by the second thread
-                        * but the thread has not yet been removed from the list
-                        * then the other branch will be executing which will
-                        * block on tasklist_lock until the exit handling of the
-                        * other task is finished.
-                        *
-                        * This also implies that the sighand->siglock cannot
-                        * be held by another processor. So we can also
-                        * skip acquiring that lock.
-                        */
-                       utime = cputime_add(current->signal->utime, current->utime);
-                       stime = cputime_add(current->signal->utime, current->stime);
-                       cutime = current->signal->cutime;
-                       cstime = current->signal->cstime;
-               } else
-#endif
-               {
+               spin_lock_irq(&tsk->sighand->siglock);
+               utime = tsk->signal->utime;
+               stime = tsk->signal->stime;
+               t = tsk;
+               do {
+                       utime = cputime_add(utime, t->utime);
+                       stime = cputime_add(stime, t->stime);
+                       t = next_thread(t);
+               } while (t != tsk);
 
-                       /* Process with multiple threads */
-                       struct task_struct *tsk = current;
-                       struct task_struct *t;
+               cutime = tsk->signal->cutime;
+               cstime = tsk->signal->cstime;
+               spin_unlock_irq(&tsk->sighand->siglock);
 
-                       read_lock(&tasklist_lock);
-                       utime = tsk->signal->utime;
-                       stime = tsk->signal->stime;
-                       t = tsk;
-                       do {
-                               utime = cputime_add(utime, t->utime);
-                               stime = cputime_add(stime, t->stime);
-                               t = next_thread(t);
-                       } while (t != tsk);
-
-                       /*
-                        * While we have tasklist_lock read-locked, no dying thread
-                        * can be updating current->signal->[us]time.  Instead,
-                        * we got their counts included in the live thread loop.
-                        * However, another thread can come in right now and
-                        * do a wait call that updates current->signal->c[us]time.
-                        * To make sure we always see that pair updated atomically,
-                        * we take the siglock around fetching them.
-                        */
-                       spin_lock_irq(&tsk->sighand->siglock);
-                       cutime = tsk->signal->cutime;
-                       cstime = tsk->signal->cstime;
-                       spin_unlock_irq(&tsk->sighand->siglock);
-                       read_unlock(&tasklist_lock);
-               }
                tmp.tms_utime = cputime_to_clock_t(utime);
                tmp.tms_stime = cputime_to_clock_t(stime);
                tmp.tms_cutime = cputime_to_clock_t(cutime);
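
The sys_times() hunk deletes the SMP-only lock-free fastpath and always sums the live threads' times under the siglock. From userspace the aggregated values surface through times(2); for instance:

#include <stdio.h>
#include <sys/times.h>
#include <unistd.h>

int main(void)
{
    struct tms t;
    clock_t ticks = times(&t);              /* fills in the four fields */
    long hz = sysconf(_SC_CLK_TCK);         /* clock ticks per second */

    if (ticks == (clock_t)-1)
        return 1;
    printf("utime  %.2fs\n", (double)t.tms_utime  / hz);
    printf("stime  %.2fs\n", (double)t.tms_stime  / hz);
    printf("cutime %.2fs\n", (double)t.tms_cutime / hz);
    printf("cstime %.2fs\n", (double)t.tms_cstime / hz);
    return 0;
}
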