/* move to end of parent's list to avoid starvation */
write_lock_irq(&tasklist_lock);
remove_parent(p);
- add_parent(p, p->parent);
+ add_parent(p);
write_unlock_irq(&tasklist_lock);
retval = ru ? getrusage(p, RUSAGE_BOTH, ru) : 0;
if (retval)
write_lock_irq(&tasklist_lock);
remove_parent(p);
p->parent = p->real_parent;
- add_parent(p, p->parent);
+ add_parent(p);
do_notify_parent(p, SIGCHLD);
write_unlock_irq(&tasklist_lock);
} else
idle = idle_thread(cpu);
init_idle(idle, cpu);
- unhash_process(idle);
waittime = 200000000;
while (waittime-- && !cpu_isset(cpu, cpu_callin_map))
p->signal->tty = NULL;
if (!p->signal->leader)
continue;
- send_group_sig_info(SIGHUP, SEND_SIG_PRIV, p);
- send_group_sig_info(SIGCONT, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGHUP, SEND_SIG_PRIV, p);
+ group_send_sig_info(SIGCONT, SEND_SIG_PRIV, p);
if (tty->pgrp > 0)
p->signal->tty_old_pgrp = tty->pgrp;
} while_each_task_pid(tty->session, PIDTYPE_SID, p);
tty_hangup(tty);
#else
struct tty_struct *tty = arg;
- struct task_struct *p;
+ struct task_struct *g, *p;
int session;
int i;
struct file *filp;
tty->driver->flush_buffer(tty);
read_lock(&tasklist_lock);
+ /* Kill the entire session */
do_each_task_pid(session, PIDTYPE_SID, p) {
- if (p->signal->tty == tty || session > 0) {
+ printk(KERN_NOTICE "SAK: killed process %d"
+ " (%s): p->signal->session==tty->session\n",
+ p->pid, p->comm);
+ send_sig(SIGKILL, p, 1);
+ } while_each_task_pid(session, PIDTYPE_SID, p);
+ /* Now kill any processes that happen to have the
+ * tty open.
+ */
+ do_each_thread(g, p) {
+ if (p->signal->tty == tty) {
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): p->signal->session==tty->session\n",
p->pid, p->comm);
rcu_read_unlock();
}
task_unlock(p);
- } while_each_task_pid(session, PIDTYPE_SID, p);
+ } while_each_thread(g, p);
read_unlock(&tasklist_lock);
#endif
}
- fix all XXX showstoppers
- disable IR/IT DMA interrupts on shutdown
- flush pci writes to the card by issuing a read
- - devfs and character device dispatching (* needs testing with Linux 2.2.x)
+ - character device dispatching
- switch over to the new kernel DMA API (pci_map_*()) (* needs testing on platforms with IOMMU!)
- keep all video_cards in a list (for open() via chardev), set file->private_data = video
- dv1394_poll should indicate POLLIN when receiving buffers are available
init.api_version = DV1394_API_VERSION;
init.n_frames = DV1394_MAX_FRAMES / 4;
- /* the following are now set via devfs */
init.channel = video->channel;
init.format = video->pal_or_ntsc;
init.cip_n = video->cip_n;
{
struct video_card *video = NULL;
- /* if the device was opened through devfs, then file->private_data
- has already been set to video by devfs */
if (file->private_data) {
video = (struct video_card*) file->private_data;
video = kzalloc(sizeof(*video), GFP_KERNEL);
if (!video) {
printk(KERN_ERR "dv1394: cannot allocate video_card\n");
- goto err;
+ return -1;
}
video->ohci = ohci;
list_add_tail(&video->list, &dv1394_cards);
spin_unlock_irqrestore(&dv1394_cards_lock, flags);
- if (devfs_mk_cdev(MKDEV(IEEE1394_MAJOR,
- IEEE1394_MINOR_BLOCK_DV1394*16 + video->id),
- S_IFCHR|S_IRUGO|S_IWUGO,
- "ieee1394/dv/host%d/%s/%s",
- (video->id>>2),
- (video->pal_or_ntsc == DV1394_NTSC ? "NTSC" : "PAL"),
- (video->mode == MODE_RECEIVE ? "in" : "out")) < 0)
- goto err_free;
-
debug_printk("dv1394: dv1394_init() OK on ID %d\n", video->id);
-
return 0;
-
- err_free:
- kfree(video);
- err:
- return -1;
}
static void dv1394_un_init(struct video_card *video)
{
- char buf[32];
-
/* obviously nobody has the driver open at this point */
do_dv1394_shutdown(video, 1);
- snprintf(buf, sizeof(buf), "dv/host%d/%s/%s", (video->id >> 2),
- (video->pal_or_ntsc == DV1394_NTSC ? "NTSC" : "PAL"),
- (video->mode == MODE_RECEIVE ? "in" : "out")
- );
-
- devfs_remove("ieee1394/%s", buf);
kfree(video);
}
class_device_destroy(hpsb_protocol_class,
MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id<<2)));
- devfs_remove("ieee1394/dv/host%d/NTSC", id);
- devfs_remove("ieee1394/dv/host%d/PAL", id);
- devfs_remove("ieee1394/dv/host%d", id);
}
static void dv1394_add_host (struct hpsb_host *host)
class_device_create(hpsb_protocol_class, NULL, MKDEV(
IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16 + (id<<2)),
NULL, "dv1394-%d", id);
- devfs_mk_dir("ieee1394/dv/host%d", id);
- devfs_mk_dir("ieee1394/dv/host%d/NTSC", id);
- devfs_mk_dir("ieee1394/dv/host%d/PAL", id);
dv1394_init(ohci, DV1394_NTSC, MODE_RECEIVE);
dv1394_init(ohci, DV1394_NTSC, MODE_TRANSMIT);
static void __exit dv1394_exit_module(void)
{
hpsb_unregister_protocol(&dv1394_driver);
-
hpsb_unregister_highlevel(&dv1394_highlevel);
cdev_del(&dv1394_cdev);
- devfs_remove("ieee1394/dv");
}
static int __init dv1394_init_module(void)
return ret;
}
- devfs_mk_dir("ieee1394/dv");
-
hpsb_register_highlevel(&dv1394_highlevel);
ret = hpsb_register_protocol(&dv1394_driver);
if (ret) {
printk(KERN_ERR "dv1394: failed to register protocol\n");
hpsb_unregister_highlevel(&dv1394_highlevel);
- devfs_remove("ieee1394/dv");
cdev_del(&dv1394_cdev);
return ret;
}
/* Disable Isochronous Resource Manager functionality */
int hpsb_disable_irm = 0;
-module_param_named(disable_irm, hpsb_disable_irm, bool, 0);
+module_param_named(disable_irm, hpsb_disable_irm, bool, 0444);
MODULE_PARM_DESC(disable_irm,
"Disable Isochronous Resource Manager functionality.");
goto exit_release_kernel_thread;
}
- /* actually this is a non-fatal error */
- ret = devfs_mk_dir("ieee1394");
- if (ret < 0) {
- HPSB_ERR("unable to make devfs dir for device major %d!\n", IEEE1394_MAJOR);
- goto release_chrdev;
- }
-
ret = bus_register(&ieee1394_bus_type);
if (ret < 0) {
HPSB_INFO("bus register failed");
- goto release_devfs;
+ goto release_chrdev;
}
for (i = 0; fw_bus_attrs[i]; i++) {
fw_bus_attrs[i--]);
}
bus_unregister(&ieee1394_bus_type);
- goto release_devfs;
+ goto release_chrdev;
}
}
for (i = 0; fw_bus_attrs[i]; i++)
bus_remove_file(&ieee1394_bus_type, fw_bus_attrs[i]);
bus_unregister(&ieee1394_bus_type);
-release_devfs:
- devfs_remove("ieee1394");
release_chrdev:
unregister_chrdev_region(IEEE1394_CORE_DEV, 256);
exit_release_kernel_thread:
hpsb_cleanup_config_roms();
unregister_chrdev_region(IEEE1394_CORE_DEV, 256);
- devfs_remove("ieee1394");
}
module_init(ieee1394_init);
#define _IEEE1394_CORE_H
#include <linux/slab.h>
-#include <linux/devfs_fs_kernel.h>
#include <asm/atomic.h>
#include <asm/semaphore.h>
#include "hosts.h"
#define IEEE1394_MINOR_BLOCK_RAW1394 0
#define IEEE1394_MINOR_BLOCK_VIDEO1394 1
#define IEEE1394_MINOR_BLOCK_DV1394 2
-#define IEEE1394_MINOR_BLOCK_AMDTP 3
#define IEEE1394_MINOR_BLOCK_EXPERIMENTAL 15
#define IEEE1394_CORE_DEV MKDEV(IEEE1394_MAJOR, 0)
#define IEEE1394_RAW1394_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16)
#define IEEE1394_VIDEO1394_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_VIDEO1394 * 16)
#define IEEE1394_DV1394_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_DV1394 * 16)
-#define IEEE1394_AMDTP_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_AMDTP * 16)
#define IEEE1394_EXPERIMENTAL_DEV MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_EXPERIMENTAL * 16)
/* return the index (within a minor number block) of a file */
/* Initialize IR Legacy DMA channel mask */
ohci->ir_legacy_channels = 0;
- /*
- * Accept AT requests from all nodes. This probably
- * will have to be controlled from the subsystem
- * on a per node basis.
- */
- reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0x80000000);
+ /* Accept AR requests from all nodes */
+ reg_write(ohci, OHCI1394_AsReqFilterHiSet, 0x80000000);
+
+ /* Set the address range of the physical response unit.
+ * Most controllers do not implement it as a writable register though.
+ * They will keep a hardwired offset of 0x00010000 and show 0x0 as
+ * register content.
+ * To actually enable physical responses is the job of our interrupt
+ * handler which programs the physical request filter. */
+ reg_write(ohci, OHCI1394_PhyUpperBound, 0xffff0000);
+
+ DBGMSG("physUpperBoundOffset=%08x",
+ reg_read(ohci, OHCI1394_PhyUpperBound));
/* Specify AT retries */
reg_write(ohci, OHCI1394_ATRetries,
OHCI1394_reqTxComplete |
OHCI1394_isochRx |
OHCI1394_isochTx |
+ OHCI1394_postedWriteErr |
OHCI1394_cycleInconsistent);
/* Enable link */
event &= ~OHCI1394_unrecoverableError;
}
-
+ if (event & OHCI1394_postedWriteErr) {
+ PRINT(KERN_ERR, "physical posted write error");
+ /* no recovery strategy yet, had to involve protocol drivers */
+ }
if (event & OHCI1394_cycleInconsistent) {
/* We subscribe to the cycleInconsistent event only to
* clear the corresponding event bit... otherwise,
DBGMSG("OHCI1394_cycleInconsistent");
event &= ~OHCI1394_cycleInconsistent;
}
-
if (event & OHCI1394_busReset) {
/* The busReset event bit can't be cleared during the
* selfID phase, so we disable busReset interrupts, to
}
event &= ~OHCI1394_busReset;
}
-
if (event & OHCI1394_reqTxComplete) {
struct dma_trm_ctx *d = &ohci->at_req_context;
DBGMSG("Got reqTxComplete interrupt "
reg_write(ohci, OHCI1394_IntMaskSet, OHCI1394_busReset);
spin_unlock_irqrestore(&ohci->event_lock, flags);
- /* Accept Physical requests from all nodes. */
- reg_write(ohci,OHCI1394_AsReqFilterHiSet, 0xffffffff);
- reg_write(ohci,OHCI1394_AsReqFilterLoSet, 0xffffffff);
-
/* Turn on phys dma reception.
*
* TODO: Enable some sort of filtering management.
*/
if (phys_dma) {
- reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0xffffffff);
- reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0xffffffff);
- reg_write(ohci,OHCI1394_PhyUpperBound, 0xffff0000);
- } else {
- reg_write(ohci,OHCI1394_PhyReqFilterHiSet, 0x00000000);
- reg_write(ohci,OHCI1394_PhyReqFilterLoSet, 0x00000000);
+ reg_write(ohci, OHCI1394_PhyReqFilterHiSet,
+ 0xffffffff);
+ reg_write(ohci, OHCI1394_PhyReqFilterLoSet,
+ 0xffffffff);
}
DBGMSG("PhyReqFilter=%08x%08x",
- reg_read(ohci,OHCI1394_PhyReqFilterHiSet),
- reg_read(ohci,OHCI1394_PhyReqFilterLoSet));
+ reg_read(ohci, OHCI1394_PhyReqFilterHiSet),
+ reg_read(ohci, OHCI1394_PhyReqFilterLoSet));
hpsb_selfid_complete(host, phyid, isroot);
} else
* fail to report the right length. Anyway, the ohci spec
* clearly says it's 2kb, so this shouldn't be a problem. */
ohci_base = pci_resource_start(dev, 0);
- if (pci_resource_len(dev, 0) != OHCI1394_REGISTER_SIZE)
- PRINT(KERN_WARNING, "Unexpected PCI resource length of %lx!",
+ if (pci_resource_len(dev, 0) < OHCI1394_REGISTER_SIZE)
+ PRINT(KERN_WARNING, "PCI resource length of %lx too small!",
pci_resource_len(dev, 0));
/* Seems PCMCIA handles this internally. Not sure why. Seems
#include <linux/cdev.h>
#include <asm/uaccess.h>
#include <asm/atomic.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/compat.h>
#include "csr1212.h"
goto out_unreg;
}
- devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, IEEE1394_MINOR_BLOCK_RAW1394 * 16),
- S_IFCHR | S_IRUSR | S_IWUSR, RAW1394_DEVICE_NAME);
-
cdev_init(&raw1394_cdev, &raw1394_fops);
raw1394_cdev.owner = THIS_MODULE;
kobject_set_name(&raw1394_cdev.kobj, RAW1394_DEVICE_NAME);
goto out;
out_dev:
- devfs_remove(RAW1394_DEVICE_NAME);
class_device_destroy(hpsb_protocol_class,
MKDEV(IEEE1394_MAJOR,
IEEE1394_MINOR_BLOCK_RAW1394 * 16));
MKDEV(IEEE1394_MAJOR,
IEEE1394_MINOR_BLOCK_RAW1394 * 16));
cdev_del(&raw1394_cdev);
- devfs_remove(RAW1394_DEVICE_NAME);
hpsb_unregister_highlevel(&raw1394_highlevel);
hpsb_unregister_protocol(&raw1394_driver);
}
#endif
#define SBP2_ERR(fmt, args...) HPSB_ERR("sbp2: "fmt, ## args)
+#define SBP2_DEBUG_ENTER() SBP2_DEBUG("%s", __FUNCTION__)
/*
* Globals
command->Current_SCpnt = Current_SCpnt;
list_add_tail(&command->list, &scsi_id->sbp2_command_orb_inuse);
} else {
- SBP2_ERR("sbp2util_allocate_command_orb - No orbs available!");
+ SBP2_ERR("%s: no orbs available", __FUNCTION__);
}
spin_unlock_irqrestore(&scsi_id->sbp2_command_orb_lock, flags);
return command;
struct hpsb_host *host;
if (!scsi_id) {
- printk(KERN_ERR "%s: scsi_id == NULL\n", __FUNCTION__);
+ SBP2_ERR("%s: scsi_id == NULL", __FUNCTION__);
return;
}
struct unit_directory *ud;
struct scsi_id_instance_data *scsi_id;
- SBP2_DEBUG("sbp2_probe");
+ SBP2_DEBUG_ENTER();
ud = container_of(dev, struct unit_directory, device);
struct scsi_id_instance_data *scsi_id;
struct scsi_device *sdev;
- SBP2_DEBUG("sbp2_remove");
+ SBP2_DEBUG_ENTER();
ud = container_of(dev, struct unit_directory, device);
scsi_id = ud->device.driver_data;
{
struct scsi_id_instance_data *scsi_id = ud->device.driver_data;
- SBP2_DEBUG("sbp2_update");
+ SBP2_DEBUG_ENTER();
if (sbp2_reconnect_device(scsi_id)) {
struct Scsi_Host *scsi_host = NULL;
struct scsi_id_instance_data *scsi_id = NULL;
- SBP2_DEBUG("sbp2_alloc_device");
+ SBP2_DEBUG_ENTER();
scsi_id = kzalloc(sizeof(*scsi_id), GFP_KERNEL);
if (!scsi_id) {
#ifdef CONFIG_IEEE1394_SBP2_PHYS_DMA
/* Handle data movement if physical dma is not
- * enabled/supportedon host controller */
- hpsb_register_addrspace(&sbp2_highlevel, ud->ne->host, &sbp2_physdma_ops,
- 0x0ULL, 0xfffffffcULL);
+ * enabled or not supported on host controller */
+ if (!hpsb_register_addrspace(&sbp2_highlevel, ud->ne->host,
+ &sbp2_physdma_ops,
+ 0x0ULL, 0xfffffffcULL)) {
+ SBP2_ERR("failed to register lower 4GB address range");
+ goto failed_alloc;
+ }
#endif
}
+ /* Prevent unloading of the 1394 host */
+ if (!try_module_get(hi->host->driver->owner)) {
+ SBP2_ERR("failed to get a reference on 1394 host driver");
+ goto failed_alloc;
+ }
+
scsi_id->hi = hi;
list_add_tail(&scsi_id->scsi_list, &hi->scsi_ids);
struct sbp2scsi_host_info *hi = scsi_id->hi;
int error;
- SBP2_DEBUG("sbp2_start_device");
+ SBP2_DEBUG_ENTER();
/* Login FIFO DMA */
scsi_id->login_response =
* allows someone else to login instead. One second makes sense. */
msleep_interruptible(1000);
if (signal_pending(current)) {
- SBP2_WARN("aborting sbp2_start_device due to event");
sbp2_remove_device(scsi_id);
return -EINTR;
}
{
struct sbp2scsi_host_info *hi;
- SBP2_DEBUG("sbp2_remove_device");
+ SBP2_DEBUG_ENTER();
if (!scsi_id)
return;
scsi_id->ud->device.driver_data = NULL;
+ if (hi)
+ module_put(hi->host->driver->owner);
+
SBP2_DEBUG("SBP-2 device removed, SCSI ID = %d", scsi_id->ud->id);
kfree(scsi_id);
int max_logins;
int active_logins;
- SBP2_DEBUG("sbp2_query_logins");
+ SBP2_DEBUG_ENTER();
scsi_id->query_logins_orb->reserved1 = 0x0;
scsi_id->query_logins_orb->reserved2 = 0x0;
scsi_id->query_logins_orb->query_response_lo = scsi_id->query_logins_response_dma;
scsi_id->query_logins_orb->query_response_hi = ORB_SET_NODE_ID(hi->host->node_id);
- SBP2_DEBUG("sbp2_query_logins: query_response_hi/lo initialized");
scsi_id->query_logins_orb->lun_misc = ORB_SET_FUNCTION(SBP2_QUERY_LOGINS_REQUEST);
scsi_id->query_logins_orb->lun_misc |= ORB_SET_NOTIFY(1);
scsi_id->query_logins_orb->lun_misc |= ORB_SET_LUN(scsi_id->sbp2_lun);
- SBP2_DEBUG("sbp2_query_logins: lun_misc initialized");
scsi_id->query_logins_orb->reserved_resp_length =
ORB_SET_QUERY_LOGINS_RESP_LENGTH(sizeof(struct sbp2_query_logins_response));
- SBP2_DEBUG("sbp2_query_logins: reserved_resp_length initialized");
scsi_id->query_logins_orb->status_fifo_hi =
ORB_SET_STATUS_FIFO_HI(scsi_id->status_fifo_addr, hi->host->node_id);
sbp2util_cpu_to_be32_buffer(scsi_id->query_logins_orb, sizeof(struct sbp2_query_logins_orb));
- SBP2_DEBUG("sbp2_query_logins: orb byte-swapped");
-
sbp2util_packet_dump(scsi_id->query_logins_orb, sizeof(struct sbp2_query_logins_orb),
"sbp2 query logins orb", scsi_id->query_logins_orb_dma);
memset(scsi_id->query_logins_response, 0, sizeof(struct sbp2_query_logins_response));
memset(&scsi_id->status_block, 0, sizeof(struct sbp2_status_block));
- SBP2_DEBUG("sbp2_query_logins: query_logins_response/status FIFO memset");
-
data[0] = ORB_SET_NODE_ID(hi->host->node_id);
data[1] = scsi_id->query_logins_orb_dma;
sbp2util_cpu_to_be32_buffer(data, 8);
atomic_set(&scsi_id->sbp2_login_complete, 0);
- SBP2_DEBUG("sbp2_query_logins: prepared to write");
hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8);
- SBP2_DEBUG("sbp2_query_logins: written");
if (sbp2util_down_timeout(&scsi_id->sbp2_login_complete, 2*HZ)) {
SBP2_INFO("Error querying logins to SBP-2 device - timed out");
struct sbp2scsi_host_info *hi = scsi_id->hi;
quadlet_t data[2];
- SBP2_DEBUG("sbp2_login_device");
+ SBP2_DEBUG_ENTER();
if (!scsi_id->login_orb) {
- SBP2_DEBUG("sbp2_login_device: login_orb not alloc'd!");
+ SBP2_DEBUG("%s: login_orb not alloc'd!", __FUNCTION__);
return -EIO;
}
/* Set-up login ORB, assume no password */
scsi_id->login_orb->password_hi = 0;
scsi_id->login_orb->password_lo = 0;
- SBP2_DEBUG("sbp2_login_device: password_hi/lo initialized");
scsi_id->login_orb->login_response_lo = scsi_id->login_response_dma;
scsi_id->login_orb->login_response_hi = ORB_SET_NODE_ID(hi->host->node_id);
- SBP2_DEBUG("sbp2_login_device: login_response_hi/lo initialized");
scsi_id->login_orb->lun_misc = ORB_SET_FUNCTION(SBP2_LOGIN_REQUEST);
scsi_id->login_orb->lun_misc |= ORB_SET_RECONNECT(0); /* One second reconnect time */
scsi_id->login_orb->lun_misc |= ORB_SET_EXCLUSIVE(exclusive_login); /* Exclusive access to device */
scsi_id->login_orb->lun_misc |= ORB_SET_NOTIFY(1); /* Notify us of login complete */
scsi_id->login_orb->lun_misc |= ORB_SET_LUN(scsi_id->sbp2_lun);
- SBP2_DEBUG("sbp2_login_device: lun_misc initialized");
scsi_id->login_orb->passwd_resp_lengths =
ORB_SET_LOGIN_RESP_LENGTH(sizeof(struct sbp2_login_response));
- SBP2_DEBUG("sbp2_login_device: passwd_resp_lengths initialized");
scsi_id->login_orb->status_fifo_hi =
ORB_SET_STATUS_FIFO_HI(scsi_id->status_fifo_addr, hi->host->node_id);
scsi_id->login_orb->status_fifo_lo =
ORB_SET_STATUS_FIFO_LO(scsi_id->status_fifo_addr);
- /*
- * Byte swap ORB if necessary
- */
sbp2util_cpu_to_be32_buffer(scsi_id->login_orb, sizeof(struct sbp2_login_orb));
- SBP2_DEBUG("sbp2_login_device: orb byte-swapped");
-
sbp2util_packet_dump(scsi_id->login_orb, sizeof(struct sbp2_login_orb),
"sbp2 login orb", scsi_id->login_orb_dma);
- /*
- * Initialize login response and status fifo
- */
memset(scsi_id->login_response, 0, sizeof(struct sbp2_login_response));
memset(&scsi_id->status_block, 0, sizeof(struct sbp2_status_block));
- SBP2_DEBUG("sbp2_login_device: login_response/status FIFO memset");
-
- /*
- * Ok, let's write to the target's management agent register
- */
data[0] = ORB_SET_NODE_ID(hi->host->node_id);
data[1] = scsi_id->login_orb_dma;
sbp2util_cpu_to_be32_buffer(data, 8);
atomic_set(&scsi_id->sbp2_login_complete, 0);
- SBP2_DEBUG("sbp2_login_device: prepared to write to %08x",
- (unsigned int)scsi_id->sbp2_management_agent_addr);
hpsb_node_write(scsi_id->ne, scsi_id->sbp2_management_agent_addr, data, 8);
- SBP2_DEBUG("sbp2_login_device: written");
/*
* Wait for login status (up to 20 seconds)...
quadlet_t data[2];
int error;
- SBP2_DEBUG("sbp2_logout_device");
+ SBP2_DEBUG_ENTER();
/*
* Set-up logout ORB
quadlet_t data[2];
int error;
- SBP2_DEBUG("sbp2_reconnect_device");
+ SBP2_DEBUG_ENTER();
/*
* Set-up reconnect ORB
{
quadlet_t data;
- SBP2_DEBUG("sbp2_set_busy_timeout");
+ SBP2_DEBUG_ENTER();
- /*
- * Ok, let's write to the target's busy timeout register
- */
data = cpu_to_be32(SBP2_BUSY_TIMEOUT_VALUE);
-
- if (hpsb_node_write(scsi_id->ne, SBP2_BUSY_TIMEOUT_ADDRESS, &data, 4)) {
- SBP2_ERR("sbp2_set_busy_timeout error");
- }
-
+ if (hpsb_node_write(scsi_id->ne, SBP2_BUSY_TIMEOUT_ADDRESS, &data, 4))
+ SBP2_ERR("%s error", __FUNCTION__);
return 0;
}
firmware_revision, workarounds;
int i;
- SBP2_DEBUG("sbp2_parse_unit_directory");
+ SBP2_DEBUG_ENTER();
management_agent_addr = 0x0;
command_set_spec_id = 0x0;
{
struct sbp2scsi_host_info *hi = scsi_id->hi;
- SBP2_DEBUG("sbp2_max_speed_and_size");
+ SBP2_DEBUG_ENTER();
/* Initial setting comes from the hosts speed map */
scsi_id->speed_code =
u64 addr;
int retval;
- SBP2_DEBUG("sbp2_agent_reset");
+ SBP2_DEBUG_ENTER();
- /*
- * Ok, let's write to the target's management agent register
- */
data = ntohl(SBP2_AGENT_RESET_DATA);
addr = scsi_id->sbp2_command_block_agent_addr + SBP2_AGENT_RESET_OFFSET;
unsigned int request_bufflen = SCpnt->request_bufflen;
struct sbp2_command_info *command;
- SBP2_DEBUG("sbp2_send_command");
-#if (CONFIG_IEEE1394_SBP2_DEBUG >= 2) || defined(CONFIG_IEEE1394_SBP2_PACKET_DUMP)
- printk("[scsi command]\n ");
- scsi_print_command(SCpnt);
-#endif
+ SBP2_DEBUG_ENTER();
SBP2_DEBUG("SCSI transfer size = %x", request_bufflen);
SBP2_DEBUG("SCSI s/g elements = %x", (unsigned int)SCpnt->use_sg);
*/
static unsigned int sbp2_status_to_sense_data(unchar *sbp2_status, unchar *sense_data)
{
- SBP2_DEBUG("sbp2_status_to_sense_data");
+ SBP2_DEBUG_ENTER();
/*
* Ok, it's pretty ugly... ;-)
{
u8 *scsi_buf = SCpnt->request_buffer;
- SBP2_DEBUG("sbp2_check_sbp2_response");
+ SBP2_DEBUG_ENTER();
if (SCpnt->cmnd[0] == INQUIRY && (SCpnt->cmnd[1] & 3) == 0) {
/*
struct sbp2_command_info *command;
unsigned long flags;
- SBP2_DEBUG("sbp2_handle_status_write");
+ SBP2_DEBUG_ENTER();
sbp2util_packet_dump(data, length, "sbp2 status write by device", (u32)addr);
struct sbp2scsi_host_info *hi;
int result = DID_NO_CONNECT << 16;
- SBP2_DEBUG("sbp2scsi_queuecommand");
+ SBP2_DEBUG_ENTER();
+#if (CONFIG_IEEE1394_SBP2_DEBUG >= 2) || defined(CONFIG_IEEE1394_SBP2_PACKET_DUMP)
+ scsi_print_command(SCpnt);
+#endif
if (!sbp2util_node_is_available(scsi_id))
goto done;
struct sbp2_command_info *command;
unsigned long flags;
- SBP2_DEBUG("sbp2scsi_complete_all_commands");
+ SBP2_DEBUG_ENTER();
spin_lock_irqsave(&scsi_id->sbp2_command_orb_lock, flags);
while (!list_empty(&scsi_id->sbp2_command_orb_inuse)) {
u32 scsi_status, struct scsi_cmnd *SCpnt,
void (*done)(struct scsi_cmnd *))
{
- SBP2_DEBUG("sbp2scsi_complete_command");
+ SBP2_DEBUG_ENTER();
/*
* Sanity
*/
switch (scsi_status) {
case SBP2_SCSI_STATUS_GOOD:
- SCpnt->result = DID_OK;
+ SCpnt->result = DID_OK << 16;
break;
case SBP2_SCSI_STATUS_BUSY:
case SBP2_SCSI_STATUS_CHECK_CONDITION:
SBP2_DEBUG("SBP2_SCSI_STATUS_CHECK_CONDITION");
- SCpnt->result = CHECK_CONDITION << 1;
-
- /*
- * Debug stuff
- */
+ SCpnt->result = CHECK_CONDITION << 1 | DID_OK << 16;
#if CONFIG_IEEE1394_SBP2_DEBUG >= 1
scsi_print_command(SCpnt);
- scsi_print_sense("bh", SCpnt);
+ scsi_print_sense(SBP2_DEVICE_NAME, SCpnt);
#endif
-
break;
case SBP2_SCSI_STATUS_SELECTION_TIMEOUT:
/*
* Take care of any sbp2 response data mucking here (RBC stuff, etc.)
*/
- if (SCpnt->result == DID_OK) {
+ if (SCpnt->result == DID_OK << 16) {
sbp2_check_sbp2_response(scsi_id, SCpnt);
}
* If a unit attention occurs, return busy status so it gets
* retried... it could have happened because of a 1394 bus reset
* or hot-plug...
+ * XXX DID_BUS_BUSY is actually a bad idea because it will defy
+ * the scsi layer's retry logic.
*/
#if 0
if ((scsi_status == SBP2_SCSI_STATUS_CHECK_CONDITION) &&
{
int ret;
- SBP2_DEBUG("sbp2_module_init");
+ SBP2_DEBUG_ENTER();
/* Module load debug option to force one command at a time (serializing I/O) */
if (serialize_io) {
static void __exit sbp2_module_exit(void)
{
- SBP2_DEBUG("sbp2_module_exit");
+ SBP2_DEBUG_ENTER();
hpsb_unregister_protocol(&sbp2_driver);
#include <linux/poll.h>
#include <linux/smp_lock.h>
#include <linux/delay.h>
-#include <linux/devfs_fs_kernel.h>
#include <linux/bitops.h>
#include <linux/types.h>
#include <linux/vmalloc.h>
class_device_create(hpsb_protocol_class, NULL, MKDEV(
IEEE1394_MAJOR, minor),
NULL, "%s-%d", VIDEO1394_DRIVER_NAME, ohci->host->id);
- devfs_mk_cdev(MKDEV(IEEE1394_MAJOR, minor),
- S_IFCHR | S_IRUSR | S_IWUSR,
- "%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id);
}
{
struct ti_ohci *ohci = hpsb_get_hostinfo(&video1394_highlevel, host);
- if (ohci) {
+ if (ohci)
class_device_destroy(hpsb_protocol_class, MKDEV(IEEE1394_MAJOR,
IEEE1394_MINOR_BLOCK_VIDEO1394 * 16 + ohci->host->id));
- devfs_remove("%s/%d", VIDEO1394_DRIVER_NAME, ohci->host->id);
- }
-
return;
}
static void __exit video1394_exit_module (void)
{
hpsb_unregister_protocol(&video1394_driver);
-
hpsb_unregister_highlevel(&video1394_highlevel);
-
- devfs_remove(VIDEO1394_DRIVER_NAME);
cdev_del(&video1394_cdev);
-
PRINT_G(KERN_INFO, "Removed " VIDEO1394_DRIVER_NAME " module");
}
return ret;
}
- devfs_mk_dir(VIDEO1394_DRIVER_NAME);
-
hpsb_register_highlevel(&video1394_highlevel);
ret = hpsb_register_protocol(&video1394_driver);
if (ret) {
PRINT_G(KERN_ERR, "video1394: failed to register protocol");
hpsb_unregister_highlevel(&video1394_highlevel);
- devfs_remove(VIDEO1394_DRIVER_NAME);
cdev_del(&video1394_cdev);
return ret;
}
*/
ret = dio->page_errors;
if (ret == 0) {
- map_bh->b_state = 0;
- map_bh->b_size = 0;
BUG_ON(dio->block_in_file >= dio->final_block_in_request);
fs_startblk = dio->block_in_file >> dio->blkfactor;
dio_count = dio->final_block_in_request - dio->block_in_file;
if (dio_count & blkmask)
fs_count++;
+ map_bh->b_state = 0;
+ map_bh->b_size = fs_count << dio->inode->i_blkbits;
+
create = dio->rw == WRITE;
if (dio->lock_type == DIO_LOCKING) {
if (dio->block_in_file < (i_size_read(dio->inode) >>
} else if (dio->lock_type == DIO_NO_LOCKING) {
create = 0;
}
+
/*
* For writes inside i_size we forbid block creations: only
* overwrites are permitted. We fall back to buffered writes
* at a higher level for inside-i_size block-instantiating
* writes.
*/
- map_bh->b_size = fs_count << dio->blkbits;
ret = (*dio->get_block)(dio->inode, fs_startblk,
map_bh, create);
}
kmem_cache_free(sighand_cachep, newsighand);
return -EAGAIN;
}
+
+ /*
+ * child_reaper ignores SIGKILL, change it now.
+ * Reparenting needs write_lock on tasklist_lock,
+ * so it is safe to do it under read_lock.
+ */
+ if (unlikely(current->group_leader == child_reaper))
+ child_reaper = current;
+
zap_other_threads(current);
read_unlock(&tasklist_lock);
remove_parent(current);
remove_parent(leader);
- switch_exec_pids(leader, current);
+
+ /* Become a process group leader with the old leader's pid.
+ * Note: The old leader also uses thispid until release_task
+ * is called. Odd but simple and correct.
+ */
+ detach_pid(current, PIDTYPE_PID);
+ current->pid = leader->pid;
+ attach_pid(current, PIDTYPE_PID, current->pid);
+ attach_pid(current, PIDTYPE_PGID, current->signal->pgrp);
+ attach_pid(current, PIDTYPE_SID, current->signal->session);
+ list_add_tail(¤t->tasks, &init_task.tasks);
current->parent = current->real_parent = leader->real_parent;
leader->parent = leader->real_parent = child_reaper;
current->group_leader = current;
leader->group_leader = leader;
- add_parent(current, current->parent);
- add_parent(leader, leader->parent);
+ add_parent(current);
+ add_parent(leader);
if (ptrace) {
current->ptrace = ptrace;
__ptrace_link(current, parent);
}
- list_del(¤t->tasks);
- list_add_tail(¤t->tasks, &init_task.tasks);
current->exit_signal = SIGCHLD;
BUG_ON(leader->exit_state != EXIT_ZOMBIE);
/*
* Move our state over to newsighand and switch it in.
*/
- spin_lock_init(&newsighand->siglock);
atomic_set(&newsighand->count, 1);
memcpy(newsighand->action, oldsighand->action,
sizeof(newsighand->action));
write_unlock_irq(&tasklist_lock);
if (atomic_dec_and_test(&oldsighand->count))
- sighand_free(oldsighand);
+ kmem_cache_free(sighand_cachep, oldsighand);
}
BUG_ON(!thread_group_leader(current));
* Debug-only routine, without some platform-specific asm code, we can
* now only answer requests regarding whether we hold the lock for write
* (reader state is outside our visibility, we only track writer state).
- * Note: means !ismrlocked would give false positivies, so don't do that.
+ * Note: means !ismrlocked would give false positives, so don't do that.
*/
static inline int ismrlocked(mrlock_t *mrp, int type)
{
* assumes that all buffers on the page are started at the same time.
*
* The fix is two passes across the ioend list - one to start writeback on the
- * bufferheads, and then the second one submit them for I/O.
+ * buffer_heads, and then submit them for I/O on the second pass.
*/
STATIC void
xfs_submit_ioend(
/*
* page_dirty is initially a count of buffers on the page before
- * EOF and is decrememted as we move each into a cleanable state.
+ * EOF and is decremented as we move each into a cleanable state.
*
* Derivation:
*
* page if possible.
* The bh->b_state's cannot know if any of the blocks or which block for
* that matter are dirty due to mmap writes, and therefore bh uptodate is
- * only vaild if the page itself isn't completely uptodate. Some layers
+ * only valid if the page itself isn't completely uptodate. Some layers
* may clear the page dirty flag prior to calling write page, under the
* assumption the entire page will be written out; by not writing out the
* whole page the page can be reused before all valid dirty data is
/*
* page_dirty is initially a count of buffers on the page before
- * EOF and is decrememted as we move each into a cleanable state.
+ * EOF and is decremented as we move each into a cleanable state.
*
* Derivation:
*
}
STATIC int
-__xfs_get_block(
+__xfs_get_blocks(
struct inode *inode,
sector_t iblock,
- unsigned long blocks,
struct buffer_head *bh_result,
int create,
int direct,
xfs_iomap_t iomap;
xfs_off_t offset;
ssize_t size;
- int retpbbm = 1;
+ int niomap = 1;
int error;
offset = (xfs_off_t)iblock << inode->i_blkbits;
- if (blocks)
- size = (ssize_t) min_t(xfs_off_t, LONG_MAX,
- (xfs_off_t)blocks << inode->i_blkbits);
- else
- size = 1 << inode->i_blkbits;
-
+ ASSERT(bh_result->b_size >= (1 << inode->i_blkbits));
+ size = bh_result->b_size;
VOP_BMAP(vp, offset, size,
- create ? flags : BMAPI_READ, &iomap, &retpbbm, error);
+ create ? flags : BMAPI_READ, &iomap, &niomap, error);
if (error)
return -error;
-
- if (retpbbm == 0)
+ if (niomap == 0)
return 0;
if (iomap.iomap_bn != IOMAP_DADDR_NULL) {
}
}
- /* If this is a realtime file, data might be on a new device */
+ /*
+ * If this is a realtime file, data may be on a different device.
+ * to that pointed to from the buffer_head b_bdev currently.
+ */
bh_result->b_bdev = iomap.iomap_target->bt_bdev;
- /* If we previously allocated a block out beyond eof and
- * we are now coming back to use it then we will need to
- * flag it as new even if it has a disk address.
+ /*
+ * If we previously allocated a block out beyond eof and we are
+ * now coming back to use it then we will need to flag it as new
+ * even if it has a disk address.
*/
if (create &&
((!buffer_mapped(bh_result) && !buffer_uptodate(bh_result)) ||
}
}
- if (blocks) {
+ if (direct || size > (1 << inode->i_blkbits)) {
ASSERT(iomap.iomap_bsize - iomap.iomap_delta > 0);
offset = min_t(xfs_off_t,
- iomap.iomap_bsize - iomap.iomap_delta,
- (xfs_off_t)blocks << inode->i_blkbits);
- bh_result->b_size = (u32) min_t(xfs_off_t, UINT_MAX, offset);
+ iomap.iomap_bsize - iomap.iomap_delta, size);
+ bh_result->b_size = (ssize_t)min_t(xfs_off_t, LONG_MAX, offset);
}
return 0;
}
int
-xfs_get_block(
+xfs_get_blocks(
struct inode *inode,
sector_t iblock,
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_block(inode, iblock,
- bh_result->b_size >> inode->i_blkbits,
+ return __xfs_get_blocks(inode, iblock,
bh_result, create, 0, BMAPI_WRITE);
}
struct buffer_head *bh_result,
int create)
{
- return __xfs_get_block(inode, iblock,
- bh_result->b_size >> inode->i_blkbits,
+ return __xfs_get_blocks(inode, iblock,
bh_result, create, 1, BMAPI_WRITE|BMAPI_DIRECT);
}
/*
* Non-NULL private data means we need to issue a transaction to
* convert a range from unwritten to written extents. This needs
- * to happen from process contect but aio+dio I/O completion
+ * to happen from process context but aio+dio I/O completion
* happens from irq context so we need to defer it to a workqueue.
- * This is not nessecary for synchronous direct I/O, but we do
+ * This is not necessary for synchronous direct I/O, but we do
* it anyway to keep the code uniform and simpler.
*
* The core direct I/O code might be changed to always call the
}
/*
- * blockdev_direct_IO can return an error even afer the I/O
+ * blockdev_direct_IO can return an error even after the I/O
* completion handler was called. Thus we need to protect
* against double-freeing.
*/
unsigned int from,
unsigned int to)
{
- return block_prepare_write(page, from, to, xfs_get_block);
+ return block_prepare_write(page, from, to, xfs_get_blocks);
}
STATIC sector_t
VOP_RWLOCK(vp, VRWLOCK_READ);
VOP_FLUSH_PAGES(vp, (xfs_off_t)0, -1, 0, FI_REMAPF, error);
VOP_RWUNLOCK(vp, VRWLOCK_READ);
- return generic_block_bmap(mapping, block, xfs_get_block);
+ return generic_block_bmap(mapping, block, xfs_get_blocks);
}
STATIC int
struct file *unused,
struct page *page)
{
- return mpage_readpage(page, xfs_get_block);
+ return mpage_readpage(page, xfs_get_blocks);
}
STATIC int
struct list_head *pages,
unsigned nr_pages)
{
- return mpage_readpages(mapping, pages, nr_pages, xfs_get_block);
+ return mpage_readpages(mapping, pages, nr_pages, xfs_get_blocks);
}
STATIC void
} xfs_ioend_t;
extern struct address_space_operations xfs_address_space_operations;
-extern int xfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
+extern int xfs_get_blocks(struct inode *, sector_t, struct buffer_head *, int);
#endif /* __XFS_IOPS_H__ */
* Note, the NFS filehandle also includes an fsid portion which
* may have an inode number in it. That number is hardcoded to
* 32bits and there is no way for XFS to intercept it. In
- * practice this means when exporting an XFS filesytem with 64bit
+ * practice this means when exporting an XFS filesystem with 64bit
* inodes you should either export the mountpoint (rather than
* a subdirectory) or use the "fsid" export option.
*/
copy_in_user(&p->l_pid, &p32->l_pid, sizeof(u32)) ||
copy_in_user(&p->l_pad, &p32->l_pad, 4*sizeof(u32)))
return -EFAULT;
-
+
return (unsigned long)p;
}
#endif
STATIC long
-xfs_compat_ioctl(int mode, struct file *f, unsigned cmd, unsigned long arg)
+xfs_compat_ioctl(
+ int mode,
+ struct file *file,
+ unsigned cmd,
+ unsigned long arg)
{
+ struct inode *inode = file->f_dentry->d_inode;
+ vnode_t *vp = vn_from_inode(inode);
int error;
- struct inode *inode = f->f_dentry->d_inode;
- vnode_t *vp = vn_to_inode(inode);
switch (cmd) {
case XFS_IOC_DIOINFO:
return -ENOIOCTLCMD;
}
- VOP_IOCTL(vp, inode, f, mode, cmd, (void __user *)arg, error);
+ VOP_IOCTL(vp, inode, file, mode, cmd, (void __user *)arg, error);
VMODIFY(vp);
return error;
long
xfs_file_compat_ioctl(
- struct file *f,
+ struct file *file,
unsigned cmd,
unsigned long arg)
{
- return xfs_compat_ioctl(0, f, cmd, arg);
+ return xfs_compat_ioctl(0, file, cmd, arg);
}
long
xfs_file_compat_invis_ioctl(
- struct file *f,
+ struct file *file,
unsigned cmd,
unsigned long arg)
{
- return xfs_compat_ioctl(IO_INVIS, f, cmd, arg);
+ return xfs_compat_ioctl(IO_INVIS, file, cmd, arg);
}
xfs_vn_truncate(
struct inode *inode)
{
- block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_block);
+ block_truncate_page(inode->i_mapping, inode->i_size, xfs_get_blocks);
}
STATIC int
eventsent = 1;
/*
- * The iolock was dropped and reaquired in XFS_SEND_DATA
+ * The iolock was dropped and reacquired in XFS_SEND_DATA
* so we have to recheck the size when appending.
* We will only "goto start;" once, since having sent the
* event prevents another call to XFS_SEND_DATA, which is
#define SYNC_FSDATA 0x0020 /* flush fs data (e.g. superblocks) */
#define SYNC_REFCACHE 0x0040 /* prune some of the nfs ref cache */
#define SYNC_REMOUNT 0x0080 /* remount readonly, no dummy LRs */
-#define SYNC_QUIESCE 0x0100 /* quiesce fileystem for a snapshot */
+#define SYNC_QUIESCE 0x0100 /* quiesce filesystem for a snapshot */
typedef int (*vfs_mount_t)(bhv_desc_t *,
struct xfs_mount_args *, struct cred *);
* as possible.
*
* We must not be holding the AIL_LOCK at this point. Calling incore() to
- * search the buffercache can be a time consuming thing, and AIL_LOCK is a
+ * search the buffer cache can be a time consuming thing, and AIL_LOCK is a
* spinlock.
*/
STATIC void
/*
* This is called at mount time from xfs_mountfs to initialize the quotainfo
- * structure and start the global quotamanager (xfs_Gqm) if it hasn't done
+ * structure and start the global quota manager (xfs_Gqm) if it hasn't done
* so already. Note that the superblock has not been read in yet.
*/
void
* Given a udquot and gdquot, attach a ptr to the group dquot in the
* udquot as a hint for future lookups. The idea sounds simple, but the
* execution isn't, because the udquot might have a group dquot attached
- * already and getting rid of that gets us into lock ordering contraints.
+ * already and getting rid of that gets us into lock ordering constraints.
* The process is complicated more by the fact that the dquots may or may not
* be locked on entry.
*/
}
/*
* If we can't grab the flush lock then if the caller
- * really wanted us to give this our best shot,
+ * really wanted us to give this our best shot, so
* see if we can give a push to the buffer before we wait
* on the flush lock. At this point, we know that
- * eventhough the dquot is being flushed,
+ * even though the dquot is being flushed,
* it has (new) dirty data.
*/
xfs_qm_dqflock_pushbuf_wait(dqp);
/*
* Do a sanity check, and if needed, repair the dqblk. Don't
* output any warnings because it's perfectly possible to
- * find unitialized dquot blks. See comment in xfs_qm_dqcheck.
+ * find uninitialised dquot blks. See comment in xfs_qm_dqcheck.
*/
(void) xfs_qm_dqcheck(ddq, id+j, type, XFS_QMOPT_DQREPAIR,
"xfs_quotacheck");
error = 0;
/*
- * This looks racey, but we can't keep an inode lock across a
+ * This looks racy, but we can't keep an inode lock across a
* trans_reserve. But, this gets called during quotacheck, and that
* happens only at mount time which is single threaded.
*/
* we have to start from the beginning anyway.
* Once we're done, we'll log all the dquot bufs.
*
- * The *QUOTA_ON checks below may look pretty racey, but quotachecks
+ * The *QUOTA_ON checks below may look pretty racy, but quotachecks
* and quotaoffs don't race. (Quotachecks happen at mount time only).
*/
if (XFS_IS_UQUOTA_ON(mp)) {
/*
* Internally, we don't reset all the timers when quota enforcement
- * gets turned off. No need to confuse the userlevel code,
+ * gets turned off. No need to confuse the user level code,
* so return zeroes in that case.
*/
if (! XFS_IS_QUOTA_ENFORCED(mp)) {
}
/*
- * Didnt change anything critical, so, no need to log
+ * Didn't change anything critical, so, no need to log
*/
return (0);
}
* The access control process to determine the access permission:
* if uid == file owner id, use the file owner bits.
* if gid == file owner group id, use the file group bits.
- * scan ACL for a maching user or group, and use matched entry
+ * scan ACL for a matching user or group, and use matched entry
* permission. Use total permissions of all matching group entries,
* until all acl entries are exhausted. The final permission produced
* by matching acl entry or entries needs to be & with group permission.
{
char pagf_init; /* this agf's entry is initialized */
char pagi_init; /* this agi's entry is initialized */
- char pagf_metadata; /* the agf is prefered to be metadata */
+ char pagf_metadata; /* the agf is preferred to be metadata */
char pagi_inodeok; /* The agi is ok for inodes */
__uint8_t pagf_levels[XFS_BTNUM_AGF];
/* # of levels in bno & cnt btree */
xfs_alloc_trace_busy(
char *name, /* function tag string */
char *str, /* additional string */
- xfs_mount_t *mp, /* file system mount poing */
+ xfs_mount_t *mp, /* file system mount point */
xfs_agnumber_t agno, /* allocation group number */
xfs_agblock_t agbno, /* a.g. relative block number */
xfs_extlen_t len, /* length of extent */
} else
agbp = NULL;
- /* If this is a metadata prefered pag and we are user data
+ /* If this is a metadata preferred pag and we are user data
* then try somewhere else if we are not being asked to
* try harder at this point
*/
/*
* AG Busy list management
* The busy list contains block ranges that have been freed but whose
- * transacations have not yet hit disk. If any block listed in a busy
+ * transactions have not yet hit disk. If any block listed in a busy
* list is reused, the transaction that freed it must be forced to disk
* before continuing to use the block.
*
xfs_alloctype_t otype; /* original allocation type */
char wasdel; /* set if allocation was prev delayed */
char wasfromfl; /* set if allocation is from freelist */
- char isfl; /* set if is freelist blocks - !actg */
+ char isfl; /* set if is freelist blocks - !acctg */
char userdata; /* set if this is user data */
} xfs_alloc_arg_t;
xfs_trans_ihold(args.trans, dp);
/*
- * If the attribute list is non-existant or a shortform list,
+ * If the attribute list is non-existent or a shortform list,
* upgrade it to a single-leaf-block attribute list.
*/
if ((dp->i_d.di_aformat == XFS_DINODE_FMT_LOCAL) ||
* Fill in the disk block numbers in the state structure for the buffers
* that are attached to the state structure.
* This is done so that we can quickly reattach ourselves to those buffers
- * after some set of transaction commit's has released these buffers.
+ * after some set of transaction commits have released these buffers.
*/
STATIC int
xfs_attr_fillstate(xfs_da_state_t *state)
/*
* Reattach the buffers to the state structure based on the disk block
* numbers stored in the state structure.
- * This is done after some set of transaction commit's has released those
+ * This is done after some set of transaction commits have released those
* buffers from our grip.
*/
STATIC int
/*
* Copy out entries of shortform attribute lists for attr_list().
- * Shortform atrtribute lists are not stored in hashval sorted order.
+ * Shortform attribute lists are not stored in hashval sorted order.
* If the output buffer is not large enough to hold them all, then we
* we have to calculate each entries' hashvalue and sort them before
* we can begin returning them to the user.
/*
* Check for the degenerate case of the block being empty.
* If the block is empty, we'll simply delete it, no need to
- * coalesce it with a sibling block. We choose (aribtrarily)
+ * coalesce it with a sibling block. We choose (arbitrarily)
* to merge with the forward block unless it is NULL.
*/
if (count == 0) {
* The behavior chain is ordered based on the 'position' number which
* lives in the first field of the ops vector (higher numbers first).
*
- * Attemps to insert duplicate ops result in an EINVAL return code.
+ * Attempts to insert duplicate ops result in an EINVAL return code.
* Otherwise, return 0 to indicate success.
*/
int
/*
* Remove a behavior descriptor from a position in a behavior chain;
- * the postition is guaranteed not to be the first position.
+ * the position is guaranteed not to be the first position.
* Should only be called by the bhv_remove() macro.
*/
void
* behaviors is synchronized with operations-in-progress (oip's) so that
* the oip's always see a consistent view of the chain.
*
- * The term "interpostion" is used to refer to the act of inserting
+ * The term "interposition" is used to refer to the act of inserting
* a behavior such that it interposes on (i.e., is inserted in front
* of) a particular other behavior. A key example of this is when a
* system implementing distributed single system image wishes to
*
* Behavior synchronization is logic which is necessary under certain
* circumstances that there is no conflict between ongoing operations
- * traversing the behavior chain and those dunamically modifying the
+ * traversing the behavior chain and those dynamically modifying the
* behavior chain. Because behavior synchronization adds extra overhead
* to virtual operation invocation, we want to restrict, as much as
* we can, the requirement for this extra code, to those situations
return error;
}
-xfs_bmbt_rec_t * /* pointer to found extent entry */
-xfs_bmap_do_search_extents(
- xfs_bmbt_rec_t *base, /* base of extent list */
- xfs_extnum_t lastx, /* last extent index used */
- xfs_extnum_t nextents, /* number of file extents */
- xfs_fileoff_t bno, /* block number searched for */
- int *eofp, /* out: end of file found */
- xfs_extnum_t *lastxp, /* out: last extent index */
- xfs_bmbt_irec_t *gotp, /* out: extent entry found */
- xfs_bmbt_irec_t *prevp) /* out: previous extent entry found */
-{
- xfs_bmbt_rec_t *ep; /* extent list entry pointer */
- xfs_bmbt_irec_t got; /* extent list entry, decoded */
- int high; /* high index of binary search */
- int low; /* low index of binary search */
-
- /*
- * Initialize the extent entry structure to catch access to
- * uninitialized br_startblock field.
- */
- got.br_startoff = 0xffa5a5a5a5a5a5a5LL;
- got.br_blockcount = 0xa55a5a5a5a5a5a5aLL;
- got.br_state = XFS_EXT_INVALID;
-
-#if XFS_BIG_BLKNOS
- got.br_startblock = 0xffffa5a5a5a5a5a5LL;
-#else
- got.br_startblock = 0xffffa5a5;
-#endif
-
- if (lastx != NULLEXTNUM && lastx < nextents)
- ep = base + lastx;
- else
- ep = NULL;
- prevp->br_startoff = NULLFILEOFF;
- if (ep && bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep)) &&
- bno < got.br_startoff +
- (got.br_blockcount = xfs_bmbt_get_blockcount(ep)))
- *eofp = 0;
- else if (ep && lastx < nextents - 1 &&
- bno >= (got.br_startoff = xfs_bmbt_get_startoff(ep + 1)) &&
- bno < got.br_startoff +
- (got.br_blockcount = xfs_bmbt_get_blockcount(ep + 1))) {
- lastx++;
- ep++;
- *eofp = 0;
- } else if (nextents == 0)
- *eofp = 1;
- else if (bno == 0 &&
- (got.br_startoff = xfs_bmbt_get_startoff(base)) == 0) {
- ep = base;
- lastx = 0;
- got.br_blockcount = xfs_bmbt_get_blockcount(ep);
- *eofp = 0;
- } else {
- low = 0;
- high = nextents - 1;
- /* binary search the extents array */
- while (low <= high) {
- XFS_STATS_INC(xs_cmp_exlist);
- lastx = (low + high) >> 1;
- ep = base + lastx;
- got.br_startoff = xfs_bmbt_get_startoff(ep);
- got.br_blockcount = xfs_bmbt_get_blockcount(ep);
- if (bno < got.br_startoff)
- high = lastx - 1;
- else if (bno >= got.br_startoff + got.br_blockcount)
- low = lastx + 1;
- else {
- got.br_startblock = xfs_bmbt_get_startblock(ep);
- got.br_state = xfs_bmbt_get_state(ep);
- *eofp = 0;
- *lastxp = lastx;
- *gotp = got;
- return ep;
- }
- }
- if (bno >= got.br_startoff + got.br_blockcount) {
- lastx++;
- if (lastx == nextents) {
- *eofp = 1;
- got.br_startblock = xfs_bmbt_get_startblock(ep);
- got.br_state = xfs_bmbt_get_state(ep);
- *prevp = got;
- ep = NULL;
- } else {
- *eofp = 0;
- xfs_bmbt_get_all(ep, prevp);
- ep++;
- got.br_startoff = xfs_bmbt_get_startoff(ep);
- got.br_blockcount = xfs_bmbt_get_blockcount(ep);
- }
- } else {
- *eofp = 0;
- if (ep > base)
- xfs_bmbt_get_all(ep - 1, prevp);
- }
- }
- if (ep) {
- got.br_startblock = xfs_bmbt_get_startblock(ep);
- got.br_state = xfs_bmbt_get_state(ep);
- }
- *lastxp = lastx;
- *gotp = got;
- return ep;
-}
-
/*
* Search the extent records for the entry containing block bno.
* If bno lies in a hole, point to the next entry. If bno lies
xfs_bmap_search_multi_extents(struct xfs_ifork *, xfs_fileoff_t, int *,
xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
-/*
- * Search an extent list for the extent which includes block
- * bno.
- */
-xfs_bmbt_rec_t *xfs_bmap_do_search_extents(xfs_bmbt_rec_t *,
- xfs_extnum_t, xfs_extnum_t, xfs_fileoff_t, int *,
- xfs_extnum_t *, xfs_bmbt_irec_t *, xfs_bmbt_irec_t *);
-
#endif /* __KERNEL__ */
#endif /* __XFS_BMAP_H__ */
}
/*
- * This function is called to verify that our caller's have logged
+ * This function is called to verify that our callers have logged
* all the bytes that they changed.
*
* It does this by comparing the original copy of the buffer stored in
* the buf log item's bli_orig array to the current copy of the buffer
- * and ensuring that all bytes which miscompare are set in the bli_logged
+ * and ensuring that all bytes which mismatch are set in the bli_logged
* array of the buf log item.
*/
STATIC void
/*
* For Linux, we take the bitfields directly from capability.h
* and no longer attempt to keep this attribute ondisk compatible
- * with IRIX. Since this attribute is only set on exectuables,
+ * with IRIX. Since this attribute is only set on executables,
* it just doesn't make much sense to try. We do use a different
* named attribute though, to avoid confusion.
*/
/*
* Check for the degenerate case of the block being empty.
* If the block is empty, we'll simply delete it, no need to
- * coalesce it with a sibling block. We choose (aribtrarily)
+ * coalesce it with a sibling block. We choose (arbitrarily)
* to merge with the forward block unless it is NULL.
*/
if (count == 0) {
/*
* Reached the end of the block.
- * Set the offset to a nonexistent block 1 and return.
+ * Set the offset to a non-existent block 1 and return.
*/
*eofp = 1;
ASSERT(be32_to_cpu(leaf->ents[highstale].address) ==
XFS_DIR2_NULL_DATAPTR);
/*
- * Copy entries down to copver the stale entry
+ * Copy entries down to cover the stale entry
* and make room for the new entry.
*/
if (highstale - index > 0)
state->inleaf = 1;
blk2->index = 0;
cmn_err(CE_ALERT,
- "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting orignal leaf: "
+ "xfs_dir2_leafn_rebalance: picked the wrong leaf? reverting original leaf: "
"blk1->index %d\n",
blk1->index);
}
/*
* Check for the degenerate case of the block being empty.
* If the block is empty, we'll simply delete it, no need to
- * coalesce it with a sibling block. We choose (aribtrarily)
+ * coalesce it with a sibling block. We choose (arbitrarily)
* to merge with the forward block unless it is NULL.
*/
if (count == 0) {
*
* xfs_reserve_blocks is called to set m_resblks
* in the in-core mount table. The number of unused reserved blocks
- * is kept in m_resbls_avail.
+ * is kept in m_resblks_avail.
*
* Reserve the requested number of blocks if available. Otherwise return
* as many as possible to satisfy the request. The actual number
int ninodes; /* num inodes per buf */
xfs_agino_t thisino; /* current inode number, for loop */
int version; /* inode version number to use */
- int isaligned; /* inode allocation at stripe unit */
+ int isaligned = 0; /* inode allocation at stripe unit */
/* boundary */
args.tp = tp;
return XFS_ERROR(ENOSPC);
args.minlen = args.maxlen = XFS_IALLOC_BLOCKS(args.mp);
/*
- * Set the alignment for the allocation.
- * If stripe alignment is turned on then align at stripe unit
- * boundary.
- * If the cluster size is smaller than a filesystem block
- * then we're doing I/O for inodes in filesystem block size pieces,
- * so don't need alignment anyway.
- */
- isaligned = 0;
- if (args.mp->m_sinoalign) {
- ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
- args.alignment = args.mp->m_dalign;
- isaligned = 1;
- } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
- args.mp->m_sb.sb_inoalignmt >=
- XFS_B_TO_FSBT(args.mp, XFS_INODE_CLUSTER_SIZE(args.mp)))
- args.alignment = args.mp->m_sb.sb_inoalignmt;
- else
- args.alignment = 1;
+ * First try to allocate inodes contiguous with the last-allocated
+ * chunk of inodes. If the filesystem is striped, this will fill
+ * an entire stripe unit with inodes.
+ */
agi = XFS_BUF_TO_AGI(agbp);
- /*
- * Need to figure out where to allocate the inode blocks.
- * Ideally they should be spaced out through the a.g.
- * For now, just allocate blocks up front.
- */
- args.agbno = be32_to_cpu(agi->agi_root);
- args.fsbno = XFS_AGB_TO_FSB(args.mp, be32_to_cpu(agi->agi_seqno),
- args.agbno);
- /*
- * Allocate a fixed-size extent of inodes.
- */
- args.type = XFS_ALLOCTYPE_NEAR_BNO;
- args.mod = args.total = args.wasdel = args.isfl = args.userdata =
- args.minalignslop = 0;
- args.prod = 1;
- /*
- * Allow space for the inode btree to split.
- */
- args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
- if ((error = xfs_alloc_vextent(&args)))
- return error;
+ newino = be32_to_cpu(agi->agi_newino);
+ if(likely(newino != NULLAGINO)) {
+ args.agbno = XFS_AGINO_TO_AGBNO(args.mp, newino) +
+ XFS_IALLOC_BLOCKS(args.mp);
+ args.fsbno = XFS_AGB_TO_FSB(args.mp,
+ be32_to_cpu(agi->agi_seqno), args.agbno);
+ args.type = XFS_ALLOCTYPE_THIS_BNO;
+ args.mod = args.total = args.wasdel = args.isfl =
+ args.userdata = args.minalignslop = 0;
+ args.prod = 1;
+ args.alignment = 1;
+ /*
+ * Allow space for the inode btree to split.
+ */
+ args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+ if ((error = xfs_alloc_vextent(&args)))
+ return error;
+ } else
+ args.fsbno = NULLFSBLOCK;
+ if (unlikely(args.fsbno == NULLFSBLOCK)) {
+ /*
+ * Set the alignment for the allocation.
+ * If stripe alignment is turned on then align at stripe unit
+ * boundary.
+ * If the cluster size is smaller than a filesystem block
+ * then we're doing I/O for inodes in filesystem block size
+ * pieces, so don't need alignment anyway.
+ */
+ isaligned = 0;
+ if (args.mp->m_sinoalign) {
+ ASSERT(!(args.mp->m_flags & XFS_MOUNT_NOALIGN));
+ args.alignment = args.mp->m_dalign;
+ isaligned = 1;
+ } else if (XFS_SB_VERSION_HASALIGN(&args.mp->m_sb) &&
+ args.mp->m_sb.sb_inoalignmt >=
+ XFS_B_TO_FSBT(args.mp,
+ XFS_INODE_CLUSTER_SIZE(args.mp)))
+ args.alignment = args.mp->m_sb.sb_inoalignmt;
+ else
+ args.alignment = 1;
+ /*
+ * Need to figure out where to allocate the inode blocks.
+ * Ideally they should be spaced out through the a.g.
+ * For now, just allocate blocks up front.
+ */
+ args.agbno = be32_to_cpu(agi->agi_root);
+ args.fsbno = XFS_AGB_TO_FSB(args.mp,
+ be32_to_cpu(agi->agi_seqno), args.agbno);
+ /*
+ * Allocate a fixed-size extent of inodes.
+ */
+ args.type = XFS_ALLOCTYPE_NEAR_BNO;
+ args.mod = args.total = args.wasdel = args.isfl =
+ args.userdata = args.minalignslop = 0;
+ args.prod = 1;
+ /*
+ * Allow space for the inode btree to split.
+ */
+ args.minleft = XFS_IN_MAXLEVELS(args.mp) - 1;
+ if ((error = xfs_alloc_vextent(&args)))
+ return error;
+ }
+
/*
* If stripe alignment is turned on, then try again with cluster
* alignment.
rec.ir_freecount++;
/*
- * When an inode cluster is free, it becomes elgible for removal
+ * When an inode cluster is free, it becomes eligible for removal
*/
if ((mp->m_flags & XFS_MOUNT_IDELETE) &&
(rec.ir_freecount == XFS_IALLOC_INODES(mp))) {
} else {
/*
* If the inode is not fully constructed due to
- * filehandle mistmatches wait for the inode to go
+ * filehandle mismatches wait for the inode to go
* away and try again.
*
* iget_locked will call __wait_on_freeing_inode
xfs_dinode_t *dip;
/*
- * Call the space managment code to find the location of the
+ * Call the space management code to find the location of the
* inode on disk.
*/
imap.im_blkno = 0;
/*
* Given a mount structure and an inode number, return a pointer
- * to a newly allocated in-core inode coresponding to the given
+ * to a newly allocated in-core inode corresponding to the given
* inode number.
*
* Initialize the inode's attributes and extent pointers if it
/*
* Decrement the pin count of the given inode, and wake up
* anyone in xfs_iwait_unpin() if the count goes to 0. The
- * inode must have been previoulsy pinned with a call to xfs_ipin().
+ * inode must have been previously pinned with a call to xfs_ipin().
*/
void
xfs_iunpin(
xfs_iext_add(
xfs_ifork_t *ifp, /* inode fork pointer */
xfs_extnum_t idx, /* index to begin adding exts */
- int ext_diff) /* nubmer of extents to add */
+ int ext_diff) /* number of extents to add */
{
int byte_diff; /* new bytes being added */
int new_size; /* size of extents after adding */
xfs_extnum_t ext_diff; /* extents to remove in current list */
xfs_extnum_t nex1; /* number of extents before idx */
xfs_extnum_t nex2; /* extents after idx + count */
- int nlists; /* entries in indirecton array */
+ int nlists; /* entries in indirection array */
int page_idx = idx; /* index in target extent list */
ASSERT(ifp->if_flags & XFS_IFEXTIREC);
xfs_filblks_t blockcount = 0; /* number of blocks in extent */
xfs_bmbt_rec_t *ep = NULL; /* pointer to target extent */
xfs_ext_irec_t *erp = NULL; /* indirection array pointer */
- int high; /* upper boundry in search */
+ int high; /* upper boundary in search */
xfs_extnum_t idx = 0; /* index of target extent */
- int low; /* lower boundry in search */
+ int low; /* lower boundary in search */
xfs_extnum_t nextents; /* number of file extents */
xfs_fileoff_t startoff = 0; /* start offset of extent */
* been or is in the process of being flushed, then (ideally) we'd like to
* see if the inode's buffer is still incore, and if so give it a nudge.
* We delay doing so until the pushbuf routine, though, to avoid holding
- * the AIL lock across a call to the blackhole which is the buffercache.
+ * the AIL lock across a call to the blackhole which is the buffer cache.
* Also we don't want to sleep in any device strategy routines, which can happen
* if we do the subsequent bawrite in here.
*/
size_t statstruct_size, /* sizeof struct filling */
char __user *ubuffer, /* buffer with inode stats */
int flags, /* defined in xfs_itable.h */
- int *done) /* 1 if there're more stats to get */
+ int *done) /* 1 if there are more stats to get */
{
xfs_agblock_t agbno=0;/* allocation group block number */
xfs_buf_t *agbp; /* agi header buffer */
xfs_mount_t *mp, /* mount point for filesystem */
xfs_ino_t *lastinop, /* inode to return */
char __user *buffer, /* buffer with inode stats */
- int *done) /* 1 if there're more stats to get */
+ int *done) /* 1 if there are more stats to get */
{
int count; /* count value for bulkstat call */
int error; /* return value */
size_t statstruct_size,/* sizeof struct that we're filling */
char __user *ubuffer,/* buffer with inode stats */
int flags, /* flag to control access method */
- int *done); /* 1 if there're more stats to get */
+ int *done); /* 1 if there are more stats to get */
int
xfs_bulkstat_single(
int num_bblks);
STATIC int xlog_space_left(xlog_t *log, int cycle, int bytes);
STATIC int xlog_sync(xlog_t *log, xlog_in_core_t *iclog);
-STATIC void xlog_unalloc_log(xlog_t *log);
+STATIC void xlog_dealloc_log(xlog_t *log);
STATIC int xlog_write(xfs_mount_t *mp, xfs_log_iovec_t region[],
int nentries, xfs_log_ticket_t tic,
xfs_lsn_t *start_lsn,
if ((ticket->t_flags & XLOG_TIC_PERM_RESERV) == 0 ||
(flags & XFS_LOG_REL_PERM_RESERV)) {
/*
- * Release ticket if not permanent reservation or a specifc
+ * Release ticket if not permanent reservation or a specific
* request has been made to release a permanent reservation.
*/
xlog_trace_loggrant(log, ticket, "xfs_log_done: (non-permanent)");
vfsp->vfs_flag |= VFS_RDONLY;
if (error) {
cmn_err(CE_WARN, "XFS: log mount/recovery failed: error %d", error);
- xlog_unalloc_log(mp->m_log);
+ xlog_dealloc_log(mp->m_log);
return error;
}
}
*
* Go through the motions of sync'ing and releasing
* the iclog, even though no I/O will actually happen,
- * we need to wait for other log I/O's that may already
+ * we need to wait for other log I/Os that may already
* be in progress. Do this as a separate section of
* code so we'll know if we ever get stuck here that
* we're in this odd situation of trying to unmount
void
xfs_log_unmount_dealloc(xfs_mount_t *mp)
{
- xlog_unalloc_log(mp->m_log);
+ xlog_dealloc_log(mp->m_log);
}
/*
ASSERT(XFS_BUF_ADDR(bp) <= log->l_logBBsize-1);
ASSERT(XFS_BUF_ADDR(bp) + BTOBB(count) <= log->l_logBBsize);
- /* account for internal log which does't start at block #0 */
+ /* account for internal log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
XFS_BUF_WRITE(bp);
if ((error = XFS_bwrite(bp))) {
/*
- * Unallocate a log structure
+ * Deallocate a log structure
*/
void
-xlog_unalloc_log(xlog_t *log)
+xlog_dealloc_log(xlog_t *log)
{
xlog_in_core_t *iclog, *next_iclog;
xlog_ticket_t *tic, *next_tic;
if ((log->l_ticket_cnt != log->l_ticket_tcnt) &&
!XLOG_FORCED_SHUTDOWN(log)) {
xfs_fs_cmn_err(CE_WARN, log->l_mp,
- "xlog_unalloc_log: (cnt: %d, total: %d)",
+ "xlog_dealloc_log: (cnt: %d, total: %d)",
log->l_ticket_cnt, log->l_ticket_tcnt);
/* ASSERT(log->l_ticket_cnt == log->l_ticket_tcnt); */
#endif
log->l_mp->m_log = NULL;
kmem_free(log, sizeof(xlog_t));
-} /* xlog_unalloc_log */
+} /* xlog_dealloc_log */
/*
* Update counters atomically now that memcpy is done.
/*
* We let the log lock go, so it's possible that we hit a log I/O
- * error or someother SHUTDOWN condition that marks the iclog
+ * error or some other SHUTDOWN condition that marks the iclog
* as XLOG_STATE_IOERROR before the bwrite. However, we know that
* this iclog has consistent data, so we ignore IOERROR
* flags after this point.
#ifdef __KERNEL__
/*
- * By comparing each compnent, we don't have to worry about extra
+ * By comparing each component, we don't have to worry about extra
* endian issues in treating two 32 bit numbers as one 64 bit number
*/
static inline xfs_lsn_t _lsn_cmp(xfs_lsn_t lsn1, xfs_lsn_t lsn2)
* x | x ... | x - 1 | x
* Another case that fits this picture would be
* x | x + 1 | x ... | x
- * In this case the head really is somwhere at the end of the
+ * In this case the head really is somewhere at the end of the
* log, as one of the latest writes at the beginning was
* incomplete.
* One more case is
* we don't need to worry about the block number being
* truncated in > 1 TB buffers because in user-land,
* we're now n32 or 64-bit so xfs_daddr_t is 64-bits so
- * the blkno's will get through the user-mode buffer
+ * the blknos will get through the user-mode buffer
* cache properly. The only bad case is o32 kernels
* where xfs_daddr_t is 32-bits but mount will warn us
* off a > 1 TB filesystem before we get here.
break;
}
- /* This ag is prefered for inodes */
+ /* This ag is preferred for inodes */
pag = &mp->m_perag[index];
pag->pagi_inodeok = 1;
if (index < max_metadata)
* We cannot use the hotcpu_register() function because it does
* not allow notifier instances. We need a notifier per filesystem
* as we need to be able to identify the filesystem to balance
- * the counters out. This is acheived by having a notifier block
+ * the counters out. This is achieved by having a notifier block
* embedded in the xfs_mount_t and doing pointer magic to get the
* mount pointer from the notifier block address.
*/
#endif
int m_dalign; /* stripe unit */
int m_swidth; /* stripe width */
- int m_sinoalign; /* stripe unit inode alignmnt */
+ int m_sinoalign; /* stripe unit inode alignment */
int m_attr_magicpct;/* 37% of the blocksize */
int m_dir_magicpct; /* 37% of the dir blocksize */
__uint8_t m_mk_sharedro; /* mark shared ro on unmount */
typedef __uint32_t xfs_dqid_t;
/*
- * Eventhough users may not have quota limits occupying all 64-bits,
+ * Even though users may not have quota limits occupying all 64-bits,
* they may need 64-bit accounting. Hence, 64-bit quota-counters,
* and quota-limits. This is a waste in the common case, but hey ...
*/
#ifdef __KERNEL__
/*
* This check is done typically without holding the inode lock;
- * that may seem racey, but it is harmless in the context that it is used.
+ * that may seem racy, but it is harmless in the context that it is used.
* The inode cannot go inactive as long a reference is kept, and
* therefore if dquot(s) were attached, they'll stay consistent.
* If, for example, the ownership of the inode changes while
case XFS_TRANS_SB_RES_FREXTENTS:
/*
* The allocation has already been applied to the
- * in-core superblocks's counter. This should only
+ * in-core superblock's counter. This should only
* be applied to the on-disk superblock.
*/
ASSERT(delta < 0);
if (whole)
/*
- * Log the whole thing, the fields are discontiguous.
+ * Log the whole thing, the fields are noncontiguous.
*/
xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_sb_t) - 1);
else
/*
* Apply any superblock modifications to the in-core version.
* The t_res_fdblocks_delta and t_res_frextents_delta fields are
- * explicity NOT applied to the in-core superblock.
+ * explicitly NOT applied to the in-core superblock.
* The idea is that that has already been done.
*/
if (tp->t_flags & XFS_TRANS_SB_DIRTY) {
xfs_lsn_t t_commit_lsn; /* log seq num of end of
* transaction. */
struct xfs_mount *t_mountp; /* ptr to fs mount struct */
- struct xfs_dquot_acct *t_dqinfo; /* accting info for dquots */
+ struct xfs_dquot_acct *t_dqinfo; /* acctg info for dquots */
xfs_trans_callback_t t_callback; /* transaction callback */
void *t_callarg; /* callback arg */
unsigned int t_flags; /* misc flags */
* This is to coordinate with the xfs_iflush() and xfs_iflush_done()
* routines in the eventual clearing of the ilf_fields bits.
* See the big comment in xfs_iflush() for an explanation of
- * this coorination mechanism.
+ * this coordination mechanism.
*/
flags |= ip->i_itemp->ili_last_fields;
ip->i_itemp->ili_format.ilf_fields |= flags;
* determine if they should be flushed sync, async, or
* delwri.
* SYNC_CLOSE - This flag is passed when the system is being
- * unmounted. We should sync and invalidate everthing.
+ * unmounted. We should sync and invalidate everything.
* SYNC_FSDATA - This indicates that the caller would like to make
* sure the superblock is safe on disk. We can ensure
- * this by simply makeing sure the log gets flushed
+ * this by simply making sure the log gets flushed
* if SYNC_BDFLUSH is set, and by actually writing it
* out otherwise.
*
*
* This routine supports all of the flags defined for the generic VFS_SYNC
* interface as explained above under xfs_sync. In the interests of not
- * changing interfaces within the 6.5 family, additional internallly-
+ * changing interfaces within the 6.5 family, additional internally-
* required functions are specified within a separate xflags parameter,
* only available by calling this routine.
*
* If this is just vfs_sync() or pflushd() calling
* then we can skip inodes for which it looks like
* there is nothing to do. Since we don't have the
- * inode locked this is racey, but these are periodic
+ * inode locked this is racy, but these are periodic
* calls so it doesn't matter. For the others we want
* to know for sure, so we at least try to lock them.
*/
*
* This routine supports all of the flags defined for the generic VFS_SYNC
* interface as explained above under xfs_sync. In the interests of not
- * changing interfaces within the 6.5 family, additional internallly-
+ * changing interfaces within the 6.5 family, additional internally-
* required functions are specified within a separate xflags parameter,
* only available by calling this routine.
*
* If this is a synchronous mount, make sure that the
* transaction goes to disk before returning to the user.
* This is slightly sub-optimal in that truncates require
- * two sync transactions instead of one for wsync filesytems.
+ * two sync transactions instead of one for wsync filesystems.
* One for the truncate and one for the timestamps since we
* don't want to change the timestamps unless we're sure the
* truncate worked. Truncates are less than 1% of the laddis
/*
* If this inode is on the RT dev we need to flush that
- * cache aswell.
+ * cache as well.
*/
if (ip->i_d.di_flags & XFS_DIFLAG_REALTIME)
xfs_blkdev_issue_flush(ip->i_mount->m_rtdev_targp);
*/
ntp = xfs_trans_dup(tp);
/*
- * Commit the transaction containing extent freeing and EFD's.
+ * Commit the transaction containing extent freeing and EFDs.
* If we get an error on the commit here or on the reserve below,
* we need to unlock the inode since the new transaction doesn't
* have the inode attached.
XFS_QM_DQRELE(mp, gdqp);
/*
- * Propogate the fact that the vnode changed after the
+ * Propagate the fact that the vnode changed after the
* xfs_inode locks have been released.
*/
VOP_VNODE_CHANGE(vp, VCHANGE_FLAGS_TRUNCATED, 3);
* for a log reservation. Since we'll have to wait for the
* inactive code to complete before returning from xfs_iget,
* we need to make sure that we don't have log space reserved
- * when we call xfs_iget. Instead we get an unlocked referece
+ * when we call xfs_iget. Instead we get an unlocked reference
* to the inode before getting our log reservation.
*/
error = xfs_get_dir_entry(dentry, &ip);
* for a log reservation. Since we'll have to wait for the
* inactive code to complete before returning from xfs_iget,
* we need to make sure that we don't have log space reserved
- * when we call xfs_iget. Instead we get an unlocked referece
+ * when we call xfs_iget. Instead we get an unlocked reference
* to the inode before getting our log reservation.
*/
error = xfs_get_dir_entry(dentry, &cdp);
.posix_timers = LIST_HEAD_INIT(sig.posix_timers), \
.cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \
.rlim = INIT_RLIMITS, \
+ .pgrp = 1, \
+ .session = 1, \
}
#define INIT_SIGHAND(sighand) { \
enum pid_type
{
PIDTYPE_PID,
- PIDTYPE_TGID,
PIDTYPE_PGID,
PIDTYPE_SID,
PIDTYPE_MAX
extern int alloc_pidmap(void);
extern void FASTCALL(free_pidmap(int));
-extern void switch_exec_pids(struct task_struct *leader, struct task_struct *thread);
#define do_each_task_pid(who, type, task) \
if ((task = find_task_by_pid_type(type, who))) { \
atomic_t count;
struct k_sigaction action[_NSIG];
spinlock_t siglock;
- struct rcu_head rcu;
};
-extern void sighand_free_cb(struct rcu_head *rhp);
-
-static inline void sighand_free(struct sighand_struct *sp)
-{
- call_rcu(&sp->rcu, sighand_free_cb);
-}
-
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
/* PID/PID hash table linkage. */
struct pid pids[PIDTYPE_MAX];
+ struct list_head thread_group;
struct completion *vfork_done; /* for vfork() */
int __user *set_child_tid; /* CLONE_CHILD_SETTID */
extern int send_sig(int, struct task_struct *, int);
extern void zap_other_threads(struct task_struct *p);
extern int kill_pg(pid_t, int, int);
-extern int kill_sl(pid_t, int, int);
extern int kill_proc(pid_t, int, int);
extern struct sigqueue *sigqueue_alloc(void);
extern void sigqueue_free(struct sigqueue *);
extern void exit_thread(void);
extern void exit_files(struct task_struct *);
-extern void exit_signal(struct task_struct *);
-extern void __exit_signal(struct task_struct *);
-extern void exit_sighand(struct task_struct *);
-extern void __exit_sighand(struct task_struct *);
+extern void __cleanup_signal(struct signal_struct *);
+extern void __cleanup_sighand(struct sighand_struct *);
extern void exit_itimers(struct signal_struct *);
extern NORET_TYPE void do_group_exit(int);
#endif
#define remove_parent(p) list_del_init(&(p)->sibling)
-#define add_parent(p, parent) list_add_tail(&(p)->sibling,&(parent)->children)
-
-#define REMOVE_LINKS(p) do { \
- if (thread_group_leader(p)) \
- list_del_init(&(p)->tasks); \
- remove_parent(p); \
- } while (0)
-
-#define SET_LINKS(p) do { \
- if (thread_group_leader(p)) \
- list_add_tail(&(p)->tasks,&init_task.tasks); \
- add_parent(p, (p)->parent); \
- } while (0)
+#define add_parent(p) list_add_tail(&(p)->sibling,&(p)->parent->children)
#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks)
#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
#define while_each_thread(g, t) \
while ((t = next_thread(t)) != g)
-extern task_t * FASTCALL(next_thread(const task_t *p));
-
#define thread_group_leader(p) (p->pid == p->tgid)
+static inline task_t *next_thread(task_t *p)
+{
+ return list_entry(rcu_dereference(p->thread_group.next),
+ task_t, thread_group);
+}
+
static inline int thread_group_empty(task_t *p)
{
- return list_empty(&p->pids[PIDTYPE_TGID].pid_list);
+ return list_empty(&p->thread_group);
}
#define delay_group_leader(p) \
(thread_group_leader(p) && !thread_group_empty(p))
-extern void unhash_process(struct task_struct *p);
-
/*
* Protects ->fs, ->files, ->mm, ->ptrace, ->group_info, ->comm, keyring
* subscriptions and synchronises with wait4(). Also used in procfs. Also
spin_unlock(&p->alloc_lock);
}
+extern struct sighand_struct *lock_task_sighand(struct task_struct *tsk,
+ unsigned long *flags);
+
+static inline void unlock_task_sighand(struct task_struct *tsk,
+ unsigned long *flags)
+{
+ spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
+}
+
#ifndef __HAVE_THREAD_FUNCTIONS
#define task_thread_info(task) (task)->thread_info
INIT_LIST_HEAD(&sig->list);
}
+extern void flush_sigqueue(struct sigpending *queue);
+
/* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */
static inline int valid_signal(unsigned long sig)
{
extern kmem_cache_t *files_cachep;
extern kmem_cache_t *filp_cachep;
extern kmem_cache_t *fs_cachep;
-extern kmem_cache_t *signal_cachep;
extern kmem_cache_t *sighand_cachep;
extern kmem_cache_t *bio_cachep;
#include <linux/cpuset.h>
#include <linux/syscalls.h>
#include <linux/signal.h>
+#include <linux/posix-timers.h>
#include <linux/cn_proc.h>
#include <linux/mutex.h>
#include <linux/futex.h>
{
nr_threads--;
detach_pid(p, PIDTYPE_PID);
- detach_pid(p, PIDTYPE_TGID);
if (thread_group_leader(p)) {
detach_pid(p, PIDTYPE_PGID);
detach_pid(p, PIDTYPE_SID);
- if (p->pid)
- __get_cpu_var(process_counts)--;
+
+ list_del_init(&p->tasks);
+ __get_cpu_var(process_counts)--;
+ }
+ list_del_rcu(&p->thread_group);
+ remove_parent(p);
+}
+
+/*
+ * This function expects the tasklist_lock write-locked.
+ */
+static void __exit_signal(struct task_struct *tsk)
+{
+ struct signal_struct *sig = tsk->signal;
+ struct sighand_struct *sighand;
+
+ BUG_ON(!sig);
+ BUG_ON(!atomic_read(&sig->count));
+
+ rcu_read_lock();
+ sighand = rcu_dereference(tsk->sighand);
+ spin_lock(&sighand->siglock);
+
+ posix_cpu_timers_exit(tsk);
+ if (atomic_dec_and_test(&sig->count))
+ posix_cpu_timers_exit_group(tsk);
+ else {
+ /*
+ * If there is any task waiting for the group exit
+ * then notify it:
+ */
+ if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
+ wake_up_process(sig->group_exit_task);
+ sig->group_exit_task = NULL;
+ }
+ if (tsk == sig->curr_target)
+ sig->curr_target = next_thread(tsk);
+ /*
+ * Accumulate here the counters for all threads but the
+ * group leader as they die, so they can be added into
+ * the process-wide totals when those are taken.
+ * The group leader stays around as a zombie as long
+ * as there are other threads. When it gets reaped,
+ * the exit.c code will add its counts into these totals.
+ * We won't ever get here for the group leader, since it
+ * will have been the last reference on the signal_struct.
+ */
+ sig->utime = cputime_add(sig->utime, tsk->utime);
+ sig->stime = cputime_add(sig->stime, tsk->stime);
+ sig->min_flt += tsk->min_flt;
+ sig->maj_flt += tsk->maj_flt;
+ sig->nvcsw += tsk->nvcsw;
+ sig->nivcsw += tsk->nivcsw;
+ sig->sched_time += tsk->sched_time;
+ sig = NULL; /* Marker for below. */
}
- REMOVE_LINKS(p);
+ __unhash_process(tsk);
+
+ tsk->signal = NULL;
+ tsk->sighand = NULL;
+ spin_unlock(&sighand->siglock);
+ rcu_read_unlock();
+
+ __cleanup_sighand(sighand);
+ clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
+ flush_sigqueue(&tsk->pending);
+ if (sig) {
+ flush_sigqueue(&sig->shared_pending);
+ __cleanup_signal(sig);
+ }
}
void release_task(struct task_struct * p)
task_t *leader;
struct dentry *proc_dentry;
-repeat:
+repeat:
atomic_dec(&p->user->processes);
spin_lock(&p->proc_lock);
proc_dentry = proc_pid_unhash(p);
write_lock_irq(&tasklist_lock);
- if (unlikely(p->ptrace))
- __ptrace_unlink(p);
+ ptrace_unlink(p);
BUG_ON(!list_empty(&p->ptrace_list) || !list_empty(&p->ptrace_children));
__exit_signal(p);
- /*
- * Note that the fastpath in sys_times depends on __exit_signal having
- * updated the counters before a task is removed from the tasklist of
- * the process by __unhash_process.
- */
- __unhash_process(p);
/*
* If we are the last non-leader member of the thread
goto repeat;
}
-/* we are using it only for SMP init */
-
-void unhash_process(struct task_struct *p)
-{
- struct dentry *proc_dentry;
-
- spin_lock(&p->proc_lock);
- proc_dentry = proc_pid_unhash(p);
- write_lock_irq(&tasklist_lock);
- __unhash_process(p);
- write_unlock_irq(&tasklist_lock);
- spin_unlock(&p->proc_lock);
- proc_pid_flush(proc_dentry);
-}
-
/*
* This checks not only the pgrp, but falls back on the pid if no
* satisfactory pgrp is found. I dunno - gdb doesn't work correctly
ptrace_unlink(current);
/* Reparent to init */
- REMOVE_LINKS(current);
+ remove_parent(current);
current->parent = child_reaper;
current->real_parent = child_reaper;
- SET_LINKS(current);
+ add_parent(current);
/* Set the exit signal to SIGCHLD so we signal init on exit */
current->exit_signal = SIGCHLD;
mmput(mm);
}
-static inline void choose_new_parent(task_t *p, task_t *reaper, task_t *child_reaper)
+static inline void choose_new_parent(task_t *p, task_t *reaper)
{
/*
* Make sure we're not reparenting to ourselves and that
* the parent is not a zombie.
*/
- BUG_ON(p == reaper || reaper->exit_state >= EXIT_ZOMBIE);
+ BUG_ON(p == reaper || reaper->exit_state);
p->real_parent = reaper;
}
* anyway, so let go of it.
*/
p->ptrace = 0;
- list_del_init(&p->sibling);
+ remove_parent(p);
p->parent = p->real_parent;
- list_add_tail(&p->sibling, &p->parent->children);
+ add_parent(p);
/* If we'd notified the old parent about this child's death,
* also notify the new parent.
if (father == p->real_parent) {
/* reparent with a reaper, real father it's us */
- choose_new_parent(p, reaper, child_reaper);
+ choose_new_parent(p, reaper);
reparent_thread(p, father, 0);
} else {
/* reparent ptraced task to its real parent */
}
list_for_each_safe(_p, _n, &father->ptrace_children) {
p = list_entry(_p,struct task_struct,ptrace_list);
- choose_new_parent(p, reaper, child_reaper);
+ choose_new_parent(p, reaper);
reparent_thread(p, father, 1);
}
}
panic("Aiee, killing interrupt handler!");
if (unlikely(!tsk->pid))
panic("Attempted to kill the idle task!");
- if (unlikely(tsk->pid == 1))
+ if (unlikely(tsk == child_reaper))
panic("Attempted to kill init!");
if (unlikely(current->ptrace & PT_TRACE_EXIT)) {
do_exit((error_code&0xff)<<8);
}
-task_t fastcall *next_thread(const task_t *p)
-{
- return pid_task(p->pids[PIDTYPE_TGID].pid_list.next, PIDTYPE_TGID);
-}
-
-EXPORT_SYMBOL(next_thread);
-
/*
* Take down every thread in the group. This is called by fatal signals
* as well as by sys_exit_group (below).
else if (!thread_group_empty(current)) {
struct signal_struct *const sig = current->signal;
struct sighand_struct *const sighand = current->sighand;
- read_lock(&tasklist_lock);
spin_lock_irq(&sighand->siglock);
if (sig->flags & SIGNAL_GROUP_EXIT)
/* Another thread got here before we took the lock. */
zap_other_threads(current);
}
spin_unlock_irq(&sighand->siglock);
- read_unlock(&tasklist_lock);
}
do_exit(exit_code);
/* move to end of parent's list to avoid starvation */
remove_parent(p);
- add_parent(p, p->parent);
+ add_parent(p);
write_unlock_irq(&tasklist_lock);
#endif
/* SLAB cache for signal_struct structures (tsk->signal) */
-kmem_cache_t *signal_cachep;
+static kmem_cache_t *signal_cachep;
/* SLAB cache for sighand_struct structures (tsk->sighand) */
kmem_cache_t *sighand_cachep;
EXPORT_SYMBOL(unshare_files);
-void sighand_free_cb(struct rcu_head *rhp)
-{
- struct sighand_struct *sp;
-
- sp = container_of(rhp, struct sighand_struct, rcu);
- kmem_cache_free(sighand_cachep, sp);
-}
-
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
struct sighand_struct *sig;
rcu_assign_pointer(tsk->sighand, sig);
if (!sig)
return -ENOMEM;
- spin_lock_init(&sig->siglock);
atomic_set(&sig->count, 1);
memcpy(sig->action, current->sighand->action, sizeof(sig->action));
return 0;
}
+void __cleanup_sighand(struct sighand_struct *sighand)
+{
+ if (atomic_dec_and_test(&sighand->count))
+ kmem_cache_free(sighand_cachep, sighand);
+}
+
static inline int copy_signal(unsigned long clone_flags, struct task_struct * tsk)
{
struct signal_struct *sig;
return 0;
}
+void __cleanup_signal(struct signal_struct *sig)
+{
+ exit_thread_group_keys(sig);
+ kmem_cache_free(signal_cachep, sig);
+}
+
+static inline void cleanup_signal(struct task_struct *tsk)
+{
+ struct signal_struct *sig = tsk->signal;
+
+ atomic_dec(&sig->live);
+
+ if (atomic_dec_and_test(&sig->count))
+ __cleanup_signal(sig);
+}
+
static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
{
unsigned long new_flags = p->flags;
* We dont wake it up yet.
*/
p->group_leader = p;
+ INIT_LIST_HEAD(&p->thread_group);
INIT_LIST_HEAD(&p->ptrace_children);
INIT_LIST_HEAD(&p->ptrace_list);
!cpu_online(task_cpu(p))))
set_task_cpu(p, smp_processor_id());
- /*
- * Check for pending SIGKILL! The new thread should not be allowed
- * to slip out of an OOM kill. (or normal SIGKILL.)
- */
- if (sigismember(¤t->pending.signal, SIGKILL)) {
- write_unlock_irq(&tasklist_lock);
- retval = -EINTR;
- goto bad_fork_cleanup_namespace;
- }
-
/* CLONE_PARENT re-uses the old parent */
if (clone_flags & (CLONE_PARENT|CLONE_THREAD))
p->real_parent = current->real_parent;
p->parent = p->real_parent;
spin_lock(¤t->sighand->siglock);
+
+ /*
+ * Process group and session signals need to be delivered to just the
+ * parent before the fork or both the parent and the child after the
+ * fork. Restart if a signal comes in before we add the new process to
+ * it's process group.
+ * A fatal signal pending means that current will exit, so the new
+ * thread can't slip out of an OOM kill (or normal SIGKILL).
+ */
+ recalc_sigpending();
+ if (signal_pending(current)) {
+ spin_unlock(¤t->sighand->siglock);
+ write_unlock_irq(&tasklist_lock);
+ retval = -ERESTARTNOINTR;
+ goto bad_fork_cleanup_namespace;
+ }
+
if (clone_flags & CLONE_THREAD) {
/*
* Important: if an exit-all has been started then
retval = -EAGAIN;
goto bad_fork_cleanup_namespace;
}
- p->group_leader = current->group_leader;
- if (current->signal->group_stop_count > 0) {
- /*
- * There is an all-stop in progress for the group.
- * We ourselves will stop as soon as we check signals.
- * Make the new thread part of that group stop too.
- */
- current->signal->group_stop_count++;
- set_tsk_thread_flag(p, TIF_SIGPENDING);
- }
+ p->group_leader = current->group_leader;
+ list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
if (!cputime_eq(current->signal->it_virt_expires,
cputime_zero) ||
*/
p->ioprio = current->ioprio;
- SET_LINKS(p);
- if (unlikely(p->ptrace & PT_PTRACED))
- __ptrace_link(p, current->parent);
-
- if (thread_group_leader(p)) {
- p->signal->tty = current->signal->tty;
- p->signal->pgrp = process_group(current);
- p->signal->session = current->signal->session;
- attach_pid(p, PIDTYPE_PGID, process_group(p));
- attach_pid(p, PIDTYPE_SID, p->signal->session);
- if (p->pid)
+ if (likely(p->pid)) {
+ add_parent(p);
+ if (unlikely(p->ptrace & PT_PTRACED))
+ __ptrace_link(p, current->parent);
+
+ if (thread_group_leader(p)) {
+ p->signal->tty = current->signal->tty;
+ p->signal->pgrp = process_group(current);
+ p->signal->session = current->signal->session;
+ attach_pid(p, PIDTYPE_PGID, process_group(p));
+ attach_pid(p, PIDTYPE_SID, p->signal->session);
+
+ list_add_tail(&p->tasks, &init_task.tasks);
__get_cpu_var(process_counts)++;
+ }
+ attach_pid(p, PIDTYPE_PID, p->pid);
+ nr_threads++;
}
- attach_pid(p, PIDTYPE_TGID, p->tgid);
- attach_pid(p, PIDTYPE_PID, p->pid);
- nr_threads++;
total_forks++;
spin_unlock(¤t->sighand->siglock);
write_unlock_irq(&tasklist_lock);
if (p->mm)
mmput(p->mm);
bad_fork_cleanup_signal:
- exit_signal(p);
+ cleanup_signal(p);
bad_fork_cleanup_sighand:
- exit_sighand(p);
+ __cleanup_sighand(p->sighand);
bad_fork_cleanup_fs:
exit_fs(p); /* blocking */
bad_fork_cleanup_files:
if (!task)
return ERR_PTR(-ENOMEM);
init_idle(task, cpu);
- unhash_process(task);
+
return task;
}
#define ARCH_MIN_MMSTRUCT_ALIGN 0
#endif
+static void sighand_ctor(void *data, kmem_cache_t *cachep, unsigned long flags)
+{
+ struct sighand_struct *sighand = data;
+
+ if ((flags & (SLAB_CTOR_VERIFY | SLAB_CTOR_CONSTRUCTOR)) ==
+ SLAB_CTOR_CONSTRUCTOR)
+ spin_lock_init(&sighand->siglock);
+}
+
void __init proc_caches_init(void)
{
sighand_cachep = kmem_cache_create("sighand_cache",
sizeof(struct sighand_struct), 0,
- SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
+ SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU,
+ sighand_ctor, NULL);
signal_cachep = kmem_cache_create("signal_cache",
sizeof(struct signal_struct), 0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL);
sa.sa.sa_handler = SIG_IGN;
sa.sa.sa_flags = 0;
siginitset(&sa.sa.sa_mask, sigmask(SIGCHLD));
- do_sigaction(SIGCHLD, &sa, (struct k_sigaction *)0);
+ do_sigaction(SIGCHLD, &sa, NULL);
allow_signal(SIGCHLD);
pid = kernel_thread(____call_usermodehelper, sub_info, SIGCHLD);
EXPORT_SYMBOL(find_task_by_pid_type);
/*
- * This function switches the PIDs if a non-leader thread calls
- * sys_execve() - this must be done without releasing the PID.
- * (which a detach_pid() would eventually do.)
- */
-void switch_exec_pids(task_t *leader, task_t *thread)
-{
- __detach_pid(leader, PIDTYPE_PID);
- __detach_pid(leader, PIDTYPE_TGID);
- __detach_pid(leader, PIDTYPE_PGID);
- __detach_pid(leader, PIDTYPE_SID);
-
- __detach_pid(thread, PIDTYPE_PID);
- __detach_pid(thread, PIDTYPE_TGID);
-
- leader->pid = leader->tgid = thread->pid;
- thread->pid = thread->tgid;
-
- attach_pid(thread, PIDTYPE_PID, thread->pid);
- attach_pid(thread, PIDTYPE_TGID, thread->tgid);
- attach_pid(thread, PIDTYPE_PGID, thread->signal->pgrp);
- attach_pid(thread, PIDTYPE_SID, thread->signal->session);
- list_add_tail(&thread->tasks, &init_task.tasks);
-
- attach_pid(leader, PIDTYPE_PID, leader->pid);
- attach_pid(leader, PIDTYPE_TGID, leader->tgid);
- attach_pid(leader, PIDTYPE_PGID, leader->signal->pgrp);
- attach_pid(leader, PIDTYPE_SID, leader->signal->session);
-}
-
-/*
* The pid hash table is scaled according to the amount of memory in the
* machine. From a minimum of 16 slots up to 4096 slots at one gigabyte or
* more.
void __init pidmap_init(void)
{
- int i;
-
pidmap_array->page = (void *)get_zeroed_page(GFP_KERNEL);
+ /* Reserve PID 0. We never call free_pidmap(0) */
set_bit(0, pidmap_array->page);
atomic_dec(&pidmap_array->nr_free);
-
- /*
- * Allocate PID 0, and hash it via all PID types:
- */
-
- for (i = 0; i < PIDTYPE_MAX; i++)
- attach_pid(current, i, 0);
}
if (child->parent == new_parent)
return;
list_add(&child->ptrace_list, &child->parent->ptrace_children);
- REMOVE_LINKS(child);
+ remove_parent(child);
child->parent = new_parent;
- SET_LINKS(child);
+ add_parent(child);
}
/*
child->ptrace = 0;
if (!list_empty(&child->ptrace_list)) {
list_del_init(&child->ptrace_list);
- REMOVE_LINKS(child);
+ remove_parent(child);
child->parent = child->real_parent;
- SET_LINKS(child);
+ add_parent(child);
}
ptrace_untrace(child);
#include <linux/security.h>
#include <linux/syscalls.h>
#include <linux/ptrace.h>
-#include <linux/posix-timers.h>
#include <linux/signal.h>
#include <linux/audit.h>
#include <linux/capability.h>
#define sig_kernel_stop(sig) \
(((sig) < SIGRTMIN) && T(sig, SIG_KERNEL_STOP_MASK))
+#define sig_needs_tasklist(sig) ((sig) == SIGCONT)
+
#define sig_user_defined(t, signr) \
(((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \
((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN))
kmem_cache_free(sigqueue_cachep, q);
}
-static void flush_sigqueue(struct sigpending *queue)
+void flush_sigqueue(struct sigpending *queue)
{
struct sigqueue *q;
/*
* Flush all pending signals for a task.
*/
-
-void
-flush_signals(struct task_struct *t)
+void flush_signals(struct task_struct *t)
{
unsigned long flags;
}
/*
- * This function expects the tasklist_lock write-locked.
- */
-void __exit_sighand(struct task_struct *tsk)
-{
- struct sighand_struct * sighand = tsk->sighand;
-
- /* Ok, we're done with the signal handlers */
- tsk->sighand = NULL;
- if (atomic_dec_and_test(&sighand->count))
- sighand_free(sighand);
-}
-
-void exit_sighand(struct task_struct *tsk)
-{
- write_lock_irq(&tasklist_lock);
- rcu_read_lock();
- if (tsk->sighand != NULL) {
- struct sighand_struct *sighand = rcu_dereference(tsk->sighand);
- spin_lock(&sighand->siglock);
- __exit_sighand(tsk);
- spin_unlock(&sighand->siglock);
- }
- rcu_read_unlock();
- write_unlock_irq(&tasklist_lock);
-}
-
-/*
- * This function expects the tasklist_lock write-locked.
- */
-void __exit_signal(struct task_struct *tsk)
-{
- struct signal_struct * sig = tsk->signal;
- struct sighand_struct * sighand;
-
- if (!sig)
- BUG();
- if (!atomic_read(&sig->count))
- BUG();
- rcu_read_lock();
- sighand = rcu_dereference(tsk->sighand);
- spin_lock(&sighand->siglock);
- posix_cpu_timers_exit(tsk);
- if (atomic_dec_and_test(&sig->count)) {
- posix_cpu_timers_exit_group(tsk);
- tsk->signal = NULL;
- __exit_sighand(tsk);
- spin_unlock(&sighand->siglock);
- flush_sigqueue(&sig->shared_pending);
- } else {
- /*
- * If there is any task waiting for the group exit
- * then notify it:
- */
- if (sig->group_exit_task && atomic_read(&sig->count) == sig->notify_count) {
- wake_up_process(sig->group_exit_task);
- sig->group_exit_task = NULL;
- }
- if (tsk == sig->curr_target)
- sig->curr_target = next_thread(tsk);
- tsk->signal = NULL;
- /*
- * Accumulate here the counters for all threads but the
- * group leader as they die, so they can be added into
- * the process-wide totals when those are taken.
- * The group leader stays around as a zombie as long
- * as there are other threads. When it gets reaped,
- * the exit.c code will add its counts into these totals.
- * We won't ever get here for the group leader, since it
- * will have been the last reference on the signal_struct.
- */
- sig->utime = cputime_add(sig->utime, tsk->utime);
- sig->stime = cputime_add(sig->stime, tsk->stime);
- sig->min_flt += tsk->min_flt;
- sig->maj_flt += tsk->maj_flt;
- sig->nvcsw += tsk->nvcsw;
- sig->nivcsw += tsk->nivcsw;
- sig->sched_time += tsk->sched_time;
- __exit_sighand(tsk);
- spin_unlock(&sighand->siglock);
- sig = NULL; /* Marker for below. */
- }
- rcu_read_unlock();
- clear_tsk_thread_flag(tsk,TIF_SIGPENDING);
- flush_sigqueue(&tsk->pending);
- if (sig) {
- /*
- * We are cleaning up the signal_struct here.
- */
- exit_thread_group_keys(sig);
- kmem_cache_free(signal_cachep, sig);
- }
-}
-
-void exit_signal(struct task_struct *tsk)
-{
- atomic_dec(&tsk->signal->live);
-
- write_lock_irq(&tasklist_lock);
- __exit_signal(tsk);
- write_unlock_irq(&tasklist_lock);
-}
-
-/*
* Flush all handlers for a task.
*/
}
/* forward decl */
-static void do_notify_parent_cldstop(struct task_struct *tsk,
- int to_self,
- int why);
+static void do_notify_parent_cldstop(struct task_struct *tsk, int why);
/*
* Handle magic process-wide effects of stop/continue signals.
p->signal->group_stop_count = 0;
p->signal->flags = SIGNAL_STOP_CONTINUED;
spin_unlock(&p->sighand->siglock);
- do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_STOPPED);
+ do_notify_parent_cldstop(p, CLD_STOPPED);
spin_lock(&p->sighand->siglock);
}
rm_from_queue(SIG_KERNEL_STOP_MASK, &p->signal->shared_pending);
p->signal->flags = SIGNAL_STOP_CONTINUED;
p->signal->group_exit_code = 0;
spin_unlock(&p->sighand->siglock);
- do_notify_parent_cldstop(p, (p->ptrace & PT_PTRACED), CLD_CONTINUED);
+ do_notify_parent_cldstop(p, CLD_CONTINUED);
spin_lock(&p->sighand->siglock);
} else {
/*
/*
* Must be called under rcu_read_lock() or with tasklist_lock read-held.
*/
+struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long *flags)
+{
+ struct sighand_struct *sighand;
+
+ for (;;) {
+ sighand = rcu_dereference(tsk->sighand);
+ if (unlikely(sighand == NULL))
+ break;
+
+ spin_lock_irqsave(&sighand->siglock, *flags);
+ if (likely(sighand == tsk->sighand))
+ break;
+ spin_unlock_irqrestore(&sighand->siglock, *flags);
+ }
+
+ return sighand;
+}
+
int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p)
{
unsigned long flags;
- struct sighand_struct *sp;
int ret;
-retry:
ret = check_kill_permission(sig, info, p);
- if (!ret && sig && (sp = rcu_dereference(p->sighand))) {
- spin_lock_irqsave(&sp->siglock, flags);
- if (p->sighand != sp) {
- spin_unlock_irqrestore(&sp->siglock, flags);
- goto retry;
- }
- if ((atomic_read(&sp->count) == 0) ||
- (atomic_read(&p->usage) == 0)) {
- spin_unlock_irqrestore(&sp->siglock, flags);
- return -ESRCH;
+
+ if (!ret && sig) {
+ ret = -ESRCH;
+ if (lock_task_sighand(p, &flags)) {
+ ret = __group_send_sig_info(sig, info, p);
+ unlock_task_sighand(p, &flags);
}
- ret = __group_send_sig_info(sig, info, p);
- spin_unlock_irqrestore(&sp->siglock, flags);
}
return ret;
struct task_struct *p;
rcu_read_lock();
- if (unlikely(sig_kernel_stop(sig) || sig == SIGCONT)) {
+ if (unlikely(sig_needs_tasklist(sig))) {
read_lock(&tasklist_lock);
acquired_tasklist_lock = 1;
}
__sigqueue_free(q);
}
-int
-send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
+int send_sigqueue(int sig, struct sigqueue *q, struct task_struct *p)
{
unsigned long flags;
int ret = 0;
- struct sighand_struct *sh;
BUG_ON(!(q->flags & SIGQUEUE_PREALLOC));
*/
rcu_read_lock();
- if (unlikely(p->flags & PF_EXITING)) {
+ if (!likely(lock_task_sighand(p, &flags))) {
ret = -1;
goto out_err;
}
-retry:
- sh = rcu_dereference(p->sighand);
-
- spin_lock_irqsave(&sh->siglock, flags);
- if (p->sighand != sh) {
- /* We raced with exec() in a multithreaded process... */
- spin_unlock_irqrestore(&sh->siglock, flags);
- goto retry;
- }
-
- /*
- * We do the check here again to handle the following scenario:
- *
- * CPU 0 CPU 1
- * send_sigqueue
- * check PF_EXITING
- * interrupt exit code running
- * __exit_signal
- * lock sighand->siglock
- * unlock sighand->siglock
- * lock sh->siglock
- * add(tsk->pending) flush_sigqueue(tsk->pending)
- *
- */
-
- if (unlikely(p->flags & PF_EXITING)) {
- ret = -1;
- goto out;
- }
-
if (unlikely(!list_empty(&q->list))) {
/*
* If an SI_TIMER entry is already queue just increment
* the overrun count.
*/
- if (q->info.si_code != SI_TIMER)
- BUG();
+ BUG_ON(q->info.si_code != SI_TIMER);
q->info.si_overrun++;
goto out;
}
signal_wake_up(p, sig == SIGKILL);
out:
- spin_unlock_irqrestore(&sh->siglock, flags);
+ unlock_task_sighand(p, &flags);
out_err:
rcu_read_unlock();
spin_unlock_irqrestore(&psig->siglock, flags);
}
-static void do_notify_parent_cldstop(struct task_struct *tsk, int to_self, int why)
+static void do_notify_parent_cldstop(struct task_struct *tsk, int why)
{
struct siginfo info;
unsigned long flags;
struct task_struct *parent;
struct sighand_struct *sighand;
- if (to_self)
+ if (tsk->ptrace & PT_PTRACED)
parent = tsk->parent;
else {
tsk = tsk->group_leader;
!(current->ptrace & PT_ATTACHED)) &&
(likely(current->parent->signal != current->signal) ||
!unlikely(current->signal->flags & SIGNAL_GROUP_EXIT))) {
- do_notify_parent_cldstop(current, 1, CLD_TRAPPED);
+ do_notify_parent_cldstop(current, CLD_TRAPPED);
read_unlock(&tasklist_lock);
schedule();
} else {
static void
finish_stop(int stop_count)
{
- int to_self;
-
/*
* If there are no other threads in the group, or if there is
* a group stop in progress and we are the last to stop,
* report to the parent. When ptraced, every thread reports itself.
*/
- if (stop_count < 0 || (current->ptrace & PT_PTRACED))
- to_self = 1;
- else if (stop_count == 0)
- to_self = 0;
- else
- goto out;
-
- read_lock(&tasklist_lock);
- do_notify_parent_cldstop(current, to_self, CLD_STOPPED);
- read_unlock(&tasklist_lock);
+ if (stop_count == 0 || (current->ptrace & PT_PTRACED)) {
+ read_lock(&tasklist_lock);
+ do_notify_parent_cldstop(current, CLD_STOPPED);
+ read_unlock(&tasklist_lock);
+ }
-out:
schedule();
/*
* Now we don't run again until continued.
* Returns nonzero if we've actually stopped and released the siglock.
* Returns zero if we didn't stop and still hold the siglock.
*/
-static int
-do_signal_stop(int signr)
+static int do_signal_stop(int signr)
{
struct signal_struct *sig = current->signal;
- struct sighand_struct *sighand = current->sighand;
- int stop_count = -1;
+ int stop_count;
if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED))
return 0;
* There is a group stop in progress. We don't need to
* start another one.
*/
- signr = sig->group_exit_code;
stop_count = --sig->group_stop_count;
- current->exit_code = signr;
- set_current_state(TASK_STOPPED);
- if (stop_count == 0)
- sig->flags = SIGNAL_STOP_STOPPED;
- spin_unlock_irq(&sighand->siglock);
- }
- else if (thread_group_empty(current)) {
- /*
- * Lock must be held through transition to stopped state.
- */
- current->exit_code = current->signal->group_exit_code = signr;
- set_current_state(TASK_STOPPED);
- sig->flags = SIGNAL_STOP_STOPPED;
- spin_unlock_irq(&sighand->siglock);
- }
- else {
+ } else {
/*
* There is no group stop already in progress.
- * We must initiate one now, but that requires
- * dropping siglock to get both the tasklist lock
- * and siglock again in the proper order. Note that
- * this allows an intervening SIGCONT to be posted.
- * We need to check for that and bail out if necessary.
+ * We must initiate one now.
*/
struct task_struct *t;
- spin_unlock_irq(&sighand->siglock);
-
- /* signals can be posted during this window */
+ sig->group_exit_code = signr;
- read_lock(&tasklist_lock);
- spin_lock_irq(&sighand->siglock);
-
- if (!likely(sig->flags & SIGNAL_STOP_DEQUEUED)) {
+ stop_count = 0;
+ for (t = next_thread(current); t != current; t = next_thread(t))
/*
- * Another stop or continue happened while we
- * didn't have the lock. We can just swallow this
- * signal now. If we raced with a SIGCONT, that
- * should have just cleared it now. If we raced
- * with another processor delivering a stop signal,
- * then the SIGCONT that wakes us up should clear it.
+ * Setting state to TASK_STOPPED for a group
+ * stop is always done with the siglock held,
+ * so this check has no races.
*/
- read_unlock(&tasklist_lock);
- return 0;
- }
-
- if (sig->group_stop_count == 0) {
- sig->group_exit_code = signr;
- stop_count = 0;
- for (t = next_thread(current); t != current;
- t = next_thread(t))
- /*
- * Setting state to TASK_STOPPED for a group
- * stop is always done with the siglock held,
- * so this check has no races.
- */
- if (!t->exit_state &&
- !(t->state & (TASK_STOPPED|TASK_TRACED))) {
- stop_count++;
- signal_wake_up(t, 0);
- }
- sig->group_stop_count = stop_count;
- }
- else {
- /* A race with another thread while unlocked. */
- signr = sig->group_exit_code;
- stop_count = --sig->group_stop_count;
- }
-
- current->exit_code = signr;
- set_current_state(TASK_STOPPED);
- if (stop_count == 0)
- sig->flags = SIGNAL_STOP_STOPPED;
-
- spin_unlock_irq(&sighand->siglock);
- read_unlock(&tasklist_lock);
+ if (!t->exit_state &&
+ !(t->state & (TASK_STOPPED|TASK_TRACED))) {
+ stop_count++;
+ signal_wake_up(t, 0);
+ }
+ sig->group_stop_count = stop_count;
}
+ if (stop_count == 0)
+ sig->flags = SIGNAL_STOP_STOPPED;
+ current->exit_code = sig->group_exit_code;
+ __set_current_state(TASK_STOPPED);
+
+ spin_unlock_irq(¤t->sighand->siglock);
finish_stop(stop_count);
return 1;
}
continue;
/* Init gets no signals it doesn't want. */
- if (current->pid == 1)
+ if (current == child_reaper)
continue;
if (sig_kernel_stop(signr)) {
return kill_proc_info(sig, &info, pid);
}
-int
-do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
+int do_sigaction(int sig, struct k_sigaction *act, struct k_sigaction *oact)
{
struct k_sigaction *k;
sigset_t mask;
if (act) {
sigdelsetmask(&act->sa.sa_mask,
sigmask(SIGKILL) | sigmask(SIGSTOP));
+ *k = *act;
/*
* POSIX 3.3.1.3:
* "Setting a signal action to SIG_IGN for a signal that is
* be discarded, whether or not it is blocked"
*/
if (act->sa.sa_handler == SIG_IGN ||
- (act->sa.sa_handler == SIG_DFL &&
- sig_kernel_ignore(sig))) {
- /*
- * This is a fairly rare case, so we only take the
- * tasklist_lock once we're sure we'll need it.
- * Now we must do this little unlock and relock
- * dance to maintain the lock hierarchy.
- */
+ (act->sa.sa_handler == SIG_DFL && sig_kernel_ignore(sig))) {
struct task_struct *t = current;
- spin_unlock_irq(&t->sighand->siglock);
- read_lock(&tasklist_lock);
- spin_lock_irq(&t->sighand->siglock);
- *k = *act;
sigemptyset(&mask);
sigaddset(&mask, sig);
rm_from_queue_full(&mask, &t->signal->shared_pending);
recalc_sigpending_tsk(t);
t = next_thread(t);
} while (t != current);
- spin_unlock_irq(¤t->sighand->siglock);
- read_unlock(&tasklist_lock);
- return 0;
}
-
- *k = *act;
}
spin_unlock_irq(¤t->sighand->siglock);
*/
if (tbuf) {
struct tms tmp;
+ struct task_struct *tsk = current;
+ struct task_struct *t;
cputime_t utime, stime, cutime, cstime;
-#ifdef CONFIG_SMP
- if (thread_group_empty(current)) {
- /*
- * Single thread case without the use of any locks.
- *
- * We may race with release_task if two threads are
- * executing. However, release task first adds up the
- * counters (__exit_signal) before removing the task
- * from the process tasklist (__unhash_process).
- * __exit_signal also acquires and releases the
- * siglock which results in the proper memory ordering
- * so that the list modifications are always visible
- * after the counters have been updated.
- *
- * If the counters have been updated by the second thread
- * but the thread has not yet been removed from the list
- * then the other branch will be executing which will
- * block on tasklist_lock until the exit handling of the
- * other task is finished.
- *
- * This also implies that the sighand->siglock cannot
- * be held by another processor. So we can also
- * skip acquiring that lock.
- */
- utime = cputime_add(current->signal->utime, current->utime);
- stime = cputime_add(current->signal->utime, current->stime);
- cutime = current->signal->cutime;
- cstime = current->signal->cstime;
- } else
-#endif
- {
+ spin_lock_irq(&tsk->sighand->siglock);
+ utime = tsk->signal->utime;
+ stime = tsk->signal->stime;
+ t = tsk;
+ do {
+ utime = cputime_add(utime, t->utime);
+ stime = cputime_add(stime, t->stime);
+ t = next_thread(t);
+ } while (t != tsk);
- /* Process with multiple threads */
- struct task_struct *tsk = current;
- struct task_struct *t;
+ cutime = tsk->signal->cutime;
+ cstime = tsk->signal->cstime;
+ spin_unlock_irq(&tsk->sighand->siglock);
- read_lock(&tasklist_lock);
- utime = tsk->signal->utime;
- stime = tsk->signal->stime;
- t = tsk;
- do {
- utime = cputime_add(utime, t->utime);
- stime = cputime_add(stime, t->stime);
- t = next_thread(t);
- } while (t != tsk);
-
- /*
- * While we have tasklist_lock read-locked, no dying thread
- * can be updating current->signal->[us]time. Instead,
- * we got their counts included in the live thread loop.
- * However, another thread can come in right now and
- * do a wait call that updates current->signal->c[us]time.
- * To make sure we always see that pair updated atomically,
- * we take the siglock around fetching them.
- */
- spin_lock_irq(&tsk->sighand->siglock);
- cutime = tsk->signal->cutime;
- cstime = tsk->signal->cstime;
- spin_unlock_irq(&tsk->sighand->siglock);
- read_unlock(&tasklist_lock);
- }
tmp.tms_utime = cputime_to_clock_t(utime);
tmp.tms_stime = cputime_to_clock_t(stime);
tmp.tms_cutime = cputime_to_clock_t(cutime);