This patch starts the removal of a very old, very broken piece of code. This
stems from the problem of passing a userspace buffer into read() or write() on
the host. If that buffer had not yet been faulted in, read and write will
return -EFAULT.
To avoid this problem, the solution was to fault the buffer in before the
system call by touching the pages that hold the buffer by doing a copy-user of
a byte to each page. This is obviously bogus, but it does usually work, in tt
mode, since the kernel and process are in the same address space and userspace
addresses can be accessed directly in the kernel.
In skas mode, where the kernel and process are in separate address spaces, it
is completely bogus because the userspace address, which is invalid in the
kernel, is passed into the system call instead of the corresponding physical
address, which would be valid. Here, it appears that this code, on every host
read() or write(), tries to fault in a random process page. This doesn't seem
to cause any correctness problems, but there is a performance impact. This
patch, and the ones following, result in a 10-15% performance gain on a kernel
build.
This code can't be immediately tossed out because when it is, you can't log
in. Apparently, there is some code in the console driver which depends on
this somehow.
However, we can start removing it by switching the code which does I/O using
kernel addresses to using plain read() and write(). This patch introduces
os_read_file_k and os_write_file_k for use with kernel buffers and converts
all call locations which use obvious kernel buffers to use them. These
include I/O using buffers which are local variables which are on the stack or
kmalloc-ed. Later patches will handle the less obvious cases, followed by a
mass conversion back to the original interface.
Signed-off-by: Jeff Dike <jdike@linux.intel.com>
Cc: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
19 files changed:
pty_fd = data->pty_fd;
pipe_fd = data->pipe_fd;
pty_fd = data->pty_fd;
pipe_fd = data->pipe_fd;
- count = os_write_file(pipe_fd, &c, sizeof(c));
+ count = os_write_file_k(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("winch_thread : failed to write synchronization "
"byte, err = %d\n", -count);
if(count != sizeof(c))
printk("winch_thread : failed to write synchronization "
"byte, err = %d\n", -count);
* host - since they are not different kernel threads, we cannot use
* kernel semaphores. We don't use SysV semaphores because they are
* persistent. */
* host - since they are not different kernel threads, we cannot use
* kernel semaphores. We don't use SysV semaphores because they are
* persistent. */
- count = os_read_file(pipe_fd, &c, sizeof(c));
+ count = os_read_file_k(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("winch_thread : failed to read synchronization byte, "
"err = %d\n", -count);
if(count != sizeof(c))
printk("winch_thread : failed to read synchronization byte, "
"err = %d\n", -count);
* are blocked.*/
sigsuspend(&sigs);
* are blocked.*/
sigsuspend(&sigs);
- count = os_write_file(pipe_fd, &c, sizeof(c));
+ count = os_write_file_k(pipe_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("winch_thread : write failed, err = %d\n",
-count);
if(count != sizeof(c))
printk("winch_thread : write failed, err = %d\n",
-count);
- n = os_read_file(fds[0], &c, sizeof(c));
+ n = os_read_file_k(fds[0], &c, sizeof(c));
if(n != sizeof(c)){
printk("winch_tramp : failed to read synchronization byte\n");
printk("read failed, err = %d\n", -n);
if(n != sizeof(c)){
printk("winch_tramp : failed to read synchronization byte\n");
printk("read failed, err = %d\n", -n);
if(thread > 0){
register_winch_irq(thread_fd, fd, thread, tty);
if(thread > 0){
register_winch_irq(thread_fd, fd, thread, tty);
- count = os_write_file(thread_fd, &c, sizeof(c));
+ count = os_write_file_k(thread_fd, &c, sizeof(c));
if(count != sizeof(c))
printk("register_winch : failed to write "
"synchronization byte, err = %d\n",
if(count != sizeof(c))
printk("register_winch : failed to write "
"synchronization byte, err = %d\n",
req.version = SWITCH_VERSION;
req.type = REQ_NEW_CONTROL;
req.sock = *local_addr;
req.version = SWITCH_VERSION;
req.type = REQ_NEW_CONTROL;
req.sock = *local_addr;
- n = os_write_file(pri->control, &req, sizeof(req));
+ n = os_write_file_k(pri->control, &req, sizeof(req));
if(n != sizeof(req)){
printk("daemon_open : control setup request failed, err = %d\n",
-n);
if(n != sizeof(req)){
printk("daemon_open : control setup request failed, err = %d\n",
-n);
- n = os_read_file(pri->control, sun, sizeof(*sun));
+ n = os_read_file_k(pri->control, sun, sizeof(*sun));
if(n != sizeof(*sun)){
printk("daemon_open : read of data socket failed, err = %d\n",
-n);
if(n != sizeof(*sun)){
printk("daemon_open : read of data socket failed, err = %d\n",
-n);
- n = os_read_file(in_fds[0], &c, sizeof(c));
+ n = os_read_file_k(in_fds[0], &c, sizeof(c));
if(n == 0){
printk("harddog_open - EOF on watchdog pipe\n");
helper_wait(pid);
if(n == 0){
printk("harddog_open - EOF on watchdog pipe\n");
helper_wait(pid);
- n = os_write_file(fd, &c, sizeof(c));
+ n = os_write_file_k(fd, &c, sizeof(c));
if(n != sizeof(c)){
printk("ping_watchdog - write failed, err = %d\n", -n);
if(n < 0)
if(n != sizeof(c)){
printk("ping_watchdog - write failed, err = %d\n", -n);
if(n < 0)
if(kbuf == NULL)
return(-ENOMEM);
if(kbuf == NULL)
return(-ENOMEM);
- err = os_read_file(state->fd, kbuf, count);
+ err = os_read_file_k(state->fd, kbuf, count);
if(copy_from_user(kbuf, buffer, count))
goto out;
if(copy_from_user(kbuf, buffer, count))
goto out;
- err = os_write_file(state->fd, kbuf, count);
+ err = os_write_file_k(state->fd, kbuf, count);
if(err < 0)
goto out;
*ppos += err;
if(err < 0)
goto out;
*ppos += err;
- ret = os_read_file(fd, &remain, sizeof(remain));
+ ret = os_read_file_k(fd, &remain, sizeof(remain));
if (ret != sizeof(remain)) {
expected = sizeof(remain);
if (ret != sizeof(remain)) {
expected = sizeof(remain);
}
if(atomic_read(&port->wait_count) == 0){
}
if(atomic_read(&port->wait_count) == 0){
- os_write_file(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
+ os_write_file_k(fd, NO_WAITER_MSG, sizeof(NO_WAITER_MSG));
printk("No one waiting for port\n");
}
list_add(&conn->list, &port->pending);
printk("No one waiting for port\n");
}
list_add(&conn->list, &port->pending);
int n, ret = 0, have_data;
while(size){
int n, ret = 0, have_data;
while(size){
- n = os_read_file(random_fd, &data, sizeof(data));
+ n = os_read_file_k(random_fd, &data, sizeof(data));
if(n > 0){
have_data = n;
while (have_data && size) {
if(n > 0){
have_data = n;
while (have_data && size) {
- n = os_read_file(thread_fd, &req, sizeof(req));
+ n = os_read_file_k(thread_fd, &req, sizeof(req));
if(n != sizeof(req)){
printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
"err = %d\n", os_getpid(), -n);
if(n != sizeof(req)){
printk(KERN_ERR "Pid %d - spurious interrupt in ubd_handler, "
"err = %d\n", os_getpid(), -n);
err = prepare_request(req, &io_req);
if(!err){
dev->active = 1;
err = prepare_request(req, &io_req);
if(!err){
dev->active = 1;
- n = os_write_file(thread_fd, (char *) &io_req,
- sizeof(io_req));
+ n = os_write_file_k(thread_fd, &io_req, sizeof(io_req));
if(n != sizeof(io_req))
printk("write to io thread failed, "
"errno = %d\n", -n);
if(n != sizeof(io_req))
printk("write to io thread failed, "
"errno = %d\n", -n);
- n = os_write_file(req->fds[1], &req->bitmap_words,
- sizeof(req->bitmap_words));
+ n = os_write_file_k(req->fds[1], &req->bitmap_words,
+ sizeof(req->bitmap_words));
if(n != sizeof(req->bitmap_words)){
printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
req->fds[1]);
if(n != sizeof(req->bitmap_words)){
printk("do_io - bitmap update failed, err = %d fd = %d\n", -n,
req->fds[1]);
do {
buf = &buf[n];
len -= n;
do {
buf = &buf[n];
len -= n;
- n = os_read_file(req->fds[bit], buf, len);
+ n = os_read_file_k(req->fds[bit], buf, len);
if (n < 0) {
printk("do_io - read failed, err = %d "
"fd = %d\n", -n, req->fds[bit]);
if (n < 0) {
printk("do_io - read failed, err = %d "
"fd = %d\n", -n, req->fds[bit]);
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
} else {
} while((n < len) && (n != 0));
if (n < len) memset(&buf[n], 0, len - n);
} else {
- n = os_write_file(req->fds[bit], buf, len);
+ n = os_write_file_k(req->fds[bit], buf, len);
if(n != len){
printk("do_io - write failed err = %d "
"fd = %d\n", -n, req->fds[bit]);
if(n != len){
printk("do_io - write failed err = %d "
"fd = %d\n", -n, req->fds[bit]);
ignore_sigwinch_sig();
while(1){
ignore_sigwinch_sig();
while(1){
- n = os_read_file(kernel_fd, &req, sizeof(req));
+ n = os_read_file_k(kernel_fd, &req, sizeof(req));
if(n != sizeof(req)){
if(n < 0)
printk("io_thread - read failed, fd = %d, "
if(n != sizeof(req)){
if(n < 0)
printk("io_thread - read failed, fd = %d, "
}
io_count++;
do_io(&req);
}
io_count++;
do_io(&req);
- n = os_write_file(kernel_fd, &req, sizeof(req));
+ n = os_write_file_k(kernel_fd, &req, sizeof(req));
if(n != sizeof(req))
printk("io_thread - write failed, fd = %d, err = %d\n",
kernel_fd, -n);
if(n != sizeof(req))
printk("io_thread - write failed, fd = %d, err = %d\n",
kernel_fd, -n);
extern int os_seek_file(int fd, __u64 offset);
extern int os_open_file(char *file, struct openflags flags, int mode);
extern int os_read_file(int fd, void *buf, int len);
extern int os_seek_file(int fd, __u64 offset);
extern int os_open_file(char *file, struct openflags flags, int mode);
extern int os_read_file(int fd, void *buf, int len);
+extern int os_read_file_k(int fd, void *buf, int len);
extern int os_write_file(int fd, const void *buf, int count);
extern int os_write_file(int fd, const void *buf, int count);
+extern int os_write_file_k(int fd, const void *buf, int len);
extern int os_file_size(char *file, unsigned long long *size_out);
extern int os_file_modtime(char *file, unsigned long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
extern int os_file_size(char *file, unsigned long long *size_out);
extern int os_file_modtime(char *file, unsigned long *modtime);
extern int os_pipe(int *fd, int stream, int close_on_exec);
EXPORT_SYMBOL(os_set_exec_close);
EXPORT_SYMBOL(os_getpid);
EXPORT_SYMBOL(os_open_file);
EXPORT_SYMBOL(os_set_exec_close);
EXPORT_SYMBOL(os_getpid);
EXPORT_SYMBOL(os_open_file);
+EXPORT_SYMBOL(os_read_file_k);
EXPORT_SYMBOL(os_read_file);
EXPORT_SYMBOL(os_read_file);
+EXPORT_SYMBOL(os_write_file_k);
EXPORT_SYMBOL(os_write_file);
EXPORT_SYMBOL(os_seek_file);
EXPORT_SYMBOL(os_lock_file);
EXPORT_SYMBOL(os_write_file);
EXPORT_SYMBOL(os_seek_file);
EXPORT_SYMBOL(os_lock_file);
* from physmem_fd, so it needs to be written out there.
*/
os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
* from physmem_fd, so it needs to be written out there.
*/
os_seek_file(physmem_fd, __pa(&__syscall_stub_start));
- os_write_file(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
+ os_write_file_k(physmem_fd, &__syscall_stub_start, PAGE_SIZE);
bootmap_size = init_bootmem(pfn, pfn + delta);
free_bootmem(__pa(reserve_end) + bootmap_size,
bootmap_size = init_bootmem(pfn, pfn + delta);
free_bootmem(__pa(reserve_end) + bootmap_size,
- os_read_file(sigio_irq_fd, &c, sizeof(c));
+ os_read_file_k(sigio_irq_fd, &c, sizeof(c));
reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
return IRQ_HANDLED;
}
reactivate_fd(sigio_irq_fd, SIGIO_WRITE_IRQ);
return IRQ_HANDLED;
}
void smp_send_reschedule(int cpu)
{
void smp_send_reschedule(int cpu)
{
- os_write_file(cpu_data[cpu].ipi_pipe[1], "R", 1);
+ os_write_file_k(cpu_data[cpu].ipi_pipe[1], "R", 1);
num_reschedules_sent++;
}
num_reschedules_sent++;
}
for(i = 0; i < num_online_cpus(); i++){
if(i == current_thread->cpu)
continue;
for(i = 0; i < num_online_cpus(); i++){
if(i == current_thread->cpu)
continue;
- os_write_file(cpu_data[i].ipi_pipe[1], "S", 1);
+ os_write_file_k(cpu_data[i].ipi_pipe[1], "S", 1);
{ .pid = new_task->thread.mode.tt.extern_pid,
.task = new_task } );
idle_threads[cpu] = new_task;
{ .pid = new_task->thread.mode.tt.extern_pid,
.task = new_task } );
idle_threads[cpu] = new_task;
- CHOOSE_MODE(os_write_file(new_task->thread.mode.tt.switch_pipe[1], &c,
- sizeof(c)),
+ CHOOSE_MODE(os_write_file_k(new_task->thread.mode.tt.switch_pipe[1], &c,
+ sizeof(c)),
({ panic("skas mode doesn't support SMP"); }));
return(new_task);
}
({ panic("skas mode doesn't support SMP"); }));
return(new_task);
}
int fd;
fd = cpu_data[cpu].ipi_pipe[0];
int fd;
fd = cpu_data[cpu].ipi_pipe[0];
- while (os_read_file(fd, &c, 1) == 1) {
+ while (os_read_file_k(fd, &c, 1) == 1) {
switch (c) {
case 'C':
smp_call_function_slave(cpu);
switch (c) {
case 'C':
smp_call_function_slave(cpu);
info = _info;
for_each_online_cpu(i)
info = _info;
for_each_online_cpu(i)
- os_write_file(cpu_data[i].ipi_pipe[1], "C", 1);
+ os_write_file_k(cpu_data[i].ipi_pipe[1], "C", 1);
while (atomic_read(&scf_started) != cpus)
barrier();
while (atomic_read(&scf_started) != cpus)
barrier();
* nor the value in "to" (since it was the task which stole us the CPU,
* which we don't care about). */
* nor the value in "to" (since it was the task which stole us the CPU,
* which we don't care about). */
- err = os_write_file(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
+ err = os_write_file_k(to->thread.mode.tt.switch_pipe[1], &c, sizeof(c));
if(err != sizeof(c))
panic("write of switch_pipe failed, err = %d", -err);
if(from->thread.mode.tt.switch_pipe[0] == -1)
os_kill_process(os_getpid(), 0);
if(err != sizeof(c))
panic("write of switch_pipe failed, err = %d", -err);
if(from->thread.mode.tt.switch_pipe[0] == -1)
os_kill_process(os_getpid(), 0);
- err = os_read_file(from->thread.mode.tt.switch_pipe[0], &c, sizeof(c));
+ err = os_read_file_k(from->thread.mode.tt.switch_pipe[0], &c,
+ sizeof(c));
if(err != sizeof(c))
panic("read of switch_pipe failed, errno = %d", -err);
if(err != sizeof(c))
panic("read of switch_pipe failed, errno = %d", -err);
char c;
os_stop_process(os_getpid());
char c;
os_stop_process(os_getpid());
- err = os_read_file(fd, &c, sizeof(c));
+ err = os_read_file_k(fd, &c, sizeof(c));
if(err != sizeof(c))
panic("read failed in suspend_new_thread, err = %d", -err);
}
if(err != sizeof(c))
panic("read failed in suspend_new_thread, err = %d", -err);
}
"err = %d\n", -fd);
exit(1);
}
"err = %d\n", -fd);
exit(1);
}
- os_write_file(fd, gdb_init_string, sizeof(gdb_init_string) - 1);
+ os_write_file_k(fd, gdb_init_string,
+ sizeof(gdb_init_string) - 1);
- os_write_file(fd, "b start_kernel\n",
- strlen("b start_kernel\n"));
+ os_write_file_k(fd, "b start_kernel\n",
+ strlen("b start_kernel\n"));
- os_write_file(fd, "c\n", strlen("c\n"));
+ os_write_file_k(fd, "c\n", strlen("c\n"));
}
if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
printk("start_debugger : PTRACE_TRACEME failed, "
}
if(ptrace(PTRACE_TRACEME, 0, 0, 0) < 0){
printk("start_debugger : PTRACE_TRACEME failed, "
- n = os_write_file(tracer_winch[1], &c, sizeof(c));
+ n = os_write_file_k(tracer_winch[1], &c, sizeof(c));
if(n != sizeof(c))
printk("tracer_winch_handler - write failed, err = %d\n", -n);
}
if(n != sizeof(c))
printk("tracer_winch_handler - write failed, err = %d\n", -n);
}
+int os_read_file_k(int fd, void *buf, int len)
+{
+ int n = read(fd, buf, len);
+
+ if(n < 0)
+ return -errno;
+ return n;
+}
+
int os_write_file(int fd, const void *buf, int len)
{
return file_io(fd, (void *) buf, len,
(int (*)(int, void *, int)) write, copy_to_user_proc);
}
int os_write_file(int fd, const void *buf, int len)
{
return file_io(fd, (void *) buf, len,
(int (*)(int, void *, int)) write, copy_to_user_proc);
}
+int os_write_file_k(int fd, const void *buf, int len)
+{
+ int n = write(fd, (void *) buf, len);
+
+ if(n < 0)
+ return -errno;
+ return n;
+}
+
int os_file_size(char *file, unsigned long long *size_out)
{
struct uml_stat buf;
int os_file_size(char *file, unsigned long long *size_out)
{
struct uml_stat buf;
- n = os_read_file(fd, &c, sizeof(c));
+ n = os_read_file_k(fd, &c, sizeof(c));
if(n != sizeof(c)){
printk("Failed to find newline in "
"/proc/cpuinfo, err = %d\n", -n);
if(n != sizeof(c)){
printk("Failed to find newline in "
"/proc/cpuinfo, err = %d\n", -n);
.u =
{ .copy_segments =
from_mm->id.u.mm_fd } } );
.u =
{ .copy_segments =
from_mm->id.u.mm_fd } } );
- i = os_write_file(new_mm->id.u.mm_fd, ©, sizeof(copy));
+ i = os_write_file_k(new_mm->id.u.mm_fd, ©, sizeof(copy));
if(i != sizeof(copy))
printk("new_mm : /proc/mm copy_segments failed, "
"err = %d\n", -i);
if(i != sizeof(copy))
printk("new_mm : /proc/mm copy_segments failed, "
"err = %d\n", -i);