[linux-kernel-docs/linux-2.4.36.git] fs/exec.c
1 /*
2  *  linux/fs/exec.c
3  *
4  *  Copyright (C) 1991, 1992  Linus Torvalds
5  */
6
7 /*
8  * #!-checking implemented by tytso.
9  */
10 /*
11  * Demand-loading implemented 01.12.91 - no need to read anything but
12  * the header into memory. The inode of the executable is put into
13  * "current->executable", and page faults do the actual loading. Clean.
14  *
15  * Once more I can proudly say that linux stood up to being changed: it
16  * was less than 2 hours work to get demand-loading completely implemented.
17  *
18  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
19  * current->executable is only used by the procfs.  This allows a dispatch
20  * table to check for several different types  of binary formats.  We keep
21  * trying until we recognize the file or we run out of supported binary
22  * formats. 
23  */
24
25 #include <linux/config.h>
26 #include <linux/slab.h>
27 #include <linux/file.h>
28 #include <linux/mman.h>
29 #include <linux/a.out.h>
30 #include <linux/stat.h>
31 #include <linux/fcntl.h>
32 #include <linux/smp_lock.h>
33 #include <linux/init.h>
34 #include <linux/pagemap.h>
35 #include <linux/highmem.h>
36 #include <linux/spinlock.h>
37 #include <linux/personality.h>
38 #include <linux/swap.h>
39 #include <linux/utsname.h>
40 #define __NO_VERSION__
41 #include <linux/module.h>
42
43 #include <asm/uaccess.h>
44 #include <asm/pgalloc.h>
45 #include <asm/mmu_context.h>
46
47 #ifdef CONFIG_KMOD
48 #include <linux/kmod.h>
49 #endif
50
51 int core_uses_pid;
52 char core_pattern[65] = "core";
53 int core_setuid_ok = 0;
54 /* The maximal length of core_pattern is also specified in sysctl.c */ 
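/*
 * For illustration only: with the defaults above a dump is simply written
 * to "core" in the dumping process' working directory.  core_pattern and
 * core_uses_pid are exported through sysctl (see sysctl.c), so they are
 * normally tuned from user space, typically e.g.:
 *
 *      echo 'core.%p' > /proc/sys/kernel/core_pattern
 *      echo 1 > /proc/sys/kernel/core_uses_pid
 */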
55
56 static struct linux_binfmt *formats;
57 static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
58
59 int register_binfmt(struct linux_binfmt * fmt)
60 {
61         struct linux_binfmt ** tmp = &formats;
62
63         if (!fmt)
64                 return -EINVAL;
65         if (fmt->next)
66                 return -EBUSY;
67         write_lock(&binfmt_lock);
68         while (*tmp) {
69                 if (fmt == *tmp) {
70                         write_unlock(&binfmt_lock);
71                         return -EBUSY;
72                 }
73                 tmp = &(*tmp)->next;
74         }
75         fmt->next = formats;
76         formats = fmt;
77         write_unlock(&binfmt_lock);
78         return 0;       
79 }
80
81 int unregister_binfmt(struct linux_binfmt * fmt)
82 {
83         struct linux_binfmt ** tmp = &formats;
84
85         write_lock(&binfmt_lock);
86         while (*tmp) {
87                 if (fmt == *tmp) {
88                         *tmp = fmt->next;
89                         write_unlock(&binfmt_lock);
90                         return 0;
91                 }
92                 tmp = &(*tmp)->next;
93         }
94         write_unlock(&binfmt_lock);
95         return -EINVAL;
96 }
97
98 static inline void put_binfmt(struct linux_binfmt * fmt)
99 {
100         if (fmt->module)
101                 __MOD_DEC_USE_COUNT(fmt->module);
102 }
103
104 /*
105  * Note that a shared library must be both readable and executable for
106  * security reasons.
107  *
108  * Also note that the address to load at is taken from the file itself.
109  */
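/*
 * A hypothetical call for illustration: uselib("/lib/libfoo.so.4") fails
 * with -EINVAL if the path is not a regular file, and with the usual
 * permission error unless the library is both readable and executable,
 * matching the note above; only then is the file handed to the registered
 * load_shlib() handlers below.
 */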
110 asmlinkage long sys_uselib(const char * library)
111 {
112         struct file * file;
113         struct nameidata nd;
114         int error;
115
116         error = user_path_walk(library, &nd);
117         if (error)
118                 goto out;
119
120         error = -EINVAL;
121         if (!S_ISREG(nd.dentry->d_inode->i_mode))
122                 goto exit;
123
124         error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
125         if (error)
126                 goto exit;
127
128         file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
129         error = PTR_ERR(file);
130         if (IS_ERR(file))
131                 goto out;
132
133         error = -ENOEXEC;
134         if(file->f_op && file->f_op->read) {
135                 struct linux_binfmt * fmt;
136
137                 read_lock(&binfmt_lock);
138                 for (fmt = formats ; fmt ; fmt = fmt->next) {
139                         if (!fmt->load_shlib)
140                                 continue;
141                         if (!try_inc_mod_count(fmt->module))
142                                 continue;
143                         read_unlock(&binfmt_lock);
144                         error = fmt->load_shlib(file);
145                         read_lock(&binfmt_lock);
146                         put_binfmt(fmt);
147                         if (error != -ENOEXEC)
148                                 break;
149                 }
150                 read_unlock(&binfmt_lock);
151         }
152         fput(file);
153 out:
154         return error;
155 exit:
156         path_release(&nd);
157         goto out;
158 }
159
160 /*
161  * count() counts the number of argument/environment strings
162  */
163 static int count(char ** argv, int max)
164 {
165         int i = 0;
166
167         if (argv != NULL) {
168                 for (;;) {
169                         char * p;
170
171                         if (get_user(p, argv))
172                                 return -EFAULT;
173                         if (!p)
174                                 break;
175                         argv++;
176                         if(++i > max)
177                                 return -E2BIG;
178                 }
179         }
180         return i;
181 }
182
183 /*
184  * 'copy_strings()' copies argument/environment strings from user
185  * memory to free pages in kernel memory. These are in a format ready
186  * to be put directly into the top of new user memory.
187  */
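/*
 * Worked example (illustrative values): for argv = { "ls", "-l", NULL }
 * the loop below starts with the last string and grows bprm->p downwards,
 * so the argument pages end up holding "ls\0-l\0" starting at the final
 * bprm->p, i.e. the strings sit in argv order from lower to higher
 * addresses, ready to be dropped in at the top of the new user stack.
 */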
188 int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) 
189 {
190         struct page *kmapped_page = NULL;
191         char *kaddr = NULL;
192         int ret;
193
194         while (argc-- > 0) {
195                 char *str;
196                 int len;
197                 unsigned long pos;
198
199                 if (get_user(str, argv+argc) ||
200                                 !(len = strnlen_user(str, bprm->p))) {
201                         ret = -EFAULT;
202                         goto out;
203                 }
204
205                 if (bprm->p < len)  {
206                         ret = -E2BIG;
207                         goto out;
208                 }
209
210                 bprm->p -= len;
211                 /* XXX: add architecture specific overflow check here. */ 
212                 pos = bprm->p;
213
214                 while (len > 0) {
215                         int i, new, err;
216                         int offset, bytes_to_copy;
217                         struct page *page;
218
219                         offset = pos % PAGE_SIZE;
220                         i = pos/PAGE_SIZE;
221                         page = bprm->page[i];
222                         new = 0;
223                         if (!page) {
224                                 page = alloc_page(GFP_HIGHUSER);
225                                 bprm->page[i] = page;
226                                 if (!page) {
227                                         ret = -ENOMEM;
228                                         goto out;
229                                 }
230                                 new = 1;
231                         }
232
233                         if (page != kmapped_page) {
234                                 if (kmapped_page)
235                                         kunmap(kmapped_page);
236                                 kmapped_page = page;
237                                 kaddr = kmap(kmapped_page);
238                         }
239                         if (new && offset)
240                                 memset(kaddr, 0, offset);
241                         bytes_to_copy = PAGE_SIZE - offset;
242                         if (bytes_to_copy > len) {
243                                 bytes_to_copy = len;
244                                 if (new)
245                                         memset(kaddr+offset+len, 0,
246                                                 PAGE_SIZE-offset-len);
247                         }
248                         err = copy_from_user(kaddr+offset, str, bytes_to_copy);
249                         if (err) {
250                                 ret = -EFAULT;
251                                 goto out;
252                         }
253
254                         pos += bytes_to_copy;
255                         str += bytes_to_copy;
256                         len -= bytes_to_copy;
257                 }
258         }
259         ret = 0;
260 out:
261         if (kmapped_page)
262                 kunmap(kmapped_page);
263         return ret;
264 }
265
266 /*
267  * Like copy_strings, but gets argv and its values from kernel memory.
268  */
269 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
270 {
271         int r;
272         mm_segment_t oldfs = get_fs();
273         set_fs(KERNEL_DS); 
274         r = copy_strings(argc, argv, bprm);
275         set_fs(oldfs);
276         return r; 
277 }
278
279 /*
280  * This routine is used to map a page into an address space: needed by
281  * execve() for the initial stack and environment pages.
282  *
283  * tsk->mmap_sem is held for writing.
284  */
285 void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
286 {
287         pgd_t * pgd;
288         pmd_t * pmd;
289         pte_t * pte;
290         struct vm_area_struct *vma; 
291         pgprot_t prot = PAGE_COPY; 
292
293         if (page_count(page) != 1)
294                 printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
295         pgd = pgd_offset(tsk->mm, address);
296
297         spin_lock(&tsk->mm->page_table_lock);
298         pmd = pmd_alloc(tsk->mm, pgd, address);
299         if (!pmd)
300                 goto out;
301         pte = pte_alloc(tsk->mm, pmd, address);
302         if (!pte)
303                 goto out;
304         if (!pte_none(*pte))
305                 goto out;
306         lru_cache_add(page);
307         flush_dcache_page(page);
308         flush_page_to_ram(page);
309         /* lookup is cheap because there is only a single entry in the list */
310         vma = find_vma(tsk->mm, address); 
311         if (vma) 
312                 prot = vma->vm_page_prot;
313         set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
314         tsk->mm->rss++;
315         spin_unlock(&tsk->mm->page_table_lock);
316
317         /* no need for flush_tlb */
318         return;
319 out:
320         spin_unlock(&tsk->mm->page_table_lock);
321         __free_page(page);
322         force_sig(SIGKILL, tsk);
323         return;
324 }
325
326 int setup_arg_pages(struct linux_binprm *bprm)
327 {
328         unsigned long stack_base;
329         struct vm_area_struct *mpnt;
330         int i, ret;
331
332         stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
333
334         bprm->p += stack_base;
335         if (bprm->loader)
336                 bprm->loader += stack_base;
337         bprm->exec += stack_base;
338
339         mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
340         if (!mpnt) 
341                 return -ENOMEM; 
342         
343         down_write(&current->mm->mmap_sem);
344         {
345                 mpnt->vm_mm = current->mm;
346                 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
347                 mpnt->vm_end = STACK_TOP;
348                 mpnt->vm_flags = VM_STACK_FLAGS;
349                 mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
350                 mpnt->vm_ops = NULL;
351                 mpnt->vm_pgoff = 0;
352                 mpnt->vm_file = NULL;
353                 mpnt->vm_private_data = (void *) 0;
354                 if ((ret = insert_vm_struct(current->mm, mpnt))) {
355                         up_write(&current->mm->mmap_sem);
356                         kmem_cache_free(vm_area_cachep, mpnt);
357                         return ret;
358                 }
359                 current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
360         } 
361
362         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
363                 struct page *page = bprm->page[i];
364                 if (page) {
365                         bprm->page[i] = NULL;
366                         put_dirty_page(current,page,stack_base);
367                 }
368                 stack_base += PAGE_SIZE;
369         }
370         up_write(&current->mm->mmap_sem);
371         
372         return 0;
373 }
374
375 struct file *open_exec(const char *name)
376 {
377         struct nameidata nd;
378         struct inode *inode;
379         struct file *file;
380         int err = 0;
381
382         err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
383         file = ERR_PTR(err);
384         if (!err) {
385                 inode = nd.dentry->d_inode;
386                 file = ERR_PTR(-EACCES);
387                 if (!(nd.mnt->mnt_flags & MNT_NOEXEC) &&
388                     S_ISREG(inode->i_mode)) {
389                         int err = permission(inode, MAY_EXEC);
390                         if (!err && !(inode->i_mode & 0111))
391                                 err = -EACCES;
392                         file = ERR_PTR(err);
393                         if (!err) {
394                                 file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
395                                 if (!IS_ERR(file)) {
396                                         err = deny_write_access(file);
397                                         if (err) {
398                                                 fput(file);
399                                                 file = ERR_PTR(err);
400                                         }
401                                 }
402 out:
403                                 return file;
404                         }
405                 }
406                 path_release(&nd);
407         }
408         goto out;
409 }
410
411 int kernel_read(struct file *file, unsigned long offset,
412         char * addr, unsigned long count)
413 {
414         mm_segment_t old_fs;
415         loff_t pos = offset;
416         int result = -ENOSYS;
417
418         if (!file->f_op->read)
419                 goto fail;
420         old_fs = get_fs();
421         set_fs(get_ds());
422         result = file->f_op->read(file, addr, count, &pos);
423         set_fs(old_fs);
424 fail:
425         return result;
426 }
427
428 static int exec_mmap(void)
429 {
430         struct mm_struct * mm, * old_mm;
431
432         old_mm = current->mm;
433
434         if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
435                 mm_release();
436                 down_write(&old_mm->mmap_sem);
437                 exit_mmap(old_mm);
438                 up_write(&old_mm->mmap_sem);
439                 return 0;
440         }
441
442
443         mm = mm_alloc();
444         if (mm) {
445                 struct mm_struct *active_mm;
446
447                 if (init_new_context(current, mm)) {
448                         mmdrop(mm);
449                         return -ENOMEM;
450                 }
451
452                 /* Add it to the list of mm's */
453                 spin_lock(&mmlist_lock);
454                 list_add(&mm->mmlist, &init_mm.mmlist);
455                 mmlist_nr++;
456                 spin_unlock(&mmlist_lock);
457
458                 task_lock(current);
459                 active_mm = current->active_mm;
460                 current->mm = mm;
461                 current->active_mm = mm;
462                 task_unlock(current);
463                 activate_mm(active_mm, mm);
464                 mm_release();
465                 if (old_mm) {
466                         if (active_mm != old_mm) BUG();
467                         mmput(old_mm);
468                         return 0;
469                 }
470                 mmdrop(active_mm);
471                 return 0;
472         }
473         return -ENOMEM;
474 }
475
476 /*
477  * This function makes sure the current process has its own signal table,
478  * so that flush_signal_handlers can later reset the handlers without
479  * disturbing other processes.  (Other processes might share the signal
480  * table via the CLONE_SIGNAL option to clone().)
481  */
482  
483 static inline int make_private_signals(void)
484 {
485         struct signal_struct * newsig;
486
487         if (atomic_read(&current->sig->count) <= 1)
488                 return 0;
489         newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
490         if (newsig == NULL)
491                 return -ENOMEM;
492         spin_lock_init(&newsig->siglock);
493         atomic_set(&newsig->count, 1);
494         memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
495         spin_lock_irq(&current->sigmask_lock);
496         current->sig = newsig;
497         spin_unlock_irq(&current->sigmask_lock);
498         return 0;
499 }
500         
501 /*
502  * If make_private_signals() made a copy of the signal table, decrement the
503  * refcount of the original table, and free it if necessary.
504  * We don't do that in make_private_signals() so that we can back off
505  * in flush_old_exec() if an error occurs after calling make_private_signals().
506  */
507
508 static inline void release_old_signals(struct signal_struct * oldsig)
509 {
510         if (current->sig == oldsig)
511                 return;
512         if (atomic_dec_and_test(&oldsig->count))
513                 kmem_cache_free(sigact_cachep, oldsig);
514 }
515
516 /*
517  * These functions flush out all traces of the currently running executable
518  * so that a new one can be started
519  */
520
521 static inline void flush_old_files(struct files_struct * files)
522 {
523         long j = -1;
524
525         write_lock(&files->file_lock);
526         for (;;) {
527                 unsigned long set, i;
528
529                 j++;
530                 i = j * __NFDBITS;
531                 if (i >= files->max_fds || i >= files->max_fdset)
532                         break;
533                 set = files->close_on_exec->fds_bits[j];
534                 if (!set)
535                         continue;
536                 files->close_on_exec->fds_bits[j] = 0;
537                 write_unlock(&files->file_lock);
538                 for ( ; set ; i++,set >>= 1) {
539                         if (set & 1) {
540                                 sys_close(i);
541                         }
542                 }
543                 write_lock(&files->file_lock);
544
545         }
546         write_unlock(&files->file_lock);
547 }
548
549 /*
550  * An execve() will automatically "de-thread" the process.
551  * Note: we don't have to hold the tasklist_lock to test
552  * whether we might need to do this. If we're not part of
553  * a thread group, there is no way we can become one
554  * dynamically. And if we are, we only need to protect the
555  * unlink - even if we race with the last other thread exit,
556  * at worst the list_del_init() might end up being a no-op.
557  */
558 static inline void de_thread(struct task_struct *tsk)
559 {
560         if (!list_empty(&tsk->thread_group)) {
561                 write_lock_irq(&tasklist_lock);
562                 list_del_init(&tsk->thread_group);
563                 write_unlock_irq(&tasklist_lock);
564         }
565
566         /* Minor oddity: this might stay the same. */
567         tsk->tgid = tsk->pid;
568 }
569
570 void get_task_comm(char *buf, struct task_struct *tsk)
571 {
572         /* buf must be at least sizeof(tsk->comm) in size */
573         task_lock(tsk);
574         memcpy(buf, tsk->comm, sizeof(tsk->comm));
575         task_unlock(tsk);
576 }
577
578 void set_task_comm(struct task_struct *tsk, char *buf)
579 {
580         task_lock(tsk);
581         strncpy(tsk->comm, buf, sizeof(tsk->comm));
582         tsk->comm[sizeof(tsk->comm)-1]='\0';
583         task_unlock(tsk);
584 }
585
586 int flush_old_exec(struct linux_binprm * bprm)
587 {
588         char * name;
589         int i, ch, retval;
590         unsigned new_mm_dumpable;
591         struct signal_struct * oldsig;
592         struct files_struct * files;
593         char tcomm[sizeof(current->comm)];
594
595         /*
596          * Make sure we have a private signal table
597          */
598         oldsig = current->sig;
599         retval = make_private_signals();
600         if (retval) goto flush_failed;
601
602         /*
603          * Make sure we have private file handles. Ask the
604          * fork helper to do the work for us and the exit
605          * helper to do the cleanup of the old one.
606          */
607          
608         files = current->files;         /* refcounted so safe to hold */
609         retval = unshare_files();
610         if(retval)
611                 goto flush_failed;
612         
613         /* 
614          * Release all of the old mmap stuff
615          */
616         retval = exec_mmap();
617         if (retval) goto mmap_failed;
618
619         /* This is the point of no return */
620         steal_locks(files);
621         put_files_struct(files);
622         release_old_signals(oldsig);
623
624         current->sas_ss_sp = current->sas_ss_size = 0;
625
626         new_mm_dumpable = 0; /* no change */
627         if (current->euid == current->uid && current->egid == current->gid) {
628                 new_mm_dumpable = 1;
629                 current->task_dumpable = 1;
630         }
631
632         name = bprm->filename;
633         for (i=0; (ch = *(name++)) != '\0';) {
634                 if (ch == '/')
635                         i = 0;
636                 else
637                         if (i < (sizeof(tcomm) - 1))
638                                 tcomm[i++] = ch;
639         }
640         tcomm[i] = '\0';
641         set_task_comm(current, tcomm);
642
643         flush_thread();
644
645         de_thread(current);
646
647         if (bprm->e_uid != current->euid || bprm->e_gid != current->egid) {
648                 current->mm->dumpable = 0;
649                 current->pdeath_signal = 0;
650         } else if (permission(bprm->file->f_dentry->d_inode, MAY_READ)) {
651                 current->mm->dumpable = 0;
652         } else if (new_mm_dumpable)
653                 current->mm->dumpable = 1;
654
655         /* An exec changes our domain. We are no longer part of the thread
656            group */
657            
658         current->self_exec_id++;
659                         
660         flush_signal_handlers(current);
661         flush_old_files(current->files);
662
663         return 0;
664
665 mmap_failed:
666         put_files_struct(current->files);
667         current->files = files;
668 flush_failed:
669         spin_lock_irq(&current->sigmask_lock);
670         if (current->sig != oldsig) {
671                 kmem_cache_free(sigact_cachep, current->sig);
672                 current->sig = oldsig;
673         }
674         spin_unlock_irq(&current->sigmask_lock);
675         return retval;
676 }
677
678 /*
679  * We mustn't allow tracing of suid binaries, unless
680  * the tracer has the capability to trace anything..
681  */
682 static inline int must_not_trace_exec(struct task_struct * p)
683 {
684         return (p->ptrace & PT_PTRACED) && !(p->ptrace & PT_PTRACE_CAP);
685 }
686
687 /* 
688  * Fill the binprm structure from the inode. 
689  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
690  */
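/*
 * For reference, the bytes read into bprm->buf here are what the
 * individual handlers match on later, e.g. "\177ELF" for ELF images or
 * "#!" for interpreter scripts; the actual magic checks live in the
 * respective binfmt_* modules, not in this file.
 */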
691 int prepare_binprm(struct linux_binprm *bprm)
692 {
693         int mode;
694         struct inode * inode = bprm->file->f_dentry->d_inode;
695
696         mode = inode->i_mode;
697         /*
698          * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
699          * vfs_permission lets a non-executable through
700          */
701         if (!(mode & 0111))     /* with at least _one_ execute bit set */
702                 return -EACCES;
703         if (bprm->file->f_op == NULL)
704                 return -EACCES;
705
706         bprm->e_uid = current->euid;
707         bprm->e_gid = current->egid;
708
709         if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
710                 /* Set-uid? */
711                 if (mode & S_ISUID)
712                         bprm->e_uid = inode->i_uid;
713
714                 /* Set-gid? */
715                 /*
716                  * If setgid is set but no group execute bit then this
717                  * is a candidate for mandatory locking, not a setgid
718                  * executable.
719                  */
720                 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
721                         bprm->e_gid = inode->i_gid;
722         }
723
724         /* We don't have VFS support for capabilities yet */
725         cap_clear(bprm->cap_inheritable);
726         cap_clear(bprm->cap_permitted);
727         cap_clear(bprm->cap_effective);
728
729         /*  To support inheritance of root-permissions and suid-root
730          *  executables under compatibility mode, we raise all three
731          *  capability sets for the file.
732          *
733          *  If only the real uid is 0, we only raise the inheritable
734          *  and permitted sets of the executable file.
735          */
736
737         if (!issecure(SECURE_NOROOT)) {
738                 if (bprm->e_uid == 0 || current->uid == 0) {
739                         cap_set_full(bprm->cap_inheritable);
740                         cap_set_full(bprm->cap_permitted);
741                 }
742                 if (bprm->e_uid == 0) 
743                         cap_set_full(bprm->cap_effective);
744         }
745
746         memset(bprm->buf,0,BINPRM_BUF_SIZE);
747         return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
748 }
749
750 /*
751  * This function is used to produce the new IDs and capabilities
752  * from the old ones and the file's capabilities.
753  *
754  * The formula used for evolving capabilities is:
755  *
756  *       pI' = pI
757  * (***) pP' = (fP & X) | (fI & pI)
758  *       pE' = pP' & fE          [NB. fE is 0 or ~0]
759  *
760  * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
761  * ' indicates post-exec(), and X is the global 'cap_bset'.
762  *
763  */
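/*
 * A worked example of the formula above, assuming the bounding set X is
 * still the full set: a set-uid-root binary run by an ordinary user with
 * !SECURE_NOROOT had fI, fP and fE raised to the full set in
 * prepare_binprm(), so pP' = (fP & X) | (fI & pI) = X | pI, which covers
 * every capability in the bounding set, and pE' = pP' & fE = pP'.  The
 * process therefore leaves exec() with the traditional all-powerful root
 * credentials.
 */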
764
765 void compute_creds(struct linux_binprm *bprm) 
766 {
767         kernel_cap_t new_permitted, working;
768         int do_unlock = 0;
769
770         new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
771         working = cap_intersect(bprm->cap_inheritable,
772                                 current->cap_inheritable);
773         new_permitted = cap_combine(new_permitted, working);
774
775         if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
776             !cap_issubset(new_permitted, current->cap_permitted)) {
777                 current->mm->dumpable = 0;
778                 current->pdeath_signal = 0;
779                 
780                 lock_kernel();
781                 if (must_not_trace_exec(current)
782                     || atomic_read(&current->fs->count) > 1
783                     || atomic_read(&current->files->count) > 1
784                     || atomic_read(&current->sig->count) > 1) {
785                         if(!capable(CAP_SETUID)) {
786                                 bprm->e_uid = current->uid;
787                                 bprm->e_gid = current->gid;
788                         }
789                         if(!capable(CAP_SETPCAP)) {
790                                 new_permitted = cap_intersect(new_permitted,
791                                                         current->cap_permitted);
792                         }
793                 }
794                 do_unlock = 1;
795         }
796
797
798         /* For init, we want to retain the capabilities set
799          * in the init_task struct. Thus we skip the usual
800          * capability rules */
801         if (current->pid != 1) {
802                 current->cap_permitted = new_permitted;
803                 current->cap_effective =
804                         cap_intersect(new_permitted, bprm->cap_effective);
805         }
806         
807         /* AUD: Audit candidate if current->cap_effective is set */
808
809         current->suid = current->euid = current->fsuid = bprm->e_uid;
810         current->sgid = current->egid = current->fsgid = bprm->e_gid;
811
812         if(do_unlock)
813                 unlock_kernel();
814         current->keep_capabilities = 0;
815 }
816
817
818 void remove_arg_zero(struct linux_binprm *bprm)
819 {
820         if (bprm->argc) {
821                 unsigned long offset;
822                 char * kaddr;
823                 struct page *page;
824
825                 offset = bprm->p % PAGE_SIZE;
826                 goto inside;
827
828                 while (bprm->p++, *(kaddr+offset++)) {
829                         if (offset != PAGE_SIZE)
830                                 continue;
831                         offset = 0;
832                         kunmap(page);
833 inside:
834                         page = bprm->page[bprm->p/PAGE_SIZE];
835                         kaddr = kmap(page);
836                 }
837                 kunmap(page);
838                 bprm->argc--;
839         }
840 }
841
842 /*
843  * cycle through the list of binary format handlers until one recognizes the image
844  */
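/*
 * Roughly: an ELF image is claimed by binfmt_elf, a "#!" script by
 * binfmt_script, and so on.  If no registered handler accepts the image
 * and CONFIG_KMOD is set, the code below guesses a module name of the
 * form "binfmt-%04x" from bytes 2-3 of the header and asks modprobe for
 * it before the second pass, unless the first four bytes look like
 * printable text, in which case it simply gives up with -ENOEXEC.
 */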
845 int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
846 {
847         int try,retval=0;
848         struct linux_binfmt *fmt;
849 #ifdef __alpha__
850         /* handle /sbin/loader.. */
851         {
852             struct exec * eh = (struct exec *) bprm->buf;
853
854             if (!bprm->loader && eh->fh.f_magic == 0x183 &&
855                 (eh->fh.f_flags & 0x3000) == 0x3000)
856             {
857                 struct file * file;
858                 unsigned long loader;
859
860                 allow_write_access(bprm->file);
861                 fput(bprm->file);
862                 bprm->file = NULL;
863
864                 loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
865
866                 file = open_exec("/sbin/loader");
867                 retval = PTR_ERR(file);
868                 if (IS_ERR(file))
869                         return retval;
870
871                 /* Remember if the application is TASO.  */
872                 bprm->sh_bang = eh->ah.entry < 0x100000000;
873
874                 bprm->file = file;
875                 bprm->loader = loader;
876                 retval = prepare_binprm(bprm);
877                 if (retval<0)
878                         return retval;
879                 /* should call search_binary_handler recursively here,
880                    but it does not matter */
881             }
882         }
883 #endif
884         /* kernel module loader fixup */
885         /* so we don't try to run modprobe in kernel space. */
886         set_fs(USER_DS);
887         for (try=0; try<2; try++) {
888                 read_lock(&binfmt_lock);
889                 for (fmt = formats ; fmt ; fmt = fmt->next) {
890                         int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
891                         if (!fn)
892                                 continue;
893                         if (!try_inc_mod_count(fmt->module))
894                                 continue;
895                         read_unlock(&binfmt_lock);
896                         retval = fn(bprm, regs);
897                         if (retval >= 0) {
898                                 put_binfmt(fmt);
899                                 allow_write_access(bprm->file);
900                                 if (bprm->file)
901                                         fput(bprm->file);
902                                 bprm->file = NULL;
903                                 current->did_exec = 1;
904                                 return retval;
905                         }
906                         read_lock(&binfmt_lock);
907                         put_binfmt(fmt);
908                         if (retval != -ENOEXEC)
909                                 break;
910                         if (!bprm->file) {
911                                 read_unlock(&binfmt_lock);
912                                 return retval;
913                         }
914                 }
915                 read_unlock(&binfmt_lock);
916                 if (retval != -ENOEXEC) {
917                         break;
918 #ifdef CONFIG_KMOD
919                 }else{
920 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
921                         char modname[20];
922                         if (printable(bprm->buf[0]) &&
923                             printable(bprm->buf[1]) &&
924                             printable(bprm->buf[2]) &&
925                             printable(bprm->buf[3]))
926                                 break; /* -ENOEXEC */
927                         sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
928                         request_module(modname);
929 #endif
930                 }
931         }
932         return retval;
933 }
934
935
936 /*
937  * sys_execve() executes a new program.
938  */
939 int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
940 {
941         struct linux_binprm bprm;
942         struct file *file;
943         int retval;
944         int i;
945
946         file = open_exec(filename);
947
948         retval = PTR_ERR(file);
949         if (IS_ERR(file))
950                 return retval;
951
952         bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
953         memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); 
954
955         bprm.file = file;
956         bprm.filename = filename;
957         bprm.sh_bang = 0;
958         bprm.loader = 0;
959         bprm.exec = 0;
960         if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
961                 allow_write_access(file);
962                 fput(file);
963                 return bprm.argc;
964         }
965
966         if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
967                 allow_write_access(file);
968                 fput(file);
969                 return bprm.envc;
970         }
971
972         retval = prepare_binprm(&bprm);
973         if (retval < 0) 
974                 goto out; 
975
976         retval = copy_strings_kernel(1, &bprm.filename, &bprm);
977         if (retval < 0) 
978                 goto out; 
979
980         bprm.exec = bprm.p;
981         retval = copy_strings(bprm.envc, envp, &bprm);
982         if (retval < 0) 
983                 goto out; 
984
985         retval = copy_strings(bprm.argc, argv, &bprm);
986         if (retval < 0) 
987                 goto out; 
988
989         retval = search_binary_handler(&bprm,regs);
990         if (retval >= 0)
991                 /* execve success */
992                 return retval;
993
994 out:
995         /* Something went wrong, return the inode and free the argument pages*/
996         allow_write_access(bprm.file);
997         if (bprm.file)
998                 fput(bprm.file);
999
1000         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
1001                 struct page * page = bprm.page[i];
1002                 if (page)
1003                         __free_page(page);
1004         }
1005
1006         return retval;
1007 }
1008
1009 void set_binfmt(struct linux_binfmt *new)
1010 {
1011         struct linux_binfmt *old = current->binfmt;
1012         if (new && new->module)
1013                 __MOD_INC_USE_COUNT(new->module);
1014         current->binfmt = new;
1015         if (old && old->module)
1016                 __MOD_DEC_USE_COUNT(old->module);
1017 }
1018
1019 #define CORENAME_MAX_SIZE 64
1020
1021 /* format_corename will inspect the pattern parameter, and output a
1022  * name into corename, which must have space for at least
1023  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
1024  */
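/*
 * Example with hypothetical values: a pattern of "/tmp/core-%e-%p" and a
 * crash of a process named "myprog" with pid 1234 yields the corename
 * "/tmp/core-myprog-1234"; "%s" would add the signal number and "%t" the
 * UNIX time of the dump.
 */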
1025 void format_corename(char *corename, const char *pattern, long signr)
1026 {
1027         const char *pat_ptr = pattern;
1028         char *out_ptr = corename;
1029         char *const out_end = corename + CORENAME_MAX_SIZE;
1030         int rc;
1031         int pid_in_pattern = 0;
1032
1033         /* Repeat as long as we have more pattern to process and more output
1034            space */
1035         while (*pat_ptr) {
1036                 if (*pat_ptr != '%') {
1037                         if (out_ptr == out_end)
1038                                 goto out;
1039                         *out_ptr++ = *pat_ptr++;
1040                 } else {
1041                         switch (*++pat_ptr) {
1042                         case 0:
1043                                 goto out;
1044                         /* Double percent, output one percent */
1045                         case '%':
1046                                 if (out_ptr == out_end)
1047                                         goto out;
1048                                 *out_ptr++ = '%';
1049                                 break;
1050                         /* pid */
1051                         case 'p':
1052                                 pid_in_pattern = 1;
1053                                 rc = snprintf(out_ptr, out_end - out_ptr,
1054                                               "%d", current->pid);
1055                                 if (rc > out_end - out_ptr)
1056                                         goto out;
1057                                 out_ptr += rc;
1058                                 break;
1059                         /* uid */
1060                         case 'u':
1061                                 rc = snprintf(out_ptr, out_end - out_ptr,
1062                                               "%d", current->uid);
1063                                 if (rc > out_end - out_ptr)
1064                                         goto out;
1065                                 out_ptr += rc;
1066                                 break;
1067                         /* gid */
1068                         case 'g':
1069                                 rc = snprintf(out_ptr, out_end - out_ptr,
1070                                               "%d", current->gid);
1071                                 if (rc > out_end - out_ptr)
1072                                         goto out;
1073                                 out_ptr += rc;
1074                                 break;
1075                         /* signal that caused the coredump */
1076                         case 's':
1077                                 rc = snprintf(out_ptr, out_end - out_ptr,
1078                                               "%ld", signr);
1079                                 if (rc > out_end - out_ptr)
1080                                         goto out;
1081                                 out_ptr += rc;
1082                                 break;
1083                         /* UNIX time of coredump */
1084                         case 't': {
1085                                 struct timeval tv;
1086                                 do_gettimeofday(&tv);
1087                                 rc = snprintf(out_ptr, out_end - out_ptr,
1088                                               "%ld", tv.tv_sec);
1089                                 if (rc > out_end - out_ptr)
1090                                         goto out;
1091                                 out_ptr += rc;
1092                                 break;
1093                         }
1094                         /* hostname */
1095                         case 'h':
1096                                 down_read(&uts_sem);
1097                                 rc = snprintf(out_ptr, out_end - out_ptr,
1098                                               "%s", system_utsname.nodename);
1099                                 up_read(&uts_sem);
1100                                 if (rc > out_end - out_ptr)
1101                                         goto out;
1102                                 out_ptr += rc;
1103                                 break;
1104                         /* executable */
1105                         case 'e':
1106                                 rc = snprintf(out_ptr, out_end - out_ptr,
1107                                               "%s", current->comm);
1108                                 if (rc > out_end - out_ptr)
1109                                         goto out;
1110                                 out_ptr += rc;
1111                                 break;
1112                         default:
1113                                 break;
1114                         }
1115                         ++pat_ptr;
1116                 }
1117         }
1118         /* Backward compatibility with core_uses_pid:
1119          *
1120          * If core_pattern does not include a %p (as is the default)
1121          * and core_uses_pid is set, then .%pid will be appended to
1122          * the filename */
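        /* e.g. a dump from pid 1234 with the default pattern "core" and
           core_uses_pid set then ends up in "core.1234". */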
1123         if (!pid_in_pattern
1124             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
1125                 rc = snprintf(out_ptr, out_end - out_ptr,
1126                               ".%d", current->pid);
1127                 if (rc > out_end - out_ptr)
1128                         goto out;
1129                 out_ptr += rc;
1130         }
1131       out:
1132         *out_ptr = 0;
1133 }
1134
1135 int do_coredump(long signr, struct pt_regs * regs)
1136 {
1137         struct linux_binfmt * binfmt;
1138         char corename[CORENAME_MAX_SIZE + 1];
1139         struct file * file;
1140         struct inode * inode;
1141         int retval = 0;
1142         int fsuid = current->fsuid;
1143
1144         lock_kernel();
1145         binfmt = current->binfmt;
1146         if (!binfmt || !binfmt->core_dump)
1147                 goto fail;
1148         if (!is_dumpable(current))
1149         {
1150                 if(!core_setuid_ok || !current->task_dumpable)
1151                         goto fail;
1152                 current->fsuid = 0;
1153         }
1154         current->mm->dumpable = 0;
1155         if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
1156                 goto fail;
1157
1158         format_corename(corename, core_pattern, signr);
1159         file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
1160         if (IS_ERR(file))
1161                 goto fail;
1162         inode = file->f_dentry->d_inode;
1163         if (inode->i_nlink > 1)
1164                 goto close_fail;        /* multiple links - don't dump */
1165         if (d_unhashed(file->f_dentry))
1166                 goto close_fail;
1167
1168         if (!S_ISREG(inode->i_mode))
1169                 goto close_fail;
1170         /*
1171          * Don't allow local users to get cute and trick others into dumping
1172          * core into their pre-created files:
1173          */
1174         if (inode->i_uid != current->fsuid)
1175                 goto close_fail;
1176         if (!file->f_op)
1177                 goto close_fail;
1178         if (!file->f_op->write)
1179                 goto close_fail;
1180         if (do_truncate(file->f_dentry, 0) != 0)
1181                 goto close_fail;
1182
1183         retval = binfmt->core_dump(signr, regs, file);
1184
1185 close_fail:
1186         filp_close(file, NULL);
1187 fail:
1188         if (fsuid != current->fsuid)
1189                 current->fsuid = fsuid;
1190         unlock_kernel();
1191         return retval;
1192 }