[linux-kernel-docs/linux-2.4.36.git] / mm / vmalloc.c
/*
 *  linux/mm/vmalloc.c
 *
 *  Copyright (C) 1993  Linus Torvalds
 *  Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999
 *  SMP-safe vmalloc/vfree/ioremap, Tigran Aivazian <tigran@veritas.com>, May 2000
 */

#include <linux/config.h>
#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <linux/spinlock.h>
#include <linux/highmem.h>
#include <linux/smp_lock.h>

#include <asm/uaccess.h>
#include <asm/pgalloc.h>

rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct * vmlist;

static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned long size)
{
        pte_t * pte;
        unsigned long end;

        if (pmd_none(*pmd))
                return;
        if (pmd_bad(*pmd)) {
                pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return;
        }
        pte = pte_offset(pmd, address);
        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
                pte_t page;
                page = ptep_get_and_clear(pte);
                address += PAGE_SIZE;
                pte++;
                if (pte_none(page))
                        continue;
                if (pte_present(page)) {
                        struct page *ptpage = pte_page(page);
                        if (VALID_PAGE(ptpage) && (!PageReserved(ptpage)))
                                __free_page(ptpage);
                        continue;
                }
                printk(KERN_CRIT "Whee.. Swapped out page in kernel page table\n");
        } while (address < end);
}

static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size)
{
        pmd_t * pmd;
        unsigned long end;

        if (pgd_none(*dir))
                return;
        if (pgd_bad(*dir)) {
                pgd_ERROR(*dir);
                pgd_clear(dir);
                return;
        }
        pmd = pmd_offset(dir, address);
        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        do {
                free_area_pte(pmd, address, end - address);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
}

void vmfree_area_pages(unsigned long address, unsigned long size)
{
        pgd_t * dir;
        unsigned long end = address + size;

        dir = pgd_offset_k(address);
        flush_cache_all();
        do {
                free_area_pmd(dir, address, end - address);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        } while (address && (address < end));
        flush_tlb_all();
}

static inline int alloc_area_pte (pte_t * pte, unsigned long address,
                        unsigned long size, int gfp_mask,
                        pgprot_t prot, struct page ***pages)
{
        unsigned long end;

        address &= ~PMD_MASK;
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
                struct page * page;

                if (!pages) {
                        spin_unlock(&init_mm.page_table_lock);
                        page = alloc_page(gfp_mask);
                        spin_lock(&init_mm.page_table_lock);
                } else {
                        page = (**pages);
                        (*pages)++;

                        /* Add a reference to the page so we can free later */
                        if (page)
                                atomic_inc(&page->count);

                }
                if (!pte_none(*pte))
                        printk(KERN_ERR "alloc_area_pte: page already exists\n");
                if (!page)
                        return -ENOMEM;
                set_pte(pte, mk_pte(page, prot));
                address += PAGE_SIZE;
                pte++;
        } while (address < end);
        return 0;
}

static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address,
                        unsigned long size, int gfp_mask,
                        pgprot_t prot, struct page ***pages)
{
        unsigned long end;

        address &= ~PGDIR_MASK;
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        do {
                pte_t * pte = pte_alloc(&init_mm, pmd, address);
                if (!pte)
                        return -ENOMEM;
                if (alloc_area_pte(pte, address, end - address,
                                        gfp_mask, prot, pages))
                        return -ENOMEM;
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
        } while (address < end);
        return 0;
}

static inline int __vmalloc_area_pages (unsigned long address,
                                        unsigned long size,
                                        int gfp_mask,
                                        pgprot_t prot,
                                        struct page ***pages)
{
        pgd_t * dir;
        unsigned long start = address;
        unsigned long end = address + size;

        dir = pgd_offset_k(address);
        spin_lock(&init_mm.page_table_lock);
        do {
                pmd_t *pmd;

                pmd = pmd_alloc(&init_mm, dir, address);
                if (!pmd)
                        goto err;

                if (alloc_area_pmd(pmd, address, end - address, gfp_mask, prot, pages))
                        goto err;       /* The kernel NEVER reclaims pmds, so no need to undo pmd_alloc() here */

                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
        } while (address && (address < end));
        spin_unlock(&init_mm.page_table_lock);
        flush_cache_all();
        return 0;
err:
        spin_unlock(&init_mm.page_table_lock);
        flush_cache_all();
        if (address > start)
                vmfree_area_pages(start, address - start);
        return -ENOMEM;
}

int vmalloc_area_pages(unsigned long address, unsigned long size,
                       int gfp_mask, pgprot_t prot)
{
        return __vmalloc_area_pages(address, size, gfp_mask, prot, NULL);
}

struct vm_struct * get_vm_area(unsigned long size, unsigned long flags)
{
        unsigned long addr, next;
        struct vm_struct **p, *tmp, *area;

        area = (struct vm_struct *) kmalloc(sizeof(*area), GFP_KERNEL);
        if (!area)
                return NULL;

        /*
         * Reserve one extra, never-mapped page at the end of the area so
         * that neighbouring areas are separated by a guard gap.
         */
        size += PAGE_SIZE;
        if (!size) {
                kfree (area);
                return NULL;
        }

        addr = VMALLOC_START;
        write_lock(&vmlist_lock);
        for (p = &vmlist; (tmp = *p) ; p = &tmp->next) {
                if ((size + addr) < addr)
                        goto out;
                if (size + addr <= (unsigned long) tmp->addr)
                        break;
                next = tmp->size + (unsigned long) tmp->addr;
                if (next > addr)
                        addr = next;
                if (addr > VMALLOC_END-size)
                        goto out;
        }
        area->flags = flags;
        area->addr = (void *)addr;
        area->size = size;
        area->next = *p;
        *p = area;
        write_unlock(&vmlist_lock);
        return area;

out:
        write_unlock(&vmlist_lock);
        kfree(area);
        return NULL;
}

void __vfree(void * addr, int free_area_pages)
{
        struct vm_struct **p, *tmp;

        if (!addr)
                return;
        if ((PAGE_SIZE-1) & (unsigned long) addr) {
                printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
                return;
        }
        write_lock(&vmlist_lock);
        for (p = &vmlist ; (tmp = *p) ; p = &tmp->next) {
                if (tmp->addr == addr) {
                        *p = tmp->next;
                        if (free_area_pages)
                                vmfree_area_pages(VMALLOC_VMADDR(tmp->addr), tmp->size);
                        write_unlock(&vmlist_lock);
                        kfree(tmp);
                        return;
                }
        }
        write_unlock(&vmlist_lock);
        printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n", addr);
}

void vfree(void * addr)
{
        __vfree(addr,1);
}

void * __vmalloc (unsigned long size, int gfp_mask, pgprot_t prot)
{
        void * addr;
        struct vm_struct *area;

        size = PAGE_ALIGN(size);
        if (!size || (size >> PAGE_SHIFT) > num_physpages)
                return NULL;
        area = get_vm_area(size, VM_ALLOC);
        if (!area)
                return NULL;
        addr = area->addr;
        if (__vmalloc_area_pages(VMALLOC_VMADDR(addr), size, gfp_mask,
                                 prot, NULL)) {
                __vfree(addr, 0);
                return NULL;
        }
        return addr;
}

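/*
 * A minimal usage sketch, not part of the interface above: a driver-style
 * allocation of a large, virtually contiguous buffer.  vmalloc() is assumed
 * to be the usual <linux/vmalloc.h> inline wrapper that ends up in
 * __vmalloc() above; example_buf, example_alloc_buf() and example_free_buf()
 * are hypothetical names, and memset() assumes <linux/string.h>.  vmalloc()
 * may sleep, so it must not be called from interrupt context, and the memory
 * is only virtually (not physically) contiguous.
 */
static void *example_buf;

static int example_alloc_buf(unsigned long nbytes)
{
        example_buf = vmalloc(nbytes);
        if (!example_buf)
                return -ENOMEM;
        memset(example_buf, 0, nbytes);         /* zero the new buffer */
        return 0;
}

static void example_free_buf(void)
{
        vfree(example_buf);     /* vfree(NULL) is a no-op: __vfree() returns early */
        example_buf = NULL;
}
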
void * vmap(struct page **pages, int count,
            unsigned long flags, pgprot_t prot)
{
        void * addr;
        struct vm_struct *area;
        unsigned long size = count << PAGE_SHIFT;

        if (count <= 0 || count > max_mapnr)
                return NULL;
        area = get_vm_area(size, flags);
        if (!area) {
                return NULL;
        }
        addr = area->addr;
        if (__vmalloc_area_pages(VMALLOC_VMADDR(addr), size, 0,
                                 prot, &pages)) {
                __vfree(addr, 0);
                return NULL;
        }
        return addr;
}

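/*
 * A minimal usage sketch of vmap() above: stitch a set of individually
 * allocated pages into one virtually contiguous kernel mapping.  The name
 * example_map_four() is hypothetical, and using VM_ALLOC as the area flag
 * is an assumption; the flag is only recorded in the vm_struct.  vmap()
 * takes its own reference on every page (see alloc_area_pte() above), so a
 * later vfree() on the returned address both tears down the mapping and
 * drops those references.
 */
static void *example_map_four(struct page *example_pages[4])
{
        return vmap(example_pages, 4, VM_ALLOC, PAGE_KERNEL);
}
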
long vread(char *buf, char *addr, unsigned long count)
{
        struct vm_struct *tmp;
        char *vaddr, *buf_start = buf;
        unsigned long n;

        /* Don't allow overflow */
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;

        read_lock(&vmlist_lock);
        for (tmp = vmlist; tmp; tmp = tmp->next) {
                vaddr = (char *) tmp->addr;
                /* tmp->size includes the unmapped guard page; never read it */
                if (addr >= vaddr + tmp->size - PAGE_SIZE)
                        continue;
                while (addr < vaddr) {
                        if (count == 0)
                                goto finished;
                        *buf = '\0';
                        buf++;
                        addr++;
                        count--;
                }
                n = vaddr + tmp->size - PAGE_SIZE - addr;
                do {
                        if (count == 0)
                                goto finished;
                        *buf = *addr;
                        buf++;
                        addr++;
                        count--;
                } while (--n > 0);
        }
finished:
        read_unlock(&vmlist_lock);
        return buf - buf_start;
}

long vwrite(char *buf, char *addr, unsigned long count)
{
        struct vm_struct *tmp;
        char *vaddr, *buf_start = buf;
        unsigned long n;

        /* Don't allow overflow */
        if ((unsigned long) addr + count < count)
                count = -(unsigned long) addr;

        read_lock(&vmlist_lock);
        for (tmp = vmlist; tmp; tmp = tmp->next) {
                vaddr = (char *) tmp->addr;
                if (addr >= vaddr + tmp->size - PAGE_SIZE)
                        continue;
                while (addr < vaddr) {
                        if (count == 0)
                                goto finished;
                        buf++;
                        addr++;
                        count--;
                }
                n = vaddr + tmp->size - PAGE_SIZE - addr;
                do {
                        if (count == 0)
                                goto finished;
                        *addr = *buf;
                        buf++;
                        addr++;
                        count--;
                } while (--n > 0);
        }
finished:
        read_unlock(&vmlist_lock);
        return buf - buf_start;
}
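
/*
 * A minimal usage sketch of vread()/vwrite() above: copy a few bytes out of
 * (and back into) vmalloc space through an ordinary kernel buffer, the way a
 * /dev/kmem-style accessor might.  example_peek_poke() and its sizes are
 * hypothetical.  Bytes that fall into gaps between vm areas read back as
 * '\0' and are silently skipped on writes, mirroring the loops above, and
 * the return value is the number of buffer bytes consumed.
 */
static void example_peek_poke(char *vm_addr)
{
        char tmp[16];

        vread(tmp, vm_addr, sizeof(tmp));       /* snapshot 16 bytes */
        tmp[0] ^= 0xff;
        vwrite(tmp, vm_addr, sizeof(tmp));      /* write them back */
}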