[S390] Add four level page tables for CONFIG_64BIT=y.
[safe/jmp/linux-2.6] / arch / s390 / mm / vmem.c
1 /*
2  *  arch/s390/mm/vmem.c
3  *
4  *    Copyright IBM Corp. 2006
5  *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
6  */
7
8 #include <linux/bootmem.h>
9 #include <linux/pfn.h>
10 #include <linux/mm.h>
11 #include <linux/module.h>
12 #include <linux/list.h>
13 #include <asm/pgalloc.h>
14 #include <asm/pgtable.h>
15 #include <asm/setup.h>
16 #include <asm/tlbflush.h>
17
18 static DEFINE_MUTEX(vmem_mutex);
19
20 struct memory_segment {
21         struct list_head list;
22         unsigned long start;
23         unsigned long size;
24 };
25
26 static LIST_HEAD(mem_segs);
27
28 void __meminit memmap_init(unsigned long size, int nid, unsigned long zone,
29                            unsigned long start_pfn)
30 {
31         struct page *start, *end;
32         struct page *map_start, *map_end;
33         int i;
34
35         start = pfn_to_page(start_pfn);
36         end = start + size;
37
38         for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
39                 unsigned long cstart, cend;
40
41                 cstart = PFN_DOWN(memory_chunk[i].addr);
42                 cend = cstart + PFN_DOWN(memory_chunk[i].size);
43
44                 map_start = mem_map + cstart;
45                 map_end = mem_map + cend;
46
47                 if (map_start < start)
48                         map_start = start;
49                 if (map_end > end)
50                         map_end = end;
51
52                 map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
53                         / sizeof(struct page);
54                 map_end += ((PFN_ALIGN((unsigned long) map_end)
55                              - (unsigned long) map_end)
56                             / sizeof(struct page));
57
58                 if (map_start < map_end)
59                         memmap_init_zone((unsigned long)(map_end - map_start),
60                                          nid, zone, page_to_pfn(map_start),
61                                          MEMMAP_EARLY);
62         }
63 }
64
65 static void __ref *vmem_alloc_pages(unsigned int order)
66 {
67         if (slab_is_available())
68                 return (void *)__get_free_pages(GFP_KERNEL, order);
69         return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
70 }
71
72 static inline pud_t *vmem_pud_alloc(void)
73 {
74         pud_t *pud = NULL;
75
76 #ifdef CONFIG_64BIT
77         pud = vmem_alloc_pages(2);
78         if (!pud)
79                 return NULL;
80         pud_val(*pud) = _REGION3_ENTRY_EMPTY;
81         memcpy(pud + 1, pud, (PTRS_PER_PUD - 1)*sizeof(pud_t));
82 #endif
83         return pud;
84 }
85
86 static inline pmd_t *vmem_pmd_alloc(void)
87 {
88         pmd_t *pmd = NULL;
89
90 #ifdef CONFIG_64BIT
91         pmd = vmem_alloc_pages(2);
92         if (!pmd)
93                 return NULL;
94         clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE*4);
95 #endif
96         return pmd;
97 }
98
99 static pte_t __init_refok *vmem_pte_alloc(void)
100 {
101         pte_t *pte;
102
103         if (slab_is_available())
104                 pte = (pte_t *) page_table_alloc(&init_mm);
105         else
106                 pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
107         if (!pte)
108                 return NULL;
109         clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
110                     PTRS_PER_PTE * sizeof(pte_t));
111         return pte;
112 }
113
114 /*
115  * Add a physical memory range to the 1:1 mapping.
116  */
117 static int vmem_add_range(unsigned long start, unsigned long size)
118 {
119         unsigned long address;
120         pgd_t *pg_dir;
121         pud_t *pu_dir;
122         pmd_t *pm_dir;
123         pte_t *pt_dir;
124         pte_t  pte;
125         int ret = -ENOMEM;
126
127         for (address = start; address < start + size; address += PAGE_SIZE) {
128                 pg_dir = pgd_offset_k(address);
129                 if (pgd_none(*pg_dir)) {
130                         pu_dir = vmem_pud_alloc();
131                         if (!pu_dir)
132                                 goto out;
133                         pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
134                 }
135
136                 pu_dir = pud_offset(pg_dir, address);
137                 if (pud_none(*pu_dir)) {
138                         pm_dir = vmem_pmd_alloc();
139                         if (!pm_dir)
140                                 goto out;
141                         pud_populate_kernel(&init_mm, pu_dir, pm_dir);
142                 }
143
144                 pm_dir = pmd_offset(pu_dir, address);
145                 if (pmd_none(*pm_dir)) {
146                         pt_dir = vmem_pte_alloc();
147                         if (!pt_dir)
148                                 goto out;
149                         pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
150                 }
151
152                 pt_dir = pte_offset_kernel(pm_dir, address);
153                 pte = pfn_pte(address >> PAGE_SHIFT, PAGE_KERNEL);
154                 *pt_dir = pte;
155         }
156         ret = 0;
157 out:
158         flush_tlb_kernel_range(start, start + size);
159         return ret;
160 }
161
162 /*
163  * Remove a physical memory range from the 1:1 mapping.
164  * Currently only invalidates page table entries.
165  */
166 static void vmem_remove_range(unsigned long start, unsigned long size)
167 {
168         unsigned long address;
169         pgd_t *pg_dir;
170         pud_t *pu_dir;
171         pmd_t *pm_dir;
172         pte_t *pt_dir;
173         pte_t  pte;
174
175         pte_val(pte) = _PAGE_TYPE_EMPTY;
176         for (address = start; address < start + size; address += PAGE_SIZE) {
177                 pg_dir = pgd_offset_k(address);
178                 pu_dir = pud_offset(pg_dir, address);
179                 if (pud_none(*pu_dir))
180                         continue;
181                 pm_dir = pmd_offset(pu_dir, address);
182                 if (pmd_none(*pm_dir))
183                         continue;
184                 pt_dir = pte_offset_kernel(pm_dir, address);
185                 *pt_dir = pte;
186         }
187         flush_tlb_kernel_range(start, start + size);
188 }
189
190 /*
191  * Add a backed mem_map array to the virtual mem_map array.
192  */
193 static int vmem_add_mem_map(unsigned long start, unsigned long size)
194 {
195         unsigned long address, start_addr, end_addr;
196         struct page *map_start, *map_end;
197         pgd_t *pg_dir;
198         pud_t *pu_dir;
199         pmd_t *pm_dir;
200         pte_t *pt_dir;
201         pte_t  pte;
202         int ret = -ENOMEM;
203
204         map_start = VMEM_MAP + PFN_DOWN(start);
205         map_end = VMEM_MAP + PFN_DOWN(start + size);
206
207         start_addr = (unsigned long) map_start & PAGE_MASK;
208         end_addr = PFN_ALIGN((unsigned long) map_end);
209
210         for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
211                 pg_dir = pgd_offset_k(address);
212                 if (pgd_none(*pg_dir)) {
213                         pu_dir = vmem_pud_alloc();
214                         if (!pu_dir)
215                                 goto out;
216                         pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
217                 }
218
219                 pu_dir = pud_offset(pg_dir, address);
220                 if (pud_none(*pu_dir)) {
221                         pm_dir = vmem_pmd_alloc();
222                         if (!pm_dir)
223                                 goto out;
224                         pud_populate_kernel(&init_mm, pu_dir, pm_dir);
225                 }
226
227                 pm_dir = pmd_offset(pu_dir, address);
228                 if (pmd_none(*pm_dir)) {
229                         pt_dir = vmem_pte_alloc();
230                         if (!pt_dir)
231                                 goto out;
232                         pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
233                 }
234
235                 pt_dir = pte_offset_kernel(pm_dir, address);
236                 if (pte_none(*pt_dir)) {
237                         unsigned long new_page;
238
239                         new_page =__pa(vmem_alloc_pages(0));
240                         if (!new_page)
241                                 goto out;
242                         pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
243                         *pt_dir = pte;
244                 }
245         }
246         ret = 0;
247 out:
248         flush_tlb_kernel_range(start_addr, end_addr);
249         return ret;
250 }
251
/*
 * Map a physical memory range: first back its struct page array, then add
 * the range to the 1:1 mapping.
 *
 * Returns 0 on success or the error of the first step that failed; the
 * second step is not attempted if the first one fails.
 */
static int vmem_add_mem(unsigned long start, unsigned long size)
{
	int rc = vmem_add_mem_map(start, size);

	return rc ? rc : vmem_add_range(start, size);
}
261
262 /*
263  * Add memory segment to the segment list if it doesn't overlap with
264  * an already present segment.
265  */
266 static int insert_memory_segment(struct memory_segment *seg)
267 {
268         struct memory_segment *tmp;
269
270         if (seg->start + seg->size >= VMEM_MAX_PHYS ||
271             seg->start + seg->size < seg->start)
272                 return -ERANGE;
273
274         list_for_each_entry(tmp, &mem_segs, list) {
275                 if (seg->start >= tmp->start + tmp->size)
276                         continue;
277                 if (seg->start + seg->size <= tmp->start)
278                         continue;
279                 return -ENOSPC;
280         }
281         list_add(&seg->list, &mem_segs);
282         return 0;
283 }
284
285 /*
286  * Remove memory segment from the segment list.
287  */
288 static void remove_memory_segment(struct memory_segment *seg)
289 {
290         list_del(&seg->list);
291 }
292
293 static void __remove_shared_memory(struct memory_segment *seg)
294 {
295         remove_memory_segment(seg);
296         vmem_remove_range(seg->start, seg->size);
297 }
298
299 int remove_shared_memory(unsigned long start, unsigned long size)
300 {
301         struct memory_segment *seg;
302         int ret;
303
304         mutex_lock(&vmem_mutex);
305
306         ret = -ENOENT;
307         list_for_each_entry(seg, &mem_segs, list) {
308                 if (seg->start == start && seg->size == size)
309                         break;
310         }
311
312         if (seg->start != start || seg->size != size)
313                 goto out;
314
315         ret = 0;
316         __remove_shared_memory(seg);
317         kfree(seg);
318 out:
319         mutex_unlock(&vmem_mutex);
320         return ret;
321 }
322
323 int add_shared_memory(unsigned long start, unsigned long size)
324 {
325         struct memory_segment *seg;
326         struct page *page;
327         unsigned long pfn, num_pfn, end_pfn;
328         int ret;
329
330         mutex_lock(&vmem_mutex);
331         ret = -ENOMEM;
332         seg = kzalloc(sizeof(*seg), GFP_KERNEL);
333         if (!seg)
334                 goto out;
335         seg->start = start;
336         seg->size = size;
337
338         ret = insert_memory_segment(seg);
339         if (ret)
340                 goto out_free;
341
342         ret = vmem_add_mem(start, size);
343         if (ret)
344                 goto out_remove;
345
346         pfn = PFN_DOWN(start);
347         num_pfn = PFN_DOWN(size);
348         end_pfn = pfn + num_pfn;
349
350         page = pfn_to_page(pfn);
351         memset(page, 0, num_pfn * sizeof(struct page));
352
353         for (; pfn < end_pfn; pfn++) {
354                 page = pfn_to_page(pfn);
355                 init_page_count(page);
356                 reset_page_mapcount(page);
357                 SetPageReserved(page);
358                 INIT_LIST_HEAD(&page->lru);
359         }
360         goto out;
361
362 out_remove:
363         __remove_shared_memory(seg);
364 out_free:
365         kfree(seg);
366 out:
367         mutex_unlock(&vmem_mutex);
368         return ret;
369 }
370
371 /*
372  * map whole physical memory to virtual memory (identity mapping)
373  * we reserve enough space in the vmalloc area for vmemmap to hotplug
374  * additional memory segments.
375  */
376 void __init vmem_map_init(void)
377 {
378         int i;
379
380         INIT_LIST_HEAD(&init_mm.context.crst_list);
381         INIT_LIST_HEAD(&init_mm.context.pgtable_list);
382         init_mm.context.noexec = 0;
383         NODE_DATA(0)->node_mem_map = VMEM_MAP;
384         for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++)
385                 vmem_add_mem(memory_chunk[i].addr, memory_chunk[i].size);
386 }
387
388 /*
389  * Convert memory chunk array to a memory segment list so there is a single
390  * list that contains both r/w memory and shared memory segments.
391  */
392 static int __init vmem_convert_memory_chunk(void)
393 {
394         struct memory_segment *seg;
395         int i;
396
397         mutex_lock(&vmem_mutex);
398         for (i = 0; i < MEMORY_CHUNKS; i++) {
399                 if (!memory_chunk[i].size)
400                         continue;
401                 seg = kzalloc(sizeof(*seg), GFP_KERNEL);
402                 if (!seg)
403                         panic("Out of memory...\n");
404                 seg->start = memory_chunk[i].addr;
405                 seg->size = memory_chunk[i].size;
406                 insert_memory_segment(seg);
407         }
408         mutex_unlock(&vmem_mutex);
409         return 0;
410 }
411
412 core_initcall(vmem_convert_memory_chunk);