[S390] System z large page support.
arch/s390/mm/vmem.c
/*
 *  arch/s390/mm/vmem.c
 *
 *    Copyright IBM Corp. 2006
 *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
 */

#include <linux/bootmem.h>
#include <linux/pfn.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/hugetlb.h>
#include <asm/pgalloc.h>
#include <asm/pgtable.h>
#include <asm/setup.h>
#include <asm/tlbflush.h>
#include <asm/sections.h>

static DEFINE_MUTEX(vmem_mutex);

struct memory_segment {
	struct list_head list;
	unsigned long start;
	unsigned long size;
};

static LIST_HEAD(mem_segs);

void __meminit memmap_init(unsigned long size, int nid, unsigned long zone,
			   unsigned long start_pfn)
{
	struct page *start, *end;
	struct page *map_start, *map_end;
	int i;

	start = pfn_to_page(start_pfn);
	end = start + size;

	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
		unsigned long cstart, cend;

		cstart = PFN_DOWN(memory_chunk[i].addr);
		cend = cstart + PFN_DOWN(memory_chunk[i].size);

		map_start = mem_map + cstart;
		map_end = mem_map + cend;

		if (map_start < start)
			map_start = start;
		if (map_end > end)
			map_end = end;

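		/*
		 * Extend the range to the start and end of the memmap pages
		 * it touches, so that struct pages sharing a memmap page
		 * with this chunk are initialized as well.
		 */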
		map_start -= ((unsigned long) map_start & (PAGE_SIZE - 1))
			/ sizeof(struct page);
		map_end += ((PFN_ALIGN((unsigned long) map_end)
			     - (unsigned long) map_end)
			    / sizeof(struct page));

		if (map_start < map_end)
			memmap_init_zone((unsigned long)(map_end - map_start),
					 nid, zone, page_to_pfn(map_start),
					 MEMMAP_EARLY);
	}
}

static void __ref *vmem_alloc_pages(unsigned int order)
{
	if (slab_is_available())
		return (void *)__get_free_pages(GFP_KERNEL, order);
	return alloc_bootmem_pages((1 << order) * PAGE_SIZE);
}

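/*
 * On 64 bit a region-third (pud) or segment (pmd) table occupies four
 * pages (2048 entries of 8 bytes each), hence the order-2 allocations
 * below. On 31 bit the pud and pmd levels are folded into the pgd, so
 * no table is allocated and NULL is returned.
 */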
static inline pud_t *vmem_pud_alloc(void)
{
	pud_t *pud = NULL;

#ifdef CONFIG_64BIT
	pud = vmem_alloc_pages(2);
	if (!pud)
		return NULL;
	clear_table((unsigned long *) pud, _REGION3_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pud;
}

static inline pmd_t *vmem_pmd_alloc(void)
{
	pmd_t *pmd = NULL;

#ifdef CONFIG_64BIT
	pmd = vmem_alloc_pages(2);
	if (!pmd)
		return NULL;
	clear_table((unsigned long *) pmd, _SEGMENT_ENTRY_EMPTY, PAGE_SIZE * 4);
#endif
	return pmd;
}

static pte_t __init_refok *vmem_pte_alloc(void)
{
	pte_t *pte;

	if (slab_is_available())
		pte = (pte_t *) page_table_alloc(&init_mm);
	else
		pte = alloc_bootmem(PTRS_PER_PTE * sizeof(pte_t));
	if (!pte)
		return NULL;
	clear_table((unsigned long *) pte, _PAGE_TYPE_EMPTY,
		    PTRS_PER_PTE * sizeof(pte_t));
	return pte;
}

/*
 * Add a physical memory range to the 1:1 mapping.
 */
static int vmem_add_range(unsigned long start, unsigned long size, int ro)
{
	unsigned long address;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t  pte;
	int ret = -ENOMEM;

	for (address = start; address < start + size; address += PAGE_SIZE) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
		}

		pte = mk_pte_phys(address, __pgprot(ro ? _PAGE_RO : 0));
		pm_dir = pmd_offset(pu_dir, address);

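		/*
		 * Map the address with a large (HPAGE_SIZE) segment entry if
		 * the machine supports it (MACHINE_HAS_HPAGE), the address
		 * is HPAGE_SIZE aligned, a full large page still fits into
		 * the range, and the address is past the first large page,
		 * which stays mapped with 4 KB pages.
		 */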
#ifdef __s390x__
		if (MACHINE_HAS_HPAGE && !(address & ~HPAGE_MASK) &&
		    (address + HPAGE_SIZE <= start + size) &&
		    (address >= HPAGE_SIZE)) {
			pte_val(pte) |= _SEGMENT_ENTRY_LARGE;
			pmd_val(*pm_dir) = pte_val(pte);
			address += HPAGE_SIZE - PAGE_SIZE;
			continue;
		}
#endif
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc();
			if (!pt_dir)
				goto out;
			pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		*pt_dir = pte;
	}
	ret = 0;
out:
	flush_tlb_kernel_range(start, start + size);
	return ret;
}

/*
 * Remove a physical memory range from the 1:1 mapping.
 * Currently only invalidates page table entries.
 */
static void vmem_remove_range(unsigned long start, unsigned long size)
{
	unsigned long address;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t  pte;

	pte_val(pte) = _PAGE_TYPE_EMPTY;
	for (address = start; address < start + size; address += PAGE_SIZE) {
		pg_dir = pgd_offset_k(address);
		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir))
			continue;
		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir))
			continue;

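		/*
		 * A range mapped by a large segment entry is unmapped by
		 * clearing that single entry; skip ahead to the next large
		 * page boundary.
		 */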
		if (pmd_huge(*pm_dir)) {
			pmd_clear_kernel(pm_dir);
			address += HPAGE_SIZE - PAGE_SIZE;
			continue;
		}

		pt_dir = pte_offset_kernel(pm_dir, address);
		*pt_dir = pte;
	}
	flush_tlb_kernel_range(start, start + size);
}

/*
 * Add a backed mem_map array to the virtual mem_map array.
 */
static int vmem_add_mem_map(unsigned long start, unsigned long size)
{
	unsigned long address, start_addr, end_addr;
	struct page *map_start, *map_end;
	pgd_t *pg_dir;
	pud_t *pu_dir;
	pmd_t *pm_dir;
	pte_t *pt_dir;
	pte_t  pte;
	int ret = -ENOMEM;

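	/*
	 * Determine which part of the virtual mem_map (VMEM_MAP) describes
	 * the new memory range and round it to full pages.
	 */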
	map_start = VMEM_MAP + PFN_DOWN(start);
	map_end = VMEM_MAP + PFN_DOWN(start + size);

	start_addr = (unsigned long) map_start & PAGE_MASK;
	end_addr = PFN_ALIGN((unsigned long) map_end);

	for (address = start_addr; address < end_addr; address += PAGE_SIZE) {
		pg_dir = pgd_offset_k(address);
		if (pgd_none(*pg_dir)) {
			pu_dir = vmem_pud_alloc();
			if (!pu_dir)
				goto out;
			pgd_populate_kernel(&init_mm, pg_dir, pu_dir);
		}

		pu_dir = pud_offset(pg_dir, address);
		if (pud_none(*pu_dir)) {
			pm_dir = vmem_pmd_alloc();
			if (!pm_dir)
				goto out;
			pud_populate_kernel(&init_mm, pu_dir, pm_dir);
		}

		pm_dir = pmd_offset(pu_dir, address);
		if (pmd_none(*pm_dir)) {
			pt_dir = vmem_pte_alloc();
			if (!pt_dir)
				goto out;
			pmd_populate_kernel(&init_mm, pm_dir, pt_dir);
		}

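		/*
		 * Back each page of the virtual mem_map with a freshly
		 * allocated page the first time it is mapped.
		 */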
		pt_dir = pte_offset_kernel(pm_dir, address);
		if (pte_none(*pt_dir)) {
			unsigned long new_page;

			new_page = __pa(vmem_alloc_pages(0));
			if (!new_page)
				goto out;
			pte = pfn_pte(new_page >> PAGE_SHIFT, PAGE_KERNEL);
			*pt_dir = pte;
		}
	}
	ret = 0;
out:
	flush_tlb_kernel_range(start_addr, end_addr);
	return ret;
}

static int vmem_add_mem(unsigned long start, unsigned long size, int ro)
{
	int ret;

	ret = vmem_add_mem_map(start, size);
	if (ret)
		return ret;
	return vmem_add_range(start, size, ro);
}

/*
 * Add memory segment to the segment list if it doesn't overlap with
 * an already present segment.
 */
static int insert_memory_segment(struct memory_segment *seg)
{
	struct memory_segment *tmp;

	if (seg->start + seg->size >= VMEM_MAX_PHYS ||
	    seg->start + seg->size < seg->start)
		return -ERANGE;

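	/* Reject the new segment if it overlaps an existing one. */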
	list_for_each_entry(tmp, &mem_segs, list) {
		if (seg->start >= tmp->start + tmp->size)
			continue;
		if (seg->start + seg->size <= tmp->start)
			continue;
		return -ENOSPC;
	}
	list_add(&seg->list, &mem_segs);
	return 0;
}

/*
 * Remove memory segment from the segment list.
 */
static void remove_memory_segment(struct memory_segment *seg)
{
	list_del(&seg->list);
}

static void __remove_shared_memory(struct memory_segment *seg)
{
	remove_memory_segment(seg);
	vmem_remove_range(seg->start, seg->size);
}

int remove_shared_memory(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	int ret;

	mutex_lock(&vmem_mutex);

	ret = -ENOENT;
	list_for_each_entry(seg, &mem_segs, list) {
		if (seg->start == start && seg->size == size)
			break;
	}

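	/*
	 * If no matching segment was found, the loop above runs to
	 * completion and seg does not point to a valid entry; the check
	 * below then fails and -ENOENT is returned.
	 */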
	if (seg->start != start || seg->size != size)
		goto out;

	ret = 0;
	__remove_shared_memory(seg);
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

int add_shared_memory(unsigned long start, unsigned long size)
{
	struct memory_segment *seg;
	struct page *page;
	unsigned long pfn, num_pfn, end_pfn;
	int ret;

	mutex_lock(&vmem_mutex);
	ret = -ENOMEM;
	seg = kzalloc(sizeof(*seg), GFP_KERNEL);
	if (!seg)
		goto out;
	seg->start = start;
	seg->size = size;

	ret = insert_memory_segment(seg);
	if (ret)
		goto out_free;

	ret = vmem_add_mem(start, size, 0);
	if (ret)
		goto out_remove;

	pfn = PFN_DOWN(start);
	num_pfn = PFN_DOWN(size);
	end_pfn = pfn + num_pfn;

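	/*
	 * Initialize the struct pages of the added range: clear them, set
	 * the page count and mapcount, and mark every page reserved.
	 */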
	page = pfn_to_page(pfn);
	memset(page, 0, num_pfn * sizeof(struct page));

	for (; pfn < end_pfn; pfn++) {
		page = pfn_to_page(pfn);
		init_page_count(page);
		reset_page_mapcount(page);
		SetPageReserved(page);
		INIT_LIST_HEAD(&page->lru);
	}
	goto out;

out_remove:
	__remove_shared_memory(seg);
out_free:
	kfree(seg);
out:
	mutex_unlock(&vmem_mutex);
	return ret;
}

/*
 * Map the whole physical memory to virtual memory (identity mapping).
 * We reserve enough space in the vmalloc area for the vmemmap to hotplug
 * additional memory segments.
 */
void __init vmem_map_init(void)
{
	unsigned long ro_start, ro_end;
	unsigned long start, end;
	int i;

	INIT_LIST_HEAD(&init_mm.context.crst_list);
	INIT_LIST_HEAD(&init_mm.context.pgtable_list);
	init_mm.context.noexec = 0;
	NODE_DATA(0)->node_mem_map = VMEM_MAP;
	ro_start = ((unsigned long)&_stext) & PAGE_MASK;
	ro_end = PFN_ALIGN((unsigned long)&_eshared);
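	/*
	 * Map each memory chunk, with the kernel text and shared read-only
	 * data (_stext.._eshared) mapped read-only and everything else
	 * read-write, splitting chunks that straddle those boundaries.
	 */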
	for (i = 0; i < MEMORY_CHUNKS && memory_chunk[i].size > 0; i++) {
		start = memory_chunk[i].addr;
		end = memory_chunk[i].addr + memory_chunk[i].size;
		if (start >= ro_end || end <= ro_start)
			vmem_add_mem(start, end - start, 0);
		else if (start >= ro_start && end <= ro_end)
			vmem_add_mem(start, end - start, 1);
		else if (start >= ro_start) {
			vmem_add_mem(start, ro_end - start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		} else if (end < ro_end) {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, end - ro_start, 1);
		} else {
			vmem_add_mem(start, ro_start - start, 0);
			vmem_add_mem(ro_start, ro_end - ro_start, 1);
			vmem_add_mem(ro_end, end - ro_end, 0);
		}
	}
}

/*
 * Convert memory chunk array to a memory segment list so there is a single
 * list that contains both r/w memory and shared memory segments.
 */
static int __init vmem_convert_memory_chunk(void)
{
	struct memory_segment *seg;
	int i;

	mutex_lock(&vmem_mutex);
	for (i = 0; i < MEMORY_CHUNKS; i++) {
		if (!memory_chunk[i].size)
			continue;
		seg = kzalloc(sizeof(*seg), GFP_KERNEL);
		if (!seg)
			panic("Out of memory...\n");
		seg->start = memory_chunk[i].addr;
		seg->size = memory_chunk[i].size;
		insert_memory_segment(seg);
	}
	mutex_unlock(&vmem_mutex);
	return 0;
}

core_initcall(vmem_convert_memory_chunk);