#include <linux/cache.h>
#include <linux/init.h>
#include <linux/signal.h>
+#include <linux/lmb.h>
#include <asm/processor.h>
#include <asm/pgtable.h>
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/machdep.h>
-#include <asm/lmb.h>
+#include <asm/prom.h>
#include <asm/abs_addr.h>
#include <asm/tlbflush.h>
#include <asm/io.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/spu.h>
+#include <asm/udbg.h>
#ifdef DEBUG
#define DBG(fmt...) udbg_printf(fmt)
#define KB (1024)
#define MB (1024*KB)
+#define GB (1024L*MB)
/*
* Note: pte --> Linux PTE
int mmu_linear_psize = MMU_PAGE_4K;
int mmu_virtual_psize = MMU_PAGE_4K;
int mmu_vmalloc_psize = MMU_PAGE_4K;
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+int mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif
int mmu_io_psize = MMU_PAGE_4K;
int mmu_kernel_ssize = MMU_SEGSIZE_256M;
int mmu_highuser_ssize = MMU_SEGSIZE_256M;
+u16 mmu_slb_size = 64;
#ifdef CONFIG_HUGETLB_PAGE
-int mmu_huge_psize = MMU_PAGE_16M;
unsigned int HPAGE_SHIFT;
#endif
#ifdef CONFIG_PPC_64K_PAGES
/* Pre-POWER4 CPUs (4k pages only)
*/
-struct mmu_psize_def mmu_psize_defaults_old[] = {
+static struct mmu_psize_def mmu_psize_defaults_old[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
*
* Support for 16Mb large pages
*/
-struct mmu_psize_def mmu_psize_defaults_gp[] = {
+static struct mmu_psize_def mmu_psize_defaults_gp[] = {
[MMU_PAGE_4K] = {
.shift = 12,
.sllp = 0,
return ret < 0 ? ret : 0;
}
+#ifdef CONFIG_MEMORY_HOTPLUG
+static int htab_remove_mapping(unsigned long vstart, unsigned long vend,
+ int psize, int ssize)
+{
+ unsigned long vaddr;
+ unsigned int step, shift;
+
+ shift = mmu_psize_defs[psize].shift;
+ step = 1 << shift;
+
+ if (!ppc_md.hpte_removebolted) {
+ printk(KERN_WARNING "Platform doesn't implement "
+ "hpte_removebolted\n");
+ return -EINVAL;
+ }
+
+ for (vaddr = vstart; vaddr < vend; vaddr += step)
+ ppc_md.hpte_removebolted(vaddr, psize, ssize);
+
+ return 0;
+}
+#endif /* CONFIG_MEMORY_HOTPLUG */
+
static int __init htab_dt_scan_seg_sizes(unsigned long node,
const char *uname, int depth,
void *data)
return 0;
}
+/* Scan for 16G memory blocks that have been set aside for huge pages
+ * and reserve those blocks for 16G huge pages.
+ */
+static int __init htab_dt_scan_hugepage_blocks(unsigned long node,
+ const char *uname, int depth,
+ void *data) {
+ char *type = of_get_flat_dt_prop(node, "device_type", NULL);
+ unsigned long *addr_prop;
+ u32 *page_count_prop;
+ unsigned int expected_pages;
+ long unsigned int phys_addr;
+ long unsigned int block_size;
+
+ /* We are scanning "memory" nodes only */
+ if (type == NULL || strcmp(type, "memory") != 0)
+ return 0;
+
+ /* This property is the log base 2 of the number of virtual pages that
+ * will represent this memory block. */
+ page_count_prop = of_get_flat_dt_prop(node, "ibm,expected#pages", NULL);
+ if (page_count_prop == NULL)
+ return 0;
+ expected_pages = (1 << page_count_prop[0]);
+ addr_prop = of_get_flat_dt_prop(node, "reg", NULL);
+ if (addr_prop == NULL)
+ return 0;
+ phys_addr = addr_prop[0];
+ block_size = addr_prop[1];
+ if (block_size != (16 * GB))
+ return 0;
+ printk(KERN_INFO "Huge page(16GB) memory: "
+ "addr = 0x%lX size = 0x%lX pages = %d\n",
+ phys_addr, block_size, expected_pages);
+ lmb_reserve(phys_addr, block_size * expected_pages);
+ add_gpage(phys_addr, block_size, expected_pages);
+ return 0;
+}
+
static void __init htab_init_page_sizes(void)
{
int rc;
mmu_vmalloc_psize = MMU_PAGE_64K;
if (mmu_linear_psize == MMU_PAGE_4K)
mmu_linear_psize = MMU_PAGE_64K;
- if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE))
- mmu_io_psize = MMU_PAGE_64K;
- else
+ if (cpu_has_feature(CPU_FTR_CI_LARGE_PAGE)) {
+ /*
+ * Don't use 64k pages for ioremap on pSeries, since
+ * that would stop us accessing the HEA ethernet.
+ */
+ if (!machine_is(pseries))
+ mmu_io_psize = MMU_PAGE_64K;
+ } else
mmu_ci_restrictions = 1;
}
#endif /* CONFIG_PPC_64K_PAGES */
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ /* We try to use 16M pages for vmemmap if that is supported
+ * and we have at least 1G of RAM at boot
+ */
+ if (mmu_psize_defs[MMU_PAGE_16M].shift &&
+ lmb_phys_mem_size() >= 0x40000000)
+ mmu_vmemmap_psize = MMU_PAGE_16M;
+ else if (mmu_psize_defs[MMU_PAGE_64K].shift)
+ mmu_vmemmap_psize = MMU_PAGE_64K;
+ else
+ mmu_vmemmap_psize = MMU_PAGE_4K;
+#endif /* CONFIG_SPARSEMEM_VMEMMAP */
+
printk(KERN_DEBUG "Page orders: linear mapping = %d, "
- "virtual = %d, io = %d\n",
+ "virtual = %d, io = %d"
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ ", vmemmap = %d"
+#endif
+ "\n",
mmu_psize_defs[mmu_linear_psize].shift,
mmu_psize_defs[mmu_virtual_psize].shift,
- mmu_psize_defs[mmu_io_psize].shift);
+ mmu_psize_defs[mmu_io_psize].shift
+#ifdef CONFIG_SPARSEMEM_VMEMMAP
+ ,mmu_psize_defs[mmu_vmemmap_psize].shift
+#endif
+ );
#ifdef CONFIG_HUGETLB_PAGE
- /* Init large page size. Currently, we pick 16M or 1M depending
+ /* Reserve 16G huge page memory sections for huge pages */
+ of_scan_flat_dt(htab_dt_scan_hugepage_blocks, NULL);
+
+/* Set default large page size. Currently, we pick 16M or 1M depending
* on what is available
*/
if (mmu_psize_defs[MMU_PAGE_16M].shift)
- mmu_huge_psize = MMU_PAGE_16M;
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_16M].shift;
/* With 4k/4level pagetables, we can't (for now) cope with a
* huge page size < PMD_SIZE */
else if (mmu_psize_defs[MMU_PAGE_1M].shift)
- mmu_huge_psize = MMU_PAGE_1M;
-
- /* Calculate HPAGE_SHIFT and sanity check it */
- if (mmu_psize_defs[mmu_huge_psize].shift > MIN_HUGEPTE_SHIFT &&
- mmu_psize_defs[mmu_huge_psize].shift < SID_SHIFT)
- HPAGE_SHIFT = mmu_psize_defs[mmu_huge_psize].shift;
- else
- HPAGE_SHIFT = 0; /* No huge pages dude ! */
+ HPAGE_SHIFT = mmu_psize_defs[MMU_PAGE_1M].shift;
#endif /* CONFIG_HUGETLB_PAGE */
}
_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_COHERENT | PP_RWXX,
mmu_linear_psize, mmu_kernel_ssize));
}
+
+int remove_section_mapping(unsigned long start, unsigned long end)
+{
+ return htab_remove_mapping(start, end, mmu_linear_psize,
+ mmu_kernel_ssize);
+}
#endif /* CONFIG_MEMORY_HOTPLUG */
static inline void make_bl(unsigned int *insn_addr, void *func)
unsigned long table;
unsigned long pteg_count;
unsigned long mode_rw;
- unsigned long base = 0, size = 0;
+ unsigned long base = 0, size = 0, limit;
int i;
- extern unsigned long tce_alloc_start, tce_alloc_end;
-
DBG(" -> htab_initialize()\n");
/* Initialize segment sizes */
_SDR1 = 0;
} else {
/* Find storage for the HPT. Must be contiguous in
- * the absolute address space.
+ * the absolute address space. On cell we want it to be
+ * in the first 2 Gig so we can use it for IOMMU hacks.
*/
- table = lmb_alloc(htab_size_bytes, htab_size_bytes);
+ if (machine_is(cell))
+ limit = 0x80000000;
+ else
+ limit = 0;
+
+ table = lmb_alloc_base(htab_size_bytes, htab_size_bytes, limit);
DBG("Hash table allocated at %lx, size: %lx\n", table,
htab_size_bytes);
return pp;
}
+#ifdef CONFIG_PPC_MM_SLICES
+unsigned int get_paca_psize(unsigned long addr)
+{
+ unsigned long index, slices;
+
+ if (addr < SLICE_LOW_TOP) {
+ slices = get_paca()->context.low_slices_psize;
+ index = GET_LOW_SLICE_INDEX(addr);
+ } else {
+ slices = get_paca()->context.high_slices_psize;
+ index = GET_HIGH_SLICE_INDEX(addr);
+ }
+ return (slices >> (index * 4)) & 0xF;
+}
+
+#else
+unsigned int get_paca_psize(unsigned long addr)
+{
+ return get_paca()->context.user_psize;
+}
+#endif
+
/*
* Demote a segment to using 4k pages.
* For now this makes the whole process use 4k pages.
*/
#ifdef CONFIG_PPC_64K_PAGES
-static void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
+void demote_segment_4k(struct mm_struct *mm, unsigned long addr)
{
- if (mm->context.user_psize == MMU_PAGE_4K)
+ if (get_slice_psize(mm, addr) == MMU_PAGE_4K)
return;
- slice_set_user_psize(mm, MMU_PAGE_4K);
+ slice_set_range_psize(mm, addr, 1, MMU_PAGE_4K);
#ifdef CONFIG_SPU_BASE
spu_flush_all_slbs(mm);
#endif
+ if (get_paca_psize(addr) != MMU_PAGE_4K) {
+ get_paca()->context = mm->context;
+ slb_flush_and_rebolt();
+ }
}
#endif /* CONFIG_PPC_64K_PAGES */
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+/*
+ * This looks up a 2-bit protection code for a 4k subpage of a 64k page.
+ * Userspace sets the subpage permissions using the subpage_prot system call.
+ *
+ * Result is 0: full permissions, _PAGE_RW: read-only,
+ * _PAGE_USER or _PAGE_USER|_PAGE_RW: no access.
+ */
+static int subpage_protection(pgd_t *pgdir, unsigned long ea)
+{
+ struct subpage_prot_table *spt = pgd_subpage_prot(pgdir);
+ u32 spp = 0;
+ u32 **sbpm, *sbpp;
+
+ if (ea >= spt->maxaddr)
+ return 0;
+ if (ea < 0x100000000) {
+ /* addresses below 4GB use spt->low_prot */
+ sbpm = spt->low_prot;
+ } else {
+ sbpm = spt->protptrs[ea >> SBP_L3_SHIFT];
+ if (!sbpm)
+ return 0;
+ }
+ sbpp = sbpm[(ea >> SBP_L2_SHIFT) & (SBP_L2_COUNT - 1)];
+ if (!sbpp)
+ return 0;
+ spp = sbpp[(ea >> PAGE_SHIFT) & (SBP_L1_COUNT - 1)];
+
+ /* extract 2-bit bitfield for this 4k subpage */
+ spp >>= 30 - 2 * ((ea >> 12) & 0xf);
+
+ /* turn 0,1,2,3 into combination of _PAGE_USER and _PAGE_RW */
+ spp = ((spp & 2) ? _PAGE_USER : 0) | ((spp & 1) ? _PAGE_RW : 0);
+ return spp;
+}
+
+#else /* CONFIG_PPC_SUBPAGE_PROT */
+static inline int subpage_protection(pgd_t *pgdir, unsigned long ea)
+{
+ return 0;
+}
+#endif
+
/* Result code is:
* 0 - handled
* 1 - normal page fault
* -1 - critical hash insertion error
+ * -2 - access not permitted by subpage protection mechanism
*/
int hash_page(unsigned long ea, unsigned long access, unsigned long trap)
{
DBG_LOW(" user region with no mm !\n");
return 1;
}
-#ifdef CONFIG_PPC_MM_SLICES
psize = get_slice_psize(mm, ea);
-#else
- psize = mm->context.user_psize;
-#endif
ssize = user_segment_size(ea);
vsid = get_vsid(mm->context.id, ea, ssize);
break;
#ifdef CONFIG_HUGETLB_PAGE
/* Handle hugepage regions */
- if (HPAGE_SHIFT && psize == mmu_huge_psize) {
+ if (HPAGE_SHIFT && mmu_huge_psizes[psize]) {
DBG_LOW(" -> huge page !\n");
return hash_huge_page(mm, access, ea, vsid, local, trap);
}
/* Do actual hashing */
#ifdef CONFIG_PPC_64K_PAGES
/* If _PAGE_4K_PFN is set, make sure this is a 4k segment */
- if (pte_val(*ptep) & _PAGE_4K_PFN) {
+ if ((pte_val(*ptep) & _PAGE_4K_PFN) && psize == MMU_PAGE_64K) {
demote_segment_4k(mm, ea);
psize = MMU_PAGE_4K;
}
}
}
if (user_region) {
- if (psize != get_paca()->context.user_psize) {
+ if (psize != get_paca_psize(ea)) {
get_paca()->context = mm->context;
slb_flush_and_rebolt();
}
rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize);
+ {
+ int spp = subpage_protection(pgdir, ea);
+ if (access & spp)
+ rc = -2;
+ else
+ rc = __hash_page_4K(ea, access, vsid, ptep, trap,
+ local, ssize, spp);
+ }
#ifndef CONFIG_PPC_64K_PAGES
DBG_LOW(" o-pte: %016lx\n", pte_val(*ptep));
__hash_page_64K(ea, access, vsid, ptep, trap, local, ssize);
else
#endif /* CONFIG_PPC_HAS_HASH_64K */
- __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize);
+ __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize,
+ subpage_protection(pgdir, ea));
local_irq_restore(flags);
}
* low_hash_fault is called when we the low level hash code failed
* to instert a PTE due to an hypervisor error
*/
-void low_hash_fault(struct pt_regs *regs, unsigned long address)
+void low_hash_fault(struct pt_regs *regs, unsigned long address, int rc)
{
if (user_mode(regs)) {
- siginfo_t info;
-
- info.si_signo = SIGBUS;
- info.si_errno = 0;
- info.si_code = BUS_ADRERR;
- info.si_addr = (void __user *)address;
- force_sig_info(SIGBUS, &info, current);
- return;
- }
- bad_page_fault(regs, address, SIGBUS);
+#ifdef CONFIG_PPC_SUBPAGE_PROT
+ if (rc == -2)
+ _exception(SIGSEGV, regs, SEGV_ACCERR, address);
+ else
+#endif
+ _exception(SIGBUS, regs, BUS_ADRERR, address);
+ } else
+ bad_page_fault(regs, address, SIGBUS);
}
#ifdef CONFIG_DEBUG_PAGEALLOC