X-Git-Url: http://ftp.safe.ca/?a=blobdiff_plain;f=include%2Fasm-sparc64%2Ftsb.h;h=76e4299dd9bc957667668bd39b38713d8cb9440e;hb=754cdd4aba225b74dcc35cc384aeb0c42b505e8b;hp=09ab3aaa8d20049b6b8580db18b8496a9479cc74;hpb=4753eb2ac7022b999e5e484f1a5dc001dba22bd3;p=safe%2Fjmp%2Flinux-2.6 diff --git a/include/asm-sparc64/tsb.h b/include/asm-sparc64/tsb.h index 09ab3aa..76e4299 100644 --- a/include/asm-sparc64/tsb.h +++ b/include/asm-sparc64/tsb.h @@ -12,6 +12,8 @@ * * ldxa [%g0] ASI_{D,I}MMU_TSB_8KB_PTR, %g1 * ldxa [%g0] ASI_{D,I}MMU, %g6 + * sllx %g6, 22, %g6 + * srlx %g6, 22, %g6 * ldda [%g1] ASI_NUCLEUS_QUAD_LDD, %g4 * cmp %g4, %g6 * bne,pn %xcc, tsb_miss_{d,i}tlb @@ -29,6 +31,9 @@ * ------------------------------------------------- * 63 61 60 48 47 42 41 0 * + * But actually, since we use per-mm TSB's, we zero out the CONTEXT + * field. + * * Like the powerpc hashtables we need to use locking in order to * synchronize while we update the entries. PTE updates need locking * as well. @@ -37,22 +42,108 @@ * choose to use bit 47 in the tag. Also, since we never map anything * at page zero in context zero, we use zero as an invalid tag entry. * When the lock bit is set, this forces a tag comparison failure. - * - * Currently, we allocate an 8K TSB per-process and we use it for both - * I-TLB and D-TLB misses. Perhaps at some point we'll add code that - * monitors the number of active pages in the process as we get - * major/minor faults, and grow the TSB in response. The only trick - * in implementing that is synchronizing the freeing of the old TSB - * wrt. parallel TSB updates occuring on other processors. On - * possible solution is to use RCU for the freeing of the TSB. */ #define TSB_TAG_LOCK_BIT 47 #define TSB_TAG_LOCK_HIGH (1 << (TSB_TAG_LOCK_BIT - 32)) +#define TSB_TAG_INVALID_BIT 46 +#define TSB_TAG_INVALID_HIGH (1 << (TSB_TAG_INVALID_BIT - 32)) + #define TSB_MEMBAR membar #StoreStore +/* Some cpus support physical address quad loads. We want to use + * those if possible so we don't need to hard-lock the TSB mapping + * into the TLB. We encode some instruction patching in order to + * support this. + * + * The kernel TSB is locked into the TLB by virtue of being in the + * kernel image, so we don't play these games for swapper_tsb access. + */ +#ifndef __ASSEMBLY__ +struct tsb_ldquad_phys_patch_entry { + unsigned int addr; + unsigned int sun4u_insn; + unsigned int sun4v_insn; +}; +extern struct tsb_ldquad_phys_patch_entry __tsb_ldquad_phys_patch, + __tsb_ldquad_phys_patch_end; + +struct tsb_phys_patch_entry { + unsigned int addr; + unsigned int insn; +}; +extern struct tsb_phys_patch_entry __tsb_phys_patch, __tsb_phys_patch_end; +#endif +#define TSB_LOAD_QUAD(TSB, REG) \ +661: ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; \ + .section .tsb_ldquad_phys_patch, "ax"; \ + .word 661b; \ + ldda [TSB] ASI_QUAD_LDD_PHYS, REG; \ + ldda [TSB] ASI_QUAD_LDD_PHYS_4V, REG; \ + .previous + +#define TSB_LOAD_TAG_HIGH(TSB, REG) \ +661: lduwa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + lduwa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_LOAD_TAG(TSB, REG) \ +661: ldxa [TSB] ASI_N, REG; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + ldxa [TSB] ASI_PHYS_USE_EC, REG; \ + .previous + +#define TSB_CAS_TAG_HIGH(TSB, REG1, REG2) \ +661: casa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_CAS_TAG(TSB, REG1, REG2) \ +661: casxa [TSB] ASI_N, REG1, REG2; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + casxa [TSB] ASI_PHYS_USE_EC, REG1, REG2; \ + .previous + +#define TSB_STORE(ADDR, VAL) \ +661: stxa VAL, [ADDR] ASI_N; \ + .section .tsb_phys_patch, "ax"; \ + .word 661b; \ + stxa VAL, [ADDR] ASI_PHYS_USE_EC; \ + .previous + #define TSB_LOCK_TAG(TSB, REG1, REG2) \ +99: TSB_LOAD_TAG_HIGH(TSB, REG1); \ + sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ + andcc REG1, REG2, %g0; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_CAS_TAG_HIGH(TSB, REG1, REG2); \ + cmp REG1, REG2; \ + bne,pn %icc, 99b; \ + nop; \ + TSB_MEMBAR + +#define TSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + TSB_STORE(TSB, TTE); \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + TSB_STORE(TSB, TAG); + +#define KTSB_LOAD_QUAD(TSB, REG) \ + ldda [TSB] ASI_NUCLEUS_QUAD_LDD, REG; + +#define KTSB_STORE(ADDR, VAL) \ + stxa VAL, [ADDR] ASI_N; + +#define KTSB_LOCK_TAG(TSB, REG1, REG2) \ 99: lduwa [TSB] ASI_N, REG1; \ sethi %hi(TSB_TAG_LOCK_HIGH), REG2;\ andcc REG1, REG2, %g0; \ @@ -64,10 +155,12 @@ nop; \ TSB_MEMBAR -#define TSB_WRITE(TSB, TTE, TAG) \ - stx TTE, [TSB + 0x08]; \ - TSB_MEMBAR; \ - stx TAG, [TSB + 0x00]; +#define KTSB_WRITE(TSB, TTE, TAG) \ + add TSB, 0x8, TSB; \ + stxa TTE, [TSB] ASI_N; \ + sub TSB, 0x8, TSB; \ + TSB_MEMBAR; \ + stxa TAG, [TSB] ASI_N; /* Do a kernel page table walk. Leaves physical PTE pointer in * REG1. Jumps to FAIL_LABEL on early page table walk termination. @@ -143,6 +236,15 @@ add REG1, (3 * 8), REG1; \ 99: + /* We use a 32K TSB for the whole kernel, this allows to + * handle about 16MB of modules and vmalloc mappings without + * incurring many hash conflicts. + */ +#define KERNEL_TSB_SIZE_BYTES (32 * 1024) +#define KERNEL_TSB_NENTRIES \ + (KERNEL_TSB_SIZE_BYTES / 16) +#define KERNEL_TSB4M_NENTRIES 4096 + /* Do a kernel TSB lookup at tl>0 on VADDR+TAG, branch to OK_LABEL * on TSB hit. REG1, REG2, REG3, and REG4 are used as temporaries * and the found TTE will be left in REG1. REG3 and REG4 must @@ -150,17 +252,32 @@ * * VADDR and TAG will be preserved and not clobbered by this macro. */ - /* XXX non-8K base page size support... */ #define KERN_TSB_LOOKUP_TL1(VADDR, TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ sethi %hi(swapper_tsb), REG1; \ or REG1, %lo(swapper_tsb), REG1; \ - srlx VADDR, 13, REG2; \ - and REG2, (512 - 1), REG2; \ + srlx VADDR, PAGE_SHIFT, REG2; \ + and REG2, (KERNEL_TSB_NENTRIES - 1), REG2; \ + sllx REG2, 4, REG2; \ + add REG1, REG2, REG2; \ + KTSB_LOAD_QUAD(REG2, REG3); \ + cmp REG3, TAG; \ + be,a,pt %xcc, OK_LABEL; \ + mov REG4, REG1; + +#ifndef CONFIG_DEBUG_PAGEALLOC + /* This version uses a trick, the TAG is already (VADDR >> 22) so + * we can make use of that for the index computation. + */ +#define KERN_TSB4M_LOOKUP_TL1(TAG, REG1, REG2, REG3, REG4, OK_LABEL) \ + sethi %hi(swapper_4m_tsb), REG1; \ + or REG1, %lo(swapper_4m_tsb), REG1; \ + and TAG, (KERNEL_TSB4M_NENTRIES - 1), REG2; \ sllx REG2, 4, REG2; \ add REG1, REG2, REG2; \ - ldda [REG2] ASI_NUCLEUS_QUAD_LDD, REG3; \ + KTSB_LOAD_QUAD(REG2, REG3); \ cmp REG3, TAG; \ be,a,pt %xcc, OK_LABEL; \ mov REG4, REG1; +#endif #endif /* !(_SPARC64_TSB_H) */