ALSA: usb-audio: add support for Akai MPD16
[safe/jmp/linux-2.6] / arch / mips / kernel / vpe.c
index 6bf42ba..2bd2151 100644 (file)
@@ -13,7 +13,6 @@
  *  You should have received a copy of the GNU General Public License along
  *  with this program; if not, write to the Free Software Foundation, Inc.,
  *  59 Temple Place - Suite 330, Boston MA 02111-1307, USA.
- *
  */
 
 /*
  *
  * To load and run, simply cat a SP 'program file' to /dev/vpe1.
  * i.e cat spapp >/dev/vpe1.
- *
- * You'll need to have the following device files.
- * mknod /dev/vpe0 c 63 0
- * mknod /dev/vpe1 c 63 1
  */
-
 #include <linux/kernel.h>
+#include <linux/device.h>
 #include <linux/module.h>
 #include <linux/fs.h>
 #include <linux/init.h>
 #include <linux/poll.h>
 #include <linux/bootmem.h>
 #include <asm/mipsregs.h>
+#include <asm/mipsmtregs.h>
 #include <asm/cacheflush.h>
 #include <asm/atomic.h>
 #include <asm/cpu.h>
+#include <asm/mips_mt.h>
 #include <asm/processor.h>
 #include <asm/system.h>
+#include <asm/vpe.h>
+#include <asm/kspd.h>
 
 typedef void *vpe_handle;
 
-// defined here because the kernel module loader doesn't have
-// anything to do with it.
-#define SHN_MIPS_SCOMMON 0xff03
-
 #ifndef ARCH_SHF_SMALL
 #define ARCH_SHF_SMALL 0
 #endif
@@ -68,11 +63,18 @@ typedef void *vpe_handle;
 /* If this is set, the section belongs in the init part of the module */
 #define INIT_OFFSET_MASK (1UL << (BITS_PER_LONG-1))
 
-// temp number,
-#define VPE_MAJOR 63
-
+/*
+ * The number of TCs and VPEs physically available on the core
+ */
+static int hw_tcs, hw_vpes;
 static char module_name[] = "vpe";
-static int major = 0;
+static int major;
+static const int minor = 1;    /* fixed for now  */
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+static struct kspd_notifications kspd_events;
+static int kspd_events_reqd;
+#endif
 
 /* grab the likely amount of memory we will need. */
 #ifdef CONFIG_MIPS_VPE_LOADER_TOM
@@ -82,7 +84,10 @@ static int major = 0;
 #define P_SIZE (256 * 1024)
 #endif
 
+extern unsigned long physical_memsize;
+
 #define MAX_VPES 16
+#define VPE_PATH_MAX 256
 
 enum vpe_state {
        VPE_STATE_UNUSED = 0,
@@ -97,22 +102,7 @@ enum tc_state {
        TC_STATE_DYNAMIC
 };
 
-struct vpe;
-typedef struct tc {
-       enum tc_state state;
-       int index;
-
-       /* parent VPE */
-       struct vpe *pvpe;
-
-       /* The list of TC's with this VPE */
-       struct list_head tc;
-
-       /* The global list of tc's */
-       struct list_head list;
-} tc_t;
-
-typedef struct vpe {
+struct vpe {
        enum vpe_state state;
 
        /* (device) minor associated with this vpe */
@@ -120,9 +110,11 @@ typedef struct vpe {
 
        /* elfloader stuff */
        void *load_addr;
-       u32 len;
+       unsigned long len;
        char *pbuffer;
-       u32 plen;
+       unsigned long plen;
+       unsigned int uid, gid;
+       char cwd[VPE_PATH_MAX];
 
        unsigned long __start;
 
@@ -134,104 +126,115 @@ typedef struct vpe {
 
        /* shared symbol address */
        void *shared_ptr;
-} vpe_t;
 
-struct vpecontrol_ {
-       /* Virtual processing elements */
-       struct list_head vpe_list;
+       /* the list of who wants to know when something major happens */
+       struct list_head notify;
 
-       /* Thread contexts */
-       struct list_head tc_list;
-} vpecontrol;
+       unsigned int ntcs;
+};
 
-static void release_progmem(void *ptr);
-static void dump_vpe(vpe_t * v);
-extern void save_gp_address(unsigned int secbase, unsigned int rel);
+struct tc {
+       enum tc_state state;
+       int index;
 
-/* get the vpe associated with this minor */
-struct vpe *get_vpe(int minor)
-{
-       struct vpe *v;
+       struct vpe *pvpe;       /* parent VPE */
+       struct list_head tc;    /* The list of TC's with this VPE */
+       struct list_head list;  /* The global list of tc's */
+};
 
-       list_for_each_entry(v, &vpecontrol.vpe_list, list) {
-               if (v->minor == minor)
-                       return v;
-       }
+struct {
+       spinlock_t vpe_list_lock;
+       struct list_head vpe_list;      /* Virtual processing elements */
+       spinlock_t tc_list_lock;
+       struct list_head tc_list;       /* Thread contexts */
+} vpecontrol = {
+       .vpe_list_lock  = SPIN_LOCK_UNLOCKED,
+       .vpe_list       = LIST_HEAD_INIT(vpecontrol.vpe_list),
+       .tc_list_lock   = SPIN_LOCK_UNLOCKED,
+       .tc_list        = LIST_HEAD_INIT(vpecontrol.tc_list)
+};
 
-       printk(KERN_DEBUG "VPE: get_vpe minor %d not found\n", minor);
-       return NULL;
-}
+static void release_progmem(void *ptr);
 
 /* get the vpe associated with this minor */
-struct tc *get_tc(int index)
+static struct vpe *get_vpe(int minor)
 {
-       struct tc *t;
+       struct vpe *res, *v;
 
-       list_for_each_entry(t, &vpecontrol.tc_list, list) {
-               if (t->index == index)
-                       return t;
-       }
+       if (!cpu_has_mipsmt)
+               return NULL;
 
-       printk(KERN_DEBUG "VPE: get_tc index %d not found\n", index);
+       res = NULL;
+       spin_lock(&vpecontrol.vpe_list_lock);
+       list_for_each_entry(v, &vpecontrol.vpe_list, list) {
+               if (v->minor == minor) {
+                       res = v;
+                       break;
+               }
+       }
+       spin_unlock(&vpecontrol.vpe_list_lock);
 
-       return NULL;
+       return res;
 }
 
-struct tc *get_tc_unused(void)
+/* get the tc with this index, or NULL if no such tc has been allocated */
+static struct tc *get_tc(int index)
 {
-       struct tc *t;
+       struct tc *res, *t;
 
+       res = NULL;
+       spin_lock(&vpecontrol.tc_list_lock);
        list_for_each_entry(t, &vpecontrol.tc_list, list) {
-               if (t->state == TC_STATE_UNUSED)
-                       return t;
+               if (t->index == index) {
+                       res = t;
+                       break;
+               }
        }
-
-       printk(KERN_DEBUG "VPE: All TC's are in use\n");
+       spin_unlock(&vpecontrol.tc_list_lock);
 
-       return NULL;
+       return res;
 }
 
 /* allocate a vpe and associate it with this minor (or index) */
-struct vpe *alloc_vpe(int minor)
+static struct vpe *alloc_vpe(int minor)
 {
        struct vpe *v;
 
-       if ((v = kmalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL) {
-               printk(KERN_WARNING "VPE: alloc_vpe no mem\n");
+       if ((v = kzalloc(sizeof(struct vpe), GFP_KERNEL)) == NULL)
                return NULL;
-       }
-
-       memset(v, 0, sizeof(struct vpe));
 
        INIT_LIST_HEAD(&v->tc);
+       spin_lock(&vpecontrol.vpe_list_lock);
        list_add_tail(&v->list, &vpecontrol.vpe_list);
+       spin_unlock(&vpecontrol.vpe_list_lock);
 
+       INIT_LIST_HEAD(&v->notify);
        v->minor = minor;
+
        return v;
 }
 
 /* allocate a tc. At startup only tc0 is running, all other can be halted. */
-struct tc *alloc_tc(int index)
+static struct tc *alloc_tc(int index)
 {
-       struct tc *t;
+       struct tc *tc;
 
-       if ((t = kmalloc(sizeof(struct tc), GFP_KERNEL)) == NULL) {
-               printk(KERN_WARNING "VPE: alloc_tc no mem\n");
-               return NULL;
-       }
+       if ((tc = kzalloc(sizeof(struct tc), GFP_KERNEL)) == NULL)
+               goto out;
 
-       memset(t, 0, sizeof(struct tc));
+       INIT_LIST_HEAD(&tc->tc);
+       tc->index = index;
 
-       INIT_LIST_HEAD(&t->tc);
-       list_add_tail(&t->list, &vpecontrol.tc_list);
+       spin_lock(&vpecontrol.tc_list_lock);
+       list_add_tail(&tc->list, &vpecontrol.tc_list);
+       spin_unlock(&vpecontrol.tc_list_lock);
 
-       t->index = index;
-
-       return t;
+out:
+       return tc;
 }
 
 /* clean up and free everything */
-void release_vpe(struct vpe *v)
+static void release_vpe(struct vpe *v)
 {
        list_del(&v->list);
        if (v->load_addr)
@@ -239,7 +242,7 @@ void release_vpe(struct vpe *v)
        kfree(v);
 }
 
-void dump_mtregs(void)
+static void __maybe_unused dump_mtregs(void)
 {
        unsigned long val;
 
@@ -247,32 +250,36 @@ void dump_mtregs(void)
        printk("config3 0x%lx MT %ld\n", val,
               (val & CONFIG3_MT) >> CONFIG3_MT_SHIFT);
 
-       val = read_c0_mvpconf0();
-       printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
-              (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
-              val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
-
        val = read_c0_mvpcontrol();
        printk("MVPControl 0x%lx, STLB %ld VPC %ld EVP %ld\n", val,
               (val & MVPCONTROL_STLB) >> MVPCONTROL_STLB_SHIFT,
               (val & MVPCONTROL_VPC) >> MVPCONTROL_VPC_SHIFT,
               (val & MVPCONTROL_EVP));
 
-       val = read_c0_vpeconf0();
-       printk("VPEConf0 0x%lx MVP %ld\n", val,
-              (val & VPECONF0_MVP) >> VPECONF0_MVP_SHIFT);
+       val = read_c0_mvpconf0();
+       printk("mvpconf0 0x%lx, PVPE %ld PTC %ld M %ld\n", val,
+              (val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT,
+              val & MVPCONF0_PTC, (val & MVPCONF0_M) >> MVPCONF0_M_SHIFT);
 }
 
 /* Find some VPE program space  */
-static void *alloc_progmem(u32 len)
+static void *alloc_progmem(unsigned long len)
 {
+       void *addr;
+
 #ifdef CONFIG_MIPS_VPE_LOADER_TOM
-       /* this means you must tell linux to use less memory than you physically have */
-       return (void *)((max_pfn * PAGE_SIZE) + KSEG0);
+       /*
+        * This means you must tell Linux to use less memory than you
+        * physically have, for example by passing a mem= boot argument.
+        */
+       addr = pfn_to_kaddr(max_low_pfn);
+       memset(addr, 0, len);
 #else
-       // simple grab some mem for now
-       return kmalloc(len, GFP_KERNEL);
+       /* simple grab some mem for now */
+       addr = kzalloc(len, GFP_KERNEL);
 #endif
+
+       return addr;
 }
 
 static void release_progmem(void *ptr)
@@ -322,7 +329,8 @@ static void layout_sections(struct module *mod, const Elf_Ehdr * hdr,
                            || (s->sh_flags & masks[m][1])
                            || s->sh_entsize != ~0UL)
                                continue;
-                       s->sh_entsize = get_offset(&mod->core_size, s);
+                       s->sh_entsize =
+                               get_offset((unsigned long *)&mod->core_size, s);
                }
 
                if (m == 0)
@@ -365,9 +373,9 @@ static int apply_r_mips_gprel16(struct module *me, uint32_t *location,
        }
 
        if( (rel > 32768) || (rel < -32768) ) {
-               printk(KERN_ERR
-                      "apply_r_mips_gprel16: relative address out of range 0x%x %d\n",
-                      rel, rel);
+               printk(KERN_DEBUG "VPE loader: apply_r_mips_gprel16: "
+                      "relative address 0x%x out of range of gp register\n",
+                      rel);
                return -ENOEXEC;
        }
 
@@ -385,8 +393,8 @@ static int apply_r_mips_pc16(struct module *me, uint32_t *location,
        rel -= 1;               // and one instruction less due to the branch delay slot.
 
        if( (rel > 32768) || (rel < -32768) ) {
-               printk(KERN_ERR
-                      "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
+               printk(KERN_DEBUG "VPE loader: "
+                      "apply_r_mips_pc16: relative address out of range 0x%x\n", rel);
                return -ENOEXEC;
        }
 
@@ -407,20 +415,22 @@ static int apply_r_mips_26(struct module *me, uint32_t *location,
                           Elf32_Addr v)
 {
        if (v % 4) {
-               printk(KERN_ERR "module %s: dangerous relocation mod4\n", me->name);
+               printk(KERN_DEBUG "VPE loader: apply_r_mips_26 "
+                      " unaligned relocation\n");
                return -ENOEXEC;
        }
 
-/* Not desperately convinced this is a good check of an overflow condition
-   anyway. But it gets in the way of handling undefined weak symbols which
-   we want to set to zero.
-   if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
-   printk(KERN_ERR
-   "module %s: relocation overflow\n",
-   me->name);
-   return -ENOEXEC;
-   }
-*/
+/*
+ * Not desperately convinced this is a good check of an overflow condition
+ * anyway. But it gets in the way of handling undefined weak symbols which
+ * we want to set to zero.
+ * if ((v & 0xf0000000) != (((unsigned long)location + 4) & 0xf0000000)) {
+ * printk(KERN_ERR
+ * "module %s: relocation overflow\n",
+ * me->name);
+ * return -ENOEXEC;
+ * }
+ */
 
        *location = (*location & ~0x03ffffff) |
                ((*location + (v >> 2)) & 0x03ffffff);
@@ -454,27 +464,27 @@ static int apply_r_mips_lo16(struct module *me, uint32_t *location,
 {
        unsigned long insnlo = *location;
        Elf32_Addr val, vallo;
+       struct mips_hi16 *l, *next;
 
        /* Sign extend the addend we extract from the lo insn.  */
        vallo = ((insnlo & 0xffff) ^ 0x8000) - 0x8000;
 
        if (mips_hi16_list != NULL) {
-               struct mips_hi16 *l;
 
                l = mips_hi16_list;
                while (l != NULL) {
-                       struct mips_hi16 *next;
                        unsigned long insn;
 
                        /*
                         * The value for the HI16 had best be the same.
                         */
-                       if (v != l->value) {
-                               printk("%d != %d\n", v, l->value);
-                               goto out_danger;
+                       if (v != l->value) {
+                               printk(KERN_DEBUG "VPE loader: "
+                                      "apply_r_mips_lo16/hi16: \t"
+                                      "inconsistent value information\n");
+                               goto out_free;
                        }
 
-
                        /*
                         * Do the HI16 relocation.  Note that we actually don't
                         * need to know anything about the LO16 itself, except
@@ -511,8 +521,13 @@ static int apply_r_mips_lo16(struct module *me, uint32_t *location,
 
        return 0;
 
-out_danger:
-       printk(KERN_ERR "module %s: dangerous " "relocation\n", me->name);
+out_free:
+       while (l != NULL) {
+               next = l->next;
+               kfree(l);
+               l = next;
+       }
+       mips_hi16_list = NULL;
 
        return -ENOEXEC;
 }
@@ -528,8 +543,17 @@ static int (*reloc_handlers[]) (struct module *me, uint32_t *location,
        [R_MIPS_PC16] = apply_r_mips_pc16
 };
 
+static char *rstrs[] = {
+       [R_MIPS_NONE]   = "MIPS_NONE",
+       [R_MIPS_32]     = "MIPS_32",
+       [R_MIPS_26]     = "MIPS_26",
+       [R_MIPS_HI16]   = "MIPS_HI16",
+       [R_MIPS_LO16]   = "MIPS_LO16",
+       [R_MIPS_GPREL16] = "MIPS_GPREL16",
+       [R_MIPS_PC16] = "MIPS_PC16"
+};
 
-int apply_relocations(Elf32_Shdr *sechdrs,
+static int apply_relocations(Elf32_Shdr *sechdrs,
                      const char *strtab,
                      unsigned int symindex,
                      unsigned int relsec,
@@ -562,21 +586,19 @@ int apply_relocations(Elf32_Shdr *sechdrs,
 
                res = reloc_handlers[ELF32_R_TYPE(r_info)](me, location, v);
                if( res ) {
-                       printk(KERN_DEBUG
-                              "relocation error 0x%x sym refer <%s> value 0x%x "
-                              "type 0x%x r_info 0x%x\n",
-                              (unsigned int)location, strtab + sym->st_name, v,
-                              r_info, ELF32_R_TYPE(r_info));
-               }
-
-               if (res)
+                       char *r = rstrs[ELF32_R_TYPE(r_info)];
+                       printk(KERN_WARNING "VPE loader: .text+0x%x "
+                              "relocation type %s for symbol \"%s\" failed\n",
+                              rel[i].r_offset, r ? r : "UNKNOWN",
+                              strtab + sym->st_name);
                        return res;
+               }
        }
 
        return 0;
 }
 
-void save_gp_address(unsigned int secbase, unsigned int rel)
+static inline void save_gp_address(unsigned int secbase, unsigned int rel)
 {
        gp_addr = secbase + rel;
        gp_offs = gp_addr - (secbase & 0xffff0000);
@@ -586,7 +608,7 @@ void save_gp_address(unsigned int secbase, unsigned int rel)
 
 
 /* Change all symbols so that sh_value encodes the pointer directly. */
-static int simplify_symbols(Elf_Shdr * sechdrs,
+static void simplify_symbols(Elf_Shdr * sechdrs,
                            unsigned int symindex,
                            const char *strtab,
                            const char *secstrings,
@@ -595,18 +617,21 @@ static int simplify_symbols(Elf_Shdr * sechdrs,
        Elf_Sym *sym = (void *)sechdrs[symindex].sh_addr;
        unsigned long secbase, bssbase = 0;
        unsigned int i, n = sechdrs[symindex].sh_size / sizeof(Elf_Sym);
-       int ret = 0, size;
+       int size;
 
        /* find the .bss section for COMMON symbols */
        for (i = 0; i < nsecs; i++) {
-               if (strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) == 0)
+               if (strncmp(secstrings + sechdrs[i].sh_name, ".bss", 4) == 0) {
                        bssbase = sechdrs[i].sh_addr;
+                       break;
+               }
        }
 
        for (i = 1; i < n; i++) {
                switch (sym[i].st_shndx) {
                case SHN_COMMON:
-                       /* Allocate space for the symbol in the .bss section. st_value is currently size.
+                       /* Allocate space for the symbol in the .bss section.
+                          st_value is currently size.
                           We want it to have the address of the symbol. */
 
                        size = sym[i].st_value;
@@ -624,11 +649,9 @@ static int simplify_symbols(Elf_Shdr * sechdrs,
                        break;
 
                case SHN_MIPS_SCOMMON:
-
-                       printk(KERN_DEBUG
-                              "simplify_symbols: ignoring SHN_MIPS_SCOMMON symbol <%s> st_shndx %d\n",
-                              strtab + sym[i].st_name, sym[i].st_shndx);
-
+                       printk(KERN_DEBUG "simplify_symbols: ignoring SHN_MIPS_SCOMMON "
+                              "symbol <%s> st_shndx %d\n", strtab + sym[i].st_name,
+                              sym[i].st_shndx);
                        // .sbss section
                        break;
 
@@ -642,10 +665,7 @@ static int simplify_symbols(Elf_Shdr * sechdrs,
                        sym[i].st_value += secbase;
                        break;
                }
-
        }
-
-       return ret;
 }
 
 #ifdef DEBUG_ELFLOADER
@@ -663,115 +683,137 @@ static void dump_elfsymbols(Elf_Shdr * sechdrs, unsigned int symindex,
 }
 #endif
 
-static void dump_tc(struct tc *t)
-{
-       printk(KERN_WARNING "VPE: TC index %d TCStatus 0x%lx halt 0x%lx\n",
-              t->index, read_tc_c0_tcstatus(), read_tc_c0_tchalt());
-       printk(KERN_WARNING "VPE: tcrestart 0x%lx\n", read_tc_c0_tcrestart());
-}
-
-static void dump_tclist(void)
-{
-       struct tc *t;
-
-       list_for_each_entry(t, &vpecontrol.tc_list, list) {
-               dump_tc(t);
-       }
-}
-
 /* We are prepared so configure and start the VPE... */
-int vpe_run(vpe_t * v)
+static int vpe_run(struct vpe * v)
 {
-       unsigned long val;
+       unsigned long flags, val, dmt_flag;
+       struct vpe_notifications *n;
+       unsigned int vpeflags;
        struct tc *t;
 
        /* check we are the Master VPE */
+       local_irq_save(flags);
        val = read_c0_vpeconf0();
        if (!(val & VPECONF0_MVP)) {
                printk(KERN_WARNING
-                      "VPE: only Master VPE's are allowed to configure MT\n");
+                      "VPE loader: only Master VPE's are allowed to configure MT\n");
+               local_irq_restore(flags);
+
                return -1;
        }
 
-       /* disable MT (using dvpe) */
-       dvpe();
-
-       /* Put MVPE's into 'configuration state' */
-       write_c0_mvpcontrol(read_c0_mvpcontrol() | MVPCONTROL_VPC);
+       dmt_flag = dmt();
+       vpeflags = dvpe();
 
        if (!list_empty(&v->tc)) {
                if ((t = list_entry(v->tc.next, struct tc, tc)) == NULL) {
-                       printk(KERN_WARNING "VPE: TC %d is already in use.\n",
-                              t->index);
+                       evpe(vpeflags);
+                       emt(dmt_flag);
+                       local_irq_restore(flags);
+
+                       printk(KERN_WARNING
+                              "VPE loader: TC %d is already in use.\n",
+                               t->index);
                        return -ENOEXEC;
                }
        } else {
-               printk(KERN_WARNING "VPE: No TC's associated with VPE %d\n",
+               evpe(vpeflags);
+               emt(dmt_flag);
+               local_irq_restore(flags);
+
+               printk(KERN_WARNING
+                      "VPE loader: No TC's associated with VPE %d\n",
                       v->minor);
+
                return -ENOEXEC;
        }
 
-       settc(t->index);
+       /* Put MVPE's into 'configuration state' */
+       set_c0_mvpcontrol(MVPCONTROL_VPC);
 
-       val = read_vpe_c0_vpeconf0();
+       settc(t->index);
 
        /* should check it is halted, and not activated */
        if ((read_tc_c0_tcstatus() & TCSTATUS_A) || !(read_tc_c0_tchalt() & TCHALT_H)) {
-               printk(KERN_WARNING "VPE: TC %d is already doing something!\n",
+               evpe(vpeflags);
+               emt(dmt_flag);
+               local_irq_restore(flags);
+
+               printk(KERN_WARNING "VPE loader: TC %d is already active!\n",
                       t->index);
 
-               dump_tclist();
                return -ENOEXEC;
        }
 
        /* Write the address we want it to start running from in the TCPC register. */
        write_tc_c0_tcrestart((unsigned long)v->__start);
-
-       /* write the sivc_info address to tccontext */
        write_tc_c0_tccontext((unsigned long)0);
 
-       /* Set up the XTC bit in vpeconf0 to point at our tc */
-       write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | (t->index << VPECONF0_XTC_SHIFT));
-
-       /* mark the TC as activated, not interrupt exempt and not dynamically allocatable */
+       /*
+        * Mark the TC as activated, not interrupt exempt and not dynamically
+        * allocatable
+        */
        val = read_tc_c0_tcstatus();
        val = (val & ~(TCSTATUS_DA | TCSTATUS_IXMT)) | TCSTATUS_A;
        write_tc_c0_tcstatus(val);
 
        write_tc_c0_tchalt(read_tc_c0_tchalt() & ~TCHALT_H);
 
-       /* set up VPE1 */
-       write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);     // no multiple TC's
-       write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);   // enable this VPE
-
        /*
         * The sde-kit passes 'memsize' to __start in $a3, so set something
-        * here...
-        * Or set $a3 (register 7) to zero and define DFLT_STACK_SIZE and
+        * here...  Or set $a3 to zero and define DFLT_STACK_SIZE and
         * DFLT_HEAP_SIZE when you compile your program
         */
+       mttgpr(6, v->ntcs);
+       mttgpr(7, physical_memsize);
+
+       /* set up VPE1 */
+       /*
+        * bind the TC to VPE 1 as late as possible so we only have the final
+        * VPE registers to set up, and so an EJTAG probe can trigger on it
+        */
+       write_tc_c0_tcbind((read_tc_c0_tcbind() & ~TCBIND_CURVPE) | 1);
+
+       write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~(VPECONF0_VPA));
+
+       back_to_back_c0_hazard();
 
-       mttgpr(7, 0);
+       /* Set up the XTC bit in vpeconf0 to point at our tc */
+       write_vpe_c0_vpeconf0( (read_vpe_c0_vpeconf0() & ~(VPECONF0_XTC))
+                             | (t->index << VPECONF0_XTC_SHIFT));
+
+       back_to_back_c0_hazard();
 
-       /* set config to be the same as vpe0, particularly kseg0 coherency alg */
-       write_vpe_c0_config(read_c0_config());
+       /* enable this VPE */
+       write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() | VPECONF0_VPA);
 
        /* clear out any left overs from a previous program */
+       write_vpe_c0_status(0);
        write_vpe_c0_cause(0);
 
        /* take system out of configuration state */
-       write_c0_mvpcontrol(read_c0_mvpcontrol() & ~MVPCONTROL_VPC);
-
-       /* clear interrupts enabled IE, ERL, EXL, and KSU from c0 status */
-       write_vpe_c0_status(read_vpe_c0_status() & ~(ST0_ERL | ST0_KSU | ST0_IE | ST0_EXL));
+       clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
-       /* set it running */
+       /*
+        * SMTC/SMVP kernels manage VPE enable independently,
+        * but uniprocessor kernels need to turn it on, even
+        * if that wasn't the pre-dvpe() state.
+        */
+#ifdef CONFIG_SMP
+       evpe(vpeflags);
+#else
        evpe(EVPE_ENABLE);
+#endif
+       emt(dmt_flag);
+       local_irq_restore(flags);
+
+       list_for_each_entry(n, &v->notify, list)
+               n->start(minor);
 
        return 0;
 }
 
-static unsigned long find_vpe_symbols(vpe_t * v, Elf_Shdr * sechdrs,
+static int find_vpe_symbols(struct vpe * v, Elf_Shdr * sechdrs,
                                      unsigned int symindex, const char *strtab,
                                      struct module *mod)
 {
@@ -788,41 +830,51 @@ static unsigned long find_vpe_symbols(vpe_t * v, Elf_Shdr * sechdrs,
                }
        }
 
+       if ( (v->__start == 0) || (v->shared_ptr == NULL))
+               return -1;
+
        return 0;
 }
 
-/* Allocates a VPE with some program code space(the load address), copies the contents
-   of the program (p)buffer performing relocatations/etc, free's it when finished.
-*/
-int vpe_elfload(vpe_t * v)
+/*
+ * Allocates a VPE with some program code space (the load address), copies the
+ * contents of the program (p)buffer performing relocations etc., frees it
+ * when finished.
+ */
+static int vpe_elfload(struct vpe * v)
 {
        Elf_Ehdr *hdr;
        Elf_Shdr *sechdrs;
        long err = 0;
        char *secstrings, *strtab = NULL;
-       unsigned int len, i, symindex = 0, strindex = 0;
-
+       unsigned int len, i, symindex = 0, strindex = 0, relocate = 0;
        struct module mod;      // so we can re-use the relocations code
 
        memset(&mod, 0, sizeof(struct module));
-       strcpy(mod.name, "VPE dummy prog module");
+       strcpy(mod.name, "VPE loader");
 
        hdr = (Elf_Ehdr *) v->pbuffer;
        len = v->plen;
 
        /* Sanity checks against insmoding binaries or wrong arch,
           weird elf version */
-       if (memcmp(hdr->e_ident, ELFMAG, 4) != 0
-           || hdr->e_type != ET_REL || !elf_check_arch(hdr)
+       if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) != 0
+           || (hdr->e_type != ET_REL && hdr->e_type != ET_EXEC)
+           || !elf_check_arch(hdr)
            || hdr->e_shentsize != sizeof(*sechdrs)) {
                printk(KERN_WARNING
-                      "VPE program, wrong arch or weird elf version\n");
+                      "VPE loader: program wrong arch or weird elf version\n");
 
                return -ENOEXEC;
        }
 
+       if (hdr->e_type == ET_REL)
+               relocate = 1;
+
        if (len < hdr->e_shoff + hdr->e_shnum * sizeof(Elf_Shdr)) {
-               printk(KERN_ERR "VPE program length %u truncated\n", len);
+               printk(KERN_ERR "VPE loader: program length %u truncated\n",
+                      len);
+
                return -ENOEXEC;
        }
 
@@ -834,82 +886,106 @@ int vpe_elfload(vpe_t * v)
        /* And these should exist, but gcc whinges if we don't init them */
        symindex = strindex = 0;
 
-       for (i = 1; i < hdr->e_shnum; i++) {
-
-               if (sechdrs[i].sh_type != SHT_NOBITS
-                   && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
-                       printk(KERN_ERR "VPE program length %u truncated\n",
-                              len);
-                       return -ENOEXEC;
-               }
+       if (relocate) {
+               for (i = 1; i < hdr->e_shnum; i++) {
+                       if (sechdrs[i].sh_type != SHT_NOBITS
+                           && len < sechdrs[i].sh_offset + sechdrs[i].sh_size) {
+                               printk(KERN_ERR "VPE program length %u truncated\n",
+                                      len);
+                               return -ENOEXEC;
+                       }
 
-               /* Mark all sections sh_addr with their address in the
-                  temporary image. */
-               sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+                       /* Mark all sections sh_addr with their address in the
+                          temporary image. */
+                       sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
 
-               /* Internal symbols and strings. */
-               if (sechdrs[i].sh_type == SHT_SYMTAB) {
-                       symindex = i;
-                       strindex = sechdrs[i].sh_link;
-                       strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+                       /* Internal symbols and strings. */
+                       if (sechdrs[i].sh_type == SHT_SYMTAB) {
+                               symindex = i;
+                               strindex = sechdrs[i].sh_link;
+                               strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+                       }
                }
+               layout_sections(&mod, hdr, sechdrs, secstrings);
        }
 
-       layout_sections(&mod, hdr, sechdrs, secstrings);
-
        v->load_addr = alloc_progmem(mod.core_size);
-       memset(v->load_addr, 0, mod.core_size);
+       if (!v->load_addr)
+               return -ENOMEM;
 
-       printk("VPE elf_loader: loading to %p\n", v->load_addr);
+       pr_info("VPE loader: loading to %p\n", v->load_addr);
 
-       for (i = 0; i < hdr->e_shnum; i++) {
-               void *dest;
+       if (relocate) {
+               for (i = 0; i < hdr->e_shnum; i++) {
+                       void *dest;
 
-               if (!(sechdrs[i].sh_flags & SHF_ALLOC))
-                       continue;
+                       if (!(sechdrs[i].sh_flags & SHF_ALLOC))
+                               continue;
 
-               dest = v->load_addr + sechdrs[i].sh_entsize;
+                       dest = v->load_addr + sechdrs[i].sh_entsize;
 
-               if (sechdrs[i].sh_type != SHT_NOBITS)
-                       memcpy(dest, (void *)sechdrs[i].sh_addr,
-                              sechdrs[i].sh_size);
-               /* Update sh_addr to point to copy in image. */
-               sechdrs[i].sh_addr = (unsigned long)dest;
-       }
+                       if (sechdrs[i].sh_type != SHT_NOBITS)
+                               memcpy(dest, (void *)sechdrs[i].sh_addr,
+                                      sechdrs[i].sh_size);
+                       /* Update sh_addr to point to copy in image. */
+                       sechdrs[i].sh_addr = (unsigned long)dest;
 
-       /* Fix up syms, so that st_value is a pointer to location. */
-       err =
-               simplify_symbols(sechdrs, symindex, strtab, secstrings,
-                                hdr->e_shnum, &mod);
-       if (err < 0) {
-               printk(KERN_WARNING "VPE: unable to simplify symbols\n");
-               goto cleanup;
-       }
+                       printk(KERN_DEBUG " section sh_name %s sh_addr 0x%x\n",
+                              secstrings + sechdrs[i].sh_name, sechdrs[i].sh_addr);
+               }
 
-       /* Now do relocations. */
-       for (i = 1; i < hdr->e_shnum; i++) {
-               const char *strtab = (char *)sechdrs[strindex].sh_addr;
-               unsigned int info = sechdrs[i].sh_info;
-
-               /* Not a valid relocation section? */
-               if (info >= hdr->e_shnum)
-                       continue;
-
-               /* Don't bother with non-allocated sections */
-               if (!(sechdrs[info].sh_flags & SHF_ALLOC))
-                       continue;
-
-               if (sechdrs[i].sh_type == SHT_REL)
-                       err =
-                               apply_relocations(sechdrs, strtab, symindex, i, &mod);
-               else if (sechdrs[i].sh_type == SHT_RELA)
-                       err = apply_relocate_add(sechdrs, strtab, symindex, i,
-                                                &mod);
-               if (err < 0) {
-                       printk(KERN_WARNING
-                              "vpe_elfload: error in relocations err %ld\n",
-                              err);
-                       goto cleanup;
+               /* Fix up syms, so that st_value is a pointer to location. */
+               simplify_symbols(sechdrs, symindex, strtab, secstrings,
+                                hdr->e_shnum, &mod);
+
+               /* Now do relocations. */
+               for (i = 1; i < hdr->e_shnum; i++) {
+                       const char *strtab = (char *)sechdrs[strindex].sh_addr;
+                       unsigned int info = sechdrs[i].sh_info;
+
+                       /* Not a valid relocation section? */
+                       if (info >= hdr->e_shnum)
+                               continue;
+
+                       /* Don't bother with non-allocated sections */
+                       if (!(sechdrs[info].sh_flags & SHF_ALLOC))
+                               continue;
+
+                       if (sechdrs[i].sh_type == SHT_REL)
+                               err = apply_relocations(sechdrs, strtab, symindex, i,
+                                                       &mod);
+                       else if (sechdrs[i].sh_type == SHT_RELA)
+                               err = apply_relocate_add(sechdrs, strtab, symindex, i,
+                                                        &mod);
+                       if (err < 0)
+                               return err;
+
+               }
+       } else {
+               struct elf_phdr *phdr = (struct elf_phdr *) ((char *)hdr + hdr->e_phoff);
+
+               for (i = 0; i < hdr->e_phnum; i++) {
+                       if (phdr->p_type == PT_LOAD) {
+                               memcpy((void *)phdr->p_paddr,
+                                      (char *)hdr + phdr->p_offset,
+                                      phdr->p_filesz);
+                               memset((void *)phdr->p_paddr + phdr->p_filesz,
+                                      0, phdr->p_memsz - phdr->p_filesz);
+                   }
+                   phdr++;
+               }
+
+               for (i = 0; i < hdr->e_shnum; i++) {
+                       /* Internal symbols and strings. */
+                       if (sechdrs[i].sh_type == SHT_SYMTAB) {
+                               symindex = i;
+                               strindex = sechdrs[i].sh_link;
+                               strtab = (char *)hdr + sechdrs[strindex].sh_offset;
+
+                               /* mark the symtab's address for when we try to find the
+                                  magic symbols */
+                               sechdrs[i].sh_addr = (size_t) hdr + sechdrs[i].sh_offset;
+                       }
                }
        }
 
@@ -918,110 +994,161 @@ int vpe_elfload(vpe_t * v)
                           (unsigned long)v->load_addr + v->len);
 
        if ((find_vpe_symbols(v, sechdrs, symindex, strtab, &mod)) < 0) {
+               if (v->__start == 0) {
+                       printk(KERN_WARNING "VPE loader: program does not contain "
+                              "a __start symbol\n");
+                       return -ENOEXEC;
+               }
 
-               printk(KERN_WARNING
-                      "VPE: program doesn't contain __start or vpe_shared symbols\n");
-               err = -ENOEXEC;
+               if (v->shared_ptr == NULL)
+                       printk(KERN_WARNING "VPE loader: "
+                              "program does not contain vpe_shared symbol.\n"
+                              " Unable to use AMVP (AP/SP) facilities.\n");
        }
 
        printk(" elf loaded\n");
+       return 0;
+}
 
-cleanup:
-       return err;
+static void cleanup_tc(struct tc *tc)
+{
+       unsigned long flags;
+       unsigned int mtflags, vpflags;
+       int tmp;
+
+       local_irq_save(flags);
+       mtflags = dmt();
+       vpflags = dvpe();
+       /* Put MVPE's into 'configuration state' */
+       set_c0_mvpcontrol(MVPCONTROL_VPC);
+
+       settc(tc->index);
+       tmp = read_tc_c0_tcstatus();
+
+       /* mark not allocated and not dynamically allocatable */
+       tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
+       tmp |= TCSTATUS_IXMT;   /* interrupt exempt */
+       write_tc_c0_tcstatus(tmp);
+
+       write_tc_c0_tchalt(TCHALT_H);
+       mips_ihb();
+
+       /* bind it to anything other than VPE1 */
+//     write_tc_c0_tcbind(read_tc_c0_tcbind() & ~TCBIND_CURVPE); // | TCBIND_CURVPE
+
+       clear_c0_mvpcontrol(MVPCONTROL_VPC);
+       evpe(vpflags);
+       emt(mtflags);
+       local_irq_restore(flags);
 }
 
-static void dump_vpe(vpe_t * v)
+static int getcwd(char *buff, int size)
 {
-       struct tc *t;
+       mm_segment_t old_fs;
+       int ret;
 
-       printk(KERN_DEBUG "VPEControl 0x%lx\n", read_vpe_c0_vpecontrol());
-       printk(KERN_DEBUG "VPEConf0 0x%lx\n", read_vpe_c0_vpeconf0());
+       old_fs = get_fs();
+       set_fs(KERNEL_DS);
 
-       list_for_each_entry(t, &vpecontrol.tc_list, list) {
-               dump_tc(t);
-       }
+       ret = sys_getcwd(buff, size);
+
+       set_fs(old_fs);
+
+       return ret;
 }
 
-/* checks for VPE is unused and gets ready to load program      */
+/* checks VPE is unused and gets ready to load program  */
 static int vpe_open(struct inode *inode, struct file *filp)
 {
-       int minor;
-       vpe_t *v;
+       enum vpe_state state;
+       struct vpe_notifications *not;
+       struct vpe *v;
+       int ret;
 
-       /* assume only 1 device at the mo. */
-       if ((minor = MINOR(inode->i_rdev)) != 1) {
-               printk(KERN_WARNING "VPE: only vpe1 is supported\n");
-               return -ENODEV;
-       }
+       if (minor != iminor(inode)) {
+               /* assume only 1 device at the moment. */
+               pr_warning("VPE loader: only vpe1 is supported\n");
 
-       if ((v = get_vpe(minor)) == NULL) {
-               printk(KERN_WARNING "VPE: unable to get vpe\n");
                return -ENODEV;
        }
 
-       if (v->state != VPE_STATE_UNUSED) {
-               unsigned long tmp;
-               struct tc *t;
+       if ((v = get_vpe(tclimit)) == NULL) {
+               pr_warning("VPE loader: unable to get vpe\n");
 
-               printk(KERN_WARNING "VPE: device %d already in use\n", minor);
+               return -ENODEV;
+       }
 
-               dvpe();
-               dump_vpe(v);
+       state = xchg(&v->state, VPE_STATE_INUSE);
+       if (state != VPE_STATE_UNUSED) {
+               printk(KERN_DEBUG "VPE loader: tc in use dumping regs\n");
 
-               printk(KERN_WARNING "VPE: re-initialising %d\n", minor);
+               list_for_each_entry(not, &v->notify, list) {
+                       not->stop(tclimit);
+               }
 
                release_progmem(v->load_addr);
-
-               t = get_tc(minor);
-               settc(minor);
-               tmp = read_tc_c0_tcstatus();
-
-               /* mark not allocated and not dynamically allocatable */
-               tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
-               tmp |= TCSTATUS_IXMT;   /* interrupt exempt */
-               write_tc_c0_tcstatus(tmp);
-
-               write_tc_c0_tchalt(TCHALT_H);
-
+               cleanup_tc(get_tc(tclimit));
        }
 
-       // allocate it so when we get write ops we know it's expected.
-       v->state = VPE_STATE_INUSE;
-
        /* this of-course trashes what was there before... */
        v->pbuffer = vmalloc(P_SIZE);
        v->plen = P_SIZE;
        v->load_addr = NULL;
        v->len = 0;
 
+       v->uid = filp->f_cred->fsuid;
+       v->gid = filp->f_cred->fsgid;
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+       /* get kspd to tell us when a syscall_exit happens */
+       if (!kspd_events_reqd) {
+               kspd_notify(&kspd_events);
+               kspd_events_reqd++;
+       }
+#endif
+
+       v->cwd[0] = 0;
+       ret = getcwd(v->cwd, VPE_PATH_MAX);
+       if (ret < 0)
+               printk(KERN_WARNING "VPE loader: open, getcwd returned %d\n", ret);
+
+       v->shared_ptr = NULL;
+       v->__start = 0;
+
        return 0;
 }
 
 static int vpe_release(struct inode *inode, struct file *filp)
 {
-       int minor, ret = 0;
-       vpe_t *v;
+       struct vpe *v;
        Elf_Ehdr *hdr;
+       int ret = 0;
 
-       minor = MINOR(inode->i_rdev);
-       if ((v = get_vpe(minor)) == NULL)
+       v = get_vpe(tclimit);
+       if (v == NULL)
                return -ENODEV;
 
-       // simple case of fire and forget, so tell the VPE to run...
-
        hdr = (Elf_Ehdr *) v->pbuffer;
-       if (memcmp(hdr->e_ident, ELFMAG, 4) == 0) {
-               if (vpe_elfload(v) >= 0)
+       if (memcmp(hdr->e_ident, ELFMAG, SELFMAG) == 0) {
+               if (vpe_elfload(v) >= 0) {
                        vpe_run(v);
-               else {
-                       printk(KERN_WARNING "VPE: ELF load failed.\n");
+               } else {
+                       printk(KERN_WARNING "VPE loader: ELF load failed.\n");
                        ret = -ENOEXEC;
                }
        } else {
-               printk(KERN_WARNING "VPE: only elf files are supported\n");
+               printk(KERN_WARNING "VPE loader: only elf files are supported\n");
                ret = -ENOEXEC;
        }
 
+       /* It's good to be able to run the SP and if it chokes have a look at
+          the /dev/rt?. But if we reset the pointer to the shared struct we
+          lose what has happened. So perhaps if garbage is sent to the vpe
+          device, use it as a trigger for the reset. Hopefully a nice
+          executable will be along shortly. */
+       if (ret < 0)
+               v->shared_ptr = NULL;
+
        // cleanup any temp buffers
        if (v->pbuffer)
                vfree(v->pbuffer);
@@ -1032,36 +1159,36 @@ static int vpe_release(struct inode *inode, struct file *filp)
 static ssize_t vpe_write(struct file *file, const char __user * buffer,
                         size_t count, loff_t * ppos)
 {
-       int minor;
        size_t ret = count;
-       vpe_t *v;
+       struct vpe *v;
+
+       if (iminor(file->f_path.dentry->d_inode) != minor)
+               return -ENODEV;
 
-       minor = MINOR(file->f_dentry->d_inode->i_rdev);
-       if ((v = get_vpe(minor)) == NULL)
+       v = get_vpe(tclimit);
+       if (v == NULL)
                return -ENODEV;
 
        if (v->pbuffer == NULL) {
-               printk(KERN_ERR "vpe_write: no pbuffer\n");
+               printk(KERN_ERR "VPE loader: no buffer for program\n");
                return -ENOMEM;
        }
 
        if ((count + v->len) > v->plen) {
                printk(KERN_WARNING
-                      "VPE Loader: elf size too big. Perhaps strip uneeded symbols\n");
+                      "VPE loader: elf size too big. Perhaps strip uneeded symbols\n");
                return -ENOMEM;
        }
 
        count -= copy_from_user(v->pbuffer + v->len, buffer, count);
-       if (!count) {
-               printk("vpe_write: copy_to_user failed\n");
+       if (!count)
                return -EFAULT;
-       }
 
        v->len += count;
        return ret;
 }
 
-static struct file_operations vpe_fops = {
+static const struct file_operations vpe_fops = {
        .owner = THIS_MODULE,
        .open = vpe_open,
        .release = vpe_release,
@@ -1134,18 +1261,21 @@ int vpe_free(vpe_handle vpe)
        evpe_flags = dvpe();
 
        /* Put MVPE's into 'configuration state' */
-       write_c0_mvpcontrol(read_c0_mvpcontrol() | MVPCONTROL_VPC);
+       set_c0_mvpcontrol(MVPCONTROL_VPC);
 
        settc(t->index);
        write_vpe_c0_vpeconf0(read_vpe_c0_vpeconf0() & ~VPECONF0_VPA);
 
-       /* mark the TC unallocated and halt'ed */
-       write_tc_c0_tcstatus(read_tc_c0_tcstatus() & ~TCSTATUS_A);
+       /* halt the TC */
        write_tc_c0_tchalt(TCHALT_H);
+       mips_ihb();
+
+       /* mark the TC unallocated */
+       write_tc_c0_tcstatus(read_tc_c0_tcstatus() & ~TCSTATUS_A);
 
        v->state = VPE_STATE_UNUSED;
 
-       write_c0_mvpcontrol(read_c0_mvpcontrol() & ~MVPCONTROL_VPC);
+       clear_c0_mvpcontrol(MVPCONTROL_VPC);
        evpe(evpe_flags);
 
        return 0;
@@ -1157,64 +1287,240 @@ void *vpe_get_shared(int index)
 {
        struct vpe *v;
 
-       if ((v = get_vpe(index)) == NULL) {
-               printk(KERN_WARNING "vpe: invalid vpe index %d\n", index);
+       if ((v = get_vpe(index)) == NULL)
                return NULL;
-       }
 
        return v->shared_ptr;
 }
 
 EXPORT_SYMBOL(vpe_get_shared);
 
+int vpe_getuid(int index)
+{
+       struct vpe *v;
+
+       if ((v = get_vpe(index)) == NULL)
+               return -1;
+
+       return v->uid;
+}
+
+EXPORT_SYMBOL(vpe_getuid);
+
+int vpe_getgid(int index)
+{
+       struct vpe *v;
+
+       if ((v = get_vpe(index)) == NULL)
+               return -1;
+
+       return v->gid;
+}
+
+EXPORT_SYMBOL(vpe_getgid);
+
+int vpe_notify(int index, struct vpe_notifications *notify)
+{
+       struct vpe *v;
+
+       if ((v = get_vpe(index)) == NULL)
+               return -1;
+
+       list_add(&notify->list, &v->notify);
+       return 0;
+}
+
+EXPORT_SYMBOL(vpe_notify);
+
+char *vpe_getcwd(int index)
+{
+       struct vpe *v;
+
+       if ((v = get_vpe(index)) == NULL)
+               return NULL;
+
+       return v->cwd;
+}
+
+EXPORT_SYMBOL(vpe_getcwd);
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+static void kspd_sp_exit( int sp_id)
+{
+       cleanup_tc(get_tc(sp_id));
+}
+#endif
+
+static ssize_t store_kill(struct device *dev, struct device_attribute *attr,
+                         const char *buf, size_t len)
+{
+       struct vpe *vpe = get_vpe(tclimit);
+       struct vpe_notifications *not;
+
+       list_for_each_entry(not, &vpe->notify, list) {
+               not->stop(tclimit);
+       }
+
+       release_progmem(vpe->load_addr);
+       cleanup_tc(get_tc(tclimit));
+       vpe_stop(vpe);
+       vpe_free(vpe);
+
+       return len;
+}
+
+static ssize_t show_ntcs(struct device *cd, struct device_attribute *attr,
+                        char *buf)
+{
+       struct vpe *vpe = get_vpe(tclimit);
+
+       return sprintf(buf, "%d\n", vpe->ntcs);
+}
+
+static ssize_t store_ntcs(struct device *dev, struct device_attribute *attr,
+                         const char *buf, size_t len)
+{
+       struct vpe *vpe = get_vpe(tclimit);
+       unsigned long new;
+       char *endp;
+
+       new = simple_strtoul(buf, &endp, 0);
+       if (endp == buf)
+               goto out_einval;
+
+       if (new == 0 || new > (hw_tcs - tclimit))
+               goto out_einval;
+
+       vpe->ntcs = new;
+
+       return len;
+
+out_einval:
+       return -EINVAL;
+}
+
+static struct device_attribute vpe_class_attributes[] = {
+       __ATTR(kill, S_IWUSR, NULL, store_kill),
+       __ATTR(ntcs, S_IRUGO | S_IWUSR, show_ntcs, store_ntcs),
+       {}
+};
+
+static void vpe_device_release(struct device *cd)
+{
+       kfree(cd);
+}
+
+struct class vpe_class = {
+       .name = "vpe",
+       .owner = THIS_MODULE,
+       .dev_release = vpe_device_release,
+       .dev_attrs = vpe_class_attributes,
+};
+
+struct device vpe_device;
+
 static int __init vpe_module_init(void)
 {
+       unsigned int mtflags, vpflags;
+       unsigned long flags, val;
        struct vpe *v = NULL;
        struct tc *t;
-       unsigned long val;
-       int i;
+       int tc, err;
 
        if (!cpu_has_mipsmt) {
                printk("VPE loader: not a MIPS MT capable processor\n");
                return -ENODEV;
        }
 
-       if ((major = register_chrdev(VPE_MAJOR, module_name, &vpe_fops) < 0)) {
+       if (vpelimit == 0) {
+               printk(KERN_WARNING "No VPEs reserved for AP/SP, not "
+                      "initializing VPE loader.\nPass maxvpes=<n> argument as "
+                      "kernel argument\n");
+
+               return -ENODEV;
+       }
+
+       if (tclimit == 0) {
+               printk(KERN_WARNING "No TCs reserved for AP/SP, not "
+                      "initializing VPE loader.\nPass maxtcs=<n> argument as "
+                      "kernel argument\n");
+
+               return -ENODEV;
+       }
+
+       major = register_chrdev(0, module_name, &vpe_fops);
+       if (major < 0) {
                printk("VPE loader: unable to register character device\n");
-               return -EBUSY;
+               return major;
        }
 
-       if (major == 0)
-               major = VPE_MAJOR;
+       err = class_register(&vpe_class);
+       if (err) {
+               printk(KERN_ERR "vpe_class registration failed\n");
+               goto out_chrdev;
+       }
 
-       dmt();
-       dvpe();
+       device_initialize(&vpe_device);
+       vpe_device.class        = &vpe_class,
+       vpe_device.parent       = NULL,
+       dev_set_name(&vpe_device, "vpe1");
+       vpe_device.devt = MKDEV(major, minor);
+       err = device_add(&vpe_device);
+       if (err) {
+               printk(KERN_ERR "Adding vpe_device failed\n");
+               goto out_class;
+       }
+
+       local_irq_save(flags);
+       mtflags = dmt();
+       vpflags = dvpe();
 
        /* Put MVPE's into 'configuration state' */
-       write_c0_mvpcontrol(read_c0_mvpcontrol() | MVPCONTROL_VPC);
+       set_c0_mvpcontrol(MVPCONTROL_VPC);
 
        /* dump_mtregs(); */
 
-       INIT_LIST_HEAD(&vpecontrol.vpe_list);
-       INIT_LIST_HEAD(&vpecontrol.tc_list);
-
        val = read_c0_mvpconf0();
-       for (i = 0; i < ((val & MVPCONF0_PTC) + 1); i++) {
-               t = alloc_tc(i);
+       hw_tcs = (val & MVPCONF0_PTC) + 1;
+       hw_vpes = ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1;
+
+       for (tc = tclimit; tc < hw_tcs; tc++) {
+               /*
+                * Must re-enable multithreading temporarily or in case we
+                * reschedule send IPIs or similar we might hang.
+                */
+               clear_c0_mvpcontrol(MVPCONTROL_VPC);
+               evpe(vpflags);
+               emt(mtflags);
+               local_irq_restore(flags);
+               t = alloc_tc(tc);
+               if (!t) {
+                       err = -ENOMEM;
+                       goto out;
+               }
+
+               local_irq_save(flags);
+               mtflags = dmt();
+               vpflags = dvpe();
+               set_c0_mvpcontrol(MVPCONTROL_VPC);
 
                /* VPE's */
-               if (i < ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1) {
-                       settc(i);
+               if (tc < hw_tcs) {
+                       settc(tc);
 
-                       if ((v = alloc_vpe(i)) == NULL) {
+                       if ((v = alloc_vpe(tc)) == NULL) {
                                printk(KERN_WARNING "VPE: unable to allocate VPE\n");
-                               return -ENODEV;
+
+                               goto out_reenable;
                        }
 
-                       list_add(&t->tc, &v->tc);       /* add the tc to the list of this vpe's tc's. */
+                       v->ntcs = hw_tcs - tclimit;
+
+                       /* add the tc to the list of this vpe's tc's. */
+                       list_add(&t->tc, &v->tc);
 
                        /* deactivate all but vpe0 */
-                       if (i != 0) {
+                       if (tc >= tclimit) {
                                unsigned long tmp = read_vpe_c0_vpeconf0();
 
                                tmp &= ~VPECONF0_VPA;
@@ -1227,69 +1533,91 @@ static int __init vpe_module_init(void)
                        /* disable multi-threading with TC's */
                        write_vpe_c0_vpecontrol(read_vpe_c0_vpecontrol() & ~VPECONTROL_TE);
 
-                       if (i != 0) {
-                               write_vpe_c0_status((read_c0_status() &
-                                                    ~(ST0_IM | ST0_IE | ST0_KSU))
-                                                   | ST0_CU0);
-
-                               /* set config to be the same as vpe0, particularly kseg0 coherency alg */
+                       if (tc >= vpelimit) {
+                               /*
+                                * Set config to be the same as vpe0,
+                                * particularly kseg0 coherency alg
+                                */
                                write_vpe_c0_config(read_c0_config());
                        }
-
                }
 
                /* TC's */
                t->pvpe = v;    /* set the parent vpe */
 
-               if (i != 0) {
+               if (tc >= tclimit) {
                        unsigned long tmp;
 
-                       /* tc 0 will of course be running.... */
-                       if (i == 0)
-                               t->state = TC_STATE_RUNNING;
+                       settc(tc);
 
-                       settc(i);
+                       /* Any TC that is bound to VPE0 gets left as is - in case
+                          we are running SMTC on VPE0. A TC that is bound to any
+                          other VPE gets bound to VPE0, ideally I'd like to make
+                          it homeless but it doesn't appear to let me bind a TC
+                          to a non-existent VPE. Which is perfectly reasonable.
 
-                       /* bind a TC to each VPE, May as well put all excess TC's
-                          on the last VPE */
-                       if (i >= (((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT) + 1))
-                               write_tc_c0_tcbind(read_tc_c0_tcbind() |
-                                                  ((val & MVPCONF0_PVPE) >> MVPCONF0_PVPE_SHIFT));
-                       else
-                               write_tc_c0_tcbind(read_tc_c0_tcbind() | i);
+                          The (un)bound state is visible to an EJTAG probe so may
+                          notify GDB...
+                       */
+
+                       if (((tmp = read_tc_c0_tcbind()) & TCBIND_CURVPE)) {
+                               /* tc is bound >vpe0 */
+                               write_tc_c0_tcbind(tmp & ~TCBIND_CURVPE);
+
+                               t->pvpe = get_vpe(0);   /* set the parent vpe */
+                       }
+
+                       /* halt the TC */
+                       write_tc_c0_tchalt(TCHALT_H);
+                       mips_ihb();
 
                        tmp = read_tc_c0_tcstatus();
 
-                       /* mark not allocated and not dynamically allocatable */
+                       /* mark not activated and not dynamically allocatable */
                        tmp &= ~(TCSTATUS_A | TCSTATUS_DA);
                        tmp |= TCSTATUS_IXMT;   /* interrupt exempt */
                        write_tc_c0_tcstatus(tmp);
-
-                       write_tc_c0_tchalt(TCHALT_H);
                }
        }
 
+out_reenable:
        /* release config state */
-       write_c0_mvpcontrol(read_c0_mvpcontrol() & ~MVPCONTROL_VPC);
+       clear_c0_mvpcontrol(MVPCONTROL_VPC);
 
+       evpe(vpflags);
+       emt(mtflags);
+       local_irq_restore(flags);
+
+#ifdef CONFIG_MIPS_APSP_KSPD
+       kspd_events.kspd_sp_exit = kspd_sp_exit;
+#endif
        return 0;
+
+out_class:
+       class_unregister(&vpe_class);
+out_chrdev:
+       unregister_chrdev(major, module_name);
+
+out:
+       return err;
 }
 
 static void __exit vpe_module_exit(void)
 {
        struct vpe *v, *n;
 
+       device_del(&vpe_device);
+       unregister_chrdev(major, module_name);
+
+       /* No locking needed here */
        list_for_each_entry_safe(v, n, &vpecontrol.vpe_list, list) {
-               if (v->state != VPE_STATE_UNUSED) {
+               if (v->state != VPE_STATE_UNUSED)
                        release_vpe(v);
-               }
        }
-
-       unregister_chrdev(major, module_name);
 }
 
 module_init(vpe_module_init);
 module_exit(vpe_module_exit);
 MODULE_DESCRIPTION("MIPS VPE Loader");
-MODULE_AUTHOR("Elizabeth Clarke, MIPS Technologies, Inc");
+MODULE_AUTHOR("Elizabeth Oldham, MIPS Technologies, Inc.");
 MODULE_LICENSE("GPL");