Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

[safe/jmp/linux-2.6] / include / asm-sparc64 / system.h
diff --git a/include/asm-sparc64/system.h b/include/asm-sparc64/system.h

index 5e94c05..4ca6860 100644 (file)
--- a/include/asm-sparc64/system.h
+++ b/include/asm-sparc64/system.h
@@ -2,7 +2,6 @@
  #ifndef __SPARC64_SYSTEM_H
  #define __SPARC64_SYSTEM_H
  
-#include <linux/config.h>
  #include <asm/ptrace.h>
  #include <asm/processor.h>
  #include <asm/visasm.h>
@@ -28,13 +27,48 @@ enum sparc_cpu {
  #define ARCH_SUN4C_SUN4 0
  #define ARCH_SUN4 0
  
-extern void mb(void);
-extern void rmb(void);
-extern void wmb(void);
-extern void membar_storeload(void);
-extern void membar_storeload_storestore(void);
-extern void membar_storeload_loadload(void);
-extern void membar_storestore_loadstore(void);
+/* These are here in an effort to more fully work around Spitfire Errata
+ * #51.  Essentially, if a memory barrier occurs soon after a mispredicted
+ * branch, the chip can stop executing instructions until a trap occurs.
+ * Therefore, if interrupts are disabled, the chip can hang forever.
+ *
+ * It used to be believed that the memory barrier had to be right in the
+ * delay slot, but a case has been traced recently wherein the memory barrier
+ * was one instruction after the branch delay slot and the chip still hung.
+ * The offending sequence was the following in sym_wakeup_done() of the
+ * sym53c8xx_2 driver:
+ *
+ *     call    sym_ccb_from_dsa, 0
+ *      movge  %icc, 0, %l0
+ *     brz,pn  %o0, .LL1303
+ *      mov    %o0, %l2
+ *     membar  #LoadLoad
+ *
+ * The branch has to be mispredicted for the bug to occur.  Therefore, we put
+ * the memory barrier explicitly into a "branch always, predicted taken"
+ * delay slot to avoid the problem case.
+ */
+#define membar_safe(type) /* type: string literal holding the membar mask */ \
+do {   __asm__ __volatile__("ba,pt     %%xcc, 1f\n\t" /* branch always, predicted taken, to label 1 */ \
+                            " membar   " type "\n" /* barrier placed in that branch's delay slot */ \
+                            "1:\n" \
+                            : : : "memory"); /* "memory" clobber: compiler must not cache memory across this */ \
+} while (0)
+
+#define mb()   /* all four membar ordering bits */ \
+       membar_safe("#LoadLoad | #LoadStore | #StoreStore | #StoreLoad")
+#define rmb()  /* load-vs-load ordering only */ \
+       membar_safe("#LoadLoad")
+#define wmb()  /* store-vs-store ordering only */ \
+       membar_safe("#StoreStore")
+#define membar_storeload() \
+       membar_safe("#StoreLoad")
+#define membar_storeload_storestore() \
+       membar_safe("#StoreLoad | #StoreStore")
+#define membar_storeload_loadload() \
+       membar_safe("#StoreLoad | #LoadLoad")
+#define membar_storestore_loadstore() \
+       membar_safe("#StoreStore | #LoadStore")
  
  #endif
  
@@ -158,11 +192,7 @@ do {                                               \
          * not preserve its value.  Hairy, but it lets us remove 2 loads
          * and 2 stores in this critical code path.  -DaveM
          */
-#if __GNUC__ >= 3
  #define EXTRA_CLOBBER ,"%l1"
-#else
-#define EXTRA_CLOBBER
-#endif
  #define switch_to(prev, next, last)                                    \
  do {   if (test_thread_flag(TIF_PERFCTR)) {                            \
                 unsigned long __tmp;                                    \
@@ -177,10 +207,11 @@ do {      if (test_thread_flag(TIF_PERFCTR)) {                            \
         /* If you are tempted to conditionalize the following */        \
         /* so that ASI is only written if it changes, think again. */   \
         __asm__ __volatile__("wr %%g0, %0, %%asi"                       \
-       : : "r" (__thread_flag_byte_ptr(next->thread_info)[TI_FLAG_BYTE_CURRENT_DS]));\
+       : : "r" (__thread_flag_byte_ptr(task_thread_info(next))[TI_FLAG_BYTE_CURRENT_DS]));\
+       trap_block[current_thread_info()->cpu].thread =                 \
+               task_thread_info(next);                                 \
         __asm__ __volatile__(                                           \
         "mov    %%g4, %%g7\n\t"                                         \
-       "wrpr   %%g0, 0x95, %%pstate\n\t"                               \
         "stx    %%i6, [%%sp + 2047 + 0x70]\n\t"                         \
         "stx    %%i7, [%%sp + 2047 + 0x78]\n\t"                         \
         "rdpr   %%wstate, %%o5\n\t"                                     \
@@ -194,20 +225,16 @@ do {      if (test_thread_flag(TIF_PERFCTR)) {                            \
         "ldx    [%%g6 + %3], %%o6\n\t"                                  \
         "ldub   [%%g6 + %2], %%o5\n\t"                                  \
         "ldub   [%%g6 + %4], %%o7\n\t"                                  \
-       "mov    %%g6, %%l2\n\t"                                         \
         "wrpr   %%o5, 0x0, %%wstate\n\t"                                \
         "ldx    [%%sp + 2047 + 0x70], %%i6\n\t"                         \
         "ldx    [%%sp + 2047 + 0x78], %%i7\n\t"                         \
-       "wrpr   %%g0, 0x94, %%pstate\n\t"                               \
-       "mov    %%l2, %%g6\n\t"                                         \
         "ldx    [%%g6 + %6], %%g4\n\t"                                  \
-       "wrpr   %%g0, 0x96, %%pstate\n\t"                               \
         "brz,pt %%o7, 1f\n\t"                                           \
         " mov   %%g7, %0\n\t"                                           \
         "b,a ret_from_syscall\n\t"                                      \
         "1:\n\t"                                                        \
         : "=&r" (last)                                                  \
-       : "0" (next->thread_info),                                      \
+       : "0" (task_thread_info(next)),                                 \
           "i" (TI_WSTATE), "i" (TI_KSP), "i" (TI_NEW_CHILD),            \
           "i" (TI_CWP), "i" (TI_TASK)                                   \
         : "cc",                                                         \
@@ -222,6 +249,16 @@ do {       if (test_thread_flag(TIF_PERFCTR)) {                            \
         }                                                               \
  } while(0)
  
+/*
+ * On SMP systems, when the scheduler does migration-cost autodetection,
+ * it needs a way to flush as much of the CPU's caches as possible.
+ *
+ * TODO: fill this in!  For now this is an empty stub that flushes nothing.
+ */
+static inline void sched_cacheflush(void)
+{
+}
+
  static inline unsigned long xchg32(__volatile__ unsigned int *m, unsigned int val)
  {
         unsigned long tmp1, tmp2;