sh: Minor optimisations to FPU handling
[safe/jmp/linux-2.6] / arch / sh / kernel / cpu / sh4 / fpu.c
1 /*
2  * Save/restore floating point context for signal handlers.
3  *
4  * This file is subject to the terms and conditions of the GNU General Public
5  * License.  See the file "COPYING" in the main directory of this archive
6  * for more details.
7  *
8  * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
9  * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
10  *
11  * FIXME! These routines have not been tested for big endian case.
12  */
13 #include <linux/sched.h>
14 #include <linux/signal.h>
15 #include <linux/io.h>
16 #include <cpu/fpu.h>
17 #include <asm/processor.h>
18 #include <asm/system.h>
19 #include <asm/fpu.h>
20
/*
 * The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 *
 * FPSCR_RCHG is the FPSCR value loaded before each frchg sequence in this
 * file (save_fpu, restore_fpu, fpu_init); being all-zero it has PR clear.
 */

#define FPSCR_RCHG 0x00000000

/*
 * Software floating-point routines defined outside this file.  They are
 * called from ieee_fpe_handler() with the raw register contents passed as
 * integers: one 32-bit word for the float32_* variants, and the high and
 * low register words combined into one 64-bit value for float64_*.
 */
extern unsigned long long float64_div(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
extern unsigned long long float64_mul(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long long float64_add(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long long float64_sub(unsigned long long a,
                                      unsigned long long b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
extern unsigned long int float64_to_float32(unsigned long long a);

/*
 * Exception flags accumulated through float_raise().  The fpu_error trap
 * handler zeroes this before calling ieee_fpe_handler() and afterwards
 * merges it into the task's saved FPSCR.
 */
static unsigned int fpu_exception_flags;
41
/*
 * Save FPU registers onto task structure.
 *
 * @tsk: task whose thread.fpu.hard area receives the register dump.
 *
 * The store pointer starts at &tsk->thread.fpu.hard.status and every store
 * pre-decrements it, so the save area is filled downwards in this order:
 * FPUL, FPSCR, then fr15..fr0 as seen after the first frchg, then
 * fr15..fr0 as seen after the second frchg.
 *
 * The whole sequence is bracketed by enable_fpu()/disable_fpu(), and FPSCR
 * is left holding FPSCR_INIT when the function returns.
 */
void save_fpu(struct task_struct *tsk)
{
        /* Receives the final value of the decremented store pointer; unused. */
        unsigned long dummy;

        enable_fpu();
        asm volatile ("sts.l    fpul, @-%0\n\t"
                      "sts.l    fpscr, @-%0\n\t"
                      /* Load FPSCR_RCHG (PR clear) before using frchg. */
                      "lds      %2, fpscr\n\t"
                      "frchg\n\t"
                      "fmov.s   fr15, @-%0\n\t"
                      "fmov.s   fr14, @-%0\n\t"
                      "fmov.s   fr13, @-%0\n\t"
                      "fmov.s   fr12, @-%0\n\t"
                      "fmov.s   fr11, @-%0\n\t"
                      "fmov.s   fr10, @-%0\n\t"
                      "fmov.s   fr9, @-%0\n\t"
                      "fmov.s   fr8, @-%0\n\t"
                      "fmov.s   fr7, @-%0\n\t"
                      "fmov.s   fr6, @-%0\n\t"
                      "fmov.s   fr5, @-%0\n\t"
                      "fmov.s   fr4, @-%0\n\t"
                      "fmov.s   fr3, @-%0\n\t"
                      "fmov.s   fr2, @-%0\n\t"
                      "fmov.s   fr1, @-%0\n\t"
                      "fmov.s   fr0, @-%0\n\t"
                      /* Switch back and dump the other register bank. */
                      "frchg\n\t"
                      "fmov.s   fr15, @-%0\n\t"
                      "fmov.s   fr14, @-%0\n\t"
                      "fmov.s   fr13, @-%0\n\t"
                      "fmov.s   fr12, @-%0\n\t"
                      "fmov.s   fr11, @-%0\n\t"
                      "fmov.s   fr10, @-%0\n\t"
                      "fmov.s   fr9, @-%0\n\t"
                      "fmov.s   fr8, @-%0\n\t"
                      "fmov.s   fr7, @-%0\n\t"
                      "fmov.s   fr6, @-%0\n\t"
                      "fmov.s   fr5, @-%0\n\t"
                      "fmov.s   fr4, @-%0\n\t"
                      "fmov.s   fr3, @-%0\n\t"
                      "fmov.s   fr2, @-%0\n\t"
                      "fmov.s   fr1, @-%0\n\t"
                      "fmov.s   fr0, @-%0\n\t"
                      /* Leave the hardware FPSCR at FPSCR_INIT. */
                      "lds      %3, fpscr\n\t":"=r" (dummy)
                      /* Operand 1 is tied to %0: the initial store pointer. */
                      :"0"((char *)(&tsk->thread.fpu.hard.status)),
                      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
                      :"memory");

        disable_fpu();
}
94
/*
 * Reload the FPU registers from a task's save area.
 *
 * @tsk: task whose saved FPU context is loaded into the hardware.
 *
 * The load pointer starts at &tsk->thread.fpu and every load
 * post-increments it: sixteen words go into fr0..fr15, frchg switches
 * bank and the next sixteen words go into fr0..fr15 of that bank, a second
 * frchg switches back, and finally FPSCR and FPUL are loaded.  That is the
 * reverse of the order in which save_fpu() stores them.
 *
 * NOTE(review): this relies on thread.fpu.hard starting at the same
 * address as thread.fpu - confirm against the structure definition.
 */
static void restore_fpu(struct task_struct *tsk)
{
        /* Receives the final value of the incremented load pointer; unused. */
        unsigned long dummy;

        enable_fpu();
        /* Load FPSCR_RCHG (PR clear) first so the frchg below is valid. */
        asm volatile ("lds      %2, fpscr\n\t"
                      "fmov.s   @%0+, fr0\n\t"
                      "fmov.s   @%0+, fr1\n\t"
                      "fmov.s   @%0+, fr2\n\t"
                      "fmov.s   @%0+, fr3\n\t"
                      "fmov.s   @%0+, fr4\n\t"
                      "fmov.s   @%0+, fr5\n\t"
                      "fmov.s   @%0+, fr6\n\t"
                      "fmov.s   @%0+, fr7\n\t"
                      "fmov.s   @%0+, fr8\n\t"
                      "fmov.s   @%0+, fr9\n\t"
                      "fmov.s   @%0+, fr10\n\t"
                      "fmov.s   @%0+, fr11\n\t"
                      "fmov.s   @%0+, fr12\n\t"
                      "fmov.s   @%0+, fr13\n\t"
                      "fmov.s   @%0+, fr14\n\t"
                      "fmov.s   @%0+, fr15\n\t"
                      /* Switch bank and fill the second set of registers. */
                      "frchg\n\t"
                      "fmov.s   @%0+, fr0\n\t"
                      "fmov.s   @%0+, fr1\n\t"
                      "fmov.s   @%0+, fr2\n\t"
                      "fmov.s   @%0+, fr3\n\t"
                      "fmov.s   @%0+, fr4\n\t"
                      "fmov.s   @%0+, fr5\n\t"
                      "fmov.s   @%0+, fr6\n\t"
                      "fmov.s   @%0+, fr7\n\t"
                      "fmov.s   @%0+, fr8\n\t"
                      "fmov.s   @%0+, fr9\n\t"
                      "fmov.s   @%0+, fr10\n\t"
                      "fmov.s   @%0+, fr11\n\t"
                      "fmov.s   @%0+, fr12\n\t"
                      "fmov.s   @%0+, fr13\n\t"
                      "fmov.s   @%0+, fr14\n\t"
                      "fmov.s   @%0+, fr15\n\t"
                      "frchg\n\t"
                      /* The task's own FPSCR goes in last, then FPUL. */
                      "lds.l    @%0+, fpscr\n\t"
                      "lds.l    @%0+, fpul\n\t"
                      :"=r" (dummy)
                      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
                      :"memory");
        disable_fpu();
}
142
/*
 * Give every FPU register a defined value for a first-time FPU user.
 *
 * NOTE(review): this comment has historically described the registers as
 * being loaded with signalling NaNs (a bit pattern that is a signalling
 * NaN whether read as single or as double precision).  The code below does
 * not do that: the value moved into FPUL is the constant 0, and that is
 * what gets copied into fr0..fr15 of both banks.  Confirm which of the two
 * is intended before relying on either.
 *
 * FPSCR is set to FPSCR_RCHG (PR clear) for the frchg instructions and is
 * left at FPSCR_INIT on exit.  The sequence is bracketed by
 * enable_fpu()/disable_fpu().
 */

static void fpu_init(void)
{
        enable_fpu();
        /* %0 = fill value (0), %1 = FPSCR_RCHG, %2 = FPSCR_INIT */
        asm volatile (  "lds    %0, fpul\n\t"
                        "lds    %1, fpscr\n\t"
                        "fsts   fpul, fr0\n\t"
                        "fsts   fpul, fr1\n\t"
                        "fsts   fpul, fr2\n\t"
                        "fsts   fpul, fr3\n\t"
                        "fsts   fpul, fr4\n\t"
                        "fsts   fpul, fr5\n\t"
                        "fsts   fpul, fr6\n\t"
                        "fsts   fpul, fr7\n\t"
                        "fsts   fpul, fr8\n\t"
                        "fsts   fpul, fr9\n\t"
                        "fsts   fpul, fr10\n\t"
                        "fsts   fpul, fr11\n\t"
                        "fsts   fpul, fr12\n\t"
                        "fsts   fpul, fr13\n\t"
                        "fsts   fpul, fr14\n\t"
                        "fsts   fpul, fr15\n\t"
                        /* Same fill for the other register bank. */
                        "frchg\n\t"
                        "fsts   fpul, fr0\n\t"
                        "fsts   fpul, fr1\n\t"
                        "fsts   fpul, fr2\n\t"
                        "fsts   fpul, fr3\n\t"
                        "fsts   fpul, fr4\n\t"
                        "fsts   fpul, fr5\n\t"
                        "fsts   fpul, fr6\n\t"
                        "fsts   fpul, fr7\n\t"
                        "fsts   fpul, fr8\n\t"
                        "fsts   fpul, fr9\n\t"
                        "fsts   fpul, fr10\n\t"
                        "fsts   fpul, fr11\n\t"
                        "fsts   fpul, fr12\n\t"
                        "fsts   fpul, fr13\n\t"
                        "fsts   fpul, fr14\n\t"
                        "fsts   fpul, fr15\n\t"
                        "frchg\n\t"
                        "lds    %2, fpscr\n\t"
                        :       /* no output */
                        :"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
        disable_fpu();
}
193
194 /**
195  *      denormal_to_double - Given denormalized float number,
196  *                           store double float
197  *
198  *      @fpu: Pointer to sh_fpu_hard structure
199  *      @n: Index to FP register
200  */
201 static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
202 {
203         unsigned long du, dl;
204         unsigned long x = fpu->fpul;
205         int exp = 1023 - 126;
206
207         if (x != 0 && (x & 0x7f800000) == 0) {
208                 du = (x & 0x80000000);
209                 while ((x & 0x00800000) == 0) {
210                         x <<= 1;
211                         exp--;
212                 }
213                 x &= 0x007fffff;
214                 du |= (exp << 20) | (x >> 3);
215                 dl = x << 29;
216
217                 fpu->fp_regs[n] = du;
218                 fpu->fp_regs[n + 1] = dl;
219         }
220 }
221
222 /**
223  *      ieee_fpe_handler - Handle denormalized number exception
224  *
225  *      @regs: Pointer to register structure
226  *
227  *      Returns 1 when it's handled (should not cause exception).
228  */
229 static int ieee_fpe_handler(struct pt_regs *regs)
230 {
231         unsigned short insn = *(unsigned short *)regs->pc;
232         unsigned short finsn;
233         unsigned long nextpc;
234         int nib[4] = {
235                 (insn >> 12) & 0xf,
236                 (insn >> 8) & 0xf,
237                 (insn >> 4) & 0xf,
238                 insn & 0xf
239         };
240
241         if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
242                 regs->pr = regs->pc + 4;  /* bsr & jsr */
243
244         if (nib[0] == 0xa || nib[0] == 0xb) {
245                 /* bra & bsr */
246                 nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
247                 finsn = *(unsigned short *)(regs->pc + 2);
248         } else if (nib[0] == 0x8 && nib[1] == 0xd) {
249                 /* bt/s */
250                 if (regs->sr & 1)
251                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
252                 else
253                         nextpc = regs->pc + 4;
254                 finsn = *(unsigned short *)(regs->pc + 2);
255         } else if (nib[0] == 0x8 && nib[1] == 0xf) {
256                 /* bf/s */
257                 if (regs->sr & 1)
258                         nextpc = regs->pc + 4;
259                 else
260                         nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
261                 finsn = *(unsigned short *)(regs->pc + 2);
262         } else if (nib[0] == 0x4 && nib[3] == 0xb &&
263                    (nib[2] == 0x0 || nib[2] == 0x2)) {
264                 /* jmp & jsr */
265                 nextpc = regs->regs[nib[1]];
266                 finsn = *(unsigned short *)(regs->pc + 2);
267         } else if (nib[0] == 0x0 && nib[3] == 0x3 &&
268                    (nib[2] == 0x0 || nib[2] == 0x2)) {
269                 /* braf & bsrf */
270                 nextpc = regs->pc + 4 + regs->regs[nib[1]];
271                 finsn = *(unsigned short *)(regs->pc + 2);
272         } else if (insn == 0x000b) {
273                 /* rts */
274                 nextpc = regs->pr;
275                 finsn = *(unsigned short *)(regs->pc + 2);
276         } else {
277                 nextpc = regs->pc + instruction_size(insn);
278                 finsn = insn;
279         }
280
281         if ((finsn & 0xf1ff) == 0xf0ad) {
282                 /* fcnvsd */
283                 struct task_struct *tsk = current;
284
285                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
286                         /* FPU error */
287                         denormal_to_double(&tsk->thread.fpu.hard,
288                                            (finsn >> 8) & 0xf);
289                 else
290                         return 0;
291
292                 regs->pc = nextpc;
293                 return 1;
294         } else if ((finsn & 0xf00f) == 0xf002) {
295                 /* fmul */
296                 struct task_struct *tsk = current;
297                 int fpscr;
298                 int n, m, prec;
299                 unsigned int hx, hy;
300
301                 n = (finsn >> 8) & 0xf;
302                 m = (finsn >> 4) & 0xf;
303                 hx = tsk->thread.fpu.hard.fp_regs[n];
304                 hy = tsk->thread.fpu.hard.fp_regs[m];
305                 fpscr = tsk->thread.fpu.hard.fpscr;
306                 prec = fpscr & FPSCR_DBL_PRECISION;
307
308                 if ((fpscr & FPSCR_CAUSE_ERROR)
309                     && (prec && ((hx & 0x7fffffff) < 0x00100000
310                                  || (hy & 0x7fffffff) < 0x00100000))) {
311                         long long llx, lly;
312
313                         /* FPU error because of denormal (doubles) */
314                         llx = ((long long)hx << 32)
315                             | tsk->thread.fpu.hard.fp_regs[n + 1];
316                         lly = ((long long)hy << 32)
317                             | tsk->thread.fpu.hard.fp_regs[m + 1];
318                         llx = float64_mul(llx, lly);
319                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
320                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
321                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
322                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
323                                          || (hy & 0x7fffffff) < 0x00800000))) {
324                         /* FPU error because of denormal (floats) */
325                         hx = float32_mul(hx, hy);
326                         tsk->thread.fpu.hard.fp_regs[n] = hx;
327                 } else
328                         return 0;
329
330                 regs->pc = nextpc;
331                 return 1;
332         } else if ((finsn & 0xf00e) == 0xf000) {
333                 /* fadd, fsub */
334                 struct task_struct *tsk = current;
335                 int fpscr;
336                 int n, m, prec;
337                 unsigned int hx, hy;
338
339                 n = (finsn >> 8) & 0xf;
340                 m = (finsn >> 4) & 0xf;
341                 hx = tsk->thread.fpu.hard.fp_regs[n];
342                 hy = tsk->thread.fpu.hard.fp_regs[m];
343                 fpscr = tsk->thread.fpu.hard.fpscr;
344                 prec = fpscr & FPSCR_DBL_PRECISION;
345
346                 if ((fpscr & FPSCR_CAUSE_ERROR)
347                     && (prec && ((hx & 0x7fffffff) < 0x00100000
348                                  || (hy & 0x7fffffff) < 0x00100000))) {
349                         long long llx, lly;
350
351                         /* FPU error because of denormal (doubles) */
352                         llx = ((long long)hx << 32)
353                             | tsk->thread.fpu.hard.fp_regs[n + 1];
354                         lly = ((long long)hy << 32)
355                             | tsk->thread.fpu.hard.fp_regs[m + 1];
356                         if ((finsn & 0xf00f) == 0xf000)
357                                 llx = float64_add(llx, lly);
358                         else
359                                 llx = float64_sub(llx, lly);
360                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
361                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
362                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
363                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
364                                          || (hy & 0x7fffffff) < 0x00800000))) {
365                         /* FPU error because of denormal (floats) */
366                         if ((finsn & 0xf00f) == 0xf000)
367                                 hx = float32_add(hx, hy);
368                         else
369                                 hx = float32_sub(hx, hy);
370                         tsk->thread.fpu.hard.fp_regs[n] = hx;
371                 } else
372                         return 0;
373
374                 regs->pc = nextpc;
375                 return 1;
376         } else if ((finsn & 0xf003) == 0xf003) {
377                 /* fdiv */
378                 struct task_struct *tsk = current;
379                 int fpscr;
380                 int n, m, prec;
381                 unsigned int hx, hy;
382
383                 n = (finsn >> 8) & 0xf;
384                 m = (finsn >> 4) & 0xf;
385                 hx = tsk->thread.fpu.hard.fp_regs[n];
386                 hy = tsk->thread.fpu.hard.fp_regs[m];
387                 fpscr = tsk->thread.fpu.hard.fpscr;
388                 prec = fpscr & FPSCR_DBL_PRECISION;
389
390                 if ((fpscr & FPSCR_CAUSE_ERROR)
391                     && (prec && ((hx & 0x7fffffff) < 0x00100000
392                                  || (hy & 0x7fffffff) < 0x00100000))) {
393                         long long llx, lly;
394
395                         /* FPU error because of denormal (doubles) */
396                         llx = ((long long)hx << 32)
397                             | tsk->thread.fpu.hard.fp_regs[n + 1];
398                         lly = ((long long)hy << 32)
399                             | tsk->thread.fpu.hard.fp_regs[m + 1];
400
401                         llx = float64_div(llx, lly);
402
403                         tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
404                         tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
405                 } else if ((fpscr & FPSCR_CAUSE_ERROR)
406                            && (!prec && ((hx & 0x7fffffff) < 0x00800000
407                                          || (hy & 0x7fffffff) < 0x00800000))) {
408                         /* FPU error because of denormal (floats) */
409                         hx = float32_div(hx, hy);
410                         tsk->thread.fpu.hard.fp_regs[n] = hx;
411                 } else
412                         return 0;
413
414                 regs->pc = nextpc;
415                 return 1;
416         } else if ((finsn & 0xf0bd) == 0xf0bd) {
417                 /* fcnvds - double to single precision convert */
418                 struct task_struct *tsk = current;
419                 int m;
420                 unsigned int hx;
421
422                 m = (finsn >> 8) & 0x7;
423                 hx = tsk->thread.fpu.hard.fp_regs[m];
424
425                 if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
426                         && ((hx & 0x7fffffff) < 0x00100000)) {
427                         /* subnormal double to float conversion */
428                         long long llx;
429
430                         llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
431                             | tsk->thread.fpu.hard.fp_regs[m + 1];
432
433                         tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
434                 } else
435                         return 0;
436
437                 regs->pc = nextpc;
438                 return 1;
439         }
440
441         return 0;
442 }
443
444 void float_raise(unsigned int flags)
445 {
446         fpu_exception_flags |= flags;
447 }
448
449 int float_rounding_mode(void)
450 {
451         struct task_struct *tsk = current;
452         int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
453         return roundingMode;
454 }
455
456 BUILD_TRAP_HANDLER(fpu_error)
457 {
458         struct task_struct *tsk = current;
459         TRAP_HANDLER_DECL;
460
461         __unlazy_fpu(tsk, regs);
462         fpu_exception_flags = 0;
463         if (ieee_fpe_handler(regs)) {
464                 tsk->thread.fpu.hard.fpscr &=
465                     ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
466                 tsk->thread.fpu.hard.fpscr |= fpu_exception_flags;
467                 /* Set the FPSCR flag as well as cause bits - simply
468                  * replicate the cause */
469                 tsk->thread.fpu.hard.fpscr |= (fpu_exception_flags >> 10);
470                 grab_fpu(regs);
471                 restore_fpu(tsk);
472                 task_thread_info(tsk)->status |= TS_USEDFPU;
473                 if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
474                      (fpu_exception_flags >> 2)) == 0) {
475                         return;
476                 }
477         }
478
479         force_sig(SIGFPE, tsk);
480 }
481
482 void fpu_state_restore(struct pt_regs *regs)
483 {
484         struct task_struct *tsk = current;
485
486         grab_fpu(regs);
487         if (unlikely(!user_mode(regs))) {
488                 printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
489                 BUG();
490                 return;
491         }
492
493         if (likely(used_math())) {
494                 /* Using the FPU again.  */
495                 restore_fpu(tsk);
496         } else {
497                 /* First time FPU user.  */
498                 fpu_init();
499                 set_used_math();
500         }
501         task_thread_info(tsk)->status |= TS_USEDFPU;
502         tsk->fpu_counter++;
503 }
504
/*
 * Trap-handler entry point that forwards to fpu_state_restore() with the
 * trapping context's registers.
 */
BUILD_TRAP_HANDLER(fpu_state_restore)
{
        /* Presumably what makes 'regs' available here - defined elsewhere. */
        TRAP_HANDLER_DECL;

        fpu_state_restore(regs);
}