SAFE public projects git trees. - safe/jmp/linux-2.6/blob - arch/m68k/ifpsp060/src/pfpsp.S

   1 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
   2 MOTOROLA MICROPROCESSOR & MEMORY TECHNOLOGY GROUP
   3 M68000 Hi-Performance Microprocessor Division
   4 M68060 Software Package
   5 Production Release P1.00 -- October 10, 1994
   6
   7 M68060 Software Package Copyright © 1993, 1994 Motorola Inc.  All rights reserved.
   8
   9 THE SOFTWARE is provided on an "AS IS" basis and without warranty.
  10 To the maximum extent permitted by applicable law,
  11 MOTOROLA DISCLAIMS ALL WARRANTIES WHETHER EXPRESS OR IMPLIED,
  12 INCLUDING IMPLIED WARRANTIES OF MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE
  13 and any warranty against infringement with regard to the SOFTWARE
  14 (INCLUDING ANY MODIFIED VERSIONS THEREOF) and any accompanying written materials.
  15
  16 To the maximum extent permitted by applicable law,
  17 IN NO EVENT SHALL MOTOROLA BE LIABLE FOR ANY DAMAGES WHATSOEVER
  18 (INCLUDING WITHOUT LIMITATION, DAMAGES FOR LOSS OF BUSINESS PROFITS,
  19 BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION, OR OTHER PECUNIARY LOSS)
  20 ARISING OF THE USE OR INABILITY TO USE THE SOFTWARE.
  21 Motorola assumes no responsibility for the maintenance and support of the SOFTWARE.
  22
  23 You are hereby granted a copyright license to use, modify, and distribute the SOFTWARE
  24 so long as this entire notice is retained without alteration in any modified and/or
  25 redistributed versions, and that such modified versions are clearly identified as such.
  26 No licenses are granted by implication, estoppel or otherwise under any patents
  27 or trademarks of Motorola, Inc.
  28 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  29 # freal.s:
  30 #       This file is appended to the top of the 060FPSP package
  31 # and contains the entry points into the package. The user, in
  32 # effect, branches to one of the branch table entries located
  33 # after _060FPSP_TABLE.
  34 #       Also, subroutine stubs exist in this file (_fpsp_done for
  35 # example) that are referenced by the FPSP package itself in order
  36 # to call a given routine. The stub routine actually performs the
  37 # callout. The FPSP code does a "bsr" to the stub routine. This
  38 # extra layer of hierarchy adds a slight performance penalty but
  39 # it makes the FPSP code easier to read and more maintainable.
  40 #
  41
  42 set     _off_bsun,      0x00            # byte offset from (_060FPSP_TABLE-0x80); slot read by _real_bsun
  43 set     _off_snan,      0x04            # slot read by _real_snan
  44 set     _off_operr,     0x08            # slot read by _real_operr
  45 set     _off_ovfl,      0x0c            # slot read by _real_ovfl
  46 set     _off_unfl,      0x10            # slot read by _real_unfl
  47 set     _off_dz,        0x14            # slot read by _real_dz
  48 set     _off_inex,      0x18            # slot read by _real_inex
  49 set     _off_fline,     0x1c            # slot read by _real_fline
  50 set     _off_fpu_dis,   0x20            # slot read by _real_fpu_disabled
  51 set     _off_trap,      0x24            # slot read by _real_trap
  52 set     _off_trace,     0x28            # slot read by _real_trace
  53 set     _off_access,    0x2c            # slot read by _real_access
  54 set     _off_done,      0x30            # slot read by _fpsp_done
  55
  56 set     _off_imr,       0x40            # byte offset from (_060FPSP_TABLE-0x80); slot read by _imem_read
  57 set     _off_dmr,       0x44            # slot read by _dmem_read
  58 set     _off_dmw,       0x48            # slot read by _dmem_write
  59 set     _off_irw,       0x4c            # slot read by _imem_read_word
  60 set     _off_irl,       0x50            # slot read by _imem_read_long
  61 set     _off_drb,       0x54            # slot read by _dmem_read_byte
  62 set     _off_drw,       0x58            # slot read by _dmem_read_word
  63 set     _off_drl,       0x5c            # slot read by _dmem_read_long
  64 set     _off_dwb,       0x60            # slot read by _dmem_write_byte
  65 set     _off_dww,       0x64            # slot read by _dmem_write_word
  66 set     _off_dwl,       0x68            # slot read by _dmem_write_long
  67
  68 _060FPSP_TABLE:
  69
  70 ###############################################################
  71
  72 # Here's the table of ENTRY POINTS for those linking the package.
  73         bra.l           _fpsp_snan      # entry at table offset 0x00
  74         short           0x0000          # pad: 6-byte bra.l + 2 = 8 bytes per entry
  75         bra.l           _fpsp_operr     # entry at table offset 0x08
  76         short           0x0000          # pad entry to 8 bytes
  77         bra.l           _fpsp_ovfl      # entry at table offset 0x10 (FP Overflow entry point)
  78         short           0x0000          # pad entry to 8 bytes
  79         bra.l           _fpsp_unfl      # entry at table offset 0x18
  80         short           0x0000          # pad entry to 8 bytes
  81         bra.l           _fpsp_dz        # entry at table offset 0x20
  82         short           0x0000          # pad entry to 8 bytes
  83         bra.l           _fpsp_inex      # entry at table offset 0x28
  84         short           0x0000          # pad entry to 8 bytes
  85         bra.l           _fpsp_fline     # entry at table offset 0x30
  86         short           0x0000          # pad entry to 8 bytes
  87         bra.l           _fpsp_unsupp    # entry at table offset 0x38
  88         short           0x0000          # pad entry to 8 bytes
  89         bra.l           _fpsp_effadd    # entry at table offset 0x40
  90         short           0x0000          # pad entry to 8 bytes
  91
  92         space           56              # presumably reserves 56 bytes: 9 entries * 8 + 56 = 0x80 total
  93
  94 ###############################################################
  95         global          _fpsp_done      # callout stub; FPSP code reaches it with a "bsr"
  96 _fpsp_done:                             # "callout" for 060FPSP exit (all work done)
  97         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
  98         mov.l           (_060FPSP_TABLE-0x80+_off_done,%pc),%d0  # d0 = longword stored in slot _off_done
  99         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 100         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 101         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 102
 103         global          _real_ovfl      # callout stub; FPSP code reaches it with a "bsr"
 104 _real_ovfl:                             # "callout" for Overflow exception enabled code
 105         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 106         mov.l           (_060FPSP_TABLE-0x80+_off_ovfl,%pc),%d0  # d0 = longword stored in slot _off_ovfl
 107         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 108         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 109         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 110
 111         global          _real_unfl      # callout stub; FPSP code reaches it with a "bsr"
 112 _real_unfl:                             # trampoline through callout slot _off_unfl
 113         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 114         mov.l           (_060FPSP_TABLE-0x80+_off_unfl,%pc),%d0  # d0 = longword stored in slot _off_unfl
 115         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 116         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 117         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 118
 119         global          _real_inex      # callout stub; FPSP code reaches it with a "bsr"
 120 _real_inex:                             # "callout" for Inexact exception enabled code
 121         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 122         mov.l           (_060FPSP_TABLE-0x80+_off_inex,%pc),%d0  # d0 = longword stored in slot _off_inex
 123         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 124         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 125         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 126
 127         global          _real_bsun      # callout stub; FPSP code reaches it with a "bsr"
 128 _real_bsun:                             # trampoline through callout slot _off_bsun
 129         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 130         mov.l           (_060FPSP_TABLE-0x80+_off_bsun,%pc),%d0  # d0 = longword stored in slot _off_bsun
 131         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 132         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 133         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 134
 135         global          _real_operr     # callout stub; FPSP code reaches it with a "bsr"
 136 _real_operr:                            # trampoline through callout slot _off_operr
 137         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 138         mov.l           (_060FPSP_TABLE-0x80+_off_operr,%pc),%d0  # d0 = longword stored in slot _off_operr
 139         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 140         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 141         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 142
 143         global          _real_snan      # callout stub; FPSP code reaches it with a "bsr"
 144 _real_snan:                             # trampoline through callout slot _off_snan
 145         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 146         mov.l           (_060FPSP_TABLE-0x80+_off_snan,%pc),%d0  # d0 = longword stored in slot _off_snan
 147         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 148         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 149         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 150
 151         global          _real_dz        # callout stub; FPSP code reaches it with a "bsr"
 152 _real_dz:                               # trampoline through callout slot _off_dz
 153         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 154         mov.l           (_060FPSP_TABLE-0x80+_off_dz,%pc),%d0  # d0 = longword stored in slot _off_dz
 155         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 156         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 157         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 158
 159         global          _real_fline     # callout stub; FPSP code reaches it with a "bsr"
 160 _real_fline:                            # trampoline through callout slot _off_fline
 161         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 162         mov.l           (_060FPSP_TABLE-0x80+_off_fline,%pc),%d0  # d0 = longword stored in slot _off_fline
 163         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 164         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 165         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 166
 167         global          _real_fpu_disabled  # callout stub; FPSP code reaches it with a "bsr"
 168 _real_fpu_disabled:                     # trampoline through callout slot _off_fpu_dis
 169         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 170         mov.l           (_060FPSP_TABLE-0x80+_off_fpu_dis,%pc),%d0  # d0 = longword stored in slot _off_fpu_dis
 171         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 172         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 173         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 174
 175         global          _real_trap      # callout stub; FPSP code reaches it with a "bsr"
 176 _real_trap:                             # trampoline through callout slot _off_trap
 177         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 178         mov.l           (_060FPSP_TABLE-0x80+_off_trap,%pc),%d0  # d0 = longword stored in slot _off_trap
 179         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 180         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 181         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 182
 183         global          _real_trace     # callout stub; FPSP code reaches it with a "bsr"
 184 _real_trace:                            # "callout" for Trace exception code
 185         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 186         mov.l           (_060FPSP_TABLE-0x80+_off_trace,%pc),%d0  # d0 = longword stored in slot _off_trace
 187         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 188         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 189         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 190
 191         global          _real_access    # callout stub; FPSP code reaches it with a "bsr"
 192 _real_access:                           # trampoline through callout slot _off_access
 193         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 194         mov.l           (_060FPSP_TABLE-0x80+_off_access,%pc),%d0  # d0 = longword stored in slot _off_access
 195         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 196         mov.l           0x4(%sp),%d0    # reload caller's d0 from its stack slot
 197         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 198
 199 #######################################
 200
 201         global          _imem_read      # callout stub; FPSP code reaches it with a "bsr"
 202 _imem_read:                             # trampoline through callout slot _off_imr
 203         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 204         mov.l           (_060FPSP_TABLE-0x80+_off_imr,%pc),%d0  # d0 = longword stored in slot _off_imr
 205         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 206         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 207         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 208
 209         global          _dmem_read      # callout stub; FPSP code reaches it with a "bsr"
 210 _dmem_read:                             # trampoline through callout slot _off_dmr
 211         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 212         mov.l           (_060FPSP_TABLE-0x80+_off_dmr,%pc),%d0  # d0 = longword stored in slot _off_dmr
 213         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 214         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 215         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 216
 217         global          _dmem_write     # callout stub; FPSP code reaches it with a "bsr"
 218 _dmem_write:                            # trampoline through callout slot _off_dmw
 219         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 220         mov.l           (_060FPSP_TABLE-0x80+_off_dmw,%pc),%d0  # d0 = longword stored in slot _off_dmw
 221         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 222         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 223         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 224
 225         global          _imem_read_word # callout stub; FPSP code reaches it with a "bsr"
 226 _imem_read_word:                        # trampoline through callout slot _off_irw
 227         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 228         mov.l           (_060FPSP_TABLE-0x80+_off_irw,%pc),%d0  # d0 = longword stored in slot _off_irw
 229         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 230         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 231         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 232
 233         global          _imem_read_long # callout stub; FPSP code reaches it with a "bsr"
 234 _imem_read_long:                        # callout to read an instruction longword
 235         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 236         mov.l           (_060FPSP_TABLE-0x80+_off_irl,%pc),%d0  # d0 = longword stored in slot _off_irl
 237         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 238         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 239         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 240
 241         global          _dmem_read_byte # callout stub; FPSP code reaches it with a "bsr"
 242 _dmem_read_byte:                        # trampoline through callout slot _off_drb
 243         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 244         mov.l           (_060FPSP_TABLE-0x80+_off_drb,%pc),%d0  # d0 = longword stored in slot _off_drb
 245         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 246         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 247         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 248
 249         global          _dmem_read_word # callout stub; FPSP code reaches it with a "bsr"
 250 _dmem_read_word:                        # trampoline through callout slot _off_drw
 251         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 252         mov.l           (_060FPSP_TABLE-0x80+_off_drw,%pc),%d0  # d0 = longword stored in slot _off_drw
 253         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 254         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 255         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 256
 257         global          _dmem_read_long # callout stub; FPSP code reaches it with a "bsr"
 258 _dmem_read_long:                        # trampoline through callout slot _off_drl
 259         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 260         mov.l           (_060FPSP_TABLE-0x80+_off_drl,%pc),%d0  # d0 = longword stored in slot _off_drl
 261         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 262         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 263         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 264
 265         global          _dmem_write_byte  # callout stub; FPSP code reaches it with a "bsr"
 266 _dmem_write_byte:                       # trampoline through callout slot _off_dwb
 267         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 268         mov.l           (_060FPSP_TABLE-0x80+_off_dwb,%pc),%d0  # d0 = longword stored in slot _off_dwb
 269         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 270         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 271         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 272
 273         global          _dmem_write_word  # callout stub; FPSP code reaches it with a "bsr"
 274 _dmem_write_word:                       # trampoline through callout slot _off_dww
 275         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 276         mov.l           (_060FPSP_TABLE-0x80+_off_dww,%pc),%d0  # d0 = longword stored in slot _off_dww
 277         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 278         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 279         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 280
 281         global          _dmem_write_long  # callout stub; FPSP code reaches it with a "bsr"
 282 _dmem_write_long:                       # trampoline through callout slot _off_dwl
 283         mov.l           %d0,-(%sp)      # push caller's d0; d0 is used as scratch below
 284         mov.l           (_060FPSP_TABLE-0x80+_off_dwl,%pc),%d0  # d0 = longword stored in slot _off_dwl
 285         pea.l           (_060FPSP_TABLE-0x80,%pc,%d0)  # push (_060FPSP_TABLE-0x80)+d0 = callout address
 286         mov.l           0x4(%sp),%d0    # reload caller's d0 so the callout sees it unchanged
 287         rtd             &0x4            # jump to pushed address and drop the 4-byte d0 slot
 288
 289 #
 290 # This file contains a set of define statements for constants
 291 # in order to promote readability within the corecode itself.
 292 #
 293
 294 set LOCAL_SIZE,         192                     # stack frame size(bytes); handlers do "link.w %a6,&-LOCAL_SIZE"
 295 set LV,                 -LOCAL_SIZE             # stack offset (lowest frame address, relative to a6)
 296
 297 set EXC_SR,             0x4                     # stack status register
 298 set EXC_PC,             0x6                     # stack pc
 299 set EXC_VOFF,           0xa                     # stacked vector offset
 300 set EXC_EA,             0xc                     # stacked <ea>
 301
 302 set EXC_FP,             0x0                     # frame pointer
 303
 304 set EXC_AREGS,          -68                     # offset of all address regs (8 * 4 bytes, ends at EXC_FPREGS)
 305 set EXC_DREGS,          -100                    # offset of all data regs (8 * 4 bytes, ends at EXC_AREGS)
 306 set EXC_FPREGS,         -36                     # offset of all fp regs (3 * 12 bytes, ends at offset 0)
 307
 308 set EXC_A7,             EXC_AREGS+(7*4)         # offset of saved a7
 309 set OLD_A7,             EXC_AREGS+(6*4)         # extra copy of saved a7 (same slot as EXC_A6)
 310 set EXC_A6,             EXC_AREGS+(6*4)         # offset of saved a6
 311 set EXC_A5,             EXC_AREGS+(5*4)         # offset of saved a5
 312 set EXC_A4,             EXC_AREGS+(4*4)         # offset of saved a4
 313 set EXC_A3,             EXC_AREGS+(3*4)         # offset of saved a3
 314 set EXC_A2,             EXC_AREGS+(2*4)         # offset of saved a2
 315 set EXC_A1,             EXC_AREGS+(1*4)         # offset of saved a1
 316 set EXC_A0,             EXC_AREGS+(0*4)         # offset of saved a0
 317 set EXC_D7,             EXC_DREGS+(7*4)         # offset of saved d7
 318 set EXC_D6,             EXC_DREGS+(6*4)         # offset of saved d6
 319 set EXC_D5,             EXC_DREGS+(5*4)         # offset of saved d5
 320 set EXC_D4,             EXC_DREGS+(4*4)         # offset of saved d4
 321 set EXC_D3,             EXC_DREGS+(3*4)         # offset of saved d3
 322 set EXC_D2,             EXC_DREGS+(2*4)         # offset of saved d2
 323 set EXC_D1,             EXC_DREGS+(1*4)         # offset of saved d1
 324 set EXC_D0,             EXC_DREGS+(0*4)         # offset of saved d0
 325
 326 set EXC_FP0,            EXC_FPREGS+(0*12)       # offset of saved fp0
 327 set EXC_FP1,            EXC_FPREGS+(1*12)       # offset of saved fp1
 328 set EXC_FP2,            EXC_FPREGS+(2*12)       # offset of saved fp2 (not used)
 329
 330 set FP_SCR1,            LV+80                   # fp scratch 1
 331 set FP_SCR1_EX,         FP_SCR1+0
 332 set FP_SCR1_SGN,        FP_SCR1+2
 333 set FP_SCR1_HI,         FP_SCR1+4
 334 set FP_SCR1_LO,         FP_SCR1+8
 335
 336 set FP_SCR0,            LV+68                   # fp scratch 0
 337 set FP_SCR0_EX,         FP_SCR0+0
 338 set FP_SCR0_SGN,        FP_SCR0+2
 339 set FP_SCR0_HI,         FP_SCR0+4
 340 set FP_SCR0_LO,         FP_SCR0+8
 341
 342 set FP_DST,             LV+56                   # fp destination operand
 343 set FP_DST_EX,          FP_DST+0
 344 set FP_DST_SGN,         FP_DST+2
 345 set FP_DST_HI,          FP_DST+4
 346 set FP_DST_LO,          FP_DST+8
 347
 348 set FP_SRC,             LV+44                   # fp source operand
 349 set FP_SRC_EX,          FP_SRC+0
 350 set FP_SRC_SGN,         FP_SRC+2
 351 set FP_SRC_HI,          FP_SRC+4
 352 set FP_SRC_LO,          FP_SRC+8
 353
 354 set USER_FPIAR,         LV+40                   # FP instr address register
 355
 356 set USER_FPSR,          LV+36                   # FP status register
 357 set FPSR_CC,            USER_FPSR+0             # FPSR condition codes
 358 set FPSR_QBYTE,         USER_FPSR+1             # FPSR quotient byte
 359 set FPSR_EXCEPT,        USER_FPSR+2             # FPSR exception status byte
 360 set FPSR_AEXCEPT,       USER_FPSR+3             # FPSR accrued exception byte
 361
 362 set USER_FPCR,          LV+32                   # FP control register (FPCR, FPSR, FPIAR are saved back to back from here)
 363 set FPCR_ENABLE,        USER_FPCR+2             # FPCR exception enable
 364 set FPCR_MODE,          USER_FPCR+3             # FPCR rounding mode control
 365
 366 set L_SCR3,             LV+28                   # integer scratch 3
 367 set L_SCR2,             LV+24                   # integer scratch 2
 368 set L_SCR1,             LV+20                   # integer scratch 1
 369
 370 set STORE_FLG,          LV+19                   # flag: operand store (ie. not fcmp/ftst)
 371
 372 set EXC_TEMP2,          LV+24                   # temporary space (same offset as L_SCR2)
 373 set EXC_TEMP,           LV+16                   # temporary space
 374
 375 set DTAG,               LV+15                   # destination operand type
 376 set STAG,               LV+14                   # source operand type
 377
 378 set SPCOND_FLG,         LV+10                   # flag: special case (see below)
 379
 380 set EXC_CC,             LV+8                    # saved condition codes
 381 set EXC_EXTWPTR,        LV+4                    # saved current PC (active)
 382 set EXC_EXTWORD,        LV+2                    # saved extension word
 383 set EXC_CMDREG,         LV+2                    # saved extension word (same offset as EXC_EXTWORD)
 384 set EXC_OPWORD,         LV+0                    # saved operation word
 385
 386 ################################
 387
 388 # Helpful macros
 389
 390 set FTEMP,              0                       # offsets within an
 391 set FTEMP_EX,           0                       # extended precision
 392 set FTEMP_SGN,          2                       # value saved in memory.
 393 set FTEMP_HI,           4
 394 set FTEMP_LO,           8
 395 set FTEMP_GRS,          12
 396
 397 set LOCAL,              0                       # offsets within an
 398 set LOCAL_EX,           0                       # extended precision
 399 set LOCAL_SGN,          2                       # value saved in memory.
 400 set LOCAL_HI,           4
 401 set LOCAL_LO,           8
 402 set LOCAL_GRS,          12
 403
 404 set DST,                0                       # offsets within an
 405 set DST_EX,             0                       # extended precision
 406 set DST_HI,             4                       # value saved in memory.
 407 set DST_LO,             8
 408
 409 set SRC,                0                       # offsets within an
 410 set SRC_EX,             0                       # extended precision
 411 set SRC_HI,             4                       # value saved in memory.
 412 set SRC_LO,             8
 413
 414 set SGL_LO,             0x3f81                  # min sgl prec exponent
 415 set SGL_HI,             0x407e                  # max sgl prec exponent
 416 set DBL_LO,             0x3c01                  # min dbl prec exponent
 417 set DBL_HI,             0x43fe                  # max dbl prec exponent
 418 set EXT_LO,             0x0                     # min ext prec exponent
 419 set EXT_HI,             0x7ffe                  # max ext prec exponent
 420
 421 set EXT_BIAS,           0x3fff                  # extended precision bias
 422 set SGL_BIAS,           0x007f                  # single precision bias
 423 set DBL_BIAS,           0x03ff                  # double precision bias
 424
 425 set NORM,               0x00                    # operand type for STAG/DTAG
 426 set ZERO,               0x01                    # operand type for STAG/DTAG
 427 set INF,                0x02                    # operand type for STAG/DTAG
 428 set QNAN,               0x03                    # operand type for STAG/DTAG
 429 set DENORM,             0x04                    # operand type for STAG/DTAG
 430 set SNAN,               0x05                    # operand type for STAG/DTAG
 431 set UNNORM,             0x06                    # operand type for STAG/DTAG
 432
 433 ##################
 434 # FPSR/FPCR bits #
 435 ##################
 436 set neg_bit,            0x3                     # negative result (bit number; cf. neg_bmask = 1<<3)
 437 set z_bit,              0x2                     # zero result
 438 set inf_bit,            0x1                     # infinite result
 439 set nan_bit,            0x0                     # NAN result
 440
 441 set q_sn_bit,           0x7                     # sign bit of quotient byte
 442
 443 set bsun_bit,           7                       # branch on unordered
 444 set snan_bit,           6                       # signalling NAN
 445 set operr_bit,          5                       # operand error
 446 set ovfl_bit,           4                       # overflow
 447 set unfl_bit,           3                       # underflow
 448 set dz_bit,             2                       # divide by zero
 449 set inex2_bit,          1                       # inexact result 2
 450 set inex1_bit,          0                       # inexact result 1
 451
 452 set aiop_bit,           7                       # accrued illegal operation bit
 453 set aovfl_bit,          6                       # accrued overflow bit
 454 set aunfl_bit,          5                       # accrued underflow bit
 455 set adz_bit,            4                       # accrued dz bit
 456 set ainex_bit,          3                       # accrued inexact bit
 457
 458 #############################
 459 # FPSR individual bit masks #
 460 #############################
 461 set neg_mask,           0x08000000              # negative bit mask (lw)
 462 set inf_mask,           0x02000000              # infinity bit mask (lw)
 463 set z_mask,             0x04000000              # zero bit mask (lw)
 464 set nan_mask,           0x01000000              # nan bit mask (lw)
 465
 466 set neg_bmask,          0x08                    # negative bit mask (byte)
 467 set inf_bmask,          0x02                    # infinity bit mask (byte)
 468 set z_bmask,            0x04                    # zero bit mask (byte)
 469 set nan_bmask,          0x01                    # nan bit mask (byte)
 470
 471 set bsun_mask,          0x00008000              # bsun exception mask
 472 set snan_mask,          0x00004000              # snan exception mask
 473 set operr_mask,         0x00002000              # operr exception mask
 474 set ovfl_mask,          0x00001000              # overflow exception mask
 475 set unfl_mask,          0x00000800              # underflow exception mask
 476 set dz_mask,            0x00000400              # dz exception mask
 477 set inex2_mask,         0x00000200              # inex2 exception mask
 478 set inex1_mask,         0x00000100              # inex1 exception mask
 479
 480 set aiop_mask,          0x00000080              # accrued illegal operation
 481 set aovfl_mask,         0x00000040              # accrued overflow
 482 set aunfl_mask,         0x00000020              # accrued underflow
 483 set adz_mask,           0x00000010              # accrued divide by zero
 484 set ainex_mask,         0x00000008              # accrued inexact
 485
 486 ######################################
 487 # FPSR combinations used in the FPSP #
 488 ######################################
 489 set dzinf_mask,         inf_mask+dz_mask+adz_mask                    # INF cc + dz + accrued dz
 490 set opnan_mask,         nan_mask+operr_mask+aiop_mask                # NAN cc + operr + accrued illegal op
 491 set nzi_mask,           0x01ffffff              #clears N, Z, and I (when ANDed; the NAN bit and below are kept)
 492 set unfinx_mask,        unfl_mask+inex2_mask+aunfl_mask+ainex_mask   # unfl + inex2 + accrued unfl + accrued inexact
 493 set unf2inx_mask,       unfl_mask+inex2_mask+ainex_mask              # as unfinx_mask but without aunfl_mask
 494 set ovfinx_mask,        ovfl_mask+inex2_mask+aovfl_mask+ainex_mask   # ovfl + inex2 + accrued ovfl + accrued inexact
 495 set inx1a_mask,         inex1_mask+ainex_mask                        # inex1 + accrued inexact
 496 set inx2a_mask,         inex2_mask+ainex_mask                        # inex2 + accrued inexact
 497 set snaniop_mask,       nan_mask+snan_mask+aiop_mask                 # NAN cc + snan + accrued illegal op
 498 set snaniop2_mask,      snan_mask+aiop_mask                          # as snaniop_mask but without the NAN cc
 499 set naniop_mask,        nan_mask+aiop_mask                           # NAN cc + accrued illegal op
 500 set neginf_mask,        neg_mask+inf_mask                            # N + INF condition codes
 501 set infaiop_mask,       inf_mask+aiop_mask                           # INF cc + accrued illegal op
 502 set negz_mask,          neg_mask+z_mask                              # N + Z condition codes
 503 set opaop_mask,         operr_mask+aiop_mask                         # operr + accrued illegal op
 504 set unfl_inx_mask,      unfl_mask+aunfl_mask+ainex_mask              # unfl + accrued unfl + accrued inexact (no inex2)
 505 set ovfl_inx_mask,      ovfl_mask+aovfl_mask+ainex_mask              # ovfl + accrued ovfl + accrued inexact (no inex2)
 506
 507 #########
 508 # misc. #
 509 #########
 510 set rnd_stky_bit,       29                      # stky bit pos in longword
 511
 512 set sign_bit,           0x7                     # sign bit
 513 set signan_bit,         0x6                     # signalling nan bit
 514
 515 set sgl_thresh,         0x3f81                  # minimum sgl exponent (same value as SGL_LO)
 516 set dbl_thresh,         0x3c01                  # minimum dbl exponent (same value as DBL_LO)
 517
 518 set x_mode,             0x0                     # extended precision
 519 set s_mode,             0x4                     # single precision
 520 set d_mode,             0x8                     # double precision
 521
 522 set rn_mode,            0x0                     # round-to-nearest
 523 set rz_mode,            0x1                     # round-to-zero
 524 set rm_mode,            0x2                     # round-to-minus-infinity
 525 set rp_mode,            0x3                     # round-to-plus-infinity
 526
 527 set mantissalen,        64                      # length of mantissa in bits
 528
 529 set BYTE,               1                       # len(byte) == 1 byte
 530 set WORD,               2                       # len(word) == 2 bytes
 531 set LONG,               4                       # len(longword) == 4 bytes
 532
 533 set BSUN_VEC,           0xc0                    # bsun    vector offset
 534 set INEX_VEC,           0xc4                    # inexact vector offset
 535 set DZ_VEC,             0xc8                    # dz      vector offset
 536 set UNFL_VEC,           0xcc                    # unfl    vector offset
 537 set OPERR_VEC,          0xd0                    # operr   vector offset
 538 set OVFL_VEC,           0xd4                    # ovfl    vector offset
 539 set SNAN_VEC,           0xd8                    # snan    vector offset
 540
 541 ###########################
 542 # SPecial CONDition FLaGs #
 543 ###########################
 544 set ftrapcc_flg,        0x01                    # flag bit: ftrapcc exception
 545 set fbsun_flg,          0x02                    # flag bit: bsun exception
 546 set mia7_flg,           0x04                    # flag bit: (a7)+ <ea>
 547 set mda7_flg,           0x08                    # flag bit: -(a7) <ea>
 548 set fmovm_flg,          0x40                    # flag bit: fmovm instruction (no matching *_bit in this group)
 549 set immed_flg,          0x80                    # flag bit: &<data> <ea>
 550
 551 set ftrapcc_bit,        0x0                     # bit number of ftrapcc_flg (1<<0)
 552 set fbsun_bit,          0x1                     # bit number of fbsun_flg (1<<1)
 553 set mia7_bit,           0x2                     # bit number of mia7_flg (1<<2)
 554 set mda7_bit,           0x3                     # bit number of mda7_flg (1<<3)
 555 set immed_bit,          0x7                     # bit number of immed_flg (1<<7)
 556
 557 ##################################
 558 # TRANSCENDENTAL "LAST-OP" FLAGS #
 559 ##################################
 560 set FMUL_OP,            0x0                     # fmul instr performed last
 561 set FDIV_OP,            0x1                     # fdiv performed last
 562 set FADD_OP,            0x2                     # fadd performed last
 563 set FMOV_OP,            0x3                     # fmov performed last
 564
 565 #############
 566 # CONSTANTS #
 567 #############
 568 T1:     long            0x40C62D38,0xD3D64634   # 16381 LOG2 LEAD
 569 T2:     long            0x3D6F90AE,0xB1E75CC7   # 16381 LOG2 TRAIL
 570
 571 PI:     long            0x40000000,0xC90FDAA2,0x2168C235,0x00000000  # pi: exponent word 0x4000, 64-bit mantissa; 4th long presumably padding
 572 PIBY2:  long            0x3FFF0000,0xC90FDAA2,0x2168C235,0x00000000  # pi/2: same mantissa as PI, exponent one lower
 573
 574 TWOBYPI:
 575         long            0x3FE45F30,0x6DC9C883   # 2/pi as a 64-bit (double precision) bit pattern
 576
 577 #########################################################################
 578 # XDEF **************************************************************** #
 579 #       _fpsp_ovfl(): 060FPSP entry point for FP Overflow exception.    #
 580 #                                                                       #
 581 #       This handler should be the first code executed upon taking the  #
 582 #       FP Overflow exception in an operating system.                   #
 583 #                                                                       #
 584 # XREF **************************************************************** #
 585 #       _imem_read_long() - read instruction longword                   #
 586 #       fix_skewed_ops() - adjust src operand in fsave frame            #
 587 #       set_tag_x() - determine optype of src/dst operands              #
 588 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 589 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 590 #       load_fpn2() - load dst operand from FP regfile                  #
 591 #       fout() - emulate an opclass 3 instruction                       #
 592 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
 593 #       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 594 #       _real_ovfl() - "callout" for Overflow exception enabled code    #
 595 #       _real_inex() - "callout" for Inexact exception enabled code     #
 596 #       _real_trace() - "callout" for Trace exception code              #
 597 #                                                                       #
 598 # INPUT *************************************************************** #
 599 #       - The system stack contains the FP Ovfl exception stack frame   #
 600 #       - The fsave frame contains the source operand                   #
 601 #                                                                       #
 602 # OUTPUT ************************************************************** #
 603 #       Overflow Exception enabled:                                     #
 604 #       - The system stack is unchanged                                 #
 605 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
 606 #       Overflow Exception disabled:                                    #
 607 #       - The system stack is unchanged                                 #
 608 #       - The "exception present" flag in the fsave frame is cleared    #
 609 #                                                                       #
 610 # ALGORITHM *********************************************************** #
 611 #       On the 060, if an FP overflow is present as the result of any   #
 612 # instruction, the 060 will take an overflow exception whether the      #
 613 # exception is enabled or disabled in the FPCR. For the disabled case,  #
 614 # This handler emulates the instruction to determine what the correct   #
 615 # default result should be for the operation. This default result is    #
 616 # then stored in either the FP regfile, data regfile, or memory.        #
 617 # Finally, the handler exits through the "callout" _fpsp_done()         #
 618 # denoting that no exceptional conditions exist within the machine.     #
 619 #       If the exception is enabled, then this handler must create the  #
 620 # exceptional operand and place it in the fsave state frame, and store  #
 621 # the default result (only if the instruction is opclass 3). For        #
 622 # exceptions enabled, this handler must exit through the "callout"      #
 623 # _real_ovfl() so that the operating system enabled overflow handler    #
 624 # can handle this case.                                                 #
 625 #       Two other conditions exist. First, if overflow was disabled     #
 626 # but the inexact exception was enabled, this handler must exit         #
 627 # through the "callout" _real_inex() regardless of whether the result   #
 628 # was inexact.                                                          #
 629 #       Also, in the case of an opclass three instruction where         #
 630 # overflow was disabled and the trace exception was enabled, this       #
 631 # handler must exit through the "callout" _real_trace().                #
 632 #                                                                       #
 633 #########################################################################
 634
 635         global          _fpsp_ovfl
 636 _fpsp_ovfl:
 637 # opclass 0,2 ops fall through to fovfl_extract; fmove out goes to fovfl_out.
 638 #$#     sub.l           &24,%sp                 # make room for src/dst
 639
 640         link.w          %a6,&-LOCAL_SIZE        # init stack frame
 641
 642         fsave           FP_SRC(%a6)             # grab the "busy" frame
 643
 644         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 645         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 646         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 647
 648 # the FPIAR holds the "current PC" of the faulting instruction
 649         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at saved FPIAR
 650         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 651         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 652         bsr.l           _imem_read_long         # fetch the instruction words
 653         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
 654
 655 ##############################################################################
 656
 657         btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 658         bne.w           fovfl_out               # yes
 659
 660
 661         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 662         bsr.l           fix_skewed_ops          # fix src op
 663
 664 # since, I believe, only NORMs and DENORMs can come through here,
 665 # maybe we can avoid the subroutine call.
 666         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 667         bsr.l           set_tag_x               # tag the operand type
 668         mov.b           %d0,STAG(%a6)           # save src optype tag; maybe NORM,DENORM
 669
 670 # bit five of the fp extension word separates the monadic and dyadic operations
 671 # that can pass through _fpsp_ovfl(). remember that fcmp, ftst, and fsincos
 672 # will never take this exception.
 673         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
 674         beq.b           fovfl_extract           # monadic
 675
 676         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 677         bsr.l           load_fpn2               # load dst into FP_DST
 678
 679         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 680         bsr.l           set_tag_x               # tag the operand type
 681         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 682         bne.b           fovfl_op2_done          # no
 683         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 684 fovfl_op2_done:
 685         mov.b           %d0,DTAG(%a6)           # save dst optype tag
 686
 687 fovfl_extract:
 688 # emulate the opclass 0,2 operation, store the result, then pick the exit path.
 689 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 690 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 691 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 692 #$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 693 #$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 694 #$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 695
 696         clr.l           %d0
 697         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 698
 699         mov.b           1+EXC_CMDREG(%a6),%d1   # fetch 2nd byte of cmd reg
 700         andi.w          &0x007f,%d1             # extract extension
 701
 702         andi.l          &0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24,15-9; keep accrued byte
 703
 704         fmov.l          &0x0,%fpcr              # zero current control regs
 705         fmov.l          &0x0,%fpsr
 706
 707         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 708         lea             FP_DST(%a6),%a1         # pass: ptr to dst op
 709
 710 # maybe we can make these entry points ONLY the OVFL entry points of each routine.
 711         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
 712         jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + %d1
 713
 714 # the operation has been emulated. the result is in fp0.
 715 # the EXOP, if an exception occurred, is in fp1.
 716 # we must save the default result regardless of whether
 717 # traps are enabled or disabled.
 718         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field
 719         bsr.l           store_fpreg             # store result to FP regfile
 720
 721 # the exceptional possibilities we have left ourselves with are ONLY overflow
 722 # and inexact. and, the inexact is such that overflow occurred and was disabled
 723 # but inexact was enabled.
 724         btst            &ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
 725         bne.b           fovfl_ovfl_on           # yes
 726
 727         btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
 728         bne.b           fovfl_inex_on           # yes
 729
 730         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 731         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 732         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 733
 734         unlk            %a6
 735 #$#     add.l           &24,%sp
 736         bra.l           _fpsp_done              # neither enabled; all work done
 737
 738 # overflow is enabled AND overflow, of course, occurred. so, we have the EXOP
 739 # in fp1. now, place it in the fsave frame and simply jump to _real_ovfl()!
 740 fovfl_ovfl_on:
 741         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
 742
 743         mov.w           &0xe005,2+FP_SRC(%a6)   # save exc status
 744
 745         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 746         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 747         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 748
 749         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 750
 751         unlk            %a6
 752
 753         bra.l           _real_ovfl              # exit to OS overflow handler
 754
 755 # overflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
 756 # we must jump to _real_inex() (no check is made that the result was inexact).
 757 fovfl_inex_on:
 758
 759         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
 760
 761         mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
 762         mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status
 763
 764         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 765         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 766         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 767
 768         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
 769
 770         unlk            %a6
 771
 772         bra.l           _real_inex              # exit to OS inexact handler
 773
 774 ########################################################################
 775 fovfl_out:
 776 # opclass 3 (fmove out): emulate via fout(), then pick the exit path.
 777
 778 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 779 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 780 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 781
 782 # the src operand is definitely a NORM(!), so tag it as such
 783         mov.b           &NORM,STAG(%a6)         # set src optype tag
 784
 785         clr.l           %d0
 786         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 787
 788         and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception byte (bits 15-8)
 789
 790         fmov.l          &0x0,%fpcr              # zero current control regs
 791         fmov.l          &0x0,%fpsr
 792
 793         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
 794
 795         bsr.l           fout                    # emulate the opclass 3 instruction
 796
 797         btst            &ovfl_bit,FPCR_ENABLE(%a6) # is overflow enabled?
 798         bne.w           fovfl_ovfl_on           # yes
 799
 800         btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
 801         bne.w           fovfl_inex_on           # yes
 802
 803         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 804         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 805         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 806
 807         unlk            %a6
 808 #$#     add.l           &24,%sp
 809
 810         btst            &0x7,(%sp)              # is trace on?
 811         beq.l           _fpsp_done              # no
 812
 813         fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
 814         mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
 815         bra.l           _real_trace             # exit through trace callout
 816
 817 #########################################################################
 818 # XDEF **************************************************************** #
 819 #       _fpsp_unfl(): 060FPSP entry point for FP Underflow exception.   #
 820 #                                                                       #
 821 #       This handler should be the first code executed upon taking the  #
 822 #       FP Underflow exception in an operating system.                  #
 823 #                                                                       #
 824 # XREF **************************************************************** #
 825 #       _imem_read_long() - read instruction longword                   #
 826 #       fix_skewed_ops() - adjust src operand in fsave frame            #
 827 #       set_tag_x() - determine optype of src/dst operands              #
 828 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
 829 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
 830 #       load_fpn2() - load dst operand from FP regfile                  #
 831 #       fout() - emulate an opclass 3 instruction                       #
 832 #       tbl_unsupp - addr of table of emulation routines, opclass 0,2   #
 833 #       _fpsp_done() - "callout" for 060FPSP exit (all work done!)      #
 834 #       _real_unfl() - "callout" for Underflow exception enabled code   #
 835 #       _real_inex() - "callout" for Inexact exception enabled code     #
 836 #       _real_trace() - "callout" for Trace exception code              #
 837 #                                                                       #
 838 # INPUT *************************************************************** #
 839 #       - The system stack contains the FP Unfl exception stack frame   #
 840 #       - The fsave frame contains the source operand                   #
 841 #                                                                       #
 842 # OUTPUT ************************************************************** #
 843 #       Underflow Exception enabled:                                    #
 844 #       - The system stack is unchanged                                 #
 845 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
 846 #       Underflow Exception disabled:                                   #
 847 #       - The system stack is unchanged                                 #
 848 #       - The "exception present" flag in the fsave frame is cleared    #
 849 #                                                                       #
 850 # ALGORITHM *********************************************************** #
 851 #       On the 060, if an FP underflow is present as the result of any  #
 852 # instruction, the 060 will take an underflow exception whether the     #
 853 # exception is enabled or disabled in the FPCR. For the disabled case,  #
 854 # This handler emulates the instruction to determine what the correct   #
 855 # default result should be for the operation. This default result is    #
 856 # then stored in either the FP regfile, data regfile, or memory.        #
 857 # Finally, the handler exits through the "callout" _fpsp_done()         #
 858 # denoting that no exceptional conditions exist within the machine.     #
 859 #       If the exception is enabled, then this handler must create the  #
 860 # exceptional operand and place it in the fsave state frame, and store  #
 861 # the default result (only if the instruction is opclass 3). For        #
 862 # exceptions enabled, this handler must exit through the "callout"      #
 863 # _real_unfl() so that the operating system enabled underflow handler   #
 864 # can handle this case.                                                 #
 865 #       Two other conditions exist. First, if underflow was disabled    #
 866 # but the inexact exception was enabled, this handler must exit         #
 867 # through the "callout" _real_inex() if the emulated result             #
 868 # was inexact.                                                          #
 869 #       Also, in the case of an opclass three instruction where         #
 870 # underflow was disabled and the trace exception was enabled, this      #
 871 # handler must exit through the "callout" _real_trace().                #
 872 #                                                                       #
 873 #########################################################################
 874
 875         global          _fpsp_unfl
 876 _fpsp_unfl:
 877 # opclass 0,2 ops fall through to funfl_extract; fmove out goes to funfl_out.
 878 #$#     sub.l           &24,%sp                 # make room for src/dst
 879
 880         link.w          %a6,&-LOCAL_SIZE        # init stack frame
 881
 882         fsave           FP_SRC(%a6)             # grab the "busy" frame
 883
 884         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
 885         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
 886         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
 887
 888 # the FPIAR holds the "current PC" of the faulting instruction
 889         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start at saved FPIAR
 890         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
 891         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
 892         bsr.l           _imem_read_long         # fetch the instruction words
 893         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
 894
 895 ##############################################################################
 896
 897         btst            &0x5,EXC_CMDREG(%a6)    # is instr an fmove out?
 898         bne.w           funfl_out               # yes
 899
 900
 901         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 902         bsr.l           fix_skewed_ops          # fix src op
 903
 904         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 905         bsr.l           set_tag_x               # tag the operand type
 906         mov.b           %d0,STAG(%a6)           # save src optype tag; maybe NORM,DENORM
 907
 908 # bit five of the fp ext word separates the monadic and dyadic operations
 909 # that can pass through _fpsp_unfl(). remember that fcmp and ftst
 910 # will never take this exception.
 911         btst            &0x5,1+EXC_CMDREG(%a6)  # is op monadic or dyadic?
 912         beq.b           funfl_extract           # monadic
 913
 914 # now, what's left that's not dyadic is fsincos. we can distinguish it
 915 # from all dyadics by the '0110xxx' pattern (bit four of the ext word is set).
 916         btst            &0x4,1+EXC_CMDREG(%a6)  # is op an fsincos?
 917         bne.b           funfl_extract           # yes
 918
 919         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
 920         bsr.l           load_fpn2               # load dst into FP_DST
 921
 922         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
 923         bsr.l           set_tag_x               # tag the operand type
 924         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
 925         bne.b           funfl_op2_done          # no
 926         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
 927 funfl_op2_done:
 928         mov.b           %d0,DTAG(%a6)           # save dst optype tag
 929
 930 funfl_extract:
 931 # emulate the opclass 0,2 operation, store the result, then pick the exit path.
 932 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
 933 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
 934 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
 935 #$#     mov.l           FP_DST_EX(%a6),TRAP_DSTOP_EX(%a6)
 936 #$#     mov.l           FP_DST_HI(%a6),TRAP_DSTOP_HI(%a6)
 937 #$#     mov.l           FP_DST_LO(%a6),TRAP_DSTOP_LO(%a6)
 938
 939         clr.l           %d0
 940         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
 941
 942         mov.b           1+EXC_CMDREG(%a6),%d1   # fetch 2nd byte of cmd reg
 943         andi.w          &0x007f,%d1             # extract extension
 944
 945         andi.l          &0x00ff01ff,USER_FPSR(%a6) # clear bits 31-24,15-9; keep accrued byte
 946
 947         fmov.l          &0x0,%fpcr              # zero current control regs
 948         fmov.l          &0x0,%fpsr
 949
 950         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
 951         lea             FP_DST(%a6),%a1         # pass: ptr to dst op
 952
 953 # maybe we can make these entry points ONLY the UNFL entry points of each routine.
 954         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
 955         jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call routine at tbl_unsupp + %d1
 956
 957         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst reg field
 958         bsr.l           store_fpreg             # store result to FP regfile
 959
 960 # The `060 FPU multiplier hardware is such that if the result of a
 961 # multiply operation is the smallest possible normalized number
 962 # (0x00000000_80000000_00000000), then the machine will take an
 963 # underflow exception. Since this is incorrect, we need to check
 964 # if our emulation, after re-doing the operation, decided that
 965 # no underflow was called for. We do these checks only in
 966 # funfl_{unfl,inex}_on() because w/ both exceptions disabled, this
 967 # special case will simply exit gracefully with the correct result.
 968
 969 # the exceptional possibilities we have left ourselves with are ONLY underflow
 970 # and inexact. and, the inexact is such that underflow occurred and was disabled
 971 # but inexact was enabled.
 972         btst            &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
 973         bne.b           funfl_unfl_on           # yes
 974
 975 funfl_chkinex:
 976         btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
 977         bne.b           funfl_inex_on           # yes
 978
 979 funfl_exit:
 980         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
 981         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
 982         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
 983
 984         unlk            %a6
 985 #$#     add.l           &24,%sp
 986         bra.l           _fpsp_done              # no enabled exception; all work done
 987
 988 # underflow is enabled AND the `060 reported an underflow. if emulation agrees,
 989 # the EXOP is in fp1 (don't forget to save fp0). what to do now?
 990 # well, we simply have to get to go to _real_unfl()!
 991 funfl_unfl_on:
 992
 993 # The `060 FPU multiplier hardware is such that if the result of a
 994 # multiply operation is the smallest possible normalized number
 995 # (0x00000000_80000000_00000000), then the machine will take an
 996 # underflow exception. Since this is incorrect, we check here to see
 997 # if our emulation, after re-doing the operation, decided that
 998 # no underflow was called for.
 999         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did emulation set underflow?
1000         beq.w           funfl_chkinex           # no; go check inexact instead
1001
1002 funfl_unfl_on2:
1003         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP (fp1) to stack
1004
1005         mov.w           &0xe003,2+FP_SRC(%a6)   # save exc status
1006
1007         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1008         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1009         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1010
1011         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1012
1013         unlk            %a6
1014
1015         bra.l           _real_unfl              # exit to OS underflow handler
1016
1017 # underflow occurred but is disabled. meanwhile, inexact is enabled. Therefore,
1018 # we must jump to _real_inex(), but only if the emulated result was inexact.
1019 funfl_inex_on:
1020
1021 # The `060 FPU multiplier hardware is such that if the result of a
1022 # multiply operation is the smallest possible normalized number
1023 # (0x00000000_80000000_00000000), then the machine will take an
1024 # underflow exception.
1025 # But, whether bogus or not, if inexact is enabled AND it occurred,
1026 # then we have to branch to _real_inex().
1027
1028         btst            &inex2_bit,FPSR_EXCEPT(%a6) # did emulation set inexact?
1029         beq.w           funfl_exit              # no; exit with nothing pending
1030
1031 funfl_inex_on2:
1032
1033         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to stack
1034
1035         mov.b           &0xc4,1+EXC_VOFF(%a6)   # vector offset = 0xc4
1036         mov.w           &0xe001,2+FP_SRC(%a6)   # save exc status
1037
1038         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1039         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1040         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1041
1042         frestore        FP_SRC(%a6)             # do this after fmovm,other f<op>s!
1043
1044         unlk            %a6
1045
1046         bra.l           _real_inex              # exit to OS inexact handler
1047
1048 #######################################################################
1049 funfl_out:
1050 # opclass 3 (fmove out): emulate via fout(), then pick the exit path.
1051
1052 #$#     mov.l           FP_SRC_EX(%a6),TRAP_SRCOP_EX(%a6)
1053 #$#     mov.l           FP_SRC_HI(%a6),TRAP_SRCOP_HI(%a6)
1054 #$#     mov.l           FP_SRC_LO(%a6),TRAP_SRCOP_LO(%a6)
1055
1056 # the src operand is definitely a NORM(!), so tag it as such
1057         mov.b           &NORM,STAG(%a6)         # set src optype tag
1058
1059         clr.l           %d0
1060         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
1061
1062         and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception byte (bits 15-8)
1063
1064         fmov.l          &0x0,%fpcr              # zero current control regs
1065         fmov.l          &0x0,%fpsr
1066
1067         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1068
1069         bsr.l           fout                    # emulate the opclass 3 instruction
1070
1071         btst            &unfl_bit,FPCR_ENABLE(%a6) # is underflow enabled?
1072         bne.w           funfl_unfl_on2          # yes; skips the FPSR_EXCEPT re-check
1073
1074         btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
1075         bne.w           funfl_inex_on2          # yes; skips the FPSR_EXCEPT re-check
1076
1077         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
1078         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1079         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1080
1081         unlk            %a6
1082 #$#     add.l           &24,%sp
1083
1084         btst            &0x7,(%sp)              # is trace on?
1085         beq.l           _fpsp_done              # no
1086
1087         fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
1088         mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
1089         bra.l           _real_trace             # exit through trace callout
1090
1091 #########################################################################
1092 # XDEF **************************************************************** #
1093 #       _fpsp_unsupp(): 060FPSP entry point for FP "Unimplemented       #
1094 #                       Data Type" exception.                           #
1095 #                                                                       #
1096 #       This handler should be the first code executed upon taking the  #
1097 #       FP Unimplemented Data Type exception in an operating system.    #
1098 #                                                                       #
1099 # XREF **************************************************************** #
1100 #       _imem_read_{word,long}() - read instruction word/longword       #
1101 #       fix_skewed_ops() - adjust src operand in fsave frame            #
1102 #       set_tag_x() - determine optype of src/dst operands              #
1103 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
1104 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
1105 #       load_fpn2() - load dst operand from FP regfile                  #
1106 #       load_fpn1() - load src operand from FP regfile                  #
1107 #       fout() - emulate an opclass 3 instruction                       #
1108 #       tbl_unsupp - addr of table of emulation routines, opclass 0,2   #
1109 #       _real_inex() - "callout" to operating system inexact handler    #
1110 #       _fpsp_done() - "callout" for exit; work all done                #
1111 #       _real_trace() - "callout" for Trace enabled exception           #
1112 #       funimp_skew() - adjust fsave src ops to "incorrect" value       #
1113 #       _real_snan() - "callout" for SNAN exception                     #
1114 #       _real_operr() - "callout" for OPERR exception                   #
1115 #       _real_ovfl() - "callout" for OVFL exception                     #
1116 #       _real_unfl() - "callout" for UNFL exception                     #
1117 #       get_packed() - fetch packed operand from memory                 #
1118 #                                                                       #
1119 # INPUT *************************************************************** #
1120 #       - The system stack contains the "Unimp Data Type" stk frame     #
1121 #       - The fsave frame contains the src op (for UNNORM/DENORM)       #
1122 #                                                                       #
1123 # OUTPUT ************************************************************** #
1124 #       If Inexact exception (opclass 3):                               #
1125 #       - The system stack is changed to an Inexact exception stk frame #
1126 #       If SNAN exception (opclass 3):                                  #
1127 #       - The system stack is changed to an SNAN exception stk frame    #
1128 #       If OPERR exception (opclass 3):                                 #
1129 #       - The system stack is changed to an OPERR exception stk frame   #
1130 #       If OVFL exception (opclass 3):                                  #
1131 #       - The system stack is changed to an OVFL exception stk frame    #
1132 #       If UNFL exception (opclass 3):                                  #
1133 #       - The system stack is changed to an UNFL exception stack frame  #
1134 #       If Trace exception enabled:                                     #
1135 #       - The system stack is changed to a Trace exception stack frame  #
1136 #       Else: (normal case)                                             #
1137 #       - Correct result has been stored as appropriate                 #
1138 #                                                                       #
1139 # ALGORITHM *********************************************************** #
1140 #       Two main instruction types can enter here: (1) DENORM or UNNORM #
1141 # unimplemented data types. These can be either opclass 0,2 or 3        #
1142 # instructions, and (2) PACKED unimplemented data format instructions   #
1143 # also of opclasses 0,2, or 3.                                          #
1144 #       For UNNORM/DENORM opclass 0 and 2, the handler fetches the src  #
1145 # operand from the fsave state frame and the dst operand (if dyadic)    #
1146 # from the FP register file. The instruction is then emulated by        #
1147 # choosing an emulation routine from a table of routines indexed by     #
1148 # instruction type. Once the instruction has been emulated and result   #
1149 # saved, then we check to see if any enabled exceptions resulted from   #
1150 # instruction emulation. If none, then we exit through the "callout"    #
1151 # _fpsp_done(). If there is an enabled FP exception, then we insert     #
1152 # this exception into the FPU in the fsave state frame and then exit    #
1153 # through _fpsp_done().                                                 #
1154 #       PACKED opclass 0 and 2 is similar in how the instruction is     #
1155 # emulated and exceptions handled. The differences occur in how the     #
1156 # handler loads the packed op (by calling get_packed() routine) and     #
1157 # by the fact that a Trace exception could be pending for PACKED ops.   #
1158 # If a Trace exception is pending, then the current exception stack     #
1159 # frame is changed to a Trace exception stack frame and an exit is      #
1160 # made through _real_trace().                                           #
1161 #       For UNNORM/DENORM opclass 3, the actual move out to memory is   #
1162 # performed by calling the routine fout(). If no exception should occur #
1163 # as the result of emulation, then an exit either occurs through        #
1164 # _fpsp_done() or through _real_trace() if a Trace exception is pending #
1165 # (a Trace stack frame must be created here, too). If an FP exception   #
1166 # should occur, then we must create an exception stack frame of that    #
1167 # type and jump to either _real_snan(), _real_operr(), _real_inex(),    #
1168 # _real_unfl(), or _real_ovfl() as appropriate. PACKED opclass 3        #
1169 # emulation is performed in a similar manner.                           #
1170 #                                                                       #
1171 #########################################################################
1172
1173 #
1174 # (1) DENORM and UNNORM (unimplemented) data types:
1175 #
1176 #                               post-instruction
1177 #                               *****************
1178 #                               *      EA       *
1179 #        pre-instruction        *               *
1180 #       *****************       *****************
1181 #       * 0x0 *  0x0dc  *       * 0x3 *  0x0dc  *
1182 #       *****************       *****************
1183 #       *     Next      *       *     Next      *
1184 #       *      PC       *       *      PC       *
1185 #       *****************       *****************
1186 #       *      SR       *       *      SR       *
1187 #       *****************       *****************
1188 #
1189 # (2) PACKED format (unsupported) opclasses two and three:
1190 #       *****************
1191 #       *      EA       *
1192 #       *               *
1193 #       *****************
1194 #       * 0x2 *  0x0dc  *
1195 #       *****************
1196 #       *     Next      *
1197 #       *      PC       *
1198 #       *****************
1199 #       *      SR       *
1200 #       *****************
1201 #
1202         global          _fpsp_unsupp
1203 _fpsp_unsupp:
1204
1205         link.w          %a6,&-LOCAL_SIZE        # init stack frame
1206
1207         fsave           FP_SRC(%a6)             # save fp state
1208
1209         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
1210         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
1211         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
1212
1213         btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
1214         bne.b           fu_s
1215 fu_u:
1216         mov.l           %usp,%a0                # fetch user stack pointer
1217         mov.l           %a0,EXC_A7(%a6)         # save on stack
1218         bra.b           fu_cont
1219 # if the exception is an opclass zero or two unimplemented data type
1220 # exception, then the a7' calculated here is wrong since it doesn't
1221 # stack an ea. however, we don't need an a7' for this case anyways.
1222 fu_s:
1223         lea             0x4+EXC_EA(%a6),%a0     # load old a7'
1224         mov.l           %a0,EXC_A7(%a6)         # save on stack
1225
1226 fu_cont:
1227 # common path: fetch the faulting instruction words, then dispatch on them.
1228 # the FPIAR holds the "current PC" of the faulting instruction
1229 # the FPIAR should be set correctly for ALL exceptions passing through
1230 # this point.
1231         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # EXTWPTR = saved FPIAR
1232         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
1233         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
1234         bsr.l           _imem_read_long         # fetch the instruction words
1235         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
1236
1237 ############################
1238
1239         clr.b           SPCOND_FLG(%a6)         # clear special condition flag
1240
1241 # Separate opclass three (fpn-to-mem) ops since they have a different
1242 # stack frame and protocol.
1243         btst            &0x5,EXC_CMDREG(%a6)    # is it an fmove out?
1244         bne.w           fu_out                  # yes
1245
1246 # Separate packed opclass two instructions (top six bits of CMDREG == 0x13).
1247         bfextu          EXC_CMDREG(%a6){&0:&6},%d0
1248         cmpi.b          %d0,&0x13
1249         beq.w           fu_in_pack
1250
1251
1252 # I'm not sure at this point what FPSR bits are valid for this instruction.
1253 # so, since the emulation routines re-create them anyways, zero exception field
1254         andi.l          &0x00ff00ff,USER_FPSR(%a6) # zero exception field (mask also clears the top byte)
1255
1256         fmov.l          &0x0,%fpcr              # zero current control regs
1257         fmov.l          &0x0,%fpsr
1258
1259 # Opclass two w/ memory-to-fpn operation will have an incorrect extended
1260 # precision format if the src format was single or double and the
1261 # source data type was an INF, NAN, DENORM, or UNNORM
1262         lea             FP_SRC(%a6),%a0         # pass ptr to input
1263         bsr.l           fix_skewed_ops
1264
1265 # we don't know whether the src operand or the dst operand (or both) is the
1266 # UNNORM or DENORM. call the function that tags the operand type. if the
1267 # input is an UNNORM, then convert it to a NORM, DENORM, or ZERO.
1268         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
1269         bsr.l           set_tag_x               # tag the operand type
1270         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1271         bne.b           fu_op2                  # no
1272         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1273
1274 fu_op2:
1275         mov.b           %d0,STAG(%a6)           # save src optype tag (d0 from set_tag_x, or from unnorm_fix if it ran)
1276
1277         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1278
1279 # bit five of the fp extension word separates the monadic and dyadic operations
1280 # at this point
1281         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1282         beq.b           fu_extract              # monadic
1283         cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1284         beq.b           fu_extract              # yes, so it's monadic, too
1285
1286         bsr.l           load_fpn2               # load dst into FP_DST
1287
1288         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1289         bsr.l           set_tag_x               # tag the operand type
1290         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1291         bne.b           fu_op2_done             # no
1292         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1293 fu_op2_done:
1294         mov.b           %d0,DTAG(%a6)           # save dst optype tag; falls through to fu_extract
1295
1296 fu_extract:                                     # set up arguments and call the emulation routine
1297         clr.l           %d0
1298         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1299
1300         bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension (low 7 bits of 2nd CMDREG byte)
1301
1302         lea             FP_SRC(%a6),%a0         # a0 = ptr to src operand
1303         lea             FP_DST(%a6),%a1         # a1 = ptr to dst operand
1304
1305         mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr (offset from tbl_unsupp)
1306         jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + offset
1307
1308 #
1309 # Exceptions in order of precedence:
1310 #       BSUN    : none
1311 #       SNAN    : all dyadic ops
1312 #       OPERR   : fsqrt(-NORM)
1313 #       OVFL    : all except ftst,fcmp
1314 #       UNFL    : all except ftst,fcmp
1315 #       DZ      : fdiv
1316 #       INEX2   : all except ftst,fcmp
1317 #       INEX1   : none (packed doesn't go through here)
1318 #
1319
1320 # we determine the highest priority exception(if any) set by the
1321 # emulation routine that has also been enabled by the user.
1322         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1323         bne.b           fu_in_ena               # some are enabled
1324
1325 fu_in_cont:
1326 # no enabled exception to report: store the result unless the op is fcmp or ftst, which do not store any result.
1327         mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1328         andi.b          &0x38,%d0               # extract bits 3-5
1329         cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1330         beq.b           fu_in_exit              # yes
1331
1332         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1333         bsr.l           store_fpreg             # store the result
1334
1335 fu_in_exit:
1336 # normal exit: restore the saved register state, drop the frame, leave via _fpsp_done.
1337         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1338         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1339         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1340
1341         unlk            %a6
1342
1343         bra.l           _fpsp_done
1344
1345 fu_in_ena:                                      # entered with d0.b = FPCR_ENABLE (non-zero)
1346         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1347         bfffo           %d0{&24:&8},%d0         # find highest priority exception
1348         bne.b           fu_in_exc               # there is at least one set
1349
1350 #
1351 # No exceptions occurred that were also enabled. Now:
1352 #
1353 #       if (OVFL && ovfl_disabled && inexact_enabled) {
1354 #           branch to _real_inex() (even if the result was exact!);
1355 #       } else {
1356 #           save the result in the proper fp reg (unless the op is fcmp or ftst);
1357 #           return;
1358 #       }
1359 #
1360         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1361         beq.b           fu_in_cont              # no
1362
1363 fu_in_ovflchk:
1364         btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1365         beq.b           fu_in_cont              # no
1366         bra.w           fu_in_exc_ovfl          # go insert overflow frame
1367
1368 #
1369 # An exception occurred and that exception was enabled:
1370 #
1371 #       shift enabled exception field into lo byte of d0;
1372 #       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1373 #           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1374 #               /*
1375 #                * this is the case where we must call _real_inex() now or else
1376 #                * there will be no other way to pass it the exceptional operand
1377 #                */
1378 #               call _real_inex();
1379 #       } else {
1380 #               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1381 #       }
1382 #
1383 fu_in_exc:
1384         subi.l          &24,%d0                 # fix offset to be 0-7 (bfffo returned 24-31)
1385         cmpi.b          %d0,&0x6                # is exception INEX? (6)
1386         bne.b           fu_in_exc_exit          # no
1387
1388 # the enabled exception was inexact
1389         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1390         bne.w           fu_in_exc_unfl          # yes
1391         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1392         bne.w           fu_in_exc_ovfl          # yes
1393
1394 # here, we insert the correct fsave status value into the fsave frame for the
1395 # corresponding exception. the operand in the fsave frame should be the original
1396 # src operand. on entry, d0 = index (0-7) into tbl_except.
1397 fu_in_exc_exit:
1398         mov.l           %d0,-(%sp)              # save d0
1399         bsr.l           funimp_skew             # skew sgl or dbl inputs
1400         mov.l           (%sp)+,%d0              # restore d0
1401
1402         mov.w           (tbl_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # create exc status
1403
1404         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1405         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1406         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1407
1408         frestore        FP_SRC(%a6)             # restore src op
1409
1410         unlk            %a6
1411
1412         bra.l           _fpsp_done
1413
1414 tbl_except:                                     # status words written to 2+FP_SRC, indexed by exception number 0-7
1415         short           0xe000,0xe006,0xe004,0xe005 # index 0,1,2,3
1416         short           0xe003,0xe002,0xe001,0xe001 # index 4,5,6,7
1417
1418 fu_in_exc_unfl:                                 # inexact enabled and underflow bit set
1419         mov.w           &0x4,%d0                # select tbl_except entry 4
1420         bra.b           fu_in_exc_exit
1421 fu_in_exc_ovfl:                                 # inexact enabled and overflow bit set
1422         mov.w           &0x03,%d0               # select tbl_except entry 3
1423         bra.b           fu_in_exc_exit
1424
1425 # If the input operand to this operation was opclass two and a single
1426 # or double precision denorm, inf, or nan, the operand needs to be
1427 # "corrected" in order to have the proper equivalent extended precision
1428 # number. in: a0 = ptr to operand; the operand is modified in place. d0 is used as scratch.
1429         global          fix_skewed_ops
1430 fix_skewed_ops:
1431         bfextu          EXC_CMDREG(%a6){&0:&6},%d0 # extract opclass,src fmt
1432         cmpi.b          %d0,&0x11               # is class = 2 & fmt = sgl?
1433         beq.b           fso_sgl                 # yes
1434         cmpi.b          %d0,&0x15               # is class = 2 & fmt = dbl?
1435         beq.b           fso_dbl                 # yes
1436         rts                                     # no
1437
1438 fso_sgl:
1439         mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1440         andi.w          &0x7fff,%d0             # strip sign
1441         cmpi.w          %d0,&0x3f80             # is |exp| == $3f80? ($3fff - $7f)
1442         beq.b           fso_sgl_dnrm_zero       # yes
1443         cmpi.w          %d0,&0x407f             # no; is |exp| == $407f? ($3fff + $80)
1444         beq.b           fso_infnan              # yes
1445         rts                                     # no
1446
1447 fso_sgl_dnrm_zero:
1448         andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1449         beq.b           fso_zero                # it's a skewed zero
1450 fso_sgl_dnrm:
1451 # here, we count on norm not to alter a0...
1452         bsr.l           norm                    # normalize mantissa
1453         neg.w           %d0                     # -shft amt
1454         addi.w          &0x3f81,%d0             # adjust new exponent
1455         andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1456         or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1457         rts
1458
1459 fso_zero:
1460         andi.w          &0x8000,LOCAL_EX(%a0)   # clear bogus exponent
1461         rts
1462
1463 fso_infnan:
1464         andi.b          &0x7f,LOCAL_HI(%a0)     # clear j-bit
1465         ori.w           &0x7fff,LOCAL_EX(%a0)   # make exponent = $7fff
1466         rts
1467
1468 fso_dbl:
1469         mov.w           LOCAL_EX(%a0),%d0       # fetch src exponent
1470         andi.w          &0x7fff,%d0             # strip sign
1471         cmpi.w          %d0,&0x3c00             # is |exp| == $3c00? ($3fff - $3ff)
1472         beq.b           fso_dbl_dnrm_zero       # yes
1473         cmpi.w          %d0,&0x43ff             # no; is |exp| == $43ff? ($3fff + $400)
1474         beq.b           fso_infnan              # yes
1475         rts                                     # no
1476
1477 fso_dbl_dnrm_zero:
1478         andi.l          &0x7fffffff,LOCAL_HI(%a0) # clear j-bit
1479         bne.b           fso_dbl_dnrm            # it's a skewed denorm
1480         tst.l           LOCAL_LO(%a0)           # is it a zero?
1481         beq.b           fso_zero                # yes
1482 fso_dbl_dnrm:
1483 # here, we count on norm not to alter a0...
1484         bsr.l           norm                    # normalize mantissa
1485         neg.w           %d0                     # -shft amt
1486         addi.w          &0x3c01,%d0             # adjust new exponent
1487         andi.w          &0x8000,LOCAL_EX(%a0)   # clear old exponent
1488         or.w            %d0,LOCAL_EX(%a0)       # insert new exponent
1489         rts
1490
1491 #################################################################
1492
1493 # fmove out took an unimplemented data type exception.
1494 # the src operand is in FP_SRC. Call fout to write out the result and
1495 # to determine which exceptions, if any, to take.
1496 fu_out:
1497
1498 # Separate packed move outs from the UNNORM and DENORM move outs.
1499         bfextu          EXC_CMDREG(%a6){&3:&3},%d0 # 3-bit field at bit offset 3 of CMDREG
1500         cmpi.b          %d0,&0x3
1501         beq.w           fu_out_pack
1502         cmpi.b          %d0,&0x7
1503         beq.w           fu_out_pack
1504
1505
1506 # I'm not sure at this point what FPSR bits are valid for this instruction.
1507 # so, since the emulation routines re-create them anyways, zero exception field.
1508 # fmove out doesn't affect ccodes.
1509         and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
1510
1511         fmov.l          &0x0,%fpcr              # zero current control regs
1512         fmov.l          &0x0,%fpsr
1513
1514 # the src can ONLY be a DENORM or an UNNORM! so, don't make any big subroutine
1515 # call here. just figure out what it is...
1516         mov.w           FP_SRC_EX(%a6),%d0      # get exponent
1517         andi.w          &0x7fff,%d0             # strip sign
1518         beq.b           fu_out_denorm           # it's a DENORM
1519
1520         lea             FP_SRC(%a6),%a0
1521         bsr.l           unnorm_fix              # it's an UNNORM; fix it
1522
1523         mov.b           %d0,STAG(%a6)           # save the tag left in d0 by unnorm_fix
1524
1525         bra.b           fu_out_cont
1526 fu_out_denorm:
1527         mov.b           &DENORM,STAG(%a6)
1528 fu_out_cont:
1529
1530         clr.l           %d0
1531         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1532
1533         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
1534
1535         mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
1536         bsr.l           fout                    # call fmove out routine
1537
1538 # Exceptions in order of precedence:
1539 #       BSUN    : none
1540 #       SNAN    : none
1541 #       OPERR   : fmove.{b,w,l} out of large UNNORM
1542 #       OVFL    : fmove.{s,d}
1543 #       UNFL    : fmove.{s,d,x}
1544 #       DZ      : none
1545 #       INEX2   : all
1546 #       INEX1   : none (packed doesn't travel through here)
1547
1548 # determine the highest priority exception(if any) set by the
1549 # emulation routine that has also been enabled by the user.
1550         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1551         bne.w           fu_out_ena              # some are enabled
1552
1553 fu_out_done:
1554 # fmove out finished with no enabled exception to report.
1555         mov.l           EXC_A6(%a6),(%a6)       # in case a6 changed
1556
1557 # on extended precision opclass three instructions using pre-decrement or
1558 # post-increment addressing mode, the address register is not updated. if the
1559 # address register was the stack pointer used from user mode, then let's update
1560 # it here. if it was used from supervisor mode, then we have to handle this
1561 # as a special case.
1562         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1563         bne.b           fu_out_done_s           # supervisor
1564
1565         mov.l           EXC_A7(%a6),%a0         # restore a7
1566         mov.l           %a0,%usp
1567
1568 fu_out_done_cont:
1569         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1570         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1571         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1572
1573         unlk            %a6
1574
1575         btst            &0x7,(%sp)              # is trace on?
1576         bne.b           fu_out_trace            # yes
1577
1578         bra.l           _fpsp_done
1579
1580 # is the ea mode pre-decrement of the stack pointer from supervisor mode?
1581 # ("fmov.x fpm,-(a7)") if so, the frame is moved down and the result stored above it.
1582 fu_out_done_s:
1583         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
1584         bne.b           fu_out_done_cont
1585
1586 # the extended precision result is still in fp0. but, we need to save it
1587 # somewhere on the stack until we can copy it to its final resting place.
1588 # here, we're counting on the top of the stack to be the old place-holders
1589 # for fp0/fp1 which have already been restored. that way, we can write
1590 # over those destinations with the shifted stack frame.
1591         fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1592
1593         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1594         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1595         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1596
1597         mov.l           (%a6),%a6               # restore frame pointer
1598 # no unlk here: sp is unchanged, so the frame fields are addressed below as LOCAL_SIZE+offset(%sp).
1599         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy longword 12 bytes down
1600         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy longword 12 bytes down
1601
1602 # now, copy the result to the proper place on the stack
1603         mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1604         mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1605         mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1606
1607         add.l           &LOCAL_SIZE-0x8,%sp     # discard the locals; sp is now tested for trace below
1608
1609         btst            &0x7,(%sp)              # is trace on?
1610         bne.b           fu_out_trace            # yes
1611
1612         bra.l           _fpsp_done
1613
1614 fu_out_ena:                                     # entered with d0.b = FPCR_ENABLE (non-zero)
1615         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
1616         bfffo           %d0{&24:&8},%d0         # find highest priority exception
1617         bne.b           fu_out_exc              # there is at least one set
1618
1619 # no exceptions were set.
1620 # if a disabled overflow occurred and inexact was enabled but the result
1621 # was exact, then a branch to _real_inex() is made.
1622         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1623         beq.w           fu_out_done             # no
1624
1625 fu_out_ovflchk:
1626         btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1627         beq.w           fu_out_done             # no
1628         bra.w           fu_inex                 # yes
1629
1630 #
1631 # The fp move out that took the "Unimplemented Data Type" exception was
1632 # being traced. Since the stack frames are similar, get the "current" PC
1633 # from FPIAR and put it in the trace stack frame then jump to _real_trace().
1634 #
1635 #                 UNSUPP FRAME             TRACE FRAME
1636 #               *****************       *****************
1637 #               *      EA       *       *    Current    *
1638 #               *               *       *      PC       *
1639 #               *****************       *****************
1640 #               * 0x3 *  0x0dc  *       * 0x2 *  0x024  *
1641 #               *****************       *****************
1642 #               *     Next      *       *     Next      *
1643 #               *      PC       *       *      PC       *
1644 #               *****************       *****************
1645 #               *      SR       *       *      SR       *
1646 #               *****************       *****************
1647 #
1648 fu_out_trace:
1649         mov.w           &0x2024,0x6(%sp)        # frame format 0x2, vector offset 0x024
1650         fmov.l          %fpiar,0x8(%sp)         # "current PC" replaces the EA field
1651         bra.l           _real_trace
1652
1653 # an exception occurred and that exception was enabled.
1654 fu_out_exc:
1655         subi.l          &24,%d0                 # fix offset to be 0-7 (bfffo returned 24-31)
1656
1657 # we don't mess with the existing fsave frame. just re-insert it and
1658 # jump to the "_real_{}()" handler...
1659         mov.w           (tbl_fu_out.b,%pc,%d0.w*2),%d0 # fetch handler offset from tbl_fu_out
1660         jmp             (tbl_fu_out.b,%pc,%d0.w*1) # jump to tbl_fu_out + offset
1661
1662         swbeg           &0x8
1663 tbl_fu_out:
1664         short           tbl_fu_out      - tbl_fu_out    # BSUN can't happen
1665         short           tbl_fu_out      - tbl_fu_out    # SNAN can't happen
1666         short           fu_operr        - tbl_fu_out    # OPERR
1667         short           fu_ovfl         - tbl_fu_out    # OVFL
1668         short           fu_unfl         - tbl_fu_out    # UNFL
1669         short           tbl_fu_out      - tbl_fu_out    # DZ can't happen
1670         short           fu_inex         - tbl_fu_out    # INEX2
1671         short           tbl_fu_out      - tbl_fu_out    # INEX1 won't make it here
1672
1673 # for snan,operr,ovfl,unfl, src op is still in FP_SRC so just
1674 # frestore it. (fu_snan is not reached via tbl_fu_out, whose SNAN slot is a zero offset.)
1675 fu_snan:
1676         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1677         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1678         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1679
1680         mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
1681         mov.w           &0xe006,2+FP_SRC(%a6)   # exc status (same value as tbl_except entry 1)
1682
1683         frestore        FP_SRC(%a6)             # restore src op
1684
1685         unlk            %a6
1686
1687
1688         bra.l           _real_snan
1689
1690 fu_operr:                                       # enabled OPERR on fmove out: hand off to _real_operr
1691         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1692         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1693         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1694
1695         mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
1696         mov.w           &0xe004,2+FP_SRC(%a6)   # exc status (same value as tbl_except entry 2)
1697
1698         frestore        FP_SRC(%a6)             # restore src op
1699
1700         unlk            %a6
1701
1702
1703         bra.l           _real_operr
1704
1705 fu_ovfl:                                        # enabled OVFL on fmove out: hand off to _real_ovfl
1706         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1707
1708         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1709         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1710         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1711
1712         mov.w           &0x30d4,EXC_VOFF(%a6)   # vector offset = 0xd4
1713         mov.w           &0xe005,2+FP_SRC(%a6)   # exc status (same value as tbl_except entry 3)
1714
1715         frestore        FP_SRC(%a6)             # restore EXOP
1716
1717         unlk            %a6
1718
1719         bra.l           _real_ovfl
1720
1721 # underflow can happen for extended precision. extended precision opclass
1722 # three instruction exceptions don't update the stack pointer. so, if the
1723 # exception occurred from user mode, then simply update a7 and exit normally.
1724 # if the exception occurred from supervisor mode, check if the <ea> mode was -(sp) (see fu_unfl_s).
1725 fu_unfl:
1726         mov.l           EXC_A6(%a6),(%a6)       # restore a6
1727
1728         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1729         bne.w           fu_unfl_s               # supervisor
1730
1731         mov.l           EXC_A7(%a6),%a0         # restore a7 whether we need
1732         mov.l           %a0,%usp                # to or not...
1733
1734 fu_unfl_cont:
1735         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1736
1737         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1738         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1739         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1740
1741         mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1742         mov.w           &0xe003,2+FP_SRC(%a6)   # exc status (same value as tbl_except entry 4)
1743
1744         frestore        FP_SRC(%a6)             # restore EXOP
1745
1746         unlk            %a6
1747
1748         bra.l           _real_unfl
1749
1750 fu_unfl_s:                                      # underflow taken from supervisor mode
1751         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was the <ea> mode -(sp)?
1752         bne.b           fu_unfl_cont            # no; use the common underflow exit
1753
1754 # the extended precision result is still in fp0. but, we need to save it
1755 # somewhere on the stack until we can copy it to its final resting place
1756 # (where the exc frame is currently). make sure it's not at the top of the
1757 # frame or it will get overwritten when the exc stack frame is shifted "down".
1758         fmovm.x         &0x80,FP_SRC(%a6)       # put answer on stack
1759         fmovm.x         &0x40,FP_DST(%a6)       # put EXOP on stack
1760
1761         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1762         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1763         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1764
1765         mov.w           &0x30cc,EXC_VOFF(%a6)   # vector offset = 0xcc
1766         mov.w           &0xe003,2+FP_DST(%a6)   # exc status goes into the FP_DST copy this time
1767
1768         frestore        FP_DST(%a6)             # restore EXOP
1769
1770         mov.l           (%a6),%a6               # restore frame pointer
1771 # no unlk here: sp is unchanged, so the frame fields are addressed below as LOCAL_SIZE+offset(%sp).
1772         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy longword 12 bytes down
1773         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # copy longword 12 bytes down
1774         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # EA is copied too on this path
1775
1776 # now, copy the result to the proper place on the stack
1777         mov.l           LOCAL_SIZE+FP_SRC_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
1778         mov.l           LOCAL_SIZE+FP_SRC_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
1779         mov.l           LOCAL_SIZE+FP_SRC_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
1780
1781         add.l           &LOCAL_SIZE-0x8,%sp     # discard the locals
1782
1783         bra.l           _real_unfl
1784
1785 # fmove in and out enter here. builds an inexact frame and exits through _real_inex.
1786 fu_inex:
1787         fmovm.x         &0x40,FP_SRC(%a6)       # save EXOP to the stack
1788
1789         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1790         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1791         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1792
1793         mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
1794         mov.w           &0xe001,2+FP_SRC(%a6)   # exc status (same value as tbl_except entry 6)
1795
1796         frestore        FP_SRC(%a6)             # restore EXOP
1797
1798         unlk            %a6
1799
1800
1801         bra.l           _real_inex
1802
1803 #########################################################################
1804 #########################################################################
1805 fu_in_pack:
1806 # packed opclass two path: fetch the packed source operand, tag operands, then emulate.
1807
1808 # I'm not sure at this point what FPSR bits are valid for this instruction.
1809 # so, since the emulation routines re-create them anyways, zero exception field
1810         andi.l          &0x0ff00ff,USER_FPSR(%a6) # zero exception field (mask also clears the top byte)
1811
1812         fmov.l          &0x0,%fpcr              # zero current control regs
1813         fmov.l          &0x0,%fpsr
1814
1815         bsr.l           get_packed              # fetch packed src operand
1816
1817         lea             FP_SRC(%a6),%a0         # pass ptr to src
1818         bsr.l           set_tag_x               # set src optype tag
1819
1820         mov.b           %d0,STAG(%a6)           # save src optype tag
1821
1822         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1823
1824 # bit five of the fp extension word separates the monadic and dyadic operations
1825 # at this point
1826         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
1827         beq.b           fu_extract_p            # monadic
1828         cmpi.b          1+EXC_CMDREG(%a6),&0x3a # is operation an ftst?
1829         beq.b           fu_extract_p            # yes, so it's monadic, too
1830
1831         bsr.l           load_fpn2               # load dst into FP_DST
1832
1833         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
1834         bsr.l           set_tag_x               # tag the operand type
1835         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
1836         bne.b           fu_op2_done_p           # no
1837         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
1838 fu_op2_done_p:
1839         mov.b           %d0,DTAG(%a6)           # save dst optype tag; falls through to fu_extract_p
1840
1841 fu_extract_p:                                   # set up arguments and call the emulation routine (packed path)
1842         clr.l           %d0
1843         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
1844
1845         bfextu          1+EXC_CMDREG(%a6){&1:&7},%d1 # extract extension (low 7 bits of 2nd CMDREG byte)
1846
1847         lea             FP_SRC(%a6),%a0         # a0 = ptr to src operand
1848         lea             FP_DST(%a6),%a1         # a1 = ptr to dst operand
1849
1850         mov.l           (tbl_unsupp.l,%pc,%d1.l*4),%d1 # fetch routine addr (offset from tbl_unsupp)
1851         jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + offset
1852
1853 #
1854 # Exceptions in order of precedence:
1855 #       BSUN    : none
1856 #       SNAN    : all dyadic ops
1857 #       OPERR   : fsqrt(-NORM)
1858 #       OVFL    : all except ftst,fcmp
1859 #       UNFL    : all except ftst,fcmp
1860 #       DZ      : fdiv
1861 #       INEX2   : all except ftst,fcmp
1862 #       INEX1   : all
1863 #
1864
1865 # we determine the highest priority exception(if any) set by the
1866 # emulation routine that has also been enabled by the user.
1867         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
1868         bne.w           fu_in_ena_p             # some are enabled
1869
1870 fu_in_cont_p:
1871 # no enabled exception to report: store the result unless the op is fcmp or ftst, which do not store any result.
1872         mov.b           1+EXC_CMDREG(%a6),%d0   # fetch extension
1873         andi.b          &0x38,%d0               # extract bits 3-5
1874         cmpi.b          %d0,&0x38               # is instr fcmp or ftst?
1875         beq.b           fu_in_exit_p            # yes
1876
1877         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
1878         bsr.l           store_fpreg             # store the result
1879
1880 fu_in_exit_p:
1881 # normal exit for the packed path; user mode gets its a7 updated from EXC_A7 first.
1882         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1883         bne.w           fu_in_exit_s_p          # supervisor
1884
1885         mov.l           EXC_A7(%a6),%a0         # update user a7
1886         mov.l           %a0,%usp
1887
1888 fu_in_exit_cont_p:
1889         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1890         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1891         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1892
1893         unlk            %a6                     # unravel stack frame
1894
1895         btst            &0x7,(%sp)              # is trace on?
1896         bne.w           fu_trace_p              # yes
1897
1898         bra.l           _fpsp_done              # exit to os
1899
1900 # the exception occurred in supervisor mode. check to see if the
1901 # addressing mode was (a7)+. if so, we'll need to shift the
1902 # stack frame "up".
1903 fu_in_exit_s_p:
1904         btst            &mia7_bit,SPCOND_FLG(%a6) # was ea mode (a7)+
1905         beq.b           fu_in_exit_cont_p       # no
1906
1907         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1908         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1909         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1910
1911         unlk            %a6                     # unravel stack frame
1912
1913 # shift the stack frame "up". we don't really care about the <ea> field.
1914         mov.l           0x4(%sp),0x10(%sp)      # move second longword up by 0xc bytes
1915         mov.l           0x0(%sp),0xc(%sp)       # move first longword up by 0xc bytes
1916         add.l           &0xc,%sp                # sp now points at the relocated frame
1917
1918         btst            &0x7,(%sp)              # is trace on?
1919         bne.w           fu_trace_p              # yes
1920
1921         bra.l           _fpsp_done              # exit to os
1922
1923 fu_in_ena_p:                                    # entered with d0.b = FPCR_ENABLE (non-zero)
1924         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled & set
1925         bfffo           %d0{&24:&8},%d0         # find highest priority exception
1926         bne.b           fu_in_exc_p             # at least one was set
1927
1928 #
1929 # No exceptions occurred that were also enabled. Now:
1930 #
1931 #       if (OVFL && ovfl_disabled && inexact_enabled) {
1932 #           branch to _real_inex() (even if the result was exact!);
1933 #       } else {
1934 #           save the result in the proper fp reg (unless the op is fcmp or ftst);
1935 #           return;
1936 #       }
1937 #
1938         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # was overflow set?
1939         beq.w           fu_in_cont_p            # no
1940
1941 fu_in_ovflchk_p:
1942         btst            &inex2_bit,FPCR_ENABLE(%a6) # was inexact enabled?
1943         beq.w           fu_in_cont_p            # no
1944         bra.w           fu_in_exc_ovfl_p        # do _real_inex() now
1945
1946 #
1947 # An exception occurred and that exception was enabled:
1948 #
1949 #       shift enabled exception field into lo byte of d0;
1950 #       if (((INEX2 || INEX1) && inex_enabled && OVFL && ovfl_disabled) ||
1951 #           ((INEX2 || INEX1) && inex_enabled && UNFL && unfl_disabled)) {
1952 #               /*
1953 #                * this is the case where we must call _real_inex() now or else
1954 #                * there will be no other way to pass it the exceptional operand
1955 #                */
1956 #               call _real_inex();
1957 #       } else {
1958 #               restore exc state (SNAN||OPERR||OVFL||UNFL||DZ||INEX) into the FPU;
1959 #       }
1960 #
1961 fu_in_exc_p:
1962         subi.l          &24,%d0                 # fix offset to be 0-7 (bfffo returned 24-31)
1963         cmpi.b          %d0,&0x6                # is exception INEX? (6 or 7)
1964         blt.b           fu_in_exc_exit_p        # no
1965
1966 # the enabled exception was inexact
1967         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did disabled underflow occur?
1968         bne.w           fu_in_exc_unfl_p        # yes
1969         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did disabled overflow occur?
1970         bne.w           fu_in_exc_ovfl_p        # yes
1971
1972 # here, we insert the correct fsave status value into the fsave frame for the
1973 # corresponding exception. the operand in the fsave frame should be the original
1974 # src operand.
1975 # as a reminder for future predicted pain and agony, we are passing in fsave the
1976 # "non-skewed" operand for cases of sgl and dbl src INFs,NANs, and DENORMs.
1977 # this is INCORRECT for enabled SNAN which would give to the user the skewed SNAN!!!
1978 fu_in_exc_exit_p:                       # d0 = exception index (0-7) selecting the fsave status word
1979         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
1980         bne.w           fu_in_exc_exit_s_p      # supervisor
1981
1982         mov.l           EXC_A7(%a6),%a0         # update user a7
1983         mov.l           %a0,%usp
1984
1985 fu_in_exc_exit_cont_p:
1986         mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # fsave status word for exception d0 -> fsave frame
1987
1988         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
1989         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
1990         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
1991
1992         frestore        FP_SRC(%a6)             # restore src op
1993
1994         unlk            %a6                     # unravel stack frame; sp -> exception frame (SR on top)
1995
1996         btst            &0x7,(%sp)              # is trace enabled?
1997         bne.w           fu_trace_p              # yes
1998
1999         bra.l           _fpsp_done              # exit to os
2000
2001 tbl_except_p:                           # fsave status words indexed by exception number (word-sized entries)
2002         short           0xe000,0xe006,0xe004,0xe005 # 0=BSUN, 1=SNAN, 2=OPERR, 3=OVFL
2003         short           0xe003,0xe002,0xe001,0xe001 # 4=UNFL, 5=DZ, 6=INEX2, 7=INEX1
2004
2005 fu_in_exc_ovfl_p:                       # inexact enabled and a disabled overflow occurred
2006         mov.w           &0x3,%d0                # select tbl_except_p entry 3 (0xe005)
2007         bra.w           fu_in_exc_exit_p        # exit with that fsave status
2008
2009 fu_in_exc_unfl_p:                       # inexact enabled and a disabled underflow occurred
2010         mov.w           &0x4,%d0                # select tbl_except_p entry 4 (0xe003)
2011         bra.w           fu_in_exc_exit_p        # exit with that fsave status
2012
2013 fu_in_exc_exit_s_p:                     # supervisor-mode exit; d0 = exception index (0-7)
2014         btst            &mia7_bit,SPCOND_FLG(%a6) # presumably: was the <ea> mode (a7)+ ? (confirm mia7_bit definition)
2015         beq.b           fu_in_exc_exit_cont_p   # flag clear; take the common exit path
2016
2017         mov.w           (tbl_except_p.b,%pc,%d0.w*2),2+FP_SRC(%a6) # fsave status word for exception d0 -> fsave frame
2018
2019         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2020         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2021         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2022
2023         frestore        FP_SRC(%a6)             # restore src op
2024
2025         unlk            %a6                     # unravel stack frame
2026
2027 # shift stack frame "up" by 0xc bytes. who cares about <ea> field (it is not copied).
2028         mov.l           0x4(%sp),0x10(%sp)      # copy frame bytes 4-7 (lo(PC) + format/vector word)
2029         mov.l           0x0(%sp),0xc(%sp)       # copy frame bytes 0-3 (SR + hi(PC))
2030         add.l           &0xc,%sp                # sp -> relocated frame
2031
2032         btst            &0x7,(%sp)              # is trace on?
2033         bne.b           fu_trace_p              # yes
2034
2035         bra.l           _fpsp_done              # exit to os
2036
2037 #
2038 # The opclass two PACKED instruction that took an "Unimplemented Data Type"
2039 # exception was being traced. Make the "current" PC the FPIAR and put it in the
2040 # trace stack frame then jump to _real_trace().
2041 #
2042 #                 UNSUPP FRAME             TRACE FRAME
2043 #               *****************       *****************
2044 #               *      EA       *       *    Current    *
2045 #               *               *       *      PC       *
2046 #               *****************       *****************
2047 #               * 0x2 * 0x0dc   *       * 0x2 *  0x024  *
2048 #               *****************       *****************
2049 #               *     Next      *       *     Next      *
2050 #               *      PC       *       *      PC       *
2051 #               *****************       *****************
2052 #               *      SR       *       *      SR       *
2053 #               *****************       *****************
2054 fu_trace_p:                             # sp -> exception frame (SR on top); convert it to a trace frame
2055         mov.w           &0x2024,0x6(%sp)        # format 0x2, vector offset 0x024 (see TRACE FRAME above)
2056         fmov.l          %fpiar,0x8(%sp)         # "Current PC" field = FPIAR
2057
2058         bra.l           _real_trace             # hand the trace frame to the trace callout
2059
2060 #########################################################
2061 #########################################################
2062 fu_out_pack:                            # packed-format move out ("fmove.p fpn,<ea>") emulation
2063
2064
2065 # I'm not sure at this point what FPSR bits are valid for this instruction.
2066 # so, since the emulation routines re-create them anyways, zero exception field.
2067 # fmove out doesn't affect ccodes.
2068         and.l           &0xffff00ff,USER_FPSR(%a6) # zero exception field
2069
2070         fmov.l          &0x0,%fpcr              # zero current control regs
2071         fmov.l          &0x0,%fpsr
2072
2073         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # 3-bit FP register field of the command word
2074         bsr.l           load_fpn1               # presumably loads that register into FP_SRC (see tagging below)
2075
2076 # unlike other opclass 3, unimplemented data type exceptions, packed must be
2077 # able to detect all operand types.
2078         lea             FP_SRC(%a6),%a0
2079         bsr.l           set_tag_x               # tag the operand type
2080         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2081         bne.b           fu_op2_p                # no
2082         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
2083
2084 fu_op2_p:
2085         mov.b           %d0,STAG(%a6)           # save src optype tag
2086
2087         clr.l           %d0
2088         mov.b           FPCR_MODE(%a6),%d0      # fetch rnd mode/prec
2089
2090         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
2091
2092         mov.l           (%a6),EXC_A6(%a6)       # in case a6 changes
2093         bsr.l           fout                    # call fmove out routine
2094
2095 # Exceptions in order of precedence:
2096 #       BSUN    : no
2097 #       SNAN    : yes
2098 #       OPERR   : if ((k_factor > +17) || (dec. exp exceeds 3 digits))
2099 #       OVFL    : no
2100 #       UNFL    : no
2101 #       DZ      : no
2102 #       INEX2   : yes
2103 #       INEX1   : no
2104
2105 # determine the highest priority exception(if any) set by the
2106 # emulation routine that has also been enabled by the user.
2107         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2108         bne.w           fu_out_ena_p            # some are enabled
2109
2110 fu_out_exit_p:
2111         mov.l           EXC_A6(%a6),(%a6)       # restore a6 (written to the slot that unlk pops)
2112
2113         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2114         bne.b           fu_out_exit_s_p         # supervisor
2115
2116         mov.l           EXC_A7(%a6),%a0         # update user a7
2117         mov.l           %a0,%usp
2118
2119 fu_out_exit_cont_p:
2120         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2121         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2122         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2123
2124         unlk            %a6                     # unravel stack frame
2125
2126         btst            &0x7,(%sp)              # is trace on?
2127         bne.w           fu_trace_p              # yes
2128
2129         bra.l           _fpsp_done              # exit to os
2130
2131 # the exception occurred in supervisor mode. check to see if the
2132 # addressing mode was -(a7). if so, we'll need to shift the
2133 # stack frame "down".
2134 fu_out_exit_s_p:
2135         btst            &mda7_bit,SPCOND_FLG(%a6) # was ea mode -(a7)
2136         beq.b           fu_out_exit_cont_p      # no
2137
2138         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2139         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2140         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2141
2142         mov.l           (%a6),%a6               # restore frame pointer (sp is left at the bottom of the locals)
2143
2144         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # move SR + hi(PC) down 12 bytes
2145         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # move lo(PC) + following word down 12 bytes
2146
2147 # now, copy the result to the proper place on the stack (where the exception frame used to start)
2148         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+EXC_SR+0x0(%sp)
2149         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+EXC_SR+0x4(%sp)
2150         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+EXC_SR+0x8(%sp)
2151
2152         add.l           &LOCAL_SIZE-0x8,%sp     # drop the locals; sp -> relocated frame (assumes EXC_SR = 0x4 -- TODO confirm)
2153
2154         btst            &0x7,(%sp)              # is trace on?
2155         bne.w           fu_trace_p              # yes
2156
2157         bra.l           _fpsp_done              # exit to os
2158
2159 fu_out_ena_p:                           # d0 = FPCR enable byte (non-zero)
2160         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled
2161         bfffo           %d0{&24:&8},%d0         # find highest priority exception (d0 = bit offset 24-31)
2162         beq.w           fu_out_exit_p           # none enabled and set; normal exit
2163
2164         mov.l           EXC_A6(%a6),(%a6)       # restore a6
2165
2166 # an exception occurred and that exception was enabled.
2167 # the only exception possible on packed move out are INEX, OPERR, and SNAN.
2168 fu_out_exc_p:
2169         cmpi.b          %d0,&0x1a               # raw bfffo offset: 0x1a (24+2) is OPERR
2170         bgt.w           fu_inex_p2              # lower priority than OPERR: treat as INEX
2171         beq.w           fu_operr_p              # OPERR; otherwise fall through to fu_snan_p
2171         beq.w           fu_operr_p
2172
2173 fu_snan_p:                              # enabled SNAN on packed move out
2174         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2175         bne.b           fu_snan_s_p             # supervisor
2176
2177         mov.l           EXC_A7(%a6),%a0         # update user a7
2178         mov.l           %a0,%usp
2179         bra.w           fu_snan                 # continue in the common SNAN exit
2180
2181 fu_snan_s_p:
2182         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2183         bne.w           fu_snan                 # no; common SNAN exit
2184
2185 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2186 # the strategy is to move the exception frame "down" 12 bytes. then, we
2187 # can store the default result where the exception frame was.
2188         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2189         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2190         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2191
2192         mov.w           &0x30d8,EXC_VOFF(%a6)   # vector offset = 0xd8
2193         mov.w           &0xe006,2+FP_SRC(%a6)   # set fsave status
2194
2195         frestore        FP_SRC(%a6)             # restore src operand
2196
2197         mov.l           (%a6),%a6               # restore frame pointer
2198
2199         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # move SR + hi(PC) down 12 bytes
2200         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # move lo(PC) + format/vector word
2201         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # move <ea> longword
2202
2203 # now, we copy the default result to its proper location
2204         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2205         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2206         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2207
2208         add.l           &LOCAL_SIZE-0x8,%sp     # drop the locals; sp -> relocated exception frame
2209
2210
2211         bra.l           _real_snan              # exit through the SNAN callout
2212
2213 fu_operr_p:                             # enabled OPERR on packed move out
2214         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2215         bne.w           fu_operr_p_s            # supervisor
2216
2217         mov.l           EXC_A7(%a6),%a0         # update user a7
2218         mov.l           %a0,%usp
2219         bra.w           fu_operr                # continue in the common OPERR exit
2220
2221 fu_operr_p_s:
2222         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2223         bne.w           fu_operr                # no; common OPERR exit
2224
2225 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2226 # the strategy is to move the exception frame "down" 12 bytes. then, we
2227 # can store the default result where the exception frame was.
2228         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2229         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2230         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2231
2232         mov.w           &0x30d0,EXC_VOFF(%a6)   # vector offset = 0xd0
2233         mov.w           &0xe004,2+FP_SRC(%a6)   # set fsave status
2234
2235         frestore        FP_SRC(%a6)             # restore src operand
2236
2237         mov.l           (%a6),%a6               # restore frame pointer
2238
2239         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # move SR + hi(PC) down 12 bytes
2240         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # move lo(PC) + format/vector word
2241         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # move <ea> longword
2242
2243 # now, we copy the default result to its proper location
2244         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2245         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2246         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2247
2248         add.l           &LOCAL_SIZE-0x8,%sp     # drop the locals; sp -> relocated exception frame
2249
2250
2251         bra.l           _real_operr             # exit through the OPERR callout
2252
2253 fu_inex_p2:                             # enabled INEX on packed move out
2254         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2255         bne.w           fu_inex_s_p2            # supervisor
2256
2257         mov.l           EXC_A7(%a6),%a0         # update user a7
2258         mov.l           %a0,%usp
2259         bra.w           fu_inex                 # continue in the common INEX exit
2260
2261 fu_inex_s_p2:
2262         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was ea mode -(a7)?
2263         bne.w           fu_inex                 # no; common INEX exit
2264
2265 # the instruction was "fmove.p fpn,-(a7)" from supervisor mode.
2266 # the strategy is to move the exception frame "down" 12 bytes. then, we
2267 # can store the default result where the exception frame was.
2268         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0/fp1
2269         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2270         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2271
2272         mov.w           &0x30c4,EXC_VOFF(%a6)   # vector offset = 0xc4
2273         mov.w           &0xe001,2+FP_SRC(%a6)   # set fsave status
2274
2275         frestore        FP_SRC(%a6)             # restore src operand
2276
2277         mov.l           (%a6),%a6               # restore frame pointer
2278
2279         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # move SR + hi(PC) down 12 bytes
2280         mov.l           LOCAL_SIZE+2+EXC_PC(%sp),LOCAL_SIZE+2+EXC_PC-0xc(%sp) # move lo(PC) + format/vector word
2281         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # move <ea> longword
2282
2283 # now, we copy the default result to its proper location
2284         mov.l           LOCAL_SIZE+FP_DST_EX(%sp),LOCAL_SIZE+0x4(%sp)
2285         mov.l           LOCAL_SIZE+FP_DST_HI(%sp),LOCAL_SIZE+0x8(%sp)
2286         mov.l           LOCAL_SIZE+FP_DST_LO(%sp),LOCAL_SIZE+0xc(%sp)
2287
2288         add.l           &LOCAL_SIZE-0x8,%sp     # drop the locals; sp -> relocated exception frame
2289
2290
2291         bra.l           _real_inex              # exit through the INEX callout
2292
2293 #########################################################################
2294
2295 #
2296 # if we're stuffing a source operand back into an fsave frame then we
2297 # have to make sure that for single or double source operands that the
2298 # format stuffed is as weird as the hardware usually makes it.
2299 #
2300         global          funimp_skew
2301 funimp_skew:                            # in: a6 = frame ptr; rewrites FP_SRC in place; uses d0/d1 (a0 on the dbl path)
2302         bfextu          EXC_EXTWORD(%a6){&3:&3},%d0 # extract src specifier
2303         cmpi.b          %d0,&0x1                # was src sgl?
2304         beq.b           funimp_skew_sgl         # yes
2305         cmpi.b          %d0,&0x5                # was src dbl?
2306         beq.b           funimp_skew_dbl         # yes
2307         rts                                     # any other format: nothing to do
2308
2309 funimp_skew_sgl:
2310         mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2311         andi.w          &0x7fff,%d0             # strip sign
2312         beq.b           funimp_skew_sgl_not     # exponent is zero; leave operand alone
2313         cmpi.w          %d0,&0x3f80             # exponent above 0x3f80 (= 0x3fff - 0x7f)?
2314         bgt.b           funimp_skew_sgl_not     # yes; leave operand alone
2315         neg.w           %d0                     # make exponent negative
2316         addi.w          &0x3f81,%d0             # find amt to shift
2317         mov.l           FP_SRC_HI(%a6),%d1      # fetch DENORM hi(man)
2318         lsr.l           %d0,%d1                 # shift it
2319         bset            &31,%d1                 # set j-bit
2320         mov.l           %d1,FP_SRC_HI(%a6)      # insert new hi(man)
2321         andi.w          &0x8000,FP_SRC_EX(%a6)  # clear old exponent
2322         ori.w           &0x3f80,FP_SRC_EX(%a6)  # insert new "skewed" exponent
2323 funimp_skew_sgl_not:
2324         rts
2325
2326 funimp_skew_dbl:
2327         mov.w           FP_SRC_EX(%a6),%d0      # fetch DENORM exponent
2328         andi.w          &0x7fff,%d0             # strip sign
2329         beq.b           funimp_skew_dbl_not     # exponent is zero; leave operand alone
2330         cmpi.w          %d0,&0x3c00             # exponent above 0x3c00 (= 0x3fff - 0x3ff)?
2331         bgt.b           funimp_skew_dbl_not     # yes; leave operand alone
2332
2333         tst.b           FP_SRC_EX(%a6)          # make "internal format"
2334         smi.b           0x2+FP_SRC(%a6)         # byte 2 = 0xff if operand is negative, else 0x00
2335         mov.w           %d0,FP_SRC_EX(%a6)      # insert exponent with cleared sign
2336         clr.l           %d0                     # clear g,r,s
2337         lea             FP_SRC(%a6),%a0         # pass ptr to src op
2338         mov.w           &0x3c01,%d1             # pass denorm threshold
2339         bsr.l           dnrm_lp                 # denorm it
2340         mov.w           &0x3c00,%d0             # new exponent
2341         tst.b           0x2+FP_SRC(%a6)         # is sign set?
2342         beq.b           fss_dbl_denorm_done     # no
2343         bset            &15,%d0                 # set sign
2344 fss_dbl_denorm_done:
2345         bset            &0x7,FP_SRC_HI(%a6)     # set j-bit
2346         mov.w           %d0,FP_SRC_EX(%a6)      # insert new exponent
2347 funimp_skew_dbl_not:
2348         rts
2349
2350 #########################################################################
2351         global          _mem_write2
2352 _mem_write2:                            # in: a0 = ptr to 12 bytes of source data, a6 = frame ptr
2353         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
2354         beq.l           _dmem_write             # user: branch (not call) to _dmem_write, which returns to our caller
2355         mov.l           0x0(%a0),FP_DST_EX(%a6) # supervisor: copy the 12 bytes into FP_DST instead
2356         mov.l           0x4(%a0),FP_DST_HI(%a6)
2357         mov.l           0x8(%a0),FP_DST_LO(%a6)
2358         clr.l           %d1                     # d1 = 0; presumably the "no access error" status (cf. tst.l %d1 after _imem_read)
2359         rts
2360
2361 #########################################################################
2362 # XDEF **************************************************************** #
2363 #       _fpsp_effadd(): 060FPSP entry point for FP "Unimplemented       #
2364 #                       effective address" exception.                   #
2365 #                                                                       #
2366 #       This handler should be the first code executed upon taking the  #
2367 #       FP Unimplemented Effective Address exception in an operating    #
2368 #       system.                                                         #
2369 #                                                                       #
2370 # XREF **************************************************************** #
2371 #       _imem_read_long() - read instruction longword                   #
2372 #       fix_skewed_ops() - adjust src operand in fsave frame            #
2373 #       set_tag_x() - determine optype of src/dst operands              #
2374 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
2375 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
2376 #       load_fpn2() - load dst operand from FP regfile                  #
2377 #       tbl_unsupp - add of table of emulation routines for opclass 0,2 #
2378 #       decbin() - convert packed data to FP binary data                #
2379 #       _real_fpu_disabled() - "callout" for "FPU disabled" exception   #
2380 #       _real_access() - "callout" for access error exception           #
2381 #       _mem_read() - read extended immediate operand from memory       #
2382 #       _fpsp_done() - "callout" for exit; work all done                #
2383 #       _real_trace() - "callout" for Trace enabled exception           #
2384 #       fmovm_dynamic() - emulate dynamic fmovm instruction             #
2385 #       fmovm_ctrl() - emulate fmovm control instruction                #
2386 #                                                                       #
2387 # INPUT *************************************************************** #
2388 #       - The system stack contains the "Unimplemented <ea>" stk frame  #
2389 #                                                                       #
2390 # OUTPUT ************************************************************** #
2391 #       If access error:                                                #
2392 #       - The system stack is changed to an access error stack frame    #
2393 #       If FPU disabled:                                                #
2394 #       - The system stack is changed to an FPU disabled stack frame    #
2395 #       If Trace exception enabled:                                     #
2396 #       - The system stack is changed to a Trace exception stack frame  #
2397 #       Else: (normal case)                                             #
2398 #       - None (correct result has been stored as appropriate)          #
2399 #                                                                       #
2400 # ALGORITHM *********************************************************** #
2401 #       This exception handles 3 types of operations:                   #
2402 # (1) FP Instructions using extended precision or packed immediate      #
2403 #     addressing mode.                                                  #
2404 # (2) The "fmovm.x" instruction w/ dynamic register specification.      #
2405 # (3) The "fmovm.l" instruction w/ 2 or 3 control registers.            #
2406 #                                                                       #
2407 #       For immediate data operations, the data is read in w/ a         #
2408 # _mem_read() "callout", converted to FP binary (if packed), and used   #
2409 # as the source operand to the instruction specified by the instruction #
2410 # word. If no FP exception should be reported as a result of the        #
2411 # emulation, then the result is stored to the destination register and  #
2412 # the handler exits through _fpsp_done(). If an enabled exc has been    #
2413 # signalled as a result of emulation, then an fsave state frame         #
2414 # corresponding to the FP exception type must be entered into the 060   #
2415 # FPU before exiting. In either the enabled or disabled cases, we       #
2416 # must also check if a Trace exception is pending, in which case, we    #
2417 # must create a Trace exception stack frame from the current exception  #
2418 # stack frame. If no Trace is pending, we simply exit through           #
2419 # _fpsp_done().                                                         #
2420 #       For "fmovm.x", call the routine fmovm_dynamic() which will      #
2421 # decode and emulate the instruction. No FP exceptions can be pending   #
2422 # as a result of this operation emulation. A Trace exception can be     #
2423 # pending, though, which means the current stack frame must be changed  #
2424 # to a Trace stack frame and an exit made through _real_trace().        #
2425 # For the case of "fmovm.x Dn,-(a7)", where the offending instruction   #
2426 # was executed from supervisor mode, this handler must store the FP     #
2427 # register file values to the system stack by itself since              #
2428 # fmovm_dynamic() can't handle this. A normal exit is made through      #
2429 # _fpsp_done().                                                         #
2430 #       For "fmovm.l", fmovm_ctrl() is used to emulate the instruction. #
2431 # Again, a Trace exception may be pending and an exit made through      #
2432 # _real_trace(). Else, a normal exit is made through _fpsp_done().      #
2433 #                                                                       #
2434 #       Before any of the above is attempted, it must be checked to     #
2435 # see if the FPU is disabled. Since the "Unimp <ea>" exception is taken #
2436 # before the "FPU disabled" exception, but the "FPU disabled" exception #
2437 # has higher priority, we check the disabled bit in the PCR. If set,    #
2438 # then we must create an 8 word "FPU disabled" exception stack frame    #
2439 # from the current 4 word exception stack frame. This includes          #
2440 # reproducing the effective address of the instruction to put on the    #
2441 # new stack frame.                                                      #
2442 #                                                                       #
2443 #       In the process of all emulation work, if a _mem_read()          #
2444 # "callout" returns a failing result indicating an access error, then   #
2445 # we must create an access error stack frame from the current stack     #
2446 # frame. This information includes a faulting address and a fault-      #
2447 # status-longword. These are created within this handler.               #
2448 #                                                                       #
2449 #########################################################################
2450
2451         global          _fpsp_effadd
2452 _fpsp_effadd:                           # entry: sp -> "Unimplemented <ea>" exception stack frame
2453
2454 # This exception type takes priority over the "Line F Emulator"
2455 # exception. Therefore, the FPU could be disabled when entering here.
2456 # So, we must check to see if it's disabled and handle that case separately.
2457         mov.l           %d0,-(%sp)              # save d0
2458         movc            %pcr,%d0                # load proc cr
2459         btst            &0x1,%d0                # is FPU disabled?
2460         bne.w           iea_disabled            # yes (note: the saved d0 is still on the stack)
2461         mov.l           (%sp)+,%d0              # restore d0
2462
2463         link            %a6,&-LOCAL_SIZE        # init stack frame
2464
2465         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2466         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
2467         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
2468
2469 # PC of instruction that took the exception is the PC in the frame
2470         mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6)
2471
2472         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2473         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2474         bsr.l           _imem_read_long         # fetch the instruction words
2475         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2476
2477 #########################################################################
2478
2479         tst.w           %d0                     # is operation fmovem? (tests bit 15 of the low word of d0)
2480         bmi.w           iea_fmovm               # yes
2481
2482 #
2483 # here, we will have:
2484 #       fabs    fdabs   fsabs           facos           fmod
2485 #       fadd    fdadd   fsadd           fasin           frem
2486 #       fcmp                            fatan           fscale
2487 #       fdiv    fddiv   fsdiv           fatanh          fsin
2488 #       fint                            fcos            fsincos
2489 #       fintrz                          fcosh           fsinh
2490 #       fmove   fdmove  fsmove          fetox           ftan
2491 #       fmul    fdmul   fsmul           fetoxm1         ftanh
2492 #       fneg    fdneg   fsneg           fgetexp         ftentox
2493 #       fsgldiv                         fgetman         ftwotox
2494 #       fsglmul                         flog10
2495 #       fsqrt                           flog2
2496 #       fsub    fdsub   fssub           flogn
2497 #       ftst                            flognp1
2498 # which can all use f<op>.{x,p}
2499 # so, now it's immediate data extended precision AND PACKED FORMAT!
2500 #
2501 iea_op:                                 # d0 = opword/extword longword fetched above
2502         andi.l          &0x00ff00ff,USER_FPSR(%a6) # clear bits 31-24 and the exception byte (bits 15-8)
2503
2504         btst            &0xa,%d0                # is src fmt x or p?
2505         bne.b           iea_op_pack             # packed
2506
2507
2508         mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2509         lea             FP_SRC(%a6),%a1         # pass: ptr to super addr
2510         mov.l           &0xc,%d0                # pass: 12 bytes
2511         bsr.l           _imem_read              # read extended immediate
2512
2513         tst.l           %d1                     # did ifetch fail?
2514         bne.w           iea_iacc                # yes
2515
2516         bra.b           iea_op_setsrc           # extended operand needs no conversion
2517
2518 iea_op_pack:                            # packed immediate: fetch 12 bytes, convert unless INF/NAN/ZERO
2519
2520         mov.l           EXC_EXTWPTR(%a6),%a0    # pass: ptr to #<data>
2521         lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
2522         mov.l           &0xc,%d0                # pass: 12 bytes
2523         bsr.l           _imem_read              # read packed operand
2524
2525         tst.l           %d1                     # did ifetch fail?
2526         bne.w           iea_iacc                # yes
2527
2528 # The packed operand is an INF or a NAN if the exponent field is all ones.
2529         bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp
2530         cmpi.w          %d0,&0x7fff             # INF or NAN?
2531         beq.b           iea_op_setsrc           # operand is an INF or NAN
2532
2533 # The packed operand is a zero if the mantissa is all zero, else it's
2534 # a normal packed op.
2535         mov.b           3+FP_SRC(%a6),%d0       # get byte 4
2536         andi.b          &0x0f,%d0               # clear all but last nybble
2537         bne.b           iea_op_gp_not_spec      # not a zero
2538         tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
2539         bne.b           iea_op_gp_not_spec      # not a zero
2540         tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
2541         beq.b           iea_op_setsrc           # operand is a ZERO
2542 iea_op_gp_not_spec:
2543         lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
2544         bsr.l           decbin                  # convert to extended
2545         fmovm.x         &0x80,FP_SRC(%a6)       # make this the srcop (stores fp0 over the packed image)
2546
2547 iea_op_setsrc:
2548         addi.l          &0xc,EXC_EXTWPTR(%a6)   # update extension word pointer (skip the 12-byte immediate)
2549
2550 # FP_SRC now holds the src operand.
2551         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
2552         bsr.l           set_tag_x               # tag the operand type
2553         mov.b           %d0,STAG(%a6)           # could be ANYTHING!!!
2554         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2555         bne.b           iea_op_getdst           # no
2556         bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2557         mov.b           %d0,STAG(%a6)           # set new optype tag
2557         mov.b           %d0,STAG(%a6)           # set new optype tag
2558 iea_op_getdst:                          # decide whether a dst operand must be loaded and whether a result is stored
2559         clr.b           STORE_FLG(%a6)          # clear "store result" boolean
2560
2561         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
2562         beq.b           iea_op_extract          # monadic
2563         btst            &0x4,1+EXC_CMDREG(%a6)  # is operation fsincos,ftst,fcmp?
2564         bne.b           iea_op_spec             # yes
2565
2566 iea_op_loaddst:
2567         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2568         bsr.l           load_fpn2               # load dst operand
2569
2570         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
2571         bsr.l           set_tag_x               # tag the operand type
2572         mov.b           %d0,DTAG(%a6)           # could be ANYTHING!!!
2573         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
2574         bne.b           iea_op_extract          # no
2575         bsr.l           unnorm_fix              # yes; convert to NORM/DENORM/ZERO
2576         mov.b           %d0,DTAG(%a6)           # set new optype tag
2577         bra.b           iea_op_extract
2578
2579 # the operation is fsincos, ftst, or fcmp. only fcmp is dyadic
2580 iea_op_spec:
2581         btst            &0x3,1+EXC_CMDREG(%a6)  # is operation fsincos?
2582         beq.b           iea_op_extract          # yes
2583 # now, we're left with ftst and fcmp. so, first let's tag them so that they don't
2584 # store a result. then, only fcmp will branch back and pick up a dst operand.
2585         st              STORE_FLG(%a6)          # don't store a final result
2586         btst            &0x1,1+EXC_CMDREG(%a6)  # is operation fcmp?
2587         beq.b           iea_op_loaddst          # yes; ftst falls through to iea_op_extract
2588
2589 iea_op_extract:
2590         clr.l           %d0                     # zero d0 so only the mode byte is set
2591         mov.b           FPCR_MODE(%a6),%d0      # pass: rnd mode,prec
2592
2593         mov.b           1+EXC_CMDREG(%a6),%d1   # fetch low byte of the command word
2594         andi.w          &0x007f,%d1             # extract extension
2595
2596         fmov.l          &0x0,%fpcr              # zero the FPCR before emulating
2597         fmov.l          &0x0,%fpsr              # zero the FPSR before emulating
2598
2599         lea             FP_SRC(%a6),%a0         # a0 = ptr to src op
2600         lea             FP_DST(%a6),%a1         # a1 = ptr to dst op
2601
2602         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
2603         jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call it; table entry is an offset from tbl_unsupp
2604
2605 #
2606 # Exceptions in order of precedence:
2607 #       BSUN    : none
2608 #       SNAN    : all operations
2609 #       OPERR   : all reg-reg or mem-reg operations that can normally operr
2610 #       OVFL    : same as OPERR
2611 #       UNFL    : same as OPERR
2612 #       DZ      : same as OPERR
2613 #       INEX2   : same as OPERR
2614 #       INEX1   : all packed immediate operations
2615 #
2616
2617 # we determine the highest priority exception(if any) set by the
2618 # emulation routine that has also been enabled by the user.
2619         mov.b           FPCR_ENABLE(%a6),%d0    # fetch exceptions enabled
2620         bne.b           iea_op_ena              # some are enabled
2621
2622 # now, we save the result, unless, of course, the operation was ftst or fcmp.
2623 # these don't save results.
2624 iea_op_save:
2625         tst.b           STORE_FLG(%a6)          # does this op store a result?
2626         bne.b           iea_op_exit1            # no (flag set); exit with no frestore
2627
2628 iea_op_store:
2629         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # fetch dst regno
2630         bsr.l           store_fpreg             # store the result
2631
2632 iea_op_exit1:
2633         mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2634         mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2635
2636         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2637         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2638         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2639
2640         unlk            %a6                     # unravel the frame
2641
2642         btst            &0x7,(%sp)              # is trace on?
2643         bne.w           iea_op_trace            # yes; convert to a trace frame
2644
2645         bra.l           _fpsp_done              # exit to os
2646
2647 iea_op_ena:
2648         and.b           FPSR_EXCEPT(%a6),%d0    # keep only ones enabled and set
2649         bfffo           %d0{&24:&8},%d0         # find highest priority exception
2650         bne.b           iea_op_exc              # at least one was set
2651
2652 # no exception occurred. now, did a disabled, exact overflow occur with inexact
2653 # enabled? if so, then we have to stuff an overflow frame into the FPU.
2654         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2655         beq.b           iea_op_save             # no; take the normal save path
2656
2657 iea_op_ovfl:
2658         btst            &inex2_bit,FPCR_ENABLE(%a6) # is inexact enabled?
2659         beq.b           iea_op_store            # no; store result (STORE_FLG is not tested here)
2660         bra.b           iea_op_exc_ovfl         # yes; force an overflow frame
2661
2662 # an enabled exception occurred. we have to insert the exception type back into
2663 # the machine.
2664 iea_op_exc:
2665         subi.l          &24,%d0                 # rebase bfffo bit offset (field starts at 24) to a table index
2666         cmpi.b          %d0,&0x6                # is exception INEX?
2667         bne.b           iea_op_exc_force        # no
2668
2669 # the enabled exception was inexact. so, if it occurs with an overflow
2670 # or underflow that was disabled, then we have to force an overflow or
2671 # underflow frame.
2672         btst            &ovfl_bit,FPSR_EXCEPT(%a6) # did overflow occur?
2673         bne.b           iea_op_exc_ovfl         # yes
2674         btst            &unfl_bit,FPSR_EXCEPT(%a6) # did underflow occur?
2675         bne.b           iea_op_exc_unfl         # yes
2676
2677 iea_op_exc_force:
2678         mov.w           (tbl_iea_except.b,%pc,%d0.w*2),2+FP_SRC(%a6) # put table word for this exception into the frame frestore'd at iea_op_exit2
2679         bra.b           iea_op_exit2            # exit with frestore
2680
2681 tbl_iea_except:
2682         short           0xe002, 0xe006, 0xe004, 0xe005 # one word per exception, indexed by d0 (0-3)
2683         short           0xe003, 0xe002, 0xe001, 0xe001 # indices 4-7
2684
2685 iea_op_exc_ovfl:
2686         mov.w           &0xe005,2+FP_SRC(%a6)   # force the overflow frame word
2687         bra.b           iea_op_exit2            # exit with frestore
2688
2689 iea_op_exc_unfl:
2690         mov.w           &0xe003,2+FP_SRC(%a6)   # force the underflow frame word; fall into iea_op_exit2
2691
2692 iea_op_exit2:
2693         mov.l           EXC_PC(%a6),USER_FPIAR(%a6) # set FPIAR to "Current PC"
2694         mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set "Next PC" in exc frame
2695
2696         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2697         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2698         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2699
2700         frestore        FP_SRC(%a6)             # restore exceptional state (frame word set by caller)
2701
2702         unlk            %a6                     # unravel the frame
2703
2704         btst            &0x7,(%sp)              # is trace on?
2705         bne.b           iea_op_trace            # yes; convert to a trace frame
2706
2707         bra.l           _fpsp_done              # exit to os
2708
2709 #
2710 # The opclass two instruction that took an "Unimplemented Effective Address"
2711 # exception was being traced. Make the "current" PC the FPIAR and put it in
2712 # the trace stack frame then jump to _real_trace().
2713 #
2714 #                UNIMP EA FRAME            TRACE FRAME
2715 #               *****************       *****************
2716 #               * 0x0 *  0x0f0  *       *    Current    *
2717 #               *****************       *      PC       *
2718 #               *    Current    *       *****************
2719 #               *      PC       *       * 0x2 *  0x024  *
2720 #               *****************       *****************
2721 #               *      SR       *       *     Next      *
2722 #               *****************       *      PC       *
2723 #                                       *****************
2724 #                                       *      SR       *
2725 #                                       *****************
2726 iea_op_trace:
2727         mov.l           (%sp),-(%sp)            # shift stack frame "down" (copies SR and hi(PC))
2728         mov.w           0x8(%sp),0x4(%sp)       # move lo(PC) down to complete the shifted PC
2729         mov.w           &0x2024,0x6(%sp)        # stk fmt = 0x2; voff = 0x024
2730         fmov.l          %fpiar,0x8(%sp)         # "Current PC" is in FPIAR
2731
2732         bra.l           _real_trace             # exit through the trace handler
2733
2734 #########################################################################
2735 iea_fmovm:
2736         btst            &14,%d0                 # ctrl or data reg
2737         beq.w           iea_fmovm_ctrl          # bit clear: control registers
2738
2739 iea_fmovm_data:
2740
2741         btst            &0x5,EXC_SR(%a6)        # user or supervisor mode
2742         bne.b           iea_fmovm_data_s        # supervisor
2743
2744 iea_fmovm_data_u:
2745         mov.l           %usp,%a0                # user mode: a7 is the usp
2746         mov.l           %a0,EXC_A7(%a6)         # store current a7
2747         bsr.l           fmovm_dynamic           # do dynamic fmovm
2748         mov.l           EXC_A7(%a6),%a0         # load possibly new a7
2749         mov.l           %a0,%usp                # update usp
2750         bra.w           iea_fmovm_exit          # take the common fmovm exit
2751
2752 iea_fmovm_data_s:
2753         clr.b           SPCOND_FLG(%a6)         # clear the special-condition flag before the call
2754         lea             0x2+EXC_VOFF(%a6),%a0   # a0 = address just past the voff word of the exc frame
2755         mov.l           %a0,EXC_A7(%a6)         # record it as a7 (presumably the pre-exception ssp -- confirm)
2756         bsr.l           fmovm_dynamic           # do dynamic fmovm
2757
2758         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # was mda7_flg set by the call?
2759         beq.w           iea_fmovm_data_predec   # yes; handle the predecrement case
2760         cmpi.b          SPCOND_FLG(%a6),&mia7_flg # was mia7_flg set by the call?
2761         bne.w           iea_fmovm_exit          # no; nothing special, take the common exit
2762
2763 # right now, d0 = the size.
2764 # the data has been fetched from the supervisor stack, but we have not
2765 # incremented the stack pointer by the appropriate number of bytes.
2766 # do it here.
2767 iea_fmovm_data_postinc:
2768         btst            &0x7,EXC_SR(%a6)        # is trace on?
2769         bne.b           iea_fmovm_data_pi_trace # yes; build a trace frame instead
2770
2771         mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0) # copy SR d0 bytes up the stack
2772         mov.l           EXC_EXTWPTR(%a6),(EXC_PC,%a6,%d0) # Next PC becomes the PC of the moved frame
2773         mov.w           &0x00f0,(EXC_VOFF,%a6,%d0) # fmt/voff word of the moved frame
2774
2775         lea             (EXC_SR,%a6,%d0),%a0    # a0 = start of the moved frame
2776         mov.l           %a0,EXC_SR(%a6)         # stash it; popped into sp after the unlk below
2777
2778         fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2779         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2780         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2781
2782         unlk            %a6                     # unravel the frame
2783         mov.l           (%sp)+,%sp              # switch sp to the moved frame
2784         bra.l           _fpsp_done              # exit to os
2785
2786 iea_fmovm_data_pi_trace:
2787         mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR of the trace frame (frame is 4 bytes longer)
2788         mov.l           EXC_EXTWPTR(%a6),(EXC_PC-0x4,%a6,%d0) # Next PC becomes the frame PC
2789         mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0) # same fmt/voff word iea_op_trace uses
2790         mov.l           EXC_PC(%a6),(EXC_VOFF+0x2-0x4,%a6,%d0) # stacked (Current) PC goes in the longword after voff
2791
2792         lea             (EXC_SR-0x4,%a6,%d0),%a0 # a0 = start of the trace frame
2793         mov.l           %a0,EXC_SR(%a6)         # stash it; popped into sp after the unlk below
2794
2795         fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2796         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2797         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2798
2799         unlk            %a6                     # unravel the frame
2800         mov.l           (%sp)+,%sp              # switch sp to the trace frame
2801         bra.l           _real_trace             # exit through the trace handler
2802
2803 # right now, d0 = size and d1 = the strg (see the two stores below).
2804 iea_fmovm_data_predec:
2805         mov.b           %d1,EXC_VOFF(%a6)       # store strg
2806         mov.b           %d0,0x1+EXC_VOFF(%a6)   # store size
2807
2808         fmovm.x         EXC_FP0(%a6),&0xc0      # restore fp0-fp1
2809         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2810         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2811
2812         mov.l           (%a6),-(%sp)            # make a copy of a6
2813         mov.l           %d0,-(%sp)              # save d0
2814         mov.l           %d1,-(%sp)              # save d1
2815         mov.l           EXC_EXTWPTR(%a6),-(%sp) # make a copy of Next PC
2816
2817         clr.l           %d0                     # zero d0 before loading the size byte
2818         mov.b           0x1+EXC_VOFF(%a6),%d0   # fetch size
2819         neg.l           %d0                     # get negative of size
2820
2821         btst            &0x7,EXC_SR(%a6)        # is trace enabled?
2822         beq.b           iea_fmovm_data_p2       # no; build a plain frame
2823
2824         mov.w           EXC_SR(%a6),(EXC_SR-0x4,%a6,%d0) # SR of the trace frame, size bytes lower
2825         mov.l           EXC_PC(%a6),(EXC_VOFF-0x2,%a6,%d0) # stacked (Current) PC goes in the longword after voff
2826         mov.l           (%sp)+,(EXC_PC-0x4,%a6,%d0) # pop the Next PC copy into the frame PC
2827         mov.w           &0x2024,(EXC_VOFF-0x4,%a6,%d0) # same fmt/voff word iea_op_trace uses
2828
2829         pea             (%a6,%d0)               # create final sp
2830         bra.b           iea_fmovm_data_p3       # go store the registers
2831
2832 iea_fmovm_data_p2:
2833         mov.w           EXC_SR(%a6),(EXC_SR,%a6,%d0) # SR of the moved frame, size bytes lower
2834         mov.l           (%sp)+,(EXC_PC,%a6,%d0) # pop the Next PC copy into the frame PC
2835         mov.w           &0x00f0,(EXC_VOFF,%a6,%d0) # fmt/voff word of the moved frame
2836
2837         pea             (0x4,%a6,%d0)           # create final sp
2838
2839 iea_fmovm_data_p3:
2840         clr.l           %d1                     # zero d1 before loading the strg byte
2841         mov.b           EXC_VOFF(%a6),%d1       # fetch strg
2842
2843         tst.b           %d1                     # strg bit 7 set?
2844         bpl.b           fm_1                    # no; skip this register
2845         fmovm.x         &0x80,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x80
2846         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2847 fm_1:
2848         lsl.b           &0x1,%d1                # shift next strg bit into the sign position
2849         bpl.b           fm_2                    # clear; skip this register
2850         fmovm.x         &0x40,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x40
2851         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2852 fm_2:
2853         lsl.b           &0x1,%d1                # shift next strg bit into the sign position
2854         bpl.b           fm_3                    # clear; skip this register
2855         fmovm.x         &0x20,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x20
2856         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2857 fm_3:
2858         lsl.b           &0x1,%d1                # shift next strg bit into the sign position
2859         bpl.b           fm_4                    # clear; skip this register
2860         fmovm.x         &0x10,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x10
2861         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2862 fm_4:
2863         lsl.b           &0x1,%d1                # shift next strg bit into the sign position
2864         bpl.b           fm_5                    # clear; skip this register
2865         fmovm.x         &0x08,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x08
2866         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2867 fm_5:
2868         lsl.b           &0x1,%d1                # shift next strg bit into the sign position
2869         bpl.b           fm_6                    # clear; skip this register
2870         fmovm.x         &0x04,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x04
2871         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2872 fm_6:
2873         lsl.b           &0x1,%d1                # shift next strg bit into the sign position
2874         bpl.b           fm_7                    # clear; skip this register
2875         fmovm.x         &0x02,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x02
2876         addi.l          &0xc,%d0                # advance 12 bytes to the next slot
2877 fm_7:
2878         lsl.b           &0x1,%d1                # shift last strg bit into the sign position
2879         bpl.b           fm_end                  # clear; nothing more to store
2880         fmovm.x         &0x01,(0x4+0x8,%a6,%d0) # store the register selected by mask 0x01
2881 fm_end:
2882         mov.l           0x4(%sp),%d1            # reload d1 saved above
2883         mov.l           0x8(%sp),%d0            # reload d0 saved above
2884         mov.l           0xc(%sp),%a6            # reload the a6 copy saved above
2885         mov.l           (%sp)+,%sp              # switch to the final sp pushed by pea
2886
2887         btst            &0x7,(%sp)              # is trace enabled?
2888         beq.l           _fpsp_done              # no; exit to os
2889         bra.l           _real_trace             # yes; exit through the trace handler
2890
2891 #########################################################################
2892 iea_fmovm_ctrl:
2893
2894         bsr.l           fmovm_ctrl              # load ctrl regs
2895
2896 iea_fmovm_exit:
2897         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
2898         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
2899         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2900
2901         btst            &0x7,EXC_SR(%a6)        # is trace on?
2902         bne.b           iea_fmovm_trace         # yes
2903
2904         mov.l           EXC_EXTWPTR(%a6),EXC_PC(%a6) # set Next PC
2905
2906         unlk            %a6                     # unravel the frame
2907
2908         bra.l           _fpsp_done              # exit to os
2909
2910 #
2911 # The control reg instruction that took an "Unimplemented Effective Address"
2912 # exception was being traced. The "Current PC" for the trace frame is the
2913 # PC stacked for Unimp EA. The "Next PC" is in EXC_EXTWPTR.
2914 # After fixing the stack frame, jump to _real_trace().
2915 #
2916 #                UNIMP EA FRAME            TRACE FRAME
2917 #               *****************       *****************
2918 #               * 0x0 *  0x0f0  *       *    Current    *
2919 #               *****************       *      PC       *
2920 #               *    Current    *       *****************
2921 #               *      PC       *       * 0x2 *  0x024  *
2922 #               *****************       *****************
2923 #               *      SR       *       *     Next      *
2924 #               *****************       *      PC       *
2925 #                                       *****************
2926 #                                       *      SR       *
2927 #                                       *****************
2928 # this ain't a pretty solution, but it works:
2929 # -restore a6 (not with unlk)
2930 # -shift stack frame down over where old a6 used to be
2931 # -add LOCAL_SIZE to stack pointer
2932 iea_fmovm_trace:
2933         mov.l           (%a6),%a6               # restore frame pointer
2934         mov.w           EXC_SR+LOCAL_SIZE(%sp),0x0+LOCAL_SIZE(%sp) # SR moves down over the old-a6 slot
2935         mov.l           EXC_PC+LOCAL_SIZE(%sp),0x8+LOCAL_SIZE(%sp) # stacked PC becomes the trace frame "Current PC"
2936         mov.l           EXC_EXTWPTR+LOCAL_SIZE(%sp),0x2+LOCAL_SIZE(%sp) # EXC_EXTWPTR becomes the "Next PC"
2937         mov.w           &0x2024,0x6+LOCAL_SIZE(%sp) # stk fmt = 0x2; voff = 0x024
2938         add.l           &LOCAL_SIZE,%sp         # clear stack frame
2939
2940         bra.l           _real_trace             # exit through the trace handler
2941
2942 #########################################################################
2943 # The FPU is disabled and so we should really have taken the "Line
2944 # F Emulator" exception. So, here we create an 8-word stack frame
2945 # from our 4-word stack frame. This means we must calculate the length
2946 # of the faulting instruction to get the "next PC". This is trivial for
2947 # immediate operands but requires some extra work for fmovm dynamic
2948 # which can use most addressing modes.
2949 iea_disabled:
2950         mov.l           (%sp)+,%d0              # restore d0
2951
2952         link            %a6,&-LOCAL_SIZE        # init stack frame
2953
2954         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
2955
2956 # PC of instruction that took the exception is the PC in the frame
2957         mov.l           EXC_PC(%a6),EXC_EXTWPTR(%a6) # start the instruction ptr at the stacked PC
2958         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
2959         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
2960         bsr.l           _imem_read_long         # fetch the instruction words
2961         mov.l           %d0,EXC_OPWORD(%a6)     # store OPWORD and EXTWORD
2962
2963         tst.w           %d0                     # is instr fmovm?
2964         bmi.b           iea_dis_fmovm           # yes
2965 # instruction is using an extended precision immediate operand. Therefore,
2966 # the total instruction length is 16 bytes.
2967 iea_dis_immed:
2968         mov.l           &0x10,%d0               # 16 bytes of instruction
2969         bra.b           iea_dis_cont            # length known; go build the frame
2970 iea_dis_fmovm:
2971         btst            &0xe,%d0                # is instr fmovm ctrl
2972         bne.b           iea_dis_fmovm_data      # no
2973 # the instruction is a fmovm.l with 2 or 3 registers.
2974         bfextu          %d0{&19:&3},%d1         # extract the 3-bit register select field
2975         mov.l           &0xc,%d0                # assume 12 bytes of instruction
2976         cmpi.b          %d1,&0x7                # move all regs?
2977         bne.b           iea_dis_cont            # no; length stays 12
2978         addq.l          &0x4,%d0                # yes; one more longword makes 16
2979         bra.b           iea_dis_cont            # length known; go build the frame
2980 # the instruction is an fmovm.x dynamic which can use many addressing
2981 # modes and thus can have several different total instruction lengths.
2982 # call fmovm_calc_ea which will go through the ea calc process and,
2983 # as a by-product, will tell us how long the instruction is.
2984 iea_dis_fmovm_data:
2985         clr.l           %d0                     # d0 = 0 going into fmovm_calc_ea
2986         bsr.l           fmovm_calc_ea           # walk the <ea>; advances EXC_EXTWPTR
2987         mov.l           EXC_EXTWPTR(%a6),%d0    # d0 = instruction ptr after the <ea> calc
2988         sub.l           EXC_PC(%a6),%d0         # d0 = instruction length in bytes
2989 iea_dis_cont:
2990         mov.w           %d0,EXC_VOFF(%a6)       # store stack shift value
2991
2992         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
2993
2994         unlk            %a6                     # unravel the frame
2995
2996 # here, we actually create the 8-word frame from the 4-word frame,
2997 # with the "next PC" as additional info.
2998 # the <ea> field is left undefined.
2999         subq.l          &0x8,%sp                # make room for new stack
3000         mov.l           %d0,-(%sp)              # save d0
3001         mov.w           0xc(%sp),0x4(%sp)       # move SR
3002         mov.l           0xe(%sp),0x6(%sp)       # move Current PC
3003         clr.l           %d0                     # zero d0 before loading the length word
3004         mov.w           0x12(%sp),%d0           # fetch instruction length stored in the old voff slot
3005         mov.l           0x6(%sp),0x10(%sp)      # move Current PC
3006         add.l           %d0,0x6(%sp)            # make Next PC
3007         mov.w           &0x402c,0xa(%sp)        # insert offset,frame format
3008         mov.l           (%sp)+,%d0              # restore d0
3009
3010         bra.l           _real_fpu_disabled      # exit through the fpu-disabled handler
3011
3012 ##########
3013
3014 iea_iacc:
3015         movc            %pcr,%d0                # read the processor configuration register
3016         btst            &0x1,%d0                # test PCR bit 1 (presumably the FPU-disable bit -- confirm in the 68060 UM)
3017         bne.b           iea_iacc_cont           # set; skip the FP register restores
3018         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3019         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1 from the stack save area
3020 iea_iacc_cont:
3021         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3022
3023         unlk            %a6                     # unravel the frame
3024
3025         subq.w          &0x8,%sp                # make stack frame bigger
3026         mov.l           0x8(%sp),(%sp)          # store SR,hi(PC)
3027         mov.w           0xc(%sp),0x4(%sp)       # store lo(PC)
3028         mov.w           &0x4008,0x6(%sp)        # store voff
3029         mov.l           0x2(%sp),0x8(%sp)       # store ea
3030         mov.l           &0x09428001,0xc(%sp)    # store fslw
3031
3032 iea_acc_done:
3033         btst            &0x5,(%sp)              # user or supervisor mode?
3034         beq.b           iea_acc_done2           # user
3035         bset            &0x2,0xd(%sp)           # set supervisor TM bit
3036
3037 iea_acc_done2:
3038         bra.l           _real_access            # exit through the access error handler
3039
3040 iea_dacc:
3041         lea             -LOCAL_SIZE(%a6),%sp    # reset sp to the bottom of the local frame
3042
3043         movc            %pcr,%d1                # read the processor configuration register
3044         btst            &0x1,%d1                # test PCR bit 1 (same check as iea_iacc)
3045         bne.b           iea_dacc_cont           # set; skip the FP register restores
3046         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1 from the stack save area
3047         fmovm.l         LOCAL_SIZE+USER_FPCR(%sp),%fpcr,%fpsr,%fpiar # restore ctrl regs
3048 iea_dacc_cont:
3049         mov.l           (%a6),%a6               # restore frame pointer (not with unlk)
3050
3051         mov.l           0x4+LOCAL_SIZE(%sp),-0x8+0x4+LOCAL_SIZE(%sp) # shift SR,hi(PC) down 8 bytes
3052         mov.w           0x8+LOCAL_SIZE(%sp),-0x8+0x8+LOCAL_SIZE(%sp) # shift lo(PC) down 8 bytes
3053         mov.w           &0x4008,-0x8+0xa+LOCAL_SIZE(%sp) # store voff (same value as iea_iacc)
3054         mov.l           %a0,-0x8+0xc+LOCAL_SIZE(%sp) # store a0 in the ea slot (presumably the fault address -- confirm)
3055         mov.w           %d0,-0x8+0x10+LOCAL_SIZE(%sp) # store d0 as the upper word of the fslw slot
3056         mov.w           &0x0001,-0x8+0x12+LOCAL_SIZE(%sp) # store lower word of the fslw slot
3057
3058         movm.l          LOCAL_SIZE+EXC_DREGS(%sp),&0x0303 # restore d0-d1/a0-a1
3059         add.w           &LOCAL_SIZE-0x4,%sp     # point sp at the SR of the enlarged frame
3060
3061         bra.b           iea_acc_done            # finish up in the shared access-error exit
3062
3063 #########################################################################
3064 # XDEF **************************************************************** #
3065 #       _fpsp_operr(): 060FPSP entry point for FP Operr exception.      #
3066 #                                                                       #
3067 #       This handler should be the first code executed upon taking the  #
3068 #       FP Operand Error exception in an operating system.              #
3069 #                                                                       #
3070 # XREF **************************************************************** #
3071 #       _imem_read_long() - read instruction longword                   #
3072 #       fix_skewed_ops() - adjust src operand in fsave frame            #
3073 #       _real_operr() - "callout" to operating system operr handler     #
3074 #       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3075 #       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3076 #       facc_out_{b,w,l}() - store to memory took access error (opcl 3) #
3077 #                                                                       #
3078 # INPUT *************************************************************** #
3079 #       - The system stack contains the FP Operr exception frame        #
3080 #       - The fsave frame contains the source operand                   #
3081 #                                                                       #
3082 # OUTPUT ************************************************************** #
3083 #       No access error:                                                #
3084 #       - The system stack is unchanged                                 #
3085 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
3086 #                                                                       #
3087 # ALGORITHM *********************************************************** #
3088 #       In a system where the FP Operr exception is enabled, the goal   #
3089 # is to get to the handler specified at _real_operr(). But, on the 060, #
3090 # for opclass zero and two instruction taking this exception, the       #
3091 # input operand in the fsave frame may be incorrect for some cases      #
3092 # and needs to be corrected. This handler calls fix_skewed_ops() to     #
3093 # do just this and then exits through _real_operr().                    #
3094 #       For opclass 3 instructions, the 060 doesn't store the default   #
3095 # operr result out to memory or data register file as it should.        #
3096 # This code must emulate the move out before finally exiting through    #
3097 # _real_operr(). The move out, if to memory, is performed using         #
3098 # _mem_write() "callout" routines that may return a failing result.     #
3099 # In this special case, the handler must exit through facc_out()        #
3100 # which creates an access error stack frame from the current operr      #
3101 # stack frame.                                                          #
3102 #                                                                       #
3103 #########################################################################
3104
3105         global          _fpsp_operr
3106 _fpsp_operr:
3107
3108         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3109
3110         fsave           FP_SRC(%a6)             # grab the "busy" frame
3111
3112         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3113         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3114         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3115
3116 # the FPIAR holds the "current PC" of the faulting instruction
3117         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start the instruction ptr at the saved FPIAR
3118
3119         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3120         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3121         bsr.l           _imem_read_long         # fetch the instruction words
3122         mov.l           %d0,EXC_OPWORD(%a6)     # store the fetched instruction longword
3123
3124 ##############################################################################
3125
3126         btst            &13,%d0                 # is instr an fmove out?
3127         bne.b           foperr_out              # fmove out
3128
3129
3130 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3131 # this would be the case for opclass two operations with a source infinity or
3132 # denorm operand in the sgl or dbl format. NANs also become skewed, but can't
3133 # cause an operr so we don't need to check for them here.
3134         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3135         bsr.l           fix_skewed_ops          # fix src op
3136
3137 foperr_exit:
3138         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3139         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3140         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3141
3142         frestore        FP_SRC(%a6)             # reload the fsave frame grabbed on entry
3143
3144         unlk            %a6                     # unravel the frame
3145         bra.l           _real_operr             # exit through the operr handler
3146
3147 ########################################################################
3148
3149 #
3150 # the hardware does not save the default result to memory on enabled
3151 # operand error exceptions. we do this here before passing control to
3152 # the user operand error handler.
3153 #
3154 # byte, word, and long destination format operations can pass
3155 # through here. we simply need to test the sign of the src
3156 # operand and save the appropriate minimum or maximum integer value
3157 # to the effective address as pointed to by the stacked effective address.
3158 #
3159 # although packed opclass three operations can take operand error
3160 # exceptions, they won't pass through here since they are caught
3161 # first by the unsupported data format exception handler. that handler
3162 # sends them directly to _real_operr() if necessary.
3163 #
3164 foperr_out:
3165
3166         mov.w           FP_SRC_EX(%a6),%d1      # fetch exponent
3167         andi.w          &0x7fff,%d1             # strip the sign bit
3168         cmpi.w          %d1,&0x7fff             # is the exponent all ones?
3169         bne.b           foperr_out_not_qnan     # no; not an infinity or QNAN
3170 # the operand is either an infinity or a QNAN.
3171         tst.l           FP_SRC_LO(%a6)          # any bits set in the lo mantissa?
3172         bne.b           foperr_out_qnan         # yes; QNAN
3173         mov.l           FP_SRC_HI(%a6),%d1      # fetch hi mantissa
3174         andi.l          &0x7fffffff,%d1         # ignore its top bit
3175         beq.b           foperr_out_not_qnan     # rest is zero; infinity
3176 foperr_out_qnan:
3177         mov.l           FP_SRC_HI(%a6),L_SCR1(%a6) # default result = hi mantissa of the QNAN
3178         bra.b           foperr_out_jmp          # go dispatch on dst format
3179
3180 foperr_out_not_qnan:
3181         mov.l           &0x7fffffff,%d1         # start with the largest positive value
3182         tst.b           FP_SRC_EX(%a6)          # is the src operand negative?
3183         bpl.b           foperr_out_not_qnan2    # no; keep 0x7fffffff
3184         addq.l          &0x1,%d1                # yes; 0x80000000 instead
3185 foperr_out_not_qnan2:
3186         mov.l           %d1,L_SCR1(%a6)         # save the default result
3187
3188 foperr_out_jmp:
3189         bfextu          %d0{&19:&3},%d0         # extract dst format field
3190         mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3191         mov.w           (tbl_operr.b,%pc,%d0.w*2),%a0 # fetch handler offset for this format
3192         jmp             (tbl_operr.b,%pc,%a0)   # offsets are relative to tbl_operr
3193
3194 tbl_operr:
3195         short           foperr_out_l - tbl_operr # long word integer
3196         short           tbl_operr    - tbl_operr # sgl prec shouldn't happen
3197         short           tbl_operr    - tbl_operr # ext prec shouldn't happen
3198         short           foperr_exit  - tbl_operr # packed won't enter here
3199         short           foperr_out_w - tbl_operr # word integer
3200         short           tbl_operr    - tbl_operr # dbl prec shouldn't happen
3201         short           foperr_out_b - tbl_operr # byte integer
3202         short           tbl_operr    - tbl_operr # packed won't enter here
3203
3204 foperr_out_b:
3205         mov.b           L_SCR1(%a6),%d0         # load default result (byte at L_SCR1)
3206         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3207         ble.b           foperr_out_b_save_dn    # yes
3208         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3209         bsr.l           _dmem_write_byte        # write the default result
3210
3211         tst.l           %d1                     # did dstore fail?
3212         bne.l           facc_out_b              # yes
3213
3214         bra.w           foperr_exit             # no; finish through the common exit
3215 foperr_out_b_save_dn:
3216         andi.w          &0x0007,%d1             # keep only the register number
3217         bsr.l           store_dreg_b            # store result to regfile
3218         bra.w           foperr_exit             # finish through the common exit
3219
3220 foperr_out_w:
3221         mov.w           L_SCR1(%a6),%d0         # load default result (word at L_SCR1)
3222         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3223         ble.b           foperr_out_w_save_dn    # yes
3224         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3225         bsr.l           _dmem_write_word        # write the default result
3226
3227         tst.l           %d1                     # did dstore fail?
3228         bne.l           facc_out_w              # yes
3229
3230         bra.w           foperr_exit             # no; finish through the common exit
3231 foperr_out_w_save_dn:
3232         andi.w          &0x0007,%d1             # keep only the register number
3233         bsr.l           store_dreg_w            # store result to regfile
3234         bra.w           foperr_exit             # finish through the common exit
3235
3236 foperr_out_l:
3237         mov.l           L_SCR1(%a6),%d0         # load default result (full longword of L_SCR1)
3238         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3239         ble.b           foperr_out_l_save_dn    # yes
3240         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3241         bsr.l           _dmem_write_long        # write the default result
3242
3243         tst.l           %d1                     # did dstore fail?
3244         bne.l           facc_out_l              # yes
3245
3246         bra.w           foperr_exit             # no; finish through the common exit
3247 foperr_out_l_save_dn:
3248         andi.w          &0x0007,%d1             # keep only the register number
3249         bsr.l           store_dreg_l            # store result to regfile
3250         bra.w           foperr_exit             # finish through the common exit
3251
3252 #########################################################################
3253 # XDEF **************************************************************** #
3254 #       _fpsp_snan(): 060FPSP entry point for FP SNAN exception.        #
3255 #                                                                       #
3256 #       This handler should be the first code executed upon taking the  #
3257 #       FP Signalling NAN exception in an operating system.             #
3258 #                                                                       #
3259 # XREF **************************************************************** #
3260 #       _imem_read_long() - read instruction longword                   #
3261 #       fix_skewed_ops() - adjust src operand in fsave frame            #
3262 #       _real_snan() - "callout" to operating system SNAN handler       #
3263 #       _dmem_write_{byte,word,long}() - store data to mem (opclass 3)  #
3264 #       store_dreg_{b,w,l}() - store data to data regfile (opclass 3)   #
3265 #       facc_out_{b,w,l,d,x}() - store to mem took acc error (opcl 3)   #
3266 #       _calc_ea_fout() - fix An if <ea> is -() or ()+; also get <ea>   #
3267 #                                                                       #
3268 # INPUT *************************************************************** #
3269 #       - The system stack contains the FP SNAN exception frame         #
3270 #       - The fsave frame contains the source operand                   #
3271 #                                                                       #
3272 # OUTPUT ************************************************************** #
3273 #       No access error:                                                #
3274 #       - The system stack is unchanged                                 #
3275 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
3276 #                                                                       #
3277 # ALGORITHM *********************************************************** #
3278 #       In a system where the FP SNAN exception is enabled, the goal    #
3279 # is to get to the handler specified at _real_snan(). But, on the 060,  #
3280 # for opclass zero and two instructions taking this exception, the      #
3281 # input operand in the fsave frame may be incorrect for some cases      #
3282 # and needs to be corrected. This handler calls fix_skewed_ops() to     #
3283 # do just this and then exits through _real_snan().                     #
3284 #       For opclass 3 instructions, the 060 doesn't store the default   #
3285 # SNAN result out to memory or data register file as it should.         #
3286 # This code must emulate the move out before finally exiting through    #
3287 # _real_snan(). The move out, if to memory, is performed using          #
3288 # _mem_write() "callout" routines that may return a failing result.     #
3289 # In this special case, the handler must exit through facc_out()        #
3290 # which creates an access error stack frame from the current SNAN       #
3291 # stack frame.                                                          #
3292 #       For the case of an extended precision opclass 3 instruction,    #
3293 # if the effective addressing mode was -() or ()+, then the address     #
3294 # register must get updated by calling _calc_ea_fout(). If the <ea>     #
3295 # was -(a7) from supervisor mode, then the exception frame currently    #
3296 # on the system stack must be carefully moved "down" to make room       #
3297 # for the operand being moved.                                          #
3298 #                                                                       #
3299 #########################################################################
3300
3301         global          _fpsp_snan
3302 _fpsp_snan:                                     # entry point: FP SNAN exception
3303
3304         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3305
3306         fsave           FP_SRC(%a6)             # grab the "busy" frame
3307
3308         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3309         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3310         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3311
3312 # the FPIAR holds the "current PC" of the faulting instruction
3313         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at saved FPIAR
3314
3315         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3316         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3317         bsr.l           _imem_read_long         # fetch the instruction words
3318         mov.l           %d0,EXC_OPWORD(%a6)     # keep the fetched longword for later decoding
3319
3320 ##############################################################################
3321
3322         btst            &13,%d0                 # is instr an fmove out?
3323         bne.w           fsnan_out               # fmove out
3324
3325
3326 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3327 # this would be the case for opclass two operations with a source infinity or
3328 # denorm operand in the sgl or dbl format. NANs also become skewed and must be
3329 # fixed here.
3330         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3331         bsr.l           fix_skewed_ops          # fix src op
3332
3333 fsnan_exit:                                     # common exit: undo the saves made on entry
3334         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3335         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3336         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3337
3338         frestore        FP_SRC(%a6)             # reload the (possibly adjusted) fsave frame
3339
3340         unlk            %a6                     # drop the local stack frame
3341         bra.l           _real_snan              # hand off to the OS SNAN "callout"
3342
3343 ########################################################################
3344
3345 #
3346 # the hardware does not save the default result to memory on enabled
3347 # snan exceptions. we do this here before passing control to
3348 # the user snan handler.
3349 #
3350 # byte, word, long, and packed destination format operations can pass
3351 # through here. since packed format operations already were handled by
3352 # fpsp_unsupp(), then we need to do nothing else for them here.
3353 # for byte, word, and long, we simply take the upper byte, word, or
3354 # longword of the SNAN mantissa, set the SNAN bit, and store that value
3355 # to the destination as pointed to by the stacked effective address.
3356 #
3357 fsnan_out:                                      # fmove out: dispatch on the destination format
3358
3359         bfextu          %d0{&19:&3},%d0         # extract dst format field
3360         mov.b           1+EXC_OPWORD(%a6),%d1   # extract <ea> mode,reg
3361         mov.w           (tbl_snan.b,%pc,%d0.w*2),%a0 # fetch the 16-bit table entry for this format
3362         jmp             (tbl_snan.b,%pc,%a0)    # jump to tbl_snan + entry
3363
3364 tbl_snan:                                       # entries are offsets relative to tbl_snan, indexed by dst format
3365         short           fsnan_out_l - tbl_snan # long word integer
3366         short           fsnan_out_s - tbl_snan # sgl prec shouldn't happen
3367         short           fsnan_out_x - tbl_snan # ext prec shouldn't happen
3368         short           fsnan_exit  - tbl_snan # packed needs no help: exit (offset 0 would execute the table)
3369         short           fsnan_out_w - tbl_snan # word integer
3370         short           fsnan_out_d - tbl_snan # dbl prec shouldn't happen
3371         short           fsnan_out_b - tbl_snan # byte integer
3372         short           fsnan_exit  - tbl_snan # packed needs no help: exit (offset 0 would execute the table)
3373
3374 fsnan_out_b:                                    # byte destination: store top mantissa byte with SNAN bit set
3375         mov.b           FP_SRC_HI(%a6),%d0      # load upper byte of SNAN
3376         bset            &6,%d0                  # set SNAN bit
3377         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3378         ble.b           fsnan_out_b_dn          # yes
3379         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3380         bsr.l           _dmem_write_byte        # write the default result
3381
3382         tst.l           %d1                     # did dstore fail?
3383         bne.l           facc_out_b              # yes
3384
3385         bra.w           fsnan_exit              # memory store succeeded; take the common exit
3386 fsnan_out_b_dn:                                 # destination is a data register
3387         andi.w          &0x0007,%d1             # keep only the 3-bit register field of the <ea> byte
3388         bsr.l           store_dreg_b            # store result to regfile
3389         bra.w           fsnan_exit              # register store done; take the common exit
3390
3391 fsnan_out_w:                                    # word destination: store top mantissa word with SNAN bit set
3392         mov.w           FP_SRC_HI(%a6),%d0      # load upper word of SNAN
3393         bset            &14,%d0                 # set SNAN bit
3394         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3395         ble.b           fsnan_out_w_dn          # yes
3396         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3397         bsr.l           _dmem_write_word        # write the default result
3398
3399         tst.l           %d1                     # did dstore fail?
3400         bne.l           facc_out_w              # yes
3401
3402         bra.w           fsnan_exit              # memory store succeeded; take the common exit
3403 fsnan_out_w_dn:                                 # destination is a data register
3404         andi.w          &0x0007,%d1             # keep only the 3-bit register field of the <ea> byte
3405         bsr.l           store_dreg_w            # store result to regfile
3406         bra.w           fsnan_exit              # register store done; take the common exit
3407
3408 fsnan_out_l:                                    # long destination: store top mantissa longword with SNAN bit set
3409         mov.l           FP_SRC_HI(%a6),%d0      # load upper longword of SNAN
3410         bset            &30,%d0                 # set SNAN bit
3411         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3412         ble.b           fsnan_out_l_dn          # yes
3413         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3414         bsr.l           _dmem_write_long        # write the default result
3415
3416         tst.l           %d1                     # did dstore fail?
3417         bne.l           facc_out_l              # yes
3418
3419         bra.w           fsnan_exit              # memory store succeeded; take the common exit
3420 fsnan_out_l_dn:                                 # destination is a data register
3421         andi.w          &0x0007,%d1             # keep only the 3-bit register field of the <ea> byte
3422         bsr.l           store_dreg_l            # store result to regfile
3423         bra.w           fsnan_exit              # register store done; take the common exit
3424
3425 fsnan_out_s:                                    # single-precision destination: build a 32-bit NAN from the src
3426         cmpi.b          %d1,&0x7                # is <ea> mode a data reg?
3427         ble.b           fsnan_out_d_dn          # yes
3428         mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3429         andi.l          &0x80000000,%d0         # keep sign
3430         ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3431         mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3432         lsr.l           &0x8,%d1                # shift mantissa for sgl
3433         or.l            %d1,%d0                 # create sgl SNAN
3434         mov.l           EXC_EA(%a6),%a0         # pass: <ea> of default result
3435         bsr.l           _dmem_write_long        # write the default result
3436
3437         tst.l           %d1                     # did dstore fail?
3438         bne.l           facc_out_l              # yes
3439
3440         bra.w           fsnan_exit              # memory store succeeded; take the common exit
3441 fsnan_out_d_dn:                                 # despite the "_d_" name, this is the sgl data-register path
3442         mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3443         andi.l          &0x80000000,%d0         # keep sign
3444         ori.l           &0x7fc00000,%d0         # insert new exponent,SNAN bit
3445         mov.l           %d1,-(%sp)              # save <ea> mode,reg while %d1 is used as scratch
3446         mov.l           FP_SRC_HI(%a6),%d1      # load mantissa
3447         lsr.l           &0x8,%d1                # shift mantissa for sgl
3448         or.l            %d1,%d0                 # create sgl SNAN
3449         mov.l           (%sp)+,%d1              # recover <ea> mode,reg
3450         andi.w          &0x0007,%d1             # keep only the 3-bit register field of the <ea> byte
3451         bsr.l           store_dreg_l            # store result to regfile
3452         bra.w           fsnan_exit              # register store done; take the common exit
3453
3454 fsnan_out_d:                                    # double-precision destination: build a 64-bit NAN in FP_SCR0
3455         mov.l           FP_SRC_EX(%a6),%d0      # fetch SNAN sign
3456         andi.l          &0x80000000,%d0         # keep sign
3457         ori.l           &0x7ff80000,%d0         # insert new exponent,SNAN bit
3458         mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3459         mov.l           %d0,FP_SCR0_EX(%a6)     # store to temp space
3460         mov.l           &11,%d0                 # load shift amt
3461         lsr.l           %d0,%d1                 # shift hi mantissa right by 11
3462         or.l            %d1,FP_SCR0_EX(%a6)     # create dbl hi
3463         mov.l           FP_SRC_HI(%a6),%d1      # load hi mantissa
3464         andi.l          &0x000007ff,%d1         # keep the 11 bits shifted out above
3465         ror.l           %d0,%d1                 # rotate them into the top 11 bits
3466         mov.l           %d1,FP_SCR0_HI(%a6)     # store to temp space
3467         mov.l           FP_SRC_LO(%a6),%d1      # load lo mantissa
3468         lsr.l           %d0,%d1                 # shift lo mantissa right by 11
3469         or.l            %d1,FP_SCR0_HI(%a6)     # create dbl lo
3470         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3471         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
3472         movq.l          &0x8,%d0                # pass: size of 8 bytes
3473         bsr.l           _dmem_write             # write the default result
3474
3475         tst.l           %d1                     # did dstore fail?
3476         bne.l           facc_out_d              # yes
3477
3478         bra.w           fsnan_exit              # store succeeded; take the common exit
3479
3480 # for extended precision, if the addressing mode is pre-decrement or
3481 # post-increment, then the address register did not get updated.
3482 # in addition, for pre-decrement, the stacked <ea> is incorrect.
3483 fsnan_out_x:                                    # extended destination: build the 12-byte result in FP_SCR0
3484         clr.b           SPCOND_FLG(%a6)         # clear special case flag
3485
3486         mov.w           FP_SRC_EX(%a6),FP_SCR0_EX(%a6) # copy sign/exponent word
3487         clr.w           2+FP_SCR0(%a6)          # zero the word that follows it
3488         mov.l           FP_SRC_HI(%a6),%d0      # load hi mantissa
3489         bset            &30,%d0                 # set SNAN bit (same bit as in fsnan_out_l)
3490         mov.l           %d0,FP_SCR0_HI(%a6)     # store modified hi mantissa
3491         mov.l           FP_SRC_LO(%a6),FP_SCR0_LO(%a6) # copy lo mantissa unchanged
3492
3493         btst            &0x5,EXC_SR(%a6)        # supervisor mode exception?
3494         bne.b           fsnan_out_x_s           # yes
3495
3496         mov.l           %usp,%a0                # fetch user stack pointer
3497         mov.l           %a0,EXC_A7(%a6)         # save on stack for calc_ea()
3498         mov.l           (%a6),EXC_A6(%a6)       # copy the a6 value saved by link for calc_ea()
3499
3500         bsr.l           _calc_ea_fout           # find the correct ea,update An
3501         mov.l           %a0,%a1                 # pass: dst addr for _dmem_write
3502         mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3503
3504         mov.l           EXC_A7(%a6),%a0         # reload (possibly updated) user a7
3505         mov.l           %a0,%usp                # restore user stack pointer
3506         mov.l           EXC_A6(%a6),(%a6)       # write (possibly updated) a6 back to the link save slot
3507
3508 fsnan_out_x_save:                               # expects %a1 = destination address
3509         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
3510         movq.l          &0xc,%d0                # pass: size of extended
3511         bsr.l           _dmem_write             # write the default result
3512
3513         tst.l           %d1                     # did dstore fail?
3514         bne.l           facc_out_x              # yes
3515
3516         bra.w           fsnan_exit              # store succeeded; take the common exit
3517
3518 fsnan_out_x_s:                                  # extended destination, exception taken in supervisor mode
3519         mov.l           (%a6),EXC_A6(%a6)       # copy the a6 value saved by link for calc_ea()
3520
3521         bsr.l           _calc_ea_fout           # find the correct ea,update An
3522         mov.l           %a0,%a1                 # pass: dst addr for _dmem_write
3523         mov.l           %a0,EXC_EA(%a6)         # stack correct <ea>
3524
3525         mov.l           EXC_A6(%a6),(%a6)       # write (possibly updated) a6 back to the link save slot
3526
3527         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
3528         bne.b           fsnan_out_x_save        # no
3529
3530 # the operation was "fmove.x SNAN,-(a7)" from supervisor mode.
3531         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3532         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3533         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3534
3535         frestore        FP_SRC(%a6)             # reload the fsave frame
3536
3537         mov.l           EXC_A6(%a6),%a6         # restore frame pointer
3538
3539         mov.l           LOCAL_SIZE+EXC_SR(%sp),LOCAL_SIZE+EXC_SR-0xc(%sp) # copy frame longword 0xc bytes lower
3540         mov.l           LOCAL_SIZE+EXC_PC+0x2(%sp),LOCAL_SIZE+EXC_PC+0x2-0xc(%sp) # copy frame longword 0xc bytes lower
3541         mov.l           LOCAL_SIZE+EXC_EA(%sp),LOCAL_SIZE+EXC_EA-0xc(%sp) # copy frame longword 0xc bytes lower
3542
3543         mov.l           LOCAL_SIZE+FP_SCR0_EX(%sp),LOCAL_SIZE+EXC_SR(%sp) # operand sign/exp into the old frame slot
3544         mov.l           LOCAL_SIZE+FP_SCR0_HI(%sp),LOCAL_SIZE+EXC_PC+0x2(%sp) # operand hi mantissa into the old frame slot
3545         mov.l           LOCAL_SIZE+FP_SCR0_LO(%sp),LOCAL_SIZE+EXC_EA(%sp) # operand lo mantissa into the old frame slot
3546
3547         add.l           &LOCAL_SIZE-0x8,%sp     # release locals; presumably leaves %sp at the relocated frame -- confirm offsets
3548
3549         bra.l           _real_snan              # hand off to the OS SNAN "callout"
3550
3551 #########################################################################
3552 # XDEF **************************************************************** #
3553 #       _fpsp_inex(): 060FPSP entry point for FP Inexact exception.     #
3554 #                                                                       #
3555 #       This handler should be the first code executed upon taking the  #
3556 #       FP Inexact exception in an operating system.                    #
3557 #                                                                       #
3558 # XREF **************************************************************** #
3559 #       _imem_read_long() - read instruction longword                   #
3560 #       fix_skewed_ops() - adjust src operand in fsave frame            #
3561 #       set_tag_x() - determine optype of src/dst operands              #
3562 #       store_fpreg() - store opclass 0 or 2 result to FP regfile       #
3563 #       unnorm_fix() - change UNNORM operands to NORM or ZERO           #
3564 #       load_fpn2() - load dst operand from FP regfile                  #
3565 #       smovcr() - emulate an "fmovcr" instruction                      #
3566 #       fout() - emulate an opclass 3 instruction                       #
3567 #       tbl_unsupp - addr of emulation routine table for opclass 0,2    #
3568 #       _real_inex() - "callout" to operating system inexact handler    #
3569 #                                                                       #
3570 # INPUT *************************************************************** #
3571 #       - The system stack contains the FP Inexact exception frame      #
3572 #       - The fsave frame contains the source operand                   #
3573 #                                                                       #
3574 # OUTPUT ************************************************************** #
3575 #       - The system stack is unchanged                                 #
3576 #       - The fsave frame contains the adjusted src op for opclass 0,2  #
3577 #                                                                       #
3578 # ALGORITHM *********************************************************** #
3579 #       In a system where the FP Inexact exception is enabled, the goal #
3580 # is to get to the handler specified at _real_inex(). But, on the 060,  #
3581 # for opclass zero and two instructions taking this exception, the      #
3582 # hardware doesn't store the correct result to the destination FP       #
3583 # register as did the '040 and '881/2. This handler must emulate the    #
3584 # instruction in order to get this value and then store it to the       #
3585 # correct register before calling _real_inex().                         #
3586 #       For opclass 3 instructions, the 060 doesn't store the default   #
3587 # inexact result out to memory or data register file as it should.      #
3588 # This code must emulate the move out by calling fout() before finally  #
3589 # exiting through _real_inex().                                         #
3590 #                                                                       #
3591 #########################################################################
3592
3593         global          _fpsp_inex
3594 _fpsp_inex:                                     # entry point: FP Inexact exception
3595
3596         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3597
3598         fsave           FP_SRC(%a6)             # grab the "busy" frame
3599
3600         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3601         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3602         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3603
3604 # the FPIAR holds the "current PC" of the faulting instruction
3605         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at saved FPIAR
3606
3607         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3608         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3609         bsr.l           _imem_read_long         # fetch the instruction words
3610         mov.l           %d0,EXC_OPWORD(%a6)     # keep the fetched longword for later decoding
3611
3612 ##############################################################################
3613
3614         btst            &13,%d0                 # is instr an fmove out?
3615         bne.w           finex_out               # fmove out
3616
3617
3618 # the hardware, for "fabs" and "fneg" w/ a long source format, puts the
3619 # longword integer directly into the upper longword of the mantissa along
3620 # w/ an exponent value of 0x401e. we convert this to extended precision here.
3621         bfextu          %d0{&19:&3},%d0         # fetch instr size
3622         bne.b           finex_cont              # instr size is not long
3623         cmpi.w          FP_SRC_EX(%a6),&0x401e  # is exponent 0x401e?
3624         bne.b           finex_cont              # no
3625         fmov.l          &0x0,%fpcr              # zero FPCR before the conversion
3626         fmov.l          FP_SRC_HI(%a6),%fp0     # load integer src
3627         fmov.x          %fp0,FP_SRC(%a6)        # store integer as extended precision
3628         mov.w           &0xe001,0x2+FP_SRC(%a6) # NOTE(review): meaning of 0xe001 in this word is not shown here
3629
3630 finex_cont:                                     # opclass 0/2: prepare operands for emulation
3631         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3632         bsr.l           fix_skewed_ops          # fix src op
3633
3634 # Here, we zero the ccode and exception byte field since we're going to
3635 # emulate the whole instruction. Notice, though, that we don't kill the
3636 # INEX1 bit. This is because a packed op has long since been converted
3637 # to extended before arriving here. Therefore, we need to retain the
3638 # INEX1 bit from when the operand was first converted.
3639         andi.l          &0x00ff01ff,USER_FPSR(%a6) # clear ccode and exception bytes except INEX1; keep the rest
3640
3641         fmov.l          &0x0,%fpcr              # zero current control regs
3642         fmov.l          &0x0,%fpsr
3643
3644         bfextu          EXC_EXTWORD(%a6){&0:&6},%d1 # extract upper 6 of cmdreg
3645         cmpi.b          %d1,&0x17               # is op an fmovecr?
3646         beq.w           finex_fmovcr            # yes
3647
3648         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3649         bsr.l           set_tag_x               # tag the operand type
3650         mov.b           %d0,STAG(%a6)           # maybe NORM,DENORM
3651
3652 # bits four and five of the fp extension word separate the monadic and dyadic
3653 # operations that can pass through fpsp_inex(). remember that fcmp and ftst
3654 # will never take this exception, but fsincos will.
3655         btst            &0x5,1+EXC_CMDREG(%a6)  # is operation monadic or dyadic?
3656         beq.b           finex_extract           # monadic
3657
3658         btst            &0x4,1+EXC_CMDREG(%a6)  # is operation an fsincos?
3659         bne.b           finex_extract           # yes
3660
3661         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # dyadic; load dst reg
3662         bsr.l           load_fpn2               # load dst into FP_DST
3663
3664         lea             FP_DST(%a6),%a0         # pass: ptr to dst op
3665         bsr.l           set_tag_x               # tag the operand type
3666         cmpi.b          %d0,&UNNORM             # is operand an UNNORM?
3667         bne.b           finex_op2_done          # no
3668         bsr.l           unnorm_fix              # yes; convert to NORM,DENORM,or ZERO
3669 finex_op2_done:
3670         mov.b           %d0,DTAG(%a6)           # save dst optype tag
3671
3672 finex_extract:                                  # call the emulation routine selected by the extension field
3673         clr.l           %d0                     # clear upper bytes before the byte load
3674         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec/mode
3675
3676         mov.b           1+EXC_CMDREG(%a6),%d1   # low byte of the command word
3677         andi.w          &0x007f,%d1             # extract extension
3678
3679         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3680         lea             FP_DST(%a6),%a1         # pass: ptr to dst op
3681
3682         mov.l           (tbl_unsupp.l,%pc,%d1.w*4),%d1 # fetch routine addr
3683         jsr             (tbl_unsupp.l,%pc,%d1.l*1) # call tbl_unsupp + fetched table entry
3684
3685 # the operation has been emulated. the result is in fp0.
3686 finex_save:                                     # store the emulated result to the destination FP register
3687         bfextu          EXC_CMDREG(%a6){&6:&3},%d0 # pass: dst reg field of the command word
3688         bsr.l           store_fpreg             # store result to the FP regfile
3689
3690 finex_exit:                                     # common exit: undo the saves made on entry
3691         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3692         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3693         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3694
3695         frestore        FP_SRC(%a6)             # reload the fsave frame
3696
3697         unlk            %a6                     # drop the local stack frame
3698         bra.l           _real_inex              # hand off to the OS inexact "callout"
3699
3700 finex_fmovcr:                                   # fmovecr: emulate through smovcr(), then store the result
3701         clr.l           %d0                     # clear upper bytes before the byte load
3702         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3703         mov.b           1+EXC_CMDREG(%a6),%d1   # low byte of the command word
3704         andi.l          &0x0000007f,%d1         # pass rom offset
3705         bsr.l           smovcr                  # emulate the "fmovcr" instruction
3706         bra.b           finex_save              # store result and exit
3707
3708 ########################################################################
3709
3710 #
3711 # the hardware does not save the default result to memory on enabled
3712 # inexact exceptions. we do this here before passing control to
3713 # the user inexact handler.
3714 #
3715 # byte, word, and long destination format operations can pass
3716 # through here. so can double and single precision.
3717 # although packed opclass three operations can take inexact
3718 # exceptions, they won't pass through here since they are caught
3719 # first by the unsupported data format exception handler. that handler
3720 # sends them directly to _real_inex() if necessary.
3721 #
3722 finex_out:                                      # fmove out: let fout() perform the store
3723
3724         mov.b           &NORM,STAG(%a6)         # src is a NORM
3725
3726         clr.l           %d0                     # clear upper bytes before the byte load
3727         mov.b           FPCR_MODE(%a6),%d0      # pass rnd prec,mode
3728
3729         andi.l          &0xffff00ff,USER_FPSR(%a6) # zero exception field
3730
3731         lea             FP_SRC(%a6),%a0         # pass ptr to src operand
3732
3733         bsr.l           fout                    # store the default result
3734
3735         bra.b           finex_exit              # take the common exit
3736
3737 #########################################################################
3738 # XDEF **************************************************************** #
3739 #       _fpsp_dz(): 060FPSP entry point for FP DZ exception.            #
3740 #                                                                       #
3741 #       This handler should be the first code executed upon taking      #
3742 #       the FP DZ exception in an operating system.                     #
3743 #                                                                       #
3744 # XREF **************************************************************** #
3745 #       _imem_read_long() - read instruction longword from memory       #
3746 #       fix_skewed_ops() - adjust fsave operand                         #
3747 #       _real_dz() - "callout" exit point from FP DZ handler            #
3748 #                                                                       #
3749 # INPUT *************************************************************** #
3750 #       - The system stack contains the FP DZ exception stack.          #
3751 #       - The fsave frame contains the source operand.                  #
3752 #                                                                       #
3753 # OUTPUT ************************************************************** #
3754 #       - The system stack contains the FP DZ exception stack.          #
3755 #       - The fsave frame contains the adjusted source operand.         #
3756 #                                                                       #
3757 # ALGORITHM *********************************************************** #
3758 #       In a system where the DZ exception is enabled, the goal is to   #
3759 # get to the handler specified at _real_dz(). But, on the 060, when the #
3760 # exception is taken, the input operand in the fsave state frame may    #
3761 # be incorrect for some cases and need to be adjusted. So, this package #
3762 # adjusts the operand using fix_skewed_ops() and then branches to       #
3763 # _real_dz().                                                           #
3764 #                                                                       #
3765 #########################################################################
3766
3767         global          _fpsp_dz
3768 _fpsp_dz:                                       # entry point: FP DZ exception
3769
3770         link.w          %a6,&-LOCAL_SIZE        # init stack frame
3771
3772         fsave           FP_SRC(%a6)             # grab the "busy" frame
3773
3774         movm.l          &0x0303,EXC_DREGS(%a6)  # save d0-d1/a0-a1
3775         fmovm.l         %fpcr,%fpsr,%fpiar,USER_FPCR(%a6) # save ctrl regs
3776         fmovm.x         &0xc0,EXC_FPREGS(%a6)   # save fp0-fp1 on stack
3777
3778 # the FPIAR holds the "current PC" of the faulting instruction
3779         mov.l           USER_FPIAR(%a6),EXC_EXTWPTR(%a6) # start instruction ptr at saved FPIAR
3780
3781         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
3782         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
3783         bsr.l           _imem_read_long         # fetch the instruction words
3784         mov.l           %d0,EXC_OPWORD(%a6)     # keep the fetched longword (not tested in this handler)
3785
3786 ##############################################################################
3787
3788
3789 # here, we simply see if the operand in the fsave frame needs to be "unskewed".
3790 # this would be the case for opclass two operations with a source zero
3791 # in the sgl or dbl format.
3792         lea             FP_SRC(%a6),%a0         # pass: ptr to src op
3793         bsr.l           fix_skewed_ops          # fix src op
3794
3795 fdz_exit:                                       # undo the saves made on entry
3796         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
3797         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
3798         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
3799
3800         frestore        FP_SRC(%a6)             # reload the adjusted fsave frame
3801
3802         unlk            %a6                     # drop the local stack frame
3803         bra.l           _real_dz                # hand off to the OS DZ "callout"
3804
3805 #########################################################################
3806 # XDEF **************************************************************** #
3807 #       _fpsp_fline(): 060FPSP entry point for "Line F emulator"        #
3808 #                      exception when the "reduced" version of the      #
3809 #                      FPSP is implemented that does not emulate        #
3810 #                      FP unimplemented instructions.                   #
3811 #                                                                       #
3812 #       This handler should be the first code executed upon taking a    #
3813 #       "Line F Emulator" exception in an operating system integrating  #
3814 #       the reduced version of 060FPSP.                                 #
3815 #                                                                       #
3816 # XREF **************************************************************** #
3817 #       _real_fpu_disabled() - Handle "FPU disabled" exceptions         #
3818 #       _real_fline() - Handle all other cases (treated equally)        #
3819 #                                                                       #
3820 # INPUT *************************************************************** #
3821 #       - The system stack contains a "Line F Emulator" exception       #
3822 #         stack frame.                                                  #
3823 #                                                                       #
3824 # OUTPUT ************************************************************** #
3825 #       - The system stack is unchanged.                                #
3826 #                                                                       #
3827 # ALGORITHM *********************************************************** #
3828 #       When a "Line F Emulator" exception occurs in a system where     #
3829 # "FPU Unimplemented" instructions will not be emulated, the exception  #
3830 # can occur because the FPU is disabled or the instruction is to be     #
3831 # classified as "Line F". This module determines which case exists and  #
3832 # calls the appropriate "callout".                                      #
3833 #                                                                       #
3834 #########################################################################
3835
3836         global          _fpsp_fline
3837 _fpsp_fline:
3838
3839 # the word at offset 0x6 of the exception frame equals 0x402c when the
3840 # FPU is disabled; any other value is handed to the OS as plain "Line F".
3841         cmpi.w          0x6(%sp),&0x402c        # FPU-disabled frame?
3842         bne.l           _real_fline             # no; ordinary Line F callout
3843
3844         bra.l           _real_fpu_disabled      # yes; FPU-disabled callout
3845
3846 #########################################################################
3847 # XDEF **************************************************************** #
3848 #       _dcalc_ea(): calc correct <ea> from <ea> stacked on exception   #
3849 #                                                                       #
3850 # XREF **************************************************************** #
3851 #       inc_areg() - increment an address register                      #
3852 #       dec_areg() - decrement an address register                      #
3853 #                                                                       #
3854 # INPUT *************************************************************** #
3855 #       d0 = number of bytes to adjust <ea> by                          #
3856 #                                                                       #
3857 # OUTPUT ************************************************************** #
3858 #       a0 = correct effective address                                  #
3859 #                                                                       #
3860 # ALGORITHM *********************************************************** #
3861 # "Dummy" CALCulate Effective Address:                                  #
3862 #       The stacked <ea> for FP unimplemented instructions and opclass  #
3863 #       two packed instructions is correct with the exception of...     #
3864 #                                                                       #
3865 #       1) -(An)   : The register is not updated regardless of size.    #
3866 #                    Also, for extended precision and packed, the       #
3867 #                    stacked <ea> value is 8 bytes too big              #
3868 #       2) (An)+   : The register is not updated.                       #
3869 #       3) #<data> : The upper longword of the immediate operand is     #
3870 #                    stacked; b,w,l and s sizes are completely stacked. #
3871 #                    d,x, and p are not.                                #
3872 #                                                                       #
3873 #########################################################################
3874
3875         global          _dcalc_ea
3876 _dcalc_ea:
3877         mov.l           %d0, %a0                # park # bytes in %a0 (d0 is reused below)
3878
3879         mov.b           1+EXC_OPWORD(%a6), %d0  # fetch lo byte of opcode word
3880         mov.l           %d0, %d1                # make a copy
3881
3882         andi.w          &0x38, %d0              # extract mode field
3883         andi.l          &0x7, %d1               # extract reg  field
3884
3885         cmpi.b          %d0,&0x18               # is mode (An)+ ?
3886         beq.b           dcea_pi                 # yes
3887
3888         cmpi.b          %d0,&0x20               # is mode -(An) ?
3889         beq.b           dcea_pd                 # yes
3890
3891         or.w            %d1,%d0                 # concat mode,reg
3892         cmpi.b          %d0,&0x3c               # is mode #<data>?
3893
3894         beq.b           dcea_imm                # yes
3895
3896         mov.l           EXC_EA(%a6),%a0         # no; return stacked <ea> unchanged
3897         rts
3898
3899 # need to set immediate data flag here since we'll need to do
3900 # an imem_read to fetch this later.
3901 dcea_imm:
3902         mov.b           &immed_flg,SPCOND_FLG(%a6) # record: operand is immediate data
3903         lea             ([USER_FPIAR,%a6],0x4),%a0 # return <ea> = (long at USER_FPIAR) + 4
3904         rts
3905
3906 # here, the <ea> is stacked correctly. however, we must update the
3907 # address register...
3908 dcea_pi:
3909         mov.l           %a0,%d0                 # pass amt to inc by
3910         bsr.l           inc_areg                # inc addr register (d1 still = reg field)
3911
3912         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
3913         rts
3914
3915 # the <ea> is stacked correctly for all but extended and packed, for
3916 # which the stacked <ea> is 8 bytes too large.
3917 # it would make no sense to have a pre-decrement to a7 in supervisor
3918 # mode so we don't even worry about this tricky case here : )
3919 dcea_pd:
3920         mov.l           %a0,%d0                 # pass amt to dec by
3921         bsr.l           dec_areg                # dec addr register (d1 still = reg field)
3922
3923         mov.l           EXC_EA(%a6),%a0         # fetch stacked <ea>
3924
3925         cmpi.b          %d0,&0xc                # 12 bytes (ext/packed)? NOTE(review): assumes dec_areg keeps d0
3926         beq.b           dcea_pd2                # yes
3927         rts
3928 dcea_pd2:
3929         sub.l           &0x8,%a0                # correct <ea>
3930         mov.l           %a0,EXC_EA(%a6)         # put correct <ea> on stack
3931         rts
3932
3933 #########################################################################
3934 # XDEF **************************************************************** #
3935 #       _calc_ea_fout(): calculate correct stacked <ea> for extended    #
3936 #                        and packed data opclass 3 operations.          #
3937 #                                                                       #
3938 # XREF **************************************************************** #
3939 #       None                                                            #
3940 #                                                                       #
3941 # INPUT *************************************************************** #
3942 #       None                                                            #
3943 #                                                                       #
3944 # OUTPUT ************************************************************** #
3945 #       a0 = return correct effective address                           #
3946 #                                                                       #
3947 # ALGORITHM *********************************************************** #
3948 #       For opclass 3 extended and packed data operations, the <ea>     #
3949 # stacked for the exception is incorrect for -(an) and (an)+ addressing #
3950 # modes. Also, while we're at it, the index register itself must get    #
3951 # updated.                                                              #
3952 #       So, for -(an), we must subtract 8 off of the stacked <ea> value #
3953 # and return that value as the correct <ea> and store that value in An. #
3954 # For (an)+, the stacked <ea> is correct but we must adjust An by +12.  #
3955 #                                                                       #
3956 #########################################################################
3957
3958 # This calc_ea is currently used to retrieve the correct <ea>
3959 # for fmove outs of type extended and packed.
3960         global          _calc_ea_fout
3961 _calc_ea_fout:
3962         mov.b           1+EXC_OPWORD(%a6),%d0   # fetch lo byte of opcode word
3963         mov.l           %d0,%d1                 # make a copy
3964
3965         andi.w          &0x38,%d0               # extract mode field
3966         andi.l          &0x7,%d1                # extract reg  field
3967
3968         cmpi.b          %d0,&0x18               # is mode (An)+ ?
3969         beq.b           ceaf_pi                 # yes
3970
3971         cmpi.b          %d0,&0x20               # is mode -(An) ?
3972         beq.w           ceaf_pd                 # yes
3973
3974         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
3975         rts
3976
3977 # (An)+ : extended and packed fmove out
3978 #       : stacked <ea> is correct
3979 #       : "An" not updated
3980 ceaf_pi:
3981         mov.w           (tbl_ceaf_pi.b,%pc,%d1.w*2),%d1 # d1 = table offset for reg n
3982         mov.l           EXC_EA(%a6),%a0         # return the stacked <ea>
3983         jmp             (tbl_ceaf_pi.b,%pc,%d1.w*1) # dispatch to ceaf_pi<n>
3984
3985         swbeg           &0x8
3986 tbl_ceaf_pi:
3987         short           ceaf_pi0 - tbl_ceaf_pi
3988         short           ceaf_pi1 - tbl_ceaf_pi
3989         short           ceaf_pi2 - tbl_ceaf_pi
3990         short           ceaf_pi3 - tbl_ceaf_pi
3991         short           ceaf_pi4 - tbl_ceaf_pi
3992         short           ceaf_pi5 - tbl_ceaf_pi
3993         short           ceaf_pi6 - tbl_ceaf_pi
3994         short           ceaf_pi7 - tbl_ceaf_pi
3995
3996 ceaf_pi0:
3997         addi.l          &0xc,EXC_DREGS+0x8(%a6) # a0 += 12 (saved copy)
3998         rts
3999 ceaf_pi1:
4000         addi.l          &0xc,EXC_DREGS+0xc(%a6) # a1 += 12 (saved copy)
4001         rts
4002 ceaf_pi2:
4003         add.l           &0xc,%a2                # a2 += 12 (register itself)
4004         rts
4005 ceaf_pi3:
4006         add.l           &0xc,%a3                # a3 += 12 (register itself)
4007         rts
4008 ceaf_pi4:
4009         add.l           &0xc,%a4                # a4 += 12 (register itself)
4010         rts
4011 ceaf_pi5:
4012         add.l           &0xc,%a5                # a5 += 12 (register itself)
4013         rts
4014 ceaf_pi6:
4015         addi.l          &0xc,EXC_A6(%a6)        # a6 += 12 (saved copy)
4016         rts
4017 ceaf_pi7:
4018         mov.b           &mia7_flg,SPCOND_FLG(%a6) # flag the (a7)+ special case
4019         addi.l          &0xc,EXC_A7(%a6)        # a7 += 12 (saved copy)
4020         rts
4021
4022 # -(An) : extended and packed fmove out
4023 #       : stacked <ea> = actual <ea> + 8
4024 #       : "An" not updated
4025 ceaf_pd:
4026         mov.w           (tbl_ceaf_pd.b,%pc,%d1.w*2),%d1 # d1 = table offset for reg n
4027         mov.l           EXC_EA(%a6),%a0         # fetch the stacked <ea>
4028         sub.l           &0x8,%a0                # a0 = actual <ea>
4029         sub.l           &0x8,EXC_EA(%a6)        # correct the stacked <ea> too
4030         jmp             (tbl_ceaf_pd.b,%pc,%d1.w*1) # dispatch to ceaf_pd<n>
4031
4032         swbeg           &0x8
4033 tbl_ceaf_pd:
4034         short           ceaf_pd0 - tbl_ceaf_pd
4035         short           ceaf_pd1 - tbl_ceaf_pd
4036         short           ceaf_pd2 - tbl_ceaf_pd
4037         short           ceaf_pd3 - tbl_ceaf_pd
4038         short           ceaf_pd4 - tbl_ceaf_pd
4039         short           ceaf_pd5 - tbl_ceaf_pd
4040         short           ceaf_pd6 - tbl_ceaf_pd
4041         short           ceaf_pd7 - tbl_ceaf_pd
4042
4043 ceaf_pd0:
4044         mov.l           %a0,EXC_DREGS+0x8(%a6)  # a0 = corrected <ea> (saved copy)
4045         rts
4046 ceaf_pd1:
4047         mov.l           %a0,EXC_DREGS+0xc(%a6)  # a1 = corrected <ea> (saved copy)
4048         rts
4049 ceaf_pd2:
4050         mov.l           %a0,%a2                 # a2 = corrected <ea>
4051         rts
4052 ceaf_pd3:
4053         mov.l           %a0,%a3                 # a3 = corrected <ea>
4054         rts
4055 ceaf_pd4:
4056         mov.l           %a0,%a4                 # a4 = corrected <ea>
4057         rts
4058 ceaf_pd5:
4059         mov.l           %a0,%a5                 # a5 = corrected <ea>
4060         rts
4061 ceaf_pd6:
4062         mov.l           %a0,EXC_A6(%a6)         # a6 = corrected <ea> (saved copy)
4063         rts
4064 ceaf_pd7:
4065         mov.l           %a0,EXC_A7(%a6)         # a7 = corrected <ea> (saved copy)
4066         mov.b           &mda7_flg,SPCOND_FLG(%a6) # flag the -(a7) special case
4067         rts
4068
4069 #
4070 # This table holds the offsets of the emulation routines for each individual
4071 # math operation relative to the address of this table. Included are
4072 # routines like fadd/fmul/fabs. The transcendentals ARE NOT. This is because
4073 # this table is for the version of the 060FPSP without transcendentals.
4074 # The location within the table is determined by the extension bits of the
4075 # operation longword.
4076 #
4077
4078         swbeg           &109
4079 tbl_unsupp:
4080         long            fin             - tbl_unsupp    # 00: fmove
4081         long            fint            - tbl_unsupp    # 01: fint
4082         long            tbl_unsupp      - tbl_unsupp    # 02: fsinh
4083         long            fintrz          - tbl_unsupp    # 03: fintrz
4084         long            fsqrt           - tbl_unsupp    # 04: fsqrt
4085         long            tbl_unsupp      - tbl_unsupp    # 05: (none)
4086         long            tbl_unsupp      - tbl_unsupp    # 06: flognp1
4087         long            tbl_unsupp      - tbl_unsupp    # 07: (none)
4088         long            tbl_unsupp      - tbl_unsupp    # 08: fetoxm1
4089         long            tbl_unsupp      - tbl_unsupp    # 09: ftanh
4090         long            tbl_unsupp      - tbl_unsupp    # 0a: fatan
4091         long            tbl_unsupp      - tbl_unsupp    # 0b: (none)
4092         long            tbl_unsupp      - tbl_unsupp    # 0c: fasin
4093         long            tbl_unsupp      - tbl_unsupp    # 0d: fatanh
4094         long            tbl_unsupp      - tbl_unsupp    # 0e: fsin
4095         long            tbl_unsupp      - tbl_unsupp    # 0f: ftan
4096         long            tbl_unsupp      - tbl_unsupp    # 10: fetox
4097         long            tbl_unsupp      - tbl_unsupp    # 11: ftwotox
4098         long            tbl_unsupp      - tbl_unsupp    # 12: ftentox
4099         long            tbl_unsupp      - tbl_unsupp    # 13: (none)
4100         long            tbl_unsupp      - tbl_unsupp    # 14: flogn
4101         long            tbl_unsupp      - tbl_unsupp    # 15: flog10
4102         long            tbl_unsupp      - tbl_unsupp    # 16: flog2
4103         long            tbl_unsupp      - tbl_unsupp    # 17: (none)
4104         long            fabs            - tbl_unsupp    # 18: fabs
4105         long            tbl_unsupp      - tbl_unsupp    # 19: fcosh
4106         long            fneg            - tbl_unsupp    # 1a: fneg
4107         long            tbl_unsupp      - tbl_unsupp    # 1b: (none)
4108         long            tbl_unsupp      - tbl_unsupp    # 1c: facos
4109         long            tbl_unsupp      - tbl_unsupp    # 1d: fcos
4110         long            tbl_unsupp      - tbl_unsupp    # 1e: fgetexp
4111         long            tbl_unsupp      - tbl_unsupp    # 1f: fgetman
4112         long            fdiv            - tbl_unsupp    # 20: fdiv
4113         long            tbl_unsupp      - tbl_unsupp    # 21: fmod
4114         long            fadd            - tbl_unsupp    # 22: fadd
4115         long            fmul            - tbl_unsupp    # 23: fmul
4116         long            fsgldiv         - tbl_unsupp    # 24: fsgldiv
4117         long            tbl_unsupp      - tbl_unsupp    # 25: frem
4118         long            tbl_unsupp      - tbl_unsupp    # 26: fscale
4119         long            fsglmul         - tbl_unsupp    # 27: fsglmul
4120         long            fsub            - tbl_unsupp    # 28: fsub
4121         long            tbl_unsupp      - tbl_unsupp    # 29: (none)
4122         long            tbl_unsupp      - tbl_unsupp    # 2a: (none)
4123         long            tbl_unsupp      - tbl_unsupp    # 2b: (none)
4124         long            tbl_unsupp      - tbl_unsupp    # 2c: (none)
4125         long            tbl_unsupp      - tbl_unsupp    # 2d: (none)
4126         long            tbl_unsupp      - tbl_unsupp    # 2e: (none)
4127         long            tbl_unsupp      - tbl_unsupp    # 2f: (none)
4128         long            tbl_unsupp      - tbl_unsupp    # 30: fsincos
4129         long            tbl_unsupp      - tbl_unsupp    # 31: fsincos
4130         long            tbl_unsupp      - tbl_unsupp    # 32: fsincos
4131         long            tbl_unsupp      - tbl_unsupp    # 33: fsincos
4132         long            tbl_unsupp      - tbl_unsupp    # 34: fsincos
4133         long            tbl_unsupp      - tbl_unsupp    # 35: fsincos
4134         long            tbl_unsupp      - tbl_unsupp    # 36: fsincos
4135         long            tbl_unsupp      - tbl_unsupp    # 37: fsincos
4136         long            fcmp            - tbl_unsupp    # 38: fcmp
4137         long            tbl_unsupp      - tbl_unsupp    # 39: (none)
4138         long            ftst            - tbl_unsupp    # 3a: ftst
4139         long            tbl_unsupp      - tbl_unsupp    # 3b: (none)
4140         long            tbl_unsupp      - tbl_unsupp    # 3c: (none)
4141         long            tbl_unsupp      - tbl_unsupp    # 3d: (none)
4142         long            tbl_unsupp      - tbl_unsupp    # 3e: (none)
4143         long            tbl_unsupp      - tbl_unsupp    # 3f: (none)
4144         long            fsin            - tbl_unsupp    # 40: fsmove
4145         long            fssqrt          - tbl_unsupp    # 41: fssqrt
4146         long            tbl_unsupp      - tbl_unsupp    # 42: (none)
4147         long            tbl_unsupp      - tbl_unsupp    # 43: (none)
4148         long            fdin            - tbl_unsupp    # 44: fdmove
4149         long            fdsqrt          - tbl_unsupp    # 45: fdsqrt
4150         long            tbl_unsupp      - tbl_unsupp    # 46: (none)
4151         long            tbl_unsupp      - tbl_unsupp    # 47: (none)
4152         long            tbl_unsupp      - tbl_unsupp    # 48: (none)
4153         long            tbl_unsupp      - tbl_unsupp    # 49: (none)
4154         long            tbl_unsupp      - tbl_unsupp    # 4a: (none)
4155         long            tbl_unsupp      - tbl_unsupp    # 4b: (none)
4156         long            tbl_unsupp      - tbl_unsupp    # 4c: (none)
4157         long            tbl_unsupp      - tbl_unsupp    # 4d: (none)
4158         long            tbl_unsupp      - tbl_unsupp    # 4e: (none)
4159         long            tbl_unsupp      - tbl_unsupp    # 4f: (none)
4160         long            tbl_unsupp      - tbl_unsupp    # 50: (none)
4161         long            tbl_unsupp      - tbl_unsupp    # 51: (none)
4162         long            tbl_unsupp      - tbl_unsupp    # 52: (none)
4163         long            tbl_unsupp      - tbl_unsupp    # 53: (none)
4164         long            tbl_unsupp      - tbl_unsupp    # 54: (none)
4165         long            tbl_unsupp      - tbl_unsupp    # 55: (none)
4166         long            tbl_unsupp      - tbl_unsupp    # 56: (none)
4167         long            tbl_unsupp      - tbl_unsupp    # 57: (none)
4168         long            fsabs           - tbl_unsupp    # 58: fsabs
4169         long            tbl_unsupp      - tbl_unsupp    # 59: (none)
4170         long            fsneg           - tbl_unsupp    # 5a: fsneg
4171         long            tbl_unsupp      - tbl_unsupp    # 5b: (none)
4172         long            fdabs           - tbl_unsupp    # 5c: fdabs
4173         long            tbl_unsupp      - tbl_unsupp    # 5d: (none)
4174         long            fdneg           - tbl_unsupp    # 5e: fdneg
4175         long            tbl_unsupp      - tbl_unsupp    # 5f: (none)
4176         long            fsdiv           - tbl_unsupp    # 60: fsdiv
4177         long            tbl_unsupp      - tbl_unsupp    # 61: (none)
4178         long            fsadd           - tbl_unsupp    # 62: fsadd
4179         long            fsmul           - tbl_unsupp    # 63: fsmul
4180         long            fddiv           - tbl_unsupp    # 64: fddiv
4181         long            tbl_unsupp      - tbl_unsupp    # 65: (none)
4182         long            fdadd           - tbl_unsupp    # 66: fdadd
4183         long            fdmul           - tbl_unsupp    # 67: fdmul
4184         long            fssub           - tbl_unsupp    # 68: fssub
4185         long            tbl_unsupp      - tbl_unsupp    # 69: (none)
4186         long            tbl_unsupp      - tbl_unsupp    # 6a: (none)
4187         long            tbl_unsupp      - tbl_unsupp    # 6b: (none)
4188         long            fdsub           - tbl_unsupp    # 6c: fdsub
4189
4190 #################################################
4191 # Add this here so non-fp modules can compile.
4192 # (smovcr is called from fpsp_inex.)
4193         global          smovcr                  # exported so references to smovcr resolve
4194 smovcr:
4195         bra.b           smovcr                  # stub: branches to itself, never returns
4196
4197 #########################################################################
4198 # XDEF **************************************************************** #
4199 #       fmovm_dynamic(): emulate "fmovm" dynamic instruction            #
4200 #                                                                       #
4201 # XREF **************************************************************** #
4202 #       fetch_dreg() - fetch data register                              #
4203 #       {i,d,}mem_read() - fetch data from memory                       #
4204 #       _mem_write() - write data to memory                             #
4205 #       iea_iacc() - instruction memory access error occurred           #
4206 #       iea_dacc() - data memory access error occurred                  #
4207 #       restore() - restore An index regs if access error occurred      #
4208 #                                                                       #
4209 # INPUT *************************************************************** #
4210 #       None                                                            #
4211 #                                                                       #
4212 # OUTPUT ************************************************************** #
4213 #       If instr is "fmovm Dn,-(A7)" from supervisor mode,              #
4214 #               d0 = size of dump                                       #
4215 #               d1 = Dn                                                 #
4216 #       Else if instruction access error,                               #
4217 #               d0 = FSLW                                               #
4218 #       Else if data access error,                                      #
4219 #               d0 = FSLW                                               #
4220 #               a0 = address of fault                                   #
4221 #       Else                                                            #
4222 #               none.                                                   #
4223 #                                                                       #
4224 # ALGORITHM *********************************************************** #
4225 #       The effective address must be calculated since this is entered  #
4226 # from an "Unimplemented Effective Address" exception handler. So, we   #
4227 # have our own fcalc_ea() routine here. If an access error is flagged   #
4228 # by a _{i,d,}mem_read() call, we must exit through the special         #
4229 # handler.                                                              #
4230 #       The data register is determined and its value loaded to get the #
4231 # string of FP registers affected. This value is used as an index into  #
4232 # a lookup table such that we can determine the number of bytes         #
4233 # involved.                                                             #
4234 #       If the instruction is "fmovm.x <ea>,Dn", a _mem_read() is used  #
4235 # to read in all FP values. Again, _mem_read() may fail and require a   #
4236 # special exit.                                                         #
4237 #       If the instruction is "fmovm.x DN,<ea>", a _mem_write() is used #
4238 # to write all FP values. _mem_write() may also fail.                   #
4239 #       If the instruction is "fmovm.x DN,-(a7)" from supervisor mode,  #
4240 # then we return the size of the dump and the string to the caller      #
4241 # so that the move can occur outside of this routine. This special      #
4242 # case is required so that moves to the system stack are handled        #
4243 # correctly.                                                            #
4244 #                                                                       #
4245 # DYNAMIC:                                                              #
4246 #       fmovm.x dn, <ea>                                                #
4247 #       fmovm.x <ea>, dn                                                #
4248 #                                                                       #
4249 #             <WORD 1>                <WORD2>                           #
4250 #       1111 0010 00 |<ea>|     11@& 1000 0$$$ 0000                     #
4251 #                                                                       #
4252 #       & = (0): predecrement addressing mode                           #
4253 #           (1): postincrement or control addressing mode               #
4254 #       @ = (0): move listed regs from memory to the FPU                #
4255 #           (1): move listed regs from the FPU to memory                #
4256 #       $$$    : index of data register holding reg select mask         #
4257 #                                                                       #
4258 # NOTES:                                                                #
4259 #       If the data register holds a zero, then the                     #
4260 #       instruction is a nop.                                           #
4261 #                                                                       #
4262 #########################################################################
4263
4264         global          fmovm_dynamic
4265 fmovm_dynamic:
4266
4267 # extract the data register in which the bit string resides...
4268         mov.b           1+EXC_EXTWORD(%a6),%d1  # fetch extword
4269         andi.w          &0x70,%d1               # extract reg bits
4270         lsr.b           &0x4,%d1                # shift into lo bits
4271
4272 # fetch the bit string into d0...
4273         bsr.l           fetch_dreg              # fetch reg string
4274
4275         andi.l          &0x000000ff,%d0         # keep only lo byte
4276
4277         mov.l           %d0,-(%sp)              # save strg
4278         mov.b           (tbl_fmovm_size.w,%pc,%d0),%d0
4279         mov.l           %d0,-(%sp)              # save size
4280         bsr.l           fmovm_calc_ea           # calculate <ea>
4281         mov.l           (%sp)+,%d0              # restore size
4282         mov.l           (%sp)+,%d1              # restore strg
4283
4284 # if the bit string is a zero, then the operation is a no-op
4285 # but, make sure that we've calculated ea and advanced the opword pointer
4286         beq.w           fmovm_data_done
4287
4288 # separate move ins from move outs...
4289         btst            &0x5,EXC_EXTWORD(%a6)   # is it a move in or out?
4290         beq.w           fmovm_data_in           # it's a move out
4291
4292 #############
4293 # MOVE OUT: #
4294 #############
4295 fmovm_data_out:
4296         btst            &0x4,EXC_EXTWORD(%a6)   # control or predecrement?
4297         bne.w           fmovm_out_ctrl          # control
4298
4299 ############################
4300 fmovm_out_predec:
4301 # for predecrement mode, the bit string is the opposite of both control
4302 # operations and postincrement mode. (bit7 = FP7 ... bit0 = FP0)
4303 # here, we convert it to be just like the others...
4304         mov.b           (tbl_fmovm_convert.w,%pc,%d1.w*1),%d1
4305
4306         btst            &0x5,EXC_SR(%a6)        # user or supervisor mode?
4307         beq.b           fmovm_out_ctrl          # user
4308
4309 fmovm_out_predec_s:
4310         cmpi.b          SPCOND_FLG(%a6),&mda7_flg # is <ea> mode -(a7)?
4311         bne.b           fmovm_out_ctrl
4312
4313 # the operation was unfortunately an: fmovm.x dn,-(sp)
4314 # called from supervisor mode.
4315 # we're also passing "size" and "strg" back to the calling routine
4316         rts
4317
4318 ############################
4319 fmovm_out_ctrl:
4320         mov.l           %a0,%a1                 # move <ea> to a1
4321
4322         sub.l           %d0,%sp                 # subtract size of dump
4323         lea             (%sp),%a0
4324
4325         tst.b           %d1                     # should FP0 be moved?
4326         bpl.b           fmovm_out_ctrl_fp1      # no
4327
4328         mov.l           0x0+EXC_FP0(%a6),(%a0)+ # yes
4329         mov.l           0x4+EXC_FP0(%a6),(%a0)+
4330         mov.l           0x8+EXC_FP0(%a6),(%a0)+
4331
4332 fmovm_out_ctrl_fp1:
4333         lsl.b           &0x1,%d1                # should FP1 be moved?
4334         bpl.b           fmovm_out_ctrl_fp2      # no
4335
4336         mov.l           0x0+EXC_FP1(%a6),(%a0)+ # yes
4337         mov.l           0x4+EXC_FP1(%a6),(%a0)+
4338         mov.l           0x8+EXC_FP1(%a6),(%a0)+
4339
4340 fmovm_out_ctrl_fp2:
4341         lsl.b           &0x1,%d1                # should FP2 be moved?
4342         bpl.b           fmovm_out_ctrl_fp3      # no
4343
4344         fmovm.x         &0x20,(%a0)             # yes
4345         add.l           &0xc,%a0
4346
4347 fmovm_out_ctrl_fp3:
4348         lsl.b           &0x1,%d1                # should FP3 be moved?
4349         bpl.b           fmovm_out_ctrl_fp4      # no
4350
4351         fmovm.x         &0x10,(%a0)             # yes
4352         add.l           &0xc,%a0
4353
4354 fmovm_out_ctrl_fp4:
4355         lsl.b           &0x1,%d1                # should FP4 be moved?
4356         bpl.b           fmovm_out_ctrl_fp5      # no
4357
4358         fmovm.x         &0x08,(%a0)             # yes
4359         add.l           &0xc,%a0
4360
4361 fmovm_out_ctrl_fp5:
4362         lsl.b           &0x1,%d1                # should FP5 be moved?
4363         bpl.b           fmovm_out_ctrl_fp6      # no
4364
4365         fmovm.x         &0x04,(%a0)             # yes
4366         add.l           &0xc,%a0
4367
4368 fmovm_out_ctrl_fp6:
4369         lsl.b           &0x1,%d1                # should FP6 be moved?
4370         bpl.b           fmovm_out_ctrl_fp7      # no
4371
4372         fmovm.x         &0x02,(%a0)             # yes
4373         add.l           &0xc,%a0
4374
4375 fmovm_out_ctrl_fp7:
4376         lsl.b           &0x1,%d1                # should FP7 be moved?
4377         bpl.b           fmovm_out_ctrl_done     # no
4378
4379         fmovm.x         &0x01,(%a0)             # yes
4380         add.l           &0xc,%a0
4381
4382 fmovm_out_ctrl_done:
4383         mov.l           %a1,L_SCR1(%a6)
4384
4385         lea             (%sp),%a0               # pass: supervisor src
4386         mov.l           %d0,-(%sp)              # save size
4387         bsr.l           _dmem_write             # copy data to user mem
4388
4389         mov.l           (%sp)+,%d0
4390         add.l           %d0,%sp                 # clear fpreg data from stack
4391
4392         tst.l           %d1                     # did dstore err?
4393         bne.w           fmovm_out_err           # yes
4394
4395         rts
4396
4397 ############
4398 # MOVE IN: #
4399 ############
4400 fmovm_data_in:                                  # in: d0 = # of bytes, d1 = fpreg bit string, a0 = source addr (presumably the _dmem_read source -- confirm)
4401         mov.l           %a0,L_SCR1(%a6)         # keep a copy of the incoming a0 in L_SCR1
4402
4403         sub.l           %d0,%sp                 # make room for fpregs
4404         lea             (%sp),%a1               # a1 = start of that stack buffer
4405
4406         mov.l           %d1,-(%sp)              # save bit string for later
4407         mov.l           %d0,-(%sp)              # save # of bytes
4408
4409         bsr.l           _dmem_read              # copy data from user mem
4410 # d1 now carries the read status (tested below), which is why the bit string was pushed.
4411         mov.l           (%sp)+,%d0              # retrieve # of bytes
4412
4413         tst.l           %d1                     # did dfetch fail?
4414         bne.w           fmovm_in_err            # yes (bit string and buffer are still on the stack)
4415
4416         mov.l           (%sp)+,%d1              # load bit string
4417
4418         lea             (%sp),%a0               # addr of stack (a0 = read cursor into the buffer)
4419
4420         tst.b           %d1                     # should FP0 be moved? (bit 7 of d1)
4421         bpl.b           fmovm_data_in_fp1       # no
4422
4423         mov.l           (%a0)+,0x0+EXC_FP0(%a6) # yes: 12 bytes go to the EXC_FP0 slot, not to the FPU reg
4424         mov.l           (%a0)+,0x4+EXC_FP0(%a6)
4425         mov.l           (%a0)+,0x8+EXC_FP0(%a6)
4426
4427 fmovm_data_in_fp1:
4428         lsl.b           &0x1,%d1                # should FP1 be moved? (next bit shifted into bit 7)
4429         bpl.b           fmovm_data_in_fp2       # no
4430
4431         mov.l           (%a0)+,0x0+EXC_FP1(%a6) # yes: likewise stored to the EXC_FP1 slot
4432         mov.l           (%a0)+,0x4+EXC_FP1(%a6)
4433         mov.l           (%a0)+,0x8+EXC_FP1(%a6)
4434
4435 fmovm_data_in_fp2:
4436         lsl.b           &0x1,%d1                # should FP2 be moved?
4437         bpl.b           fmovm_data_in_fp3       # no
4438
4439         fmovm.x         (%a0)+,&0x20            # yes: FP2..FP7 are loaded with fmovm.x from the buffer
4440
4441 fmovm_data_in_fp3:
4442         lsl.b           &0x1,%d1                # should FP3 be moved?
4443         bpl.b           fmovm_data_in_fp4       # no
4444
4445         fmovm.x         (%a0)+,&0x10            # yes
4446
4447 fmovm_data_in_fp4:
4448         lsl.b           &0x1,%d1                # should FP4 be moved?
4449         bpl.b           fmovm_data_in_fp5       # no
4450
4451         fmovm.x         (%a0)+,&0x08            # yes
4452
4453 fmovm_data_in_fp5:
4454         lsl.b           &0x1,%d1                # should FP5 be moved?
4455         bpl.b           fmovm_data_in_fp6       # no
4456
4457         fmovm.x         (%a0)+,&0x04            # yes
4458
4459 fmovm_data_in_fp6:
4460         lsl.b           &0x1,%d1                # should FP6 be moved?
4461         bpl.b           fmovm_data_in_fp7       # no
4462
4463         fmovm.x         (%a0)+,&0x02            # yes
4464
4465 fmovm_data_in_fp7:
4466         lsl.b           &0x1,%d1                # should FP7 be moved?
4467         bpl.b           fmovm_data_in_done      # no
4468
4469         fmovm.x         (%a0)+,&0x01            # yes
4470
4471 fmovm_data_in_done:
4472         add.l           %d0,%sp                 # remove fpregs from stack (d0 = byte count popped above)
4473         rts
4474
4475 #####################################
4476
4477 fmovm_data_done:                                # bare return; the branches that target this label are outside this section
4478         rts
4479
4480 ##############################################################################
4481
4482 #
4483 # table indexed by the operation's bit string that gives the number
4484 # of bytes that will be moved.
4485 # one byte per entry, 256 entries (one for every 8-bit bit string).
4486 # number of bytes = (# of 1's in bit string) * 12(bytes/fpreg)
4487 # ex:   0x00 ==> 0x00,  0x03 ==> 0x18,  0xff ==> 0x60
4488 tbl_fmovm_size:
4489         byte    0x00,0x0c,0x0c,0x18,0x0c,0x18,0x18,0x24
4490         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4491         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4492         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4493         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4494         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4495         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4496         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4497         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4498         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4499         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4500         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4501         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4502         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4503         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4504         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4505         byte    0x0c,0x18,0x18,0x24,0x18,0x24,0x24,0x30
4506         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4507         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4508         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4509         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4510         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4511         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4512         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4513         byte    0x18,0x24,0x24,0x30,0x24,0x30,0x30,0x3c
4514         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4515         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4516         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4517         byte    0x24,0x30,0x30,0x3c,0x30,0x3c,0x3c,0x48
4518         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4519         byte    0x30,0x3c,0x3c,0x48,0x3c,0x48,0x48,0x54
4520         byte    0x3c,0x48,0x48,0x54,0x48,0x54,0x54,0x60
4521
4522 #
4523 # table to convert a pre-decrement bit string into a post-increment
4524 # or control bit string.
4525 # ex:   0x00    ==>     0x00
4526 #       0x01    ==>     0x80
4527 #       0x02    ==>     0x40
4528 #               .
4529 #               .
4530 #       0xfd    ==>     0xbf
4531 #       0xfe    ==>     0x7f
4532 #       0xff    ==>     0xff
4533 # i.e. each of the 256 byte entries is its own index with the 8 bits in reverse order.
4534 tbl_fmovm_convert:
4535         byte    0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0
4536         byte    0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0
4537         byte    0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8
4538         byte    0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8
4539         byte    0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4
4540         byte    0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4
4541         byte    0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec
4542         byte    0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc
4543         byte    0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2
4544         byte    0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2
4545         byte    0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea
4546         byte    0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa
4547         byte    0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6
4548         byte    0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6
4549         byte    0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee
4550         byte    0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe
4551         byte    0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1
4552         byte    0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1
4553         byte    0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9
4554         byte    0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9
4555         byte    0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5
4556         byte    0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5
4557         byte    0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed
4558         byte    0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd
4559         byte    0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3
4560         byte    0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3
4561         byte    0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb
4562         byte    0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb
4563         byte    0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7
4564         byte    0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7
4565         byte    0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef
4566         byte    0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
4567
4568         global          fmovm_calc_ea
4569 ###############################################
4570 # fmovm_calc_ea: calculate effective address  #
4571 ###############################################
4572 fmovm_calc_ea:                                  # in: d0 = # of bytes to be moved
4573         mov.l           %d0,%a0                 # move # bytes to a0
4574 # the handlers dispatched to below are entered with a0 = # bytes and d1 = REG field.
4575 # currently, MODE and REG are taken from the EXC_OPWORD. this could be
4576 # easily changed if they were inputs passed in registers.
4577         mov.w           EXC_OPWORD(%a6),%d0     # fetch opcode word
4578         mov.w           %d0,%d1                 # make a copy
4579
4580         andi.w          &0x3f,%d0               # keep the low 6 bits {MODE,REG}: index into tbl_fea_mode
4581         andi.l          &0x7,%d1                # extract reg  field
4582
4583 # jump to the corresponding function for each {MODE,REG} pair.
4584         mov.w           (tbl_fea_mode.b,%pc,%d0.w*2),%d0 # fetch jmp distance
4585         jmp             (tbl_fea_mode.b,%pc,%d0.w*1) # jmp to correct ea mode
4586 # NOTE(review): swbeg presumably declares the 64-entry switch table that follows -- confirm with the assembler docs.
4587         swbeg           &64
4588 tbl_fea_mode:                                   # 64 word offsets, each relative to tbl_fea_mode
4589         short           tbl_fea_mode    -       tbl_fea_mode # entries 0-7: offset 0, i.e. no handler
4590         short           tbl_fea_mode    -       tbl_fea_mode
4591         short           tbl_fea_mode    -       tbl_fea_mode
4592         short           tbl_fea_mode    -       tbl_fea_mode
4593         short           tbl_fea_mode    -       tbl_fea_mode
4594         short           tbl_fea_mode    -       tbl_fea_mode
4595         short           tbl_fea_mode    -       tbl_fea_mode
4596         short           tbl_fea_mode    -       tbl_fea_mode
4597 # entries 8-15: offset 0 as well (no handler)
4598         short           tbl_fea_mode    -       tbl_fea_mode
4599         short           tbl_fea_mode    -       tbl_fea_mode
4600         short           tbl_fea_mode    -       tbl_fea_mode
4601         short           tbl_fea_mode    -       tbl_fea_mode
4602         short           tbl_fea_mode    -       tbl_fea_mode
4603         short           tbl_fea_mode    -       tbl_fea_mode
4604         short           tbl_fea_mode    -       tbl_fea_mode
4605         short           tbl_fea_mode    -       tbl_fea_mode
4606 # entries 16-23: (An)
4607         short           faddr_ind_a0    -       tbl_fea_mode
4608         short           faddr_ind_a1    -       tbl_fea_mode
4609         short           faddr_ind_a2    -       tbl_fea_mode
4610         short           faddr_ind_a3    -       tbl_fea_mode
4611         short           faddr_ind_a4    -       tbl_fea_mode
4612         short           faddr_ind_a5    -       tbl_fea_mode
4613         short           faddr_ind_a6    -       tbl_fea_mode
4614         short           faddr_ind_a7    -       tbl_fea_mode
4615 # entries 24-31: (An)+
4616         short           faddr_ind_p_a0  -       tbl_fea_mode
4617         short           faddr_ind_p_a1  -       tbl_fea_mode
4618         short           faddr_ind_p_a2  -       tbl_fea_mode
4619         short           faddr_ind_p_a3  -       tbl_fea_mode
4620         short           faddr_ind_p_a4  -       tbl_fea_mode
4621         short           faddr_ind_p_a5  -       tbl_fea_mode
4622         short           faddr_ind_p_a6  -       tbl_fea_mode
4623         short           faddr_ind_p_a7  -       tbl_fea_mode
4624 # entries 32-39: -(An)
4625         short           faddr_ind_m_a0  -       tbl_fea_mode
4626         short           faddr_ind_m_a1  -       tbl_fea_mode
4627         short           faddr_ind_m_a2  -       tbl_fea_mode
4628         short           faddr_ind_m_a3  -       tbl_fea_mode
4629         short           faddr_ind_m_a4  -       tbl_fea_mode
4630         short           faddr_ind_m_a5  -       tbl_fea_mode
4631         short           faddr_ind_m_a6  -       tbl_fea_mode
4632         short           faddr_ind_m_a7  -       tbl_fea_mode
4633 # entries 40-47: (d16, An)
4634         short           faddr_ind_disp_a0       -       tbl_fea_mode
4635         short           faddr_ind_disp_a1       -       tbl_fea_mode
4636         short           faddr_ind_disp_a2       -       tbl_fea_mode
4637         short           faddr_ind_disp_a3       -       tbl_fea_mode
4638         short           faddr_ind_disp_a4       -       tbl_fea_mode
4639         short           faddr_ind_disp_a5       -       tbl_fea_mode
4640         short           faddr_ind_disp_a6       -       tbl_fea_mode
4641         short           faddr_ind_disp_a7       -       tbl_fea_mode
4642 # entries 48-55: An-based indexed / memory indirect; one handler, which selects An from d1
4643         short           faddr_ind_ext   -       tbl_fea_mode
4644         short           faddr_ind_ext   -       tbl_fea_mode
4645         short           faddr_ind_ext   -       tbl_fea_mode
4646         short           faddr_ind_ext   -       tbl_fea_mode
4647         short           faddr_ind_ext   -       tbl_fea_mode
4648         short           faddr_ind_ext   -       tbl_fea_mode
4649         short           faddr_ind_ext   -       tbl_fea_mode
4650         short           faddr_ind_ext   -       tbl_fea_mode
4651 # entries 56-63: abs short, abs long, (d16,PC), PC indexed; the last four have no handler
4652         short           fabs_short      -       tbl_fea_mode
4653         short           fabs_long       -       tbl_fea_mode
4654         short           fpc_ind         -       tbl_fea_mode
4655         short           fpc_ind_ext     -       tbl_fea_mode
4656         short           tbl_fea_mode    -       tbl_fea_mode
4657         short           tbl_fea_mode    -       tbl_fea_mode
4658         short           tbl_fea_mode    -       tbl_fea_mode
4659         short           tbl_fea_mode    -       tbl_fea_mode
4660
4661 ###################################
4662 # Address register indirect: (An) #
4663 ###################################
4664 faddr_ind_a0:                                   # out: a0 = <ea> = An (whatever a0 held on entry is overwritten)
4665         mov.l           EXC_DREGS+0x8(%a6),%a0  # Get current a0 (read from its EXC_DREGS slot)
4666         rts
4667
4668 faddr_ind_a1:
4669         mov.l           EXC_DREGS+0xc(%a6),%a0  # Get current a1 (read from its EXC_DREGS slot)
4670         rts
4671
4672 faddr_ind_a2:
4673         mov.l           %a2,%a0                 # Get current a2 (a2-a5 are read from the registers directly)
4674         rts
4675
4676 faddr_ind_a3:
4677         mov.l           %a3,%a0                 # Get current a3
4678         rts
4679
4680 faddr_ind_a4:
4681         mov.l           %a4,%a0                 # Get current a4
4682         rts
4683
4684 faddr_ind_a5:
4685         mov.l           %a5,%a0                 # Get current a5
4686         rts
4687
4688 faddr_ind_a6:
4689         mov.l           (%a6),%a0               # Get current a6 (longword at (a6); presumably the a6 saved when the frame was linked -- confirm)
4690         rts
4691
4692 faddr_ind_a7:
4693         mov.l           EXC_A7(%a6),%a0         # Get current a7 (read from the EXC_A7 slot)
4694         rts
4695
4696 #####################################################
4697 # Address register indirect w/ postincrement: (An)+ #
4698 #####################################################
4699 faddr_ind_p_a0:                                 # in: a0 = # bytes; out: a0 = <ea> = old An, An advanced by # bytes
4700         mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
4701         mov.l           %d0,%d1                 # d1 = working copy to be incremented
4702         add.l           %a0,%d1                 # Increment
4703         mov.l           %d1,EXC_DREGS+0x8(%a6)  # Save incr value
4704         mov.l           %d0,%a0                 # return the pre-increment value as <ea>
4705         rts
4706
4707 faddr_ind_p_a1:
4708         mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
4709         mov.l           %d0,%d1
4710         add.l           %a0,%d1                 # Increment
4711         mov.l           %d1,EXC_DREGS+0xc(%a6)  # Save incr value
4712         mov.l           %d0,%a0                 # <ea> = pre-increment a1
4713         rts
4714
4715 faddr_ind_p_a2:
4716         mov.l           %a2,%d0                 # Get current a2
4717         mov.l           %d0,%d1
4718         add.l           %a0,%d1                 # Increment
4719         mov.l           %d1,%a2                 # Save incr value (a2-a5 are updated in the registers directly)
4720         mov.l           %d0,%a0                 # <ea> = pre-increment a2
4721         rts
4722
4723 faddr_ind_p_a3:
4724         mov.l           %a3,%d0                 # Get current a3
4725         mov.l           %d0,%d1
4726         add.l           %a0,%d1                 # Increment
4727         mov.l           %d1,%a3                 # Save incr value
4728         mov.l           %d0,%a0                 # <ea> = pre-increment a3
4729         rts
4730
4731 faddr_ind_p_a4:
4732         mov.l           %a4,%d0                 # Get current a4
4733         mov.l           %d0,%d1
4734         add.l           %a0,%d1                 # Increment
4735         mov.l           %d1,%a4                 # Save incr value
4736         mov.l           %d0,%a0                 # <ea> = pre-increment a4
4737         rts
4738
4739 faddr_ind_p_a5:
4740         mov.l           %a5,%d0                 # Get current a5
4741         mov.l           %d0,%d1
4742         add.l           %a0,%d1                 # Increment
4743         mov.l           %d1,%a5                 # Save incr value
4744         mov.l           %d0,%a0                 # <ea> = pre-increment a5
4745         rts
4746
4747 faddr_ind_p_a6:
4748         mov.l           (%a6),%d0               # Get current a6 (longword at (a6))
4749         mov.l           %d0,%d1
4750         add.l           %a0,%d1                 # Increment
4751         mov.l           %d1,(%a6)               # Save incr value
4752         mov.l           %d0,%a0                 # <ea> = pre-increment a6
4753         rts
4754
4755 faddr_ind_p_a7:
4756         mov.b           &mia7_flg,SPCOND_FLG(%a6) # set "special case" flag
4757 # only the a7 variant records a flag; the code that consumes SPCOND_FLG is outside this section.
4758         mov.l           EXC_A7(%a6),%d0         # Get current a7
4759         mov.l           %d0,%d1
4760         add.l           %a0,%d1                 # Increment
4761         mov.l           %d1,EXC_A7(%a6)         # Save incr value
4762         mov.l           %d0,%a0                 # <ea> = pre-increment a7
4763         rts
4764
4765 ####################################################
4766 # Address register indirect w/ predecrement: -(An) #
4767 ####################################################
4768 faddr_ind_m_a0:                                 # in: a0 = # bytes; out: An lowered by # bytes, a0 = <ea> = new An
4769         mov.l           EXC_DREGS+0x8(%a6),%d0  # Get current a0
4770         sub.l           %a0,%d0                 # Decrement
4771         mov.l           %d0,EXC_DREGS+0x8(%a6)  # Save decr value
4772         mov.l           %d0,%a0                 # return the decremented value as <ea>
4773         rts
4774
4775 faddr_ind_m_a1:
4776         mov.l           EXC_DREGS+0xc(%a6),%d0  # Get current a1
4777         sub.l           %a0,%d0                 # Decrement
4778         mov.l           %d0,EXC_DREGS+0xc(%a6)  # Save decr value
4779         mov.l           %d0,%a0                 # <ea> = decremented a1
4780         rts
4781
4782 faddr_ind_m_a2:
4783         mov.l           %a2,%d0                 # Get current a2
4784         sub.l           %a0,%d0                 # Decrement
4785         mov.l           %d0,%a2                 # Save decr value (a2-a5 are updated in the registers directly)
4786         mov.l           %d0,%a0                 # <ea> = decremented a2
4787         rts
4788
4789 faddr_ind_m_a3:
4790         mov.l           %a3,%d0                 # Get current a3
4791         sub.l           %a0,%d0                 # Decrement
4792         mov.l           %d0,%a3                 # Save decr value
4793         mov.l           %d0,%a0                 # <ea> = decremented a3
4794         rts
4795
4796 faddr_ind_m_a4:
4797         mov.l           %a4,%d0                 # Get current a4
4798         sub.l           %a0,%d0                 # Decrement
4799         mov.l           %d0,%a4                 # Save decr value
4800         mov.l           %d0,%a0                 # <ea> = decremented a4
4801         rts
4802
4803 faddr_ind_m_a5:
4804         mov.l           %a5,%d0                 # Get current a5
4805         sub.l           %a0,%d0                 # Decrement
4806         mov.l           %d0,%a5                 # Save decr value
4807         mov.l           %d0,%a0                 # <ea> = decremented a5
4808         rts
4809
4810 faddr_ind_m_a6:
4811         mov.l           (%a6),%d0               # Get current a6 (longword at (a6))
4812         sub.l           %a0,%d0                 # Decrement
4813         mov.l           %d0,(%a6)               # Save decr value
4814         mov.l           %d0,%a0                 # <ea> = decremented a6
4815         rts
4816
4817 faddr_ind_m_a7:
4818         mov.b           &mda7_flg,SPCOND_FLG(%a6) # set "special case" flag
4819 # only the a7 variant records a flag; the code that consumes SPCOND_FLG is outside this section.
4820         mov.l           EXC_A7(%a6),%d0         # Get current a7
4821         sub.l           %a0,%d0                 # Decrement
4822         mov.l           %d0,EXC_A7(%a6)         # Save decr value
4823         mov.l           %d0,%a0                 # <ea> = decremented a7
4824         rts
4825
4826 ########################################################
4827 # Address register indirect w/ displacement: (d16, An) #
4828 ########################################################
4829 faddr_ind_disp_a0:                              # out: a0 = <ea> = An + sign-extended d16 read from the instruction stream
4830         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4831         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4832         bsr.l           _imem_read_word         # d0 = displacement word, d1 = status (tested below)
4833
4834         tst.l           %d1                     # did ifetch fail?
4835         bne.l           iea_iacc                # yes
4836
4837         mov.w           %d0,%a0                 # sign extend displacement
4838
4839         add.l           EXC_DREGS+0x8(%a6),%a0  # a0 + d16
4840         rts
4841
4842 faddr_ind_disp_a1:                              # same sequence; base comes from the a1 slot in EXC_DREGS
4843         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4844         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4845         bsr.l           _imem_read_word
4846
4847         tst.l           %d1                     # did ifetch fail?
4848         bne.l           iea_iacc                # yes
4849
4850         mov.w           %d0,%a0                 # sign extend displacement
4851
4852         add.l           EXC_DREGS+0xc(%a6),%a0  # a1 + d16
4853         rts
4854
4855 faddr_ind_disp_a2:                              # same sequence; base is the a2 register itself
4856         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4857         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4858         bsr.l           _imem_read_word
4859
4860         tst.l           %d1                     # did ifetch fail?
4861         bne.l           iea_iacc                # yes
4862
4863         mov.w           %d0,%a0                 # sign extend displacement
4864
4865         add.l           %a2,%a0                 # a2 + d16
4866         rts
4867
4868 faddr_ind_disp_a3:                              # same sequence; base is the a3 register itself
4869         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4870         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4871         bsr.l           _imem_read_word
4872
4873         tst.l           %d1                     # did ifetch fail?
4874         bne.l           iea_iacc                # yes
4875
4876         mov.w           %d0,%a0                 # sign extend displacement
4877
4878         add.l           %a3,%a0                 # a3 + d16
4879         rts
4880
4881 faddr_ind_disp_a4:                              # same sequence; base is the a4 register itself
4882         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4883         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4884         bsr.l           _imem_read_word
4885
4886         tst.l           %d1                     # did ifetch fail?
4887         bne.l           iea_iacc                # yes
4888
4889         mov.w           %d0,%a0                 # sign extend displacement
4890
4891         add.l           %a4,%a0                 # a4 + d16
4892         rts
4893
4894 faddr_ind_disp_a5:                              # same sequence; base is the a5 register itself
4895         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4896         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4897         bsr.l           _imem_read_word
4898
4899         tst.l           %d1                     # did ifetch fail?
4900         bne.l           iea_iacc                # yes
4901
4902         mov.w           %d0,%a0                 # sign extend displacement
4903
4904         add.l           %a5,%a0                 # a5 + d16
4905         rts
4906
4907 faddr_ind_disp_a6:                              # same sequence; base is the longword at (a6)
4908         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4909         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4910         bsr.l           _imem_read_word
4911
4912         tst.l           %d1                     # did ifetch fail?
4913         bne.l           iea_iacc                # yes
4914
4915         mov.w           %d0,%a0                 # sign extend displacement
4916
4917         add.l           (%a6),%a0               # a6 + d16
4918         rts
4919
4920 faddr_ind_disp_a7:                              # same sequence; base comes from the EXC_A7 slot
4921         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4922         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4923         bsr.l           _imem_read_word
4924
4925         tst.l           %d1                     # did ifetch fail?
4926         bne.l           iea_iacc                # yes
4927
4928         mov.w           %d0,%a0                 # sign extend displacement
4929
4930         add.l           EXC_A7(%a6),%a0         # a7 + d16
4931         rts
4932
4933 ########################################################################
4934 # Address register indirect w/ index(8-bit displacement): (d8, An, Xn) #
4935 #    "       "         "    w/   "  (base displacement): (bd, An, Xn)  #
4936 # Memory indirect postindexed: ([bd, An], Xn, od)                      #
4937 # Memory indirect preindexed: ([bd, An, Xn], od)                       #
4938 ########################################################################
4939 faddr_ind_ext:                                  # in: d1 = REG field of the base An; out: a0 = <ea>
4940         addq.l          &0x8,%d1                # REG + 8: presumably fetch_dreg numbers a0-a7 as 8-15 -- confirm
4941         bsr.l           fetch_dreg              # fetch base areg
4942         mov.l           %d0,-(%sp)              # park the base on the stack across the ifetch
4943
4944         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4945         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4946         bsr.l           _imem_read_word         # fetch extword in d0
4947
4948         tst.l           %d1                     # did ifetch fail?
4949         bne.l           iea_iacc                # yes (the base is still on the stack on this path)
4950
4951         mov.l           (%sp)+,%a0              # a0 = base
4952
4953         btst            &0x8,%d0                # extword bit 8 set?
4954         bne.w           fcalc_mem_ind           # yes: let fcalc_mem_ind decode the extword
4955 # otherwise: index regno in extword bits 15-12, size in bit 11, scale in bits 10-9, disp in the low byte.
4956         mov.l           %d0,L_SCR1(%a6)         # hold extword in L_SCR1
4957
4958         mov.l           %d0,%d1
4959         rol.w           &0x4,%d1                # bring bits 15-12 down to bits 3-0
4960         andi.w          &0xf,%d1                # extract index regno
4961
4962 # count on fetch_dreg() not to alter a0...
4963         bsr.l           fetch_dreg              # fetch index
4964
4965         mov.l           %d2,-(%sp)              # save d2
4966         mov.l           L_SCR1(%a6),%d2         # fetch extword
4967
4968         btst            &0xb,%d2                # is it word or long?
4969         bne.b           faii8_long              # long: use all 32 bits of the index
4970         ext.l           %d0                     # sign extend word index
4971 faii8_long:
4972         mov.l           %d2,%d1
4973         rol.w           &0x7,%d1                # bring bits 10-9 down to bits 1-0
4974         andi.l          &0x3,%d1                # extract scale value
4975
4976         lsl.l           %d1,%d0                 # shift index by scale
4977
4978         extb.l          %d2                     # sign extend displacement (low byte of extword)
4979         add.l           %d2,%d0                 # index + disp
4980         add.l           %d0,%a0                 # An + (index + disp)
4981
4982         mov.l           (%sp)+,%d2              # restore old d2
4983         rts
4984
4985 ###########################
4986 # Absolute short: (XXX).W #
4987 ###########################
4988 fabs_short:                                     # out: a0 = <ea> = sign-extended word read from the instruction stream
4989         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
4990         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
4991         bsr.l           _imem_read_word         # fetch short address
4992
4993         tst.l           %d1                     # did ifetch fail?
4994         bne.l           iea_iacc                # yes
4995
4996         mov.w           %d0,%a0                 # return <ea> in a0 (word move into An sign-extends)
4997         rts
4998
4999 ##########################
5000 # Absolute long: (XXX).L #
5001 ##########################
5002 fabs_long:                                      # out: a0 = <ea> = longword read from the instruction stream
5003         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5004         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr (long operand, so by 4)
5005         bsr.l           _imem_read_long         # fetch long address
5006
5007         tst.l           %d1                     # did ifetch fail?
5008         bne.l           iea_iacc                # yes
5009
5010         mov.l           %d0,%a0                 # return <ea> in a0
5011         rts
5012
5013 #######################################################
5014 # Program counter indirect w/ displacement: (d16, PC) #
5015 #######################################################
5016 fpc_ind:                                        # out: a0 = <ea> = (address of the displacement word) + d16
5017         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5018         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5019         bsr.l           _imem_read_word         # fetch word displacement
5020
5021         tst.l           %d1                     # did ifetch fail?
5022         bne.l           iea_iacc                # yes
5023
5024         mov.w           %d0,%a0                 # sign extend displacement
5025
5026         add.l           EXC_EXTWPTR(%a6),%a0    # pc + d16
5027
5028 # EXC_EXTWPTR was advanced by 2 (addq.l above) before the fetch. need to adjust here.
5029         subq.l          &0x2,%a0                # adjust <ea>
5030         rts
5031
5032 ##########################################################
5033 # PC indirect w/ index(8-bit displacement): (d8, PC, Xn) #
5034 # "     "     w/   "  (base displacement): (bd, PC, Xn)  #
5035 # PC memory indirect postindexed: ([bd, PC], Xn, od)     #
5036 # PC memory indirect preindexed: ([bd, PC, Xn], od)      #
5037 ##########################################################
5038 fpc_ind_ext:                                    # out: a0 = <ea>; base is the address of the extension word
5039         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5040         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5041         bsr.l           _imem_read_word         # fetch ext word
5042
5043         tst.l           %d1                     # did ifetch fail?
5044         bne.l           iea_iacc                # yes
5045
5046         mov.l           EXC_EXTWPTR(%a6),%a0    # put base in a0
5047         subq.l          &0x2,%a0                # adjust base (undo the addq.l above)
5048
5049         btst            &0x8,%d0                # is disp only 8 bits?
5050         bne.w           fcalc_mem_ind           # calc memory indirect
5051 # extword bit 8 clear: index regno in bits 15-12, size in bit 11, scale in bits 10-9, disp in the low byte.
5052         mov.l           %d0,L_SCR1(%a6)         # store extword in L_SCR1
5053
5054         mov.l           %d0,%d1                 # make extword copy
5055         rol.w           &0x4,%d1                # rotate reg num into place
5056         andi.w          &0xf,%d1                # extract register number
5057
5058 # count on fetch_dreg() not to alter a0...
5059         bsr.l           fetch_dreg              # fetch index
5060
5061         mov.l           %d2,-(%sp)              # save d2
5062         mov.l           L_SCR1(%a6),%d2         # fetch extword
5063
5064         btst            &0xb,%d2                # is index word or long?
5065         bne.b           fpii8_long              # long
5066         ext.l           %d0                     # sign extend word index
5067 fpii8_long:
5068         mov.l           %d2,%d1
5069         rol.w           &0x7,%d1                # rotate scale value into place
5070         andi.l          &0x3,%d1                # extract scale value
5071
5072         lsl.l           %d1,%d0                 # shift index by scale
5073
5074         extb.l          %d2                     # sign extend displacement
5075         add.l           %d2,%d0                 # disp + index
5076         add.l           %d0,%a0                 # PC base + (index + disp)
5077
5078         mov.l           (%sp)+,%d2              # restore temp register
5079         rts
5080
5081 # d2 = index
5082 # d3 = base
5083 # d4 = od
5084 # d5 = extword
5085 fcalc_mem_ind:                                  # in: d0 = extword (bit 8 set), a0 = base; out: a0 = <ea>, d2-d5 preserved
5086         btst            &0x6,%d0                # is the index suppressed?
5087         beq.b           fcalc_index             # no: fetch and scale the index
5088
5089         movm.l          &0x3c00,-(%sp)          # save d2-d5
5090
5091         mov.l           %d0,%d5                 # put extword in d5
5092         mov.l           %a0,%d3                 # put base in d3
5093
5094         clr.l           %d2                     # yes, so index = 0
5095         bra.b           fbase_supp_ck
5096
5097 # index:
5098 fcalc_index:
5099         mov.l           %d0,L_SCR1(%a6)         # save d0 (extword)
5100         bfextu          %d0{&16:&4},%d1         # fetch dreg index (extword bits 15-12)
5101         bsr.l           fetch_dreg              # d0 = index register contents; a0 is relied on to survive the call
5102
5103         movm.l          &0x3c00,-(%sp)          # save d2-d5
5104         mov.l           %d0,%d2                 # put index in d2
5105         mov.l           L_SCR1(%a6),%d5         # put extword in d5
5106         mov.l           %a0,%d3                 # put base in d3
5107
5108         btst            &0xb,%d5                # is index word or long?
5109         bne.b           fno_ext                 # long: no sign extension needed
5110         ext.l           %d2                     # sign extend word index
5111
5112 fno_ext:
5113         bfextu          %d5{&21:&2},%d0         # d0 = scale (extword bits 10-9)
5114         lsl.l           %d0,%d2                 # shift index by scale
5115
5116 # base address (passed as parameter in d3):
5117 # we clear the value here if it should actually be suppressed.
5118 fbase_supp_ck:
5119         btst            &0x7,%d5                # is the base suppressed?
5120         beq.b           fno_base_sup            # no: keep d3
5121         clr.l           %d3                     # yes: base = 0
5122
5123 # base displacement:
5124 fno_base_sup:
5125         bfextu          %d5{&26:&2},%d0         # get bd size
5126 #       beq.l           fmovm_error             # if (size == 0) it's reserved
5127 # with the check above commented out, size 0 takes the same path as size 1 (no bd fetched).
5128         cmpi.b          %d0,&0x2
5129         blt.b           fno_bd                  # size below 2: nothing to fetch
5130         beq.b           fget_word_bd            # size == 2: word bd
5131 # fall through: long bd
5132         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5133         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5134         bsr.l           _imem_read_long
5135
5136         tst.l           %d1                     # did ifetch fail?
5137         bne.l           fcea_iacc               # yes (d2-d5 are still saved on the stack)
5138
5139         bra.b           fchk_ind
5140
5141 fget_word_bd:
5142         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5143         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5144         bsr.l           _imem_read_word
5145
5146         tst.l           %d1                     # did ifetch fail?
5147         bne.l           fcea_iacc               # yes
5148
5149         ext.l           %d0                     # sign extend bd
5150
5151 fchk_ind:
5152         add.l           %d0,%d3                 # base += bd
5153
5154 # outer displacement:
5155 fno_bd:
5156         bfextu          %d5{&30:&2},%d0         # d0 = extword bits 1-0; zero means no memory indirection
5157         beq.w           faii_bd                 # so <ea> = base + bd + index
5158
5159         cmpi.b          %d0,&0x2
5160         blt.b           fnull_od                # below 2: od = 0
5161         beq.b           fword_od                # == 2: word od
5162 # fall through: long od
5163         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5164         addq.l          &0x4,EXC_EXTWPTR(%a6)   # incr instruction ptr
5165         bsr.l           _imem_read_long
5166
5167         tst.l           %d1                     # did ifetch fail?
5168         bne.l           fcea_iacc               # yes
5169
5170         bra.b           fadd_them
5171
5172 fword_od:
5173         mov.l           EXC_EXTWPTR(%a6),%a0    # fetch instruction addr
5174         addq.l          &0x2,EXC_EXTWPTR(%a6)   # incr instruction ptr
5175         bsr.l           _imem_read_word
5176
5177         tst.l           %d1                     # did ifetch fail?
5178         bne.l           fcea_iacc               # yes
5179
5180         ext.l           %d0                     # sign extend od
5181         bra.b           fadd_them
5182
5183 fnull_od:
5184         clr.l           %d0                     # od = 0
5185
5186 fadd_them:
5187         mov.l           %d0,%d4                 # put od in d4
5188
5189         btst            &0x2,%d5                # pre or post indexing?
5190         beq.b           fpre_indexed            # bit clear: preindexed
5191 # postindexed: the pointer is read from (base + bd); index and od are added afterwards.
5192         mov.l           %d3,%a0
5193         bsr.l           _dmem_read_long         # d0 = longword read, d1 = status (tested below)
5194
5195         tst.l           %d1                     # did dfetch fail?
5196         bne.w           fcea_err                # yes
5197
5198         add.l           %d2,%d0                 # <ea> += index
5199         add.l           %d4,%d0                 # <ea> += od
5200         bra.b           fdone_ea
5201
5202 fpre_indexed:
5203         add.l           %d2,%d3                 # preindexing
5204         mov.l           %d3,%a0                 # pointer is read from (base + bd + index)
5205         bsr.l           _dmem_read_long
5206
5207         tst.l           %d1                     # did dfetch fail?
5208         bne.w           fcea_err                # yes
5209
5210         add.l           %d4,%d0                 # ea += od
5211         bra.b           fdone_ea
5212
5213 faii_bd:
5214         add.l           %d2,%d3                 # ea = (base + bd) + index
5215         mov.l           %d3,%d0
5216 fdone_ea:
5217         mov.l           %d0,%a0                 # return <ea> in a0
5218
5219         movm.l          (%sp)+,&0x003c          # restore d2-d5
5220         rts
5221
5222 #########################################################
5223 fcea_err:                                       # exit taken when a _dmem_read_long call reported failure
5224         mov.l           %d3,%a0                 # a0 = d3, the address handed to _dmem_read_long at the visible call sites
5225
5226         movm.l          (%sp)+,&0x003c          # restore d2-d5
5227         mov.w           &0x0101,%d0             # d0 = code passed on to iea_dacc (meaning not visible here -- TODO confirm)
5228         bra.l           iea_dacc
5229
5230 fcea_iacc:                                      # exit taken when an instruction-stream fetch reported failure
5231         movm.l          (%sp)+,&0x003c          # restore d2-d5
5232         bra.l           iea_iacc
5233
5234 fmovm_out_err:
5235         bsr.l           restore                 # helper defined elsewhere in this file
5236         mov.w           &0x00e1,%d0             # d0 = code for the "out" case, passed on to iea_dacc (TODO confirm meaning)
5237         bra.b           fmovm_err
5238
5239 fmovm_in_err:
5240         bsr.l           restore                 # helper defined elsewhere in this file
5241         mov.w           &0x0161,%d0             # d0 = code for the "in" case; falls through to fmovm_err
5242
5243 fmovm_err:
5244         mov.l           L_SCR1(%a6),%a0         # a0 = value saved in L_SCR1 (presumably the faulting address -- confirm)
5245         bra.l           iea_dacc                # common exit: a0 and d0 are set up as above
5246
5247 #########################################################################
5248 # XDEF **************************************************************** #
5249 #       fmovm_ctrl(): emulate fmovm.l of control registers instr        #
5250 #                                                                       #
5251 # XREF **************************************************************** #
5252 #       _imem_read_long() - read longword from memory                   #
5253 #       iea_iacc() - _imem_read_long() failed; error recovery           #
5254 #                                                                       #
5255 # INPUT *************************************************************** #
5256 #       EXC_EXTWORD(a6) = reg select bits; EXC_EXTWPTR(a6) = fetch addr #
5257 #                                                                       #
5258 # OUTPUT ************************************************************** #
5259 #       If _imem_read_long() doesn't fail:                              #
5260 #               USER_FPCR(a6)  = new FPCR value                         #
5261 #               USER_FPSR(a6)  = new FPSR value                         #
5262 #               USER_FPIAR(a6) = new FPIAR value                        #
5263 #                                                                       #
5264 # ALGORITHM *********************************************************** #
5265 #       Decode the instruction type by looking at the extension word    #
5266 # in order to see how many control registers to fetch from memory.      #
5267 # Fetch them using _imem_read_long(). If this fetch fails, exit through #
5268 # the special access error exit handler iea_iacc().                     #
5269 #                                                                       #
5270 # Instruction word decoding:                                            #
5271 #                                                                       #
5272 #       fmovem.l #<data>, {FPIAR&|FPCR&|FPSR}                           #
5273 #                                                                       #
5274 #               WORD1                   WORD2                           #
5275 #       1111 0010 00 111100     100$ $$00 0000 0000                     #
5276 #                                                                       #
5277 #       $$$ (100): FPCR                                                 #
5278 #           (010): FPSR                                                 #
5279 #           (001): FPIAR                                                #
5280 #           (000): FPIAR                                                #
5281 #                                                                       #
5282 #########################################################################
5283
5284         global          fmovm_ctrl
5285 fmovm_ctrl:
5286         mov.b           EXC_EXTWORD(%a6),%d0    # fetch reg select bits (first byte of ext word)
5287         cmpi.b          %d0,&0x9c               # fpcr & fpsr & fpiar ?
5288         beq.w           fctrl_in_7              # yes
5289         cmpi.b          %d0,&0x98               # fpcr & fpsr ?
5290         beq.w           fctrl_in_6              # yes
5291         cmpi.b          %d0,&0x94               # fpcr & fpiar ?
5292         beq.b           fctrl_in_5              # yes
5293
5294 # any other value falls through and is treated as: fmovem.l #<data>, fpsr/fpiar
5295 fctrl_in_3:
5296         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5297         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5298         bsr.l           _imem_read_long         # fetch FPSR value; d1 != 0 on failure
5299
5300         tst.l           %d1                     # did ifetch fail?
5301         bne.l           iea_iacc                # yes
5302
5303         mov.l           %d0,USER_FPSR(%a6)      # save new FPSR in USER_FPSR(a6)
5304         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5305         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5306         bsr.l           _imem_read_long         # fetch FPIAR value; d1 != 0 on failure
5307
5308         tst.l           %d1                     # did ifetch fail?
5309         bne.l           iea_iacc                # yes
5310
5311         mov.l           %d0,USER_FPIAR(%a6)     # save new FPIAR in USER_FPIAR(a6)
5312         rts
5313
5314 # fmovem.l #<data>, fpcr/fpiar
5315 fctrl_in_5:
5316         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5317         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5318         bsr.l           _imem_read_long         # fetch FPCR value; d1 != 0 on failure
5319
5320         tst.l           %d1                     # did ifetch fail?
5321         bne.l           iea_iacc                # yes
5322
5323         mov.l           %d0,USER_FPCR(%a6)      # save new FPCR in USER_FPCR(a6)
5324         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5325         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5326         bsr.l           _imem_read_long         # fetch FPIAR value; d1 != 0 on failure
5327
5328         tst.l           %d1                     # did ifetch fail?
5329         bne.l           iea_iacc                # yes
5330
5331         mov.l           %d0,USER_FPIAR(%a6)     # save new FPIAR in USER_FPIAR(a6)
5332         rts
5333
5334 # fmovem.l #<data>, fpcr/fpsr
5335 fctrl_in_6:
5336         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5337         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5338         bsr.l           _imem_read_long         # fetch FPCR value; d1 != 0 on failure
5339
5340         tst.l           %d1                     # did ifetch fail?
5341         bne.l           iea_iacc                # yes
5342
5343         mov.l           %d0,USER_FPCR(%a6)      # save new FPCR in USER_FPCR(a6)
5344         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5345         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5346         bsr.l           _imem_read_long         # fetch FPSR value; d1 != 0 on failure
5347
5348         tst.l           %d1                     # did ifetch fail?
5349         bne.l           iea_iacc                # yes
5350
5351         mov.l           %d0,USER_FPSR(%a6)      # save new FPSR in USER_FPSR(a6)
5352         rts
5353
5354 # fmovem.l #<data>, fpcr/fpsr/fpiar
5355 fctrl_in_7:
5356         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5357         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5358         bsr.l           _imem_read_long         # fetch FPCR value; d1 != 0 on failure
5359
5360         tst.l           %d1                     # did ifetch fail?
5361         bne.l           iea_iacc                # yes
5362
5363         mov.l           %d0,USER_FPCR(%a6)      # save new FPCR in USER_FPCR(a6)
5364         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5365         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5366         bsr.l           _imem_read_long         # fetch FPSR value; d1 != 0 on failure
5367
5368         tst.l           %d1                     # did ifetch fail?
5369         bne.l           iea_iacc                # yes
5370
5371         mov.l           %d0,USER_FPSR(%a6)      # save new FPSR in USER_FPSR(a6)
5372         mov.l           EXC_EXTWPTR(%a6),%a0    # a0 = addr of next immediate longword
5373         addq.l          &0x4,EXC_EXTWPTR(%a6)   # advance instruction ptr past it
5374         bsr.l           _imem_read_long         # fetch FPIAR value; d1 != 0 on failure
5375
5376         tst.l           %d1                     # did ifetch fail?
5377         bne.l           iea_iacc                # yes
5378
5379         mov.l           %d0,USER_FPIAR(%a6)     # save new FPIAR in USER_FPIAR(a6)
5380         rts
5381
5382 ##########################################################################
5383
5384 #########################################################################
5385 # XDEF **************************************************************** #
5386 #       addsub_scaler2(): scale inputs to fadd/fsub such that no        #
5387 #                         OVFL/UNFL exceptions will result              #
5388 #                                                                       #
5389 # XREF **************************************************************** #
5390 #       norm() - normalize mantissa after adjusting exponent            #
5391 #                                                                       #
5392 # INPUT *************************************************************** #
5393 #       a0 = pointer to fp op1(src)                                     #
5394 #       a1 = pointer to fp op2(dst)                                     #
5395 #                                                                       #
5396 # OUTPUT ************************************************************** #
5397 #       FP_SCR0(a6) = fp op1 scaled(src)                                #
5398 #       FP_SCR1(a6) = fp op2 scaled(dst)                                #
5399 #       d0         = scale amount                                       #
5400 #                                                                       #
5401 # ALGORITHM *********************************************************** #
5402 #       If the DST exponent is > the SRC exponent, set the DST exponent #
5403 # equal to 0x3fff and scale the SRC exponent by the value that the      #
5404 # DST exponent was scaled by. If the SRC exponent is greater or equal,  #
5405 # do the opposite. Return this scale factor in d0.                      #
5406 #       If the two exponents differ by > the number of mantissa bits    #
5407 # plus two, then set the smallest exponent to a very small value as a   #
5408 # quick shortcut.                                                       #
5409 #                                                                       #
5410 #########################################################################
5411
5412         global          addsub_scaler2
5413 addsub_scaler2:
5414         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)     # FP_SCR0 = working copy of src (HI longword)
5415         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)     # FP_SCR1 = working copy of dst (HI longword)
5416         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)     # ... src LO longword
5417         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)     # ... dst LO longword
5418         mov.w           SRC_EX(%a0),%d0                 # d0 = src {sgn,exp}
5419         mov.w           DST_EX(%a1),%d1                 # d1 = dst {sgn,exp}
5420         mov.w           %d0,FP_SCR0_EX(%a6)
5421         mov.w           %d1,FP_SCR1_EX(%a6)
5422
5423         andi.w          &0x7fff,%d0                     # drop sign bit: d0 = src exp
5424         andi.w          &0x7fff,%d1                     # drop sign bit: d1 = dst exp
5425         mov.w           %d0,L_SCR1(%a6)         # store src exponent
5426         mov.w           %d1,2+L_SCR1(%a6)       # store dst exponent
5427
5428         cmp.w           %d0, %d1                # is src exp >= dst exp?
5429         bge.l           src_exp_ge2
5430
5431 # dst exp is >  src exp; scale dst to exp = 0x3fff
5432 dst_exp_gt2:
5433         bsr.l           scale_to_zero_dst       # rescale FP_SCR1; d0 = scale factor
5434         mov.l           %d0,-(%sp)              # save scale factor
5435
5436         cmpi.b          STAG(%a6),&DENORM       # is src denormalized?
5437         bne.b           cmpexp12
5438
5439         lea             FP_SCR0(%a6),%a0
5440         bsr.l           norm                    # normalize the denorm; result is new exp
5441         neg.w           %d0                     # new exp = -(shft val)
5442         mov.w           %d0,L_SCR1(%a6)         # insert new src exp
5443
5444 cmpexp12:
5445         mov.w           2+L_SCR1(%a6),%d0       # d0 = dst exp (the larger one on this path)
5446         subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
5447
5448         cmp.w           %d0,L_SCR1(%a6)         # is difference >= len(mantissa)+2?
5449         bge.b           quick_scale12           # yes; take the shortcut
5450
5451         mov.w           L_SCR1(%a6),%d0
5452         add.w           0x2(%sp),%d0            # scale src exponent by scale factor (low word of saved long)
5453         mov.w           FP_SCR0_EX(%a6),%d1
5454         and.w           &0x8000,%d1             # keep only the src sign bit
5455         or.w            %d1,%d0                 # concat {sgn,new exp}
5456         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new src exponent
5457
5458         mov.l           (%sp)+,%d0              # return SCALE factor
5459         rts
5460
5461 quick_scale12:
5462         andi.w          &0x8000,FP_SCR0_EX(%a6) # zero src exponent
5463         bset            &0x0,1+FP_SCR0_EX(%a6)  # set exp = 1
5464
5465         mov.l           (%sp)+,%d0              # return SCALE factor
5466         rts
5467
5468 # src exp is >= dst exp; scale src to exp = 0x3fff
5469 src_exp_ge2:
5470         bsr.l           scale_to_zero_src       # rescale FP_SCR0; d0 = scale factor
5471         mov.l           %d0,-(%sp)              # save scale factor
5472
5473         cmpi.b          DTAG(%a6),&DENORM       # is dst denormalized?
5474         bne.b           cmpexp22
5475         lea             FP_SCR1(%a6),%a0
5476         bsr.l           norm                    # normalize the denorm; result is new exp
5477         neg.w           %d0                     # new exp = -(shft val)
5478         mov.w           %d0,2+L_SCR1(%a6)       # insert new dst exp
5479
5480 cmpexp22:
5481         mov.w           L_SCR1(%a6),%d0         # d0 = src exp (the larger or equal one on this path)
5482         subi.w          &mantissalen+2,%d0      # subtract mantissalen+2 from larger exp
5483
5484         cmp.w           %d0,2+L_SCR1(%a6)       # is difference >= len(mantissa)+2?
5485         bge.b           quick_scale22           # yes; take the shortcut
5486
5487         mov.w           2+L_SCR1(%a6),%d0
5488         add.w           0x2(%sp),%d0            # scale dst exponent by scale factor (low word of saved long)
5489         mov.w           FP_SCR1_EX(%a6),%d1
5490         andi.w          &0x8000,%d1             # keep only the dst sign bit
5491         or.w            %d1,%d0                 # concat {sgn,new exp}
5492         mov.w           %d0,FP_SCR1_EX(%a6)     # insert new dst exponent
5493
5494         mov.l           (%sp)+,%d0              # return SCALE factor
5495         rts
5496
5497 quick_scale22:
5498         andi.w          &0x8000,FP_SCR1_EX(%a6) # zero dst exponent
5499         bset            &0x0,1+FP_SCR1_EX(%a6)  # set exp = 1
5500
5501         mov.l           (%sp)+,%d0              # return SCALE factor
5502         rts
5503
5504 ##########################################################################
5505
5506 #########################################################################
5507 # XDEF **************************************************************** #
5508 #       scale_to_zero_src(): scale the exponent of extended precision   #
5509 #                            value at FP_SCR0(a6).                      #
5510 #                                                                       #
5511 # XREF **************************************************************** #
5512 #       norm() - normalize the mantissa if the operand was a DENORM     #
5513 #                                                                       #
5514 # INPUT *************************************************************** #
5515 #       FP_SCR0(a6) = extended precision operand to be scaled           #
5516 #                                                                       #
5517 # OUTPUT ************************************************************** #
5518 #       FP_SCR0(a6) = scaled extended precision operand                 #
5519 #       d0          = scale value                                       #
5520 #                                                                       #
5521 # ALGORITHM *********************************************************** #
5522 #       Set the exponent of the input operand to 0x3fff. Save the value #
5523 # of the difference between the original and new exponent. Then,        #
5524 # normalize the operand if it was a DENORM. Add this normalization      #
5525 # value to the previous value. Return the result.                       #
5526 #                                                                       #
5527 #########################################################################
5528
5529         global          scale_to_zero_src
5530 scale_to_zero_src:
5531         mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
5532         mov.w           %d1,%d0                 # make a copy
5533
5534         andi.l          &0x7fff,%d1             # d1 = operand's exponent (upper word cleared)
5535
5536         andi.w          &0x8000,%d0             # extract operand's sgn
5537         or.w            &0x3fff,%d0             # insert new exponent 0x3fff (= BIAS)
5538
5539         mov.w           %d0,FP_SCR0_EX(%a6)     # store {sgn,0x3fff} back into the operand
5540
5541         cmpi.b          STAG(%a6),&DENORM       # is operand a DENORM?
5542         beq.b           stzs_denorm             # yes; normalize the DENORM
5543
5544 stzs_norm:
5545         mov.l           &0x3fff,%d0
5546         sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5547
5548         rts
5549
5550 stzs_denorm:
5551         lea             FP_SCR0(%a6),%a0        # pass ptr to src op
5552         bsr.l           norm                    # normalize denorm
5553         neg.l           %d0                     # new exponent = -(shft val)
5554         mov.l           %d0,%d1                 # d1 = exponent consumed by stzs_norm
5555         bra.b           stzs_norm               # finish scaling
5556
5557 ###
5558
5559 #########################################################################
5560 # XDEF **************************************************************** #
5561 #       scale_sqrt(): scale the input operand exponent so a subsequent  #
5562 #                     fsqrt operation won't take an exception.          #
5563 #                                                                       #
5564 # XREF **************************************************************** #
5565 #       norm() - normalize the mantissa if the operand was a DENORM     #
5566 #                                                                       #
5567 # INPUT *************************************************************** #
5568 #       FP_SCR0(a6) = extended precision operand to be scaled           #
5569 #                                                                       #
5570 # OUTPUT ************************************************************** #
5571 #       FP_SCR0(a6) = scaled extended precision operand                 #
5572 #       d0          = scale value                                       #
5573 #                                                                       #
5574 # ALGORITHM *********************************************************** #
5575 #       If the input operand is a DENORM, normalize it.                 #
5576 #       If the exponent of the input operand is even, set the exponent  #
5577 # to 0x3ffe and return a scale factor of "(0x3ffe-exp)/2". If the       #
5578 # exponent of the input operand is odd, set the exponent to 0x3fff and  #
5579 # return a scale factor of "(0x3fff-exp)/2".                            #
5580 #                                                                       #
5581 #########################################################################
5582
5583         global          scale_sqrt
5584 scale_sqrt:
5585         cmpi.b          STAG(%a6),&DENORM       # is operand a DENORM?
5586         beq.b           ss_denorm               # yes; normalize the DENORM
5587
5588         mov.w           FP_SCR0_EX(%a6),%d1     # extract operand's {sgn,exp}
5589         andi.l          &0x7fff,%d1             # extract operand's exponent
5590
5591         andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only the sgn in the stored operand
5592
5593         btst            &0x0,%d1                # is exp even or odd?
5594         beq.b           ss_norm_even
5595
5596         ori.w           &0x3fff,FP_SCR0_EX(%a6) # odd exp: set exponent to 0x3fff
5597
5598         mov.l           &0x3fff,%d0
5599         sub.l           %d1,%d0                 # scale = 0x3fff - exp
5600         asr.l           &0x1,%d0                # divide scale factor by 2
5601         rts
5602
5603 ss_norm_even:
5604         ori.w           &0x3ffe,FP_SCR0_EX(%a6) # even exp: set exponent to 0x3ffe
5605
5606         mov.l           &0x3ffe,%d0
5607         sub.l           %d1,%d0                 # scale = 0x3ffe - exp
5608         asr.l           &0x1,%d0                # divide scale factor by 2
5609         rts
5610
5611 ss_denorm:
5612         lea             FP_SCR0(%a6),%a0        # pass ptr to src op
5613         bsr.l           norm                    # normalize denorm; d0 = result from norm
5614
5615         btst            &0x0,%d0                # is norm's result even or odd?
5616         beq.b           ss_denorm_even
5617
5618         ori.w           &0x3fff,FP_SCR0_EX(%a6) # odd: set exponent to 0x3fff
5619
5620         add.l           &0x3fff,%d0             # scale = 0x3fff + norm result
5621         asr.l           &0x1,%d0                # divide scale factor by 2
5622         rts
5623
5624 ss_denorm_even:
5625         ori.w           &0x3ffe,FP_SCR0_EX(%a6) # even: set exponent to 0x3ffe
5626
5627         add.l           &0x3ffe,%d0             # scale = 0x3ffe + norm result
5628         asr.l           &0x1,%d0                # divide scale factor by 2
5629         rts
5630
5631 ###
5632
5633 #########################################################################
5634 # XDEF **************************************************************** #
5635 #       scale_to_zero_dst(): scale the exponent of extended precision   #
5636 #                            value at FP_SCR1(a6).                      #
5637 #                                                                       #
5638 # XREF **************************************************************** #
5639 #       norm() - normalize the mantissa if the operand was a DENORM     #
5640 #                                                                       #
5641 # INPUT *************************************************************** #
5642 #       FP_SCR1(a6) = extended precision operand to be scaled           #
5643 #                                                                       #
5644 # OUTPUT ************************************************************** #
5645 #       FP_SCR1(a6) = scaled extended precision operand                 #
5646 #       d0          = scale value                                       #
5647 #                                                                       #
5648 # ALGORITHM *********************************************************** #
5649 #       Set the exponent of the input operand to 0x3fff. Save the value #
5650 # of the difference between the original and new exponent. Then,        #
5651 # normalize the operand if it was a DENORM. Add this normalization      #
5652 # value to the previous value. Return the result.                       #
5653 #                                                                       #
5654 #########################################################################
5655
5656         global          scale_to_zero_dst
5657 scale_to_zero_dst:
5658         mov.w           FP_SCR1_EX(%a6),%d1     # extract operand's {sgn,exp}
5659         mov.w           %d1,%d0                 # make a copy
5660
5661         andi.l          &0x7fff,%d1             # d1 = operand's exponent (upper word cleared)
5662
5663         andi.w          &0x8000,%d0             # extract operand's sgn
5664         or.w            &0x3fff,%d0             # insert new exponent 0x3fff (= BIAS)
5665
5666         mov.w           %d0,FP_SCR1_EX(%a6)     # store {sgn,0x3fff} back into the operand
5667
5668         cmpi.b          DTAG(%a6),&DENORM       # is operand a DENORM?
5669         beq.b           stzd_denorm             # yes; normalize the DENORM
5670
5671 stzd_norm:
5672         mov.l           &0x3fff,%d0
5673         sub.l           %d1,%d0                 # scale = BIAS + (-exp)
5674         rts
5675
5676 stzd_denorm:
5677         lea             FP_SCR1(%a6),%a0        # pass ptr to dst op
5678         bsr.l           norm                    # normalize denorm
5679         neg.l           %d0                     # new exponent = -(shft val)
5680         mov.l           %d0,%d1                 # d1 = exponent consumed by stzd_norm
5681         bra.b           stzd_norm               # finish scaling
5682
5683 ##########################################################################
5684
5685 #########################################################################
5686 # XDEF **************************************************************** #
5687 #       res_qnan(): return default result w/ QNAN operand for dyadic    #
5688 #       res_snan(): return default result w/ SNAN operand for dyadic    #
5689 #       res_qnan_1op(): return dflt result w/ QNAN operand for monadic  #
5690 #       res_snan_1op(): return dflt result w/ SNAN operand for monadic  #
5691 #                                                                       #
5692 # XREF **************************************************************** #
5693 #       None                                                            #
5694 #                                                                       #
5695 # INPUT *************************************************************** #
5696 #       FP_SRC(a6) = extended precision src operand                     #
5697 #       FP_DST(a6) = extended precision dst operand                     #
5698 #                                                                       #
5699 # OUTPUT ************************************************************** #
5700 #       fp0 = default result                                            #
5701 #                                                                       #
5702 # ALGORITHM *********************************************************** #
5703 #       If either operand (but not both operands) of an operation is a  #
5704 # nonsignalling NAN, then that NAN is returned as the result. If both   #
5705 # operands are nonsignalling NANs, then the destination operand         #
5706 # nonsignalling NAN is returned as the result.                          #
5707 #       If either operand to an operation is a signalling NAN (SNAN),   #
5708 # then, the SNAN bit is set in the FPSR EXC byte. If the SNAN trap      #
5709 # enable bit is set in the FPCR, then the trap is taken and the         #
5710 # destination is not modified. If the SNAN trap enable bit is not set,  #
5711 # then the SNAN is converted to a nonsignalling NAN (by setting the     #
5712 # SNAN bit in the operand to one), and the operation continues as       #
5713 # described in the preceding paragraph, for nonsignalling NANs.         #
5714 #       Make sure the appropriate FPSR bits are set before exiting.     #
5715 #                                                                       #
5716 #########################################################################
5717
5718         global          res_qnan
5719         global          res_snan
5720 res_qnan:
5721 res_snan:
5722         cmp.b           DTAG(%a6), &SNAN        # is the dst an SNAN?
5723         beq.b           dst_snan2
5724         cmp.b           DTAG(%a6), &QNAN        # is the dst a  QNAN?
5725         beq.b           dst_qnan2
5726 src_nan:
5727         cmp.b           STAG(%a6), &QNAN        # dst is not a NAN; is the src a QNAN?
5728         beq.b           src_qnan2               # yes; otherwise fall through and treat src as SNAN
5729         global          res_snan_1op
5730 res_snan_1op:
5731 src_snan2:
5732         bset            &0x6, FP_SRC_HI(%a6)    # set SNAN bit
5733         or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5734         lea             FP_SRC(%a6), %a0        # a0 = ptr to the NAN to return (src)
5735         bra.b           nan_comp
5736         global          res_qnan_1op
5737 res_qnan_1op:
5738 src_qnan2:
5739         or.l            &nan_mask, USER_FPSR(%a6)
5740         lea             FP_SRC(%a6), %a0        # a0 = ptr to the NAN to return (src)
5741         bra.b           nan_comp
5742 dst_snan2:
5743         or.l            &nan_mask+aiop_mask+snan_mask, USER_FPSR(%a6)
5744         bset            &0x6, FP_DST_HI(%a6)    # set SNAN bit
5745         lea             FP_DST(%a6), %a0        # a0 = ptr to the NAN to return (dst)
5746         bra.b           nan_comp
5747 dst_qnan2:
5748         lea             FP_DST(%a6), %a0        # dst QNAN is returned whatever the src is
5749         cmp.b           STAG(%a6), &SNAN        # is the src an SNAN?
5750         bne             nan_done                # no; only the nan bit gets set
5751         or.l            &aiop_mask+snan_mask, USER_FPSR(%a6)
5752 nan_done:
5753         or.l            &nan_mask, USER_FPSR(%a6)
5754 nan_comp:
5755         btst            &0x7, FTEMP_EX(%a0)     # is NAN neg?
5756         beq.b           nan_not_neg
5757         or.l            &neg_mask, USER_FPSR(%a6)
5758 nan_not_neg:
5759         fmovm.x         (%a0), &0x80            # load the selected NAN as the result (fp0 per header)
5760         rts
5761
5762 #########################################################################
5763 # XDEF **************************************************************** #
5764 #       res_operr(): return default result during operand error         #
5765 #                                                                       #
5766 # XREF **************************************************************** #
5767 #       None                                                            #
5768 #                                                                       #
5769 # INPUT *************************************************************** #
5770 #       None                                                            #
5771 #                                                                       #
5772 # OUTPUT ************************************************************** #
5773 #       fp0 = default operand error result                              #
5774 #                                                                       #
5775 # ALGORITHM *********************************************************** #
5776 #       A nonsignalling NAN is returned as the default result when      #
5777 # an operand error occurs for the following cases:                      #
5778 #                                                                       #
5779 #       Multiply: (Infinity x Zero)                                     #
5780 #       Divide  : (Zero / Zero) || (Infinity / Infinity)                #
5781 #                                                                       #
5782 #########################################################################
5783
5784         global          res_operr
5785 res_operr:
     # Record the operand error in the user FPSR image and return the
     # default nonsignalling NAN in fp0.
5786         or.l            &nan_mask+operr_mask+aiop_mask, USER_FPSR(%a6) # set NAN cc + OPERR + AIOP
5787         fmovm.x         nan_return(%pc), &0x80  # load the constant below (pc-relative)
5788         rts
5789
     # default result: exponent 0x7fff with an all-ones mantissa
5790 nan_return:
5791         long            0x7fff0000, 0xffffffff, 0xffffffff
5792
5793 #########################################################################
5794 # XDEF **************************************************************** #
5795 #       _denorm(): denormalize an intermediate result                   #
5796 #                                                                       #
5797 # XREF **************************************************************** #
5798 #       None                                                            #
5799 #                                                                       #
5800 # INPUT *************************************************************** #
5801 #       a0 = points to the operand to be denormalized                   #
5802 #               (in the internal extended format)                       #
5803 #                                                                       #
5804 #       d0 = rounding precision                                         #
5805 #                                                                       #
5806 # OUTPUT ************************************************************** #
5807 #       a0 = pointer to the denormalized result                         #
5808 #               (in the internal extended format)                       #
5809 #                                                                       #
5810 #       d0 = guard,round,sticky                                         #
5811 #                                                                       #
5812 # ALGORITHM *********************************************************** #
5813 #       According to the exponent underflow threshold for the given     #
5814 # precision, shift the mantissa bits to the right in order to raise the #
5815 # exponent of the operand to the threshold value. While shifting the    #
5816 # mantissa bits right, maintain the value of the guard, round, and      #
5817 # sticky bits.                                                          #
5818 # other notes:                                                          #
5819 #       (1) _denorm() is called by the underflow routines               #
5820 #       (2) _denorm() does NOT affect the status register               #
5821 #                                                                       #
5822 #########################################################################
5823
5824 #
5825 # table of exponent threshold values for each precision
5826 #
5827 tbl_thresh:
     # entries are indexed by (d0 >> 2); presumably 0 = ext, 1 = sgl, 2 = dbl
     # -- confirm against the precision encoding used by the callers.
5828         short           0x0
5829         short           sgl_thresh
5830         short           dbl_thresh
5831
5832         global          _denorm
5833 _denorm:
5834 #
5835 # Load the exponent threshold for the selected precision and check
5836 # whether (threshold - exponent) is > 65. If so, every mantissa bit would
5837 # be shifted out, so just return sticky and zero the mantissa. Otherwise
5838 # call dnrm_lp() to do the actual shifting.
5839 #
5840         lsr.b           &0x2, %d0               # shift prec to lo bits (table index)
5841         mov.w           (tbl_thresh.b,%pc,%d0.w*2), %d1 # load prec threshold
5842         mov.w           %d1, %d0                # copy threshold; d1 is kept for dnrm_lp
5843         sub.w           FTEMP_EX(%a0), %d0      # diff = threshold - exp
5844         cmpi.w          %d0, &66                # is diff >= 66, i.e. > 65? (mant + g,r bits)
5845         bpl.b           denorm_set_stky         # yes; everything is shifted off
5846
5847         clr.l           %d0                     # start with g,r,s = 0
5848         btst            &inex2_bit, FPSR_EXCEPT(%a6) # was INEX2 already set?
5849         beq.b           denorm_call             # no; leave g,r,s clear
5850         bset            &29, %d0                # yes; pass it on as an initial sticky bit
5851
5852 denorm_call:
5853         bsr.l           dnrm_lp                 # denormalize; g,r,s come back in d0
5854         rts
5855
5856 #
5857 # all bits would have been shifted off during the denorm, so simply
5858 # return with the sticky bit set and clear the entire mantissa.
5859 #
5860 denorm_set_stky:
5861         mov.l           &0x20000000, %d0        # set sticky bit in return value
5862         mov.w           %d1, FTEMP_EX(%a0)      # load exp with threshold
5863         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
5864         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
5865         rts
5866
5867 #                                                                       #
5868 # dnrm_lp(): denormalize exponent/mantissa to specified threshold       #
5869 #                                                                       #
5870 # INPUT:                                                                #
5871 #       %a0        : points to the operand to be denormalized           #
5872 #       %d0{31:29} : initial guard,round,sticky                         #
5873 #       %d1{15:0}  : denormalization threshold                          #
5874 # OUTPUT:                                                               #
5875 #       %a0        : points to the denormalized operand                 #
5876 #       %d0{31:29} : final guard,round,sticky                           #
5877 #                                                                       #
5878
5879 # *** Local Equates *** #
5880 set     GRS,            L_SCR2                  # g,r,s temp storage
5881 set     FTEMP_LO2,      L_SCR1                  # FTEMP_LO copy
     # NOTE(review): the bitfield extractions in case_1/case_2 read across
     # FTEMP_LO2 into GRS, so L_SCR2 must directly follow L_SCR1 in the
     # frame -- confirm against the frame equates.
5882
5883         global          dnrm_lp
5884 dnrm_lp:
5885
5886 #
5887 # make a copy of FTEMP_LO and place the g,r,s bits directly after it
5888 # in memory so as to make the bitfield extraction for denormalization easier.
5889 #
5890         mov.l           FTEMP_LO(%a0), FTEMP_LO2(%a6) # make FTEMP_LO copy
5891         mov.l           %d0, GRS(%a6)           # place g,r,s after it
5892
5893 #
5894 # check to see how much less than the underflow threshold the operand
5895 # exponent is, then dispatch on the size of the required right shift.
5896 #
5897         mov.l           %d1, %d0                # copy the denorm threshold
5898         sub.w           FTEMP_EX(%a0), %d1      # d1 = threshold - uns exponent
5899         ble.b           dnrm_no_lp              # d1 <= 0; nothing to shift
5900         cmpi.w          %d1, &0x20              # is ( 0 < d1 < 32) ?
5901         blt.b           case_1                  # yes
5902         cmpi.w          %d1, &0x40              # is (32 <= d1 < 64) ?
5903         blt.b           case_2                  # yes
5904         bra.w           case_3                  # (d1 >= 64)
5905
5906 #
5907 # No denormalization necessary; operand is left untouched
5908 #
5909 dnrm_no_lp:
5910         mov.l           GRS(%a6), %d0           # restore original g,r,s
5911         rts
5912
5913 #
5914 # case (0<d1<32)
5915 #
5916 # %d0 = denorm threshold
5917 # %d1 = "n" = amt to shift
5918 #
5919 #       ---------------------------------------------------------
5920 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
5921 #       ---------------------------------------------------------
5922 #       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5923 #       \          \                  \                  \
5924 #        \          \                  \                  \
5925 #         \          \                  \                  \
5926 #          \          \                  \                  \
5927 #           \          \                  \                  \
5928 #            \          \                  \                  \
5929 #             \          \                  \                  \
5930 #              \          \                  \                  \
5931 #       <-(n)-><-(32 - n)-><------(32)-------><------(32)------->
5932 #       ---------------------------------------------------------
5933 #       |0.....0| NEW_HI  |  NEW_FTEMP_LO     |grs              |
5934 #       ---------------------------------------------------------
5935 #
5936 case_1:
     # shift the 64-bit mantissa right by n = %d1 (0 < n < 32) and collect
     # the bits that fall off the end into the returned g,r,s.
5937         mov.l           %d2, -(%sp)             # save %d2; it is used as scratch
5938
5939         mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
5940         mov.l           &32, %d0
5941         sub.w           %d1, %d0                # %d0 = 32 - %d1
5942
5943         cmpi.w          %d1, &29                # is shft amt >= 29
5944         blt.b           case1_extract           # no; no fix needed
     # for the largest shifts the 32-bit field extracted below no longer spans
     # all three old g,r,s bits, so fold them into the low byte of the FTEMP_LO
     # copy where the sticky test will still see them.
5945         mov.b           GRS(%a6), %d2           # byte holding the old g,r,s
5946         or.b            %d2, 3+FTEMP_LO2(%a6)   # or into ls byte of the copy only
5947
5948 case1_extract:
5949         bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_HI
5950         bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new FTEMP_LO
5951         bfextu          FTEMP_LO2(%a6){%d0:&32}, %d0 # %d0 = new G,R,S
5952
5953         mov.l           %d2, FTEMP_HI(%a0)      # store new FTEMP_HI
5954         mov.l           %d1, FTEMP_LO(%a0)      # store new FTEMP_LO
5955
5956         bftst           %d0{&2:&30}             # anything set below the new G,R?
5957         beq.b           case1_sticky_clear      # no; go finish
5958         bset            &rnd_stky_bit, %d0      # yes; set sticky bit
5959
5960 case1_sticky_clear:
5961         and.l           &0xe0000000, %d0        # clear all but G,R,S
5962         mov.l           (%sp)+, %d2             # restore temp register
5963         rts
5964
5965 #
5966 # case (32<=d1<64)
5967 #
5968 # %d0 = denorm threshold
5969 # %d1 = "n" = amt to shift
5970 #
5971 #       ---------------------------------------------------------
5972 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
5973 #       ---------------------------------------------------------
5974 #       <-(32 - n)-><-(n)-><-(32 - n)-><-(n)-><-(32 - n)-><-(n)->
5975 #       \          \                  \
5976 #        \          \                  \
5977 #         \          \                  -------------------
5978 #          \          --------------------                 \
5979 #           -------------------           \                 \
5980 #                              \           \                 \
5981 #                               \           \                 \
5982 #                                \           \                 \
5983 #       <-------(32)------><-(n)-><-(32 - n)-><------(32)------->
5984 #       ---------------------------------------------------------
5985 #       |0...............0|0....0| NEW_LO     |grs              |
5986 #       ---------------------------------------------------------
5987 #
5988 case_2:
     # shift right by n (32 <= n < 64): hi(mantissa) becomes zero, the top of
     # the old hi becomes the new lo, and the rest feeds the new g,r,s.
5989         mov.l           %d2, -(%sp)             # save %d2; it is used as scratch
5990
5991         mov.w           %d0, FTEMP_EX(%a0)      # exponent = denorm threshold
5992         subi.w          &0x20, %d1              # %d1 now between 0 and 31
5993         mov.l           &0x20, %d0
5994         sub.w           %d1, %d0                # %d0 = 32 - %d1
5995
5996 # subtle step here; or in the g,r,s at the bottom of the FTEMP_LO copy to
5997 # minimize the number of bits to check for the sticky detect.
5998 # it only plays a role in shift amounts of 61-63.
5999         mov.b           GRS(%a6), %d2           # byte holding the old g,r,s
6000         or.b            %d2, 3+FTEMP_LO2(%a6)   # or into ls byte of the copy only
6001
6002         bfextu          FTEMP_HI(%a0){&0:%d0}, %d2 # %d2 = new FTEMP_LO
6003         bfextu          FTEMP_HI(%a0){%d0:&32}, %d1 # %d1 = new G,R,S
6004
6005         bftst           %d1{&2:&30}             # anything set below the new G,R?
6006         bne.b           case2_set_sticky        # yes; set sticky bit
6007         bftst           FTEMP_LO2(%a6){%d0:&31} # anything left in lo copy / old g,r,s?
6008         bne.b           case2_set_sticky        # yes; set sticky bit
6009
6010         mov.l           %d1, %d0                # move new G,R,S to %d0
6011         bra.b           case2_end
6012
6013 case2_set_sticky:
6014         mov.l           %d1, %d0                # move new G,R,S to %d0
6015         bset            &rnd_stky_bit, %d0      # set sticky bit
6016
6017 case2_end:
6018         clr.l           FTEMP_HI(%a0)           # store FTEMP_HI = 0
6019         mov.l           %d2, FTEMP_LO(%a0)      # store FTEMP_LO
6020         and.l           &0xe0000000, %d0        # clear all but G,R,S
6021
6022         mov.l           (%sp)+,%d2              # restore temp register
6023         rts
6024
6025 #
6026 # case (d1>=64)
6027 #
6028 # %d0 = denorm threshold
6029 # %d1 = amt to shift
6030 #
6031 case_3:
     # shift amount is >= 64: the whole mantissa leaves the 64-bit field;
     # only g,r,s can survive (shift of 64 or 65) or just sticky (> 65).
6032         mov.w           %d0, FTEMP_EX(%a0)      # insert denorm threshold
6033
6034         cmpi.w          %d1, &65                # compare shift amt with 65
6035         blt.b           case3_64                # less; it's == 64
6036         beq.b           case3_65                # equal; it's == 65
6037
6038 #
6039 # case (d1>65)
6040 #
6041 # Shift value is > 65 and out of range. All bits are shifted off.
6042 # Return a zero mantissa with the sticky bit set
6043 #
6044         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6045         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6046         mov.l           &0x20000000, %d0        # return only the sticky bit
6047         rts
6048
6049 #
6050 # case (d1 == 64)
6051 #
6052 #       ---------------------------------------------------------
6053 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
6054 #       ---------------------------------------------------------
6055 #       <-------(32)------>
6056 #       \                  \
6057 #        \                  \
6058 #         \                  \
6059 #          \                  ------------------------------
6060 #           -------------------------------                 \
6061 #                                          \                 \
6062 #                                           \                 \
6063 #                                            \                 \
6064 #                                             <-------(32)------>
6065 #       ---------------------------------------------------------
6066 #       |0...............0|0................0|grs               |
6067 #       ---------------------------------------------------------
6068 #
6069 case3_64:
     # shift of exactly 64: the top two bits of hi(mantissa) become G,R.
6070         mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
6071         mov.l           %d0, %d1                # make a copy
6072         and.l           &0xc0000000, %d0        # extract G,R
6073         and.l           &0x3fffffff, %d1        # remaining hi bits; sets Z for case3_complete
6074
6075         bra.b           case3_complete          # bra leaves the condition codes intact
6076
6077 #
6078 # case (d1 == 65)
6079 #
6080 #       ---------------------------------------------------------
6081 #       |     FTEMP_HI    |     FTEMP_LO     |grs000.........000|
6082 #       ---------------------------------------------------------
6083 #       <-------(32)------>
6084 #       \                  \
6085 #        \                  \
6086 #         \                  \
6087 #          \                  ------------------------------
6088 #           --------------------------------                \
6089 #                                           \                \
6090 #                                            \                \
6091 #                                             \                \
6092 #                                              <-------(31)----->
6093 #       ---------------------------------------------------------
6094 #       |0...............0|0................0|0rs               |
6095 #       ---------------------------------------------------------
6096 #
6097 case3_65:
     # shift of exactly 65: G is zero and the ms bit of hi(mantissa) becomes R.
     # %d1 still holds the shift amount here (it is NOT a copy of hi(mantissa)
     # as in case3_64), so the remaining hi bits are tested in memory; and'ing
     # %d1 could never give zero and forced the sticky bit on every time.
6098         mov.l           FTEMP_HI(%a0), %d0      # fetch hi(mantissa)
6099         and.l           &0x80000000, %d0        # keep only the ms bit
6100         lsr.l           &0x1, %d0               # move it into the R position (bit 30)
6101         bftst           FTEMP_HI(%a0){&1:&31}   # any of the other 31 hi bits set?
6102
6103 case3_complete:
6104 # the last operation on both entry paths tested the hi(mantissa) bits that
6105 # fall below the new G,R, so the condition codes are already set.
6106         bne.b           case3_set_sticky        # some were set; go set new sticky
6107         tst.l           FTEMP_LO(%a0)           # were any lo(mantissa) bits shifted off?
6108         bne.b           case3_set_sticky        # yes; go set new sticky
6109         tst.b           GRS(%a6)                # were any old g,r,s bits shifted off?
6110         bne.b           case3_set_sticky        # yes; go set new sticky
6111
6112 #
6113 # no bits were shifted off so don't set the sticky bit.
6114 # the guard and round bits already in %d0 are returned as they are and
6115 # the entire mantissa is zero.
6116 #
6117         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6118         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6119         rts
6120
6121 #
6122 # some bits were shifted off so set the sticky bit.
6123 # the entire mantissa is zero.
6124 #
6125 case3_set_sticky:
6126         bset            &rnd_stky_bit,%d0       # set new sticky bit
6127         clr.l           FTEMP_HI(%a0)           # clear hi(mantissa)
6128         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6129         rts
6130
6131 #########################################################################
6132 # XDEF **************************************************************** #
6133 #       _round(): round result according to precision/mode              #
6134 #                                                                       #
6135 # XREF **************************************************************** #
6136 #       None                                                            #
6137 #                                                                       #
6138 # INPUT *************************************************************** #
6139 #       a0        = ptr to input operand in internal extended format    #
6140 #       d1(hi)    = contains rounding precision:                        #
6141 #                       ext = $0000xxxx                                 #
6142 #                       sgl = $0004xxxx                                 #
6143 #                       dbl = $0008xxxx                                 #
6144 #       d1(lo)    = contains rounding mode:                             #
6145 #                       RN  = $xxxx0000                                 #
6146 #                       RZ  = $xxxx0001                                 #
6147 #                       RM  = $xxxx0002                                 #
6148 #                       RP  = $xxxx0003                                 #
6149 #       d0{31:29} = contains the g,r,s bits (extended)                  #
6150 #                                                                       #
6151 # OUTPUT ************************************************************** #
6152 #       a0 = pointer to rounded result                                  #
6153 #                                                                       #
6154 # ALGORITHM *********************************************************** #
6155 #       On return the value pointed to by a0 is correctly rounded,      #
6156 #       a0 is preserved and the g-r-s bits in d0 are cleared.           #
6157 #       The result is not typed - the tag field is invalid.  The        #
6158 #       result is still in the internal extended format.                #
6159 #                                                                       #
6160 #       The INEX bit of USER_FPSR will be set if the rounded result was #
6161 #       inexact (i.e. if any of the g-r-s bits were set).               #
6162 #                                                                       #
6163 #########################################################################
6164
6165         global          _round
6166 _round:
6167 #
6168 # ext_grs() looks at the rounding precision and returns the G,R,S bits
6169 # that apply to that precision in d0.
6170 # If (G,R,S == 0) then result is exact and round is done, else set
6171 # the inex flag in status reg and continue.
6172 #
     # NOTE: every exit path below swaps %d1 exactly once and none swaps it
     # back, so %d1 is returned with its two halves exchanged.
6173         bsr.l           ext_grs                 # extract G,R,S
6174
6175         tst.l           %d0                     # are G,R,S zero?
6176         beq.w           truncate                # yes; round is complete
6177
6178         or.w            &inx2a_mask, 2+USER_FPSR(%a6) # set inex2/ainex
6179
6180 #
6181 # Use rounding mode as an index into a jump table for these modes.
6182 # All of the following assumes grs != 0.
6183 #
6184         mov.w           (tbl_mode.b,%pc,%d1.w*2), %a1 # load jump offset
6185         jmp             (tbl_mode.b,%pc,%a1)    # jmp to rnd mode handler
6186
6187 tbl_mode:
6188         short           rnd_near - tbl_mode     # mode 0: RN
6189         short           truncate - tbl_mode     # mode 1: RZ always truncates
6190         short           rnd_mnus - tbl_mode     # mode 2: RM
6191         short           rnd_plus - tbl_mode     # mode 3: RP
6192
6193 #################################################################
6194 #       ROUND PLUS INFINITY                                     #
6195 #                                                               #
6196 #       If sign of fp number = 0 (positive), then add 1 to l.   #
6197 #################################################################
6198 rnd_plus:
6199         tst.b           FTEMP_SGN(%a0)          # check for sign
6200         bmi.w           truncate                # negative; rounding toward +inf truncates
6201
     # positive and inexact: always add 1 to the l-bit. d0 is forced non-zero
     # so the "rs = 0" test in the add_* routines never clears the l-bit.
6202         mov.l           &0xffffffff, %d0        # force g,r,s to be all f's
6203         swap            %d1                     # set up d1 for round prec.
6204
6205         cmpi.b          %d1, &s_mode            # is prec = sgl?
6206         beq.w           add_sgl                 # yes
6207         bgt.w           add_dbl                 # no; it's dbl
6208         bra.w           add_ext                 # no; it's ext
6209
6210 #################################################################
6211 #       ROUND MINUS INFINITY                                    #
6212 #                                                               #
6213 #       If sign of fp number = 1 (negative), then add 1 to l.   #
6214 #################################################################
6215 rnd_mnus:
6216         tst.b           FTEMP_SGN(%a0)          # check for sign
6217         bpl.w           truncate                # positive; rounding toward -inf truncates
6218
     # negative and inexact: always add 1 to the l-bit. d0 is forced non-zero
     # so the "rs = 0" test in the add_* routines never clears the l-bit.
6219         mov.l           &0xffffffff, %d0        # force g,r,s to be all f's
6220         swap            %d1                     # set up d1 for round prec.
6221
6222         cmpi.b          %d1, &s_mode            # is prec = sgl?
6223         beq.w           add_sgl                 # yes
6224         bgt.w           add_dbl                 # no; it's dbl
6225         bra.w           add_ext                 # no; it's ext
6226
6227 #################################################################
6228 #       ROUND NEAREST                                           #
6229 #                                                               #
6230 #       If (g=1), then add 1 to l and if (r=s=0), then clear l  #
6231 #       Note that this will round to even in case of a tie.     #
6232 #################################################################
6233 rnd_near:
6234         asl.l           &0x1, %d0               # shift g-bit to c-bit; d0 now holds r,s
6235         bcc.w           truncate                # g = 0; less than half way, so truncate
6236
     # g = 1: add 1 to the l-bit; the add_* routines clear the l-bit again
     # when the remaining r,s in d0 are zero (tie -> round to even).
6237         swap            %d1                     # set up d1 for round prec.
6238
6239         cmpi.b          %d1, &s_mode            # is prec = sgl?
6240         beq.w           add_sgl                 # yes
6241         bgt.w           add_dbl                 # no; it's dbl
6242         bra.w           add_ext                 # no; it's ext
6243
6244 # *** LOCAL EQUATES ***
6245 set     ad_1_sgl,       0x00000100      # constant to add 1 to l-bit in sgl prec
6246 set     ad_1_dbl,       0x00000800      # constant to add 1 to l-bit in dbl prec
6247
6248 #########################
6249 #       ADD SINGLE      #
6250 #########################
6251 add_sgl:
     # add 1 at the sgl l-bit (bit 8 of hi(mantissa)); d0 = remaining r,s.
6252         add.l           &ad_1_sgl, FTEMP_HI(%a0) # add 1 to l-bit
6253         bcc.b           scc_clr                 # no mantissa overflow
6254         roxr.w          FTEMP_HI(%a0)           # carry out: rotate the carry (X) back in at the top
6255         roxr.w          FTEMP_HI+2(%a0)         # and continue the 1-bit right shift into the low word
6256         add.w           &0x1, FTEMP_EX(%a0)     # and incr exponent
6257 scc_clr:
6258         tst.l           %d0                     # test for rs = 0
6259         bne.b           sgl_done                # not a tie; keep the l-bit
6260         and.w           &0xfe00, FTEMP_HI+2(%a0) # tie: clear the l-bit (and the bits below it)
6261 sgl_done:
6262         and.l           &0xffffff00, FTEMP_HI(%a0) # truncate bits beyond sgl limit
6263         clr.l           FTEMP_LO(%a0)           # clear lo(mantissa)
6264         rts
6265
6266 #########################
6267 #       ADD EXTENDED    #
6268 #########################
6269 add_ext:
     # add 1 at the ext l-bit (bit 0 of lo(mantissa)); d0 = remaining r,s.
6270         addq.l          &1,FTEMP_LO(%a0)        # add 1 to l-bit
6271         bcc.b           xcc_clr                 # no carry out of lo(mantissa)
6272         addq.l          &1,FTEMP_HI(%a0)        # propagate carry
6273         bcc.b           xcc_clr                 # no carry out of hi(mantissa)
6274         roxr.w          FTEMP_HI(%a0)           # carry out: rotate the carry (X) back in at the top
6275         roxr.w          FTEMP_HI+2(%a0)         # and continue the 1-bit right shift
6276         roxr.w          FTEMP_LO(%a0)           # through every word
6277         roxr.w          FTEMP_LO+2(%a0)         # of the 64-bit mantissa
6278         add.w           &0x1,FTEMP_EX(%a0)      # and inc exp
6279 xcc_clr:
6280         tst.l           %d0                     # test rs = 0
6281         bne.b           add_ext_done            # not a tie; keep the l-bit
6282         and.b           &0xfe,FTEMP_LO+3(%a0)   # tie: clear the l bit
6283 add_ext_done:
6284         rts
6285
6286 #########################
6287 #       ADD DOUBLE      #
6288 #########################
6289 add_dbl:
     # add 1 at the dbl l-bit (bit 11 of lo(mantissa)); d0 = remaining r,s.
6290         add.l           &ad_1_dbl, FTEMP_LO(%a0) # add 1 to l-bit
6291         bcc.b           dcc_clr                 # no carry
6292         addq.l          &0x1, FTEMP_HI(%a0)     # propagate carry
6293         bcc.b           dcc_clr                 # no carry
6294
6295         roxr.w          FTEMP_HI(%a0)           # carry out: rotate the carry (X) back in at the top
6296         roxr.w          FTEMP_HI+2(%a0)         # and continue the 1-bit right shift
6297         roxr.w          FTEMP_LO(%a0)           # through every word
6298         roxr.w          FTEMP_LO+2(%a0)         # of the 64-bit mantissa
6299         addq.w          &0x1, FTEMP_EX(%a0)     # incr exponent
6300 dcc_clr:
6301         tst.l           %d0                     # test for rs = 0
6302         bne.b           dbl_done                # not a tie; keep the l-bit
6303         and.w           &0xf000, FTEMP_LO+2(%a0) # tie: clear the l-bit (and the bits below it)
6304
6305 dbl_done:
6306         and.l           &0xfffff800,FTEMP_LO(%a0) # truncate bits beyond dbl limit
6307         rts
6308
6309 ###########################
6310 # Truncate all other bits #
6311 ###########################
6312 truncate:
     # drop the mantissa bits below the selected precision without rounding up;
     # reuses the masking tails of add_sgl / add_dbl.
6313         swap            %d1                     # select rnd prec
6314
6315         cmpi.b          %d1, &s_mode            # is prec sgl?
6316         beq.w           sgl_done                # yes; mask to sgl
6317         bgt.b           dbl_done                # no; it's dbl; mask to dbl
6318         rts                                     # no; it's ext; nothing to mask
6319
6320
6321 #
6322 # ext_grs(): extract guard, round and sticky bits according to
6323 #            rounding precision.
6324 #
6325 # INPUT
6326 #       d0         = extended precision g,r,s (in d0{31:29})
6327 #       d1         = {PREC,ROUND}
6328 # OUTPUT
6329 #       d0{31:29}  = guard, round, sticky
6330 #
6331 # ext_grs extracts the guard/round/sticky bits according to the
6332 # selected rounding precision. It is called by the round subroutine
6333 # only.  All registers except d0 are kept intact. d0 becomes an
6334 # updated guard,round,sticky in d0{31:29}
6335 #
6336 # Notes: the ext_grs uses the round PREC, and therefore has to swap d1
6337 #        prior to usage, and needs to restore d1 to original. this
6338 #        routine is tightly tied to the round routine and not meant to
6339 #        uphold standard subroutine calling practices.
6340 #
6341
6342 ext_grs:
6343         swap            %d1                     # bring the rounding precision into d1.w
6344         tst.b           %d1                     # is rnd prec = extended?
6345         bne.b           ext_grs_not_ext         # no; go handle sgl or dbl
6346
6347 #
6348 # extended precision: %d0 already holds the g,r,s that _round() passed in,
6349 # so as long as we don't disturb it, we are "returning" it unchanged.
6350 #
6351 ext_grs_ext:
6352         swap            %d1                     # put the halves of d1 back where they were
6353         rts
6354
6355 ext_grs_not_ext:
6356         movm.l          &0x3000, -(%sp)         # save {d2/d3}; restored at ext_grs_end_sd
6357
6358         cmpi.b          %d1, &s_mode            # is rnd prec = sgl?
6359         bne.b           ext_grs_dbl             # no; go handle dbl
6360
6361 #
6362 # sgl:
6363 #       96              64        40    32              0
6364 #       -----------------------------------------------------
6365 #       | EXP   |XXXXXXX|         |xx   |               |grs|
6366 #       -----------------------------------------------------
6367 #                       <--(24)--->nn\                     /
6368 #                                  ee ---------------------
6369 #                                  ww           |
6370 #                                               v
6371 #                                  gr      new sticky
6372 #
6373 ext_grs_sgl:
6374         bfextu          FTEMP_HI(%a0){&24:&2}, %d3 # d3 = the 2 bits just below the sgl mantissa (g,r)
6375         mov.l           &30, %d2                # shift count
6376         lsl.l           %d2, %d3                # shift g-r bits to MSB of d3
6377         mov.l           FTEMP_HI(%a0), %d2      # fetch hi(mantissa) for the s-bit test
6378         and.l           &0x0000003f, %d2        # keep the hi bits to the right of g-r
6379         bne.b           ext_grs_st_stky         # any set; set sticky
6380         tst.l           FTEMP_LO(%a0)           # test lower mantissa
6381         bne.b           ext_grs_st_stky         # if any are set, set sticky
6382         tst.l           %d0                     # test original g,r,s
6383         bne.b           ext_grs_st_stky         # if any are set, set sticky
6384         bra.b           ext_grs_end_sd          # nothing below g-r is set; no sticky
6385
6386 #
6387 # dbl:
6388 #       96              64              32       11     0
6389 #       -----------------------------------------------------
6390 #       | EXP   |XXXXXXX|               |        |xx    |grs|
6391 #       -----------------------------------------------------
6392 #                                                 nn\       /
6393 #                                                 ee -------
6394 #                                                 ww    |
6395 #                                                       v
6396 #                                                 gr    new sticky
6397 #
6398 ext_grs_dbl:
6399         bfextu          FTEMP_LO(%a0){&21:&2}, %d3 # d3 = the 2 bits just below the dbl mantissa (g,r)
6400         mov.l           &30, %d2                # shift count
6401         lsl.l           %d2, %d3                # shift g-r bits to the MSB of d3
6402         mov.l           FTEMP_LO(%a0), %d2      # get lower mantissa  for s-bit test
6403         and.l           &0x000001ff, %d2        # keep the lo bits to the right of g-r
6404         bne.b           ext_grs_st_stky         # any set; set sticky
6405         tst.l           %d0                     # test original g,r,s
6406         bne.b           ext_grs_st_stky         # if any are set, set sticky
6407         bra.b           ext_grs_end_sd          # if clear, exit
6408
     # common exit for sgl and dbl: d3 holds the new g,r (and sticky, if set)
6409 ext_grs_st_stky:
6410         bset            &rnd_stky_bit, %d3      # set sticky bit
6411 ext_grs_end_sd:
6412         mov.l           %d3, %d0                # return grs to d0
6413
6414         movm.l          (%sp)+, &0xc            # restore scratch registers {d2/d3}
6415
6416         swap            %d1                     # restore d1 to original
6417         rts
6418
6419 #########################################################################
6420 # norm(): normalize the mantissa of an extended precision input. the    #
6421 #         input operand should not be normalized already.               #
6422 #                                                                       #
6423 # XDEF **************************************************************** #
6424 #       norm()                                                          #
6425 #                                                                       #
6426 # XREF **************************************************************** #
6427 #       none                                                            #
6428 #                                                                       #
6429 # INPUT *************************************************************** #
6430 #       a0 = pointer to fp extended precision operand to normalize      #
6431 #                                                                       #
6432 # OUTPUT ************************************************************** #
6433 #       d0 = number of bit positions the mantissa was shifted           #
6434 #       a0 = the input operand's mantissa is normalized; the exponent   #
6435 #            is unchanged.                                              #
6436 #                                                                       #
6437 #########################################################################
6438         global          norm
6439 norm:
6440         mov.l           %d2, -(%sp)             # save d2/d3; both are used as temps
6441         mov.l           %d3, -(%sp)             # (d1 is also used but NOT preserved)
6442
6443         mov.l           FTEMP_HI(%a0), %d0      # load hi(mantissa)
6444         mov.l           FTEMP_LO(%a0), %d1      # load lo(mantissa)
6445
6446         bfffo           %d0{&0:&32}, %d2        # d2 = # of leading zeroes in hi(man)
6447         beq.b           norm_lo                 # hi(man) is all zeroes!
6448
6449 norm_hi:
6450         lsl.l           %d2, %d0                # left shift hi(man)
6451         bfextu          %d1{&0:%d2}, %d3        # d3 = top d2 bits of lo(man)
6452 # NOTE: a bfextu width of 0 encodes 32, so d2 = 0 (input already normalized) would OR all of lo(man) into hi(man).
6453         or.l            %d3, %d0                # merge them into the low end of hi(man)
6454         lsl.l           %d2, %d1                # create lo(man)
6455
6456         mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
6457         mov.l           %d1, FTEMP_LO(%a0)      # store new lo(man)
6458
6459         mov.l           %d2, %d0                # d0 = shift amount (return value)
6460
6461         mov.l           (%sp)+, %d3             # restore temp regs
6462         mov.l           (%sp)+, %d2
6463
6464         rts
6465
6466 norm_lo:
6467         bfffo           %d1{&0:&32}, %d2        # d2 = # of leading zeroes in lo(man)
6468         lsl.l           %d2, %d1                # shifted lo(man) becomes the new hi(man)
6469         add.l           &32, %d2                # plus the 32 zero bits of the old hi(man)
6470
6471         mov.l           %d1, FTEMP_HI(%a0)      # store hi(man)
6472         clr.l           FTEMP_LO(%a0)           # lo(man) is now zero
6473
6474         mov.l           %d2, %d0                # d0 = shift amount (return value)
6475
6476         mov.l           (%sp)+, %d3             # restore temp regs
6477         mov.l           (%sp)+, %d2
6478
6479         rts
6480
6481 #########################################################################
6482 # unnorm_fix(): - changes an UNNORM to one of NORM, DENORM, or ZERO     #
6483 #               - returns corresponding optype tag                      #
6484 #                                                                       #
6485 # XDEF **************************************************************** #
6486 #       unnorm_fix()                                                    #
6487 #                                                                       #
6488 # XREF **************************************************************** #
6489 #       norm() - normalize the mantissa                                 #
6490 #                                                                       #
6491 # INPUT *************************************************************** #
6492 #       a0 = pointer to unnormalized extended precision number          #
6493 #                                                                       #
6494 # OUTPUT ************************************************************** #
6495 #       d0 = optype tag - is corrected to one of NORM, DENORM, or ZERO  #
6496 #       a0 = input operand has been converted to a norm, denorm, or     #
6497 #            zero; both the exponent and mantissa are changed.          #
6498 #                                                                       #
6499 #########################################################################
6500
6501         global          unnorm_fix
6502 unnorm_fix:
6503         bfffo           FTEMP_HI(%a0){&0:&32}, %d0 # d0 = # of leading zeroes in hi(man)
6504         bne.b           unnorm_shift            # hi(man) is not all zeroes
6505
6506 #
6507 # hi(man) is all zeroes so see if any bits in lo(man) are set
6508 #
6509 unnorm_chk_lo:
6510         bfffo           FTEMP_LO(%a0){&0:&32}, %d0 # is operand really a zero?
6511         beq.w           unnorm_zero             # yes
6512
6513         add.w           &32, %d0                # no; count the 32 zero bits of hi(man) too
6514
6515 #
6516 # d0 = # shifts needed for complete normalization
6517 #
6518 unnorm_shift:
6519         clr.l           %d1                     # clear top word
6520         mov.w           FTEMP_EX(%a0), %d1      # extract exponent
6521         and.w           &0x7fff, %d1            # strip off sgn
6522
6523         cmp.w           %d0, %d1                # is shift distance > exponent?
6524         bgt.b           unnorm_nrm_zero         # yes; can only shift until exp = 0
6525
6526 #
6527 # exponent would not go < 0. Therefore, the number can be fully normalized
6528 #
6529         sub.w           %d0, %d1                # new exp = old exp - shift distance
6530         mov.w           FTEMP_EX(%a0), %d0      # load old exponent
6531         and.w           &0x8000, %d0            # save old sign
6532         or.w            %d0, %d1                # {sgn,new exp}
6533         mov.w           %d1, FTEMP_EX(%a0)      # insert new exponent
6534
6535         bsr.l           norm                    # normalize UNNORM
6536
6537         mov.b           &NORM, %d0              # return new optype tag
6538         rts
6539
6540 #
6541 # exponent would go < 0, so only shift left until exp = 0 (result is a DENORM)
6542 # d1 = old exponent = number of bit positions to shift the mantissa left
6543 unnorm_nrm_zero:
6544         cmp.b           %d1, &32                # is exp <= 32?
6545         bgt.b           unnorm_nrm_zero_lrg     # no; go handle large exponent
6546
6547         bfextu          FTEMP_HI(%a0){%d1:&32}, %d0 # 32 bits starting d1 bits into hi:lo
6548         mov.l           %d0, FTEMP_HI(%a0)      # save new hi(man)
6549
6550         mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
6551         lsl.l           %d1, %d0                # extract new lo(man)
6552         mov.l           %d0, FTEMP_LO(%a0)      # save new lo(man)
6553
6554         and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0
6555
6556         mov.b           &DENORM, %d0            # return new optype tag
6557         rts
6558
6559 #
6560 # exp > 32 (and shift distance > exp), so the only mantissa bits set are in lo(man)
6561 #
6562 unnorm_nrm_zero_lrg:
6563         sub.w           &32, %d1                # adjust shft amt by 32
6564
6565         mov.l           FTEMP_LO(%a0), %d0      # fetch old lo(man)
6566         lsl.l           %d1, %d0                # left shift lo(man)
6567
6568         mov.l           %d0, FTEMP_HI(%a0)      # store new hi(man)
6569         clr.l           FTEMP_LO(%a0)           # lo(man) = 0
6570
6571         and.w           &0x8000, FTEMP_EX(%a0)  # set exp = 0
6572
6573         mov.b           &DENORM, %d0            # return new optype tag
6574         rts
6575
6576 #
6577 # whole mantissa is zero so this UNNORM is actually a zero
6578 #
6579 unnorm_zero:
6580         and.w           &0x8000, FTEMP_EX(%a0)  # force exponent to zero
6581
6582         mov.b           &ZERO, %d0              # fix optype tag
6583         rts
6584
6585 #########################################################################
6586 # XDEF **************************************************************** #
6587 #       set_tag_x(): return the optype of the input ext fp number       #
6588 #                                                                       #
6589 # XREF **************************************************************** #
6590 #       None                                                            #
6591 #                                                                       #
6592 # INPUT *************************************************************** #
6593 #       a0 = pointer to extended precision operand                      #
6594 #                                                                       #
6595 # OUTPUT ************************************************************** #
6596 #       d0 = value of type tag                                          #
6597 #               one of: NORM, INF, QNAN, SNAN, DENORM, UNNORM, ZERO     #
6598 #                                                                       #
6599 # ALGORITHM *********************************************************** #
6600 #       Simply test the exponent, j-bit, and mantissa values to         #
6601 # determine the type of operand.                                        #
6602 #       If it's an unnormalized zero, alter the operand and force it    #
6603 # to be a normal zero.                                                  #
6604 #                                                                       #
6605 #########################################################################
6606
6607         global          set_tag_x
6608 set_tag_x:
6609         mov.w           FTEMP_EX(%a0), %d0      # extract exponent
6610         andi.w          &0x7fff, %d0            # strip off sign
6611         cmpi.w          %d0, &0x7fff            # is (EXP == MAX)?
6612         beq.b           inf_or_nan_x            # yes; operand is an INF or a NAN
6613 not_inf_or_nan_x:
6614         btst            &0x7,FTEMP_HI(%a0)      # test msb of hi(man) (the j-bit)
6615         beq.b           not_norm_x              # clear; cannot be a NORM
6616 is_norm_x:
6617         mov.b           &NORM, %d0              # j-bit set and exp != max
6618         rts
6619 not_norm_x:
6620         tst.w           %d0                     # is exponent = 0?
6621         bne.b           is_unnorm_x             # no; j-bit clear with exp != 0
6622 not_unnorm_x:
6623         tst.l           FTEMP_HI(%a0)           # exp = 0: any mantissa bit set?
6624         bne.b           is_denorm_x             # yes
6625         tst.l           FTEMP_LO(%a0)
6626         bne.b           is_denorm_x             # yes
6627 is_zero_x:
6628         mov.b           &ZERO, %d0              # exp = 0 and mantissa = 0
6629         rts
6630 is_denorm_x:
6631         mov.b           &DENORM, %d0            # exp = 0 and mantissa != 0
6632         rts
6633 # must distinguish now "Unnormalized zeroes" which we
6634 # must convert to zero.
6635 is_unnorm_x:
6636         tst.l           FTEMP_HI(%a0)           # any mantissa bit set?
6637         bne.b           is_unnorm_reg_x         # yes; a regular UNNORM
6638         tst.l           FTEMP_LO(%a0)
6639         bne.b           is_unnorm_reg_x         # yes; a regular UNNORM
6640 # it's an "unnormalized zero". let's convert it to an actual zero...
6641         andi.w          &0x8000,FTEMP_EX(%a0)   # clear exponent
6642         mov.b           &ZERO, %d0              # operand was modified in place
6643         rts
6644 is_unnorm_reg_x:
6645         mov.b           &UNNORM, %d0
6646         rts
6647 inf_or_nan_x:
6648         tst.l           FTEMP_LO(%a0)           # any lo(man) bit set?
6649         bne.b           is_nan_x                # yes; must be a NAN
6650         mov.l           FTEMP_HI(%a0), %d0
6651         and.l           &0x7fffffff, %d0        # msb is a don't care!
6652         bne.b           is_nan_x                # other hi(man) bits set; NAN
6653 is_inf_x:
6654         mov.b           &INF, %d0               # exp = max and mantissa = 0 (ignoring msb)
6655         rts
6656 is_nan_x:
6657         btst            &0x6, FTEMP_HI(%a0)     # test the bit just below the mantissa msb
6658         beq.b           is_snan_x               # clear; signalling NAN
6659         mov.b           &QNAN, %d0              # set; quiet NAN
6660         rts
6661 is_snan_x:
6662         mov.b           &SNAN, %d0
6663         rts
6664
6665 #########################################################################
6666 # XDEF **************************************************************** #
6667 #       set_tag_d(): return the optype of the input dbl fp number       #
6668 #                                                                       #
6669 # XREF **************************************************************** #
6670 #       None                                                            #
6671 #                                                                       #
6672 # INPUT *************************************************************** #
6673 #       a0 = points to double precision operand                         #
6674 #                                                                       #
6675 # OUTPUT ************************************************************** #
6676 #       d0 = value of type tag                                          #
6677 #               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
6678 #                                                                       #
6679 # ALGORITHM *********************************************************** #
6680 #       Simply test the exponent, j-bit, and mantissa values to         #
6681 # determine the type of operand.                                        #
6682 #                                                                       #
6683 #########################################################################
6684
6685         global          set_tag_d
6686 set_tag_d:
6687         mov.l           FTEMP(%a0), %d0         # d0 = upper long of the dbl operand
6688         mov.l           %d0, %d1                # d1 = copy for the fraction tests
6689
6690         andi.l          &0x7ff00000, %d0        # isolate the 11-bit exponent field
6691         beq.b           zero_or_denorm_d        # exp = 0
6692
6693         cmpi.l          %d0, &0x7ff00000        # is (EXP == MAX)?
6694         beq.b           inf_or_nan_d            # yes
6695
6696 is_norm_d:
6697         mov.b           &NORM, %d0              # 0 < exp < max
6698         rts
6699 zero_or_denorm_d:
6700         and.l           &0x000fffff, %d1        # upper 20 fraction bits
6701         bne             is_denorm_d             # non-zero fraction
6702         tst.l           4+FTEMP(%a0)            # lower 32 fraction bits
6703         bne             is_denorm_d             # non-zero fraction
6704 is_zero_d:
6705         mov.b           &ZERO, %d0              # exp = 0 and fraction = 0
6706         rts
6707 is_denorm_d:
6708         mov.b           &DENORM, %d0            # exp = 0 and fraction != 0
6709         rts
6710 inf_or_nan_d:
6711         and.l           &0x000fffff, %d1        # upper 20 fraction bits
6712         bne             is_nan_d                # non-zero fraction
6713         tst.l           4+FTEMP(%a0)            # lower 32 fraction bits
6714         bne             is_nan_d                # non-zero fraction
6715 is_inf_d:
6716         mov.b           &INF, %d0               # exp = max and fraction = 0
6717         rts
6718 is_nan_d:
6719         btst            &19, %d1                # is the msb of the fraction set?
6720         bne             is_qnan_d               # yes; quiet NAN
6721 is_snan_d:
6722         mov.b           &SNAN, %d0
6723         rts
6724 is_qnan_d:
6725         mov.b           &QNAN, %d0
6726         rts
6727
6728 #########################################################################
6729 # XDEF **************************************************************** #
6730 #       set_tag_s(): return the optype of the input sgl fp number       #
6731 #                                                                       #
6732 # XREF **************************************************************** #
6733 #       None                                                            #
6734 #                                                                       #
6735 # INPUT *************************************************************** #
6736 #       a0 = pointer to single precision operand                        #
6737 #                                                                       #
6738 # OUTPUT ************************************************************** #
6739 #       d0 = value of type tag                                          #
6740 #               one of: NORM, INF, QNAN, SNAN, DENORM, ZERO             #
6741 #                                                                       #
6742 # ALGORITHM *********************************************************** #
6743 #       Simply test the exponent, j-bit, and mantissa values to         #
6744 # determine the type of operand.                                        #
6745 #                                                                       #
6746 #########################################################################
6747
6748         global          set_tag_s
6749 set_tag_s:
6750         mov.l           FTEMP(%a0), %d0         # d0 = the sgl operand
6751         mov.l           %d0, %d1                # d1 = copy for the fraction tests
6752
6753         andi.l          &0x7f800000, %d0        # isolate the 8-bit exponent field
6754         beq.b           zero_or_denorm_s        # exp = 0
6755
6756         cmpi.l          %d0, &0x7f800000        # is (EXP == MAX)?
6757         beq.b           inf_or_nan_s            # yes
6758
6759 is_norm_s:
6760         mov.b           &NORM, %d0              # 0 < exp < max
6761         rts
6762 zero_or_denorm_s:
6763         and.l           &0x007fffff, %d1        # 23 fraction bits
6764         bne             is_denorm_s             # non-zero fraction
6765 is_zero_s:
6766         mov.b           &ZERO, %d0              # exp = 0 and fraction = 0
6767         rts
6768 is_denorm_s:
6769         mov.b           &DENORM, %d0            # exp = 0 and fraction != 0
6770         rts
6771 inf_or_nan_s:
6772         and.l           &0x007fffff, %d1        # 23 fraction bits
6773         bne             is_nan_s                # non-zero fraction
6774 is_inf_s:
6775         mov.b           &INF, %d0               # exp = max and fraction = 0
6776         rts
6777 is_nan_s:
6778         btst            &22, %d1                # is the msb of the fraction set?
6779         bne             is_qnan_s               # yes; quiet NAN
6780 is_snan_s:
6781         mov.b           &SNAN, %d0
6782         rts
6783 is_qnan_s:
6784         mov.b           &QNAN, %d0
6785         rts
6786
6787 #########################################################################
6788 # XDEF **************************************************************** #
6789 #       unf_res(): routine to produce default underflow result of a     #
6790 #                  scaled extended precision number; this is used by    #
6791 #                  fadd/fdiv/fmul/etc. emulation routines.              #
6792 #       unf_res4(): same as above but for fsglmul/fsgldiv which use     #
6793 #                   single round prec and extended prec mode.           #
6794 #                                                                       #
6795 # XREF **************************************************************** #
6796 #       _denorm() - denormalize according to scale factor               #
6797 #       _round() - round denormalized number according to rnd prec      #
6798 #                                                                       #
6799 # INPUT *************************************************************** #
6800 #       a0 = pointer to extended precision operand                      #
6801 #       d0 = scale factor                                               #
6802 #       d1 = rounding precision/mode                                    #
6803 #                                                                       #
6804 # OUTPUT ************************************************************** #
6805 #       a0 = pointer to default underflow result in extended precision  #
6806 #       d0.b = result FPSR_cc which caller may or may not want to save  #
6807 #                                                                       #
6808 # ALGORITHM *********************************************************** #
6809 #       Convert the input operand to "internal format" which means the  #
6810 # exponent is extended to 16 bits and the sign is stored in the unused  #
6811 # portion of the extended precision operand. Denormalize the number     #
6812 # according to the scale factor passed in d0. Then, round the           #
6813 # denormalized result.                                                  #
6814 #       Set the FPSR_exc bits as appropriate but return the cc bits in  #
6815 # d0 in case the caller doesn't want to save them (as is the case for   #
6816 # fmove out).                                                           #
6817 #       unf_res4() for fsglmul/fsgldiv forces the denorm to extended    #
6818 # precision and the rounding mode to single.                            #
6819 #                                                                       #
6820 #########################################################################
6821         global          unf_res
6822 unf_res:
6823         mov.l           %d1, -(%sp)             # save rnd prec,mode on stack
6824
6825         btst            &0x7, FTEMP_EX(%a0)     # make "internal" format
6826         sne             FTEMP_SGN(%a0)          # SGN byte = 0xff if sign bit set, else 0
6827
6828         mov.w           FTEMP_EX(%a0), %d1      # extract exponent
6829         and.w           &0x7fff, %d1            # strip off sign
6830         sub.w           %d0, %d1                # subtract scale factor
6831         mov.w           %d1, FTEMP_EX(%a0)      # insert 16 bit exponent
6832
6833         mov.l           %a0, -(%sp)             # save operand ptr during calls
6834
6835         mov.l           0x4(%sp),%d0            # pass rnd prec.
6836         andi.w          &0x00c0,%d0             # keep only bits 7:6 of saved d1
6837         lsr.w           &0x4,%d0                # move them down to bits 3:2
6838         bsr.l           _denorm                 # denorm result
6839
6840         mov.l           (%sp),%a0               # reload operand ptr
6841         mov.w           0x6(%sp),%d1            # load prec:mode into %d1
6842         andi.w          &0xc0,%d1               # extract rnd prec
6843         lsr.w           &0x4,%d1
6844         swap            %d1                     # hi(d1) = rnd prec
6845         mov.w           0x6(%sp),%d1            # reload low word of saved d1
6846         andi.w          &0x30,%d1               # extract rnd mode
6847         lsr.w           &0x4,%d1                # lo(d1) = rnd mode
6848         bsr.l           _round                  # round the denorm
6849
6850         mov.l           (%sp)+, %a0             # pop operand ptr
6851
6852 # result is now rounded properly. convert back to normal format
6853         bclr            &0x7, FTEMP_EX(%a0)     # clear sgn first; may have residue
6854         tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
6855         beq.b           unf_res_chkifzero       # no; result is positive
6856         bset            &0x7, FTEMP_EX(%a0)     # set result sgn
6857         clr.b           FTEMP_SGN(%a0)          # clear temp sign
6858
6859 # the number may have become zero after rounding. set ccodes accordingly.
6860 unf_res_chkifzero:
6861         clr.l           %d0                     # d0 = returned ccodes; none set yet
6862         tst.l           FTEMP_HI(%a0)           # is value now a zero?
6863         bne.b           unf_res_cont            # no
6864         tst.l           FTEMP_LO(%a0)
6865         bne.b           unf_res_cont            # no
6866 #       bset            &z_bit, FPSR_CC(%a6)    # yes; set zero ccode bit
6867         bset            &z_bit, %d0             # yes; set zero ccode bit
6868
6869 unf_res_cont:
6870
6871 #
6872 # can inex1 also be set along with unfl and inex2???
6873 #
6874 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6875 #
6876         btst            &inex2_bit, FPSR_EXCEPT(%a6) # is INEX2 set?
6877         beq.b           unf_res_end             # no
6878         bset            &aunfl_bit, FPSR_AEXCEPT(%a6) # yes; set aunfl
6879
6880 unf_res_end:
6881         add.l           &0x4, %sp               # discard saved rnd prec,mode
6882         rts
6883
6884 # unf_res() for fsglmul() and fsgldiv().
6885         global          unf_res4
6886 unf_res4:
6887         mov.l           %d1,-(%sp)              # save rnd prec,mode on stack
6888
6889         btst            &0x7,FTEMP_EX(%a0)      # make "internal" format
6890         sne             FTEMP_SGN(%a0)          # SGN byte = 0xff if sign bit set, else 0
6891
6892         mov.w           FTEMP_EX(%a0),%d1       # extract exponent
6893         and.w           &0x7fff,%d1             # strip off sign
6894         sub.w           %d0,%d1                 # subtract scale factor
6895         mov.w           %d1,FTEMP_EX(%a0)       # insert 16 bit exponent
6896
6897         mov.l           %a0,-(%sp)              # save operand ptr during calls
6898
6899         clr.l           %d0                     # force rnd prec = ext
6900         bsr.l           _denorm                 # denorm result
6901
6902         mov.l           (%sp),%a0               # reload operand ptr
6903         mov.w           &s_mode,%d1             # force rnd prec = sgl
6904         swap            %d1                     # hi(d1) = rnd prec
6905         mov.w           0x6(%sp),%d1            # load rnd mode
6906         andi.w          &0x30,%d1               # extract rnd mode
6907         lsr.w           &0x4,%d1                # lo(d1) = rnd mode
6908         bsr.l           _round                  # round the denorm
6909
6910         mov.l           (%sp)+,%a0              # pop operand ptr
6911
6912 # result is now rounded properly. convert back to normal format
6913         bclr            &0x7,FTEMP_EX(%a0)      # clear sgn first; may have residue
6914         tst.b           FTEMP_SGN(%a0)          # is "internal result" sign set?
6915         beq.b           unf_res4_chkifzero      # no; result is positive
6916         bset            &0x7,FTEMP_EX(%a0)      # set result sgn
6917         clr.b           FTEMP_SGN(%a0)          # clear temp sign
6918
6919 # the number may have become zero after rounding. set ccodes accordingly.
6920 unf_res4_chkifzero:
6921         clr.l           %d0                     # d0 = returned ccodes; none set yet
6922         tst.l           FTEMP_HI(%a0)           # is value now a zero?
6923         bne.b           unf_res4_cont           # no
6924         tst.l           FTEMP_LO(%a0)
6925         bne.b           unf_res4_cont           # no
6926 #       bset            &z_bit,FPSR_CC(%a6)     # yes; set zero ccode bit
6927         bset            &z_bit,%d0              # yes; set zero ccode bit
6928
6929 unf_res4_cont:
6930
6931 #
6932 # can inex1 also be set along with unfl and inex2???
6933 #
6934 # we know that underflow has occurred. aunfl should be set if INEX2 is also set.
6935 #
6936         btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
6937         beq.b           unf_res4_end            # no
6938         bset            &aunfl_bit,FPSR_AEXCEPT(%a6) # yes; set aunfl
6939
6940 unf_res4_end:
6941         add.l           &0x4,%sp                # discard saved rnd prec,mode
6942         rts
6943
6944 #########################################################################
6945 # XDEF **************************************************************** #
6946 #       ovf_res(): routine to produce the default overflow result of    #
6947 #                  an overflowing number.                               #
6948 #       ovf_res2(): same as above but the rnd mode/prec are passed      #
6949 #                   differently.                                        #
6950 #                                                                       #
6951 # XREF **************************************************************** #
6952 #       none                                                            #
6953 #                                                                       #
6954 # INPUT *************************************************************** #
6955 #       d1.b    = '-1' => (-); '0' => (+)                               #
6956 #   ovf_res():                                                          #
6957 #       d0      = rnd mode/prec                                         #
6958 #   ovf_res2():                                                         #
6959 #       hi(d0)  = rnd prec                                              #
6960 #       lo(d0)  = rnd mode                                              #
6961 #                                                                       #
6962 # OUTPUT ************************************************************** #
6963 #       a0      = points to extended precision result                   #
6964 #       d0.b    = condition code bits                                   #
6965 #                                                                       #
6966 # ALGORITHM *********************************************************** #
6967 #       The default overflow result can be determined by the sign of    #
6968 # the result and the rounding mode/prec in effect. These bits are       #
6969 # concatenated together to create an index into the default result      #
6970 # table. A pointer to the correct result is returned in a0. The         #
6971 # resulting condition codes are returned in d0 in case the caller       #
6972 # doesn't want FPSR_cc altered (as is the case for fmove out).          #
6973 #                                                                       #
6974 #########################################################################
6975
6976         global          ovf_res
6977 ovf_res:
6978         andi.w          &0x10,%d1               # keep result sign as bit 4 (0x10 if d1.b = -1)
6979         lsr.b           &0x4,%d0                # shift prec/mode down into bits 3:0
6980         or.b            %d0,%d1                 # d1 = {sgn,prec,mode} table index
6981         mov.w           %d1,%d0                 # d0 = copy used to index the ccode table
6982         lsl.b           &0x1,%d1                # d1 * 2; scaled by 8 below => 16 bytes/entry
6983         bra.b           ovf_res_load
6984
6985         global          ovf_res2
6986 ovf_res2:
6987         and.w           &0x10, %d1              # keep result sign as bit 4 (0x10 if d1.b = -1)
6988         or.b            %d0, %d1                # insert rnd mode
6989         swap            %d0                     # bring rnd prec (hi word) into the low word
6990         or.b            %d0, %d1                # insert rnd prec
6991         mov.w           %d1, %d0                # d0 = copy used to index the ccode table
6992         lsl.b           &0x1, %d1               # d1 * 2; scaled by 8 below => 16 bytes/entry
6993
6994 #
6995 # use the rounding mode, precision, and result sign as an index into the
6996 # two tables below to fetch the default result and the result ccodes.
6997 #
6998 ovf_res_load:
6999         mov.b           (tbl_ovfl_cc.b,%pc,%d0.w*1), %d0 # fetch result ccodes
7000         lea             (tbl_ovfl_result.b,%pc,%d1.w*8), %a0 # return result ptr
7001
7002         rts
7003
7004 tbl_ovfl_cc: # one ccode byte per {sgn,prec,mode} index; 0x2 appears on the INF results, 0x8 on the negative results
7005         byte            0x2, 0x0, 0x0, 0x2      # +; ext prec: RN,RZ,RM,RP
7006         byte            0x2, 0x0, 0x0, 0x2      # +; sgl prec: RN,RZ,RM,RP
7007         byte            0x2, 0x0, 0x0, 0x2      # +; dbl prec: RN,RZ,RM,RP
7008         byte            0x0, 0x0, 0x0, 0x0      # prec field = 3: filler (presumably never indexed)
7009         byte            0x2+0x8, 0x8, 0x2+0x8, 0x8 # -; ext prec: RN,RZ,RM,RP
7010         byte            0x2+0x8, 0x8, 0x2+0x8, 0x8 # -; sgl prec: RN,RZ,RM,RP
7011         byte            0x2+0x8, 0x8, 0x2+0x8, 0x8 # -; dbl prec: RN,RZ,RM,RP
7012
7013 tbl_ovfl_result: # 16 bytes per entry, in the same index order as tbl_ovfl_cc
7014         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7015         long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RZ
7016         long            0x7ffe0000,0xffffffff,0xffffffff,0x00000000 # +EXT; RM
7017         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7018
7019         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7020         long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RZ
7021         long            0x407e0000,0xffffff00,0x00000000,0x00000000 # +SGL; RM
7022         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7023
7024         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RN
7025         long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RZ
7026         long            0x43fe0000,0xffffffff,0xfffff800,0x00000000 # +DBL; RM
7027         long            0x7fff0000,0x00000000,0x00000000,0x00000000 # +INF; RP
7028
7029         long            0x00000000,0x00000000,0x00000000,0x00000000 # prec field = 3: filler so the negative
7030         long            0x00000000,0x00000000,0x00000000,0x00000000 # entries start at index 16 (sign bit 0x10);
7031         long            0x00000000,0x00000000,0x00000000,0x00000000 # presumably never indexed
7032         long            0x00000000,0x00000000,0x00000000,0x00000000
7033
7034         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7035         long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RZ
7036         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7037         long            0xfffe0000,0xffffffff,0xffffffff,0x00000000 # -EXT; RP
7038
7039         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7040         long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RZ
7041         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7042         long            0xc07e0000,0xffffff00,0x00000000,0x00000000 # -SGL; RP
7043
7044         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RN
7045         long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RZ
7046         long            0xffff0000,0x00000000,0x00000000,0x00000000 # -INF; RM
7047         long            0xc3fe0000,0xffffffff,0xfffff800,0x00000000 # -DBL; RP
7048
7049 #########################################################################
7050 # XDEF **************************************************************** #
7051 #       fout(): move from fp register to memory or data register        #
7052 #                                                                       #
7053 # XREF **************************************************************** #
7054 #       _round() - needed to create EXOP for sgl/dbl precision          #
7055 #       norm() - needed to create EXOP for extended precision           #
7056 #       ovf_res() - create default overflow result for sgl/dbl precision#
7057 #       unf_res() - create default underflow result for sgl/dbl prec.   #
7058 #       dst_dbl() - create rounded dbl precision result.                #
7059 #       dst_sgl() - create rounded sgl precision result.                #
7060 #       fetch_dreg() - fetch dynamic k-factor reg for packed.           #
7061 #       bindec() - convert FP binary number to packed number.           #
7062 #       _mem_write() - write data to memory.                            #
7063 #       _mem_write2() - write data to memory unless supv mode -(a7) exc.#
7064 #       _dmem_write_{byte,word,long}() - write data to memory.          #
7065 #       store_dreg_{b,w,l}() - store data to data register file.        #
7066 #       facc_out_{b,w,l,d,x}() - data access error occurred.            #
7067 #                                                                       #
7068 # INPUT *************************************************************** #
7069 #       a0 = pointer to extended precision source operand               #
7070 #       d0 = round prec,mode                                            #
7071 #                                                                       #
7072 # OUTPUT ************************************************************** #
7073 #       fp0 : intermediate underflow or overflow result if              #
7074 #             OVFL/UNFL occurred for a sgl or dbl operand               #
7075 #                                                                       #
7076 # ALGORITHM *********************************************************** #
7077 #       This routine is accessed by many handlers that need to do an    #
7078 # opclass three move of an operand out to memory.                       #
7079 #       Decode an fmove out (opclass 3) instruction to determine if     #
7080 # it's b,w,l,s,d,x, or p in size. b,w,l can be stored to either a data  #
7081 # register or memory. The algorithm uses a standard "fmove" to create   #
7082 # the rounded result. Also, since exceptions are disabled, this also    #
7083 # creates the correct OPERR default result if appropriate.              #
7084 #       For sgl or dbl precision, overflow or underflow can occur. If   #
7085 # either occurs and is enabled, the EXOP is returned in fp1.            #
7086 #       For extended precision, the stacked <ea> must be fixed along    #
7087 # w/ the address index register as appropriate w/ _calc_ea_fout(). If   #
7088 # the source is a denorm and if underflow is enabled, an EXOP must be   #
7089 # created.                                                              #
7090 #       For packed, the k-factor must be fetched from the instruction   #
7091 # word or a data register. The <ea> must be fixed as w/ extended        #
7092 # precision. Then, bindec() is called to create the appropriate         #
7093 # packed result.                                                        #
7094 #       If at any time an access error is flagged by one of the move-   #
7095 # to-memory routines, then a special exit must be made so that the      #
7096 # access error can be handled properly.                                 #
7097 #                                                                       #
7098 #########################################################################
7099
# fout: entry point. Extracts the 3-bit destination format field from the
# stacked command register, looks up the matching 16-bit offset in
# tbl_fout and jumps to the size-specific handler. a0 and d0 are passed
# through untouched to the handler; d1 and a1 are used as scratch.
7100         global          fout
7101 fout:
7102         bfextu          EXC_CMDREG(%a6){&3:&3},%d1 # extract dst fmt
7103         mov.w           (tbl_fout.b,%pc,%d1.w*2),%a1 # use as index
7104         jmp             (tbl_fout.b,%pc,%a1)    # jump to routine
7105
7106         swbeg           &0x8                    # 8-entry jump table follows
7107 tbl_fout:
7108         short           fout_long       -       tbl_fout # fmt 0: long
7109         short           fout_sgl        -       tbl_fout # fmt 1: single
7110         short           fout_ext        -       tbl_fout # fmt 2: extended
7111         short           fout_pack       -       tbl_fout # fmt 3: packed
7112         short           fout_word       -       tbl_fout # fmt 4: word
7113         short           fout_dbl        -       tbl_fout # fmt 5: double
7114         short           fout_byte       -       tbl_fout # fmt 6: byte
7115         short           fout_pack       -       tbl_fout # fmt 7: packed
7116
7117 #################################################################
7118 # fmove.b out ###################################################
7119 #################################################################
7120
7121 # Only "Unimplemented Data Type" exceptions enter here. The operand
7122 # is either a DENORM or a NORM.
# fout_byte: emulate "fmove.b fpn,<ea>".
#   In:  a0 = ptr to extended precision source, d0 = rnd prec,mode.
#   The conversion itself is done by a real "fmov.b" executed with the
#   caller's rounding mode; the resulting FPSR exception/accrued bits are
#   OR'ed into USER_FPSR. The byte is then stored to a data register
#   (<ea> mode field == 0) or to memory at EXC_EA. A failed memory store
#   exits through facc_out_b.
#   A DENORM source is replaced by the smallest normalized single
#   precision value of the same sign before the conversion.
7123 fout_byte:
7124         tst.b           STAG(%a6)               # is operand normalized?
7125         bne.b           fout_byte_denorm        # no
7126
7127         fmovm.x         SRC(%a0),&0x80          # load value
7128
7129 fout_byte_norm:
7130         fmov.l          %d0,%fpcr               # insert rnd prec,mode
7131
7132         fmov.b          %fp0,%d0                # exec move out w/ correct rnd mode
7133
7134         fmov.l          &0x0,%fpcr              # clear FPCR
7135         fmov.l          %fpsr,%d1               # fetch FPSR
7136         or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7137
7138         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7139         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7140         beq.b           fout_byte_dn            # must save to integer regfile
7141
7142         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7143         bsr.l           _dmem_write_byte        # write byte
7144
7145         tst.l           %d1                     # did dstore fail?
7146         bne.l           facc_out_b              # yes
7147
7148         rts                                     # memory store succeeded
7149
7150 fout_byte_dn:
7151         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7152         andi.w          &0x7,%d1                # keep only the register number
7153         bsr.l           store_dreg_b            # store byte to data register
7154         rts
7155
7156 fout_byte_denorm:
7157         mov.l           SRC_EX(%a0),%d1         # fetch sign/exponent longword
7158         andi.l          &0x80000000,%d1         # keep DENORM sign
7159         ori.l           &0x00800000,%d1         # make smallest sgl
7160         fmov.s          %d1,%fp0                # use it as the operand
7161         bra.b           fout_byte_norm
7162
7163 #################################################################
7164 # fmove.w out ###################################################
7165 #################################################################
7166
7167 # Only "Unimplemented Data Type" exceptions enter here. The operand
7168 # is either a DENORM or a NORM.
# fout_word: emulate "fmove.w fpn,<ea>".
#   In:  a0 = ptr to extended precision source, d0 = rnd prec,mode.
#   The conversion itself is done by a real "fmov.w" executed with the
#   caller's rounding mode; the resulting FPSR exception/accrued bits are
#   OR'ed into USER_FPSR. The word is then stored to a data register
#   (<ea> mode field == 0) or to memory at EXC_EA. A failed memory store
#   exits through facc_out_w.
#   A DENORM source is replaced by the smallest normalized single
#   precision value of the same sign before the conversion.
7169 fout_word:
7170         tst.b           STAG(%a6)               # is operand normalized?
7171         bne.b           fout_word_denorm        # no
7172
7173         fmovm.x         SRC(%a0),&0x80          # load value
7174
7175 fout_word_norm:
7176         fmov.l          %d0,%fpcr               # insert rnd prec:mode
7177
7178         fmov.w          %fp0,%d0                # exec move out w/ correct rnd mode
7179
7180         fmov.l          &0x0,%fpcr              # clear FPCR
7181         fmov.l          %fpsr,%d1               # fetch FPSR
7182         or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7183
7184         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7185         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7186         beq.b           fout_word_dn            # must save to integer regfile
7187
7188         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7189         bsr.l           _dmem_write_word        # write word
7190
7191         tst.l           %d1                     # did dstore fail?
7192         bne.l           facc_out_w              # yes
7193
7194         rts                                     # memory store succeeded
7195
7196 fout_word_dn:
7197         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7198         andi.w          &0x7,%d1                # keep only the register number
7199         bsr.l           store_dreg_w            # store word to data register
7200         rts
7201
7202 fout_word_denorm:
7203         mov.l           SRC_EX(%a0),%d1         # fetch sign/exponent longword
7204         andi.l          &0x80000000,%d1         # keep DENORM sign
7205         ori.l           &0x00800000,%d1         # make smallest sgl
7206         fmov.s          %d1,%fp0                # use it as the operand
7207         bra.b           fout_word_norm
7208
7209 #################################################################
7210 # fmove.l out ###################################################
7211 #################################################################
7212
7213 # Only "Unimplemented Data Type" exceptions enter here. The operand
7214 # is either a DENORM or a NORM.
# fout_long: emulate "fmove.l fpn,<ea>".
#   In:  a0 = ptr to extended precision source, d0 = rnd prec,mode.
#   The conversion itself is done by a real "fmov.l" executed with the
#   caller's rounding mode; the resulting FPSR exception/accrued bits are
#   OR'ed into USER_FPSR. The longword is then stored to a data register
#   (<ea> mode field == 0) or to memory at EXC_EA. A failed memory store
#   exits through facc_out_l.
#   A DENORM source is replaced by the smallest normalized single
#   precision value of the same sign before the conversion.
7215 fout_long:
7216         tst.b           STAG(%a6)               # is operand normalized?
7217         bne.b           fout_long_denorm        # no
7218
7219         fmovm.x         SRC(%a0),&0x80          # load value
7220
7221 fout_long_norm:
7222         fmov.l          %d0,%fpcr               # insert rnd prec:mode
7223
7224         fmov.l          %fp0,%d0                # exec move out w/ correct rnd mode
7225
7226         fmov.l          &0x0,%fpcr              # clear FPCR
7227         fmov.l          %fpsr,%d1               # fetch FPSR
7228         or.w            %d1,2+USER_FPSR(%a6)    # save new exc,accrued bits
7229
7230 fout_long_write:
7231         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7232         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7233         beq.b           fout_long_dn            # must save to integer regfile
7234
7235         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7236         bsr.l           _dmem_write_long        # write long
7237
7238         tst.l           %d1                     # did dstore fail?
7239         bne.l           facc_out_l              # yes
7240
7241         rts                                     # memory store succeeded
7242
7243 fout_long_dn:
7244         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7245         andi.w          &0x7,%d1                # keep only the register number
7246         bsr.l           store_dreg_l            # store long to data register
7247         rts
7248
7249 fout_long_denorm:
7250         mov.l           SRC_EX(%a0),%d1         # fetch sign/exponent longword
7251         andi.l          &0x80000000,%d1         # keep DENORM sign
7252         ori.l           &0x00800000,%d1         # make smallest sgl
7253         fmov.s          %d1,%fp0                # use it as the operand
7254         bra.b           fout_long_norm
7255
7256 #################################################################
7257 # fmove.x out ###################################################
7258 #################################################################
7259
7260 # Only "Unimplemented Data Type" exceptions enter here. The operand
7261 # is either a DENORM or a NORM.
7262 # The DENORM causes an Underflow exception.
# fout_ext: emulate "fmove.x fpn,<ea>".
#   In:  a0 = ptr to extended precision source.
#   A copy of the source with the reserved word cleared is built in
#   FP_SCR0 and written out as 12 bytes to the address returned by
#   _calc_ea_fout(). When SPCOND_FLG equals mda7_flg the write goes
#   through _mem_write2() instead of _dmem_write(). A failed store exits
#   through facc_out_x after the stacked a6 is repaired.
#   If the source is a DENORM, the underflow bit is set in FPSR_EXCEPT
#   and, when UNFL or INEX is enabled, a normalized EXOP is returned
#   in fp1.
7263 fout_ext:
7264
7265 # we copy the extended precision result to FP_SCR0 so that the reserved
7266 # 16-bit field gets zeroed. we do this since we promise not to disturb
7267 # what's at SRC(a0).
7268         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7269         clr.w           2+FP_SCR0_EX(%a6)       # clear reserved field
7270         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7271         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7272
7273         fmovm.x         SRC(%a0),&0x80          # return result
7274
7275         bsr.l           _calc_ea_fout           # fix stacked <ea>
7276
7277         mov.l           %a0,%a1                 # pass: dst addr
7278         lea             FP_SCR0(%a6),%a0        # pass: src addr
7279         mov.l           &0xc,%d0                # pass: opsize is 12 bytes
7280
7281 # we must not yet write the extended precision data to the stack
7282 # in the pre-decrement case from supervisor mode or else we'll corrupt
7283 # the stack frame. so, leave it in FP_SRC for now and deal with it later...
7284         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
7285         beq.b           fout_ext_a7
7286
7287         bsr.l           _dmem_write             # write ext prec number to memory
7288
7289         tst.l           %d1                     # did dstore fail?
7290         bne.w           fout_ext_err            # yes
7291
7292         tst.b           STAG(%a6)               # is operand normalized?
7293         bne.b           fout_ext_denorm         # no
7294         rts
7295
7296 # the number is a DENORM. must set the underflow exception bit
7297 fout_ext_denorm:
7298         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set underflow exc bit
7299
7300         mov.b           FPCR_ENABLE(%a6),%d0
7301         andi.b          &0x0a,%d0               # is UNFL or INEX enabled?
7302         bne.b           fout_ext_exc            # yes
7303         rts
7304
7305 # we don't want to do the write if the exception occurred in supervisor mode
7306 # so _mem_write2() handles this for us.
7307 fout_ext_a7:
7308         bsr.l           _mem_write2             # write ext prec number to memory
7309
7310         tst.l           %d1                     # did dstore fail?
7311         bne.w           fout_ext_err            # yes
7312
7313         tst.b           STAG(%a6)               # is operand normalized?
7314         bne.b           fout_ext_denorm         # no
7315         rts
7316
7317 # build the EXOP: normalize the copy in FP_SCR0 and give it the exponent
7318 # -(shift amount), keeping the original sign.
7317 fout_ext_exc:
7318         lea             FP_SCR0(%a6),%a0
7319         bsr.l           norm                    # normalize the mantissa
7320         neg.w           %d0                     # new exp = -(shft amt)
7321         andi.w          &0x7fff,%d0
7322         andi.w          &0x8000,FP_SCR0_EX(%a6) # keep only old sign
7323         or.w            %d0,FP_SCR0_EX(%a6)     # insert new exponent
7324         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
7325         rts
7326
7327 fout_ext_err:
7328         mov.l           EXC_A6(%a6),(%a6)       # fix stacked a6
7329         bra.l           facc_out_x
7330
7331 #########################################################################
7332 # fmove.s out ###########################################################
7333 #########################################################################
# fout_sgl: emulate "fmove.s fpn,<ea>".
#   In:  a0 = ptr to extended precision source, d0 = rnd prec,mode.
#   The rounding precision in d0 is forced to single and the result is
#   saved in L_SCR3 for the helper paths. The source exponent is then
#   classified against SGL_HI/SGL_LO:
#     above SGL_HI  -> fout_sgl_ovfl     (certain overflow)
#     equal SGL_HI  -> fout_sgl_may_ovfl (overflow depends on rounding)
#     below SGL_LO  -> fout_sgl_unfl     (underflow)
#     otherwise     -> fout_sgl_exg      (plain "fmov.s")
#   fout_sgl_exg stores the converted longword to a data register or to
#   memory at EXC_EA; a failed memory store exits through facc_out_l.
7334 fout_sgl:
7335         andi.b          &0x30,%d0               # clear rnd prec
7336         ori.b           &s_mode*0x10,%d0        # insert sgl prec
7337         mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
7338
7339 #
7340 # operand is a normalized number. first, we check to see if the move out
7341 # would cause either an underflow or overflow. these cases are handled
7342 # separately. otherwise, set the FPCR to the proper rounding mode and
7343 # execute the move.
7344 #
7345         mov.w           SRC_EX(%a0),%d0         # extract exponent
7346         andi.w          &0x7fff,%d0             # strip sign
7347
7348         cmpi.w          %d0,&SGL_HI             # will operand overflow?
7349         bgt.w           fout_sgl_ovfl           # yes; go handle OVFL
7350         beq.w           fout_sgl_may_ovfl       # maybe; go handle possible OVFL
7351         cmpi.w          %d0,&SGL_LO             # will operand underflow?
7352         blt.w           fout_sgl_unfl           # yes; go handle underflow
7353
7354 #
7355 # NORMs(in range) can be stored out by a simple "fmov.s"
7356 # Unnormalized inputs can come through this point.
7357 #
7358 fout_sgl_exg:
7359         fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
7360
7361         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7362         fmov.l          &0x0,%fpsr              # clear FPSR
7363
7364         fmov.s          %fp0,%d0                # store does convert and round
7365
7366         fmov.l          &0x0,%fpcr              # clear FPCR
7367         fmov.l          %fpsr,%d1               # save FPSR
7368
7369         or.w            %d1,2+USER_FPSR(%a6)    # set possible inex2/ainex
7370
7371 fout_sgl_exg_write:
7372         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7373         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7374         beq.b           fout_sgl_exg_write_dn   # must save to integer regfile
7375
7376         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7377         bsr.l           _dmem_write_long        # write long
7378
7379         tst.l           %d1                     # did dstore fail?
7380         bne.l           facc_out_l              # yes
7381
7382         rts                                     # memory store succeeded
7383
7384 fout_sgl_exg_write_dn:
7385         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7386         andi.w          &0x7,%d1                # keep only the register number
7387         bsr.l           store_dreg_l            # store long to data register
7388         rts
7389
7390 #
7391 # here, we know that the operand would UNFL if moved out to single prec,
7392 # so, denorm and round and then use generic store single routine to
7393 # write the value to memory.
7394 #
# fout_sgl_unfl: single precision move out that underflows.
#   In:  a0 = ptr to source operand, L_SCR3 = sgl prec,rnd mode.
#   Sets the UNFL bit in FPSR_EXCEPT, copies the operand to FP_SCR0
#   (normalizing it first if it is a DENORM), lets unf_res() build the
#   default underflow result and dst_sgl() convert it to single format,
#   then stores the longword to a data register or to memory.
#   The source pointer is pushed on the stack so that fout_sd_exc_unfl
#   can pop it when UNFL or INEX is enabled; otherwise it is discarded
#   before returning.
7395 fout_sgl_unfl:
7396         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7397
7398         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7399         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7400         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7401         mov.l           %a0,-(%sp)              # save src ptr for the EXOP path
7402
7403         clr.l           %d0                     # pass: S.F. = 0
7404
7405         cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
7406         bne.b           fout_sgl_unfl_cont      # let DENORMs fall through
7407
7408         lea             FP_SCR0(%a6),%a0
7409         bsr.l           norm                    # normalize the DENORM
7410
7411 fout_sgl_unfl_cont:
7412         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
7413         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
7414         bsr.l           unf_res                 # calc default underflow result
7415
7416         lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
7417         bsr.l           dst_sgl                 # convert to single prec
7418
7419         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7420         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7421         beq.b           fout_sgl_unfl_dn        # must save to integer regfile
7422
7423         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7424         bsr.l           _dmem_write_long        # write long
7425
7426         tst.l           %d1                     # did dstore fail?
7427         bne.l           facc_out_l              # yes
7428
7429         bra.b           fout_sgl_unfl_chkexc
7430
7431 fout_sgl_unfl_dn:
7432         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7433         andi.w          &0x7,%d1                # keep only the register number
7434         bsr.l           store_dreg_l            # store long to data register
7435
7436 fout_sgl_unfl_chkexc:
7437         mov.b           FPCR_ENABLE(%a6),%d1
7438         andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7439         bne.w           fout_sd_exc_unfl        # yes
7440         addq.l          &0x4,%sp                # no; discard saved src ptr
7441         rts
7442
7443 #
7444 # it's definitely an overflow so call ovf_res to get the correct answer
7445 #
# fout_sgl_ovfl: single precision move out that definitely overflows.
#   In:  a0 = ptr to source operand, L_SCR3 = sgl prec,rnd mode.
#   Sets ovfl/aovfl/ainex in USER_FPSR (plus inex2 when mantissa bits
#   beyond single precision are non-zero), fetches the default overflow
#   result from ovf_res(), converts it with "fmov.s" and stores the
#   longword to a data register or to memory at EXC_EA. A failed memory
#   store exits through facc_out_l.
#   The source pointer is pushed on the stack so that fout_sd_exc_ovfl
#   can pop it and build the EXOP in fp1 when OVFL or INEX is enabled;
#   otherwise it is discarded before returning.
7446 fout_sgl_ovfl:
7447         tst.b           3+SRC_HI(%a0)           # is result inexact?
7448         bne.b           fout_sgl_ovfl_inex2
7449         tst.l           SRC_LO(%a0)             # is result inexact?
7450         bne.b           fout_sgl_ovfl_inex2
7451         ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7452         bra.b           fout_sgl_ovfl_cont
7453 fout_sgl_ovfl_inex2:
7454         ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7455
7456 fout_sgl_ovfl_cont:
7457         mov.l           %a0,-(%sp)              # save src ptr for the EXOP path
7458
7459 # call ovf_res() w/ sgl prec and the correct rnd mode to create the default
7460 # overflow result. DON'T save the returned ccodes from ovf_res() since
7461 # fmove out doesn't alter them.
7462         tst.b           SRC_EX(%a0)             # is operand negative?
7463         smi             %d1                     # set if so
7464         mov.l           L_SCR3(%a6),%d0         # pass: sgl prec,rnd mode
7465         bsr.l           ovf_res                 # calc OVFL result
7466         fmovm.x         (%a0),&0x80             # load default overflow result
7467         fmov.s          %fp0,%d0                # store to single
7468
7469         mov.b           1+EXC_OPWORD(%a6),%d1   # extract dst mode
7470         andi.b          &0x38,%d1               # is mode == 0? (Dreg dst)
7471         beq.b           fout_sgl_ovfl_dn        # must save to integer regfile
7472
7473         mov.l           EXC_EA(%a6),%a0         # stacked <ea> is correct
7474         bsr.l           _dmem_write_long        # write long
7475
7476         tst.l           %d1                     # did dstore fail?
7477         bne.l           facc_out_l              # yes
7478
7479         bra.b           fout_sgl_ovfl_chkexc
7480
7481 fout_sgl_ovfl_dn:
7482         mov.b           1+EXC_OPWORD(%a6),%d1   # extract Dn
7483         andi.w          &0x7,%d1                # keep only the register number
7484         bsr.l           store_dreg_l            # store long to data register
7485
# The EXOP is only needed for an enabled OVFL or INEX2 trap, so test the
# OVFL (0x10) and INEX2 (0x02) enable bits. The UNFL enable bit (0x08) is
# irrelevant on the overflow path.
7486 fout_sgl_ovfl_chkexc:
7487         mov.b           FPCR_ENABLE(%a6),%d1
7488         andi.b          &0x12,%d1               # is OVFL or INEX enabled?
7489         bne.w           fout_sd_exc_ovfl        # yes
7490         addq.l          &0x4,%sp                # no; discard saved src ptr
7491         rts
7492
7493 #
7494 # move out MAY overflow:
7495 # (1) force the exp to 0x3fff
7496 # (2) do a move w/ appropriate rnd mode
7497 # (3) if exp still equals zero, then insert original exponent
7498 #       for the correct result.
7499 #     if exp now equals one, then it overflowed so call ovf_res.
7500 #
# fout_sgl_may_ovfl: source exponent equals SGL_HI, so overflow depends
# on whether rounding to single precision carries out of the mantissa.
#   In:  a0 = ptr to source operand, L_SCR3 = sgl prec,rnd mode.
#   A copy of the operand with its exponent forced to 0x3fff is rounded
#   by loading it with the FPCR set from L_SCR3. If the magnitude of the
#   rounded value is still below 2 the original operand is stored
#   normally (fout_sgl_exg); otherwise the overflow path is taken.
#   a0 is left untouched for either continuation.
7501 fout_sgl_may_ovfl:
7502         mov.w           SRC_EX(%a0),%d1         # fetch current sign
7503         andi.w          &0x8000,%d1             # keep it,clear exp
7504         ori.w           &0x3fff,%d1             # insert exp = 0
7505         mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
7506         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7507         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7508
7509         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7510
7511         fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
7512         fmov.l          &0x0,%fpcr              # clear FPCR
7513
7514         fabs.x          %fp0                    # need absolute value
7515         fcmp.b          %fp0,&0x2               # did exponent increase?
7516         fblt.w          fout_sgl_exg            # no; go finish NORM
7517         bra.w           fout_sgl_ovfl           # yes; go handle overflow
7518
7519 ################
7520
# fout_sd_exc_unfl / fout_sd_exc_ovfl: build the EXOP for a single or
# double precision move out and return it in fp1.
#   In:  (sp) = source operand pointer pushed by the ovfl/unfl path,
#        L_SCR3 = rnd prec,mode selected by fout_sgl/fout_dbl.
#   The source is copied to FP_SCR0. On the underflow entry a DENORM is
#   normalized and given the exponent -(shift amount). The sign is then
#   moved to the internal sign byte, the value is rounded by _round()
#   using the precision and mode from L_SCR3, and the sign is put back
#   before the result is loaded into fp1.
7521 fout_sd_exc_unfl:
7522         mov.l           (%sp)+,%a0              # restore a0
7523
7524         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7525         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7526         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7527
7528         cmpi.b          STAG(%a6),&DENORM       # was src a DENORM?
7529         bne.b           fout_sd_exc_cont        # no
7530
7531         lea             FP_SCR0(%a6),%a0
7532         bsr.l           norm                    # normalize the DENORM
7533         neg.l           %d0                     # new exp = -(shft amt)
7534         andi.w          &0x7fff,%d0             # keep 15 exponent bits
7535         bfins           %d0,FP_SCR0_EX(%a6){&1:&15} # insert exponent, sign untouched
7536         bra.b           fout_sd_exc_cont
7537
7538 fout_sd_exc:
7539 fout_sd_exc_ovfl:
7540         mov.l           (%sp)+,%a0              # restore a0
7541
7542         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7543         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7544         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7545
7546 fout_sd_exc_cont:
7547         bclr            &0x7,FP_SCR0_EX(%a6)    # clear sign bit
7548         sne.b           2+FP_SCR0_EX(%a6)       # set internal sign bit
7549         lea             FP_SCR0(%a6),%a0        # pass: ptr to DENORM
7550
7551         mov.b           3+L_SCR3(%a6),%d1       # fetch rnd prec,mode byte
7552         lsr.b           &0x4,%d1                # move prec,mode to low nibble
7553         andi.w          &0x0c,%d1               # keep the precision bits
7554         swap            %d1                     # precision goes to upper word
7555         mov.b           3+L_SCR3(%a6),%d1       # fetch rnd prec,mode byte again
7556         lsr.b           &0x4,%d1                # move prec,mode to low nibble
7557         andi.w          &0x03,%d1               # keep the rounding mode bits
7558         clr.l           %d0                     # pass: zero g,r,s
7559         bsr.l           _round                  # round the DENORM
7560
7561         tst.b           2+FP_SCR0_EX(%a6)       # is EXOP negative?
7562         beq.b           fout_sd_exc_done        # no
7563         bset            &0x7,FP_SCR0_EX(%a6)    # yes
7564
7565 fout_sd_exc_done:
7566         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
7567         rts
7568
7569 #################################################################
7570 # fmove.d out ###################################################
7571 #################################################################
# fout_dbl: emulate "fmove.d fpn,<ea>".
#   In:  a0 = ptr to extended precision source, d0 = rnd prec,mode.
#   The rounding precision in d0 is forced to double and the result is
#   saved in L_SCR3 for the helper paths. The source exponent is then
#   classified against DBL_HI/DBL_LO:
#     above DBL_HI  -> fout_dbl_ovfl     (certain overflow)
#     equal DBL_HI  -> fout_dbl_may_ovfl (overflow depends on rounding)
#     below DBL_LO  -> fout_dbl_unfl     (underflow)
#     otherwise     -> fout_dbl_exg      (plain "fmov.d")
#   fout_dbl_exg converts into the L_SCR1 scratch area and writes the
#   8 bytes to memory at EXC_EA; a failed store exits through facc_out_d.
7572 fout_dbl:
7573         andi.b          &0x30,%d0               # clear rnd prec
7574         ori.b           &d_mode*0x10,%d0        # insert dbl prec
7575         mov.l           %d0,L_SCR3(%a6)         # save rnd prec,mode on stack
7576
7577 #
7578 # operand is a normalized number. first, we check to see if the move out
7579 # would cause either an underflow or overflow. these cases are handled
7580 # separately. otherwise, set the FPCR to the proper rounding mode and
7581 # execute the move.
7582 #
7583         mov.w           SRC_EX(%a0),%d0         # extract exponent
7584         andi.w          &0x7fff,%d0             # strip sign
7585
7586         cmpi.w          %d0,&DBL_HI             # will operand overflow?
7587         bgt.w           fout_dbl_ovfl           # yes; go handle OVFL
7588         beq.w           fout_dbl_may_ovfl       # maybe; go handle possible OVFL
7589         cmpi.w          %d0,&DBL_LO             # will operand underflow?
7590         blt.w           fout_dbl_unfl           # yes; go handle underflow
7591
7592 #
7593 # NORMs(in range) can be stored out by a simple "fmov.d"
7594 # Unnormalized inputs can come through this point.
7595 #
7596 fout_dbl_exg:
7597         fmovm.x         SRC(%a0),&0x80          # fetch fop from stack
7598
7599         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7600         fmov.l          &0x0,%fpsr              # clear FPSR
7601
7602         fmov.d          %fp0,L_SCR1(%a6)        # store does convert and round
7603
7604         fmov.l          &0x0,%fpcr              # clear FPCR
7605         fmov.l          %fpsr,%d0               # save FPSR
7606
7607         or.w            %d0,2+USER_FPSR(%a6)    # set possible inex2/ainex
7608
7609         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7610         lea             L_SCR1(%a6),%a0         # pass: src addr
7611         movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7612         bsr.l           _dmem_write             # store dbl fop to memory
7613
7614         tst.l           %d1                     # did dstore fail?
7615         bne.l           facc_out_d              # yes
7616
7617         rts                                     # no; so we're finished
7618
7619 #
7620 # here, we know that the operand would UNFL if moved out to double prec,
7621 # so, denorm and round and then use generic store double routine to
7622 # write the value to memory.
7623 #
# fout_dbl_unfl: double precision move out that underflows.
#   In:  a0 = ptr to source operand, L_SCR3 = dbl prec,rnd mode.
#   Sets the UNFL bit in FPSR_EXCEPT, copies the operand to FP_SCR0
#   (normalizing it first if it is a DENORM), lets unf_res() build the
#   default underflow result and dst_dbl() convert it to double format.
#   The two result longwords returned in d0/d1 are staged in
#   L_SCR1/L_SCR2 and 8 bytes starting at L_SCR1 are written to EXC_EA
#   (assumes L_SCR2 immediately follows L_SCR1 -- TODO confirm).
#   The source pointer is pushed on the stack so that fout_sd_exc_unfl
#   can pop it when UNFL or INEX is enabled; otherwise it is discarded
#   before returning.
7624 fout_dbl_unfl:
7625         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set UNFL
7626
7627         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
7628         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
7629         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
7630         mov.l           %a0,-(%sp)              # save src ptr for the EXOP path
7631
7632         clr.l           %d0                     # pass: S.F. = 0
7633
7634         cmpi.b          STAG(%a6),&DENORM       # fetch src optype tag
7635         bne.b           fout_dbl_unfl_cont      # let DENORMs fall through
7636
7637         lea             FP_SCR0(%a6),%a0
7638         bsr.l           norm                    # normalize the DENORM
7639
7640 fout_dbl_unfl_cont:
7641         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
7642         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
7643         bsr.l           unf_res                 # calc default underflow result
7644
7645         lea             FP_SCR0(%a6),%a0        # pass: ptr to fop
7646         bsr.l           dst_dbl                 # convert to double prec
7647         mov.l           %d0,L_SCR1(%a6)         # stage first result longword
7648         mov.l           %d1,L_SCR2(%a6)         # stage second result longword
7649
7650         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7651         lea             L_SCR1(%a6),%a0         # pass: src addr
7652         movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7653         bsr.l           _dmem_write             # store dbl fop to memory
7654
7655         tst.l           %d1                     # did dstore fail?
7656         bne.l           facc_out_d              # yes
7657
7658         mov.b           FPCR_ENABLE(%a6),%d1
7659         andi.b          &0x0a,%d1               # is UNFL or INEX enabled?
7660         bne.w           fout_sd_exc_unfl        # yes
7661         addq.l          &0x4,%sp                # no; discard saved src ptr
7662         rts
7663
7664 #
7665 # it's definitely an overflow so call ovf_res to get the correct answer
7666 #
# fout_dbl_ovfl: double precision move out that definitely overflows.
#   In:  a0 = ptr to source operand, L_SCR3 = dbl prec,rnd mode.
#   Sets ovfl/aovfl/ainex in USER_FPSR (plus inex2 when any of the low
#   11 mantissa bits, which do not fit in double precision, are set),
#   fetches the default overflow result from ovf_res(), converts it with
#   "fmov.d" into L_SCR1 and writes the 8 bytes to memory at EXC_EA.
#   A failed store exits through facc_out_d.
#   The source pointer is pushed on the stack so that fout_sd_exc_ovfl
#   can pop it and build the EXOP in fp1 when OVFL or INEX is enabled;
#   otherwise it is discarded before returning.
7667 fout_dbl_ovfl:
7668         mov.w           2+SRC_LO(%a0),%d0       # fetch lowest mantissa word
7669         andi.w          &0x7ff,%d0              # any bits below dbl precision?
7670         bne.b           fout_dbl_ovfl_inex2     # yes; result is inexact
7671
7672         ori.w           &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
7673         bra.b           fout_dbl_ovfl_cont
7674 fout_dbl_ovfl_inex2:
7675         ori.w           &ovfinx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex/inex2
7676
7677 fout_dbl_ovfl_cont:
7678         mov.l           %a0,-(%sp)              # save src ptr for the EXOP path
7679
7680 # call ovf_res() w/ dbl prec and the correct rnd mode to create the default
7681 # overflow result. DON'T save the returned ccodes from ovf_res() since
7682 # fmove out doesn't alter them.
7683         tst.b           SRC_EX(%a0)             # is operand negative?
7684         smi             %d1                     # set if so
7685         mov.l           L_SCR3(%a6),%d0         # pass: dbl prec,rnd mode
7686         bsr.l           ovf_res                 # calc OVFL result
7687         fmovm.x         (%a0),&0x80             # load default overflow result
7688         fmov.d          %fp0,L_SCR1(%a6)        # store to double
7689
7690         mov.l           EXC_EA(%a6),%a1         # pass: dst addr
7691         lea             L_SCR1(%a6),%a0         # pass: src addr
7692         movq.l          &0x8,%d0                # pass: opsize is 8 bytes
7693         bsr.l           _dmem_write             # store dbl fop to memory
7694
7695         tst.l           %d1                     # did dstore fail?
7696         bne.l           facc_out_d              # yes
7697
# The EXOP is only needed for an enabled OVFL or INEX2 trap, so test the
# OVFL (0x10) and INEX2 (0x02) enable bits. The UNFL enable bit (0x08) is
# irrelevant on the overflow path.
7698         mov.b           FPCR_ENABLE(%a6),%d1
7699         andi.b          &0x12,%d1               # is OVFL or INEX enabled?
7700         bne.w           fout_sd_exc_ovfl        # yes
7701         addq.l          &0x4,%sp                # no; discard saved src ptr
7702         rts
7703
7704 #
7705 # move out MAY overflow:
7706 # (1) force the biased exp to 0x3fff (i.e. a true exponent of zero)
7707 # (2) do a move w/ the rnd prec,mode held in L_SCR3 so the fop gets rounded
7708 # (3) if |fop| is still < 2 (exp did not increase), then insert original
7709 #       exponent for the correct result (done at fout_dbl_exg).
7710 #     if |fop| rounded up to 2, then it overflowed so call ovf_res.
7711 #
7712 fout_dbl_may_ovfl:
7713         mov.w           SRC_EX(%a0),%d1         # fetch current sign
7714         andi.w          &0x8000,%d1             # keep it,clear exp
7715         ori.w           &0x3fff,%d1             # insert biased exp 0x3fff (true exp = 0)
7716         mov.w           %d1,FP_SCR0_EX(%a6)     # insert scaled exp
7717         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy hi(man)
7718         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy lo(man)
7719
7720         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
7721
7722         fmov.x          FP_SCR0(%a6),%fp0       # force fop to be rounded
7723         fmov.l          &0x0,%fpcr              # clear FPCR
7724
7725         fabs.x          %fp0                    # need absolute value
7726         fcmp.b          %fp0,&0x2               # is |fop| >= 2 (did exponent increase)?
7727         fblt.w          fout_dbl_exg            # no; go finish NORM
7728         bra.w           fout_dbl_ovfl           # yes; go handle overflow
7729
7730 #########################################################################
7731 # XDEF **************************************************************** #
7732 #       dst_dbl(): create double precision value from extended prec.    #
7733 #                                                                       #
7734 # XREF **************************************************************** #
7735 #       None                                                            #
7736 #                                                                       #
7737 # INPUT *************************************************************** #
7738 #       a0 = pointer to source operand in extended precision            #
7739 #                                                                       #
7740 # OUTPUT ************************************************************** #
7741 #       d0 = hi(double precision result)                                #
7742 #       d1 = lo(double precision result)                                #
7743 #                                                                       #
7744 # ALGORITHM *********************************************************** #
7745 #                                                                       #
7746 #  Changes extended precision to double precision.                      #
7747 #  Note: no attempt is made to round the extended value to double.      #
7748 #       dbl_sign = ext_sign                                             #
7749 #       dbl_exp = ext_exp - $3fff(ext bias) + $3ff(dbl bias)            #
7750 #       get rid of ext integer bit                                      #
7751 #       dbl_mant = ext_mant{62:11}                                      #
7752 #                                                                       #
7753 #               ---------------   ---------------    ---------------    #
7754 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
7755 #               ---------------   ---------------    ---------------    #
7756 #                95         64    63 62       32      31     11   0     #
7757 #                                    |                       |          #
7758 #                                    |                       |          #
7759 #                                    |                       |          #
7760 #                                    v                       v          #
7761 #                             ---------------   ---------------         #
7762 #  double   ->                |s|exp| mant  |   |  mant       |         #
7763 #                             ---------------   ---------------         #
7764 #                             63     51   32   31              0        #
7765 #                                                                       #
7766 #########################################################################
7767
7768 dst_dbl:
7769         clr.l           %d0                     # clear d0
7770         mov.w           FTEMP_EX(%a0),%d0       # get {sgn,exp} word
7771         subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
7772         addi.w          &DBL_BIAS,%d0           # add double precision bias
7773         tst.b           FTEMP_HI(%a0)           # is integer bit set?
7774         bmi.b           dst_get_dupper          # yes; not a denorm
7775         subq.w          &0x1,%d0                # no; denorm bias = DBL_BIAS - 1
7776 dst_get_dupper:
7777         swap            %d0                     # d0 now in upper word
7778         lsl.l           &0x4,%d0                # exp into dbl exp field; old sign bit shifted out
7779         tst.b           FTEMP_EX(%a0)           # test sign
7780         bpl.b           dst_get_dman            # if positive, go process mantissa
7781         bset            &0x1f,%d0               # if negative, set sign
7782 dst_get_dman:
7783         mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7784         bfextu          %d1{&1:&20},%d1         # get upper 20 bits of ms (skips integer bit)
7785         or.l            %d1,%d0                 # put these bits in ms word of double
7786         mov.l           %d0,L_SCR1(%a6)         # save hi lword of double in L_SCR1
7787         mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7788         mov.l           &21,%d0                 # load shift count
7789         lsl.l           %d0,%d1                 # put lower 11 bits in upper bits
7790         mov.l           %d1,L_SCR2(%a6)         # build lower lword in memory
7791         mov.l           FTEMP_LO(%a0),%d1       # get ls mantissa
7792         bfextu          %d1{&0:&21},%d0         # get upper 21 bits of ls mantissa
7793         mov.l           L_SCR2(%a6),%d1         # fetch partial lower lword
7794         or.l            %d0,%d1                 # put them in double result
7795         mov.l           L_SCR1(%a6),%d0         # d0 = hi lword of double
7796         rts
7797
7798 #########################################################################
7799 # XDEF **************************************************************** #
7800 #       dst_sgl(): create single precision value from extended prec     #
7801 #                                                                       #
7802 # XREF **************************************************************** #
7803 #                                                                       #
7804 # INPUT *************************************************************** #
7805 #       a0 = pointer to source operand in extended precision            #
7806 #                                                                       #
7807 # OUTPUT ************************************************************** #
7808 #       d0 = single precision result                                    #
7809 #                                                                       #
7810 # ALGORITHM *********************************************************** #
7811 #                                                                       #
7812 # Changes extended precision to single precision.                       #
7813 #       sgl_sign = ext_sign                                             #
7814 #       sgl_exp = ext_exp - $3fff(ext bias) + $7f(sgl bias)             #
7815 #       get rid of ext integer bit                                      #
7816 #       sgl_mant = ext_mant{62:40}                                      #
7817 #                                                                       #
7818 #               ---------------   ---------------    ---------------    #
7819 #  extended ->  |s|    exp    |   |1| ms mant   |    | ls mant     |    #
7820 #               ---------------   ---------------    ---------------    #
7821 #                95         64    63 62    40 32      31     12   0     #
7822 #                                    |     |                            #
7823 #                                    |     |                            #
7824 #                                    |     |                            #
7825 #                                    v     v                            #
7826 #                             ---------------                           #
7827 #  single   ->                |s|exp| mant  |                           #
7828 #                             ---------------                           #
7829 #                             31     22     0                           #
7830 #                                                                       #
7831 #########################################################################
7832
7833 dst_sgl:
7834         clr.l           %d0                     # clear d0
7835         mov.w           FTEMP_EX(%a0),%d0       # get {sgn,exp} word
7836         subi.w          &EXT_BIAS,%d0           # subtract extended precision bias
7837         addi.w          &SGL_BIAS,%d0           # add single precision bias
7838         tst.b           FTEMP_HI(%a0)           # is integer bit set?
7839         bmi.b           dst_get_supper          # yes; not a denorm
7840         subq.w          &0x1,%d0                # no; denorm bias = SGL_BIAS - 1
7841 dst_get_supper:
7842         swap            %d0                     # put exp in upper word of d0
7843         lsl.l           &0x7,%d0                # shift it into single exp bits
7844         tst.b           FTEMP_EX(%a0)           # test sign
7845         bpl.b           dst_get_sman            # if positive, continue
7846         bset            &0x1f,%d0               # if negative, put in sign first
7847 dst_get_sman:
7848         mov.l           FTEMP_HI(%a0),%d1       # get ms mantissa
7849         andi.l          &0x7fffff00,%d1         # get upper 23 bits of ms (drops integer bit)
7850         lsr.l           &0x8,%d1                # and put them flush right
7851         or.l            %d1,%d0                 # put these bits in ms word of single
7852         rts
7853
7854 ##############################################################################
7855 fout_pack:                                      # store FP_SRC to the <ea> as a 12-byte packed value
7856         bsr.l           _calc_ea_fout           # fetch the <ea>
7857         mov.l           %a0,-(%sp)              # save dst addr; popped into a1 at fout_pack_write
7858
7859         mov.b           STAG(%a6),%d0           # fetch input type
7860         bne.w           fout_pack_not_norm      # input is not NORM
7861
7862 fout_pack_norm:
7863         btst            &0x4,EXC_CMDREG(%a6)    # static or dynamic?
7864         beq.b           fout_pack_s             # static
7865
7866 fout_pack_d:
7867         mov.b           1+EXC_CMDREG(%a6),%d1   # fetch dynamic reg
7868         lsr.b           &0x4,%d1
7869         andi.w          &0x7,%d1                # keep 3-bit register number only
7870
7871         bsr.l           fetch_dreg              # fetch Dn w/ k-factor
7872
7873         bra.b           fout_pack_type
7874 fout_pack_s:
7875         mov.b           1+EXC_CMDREG(%a6),%d0   # fetch static field
7876
7877 fout_pack_type:
7878         bfexts          %d0{&25:&7},%d0         # extract k-factor
7879         mov.l   %d0,-(%sp)                      # save k-factor across bindec()
7880
7881         lea             FP_SRC(%a6),%a0         # pass: ptr to input
7882
7883 # bindec is currently scrambling FP_SRC for denorm inputs.
7884 # we'll have to change this, but for now, tough luck!!!
7885         bsr.l           bindec                  # convert xprec to packed
7886
7887 #       andi.l          &0xcfff000f,FP_SCR0(%a6) # clear unused fields
7888         andi.l          &0xcffff00f,FP_SCR0(%a6) # clear unused fields
7889
7890         mov.l   (%sp)+,%d0                      # restore k-factor
7891
7892         tst.b           3+FP_SCR0_EX(%a6)       # is low byte of 1st lword zero?
7893         bne.b           fout_pack_set           # no
7894         tst.l           FP_SCR0_HI(%a6)         # is 2nd lword zero?
7895         bne.b           fout_pack_set           # no
7896         tst.l           FP_SCR0_LO(%a6)         # is 3rd lword zero?
7897         bne.b           fout_pack_set           # no
7898
7899 # add the extra condition that only if the k-factor was zero, too, should
7900 # we zero the exponent
7901         tst.l           %d0
7902         bne.b           fout_pack_set
7903 # "mantissa" is all zero which means that the answer is zero. but, the '040
7904 # algorithm allows the exponent to be non-zero. the 881/2 do not. Therefore,
7905 # if the mantissa is zero, I will zero the exponent, too.
7906 # the question now is whether the exponents sign bit is allowed to be non-zero
7907 # for a zero, also...
7908         andi.w          &0xf000,FP_SCR0(%a6)    # keep top nibble, clear rest of 1st word
7909
7910 fout_pack_set:
7911
7912         lea             FP_SCR0(%a6),%a0        # pass: src addr
7913
7914 fout_pack_write:
7915         mov.l           (%sp)+,%a1              # pass: dst addr
7916         mov.l           &0xc,%d0                # pass: opsize is 12 bytes
7917
7918         cmpi.b          SPCOND_FLG(%a6),&mda7_flg
7919         beq.b           fout_pack_a7
7920
7921         bsr.l           _dmem_write             # write packed number to memory
7922
7923         tst.l           %d1                     # did dstore fail?
7924         bne.w           fout_ext_err            # yes
7925
7926         rts
7927
7928 # we don't want to do the write if the exception occurred in supervisor mode
7929 # so _mem_write2() handles this for us.
7930 fout_pack_a7:
7931         bsr.l           _mem_write2             # write packed number to memory
7932
7933         tst.l           %d1                     # did dstore fail?
7934         bne.w           fout_ext_err            # yes
7935
7936         rts
7937
7938 fout_pack_not_norm:
7939         cmpi.b          %d0,&DENORM             # is it a DENORM?
7940         beq.w           fout_pack_norm          # yes
7941         lea             FP_SRC(%a6),%a0         # pass: src addr (FP_SRC is written as is)
7942         clr.w           2+FP_SRC_EX(%a6)        # clear the word following {sgn,exp}
7943         cmpi.b          %d0,&SNAN               # is it an SNAN?
7944         beq.b           fout_pack_snan          # yes
7945         bra.b           fout_pack_write         # no
7946
7947 fout_pack_snan:
7948         ori.w           &snaniop2_mask,FPSR_EXCEPT(%a6) # set SNAN/AIOP
7949         bset            &0x6,FP_SRC_HI(%a6)     # set snan bit
7950         bra.b           fout_pack_write
7951
7952 #########################################################################
7953 # XDEF **************************************************************** #
7954 #       fmul(): emulates the fmul instruction                           #
7955 #       fsmul(): emulates the fsmul instruction                         #
7956 #       fdmul(): emulates the fdmul instruction                         #
7957 #                                                                       #
7958 # XREF **************************************************************** #
7959 #       scale_to_zero_src() - scale src exponent to zero                #
7960 #       scale_to_zero_dst() - scale dst exponent to zero                #
7961 #       unf_res() - return default underflow result                     #
7962 #       ovf_res() - return default overflow result                      #
7963 #       res_qnan() - return QNAN result                                 #
7964 #       res_snan() - return SNAN result                                 #
7965 #                                                                       #
7966 # INPUT *************************************************************** #
7967 #       a0 = pointer to extended precision source operand               #
7968 #       a1 = pointer to extended precision destination operand          #
7969 #       d0  rnd prec,mode                                               #
7970 #                                                                       #
7971 # OUTPUT ************************************************************** #
7972 #       fp0 = result                                                    #
7973 #       fp1 = EXOP (if exception occurred)                              #
7974 #                                                                       #
7975 # ALGORITHM *********************************************************** #
7976 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
7977 # norms/denorms into ext/sgl/dbl precision.                             #
7978 #       For norms/denorms, scale the exponents such that a multiply     #
7979 # instruction won't cause an exception. Use the regular fmul to         #
7980 # compute a result. Check if the regular operands would have taken      #
7981 # an exception. If so, return the default overflow/underflow result     #
7982 # and return the EXOP if exceptions are enabled. Else, scale the        #
7983 # result operand to the proper exponent.                                #
7984 #                                                                       #
7985 #########################################################################
7986
7987         align           0x10
7988 tbl_fmul_ovfl:                                  # S.F. thresholds, indexed by rnd prec
7989         long            0x3fff - 0x7ffe         # ext_max
7990         long            0x3fff - 0x407e         # sgl_max
7991         long            0x3fff - 0x43fe         # dbl_max
7992 tbl_fmul_unfl:                                  # S.F. thresholds, indexed by rnd prec
7993         long            0x3fff + 0x0001         # ext_unfl
7994         long            0x3fff - 0x3f80         # sgl_unfl
7995         long            0x3fff - 0x3c00         # dbl_unfl
7996
7997         global          fsmul
7998 fsmul:
7999         andi.b          &0x30,%d0               # clear rnd prec, keep rnd mode
8000         ori.b           &s_mode*0x10,%d0        # insert sgl prec
8001         bra.b           fmul
8002
8003         global          fdmul
8004 fdmul:
8005         andi.b          &0x30,%d0               # clear rnd prec, keep rnd mode
8006         ori.b           &d_mode*0x10,%d0        # insert dbl prec
8007 # fdmul falls through into fmul
8008         global          fmul
8009 fmul:
8010         mov.l           %d0,L_SCR3(%a6)         # store rnd info
8011
8012         clr.w           %d1
8013         mov.b           DTAG(%a6),%d1
8014         lsl.b           &0x3,%d1
8015         or.b            STAG(%a6),%d1           # combine src tags
8016         bne.w           fmul_not_norm           # optimize on non-norm input
8017
8018 fmul_norm:
8019         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst operand to FP_SCR1
8020         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
8021         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
8022
8023         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src operand to FP_SCR0
8024         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8025         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8026
8027         bsr.l           scale_to_zero_src       # scale src exponent
8028         mov.l           %d0,-(%sp)              # save scale factor 1
8029
8030         bsr.l           scale_to_zero_dst       # scale dst exponent
8031
8032         add.l           %d0,(%sp)               # SCALE_FACTOR = scale1 + scale2
8033
8034         mov.w           2+L_SCR3(%a6),%d1       # fetch precision
8035         lsr.b           &0x6,%d1                # shift to lo bits
8036         mov.l           (%sp)+,%d0              # load S.F.
8037         cmp.l           %d0,(tbl_fmul_ovfl.w,%pc,%d1.w*4) # would result ovfl?
8038         beq.w           fmul_may_ovfl           # result may rnd to overflow
8039         blt.w           fmul_ovfl               # result will overflow
8040
8041         cmp.l           %d0,(tbl_fmul_unfl.w,%pc,%d1.w*4) # would result unfl?
8042         beq.w           fmul_may_unfl           # result may rnd to no unfl
8043         bgt.w           fmul_unfl               # result will underflow
8044
8045 #
8046 # NORMAL:
8047 # - the result of the multiply operation will neither overflow nor underflow.
8048 # - do the multiply to the proper precision and rounding mode.
8049 # - scale the result exponent using the scale factor. if both operands were
8050 # normalized then we really don't need to go through this scaling. but for now,
8051 # this will do.
8052 #
8053 fmul_normal:
8054         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8055
8056         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8057         fmov.l          &0x0,%fpsr              # clear FPSR
8058
8059         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8060
8061         fmov.l          %fpsr,%d1               # save status
8062         fmov.l          &0x0,%fpcr              # clear FPCR
8063
8064         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8065
8066 fmul_normal_exit:                               # entered w/ fp0 = scaled result, d0 = S.F.
8067         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8068         mov.l           %d2,-(%sp)              # save d2
8069         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8070         mov.l           %d1,%d2                 # make a copy
8071         andi.l          &0x7fff,%d1             # strip sign
8072         andi.w          &0x8000,%d2             # keep old sign
8073         sub.l           %d0,%d1                 # apply scale factor (exp - S.F.)
8074         or.w            %d2,%d1                 # concat old sign,new exp
8075         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8076         mov.l           (%sp)+,%d2              # restore d2
8077         fmovm.x         FP_SCR0(%a6),&0x80      # return rescaled result in fp0
8078         rts
8079
8080 #
8081 # OVERFLOW:
8082 # - the result of the multiply operation is an overflow.
8083 # - do the multiply to the proper precision and rounding mode in order to
8084 # set the inexact bits.
8085 # - calculate the default result and return it in fp0.
8086 # - if overflow or inexact is enabled, we need a multiply result rounded to
8087 # extended precision. if the original operation was extended, then we have this
8088 # result. if the original operation was single or double, we have to do another
8089 # multiply using extended precision and the correct rounding mode. the result
8090 # of this operation then has its exponent scaled by -0x6000 to create the
8091 # exceptional operand.
8092 #
8093 fmul_ovfl:
8094         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8095
8096         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8097         fmov.l          &0x0,%fpsr              # clear FPSR
8098
8099         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8100
8101         fmov.l          %fpsr,%d1               # save status
8102         fmov.l          &0x0,%fpcr              # clear FPCR
8103
8104         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8105
8106 # save setting this until now because this is where fmul_may_ovfl may jump in
8107 fmul_ovfl_tst:
8108         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8109
8110         mov.b           FPCR_ENABLE(%a6),%d1
8111         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8112         bne.b           fmul_ovfl_ena           # yes
8113
8114 # calculate the default result
8115 fmul_ovfl_dis:
8116         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8117         sne             %d1                     # set sign param accordingly
8118         mov.l           L_SCR3(%a6),%d0         # pass rnd prec,mode
8119         bsr.l           ovf_res                 # calculate default result
8120         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
8121         fmovm.x         (%a0),&0x80             # return default result in fp0
8122         rts
8123
8124 #
8125 # OVFL or INEX is enabled; Create EXOP:
8126 # - if precision is extended, then we have the EXOP. simply bias the exponent
8127 # with an extra -0x6000. if the precision is single or double, we need to
8128 # calculate a result rounded to extended precision.
8129 #
8130 fmul_ovfl_ena:
8131         mov.l           L_SCR3(%a6),%d1
8132         andi.b          &0xc0,%d1               # test the rnd prec
8133         bne.b           fmul_ovfl_ena_sd        # it's sgl or dbl
8134
8135 fmul_ovfl_ena_cont:
8136         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
8137
8138         mov.l           %d2,-(%sp)              # save d2
8139         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8140         mov.w           %d1,%d2                 # make a copy
8141         andi.l          &0x7fff,%d1             # strip sign
8142         sub.l           %d0,%d1                 # apply scale factor (exp - S.F.)
8143         subi.l          &0x6000,%d1             # subtract bias
8144         andi.w          &0x7fff,%d1             # clear sign bit
8145         andi.w          &0x8000,%d2             # keep old sign
8146         or.w            %d2,%d1                 # concat old sign,new exp
8147         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8148         mov.l           (%sp)+,%d2              # restore d2
8149         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8150         bra.b           fmul_ovfl_dis           # go return default result in fp0
8151
8152 fmul_ovfl_ena_sd:
8153         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8154
8155         mov.l           L_SCR3(%a6),%d1
8156         andi.b          &0x30,%d1               # keep rnd mode only
8157         fmov.l          %d1,%fpcr               # set FPCR
8158
8159         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8160
8161         fmov.l          &0x0,%fpcr              # clear FPCR
8162         bra.b           fmul_ovfl_ena_cont
8163
8164 #
8165 # may OVERFLOW:
8166 # - the result of the multiply operation MAY overflow.
8167 # - do the multiply to the proper precision and rounding mode in order to
8168 # set the inexact bits.
8169 # - if |result| >= 2, it overflowed: join fmul_ovfl_tst. else fmul_normal_exit.
8170 #
8171 fmul_may_ovfl:
8172         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8173
8174         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8175         fmov.l          &0x0,%fpsr              # clear FPSR
8176
8177         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8178
8179         fmov.l          %fpsr,%d1               # save status
8180         fmov.l          &0x0,%fpcr              # clear FPCR
8181
8182         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8183
8184         fabs.x          %fp0,%fp1               # make a copy of result
8185         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
8186         fbge.w          fmul_ovfl_tst           # yes; overflow has occurred
8187
8188 # no, it didn't overflow; we have correct result
8189         bra.w           fmul_normal_exit
8190
8191 #
8192 # UNDERFLOW:
8193 # - the result of the multiply operation is an underflow.
8194 # - do the multiply to the proper precision and rounding mode in order to
8195 # set the inexact bits.
8196 # - calculate the default result and return it in fp0.
8197 # - if underflow or inexact is enabled, we need a multiply result rounded to
8198 # extended precision. if the original operation was extended, then we have this
8199 # result. if the original operation was single or double, we have to do another
8200 # multiply using extended precision and the correct rounding mode. the result
8201 # of this operation then has its exponent scaled by +0x6000 to create the
8202 # exceptional operand.
8203 #
8204 fmul_unfl:
8205         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8206
8207 # for fun, let's use only extended precision, round to zero. then, let
8208 # the unf_res() routine figure out all the rest.
8209 # will we get the correct answer?
8210         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8211
8212         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
8213         fmov.l          &0x0,%fpsr              # clear FPSR
8214
8215         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8216
8217         fmov.l          %fpsr,%d1               # save status
8218         fmov.l          &0x0,%fpcr              # clear FPCR
8219
8220         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8221
8222         mov.b           FPCR_ENABLE(%a6),%d1
8223         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8224         bne.b           fmul_unfl_ena           # yes
8225
8226 fmul_unfl_dis:
8227         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8228
8229         lea             FP_SCR0(%a6),%a0        # pass: result addr
8230         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
8231         bsr.l           unf_res                 # calculate default result
8232         or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
8233         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8234         rts
8235
8236 #
8237 # UNFL or INEX is enabled; create the EXOP in fp1.
8238 #
8239 fmul_unfl_ena:
8240         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
8241
8242         mov.l           L_SCR3(%a6),%d1
8243         andi.b          &0xc0,%d1               # is precision extended?
8244         bne.b           fmul_unfl_ena_sd        # no, sgl or dbl
8245
8246 # if the rnd mode is anything but RZ, then we have to re-do the above
8247 # multiplication because we used RZ for all.
8248         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8249
8250 fmul_unfl_ena_cont:
8251         fmov.l          &0x0,%fpsr              # clear FPSR
8252
8253         fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
8254
8255         fmov.l          &0x0,%fpcr              # clear FPCR
8256
8257         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
8258         mov.l           %d2,-(%sp)              # save d2
8259         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8260         mov.l           %d1,%d2                 # make a copy
8261         andi.l          &0x7fff,%d1             # strip sign
8262         andi.w          &0x8000,%d2             # keep old sign
8263         sub.l           %d0,%d1                 # apply scale factor (exp - S.F.)
8264         addi.l          &0x6000,%d1             # add bias
8265         andi.w          &0x7fff,%d1             # clear sign bit
8266         or.w            %d2,%d1                 # concat old sign,new exp
8267         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8268         mov.l           (%sp)+,%d2              # restore d2
8269         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8270         bra.w           fmul_unfl_dis           # go return default result in fp0
8271
8272 fmul_unfl_ena_sd:
8273         mov.l           L_SCR3(%a6),%d1
8274         andi.b          &0x30,%d1               # use only rnd mode
8275         fmov.l          %d1,%fpcr               # set FPCR
8276
8277         bra.b           fmul_unfl_ena_cont
8278
8279 # MAY UNDERFLOW:
8280 # -use the correct rounding mode and precision. this code favors operations
8281 # that do not underflow.
8282 fmul_may_unfl:
8283         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8284
8285         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8286         fmov.l          &0x0,%fpsr              # clear FPSR
8287
8288         fmul.x          FP_SCR0(%a6),%fp0       # execute multiply
8289
8290         fmov.l          %fpsr,%d1               # save status
8291         fmov.l          &0x0,%fpcr              # clear FPCR
8292
8293         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8294
8295         fabs.x          %fp0,%fp1               # make a copy of result
8296         fcmp.b          %fp1,&0x2               # compare |result| with 2.b
8297         fbgt.w          fmul_normal_exit        # |result| > 2; no underflow occurred
8298         fblt.w          fmul_unfl               # |result| < 2; underflow occurred
8299
8300 #
8301 # we still don't know if underflow occurred. result is ~ equal to 2. but,
8302 # we don't know if the result was an underflow that rounded up to a 2 or
8303 # a normalized number that rounded down to a 2. so, redo the entire operation
8304 # using RZ as the rounding mode to see what the pre-rounded result is.
8305 # this case should be relatively rare.
8306 #
8307         fmovm.x         FP_SCR1(%a6),&0x40      # load dst operand
8308
8309         mov.l           L_SCR3(%a6),%d1
8310         andi.b          &0xc0,%d1               # keep rnd prec
8311         ori.b           &rz_mode*0x10,%d1       # insert RZ
8312
8313         fmov.l          %d1,%fpcr               # set FPCR
8314         fmov.l          &0x0,%fpsr              # clear FPSR
8315
8316         fmul.x          FP_SCR0(%a6),%fp1       # execute multiply
8317
8318         fmov.l          &0x0,%fpcr              # clear FPCR
8319         fabs.x          %fp1                    # make absolute value
8320         fcmp.b          %fp1,&0x2               # is |result| < 2.b?
8321         fbge.w          fmul_normal_exit        # no; no underflow occurred
8322         bra.w           fmul_unfl               # yes, underflow occurred
8323
8324 ################################################################################
8325
8326 #
8327 # Multiply: inputs are not both normalized; what are they?
8328 # Dispatches through tbl_fmul_op using d1 as the index (d1 is set up by the
8329 fmul_not_norm:                                  # caller, not visible here; presumably (DTAG << 3) | STAG as in fdiv)
8330         mov.w           (tbl_fmul_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit offset from table entry d1
8331         jmp             (tbl_fmul_op.b,%pc,%d1.w)       # jump to tbl_fmul_op + offset
8332
8333         swbeg           &48                     # 48 table entries follow
8334 tbl_fmul_op:                                    # 16-bit offsets relative to tbl_fmul_op; 6 groups of 8 entries
8335         short           fmul_norm       - tbl_fmul_op # NORM x NORM
8336         short           fmul_zero       - tbl_fmul_op # NORM x ZERO
8337         short           fmul_inf_src    - tbl_fmul_op # NORM x INF
8338         short           fmul_res_qnan   - tbl_fmul_op # NORM x QNAN
8339         short           fmul_norm       - tbl_fmul_op # NORM x DENORM
8340         short           fmul_res_snan   - tbl_fmul_op # NORM x SNAN
8341         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8342         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8343
8344         short           fmul_zero       - tbl_fmul_op # ZERO x NORM
8345         short           fmul_zero       - tbl_fmul_op # ZERO x ZERO
8346         short           fmul_res_operr  - tbl_fmul_op # ZERO x INF
8347         short           fmul_res_qnan   - tbl_fmul_op # ZERO x QNAN
8348         short           fmul_zero       - tbl_fmul_op # ZERO x DENORM
8349         short           fmul_res_snan   - tbl_fmul_op # ZERO x SNAN
8350         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8351         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8352
8353         short           fmul_inf_dst    - tbl_fmul_op # INF x NORM
8354         short           fmul_res_operr  - tbl_fmul_op # INF x ZERO
8355         short           fmul_inf_dst    - tbl_fmul_op # INF x INF
8356         short           fmul_res_qnan   - tbl_fmul_op # INF x QNAN
8357         short           fmul_inf_dst    - tbl_fmul_op # INF x DENORM
8358         short           fmul_res_snan   - tbl_fmul_op # INF x SNAN
8359         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8360         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8361
8362         short           fmul_res_qnan   - tbl_fmul_op # QNAN x NORM
8363         short           fmul_res_qnan   - tbl_fmul_op # QNAN x ZERO
8364         short           fmul_res_qnan   - tbl_fmul_op # QNAN x INF
8365         short           fmul_res_qnan   - tbl_fmul_op # QNAN x QNAN
8366         short           fmul_res_qnan   - tbl_fmul_op # QNAN x DENORM
8367         short           fmul_res_snan   - tbl_fmul_op # QNAN x SNAN
8368         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8369         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8370
8371         short           fmul_norm       - tbl_fmul_op # DENORM x NORM (row inferred from column order; TODO confirm tag value)
8372         short           fmul_zero       - tbl_fmul_op # DENORM x ZERO
8373         short           fmul_inf_src    - tbl_fmul_op # DENORM x INF
8374         short           fmul_res_qnan   - tbl_fmul_op # DENORM x QNAN
8375         short           fmul_norm       - tbl_fmul_op # DENORM x DENORM
8376         short           fmul_res_snan   - tbl_fmul_op # DENORM x SNAN
8377         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8378         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8379
8380         short           fmul_res_snan   - tbl_fmul_op # SNAN x NORM
8381         short           fmul_res_snan   - tbl_fmul_op # SNAN x ZERO
8382         short           fmul_res_snan   - tbl_fmul_op # SNAN x INF
8383         short           fmul_res_snan   - tbl_fmul_op # SNAN x QNAN
8384         short           fmul_res_snan   - tbl_fmul_op # SNAN x DENORM
8385         short           fmul_res_snan   - tbl_fmul_op # SNAN x SNAN
8386         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8387         short           tbl_fmul_op     - tbl_fmul_op # offset 0 filler (presumably never selected)
8388
8389 fmul_res_operr:                                 # tbl_fmul_op target for ZERO x INF and INF x ZERO
8390         bra.l           res_operr               # long branch to res_operr (defined elsewhere)
8391 fmul_res_snan:                                  # tbl_fmul_op target when either operand is an SNAN
8392         bra.l           res_snan                # long branch to res_snan (defined elsewhere)
8393 fmul_res_qnan:                                  # tbl_fmul_op target for QNAN operands with no SNAN
8394         bra.l           res_qnan                # long branch to res_qnan (defined elsewhere)
8395
8396 #
8397 # Multiply: (Zero x Zero) || (Zero x norm) || (Zero x denorm)
8398 # Returns a signed ZERO in fp0; the sign is the XOR of the operand signs.
8399         global          fmul_zero               # global for fsglmul
8400 fmul_zero:
8401         mov.b           SRC_EX(%a0),%d0         # d0 = src sign byte
8402         mov.b           DST_EX(%a1),%d1         # d1 = dst sign byte
8403         eor.b           %d0,%d1                 # exclusive or the signs (bit 7 = result sign)
8404         bpl.b           fmul_zero_p             # result ZERO is pos.
8405 fmul_zero_n:
8406         fmov.s          &0x80000000,%fp0        # load -ZERO
8407         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
8408         rts
8409 fmul_zero_p:
8410         fmov.s          &0x00000000,%fp0        # load +ZERO
8411         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
8412         rts
8413
8414 #
8415 # Multiply: (inf x inf) || (inf x norm) || (inf x denorm)
8416 #
8417 # Note: The j-bit for an infinity is a don't-care. However, to be
8418 # strictly compatible w/ the 68881/882, we make sure to return an
8419 # INF w/ the j-bit set if the input INF j-bit was set. Destination
8420 # INFs take priority.
8421 #
8422         global          fmul_inf_dst            # global for fsglmul
8423 fmul_inf_dst:
8424         fmovm.x         DST(%a1),&0x80          # load dst INF into fp0 as the result
8425         mov.b           SRC_EX(%a0),%d0         # d0 = src sign byte
8426         mov.b           DST_EX(%a1),%d1         # d1 = dst sign byte
8427         eor.b           %d0,%d1                 # exclusive or the signs (bit 7 = result sign)
8428         bpl.b           fmul_inf_dst_p          # result INF is pos.
8429 fmul_inf_dst_n:                                 # negative tail; also reached from fmul_inf_src
8430         fabs.x          %fp0                    # clear result sign
8431         fneg.x          %fp0                    # set result sign
8432         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
8433         rts
8434 fmul_inf_dst_p:                                 # positive tail; also reached from fmul_inf_src
8435         fabs.x          %fp0                    # clear result sign
8436         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
8437         rts
8438
8439         global          fmul_inf_src            # global for fsglmul
8440 fmul_inf_src:                                   # like fmul_inf_dst, but the INF returned is the src operand
8441         fmovm.x         SRC(%a0),&0x80          # load src INF into fp0 as the result
8442         mov.b           SRC_EX(%a0),%d0         # d0 = src sign byte
8443         mov.b           DST_EX(%a1),%d1         # d1 = dst sign byte
8444         eor.b           %d0,%d1                 # exclusive or the signs (bit 7 = result sign)
8445         bpl.b           fmul_inf_dst_p          # result INF is pos.; reuse fmul_inf_dst tail
8446         bra.b           fmul_inf_dst_n          # result INF is neg.; reuse fmul_inf_dst tail
8447
8448 #########################################################################
8449 # XDEF **************************************************************** #
8450 #       fin(): emulates the fmove instruction                           #
8451 #       fsin(): emulates the fsmove instruction                         #
8452 #       fdin(): emulates the fdmove instruction                         #
8453 #                                                                       #
8454 # XREF **************************************************************** #
8455 #       norm() - normalize mantissa for EXOP on denorm                  #
8456 #       scale_to_zero_src() - scale src exponent to zero                #
8457 #       ovf_res() - return default overflow result                      #
8458 #       unf_res() - return default underflow result                     #
8459 #       res_qnan_1op() - return QNAN result                             #
8460 #       res_snan_1op() - return SNAN result                             #
8461 #                                                                       #
8462 # INPUT *************************************************************** #
8463 #       a0 = pointer to extended precision source operand               #
8464 #       d0 = round prec/mode                                            #
8465 #                                                                       #
8466 # OUTPUT ************************************************************** #
8467 #       fp0 = result                                                    #
8468 #       fp1 = EXOP (if exception occurred)                              #
8469 #                                                                       #
8470 # ALGORITHM *********************************************************** #
8471 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
8472 # norms into extended, single, and double precision.                    #
8473 #       Norms can be emulated w/ a regular fmove instruction. For       #
8474 # sgl/dbl, must scale exponent and perform an "fmove". Check to see     #
8475 # if the result would have overflowed/underflowed. If so, use unf_res() #
8476 # or ovf_res() to return the default result. Also return EXOP if        #
8477 # exception is enabled. If no exception, return the default result.     #
8478 #       Unnorms don't pass through here.                                #
8479 #                                                                       #
8480 #########################################################################
8481
8482         global          fsin
8483 fsin:                                           # fsmove entry: force single precision, then run fin
8484         andi.b          &0x30,%d0               # clear rnd prec (keep only rnd mode bits)
8485         ori.b           &s_mode*0x10,%d0        # insert sgl precision
8486         bra.b           fin                     # continue in the common fin code
8487
8488         global          fdin
8489 fdin:                                           # fdmove entry: force double precision, then fall through into fin
8490         andi.b          &0x30,%d0               # clear rnd prec (keep only rnd mode bits)
8491         ori.b           &d_mode*0x10,%d0        # insert dbl precision
8492
8493         global          fin
8494 fin:                                            # fmove entry; in: a0 = src operand ptr, d0 = rnd prec/mode
8495         mov.l           %d0,L_SCR3(%a6)         # store rnd info
8496
8497         mov.b           STAG(%a6),%d1           # fetch src optype tag
8498         bne.w           fin_not_norm            # nonzero tag: not a NORM; the NORM case falls through
8499
8500 #
8501 # FP MOVE IN: NORMs and DENORMs ONLY!
8502 #
8503 fin_norm:
8504         andi.b          &0xc0,%d0               # isolate rnd prec; zero means extended
8505         bne.w           fin_not_ext             # not extended, so go handle dbl or sgl
8506
8507 #
8508 # precision selected is extended. so...we cannot get an underflow
8509 # or overflow because of rounding to the correct precision. so...
8510 # skip the scaling and unscaling...
8511 #
8512         tst.b           SRC_EX(%a0)             # is the operand negative?
8513         bpl.b           fin_norm_done           # no
8514         bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
8515 fin_norm_done:
8516         fmovm.x         SRC(%a0),&0x80          # return src operand in fp0
8517         rts
8518
8519 #
8520 # for an extended precision DENORM, the UNFL exception bit is set;
8521 # the accrued bit is NOT set in this instance (no inexactness!)
8522 #
8523 fin_denorm:
8524         andi.b          &0xc0,%d0               # isolate rnd prec; zero means extended
8525         bne.w           fin_not_ext             # not extended, so go handle dbl or sgl
8526
8527         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8528         tst.b           SRC_EX(%a0)             # is the operand negative?
8529         bpl.b           fin_denorm_done         # no
8530         bset            &neg_bit,FPSR_CC(%a6)   # yes, so set 'N' ccode bit
8531 fin_denorm_done:
8532         fmovm.x         SRC(%a0),&0x80          # return src operand in fp0
8533         btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
8534         bne.b           fin_denorm_unfl_ena     # yes; go build the EXOP as well
8535         rts
8536
8537 #
8538 # the input is an extended DENORM and underflow is enabled in the FPCR.
8539 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
8540 # exponent and insert back into the operand.
8541 #
8542 fin_denorm_unfl_ena:
8543         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)     # copy src operand into FP_SCR0
8544         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8545         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8546         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
8547         bsr.l           norm                    # normalize result; d0 is used below as the shift value
8548         neg.w           %d0                     # new exponent = -(shft val)
8549         addi.w          &0x6000,%d0             # add new bias to exponent
8550         mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
8551         andi.w          &0x8000,%d1             # keep old sign
8552         andi.w          &0x7fff,%d0             # clear sign position
8553         or.w            %d1,%d0                 # concat new exp,old sign
8554         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
8555         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8556         rts
8557
8558 #
8559 # operand is to be rounded to single or double precision
8560 #
8561 fin_not_ext:                                    # d0.b holds only the rnd prec field here (masked with 0xc0 by the caller)
8562         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
8563         bne.b           fin_dbl                 # not sgl, so handle dbl; sgl falls through
8564
8565 #
8566 # operand is to be rounded to single precision
8567 #
8568 fin_sgl:
8569         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)     # copy src operand into FP_SCR0
8570         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8571         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8572         bsr.l           scale_to_zero_src       # calculate scale factor (used below in d0)
8573
8574         cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
8575         bge.w           fin_sd_unfl             # yes; go handle underflow
8576         cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
8577         beq.w           fin_sd_may_ovfl         # maybe; go check
8578         blt.w           fin_sd_ovfl             # yes; go handle overflow (otherwise fall into fin_sd_normal)
8579
8580 #
8581 # operand will NOT overflow or underflow when moved into the fp reg file
8582 #
8583 fin_sd_normal:
8584         fmov.l          &0x0,%fpsr              # clear FPSR
8585         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8586
8587         fmov.x          FP_SCR0(%a6),%fp0       # perform move
8588
8589         fmov.l          %fpsr,%d1               # save FPSR
8590         fmov.l          &0x0,%fpcr              # clear FPCR
8591
8592         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8593
8594 fin_sd_normal_exit:                             # also entered from fin_sd_may_ovfl; expects result in fp0, scale factor in d0
8595         mov.l           %d2,-(%sp)              # save d2
8596         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8597         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8598         mov.w           %d1,%d2                 # make a copy
8599         andi.l          &0x7fff,%d1             # strip sign
8600         sub.l           %d0,%d1                 # subtract scale factor (undo the scaling)
8601         andi.w          &0x8000,%d2             # keep old sign
8602         or.w            %d1,%d2                 # concat old sign,new exponent
8603         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
8604         mov.l           (%sp)+,%d2              # restore d2
8605         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
8606         rts
8607
8608 #
8609 # operand is to be rounded to double precision
8610 #
8611 fin_dbl:
8612         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)     # copy src operand into FP_SCR0
8613         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8614         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8615         bsr.l           scale_to_zero_src       # calculate scale factor (used below in d0)
8616
8617         cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
8618         bge.w           fin_sd_unfl             # yes; go handle underflow
8619         cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
8620         beq.w           fin_sd_may_ovfl         # maybe; go check
8621         blt.w           fin_sd_ovfl             # yes; go handle overflow
8622         bra.w           fin_sd_normal           # no; go handle normalized op
8623
8624 #
8625 # operand WILL underflow when moved in to the fp register file
8626 #
8627 fin_sd_unfl:
8628         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8629
8630         tst.b           FP_SCR0_EX(%a6)         # is operand negative?
8631         bpl.b           fin_sd_unfl_tst         # no
8632         bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
8633
8634 # if underflow or inexact is enabled, then go calculate the EXOP first.
8635 fin_sd_unfl_tst:
8636         mov.b           FPCR_ENABLE(%a6),%d1
8637         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8638         bne.b           fin_sd_unfl_ena         # yes
8639
8640 fin_sd_unfl_dis:                                # also the tail of fin_sd_unfl_ena, after the EXOP is in fp1
8641         lea             FP_SCR0(%a6),%a0        # pass: result addr
8642         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
8643         bsr.l           unf_res                 # calculate default result
8644         or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
8645         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
8646         rts
8647
8648 #
8649 # operand will underflow AND underflow or inexact is enabled.
8650 # Therefore, we must return the result rounded to extended precision.
8651 #
8652 fin_sd_unfl_ena:
8653         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # build the EXOP in FP_SCR1; FP_SCR0 is left for unf_res
8654         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
8655         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
8656
8657         mov.l           %d2,-(%sp)              # save d2
8658         mov.w           %d1,%d2                 # make a copy
8659         andi.l          &0x7fff,%d1             # strip sign
8660         sub.l           %d0,%d1                 # subtract scale factor
8661         andi.w          &0x8000,%d2             # extract old sign
8662         addi.l          &0x6000,%d1             # add new bias
8663         andi.w          &0x7fff,%d1             # clear sign position
8664         or.w            %d1,%d2                 # concat old sign,new exp
8665         mov.w           %d2,FP_SCR1_EX(%a6)     # insert new exponent
8666         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
8667         mov.l           (%sp)+,%d2              # restore d2
8668         bra.b           fin_sd_unfl_dis         # now produce the default result in fp0
8669
8670 #
8671 # operand WILL overflow.
8672 #
8673 fin_sd_ovfl:
8674         fmov.l          &0x0,%fpsr              # clear FPSR
8675         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8676
8677         fmov.x          FP_SCR0(%a6),%fp0       # perform move
8678
8679         fmov.l          &0x0,%fpcr              # clear FPCR
8680         fmov.l          %fpsr,%d1               # save FPSR
8681
8682         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8683
8684 fin_sd_ovfl_tst:                                # also entered from fin_sd_may_ovfl once overflow is confirmed
8685         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8686
8687         mov.b           FPCR_ENABLE(%a6),%d1
8688         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8689         bne.b           fin_sd_ovfl_ena         # yes
8690
8691 #
8692 # neither OVFL nor INEX is enabled; therefore, we must create the default
8693 # result by calling ovf_res(). (also the tail of fin_sd_ovfl_ena.)
8694 #
8695 fin_sd_ovfl_dis:
8696         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8697         sne             %d1                     # set sign param accordingly
8698         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
8699         bsr.l           ovf_res                 # calculate default result
8700         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
8701         fmovm.x         (%a0),&0x80             # return default result in fp0 (a0 presumably set by ovf_res)
8702         rts
8703
8704 #
8705 # OVFL or INEX is enabled.
8706 # the INEX2 bit has already been updated by the round to the correct precision.
8707 # now, round to extended(and don't alter the FPSR).
8708 #
8709 fin_sd_ovfl_ena:
8710         mov.l           %d2,-(%sp)              # save d2
8711         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8712         mov.l           %d1,%d2                 # make a copy
8713         andi.l          &0x7fff,%d1             # strip sign
8714         andi.w          &0x8000,%d2             # keep old sign
8715         sub.l           %d0,%d1                 # subtract scale factor (undo the scaling)
8716         sub.l           &0x6000,%d1             # subtract bias
8717         andi.w          &0x7fff,%d1             # clear sign position
8718         or.w            %d2,%d1                 # concat old sign,new exp
8719         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8720         mov.l           (%sp)+,%d2              # restore d2
8721         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8722         bra.b           fin_sd_ovfl_dis         # now produce the default result in fp0
8723
8724 #
8725 # the move in MAY overflow. so... do the move and inspect the result.
8726 #
8727 fin_sd_may_ovfl:
8728         fmov.l          &0x0,%fpsr              # clear FPSR
8729         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8730
8731         fmov.x          FP_SCR0(%a6),%fp0       # perform the move
8732
8733         fmov.l          %fpsr,%d1               # save status
8734         fmov.l          &0x0,%fpcr              # clear FPCR
8735
8736         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8737
8738         fabs.x          %fp0,%fp1               # fp1 = |result| (fp0 keeps the result)
8739         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
8740         fbge.w          fin_sd_ovfl_tst         # yes; overflow has occurred
8741
8742 # no, it didn't overflow; we have correct result
8743         bra.w           fin_sd_normal_exit      # unscale fp0 and return it
8744
8745 ##########################################################################
8746
8747 #
8748 # operand is not a NORM: check its optype (tag in d1) and branch accordingly
8749 #
8750 fin_not_norm:
8751         cmpi.b          %d1,&DENORM             # weed out DENORM
8752         beq.w           fin_denorm
8753         cmpi.b          %d1,&SNAN               # weed out SNANs
8754         beq.l           res_snan_1op
8755         cmpi.b          %d1,&QNAN               # weed out QNANs
8756         beq.l           res_qnan_1op
8757
8758 #
8759 # do the fmove in; at this point, only possible ops are ZERO and INF.
8760 # use fmov to determine ccodes.
8761 # prec:mode should be zero at this point but it won't affect answer anyways.
8762 #
8763         fmov.x          SRC(%a0),%fp0           # do fmove in
8764         fmov.l          %fpsr,%d0               # no exceptions possible
8765         rol.l           &0x8,%d0                # rotate top byte of FPSR (ccodes) into lo byte
8766         mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
8767         rts
8768
8769 #########################################################################
8770 # XDEF **************************************************************** #
8771 #       fdiv(): emulates the fdiv instruction                           #
8772 #       fsdiv(): emulates the fsdiv instruction                         #
8773 #       fddiv(): emulates the fddiv instruction                         #
8774 #                                                                       #
8775 # XREF **************************************************************** #
8776 #       scale_to_zero_src() - scale src exponent to zero                #
8777 #       scale_to_zero_dst() - scale dst exponent to zero                #
8778 #       unf_res() - return default underflow result                     #
8779 #       ovf_res() - return default overflow result                      #
8780 #       res_qnan() - return QNAN result                                 #
8781 #       res_snan() - return SNAN result                                 #
8782 #                                                                       #
8783 # INPUT *************************************************************** #
8784 #       a0 = pointer to extended precision source operand               #
8785 #       a1 = pointer to extended precision destination operand          #
8786 #       d0 = rnd prec,mode                                              #
8787 #                                                                       #
8788 # OUTPUT ************************************************************** #
8789 #       fp0 = result                                                    #
8790 #       fp1 = EXOP (if exception occurred)                              #
8791 #                                                                       #
8792 # ALGORITHM *********************************************************** #
8793 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
8794 # norms/denorms into ext/sgl/dbl precision.                             #
8795 #       For norms/denorms, scale the exponents such that a divide       #
8796 # instruction won't cause an exception. Use the regular fdiv to         #
8797 # compute a result. Check if the regular operands would have taken      #
8798 # an exception. If so, return the default overflow/underflow result     #
8799 # and return the EXOP if exceptions are enabled. Else, scale the        #
8800 # result operand to the proper exponent.                                #
8801 #                                                                       #
8802 #########################################################################
8803
8804         align           0x10
8805 tbl_fdiv_unfl:                                  # underflow scale-factor thresholds; fdiv_norm indexes this by d1.w*4
8806         long            0x3fff - 0x0000         # ext_unfl
8807         long            0x3fff - 0x3f81         # sgl_unfl
8808         long            0x3fff - 0x3c01         # dbl_unfl
8809
8810 tbl_fdiv_ovfl:                                  # overflow scale-factor thresholds; fdiv_norm indexes this by d1.w*4
8811         long            0x3fff - 0x7ffe         # ext overflow exponent
8812         long            0x3fff - 0x407e         # sgl overflow exponent
8813         long            0x3fff - 0x43fe         # dbl overflow exponent
8814
8815         global          fsdiv
8816 fsdiv:                                          # fsdiv entry: force single precision, then run fdiv
8817         andi.b          &0x30,%d0               # clear rnd prec (keep only rnd mode bits)
8818         ori.b           &s_mode*0x10,%d0        # insert sgl prec
8819         bra.b           fdiv                    # continue in the common fdiv code
8820
8821         global          fddiv
8822 fddiv:                                          # fddiv entry: force double precision, then fall through into fdiv
8823         andi.b          &0x30,%d0               # clear rnd prec (keep only rnd mode bits)
8824         ori.b           &d_mode*0x10,%d0        # insert dbl prec
8825
8826         global          fdiv
8827 fdiv:                                           # in: a0 = src operand ptr, a1 = dst operand ptr, d0 = rnd prec,mode
8828         mov.l           %d0,L_SCR3(%a6)         # store rnd info
8829
8830         clr.w           %d1                     # clear low word before building the tag index
8831         mov.b           DTAG(%a6),%d1           # fetch dst optype tag
8832         lsl.b           &0x3,%d1                # move dst tag up by 3 bits
8833         or.b            STAG(%a6),%d1           # combine tags: d1 = (DTAG << 3) | STAG
8834
8835         bne.w           fdiv_not_norm           # nonzero: optimize on non-norm input
8836
8837 #
8838 # DIVIDE: NORMs and DENORMs ONLY!
8839 #
8840 fdiv_norm:
8841         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)     # copy dst operand into FP_SCR1
8842         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
8843         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
8844
8845         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)     # copy src operand into FP_SCR0
8846         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
8847         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
8848
8849         bsr.l           scale_to_zero_src       # scale src exponent
8850         mov.l           %d0,-(%sp)              # save scale factor 1
8851
8852         bsr.l           scale_to_zero_dst       # scale dst exponent (d0 = scale factor 2)
8853
8854         neg.l           (%sp)                   # (sp) = -scale1
8855         add.l           %d0,(%sp)               # SCALE FACTOR = scale2 - scale1
8856
8857         mov.w           2+L_SCR3(%a6),%d1       # fetch precision
8858         lsr.b           &0x6,%d1                # shift to lo bits (table index)
8859         mov.l           (%sp)+,%d0              # load S.F.
8860         cmp.l           %d0,(tbl_fdiv_ovfl.b,%pc,%d1.w*4) # will result overflow?
8861         ble.w           fdiv_may_ovfl           # maybe; fdiv_may_ovfl checks the actual result
8862
8863         cmp.l           %d0,(tbl_fdiv_unfl.w,%pc,%d1.w*4) # will result underflow?
8864         beq.w           fdiv_may_unfl           # maybe
8865         bgt.w           fdiv_unfl               # yes; go handle underflow (otherwise fall into fdiv_normal)
8866
8867 fdiv_normal:                                    # no overflow/underflow expected: divide the scaled operands, then unscale
8868         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
8869
8870         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8871         fmov.l          &0x0,%fpsr              # clear FPSR
8872
8873         fdiv.x          FP_SCR0(%a6),%fp0       # perform divide
8874
8875         fmov.l          %fpsr,%d1               # save FPSR
8876         fmov.l          &0x0,%fpcr              # clear FPCR
8877
8878         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8879
8880 fdiv_normal_exit:                               # also entered from fdiv_no_ovfl; expects result in fp0, scale factor in d0
8881         fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
8882         mov.l           %d2,-(%sp)              # store d2
8883         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
8884         mov.l           %d1,%d2                 # make a copy
8885         andi.l          &0x7fff,%d1             # strip sign
8886         andi.w          &0x8000,%d2             # keep old sign
8887         sub.l           %d0,%d1                 # subtract scale factor (undo the scaling)
8888         or.w            %d2,%d1                 # concat old sign,new exp
8889         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8890         mov.l           (%sp)+,%d2              # restore d2
8891         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
8892         rts
8893
8894 tbl_fdiv_ovfl2:                                 # exponent limits used by fdiv_may_ovfl; indexed by d1.w*4
8895         long            0x7fff                  # presumably ext (same index order as tbl_fdiv_ovfl)
8896         long            0x407f                  # presumably sgl
8897         long            0x43ff                  # presumably dbl
8898
8899 fdiv_no_ovfl:                                   # fdiv_may_ovfl found no overflow; scale factor is still on the stack
8900         mov.l           (%sp)+,%d0              # restore scale factor
8901         bra.b           fdiv_normal_exit        # unscale fp0 and return it
8902
8903 fdiv_may_ovfl:                                  # divide, then test the unscaled result exponent against tbl_fdiv_ovfl2
8904         mov.l           %d0,-(%sp)              # save scale factor
8905
8906         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
8907
8908         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
8909         fmov.l          &0x0,%fpsr              # clear FPSR
8910
8911         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8912
8913         fmov.l          %fpsr,%d0               # save status
8914         fmov.l          &0x0,%fpcr              # clear FPCR
8915
8916         or.l            %d0,USER_FPSR(%a6)      # save INEX,N
8917
8918         fmovm.x         &0x01,-(%sp)            # save result to stack
8919         mov.w           (%sp),%d0               # fetch new exponent
8920         add.l           &0xc,%sp                # clear result from stack
8921         andi.l          &0x7fff,%d0             # strip sign
8922         sub.l           (%sp),%d0               # subtract saved scale factor (undo the scaling)
8923         cmp.l           %d0,(tbl_fdiv_ovfl2.b,%pc,%d1.w*4) # d1 not modified in this routine; presumably still the precision index from fdiv_norm
8924         blt.b           fdiv_no_ovfl            # below the limit; no overflow
8925         mov.l           (%sp)+,%d0              # overflow: pop scale factor, fall into fdiv_ovfl_tst
8926
8927 fdiv_ovfl_tst:                                  # overflow confirmed: record it, then pick enabled/disabled handling
8928         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
8929
8930         mov.b           FPCR_ENABLE(%a6),%d1
8931         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
8932         bne.b           fdiv_ovfl_ena           # yes
8933
8934 fdiv_ovfl_dis:                                  # default result; also the tail of fdiv_ovfl_ena_cont
8935         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
8936         sne             %d1                     # set sign param accordingly
8937         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
8938         bsr.l           ovf_res                 # calculate default result
8939         or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
8940         fmovm.x         (%a0),&0x80             # return default result in fp0 (a0 presumably set by ovf_res)
8941         rts
8942
8943 fdiv_ovfl_ena:                          # OVFL or INEX enabled: build the EXOP in fp1 before the default result
8944         mov.l           L_SCR3(%a6),%d1
8945         andi.b          &0xc0,%d1               # is precision extended?
8946         bne.b           fdiv_ovfl_ena_sd        # no, do sgl or dbl
8947
8948 fdiv_ovfl_ena_cont:                     # fp0 = divide result, d0 = scale factor
8949         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
8950
8951         mov.l           %d2,-(%sp)              # save d2
8952         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
8953         mov.w           %d1,%d2                 # make a copy
8954         andi.l          &0x7fff,%d1             # strip sign
8955         sub.l           %d0,%d1                 # subtract scale factor
8956         subi.l          &0x6000,%d1             # subtract bias
8957         andi.w          &0x7fff,%d1             # clear sign bit
8958         andi.w          &0x8000,%d2             # keep old sign
8959         or.w            %d2,%d1                 # concat old sign,new exp
8960         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
8961         mov.l           (%sp)+,%d2              # restore d2
8962         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
8963         bra.b           fdiv_ovfl_dis           # then produce the default result in fp0
8964
8965 fdiv_ovfl_ena_sd:                       # sgl/dbl case: redo the divide with only the rounding mode in FPCR
8966         fmovm.x         FP_SCR1(%a6),&0x80      # load dst operand
8967
8968         mov.l           L_SCR3(%a6),%d1
8969         andi.b          &0x30,%d1               # keep rnd mode
8970         fmov.l          %d1,%fpcr               # set FPCR
8971
8972         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8973
8974         fmov.l          &0x0,%fpcr              # clear FPCR
8975         bra.b           fdiv_ovfl_ena_cont      # build EXOP from the new fp0
8976
8977 fdiv_unfl:                              # underflow path: redo the divide in RZ mode and record the status
8978         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
8979
8980         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
8981
8982         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
8983         fmov.l          &0x0,%fpsr              # clear FPSR
8984
8985         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
8986
8987         fmov.l          %fpsr,%d1               # save status
8988         fmov.l          &0x0,%fpcr              # clear FPCR
8989
8990         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
8991
8992         mov.b           FPCR_ENABLE(%a6),%d1    # fetch exception enable byte
8993         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
8994         bne.b           fdiv_unfl_ena           # yes
8995
8996 fdiv_unfl_dis:                          # return the default underflow result computed by unf_res in fp0
8997         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
8998
8999         lea             FP_SCR0(%a6),%a0        # pass: result addr
9000         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
9001         bsr.l           unf_res                 # calculate default result
9002         or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
9003         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9004         rts
9005
9006 #
9007 # UNFL or INEX is enabled: build the EXOP in fp1, then return the default result.
9008 #
9009 fdiv_unfl_ena:
9010         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
9011
9012         mov.l           L_SCR3(%a6),%d1
9013         andi.b          &0xc0,%d1               # is precision extended?
9014         bne.b           fdiv_unfl_ena_sd        # no, sgl or dbl
9015
9016         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9017
9018 fdiv_unfl_ena_cont:
9019         fmov.l          &0x0,%fpsr              # clear FPSR
9020
9021         fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
9022
9023         fmov.l          &0x0,%fpcr              # clear FPCR
9024
9025         fmovm.x         &0x40,FP_SCR0(%a6)      # save fp1 result to FP_SCR0
9026         mov.l           %d2,-(%sp)              # save d2
9027         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
9028         mov.l           %d1,%d2                 # make a copy
9029         andi.l          &0x7fff,%d1             # strip sign
9030         andi.w          &0x8000,%d2             # keep old sign
9031         sub.l           %d0,%d1                 # subtract scale factor
9032         addi.l          &0x6000,%d1             # add bias
9033         andi.w          &0x7fff,%d1             # clear sign position
9034         or.w            %d2,%d1                 # concat old sign,new exp
9035         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exp
9036         mov.l           (%sp)+,%d2              # restore d2
9037         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9038         bra.w           fdiv_unfl_dis           # then produce the default result in fp0
9039
9040 fdiv_unfl_ena_sd:                       # sgl/dbl case: load FPCR with the rounding mode only
9041         mov.l           L_SCR3(%a6),%d1
9042         andi.b          &0x30,%d1               # use only rnd mode
9043         fmov.l          %d1,%fpcr               # set FPCR
9044
9045         bra.b           fdiv_unfl_ena_cont      # rejoin the common EXOP path
9046
9047 #
9048 # the divide operation MAY underflow:
9049 #
9050 fdiv_may_unfl:
9051         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
9052
9053         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9054         fmov.l          &0x0,%fpsr              # clear FPSR
9055
9056         fdiv.x          FP_SCR0(%a6),%fp0       # execute divide
9057
9058         fmov.l          %fpsr,%d1               # save status
9059         fmov.l          &0x0,%fpcr              # clear FPCR
9060
9061         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9062
9063         fabs.x          %fp0,%fp1               # fp1 = |result|
9064         fcmp.b          %fp1,&0x1               # compare |result| with 1.b
9065         fbgt.w          fdiv_normal_exit        # |result| > 1: no underflow occurred
9066         fblt.w          fdiv_unfl               # |result| < 1: underflow occurred
9067
9068 #
9069 # we still don't know if underflow occurred. result is ~ equal to 1. but,
9070 # we don't know if the result was an underflow that rounded up to a 1
9071 # or a normalized number that rounded down to a 1. so, redo the entire
9072 # operation using RZ as the rounding mode to see what the pre-rounded
9073 # result is. this case should be relatively rare.
9074 #
9075         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
9076
9077         mov.l           L_SCR3(%a6),%d1
9078         andi.b          &0xc0,%d1               # keep rnd prec
9079         ori.b           &rz_mode*0x10,%d1       # insert RZ
9080
9081         fmov.l          %d1,%fpcr               # set FPCR
9082         fmov.l          &0x0,%fpsr              # clear FPSR
9083
9084         fdiv.x          FP_SCR0(%a6),%fp1       # execute divide
9085
9086         fmov.l          &0x0,%fpcr              # clear FPCR
9087         fabs.x          %fp1                    # make absolute value
9088         fcmp.b          %fp1,&0x1               # is |result| < 1.b?
9089         fbge.w          fdiv_normal_exit        # no; no underflow occurred
9090         bra.w           fdiv_unfl               # yes; underflow occurred
9091
9092 ############################################################################
9093
9094 #
9095 # Divide: inputs are not both normalized; what are they?
9096 #
9097 fdiv_not_norm:                          # dispatch through tbl_fdiv_op using the index in d1
9098         mov.w           (tbl_fdiv_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit handler offset for index d1
9099         jmp             (tbl_fdiv_op.b,%pc,%d1.w*1) # jump to tbl_fdiv_op + offset
9100
9101         swbeg           &48
9102 tbl_fdiv_op:                            # 6 rows x 8 entries, labelled DST / SRC; last 2 slots per row are zero-offset filler
9103         short           fdiv_norm       - tbl_fdiv_op # NORM / NORM
9104         short           fdiv_inf_load   - tbl_fdiv_op # NORM / ZERO
9105         short           fdiv_zero_load  - tbl_fdiv_op # NORM / INF
9106         short           fdiv_res_qnan   - tbl_fdiv_op # NORM / QNAN
9107         short           fdiv_norm       - tbl_fdiv_op # NORM / DENORM
9108         short           fdiv_res_snan   - tbl_fdiv_op # NORM / SNAN
9109         short           tbl_fdiv_op     - tbl_fdiv_op #
9110         short           tbl_fdiv_op     - tbl_fdiv_op #
9111
9112         short           fdiv_zero_load  - tbl_fdiv_op # ZERO / NORM
9113         short           fdiv_res_operr  - tbl_fdiv_op # ZERO / ZERO
9114         short           fdiv_zero_load  - tbl_fdiv_op # ZERO / INF
9115         short           fdiv_res_qnan   - tbl_fdiv_op # ZERO / QNAN
9116         short           fdiv_zero_load  - tbl_fdiv_op # ZERO / DENORM
9117         short           fdiv_res_snan   - tbl_fdiv_op # ZERO / SNAN
9118         short           tbl_fdiv_op     - tbl_fdiv_op #
9119         short           tbl_fdiv_op     - tbl_fdiv_op #
9120
9121         short           fdiv_inf_dst    - tbl_fdiv_op # INF / NORM
9122         short           fdiv_inf_dst    - tbl_fdiv_op # INF / ZERO
9123         short           fdiv_res_operr  - tbl_fdiv_op # INF / INF
9124         short           fdiv_res_qnan   - tbl_fdiv_op # INF / QNAN
9125         short           fdiv_inf_dst    - tbl_fdiv_op # INF / DENORM
9126         short           fdiv_res_snan   - tbl_fdiv_op # INF / SNAN
9127         short           tbl_fdiv_op     - tbl_fdiv_op #
9128         short           tbl_fdiv_op     - tbl_fdiv_op #
9129
9130         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / NORM
9131         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / ZERO
9132         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / INF
9133         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / QNAN
9134         short           fdiv_res_qnan   - tbl_fdiv_op # QNAN / DENORM
9135         short           fdiv_res_snan   - tbl_fdiv_op # QNAN / SNAN
9136         short           tbl_fdiv_op     - tbl_fdiv_op #
9137         short           tbl_fdiv_op     - tbl_fdiv_op #
9138
9139         short           fdiv_norm       - tbl_fdiv_op # DENORM / NORM
9140         short           fdiv_inf_load   - tbl_fdiv_op # DENORM / ZERO
9141         short           fdiv_zero_load  - tbl_fdiv_op # DENORM / INF
9142         short           fdiv_res_qnan   - tbl_fdiv_op # DENORM / QNAN
9143         short           fdiv_norm       - tbl_fdiv_op # DENORM / DENORM
9144         short           fdiv_res_snan   - tbl_fdiv_op # DENORM / SNAN
9145         short           tbl_fdiv_op     - tbl_fdiv_op #
9146         short           tbl_fdiv_op     - tbl_fdiv_op #
9147
9148         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / NORM
9149         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / ZERO
9150         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / INF
9151         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / QNAN
9152         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / DENORM
9153         short           fdiv_res_snan   - tbl_fdiv_op # SNAN / SNAN
9154         short           tbl_fdiv_op     - tbl_fdiv_op #
9155         short           tbl_fdiv_op     - tbl_fdiv_op #
9156
9157 fdiv_res_qnan:                          # table target: long branch to the shared QNAN handler
9158         bra.l           res_qnan
9159 fdiv_res_snan:                          # table target: long branch to the shared SNAN handler
9160         bra.l           res_snan
9161 fdiv_res_operr:                         # table target: long branch to the shared OPERR handler
9162         bra.l           res_operr
9163
9164         global          fdiv_zero_load          # global for fsgldiv
9165 fdiv_zero_load:                         # result is a ZERO whose sign is src sign XOR dst sign
9166         mov.b           SRC_EX(%a0),%d0         # result sign is exclusive
9167         mov.b           DST_EX(%a1),%d1         # or of input signs.
9168         eor.b           %d0,%d1                 # bit 7 of d1 = result sign
9169         bpl.b           fdiv_zero_load_p        # result is positive
9170         fmov.s          &0x80000000,%fp0        # load a -ZERO
9171         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/N
9172         rts
9173 fdiv_zero_load_p:
9174         fmov.s          &0x00000000,%fp0        # load a +ZERO
9175         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
9176         rts
9177
9178 #
9179 # The destination was In Range and the source was a ZERO. The result,
9180 # therefore, is an INF w/ the proper sign.
9181 # So, determine the sign and return a new INF (w/ the j-bit cleared).
9182 #
9183         global          fdiv_inf_load           # global for fsgldiv
9184 fdiv_inf_load:
9185         ori.w           &dz_mask+adz_mask,2+USER_FPSR(%a6) # set DZ/ADZ (word at offset 2 of USER_FPSR)
9186         mov.b           SRC_EX(%a0),%d0         # load both signs
9187         mov.b           DST_EX(%a1),%d1
9188         eor.b           %d0,%d1                 # bit 7 of d1 = result sign
9189         bpl.b           fdiv_inf_load_p         # result is positive
9190         fmov.s          &0xff800000,%fp0        # make result -INF
9191         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/N
9192         rts
9193 fdiv_inf_load_p:
9194         fmov.s          &0x7f800000,%fp0        # make result +INF
9195         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
9196         rts
9197
9198 #
9199 # The destination was an INF w/ an In Range or ZERO source, the result is
9200 # an INF w/ the proper sign.
9201 # The 68881/882 returns the destination INF w/ the new sign (if the j-bit of the
9202 # dst INF is set, then the j-bit of the result INF is also set).
9203 #
9204         global          fdiv_inf_dst            # global for fsgldiv
9205 fdiv_inf_dst:
9206         mov.b           DST_EX(%a1),%d0         # load both signs
9207         mov.b           SRC_EX(%a0),%d1
9208         eor.b           %d0,%d1                 # bit 7 of d1 = result sign
9209         bpl.b           fdiv_inf_dst_p          # result is positive
9210
9211         fmovm.x         DST(%a1),&0x80          # return result in fp0
9212         fabs.x          %fp0                    # clear sign bit
9213         fneg.x          %fp0                    # set sign bit
9214         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set INF/NEG
9215         rts
9216
9217 fdiv_inf_dst_p:
9218         fmovm.x         DST(%a1),&0x80          # return result in fp0
9219         fabs.x          %fp0                    # return positive INF
9220         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
9221         rts
9222
9223 #########################################################################
9224 # XDEF **************************************************************** #
9225 #       fneg(): emulates the fneg instruction                           #
9226 #       fsneg(): emulates the fsneg instruction                         #
9227 #       fdneg(): emulates the fdneg instruction                         #
9228 #                                                                       #
9229 # XREF **************************************************************** #
9230 #       norm() - normalize a denorm to provide EXOP                     #
9231 #       scale_to_zero_src() - scale sgl/dbl source exponent             #
9232 #       ovf_res() - return default overflow result                      #
9233 #       unf_res() - return default underflow result                     #
9234 #       res_qnan_1op() - return QNAN result                             #
9235 #       res_snan_1op() - return SNAN result                             #
9236 #                                                                       #
9237 # INPUT *************************************************************** #
9238 #       a0 = pointer to extended precision source operand               #
9239 #       d0 = rnd prec,mode                                              #
9240 #                                                                       #
9241 # OUTPUT ************************************************************** #
9242 #       fp0 = result                                                    #
9243 #       fp1 = EXOP (if exception occurred)                              #
9244 #                                                                       #
9245 # ALGORITHM *********************************************************** #
9246 #       Handle NANs, zeroes, and infinities as special cases. Separate  #
9247 # norms/denorms into ext/sgl/dbl precisions. Extended precision can be  #
9248 # emulated by simply toggling sign bit. Sgl/dbl operands must be scaled #
9249 # and an actual fneg performed to see if overflow/underflow would have  #
9250 # occurred. If so, return default underflow/overflow result. Else,      #
9251 # scale the result exponent and return result. FPSR gets set based on   #
9252 # the result value.                                                     #
9253 #                                                                       #
9254 #########################################################################
9255
9256         global          fsneg
9257 fsneg:                                  # fneg with the rounding precision forced to single
9258         andi.b          &0x30,%d0               # clear rnd prec
9259         ori.b           &s_mode*0x10,%d0        # insert sgl precision
9260         bra.b           fneg                    # continue in the common fneg code
9261
9262         global          fdneg
9263 fdneg:                                  # fneg with the rounding precision forced to double; falls through into fneg
9264         andi.b          &0x30,%d0               # clear rnd prec
9265         ori.b           &d_mode*0x10,%d0        # insert dbl prec
9266
9267         global          fneg
9268 fneg:                                   # in: a0 = source operand, d0 = rnd prec,mode; out: fp0 = result
9269         mov.l           %d0,L_SCR3(%a6)         # store rnd info
9270         mov.b           STAG(%a6),%d1           # fetch source operand tag
9271         bne.w           fneg_not_norm           # optimize on non-norm input
9272
9273 #
9274 # NEGATE SIGN : norms and denorms ONLY!
9275 #
9276 fneg_norm:
9277         andi.b          &0xc0,%d0               # is precision extended?
9278         bne.w           fneg_not_ext            # no; go handle sgl or dbl
9279
9280 #
9281 # precision selected is extended. so...we can not get an underflow
9282 # or overflow because of rounding to the correct precision. so...
9283 # skip the scaling and unscaling...
9284 #
9285         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa to FP_SCR0 unchanged
9286         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9287         mov.w           SRC_EX(%a0),%d0         # fetch {sgn,exp}
9288         eori.w          &0x8000,%d0             # negate sign
9289         bpl.b           fneg_norm_load          # sign is positive
9290         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9291 fneg_norm_load:
9292         mov.w           %d0,FP_SCR0_EX(%a6)     # store {new sgn,exp}
9293         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9294         rts
9295
9296 #
9297 # for an extended precision DENORM, the UNFL exception bit is set
9298 # the accrued bit is NOT set in this instance(no inexactness!)
9299 #
9300 fneg_denorm:
9301         andi.b          &0xc0,%d0               # is precision extended?
9302         bne.b           fneg_not_ext            # no; go handle sgl or dbl
9303
9304         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9305
9306         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy mantissa to FP_SCR0 unchanged
9307         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9308         mov.w           SRC_EX(%a0),%d0         # fetch {sgn,exp}
9309         eori.w          &0x8000,%d0             # negate sign
9310         bpl.b           fneg_denorm_done        # result is positive
9311         mov.b           &neg_bmask,FPSR_CC(%a6) # result is negative; set 'N' ccode bit
9312 fneg_denorm_done:
9313         mov.w           %d0,FP_SCR0_EX(%a6)     # store {new sgn,exp}
9314         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9315
9316         btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9317         bne.b           fneg_ext_unfl_ena       # yes
9318         rts
9319
9320 #
9321 # the input is an extended DENORM and underflow is enabled in the FPCR.
9322 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9323 # exponent and insert back into the operand.
9324 #
9325 fneg_ext_unfl_ena:
9326         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
9327         bsr.l           norm                    # normalize result
9328         neg.w           %d0                     # new exponent = -(shft val)
9329         addi.w          &0x6000,%d0             # add new bias to exponent
9330         mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
9331         andi.w          &0x8000,%d1             # keep old sign
9332         andi.w          &0x7fff,%d0             # clear sign position
9333         or.w            %d1,%d0                 # concat old sign, new exponent
9334         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
9335         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9336         rts
9337
9338 #
9339 # operand is either single or double
9340 #
9341 fneg_not_ext:                           # d0 holds only the precision bits (masked with 0xc0 by the caller path)
9342         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
9343         bne.b           fneg_dbl
9344
9345 #
9346 # operand is to be rounded to single precision
9347 #
9348 fneg_sgl:
9349         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy source operand into FP_SCR0
9350         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9351         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9352         bsr.l           scale_to_zero_src       # calculate scale factor
9353
9354         cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
9355         bge.w           fneg_sd_unfl            # yes; go handle underflow
9356         cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
9357         beq.w           fneg_sd_may_ovfl        # maybe; go check
9358         blt.w           fneg_sd_ovfl            # yes; go handle overflow
9359
9360 #
9361 # operand will NOT overflow or underflow when moved in to the fp reg file
9362 #
9363 fneg_sd_normal:
9364         fmov.l          &0x0,%fpsr              # clear FPSR
9365         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9366
9367         fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9368
9369         fmov.l          %fpsr,%d1               # save FPSR
9370         fmov.l          &0x0,%fpcr              # clear FPCR
9371
9372         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9373
9374 fneg_sd_normal_exit:                    # fp0 = scaled result, d0 = scale factor
9375         mov.l           %d2,-(%sp)              # save d2
9376         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
9377         mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
9378         mov.w           %d1,%d2                 # make a copy
9379         andi.l          &0x7fff,%d1             # strip sign
9380         sub.l           %d0,%d1                 # subtract scale factor
9381         andi.w          &0x8000,%d2             # keep old sign
9382         or.w            %d1,%d2                 # concat old sign,new exp
9383         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
9384         mov.l           (%sp)+,%d2              # restore d2
9385         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9386         rts
9387
9388 #
9389 # operand is to be rounded to double precision
9390 #
9391 fneg_dbl:
9392         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy source operand into FP_SCR0
9393         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
9394         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
9395         bsr.l           scale_to_zero_src       # calculate scale factor
9396
9397         cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
9398         bge.b           fneg_sd_unfl            # yes; go handle underflow
9399         cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
9400         beq.w           fneg_sd_may_ovfl        # maybe; go check
9401         blt.w           fneg_sd_ovfl            # yes; go handle overflow
9402         bra.w           fneg_sd_normal          # no; go handle normalized op
9403
9404 #
9405 # operand WILL underflow when moved in to the fp register file
9406 #
9407 fneg_sd_unfl:
9408         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9409
9410         eori.b          &0x80,FP_SCR0_EX(%a6)   # negate sign
9411         bpl.b           fneg_sd_unfl_tst        # result is positive
9412         bset            &neg_bit,FPSR_CC(%a6)   # set 'N' ccode bit
9413
9414 # if underflow or inexact is enabled, go calculate EXOP first.
9415 fneg_sd_unfl_tst:
9416         mov.b           FPCR_ENABLE(%a6),%d1    # fetch exception enable byte
9417         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
9418         bne.b           fneg_sd_unfl_ena        # yes
9419
9420 fneg_sd_unfl_dis:
9421         lea             FP_SCR0(%a6),%a0        # pass: result addr
9422         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
9423         bsr.l           unf_res                 # calculate default result
9424         or.b            %d0,FPSR_CC(%a6)        # unf_res may have set 'Z'
9425         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9426         rts
9427
9428 #
9429 # operand will underflow AND underflow or inexact is enabled.
9430 # Therefore, we must return the result rounded to extended precision.
9431 #
9432 fneg_sd_unfl_ena:                       # build the EXOP in FP_SCR1; FP_SCR0 is left intact for fneg_sd_unfl_dis
9433         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6)
9434         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6)
9435         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
9436
9437         mov.l           %d2,-(%sp)              # save d2
9438         mov.l           %d1,%d2                 # make a copy
9439         andi.l          &0x7fff,%d1             # strip sign
9440         andi.w          &0x8000,%d2             # keep old sign
9441         sub.l           %d0,%d1                 # subtract scale factor
9442         addi.l          &0x6000,%d1             # add new bias
9443         andi.w          &0x7fff,%d1             # clear sign position
9444         or.w            %d2,%d1                 # concat new sign,new exp
9445         mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
9446         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
9447         mov.l           (%sp)+,%d2              # restore d2
9448         bra.b           fneg_sd_unfl_dis        # then produce the default result in fp0
9449
9450 #
9451 # operand WILL overflow.
9452 #
9453 fneg_sd_ovfl:
9454         fmov.l          &0x0,%fpsr              # clear FPSR
9455         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9456
9457         fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9458
9459         fmov.l          &0x0,%fpcr              # clear FPCR
9460         fmov.l          %fpsr,%d1               # save FPSR
9461
9462         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9463
9464 fneg_sd_ovfl_tst:
9465         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
9466
9467         mov.b           FPCR_ENABLE(%a6),%d1    # fetch exception enable byte
9468         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
9469         bne.b           fneg_sd_ovfl_ena        # yes
9470
9471 #
9472 # OVFL is not enabled; therefore, we must create the default result by
9473 # calling ovf_res().
9474 #
9475 fneg_sd_ovfl_dis:
9476         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
9477         sne             %d1                     # set sign param accordingly
9478         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
9479         bsr.l           ovf_res                 # calculate default result
9480         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
9481         fmovm.x         (%a0),&0x80             # return default result in fp0
9482         rts
9483
9484 #
9485 # OVFL or INEX is enabled.
9486 # the INEX2 bit has already been updated by the round to the correct precision.
9487 # now, round to extended(and don't alter the FPSR).
9488 #
9489 fneg_sd_ovfl_ena:                       # build the EXOP in fp1 from FP_SCR0, then fall back to the default result
9490         mov.l           %d2,-(%sp)              # save d2
9491         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp} - NOTE(review): no visible sign flip of FP_SCR0 on this path (fneg_sd_unfl does eori.b); confirm EXOP sign
9492         mov.l           %d1,%d2                 # make a copy
9493         andi.l          &0x7fff,%d1             # strip sign
9494         andi.w          &0x8000,%d2             # keep old sign
9495         sub.l           %d0,%d1                 # subtract scale factor
9496         subi.l          &0x6000,%d1             # subtract bias
9497         andi.w          &0x7fff,%d1             # clear sign position
9498         or.w            %d2,%d1                 # concat sign,exp
9499         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
9500         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9501         mov.l           (%sp)+,%d2              # restore d2
9502         bra.b           fneg_sd_ovfl_dis        # then produce the default result in fp0
9503
9504 #
9505 # the move in MAY overflow. so...
9506 #
9507 fneg_sd_may_ovfl:
9508         fmov.l          &0x0,%fpsr              # clear FPSR
9509         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9510
9511         fneg.x          FP_SCR0(%a6),%fp0       # perform negation
9512
9513         fmov.l          %fpsr,%d1               # save status
9514         fmov.l          &0x0,%fpcr              # clear FPCR
9515
9516         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9517
9518         fabs.x          %fp0,%fp1               # fp1 = |result|
9519         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
9520         fbge.w          fneg_sd_ovfl_tst        # yes; overflow has occurred
9521
9522 # no, it didn't overflow; we have correct result
9523         bra.w           fneg_sd_normal_exit
9524
9525 ##########################################################################
9526
9527 #
9528 # input is not normalized; what is it?
9529 #
9530 fneg_not_norm:                          # d1 = source operand tag (loaded from STAG in fneg)
9531         cmpi.b          %d1,&DENORM             # weed out DENORM
9532         beq.w           fneg_denorm
9533         cmpi.b          %d1,&SNAN               # weed out SNAN
9534         beq.l           res_snan_1op
9535         cmpi.b          %d1,&QNAN               # weed out QNAN
9536         beq.l           res_qnan_1op
9537
9538 #
9539 # do the fneg; at this point, only possible ops are ZERO and INF.
9540 # use fneg to determine ccodes.
9541 # prec:mode should be zero at this point but it won't affect answer anyways.
9542 #
9543         fneg.x          SRC_EX(%a0),%fp0        # do fneg
9544         fmov.l          %fpsr,%d0               # fetch status set by the fneg
9545         rol.l           &0x8,%d0                # put ccodes in lo byte
9546         mov.b           %d0,FPSR_CC(%a6)        # insert correct ccodes
9547         rts
9548
9549 #########################################################################
9550 # XDEF **************************************************************** #
9551 #       ftst(): emulates the ftst instruction                           #
9552 #                                                                       #
9553 # XREF **************************************************************** #
9554 #       res_{s,q}nan_1op() - set NAN result for monadic instruction     #
9555 #                                                                       #
9556 # INPUT *************************************************************** #
9557 #       a0 = pointer to extended precision source operand               #
9558 #                                                                       #
9559 # OUTPUT ************************************************************** #
9560 #       none                                                            #
9561 #                                                                       #
9562 # ALGORITHM *********************************************************** #
9563 #       Check the source operand tag (STAG) and set the FPSR according  #
9564 # to the operand type and sign.                                         #
9565 #                                                                       #
9566 #########################################################################
9567
9568         global          ftst
9569 ftst:                                   # in: a0 = source operand; writes only FPSR_CC (no fp result)
9570         mov.b           STAG(%a6),%d1           # fetch source operand tag
9571         bne.b           ftst_not_norm           # optimize on non-norm input
9572
9573 #
9574 # Norm:
9575 #
9576 ftst_norm:
9577         tst.b           SRC_EX(%a0)             # is operand negative?
9578         bmi.b           ftst_norm_m             # yes
9579         rts
9580 ftst_norm_m:
9581         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9582         rts
9583
9584 #
9585 # input is not normalized; what is it?
9586 #
9587 ftst_not_norm:
9588         cmpi.b          %d1,&ZERO               # weed out ZERO
9589         beq.b           ftst_zero
9590         cmpi.b          %d1,&INF                # weed out INF
9591         beq.b           ftst_inf
9592         cmpi.b          %d1,&SNAN               # weed out SNAN
9593         beq.l           res_snan_1op
9594         cmpi.b          %d1,&QNAN               # weed out QNAN
9595         beq.l           res_qnan_1op
9596
9597 #
9598 # Denorm:
9599 #
9600 ftst_denorm:
9601         tst.b           SRC_EX(%a0)             # is operand negative?
9602         bmi.b           ftst_denorm_m           # yes
9603         rts
9604 ftst_denorm_m:
9605         mov.b           &neg_bmask,FPSR_CC(%a6) # set 'N' ccode bit
9606         rts
9607
9608 #
9609 # Infinity:
9610 #
9611 ftst_inf:
9612         tst.b           SRC_EX(%a0)             # is operand negative?
9613         bmi.b           ftst_inf_m              # yes
9614 ftst_inf_p:
9615         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9616         rts
9617 ftst_inf_m:
9618         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'I','N' ccode bits
9619         rts
9620
9621 #
9622 # Zero:
9623 #
9624 ftst_zero:
9625         tst.b           SRC_EX(%a0)             # is operand negative?
9626         bmi.b           ftst_zero_m             # yes
9627 ftst_zero_p:
9628         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9629         rts
9630 ftst_zero_m:
9631         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9632         rts
9633
9634 #########################################################################
9635 # XDEF **************************************************************** #
9636 #       fint(): emulates the fint instruction                           #
9637 #                                                                       #
9638 # XREF **************************************************************** #
9639 #       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9640 #                                                                       #
9641 # INPUT *************************************************************** #
9642 #       a0 = pointer to extended precision source operand               #
9643 #       d0 = round precision/mode                                       #
9644 #                                                                       #
9645 # OUTPUT ************************************************************** #
9646 #       fp0 = result                                                    #
9647 #                                                                       #
9648 # ALGORITHM *********************************************************** #
9649 #       Separate according to operand type. Unnorms don't pass through  #
9650 # here. For norms, load the rounding mode/prec, execute a "fint", then  #
9651 # store the resulting FPSR bits.                                        #
9652 #       For denorms, force the j-bit to a one and do the same as for    #
9653 # norms. Denorms are so low that the answer will either be a zero or a  #
9654 # one.                                                                  #
9655 #       For zeroes/infs/NANs, return the same while setting the FPSR    #
9656 # as appropriate.                                                       #
9657 #                                                                       #
9658 #########################################################################
9659
9660         global          fint
9661 fint:
9662         mov.b           STAG(%a6),%d1           # d1 = source operand tag
9663         bne.b           fint_not_norm           # nonzero tag ==> not a NORM
9664
9665 #
9666 # Norm:
9667 #
9668 fint_norm:
9669         andi.b          &0x30,%d0               # clear prec bits (prec = ext); keep 0x30 bits
9670
9671         fmov.l          %d0,%fpcr               # set FPCR
9672         fmov.l          &0x0,%fpsr              # clear FPSR
9673
9674         fint.x          SRC(%a0),%fp0           # execute fint
9675
9676         fmov.l          &0x0,%fpcr              # clear FPCR
9677         fmov.l          %fpsr,%d0               # save FPSR
9678         or.l            %d0,USER_FPSR(%a6)      # OR the resulting FPSR bits into USER_FPSR
9679
9680         rts
9681
9682 #
9683 # input is not a NORM; what is it?
9684 #
9685 fint_not_norm:
9686         cmpi.b          %d1,&ZERO               # weed out ZERO
9687         beq.b           fint_zero
9688         cmpi.b          %d1,&INF                # weed out INF
9689         beq.b           fint_inf
9690         cmpi.b          %d1,&DENORM             # weed out DENORM
9691         beq.b           fint_denorm
9692         cmpi.b          %d1,&SNAN               # weed out SNAN
9693         beq.l           res_snan_1op
9694         bra.l           res_qnan_1op            # any remaining tag is handled as QNAN
9695
9696 #
9697 # Denorm:
9698 #
9699 # for DENORMs, the result will be either (+/-)ZERO or (+/-)1.
9700 # also, the INEX2 and AINEX exception bits will be set.
9701 # so, we could either set these manually or force the DENORM
9702 # to a very small NORM and ship it to the NORM routine.
9703 # I do the latter.
9704 #
9705 fint_denorm:
9706         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9707         mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM (first byte of FP_SCR0_HI only)
9708         lea             FP_SCR0(%a6),%a0        # a0 = ptr to the scratch operand
9709         bra.b           fint_norm               # handle it as a NORM
9710
9711 #
9712 # Zero:
9713 #
9714 fint_zero:
9715         tst.b           SRC_EX(%a0)             # is ZERO negative?
9716         bmi.b           fint_zero_m             # yes
9717 fint_zero_p:
9718         fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
9719         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9720         rts
9721 fint_zero_m:
9722         fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
9723         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9724         rts
9725
9726 #
9727 # Infinity:
9728 #
9729 fint_inf:
9730         fmovm.x         SRC(%a0),&0x80          # return the source INF unchanged in fp0
9731         tst.b           SRC_EX(%a0)             # is INF negative?
9732         bmi.b           fint_inf_m              # yes
9733 fint_inf_p:
9734         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9735         rts
9736 fint_inf_m:
9737         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9738         rts
9739
9740 #########################################################################
9741 # XDEF **************************************************************** #
9742 #       fintrz(): emulates the fintrz instruction                       #
9743 #                                                                       #
9744 # XREF **************************************************************** #
9745 #       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9746 #                                                                       #
9747 # INPUT *************************************************************** #
9748 #       a0 = pointer to extended precision source operand               #
9749 #       d0 = round precision/mode                                       #
9750 #                                                                       #
9751 # OUTPUT ************************************************************** #
9752 #       fp0 = result                                                    #
9753 #                                                                       #
9754 # ALGORITHM *********************************************************** #
9755 #       Separate according to operand type. Unnorms don't pass through  #
9756 # here. For norms, clear the FPSR, execute a "fintrz",                  #
9757 # then store the resulting FPSR bits.                                   #
9758 #       For denorms, force the j-bit to a one and do the same as for    #
9759 # norms. Denorms are so small that truncation always yields a zero.     #
9760 #                                                                       #
9761 #       For zeroes/infs/NANs, return the same while setting the FPSR    #
9762 # as appropriate.                                                       #
9763 #                                                                       #
9764 #########################################################################
9765
9766         global          fintrz
9767 fintrz:
9768         mov.b           STAG(%a6),%d1           # d1 = source operand tag
9769         bne.b           fintrz_not_norm         # nonzero tag ==> not a NORM
9770
9771 #
9772 # Norm:
9773 #
9774 fintrz_norm:
9775         fmov.l          &0x0,%fpsr              # clear FPSR (no FPCR load on this path)
9776
9777         fintrz.x        SRC(%a0),%fp0           # execute fintrz
9778
9779         fmov.l          %fpsr,%d0               # save FPSR
9780         or.l            %d0,USER_FPSR(%a6)      # OR the resulting FPSR bits into USER_FPSR
9781
9782         rts
9783
9784 #
9785 # input is not a NORM; what is it?
9786 #
9787 fintrz_not_norm:
9788         cmpi.b          %d1,&ZERO               # weed out ZERO
9789         beq.b           fintrz_zero
9790         cmpi.b          %d1,&INF                # weed out INF
9791         beq.b           fintrz_inf
9792         cmpi.b          %d1,&DENORM             # weed out DENORM
9793         beq.b           fintrz_denorm
9794         cmpi.b          %d1,&SNAN               # weed out SNAN
9795         beq.l           res_snan_1op
9796         bra.l           res_qnan_1op            # any remaining tag is handled as QNAN
9797
9798 #
9799 # Denorm:
9800 #
9801 # for DENORMs, the result will be (+/-)ZERO.
9802 # also, the INEX2 and AINEX exception bits will be set.
9803 # so, we could either set these manually or force the DENORM
9804 # to a very small NORM and ship it to the NORM routine.
9805 # I do the latter.
9806 #
9807 fintrz_denorm:
9808         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy sign, zero exp
9809         mov.b           &0x80,FP_SCR0_HI(%a6)   # force DENORM ==> small NORM (first byte of FP_SCR0_HI only)
9810         lea             FP_SCR0(%a6),%a0        # a0 = ptr to the scratch operand
9811         bra.b           fintrz_norm             # handle it as a NORM
9812
9813 #
9814 # Zero:
9815 #
9816 fintrz_zero:
9817         tst.b           SRC_EX(%a0)             # is ZERO negative?
9818         bmi.b           fintrz_zero_m           # yes
9819 fintrz_zero_p:
9820         fmov.s          &0x00000000,%fp0        # return +ZERO in fp0
9821         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
9822         rts
9823 fintrz_zero_m:
9824         fmov.s          &0x80000000,%fp0        # return -ZERO in fp0
9825         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
9826         rts
9827
9828 #
9829 # Infinity:
9830 #
9831 fintrz_inf:
9832         fmovm.x         SRC(%a0),&0x80          # return the source INF unchanged in fp0
9833         tst.b           SRC_EX(%a0)             # is INF negative?
9834         bmi.b           fintrz_inf_m            # yes
9835 fintrz_inf_p:
9836         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
9837         rts
9838 fintrz_inf_m:
9839         mov.b           &inf_bmask+neg_bmask,FPSR_CC(%a6) # set 'N','I' ccode bits
9840         rts
9841
9842 #########################################################################
9843 # XDEF **************************************************************** #
9844 #       fabs():  emulates the fabs instruction                          #
9845 #       fsabs(): emulates the fsabs instruction                         #
9846 #       fdabs(): emulates the fdabs instruction                         #
9847 #                                                                       #
9848 # XREF **************************************************************** #
9849 #       norm() - normalize denorm mantissa to provide EXOP              #
9850 #       scale_to_zero_src() - make exponent = 0; get scale factor       #
9851 #       unf_res() - calculate underflow result                          #
9852 #       ovf_res() - calculate overflow result                           #
9853 #       res_{s,q}nan_1op() - set NAN result for monadic operation       #
9854 #                                                                       #
9855 # INPUT *************************************************************** #
9856 #       a0 = pointer to extended precision source operand               #
9857 #       d0 = rnd precision/mode                                         #
9858 #                                                                       #
9859 # OUTPUT ************************************************************** #
9860 #       fp0 = result                                                    #
9861 #       fp1 = EXOP (if exception occurred)                              #
9862 #                                                                       #
9863 # ALGORITHM *********************************************************** #
9864 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
9865 # norms into extended, single, and double precision.                    #
9866 #       Simply clear sign for extended precision norm. Ext prec denorm  #
9867 # gets an EXOP created for it since it's an underflow.                  #
9868 #       Double and single precision can overflow and underflow. First,  #
9869 # scale the operand such that the exponent is zero. Perform an "fabs"   #
9870 # using the correct rnd mode/prec. Check to see if the original         #
9871 # exponent would take an exception. If so, use unf_res() or ovf_res()   #
9872 # to calculate the default result. Also, create the EXOP for the        #
9873 # exceptional case. If no exception should occur, insert the correct    #
9874 # result exponent and return.                                           #
9875 #       Unnorms don't pass through here.                                #
9876 #                                                                       #
9877 #########################################################################
9878
9879         global          fsabs
9880 fsabs:
9881         andi.b          &0x30,%d0               # clear rnd prec (keep 0x30 bits)
9882         ori.b           &s_mode*0x10,%d0        # insert sgl precision
9883         bra.b           fabs                    # join the common routine
9884
9885         global          fdabs
9886 fdabs:
9887         andi.b          &0x30,%d0               # clear rnd prec (keep 0x30 bits)
9888         ori.b           &d_mode*0x10,%d0        # insert dbl precision; fall into fabs
9889
9890         global          fabs
9891 fabs:
9892         mov.l           %d0,L_SCR3(%a6)         # store rnd info
9893         mov.b           STAG(%a6),%d1           # d1 = source operand tag
9894         bne.w           fabs_not_norm           # nonzero tag ==> not a NORM
9895
9896 #
9897 # ABSOLUTE VALUE: norms and denorms ONLY!
9898 #
9899 fabs_norm:
9900         andi.b          &0xc0,%d0               # isolate rnd prec; zero ==> extended
9901         bne.b           fabs_not_ext            # no; go handle sgl or dbl
9902
9903 #
9904 # precision selected is extended. so...we can not get an underflow
9905 # or overflow because of rounding to the correct precision. so...
9906 # skip the scaling and unscaling...
9907 #
9908         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy SRC_HI lword to scratch
9909         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy SRC_LO lword to scratch
9910         mov.w           SRC_EX(%a0),%d1         # fetch SRC_EX word
9911         bclr            &15,%d1                 # force absolute value
9912         mov.w           %d1,FP_SCR0_EX(%a6)     # insert exponent
9913         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
9914         rts
9915
9916 #
9917 # for an extended precision DENORM, the UNFL exception bit is set
9918 # the accrued bit is NOT set in this instance(no inexactness!)
9919 #
9920 fabs_denorm:
9921         andi.b          &0xc0,%d0               # isolate rnd prec; zero ==> extended
9922         bne.b           fabs_not_ext            # no
9923
9924         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
9925
9926         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # copy SRC_HI lword to scratch
9927         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy SRC_LO lword to scratch
9928         mov.w           SRC_EX(%a0),%d0         # fetch SRC_EX word
9929         bclr            &15,%d0                 # clear sign
9930         mov.w           %d0,FP_SCR0_EX(%a6)     # insert exponent
9931
9932         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
9933
9934         btst            &unfl_bit,FPCR_ENABLE(%a6) # is UNFL enabled?
9935         bne.b           fabs_ext_unfl_ena       # yes; go build the EXOP
9936         rts                                     # no; default result is all we need
9937
9938 #
9939 # the input is an extended DENORM and underflow is enabled in the FPCR.
9940 # normalize the mantissa and add the bias of 0x6000 to the resulting negative
9941 # exponent and insert back into the operand.
9942 #
9943 fabs_ext_unfl_ena:
9944         lea             FP_SCR0(%a6),%a0        # pass: ptr to operand
9945         bsr.l           norm                    # normalize result
9946         neg.w           %d0                     # new exponent = -(shft val)
9947         addi.w          &0x6000,%d0             # add new bias to exponent
9948         mov.w           FP_SCR0_EX(%a6),%d1     # fetch old sign,exp
9949         andi.w          &0x8000,%d1             # keep old sign
9950         andi.w          &0x7fff,%d0             # clear sign position
9951         or.w            %d1,%d0                 # concat old sign, new exponent
9952         mov.w           %d0,FP_SCR0_EX(%a6)     # insert new exponent
9953         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
9954         rts
9955
9956 #
9957 # operand is either single or double
9958 #
9959 fabs_not_ext:
9960         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
9961         bne.b           fabs_dbl                # not sgl; go handle dbl
9962
9963 #
9964 # operand is to be rounded to single precision
9965 #
9966 fabs_sgl:
9967         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy source operand
9968         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # into the FP_SCR0
9969         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # scratch area
9970         bsr.l           scale_to_zero_src       # calculate scale factor (used below in d0)
9971
9972         cmpi.l          %d0,&0x3fff-0x3f80      # will move in underflow?
9973         bge.w           fabs_sd_unfl            # yes; go handle underflow
9974         cmpi.l          %d0,&0x3fff-0x407e      # will move in overflow?
9975         beq.w           fabs_sd_may_ovfl        # maybe; go check
9976         blt.w           fabs_sd_ovfl            # yes; go handle overflow
9977
9978 #
9979 # operand will NOT overflow or underflow when moved in to the fp reg file
9980 #
9981 fabs_sd_normal:
9982         fmov.l          &0x0,%fpsr              # clear FPSR
9983         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
9984
9985         fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
9986
9987         fmov.l          %fpsr,%d1               # save FPSR
9988         fmov.l          &0x0,%fpcr              # clear FPCR
9989
9990         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
9991
9992 fabs_sd_normal_exit:
9993         mov.l           %d2,-(%sp)              # save d2
9994         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
9995         mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
9996         mov.l           %d1,%d2                 # make a copy
9997         andi.l          &0x7fff,%d1             # strip sign
9998         sub.l           %d0,%d1                 # subtract scale factor
9999         andi.w          &0x8000,%d2             # keep old sign
10000         or.w            %d1,%d2                 # concat old sign,new exp
10001         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
10002         mov.l           (%sp)+,%d2              # restore d2
10003         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10004         rts
10005
10006 #
10007 # operand is to be rounded to double precision
10008 #
10009 fabs_dbl:
10010         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy source operand
10011         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6) # into the FP_SCR0
10012         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # scratch area
10013         bsr.l           scale_to_zero_src       # calculate scale factor (used below in d0)
10014
10015         cmpi.l          %d0,&0x3fff-0x3c00      # will move in underflow?
10016         bge.b           fabs_sd_unfl            # yes; go handle underflow
10017         cmpi.l          %d0,&0x3fff-0x43fe      # will move in overflow?
10018         beq.w           fabs_sd_may_ovfl        # maybe; go check
10019         blt.w           fabs_sd_ovfl            # yes; go handle overflow
10020         bra.w           fabs_sd_normal          # no; go handle normalized op
10021
10022 #
10023 # operand WILL underflow when moved in to the fp register file
10024 #
10025 fabs_sd_unfl:
10026         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10027
10028         bclr            &0x7,FP_SCR0_EX(%a6)    # force absolute value (clear bit 7 of first byte)
10029
10030 # if underflow or inexact is enabled, go calculate EXOP first.
10031         mov.b           FPCR_ENABLE(%a6),%d1
10032         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10033         bne.b           fabs_sd_unfl_ena        # yes
10034
10035 fabs_sd_unfl_dis:
10036         lea             FP_SCR0(%a6),%a0        # pass: result addr
10037         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10038         bsr.l           unf_res                 # calculate default result
10039         or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
10040         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10041         rts
10042
10043 #
10044 # operand will underflow AND underflow or inexact is enabled.
10045 # Therefore, we must return the result rounded to extended precision.
10046 #
10047 fabs_sd_unfl_ena:
10048         mov.l           FP_SCR0_HI(%a6),FP_SCR1_HI(%a6) # build the EXOP in FP_SCR1
10049         mov.l           FP_SCR0_LO(%a6),FP_SCR1_LO(%a6) # so FP_SCR0 stays intact
10050         mov.w           FP_SCR0_EX(%a6),%d1     # load current exponent
10051
10052         mov.l           %d2,-(%sp)              # save d2
10053         mov.l           %d1,%d2                 # make a copy
10054         andi.l          &0x7fff,%d1             # strip sign
10055         andi.w          &0x8000,%d2             # keep old sign
10056         sub.l           %d0,%d1                 # subtract scale factor
10057         addi.l          &0x6000,%d1             # add new bias
10058         andi.w          &0x7fff,%d1             # clear sign position
10059         or.w            %d2,%d1                 # concat new sign,new exp
10060         mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
10061         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
10062         mov.l           (%sp)+,%d2              # restore d2
10063         bra.b           fabs_sd_unfl_dis        # now calculate the default result
10064
10065 #
10066 # operand WILL overflow.
10067 #
10068 fabs_sd_ovfl:
10069         fmov.l          &0x0,%fpsr              # clear FPSR
10070         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10071
10072         fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
10073
10074         fmov.l          &0x0,%fpcr              # clear FPCR
10075         fmov.l          %fpsr,%d1               # save FPSR
10076
10077         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10078
10079 fabs_sd_ovfl_tst:
10080         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
10081
10082         mov.b           FPCR_ENABLE(%a6),%d1
10083         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10084         bne.b           fabs_sd_ovfl_ena        # yes
10085
10086 #
10087 # Neither OVFL nor INEX is enabled (or the EXOP has already been built);
10088 # create the default result by calling ovf_res().
10089 #
10090 fabs_sd_ovfl_dis:
10091         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
10092         sne             %d1                     # set sign param accordingly
10093         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
10094         bsr.l           ovf_res                 # calculate default result
10095         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
10096         fmovm.x         (%a0),&0x80             # return default result in fp0
10097         rts
10098
10099 #
10100 # OVFL or INEX is enabled.
10101 # the INEX2 bit has already been updated by the round to the correct precision.
10102 # now, round to extended(and don't alter the FPSR).
10103 #
10104 fabs_sd_ovfl_ena:
10105         mov.l           %d2,-(%sp)              # save d2
10106         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10107         mov.l           %d1,%d2                 # make a copy
10108         andi.l          &0x7fff,%d1             # strip sign
10109         andi.w          &0x8000,%d2             # keep old sign
10110         sub.l           %d0,%d1                 # subtract scale factor
10111         subi.l          &0x6000,%d1             # subtract bias
10112         andi.w          &0x7fff,%d1             # clear sign position
10113         or.w            %d2,%d1                 # concat sign,exp
10114         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10115         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10116         mov.l           (%sp)+,%d2              # restore d2
10117         bra.b           fabs_sd_ovfl_dis        # now calculate the default result
10118
10119 #
10120 # the move in MAY overflow. so...
10121 #
10122 fabs_sd_may_ovfl:
10123         fmov.l          &0x0,%fpsr              # clear FPSR
10124         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10125
10126         fabs.x          FP_SCR0(%a6),%fp0       # perform absolute
10127
10128         fmov.l          %fpsr,%d1               # save status
10129         fmov.l          &0x0,%fpcr              # clear FPCR
10130
10131         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10132
10133         fabs.x          %fp0,%fp1               # make a copy of result
10134         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
10135         fbge.w          fabs_sd_ovfl_tst        # yes; overflow has occurred
10136
10137 # no, it didn't overflow; we have correct result
10138         bra.w           fabs_sd_normal_exit     # go insert the result exponent
10139
10140 ##########################################################################
10141
10142 #
10143 # input is not a NORM; what is it?
10144 #
10145 fabs_not_norm:
10146         cmpi.b          %d1,&DENORM             # weed out DENORM
10147         beq.w           fabs_denorm
10148         cmpi.b          %d1,&SNAN               # weed out SNAN
10149         beq.l           res_snan_1op
10150         cmpi.b          %d1,&QNAN               # weed out QNAN
10151         beq.l           res_qnan_1op
10152
10153         fabs.x          SRC(%a0),%fp0           # force absolute value
10154
10155         cmpi.b          %d1,&INF                # weed out INF
10156         beq.b           fabs_inf                # INF; otherwise fall into fabs_zero
10157 fabs_zero:
10158         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
10159         rts
10160 fabs_inf:
10161         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
10162         rts
10163
10164 #########################################################################
10165 # XDEF **************************************************************** #
10166 #       fcmp(): fp compare op routine                                   #
10167 #                                                                       #
10168 # XREF **************************************************************** #
10169 #       res_qnan() - return QNAN result                                 #
10170 #       res_snan() - return SNAN result                                 #
10171 #                                                                       #
10172 # INPUT *************************************************************** #
10173 #       a0 = pointer to extended precision source operand               #
10174 #       a1 = pointer to extended precision destination operand          #
10175 #       d0 = round prec/mode                                            #
10176 #                                                                       #
10177 # OUTPUT ************************************************************** #
10178 #       None                                                            #
10179 #                                                                       #
10180 # ALGORITHM *********************************************************** #
10181 #       Handle NANs and denorms as special cases. For everything else,  #
10182 # just use the actual fcmp instruction to produce the correct condition #
10183 # codes.                                                                #
10184 #                                                                       #
10185 #########################################################################
10186
10187         global          fcmp
10188 fcmp:
10189         clr.w           %d1                     # clear the index word
10190         mov.b           DTAG(%a6),%d1           # d1 = dst operand tag
10191         lsl.b           &0x3,%d1                # dst tag * 8
10192         or.b            STAG(%a6),%d1           # OR in the src operand tag
10193         bne.b           fcmp_not_norm           # nonzero ==> operands are not both NORMs
10194
10195 #
10196 # COMPARE FP OPs : NORMs, ZEROs, INFs, and "corrected" DENORMs
10197 #
10198 fcmp_norm:
10199         fmovm.x         DST(%a1),&0x80          # load dst op
10200
10201         fcmp.x          %fp0,SRC(%a0)           # do compare
10202
10203         fmov.l          %fpsr,%d0               # save FPSR
10204         rol.l           &0x8,%d0                # extract ccode bits (rotate top byte into low byte)
10205         mov.b           %d0,FPSR_CC(%a6)        # set ccode bits(no exc bits are set)
10206
10207         rts
10208
10209 #
10210 # fcmp: inputs are not both normalized; what are they?
10211 #
10212 fcmp_not_norm:
10213         mov.w           (tbl_fcmp_op.b,%pc,%d1.w*2),%d1 # fetch 16-bit offset for this tag pair
10214         jmp             (tbl_fcmp_op.b,%pc,%d1.w*1) # jump to tbl_fcmp_op + offset
10215
10216         swbeg           &48                     # 48 entries; index = dst tag*8 + src tag
10217 tbl_fcmp_op:
10218         short           fcmp_norm       - tbl_fcmp_op # NORM - NORM
10219         short           fcmp_norm       - tbl_fcmp_op # NORM - ZERO
10220         short           fcmp_norm       - tbl_fcmp_op # NORM - INF
10221         short           fcmp_res_qnan   - tbl_fcmp_op # NORM - QNAN
10222         short           fcmp_nrm_dnrm   - tbl_fcmp_op # NORM - DENORM
10223         short           fcmp_res_snan   - tbl_fcmp_op # NORM - SNAN
10224         short           tbl_fcmp_op     - tbl_fcmp_op #
10225         short           tbl_fcmp_op     - tbl_fcmp_op #
10226
10227         short           fcmp_norm       - tbl_fcmp_op # ZERO - NORM
10228         short           fcmp_norm       - tbl_fcmp_op # ZERO - ZERO
10229         short           fcmp_norm       - tbl_fcmp_op # ZERO - INF
10230         short           fcmp_res_qnan   - tbl_fcmp_op # ZERO - QNAN
10231         short           fcmp_dnrm_s     - tbl_fcmp_op # ZERO - DENORM
10232         short           fcmp_res_snan   - tbl_fcmp_op # ZERO - SNAN
10233         short           tbl_fcmp_op     - tbl_fcmp_op #
10234         short           tbl_fcmp_op     - tbl_fcmp_op #
10235
10236         short           fcmp_norm       - tbl_fcmp_op # INF - NORM
10237         short           fcmp_norm       - tbl_fcmp_op # INF - ZERO
10238         short           fcmp_norm       - tbl_fcmp_op # INF - INF
10239         short           fcmp_res_qnan   - tbl_fcmp_op # INF - QNAN
10240         short           fcmp_dnrm_s     - tbl_fcmp_op # INF - DENORM
10241         short           fcmp_res_snan   - tbl_fcmp_op # INF - SNAN
10242         short           tbl_fcmp_op     - tbl_fcmp_op #
10243         short           tbl_fcmp_op     - tbl_fcmp_op #
10244
10245         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - NORM
10246         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - ZERO
10247         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - INF
10248         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - QNAN
10249         short           fcmp_res_qnan   - tbl_fcmp_op # QNAN - DENORM
10250         short           fcmp_res_snan   - tbl_fcmp_op # QNAN - SNAN
10251         short           tbl_fcmp_op     - tbl_fcmp_op #
10252         short           tbl_fcmp_op     - tbl_fcmp_op #
10253
10254         short           fcmp_dnrm_nrm   - tbl_fcmp_op # DENORM - NORM
10255         short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - ZERO
10256         short           fcmp_dnrm_d     - tbl_fcmp_op # DENORM - INF
10257         short           fcmp_res_qnan   - tbl_fcmp_op # DENORM - QNAN
10258         short           fcmp_dnrm_sd    - tbl_fcmp_op # DENORM - DENORM
10259         short           fcmp_res_snan   - tbl_fcmp_op # DENORM - SNAN
10260         short           tbl_fcmp_op     - tbl_fcmp_op #
10261         short           tbl_fcmp_op     - tbl_fcmp_op #
10262
10263         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - NORM
10264         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - ZERO
10265         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - INF
10266         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - QNAN
10267         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - DENORM
10268         short           fcmp_res_snan   - tbl_fcmp_op # SNAN - SNAN
10269         short           tbl_fcmp_op     - tbl_fcmp_op #
10270         short           tbl_fcmp_op     - tbl_fcmp_op #
10271
10272 # Unlike every other routine that handles QNAN and SNAN inputs, fcmp does NOT
10273 # set the 'N' bit for a negative QNAN or SNAN input, so we must squelch it here.
10274 fcmp_res_qnan:
10275         bsr.l           res_qnan                # produce the QNAN result
10276         andi.b          &0xf7,FPSR_CC(%a6)      # clear bit 3 ('N') of the ccode byte
10277         rts
10278 fcmp_res_snan:
10279         bsr.l           res_snan                # produce the SNAN result
10280         andi.b          &0xf7,FPSR_CC(%a6)      # fcmp must not report 'N' for a NAN: clear bit 3
10281         rts
10282
10283 #
10284 # DENORMs are a little more difficult.
10285 # If you have 2 DENORMs, then you can just force the j-bit to a one
10286 # and use the fcmp_norm routine.
10287 # If you have a DENORM and an INF or ZERO, just force the DENORM's j-bit to a one
10288 # and use the fcmp_norm routine.
10289 # If you have a DENORM and a NORM with opposite signs, then use fcmp_norm, also.
10290 # But with a DENORM and a NORM of the same sign, the neg bit is set if the
10291 # (1) signs are (+) and the DENORM is the dst or
10292 # (2) signs are (-) and the DENORM is the src
10293 #
10294
10295 fcmp_dnrm_s:
10296         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src {sgn,exp} to scratch
10297         mov.l           SRC_HI(%a0),%d0         # fetch src hi mantissa
10298         bset            &31,%d0                 # DENORM src; force j-bit => small norm
10299         mov.l           %d0,FP_SCR0_HI(%a6)     # store patched hi mantissa
10300         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy src lo mantissa unchanged
10301         lea             FP_SCR0(%a6),%a0        # a0 = patched copy of src
10302         bra.w           fcmp_norm               # finish with the regular compare
10303
10304 fcmp_dnrm_d:
10305         mov.w           DST_EX(%a1),FP_SCR0_EX(%a6) # copy dst {sgn,exp} word only (as fcmp_dnrm_s/_sd do)
10306         mov.l           DST_HI(%a1),%d0         # fetch dst hi mantissa
10307         bset            &31,%d0                 # DENORM dst; force j-bit => small norm
10308         mov.l           %d0,FP_SCR0_HI(%a6)     # store patched hi mantissa
10309         mov.l           DST_LO(%a1),FP_SCR0_LO(%a6) # copy dst lo mantissa unchanged
10310         lea             FP_SCR0(%a6),%a1        # a1 = patched copy of dst
10311         bra.w           fcmp_norm               # finish with the regular compare
10312
10313 fcmp_dnrm_sd:
10314         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # copy dst {sgn,exp} to scratch 1
10315         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # copy src {sgn,exp} to scratch 0
10316         mov.l           DST_HI(%a1),%d0         # fetch dst hi mantissa
10317         bset            &31,%d0                 # DENORM dst; make into small norm
10318         mov.l           %d0,FP_SCR1_HI(%a6)     # store patched dst hi mantissa
10319         mov.l           SRC_HI(%a0),%d0         # fetch src hi mantissa
10320         bset            &31,%d0                 # DENORM src; make into small norm
10321         mov.l           %d0,FP_SCR0_HI(%a6)     # store patched src hi mantissa
10322         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6) # copy dst lo mantissa unchanged
10323         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6) # copy src lo mantissa unchanged
10324         lea             FP_SCR1(%a6),%a1        # a1 = patched copy of dst
10325         lea             FP_SCR0(%a6),%a0        # a0 = patched copy of src
10326         bra.w           fcmp_norm               # finish with the regular compare
10327
10328 fcmp_nrm_dnrm:
10329         mov.b           SRC_EX(%a0),%d0         # determine if like signs: d0 = src sign byte
10330         mov.b           DST_EX(%a1),%d1         # d1 = dst sign byte
10331         eor.b           %d0,%d1                 # bit 7 set => signs differ
10332         bmi.w           fcmp_dnrm_s             # unlike signs: patch DENORM src, use fcmp_norm
10333
10334 # signs are the same, so must determine the answer ourselves.
10335         tst.b           %d0                     # is src op negative?
10336         bmi.b           fcmp_nrm_dnrm_m         # yes
10337         rts                                     # both (+): FPSR_CC is not written here
10338 fcmp_nrm_dnrm_m:
10339         mov.b           &neg_bmask,FPSR_CC(%a6) # both (-), DENORM is src: ccodes = 'N' only
10340         rts
10341
10342 fcmp_dnrm_nrm:
10343         mov.b           SRC_EX(%a0),%d0         # determine if like signs: d0 = src sign byte
10344         mov.b           DST_EX(%a1),%d1         # d1 = dst sign byte
10345         eor.b           %d0,%d1                 # bit 7 set => signs differ
10346         bmi.w           fcmp_dnrm_d             # unlike signs: patch DENORM dst, use fcmp_norm
10347
10348 # signs are the same, so must determine the answer ourselves.
10349         tst.b           %d0                     # is src op negative?
10350         bpl.b           fcmp_dnrm_nrm_m         # no
10351         rts                                     # both (-): FPSR_CC is not written here
10352 fcmp_dnrm_nrm_m:
10353         mov.b           &neg_bmask,FPSR_CC(%a6) # both (+), DENORM is dst: ccodes = 'N' only
10354         rts
10355
10356 #########################################################################
10357 # XDEF **************************************************************** #
10358 #       fsglmul(): emulates the fsglmul instruction                     #
10359 #                                                                       #
10360 # XREF **************************************************************** #
10361 #       scale_to_zero_src() - scale src exponent to zero                #
10362 #       scale_to_zero_dst() - scale dst exponent to zero                #
10363 #       unf_res4() - return default underflow result for sglop          #
10364 #       ovf_res() - return default overflow result                      #
10365 #       res_qnan() - return QNAN result                                 #
10366 #       res_snan() - return SNAN result                                 #
10367 #                                                                       #
10368 # INPUT *************************************************************** #
10369 #       a0 = pointer to extended precision source operand               #
10370 #       a1 = pointer to extended precision destination operand          #
10371 #       d0 = rnd prec,mode                                              #
10372 #                                                                       #
10373 # OUTPUT ************************************************************** #
10374 #       fp0 = result                                                    #
10375 #       fp1 = EXOP (if exception occurred)                              #
10376 #                                                                       #
10377 # ALGORITHM *********************************************************** #
10378 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
10379 # norms/denorms into ext/sgl/dbl precision.                             #
10380 #       For norms/denorms, scale the exponents such that a multiply     #
10381 # instruction won't cause an exception. Use the regular fsglmul to      #
10382 # compute a result. Check if the regular operands would have taken      #
10383 # an exception. If so, return the default overflow/underflow result     #
10384 # and return the EXOP if exceptions are enabled. Else, scale the        #
10385 # result operand to the proper exponent.                                #
10386 #                                                                       #
10387 #########################################################################
10388
10389         global          fsglmul
10390 fsglmul:
10391         mov.l           %d0,L_SCR3(%a6)         # store rnd info
10392
10393         clr.w           %d1                     # table index is used as a word
10394         mov.b           DTAG(%a6),%d1           # d1 = dst tag
10395         lsl.b           &0x3,%d1                # dst tag * 8 (8 table slots per dst tag)
10396         or.b            STAG(%a6),%d1           # d1 = dst tag * 8 + src tag
10397
10398         bne.w           fsglmul_not_norm        # optimize on non-norm input (index 0 = NORM x NORM)
10399
10400 fsglmul_norm:
10401         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # FP_SCR1 = copy of dst operand
10402         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
10403         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10404
10405         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # FP_SCR0 = copy of src operand
10406         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10407         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10408
10409         bsr.l           scale_to_zero_src       # scale src exponent; d0 = scale factor 1
10410         mov.l           %d0,-(%sp)              # save scale factor 1
10411
10412         bsr.l           scale_to_zero_dst       # scale dst exponent; d0 = scale factor 2
10413
10414         add.l           (%sp)+,%d0              # SCALE_FACTOR = scale1 + scale2
10415
10416         cmpi.l          %d0,&0x3fff-0x7ffe      # would result ovfl?
10417         beq.w           fsglmul_may_ovfl        # result may rnd to overflow
10418         blt.w           fsglmul_ovfl            # result will overflow
10419
10420         cmpi.l          %d0,&0x3fff+0x0001      # would result unfl?
10421         beq.w           fsglmul_may_unfl        # result may rnd to no unfl
10422         bgt.w           fsglmul_unfl            # result will underflow
10423
10424 fsglmul_normal:
10425         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10426
10427         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10428         fmov.l          &0x0,%fpsr              # clear FPSR
10429
10430         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10431
10432         fmov.l          %fpsr,%d1               # save status
10433         fmov.l          &0x0,%fpcr              # clear FPCR
10434
10435         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX2,N) into user FPSR
10436
10437 fsglmul_normal_exit:
10438         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10439         mov.l           %d2,-(%sp)              # save d2
10440         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
10441         mov.l           %d1,%d2                 # make a copy
10442         andi.l          &0x7fff,%d1             # strip sign
10443         andi.w          &0x8000,%d2             # keep old sign
10444         sub.l           %d0,%d1                 # subtract scale factor (d0) from exponent
10445         or.w            %d2,%d1                 # concat old sign,new exp
10446         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10447         mov.l           (%sp)+,%d2              # restore d2
10448         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10449         rts
10450
10451 fsglmul_ovfl:
10452         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10453
10454         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10455         fmov.l          &0x0,%fpsr              # clear FPSR
10456
10457         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10458
10459         fmov.l          %fpsr,%d1               # save status
10460         fmov.l          &0x0,%fpcr              # clear FPCR
10461
10462         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX2,N) into user FPSR
10463
10464 fsglmul_ovfl_tst:
10465
10466 # save setting this until now because this is where fsglmul_may_ovfl may jump in
10467         or.l            &ovfl_inx_mask, USER_FPSR(%a6) # set ovfl/aovfl/ainex
10468
10469         mov.b           FPCR_ENABLE(%a6),%d1
10470         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10471         bne.b           fsglmul_ovfl_ena        # yes
10472
10473 fsglmul_ovfl_dis:
10474         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
10475         sne             %d1                     # set sign param accordingly
10476         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
10477         andi.b          &0x30,%d0               # keep rnd mode bits; prec bits = 0 (ext)
10478         bsr.l           ovf_res                 # calculate default result
10479         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
10480         fmovm.x         (%a0),&0x80             # return default result in fp0
10481         rts
10482
10483 fsglmul_ovfl_ena:
10484         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
10485
10486         mov.l           %d2,-(%sp)              # save d2
10487         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10488         mov.l           %d1,%d2                 # make a copy
10489         andi.l          &0x7fff,%d1             # strip sign
10490         sub.l           %d0,%d1                 # subtract scale factor (d0) from exponent
10491         subi.l          &0x6000,%d1             # subtract bias
10492         andi.w          &0x7fff,%d1             # clear ms bit of the exponent word
10493         andi.w          &0x8000,%d2             # keep old sign
10494         or.w            %d2,%d1                 # concat old sign,new exp
10495         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10496         mov.l           (%sp)+,%d2              # restore d2
10497         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10498         bra.b           fsglmul_ovfl_dis        # then build the default result in fp0
10499
10500 fsglmul_may_ovfl:
10501         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10502
10503         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10504         fmov.l          &0x0,%fpsr              # clear FPSR
10505
10506         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10507
10508         fmov.l          %fpsr,%d1               # save status
10509         fmov.l          &0x0,%fpcr              # clear FPCR
10510
10511         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX2,N) into user FPSR
10512
10513         fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the result
10514         fcmp.b          %fp1,&0x2               # is |result| >= 2.b?
10515         fbge.w          fsglmul_ovfl_tst        # yes; overflow has occurred
10516
10517 # no, it didn't overflow; we have correct result
10518         bra.w           fsglmul_normal_exit     # rescale the exponent and return fp0
10519
10520 fsglmul_unfl:
10521         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10522
10523         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10524
10525         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR: RZ rounding mode only
10526         fmov.l          &0x0,%fpsr              # clear FPSR
10527
10528         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10529
10530         fmov.l          %fpsr,%d1               # save status
10531         fmov.l          &0x0,%fpcr              # clear FPCR
10532
10533         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX2,N) into user FPSR
10534
10535         mov.b           FPCR_ENABLE(%a6),%d1
10536         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10537         bne.b           fsglmul_unfl_ena        # yes
10538
10539 fsglmul_unfl_dis:
10540         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10541
10542         lea             FP_SCR0(%a6),%a0        # pass: result addr
10543         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10544         bsr.l           unf_res4                # calculate default result
10545         or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
10546         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10547         rts
10548
10549 #
10550 # UNFL or INEX is enabled: redo the multiply into fp1 to build the EXOP.
10551 #
10552 fsglmul_unfl_ena:
10553         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
10554
10555         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10556         fmov.l          &0x0,%fpsr              # clear FPSR
10557
10558         fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
10559
10560         fmov.l          &0x0,%fpcr              # clear FPCR
10561
10562         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
10563         mov.l           %d2,-(%sp)              # save d2
10564         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10565         mov.l           %d1,%d2                 # make a copy
10566         andi.l          &0x7fff,%d1             # strip sign
10567         andi.w          &0x8000,%d2             # keep old sign
10568         sub.l           %d0,%d1                 # subtract scale factor (d0) from exponent
10569         addi.l          &0x6000,%d1             # add bias
10570         andi.w          &0x7fff,%d1             # clear ms bit of the exponent word
10571         or.w            %d2,%d1                 # concat old sign,new exp
10572         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10573         mov.l           (%sp)+,%d2              # restore d2
10574         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10575         bra.w           fsglmul_unfl_dis        # then build the default result in fp0
10576
10577 fsglmul_may_unfl:
10578         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10579
10580         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10581         fmov.l          &0x0,%fpsr              # clear FPSR
10582
10583         fsglmul.x       FP_SCR0(%a6),%fp0       # execute sgl multiply
10584
10585         fmov.l          %fpsr,%d1               # save status
10586         fmov.l          &0x0,%fpcr              # clear FPCR
10587
10588         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX2,N) into user FPSR
10589
10590         fabs.x          %fp0,%fp1               # fp1 = |result|; fp0 keeps the result
10591         fcmp.b          %fp1,&0x2               # compare |result| with 2.b
10592         fbgt.w          fsglmul_normal_exit     # |result| > 2: no underflow occurred
10593         fblt.w          fsglmul_unfl            # |result| < 2: underflow occurred
10594
10595 #
10596 # We still don't know if underflow occurred: the result is ~ equal to 2, but
10597 # we don't know if the result was an underflow that rounded up to a 2 or
10598 # a normalized number that rounded down to a 2. So, redo the entire operation
10599 # using RZ as the rounding mode to see what the pre-rounded result is.
10600 # This case should be relatively rare.
10601 #
10602         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
10603
10604         mov.l           L_SCR3(%a6),%d1         # fetch the saved rnd info
10605         andi.b          &0xc0,%d1               # keep rnd prec
10606         ori.b           &rz_mode*0x10,%d1       # insert RZ
10607
10608         fmov.l          %d1,%fpcr               # set FPCR
10609         fmov.l          &0x0,%fpsr              # clear FPSR
10610
10611         fsglmul.x       FP_SCR0(%a6),%fp1       # execute sgl multiply
10612
10613         fmov.l          &0x0,%fpcr              # clear FPCR
10614         fabs.x          %fp1                    # make absolute value
10615         fcmp.b          %fp1,&0x2               # is |result| < 2.b?
10616         fbge.w          fsglmul_normal_exit     # no; no underflow occurred
10617         bra.w           fsglmul_unfl            # yes, underflow occurred
10618
10619 ##############################################################################
10620
10621 # Dispatch on d1 = (dst tag * 8) + src tag through a table of 16-bit offsets.
10622 # Single Precision Multiply: inputs are not both normalized; what are they?
10623 # Each entry is a handler address minus tbl_fsglmul_op; comments read "dst x src".
10624 fsglmul_not_norm:
10625         mov.w           (tbl_fsglmul_op.b,%pc,%d1.w*2),%d1 # d1 = offset for this tag pair
10626         jmp             (tbl_fsglmul_op.b,%pc,%d1.w*1) # jump to tbl_fsglmul_op + offset
10627
10628         swbeg           &48                     # the table below holds 48 word entries
10629 tbl_fsglmul_op:
10630         short           fsglmul_norm            - tbl_fsglmul_op # NORM x NORM
10631         short           fsglmul_zero            - tbl_fsglmul_op # NORM x ZERO
10632         short           fsglmul_inf_src         - tbl_fsglmul_op # NORM x INF
10633         short           fsglmul_res_qnan        - tbl_fsglmul_op # NORM x QNAN
10634         short           fsglmul_norm            - tbl_fsglmul_op # NORM x DENORM
10635         short           fsglmul_res_snan        - tbl_fsglmul_op # NORM x SNAN
10636         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10637         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10638
10639         short           fsglmul_zero            - tbl_fsglmul_op # ZERO x NORM
10640         short           fsglmul_zero            - tbl_fsglmul_op # ZERO x ZERO
10641         short           fsglmul_res_operr       - tbl_fsglmul_op # ZERO x INF
10642         short           fsglmul_res_qnan        - tbl_fsglmul_op # ZERO x QNAN
10643         short           fsglmul_zero            - tbl_fsglmul_op # ZERO x DENORM
10644         short           fsglmul_res_snan        - tbl_fsglmul_op # ZERO x SNAN
10645         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10646         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10647
10648         short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x NORM
10649         short           fsglmul_res_operr       - tbl_fsglmul_op # INF x ZERO
10650         short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x INF
10651         short           fsglmul_res_qnan        - tbl_fsglmul_op # INF x QNAN
10652         short           fsglmul_inf_dst         - tbl_fsglmul_op # INF x DENORM
10653         short           fsglmul_res_snan        - tbl_fsglmul_op # INF x SNAN
10654         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10655         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10656
10657         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x NORM
10658         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x ZERO
10659         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x INF
10660         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x QNAN
10661         short           fsglmul_res_qnan        - tbl_fsglmul_op # QNAN x DENORM
10662         short           fsglmul_res_snan        - tbl_fsglmul_op # QNAN x SNAN
10663         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10664         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10665
10666         short           fsglmul_norm            - tbl_fsglmul_op # DENORM x NORM
10667         short           fsglmul_zero            - tbl_fsglmul_op # DENORM x ZERO
10668         short           fsglmul_inf_src         - tbl_fsglmul_op # DENORM x INF
10669         short           fsglmul_res_qnan        - tbl_fsglmul_op # DENORM x QNAN
10670         short           fsglmul_norm            - tbl_fsglmul_op # DENORM x DENORM
10671         short           fsglmul_res_snan        - tbl_fsglmul_op # DENORM x SNAN
10672         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10673         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10674
10675         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x NORM
10676         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x ZERO
10677         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x INF
10678         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x QNAN
10679         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x DENORM
10680         short           fsglmul_res_snan        - tbl_fsglmul_op # SNAN x SNAN
10681         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10682         short           tbl_fsglmul_op          - tbl_fsglmul_op #
10683
10684 fsglmul_res_operr:
10685         bra.l           res_operr               # ZERO x INF or INF x ZERO
10686 fsglmul_res_snan:
10687         bra.l           res_snan                # either operand is an SNAN
10688 fsglmul_res_qnan:
10689         bra.l           res_qnan                # a QNAN operand and no SNAN operand
10690 fsglmul_zero:
10691         bra.l           fmul_zero               # a ZERO operand: reuse the fmul handler
10692 fsglmul_inf_src:
10693         bra.l           fmul_inf_src            # src is INF: reuse the fmul handler
10694 fsglmul_inf_dst:
10695         bra.l           fmul_inf_dst            # dst is INF: reuse the fmul handler
10696
10697 #########################################################################
10698 # XDEF **************************************************************** #
10699 #       fsgldiv(): emulates the fsgldiv instruction                     #
10700 #                                                                       #
10701 # XREF **************************************************************** #
10702 #       scale_to_zero_src() - scale src exponent to zero                #
10703 #       scale_to_zero_dst() - scale dst exponent to zero                #
10704 #       unf_res4() - return default underflow result for sglop          #
10705 #       ovf_res() - return default overflow result                      #
10706 #       res_qnan() - return QNAN result                                 #
10707 #       res_snan() - return SNAN result                                 #
10708 #                                                                       #
10709 # INPUT *************************************************************** #
10710 #       a0 = pointer to extended precision source operand               #
10711 #       a1 = pointer to extended precision destination operand          #
10712 #       d0 = rnd prec,mode                                              #
10713 #                                                                       #
10714 # OUTPUT ************************************************************** #
10715 #       fp0 = result                                                    #
10716 #       fp1 = EXOP (if exception occurred)                              #
10717 #                                                                       #
10718 # ALGORITHM *********************************************************** #
10719 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
10720 # norms/denorms into ext/sgl/dbl precision.                             #
10721 #       For norms/denorms, scale the exponents such that a divide       #
10722 # instruction won't cause an exception. Use the regular fsgldiv to      #
10723 # compute a result. Check if the regular operands would have taken      #
10724 # an exception. If so, return the default overflow/underflow result     #
10725 # and return the EXOP if exceptions are enabled. Else, scale the        #
10726 # result operand to the proper exponent.                                #
10727 #                                                                       #
10728 #########################################################################
10729
10730         global          fsgldiv
10731 fsgldiv:
10732         mov.l           %d0,L_SCR3(%a6)         # store rnd info
10733
10734         clr.w           %d1
10735         mov.b           DTAG(%a6),%d1           # d1 = dst tag
10736         lsl.b           &0x3,%d1                # dst tag * 8
10737         or.b            STAG(%a6),%d1           # combine dst and src tags
10738
10739         bne.w           fsgldiv_not_norm        # optimize on non-norm input
10740
10741 #
10742 # DIVIDE: NORMs and DENORMs ONLY!
10743 #
10744 fsgldiv_norm:
10745         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6) # FP_SCR1 = copy of dst operand
10746         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
10747         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
10748
10749         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6) # FP_SCR0 = copy of src operand
10750         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
10751         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
10752
10753         bsr.l           scale_to_zero_src       # calculate scale factor 1
10754         mov.l           %d0,-(%sp)              # save scale factor 1
10755
10756         bsr.l           scale_to_zero_dst       # calculate scale factor 2
10757
10758         neg.l           (%sp)                   # S.F. = scale2 - scale1: negate scale1 ...
10759         add.l           %d0,(%sp)               # ... then add scale2
10760
10761         mov.w           2+L_SCR3(%a6),%d1       # fetch precision,mode
10762         lsr.b           &0x6,%d1                # d1 = rnd prec; NOTE(review): not read on the paths visible here
10763         mov.l           (%sp)+,%d0              # d0 = S.F.
10764         cmpi.l          %d0,&0x3fff-0x7ffe      # may result overflow?
10765         ble.w           fsgldiv_may_ovfl        # maybe; decided after the divide
10766
10767         cmpi.l          %d0,&0x3fff-0x0000      # will result underflow?
10768         beq.w           fsgldiv_may_unfl        # maybe
10769         bgt.w           fsgldiv_unfl            # yes; go handle underflow
10770
10771 fsgldiv_normal:
10772         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10773
10774         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10775         fmov.l          &0x0,%fpsr              # clear FPSR
10776
10777         fsgldiv.x       FP_SCR0(%a6),%fp0       # perform sgl divide
10778
10779         fmov.l          %fpsr,%d1               # save FPSR
10780         fmov.l          &0x0,%fpcr              # clear FPCR
10781
10782         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX2,N) into user FPSR
10783
10784 fsgldiv_normal_exit:
10785         fmovm.x         &0x80,FP_SCR0(%a6)      # store result on stack
10786         mov.l           %d2,-(%sp)              # save d2
10787         mov.w           FP_SCR0_EX(%a6),%d1     # load {sgn,exp}
10788         mov.l           %d1,%d2                 # make a copy
10789         andi.l          &0x7fff,%d1             # strip sign
10790         andi.w          &0x8000,%d2             # keep old sign
10791         sub.l           %d0,%d1                 # subtract scale factor (d0) from exponent
10792         or.w            %d2,%d1                 # concat old sign,new exp
10793         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10794         mov.l           (%sp)+,%d2              # restore d2
10795         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
10796         rts
10797
10798 fsgldiv_may_ovfl:
10799         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op into fp0
10800
10801         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR from the saved rnd info
10802         fmov.l          &0x0,%fpsr              # clear FPSR
10803
10804         fsgldiv.x       FP_SCR0(%a6),%fp0       # execute divide
10805
10806         fmov.l          %fpsr,%d1               # save status
10807         fmov.l          &0x0,%fpcr              # clear FPCR
10808
10809         or.l            %d1,USER_FPSR(%a6)      # merge status (INEX,N) into user FPSR
10810
10811         fmovm.x         &0x01,-(%sp)            # save result to stack
10812         mov.w           (%sp),%d1               # fetch new exponent
10813         add.l           &0xc,%sp                # pop the 12-byte result again
10814         andi.l          &0x7fff,%d1             # strip sign
10815         sub.l           %d0,%d1                 # subtract scale factor (d0) from exponent
10816         cmp.l           %d1,&0x7fff             # did divide overflow?
10817         blt.b           fsgldiv_normal_exit     # no; else fall into the overflow test
10818
10819 fsgldiv_ovfl_tst:
10820         or.w            &ovfl_inx_mask,2+USER_FPSR(%a6) # set ovfl/aovfl/ainex
10821
10822         mov.b           FPCR_ENABLE(%a6),%d1
10823         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
10824         bne.b           fsgldiv_ovfl_ena        # yes
10825
10826 fsgldiv_ovfl_dis:
10827         btst            &neg_bit,FPSR_CC(%a6)   # is result negative
10828         sne             %d1                     # set sign param accordingly
10829         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
10830         andi.b          &0x30,%d0               # kill precision; keep rnd mode bits
10831         bsr.l           ovf_res                 # calculate default result
10832         or.b            %d0,FPSR_CC(%a6)        # set INF if applicable
10833         fmovm.x         (%a0),&0x80             # return default result in fp0
10834         rts
10835
10836 fsgldiv_ovfl_ena:
10837         fmovm.x         &0x80,FP_SCR0(%a6)      # move result to stack
10838
10839         mov.l           %d2,-(%sp)              # save d2
10840         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10841         mov.l           %d1,%d2                 # make a copy
10842         andi.l          &0x7fff,%d1             # strip sign
10843         andi.w          &0x8000,%d2             # keep old sign
10844         sub.l           %d0,%d1                 # subtract scale factor (d0) from exponent
10845         subi.l          &0x6000,%d1             # subtract new bias
10846         andi.w          &0x7fff,%d1             # clear ms bit
10847         or.w            %d2,%d1                 # concat old sign,new exp
10848         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10849         mov.l           (%sp)+,%d2              # restore d2
10850         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10851         bra.b           fsgldiv_ovfl_dis        # then build the default result in fp0
10852
10853 fsgldiv_unfl:
      # Underflow path of the single-precision divide: set the UNFL exception
      # bit, redo the divide with the RZ rounding mode, merge the resulting
      # status into the saved user FPSR, then pick the handler according to
      # the exception-enable byte.
10854         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
10855
10856         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10857
10858         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR: rnd mode = RZ
10859         fmov.l          &0x0,%fpsr              # clear FPSR
10860
10861         fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
10862
10863         fmov.l          %fpsr,%d1               # save status
10864         fmov.l          &0x0,%fpcr              # clear FPCR
10865
10866         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10867
10868         mov.b           FPCR_ENABLE(%a6),%d1
10869         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
10870         bne.b           fsgldiv_unfl_ena        # yes
10871
10872 fsgldiv_unfl_dis:
      # Return the default underflow result in fp0. Reached by fall-through
      # when the exceptions are disabled, and from fsgldiv_unfl_ena after the
      # EXOP has been placed in fp1. fp0 still holds the RZ quotient here.
10873         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
10874
10875         lea             FP_SCR0(%a6),%a0        # pass: result addr
10876         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
10877         bsr.l           unf_res4                # calculate default result
10878         or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
10879         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
10880         rts
10881
10882 #
10883 # UNFL or INEX is enabled: redo the divide into fp1 under the user's
10884 # rounding settings and re-bias its exponent to form the EXOP.
10885 fsgldiv_unfl_ena:
      # NOTE(review): d0 is consumed below as the scale factor; it is set
      # before this part of the routine -- confirm against the scaling code.
10886         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
10887
10888         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10889         fmov.l          &0x0,%fpsr              # clear FPSR
10890
10891         fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
10892
10893         fmov.l          &0x0,%fpcr              # clear FPCR
10894
10895         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
10896         mov.l           %d2,-(%sp)              # save d2
10897         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
10898         mov.l           %d1,%d2                 # make a copy
10899         andi.l          &0x7fff,%d1             # strip sign
10900         andi.w          &0x8000,%d2             # keep old sign
10901         sub.l           %d0,%d1                 # undo scaling: d1 -= scale factor
10902         addi.l          &0x6000,%d1             # add bias
10903         andi.w          &0x7fff,%d1             # clear top bit
10904         or.w            %d2,%d1                 # concat old sign, new exp
10905         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
10906         mov.l           (%sp)+,%d2              # restore d2
10907         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
10908         bra.b           fsgldiv_unfl_dis        # then return default result in fp0
10909
10910 #
10911 # the divide operation MAY underflow:
10912 #
10913 fsgldiv_may_unfl:
      # Perform the divide under the user's rounding settings, merge the status
      # into the saved user FPSR, and classify the result by comparing its
      # magnitude against 1: above 1 exits normally, below 1 is an underflow,
      # and exactly 1 needs the RZ re-check further down.
10914         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
10915
10916         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
10917         fmov.l          &0x0,%fpsr              # clear FPSR
10918
10919         fsgldiv.x       FP_SCR0(%a6),%fp0       # execute sgl divide
10920
10921         fmov.l          %fpsr,%d1               # save status
10922         fmov.l          &0x0,%fpcr              # clear FPCR
10923
10924         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
10925
10926         fabs.x          %fp0,%fp1               # make a copy of |result|
10927         fcmp.b          %fp1,&0x1               # compare |result| with 1.b
10928         fbgt.w          fsgldiv_normal_exit     # |result| > 1; no underflow occurred
10929         fblt.w          fsgldiv_unfl            # |result| < 1; underflow occurred
10930
10931 #
10932 # we still don't know if underflow occurred. result is ~ equal to 1. but,
10933 # we don't know if the result was an underflow that rounded up to a 1
10934 # or a normalized number that rounded down to a 1. so, redo the entire
10935 # operation using RZ as the rounding mode to see what the pre-rounded
10936 # result is. this case should be relatively rare.
10937 #
10938         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into %fp1
10939
10940         clr.l           %d1                     # clear scratch register
10941         ori.b           &rz_mode*0x10,%d1       # force RZ rnd mode
10942
10943         fmov.l          %d1,%fpcr               # set FPCR
10944         fmov.l          &0x0,%fpsr              # clear FPSR
10945
10946         fsgldiv.x       FP_SCR0(%a6),%fp1       # execute sgl divide
10947
10948         fmov.l          &0x0,%fpcr              # clear FPCR
10949         fabs.x          %fp1                    # make absolute value
10950         fcmp.b          %fp1,&0x1               # is |result| < 1.b?
10951         fbge.w          fsgldiv_normal_exit     # no; no underflow occurred
10952         bra.w           fsgldiv_unfl            # yes; underflow occurred
10953
10954 ############################################################################
10955
10956 #
10957 # Divide: inputs are not both normalized; what are they?
10958 #
10959 fsgldiv_not_norm:
      # Dispatch through tbl_fsgldiv_op: fetch the 16-bit entry selected by d1,
      # then jump to (table base + that entry).
      # NOTE(review): d1 is presumably the combined dst/src operand tag built
      # before this part of the routine (8 slots per dst type) -- confirm.
10960         mov.w           (tbl_fsgldiv_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset from table
10961         jmp             (tbl_fsgldiv_op.b,%pc,%d1.w*1)     # go to table base + offset
10962
10963         swbeg           &48
10964 tbl_fsgldiv_op:
      # 48 entries of 16-bit offsets relative to tbl_fsgldiv_op, laid out as
      # six groups of eight. Entry comments read "first operand / second
      # operand"; the last two slots of each group hold offset 0.
10965         short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / NORM
10966         short           fsgldiv_inf_load        - tbl_fsgldiv_op # NORM / ZERO
10967         short           fsgldiv_zero_load       - tbl_fsgldiv_op # NORM / INF
10968         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # NORM / QNAN
10969         short           fsgldiv_norm            - tbl_fsgldiv_op # NORM / DENORM
10970         short           fsgldiv_res_snan        - tbl_fsgldiv_op # NORM / SNAN
10971         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10972         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10973
10974         short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / NORM
10975         short           fsgldiv_res_operr       - tbl_fsgldiv_op # ZERO / ZERO
10976         short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / INF
10977         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # ZERO / QNAN
10978         short           fsgldiv_zero_load       - tbl_fsgldiv_op # ZERO / DENORM
10979         short           fsgldiv_res_snan        - tbl_fsgldiv_op # ZERO / SNAN
10980         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10981         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10982
10983         short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / NORM
10984         short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / ZERO
10985         short           fsgldiv_res_operr       - tbl_fsgldiv_op # INF / INF
10986         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # INF / QNAN
10987         short           fsgldiv_inf_dst         - tbl_fsgldiv_op # INF / DENORM
10988         short           fsgldiv_res_snan        - tbl_fsgldiv_op # INF / SNAN
10989         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10990         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10991
10992         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / NORM
10993         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / ZERO
10994         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / INF
10995         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / QNAN
10996         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # QNAN / DENORM
10997         short           fsgldiv_res_snan        - tbl_fsgldiv_op # QNAN / SNAN
10998         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
10999         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
11000
11001         short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / NORM
11002         short           fsgldiv_inf_load        - tbl_fsgldiv_op # DENORM / ZERO
11003         short           fsgldiv_zero_load       - tbl_fsgldiv_op # DENORM / INF
11004         short           fsgldiv_res_qnan        - tbl_fsgldiv_op # DENORM / QNAN
11005         short           fsgldiv_norm            - tbl_fsgldiv_op # DENORM / DENORM
11006         short           fsgldiv_res_snan        - tbl_fsgldiv_op # DENORM / SNAN
11007         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
11008         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
11009
11010         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / NORM
11011         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / ZERO
11012         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / INF
11013         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / QNAN
11014         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / DENORM
11015         short           fsgldiv_res_snan        - tbl_fsgldiv_op # SNAN / SNAN
11016         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
11017         short           tbl_fsgldiv_op          - tbl_fsgldiv_op # filler: offset 0
11018
11019 fsgldiv_res_qnan:
      # Local landing pads named by tbl_fsgldiv_op. Each one is a single long
      # branch to a handler defined elsewhere in the file; the table itself
      # stores only 16-bit offsets to these labels.
11020         bra.l           res_qnan                # QNAN operand
11021 fsgldiv_res_snan:
11022         bra.l           res_snan                # SNAN operand
11023 fsgldiv_res_operr:
11024         bra.l           res_operr               # ZERO / ZERO or INF / INF
11025 fsgldiv_inf_load:
11026         bra.l           fdiv_inf_load           # reuse fdiv's handler
11027 fsgldiv_zero_load:
11028         bra.l           fdiv_zero_load          # reuse fdiv's handler
11029 fsgldiv_inf_dst:
11030         bra.l           fdiv_inf_dst            # reuse fdiv's handler
11031
11032 #########################################################################
11033 # XDEF **************************************************************** #
11034 #       fadd(): emulates the fadd instruction                           #
11035 #       fsadd(): emulates the fsadd instruction                         #
11036 #       fdadd(): emulates the fdadd instruction                         #
11037 #                                                                       #
11038 # XREF **************************************************************** #
11039 #       addsub_scaler2() - scale the operands so they won't take exc    #
11040 #       ovf_res() - return default overflow result                      #
11041 #       unf_res() - return default underflow result                     #
11042 #       res_qnan() - set QNAN result                                    #
11043 #       res_snan() - set SNAN result                                    #
11044 #       res_operr() - set OPERR result                                  #
11045 #       scale_to_zero_src() - set src operand exponent equal to zero    #
11046 #       scale_to_zero_dst() - set dst operand exponent equal to zero    #
11047 #                                                                       #
11048 # INPUT *************************************************************** #
11049 #       a0 = pointer to extended precision source operand               #
11050 #       a1 = pointer to extended precision destination operand          #
11051 #                                                                       #
11052 # OUTPUT ************************************************************** #
11053 #       fp0 = result                                                    #
11054 #       fp1 = EXOP (if exception occurred)                              #
11055 #                                                                       #
11056 # ALGORITHM *********************************************************** #
11057 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
11058 # norms into extended, single, and double precision.                    #
11059 #       Do addition after scaling exponents such that exception won't   #
11060 # occur. Then, check result exponent to see if exception would have     #
11061 # occurred. If so, return default result and maybe EXOP. Else, insert   #
11062 # the correct result exponent and return. Set FPSR bits as appropriate. #
11063 #                                                                       #
11064 #########################################################################
11065
11066         global          fsadd
11067 fsadd:
      # Entry point for fsadd: rewrite the rounding info in d0 so that the
      # precision field is single, then join the common fadd code.
11068         andi.b          &0x30,%d0               # keep rnd mode; clear rnd prec
11069         ori.b           &s_mode*0x10,%d0        # insert sgl prec
11070         bra.b           fadd                    # continue in common add code
11071
11072         global          fdadd
11073 fdadd:
      # Entry point for fdadd: rewrite the rounding info in d0 so that the
      # precision field is double. There is no branch at the end; execution
      # falls through into fadd, which follows immediately.
11074         andi.b          &0x30,%d0               # keep rnd mode; clear rnd prec
11075         ori.b           &d_mode*0x10,%d0        # insert dbl prec
11076
11077         global          fadd
11078 fadd:
      # Common add entry (also reached from fsadd and fdadd).
      #   in:  d0 = rounding info (stored to L_SCR3)
      #        DTAG/STAG(a6) = operand type tags
      #   out: fp0 = result; the exception paths may also leave an EXOP in fp1
      # The tags are combined as (DTAG << 3) | STAG. A zero value means both
      # operands take the norm path; anything else goes through tbl_fadd_op.
11079         mov.l           %d0,L_SCR3(%a6)         # store rnd info
11080
11081         clr.w           %d1
11082         mov.b           DTAG(%a6),%d1
11083         lsl.b           &0x3,%d1
11084         or.b            STAG(%a6),%d1           # combine dst and src tags
11085
11086         bne.w           fadd_not_norm           # optimize on non-norm input
11087
11088 #
11089 # ADD: norms and denorms
11090 #
11091 fadd_norm:
11092         bsr.l           addsub_scaler2          # scale exponents
11093
11094 fadd_zero_entry:
      # Also entered from fadd_zero_dst / fadd_zero_src with the operands
      # already placed in FP_SCR0 / FP_SCR1. d0 is used below as the scale
      # factor.
11095         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11096
11097         fmov.l          &0x0,%fpsr              # clear FPSR
11098         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11099
11100         fadd.x          FP_SCR0(%a6),%fp0       # execute add
11101
11102         fmov.l          &0x0,%fpcr              # clear FPCR
11103         fmov.l          %fpsr,%d1               # fetch INEX2,N,Z
11104
11105         or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
11106
11107         fbeq.w          fadd_zero_exit          # if result is zero, end now
11108
11109         mov.l           %d2,-(%sp)              # save d2
11110
11111         fmovm.x         &0x01,-(%sp)            # save result to stack
11112
11113         mov.w           2+L_SCR3(%a6),%d1       # low word of stored rnd info
11114         lsr.b           &0x6,%d1                # d1 = rnd prec, used as table index
11115
11116         mov.w           (%sp),%d2               # fetch new sign, exp
11117         andi.l          &0x7fff,%d2             # strip sign
11118         sub.l           %d0,%d2                 # undo scaling: d2 -= scale factor
11119
11120         cmp.l           %d2,(tbl_fadd_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11121         bge.b           fadd_ovfl               # yes
11122
11123         cmp.l           %d2,(tbl_fadd_unfl.b,%pc,%d1.w*4) # is it an underflow?
11124         blt.w           fadd_unfl               # yes
11125         beq.w           fadd_may_unfl           # maybe; go find out
11126
11127 fadd_normal:
      # No exception: patch the unscaled exponent (d2) into the stacked result
      # and return it. The stack holds the 12-byte result above the saved d2.
11128         mov.w           (%sp),%d1
11129         andi.w          &0x8000,%d1             # keep sign
11130         or.w            %d2,%d1                 # concat sign,new exp
11131         mov.w           %d1,(%sp)               # insert new exponent
11132
11133         fmovm.x         (%sp)+,&0x80            # return result in fp0
11134
11135         mov.l           (%sp)+,%d2              # restore d2
11136         rts
11137
11138 fadd_zero_exit:
      # Result is zero: fp0 already holds it, and nothing was pushed yet.
11139 #       fmov.s          &0x00000000,%fp0        # return zero in fp0
11140         rts
11141
11142 tbl_fadd_ovfl:
      # Overflow thresholds for the unscaled result exponent, one long per
      # rounding precision. fadd indexes this with (rnd prec * 4) and treats
      # exponent >= entry as an overflow.
11143         long            0x7fff                  # ext ovfl
11144         long            0x407f                  # sgl ovfl
11145         long            0x43ff                  # dbl ovfl
11146
11147 tbl_fadd_unfl:
      # Underflow thresholds, indexed the same way: exponent < entry is an
      # underflow, exponent == entry is a possible underflow.
11148         long            0x0000                  # ext unfl
11149         long            0x3f81                  # sgl unfl
11150         long            0x3c01                  # dbl unfl
11151
11152 fadd_ovfl:
      # Overflow path of fadd. On entry the stack holds the 12-byte scaled
      # result above the saved d2, d2 holds the unscaled result exponent and
      # d0 the scale factor. Records the overflow in the saved user FPSR, then
      # picks the handler according to the exception-enable byte.
11153         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11154
11155         mov.b           FPCR_ENABLE(%a6),%d1
11156         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11157         bne.b           fadd_ovfl_ena           # yes
11158
11159         add.l           &0xc,%sp                # drop the 12-byte stacked result
11160 fadd_ovfl_dis:
      # Return the default overflow result in fp0 and restore d2. Also entered
      # from fadd_ovfl_ena_cont once the EXOP has been popped into fp1.
11161         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11162         sne             %d1                     # set sign param accordingly
11163         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
11164         bsr.l           ovf_res                 # calculate default result
11165         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11166         fmovm.x         (%a0),&0x80             # return default result in fp0
11167         mov.l           (%sp)+,%d2              # restore d2
11168         rts
11169
11170 fadd_ovfl_ena:
      # OVFL or INEX is enabled: an EXOP must be returned in fp1.
      # The prec:rnd bits live in the LOW byte of the long stored at L_SCR3
      # (fadd stores it with mov.l, and the other readers use 3+L_SCR3,
      # 2+L_SCR3 as a word, or a full mov.l). A byte read at offset 0 fetches
      # the most significant byte on this big-endian CPU, so the test below
      # must read offset 3 for the sgl/dbl path to be reachable.
11171         mov.b           3+L_SCR3(%a6),%d1       # fetch prec:rnd byte
11172         andi.b          &0xc0,%d1               # is precision extended?
11173         bne.b           fadd_ovfl_ena_sd        # no; prec = sgl or dbl
11174
11175 fadd_ovfl_ena_cont:
      # Build the EXOP from the stacked result: keep its sign and insert the
      # unscaled exponent (d2) less the 0x6000 bias adjustment.
11176         mov.w           (%sp),%d1
11177         andi.w          &0x8000,%d1             # keep sign
11178         subi.l          &0x6000,%d2             # subtract extra bias
11179         andi.w          &0x7fff,%d2
11180         or.w            %d2,%d1                 # concat sign,new exp
11181         mov.w           %d1,(%sp)               # insert new exponent
11182
11183         fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
11184         bra.b           fadd_ovfl_dis
11185
11186 fadd_ovfl_ena_sd:
      # Precision is sgl or dbl: redo the add with only the rounding mode set
      # in the FPCR, and replace the stacked result with it.
11187         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11188
11189         mov.l           L_SCR3(%a6),%d1
11190         andi.b          &0x30,%d1               # keep rnd mode
11191         fmov.l          %d1,%fpcr               # set FPCR
11192
11193         fadd.x          FP_SCR0(%a6),%fp0       # execute add
11194
11195         fmov.l          &0x0,%fpcr              # clear FPCR
11196
11197         add.l           &0xc,%sp                # drop the old stacked result
11198         fmovm.x         &0x01,-(%sp)            # push the re-rounded result
      # The re-rounded result can carry a different exponent than the first
      # one (e.g. when the narrower rounding carried out of the mantissa), so
      # recompute d2 from the value just pushed, as fadd_unfl_ena does for its
      # redone result. d0 still holds the scale factor here.
              mov.w           (%sp),%d2               # fetch new sign, exp
              andi.l          &0x7fff,%d2             # strip sign
              sub.l           %d0,%d2                 # undo scaling: d2 -= scale factor
11199         bra.b           fadd_ovfl_ena_cont
11200
11201 fadd_unfl:
      # Underflow path of fadd. On entry the stack holds the 12-byte scaled
      # result above the saved d2, and d0 holds the scale factor. Sets the UNFL
      # exception bit, discards the stacked result, redoes the add with the RZ
      # rounding mode, and merges the new status into the saved user FPSR.
11202         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11203
11204         add.l           &0xc,%sp                # drop the 12-byte stacked result
11205
11206         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11207
11208         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR: rnd mode = RZ
11209         fmov.l          &0x0,%fpsr              # clear FPSR
11210
11211         fadd.x          FP_SCR0(%a6),%fp0       # execute add
11212
11213         fmov.l          &0x0,%fpcr              # clear FPCR
11214         fmov.l          %fpsr,%d1               # save status
11215
11216         or.l            %d1,USER_FPSR(%a6)      # save INEX,N
11217
11218         mov.b           FPCR_ENABLE(%a6),%d1
11219         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11220         bne.b           fadd_unfl_ena           # yes
11221
11222 fadd_unfl_dis:
      # Return the default underflow result in fp0 and restore d2. Also entered
      # from fadd_unfl_ena after the EXOP has been placed in fp1.
11223         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11224
11225         lea             FP_SCR0(%a6),%a0        # pass: result addr
11226         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11227         bsr.l           unf_res                 # calculate default result
11228         or.b            %d0,FPSR_CC(%a6)        # 'Z' bit may have been set
11229         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11230         mov.l           (%sp)+,%d2              # restore d2
11231         rts
11232
11233 fadd_unfl_ena:
      # UNFL or INEX is enabled: redo the add into fp1 and re-bias its exponent
      # to form the EXOP. For sgl/dbl precision only the rounding mode is put
      # in the FPCR (see fadd_unfl_ena_sd). d2 is free as scratch here because
      # the caller's value is still saved on the stack.
11234         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op
11235
11236         mov.l           L_SCR3(%a6),%d1
11237         andi.b          &0xc0,%d1               # is precision extended?
11238         bne.b           fadd_unfl_ena_sd        # no; sgl or dbl
11239
11240         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11241
11242 fadd_unfl_ena_cont:
11243         fmov.l          &0x0,%fpsr              # clear FPSR
11244
11245         fadd.x          FP_SCR0(%a6),%fp1       # execute add
11246
11247         fmov.l          &0x0,%fpcr              # clear FPCR
11248
11249         fmovm.x         &0x40,FP_SCR0(%a6)      # save result to stack
11250         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11251         mov.l           %d1,%d2                 # make a copy
11252         andi.l          &0x7fff,%d1             # strip sign
11253         andi.w          &0x8000,%d2             # keep old sign
11254         sub.l           %d0,%d1                 # undo scaling: d1 -= scale factor
11255         addi.l          &0x6000,%d1             # add new bias
11256         andi.w          &0x7fff,%d1             # clear top bit
11257         or.w            %d2,%d1                 # concat sign,new exp
11258         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11259         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11260         bra.w           fadd_unfl_dis           # then return default result in fp0
11261
11262 fadd_unfl_ena_sd:
11263         mov.l           L_SCR3(%a6),%d1
11264         andi.b          &0x30,%d1               # use only rnd mode
11265         fmov.l          %d1,%fpcr               # set FPCR
11266
11267         bra.b           fadd_unfl_ena_cont
11268
11269 #
11270 # result is equal to the smallest normalized number in the selected precision.
11271 # if the precision is extended, this result could not have come from an
11272 # underflow that rounded up.
11273 #
11274 fadd_may_unfl:
      # On entry the stack holds the 12-byte scaled result (exponent word at
      # 0(sp), hi mantissa at 4(sp), lo mantissa at 8(sp)) above the saved d2.
      # Every early exit goes to fadd_normal, which consumes that stack layout.
11275         mov.l           L_SCR3(%a6),%d1
11276         andi.b          &0xc0,%d1               # is precision extended?
11277         beq.w           fadd_normal             # yes; no underflow occurred
11278
11279         mov.l           0x4(%sp),%d1            # extract hi(man)
11280         cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
11281         bne.w           fadd_normal             # no; no underflow occurred
11282
11283         tst.l           0x8(%sp)                # is lo(man) = 0x0?
11284         bne.w           fadd_normal             # no; no underflow occurred
11285
11286         btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11287         beq.w           fadd_normal             # no; no underflow occurred
11288
11289 #
11290 # ok, so now the result has an exponent equal to the smallest normalized
11291 # exponent for the selected precision. also, the mantissa is equal to
11292 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11293 # g,r,s.
11294 # now, we must determine whether the pre-rounded result was an underflow
11295 # rounded "up" or a normalized number rounded "down".
11296 # so, we do this by re-executing the add using RZ as the rounding mode and
11297 # seeing if the new result is smaller or equal to the current result.
11298 #
11299         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11300
11301         mov.l           L_SCR3(%a6),%d1
11302         andi.b          &0xc0,%d1               # keep rnd prec
11303         ori.b           &rz_mode*0x10,%d1       # insert rnd mode
11304         fmov.l          %d1,%fpcr               # set FPCR
11305         fmov.l          &0x0,%fpsr              # clear FPSR
11306
11307         fadd.x          FP_SCR0(%a6),%fp1       # execute add
11308
11309         fmov.l          &0x0,%fpcr              # clear FPCR
11310
11311         fabs.x          %fp0                    # compare absolute values
11312         fabs.x          %fp1
11313         fcmp.x          %fp0,%fp1               # is first result > second?
11314
11315         fbgt.w          fadd_unfl               # yes; it's an underflow
11316         bra.w           fadd_normal             # no; it's not an underflow
11317
11318 ##########################################################################
11319
11320 #
11321 # Add: inputs are not both normalized; what are they?
11322 #
11323 fadd_not_norm:
      # Dispatch on the combined tag in d1 ((DTAG << 3) | STAG, built in fadd):
      # fetch the 16-bit entry for that tag pair, then jump to
      # (table base + entry).
11324         mov.w           (tbl_fadd_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset from table
11325         jmp             (tbl_fadd_op.b,%pc,%d1.w*1)     # go to table base + offset
11326
11327         swbeg           &48
11328 tbl_fadd_op:
      # 48 entries of 16-bit offsets relative to tbl_fadd_op, laid out as six
      # groups of eight. Each group is one dst type and each slot within it one
      # src type; entry comments read "DST + SRC". The last two slots of each
      # group hold offset 0.
11329         short           fadd_norm       - tbl_fadd_op # NORM + NORM
11330         short           fadd_zero_src   - tbl_fadd_op # NORM + ZERO
11331         short           fadd_inf_src    - tbl_fadd_op # NORM + INF
11332         short           fadd_res_qnan   - tbl_fadd_op # NORM + QNAN
11333         short           fadd_norm       - tbl_fadd_op # NORM + DENORM
11334         short           fadd_res_snan   - tbl_fadd_op # NORM + SNAN
11335         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11336         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11337
11338         short           fadd_zero_dst   - tbl_fadd_op # ZERO + NORM
11339         short           fadd_zero_2     - tbl_fadd_op # ZERO + ZERO
11340         short           fadd_inf_src    - tbl_fadd_op # ZERO + INF
11341         short           fadd_res_qnan   - tbl_fadd_op # ZERO + QNAN
11342         short           fadd_zero_dst   - tbl_fadd_op # ZERO + DENORM
11343         short           fadd_res_snan   - tbl_fadd_op # ZERO + SNAN
11344         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11345         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11346
11347         short           fadd_inf_dst    - tbl_fadd_op # INF + NORM
11348         short           fadd_inf_dst    - tbl_fadd_op # INF + ZERO
11349         short           fadd_inf_2      - tbl_fadd_op # INF + INF
11350         short           fadd_res_qnan   - tbl_fadd_op # INF + QNAN
11351         short           fadd_inf_dst    - tbl_fadd_op # INF + DENORM
11352         short           fadd_res_snan   - tbl_fadd_op # INF + SNAN
11353         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11354         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11355
11356         short           fadd_res_qnan   - tbl_fadd_op # QNAN + NORM
11357         short           fadd_res_qnan   - tbl_fadd_op # QNAN + ZERO
11358         short           fadd_res_qnan   - tbl_fadd_op # QNAN + INF
11359         short           fadd_res_qnan   - tbl_fadd_op # QNAN + QNAN
11360         short           fadd_res_qnan   - tbl_fadd_op # QNAN + DENORM
11361         short           fadd_res_snan   - tbl_fadd_op # QNAN + SNAN
11362         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11363         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11364
11365         short           fadd_norm       - tbl_fadd_op # DENORM + NORM
11366         short           fadd_zero_src   - tbl_fadd_op # DENORM + ZERO
11367         short           fadd_inf_src    - tbl_fadd_op # DENORM + INF
11368         short           fadd_res_qnan   - tbl_fadd_op # DENORM + QNAN
11369         short           fadd_norm       - tbl_fadd_op # DENORM + DENORM
11370         short           fadd_res_snan   - tbl_fadd_op # DENORM + SNAN
11371         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11372         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11373
11374         short           fadd_res_snan   - tbl_fadd_op # SNAN + NORM
11375         short           fadd_res_snan   - tbl_fadd_op # SNAN + ZERO
11376         short           fadd_res_snan   - tbl_fadd_op # SNAN + INF
11377         short           fadd_res_snan   - tbl_fadd_op # SNAN + QNAN
11378         short           fadd_res_snan   - tbl_fadd_op # SNAN + DENORM
11379         short           fadd_res_snan   - tbl_fadd_op # SNAN + SNAN
11380         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11381         short           tbl_fadd_op     - tbl_fadd_op # filler: offset 0
11382
11383 fadd_res_qnan:
      # Local landing pads named by tbl_fadd_op; each is a single long branch
      # to the shared NaN handler defined elsewhere in the file.
11384         bra.l           res_qnan                # QNAN operand
11385 fadd_res_snan:
11386         bra.l           res_snan                # SNAN operand
11387
11388 #
11389 # both operands are ZEROes
11390 #
11391 fadd_zero_2:
      #   in:  a0 = src operand, a1 = dst operand, L_SCR3 = rounding info
      #   out: fp0 = signed zero, FPSR_CC(a6) = matching condition codes
11392         mov.b           SRC_EX(%a0),%d0         # are the signs opposite
11393         mov.b           DST_EX(%a1),%d1
11394         eor.b           %d0,%d1                 # bit 7 set if the signs differ
11395         bmi.w           fadd_zero_2_chk_rm      # weed out (-ZERO)+(+ZERO)
11396
11397 # the signs are the same. so determine whether they are positive or negative
11398 # and return the appropriately signed zero.
11399         tst.b           %d0                     # are ZEROes positive or negative?
11400         bmi.b           fadd_zero_rm            # negative
11401         fmov.s          &0x00000000,%fp0        # return +ZERO
11402         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11403         rts
11404
11405 #
11406 # the ZEROes have opposite signs:
11407 # - Therefore, we return +ZERO if the rounding modes are RN,RZ, or RP.
11408 # - -ZERO is returned in the case of RM.
11409 #
11410 fadd_zero_2_chk_rm:
11411         mov.b           3+L_SCR3(%a6),%d1       # low byte of stored rnd info
11412         andi.b          &0x30,%d1               # extract rnd mode
11413         cmpi.b          %d1,&rm_mode*0x10       # is rnd mode == RM?
11414         beq.b           fadd_zero_rm            # yes
11415         fmov.s          &0x00000000,%fp0        # return +ZERO
11416         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11417         rts
11418
11419 fadd_zero_rm:
      # Shared exit for both "-ZERO + -ZERO" and "opposite signs with RM".
11420         fmov.s          &0x80000000,%fp0        # return -ZERO
11421         mov.b           &neg_bmask+z_bmask,FPSR_CC(%a6) # set NEG/Z
11422         rts
11423
11424 #
11425 # one operand is a ZERO and the other is a DENORM or NORM. scale
11426 # the DENORM or NORM and jump to the regular fadd routine.
11427 #
11428 fadd_zero_dst:
      # dst is ZERO: copy the src operand into FP_SCR0, scale it, write an
      # all-zero value into FP_SCR1 as the dst, and join fadd at
      # fadd_zero_entry.
      # NOTE(review): scale_to_zero_src presumably returns the scale factor in
      # d0, which fadd_zero_entry's code consumes -- confirm against the helper.
11429         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
11430         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11431         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11432         bsr.l           scale_to_zero_src       # scale the operand
11433         clr.w           FP_SCR1_EX(%a6)         # dst = zero
11434         clr.l           FP_SCR1_HI(%a6)
11435         clr.l           FP_SCR1_LO(%a6)
11436         bra.w           fadd_zero_entry         # go execute fadd
11437
11438 fadd_zero_src:
      # src is ZERO: mirror image of the above, with the dst operand copied to
      # FP_SCR1 and scaled, and an all-zero value written to FP_SCR0.
11439         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
11440         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11441         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11442         bsr.l           scale_to_zero_dst       # scale the operand
11443         clr.w           FP_SCR0_EX(%a6)         # src = zero
11444         clr.l           FP_SCR0_HI(%a6)
11445         clr.l           FP_SCR0_LO(%a6)
11446         bra.w           fadd_zero_entry         # go execute fadd
11447
11448 #
11449 # both operands are INFs. an OPERR will result if the INFs have
11450 # different signs. else, an INF of the same sign is returned
11451 #
11452 fadd_inf_2:
11453         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11454         mov.b           DST_EX(%a1),%d1
11455         eor.b           %d1,%d0                 # bit 7 set if the signs differ
11456         bmi.l           res_operr               # weed out (-INF)+(+INF)
11457
11458 # ok, so it's not an OPERR. but, we do have to remember to return the
11459 # src INF since that's where the 881/882 gets the j-bit from...
11460
11461 #
11462 # src operand is INF; dst is one of {ZERO, INF (same sign), DENORM, NORM}
11463 #
11464 fadd_inf_src:
      # Also reached by fall-through from fadd_inf_2 above.
11465         fmovm.x         SRC(%a0),&0x80          # return src INF
11466         tst.b           SRC_EX(%a0)             # is INF positive?
11467         bpl.b           fadd_inf_done           # yes; we're done
11468         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11469         rts
11470
11471 #
11472 # dst operand is INF; src is one of {ZERO, DENORM, NORM}
11473 #
11474 fadd_inf_dst:
11475         fmovm.x         DST(%a1),&0x80          # return dst INF
11476         tst.b           DST_EX(%a1)             # is INF positive?
11477         bpl.b           fadd_inf_done           # yes; we're done
11478         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11479         rts
11480
11481 fadd_inf_done:
      # Shared exit for a positive INF result (fp0 already loaded).
11482         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
11483         rts
11484
11485 #########################################################################
11486 # XDEF **************************************************************** #
11487 #       fsub(): emulates the fsub instruction                           #
11488 #       fssub(): emulates the fssub instruction                         #
11489 #       fdsub(): emulates the fdsub instruction                         #
11490 #                                                                       #
11491 # XREF **************************************************************** #
11492 #       addsub_scaler2() - scale the operands so they won't take exc    #
11493 #       ovf_res() - return default overflow result                      #
11494 #       unf_res() - return default underflow result                     #
11495 #       res_qnan() - set QNAN result                                    #
11496 #       res_snan() - set SNAN result                                    #
11497 #       res_operr() - set OPERR result                                  #
11498 #       scale_to_zero_src() - set src operand exponent equal to zero    #
11499 #       scale_to_zero_dst() - set dst operand exponent equal to zero    #
11500 #                                                                       #
11501 # INPUT *************************************************************** #
11502 #       a0 = pointer to extended precision source operand               #
11503 #       a1 = pointer to extended precision destination operand          #
11504 #                                                                       #
11505 # OUTPUT ************************************************************** #
11506 #       fp0 = result                                                    #
11507 #       fp1 = EXOP (if exception occurred)                              #
11508 #                                                                       #
11509 # ALGORITHM *********************************************************** #
11510 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
11511 # norms into extended, single, and double precision.                    #
11512 #       Do subtraction after scaling exponents such that exception won't#
11513 # occur. Then, check result exponent to see if exception would have     #
11514 # occurred. If so, return default result and maybe EXOP. Else, insert   #
11515 # the correct result exponent and return. Set FPSR bits as appropriate. #
11516 #                                                                       #
11517 #########################################################################
11518
11519         global          fssub
11520 fssub:                                          # fssub: force single rounding precision, then fsub
11521         andi.b          &0x30,%d0               # clear rnd prec
11522         ori.b           &s_mode*0x10,%d0        # insert sgl prec
11523         bra.b           fsub                    # continue in the common fsub code
11524
11525         global          fdsub
11526 fdsub:                                          # fdsub: force double rounding precision, then fall into fsub
11527         andi.b          &0x30,%d0               # clear rnd prec
11528         ori.b           &d_mode*0x10,%d0        # insert dbl prec
11529
11530         global          fsub
11531 fsub:
11532         mov.l           %d0,L_SCR3(%a6)         # store rnd info
11533
11534         clr.w           %d1                     # d1.w is used as a table index below
11535         mov.b           DTAG(%a6),%d1           # d1 = dst tag
11536         lsl.b           &0x3,%d1                # d1 = dst tag << 3
11537         or.b            STAG(%a6),%d1           # combine src tags
11538
11539         bne.w           fsub_not_norm           # optimize on non-norm input
11540
11541 #
11542 # SUB: norms and denorms
11543 #
11544 fsub_norm:
11545         bsr.l           addsub_scaler2          # scale exponents
11546 # fsub_zero_dst/fsub_zero_src branch here after filling in FP_SCR0/FP_SCR1
11547 fsub_zero_entry:
11548         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11549
11550         fmov.l          &0x0,%fpsr              # clear FPSR
11551         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11552
11553         fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11554
11555         fmov.l          &0x0,%fpcr              # clear FPCR
11556         fmov.l          %fpsr,%d1               # fetch INEX2, N, Z
11557
11558         or.l            %d1,USER_FPSR(%a6)      # save exc and ccode bits
11559
11560         fbeq.w          fsub_zero_exit          # if result zero, end now
11561
11562         mov.l           %d2,-(%sp)              # save d2
11563
11564         fmovm.x         &0x01,-(%sp)            # save result to stack
11565
11566         mov.w           2+L_SCR3(%a6),%d1       # d1.w = low word of the stored rnd info
11567         lsr.b           &0x6,%d1                # d1 = rnd prec, the index into the tables below
11568
11569         mov.w           (%sp),%d2               # fetch new exponent
11570         andi.l          &0x7fff,%d2             # strip sign
11571         sub.l           %d0,%d2                 # add scale factor
11572
11573         cmp.l           %d2,(tbl_fsub_ovfl.b,%pc,%d1.w*4) # is it an overflow?
11574         bge.b           fsub_ovfl               # yes
11575
11576         cmp.l           %d2,(tbl_fsub_unfl.b,%pc,%d1.w*4) # is it an underflow?
11577         blt.w           fsub_unfl               # yes
11578         beq.w           fsub_may_unfl           # maybe; go find out
11579
11580 fsub_normal:
11581         mov.w           (%sp),%d1               # fetch {sgn,exp} of the saved result
11582         andi.w          &0x8000,%d1             # keep sign
11583         or.w            %d2,%d1                 # concat sign,new exponent
11584         mov.w           %d1,(%sp)               # insert new exponent
11585
11586         fmovm.x         (%sp)+,&0x80            # return result in fp0
11587
11588         mov.l           (%sp)+,%d2              # restore d2
11589         rts
11590
11591 fsub_zero_exit:                                 # result is zero; fp0 already holds it
11592 #       fmov.s          &0x00000000,%fp0        # return zero in fp0
11593         rts
11594
11595 tbl_fsub_ovfl:                                  # overflow thresholds, indexed by rnd prec; result exp >= entry => overflow
11596         long            0x7fff                  # ext ovfl
11597         long            0x407f                  # sgl ovfl
11598         long            0x43ff                  # dbl ovfl
11599
11600 tbl_fsub_unfl:                                  # underflow thresholds, indexed by rnd prec; result exp < entry => underflow
11601         long            0x0000                  # ext unfl
11602         long            0x3f81                  # sgl unfl
11603         long            0x3c01                  # dbl unfl
11604
11605 fsub_ovfl:
      # Overflow handling for fsub. On entry the stack holds the 12-byte
      # rounded result on top of the saved d2, d2 = unscaled result exponent
      # and d0 = scale factor. Returns the default overflow result in fp0 and,
      # if OVFL or INEX is enabled, the EXOP in fp1.
11606         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
11607
11608         mov.b           FPCR_ENABLE(%a6),%d1
11609         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
11610         bne.b           fsub_ovfl_ena           # yes
11611
11612         add.l           &0xc,%sp                # discard saved result; d2 stays on the stack
11613 fsub_ovfl_dis:
11614         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
11615         sne             %d1                     # set sign param accordingly
11616         mov.l           L_SCR3(%a6),%d0         # pass prec:rnd
11617         bsr.l           ovf_res                 # calculate default result
11618         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
11619         fmovm.x         (%a0),&0x80             # return default result in fp0
11620         mov.l           (%sp)+,%d2              # restore d2
11621         rts
11622
11623 fsub_ovfl_ena:
      # the rnd prec bits live in the LOW byte of the long at L_SCR3, so the
      # whole long must be fetched; a byte fetch at L_SCR3 reads the high
      # byte and would never see them, leaving fsub_ovfl_ena_sd unreachable.
11624         mov.l           L_SCR3(%a6),%d1
11625         andi.b          &0xc0,%d1               # is precision extended?
11626         bne.b           fsub_ovfl_ena_sd        # no
11627
11628 fsub_ovfl_ena_cont:
11629         mov.w           (%sp),%d1               # fetch {sgn,exp}
11630         andi.w          &0x8000,%d1             # keep sign
11631         subi.l          &0x6000,%d2             # subtract new bias
11632         andi.w          &0x7fff,%d2             # clear top bit
11633         or.w            %d2,%d1                 # concat sign,exp
11634         mov.w           %d1,(%sp)               # insert new exponent
11635
11636         fmovm.x         (%sp)+,&0x40            # return EXOP in fp1
11637         bra.b           fsub_ovfl_dis
11638
11639 fsub_ovfl_ena_sd:
      # precision is sgl or dbl: redo the subtract rounded to extended
      # precision (same rounding mode) so the EXOP is not pre-rounded.
11640         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11641
11642         mov.l           L_SCR3(%a6),%d1
11643         andi.b          &0x30,%d1               # clear rnd prec
11644         fmov.l          %d1,%fpcr               # set FPCR
11645
11646         fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11647
11648         fmov.l          &0x0,%fpcr              # clear FPCR
11649
11650         add.l           &0xc,%sp                # drop the sgl/dbl-rounded result ...
11651         fmovm.x         &0x01,-(%sp)            # ... and save the extended one instead
      # the extended-rounded result may not have carried into the next
      # exponent the way the sgl/dbl-rounded one did, so re-derive d2 from it.
              mov.w           (%sp),%d2               # fetch new exponent
              andi.l          &0x7fff,%d2             # strip sign
              sub.l           %d0,%d2                 # add scale factor
11652         bra.b           fsub_ovfl_ena_cont
11653
11654 fsub_unfl:
11655         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
11656
11657         add.l           &0xc,%sp                # discard the 12-byte result saved on the stack
11658
11659         fmovm.x         FP_SCR1(%a6),&0x80      # load dst op
11660
11661         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
11662         fmov.l          &0x0,%fpsr              # clear FPSR
11663
11664         fsub.x          FP_SCR0(%a6),%fp0       # execute subtract
11665
11666         fmov.l          &0x0,%fpcr              # clear FPCR
11667         fmov.l          %fpsr,%d1               # save status
11668
11669         or.l            %d1,USER_FPSR(%a6)      # merge status into the user FPSR image
11670
11671         mov.b           FPCR_ENABLE(%a6),%d1
11672         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
11673         bne.b           fsub_unfl_ena           # yes
11674
11675 fsub_unfl_dis:
11676         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
11677
11678         lea             FP_SCR0(%a6),%a0        # pass: result addr
11679         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
11680         bsr.l           unf_res                 # calculate default result
11681         or.b            %d0,FPSR_CC(%a6)        # 'Z' may have been set
11682         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
11683         mov.l           (%sp)+,%d2              # restore d2
11684         rts
11685
11686 fsub_unfl_ena:
11687         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11688
11689         mov.l           L_SCR3(%a6),%d1
11690         andi.b          &0xc0,%d1               # is precision extended?
11691         bne.b           fsub_unfl_ena_sd        # no
11692
11693         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11694
11695 fsub_unfl_ena_cont:
11696         fmov.l          &0x0,%fpsr              # clear FPSR
11697
11698         fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
11699
11700         fmov.l          &0x0,%fpcr              # clear FPCR
11701
11702         fmovm.x         &0x40,FP_SCR0(%a6)      # store fp1 result to FP_SCR0
11703         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp}
11704         mov.l           %d1,%d2                 # make a copy
11705         andi.l          &0x7fff,%d1             # strip sign
11706         andi.w          &0x8000,%d2             # keep old sign
11707         sub.l           %d0,%d1                 # add scale factor
11708         addi.l          &0x6000,%d1             # add new bias
11709         andi.w          &0x7fff,%d1             # clear top bit
11710         or.w            %d2,%d1                 # concat sgn,exp
11711         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
11712         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
11713         bra.w           fsub_unfl_dis
11714
11715 fsub_unfl_ena_sd:
11716         mov.l           L_SCR3(%a6),%d1
11717         andi.b          &0x30,%d1               # clear rnd prec
11718         fmov.l          %d1,%fpcr               # set FPCR
11719
11720         bra.b           fsub_unfl_ena_cont
11721
11722 #
11723 # result is equal to the smallest normalized number in the selected precision
11724 # if the precision is extended, this result could not have come from an
11725 # underflow that rounded up.
11726 #
11727 fsub_may_unfl:
11728         mov.l           L_SCR3(%a6),%d1
11729         andi.b          &0xc0,%d1               # is precision extended?
11730         beq.w           fsub_normal             # yes; no underflow occurred
11731
11732         mov.l           0x4(%sp),%d1            # d1 = hi(man) of the result saved on the stack
11733         cmpi.l          %d1,&0x80000000         # is hi(man) = 0x80000000?
11734         bne.w           fsub_normal             # no; no underflow occurred
11735
11736         tst.l           0x8(%sp)                # is lo(man) = 0x0?
11737         bne.w           fsub_normal             # no; no underflow occurred
11738
11739         btst            &inex2_bit,FPSR_EXCEPT(%a6) # is INEX2 set?
11740         beq.w           fsub_normal             # no; no underflow occurred
11741
11742 #
11743 # ok, so now the result has an exponent equal to the smallest normalized
11744 # exponent for the selected precision. also, the mantissa is equal to
11745 # 0x8000000000000000 and this mantissa is the result of rounding non-zero
11746 # g,r,s.
11747 # now, we must determine whether the pre-rounded result was an underflow
11748 # rounded "up" or a normalized number rounded "down".
11749 # so, we do this by re-executing the subtract using RZ as the rounding mode
11750 # and seeing if the new result is smaller or equal to the current result.
11751 #
11752         fmovm.x         FP_SCR1(%a6),&0x40      # load dst op into fp1
11753
11754         mov.l           L_SCR3(%a6),%d1
11755         andi.b          &0xc0,%d1               # keep rnd prec
11756         ori.b           &rz_mode*0x10,%d1       # insert rnd mode
11757         fmov.l          %d1,%fpcr               # set FPCR
11758         fmov.l          &0x0,%fpsr              # clear FPSR
11759
11760         fsub.x          FP_SCR0(%a6),%fp1       # execute subtract
11761
11762         fmov.l          &0x0,%fpcr              # clear FPCR
11763
11764         fabs.x          %fp0                    # compare absolute values
11765         fabs.x          %fp1
11766         fcmp.x          %fp0,%fp1               # is first result > second?
11767
11768         fbgt.w          fsub_unfl               # yes; it's an underflow
11769         bra.w           fsub_normal             # no; it's not an underflow
11770
11771 ##########################################################################
11772
11773 #
11774 # Sub: inputs are not both normalized; what are they?
11775 # d1 = (dst tag << 3) | src tag. table entries are labelled "dst - src".
11776 fsub_not_norm:
11777         mov.w           (tbl_fsub_op.b,%pc,%d1.w*2),%d1 # d1 = handler offset from tbl_fsub_op
11778         jmp             (tbl_fsub_op.b,%pc,%d1.w*1)     # dispatch to the handler
11779
11780         swbeg           &48
11781 tbl_fsub_op:
11782         short           fsub_norm       - tbl_fsub_op # NORM - NORM
11783         short           fsub_zero_src   - tbl_fsub_op # NORM - ZERO
11784         short           fsub_inf_src    - tbl_fsub_op # NORM - INF
11785         short           fsub_res_qnan   - tbl_fsub_op # NORM - QNAN
11786         short           fsub_norm       - tbl_fsub_op # NORM - DENORM
11787         short           fsub_res_snan   - tbl_fsub_op # NORM - SNAN
11788         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11789         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11790
11791         short           fsub_zero_dst   - tbl_fsub_op # ZERO - NORM
11792         short           fsub_zero_2     - tbl_fsub_op # ZERO - ZERO
11793         short           fsub_inf_src    - tbl_fsub_op # ZERO - INF
11794         short           fsub_res_qnan   - tbl_fsub_op # ZERO - QNAN
11795         short           fsub_zero_dst   - tbl_fsub_op # ZERO - DENORM
11796         short           fsub_res_snan   - tbl_fsub_op # ZERO - SNAN
11797         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11798         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11799
11800         short           fsub_inf_dst    - tbl_fsub_op # INF - NORM
11801         short           fsub_inf_dst    - tbl_fsub_op # INF - ZERO
11802         short           fsub_inf_2      - tbl_fsub_op # INF - INF
11803         short           fsub_res_qnan   - tbl_fsub_op # INF - QNAN
11804         short           fsub_inf_dst    - tbl_fsub_op # INF - DENORM
11805         short           fsub_res_snan   - tbl_fsub_op # INF - SNAN
11806         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11807         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11808
11809         short           fsub_res_qnan   - tbl_fsub_op # QNAN - NORM
11810         short           fsub_res_qnan   - tbl_fsub_op # QNAN - ZERO
11811         short           fsub_res_qnan   - tbl_fsub_op # QNAN - INF
11812         short           fsub_res_qnan   - tbl_fsub_op # QNAN - QNAN
11813         short           fsub_res_qnan   - tbl_fsub_op # QNAN - DENORM
11814         short           fsub_res_snan   - tbl_fsub_op # QNAN - SNAN
11815         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11816         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11817
11818         short           fsub_norm       - tbl_fsub_op # DENORM - NORM
11819         short           fsub_zero_src   - tbl_fsub_op # DENORM - ZERO
11820         short           fsub_inf_src    - tbl_fsub_op # DENORM - INF
11821         short           fsub_res_qnan   - tbl_fsub_op # DENORM - QNAN
11822         short           fsub_norm       - tbl_fsub_op # DENORM - DENORM
11823         short           fsub_res_snan   - tbl_fsub_op # DENORM - SNAN
11824         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11825         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11826
11827         short           fsub_res_snan   - tbl_fsub_op # SNAN - NORM
11828         short           fsub_res_snan   - tbl_fsub_op # SNAN - ZERO
11829         short           fsub_res_snan   - tbl_fsub_op # SNAN - INF
11830         short           fsub_res_snan   - tbl_fsub_op # SNAN - QNAN
11831         short           fsub_res_snan   - tbl_fsub_op # SNAN - DENORM
11832         short           fsub_res_snan   - tbl_fsub_op # SNAN - SNAN
11833         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11834         short           tbl_fsub_op     - tbl_fsub_op # (offset 0; filler entry)
11835
11836 fsub_res_qnan:                                  # local stub so the 16-bit table offset can reach res_qnan
11837         bra.l           res_qnan
11838 fsub_res_snan:                                  # local stub so the 16-bit table offset can reach res_snan
11839         bra.l           res_snan
11840
11841 #
11842 # both operands are ZEROes
11843 #
11844 fsub_zero_2:
11845         mov.b           SRC_EX(%a0),%d0         # d0 = src byte holding the sign (bit 7)
11846         mov.b           DST_EX(%a1),%d1         # d1 = dst byte holding the sign (bit 7)
11847         eor.b           %d1,%d0                 # d0 bit 7 is set iff the signs differ
11848         bpl.b           fsub_zero_2_chk_rm      # same signs; result depends on rnd mode
11849
11850 # the signs are opposite, so, return a ZERO w/ the sign of the dst ZERO
      # d0 now holds src^dst (always negative on this path), so the dst sign
      # must be tested in d1, which the eor left untouched. Testing d0 would
      # always return -ZERO, making (+0)-(-0) come out as -0.
11851         tst.b           %d1                     # is dst negative?
11852         bmi.b           fsub_zero_2_rm          # yes
11853         fmov.s          &0x00000000,%fp0        # no; return +ZERO
11854         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11855         rts
11856
11857 #
11858 # the ZEROes have the same signs:
11859 # - Therefore, we return +ZERO if the rounding mode is RN,RZ, or RP
11860 # - -ZERO is returned in the case of RM.
11861 #
11862 fsub_zero_2_chk_rm:
11863         mov.b           3+L_SCR3(%a6),%d1       # d1 = low byte of the stored rnd info
11864         andi.b          &0x30,%d1               # extract rnd mode
11865         cmpi.b          %d1,&rm_mode*0x10       # is rnd mode = RM?
11866         beq.b           fsub_zero_2_rm          # yes
11867         fmov.s          &0x00000000,%fp0        # no; return +ZERO
11868         mov.b           &z_bmask,FPSR_CC(%a6)   # set Z
11869         rts
11870
11871 fsub_zero_2_rm:
11872         fmov.s          &0x80000000,%fp0        # return -ZERO
11873         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set Z/NEG
11874         rts
11875
11876 #
11877 # one operand is a ZERO and the other is a DENORM or a NORM.
11878 # scale the DENORM or NORM and jump to the regular fsub routine.
11879 #
11880 fsub_zero_dst:                                  # dst is the ZERO; src is the NORM/DENORM
11881         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
11882         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
11883         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
11884         bsr.l           scale_to_zero_src       # scale the operand
11885         clr.w           FP_SCR1_EX(%a6)         # dst operand image = +ZERO
11886         clr.l           FP_SCR1_HI(%a6)
11887         clr.l           FP_SCR1_LO(%a6)
11888         bra.w           fsub_zero_entry         # go execute fsub
11889
11890 fsub_zero_src:                                  # src is the ZERO; dst is the NORM/DENORM
11891         mov.w           DST_EX(%a1),FP_SCR1_EX(%a6)
11892         mov.l           DST_HI(%a1),FP_SCR1_HI(%a6)
11893         mov.l           DST_LO(%a1),FP_SCR1_LO(%a6)
11894         bsr.l           scale_to_zero_dst       # scale the operand
11895         clr.w           FP_SCR0_EX(%a6)         # src operand image = +ZERO
11896         clr.l           FP_SCR0_HI(%a6)
11897         clr.l           FP_SCR0_LO(%a6)
11898         bra.w           fsub_zero_entry         # go execute fsub
11899
11900 #
11901 # both operands are INFs. an OPERR will result if the INFs have the
11902 # same signs. else, the src INF with its sign inverted is returned.
11903 #
11904 fsub_inf_2:
11905         mov.b           SRC_EX(%a0),%d0         # exclusive or the signs
11906         mov.b           DST_EX(%a1),%d1
11907         eor.b           %d1,%d0                 # d0 bit 7 is set iff the signs differ
11908         bpl.l           res_operr               # weed out (+INF)-(+INF) and (-INF)-(-INF)
11909
11910 # ok, so it's not an OPERR. but we do have to remember to return
11911 # the src INF since that's where the 881/882 gets the j-bit.
11912 # (execution falls through into fsub_inf_src)
11913 fsub_inf_src:
11914         fmovm.x         SRC(%a0),&0x80          # return src INF
11915         fneg.x          %fp0                    # invert sign
11916         fbge.w          fsub_inf_done           # sign is now positive
11917         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11918         rts
11919
11920 fsub_inf_dst:                                   # dst is the INF; it is returned with its sign unchanged
11921         fmovm.x         DST(%a1),&0x80          # return dst INF
11922         tst.b           DST_EX(%a1)             # is INF negative?
11923         bpl.b           fsub_inf_done           # no
11924         mov.b           &neg_bmask+inf_bmask,FPSR_CC(%a6) # set INF/NEG
11925         rts
11926
11927 fsub_inf_done:                                  # common exit for a positive INF result
11928         mov.b           &inf_bmask,FPSR_CC(%a6) # set INF
11929         rts
11930
11931 #########################################################################
11932 # XDEF **************************************************************** #
11933 #       fsqrt(): emulates the fsqrt instruction                         #
11934 #       fssqrt(): emulates the fssqrt instruction                       #
11935 #       fdsqrt(): emulates the fdsqrt instruction                       #
11936 #                                                                       #
11937 # XREF **************************************************************** #
11938 #       scale_sqrt() - scale the source operand                         #
11939 #       unf_res() - return default underflow result                     #
11940 #       ovf_res() - return default overflow result                      #
11941 #       res_qnan_1op() - return QNAN result                             #
11942 #       res_snan_1op() - return SNAN result                             #
11943 #                                                                       #
11944 # INPUT *************************************************************** #
11945 #       a0 = pointer to extended precision source operand               #
11946 #       d0  rnd prec,mode                                               #
11947 #                                                                       #
11948 # OUTPUT ************************************************************** #
11949 #       fp0 = result                                                    #
11950 #       fp1 = EXOP (if exception occurred)                              #
11951 #                                                                       #
11952 # ALGORITHM *********************************************************** #
11953 #       Handle NANs, infinities, and zeroes as special cases. Divide    #
11954 # norms/denorms into ext/sgl/dbl precision.                             #
11955 #       For norms/denorms, scale the exponents such that a sqrt         #
11956 # instruction won't cause an exception. Use the regular fsqrt to        #
11957 # compute a result. Check if the regular operands would have taken      #
11958 # an exception. If so, return the default overflow/underflow result     #
11959 # and return the EXOP if exceptions are enabled. Else, scale the        #
11960 # result operand to the proper exponent.                                #
11961 #                                                                       #
11962 #########################################################################
11963
11964         global          fssqrt
11965 fssqrt:                                         # fssqrt: force single rounding precision, then fsqrt
11966         andi.b          &0x30,%d0               # clear rnd prec
11967         ori.b           &s_mode*0x10,%d0        # insert sgl precision
11968         bra.b           fsqrt                   # continue in the common fsqrt code
11969
11970         global          fdsqrt
11971 fdsqrt:                                         # fdsqrt: force double rounding precision, then fall into fsqrt
11972         andi.b          &0x30,%d0               # clear rnd prec
11973         ori.b           &d_mode*0x10,%d0        # insert dbl precision
11974
11975         global          fsqrt
11976 fsqrt:
11977         mov.l           %d0,L_SCR3(%a6)         # store rnd info
11978         clr.w           %d1
11979         mov.b           STAG(%a6),%d1           # d1 = src tag; zero means NORM
11980         bne.w           fsqrt_not_norm          # optimize on non-norm input
11981
11982 #
11983 # SQUARE ROOT: norms and denorms ONLY!
11984 #
11985 fsqrt_norm:
11986         tst.b           SRC_EX(%a0)             # is operand negative?
11987         bmi.l           res_operr               # yes
11988
11989         andi.b          &0xc0,%d0               # is precision extended?
11990         bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
11991
11992         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
11993         fmov.l          &0x0,%fpsr              # clear FPSR
11994
11995         fsqrt.x         (%a0),%fp0              # execute square root
11996
11997         fmov.l          %fpsr,%d1               # fetch resulting status
11998         or.l            %d1,USER_FPSR(%a6)      # set N,INEX
11999 # NOTE(review): unlike fsqrt_sd_normal, this path returns without clearing FPCR - confirm that is intended.
12000         rts
12001
12002 fsqrt_denorm:                                   # same checks as fsqrt_norm, but an extended-precision operand is scaled first
12003         tst.b           SRC_EX(%a0)             # is operand negative?
12004         bmi.l           res_operr               # yes
12005
12006         andi.b          &0xc0,%d0               # is precision extended?
12007         bne.b           fsqrt_not_ext           # no; go handle sgl or dbl
12008
12009         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12010         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12011         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12012
12013         bsr.l           scale_sqrt              # calculate scale factor
12014
12015         bra.w           fsqrt_sd_normal         # sqrt the scaled operand, then undo the scaling
12016
12017 #
12018 # operand is either single or double
12019 #
12020 fsqrt_not_ext:
12021         cmpi.b          %d0,&s_mode*0x10        # separate sgl/dbl prec
12022         bne.w           fsqrt_dbl               # not sgl; go handle dbl
12023
12024 #
12025 # operand is to be rounded to single precision
12026 #
12027 fsqrt_sgl:
12028         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12029         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12030         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12031
12032         bsr.l           scale_sqrt              # calculate scale factor
12033
12034         cmpi.l          %d0,&0x3fff-0x3f81      # will move in underflow?
12035         beq.w           fsqrt_sd_may_unfl       # maybe; go check
12036         bgt.w           fsqrt_sd_unfl           # yes; go handle underflow
12037         cmpi.l          %d0,&0x3fff-0x407f      # will move in overflow?
12038         beq.w           fsqrt_sd_may_ovfl       # maybe; go check
12039         blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
12040
12041 #
12042 # operand will NOT overflow or underflow when moved in to the fp reg file
12043 #
12044 fsqrt_sd_normal:
12045         fmov.l          &0x0,%fpsr              # clear FPSR
12046         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12047
12048         fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12049
12050         fmov.l          %fpsr,%d1               # save FPSR
12051         fmov.l          &0x0,%fpcr              # clear FPCR
12052
12053         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12054
12055 fsqrt_sd_normal_exit:
12056         mov.l           %d2,-(%sp)              # save d2
12057         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12058         mov.w           FP_SCR0_EX(%a6),%d1     # load sgn,exp
12059         mov.l           %d1,%d2                 # make a copy
12060         andi.l          &0x7fff,%d1             # strip sign
12061         sub.l           %d0,%d1                 # add scale factor
12062         andi.w          &0x8000,%d2             # keep old sign
12063         or.w            %d1,%d2                 # concat old sign,new exp
12064         mov.w           %d2,FP_SCR0_EX(%a6)     # insert new exponent
12065         mov.l           (%sp)+,%d2              # restore d2
12066         fmovm.x         FP_SCR0(%a6),&0x80      # return result in fp0
12067         rts
12068
12069 #
12070 # operand is to be rounded to double precision
12071 #
12072 fsqrt_dbl:
12073         mov.w           SRC_EX(%a0),FP_SCR0_EX(%a6)
12074         mov.l           SRC_HI(%a0),FP_SCR0_HI(%a6)
12075         mov.l           SRC_LO(%a0),FP_SCR0_LO(%a6)
12076
12077         bsr.l           scale_sqrt              # calculate scale factor
12078
12079         cmpi.l          %d0,&0x3fff-0x3c01      # will move in underflow?
12080         beq.w           fsqrt_sd_may_unfl       # maybe; go check
12081         bgt.b           fsqrt_sd_unfl           # yes; go handle underflow
12082         cmpi.l          %d0,&0x3fff-0x43ff      # will move in overflow?
12083         beq.w           fsqrt_sd_may_ovfl       # maybe; go check
12084         blt.w           fsqrt_sd_ovfl           # yes; go handle overflow
12085         bra.w           fsqrt_sd_normal         # no; go handle normalized op
12086
12087 # we're on the line here and the distinguishing characteristic is whether
12088 # the exponent is 3fff or 3ffe. if it's 3fff, then it's a safe number
12089 # elsewise fall through to underflow.
12090 fsqrt_sd_may_unfl:
12091         btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff?
12092         bne.w           fsqrt_sd_normal         # yes, so no underflow
12093
12094 #
12095 # operand WILL underflow when moved in to the fp register file
12096 #
12097 fsqrt_sd_unfl:
12098         bset            &unfl_bit,FPSR_EXCEPT(%a6) # set unfl exc bit
12099
12100         fmov.l          &rz_mode*0x10,%fpcr     # set FPCR
12101         fmov.l          &0x0,%fpsr              # clear FPSR
12102
12103         fsqrt.x         FP_SCR0(%a6),%fp0       # execute square root
12104
12105         fmov.l          %fpsr,%d1               # save status
12106         fmov.l          &0x0,%fpcr              # clear FPCR
12107
12108         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12109
12110 # if underflow or inexact is enabled, go calculate EXOP first.
12111         mov.b           FPCR_ENABLE(%a6),%d1
12112         andi.b          &0x0b,%d1               # is UNFL or INEX enabled?
12113         bne.b           fsqrt_sd_unfl_ena       # yes
12114
12115 fsqrt_sd_unfl_dis:
12116         fmovm.x         &0x80,FP_SCR0(%a6)      # store out result
12117
12118         lea             FP_SCR0(%a6),%a0        # pass: result addr
12119         mov.l           L_SCR3(%a6),%d1         # pass: rnd prec,mode
12120         bsr.l           unf_res                 # calculate default result
12121         or.b            %d0,FPSR_CC(%a6)        # set possible 'Z' ccode
12122         fmovm.x         FP_SCR0(%a6),&0x80      # return default result in fp0
12123         rts
12124
12125 #
12126 # operand will underflow AND underflow is enabled.
12127 # Therefore, we must return the result rounded to extended precision.
12128 #
      # on entry FP_SCR0 still holds the scaled source operand (the square
      # root is only in fp0), so the EXOP must be computed here rather than
      # copied from FP_SCR0: redo the square root into fp1 at extended
      # precision with the user's rounding mode, then re-bias its exponent.
      # d0 (scale factor) and fp0 are left untouched for fsqrt_sd_unfl_dis.
12129 fsqrt_sd_unfl_ena:
              mov.l           L_SCR3(%a6),%d1
              andi.b          &0x30,%d1               # clear rnd prec
              fmov.l          %d1,%fpcr               # set FPCR

              fsqrt.x         FP_SCR0(%a6),%fp1       # execute square root

              fmov.l          &0x0,%fpcr              # clear FPCR

              fmovm.x         &0x40,FP_SCR1(%a6)      # store out extended result
12132         mov.w           FP_SCR1_EX(%a6),%d1     # load current exponent
12133
12134         mov.l           %d2,-(%sp)              # save d2
12135         mov.l           %d1,%d2                 # make a copy
12136         andi.l          &0x7fff,%d1             # strip sign
12137         andi.w          &0x8000,%d2             # keep old sign
12138         sub.l           %d0,%d1                 # subtract scale factor
12139         addi.l          &0x6000,%d1             # add new bias
12140         andi.w          &0x7fff,%d1             # clear top bit
12141         or.w            %d2,%d1                 # concat new sign,new exp
12142         mov.w           %d1,FP_SCR1_EX(%a6)     # insert new exp
12143         fmovm.x         FP_SCR1(%a6),&0x40      # return EXOP in fp1
12144         mov.l           (%sp)+,%d2              # restore d2
12145         bra.b           fsqrt_sd_unfl_dis
12146
12147 #
12148 # operand WILL overflow.
12149 #
12150 fsqrt_sd_ovfl:                                  # overflow path: run fsqrt, then fall into the overflow test
12151         fmov.l          &0x0,%fpsr              # clear FPSR
12152         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12153
12154         fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12155
12156         fmov.l          &0x0,%fpcr              # clear FPCR
12157         fmov.l          %fpsr,%d1               # save FPSR
12158
12159         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12160
12161 fsqrt_sd_ovfl_tst:                              # also entered from fsqrt_sd_may_ovfl
12162         or.l            &ovfl_inx_mask,USER_FPSR(%a6) # set ovfl/aovfl/ainex
12163
12164         mov.b           FPCR_ENABLE(%a6),%d1    # fetch exception enable byte
12165         andi.b          &0x13,%d1               # is OVFL or INEX enabled?
12166         bne.b           fsqrt_sd_ovfl_ena       # yes
12167
12168 #
12169 # neither OVFL nor INEX is enabled; therefore, we must create the default
12170 # result by calling ovf_res().
12171 #
12172 fsqrt_sd_ovfl_dis:                              # also entered from fsqrt_sd_ovfl_ena after EXOP is built
12173         btst            &neg_bit,FPSR_CC(%a6)   # is result negative?
12174         sne             %d1                     # set sign param accordingly
12175         mov.l           L_SCR3(%a6),%d0         # pass: prec,mode
12176         bsr.l           ovf_res                 # calculate default result
12177         or.b            %d0,FPSR_CC(%a6)        # set INF,N if applicable
12178         fmovm.x         (%a0),&0x80             # return default result in fp0 (a0 presumably set by ovf_res -- confirm)
12179         rts                                     # done; fp0 = default result
12180
12181 #
12182 # OVFL or INEX is enabled.
12183 # the INEX2 bit has already been updated by the round to the correct precision.
12184 # now, round to extended(and don't alter the FPSR).
12185 #
12186 fsqrt_sd_ovfl_ena:                              # OVFL or INEX enabled: build EXOP in fp1 first
12187         mov.l           %d2,-(%sp)              # save d2
12188         mov.w           FP_SCR0_EX(%a6),%d1     # fetch {sgn,exp} -- NOTE(review): fp0 was not stored to FP_SCR0 on this path; confirm this is the intended exponent
12189         mov.l           %d1,%d2                 # make a copy
12190         andi.l          &0x7fff,%d1             # strip sign (also clears upper word of d1)
12191         andi.w          &0x8000,%d2             # keep old sign
12192         sub.l           %d0,%d1                 # subtract scale factor (d0 on entry)
12193         subi.l          &0x6000,%d1             # subtract bias
12194         andi.w          &0x7fff,%d1             # keep only the 15-bit exponent field
12195         or.w            %d2,%d1                 # concat sign,exp
12196         mov.w           %d1,FP_SCR0_EX(%a6)     # insert new exponent
12197         fmovm.x         FP_SCR0(%a6),&0x40      # return EXOP in fp1
12198         mov.l           (%sp)+,%d2              # restore d2
12199         bra.b           fsqrt_sd_ovfl_dis       # now go create the default result in fp0
12200
12201 #
12202 # the square root MAY overflow. so...
12203 #
12204 fsqrt_sd_may_ovfl:                              # borderline exponent: run fsqrt and check the result
12205         btst            &0x0,1+FP_SCR0_EX(%a6)  # is exponent 0x3fff? (tests bit 0 of the low exponent byte)
12206         bne.w           fsqrt_sd_ovfl           # yes, so overflow
12207
12208         fmov.l          &0x0,%fpsr              # clear FPSR
12209         fmov.l          L_SCR3(%a6),%fpcr       # set FPCR
12210
12211         fsqrt.x         FP_SCR0(%a6),%fp0       # perform square root
12212
12213         fmov.l          %fpsr,%d1               # save status
12214         fmov.l          &0x0,%fpcr              # clear FPCR
12215
12216         or.l            %d1,USER_FPSR(%a6)      # save INEX2,N
12217
12218         fmov.x          %fp0,%fp1               # make a copy of result
12219         fcmp.b          %fp1,&0x1               # is |result| >= 1.b?
12220         fbge.w          fsqrt_sd_ovfl_tst       # yes; overflow has occurred
12221
12222 # no, it didn't overflow; we have correct result
12223         bra.w           fsqrt_sd_normal_exit    # take the normal exit path
12224
12225 ##########################################################################
12226
12227 #
12228 # input is not normalized; what is it?
12229 #
12230 fsqrt_not_norm:                                 # dispatch on the operand type tag in d1
12231         cmpi.b          %d1,&DENORM             # weed out DENORM
12232         beq.w           fsqrt_denorm            # DENORM: handled by fsqrt_denorm
12233         cmpi.b          %d1,&ZERO               # weed out ZERO
12234         beq.b           fsqrt_zero              # ZERO: return signed zero
12235         cmpi.b          %d1,&INF                # weed out INF
12236         beq.b           fsqrt_inf               # INF: +INF is returned, -INF goes to res_operr
12237         cmpi.b          %d1,&SNAN               # weed out SNAN
12238         beq.l           res_snan_1op            # SNAN: handled by res_snan_1op
12239         bra.l           res_qnan_1op            # any remaining tag is treated as QNAN
12240
12241 #
12242 #       fsqrt(+0) = +0
12243 #       fsqrt(-0) = -0
12244 #       fsqrt(+INF) = +INF
12245 #       fsqrt(-INF) = OPERR
12246 #
12247 fsqrt_zero:                                     # source is ZERO: return a zero of the same sign
12248         tst.b           SRC_EX(%a0)             # is ZERO positive or negative?
12249         bmi.b           fsqrt_zero_m            # negative
12250 fsqrt_zero_p:                                   # positive zero
12251         fmov.s          &0x00000000,%fp0        # return +ZERO
12252         mov.b           &z_bmask,FPSR_CC(%a6)   # set 'Z' ccode bit
12253         rts                                     # fp0 = +0
12254 fsqrt_zero_m:                                   # negative zero
12255         fmov.s          &0x80000000,%fp0        # return -ZERO
12256         mov.b           &z_bmask+neg_bmask,FPSR_CC(%a6) # set 'Z','N' ccode bits
12257         rts                                     # fp0 = -0
12258
12259 fsqrt_inf:                                      # source is INF: +INF is returned, -INF is an operand error
12260         tst.b           SRC_EX(%a0)             # is INF positive or negative?
12261         bmi.l           res_operr               # negative
12262 fsqrt_inf_p:                                    # positive infinity
12263         fmovm.x         SRC(%a0),&0x80          # return +INF in fp0
12264         mov.b           &inf_bmask,FPSR_CC(%a6) # set 'I' ccode bit
12265         rts                                     # fp0 = +INF
12266
12267 #########################################################################
12268 # XDEF **************************************************************** #
12269 #       fetch_dreg(): fetch register according to index in d1           #
12270 #                                                                       #
12271 # XREF **************************************************************** #
12272 #       None                                                            #
12273 #                                                                       #
12274 # INPUT *************************************************************** #
12275 #       d1 = index of register to fetch from                            #
12276 #                                                                       #
12277 # OUTPUT ************************************************************** #
12278 #       d0 = value of register fetched                                  #
12279 #                                                                       #
12280 # ALGORITHM *********************************************************** #
12281 #       According to the index value in d1 which can range from zero    #
12282 # to fifteen, load the corresponding register file value (where         #
12283 # address register indexes start at 8). D0/D1/A0/A1/A6/A7 are on the    #
12284 # stack. The rest should still be in their original places.             #
12285 #                                                                       #
12286 #########################################################################
12287
12288 # returns in d0 the register selected by index d1; d1 is left intact for subsequent store_dreg calls.
12289         global          fetch_dreg
12290 fetch_dreg:
12291         mov.w           (tbl_fdreg.b,%pc,%d1.w*2),%d0 # d0 = handler offset from tbl_fdreg
12292         jmp             (tbl_fdreg.b,%pc,%d0.w*1) # dispatch to fdregN
12293
12294 tbl_fdreg:                                      # 16 word offsets, each relative to tbl_fdreg
12295         short           fdreg0 - tbl_fdreg      # index 0x0: d0
12296         short           fdreg1 - tbl_fdreg      # index 0x1: d1
12297         short           fdreg2 - tbl_fdreg      # index 0x2: d2
12298         short           fdreg3 - tbl_fdreg      # index 0x3: d3
12299         short           fdreg4 - tbl_fdreg      # index 0x4: d4
12300         short           fdreg5 - tbl_fdreg      # index 0x5: d5
12301         short           fdreg6 - tbl_fdreg      # index 0x6: d6
12302         short           fdreg7 - tbl_fdreg      # index 0x7: d7
12303         short           fdreg8 - tbl_fdreg      # index 0x8: a0
12304         short           fdreg9 - tbl_fdreg      # index 0x9: a1
12305         short           fdrega - tbl_fdreg      # index 0xa: a2
12306         short           fdregb - tbl_fdreg      # index 0xb: a3
12307         short           fdregc - tbl_fdreg      # index 0xc: a4
12308         short           fdregd - tbl_fdreg      # index 0xd: a5
12309         short           fdrege - tbl_fdreg      # index 0xe: a6
12310         short           fdregf - tbl_fdreg      # index 0xf: a7
12311
12312 fdreg0:
12313         mov.l           EXC_DREGS+0x0(%a6),%d0  # d0: read saved copy from the stack frame
12314         rts
12315 fdreg1:
12316         mov.l           EXC_DREGS+0x4(%a6),%d0  # d1: read saved copy from the stack frame
12317         rts
12318 fdreg2:
12319         mov.l           %d2,%d0                 # d2 is still live in the register
12320         rts
12321 fdreg3:
12322         mov.l           %d3,%d0                 # d3 is still live in the register
12323         rts
12324 fdreg4:
12325         mov.l           %d4,%d0                 # d4 is still live in the register
12326         rts
12327 fdreg5:
12328         mov.l           %d5,%d0                 # d5 is still live in the register
12329         rts
12330 fdreg6:
12331         mov.l           %d6,%d0                 # d6 is still live in the register
12332         rts
12333 fdreg7:
12334         mov.l           %d7,%d0                 # d7 is still live in the register
12335         rts
12336 fdreg8:
12337         mov.l           EXC_DREGS+0x8(%a6),%d0  # a0: read saved copy from the stack frame
12338         rts
12339 fdreg9:
12340         mov.l           EXC_DREGS+0xc(%a6),%d0  # a1: read saved copy from the stack frame
12341         rts
12342 fdrega:
12343         mov.l           %a2,%d0                 # a2 is still live in the register
12344         rts
12345 fdregb:
12346         mov.l           %a3,%d0                 # a3 is still live in the register
12347         rts
12348 fdregc:
12349         mov.l           %a4,%d0                 # a4 is still live in the register
12350         rts
12351 fdregd:
12352         mov.l           %a5,%d0                 # a5 is still live in the register
12353         rts
12354 fdrege:
12355         mov.l           (%a6),%d0               # a6: read the longword stored at (a6)
12356         rts
12357 fdregf:
12358         mov.l           EXC_A7(%a6),%d0         # a7: read saved copy from the stack frame
12359         rts
12360
12361 #########################################################################
12362 # XDEF **************************************************************** #
12363 #       store_dreg_l(): store longword to data register specified by d1 #
12364 #                                                                       #
12365 # XREF **************************************************************** #
12366 #       None                                                            #
12367 #                                                                       #
12368 # INPUT *************************************************************** #
12369 #       d0 = longword value to store                                    #
12370 #       d1 = index of register to store to                              #
12371 #                                                                       #
12372 # OUTPUT ************************************************************** #
12373 #       (data register is updated)                                      #
12374 #                                                                       #
12375 # ALGORITHM *********************************************************** #
12376 #       According to the index value in d1, store the longword value    #
12377 # in d0 to the corresponding data register. D0/D1 are on the stack      #
12378 # while the rest are in their initial places.                           #
12379 #                                                                       #
12380 #########################################################################
12381
12382         global          store_dreg_l            # store longword d0 into data register number d1
12383 store_dreg_l:
12384         mov.w           (tbl_sdregl.b,%pc,%d1.w*2),%d1 # d1 = handler offset (index in d1 is overwritten)
12385         jmp             (tbl_sdregl.b,%pc,%d1.w*1) # dispatch to sdreglN
12386
12387 tbl_sdregl:                                     # 8 word offsets (d0-d7 only); no range check on d1 is done here
12388         short           sdregl0 - tbl_sdregl    # index 0: d0
12389         short           sdregl1 - tbl_sdregl    # index 1: d1
12390         short           sdregl2 - tbl_sdregl    # index 2: d2
12391         short           sdregl3 - tbl_sdregl    # index 3: d3
12392         short           sdregl4 - tbl_sdregl    # index 4: d4
12393         short           sdregl5 - tbl_sdregl    # index 5: d5
12394         short           sdregl6 - tbl_sdregl    # index 6: d6
12395         short           sdregl7 - tbl_sdregl    # index 7: d7
12396
12397 sdregl0:
12398         mov.l           %d0,EXC_DREGS+0x0(%a6)  # d0: update saved copy in the stack frame
12399         rts
12400 sdregl1:
12401         mov.l           %d0,EXC_DREGS+0x4(%a6)  # d1: update saved copy in the stack frame
12402         rts
12403 sdregl2:
12404         mov.l           %d0,%d2                 # d2: write the live register
12405         rts
12406 sdregl3:
12407         mov.l           %d0,%d3                 # d3: write the live register
12408         rts
12409 sdregl4:
12410         mov.l           %d0,%d4                 # d4: write the live register
12411         rts
12412 sdregl5:
12413         mov.l           %d0,%d5                 # d5: write the live register
12414         rts
12415 sdregl6:
12416         mov.l           %d0,%d6                 # d6: write the live register
12417         rts
12418 sdregl7:
12419         mov.l           %d0,%d7                 # d7: write the live register
12420         rts
12421
12422 #########################################################################
12423 # XDEF **************************************************************** #
12424 #       store_dreg_w(): store word to data register specified by d1     #
12425 #                                                                       #
12426 # XREF **************************************************************** #
12427 #       None                                                            #
12428 #                                                                       #
12429 # INPUT *************************************************************** #
12430 #       d0 = word value to store                                        #
12431 #       d1 = index of register to store to                              #
12432 #                                                                       #
12433 # OUTPUT ************************************************************** #
12434 #       (data register is updated)                                      #
12435 #                                                                       #
12436 # ALGORITHM *********************************************************** #
12437 #       According to the index value in d1, store the word value        #
12438 # in d0 to the corresponding data register. D0/D1 are on the stack      #
12439 # while the rest are in their initial places.                           #
12440 #                                                                       #
12441 #########################################################################
12442
12443         global          store_dreg_w            # store low word of d0 into data register number d1
12444 store_dreg_w:
12445         mov.w           (tbl_sdregw.b,%pc,%d1.w*2),%d1 # d1 = handler offset (index in d1 is overwritten)
12446         jmp             (tbl_sdregw.b,%pc,%d1.w*1) # dispatch to sdregwN
12447
12448 tbl_sdregw:                                     # 8 word offsets (d0-d7 only); no range check on d1 is done here
12449         short           sdregw0 - tbl_sdregw    # index 0: d0
12450         short           sdregw1 - tbl_sdregw    # index 1: d1
12451         short           sdregw2 - tbl_sdregw    # index 2: d2
12452         short           sdregw3 - tbl_sdregw    # index 3: d3
12453         short           sdregw4 - tbl_sdregw    # index 4: d4
12454         short           sdregw5 - tbl_sdregw    # index 5: d5
12455         short           sdregw6 - tbl_sdregw    # index 6: d6
12456         short           sdregw7 - tbl_sdregw    # index 7: d7
12457
12458 sdregw0:
12459         mov.w           %d0,2+EXC_DREGS+0x0(%a6) # d0: +2 selects the low word of the saved longword
12460         rts
12461 sdregw1:
12462         mov.w           %d0,2+EXC_DREGS+0x4(%a6) # d1: +2 selects the low word of the saved longword
12463         rts
12464 sdregw2:
12465         mov.w           %d0,%d2                 # d2: low word only; upper word unchanged
12466         rts
12467 sdregw3:
12468         mov.w           %d0,%d3                 # d3: low word only; upper word unchanged
12469         rts
12470 sdregw4:
12471         mov.w           %d0,%d4                 # d4: low word only; upper word unchanged
12472         rts
12473 sdregw5:
12474         mov.w           %d0,%d5                 # d5: low word only; upper word unchanged
12475         rts
12476 sdregw6:
12477         mov.w           %d0,%d6                 # d6: low word only; upper word unchanged
12478         rts
12479 sdregw7:
12480         mov.w           %d0,%d7                 # d7: low word only; upper word unchanged
12481         rts
12482
12483 #########################################################################
12484 # XDEF **************************************************************** #
12485 #       store_dreg_b(): store byte to data register specified by d1     #
12486 #                                                                       #
12487 # XREF **************************************************************** #
12488 #       None                                                            #
12489 #                                                                       #
12490 # INPUT *************************************************************** #
12491 #       d0 = byte value to store                                        #
12492 #       d1 = index of register to store to                              #
12493 #                                                                       #
12494 # OUTPUT ************************************************************** #
12495 #       (data register is updated)                                      #
12496 #                                                                       #
12497 # ALGORITHM *********************************************************** #
12498 #       According to the index value in d1, store the byte value        #
12499 # in d0 to the corresponding data register. D0/D1 are on the stack      #
12500 # while the rest are in their initial places.                           #
12501 #                                                                       #
12502 #########################################################################
12503
12504         global          store_dreg_b            # store low byte of d0 into data register number d1
12505 store_dreg_b:
12506         mov.w           (tbl_sdregb.b,%pc,%d1.w*2),%d1 # d1 = handler offset (index in d1 is overwritten)
12507         jmp             (tbl_sdregb.b,%pc,%d1.w*1) # dispatch to sdregbN
12508
12509 tbl_sdregb:                                     # 8 word offsets (d0-d7 only); no range check on d1 is done here
12510         short           sdregb0 - tbl_sdregb    # index 0: d0
12511         short           sdregb1 - tbl_sdregb    # index 1: d1
12512         short           sdregb2 - tbl_sdregb    # index 2: d2
12513         short           sdregb3 - tbl_sdregb    # index 3: d3
12514         short           sdregb4 - tbl_sdregb    # index 4: d4
12515         short           sdregb5 - tbl_sdregb    # index 5: d5
12516         short           sdregb6 - tbl_sdregb    # index 6: d6
12517         short           sdregb7 - tbl_sdregb    # index 7: d7
12518
12519 sdregb0:
12520         mov.b           %d0,3+EXC_DREGS+0x0(%a6) # d0: +3 selects the low byte of the saved longword
12521         rts
12522 sdregb1:
12523         mov.b           %d0,3+EXC_DREGS+0x4(%a6) # d1: +3 selects the low byte of the saved longword
12524         rts
12525 sdregb2:
12526         mov.b           %d0,%d2                 # d2: low byte only; upper bytes unchanged
12527         rts
12528 sdregb3:
12529         mov.b           %d0,%d3                 # d3: low byte only; upper bytes unchanged
12530         rts
12531 sdregb4:
12532         mov.b           %d0,%d4                 # d4: low byte only; upper bytes unchanged
12533         rts
12534 sdregb5:
12535         mov.b           %d0,%d5                 # d5: low byte only; upper bytes unchanged
12536         rts
12537 sdregb6:
12538         mov.b           %d0,%d6                 # d6: low byte only; upper bytes unchanged
12539         rts
12540 sdregb7:
12541         mov.b           %d0,%d7                 # d7: low byte only; upper bytes unchanged
12542         rts
12543
12544 #########################################################################
12545 # XDEF **************************************************************** #
12546 #       inc_areg(): increment an address register by the value in d0    #
12547 #                                                                       #
12548 # XREF **************************************************************** #
12549 #       None                                                            #
12550 #                                                                       #
12551 # INPUT *************************************************************** #
12552 #       d0 = amount to increment by                                     #
12553 #       d1 = index of address register to increment                     #
12554 #                                                                       #
12555 # OUTPUT ************************************************************** #
12556 #       (address register is updated)                                   #
12557 #                                                                       #
12558 # ALGORITHM *********************************************************** #
12559 #       Typically used for an instruction w/ a post-increment <ea>,     #
12560 # this routine adds the increment value in d0 to the address register   #
12561 # specified by d1. A0/A1/A6/A7 reside on the stack. The rest reside     #
12562 # in their original places.                                             #
12563 #       For a7, if the increment amount is one, then we have to         #
12564 # increment by two. For any a7 update, set the mia7_flag so that if     #
12565 # an access error exception occurs later in emulation, this address     #
12566 # register update can be undone.                                        #
12567 #                                                                       #
12568 #########################################################################
12569
12570         global          inc_areg                # add d0 to address register number d1
12571 inc_areg:
12572         mov.w           (tbl_iareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset (index in d1 is overwritten)
12573         jmp             (tbl_iareg.b,%pc,%d1.w*1) # dispatch to iaregN
12574
12575 tbl_iareg:                                      # 8 word offsets (a0-a7), each relative to tbl_iareg
12576         short           iareg0 - tbl_iareg      # index 0: a0
12577         short           iareg1 - tbl_iareg      # index 1: a1
12578         short           iareg2 - tbl_iareg      # index 2: a2
12579         short           iareg3 - tbl_iareg      # index 3: a3
12580         short           iareg4 - tbl_iareg      # index 4: a4
12581         short           iareg5 - tbl_iareg      # index 5: a5
12582         short           iareg6 - tbl_iareg      # index 6: a6
12583         short           iareg7 - tbl_iareg      # index 7: a7
12584
12585 iareg0: add.l           %d0,EXC_DREGS+0x8(%a6)  # a0: update saved copy in the stack frame
12586         rts
12587 iareg1: add.l           %d0,EXC_DREGS+0xc(%a6)  # a1: update saved copy in the stack frame
12588         rts
12589 iareg2: add.l           %d0,%a2                 # a2: update the live register
12590         rts
12591 iareg3: add.l           %d0,%a3                 # a3: update the live register
12592         rts
12593 iareg4: add.l           %d0,%a4                 # a4: update the live register
12594         rts
12595 iareg5: add.l           %d0,%a5                 # a5: update the live register
12596         rts
12597 iareg6: add.l           %d0,(%a6)               # a6: update the longword stored at (a6)
12598         rts
12599 iareg7: mov.b           &mia7_flg,SPCOND_FLG(%a6) # a7: flag the update so it can be undone later
12600         cmpi.b          %d0,&0x1                # is the increment (low byte of d0) one?
12601         beq.b           iareg7b                 # yes; a7 must move by two instead
12602         add.l           %d0,EXC_A7(%a6)         # no; add d0 to the saved a7
12603         rts
12604 iareg7b:
12605         addq.l          &0x2,EXC_A7(%a6)        # byte-sized access on a7: increment by two
12606         rts
12607
12608 #########################################################################
12609 # XDEF **************************************************************** #
12610 #       dec_areg(): decrement an address register by the value in d0    #
12611 #                                                                       #
12612 # XREF **************************************************************** #
12613 #       None                                                            #
12614 #                                                                       #
12615 # INPUT *************************************************************** #
12616 #       d0 = amount to decrement by                                     #
12617 #       d1 = index of address register to decrement                     #
12618 #                                                                       #
12619 # OUTPUT ************************************************************** #
12620 #       (address register is updated)                                   #
12621 #                                                                       #
12622 # ALGORITHM *********************************************************** #
12623 #       Typically used for an instruction w/ a pre-decrement <ea>,      #
12624 # this routine subtracts the decrement value in d0 from the address     #
12625 # register specified by d1. A0/A1/A6/A7 reside on the stack. The rest   #
12626 # reside in their original places.                                      #
12627 #       For a7, if the decrement amount is one, then we have to         #
12628 # decrement by two. For any a7 update, set the mda7_flag so that if     #
12629 # an access error exception occurs later in emulation, this address     #
12630 # register update can be undone.                                        #
12631 #                                                                       #
12632 #########################################################################
12633
12634         global          dec_areg                # subtract d0 from address register number d1
12635 dec_areg:
12636         mov.w           (tbl_dareg.b,%pc,%d1.w*2),%d1 # d1 = handler offset (index in d1 is overwritten)
12637         jmp             (tbl_dareg.b,%pc,%d1.w*1) # dispatch to daregN
12638
12639 tbl_dareg:                                      # 8 word offsets (a0-a7), each relative to tbl_dareg
12640         short           dareg0 - tbl_dareg      # index 0: a0
12641         short           dareg1 - tbl_dareg      # index 1: a1
12642         short           dareg2 - tbl_dareg      # index 2: a2
12643         short           dareg3 - tbl_dareg      # index 3: a3
12644         short           dareg4 - tbl_dareg      # index 4: a4
12645         short           dareg5 - tbl_dareg      # index 5: a5
12646         short           dareg6 - tbl_dareg      # index 6: a6
12647         short           dareg7 - tbl_dareg      # index 7: a7
12648
12649 dareg0: sub.l           %d0,EXC_DREGS+0x8(%a6)  # a0: update saved copy in the stack frame
12650         rts
12651 dareg1: sub.l           %d0,EXC_DREGS+0xc(%a6)  # a1: update saved copy in the stack frame
12652         rts
12653 dareg2: sub.l           %d0,%a2                 # a2: update the live register
12654         rts
12655 dareg3: sub.l           %d0,%a3                 # a3: update the live register
12656         rts
12657 dareg4: sub.l           %d0,%a4                 # a4: update the live register
12658         rts
12659 dareg5: sub.l           %d0,%a5                 # a5: update the live register
12660         rts
12661 dareg6: sub.l           %d0,(%a6)               # a6: update the longword stored at (a6)
12662         rts
12663 dareg7: mov.b           &mda7_flg,SPCOND_FLG(%a6) # a7: flag the update so it can be undone later
12664         cmpi.b          %d0,&0x1                # is the decrement (low byte of d0) one?
12665         beq.b           dareg7b                 # yes; a7 must move by two instead
12666         sub.l           %d0,EXC_A7(%a6)         # no; subtract d0 from the saved a7
12667         rts
12668 dareg7b:
12669         subq.l          &0x2,EXC_A7(%a6)        # byte-sized access on a7: decrement by two
12670         rts
12671
12672 ##############################################################################
12673
12674 #########################################################################
12675 # XDEF **************************************************************** #
12676 #       load_fpn1(): load FP register value into FP_SRC(a6).            #
12677 #                                                                       #
12678 # XREF **************************************************************** #
12679 #       None                                                            #
12680 #                                                                       #
12681 # INPUT *************************************************************** #
12682 #       d0 = index of FP register to load                               #
12683 #                                                                       #
12684 # OUTPUT ************************************************************** #
12685 #       FP_SRC(a6) = value loaded from FP register file                 #
12686 #                                                                       #
12687 # ALGORITHM *********************************************************** #
12688 #       Using the index in d0, load FP_SRC(a6) with a number from the   #
12689 # FP register file.                                                     #
12690 #                                                                       #
12691 #########################################################################
12692
12693         global          load_fpn1               # copy FP register number d0 into FP_SRC(a6); returns a0 = &FP_SRC
12694 load_fpn1:
12695         mov.w           (tbl_load_fpn1.b,%pc,%d0.w*2), %d0 # d0 = handler offset (index in d0 is overwritten)
12696         jmp             (tbl_load_fpn1.b,%pc,%d0.w*1) # dispatch to load_fpn1_N
12697
12698 tbl_load_fpn1:                                  # 8 word offsets (fp0-fp7), each relative to tbl_load_fpn1
12699         short           load_fpn1_0 - tbl_load_fpn1 # index 0: fp0
12700         short           load_fpn1_1 - tbl_load_fpn1 # index 1: fp1
12701         short           load_fpn1_2 - tbl_load_fpn1 # index 2: fp2
12702         short           load_fpn1_3 - tbl_load_fpn1 # index 3: fp3
12703         short           load_fpn1_4 - tbl_load_fpn1 # index 4: fp4
12704         short           load_fpn1_5 - tbl_load_fpn1 # index 5: fp5
12705         short           load_fpn1_6 - tbl_load_fpn1 # index 6: fp6
12706         short           load_fpn1_7 - tbl_load_fpn1 # index 7: fp7
12707
12708 load_fpn1_0:                                    # fp0: copied from EXC_FP0 (presumably the saved fp0 -- confirm)
12709         mov.l           0+EXC_FP0(%a6), 0+FP_SRC(%a6) # first longword of the 12-byte value
12710         mov.l           4+EXC_FP0(%a6), 4+FP_SRC(%a6) # second longword
12711         mov.l           8+EXC_FP0(%a6), 8+FP_SRC(%a6) # third longword
12712         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12713         rts
12714 load_fpn1_1:                                    # fp1: copied from EXC_FP1 (presumably the saved fp1 -- confirm)
12715         mov.l           0+EXC_FP1(%a6), 0+FP_SRC(%a6) # first longword of the 12-byte value
12716         mov.l           4+EXC_FP1(%a6), 4+FP_SRC(%a6) # second longword
12717         mov.l           8+EXC_FP1(%a6), 8+FP_SRC(%a6) # third longword
12718         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12719         rts
12720 load_fpn1_2:
12721         fmovm.x         &0x20, FP_SRC(%a6)      # mask 0x20: store live fp2
12722         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12723         rts
12724 load_fpn1_3:
12725         fmovm.x         &0x10, FP_SRC(%a6)      # mask 0x10: store live fp3
12726         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12727         rts
12728 load_fpn1_4:
12729         fmovm.x         &0x08, FP_SRC(%a6)      # mask 0x08: store live fp4
12730         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12731         rts
12732 load_fpn1_5:
12733         fmovm.x         &0x04, FP_SRC(%a6)      # mask 0x04: store live fp5
12734         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12735         rts
12736 load_fpn1_6:
12737         fmovm.x         &0x02, FP_SRC(%a6)      # mask 0x02: store live fp6
12738         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12739         rts
12740 load_fpn1_7:
12741         fmovm.x         &0x01, FP_SRC(%a6)      # mask 0x01: store live fp7
12742         lea             FP_SRC(%a6), %a0        # a0 = address of FP_SRC
12743         rts
12744
12745 #############################################################################
12746
12747 #########################################################################
12748 # XDEF **************************************************************** #
12749 #       load_fpn2(): load FP register value into FP_DST(a6).            #
12750 #                                                                       #
12751 # XREF **************************************************************** #
12752 #       None                                                            #
12753 #                                                                       #
12754 # INPUT *************************************************************** #
12755 #       d0 = index of FP register to load                               #
12756 #                                                                       #
12757 # OUTPUT ************************************************************** #
12758 #       FP_DST(a6) = value loaded from FP register file                 #
12759 #                                                                       #
12760 # ALGORITHM *********************************************************** #
12761 #       Using the index in d0, load FP_DST(a6) with a number from the   #
12762 # FP register file.                                                     #
12763 #                                                                       #
12764 #########################################################################
12765
12766         global          load_fpn2               # copy FP register number d0 into FP_DST(a6); returns a0 = &FP_DST
12767 load_fpn2:
12768         mov.w           (tbl_load_fpn2.b,%pc,%d0.w*2), %d0 # d0 = handler offset (index in d0 is overwritten)
12769         jmp             (tbl_load_fpn2.b,%pc,%d0.w*1) # dispatch to load_fpn2_N
12770
12771 tbl_load_fpn2:                                  # 8 word offsets (fp0-fp7), each relative to tbl_load_fpn2
12772         short           load_fpn2_0 - tbl_load_fpn2 # index 0: fp0
12773         short           load_fpn2_1 - tbl_load_fpn2 # index 1: fp1
12774         short           load_fpn2_2 - tbl_load_fpn2 # index 2: fp2
12775         short           load_fpn2_3 - tbl_load_fpn2 # index 3: fp3
12776         short           load_fpn2_4 - tbl_load_fpn2 # index 4: fp4
12777         short           load_fpn2_5 - tbl_load_fpn2 # index 5: fp5
12778         short           load_fpn2_6 - tbl_load_fpn2 # index 6: fp6
12779         short           load_fpn2_7 - tbl_load_fpn2 # index 7: fp7
12780
12781 load_fpn2_0:                                    # fp0: copied from EXC_FP0 (presumably the saved fp0 -- confirm)
12782         mov.l           0+EXC_FP0(%a6), 0+FP_DST(%a6) # first longword of the 12-byte value
12783         mov.l           4+EXC_FP0(%a6), 4+FP_DST(%a6) # second longword
12784         mov.l           8+EXC_FP0(%a6), 8+FP_DST(%a6) # third longword
12785         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12786         rts
12787 load_fpn2_1:                                    # fp1: copied from EXC_FP1 (presumably the saved fp1 -- confirm)
12788         mov.l           0+EXC_FP1(%a6), 0+FP_DST(%a6) # first longword of the 12-byte value
12789         mov.l           4+EXC_FP1(%a6), 4+FP_DST(%a6) # second longword
12790         mov.l           8+EXC_FP1(%a6), 8+FP_DST(%a6) # third longword
12791         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12792         rts
12793 load_fpn2_2:
12794         fmovm.x         &0x20, FP_DST(%a6)      # mask 0x20: store live fp2
12795         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12796         rts
12797 load_fpn2_3:
12798         fmovm.x         &0x10, FP_DST(%a6)      # mask 0x10: store live fp3
12799         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12800         rts
12801 load_fpn2_4:
12802         fmovm.x         &0x08, FP_DST(%a6)      # mask 0x08: store live fp4
12803         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12804         rts
12805 load_fpn2_5:
12806         fmovm.x         &0x04, FP_DST(%a6)      # mask 0x04: store live fp5
12807         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12808         rts
12809 load_fpn2_6:
12810         fmovm.x         &0x02, FP_DST(%a6)      # mask 0x02: store live fp6
12811         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12812         rts
12813 load_fpn2_7:
12814         fmovm.x         &0x01, FP_DST(%a6)      # mask 0x01: store live fp7
12815         lea             FP_DST(%a6), %a0        # a0 = address of FP_DST
12816         rts
12817
12818 #############################################################################
12819
12820 #########################################################################
12821 # XDEF **************************************************************** #
12822 #       store_fpreg(): store an fp value to the fpreg designated d0.    #
12823 #                                                                       #
12824 # XREF **************************************************************** #
12825 #       None                                                            #
12826 #                                                                       #
12827 # INPUT *************************************************************** #
12828 #       fp0 = extended precision value to store                         #
12829 #       d0  = index of floating-point register (0-7)                    #
12830 #                                                                       #
12831 # OUTPUT ************************************************************** #
12832 #       None (d0 is overwritten with the jump-table offset)             #
12833 #                                                                       #
12834 # ALGORITHM *********************************************************** #
12835 #       Store the value in fp0 to the FP register designated by the     #
12836 # value in d0. The FP number can be DENORM or SNAN so we have to be     #
12837 # careful that we don't take an exception here.                         #
12838 # Index 0/1 write EXC_FP0/EXC_FP1(a6); 2-7 load fp2-fp7 via the stack.  #
12839 #########################################################################
12840
12841         global          store_fpreg
12842 store_fpreg:
12843         mov.w           (tbl_store_fpreg.b,%pc,%d0.w*2), %d0 # d0 = 16-bit offset from table[d0]
12844         jmp             (tbl_store_fpreg.b,%pc,%d0.w*1) # jump to tbl_store_fpreg + offset
12845
12846 tbl_store_fpreg:
12847         short           store_fpreg_0 - tbl_store_fpreg
12848         short           store_fpreg_1 - tbl_store_fpreg
12849         short           store_fpreg_2 - tbl_store_fpreg
12850         short           store_fpreg_3 - tbl_store_fpreg
12851         short           store_fpreg_4 - tbl_store_fpreg
12852         short           store_fpreg_5 - tbl_store_fpreg
12853         short           store_fpreg_6 - tbl_store_fpreg
12854         short           store_fpreg_7 - tbl_store_fpreg
12855
12856 store_fpreg_0:
12857         fmovm.x         &0x80, EXC_FP0(%a6)     # write fp0 to the EXC_FP0 slot off a6
12858         rts
12859 store_fpreg_1:
12860         fmovm.x         &0x80, EXC_FP1(%a6)     # write fp0 to the EXC_FP1 slot off a6
12861         rts
12862 store_fpreg_2:
12863         fmovm.x         &0x01, -(%sp)           # push fp0 (-(An) list: bit 0 = fp0)
12864         fmovm.x         (%sp)+, &0x20           # pop it into fp2
12865         rts
12866 store_fpreg_3:
12867         fmovm.x         &0x01, -(%sp)           # push fp0
12868         fmovm.x         (%sp)+, &0x10           # pop it into fp3
12869         rts
12870 store_fpreg_4:
12871         fmovm.x         &0x01, -(%sp)           # push fp0
12872         fmovm.x         (%sp)+, &0x08           # pop it into fp4
12873         rts
12874 store_fpreg_5:
12875         fmovm.x         &0x01, -(%sp)           # push fp0
12876         fmovm.x         (%sp)+, &0x04           # pop it into fp5
12877         rts
12878 store_fpreg_6:
12879         fmovm.x         &0x01, -(%sp)           # push fp0
12880         fmovm.x         (%sp)+, &0x02           # pop it into fp6
12881         rts
12882 store_fpreg_7:
12883         fmovm.x         &0x01, -(%sp)           # push fp0
12884         fmovm.x         (%sp)+, &0x01           # pop it into fp7
12885         rts
12886
12887 #########################################################################
12888 # XDEF **************************************************************** #
12889 #       get_packed(): fetch a packed operand from memory and then       #
12890 #                     convert it to a floating-point binary number.     #
12891 #                                                                       #
12892 # XREF **************************************************************** #
12893 #       _dcalc_ea() - calculate the correct <ea>                        #
12894 #       _dmem_read() - fetch the packed operand from memory             #
12895 #       facc_in_x() - the fetch failed so jump to special exit code     #
12896 #       decbin()    - convert packed to binary extended precision       #
12897 #                                                                       #
12898 # INPUT *************************************************************** #
12899 #       None                                                            #
12900 #                                                                       #
12901 # OUTPUT ************************************************************** #
12902 #       If no failure on _dmem_read():                                  #
12903 #       FP_SRC(a6) = packed operand now as a binary FP number           #
12904 #                                                                       #
12905 # ALGORITHM *********************************************************** #
12906 #       Get the correct <ea> which is the value on the exception stack  #
12907 # frame w/ maybe a correction factor if the <ea> is -(an) or (an)+.     #
12908 # Then, fetch the operand from memory. If the fetch fails, exit         #
12909 # through facc_in_x().                                                  #
12910 #       If the packed operand is a ZERO, NAN, or INF, return with it    #
12911 # unchanged in FP_SRC(a6). Else, call decbin() which will               #
12912 # convert the packed value to an extended precision binary value.       #
12913 #                                                                       #
12914 #########################################################################
12915
12916 # the stacked <ea> for packed is correct except for -(An).
12917 # the base reg must be updated for both -(An) and (An)+.
12918         global          get_packed
12919 get_packed:
12920         mov.l           &0xc,%d0                # packed is 12 bytes
12921         bsr.l           _dcalc_ea               # fetch <ea>; correct An
12922
12923         lea             FP_SRC(%a6),%a1         # pass: ptr to super dst
12924         mov.l           &0xc,%d0                # pass: 12 bytes
12925         bsr.l           _dmem_read              # read packed operand
12926
12927         tst.l           %d1                     # did dfetch fail? (d1 != 0)
12928         bne.l           facc_in_x               # yes
12929
12930 # The packed operand is an INF or a NAN if the exponent field is all ones.
12931         bfextu          FP_SRC(%a6){&1:&15},%d0 # get exp (15 bits after the sign bit)
12932         cmpi.w          %d0,&0x7fff             # INF or NAN?
12933         bne.b           gp_try_zero             # no
12934         rts                                     # INF or NAN: FP_SRC left as fetched
12935
12936 # The packed operand is a zero if the mantissa is all zero, else it's
12937 # a normal packed op.
12938 gp_try_zero:
12939         mov.b           3+FP_SRC(%a6),%d0       # get 4th byte (offset 3) of lw 1
12940         andi.b          &0x0f,%d0               # keep low nybble (integer digit)
12941         bne.b           gp_not_spec             # not a zero
12942         tst.l           FP_SRC_HI(%a6)          # is lw 2 zero?
12943         bne.b           gp_not_spec             # not a zero
12944         tst.l           FP_SRC_LO(%a6)          # is lw 3 zero?
12945         bne.b           gp_not_spec             # not a zero
12946         rts                                     # ZERO: FP_SRC left as fetched
12947 gp_not_spec:
12948         lea             FP_SRC(%a6),%a0         # pass: ptr to packed op
12949         bsr.l           decbin                  # convert to extended (result in fp0)
12950         fmovm.x         &0x80,FP_SRC(%a6)       # store fp0: make this the srcop
12951         rts
12952
12953 #########################################################################
12954 # decbin(): Converts normalized packed bcd value pointed to by register #
12955 #           a0 to extended-precision value in fp0.                      #
12956 #                                                                       #
12957 # INPUT *************************************************************** #
12958 #       a0 = pointer to normalized packed bcd value                     #
12959 #                                                                       #
12960 # OUTPUT ************************************************************** #
12961 #       fp0 = exact fp representation of the packed bcd value.          #
12962 #                                                                       #
12963 # ALGORITHM *********************************************************** #
12964 #       Expected is a normal bcd (i.e. non-exceptional; all inf, zero,  #
12965 #       and NaN operands are dispatched without entering this routine)  #
12966 #       value in 68881/882 format at location (a0).                     #
12967 #                                                                       #
12968 #       A1. Convert the bcd exponent to binary by successive adds and   #
12969 #       muls. Set the sign according to SE. Subtract 16 to compensate   #
12970 #       for the mantissa which is to be interpreted as 17 integer       #
12971 #       digits, rather than 1 integer and 16 fraction digits.           #
12972 #       Note: this operation can never overflow.                        #
12973 #                                                                       #
12974 #       A2. Convert the bcd mantissa to binary by successive            #
12975 #       adds and muls in FP0. Set the sign according to SM.             #
12976 #       The mantissa digits will be converted with the decimal point    #
12977 #       assumed following the least-significant digit.                  #
12978 #       Note: this operation can never overflow.                        #
12979 #                                                                       #
12980 #       A3. Count the number of leading/trailing zeros in the           #
12981 #       bcd string.  If SE is positive, count the leading zeros;        #
12982 #       if negative, count the trailing zeros.  Set the adjusted        #
12983 #       exponent equal to the exponent from A1 and the zero count       #
12984 #       added if SM = 1 and subtracted if SM = 0.  Scale the            #
12985 #       mantissa the equivalent of forcing in the bcd value:            #
12986 #                                                                       #
12987 #       SM = 0  a non-zero digit in the integer position                #
12988 #       SM = 1  a non-zero digit in Mant0, lsd of the fraction          #
12989 #                                                                       #
12990 #       this will ensure that any value, regardless of its              #
12991 #       representation (ex. 0.1E2, 1E1, 10E0, 100E-1), is converted     #
12992 #       consistently.                                                   #
12993 #                                                                       #
12994 #       A4. Calculate the factor 10^exp in FP1 using a table of         #
12995 #       10^(2^n) values.  To reduce the error in forming factors        #
12996 #       greater than 10^27, a directed rounding scheme is used with     #
12997 #       tables rounded to RN, RM, and RP, according to the table        #
12998 #       in the comments of the pwrten section.                          #
12999 #                                                                       #
13000 #       A5. Form the final binary number by scaling the mantissa by     #
13001 #       the exponent factor.  This is done by multiplying the           #
13002 #       mantissa in FP0 by the factor in FP1 if the adjusted            #
13003 #       exponent sign is positive, and dividing FP0 by FP1 if           #
13004 #       it is negative.                                                 #
13005 #                                                                       #
13006 #       Clean up and return. Check if the final mul or div was inexact. #
13007 #       If so, set INEX1 in USER_FPSR.                                  #
13008 #                                                                       #
13009 #########################################################################
13010
13011 #
13012 #       PTENRN, PTENRM, and PTENRP are arrays of powers of 10 rounded
13013 #       to nearest, minus, and plus, respectively.  The tables include
13014 #       10**{1,2,4,8,16,32,64,128,256,512,1024,2048,4096}.  No rounding
13015 #       is required until the power is greater than 27, however, all
13016 #       tables include the first 5 for ease of indexing.
13017 #       RTABLE: row = FPCR rounding mode, column = {SM,SE}; 0=RN, 2=RM, 3=RP.
13018 RTABLE:
13019         byte            0,0,0,0                 # user mode RN: always RN
13020         byte            2,3,2,3                 # user mode RZ
13021         byte            2,3,3,2                 # user mode RM
13022         byte            3,2,2,3                 # user mode RP
13023
13024         set             FNIBS,7                 # dbf count: 8 digits per mantissa lword
13025         set             FSTRT,0                 # bit offset of 1st digit in a mantissa lword
13026
13027         set             ESTRT,4                 # bit offset of 1st exponent digit in lword 1
13028         set             EDIGITS,2               # dbf count: 3 exponent digits
13029
13030         global          decbin
13031 decbin:
13032         mov.l           0x0(%a0),FP_SCR0_EX(%a6) # make a copy of input
13033         mov.l           0x4(%a0),FP_SCR0_HI(%a6) # so we don't alter it
13034         mov.l           0x8(%a0),FP_SCR0_LO(%a6)
13035
13036         lea             FP_SCR0(%a6),%a0        # a0 -> working copy from here on
13037 # -(An) fmovm lists are bit-reversed vs (An)+: bit 0 = fp0, bit 1 = fp1.
13038         movm.l          &0x3c00,-(%sp)          # save d2-d5
13039         fmovm.x         &0x2,-(%sp)             # save fp1 (popped by (%sp)+,&0x40 at exit)
13040 #
13041 # Calculate exponent:
13042 #  1. Copy bcd value in memory for use as a working copy (done at decbin entry).
13043 #  2. Calculate absolute value of exponent in d1 by mul and add.
13044 #  3. Correct for exponent sign.
13045 #  4. Subtract 16 to compensate for interpreting the mant as all integer digits.
13046 #     (i.e., all digits assumed left of the decimal point.)
13047 #
13048 # Register usage:
13049 #
13050 #  calc_e:
13051 #       (*)  d0: temp digit storage
13052 #       (*)  d1: accumulator for binary exponent
13053 #       (*)  d2: digit count
13054 #       (*)  d3: offset pointer
13055 #       (*)  d4: first longword of bcd (SE bit may be set here)
13056 #       ( )  a0: pointer to working bcd value
13057 #       ( )  a6: base register for the FP_SCR0 working copy
13058 #       (*)  FP_SCR0: working copy of original bcd value
13059 #       ( )  L_SCR1: not referenced by the code of this routine
13060 #
13061 calc_e:
13062         mov.l           &EDIGITS,%d2            # dbf count for the 3 exponent digits
13063         mov.l           &ESTRT,%d3              # bit offset of first exponent digit
13064         mov.l           (%a0),%d4               # get first longword of bcd
13065         clr.l           %d1                     # zero d1 for accumulator
13066 e_gd:
13067         mulu.l          &0xa,%d1                # mul partial product by one digit place
13068         bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend into d0
13069         add.l           %d0,%d1                 # d1 = d1 + d0
13070         addq.b          &4,%d3                  # advance d3 to the next digit
13071         dbf.w           %d2,e_gd                # if we have used all 3 digits, exit loop
13072         btst            &30,%d4                 # get SE
13073         beq.b           e_pos                   # don't negate if pos
13074         neg.l           %d1                     # negate before subtracting
13075 e_pos:
13076         sub.l           &16,%d1                 # sub to compensate for shift of mant
13077         bge.b           e_save                  # if still pos, do not neg
13078         neg.l           %d1                     # now negative, make pos and set SE
13079         or.l            &0x40000000,%d4         # set SE in d4,
13080         or.l            &0x40000000,(%a0)       # and in working bcd
13081 e_save:
13082         mov.l           %d1,-(%sp)              # save abs(adjusted exp); reread via (%sp)
13083 #
13084 #
13085 # Calculate mantissa:
13086 #  1. Calculate absolute value of mantissa in fp0 by mul and add.
13087 #  2. Correct for mantissa sign.
13088 #     (i.e., all digits assumed left of the decimal point.)
13089 #
13090 # Register usage:
13091 #
13092 #  calc_m:
13093 #       (*)  d0: temp digit storage
13094 #       (*)  d1: lword counter
13095 #       (*)  d2: digit count
13096 #       (*)  d3: offset pointer
13097 #       (*)  d4: longwords 2 and 3 of bcd, one at a time
13098 #       ( )  a0: pointer to working bcd value
13099 #       ( )  a6: base register for the FP_SCR0 working copy
13100 #       (*) fp0: mantissa accumulator
13101 #       ( )  FP_SCR0: working copy of original bcd value
13102 #       ( )  L_SCR1: not referenced by the code of this routine
13103 #
13104 calc_m:
13105         mov.l           &1,%d1                  # lword counter, init to 1 (= lword 2)
13106         fmov.s          &0x00000000,%fp0        # accumulator = 0.0
13107 #
13108 #
13109 #  Since the packed number has a long word between the first & second parts,
13110 #  get the integer digit then skip down & get the rest of the
13111 #  mantissa.  We will unroll the loop once.
13112 #
13113         bfextu          (%a0){&28:&4},%d0       # integer part is ls digit in long word
13114         fadd.b          %d0,%fp0                # add digit to sum in fp0
13115 #
13116 #
13117 #  Get the rest of the mantissa.
13118 #
13119 loadlw:
13120         mov.l           (%a0,%d1.L*4),%d4       # load mantissa longword into d4
13121         mov.l           &FSTRT,%d3              # bit offset of first digit in this lw
13122         mov.l           &FNIBS,%d2              # dbf count for the 8 digits in this lw
13123 md2b:
13124         fmul.s          &0x41200000,%fp0        # fp0 = fp0 * 10
13125         bfextu          %d4{%d3:&4},%d0         # get the digit and zero extend
13126         fadd.b          %d0,%fp0                # fp0 = fp0 + digit
13127 #
13128 #
13129 #  If all the digits (8) in that long word have been converted (d2=0),
13130 #  then inc d1 (=2) to point to the next long word and reset d3 to 0
13131 #  to initialize the digit offset, and set d2 to 7 for the digit count;
13132 #  else continue with this long word.
13133 #
13134         addq.b          &4,%d3                  # advance d3 to the next digit
13135         dbf.w           %d2,md2b                # check for last digit in this lw
13136 nextlw:
13137         addq.l          &1,%d1                  # inc lw pointer in mantissa
13138         cmp.l           %d1,&2                  # test for last lw
13139         ble.b           loadlw                  # if d1 <= 2, convert lword 3 too
13140 #
13141 #  Check the sign of the mant and make the value in fp0 the same sign.
13142 #
13143 m_sign:
13144         btst            &31,(%a0)               # test sign of the mantissa (SM)
13145         beq.b           ap_st_z                 # if clear, go to append/strip zeros
13146         fneg.x          %fp0                    # if set, negate fp0
13147 #
13148 # Append/strip zeros:
13149 #
13150 #  For adjusted exponents which have an absolute value greater than 27*,
13151 #  this routine calculates the amount needed to normalize the mantissa
13152 #  for the adjusted exponent.  That number is subtracted from the exp
13153 #  if the exp was positive, and added if it was negative.  The purpose
13154 #  of this is to reduce the value of the exponent and the possibility
13155 #  of error in calculation of pwrten.
13156 #
13157 #  1. Branch on the sign of the adjusted exponent.
13158 #  2p.(positive exp)
13159 #   2. Check M16 and the digits in lwords 2 and 3 in descending order.
13160 #   3. Add one for each zero encountered until a non-zero digit.
13161 #   4. Subtract the count from the exp.
13162 #   5. Check if the exp has crossed zero in #3 above; make the exp abs
13163 #          and set SE.
13164 #       6. Multiply the mantissa by 10**count.
13165 #  2n.(negative exp)
13166 #   2. Check the digits in lwords 3 and 2 in descending order.
13167 #   3. Add one for each zero encountered until a non-zero digit.
13168 #   4. Add the count to the exp.
13169 #   5. Check if the exp has crossed zero in #3 above; clear SE.
13170 #   6. Divide the mantissa by 10**count.
13171 #
13172 #  *Why 27?  If the adjusted exponent is within -28 < expA < 28, then
13173 #   any adjustment due to append/strip zeros will drive the resultant
13174 #   exponent towards zero.  Since all pwrten constants with a power
13175 #   of 27 or less are exact, there is no need to use this routine to
13176 #   attempt to lessen the resultant exponent.
13177 #
13178 # Register usage:
13179 #
13180 #  ap_st_z:
13181 #       (*)  d0: temp digit storage
13182 #       (*)  d1: zero count
13183 #       (*)  d2: digit count
13184 #       (*)  d3: offset pointer
13185 #       (*)  d4: lword of bcd currently being scanned
13186 #       (*)  d5: lword counter
13187 #       ( )  a0: pointer to working bcd value
13188 #       ( )  FP_SCR0: working copy of original bcd value
13189 #       ( )  L_SCR1: not referenced by the code of this routine
13190 #
13191 #
13192 # First check the absolute value of the exponent to see if this
13193 # routine is necessary.  If so, then check the sign of the exponent
13194 # and do append (+) or strip (-) zeros accordingly.
13195 # This section handles a positive adjusted exponent.
13196 #
13197 ap_st_z:
13198         mov.l           (%sp),%d1               # load expA for range test
13199         cmp.l           %d1,&27                 # test is with 27
13200         ble.w           pwrten                  # if abs(expA) <28, skip ap/st zeros
13201         btst            &30,(%a0)               # check sign of exp
13202         bne.b           ap_st_n                 # if neg, go to neg side
13203         clr.l           %d1                     # zero count reg
13204         mov.l           (%a0),%d4               # load lword 1 to d4
13205         bfextu          %d4{&28:&4},%d0         # get M16 in d0
13206         bne.b           ap_p_fx                 # if M16 is non-zero, go fix exp
13207         addq.l          &1,%d1                  # inc zero count
13208         mov.l           &1,%d5                  # init lword counter
13209         mov.l           (%a0,%d5.L*4),%d4       # get lword 2 to d4
13210         bne.b           ap_p_cl                 # if lw 2 is non-zero, scan its digits
13211         addq.l          &8,%d1                  # lw 2 all zero: inc count by 8
13212         addq.l          &1,%d5                  # inc lword counter
13213         mov.l           (%a0,%d5.L*4),%d4       # get lword 3 to d4
13214 ap_p_cl:
13215         clr.l           %d3                     # init offset reg
13216         mov.l           &7,%d2                  # init digit counter
13217 ap_p_gd:
13218         bfextu          %d4{%d3:&4},%d0         # get digit
13219         bne.b           ap_p_fx                 # if non-zero, go to fix exp
13220         addq.l          &4,%d3                  # point to next digit
13221         addq.l          &1,%d1                  # inc zero count
13222         dbf.w           %d2,ap_p_gd             # get next digit
13223 ap_p_fx:
13224         mov.l           %d1,%d0                 # copy counter to d0
13225         mov.l           (%sp),%d1               # get adjusted exp from memory
13226         sub.l           %d0,%d1                 # subtract count from exp
13227         bge.b           ap_p_fm                 # if still pos, go fix mantissa
13228         neg.l           %d1                     # now its neg; get abs
13229         mov.l           (%a0),%d4               # load lword 1 to d4
13230         or.l            &0x40000000,%d4         # and set SE in d4
13231         or.l            &0x40000000,(%a0)       # and in memory
13232 #
13233 # Calculate the mantissa multiplier to compensate for the stripping of
13234 # zeros from the mantissa.
13235 #
13236 ap_p_fm:
13237         lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
13238         clr.l           %d3                     # init table index
13239         fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13240         mov.l           &3,%d2                  # d2 is not read by the loop below
13241 ap_p_el:
13242         asr.l           &1,%d0                  # shift lsb into carry
13243         bcc.b           ap_p_en                 # bit was 0: skip the multiply
13244         fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13245 ap_p_en:
13246         add.l           &12,%d3                 # advance d3 to next 12-byte PTENRN entry
13247         tst.l           %d0                     # check if d0 is zero
13248         bne.b           ap_p_el                 # if not, get next bit
13249         fmul.x          %fp1,%fp0               # mul mantissa by 10**(zero count)
13250         bra.b           pwrten                  # go calc pwrten
13251 #
13252 # This section handles a negative adjusted exponent.
13253 #
13254 ap_st_n:
13255         clr.l           %d1                     # clr counter
13256         mov.l           &2,%d5                  # set up d5 to point to lword 3
13257         mov.l           (%a0,%d5.L*4),%d4       # get lword 3
13258         bne.b           ap_n_cl                 # if not zero, check digits
13259         sub.l           &1,%d5                  # dec d5 to point to lword 2
13260         addq.l          &8,%d1                  # lw 3 all zero: inc counter by 8
13261         mov.l           (%a0,%d5.L*4),%d4       # get lword 2
13262 ap_n_cl:
13263         mov.l           &28,%d3                 # point to last digit
13264         mov.l           &7,%d2                  # init digit counter
13265 ap_n_gd:
13266         bfextu          %d4{%d3:&4},%d0         # get digit
13267         bne.b           ap_n_fx                 # if non-zero, go to exp fix
13268         subq.l          &4,%d3                  # point to previous digit
13269         addq.l          &1,%d1                  # inc zero count
13270         dbf.w           %d2,ap_n_gd             # get next digit
13271 ap_n_fx:
13272         mov.l           %d1,%d0                 # copy counter to d0
13273         mov.l           (%sp),%d1               # get abs(adjusted exp) from memory
13274         sub.l           %d0,%d1                 # abs(exp) - count, i.e. signed exp + count
13275         bgt.b           ap_n_fm                 # if still pos, go fix mantissa
13276         neg.l           %d1                     # take abs of exp and clr SE
13277         mov.l           (%a0),%d4               # load lword 1 to d4
13278         and.l           &0xbfffffff,%d4         # and clr SE in d4
13279         and.l           &0xbfffffff,(%a0)       # and in memory
13280 #
13281 # Calculate the mantissa multiplier to compensate for the appending of
13282 # zeros to the mantissa.
13283 #
13284 ap_n_fm:
13285         lea.l           PTENRN(%pc),%a1         # get address of power-of-ten table
13286         clr.l           %d3                     # init table index
13287         fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13288         mov.l           &3,%d2                  # d2 is not read by the loop below
13289 ap_n_el:
13290         asr.l           &1,%d0                  # shift lsb into carry
13291         bcc.b           ap_n_en                 # bit was 0: skip the multiply
13292         fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13293 ap_n_en:
13294         add.l           &12,%d3                 # advance d3 to next 12-byte PTENRN entry
13295         tst.l           %d0                     # check if d0 is zero
13296         bne.b           ap_n_el                 # if not, get next bit
13297         fdiv.x          %fp1,%fp0               # div mantissa by 10**(zero count)
13298 #
13299 #
13300 # Calculate power-of-ten factor from adjusted and shifted exponent.
13301 #
13302 # Register usage:
13303 #
13304 #  pwrten:
13305 #       (*)  d0: temp
13306 #       ( )  d1: exponent
13307 #       (*)  d2: {FPCR[6:5],SM,SE} as index in RTABLE; temp
13308 #       (*)  d3: FPCR work copy
13309 #       (*)  d4: first longword of bcd (reloaded from (a0))
13310 #       (*)  a1: RTABLE pointer
13311 #  calc_p:
13312 #       (*)  d0: temp
13313 #       ( )  d1: exponent
13314 #       (*)  d3: PWRTxx table index
13315 #       ( )  a0: pointer to working copy of bcd
13316 #       (*)  a1: PWRTxx pointer
13317 #       (*) fp1: power-of-ten accumulator
13318 #
13319 # Pwrten calculates the exponent factor in the selected rounding mode
13320 # according to the following table:
13321 #
13322 #       Sign of Mant  Sign of Exp  Rounding Mode  PWRTEN Rounding Mode
13323 #
13324 #       ANY       ANY   RN      RN
13325 #
13326 #        +         +    RP      RP
13327 #        -         +    RP      RM
13328 #        +         -    RP      RM
13329 #        -         -    RP      RP
13330 #
13331 #        +         +    RM      RM
13332 #        -         +    RM      RP
13333 #        +         -    RM      RP
13334 #        -         -    RM      RM
13335 #
13336 #        +         +    RZ      RM
13337 #        -         +    RZ      RM
13338 #        +         -    RZ      RP
13339 #        -         -    RZ      RP
13340 #
13341 #
13342 pwrten:
13343         mov.l           USER_FPCR(%a6),%d3      # get user's FPCR
13344         bfextu          %d3{&26:&2},%d2         # isolate rounding mode bits
13345         mov.l           (%a0),%d4               # reload 1st bcd word to d4
13346         asl.l           &2,%d2                  # format d2 to be
13347         bfextu          %d4{&0:&2},%d0          # {FPCR[6],FPCR[5],SM,SE}
13348         add.l           %d0,%d2                 # in d2 as index into RTABLE
13349         lea.l           RTABLE(%pc),%a1         # load rtable base
13350         mov.b           (%a1,%d2),%d0           # load new rounding bits from table
13351         clr.l           %d3                     # clear d3 to force no exc and extended
13352         bfins           %d0,%d3{&26:&2}         # stuff new rounding bits in FPCR
13353         fmov.l          %d3,%fpcr               # write new FPCR
13354         asr.l           &1,%d0                  # bit 0 of the mode -> carry
13355         bcc.b           not_rp                  # bit 0 clear: not RP (3)
13356         lea.l           PTENRP(%pc),%a1         # it is RP
13357         bra.b           calc_p                  # go to init section
13358 not_rp:
13359         asr.l           &1,%d0                  # keep checking: bit 1 -> carry
13360         bcc.b           not_rm                  # bit 1 clear: not RM (2)
13361         lea.l           PTENRM(%pc),%a1         # it is RM
13362         bra.b           calc_p                  # go to init section
13363 not_rm:
13364         lea.l           PTENRN(%pc),%a1         # it is RN
13365 calc_p:
13366         mov.l           %d1,%d0                 # copy exp to d0;use d0
13367         bpl.b           no_neg                  # if exp is negative,
13368         neg.l           %d0                     # invert it
13369         or.l            &0x40000000,(%a0)       # and set SE bit
13370 no_neg:
13371         clr.l           %d3                     # table index
13372         fmov.s          &0x3f800000,%fp1        # init fp1 to 1
13373 e_loop:
13374         asr.l           &1,%d0                  # shift next bit into carry
13375         bcc.b           e_next                  # if zero, skip the mul
13376         fmul.x          (%a1,%d3),%fp1          # mul by 10**(d3_bit_no)
13377 e_next:
13378         add.l           &12,%d3                 # advance d3 to next 12-byte table entry
13379         tst.l           %d0                     # check if d0 is zero
13380         bne.b           e_loop                  # not zero, continue shifting
13381 #
13382 #
13383 #  Check the sign of the adjusted exp (SE bit of the working copy).
13384 #  If the exp was pos then multiply fp1*fp0;
13385 #  else divide fp0/fp1.
13386 #
13387 # Register Usage:
13388 #  pnorm:
13389 #       ( )  a0: pointer to working bcd value
13390 #       (*) fp0: mantissa accumulator
13391 #       ( ) fp1: scaling factor - 10**(abs(exp))
13392 #
13393 pnorm:
13394         btst            &30,(%a0)               # test the sign of the exponent
13395         beq.b           mul                     # if clear, go to multiply
13396 div:
13397         fdiv.x          %fp1,%fp0               # exp is negative, so divide mant by exp
13398         bra.b           end_dec
13399 mul:
13400         fmul.x          %fp1,%fp0               # exp is positive, so multiply by exp
13401 #
13402 #
13403 # Clean up and return with result in fp0.
13404 #
13405 # If the final mul/div in decbin incurred an inex exception,
13406 # it will be inex2, but will be reported as inex1 by get_op.
13407 #
13408 end_dec:
13409         fmov.l          %fpsr,%d0               # get status register
13410         bclr            &inex2_bit+8,%d0        # test inex2 (cleared in the d0 copy only)
13411         beq.b           no_exc                  # skip this if no exc
13412         ori.w           &inx1a_mask,2+USER_FPSR(%a6) # set INEX1/AINEX
13413 no_exc:
13414         add.l           &0x4,%sp                # drop the exponent lword pushed at e_save
13415         fmovm.x         (%sp)+,&0x40            # pop the image pushed at entry into fp1
13416         movm.l          (%sp)+,&0x3c            # restore d2-d5
13417         fmov.l          &0x0,%fpcr              # clear FPCR (pwrten loaded a modified one)
13418         fmov.l          &0x0,%fpsr              # clear FPSR
13419         rts
13420
13421 #########################################################################
13422 # bindec(): Converts an input in extended precision format to bcd format#
13423 #                                                                       #
13424 # INPUT *************************************************************** #
13425 #       a0 = pointer to the input extended precision value in memory.   #
13426 #            the input may be either normalized, unnormalized, or       #
13427 #            denormalized.                                              #
13428 #       d0 = contains the k-factor sign-extended to 32-bits.            #
13429 #                                                                       #
13430 # OUTPUT ************************************************************** #
13431 #       FP_SCR0(a6) = bcd format result on the stack.                   #
13432 #                                                                       #
13433 # ALGORITHM *********************************************************** #
13434 #                                                                       #
13435 #       A1.     Set RM and size ext;  Set SIGMA = sign of input.        #
13436 #               The k-factor is saved for use in d7. Clear the          #
13437 #               BINDEC_FLG for separating normalized/denormalized       #
13438 #               input.  If input is unnormalized or denormalized,       #
13439 #               normalize it.                                           #
13440 #                                                                       #
13441 #       A2.     Set X = abs(input).                                     #
13442 #                                                                       #
13443 #       A3.     Compute ILOG.                                           #
13444 #               ILOG is the log base 10 of the input value.  It is      #
13445 #               approximated by (e + 0.f) * log10(2) when the original  #
13446 #               value is viewed as 2^^e * 1.f in extended precision.    #
13447 #               This value is stored in d6.                             #
13448 #                                                                       #
13449 #       A4.     Clr INEX bit.                                           #
13450 #               The operation in A3 above may have set INEX2.           #
13451 #                                                                       #
13452 #       A5.     Set ICTR = 0;                                           #
13453 #               ICTR is a flag used in A13.  It must be set before the  #
13454 #               loop entry A6.                                          #
13455 #                                                                       #
13456 #       A6.     Calculate LEN.                                          #
13457 #               LEN is the number of digits to be displayed.  The       #
13458 #               k-factor can dictate either the total number of digits, #
13459 #               if it is a positive number, or the number of digits     #
13460 #               after the decimal point which are to be included as     #
13461 #               significant.  See the 68882 manual for examples.        #
13462 #               If LEN is computed to be greater than 17, set OPERR in  #
13463 #               USER_FPSR.  LEN is stored in d4.                        #
13464 #                                                                       #
13465 #       A7.     Calculate SCALE.                                        #
13466 #               SCALE is equal to 10^ISCALE, where ISCALE is the number #
13467 #               of decimal places needed to insure LEN integer digits   #
13468 #               in the output before conversion to bcd. LAMBDA is the   #
13469 #               sign of ISCALE, used in A9. Fp1 contains                #
13470 #               10^^(abs(ISCALE)) using a rounding mode which is a      #
13471 #               function of the original rounding mode and the signs    #
13472 #               of ISCALE and X.  A table is given in the code.         #
13473 #                                                                       #
13474 #       A8.     Clr INEX; Force RZ.                                     #
13475 #               The operations in A7 above may have set INEX2.          #
13476 #               RZ mode is forced for the scaling operation to insure   #
13477 #               only one rounding error.  The grs bits are collected in #
13478 #               the INEX flag for use in A10.                           #
13479 #                                                                       #
13480 #       A9.     Scale X -> Y.                                           #
13481 #               The mantissa is scaled to the desired number of         #
13482 #               significant digits.  The excess digits are collected    #
13483 #               in INEX2.                                               #
13484 #                                                                       #
13485 #       A10.    Or in INEX.                                             #
13486 #               If INEX is set, round error occurred.  This is          #
13487 #               compensated for by 'or-ing' in the INEX2 flag to        #
13488 #               the lsb of Y.                                           #
13489 #                                                                       #
13490 #       A11.    Restore original FPCR; set size ext.                    #
13491 #               Perform FINT operation in the user's rounding mode.     #
13492 #               Keep the size to extended.                              #
13493 #                                                                       #
13494 #       A12.    Calculate YINT = FINT(Y) according to user's rounding   #
13495 #               mode.  The fint instruction is used.  The output        #
13496 #               is in fp0.                                              #
13497 #                                                                       #
13498 #       A13.    Check for LEN digits.                                   #
13499 #               If the int operation results in more than LEN digits,   #
13500 #               or less than LEN -1 digits, adjust ILOG and repeat from #
13501 #               A6.  This test occurs only on the first pass.  If the   #
13502 #               result is exactly 10^LEN, decrement ILOG and divide     #
13503 #               the mantissa by 10.                                     #
13504 #                                                                       #
13505 #       A14.    Convert the mantissa to bcd.                            #
13506 #               The binstr routine is used to convert the LEN digit     #
13507 #               mantissa to bcd in memory.  The input to binstr is      #
13508 #               to be a fraction; i.e. (mantissa)/10^LEN and adjusted   #
13509 #               such that the decimal point is to the left of bit 63.   #
13510 #               The bcd digits are stored in the correct position in    #
13511 #               the final string area in memory.                        #
13512 #                                                                       #
13513 #       A15.    Convert the exponent to bcd.                            #
13514 #               As in A14 above, the exp is converted to bcd and the    #
13515 #               digits are stored in the final string.                  #
13516 #               Test the length of the final exponent string.  If the   #
13517 #               length is 4, set operr.                                 #
13518 #                                                                       #
13519 #       A16.    Write sign bits to final string.                        #
13520 #                                                                       #
13521 #########################################################################
13522
13523 set     BINDEC_FLG,     EXC_TEMP        # DENORM flag
13524
13525 # Constants in extended precision (each entry is four longwords)
13526 PLOG2:
13527         long            0x3FFD0000,0x9A209A84,0xFBCFF798,0x00000000     # log10(2) ~ 0.30103; A3 multiplier when e + 0.f >= 0
13528 PLOG2UP1:
13529         long            0x3FFD0000,0x9A209A84,0xFBCFF799,0x00000000     # PLOG2 with the low mantissa long + 1; A3 multiplier when e + 0.f < 0
13530
13531 # Constants in single precision (value in the first longword; the remaining longwords are zero)
13532 FONE:
13533         long            0x3F800000,0x00000000,0x00000000,0x00000000     # 1.0
13534 FTWO:
13535         long            0x40000000,0x00000000,0x00000000,0x00000000     # 2.0
13536 FTEN:
13537         long            0x41200000,0x00000000,0x00000000,0x00000000     # 10.0
13538 F4933:
13539         long            0x459A2800,0x00000000,0x00000000,0x00000000     # 4933.0
13540 # RBDTBL: rounding mode used for the A7 power-of-ten calc (0 = RN, 2 = RM, 3 = RP); byte index = (initial rmode * 4) + (LAMBDA * 2) + SIGN(X)
13541 RBDTBL:
13542         byte            0,0,0,0         # initial RN
13543         byte            3,3,2,2         # initial RZ
13544         byte            3,2,2,3         # initial RM
13545         byte            2,3,3,2         # initial RP
13546
13547 #       Implementation Notes:
13548 #
13549 #       The registers are used as follows:
13550 #
13551 #               d0: scratch; LEN input to binstr
13552 #               d1: scratch
13553 #               d2: upper 32-bits of mantissa for binstr
13554 #               d3: scratch;lower 32-bits of mantissa for binstr
13555 #               d4: LEN
13556 #               d5: LAMBDA/ICTR
13557 #               d6: ILOG
13558 #               d7: k-factor
13559 #               a0: ptr for original operand/final result
13560 #               a1: scratch pointer
13561 #               a2: pointer to FP_X; abs(original value) in ext
13562 #               fp0: scratch
13563 #               fp1: scratch
13564 #               fp2: scratch
13565 #               F_SCR1:
13566 #               F_SCR2:
13567 #               L_SCR1:
13568 #               L_SCR2:
13569
13570         global          bindec
13571 bindec:
13572         movm.l          &0x3f20,-(%sp)  #  {%d2-%d7/%a2}
13573         fmovm.x         &0x7,-(%sp)     #  {%fp0-%fp2}
13574
13575 # A1. Set RM and size ext. Set SIGMA = sign input;
13576 #     The k-factor is saved for use in d7.  Clear BINDEC_FLG for
13577 #     separating  normalized/denormalized input.  If the input
13578 #     is a denormalized number, set the BINDEC_FLG memory word
13579 #     to signal denorm.  If the input is unnormalized, normalize
13580 #     the input and test for denormalized result.
13581 #
13582         fmov.l          &rm_mode*0x10,%fpcr     # set RM and ext
13583         mov.l           (%a0),L_SCR2(%a6)       # save exponent for sign check
13584         mov.l           %d0,%d7         # move k-factor to d7
13585
13586         clr.b           BINDEC_FLG(%a6) # clr norm/denorm flag
13587         cmpi.b          STAG(%a6),&DENORM # is input a DENORM?
13588         bne.w           A2_str          # no; input is a NORM
13589
13590 #
13591 # Normalize the denorm
13592 #
13593 un_de_norm:
13594         mov.w           (%a0),%d0
13595         and.w           &0x7fff,%d0     # strip sign of normalized exp
13596         mov.l           4(%a0),%d1
13597         mov.l           8(%a0),%d2
13598 norm_loop:
13599         sub.w           &1,%d0
13600         lsl.l           &1,%d2
13601         roxl.l          &1,%d1
13602         tst.l           %d1
13603         bge.b           norm_loop
13604 #
13605 # Test if the normalized input is denormalized
13606 #
13607         tst.w           %d0
13608         bgt.b           pos_exp         # if greater than zero, it is a norm
13609         st              BINDEC_FLG(%a6) # set flag for denorm
13610 pos_exp:
13611         and.w           &0x7fff,%d0     # strip sign of normalized exp
13612         mov.w           %d0,(%a0)
13613         mov.l           %d1,4(%a0)
13614         mov.l           %d2,8(%a0)
13615
13616 # A2. Set X = abs(input).
13617 #
13618 A2_str:
13619         mov.l           (%a0),FP_SCR1(%a6)      # move input to work space
13620         mov.l           4(%a0),FP_SCR1+4(%a6)   # move input to work space
13621         mov.l           8(%a0),FP_SCR1+8(%a6)   # move input to work space
13622         and.l           &0x7fffffff,FP_SCR1(%a6)        # create abs(X)
13623
13624 # A3. Compute ILOG.
13625 #     ILOG is the log base 10 of the input value.  It is approx-
13626 #     imated by (e + 0.f) * log10(2) when the original value is
13627 #     viewed as 2^^e * 1.f in extended precision.  This value is
13628 #     stored in d6.
13629 #
13630 # Register usage:
13631 #       Input/Output
13632 #       d0: k-factor/exponent
13633 #       d2: x/x
13634 #       d3: x/x
13635 #       d4: x/x
13636 #       d5: x/x
13637 #       d6: x/ILOG
13638 #       d7: k-factor/Unchanged
13639 #       a0: ptr for original operand/final result
13640 #       a1: x/x
13641 #       a2: x/x
13642 #       fp0: x/float(ILOG)
13643 #       fp1: x/x
13644 #       fp2: x/x
13645 #       F_SCR1:x/x
13646 #       F_SCR2:Abs(X)/Abs(X) with $3fff exponent
13647 #       L_SCR1:x/x
13648 #       L_SCR2:first word of X packed/Unchanged
13649
13650         tst.b           BINDEC_FLG(%a6) # check for denorm
13651         beq.b           A3_cont         # if clr, continue with norm
13652         mov.l           &-4933,%d6      # force ILOG = -4933
13653         bra.b           A4_str
13654 A3_cont:
13655         mov.w           FP_SCR1(%a6),%d0        # move exp to d0
13656         mov.w           &0x3fff,FP_SCR1(%a6)    # replace exponent with 0x3fff
13657         fmov.x          FP_SCR1(%a6),%fp0       # now fp0 has 1.f
13658         sub.w           &0x3fff,%d0     # strip off bias
13659         fadd.w          %d0,%fp0        # add in exp
13660         fsub.s          FONE(%pc),%fp0  # subtract off 1.0
13661         fbge.w          pos_res         # if pos, branch
13662         fmul.x          PLOG2UP1(%pc),%fp0      # if neg, mul by LOG2UP1
13663         fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
13664         bra.b           A4_str          # go move out ILOG
13665 pos_res:
13666         fmul.x          PLOG2(%pc),%fp0 # if pos, mul by LOG2
13667         fmov.l          %fp0,%d6        # put ILOG in d6 as a lword
13668
13669
13670 # A4. Clr INEX bit.
13671 #     The operation in A3 above may have set INEX2.
13672
13673 A4_str:
13674         fmov.l          &0,%fpsr        # zero all of fpsr - nothing needed
13675
13676
13677 # A5. Set ICTR = 0;
13678 #     ICTR is a flag used in A13.  It must be set before the
13679 #     loop entry A6. The lower word of d5 is used for ICTR.
13680
13681         clr.w           %d5             # clear ICTR
13682
13683 # A6. Calculate LEN.
13684 #     LEN is the number of digits to be displayed.  The k-factor
13685 #     can dictate either the total number of digits, if it is
13686 #     a positive number, or the number of digits after the
13687 #     original decimal point which are to be included as
13688 #     significant.  See the 68882 manual for examples.
13689 #     If LEN is computed to be greater than 17, set OPERR in
13690 #     USER_FPSR.  LEN is stored in d4.
13691 #
13692 # Register usage:
13693 #       Input/Output
13694 #       d0: exponent/Unchanged
13695 #       d2: x/x/scratch
13696 #       d3: x/x
13697 #       d4: exc picture/LEN
13698 #       d5: ICTR/Unchanged
13699 #       d6: ILOG/Unchanged
13700 #       d7: k-factor/Unchanged
13701 #       a0: ptr for original operand/final result
13702 #       a1: x/x
13703 #       a2: x/x
13704 #       fp0: float(ILOG)/Unchanged
13705 #       fp1: x/x
13706 #       fp2: x/x
13707 #       F_SCR1:x/x
13708 #       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13709 #       L_SCR1:x/x
13710 #       L_SCR2:first word of X packed/Unchanged
13711
13712 A6_str:
13713         tst.l           %d7             # branch on sign of k
13714         ble.b           k_neg           # if k <= 0, LEN = ILOG + 1 - k
13715         mov.l           %d7,%d4         # if k > 0, LEN = k
13716         bra.b           len_ck          # skip to LEN check
13717 k_neg:
13718         mov.l           %d6,%d4         # first load ILOG to d4
13719         sub.l           %d7,%d4         # subtract off k
13720         addq.l          &1,%d4          # add in the 1
13721 len_ck:
13722         tst.l           %d4             # LEN check: branch on sign of LEN
13723         ble.b           LEN_ng          # if LEN <= 0, set LEN = 1
13724         cmp.l           %d4,&17         # test if LEN > 17
13725         ble.b           A7_str          # if not, forget it
13726         mov.l           &17,%d4         # set max LEN = 17
13727         tst.l           %d7             # if negative, never set OPERR
13728         ble.b           A7_str          # if positive, continue
13729         or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
13730         bra.b           A7_str          # finished here
13731 LEN_ng:
13732         mov.l           &1,%d4          # min LEN is 1
13733
13734
13735 # A7. Calculate SCALE.
13736 #     SCALE is equal to 10^ISCALE, where ISCALE is the number
13737 #     of decimal places needed to insure LEN integer digits
13738 #     in the output before conversion to bcd. LAMBDA is the sign
13739 #     of ISCALE, used in A9.  Fp1 contains 10^^(abs(ISCALE)) using
13740 #     the rounding mode as given in the following table (see
13741 #     Coonen, p. 7.23 as ref.; however, the SCALE variable is
13742 #     of opposite sign in bindec.sa from Coonen).
13743 #
13744 #       Initial                                 USE
13745 #       FPCR[5:4]       LAMBDA  SIGN(X)         FPCR[5:4]
13746 #       ----------------------------------------------
13747 #        RN     00         0       0            00/0    RN
13748 #        RN     00         0       1            00/0    RN
13749 #        RN     00         1       0            00/0    RN
13750 #        RN     00         1       1            00/0    RN
13751 #        RZ     01         0       0            11/3    RP
13752 #        RZ     01         0       1            11/3    RP
13753 #        RZ     01         1       0            10/2    RM
13754 #        RZ     01         1       1            10/2    RM
13755 #        RM     10         0       0            11/3    RP
13756 #        RM     10         0       1            10/2    RM
13757 #        RM     10         1       0            10/2    RM
13758 #        RM     10         1       1            11/3    RP
13759 #        RP     11         0       0            10/2    RM
13760 #        RP     11         0       1            11/3    RP
13761 #        RP     11         1       0            11/3    RP
13762 #        RP     11         1       1            10/2    RM
13763 #
13764 # Register usage:
13765 #       Input/Output
13766 #       d0: exponent/scratch - final is 0
13767 #       d2: x/0 or 24 for A9
13768 #       d3: x/scratch - offset ptr into PTENRM array
13769 #       d4: LEN/Unchanged
13770 #       d5: 0/ICTR:LAMBDA
13771 #       d6: ILOG/ILOG or k if ((k<=0)&(ILOG<k))
13772 #       d7: k-factor/Unchanged
13773 #       a0: ptr for original operand/final result
13774 #       a1: x/ptr to PTENRM array
13775 #       a2: x/x
13776 #       fp0: float(ILOG)/Unchanged
13777 #       fp1: x/10^ISCALE
13778 #       fp2: x/x
13779 #       F_SCR1:x/x
13780 #       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13781 #       L_SCR1:x/x
13782 #       L_SCR2:first word of X packed/Unchanged
13783
13784 A7_str:
13785         tst.l           %d7             # test sign of k
13786         bgt.b           k_pos           # if pos and > 0, skip this
13787         cmp.l           %d7,%d6         # test k - ILOG
13788         blt.b           k_pos           # if ILOG >= k, skip this
13789         mov.l           %d7,%d6         # if ((k<=0) & (ILOG < k)) ILOG = k
13790 k_pos:
13791         mov.l           %d6,%d0         # calc ILOG + 1 - LEN in d0
13792         addq.l          &1,%d0          # add the 1
13793         sub.l           %d4,%d0         # sub off LEN
13794         swap            %d5             # use upper word of d5 for LAMBDA
13795         clr.w           %d5             # set it zero initially
13796         clr.w           %d2             # set up d2 for very small case
13797         tst.l           %d0             # test sign of ISCALE
13798         bge.b           iscale          # if pos, skip next inst
13799         addq.w          &1,%d5          # if neg, set LAMBDA true
13800         cmp.l           %d0,&0xffffecd4 # test iscale <= -4908
13801         bgt.b           no_inf          # if false, skip rest
13802         add.l           &24,%d0         # add in 24 to iscale
13803         mov.l           &24,%d2         # put 24 in d2 for A9
13804 no_inf:
13805         neg.l           %d0             # and take abs of ISCALE
13806 iscale:
13807         fmov.s          FONE(%pc),%fp1  # init fp1 to 1
13808         bfextu          USER_FPCR(%a6){&26:&2},%d1      # get initial rmode bits
13809         lsl.w           &1,%d1          # put them in bits 2:1
13810         add.w           %d5,%d1         # add in LAMBDA
13811         lsl.w           &1,%d1          # put them in bits 3:1
13812         tst.l           L_SCR2(%a6)     # test sign of original x
13813         bge.b           x_pos           # if pos, don't set bit 0
13814         addq.l          &1,%d1          # if neg, set bit 0
13815 x_pos:
13816         lea.l           RBDTBL(%pc),%a2 # load rbdtbl base
13817         mov.b           (%a2,%d1),%d3   # load d3 with new rmode
13818         lsl.l           &4,%d3          # put bits in proper position
13819         fmov.l          %d3,%fpcr       # load bits into fpu
13820         lsr.l           &4,%d3          # put bits in proper position
13821         tst.b           %d3             # decode new rmode for pten table
13822         bne.b           not_rn          # if zero, it is RN
13823         lea.l           PTENRN(%pc),%a1 # load a1 with RN table base
13824         bra.b           rmode           # exit decode
13825 not_rn:
13826         lsr.b           &1,%d3          # get lsb in carry
13827         bcc.b           not_rp2         # if carry clear, it is RM
13828         lea.l           PTENRP(%pc),%a1 # load a1 with RP table base
13829         bra.b           rmode           # exit decode
13830 not_rp2:
13831         lea.l           PTENRM(%pc),%a1 # load a1 with RM table base
13832 rmode:
13833         clr.l           %d3             # clr table index
13834 e_loop2:
13835         lsr.l           &1,%d0          # shift next bit into carry
13836         bcc.b           e_next2         # if zero, skip the mul
13837         fmul.x          (%a1,%d3),%fp1  # mul by 10**(d3_bit_no)
13838 e_next2:
13839         add.l           &12,%d3         # inc d3 to next pwrten table entry
13840         tst.l           %d0             # test if ISCALE is zero
13841         bne.b           e_loop2         # if not, loop
13842
13843 # A8. Clr INEX; Force RZ.
13844 #     The operations in A7 above may have set INEX2.
13845 #     RZ mode is forced for the scaling operation to insure
13846 #     only one rounding error.  The grs bits are collected in
13847 #     the INEX flag for use in A10.
13848 #
13849 # Register usage:
13850 #       Input/Output
13851
13852         fmov.l          &0,%fpsr        # clr INEX
13853         fmov.l          &rz_mode*0x10,%fpcr     # set RZ rounding mode
13854
13855 # A9. Scale X -> Y.
13856 #     The mantissa is scaled to the desired number of significant
13857 #     digits.  The excess digits are collected in INEX2. If mul,
13858 #     Check d2 for excess 10 exponential value.  If not zero,
13859 #     the iscale value would have caused the pwrten calculation
13860 #     to overflow.  Only a negative iscale can cause this, so
13861 #     multiply by 10^(d2), which is now only allowed to be 24,
13862 #     with a multiply by 10^8 and 10^16, which is exact since
13863 #     10^24 is exact.  If the input was denormalized, the
13864 #     exponents and mantissas are multiplied separately so that
13865 #     no intermediate result underflows (see sc_mul below).
13866 #
13867 # Register usage:
13868 #       Input/Output
13869 #       d0: FPCR with RZ mode/Unchanged
13870 #       d2: 0 or 24/unchanged
13871 #       d3: x/x
13872 #       d4: LEN/Unchanged
13873 #       d5: ICTR:LAMBDA
13874 #       d6: ILOG/Unchanged
13875 #       d7: k-factor/Unchanged
13876 #       a0: ptr for original operand/final result
13877 #       a1: ptr to PTENRM array/Unchanged
13878 #       a2: x/x
13879 #       fp0: float(ILOG)/X adjusted for SCALE (Y)
13880 #       fp1: 10^ISCALE/Unchanged
13881 #       fp2: x/x
13882 #       F_SCR1:x/x
13883 #       F_SCR2:Abs(X) with $3fff exponent/Unchanged
13884 #       L_SCR1:x/x
13885 #       L_SCR2:first word of X packed/Unchanged
13886
13887 A9_str:
13888         fmov.x          (%a0),%fp0      # load X from memory
13889         fabs.x          %fp0            # use abs(X)
13890         tst.w           %d5             # LAMBDA is in lower word of d5
13891         bne.b           sc_mul          # if neg (LAMBDA = 1), scale by mul
13892         fdiv.x          %fp1,%fp0       # calculate X / SCALE -> Y to fp0
13893         bra.w           A10_st          # branch to A10
13894
13895 sc_mul:
13896         tst.b           BINDEC_FLG(%a6) # check for denorm
13897         beq.w           A9_norm         # if norm, continue with mul
13898
13899 # for DENORM, we must calculate:
13900 #       fp0 = input_op * 10^ISCALE * 10^24
13901 # since the input operand is a DENORM, we can't multiply it directly.
13902 # so, we do the multiplication of the exponents and mantissas separately.
13903 # in this way, we avoid underflow on intermediate stages of the
13904 # multiplication and guarantee a result without exception.
13905         fmovm.x         &0x2,-(%sp)     # save 10^ISCALE to stack
13906
13907         mov.w           (%sp),%d3       # grab exponent
13908         andi.w          &0x7fff,%d3     # clear sign
13909         ori.w           &0x8000,(%a0)   # make DENORM exp negative
13910         add.w           (%a0),%d3       # add DENORM exp to 10^ISCALE exp
13911         subi.w          &0x3fff,%d3     # subtract BIAS
13912         add.w           36(%a1),%d3
13913         subi.w          &0x3fff,%d3     # subtract BIAS
13914         add.w           48(%a1),%d3
13915         subi.w          &0x3fff,%d3     # subtract BIAS
13916
13917         bmi.w           sc_mul_err      # if result is DENORM, punt!!!
13918
13919         andi.w          &0x8000,(%sp)   # keep sign
13920         or.w            %d3,(%sp)       # insert new exponent
13921         andi.w          &0x7fff,(%a0)   # clear sign bit on DENORM again
13922         mov.l           0x8(%a0),-(%sp) # put input op mantissa on stk
13923         mov.l           0x4(%a0),-(%sp)
13924         mov.l           &0x3fff0000,-(%sp) # force exp to zero
13925         fmovm.x         (%sp)+,&0x80    # load normalized DENORM into fp0
13926         fmul.x          (%sp)+,%fp0
13927
13928 #       fmul.x  36(%a1),%fp0    # multiply fp0 by 10^8
13929 #       fmul.x  48(%a1),%fp0    # multiply fp0 by 10^16
13930         mov.l           36+8(%a1),-(%sp) # get 10^8 mantissa
13931         mov.l           36+4(%a1),-(%sp)
13932         mov.l           &0x3fff0000,-(%sp) # force exp to zero
13933         mov.l           48+8(%a1),-(%sp) # get 10^16 mantissa
13934         mov.l           48+4(%a1),-(%sp)
13935         mov.l           &0x3fff0000,-(%sp)# force exp to zero
13936         fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^8
13937         fmul.x          (%sp)+,%fp0     # multiply fp0 by 10^16
13938         bra.b           A10_st
13939
13940 sc_mul_err:
13941         bra.b           sc_mul_err
13942
13943 A9_norm:
13944         tst.w           %d2             # test for small exp case
13945         beq.b           A9_con          # if zero, continue as normal
13946         fmul.x          36(%a1),%fp0    # multiply fp0 by 10^8
13947         fmul.x          48(%a1),%fp0    # multiply fp0 by 10^16
13948 A9_con:
13949         fmul.x          %fp1,%fp0       # calculate X * SCALE -> Y to fp0
13950
13951 # A10. Or in INEX.
13952 #      If INEX is set, round error occurred.  This is compensated
13953 #      for by 'or-ing' in the INEX2 flag to the lsb of Y.
13954 #
13955 # Register usage:
13956 #       Input/Output
13957 #       d0: FPCR with RZ mode/FPSR with INEX2 isolated
13958 #       d2: x/x
13959 #       d3: x/x
13960 #       d4: LEN/Unchanged
13961 #       d5: ICTR:LAMBDA
13962 #       d6: ILOG/Unchanged
13963 #       d7: k-factor/Unchanged
13964 #       a0: ptr for original operand/final result
13965 #       a1: ptr to PTENxx array/Unchanged
13966 #       a2: x/ptr to FP_SCR1(a6)
13967 #       fp0: Y/Y with lsb adjusted
13968 #       fp1: 10^ISCALE/Unchanged
13969 #       fp2: x/x
13970
13971 A10_st:
13972         fmov.l          %fpsr,%d0       # get FPSR
13973         fmov.x          %fp0,FP_SCR1(%a6)       # move Y to memory
13974         lea.l           FP_SCR1(%a6),%a2        # load a2 with ptr to FP_SCR1
13975         btst            &9,%d0          # check if INEX2 set
13976         beq.b           A11_st          # if clear, skip rest
13977         or.l            &1,8(%a2)       # or in 1 to lsb of mantissa
13978         fmov.x          FP_SCR1(%a6),%fp0       # write adjusted Y back to fpu
13979
13980
13981 # A11. Restore original FPCR; set size ext.
13982 #      Perform FINT operation in the user's rounding mode.  Keep
13983 #      the size to extended.  A12 loads the FPCR from USER_FPCR,
13984 #      so USER_FPCR is masked here down to the rounding mode
13985 #      bits.  The original USER_FPCR is saved in L_SCR1.
13986
13987 A11_st:
13988         mov.l           USER_FPCR(%a6),L_SCR1(%a6)      # save it for later
13989         and.l           &0x00000030,USER_FPCR(%a6)      # set size to ext,
13990 #                                       ;block exceptions
13991
13992
13993 # A12. Calculate YINT = FINT(Y) according to user's rounding mode.
13994 #      The fint.x instruction is used (not sintdo).  The output is in fp0.
13995 #
13996 # Register usage:
13997 #       Input/Output
13998 #       d0: FPSR with AINEX cleared/FPCR with size set to ext
13999 #       d2: x/x/scratch
14000 #       d3: x/x
14001 #       d4: LEN/Unchanged
14002 #       d5: ICTR:LAMBDA/Unchanged
14003 #       d6: ILOG/Unchanged
14004 #       d7: k-factor/Unchanged
14005 #       a0: ptr for original operand/src ptr for sintdo
14006 #       a1: ptr to PTENxx array/Unchanged
14007 #       a2: ptr to FP_SCR1(a6)/Unchanged
14008 #       a6: temp pointer to FP_SCR1(a6) - orig value saved and restored
14009 #       fp0: Y/YINT
14010 #       fp1: 10^ISCALE/Unchanged
14011 #       fp2: x/x
14012 #       F_SCR1:x/x
14013 #       F_SCR2:Y adjusted for inex/Y with original exponent
14014 #       L_SCR1:x/original USER_FPCR
14015 #       L_SCR2:first word of X packed/Unchanged
14016
14017 A12_st:
14018         movm.l  &0xc0c0,-(%sp)  # save regs used by sintd0       {%d0-%d1/%a0-%a1}
14019         mov.l   L_SCR1(%a6),-(%sp)
14020         mov.l   L_SCR2(%a6),-(%sp)
14021
14022         lea.l           FP_SCR1(%a6),%a0        # a0 is ptr to FP_SCR1(a6)
14023         fmov.x          %fp0,(%a0)      # move Y to memory at FP_SCR1(a6)
14024         tst.l           L_SCR2(%a6)     # test sign of original operand
14025         bge.b           do_fint12               # if pos, use Y
14026         or.l            &0x80000000,(%a0)       # if neg, use -Y
14027 do_fint12:
14028         mov.l   USER_FPSR(%a6),-(%sp)
14029 #       bsr     sintdo          # sint routine returns int in fp0
14030
14031         fmov.l  USER_FPCR(%a6),%fpcr
14032         fmov.l  &0x0,%fpsr                      # clear the AEXC bits!!!
14033 ##      mov.l           USER_FPCR(%a6),%d0      # ext prec/keep rnd mode
14034 ##      andi.l          &0x00000030,%d0
14035 ##      fmov.l          %d0,%fpcr
14036         fint.x          FP_SCR1(%a6),%fp0       # do fint()
14037         fmov.l  %fpsr,%d0
14038         or.w    %d0,FPSR_EXCEPT(%a6)
14039 ##      fmov.l          &0x0,%fpcr
14040 ##      fmov.l          %fpsr,%d0               # don't keep ccodes
14041 ##      or.w            %d0,FPSR_EXCEPT(%a6)
14042
14043         mov.b   (%sp),USER_FPSR(%a6)
14044         add.l   &4,%sp
14045
14046         mov.l   (%sp)+,L_SCR2(%a6)
14047         mov.l   (%sp)+,L_SCR1(%a6)
14048         movm.l  (%sp)+,&0x303   # restore regs used by sint      {%d0-%d1/%a0-%a1}
14049
14050         mov.l   L_SCR2(%a6),FP_SCR1(%a6)        # restore original exponent
14051         mov.l   L_SCR1(%a6),USER_FPCR(%a6)      # restore user's FPCR
14052
14053 # A13. Check for LEN digits.
14054 #      If the int operation results in more than LEN digits,
14055 #      or less than LEN -1 digits, adjust ILOG and repeat from
14056 #      A6.  This test occurs only on the first pass.  If the
14057 #      result is exactly 10^LEN, decrement ILOG and divide
14058 #      the mantissa by 10.  The calculation of 10^LEN cannot
14059 #      be inexact, since all powers of ten up to 10^27 are exact
14060 #      in extended precision, so the use of a previous power-of-ten
14061 #      table will introduce no error.
14062 #
14063 #
14064 # Register usage:
14065 #       Input/Output
14066 #       d0: FPCR with size set to ext/scratch final = 0
14067 #       d2: x/x
14068 #       d3: x/scratch final = x
14069 #       d4: LEN/LEN adjusted
14070 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14071 #       d6: ILOG/ILOG adjusted
14072 #       d7: k-factor/Unchanged
14073 #       a0: pointer into memory for packed bcd string formation
14074 #       a1: ptr to PTENxx array/Unchanged
14075 #       a2: ptr to FP_SCR1(a6)/Unchanged
14076 #       fp0: int portion of Y/abs(YINT) adjusted
14077 #       fp1: 10^ISCALE/Unchanged
14078 #       fp2: x/10^LEN
14079 #       F_SCR1:x/x
14080 #       F_SCR2:Y with original exponent/Unchanged
14081 #       L_SCR1:original USER_FPCR/Unchanged
14082 #       L_SCR2:first word of X packed/Unchanged
14083
14084 A13_st:
14085         swap            %d5             # put ICTR in lower word of d5
14086         tst.w           %d5             # check if ICTR = 0
14087         bne             not_zr          # if non-zero, go to second test
14088 #
14089 # Compute 10^(LEN-1)
14090 #
14091         fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
14092         mov.l           %d4,%d0         # put LEN in d0
14093         subq.l          &1,%d0          # d0 = LEN -1
14094         clr.l           %d3             # clr table index
14095 l_loop:
14096         lsr.l           &1,%d0          # shift next bit into carry
14097         bcc.b           l_next          # if zero, skip the mul
14098         fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
14099 l_next:
14100         add.l           &12,%d3         # inc d3 to next pwrten table entry
14101         tst.l           %d0             # test if LEN is zero
14102         bne.b           l_loop          # if not, loop
14103 #
14104 # 10^(LEN-1) is computed for this test and A14.  If the input was
14105 # denormalized, check only the case in which YINT > 10^LEN.
14106 #
14107         tst.b           BINDEC_FLG(%a6) # check if input was norm
14108         beq.b           A13_con         # if norm, continue with checking
14109         fabs.x          %fp0            # take abs of YINT
14110         bra             test_2
14111 #
14112 # Compare abs(YINT) to 10^(LEN-1) and 10^LEN
14113 #
14114 A13_con:
14115         fabs.x          %fp0            # take abs of YINT
14116         fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^(LEN-1)
14117         fbge.w          test_2          # if greater, do next test
14118         subq.l          &1,%d6          # subtract 1 from ILOG
14119         mov.w           &1,%d5          # set ICTR
14120         fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
14121         fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
14122         bra.w           A6_str          # return to A6 and recompute YINT
14123 test_2:
14124         fmul.s          FTEN(%pc),%fp2  # compute 10^LEN
14125         fcmp.x          %fp0,%fp2       # compare abs(YINT) with 10^LEN
14126         fblt.w          A14_st          # if less, all is ok, go to A14
14127         fbgt.w          fix_ex          # if greater, fix and redo
14128         fdiv.s          FTEN(%pc),%fp0  # if equal, divide by 10
14129         addq.l          &1,%d6          # and inc ILOG
14130         bra.b           A14_st          # and continue elsewhere
14131 fix_ex:
14132         addq.l          &1,%d6          # increment ILOG by 1
14133         mov.w           &1,%d5          # set ICTR
14134         fmov.l          &rm_mode*0x10,%fpcr     # set rmode to RM
14135         bra.w           A6_str          # return to A6 and recompute YINT
14136 #
14137 # Since ICTR <> 0, we have already been through one adjustment,
14138 # and shouldn't have another; this is to check if abs(YINT) = 10^LEN
14139 # 10^LEN is again computed using whatever table is in a1 since the
14140 # value calculated cannot be inexact.
14141 #
14142 not_zr:
14143         fmov.s          FONE(%pc),%fp2  # init fp2 to 1.0
14144         mov.l           %d4,%d0         # put LEN in d0
14145         clr.l           %d3             # clr table index
14146 z_loop:
14147         lsr.l           &1,%d0          # shift next bit into carry
14148         bcc.b           z_next          # if zero, skip the mul
14149         fmul.x          (%a1,%d3),%fp2  # mul by 10**(d3_bit_no)
14150 z_next:
14151         add.l           &12,%d3         # inc d3 to next pwrten table entry
14152         tst.l           %d0             # test if LEN is zero
14153         bne.b           z_loop          # if not, loop
14154         fabs.x          %fp0            # get abs(YINT)
14155         fcmp.x          %fp0,%fp2       # check if abs(YINT) = 10^LEN
14156         fbneq.w         A14_st          # if not, skip this
14157         fdiv.s          FTEN(%pc),%fp0  # divide abs(YINT) by 10
14158         addq.l          &1,%d6          # and inc ILOG by 1
14159         addq.l          &1,%d4          # and inc LEN
14160         fmul.s          FTEN(%pc),%fp2  # if LEN++, the get 10^^LEN
14161
14162 # A14. Convert the mantissa to bcd.
14163 #      The binstr routine is used to convert the LEN digit
14164 #      mantissa to bcd in memory.  The input to binstr is
14165 #      to be a fraction; i.e. (mantissa)/10^LEN and adjusted
14166 #      such that the decimal point is to the left of bit 63.
14167 #      The bcd digits are stored in the correct position in
14168 #      the final string area in memory.
14169 #
14170 #
14171 # Register usage:
14172 #       Input/Output
14173 #       d0: x/LEN call to binstr - final is 0
14174 #       d1: x/0
14175 #       d2: x/ms 32-bits of mant of abs(YINT)
14176 #       d3: x/ls 32-bits of mant of abs(YINT)
14177 #       d4: LEN/Unchanged
14178 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14179 #       d6: ILOG
14180 #       d7: k-factor/Unchanged
14181 #       a0: pointer into memory for packed bcd string formation
14182 #           /ptr to first mantissa byte in result string
14183 #       a1: ptr to PTENxx array/Unchanged
14184 #       a2: ptr to FP_SCR1(a6)/Unchanged
14185 #       fp0: int portion of Y/abs(YINT) adjusted
14186 #       fp1: 10^ISCALE/Unchanged
14187 #       fp2: 10^LEN/Unchanged
14188 #       F_SCR1:x/Work area for final result
14189 #       F_SCR2:Y with original exponent/Unchanged
14190 #       L_SCR1:original USER_FPCR/Unchanged
14191 #       L_SCR2:first word of X packed/Unchanged
14192
14193 A14_st:
14194         fmov.l          &rz_mode*0x10,%fpcr     # force rz for conversion
14195         fdiv.x          %fp2,%fp0       # divide abs(YINT) by 10^LEN
14196         lea.l           FP_SCR0(%a6),%a0
14197         fmov.x          %fp0,(%a0)      # move abs(YINT)/10^LEN to memory
14198         mov.l           4(%a0),%d2      # move 2nd word of FP_RES to d2
14199         mov.l           8(%a0),%d3      # move 3rd word of FP_RES to d3
14200         clr.l           4(%a0)          # zero word 2 of FP_RES
14201         clr.l           8(%a0)          # zero word 3 of FP_RES
14202         mov.l           (%a0),%d0       # move exponent to d0
14203         swap            %d0             # put exponent in lower word
14204         beq.b           no_sft          # if zero, don't shift
14205         sub.l           &0x3ffd,%d0     # sub bias less 2 to make fract
14206         tst.l           %d0             # check if > 1
14207         bgt.b           no_sft          # if so, don't shift
14208         neg.l           %d0             # make exp positive
14209 m_loop:
14210         lsr.l           &1,%d2          # shift d2:d3 right, add 0s
14211         roxr.l          &1,%d3          # the number of places
14212         dbf.w           %d0,m_loop      # given in d0
14213 no_sft:
14214         tst.l           %d2             # check for mantissa of zero
14215         bne.b           no_zr           # if not, go on
14216         tst.l           %d3             # continue zero check
14217         beq.b           zer_m           # if zero, go directly to binstr
14218 no_zr:
14219         clr.l           %d1             # put zero in d1 for addx
14220         add.l           &0x00000080,%d3 # inc at bit 7
14221         addx.l          %d1,%d2         # continue inc
14222         and.l           &0xffffff80,%d3 # strip off lsb not used by 882
14223 zer_m:
14224         mov.l           %d4,%d0         # put LEN in d0 for binstr call
14225         addq.l          &3,%a0          # a0 points to M16 byte in result
14226         bsr             binstr          # call binstr to convert mant
14227
14228
14229 # A15. Convert the exponent to bcd.
14230 #      As in A14 above, the exp is converted to bcd and the
14231 #      digits are stored in the final string.
14232 #
14233 #      Digits are stored in L_SCR1(a6) on return from BINDEC as:
14234 #
14235 #        32               16 15                0
14236 #       -----------------------------------------
14237 #       |  0 | e3 | e2 | e1 | e4 |  X |  X |  X |
14238 #       -----------------------------------------
14239 #
14240 # And are moved into their proper places in FP_SCR0.  If digit e4
14241 # is non-zero, OPERR is signaled.  In all cases, all 4 digits are
14242 # written as specified in the 881/882 manual for packed decimal.
14243 #
14244 # Register usage:
14245 #       Input/Output
14246 #       d0: x/LEN call to binstr - final is 0
14247 #       d1: x/scratch (0);shift count for final exponent packing
14248 #       d2: x/ms 32-bits of exp fraction/scratch
14249 #       d3: x/ls 32-bits of exp fraction
14250 #       d4: LEN/Unchanged
14251 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14252 #       d6: ILOG
14253 #       d7: k-factor/Unchanged
14254 #       a0: ptr to result string/ptr to L_SCR1(a6)
14255 #       a1: ptr to PTENxx array/Unchanged
14256 #       a2: ptr to FP_SCR1(a6)/Unchanged
14257 #       fp0: abs(YINT) adjusted/float(ILOG)
14258 #       fp1: 10^ISCALE/Unchanged
14259 #       fp2: 10^LEN/Unchanged
14260 #       F_SCR1:Work area for final result/BCD result
14261 #       F_SCR2:Y with original exponent/ILOG/10^4
14262 #       L_SCR1:original USER_FPCR/Exponent digits on return from binstr
14263 #       L_SCR2:first word of X packed/Unchanged
14264
14265 A15_st:
14266         tst.b           BINDEC_FLG(%a6) # check for denorm
14267         beq.b           not_denorm
14268         ftest.x         %fp0            # test for zero
14269         fbeq.w          den_zero        # if zero, use k-factor or 4933
14270         fmov.l          %d6,%fp0        # float ILOG
14271         fabs.x          %fp0            # get abs of ILOG
14272         bra.b           convrt
14273 den_zero:
14274         tst.l           %d7             # check sign of the k-factor
14275         blt.b           use_ilog        # if negative, use ILOG
14276         fmov.s          F4933(%pc),%fp0 # force exponent to 4933
14277         bra.b           convrt          # do it
14278 use_ilog:
14279         fmov.l          %d6,%fp0        # float ILOG
14280         fabs.x          %fp0            # get abs of ILOG
14281         bra.b           convrt
14282 not_denorm:
14283         ftest.x         %fp0            # test for zero
14284         fbneq.w         not_zero        # if zero, force exponent
14285         fmov.s          FONE(%pc),%fp0  # force exponent to 1
14286         bra.b           convrt          # do it
14287 not_zero:
14288         fmov.l          %d6,%fp0        # float ILOG
14289         fabs.x          %fp0            # get abs of ILOG
14290 convrt:
14291         fdiv.x          24(%a1),%fp0    # compute ILOG/10^4
14292         fmov.x          %fp0,FP_SCR1(%a6)       # store fp0 in memory
14293         mov.l           4(%a2),%d2      # move word 2 to d2
14294         mov.l           8(%a2),%d3      # move word 3 to d3
14295         mov.w           (%a2),%d0       # move exp to d0
14296         beq.b           x_loop_fin      # if zero, skip the shift
14297         sub.w           &0x3ffd,%d0     # subtract off bias
14298         neg.w           %d0             # make exp positive
14299 x_loop:
14300         lsr.l           &1,%d2          # shift d2:d3 right
14301         roxr.l          &1,%d3          # the number of places
14302         dbf.w           %d0,x_loop      # given in d0
14303 x_loop_fin:
14304         clr.l           %d1             # put zero in d1 for addx
14305         add.l           &0x00000080,%d3 # inc at bit 6
14306         addx.l          %d1,%d2         # continue inc
14307         and.l           &0xffffff80,%d3 # strip off lsb not used by 882
14308         mov.l           &4,%d0          # put 4 in d0 for binstr call
14309         lea.l           L_SCR1(%a6),%a0 # a0 is ptr to L_SCR1 for exp digits
14310         bsr             binstr          # call binstr to convert exp
14311         mov.l           L_SCR1(%a6),%d0 # load L_SCR1 lword to d0
14312         mov.l           &12,%d1         # use d1 for shift count
14313         lsr.l           %d1,%d0         # shift d0 right by 12
14314         bfins           %d0,FP_SCR0(%a6){&4:&12}        # put e3:e2:e1 in FP_SCR0
14315         lsr.l           %d1,%d0         # shift d0 right by 12
14316         bfins           %d0,FP_SCR0(%a6){&16:&4}        # put e4 in FP_SCR0
14317         tst.b           %d0             # check if e4 is zero
14318         beq.b           A16_st          # if zero, skip rest
14319         or.l            &opaop_mask,USER_FPSR(%a6)      # set OPERR & AIOP in USER_FPSR
14320
14321
14322 # A16. Write sign bits to final string.
14323 #          Sigma is bit 31 of initial value; RHO is bit 31 of d6 (ILOG).
14324 #
14325 # Register usage:
14326 #       Input/Output
14327 #       d0: x/scratch - final is x
14328 #       d2: x/x
14329 #       d3: x/x
14330 #       d4: LEN/Unchanged
14331 #       d5: ICTR:LAMBDA/LAMBDA:ICTR
14332 #       d6: ILOG/ILOG adjusted
14333 #       d7: k-factor/Unchanged
14334 #       a0: ptr to L_SCR1(a6)/Unchanged
14335 #       a1: ptr to PTENxx array/Unchanged
14336 #       a2: ptr to FP_SCR1(a6)/Unchanged
14337 #       fp0: float(ILOG)/Unchanged
14338 #       fp1: 10^ISCALE/Unchanged
14339 #       fp2: 10^LEN/Unchanged
14340 #       F_SCR1:BCD result with correct signs
14341 #       F_SCR2:ILOG/10^4
14342 #       L_SCR1:Exponent digits on return from binstr
14343 #       L_SCR2:first word of X packed/Unchanged
14344
14345 A16_st:
14346         clr.l           %d0             # clr d0 for collection of signs
14347         and.b           &0x0f,FP_SCR0(%a6)      # clear first nibble of FP_SCR0
14348         tst.l           L_SCR2(%a6)     # check sign of original mantissa
14349         bge.b           mant_p          # if pos, don't set SM
14350         mov.l           &2,%d0          # move 2 in to d0 for SM
14351 mant_p:
14352         tst.l           %d6             # check sign of ILOG
14353         bge.b           wr_sgn          # if pos, don't set SE
14354         addq.l          &1,%d0          # set bit 0 in d0 for SE
14355 wr_sgn:
14356         bfins           %d0,FP_SCR0(%a6){&0:&2} # insert SM and SE into FP_SCR0
14357
14358 # Clean up and restore all registers used.
14359
14360         fmov.l          &0,%fpsr        # clear possible inex2/ainex bits
14361         fmovm.x         (%sp)+,&0xe0    #  {%fp0-%fp2}
14362         movm.l          (%sp)+,&0x4fc   #  {%d2-%d7/%a2}
14363         rts
14364
14365         global          PTENRN          # powers of ten 10^(2^n), n = 0..12.  NOTE(review): RN presumably = round-to-nearest values -- confirm
14366 PTENRN:                                 # 13 entries of three longwords (12 bytes) each, the stride used when the PTENxx table is indexed via %a1
14367         long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14368         long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14369         long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4     (byte offset 24)
14370         long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14371         long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16    (last entry identical in all three PTENxx tables)
14372         long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
14373         long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
14374         long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
14375         long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
14376         long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
14377         long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
14378         long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
14379         long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
14380
14381         global          PTENRP          # powers of ten 10^(2^n), n = 0..12.  NOTE(review): RP presumably = values rounded toward plus infinity -- confirm
14382 PTENRP:                                 # same layout as PTENRN: 13 entries of three longwords (12 bytes) each
14383         long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14384         long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14385         long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4     (byte offset 24)
14386         long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14387         long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14388         long            0x40690000,0x9DC5ADA8,0x2B70B59E        # 10 ^ 32
14389         long            0x40D30000,0xC2781F49,0xFFCFA6D6        # 10 ^ 64    (last longword is PTENRN's + 1)
14390         long            0x41A80000,0x93BA47C9,0x80E98CE0        # 10 ^ 128
14391         long            0x43510000,0xAA7EEBFB,0x9DF9DE8E        # 10 ^ 256
14392         long            0x46A30000,0xE319A0AE,0xA60E91C7        # 10 ^ 512
14393         long            0x4D480000,0xC9767586,0x81750C18        # 10 ^ 1024  (last longword is PTENRN's + 1)
14394         long            0x5A920000,0x9E8B3B5D,0xC53D5DE5        # 10 ^ 2048
14395         long            0x75250000,0xC4605202,0x8A20979B        # 10 ^ 4096
14396
14397         global          PTENRM          # powers of ten 10^(2^n), n = 0..12.  NOTE(review): RM presumably = values rounded toward minus infinity -- confirm
14398 PTENRM:                                 # same layout as PTENRN: 13 entries of three longwords (12 bytes) each
14399         long            0x40020000,0xA0000000,0x00000000        # 10 ^ 1
14400         long            0x40050000,0xC8000000,0x00000000        # 10 ^ 2
14401         long            0x400C0000,0x9C400000,0x00000000        # 10 ^ 4     (byte offset 24)
14402         long            0x40190000,0xBEBC2000,0x00000000        # 10 ^ 8
14403         long            0x40340000,0x8E1BC9BF,0x04000000        # 10 ^ 16
14404         long            0x40690000,0x9DC5ADA8,0x2B70B59D        # 10 ^ 32    (last longword is PTENRN's - 1)
14405         long            0x40D30000,0xC2781F49,0xFFCFA6D5        # 10 ^ 64
14406         long            0x41A80000,0x93BA47C9,0x80E98CDF        # 10 ^ 128   (last longword is PTENRN's - 1)
14407         long            0x43510000,0xAA7EEBFB,0x9DF9DE8D        # 10 ^ 256   (last longword is PTENRN's - 1)
14408         long            0x46A30000,0xE319A0AE,0xA60E91C6        # 10 ^ 512   (last longword is PTENRN's - 1)
14409         long            0x4D480000,0xC9767586,0x81750C17        # 10 ^ 1024
14410         long            0x5A920000,0x9E8B3B5D,0xC53D5DE4        # 10 ^ 2048  (last longword is PTENRN's - 1)
14411         long            0x75250000,0xC4605202,0x8A20979A        # 10 ^ 4096  (last longword is PTENRN's - 1)
14412
14413 #########################################################################
14414 # binstr(): Converts a 64-bit binary integer to bcd.                    #
14415 #                                                                       #
14416 # INPUT *************************************************************** #
14417 #       d2:d3 = 64-bit binary integer                                   #
14418 #       d0    = desired length (LEN)                                    #
14419 #       a0    = pointer to start in memory for bcd characters           #
14420 #               (This pointer must point to byte 4 of the first         #
14421 #                lword of the packed decimal memory string.)            #
14422 #                                                                       #
14423 # OUTPUT ************************************************************** #
14424 #       a0 = pointer to LEN bcd digits representing the 64-bit integer. #
14425 #                                                                       #
14426 # ALGORITHM *********************************************************** #
14427 #       The 64-bit binary is assumed to have a decimal point before     #
14428 #       bit 63.  The fraction is multiplied by 10 using a mul by 2      #
14429 #       shift and a mul by 8 shift.  The bits shifted out of the        #
14430 #       msb form a decimal digit.  This process is iterated until       #
14431 #       LEN digits are formed.                                          #
14432 #                                                                       #
14433 # A1. Init d7 to 1.  D7 is the byte digit counter, and if 1, the        #
14434 #     digit formed will be assumed the least significant.  This is      #
14435 #     to force the first byte formed to have a 0 in the upper 4 bits.   #
14436 #                                                                       #
14437 # A2. Beginning of the loop:                                            #
14438 #     Copy the fraction in d2:d3 to d4:d5.                              #
14439 #                                                                       #
14440 # A3. Multiply the fraction in d2:d3 by 8 using bit-field               #
14441 #     extracts and shifts.  The three msbs from d2 will go into d1.     #
14442 #                                                                       #
14443 # A4. Multiply the fraction in d4:d5 by 2 using shifts.  The msb        #
14444 #     will be collected by the carry.                                   #
14445 #                                                                       #
14446 # A5. Add using the carry the 64-bit quantities in d2:d3 and d4:d5      #
14447 #     into d2:d3.  D1 will contain the bcd digit formed.                #
14448 #                                                                       #
14449 # A6. Test d7.  If zero, the digit formed is the ms digit.  If non-     #
14450 #     zero, it is the ls digit.  Put the digit in its place in the      #
14451 #     upper word of d7.  If it is the ls digit, write the byte          #
14452 #     from d7 to memory.                                                #
14453 #                                                                       #
14454 # A7. Decrement d0 (LEN counter) and repeat the loop until zero.        #
14455 #                                                                       #
14456 #########################################################################
14457
14458 #       Implementation Notes:
14459 #
14460 #       The registers are used as follows:
14461 #
14462 #               d0: LEN counter
14463 #               d1: temp used to form the digit
14464 #               d2: upper 32-bits of fraction for mul by 8
14465 #               d3: lower 32-bits of fraction for mul by 8
14466 #               d4: upper 32-bits of fraction for mul by 2
14467 #               d5: lower 32-bits of fraction for mul by 2
14468 #               d6: temp for bit-field extracts
14469 #               d7: byte digit formation word;digit count {0,1}
14470 #               a0: pointer into memory for packed bcd string formation
14471 #
14472
14473         global          binstr
14474 binstr:
14475         movm.l          &0xff00,-(%sp)  # save {%d0-%d7}; restored at end_bstr
14476
14477 #
14478 # A1: Init d7 (low word = digit-pending flag, upper word = pending ms digit)
14479 #
14480         mov.l           &1,%d7          # flag = 1, pending digit = 0: first byte gets 0 in its upper nibble
14481         subq.l          &1,%d0          # dbf makes count+1 passes, so loop on LEN-1
14482 #
14483 # A2. Copy d2:d3 to d4:d5.  Start loop (one pass per bcd digit).
14484 #
14485 loop:
14486         mov.l           %d2,%d4         # copy the fraction before muls
14487         mov.l           %d3,%d5         # to d4:d5
14488 #
14489 # A3. Multiply d2:d3 by 8; extract msbs into d1.
14490 #
14491         bfextu          %d2{&0:&3},%d1  # d1 = 3 msbs of d2 (the bits shifted out by the mul by 8)
14492         asl.l           &3,%d2          # shift d2 left by 3 places
14493         bfextu          %d3{&0:&3},%d6  # d6 = 3 msbs of d3 (they cross from d3 into d2)
14494         asl.l           &3,%d3          # shift d3 left by 3 places
14495         or.l            %d6,%d2         # or in msbs from d3 into d2
14496 #
14497 # A4. Multiply d4:d5 by 2; add carry out to d1.
14498 #
14499         asl.l           &1,%d5          # mul d5 by 2; msb goes to the extend bit
14500         roxl.l          &1,%d4          # mul d4 by 2, rotating that extend bit in
14501         swap            %d6             # low word of d6 = 0 (its upper word was zero after bfextu)
14502         addx.w          %d6,%d1         # d1 += 0 + extend from the mul by 2
14503 #
14504 # A5. Add mul by 8 to mul by 2.  D1 contains the digit formed.
14505 #
14506         add.l           %d5,%d3         # add lower 32 bits
14507         nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14508         addx.l          %d4,%d2         # add with extend upper 32 bits
14509         nop                             # ERRATA FIX #13 (Rev. 1.2 6/6/90)
14510         addx.w          %d6,%d1         # d1 += 0 + extend from the 64-bit add
14511         swap            %d6             # undo the earlier swap: d6 upper word is 0 again
14512 #
14513 # A6. Test the digit-pending flag in d7 and branch.
14514 #
14515         tst.w           %d7             # flag = 0: this digit starts a new byte
14516         beq.b           first_d         # flag != 0: fall through, complete the byte and write it
14517 sec_d:
14518         swap            %d7             # bring the pending (first) digit into the low word
14519         asl.w           &4,%d7          # first digit into the upper 4 bits of the byte
14520         add.w           %d1,%d7         # new (ls) digit into the lower 4 bits
14521         mov.b           %d7,(%a0)+      # store the packed byte, advance a0
14522         swap            %d7             # bring the flag word back to the low half
14523         clr.w           %d7             # flag = 0: no digit pending
14524         dbf.w           %d0,loop        # next digit, if any remain
14525         bra.b           end_bstr        # done with nothing pending, so exit
14526 first_d:
14527         swap            %d7             # expose the digit-holding word
14528         mov.w           %d1,%d7         # keep the new digit as the pending ms digit
14529         swap            %d7             # flag word back in the low half
14530         addq.w          &1,%d7          # flag = 1: one digit pending
14531         dbf.w           %d0,loop        # next digit, if any remain
14532         swap            %d7             # out of digits with one still pending
14533         lsl.w           &4,%d7          # move it to the upper 4 bits (lower 4 bits = 0)
14534         mov.b           %d7,(%a0)+      # store that final byte, advance a0
14535 #
14536 # Restore d0-d7 and return.  The digits are in memory; a0 points past the last byte written.
14537 #
14538 end_bstr:
14539         movm.l          (%sp)+,&0xff    # restore {%d0-%d7}
14540         rts
14541
14542 #########################################################################
14543 # XDEF **************************************************************** #
14544 #       facc_in_b(): dmem_read_byte failed                              #
14545 #       facc_in_w(): dmem_read_word failed                              #
14546 #       facc_in_l(): dmem_read_long failed                              #
14547 #       facc_in_d(): dmem_read of dbl prec failed                       #
14548 #       facc_in_x(): dmem_read of ext prec failed                       #
14549 #                                                                       #
14550 #       facc_out_b(): dmem_write_byte failed                            #
14551 #       facc_out_w(): dmem_write_word failed                            #
14552 #       facc_out_l(): dmem_write_long failed                            #
14553 #       facc_out_d(): dmem_write of dbl prec failed                     #
14554 #       facc_out_x(): dmem_write of ext prec failed                     #
14555 #                                                                       #
14556 # XREF **************************************************************** #
14557 #       _real_access() - exit through access error handler              #
14558 #                                                                       #
14559 # INPUT *************************************************************** #
14560 #       None                                                            #
14561 #                                                                       #
14562 # OUTPUT ************************************************************** #
14563 #       None                                                            #
14564 #                                                                       #
14565 # ALGORITHM *********************************************************** #
14566 #       Flow jumps here when an FP data fetch call gets an error        #
14567 # result. This means the operating system wants an access error frame   #
14568 # made out of the current exception stack frame.                        #
14569 #       So, we first call restore() which makes sure that any updated   #
14570 # -(an)+ register gets returned to its pre-exception value and then     #
14571 # we change the stack to an access error stack frame.                   #
14572 #                                                                       #
14573 #########################################################################
14574
14575 facc_in_b:                                              # entry: byte-sized data read failed
14576         movq.l          &0x1,%d0                        # d0 = 1: one byte (presumably the size argument for restore)
14577         bsr.w           restore                         # return any updated -(An)/(An)+ register to its pre-exception value
14578
14579         mov.w           &0x0121,EXC_VOFF(%a6)           # FSLW word for a byte read, stored in the frame's EXC_VOFF slot
14580         bra.w           facc_finish                     # go build the access error frame (this is the stub farthest from facc_finish)
14581
14582 facc_in_w:                                              # entry: word-sized data read failed
14583         movq.l          &0x2,%d0                        # d0 = 2: two bytes (presumably the size argument for restore)
14584         bsr.w           restore                         # return any updated -(An)/(An)+ register to its pre-exception value
14585
14586         mov.w           &0x0141,EXC_VOFF(%a6)           # FSLW word for a word read, stored in the frame's EXC_VOFF slot
14587         bra.b           facc_finish                     # go build the access error frame
14588
14589 facc_in_l:                                              # entry: longword-sized data read failed
14590         movq.l          &0x4,%d0                        # d0 = 4: four bytes (presumably the size argument for restore)
14591         bsr.w           restore                         # return any updated -(An)/(An)+ register to its pre-exception value
14592
14593         mov.w           &0x0101,EXC_VOFF(%a6)           # FSLW word for a longword read, stored in the frame's EXC_VOFF slot
14594         bra.b           facc_finish                     # go build the access error frame
14595
14596 facc_in_d:                                              # entry: double-precision data read failed
14597         movq.l          &0x8,%d0                        # d0 = 8: eight bytes (presumably the size argument for restore)
14598         bsr.w           restore                         # return any updated -(An)/(An)+ register to its pre-exception value
14599
14600         mov.w           &0x0161,EXC_VOFF(%a6)           # FSLW word for a double-precision read, stored in the frame's EXC_VOFF slot
14601         bra.b           facc_finish                     # go build the access error frame
14602 # facc_in_x: error entry for a twelve-byte FP data fetch.
14603 facc_in_x:
14604         movq.l          &0xc,%d0                        # d0 = 12, the byte count restore adjusts An by
14605         bsr.w           restore                         # put any (An)+ / -(An) register back
14606
14607         mov.w           &0x0161,EXC_VOFF(%a6)           # stage FSLW word (same value as facc_in_d) for facc_finish
14608         bra.b           facc_finish                     # go build the access error frame
14609
14610 ################################################################
14611 # facc_out_b: error entry for a one-byte access on the "out" side (presumably fmove out -- confirm).
14612 facc_out_b:
14613         movq.l          &0x1,%d0                        # d0 = 1, the byte count restore adjusts An by
14614         bsr.w           restore                         # put any (An)+ / -(An) register back
14615
14616         mov.w           &0x00a1,EXC_VOFF(%a6)           # stage FSLW word; vs facc_in_b's 0x0121, bit 8 is clear and bit 7 set
14617         bra.b           facc_finish                     # go build the access error frame
14618 # facc_out_w: error entry for a two-byte access on the "out" side.
14619 facc_out_w:
14620         movq.l          &0x2,%d0                        # d0 = 2, the byte count restore adjusts An by
14621         bsr.w           restore                         # put any (An)+ / -(An) register back
14622
14623         mov.w           &0x00c1,EXC_VOFF(%a6)           # stage FSLW word; facc_finish copies it into the FSLW's upper half
14624         bra.b           facc_finish                     # go build the access error frame
14625 # facc_out_l: error entry for a four-byte access on the "out" side.
14626 facc_out_l:
14627         movq.l          &0x4,%d0                        # d0 = 4, the byte count restore adjusts An by
14628         bsr.w           restore                         # put any (An)+ / -(An) register back
14629
14630         mov.w           &0x0081,EXC_VOFF(%a6)           # stage FSLW word; facc_finish copies it into the FSLW's upper half
14631         bra.b           facc_finish                     # go build the access error frame
14632 # facc_out_d: error entry for an eight-byte access on the "out" side.
14633 facc_out_d:
14634         movq.l          &0x8,%d0                        # d0 = 8, the byte count restore adjusts An by
14635         bsr.w           restore                         # put any (An)+ / -(An) register back
14636
14637         mov.w           &0x00e1,EXC_VOFF(%a6)           # stage FSLW word; facc_finish copies it into the FSLW's upper half
14638         bra.b           facc_finish                     # go build the access error frame
14639 # facc_out_x: error entry for a twelve-byte access on the "out" side.
14640 facc_out_x:
14641         movq.l          &0xc,%d0                        # d0 = 12, the byte count restore adjusts An by (movq like every sibling stub)
14642         bsr.w           restore                         # put any (An)+ / -(An) register back
14643
14644         mov.w           &0x00e1,EXC_VOFF(%a6)           # stage FSLW word (same value as facc_out_d); falls through into facc_finish
14645
14646 # facc_finish: turn the current exception stack frame into an access error
14647 # frame (grown by one longword so an FSLW follows the EA) and exit to _real_access.
14648 facc_finish:
14649         mov.l           USER_FPIAR(%a6),EXC_PC(%a6) # store current PC (taken from the saved FPIAR)
14650 # reload the registers saved in the a6 frame before that frame is released
14651         fmovm.x         EXC_FPREGS(%a6),&0xc0   # restore fp0-fp1
14652         fmovm.l         USER_FPCR(%a6),%fpcr,%fpsr,%fpiar # restore ctrl regs
14653         movm.l          EXC_DREGS(%a6),&0x0303  # restore d0-d1/a0-a1
14654
14655         unlk            %a6                     # release the a6 frame; (sp) is the stacked SR from here on
14656 # open 4 bytes below the frame, then slide SR/PC/voff/EA down into them
14657         mov.l           (%sp),-(%sp)            # store SR, hi(PC)
14658         mov.l           0x8(%sp),0x4(%sp)       # store lo(PC) (the voff word travels with it)
14659         mov.l           0xc(%sp),0x8(%sp)       # store EA
14660         mov.l           &0x00000001,0xc(%sp)    # store FSLW (vacated top longword = 0x00000001 for now)
14661         mov.w           0x6(%sp),0xc(%sp)       # fix FSLW (size): upper word = value the facc_* stub staged in voff
14662         mov.w           &0x4008,0x6(%sp)        # store voff (0x4008; presumably format 4, access error vector -- confirm)
14663
14664         btst            &0x5,(%sp)              # supervisor or user mode? (bit 5 of the first SR byte)
14665         beq.b           facc_out2               # user
14666         bset            &0x2,0xd(%sp)           # set supervisor TM bit (bit 2 of the second FSLW byte)
14667
14668 facc_out2:
14669         bra.l           _real_access            # leave through _real_access (defined outside this section)
14670
14671 ##################################################################
14672
14673 # restore: undo an (An)+ or -(An) side effect before the access error exit.
14674 # the emulation has already moved An to its post-instruction value, but
14675 # the access error handler must see the pre-instruction value.
14676 # in:  d0 = operand size in bytes; the opword is read from EXC_OPWORD(%a6).
14677 # out: An adjusted when the <ea> mode is (An)+ or -(An); d1 is overwritten.
14678 restore:
14679         mov.b           EXC_OPWORD+0x1(%a6),%d1 # second opword byte (mode and register are taken from it)
14680         andi.b          &0x38,%d1               # extract opmode (bits 5-3)
14681         cmpi.b          %d1,&0x18               # postinc?
14682         beq.w           rest_inc                # yes; branch, not bsr, so the fixup's rts returns to our caller
14683         cmpi.b          %d1,&0x20               # predec?
14684         beq.w           rest_dec                # yes; rest_dec negates d0 and joins rest_inc
14685         rts                                     # any other mode: nothing to undo
14686 # rest_inc: subtract d0 from the An named in the opword, via a jump table.
14687 rest_inc:
14688         mov.b           EXC_OPWORD+0x1(%a6),%d1 # reload the opword byte; restore masked its copy
14689         andi.w          &0x0007,%d1             # fetch An; the word-sized mask also clears bits 15-3 for indexing
14690
14691         mov.w           (tbl_rest_inc.b,%pc,%d1.w*2),%d1        # d1 = 16-bit table entry for that register
14692         jmp             (tbl_rest_inc.b,%pc,%d1.w*1)            # entry is an offset from tbl_rest_inc
14693 # offsets of the per-register fixups, indexed by An number 0-7
14694 tbl_rest_inc:
14695         short           ri_a0 - tbl_rest_inc
14696         short           ri_a1 - tbl_rest_inc
14697         short           ri_a2 - tbl_rest_inc
14698         short           ri_a3 - tbl_rest_inc
14699         short           ri_a4 - tbl_rest_inc
14700         short           ri_a5 - tbl_rest_inc
14701         short           ri_a6 - tbl_rest_inc
14702         short           ri_a7 - tbl_rest_inc
14703 # ri_a0-ri_a6: each subtracts d0 from one address register, then returns.
14704 ri_a0:
14705         sub.l           %d0,EXC_DREGS+0x8(%a6)  # fix stacked a0 (facc_finish reloads a0 from EXC_DREGS)
14706         rts
14707 ri_a1:
14708         sub.l           %d0,EXC_DREGS+0xc(%a6)  # fix stacked a1 (facc_finish reloads a1 from EXC_DREGS)
14709         rts
14710 ri_a2:
14711         sub.l           %d0,%a2                 # fix a2 (adjusted directly in the register)
14712         rts
14713 ri_a3:
14714         sub.l           %d0,%a3                 # fix a3 (adjusted directly in the register)
14715         rts
14716 ri_a4:
14717         sub.l           %d0,%a4                 # fix a4 (adjusted directly in the register)
14718         rts
14719 ri_a5:
14720         sub.l           %d0,%a5                 # fix a5 (adjusted directly in the register)
14721         rts
14722 ri_a6:
14723         sub.l           %d0,(%a6)               # fix stacked a6 (the longword a6 points at)
14724         rts
14725 # if it's a fmove out instruction, we don't have to fix a7
14726 # because we hadn't changed it yet. if it's an opclass two
14727 # instruction (data moved in) and the exception was in supervisor
14728 # mode, then a7 also wasn't updated. if it was user mode, then
14729 # restore the correct a7 which is in the USP currently.
14730 ri_a7:
14731         cmpi.b          EXC_VOFF(%a6),&0x30     # move in or out? (EXC_VOFF is still unmodified: the facc_* stubs write it after restore)
14732         bne.b           ri_a7_done              # out
14733 # "in" case: only a user-mode a7, which lives in the USP, is adjusted
14734         btst            &0x5,EXC_SR(%a6)        # user or supervisor?
14735         bne.b           ri_a7_done              # supervisor
14736         movc            %usp,%a0                # restore USP (fetch it into a0 first)
14737         sub.l           %d0,%a0                 # a0 is free to use: facc_finish reloads it from EXC_DREGS
14738         movc            %a0,%usp                # write the adjusted value back to the USP
14739 ri_a7_done:
14740         rts
14741
14742 # rest_dec: the <ea> was predec, so invert the adjustment value and share rest_inc
14743 rest_dec:
14744         neg.l           %d0                     # d0 = -size, so rest_inc's subtract adds the size back
14745         bra.b           rest_inc                # reuse the per-register dispatch