compiler/nativeGen/MachInstrs.hs

   1 {-# OPTIONS -w #-}
    2 -- The above warning suppression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Machine-dependent assembly language
  11 --
  12 -- (c) The University of Glasgow 1993-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 #include "nativeGen/NCG.h"
  17
  18 module MachInstrs (
  19         -- * Cmm instantiations
  20         NatCmm, NatCmmTop, NatBasicBlock,
  21
  22         -- * Machine instructions
  23         Instr(..),
  24         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  25 #if powerpc_TARGET_ARCH
  26         condNegate,
  27 #endif
  28         RI(..),
  29
  30 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  31         Operand(..),
  32 #endif
  33 #if i386_TARGET_ARCH
  34         i386_insert_ffrees,
  35 #endif
  36 #if sparc_TARGET_ARCH
  37         riZero, fpRelEA, moveSp, fPair,
  38 #endif
  39     ) where
  40
  41 #include "HsVersions.h"
  42
  43 import BlockId
  44 import MachRegs
  45 import Cmm
  46 import CLabel           ( CLabel, pprCLabel )
  47 import Panic            ( panic )
  48 import Outputable
  49 import FastString
  50 import Constants       ( wORD_SIZE )
  51
  52 import GHC.Exts
  53
  54
  55 -- -----------------------------------------------------------------------------
  56 -- Our flavours of the Cmm types
  57
   58 -- Type synonyms: the generic Cmm types instantiated with native 'Instr' code
   59 type NatCmm        = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
   60 type NatCmmTop     = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
   61 type NatBasicBlock = GenBasicBlock Instr
  62
  63 -- -----------------------------------------------------------------------------
   64 -- Condition codes for compare and branch instructions.  Each target
   65 -- architecture selects its own set of constructors via CPP.
   66 data Cond
   67 #if alpha_TARGET_ARCH
   68   = ALWAYS      -- For BI (same as BR)
   69   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
   70   | GE          -- For BI only
   71   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
   72   | LE          -- For CMP and BI
   73   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
   74   | NE          -- For BI only
   75   | NEVER       -- For BI (null instruction)
   76   | ULE         -- For CMP only
   77   | ULT         -- For CMP only
   78 #endif
   79 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
   80   = ALWAYS      -- What's really used? ToDo
   81   | EQQ
   82   | GE
   83   | GEU         -- unsigned counterpart of GE (see condToSigned)
   84   | GTT
   85   | GU          -- unsigned counterpart of GTT
   86   | LE
   87   | LEU         -- unsigned counterpart of LE
   88   | LTT
   89   | LU          -- unsigned counterpart of LTT
   90   | NE
   91   | NEG
   92   | POS
   93   | CARRY
   94   | OFLO
   95   | PARITY
   96   | NOTPARITY
   97 #endif
   98 #if sparc_TARGET_ARCH
   99   = ALWAYS      -- What's really used? ToDo
  100   | EQQ
  101   | GE
  102   | GEU         -- unsigned counterpart of GE (see condToSigned)
  103   | GTT
  104   | GU          -- unsigned counterpart of GTT
  105   | LE
  106   | LEU         -- unsigned counterpart of LE
  107   | LTT
  108   | LU          -- unsigned counterpart of LTT
  109   | NE
  110   | NEG
  111   | NEVER
  112   | POS
  113   | VC
  114   | VS
  115 #endif
  116 #if powerpc_TARGET_ARCH
  117   = ALWAYS
  118   | EQQ
  119   | GE
  120   | GEU         -- unsigned counterpart of GE (see condToSigned)
  121   | GTT
  122   | GU          -- unsigned counterpart of GTT
  123   | LE
  124   | LEU         -- unsigned counterpart of LE
  125   | LTT
  126   | LU          -- unsigned counterpart of LTT
  127   | NE
  128 #endif
  129     deriving Eq  -- to make an assertion work
 130
 131 condUnsigned GU  = True
 132 condUnsigned LU  = True
 133 condUnsigned GEU = True
 134 condUnsigned LEU = True
 135 condUnsigned _   = False
 136
 137 condToSigned GU  = GTT
 138 condToSigned LU  = LTT
 139 condToSigned GEU = GE
 140 condToSigned LEU = LE
 141 condToSigned x   = x
 142
 143 condToUnsigned GTT = GU
 144 condToUnsigned LTT = LU
 145 condToUnsigned GE  = GEU
 146 condToUnsigned LE  = LEU
 147 condToUnsigned x   = x
 148
 149 #if powerpc_TARGET_ARCH
 150 condNegate ALWAYS  = panic "condNegate: ALWAYS"
 151 condNegate EQQ     = NE
 152 condNegate GE      = LTT
 153 condNegate GEU     = LU
 154 condNegate GTT     = LE
 155 condNegate GU      = LEU
 156 condNegate LE      = GTT
 157 condNegate LEU     = GU
 158 condNegate LTT     = GE
 159 condNegate LU      = GEU
 160 condNegate NE      = EQQ
 161 #endif
 162
 163 -- -----------------------------------------------------------------------------
 164 -- Register or immediate (a handy type on some platforms)
 165
  166 data RI = RIReg Reg     -- the operand is a register
  167         | RIImm Imm     -- the operand is an immediate value
 168
 169
 170 -- -----------------------------------------------------------------------------
 171 -- Machine's assembly language
 172
  173 -- We have a few common "instructions" (nearly all the pseudo-ops) but
  174 -- most of 'Instr' is machine-specific.
 175
  176 data Instr
  177   = COMMENT FastString          -- comment pseudo-op (carries the comment text)
  178
  179   | LDATA   Section [CmmStatic] -- some static data spat out during code
  180                                 -- generation.  Will be extracted before
  181                                 -- pretty-printing.
  182
  183   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
  184                                 -- codegen, removed later.  Preceding
  185                                 -- instruction should be a jump, as per the
  186                                 -- invariants for a BasicBlock (see Cmm).
  187
  188   | DELTA   Int                 -- specify current stack offset for
  189                                 -- benefit of subsequent passes
  190
  191   | SPILL   Reg Int             -- ^ spill this reg to the given stack slot
  192   | RELOAD  Int Reg             -- ^ reload this reg from the given stack slot
 193
 194 -- -----------------------------------------------------------------------------
 195 -- Alpha instructions
 196
 197 #if alpha_TARGET_ARCH
 198
  199 -- data Instr continues (alpha-only constructors)...
  200
  201 -- Loads and stores.
  202               | LD            Size Reg AddrMode -- size, dst, src
  203               | LDA           Reg AddrMode      -- dst, src
  204               | LDAH          Reg AddrMode      -- dst, src
  205               | LDGP          Reg AddrMode      -- dst, src
  206               | LDI           Size Reg Imm     -- size, dst, src
  207               | ST            Size Reg AddrMode -- size, src, dst
  208
  209 -- Int Arithmetic.
  210               | CLR           Reg                   -- dst
  211               | ABS           Size RI Reg           -- size, src, dst
  212               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
  213               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
  214               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
  215               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
  216               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
  217               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
  218               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
  219               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
  220
  221 -- Simple bit-twiddling.
  222               | NOT           RI Reg
  223               | AND           Reg RI Reg
  224               | ANDNOT        Reg RI Reg
  225               | OR            Reg RI Reg
  226               | ORNOT         Reg RI Reg
  227               | XOR           Reg RI Reg
  228               | XORNOT        Reg RI Reg
  229               | SLL           Reg RI Reg
  230               | SRL           Reg RI Reg
  231               | SRA           Reg RI Reg
  232
  233               | ZAP           Reg RI Reg
  234               | ZAPNOT        Reg RI Reg
  235
  236               | NOP
  237
  238 -- Comparison
  239               | CMP           Cond Reg RI Reg -- cond, then presumably src, src, dst as for ADD (confirm)
  240
  241 -- Float Arithmetic.
  242               | FCLR          Reg
  243               | FABS          Reg Reg
  244               | FNEG          Size Reg Reg
  245               | FADD          Size Reg Reg Reg
  246               | FDIV          Size Reg Reg Reg
  247               | FMUL          Size Reg Reg Reg
  248               | FSUB          Size Reg Reg Reg
  249               | CVTxy         Size Size Reg Reg
  250               | FCMP          Size Cond Reg Reg Reg
  251               | FMOV          Reg Reg
  252
  253 -- Jumping around.
  254               | BI            Cond Reg Imm
  255               | BF            Cond Reg Imm
  256               | BR            Imm
  257               | JMP           Reg AddrMode Int
  258               | BSR           Imm Int
  259               | JSR           Reg AddrMode Int
  260
  261 -- Alpha-specific pseudo-ops.
  262               | FUNBEGIN CLabel
  263               | FUNEND CLabel
 264
 265 data RI
 266   = RIReg Reg
 267   | RIImm Imm
 268
 269 #endif /* alpha_TARGET_ARCH */
 270
 271
 272 -- -----------------------------------------------------------------------------
 273 -- Intel x86 instructions
 274
 275 {-
 276 Intel, in their infinite wisdom, selected a stack model for floating
 277 point registers on x86.  That might have made sense back in 1979 --
 278 nowadays we can see it for the nonsense it really is.  A stack model
 279 fits poorly with the existing nativeGen infrastructure, which assumes
 280 flat integer and FP register sets.  Prior to this commit, nativeGen
 281 could not generate correct x86 FP code -- to do so would have meant
 282 somehow working the register-stack paradigm into the register
 283 allocator and spiller, which sounds very difficult.
 284
 285 We have decided to cheat, and go for a simple fix which requires no
 286 infrastructure modifications, at the expense of generating ropey but
 287 correct FP code.  All notions of the x86 FP stack and its insns have
 288 been removed.  Instead, we pretend (to the instruction selector and
 289 register allocator) that x86 has six floating point registers, %fake0
 290 .. %fake5, which can be used in the usual flat manner.  We further
 291 claim that x86 has floating point instructions very similar to SPARC
 292 and Alpha, that is, a simple 3-operand register-register arrangement.
 293 Code generation and register allocation proceed on this basis.
 294
 295 When we come to print out the final assembly, our convenient fiction
 296 is converted to dismal reality.  Each fake instruction is
 297 independently converted to a series of real x86 instructions.
 298 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 299 arithmetic operations, the two operands are pushed onto the top of the
 300 FP stack, the operation done, and the result copied back into the
 301 relevant register.  There are only six %fake registers because 2 are
 302 needed for the translation, and x86 has 8 in total.
 303
 304 The translation is inefficient but is simple and it works.  A cleverer
 305 translation would handle a sequence of insns, simulating the FP stack
 306 contents, would not impose a fixed mapping from %fake to %st regs, and
 307 hopefully could avoid most of the redundant reg-reg moves of the
 308 current translation.
 309
 310 We might as well make use of whatever unique FP facilities Intel have
 311 chosen to bless us with (let's not be churlish, after all).
 312 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 313 -}
 314
 315 {-
 316 MORE FLOATING POINT MUSINGS...
 317
 318 Intel's internal floating point registers are by default 80 bit
 319 extended precision.  This means that all operations done on values in
 320 registers are done at 80 bits, and unless the intermediate values are
 321 truncated to the appropriate size (32 or 64 bits) by storing in
 322 memory, calculations in registers will give different results from
 323 calculations which pass intermediate values in memory (eg. via
 324 function calls).
 325
 326 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 327 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 328 that this will only affect 64-bit precision arithmetic; 32-bit
 329 calculations will still be done at 64-bit precision in registers.  So
 330 it doesn't solve the whole problem.
 331
 332 There's also the issue of what the C library is expecting in terms of
 333 precision.  It seems to be the case that glibc on Linux expects the
 334 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 335 unexpected effects.  Changing the default could have undesirable
 336 effects on other 3rd-party library code too, so the right thing would
 337 be to save/restore the FPU control word across Haskell code if we were
 338 to do this.
 339
 340 gcc's -ffloat-store gives consistent results by always storing the
 341 results of floating-point calculations in memory, which works for both
 342 32 and 64-bit precision.  However, it only affects the values of
 343 user-declared floating point variables in C, not intermediate results.
 344 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 345 flag).
 346
 347 Another problem is how to spill floating point registers in the
 348 register allocator.  Should we spill the whole 80 bits, or just 64?
 349 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 350 Linux, spilling 64 bits will round the results of some operations.
 351 This is what gcc does.  Spilling at 80 bits requires taking up a full
 352 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 353 the alignment problems.
 354
 355 In the future, we'll use the SSE registers for floating point.  This
 356 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 357 precision float ops), which means P4 or Xeon and above.  Using SSE
 358 will solve all these problems, because the SSE registers use fixed 32
 359 bit or 64 bit precision.
 360
 361 --SDM 1/2003
 362 -}
 363
 364 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 365
  366 -- data Instr continues (constructors for i386 and x86_64)...
  367
  368 -- Moves.
  369         | MOV         Size Operand Operand
  370         | MOVZxL      Size Operand Operand -- size is the size of operand 1
  371         | MOVSxL      Size Operand Operand -- size is the size of operand 1
  372         -- x86_64 note: plain mov into a 32-bit register always zero-extends
  373         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
  374         -- don't affect the high bits of the register.
  375
  376 -- Load effective address (also a very useful three-operand add instruction :-)
  377         | LEA         Size Operand Operand
  378
  379 -- Int Arithmetic.
  380         | ADD         Size Operand Operand
  381         | ADC         Size Operand Operand
  382         | SUB         Size Operand Operand
  383
  384         | MUL         Size Operand Operand
  385         | IMUL        Size Operand Operand      -- signed int mul
  386         | IMUL2       Size Operand -- %edx:%eax = operand * %eax
  387
  388         | DIV         Size Operand      -- eax := eax:edx/op, edx := eax:edx%op
  389         | IDIV        Size Operand      -- ditto, but signed
  390
  391 -- Simple bit-twiddling.
  392         | AND         Size Operand Operand
  393         | OR          Size Operand Operand
  394         | XOR         Size Operand Operand
  395         | NOT         Size Operand
  396         | NEGI        Size Operand -- NEG instruction (name clash with Cond)
  397
  398 -- Shifts (amount may be immediate or %cl only)
  399         | SHL         Size Operand{-amount-} Operand
  400         | SAR         Size Operand{-amount-} Operand
  401         | SHR         Size Operand{-amount-} Operand
  402
  403         | BT          Size Imm Operand
  404         | NOP
  405
  406 #if i386_TARGET_ARCH
  407 -- Float Arithmetic.
  408
  409 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
  410 -- as single instructions right up until we spit them out.
  411         -- all the 3-operand fake fp insns are src1 src2 dst
  412         -- and furthermore are constrained to be fp regs only.
  413         -- IMPORTANT: keep is_G_instr up to date with any changes here
  414         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
  415         | GLD         Size AddrMode Reg -- src, dst(fpreg)
  416         | GST         Size Reg AddrMode -- src(fpreg), dst
  417
  418         | GLDZ        Reg -- dst(fpreg)
  419         | GLD1        Reg -- dst(fpreg)
  420
  421         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
  422         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
  423
  424         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
  425         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
  426
  427         | GADD        Size Reg Reg Reg -- src1, src2, dst
  428         | GDIV        Size Reg Reg Reg -- src1, src2, dst
  429         | GSUB        Size Reg Reg Reg -- src1, src2, dst
  430         | GMUL        Size Reg Reg Reg -- src1, src2, dst
  431
  432                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
  433                 -- Compare src1 with src2; set the Zero flag iff the numbers are
  434                 -- comparable and the comparison is True.  Subsequent code must
  435                 -- test the %eflags zero flag regardless of the supplied Cond.
  436         | GCMP        Cond Reg Reg -- src1, src2
  437
  438         | GABS        Size Reg Reg -- src, dst
  439         | GNEG        Size Reg Reg -- src, dst
  440         | GSQRT       Size Reg Reg -- src, dst
  441         | GSIN        Size CLabel CLabel Reg Reg -- size, 2 labels (use not shown here), src, dst
  442         | GCOS        Size CLabel CLabel Reg Reg -- size, 2 labels (use not shown here), src, dst
  443         | GTAN        Size CLabel CLabel Reg Reg -- size, 2 labels (use not shown here), src, dst
  444
  445         | GFREE         -- do ffree on all x86 regs; an ugly hack
  446 #endif
  447
  448 #if x86_64_TARGET_ARCH
  449 -- SSE2 floating point: we use a restricted set of the available SSE2
  450 -- instructions for floating-point.
  451
  452         -- use MOV for moving (either movss or movsd (movlpd better?))
  453
  454         | CVTSS2SD      Reg Reg         -- F32 to F64
  455         | CVTSD2SS      Reg Reg         -- F64 to F32
  456         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
  457         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
  458         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
  459         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
  460
  461         -- use ADD & SUB for arithmetic.  In both cases, operands
  462         -- are  Operand Reg.
  463
  464         -- SSE2 floating-point division:
  465         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
  466
  467         -- use CMP for comparisons.  ucomiss and ucomisd instructions
  468         -- compare single/double prec floating point respectively.
  469
  470         | SQRT          Size Operand Reg        -- src, dst
  471 #endif
  472
  473 -- Comparison
  474         | TEST          Size Operand Operand
  475         | CMP           Size Operand Operand
  476         | SETCC         Cond Operand
  477
  478 -- Stack Operations.
  479         | PUSH          Size Operand
  480         | POP           Size Operand
  481         -- both unused (SDM):
  482         --  | PUSHA
  483         --  | POPA
  484
  485 -- Jumping around.
  486         | JMP         Operand
  487         | JXX         Cond BlockId  -- includes unconditional branches
  488         | JXX_GBL     Cond Imm      -- non-local version of JXX
  489         | JMP_TBL     Operand [BlockId]  -- table jump
  490         | CALL        (Either Imm Reg) [Reg] -- target; regs presumably the argument regs (confirm)
  491
  492 -- Other things.
  493         | CLTD Size      -- sign extend %eax into %edx:%eax
  494
  495         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
  496                            -- pretty-prints as
  497                            --       call 1f
  498                            -- 1:    popl %reg
  499                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
  500         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
  501                            -- pretty-prints as
  502                            --       call 1f
  503                            -- 1:    popl %reg
 504
 505
  506 data Operand
  507   = OpReg  Reg          -- the operand is a register
  508   | OpImm  Imm          -- the operand is an immediate value
  509   | OpAddr AddrMode     -- the operand is a memory reference
 510
 511 #endif /* i386 or x86_64 */
 512
 513 #if i386_TARGET_ARCH
 514 i386_insert_ffrees :: [GenBasicBlock Instr] -> [GenBasicBlock Instr]
 515 i386_insert_ffrees blocks
 516    | or (map (any is_G_instr) [ instrs | BasicBlock id instrs <- blocks ])
 517    = map ffree_before_nonlocal_transfers blocks
 518    | otherwise
 519    = blocks
 520   where
 521    ffree_before_nonlocal_transfers (BasicBlock id insns)
 522      = BasicBlock id (foldr p [] insns)
 523      where p insn r = case insn of
 524                         CALL _ _ -> GFREE : insn : r
 525                         JMP _    -> GFREE : insn : r
 526                         other    -> insn : r
 527
 528 -- if you ever add a new FP insn to the fake x86 FP insn set,
 529 -- you must update this too
 530 is_G_instr :: Instr -> Bool
 531 is_G_instr instr
 532    = case instr of
 533         GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
 534         GLDZ _ -> True; GLD1 _ -> True
 535         GFTOI _ _ -> True; GDTOI _ _ -> True
 536         GITOF _ _ -> True; GITOD _ _ -> True
 537         GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
 538         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
 539         GCMP _ _ _ -> True; GABS _ _ _ -> True
 540         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
 541         GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
 542         GFREE -> panic "is_G_instr: GFREE (!)"
 543         other -> False
 544 #endif /* i386_TARGET_ARCH */
 545
 546
 547 -- -----------------------------------------------------------------------------
 548 -- Sparc instructions
 549
 550 #if sparc_TARGET_ARCH
 551
  552 -- data Instr continues (sparc-only constructors)...
  553
  554 -- Loads and stores.
  555               | LD            Size AddrMode Reg -- size, src, dst
  556               | ST            Size Reg AddrMode -- size, src, dst
  557
  558 -- Int Arithmetic.
  559               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
  560               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
  561               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
  562               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
  563               | RDY           Reg       -- move contents of Y register to reg
  564
  565 -- Simple bit-twiddling.
  566               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
  567               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
  568               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
  569               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
  570               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
  571               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
  572               | SLL           Reg RI Reg -- src1, src2, dst
  573               | SRL           Reg RI Reg -- src1, src2, dst
  574               | SRA           Reg RI Reg -- src1, src2, dst
  575               | SETHI         Imm Reg -- src, dst
  576               | NOP           -- Really SETHI 0, %g0, but worth an alias
  577
  578 -- Float Arithmetic.
  579
  580 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
  581 -- instructions right up until we spit them out.
  582               | FABS          Size Reg Reg         -- src dst
  583               | FADD          Size Reg Reg Reg  -- src1, src2, dst
  584               | FCMP          Bool Size Reg Reg -- exception?, size, src1, src2 (no dst field)
  585               | FDIV          Size Reg Reg Reg -- src1, src2, dst
  586               | FMOV          Size Reg Reg     -- src, dst
  587               | FMUL          Size Reg Reg Reg -- src1, src2, dst
  588               | FNEG          Size Reg Reg     -- src, dst
  589               | FSQRT         Size Reg Reg     -- src, dst
  590               | FSUB          Size Reg Reg Reg -- src1, src2, dst
  591               | FxTOy         Size Size Reg Reg -- src, dst
  592
  593 -- Jumping around.
  594               | BI            Cond Bool Imm -- cond, annul?, target
  595               | BF            Cond Bool Imm -- cond, annul?, target
  596
  597               | JMP           AddrMode     -- target
  598               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 599
 600 riZero :: RI -> Bool
 601
 602 riZero (RIImm (ImmInt 0))           = True
 603 riZero (RIImm (ImmInteger 0))       = True
 604 riZero (RIReg (RealReg 0))          = True
 605 riZero _                            = False
 606
 607 -- Calculate the effective address which would be used by the
 608 -- corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
 609 -- alas -- can't have fpRelEA here because of module dependencies.
 610 fpRelEA :: Int -> Reg -> Instr
 611 fpRelEA n dst
 612    = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
 613
 614 -- Code to shift the stack pointer by n words.
 615 moveSp :: Int -> Instr
 616 moveSp n
 617    = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
 618
 619 -- Produce the second-half-of-a-double register given the first half.
 620 fPair :: Reg -> Reg
 621 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0  = RealReg (n+1)
 622 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
 623 #endif /* sparc_TARGET_ARCH */
 624
 625
 626 -- -----------------------------------------------------------------------------
 627 -- PowerPC instructions
 628
 629 #ifdef powerpc_TARGET_ARCH
 630 -- data Instr continues...
 631
 632 -- Loads and stores.
 633               | LD      Size Reg AddrMode -- Load size, dst, src
 634               | LA      Size Reg AddrMode -- Load arithmetic size, dst, src
 635               | ST      Size Reg AddrMode -- Store size, src, dst
 636               | STU     Size Reg AddrMode -- Store with Update size, src, dst
 637               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 638               | LI      Reg Imm -- Load Immediate dst, src
 639               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 640
 641               | CMP     Size Reg RI --- size, src1, src2
 642               | CMPL    Size Reg RI --- size, src1, src2
 643
 644               | BCC     Cond BlockId
 645               | BCCFAR  Cond BlockId
 646               | JMP     CLabel          -- same as branch,
 647                                         -- but with CLabel instead of block ID
 648               | MTCTR   Reg
 649               | BCTR    [BlockId]       -- with list of local destinations
 650               | BL      CLabel [Reg]    -- with list of argument regs
 651               | BCTRL   [Reg]
 652
 653               | ADD     Reg Reg RI -- dst, src1, src2
 654               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 655               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 656               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 657               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 658               | MULLW   Reg Reg RI
 659               | DIVW    Reg Reg Reg
 660               | DIVWU   Reg Reg Reg
 661
 662               | MULLW_MayOflo Reg Reg Reg
 663                         -- dst = 1 if src1 * src2 overflows
 664                         -- pseudo-instruction; pretty-printed as:
 665                         -- mullwo. dst, src1, src2
 666                         -- mfxer dst
 667                         -- rlwinm dst, dst, 2, 31,31
 668
 669               | AND     Reg Reg RI -- dst, src1, src2
 670               | OR      Reg Reg RI -- dst, src1, src2
 671               | XOR     Reg Reg RI -- dst, src1, src2
 672               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 673
 674               | EXTS    Size Reg Reg
 675
 676               | NEG     Reg Reg
 677               | NOT     Reg Reg
 678
 679               | SLW     Reg Reg RI      -- shift left word
 680               | SRW     Reg Reg RI      -- shift right word
 681               | SRAW    Reg Reg RI      -- shift right arithmetic word
 682
 683                         -- Rotate Left Word Immediate then AND with Mask
 684               | RLWINM  Reg Reg Int Int Int
 685
 686               | FADD    Size Reg Reg Reg
 687               | FSUB    Size Reg Reg Reg
 688               | FMUL    Size Reg Reg Reg
 689               | FDIV    Size Reg Reg Reg
 690               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 691
 692               | FCMP    Reg Reg
 693
 694               | FCTIWZ  Reg Reg         -- convert to integer word
 695               | FRSP    Reg Reg         -- reduce to single precision
 696                                         -- (but destination is a FP register)
 697
 698               | CRNOR   Int Int Int    -- condition register nor
 699               | MFCR    Reg            -- move from condition register
 700
 701               | MFLR    Reg            -- move from link register
 702               | FETCHPC Reg            -- pseudo-instruction:
 703                                        -- bcl to next insn, mflr reg
 704
 705               | LWSYNC -- memory barrier
 706 #endif /* powerpc_TARGET_ARCH */