compiler/nativeGen/MachInstrs.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "nativeGen/NCG.h"
  10
  11 module MachInstrs (
  12         -- * Cmm instantiations
  13         NatCmm, NatCmmTop, NatBasicBlock,
  14
  15         -- * Machine instructions
  16         Instr(..),
  17         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  18 #if powerpc_TARGET_ARCH
  19         condNegate,
  20 #endif
  21 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  22         Size(..), machRepSize,
  23 #endif
  24         RI(..),
  25
  26 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  27         Operand(..),
  28 #endif
  29 #if i386_TARGET_ARCH
  30         i386_insert_ffrees,
  31 #endif
  32 #if sparc_TARGET_ARCH
  33         riZero, fpRelEA, moveSp, fPair,
  34 #endif
  35     ) where
  36
  37 #include "HsVersions.h"
  38
  39 import MachRegs
  40 import Cmm
  41 import MachOp           ( MachRep(..) )
  42 import CLabel           ( CLabel, pprCLabel )
  43 import Panic            ( panic )
  44 import Outputable
  45 import FastString
  46 import Constants       ( wORD_SIZE )
  47
  48 import GHC.Exts
  49
  50
  51 -- -----------------------------------------------------------------------------
  52 -- Our flavours of the Cmm types
  53
  54 -- Type synonyms for Cmm populated with native code
  55 type NatCmm        = GenCmm CmmStatic [CmmStatic] Instr
  56 type NatCmmTop     = GenCmmTop CmmStatic [CmmStatic] Instr
  57 type NatBasicBlock = GenBasicBlock Instr
  58
  59 -- -----------------------------------------------------------------------------
  60 -- Conditions on this architecture
  61
  62 data Cond        -- condition codes; the constructor set is selected per target below
  63 #if alpha_TARGET_ARCH
  64   = ALWAYS      -- For BI (same as BR)
  65   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  66   | GE          -- For BI only
  67   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  68   | LE          -- For CMP and BI
  69   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  70   | NE          -- For BI only
  71   | NEVER       -- For BI (null instruction)
  72   | ULE         -- For CMP only
  73   | ULT         -- For CMP only
  74 #endif
  75 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  76   = ALWAYS      -- What's really used? ToDo
  77   | EQQ
  78   | GE
  79   | GEU         -- unsigned counterpart of GE
  80   | GTT
  81   | GU          -- unsigned counterpart of GTT
  82   | LE
  83   | LEU         -- unsigned counterpart of LE
  84   | LTT
  85   | LU          -- unsigned counterpart of LTT
  86   | NE
  87   | NEG
  88   | POS
  89   | CARRY
  90   | OFLO
  91   | PARITY
  92   | NOTPARITY
  93 #endif
  94 #if sparc_TARGET_ARCH
  95   = ALWAYS      -- What's really used? ToDo
  96   | EQQ
  97   | GE
  98   | GEU         -- unsigned counterpart of GE
  99   | GTT
 100   | GU          -- unsigned counterpart of GTT
 101   | LE
 102   | LEU         -- unsigned counterpart of LE
 103   | LTT
 104   | LU          -- unsigned counterpart of LTT
 105   | NE
 106   | NEG
 107   | NEVER
 108   | POS
 109   | VC
 110   | VS
 111 #endif
 112 #if powerpc_TARGET_ARCH
 113   = ALWAYS
 114   | EQQ
 115   | GE
 116   | GEU         -- unsigned counterpart of GE
 117   | GTT
 118   | GU          -- unsigned counterpart of GTT
 119   | LE
 120   | LEU         -- unsigned counterpart of LE
 121   | LTT
 122   | LU          -- unsigned counterpart of LTT
 123   | NE
 124 #endif
 125     deriving Eq  -- to make an assertion work
 126
 127 -- Report whether a condition is one of the unsigned comparisons.
 128 -- Only GU, LU, GEU and LEU count as unsigned; every other condition
 129 -- (ALWAYS, EQQ, NE and so on) yields False.
 130 condUnsigned :: Cond -> Bool
 131 condUnsigned cond = cond `elem` [GU, LU, GEU, LEU]
 132
 133 -- Replace an unsigned comparison by its signed counterpart; anything
 134 -- else is returned unchanged.
 135 condToSigned :: Cond -> Cond
 136 condToSigned cond = case cond of
 137     GU -> GTT;  LU -> LTT;  GEU -> GE;  LEU -> LE;  other -> other
 138
 139 -- Replace a signed ordering comparison by its unsigned counterpart;
 140 -- anything else is returned unchanged.
 141 condToUnsigned :: Cond -> Cond
 142 condToUnsigned cond = case cond of
 143     GTT -> GU;  LTT -> LU;  GE -> GEU;  LE -> LEU;  other -> other
 144
 145 #if powerpc_TARGET_ARCH
 146 -- Logical negation of a condition: the result holds exactly when the
 147 -- argument does not.  The alternatives are listed as mutually inverse
 148 -- pairs.  ALWAYS has no inverse among the constructors here, so it panics.
 149 condNegate :: Cond -> Cond
 150 condNegate cond = case cond of
 151     ALWAYS -> panic "condNegate: ALWAYS"
 152     EQQ    -> NE;   NE  -> EQQ
 153     GE     -> LTT;  LTT -> GE
 154     GTT    -> LE;   LE  -> GTT
 155     GEU    -> LU;   LU  -> GEU
 156     GU     -> LEU;  LEU -> GU
 157 #endif
 158
 159 -- -----------------------------------------------------------------------------
 160 -- Sizes on this architecture
 161
 162 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 163 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 164
 165 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
 166 data Size
 167 #if alpha_TARGET_ARCH
 168     = B     -- byte
 169     | Bu
 170 --  | W     -- word (2 bytes): UNUSED
 171 --  | Wu    -- : UNUSED
 172     | L     -- longword (4 bytes)
 173     | Q     -- quadword (8 bytes)
 174 --  | FF    -- VAX F-style floating pt: UNUSED
 175 --  | GF    -- VAX G-style floating pt: UNUSED
 176 --  | DF    -- VAX D-style floating pt: UNUSED
 177 --  | SF    -- IEEE single-precision floating pt: UNUSED
 178     | TF    -- IEEE double-precision floating pt
 179 #endif
 180 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 181     = B     -- byte (signed)
 182     | Bu    -- byte (unsigned)
 183     | H     -- halfword (signed, 2 bytes)
 184     | Hu    -- halfword (unsigned, 2 bytes)
 185     | W     -- word (4 bytes)
 186     | F     -- IEEE single-precision floating pt
 187     | DF    -- IEEE double-precision floating pt
 188 #endif
 189   deriving Eq
 190
 191 machRepSize :: MachRep -> Size     -- the Size used for a given MachRep
 192 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 193 machRepSize I16   = IF_ARCH_alpha(panic "machRepSize: I16", IF_ARCH_sparc(Hu, )) -- no 2-byte Size on alpha
 194 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 195 machRepSize I64   = panic "machRepSize: I64"    -- no Size is mapped for I64
 196 machRepSize I128  = panic "machRepSize: I128"   -- no Size is mapped for I128
 197 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 198 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 199 #endif
 200
 201 -- -----------------------------------------------------------------------------
 202 -- Register or immediate (a handy type on some platforms)
 203
 204 data RI = RIReg Reg     -- the operand is a register
 205         | RIImm Imm     -- the operand is an immediate
 206
 207
 208 -- -----------------------------------------------------------------------------
 209 -- Machine's assembly language
 210
 211 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 212 -- mostly all of 'Instr' is machine-specific.
 213
 214 data Instr
 215   = COMMENT FastString          -- comment pseudo-op
 216
 217   | LDATA   Section [CmmStatic] -- some static data spat out during code
 218                                 -- generation.  Will be extracted before
 219                                 -- pretty-printing.
 220
 221   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 222                                 -- codegen, removed later.  Preceding
 223                                 -- instruction should be a jump, as per the
 224                                 -- invariants for a BasicBlock (see Cmm).
 225
 226   | DELTA   Int                 -- specify current stack offset for
 227                                 -- benefit of subsequent passes
 228
 229 -- -----------------------------------------------------------------------------
 230 -- Alpha instructions
 231
 232 #if alpha_TARGET_ARCH
 233
 234 -- data Instr continues...
 235
 236 -- Loads and stores.
 237               | LD            Size Reg AddrMode -- size, dst, src
 238               | LDA           Reg AddrMode      -- dst, src
 239               | LDAH          Reg AddrMode      -- dst, src
 240               | LDGP          Reg AddrMode      -- dst, src
 241               | LDI           Size Reg Imm     -- size, dst, src
 242               | ST            Size Reg AddrMode -- size, src, dst
 243
 244 -- Int Arithmetic.
 245               | CLR           Reg                   -- dst
 246               | ABS           Size RI Reg           -- size, src, dst
 247               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 248               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 249               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 250               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 251               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 252               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 253               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 254               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 255
 256 -- Simple bit-twiddling.
 257               | NOT           RI Reg
 258               | AND           Reg RI Reg
 259               | ANDNOT        Reg RI Reg
 260               | OR            Reg RI Reg
 261               | ORNOT         Reg RI Reg
 262               | XOR           Reg RI Reg
 263               | XORNOT        Reg RI Reg
 264               | SLL           Reg RI Reg
 265               | SRL           Reg RI Reg
 266               | SRA           Reg RI Reg
 267
 268               | ZAP           Reg RI Reg
 269               | ZAPNOT        Reg RI Reg
 270
 271               | NOP
 272
 273 -- Comparison
 274               | CMP           Cond Reg RI Reg
 275
 276 -- Float Arithmetic.
 277               | FCLR          Reg
 278               | FABS          Reg Reg
 279               | FNEG          Size Reg Reg
 280               | FADD          Size Reg Reg Reg
 281               | FDIV          Size Reg Reg Reg
 282               | FMUL          Size Reg Reg Reg
 283               | FSUB          Size Reg Reg Reg
 284               | CVTxy         Size Size Reg Reg
 285               | FCMP          Size Cond Reg Reg Reg
 286               | FMOV          Reg Reg
 287
 288 -- Jumping around.
 289               | BI            Cond Reg Imm
 290               | BF            Cond Reg Imm
 291               | BR            Imm
 292               | JMP           Reg AddrMode Int
 293               | BSR           Imm Int
 294               | JSR           Reg AddrMode Int
 295
 296 -- Alpha-specific pseudo-ops.
 297               | FUNBEGIN CLabel
 298               | FUNEND CLabel
 299
 300 -- RI (register-or-immediate) is declared once, unconditionally, in the
 301 -- "Register or immediate" section of this module; declaring it again
 302 -- here would be a duplicate definition when building for Alpha.
 303
 304 #endif /* alpha_TARGET_ARCH */
 305
 306
 307 -- -----------------------------------------------------------------------------
 308 -- Intel x86 instructions
 309
 310 {-
 311 Intel, in their infinite wisdom, selected a stack model for floating
 312 point registers on x86.  That might have made sense back in 1979 --
 313 nowadays we can see it for the nonsense it really is.  A stack model
 314 fits poorly with the existing nativeGen infrastructure, which assumes
 315 flat integer and FP register sets.  Prior to this commit, nativeGen
 316 could not generate correct x86 FP code -- to do so would have meant
 317 somehow working the register-stack paradigm into the register
 318 allocator and spiller, which sounds very difficult.
 319
 320 We have decided to cheat, and go for a simple fix which requires no
 321 infrastructure modifications, at the expense of generating ropey but
 322 correct FP code.  All notions of the x86 FP stack and its insns have
 323 been removed.  Instead, we pretend (to the instruction selector and
 324 register allocator) that x86 has six floating point registers, %fake0
 325 .. %fake5, which can be used in the usual flat manner.  We further
 326 claim that x86 has floating point instructions very similar to SPARC
 327 and Alpha, that is, a simple 3-operand register-register arrangement.
 328 Code generation and register allocation proceed on this basis.
 329
 330 When we come to print out the final assembly, our convenient fiction
 331 is converted to dismal reality.  Each fake instruction is
 332 independently converted to a series of real x86 instructions.
 333 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 334 arithmetic operations, the two operands are pushed onto the top of the
 335 FP stack, the operation done, and the result copied back into the
 336 relevant register.  There are only six %fake registers because 2 are
 337 needed for the translation, and x86 has 8 in total.
 338
 339 The translation is inefficient but is simple and it works.  A cleverer
 340 translation would handle a sequence of insns, simulating the FP stack
 341 contents, would not impose a fixed mapping from %fake to %st regs, and
 342 hopefully could avoid most of the redundant reg-reg moves of the
 343 current translation.
 344
 345 We might as well make use of whatever unique FP facilities Intel have
 346 chosen to bless us with (let's not be churlish, after all).
 347 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 348 -}
 349
 350 {-
 351 MORE FLOATING POINT MUSINGS...
 352
 353 Intel's internal floating point registers are by default 80 bit
 354 extended precision.  This means that all operations done on values in
 355 registers are done at 80 bits, and unless the intermediate values are
 356 truncated to the appropriate size (32 or 64 bits) by storing in
 357 memory, calculations in registers will give different results from
 358 calculations which pass intermediate values in memory (eg. via
 359 function calls).
 360
 361 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 362 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 363 that this will only affect 64-bit precision arithmetic; 32-bit
 364 calculations will still be done at 64-bit precision in registers.  So
 365 it doesn't solve the whole problem.
 366
 367 There's also the issue of what the C library is expecting in terms of
 368 precision.  It seems to be the case that glibc on Linux expects the
 369 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 370 unexpected effects.  Changing the default could have undesirable
 371 effects on other 3rd-party library code too, so the right thing would
 372 be to save/restore the FPU control word across Haskell code if we were
 373 to do this.
 374
 375 gcc's -ffloat-store gives consistent results by always storing the
 376 results of floating-point calculations in memory, which works for both
 377 32 and 64-bit precision.  However, it only affects the values of
 378 user-declared floating point variables in C, not intermediate results.
 379 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 380 flag).
 381
 382 Another problem is how to spill floating point registers in the
 383 register allocator.  Should we spill the whole 80 bits, or just 64?
 384 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 385 Linux, spilling 64 bits will round the results of some operations.
 386 This is what gcc does.  Spilling at 80 bits requires taking up a full
 387 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 388 the alignment problems.
 389
 390 In the future, we'll use the SSE registers for floating point.  This
 391 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 392 precision float ops), which means P4 or Xeon and above.  Using SSE
 393 will solve all these problems, because the SSE registers use fixed 32
 394 bit or 64 bit precision.
 395
 396 --SDM 1/2003
 397 -}
 398
 399 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 400
 401 -- data Instr continues...
 402
 403 -- Moves.
 404         | MOV         MachRep Operand Operand
 405         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 406         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 407         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 408         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 409         -- don't affect the high bits of the register.
 410
 411 -- Load effective address (also a very useful three-operand add instruction :-)
 412         | LEA         MachRep Operand Operand
 413
 414 -- Int Arithmetic.
 415         | ADD         MachRep Operand Operand
 416         | ADC         MachRep Operand Operand
 417         | SUB         MachRep Operand Operand
 418
 419         | MUL         MachRep Operand Operand
 420         | IMUL        MachRep Operand Operand   -- signed int mul
 421         | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
 422
 423         | DIV         MachRep Operand   -- eax := edx:eax/op, edx := edx:eax%op
 424         | IDIV        MachRep Operand   -- ditto, but signed
 425
 426 -- Simple bit-twiddling.
 427         | AND         MachRep Operand Operand
 428         | OR          MachRep Operand Operand
 429         | XOR         MachRep Operand Operand
 430         | NOT         MachRep Operand
 431         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
 432
 433 -- Shifts (amount may be immediate or %cl only)
 434         | SHL         MachRep Operand{-amount-} Operand
 435         | SAR         MachRep Operand{-amount-} Operand
 436         | SHR         MachRep Operand{-amount-} Operand
 437
 438         | BT          MachRep Imm Operand
 439         | NOP
 440
 441 #if i386_TARGET_ARCH
 442 -- Float Arithmetic.
 443
 444 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 445 -- as single instructions right up until we spit them out.
 446         -- all the 3-operand fake fp insns are src1 src2 dst
 447         -- and furthermore are constrained to be fp regs only.
 448         -- IMPORTANT: keep is_G_instr up to date with any changes here
 449         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 450         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 451         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 452
 453         | GLDZ        Reg -- dst(fpreg)
 454         | GLD1        Reg -- dst(fpreg)
 455
 456         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 457         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 458
 459         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 460         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 461
 462         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 463         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 464         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 465         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 466
 467                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 468                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 469                 -- comparable and the comparison is True.  Subsequent code must
 470                 -- test the %eflags zero flag regardless of the supplied Cond.
 471         | GCMP        Cond Reg Reg -- src1, src2
 472
 473         | GABS        MachRep Reg Reg -- src, dst
 474         | GNEG        MachRep Reg Reg -- src, dst
 475         | GSQRT       MachRep Reg Reg -- src, dst
 476         | GSIN        MachRep Reg Reg -- src, dst
 477         | GCOS        MachRep Reg Reg -- src, dst
 478         | GTAN        MachRep Reg Reg -- src, dst
 479
 480         | GFREE         -- do ffree on all x86 regs; an ugly hack
 481 #endif
 482
 483 #if x86_64_TARGET_ARCH
 484 -- SSE2 floating point: we use a restricted set of the available SSE2
 485 -- instructions for floating-point.
 486
 487         -- use MOV for moving (either movss or movsd (movlpd better?))
 488
 489         | CVTSS2SD      Reg Reg         -- F32 to F64
 490         | CVTSD2SS      Reg Reg         -- F64 to F32
 491         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
 492         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
 493         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 494         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 495
 496         -- use ADD & SUB for arithmetic.  In both cases, operands
 497         -- are  Operand Reg.
 498
 499         -- SSE2 floating-point division:
 500         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
 501
 502         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 503         -- compare single/double prec floating point respectively.
 504
 505         | SQRT          MachRep Operand Reg     -- src, dst
 506 #endif
 507
 508 -- Comparison
 509         | TEST          MachRep Operand Operand
 510         | CMP           MachRep Operand Operand
 511         | SETCC         Cond Operand
 512
 513 -- Stack Operations.
 514         | PUSH          MachRep Operand
 515         | POP           MachRep Operand
 516         -- both unused (SDM):
 517         --  | PUSHA
 518         --  | POPA
 519
 520 -- Jumping around.
 521         | JMP         Operand
 522         | JXX         Cond BlockId  -- includes unconditional branches
 523         | JXX_GBL     Cond Imm      -- non-local version of JXX
 524         | JMP_TBL     Operand [BlockId]  -- table jump
 525         | CALL        (Either Imm Reg) [Reg]
 526
 527 -- Other things.
 528         | CLTD MachRep   -- sign extend %eax into %edx:%eax
 529
 530         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
 531                            -- pretty-prints as
 532                            --       call 1f
 533                            -- 1:    popl %reg
 534                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 535         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
 536                            -- pretty-prints as
 537                            --       call 1f
 538                            -- 1:    popl %reg
 539
 540
 541 data Operand            -- operand of an x86 / x86_64 instruction
 542   = OpReg  Reg          -- register
 543   | OpImm  Imm          -- immediate value
 544   | OpAddr AddrMode     -- memory reference
 545
 546 #endif /* i386 or x86_64 */
 547
 548 #if i386_TARGET_ARCH
 549 -- Put GFREE before every CALL/JMP, but only if some fake FP insn is present.
 550 i386_insert_ffrees :: [Instr] -> [Instr]
 551 i386_insert_ffrees insns =
 552    if any is_G_instr insns
 553       then concatMap ffree_before_nonlocal_transfers insns
 554       else insns
 555
 556 -- Prefix a CALL or JMP with GFREE; any other instruction is left alone.
 557 ffree_before_nonlocal_transfers :: Instr -> [Instr]
 558 ffree_before_nonlocal_transfers insn@(CALL _ _) = [GFREE, insn]
 559 ffree_before_nonlocal_transfers insn@(JMP _)    = [GFREE, insn]
 560 ffree_before_nonlocal_transfers insn            = [insn]
 561
 562
 563 -- Is this one of the fake x86 FP instructions?  Whenever a new FP insn is
 564 -- added to the fake x86 FP insn set, it must be listed here as well.
 565 is_G_instr :: Instr -> Bool
 566 is_G_instr instr = case instr of
 567         GLDZ _       -> True;  GLD1 _       -> True
 568         GMOV _ _     -> True;  GFTOI _ _    -> True;  GDTOI _ _   -> True
 569         GITOF _ _    -> True;  GITOD _ _    -> True
 570         GLD _ _ _    -> True;  GST _ _ _    -> True;  GCMP _ _ _  -> True
 571         GABS _ _ _   -> True;  GNEG _ _ _   -> True;  GSQRT _ _ _ -> True
 572         GSIN _ _ _   -> True;  GCOS _ _ _   -> True;  GTAN _ _ _  -> True
 573         GADD _ _ _ _ -> True;  GDIV _ _ _ _ -> True
 574         GSUB _ _ _ _ -> True;  GMUL _ _ _ _ -> True
 575         -- GFREE is not expected in the input: it is what
 576         -- i386_insert_ffrees adds once this check has succeeded.
 577         GFREE        -> panic "is_G_instr: GFREE (!)"
 578         _            -> False
 579 #endif /* i386_TARGET_ARCH */
 580
 581
 582 -- -----------------------------------------------------------------------------
 583 -- Sparc instructions
 584
 585 #if sparc_TARGET_ARCH
 586
 587 -- data Instr continues...
 588
 589 -- Loads and stores.
 590               | LD            MachRep AddrMode Reg -- size, src, dst
 591               | ST            MachRep Reg AddrMode -- size, src, dst
 592
 593 -- Int Arithmetic.
 594               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 595               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 596               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 597               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 598               | RDY           Reg       -- move contents of Y register to reg
 599
 600 -- Simple bit-twiddling.
 601               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 602               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 603               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 604               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 605               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 606               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 607               | SLL           Reg RI Reg -- src1, src2, dst
 608               | SRL           Reg RI Reg -- src1, src2, dst
 609               | SRA           Reg RI Reg -- src1, src2, dst
 610               | SETHI         Imm Reg -- src, dst
 611               | NOP           -- Really SETHI 0, %g0, but worth an alias
 612
 613 -- Float Arithmetic.
 614
 615 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 616 -- instructions right up until we spit them out.
 617               | FABS          MachRep Reg Reg      -- src dst
 618               | FADD          MachRep Reg Reg Reg  -- src1, src2, dst
 619               | FCMP          Bool MachRep Reg Reg -- exception?, size, src1, src2
 620               | FDIV          MachRep Reg Reg Reg -- src1, src2, dst
 621               | FMOV          MachRep Reg Reg     -- src, dst
 622               | FMUL          MachRep Reg Reg Reg -- src1, src2, dst
 623               | FNEG          MachRep Reg Reg     -- src, dst
 624               | FSQRT         MachRep Reg Reg     -- src, dst
 625               | FSUB          MachRep Reg Reg Reg -- src1, src2, dst
 626               | FxTOy         MachRep MachRep Reg Reg -- src, dst
 627
 628 -- Jumping around.
 629               | BI            Cond Bool Imm -- cond, annul?, target
 630               | BF            Cond Bool Imm -- cond, annul?, target
 631
 632               | JMP           AddrMode     -- target
 633               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 634
 635 -- True for a literal zero immediate (ImmInt 0 or ImmInteger 0) or RealReg 0.
 636 riZero :: RI -> Bool
 637 riZero operand = case operand of
 638    RIImm (ImmInt 0)     -> True
 639    RIImm (ImmInteger 0) -> True
 640    RIReg (RealReg 0)    -> True;  _ -> False
 641
 642 -- Build the ADD that computes fp + n * wORD_SIZE into dst, i.e. the
 643 -- effective address the corresponding fpRel sequence would use.
 644 -- (fpRel itself is in MachRegs.lhs, because of module dependencies.)
 645 fpRelEA :: Int -> Reg -> Instr
 646 fpRelEA n dst = ADD False False fp (RIImm (ImmInt offset)) dst
 647    where offset = n * wORD_SIZE
 648
 649 -- Instruction that shifts the stack pointer by n words (n * wORD_SIZE).
 650 moveSp :: Int -> Instr
 651 moveSp n = ADD False False sp (RIImm (ImmInt delta)) sp
 652    where delta = n * wORD_SIZE
 653
 654 -- Second half of a double: an even-numbered RealReg n with n >= 32 maps to n+1.
 655 fPair :: Reg -> Reg
 656 fPair reg = case reg of { RealReg n | n >= 32 && even n -> RealReg (n+1)
 657                         ; _ -> pprPanic "fPair(sparc NCG)" (ppr reg) }
 658 #endif /* sparc_TARGET_ARCH */
 659
 660
 661 -- -----------------------------------------------------------------------------
 662 -- PowerPC instructions
 663
 664 #if powerpc_TARGET_ARCH
 665 -- data Instr continues...
 666
 667 -- Loads and stores.
 668               | LD      MachRep Reg AddrMode -- Load size, dst, src
 669               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 670               | ST      MachRep Reg AddrMode -- Store size, src, dst
 671               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 672               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 673               | LI      Reg Imm -- Load Immediate dst, src
 674               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 675
 676               | CMP     MachRep Reg RI --- size, src1, src2
 677               | CMPL    MachRep Reg RI --- size, src1, src2
 678
 679               | BCC     Cond BlockId
 680               | BCCFAR  Cond BlockId
 681               | JMP     CLabel          -- same as branch,
 682                                         -- but with CLabel instead of block ID
 683               | MTCTR   Reg
 684               | BCTR    [BlockId]       -- with list of local destinations
 685               | BL      CLabel [Reg]    -- with list of argument regs
 686               | BCTRL   [Reg]
 687
 688               | ADD     Reg Reg RI -- dst, src1, src2
 689               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 690               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 691               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 692               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 693               | MULLW   Reg Reg RI
 694               | DIVW    Reg Reg Reg
 695               | DIVWU   Reg Reg Reg
 696
 697               | MULLW_MayOflo Reg Reg Reg
 698                         -- dst = 1 if src1 * src2 overflows
 699                         -- pseudo-instruction; pretty-printed as:
 700                         -- mullwo. dst, src1, src2
 701                         -- mfxer dst
 702                         -- rlwinm dst, dst, 2, 31,31
 703
 704               | AND     Reg Reg RI -- dst, src1, src2
 705               | OR      Reg Reg RI -- dst, src1, src2
 706               | XOR     Reg Reg RI -- dst, src1, src2
 707               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 708
 709               | EXTS    MachRep Reg Reg
 710
 711               | NEG     Reg Reg
 712               | NOT     Reg Reg
 713
 714               | SLW     Reg Reg RI      -- shift left word
 715               | SRW     Reg Reg RI      -- shift right word
 716               | SRAW    Reg Reg RI      -- shift right arithmetic word
 717
 718                         -- Rotate Left Word Immediate then AND with Mask
 719               | RLWINM  Reg Reg Int Int Int
 720
 721               | FADD    MachRep Reg Reg Reg
 722               | FSUB    MachRep Reg Reg Reg
 723               | FMUL    MachRep Reg Reg Reg
 724               | FDIV    MachRep Reg Reg Reg
 725               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 726
 727               | FCMP    Reg Reg
 728
 729               | FCTIWZ  Reg Reg         -- convert to integer word
 730               | FRSP    Reg Reg         -- reduce to single precision
 731                                         -- (but destination is a FP register)
 732
 733               | CRNOR   Int Int Int    -- condition register nor
 734               | MFCR    Reg            -- move from condition register
 735
 736               | MFLR    Reg            -- move from link register
 737               | FETCHPC Reg            -- pseudo-instruction:
 738                                        -- bcl to next insn, mflr reg
 739
 740               | LWSYNC -- memory barrier
 741 #endif /* powerpc_TARGET_ARCH */