compiler/nativeGen/MachInstrs.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "nativeGen/NCG.h"
  10
  11 module MachInstrs (
  12         -- * Cmm instantiations
  13         NatCmm, NatCmmTop, NatBasicBlock,
  14
  15         -- * Machine instructions
  16         Instr(..),
  17         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  18 #if powerpc_TARGET_ARCH
  19         condNegate,
  20 #endif
  21 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  22         Size(..), machRepSize,
  23 #endif
  24         RI(..),
  25
  26 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  27         Operand(..),
  28 #endif
  29 #if i386_TARGET_ARCH
  30         i386_insert_ffrees,
  31 #endif
  32 #if sparc_TARGET_ARCH
  33         riZero, fpRelEA, moveSp, fPair,
  34 #endif
  35     ) where
  36
  37 #include "HsVersions.h"
  38
  39 import MachRegs
  40 import Cmm
  41 import MachOp           ( MachRep(..) )
  42 import CLabel           ( CLabel, pprCLabel )
  43 import Panic            ( panic )
  44 import Outputable
  45 import FastString
  46 import Constants       ( wORD_SIZE )
  47
  48 import GHC.Exts
  49
  50
  51 -- -----------------------------------------------------------------------------
  52 -- Our flavours of the Cmm types
  53
  54 -- Type synonyms for Cmm populated with native code
  55 type NatCmm        = GenCmm CmmStatic [CmmStatic] Instr
  56 type NatCmmTop     = GenCmmTop CmmStatic [CmmStatic] Instr
  57 type NatBasicBlock = GenBasicBlock Instr
  58
  59 -- -----------------------------------------------------------------------------
  60 -- Conditions on this architecture
  61
  62 data Cond
  63 #if alpha_TARGET_ARCH
  64   = ALWAYS      -- For BI (same as BR)
  65   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  66   | GE          -- For BI only
  67   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  68   | LE          -- For CMP and BI
  69   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  70   | NE          -- For BI only
  71   | NEVER       -- For BI (null instruction)
  72   | ULE         -- For CMP only
  73   | ULT         -- For CMP only
  74 #endif
  75 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  76   = ALWAYS      -- What's really used? ToDo
  77   | EQQ
  78   | GE
  79   | GEU
  80   | GTT
  81   | GU
  82   | LE
  83   | LEU
  84   | LTT
  85   | LU
  86   | NE
  87   | NEG
  88   | POS
  89   | CARRY
  90   | OFLO
  91   | PARITY
  92   | NOTPARITY
  93 #endif
  94 #if sparc_TARGET_ARCH
  95   = ALWAYS      -- What's really used? ToDo
  96   | EQQ
  97   | GE
  98   | GEU
  99   | GTT
 100   | GU
 101   | LE
 102   | LEU
 103   | LTT
 104   | LU
 105   | NE
 106   | NEG
 107   | NEVER
 108   | POS
 109   | VC
 110   | VS
 111 #endif
 112 #if powerpc_TARGET_ARCH
 113   = ALWAYS
 114   | EQQ
 115   | GE
 116   | GEU
 117   | GTT
 118   | GU
 119   | LE
 120   | LEU
 121   | LTT
 122   | LU
 123   | NE
 124 #endif
 125     deriving Eq  -- to make an assertion work
 126
 127 condUnsigned GU  = True
 128 condUnsigned LU  = True
 129 condUnsigned GEU = True
 130 condUnsigned LEU = True
 131 condUnsigned _   = False
 132
 133 condToSigned GU  = GTT
 134 condToSigned LU  = LTT
 135 condToSigned GEU = GE
 136 condToSigned LEU = LE
 137 condToSigned x   = x
 138
 139 condToUnsigned GTT = GU
 140 condToUnsigned LTT = LU
 141 condToUnsigned GE  = GEU
 142 condToUnsigned LE  = LEU
 143 condToUnsigned x   = x
 144
 145 #if powerpc_TARGET_ARCH
 146 condNegate ALWAYS  = panic "condNegate: ALWAYS"
 147 condNegate EQQ     = NE
 148 condNegate GE      = LTT
 149 condNegate GEU     = LU
 150 condNegate GTT     = LE
 151 condNegate GU      = LEU
 152 condNegate LE      = GTT
 153 condNegate LEU     = GU
 154 condNegate LTT     = GE
 155 condNegate LU      = GEU
 156 condNegate NE      = EQQ
 157 #endif
 158
 159 -- -----------------------------------------------------------------------------
 160 -- Sizes on this architecture
 161
 162 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 163 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 164
 165 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
 166 data Size
 167 #if alpha_TARGET_ARCH
 168     = B     -- byte
 169     | Bu
 170 --  | W     -- word (2 bytes): UNUSED
 171 --  | Wu    -- : UNUSED
 172     | L     -- longword (4 bytes)
 173     | Q     -- quadword (8 bytes)
 174 --  | FF    -- VAX F-style floating pt: UNUSED
 175 --  | GF    -- VAX G-style floating pt: UNUSED
 176 --  | DF    -- VAX D-style floating pt: UNUSED
 177 --  | SF    -- IEEE single-precision floating pt: UNUSED
 178     | TF    -- IEEE double-precision floating pt
 179 #endif
 180 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 181     = B     -- byte (signed)
 182     | Bu    -- byte (unsigned)
 183     | H     -- halfword (signed, 2 bytes)
 184     | Hu    -- halfword (unsigned, 2 bytes)
 185     | W     -- word (4 bytes)
 186     | F     -- IEEE single-precision floating pt
 187     | DF    -- IEEE single-precision floating pt
 188 #endif
 189   deriving Eq
 190
 191 machRepSize :: MachRep -> Size
 192 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 193 machRepSize I16   = IF_ARCH_alpha(err,IF_ARCH_sparc(Hu, ))
 194 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 195 machRepSize I64   = panic "machRepSize: I64"
 196 machRepSize I128  = panic "machRepSize: I128"
 197 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 198 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 199 #endif
 200
 201 -- -----------------------------------------------------------------------------
 202 -- Register or immediate (a handy type on some platforms)
 203
 204 data RI = RIReg Reg
 205         | RIImm Imm
 206
 207
 208 -- -----------------------------------------------------------------------------
 209 -- Machine's assembly language
 210
 211 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 212 -- mostly all of 'Instr' is machine-specific.
 213
 214 data Instr
 215   = COMMENT FastString          -- comment pseudo-op
 216
 217   | LDATA   Section [CmmStatic] -- some static data spat out during code
 218                                 -- generation.  Will be extracted before
 219                                 -- pretty-printing.
 220
 221   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 222                                 -- codegen, removed later.  Preceding
 223                                 -- instruction should be a jump, as per the
 224                                 -- invariants for a BasicBlock (see Cmm).
 225
 226   | DELTA   Int                 -- specify current stack offset for
 227                                 -- benefit of subsequent passes
 228
 229   | SPILL   Reg Int             -- ^ spill this reg to a stack slot
 230   | RELOAD  Int Reg             -- ^ reload this reg from a stack slot
 231
 232 -- -----------------------------------------------------------------------------
 233 -- Alpha instructions
 234
 235 #if alpha_TARGET_ARCH
 236
 237 -- data Instr continues...
 238
 239 -- Loads and stores.
 240               | LD            Size Reg AddrMode -- size, dst, src
 241               | LDA           Reg AddrMode      -- dst, src
 242               | LDAH          Reg AddrMode      -- dst, src
 243               | LDGP          Reg AddrMode      -- dst, src
 244               | LDI           Size Reg Imm     -- size, dst, src
 245               | ST            Size Reg AddrMode -- size, src, dst
 246
 247 -- Int Arithmetic.
 248               | CLR           Reg                   -- dst
 249               | ABS           Size RI Reg           -- size, src, dst
 250               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 251               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 252               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 253               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 254               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 255               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 256               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 257               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 258
 259 -- Simple bit-twiddling.
 260               | NOT           RI Reg
 261               | AND           Reg RI Reg
 262               | ANDNOT        Reg RI Reg
 263               | OR            Reg RI Reg
 264               | ORNOT         Reg RI Reg
 265               | XOR           Reg RI Reg
 266               | XORNOT        Reg RI Reg
 267               | SLL           Reg RI Reg
 268               | SRL           Reg RI Reg
 269               | SRA           Reg RI Reg
 270
 271               | ZAP           Reg RI Reg
 272               | ZAPNOT        Reg RI Reg
 273
 274               | NOP
 275
 276 -- Comparison
 277               | CMP           Cond Reg RI Reg
 278
 279 -- Float Arithmetic.
 280               | FCLR          Reg
 281               | FABS          Reg Reg
 282               | FNEG          Size Reg Reg
 283               | FADD          Size Reg Reg Reg
 284               | FDIV          Size Reg Reg Reg
 285               | FMUL          Size Reg Reg Reg
 286               | FSUB          Size Reg Reg Reg
 287               | CVTxy         Size Size Reg Reg
 288               | FCMP          Size Cond Reg Reg Reg
 289               | FMOV          Reg Reg
 290
 291 -- Jumping around.
 292               | BI            Cond Reg Imm
 293               | BF            Cond Reg Imm
 294               | BR            Imm
 295               | JMP           Reg AddrMode Int
 296               | BSR           Imm Int
 297               | JSR           Reg AddrMode Int
 298
 299 -- Alpha-specific pseudo-ops.
 300               | FUNBEGIN CLabel
 301               | FUNEND CLabel
 302
 303 data RI
 304   = RIReg Reg
 305   | RIImm Imm
 306
 307 #endif /* alpha_TARGET_ARCH */
 308
 309
 310 -- -----------------------------------------------------------------------------
 311 -- Intel x86 instructions
 312
 313 {-
 314 Intel, in their infinite wisdom, selected a stack model for floating
 315 point registers on x86.  That might have made sense back in 1979 --
 316 nowadays we can see it for the nonsense it really is.  A stack model
 317 fits poorly with the existing nativeGen infrastructure, which assumes
 318 flat integer and FP register sets.  Prior to this commit, nativeGen
 319 could not generate correct x86 FP code -- to do so would have meant
 320 somehow working the register-stack paradigm into the register
 321 allocator and spiller, which sounds very difficult.
 322
 323 We have decided to cheat, and go for a simple fix which requires no
 324 infrastructure modifications, at the expense of generating ropey but
 325 correct FP code.  All notions of the x86 FP stack and its insns have
 326 been removed.  Instead, we pretend (to the instruction selector and
 327 register allocator) that x86 has six floating point registers, %fake0
 328 .. %fake5, which can be used in the usual flat manner.  We further
 329 claim that x86 has floating point instructions very similar to SPARC
 330 and Alpha, that is, a simple 3-operand register-register arrangement.
 331 Code generation and register allocation proceed on this basis.
 332
 333 When we come to print out the final assembly, our convenient fiction
 334 is converted to dismal reality.  Each fake instruction is
 335 independently converted to a series of real x86 instructions.
 336 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 337 arithmetic operations, the two operands are pushed onto the top of the
 338 FP stack, the operation done, and the result copied back into the
 339 relevant register.  There are only six %fake registers because 2 are
 340 needed for the translation, and x86 has 8 in total.
 341
 342 The translation is inefficient but is simple and it works.  A cleverer
 343 translation would handle a sequence of insns, simulating the FP stack
 344 contents, would not impose a fixed mapping from %fake to %st regs, and
 345 hopefully could avoid most of the redundant reg-reg moves of the
 346 current translation.
 347
 348 We might as well make use of whatever unique FP facilities Intel have
 349 chosen to bless us with (let's not be churlish, after all).
 350 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 351 -}
 352
 353 {-
 354 MORE FLOATING POINT MUSINGS...
 355
 356 Intel's internal floating point registers are by default 80 bit
 357 extended precision.  This means that all operations done on values in
 358 registers are done at 80 bits, and unless the intermediate values are
 359 truncated to the appropriate size (32 or 64 bits) by storing in
 360 memory, calculations in registers will give different results from
 361 calculations which pass intermediate values in memory (eg. via
 362 function calls).
 363
 364 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 365 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 366 that this will only affect 64-bit precision arithmetic; 32-bit
 367 calculations will still be done at 64-bit precision in registers.  So
 368 it doesn't solve the whole problem.
 369
 370 There's also the issue of what the C library is expecting in terms of
 371 precision.  It seems to be the case that glibc on Linux expects the
 372 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 373 unexpected effects.  Changing the default could have undesirable
 374 effects on other 3rd-party library code too, so the right thing would
 375 be to save/restore the FPU control word across Haskell code if we were
 376 to do this.
 377
 378 gcc's -ffloat-store gives consistent results by always storing the
 379 results of floating-point calculations in memory, which works for both
 380 32 and 64-bit precision.  However, it only affects the values of
 381 user-declared floating point variables in C, not intermediate results.
 382 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 383 flag).
 384
 385 Another problem is how to spill floating point registers in the
 386 register allocator.  Should we spill the whole 80 bits, or just 64?
 387 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 388 Linux, spilling 64 bits will round the results of some operations.
 389 This is what gcc does.  Spilling at 80 bits requires taking up a full
 390 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 391 the alignment problems.
 392
 393 In the future, we'll use the SSE registers for floating point.  This
 394 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 395 precision float ops), which means P4 or Xeon and above.  Using SSE
 396 will solve all these problems, because the SSE registers use fixed 32
 397 bit or 64 bit precision.
 398
 399 --SDM 1/2003
 400 -}
 401
 402 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 403
 404 -- data Instr continues...
 405
 406 -- Moves.
 407         | MOV         MachRep Operand Operand
 408         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 409         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 410         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 411         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 412         -- don't affect the high bits of the register.
 413
 414 -- Load effective address (also a very useful three-operand add instruction :-)
 415         | LEA         MachRep Operand Operand
 416
 417 -- Int Arithmetic.
 418         | ADD         MachRep Operand Operand
 419         | ADC         MachRep Operand Operand
 420         | SUB         MachRep Operand Operand
 421
 422         | MUL         MachRep Operand Operand
 423         | IMUL        MachRep Operand Operand   -- signed int mul
 424         | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
 425
 426         | DIV         MachRep Operand   -- eax := eax:edx/op, edx := eax:edx%op
 427         | IDIV        MachRep Operand   -- ditto, but signed
 428
 429 -- Simple bit-twiddling.
 430         | AND         MachRep Operand Operand
 431         | OR          MachRep Operand Operand
 432         | XOR         MachRep Operand Operand
 433         | NOT         MachRep Operand
 434         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
 435
 436 -- Shifts (amount may be immediate or %cl only)
 437         | SHL         MachRep Operand{-amount-} Operand
 438         | SAR         MachRep Operand{-amount-} Operand
 439         | SHR         MachRep Operand{-amount-} Operand
 440
 441         | BT          MachRep Imm Operand
 442         | NOP
 443
 444 #if i386_TARGET_ARCH
 445 -- Float Arithmetic.
 446
 447 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 448 -- as single instructions right up until we spit them out.
 449         -- all the 3-operand fake fp insns are src1 src2 dst
 450         -- and furthermore are constrained to be fp regs only.
 451         -- IMPORTANT: keep is_G_insn up to date with any changes here
 452         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 453         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 454         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 455
 456         | GLDZ        Reg -- dst(fpreg)
 457         | GLD1        Reg -- dst(fpreg)
 458
 459         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 460         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 461
 462         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 463         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 464
 465         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 466         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 467         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 468         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 469
 470                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 471                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 472                 -- comparable and the comparison is True.  Subsequent code must
 473                 -- test the %eflags zero flag regardless of the supplied Cond.
 474         | GCMP        Cond Reg Reg -- src1, src2
 475
 476         | GABS        MachRep Reg Reg -- src, dst
 477         | GNEG        MachRep Reg Reg -- src, dst
 478         | GSQRT       MachRep Reg Reg -- src, dst
 479         | GSIN        MachRep Reg Reg -- src, dst
 480         | GCOS        MachRep Reg Reg -- src, dst
 481         | GTAN        MachRep Reg Reg -- src, dst
 482
 483         | GFREE         -- do ffree on all x86 regs; an ugly hack
 484 #endif
 485
 486 #if x86_64_TARGET_ARCH
 487 -- SSE2 floating point: we use a restricted set of the available SSE2
 488 -- instructions for floating-point.
 489
 490         -- use MOV for moving (either movss or movsd (movlpd better?))
 491
 492         | CVTSS2SD      Reg Reg         -- F32 to F64
 493         | CVTSD2SS      Reg Reg         -- F64 to F32
 494         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
 495         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
 496         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 497         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 498
 499         -- use ADD & SUB for arithmetic.  In both cases, operands
 500         -- are  Operand Reg.
 501
 502         -- SSE2 floating-point division:
 503         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
 504
 505         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 506         -- compare single/double prec floating point respectively.
 507
 508         | SQRT          MachRep Operand Reg     -- src, dst
 509 #endif
 510
 511 -- Comparison
 512         | TEST          MachRep Operand Operand
 513         | CMP           MachRep Operand Operand
 514         | SETCC         Cond Operand
 515
 516 -- Stack Operations.
 517         | PUSH          MachRep Operand
 518         | POP           MachRep Operand
 519         -- both unused (SDM):
 520         --  | PUSHA
 521         --  | POPA
 522
 523 -- Jumping around.
 524         | JMP         Operand
 525         | JXX         Cond BlockId  -- includes unconditional branches
 526         | JXX_GBL     Cond Imm      -- non-local version of JXX
 527         | JMP_TBL     Operand [BlockId]  -- table jump
 528         | CALL        (Either Imm Reg) [Reg]
 529
 530 -- Other things.
 531         | CLTD MachRep   -- sign extend %eax into %edx:%eax
 532
 533         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
 534                            -- pretty-prints as
 535                            --       call 1f
 536                            -- 1:    popl %reg
 537                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 538         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
 539                            -- pretty-prints as
 540                            --       call 1f
 541                            -- 1:    popl %reg
 542
 543
 544 data Operand
 545   = OpReg  Reg          -- register
 546   | OpImm  Imm          -- immediate value
 547   | OpAddr AddrMode     -- memory reference
 548
 549 #endif /* i386 or x86_64 */
 550
 551 #if i386_TARGET_ARCH
 552 i386_insert_ffrees :: [Instr] -> [Instr]
 553 i386_insert_ffrees insns
 554    | any is_G_instr insns
 555    = concatMap ffree_before_nonlocal_transfers insns
 556    | otherwise
 557    = insns
 558
 559 ffree_before_nonlocal_transfers insn
 560    = case insn of
 561         CALL _ _ -> [GFREE, insn]
 562         JMP _    -> [GFREE, insn]
 563         other    -> [insn]
 564
 565
 566 -- if you ever add a new FP insn to the fake x86 FP insn set,
 567 -- you must update this too
 568 is_G_instr :: Instr -> Bool
 569 is_G_instr instr
 570    = case instr of
 571         GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
 572         GLDZ _ -> True; GLD1 _ -> True
 573         GFTOI _ _ -> True; GDTOI _ _ -> True
 574         GITOF _ _ -> True; GITOD _ _ -> True
 575         GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
 576         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
 577         GCMP _ _ _ -> True; GABS _ _ _ -> True
 578         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
 579         GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
 580         GFREE -> panic "is_G_instr: GFREE (!)"
 581         other -> False
 582 #endif /* i386_TARGET_ARCH */
 583
 584
 585 -- -----------------------------------------------------------------------------
 586 -- Sparc instructions
 587
 588 #if sparc_TARGET_ARCH
 589
 590 -- data Instr continues...
 591
 592 -- Loads and stores.
 593               | LD            MachRep AddrMode Reg -- size, src, dst
 594               | ST            MachRep Reg AddrMode -- size, src, dst
 595
 596 -- Int Arithmetic.
 597               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 598               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 599               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 600               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 601               | RDY           Reg       -- move contents of Y register to reg
 602
 603 -- Simple bit-twiddling.
 604               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 605               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 606               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 607               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 608               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 609               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 610               | SLL           Reg RI Reg -- src1, src2, dst
 611               | SRL           Reg RI Reg -- src1, src2, dst
 612               | SRA           Reg RI Reg -- src1, src2, dst
 613               | SETHI         Imm Reg -- src, dst
 614               | NOP           -- Really SETHI 0, %g0, but worth an alias
 615
 616 -- Float Arithmetic.
 617
 618 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 619 -- instructions right up until we spit them out.
 620               | FABS          MachRep Reg Reg      -- src dst
 621               | FADD          MachRep Reg Reg Reg  -- src1, src2, dst
 622               | FCMP          Bool MachRep Reg Reg -- exception?, src1, src2, dst
 623               | FDIV          MachRep Reg Reg Reg -- src1, src2, dst
 624               | FMOV          MachRep Reg Reg     -- src, dst
 625               | FMUL          MachRep Reg Reg Reg -- src1, src2, dst
 626               | FNEG          MachRep Reg Reg     -- src, dst
 627               | FSQRT         MachRep Reg Reg     -- src, dst
 628               | FSUB          MachRep Reg Reg Reg -- src1, src2, dst
 629               | FxTOy         MachRep MachRep Reg Reg -- src, dst
 630
 631 -- Jumping around.
 632               | BI            Cond Bool Imm -- cond, annul?, target
 633               | BF            Cond Bool Imm -- cond, annul?, target
 634
 635               | JMP           AddrMode     -- target
 636               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 637
 638 riZero :: RI -> Bool
 639
 640 riZero (RIImm (ImmInt 0))           = True
 641 riZero (RIImm (ImmInteger 0))       = True
 642 riZero (RIReg (RealReg 0))          = True
 643 riZero _                            = False
 644
 645 -- Calculate the effective address which would be used by the
 646 -- corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
 647 -- alas -- can't have fpRelEA here because of module dependencies.
 648 fpRelEA :: Int -> Reg -> Instr
 649 fpRelEA n dst
 650    = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
 651
 652 -- Code to shift the stack pointer by n words.
 653 moveSp :: Int -> Instr
 654 moveSp n
 655    = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
 656
 657 -- Produce the second-half-of-a-double register given the first half.
 658 fPair :: Reg -> Reg
 659 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0  = RealReg (n+1)
 660 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
 661 #endif /* sparc_TARGET_ARCH */
 662
 663
 664 -- -----------------------------------------------------------------------------
 665 -- PowerPC instructions
 666
 667 #ifdef powerpc_TARGET_ARCH
 668 -- data Instr continues...
 669
 670 -- Loads and stores.
 671               | LD      MachRep Reg AddrMode -- Load size, dst, src
 672               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 673               | ST      MachRep Reg AddrMode -- Store size, src, dst
 674               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 675               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 676               | LI      Reg Imm -- Load Immediate dst, src
 677               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 678
 679               | CMP     MachRep Reg RI --- size, src1, src2
 680               | CMPL    MachRep Reg RI --- size, src1, src2
 681
 682               | BCC     Cond BlockId
 683               | BCCFAR  Cond BlockId
 684               | JMP     CLabel          -- same as branch,
 685                                         -- but with CLabel instead of block ID
 686               | MTCTR   Reg
 687               | BCTR    [BlockId]       -- with list of local destinations
 688               | BL      CLabel [Reg]    -- with list of argument regs
 689               | BCTRL   [Reg]
 690
 691               | ADD     Reg Reg RI -- dst, src1, src2
 692               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 693               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 694               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 695               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 696               | MULLW   Reg Reg RI
 697               | DIVW    Reg Reg Reg
 698               | DIVWU   Reg Reg Reg
 699
 700               | MULLW_MayOflo Reg Reg Reg
 701                         -- dst = 1 if src1 * src2 overflows
 702                         -- pseudo-instruction; pretty-printed as:
 703                         -- mullwo. dst, src1, src2
 704                         -- mfxer dst
 705                         -- rlwinm dst, dst, 2, 31,31
 706
 707               | AND     Reg Reg RI -- dst, src1, src2
 708               | OR      Reg Reg RI -- dst, src1, src2
 709               | XOR     Reg Reg RI -- dst, src1, src2
 710               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 711
 712               | EXTS    MachRep Reg Reg
 713
 714               | NEG     Reg Reg
 715               | NOT     Reg Reg
 716
 717               | SLW     Reg Reg RI      -- shift left word
 718               | SRW     Reg Reg RI      -- shift right word
 719               | SRAW    Reg Reg RI      -- shift right arithmetic word
 720
 721                         -- Rotate Left Word Immediate then AND with Mask
 722               | RLWINM  Reg Reg Int Int Int
 723
 724               | FADD    MachRep Reg Reg Reg
 725               | FSUB    MachRep Reg Reg Reg
 726               | FMUL    MachRep Reg Reg Reg
 727               | FDIV    MachRep Reg Reg Reg
 728               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 729
 730               | FCMP    Reg Reg
 731
 732               | FCTIWZ  Reg Reg         -- convert to integer word
 733               | FRSP    Reg Reg         -- reduce to single precision
 734                                         -- (but destination is a FP register)
 735
 736               | CRNOR   Int Int Int    -- condition register nor
 737               | MFCR    Reg            -- move from condition register
 738
 739               | MFLR    Reg            -- move from link register
 740               | FETCHPC Reg            -- pseudo-instruction:
 741                                        -- bcl to next insn, mflr reg
 742
 743               | LWSYNC -- memory barrier
 744 #endif /* powerpc_TARGET_ARCH */