ghc/compiler/nativeGen/MachInstrs.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "nativeGen/NCG.h"
  10
  11 module MachInstrs (
  12         -- * Cmm instantiations
  13         NatCmm, NatCmmTop, NatBasicBlock,
  14
  15         -- * Machine instructions
  16         Instr(..),
  17         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  18
  19 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  20         Size(..), machRepSize,
  21 #endif
  22         RI(..),
  23
  24 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  25         Operand(..),
  26 #endif
  27 #if i386_TARGET_ARCH
  28         i386_insert_ffrees,
  29 #endif
  30 #if sparc_TARGET_ARCH
  31         riZero, fpRelEA, moveSp, fPair,
  32 #endif
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import MachRegs
  38 import Cmm
  39 import MachOp           ( MachRep(..) )
  40 import CLabel           ( CLabel, pprCLabel )
  41 import Panic            ( panic )
  42 import Outputable
  43 import FastString
  44
  45 import GLAEXTS
  46
  47
  48 -- -----------------------------------------------------------------------------
  49 -- Our flavours of the Cmm types
  50
  51 -- The generic Cmm containers specialised to CmmStatic data and native Instr code
  52 type NatCmm = GenCmm CmmStatic Instr
  53 type NatCmmTop = GenCmmTop CmmStatic Instr
  54 type NatBasicBlock = GenBasicBlock Instr
  55
  56 -- -----------------------------------------------------------------------------
  57 -- Conditions on this architecture
  58
  59 data Cond     -- condition codes; each CPP branch below supplies one constructor list
  60 #if alpha_TARGET_ARCH
  61   = ALWAYS      -- For BI (same as BR)
  62   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  63   | GE          -- For BI only
  64   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  65   | LE          -- For CMP and BI
  66   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  67   | NE          -- For BI only
  68   | NEVER       -- For BI (null instruction)
  69   | ULE         -- For CMP only
  70   | ULT         -- For CMP only
  71 #endif
  72 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  73   = ALWAYS      -- What's really used? ToDo
  74   | EQQ
  75   | GE
  76   | GEU         -- unsigned form of GE  (see condToUnsigned)
  77   | GTT
  78   | GU          -- unsigned form of GTT (see condToUnsigned)
  79   | LE
  80   | LEU         -- unsigned form of LE  (see condToUnsigned)
  81   | LTT
  82   | LU          -- unsigned form of LTT (see condToUnsigned)
  83   | NE
  84   | NEG
  85   | POS
  86   | CARRY
  87   | OFLO
  88 #endif
  89 #if sparc_TARGET_ARCH
  90   = ALWAYS      -- What's really used? ToDo
  91   | EQQ
  92   | GE
  93   | GEU         -- unsigned form of GE  (see condToUnsigned)
  94   | GTT
  95   | GU          -- unsigned form of GTT (see condToUnsigned)
  96   | LE
  97   | LEU         -- unsigned form of LE  (see condToUnsigned)
  98   | LTT
  99   | LU          -- unsigned form of LTT (see condToUnsigned)
 100   | NE
 101   | NEG
 102   | NEVER
 103   | POS
 104   | VC
 105   | VS
 106 #endif
 107 #if powerpc_TARGET_ARCH
 108   = ALWAYS
 109   | EQQ
 110   | GE
 111   | GEU         -- unsigned form of GE  (see condToUnsigned)
 112   | GTT
 113   | GU          -- unsigned form of GTT (see condToUnsigned)
 114   | LE
 115   | LEU         -- unsigned form of LE  (see condToUnsigned)
 116   | LTT
 117   | LU          -- unsigned form of LTT (see condToUnsigned)
 118   | NE
 119 #endif
 120     deriving Eq  -- to make an assertion work
 121
 122 -- True exactly for the four unsigned comparisons; False for everything else.
 123 condUnsigned :: Cond -> Bool
 124 condUnsigned cond = cond `elem` unsignedConds
 125   where
 126     unsignedConds = [GU, LU, GEU, LEU]
 127
 128 -- Replace an unsigned comparison by its signed counterpart; others unchanged.
 129 condToSigned :: Cond -> Cond
 130 condToSigned cond = case cond of
 131     GU -> GTT; LU -> LTT; GEU -> GE; LEU -> LE
 132     _  -> cond
 133
 134 -- Replace a signed comparison by its unsigned counterpart; others unchanged.
 135 condToUnsigned :: Cond -> Cond
 136 condToUnsigned cond = case cond of
 137     GTT -> GU; LTT -> LU; GE -> GEU; LE -> LEU
 138     _   -> cond
 139
 140 -- -----------------------------------------------------------------------------
 141 -- Sizes on this architecture
 142
 143 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 144 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 145
 146 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
 147 data Size     -- operand sizes; the constructors come from one CPP branch below
 148 #if alpha_TARGET_ARCH
 149     = B     -- byte
 150     | Bu
 151 --  | W     -- word (2 bytes): UNUSED
 152 --  | Wu    -- : UNUSED
 153     | L     -- longword (4 bytes)
 154     | Q     -- quadword (8 bytes)
 155 --  | FF    -- VAX F-style floating pt: UNUSED
 156 --  | GF    -- VAX G-style floating pt: UNUSED
 157 --  | DF    -- VAX D-style floating pt: UNUSED
 158 --  | SF    -- IEEE single-precision floating pt: UNUSED
 159     | TF    -- IEEE double-precision floating pt
 160 #endif
 161 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 162     = B     -- byte (signed)
 163     | Bu    -- byte (unsigned)
 164     | H     -- halfword (signed, 2 bytes)
 165     | Hu    -- halfword (unsigned, 2 bytes)
 166     | W     -- word (4 bytes)
 167     | F     -- IEEE single-precision floating pt
 168     | DF    -- IEEE double-precision floating pt (machRepSize maps F64 here)
 169 #endif
 170   deriving Eq
 171 -- Size for a MachRep.  Panics for I64/I128, and for I16 on Alpha (no 2-byte Size).
 172 machRepSize :: MachRep -> Size
 173 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 174 machRepSize I16   = IF_ARCH_alpha(panic "machRepSize: I16", IF_ARCH_sparc(Hu, ))
 175 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 176 machRepSize I64   = panic "machRepSize: I64"
 177 machRepSize I128  = panic "machRepSize: I128"
 178 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 179 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 180 #endif
 181
 182 -- -----------------------------------------------------------------------------
 183 -- Register or immediate (a handy type on some platforms)
 184
 185 data RI                  -- an operand that is either a register or an immediate
 186   = RIReg Reg | RIImm Imm
 187
 188
 189 -- -----------------------------------------------------------------------------
 190 -- Machine's assembly language
 191
 192 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 193 -- mostly all of 'Instr' is machine-specific.
 194
 195 data Instr    -- NB: this constructor list continues inside the per-architecture CPP sections
 196   = COMMENT FastString          -- comment pseudo-op
 197
 198   | LDATA   Section [CmmStatic] -- some static data spat out during code
 199                                 -- generation.  Will be extracted before
 200                                 -- pretty-printing.
 201
 202   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 203                                 -- codegen, removed later.  The preceding
 204                                 -- instruction should be a jump, as per the
 205                                 -- invariants for a BasicBlock (see Cmm).
 206
 207   | DELTA   Int                 -- records the current stack offset for the
 208                                 -- benefit of subsequent passes
 209
 210 -- -----------------------------------------------------------------------------
 211 -- Alpha instructions
 212
 213 #if alpha_TARGET_ARCH
 214
 215 -- data Instr continues: constructors that exist only in Alpha builds.
 216
 217 -- Loads and stores.
 218               | LD            Size Reg AddrMode -- size, dst, src
 219               | LDA           Reg AddrMode      -- dst, src
 220               | LDAH          Reg AddrMode      -- dst, src
 221               | LDGP          Reg AddrMode      -- dst, src
 222               | LDI           Size Reg Imm     -- size, dst, src
 223               | ST            Size Reg AddrMode -- size, src, dst
 224
 225 -- Int Arithmetic.
 226               | CLR           Reg                   -- dst
 227               | ABS           Size RI Reg           -- size, src, dst
 228               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 229               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 230               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 231               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 232               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 233               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 234               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 235               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 236
 237 -- Simple bit-twiddling.  NOTE(review): operand roles undocumented; presumably src, src, dst as for ADD -- confirm.
 238               | NOT           RI Reg
 239               | AND           Reg RI Reg
 240               | ANDNOT        Reg RI Reg
 241               | OR            Reg RI Reg
 242               | ORNOT         Reg RI Reg
 243               | XOR           Reg RI Reg
 244               | XORNOT        Reg RI Reg
 245               | SLL           Reg RI Reg
 246               | SRL           Reg RI Reg
 247               | SRA           Reg RI Reg
 248
 249               | ZAP           Reg RI Reg
 250               | ZAPNOT        Reg RI Reg
 251
 252               | NOP
 253
 254 -- Comparison
 255               | CMP           Cond Reg RI Reg
 256
 257 -- Float Arithmetic.
 258               | FCLR          Reg
 259               | FABS          Reg Reg
 260               | FNEG          Size Reg Reg
 261               | FADD          Size Reg Reg Reg
 262               | FDIV          Size Reg Reg Reg
 263               | FMUL          Size Reg Reg Reg
 264               | FSUB          Size Reg Reg Reg
 265               | CVTxy         Size Size Reg Reg
 266               | FCMP          Size Cond Reg Reg Reg
 267               | FMOV          Reg Reg
 268
 269 -- Jumping around.  NOTE(review): the meaning of the trailing Int on JMP/BSR/JSR is not stated here.
 270               | BI            Cond Reg Imm
 271               | BF            Cond Reg Imm
 272               | BR            Imm
 273               | JMP           Reg AddrMode Int
 274               | BSR           Imm Int
 275               | JSR           Reg AddrMode Int
 276
 277 -- Alpha-specific pseudo-ops.
 278               | FUNBEGIN CLabel
 279               | FUNEND CLabel
 280
 281 -- RI (the register-or-immediate operand type) is declared once,
 282 -- unconditionally, earlier in this module.  It must not be declared again
 283 -- in this section: a second declaration is a multiple-declaration error.
 284
 285 #endif /* alpha_TARGET_ARCH */
 286
 287
 288 -- -----------------------------------------------------------------------------
 289 -- Intel x86 instructions
 290
 291 {-
 292 Intel, in their infinite wisdom, selected a stack model for floating
 293 point registers on x86.  That might have made sense back in 1979 --
 294 nowadays we can see it for the nonsense it really is.  A stack model
 295 fits poorly with the existing nativeGen infrastructure, which assumes
 296 flat integer and FP register sets.  Prior to this commit, nativeGen
 297 could not generate correct x86 FP code -- to do so would have meant
 298 somehow working the register-stack paradigm into the register
 299 allocator and spiller, which sounds very difficult.
 300
 301 We have decided to cheat, and go for a simple fix which requires no
 302 infrastructure modifications, at the expense of generating ropey but
 303 correct FP code.  All notions of the x86 FP stack and its insns have
 304 been removed.  Instead, we pretend (to the instruction selector and
 305 register allocator) that x86 has six floating point registers, %fake0
 306 .. %fake5, which can be used in the usual flat manner.  We further
 307 claim that x86 has floating point instructions very similar to SPARC
 308 and Alpha, that is, a simple 3-operand register-register arrangement.
 309 Code generation and register allocation proceed on this basis.
 310
 311 When we come to print out the final assembly, our convenient fiction
 312 is converted to dismal reality.  Each fake instruction is
 313 independently converted to a series of real x86 instructions.
 314 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 315 arithmetic operations, the two operands are pushed onto the top of the
 316 FP stack, the operation done, and the result copied back into the
 317 relevant register.  There are only six %fake registers because 2 are
 318 needed for the translation, and x86 has 8 in total.
 319
 320 The translation is inefficient but is simple and it works.  A cleverer
 321 translation would handle a sequence of insns, simulating the FP stack
 322 contents, would not impose a fixed mapping from %fake to %st regs, and
 323 hopefully could avoid most of the redundant reg-reg moves of the
 324 current translation.
 325
 326 We might as well make use of whatever unique FP facilities Intel have
 327 chosen to bless us with (let's not be churlish, after all).
 328 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 329 -}
 330
 331 {-
 332 MORE FLOATING POINT MUSINGS...
 333
 334 Intel's internal floating point registers are by default 80 bit
 335 extended precision.  This means that all operations done on values in
 336 registers are done at 80 bits, and unless the intermediate values are
 337 truncated to the appropriate size (32 or 64 bits) by storing in
 338 memory, calculations in registers will give different results from
 339 calculations which pass intermediate values in memory (eg. via
 340 function calls).
 341
 342 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 343 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 344 that this will only affect 64-bit precision arithmetic; 32-bit
 345 calculations will still be done at 64-bit precision in registers.  So
 346 it doesn't solve the whole problem.
 347
 348 There's also the issue of what the C library is expecting in terms of
 349 precision.  It seems to be the case that glibc on Linux expects the
 350 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 351 unexpected effects.  Changing the default could have undesirable
 352 effects on other 3rd-party library code too, so the right thing would
 353 be to save/restore the FPU control word across Haskell code if we were
 354 to do this.
 355
 356 gcc's -ffloat-store gives consistent results by always storing the
 357 results of floating-point calculations in memory, which works for both
 358 32 and 64-bit precision.  However, it only affects the values of
 359 user-declared floating point variables in C, not intermediate results.
 360 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 361 flag).
 362
 363 Another problem is how to spill floating point registers in the
 364 register allocator.  Should we spill the whole 80 bits, or just 64?
 365 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 366 Linux, spilling 64 bits will round the results of some operations.
 367 This is what gcc does.  Spilling at 80 bits requires taking up a full
 368 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 369 the alignment problems.
 370
 371 In the future, we'll use the SSE registers for floating point.  This
 372 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 373 precision float ops), which means P4 or Xeon and above.  Using SSE
 374 will solve all these problems, because the SSE registers use fixed 32
 375 bit or 64 bit precision.
 376
 377 --SDM 1/2003
 378 -}
 379
 380 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 381
 382 -- data Instr continues: constructors shared by i386 and x86_64 builds.
 383
 384 -- Moves.
 385         | MOV         MachRep Operand Operand
 386         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 387         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 388         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 389         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 390         -- don't affect the high bits of the register.
 391
 392 -- Load effective address (also a very useful three-operand add instruction :-)
 393         | LEA         MachRep Operand Operand
 394
 395 -- Int Arithmetic.
 396         | ADD         MachRep Operand Operand
 397         | ADC         MachRep Operand Operand
 398         | SUB         MachRep Operand Operand
 399
 400         | MUL         MachRep Operand Operand
 401         | IMUL        MachRep Operand Operand   -- signed int mul
 402         | IMUL64      Reg Reg
 403         -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
 404
 405         | DIV         MachRep Operand   -- eax := edx:eax/op, edx := edx:eax%op
 406         | IDIV        MachRep Operand   -- ditto, but signed
 407
 408 -- Simple bit-twiddling.
 409         | AND         MachRep Operand Operand
 410         | OR          MachRep Operand Operand
 411         | XOR         MachRep Operand Operand
 412         | NOT         MachRep Operand
 413         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
 414
 415 -- Shifts (amount may be immediate or %cl only)
 416         | SHL         MachRep Operand{-amount-} Operand
 417         | SAR         MachRep Operand{-amount-} Operand
 418         | SHR         MachRep Operand{-amount-} Operand
 419
 420         | BT          MachRep Imm Operand
 421         | NOP
 422
 423 #if i386_TARGET_ARCH
 424 -- Float Arithmetic.
 425
 426 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 427 -- as single instructions right up until we spit them out.
 428         -- all the 3-operand fake fp insns are src1 src2 dst
 429         -- and furthermore are constrained to be fp regs only.
 430         -- IMPORTANT: keep is_G_instr up to date with any changes here
 431         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 432         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 433         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 434
 435         | GLDZ        Reg -- dst(fpreg)
 436         | GLD1        Reg -- dst(fpreg)
 437
 438         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 439         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 440
 441         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 442         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 443
 444         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 445         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 446         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 447         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 448
 449                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 450                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 451                 -- comparable and the comparison is True.  Subsequent code must
 452                 -- test the %eflags zero flag regardless of the supplied Cond.
 453         | GCMP        Cond Reg Reg -- src1, src2
 454
 455         | GABS        MachRep Reg Reg -- src, dst
 456         | GNEG        MachRep Reg Reg -- src, dst
 457         | GSQRT       MachRep Reg Reg -- src, dst
 458         | GSIN        MachRep Reg Reg -- src, dst
 459         | GCOS        MachRep Reg Reg -- src, dst
 460         | GTAN        MachRep Reg Reg -- src, dst
 461
 462         | GFREE         -- do ffree on all x86 regs; an ugly hack
 463 #endif
 464
 465 #if x86_64_TARGET_ARCH
 466 -- SSE2 floating point: we use a restricted set of the available SSE2
 467 -- instructions for floating-point.
 468
 469         -- use MOV for moving (either movss or movsd (movlpd better?))
 470
 471         | CVTSS2SD      Reg Reg         -- F32 to F64
 472         | CVTSD2SS      Reg Reg         -- F64 to F32
 473         | CVTSS2SI      Operand Reg     -- F32 to I32/I64 (with rounding)
 474         | CVTSD2SI      Operand Reg     -- F64 to I32/I64 (with rounding)
 475         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 476         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 477
 478         -- use ADD & SUB for arithmetic.  In both cases, operands
 479         -- are  Operand Reg.
 480
 481         -- SSE2 floating-point division:
 482         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
 483
 484         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 485         -- compare single/double prec floating point respectively.
 486
 487         | SQRT          MachRep Operand Reg     -- src, dst
 488 #endif
 489
 490 -- Comparison
 491         | TEST          MachRep Operand Operand
 492         | CMP           MachRep Operand Operand
 493         | SETCC         Cond Operand
 494
 495 -- Stack Operations.
 496         | PUSH          MachRep Operand
 497         | POP           MachRep Operand
 498         -- both unused (SDM):
 499         --  | PUSHA
 500         --  | POPA
 501
 502 -- Jumping around.
 503         | JMP         Operand
 504         | JXX         Cond BlockId  -- includes unconditional branches
 505         | JMP_TBL     Operand [BlockId]  -- table jump
 506         | CALL        (Either Imm Reg)
 507
 508 -- Other things.
 509         | CLTD MachRep   -- sign extend %eax into %edx:%eax
 510
 511         | FETCHGOT    Reg  -- pseudo-insn for position-independent code
 512                            -- pretty-prints as
 513                            --       call 1f
 514                            -- 1:    popl %reg
 515                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 516
 517 -- An x86 operand: a register, an immediate value, or a memory reference.
 518 data Operand = OpReg  Reg
 519              | OpImm  Imm
 520              | OpAddr AddrMode
 521
 522 #endif /* i386 or x86_64 */
 523
 524 #if i386_TARGET_ARCH
 525 -- Put a GFREE before each CALL/JMP, but only when some fake FP insn is present.
 526 i386_insert_ffrees :: [Instr] -> [Instr]
 527 i386_insert_ffrees insns =
 528    if any is_G_instr insns
 529       then concatMap ffree_before_nonlocal_transfers insns
 530       else insns
 531
 532 -- CALL and JMP get a GFREE in front; every other instruction is passed through.
 533 ffree_before_nonlocal_transfers :: Instr -> [Instr]
 534 ffree_before_nonlocal_transfers insn@(CALL _) = [GFREE, insn]
 535 ffree_before_nonlocal_transfers insn@(JMP _)  = [GFREE, insn]
 536 ffree_before_nonlocal_transfers insn          = [insn]
 537
 538
 539 -- Recognise the fake x86 FP ("G") instructions.  If you ever add a new FP
 540 -- insn to the fake x86 FP insn set, you must add it here too.  Meeting a
 541 -- GFREE here is treated as an internal error and panics.
 542 is_G_instr :: Instr -> Bool
 543 is_G_instr insn = case insn of
 544     GMOV{}  -> True; GLD{}   -> True; GST{}   -> True
 545     GLDZ{}  -> True; GLD1{}  -> True
 546     GFTOI{} -> True; GDTOI{} -> True
 547     GITOF{} -> True; GITOD{} -> True
 548     GADD{}  -> True; GDIV{}  -> True
 549     GSUB{}  -> True; GMUL{}  -> True
 550     GCMP{}  -> True; GABS{}  -> True
 551     GNEG{}  -> True; GSQRT{} -> True
 552     GSIN{}  -> True; GCOS{}  -> True; GTAN{}  -> True
 553     GFREE   -> panic "is_G_instr: GFREE (!)"
 554     _       -> False
 555 #endif /* i386_TARGET_ARCH */
 556
 557
 558 -- -----------------------------------------------------------------------------
 559 -- Sparc instructions
 560
 561 #if sparc_TARGET_ARCH
 562
 563 -- data Instr continues: constructors that exist only in SPARC builds.
 564
 565 -- Loads and stores.
 566               | LD            MachRep AddrMode Reg -- size, src, dst
 567               | ST            MachRep Reg AddrMode -- size, src, dst
 568
 569 -- Int Arithmetic.
 570               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 571               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 572               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 573               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 574               | RDY           Reg       -- move contents of Y register to reg
 575
 576 -- Simple bit-twiddling.
 577               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 578               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 579               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 580               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 581               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 582               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 583               | SLL           Reg RI Reg -- src1, src2, dst
 584               | SRL           Reg RI Reg -- src1, src2, dst
 585               | SRA           Reg RI Reg -- src1, src2, dst
 586               | SETHI         Imm Reg -- src, dst
 587               | NOP           -- Really SETHI 0, %g0, but worth an alias
 588
 589 -- Float Arithmetic.
 590
 591 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 592 -- instructions right up until we spit them out.
 593               | FABS          MachRep Reg Reg      -- src dst
 594               | FADD          MachRep Reg Reg Reg  -- src1, src2, dst
 595               | FCMP          Bool MachRep Reg Reg -- exception?, size, src1, src2 (no dst field)
 596               | FDIV          MachRep Reg Reg Reg -- src1, src2, dst
 597               | FMOV          MachRep Reg Reg     -- src, dst
 598               | FMUL          MachRep Reg Reg Reg -- src1, src2, dst
 599               | FNEG          MachRep Reg Reg     -- src, dst
 600               | FSQRT         MachRep Reg Reg     -- src, dst
 601               | FSUB          MachRep Reg Reg Reg -- src1, src2, dst
 602               | FxTOy         MachRep MachRep Reg Reg -- src, dst
 603
 604 -- Jumping around.
 605               | BI            Cond Bool Imm -- cond, annul?, target
 606               | BF            Cond Bool Imm -- cond, annul?, target
 607
 608               | JMP           DestInfo AddrMode      -- target
 609               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 610
 611 -- RI is declared once, unconditionally, earlier in this module, so it must
 612 -- not be declared again here (a second declaration is a compile error).
 613 -- True when the operand is a literal zero or real register number 0.
 614 riZero :: RI -> Bool
 615 riZero ri = case ri of
 616     RIImm (ImmInt 0)     -> True   -- Int literal zero
 617     RIImm (ImmInteger 0) -> True   -- Integer literal zero
 618     RIReg (RealReg 0)    -> True   -- presumably %g0 (cf. the NOP comment) -- confirm
 619     _                    -> False
 620
 621 -- Build the ADD that computes the effective address the corresponding fpRel
 622 -- sequence would use, i.e. dst := fp + n words.  fpRel itself is in MachRegs;
 623 -- alas, fpRelEA cannot live there too because of module dependencies.
 624 fpRelEA :: Int -> Reg -> Instr
 625 fpRelEA n dst = ADD False False fp byteOffset dst
 626   where byteOffset = RIImm (ImmInt (n * BYTES_PER_WORD))
 627
 628 -- The instruction that shifts the stack pointer by n words: sp := sp + n words.
 629 moveSp :: Int -> Instr
 630 moveSp n = ADD False False sp byteOffset sp
 631   where byteOffset = RIImm (ImmInt (n * BYTES_PER_WORD))
 632 -- Second half of a double register, given its first half (even-numbered, >= 32).
 633 fPair :: Reg -> Reg
 634 fPair reg = case reg of
 635     RealReg n | n >= 32 && even n -> RealReg (n+1)
 636     _ -> pprPanic "fPair(sparc NCG)" (ppr reg)
 637 #endif /* sparc_TARGET_ARCH */
 638
 639
 640 -- -----------------------------------------------------------------------------
 641 -- PowerPC instructions
 642
 643 #if powerpc_TARGET_ARCH
 644 -- data Instr continues: PowerPC-only constructors (guarded by a value test, as at every other arch check in this file)
 645
 646 -- Loads and stores.
 647               | LD      MachRep Reg AddrMode -- Load size, dst, src
 648               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 649               | ST      MachRep Reg AddrMode -- Store size, src, dst
 650               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 651               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 652               | LI      Reg Imm -- Load Immediate dst, src
 653               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 654
 655               | CMP     MachRep Reg RI --- size, src1, src2
 656               | CMPL    MachRep Reg RI --- size, src1, src2
 657
 658               | BCC     Cond BlockId
 659               | JMP     CLabel          -- same as branch,
 660                                         -- but with CLabel instead of block ID
 661               | MTCTR   Reg
 662               | BCTR    [BlockId]       -- with list of local destinations
 663               | BL      CLabel [Reg]    -- with list of argument regs
 664               | BCTRL   [Reg]
 665
 666               | ADD     Reg Reg RI -- dst, src1, src2
 667               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 668               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 669               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 670               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 671               | MULLW   Reg Reg RI
 672               | DIVW    Reg Reg Reg
 673               | DIVWU   Reg Reg Reg
 674
 675               | MULLW_MayOflo Reg Reg Reg
 676                         -- dst = 1 if src1 * src2 overflows
 677                         -- pseudo-instruction; pretty-printed as:
 678                         -- mullwo. dst, src1, src2
 679                         -- mfxer dst
 680                         -- rlwinm dst, dst, 2, 31,31
 681
 682               | AND     Reg Reg RI -- dst, src1, src2
 683               | OR      Reg Reg RI -- dst, src1, src2
 684               | XOR     Reg Reg RI -- dst, src1, src2
 685               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 686
 687               | EXTS    MachRep Reg Reg
 688
 689               | NEG     Reg Reg
 690               | NOT     Reg Reg
 691
 692               | SLW     Reg Reg RI      -- shift left word
 693               | SRW     Reg Reg RI      -- shift right word
 694               | SRAW    Reg Reg RI      -- shift right arithmetic word
 695
 696                         -- Rotate Left Word Immediate then AND with Mask
 697               | RLWINM  Reg Reg Int Int Int
 698
 699               | FADD    MachRep Reg Reg Reg
 700               | FSUB    MachRep Reg Reg Reg
 701               | FMUL    MachRep Reg Reg Reg
 702               | FDIV    MachRep Reg Reg Reg
 703               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 704
 705               | FCMP    Reg Reg
 706
 707               | FCTIWZ  Reg Reg         -- convert to integer word
 708               | FRSP    Reg Reg         -- reduce to single precision
 709                                         -- (but destination is a FP register)
 710
 711               | CRNOR   Int Int Int    -- condition register nor
 712               | MFCR    Reg            -- move from condition register
 713
 714               | MFLR    Reg            -- move from link register
 715               | FETCHPC Reg            -- pseudo-instruction:
 716                                        -- bcl to next insn, mflr reg
 717
 716                                        -- bcl to next insn, mflr reg
 717
 718 #endif /* powerpc_TARGET_ARCH */