compiler/nativeGen/MachInstrs.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "nativeGen/NCG.h"
  10
  11 module MachInstrs (
  12         -- * Cmm instantiations
  13         NatCmm, NatCmmTop, NatBasicBlock,
  14
  15         -- * Machine instructions
  16         Instr(..),
  17         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  18
  19 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  20         Size(..), machRepSize,
  21 #endif
  22         RI(..),
  23
  24 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  25         Operand(..),
  26 #endif
  27 #if i386_TARGET_ARCH
  28         i386_insert_ffrees,
  29 #endif
  30 #if sparc_TARGET_ARCH
  31         riZero, fpRelEA, moveSp, fPair,
  32 #endif
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import MachRegs
  38 import Cmm
  39 import MachOp           ( MachRep(..) )
  40 import CLabel           ( CLabel, pprCLabel )
  41 import Panic            ( panic )
  42 import Outputable
  43 import FastString
  44 import Constants       ( wORD_SIZE )
  45
  46 import GLAEXTS
  47
  48
  49 -- -----------------------------------------------------------------------------
  50 -- Our flavours of the Cmm types
  51
  52 -- Type synonyms for Cmm populated with native code
  53 type NatCmm        = GenCmm CmmStatic Instr
  54 type NatCmmTop     = GenCmmTop CmmStatic Instr
  55 type NatBasicBlock = GenBasicBlock Instr
  56
  57 -- -----------------------------------------------------------------------------
  58 -- Conditions on this architecture
  59
  60 data Cond
  61 #if alpha_TARGET_ARCH
  62   = ALWAYS      -- For BI (same as BR)
  63   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  64   | GE          -- For BI only
  65   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  66   | LE          -- For CMP and BI
  67   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  68   | NE          -- For BI only
  69   | NEVER       -- For BI (null instruction)
  70   | ULE         -- For CMP only
  71   | ULT         -- For CMP only
  72 #endif
  73 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  74   = ALWAYS      -- What's really used? ToDo
  75   | EQQ
  76   | GE
  77   | GEU
  78   | GTT
  79   | GU
  80   | LE
  81   | LEU
  82   | LTT
  83   | LU
  84   | NE
  85   | NEG
  86   | POS
  87   | CARRY
  88   | OFLO
  89   | PARITY
  90   | NOTPARITY
  91 #endif
  92 #if sparc_TARGET_ARCH
  93   = ALWAYS      -- What's really used? ToDo
  94   | EQQ
  95   | GE
  96   | GEU
  97   | GTT
  98   | GU
  99   | LE
 100   | LEU
 101   | LTT
 102   | LU
 103   | NE
 104   | NEG
 105   | NEVER
 106   | POS
 107   | VC
 108   | VS
 109 #endif
 110 #if powerpc_TARGET_ARCH
 111   = ALWAYS
 112   | EQQ
 113   | GE
 114   | GEU
 115   | GTT
 116   | GU
 117   | LE
 118   | LEU
 119   | LTT
 120   | LU
 121   | NE
 122 #endif
 123     deriving Eq  -- to make an assertion work
 124
 125 condUnsigned GU  = True
 126 condUnsigned LU  = True
 127 condUnsigned GEU = True
 128 condUnsigned LEU = True
 129 condUnsigned _   = False
 130
 131 condToSigned GU  = GTT
 132 condToSigned LU  = LTT
 133 condToSigned GEU = GE
 134 condToSigned LEU = LE
 135 condToSigned x   = x
 136
 137 condToUnsigned GTT = GU
 138 condToUnsigned LTT = LU
 139 condToUnsigned GE  = GEU
 140 condToUnsigned LE  = LEU
 141 condToUnsigned x   = x
 142
 143 -- -----------------------------------------------------------------------------
 144 -- Sizes on this architecture
 145
 146 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 147 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 148
 149 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
 150 data Size
 151 #if alpha_TARGET_ARCH
 152     = B     -- byte
 153     | Bu
 154 --  | W     -- word (2 bytes): UNUSED
 155 --  | Wu    -- : UNUSED
 156     | L     -- longword (4 bytes)
 157     | Q     -- quadword (8 bytes)
 158 --  | FF    -- VAX F-style floating pt: UNUSED
 159 --  | GF    -- VAX G-style floating pt: UNUSED
 160 --  | DF    -- VAX D-style floating pt: UNUSED
 161 --  | SF    -- IEEE single-precision floating pt: UNUSED
 162     | TF    -- IEEE double-precision floating pt
 163 #endif
 164 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 165     = B     -- byte (signed)
 166     | Bu    -- byte (unsigned)
 167     | H     -- halfword (signed, 2 bytes)
 168     | Hu    -- halfword (unsigned, 2 bytes)
 169     | W     -- word (4 bytes)
 170     | F     -- IEEE single-precision floating pt
 171     | DF    -- IEEE single-precision floating pt
 172 #endif
 173   deriving Eq
 174
 175 machRepSize :: MachRep -> Size
 176 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 177 machRepSize I16   = IF_ARCH_alpha(err,IF_ARCH_sparc(Hu, ))
 178 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 179 machRepSize I64   = panic "machRepSize: I64"
 180 machRepSize I128  = panic "machRepSize: I128"
 181 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 182 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 183 #endif
 184
 185 -- -----------------------------------------------------------------------------
 186 -- Register or immediate (a handy type on some platforms)
 187
 188 data RI = RIReg Reg
 189         | RIImm Imm
 190
 191
 192 -- -----------------------------------------------------------------------------
 193 -- Machine's assembly language
 194
 195 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 196 -- mostly all of 'Instr' is machine-specific.
 197
 198 data Instr
 199   = COMMENT FastString          -- comment pseudo-op
 200
 201   | LDATA   Section [CmmStatic] -- some static data spat out during code
 202                                 -- generation.  Will be extracted before
 203                                 -- pretty-printing.
 204
 205   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 206                                 -- codegen, removed later.  Preceding
 207                                 -- instruction should be a jump, as per the
 208                                 -- invariants for a BasicBlock (see Cmm).
 209
 210   | DELTA   Int                 -- specify current stack offset for
 211                                 -- benefit of subsequent passes
 212
 213 -- -----------------------------------------------------------------------------
 214 -- Alpha instructions
 215
 216 #if alpha_TARGET_ARCH
 217
 218 -- data Instr continues...
 219
 220 -- Loads and stores.
 221               | LD            Size Reg AddrMode -- size, dst, src
 222               | LDA           Reg AddrMode      -- dst, src
 223               | LDAH          Reg AddrMode      -- dst, src
 224               | LDGP          Reg AddrMode      -- dst, src
 225               | LDI           Size Reg Imm     -- size, dst, src
 226               | ST            Size Reg AddrMode -- size, src, dst
 227
 228 -- Int Arithmetic.
 229               | CLR           Reg                   -- dst
 230               | ABS           Size RI Reg           -- size, src, dst
 231               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 232               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 233               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 234               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 235               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 236               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 237               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 238               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 239
 240 -- Simple bit-twiddling.
 241               | NOT           RI Reg
 242               | AND           Reg RI Reg
 243               | ANDNOT        Reg RI Reg
 244               | OR            Reg RI Reg
 245               | ORNOT         Reg RI Reg
 246               | XOR           Reg RI Reg
 247               | XORNOT        Reg RI Reg
 248               | SLL           Reg RI Reg
 249               | SRL           Reg RI Reg
 250               | SRA           Reg RI Reg
 251
 252               | ZAP           Reg RI Reg
 253               | ZAPNOT        Reg RI Reg
 254
 255               | NOP
 256
 257 -- Comparison
 258               | CMP           Cond Reg RI Reg
 259
 260 -- Float Arithmetic.
 261               | FCLR          Reg
 262               | FABS          Reg Reg
 263               | FNEG          Size Reg Reg
 264               | FADD          Size Reg Reg Reg
 265               | FDIV          Size Reg Reg Reg
 266               | FMUL          Size Reg Reg Reg
 267               | FSUB          Size Reg Reg Reg
 268               | CVTxy         Size Size Reg Reg
 269               | FCMP          Size Cond Reg Reg Reg
 270               | FMOV          Reg Reg
 271
 272 -- Jumping around.
 273               | BI            Cond Reg Imm
 274               | BF            Cond Reg Imm
 275               | BR            Imm
 276               | JMP           Reg AddrMode Int
 277               | BSR           Imm Int
 278               | JSR           Reg AddrMode Int
 279
 280 -- Alpha-specific pseudo-ops.
 281               | FUNBEGIN CLabel
 282               | FUNEND CLabel
 283
 284 data RI
 285   = RIReg Reg
 286   | RIImm Imm
 287
 288 #endif /* alpha_TARGET_ARCH */
 289
 290
 291 -- -----------------------------------------------------------------------------
 292 -- Intel x86 instructions
 293
 294 {-
 295 Intel, in their infinite wisdom, selected a stack model for floating
 296 point registers on x86.  That might have made sense back in 1979 --
 297 nowadays we can see it for the nonsense it really is.  A stack model
 298 fits poorly with the existing nativeGen infrastructure, which assumes
 299 flat integer and FP register sets.  Prior to this commit, nativeGen
 300 could not generate correct x86 FP code -- to do so would have meant
 301 somehow working the register-stack paradigm into the register
 302 allocator and spiller, which sounds very difficult.
 303
 304 We have decided to cheat, and go for a simple fix which requires no
 305 infrastructure modifications, at the expense of generating ropey but
 306 correct FP code.  All notions of the x86 FP stack and its insns have
 307 been removed.  Instead, we pretend (to the instruction selector and
 308 register allocator) that x86 has six floating point registers, %fake0
 309 .. %fake5, which can be used in the usual flat manner.  We further
 310 claim that x86 has floating point instructions very similar to SPARC
 311 and Alpha, that is, a simple 3-operand register-register arrangement.
 312 Code generation and register allocation proceed on this basis.
 313
 314 When we come to print out the final assembly, our convenient fiction
 315 is converted to dismal reality.  Each fake instruction is
 316 independently converted to a series of real x86 instructions.
 317 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 318 arithmetic operations, the two operands are pushed onto the top of the
 319 FP stack, the operation done, and the result copied back into the
 320 relevant register.  There are only six %fake registers because 2 are
 321 needed for the translation, and x86 has 8 in total.
 322
 323 The translation is inefficient but is simple and it works.  A cleverer
 324 translation would handle a sequence of insns, simulating the FP stack
 325 contents, would not impose a fixed mapping from %fake to %st regs, and
 326 hopefully could avoid most of the redundant reg-reg moves of the
 327 current translation.
 328
 329 We might as well make use of whatever unique FP facilities Intel have
 330 chosen to bless us with (let's not be churlish, after all).
 331 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 332 -}
 333
 334 {-
 335 MORE FLOATING POINT MUSINGS...
 336
 337 Intel's internal floating point registers are by default 80 bit
 338 extended precision.  This means that all operations done on values in
 339 registers are done at 80 bits, and unless the intermediate values are
 340 truncated to the appropriate size (32 or 64 bits) by storing in
 341 memory, calculations in registers will give different results from
 342 calculations which pass intermediate values in memory (eg. via
 343 function calls).
 344
 345 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 346 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 347 that this will only affect 64-bit precision arithmetic; 32-bit
 348 calculations will still be done at 64-bit precision in registers.  So
 349 it doesn't solve the whole problem.
 350
 351 There's also the issue of what the C library is expecting in terms of
 352 precision.  It seems to be the case that glibc on Linux expects the
 353 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 354 unexpected effects.  Changing the default could have undesirable
 355 effects on other 3rd-party library code too, so the right thing would
 356 be to save/restore the FPU control word across Haskell code if we were
 357 to do this.
 358
 359 gcc's -ffloat-store gives consistent results by always storing the
 360 results of floating-point calculations in memory, which works for both
 361 32 and 64-bit precision.  However, it only affects the values of
 362 user-declared floating point variables in C, not intermediate results.
 363 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 364 flag).
 365
 366 Another problem is how to spill floating point registers in the
 367 register allocator.  Should we spill the whole 80 bits, or just 64?
 368 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 369 Linux, spilling 64 bits will round the results of some operations.
 370 This is what gcc does.  Spilling at 80 bits requires taking up a full
 371 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 372 the alignment problems.
 373
 374 In the future, we'll use the SSE registers for floating point.  This
 375 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 376 precision float ops), which means P4 or Xeon and above.  Using SSE
 377 will solve all these problems, because the SSE registers use fixed 32
 378 bit or 64 bit precision.
 379
 380 --SDM 1/2003
 381 -}
 382
 383 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 384
 385 -- data Instr continues...
 386
 387 -- Moves.
 388         | MOV         MachRep Operand Operand
 389         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 390         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 391         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 392         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 393         -- don't affect the high bits of the register.
 394
 395 -- Load effective address (also a very useful three-operand add instruction :-)
 396         | LEA         MachRep Operand Operand
 397
 398 -- Int Arithmetic.
 399         | ADD         MachRep Operand Operand
 400         | ADC         MachRep Operand Operand
 401         | SUB         MachRep Operand Operand
 402
 403         | MUL         MachRep Operand Operand
 404         | IMUL        MachRep Operand Operand   -- signed int mul
 405         | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
 406
 407         | DIV         MachRep Operand   -- eax := eax:edx/op, edx := eax:edx%op
 408         | IDIV        MachRep Operand   -- ditto, but signed
 409
 410 -- Simple bit-twiddling.
 411         | AND         MachRep Operand Operand
 412         | OR          MachRep Operand Operand
 413         | XOR         MachRep Operand Operand
 414         | NOT         MachRep Operand
 415         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
 416
 417 -- Shifts (amount may be immediate or %cl only)
 418         | SHL         MachRep Operand{-amount-} Operand
 419         | SAR         MachRep Operand{-amount-} Operand
 420         | SHR         MachRep Operand{-amount-} Operand
 421
 422         | BT          MachRep Imm Operand
 423         | NOP
 424
 425 #if i386_TARGET_ARCH
 426 -- Float Arithmetic.
 427
 428 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 429 -- as single instructions right up until we spit them out.
 430         -- all the 3-operand fake fp insns are src1 src2 dst
 431         -- and furthermore are constrained to be fp regs only.
 432         -- IMPORTANT: keep is_G_insn up to date with any changes here
 433         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 434         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 435         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 436
 437         | GLDZ        Reg -- dst(fpreg)
 438         | GLD1        Reg -- dst(fpreg)
 439
 440         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 441         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 442
 443         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 444         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 445
 446         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 447         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 448         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 449         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 450
 451                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 452                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 453                 -- comparable and the comparison is True.  Subsequent code must
 454                 -- test the %eflags zero flag regardless of the supplied Cond.
 455         | GCMP        Cond Reg Reg -- src1, src2
 456
 457         | GABS        MachRep Reg Reg -- src, dst
 458         | GNEG        MachRep Reg Reg -- src, dst
 459         | GSQRT       MachRep Reg Reg -- src, dst
 460         | GSIN        MachRep Reg Reg -- src, dst
 461         | GCOS        MachRep Reg Reg -- src, dst
 462         | GTAN        MachRep Reg Reg -- src, dst
 463
 464         | GFREE         -- do ffree on all x86 regs; an ugly hack
 465 #endif
 466
 467 #if x86_64_TARGET_ARCH
 468 -- SSE2 floating point: we use a restricted set of the available SSE2
 469 -- instructions for floating-point.
 470
 471         -- use MOV for moving (either movss or movsd (movlpd better?))
 472
 473         | CVTSS2SD      Reg Reg         -- F32 to F64
 474         | CVTSD2SS      Reg Reg         -- F64 to F32
 475         | CVTSS2SI      Operand Reg     -- F32 to I32/I64 (with rounding)
 476         | CVTSD2SI      Operand Reg     -- F64 to I32/I64 (with rounding)
 477         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 478         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 479
 480         -- use ADD & SUB for arithmetic.  In both cases, operands
 481         -- are  Operand Reg.
 482
 483         -- SSE2 floating-point division:
 484         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
 485
 486         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 487         -- compare single/double prec floating point respectively.
 488
 489         | SQRT          MachRep Operand Reg     -- src, dst
 490 #endif
 491
 492 -- Comparison
 493         | TEST          MachRep Operand Operand
 494         | CMP           MachRep Operand Operand
 495         | SETCC         Cond Operand
 496
 497 -- Stack Operations.
 498         | PUSH          MachRep Operand
 499         | POP           MachRep Operand
 500         -- both unused (SDM):
 501         --  | PUSHA
 502         --  | POPA
 503
 504 -- Jumping around.
 505         | JMP         Operand
 506         | JXX         Cond BlockId  -- includes unconditional branches
 507         | JMP_TBL     Operand [BlockId]  -- table jump
 508         | CALL        (Either Imm Reg) [Reg]
 509
 510 -- Other things.
 511         | CLTD MachRep   -- sign extend %eax into %edx:%eax
 512
 513         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
 514                            -- pretty-prints as
 515                            --       call 1f
 516                            -- 1:    popl %reg
 517                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 518         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
 519                            -- pretty-prints as
 520                            --       call 1f
 521                            -- 1:    popl %reg
 522
 523
 524 data Operand
 525   = OpReg  Reg          -- register
 526   | OpImm  Imm          -- immediate value
 527   | OpAddr AddrMode     -- memory reference
 528
 529 #endif /* i386 or x86_64 */
 530
 531 #if i386_TARGET_ARCH
 532 i386_insert_ffrees :: [Instr] -> [Instr]
 533 i386_insert_ffrees insns
 534    | any is_G_instr insns
 535    = concatMap ffree_before_nonlocal_transfers insns
 536    | otherwise
 537    = insns
 538
 539 ffree_before_nonlocal_transfers insn
 540    = case insn of
 541         CALL _ _ -> [GFREE, insn]
 542         JMP _    -> [GFREE, insn]
 543         other    -> [insn]
 544
 545
 546 -- if you ever add a new FP insn to the fake x86 FP insn set,
 547 -- you must update this too
 548 is_G_instr :: Instr -> Bool
 549 is_G_instr instr
 550    = case instr of
 551         GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
 552         GLDZ _ -> True; GLD1 _ -> True
 553         GFTOI _ _ -> True; GDTOI _ _ -> True
 554         GITOF _ _ -> True; GITOD _ _ -> True
 555         GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
 556         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
 557         GCMP _ _ _ -> True; GABS _ _ _ -> True
 558         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
 559         GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
 560         GFREE -> panic "is_G_instr: GFREE (!)"
 561         other -> False
 562 #endif /* i386_TARGET_ARCH */
 563
 564
 565 -- -----------------------------------------------------------------------------
 566 -- Sparc instructions
 567
 568 #if sparc_TARGET_ARCH
 569
 570 -- data Instr continues...
 571
 572 -- Loads and stores.
 573               | LD            MachRep AddrMode Reg -- size, src, dst
 574               | ST            MachRep Reg AddrMode -- size, src, dst
 575
 576 -- Int Arithmetic.
 577               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 578               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 579               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 580               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 581               | RDY           Reg       -- move contents of Y register to reg
 582
 583 -- Simple bit-twiddling.
 584               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 585               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 586               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 587               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 588               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 589               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 590               | SLL           Reg RI Reg -- src1, src2, dst
 591               | SRL           Reg RI Reg -- src1, src2, dst
 592               | SRA           Reg RI Reg -- src1, src2, dst
 593               | SETHI         Imm Reg -- src, dst
 594               | NOP           -- Really SETHI 0, %g0, but worth an alias
 595
 596 -- Float Arithmetic.
 597
 598 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 599 -- instructions right up until we spit them out.
 600               | FABS          MachRep Reg Reg      -- src dst
 601               | FADD          MachRep Reg Reg Reg  -- src1, src2, dst
 602               | FCMP          Bool MachRep Reg Reg -- exception?, src1, src2, dst
 603               | FDIV          MachRep Reg Reg Reg -- src1, src2, dst
 604               | FMOV          MachRep Reg Reg     -- src, dst
 605               | FMUL          MachRep Reg Reg Reg -- src1, src2, dst
 606               | FNEG          MachRep Reg Reg     -- src, dst
 607               | FSQRT         MachRep Reg Reg     -- src, dst
 608               | FSUB          MachRep Reg Reg Reg -- src1, src2, dst
 609               | FxTOy         MachRep MachRep Reg Reg -- src, dst
 610
 611 -- Jumping around.
 612               | BI            Cond Bool Imm -- cond, annul?, target
 613               | BF            Cond Bool Imm -- cond, annul?, target
 614
 615               | JMP           AddrMode     -- target
 616               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 617
 618 riZero :: RI -> Bool
 619
 620 riZero (RIImm (ImmInt 0))           = True
 621 riZero (RIImm (ImmInteger 0))       = True
 622 riZero (RIReg (RealReg 0))          = True
 623 riZero _                            = False
 624
 625 -- Calculate the effective address which would be used by the
 626 -- corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
 627 -- alas -- can't have fpRelEA here because of module dependencies.
 628 fpRelEA :: Int -> Reg -> Instr
 629 fpRelEA n dst
 630    = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
 631
 632 -- Code to shift the stack pointer by n words.
 633 moveSp :: Int -> Instr
 634 moveSp n
 635    = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
 636
 637 -- Produce the second-half-of-a-double register given the first half.
 638 fPair :: Reg -> Reg
 639 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0  = RealReg (n+1)
 640 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
 641 #endif /* sparc_TARGET_ARCH */
 642
 643
 644 -- -----------------------------------------------------------------------------
 645 -- PowerPC instructions
 646
 647 #ifdef powerpc_TARGET_ARCH
 648 -- data Instr continues...
 649
 650 -- Loads and stores.
 651               | LD      MachRep Reg AddrMode -- Load size, dst, src
 652               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 653               | ST      MachRep Reg AddrMode -- Store size, src, dst
 654               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 655               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 656               | LI      Reg Imm -- Load Immediate dst, src
 657               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 658
 659               | CMP     MachRep Reg RI --- size, src1, src2
 660               | CMPL    MachRep Reg RI --- size, src1, src2
 661
 662               | BCC     Cond BlockId
 663               | JMP     CLabel          -- same as branch,
 664                                         -- but with CLabel instead of block ID
 665               | MTCTR   Reg
 666               | BCTR    [BlockId]       -- with list of local destinations
 667               | BL      CLabel [Reg]    -- with list of argument regs
 668               | BCTRL   [Reg]
 669
 670               | ADD     Reg Reg RI -- dst, src1, src2
 671               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 672               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 673               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 674               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 675               | MULLW   Reg Reg RI
 676               | DIVW    Reg Reg Reg
 677               | DIVWU   Reg Reg Reg
 678
 679               | MULLW_MayOflo Reg Reg Reg
 680                         -- dst = 1 if src1 * src2 overflows
 681                         -- pseudo-instruction; pretty-printed as:
 682                         -- mullwo. dst, src1, src2
 683                         -- mfxer dst
 684                         -- rlwinm dst, dst, 2, 31,31
 685
 686               | AND     Reg Reg RI -- dst, src1, src2
 687               | OR      Reg Reg RI -- dst, src1, src2
 688               | XOR     Reg Reg RI -- dst, src1, src2
 689               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 690
 691               | EXTS    MachRep Reg Reg
 692
 693               | NEG     Reg Reg
 694               | NOT     Reg Reg
 695
 696               | SLW     Reg Reg RI      -- shift left word
 697               | SRW     Reg Reg RI      -- shift right word
 698               | SRAW    Reg Reg RI      -- shift right arithmetic word
 699
 700                         -- Rotate Left Word Immediate then AND with Mask
 701               | RLWINM  Reg Reg Int Int Int
 702
 703               | FADD    MachRep Reg Reg Reg
 704               | FSUB    MachRep Reg Reg Reg
 705               | FMUL    MachRep Reg Reg Reg
 706               | FDIV    MachRep Reg Reg Reg
 707               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 708
 709               | FCMP    Reg Reg
 710
 711               | FCTIWZ  Reg Reg         -- convert to integer word
 712               | FRSP    Reg Reg         -- reduce to single precision
 713                                         -- (but destination is a FP register)
 714
 715               | CRNOR   Int Int Int    -- condition register nor
 716               | MFCR    Reg            -- move from condition register
 717
 718               | MFLR    Reg            -- move from link register
 719               | FETCHPC Reg            -- pseudo-instruction:
 720                                        -- bcl to next insn, mflr reg
 721
 722 #endif /* powerpc_TARGET_ARCH */