ghc/compiler/nativeGen/MachInstrs.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "nativeGen/NCG.h"
  10
  11 module MachInstrs (
  12         -- * Cmm instantiations
  13         NatCmm, NatCmmTop, NatBasicBlock,
  14
  15         -- * Machine instructions
  16         Instr(..),
  17         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  18
  19 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  20         Size(..), machRepSize,
  21 #endif
  22         RI(..),
  23
  24 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  25         Operand(..),
  26 #endif
  27 #if i386_TARGET_ARCH
  28         i386_insert_ffrees,
  29 #endif
  30 #if sparc_TARGET_ARCH
  31         riZero, fpRelEA, moveSp, fPair,
  32 #endif
  33     ) where
  34
  35 #include "HsVersions.h"
  36
  37 import MachRegs
  38 import Cmm
  39 import MachOp           ( MachRep(..) )
  40 import CLabel           ( CLabel, pprCLabel )
  41 import Panic            ( panic )
  42 import Outputable
  43 import FastString
  44
  45 import GLAEXTS
  46
  47
  48 -- -----------------------------------------------------------------------------
  49 -- Our flavours of the Cmm types
  50
  51 -- Type synonyms for Cmm populated with native code
  52 type NatCmm        = GenCmm CmmStatic Instr
  53 type NatCmmTop     = GenCmmTop CmmStatic Instr
  54 type NatBasicBlock = GenBasicBlock Instr
  55
  56 -- -----------------------------------------------------------------------------
  57 -- Conditions on this architecture
  58
  59 data Cond
  60 #if alpha_TARGET_ARCH
  61   = ALWAYS      -- For BI (same as BR)
  62   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  63   | GE          -- For BI only
  64   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  65   | LE          -- For CMP and BI
  66   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  67   | NE          -- For BI only
  68   | NEVER       -- For BI (null instruction)
  69   | ULE         -- For CMP only
  70   | ULT         -- For CMP only
  71 #endif
  72 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  73   = ALWAYS      -- What's really used? ToDo
  74   | EQQ
  75   | GE
  76   | GEU
  77   | GTT
  78   | GU
  79   | LE
  80   | LEU
  81   | LTT
  82   | LU
  83   | NE
  84   | NEG
  85   | POS
  86   | CARRY
  87   | OFLO
  88   | PARITY
  89   | NOTPARITY
  90 #endif
  91 #if sparc_TARGET_ARCH
  92   = ALWAYS      -- What's really used? ToDo
  93   | EQQ
  94   | GE
  95   | GEU
  96   | GTT
  97   | GU
  98   | LE
  99   | LEU
 100   | LTT
 101   | LU
 102   | NE
 103   | NEG
 104   | NEVER
 105   | POS
 106   | VC
 107   | VS
 108 #endif
 109 #if powerpc_TARGET_ARCH
 110   = ALWAYS
 111   | EQQ
 112   | GE
 113   | GEU
 114   | GTT
 115   | GU
 116   | LE
 117   | LEU
 118   | LTT
 119   | LU
 120   | NE
 121 #endif
 122     deriving Eq  -- to make an assertion work
 123
 124 condUnsigned GU  = True
 125 condUnsigned LU  = True
 126 condUnsigned GEU = True
 127 condUnsigned LEU = True
 128 condUnsigned _   = False
 129
 130 condToSigned GU  = GTT
 131 condToSigned LU  = LTT
 132 condToSigned GEU = GE
 133 condToSigned LEU = LE
 134 condToSigned x   = x
 135
 136 condToUnsigned GTT = GU
 137 condToUnsigned LTT = LU
 138 condToUnsigned GE  = GEU
 139 condToUnsigned LE  = LEU
 140 condToUnsigned x   = x
 141
 142 -- -----------------------------------------------------------------------------
 143 -- Sizes on this architecture
 144
 145 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 146 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 147
 148 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
 149 data Size
 150 #if alpha_TARGET_ARCH
 151     = B     -- byte
 152     | Bu
 153 --  | W     -- word (2 bytes): UNUSED
 154 --  | Wu    -- : UNUSED
 155     | L     -- longword (4 bytes)
 156     | Q     -- quadword (8 bytes)
 157 --  | FF    -- VAX F-style floating pt: UNUSED
 158 --  | GF    -- VAX G-style floating pt: UNUSED
 159 --  | DF    -- VAX D-style floating pt: UNUSED
 160 --  | SF    -- IEEE single-precision floating pt: UNUSED
 161     | TF    -- IEEE double-precision floating pt
 162 #endif
 163 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 164     = B     -- byte (signed)
 165     | Bu    -- byte (unsigned)
 166     | H     -- halfword (signed, 2 bytes)
 167     | Hu    -- halfword (unsigned, 2 bytes)
 168     | W     -- word (4 bytes)
 169     | F     -- IEEE single-precision floating pt
 170     | DF    -- IEEE single-precision floating pt
 171 #endif
 172   deriving Eq
 173
 174 machRepSize :: MachRep -> Size
 175 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 176 machRepSize I16   = IF_ARCH_alpha(err,IF_ARCH_sparc(Hu, ))
 177 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 178 machRepSize I64   = panic "machRepSize: I64"
 179 machRepSize I128  = panic "machRepSize: I128"
 180 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 181 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 182 #endif
 183
 184 -- -----------------------------------------------------------------------------
 185 -- Register or immediate (a handy type on some platforms)
 186
 187 data RI = RIReg Reg
 188         | RIImm Imm
 189
 190
 191 -- -----------------------------------------------------------------------------
 192 -- Machine's assembly language
 193
 194 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 195 -- mostly all of 'Instr' is machine-specific.
 196
 197 data Instr
 198   = COMMENT FastString          -- comment pseudo-op
 199
 200   | LDATA   Section [CmmStatic] -- some static data spat out during code
 201                                 -- generation.  Will be extracted before
 202                                 -- pretty-printing.
 203
 204   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 205                                 -- codegen, removed later.  Preceding
 206                                 -- instruction should be a jump, as per the
 207                                 -- invariants for a BasicBlock (see Cmm).
 208
 209   | DELTA   Int                 -- specify current stack offset for
 210                                 -- benefit of subsequent passes
 211
 212 -- -----------------------------------------------------------------------------
 213 -- Alpha instructions
 214
 215 #if alpha_TARGET_ARCH
 216
 217 -- data Instr continues...
 218
 219 -- Loads and stores.
 220               | LD            Size Reg AddrMode -- size, dst, src
 221               | LDA           Reg AddrMode      -- dst, src
 222               | LDAH          Reg AddrMode      -- dst, src
 223               | LDGP          Reg AddrMode      -- dst, src
 224               | LDI           Size Reg Imm     -- size, dst, src
 225               | ST            Size Reg AddrMode -- size, src, dst
 226
 227 -- Int Arithmetic.
 228               | CLR           Reg                   -- dst
 229               | ABS           Size RI Reg           -- size, src, dst
 230               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 231               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 232               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 233               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 234               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 235               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 236               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 237               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 238
 239 -- Simple bit-twiddling.
 240               | NOT           RI Reg
 241               | AND           Reg RI Reg
 242               | ANDNOT        Reg RI Reg
 243               | OR            Reg RI Reg
 244               | ORNOT         Reg RI Reg
 245               | XOR           Reg RI Reg
 246               | XORNOT        Reg RI Reg
 247               | SLL           Reg RI Reg
 248               | SRL           Reg RI Reg
 249               | SRA           Reg RI Reg
 250
 251               | ZAP           Reg RI Reg
 252               | ZAPNOT        Reg RI Reg
 253
 254               | NOP
 255
 256 -- Comparison
 257               | CMP           Cond Reg RI Reg
 258
 259 -- Float Arithmetic.
 260               | FCLR          Reg
 261               | FABS          Reg Reg
 262               | FNEG          Size Reg Reg
 263               | FADD          Size Reg Reg Reg
 264               | FDIV          Size Reg Reg Reg
 265               | FMUL          Size Reg Reg Reg
 266               | FSUB          Size Reg Reg Reg
 267               | CVTxy         Size Size Reg Reg
 268               | FCMP          Size Cond Reg Reg Reg
 269               | FMOV          Reg Reg
 270
 271 -- Jumping around.
 272               | BI            Cond Reg Imm
 273               | BF            Cond Reg Imm
 274               | BR            Imm
 275               | JMP           Reg AddrMode Int
 276               | BSR           Imm Int
 277               | JSR           Reg AddrMode Int
 278
 279 -- Alpha-specific pseudo-ops.
 280               | FUNBEGIN CLabel
 281               | FUNEND CLabel
 282
 283 data RI
 284   = RIReg Reg
 285   | RIImm Imm
 286
 287 #endif /* alpha_TARGET_ARCH */
 288
 289
 290 -- -----------------------------------------------------------------------------
 291 -- Intel x86 instructions
 292
 293 {-
 294 Intel, in their infinite wisdom, selected a stack model for floating
 295 point registers on x86.  That might have made sense back in 1979 --
 296 nowadays we can see it for the nonsense it really is.  A stack model
 297 fits poorly with the existing nativeGen infrastructure, which assumes
 298 flat integer and FP register sets.  Prior to this commit, nativeGen
 299 could not generate correct x86 FP code -- to do so would have meant
 300 somehow working the register-stack paradigm into the register
 301 allocator and spiller, which sounds very difficult.
 302
 303 We have decided to cheat, and go for a simple fix which requires no
 304 infrastructure modifications, at the expense of generating ropey but
 305 correct FP code.  All notions of the x86 FP stack and its insns have
 306 been removed.  Instead, we pretend (to the instruction selector and
 307 register allocator) that x86 has six floating point registers, %fake0
 308 .. %fake5, which can be used in the usual flat manner.  We further
 309 claim that x86 has floating point instructions very similar to SPARC
 310 and Alpha, that is, a simple 3-operand register-register arrangement.
 311 Code generation and register allocation proceed on this basis.
 312
 313 When we come to print out the final assembly, our convenient fiction
 314 is converted to dismal reality.  Each fake instruction is
 315 independently converted to a series of real x86 instructions.
 316 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 317 arithmetic operations, the two operands are pushed onto the top of the
 318 FP stack, the operation done, and the result copied back into the
 319 relevant register.  There are only six %fake registers because 2 are
 320 needed for the translation, and x86 has 8 in total.
 321
 322 The translation is inefficient but is simple and it works.  A cleverer
 323 translation would handle a sequence of insns, simulating the FP stack
 324 contents, would not impose a fixed mapping from %fake to %st regs, and
 325 hopefully could avoid most of the redundant reg-reg moves of the
 326 current translation.
 327
 328 We might as well make use of whatever unique FP facilities Intel have
 329 chosen to bless us with (let's not be churlish, after all).
 330 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 331 -}
 332
 333 {-
 334 MORE FLOATING POINT MUSINGS...
 335
 336 Intel's internal floating point registers are by default 80 bit
 337 extended precision.  This means that all operations done on values in
 338 registers are done at 80 bits, and unless the intermediate values are
 339 truncated to the appropriate size (32 or 64 bits) by storing in
 340 memory, calculations in registers will give different results from
 341 calculations which pass intermediate values in memory (eg. via
 342 function calls).
 343
 344 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 345 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 346 that this will only affect 64-bit precision arithmetic; 32-bit
 347 calculations will still be done at 64-bit precision in registers.  So
 348 it doesn't solve the whole problem.
 349
 350 There's also the issue of what the C library is expecting in terms of
 351 precision.  It seems to be the case that glibc on Linux expects the
 352 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 353 unexpected effects.  Changing the default could have undesirable
 354 effects on other 3rd-party library code too, so the right thing would
 355 be to save/restore the FPU control word across Haskell code if we were
 356 to do this.
 357
 358 gcc's -ffloat-store gives consistent results by always storing the
 359 results of floating-point calculations in memory, which works for both
 360 32 and 64-bit precision.  However, it only affects the values of
 361 user-declared floating point variables in C, not intermediate results.
 362 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 363 flag).
 364
 365 Another problem is how to spill floating point registers in the
 366 register allocator.  Should we spill the whole 80 bits, or just 64?
 367 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 368 Linux, spilling 64 bits will round the results of some operations.
 369 This is what gcc does.  Spilling at 80 bits requires taking up a full
 370 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 371 the alignment problems.
 372
 373 In the future, we'll use the SSE registers for floating point.  This
 374 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 375 precision float ops), which means P4 or Xeon and above.  Using SSE
 376 will solve all these problems, because the SSE registers use fixed 32
 377 bit or 64 bit precision.
 378
 379 --SDM 1/2003
 380 -}
 381
 382 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 383
 384 -- data Instr continues...
 385
 386 -- Moves.
 387         | MOV         MachRep Operand Operand
 388         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 389         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 390         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 391         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 392         -- don't affect the high bits of the register.
 393
 394 -- Load effective address (also a very useful three-operand add instruction :-)
 395         | LEA         MachRep Operand Operand
 396
 397 -- Int Arithmetic.
 398         | ADD         MachRep Operand Operand
 399         | ADC         MachRep Operand Operand
 400         | SUB         MachRep Operand Operand
 401
 402         | MUL         MachRep Operand Operand
 403         | IMUL        MachRep Operand Operand   -- signed int mul
 404         | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
 405
 406         | DIV         MachRep Operand   -- eax := eax:edx/op, edx := eax:edx%op
 407         | IDIV        MachRep Operand   -- ditto, but signed
 408
 409 -- Simple bit-twiddling.
 410         | AND         MachRep Operand Operand
 411         | OR          MachRep Operand Operand
 412         | XOR         MachRep Operand Operand
 413         | NOT         MachRep Operand
 414         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
 415
 416 -- Shifts (amount may be immediate or %cl only)
 417         | SHL         MachRep Operand{-amount-} Operand
 418         | SAR         MachRep Operand{-amount-} Operand
 419         | SHR         MachRep Operand{-amount-} Operand
 420
 421         | BT          MachRep Imm Operand
 422         | NOP
 423
 424 #if i386_TARGET_ARCH
 425 -- Float Arithmetic.
 426
 427 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 428 -- as single instructions right up until we spit them out.
 429         -- all the 3-operand fake fp insns are src1 src2 dst
 430         -- and furthermore are constrained to be fp regs only.
 431         -- IMPORTANT: keep is_G_insn up to date with any changes here
 432         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 433         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 434         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 435
 436         | GLDZ        Reg -- dst(fpreg)
 437         | GLD1        Reg -- dst(fpreg)
 438
 439         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 440         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 441
 442         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 443         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 444
 445         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 446         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 447         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 448         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 449
 450                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 451                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 452                 -- comparable and the comparison is True.  Subsequent code must
 453                 -- test the %eflags zero flag regardless of the supplied Cond.
 454         | GCMP        Cond Reg Reg -- src1, src2
 455
 456         | GABS        MachRep Reg Reg -- src, dst
 457         | GNEG        MachRep Reg Reg -- src, dst
 458         | GSQRT       MachRep Reg Reg -- src, dst
 459         | GSIN        MachRep Reg Reg -- src, dst
 460         | GCOS        MachRep Reg Reg -- src, dst
 461         | GTAN        MachRep Reg Reg -- src, dst
 462
 463         | GFREE         -- do ffree on all x86 regs; an ugly hack
 464 #endif
 465
 466 #if x86_64_TARGET_ARCH
 467 -- SSE2 floating point: we use a restricted set of the available SSE2
 468 -- instructions for floating-point.
 469
 470         -- use MOV for moving (either movss or movsd (movlpd better?))
 471
 472         | CVTSS2SD      Reg Reg         -- F32 to F64
 473         | CVTSD2SS      Reg Reg         -- F64 to F32
 474         | CVTSS2SI      Operand Reg     -- F32 to I32/I64 (with rounding)
 475         | CVTSD2SI      Operand Reg     -- F64 to I32/I64 (with rounding)
 476         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 477         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 478
 479         -- use ADD & SUB for arithmetic.  In both cases, operands
 480         -- are  Operand Reg.
 481
 482         -- SSE2 floating-point division:
 483         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
 484
 485         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 486         -- compare single/double prec floating point respectively.
 487
 488         | SQRT          MachRep Operand Reg     -- src, dst
 489 #endif
 490
 491 -- Comparison
 492         | TEST          MachRep Operand Operand
 493         | CMP           MachRep Operand Operand
 494         | SETCC         Cond Operand
 495
 496 -- Stack Operations.
 497         | PUSH          MachRep Operand
 498         | POP           MachRep Operand
 499         -- both unused (SDM):
 500         --  | PUSHA
 501         --  | POPA
 502
 503 -- Jumping around.
 504         | JMP         Operand
 505         | JXX         Cond BlockId  -- includes unconditional branches
 506         | JMP_TBL     Operand [BlockId]  -- table jump
 507         | CALL        (Either Imm Reg) [Reg]
 508
 509 -- Other things.
 510         | CLTD MachRep   -- sign extend %eax into %edx:%eax
 511
 512         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
 513                            -- pretty-prints as
 514                            --       call 1f
 515                            -- 1:    popl %reg
 516                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 517         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
 518                            -- pretty-prints as
 519                            --       call 1f
 520                            -- 1:    popl %reg
 521
 522
 523 data Operand
 524   = OpReg  Reg          -- register
 525   | OpImm  Imm          -- immediate value
 526   | OpAddr AddrMode     -- memory reference
 527
 528 #endif /* i386 or x86_64 */
 529
 530 #if i386_TARGET_ARCH
 531 i386_insert_ffrees :: [Instr] -> [Instr]
 532 i386_insert_ffrees insns
 533    | any is_G_instr insns
 534    = concatMap ffree_before_nonlocal_transfers insns
 535    | otherwise
 536    = insns
 537
 538 ffree_before_nonlocal_transfers insn
 539    = case insn of
 540         CALL _ _ -> [GFREE, insn]
 541         JMP _    -> [GFREE, insn]
 542         other    -> [insn]
 543
 544
 545 -- if you ever add a new FP insn to the fake x86 FP insn set,
 546 -- you must update this too
 547 is_G_instr :: Instr -> Bool
 548 is_G_instr instr
 549    = case instr of
 550         GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
 551         GLDZ _ -> True; GLD1 _ -> True
 552         GFTOI _ _ -> True; GDTOI _ _ -> True
 553         GITOF _ _ -> True; GITOD _ _ -> True
 554         GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
 555         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
 556         GCMP _ _ _ -> True; GABS _ _ _ -> True
 557         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
 558         GSIN _ _ _ -> True; GCOS _ _ _ -> True; GTAN _ _ _ -> True
 559         GFREE -> panic "is_G_instr: GFREE (!)"
 560         other -> False
 561 #endif /* i386_TARGET_ARCH */
 562
 563
 564 -- -----------------------------------------------------------------------------
 565 -- Sparc instructions
 566
 567 #if sparc_TARGET_ARCH
 568
 569 -- data Instr continues...
 570
 571 -- Loads and stores.
 572               | LD            MachRep AddrMode Reg -- size, src, dst
 573               | ST            MachRep Reg AddrMode -- size, src, dst
 574
 575 -- Int Arithmetic.
 576               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 577               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 578               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 579               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 580               | RDY           Reg       -- move contents of Y register to reg
 581
 582 -- Simple bit-twiddling.
 583               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 584               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 585               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 586               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 587               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 588               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 589               | SLL           Reg RI Reg -- src1, src2, dst
 590               | SRL           Reg RI Reg -- src1, src2, dst
 591               | SRA           Reg RI Reg -- src1, src2, dst
 592               | SETHI         Imm Reg -- src, dst
 593               | NOP           -- Really SETHI 0, %g0, but worth an alias
 594
 595 -- Float Arithmetic.
 596
 597 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 598 -- instructions right up until we spit them out.
 599               | FABS          MachRep Reg Reg      -- src dst
 600               | FADD          MachRep Reg Reg Reg  -- src1, src2, dst
 601               | FCMP          Bool MachRep Reg Reg -- exception?, src1, src2, dst
 602               | FDIV          MachRep Reg Reg Reg -- src1, src2, dst
 603               | FMOV          MachRep Reg Reg     -- src, dst
 604               | FMUL          MachRep Reg Reg Reg -- src1, src2, dst
 605               | FNEG          MachRep Reg Reg     -- src, dst
 606               | FSQRT         MachRep Reg Reg     -- src, dst
 607               | FSUB          MachRep Reg Reg Reg -- src1, src2, dst
 608               | FxTOy         MachRep MachRep Reg Reg -- src, dst
 609
 610 -- Jumping around.
 611               | BI            Cond Bool Imm -- cond, annul?, target
 612               | BF            Cond Bool Imm -- cond, annul?, target
 613
 614               | JMP           DestInfo AddrMode      -- target
 615               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 616
 617 data RI = RIReg Reg
 618         | RIImm Imm
 619
 620 riZero :: RI -> Bool
 621
 622 riZero (RIImm (ImmInt 0))           = True
 623 riZero (RIImm (ImmInteger 0))       = True
 624 riZero (RIReg (RealReg 0))          = True
 625 riZero _                            = False
 626
 627 -- Calculate the effective address which would be used by the
 628 -- corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
 629 -- alas -- can't have fpRelEA here because of module dependencies.
 630 fpRelEA :: Int -> Reg -> Instr
 631 fpRelEA n dst
 632    = ADD False False fp (RIImm (ImmInt (n * BYTES_PER_WORD))) dst
 633
 634 -- Code to shift the stack pointer by n words.
 635 moveSp :: Int -> Instr
 636 moveSp n
 637    = ADD False False sp (RIImm (ImmInt (n * BYTES_PER_WORD))) sp
 638
 639 -- Produce the second-half-of-a-double register given the first half.
 640 fPair :: Reg -> Reg
 641 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0  = RealReg (n+1)
 642 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
 643 #endif /* sparc_TARGET_ARCH */
 644
 645
 646 -- -----------------------------------------------------------------------------
 647 -- PowerPC instructions
 648
 649 #ifdef powerpc_TARGET_ARCH
 650 -- data Instr continues...
 651
 652 -- Loads and stores.
 653               | LD      MachRep Reg AddrMode -- Load size, dst, src
 654               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 655               | ST      MachRep Reg AddrMode -- Store size, src, dst
 656               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 657               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 658               | LI      Reg Imm -- Load Immediate dst, src
 659               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 660
 661               | CMP     MachRep Reg RI --- size, src1, src2
 662               | CMPL    MachRep Reg RI --- size, src1, src2
 663
 664               | BCC     Cond BlockId
 665               | JMP     CLabel          -- same as branch,
 666                                         -- but with CLabel instead of block ID
 667               | MTCTR   Reg
 668               | BCTR    [BlockId]       -- with list of local destinations
 669               | BL      CLabel [Reg]    -- with list of argument regs
 670               | BCTRL   [Reg]
 671
 672               | ADD     Reg Reg RI -- dst, src1, src2
 673               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 674               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 675               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 676               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 677               | MULLW   Reg Reg RI
 678               | DIVW    Reg Reg Reg
 679               | DIVWU   Reg Reg Reg
 680
 681               | MULLW_MayOflo Reg Reg Reg
 682                         -- dst = 1 if src1 * src2 overflows
 683                         -- pseudo-instruction; pretty-printed as:
 684                         -- mullwo. dst, src1, src2
 685                         -- mfxer dst
 686                         -- rlwinm dst, dst, 2, 31,31
 687
 688               | AND     Reg Reg RI -- dst, src1, src2
 689               | OR      Reg Reg RI -- dst, src1, src2
 690               | XOR     Reg Reg RI -- dst, src1, src2
 691               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 692
 693               | EXTS    MachRep Reg Reg
 694
 695               | NEG     Reg Reg
 696               | NOT     Reg Reg
 697
 698               | SLW     Reg Reg RI      -- shift left word
 699               | SRW     Reg Reg RI      -- shift right word
 700               | SRAW    Reg Reg RI      -- shift right arithmetic word
 701
 702                         -- Rotate Left Word Immediate then AND with Mask
 703               | RLWINM  Reg Reg Int Int Int
 704
 705               | FADD    MachRep Reg Reg Reg
 706               | FSUB    MachRep Reg Reg Reg
 707               | FMUL    MachRep Reg Reg Reg
 708               | FDIV    MachRep Reg Reg Reg
 709               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 710
 711               | FCMP    Reg Reg
 712
 713               | FCTIWZ  Reg Reg         -- convert to integer word
 714               | FRSP    Reg Reg         -- reduce to single precision
 715                                         -- (but destination is a FP register)
 716
 717               | CRNOR   Int Int Int    -- condition register nor
 718               | MFCR    Reg            -- move from condition register
 719
 720               | MFLR    Reg            -- move from link register
 721               | FETCHPC Reg            -- pseudo-instruction:
 722                                        -- bcl to next insn, mflr reg
 723
 724 #endif /* powerpc_TARGET_ARCH */