compiler/nativeGen/MachInstrs.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning suppression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Machine-dependent assembly language
  11 --
  12 -- (c) The University of Glasgow 1993-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 #include "nativeGen/NCG.h"
  17
  18 module MachInstrs (
  19         -- * Cmm instantiations
  20         NatCmm, NatCmmTop, NatBasicBlock,
  21
  22         -- * Machine instructions
  23         Instr(..),
  24         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  25 #if powerpc_TARGET_ARCH
  26         condNegate,
  27 #endif
  28 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  29         Size(..), machRepSize,
  30 #endif
  31         RI(..),
  32
  33 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  34         Operand(..),
  35 #endif
  36 #if i386_TARGET_ARCH
  37         i386_insert_ffrees,
  38 #endif
  39 #if sparc_TARGET_ARCH
  40         riZero, fpRelEA, moveSp, fPair,
  41 #endif
  42     ) where
  43
  44 #include "HsVersions.h"
  45
  46 import MachRegs
  47 import Cmm
  48 import MachOp           ( MachRep(..) )
  49 import CLabel           ( CLabel, pprCLabel )
  50 import Panic            ( panic )
  51 import Outputable
  52 import FastString
  53 import Constants       ( wORD_SIZE )
  54
  55 import GHC.Exts
  56
  57
  58 -- -----------------------------------------------------------------------------
  59 -- Our flavours of the Cmm types
  60
  61 -- Type synonyms instantiating the generic Cmm types with native 'Instr's
  62 type NatCmm        = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
  63 type NatCmmTop     = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
  64 type NatBasicBlock = GenBasicBlock Instr
  65
  66 -- -----------------------------------------------------------------------------
  67 -- Condition codes; the constructor set depends on which *_TARGET_ARCH macro is set
  68
  69 data Cond
  70 #if alpha_TARGET_ARCH
  71   = ALWAYS      -- For BI (same as BR)
  72   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  73   | GE          -- For BI only
  74   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  75   | LE          -- For CMP and BI
  76   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  77   | NE          -- For BI only
  78   | NEVER       -- For BI (null instruction)
  79   | ULE         -- For CMP only
  80   | ULT         -- For CMP only
  81 #endif
  82 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  83   = ALWAYS      -- ToDo: work out which of these are really used
  84   | EQQ
  85   | GE
  86   | GEU
  87   | GTT
  88   | GU
  89   | LE
  90   | LEU
  91   | LTT
  92   | LU
  93   | NE
  94   | NEG
  95   | POS
  96   | CARRY
  97   | OFLO
  98   | PARITY
  99   | NOTPARITY
 100 #endif
 101 #if sparc_TARGET_ARCH
 102   = ALWAYS      -- ToDo: work out which of these are really used
 103   | EQQ
 104   | GE
 105   | GEU
 106   | GTT
 107   | GU
 108   | LE
 109   | LEU
 110   | LTT
 111   | LU
 112   | NE
 113   | NEG
 114   | NEVER
 115   | POS
 116   | VC
 117   | VS
 118 #endif
 119 #if powerpc_TARGET_ARCH
 120   = ALWAYS
 121   | EQQ
 122   | GE
 123   | GEU
 124   | GTT
 125   | GU
 126   | LE
 127   | LEU
 128   | LTT
 129   | LU
 130   | NE
 131 #endif
 132     deriving Eq  -- Eq is derived so that an assertion can compare conditions
 133
 134 -- True exactly for the unsigned comparison conditions GU, LU, GEU and LEU.
 135 condUnsigned cond = case cond of
 136     GU  -> True;  LU  -> True
 137     GEU -> True;  LEU -> True
 138     _   -> False
 139
 140 -- Replace an unsigned comparison by its signed twin; anything else is unchanged.
 141 condToSigned cond = case cond of
 142     GU  -> GTT;  LU  -> LTT
 143     GEU -> GE;   LEU -> LE
 144     _   -> cond
 145
 146 -- Replace a signed comparison by its unsigned twin; anything else is unchanged.
 147 condToUnsigned cond = case cond of
 148     GTT -> GU;   LTT -> LU
 149     GE  -> GEU;  LE  -> LEU
 150     _   -> cond
 151
 152 #if powerpc_TARGET_ARCH
 153 -- Logical negation of a condition: each comparison is mapped to the
 154 -- comparison that holds exactly when the original one does not.
 155 -- The PowerPC constructor set has no counterpart for ALWAYS, so asking
 156 -- for its negation is a panic.
 157 condNegate cond = case cond of
 158     ALWAYS -> panic "condNegate: ALWAYS"
 159     EQQ    -> NE;   NE  -> EQQ
 160     GE     -> LTT;  LTT -> GE
 161     GEU    -> LU;   LU  -> GEU
 162     GTT    -> LE;   LE  -> GTT
 163     GU     -> LEU;  LEU -> GU
 164 #endif
 165
 166 -- -----------------------------------------------------------------------------
 167 -- Sizes on this architecture
 168
 169 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 170 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 171
 172 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
 173 data Size
 174 #if alpha_TARGET_ARCH
 175     = B     -- byte
 176     | Bu    -- presumably unsigned byte, as in the sparc variant below; confirm
 177 --  | W     -- word (2 bytes): UNUSED
 178 --  | Wu    -- : UNUSED
 179     | L     -- longword (4 bytes)
 180     | Q     -- quadword (8 bytes)
 181 --  | FF    -- VAX F-style floating pt: UNUSED
 182 --  | GF    -- VAX G-style floating pt: UNUSED
 183 --  | DF    -- VAX D-style floating pt: UNUSED
 184 --  | SF    -- IEEE single-precision floating pt: UNUSED
 185     | TF    -- IEEE double-precision floating pt
 186 #endif
 187 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 188     = B     -- byte (signed)
 189     | Bu    -- byte (unsigned)
 190     | H     -- halfword (signed, 2 bytes)
 191     | Hu    -- halfword (unsigned, 2 bytes)
 192     | W     -- word (4 bytes)
 193     | F     -- IEEE single-precision floating pt
 194     | DF    -- IEEE double-precision floating pt (machRepSize maps F64 to it)
 195 #endif
 196   deriving Eq
 197 -- Size corresponding to a MachRep; reps with no Size on this target panic.
 198 machRepSize :: MachRep -> Size
 199 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 200 machRepSize I16   = IF_ARCH_alpha(panic "machRepSize: I16", IF_ARCH_sparc(Hu, ))
 201 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 202 machRepSize I64   = panic "machRepSize: I64"
 203 machRepSize I128  = panic "machRepSize: I128"
 204 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 205 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 206 #endif
 207
 208 -- -----------------------------------------------------------------------------
 209 -- An operand that is either a register or an immediate (a handy type on some platforms)
 210
 211 data RI = RIReg Reg     -- operand given as a register
 212         | RIImm Imm     -- operand given as an immediate
 213
 214
 215 -- -----------------------------------------------------------------------------
 216 -- Machine's assembly language
 217
 218 -- 'Instr' is the instruction type of the native code generator.  The first
 219 -- few constructors (nearly all pseudo-ops) are common to every target; the
 220 -- rest are appended under CPP by whichever *_TARGET_ARCH section is enabled.
 221 data Instr
 222   = COMMENT FastString          -- comment pseudo-op
 223
 224   | LDATA   Section [CmmStatic] -- some static data spat out during code
 225                                 -- generation.  Will be extracted before
 226                                 -- pretty-printing.
 227
 228   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 229                                 -- codegen, removed later.  Preceding
 230                                 -- instruction should be a jump, as per the
 231                                 -- invariants for a BasicBlock (see Cmm).
 232
 233   | DELTA   Int                 -- specify current stack offset for the
 234                                 -- benefit of subsequent passes
 235
 236   | SPILL   Reg Int             -- ^ spill this reg to a stack slot (Int presumably names the slot; confirm)
 237   | RELOAD  Int Reg             -- ^ reload this reg from a stack slot (Int presumably names the slot; confirm)
 238
 239 -- -----------------------------------------------------------------------------
 240 -- Alpha instructions
 241
 242 #if alpha_TARGET_ARCH
 243
 244 -- data Instr continues: constructors that exist only when alpha_TARGET_ARCH is set.
 245
 246 -- Loads and stores.
 247               | LD            Size Reg AddrMode -- size, dst, src
 248               | LDA           Reg AddrMode      -- dst, src
 249               | LDAH          Reg AddrMode      -- dst, src
 250               | LDGP          Reg AddrMode      -- dst, src
 251               | LDI           Size Reg Imm     -- size, dst, src
 252               | ST            Size Reg AddrMode -- size, src, dst
 253
 254 -- Int Arithmetic.
 255               | CLR           Reg                   -- dst
 256               | ABS           Size RI Reg           -- size, src, dst
 257               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 258               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 259               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 260               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 261               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 262               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 263               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 264               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 265
 266 -- Simple bit-twiddling.
 267               | NOT           RI Reg
 268               | AND           Reg RI Reg
 269               | ANDNOT        Reg RI Reg
 270               | OR            Reg RI Reg
 271               | ORNOT         Reg RI Reg
 272               | XOR           Reg RI Reg
 273               | XORNOT        Reg RI Reg
 274               | SLL           Reg RI Reg
 275               | SRL           Reg RI Reg
 276               | SRA           Reg RI Reg
 277
 278               | ZAP           Reg RI Reg
 279               | ZAPNOT        Reg RI Reg
 280
 281               | NOP
 282
 283 -- Comparison
 284               | CMP           Cond Reg RI Reg -- presumably cond, src, src, dst (as for ADD); confirm
 285
 286 -- Float Arithmetic.
 287               | FCLR          Reg
 288               | FABS          Reg Reg
 289               | FNEG          Size Reg Reg
 290               | FADD          Size Reg Reg Reg
 291               | FDIV          Size Reg Reg Reg
 292               | FMUL          Size Reg Reg Reg
 293               | FSUB          Size Reg Reg Reg
 294               | CVTxy         Size Size Reg Reg
 295               | FCMP          Size Cond Reg Reg Reg
 296               | FMOV          Reg Reg
 297
 298 -- Jumping around.
 299               | BI            Cond Reg Imm
 300               | BF            Cond Reg Imm
 301               | BR            Imm
 302               | JMP           Reg AddrMode Int
 303               | BSR           Imm Int
 304               | JSR           Reg AddrMode Int
 305
 306 -- Alpha-specific pseudo-ops.
 307               | FUNBEGIN CLabel
 308               | FUNEND CLabel
 309
 310 -- NB: RI is deliberately not declared again here.  The unconditional
 311 -- 'data RI' earlier in this module (RIReg Reg | RIImm Imm) already covers
 312 -- Alpha; a second declaration would be a duplicate-definition error.
 313
 314 #endif /* alpha_TARGET_ARCH */
 315
 316
 317 -- -----------------------------------------------------------------------------
 318 -- Intel x86 instructions
 319
 320 {-
 321 Intel, in their infinite wisdom, selected a stack model for floating
 322 point registers on x86.  That might have made sense back in 1979 --
 323 nowadays we can see it for the nonsense it really is.  A stack model
 324 fits poorly with the existing nativeGen infrastructure, which assumes
 325 flat integer and FP register sets.  Prior to this commit, nativeGen
 326 could not generate correct x86 FP code -- to do so would have meant
 327 somehow working the register-stack paradigm into the register
 328 allocator and spiller, which sounds very difficult.
 329
 330 We have decided to cheat, and go for a simple fix which requires no
 331 infrastructure modifications, at the expense of generating ropey but
 332 correct FP code.  All notions of the x86 FP stack and its insns have
 333 been removed.  Instead, we pretend (to the instruction selector and
 334 register allocator) that x86 has six floating point registers, %fake0
 335 .. %fake5, which can be used in the usual flat manner.  We further
 336 claim that x86 has floating point instructions very similar to SPARC
 337 and Alpha, that is, a simple 3-operand register-register arrangement.
 338 Code generation and register allocation proceed on this basis.
 339
 340 When we come to print out the final assembly, our convenient fiction
 341 is converted to dismal reality.  Each fake instruction is
 342 independently converted to a series of real x86 instructions.
 343 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 344 arithmetic operations, the two operands are pushed onto the top of the
 345 FP stack, the operation done, and the result copied back into the
 346 relevant register.  There are only six %fake registers because 2 are
 347 needed for the translation, and x86 has 8 in total.
 348
 349 The translation is inefficient but is simple and it works.  A cleverer
 350 translation would handle a sequence of insns, simulating the FP stack
 351 contents, would not impose a fixed mapping from %fake to %st regs, and
 352 hopefully could avoid most of the redundant reg-reg moves of the
 353 current translation.
 354
 355 We might as well make use of whatever unique FP facilities Intel have
 356 chosen to bless us with (let's not be churlish, after all).
 357 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 358 -}
 359
 360 {-
 361 MORE FLOATING POINT MUSINGS...
 362
 363 Intel's internal floating point registers are by default 80 bit
 364 extended precision.  This means that all operations done on values in
 365 registers are done at 80 bits, and unless the intermediate values are
 366 truncated to the appropriate size (32 or 64 bits) by storing in
 367 memory, calculations in registers will give different results from
 368 calculations which pass intermediate values in memory (eg. via
 369 function calls).
 370
 371 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 372 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 373 that this will only affect 64-bit precision arithmetic; 32-bit
 374 calculations will still be done at 64-bit precision in registers.  So
 375 it doesn't solve the whole problem.
 376
 377 There's also the issue of what the C library is expecting in terms of
 378 precision.  It seems to be the case that glibc on Linux expects the
 379 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 380 unexpected effects.  Changing the default could have undesirable
 381 effects on other 3rd-party library code too, so the right thing would
 382 be to save/restore the FPU control word across Haskell code if we were
 383 to do this.
 384
 385 gcc's -ffloat-store gives consistent results by always storing the
 386 results of floating-point calculations in memory, which works for both
 387 32 and 64-bit precision.  However, it only affects the values of
 388 user-declared floating point variables in C, not intermediate results.
 389 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 390 flag).
 391
 392 Another problem is how to spill floating point registers in the
 393 register allocator.  Should we spill the whole 80 bits, or just 64?
 394 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 395 Linux, spilling 64 bits will round the results of some operations.
 396 This is what gcc does.  Spilling at 80 bits requires taking up a full
 397 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 398 the alignment problems.
 399
 400 In the future, we'll use the SSE registers for floating point.  This
 401 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 402 precision float ops), which means P4 or Xeon and above.  Using SSE
 403 will solve all these problems, because the SSE registers use fixed 32
 404 bit or 64 bit precision.
 405
 406 --SDM 1/2003
 407 -}
 408
 409 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 410
 411 -- data Instr continues: constructors for i386_TARGET_ARCH / x86_64_TARGET_ARCH.
 412
 413 -- Moves.
 414         | MOV         MachRep Operand Operand
 415         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 416         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 417         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 418         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 419         -- don't affect the high bits of the register.
 420
 421 -- Load effective address (also a very useful three-operand add instruction :-)
 422         | LEA         MachRep Operand Operand
 423
 424 -- Int Arithmetic.
 425         | ADD         MachRep Operand Operand
 426         | ADC         MachRep Operand Operand
 427         | SUB         MachRep Operand Operand
 428
 429         | MUL         MachRep Operand Operand
 430         | IMUL        MachRep Operand Operand   -- signed int mul
 431         | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
 432
 433         | DIV         MachRep Operand   -- eax := edx:eax/op, edx := edx:eax%op
 434         | IDIV        MachRep Operand   -- ditto, but signed
 435
 436 -- Simple bit-twiddling.
 437         | AND         MachRep Operand Operand
 438         | OR          MachRep Operand Operand
 439         | XOR         MachRep Operand Operand
 440         | NOT         MachRep Operand
 441         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
 442
 443 -- Shifts (amount may be immediate or %cl only)
 444         | SHL         MachRep Operand{-amount-} Operand
 445         | SAR         MachRep Operand{-amount-} Operand
 446         | SHR         MachRep Operand{-amount-} Operand
 447
 448         | BT          MachRep Imm Operand
 449         | NOP
 450
 451 #if i386_TARGET_ARCH
 452 -- Float Arithmetic.
 453
 454 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 455 -- as single instructions right up until we spit them out.
 456         -- all the 3-operand fake fp insns are src1 src2 dst
 457         -- and furthermore are constrained to be fp regs only.
 458         -- IMPORTANT: keep is_G_instr up to date with any changes here
 459         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 460         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 461         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 462
 463         | GLDZ        Reg -- dst(fpreg)
 464         | GLD1        Reg -- dst(fpreg)
 465
 466         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 467         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 468
 469         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 470         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 471
 472         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 473         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 474         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 475         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 476
 477                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 478                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 479                 -- comparable and the comparison is True.  Subsequent code must
 480                 -- test the %eflags zero flag regardless of the supplied Cond.
 481         | GCMP        Cond Reg Reg -- src1, src2
 482
 483         | GABS        MachRep Reg Reg -- src, dst
 484         | GNEG        MachRep Reg Reg -- src, dst
 485         | GSQRT       MachRep Reg Reg -- src, dst
 486         | GSIN        MachRep Reg Reg -- src, dst
 487         | GCOS        MachRep Reg Reg -- src, dst
 488         | GTAN        MachRep Reg Reg -- src, dst
 489
 490         | GFREE         -- do ffree on all x86 regs; an ugly hack
 491 #endif
 492
 493 #if x86_64_TARGET_ARCH
 494 -- SSE2 floating point: we use a restricted set of the available SSE2
 495 -- instructions for floating-point.
 496
 497         -- use MOV for moving (either movss or movsd (movlpd better?))
 498
 499         | CVTSS2SD      Reg Reg         -- F32 to F64
 500         | CVTSD2SS      Reg Reg         -- F64 to F32
 501         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
 502         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
 503         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 504         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 505
 506         -- use ADD & SUB for arithmetic.  In both cases, operands
 507         -- are  Operand Reg.
 508
 509         -- SSE2 floating-point division:
 510         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
 511
 512         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 513         -- compare single/double prec floating point respectively.
 514
 515         | SQRT          MachRep Operand Reg     -- src, dst
 516 #endif
 517
 518 -- Comparison
 519         | TEST          MachRep Operand Operand
 520         | CMP           MachRep Operand Operand
 521         | SETCC         Cond Operand
 522
 523 -- Stack Operations.
 524         | PUSH          MachRep Operand
 525         | POP           MachRep Operand
 526         -- both unused (SDM):
 527         --  | PUSHA
 528         --  | POPA
 529
 530 -- Jumping around.
 531         | JMP         Operand
 532         | JXX         Cond BlockId  -- includes unconditional branches
 533         | JXX_GBL     Cond Imm      -- non-local version of JXX
 534         | JMP_TBL     Operand [BlockId]  -- table jump
 535         | CALL        (Either Imm Reg) [Reg]
 536
 537 -- Other things.
 538         | CLTD MachRep   -- sign extend %eax into %edx:%eax
 539
 540         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
 541                            -- pretty-prints as
 542                            --       call 1f
 543                            -- 1:    popl %reg
 544                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 545         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
 546                            -- pretty-prints as
 547                            --       call 1f
 548                            -- 1:    popl %reg
 549
 550 -- Operand of an x86 / x86_64 instruction.
 551 data Operand
 552   = OpReg  Reg          -- register
 553   | OpImm  Imm          -- immediate value
 554   | OpAddr AddrMode     -- memory reference
 555
 556 #endif /* i386 or x86_64 */
 557
 558 #if i386_TARGET_ARCH
 559 -- When any fake FP insn is present, put GFREE before each CALL and JMP.
 560 i386_insert_ffrees :: [Instr] -> [Instr]
 561 i386_insert_ffrees code =
 562    if any is_G_instr code
 563       then concatMap ffree_before_nonlocal_transfers code
 564       else code
 565
 566 -- GFREE goes immediately in front of a CALL or JMP; other insns pass through.
 567 ffree_before_nonlocal_transfers :: Instr -> [Instr]
 568 ffree_before_nonlocal_transfers insn@(CALL _ _) = [GFREE, insn]
 569 ffree_before_nonlocal_transfers insn@(JMP _)    = [GFREE, insn]
 570 ffree_before_nonlocal_transfers insn            = [insn]
 571
 572
 573 -- Does this instruction belong to the fake x86 FP instruction set (the G*
 574 -- constructors of Instr)?  Any new fake FP insn must be added here as well.
 575 is_G_instr :: Instr -> Bool
 576 is_G_instr insn = case insn of
 577         -- GFREE itself is rejected with a panic
 578         GFREE   -> panic "is_G_instr: GFREE (!)"
 579         -- moves, loads, stores and constant loads
 580         GMOV{}  -> True;  GLD{}   -> True;  GST{}   -> True
 581         GLDZ{}  -> True;  GLD1{}  -> True
 582         -- conversions
 583         GFTOI{} -> True;  GDTOI{} -> True;  GITOF{} -> True;  GITOD{} -> True
 584         -- arithmetic, comparison and unary operations
 585         GADD{}  -> True;  GDIV{}  -> True;  GSUB{}  -> True;  GMUL{}  -> True
 586         GCMP{}  -> True;  GABS{}  -> True;  GNEG{}  -> True;  GSQRT{} -> True
 587         GSIN{}  -> True;  GCOS{}  -> True;  GTAN{}  -> True
 588         _       -> False
 589 #endif /* i386_TARGET_ARCH */
 590
 591
 592 -- -----------------------------------------------------------------------------
 593 -- Sparc instructions
 594
 595 #if sparc_TARGET_ARCH
 596
 597 -- data Instr continues: constructors that exist only when sparc_TARGET_ARCH is set.
 598
 599 -- Loads and stores.
 600               | LD            MachRep AddrMode Reg -- size, src, dst
 601               | ST            MachRep Reg AddrMode -- size, src, dst
 602
 603 -- Int Arithmetic.
 604               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 605               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 606               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 607               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 608               | RDY           Reg       -- move contents of Y register to reg
 609
 610 -- Simple bit-twiddling.
 611               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 612               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 613               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 614               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 615               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 616               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 617               | SLL           Reg RI Reg -- src1, src2, dst
 618               | SRL           Reg RI Reg -- src1, src2, dst
 619               | SRA           Reg RI Reg -- src1, src2, dst
 620               | SETHI         Imm Reg -- src, dst
 621               | NOP           -- Really SETHI 0, %g0, but worth an alias
 622
 623 -- Float Arithmetic.
 624
 625 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 626 -- instructions right up until we spit them out.
 627               | FABS          MachRep Reg Reg      -- size, src, dst
 628               | FADD          MachRep Reg Reg Reg  -- size, src1, src2, dst
 629               | FCMP          Bool MachRep Reg Reg -- exception?, size, src1, src2
 630               | FDIV          MachRep Reg Reg Reg -- size, src1, src2, dst
 631               | FMOV          MachRep Reg Reg     -- size, src, dst
 632               | FMUL          MachRep Reg Reg Reg -- size, src1, src2, dst
 633               | FNEG          MachRep Reg Reg     -- size, src, dst
 634               | FSQRT         MachRep Reg Reg     -- size, src, dst
 635               | FSUB          MachRep Reg Reg Reg -- size, src1, src2, dst
 636               | FxTOy         MachRep MachRep Reg Reg -- two sizes (presumably src then dst; confirm), src, dst
 637
 638 -- Jumping around.
 639               | BI            Cond Bool Imm -- cond, annul?, target
 640               | BF            Cond Bool Imm -- cond, annul?, target
 641
 642               | JMP           AddrMode     -- target
 643               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 644
 645 riZero :: RI -> Bool   -- True for an immediate 0 or for real register 0
 646 riZero operand = case operand of
 647     RIImm (ImmInt 0)     -> True
 648     RIImm (ImmInteger 0) -> True
 649     RIReg (RealReg 0)    -> True
 650     _                    -> False
 651
 652 -- Build the instruction that puts fp + n words into dst, i.e. the effective
 653 -- address the corresponding fpRel sequence (in MachRegs) would use.  It is
 654 -- defined here rather than next to fpRel because of module dependencies.
 655 fpRelEA :: Int -> Reg -> Instr
 656 fpRelEA n dst = ADD False False fp offset dst
 657    where offset = RIImm (ImmInt (n * wORD_SIZE))
 658
 659 -- Instruction that adjusts the stack pointer by n words (n * wORD_SIZE).
 660 moveSp :: Int -> Instr
 661 moveSp n = ADD False False sp delta sp
 662    where delta = RIImm (ImmInt (n * wORD_SIZE))
 663
 664 fPair :: Reg -> Reg   -- second half of a double, given its first (even, >= 32) register
 665 fPair reg = case reg of
 666     RealReg r | r >= 32 && even r -> RealReg (r + 1)
 667     _                             -> pprPanic "fPair(sparc NCG)" (ppr reg)
 668 #endif /* sparc_TARGET_ARCH */
 669
 670
 671 -- -----------------------------------------------------------------------------
 672 -- PowerPC instructions
 673
 674 #ifdef powerpc_TARGET_ARCH
 675 -- data Instr continues: constructors that exist only for powerpc_TARGET_ARCH.
 676
 677 -- Loads and stores.
 678               | LD      MachRep Reg AddrMode -- Load size, dst, src
 679               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 680               | ST      MachRep Reg AddrMode -- Store size, src, dst
 681               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 682               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 683               | LI      Reg Imm -- Load Immediate dst, src
 684               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 685
 686               | CMP     MachRep Reg RI -- size, src1, src2
 687               | CMPL    MachRep Reg RI -- size, src1, src2
 688
 689               | BCC     Cond BlockId
 690               | BCCFAR  Cond BlockId
 691               | JMP     CLabel          -- same as branch,
 692                                         -- but with CLabel instead of block ID
 693               | MTCTR   Reg
 694               | BCTR    [BlockId]       -- with list of local destinations
 695               | BL      CLabel [Reg]    -- with list of argument regs
 696               | BCTRL   [Reg]
 697
 698               | ADD     Reg Reg RI -- dst, src1, src2
 699               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 700               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 701               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 702               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 703               | MULLW   Reg Reg RI
 704               | DIVW    Reg Reg Reg
 705               | DIVWU   Reg Reg Reg
 706
 707               | MULLW_MayOflo Reg Reg Reg
 708                         -- dst = 1 if src1 * src2 overflows
 709                         -- pseudo-instruction; pretty-printed as:
 710                         -- mullwo. dst, src1, src2
 711                         -- mfxer dst
 712                         -- rlwinm dst, dst, 2, 31,31
 713
 714               | AND     Reg Reg RI -- dst, src1, src2
 715               | OR      Reg Reg RI -- dst, src1, src2
 716               | XOR     Reg Reg RI -- dst, src1, src2
 717               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 718
 719               | EXTS    MachRep Reg Reg
 720
 721               | NEG     Reg Reg
 722               | NOT     Reg Reg
 723
 724               | SLW     Reg Reg RI      -- shift left word
 725               | SRW     Reg Reg RI      -- shift right word
 726               | SRAW    Reg Reg RI      -- shift right arithmetic word
 727
 728                         -- Rotate Left Word Immediate then AND with Mask
 729               | RLWINM  Reg Reg Int Int Int
 730
 731               | FADD    MachRep Reg Reg Reg
 732               | FSUB    MachRep Reg Reg Reg
 733               | FMUL    MachRep Reg Reg Reg
 734               | FDIV    MachRep Reg Reg Reg
 735               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 736
 737               | FCMP    Reg Reg
 738
 739               | FCTIWZ  Reg Reg         -- convert to integer word
 740               | FRSP    Reg Reg         -- reduce to single precision
 741                                         -- (but destination is a FP register)
 742
 743               | CRNOR   Int Int Int    -- condition register nor
 744               | MFCR    Reg            -- move from condition register
 745
 746               | MFLR    Reg            -- move from link register
 747               | FETCHPC Reg            -- pseudo-instruction:
 748                                        -- bcl to next insn, mflr reg
 749
 750               | LWSYNC -- memory barrier
 751 #endif /* powerpc_TARGET_ARCH */