compiler/nativeGen/MachInstrs.hs

   1 {-# OPTIONS -w #-}
   2 -- The above warning suppression flag is a temporary kludge.
   3 -- While working on this module you are encouraged to remove it and fix
   4 -- any warnings in the module. See
   5 --     http://hackage.haskell.org/trac/ghc/wiki/Commentary/CodingStyle#Warnings
   6 -- for details
   7
   8 -----------------------------------------------------------------------------
   9 --
  10 -- Machine-dependent assembly language
  11 --
  12 -- (c) The University of Glasgow 1993-2004
  13 --
  14 -----------------------------------------------------------------------------
  15
  16 #include "nativeGen/NCG.h"
  17
  18 module MachInstrs (
  19         -- * Cmm instantiations
  20         NatCmm, NatCmmTop, NatBasicBlock,
  21
  22         -- * Machine instructions
  23         Instr(..),
  24         Cond(..), condUnsigned, condToSigned, condToUnsigned,
  25 #if powerpc_TARGET_ARCH
  26         condNegate,
  27 #endif
  28 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
  29         Size(..), machRepSize,
  30 #endif
  31         RI(..),
  32
  33 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  34         Operand(..),
  35 #endif
  36 #if i386_TARGET_ARCH
  37         i386_insert_ffrees,
  38 #endif
  39 #if sparc_TARGET_ARCH
  40         riZero, fpRelEA, moveSp, fPair,
  41 #endif
  42     ) where
  43
  44 #include "HsVersions.h"
  45
  46 import BlockId
  47 import MachRegs
  48 import Cmm
  49 import MachOp           ( MachRep(..) )
  50 import CLabel           ( CLabel, pprCLabel )
  51 import Panic            ( panic )
  52 import Outputable
  53 import FastString
  54 import Constants       ( wORD_SIZE )
  55
  56 import GHC.Exts
  57
  58
  59 -- -----------------------------------------------------------------------------
  60 -- Our flavours of the Cmm types
  61
  62 -- Type synonyms for Cmm populated with native code.  Each synonym
     -- instantiates one of the generic Cmm types (presumably from the imported
     -- Cmm module -- TODO confirm) with this module's 'Instr' as the
     -- instruction type and 'CmmStatic' for the static-data parameters.
  63 type NatCmm        = GenCmm CmmStatic [CmmStatic] (ListGraph Instr)
  64 type NatCmmTop     = GenCmmTop CmmStatic [CmmStatic] (ListGraph Instr)
  65 type NatBasicBlock = GenBasicBlock Instr
  66
  67 -- -----------------------------------------------------------------------------
  68 -- Conditions on this architecture
  69
     -- | Condition codes carried by the conditional instructions of 'Instr'.
     -- The constructor list is supplied by whichever CPP section below matches
     -- the target architecture, so the set of available conditions differs per
     -- target.  'EQQ', 'GTT' and 'LTT' have a doubled letter because EQ, GT and
     -- LT are already Prelude names.
  70 data Cond
  71 #if alpha_TARGET_ARCH
  72   = ALWAYS      -- For BI (same as BR)
  73   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  74   | GE          -- For BI only
  75   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  76   | LE          -- For CMP and BI
  77   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  78   | NE          -- For BI only
  79   | NEVER       -- For BI (null instruction)
  80   | ULE         -- For CMP only
  81   | ULT         -- For CMP only
  82 #endif
  83 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
  84   = ALWAYS      -- What's really used? ToDo
  85   | EQQ
  86   | GE
  87   | GEU
  88   | GTT
  89   | GU
  90   | LE
  91   | LEU
  92   | LTT
  93   | LU
  94   | NE
  95   | NEG
  96   | POS
  97   | CARRY
  98   | OFLO
  99   | PARITY
  100   | NOTPARITY
  101 #endif
  102 #if sparc_TARGET_ARCH
  103   = ALWAYS      -- What's really used? ToDo
  104   | EQQ
  105   | GE
  106   | GEU
  107   | GTT
  108   | GU
  109   | LE
  110   | LEU
  111   | LTT
  112   | LU
  113   | NE
  114   | NEG
  115   | NEVER
  116   | POS
  117   | VC
  118   | VS
  119 #endif
  120 #if powerpc_TARGET_ARCH
  121   = ALWAYS
  122   | EQQ
  123   | GE
  124   | GEU
  125   | GTT
  126   | GU
  127   | LE
  128   | LEU
  129   | LTT
  130   | LU
  131   | NE
  132 #endif
  133     deriving Eq  -- to make an assertion work
 134
 135 condUnsigned GU  = True
 136 condUnsigned LU  = True
 137 condUnsigned GEU = True
 138 condUnsigned LEU = True
 139 condUnsigned _   = False
 140
 141 condToSigned GU  = GTT
 142 condToSigned LU  = LTT
 143 condToSigned GEU = GE
 144 condToSigned LEU = LE
 145 condToSigned x   = x
 146
 147 condToUnsigned GTT = GU
 148 condToUnsigned LTT = LU
 149 condToUnsigned GE  = GEU
 150 condToUnsigned LE  = LEU
 151 condToUnsigned x   = x
 152
 153 #if powerpc_TARGET_ARCH
 154 condNegate ALWAYS  = panic "condNegate: ALWAYS"
 155 condNegate EQQ     = NE
 156 condNegate GE      = LTT
 157 condNegate GEU     = LU
 158 condNegate GTT     = LE
 159 condNegate GU      = LEU
 160 condNegate LE      = GTT
 161 condNegate LEU     = GU
 162 condNegate LTT     = GE
 163 condNegate LU      = GEU
 164 condNegate NE      = EQQ
 165 #endif
 166
 167 -- -----------------------------------------------------------------------------
 168 -- Sizes on this architecture
 169
 170 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 171 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 172
 173 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH && !x86_64_TARGET_ARCH
      -- | Operand sizes for the targets that still use a dedicated 'Size' type.
      -- The enclosing #if already excludes powerpc, i386 and x86_64, so the
      -- powerpc half of the second condition below can never be true here;
      -- in effect the second constructor list is the sparc one.
  174 data Size
  175 #if alpha_TARGET_ARCH
  176     = B     -- byte
  177     | Bu
  178 --  | W     -- word (2 bytes): UNUSED
  179 --  | Wu    -- : UNUSED
  180     | L     -- longword (4 bytes)
  181     | Q     -- quadword (8 bytes)
  182 --  | FF    -- VAX F-style floating pt: UNUSED
  183 --  | GF    -- VAX G-style floating pt: UNUSED
  184 --  | DF    -- VAX D-style floating pt: UNUSED
  185 --  | SF    -- IEEE single-precision floating pt: UNUSED
  186     | TF    -- IEEE double-precision floating pt
  187 #endif
  188 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
  189     = B     -- byte (signed)
  190     | Bu    -- byte (unsigned)
  191     | H     -- halfword (signed, 2 bytes)
  192     | Hu    -- halfword (unsigned, 2 bytes)
  193     | W     -- word (4 bytes)
  194     | F     -- IEEE single-precision floating pt
  195     | DF    -- IEEE double-precision floating pt
  196 #endif
  197   deriving Eq
 198
 199 machRepSize :: MachRep -> Size
 200 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 201 machRepSize I16   = IF_ARCH_alpha(err,IF_ARCH_sparc(Hu, ))
 202 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 203 machRepSize I64   = panic "machRepSize: I64"
 204 machRepSize I128  = panic "machRepSize: I128"
 205 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))
 206 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 207 #endif
 208
 209 -- -----------------------------------------------------------------------------
 210 -- Register or immediate (a handy type on some platforms)
 211
      -- | An instruction operand that is either a register or an immediate.
      -- Declared unconditionally, so it exists on every target.
  212 data RI = RIReg Reg     -- operand held in a register
  213         | RIImm Imm     -- immediate operand
 214
 215
 216 -- -----------------------------------------------------------------------------
 217 -- Machine's assembly language
 218
 219 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 220 -- mostly all of 'Instr' is machine-specific.
 221
      -- | The native instruction type.  The constructors immediately below are
      -- shared by every target; the declaration is then continued by the one
      -- CPP section further down that matches the target architecture.
  222 data Instr
  223   = COMMENT FastString          -- comment pseudo-op
  224
  225   | LDATA   Section [CmmStatic] -- some static data spat out during code
  226                                 -- generation.  Will be extracted before
  227                                 -- pretty-printing.
  228
  229   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
  230                                 -- codegen, removed later.  Preceding
  231                                 -- instruction should be a jump, as per the
  232                                 -- invariants for a BasicBlock (see Cmm).
  233
  234   | DELTA   Int                 -- specify current stack offset for
  235                                 -- benefit of subsequent passes
  236
      -- NOTE(review): the Int in SPILL/RELOAD presumably identifies the stack
      -- slot -- TODO confirm against the register allocator.
  237   | SPILL   Reg Int             -- ^ spill this reg to a stack slot
  238   | RELOAD  Int Reg             -- ^ reload this reg from a stack slot
 239
 240 -- -----------------------------------------------------------------------------
 241 -- Alpha instructions
 242
 243 #if alpha_TARGET_ARCH
 244
  245 -- data Instr continues...
      -- (these constructors are only part of 'Instr' when alpha_TARGET_ARCH is
      -- set; several constructor names, e.g. LD, ADD and JMP, are reused with
      -- different fields by the other targets' sections)
  246
  247 -- Loads and stores.
  248               | LD            Size Reg AddrMode -- size, dst, src
  249               | LDA           Reg AddrMode      -- dst, src
  250               | LDAH          Reg AddrMode      -- dst, src
  251               | LDGP          Reg AddrMode      -- dst, src
  252               | LDI           Size Reg Imm     -- size, dst, src
  253               | ST            Size Reg AddrMode -- size, src, dst
  254
  255 -- Int Arithmetic.
  256               | CLR           Reg                   -- dst
  257               | ABS           Size RI Reg           -- size, src, dst
  258               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
  259               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
  260               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
  261               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
  262               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
  263               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
  264               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
  265               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
  266
  267 -- Simple bit-twiddling.
      -- NOTE(review): operand order is not stated for this group; presumably
      -- src.., dst as in the arithmetic group above -- TODO confirm.
  268               | NOT           RI Reg
  269               | AND           Reg RI Reg
  270               | ANDNOT        Reg RI Reg
  271               | OR            Reg RI Reg
  272               | ORNOT         Reg RI Reg
  273               | XOR           Reg RI Reg
  274               | XORNOT        Reg RI Reg
  275               | SLL           Reg RI Reg
  276               | SRL           Reg RI Reg
  277               | SRA           Reg RI Reg
  278
  279               | ZAP           Reg RI Reg
  280               | ZAPNOT        Reg RI Reg
  281
  282               | NOP
  283
  284 -- Comparison
  285               | CMP           Cond Reg RI Reg
  286
  287 -- Float Arithmetic.
  288               | FCLR          Reg
  289               | FABS          Reg Reg
  290               | FNEG          Size Reg Reg
  291               | FADD          Size Reg Reg Reg
  292               | FDIV          Size Reg Reg Reg
  293               | FMUL          Size Reg Reg Reg
  294               | FSUB          Size Reg Reg Reg
  295               | CVTxy         Size Size Reg Reg
  296               | FCMP          Size Cond Reg Reg Reg
  297               | FMOV          Reg Reg
  298
  299 -- Jumping around.
  300               | BI            Cond Reg Imm
  301               | BF            Cond Reg Imm
  302               | BR            Imm
  303               | JMP           Reg AddrMode Int
  304               | BSR           Imm Int
  305               | JSR           Reg AddrMode Int
  306
  307 -- Alpha-specific pseudo-ops.
  308               | FUNBEGIN CLabel
  309               | FUNEND CLabel
 310
 311 data RI
 312   = RIReg Reg
 313   | RIImm Imm
 314
 315 #endif /* alpha_TARGET_ARCH */
 316
 317
 318 -- -----------------------------------------------------------------------------
 319 -- Intel x86 instructions
 320
 321 {-
 322 Intel, in their infinite wisdom, selected a stack model for floating
 323 point registers on x86.  That might have made sense back in 1979 --
 324 nowadays we can see it for the nonsense it really is.  A stack model
 325 fits poorly with the existing nativeGen infrastructure, which assumes
 326 flat integer and FP register sets.  Prior to this commit, nativeGen
 327 could not generate correct x86 FP code -- to do so would have meant
 328 somehow working the register-stack paradigm into the register
 329 allocator and spiller, which sounds very difficult.
 330
 331 We have decided to cheat, and go for a simple fix which requires no
 332 infrastructure modifications, at the expense of generating ropey but
 333 correct FP code.  All notions of the x86 FP stack and its insns have
 334 been removed.  Instead, we pretend (to the instruction selector and
 335 register allocator) that x86 has six floating point registers, %fake0
 336 .. %fake5, which can be used in the usual flat manner.  We further
 337 claim that x86 has floating point instructions very similar to SPARC
 338 and Alpha, that is, a simple 3-operand register-register arrangement.
 339 Code generation and register allocation proceed on this basis.
 340
 341 When we come to print out the final assembly, our convenient fiction
 342 is converted to dismal reality.  Each fake instruction is
 343 independently converted to a series of real x86 instructions.
 344 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 345 arithmetic operations, the two operands are pushed onto the top of the
 346 FP stack, the operation done, and the result copied back into the
 347 relevant register.  There are only six %fake registers because 2 are
 348 needed for the translation, and x86 has 8 in total.
 349
 350 The translation is inefficient but is simple and it works.  A cleverer
 351 translation would handle a sequence of insns, simulating the FP stack
 352 contents, would not impose a fixed mapping from %fake to %st regs, and
 353 hopefully could avoid most of the redundant reg-reg moves of the
 354 current translation.
 355
 356 We might as well make use of whatever unique FP facilities Intel have
 357 chosen to bless us with (let's not be churlish, after all).
 358 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 359 -}
 360
 361 {-
 362 MORE FLOATING POINT MUSINGS...
 363
 364 Intel's internal floating point registers are by default 80 bit
 365 extended precision.  This means that all operations done on values in
 366 registers are done at 80 bits, and unless the intermediate values are
 367 truncated to the appropriate size (32 or 64 bits) by storing in
 368 memory, calculations in registers will give different results from
 369 calculations which pass intermediate values in memory (eg. via
 370 function calls).
 371
 372 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 373 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 374 that this will only affect 64-bit precision arithmetic; 32-bit
 375 calculations will still be done at 64-bit precision in registers.  So
 376 it doesn't solve the whole problem.
 377
 378 There's also the issue of what the C library is expecting in terms of
 379 precision.  It seems to be the case that glibc on Linux expects the
 380 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 381 unexpected effects.  Changing the default could have undesirable
 382 effects on other 3rd-party library code too, so the right thing would
 383 be to save/restore the FPU control word across Haskell code if we were
 384 to do this.
 385
 386 gcc's -ffloat-store gives consistent results by always storing the
 387 results of floating-point calculations in memory, which works for both
 388 32 and 64-bit precision.  However, it only affects the values of
 389 user-declared floating point variables in C, not intermediate results.
 390 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 391 flag).
 392
 393 Another problem is how to spill floating point registers in the
 394 register allocator.  Should we spill the whole 80 bits, or just 64?
 395 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 396 Linux, spilling 64 bits will round the results of some operations.
 397 This is what gcc does.  Spilling at 80 bits requires taking up a full
 398 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 399 the alignment problems.
 400
 401 In the future, we'll use the SSE registers for floating point.  This
 402 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 403 precision float ops), which means P4 or Xeon and above.  Using SSE
 404 will solve all these problems, because the SSE registers use fixed 32
 405 bit or 64 bit precision.
 406
 407 --SDM 1/2003
 408 -}
 409
 410 #if i386_TARGET_ARCH || x86_64_TARGET_ARCH
 411
  412 -- data Instr continues...
      -- (constructors shared by the i386 and x86_64 targets)
  413
  414 -- Moves.
  415         | MOV         MachRep Operand Operand
  416         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
  417         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
  418         -- x86_64 note: plain mov into a 32-bit register always zero-extends
  419         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
  420         -- don't affect the high bits of the register.
  421
  422 -- Load effective address (also a very useful three-operand add instruction :-)
  423         | LEA         MachRep Operand Operand
  424
  425 -- Int Arithmetic.
  426         | ADD         MachRep Operand Operand
  427         | ADC         MachRep Operand Operand
  428         | SUB         MachRep Operand Operand
  429
  430         | MUL         MachRep Operand Operand
  431         | IMUL        MachRep Operand Operand   -- signed int mul
  432         | IMUL2       MachRep Operand -- %edx:%eax = operand * %eax
  433
  434         | DIV         MachRep Operand   -- eax := edx:eax/op, edx := edx:eax%op
  435         | IDIV        MachRep Operand   -- ditto, but signed
  436
  437 -- Simple bit-twiddling.
  438         | AND         MachRep Operand Operand
  439         | OR          MachRep Operand Operand
  440         | XOR         MachRep Operand Operand
  441         | NOT         MachRep Operand
  442         | NEGI        MachRep Operand -- NEG instruction (name clash with Cond)
  443
  444 -- Shifts (amount may be immediate or %cl only)
  445         | SHL         MachRep Operand{-amount-} Operand
  446         | SAR         MachRep Operand{-amount-} Operand
  447         | SHR         MachRep Operand{-amount-} Operand
  448
  449         | BT          MachRep Imm Operand
  450         | NOP
 451
  452 #if i386_TARGET_ARCH
  453 -- Float Arithmetic.
  454
  455 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
  456 -- as single instructions right up until we spit them out.
  457         -- all the 3-operand fake fp insns are src1 src2 dst
  458         -- and furthermore are constrained to be fp regs only.
  459         -- IMPORTANT: keep is_G_instr up to date with any changes here
  460         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
  461         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
  462         | GST         MachRep Reg AddrMode -- src(fpreg), dst
  463
  464         | GLDZ        Reg -- dst(fpreg)
  465         | GLD1        Reg -- dst(fpreg)
  466
  467         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
  468         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
  469
  470         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
  471         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
  472
  473         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
  474         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
  475         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
  476         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
  477
  478                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
  479                 -- Compare src1 with src2; set the Zero flag iff the numbers are
  480                 -- comparable and the comparison is True.  Subsequent code must
  481                 -- test the %eflags zero flag regardless of the supplied Cond.
  482         | GCMP        Cond Reg Reg -- src1, src2
  483
  484         | GABS        MachRep Reg Reg -- src, dst
  485         | GNEG        MachRep Reg Reg -- src, dst
  486         | GSQRT       MachRep Reg Reg -- src, dst
              -- NOTE(review): GSIN/GCOS/GTAN also carry two CLabel fields that the
              -- "src, dst" comments below do not describe -- TODO document them.
  487         | GSIN        MachRep CLabel CLabel Reg Reg -- src, dst
  488         | GCOS        MachRep CLabel CLabel Reg Reg -- src, dst
  489         | GTAN        MachRep CLabel CLabel Reg Reg -- src, dst
  490
  491         | GFREE         -- do ffree on all x86 regs; an ugly hack
  492 #endif
 493
  494 #if x86_64_TARGET_ARCH
  495 -- SSE2 floating point: we use a restricted set of the available SSE2
  496 -- instructions for floating-point.
  497
  498         -- use MOV for moving (either movss or movsd (movlpd better?))
  499
              -- Conversions.  NOTE(review): operand order is presumably src, dst,
              -- as stated for SQRT below -- TODO confirm.
  500         | CVTSS2SD      Reg Reg         -- F32 to F64
  501         | CVTSD2SS      Reg Reg         -- F64 to F32
  502         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
  503         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
  504         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
  505         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
  506
  507         -- use ADD & SUB for arithmetic.  In both cases, operands
  508         -- are  Operand Reg.
  509
  510         -- SSE2 floating-point division:
  511         | FDIV          MachRep Operand Operand   -- divisor, dividend(dst)
  512
  513         -- use CMP for comparisons.  ucomiss and ucomisd instructions
  514         -- compare single/double prec floating point respectively.
  515
  516         | SQRT          MachRep Operand Reg     -- src, dst
  517 #endif
 518
  519 -- Comparison
  520         | TEST          MachRep Operand Operand
  521         | CMP           MachRep Operand Operand
  522         | SETCC         Cond Operand
  523
  524 -- Stack Operations.
  525         | PUSH          MachRep Operand
  526         | POP           MachRep Operand
  527         -- both unused (SDM):
  528         --  | PUSHA
  529         --  | POPA
  530
  531 -- Jumping around.
      -- On i386, i386_insert_ffrees puts a GFREE in front of every JMP and CALL
      -- (but not JXX, JXX_GBL or JMP_TBL) when the code uses fake FP insns.
  532         | JMP         Operand
  533         | JXX         Cond BlockId  -- includes unconditional branches
  534         | JXX_GBL     Cond Imm      -- non-local version of JXX
  535         | JMP_TBL     Operand [BlockId]  -- table jump
              -- NOTE(review): the [Reg] of CALL is presumably the list of argument
              -- registers -- TODO confirm.
  536         | CALL        (Either Imm Reg) [Reg]
  537
  538 -- Other things.
  539         | CLTD MachRep   -- sign extend %eax into %edx:%eax
  540
  541         | FETCHGOT    Reg  -- pseudo-insn for ELF position-independent code
  542                            -- pretty-prints as
  543                            --       call 1f
  544                            -- 1:    popl %reg
  545                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
  546         | FETCHPC     Reg  -- pseudo-insn for Darwin position-independent code
  547                            -- pretty-prints as
  548                            --       call 1f
  549                            -- 1:    popl %reg
 550
 551
      -- | An operand of an i386/x86_64 instruction: a register, an immediate
      -- or a memory reference.
  552 data Operand
  553   = OpReg  Reg          -- register
  554   | OpImm  Imm          -- immediate value
  555   | OpAddr AddrMode     -- memory reference
 556
 557 #endif /* i386 or x86_64 */
 558
 559 #if i386_TARGET_ARCH
 560 i386_insert_ffrees :: [Instr] -> [Instr]
 561 i386_insert_ffrees insns
 562    | any is_G_instr insns
 563    = concatMap ffree_before_nonlocal_transfers insns
 564    | otherwise
 565    = insns
 566
 567 ffree_before_nonlocal_transfers insn
 568    = case insn of
 569         CALL _ _ -> [GFREE, insn]
 570         JMP _    -> [GFREE, insn]
 571         other    -> [insn]
 572
 573
 574 -- if you ever add a new FP insn to the fake x86 FP insn set,
 575 -- you must update this too
 576 is_G_instr :: Instr -> Bool
 577 is_G_instr instr
 578    = case instr of
 579         GMOV _ _ -> True; GLD _ _ _ -> True; GST _ _ _ -> True
 580         GLDZ _ -> True; GLD1 _ -> True
 581         GFTOI _ _ -> True; GDTOI _ _ -> True
 582         GITOF _ _ -> True; GITOD _ _ -> True
 583         GADD _ _ _ _ -> True; GDIV _ _ _ _ -> True
 584         GSUB _ _ _ _ -> True; GMUL _ _ _ _ -> True
 585         GCMP _ _ _ -> True; GABS _ _ _ -> True
 586         GNEG _ _ _ -> True; GSQRT _ _ _ -> True
 587         GSIN _ _ _ _ _ -> True; GCOS _ _ _ _ _ -> True; GTAN _ _ _ _ _ -> True
 588         GFREE -> panic "is_G_instr: GFREE (!)"
 589         other -> False
 590 #endif /* i386_TARGET_ARCH */
 591
 592
 593 -- -----------------------------------------------------------------------------
 594 -- Sparc instructions
 595
 596 #if sparc_TARGET_ARCH
 597
  598 -- data Instr continues...
      -- (these constructors are only part of 'Instr' when sparc_TARGET_ARCH is set)
  599
  600 -- Loads and stores.
  601               | LD            MachRep AddrMode Reg -- size, src, dst
  602               | ST            MachRep Reg AddrMode -- size, src, dst
  603
  604 -- Int Arithmetic.
  605               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
  606               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
  607               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
  608               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
  609               | RDY           Reg       -- move contents of Y register to reg
  610
  611 -- Simple bit-twiddling.
  612               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
  613               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
  614               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
  615               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
  616               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
  617               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
  618               | SLL           Reg RI Reg -- src1, src2, dst
  619               | SRL           Reg RI Reg -- src1, src2, dst
  620               | SRA           Reg RI Reg -- src1, src2, dst
  621               | SETHI         Imm Reg -- src, dst
  622               | NOP           -- Really SETHI 0, %g0, but worth an alias
  623
  624 -- Float Arithmetic.
  625
  626 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
  627 -- instructions right up until we spit them out.
  628               | FABS          MachRep Reg Reg      -- src dst
  629               | FADD          MachRep Reg Reg Reg  -- src1, src2, dst
  630               | FCMP          Bool MachRep Reg Reg -- exception?, size, src1, src2
  631               | FDIV          MachRep Reg Reg Reg -- src1, src2, dst
  632               | FMOV          MachRep Reg Reg     -- src, dst
  633               | FMUL          MachRep Reg Reg Reg -- src1, src2, dst
  634               | FNEG          MachRep Reg Reg     -- src, dst
  635               | FSQRT         MachRep Reg Reg     -- src, dst
  636               | FSUB          MachRep Reg Reg Reg -- src1, src2, dst
                    -- NOTE(review): the two MachRep fields of FxTOy are presumably
                    -- the source and destination sizes -- TODO confirm.
  637               | FxTOy         MachRep MachRep Reg Reg -- src, dst
  638
  639 -- Jumping around.
  640               | BI            Cond Bool Imm -- cond, annul?, target
  641               | BF            Cond Bool Imm -- cond, annul?, target
  642
  643               | JMP           AddrMode     -- target
  644               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 645
 646 riZero :: RI -> Bool
 647
 648 riZero (RIImm (ImmInt 0))           = True
 649 riZero (RIImm (ImmInteger 0))       = True
 650 riZero (RIReg (RealReg 0))          = True
 651 riZero _                            = False
 652
 653 -- Calculate the effective address which would be used by the
 654 -- corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
 655 -- alas -- can't have fpRelEA here because of module dependencies.
 656 fpRelEA :: Int -> Reg -> Instr
 657 fpRelEA n dst
 658    = ADD False False fp (RIImm (ImmInt (n * wORD_SIZE))) dst
 659
 660 -- Code to shift the stack pointer by n words.
 661 moveSp :: Int -> Instr
 662 moveSp n
 663    = ADD False False sp (RIImm (ImmInt (n * wORD_SIZE))) sp
 664
 665 -- Produce the second-half-of-a-double register given the first half.
 666 fPair :: Reg -> Reg
 667 fPair (RealReg n) | n >= 32 && n `mod` 2 == 0  = RealReg (n+1)
 668 fPair other = pprPanic "fPair(sparc NCG)" (ppr other)
 669 #endif /* sparc_TARGET_ARCH */
 670
 671
 672 -- -----------------------------------------------------------------------------
 673 -- PowerPC instructions
 674
 675 #ifdef powerpc_TARGET_ARCH
 676 -- data Instr continues...
 677
 678 -- Loads and stores.
 679               | LD      MachRep Reg AddrMode -- Load size, dst, src
 680               | LA      MachRep Reg AddrMode -- Load arithmetic size, dst, src
 681               | ST      MachRep Reg AddrMode -- Store size, src, dst
 682               | STU     MachRep Reg AddrMode -- Store with Update size, src, dst
 683               | LIS     Reg Imm -- Load Immediate Shifted dst, src
 684               | LI      Reg Imm -- Load Immediate dst, src
 685               | MR      Reg Reg -- Move Register dst, src -- also for fmr
 686
 687               | CMP     MachRep Reg RI --- size, src1, src2
 688               | CMPL    MachRep Reg RI --- size, src1, src2
 689
 690               | BCC     Cond BlockId
 691               | BCCFAR  Cond BlockId
 692               | JMP     CLabel          -- same as branch,
 693                                         -- but with CLabel instead of block ID
 694               | MTCTR   Reg
 695               | BCTR    [BlockId]       -- with list of local destinations
 696               | BL      CLabel [Reg]    -- with list of argument regs
 697               | BCTRL   [Reg]
 698
 699               | ADD     Reg Reg RI -- dst, src1, src2
 700               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 701               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 702               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 703               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 704               | MULLW   Reg Reg RI
 705               | DIVW    Reg Reg Reg
 706               | DIVWU   Reg Reg Reg
 707
 708               | MULLW_MayOflo Reg Reg Reg
 709                         -- dst = 1 if src1 * src2 overflows
 710                         -- pseudo-instruction; pretty-printed as:
 711                         -- mullwo. dst, src1, src2
 712                         -- mfxer dst
 713                         -- rlwinm dst, dst, 2, 31,31
 714
 715               | AND     Reg Reg RI -- dst, src1, src2
 716               | OR      Reg Reg RI -- dst, src1, src2
 717               | XOR     Reg Reg RI -- dst, src1, src2
 718               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 719
 720               | EXTS    MachRep Reg Reg
 721
 722               | NEG     Reg Reg
 723               | NOT     Reg Reg
 724
 725               | SLW     Reg Reg RI      -- shift left word
 726               | SRW     Reg Reg RI      -- shift right word
 727               | SRAW    Reg Reg RI      -- shift right arithmetic word
 728
 729                         -- Rotate Left Word Immediate then AND with Mask
 730               | RLWINM  Reg Reg Int Int Int
 731
 732               | FADD    MachRep Reg Reg Reg
 733               | FSUB    MachRep Reg Reg Reg
 734               | FMUL    MachRep Reg Reg Reg
 735               | FDIV    MachRep Reg Reg Reg
 736               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 737
 738               | FCMP    Reg Reg
 739
 740               | FCTIWZ  Reg Reg         -- convert to integer word
 741               | FRSP    Reg Reg         -- reduce to single precision
 742                                         -- (but destination is a FP register)
 743
 744               | CRNOR   Int Int Int    -- condition register nor
 745               | MFCR    Reg            -- move from condition register
 746
 747               | MFLR    Reg            -- move from link register
 748               | FETCHPC Reg            -- pseudo-instruction:
 749                                        -- bcl to next insn, mflr reg
 750
 751               | LWSYNC -- memory barrier
 752 #endif /* powerpc_TARGET_ARCH */