ghc/compiler/nativeGen/MachInstrs.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "nativeGen/NCG.h"
  10
  11 module MachInstrs (
  12         -- * Cmm instantiations
  13         NatCmm, NatCmmTop, NatBasicBlock,
  14
  15         -- * Machine instructions
  16         Instr(..),
  17         Cond(..),
  18 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
  19         Size(..), machRepSize,
  20 #endif
  21         RI(..),
  22
  23 #if i386_TARGET_ARCH
  24         Operand(..),
  25         i386_insert_ffrees,
  26 #endif
  27 #if sparc_TARGET_ARCH
  28         riZero, fpRelEA, moveSp, fPair,
  29 #endif
  30 #if powerpc_TARGET_ARCH
  31         condUnsigned, condToSigned,
  32 #endif
  33         DestInfo(..), hasDestInfo, pprDests,
  34
  35     ) where
  36
  37 #include "HsVersions.h"
  38 #include "../includes/ghcconfig.h"
  39
  40 import MachRegs
  41 import Cmm
  42 import MachOp           ( MachRep(..) )
  43 import CLabel           ( CLabel, pprCLabel )
  44 import Panic            ( panic )
  45 import Outputable
  46 import Config           ( cLeadingUnderscore )
  47 import FastString
  48
  49 import GLAEXTS
  50
  51
  52 -- -----------------------------------------------------------------------------
  53 -- Our flavours of the Cmm types
  54
  55 -- Type synonyms for Cmm populated with native code
  56 type NatCmm        = GenCmm CmmStatic Instr     -- GenCmm instantiated with CmmStatic and Instr
  57 type NatCmmTop     = GenCmmTop CmmStatic Instr  -- GenCmmTop instantiated with CmmStatic and Instr
  58 type NatBasicBlock = GenBasicBlock Instr        -- GenBasicBlock instantiated with Instr
  59
  60 -- -----------------------------------------------------------------------------
  61 -- Conditions on this architecture
  62
  63 data Cond          -- condition codes; each target's #if section below supplies the constructors
  64 #if alpha_TARGET_ARCH
  65   = ALWAYS      -- For BI (same as BR)
  66   | EQQ         -- For CMP and BI (NB: "EQ" is a 1.3 Prelude name)
  67   | GE          -- For BI only
  68   | GTT         -- For BI only (NB: "GT" is a 1.3 Prelude name)
  69   | LE          -- For CMP and BI
  70   | LTT         -- For CMP and BI (NB: "LT" is a 1.3 Prelude name)
  71   | NE          -- For BI only
  72   | NEVER       -- For BI (null instruction)
  73   | ULE         -- For CMP only
  74   | ULT         -- For CMP only
  75 #endif
  76 #if i386_TARGET_ARCH
  77   = ALWAYS      -- What's really used? ToDo
  78   | EQQ
  79   | GE
  80   | GEU
  81   | GTT
  82   | GU
  83   | LE
  84   | LEU
  85   | LTT
  86   | LU
  87   | NE
  88   | NEG         -- this name is why the x86 NEG instruction is spelt NEGI in Instr
  89   | POS
  90   | CARRY
  91   | OFLO
  92 #endif
  93 #if sparc_TARGET_ARCH
  94   = ALWAYS      -- What's really used? ToDo
  95   | EQQ
  96   | GE
  97   | GEU
  98   | GTT
  99   | GU
 100   | LE
 101   | LEU
 102   | LTT
 103   | LU
 104   | NE
 105   | NEG
 106   | NEVER
 107   | POS
 108   | VC
 109   | VS
 110 #endif
 111 #if powerpc_TARGET_ARCH
 112   = ALWAYS
 113   | EQQ
 114   | GE
 115   | GEU         -- unsigned (condUnsigned); condToSigned maps it to GE
 116   | GTT
 117   | GU          -- unsigned (condUnsigned); condToSigned maps it to GTT
 118   | LE
 119   | LEU         -- unsigned (condUnsigned); condToSigned maps it to LE
 120   | LTT
 121   | LU          -- unsigned (condUnsigned); condToSigned maps it to LTT
 122   | NE
 123 #endif
 124     deriving Eq  -- to make an assertion work
 125
 126
 127 -- -----------------------------------------------------------------------------
 128 -- Sizes on this architecture
 129
 130 -- ToDo: it's not clear to me that we need separate signed-vs-unsigned sizes
 131 -- here.  I've removed them from the x86 version, we'll see what happens --SDM
 132
 133 #if !powerpc_TARGET_ARCH && !i386_TARGET_ARCH
 134 data Size          -- operand sizes; constructors come from the per-target #if sections below
 135 #if alpha_TARGET_ARCH
 136     = B     -- byte
 137     | Bu    -- presumably byte (unsigned), as in the sparc list below -- TODO confirm
 138 --  | W     -- word (2 bytes): UNUSED
 139 --  | Wu    -- : UNUSED
 140     | L     -- longword (4 bytes)
 141     | Q     -- quadword (8 bytes)
 142 --  | FF    -- VAX F-style floating pt: UNUSED
 143 --  | GF    -- VAX G-style floating pt: UNUSED
 144 --  | DF    -- VAX D-style floating pt: UNUSED
 145 --  | SF    -- IEEE single-precision floating pt: UNUSED
 146     | TF    -- IEEE double-precision floating pt
 147 #endif
 148 #if sparc_TARGET_ARCH || powerpc_TARGET_ARCH
 149     = B     -- byte (signed)
 150     | Bu    -- byte (unsigned)
 151     | H     -- halfword (signed, 2 bytes)
 152     | Hu    -- halfword (unsigned, 2 bytes)
 153     | W     -- word (4 bytes)
 154     | F     -- IEEE single-precision floating pt
 155     | DF    -- IEEE double-precision floating pt (machRepSize maps F64 to it)
 156 #endif
 157   deriving Eq
 158
 159 machRepSize :: MachRep -> Size   -- Size used for a MachRep on this target; panics where none exists
 160 machRepSize I8    = IF_ARCH_alpha(Bu, IF_ARCH_sparc(Bu, ))
 161 machRepSize I16   = IF_ARCH_alpha(panic "machRepSize: I16",IF_ARCH_sparc(Hu, ))  -- no 2-byte Size on alpha
 162 machRepSize I32   = IF_ARCH_alpha(L,  IF_ARCH_sparc(W,  ))
 163 machRepSize I64   = panic "machRepSize: I64"
 164 machRepSize I128  = panic "machRepSize: I128"
 165 machRepSize F32   = IF_ARCH_alpha(TF, IF_ARCH_sparc(F, ))    -- alpha has only TF for floating point
 166 machRepSize F64   = IF_ARCH_alpha(TF, IF_ARCH_sparc(DF,))
 167 #endif
 168
 169 -- -----------------------------------------------------------------------------
 170 -- Register or immediate (a handy type on some platforms)
 171
 172 data RI = RIReg Reg   -- the operand is a register
 173         | RIImm Imm   -- the operand is an immediate
 174
 175
 176 -- -----------------------------------------------------------------------------
 177 -- Machine's assembly language
 178
 179 -- We have a few common "instructions" (nearly all the pseudo-ops) but
 180 -- mostly all of 'Instr' is machine-specific.
 181
 182 data Instr         -- common pseudo-ops first; the per-target #if sections below continue the list
 183   = COMMENT FastString          -- comment pseudo-op
 184
 185   | LDATA   Section [CmmStatic] -- some static data spat out during code
 186                                 -- generation.  Will be extracted before
 187                                 -- pretty-printing.
 188
 189   | NEWBLOCK BlockId            -- start a new basic block.  Useful during
 190                                 -- codegen, removed later.  Preceding
 191                                 -- instruction should be a jump, as per the
 192                                 -- invariants for a BasicBlock (see Cmm).
 193
 194   | DELTA   Int                 -- specify current stack offset for
 195                                 -- benefit of subsequent passes
 196
 197 -- -----------------------------------------------------------------------------
 198 -- Alpha instructions
 199
 200 #if alpha_TARGET_ARCH
 201
 202 -- data Instr continues...
 203
 204 -- Loads and stores.
 205               | LD            Size Reg AddrMode -- size, dst, src
 206               | LDA           Reg AddrMode      -- dst, src
 207               | LDAH          Reg AddrMode      -- dst, src
 208               | LDGP          Reg AddrMode      -- dst, src
 209               | LDI           Size Reg Imm     -- size, dst, src
 210               | ST            Size Reg AddrMode -- size, src, dst
 211
 212 -- Int Arithmetic.
 213               | CLR           Reg                   -- dst
 214               | ABS           Size RI Reg           -- size, src, dst
 215               | NEG           Size Bool RI Reg      -- size, overflow, src, dst
 216               | ADD           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 217               | SADD          Size Size Reg RI Reg  -- size, scale, src, src, dst
 218               | SUB           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 219               | SSUB          Size Size Reg RI Reg  -- size, scale, src, src, dst
 220               | MUL           Size Bool Reg RI Reg  -- size, overflow, src, src, dst
 221               | DIV           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 222               | REM           Size Bool Reg RI Reg  -- size, unsigned, src, src, dst
 223
 224 -- Simple bit-twiddling.  Operand roles are not stated; presumably src, src, dst as for ADD -- TODO confirm
 225               | NOT           RI Reg
 226               | AND           Reg RI Reg
 227               | ANDNOT        Reg RI Reg
 228               | OR            Reg RI Reg
 229               | ORNOT         Reg RI Reg
 230               | XOR           Reg RI Reg
 231               | XORNOT        Reg RI Reg
 232               | SLL           Reg RI Reg
 233               | SRL           Reg RI Reg
 234               | SRA           Reg RI Reg
 235
 236               | ZAP           Reg RI Reg
 237               | ZAPNOT        Reg RI Reg
 238
 239               | NOP
 240
 241 -- Comparison (the Cond comments say which conditions are valid for CMP)
 242               | CMP           Cond Reg RI Reg
 243
 244 -- Float Arithmetic.
 245               | FCLR          Reg
 246               | FABS          Reg Reg
 247               | FNEG          Size Reg Reg
 248               | FADD          Size Reg Reg Reg
 249               | FDIV          Size Reg Reg Reg
 250               | FMUL          Size Reg Reg Reg
 251               | FSUB          Size Reg Reg Reg
 252               | CVTxy         Size Size Reg Reg
 253               | FCMP          Size Cond Reg Reg Reg
 254               | FMOV          Reg Reg
 255
 256 -- Jumping around (the Cond comments say which conditions are valid for BI).
 257               | BI            Cond Reg Imm
 258               | BF            Cond Reg Imm
 259               | BR            Imm
 260               | JMP           Reg AddrMode Int
 261               | BSR           Imm Int
 262               | JSR           Reg AddrMode Int
 263
 264 -- Alpha-specific pseudo-ops.
 265               | FUNBEGIN CLabel
 266               | FUNEND CLabel
 267
 268 -- RI (register-or-immediate) is declared once, earlier in this module, for
 269 -- every target; repeating the declaration here would be rejected as a
 270 -- multiple declaration of RI, RIReg and RIImm.
 271
 272 #endif /* alpha_TARGET_ARCH */
 273
 274
 275 -- -----------------------------------------------------------------------------
 276 -- Intel x86 instructions
 277
 278 {-
 279 Intel, in their infinite wisdom, selected a stack model for floating
 280 point registers on x86.  That might have made sense back in 1979 --
 281 nowadays we can see it for the nonsense it really is.  A stack model
 282 fits poorly with the existing nativeGen infrastructure, which assumes
 283 flat integer and FP register sets.  Prior to this commit, nativeGen
 284 could not generate correct x86 FP code -- to do so would have meant
 285 somehow working the register-stack paradigm into the register
 286 allocator and spiller, which sounds very difficult.
 287
 288 We have decided to cheat, and go for a simple fix which requires no
 289 infrastructure modifications, at the expense of generating ropey but
 290 correct FP code.  All notions of the x86 FP stack and its insns have
 291 been removed.  Instead, we pretend (to the instruction selector and
 292 register allocator) that x86 has six floating point registers, %fake0
 293 .. %fake5, which can be used in the usual flat manner.  We further
 294 claim that x86 has floating point instructions very similar to SPARC
 295 and Alpha, that is, a simple 3-operand register-register arrangement.
 296 Code generation and register allocation proceed on this basis.
 297
 298 When we come to print out the final assembly, our convenient fiction
 299 is converted to dismal reality.  Each fake instruction is
 300 independently converted to a series of real x86 instructions.
 301 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
 302 arithmetic operations, the two operands are pushed onto the top of the
 303 FP stack, the operation done, and the result copied back into the
 304 relevant register.  There are only six %fake registers because 2 are
 305 needed for the translation, and x86 has 8 in total.
 306
 307 The translation is inefficient but is simple and it works.  A cleverer
 308 translation would handle a sequence of insns, simulating the FP stack
 309 contents, would not impose a fixed mapping from %fake to %st regs, and
 310 hopefully could avoid most of the redundant reg-reg moves of the
 311 current translation.
 312
 313 We might as well make use of whatever unique FP facilities Intel have
 314 chosen to bless us with (let's not be churlish, after all).
 315 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 316 -}
 317
 318 {-
 319 MORE FLOATING POINT MUSINGS...
 320
 321 Intel's internal floating point registers are by default 80 bit
 322 extended precision.  This means that all operations done on values in
 323 registers are done at 80 bits, and unless the intermediate values are
 324 truncated to the appropriate size (32 or 64 bits) by storing in
 325 memory, calculations in registers will give different results from
 326 calculations which pass intermediate values in memory (eg. via
 327 function calls).
 328
 329 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 330 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 331 that this will only affect 64-bit precision arithmetic; 32-bit
 332 calculations will still be done at 64-bit precision in registers.  So
 333 it doesn't solve the whole problem.
 334
 335 There's also the issue of what the C library is expecting in terms of
 336 precision.  It seems to be the case that glibc on Linux expects the
 337 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 338 unexpected effects.  Changing the default could have undesirable
 339 effects on other 3rd-party library code too, so the right thing would
 340 be to save/restore the FPU control word across Haskell code if we were
 341 to do this.
 342
 343 gcc's -ffloat-store gives consistent results by always storing the
 344 results of floating-point calculations in memory, which works for both
 345 32 and 64-bit precision.  However, it only affects the values of
 346 user-declared floating point variables in C, not intermediate results.
 347 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 348 flag).
 349
 350 Another problem is how to spill floating point registers in the
 351 register allocator.  Should we spill the whole 80 bits, or just 64?
 352 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 353 Linux, spilling 64 bits will round the results of some operations.
 354 This is what gcc does.  Spilling at 80 bits requires taking up a full
 355 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 356 the alignment problems.
 357
 358 In the future, we'll use the SSE registers for floating point.  This
 359 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 360 precision float ops), which means P4 or Xeon and above.  Using SSE
 361 will solve all these problems, because the SSE registers use fixed 32
 362 bit or 64 bit precision.
 363
 364 --SDM 1/2003
 365 -}
 366
 367 #if i386_TARGET_ARCH
 368
 369 -- data Instr continues...
 370
 371 -- Moves.
 372         | MOV         MachRep Operand Operand
 373         | MOVZxL      MachRep Operand Operand -- size is the size of operand 1
 374         | MOVSxL      MachRep Operand Operand -- size is the size of operand 1
 375
 376 -- Load effective address (also a very useful three-operand add instruction :-)
 377         | LEA         MachRep Operand Operand
 378
 379 -- Int Arithmetic.
 380         | ADD         MachRep Operand Operand
 381         | ADC         MachRep Operand Operand
 382         | SUB         MachRep Operand Operand
 383         | IMUL        MachRep Operand Operand   -- signed int mul
 384         | MUL         MachRep Operand Operand   -- unsigned int mul
 385
 386         | IMUL64      Reg Reg
 387         -- operand1:operand2 := (operand1[31:0] *signed operand2[31:0])
 388
 389         | DIV         MachRep Operand   -- eax := eax:edx/op, edx := eax:edx%op
 390         | IDIV        MachRep Operand   -- ditto, but signed
 391
 392 -- Simple bit-twiddling.
 393         | AND         MachRep Operand Operand
 394         | OR          MachRep Operand Operand
 395         | XOR         MachRep Operand Operand
 396         | NOT         MachRep Operand
 397         | NEGI        MachRep Operand -- NEG instruction (name clash with the NEG constructor of Cond)
 398
 399 -- Shifts (amount may be immediate or %cl only)
 400         | SHL         MachRep Operand{-amount-} Operand
 401         | SAR         MachRep Operand{-amount-} Operand
 402         | SHR         MachRep Operand{-amount-} Operand
 403
 404         | BT          MachRep Imm Operand
 405         | NOP
 406
 407 -- Float Arithmetic.
 408
 409 -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 410 -- as single instructions right up until we spit them out.
 411         -- all the 3-operand fake fp insns are src1 src2 dst
 412         -- and furthermore are constrained to be fp regs only.
 413         -- IMPORTANT: keep is_G_instr up to date with any changes here
 414         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 415         | GLD         MachRep AddrMode Reg -- src, dst(fpreg)
 416         | GST         MachRep Reg AddrMode -- src(fpreg), dst
 417
 418         | GLDZ        Reg -- dst(fpreg)
 419         | GLD1        Reg -- dst(fpreg)
 420
 421         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 422         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 423
 424         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 425         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 426
 427         | GADD        MachRep Reg Reg Reg -- src1, src2, dst
 428         | GDIV        MachRep Reg Reg Reg -- src1, src2, dst
 429         | GSUB        MachRep Reg Reg Reg -- src1, src2, dst
 430         | GMUL        MachRep Reg Reg Reg -- src1, src2, dst
 431
 432                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 433                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 434                 -- comparable and the comparison is True.  Subsequent code must
 435                 -- test the %eflags zero flag regardless of the supplied Cond.
 436         | GCMP        Cond Reg Reg -- src1, src2
 437
 438         | GABS        MachRep Reg Reg -- src, dst
 439         | GNEG        MachRep Reg Reg -- src, dst
 440         | GSQRT       MachRep Reg Reg -- src, dst
 441         | GSIN        MachRep Reg Reg -- src, dst
 442         | GCOS        MachRep Reg Reg -- src, dst
 443         | GTAN        MachRep Reg Reg -- src, dst
 444
 445         | GFREE         -- do ffree on all x86 regs; an ugly hack (put before CALL/JMP by i386_insert_ffrees)
 446
 447 -- Comparison
 448         | TEST          MachRep Operand Operand
 449         | CMP           MachRep Operand Operand
 450         | SETCC         Cond Operand
 451
 452 -- Stack Operations.
 453         | PUSH          MachRep Operand
 454         | POP           MachRep Operand
 455         -- both unused (SDM):
 456         -- | PUSHA
 457         -- | POPA
 458
 459 -- Jumping around.
 460         | JMP         Operand
 461         | JXX         Cond BlockId  -- includes unconditional branches
 462         | JMP_TBL     Operand [BlockId]  -- table jump
 463         | CALL        (Either Imm Reg)
 464
 465 -- Other things.
 466         | CLTD -- sign extend %eax into %edx:%eax
 467
 468         | FETCHGOT    Reg  -- pseudo-insn for position-independent code
 469                            -- pretty-prints as
 470                            --       call 1f
 471                            -- 1:    popl %reg
 472                            --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 473
 474 data Operand            -- operand of the x86 instructions above; one of:
 475   = OpReg  Reg          -- register
 476   | OpImm  Imm          -- immediate value
 477   | OpAddr AddrMode     -- memory reference
 478
 479
 480 -- If any fake x86 FP insn (see is_G_instr) occurs in the list, put a GFREE
 481 -- in front of every CALL and JMP; otherwise hand the list back untouched.
 482 i386_insert_ffrees :: [Instr] -> [Instr]
 483 i386_insert_ffrees insns =
 484    if usesFakeFP then concatMap ffree_before_nonlocal_transfers insns else insns
 485    where usesFakeFP = any is_G_instr insns
 486
 487 -- Prefix a CALL or a JMP with GFREE; any other instruction is returned alone.
 488 ffree_before_nonlocal_transfers :: Instr -> [Instr]
 489 ffree_before_nonlocal_transfers insn@(CALL _) = [GFREE, insn]
 490 ffree_before_nonlocal_transfers insn@(JMP _)  = [GFREE, insn]
 491 ffree_before_nonlocal_transfers insn          = [insn]
 492
 493
 494 -- if you ever add a new FP insn to the fake x86 FP insn set,
 495 -- you must update this too
 496 is_G_instr :: Instr -> Bool
 497 is_G_instr GFREE = panic "is_G_instr: GFREE (!)"   -- a GFREE is not expected in the input
 498 is_G_instr insn  = case insn of                    -- True exactly for the fake x86 FP insns
 499    -- register moves, loads and stores
 500    GMOV{}  -> True ; GLD{}   -> True ; GST{}   -> True
 501    GLDZ{}  -> True ; GLD1{}  -> True
 502    -- conversions between fp regs and int regs
 503    GFTOI{} -> True ; GDTOI{} -> True ; GITOF{} -> True ; GITOD{} -> True
 504    -- three-operand arithmetic, and compare
 505    GADD{}  -> True ; GDIV{}  -> True ; GSUB{}  -> True ; GMUL{}  -> True ; GCMP{} -> True
 506    -- src/dst operations
 507    GABS{}  -> True ; GNEG{}  -> True ; GSQRT{} -> True
 508    GSIN{}  -> True ; GCOS{}  -> True ; GTAN{}  -> True
 509    _       -> False
 510
 511 #endif /* i386_TARGET_ARCH */
 512
 513
 514 -- -----------------------------------------------------------------------------
 515 -- Sparc instructions
 516
 517 #if sparc_TARGET_ARCH
 518
 519 -- data Instr continues...
 520
 521 -- Loads and stores.
 522               | LD            MachRep AddrMode Reg -- size, src, dst
 523               | ST            MachRep Reg AddrMode -- size, src, dst
 524
 525 -- Int Arithmetic.
 526               | ADD           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 527               | SUB           Bool Bool Reg RI Reg -- x?, cc?, src1, src2, dst
 528               | UMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 529               | SMUL               Bool Reg RI Reg --     cc?, src1, src2, dst
 530               | RDY           Reg       -- move contents of Y register to reg
 531
 532 -- Simple bit-twiddling.
 533               | AND           Bool Reg RI Reg -- cc?, src1, src2, dst
 534               | ANDN          Bool Reg RI Reg -- cc?, src1, src2, dst
 535               | OR            Bool Reg RI Reg -- cc?, src1, src2, dst
 536               | ORN           Bool Reg RI Reg -- cc?, src1, src2, dst
 537               | XOR           Bool Reg RI Reg -- cc?, src1, src2, dst
 538               | XNOR          Bool Reg RI Reg -- cc?, src1, src2, dst
 539               | SLL           Reg RI Reg -- src1, src2, dst
 540               | SRL           Reg RI Reg -- src1, src2, dst
 541               | SRA           Reg RI Reg -- src1, src2, dst
 542               | SETHI         Imm Reg -- src, dst
 543               | NOP           -- Really SETHI 0, %g0, but worth an alias
 544
 545 -- Float Arithmetic.
 546
 547 -- Note that we cheat by treating F{ABS,MOV,NEG} of doubles as single
 548 -- instructions right up until we spit them out.
 549               | FABS          MachRep Reg Reg      -- size, src, dst
 550               | FADD          MachRep Reg Reg Reg  -- size, src1, src2, dst
 551               | FCMP          Bool MachRep Reg Reg -- exception?, size, src1, src2 (no dst field)
 552               | FDIV          MachRep Reg Reg Reg -- size, src1, src2, dst
 553               | FMOV          MachRep Reg Reg     -- size, src, dst
 554               | FMUL          MachRep Reg Reg Reg -- size, src1, src2, dst
 555               | FNEG          MachRep Reg Reg     -- size, src, dst
 556               | FSQRT         MachRep Reg Reg     -- size, src, dst
 557               | FSUB          MachRep Reg Reg Reg -- size, src1, src2, dst
 558               | FxTOy         MachRep MachRep Reg Reg -- two sizes (presumably from, to -- TODO confirm), src, dst
 559
 560 -- Jumping around.
 561               | BI            Cond Bool Imm -- cond, annul?, target
 562               | BF            Cond Bool Imm -- cond, annul?, target
 563
 564               | JMP           DestInfo AddrMode      -- dest info, target
 565               | CALL          (Either Imm Reg) Int Bool -- target, args, terminal
 566
 567 -- RI is declared once, earlier in this module, for every target; a second
 568 -- declaration here would be rejected as a multiple declaration.
 569
 570 riZero :: RI -> Bool
 571 riZero ri = case ri of      -- True for an immediate 0 or for real register 0
 572    RIImm (ImmInt 0)     -> True
 573    RIImm (ImmInteger 0) -> True
 574    RIReg (RealReg 0)    -> True
 575    _                    -> False
 576
 577 -- Calculate the effective address which would be used by the
 578 -- corresponding fpRel sequence.  fpRel is in MachRegs.lhs,
 579 -- alas -- can't have fpRelEA here because of module dependencies.
 580 fpRelEA :: Int -> Reg -> Instr
 581 fpRelEA n dst = ADD False False fp (RIImm byteOff) dst   -- dst := fp + n words
 582    where byteOff = ImmInt (n * BYTES_PER_WORD)
 583
 584 -- Code to shift the stack pointer by n words.
 585 moveSp :: Int -> Instr
 586 moveSp n = ADD False False sp (RIImm byteDelta) sp   -- sp := sp + n words
 587    where byteDelta = ImmInt (n * BYTES_PER_WORD)
 588
 589 -- Produce the second-half-of-a-double register given the first half.
 590 fPair :: Reg -> Reg
 591 fPair reg = case reg of { RealReg n | n >= 32 && even n -> RealReg (n + 1)   -- even real reg >= 32
 592                         ; _ -> pprPanic "fPair(sparc NCG)" (ppr reg) }       -- anything else: panic
 593 #endif /* sparc_TARGET_ARCH */
 594
 595
 596 -- -----------------------------------------------------------------------------
 597 -- PowerPC instructions
 598
 599 #ifdef powerpc_TARGET_ARCH
 600 -- data Instr continues...
 601
 602 -- Loads and stores.
 603               | LD      MachRep Reg AddrMode -- Load: size, dst, src
 604               | LA      MachRep Reg AddrMode -- Load arithmetic: size, dst, src
 605               | ST      MachRep Reg AddrMode -- Store: size, src, dst
 606               | STU     MachRep Reg AddrMode -- Store with Update: size, src, dst
 607               | LIS     Reg Imm -- Load Immediate Shifted: dst, src
 608               | LI      Reg Imm -- Load Immediate: dst, src
 609               | MR      Reg Reg -- Move Register: dst, src -- also for fmr
 610
 611               | CMP     MachRep Reg RI -- size, src1, src2
 612               | CMPL    MachRep Reg RI -- size, src1, src2
 613
 614               | BCC     Cond BlockId    -- branch to a local block, given a Cond
 615               | JMP     CLabel          -- same as branch,
 616                                         -- but with CLabel instead of block ID
 617               | MTCTR   Reg
 618               | BCTR    [BlockId]       -- with list of local destinations
 619               | BL      CLabel [Reg]    -- with list of argument regs
 620               | BCTRL   [Reg]           -- presumably the argument regs, as for BL -- TODO confirm
 621
 622               | ADD     Reg Reg RI -- dst, src1, src2
 623               | ADDC    Reg Reg Reg -- (carrying) dst, src1, src2
 624               | ADDE    Reg Reg Reg -- (extend) dst, src1, src2
 625               | ADDIS   Reg Reg Imm -- Add Immediate Shifted dst, src1, src2
 626               | SUBF    Reg Reg Reg -- dst, src1, src2 ; dst = src2 - src1
 627               | MULLW   Reg Reg RI  -- presumably dst, src1, src2 like ADD -- TODO confirm
 628               | DIVW    Reg Reg Reg -- presumably dst, src1, src2 -- TODO confirm
 629               | DIVWU   Reg Reg Reg -- presumably dst, src1, src2 -- TODO confirm
 630
 631               | MULLW_MayOflo Reg Reg Reg
 632                         -- dst = 1 if src1 * src2 overflows
 633                         -- pseudo-instruction; pretty-printed as:
 634                         -- mullwo. dst, src1, src2
 635                         -- mfxer dst
 636                         -- rlwinm dst, dst, 2, 31,31
 637
 638               | AND     Reg Reg RI -- dst, src1, src2
 639               | OR      Reg Reg RI -- dst, src1, src2
 640               | XOR     Reg Reg RI -- dst, src1, src2
 641               | XORIS   Reg Reg Imm -- XOR Immediate Shifted dst, src1, src2
 642
 643               | EXTS    MachRep Reg Reg
 644
 645               | NEG     Reg Reg
 646               | NOT     Reg Reg
 647
 648               | SLW     Reg Reg RI      -- shift left word
 649               | SRW     Reg Reg RI      -- shift right word
 650               | SRAW    Reg Reg RI      -- shift right arithmetic word
 651
 652                         -- Rotate Left Word Immediate then AND with Mask
 653               | RLWINM  Reg Reg Int Int Int
 654
 655               | FADD    MachRep Reg Reg Reg
 656               | FSUB    MachRep Reg Reg Reg
 657               | FMUL    MachRep Reg Reg Reg
 658               | FDIV    MachRep Reg Reg Reg
 659               | FNEG    Reg Reg  -- negate is the same for single and double prec.
 660
 661               | FCMP    Reg Reg
 662
 663               | FCTIWZ  Reg Reg         -- convert to integer word
 664               | FRSP    Reg Reg         -- reduce to single precision
 665                                         -- (but destination is a FP register)
 666
 667               | CRNOR   Int Int Int    -- condition register nor
 668               | MFCR    Reg            -- move from condition register
 669
 670               | MFLR    Reg            -- move from link register
 671               | FETCHPC Reg            -- pseudo-instruction:
 672                                        -- bcl to next insn, mflr reg
 673
 674 -- Is this one of the unsigned conditions GU, LU, GEU, LEU?
 675 condUnsigned :: Cond -> Bool
 676 condUnsigned cond = cond `elem` unsignedConds
 677    where
 678       unsignedConds = [GU, LU, GEU, LEU]
 679
 680 -- Map GU, LU, GEU, LEU to GTT, LTT, GE, LE; every other condition is returned as is.
 681 condToSigned :: Cond -> Cond
 682 condToSigned cond = case cond of
 683    GU  -> GTT ;  LU  -> LTT
 684    GEU -> GE  ;  LEU -> LE ;  _ -> cond
 685 #endif /* powerpc_TARGET_ARCH */
 686
 687
 688 -- -----------------------------------------------------------------------------
 689 -- DestInfo
 690
 691 -- ToDo: might not be needed anymore --SDM
 692
 693 -- used by insnFuture in RegAllocInfo.lhs
 694 data DestInfo               -- destination labels attached to an instruction (the sparc JMP carries one)
 695    = NoDestInfo             -- no supplied dests; infer from context
 696    | DestInfo [CLabel]      -- precisely these dests and no others
 697
 698 hasDestInfo :: DestInfo -> Bool   -- True iff an explicit destination list was supplied
 699 hasDestInfo info = case info of { NoDestInfo -> False ; DestInfo _ -> True }
 700
 701 pprDests :: DestInfo -> SDoc
 702 pprDests (DestInfo labels) = brackets . hsep $ [pprCLabel lbl | lbl <- labels]  -- bracketed label list
 703 pprDests NoDestInfo        = text "NoDestInfo"