compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import OldCmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import UniqSet
  32 import Unique
  33
  34 -- Size of a PPC memory address, in bytes.
  35 --
  36 archWordSize :: Size
  37 #if i386_TARGET_ARCH
  38 archWordSize    = II32
  39 #elif x86_64_TARGET_ARCH
  40 archWordSize    = II64
  41 #else
  42 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  43 #endif
  44
  45 -- | Instruction instance for x86 instruction set.
  46 instance Instruction Instr where
  47         regUsageOfInstr         = x86_regUsageOfInstr
  48         patchRegsOfInstr        = x86_patchRegsOfInstr
  49         isJumpishInstr          = x86_isJumpishInstr
  50         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  51         patchJumpInstr          = x86_patchJumpInstr
  52         mkSpillInstr            = x86_mkSpillInstr
  53         mkLoadInstr             = x86_mkLoadInstr
  54         takeDeltaInstr          = x86_takeDeltaInstr
  55         isMetaInstr             = x86_isMetaInstr
  56         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  57         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  58         mkJumpInstr             = x86_mkJumpInstr
  59
  60
  61 -- -----------------------------------------------------------------------------
  62 -- Intel x86 instructions
  63
  64 {-
  65 Intel, in their infinite wisdom, selected a stack model for floating
  66 point registers on x86.  That might have made sense back in 1979 --
  67 nowadays we can see it for the nonsense it really is.  A stack model
  68 fits poorly with the existing nativeGen infrastructure, which assumes
  69 flat integer and FP register sets.  Prior to this commit, nativeGen
  70 could not generate correct x86 FP code -- to do so would have meant
  71 somehow working the register-stack paradigm into the register
  72 allocator and spiller, which sounds very difficult.
  73
  74 We have decided to cheat, and go for a simple fix which requires no
  75 infrastructure modifications, at the expense of generating ropey but
  76 correct FP code.  All notions of the x86 FP stack and its insns have
  77 been removed.  Instead, we pretend (to the instruction selector and
  78 register allocator) that x86 has six floating point registers, %fake0
  79 .. %fake5, which can be used in the usual flat manner.  We further
  80 claim that x86 has floating point instructions very similar to SPARC
  81 and Alpha, that is, a simple 3-operand register-register arrangement.
  82 Code generation and register allocation proceed on this basis.
  83
  84 When we come to print out the final assembly, our convenient fiction
  85 is converted to dismal reality.  Each fake instruction is
  86 independently converted to a series of real x86 instructions.
  87 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  88 arithmetic operations, the two operands are pushed onto the top of the
  89 FP stack, the operation done, and the result copied back into the
  90 relevant register.  There are only six %fake registers because 2 are
  91 needed for the translation, and x86 has 8 in total.
  92
  93 The translation is inefficient but is simple and it works.  A cleverer
  94 translation would handle a sequence of insns, simulating the FP stack
  95 contents, would not impose a fixed mapping from %fake to %st regs, and
  96 hopefully could avoid most of the redundant reg-reg moves of the
  97 current translation.
  98
  99 We might as well make use of whatever unique FP facilities Intel have
 100 chosen to bless us with (let's not be churlish, after all).
 101 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 102 -}
 103
 104 {-
 105 Note [x86 Floating point precision]
 106
 107 Intel's internal floating point registers are by default 80 bit
 108 extended precision.  This means that all operations done on values in
 109 registers are done at 80 bits, and unless the intermediate values are
 110 truncated to the appropriate size (32 or 64 bits) by storing in
 111 memory, calculations in registers will give different results from
 112 calculations which pass intermediate values in memory (eg. via
 113 function calls).
 114
 115 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 116 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 117 that this will only affect 64-bit precision arithmetic; 32-bit
 118 calculations will still be done at 64-bit precision in registers.  So
 119 it doesn't solve the whole problem.
 120
 121 There's also the issue of what the C library is expecting in terms of
 122 precision.  It seems to be the case that glibc on Linux expects the
 123 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 124 unexpected effects.  Changing the default could have undesirable
 125 effects on other 3rd-party library code too, so the right thing would
 126 be to save/restore the FPU control word across Haskell code if we were
 127 to do this.
 128
 129 gcc's -ffloat-store gives consistent results by always storing the
 130 results of floating-point calculations in memory, which works for both
 131 32 and 64-bit precision.  However, it only affects the values of
 132 user-declared floating point variables in C, not intermediate results.
 133 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 134 flag).
 135
 136 Another problem is how to spill floating point registers in the
 137 register allocator.  Should we spill the whole 80 bits, or just 64?
 138 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 139 Linux, spilling 64 bits will round the results of some operations.
 140 This is what gcc does.  Spilling at 80 bits requires taking up a full
 141 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 142 the alignment problems.
 143
 144 In the future [edit: now available in GHC 7.0.1, with the -msse2
 145 flag], we'll use the SSE registers for floating point.  This requires
 146 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
 147 float ops), which means P4 or Xeon and above.  Using SSE will solve
 148 all these problems, because the SSE registers use fixed 32 bit or 64
 149 bit precision.
 150
 151 --SDM 1/2003
 152 -}
 153
 154
 155 data Instr
 156         -- comment pseudo-op
 157         = COMMENT FastString
 158
 159         -- some static data spat out during code
 160         -- generation.  Will be extracted before
 161         -- pretty-printing.
 162         | LDATA   Section [CmmStatic]
 163
 164         -- start a new basic block.  Useful during
 165         -- codegen, removed later.  Preceding
 166         -- instruction should be a jump, as per the
 167         -- invariants for a BasicBlock (see Cmm).
 168         | NEWBLOCK BlockId
 169
 170         -- specify current stack offset for
 171         -- benefit of subsequent passes
 172         | DELTA   Int
 173
 174         -- Moves.
 175         | MOV         Size Operand Operand
 176         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 177         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 178         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 179         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 180         -- don't affect the high bits of the register.
 181
 182         -- Load effective address (also a very useful three-operand add instruction :-)
 183         | LEA         Size Operand Operand
 184
 185         -- Int Arithmetic.
 186         | ADD         Size Operand Operand
 187         | ADC         Size Operand Operand
 188         | SUB         Size Operand Operand
 189
 190         | MUL         Size Operand Operand
 191         | IMUL        Size Operand Operand      -- signed int mul
 192         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 193
 194         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 195         | IDIV        Size Operand              -- ditto, but signed
 196
 197         -- Simple bit-twiddling.
 198         | AND         Size Operand Operand
 199         | OR          Size Operand Operand
 200         | XOR         Size Operand Operand
 201         | NOT         Size Operand
 202         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 203
 204         -- Shifts (amount may be immediate or %cl only)
 205         | SHL         Size Operand{-amount-} Operand
 206         | SAR         Size Operand{-amount-} Operand
 207         | SHR         Size Operand{-amount-} Operand
 208
 209         | BT          Size Imm Operand
 210         | NOP
 211
 212         -- x86 Float Arithmetic.
 213         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 214         -- as single instructions right up until we spit them out.
 215         -- all the 3-operand fake fp insns are src1 src2 dst
 216         -- and furthermore are constrained to be fp regs only.
 217         -- IMPORTANT: keep is_G_insn up to date with any changes here
 218         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 219         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 220         | GST         Size Reg AddrMode -- src(fpreg), dst
 221
 222         | GLDZ        Reg -- dst(fpreg)
 223         | GLD1        Reg -- dst(fpreg)
 224
 225         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 226         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 227
 228         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 229         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 230
 231         | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)
 232
 233         | GADD        Size Reg Reg Reg -- src1, src2, dst
 234         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 235         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 236         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 237
 238                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 239                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 240                 -- comparable and the comparison is True.  Subsequent code must
 241                 -- test the %eflags zero flag regardless of the supplied Cond.
 242         | GCMP        Cond Reg Reg -- src1, src2
 243
 244         | GABS        Size Reg Reg -- src, dst
 245         | GNEG        Size Reg Reg -- src, dst
 246         | GSQRT       Size Reg Reg -- src, dst
 247         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 248         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 249         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 250
 251         | GFREE         -- do ffree on all x86 regs; an ugly hack
 252
 253
 254         -- SSE2 floating point: we use a restricted set of the available SSE2
 255         -- instructions for floating-point.
 256         -- use MOV for moving (either movss or movsd (movlpd better?))
 257         | CVTSS2SD      Reg Reg         -- F32 to F64
 258         | CVTSD2SS      Reg Reg         -- F64 to F32
 259         | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
 260         | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
 261         | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
 262         | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
 263
 264         -- use ADD & SUB for arithmetic.  In both cases, operands
 265         -- are  Operand Reg.
 266
 267         -- SSE2 floating-point division:
 268         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 269
 270         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 271         -- compare single/double prec floating point respectively.
 272
 273         | SQRT          Size Operand Reg        -- src, dst
 274
 275
 276         -- Comparison
 277         | TEST          Size Operand Operand
 278         | CMP           Size Operand Operand
 279         | SETCC         Cond Operand
 280
 281         -- Stack Operations.
 282         | PUSH          Size Operand
 283         | POP           Size Operand
 284         -- both unused (SDM):
 285         --  | PUSHA
 286         --  | POPA
 287
 288         -- Jumping around.
 289         | JMP         Operand
 290         | JXX         Cond BlockId  -- includes unconditional branches
 291         | JXX_GBL     Cond Imm      -- non-local version of JXX
 292         -- Table jump
 293         | JMP_TBL     Operand   -- Address to jump to
 294                       [Maybe BlockId] -- Blocks in the jump table
 295                       Section   -- Data section jump table should be put in
 296                       CLabel    -- Label of jump table
 297         | CALL        (Either Imm Reg) [Reg]
 298
 299         -- Other things.
 300         | CLTD Size              -- sign extend %eax into %edx:%eax
 301
 302         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 303                                  -- pretty-prints as
 304                                  --       call 1f
 305                                  -- 1:    popl %reg
 306                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 307         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 308                                  -- pretty-prints as
 309                                  --       call 1f
 310                                  -- 1:    popl %reg
 311
 312
 313 data Operand
 314         = OpReg  Reg            -- register
 315         | OpImm  Imm            -- immediate value
 316         | OpAddr AddrMode       -- memory reference
 317
 318
 319
 320 x86_regUsageOfInstr :: Instr -> RegUsage
 321 x86_regUsageOfInstr instr
 322  = case instr of
 323     MOV    _ src dst    -> usageRW src dst
 324     MOVZxL _ src dst    -> usageRW src dst
 325     MOVSxL _ src dst    -> usageRW src dst
 326     LEA    _ src dst    -> usageRW src dst
 327     ADD    _ src dst    -> usageRM src dst
 328     ADC    _ src dst    -> usageRM src dst
 329     SUB    _ src dst    -> usageRM src dst
 330     IMUL   _ src dst    -> usageRM src dst
 331     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 332     MUL    _ src dst    -> usageRM src dst
 333     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 334     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 335     AND    _ src dst    -> usageRM src dst
 336     OR     _ src dst    -> usageRM src dst
 337
 338     XOR    _ (OpReg src) (OpReg dst)
 339         | src == dst    -> mkRU [] [dst]
 340
 341     XOR    _ src dst    -> usageRM src dst
 342     NOT    _ op         -> usageM op
 343     NEGI   _ op         -> usageM op
 344     SHL    _ imm dst    -> usageRM imm dst
 345     SAR    _ imm dst    -> usageRM imm dst
 346     SHR    _ imm dst    -> usageRM imm dst
 347     BT     _ _   src    -> mkRUR (use_R src)
 348
 349     PUSH   _ op         -> mkRUR (use_R op)
 350     POP    _ op         -> mkRU [] (def_W op)
 351     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 352     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 353     SETCC  _ op         -> mkRU [] (def_W op)
 354     JXX    _ _          -> mkRU [] []
 355     JXX_GBL _ _         -> mkRU [] []
 356     JMP     op          -> mkRUR (use_R op)
 357     JMP_TBL op _ _ _    -> mkRUR (use_R op)
 358     CALL (Left _)  params   -> mkRU params callClobberedRegs
 359     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 360     CLTD   _            -> mkRU [eax] [edx]
 361     NOP                 -> mkRU [] []
 362
 363     GMOV   src dst      -> mkRU [src] [dst]
 364     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 365     GST    _ src dst    -> mkRUR (src : use_EA dst)
 366
 367     GLDZ   dst          -> mkRU [] [dst]
 368     GLD1   dst          -> mkRU [] [dst]
 369
 370     GFTOI  src dst      -> mkRU [src] [dst]
 371     GDTOI  src dst      -> mkRU [src] [dst]
 372
 373     GITOF  src dst      -> mkRU [src] [dst]
 374     GITOD  src dst      -> mkRU [src] [dst]
 375
 376     GDTOF  src dst      -> mkRU [src] [dst]
 377
 378     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 379     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 380     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 381     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 382
 383     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 384     GABS   _ src dst     -> mkRU [src] [dst]
 385     GNEG   _ src dst     -> mkRU [src] [dst]
 386     GSQRT  _ src dst     -> mkRU [src] [dst]
 387     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 388     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 389     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 390
 391     CVTSS2SD   src dst  -> mkRU [src] [dst]
 392     CVTSD2SS   src dst  -> mkRU [src] [dst]
 393     CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
 394     CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
 395     CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
 396     CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
 397     FDIV _     src dst  -> usageRM src dst
 398
 399     FETCHGOT reg        -> mkRU [] [reg]
 400     FETCHPC  reg        -> mkRU [] [reg]
 401
 402     COMMENT _           -> noUsage
 403     DELTA   _           -> noUsage
 404
 405     _other              -> panic "regUsage: unrecognised instr"
 406
 407  where
 408     -- 2 operand form; first operand Read; second Written
 409     usageRW :: Operand -> Operand -> RegUsage
 410     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 411     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 412     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 413
 414     -- 2 operand form; first operand Read; second Modified
 415     usageRM :: Operand -> Operand -> RegUsage
 416     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 417     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 418     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 419
 420     -- 1 operand form; operand Modified
 421     usageM :: Operand -> RegUsage
 422     usageM (OpReg reg)          = mkRU [reg] [reg]
 423     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 424     usageM _                    = panic "X86.RegInfo.usageM: no match"
 425
 426     -- Registers defd when an operand is written.
 427     def_W (OpReg reg)           = [reg]
 428     def_W (OpAddr _ )           = []
 429     def_W _                     = panic "X86.RegInfo.def_W: no match"
 430
 431     -- Registers used when an operand is read.
 432     use_R (OpReg reg)  = [reg]
 433     use_R (OpImm _)    = []
 434     use_R (OpAddr ea)  = use_EA ea
 435
 436     -- Registers used to compute an effective address.
 437     use_EA (ImmAddr _ _) = []
 438     use_EA (AddrBaseIndex base index _) =
 439         use_base base $! use_index index
 440         where use_base (EABaseReg r) x = r : x
 441               use_base _ x             = x
 442               use_index EAIndexNone   = []
 443               use_index (EAIndex i _) = [i]
 444
 445     mkRUR src = src' `seq` RU src' []
 446         where src' = filter interesting src
 447
 448     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 449         where src' = filter interesting src
 450               dst' = filter interesting dst
 451
 452 interesting :: Reg -> Bool
 453 interesting (RegVirtual _)              = True
 454 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 455 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 456
 457
 458
 459 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 460 x86_patchRegsOfInstr instr env
 461  = case instr of
 462     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 463     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 464     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 465     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 466     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 467     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 468     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 469     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 470     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 471     MUL sz src dst      -> patch2 (MUL sz) src dst
 472     IDIV sz op          -> patch1 (IDIV sz) op
 473     DIV sz op           -> patch1 (DIV sz) op
 474     AND  sz src dst     -> patch2 (AND  sz) src dst
 475     OR   sz src dst     -> patch2 (OR   sz) src dst
 476     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 477     NOT  sz op          -> patch1 (NOT  sz) op
 478     NEGI sz op          -> patch1 (NEGI sz) op
 479     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 480     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 481     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 482     BT   sz imm src     -> patch1 (BT  sz imm) src
 483     TEST sz src dst     -> patch2 (TEST sz) src dst
 484     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 485     PUSH sz op          -> patch1 (PUSH sz) op
 486     POP  sz op          -> patch1 (POP  sz) op
 487     SETCC cond op       -> patch1 (SETCC cond) op
 488     JMP op              -> patch1 JMP op
 489     JMP_TBL op ids s lbl-> JMP_TBL (patchOp op) ids s lbl
 490
 491     GMOV src dst        -> GMOV (env src) (env dst)
 492     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 493     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 494
 495     GLDZ dst            -> GLDZ (env dst)
 496     GLD1 dst            -> GLD1 (env dst)
 497
 498     GFTOI src dst       -> GFTOI (env src) (env dst)
 499     GDTOI src dst       -> GDTOI (env src) (env dst)
 500
 501     GITOF src dst       -> GITOF (env src) (env dst)
 502     GITOD src dst       -> GITOD (env src) (env dst)
 503
 504     GDTOF src dst       -> GDTOF (env src) (env dst)
 505
 506     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 507     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 508     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 509     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 510
 511     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 512     GABS sz src dst     -> GABS sz (env src) (env dst)
 513     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 514     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 515     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 516     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 517     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 518
 519     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 520     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 521     CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
 522     CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
 523     CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
 524     CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
 525     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 526
 527     CALL (Left _)  _    -> instr
 528     CALL (Right reg) p  -> CALL (Right (env reg)) p
 529
 530     FETCHGOT reg        -> FETCHGOT (env reg)
 531     FETCHPC  reg        -> FETCHPC  (env reg)
 532
 533     NOP                 -> instr
 534     COMMENT _           -> instr
 535     DELTA _             -> instr
 536
 537     JXX _ _             -> instr
 538     JXX_GBL _ _         -> instr
 539     CLTD _              -> instr
 540
 541     _other              -> panic "patchRegs: unrecognised instr"
 542
 543   where
 544     patch1 :: (Operand -> a) -> Operand -> a
 545     patch1 insn op      = insn $! patchOp op
 546     patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
 547     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 548
 549     patchOp (OpReg  reg) = OpReg $! env reg
 550     patchOp (OpImm  imm) = OpImm imm
 551     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 552
 553     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 554     lookupAddr (AddrBaseIndex base index disp)
 555       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 556       where
 557         lookupBase EABaseNone       = EABaseNone
 558         lookupBase EABaseRip        = EABaseRip
 559         lookupBase (EABaseReg r)    = EABaseReg (env r)
 560
 561         lookupIndex EAIndexNone     = EAIndexNone
 562         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 563
 564
 565 --------------------------------------------------------------------------------
 566 x86_isJumpishInstr
 567         :: Instr -> Bool
 568
 569 x86_isJumpishInstr instr
 570  = case instr of
 571         JMP{}           -> True
 572         JXX{}           -> True
 573         JXX_GBL{}       -> True
 574         JMP_TBL{}       -> True
 575         CALL{}          -> True
 576         _               -> False
 577
 578
 579 x86_jumpDestsOfInstr
 580         :: Instr
 581         -> [BlockId]
 582
 583 x86_jumpDestsOfInstr insn
 584   = case insn of
 585         JXX _ id        -> [id]
 586         JMP_TBL _ ids _ _ -> [id | Just id <- ids]
 587         _               -> []
 588
 589
 590 x86_patchJumpInstr
 591         :: Instr -> (BlockId -> BlockId) -> Instr
 592
 593 x86_patchJumpInstr insn patchF
 594   = case insn of
 595         JXX cc id       -> JXX cc (patchF id)
 596         JMP_TBL op ids section lbl
 597           -> JMP_TBL op (map (fmap patchF) ids) section lbl
 598         _               -> insn
 599
 600
 601
 602
 603 -- -----------------------------------------------------------------------------
 604 -- | Make a spill instruction.
 605 x86_mkSpillInstr
 606         :: Reg          -- register to spill
 607         -> Int          -- current stack delta
 608         -> Int          -- spill slot to use
 609         -> Instr
 610
 611 x86_mkSpillInstr reg delta slot
 612   = let off     = spillSlotToOffset slot
 613     in
 614     let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 615     in case targetClassOfReg reg of
 616            RcInteger   -> MOV IF_ARCH_i386(II32,II64)
 617                               (OpReg reg) (OpAddr (spRel off_w))
 618            RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 619            RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 620            _         -> panic "X86.mkSpillInstr: no match"
 621
 622
 623 -- | Make a spill reload instruction.
 624 x86_mkLoadInstr
 625         :: Reg          -- register to load
 626         -> Int          -- current stack delta
 627         -> Int          -- spill slot to use
 628         -> Instr
 629
 630 x86_mkLoadInstr reg delta slot
 631   = let off     = spillSlotToOffset slot
 632     in
 633         let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 634         in case targetClassOfReg reg of
 635               RcInteger -> MOV IF_ARCH_i386(II32,II64)
 636                                (OpAddr (spRel off_w)) (OpReg reg)
 637               RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
 638               RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 639               _           -> panic "X86.x86_mkLoadInstr"
 640
 641 spillSlotSize :: Int
 642 spillSlotSize = IF_ARCH_i386(12, 8)
 643
 644 maxSpillSlots :: Int
 645 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 646
 647 -- convert a spill slot number to a *byte* offset, with no sign:
 648 -- decide on a per arch basis whether you are spilling above or below
 649 -- the C stack pointer.
 650 spillSlotToOffset :: Int -> Int
 651 spillSlotToOffset slot
 652    | slot >= 0 && slot < maxSpillSlots
 653    = 64 + spillSlotSize * slot
 654    | otherwise
 655    = pprPanic "spillSlotToOffset:"
 656               (   text "invalid spill location: " <> int slot
 657               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 658
 659 --------------------------------------------------------------------------------
 660
 661 -- | See if this instruction is telling us the current C stack delta
 662 x86_takeDeltaInstr
 663         :: Instr
 664         -> Maybe Int
 665
 666 x86_takeDeltaInstr instr
 667  = case instr of
 668         DELTA i         -> Just i
 669         _               -> Nothing
 670
 671
 672 x86_isMetaInstr
 673         :: Instr
 674         -> Bool
 675
 676 x86_isMetaInstr instr
 677  = case instr of
 678         COMMENT{}       -> True
 679         LDATA{}         -> True
 680         NEWBLOCK{}      -> True
 681         DELTA{}         -> True
 682         _               -> False
 683
 684
 685
 686 -- | Make a reg-reg move instruction.
 687 --      On SPARC v8 there are no instructions to move directly between
 688 --      floating point and integer regs. If we need to do that then we
 689 --      have to go via memory.
 690 --
 691 x86_mkRegRegMoveInstr
 692         :: Reg
 693         -> Reg
 694         -> Instr
 695
 696 x86_mkRegRegMoveInstr src dst
 697  = case targetClassOfReg src of
 698 #if   i386_TARGET_ARCH
 699         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 700 #else
 701         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 702 #endif
 703         RcDouble    -> GMOV src dst
 704         RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
 705         _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 706
 707 -- | Check whether an instruction represents a reg-reg move.
 708 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 709 --      by assigning the src and dest temporaries to the same real register.
 710 --
 711 x86_takeRegRegMoveInstr
 712         :: Instr
 713         -> Maybe (Reg,Reg)
 714
 715 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 716         = Just (r1,r2)
 717
 718 x86_takeRegRegMoveInstr _  = Nothing
 719
 720
 721 -- | Make an unconditional branch instruction.
 722 x86_mkJumpInstr
 723         :: BlockId
 724         -> [Instr]
 725
 726 x86_mkJumpInstr id
 727         = [JXX ALWAYS id]
 728
 729
 730
 731
 732
 733 i386_insert_ffrees
 734         :: [GenBasicBlock Instr]
 735         -> [GenBasicBlock Instr]
 736
 737 i386_insert_ffrees blocks
 738    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 739    = map ffree_before_nonlocal_transfers blocks
 740
 741    | otherwise
 742    = blocks
 743   where
 744    ffree_before_nonlocal_transfers (BasicBlock id insns)
 745      = BasicBlock id (foldr p [] insns)
 746      where p insn r = case insn of
 747                         CALL _ _ -> GFREE : insn : r
 748                         JMP _    -> GFREE : insn : r
 749                         JXX_GBL _ _ -> panic "i386_insert_ffrees: cannot handle JXX_GBL"
 750                         _        -> insn : r
 751
 752 -- if you ever add a new FP insn to the fake x86 FP insn set,
 753 -- you must update this too
 754 is_G_instr :: Instr -> Bool
 755 is_G_instr instr
 756    = case instr of
 757         GMOV{}          -> True
 758         GLD{}           -> True
 759         GST{}           -> True
 760         GLDZ{}          -> True
 761         GLD1{}          -> True
 762         GFTOI{}         -> True
 763         GDTOI{}         -> True
 764         GITOF{}         -> True
 765         GITOD{}         -> True
 766         GDTOF{}         -> True
 767         GADD{}          -> True
 768         GDIV{}          -> True
 769         GSUB{}          -> True
 770         GMUL{}          -> True
 771         GCMP{}          -> True
 772         GABS{}          -> True
 773         GNEG{}          -> True
 774         GSQRT{}         -> True
 775         GSIN{}          -> True
 776         GCOS{}          -> True
 777         GTAN{}          -> True
 778         GFREE           -> panic "is_G_instr: GFREE (!)"
 779         _               -> False
 780
 781
 782 data JumpDest = DestBlockId BlockId | DestImm Imm
 783
 784 getJumpDestBlockId :: JumpDest -> Maybe BlockId
 785 getJumpDestBlockId (DestBlockId bid) = Just bid
 786 getJumpDestBlockId _                 = Nothing
 787
 788 canShortcut :: Instr -> Maybe JumpDest
 789 canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
 790 canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
 791 canShortcut _                  = Nothing
 792
 793
 794 -- This helper shortcuts a sequence of branches.
 795 -- The blockset helps avoid following cycles.
 796 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 797 shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
 798   where shortcutJump' fn seen insn@(JXX cc id) =
 799           if setMember id seen then insn
 800           else case fn id of
 801                  Nothing                -> insn
 802                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 803                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 804                where seen' = setInsert id seen
 805         shortcutJump' _ _ other = other
 806
 807 -- Here because it knows about JumpDest
 808 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 809 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 810   | Just uq <- maybeAsmTemp lab
 811   = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
 812 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 813   | Just uq <- maybeAsmTemp lbl1
 814   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
 815         -- slightly dodgy, we're ignoring the second label, but this
 816         -- works with the way we use CmmLabelDiffOff for jump tables now.
 817
 818 shortcutStatic _ other_static
 819         = other_static
 820
 821 shortBlockId
 822         :: (BlockId -> Maybe JumpDest)
 823         -> UniqSet Unique
 824         -> BlockId
 825         -> CLabel
 826
 827 shortBlockId fn seen blockid =
 828   case (elementOfUniqSet uq seen, fn blockid) of
 829     (True, _)    -> mkAsmTempLabel uq
 830     (_, Nothing) -> mkAsmTempLabel uq
 831     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 832     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 833     (_, _other) -> panic "shortBlockId"
 834   where uq = getUnique blockid