compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import Cmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import UniqSet
  32 import Unique
  33
  34 -- Size of a PPC memory address, in bytes.
  35 --
  36 archWordSize :: Size
  37 #if i386_TARGET_ARCH
  38 archWordSize    = II32
  39 #elif x86_64_TARGET_ARCH
  40 archWordSize    = II64
  41 #else
  42 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  43 #endif
  44
  45 -- | Instruction instance for x86 instruction set.
  46 instance Instruction Instr where
  47         regUsageOfInstr         = x86_regUsageOfInstr
  48         patchRegsOfInstr        = x86_patchRegsOfInstr
  49         isJumpishInstr          = x86_isJumpishInstr
  50         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  51         patchJumpInstr          = x86_patchJumpInstr
  52         mkSpillInstr            = x86_mkSpillInstr
  53         mkLoadInstr             = x86_mkLoadInstr
  54         takeDeltaInstr          = x86_takeDeltaInstr
  55         isMetaInstr             = x86_isMetaInstr
  56         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  57         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  58         mkJumpInstr             = x86_mkJumpInstr
  59
  60
  61 -- -----------------------------------------------------------------------------
  62 -- Intel x86 instructions
  63
  64 {-
  65 Intel, in their infinite wisdom, selected a stack model for floating
  66 point registers on x86.  That might have made sense back in 1979 --
  67 nowadays we can see it for the nonsense it really is.  A stack model
  68 fits poorly with the existing nativeGen infrastructure, which assumes
  69 flat integer and FP register sets.  Prior to this commit, nativeGen
  70 could not generate correct x86 FP code -- to do so would have meant
  71 somehow working the register-stack paradigm into the register
  72 allocator and spiller, which sounds very difficult.
  73
  74 We have decided to cheat, and go for a simple fix which requires no
  75 infrastructure modifications, at the expense of generating ropey but
  76 correct FP code.  All notions of the x86 FP stack and its insns have
  77 been removed.  Instead, we pretend (to the instruction selector and
  78 register allocator) that x86 has six floating point registers, %fake0
  79 .. %fake5, which can be used in the usual flat manner.  We further
  80 claim that x86 has floating point instructions very similar to SPARC
  81 and Alpha, that is, a simple 3-operand register-register arrangement.
  82 Code generation and register allocation proceed on this basis.
  83
  84 When we come to print out the final assembly, our convenient fiction
  85 is converted to dismal reality.  Each fake instruction is
  86 independently converted to a series of real x86 instructions.
  87 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  88 arithmetic operations, the two operands are pushed onto the top of the
  89 FP stack, the operation done, and the result copied back into the
  90 relevant register.  There are only six %fake registers because 2 are
  91 needed for the translation, and x86 has 8 in total.
  92
  93 The translation is inefficient but is simple and it works.  A cleverer
  94 translation would handle a sequence of insns, simulating the FP stack
  95 contents, would not impose a fixed mapping from %fake to %st regs, and
  96 hopefully could avoid most of the redundant reg-reg moves of the
  97 current translation.
  98
  99 We might as well make use of whatever unique FP facilities Intel have
 100 chosen to bless us with (let's not be churlish, after all).
 101 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 102 -}
 103
 104 {-
 105 Note [x86 Floating point precision]
 106
 107 Intel's internal floating point registers are by default 80 bit
 108 extended precision.  This means that all operations done on values in
 109 registers are done at 80 bits, and unless the intermediate values are
 110 truncated to the appropriate size (32 or 64 bits) by storing in
 111 memory, calculations in registers will give different results from
 112 calculations which pass intermediate values in memory (eg. via
 113 function calls).
 114
 115 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 116 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 117 that this will only affect 64-bit precision arithmetic; 32-bit
 118 calculations will still be done at 64-bit precision in registers.  So
 119 it doesn't solve the whole problem.
 120
 121 There's also the issue of what the C library is expecting in terms of
 122 precision.  It seems to be the case that glibc on Linux expects the
 123 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 124 unexpected effects.  Changing the default could have undesirable
 125 effects on other 3rd-party library code too, so the right thing would
 126 be to save/restore the FPU control word across Haskell code if we were
 127 to do this.
 128
 129 gcc's -ffloat-store gives consistent results by always storing the
 130 results of floating-point calculations in memory, which works for both
 131 32 and 64-bit precision.  However, it only affects the values of
 132 user-declared floating point variables in C, not intermediate results.
 133 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 134 flag).
 135
 136 Another problem is how to spill floating point registers in the
 137 register allocator.  Should we spill the whole 80 bits, or just 64?
 138 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 139 Linux, spilling 64 bits will round the results of some operations.
 140 This is what gcc does.  Spilling at 80 bits requires taking up a full
 141 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 142 the alignment problems.
 143
 144 In the future [edit: now available in GHC 7.0.1, with the -msse2
 145 flag], we'll use the SSE registers for floating point.  This requires
 146 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
 147 float ops), which means P4 or Xeon and above.  Using SSE will solve
 148 all these problems, because the SSE registers use fixed 32 bit or 64
 149 bit precision.
 150
 151 --SDM 1/2003
 152 -}
 153
 154
 155 data Instr
 156         -- comment pseudo-op
 157         = COMMENT FastString
 158
 159         -- some static data spat out during code
 160         -- generation.  Will be extracted before
 161         -- pretty-printing.
 162         | LDATA   Section [CmmStatic]
 163
 164         -- start a new basic block.  Useful during
 165         -- codegen, removed later.  Preceding
 166         -- instruction should be a jump, as per the
 167         -- invariants for a BasicBlock (see Cmm).
 168         | NEWBLOCK BlockId
 169
 170         -- specify current stack offset for
 171         -- benefit of subsequent passes
 172         | DELTA   Int
 173
 174         -- Moves.
 175         | MOV         Size Operand Operand
 176         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 177         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 178         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 179         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 180         -- don't affect the high bits of the register.
 181
 182         -- Load effective address (also a very useful three-operand add instruction :-)
 183         | LEA         Size Operand Operand
 184
 185         -- Int Arithmetic.
 186         | ADD         Size Operand Operand
 187         | ADC         Size Operand Operand
 188         | SUB         Size Operand Operand
 189
 190         | MUL         Size Operand Operand
 191         | IMUL        Size Operand Operand      -- signed int mul
 192         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 193
 194         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 195         | IDIV        Size Operand              -- ditto, but signed
 196
 197         -- Simple bit-twiddling.
 198         | AND         Size Operand Operand
 199         | OR          Size Operand Operand
 200         | XOR         Size Operand Operand
 201         | NOT         Size Operand
 202         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 203
 204         -- Shifts (amount may be immediate or %cl only)
 205         | SHL         Size Operand{-amount-} Operand
 206         | SAR         Size Operand{-amount-} Operand
 207         | SHR         Size Operand{-amount-} Operand
 208
 209         | BT          Size Imm Operand
 210         | NOP
 211
 212         -- x86 Float Arithmetic.
 213         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 214         -- as single instructions right up until we spit them out.
 215         -- all the 3-operand fake fp insns are src1 src2 dst
 216         -- and furthermore are constrained to be fp regs only.
 217         -- IMPORTANT: keep is_G_insn up to date with any changes here
 218         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 219         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 220         | GST         Size Reg AddrMode -- src(fpreg), dst
 221
 222         | GLDZ        Reg -- dst(fpreg)
 223         | GLD1        Reg -- dst(fpreg)
 224
 225         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 226         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 227
 228         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 229         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 230
 231         | GADD        Size Reg Reg Reg -- src1, src2, dst
 232         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 233         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 234         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 235
 236                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 237                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 238                 -- comparable and the comparison is True.  Subsequent code must
 239                 -- test the %eflags zero flag regardless of the supplied Cond.
 240         | GCMP        Cond Reg Reg -- src1, src2
 241
 242         | GABS        Size Reg Reg -- src, dst
 243         | GNEG        Size Reg Reg -- src, dst
 244         | GSQRT       Size Reg Reg -- src, dst
 245         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 246         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 247         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 248
 249         | GFREE         -- do ffree on all x86 regs; an ugly hack
 250
 251
 252         -- SSE2 floating point: we use a restricted set of the available SSE2
 253         -- instructions for floating-point.
 254         -- use MOV for moving (either movss or movsd (movlpd better?))
 255         | CVTSS2SD      Reg Reg         -- F32 to F64
 256         | CVTSD2SS      Reg Reg         -- F64 to F32
 257         | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
 258         | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
 259         | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
 260         | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
 261
 262         -- use ADD & SUB for arithmetic.  In both cases, operands
 263         -- are  Operand Reg.
 264
 265         -- SSE2 floating-point division:
 266         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 267
 268         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 269         -- compare single/double prec floating point respectively.
 270
 271         | SQRT          Size Operand Reg        -- src, dst
 272
 273
 274         -- Comparison
 275         | TEST          Size Operand Operand
 276         | CMP           Size Operand Operand
 277         | SETCC         Cond Operand
 278
 279         -- Stack Operations.
 280         | PUSH          Size Operand
 281         | POP           Size Operand
 282         -- both unused (SDM):
 283         --  | PUSHA
 284         --  | POPA
 285
 286         -- Jumping around.
 287         | JMP         Operand
 288         | JXX         Cond BlockId  -- includes unconditional branches
 289         | JXX_GBL     Cond Imm      -- non-local version of JXX
 290         | JMP_TBL     Operand [BlockId]  -- table jump
 291         | CALL        (Either Imm Reg) [Reg]
 292
 293         -- Other things.
 294         | CLTD Size              -- sign extend %eax into %edx:%eax
 295
 296         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 297                                  -- pretty-prints as
 298                                  --       call 1f
 299                                  -- 1:    popl %reg
 300                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 301         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 302                                  -- pretty-prints as
 303                                  --       call 1f
 304                                  -- 1:    popl %reg
 305
 306
 307 data Operand
 308         = OpReg  Reg            -- register
 309         | OpImm  Imm            -- immediate value
 310         | OpAddr AddrMode       -- memory reference
 311
 312
 313
 314 x86_regUsageOfInstr :: Instr -> RegUsage
 315 x86_regUsageOfInstr instr
 316  = case instr of
 317     MOV    _ src dst    -> usageRW src dst
 318     MOVZxL _ src dst    -> usageRW src dst
 319     MOVSxL _ src dst    -> usageRW src dst
 320     LEA    _ src dst    -> usageRW src dst
 321     ADD    _ src dst    -> usageRM src dst
 322     ADC    _ src dst    -> usageRM src dst
 323     SUB    _ src dst    -> usageRM src dst
 324     IMUL   _ src dst    -> usageRM src dst
 325     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 326     MUL    _ src dst    -> usageRM src dst
 327     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 328     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 329     AND    _ src dst    -> usageRM src dst
 330     OR     _ src dst    -> usageRM src dst
 331
 332     XOR    _ (OpReg src) (OpReg dst)
 333         | src == dst    -> mkRU [] [dst]
 334
 335     XOR    _ src dst    -> usageRM src dst
 336     NOT    _ op         -> usageM op
 337     NEGI   _ op         -> usageM op
 338     SHL    _ imm dst    -> usageRM imm dst
 339     SAR    _ imm dst    -> usageRM imm dst
 340     SHR    _ imm dst    -> usageRM imm dst
 341     BT     _ _   src    -> mkRUR (use_R src)
 342
 343     PUSH   _ op         -> mkRUR (use_R op)
 344     POP    _ op         -> mkRU [] (def_W op)
 345     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 346     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 347     SETCC  _ op         -> mkRU [] (def_W op)
 348     JXX    _ _          -> mkRU [] []
 349     JXX_GBL _ _         -> mkRU [] []
 350     JMP     op          -> mkRUR (use_R op)
 351     JMP_TBL op _        -> mkRUR (use_R op)
 352     CALL (Left _)  params   -> mkRU params callClobberedRegs
 353     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 354     CLTD   _            -> mkRU [eax] [edx]
 355     NOP                 -> mkRU [] []
 356
 357     GMOV   src dst      -> mkRU [src] [dst]
 358     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 359     GST    _ src dst    -> mkRUR (src : use_EA dst)
 360
 361     GLDZ   dst          -> mkRU [] [dst]
 362     GLD1   dst          -> mkRU [] [dst]
 363
 364     GFTOI  src dst      -> mkRU [src] [dst]
 365     GDTOI  src dst      -> mkRU [src] [dst]
 366
 367     GITOF  src dst      -> mkRU [src] [dst]
 368     GITOD  src dst      -> mkRU [src] [dst]
 369
 370     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 371     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 372     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 373     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 374
 375     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 376     GABS   _ src dst     -> mkRU [src] [dst]
 377     GNEG   _ src dst     -> mkRU [src] [dst]
 378     GSQRT  _ src dst     -> mkRU [src] [dst]
 379     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 380     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 381     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 382
 383     CVTSS2SD   src dst  -> mkRU [src] [dst]
 384     CVTSD2SS   src dst  -> mkRU [src] [dst]
 385     CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
 386     CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
 387     CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
 388     CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
 389     FDIV _     src dst  -> usageRM src dst
 390
 391     FETCHGOT reg        -> mkRU [] [reg]
 392     FETCHPC  reg        -> mkRU [] [reg]
 393
 394     COMMENT _           -> noUsage
 395     DELTA   _           -> noUsage
 396
 397     _other              -> panic "regUsage: unrecognised instr"
 398
 399  where
 400     -- 2 operand form; first operand Read; second Written
 401     usageRW :: Operand -> Operand -> RegUsage
 402     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 403     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 404     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 405
 406     -- 2 operand form; first operand Read; second Modified
 407     usageRM :: Operand -> Operand -> RegUsage
 408     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 409     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 410     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 411
 412     -- 1 operand form; operand Modified
 413     usageM :: Operand -> RegUsage
 414     usageM (OpReg reg)          = mkRU [reg] [reg]
 415     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 416     usageM _                    = panic "X86.RegInfo.usageM: no match"
 417
 418     -- Registers defd when an operand is written.
 419     def_W (OpReg reg)           = [reg]
 420     def_W (OpAddr _ )           = []
 421     def_W _                     = panic "X86.RegInfo.def_W: no match"
 422
 423     -- Registers used when an operand is read.
 424     use_R (OpReg reg)  = [reg]
 425     use_R (OpImm _)    = []
 426     use_R (OpAddr ea)  = use_EA ea
 427
 428     -- Registers used to compute an effective address.
 429     use_EA (ImmAddr _ _) = []
 430     use_EA (AddrBaseIndex base index _) =
 431         use_base base $! use_index index
 432         where use_base (EABaseReg r) x = r : x
 433               use_base _ x             = x
 434               use_index EAIndexNone   = []
 435               use_index (EAIndex i _) = [i]
 436
 437     mkRUR src = src' `seq` RU src' []
 438         where src' = filter interesting src
 439
 440     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 441         where src' = filter interesting src
 442               dst' = filter interesting dst
 443
 444 interesting :: Reg -> Bool
 445 interesting (RegVirtual _)              = True
 446 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 447 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 448
 449
 450
 451 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 452 x86_patchRegsOfInstr instr env
 453  = case instr of
 454     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 455     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 456     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 457     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 458     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 459     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 460     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 461     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 462     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 463     MUL sz src dst      -> patch2 (MUL sz) src dst
 464     IDIV sz op          -> patch1 (IDIV sz) op
 465     DIV sz op           -> patch1 (DIV sz) op
 466     AND  sz src dst     -> patch2 (AND  sz) src dst
 467     OR   sz src dst     -> patch2 (OR   sz) src dst
 468     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 469     NOT  sz op          -> patch1 (NOT  sz) op
 470     NEGI sz op          -> patch1 (NEGI sz) op
 471     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 472     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 473     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 474     BT   sz imm src     -> patch1 (BT  sz imm) src
 475     TEST sz src dst     -> patch2 (TEST sz) src dst
 476     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 477     PUSH sz op          -> patch1 (PUSH sz) op
 478     POP  sz op          -> patch1 (POP  sz) op
 479     SETCC cond op       -> patch1 (SETCC cond) op
 480     JMP op              -> patch1 JMP op
 481     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 482
 483     GMOV src dst        -> GMOV (env src) (env dst)
 484     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 485     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 486
 487     GLDZ dst            -> GLDZ (env dst)
 488     GLD1 dst            -> GLD1 (env dst)
 489
 490     GFTOI src dst       -> GFTOI (env src) (env dst)
 491     GDTOI src dst       -> GDTOI (env src) (env dst)
 492
 493     GITOF src dst       -> GITOF (env src) (env dst)
 494     GITOD src dst       -> GITOD (env src) (env dst)
 495
 496     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 497     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 498     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 499     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 500
 501     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 502     GABS sz src dst     -> GABS sz (env src) (env dst)
 503     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 504     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 505     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 506     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 507     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 508
 509     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 510     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 511     CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
 512     CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
 513     CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
 514     CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
 515     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 516
 517     CALL (Left _)  _    -> instr
 518     CALL (Right reg) p  -> CALL (Right (env reg)) p
 519
 520     FETCHGOT reg        -> FETCHGOT (env reg)
 521     FETCHPC  reg        -> FETCHPC  (env reg)
 522
 523     NOP                 -> instr
 524     COMMENT _           -> instr
 525     DELTA _             -> instr
 526
 527     JXX _ _             -> instr
 528     JXX_GBL _ _         -> instr
 529     CLTD _              -> instr
 530
 531     _other              -> panic "patchRegs: unrecognised instr"
 532
 533   where
 534     patch1 :: (Operand -> a) -> Operand -> a
 535     patch1 insn op      = insn $! patchOp op
 536     patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
 537     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 538
 539     patchOp (OpReg  reg) = OpReg $! env reg
 540     patchOp (OpImm  imm) = OpImm imm
 541     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 542
 543     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 544     lookupAddr (AddrBaseIndex base index disp)
 545       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 546       where
 547         lookupBase EABaseNone       = EABaseNone
 548         lookupBase EABaseRip        = EABaseRip
 549         lookupBase (EABaseReg r)    = EABaseReg (env r)
 550
 551         lookupIndex EAIndexNone     = EAIndexNone
 552         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 553
 554
 555 --------------------------------------------------------------------------------
 556 x86_isJumpishInstr
 557         :: Instr -> Bool
 558
 559 x86_isJumpishInstr instr
 560  = case instr of
 561         JMP{}           -> True
 562         JXX{}           -> True
 563         JXX_GBL{}       -> True
 564         JMP_TBL{}       -> True
 565         CALL{}          -> True
 566         _               -> False
 567
 568
 569 x86_jumpDestsOfInstr
 570         :: Instr
 571         -> [BlockId]
 572
 573 x86_jumpDestsOfInstr insn
 574   = case insn of
 575         JXX _ id        -> [id]
 576         JMP_TBL _ ids   -> ids
 577         _               -> []
 578
 579
 580 x86_patchJumpInstr
 581         :: Instr -> (BlockId -> BlockId) -> Instr
 582
 583 x86_patchJumpInstr insn patchF
 584   = case insn of
 585         JXX cc id       -> JXX cc (patchF id)
 586         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 587         _               -> insn
 588
 589
 590
 591
 592 -- -----------------------------------------------------------------------------
 593 -- | Make a spill instruction.
 594 x86_mkSpillInstr
 595         :: Reg          -- register to spill
 596         -> Int          -- current stack delta
 597         -> Int          -- spill slot to use
 598         -> Instr
 599
 600 x86_mkSpillInstr reg delta slot
 601   = let off     = spillSlotToOffset slot
 602     in
 603     let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 604     in case targetClassOfReg reg of
 605            RcInteger   -> MOV IF_ARCH_i386(II32,II64)
 606                               (OpReg reg) (OpAddr (spRel off_w))
 607            RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 608            RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 609            _         -> panic "X86.mkSpillInstr: no match"
 610
 611
 612 -- | Make a spill reload instruction.
 613 x86_mkLoadInstr
 614         :: Reg          -- register to load
 615         -> Int          -- current stack delta
 616         -> Int          -- spill slot to use
 617         -> Instr
 618
 619 x86_mkLoadInstr reg delta slot
 620   = let off     = spillSlotToOffset slot
 621     in
 622         let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 623         in case targetClassOfReg reg of
 624               RcInteger -> MOV IF_ARCH_i386(II32,II64)
 625                                (OpAddr (spRel off_w)) (OpReg reg)
 626               RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
 627               RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 628               _           -> panic "X86.x86_mkLoadInstr"
 629
 630 spillSlotSize :: Int
 631 spillSlotSize = IF_ARCH_i386(12, 8)
 632
 633 maxSpillSlots :: Int
 634 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 635
 636 -- convert a spill slot number to a *byte* offset, with no sign:
 637 -- decide on a per arch basis whether you are spilling above or below
 638 -- the C stack pointer.
 639 spillSlotToOffset :: Int -> Int
 640 spillSlotToOffset slot
 641    | slot >= 0 && slot < maxSpillSlots
 642    = 64 + spillSlotSize * slot
 643    | otherwise
 644    = pprPanic "spillSlotToOffset:"
 645               (   text "invalid spill location: " <> int slot
 646               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 647
 648 --------------------------------------------------------------------------------
 649
 650 -- | See if this instruction is telling us the current C stack delta
 651 x86_takeDeltaInstr
 652         :: Instr
 653         -> Maybe Int
 654
 655 x86_takeDeltaInstr instr
 656  = case instr of
 657         DELTA i         -> Just i
 658         _               -> Nothing
 659
 660
 661 x86_isMetaInstr
 662         :: Instr
 663         -> Bool
 664
 665 x86_isMetaInstr instr
 666  = case instr of
 667         COMMENT{}       -> True
 668         LDATA{}         -> True
 669         NEWBLOCK{}      -> True
 670         DELTA{}         -> True
 671         _               -> False
 672
 673
 674
 675 -- | Make a reg-reg move instruction.
 676 --      On SPARC v8 there are no instructions to move directly between
 677 --      floating point and integer regs. If we need to do that then we
 678 --      have to go via memory.
 679 --
 680 x86_mkRegRegMoveInstr
 681         :: Reg
 682         -> Reg
 683         -> Instr
 684
 685 x86_mkRegRegMoveInstr src dst
 686  = case targetClassOfReg src of
 687 #if   i386_TARGET_ARCH
 688         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 689 #else
 690         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 691 #endif
 692         RcDouble    -> GMOV src dst
 693         RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
 694         _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 695
 696 -- | Check whether an instruction represents a reg-reg move.
 697 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 698 --      by assigning the src and dest temporaries to the same real register.
 699 --
 700 x86_takeRegRegMoveInstr
 701         :: Instr
 702         -> Maybe (Reg,Reg)
 703
 704 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 705         = Just (r1,r2)
 706
 707 x86_takeRegRegMoveInstr _  = Nothing
 708
 709
 710 -- | Make an unconditional branch instruction.
 711 x86_mkJumpInstr
 712         :: BlockId
 713         -> [Instr]
 714
 715 x86_mkJumpInstr id
 716         = [JXX ALWAYS id]
 717
 718
 719
 720
 721
 722 i386_insert_ffrees
 723         :: [GenBasicBlock Instr]
 724         -> [GenBasicBlock Instr]
 725
 726 i386_insert_ffrees blocks
 727    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 728    = map ffree_before_nonlocal_transfers blocks
 729
 730    | otherwise
 731    = blocks
 732   where
 733    ffree_before_nonlocal_transfers (BasicBlock id insns)
 734      = BasicBlock id (foldr p [] insns)
 735      where p insn r = case insn of
 736                         CALL _ _ -> GFREE : insn : r
 737                         JMP _    -> GFREE : insn : r
 738                         _        -> insn : r
 739
 740 -- if you ever add a new FP insn to the fake x86 FP insn set,
 741 -- you must update this too
 742 is_G_instr :: Instr -> Bool
 743 is_G_instr instr
 744    = case instr of
 745         GMOV{}          -> True
 746         GLD{}           -> True
 747         GST{}           -> True
 748         GLDZ{}          -> True
 749         GLD1{}          -> True
 750         GFTOI{}         -> True
 751         GDTOI{}         -> True
 752         GITOF{}         -> True
 753         GITOD{}         -> True
 754         GADD{}          -> True
 755         GDIV{}          -> True
 756         GSUB{}          -> True
 757         GMUL{}          -> True
 758         GCMP{}          -> True
 759         GABS{}          -> True
 760         GNEG{}          -> True
 761         GSQRT{}         -> True
 762         GSIN{}          -> True
 763         GCOS{}          -> True
 764         GTAN{}          -> True
 765         GFREE           -> panic "is_G_instr: GFREE (!)"
 766         _               -> False
 767
 768
 769 data JumpDest = DestBlockId BlockId | DestImm Imm
 770
 771
 772 canShortcut :: Instr -> Maybe JumpDest
 773 canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
 774 canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
 775 canShortcut _                  = Nothing
 776
 777
 778 -- This helper shortcuts a sequence of branches.
 779 -- The blockset helps avoid following cycles.
 780 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 781 shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
 782   where shortcutJump' fn seen insn@(JXX cc id) =
 783           if elemBlockSet id seen then insn
 784           else case fn id of
 785                  Nothing                -> insn
 786                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 787                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 788                where seen' = extendBlockSet seen id
 789         shortcutJump' _ _ other = other
 790
 791 -- Here because it knows about JumpDest
 792 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 793 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 794   | Just uq <- maybeAsmTemp lab
 795   = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (BlockId uq)))
 796 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 797   | Just uq <- maybeAsmTemp lbl1
 798   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (BlockId uq)) lbl2 off)
 799         -- slightly dodgy, we're ignoring the second label, but this
 800         -- works with the way we use CmmLabelDiffOff for jump tables now.
 801
 802 shortcutStatic _ other_static
 803         = other_static
 804
 805 shortBlockId
 806         :: (BlockId -> Maybe JumpDest)
 807         -> UniqSet Unique
 808         -> BlockId
 809         -> CLabel
 810
 811 shortBlockId fn seen blockid@(BlockId uq) =
 812   case (elementOfUniqSet uq seen, fn blockid) of
 813     (True, _)    -> mkAsmTempLabel uq
 814     (_, Nothing) -> mkAsmTempLabel uq
 815     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 816     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 817     (_, _other) -> panic "shortBlockId"