compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import OldCmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import UniqSet
  32 import Unique
  33
  34 -- Size of a PPC memory address, in bytes.
  35 --
  36 archWordSize :: Size
  37 #if i386_TARGET_ARCH
  38 archWordSize    = II32
  39 #elif x86_64_TARGET_ARCH
  40 archWordSize    = II64
  41 #else
  42 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  43 #endif
  44
  45 -- | Instruction instance for x86 instruction set.
  46 instance Instruction Instr where
  47         regUsageOfInstr         = x86_regUsageOfInstr
  48         patchRegsOfInstr        = x86_patchRegsOfInstr
  49         isJumpishInstr          = x86_isJumpishInstr
  50         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  51         patchJumpInstr          = x86_patchJumpInstr
  52         mkSpillInstr            = x86_mkSpillInstr
  53         mkLoadInstr             = x86_mkLoadInstr
  54         takeDeltaInstr          = x86_takeDeltaInstr
  55         isMetaInstr             = x86_isMetaInstr
  56         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  57         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  58         mkJumpInstr             = x86_mkJumpInstr
  59
  60
  61 -- -----------------------------------------------------------------------------
  62 -- Intel x86 instructions
  63
  64 {-
  65 Intel, in their infinite wisdom, selected a stack model for floating
  66 point registers on x86.  That might have made sense back in 1979 --
  67 nowadays we can see it for the nonsense it really is.  A stack model
  68 fits poorly with the existing nativeGen infrastructure, which assumes
  69 flat integer and FP register sets.  Prior to this commit, nativeGen
  70 could not generate correct x86 FP code -- to do so would have meant
  71 somehow working the register-stack paradigm into the register
  72 allocator and spiller, which sounds very difficult.
  73
  74 We have decided to cheat, and go for a simple fix which requires no
  75 infrastructure modifications, at the expense of generating ropey but
  76 correct FP code.  All notions of the x86 FP stack and its insns have
  77 been removed.  Instead, we pretend (to the instruction selector and
  78 register allocator) that x86 has six floating point registers, %fake0
  79 .. %fake5, which can be used in the usual flat manner.  We further
  80 claim that x86 has floating point instructions very similar to SPARC
  81 and Alpha, that is, a simple 3-operand register-register arrangement.
  82 Code generation and register allocation proceed on this basis.
  83
  84 When we come to print out the final assembly, our convenient fiction
  85 is converted to dismal reality.  Each fake instruction is
  86 independently converted to a series of real x86 instructions.
  87 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  88 arithmetic operations, the two operands are pushed onto the top of the
  89 FP stack, the operation done, and the result copied back into the
  90 relevant register.  There are only six %fake registers because 2 are
  91 needed for the translation, and x86 has 8 in total.
  92
  93 The translation is inefficient but is simple and it works.  A cleverer
  94 translation would handle a sequence of insns, simulating the FP stack
  95 contents, would not impose a fixed mapping from %fake to %st regs, and
  96 hopefully could avoid most of the redundant reg-reg moves of the
  97 current translation.
  98
  99 We might as well make use of whatever unique FP facilities Intel have
 100 chosen to bless us with (let's not be churlish, after all).
 101 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 102 -}
 103
 104 {-
 105 Note [x86 Floating point precision]
 106
 107 Intel's internal floating point registers are by default 80 bit
 108 extended precision.  This means that all operations done on values in
 109 registers are done at 80 bits, and unless the intermediate values are
 110 truncated to the appropriate size (32 or 64 bits) by storing in
 111 memory, calculations in registers will give different results from
 112 calculations which pass intermediate values in memory (eg. via
 113 function calls).
 114
 115 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 116 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 117 that this will only affect 64-bit precision arithmetic; 32-bit
 118 calculations will still be done at 64-bit precision in registers.  So
 119 it doesn't solve the whole problem.
 120
 121 There's also the issue of what the C library is expecting in terms of
 122 precision.  It seems to be the case that glibc on Linux expects the
 123 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 124 unexpected effects.  Changing the default could have undesirable
 125 effects on other 3rd-party library code too, so the right thing would
 126 be to save/restore the FPU control word across Haskell code if we were
 127 to do this.
 128
 129 gcc's -ffloat-store gives consistent results by always storing the
 130 results of floating-point calculations in memory, which works for both
 131 32 and 64-bit precision.  However, it only affects the values of
 132 user-declared floating point variables in C, not intermediate results.
 133 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 134 flag).
 135
 136 Another problem is how to spill floating point registers in the
 137 register allocator.  Should we spill the whole 80 bits, or just 64?
 138 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 139 Linux, spilling 64 bits will round the results of some operations.
 140 This is what gcc does.  Spilling at 80 bits requires taking up a full
 141 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 142 the alignment problems.
 143
 144 In the future [edit: now available in GHC 7.0.1, with the -msse2
 145 flag], we'll use the SSE registers for floating point.  This requires
 146 a CPU that supports SSE2 (ordinary SSE only supports 32 bit precision
 147 float ops), which means P4 or Xeon and above.  Using SSE will solve
 148 all these problems, because the SSE registers use fixed 32 bit or 64
 149 bit precision.
 150
 151 --SDM 1/2003
 152 -}
 153
 154
 155 data Instr
 156         -- comment pseudo-op
 157         = COMMENT FastString
 158
 159         -- some static data spat out during code
 160         -- generation.  Will be extracted before
 161         -- pretty-printing.
 162         | LDATA   Section [CmmStatic]
 163
 164         -- start a new basic block.  Useful during
 165         -- codegen, removed later.  Preceding
 166         -- instruction should be a jump, as per the
 167         -- invariants for a BasicBlock (see Cmm).
 168         | NEWBLOCK BlockId
 169
 170         -- specify current stack offset for
 171         -- benefit of subsequent passes
 172         | DELTA   Int
 173
 174         -- Moves.
 175         | MOV         Size Operand Operand
 176         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 177         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 178         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 179         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 180         -- don't affect the high bits of the register.
 181
 182         -- Load effective address (also a very useful three-operand add instruction :-)
 183         | LEA         Size Operand Operand
 184
 185         -- Int Arithmetic.
 186         | ADD         Size Operand Operand
 187         | ADC         Size Operand Operand
 188         | SUB         Size Operand Operand
 189
 190         | MUL         Size Operand Operand
 191         | IMUL        Size Operand Operand      -- signed int mul
 192         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 193
 194         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 195         | IDIV        Size Operand              -- ditto, but signed
 196
 197         -- Simple bit-twiddling.
 198         | AND         Size Operand Operand
 199         | OR          Size Operand Operand
 200         | XOR         Size Operand Operand
 201         | NOT         Size Operand
 202         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 203
 204         -- Shifts (amount may be immediate or %cl only)
 205         | SHL         Size Operand{-amount-} Operand
 206         | SAR         Size Operand{-amount-} Operand
 207         | SHR         Size Operand{-amount-} Operand
 208
 209         | BT          Size Imm Operand
 210         | NOP
 211
 212         -- x86 Float Arithmetic.
 213         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 214         -- as single instructions right up until we spit them out.
 215         -- all the 3-operand fake fp insns are src1 src2 dst
 216         -- and furthermore are constrained to be fp regs only.
 217         -- IMPORTANT: keep is_G_insn up to date with any changes here
 218         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 219         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 220         | GST         Size Reg AddrMode -- src(fpreg), dst
 221
 222         | GLDZ        Reg -- dst(fpreg)
 223         | GLD1        Reg -- dst(fpreg)
 224
 225         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 226         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 227
 228         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 229         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 230
 231         | GDTOF       Reg Reg -- src(fpreg), dst(fpreg)
 232
 233         | GADD        Size Reg Reg Reg -- src1, src2, dst
 234         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 235         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 236         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 237
 238                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 239                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 240                 -- comparable and the comparison is True.  Subsequent code must
 241                 -- test the %eflags zero flag regardless of the supplied Cond.
 242         | GCMP        Cond Reg Reg -- src1, src2
 243
 244         | GABS        Size Reg Reg -- src, dst
 245         | GNEG        Size Reg Reg -- src, dst
 246         | GSQRT       Size Reg Reg -- src, dst
 247         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 248         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 249         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 250
 251         | GFREE         -- do ffree on all x86 regs; an ugly hack
 252
 253
 254         -- SSE2 floating point: we use a restricted set of the available SSE2
 255         -- instructions for floating-point.
 256         -- use MOV for moving (either movss or movsd (movlpd better?))
 257         | CVTSS2SD      Reg Reg         -- F32 to F64
 258         | CVTSD2SS      Reg Reg         -- F64 to F32
 259         | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
 260         | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
 261         | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
 262         | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
 263
 264         -- use ADD & SUB for arithmetic.  In both cases, operands
 265         -- are  Operand Reg.
 266
 267         -- SSE2 floating-point division:
 268         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 269
 270         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 271         -- compare single/double prec floating point respectively.
 272
 273         | SQRT          Size Operand Reg        -- src, dst
 274
 275
 276         -- Comparison
 277         | TEST          Size Operand Operand
 278         | CMP           Size Operand Operand
 279         | SETCC         Cond Operand
 280
 281         -- Stack Operations.
 282         | PUSH          Size Operand
 283         | POP           Size Operand
 284         -- both unused (SDM):
 285         --  | PUSHA
 286         --  | POPA
 287
 288         -- Jumping around.
 289         | JMP         Operand
 290         | JXX         Cond BlockId  -- includes unconditional branches
 291         | JXX_GBL     Cond Imm      -- non-local version of JXX
 292         | JMP_TBL     Operand [BlockId]  -- table jump
 293         | CALL        (Either Imm Reg) [Reg]
 294
 295         -- Other things.
 296         | CLTD Size              -- sign extend %eax into %edx:%eax
 297
 298         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 299                                  -- pretty-prints as
 300                                  --       call 1f
 301                                  -- 1:    popl %reg
 302                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 303         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 304                                  -- pretty-prints as
 305                                  --       call 1f
 306                                  -- 1:    popl %reg
 307
 308
 309 data Operand
 310         = OpReg  Reg            -- register
 311         | OpImm  Imm            -- immediate value
 312         | OpAddr AddrMode       -- memory reference
 313
 314
 315
 316 x86_regUsageOfInstr :: Instr -> RegUsage
 317 x86_regUsageOfInstr instr
 318  = case instr of
 319     MOV    _ src dst    -> usageRW src dst
 320     MOVZxL _ src dst    -> usageRW src dst
 321     MOVSxL _ src dst    -> usageRW src dst
 322     LEA    _ src dst    -> usageRW src dst
 323     ADD    _ src dst    -> usageRM src dst
 324     ADC    _ src dst    -> usageRM src dst
 325     SUB    _ src dst    -> usageRM src dst
 326     IMUL   _ src dst    -> usageRM src dst
 327     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 328     MUL    _ src dst    -> usageRM src dst
 329     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 330     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 331     AND    _ src dst    -> usageRM src dst
 332     OR     _ src dst    -> usageRM src dst
 333
 334     XOR    _ (OpReg src) (OpReg dst)
 335         | src == dst    -> mkRU [] [dst]
 336
 337     XOR    _ src dst    -> usageRM src dst
 338     NOT    _ op         -> usageM op
 339     NEGI   _ op         -> usageM op
 340     SHL    _ imm dst    -> usageRM imm dst
 341     SAR    _ imm dst    -> usageRM imm dst
 342     SHR    _ imm dst    -> usageRM imm dst
 343     BT     _ _   src    -> mkRUR (use_R src)
 344
 345     PUSH   _ op         -> mkRUR (use_R op)
 346     POP    _ op         -> mkRU [] (def_W op)
 347     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 348     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 349     SETCC  _ op         -> mkRU [] (def_W op)
 350     JXX    _ _          -> mkRU [] []
 351     JXX_GBL _ _         -> mkRU [] []
 352     JMP     op          -> mkRUR (use_R op)
 353     JMP_TBL op _        -> mkRUR (use_R op)
 354     CALL (Left _)  params   -> mkRU params callClobberedRegs
 355     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 356     CLTD   _            -> mkRU [eax] [edx]
 357     NOP                 -> mkRU [] []
 358
 359     GMOV   src dst      -> mkRU [src] [dst]
 360     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 361     GST    _ src dst    -> mkRUR (src : use_EA dst)
 362
 363     GLDZ   dst          -> mkRU [] [dst]
 364     GLD1   dst          -> mkRU [] [dst]
 365
 366     GFTOI  src dst      -> mkRU [src] [dst]
 367     GDTOI  src dst      -> mkRU [src] [dst]
 368
 369     GITOF  src dst      -> mkRU [src] [dst]
 370     GITOD  src dst      -> mkRU [src] [dst]
 371
 372     GDTOF  src dst      -> mkRU [src] [dst]
 373
 374     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 375     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 376     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 377     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 378
 379     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 380     GABS   _ src dst     -> mkRU [src] [dst]
 381     GNEG   _ src dst     -> mkRU [src] [dst]
 382     GSQRT  _ src dst     -> mkRU [src] [dst]
 383     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 384     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 385     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 386
 387     CVTSS2SD   src dst  -> mkRU [src] [dst]
 388     CVTSD2SS   src dst  -> mkRU [src] [dst]
 389     CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
 390     CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
 391     CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
 392     CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
 393     FDIV _     src dst  -> usageRM src dst
 394
 395     FETCHGOT reg        -> mkRU [] [reg]
 396     FETCHPC  reg        -> mkRU [] [reg]
 397
 398     COMMENT _           -> noUsage
 399     DELTA   _           -> noUsage
 400
 401     _other              -> panic "regUsage: unrecognised instr"
 402
 403  where
 404     -- 2 operand form; first operand Read; second Written
 405     usageRW :: Operand -> Operand -> RegUsage
 406     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 407     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 408     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 409
 410     -- 2 operand form; first operand Read; second Modified
 411     usageRM :: Operand -> Operand -> RegUsage
 412     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 413     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 414     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 415
 416     -- 1 operand form; operand Modified
 417     usageM :: Operand -> RegUsage
 418     usageM (OpReg reg)          = mkRU [reg] [reg]
 419     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 420     usageM _                    = panic "X86.RegInfo.usageM: no match"
 421
 422     -- Registers defd when an operand is written.
 423     def_W (OpReg reg)           = [reg]
 424     def_W (OpAddr _ )           = []
 425     def_W _                     = panic "X86.RegInfo.def_W: no match"
 426
 427     -- Registers used when an operand is read.
 428     use_R (OpReg reg)  = [reg]
 429     use_R (OpImm _)    = []
 430     use_R (OpAddr ea)  = use_EA ea
 431
 432     -- Registers used to compute an effective address.
 433     use_EA (ImmAddr _ _) = []
 434     use_EA (AddrBaseIndex base index _) =
 435         use_base base $! use_index index
 436         where use_base (EABaseReg r) x = r : x
 437               use_base _ x             = x
 438               use_index EAIndexNone   = []
 439               use_index (EAIndex i _) = [i]
 440
 441     mkRUR src = src' `seq` RU src' []
 442         where src' = filter interesting src
 443
 444     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 445         where src' = filter interesting src
 446               dst' = filter interesting dst
 447
 448 interesting :: Reg -> Bool
 449 interesting (RegVirtual _)              = True
 450 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 451 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 452
 453
 454
 455 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 456 x86_patchRegsOfInstr instr env
 457  = case instr of
 458     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 459     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 460     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 461     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 462     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 463     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 464     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 465     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 466     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 467     MUL sz src dst      -> patch2 (MUL sz) src dst
 468     IDIV sz op          -> patch1 (IDIV sz) op
 469     DIV sz op           -> patch1 (DIV sz) op
 470     AND  sz src dst     -> patch2 (AND  sz) src dst
 471     OR   sz src dst     -> patch2 (OR   sz) src dst
 472     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 473     NOT  sz op          -> patch1 (NOT  sz) op
 474     NEGI sz op          -> patch1 (NEGI sz) op
 475     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 476     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 477     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 478     BT   sz imm src     -> patch1 (BT  sz imm) src
 479     TEST sz src dst     -> patch2 (TEST sz) src dst
 480     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 481     PUSH sz op          -> patch1 (PUSH sz) op
 482     POP  sz op          -> patch1 (POP  sz) op
 483     SETCC cond op       -> patch1 (SETCC cond) op
 484     JMP op              -> patch1 JMP op
 485     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 486
 487     GMOV src dst        -> GMOV (env src) (env dst)
 488     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 489     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 490
 491     GLDZ dst            -> GLDZ (env dst)
 492     GLD1 dst            -> GLD1 (env dst)
 493
 494     GFTOI src dst       -> GFTOI (env src) (env dst)
 495     GDTOI src dst       -> GDTOI (env src) (env dst)
 496
 497     GITOF src dst       -> GITOF (env src) (env dst)
 498     GITOD src dst       -> GITOD (env src) (env dst)
 499
 500     GDTOF src dst       -> GDTOF (env src) (env dst)
 501
 502     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 503     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 504     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 505     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 506
 507     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 508     GABS sz src dst     -> GABS sz (env src) (env dst)
 509     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 510     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 511     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 512     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 513     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 514
 515     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 516     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 517     CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
 518     CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
 519     CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
 520     CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
 521     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 522
 523     CALL (Left _)  _    -> instr
 524     CALL (Right reg) p  -> CALL (Right (env reg)) p
 525
 526     FETCHGOT reg        -> FETCHGOT (env reg)
 527     FETCHPC  reg        -> FETCHPC  (env reg)
 528
 529     NOP                 -> instr
 530     COMMENT _           -> instr
 531     DELTA _             -> instr
 532
 533     JXX _ _             -> instr
 534     JXX_GBL _ _         -> instr
 535     CLTD _              -> instr
 536
 537     _other              -> panic "patchRegs: unrecognised instr"
 538
 539   where
 540     patch1 :: (Operand -> a) -> Operand -> a
 541     patch1 insn op      = insn $! patchOp op
 542     patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
 543     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 544
 545     patchOp (OpReg  reg) = OpReg $! env reg
 546     patchOp (OpImm  imm) = OpImm imm
 547     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 548
 549     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 550     lookupAddr (AddrBaseIndex base index disp)
 551       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 552       where
 553         lookupBase EABaseNone       = EABaseNone
 554         lookupBase EABaseRip        = EABaseRip
 555         lookupBase (EABaseReg r)    = EABaseReg (env r)
 556
 557         lookupIndex EAIndexNone     = EAIndexNone
 558         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 559
 560
 561 --------------------------------------------------------------------------------
 562 x86_isJumpishInstr
 563         :: Instr -> Bool
 564
 565 x86_isJumpishInstr instr
 566  = case instr of
 567         JMP{}           -> True
 568         JXX{}           -> True
 569         JXX_GBL{}       -> True
 570         JMP_TBL{}       -> True
 571         CALL{}          -> True
 572         _               -> False
 573
 574
 575 x86_jumpDestsOfInstr
 576         :: Instr
 577         -> [BlockId]
 578
 579 x86_jumpDestsOfInstr insn
 580   = case insn of
 581         JXX _ id        -> [id]
 582         JMP_TBL _ ids   -> ids
 583         _               -> []
 584
 585
 586 x86_patchJumpInstr
 587         :: Instr -> (BlockId -> BlockId) -> Instr
 588
 589 x86_patchJumpInstr insn patchF
 590   = case insn of
 591         JXX cc id       -> JXX cc (patchF id)
 592         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 593         _               -> insn
 594
 595
 596
 597
 598 -- -----------------------------------------------------------------------------
 599 -- | Make a spill instruction.
 600 x86_mkSpillInstr
 601         :: Reg          -- register to spill
 602         -> Int          -- current stack delta
 603         -> Int          -- spill slot to use
 604         -> Instr
 605
 606 x86_mkSpillInstr reg delta slot
 607   = let off     = spillSlotToOffset slot
 608     in
 609     let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 610     in case targetClassOfReg reg of
 611            RcInteger   -> MOV IF_ARCH_i386(II32,II64)
 612                               (OpReg reg) (OpAddr (spRel off_w))
 613            RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 614            RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 615            _         -> panic "X86.mkSpillInstr: no match"
 616
 617
 618 -- | Make a spill reload instruction.
 619 x86_mkLoadInstr
 620         :: Reg          -- register to load
 621         -> Int          -- current stack delta
 622         -> Int          -- spill slot to use
 623         -> Instr
 624
 625 x86_mkLoadInstr reg delta slot
 626   = let off     = spillSlotToOffset slot
 627     in
 628         let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 629         in case targetClassOfReg reg of
 630               RcInteger -> MOV IF_ARCH_i386(II32,II64)
 631                                (OpAddr (spRel off_w)) (OpReg reg)
 632               RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
 633               RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 634               _           -> panic "X86.x86_mkLoadInstr"
 635
 636 spillSlotSize :: Int
 637 spillSlotSize = IF_ARCH_i386(12, 8)
 638
 639 maxSpillSlots :: Int
 640 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 641
 642 -- convert a spill slot number to a *byte* offset, with no sign:
 643 -- decide on a per arch basis whether you are spilling above or below
 644 -- the C stack pointer.
 645 spillSlotToOffset :: Int -> Int
 646 spillSlotToOffset slot
 647    | slot >= 0 && slot < maxSpillSlots
 648    = 64 + spillSlotSize * slot
 649    | otherwise
 650    = pprPanic "spillSlotToOffset:"
 651               (   text "invalid spill location: " <> int slot
 652               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 653
 654 --------------------------------------------------------------------------------
 655
 656 -- | See if this instruction is telling us the current C stack delta
 657 x86_takeDeltaInstr
 658         :: Instr
 659         -> Maybe Int
 660
 661 x86_takeDeltaInstr instr
 662  = case instr of
 663         DELTA i         -> Just i
 664         _               -> Nothing
 665
 666
 667 x86_isMetaInstr
 668         :: Instr
 669         -> Bool
 670
 671 x86_isMetaInstr instr
 672  = case instr of
 673         COMMENT{}       -> True
 674         LDATA{}         -> True
 675         NEWBLOCK{}      -> True
 676         DELTA{}         -> True
 677         _               -> False
 678
 679
 680
 681 -- | Make a reg-reg move instruction.
 682 --      On SPARC v8 there are no instructions to move directly between
 683 --      floating point and integer regs. If we need to do that then we
 684 --      have to go via memory.
 685 --
 686 x86_mkRegRegMoveInstr
 687         :: Reg
 688         -> Reg
 689         -> Instr
 690
 691 x86_mkRegRegMoveInstr src dst
 692  = case targetClassOfReg src of
 693 #if   i386_TARGET_ARCH
 694         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 695 #else
 696         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 697 #endif
 698         RcDouble    -> GMOV src dst
 699         RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
 700         _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 701
 702 -- | Check whether an instruction represents a reg-reg move.
 703 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 704 --      by assigning the src and dest temporaries to the same real register.
 705 --
 706 x86_takeRegRegMoveInstr
 707         :: Instr
 708         -> Maybe (Reg,Reg)
 709
 710 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 711         = Just (r1,r2)
 712
 713 x86_takeRegRegMoveInstr _  = Nothing
 714
 715
 716 -- | Make an unconditional branch instruction.
 717 x86_mkJumpInstr
 718         :: BlockId
 719         -> [Instr]
 720
 721 x86_mkJumpInstr id
 722         = [JXX ALWAYS id]
 723
 724
 725
 726
 727
 728 i386_insert_ffrees
 729         :: [GenBasicBlock Instr]
 730         -> [GenBasicBlock Instr]
 731
 732 i386_insert_ffrees blocks
 733    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 734    = map ffree_before_nonlocal_transfers blocks
 735
 736    | otherwise
 737    = blocks
 738   where
 739    ffree_before_nonlocal_transfers (BasicBlock id insns)
 740      = BasicBlock id (foldr p [] insns)
 741      where p insn r = case insn of
 742                         CALL _ _ -> GFREE : insn : r
 743                         JMP _    -> GFREE : insn : r
 744                         JXX_GBL _ _ -> GFREE : insn : r
 745                         _        -> insn : r
 746
 747 -- if you ever add a new FP insn to the fake x86 FP insn set,
 748 -- you must update this too
 749 is_G_instr :: Instr -> Bool
 750 is_G_instr instr
 751    = case instr of
 752         GMOV{}          -> True
 753         GLD{}           -> True
 754         GST{}           -> True
 755         GLDZ{}          -> True
 756         GLD1{}          -> True
 757         GFTOI{}         -> True
 758         GDTOI{}         -> True
 759         GITOF{}         -> True
 760         GITOD{}         -> True
 761         GDTOF{}         -> True
 762         GADD{}          -> True
 763         GDIV{}          -> True
 764         GSUB{}          -> True
 765         GMUL{}          -> True
 766         GCMP{}          -> True
 767         GABS{}          -> True
 768         GNEG{}          -> True
 769         GSQRT{}         -> True
 770         GSIN{}          -> True
 771         GCOS{}          -> True
 772         GTAN{}          -> True
 773         GFREE           -> panic "is_G_instr: GFREE (!)"
 774         _               -> False
 775
 776
 777 data JumpDest = DestBlockId BlockId | DestImm Imm
 778
 779
 780 canShortcut :: Instr -> Maybe JumpDest
 781 canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
 782 canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
 783 canShortcut _                  = Nothing
 784
 785
 786 -- This helper shortcuts a sequence of branches.
 787 -- The blockset helps avoid following cycles.
 788 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 789 shortcutJump fn insn = shortcutJump' fn (setEmpty :: BlockSet) insn
 790   where shortcutJump' fn seen insn@(JXX cc id) =
 791           if setMember id seen then insn
 792           else case fn id of
 793                  Nothing                -> insn
 794                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 795                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 796                where seen' = setInsert id seen
 797         shortcutJump' _ _ other = other
 798
 799 -- Here because it knows about JumpDest
 800 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 801 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 802   | Just uq <- maybeAsmTemp lab
 803   = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (mkBlockId uq)))
 804 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 805   | Just uq <- maybeAsmTemp lbl1
 806   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (mkBlockId uq)) lbl2 off)
 807         -- slightly dodgy, we're ignoring the second label, but this
 808         -- works with the way we use CmmLabelDiffOff for jump tables now.
 809
 810 shortcutStatic _ other_static
 811         = other_static
 812
 813 shortBlockId
 814         :: (BlockId -> Maybe JumpDest)
 815         -> UniqSet Unique
 816         -> BlockId
 817         -> CLabel
 818
 819 shortBlockId fn seen blockid =
 820   case (elementOfUniqSet uq seen, fn blockid) of
 821     (True, _)    -> mkAsmTempLabel uq
 822     (_, Nothing) -> mkAsmTempLabel uq
 823     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 824     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 825     (_, _other) -> panic "shortBlockId"
 826   where uq = getUnique blockid