compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import Cmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import UniqSet
  32 import Unique
  33
  34 -- Size of a PPC memory address, in bytes.
  35 --
  36 archWordSize :: Size
  37 #if i386_TARGET_ARCH
  38 archWordSize    = II32
  39 #elif x86_64_TARGET_ARCH
  40 archWordSize    = II64
  41 #else
  42 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  43 #endif
  44
  45 -- | Instruction instance for x86 instruction set.
  46 instance Instruction Instr where
  47         regUsageOfInstr         = x86_regUsageOfInstr
  48         patchRegsOfInstr        = x86_patchRegsOfInstr
  49         isJumpishInstr          = x86_isJumpishInstr
  50         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  51         patchJumpInstr          = x86_patchJumpInstr
  52         mkSpillInstr            = x86_mkSpillInstr
  53         mkLoadInstr             = x86_mkLoadInstr
  54         takeDeltaInstr          = x86_takeDeltaInstr
  55         isMetaInstr             = x86_isMetaInstr
  56         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  57         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  58         mkJumpInstr             = x86_mkJumpInstr
  59
  60
  61 -- -----------------------------------------------------------------------------
  62 -- Intel x86 instructions
  63
  64 {-
  65 Intel, in their infinite wisdom, selected a stack model for floating
  66 point registers on x86.  That might have made sense back in 1979 --
  67 nowadays we can see it for the nonsense it really is.  A stack model
  68 fits poorly with the existing nativeGen infrastructure, which assumes
  69 flat integer and FP register sets.  Prior to this commit, nativeGen
  70 could not generate correct x86 FP code -- to do so would have meant
  71 somehow working the register-stack paradigm into the register
  72 allocator and spiller, which sounds very difficult.
  73
  74 We have decided to cheat, and go for a simple fix which requires no
  75 infrastructure modifications, at the expense of generating ropey but
  76 correct FP code.  All notions of the x86 FP stack and its insns have
  77 been removed.  Instead, we pretend (to the instruction selector and
  78 register allocator) that x86 has six floating point registers, %fake0
  79 .. %fake5, which can be used in the usual flat manner.  We further
  80 claim that x86 has floating point instructions very similar to SPARC
  81 and Alpha, that is, a simple 3-operand register-register arrangement.
  82 Code generation and register allocation proceed on this basis.
  83
  84 When we come to print out the final assembly, our convenient fiction
  85 is converted to dismal reality.  Each fake instruction is
  86 independently converted to a series of real x86 instructions.
  87 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  88 arithmetic operations, the two operands are pushed onto the top of the
  89 FP stack, the operation done, and the result copied back into the
  90 relevant register.  There are only six %fake registers because 2 are
  91 needed for the translation, and x86 has 8 in total.
  92
  93 The translation is inefficient but is simple and it works.  A cleverer
  94 translation would handle a sequence of insns, simulating the FP stack
  95 contents, would not impose a fixed mapping from %fake to %st regs, and
  96 hopefully could avoid most of the redundant reg-reg moves of the
  97 current translation.
  98
  99 We might as well make use of whatever unique FP facilities Intel have
 100 chosen to bless us with (let's not be churlish, after all).
 101 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 102 -}
 103
 104 {-
 105 MORE FLOATING POINT MUSINGS...
 106
 107 Intel's internal floating point registers are by default 80 bit
 108 extended precision.  This means that all operations done on values in
 109 registers are done at 80 bits, and unless the intermediate values are
 110 truncated to the appropriate size (32 or 64 bits) by storing in
 111 memory, calculations in registers will give different results from
 112 calculations which pass intermediate values in memory (eg. via
 113 function calls).
 114
 115 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 116 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 117 that this will only affect 64-bit precision arithmetic; 32-bit
 118 calculations will still be done at 64-bit precision in registers.  So
 119 it doesn't solve the whole problem.
 120
 121 There's also the issue of what the C library is expecting in terms of
 122 precision.  It seems to be the case that glibc on Linux expects the
 123 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 124 unexpected effects.  Changing the default could have undesirable
 125 effects on other 3rd-party library code too, so the right thing would
 126 be to save/restore the FPU control word across Haskell code if we were
 127 to do this.
 128
 129 gcc's -ffloat-store gives consistent results by always storing the
 130 results of floating-point calculations in memory, which works for both
 131 32 and 64-bit precision.  However, it only affects the values of
 132 user-declared floating point variables in C, not intermediate results.
 133 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 134 flag).
 135
 136 Another problem is how to spill floating point registers in the
 137 register allocator.  Should we spill the whole 80 bits, or just 64?
 138 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 139 Linux, spilling 64 bits will round the results of some operations.
 140 This is what gcc does.  Spilling at 80 bits requires taking up a full
 141 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 142 the alignment problems.
 143
 144 In the future, we'll use the SSE registers for floating point.  This
 145 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 146 precision float ops), which means P4 or Xeon and above.  Using SSE
 147 will solve all these problems, because the SSE registers use fixed 32
 148 bit or 64 bit precision.
 149
 150 --SDM 1/2003
 151 -}
 152
 153
 154 data Instr
 155         -- comment pseudo-op
 156         = COMMENT FastString
 157
 158         -- some static data spat out during code
 159         -- generation.  Will be extracted before
 160         -- pretty-printing.
 161         | LDATA   Section [CmmStatic]
 162
 163         -- start a new basic block.  Useful during
 164         -- codegen, removed later.  Preceding
 165         -- instruction should be a jump, as per the
 166         -- invariants for a BasicBlock (see Cmm).
 167         | NEWBLOCK BlockId
 168
 169         -- specify current stack offset for
 170         -- benefit of subsequent passes
 171         | DELTA   Int
 172
 173         -- Moves.
 174         | MOV         Size Operand Operand
 175         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 176         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 177         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 178         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 179         -- don't affect the high bits of the register.
 180
 181         -- Load effective address (also a very useful three-operand add instruction :-)
 182         | LEA         Size Operand Operand
 183
 184         -- Int Arithmetic.
 185         | ADD         Size Operand Operand
 186         | ADC         Size Operand Operand
 187         | SUB         Size Operand Operand
 188
 189         | MUL         Size Operand Operand
 190         | IMUL        Size Operand Operand      -- signed int mul
 191         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 192
 193         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 194         | IDIV        Size Operand              -- ditto, but signed
 195
 196         -- Simple bit-twiddling.
 197         | AND         Size Operand Operand
 198         | OR          Size Operand Operand
 199         | XOR         Size Operand Operand
 200         | NOT         Size Operand
 201         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 202
 203         -- Shifts (amount may be immediate or %cl only)
 204         | SHL         Size Operand{-amount-} Operand
 205         | SAR         Size Operand{-amount-} Operand
 206         | SHR         Size Operand{-amount-} Operand
 207
 208         | BT          Size Imm Operand
 209         | NOP
 210
 211         -- x86 Float Arithmetic.
 212         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 213         -- as single instructions right up until we spit them out.
 214         -- all the 3-operand fake fp insns are src1 src2 dst
 215         -- and furthermore are constrained to be fp regs only.
 216         -- IMPORTANT: keep is_G_insn up to date with any changes here
 217         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 218         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 219         | GST         Size Reg AddrMode -- src(fpreg), dst
 220
 221         | GLDZ        Reg -- dst(fpreg)
 222         | GLD1        Reg -- dst(fpreg)
 223
 224         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 225         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 226
 227         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 228         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 229
 230         | GADD        Size Reg Reg Reg -- src1, src2, dst
 231         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 232         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 233         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 234
 235                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 236                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 237                 -- comparable and the comparison is True.  Subsequent code must
 238                 -- test the %eflags zero flag regardless of the supplied Cond.
 239         | GCMP        Cond Reg Reg -- src1, src2
 240
 241         | GABS        Size Reg Reg -- src, dst
 242         | GNEG        Size Reg Reg -- src, dst
 243         | GSQRT       Size Reg Reg -- src, dst
 244         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 245         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 246         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 247
 248         | GFREE         -- do ffree on all x86 regs; an ugly hack
 249
 250
 251         -- SSE2 floating point: we use a restricted set of the available SSE2
 252         -- instructions for floating-point.
 253         -- use MOV for moving (either movss or movsd (movlpd better?))
 254         | CVTSS2SD      Reg Reg         -- F32 to F64
 255         | CVTSD2SS      Reg Reg         -- F64 to F32
 256         | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
 257         | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
 258         | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
 259         | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
 260
 261         -- use ADD & SUB for arithmetic.  In both cases, operands
 262         -- are  Operand Reg.
 263
 264         -- SSE2 floating-point division:
 265         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 266
 267         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 268         -- compare single/double prec floating point respectively.
 269
 270         | SQRT          Size Operand Reg        -- src, dst
 271
 272
 273         -- Comparison
 274         | TEST          Size Operand Operand
 275         | CMP           Size Operand Operand
 276         | SETCC         Cond Operand
 277
 278         -- Stack Operations.
 279         | PUSH          Size Operand
 280         | POP           Size Operand
 281         -- both unused (SDM):
 282         --  | PUSHA
 283         --  | POPA
 284
 285         -- Jumping around.
 286         | JMP         Operand
 287         | JXX         Cond BlockId  -- includes unconditional branches
 288         | JXX_GBL     Cond Imm      -- non-local version of JXX
 289         | JMP_TBL     Operand [BlockId]  -- table jump
 290         | CALL        (Either Imm Reg) [Reg]
 291
 292         -- Other things.
 293         | CLTD Size              -- sign extend %eax into %edx:%eax
 294
 295         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 296                                  -- pretty-prints as
 297                                  --       call 1f
 298                                  -- 1:    popl %reg
 299                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 300         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 301                                  -- pretty-prints as
 302                                  --       call 1f
 303                                  -- 1:    popl %reg
 304
 305
 306 data Operand
 307         = OpReg  Reg            -- register
 308         | OpImm  Imm            -- immediate value
 309         | OpAddr AddrMode       -- memory reference
 310
 311
 312
 313 x86_regUsageOfInstr :: Instr -> RegUsage
 314 x86_regUsageOfInstr instr
 315  = case instr of
 316     MOV    _ src dst    -> usageRW src dst
 317     MOVZxL _ src dst    -> usageRW src dst
 318     MOVSxL _ src dst    -> usageRW src dst
 319     LEA    _ src dst    -> usageRW src dst
 320     ADD    _ src dst    -> usageRM src dst
 321     ADC    _ src dst    -> usageRM src dst
 322     SUB    _ src dst    -> usageRM src dst
 323     IMUL   _ src dst    -> usageRM src dst
 324     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 325     MUL    _ src dst    -> usageRM src dst
 326     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 327     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 328     AND    _ src dst    -> usageRM src dst
 329     OR     _ src dst    -> usageRM src dst
 330
 331     XOR    _ (OpReg src) (OpReg dst)
 332         | src == dst    -> mkRU [] [dst]
 333
 334     XOR    _ src dst    -> usageRM src dst
 335     NOT    _ op         -> usageM op
 336     NEGI   _ op         -> usageM op
 337     SHL    _ imm dst    -> usageRM imm dst
 338     SAR    _ imm dst    -> usageRM imm dst
 339     SHR    _ imm dst    -> usageRM imm dst
 340     BT     _ _   src    -> mkRUR (use_R src)
 341
 342     PUSH   _ op         -> mkRUR (use_R op)
 343     POP    _ op         -> mkRU [] (def_W op)
 344     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 345     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 346     SETCC  _ op         -> mkRU [] (def_W op)
 347     JXX    _ _          -> mkRU [] []
 348     JXX_GBL _ _         -> mkRU [] []
 349     JMP     op          -> mkRUR (use_R op)
 350     JMP_TBL op _        -> mkRUR (use_R op)
 351     CALL (Left _)  params   -> mkRU params callClobberedRegs
 352     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 353     CLTD   _            -> mkRU [eax] [edx]
 354     NOP                 -> mkRU [] []
 355
 356     GMOV   src dst      -> mkRU [src] [dst]
 357     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 358     GST    _ src dst    -> mkRUR (src : use_EA dst)
 359
 360     GLDZ   dst          -> mkRU [] [dst]
 361     GLD1   dst          -> mkRU [] [dst]
 362
 363     GFTOI  src dst      -> mkRU [src] [dst]
 364     GDTOI  src dst      -> mkRU [src] [dst]
 365
 366     GITOF  src dst      -> mkRU [src] [dst]
 367     GITOD  src dst      -> mkRU [src] [dst]
 368
 369     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 370     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 371     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 372     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 373
 374     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 375     GABS   _ src dst     -> mkRU [src] [dst]
 376     GNEG   _ src dst     -> mkRU [src] [dst]
 377     GSQRT  _ src dst     -> mkRU [src] [dst]
 378     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 379     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 380     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 381
 382     CVTSS2SD   src dst  -> mkRU [src] [dst]
 383     CVTSD2SS   src dst  -> mkRU [src] [dst]
 384     CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
 385     CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
 386     CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
 387     CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
 388     FDIV _     src dst  -> usageRM src dst
 389
 390     FETCHGOT reg        -> mkRU [] [reg]
 391     FETCHPC  reg        -> mkRU [] [reg]
 392
 393     COMMENT _           -> noUsage
 394     DELTA   _           -> noUsage
 395
 396     _other              -> panic "regUsage: unrecognised instr"
 397
 398  where
 399     -- 2 operand form; first operand Read; second Written
 400     usageRW :: Operand -> Operand -> RegUsage
 401     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 402     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 403     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 404
 405     -- 2 operand form; first operand Read; second Modified
 406     usageRM :: Operand -> Operand -> RegUsage
 407     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 408     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 409     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 410
 411     -- 1 operand form; operand Modified
 412     usageM :: Operand -> RegUsage
 413     usageM (OpReg reg)          = mkRU [reg] [reg]
 414     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 415     usageM _                    = panic "X86.RegInfo.usageM: no match"
 416
 417     -- Registers defd when an operand is written.
 418     def_W (OpReg reg)           = [reg]
 419     def_W (OpAddr _ )           = []
 420     def_W _                     = panic "X86.RegInfo.def_W: no match"
 421
 422     -- Registers used when an operand is read.
 423     use_R (OpReg reg)  = [reg]
 424     use_R (OpImm _)    = []
 425     use_R (OpAddr ea)  = use_EA ea
 426
 427     -- Registers used to compute an effective address.
 428     use_EA (ImmAddr _ _) = []
 429     use_EA (AddrBaseIndex base index _) =
 430         use_base base $! use_index index
 431         where use_base (EABaseReg r) x = r : x
 432               use_base _ x             = x
 433               use_index EAIndexNone   = []
 434               use_index (EAIndex i _) = [i]
 435
 436     mkRUR src = src' `seq` RU src' []
 437         where src' = filter interesting src
 438
 439     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 440         where src' = filter interesting src
 441               dst' = filter interesting dst
 442
 443 interesting :: Reg -> Bool
 444 interesting (RegVirtual _)              = True
 445 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 446 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 447
 448
 449
 450 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 451 x86_patchRegsOfInstr instr env
 452  = case instr of
 453     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 454     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 455     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 456     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 457     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 458     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 459     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 460     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 461     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 462     MUL sz src dst      -> patch2 (MUL sz) src dst
 463     IDIV sz op          -> patch1 (IDIV sz) op
 464     DIV sz op           -> patch1 (DIV sz) op
 465     AND  sz src dst     -> patch2 (AND  sz) src dst
 466     OR   sz src dst     -> patch2 (OR   sz) src dst
 467     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 468     NOT  sz op          -> patch1 (NOT  sz) op
 469     NEGI sz op          -> patch1 (NEGI sz) op
 470     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 471     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 472     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 473     BT   sz imm src     -> patch1 (BT  sz imm) src
 474     TEST sz src dst     -> patch2 (TEST sz) src dst
 475     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 476     PUSH sz op          -> patch1 (PUSH sz) op
 477     POP  sz op          -> patch1 (POP  sz) op
 478     SETCC cond op       -> patch1 (SETCC cond) op
 479     JMP op              -> patch1 JMP op
 480     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 481
 482     GMOV src dst        -> GMOV (env src) (env dst)
 483     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 484     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 485
 486     GLDZ dst            -> GLDZ (env dst)
 487     GLD1 dst            -> GLD1 (env dst)
 488
 489     GFTOI src dst       -> GFTOI (env src) (env dst)
 490     GDTOI src dst       -> GDTOI (env src) (env dst)
 491
 492     GITOF src dst       -> GITOF (env src) (env dst)
 493     GITOD src dst       -> GITOD (env src) (env dst)
 494
 495     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 496     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 497     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 498     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 499
 500     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 501     GABS sz src dst     -> GABS sz (env src) (env dst)
 502     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 503     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 504     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 505     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 506     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 507
 508     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 509     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 510     CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
 511     CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
 512     CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
 513     CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
 514     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 515
 516     CALL (Left _)  _    -> instr
 517     CALL (Right reg) p  -> CALL (Right (env reg)) p
 518
 519     FETCHGOT reg        -> FETCHGOT (env reg)
 520     FETCHPC  reg        -> FETCHPC  (env reg)
 521
 522     NOP                 -> instr
 523     COMMENT _           -> instr
 524     DELTA _             -> instr
 525
 526     JXX _ _             -> instr
 527     JXX_GBL _ _         -> instr
 528     CLTD _              -> instr
 529
 530     _other              -> panic "patchRegs: unrecognised instr"
 531
 532   where
 533     patch1 :: (Operand -> a) -> Operand -> a
 534     patch1 insn op      = insn $! patchOp op
 535     patch2 :: (Operand -> Operand -> a) -> Operand -> Operand -> a
 536     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 537
 538     patchOp (OpReg  reg) = OpReg $! env reg
 539     patchOp (OpImm  imm) = OpImm imm
 540     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 541
 542     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 543     lookupAddr (AddrBaseIndex base index disp)
 544       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 545       where
 546         lookupBase EABaseNone       = EABaseNone
 547         lookupBase EABaseRip        = EABaseRip
 548         lookupBase (EABaseReg r)    = EABaseReg (env r)
 549
 550         lookupIndex EAIndexNone     = EAIndexNone
 551         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 552
 553
 554 --------------------------------------------------------------------------------
 555 x86_isJumpishInstr
 556         :: Instr -> Bool
 557
 558 x86_isJumpishInstr instr
 559  = case instr of
 560         JMP{}           -> True
 561         JXX{}           -> True
 562         JXX_GBL{}       -> True
 563         JMP_TBL{}       -> True
 564         CALL{}          -> True
 565         _               -> False
 566
 567
 568 x86_jumpDestsOfInstr
 569         :: Instr
 570         -> [BlockId]
 571
 572 x86_jumpDestsOfInstr insn
 573   = case insn of
 574         JXX _ id        -> [id]
 575         JMP_TBL _ ids   -> ids
 576         _               -> []
 577
 578
 579 x86_patchJumpInstr
 580         :: Instr -> (BlockId -> BlockId) -> Instr
 581
 582 x86_patchJumpInstr insn patchF
 583   = case insn of
 584         JXX cc id       -> JXX cc (patchF id)
 585         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 586         _               -> insn
 587
 588
 589
 590
 591 -- -----------------------------------------------------------------------------
 592 -- | Make a spill instruction.
 593 x86_mkSpillInstr
 594         :: Reg          -- register to spill
 595         -> Int          -- current stack delta
 596         -> Int          -- spill slot to use
 597         -> Instr
 598
 599 x86_mkSpillInstr reg delta slot
 600   = let off     = spillSlotToOffset slot
 601     in
 602     let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 603     in case targetClassOfReg reg of
 604            RcInteger   -> MOV IF_ARCH_i386(II32,II64)
 605                               (OpReg reg) (OpAddr (spRel off_w))
 606            RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 607            RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 608            _         -> panic "X86.mkSpillInstr: no match"
 609
 610
 611 -- | Make a spill reload instruction.
 612 x86_mkLoadInstr
 613         :: Reg          -- register to load
 614         -> Int          -- current stack delta
 615         -> Int          -- spill slot to use
 616         -> Instr
 617
 618 x86_mkLoadInstr reg delta slot
 619   = let off     = spillSlotToOffset slot
 620     in
 621         let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 622         in case targetClassOfReg reg of
 623               RcInteger -> MOV IF_ARCH_i386(II32,II64)
 624                                (OpAddr (spRel off_w)) (OpReg reg)
 625               RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
 626               RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 627               _           -> panic "X86.x86_mkLoadInstr"
 628
 629 spillSlotSize :: Int
 630 spillSlotSize = IF_ARCH_i386(12, 8)
 631
 632 maxSpillSlots :: Int
 633 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 634
 635 -- convert a spill slot number to a *byte* offset, with no sign:
 636 -- decide on a per arch basis whether you are spilling above or below
 637 -- the C stack pointer.
 638 spillSlotToOffset :: Int -> Int
 639 spillSlotToOffset slot
 640    | slot >= 0 && slot < maxSpillSlots
 641    = 64 + spillSlotSize * slot
 642    | otherwise
 643    = pprPanic "spillSlotToOffset:"
 644               (   text "invalid spill location: " <> int slot
 645               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 646
 647 --------------------------------------------------------------------------------
 648
 649 -- | See if this instruction is telling us the current C stack delta
 650 x86_takeDeltaInstr
 651         :: Instr
 652         -> Maybe Int
 653
 654 x86_takeDeltaInstr instr
 655  = case instr of
 656         DELTA i         -> Just i
 657         _               -> Nothing
 658
 659
 660 x86_isMetaInstr
 661         :: Instr
 662         -> Bool
 663
 664 x86_isMetaInstr instr
 665  = case instr of
 666         COMMENT{}       -> True
 667         LDATA{}         -> True
 668         NEWBLOCK{}      -> True
 669         DELTA{}         -> True
 670         _               -> False
 671
 672
 673
 674 -- | Make a reg-reg move instruction.
 675 --      On SPARC v8 there are no instructions to move directly between
 676 --      floating point and integer regs. If we need to do that then we
 677 --      have to go via memory.
 678 --
 679 x86_mkRegRegMoveInstr
 680         :: Reg
 681         -> Reg
 682         -> Instr
 683
 684 x86_mkRegRegMoveInstr src dst
 685  = case targetClassOfReg src of
 686 #if   i386_TARGET_ARCH
 687         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 688 #else
 689         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 690 #endif
 691         RcDouble    -> GMOV src dst
 692         RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
 693         _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 694
 695 -- | Check whether an instruction represents a reg-reg move.
 696 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 697 --      by assigning the src and dest temporaries to the same real register.
 698 --
 699 x86_takeRegRegMoveInstr
 700         :: Instr
 701         -> Maybe (Reg,Reg)
 702
 703 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 704         = Just (r1,r2)
 705
 706 x86_takeRegRegMoveInstr _  = Nothing
 707
 708
 709 -- | Make an unconditional branch instruction.
 710 x86_mkJumpInstr
 711         :: BlockId
 712         -> [Instr]
 713
 714 x86_mkJumpInstr id
 715         = [JXX ALWAYS id]
 716
 717
 718
 719
 720
 721 i386_insert_ffrees
 722         :: [GenBasicBlock Instr]
 723         -> [GenBasicBlock Instr]
 724
 725 i386_insert_ffrees blocks
 726    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 727    = map ffree_before_nonlocal_transfers blocks
 728
 729    | otherwise
 730    = blocks
 731   where
 732    ffree_before_nonlocal_transfers (BasicBlock id insns)
 733      = BasicBlock id (foldr p [] insns)
 734      where p insn r = case insn of
 735                         CALL _ _ -> GFREE : insn : r
 736                         JMP _    -> GFREE : insn : r
 737                         _        -> insn : r
 738
 739 -- if you ever add a new FP insn to the fake x86 FP insn set,
 740 -- you must update this too
 741 is_G_instr :: Instr -> Bool
 742 is_G_instr instr
 743    = case instr of
 744         GMOV{}          -> True
 745         GLD{}           -> True
 746         GST{}           -> True
 747         GLDZ{}          -> True
 748         GLD1{}          -> True
 749         GFTOI{}         -> True
 750         GDTOI{}         -> True
 751         GITOF{}         -> True
 752         GITOD{}         -> True
 753         GADD{}          -> True
 754         GDIV{}          -> True
 755         GSUB{}          -> True
 756         GMUL{}          -> True
 757         GCMP{}          -> True
 758         GABS{}          -> True
 759         GNEG{}          -> True
 760         GSQRT{}         -> True
 761         GSIN{}          -> True
 762         GCOS{}          -> True
 763         GTAN{}          -> True
 764         GFREE           -> panic "is_G_instr: GFREE (!)"
 765         _               -> False
 766
 767
 768 data JumpDest = DestBlockId BlockId | DestImm Imm
 769
 770
 771 canShortcut :: Instr -> Maybe JumpDest
 772 canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
 773 canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
 774 canShortcut _                  = Nothing
 775
 776
 777 -- This helper shortcuts a sequence of branches.
 778 -- The blockset helps avoid following cycles.
 779 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 780 shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
 781   where shortcutJump' fn seen insn@(JXX cc id) =
 782           if elemBlockSet id seen then insn
 783           else case fn id of
 784                  Nothing                -> insn
 785                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 786                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 787                where seen' = extendBlockSet seen id
 788         shortcutJump' _ _ other = other
 789
 790 -- Here because it knows about JumpDest
 791 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 792 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 793   | Just uq <- maybeAsmTemp lab
 794   = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (BlockId uq)))
 795 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 796   | Just uq <- maybeAsmTemp lbl1
 797   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (BlockId uq)) lbl2 off)
 798         -- slightly dodgy, we're ignoring the second label, but this
 799         -- works with the way we use CmmLabelDiffOff for jump tables now.
 800
 801 shortcutStatic _ other_static
 802         = other_static
 803
 804 shortBlockId
 805         :: (BlockId -> Maybe JumpDest)
 806         -> UniqSet Unique
 807         -> BlockId
 808         -> CLabel
 809
 810 shortBlockId fn seen blockid@(BlockId uq) =
 811   case (elementOfUniqSet uq seen, fn blockid) of
 812     (True, _)    -> mkAsmTempLabel uq
 813     (_, Nothing) -> mkAsmTempLabel uq
 814     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 815     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 816     (_, _other) -> panic "shortBlockId"