compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import Cmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import UniqSet
  32 import Unique
  33
  34 -- Size of a PPC memory address, in bytes.
  35 --
  36 archWordSize :: Size
  37 #if i386_TARGET_ARCH
  38 archWordSize    = II32
  39 #elif x86_64_TARGET_ARCH
  40 archWordSize    = II64
  41 #else
  42 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  43 #endif
  44
  45 -- | Instruction instance for x86 instruction set.
  46 instance Instruction Instr where
  47         regUsageOfInstr         = x86_regUsageOfInstr
  48         patchRegsOfInstr        = x86_patchRegsOfInstr
  49         isJumpishInstr          = x86_isJumpishInstr
  50         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  51         patchJumpInstr          = x86_patchJumpInstr
  52         mkSpillInstr            = x86_mkSpillInstr
  53         mkLoadInstr             = x86_mkLoadInstr
  54         takeDeltaInstr          = x86_takeDeltaInstr
  55         isMetaInstr             = x86_isMetaInstr
  56         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  57         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  58         mkJumpInstr             = x86_mkJumpInstr
  59
  60
  61 -- -----------------------------------------------------------------------------
  62 -- Intel x86 instructions
  63
  64 {-
  65 Intel, in their infinite wisdom, selected a stack model for floating
  66 point registers on x86.  That might have made sense back in 1979 --
  67 nowadays we can see it for the nonsense it really is.  A stack model
  68 fits poorly with the existing nativeGen infrastructure, which assumes
  69 flat integer and FP register sets.  Prior to this commit, nativeGen
  70 could not generate correct x86 FP code -- to do so would have meant
  71 somehow working the register-stack paradigm into the register
  72 allocator and spiller, which sounds very difficult.
  73
  74 We have decided to cheat, and go for a simple fix which requires no
  75 infrastructure modifications, at the expense of generating ropey but
  76 correct FP code.  All notions of the x86 FP stack and its insns have
  77 been removed.  Instead, we pretend (to the instruction selector and
  78 register allocator) that x86 has six floating point registers, %fake0
  79 .. %fake5, which can be used in the usual flat manner.  We further
  80 claim that x86 has floating point instructions very similar to SPARC
  81 and Alpha, that is, a simple 3-operand register-register arrangement.
  82 Code generation and register allocation proceed on this basis.
  83
  84 When we come to print out the final assembly, our convenient fiction
  85 is converted to dismal reality.  Each fake instruction is
  86 independently converted to a series of real x86 instructions.
  87 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  88 arithmetic operations, the two operands are pushed onto the top of the
  89 FP stack, the operation done, and the result copied back into the
  90 relevant register.  There are only six %fake registers because 2 are
  91 needed for the translation, and x86 has 8 in total.
  92
  93 The translation is inefficient but is simple and it works.  A cleverer
  94 translation would handle a sequence of insns, simulating the FP stack
  95 contents, would not impose a fixed mapping from %fake to %st regs, and
  96 hopefully could avoid most of the redundant reg-reg moves of the
  97 current translation.
  98
  99 We might as well make use of whatever unique FP facilities Intel have
 100 chosen to bless us with (let's not be churlish, after all).
 101 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 102 -}
 103
 104 {-
 105 MORE FLOATING POINT MUSINGS...
 106
 107 Intel's internal floating point registers are by default 80 bit
 108 extended precision.  This means that all operations done on values in
 109 registers are done at 80 bits, and unless the intermediate values are
 110 truncated to the appropriate size (32 or 64 bits) by storing in
 111 memory, calculations in registers will give different results from
 112 calculations which pass intermediate values in memory (eg. via
 113 function calls).
 114
 115 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 116 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 117 that this will only affect 64-bit precision arithmetic; 32-bit
 118 calculations will still be done at 64-bit precision in registers.  So
 119 it doesn't solve the whole problem.
 120
 121 There's also the issue of what the C library is expecting in terms of
 122 precision.  It seems to be the case that glibc on Linux expects the
 123 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 124 unexpected effects.  Changing the default could have undesirable
 125 effects on other 3rd-party library code too, so the right thing would
 126 be to save/restore the FPU control word across Haskell code if we were
 127 to do this.
 128
 129 gcc's -ffloat-store gives consistent results by always storing the
 130 results of floating-point calculations in memory, which works for both
 131 32 and 64-bit precision.  However, it only affects the values of
 132 user-declared floating point variables in C, not intermediate results.
 133 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 134 flag).
 135
 136 Another problem is how to spill floating point registers in the
 137 register allocator.  Should we spill the whole 80 bits, or just 64?
 138 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 139 Linux, spilling 64 bits will round the results of some operations.
 140 This is what gcc does.  Spilling at 80 bits requires taking up a full
 141 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 142 the alignment problems.
 143
 144 In the future, we'll use the SSE registers for floating point.  This
 145 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 146 precision float ops), which means P4 or Xeon and above.  Using SSE
 147 will solve all these problems, because the SSE registers use fixed 32
 148 bit or 64 bit precision.
 149
 150 --SDM 1/2003
 151 -}
 152
 153
 154 data Instr
 155         -- comment pseudo-op
 156         = COMMENT FastString
 157
 158         -- some static data spat out during code
 159         -- generation.  Will be extracted before
 160         -- pretty-printing.
 161         | LDATA   Section [CmmStatic]
 162
 163         -- start a new basic block.  Useful during
 164         -- codegen, removed later.  Preceding
 165         -- instruction should be a jump, as per the
 166         -- invariants for a BasicBlock (see Cmm).
 167         | NEWBLOCK BlockId
 168
 169         -- specify current stack offset for
 170         -- benefit of subsequent passes
 171         | DELTA   Int
 172
 173         -- Moves.
 174         | MOV         Size Operand Operand
 175         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 176         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 177         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 178         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 179         -- don't affect the high bits of the register.
 180
 181         -- Load effective address (also a very useful three-operand add instruction :-)
 182         | LEA         Size Operand Operand
 183
 184         -- Int Arithmetic.
 185         | ADD         Size Operand Operand
 186         | ADC         Size Operand Operand
 187         | SUB         Size Operand Operand
 188
 189         | MUL         Size Operand Operand
 190         | IMUL        Size Operand Operand      -- signed int mul
 191         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 192
 193         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 194         | IDIV        Size Operand              -- ditto, but signed
 195
 196         -- Simple bit-twiddling.
 197         | AND         Size Operand Operand
 198         | OR          Size Operand Operand
 199         | XOR         Size Operand Operand
 200         | NOT         Size Operand
 201         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 202
 203         -- Shifts (amount may be immediate or %cl only)
 204         | SHL         Size Operand{-amount-} Operand
 205         | SAR         Size Operand{-amount-} Operand
 206         | SHR         Size Operand{-amount-} Operand
 207
 208         | BT          Size Imm Operand
 209         | NOP
 210
 211         -- x86 Float Arithmetic.
 212         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 213         -- as single instructions right up until we spit them out.
 214         -- all the 3-operand fake fp insns are src1 src2 dst
 215         -- and furthermore are constrained to be fp regs only.
 216         -- IMPORTANT: keep is_G_insn up to date with any changes here
 217         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 218         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 219         | GST         Size Reg AddrMode -- src(fpreg), dst
 220
 221         | GLDZ        Reg -- dst(fpreg)
 222         | GLD1        Reg -- dst(fpreg)
 223
 224         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 225         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 226
 227         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 228         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 229
 230         | GADD        Size Reg Reg Reg -- src1, src2, dst
 231         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 232         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 233         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 234
 235                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 236                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 237                 -- comparable and the comparison is True.  Subsequent code must
 238                 -- test the %eflags zero flag regardless of the supplied Cond.
 239         | GCMP        Cond Reg Reg -- src1, src2
 240
 241         | GABS        Size Reg Reg -- src, dst
 242         | GNEG        Size Reg Reg -- src, dst
 243         | GSQRT       Size Reg Reg -- src, dst
 244         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 245         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 246         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 247
 248         | GFREE         -- do ffree on all x86 regs; an ugly hack
 249
 250
 251         -- SSE2 floating point: we use a restricted set of the available SSE2
 252         -- instructions for floating-point.
 253         -- use MOV for moving (either movss or movsd (movlpd better?))
 254         | CVTSS2SD      Reg Reg         -- F32 to F64
 255         | CVTSD2SS      Reg Reg         -- F64 to F32
 256         | CVTTSS2SIQ    Size Operand Reg -- F32 to I32/I64 (with truncation)
 257         | CVTTSD2SIQ    Size Operand Reg -- F64 to I32/I64 (with truncation)
 258         | CVTSI2SS      Size Operand Reg -- I32/I64 to F32
 259         | CVTSI2SD      Size Operand Reg -- I32/I64 to F64
 260
 261         -- use ADD & SUB for arithmetic.  In both cases, operands
 262         -- are  Operand Reg.
 263
 264         -- SSE2 floating-point division:
 265         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 266
 267         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 268         -- compare single/double prec floating point respectively.
 269
 270         | SQRT          Size Operand Reg        -- src, dst
 271
 272
 273         -- Comparison
 274         | TEST          Size Operand Operand
 275         | CMP           Size Operand Operand
 276         | SETCC         Cond Operand
 277
 278         -- Stack Operations.
 279         | PUSH          Size Operand
 280         | POP           Size Operand
 281         -- both unused (SDM):
 282         --  | PUSHA
 283         --  | POPA
 284
 285         -- Jumping around.
 286         | JMP         Operand
 287         | JXX         Cond BlockId  -- includes unconditional branches
 288         | JXX_GBL     Cond Imm      -- non-local version of JXX
 289         | JMP_TBL     Operand [BlockId]  -- table jump
 290         | CALL        (Either Imm Reg) [Reg]
 291
 292         -- Other things.
 293         | CLTD Size              -- sign extend %eax into %edx:%eax
 294
 295         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 296                                  -- pretty-prints as
 297                                  --       call 1f
 298                                  -- 1:    popl %reg
 299                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 300         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 301                                  -- pretty-prints as
 302                                  --       call 1f
 303                                  -- 1:    popl %reg
 304
 305
 306 data Operand
 307         = OpReg  Reg            -- register
 308         | OpImm  Imm            -- immediate value
 309         | OpAddr AddrMode       -- memory reference
 310
 311
 312
 313 x86_regUsageOfInstr :: Instr -> RegUsage
 314 x86_regUsageOfInstr instr
 315  = case instr of
 316     MOV    _ src dst    -> usageRW src dst
 317     MOVZxL _ src dst    -> usageRW src dst
 318     MOVSxL _ src dst    -> usageRW src dst
 319     LEA    _ src dst    -> usageRW src dst
 320     ADD    _ src dst    -> usageRM src dst
 321     ADC    _ src dst    -> usageRM src dst
 322     SUB    _ src dst    -> usageRM src dst
 323     IMUL   _ src dst    -> usageRM src dst
 324     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 325     MUL    _ src dst    -> usageRM src dst
 326     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 327     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 328     AND    _ src dst    -> usageRM src dst
 329     OR     _ src dst    -> usageRM src dst
 330
 331     XOR    _ (OpReg src) (OpReg dst)
 332         | src == dst    -> mkRU [] [dst]
 333
 334     XOR    _ src dst    -> usageRM src dst
 335     NOT    _ op         -> usageM op
 336     NEGI   _ op         -> usageM op
 337     SHL    _ imm dst    -> usageRM imm dst
 338     SAR    _ imm dst    -> usageRM imm dst
 339     SHR    _ imm dst    -> usageRM imm dst
 340     BT     _ _   src    -> mkRUR (use_R src)
 341
 342     PUSH   _ op         -> mkRUR (use_R op)
 343     POP    _ op         -> mkRU [] (def_W op)
 344     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 345     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 346     SETCC  _ op         -> mkRU [] (def_W op)
 347     JXX    _ _          -> mkRU [] []
 348     JXX_GBL _ _         -> mkRU [] []
 349     JMP     op          -> mkRUR (use_R op)
 350     JMP_TBL op _        -> mkRUR (use_R op)
 351     CALL (Left _)  params   -> mkRU params callClobberedRegs
 352     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 353     CLTD   _            -> mkRU [eax] [edx]
 354     NOP                 -> mkRU [] []
 355
 356     GMOV   src dst      -> mkRU [src] [dst]
 357     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 358     GST    _ src dst    -> mkRUR (src : use_EA dst)
 359
 360     GLDZ   dst          -> mkRU [] [dst]
 361     GLD1   dst          -> mkRU [] [dst]
 362
 363     GFTOI  src dst      -> mkRU [src] [dst]
 364     GDTOI  src dst      -> mkRU [src] [dst]
 365
 366     GITOF  src dst      -> mkRU [src] [dst]
 367     GITOD  src dst      -> mkRU [src] [dst]
 368
 369     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 370     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 371     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 372     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 373
 374     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 375     GABS   _ src dst     -> mkRU [src] [dst]
 376     GNEG   _ src dst     -> mkRU [src] [dst]
 377     GSQRT  _ src dst     -> mkRU [src] [dst]
 378     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 379     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 380     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 381
 382     CVTSS2SD   src dst  -> mkRU [src] [dst]
 383     CVTSD2SS   src dst  -> mkRU [src] [dst]
 384     CVTTSS2SIQ _ src dst -> mkRU (use_R src) [dst]
 385     CVTTSD2SIQ _ src dst -> mkRU (use_R src) [dst]
 386     CVTSI2SS   _ src dst -> mkRU (use_R src) [dst]
 387     CVTSI2SD   _ src dst -> mkRU (use_R src) [dst]
 388     FDIV _     src dst  -> usageRM src dst
 389
 390     FETCHGOT reg        -> mkRU [] [reg]
 391     FETCHPC  reg        -> mkRU [] [reg]
 392
 393     COMMENT _           -> noUsage
 394     DELTA   _           -> noUsage
 395
 396     _other              -> panic "regUsage: unrecognised instr"
 397
 398  where
 399     -- 2 operand form; first operand Read; second Written
 400     usageRW :: Operand -> Operand -> RegUsage
 401     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 402     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 403     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 404
 405     -- 2 operand form; first operand Read; second Modified
 406     usageRM :: Operand -> Operand -> RegUsage
 407     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 408     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 409     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 410
 411     -- 1 operand form; operand Modified
 412     usageM :: Operand -> RegUsage
 413     usageM (OpReg reg)          = mkRU [reg] [reg]
 414     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 415     usageM _                    = panic "X86.RegInfo.usageM: no match"
 416
 417     -- Registers defd when an operand is written.
 418     def_W (OpReg reg)           = [reg]
 419     def_W (OpAddr _ )           = []
 420     def_W _                     = panic "X86.RegInfo.def_W: no match"
 421
 422     -- Registers used when an operand is read.
 423     use_R (OpReg reg)  = [reg]
 424     use_R (OpImm _)    = []
 425     use_R (OpAddr ea)  = use_EA ea
 426
 427     -- Registers used to compute an effective address.
 428     use_EA (ImmAddr _ _) = []
 429     use_EA (AddrBaseIndex base index _) =
 430         use_base base $! use_index index
 431         where use_base (EABaseReg r) x = r : x
 432               use_base _ x             = x
 433               use_index EAIndexNone   = []
 434               use_index (EAIndex i _) = [i]
 435
 436     mkRUR src = src' `seq` RU src' []
 437         where src' = filter interesting src
 438
 439     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 440         where src' = filter interesting src
 441               dst' = filter interesting dst
 442
 443 interesting :: Reg -> Bool
 444 interesting (RegVirtual _)              = True
 445 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 446 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 447
 448
 449
 450 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 451 x86_patchRegsOfInstr instr env
 452  = case instr of
 453     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 454     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 455     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 456     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 457     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 458     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 459     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 460     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 461     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 462     MUL sz src dst      -> patch2 (MUL sz) src dst
 463     IDIV sz op          -> patch1 (IDIV sz) op
 464     DIV sz op           -> patch1 (DIV sz) op
 465     AND  sz src dst     -> patch2 (AND  sz) src dst
 466     OR   sz src dst     -> patch2 (OR   sz) src dst
 467     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 468     NOT  sz op          -> patch1 (NOT  sz) op
 469     NEGI sz op          -> patch1 (NEGI sz) op
 470     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 471     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 472     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 473     BT   sz imm src     -> patch1 (BT  sz imm) src
 474     TEST sz src dst     -> patch2 (TEST sz) src dst
 475     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 476     PUSH sz op          -> patch1 (PUSH sz) op
 477     POP  sz op          -> patch1 (POP  sz) op
 478     SETCC cond op       -> patch1 (SETCC cond) op
 479     JMP op              -> patch1 JMP op
 480     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 481
 482     GMOV src dst        -> GMOV (env src) (env dst)
 483     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 484     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 485
 486     GLDZ dst            -> GLDZ (env dst)
 487     GLD1 dst            -> GLD1 (env dst)
 488
 489     GFTOI src dst       -> GFTOI (env src) (env dst)
 490     GDTOI src dst       -> GDTOI (env src) (env dst)
 491
 492     GITOF src dst       -> GITOF (env src) (env dst)
 493     GITOD src dst       -> GITOD (env src) (env dst)
 494
 495     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 496     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 497     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 498     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 499
 500     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 501     GABS sz src dst     -> GABS sz (env src) (env dst)
 502     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 503     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 504     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 505     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 506     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 507
 508     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 509     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 510     CVTTSS2SIQ sz src dst -> CVTTSS2SIQ sz (patchOp src) (env dst)
 511     CVTTSD2SIQ sz src dst -> CVTTSD2SIQ sz (patchOp src) (env dst)
 512     CVTSI2SS sz src dst -> CVTSI2SS sz (patchOp src) (env dst)
 513     CVTSI2SD sz src dst -> CVTSI2SD sz (patchOp src) (env dst)
 514     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 515
 516     CALL (Left _)  _    -> instr
 517     CALL (Right reg) p  -> CALL (Right (env reg)) p
 518
 519     FETCHGOT reg        -> FETCHGOT (env reg)
 520     FETCHPC  reg        -> FETCHPC  (env reg)
 521
 522     NOP                 -> instr
 523     COMMENT _           -> instr
 524     DELTA _             -> instr
 525
 526     JXX _ _             -> instr
 527     JXX_GBL _ _         -> instr
 528     CLTD _              -> instr
 529
 530     _other              -> panic "patchRegs: unrecognised instr"
 531
 532   where
 533     patch1 insn op      = insn $! patchOp op
 534     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 535
 536     patchOp (OpReg  reg) = OpReg $! env reg
 537     patchOp (OpImm  imm) = OpImm imm
 538     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 539
 540     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 541     lookupAddr (AddrBaseIndex base index disp)
 542       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 543       where
 544         lookupBase EABaseNone       = EABaseNone
 545         lookupBase EABaseRip        = EABaseRip
 546         lookupBase (EABaseReg r)    = EABaseReg (env r)
 547
 548         lookupIndex EAIndexNone     = EAIndexNone
 549         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 550
 551
 552 --------------------------------------------------------------------------------
 553 x86_isJumpishInstr
 554         :: Instr -> Bool
 555
 556 x86_isJumpishInstr instr
 557  = case instr of
 558         JMP{}           -> True
 559         JXX{}           -> True
 560         JXX_GBL{}       -> True
 561         JMP_TBL{}       -> True
 562         CALL{}          -> True
 563         _               -> False
 564
 565
 566 x86_jumpDestsOfInstr
 567         :: Instr
 568         -> [BlockId]
 569
 570 x86_jumpDestsOfInstr insn
 571   = case insn of
 572         JXX _ id        -> [id]
 573         JMP_TBL _ ids   -> ids
 574         _               -> []
 575
 576
 577 x86_patchJumpInstr
 578         :: Instr -> (BlockId -> BlockId) -> Instr
 579
 580 x86_patchJumpInstr insn patchF
 581   = case insn of
 582         JXX cc id       -> JXX cc (patchF id)
 583         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 584         _               -> insn
 585
 586
 587
 588
 589 -- -----------------------------------------------------------------------------
 590 -- | Make a spill instruction.
 591 x86_mkSpillInstr
 592         :: Reg          -- register to spill
 593         -> Int          -- current stack delta
 594         -> Int          -- spill slot to use
 595         -> Instr
 596
 597 x86_mkSpillInstr reg delta slot
 598   = let off     = spillSlotToOffset slot
 599     in
 600     let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 601     in case targetClassOfReg reg of
 602            RcInteger   -> MOV IF_ARCH_i386(II32,II64)
 603                               (OpReg reg) (OpAddr (spRel off_w))
 604            RcDouble    -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 605            RcDoubleSSE -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 606            _         -> panic "X86.mkSpillInstr: no match"
 607
 608
 609 -- | Make a spill reload instruction.
 610 x86_mkLoadInstr
 611         :: Reg          -- register to load
 612         -> Int          -- current stack delta
 613         -> Int          -- spill slot to use
 614         -> Instr
 615
 616 x86_mkLoadInstr reg delta slot
 617   = let off     = spillSlotToOffset slot
 618     in
 619         let off_w = (off-delta) `div` IF_ARCH_i386(4,8)
 620         in case targetClassOfReg reg of
 621               RcInteger -> MOV IF_ARCH_i386(II32,II64)
 622                                (OpAddr (spRel off_w)) (OpReg reg)
 623               RcDouble  -> GLD FF80 (spRel off_w) reg {- RcFloat/RcDouble -}
 624               RcDoubleSSE -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 625               _           -> panic "X86.x86_mkLoadInstr"
 626
 627 spillSlotSize :: Int
 628 spillSlotSize = IF_ARCH_i386(12, 8)
 629
 630 maxSpillSlots :: Int
 631 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 632
 633 -- convert a spill slot number to a *byte* offset, with no sign:
 634 -- decide on a per arch basis whether you are spilling above or below
 635 -- the C stack pointer.
 636 spillSlotToOffset :: Int -> Int
 637 spillSlotToOffset slot
 638    | slot >= 0 && slot < maxSpillSlots
 639    = 64 + spillSlotSize * slot
 640    | otherwise
 641    = pprPanic "spillSlotToOffset:"
 642               (   text "invalid spill location: " <> int slot
 643               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 644
 645 --------------------------------------------------------------------------------
 646
 647 -- | See if this instruction is telling us the current C stack delta
 648 x86_takeDeltaInstr
 649         :: Instr
 650         -> Maybe Int
 651
 652 x86_takeDeltaInstr instr
 653  = case instr of
 654         DELTA i         -> Just i
 655         _               -> Nothing
 656
 657
 658 x86_isMetaInstr
 659         :: Instr
 660         -> Bool
 661
 662 x86_isMetaInstr instr
 663  = case instr of
 664         COMMENT{}       -> True
 665         LDATA{}         -> True
 666         NEWBLOCK{}      -> True
 667         DELTA{}         -> True
 668         _               -> False
 669
 670
 671
 672 -- | Make a reg-reg move instruction.
 673 --      On SPARC v8 there are no instructions to move directly between
 674 --      floating point and integer regs. If we need to do that then we
 675 --      have to go via memory.
 676 --
 677 x86_mkRegRegMoveInstr
 678         :: Reg
 679         -> Reg
 680         -> Instr
 681
 682 x86_mkRegRegMoveInstr src dst
 683  = case targetClassOfReg src of
 684 #if   i386_TARGET_ARCH
 685         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 686 #else
 687         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 688 #endif
 689         RcDouble    -> GMOV src dst
 690         RcDoubleSSE -> MOV FF64 (OpReg src) (OpReg dst)
 691         _     -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 692
 693 -- | Check whether an instruction represents a reg-reg move.
 694 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 695 --      by assigning the src and dest temporaries to the same real register.
 696 --
 697 x86_takeRegRegMoveInstr
 698         :: Instr
 699         -> Maybe (Reg,Reg)
 700
 701 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 702         = Just (r1,r2)
 703
 704 x86_takeRegRegMoveInstr _  = Nothing
 705
 706
 707 -- | Make an unconditional branch instruction.
 708 x86_mkJumpInstr
 709         :: BlockId
 710         -> [Instr]
 711
 712 x86_mkJumpInstr id
 713         = [JXX ALWAYS id]
 714
 715
 716
 717
 718
 719 i386_insert_ffrees
 720         :: [GenBasicBlock Instr]
 721         -> [GenBasicBlock Instr]
 722
 723 i386_insert_ffrees blocks
 724    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 725    = map ffree_before_nonlocal_transfers blocks
 726
 727    | otherwise
 728    = blocks
 729   where
 730    ffree_before_nonlocal_transfers (BasicBlock id insns)
 731      = BasicBlock id (foldr p [] insns)
 732      where p insn r = case insn of
 733                         CALL _ _ -> GFREE : insn : r
 734                         JMP _    -> GFREE : insn : r
 735                         _        -> insn : r
 736
 737 -- if you ever add a new FP insn to the fake x86 FP insn set,
 738 -- you must update this too
 739 is_G_instr :: Instr -> Bool
 740 is_G_instr instr
 741    = case instr of
 742         GMOV{}          -> True
 743         GLD{}           -> True
 744         GST{}           -> True
 745         GLDZ{}          -> True
 746         GLD1{}          -> True
 747         GFTOI{}         -> True
 748         GDTOI{}         -> True
 749         GITOF{}         -> True
 750         GITOD{}         -> True
 751         GADD{}          -> True
 752         GDIV{}          -> True
 753         GSUB{}          -> True
 754         GMUL{}          -> True
 755         GCMP{}          -> True
 756         GABS{}          -> True
 757         GNEG{}          -> True
 758         GSQRT{}         -> True
 759         GSIN{}          -> True
 760         GCOS{}          -> True
 761         GTAN{}          -> True
 762         GFREE           -> panic "is_G_instr: GFREE (!)"
 763         _               -> False
 764
 765
 766 data JumpDest = DestBlockId BlockId | DestImm Imm
 767
 768
 769 canShortcut :: Instr -> Maybe JumpDest
 770 canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
 771 canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
 772 canShortcut _                  = Nothing
 773
 774
 775 -- This helper shortcuts a sequence of branches.
 776 -- The blockset helps avoid following cycles.
 777 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 778 shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
 779   where shortcutJump' fn seen insn@(JXX cc id) =
 780           if elemBlockSet id seen then insn
 781           else case fn id of
 782                  Nothing                -> insn
 783                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 784                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 785                where seen' = extendBlockSet seen id
 786         shortcutJump' _ _ other = other
 787
 788 -- Here because it knows about JumpDest
 789 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 790 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 791   | Just uq <- maybeAsmTemp lab
 792   = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (BlockId uq)))
 793 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 794   | Just uq <- maybeAsmTemp lbl1
 795   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (BlockId uq)) lbl2 off)
 796         -- slightly dodgy, we're ignoring the second label, but this
 797         -- works with the way we use CmmLabelDiffOff for jump tables now.
 798
 799 shortcutStatic _ other_static
 800         = other_static
 801
 802 shortBlockId
 803         :: (BlockId -> Maybe JumpDest)
 804         -> UniqSet Unique
 805         -> BlockId
 806         -> CLabel
 807
 808 shortBlockId fn seen blockid@(BlockId uq) =
 809   case (elementOfUniqSet uq seen, fn blockid) of
 810     (True, _)    -> mkAsmTempLabel uq
 811     (_, Nothing) -> mkAsmTempLabel uq
 812     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 813     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 814     (_, _other) -> panic "shortBlockId"