compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import Cmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import UniqSet
  32 import Unique
  33
  34 -- Size of a PPC memory address, in bytes.
  35 --
  36 archWordSize :: Size
  37 #if i386_TARGET_ARCH
  38 archWordSize    = II32
  39 #elif x86_64_TARGET_ARCH
  40 archWordSize    = II64
  41 #else
  42 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  43 #endif
  44
  45 -- | Instruction instance for x86 instruction set.
  46 instance Instruction Instr where
  47         regUsageOfInstr         = x86_regUsageOfInstr
  48         patchRegsOfInstr        = x86_patchRegsOfInstr
  49         isJumpishInstr          = x86_isJumpishInstr
  50         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  51         patchJumpInstr          = x86_patchJumpInstr
  52         mkSpillInstr            = x86_mkSpillInstr
  53         mkLoadInstr             = x86_mkLoadInstr
  54         takeDeltaInstr          = x86_takeDeltaInstr
  55         isMetaInstr             = x86_isMetaInstr
  56         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  57         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  58         mkJumpInstr             = x86_mkJumpInstr
  59
  60
  61 -- -----------------------------------------------------------------------------
  62 -- Intel x86 instructions
  63
  64 {-
  65 Intel, in their infinite wisdom, selected a stack model for floating
  66 point registers on x86.  That might have made sense back in 1979 --
  67 nowadays we can see it for the nonsense it really is.  A stack model
  68 fits poorly with the existing nativeGen infrastructure, which assumes
  69 flat integer and FP register sets.  Prior to this commit, nativeGen
  70 could not generate correct x86 FP code -- to do so would have meant
  71 somehow working the register-stack paradigm into the register
  72 allocator and spiller, which sounds very difficult.
  73
  74 We have decided to cheat, and go for a simple fix which requires no
  75 infrastructure modifications, at the expense of generating ropey but
  76 correct FP code.  All notions of the x86 FP stack and its insns have
  77 been removed.  Instead, we pretend (to the instruction selector and
  78 register allocator) that x86 has six floating point registers, %fake0
  79 .. %fake5, which can be used in the usual flat manner.  We further
  80 claim that x86 has floating point instructions very similar to SPARC
  81 and Alpha, that is, a simple 3-operand register-register arrangement.
  82 Code generation and register allocation proceed on this basis.
  83
  84 When we come to print out the final assembly, our convenient fiction
  85 is converted to dismal reality.  Each fake instruction is
  86 independently converted to a series of real x86 instructions.
  87 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  88 arithmetic operations, the two operands are pushed onto the top of the
  89 FP stack, the operation done, and the result copied back into the
  90 relevant register.  There are only six %fake registers because 2 are
  91 needed for the translation, and x86 has 8 in total.
  92
  93 The translation is inefficient but is simple and it works.  A cleverer
  94 translation would handle a sequence of insns, simulating the FP stack
  95 contents, would not impose a fixed mapping from %fake to %st regs, and
  96 hopefully could avoid most of the redundant reg-reg moves of the
  97 current translation.
  98
  99 We might as well make use of whatever unique FP facilities Intel have
 100 chosen to bless us with (let's not be churlish, after all).
 101 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 102 -}
 103
 104 {-
 105 MORE FLOATING POINT MUSINGS...
 106
 107 Intel's internal floating point registers are by default 80 bit
 108 extended precision.  This means that all operations done on values in
 109 registers are done at 80 bits, and unless the intermediate values are
 110 truncated to the appropriate size (32 or 64 bits) by storing in
 111 memory, calculations in registers will give different results from
 112 calculations which pass intermediate values in memory (eg. via
 113 function calls).
 114
 115 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 116 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 117 that this will only affect 64-bit precision arithmetic; 32-bit
 118 calculations will still be done at 64-bit precision in registers.  So
 119 it doesn't solve the whole problem.
 120
 121 There's also the issue of what the C library is expecting in terms of
 122 precision.  It seems to be the case that glibc on Linux expects the
 123 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 124 unexpected effects.  Changing the default could have undesirable
 125 effects on other 3rd-party library code too, so the right thing would
 126 be to save/restore the FPU control word across Haskell code if we were
 127 to do this.
 128
 129 gcc's -ffloat-store gives consistent results by always storing the
 130 results of floating-point calculations in memory, which works for both
 131 32 and 64-bit precision.  However, it only affects the values of
 132 user-declared floating point variables in C, not intermediate results.
 133 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 134 flag).
 135
 136 Another problem is how to spill floating point registers in the
 137 register allocator.  Should we spill the whole 80 bits, or just 64?
 138 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 139 Linux, spilling 64 bits will round the results of some operations.
 140 This is what gcc does.  Spilling at 80 bits requires taking up a full
 141 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 142 the alignment problems.
 143
 144 In the future, we'll use the SSE registers for floating point.  This
 145 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 146 precision float ops), which means P4 or Xeon and above.  Using SSE
 147 will solve all these problems, because the SSE registers use fixed 32
 148 bit or 64 bit precision.
 149
 150 --SDM 1/2003
 151 -}
 152
 153
 154 data Instr
 155         -- comment pseudo-op
 156         = COMMENT FastString
 157
 158         -- some static data spat out during code
 159         -- generation.  Will be extracted before
 160         -- pretty-printing.
 161         | LDATA   Section [CmmStatic]
 162
 163         -- start a new basic block.  Useful during
 164         -- codegen, removed later.  Preceding
 165         -- instruction should be a jump, as per the
 166         -- invariants for a BasicBlock (see Cmm).
 167         | NEWBLOCK BlockId
 168
 169         -- specify current stack offset for
 170         -- benefit of subsequent passes
 171         | DELTA   Int
 172
 173         -- Moves.
 174         | MOV         Size Operand Operand
 175         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 176         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 177         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 178         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 179         -- don't affect the high bits of the register.
 180
 181         -- Load effective address (also a very useful three-operand add instruction :-)
 182         | LEA         Size Operand Operand
 183
 184         -- Int Arithmetic.
 185         | ADD         Size Operand Operand
 186         | ADC         Size Operand Operand
 187         | SUB         Size Operand Operand
 188
 189         | MUL         Size Operand Operand
 190         | IMUL        Size Operand Operand      -- signed int mul
 191         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 192
 193         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 194         | IDIV        Size Operand              -- ditto, but signed
 195
 196         -- Simple bit-twiddling.
 197         | AND         Size Operand Operand
 198         | OR          Size Operand Operand
 199         | XOR         Size Operand Operand
 200         | NOT         Size Operand
 201         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 202
 203         -- Shifts (amount may be immediate or %cl only)
 204         | SHL         Size Operand{-amount-} Operand
 205         | SAR         Size Operand{-amount-} Operand
 206         | SHR         Size Operand{-amount-} Operand
 207
 208         | BT          Size Imm Operand
 209         | NOP
 210
 211         -- x86 Float Arithmetic.
 212         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 213         -- as single instructions right up until we spit them out.
 214         -- all the 3-operand fake fp insns are src1 src2 dst
 215         -- and furthermore are constrained to be fp regs only.
 216         -- IMPORTANT: keep is_G_insn up to date with any changes here
 217         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 218         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 219         | GST         Size Reg AddrMode -- src(fpreg), dst
 220
 221         | GLDZ        Reg -- dst(fpreg)
 222         | GLD1        Reg -- dst(fpreg)
 223
 224         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 225         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 226
 227         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 228         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 229
 230         | GADD        Size Reg Reg Reg -- src1, src2, dst
 231         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 232         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 233         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 234
 235                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 236                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 237                 -- comparable and the comparison is True.  Subsequent code must
 238                 -- test the %eflags zero flag regardless of the supplied Cond.
 239         | GCMP        Cond Reg Reg -- src1, src2
 240
 241         | GABS        Size Reg Reg -- src, dst
 242         | GNEG        Size Reg Reg -- src, dst
 243         | GSQRT       Size Reg Reg -- src, dst
 244         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 245         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 246         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 247
 248         | GFREE         -- do ffree on all x86 regs; an ugly hack
 249
 250
 251         -- SSE2 floating point: we use a restricted set of the available SSE2
 252         -- instructions for floating-point.
 253         -- use MOV for moving (either movss or movsd (movlpd better?))
 254         | CVTSS2SD      Reg Reg         -- F32 to F64
 255         | CVTSD2SS      Reg Reg         -- F64 to F32
 256         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
 257         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
 258         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 259         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 260
 261         -- use ADD & SUB for arithmetic.  In both cases, operands
 262         -- are  Operand Reg.
 263
 264         -- SSE2 floating-point division:
 265         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 266
 267         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 268         -- compare single/double prec floating point respectively.
 269
 270         | SQRT          Size Operand Reg        -- src, dst
 271
 272
 273         -- Comparison
 274         | TEST          Size Operand Operand
 275         | CMP           Size Operand Operand
 276         | SETCC         Cond Operand
 277
 278         -- Stack Operations.
 279         | PUSH          Size Operand
 280         | POP           Size Operand
 281         -- both unused (SDM):
 282         --  | PUSHA
 283         --  | POPA
 284
 285         -- Jumping around.
 286         | JMP         Operand
 287         | JXX         Cond BlockId  -- includes unconditional branches
 288         | JXX_GBL     Cond Imm      -- non-local version of JXX
 289         | JMP_TBL     Operand [BlockId]  -- table jump
 290         | CALL        (Either Imm Reg) [Reg]
 291
 292         -- Other things.
 293         | CLTD Size              -- sign extend %eax into %edx:%eax
 294
 295         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 296                                  -- pretty-prints as
 297                                  --       call 1f
 298                                  -- 1:    popl %reg
 299                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 300         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 301                                  -- pretty-prints as
 302                                  --       call 1f
 303                                  -- 1:    popl %reg
 304
 305
 306 data Operand
 307         = OpReg  Reg            -- register
 308         | OpImm  Imm            -- immediate value
 309         | OpAddr AddrMode       -- memory reference
 310
 311
 312
 313 x86_regUsageOfInstr :: Instr -> RegUsage
 314 x86_regUsageOfInstr instr
 315  = case instr of
 316     MOV    _ src dst    -> usageRW src dst
 317     MOVZxL _ src dst    -> usageRW src dst
 318     MOVSxL _ src dst    -> usageRW src dst
 319     LEA    _ src dst    -> usageRW src dst
 320     ADD    _ src dst    -> usageRM src dst
 321     ADC    _ src dst    -> usageRM src dst
 322     SUB    _ src dst    -> usageRM src dst
 323     IMUL   _ src dst    -> usageRM src dst
 324     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 325     MUL    _ src dst    -> usageRM src dst
 326     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 327     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 328     AND    _ src dst    -> usageRM src dst
 329     OR     _ src dst    -> usageRM src dst
 330
 331     XOR    _ (OpReg src) (OpReg dst)
 332         | src == dst    -> mkRU [] [dst]
 333
 334     XOR    _ src dst    -> usageRM src dst
 335     NOT    _ op         -> usageM op
 336     NEGI   _ op         -> usageM op
 337     SHL    _ imm dst    -> usageRM imm dst
 338     SAR    _ imm dst    -> usageRM imm dst
 339     SHR    _ imm dst    -> usageRM imm dst
 340     BT     _ _   src    -> mkRUR (use_R src)
 341
 342     PUSH   _ op         -> mkRUR (use_R op)
 343     POP    _ op         -> mkRU [] (def_W op)
 344     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 345     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 346     SETCC  _ op         -> mkRU [] (def_W op)
 347     JXX    _ _          -> mkRU [] []
 348     JXX_GBL _ _         -> mkRU [] []
 349     JMP     op          -> mkRUR (use_R op)
 350     JMP_TBL op _        -> mkRUR (use_R op)
 351     CALL (Left _)  params   -> mkRU params callClobberedRegs
 352     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 353     CLTD   _            -> mkRU [eax] [edx]
 354     NOP                 -> mkRU [] []
 355
 356 #if i386_TARGET_ARCH
 357     GMOV   src dst      -> mkRU [src] [dst]
 358     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 359     GST    _ src dst    -> mkRUR (src : use_EA dst)
 360
 361     GLDZ   dst          -> mkRU [] [dst]
 362     GLD1   dst          -> mkRU [] [dst]
 363
 364     GFTOI  src dst      -> mkRU [src] [dst]
 365     GDTOI  src dst      -> mkRU [src] [dst]
 366
 367     GITOF  src dst      -> mkRU [src] [dst]
 368     GITOD  src dst      -> mkRU [src] [dst]
 369
 370     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 371     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 372     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 373     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 374
 375     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 376     GABS   _ src dst     -> mkRU [src] [dst]
 377     GNEG   _ src dst     -> mkRU [src] [dst]
 378     GSQRT  _ src dst     -> mkRU [src] [dst]
 379     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 380     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 381     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 382 #endif
 383
 384 #if x86_64_TARGET_ARCH
 385     CVTSS2SD   src dst  -> mkRU [src] [dst]
 386     CVTSD2SS   src dst  -> mkRU [src] [dst]
 387     CVTTSS2SIQ src dst  -> mkRU (use_R src) [dst]
 388     CVTTSD2SIQ src dst  -> mkRU (use_R src) [dst]
 389     CVTSI2SS   src dst  -> mkRU (use_R src) [dst]
 390     CVTSI2SD   src dst  -> mkRU (use_R src) [dst]
 391     FDIV _     src dst  -> usageRM src dst
 392 #endif
 393
 394     FETCHGOT reg        -> mkRU [] [reg]
 395     FETCHPC  reg        -> mkRU [] [reg]
 396
 397     COMMENT _           -> noUsage
 398     DELTA   _           -> noUsage
 399
 400     _other              -> panic "regUsage: unrecognised instr"
 401
 402  where
 403     -- 2 operand form; first operand Read; second Written
 404     usageRW :: Operand -> Operand -> RegUsage
 405     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 406     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 407     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 408
 409     -- 2 operand form; first operand Read; second Modified
 410     usageRM :: Operand -> Operand -> RegUsage
 411     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 412     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 413     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 414
 415     -- 1 operand form; operand Modified
 416     usageM :: Operand -> RegUsage
 417     usageM (OpReg reg)          = mkRU [reg] [reg]
 418     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 419     usageM _                    = panic "X86.RegInfo.usageM: no match"
 420
 421     -- Registers defd when an operand is written.
 422     def_W (OpReg reg)           = [reg]
 423     def_W (OpAddr _ )           = []
 424     def_W _                     = panic "X86.RegInfo.def_W: no match"
 425
 426     -- Registers used when an operand is read.
 427     use_R (OpReg reg)  = [reg]
 428     use_R (OpImm _)    = []
 429     use_R (OpAddr ea)  = use_EA ea
 430
 431     -- Registers used to compute an effective address.
 432     use_EA (ImmAddr _ _) = []
 433     use_EA (AddrBaseIndex base index _) =
 434         use_base base $! use_index index
 435         where use_base (EABaseReg r) x = r : x
 436               use_base _ x             = x
 437               use_index EAIndexNone   = []
 438               use_index (EAIndex i _) = [i]
 439
 440     mkRUR src = src' `seq` RU src' []
 441         where src' = filter interesting src
 442
 443     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 444         where src' = filter interesting src
 445               dst' = filter interesting dst
 446
 447 interesting :: Reg -> Bool
 448 interesting (RegVirtual _)              = True
 449 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 450 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 451
 452
 453
 454 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 455 x86_patchRegsOfInstr instr env
 456  = case instr of
 457     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 458     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 459     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 460     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 461     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 462     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 463     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 464     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 465     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 466     MUL sz src dst      -> patch2 (MUL sz) src dst
 467     IDIV sz op          -> patch1 (IDIV sz) op
 468     DIV sz op           -> patch1 (DIV sz) op
 469     AND  sz src dst     -> patch2 (AND  sz) src dst
 470     OR   sz src dst     -> patch2 (OR   sz) src dst
 471     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 472     NOT  sz op          -> patch1 (NOT  sz) op
 473     NEGI sz op          -> patch1 (NEGI sz) op
 474     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 475     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 476     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 477     BT   sz imm src     -> patch1 (BT  sz imm) src
 478     TEST sz src dst     -> patch2 (TEST sz) src dst
 479     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 480     PUSH sz op          -> patch1 (PUSH sz) op
 481     POP  sz op          -> patch1 (POP  sz) op
 482     SETCC cond op       -> patch1 (SETCC cond) op
 483     JMP op              -> patch1 JMP op
 484     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 485
 486 #if i386_TARGET_ARCH
 487     GMOV src dst        -> GMOV (env src) (env dst)
 488     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 489     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 490
 491     GLDZ dst            -> GLDZ (env dst)
 492     GLD1 dst            -> GLD1 (env dst)
 493
 494     GFTOI src dst       -> GFTOI (env src) (env dst)
 495     GDTOI src dst       -> GDTOI (env src) (env dst)
 496
 497     GITOF src dst       -> GITOF (env src) (env dst)
 498     GITOD src dst       -> GITOD (env src) (env dst)
 499
 500     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 501     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 502     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 503     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 504
 505     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 506     GABS sz src dst     -> GABS sz (env src) (env dst)
 507     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 508     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 509     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 510     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 511     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 512 #endif
 513
 514 #if x86_64_TARGET_ARCH
 515     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 516     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 517     CVTTSS2SIQ src dst  -> CVTTSS2SIQ (patchOp src) (env dst)
 518     CVTTSD2SIQ src dst  -> CVTTSD2SIQ (patchOp src) (env dst)
 519     CVTSI2SS src dst    -> CVTSI2SS (patchOp src) (env dst)
 520     CVTSI2SD src dst    -> CVTSI2SD (patchOp src) (env dst)
 521     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 522 #endif
 523
 524     CALL (Left _)  _    -> instr
 525     CALL (Right reg) p  -> CALL (Right (env reg)) p
 526
 527     FETCHGOT reg        -> FETCHGOT (env reg)
 528     FETCHPC  reg        -> FETCHPC  (env reg)
 529
 530     NOP                 -> instr
 531     COMMENT _           -> instr
 532     DELTA _             -> instr
 533
 534     JXX _ _             -> instr
 535     JXX_GBL _ _         -> instr
 536     CLTD _              -> instr
 537
 538     _other              -> panic "patchRegs: unrecognised instr"
 539
 540   where
 541     patch1 insn op      = insn $! patchOp op
 542     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 543
 544     patchOp (OpReg  reg) = OpReg $! env reg
 545     patchOp (OpImm  imm) = OpImm imm
 546     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 547
 548     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 549     lookupAddr (AddrBaseIndex base index disp)
 550       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 551       where
 552         lookupBase EABaseNone       = EABaseNone
 553         lookupBase EABaseRip        = EABaseRip
 554         lookupBase (EABaseReg r)    = EABaseReg (env r)
 555
 556         lookupIndex EAIndexNone     = EAIndexNone
 557         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 558
 559
 560 --------------------------------------------------------------------------------
 561 x86_isJumpishInstr
 562         :: Instr -> Bool
 563
 564 x86_isJumpishInstr instr
 565  = case instr of
 566         JMP{}           -> True
 567         JXX{}           -> True
 568         JXX_GBL{}       -> True
 569         JMP_TBL{}       -> True
 570         CALL{}          -> True
 571         _               -> False
 572
 573
 574 x86_jumpDestsOfInstr
 575         :: Instr
 576         -> [BlockId]
 577
 578 x86_jumpDestsOfInstr insn
 579   = case insn of
 580         JXX _ id        -> [id]
 581         JMP_TBL _ ids   -> ids
 582         _               -> []
 583
 584
 585 x86_patchJumpInstr
 586         :: Instr -> (BlockId -> BlockId) -> Instr
 587
 588 x86_patchJumpInstr insn patchF
 589   = case insn of
 590         JXX cc id       -> JXX cc (patchF id)
 591         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 592         _               -> insn
 593
 594
 595
 596
 597 -- -----------------------------------------------------------------------------
 598 -- | Make a spill instruction.
 599 x86_mkSpillInstr
 600         :: Reg          -- register to spill
 601         -> Int          -- current stack delta
 602         -> Int          -- spill slot to use
 603         -> Instr
 604
 605 #if   i386_TARGET_ARCH
 606 x86_mkSpillInstr reg delta slot
 607   = let off     = spillSlotToOffset slot
 608     in
 609     let off_w = (off-delta) `div` 4
 610     in case targetClassOfReg reg of
 611            RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
 612            _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 613
 614 #elif x86_64_TARGET_ARCH
 615 x86_mkSpillInstr reg delta slot
 616   = let off     = spillSlotToOffset slot
 617     in
 618     let off_w = (off-delta) `div` 8
 619     in case targetClassOfReg reg of
 620            RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
 621            RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 622            _         -> panic "X86.mkSpillInstr: no match"
 623                 -- ToDo: will it work to always spill as a double?
 624                 -- does that cause a stall if the data was a float?
 625 #else
 626 x86_mkSpillInstr _ _ _
 627     =   panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
 628 #endif
 629
 630
 631 -- | Make a spill reload instruction.
 632 x86_mkLoadInstr
 633         :: Reg          -- register to load
 634         -> Int          -- current stack delta
 635         -> Int          -- spill slot to use
 636         -> Instr
 637
 638 #if   i386_TARGET_ARCH
 639 x86_mkLoadInstr reg delta slot
 640   = let off     = spillSlotToOffset slot
 641     in
 642         let off_w = (off-delta) `div` 4
 643         in case targetClassOfReg reg of {
 644               RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
 645               _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
 646 #elif x86_64_TARGET_ARCH
 647 x86_mkLoadInstr reg delta slot
 648   = let off     = spillSlotToOffset slot
 649     in
 650         let off_w = (off-delta) `div` 8
 651         in case targetClassOfReg reg of
 652               RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
 653               _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 654 #else
 655 x86_mkLoadInstr _ _ _
 656         = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
 657 #endif
 658
 659 spillSlotSize :: Int
 660 spillSlotSize = IF_ARCH_i386(12, 8)
 661
 662 maxSpillSlots :: Int
 663 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 664
 665 -- convert a spill slot number to a *byte* offset, with no sign:
 666 -- decide on a per arch basis whether you are spilling above or below
 667 -- the C stack pointer.
 668 spillSlotToOffset :: Int -> Int
 669 spillSlotToOffset slot
 670    | slot >= 0 && slot < maxSpillSlots
 671    = 64 + spillSlotSize * slot
 672    | otherwise
 673    = pprPanic "spillSlotToOffset:"
 674               (   text "invalid spill location: " <> int slot
 675               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 676
 677 --------------------------------------------------------------------------------
 678
 679 -- | See if this instruction is telling us the current C stack delta
 680 x86_takeDeltaInstr
 681         :: Instr
 682         -> Maybe Int
 683
 684 x86_takeDeltaInstr instr
 685  = case instr of
 686         DELTA i         -> Just i
 687         _               -> Nothing
 688
 689
 690 x86_isMetaInstr
 691         :: Instr
 692         -> Bool
 693
 694 x86_isMetaInstr instr
 695  = case instr of
 696         COMMENT{}       -> True
 697         LDATA{}         -> True
 698         NEWBLOCK{}      -> True
 699         DELTA{}         -> True
 700         _               -> False
 701
 702
 703
 704 -- | Make a reg-reg move instruction.
 705 --      On SPARC v8 there are no instructions to move directly between
 706 --      floating point and integer regs. If we need to do that then we
 707 --      have to go via memory.
 708 --
 709 x86_mkRegRegMoveInstr
 710         :: Reg
 711         -> Reg
 712         -> Instr
 713
 714 x86_mkRegRegMoveInstr src dst
 715  = case targetClassOfReg src of
 716 #if   i386_TARGET_ARCH
 717         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 718         RcDouble  -> GMOV src dst
 719         RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 720 #else
 721         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 722         RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
 723         RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 724 #endif
 725
 726
 727 -- | Check whether an instruction represents a reg-reg move.
 728 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 729 --      by assigning the src and dest temporaries to the same real register.
 730 --
 731 x86_takeRegRegMoveInstr
 732         :: Instr
 733         -> Maybe (Reg,Reg)
 734
 735 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 736         = Just (r1,r2)
 737
 738 x86_takeRegRegMoveInstr _  = Nothing
 739
 740
 741 -- | Make an unconditional branch instruction.
 742 x86_mkJumpInstr
 743         :: BlockId
 744         -> [Instr]
 745
 746 x86_mkJumpInstr id
 747         = [JXX ALWAYS id]
 748
 749
 750
 751
 752
 753 i386_insert_ffrees
 754         :: [GenBasicBlock Instr]
 755         -> [GenBasicBlock Instr]
 756
 757 i386_insert_ffrees blocks
 758    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 759    = map ffree_before_nonlocal_transfers blocks
 760
 761    | otherwise
 762    = blocks
 763   where
 764    ffree_before_nonlocal_transfers (BasicBlock id insns)
 765      = BasicBlock id (foldr p [] insns)
 766      where p insn r = case insn of
 767                         CALL _ _ -> GFREE : insn : r
 768                         JMP _    -> GFREE : insn : r
 769                         _        -> insn : r
 770
 771 -- if you ever add a new FP insn to the fake x86 FP insn set,
 772 -- you must update this too
 773 is_G_instr :: Instr -> Bool
 774 is_G_instr instr
 775    = case instr of
 776         GMOV{}          -> True
 777         GLD{}           -> True
 778         GST{}           -> True
 779         GLDZ{}          -> True
 780         GLD1{}          -> True
 781         GFTOI{}         -> True
 782         GDTOI{}         -> True
 783         GITOF{}         -> True
 784         GITOD{}         -> True
 785         GADD{}          -> True
 786         GDIV{}          -> True
 787         GSUB{}          -> True
 788         GMUL{}          -> True
 789         GCMP{}          -> True
 790         GABS{}          -> True
 791         GNEG{}          -> True
 792         GSQRT{}         -> True
 793         GSIN{}          -> True
 794         GCOS{}          -> True
 795         GTAN{}          -> True
 796         GFREE           -> panic "is_G_instr: GFREE (!)"
 797         _               -> False
 798
 799
 800 data JumpDest = DestBlockId BlockId | DestImm Imm
 801
 802
 803 canShortcut :: Instr -> Maybe JumpDest
 804 canShortcut (JXX ALWAYS id)    = Just (DestBlockId id)
 805 canShortcut (JMP (OpImm imm))  = Just (DestImm imm)
 806 canShortcut _                  = Nothing
 807
 808
 809 -- This helper shortcuts a sequence of branches.
 810 -- The blockset helps avoid following cycles.
 811 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 812 shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
 813   where shortcutJump' fn seen insn@(JXX cc id) =
 814           if elemBlockSet id seen then insn
 815           else case fn id of
 816                  Nothing                -> insn
 817                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 818                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 819                where seen' = extendBlockSet seen id
 820         shortcutJump' _ _ other = other
 821
 822 -- Here because it knows about JumpDest
 823 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 824 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 825   | Just uq <- maybeAsmTemp lab
 826   = CmmStaticLit (CmmLabel (shortBlockId fn emptyUniqSet (BlockId uq)))
 827 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 828   | Just uq <- maybeAsmTemp lbl1
 829   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn emptyUniqSet (BlockId uq)) lbl2 off)
 830         -- slightly dodgy, we're ignoring the second label, but this
 831         -- works with the way we use CmmLabelDiffOff for jump tables now.
 832
 833 shortcutStatic _ other_static
 834         = other_static
 835
 836 shortBlockId
 837         :: (BlockId -> Maybe JumpDest)
 838         -> UniqSet Unique
 839         -> BlockId
 840         -> CLabel
 841
 842 shortBlockId fn seen blockid@(BlockId uq) =
 843   case (elementOfUniqSet uq seen, fn blockid) of
 844     (True, _)    -> mkAsmTempLabel uq
 845     (_, Nothing) -> mkAsmTempLabel uq
 846     (_, Just (DestBlockId blockid'))  -> shortBlockId fn (addOneToUniqSet seen uq) blockid'
 847     (_, Just (DestImm (ImmCLbl lbl))) -> lbl
 848     (_, _other) -> panic "shortBlockId"