compiler/nativeGen/X86/Instr.hs

   1 -----------------------------------------------------------------------------
   2 --
   3 -- Machine-dependent assembly language
   4 --
   5 -- (c) The University of Glasgow 1993-2004
   6 --
   7 -----------------------------------------------------------------------------
   8
   9 #include "HsVersions.h"
  10 #include "nativeGen/NCG.h"
  11
  12 module X86.Instr
  13 where
  14
  15 import X86.Cond
  16 import X86.Regs
  17 import Instruction
  18 import Size
  19 import RegClass
  20 import Reg
  21 import TargetReg
  22
  23 import BlockId
  24 import Cmm
  25 import FastString
  26 import FastBool
  27 import Outputable
  28 import Constants        (rESERVED_C_STACK_BYTES)
  29
  30 import CLabel
  31 import Panic
  32
  33 -- Size of a PPC memory address, in bytes.
  34 --
  35 archWordSize :: Size
  36 #if i386_TARGET_ARCH
  37 archWordSize    = II32
  38 #elif x86_64_TARGET_ARCH
  39 archWordSize    = II64
  40 #else
  41 archWordSize    = panic "X86.Instr.archWordSize: not defined"
  42 #endif
  43
  44 -- | Instruction instance for x86 instruction set.
  45 instance Instruction Instr where
  46         regUsageOfInstr         = x86_regUsageOfInstr
  47         patchRegsOfInstr        = x86_patchRegsOfInstr
  48         isJumpishInstr          = x86_isJumpishInstr
  49         jumpDestsOfInstr        = x86_jumpDestsOfInstr
  50         patchJumpInstr          = x86_patchJumpInstr
  51         mkSpillInstr            = x86_mkSpillInstr
  52         mkLoadInstr             = x86_mkLoadInstr
  53         takeDeltaInstr          = x86_takeDeltaInstr
  54         isMetaInstr             = x86_isMetaInstr
  55         mkRegRegMoveInstr       = x86_mkRegRegMoveInstr
  56         takeRegRegMoveInstr     = x86_takeRegRegMoveInstr
  57         mkJumpInstr             = x86_mkJumpInstr
  58
  59
  60 -- -----------------------------------------------------------------------------
  61 -- Intel x86 instructions
  62
  63 {-
  64 Intel, in their infinite wisdom, selected a stack model for floating
  65 point registers on x86.  That might have made sense back in 1979 --
  66 nowadays we can see it for the nonsense it really is.  A stack model
  67 fits poorly with the existing nativeGen infrastructure, which assumes
  68 flat integer and FP register sets.  Prior to this commit, nativeGen
  69 could not generate correct x86 FP code -- to do so would have meant
  70 somehow working the register-stack paradigm into the register
  71 allocator and spiller, which sounds very difficult.
  72
  73 We have decided to cheat, and go for a simple fix which requires no
  74 infrastructure modifications, at the expense of generating ropey but
  75 correct FP code.  All notions of the x86 FP stack and its insns have
  76 been removed.  Instead, we pretend (to the instruction selector and
  77 register allocator) that x86 has six floating point registers, %fake0
  78 .. %fake5, which can be used in the usual flat manner.  We further
  79 claim that x86 has floating point instructions very similar to SPARC
  80 and Alpha, that is, a simple 3-operand register-register arrangement.
  81 Code generation and register allocation proceed on this basis.
  82
  83 When we come to print out the final assembly, our convenient fiction
  84 is converted to dismal reality.  Each fake instruction is
  85 independently converted to a series of real x86 instructions.
  86 %fake0 .. %fake5 are mapped to %st(0) .. %st(5).  To do reg-reg
  87 arithmetic operations, the two operands are pushed onto the top of the
  88 FP stack, the operation done, and the result copied back into the
  89 relevant register.  There are only six %fake registers because 2 are
  90 needed for the translation, and x86 has 8 in total.
  91
  92 The translation is inefficient but is simple and it works.  A cleverer
  93 translation would handle a sequence of insns, simulating the FP stack
  94 contents, would not impose a fixed mapping from %fake to %st regs, and
  95 hopefully could avoid most of the redundant reg-reg moves of the
  96 current translation.
  97
  98 We might as well make use of whatever unique FP facilities Intel have
  99 chosen to bless us with (let's not be churlish, after all).
 100 Hence GLDZ and GLD1.  Bwahahahahahahaha!
 101 -}
 102
 103 {-
 104 MORE FLOATING POINT MUSINGS...
 105
 106 Intel's internal floating point registers are by default 80 bit
 107 extended precision.  This means that all operations done on values in
 108 registers are done at 80 bits, and unless the intermediate values are
 109 truncated to the appropriate size (32 or 64 bits) by storing in
 110 memory, calculations in registers will give different results from
 111 calculations which pass intermediate values in memory (eg. via
 112 function calls).
 113
 114 One solution is to set the FPU into 64 bit precision mode.  Some OSs
 115 do this (eg. FreeBSD) and some don't (eg. Linux).  The problem here is
 116 that this will only affect 64-bit precision arithmetic; 32-bit
 117 calculations will still be done at 64-bit precision in registers.  So
 118 it doesn't solve the whole problem.
 119
 120 There's also the issue of what the C library is expecting in terms of
 121 precision.  It seems to be the case that glibc on Linux expects the
 122 FPU to be set to 80 bit precision, so setting it to 64 bit could have
 123 unexpected effects.  Changing the default could have undesirable
 124 effects on other 3rd-party library code too, so the right thing would
 125 be to save/restore the FPU control word across Haskell code if we were
 126 to do this.
 127
 128 gcc's -ffloat-store gives consistent results by always storing the
 129 results of floating-point calculations in memory, which works for both
 130 32 and 64-bit precision.  However, it only affects the values of
 131 user-declared floating point variables in C, not intermediate results.
 132 GHC in -fvia-C mode uses -ffloat-store (see the -fexcess-precision
 133 flag).
 134
 135 Another problem is how to spill floating point registers in the
 136 register allocator.  Should we spill the whole 80 bits, or just 64?
 137 On an OS which is set to 64 bit precision, spilling 64 is fine.  On
 138 Linux, spilling 64 bits will round the results of some operations.
 139 This is what gcc does.  Spilling at 80 bits requires taking up a full
 140 128 bit slot (so we get alignment).  We spill at 80-bits and ignore
 141 the alignment problems.
 142
 143 In the future, we'll use the SSE registers for floating point.  This
 144 requires a CPU that supports SSE2 (ordinary SSE only supports 32 bit
 145 precision float ops), which means P4 or Xeon and above.  Using SSE
 146 will solve all these problems, because the SSE registers use fixed 32
 147 bit or 64 bit precision.
 148
 149 --SDM 1/2003
 150 -}
 151
 152
 153 data Instr
 154         -- comment pseudo-op
 155         = COMMENT FastString
 156
 157         -- some static data spat out during code
 158         -- generation.  Will be extracted before
 159         -- pretty-printing.
 160         | LDATA   Section [CmmStatic]
 161
 162         -- start a new basic block.  Useful during
 163         -- codegen, removed later.  Preceding
 164         -- instruction should be a jump, as per the
 165         -- invariants for a BasicBlock (see Cmm).
 166         | NEWBLOCK BlockId
 167
 168         -- specify current stack offset for
 169         -- benefit of subsequent passes
 170         | DELTA   Int
 171
 172         -- Moves.
 173         | MOV         Size Operand Operand
 174         | MOVZxL      Size Operand Operand -- size is the size of operand 1
 175         | MOVSxL      Size Operand Operand -- size is the size of operand 1
 176         -- x86_64 note: plain mov into a 32-bit register always zero-extends
 177         -- into the 64-bit reg, in contrast to the 8 and 16-bit movs which
 178         -- don't affect the high bits of the register.
 179
 180         -- Load effective address (also a very useful three-operand add instruction :-)
 181         | LEA         Size Operand Operand
 182
 183         -- Int Arithmetic.
 184         | ADD         Size Operand Operand
 185         | ADC         Size Operand Operand
 186         | SUB         Size Operand Operand
 187
 188         | MUL         Size Operand Operand
 189         | IMUL        Size Operand Operand      -- signed int mul
 190         | IMUL2       Size Operand              -- %edx:%eax = operand * %eax
 191
 192         | DIV         Size Operand              -- eax := eax:edx/op, edx := eax:edx%op
 193         | IDIV        Size Operand              -- ditto, but signed
 194
 195         -- Simple bit-twiddling.
 196         | AND         Size Operand Operand
 197         | OR          Size Operand Operand
 198         | XOR         Size Operand Operand
 199         | NOT         Size Operand
 200         | NEGI        Size Operand              -- NEG instruction (name clash with Cond)
 201
 202         -- Shifts (amount may be immediate or %cl only)
 203         | SHL         Size Operand{-amount-} Operand
 204         | SAR         Size Operand{-amount-} Operand
 205         | SHR         Size Operand{-amount-} Operand
 206
 207         | BT          Size Imm Operand
 208         | NOP
 209
 210         -- x86 Float Arithmetic.
 211         -- Note that we cheat by treating G{ABS,MOV,NEG} of doubles
 212         -- as single instructions right up until we spit them out.
 213         -- all the 3-operand fake fp insns are src1 src2 dst
 214         -- and furthermore are constrained to be fp regs only.
 215         -- IMPORTANT: keep is_G_insn up to date with any changes here
 216         | GMOV        Reg Reg -- src(fpreg), dst(fpreg)
 217         | GLD         Size AddrMode Reg -- src, dst(fpreg)
 218         | GST         Size Reg AddrMode -- src(fpreg), dst
 219
 220         | GLDZ        Reg -- dst(fpreg)
 221         | GLD1        Reg -- dst(fpreg)
 222
 223         | GFTOI       Reg Reg -- src(fpreg), dst(intreg)
 224         | GDTOI       Reg Reg -- src(fpreg), dst(intreg)
 225
 226         | GITOF       Reg Reg -- src(intreg), dst(fpreg)
 227         | GITOD       Reg Reg -- src(intreg), dst(fpreg)
 228
 229         | GADD        Size Reg Reg Reg -- src1, src2, dst
 230         | GDIV        Size Reg Reg Reg -- src1, src2, dst
 231         | GSUB        Size Reg Reg Reg -- src1, src2, dst
 232         | GMUL        Size Reg Reg Reg -- src1, src2, dst
 233
 234                 -- FP compare.  Cond must be `elem` [EQQ, NE, LE, LTT, GE, GTT]
 235                 -- Compare src1 with src2; set the Zero flag iff the numbers are
 236                 -- comparable and the comparison is True.  Subsequent code must
 237                 -- test the %eflags zero flag regardless of the supplied Cond.
 238         | GCMP        Cond Reg Reg -- src1, src2
 239
 240         | GABS        Size Reg Reg -- src, dst
 241         | GNEG        Size Reg Reg -- src, dst
 242         | GSQRT       Size Reg Reg -- src, dst
 243         | GSIN        Size CLabel CLabel Reg Reg -- src, dst
 244         | GCOS        Size CLabel CLabel Reg Reg -- src, dst
 245         | GTAN        Size CLabel CLabel Reg Reg -- src, dst
 246
 247         | GFREE         -- do ffree on all x86 regs; an ugly hack
 248
 249
 250         -- SSE2 floating point: we use a restricted set of the available SSE2
 251         -- instructions for floating-point.
 252         -- use MOV for moving (either movss or movsd (movlpd better?))
 253         | CVTSS2SD      Reg Reg         -- F32 to F64
 254         | CVTSD2SS      Reg Reg         -- F64 to F32
 255         | CVTTSS2SIQ    Operand Reg     -- F32 to I32/I64 (with truncation)
 256         | CVTTSD2SIQ    Operand Reg     -- F64 to I32/I64 (with truncation)
 257         | CVTSI2SS      Operand Reg     -- I32/I64 to F32
 258         | CVTSI2SD      Operand Reg     -- I32/I64 to F64
 259
 260         -- use ADD & SUB for arithmetic.  In both cases, operands
 261         -- are  Operand Reg.
 262
 263         -- SSE2 floating-point division:
 264         | FDIV          Size Operand Operand   -- divisor, dividend(dst)
 265
 266         -- use CMP for comparisons.  ucomiss and ucomisd instructions
 267         -- compare single/double prec floating point respectively.
 268
 269         | SQRT          Size Operand Reg        -- src, dst
 270
 271
 272         -- Comparison
 273         | TEST          Size Operand Operand
 274         | CMP           Size Operand Operand
 275         | SETCC         Cond Operand
 276
 277         -- Stack Operations.
 278         | PUSH          Size Operand
 279         | POP           Size Operand
 280         -- both unused (SDM):
 281         --  | PUSHA
 282         --  | POPA
 283
 284         -- Jumping around.
 285         | JMP         Operand
 286         | JXX         Cond BlockId  -- includes unconditional branches
 287         | JXX_GBL     Cond Imm      -- non-local version of JXX
 288         | JMP_TBL     Operand [BlockId]  -- table jump
 289         | CALL        (Either Imm Reg) [Reg]
 290
 291         -- Other things.
 292         | CLTD Size              -- sign extend %eax into %edx:%eax
 293
 294         | FETCHGOT    Reg        -- pseudo-insn for ELF position-independent code
 295                                  -- pretty-prints as
 296                                  --       call 1f
 297                                  -- 1:    popl %reg
 298                                  --       addl __GLOBAL_OFFSET_TABLE__+.-1b, %reg
 299         | FETCHPC     Reg        -- pseudo-insn for Darwin position-independent code
 300                                  -- pretty-prints as
 301                                  --       call 1f
 302                                  -- 1:    popl %reg
 303
 304
 305 data Operand
 306         = OpReg  Reg            -- register
 307         | OpImm  Imm            -- immediate value
 308         | OpAddr AddrMode       -- memory reference
 309
 310
 311
 312 x86_regUsageOfInstr :: Instr -> RegUsage
 313 x86_regUsageOfInstr instr
 314  = case instr of
 315     MOV    _ src dst    -> usageRW src dst
 316     MOVZxL _ src dst    -> usageRW src dst
 317     MOVSxL _ src dst    -> usageRW src dst
 318     LEA    _ src dst    -> usageRW src dst
 319     ADD    _ src dst    -> usageRM src dst
 320     ADC    _ src dst    -> usageRM src dst
 321     SUB    _ src dst    -> usageRM src dst
 322     IMUL   _ src dst    -> usageRM src dst
 323     IMUL2  _ src       -> mkRU (eax:use_R src) [eax,edx]
 324     MUL    _ src dst    -> usageRM src dst
 325     DIV    _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 326     IDIV   _ op -> mkRU (eax:edx:use_R op) [eax,edx]
 327     AND    _ src dst    -> usageRM src dst
 328     OR     _ src dst    -> usageRM src dst
 329
 330     XOR    _ (OpReg src) (OpReg dst)
 331         | src == dst    -> mkRU [] [dst]
 332
 333     XOR    _ src dst    -> usageRM src dst
 334     NOT    _ op         -> usageM op
 335     NEGI   _ op         -> usageM op
 336     SHL    _ imm dst    -> usageRM imm dst
 337     SAR    _ imm dst    -> usageRM imm dst
 338     SHR    _ imm dst    -> usageRM imm dst
 339     BT     _ _   src    -> mkRUR (use_R src)
 340
 341     PUSH   _ op         -> mkRUR (use_R op)
 342     POP    _ op         -> mkRU [] (def_W op)
 343     TEST   _ src dst    -> mkRUR (use_R src ++ use_R dst)
 344     CMP    _ src dst    -> mkRUR (use_R src ++ use_R dst)
 345     SETCC  _ op         -> mkRU [] (def_W op)
 346     JXX    _ _          -> mkRU [] []
 347     JXX_GBL _ _         -> mkRU [] []
 348     JMP     op          -> mkRUR (use_R op)
 349     JMP_TBL op _        -> mkRUR (use_R op)
 350     CALL (Left _)  params   -> mkRU params callClobberedRegs
 351     CALL (Right reg) params -> mkRU (reg:params) callClobberedRegs
 352     CLTD   _            -> mkRU [eax] [edx]
 353     NOP                 -> mkRU [] []
 354
 355 #if i386_TARGET_ARCH
 356     GMOV   src dst      -> mkRU [src] [dst]
 357     GLD    _ src dst    -> mkRU (use_EA src) [dst]
 358     GST    _ src dst    -> mkRUR (src : use_EA dst)
 359
 360     GLDZ   dst          -> mkRU [] [dst]
 361     GLD1   dst          -> mkRU [] [dst]
 362
 363     GFTOI  src dst      -> mkRU [src] [dst]
 364     GDTOI  src dst      -> mkRU [src] [dst]
 365
 366     GITOF  src dst      -> mkRU [src] [dst]
 367     GITOD  src dst      -> mkRU [src] [dst]
 368
 369     GADD   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 370     GSUB   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 371     GMUL   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 372     GDIV   _ s1 s2 dst  -> mkRU [s1,s2] [dst]
 373
 374     GCMP   _ src1 src2   -> mkRUR [src1,src2]
 375     GABS   _ src dst     -> mkRU [src] [dst]
 376     GNEG   _ src dst     -> mkRU [src] [dst]
 377     GSQRT  _ src dst     -> mkRU [src] [dst]
 378     GSIN   _ _ _ src dst -> mkRU [src] [dst]
 379     GCOS   _ _ _ src dst -> mkRU [src] [dst]
 380     GTAN   _ _ _ src dst -> mkRU [src] [dst]
 381 #endif
 382
 383 #if x86_64_TARGET_ARCH
 384     CVTSS2SD   src dst  -> mkRU [src] [dst]
 385     CVTSD2SS   src dst  -> mkRU [src] [dst]
 386     CVTTSS2SIQ src dst  -> mkRU (use_R src) [dst]
 387     CVTTSD2SIQ src dst  -> mkRU (use_R src) [dst]
 388     CVTSI2SS   src dst  -> mkRU (use_R src) [dst]
 389     CVTSI2SD   src dst  -> mkRU (use_R src) [dst]
 390     FDIV _     src dst  -> usageRM src dst
 391 #endif
 392
 393     FETCHGOT reg        -> mkRU [] [reg]
 394     FETCHPC  reg        -> mkRU [] [reg]
 395
 396     COMMENT _           -> noUsage
 397     DELTA   _           -> noUsage
 398
 399     _other              -> panic "regUsage: unrecognised instr"
 400
 401  where
 402     -- 2 operand form; first operand Read; second Written
 403     usageRW :: Operand -> Operand -> RegUsage
 404     usageRW op (OpReg reg)      = mkRU (use_R op) [reg]
 405     usageRW op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 406     usageRW _ _                 = panic "X86.RegInfo.usageRW: no match"
 407
 408     -- 2 operand form; first operand Read; second Modified
 409     usageRM :: Operand -> Operand -> RegUsage
 410     usageRM op (OpReg reg)      = mkRU (use_R op ++ [reg]) [reg]
 411     usageRM op (OpAddr ea)      = mkRUR (use_R op ++ use_EA ea)
 412     usageRM _ _                 = panic "X86.RegInfo.usageRM: no match"
 413
 414     -- 1 operand form; operand Modified
 415     usageM :: Operand -> RegUsage
 416     usageM (OpReg reg)          = mkRU [reg] [reg]
 417     usageM (OpAddr ea)          = mkRUR (use_EA ea)
 418     usageM _                    = panic "X86.RegInfo.usageM: no match"
 419
 420     -- Registers defd when an operand is written.
 421     def_W (OpReg reg)           = [reg]
 422     def_W (OpAddr _ )           = []
 423     def_W _                     = panic "X86.RegInfo.def_W: no match"
 424
 425     -- Registers used when an operand is read.
 426     use_R (OpReg reg)  = [reg]
 427     use_R (OpImm _)    = []
 428     use_R (OpAddr ea)  = use_EA ea
 429
 430     -- Registers used to compute an effective address.
 431     use_EA (ImmAddr _ _) = []
 432     use_EA (AddrBaseIndex base index _) =
 433         use_base base $! use_index index
 434         where use_base (EABaseReg r) x = r : x
 435               use_base _ x             = x
 436               use_index EAIndexNone   = []
 437               use_index (EAIndex i _) = [i]
 438
 439     mkRUR src = src' `seq` RU src' []
 440         where src' = filter interesting src
 441
 442     mkRU src dst = src' `seq` dst' `seq` RU src' dst'
 443         where src' = filter interesting src
 444               dst' = filter interesting dst
 445
 446 interesting :: Reg -> Bool
 447 interesting (RegVirtual _)              = True
 448 interesting (RegReal (RealRegSingle i)) = isFastTrue (freeReg i)
 449 interesting (RegReal (RealRegPair{}))   = panic "X86.interesting: no reg pairs on this arch"
 450
 451
 452
 453 x86_patchRegsOfInstr :: Instr -> (Reg -> Reg) -> Instr
 454 x86_patchRegsOfInstr instr env
 455  = case instr of
 456     MOV  sz src dst     -> patch2 (MOV  sz) src dst
 457     MOVZxL sz src dst   -> patch2 (MOVZxL sz) src dst
 458     MOVSxL sz src dst   -> patch2 (MOVSxL sz) src dst
 459     LEA  sz src dst     -> patch2 (LEA  sz) src dst
 460     ADD  sz src dst     -> patch2 (ADD  sz) src dst
 461     ADC  sz src dst     -> patch2 (ADC  sz) src dst
 462     SUB  sz src dst     -> patch2 (SUB  sz) src dst
 463     IMUL sz src dst     -> patch2 (IMUL sz) src dst
 464     IMUL2 sz src        -> patch1 (IMUL2 sz) src
 465     MUL sz src dst      -> patch2 (MUL sz) src dst
 466     IDIV sz op          -> patch1 (IDIV sz) op
 467     DIV sz op           -> patch1 (DIV sz) op
 468     AND  sz src dst     -> patch2 (AND  sz) src dst
 469     OR   sz src dst     -> patch2 (OR   sz) src dst
 470     XOR  sz src dst     -> patch2 (XOR  sz) src dst
 471     NOT  sz op          -> patch1 (NOT  sz) op
 472     NEGI sz op          -> patch1 (NEGI sz) op
 473     SHL  sz imm dst     -> patch1 (SHL sz imm) dst
 474     SAR  sz imm dst     -> patch1 (SAR sz imm) dst
 475     SHR  sz imm dst     -> patch1 (SHR sz imm) dst
 476     BT   sz imm src     -> patch1 (BT  sz imm) src
 477     TEST sz src dst     -> patch2 (TEST sz) src dst
 478     CMP  sz src dst     -> patch2 (CMP  sz) src dst
 479     PUSH sz op          -> patch1 (PUSH sz) op
 480     POP  sz op          -> patch1 (POP  sz) op
 481     SETCC cond op       -> patch1 (SETCC cond) op
 482     JMP op              -> patch1 JMP op
 483     JMP_TBL op ids      -> patch1 JMP_TBL op $ ids
 484
 485 #if i386_TARGET_ARCH
 486     GMOV src dst        -> GMOV (env src) (env dst)
 487     GLD  sz src dst     -> GLD sz (lookupAddr src) (env dst)
 488     GST  sz src dst     -> GST sz (env src) (lookupAddr dst)
 489
 490     GLDZ dst            -> GLDZ (env dst)
 491     GLD1 dst            -> GLD1 (env dst)
 492
 493     GFTOI src dst       -> GFTOI (env src) (env dst)
 494     GDTOI src dst       -> GDTOI (env src) (env dst)
 495
 496     GITOF src dst       -> GITOF (env src) (env dst)
 497     GITOD src dst       -> GITOD (env src) (env dst)
 498
 499     GADD sz s1 s2 dst   -> GADD sz (env s1) (env s2) (env dst)
 500     GSUB sz s1 s2 dst   -> GSUB sz (env s1) (env s2) (env dst)
 501     GMUL sz s1 s2 dst   -> GMUL sz (env s1) (env s2) (env dst)
 502     GDIV sz s1 s2 dst   -> GDIV sz (env s1) (env s2) (env dst)
 503
 504     GCMP sz src1 src2   -> GCMP sz (env src1) (env src2)
 505     GABS sz src dst     -> GABS sz (env src) (env dst)
 506     GNEG sz src dst     -> GNEG sz (env src) (env dst)
 507     GSQRT sz src dst    -> GSQRT sz (env src) (env dst)
 508     GSIN sz l1 l2 src dst       -> GSIN sz l1 l2 (env src) (env dst)
 509     GCOS sz l1 l2 src dst       -> GCOS sz l1 l2 (env src) (env dst)
 510     GTAN sz l1 l2 src dst       -> GTAN sz l1 l2 (env src) (env dst)
 511 #endif
 512
 513 #if x86_64_TARGET_ARCH
 514     CVTSS2SD src dst    -> CVTSS2SD (env src) (env dst)
 515     CVTSD2SS src dst    -> CVTSD2SS (env src) (env dst)
 516     CVTTSS2SIQ src dst  -> CVTTSS2SIQ (patchOp src) (env dst)
 517     CVTTSD2SIQ src dst  -> CVTTSD2SIQ (patchOp src) (env dst)
 518     CVTSI2SS src dst    -> CVTSI2SS (patchOp src) (env dst)
 519     CVTSI2SD src dst    -> CVTSI2SD (patchOp src) (env dst)
 520     FDIV sz src dst     -> FDIV sz (patchOp src) (patchOp dst)
 521 #endif
 522
 523     CALL (Left _)  _    -> instr
 524     CALL (Right reg) p  -> CALL (Right (env reg)) p
 525
 526     FETCHGOT reg        -> FETCHGOT (env reg)
 527     FETCHPC  reg        -> FETCHPC  (env reg)
 528
 529     NOP                 -> instr
 530     COMMENT _           -> instr
 531     DELTA _             -> instr
 532
 533     JXX _ _             -> instr
 534     JXX_GBL _ _         -> instr
 535     CLTD _              -> instr
 536
 537     _other              -> panic "patchRegs: unrecognised instr"
 538
 539   where
 540     patch1 insn op      = insn $! patchOp op
 541     patch2 insn src dst = (insn $! patchOp src) $! patchOp dst
 542
 543     patchOp (OpReg  reg) = OpReg $! env reg
 544     patchOp (OpImm  imm) = OpImm imm
 545     patchOp (OpAddr ea)  = OpAddr $! lookupAddr ea
 546
 547     lookupAddr (ImmAddr imm off) = ImmAddr imm off
 548     lookupAddr (AddrBaseIndex base index disp)
 549       = ((AddrBaseIndex $! lookupBase base) $! lookupIndex index) disp
 550       where
 551         lookupBase EABaseNone       = EABaseNone
 552         lookupBase EABaseRip        = EABaseRip
 553         lookupBase (EABaseReg r)    = EABaseReg (env r)
 554
 555         lookupIndex EAIndexNone     = EAIndexNone
 556         lookupIndex (EAIndex r i)   = EAIndex (env r) i
 557
 558
 559 --------------------------------------------------------------------------------
 560 x86_isJumpishInstr
 561         :: Instr -> Bool
 562
 563 x86_isJumpishInstr instr
 564  = case instr of
 565         JMP{}           -> True
 566         JXX{}           -> True
 567         JXX_GBL{}       -> True
 568         JMP_TBL{}       -> True
 569         CALL{}          -> True
 570         _               -> False
 571
 572
 573 x86_jumpDestsOfInstr
 574         :: Instr
 575         -> [BlockId]
 576
 577 x86_jumpDestsOfInstr insn
 578   = case insn of
 579         JXX _ id        -> [id]
 580         JMP_TBL _ ids   -> ids
 581         _               -> []
 582
 583
 584 x86_patchJumpInstr
 585         :: Instr -> (BlockId -> BlockId) -> Instr
 586
 587 x86_patchJumpInstr insn patchF
 588   = case insn of
 589         JXX cc id       -> JXX cc (patchF id)
 590         JMP_TBL _ _     -> error "Cannot patch JMP_TBL"
 591         _               -> insn
 592
 593
 594
 595
 596 -- -----------------------------------------------------------------------------
 597 -- | Make a spill instruction.
 598 x86_mkSpillInstr
 599         :: Reg          -- register to spill
 600         -> Int          -- current stack delta
 601         -> Int          -- spill slot to use
 602         -> Instr
 603
 604 #if   i386_TARGET_ARCH
 605 x86_mkSpillInstr reg delta slot
 606   = let off     = spillSlotToOffset slot
 607     in
 608     let off_w = (off-delta) `div` 4
 609     in case targetClassOfReg reg of
 610            RcInteger -> MOV II32 (OpReg reg) (OpAddr (spRel off_w))
 611            _         -> GST FF80 reg (spRel off_w) {- RcFloat/RcDouble -}
 612
 613 #elif x86_64_TARGET_ARCH
 614 x86_mkSpillInstr reg delta slot
 615   = let off     = spillSlotToOffset slot
 616     in
 617     let off_w = (off-delta) `div` 8
 618     in case targetClassOfReg reg of
 619            RcInteger -> MOV II64 (OpReg reg) (OpAddr (spRel off_w))
 620            RcDouble  -> MOV FF64 (OpReg reg) (OpAddr (spRel off_w))
 621            _         -> panic "X86.mkSpillInstr: no match"
 622                 -- ToDo: will it work to always spill as a double?
 623                 -- does that cause a stall if the data was a float?
 624 #else
 625 x86_mkSpillInstr _ _ _
 626     =   panic "X86.RegInfo.mkSpillInstr: not defined for this architecture."
 627 #endif
 628
 629
 630 -- | Make a spill reload instruction.
 631 x86_mkLoadInstr
 632         :: Reg          -- register to load
 633         -> Int          -- current stack delta
 634         -> Int          -- spill slot to use
 635         -> Instr
 636
 637 #if   i386_TARGET_ARCH
 638 x86_mkLoadInstr reg delta slot
 639   = let off     = spillSlotToOffset slot
 640     in
 641         let off_w = (off-delta) `div` 4
 642         in case targetClassOfReg reg of {
 643               RcInteger -> MOV II32 (OpAddr (spRel off_w)) (OpReg reg);
 644               _         -> GLD FF80 (spRel off_w) reg} {- RcFloat/RcDouble -}
 645 #elif x86_64_TARGET_ARCH
 646 x86_mkLoadInstr reg delta slot
 647   = let off     = spillSlotToOffset slot
 648     in
 649         let off_w = (off-delta) `div` 8
 650         in case targetClassOfReg reg of
 651               RcInteger -> MOV II64 (OpAddr (spRel off_w)) (OpReg reg)
 652               _         -> MOV FF64 (OpAddr (spRel off_w)) (OpReg reg)
 653 #else
 654 x86_mkLoadInstr _ _ _
 655         = panic "X86.RegInfo.mkLoadInstr: not defined for this architecture."
 656 #endif
 657
 658 spillSlotSize :: Int
 659 spillSlotSize = IF_ARCH_i386(12, 8)
 660
 661 maxSpillSlots :: Int
 662 maxSpillSlots = ((rESERVED_C_STACK_BYTES - 64) `div` spillSlotSize) - 1
 663
 664 -- convert a spill slot number to a *byte* offset, with no sign:
 665 -- decide on a per arch basis whether you are spilling above or below
 666 -- the C stack pointer.
 667 spillSlotToOffset :: Int -> Int
 668 spillSlotToOffset slot
 669    | slot >= 0 && slot < maxSpillSlots
 670    = 64 + spillSlotSize * slot
 671    | otherwise
 672    = pprPanic "spillSlotToOffset:"
 673               (   text "invalid spill location: " <> int slot
 674               $$  text "maxSpillSlots:          " <> int maxSpillSlots)
 675
 676 --------------------------------------------------------------------------------
 677
 678 -- | See if this instruction is telling us the current C stack delta
 679 x86_takeDeltaInstr
 680         :: Instr
 681         -> Maybe Int
 682
 683 x86_takeDeltaInstr instr
 684  = case instr of
 685         DELTA i         -> Just i
 686         _               -> Nothing
 687
 688
 689 x86_isMetaInstr
 690         :: Instr
 691         -> Bool
 692
 693 x86_isMetaInstr instr
 694  = case instr of
 695         COMMENT{}       -> True
 696         LDATA{}         -> True
 697         NEWBLOCK{}      -> True
 698         DELTA{}         -> True
 699         _               -> False
 700
 701
 702
 703 -- | Make a reg-reg move instruction.
 704 --      On SPARC v8 there are no instructions to move directly between
 705 --      floating point and integer regs. If we need to do that then we
 706 --      have to go via memory.
 707 --
 708 x86_mkRegRegMoveInstr
 709         :: Reg
 710         -> Reg
 711         -> Instr
 712
 713 x86_mkRegRegMoveInstr src dst
 714  = case targetClassOfReg src of
 715 #if   i386_TARGET_ARCH
 716         RcInteger -> MOV II32 (OpReg src) (OpReg dst)
 717         RcDouble  -> GMOV src dst
 718         RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 719 #else
 720         RcInteger -> MOV II64 (OpReg src) (OpReg dst)
 721         RcDouble  -> MOV FF64 (OpReg src) (OpReg dst)
 722         RcFloat   -> panic "X86.RegInfo.mkRegRegMoveInstr: no match"
 723 #endif
 724
 725
 726 -- | Check whether an instruction represents a reg-reg move.
 727 --      The register allocator attempts to eliminate reg->reg moves whenever it can,
 728 --      by assigning the src and dest temporaries to the same real register.
 729 --
 730 x86_takeRegRegMoveInstr
 731         :: Instr
 732         -> Maybe (Reg,Reg)
 733
 734 x86_takeRegRegMoveInstr (MOV _ (OpReg r1) (OpReg r2))
 735         = Just (r1,r2)
 736
 737 x86_takeRegRegMoveInstr _  = Nothing
 738
 739
 740 -- | Make an unconditional branch instruction.
 741 x86_mkJumpInstr
 742         :: BlockId
 743         -> [Instr]
 744
 745 x86_mkJumpInstr id
 746         = [JXX ALWAYS id]
 747
 748
 749
 750
 751
 752 i386_insert_ffrees
 753         :: [GenBasicBlock Instr]
 754         -> [GenBasicBlock Instr]
 755
 756 i386_insert_ffrees blocks
 757    | or (map (any is_G_instr) [ instrs | BasicBlock _ instrs <- blocks ])
 758    = map ffree_before_nonlocal_transfers blocks
 759
 760    | otherwise
 761    = blocks
 762   where
 763    ffree_before_nonlocal_transfers (BasicBlock id insns)
 764      = BasicBlock id (foldr p [] insns)
 765      where p insn r = case insn of
 766                         CALL _ _ -> GFREE : insn : r
 767                         JMP _    -> GFREE : insn : r
 768                         _        -> insn : r
 769
 770 -- if you ever add a new FP insn to the fake x86 FP insn set,
 771 -- you must update this too
 772 is_G_instr :: Instr -> Bool
 773 is_G_instr instr
 774    = case instr of
 775         GMOV{}          -> True
 776         GLD{}           -> True
 777         GST{}           -> True
 778         GLDZ{}          -> True
 779         GLD1{}          -> True
 780         GFTOI{}         -> True
 781         GDTOI{}         -> True
 782         GITOF{}         -> True
 783         GITOD{}         -> True
 784         GADD{}          -> True
 785         GDIV{}          -> True
 786         GSUB{}          -> True
 787         GMUL{}          -> True
 788         GCMP{}          -> True
 789         GABS{}          -> True
 790         GNEG{}          -> True
 791         GSQRT{}         -> True
 792         GSIN{}          -> True
 793         GCOS{}          -> True
 794         GTAN{}          -> True
 795         GFREE           -> panic "is_G_instr: GFREE (!)"
 796         _               -> False
 797
 798
 799 data JumpDest = DestBlockId BlockId | DestImm Imm
 800
 801
 802 canShortcut :: Instr -> Maybe JumpDest
 803 canShortcut (JXX ALWAYS id)     = Just (DestBlockId id)
 804 canShortcut (JMP (OpImm imm))   = Just (DestImm imm)
 805 canShortcut _                   = Nothing
 806
 807
 808 -- The helper ensures that we don't follow cycles.
 809 shortcutJump :: (BlockId -> Maybe JumpDest) -> Instr -> Instr
 810 shortcutJump fn insn = shortcutJump' fn emptyBlockSet insn
 811   where shortcutJump' fn seen insn@(JXX cc id) =
 812           if elemBlockSet id seen then insn
 813           else case fn id of
 814                  Nothing                -> insn
 815                  Just (DestBlockId id') -> shortcutJump' fn seen' (JXX cc id')
 816                  Just (DestImm imm)     -> shortcutJump' fn seen' (JXX_GBL cc imm)
 817                where seen' = extendBlockSet seen id
 818         shortcutJump' _ _ other = other
 819
 820 -- Here because it knows about JumpDest
 821 shortcutStatic :: (BlockId -> Maybe JumpDest) -> CmmStatic -> CmmStatic
 822 shortcutStatic fn (CmmStaticLit (CmmLabel lab))
 823   | Just uq <- maybeAsmTemp lab
 824   = CmmStaticLit (CmmLabel (shortBlockId fn (BlockId uq)))
 825 shortcutStatic fn (CmmStaticLit (CmmLabelDiffOff lbl1 lbl2 off))
 826   | Just uq <- maybeAsmTemp lbl1
 827   = CmmStaticLit (CmmLabelDiffOff (shortBlockId fn (BlockId uq)) lbl2 off)
 828         -- slightly dodgy, we're ignoring the second label, but this
 829         -- works with the way we use CmmLabelDiffOff for jump tables now.
 830
 831 shortcutStatic _ other_static
 832         = other_static
 833
 834 shortBlockId
 835         :: (BlockId -> Maybe JumpDest)
 836         -> BlockId
 837         -> CLabel
 838
 839 shortBlockId fn blockid@(BlockId uq) =
 840    case fn blockid of
 841       Nothing -> mkAsmTempLabel uq
 842       Just (DestBlockId blockid')  -> shortBlockId fn blockid'
 843       Just (DestImm (ImmCLbl lbl)) -> lbl
 844       _other -> panic "shortBlockId"